author    Dimitry Andric <dim@FreeBSD.org>  2020-07-31 21:22:58 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2020-07-31 21:22:58 +0000
commit  5ffd83dbcc34f10e07f6d3e968ae6365869615f4 (patch)
tree    0e9f5cf729dde39f949698fddef45a34e2bc7f44 /contrib/llvm-project/llvm/lib/Target
parent  1799696096df87b52968b8996d00c91e0a5de8d9 (diff)
parent  cfca06d7963fa0909f90483b42a6d7d194d01e08 (diff)
Merge llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp master 2e10b7a39b9, the last commit before the llvmorg-12-init tag, from which release/11.x was branched.

Note that for now, I rolled back all our local changes to make merging easier, and I will reapply the still-relevant ones after updating to 11.0.0-rc1.
Notes: svn path=/projects/clang1100-import/; revision=363742
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target')
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td148
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp161
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.td145
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td68
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp37
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp327
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp601
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp743
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp2874
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h232
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td558
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrGISel.td124
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp557
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h99
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td607
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp104
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp140
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.h19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp443
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td2140
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedKryoDetails.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackOffset.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp49
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp56
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h58
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td60
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp67
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp202
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h69
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp95
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp (renamed from contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallLowering.cpp)96
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h (renamed from contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallLowering.h)2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp (renamed from contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp)1396
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (renamed from contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp)116
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h (renamed from contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h)13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp507
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp (renamed from contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp)49
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp (renamed from contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp)202
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h (renamed from contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h)0
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp84
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp52
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp31
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td1918
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp265
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td278
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp69
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp119
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp201
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp247
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp714
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td69
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp150
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.h15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td114
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def80
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h33
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp118
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp583
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp493
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h33
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp44
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td90
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp2341
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h122
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td42
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp2747
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h93
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp56
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp359
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp153
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp255
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp137
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp154
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp2136
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp142
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp181
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h172
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp115
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp507
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h94
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp280
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/BUFInstructions.td183
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/CaymanInstructions.td5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td59
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp46
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/EvergreenInstructions.td7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td75
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp37
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNProcessors.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp109
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.h10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp123
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp73
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp100
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp41
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/MIMGInstructions.td239
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600AsmPrinter.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600FrameLowering.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp33
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp51
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp997
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.h37
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp2276
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h63
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp203
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp373
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp496
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp1090
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h67
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td98
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td677
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp627
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp303
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp60
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp190
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h142
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegister.cpp59
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp175
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp139
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp326
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp1387
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h165
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td453
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp160
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp147
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp59
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td104
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td153
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp202
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h153
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp114
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VIInstructions.td13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td139
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td76
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td242
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td92
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td43
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCAsmPrinter.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCFrameLowering.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCFrameLowering.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrFormats.td48
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.td30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCMachineFunctionInfo.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.td16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARM.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARM.td73
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp267
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.h26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp662
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h190
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp95
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMBasicBlockInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMBasicBlockInfo.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp57
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.cpp65
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.td48
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp72
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantPoolValue.h14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp858
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp94
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp239
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h41
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp532
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp2359
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h87
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrCDE.td666
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrFormats.td10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td146
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td2090
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td486
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td92
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td266
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstructionSelector.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMLegalizerInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp311
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp1116
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMMCInstLower.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMParallelDSP.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMPredicates.td130
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp60
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterInfo.td24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleSwift.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp532
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h67
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp442
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp59
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp85
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp50
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp147
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp42
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp911
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp592
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp286
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp464
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp54
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Thumb1FrameLowering.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Thumb1InstrInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp58
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.h19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp31
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ThumbRegisterInfo.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Utils/ARMBaseInfo.cpp31
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h66
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRAsmPrinter.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRCallingConv.td18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRDevices.td18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp276
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRFrameLowering.cpp119
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRFrameLowering.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp411
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrFormats.td20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.td70
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRRegisterInfo.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRRegisterInfo.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRRegisterInfo.td27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.h19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp50
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp159
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp96
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp33
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPF.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp63
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFAsmPrinter.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFCORE.h18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.cpp40
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.h15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFMCInstLower.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp69
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp87
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFPreserveDIType.cpp131
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp250
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.h68
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp148
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/Hexagon.td99
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonArch.h37
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCallingConv.td32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepArch.h39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepArch.td26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc40
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td493
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td7665
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepITypes.h87
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepITypes.td87
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td5827
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td6032
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td5948
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepMappings.td11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepMask.h2821
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepOperands.td182
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h200
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp275
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.h19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIICScalar.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp81
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp330
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.h37
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp224
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrFormats.td103
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrFormatsV5.td86
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrFormatsV65.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp260
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.h43
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIntrinsics.td76
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td64
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td70
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPeephole.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPseudo.td18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td82
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSchedule.td26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonScheduleV67.td39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonScheduleV67T.td61
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp51
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.h55
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp117
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h52
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVExtract.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp74
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp31
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp167
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h43
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp165
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp730
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h83
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/Lanai.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.h21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMCInstLower.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.h14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiRegisterInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSelectionDAGInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSelectionDAGInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetObjectFile.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetObjectFile.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp51
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430FrameLowering.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430MCInstLower.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430RegisterInfo.td10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430Subtarget.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp307
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp61
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp72
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsInstrFormats.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsInstrInfo.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips.td35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips16FrameLowering.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips16FrameLowering.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips16HardFloat.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelLowering.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips16RegisterInfo.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp76
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.h20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsBranchExpansion.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsCCState.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.cpp114
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsDSPInstrFormats.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsFastISel.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp362
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.h17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFPU.td34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFormats.td47
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.cpp58
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.h18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td145
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsInstructionSelector.cpp125
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp221
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsMSAInstrInfo.td20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsMachineFunction.cpp36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsMachineFunction.h26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp142
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.h60
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterInfo.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterInfo.td16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp79
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSEInstrInfo.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSERegisterInfo.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSchedule.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleGeneric.td8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleP5600.td5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetObjectFile.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetStreamer.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.td7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp96
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp164
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp83
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp76
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp79
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp112
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h54
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp75
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp124
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp63
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp84
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp380
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp49
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.h37
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td93
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp586
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.td44
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp57
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp91
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp942
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.h21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp510
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp2820
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h179
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td130
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td55
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrHTM.td7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp1393
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h185
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td493
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrPrefix.td1035
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrQPX.td31
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrSPE.td16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td5803
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp78
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp47
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h41
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp52
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp203
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.def45
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.h22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp288
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.td20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP9.td10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h95
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp167
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h44
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp579
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp49
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp75
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h49
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp103
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h90
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp51
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp52
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp70
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h33
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td139
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp61
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp618
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp523
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp364
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp107
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h56
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp118
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h61
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td300
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp40
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td64
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoB.td634
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td51
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td42
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td873
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp42
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td99
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket32.td18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket64.td18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td27
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetObjectFile.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetObjectFile.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp19
-rwxr-xr-xcontrib/llvm-project/llvm/lib/Target/Sparc/LeonFeatures.td16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/Sparc.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/Sparc.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcCallingConv.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp102
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrAliases.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrFormats.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.td46
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcRegisterInfo.td4
-rwxr-xr-xcontrib/llvm-project/llvm/lib/Target/Sparc/SparcSchedule.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcSubtarget.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetObjectFile.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp386
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp48
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp120
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td87
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp323
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp650
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h35
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFP.td24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td198
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp318
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.h41
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td418
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperands.td60
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZPatterns.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZProcessors.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp45
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTDC.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp52
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.h13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp421
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h51
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Target.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/TargetLoweringObjectFile.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/TargetMachineC.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp1454
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp560
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp118
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp224
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp135
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h61
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp227
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h (renamed from contrib/llvm-project/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.h)29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp165
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp225
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h95
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.h20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VE.h293
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VE.td8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp283
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td70
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp185
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp269
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp863
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h49
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td176
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp494
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h43
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td1991
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEMachineFunctionInfo.h48
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp62
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td131
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp45
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h434
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.h17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.td7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp67
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp155
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp150
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp138
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp89
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp72
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp155
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp58
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp128
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISD.def3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp76
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp564
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td837
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td167
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td33
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td49
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td480
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td544
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp173
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp179
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp55
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp616
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86Operand.h36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp33
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/ImmutableGraph.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp969
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h107
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp195
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp696
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp (renamed from contrib/llvm-project/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp)37
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.h (renamed from contrib/llvm-project/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h)18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86.h18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86.td57
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp53
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.h21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp47
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86AvoidTrailingCall.cpp97
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86CallFrameOptimization.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.td5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp30
-rwxr-xr-xcontrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp89
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp269
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FixupSetCC.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp155
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp438
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.h68
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp1217
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp10041
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h1724
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp37
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86IndirectThunks.cpp102
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InsertWait.cpp151
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td119
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td2614
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td179
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrBuilder.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td157
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrControl.td31
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA.td70
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA3Info.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA3Info.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td97
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp206
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td47
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td199
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp1102
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h72
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td194
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td76
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrSGX.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td444
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrShiftRotate.td104
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td73
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrTSX.td8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrVMX.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrXOP.td22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstructionSelector.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InterleavedAccess.cpp159
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86IntrinsicsInfo.h26
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp76
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp980
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86MachineFunctionInfo.h52
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86OptimizeLEAs.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86PadShortFunction.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86PartialReduction.cpp490
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.td17
-rwxr-xr-xcontrib/llvm-project/llvm/lib/Target/X86/X86SchedBroadwell.td57
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SchedHaswell.td87
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SchedSandyBridge.td21
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeClient.td65
-rwxr-xr-xcontrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td336
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86Schedule.td17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleAtom.td7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBdVer2.td43
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBtVer2.td7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleSLM.td82
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver1.td7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver2.td96
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp51
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SelectionDAGInfo.h12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp181
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp443
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h49
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp53
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.h24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp1766
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h119
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86VZeroUpper.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86WinAllocaExpander.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86WinEHState.cpp99
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCore.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp49
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameLowering.cpp45
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameLowering.h18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp40
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.td13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreMCInstLower.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.td10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetObjectFile.h2
1119 files changed, 117271 insertions, 51401 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.h
index ac765ebcddc0..fd35b530e3ce 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.h
@@ -38,6 +38,8 @@ FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createAArch64StorePairSuppressPass();
FunctionPass *createAArch64ExpandPseudoPass();
+FunctionPass *createAArch64SLSHardeningPass();
+FunctionPass *createAArch64IndirectThunks();
FunctionPass *createAArch64SpeculationHardeningPass();
FunctionPass *createAArch64LoadStoreOptimizationPass();
FunctionPass *createAArch64SIMDInstrOptPass();
@@ -52,11 +54,13 @@ FunctionPass *createAArch64BranchTargetsPass();
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
FunctionPass *createAArch64CollectLOHPass();
+ModulePass *createSVEIntrinsicOptsPass();
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &,
AArch64Subtarget &, AArch64RegisterBankInfo &);
FunctionPass *createAArch64PreLegalizeCombiner(bool IsOptNone);
-FunctionPass *createAArch64StackTaggingPass(bool MergeInit);
+FunctionPass *createAArch64PostLegalizeCombiner(bool IsOptNone);
+FunctionPass *createAArch64StackTaggingPass(bool IsOptNone);
FunctionPass *createAArch64StackTaggingPreRAPass();
void initializeAArch64A53Fix835769Pass(PassRegistry&);
@@ -70,16 +74,19 @@ void initializeAArch64ConditionalComparesPass(PassRegistry&);
void initializeAArch64ConditionOptimizerPass(PassRegistry&);
void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
void initializeAArch64ExpandPseudoPass(PassRegistry&);
+void initializeAArch64SLSHardeningPass(PassRegistry&);
void initializeAArch64SpeculationHardeningPass(PassRegistry&);
void initializeAArch64LoadStoreOptPass(PassRegistry&);
void initializeAArch64SIMDInstrOptPass(PassRegistry&);
void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
+void initializeAArch64PostLegalizerCombinerPass(PassRegistry &);
void initializeAArch64PromoteConstantPass(PassRegistry&);
void initializeAArch64RedundantCopyEliminationPass(PassRegistry&);
void initializeAArch64StorePairSuppressPass(PassRegistry&);
void initializeFalkorHWPFFixPass(PassRegistry&);
void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&);
void initializeLDTLSCleanupPass(PassRegistry&);
+void initializeSVEIntrinsicOptsPass(PassRegistry&);
void initializeAArch64StackTaggingPass(PassRegistry&);
void initializeAArch64StackTaggingPreRAPass(PassRegistry&);
} // end namespace llvm
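The AArch64.h hunk above adds factory declarations such as createAArch64SLSHardeningPass() together with matching initializeAArch64SLSHardeningPass() entry points in the PassRegistry section. For readers less familiar with that convention, here is a minimal, generic sketch of how such a factory/initializer pair is usually wired up for a machine-function pass; the class name, pass name and (empty) behaviour below are illustrative placeholders, not the code introduced by this commit.

#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {
// Placeholder pass: the real SLS-hardening pass rewrites RET/BR/BLR
// sequences; this skeleton only shows the registration plumbing.
struct ExampleHardeningPass : public MachineFunctionPass {
  static char ID;
  ExampleHardeningPass() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override { return false; }
  StringRef getPassName() const override { return "Example hardening pass"; }
};
char ExampleHardeningPass::ID = 0;
} // end anonymous namespace

// Registers the pass with LLVM's PassRegistry and defines
// llvm::initializeExampleHardeningPassPass(PassRegistry &).
INITIALIZE_PASS(ExampleHardeningPass, "example-hardening",
                "Example hardening pass", false, false)

// Factory function mirroring the createAArch64*Pass() declarations above.
FunctionPass *createExampleHardeningPass() {
  return new ExampleHardeningPass();
}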
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
index 6e57543c4c0f..534af9686af0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
@@ -42,11 +42,11 @@ def FeatureAES : SubtargetFeature<
"Enable AES support", [FeatureNEON]>;
// Crypto has been split up and any combination is now valid (see the
-// crypto defintions above). Also, crypto is now context sensitive:
+// crypto definitions above). Also, crypto is now context sensitive:
// it has a different meaning for e.g. Armv8.4 than it has for Armv8.2.
// Therefore, we rely on Clang, the user interfacing tool, to pass on the
// appropriate crypto options. But here in the backend, crypto has very little
-// meaning anymore. We kept the Crypto defintion here for backward
+// meaning anymore. We kept the Crypto definition here for backward
// compatibility, and now imply features SHA2 and AES, which was the
// "traditional" meaning of Crypto.
def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
@@ -101,7 +101,25 @@ def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP",
"true", "Enable v8.2 data Cache Clean to Point of Persistence" >;
def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
- "Enable Scalable Vector Extension (SVE) instructions">;
+ "Enable Scalable Vector Extension (SVE) instructions", [FeatureFullFP16]>;
+
+// This flag is currently still labeled as Experimental, but when fully
+// implemented this should tell the compiler to use the zeroing pseudos to
+// benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive
+// lanes are known to be zero. The pseudos will then be expanded using the
+// MOVPRFX instruction to zero the inactive lanes. This feature should only be
+// enabled if MOVPRFX instructions are known to merge with the destructive
+// operations they prefix.
+//
+// This feature could similarly be extended to support cheap merging of _any_
+// value into the inactive lanes using the MOVPRFX instruction that uses
+// merging-predication.
+def FeatureExperimentalZeroingPseudos
+ : SubtargetFeature<"use-experimental-zeroing-pseudos",
+ "UseExperimentalZeroingPseudos", "true",
+ "Hint to the compiler that the MOVPRFX instruction is "
+ "merged with destructive operations",
+ []>;
def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true",
"Enable Scalable Vector Extension 2 (SVE2) instructions", [FeatureSVE]>;
@@ -142,7 +160,7 @@ def FeatureStrictAlign : SubtargetFeature<"strict-align",
"Disallow all unaligned memory "
"access">;
-foreach i = {1-7,9-15,18,20-28} in
+foreach i = {1-7,9-15,18,20-28,30} in
def FeatureReserveX#i : SubtargetFeature<"reserve-x"#i, "ReserveXRegister["#i#"]", "true",
"Reserve X"#i#", making it unavailable "
"as a GPR">;
@@ -240,11 +258,11 @@ def FeatureDotProd : SubtargetFeature<
def FeaturePA : SubtargetFeature<
"pa", "HasPA", "true",
- "Enable v8.3-A Pointer Authentication enchancement">;
+ "Enable v8.3-A Pointer Authentication extension">;
def FeatureJS : SubtargetFeature<
"jsconv", "HasJS", "true",
- "Enable v8.3-A JavaScript FP conversion enchancement",
+ "Enable v8.3-A JavaScript FP conversion instructions",
[FeatureFPARMv8]>;
def FeatureCCIDX : SubtargetFeature<
@@ -281,6 +299,11 @@ def FeatureAM : SubtargetFeature<
"am", "HasAM", "true",
"Enable v8.4-A Activity Monitors extension">;
+def FeatureAMVS : SubtargetFeature<
+ "amvs", "HasAMVS", "true",
+ "Enable v8.6-A Activity Monitors Virtualization support",
+ [FeatureAM]>;
+
def FeatureSEL2 : SubtargetFeature<
"sel2", "HasSEL2", "true",
"Enable v8.4-A Secure Exception Level 2 extension">;
@@ -365,6 +388,25 @@ def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals",
"true", "Use an instruction sequence for taking the address of a global "
"that allows a memory tag in the upper address bits">;
+def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16",
+ "true", "Enable BFloat16 Extension" >;
+
+def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8",
+ "true", "Enable Matrix Multiply Int8 Extension">;
+
+def FeatureMatMulFP32 : SubtargetFeature<"f32mm", "HasMatMulFP32",
+ "true", "Enable Matrix Multiply FP32 Extension", [FeatureSVE]>;
+
+def FeatureMatMulFP64 : SubtargetFeature<"f64mm", "HasMatMulFP64",
+ "true", "Enable Matrix Multiply FP64 Extension", [FeatureSVE]>;
+
+def FeatureFineGrainedTraps : SubtargetFeature<"fgt", "HasFineGrainedTraps",
+ "true", "Enable fine grained virtualization traps extension">;
+
+def FeatureEnhancedCounterVirtualization :
+ SubtargetFeature<"ecv", "HasEnhancedCounterVirtualization",
+ "true", "Enable enhanced counter virtualization extension">;
+
//===----------------------------------------------------------------------===//
// Architectures.
//
@@ -391,8 +433,13 @@ def HasV8_5aOps : SubtargetFeature<
"v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions",
[HasV8_4aOps, FeatureAltFPCmp, FeatureFRInt3264, FeatureSpecRestrict,
FeatureSSBS, FeatureSB, FeaturePredRes, FeatureCacheDeepPersist,
- FeatureBranchTargetId]
->;
+ FeatureBranchTargetId]>;
+
+def HasV8_6aOps : SubtargetFeature<
+ "v8.6a", "HasV8_6aOps", "true", "Support ARM v8.6a instructions",
+
+ [HasV8_5aOps, FeatureAMVS, FeatureBF16, FeatureFineGrainedTraps,
+ FeatureEnhancedCounterVirtualization, FeatureMatMulInt8]>;
//===----------------------------------------------------------------------===//
// Register File Description
@@ -429,6 +476,17 @@ def FeatureUseEL#i#ForTP : SubtargetFeature<"tpidr-el"#i, "UseEL"#i#"ForTP",
"true", "Permit use of TPIDR_EL"#i#" for the TLS base">;
//===----------------------------------------------------------------------===//
+// Control codegen mitigation against Straight Line Speculation vulnerability.
+//===----------------------------------------------------------------------===//
+
+def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr",
+ "HardenSlsRetBr", "true",
+ "Harden against straight line speculation across RET and BR instructions">;
+def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr",
+ "HardenSlsBlr", "true",
+ "Harden against straight line speculation across BLR instructions">;
+
+//===----------------------------------------------------------------------===//
// AArch64 Processors supported.
//
@@ -568,6 +626,67 @@ def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
FeatureSSBS
]>;
+def ProcA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",
+ "Cortex-A77 ARM processors", [
+ HasV8_2aOps,
+ FeatureFPARMv8,
+ FeatureNEON, FeatureRCPC,
+ FeatureCrypto,
+ FeatureFullFP16,
+ FeatureDotProd
+ ]>;
+
+def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily",
+ "CortexA78",
+ "Cortex-A78 ARM processors", [
+ HasV8_2aOps,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureNEON,
+ FeatureRCPC,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeatureSPE,
+ FeatureFullFP16,
+ FeatureSSBS,
+ FeatureDotProd]>;
+
+def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
+ "Cortex-X1 ARM processors", [
+ HasV8_2aOps,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureNEON,
+ FeatureRCPC,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeatureSPE,
+ FeatureFullFP16,
+ FeatureDotProd]>;
+
+def ProcA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX",
+ "Fujitsu A64FX processors", [
+ HasV8_2aOps,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureSHA2,
+ FeaturePerfMon,
+ FeatureFullFP16,
+ FeatureSVE,
+ FeaturePostRAScheduler,
+ FeatureComplxNum
+ ]>;
+
+def ProcCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel",
+ "Nvidia Carmel processors", [
+ HasV8_2aOps,
+ FeatureNEON,
+ FeatureCrypto,
+ FeatureFullFP16
+ ]>;
+
// Note that cyclone does not fuse AES instructions, but newer apple chips do
// perform the fusion and cyclone is used by default when targeting apple OSes.
def ProcAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7",
@@ -868,7 +987,7 @@ def : ProcessorModel<"generic", NoSchedModel, [
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
-// ETE and TRBE are future architecture extensions. We temporariliy enable them
+// ETE and TRBE are future architecture extensions. We temporarily enable them
// by default for users targeting generic AArch64, until it is decided in which
// armv8.x-a architecture revision they will end up. The extensions do not
// affect code generated by the compiler and can be used only by explicitly
@@ -877,6 +996,7 @@ def : ProcessorModel<"generic", NoSchedModel, [
]>;
def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
+def : ProcessorModel<"cortex-a34", CortexA53Model, [ProcA35]>;
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
def : ProcessorModel<"cortex-a55", CortexA53Model, [ProcA55]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
@@ -887,6 +1007,9 @@ def : ProcessorModel<"cortex-a73", CortexA57Model, [ProcA73]>;
def : ProcessorModel<"cortex-a75", CortexA57Model, [ProcA75]>;
def : ProcessorModel<"cortex-a76", CortexA57Model, [ProcA76]>;
def : ProcessorModel<"cortex-a76ae", CortexA57Model, [ProcA76]>;
+def : ProcessorModel<"cortex-a77", CortexA57Model, [ProcA77]>;
+def : ProcessorModel<"cortex-a78", CortexA57Model, [ProcA78]>;
+def : ProcessorModel<"cortex-x1", CortexA57Model, [ProcX1]>;
def : ProcessorModel<"neoverse-e1", CortexA53Model, [ProcNeoverseE1]>;
def : ProcessorModel<"neoverse-n1", CortexA57Model, [ProcNeoverseN1]>;
def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>;
@@ -926,6 +1049,13 @@ def : ProcessorModel<"apple-s5", CycloneModel, [ProcAppleA12]>;
// Alias for the latest Apple processor model supported by LLVM.
def : ProcessorModel<"apple-latest", CycloneModel, [ProcAppleA13]>;
+// Fujitsu A64FX
+// FIXME: Scheduling model is not implemented yet.
+def : ProcessorModel<"a64fx", NoSchedModel, [ProcA64FX]>;
+
+// Nvidia Carmel
+def : ProcessorModel<"carmel", NoSchedModel, [ProcCarmel]>;
+
//===----------------------------------------------------------------------===//
// Assembly parser
//===----------------------------------------------------------------------===//
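The AArch64.td hunks above mostly define new SubtargetFeature and ProcessorModel records (for example harden-sls-retbr, harden-sls-blr, bf16, cortex-a77, a64fx). Those records only give names to feature bits; tools then hand them to the backend as plain CPU and feature strings. Below is a minimal sketch of that path, assuming the LLVM 11-era header layout (llvm/Support/TargetRegistry.h) and an LLVM build that includes the AArch64 backend; error handling is abbreviated.

#include "llvm/ADT/Optional.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <memory>
#include <string>

int main() {
  // Register the compiled-in targets (including AArch64, if built in).
  llvm::InitializeAllTargetInfos();
  llvm::InitializeAllTargets();
  llvm::InitializeAllTargetMCs();

  const std::string TripleName = "aarch64-unknown-linux-gnu";
  std::string Error;
  const llvm::Target *T = llvm::TargetRegistry::lookupTarget(TripleName, Error);
  if (!T)
    return 1;

  // CPU and feature names come straight from the .td definitions above,
  // e.g. ProcessorModel "cortex-a77" and SubtargetFeature "harden-sls-retbr".
  llvm::TargetOptions Options;
  std::unique_ptr<llvm::TargetMachine> TM(T->createTargetMachine(
      TripleName, /*CPU=*/"cortex-a77",
      /*Features=*/"+harden-sls-retbr,+harden-sls-blr", Options,
      /*RM=*/llvm::None));
  return TM ? 0 : 1;
}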
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 6da089d1859a..3a94820dac8d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -84,8 +84,8 @@ public:
return MCInstLowering.lowerOperand(MO, MCOp);
}
- void EmitStartOfAsmFile(Module &M) override;
- void EmitJumpTableInfo() override;
+ void emitStartOfAsmFile(Module &M) override;
+ void emitJumpTableInfo() override;
void emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB, unsigned JTI);
@@ -112,7 +112,9 @@ public:
bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
const MachineInstr *MI);
- void EmitInstruction(const MachineInstr *MI) override;
+ void emitInstruction(const MachineInstr *MI) override;
+
+ void emitFunctionHeaderComment() override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AsmPrinter::getAnalysisUsage(AU);
@@ -139,7 +141,7 @@ public:
}
// Emit the rest of the function body.
- EmitFunctionBody();
+ emitFunctionBody();
// Emit the XRay table for this function.
emitXRayTable();
@@ -162,10 +164,10 @@ private:
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
- void EmitFunctionBodyEnd() override;
+ void emitFunctionBodyEnd() override;
MCSymbol *GetCPISymbol(unsigned CPID) const override;
- void EmitEndOfAsmFile(Module &M) override;
+ void emitEndOfAsmFile(Module &M) override;
AArch64FunctionInfo *AArch64FI = nullptr;
@@ -182,7 +184,7 @@ private:
} // end anonymous namespace
-void AArch64AsmPrinter::EmitStartOfAsmFile(Module &M) {
+void AArch64AsmPrinter::emitStartOfAsmFile(Module &M) {
if (!TM.getTargetTriple().isOSBinFormatELF())
return;
@@ -225,22 +227,29 @@ void AArch64AsmPrinter::EmitStartOfAsmFile(Module &M) {
OutStreamer->SwitchSection(Nt);
// Emit the note header.
- EmitAlignment(Align(8));
- OutStreamer->EmitIntValue(4, 4); // data size for "GNU\0"
- OutStreamer->EmitIntValue(4 * 4, 4); // Elf_Prop size
- OutStreamer->EmitIntValue(ELF::NT_GNU_PROPERTY_TYPE_0, 4);
- OutStreamer->EmitBytes(StringRef("GNU", 4)); // note name
+ emitAlignment(Align(8));
+ OutStreamer->emitInt32(4); // data size for "GNU\0"
+ OutStreamer->emitInt32(4 * 4); // Elf_Prop size
+ OutStreamer->emitInt32(ELF::NT_GNU_PROPERTY_TYPE_0);
+ OutStreamer->emitBytes(StringRef("GNU", 4)); // note name
// Emit the PAC/BTI properties.
- OutStreamer->EmitIntValue(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_AND, 4);
- OutStreamer->EmitIntValue(4, 4); // data size
- OutStreamer->EmitIntValue(Flags, 4); // data
- OutStreamer->EmitIntValue(0, 4); // pad
+ OutStreamer->emitInt32(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_AND);
+ OutStreamer->emitInt32(4); // data size
+ OutStreamer->emitInt32(Flags); // data
+ OutStreamer->emitInt32(0); // pad
OutStreamer->endSection(Nt);
OutStreamer->SwitchSection(Cur);
}
+void AArch64AsmPrinter::emitFunctionHeaderComment() {
+ const AArch64FunctionInfo *FI = MF->getInfo<AArch64FunctionInfo>();
+ Optional<std::string> OutlinerString = FI->getOutliningStyle();
+ if (OutlinerString != None)
+ OutStreamer->GetCommentOS() << ' ' << OutlinerString;
+}
+
void AArch64AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI)
{
const Function &F = MF->getFunction();
@@ -290,9 +299,9 @@ void AArch64AsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
// ;DATA: higher 32 bits of the address of the trampoline
// LDP X0, X30, [SP], #16 ; pop X0 and the link register from the stack
//
- OutStreamer->EmitCodeAlignment(4);
+ OutStreamer->emitCodeAlignment(4);
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
- OutStreamer->EmitLabel(CurSled);
+ OutStreamer->emitLabel(CurSled);
auto Target = OutContext.createTempSymbol();
// Emit "B #32" instruction, which jumps over the next 28 bytes.
@@ -303,8 +312,8 @@ void AArch64AsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
for (int8_t I = 0; I < NoopsInSledCount; I++)
EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0));
- OutStreamer->EmitLabel(Target);
- recordSled(CurSled, MI, Kind);
+ OutStreamer->emitLabel(Target);
+ recordSled(CurSled, MI, Kind, 2);
}
void AArch64AsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
@@ -363,25 +372,25 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
Sym->getName()));
- OutStreamer->EmitSymbolAttribute(Sym, MCSA_ELF_TypeFunction);
- OutStreamer->EmitSymbolAttribute(Sym, MCSA_Weak);
- OutStreamer->EmitSymbolAttribute(Sym, MCSA_Hidden);
- OutStreamer->EmitLabel(Sym);
+ OutStreamer->emitSymbolAttribute(Sym, MCSA_ELF_TypeFunction);
+ OutStreamer->emitSymbolAttribute(Sym, MCSA_Weak);
+ OutStreamer->emitSymbolAttribute(Sym, MCSA_Hidden);
+ OutStreamer->emitLabel(Sym);
- OutStreamer->EmitInstruction(MCInstBuilder(AArch64::UBFMXri)
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::UBFMXri)
.addReg(AArch64::X16)
.addReg(Reg)
.addImm(4)
.addImm(55),
*STI);
- OutStreamer->EmitInstruction(MCInstBuilder(AArch64::LDRBBroX)
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::LDRBBroX)
.addReg(AArch64::W16)
.addReg(AArch64::X9)
.addReg(AArch64::X16)
.addImm(0)
.addImm(0),
*STI);
- OutStreamer->EmitInstruction(
+ OutStreamer->emitInstruction(
MCInstBuilder(AArch64::SUBSXrs)
.addReg(AArch64::XZR)
.addReg(AArch64::X16)
@@ -389,33 +398,33 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSR, 56)),
*STI);
MCSymbol *HandleMismatchOrPartialSym = OutContext.createTempSymbol();
- OutStreamer->EmitInstruction(
+ OutStreamer->emitInstruction(
MCInstBuilder(AArch64::Bcc)
.addImm(AArch64CC::NE)
.addExpr(MCSymbolRefExpr::create(HandleMismatchOrPartialSym,
OutContext)),
*STI);
MCSymbol *ReturnSym = OutContext.createTempSymbol();
- OutStreamer->EmitLabel(ReturnSym);
- OutStreamer->EmitInstruction(
+ OutStreamer->emitLabel(ReturnSym);
+ OutStreamer->emitInstruction(
MCInstBuilder(AArch64::RET).addReg(AArch64::LR), *STI);
- OutStreamer->EmitLabel(HandleMismatchOrPartialSym);
+ OutStreamer->emitLabel(HandleMismatchOrPartialSym);
if (IsShort) {
- OutStreamer->EmitInstruction(MCInstBuilder(AArch64::SUBSWri)
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::SUBSWri)
.addReg(AArch64::WZR)
.addReg(AArch64::W16)
.addImm(15)
.addImm(0),
*STI);
MCSymbol *HandleMismatchSym = OutContext.createTempSymbol();
- OutStreamer->EmitInstruction(
+ OutStreamer->emitInstruction(
MCInstBuilder(AArch64::Bcc)
.addImm(AArch64CC::HI)
.addExpr(MCSymbolRefExpr::create(HandleMismatchSym, OutContext)),
*STI);
- OutStreamer->EmitInstruction(
+ OutStreamer->emitInstruction(
MCInstBuilder(AArch64::ANDXri)
.addReg(AArch64::X17)
.addReg(Reg)
@@ -423,59 +432,59 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
*STI);
unsigned Size = 1 << (AccessInfo & 0xf);
if (Size != 1)
- OutStreamer->EmitInstruction(MCInstBuilder(AArch64::ADDXri)
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::ADDXri)
.addReg(AArch64::X17)
.addReg(AArch64::X17)
.addImm(Size - 1)
.addImm(0),
*STI);
- OutStreamer->EmitInstruction(MCInstBuilder(AArch64::SUBSWrs)
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::SUBSWrs)
.addReg(AArch64::WZR)
.addReg(AArch64::W16)
.addReg(AArch64::W17)
.addImm(0),
*STI);
- OutStreamer->EmitInstruction(
+ OutStreamer->emitInstruction(
MCInstBuilder(AArch64::Bcc)
.addImm(AArch64CC::LS)
.addExpr(MCSymbolRefExpr::create(HandleMismatchSym, OutContext)),
*STI);
- OutStreamer->EmitInstruction(
+ OutStreamer->emitInstruction(
MCInstBuilder(AArch64::ORRXri)
.addReg(AArch64::X16)
.addReg(Reg)
.addImm(AArch64_AM::encodeLogicalImmediate(0xf, 64)),
*STI);
- OutStreamer->EmitInstruction(MCInstBuilder(AArch64::LDRBBui)
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::LDRBBui)
.addReg(AArch64::W16)
.addReg(AArch64::X16)
.addImm(0),
*STI);
- OutStreamer->EmitInstruction(
+ OutStreamer->emitInstruction(
MCInstBuilder(AArch64::SUBSXrs)
.addReg(AArch64::XZR)
.addReg(AArch64::X16)
.addReg(Reg)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSR, 56)),
*STI);
- OutStreamer->EmitInstruction(
+ OutStreamer->emitInstruction(
MCInstBuilder(AArch64::Bcc)
.addImm(AArch64CC::EQ)
.addExpr(MCSymbolRefExpr::create(ReturnSym, OutContext)),
*STI);
- OutStreamer->EmitLabel(HandleMismatchSym);
+ OutStreamer->emitLabel(HandleMismatchSym);
}
- OutStreamer->EmitInstruction(MCInstBuilder(AArch64::STPXpre)
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::STPXpre)
.addReg(AArch64::SP)
.addReg(AArch64::X0)
.addReg(AArch64::X1)
.addReg(AArch64::SP)
.addImm(-32),
*STI);
- OutStreamer->EmitInstruction(MCInstBuilder(AArch64::STPXi)
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::STPXi)
.addReg(AArch64::FP)
.addReg(AArch64::LR)
.addReg(AArch64::SP)
@@ -483,13 +492,13 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
*STI);
if (Reg != AArch64::X0)
- OutStreamer->EmitInstruction(MCInstBuilder(AArch64::ORRXrs)
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::ORRXrs)
.addReg(AArch64::X0)
.addReg(AArch64::XZR)
.addReg(Reg)
.addImm(0),
*STI);
- OutStreamer->EmitInstruction(MCInstBuilder(AArch64::MOVZXi)
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::MOVZXi)
.addReg(AArch64::X1)
.addImm(AccessInfo)
.addImm(0),
@@ -498,14 +507,14 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
// Intentionally load the GOT entry and branch to it, rather than possibly
// late binding the function, which may clobber the registers before we have
// a chance to save them.
- OutStreamer->EmitInstruction(
+ OutStreamer->emitInstruction(
MCInstBuilder(AArch64::ADRP)
.addReg(AArch64::X16)
.addExpr(AArch64MCExpr::create(
HwasanTagMismatchRef, AArch64MCExpr::VariantKind::VK_GOT_PAGE,
OutContext)),
*STI);
- OutStreamer->EmitInstruction(
+ OutStreamer->emitInstruction(
MCInstBuilder(AArch64::LDRXui)
.addReg(AArch64::X16)
.addReg(AArch64::X16)
@@ -513,12 +522,12 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
HwasanTagMismatchRef, AArch64MCExpr::VariantKind::VK_GOT_LO12,
OutContext)),
*STI);
- OutStreamer->EmitInstruction(
+ OutStreamer->emitInstruction(
MCInstBuilder(AArch64::BR).addReg(AArch64::X16), *STI);
}
}
-void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
+void AArch64AsmPrinter::emitEndOfAsmFile(Module &M) {
EmitHwasanMemaccessSymbols(M);
const Triple &TT = TM.getTargetTriple();
@@ -528,7 +537,7 @@ void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
// implementation of multiple entry points). If this doesn't occur, the
// linker can safely perform dead code stripping. Since LLVM never
// generates code that does this, it is always safe to set.
- OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+ OutStreamer->emitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
emitStackMaps(SM);
}
@@ -543,12 +552,12 @@ void AArch64AsmPrinter::EmitLOHs() {
"Label hasn't been inserted for LOH related instruction");
MCArgs.push_back(LabelIt->second);
}
- OutStreamer->EmitLOHDirective(D.getKind(), MCArgs);
+ OutStreamer->emitLOHDirective(D.getKind(), MCArgs);
MCArgs.clear();
}
}
-void AArch64AsmPrinter::EmitFunctionBodyEnd() {
+void AArch64AsmPrinter::emitFunctionBodyEnd() {
if (!AArch64FI->getLOHRelated().empty())
EmitLOHs();
}
@@ -740,11 +749,10 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
assert(NOps == 4);
OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
// cast away const; DIetc do not take const operands for some reason.
- OS << cast<DILocalVariable>(MI->getOperand(NOps - 2).getMetadata())
- ->getName();
+ OS << MI->getDebugVariable()->getName();
OS << " <- ";
// Frame address. Currently handles register +- offset only.
- assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ assert(MI->getDebugOperand(0).isReg() && MI->isDebugOffsetImm());
OS << '[';
printOperand(MI, 0, OS);
OS << '+';
@@ -754,7 +762,7 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
printOperand(MI, NOps - 2, OS);
}
-void AArch64AsmPrinter::EmitJumpTableInfo() {
+void AArch64AsmPrinter::emitJumpTableInfo() {
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (!MJTI) return;
@@ -782,8 +790,8 @@ void AArch64AsmPrinter::EmitJumpTableInfo() {
if (JTBBs.empty()) continue;
unsigned Size = AFI->getJumpTableEntrySize(JTI);
- EmitAlignment(Align(Size));
- OutStreamer->EmitLabel(GetJTISymbol(JTI));
+ emitAlignment(Align(Size));
+ OutStreamer->emitLabel(GetJTISymbol(JTI));
for (auto *JTBB : JTBBs)
emitJumpTableEntry(MJTI, JTBB, JTI);
@@ -811,7 +819,7 @@ void AArch64AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
Value, MCConstantExpr::create(2, OutContext), OutContext);
}
- OutStreamer->EmitValue(Value, Size);
+ OutStreamer->emitValue(Value, Size);
}
/// Small jump tables contain an unsigned byte or half, representing the offset
@@ -867,7 +875,7 @@ void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
auto &Ctx = OutStreamer.getContext();
MCSymbol *MILabel = Ctx.createTempSymbol();
- OutStreamer.EmitLabel(MILabel);
+ OutStreamer.emitLabel(MILabel);
SM.recordStackMap(*MILabel, MI);
assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
@@ -897,7 +905,7 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI) {
auto &Ctx = OutStreamer.getContext();
MCSymbol *MILabel = Ctx.createTempSymbol();
- OutStreamer.EmitLabel(MILabel);
+ OutStreamer.emitLabel(MILabel);
SM.recordPatchPoint(*MILabel, MI);
PatchPointOpers Opers(&MI);
@@ -981,7 +989,7 @@ void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
// instructions) auto-generated.
#include "AArch64GenMCPseudoLowering.inc"
-void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
// Do any auto-generated pseudo lowerings.
if (emitPseudoExpansionLowering(*OutStreamer, MI))
return;
@@ -991,7 +999,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *LOHLabel = createTempSymbol("loh");
// Associate the instruction with the label
LOHInstToLabel[MI] = LOHLabel;
- OutStreamer->EmitLabel(LOHLabel);
+ OutStreamer->emitLabel(LOHLabel);
}
AArch64TargetStreamer *TS =
@@ -1014,7 +1022,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInstLowering.Lower(MI, Inst);
EmitToStreamer(*OutStreamer, Inst);
CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
- OutStreamer->EmitLabel(CurrentPatchableFunctionEntrySym);
+ OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
return;
}
}
@@ -1067,7 +1075,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallString<128> TmpStr;
raw_svector_ostream OS(TmpStr);
PrintDebugValueComment(MI, OS);
- OutStreamer->EmitRawText(StringRef(OS.str()));
+ OutStreamer->emitRawText(StringRef(OS.str()));
}
return;
@@ -1080,7 +1088,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (needsCFIMoves() == CFI_M_None)
return;
- OutStreamer->EmitCFIBKeyFrame();
+ OutStreamer->emitCFIBKeyFrame();
return;
}
}
@@ -1106,6 +1114,25 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
+ case AArch64::SpeculationBarrierISBDSBEndBB: {
+ // Print DSB SYS + ISB
+ MCInst TmpInstDSB;
+ TmpInstDSB.setOpcode(AArch64::DSB);
+ TmpInstDSB.addOperand(MCOperand::createImm(0xf));
+ EmitToStreamer(*OutStreamer, TmpInstDSB);
+ MCInst TmpInstISB;
+ TmpInstISB.setOpcode(AArch64::ISB);
+ TmpInstISB.addOperand(MCOperand::createImm(0xf));
+ EmitToStreamer(*OutStreamer, TmpInstISB);
+ return;
+ }
+ case AArch64::SpeculationBarrierSBEndBB: {
+ // Print SB
+ MCInst TmpInstSB;
+ TmpInstSB.setOpcode(AArch64::SB);
+ EmitToStreamer(*OutStreamer, TmpInstSB);
+ return;
+ }
case AArch64::TLSDESC_CALLSEQ: {
/// lower this to:
/// adrp x0, :tlsdesc:var
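
The hwasan check sequence emitted above amounts to a tag comparison with a short-granule fallback before jumping to the mismatch handler through the GOT. A rough, hand-written C++ sketch of the logic those instructions implement (the function and variable names here are illustrative, not part of the commit; AccessSize corresponds to 1 << (AccessInfo & 0xf)):

#include <cstdint>

// Sketch only: the LSR #56 extracts the pointer tag, the first LDRB loads the
// shadow (memory) tag, and the ORR #0xf + LDRB reads the granule's last byte.
static bool hwasanTagCheckPasses(uint64_t Ptr, uint8_t MemTag,
                                 unsigned AccessSize) {
  uint8_t PtrTag = Ptr >> 56;        // tag lives in the pointer's top byte
  if (PtrTag == MemTag)
    return true;                     // fast path: tags match
  if (MemTag > 0xf)
    return false;                    // not a short granule, so a real mismatch
  // Short granule: MemTag holds the number of valid bytes in the 16-byte
  // granule, and the real tag is stored in the granule's last byte.
  unsigned LastByte = (Ptr & 0xf) + AccessSize - 1;
  if (LastByte >= MemTag)
    return false;                    // access runs past the valid bytes
  uint8_t GranuleTag = *reinterpret_cast<uint8_t *>(Ptr | 0xf);
  return PtrTag == GranuleTag;
}

On a failed check the emitted code saves x0/x1 and fp/lr, moves the faulting register into x0 and AccessInfo into x1, then branches to the tag-mismatch runtime handler loaded via its GOT entry, as the hunks above show.
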
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
index a0695cef615f..84ec5afcc9c1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
@@ -38,18 +38,17 @@ static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
- CCState &State, unsigned SlotAlign) {
+ CCState &State, Align SlotAlign) {
unsigned Size = LocVT.getSizeInBits() / 8;
const Align StackAlign =
State.getMachineFunction().getDataLayout().getStackAlignment();
- const Align OrigAlign(ArgFlags.getOrigAlign());
- const Align Align = std::min(OrigAlign, StackAlign);
+ const Align OrigAlign = ArgFlags.getNonZeroOrigAlign();
+ const Align Alignment = std::min(OrigAlign, StackAlign);
for (auto &It : PendingMembers) {
- It.convertToMem(State.AllocateStack(
- Size, std::max((unsigned)Align.value(), SlotAlign)));
+ It.convertToMem(State.AllocateStack(Size, std::max(Alignment, SlotAlign)));
State.addLoc(It);
- SlotAlign = 1;
+ SlotAlign = Align(1);
}
// All pending members have now been allocated
@@ -72,7 +71,7 @@ static bool CC_AArch64_Custom_Stack_Block(
if (!ArgFlags.isInConsecutiveRegsLast())
return true;
- return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, 8);
+ return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, Align(8));
}
/// Given an [N x Ty] block, it should be passed in a consecutive sequence of
@@ -146,7 +145,7 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
for (auto Reg : RegList)
State.AllocateReg(Reg);
- unsigned SlotAlign = Subtarget.isTargetDarwin() ? 1 : 8;
+ const Align SlotAlign = Subtarget.isTargetDarwin() ? Align(1) : Align(8);
return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
}
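
The finishStackBlock change above replaces raw unsigned alignments with llvm::Align. A minimal usage sketch of the Align arithmetic it relies on (illustrative only; assumes LLVM's Support/Alignment.h):

#include "llvm/Support/Alignment.h"
#include <algorithm>

using llvm::Align;

// Align wraps a power-of-two byte alignment and is totally ordered, so
// std::min / std::max select the weaker / stricter alignment respectively.
static Align slotAlignFor(Align OrigAlign, Align StackAlign, Align SlotAlign) {
  Align Alignment = std::min(OrigAlign, StackAlign); // never exceed stack align
  return std::max(Alignment, SlotAlign);             // but at least the slot's
}

After the first pending member is allocated, the code above resets SlotAlign to Align(1), i.e. no extra constraint for the remaining members.
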
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index a0b2d7712b66..fdcc890bf589 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -10,9 +10,6 @@
//
//===----------------------------------------------------------------------===//
-/// CCIfAlign - Match of the original alignment of the arg
-class CCIfAlign<string Align, CCAction A> :
- CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
/// CCIfBigEndian - Match only if we're in big endian mode.
class CCIfBigEndian<CCAction A> :
CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>;
@@ -33,9 +30,9 @@ def CC_AArch64_AAPCS : CallingConv<[
// Big endian vectors must be passed as if they were 1-element vectors so that
// their lanes are in a consistent order.
- CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v8i8],
+ CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v4bf16, v8i8],
CCBitConvertToType<f64>>>,
- CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8],
+ CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v8bf16, v16i8],
CCBitConvertToType<f128>>>,
// In AAPCS, an SRet is passed in X8, not X0 like a normal pointer parameter.
@@ -75,10 +72,10 @@ def CC_AArch64_AAPCS : CallingConv<[
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
- nxv2f32, nxv4f32, nxv2f64],
+ nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,
CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
- nxv2f32, nxv4f32, nxv2f64],
+ nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
CCPassIndirect<i64>>,
CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
@@ -102,22 +99,24 @@ def CC_AArch64_AAPCS : CallingConv<[
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[bf16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+ CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
// If more than will fit in registers, pass them on the stack instead.
- CCIfType<[i1, i8, i16, f16], CCAssignToStack<8, 8>>,
+ CCIfType<[i1, i8, i16, f16, bf16], CCAssignToStack<8, 8>>,
CCIfType<[i32, f32], CCAssignToStack<8, 8>>,
- CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
+ CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16, v4bf16],
CCAssignToStack<8, 8>>,
- CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+ CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
CCAssignToStack<16, 16>>
]>;
@@ -132,9 +131,9 @@ def RetCC_AArch64_AAPCS : CallingConv<[
// Big endian vectors must be passed as if they were 1-element vectors so that
// their lanes are in a consistent order.
- CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v8i8],
+ CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v4bf16, v8i8],
CCBitConvertToType<f64>>>,
- CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8],
+ CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v8bf16, v16i8],
CCBitConvertToType<f128>>>,
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
@@ -144,18 +143,20 @@ def RetCC_AArch64_AAPCS : CallingConv<[
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[bf16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+ CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
- nxv2f32, nxv4f32, nxv2f64],
+ nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,
CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
@@ -165,7 +166,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
// Vararg functions on windows pass floats in integer registers
let Entry = 1 in
def CC_AArch64_Win64_VarArg : CallingConv<[
- CCIfType<[f16, f32], CCPromoteToType<f64>>,
+ CCIfType<[f16, bf16, f32], CCPromoteToType<f64>>,
CCIfType<[f64], CCBitConvertToType<i64>>,
CCDelegateTo<CC_AArch64_AAPCS>
]>;
@@ -219,19 +220,22 @@ def CC_AArch64_DarwinPCS : CallingConv<[
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[bf16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
// If more than will fit in registers, pass them on the stack instead.
CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>,
- CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>,
+ CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16 || ValVT == MVT::bf16",
+ CCAssignToStack<2, 2>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
// Re-demote pointers to 32-bits so we don't end up storing 64-bit
@@ -239,9 +243,9 @@ def CC_AArch64_DarwinPCS : CallingConv<[
CCIfPtr<CCIfILP32<CCTruncToType<i32>>>,
CCIfPtr<CCIfILP32<CCAssignToStack<4, 4>>>,
- CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
+ CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16, v4bf16],
CCAssignToStack<8, 8>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
CCAssignToStack<16, 16>>
]>;
@@ -255,14 +259,14 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
// Handle all scalar types as either i64 or f64.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
- CCIfType<[f16, f32], CCPromoteToType<f64>>,
+ CCIfType<[f16, bf16, f32], CCPromoteToType<f64>>,
// Everything is on the stack.
// i128 is split to two i64s, and its stack alignment is 16 bytes.
CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
- CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
+ CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
CCAssignToStack<8, 8>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
CCAssignToStack<16, 16>>
]>;
@@ -275,16 +279,16 @@ def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[
// Handle all scalar types as either i32 or f32.
CCIfType<[i8, i16], CCPromoteToType<i32>>,
- CCIfType<[f16], CCPromoteToType<f32>>,
+ CCIfType<[f16, bf16], CCPromoteToType<f32>>,
// Everything is on the stack.
// i128 is split to two i64s, and its stack alignment is 16 bytes.
CCIfPtr<CCIfILP32<CCTruncToType<i32>>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
- CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
+ CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
CCAssignToStack<8, 8>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
CCAssignToStack<16, 16>>
]>;
@@ -377,11 +381,9 @@ def CSR_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
D8, D9, D10, D11,
D12, D13, D14, D15)>;
-// Darwin puts the frame-record at the top of the callee-save area.
-def CSR_Darwin_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
- X23, X24, X25, X26, X27, X28,
- D8, D9, D10, D11,
- D12, D13, D14, D15)>;
+// A variant for treating X18 as callee saved, when interfacing with
+// code that needs X18 to be preserved.
+def CSR_AArch64_AAPCS_X18 : CalleeSavedRegs<(add X18, CSR_AArch64_AAPCS)>;
// Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x.
// We put FP before LR, so that frame lowering logic generates (FP,LR) pairs,
@@ -421,33 +423,7 @@ def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add (sequence "Z%u", 8, 23),
def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>;
def CSR_AArch64_AAPCS_SwiftError
- : CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X21)>;
-
-// The function used by Darwin to obtain the address of a thread-local variable
-// guarantees more than a normal AAPCS function. x16 and x17 are used on the
-// fast path for calculation, but other registers except X0 (argument/return)
-// and LR (it is a call, after all) are preserved.
-def CSR_AArch64_TLS_Darwin
- : CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17),
- FP,
- (sequence "Q%u", 0, 31))>;
-
-// We can only handle a register pair with adjacent registers, the register pair
-// should belong to the same class as well. Since the access function on the
-// fast path calls a function that follows CSR_AArch64_TLS_Darwin,
-// CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin.
-def CSR_AArch64_CXX_TLS_Darwin
- : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS,
- (sub (sequence "X%u", 1, 28), X15, X16, X17, X18),
- (sequence "D%u", 0, 31))>;
-
-// CSRs that are handled by prologue, epilogue.
-def CSR_AArch64_CXX_TLS_Darwin_PE
- : CalleeSavedRegs<(add LR, FP)>;
-
-// CSRs that are handled explicitly via copies.
-def CSR_AArch64_CXX_TLS_Darwin_ViaCopy
- : CalleeSavedRegs<(sub CSR_AArch64_CXX_TLS_Darwin, LR, FP)>;
+ : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>;
// The ELF stub used for TLS-descriptor access saves every feasible
// register. Only X0 and LR are clobbered.
@@ -472,14 +448,57 @@ def CSR_AArch64_StackProbe_Windows
(sequence "X%u", 18, 28), FP, SP,
(sequence "Q%u", 0, 31))>;
+// Darwin variants of AAPCS.
+// Darwin puts the frame-record at the top of the callee-save area.
+def CSR_Darwin_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
+ X23, X24, X25, X26, X27, X28,
+ D8, D9, D10, D11,
+ D12, D13, D14, D15)>;
+
+def CSR_Darwin_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21,
+ X22, X23, X24, X25, X26, X27,
+ X28, (sequence "Q%u", 8, 23))>;
+def CSR_Darwin_AArch64_AAPCS_ThisReturn
+ : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, X0)>;
+
+def CSR_Darwin_AArch64_AAPCS_SwiftError
+ : CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X21)>;
+
+// The function used by Darwin to obtain the address of a thread-local variable
+// guarantees more than a normal AAPCS function. x16 and x17 are used on the
+// fast path for calculation, but other registers except X0 (argument/return)
+// and LR (it is a call, after all) are preserved.
+def CSR_Darwin_AArch64_TLS
+ : CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17),
+ FP,
+ (sequence "Q%u", 0, 31))>;
+
+// We can only handle a register pair with adjacent registers, the register pair
+// should belong to the same class as well. Since the access function on the
+// fast path calls a function that follows CSR_Darwin_AArch64_TLS,
+// CSR_Darwin_AArch64_CXX_TLS should be a subset of CSR_Darwin_AArch64_TLS.
+def CSR_Darwin_AArch64_CXX_TLS
+ : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS,
+ (sub (sequence "X%u", 1, 28), X15, X16, X17, X18),
+ (sequence "D%u", 0, 31))>;
+
+// CSRs that are handled by prologue, epilogue.
+def CSR_Darwin_AArch64_CXX_TLS_PE
+ : CalleeSavedRegs<(add LR, FP)>;
+
+// CSRs that are handled explicitly via copies.
+def CSR_Darwin_AArch64_CXX_TLS_ViaCopy
+ : CalleeSavedRegs<(sub CSR_Darwin_AArch64_CXX_TLS, LR, FP)>;
+
+def CSR_Darwin_AArch64_RT_MostRegs
+ : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, (sequence "X%u", 9, 15))>;
+
// Variants of the standard calling conventions for shadow call stack.
// These all preserve x18 in addition to any other registers.
def CSR_AArch64_NoRegs_SCS
: CalleeSavedRegs<(add CSR_AArch64_NoRegs, X18)>;
def CSR_AArch64_AllRegs_SCS
: CalleeSavedRegs<(add CSR_AArch64_AllRegs, X18)>;
-def CSR_AArch64_CXX_TLS_Darwin_SCS
- : CalleeSavedRegs<(add CSR_AArch64_CXX_TLS_Darwin, X18)>;
def CSR_AArch64_AAPCS_SwiftError_SCS
: CalleeSavedRegs<(add CSR_AArch64_AAPCS_SwiftError, X18)>;
def CSR_AArch64_RT_MostRegs_SCS
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index 688bd1b28e85..3f244ba10102 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -105,6 +105,10 @@ struct LDTLSCleanup : public MachineFunctionPass {
TII->get(TargetOpcode::COPY), AArch64::X0)
.addReg(TLSBaseAddrReg);
+ // Update the call site info.
+ if (I.shouldUpdateCallSiteInfo())
+ I.getMF()->eraseCallSiteInfo(&I);
+
// Erase the TLS_base_addr instruction.
I.eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
index 35e6fef24363..efdb1131abc9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -382,7 +382,7 @@ static bool handleMiddleInst(const MachineInstr &MI, LOHInfo &DefInfo,
/// Update state when seeing an ADRP instruction.
static void handleADRP(const MachineInstr &MI, AArch64FunctionInfo &AFI,
- LOHInfo &Info) {
+ LOHInfo &Info, LOHInfo *LOHInfos) {
if (Info.LastADRP != nullptr) {
LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAdrp:\n"
<< '\t' << MI << '\t' << *Info.LastADRP);
@@ -393,12 +393,24 @@ static void handleADRP(const MachineInstr &MI, AArch64FunctionInfo &AFI,
// Produce LOH directive if possible.
if (Info.IsCandidate) {
switch (Info.Type) {
- case MCLOH_AdrpAdd:
+ case MCLOH_AdrpAdd: {
+ // ADRPs and ADDs for this candidate may be split apart if using
+ // GlobalISel instead of pseudo-expanded. If that happens, the
+ // def register of the ADD may have a use in between. Adding an LOH in
+ // this case can cause the linker to rewrite the ADRP to write to that
+ // register, clobbering the use.
+ const MachineInstr *AddMI = Info.MI0;
+ int DefIdx = mapRegToGPRIndex(MI.getOperand(0).getReg());
+ int OpIdx = mapRegToGPRIndex(AddMI->getOperand(0).getReg());
+ LOHInfo DefInfo = LOHInfos[OpIdx];
+ if (DefIdx != OpIdx && (DefInfo.OneUser || DefInfo.MultiUsers))
+ break;
LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAdd:\n"
<< '\t' << MI << '\t' << *Info.MI0);
AFI.addLOHDirective(MCLOH_AdrpAdd, {&MI, Info.MI0});
++NumADRSimpleCandidate;
break;
+ }
case MCLOH_AdrpLdr:
if (supportLoadFromLiteral(*Info.MI0)) {
LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdr:\n"
@@ -522,7 +534,8 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
// Walk the basic block backwards and update the per register state machine
// in the process.
- for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
+ for (const MachineInstr &MI :
+ instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
unsigned Opcode = MI.getOpcode();
switch (Opcode) {
case AArch64::ADDXri:
@@ -544,7 +557,7 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
const MachineOperand &Op0 = MI.getOperand(0);
int Idx = mapRegToGPRIndex(Op0.getReg());
if (Idx >= 0) {
- handleADRP(MI, AFI, LOHInfos[Idx]);
+ handleADRP(MI, AFI, LOHInfos[Idx], LOHInfos);
continue;
}
break;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td
index bb99f2516ecf..aa41cae289e8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -11,8 +11,74 @@
include "llvm/Target/GlobalISel/Combine.td"
+def fconstant_to_constant : GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_FCONSTANT):$root,
+ [{ return matchFConstantToConstant(*${root}, MRI); }]),
+ (apply [{ applyFConstantToConstant(*${root}); }])>;
+
def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
"AArch64GenPreLegalizerCombinerHelper", [all_combines,
- elide_br_by_inverting_cond]> {
+ elide_br_by_inverting_cond,
+ fconstant_to_constant]> {
let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
+ let StateClass = "AArch64PreLegalizerCombinerHelperState";
+ let AdditionalArguments = [];
+}
+
+// Matchdata for combines which replace a G_SHUFFLE_VECTOR with a
+// target-specific opcode.
+def shuffle_matchdata : GIDefMatchData<"ShuffleVectorPseudo">;
+
+def rev : GICombineRule<
+ (defs root:$root, shuffle_matchdata:$matchinfo),
+ (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+ [{ return matchREV(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
+>;
+
+def zip : GICombineRule<
+ (defs root:$root, shuffle_matchdata:$matchinfo),
+ (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+ [{ return matchZip(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
+>;
+
+def uzp : GICombineRule<
+ (defs root:$root, shuffle_matchdata:$matchinfo),
+ (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+ [{ return matchUZP(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
+>;
+
+def dup: GICombineRule <
+ (defs root:$root, shuffle_matchdata:$matchinfo),
+ (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+ [{ return matchDup(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
+>;
+
+def trn : GICombineRule<
+ (defs root:$root, shuffle_matchdata:$matchinfo),
+ (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+ [{ return matchTRN(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
+>;
+
+def ext: GICombineRule <
+ (defs root:$root, shuffle_matchdata:$matchinfo),
+ (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+ [{ return matchEXT(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyEXT(*${root}, ${matchinfo}); }])
+>;
+
+// Combines which replace a G_SHUFFLE_VECTOR with a target-specific pseudo
+// instruction.
+def shuffle_vector_pseudos : GICombineGroup<[dup, rev, ext, zip, uzp, trn]>;
+
+def AArch64PostLegalizerCombinerHelper
+ : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
+ [erase_undef_store, combines_for_extload,
+ sext_already_extended, shuffle_vector_pseudos]> {
+ let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
index 259238705965..57dc8a4061f1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
@@ -79,7 +79,7 @@ void AArch64CompressJumpTables::scanFunction() {
for (MachineBasicBlock &MBB : *MF) {
const Align Alignment = MBB.getAlignment();
unsigned AlignedOffset;
- if (Alignment == Align::None())
+ if (Alignment == Align(1))
AlignedOffset = Offset;
else
AlignedOffset = alignTo(Offset, Alignment);
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
index 25e23e4623de..e90e8e3da057 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
@@ -194,12 +194,8 @@ bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI,
// There must not be any instruction between DefMI and MI that clobbers or
// reads NZCV.
- MachineBasicBlock::iterator I(DefMI), E(MI);
- for (I = std::next(I); I != E; ++I) {
- if (I->modifiesRegister(AArch64::NZCV, TRI) ||
- I->readsRegister(AArch64::NZCV, TRI))
- return false;
- }
+ if (isNZCVTouchedInInstructionRange(DefMI, MI, TRI))
+ return false;
LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
LLVM_DEBUG(DefMI.print(dbgs()));
LLVM_DEBUG(dbgs() << " ");
@@ -253,12 +249,8 @@ bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI,
return false;
// There must not be any instruction between DefMI and MI that clobbers or
// reads NZCV.
- MachineBasicBlock::iterator I(DefMI), E(MI);
- for (I = std::next(I); I != E; ++I) {
- if (I->modifiesRegister(AArch64::NZCV, TRI) ||
- I->readsRegister(AArch64::NZCV, TRI))
- return false;
- }
+ if (isNZCVTouchedInInstructionRange(DefMI, MI, TRI))
+ return false;
LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
LLVM_DEBUG(DefMI.print(dbgs()));
LLVM_DEBUG(dbgs() << " ");
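
Both hunks above fold the same hand-written scan into one helper call. A sketch of what such a check presumably does, reconstructed from the deleted loops (the name and exact signature below are illustrative and may differ from the helper the commit actually adds; assumes the usual CodeGen and AArch64 backend headers):

// Returns true if any instruction strictly between DefMI and MI reads or
// clobbers the NZCV flags, i.e. the condition the deleted loops tested for.
static bool nzcvTouchedBetween(MachineInstr &DefMI, MachineInstr &MI,
                               const TargetRegisterInfo *TRI) {
  MachineBasicBlock::iterator I(DefMI), E(MI);
  for (I = std::next(I); I != E; ++I)
    if (I->modifiesRegister(AArch64::NZCV, TRI) ||
        I->readsRegister(AArch64::NZCV, TRI))
      return true;
  return false;
}
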
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
index 51b2ce029701..64f0bb63762d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -145,11 +145,11 @@ void AArch64ConditionOptimizer::getAnalysisUsage(AnalysisUsage &AU) const {
// instructions.
MachineInstr *AArch64ConditionOptimizer::findSuitableCompare(
MachineBasicBlock *MBB) {
- MachineBasicBlock::iterator I = MBB->getFirstTerminator();
- if (I == MBB->end())
+ MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
+ if (Term == MBB->end())
return nullptr;
- if (I->getOpcode() != AArch64::Bcc)
+ if (Term->getOpcode() != AArch64::Bcc)
return nullptr;
// Since we may modify cmp of this MBB, make sure NZCV does not live out.
@@ -158,32 +158,33 @@ MachineInstr *AArch64ConditionOptimizer::findSuitableCompare(
return nullptr;
// Now find the instruction controlling the terminator.
- for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
- --I;
- assert(!I->isTerminator() && "Spurious terminator");
+ for (MachineBasicBlock::iterator B = MBB->begin(), It = Term; It != B;) {
+ It = prev_nodbg(It, B);
+ MachineInstr &I = *It;
+ assert(!I.isTerminator() && "Spurious terminator");
// Check if there is any use of NZCV between CMP and Bcc.
- if (I->readsRegister(AArch64::NZCV))
+ if (I.readsRegister(AArch64::NZCV))
return nullptr;
- switch (I->getOpcode()) {
+ switch (I.getOpcode()) {
// cmp is an alias for subs with a dead destination register.
case AArch64::SUBSWri:
case AArch64::SUBSXri:
// cmn is an alias for adds with a dead destination register.
case AArch64::ADDSWri:
case AArch64::ADDSXri: {
- unsigned ShiftAmt = AArch64_AM::getShiftValue(I->getOperand(3).getImm());
- if (!I->getOperand(2).isImm()) {
- LLVM_DEBUG(dbgs() << "Immediate of cmp is symbolic, " << *I << '\n');
+ unsigned ShiftAmt = AArch64_AM::getShiftValue(I.getOperand(3).getImm());
+ if (!I.getOperand(2).isImm()) {
+ LLVM_DEBUG(dbgs() << "Immediate of cmp is symbolic, " << I << '\n');
return nullptr;
- } else if (I->getOperand(2).getImm() << ShiftAmt >= 0xfff) {
- LLVM_DEBUG(dbgs() << "Immediate of cmp may be out of range, " << *I
+ } else if (I.getOperand(2).getImm() << ShiftAmt >= 0xfff) {
+ LLVM_DEBUG(dbgs() << "Immediate of cmp may be out of range, " << I
<< '\n');
return nullptr;
- } else if (!MRI->use_empty(I->getOperand(0).getReg())) {
- LLVM_DEBUG(dbgs() << "Destination of cmp is not dead, " << *I << '\n');
+ } else if (!MRI->use_nodbg_empty(I.getOperand(0).getReg())) {
+ LLVM_DEBUG(dbgs() << "Destination of cmp is not dead, " << I << '\n');
return nullptr;
}
- return &*I;
+ return &I;
}
// Prevent false positive case like:
// cmp w19, #0
@@ -294,12 +295,10 @@ void AArch64ConditionOptimizer::modifyCmp(MachineInstr *CmpMI,
.add(BrMI.getOperand(1));
BrMI.eraseFromParent();
- MBB->updateTerminator();
-
++NumConditionsAdjusted;
}
-// Parse a condition code returned by AnalyzeBranch, and compute the CondCode
+// Parse a condition code returned by analyzeBranch, and compute the CondCode
// corresponding to TBB.
// Returns true if parsing was successful, otherwise false is returned.
static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 054ef8f482ca..82e8df3b73f9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -157,7 +157,7 @@ public:
MachineInstr *CmpMI;
private:
- /// The branch condition in Head as determined by AnalyzeBranch.
+ /// The branch condition in Head as determined by analyzeBranch.
SmallVector<MachineOperand, 4> HeadCond;
/// The condition code that makes Head branch to CmpBB.
@@ -267,7 +267,7 @@ bool SSACCmpConv::isDeadDef(unsigned DstReg) {
return MRI->use_nodbg_empty(DstReg);
}
-// Parse a condition code returned by AnalyzeBranch, and compute the CondCode
+// Parse a condition code returned by analyzeBranch, and compute the CondCode
// corresponding to TBB.
// Return
static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) {
@@ -317,7 +317,7 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
// Now find the instruction controlling the terminator.
for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
- --I;
+ I = prev_nodbg(I, MBB->begin());
assert(!I->isTerminator() && "Spurious terminator");
switch (I->getOpcode()) {
// cmp is an alias for subs with a dead destination register.
@@ -509,7 +509,7 @@ bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
// landing pad.
if (!TBB || HeadCond.empty()) {
LLVM_DEBUG(
- dbgs() << "AnalyzeBranch didn't find conditional branch in Head.\n");
+ dbgs() << "analyzeBranch didn't find conditional branch in Head.\n");
++NumHeadBranchRejs;
return false;
}
@@ -536,7 +536,7 @@ bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
if (!TBB || CmpBBCond.empty()) {
LLVM_DEBUG(
- dbgs() << "AnalyzeBranch didn't find conditional branch in CmpBB.\n");
+ dbgs() << "analyzeBranch didn't find conditional branch in CmpBB.\n");
++NumCmpBranchRejs;
return false;
}
@@ -710,7 +710,7 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
.add(CmpMI->getOperand(1)); // Branch target.
}
CmpMI->eraseFromParent();
- Head->updateTerminator();
+ Head->updateTerminator(CmpBB->getNextNode());
RemovedBlocks.push_back(CmpBB);
CmpBB->eraseFromParent();
@@ -828,7 +828,7 @@ void AArch64ConditionalCompares::updateDomTree(
assert(Node != HeadNode && "Cannot erase the head node");
assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
while (Node->getNumChildren())
- DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
+ DomTree->changeImmediateDominator(Node->back(), HeadNode);
DomTree->eraseNode(RemovedMBB);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 3b8f8a19fe49..9e65ad2e18f9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -68,6 +68,8 @@ private:
bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
unsigned BitSize);
+ bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
unsigned ExtendImm, unsigned ZeroReg,
@@ -78,6 +80,9 @@ private:
bool expandSetTagLoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
+ bool expandSVESpillFill(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, unsigned Opc,
+ unsigned N);
};
} // end anonymous namespace
@@ -344,27 +349,225 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
return true;
}
+/// \brief Expand Pseudos to Instructions with destructive operands.
+///
+/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
+/// or for fixing relaxed register allocation conditions to comply with
+/// the instructions register constraints. The latter case may be cheaper
+/// than setting the register constraints in the register allocator,
+/// since that will insert regular MOV instructions rather than MOVPRFX.
+///
+/// Example (after register allocation):
+///
+/// FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
+///
+/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
+/// * We cannot map directly to FSUB_ZPmZ_B because the register
+/// constraints of the instruction are not met.
+/// * Also the _ZERO specifies the false lanes need to be zeroed.
+///
+/// We first try to see if the destructive operand == result operand,
+/// if not, we try to swap the operands, e.g.
+///
+/// FSUB_ZPmZ_B Z0, Pg/m, Z0, Z1
+///
+/// But because FSUB_ZPmZ is not commutative, this is semantically
+/// different, so we need a reverse instruction:
+///
+/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
+///
+/// Then we implement the zeroing of the false lanes of Z0 by adding
+/// a zeroing MOVPRFX instruction:
+///
+/// MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
+/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
+///
+/// Note that this can only be done for _ZERO or _UNDEF variants where
+/// we can guarantee the false lanes to be zeroed (by implementing this)
+/// or that they are undef (don't care / not used), otherwise the
+/// swapping of operands is illegal because the operation is not
+/// (or cannot be emulated to be) fully commutative.
+bool AArch64ExpandPseudo::expand_DestructiveOp(
+ MachineInstr &MI,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
+ uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
+ uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
+ bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
+
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+
+ if (DType == AArch64::DestructiveBinary)
+ assert(DstReg != MI.getOperand(3).getReg());
+
+ bool UseRev = false;
+ unsigned PredIdx, DOPIdx, SrcIdx;
+ switch (DType) {
+ case AArch64::DestructiveBinaryComm:
+ case AArch64::DestructiveBinaryCommWithRev:
+ if (DstReg == MI.getOperand(3).getReg()) {
+ // FSUB Zd, Pg, Zs1, Zd ==> FSUBR Zd, Pg/m, Zd, Zs1
+ std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
+ UseRev = true;
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case AArch64::DestructiveBinary:
+ case AArch64::DestructiveBinaryImm:
+ std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
+ break;
+ default:
+ llvm_unreachable("Unsupported Destructive Operand type");
+ }
+
+#ifndef NDEBUG
+ // MOVPRFX can only be used if the destination operand
+ // is the destructive operand, not as any other operand,
+ // so the Destructive Operand must be unique.
+ bool DOPRegIsUnique = false;
+ switch (DType) {
+ case AArch64::DestructiveBinaryComm:
+ case AArch64::DestructiveBinaryCommWithRev:
+ DOPRegIsUnique =
+ DstReg != MI.getOperand(DOPIdx).getReg() ||
+ MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
+ break;
+ case AArch64::DestructiveBinaryImm:
+ DOPRegIsUnique = true;
+ break;
+ }
+#endif
+
+ // Resolve the reverse opcode
+ if (UseRev) {
+ int NewOpcode;
+ // e.g. DIV -> DIVR
+ if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
+ Opcode = NewOpcode;
+ // e.g. DIVR -> DIV
+ else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
+ Opcode = NewOpcode;
+ }
+
+ // Get the right MOVPRFX
+ uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
+ unsigned MovPrfx, MovPrfxZero;
+ switch (ElementSize) {
+ case AArch64::ElementSizeNone:
+ case AArch64::ElementSizeB:
+ MovPrfx = AArch64::MOVPRFX_ZZ;
+ MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
+ break;
+ case AArch64::ElementSizeH:
+ MovPrfx = AArch64::MOVPRFX_ZZ;
+ MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
+ break;
+ case AArch64::ElementSizeS:
+ MovPrfx = AArch64::MOVPRFX_ZZ;
+ MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
+ break;
+ case AArch64::ElementSizeD:
+ MovPrfx = AArch64::MOVPRFX_ZZ;
+ MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
+ break;
+ default:
+ llvm_unreachable("Unsupported ElementSize");
+ }
+
+ //
+ // Create the destructive operation (if required)
+ //
+ MachineInstrBuilder PRFX, DOP;
+ if (FalseZero) {
+#ifndef NDEBUG
+ assert(DOPRegIsUnique && "The destructive operand should be unique");
+#endif
+ assert(ElementSize != AArch64::ElementSizeNone &&
+ "This instruction is unpredicated");
+
+ // Merge source operand into destination register
+ PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
+ .addReg(DstReg, RegState::Define)
+ .addReg(MI.getOperand(PredIdx).getReg())
+ .addReg(MI.getOperand(DOPIdx).getReg());
+
+ // After the movprfx, the destructive operand is same as Dst
+ DOPIdx = 0;
+ } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
+#ifndef NDEBUG
+ assert(DOPRegIsUnique && "The destructive operand should be unique");
+#endif
+ PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
+ .addReg(DstReg, RegState::Define)
+ .addReg(MI.getOperand(DOPIdx).getReg());
+ DOPIdx = 0;
+ }
+
+ //
+ // Create the destructive operation
+ //
+ DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
+
+ switch (DType) {
+ case AArch64::DestructiveBinaryImm:
+ case AArch64::DestructiveBinaryComm:
+ case AArch64::DestructiveBinaryCommWithRev:
+ DOP.add(MI.getOperand(PredIdx))
+ .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
+ .add(MI.getOperand(SrcIdx));
+ break;
+ }
+
+ if (PRFX) {
+ finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
+ transferImpOps(MI, PRFX, DOP);
+ } else
+ transferImpOps(MI, DOP, DOP);
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AArch64ExpandPseudo::expandSetTagLoop(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
- Register SizeReg = MI.getOperand(2).getReg();
- Register AddressReg = MI.getOperand(3).getReg();
+ Register SizeReg = MI.getOperand(0).getReg();
+ Register AddressReg = MI.getOperand(1).getReg();
MachineFunction *MF = MBB.getParent();
- bool ZeroData = MI.getOpcode() == AArch64::STZGloop;
- const unsigned OpCode =
+ bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
+ const unsigned OpCode1 =
+ ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
+ const unsigned OpCode2 =
ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
+ unsigned Size = MI.getOperand(2).getImm();
+ assert(Size > 0 && Size % 16 == 0);
+ if (Size % (16 * 2) != 0) {
+ BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
+ .addReg(AddressReg)
+ .addReg(AddressReg)
+ .addImm(1);
+ Size -= 16;
+ }
+ MachineBasicBlock::iterator I =
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
+ .addImm(Size);
+ expandMOVImm(MBB, I, 64);
+
auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
MF->insert(++MBB.getIterator(), LoopBB);
MF->insert(++LoopBB->getIterator(), DoneBB);
- BuildMI(LoopBB, DL, TII->get(OpCode))
+ BuildMI(LoopBB, DL, TII->get(OpCode2))
.addDef(AddressReg)
.addReg(AddressReg)
.addReg(AddressReg)
@@ -402,6 +605,28 @@ bool AArch64ExpandPseudo::expandSetTagLoop(
return true;
}
+bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned Opc, unsigned N) {
+ const TargetRegisterInfo *TRI =
+ MBB.getParent()->getSubtarget().getRegisterInfo();
+ MachineInstr &MI = *MBBI;
+ for (unsigned Offset = 0; Offset < N; ++Offset) {
+ int ImmOffset = MI.getOperand(2).getImm() + Offset;
+ bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
+ assert(ImmOffset >= -256 && ImmOffset < 256 &&
+ "Immediate spill offset out of range");
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
+ .addReg(
+ TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
+ Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
+ .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
+ .addImm(ImmOffset);
+ }
+ MI.eraseFromParent();
+ return true;
+}
+
/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
@@ -409,10 +634,76 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &NextMBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
+
+ // Check if we can expand the destructive op
+ int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
+ if (OrigInstr != -1) {
+ auto &Orig = TII->get(OrigInstr);
+ if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask)
+ != AArch64::NotDestructive) {
+ return expand_DestructiveOp(MI, MBB, MBBI);
+ }
+ }
+
switch (Opcode) {
default:
break;
+ case AArch64::BSPv8i8:
+ case AArch64::BSPv16i8: {
+ Register DstReg = MI.getOperand(0).getReg();
+ if (DstReg == MI.getOperand(3).getReg()) {
+ // Expand to BIT
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
+ : AArch64::BITv16i8))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(1));
+ } else if (DstReg == MI.getOperand(2).getReg()) {
+ // Expand to BIF
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
+ : AArch64::BIFv16i8))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(1));
+ } else {
+ // Expand to BSL, use additional move if required
+ if (DstReg == MI.getOperand(1).getReg()) {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
+ : AArch64::BSLv16i8))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+ } else {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
+ : AArch64::ORRv16i8))
+ .addReg(DstReg,
+ RegState::Define |
+ getRenamableRegState(MI.getOperand(0).isRenamable()))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(1));
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
+ : AArch64::BSLv16i8))
+ .add(MI.getOperand(0))
+ .addReg(DstReg,
+ RegState::Kill |
+ getRenamableRegState(MI.getOperand(0).isRenamable()))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+ }
+ }
+ MI.eraseFromParent();
+ return true;
+ }
+
case AArch64::ADDWrr:
case AArch64::SUBWrr:
case AArch64::ADDXrr:
@@ -599,10 +890,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
Register DstReg = MI.getOperand(0).getReg();
auto SysReg = AArch64SysReg::TPIDR_EL0;
MachineFunction *MF = MBB.getParent();
- if (MF->getTarget().getTargetTriple().isOSFuchsia() &&
- MF->getTarget().getCodeModel() == CodeModel::Kernel)
- SysReg = AArch64SysReg::TPIDR_EL1;
- else if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
+ if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
SysReg = AArch64SysReg::TPIDR_EL3;
else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
SysReg = AArch64SysReg::TPIDR_EL2;
@@ -676,7 +964,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
// almost always point to SP-after-prologue; if not, emit a longer
// instruction sequence.
int BaseOffset = -AFI->getTaggedBasePointerOffset();
- unsigned FrameReg;
+ Register FrameReg;
StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
/*PreferFP=*/false,
@@ -706,9 +994,26 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
MI.eraseFromParent();
return true;
}
+ case AArch64::STGloop_wback:
+ case AArch64::STZGloop_wback:
+ return expandSetTagLoop(MBB, MBBI, NextMBBI);
case AArch64::STGloop:
case AArch64::STZGloop:
- return expandSetTagLoop(MBB, MBBI, NextMBBI);
+ report_fatal_error(
+ "Non-writeback variants of STGloop / STZGloop should not "
+ "survive past PrologEpilogInserter.");
+ case AArch64::STR_ZZZZXI:
+ return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
+ case AArch64::STR_ZZZXI:
+ return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
+ case AArch64::STR_ZZXI:
+ return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
+ case AArch64::LDR_ZZZZXI:
+ return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
+ case AArch64::LDR_ZZZXI:
+ return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
+ case AArch64::LDR_ZZXI:
+ return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
}
return false;
}
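
The BSPv8i8/BSPv16i8 expansion above chooses among BIT, BIF, BSL and ORR+BSL depending on which source register already lives in the destination. A compact restatement of that choice (illustrative helper, not part of the commit; the operand order follows the pseudo: Dst, Mask, Op2, Op3):

#include "llvm/CodeGen/Register.h"

using llvm::Register;

enum BspLowering { UseBIT, UseBIF, UseBSL, UseORRThenBSL };

// BSP computes Dst = (Mask & Op2) | (~Mask & Op3); the tied-operand NEON
// forms only differ in which input must already be in the destination.
static BspLowering chooseBspLowering(Register Dst, Register Mask,
                                     Register Op2, Register Op3) {
  if (Dst == Op3) return UseBIT;   // BIT: insert Op2 bits where Mask is 1
  if (Dst == Op2) return UseBIF;   // BIF: insert Op3 bits where Mask is 0
  if (Dst == Mask) return UseBSL;  // BSL: the mask is already in Dst
  return UseORRThenBSL;            // copy Mask into Dst first, then BSL
}
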
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
index c1fc183b04f6..538863ebe95a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
@@ -823,9 +823,6 @@ bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) {
TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
TRI = ST.getRegisterInfo();
- assert(TRI->trackLivenessAfterRegAlloc(Fn) &&
- "Register liveness not available!");
-
MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
Modified = false;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 7e9c68f2bb30..0f63f4ca62e5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -434,11 +434,9 @@ unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
// Materialize via constant pool. MachineConstantPool wants an explicit
// alignment.
- unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
- if (Align == 0)
- Align = DL.getTypeAllocSize(CFP->getType());
+ Align Alignment = DL.getPrefTypeAlign(CFP->getType());
- unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+ unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
@@ -1130,7 +1128,7 @@ void AArch64FastISel::addLoadStoreOperands(Address &Addr,
// and alignment should be based on the VT.
MMO = FuncInfo.MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
- MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
// Now add the rest of the operands.
MIB.addFrameIndex(FI).addImm(Offset);
} else {
@@ -3137,7 +3135,7 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
Addr.setReg(AArch64::SP);
Addr.setOffset(VA.getLocMemOffset() + BEAlign);
- unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
+ Align Alignment = DL.getABITypeAlign(ArgVal->getType());
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
@@ -3272,7 +3270,8 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Issue the call.
MachineInstrBuilder MIB;
if (Subtarget->useSmallAddressing()) {
- const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
+ const MCInstrDesc &II =
+ TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
if (Symbol)
MIB.addSym(Symbol, 0);
@@ -3305,7 +3304,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (!CallReg)
return false;
- const MCInstrDesc &II = TII.get(AArch64::BLR);
+ const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
CallReg = constrainOperandRegClass(II, CallReg, 0);
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
}
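
Several FastISel hunks above move from the integer-returning alignment getters to the Align-returning ones. A minimal sketch of the replacement calls (illustrative; assumes a valid DataLayout DL, MachineFrameInfo MFI, Type *Ty and frame index FI):

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Alignment.h"

static void alignGetterExamples(const llvm::DataLayout &DL,
                                const llvm::MachineFrameInfo &MFI,
                                llvm::Type *Ty, int FI) {
  llvm::Align Pref = DL.getPrefTypeAlign(Ty); // was getPrefTypeAlignment()
  llvm::Align Abi  = DL.getABITypeAlign(Ty);  // was getABITypeAlignment()
  llvm::Align Obj  = MFI.getObjectAlign(FI);  // was getObjectAlignment()
  (void)Pref; (void)Abi; (void)Obj;
  // All three return llvm::Align directly, which is why the old
  // "alignment == 0, fall back to the type's alloc size" branch is gone.
}
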
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 651ad9ad4c83..efa3fd5ca9ce 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -170,8 +170,45 @@ static cl::opt<bool>
cl::desc("reverse the CSR restore sequence"),
cl::init(false), cl::Hidden);
+static cl::opt<bool> StackTaggingMergeSetTag(
+ "stack-tagging-merge-settag",
+ cl::desc("merge settag instruction in function epilog"), cl::init(true),
+ cl::Hidden);
+
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
+/// Returns the argument pop size.
+static uint64_t getArgumentPopSize(MachineFunction &MF,
+ MachineBasicBlock &MBB) {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ bool IsTailCallReturn = false;
+ if (MBB.end() != MBBI) {
+ unsigned RetOpcode = MBBI->getOpcode();
+ IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
+ RetOpcode == AArch64::TCRETURNri ||
+ RetOpcode == AArch64::TCRETURNriBTI;
+ }
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+
+ uint64_t ArgumentPopSize = 0;
+ if (IsTailCallReturn) {
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+
+ // For a tail-call in a callee-pops-arguments environment, some or all of
+ // the stack may actually be in use for the call's arguments, this is
+ // calculated during LowerCall and consumed here...
+ ArgumentPopSize = StackAdjust.getImm();
+ } else {
+ // ... otherwise the amount to pop is *all* of the argument space,
+ // conveniently stored in the MachineFunctionInfo by
+ // LowerFormalArguments. This will, of course, be zero for the C calling
+ // convention.
+ ArgumentPopSize = AFI->getArgumentStackToRestore();
+ }
+
+ return ArgumentPopSize;
+}
+
/// This is the biggest offset to the stack pointer we can encode in aarch64
/// instructions (without using a separate calculation and a temp register).
/// Note that the exception here are vector stores/loads which cannot encode any
@@ -304,10 +341,8 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
if (!hasReservedCallFrame(MF)) {
- unsigned Align = getStackAlignment();
-
int64_t Amount = I->getOperand(0).getImm();
- Amount = alignTo(Amount, Align);
+ Amount = alignTo(Amount, getStackAlign());
if (!IsDestroy)
Amount = -Amount;
@@ -498,6 +533,39 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
return true;
}
+bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
+ MachineBasicBlock &MBB, unsigned StackBumpBytes) const {
+ if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
+ return false;
+
+ if (MBB.empty())
+ return true;
+
+ // Disable combined SP bump if the last instruction is an MTE tag store. It
+ // is almost always better to merge SP adjustment into those instructions.
+ MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
+ MachineBasicBlock::iterator Begin = MBB.begin();
+ while (LastI != Begin) {
+ --LastI;
+ if (LastI->isTransient())
+ continue;
+ if (!LastI->getFlag(MachineInstr::FrameDestroy))
+ break;
+ }
+ switch (LastI->getOpcode()) {
+ case AArch64::STGloop:
+ case AArch64::STZGloop:
+ case AArch64::STGOffset:
+ case AArch64::STZGOffset:
+ case AArch64::ST2GOffset:
+ case AArch64::STZ2GOffset:
+ return false;
+ default:
+ return true;
+ }
+ llvm_unreachable("unreachable");
+}
+
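// Sketch of the backward scan shouldCombineCSRLocalStackBumpInEpilogue() adds
// above: walk back over transient and frame-destroy instructions and refuse
// the combined SP bump when the last remaining instruction is a memory-tagging
// store, since the SP adjustment is better folded into that store later. This
// is a standalone approximation; Inst and its fields are stand-ins for
// MachineInstr queries.
#include <vector>

struct Inst {
  bool IsTransient = false;
  bool IsFrameDestroy = false;
  bool IsTagStore = false; // STG/STZG/ST2G/STZ2G/STGloop-style opcodes
};

static bool allowCombinedSPBumpInEpilogue(const std::vector<Inst> &Block) {
  for (auto It = Block.rbegin(); It != Block.rend(); ++It) {
    if (It->IsTransient || It->IsFrameDestroy)
      continue;             // skip debug instructions and the epilogue itself
    return !It->IsTagStore; // a trailing tag store blocks the combine
  }
  return true;              // nothing interesting found: combining is fine
}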
// Given a load or a store instruction, generate an appropriate unwinding SEH
// code on Windows.
static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
@@ -958,11 +1026,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Label used to tie together the PROLOG_LABEL and the MachineMoves.
MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
// Encode the stack size of the leaf function.
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::cfiDefCfaOffset(FrameLabel, NumBytes));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
}
@@ -1090,7 +1158,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
}
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
+ BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
.addReg(AArch64::X16, RegState::Kill)
.addReg(AArch64::X15, RegState::Implicit | RegState::Define)
.addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
@@ -1172,8 +1240,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
false, NeedsWinCFI, &HasWinCFI);
if (NeedsRealignment) {
- const unsigned Alignment = MFI.getMaxAlignment();
- const unsigned NrBitsToZero = countTrailingZeros(Alignment);
+ const unsigned NrBitsToZero = Log2(MFI.getMaxAlign());
assert(NrBitsToZero > 1);
assert(scratchSPReg != AArch64::SP);
@@ -1314,15 +1381,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (HasFP) {
// Define the current CFA rule to use the provided FP.
unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
- nullptr, Reg, StackGrowth - FixedObject));
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - StackGrowth));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
} else {
// Encode the stack size of the leaf function.
unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize()));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
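// The CFI hunks in this file flip the sign of the offsets they pass
// (-NumBytes becomes NumBytes, StackGrowth - FixedObject becomes
// FixedObject - StackGrowth) because the new cfiDefCfa/cfiDefCfaOffset
// helpers take the CFA offset directly, whereas the older createDefCfa*
// variants negated their argument internally. Both spellings are intended to
// describe the same unwind rule; a self-contained check of that invariant:
#include <cassert>
#include <cstdint>

static int64_t oldStyleDefCfaOffset(int64_t Arg) { return -Arg; } // helper negated
static int64_t newStyleDefCfaOffset(int64_t Arg) { return Arg; }  // helper stores as-is

int main() {
  const int64_t StackSize = 64;
  // Old call sites passed -StackSize, new ones pass StackSize; the recorded
  // DW_CFA_def_cfa_offset value is identical either way.
  assert(oldStyleDefCfaOffset(-StackSize) == newStyleDefCfaOffset(StackSize));
  return 0;
}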
@@ -1381,7 +1448,6 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL;
- bool IsTailCallReturn = false;
bool NeedsWinCFI = needsWinCFI(MF);
bool HasWinCFI = false;
bool IsFunclet = false;
@@ -1392,10 +1458,6 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
if (MBB.end() != MBBI) {
DL = MBBI->getDebugLoc();
- unsigned RetOpcode = MBBI->getOpcode();
- IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
- RetOpcode == AArch64::TCRETURNri ||
- RetOpcode == AArch64::TCRETURNriBTI;
IsFunclet = isFuncletReturnInstr(*MBBI);
}
@@ -1410,21 +1472,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// Initial and residual are named for consistency with the prologue. Note that
// in the epilogue, the residual adjustment is executed first.
- uint64_t ArgumentPopSize = 0;
- if (IsTailCallReturn) {
- MachineOperand &StackAdjust = MBBI->getOperand(1);
-
- // For a tail-call in a callee-pops-arguments environment, some or all of
- // the stack may actually be in use for the call's arguments, this is
- // calculated during LowerCall and consumed here...
- ArgumentPopSize = StackAdjust.getImm();
- } else {
- // ... otherwise the amount to pop is *all* of the argument space,
- // conveniently stored in the MachineFunctionInfo by
- // LowerFormalArguments. This will, of course, be zero for the C calling
- // convention.
- ArgumentPopSize = AFI->getArgumentStackToRestore();
- }
+ uint64_t ArgumentPopSize = getArgumentPopSize(MF, MBB);
// The stack frame should be like below,
//
@@ -1467,7 +1515,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// function.
if (MF.hasEHFunclets())
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
- bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
+ bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
// Assume we can't combine the last pop with the sp restore.
if (!CombineSPBump && PrologueSaveSize != 0) {
@@ -1664,7 +1712,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
/// SP-relative and simple call frames aren't used.
int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI,
- unsigned &FrameReg) const {
+ Register &FrameReg) const {
return resolveFrameIndexReference(
MF, FI, FrameReg,
/*PreferFP=*/
@@ -1707,7 +1755,7 @@ int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
}
StackOffset AArch64FrameLowering::resolveFrameIndexReference(
- const MachineFunction &MF, int FI, unsigned &FrameReg, bool PreferFP,
+ const MachineFunction &MF, int FI, Register &FrameReg, bool PreferFP,
bool ForSimm) const {
const auto &MFI = MF.getFrameInfo();
int64_t ObjectOffset = MFI.getObjectOffset(FI);
@@ -1719,7 +1767,7 @@ StackOffset AArch64FrameLowering::resolveFrameIndexReference(
StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE,
- unsigned &FrameReg, bool PreferFP, bool ForSimm) const {
+ Register &FrameReg, bool PreferFP, bool ForSimm) const {
const auto &MFI = MF.getFrameInfo();
const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
@@ -1770,10 +1818,8 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
bool CanUseBP = RegInfo->hasBasePointer(MF);
if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best.
UseFP = PreferFP;
- else if (!CanUseBP) { // Can't use BP. Forced to use FP.
- assert(!SVEStackSize && "Expected BP to be available");
+ else if (!CanUseBP) // Can't use BP. Forced to use FP.
UseFP = true;
- }
// else we can use BP and FP, but the offset from FP won't fit.
// That will make us scavenge registers which we can probably avoid by
// using BP. If it won't fit for BP either, we'll scavenge anyway.
@@ -1939,7 +1985,7 @@ struct RegPairInfo {
} // end anonymous namespace
static void computeCalleeSaveRegisterPairs(
- MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
+ MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
bool &NeedShadowCallStackProlog, bool NeedsFrameRecord) {
@@ -2064,8 +2110,8 @@ static void computeCalleeSaveRegisterPairs(
FixupDone = true;
ByteOffset -= 8;
assert(ByteOffset % 16 == 0);
- assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
- MFI.setObjectAlignment(RPI.FrameIdx, 16);
+ assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16));
+ MFI.setObjectAlignment(RPI.FrameIdx, Align(16));
}
int Offset = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
@@ -2084,8 +2130,7 @@ static void computeCalleeSaveRegisterPairs(
bool AArch64FrameLowering::spillCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
bool NeedsWinCFI = needsWinCFI(MF);
@@ -2148,32 +2193,33 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
// Rationale: This sequence saves uop updates compared to a sequence of
// pre-increment spills like stp xi,xj,[sp,#-16]!
// Note: Similar rationale and sequence for restores in epilog.
- unsigned Size, Align;
+ unsigned Size;
+ Align Alignment;
switch (RPI.Type) {
case RegPairInfo::GPR:
StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
Size = 8;
- Align = 8;
+ Alignment = Align(8);
break;
case RegPairInfo::FPR64:
StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
Size = 8;
- Align = 8;
+ Alignment = Align(8);
break;
case RegPairInfo::FPR128:
StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
Size = 16;
- Align = 16;
+ Alignment = Align(16);
break;
case RegPairInfo::ZPR:
StrOpc = AArch64::STR_ZXI;
Size = 16;
- Align = 16;
+ Alignment = Align(16);
break;
case RegPairInfo::PPR:
StrOpc = AArch64::STR_PXI;
Size = 2;
- Align = 2;
+ Alignment = Align(2);
break;
}
LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
@@ -2202,7 +2248,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
MIB.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
- MachineMemOperand::MOStore, Size, Align));
+ MachineMemOperand::MOStore, Size, Alignment));
}
MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
.addReg(AArch64::SP)
@@ -2210,8 +2256,8 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
// where factor*scale is implicit
.setMIFlag(MachineInstr::FrameSetup);
MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF,FrameIdxReg1),
- MachineMemOperand::MOStore, Size, Align));
+ MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
+ MachineMemOperand::MOStore, Size, Alignment));
if (NeedsWinCFI)
InsertSEH(MIB, TII, MachineInstr::FrameSetup);
@@ -2226,8 +2272,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
bool AArch64FrameLowering::restoreCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc DL;
@@ -2254,32 +2299,33 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
// ldp x22, x21, [sp, #0] // addImm(+0)
// Note: see comment in spillCalleeSavedRegisters()
unsigned LdrOpc;
- unsigned Size, Align;
+ unsigned Size;
+ Align Alignment;
switch (RPI.Type) {
case RegPairInfo::GPR:
LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
Size = 8;
- Align = 8;
+ Alignment = Align(8);
break;
case RegPairInfo::FPR64:
LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
Size = 8;
- Align = 8;
+ Alignment = Align(8);
break;
case RegPairInfo::FPR128:
LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
Size = 16;
- Align = 16;
+ Alignment = Align(16);
break;
case RegPairInfo::ZPR:
LdrOpc = AArch64::LDR_ZXI;
Size = 16;
- Align = 16;
+ Alignment = Align(16);
break;
case RegPairInfo::PPR:
LdrOpc = AArch64::LDR_PXI;
Size = 2;
- Align = 2;
+ Alignment = Align(2);
break;
}
LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
@@ -2302,7 +2348,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
MIB.addReg(Reg2, getDefRegState(true));
MIB.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
- MachineMemOperand::MOLoad, Size, Align));
+ MachineMemOperand::MOLoad, Size, Alignment));
}
MIB.addReg(Reg1, getDefRegState(true))
.addReg(AArch64::SP)
@@ -2311,7 +2357,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
.setMIFlag(MachineInstr::FrameDestroy);
MIB.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
- MachineMemOperand::MOLoad, Size, Align));
+ MachineMemOperand::MOLoad, Size, Alignment));
if (NeedsWinCFI)
InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
};
@@ -2354,6 +2400,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned UnspilledCSGPR = AArch64::NoRegister;
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
@@ -2402,6 +2449,16 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
+ if (MF.getFunction().getCallingConv() == CallingConv::Win64 &&
+ !Subtarget.isTargetWindows()) {
+ // For the Windows calling convention on a non-Windows OS, where X18 is
+ // treated as reserved, back up X18 when entering non-Windows code (marked
+ // with the Windows calling convention) and restore it when returning,
+ // regardless of whether the individual function uses it - it might call
+ // other functions that clobber it.
+ SavedRegs.set(AArch64::X18);
+ }
+
// Calculates the callee saved stack size.
unsigned CSStackSize = 0;
unsigned SVECSStackSize = 0;
@@ -2473,8 +2530,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const TargetRegisterClass &RC = AArch64::GPR64RegClass;
unsigned Size = TRI->getSpillSize(RC);
- unsigned Align = TRI->getSpillAlignment(RC);
- int FI = MFI.CreateStackObject(Size, Align, false);
+ Align Alignment = TRI->getSpillAlign(RC);
+ int FI = MFI.CreateStackObject(Size, Alignment, false);
RS->addScavengingFrameIndex(FI);
LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
<< " as the emergency spill slot.\n");
@@ -2555,12 +2612,12 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
// Then process all callee saved slots.
if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
// Make sure to align the last callee save slot.
- MFI.setObjectAlignment(MaxCSFrameIndex, 16U);
+ MFI.setObjectAlignment(MaxCSFrameIndex, Align(16));
// Assign offsets to the callee save slots.
for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
Offset += MFI.getObjectSize(I);
- Offset = alignTo(Offset, MFI.getObjectAlignment(I));
+ Offset = alignTo(Offset, MFI.getObjectAlign(I));
if (AssignOffsets)
Assign(I, -Offset);
}
@@ -2582,15 +2639,15 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
// Allocate all SVE locals and spills
for (unsigned FI : ObjectsToAllocate) {
- unsigned Align = MFI.getObjectAlignment(FI);
+ Align Alignment = MFI.getObjectAlign(FI);
// FIXME: Given that the length of SVE vectors is not necessarily a power of
// two, we'd need to align every object dynamically at runtime if the
// alignment is larger than 16. This is not yet supported.
- if (Align > 16)
+ if (Alignment > Align(16))
report_fatal_error(
"Alignment of scalable vectors > 16 bytes is not yet supported");
- Offset = alignTo(Offset + MFI.getObjectSize(FI), Align);
+ Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment);
if (AssignOffsets)
Assign(FI, -Offset);
}
@@ -2660,11 +2717,401 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
.addImm(0);
}
-/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before
-/// the update. This is easily retrieved as it is exactly the offset that is set
-/// in processFunctionBeforeFrameFinalized.
+namespace {
+struct TagStoreInstr {
+ MachineInstr *MI;
+ int64_t Offset, Size;
+ explicit TagStoreInstr(MachineInstr *MI, int64_t Offset, int64_t Size)
+ : MI(MI), Offset(Offset), Size(Size) {}
+};
+
+class TagStoreEdit {
+ MachineFunction *MF;
+ MachineBasicBlock *MBB;
+ MachineRegisterInfo *MRI;
+ // Tag store instructions that are being replaced.
+ SmallVector<TagStoreInstr, 8> TagStores;
+ // Combined memref arguments of the above instructions.
+ SmallVector<MachineMemOperand *, 8> CombinedMemRefs;
+
+ // Replace allocation tags in [FrameReg + FrameRegOffset, FrameReg +
+ // FrameRegOffset + Size) with the address tag of SP.
+ Register FrameReg;
+ StackOffset FrameRegOffset;
+ int64_t Size;
+ // If not None, move FrameReg to (FrameReg + FrameRegUpdate) at the end.
+ Optional<int64_t> FrameRegUpdate;
+ // MIFlags for any FrameReg updating instructions.
+ unsigned FrameRegUpdateFlags;
+
+ // Use zeroing instruction variants.
+ bool ZeroData;
+ DebugLoc DL;
+
+ void emitUnrolled(MachineBasicBlock::iterator InsertI);
+ void emitLoop(MachineBasicBlock::iterator InsertI);
+
+public:
+ TagStoreEdit(MachineBasicBlock *MBB, bool ZeroData)
+ : MBB(MBB), ZeroData(ZeroData) {
+ MF = MBB->getParent();
+ MRI = &MF->getRegInfo();
+ }
+ // Add an instruction to be replaced. Instructions must be added in
+ // ascending order of Offset and must be adjacent.
+ void addInstruction(TagStoreInstr I) {
+ assert((TagStores.empty() ||
+ TagStores.back().Offset + TagStores.back().Size == I.Offset) &&
+ "Non-adjacent tag store instructions.");
+ TagStores.push_back(I);
+ }
+ void clear() { TagStores.clear(); }
+ // Emit equivalent code at the given location, and erase the current set of
+ // instructions. May skip if the replacement is not profitable. May invalidate
+ // the input iterator and replace it with a valid one.
+ void emitCode(MachineBasicBlock::iterator &InsertI,
+ const AArch64FrameLowering *TFI, bool IsLast);
+};
+
+void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
+ const AArch64InstrInfo *TII =
+ MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
+
+ const int64_t kMinOffset = -256 * 16;
+ const int64_t kMaxOffset = 255 * 16;
+
+ Register BaseReg = FrameReg;
+ int64_t BaseRegOffsetBytes = FrameRegOffset.getBytes();
+ if (BaseRegOffsetBytes < kMinOffset ||
+ BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset) {
+ Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
+ emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg,
+ {BaseRegOffsetBytes, MVT::i8}, TII);
+ BaseReg = ScratchReg;
+ BaseRegOffsetBytes = 0;
+ }
+
+ MachineInstr *LastI = nullptr;
+ while (Size) {
+ int64_t InstrSize = (Size > 16) ? 32 : 16;
+ unsigned Opcode =
+ InstrSize == 16
+ ? (ZeroData ? AArch64::STZGOffset : AArch64::STGOffset)
+ : (ZeroData ? AArch64::STZ2GOffset : AArch64::ST2GOffset);
+ MachineInstr *I = BuildMI(*MBB, InsertI, DL, TII->get(Opcode))
+ .addReg(AArch64::SP)
+ .addReg(BaseReg)
+ .addImm(BaseRegOffsetBytes / 16)
+ .setMemRefs(CombinedMemRefs);
+ // A store to [BaseReg, #0] should go last for an opportunity to fold the
+ // final SP adjustment in the epilogue.
+ if (BaseRegOffsetBytes == 0)
+ LastI = I;
+ BaseRegOffsetBytes += InstrSize;
+ Size -= InstrSize;
+ }
+
+ if (LastI)
+ MBB->splice(InsertI, MBB, LastI);
+}
+
+void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) {
+ const AArch64InstrInfo *TII =
+ MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
+
+ Register BaseReg = FrameRegUpdate
+ ? FrameReg
+ : MRI->createVirtualRegister(&AArch64::GPR64RegClass);
+ Register SizeReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
+
+ emitFrameOffset(*MBB, InsertI, DL, BaseReg, FrameReg, FrameRegOffset, TII);
+
+ int64_t LoopSize = Size;
+ // If the loop size is not a multiple of 32, split off one 16-byte store at
+ // the end to fold BaseReg update into.
+ if (FrameRegUpdate && *FrameRegUpdate)
+ LoopSize -= LoopSize % 32;
+ MachineInstr *LoopI = BuildMI(*MBB, InsertI, DL,
+ TII->get(ZeroData ? AArch64::STZGloop_wback
+ : AArch64::STGloop_wback))
+ .addDef(SizeReg)
+ .addDef(BaseReg)
+ .addImm(LoopSize)
+ .addReg(BaseReg)
+ .setMemRefs(CombinedMemRefs);
+ if (FrameRegUpdate)
+ LoopI->setFlags(FrameRegUpdateFlags);
+
+ int64_t ExtraBaseRegUpdate =
+ FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getBytes() - Size) : 0;
+ if (LoopSize < Size) {
+ assert(FrameRegUpdate);
+ assert(Size - LoopSize == 16);
+ // Tag 16 more bytes at BaseReg and update BaseReg.
+ BuildMI(*MBB, InsertI, DL,
+ TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
+ .addDef(BaseReg)
+ .addReg(BaseReg)
+ .addReg(BaseReg)
+ .addImm(1 + ExtraBaseRegUpdate / 16)
+ .setMemRefs(CombinedMemRefs)
+ .setMIFlags(FrameRegUpdateFlags);
+ } else if (ExtraBaseRegUpdate) {
+ // Update BaseReg.
+ BuildMI(
+ *MBB, InsertI, DL,
+ TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
+ .addDef(BaseReg)
+ .addReg(BaseReg)
+ .addImm(std::abs(ExtraBaseRegUpdate))
+ .addImm(0)
+ .setMIFlags(FrameRegUpdateFlags);
+ }
+}
+
+// Check if *II is a register update that can be merged into STGloop that ends
+// at (Reg + Size). On success, *TotalOffset is set to the full adjustment that
+// instruction applies to Reg; the part of it that remains past the end of the
+// loop must be a 16-byte-aligned offset small enough to fold into the tail.
+bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg,
+ int64_t Size, int64_t *TotalOffset) {
+ MachineInstr &MI = *II;
+ if ((MI.getOpcode() == AArch64::ADDXri ||
+ MI.getOpcode() == AArch64::SUBXri) &&
+ MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {
+ unsigned Shift = AArch64_AM::getShiftValue(MI.getOperand(3).getImm());
+ int64_t Offset = MI.getOperand(2).getImm() << Shift;
+ if (MI.getOpcode() == AArch64::SUBXri)
+ Offset = -Offset;
+ int64_t AbsPostOffset = std::abs(Offset - Size);
+ const int64_t kMaxOffset =
+ 0xFFF; // Max encoding for unshifted ADDXri / SUBXri
+ if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {
+ *TotalOffset = Offset;
+ return true;
+ }
+ }
+ return false;
+}
+
+void mergeMemRefs(const SmallVectorImpl<TagStoreInstr> &TSE,
+ SmallVectorImpl<MachineMemOperand *> &MemRefs) {
+ MemRefs.clear();
+ for (auto &TS : TSE) {
+ MachineInstr *MI = TS.MI;
+ // An instruction without memory operands may access anything. Be
+ // conservative and return an empty list.
+ if (MI->memoperands_empty()) {
+ MemRefs.clear();
+ return;
+ }
+ MemRefs.append(MI->memoperands_begin(), MI->memoperands_end());
+ }
+}
+
+void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
+ const AArch64FrameLowering *TFI, bool IsLast) {
+ if (TagStores.empty())
+ return;
+ TagStoreInstr &FirstTagStore = TagStores[0];
+ TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1];
+ Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;
+ DL = TagStores[0].MI->getDebugLoc();
+
+ Register Reg;
+ FrameRegOffset = TFI->resolveFrameOffsetReference(
+ *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg,
+ /*PreferFP=*/false, /*ForSimm=*/true);
+ FrameReg = Reg;
+ FrameRegUpdate = None;
+
+ mergeMemRefs(TagStores, CombinedMemRefs);
+
+ LLVM_DEBUG(dbgs() << "Replacing adjacent STG instructions:\n";
+ for (const auto &Instr
+ : TagStores) { dbgs() << " " << *Instr.MI; });
+
+ // Size threshold where a loop becomes shorter than a linear sequence of
+ // tagging instructions.
+ const int kSetTagLoopThreshold = 176;
+ if (Size < kSetTagLoopThreshold) {
+ if (TagStores.size() < 2)
+ return;
+ emitUnrolled(InsertI);
+ } else {
+ MachineInstr *UpdateInstr = nullptr;
+ int64_t TotalOffset;
+ if (IsLast) {
+ // See if we can merge base register update into the STGloop.
+ // This is done in AArch64LoadStoreOptimizer for "normal" stores,
+ // but STGloop is way too unusual for that, and also it only
+ // realistically happens in function epilogue. Also, STGloop is expanded
+ // before that pass.
+ if (InsertI != MBB->end() &&
+ canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getBytes() + Size,
+ &TotalOffset)) {
+ UpdateInstr = &*InsertI++;
+ LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n "
+ << *UpdateInstr);
+ }
+ }
+
+ if (!UpdateInstr && TagStores.size() < 2)
+ return;
+
+ if (UpdateInstr) {
+ FrameRegUpdate = TotalOffset;
+ FrameRegUpdateFlags = UpdateInstr->getFlags();
+ }
+ emitLoop(InsertI);
+ if (UpdateInstr)
+ UpdateInstr->eraseFromParent();
+ }
+
+ for (auto &TS : TagStores)
+ TS.MI->eraseFromParent();
+}
+
+bool isMergeableStackTaggingInstruction(MachineInstr &MI, int64_t &Offset,
+ int64_t &Size, bool &ZeroData) {
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ unsigned Opcode = MI.getOpcode();
+ ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGOffset ||
+ Opcode == AArch64::STZ2GOffset);
+
+ if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
+ if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead())
+ return false;
+ if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI())
+ return false;
+ Offset = MFI.getObjectOffset(MI.getOperand(3).getIndex());
+ Size = MI.getOperand(2).getImm();
+ return true;
+ }
+
+ if (Opcode == AArch64::STGOffset || Opcode == AArch64::STZGOffset)
+ Size = 16;
+ else if (Opcode == AArch64::ST2GOffset || Opcode == AArch64::STZ2GOffset)
+ Size = 32;
+ else
+ return false;
+
+ if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI())
+ return false;
+
+ Offset = MFI.getObjectOffset(MI.getOperand(1).getIndex()) +
+ 16 * MI.getOperand(2).getImm();
+ return true;
+}
+
+// Detect a run of memory tagging instructions for adjacent stack frame slots,
+// and replace them with a shorter instruction sequence:
+// * replace STG + STG with ST2G
+// * replace STGloop + STGloop with STGloop
+// This code needs to run when stack slot offsets are already known, but before
+// FrameIndex operands in STG instructions are eliminated.
+MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
+ const AArch64FrameLowering *TFI,
+ RegScavenger *RS) {
+ bool FirstZeroData;
+ int64_t Size, Offset;
+ MachineInstr &MI = *II;
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineBasicBlock::iterator NextI = ++II;
+ if (&MI == &MBB->instr_back())
+ return II;
+ if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData))
+ return II;
+
+ SmallVector<TagStoreInstr, 4> Instrs;
+ Instrs.emplace_back(&MI, Offset, Size);
+
+ constexpr int kScanLimit = 10;
+ int Count = 0;
+ for (MachineBasicBlock::iterator E = MBB->end();
+ NextI != E && Count < kScanLimit; ++NextI) {
+ MachineInstr &MI = *NextI;
+ bool ZeroData;
+ int64_t Size, Offset;
+ // Collect instructions that update memory tags with a FrameIndex operand
+ // and (when applicable) constant size, and whose output registers are dead
+ // (the latter is almost always the case in practice). Since these
+ // instructions effectively have no inputs or outputs, we are free to skip
+ // any non-aliasing instructions in between without tracking used registers.
+ if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) {
+ if (ZeroData != FirstZeroData)
+ break;
+ Instrs.emplace_back(&MI, Offset, Size);
+ continue;
+ }
+
+ // Only count non-transient, non-tagging instructions toward the scan
+ // limit.
+ if (!MI.isTransient())
+ ++Count;
+
+ // Just in case, stop before the epilogue code starts.
+ if (MI.getFlag(MachineInstr::FrameSetup) ||
+ MI.getFlag(MachineInstr::FrameDestroy))
+ break;
+
+ // Reject anything that may alias the collected instructions.
+ if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects())
+ break;
+ }
+
+ // New code will be inserted after the last tagging instruction we've found.
+ MachineBasicBlock::iterator InsertI = Instrs.back().MI;
+ InsertI++;
+
+ llvm::stable_sort(Instrs,
+ [](const TagStoreInstr &Left, const TagStoreInstr &Right) {
+ return Left.Offset < Right.Offset;
+ });
+
+ // Make sure that we don't have any overlapping stores.
+ int64_t CurOffset = Instrs[0].Offset;
+ for (auto &Instr : Instrs) {
+ if (CurOffset > Instr.Offset)
+ return NextI;
+ CurOffset = Instr.Offset + Instr.Size;
+ }
+
+ // Find contiguous runs of tagged memory and emit shorter instruction
+ // sequences for them when possible.
+ TagStoreEdit TSE(MBB, FirstZeroData);
+ Optional<int64_t> EndOffset;
+ for (auto &Instr : Instrs) {
+ if (EndOffset && *EndOffset != Instr.Offset) {
+ // Found a gap.
+ TSE.emitCode(InsertI, TFI, /*IsLast = */ false);
+ TSE.clear();
+ }
+
+ TSE.addInstruction(Instr);
+ EndOffset = Instr.Offset + Instr.Size;
+ }
+
+ TSE.emitCode(InsertI, TFI, /*IsLast = */ true);
+
+ return InsertI;
+}
+} // namespace
+
+void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
+ MachineFunction &MF, RegScavenger *RS = nullptr) const {
+ if (StackTaggingMergeSetTag)
+ for (auto &BB : MF)
+ for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();)
+ II = tryMergeAdjacentSTG(II, this, RS);
+}
+
+/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP
+/// before the update. This is easily retrieved as it is exactly the offset
+/// that is set in processFunctionBeforeFrameFinalized.
int AArch64FrameLowering::getFrameIndexReferencePreferSP(
- const MachineFunction &MF, int FI, unsigned &FrameReg,
+ const MachineFunction &MF, int FI, Register &FrameReg,
bool IgnoreSPUpdates) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (IgnoreSPUpdates) {
@@ -2693,5 +3140,5 @@ unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
// This is the amount of stack a funclet needs to allocate.
return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
- getStackAlignment());
+ getStackAlign());
}
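// Most of the additions to this file implement the stack-tagging merge
// controlled by -stack-tagging-merge-settag: adjacent MTE settag regions are
// collected, sorted and re-emitted either as an unrolled STG/ST2G sequence or
// as an STGloop once a run reaches the 176-byte threshold, optionally folding
// a following SP update into the loop. The sketch below models only the
// grouping-and-decision step with plain data; it deliberately ignores the
// SP-update folding and all instruction emission.
#include <cstddef>
#include <cstdint>
#include <vector>

struct TagRegion { int64_t Offset, Size; };   // sorted by Offset, non-overlapping
enum class Emission { Skip, Unrolled, Loop };

static Emission classifyRun(int64_t RunBytes, std::size_t NumStores) {
  const int64_t kSetTagLoopThreshold = 176;   // same threshold as above
  if (NumStores < 2)
    return Emission::Skip;                    // nothing to merge
  return RunBytes < kSetTagLoopThreshold ? Emission::Unrolled : Emission::Loop;
}

static std::vector<Emission> planMerges(const std::vector<TagRegion> &Regions) {
  std::vector<Emission> Plan;
  std::size_t RunStart = 0;
  for (std::size_t I = 1; I <= Regions.size(); ++I) {
    bool EndOfRun =
        I == Regions.size() ||
        Regions[I - 1].Offset + Regions[I - 1].Size != Regions[I].Offset;
    if (!EndOfRun)
      continue;                               // still contiguous, keep growing
    int64_t RunBytes = Regions[I - 1].Offset + Regions[I - 1].Size -
                       Regions[RunStart].Offset;
    Plan.push_back(classifyRun(RunBytes, I - RunStart));
    RunStart = I;
  }
  return Plan;
}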
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index b5719feb6b15..9d0a6d9eaf25 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -24,8 +24,9 @@ public:
: TargetFrameLowering(StackGrowsDown, Align(16), 0, Align(16),
true /*StackRealignable*/) {}
- void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI) const;
+ void
+ emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const override;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -39,23 +40,24 @@ public:
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
int getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const override;
+ Register &FrameReg) const override;
StackOffset resolveFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg, bool PreferFP,
+ Register &FrameReg, bool PreferFP,
bool ForSimm) const;
StackOffset resolveFrameOffsetReference(const MachineFunction &MF,
int64_t ObjectOffset, bool isFixed,
- bool isSVE, unsigned &FrameReg,
+ bool isSVE, Register &FrameReg,
bool PreferFP, bool ForSimm) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
+ ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) const override;
- bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const override;
+ bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
/// Can this function use the red zone for local allocations.
bool canUseRedZone(const MachineFunction &MF) const;
@@ -77,12 +79,16 @@ public:
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const override;
+ void
+ processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF,
+ RegScavenger *RS) const override;
+
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override;
unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const;
int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
- unsigned &FrameReg,
+ Register &FrameReg,
bool IgnoreSPUpdates) const override;
int getNonLocalFrameIndexReference(const MachineFunction &MF,
int FI) const override;
@@ -107,6 +113,8 @@ private:
int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
int &MinCSFrameIndex,
int &MaxCSFrameIndex) const;
+ bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
+ unsigned StackBumpBytes) const;
};
} // End llvm namespace
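// The signature changes in this header mirror an API migration visible all
// through the .cpp hunks: raw 'unsigned' values become small wrapper types
// (llvm::Align, llvm::Register) and owning containers in parameters become
// non-owning views (ArrayRef / MutableArrayRef). A minimal standalone
// illustration of why the alignment wrapper helps; AlignLike is a stand-in,
// not the LLVM class.
#include <cassert>
#include <cstdint>

class AlignLike {
  uint64_t Value; // invariant: a non-zero power of two
public:
  explicit AlignLike(uint64_t V) : Value(V) {
    assert(V != 0 && (V & (V - 1)) == 0 && "alignment must be a power of two");
  }
  uint64_t value() const { return Value; }
};

// Taking AlignLike instead of 'unsigned' keeps sizes, register numbers and
// log2 values from being passed where an alignment is expected.
static uint64_t alignUp(uint64_t Offset, AlignLike A) {
  return (Offset + A.value() - 1) & ~(A.value() - 1);
}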
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index a51aa85a931c..10c477853353 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -62,6 +62,9 @@ public:
unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
+ template <signed Low, signed High, signed Scale>
+ bool SelectRDVLImm(SDValue N, SDValue &Imm);
+
bool tryMLAV64LaneV128(SDNode *N);
bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
@@ -159,6 +162,24 @@ public:
return false;
}
+ bool SelectDupZero(SDValue N) {
+ switch(N->getOpcode()) {
+ case AArch64ISD::DUP:
+ case ISD::SPLAT_VECTOR: {
+ auto Opnd0 = N->getOperand(0);
+ if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
+ if (CN->isNullValue())
+ return true;
+ if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
+ if (CN->isZero())
+ return true;
+ break;
+ }
+ }
+
+ return false;
+ }
+
template<MVT::SimpleValueType VT>
bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
return SelectSVEAddSubImm(N, VT, Imm, Shift);
@@ -169,6 +190,11 @@ public:
return SelectSVELogicalImm(N, VT, Imm);
}
+ template <unsigned Low, unsigned High>
+ bool SelectSVEShiftImm64(SDValue N, SDValue &Imm) {
+ return SelectSVEShiftImm64(N, Low, High, Imm);
+ }
+
// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
template<signed Min, signed Max, signed Scale, bool Shift>
bool SelectCntImm(SDValue N, SDValue &Imm) {
@@ -197,6 +223,9 @@ public:
/// unchanged; otherwise a REG_SEQUENCE value is returned.
SDValue createDTuple(ArrayRef<SDValue> Vecs);
SDValue createQTuple(ArrayRef<SDValue> Vecs);
+ // Form a sequence of SVE registers for instructions using list of vectors,
+ // e.g. structured loads and stores (ldN, stN).
+ SDValue createZTuple(ArrayRef<SDValue> Vecs);
/// Generic helper for the createDTuple/createQTuple
/// functions. Those should almost always be called instead.
@@ -216,11 +245,31 @@ public:
unsigned SubRegIdx);
void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
+ void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, const unsigned Opc);
+
+ bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
+ /// SVE Reg+Imm addressing mode.
+ template <int64_t Min, int64_t Max>
+ bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ /// SVE Reg+Reg address mode.
+ template <unsigned Scale>
+ bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
+ return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
+ }
void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
+ template <unsigned Scale>
+ void SelectPredicatedStore(SDNode *N, unsigned NumVecs, const unsigned Opc_rr,
+ const unsigned Opc_ri);
+ template <unsigned Scale>
+ std::tuple<unsigned, SDValue, SDValue>
+ findAddrModeSVELoadStore(SDNode *N, const unsigned Opc_rr,
+ const unsigned Opc_ri, const SDValue &OldBase,
+ const SDValue &OldOffset);
bool tryBitfieldExtractOp(SDNode *N);
bool tryBitfieldExtractOpFromSExt(SDNode *N);
@@ -268,13 +317,19 @@ private:
bool SelectCMP_SWAP(SDNode *N);
+ bool SelectSVE8BitLslImm(SDValue N, SDValue &Imm, SDValue &Shift);
+
bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm);
bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
+ bool SelectSVEShiftImm64(SDValue N, uint64_t Low, uint64_t High,
+ SDValue &Imm);
bool SelectSVEArithImm(SDValue N, SDValue &Imm);
+ bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
+ SDValue &Offset);
};
} // end anonymous namespace
@@ -679,6 +734,23 @@ static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
return SDValue(Node, 0);
}
+// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
+template<signed Low, signed High, signed Scale>
+bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
+ if (!isa<ConstantSDNode>(N))
+ return false;
+
+ int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
+ if ((MulImm % std::abs(Scale)) == 0) {
+ int64_t RDVLImm = MulImm / Scale;
+ if ((RDVLImm >= Low) && (RDVLImm <= High)) {
+ Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
+ return true;
+ }
+ }
+
+ return false;
+}
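// SelectRDVLImm above accepts a constant multiplier only when it is an exact
// multiple of Scale and the quotient fits the [Low, High] immediate range.
// The same arithmetic written over plain integers (the template parameters
// become ordinary arguments here):
#include <cstdint>
#include <cstdlib>
#include <optional>

static std::optional<int64_t> rdvlImmediate(int64_t MulImm, int64_t Low,
                                            int64_t High, int64_t Scale) {
  if (Scale == 0 || MulImm % std::abs(Scale) != 0)
    return std::nullopt;          // not a whole multiple of the scale
  int64_t RDVLImm = MulImm / Scale;
  if (RDVLImm < Low || RDVLImm > High)
    return std::nullopt;          // quotient does not fit the encoding
  return RDVLImm;                 // immediate to place in the instruction
}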
/// SelectArithExtendedRegister - Select a "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
@@ -832,16 +904,9 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
if (!GAN)
return true;
- if (GAN->getOffset() % Size == 0) {
- const GlobalValue *GV = GAN->getGlobal();
- unsigned Alignment = GV->getAlignment();
- Type *Ty = GV->getValueType();
- if (Alignment == 0 && Ty->isSized())
- Alignment = DL.getABITypeAlignment(Ty);
-
- if (Alignment >= Size)
- return true;
- }
+ if (GAN->getOffset() % Size == 0 &&
+ GAN->getGlobal()->getPointerAlignment(DL) >= Size)
+ return true;
}
if (CurDAG->isBaseWithConstantOffset(N)) {
@@ -1132,6 +1197,16 @@ SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
return createTuple(Regs, RegClassIDs, SubRegs);
}
+SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
+ static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
+ AArch64::ZPR3RegClassID,
+ AArch64::ZPR4RegClassID};
+ static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
+ AArch64::zsub2, AArch64::zsub3};
+
+ return createTuple(Regs, RegClassIDs, SubRegs);
+}
+
SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
const unsigned RegClassIDs[],
const unsigned SubRegs[]) {
@@ -1240,6 +1315,8 @@ bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
}
} else if (VT == MVT::f16) {
Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
+ } else if (VT == MVT::bf16) {
+ Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
} else if (VT == MVT::f32) {
Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
} else if (VT == MVT::f64 || VT.is64BitVector()) {
@@ -1334,6 +1411,54 @@ void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
CurDAG->RemoveDeadNode(N);
}
+/// Optimize \p OldBase and \p OldOffset by selecting the best addressing
+/// mode. Returns a tuple consisting of an opcode, an SDValue representing the
+/// new base and an SDValue representing the new offset.
+template <unsigned Scale>
+std::tuple<unsigned, SDValue, SDValue>
+AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, const unsigned Opc_rr,
+ const unsigned Opc_ri,
+ const SDValue &OldBase,
+ const SDValue &OldOffset) {
+ SDValue NewBase = OldBase;
+ SDValue NewOffset = OldOffset;
+ // Detect a possible Reg+Imm addressing mode.
+ const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
+ N, OldBase, NewBase, NewOffset);
+
+ // Detect a possible reg+reg addressing mode, but only if we haven't already
+ // detected a Reg+Imm one.
+ const bool IsRegReg =
+ !IsRegImm && SelectSVERegRegAddrMode<Scale>(OldBase, NewBase, NewOffset);
+
+ // Select the instruction.
+ return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
+}
+
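// findAddrModeSVELoadStore above prefers a reg+imm form when the offset fits
// the signed 4-bit scaled-immediate range, falls back to reg+reg when a
// suitable scaled register offset exists, and otherwise keeps the original
// base with a zero offset. A plain-integer model of that preference order;
// the AddrKind names are illustrative only.
#include <cstdint>
#include <optional>

enum class AddrKind { RegImm, RegReg, BaseOnly };

static AddrKind pickSVEAddrMode(std::optional<int64_t> ScaledImm,
                                bool HaveScaledRegOffset) {
  if (ScaledImm && *ScaledImm >= -8 && *ScaledImm <= 7)
    return AddrKind::RegImm;        // matches SelectAddrModeIndexedSVE<-8, 7>
  if (HaveScaledRegOffset)
    return AddrKind::RegReg;        // matches SelectSVERegRegAddrMode<Scale>
  return AddrKind::BaseOnly;        // original base, #0 offset
}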
+void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
+ const unsigned Opc) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue Chain = N->getOperand(0);
+
+ SDValue Ops[] = {N->getOperand(1), // Predicate
+ N->getOperand(2), // Memory operand
+ CurDAG->getTargetConstant(0, DL, MVT::i64), Chain};
+
+ const EVT ResTys[] = {MVT::Untyped, MVT::Other};
+
+ SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
+ SDValue SuperReg = SDValue(Load, 0);
+ for (unsigned i = 0; i < NumVecs; ++i)
+ ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
+ AArch64::zsub0 + i, DL, VT, SuperReg));
+
+ // Copy chain
+ unsigned ChainIdx = NumVecs;
+ ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
+ CurDAG->RemoveDeadNode(N);
+}
+
void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
unsigned Opc) {
SDLoc dl(N);
@@ -1354,6 +1479,49 @@ void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
ReplaceNode(N, St);
}
+template <unsigned Scale>
+void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
+ const unsigned Opc_rr,
+ const unsigned Opc_ri) {
+ SDLoc dl(N);
+
+ // Form a REG_SEQUENCE to force register allocation.
+ SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
+ SDValue RegSeq = createZTuple(Regs);
+
+ // Optimize addressing mode.
+ unsigned Opc;
+ SDValue Offset, Base;
+ std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore<Scale>(
+ N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
+ CurDAG->getTargetConstant(0, dl, MVT::i64));
+
+ SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
+ Base, // address
+ Offset, // offset
+ N->getOperand(0)}; // chain
+ SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
+
+ ReplaceNode(N, St);
+}
+
+bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ SDLoc dl(N);
+ const DataLayout &DL = CurDAG->getDataLayout();
+ const TargetLowering *TLI = getTargetLowering();
+
+ // Try to match it for the frame address
+ if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
+ int FI = FINode->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+ OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
+ return true;
+ }
+
+ return false;
+}
+
void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
unsigned Opc) {
SDLoc dl(N);
@@ -2632,7 +2800,8 @@ bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
// bits that are implicitly ANDed off by the above opcodes and if so, skip
// the AND.
uint64_t MaskImm;
- if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm))
+ if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
+ !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
return false;
if (countTrailingOnes(MaskImm) < Bits)
@@ -2879,6 +3048,32 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
return true;
}
+bool AArch64DAGToDAGISel::SelectSVE8BitLslImm(SDValue N, SDValue &Base,
+ SDValue &Offset) {
+ auto C = dyn_cast<ConstantSDNode>(N);
+ if (!C)
+ return false;
+
+ auto Ty = N->getValueType(0);
+
+ int64_t Imm = C->getSExtValue();
+ SDLoc DL(N);
+
+ if ((Imm >= -128) && (Imm <= 127)) {
+ Base = CurDAG->getTargetConstant(Imm, DL, Ty);
+ Offset = CurDAG->getTargetConstant(0, DL, Ty);
+ return true;
+ }
+
+ if (((Imm % 256) == 0) && (Imm >= -32768) && (Imm <= 32512)) {
+ Base = CurDAG->getTargetConstant(Imm/256, DL, Ty);
+ Offset = CurDAG->getTargetConstant(8, DL, Ty);
+ return true;
+ }
+
+ return false;
+}
+
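// SelectSVE8BitLslImm above splits a constant into an 8-bit immediate plus an
// optional left shift by 8: values in [-128, 127] use shift 0, and multiples
// of 256 in [-32768, 32512] use value/256 with shift 8. The same encoding
// check over plain integers (names here are illustrative):
#include <cstdint>
#include <optional>

struct LslImm8 { int64_t Imm; unsigned Shift; };

static std::optional<LslImm8> encodeLslImm8(int64_t V) {
  if (V >= -128 && V <= 127)
    return LslImm8{V, 0};                       // fits directly
  if (V % 256 == 0 && V >= -32768 && V <= 32512)
    return LslImm8{V / 256, 8};                 // fits after LSL #8
  return std::nullopt;                          // not encodable this way
}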
bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift) {
if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
const int64_t ImmVal = CNode->getZExtValue();
@@ -2917,7 +3112,7 @@ bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
int64_t ImmVal = CNode->getSExtValue();
SDLoc DL(N);
- if (ImmVal >= -127 && ImmVal < 127) {
+ if (ImmVal >= -128 && ImmVal < 128) {
Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
return true;
}
@@ -2975,6 +3170,24 @@ bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm) {
return false;
}
+// This method is only needed to "cast" i64s into i32s when the value
+// is a valid shift which has been splatted into a vector with i64 elements.
+// Every other type is fine in tablegen.
+bool AArch64DAGToDAGISel::SelectSVEShiftImm64(SDValue N, uint64_t Low,
+ uint64_t High, SDValue &Imm) {
+ if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
+ uint64_t ImmVal = CN->getZExtValue();
+ SDLoc DL(N);
+
+ if (ImmVal >= Low && ImmVal <= High) {
+ Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
// tagp(FrameIndex, IRGstack, tag_offset):
// since the offset between FrameIndex and IRGstack is a compile-time
@@ -3027,6 +3240,63 @@ void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
ReplaceNode(N, N3);
}
+// NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length
+// vector types larger than NEON don't have a matching SubRegIndex.
+static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
+ assert(V.getValueType().isScalableVector() &&
+ V.getValueType().getSizeInBits().getKnownMinSize() ==
+ AArch64::SVEBitsPerBlock &&
+ "Expected to extract from a packed scalable vector!");
+ assert(VT.isFixedLengthVector() &&
+ "Expected to extract a fixed length vector!");
+
+ SDLoc DL(V);
+ switch (VT.getSizeInBits()) {
+ case 64: {
+ auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
+ return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
+ }
+ case 128: {
+ auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
+ return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
+ }
+ default: {
+ auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
+ return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
+ }
+ }
+}
+
+// NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length
+// vector types larger than NEON don't have a matching SubRegIndex.
+static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
+ assert(VT.isScalableVector() &&
+ VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock &&
+ "Expected to insert into a packed scalable vector!");
+ assert(V.getValueType().isFixedLengthVector() &&
+ "Expected to insert a fixed length vector!");
+
+ SDLoc DL(V);
+ switch (V.getValueType().getSizeInBits()) {
+ case 64: {
+ auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
+ auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+ return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
+ SDValue(Container, 0), V, SubReg);
+ }
+ case 128: {
+ auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
+ auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+ return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
+ SDValue(Container, 0), V, SubReg);
+ }
+ default: {
+ auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
+ return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
+ }
+ }
+}
+
void AArch64DAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
@@ -3100,6 +3370,52 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
return;
break;
+ case ISD::EXTRACT_SUBVECTOR: {
+ // Bail when not a "cast" like extract_subvector.
+ if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0)
+ break;
+
+ // Bail when normal isel can do the job.
+ EVT InVT = Node->getOperand(0).getValueType();
+ if (VT.isScalableVector() || InVT.isFixedLengthVector())
+ break;
+
+ // NOTE: We can only get here when doing fixed length SVE code generation.
+ // We do manual selection because the types involved are not linked to real
+ // registers (despite being legal) and must be coerced into SVE registers.
+ //
+ // NOTE: If the above changes, be aware that selection will still not work
+ // because the td definition of extract_vector does not support extracting
+ // a fixed length vector from a scalable vector.
+
+ ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0)));
+ return;
+ }
+
+ case ISD::INSERT_SUBVECTOR: {
+ // Bail when not a "cast" like insert_subvector.
+ if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0)
+ break;
+ if (!Node->getOperand(0).isUndef())
+ break;
+
+ // Bail when normal isel should do the job.
+ EVT InVT = Node->getOperand(1).getValueType();
+ if (VT.isFixedLengthVector() || InVT.isScalableVector())
+ break;
+
+ // NOTE: We can only get here when doing fixed length SVE code generation.
+ // We do manual selection because the types involved are not linked to real
+ // registers (despite being legal) and must be coerced into SVE registers.
+ //
+ // NOTE: If the above changes, be aware that selection will still not work
+ // because the td definition of insert_vector does not support inserting a
+ // fixed length vector into a scalable vector.
+
+ ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1)));
+ return;
+ }
+
case ISD::Constant: {
// Materialize zero constants as copies from WZR/XZR. This allows
// the coalescer to propagate these into other instructions.
@@ -3185,10 +3501,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3212,10 +3528,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3239,10 +3555,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3266,10 +3582,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3293,10 +3609,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3320,10 +3636,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3347,10 +3663,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3374,10 +3690,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3401,10 +3717,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3426,7 +3742,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectLoadLane(Node, 2, AArch64::LD2i8);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectLoadLane(Node, 2, AArch64::LD2i16);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -3444,7 +3760,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectLoadLane(Node, 3, AArch64::LD3i8);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectLoadLane(Node, 3, AArch64::LD3i16);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -3462,7 +3778,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectLoadLane(Node, 4, AArch64::LD4i8);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectLoadLane(Node, 4, AArch64::LD4i16);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -3537,10 +3853,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectStore(Node, 2, AArch64::ST1Twov16b);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v4bf16) {
SelectStore(Node, 2, AArch64::ST1Twov4h);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
+ VT == MVT::v8bf16) {
SelectStore(Node, 2, AArch64::ST1Twov8h);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3565,10 +3883,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectStore(Node, 3, AArch64::ST1Threev16b);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v4bf16) {
SelectStore(Node, 3, AArch64::ST1Threev4h);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
+ VT == MVT::v8bf16) {
SelectStore(Node, 3, AArch64::ST1Threev8h);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3593,10 +3913,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectStore(Node, 4, AArch64::ST1Fourv16b);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v4bf16) {
SelectStore(Node, 4, AArch64::ST1Fourv4h);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
+ VT == MVT::v8bf16) {
SelectStore(Node, 4, AArch64::ST1Fourv8h);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3621,10 +3943,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectStore(Node, 2, AArch64::ST2Twov16b);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v4bf16) {
SelectStore(Node, 2, AArch64::ST2Twov4h);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
+ VT == MVT::v8bf16) {
SelectStore(Node, 2, AArch64::ST2Twov8h);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3649,10 +3973,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectStore(Node, 3, AArch64::ST3Threev16b);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v4bf16) {
SelectStore(Node, 3, AArch64::ST3Threev4h);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
+ VT == MVT::v8bf16) {
SelectStore(Node, 3, AArch64::ST3Threev8h);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3677,10 +4003,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectStore(Node, 4, AArch64::ST4Fourv16b);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v4bf16) {
SelectStore(Node, 4, AArch64::ST4Fourv4h);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
+ VT == MVT::v8bf16) {
SelectStore(Node, 4, AArch64::ST4Fourv8h);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3703,7 +4031,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectStoreLane(Node, 2, AArch64::ST2i8);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectStoreLane(Node, 2, AArch64::ST2i16);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -3722,7 +4050,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectStoreLane(Node, 3, AArch64::ST3i8);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectStoreLane(Node, 3, AArch64::ST3i16);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -3741,7 +4069,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectStoreLane(Node, 4, AArch64::ST4i8);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectStoreLane(Node, 4, AArch64::ST4i16);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -3755,6 +4083,69 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
break;
}
+ case Intrinsic::aarch64_sve_st2: {
+ if (VT == MVT::nxv16i8) {
+ SelectPredicatedStore</*Scale=*/0>(Node, 2, AArch64::ST2B,
+ AArch64::ST2B_IMM);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ SelectPredicatedStore</*Scale=*/1>(Node, 2, AArch64::ST2H,
+ AArch64::ST2H_IMM);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectPredicatedStore</*Scale=*/2>(Node, 2, AArch64::ST2W,
+ AArch64::ST2W_IMM);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectPredicatedStore</*Scale=*/3>(Node, 2, AArch64::ST2D,
+ AArch64::ST2D_IMM);
+ return;
+ }
+ break;
+ }
+ case Intrinsic::aarch64_sve_st3: {
+ if (VT == MVT::nxv16i8) {
+ SelectPredicatedStore</*Scale=*/0>(Node, 3, AArch64::ST3B,
+ AArch64::ST3B_IMM);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ SelectPredicatedStore</*Scale=*/1>(Node, 3, AArch64::ST3H,
+ AArch64::ST3H_IMM);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectPredicatedStore</*Scale=*/2>(Node, 3, AArch64::ST3W,
+ AArch64::ST3W_IMM);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectPredicatedStore</*Scale=*/3>(Node, 3, AArch64::ST3D,
+ AArch64::ST3D_IMM);
+ return;
+ }
+ break;
+ }
+ case Intrinsic::aarch64_sve_st4: {
+ if (VT == MVT::nxv16i8) {
+ SelectPredicatedStore</*Scale=*/0>(Node, 4, AArch64::ST4B,
+ AArch64::ST4B_IMM);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ SelectPredicatedStore</*Scale=*/1>(Node, 4, AArch64::ST4H,
+ AArch64::ST4H_IMM);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectPredicatedStore</*Scale=*/2>(Node, 4, AArch64::ST4W,
+ AArch64::ST4W_IMM);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectPredicatedStore</*Scale=*/3>(Node, 4, AArch64::ST4D,
+ AArch64::ST4D_IMM);
+ return;
+ }
+ break;
+ }
}
break;
}
@@ -3765,10 +4156,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3793,10 +4184,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3821,10 +4212,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3849,10 +4240,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3877,10 +4268,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3905,10 +4296,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3933,10 +4324,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3961,10 +4352,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -3989,10 +4380,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4017,10 +4408,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4043,7 +4434,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4062,7 +4453,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4081,7 +4472,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4100,7 +4491,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4122,10 +4513,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4151,10 +4542,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4180,10 +4571,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4209,10 +4600,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4238,10 +4629,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4267,10 +4658,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
} else if (VT == MVT::v16i8) {
SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
return;
- } else if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+ } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
return;
- } else if (VT == MVT::v8i16 || VT == MVT::v8f16) {
+ } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
return;
} else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
@@ -4294,7 +4685,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4314,7 +4705,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4334,7 +4725,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
return;
} else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
- VT == MVT::v8f16) {
+ VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
return;
} else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
@@ -4348,6 +4739,57 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
break;
}
+ case AArch64ISD::SVE_LD2_MERGE_ZERO: {
+ if (VT == MVT::nxv16i8) {
+ SelectPredicatedLoad(Node, 2, AArch64::LD2B_IMM);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ SelectPredicatedLoad(Node, 2, AArch64::LD2H_IMM);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectPredicatedLoad(Node, 2, AArch64::LD2W_IMM);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectPredicatedLoad(Node, 2, AArch64::LD2D_IMM);
+ return;
+ }
+ break;
+ }
+ case AArch64ISD::SVE_LD3_MERGE_ZERO: {
+ if (VT == MVT::nxv16i8) {
+ SelectPredicatedLoad(Node, 3, AArch64::LD3B_IMM);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ SelectPredicatedLoad(Node, 3, AArch64::LD3H_IMM);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectPredicatedLoad(Node, 3, AArch64::LD3W_IMM);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectPredicatedLoad(Node, 3, AArch64::LD3D_IMM);
+ return;
+ }
+ break;
+ }
+ case AArch64ISD::SVE_LD4_MERGE_ZERO: {
+ if (VT == MVT::nxv16i8) {
+ SelectPredicatedLoad(Node, 4, AArch64::LD4B_IMM);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ SelectPredicatedLoad(Node, 4, AArch64::LD4H_IMM);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectPredicatedLoad(Node, 4, AArch64::LD4W_IMM);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectPredicatedLoad(Node, 4, AArch64::LD4D_IMM);
+ return;
+ }
+ break;
+ }
}
// Select the default instruction
@@ -4360,3 +4802,130 @@ FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
CodeGenOpt::Level OptLevel) {
return new AArch64DAGToDAGISel(TM, OptLevel);
}
+
+/// When \p PredVT is a scalable vector predicate in the form
+/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
+/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. If the input
+/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
+/// EVT.
+static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT) {
+ if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
+ return EVT();
+
+ if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
+ PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
+ return EVT();
+
+ ElementCount EC = PredVT.getVectorElementCount();
+ EVT ScalarVT = EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.Min);
+ EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC);
+ return MemVT;
+}
+
+/// Return the EVT of the data associated to a memory operation in \p
+/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
+static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
+ if (isa<MemSDNode>(Root))
+ return cast<MemSDNode>(Root)->getMemoryVT();
+
+ if (isa<MemIntrinsicSDNode>(Root))
+ return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
+
+ const unsigned Opcode = Root->getOpcode();
+ // For custom ISD nodes, we have to look at them individually to extract the
+ // type of the data moved to/from memory.
+ switch (Opcode) {
+ case AArch64ISD::LD1_MERGE_ZERO:
+ case AArch64ISD::LD1S_MERGE_ZERO:
+ case AArch64ISD::LDNF1_MERGE_ZERO:
+ case AArch64ISD::LDNF1S_MERGE_ZERO:
+ return cast<VTSDNode>(Root->getOperand(3))->getVT();
+ case AArch64ISD::ST1_PRED:
+ return cast<VTSDNode>(Root->getOperand(4))->getVT();
+ default:
+ break;
+ }
+
+ if (Opcode != ISD::INTRINSIC_VOID)
+ return EVT();
+
+ const unsigned IntNo =
+ cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue();
+ if (IntNo != Intrinsic::aarch64_sve_prf)
+ return EVT();
+
+ // We are using an SVE prefetch intrinsic. Type must be inferred
+ // from the width of the predicate.
+ return getPackedVectorTypeFromPredicateType(
+ Ctx, Root->getOperand(2)->getValueType(0));
+}
+
+/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
+/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
+/// where Root is the memory access using N for its address.
+template <int64_t Min, int64_t Max>
+bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
+ SDValue &Base,
+ SDValue &OffImm) {
+ const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
+
+ if (MemVT == EVT())
+ return false;
+
+ if (N.getOpcode() != ISD::ADD)
+ return false;
+
+ SDValue VScale = N.getOperand(1);
+ if (VScale.getOpcode() != ISD::VSCALE)
+ return false;
+
+ TypeSize TS = MemVT.getSizeInBits();
+ int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinSize()) / 8;
+ int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
+
+ if ((MulImm % MemWidthBytes) != 0)
+ return false;
+
+ int64_t Offset = MulImm / MemWidthBytes;
+ if (Offset < Min || Offset > Max)
+ return false;
+
+ Base = N.getOperand(0);
+ OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
+ return true;
+}
+
+/// Select register plus register addressing mode for SVE, with scaled
+/// offset.
+bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
+ SDValue &Base,
+ SDValue &Offset) {
+ if (N.getOpcode() != ISD::ADD)
+ return false;
+
+ // Process an ADD node.
+ const SDValue LHS = N.getOperand(0);
+ const SDValue RHS = N.getOperand(1);
+
+ // 8 bit data does not come with the SHL node, so it is treated
+ // separately.
+ if (Scale == 0) {
+ Base = LHS;
+ Offset = RHS;
+ return true;
+ }
+
+ // Check if the RHS is a shift node with a constant.
+ if (RHS.getOpcode() != ISD::SHL)
+ return false;
+
+ const SDValue ShiftRHS = RHS.getOperand(1);
+ if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
+ if (C->getZExtValue() == Scale) {
+ Base = LHS;
+ Offset = RHS.getOperand(0);
+ return true;
+ }
+
+ return false;
+}
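
A standalone sketch, not from the patch: the SVE addressing helpers added above reduce to simple arithmetic — an nx<M>xi1 predicate maps to elements of 128/M bits, and an address of the form base + vscale * MulImm becomes the immediate offset MulImm / MemWidthBytes when the division is exact and the result lands in [Min, Max]. The names below (SVE_BITS_PER_BLOCK, elementBitsForPredicate, scaledImmOffset) and the example [-8, 7] range are illustrative only, not LLVM APIs.

#include <cassert>
#include <cstdint>
#include <iostream>
#include <optional>

constexpr int64_t SVE_BITS_PER_BLOCK = 128; // minimum SVE register granule

// An nx<M>xi1 predicate governs elements of 128/M bits (16->i8, ..., 2->i64).
int64_t elementBitsForPredicate(int64_t PredLanes) {
  assert((PredLanes == 16 || PredLanes == 8 || PredLanes == 4 ||
          PredLanes == 2) && "unsupported predicate lane count");
  return SVE_BITS_PER_BLOCK / PredLanes;
}

// Model of the immediate-offset selection: "base + vscale * MulImm" becomes
// immediate offset MulImm / MemWidthBytes when the division is exact and the
// result fits the instruction's signed range [Min, Max].
std::optional<int64_t> scaledImmOffset(int64_t MulImm, int64_t MemWidthBytes,
                                       int64_t Min, int64_t Max) {
  if (MulImm % MemWidthBytes != 0)
    return std::nullopt;
  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return std::nullopt;
  return Offset;
}

int main() {
  // nxv4i1 -> 32-bit elements: 4 lanes x 32 bits = 128.
  std::cout << elementBitsForPredicate(4) << " bits per element\n";
  // A packed vector is 16 bytes per granule, so base + vscale*48 selects
  // immediate offset 3 (three whole vectors), assuming a [-8, 7] range.
  if (auto Off = scaledImmOffset(/*MulImm=*/48, /*MemWidthBytes=*/16,
                                 /*Min=*/-8, /*Max=*/7))
    std::cout << "immediate offset = " << *Off << "\n";
}
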
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 23f05eaad944..85db14ab66fe 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -99,11 +99,6 @@ STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
-static cl::opt<bool>
-EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
- cl::desc("Allow AArch64 SLI/SRI formation"),
- cl::init(false));
-
// FIXME: The necessary dtprel relocations don't seem to be supported
// well in the GNU bfd and gold linkers at the moment. Therefore, by
// default, for now, fall back to GeneralDynamic code generation.
@@ -121,6 +116,18 @@ EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;
+/// Returns true if VT's elements occupy the lowest bit positions of its
+/// associated register class without any intervening space.
+///
+/// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the
+/// same register class, but only nxv8f16 can be treated as a packed vector.
+static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
+ assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+ "Expected legal vector type!");
+ return VT.isFixedLengthVector() ||
+ VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
+}
+
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
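
A minimal standalone sketch, not from the patch: the isPackedVectorType helper added above treats a scalable vector as packed exactly when its known-minimum size fills the 128-bit SVE granule. The names below are illustrative, not LLVM's.

#include <iostream>

constexpr unsigned SVEBitsPerBlock = 128;

// A scalable type is "packed" when its known-minimum size fills the whole
// 128-bit granule; smaller types leave intervening space between elements.
bool isPackedScalable(unsigned NumElts, unsigned EltBits) {
  return NumElts * EltBits == SVEBitsPerBlock;
}

int main() {
  std::cout << isPackedScalable(8, 16) << "\n"; // nxv8f16: 8 x 16 = 128 -> packed
  std::cout << isPackedScalable(4, 16) << "\n"; // nxv4f16: 4 x 16 = 64  -> unpacked
}
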
@@ -137,6 +144,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
if (Subtarget->hasFPARMv8()) {
addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
+ addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
@@ -153,6 +161,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
addDRTypeForNEON(MVT::v1i64);
addDRTypeForNEON(MVT::v1f64);
addDRTypeForNEON(MVT::v4f16);
+ addDRTypeForNEON(MVT::v4bf16);
addQRTypeForNEON(MVT::v4f32);
addQRTypeForNEON(MVT::v2f64);
@@ -161,6 +170,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
addQRTypeForNEON(MVT::v4i32);
addQRTypeForNEON(MVT::v2i64);
addQRTypeForNEON(MVT::v8f16);
+ addQRTypeForNEON(MVT::v8bf16);
}
if (Subtarget->hasSVE()) {
@@ -183,21 +193,51 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
+ if (Subtarget->hasBF16()) {
+ addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);
+ addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);
+ addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
+ }
+
+ if (useSVEForFixedLengthVectors()) {
+ for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
+ if (useSVEForFixedLengthVectorVT(VT))
+ addRegisterClass(VT, &AArch64::ZPRRegClass);
+
+ for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
+ if (useSVEForFixedLengthVectorVT(VT))
+ addRegisterClass(VT, &AArch64::ZPRRegClass);
+ }
+
for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
- setOperationAction(ISD::SMAX, VT, Legal);
- setOperationAction(ISD::UMAX, VT, Legal);
- setOperationAction(ISD::SMIN, VT, Legal);
- setOperationAction(ISD::UMIN, VT, Legal);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
}
for (auto VT :
{ MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
+
+ for (auto VT :
+ { MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32,
+ MVT::nxv2f64 }) {
+ setCondCodeAction(ISD::SETO, VT, Expand);
+ setCondCodeAction(ISD::SETOLT, VT, Expand);
+ setCondCodeAction(ISD::SETOLE, VT, Expand);
+ setCondCodeAction(ISD::SETULT, VT, Expand);
+ setCondCodeAction(ISD::SETULE, VT, Expand);
+ setCondCodeAction(ISD::SETUGE, VT, Expand);
+ setCondCodeAction(ISD::SETUGT, VT, Expand);
+ setCondCodeAction(ISD::SETUEQ, VT, Expand);
+ setCondCodeAction(ISD::SETUNE, VT, Expand);
+ }
}
// Compute derived properties from the register classes
@@ -349,12 +389,17 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ROTR, VT, Expand);
}
+ // AArch64 doesn't have i32 MULH{S|U}.
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+
// AArch64 doesn't have {U|S}MUL_LOHI.
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i64, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i128, Custom);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
@@ -547,6 +592,17 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::i128, Custom);
setOperationAction(ISD::STORE, MVT::i128, Custom);
+ // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
+ // custom lowering, as there are no un-paired non-temporal stores and
+ // legalization will break up 256 bit inputs.
+ setOperationAction(ISD::STORE, MVT::v32i8, Custom);
+ setOperationAction(ISD::STORE, MVT::v16i16, Custom);
+ setOperationAction(ISD::STORE, MVT::v16f16, Custom);
+ setOperationAction(ISD::STORE, MVT::v8i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v8f32, Custom);
+ setOperationAction(ISD::STORE, MVT::v4f64, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i64, Custom);
+
// Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
// This requires the Performance Monitors extension.
if (Subtarget->hasPerfMon())
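
A hypothetical source-level analogue, not part of the patch: the comment above notes that 256-bit non-temporal stores are custom lowered so they can pair into STNP, since there is no unpaired non-temporal store. The same idea can be sketched with Clang's __builtin_nontemporal_store by writing the 32 bytes as two adjacent 128-bit non-temporal halves; whether an STNP is actually emitted depends on the compiler and target.

#include <cstdint>
#include <cstring>

typedef uint8_t v16u8 __attribute__((vector_size(16)));

// Store 32 bytes as two adjacent 128-bit non-temporal halves. On AArch64 the
// backend may merge the pair into a single STNP; this is a sketch, not a
// guarantee, and __builtin_nontemporal_store is a Clang extension.
void store256_nontemporal(void *dst, const void *src) {
  v16u8 lo, hi;
  std::memcpy(&lo, src, sizeof(lo));
  std::memcpy(&hi, static_cast<const uint8_t *>(src) + 16, sizeof(hi));
  __builtin_nontemporal_store(lo, static_cast<v16u8 *>(dst));
  __builtin_nontemporal_store(hi, static_cast<v16u8 *>(dst) + 1);
}
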
@@ -596,6 +652,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
+ setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
// Indexed loads and stores are supported.
for (unsigned im = (unsigned)ISD::PRE_INC;
@@ -607,6 +664,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setIndexedLoadAction(im, MVT::f64, Legal);
setIndexedLoadAction(im, MVT::f32, Legal);
setIndexedLoadAction(im, MVT::f16, Legal);
+ setIndexedLoadAction(im, MVT::bf16, Legal);
setIndexedStoreAction(im, MVT::i8, Legal);
setIndexedStoreAction(im, MVT::i16, Legal);
setIndexedStoreAction(im, MVT::i32, Legal);
@@ -614,6 +672,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setIndexedStoreAction(im, MVT::f64, Legal);
setIndexedStoreAction(im, MVT::f32, Legal);
setIndexedStoreAction(im, MVT::f16, Legal);
+ setIndexedStoreAction(im, MVT::bf16, Legal);
}
// Trap.
@@ -791,6 +850,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
+
+ setOperationAction(ISD::TRUNCATE, VT, Custom);
}
for (MVT VT : { MVT::v4f16, MVT::v2f32,
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
@@ -847,6 +908,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
}
+ if (Subtarget->hasSVE())
+ setOperationAction(ISD::VSCALE, MVT::i32, Custom);
+
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
}
@@ -855,11 +919,60 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// splat of 0 or undef) once vector selects supported in SVE codegen. See
// D68877 for more details.
for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
- if (isTypeLegal(VT))
+ if (isTypeLegal(VT)) {
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::SDIV, VT, Custom);
+ setOperationAction(ISD::UDIV, VT, Custom);
+ setOperationAction(ISD::SMIN, VT, Custom);
+ setOperationAction(ISD::UMIN, VT, Custom);
+ setOperationAction(ISD::SMAX, VT, Custom);
+ setOperationAction(ISD::UMAX, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ if (VT.getScalarType() == MVT::i1)
+ setOperationAction(ISD::SETCC, VT, Custom);
+ }
}
+
+ for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32})
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
+
+ for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
+ if (isTypeLegal(VT)) {
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::FMA, VT, Custom);
+ }
+ }
+
+ // NOTE: Currently this has to happen after computeRegisterProperties rather
+ // than the preferred option of combining it with the addRegisterClass call.
+ if (useSVEForFixedLengthVectors()) {
+ for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
+ if (useSVEForFixedLengthVectorVT(VT))
+ addTypeForFixedLengthSVE(VT);
+ for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
+ if (useSVEForFixedLengthVectorVT(VT))
+ addTypeForFixedLengthSVE(VT);
+
+ // 64bit results can mean a bigger than NEON input.
+ for (auto VT : {MVT::v8i8, MVT::v4i16})
+ setOperationAction(ISD::TRUNCATE, VT, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
+
+ // 128bit results imply a bigger than NEON input.
+ for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
+ setOperationAction(ISD::TRUNCATE, VT, Custom);
+ for (auto VT : {MVT::v8f16, MVT::v4f32})
+ setOperationAction(ISD::FP_ROUND, VT, Expand);
+ }
}
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
@@ -944,6 +1057,24 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
}
}
+void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
+ assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
+
+ // By default everything must be expanded.
+ for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
+ setOperationAction(Op, VT, Expand);
+
+ // We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+
+ // Lower fixed length vector operations to scalable equivalents.
+ setOperationAction(ISD::ADD, VT, Custom);
+ setOperationAction(ISD::FADD, VT, Custom);
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction(ISD::TRUNCATE, VT, Custom);
+}
+
void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
addRegisterClass(VT, &AArch64::FPR64RegClass);
addTypeForNEON(VT, MVT::v2i32);
@@ -954,10 +1085,12 @@ void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
addTypeForNEON(VT, MVT::v4i32);
}
-EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
- EVT VT) const {
+EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
+ LLVMContext &C, EVT VT) const {
if (!VT.isVector())
return MVT::i32;
+ if (VT.isScalableVector())
+ return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount());
return VT.changeVectorElementTypeToInteger();
}
@@ -1057,7 +1190,8 @@ static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
}
bool AArch64TargetLowering::targetShrinkDemandedConstant(
- SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ TargetLoweringOpt &TLO) const {
// Delay this optimization to as late as possible.
if (!TLO.LegalOps)
return false;
@@ -1074,7 +1208,7 @@ bool AArch64TargetLowering::targetShrinkDemandedConstant(
"i32 or i64 is expected after legalization.");
// Exit early if we demand all bits.
- if (Demanded.countPopulation() == Size)
+ if (DemandedBits.countPopulation() == Size)
return false;
unsigned NewOpc;
@@ -1095,7 +1229,7 @@ bool AArch64TargetLowering::targetShrinkDemandedConstant(
if (!C)
return false;
uint64_t Imm = C->getZExtValue();
- return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc);
+ return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc);
}
/// computeKnownBitsForTargetNode - Determine which of the bits specified in
@@ -1199,7 +1333,7 @@ bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
// Same as above but handling LLTs instead.
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
- LLT Ty, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
+ LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
if (Subtarget->requiresStrictAlign())
return false;
@@ -1214,7 +1348,7 @@ bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
// Code that uses clang vector extensions can mark that it
// wants unaligned accesses to be treated as fast by
// underspecifying alignment to be 1 or 2.
- Align <= 2 ||
+ Alignment <= 2 ||
// Disregard v2i64. Memcpy lowering produces those and splitting
// them regresses performance on micro-benchmarks and olden/bh.
@@ -1230,183 +1364,246 @@ AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
}
const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
+#define MAKE_CASE(V) \
+ case V: \
+ return #V;
switch ((AArch64ISD::NodeType)Opcode) {
- case AArch64ISD::FIRST_NUMBER: break;
- case AArch64ISD::CALL: return "AArch64ISD::CALL";
- case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
- case AArch64ISD::ADR: return "AArch64ISD::ADR";
- case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
- case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
- case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
- case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND";
- case AArch64ISD::CSEL: return "AArch64ISD::CSEL";
- case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL";
- case AArch64ISD::CSINV: return "AArch64ISD::CSINV";
- case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
- case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
- case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
- case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
- case AArch64ISD::ADC: return "AArch64ISD::ADC";
- case AArch64ISD::SBC: return "AArch64ISD::SBC";
- case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
- case AArch64ISD::SUBS: return "AArch64ISD::SUBS";
- case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
- case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
- case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
- case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
- case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
- case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
- case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
- case AArch64ISD::STRICT_FCMP: return "AArch64ISD::STRICT_FCMP";
- case AArch64ISD::STRICT_FCMPE: return "AArch64ISD::STRICT_FCMPE";
- case AArch64ISD::DUP: return "AArch64ISD::DUP";
- case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
- case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
- case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32";
- case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64";
- case AArch64ISD::MOVI: return "AArch64ISD::MOVI";
- case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift";
- case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit";
- case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl";
- case AArch64ISD::FMOV: return "AArch64ISD::FMOV";
- case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift";
- case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl";
- case AArch64ISD::BICi: return "AArch64ISD::BICi";
- case AArch64ISD::ORRi: return "AArch64ISD::ORRi";
- case AArch64ISD::BSL: return "AArch64ISD::BSL";
- case AArch64ISD::NEG: return "AArch64ISD::NEG";
- case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
- case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1";
- case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2";
- case AArch64ISD::UZP1: return "AArch64ISD::UZP1";
- case AArch64ISD::UZP2: return "AArch64ISD::UZP2";
- case AArch64ISD::TRN1: return "AArch64ISD::TRN1";
- case AArch64ISD::TRN2: return "AArch64ISD::TRN2";
- case AArch64ISD::REV16: return "AArch64ISD::REV16";
- case AArch64ISD::REV32: return "AArch64ISD::REV32";
- case AArch64ISD::REV64: return "AArch64ISD::REV64";
- case AArch64ISD::EXT: return "AArch64ISD::EXT";
- case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
- case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
- case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
- case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
- case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
- case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
- case AArch64ISD::CMHI: return "AArch64ISD::CMHI";
- case AArch64ISD::CMHS: return "AArch64ISD::CMHS";
- case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";
- case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";
- case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";
- case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz";
- case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz";
- case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz";
- case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz";
- case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz";
- case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz";
- case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz";
- case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
- case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
- case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
- case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
- case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
- case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
- case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
- case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
- case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
- case AArch64ISD::SMAXV_PRED: return "AArch64ISD::SMAXV_PRED";
- case AArch64ISD::UMAXV_PRED: return "AArch64ISD::UMAXV_PRED";
- case AArch64ISD::SMINV_PRED: return "AArch64ISD::SMINV_PRED";
- case AArch64ISD::UMINV_PRED: return "AArch64ISD::UMINV_PRED";
- case AArch64ISD::ORV_PRED: return "AArch64ISD::ORV_PRED";
- case AArch64ISD::EORV_PRED: return "AArch64ISD::EORV_PRED";
- case AArch64ISD::ANDV_PRED: return "AArch64ISD::ANDV_PRED";
- case AArch64ISD::CLASTA_N: return "AArch64ISD::CLASTA_N";
- case AArch64ISD::CLASTB_N: return "AArch64ISD::CLASTB_N";
- case AArch64ISD::LASTA: return "AArch64ISD::LASTA";
- case AArch64ISD::LASTB: return "AArch64ISD::LASTB";
- case AArch64ISD::REV: return "AArch64ISD::REV";
- case AArch64ISD::TBL: return "AArch64ISD::TBL";
- case AArch64ISD::NOT: return "AArch64ISD::NOT";
- case AArch64ISD::BIT: return "AArch64ISD::BIT";
- case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
- case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
- case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
- case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
- case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
- case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
- case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
- case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
- case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
- case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
- case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
- case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
- case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I";
- case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I";
- case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
- case AArch64ISD::LD2post: return "AArch64ISD::LD2post";
- case AArch64ISD::LD3post: return "AArch64ISD::LD3post";
- case AArch64ISD::LD4post: return "AArch64ISD::LD4post";
- case AArch64ISD::ST2post: return "AArch64ISD::ST2post";
- case AArch64ISD::ST3post: return "AArch64ISD::ST3post";
- case AArch64ISD::ST4post: return "AArch64ISD::ST4post";
- case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post";
- case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post";
- case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post";
- case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post";
- case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post";
- case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post";
- case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost";
- case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost";
- case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost";
- case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost";
- case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost";
- case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";
- case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";
- case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";
- case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
- case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
- case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
- case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
- case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
- case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";
- case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
- case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
- case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
- case AArch64ISD::STG: return "AArch64ISD::STG";
- case AArch64ISD::STZG: return "AArch64ISD::STZG";
- case AArch64ISD::ST2G: return "AArch64ISD::ST2G";
- case AArch64ISD::STZ2G: return "AArch64ISD::STZ2G";
- case AArch64ISD::SUNPKHI: return "AArch64ISD::SUNPKHI";
- case AArch64ISD::SUNPKLO: return "AArch64ISD::SUNPKLO";
- case AArch64ISD::UUNPKHI: return "AArch64ISD::UUNPKHI";
- case AArch64ISD::UUNPKLO: return "AArch64ISD::UUNPKLO";
- case AArch64ISD::INSR: return "AArch64ISD::INSR";
- case AArch64ISD::PTEST: return "AArch64ISD::PTEST";
- case AArch64ISD::PTRUE: return "AArch64ISD::PTRUE";
- case AArch64ISD::GLD1: return "AArch64ISD::GLD1";
- case AArch64ISD::GLD1_SCALED: return "AArch64ISD::GLD1_SCALED";
- case AArch64ISD::GLD1_SXTW: return "AArch64ISD::GLD1_SXTW";
- case AArch64ISD::GLD1_UXTW: return "AArch64ISD::GLD1_UXTW";
- case AArch64ISD::GLD1_SXTW_SCALED: return "AArch64ISD::GLD1_SXTW_SCALED";
- case AArch64ISD::GLD1_UXTW_SCALED: return "AArch64ISD::GLD1_UXTW_SCALED";
- case AArch64ISD::GLD1_IMM: return "AArch64ISD::GLD1_IMM";
- case AArch64ISD::GLD1S: return "AArch64ISD::GLD1S";
- case AArch64ISD::GLD1S_SCALED: return "AArch64ISD::GLD1S_SCALED";
- case AArch64ISD::GLD1S_SXTW: return "AArch64ISD::GLD1S_SXTW";
- case AArch64ISD::GLD1S_UXTW: return "AArch64ISD::GLD1S_UXTW";
- case AArch64ISD::GLD1S_SXTW_SCALED: return "AArch64ISD::GLD1S_SXTW_SCALED";
- case AArch64ISD::GLD1S_UXTW_SCALED: return "AArch64ISD::GLD1S_UXTW_SCALED";
- case AArch64ISD::GLD1S_IMM: return "AArch64ISD::GLD1S_IMM";
- case AArch64ISD::SST1: return "AArch64ISD::SST1";
- case AArch64ISD::SST1_SCALED: return "AArch64ISD::SST1_SCALED";
- case AArch64ISD::SST1_SXTW: return "AArch64ISD::SST1_SXTW";
- case AArch64ISD::SST1_UXTW: return "AArch64ISD::SST1_UXTW";
- case AArch64ISD::SST1_SXTW_SCALED: return "AArch64ISD::SST1_SXTW_SCALED";
- case AArch64ISD::SST1_UXTW_SCALED: return "AArch64ISD::SST1_UXTW_SCALED";
- case AArch64ISD::SST1_IMM: return "AArch64ISD::SST1_IMM";
- case AArch64ISD::LDP: return "AArch64ISD::LDP";
- case AArch64ISD::STP: return "AArch64ISD::STP";
- }
+ case AArch64ISD::FIRST_NUMBER:
+ break;
+ MAKE_CASE(AArch64ISD::CALL)
+ MAKE_CASE(AArch64ISD::ADRP)
+ MAKE_CASE(AArch64ISD::ADR)
+ MAKE_CASE(AArch64ISD::ADDlow)
+ MAKE_CASE(AArch64ISD::LOADgot)
+ MAKE_CASE(AArch64ISD::RET_FLAG)
+ MAKE_CASE(AArch64ISD::BRCOND)
+ MAKE_CASE(AArch64ISD::CSEL)
+ MAKE_CASE(AArch64ISD::FCSEL)
+ MAKE_CASE(AArch64ISD::CSINV)
+ MAKE_CASE(AArch64ISD::CSNEG)
+ MAKE_CASE(AArch64ISD::CSINC)
+ MAKE_CASE(AArch64ISD::THREAD_POINTER)
+ MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
+ MAKE_CASE(AArch64ISD::ADD_PRED)
+ MAKE_CASE(AArch64ISD::SDIV_PRED)
+ MAKE_CASE(AArch64ISD::UDIV_PRED)
+ MAKE_CASE(AArch64ISD::SMIN_MERGE_OP1)
+ MAKE_CASE(AArch64ISD::UMIN_MERGE_OP1)
+ MAKE_CASE(AArch64ISD::SMAX_MERGE_OP1)
+ MAKE_CASE(AArch64ISD::UMAX_MERGE_OP1)
+ MAKE_CASE(AArch64ISD::SHL_MERGE_OP1)
+ MAKE_CASE(AArch64ISD::SRL_MERGE_OP1)
+ MAKE_CASE(AArch64ISD::SRA_MERGE_OP1)
+ MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::ADC)
+ MAKE_CASE(AArch64ISD::SBC)
+ MAKE_CASE(AArch64ISD::ADDS)
+ MAKE_CASE(AArch64ISD::SUBS)
+ MAKE_CASE(AArch64ISD::ADCS)
+ MAKE_CASE(AArch64ISD::SBCS)
+ MAKE_CASE(AArch64ISD::ANDS)
+ MAKE_CASE(AArch64ISD::CCMP)
+ MAKE_CASE(AArch64ISD::CCMN)
+ MAKE_CASE(AArch64ISD::FCCMP)
+ MAKE_CASE(AArch64ISD::FCMP)
+ MAKE_CASE(AArch64ISD::STRICT_FCMP)
+ MAKE_CASE(AArch64ISD::STRICT_FCMPE)
+ MAKE_CASE(AArch64ISD::DUP)
+ MAKE_CASE(AArch64ISD::DUPLANE8)
+ MAKE_CASE(AArch64ISD::DUPLANE16)
+ MAKE_CASE(AArch64ISD::DUPLANE32)
+ MAKE_CASE(AArch64ISD::DUPLANE64)
+ MAKE_CASE(AArch64ISD::MOVI)
+ MAKE_CASE(AArch64ISD::MOVIshift)
+ MAKE_CASE(AArch64ISD::MOVIedit)
+ MAKE_CASE(AArch64ISD::MOVImsl)
+ MAKE_CASE(AArch64ISD::FMOV)
+ MAKE_CASE(AArch64ISD::MVNIshift)
+ MAKE_CASE(AArch64ISD::MVNImsl)
+ MAKE_CASE(AArch64ISD::BICi)
+ MAKE_CASE(AArch64ISD::ORRi)
+ MAKE_CASE(AArch64ISD::BSP)
+ MAKE_CASE(AArch64ISD::NEG)
+ MAKE_CASE(AArch64ISD::EXTR)
+ MAKE_CASE(AArch64ISD::ZIP1)
+ MAKE_CASE(AArch64ISD::ZIP2)
+ MAKE_CASE(AArch64ISD::UZP1)
+ MAKE_CASE(AArch64ISD::UZP2)
+ MAKE_CASE(AArch64ISD::TRN1)
+ MAKE_CASE(AArch64ISD::TRN2)
+ MAKE_CASE(AArch64ISD::REV16)
+ MAKE_CASE(AArch64ISD::REV32)
+ MAKE_CASE(AArch64ISD::REV64)
+ MAKE_CASE(AArch64ISD::EXT)
+ MAKE_CASE(AArch64ISD::VSHL)
+ MAKE_CASE(AArch64ISD::VLSHR)
+ MAKE_CASE(AArch64ISD::VASHR)
+ MAKE_CASE(AArch64ISD::VSLI)
+ MAKE_CASE(AArch64ISD::VSRI)
+ MAKE_CASE(AArch64ISD::CMEQ)
+ MAKE_CASE(AArch64ISD::CMGE)
+ MAKE_CASE(AArch64ISD::CMGT)
+ MAKE_CASE(AArch64ISD::CMHI)
+ MAKE_CASE(AArch64ISD::CMHS)
+ MAKE_CASE(AArch64ISD::FCMEQ)
+ MAKE_CASE(AArch64ISD::FCMGE)
+ MAKE_CASE(AArch64ISD::FCMGT)
+ MAKE_CASE(AArch64ISD::CMEQz)
+ MAKE_CASE(AArch64ISD::CMGEz)
+ MAKE_CASE(AArch64ISD::CMGTz)
+ MAKE_CASE(AArch64ISD::CMLEz)
+ MAKE_CASE(AArch64ISD::CMLTz)
+ MAKE_CASE(AArch64ISD::FCMEQz)
+ MAKE_CASE(AArch64ISD::FCMGEz)
+ MAKE_CASE(AArch64ISD::FCMGTz)
+ MAKE_CASE(AArch64ISD::FCMLEz)
+ MAKE_CASE(AArch64ISD::FCMLTz)
+ MAKE_CASE(AArch64ISD::SADDV)
+ MAKE_CASE(AArch64ISD::UADDV)
+ MAKE_CASE(AArch64ISD::SRHADD)
+ MAKE_CASE(AArch64ISD::URHADD)
+ MAKE_CASE(AArch64ISD::SMINV)
+ MAKE_CASE(AArch64ISD::UMINV)
+ MAKE_CASE(AArch64ISD::SMAXV)
+ MAKE_CASE(AArch64ISD::UMAXV)
+ MAKE_CASE(AArch64ISD::SMAXV_PRED)
+ MAKE_CASE(AArch64ISD::UMAXV_PRED)
+ MAKE_CASE(AArch64ISD::SMINV_PRED)
+ MAKE_CASE(AArch64ISD::UMINV_PRED)
+ MAKE_CASE(AArch64ISD::ORV_PRED)
+ MAKE_CASE(AArch64ISD::EORV_PRED)
+ MAKE_CASE(AArch64ISD::ANDV_PRED)
+ MAKE_CASE(AArch64ISD::CLASTA_N)
+ MAKE_CASE(AArch64ISD::CLASTB_N)
+ MAKE_CASE(AArch64ISD::LASTA)
+ MAKE_CASE(AArch64ISD::LASTB)
+ MAKE_CASE(AArch64ISD::REV)
+ MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
+ MAKE_CASE(AArch64ISD::TBL)
+ MAKE_CASE(AArch64ISD::FADD_PRED)
+ MAKE_CASE(AArch64ISD::FADDA_PRED)
+ MAKE_CASE(AArch64ISD::FADDV_PRED)
+ MAKE_CASE(AArch64ISD::FMA_PRED)
+ MAKE_CASE(AArch64ISD::FMAXV_PRED)
+ MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
+ MAKE_CASE(AArch64ISD::FMINV_PRED)
+ MAKE_CASE(AArch64ISD::FMINNMV_PRED)
+ MAKE_CASE(AArch64ISD::NOT)
+ MAKE_CASE(AArch64ISD::BIT)
+ MAKE_CASE(AArch64ISD::CBZ)
+ MAKE_CASE(AArch64ISD::CBNZ)
+ MAKE_CASE(AArch64ISD::TBZ)
+ MAKE_CASE(AArch64ISD::TBNZ)
+ MAKE_CASE(AArch64ISD::TC_RETURN)
+ MAKE_CASE(AArch64ISD::PREFETCH)
+ MAKE_CASE(AArch64ISD::SITOF)
+ MAKE_CASE(AArch64ISD::UITOF)
+ MAKE_CASE(AArch64ISD::NVCAST)
+ MAKE_CASE(AArch64ISD::SQSHL_I)
+ MAKE_CASE(AArch64ISD::UQSHL_I)
+ MAKE_CASE(AArch64ISD::SRSHR_I)
+ MAKE_CASE(AArch64ISD::URSHR_I)
+ MAKE_CASE(AArch64ISD::SQSHLU_I)
+ MAKE_CASE(AArch64ISD::WrapperLarge)
+ MAKE_CASE(AArch64ISD::LD2post)
+ MAKE_CASE(AArch64ISD::LD3post)
+ MAKE_CASE(AArch64ISD::LD4post)
+ MAKE_CASE(AArch64ISD::ST2post)
+ MAKE_CASE(AArch64ISD::ST3post)
+ MAKE_CASE(AArch64ISD::ST4post)
+ MAKE_CASE(AArch64ISD::LD1x2post)
+ MAKE_CASE(AArch64ISD::LD1x3post)
+ MAKE_CASE(AArch64ISD::LD1x4post)
+ MAKE_CASE(AArch64ISD::ST1x2post)
+ MAKE_CASE(AArch64ISD::ST1x3post)
+ MAKE_CASE(AArch64ISD::ST1x4post)
+ MAKE_CASE(AArch64ISD::LD1DUPpost)
+ MAKE_CASE(AArch64ISD::LD2DUPpost)
+ MAKE_CASE(AArch64ISD::LD3DUPpost)
+ MAKE_CASE(AArch64ISD::LD4DUPpost)
+ MAKE_CASE(AArch64ISD::LD1LANEpost)
+ MAKE_CASE(AArch64ISD::LD2LANEpost)
+ MAKE_CASE(AArch64ISD::LD3LANEpost)
+ MAKE_CASE(AArch64ISD::LD4LANEpost)
+ MAKE_CASE(AArch64ISD::ST2LANEpost)
+ MAKE_CASE(AArch64ISD::ST3LANEpost)
+ MAKE_CASE(AArch64ISD::ST4LANEpost)
+ MAKE_CASE(AArch64ISD::SMULL)
+ MAKE_CASE(AArch64ISD::UMULL)
+ MAKE_CASE(AArch64ISD::FRECPE)
+ MAKE_CASE(AArch64ISD::FRECPS)
+ MAKE_CASE(AArch64ISD::FRSQRTE)
+ MAKE_CASE(AArch64ISD::FRSQRTS)
+ MAKE_CASE(AArch64ISD::STG)
+ MAKE_CASE(AArch64ISD::STZG)
+ MAKE_CASE(AArch64ISD::ST2G)
+ MAKE_CASE(AArch64ISD::STZ2G)
+ MAKE_CASE(AArch64ISD::SUNPKHI)
+ MAKE_CASE(AArch64ISD::SUNPKLO)
+ MAKE_CASE(AArch64ISD::UUNPKHI)
+ MAKE_CASE(AArch64ISD::UUNPKLO)
+ MAKE_CASE(AArch64ISD::INSR)
+ MAKE_CASE(AArch64ISD::PTEST)
+ MAKE_CASE(AArch64ISD::PTRUE)
+ MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO)
+ MAKE_CASE(AArch64ISD::ST1_PRED)
+ MAKE_CASE(AArch64ISD::SST1_PRED)
+ MAKE_CASE(AArch64ISD::SST1_SCALED_PRED)
+ MAKE_CASE(AArch64ISD::SST1_SXTW_PRED)
+ MAKE_CASE(AArch64ISD::SST1_UXTW_PRED)
+ MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED)
+ MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED)
+ MAKE_CASE(AArch64ISD::SST1_IMM_PRED)
+ MAKE_CASE(AArch64ISD::SSTNT1_PRED)
+ MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
+ MAKE_CASE(AArch64ISD::LDP)
+ MAKE_CASE(AArch64ISD::STP)
+ MAKE_CASE(AArch64ISD::STNP)
+ MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::INDEX_VECTOR)
+ }
+#undef MAKE_CASE
return nullptr;
}
@@ -1478,12 +1675,6 @@ MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
return BB;
}
-MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchPad(
- MachineInstr &MI, MachineBasicBlock *BB) const {
- MI.eraseFromParent();
- return BB;
-}
-
MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
switch (MI.getOpcode()) {
@@ -1502,8 +1693,6 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
case AArch64::CATCHRET:
return EmitLoweredCatchRet(MI, BB);
- case AArch64::CATCHPAD:
- return EmitLoweredCatchPad(MI, BB);
}
}
@@ -1734,14 +1923,22 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
// we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
Opcode = AArch64ISD::ADDS;
LHS = LHS.getOperand(1);
- } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
- !isUnsignedIntSetCC(CC)) {
- // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
- // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
- // of the signed comparisons.
- Opcode = AArch64ISD::ANDS;
- RHS = LHS.getOperand(1);
- LHS = LHS.getOperand(0);
+ } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
+ if (LHS.getOpcode() == ISD::AND) {
+ // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
+ // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
+ // of the signed comparisons.
+ const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
+ DAG.getVTList(VT, MVT_CC),
+ LHS.getOperand(0),
+ LHS.getOperand(1));
+ // Replace all users of (and X, Y) with newly generated (ands X, Y)
+ DAG.ReplaceAllUsesWith(LHS, ANDSNode);
+ return ANDSNode.getValue(1);
+ } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
+ // Use result of ANDS
+ return LHS.getValue(1);
+ }
}
return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
@@ -2331,15 +2528,6 @@ SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
return IsStrict ? DAG.getMergeValues({Result, Chain}, dl) : Result;
}
-// Returns true if the given Op is the overflow flag result of an overflow
-// intrinsic operation.
-static bool isOverflowIntrOpRes(SDValue Op) {
- unsigned Opc = Op.getOpcode();
- return (Op.getResNo() == 1 &&
- (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
- Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
-}
-
static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
SDValue Sel = Op.getOperand(0);
SDValue Other = Op.getOperand(1);
@@ -2352,7 +2540,7 @@ static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
// (csel 1, 0, invert(cc), overflow_op_bool)
// ... which later gets transformed to just a cset instruction with an
// inverted condition code, rather than a cset + eor sequence.
- if (isOneConstant(Other) && isOverflowIntrOpRes(Sel)) {
+ if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
return SDValue();
@@ -2527,13 +2715,19 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
- if (SrcVal.getValueType() != MVT::f128) {
+ EVT SrcVT = SrcVal.getValueType();
+
+ if (SrcVT != MVT::f128) {
+ // Expand cases where the input is a vector bigger than NEON.
+ if (useSVEForFixedLengthVectorVT(SrcVT))
+ return SDValue();
+
// It's legal except when f128 is involved
return Op;
}
RTLIB::Libcall LC;
- LC = RTLIB::getFPROUND(SrcVal.getValueType(), Op.getValueType());
+ LC = RTLIB::getFPROUND(SrcVT, Op.getValueType());
// FP_ROUND node has a second operand indicating whether it is known to be
// precise. That doesn't take part in the LibCall so we can't directly use
@@ -2720,7 +2914,8 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
}
static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
- if (Op.getValueType() != MVT::f16)
+ EVT OpVT = Op.getValueType();
+ if (OpVT != MVT::f16 && OpVT != MVT::bf16)
return SDValue();
assert(Op.getOperand(0).getValueType() == MVT::i16);
@@ -2729,7 +2924,7 @@ static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
return SDValue(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op,
DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
0);
}
@@ -2858,16 +3053,19 @@ SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
// so that the shift + and get folded into a bitfield extract.
SDLoc dl(Op);
- SDValue FPCR_64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i64,
- DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl,
- MVT::i64));
+ SDValue Chain = Op.getOperand(0);
+ SDValue FPCR_64 = DAG.getNode(
+ ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
+ {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
+ Chain = FPCR_64.getValue(1);
SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
DAG.getConstant(1U << 22, dl, MVT::i32));
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
DAG.getConstant(22, dl, MVT::i32));
- return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
- DAG.getConstant(3, dl, MVT::i32));
+ SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
+ DAG.getConstant(3, dl, MVT::i32));
+ return DAG.getMergeValues({AND, Chain}, dl);
}
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
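
As a side note on the FLT_ROUNDS change above: the chain is new, but the bit arithmetic is unchanged. It maps the FPCR RMode field (bits 23:22) to the C FLT_ROUNDS encoding by adding one modulo four. A minimal scalar sketch of that mapping, assuming the FPCR value has already been read (e.g. via MRS); plain C++, not part of the patch:

    #include <cstdint>

    // Maps FPCR.RMode (0=RN, 1=RP, 2=RM, 3=RZ) to the C FLT_ROUNDS encoding
    // (1=to nearest, 2=upward, 3=downward, 0=toward zero), exactly as the
    // ADD (1 << 22), SRL 22, AND 3 node sequence does.
    static unsigned fltRoundsFromFPCR(uint64_t fpcr) {
      uint32_t lo = static_cast<uint32_t>(fpcr); // the TRUNCATE to i32
      return ((lo + (1u << 22)) >> 22) & 3;      // ADD, SRL, AND
    }
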
@@ -2939,6 +3137,12 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
+static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
+ int Pattern) {
+ return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
+ DAG.getTargetConstant(Pattern, DL, MVT::i32));
+}
+
SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -3026,6 +3230,26 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::aarch64_sve_ptrue:
return DAG.getNode(AArch64ISD::PTRUE, dl, Op.getValueType(),
Op.getOperand(1));
+ case Intrinsic::aarch64_sve_dupq_lane:
+ return LowerDUPQLane(Op, DAG);
+ case Intrinsic::aarch64_sve_convert_from_svbool:
+ return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_convert_to_svbool: {
+ EVT OutVT = Op.getValueType();
+ EVT InVT = Op.getOperand(1).getValueType();
+ // Return the operand if the cast isn't changing type,
+ // i.e. <n x 16 x i1> -> <n x 16 x i1>
+ if (InVT == OutVT)
+ return Op.getOperand(1);
+ // Otherwise, zero the newly introduced lanes.
+ SDValue Reinterpret =
+ DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, OutVT, Op.getOperand(1));
+ SDValue Mask = getPTrue(DAG, dl, InVT, AArch64SVEPredPattern::all);
+ SDValue MaskReinterpret =
+ DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, OutVT, Mask);
+ return DAG.getNode(ISD::AND, dl, OutVT, Reinterpret, MaskReinterpret);
+ }
case Intrinsic::aarch64_sve_insr: {
SDValue Scalar = Op.getOperand(2);
@@ -3058,6 +3282,29 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
"llvm.eh.recoverfp must take a function as the first argument");
return IncomingFPOp;
}
+
+ case Intrinsic::aarch64_neon_vsri:
+ case Intrinsic::aarch64_neon_vsli: {
+ EVT Ty = Op.getValueType();
+
+ if (!Ty.isVector())
+ report_fatal_error("Unexpected type for aarch64_neon_vsli");
+
+ assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
+
+ bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
+ unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
+ return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
+ Op.getOperand(3));
+ }
+
+ case Intrinsic::aarch64_neon_srhadd:
+ case Intrinsic::aarch64_neon_urhadd: {
+ bool IsSignedAdd = IntNo == Intrinsic::aarch64_neon_srhadd;
+ unsigned Opcode = IsSignedAdd ? AArch64ISD::SRHADD : AArch64ISD::URHADD;
+ return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2));
+ }
}
}
@@ -3112,10 +3359,13 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
EVT MemVT = StoreNode->getMemoryVT();
if (VT.isVector()) {
+ if (useSVEForFixedLengthVectorVT(VT))
+ return LowerFixedLengthVectorStoreToSVE(Op, DAG);
+
unsigned AS = StoreNode->getAddressSpace();
- unsigned Align = StoreNode->getAlignment();
- if (Align < MemVT.getStoreSize() &&
- !allowsMisalignedMemoryAccesses(MemVT, AS, Align,
+ Align Alignment = StoreNode->getAlign();
+ if (Alignment < MemVT.getStoreSize() &&
+ !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment.value(),
StoreNode->getMemOperand()->getFlags(),
nullptr)) {
return scalarizeVectorStore(StoreNode, DAG);
@@ -3124,6 +3374,30 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
if (StoreNode->isTruncatingStore()) {
return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
}
+ // 256 bit non-temporal stores can be lowered to STNP. Do this as part of
+ // the custom lowering, as there are no un-paired non-temporal stores and
+ // legalization will break up 256 bit inputs.
+ if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
+ MemVT.getVectorElementCount().Min % 2u == 0 &&
+ ((MemVT.getScalarSizeInBits() == 8u ||
+ MemVT.getScalarSizeInBits() == 16u ||
+ MemVT.getScalarSizeInBits() == 32u ||
+ MemVT.getScalarSizeInBits() == 64u))) {
+ SDValue Lo =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
+ MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
+ StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
+ SDValue Hi = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, Dl,
+ MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
+ StoreNode->getValue(),
+ DAG.getConstant(MemVT.getVectorElementCount().Min / 2, Dl, MVT::i64));
+ SDValue Result = DAG.getMemIntrinsicNode(
+ AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
+ {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
+ StoreNode->getMemoryVT(), StoreNode->getMemOperand());
+ return Result;
+ }
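
The kind of source that reaches this new path is a 256-bit vector store marked non-temporal; the lowering splits the value into two 128-bit halves and emits a single STNP. A hedged illustration using Clang's vector extension and non-temporal builtin (the type and function names here are illustrative, not from the patch):

    // Requires Clang (__builtin_nontemporal_store is a Clang builtin).
    typedef int v8i32 __attribute__((vector_size(32))); // 256-bit vector

    void store_stream(v8i32 *dst, v8i32 value) {
      // A 256-bit non-temporal store; with this patch it should be split
      // into two 128-bit halves feeding one STNP.
      __builtin_nontemporal_store(value, dst);
    }
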
} else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
SDValue Lo =
@@ -3194,11 +3468,15 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::UMULO:
return LowerXALUO(Op, DAG);
case ISD::FADD:
+ if (useSVEForFixedLengthVectorVT(Op.getValueType()))
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
case ISD::FSUB:
return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
case ISD::FMUL:
return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
+ case ISD::FMA:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
case ISD::FDIV:
return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
case ISD::FP_ROUND:
@@ -3226,6 +3504,20 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerSPLAT_VECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);
+ case ISD::INSERT_SUBVECTOR:
+ return LowerINSERT_SUBVECTOR(Op, DAG);
+ case ISD::SDIV:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SDIV_PRED);
+ case ISD::UDIV:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::UDIV_PRED);
+ case ISD::SMIN:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_MERGE_OP1);
+ case ISD::UMIN:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_MERGE_OP1);
+ case ISD::SMAX:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_MERGE_OP1);
+ case ISD::UMAX:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_MERGE_OP1);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
@@ -3279,7 +3571,66 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerATOMIC_LOAD_AND(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::VSCALE:
+ return LowerVSCALE(Op, DAG);
+ case ISD::TRUNCATE:
+ return LowerTRUNCATE(Op, DAG);
+ case ISD::LOAD:
+ if (useSVEForFixedLengthVectorVT(Op.getValueType()))
+ return LowerFixedLengthVectorLoadToSVE(Op, DAG);
+ llvm_unreachable("Unexpected request to lower ISD::LOAD");
+ case ISD::ADD:
+ if (useSVEForFixedLengthVectorVT(Op.getValueType()))
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
+ llvm_unreachable("Unexpected request to lower ISD::ADD");
+ }
+}
+
+bool AArch64TargetLowering::useSVEForFixedLengthVectors() const {
+ // Prefer NEON unless larger SVE registers are available.
+ return Subtarget->hasSVE() && Subtarget->getMinSVEVectorSizeInBits() >= 256;
+}
+
+bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(EVT VT) const {
+ if (!useSVEForFixedLengthVectors())
+ return false;
+
+ if (!VT.isFixedLengthVector())
+ return false;
+
+ // Fixed length predicates should be promoted to i8.
+ // NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work.
+ if (VT.getVectorElementType() == MVT::i1)
+ return false;
+
+ // Don't use SVE for vectors we cannot scalarize if required.
+ switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
+ default:
+ return false;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ case MVT::f16:
+ case MVT::f32:
+ case MVT::f64:
+ break;
}
+
+ // Ensure NEON MVTs only belong to a single register class.
+ if (VT.getSizeInBits() <= 128)
+ return false;
+
+ // Don't use SVE for types that don't fit.
+ if (VT.getSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
+ return false;
+
+ // TODO: Perhaps an artificial restriction, but worth having whilst getting
+ // the base fixed length SVE support in place.
+ if (!VT.isPow2VectorType())
+ return false;
+
+ return true;
}
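
As a rough standalone mirror of the predicate above (a sketch only, ignoring the EVT machinery): a fixed-length vector is routed to SVE when SVE guarantees at least 256-bit registers, the element type is a supported integer or float, the whole vector is wider than 128 bits but fits the guaranteed SVE width, and the element count is a power of two.

    // Simplified stand-in for useSVEForFixedLengthVectorVT (not the real
    // EVT-based code): EltBits is the element width in bits, NumElts the
    // fixed element count, MinSVEBits the subtarget's guaranteed minimum
    // SVE register width in bits.
    static bool useSVEForFixedLengthVector(unsigned EltBits, unsigned NumElts,
                                           bool IsFloat, unsigned MinSVEBits) {
      if (MinSVEBits < 256)                    // prefer NEON otherwise
        return false;
      if (EltBits == 1)                        // i1 predicates are promoted to i8
        return false;
      bool LegalElt =
          EltBits == 8 || EltBits == 16 || EltBits == 32 || EltBits == 64;
      if (!LegalElt || (IsFloat && EltBits == 8)) // i8-i64 and f16/f32/f64 only
        return false;
      unsigned SizeInBits = EltBits * NumElts;
      if (SizeInBits <= 128)                   // NEON-sized vectors stay on NEON
        return false;
      if (SizeInBits > MinSVEBits)             // must fit the guaranteed width
        return false;
      return (NumElts & (NumElts - 1)) == 0;   // power-of-two element counts
    }
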
//===----------------------------------------------------------------------===//
@@ -3292,9 +3643,6 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
switch (CC) {
default:
report_fatal_error("Unsupported calling convention.");
- case CallingConv::AArch64_SVE_VectorCall:
- // Calling SVE functions is currently not yet supported.
- report_fatal_error("Unsupported calling convention.");
case CallingConv::WebKit_JS:
return CC_AArch64_WebKit_JS;
case CallingConv::GHC:
@@ -3317,6 +3665,7 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
case CallingConv::CFGuard_Check:
return CC_AArch64_Win64_CFGuard_Check;
case CallingConv::AArch64_VectorCall:
+ case CallingConv::AArch64_SVE_VectorCall:
return CC_AArch64_AAPCS;
}
}
@@ -3404,7 +3753,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
RC = &AArch64::GPR32RegClass;
else if (RegVT == MVT::i64)
RC = &AArch64::GPR64RegClass;
- else if (RegVT == MVT::f16)
+ else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
RC = &AArch64::FPR16RegClass;
else if (RegVT == MVT::f32)
RC = &AArch64::FPR32RegClass;
@@ -3435,7 +3784,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
- llvm_unreachable("Spilling of SVE vectors not yet implemented");
+ break;
case CCValAssign::BCvt:
ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
break;
@@ -3452,7 +3801,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
} else { // VA.isRegLoc()
assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
unsigned ArgOffset = VA.getLocMemOffset();
- unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
+ unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
+ ? VA.getLocVT().getSizeInBits()
+ : VA.getValVT().getSizeInBits()) / 8;
uint32_t BEAlign = 0;
if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
@@ -3478,7 +3829,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
- llvm_unreachable("Spilling of SVE vectors not yet implemented");
+ MemVT = VA.getLocVT();
+ break;
case CCValAssign::SExt:
ExtType = ISD::SEXTLOAD;
break;
@@ -3496,6 +3848,15 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
MemVT);
}
+
+ if (VA.getLocInfo() == CCValAssign::Indirect) {
+ assert(VA.getValVT().isScalableVector() &&
+ "Only scalable vectors can be passed indirectly");
+ // If value is passed via pointer - do a load.
+ ArgValue =
+ DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, MachinePointerInfo());
+ }
+
if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
ArgValue, DAG.getValueType(MVT::i32));
@@ -3611,7 +3972,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
// The extra size here, if triggered, will always be 8.
MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
} else
- GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
+ GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
@@ -3643,7 +4004,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
int FPRIdx = 0;
if (FPRSaveSize != 0) {
- FPRIdx = MFI.CreateStackObject(FPRSaveSize, 16, false);
+ FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);
SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
@@ -3764,6 +4125,13 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
CallingConv::ID CallerCC = CallerF.getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
+ // When using the Windows calling convention on a non-windows OS, we want
+ // to back up and restore X18 in such functions; we can't do a tail call
+ // from those functions.
+ if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
+ CalleeCC != CallingConv::Win64)
+ return false;
+
// Byval parameters hand the function a pointer directly into the stack area
// we want to reuse during a tail call. Working around this *is* possible (see
// X86) but less efficient and uglier in LowerCall.
@@ -3856,6 +4224,18 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ // If any of the arguments is passed indirectly, it must be SVE, so the
+ // 'getBytesInStackArgArea' is not sufficient to determine whether we need to
+ // allocate space on the stack. That is why we determine explicitly here
+ // that the call cannot be a tailcall.
+ if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
+ assert((A.getLocInfo() != CCValAssign::Indirect ||
+ A.getValVT().isScalableVector()) &&
+ "Expected value to be scalable");
+ return A.getLocInfo() == CCValAssign::Indirect;
+ }))
+ return false;
+
// If the stack arguments for this call do not fit into our own save area then
// the call cannot be made tail.
if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
@@ -3934,7 +4314,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Check if it's really possible to do a tail call.
IsTailCall = isEligibleForTailCallOptimization(
Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
- if (!IsTailCall && CLI.CS && CLI.CS.isMustTailCall())
+ if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
@@ -4044,7 +4424,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVector<SDValue, 8> MemOpChains;
auto PtrVT = getPointerTy(DAG.getDataLayout());
- if (IsVarArg && CLI.CS && CLI.CS.isMustTailCall()) {
+ if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
for (const auto &F : Forwards) {
SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
@@ -4096,7 +4476,20 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
- llvm_unreachable("Spilling of SVE vectors not yet implemented");
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
+ Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
+ int FI = MFI.CreateStackObject(
+ VA.getValVT().getStoreSize().getKnownMinSize(), Alignment, false);
+ MFI.setStackID(FI, TargetStackID::SVEVector);
+
+ SDValue SpillSlot = DAG.getFrameIndex(
+ FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
+ Chain = DAG.getStore(
+ Chain, DL, Arg, SpillSlot,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
+ Arg = SpillSlot;
+ break;
}
if (VA.isRegLoc()) {
@@ -4132,7 +4525,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
RegsToPass.emplace_back(VA.getLocReg(), Arg);
RegsUsed.insert(VA.getLocReg());
const TargetOptions &Options = DAG.getTarget().Options;
- if (Options.EnableDebugEntryValues)
+ if (Options.EmitCallSiteInfo)
CSInfo.emplace_back(VA.getLocReg(), i);
}
} else {
@@ -4144,8 +4537,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// FIXME: This works on big-endian for composite byvals, which are the
// common case. It should also work for fundamental types too.
uint32_t BEAlign = 0;
- unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
- : VA.getValVT().getSizeInBits();
+ unsigned OpSize;
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ OpSize = VA.getLocVT().getSizeInBits();
+ else
+ OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
+ : VA.getValVT().getSizeInBits();
OpSize = (OpSize + 7) / 8;
if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
!Flags.isInConsecutiveRegs()) {
@@ -4181,10 +4578,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue SizeNode =
DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
SDValue Cpy = DAG.getMemcpy(
- Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
+ Chain, DL, DstAddr, Arg, SizeNode,
+ Outs[i].Flags.getNonZeroByValAlign(),
/*isVol = */ false, /*AlwaysInline = */ false,
- /*isTailCall = */ false,
- DstInfo, MachinePointerInfo());
+ /*isTailCall = */ false, DstInfo, MachinePointerInfo());
MemOpChains.push_back(Cpy);
} else {
@@ -4318,6 +4715,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Returns a chain and a flag for retval copy to use.
Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
+ DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
InFlag = Chain.getValue(1);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
@@ -4483,7 +4881,7 @@ SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
- return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
+ return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
N->getOffset(), Flag);
}
@@ -4974,7 +5372,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
// instruction.
- if (isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
+ if (ISD::isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
@@ -5058,8 +5456,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
Cmp);
}
- assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
- LHS.getValueType() == MVT::f64);
+ assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::bf16 ||
+ LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two branches to implement.
@@ -5185,6 +5583,15 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
if (VT == MVT::i64)
UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
return UaddLV;
+ } else if (VT == MVT::i128) {
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Val);
+
+ SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v16i8, Val);
+ SDValue UaddLV = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
+ DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
+
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV);
}
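
The new i128 CTPOP path bitcasts to v16i8, takes a per-byte popcount (CNT) and sums the sixteen lanes (UADDLV). A scalar C++ sketch of the same computation, purely illustrative and relying on the GCC/Clang __int128 extension:

    // Per-byte popcounts followed by a horizontal add across 16 lanes,
    // mirroring what CNT + UADDLV compute for an i128 input.
    static unsigned popcount128(unsigned __int128 v) {
      unsigned sum = 0;
      for (int lane = 0; lane < 16; ++lane) {
        sum += __builtin_popcount(static_cast<unsigned>(v) & 0xFFu);
        v >>= 8;
      }
      return sum;
    }
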
assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
@@ -5504,9 +5911,17 @@ SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
SDValue FVal = Op->getOperand(2);
SDLoc DL(Op);
+ EVT Ty = Op.getValueType();
+ if (Ty.isScalableVector()) {
+ SDValue TruncCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, CCVal);
+ MVT PredVT = MVT::getVectorVT(MVT::i1, Ty.getVectorElementCount());
+ SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, TruncCC);
+ return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
+ }
+
// Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
// instruction.
- if (isOverflowIntrOpRes(CCVal)) {
+ if (ISD::isOverflowIntrOpRes(CCVal)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
return SDValue();
@@ -5717,9 +6132,9 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2),
- DAG.getConstant(VaListSize, DL, MVT::i32), PtrSize,
- false, false, false, MachinePointerInfo(DestSV),
- MachinePointerInfo(SrcSV));
+ DAG.getConstant(VaListSize, DL, MVT::i32),
+ Align(PtrSize), false, false, false,
+ MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
}
SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
@@ -5731,7 +6146,7 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
SDValue Addr = Op.getOperand(1);
- unsigned Align = Op.getConstantOperandVal(3);
+ MaybeAlign Align(Op.getConstantOperandVal(3));
unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
auto PtrVT = getPointerTy(DAG.getDataLayout());
auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
@@ -5740,12 +6155,11 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
Chain = VAList.getValue(1);
VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT);
- if (Align > MinSlotSize) {
- assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
+ if (Align && *Align > MinSlotSize) {
VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
- DAG.getConstant(Align - 1, DL, PtrVT));
+ DAG.getConstant(Align->value() - 1, DL, PtrVT));
VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
- DAG.getConstant(-(int64_t)Align, DL, PtrVT));
+ DAG.getConstant(-(int64_t)Align->value(), DL, PtrVT));
}
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
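
The over-aligned va_arg case above uses the usual round-up-to-alignment trick on the VAList pointer; a minimal scalar equivalent, assuming Align is a power of two:

    #include <cstdint>

    // Rounds Ptr up to a power-of-two Align, matching the ADD/AND pair
    // emitted for over-aligned va_arg slots (~(Align - 1) == -Align here).
    static uint64_t alignUp(uint64_t Ptr, uint64_t Align) {
      return (Ptr + Align - 1) & ~(Align - 1);
    }
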
@@ -7076,7 +7490,8 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
// vrev <4 x i16> -> REV32
if (VT.getVectorElementType() == MVT::i16 ||
- VT.getVectorElementType() == MVT::f16)
+ VT.getVectorElementType() == MVT::f16 ||
+ VT.getVectorElementType() == MVT::bf16)
return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
// vrev <4 x i8> -> REV16
assert(VT.getVectorElementType() == MVT::i8);
@@ -7089,7 +7504,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
unsigned Opcode;
if (EltTy == MVT::i8)
Opcode = AArch64ISD::DUPLANE8;
- else if (EltTy == MVT::i16 || EltTy == MVT::f16)
+ else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
Opcode = AArch64ISD::DUPLANE16;
else if (EltTy == MVT::i32 || EltTy == MVT::f32)
Opcode = AArch64ISD::DUPLANE32;
@@ -7196,7 +7611,7 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
static unsigned getDUPLANEOp(EVT EltType) {
if (EltType == MVT::i8)
return AArch64ISD::DUPLANE8;
- if (EltType == MVT::i16 || EltType == MVT::f16)
+ if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
return AArch64ISD::DUPLANE16;
if (EltType == MVT::i32 || EltType == MVT::f32)
return AArch64ISD::DUPLANE32;
@@ -7405,18 +7820,16 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
// Extend input splat value where needed to fit into a GPR (32b or 64b only)
// FPRs don't have this restriction.
switch (ElemVT.getSimpleVT().SimpleTy) {
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i32);
- return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal);
- case MVT::i64:
- SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
- return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal);
case MVT::i1: {
+ // The only legal i1 vectors are SVE vectors, so we can use SVE-specific
+ // lowering code.
+ if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
+ if (ConstVal->isOne())
+ return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
+ // TODO: Add special case for constant false
+ }
// The general case of i1. There isn't any natural way to do this,
// so we use some trickery with whilelo.
- // TODO: Add special cases for splat of constant true/false.
SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, SplatVal,
DAG.getValueType(MVT::i1));
@@ -7425,15 +7838,76 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ID,
DAG.getConstant(0, dl, MVT::i64), SplatVal);
}
- // TODO: we can support float types, but haven't added patterns yet.
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i32);
+ break;
+ case MVT::i64:
+ SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
+ break;
case MVT::f16:
+ case MVT::bf16:
case MVT::f32:
case MVT::f64:
+ // Fine as is
+ break;
default:
report_fatal_error("Unsupported SPLAT_VECTOR input operand type");
}
+
+ return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal);
}
+SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+
+ EVT VT = Op.getValueType();
+ if (!isTypeLegal(VT) || !VT.isScalableVector())
+ return SDValue();
+
+ // Current lowering only supports the SVE-ACLE types.
+ if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
+ return SDValue();
+
+ // The DUPQ operation is independent of element type so normalise to i64s.
+ SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1));
+ SDValue Idx128 = Op.getOperand(2);
+
+ // DUPQ can be used when idx is in range.
+ auto *CIdx = dyn_cast<ConstantSDNode>(Idx128);
+ if (CIdx && (CIdx->getZExtValue() <= 3)) {
+ SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64);
+ SDNode *DUPQ =
+ DAG.getMachineNode(AArch64::DUP_ZZI_Q, DL, MVT::nxv2i64, V, CI);
+ return DAG.getNode(ISD::BITCAST, DL, VT, SDValue(DUPQ, 0));
+ }
+
+ // The ACLE says this must produce the same result as:
+ // svtbl(data, svadd_x(svptrue_b64(),
+ // svand_x(svptrue_b64(), svindex_u64(0, 1), 1),
+ // index * 2))
+ SDValue One = DAG.getConstant(1, DL, MVT::i64);
+ SDValue SplatOne = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, One);
+
+ // create the vector 0,1,0,1,...
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+ SDValue SV = DAG.getNode(AArch64ISD::INDEX_VECTOR,
+ DL, MVT::nxv2i64, Zero, One);
+ SV = DAG.getNode(ISD::AND, DL, MVT::nxv2i64, SV, SplatOne);
+
+ // create the vector idx64,idx64+1,idx64,idx64+1,...
+ SDValue Idx64 = DAG.getNode(ISD::ADD, DL, MVT::i64, Idx128, Idx128);
+ SDValue SplatIdx64 = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Idx64);
+ SDValue ShuffleMask = DAG.getNode(ISD::ADD, DL, MVT::nxv2i64, SV, SplatIdx64);
+
+ // create the vector Val[idx64],Val[idx64+1],Val[idx64],Val[idx64+1],...
+ SDValue TBL = DAG.getNode(AArch64ISD::TBL, DL, MVT::nxv2i64, V, ShuffleMask);
+ return DAG.getNode(ISD::BITCAST, DL, VT, TBL);
+}
+
+
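
When the lane index is not a valid DUPQ immediate, the fallback above builds a TBL whose 64-bit shuffle mask is idx*2, idx*2+1, idx*2, idx*2+1, ... A short sketch of that mask construction, mirroring the INDEX/AND/ADD sequence (plain C++, not part of the patch):

    #include <vector>

    // Mask used by the svdupq_lane fallback: for 128-bit lane Idx and a
    // vector of NumElts 64-bit elements, lane i selects element
    // Idx*2 + (i & 1), i.e. (index_u64(0,1) & 1) + splat(Idx*2).
    static std::vector<unsigned> dupqLaneMask(unsigned Idx, unsigned NumElts) {
      std::vector<unsigned> Mask(NumElts);
      for (unsigned i = 0; i < NumElts; ++i)
        Mask[i] = Idx * 2 + (i & 1);
      return Mask;
    }
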
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
APInt &UndefBits) {
EVT VT = BVN->getValueType(0);
@@ -7684,8 +8158,10 @@ static unsigned getIntrinsicID(const SDNode *N) {
// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
-// BUILD_VECTORs with constant element C1, C2 is a constant, and C1 == ~C2.
-// Also, logical shift right -> sri, with the same structure.
+// BUILD_VECTORs with constant element C1, C2 is a constant, and:
+// - for the SLI case: C1 == ~(Ones(ElemSizeInBits) << C2)
+// - for the SRI case: C1 == ~(Ones(ElemSizeInBits) >> C2)
+// The (or (lsl Y, C2), (and X, BvecC1)) case is also handled.
static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
@@ -7694,49 +8170,70 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
- // Is the first op an AND?
- const SDValue And = N->getOperand(0);
- if (And.getOpcode() != ISD::AND)
+ SDValue And;
+ SDValue Shift;
+
+ SDValue FirstOp = N->getOperand(0);
+ unsigned FirstOpc = FirstOp.getOpcode();
+ SDValue SecondOp = N->getOperand(1);
+ unsigned SecondOpc = SecondOp.getOpcode();
+
+ // Is one of the operands an AND or a BICi? The AND may have been optimised to
+ // a BICi in order to use an immediate instead of a register.
+ // Is the other operand an shl or lshr? This will have been turned into:
+ // AArch64ISD::VSHL vector, #shift or AArch64ISD::VLSHR vector, #shift.
+ if ((FirstOpc == ISD::AND || FirstOpc == AArch64ISD::BICi) &&
+ (SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR)) {
+ And = FirstOp;
+ Shift = SecondOp;
+
+ } else if ((SecondOpc == ISD::AND || SecondOpc == AArch64ISD::BICi) &&
+ (FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR)) {
+ And = SecondOp;
+ Shift = FirstOp;
+ } else
return SDValue();
- // Is the second op an shl or lshr?
- SDValue Shift = N->getOperand(1);
- // This will have been turned into: AArch64ISD::VSHL vector, #shift
- // or AArch64ISD::VLSHR vector, #shift
- unsigned ShiftOpc = Shift.getOpcode();
- if ((ShiftOpc != AArch64ISD::VSHL && ShiftOpc != AArch64ISD::VLSHR))
- return SDValue();
- bool IsShiftRight = ShiftOpc == AArch64ISD::VLSHR;
+ bool IsAnd = And.getOpcode() == ISD::AND;
+ bool IsShiftRight = Shift.getOpcode() == AArch64ISD::VLSHR;
// Is the shift amount constant?
ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
if (!C2node)
return SDValue();
- // Is the and mask vector all constant?
uint64_t C1;
- if (!isAllConstantBuildVector(And.getOperand(1), C1))
- return SDValue();
+ if (IsAnd) {
+ // Is the and mask vector all constant?
+ if (!isAllConstantBuildVector(And.getOperand(1), C1))
+ return SDValue();
+ } else {
+ // Reconstruct the corresponding AND immediate from the two BICi immediates.
+ ConstantSDNode *C1nodeImm = dyn_cast<ConstantSDNode>(And.getOperand(1));
+ ConstantSDNode *C1nodeShift = dyn_cast<ConstantSDNode>(And.getOperand(2));
+ assert(C1nodeImm && C1nodeShift);
+ C1 = ~(C1nodeImm->getZExtValue() << C1nodeShift->getZExtValue());
+ }
- // Is C1 == ~C2, taking into account how much one can shift elements of a
- // particular size?
+ // Is C1 == ~(Ones(ElemSizeInBits) << C2) or
+ // C1 == ~(Ones(ElemSizeInBits) >> C2), taking into account
+ // how much one can shift elements of a particular size?
uint64_t C2 = C2node->getZExtValue();
unsigned ElemSizeInBits = VT.getScalarSizeInBits();
if (C2 > ElemSizeInBits)
return SDValue();
- unsigned ElemMask = (1 << ElemSizeInBits) - 1;
- if ((C1 & ElemMask) != (~C2 & ElemMask))
+
+ APInt C1AsAPInt(ElemSizeInBits, C1);
+ APInt RequiredC1 = IsShiftRight ? APInt::getHighBitsSet(ElemSizeInBits, C2)
+ : APInt::getLowBitsSet(ElemSizeInBits, C2);
+ if (C1AsAPInt != RequiredC1)
return SDValue();
SDValue X = And.getOperand(0);
SDValue Y = Shift.getOperand(0);
- unsigned Intrin =
- IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli;
- SDValue ResultSLI =
- DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrin, DL, MVT::i32), X, Y,
- Shift.getOperand(1));
+ unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
+ SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Shift.getOperand(1));
LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
LLVM_DEBUG(N->dump(&DAG));
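
To make the C1/C2 relationship concrete, here is a per-lane scalar view of the two forms for 8-bit elements (plain C++, not from the patch); the AND mask must cover exactly the bits the shifted value cannot touch:

    #include <cstdint>

    // (or (and X, C1), (shl Y, C2)) -> SLI, with C1 == ~(0xFF << C2):
    // the low C2 bits of X are kept, the rest comes from Y << C2.
    static uint8_t sli8(uint8_t X, uint8_t Y, unsigned C2) {
      uint8_t C1 = static_cast<uint8_t>(~(0xFFu << C2));
      return static_cast<uint8_t>((X & C1) | (Y << C2));
    }

    // (or (and X, C1), (lshr Y, C2)) -> SRI, with C1 == ~(0xFF >> C2):
    // the high C2 bits of X are kept, the rest comes from Y >> C2.
    static uint8_t sri8(uint8_t X, uint8_t Y, unsigned C2) {
      uint8_t C1 = static_cast<uint8_t>(~(0xFFu >> C2));
      return static_cast<uint8_t>((X & C1) | (Y >> C2));
    }
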
@@ -7750,10 +8247,8 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SelectionDAG &DAG) const {
// Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
- if (EnableAArch64SlrGeneration) {
- if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
- return Res;
- }
+ if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
+ return Res;
EVT VT = Op.getValueType();
@@ -8041,8 +8536,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (VT.getVectorElementType().isFloatingPoint()) {
SmallVector<SDValue, 8> Ops;
EVT EltTy = VT.getVectorElementType();
- assert ((EltTy == MVT::f16 || EltTy == MVT::f32 || EltTy == MVT::f64) &&
- "Unsupported floating-point vector type");
+ assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
+ EltTy == MVT::f64) && "Unsupported floating-point vector type");
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int "
"BITCASTS, and try again\n");
@@ -8161,11 +8656,12 @@ SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// Insertion/extraction are legal for V128 types.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
- VT == MVT::v8f16)
+ VT == MVT::v8f16 || VT == MVT::v8bf16)
return Op;
if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
- VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)
+ VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
+ VT != MVT::v4bf16)
return SDValue();
// For V64 types, we perform insertion by expanding the value
@@ -8195,11 +8691,12 @@ AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// Insertion/extraction are legal for V128 types.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
- VT == MVT::v8f16)
+ VT == MVT::v8f16 || VT == MVT::v8bf16)
return Op;
if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
- VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)
+ VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
+ VT != MVT::v4bf16)
return SDValue();
// For V64 types, we perform extraction by expanding the value
@@ -8219,32 +8716,57 @@ AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
- EVT VT = Op.getOperand(0).getValueType();
- SDLoc dl(Op);
- // Just in case...
- if (!VT.isVector())
- return SDValue();
-
- ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- if (!Cst)
- return SDValue();
- unsigned Val = Cst->getZExtValue();
+ assert(Op.getValueType().isFixedLengthVector() &&
+ "Only cases that extract a fixed length vector are supported!");
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
unsigned Size = Op.getValueSizeInBits();
+ if (InVT.isScalableVector()) {
+ // This will be matched by custom code during ISelDAGToDAG.
+ if (Idx == 0 && isPackedVectorType(InVT, DAG))
+ return Op;
+
+ return SDValue();
+ }
+
// This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
- if (Val == 0)
+ if (Idx == 0 && InVT.getSizeInBits() <= 128)
return Op;
// If this is extracting the upper 64-bits of a 128-bit vector, we match
// that directly.
- if (Size == 64 && Val * VT.getScalarSizeInBits() == 64)
+ if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64)
+ return Op;
+
+ return SDValue();
+}
+
+SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getValueType().isScalableVector() &&
+ "Only expect to lower inserts into scalable vectors!");
+
+ EVT InVT = Op.getOperand(1).getValueType();
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+
+ // We don't have any patterns for scalable vector yet.
+ if (InVT.isScalableVector() || !useSVEForFixedLengthVectorVT(InVT))
+ return SDValue();
+
+ // This will be matched by custom code during ISelDAGToDAG.
+ if (Idx == 0 && isPackedVectorType(InVT, DAG) && Op.getOperand(0).isUndef())
return Op;
return SDValue();
}
bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
+ // Currently no fixed length shuffles that require SVE are legal.
+ if (useSVEForFixedLengthVectorVT(VT))
+ return false;
+
if (VT.getVectorNumElements() == 4 &&
(VT.is128BitVector() || VT.is64BitVector())) {
unsigned PFIndexes[4];
@@ -8324,6 +8846,81 @@ static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
}
+// Attempt to form urhadd(OpA, OpB) from
+// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1)).
+// The original form of this expression is
+// truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and before this function
+// is called the srl will have been lowered to AArch64ISD::VLSHR and the
+// ((OpA + OpB + 1) >> 1) expression will have become ((OpB - (~OpA)) >> 1).
+// This pass can also recognize a variant of this pattern that uses sign
+// extension instead of zero extension and form a srhadd(OpA, OpB) from it.
+SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+
+ if (!VT.isVector() || VT.isScalableVector())
+ return Op;
+
+ if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
+ return LowerFixedLengthVectorTruncateToSVE(Op, DAG);
+
+ // Since we are looking for a right shift by a constant value of 1 and we are
+ // operating on types at least 16 bits in length (sign/zero extended OpA and
+ // OpB, which are at least 8 bits), it follows that the truncate will always
+ // discard the shifted-in bit and therefore the right shift will be logical
+ // regardless of the signedness of OpA and OpB.
+ SDValue Shift = Op.getOperand(0);
+ if (Shift.getOpcode() != AArch64ISD::VLSHR)
+ return Op;
+
+ // Is the right shift using an immediate value of 1?
+ uint64_t ShiftAmount = Shift.getConstantOperandVal(1);
+ if (ShiftAmount != 1)
+ return Op;
+
+ SDValue Sub = Shift->getOperand(0);
+ if (Sub.getOpcode() != ISD::SUB)
+ return Op;
+
+ SDValue Xor = Sub.getOperand(1);
+ if (Xor.getOpcode() != ISD::XOR)
+ return Op;
+
+ SDValue ExtendOpA = Xor.getOperand(0);
+ SDValue ExtendOpB = Sub.getOperand(0);
+ unsigned ExtendOpAOpc = ExtendOpA.getOpcode();
+ unsigned ExtendOpBOpc = ExtendOpB.getOpcode();
+ if (!(ExtendOpAOpc == ExtendOpBOpc &&
+ (ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND)))
+ return Op;
+
+ // Is the result of the right shift being truncated to the same value type as
+ // the original operands, OpA and OpB?
+ SDValue OpA = ExtendOpA.getOperand(0);
+ SDValue OpB = ExtendOpB.getOperand(0);
+ EVT OpAVT = OpA.getValueType();
+ assert(ExtendOpA.getValueType() == ExtendOpB.getValueType());
+ if (!(VT == OpAVT && OpAVT == OpB.getValueType()))
+ return Op;
+
+ // Is the XOR using a constant amount of all ones in the right hand side?
+ uint64_t C;
+ if (!isAllConstantBuildVector(Xor.getOperand(1), C))
+ return Op;
+
+ unsigned ElemSizeInBits = VT.getScalarSizeInBits();
+ APInt CAsAPInt(ElemSizeInBits, C);
+ if (CAsAPInt != APInt::getAllOnesValue(ElemSizeInBits))
+ return Op;
+
+ SDLoc DL(Op);
+ bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND;
+ unsigned RHADDOpc = IsSignExtend ? AArch64ISD::SRHADD : AArch64ISD::URHADD;
+ SDValue ResultURHADD = DAG.getNode(RHADDOpc, DL, VT, OpA, OpB);
+
+ return ResultURHADD;
+}
+
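
The rewrite above relies on the identity zext(b) - ~zext(a) == zext(a) + zext(b) + 1 in the wider type, so the shifted difference really is the rounding halving add. A small scalar self-check in plain C++ (not part of the patch):

    #include <cstdint>

    // URHADD per-lane semantics for i8: (a + b + 1) >> 1, computed without
    // overflow by widening to i16 first.
    static uint8_t urhadd8(uint8_t a, uint8_t b) {
      uint16_t za = a, zb = b;
      // By the time LowerTRUNCATE runs, (za + zb + 1) has been rewritten as
      // (zb - ~za); the two are equal modulo 2^16.
      uint16_t sum = static_cast<uint16_t>(zb - static_cast<uint16_t>(~za));
      return static_cast<uint8_t>(sum >> 1);
    }
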
SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -8339,6 +8936,9 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
llvm_unreachable("unexpected shift opcode");
case ISD::SHL:
+ if (VT.isScalableVector())
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_MERGE_OP1);
+
if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
DAG.getConstant(Cnt, DL, MVT::i32));
@@ -8348,6 +8948,12 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
Op.getOperand(0), Op.getOperand(1));
case ISD::SRA:
case ISD::SRL:
+ if (VT.isScalableVector()) {
+ unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_MERGE_OP1
+ : AArch64ISD::SRL_MERGE_OP1;
+ return LowerToPredicatedOp(Op, DAG, Opc);
+ }
+
// Right shift immediate
if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
unsigned Opc =
@@ -8470,6 +9076,12 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
SelectionDAG &DAG) const {
+ if (Op.getValueType().isScalableVector()) {
+ if (Op.getOperand(0).getValueType().isFloatingPoint())
+ return Op;
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
+ }
+
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
@@ -8645,7 +9257,8 @@ AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDNode *Node = Op.getNode();
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
- unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ MaybeAlign Align =
+ cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
EVT VT = Node->getValueType(0);
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
@@ -8655,7 +9268,7 @@ AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
if (Align)
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
- DAG.getConstant(-(uint64_t)Align, dl, VT));
+ DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
SDValue Ops[2] = {SP, Chain};
return DAG.getMergeValues(Ops, dl);
@@ -8670,7 +9283,7 @@ AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
if (Align)
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
- DAG.getConstant(-(uint64_t)Align, dl, VT));
+ DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
@@ -8680,6 +9293,41 @@ AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getMergeValues(Ops, dl);
}
+SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ assert(VT != MVT::i64 && "Expected illegal VSCALE node");
+
+ SDLoc DL(Op);
+ APInt MulImm = cast<ConstantSDNode>(Op.getOperand(0))->getAPIntValue();
+ return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm.sextOrSelf(64)),
+ DL, VT);
+}
+
+/// Set the IntrinsicInfo for the `aarch64_sve_st<N>` intrinsics.
+template <unsigned NumVecs>
+static bool setInfoSVEStN(AArch64TargetLowering::IntrinsicInfo &Info,
+ const CallInst &CI) {
+ Info.opc = ISD::INTRINSIC_VOID;
+ // Retrieve EC from first vector argument.
+ const EVT VT = EVT::getEVT(CI.getArgOperand(0)->getType());
+ ElementCount EC = VT.getVectorElementCount();
+#ifndef NDEBUG
+ // Check the assumption that all input vectors are the same type.
+ for (unsigned I = 0; I < NumVecs; ++I)
+ assert(VT == EVT::getEVT(CI.getArgOperand(I)->getType()) &&
+ "Invalid type.");
+#endif
+ // memVT is `NumVecs * VT`.
+ Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(),
+ EC * NumVecs);
+ Info.ptrVal = CI.getArgOperand(CI.getNumArgOperands() - 1);
+ Info.offset = 0;
+ Info.align.reset();
+ Info.flags = MachineMemOperand::MOStore;
+ return true;
+}
+
/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.
@@ -8689,6 +9337,12 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
unsigned Intrinsic) const {
auto &DL = I.getModule()->getDataLayout();
switch (Intrinsic) {
+ case Intrinsic::aarch64_sve_st2:
+ return setInfoSVEStN<2>(Info, I);
+ case Intrinsic::aarch64_sve_st3:
+ return setInfoSVEStN<3>(Info, I);
+ case Intrinsic::aarch64_sve_st4:
+ return setInfoSVEStN<4>(Info, I);
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:
@@ -8745,7 +9399,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
+ Info.align = DL.getABITypeAlign(PtrTy->getElementType());
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
}
@@ -8756,7 +9410,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
- Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
+ Info.align = DL.getABITypeAlign(PtrTy->getElementType());
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
}
@@ -8781,21 +9435,25 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::aarch64_sve_ldnt1: {
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.memVT = MVT::getVT(I.getType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
- Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
- Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal;
+ Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+ Info.flags = MachineMemOperand::MOLoad;
+ if (Intrinsic == Intrinsic::aarch64_sve_ldnt1)
+ Info.flags |= MachineMemOperand::MONonTemporal;
return true;
}
case Intrinsic::aarch64_sve_stnt1: {
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(2)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.memVT = MVT::getVT(I.getOperand(0)->getType());
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
- Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
- Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
+ Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+ Info.flags = MachineMemOperand::MOStore;
+ if (Intrinsic == Intrinsic::aarch64_sve_stnt1)
+ Info.flags |= MachineMemOperand::MONonTemporal;
return true;
}
default:
@@ -8970,21 +9628,22 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
/// or upper half of the vector elements.
static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
- auto *FullVT = cast<VectorType>(FullV->getType());
- auto *HalfVT = cast<VectorType>(HalfV->getType());
- return FullVT->getBitWidth() == 2 * HalfVT->getBitWidth();
+ auto *FullTy = FullV->getType();
+ auto *HalfTy = HalfV->getType();
+ return FullTy->getPrimitiveSizeInBits().getFixedSize() ==
+ 2 * HalfTy->getPrimitiveSizeInBits().getFixedSize();
};
auto extractHalf = [](Value *FullV, Value *HalfV) {
- auto *FullVT = cast<VectorType>(FullV->getType());
- auto *HalfVT = cast<VectorType>(HalfV->getType());
+ auto *FullVT = cast<FixedVectorType>(FullV->getType());
+ auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
};
- Constant *M1, *M2;
+ ArrayRef<int> M1, M2;
Value *S1Op1, *S2Op1;
- if (!match(Op1, m_ShuffleVector(m_Value(S1Op1), m_Undef(), m_Constant(M1))) ||
- !match(Op2, m_ShuffleVector(m_Value(S2Op1), m_Undef(), m_Constant(M2))))
+ if (!match(Op1, m_Shuffle(m_Value(S1Op1), m_Undef(), m_Mask(M1))) ||
+ !match(Op2, m_Shuffle(m_Value(S2Op1), m_Undef(), m_Mask(M2))))
return false;
// Check that the operands are half as wide as the result and we extract
@@ -8997,7 +9656,7 @@ static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
// elements.
int M1Start = -1;
int M2Start = -1;
- int NumElements = cast<VectorType>(Op1->getType())->getNumElements() * 2;
+ int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
if (!ShuffleVectorInst::isExtractSubvectorMask(M1, NumElements, M1Start) ||
!ShuffleVectorInst::isExtractSubvectorMask(M2, NumElements, M2Start) ||
M1Start != M2Start || (M1Start != 0 && M2Start != (NumElements / 2)))
@@ -9023,6 +9682,22 @@ static bool areExtractExts(Value *Ext1, Value *Ext2) {
return true;
}
+/// Check if Op could be used with vmull_high_p64 intrinsic.
+static bool isOperandOfVmullHighP64(Value *Op) {
+ Value *VectorOperand = nullptr;
+ ConstantInt *ElementIndex = nullptr;
+ return match(Op, m_ExtractElt(m_Value(VectorOperand),
+ m_ConstantInt(ElementIndex))) &&
+ ElementIndex->getValue() == 1 &&
+ isa<FixedVectorType>(VectorOperand->getType()) &&
+ cast<FixedVectorType>(VectorOperand->getType())->getNumElements() == 2;
+}
+
+/// Check if Op1 and Op2 could be used with vmull_high_p64 intrinsic.
+static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2) {
+ return isOperandOfVmullHighP64(Op1) && isOperandOfVmullHighP64(Op2);
+}
+
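
For context, the operands these helpers match come from the NEON high-half polynomial multiply. An ACLE-level example that produces exactly the lane-1 extracts fed to the pmull64 intrinsic (requires the AES/PMULL target feature; the function name is illustrative):

    #include <arm_neon.h>

    // vmull_high_p64 multiplies lane 1 of each 64x2 polynomial vector; the
    // lane-1 extracts it generates are what shouldSinkOperands now sinks
    // next to the multiply so selection can use the high-half form.
    poly128_t mull_high(poly64x2_t a, poly64x2_t b) {
      return vmull_high_p64(a, b);
    }
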
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// shufflevectors extracts and/or sext/zext can be folded into (u,s)subl(2).
@@ -9039,6 +9714,15 @@ bool AArch64TargetLowering::shouldSinkOperands(
Ops.push_back(&II->getOperandUse(0));
Ops.push_back(&II->getOperandUse(1));
return true;
+
+ case Intrinsic::aarch64_neon_pmull64:
+ if (!areOperandsOfVmullHighP64(II->getArgOperand(0),
+ II->getArgOperand(1)))
+ return false;
+ Ops.push_back(&II->getArgOperandUse(0));
+ Ops.push_back(&II->getArgOperandUse(1));
+ return true;
+
default:
return false;
}
@@ -9071,12 +9755,12 @@ bool AArch64TargetLowering::shouldSinkOperands(
}
bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
- unsigned &RequiredAligment) const {
+ Align &RequiredAligment) const {
if (!LoadedType.isSimple() ||
(!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
return false;
// Cyclone supports unaligned accesses.
- RequiredAligment = 0;
+ RequiredAligment = Align(1);
unsigned NumBits = LoadedType.getSizeInBits();
return NumBits == 32 || NumBits == 64;
}
@@ -9090,7 +9774,7 @@ AArch64TargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
}
MachineMemOperand::Flags
-AArch64TargetLowering::getMMOFlags(const Instruction &I) const {
+AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const {
if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr)
return MOStridedAccess;
@@ -9104,7 +9788,7 @@ bool AArch64TargetLowering::isLegalInterleavedAccessType(
unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
// Ensure the number of vector elements is greater than 1.
- if (VecTy->getNumElements() < 2)
+ if (cast<FixedVectorType>(VecTy)->getNumElements() < 2)
return false;
// Ensure the element type is legal.
@@ -9138,22 +9822,24 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
const DataLayout &DL = LI->getModule()->getDataLayout();
- VectorType *VecTy = Shuffles[0]->getType();
+ VectorType *VTy = Shuffles[0]->getType();
// Skip if we do not have NEON and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector types are divisible by 128.
- if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL))
+ if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VTy, DL))
return false;
- unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
+ unsigned NumLoads = getNumInterleavedAccesses(VTy, DL);
+
+ auto *FVTy = cast<FixedVectorType>(VTy);
// A pointer vector can not be the return type of the ldN intrinsics. Need to
// load integer vectors first and then convert to pointer vectors.
- Type *EltTy = VecTy->getVectorElementType();
+ Type *EltTy = FVTy->getElementType();
if (EltTy->isPointerTy())
- VecTy =
- VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
+ FVTy =
+ FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
IRBuilder<> Builder(LI);
@@ -9163,19 +9849,19 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
if (NumLoads > 1) {
// If we're going to generate more than one load, reset the sub-vector type
// to something legal.
- VecTy = VectorType::get(VecTy->getVectorElementType(),
- VecTy->getVectorNumElements() / NumLoads);
+ FVTy = FixedVectorType::get(FVTy->getElementType(),
+ FVTy->getNumElements() / NumLoads);
// We will compute the pointer operand of each load from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
- BaseAddr, VecTy->getVectorElementType()->getPointerTo(
- LI->getPointerAddressSpace()));
+ BaseAddr,
+ FVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
}
- Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace());
- Type *Tys[2] = {VecTy, PtrTy};
+ Type *PtrTy = FVTy->getPointerTo(LI->getPointerAddressSpace());
+ Type *Tys[2] = {FVTy, PtrTy};
static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
Intrinsic::aarch64_neon_ld3,
Intrinsic::aarch64_neon_ld4};
@@ -9192,9 +9878,8 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
// If we're generating more than one load, compute the base address of
// subsequent loads as an offset from the previous.
if (LoadCount > 0)
- BaseAddr =
- Builder.CreateConstGEP1_32(VecTy->getVectorElementType(), BaseAddr,
- VecTy->getVectorNumElements() * Factor);
+ BaseAddr = Builder.CreateConstGEP1_32(FVTy->getElementType(), BaseAddr,
+ FVTy->getNumElements() * Factor);
CallInst *LdN = Builder.CreateCall(
LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
@@ -9209,8 +9894,8 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
// Convert the integer vector to pointer vector if the element is pointer.
if (EltTy->isPointerTy())
SubVec = Builder.CreateIntToPtr(
- SubVec, VectorType::get(SVI->getType()->getVectorElementType(),
- VecTy->getVectorNumElements()));
+ SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
+ FVTy->getNumElements()));
SubVecs[SVI].push_back(SubVec);
}
}
@@ -9261,13 +9946,12 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
- VectorType *VecTy = SVI->getType();
- assert(VecTy->getVectorNumElements() % Factor == 0 &&
- "Invalid interleaved store");
+ auto *VecTy = cast<FixedVectorType>(SVI->getType());
+ assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
- unsigned LaneLen = VecTy->getVectorNumElements() / Factor;
- Type *EltTy = VecTy->getVectorElementType();
- VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
+ unsigned LaneLen = VecTy->getNumElements() / Factor;
+ Type *EltTy = VecTy->getElementType();
+ auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
@@ -9287,14 +9971,15 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
// vectors to integer vectors.
if (EltTy->isPointerTy()) {
Type *IntTy = DL.getIntPtrType(EltTy);
- unsigned NumOpElts = Op0->getType()->getVectorNumElements();
+ unsigned NumOpElts =
+ cast<FixedVectorType>(Op0->getType())->getNumElements();
// Convert to the corresponding integer vector.
- Type *IntVecTy = VectorType::get(IntTy, NumOpElts);
+ auto *IntVecTy = FixedVectorType::get(IntTy, NumOpElts);
Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
- SubVecTy = VectorType::get(IntTy, LaneLen);
+ SubVecTy = FixedVectorType::get(IntTy, LaneLen);
}
// The base address of the store.
@@ -9304,14 +9989,14 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
// If we're going to generate more than one store, reset the lane length
// and sub-vector type to something legal.
LaneLen /= NumStores;
- SubVecTy = VectorType::get(SubVecTy->getVectorElementType(), LaneLen);
+ SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
// We will compute the pointer operand of each store from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
- BaseAddr, SubVecTy->getVectorElementType()->getPointerTo(
- SI->getPointerAddressSpace()));
+ BaseAddr,
+ SubVecTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()));
}
auto Mask = SVI->getShuffleMask();
@@ -9333,7 +10018,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
unsigned IdxI = StoreCount * LaneLen * Factor + i;
if (Mask[IdxI] >= 0) {
Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0)));
+ Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
} else {
unsigned StartMask = 0;
for (unsigned j = 1; j < LaneLen; j++) {
@@ -9349,14 +10034,14 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
// Note: StartMask cannot be negative, it's checked in
// isReInterleaveMask
Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0)));
+ Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
}
}
// If we're generating more than one store, we compute the base address of
// subsequent stores as an offset from the previous.
if (StoreCount > 0)
- BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getVectorElementType(),
+ BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
BaseAddr, LaneLen * Factor);
Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
@@ -9365,16 +10050,59 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
return true;
}
-static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
- unsigned AlignCheck) {
- return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
- (DstAlign == 0 || DstAlign % AlignCheck == 0));
+// Lower an SVE structured load intrinsic returning a tuple type to target
+// specific intrinsic taking the same input but returning a multi-result value
+// of the split tuple type.
+//
+// E.g. Lowering an LD3:
+//
+// call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32(
+// <vscale x 4 x i1> %pred,
+// <vscale x 4 x i32>* %addr)
+//
+// Output DAG:
+//
+// t0: ch = EntryToken
+// t2: nxv4i1,ch = CopyFromReg t0, Register:nxv4i1 %0
+// t4: i64,ch = CopyFromReg t0, Register:i64 %1
+// t5: nxv4i32,nxv4i32,nxv4i32,ch = AArch64ISD::SVE_LD3 t0, t2, t4
+// t6: nxv12i32 = concat_vectors t5, t5:1, t5:2
+//
+// This is called pre-legalization to avoid widening/splitting issues with
+// non-power-of-2 tuple types used for LD3, such as nxv12i32.
+SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic,
+ ArrayRef<SDValue> LoadOps,
+ EVT VT, SelectionDAG &DAG,
+ const SDLoc &DL) const {
+ assert(VT.isScalableVector() && "Can only lower scalable vectors");
+
+ unsigned N, Opcode;
+ static std::map<unsigned, std::pair<unsigned, unsigned>> IntrinsicMap = {
+ {Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}},
+ {Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}},
+ {Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}};
+
+ std::tie(N, Opcode) = IntrinsicMap[Intrinsic];
+ assert(VT.getVectorElementCount().Min % N == 0 &&
+ "invalid tuple vector type!");
+
+ EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ VT.getVectorElementCount() / N);
+ assert(isTypeLegal(SplitVT));
+
+ SmallVector<EVT, 5> VTs(N, SplitVT);
+ VTs.push_back(MVT::Other); // Chain
+ SDVTList NodeTys = DAG.getVTList(VTs);
+
+ SDValue PseudoLoad = DAG.getNode(Opcode, DL, NodeTys, LoadOps);
+ SmallVector<SDValue, 4> PseudoLoadOps;
+ for (unsigned I = 0; I < N; ++I)
+ PseudoLoadOps.push_back(SDValue(PseudoLoad.getNode(), I));
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, PseudoLoadOps);
}
EVT AArch64TargetLowering::getOptimalMemOpType(
- uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
- bool ZeroMemset, bool MemcpyStrSrc,
- const AttributeList &FuncAttributes) const {
+ const MemOp &Op, const AttributeList &FuncAttributes) const {
bool CanImplicitFloat =
!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
@@ -9382,9 +10110,9 @@ EVT AArch64TargetLowering::getOptimalMemOpType(
// Only use AdvSIMD to implement memset of 32-byte and above. It would have
// taken one instruction to materialize the v2i64 zero and one store (with
// restrictive addressing mode). Just do i64 stores.
- bool IsSmallMemset = IsMemset && Size < 32;
- auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
- if (memOpAlign(SrcAlign, DstAlign, AlignCheck))
+ bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
+ auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
+ if (Op.isAligned(AlignCheck))
return true;
bool Fast;
return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone,
@@ -9392,22 +10120,20 @@ EVT AArch64TargetLowering::getOptimalMemOpType(
Fast;
};
- if (CanUseNEON && IsMemset && !IsSmallMemset &&
- AlignmentIsAcceptable(MVT::v2i64, 16))
+ if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
+ AlignmentIsAcceptable(MVT::v2i64, Align(16)))
return MVT::v2i64;
- if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
+ if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
return MVT::f128;
- if (Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
+ if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
return MVT::i64;
- if (Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
+ if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
return MVT::i32;
return MVT::Other;
}
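The cascade above (and its GlobalISel twin getOptimalMemOpLLT just below) picks the widest store type that the operation's size and alignment allow. A minimal standalone sketch of that decision, using a hypothetical MemOpInfo struct and string results instead of the real MemOp/EVT types, and considering only statically aligned accesses (the real code also accepts fast misaligned ones):

#include <cstdint>
#include <string>

namespace sketch {
struct MemOpInfo {
  uint64_t Size;        // number of bytes to set/copy
  uint64_t AlignBytes;  // known alignment in bytes
  bool IsMemset;
  bool CanUseNEON;      // NEON available and implicit FP allowed
  bool CanUseFP;        // FP registers available and implicit FP allowed
};

// Mirrors the priority order above: 128-bit vector, 128-bit FP, i64, i32.
inline std::string pickMemOpType(const MemOpInfo &Op) {
  bool IsSmallMemset = Op.IsMemset && Op.Size < 32;
  auto AlignedTo = [&](uint64_t A) { return Op.AlignBytes % A == 0; };

  if (Op.CanUseNEON && Op.IsMemset && !IsSmallMemset && AlignedTo(16))
    return "v2i64";
  if (Op.CanUseFP && !IsSmallMemset && AlignedTo(16))
    return "f128";
  if (Op.Size >= 8 && AlignedTo(8))
    return "i64";
  if (Op.Size >= 4 && AlignedTo(4))
    return "i32";
  return "other"; // fall back to the generic lowering
}
} // namespace sketch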
LLT AArch64TargetLowering::getOptimalMemOpLLT(
- uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
- bool ZeroMemset, bool MemcpyStrSrc,
- const AttributeList &FuncAttributes) const {
+ const MemOp &Op, const AttributeList &FuncAttributes) const {
bool CanImplicitFloat =
!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
@@ -9415,9 +10141,9 @@ LLT AArch64TargetLowering::getOptimalMemOpLLT(
// Only use AdvSIMD to implement memset of 32-byte and above. It would have
// taken one instruction to materialize the v2i64 zero and one store (with
// restrictive addressing mode). Just do i64 stores.
- bool IsSmallMemset = IsMemset && Size < 32;
- auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
- if (memOpAlign(SrcAlign, DstAlign, AlignCheck))
+ bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
+ auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
+ if (Op.isAligned(AlignCheck))
return true;
bool Fast;
return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone,
@@ -9425,14 +10151,14 @@ LLT AArch64TargetLowering::getOptimalMemOpLLT(
Fast;
};
- if (CanUseNEON && IsMemset && !IsSmallMemset &&
- AlignmentIsAcceptable(MVT::v2i64, 16))
+ if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
+ AlignmentIsAcceptable(MVT::v2i64, Align(16)))
return LLT::vector(2, 64);
- if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
+ if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
return LLT::scalar(128);
- if (Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
+ if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
return LLT::scalar(64);
- if (Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
+ if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
return LLT::scalar(32);
return LLT();
}
@@ -9479,6 +10205,10 @@ bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
return false;
+ // FIXME: Update this method to support scalable addressing modes.
+ if (isa<ScalableVectorType>(Ty))
+ return AM.HasBaseReg && !AM.BaseOffs && !AM.Scale;
+
// check reg + imm case:
// i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
uint64_t NumBytes = 0;
@@ -10185,7 +10915,7 @@ static SDValue tryCombineToBSL(SDNode *N,
}
if (FoundMatch)
- return DAG.getNode(AArch64ISD::BSL, DL, VT, SDValue(BVN0, 0),
+ return DAG.getNode(AArch64ISD::BSP, DL, VT, SDValue(BVN0, 0),
N0->getOperand(1 - i), N1->getOperand(1 - j));
}
@@ -10242,29 +10972,81 @@ static SDValue performSVEAndCombine(SDNode *N,
if (DCI.isBeforeLegalizeOps())
return SDValue();
+ SelectionDAG &DAG = DCI.DAG;
SDValue Src = N->getOperand(0);
+ unsigned Opc = Src->getOpcode();
+
+ // Zero/any extend of an unsigned unpack
+ if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
+ SDValue UnpkOp = Src->getOperand(0);
+ SDValue Dup = N->getOperand(1);
+
+ if (Dup.getOpcode() != AArch64ISD::DUP)
+ return SDValue();
+
+ SDLoc DL(N);
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0));
+ if (!C)
+ return SDValue();
+ uint64_t ExtVal = C->getZExtValue();
+
+ // If the mask is fully covered by the unpack, we don't need to push
+ // a new AND onto the operand
+ EVT EltTy = UnpkOp->getValueType(0).getVectorElementType();
+ if ((ExtVal == 0xFF && EltTy == MVT::i8) ||
+ (ExtVal == 0xFFFF && EltTy == MVT::i16) ||
+ (ExtVal == 0xFFFFFFFF && EltTy == MVT::i32))
+ return Src;
+
+ // Truncate to prevent a DUP with an over-wide constant
+ APInt Mask = C->getAPIntValue().trunc(EltTy.getSizeInBits());
+
+ // Otherwise, make sure we propagate the AND to the operand
+ // of the unpack
+ Dup = DAG.getNode(AArch64ISD::DUP, DL,
+ UnpkOp->getValueType(0),
+ DAG.getConstant(Mask.zextOrTrunc(32), DL, MVT::i32));
+
+ SDValue And = DAG.getNode(ISD::AND, DL,
+ UnpkOp->getValueType(0), UnpkOp, Dup);
+
+ return DAG.getNode(Opc, DL, N->getValueType(0), And);
+ }
+
SDValue Mask = N->getOperand(1);
if (!Src.hasOneUse())
return SDValue();
- // GLD1* instructions perform an implicit zero-extend, which makes them
+ EVT MemVT;
+
+ // SVE load instructions perform an implicit zero-extend, which makes them
// perfect candidates for combining.
- switch (Src->getOpcode()) {
- case AArch64ISD::GLD1:
- case AArch64ISD::GLD1_SCALED:
- case AArch64ISD::GLD1_SXTW:
- case AArch64ISD::GLD1_SXTW_SCALED:
- case AArch64ISD::GLD1_UXTW:
- case AArch64ISD::GLD1_UXTW_SCALED:
- case AArch64ISD::GLD1_IMM:
+ switch (Opc) {
+ case AArch64ISD::LD1_MERGE_ZERO:
+ case AArch64ISD::LDNF1_MERGE_ZERO:
+ case AArch64ISD::LDFF1_MERGE_ZERO:
+ MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
+ break;
+ case AArch64ISD::GLD1_MERGE_ZERO:
+ case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
+ case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
+ case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
+ case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
+ case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
+ case AArch64ISD::GLD1_IMM_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
+ case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
+ case AArch64ISD::GLDNT1_MERGE_ZERO:
+ MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
break;
default:
return SDValue();
}
- EVT MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
-
if (isConstantSplatVectorMaskForType(Mask.getNode(), MemVT))
return Src;
@@ -10348,6 +11130,7 @@ static SDValue performConcatVectorsCombine(SDNode *N,
SDLoc dl(N);
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();
// Optimize concat_vectors of truncated vectors, where the intermediate
// type is illegal, to avoid said illegality, e.g.,
@@ -10360,9 +11143,8 @@ static SDValue performConcatVectorsCombine(SDNode *N,
// This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
// on both input and result type, so we might generate worse code.
// On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
- if (N->getNumOperands() == 2 &&
- N0->getOpcode() == ISD::TRUNCATE &&
- N1->getOpcode() == ISD::TRUNCATE) {
+ if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
+ N1Opc == ISD::TRUNCATE) {
SDValue N00 = N0->getOperand(0);
SDValue N10 = N1->getOperand(0);
EVT N00VT = N00.getValueType();
@@ -10387,6 +11169,52 @@ static SDValue performConcatVectorsCombine(SDNode *N,
if (DCI.isBeforeLegalizeOps())
return SDValue();
+ // Optimise concat_vectors of two [us]rhadds that use extracted subvectors
+ // from the same original vectors. Combine these into a single [us]rhadd that
+ // operates on the two original vectors. Example:
+ // (v16i8 (concat_vectors (v8i8 (urhadd (extract_subvector (v16i8 OpA, <0>),
+ // extract_subvector (v16i8 OpB,
+ // <0>))),
+ // (v8i8 (urhadd (extract_subvector (v16i8 OpA, <8>),
+ // extract_subvector (v16i8 OpB,
+ // <8>)))))
+ // ->
+ // (v16i8(urhadd(v16i8 OpA, v16i8 OpB)))
+ if (N->getNumOperands() == 2 && N0Opc == N1Opc &&
+ (N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD)) {
+ SDValue N00 = N0->getOperand(0);
+ SDValue N01 = N0->getOperand(1);
+ SDValue N10 = N1->getOperand(0);
+ SDValue N11 = N1->getOperand(1);
+
+ EVT N00VT = N00.getValueType();
+ EVT N10VT = N10.getValueType();
+
+ if (N00->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N01->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N10->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N11->getOpcode() == ISD::EXTRACT_SUBVECTOR && N00VT == N10VT) {
+ SDValue N00Source = N00->getOperand(0);
+ SDValue N01Source = N01->getOperand(0);
+ SDValue N10Source = N10->getOperand(0);
+ SDValue N11Source = N11->getOperand(0);
+
+ if (N00Source == N10Source && N01Source == N11Source &&
+ N00Source.getValueType() == VT && N01Source.getValueType() == VT) {
+ assert(N0.getValueType() == N1.getValueType());
+
+ uint64_t N00Index = N00.getConstantOperandVal(1);
+ uint64_t N01Index = N01.getConstantOperandVal(1);
+ uint64_t N10Index = N10.getConstantOperandVal(1);
+ uint64_t N11Index = N11.getConstantOperandVal(1);
+
+ if (N00Index == N01Index && N10Index == N11Index && N00Index == 0 &&
+ N10Index == N00VT.getVectorNumElements())
+ return DAG.getNode(N0Opc, dl, VT, N00Source, N01Source);
+ }
+ }
+ }
+
// If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
// splat. The indexed instructions are going to be expecting a DUPLANE64, so
// canonicalise to that.
@@ -10405,7 +11233,7 @@ static SDValue performConcatVectorsCombine(SDNode *N,
// becomes
// (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
- if (N1->getOpcode() != ISD::BITCAST)
+ if (N1Opc != ISD::BITCAST)
return SDValue();
SDValue RHS = N1->getOperand(0);
MVT RHSTy = RHS.getValueType().getSimpleVT();
@@ -10869,6 +11697,35 @@ static SDValue LowerSVEIntReduction(SDNode *N, unsigned Opc,
return SDValue();
}
+static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ SDValue Op1 = N->getOperand(1);
+ SDValue Op2 = N->getOperand(2);
+ EVT ScalarTy = Op1.getValueType();
+
+ if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16)) {
+ Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
+ Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
+ }
+
+ return DAG.getNode(AArch64ISD::INDEX_VECTOR, DL, N->getValueType(0),
+ Op1, Op2);
+}
+
+static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG) {
+ SDLoc dl(N);
+ SDValue Scalar = N->getOperand(3);
+ EVT ScalarTy = Scalar.getValueType();
+
+ if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
+ Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
+
+ SDValue Passthru = N->getOperand(1);
+ SDValue Pred = N->getOperand(2);
+ return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0),
+ Pred, Scalar, Passthru);
+}
+
static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) {
SDLoc dl(N);
LLVMContext &Ctx = *DAG.getContext();
@@ -10894,8 +11751,7 @@ static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::BITCAST, dl, VT, EXT);
}
-static SDValue tryConvertSVEWideCompare(SDNode *N, unsigned ReplacementIID,
- bool Invert,
+static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalize())
@@ -10948,18 +11804,12 @@ static SDValue tryConvertSVEWideCompare(SDNode *N, unsigned ReplacementIID,
}
}
+ if (!Imm)
+ return SDValue();
+
SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, CmpVT, Imm);
- SDValue ID = DAG.getTargetConstant(ReplacementIID, DL, MVT::i64);
- SDValue Op0, Op1;
- if (Invert) {
- Op0 = Splat;
- Op1 = N->getOperand(2);
- } else {
- Op0 = N->getOperand(2);
- Op1 = Splat;
- }
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- ID, Pred, Op0, Op1);
+ return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, VT, Pred,
+ N->getOperand(2), Splat, DAG.getCondCode(CC));
}
return SDValue();
@@ -10989,6 +11839,46 @@ static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
return DAG.getZExtOrTrunc(Res, DL, VT);
}
+static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc,
+ SelectionDAG &DAG) {
+ SDLoc DL(N);
+
+ SDValue Pred = N->getOperand(1);
+ SDValue VecToReduce = N->getOperand(2);
+
+ EVT ReduceVT = VecToReduce.getValueType();
+ SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
+
+ // SVE reductions write the whole vector register, with the reduction result
+ // held in the first element, which we'll now extract.
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
+ Zero);
+}
+
+static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc,
+ SelectionDAG &DAG) {
+ SDLoc DL(N);
+
+ SDValue Pred = N->getOperand(1);
+ SDValue InitVal = N->getOperand(2);
+ SDValue VecToReduce = N->getOperand(3);
+ EVT ReduceVT = VecToReduce.getValueType();
+
+ // Ordered reductions use the first lane of the result vector as the
+ // reduction's initial value.
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+ InitVal = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ReduceVT,
+ DAG.getUNDEF(ReduceVT), InitVal, Zero);
+
+ SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, InitVal, VecToReduce);
+
+ // SVE reductions write the whole vector register, with the reduction result
+ // held in the first element, which we'll now extract.
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
+ Zero);
+}
+
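The difference between the two helpers is the ordering guarantee of the reduction: FADDV and the other unordered reductions may combine lanes in a tree-like order, while FADDA consumes the initial value first and then accumulates the active lanes strictly in element order, which is observable for floating point. A scalar model of that ordered semantics, as an illustration only rather than a description of the ISD nodes themselves:

#include <vector>

namespace sketch {
// Scalar model of an ordered floating-point add reduction (FADDA-style):
// the initial value is folded in first, then lane 0, lane 1, ... in order,
// with no reassociation.
inline double orderedFAdd(double Init, const std::vector<double> &Lanes) {
  double Acc = Init;
  for (double Lane : Lanes)
    Acc += Lane;
  return Acc;
}
} // namespace sketch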
static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
@@ -11057,38 +11947,107 @@ static SDValue performIntrinsicCombine(SDNode *N,
return LowerSVEIntReduction(N, AArch64ISD::EORV_PRED, DAG);
case Intrinsic::aarch64_sve_andv:
return LowerSVEIntReduction(N, AArch64ISD::ANDV_PRED, DAG);
+ case Intrinsic::aarch64_sve_index:
+ return LowerSVEIntrinsicIndex(N, DAG);
+ case Intrinsic::aarch64_sve_dup:
+ return LowerSVEIntrinsicDUP(N, DAG);
+ case Intrinsic::aarch64_sve_dup_x:
+ return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0),
+ N->getOperand(1));
case Intrinsic::aarch64_sve_ext:
return LowerSVEIntrinsicEXT(N, DAG);
+ case Intrinsic::aarch64_sve_smin:
+ return DAG.getNode(AArch64ISD::SMIN_MERGE_OP1, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ case Intrinsic::aarch64_sve_umin:
+ return DAG.getNode(AArch64ISD::UMIN_MERGE_OP1, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ case Intrinsic::aarch64_sve_smax:
+ return DAG.getNode(AArch64ISD::SMAX_MERGE_OP1, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ case Intrinsic::aarch64_sve_umax:
+ return DAG.getNode(AArch64ISD::UMAX_MERGE_OP1, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ case Intrinsic::aarch64_sve_lsl:
+ return DAG.getNode(AArch64ISD::SHL_MERGE_OP1, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ case Intrinsic::aarch64_sve_lsr:
+ return DAG.getNode(AArch64ISD::SRL_MERGE_OP1, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ case Intrinsic::aarch64_sve_asr:
+ return DAG.getNode(AArch64ISD::SRA_MERGE_OP1, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ case Intrinsic::aarch64_sve_cmphs:
+ if (!N->getOperand(2).getValueType().isFloatingPoint())
+ return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
+ N->getValueType(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), DAG.getCondCode(ISD::SETUGE));
+ break;
+ case Intrinsic::aarch64_sve_cmphi:
+ if (!N->getOperand(2).getValueType().isFloatingPoint())
+ return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
+ N->getValueType(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), DAG.getCondCode(ISD::SETUGT));
+ break;
+ case Intrinsic::aarch64_sve_cmpge:
+ if (!N->getOperand(2).getValueType().isFloatingPoint())
+ return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
+ N->getValueType(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), DAG.getCondCode(ISD::SETGE));
+ break;
+ case Intrinsic::aarch64_sve_cmpgt:
+ if (!N->getOperand(2).getValueType().isFloatingPoint())
+ return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
+ N->getValueType(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), DAG.getCondCode(ISD::SETGT));
+ break;
+ case Intrinsic::aarch64_sve_cmpeq:
+ if (!N->getOperand(2).getValueType().isFloatingPoint())
+ return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
+ N->getValueType(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
+ break;
+ case Intrinsic::aarch64_sve_cmpne:
+ if (!N->getOperand(2).getValueType().isFloatingPoint())
+ return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
+ N->getValueType(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), DAG.getCondCode(ISD::SETNE));
+ break;
+ case Intrinsic::aarch64_sve_fadda:
+ return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG);
+ case Intrinsic::aarch64_sve_faddv:
+ return combineSVEReductionFP(N, AArch64ISD::FADDV_PRED, DAG);
+ case Intrinsic::aarch64_sve_fmaxnmv:
+ return combineSVEReductionFP(N, AArch64ISD::FMAXNMV_PRED, DAG);
+ case Intrinsic::aarch64_sve_fmaxv:
+ return combineSVEReductionFP(N, AArch64ISD::FMAXV_PRED, DAG);
+ case Intrinsic::aarch64_sve_fminnmv:
+ return combineSVEReductionFP(N, AArch64ISD::FMINNMV_PRED, DAG);
+ case Intrinsic::aarch64_sve_fminv:
+ return combineSVEReductionFP(N, AArch64ISD::FMINV_PRED, DAG);
+ case Intrinsic::aarch64_sve_sel:
+ return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_cmpeq_wide:
- return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpeq,
- false, DCI, DAG);
+ return tryConvertSVEWideCompare(N, ISD::SETEQ, DCI, DAG);
case Intrinsic::aarch64_sve_cmpne_wide:
- return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpne,
- false, DCI, DAG);
+ return tryConvertSVEWideCompare(N, ISD::SETNE, DCI, DAG);
case Intrinsic::aarch64_sve_cmpge_wide:
- return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpge,
- false, DCI, DAG);
+ return tryConvertSVEWideCompare(N, ISD::SETGE, DCI, DAG);
case Intrinsic::aarch64_sve_cmpgt_wide:
- return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpgt,
- false, DCI, DAG);
+ return tryConvertSVEWideCompare(N, ISD::SETGT, DCI, DAG);
case Intrinsic::aarch64_sve_cmplt_wide:
- return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpgt,
- true, DCI, DAG);
+ return tryConvertSVEWideCompare(N, ISD::SETLT, DCI, DAG);
case Intrinsic::aarch64_sve_cmple_wide:
- return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpge,
- true, DCI, DAG);
+ return tryConvertSVEWideCompare(N, ISD::SETLE, DCI, DAG);
case Intrinsic::aarch64_sve_cmphs_wide:
- return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmphs,
- false, DCI, DAG);
+ return tryConvertSVEWideCompare(N, ISD::SETUGE, DCI, DAG);
case Intrinsic::aarch64_sve_cmphi_wide:
- return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmphi,
- false, DCI, DAG);
+ return tryConvertSVEWideCompare(N, ISD::SETUGT, DCI, DAG);
case Intrinsic::aarch64_sve_cmplo_wide:
- return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmphi, true,
- DCI, DAG);
+ return tryConvertSVEWideCompare(N, ISD::SETULT, DCI, DAG);
case Intrinsic::aarch64_sve_cmpls_wide:
- return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmphs, true,
- DCI, DAG);
+ return tryConvertSVEWideCompare(N, ISD::SETULE, DCI, DAG);
case Intrinsic::aarch64_sve_ptest_any:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::ANY_ACTIVE);
@@ -11166,14 +12125,14 @@ static SDValue performExtendCombine(SDNode *N,
if (!ResVT.isSimple() || !SrcVT.isSimple())
return SDValue();
- // If the source VT is a 64-bit vector, we can play games and get the
- // better results we want.
- if (SrcVT.getSizeInBits() != 64)
+ // If the source VT is a 64-bit fixed or scalable vector, we can play games
+ // and get the better results we want.
+ if (SrcVT.getSizeInBits().getKnownMinSize() != 64)
return SDValue();
unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
- unsigned ElementCount = SrcVT.getVectorNumElements();
- SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), ElementCount);
+ ElementCount SrcEC = SrcVT.getVectorElementCount();
+ SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), SrcEC);
SDLoc DL(N);
Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src);
@@ -11181,17 +12140,14 @@ static SDValue performExtendCombine(SDNode *N,
// bit source.
EVT LoVT, HiVT;
SDValue Lo, Hi;
- unsigned NumElements = ResVT.getVectorNumElements();
- assert(!(NumElements & 1) && "Splitting vector, but not in half!");
- LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
- ResVT.getVectorElementType(), NumElements / 2);
+ LoVT = HiVT = ResVT.getHalfNumVectorElementsVT(*DAG.getContext());
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
- LoVT.getVectorNumElements());
+ LoVT.getVectorElementCount());
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
DAG.getConstant(0, DL, MVT::i64));
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
- DAG.getConstant(InNVT.getVectorNumElements(), DL, MVT::i64));
+ DAG.getConstant(InNVT.getVectorMinNumElements(), DL, MVT::i64));
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
@@ -11240,11 +12196,71 @@ static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
return NewST1;
}
+// Returns an SVE type that ContentTy can be trivially sign or zero extended
+// into.
+static MVT getSVEContainerType(EVT ContentTy) {
+ assert(ContentTy.isSimple() && "No SVE containers for extended types");
+
+ switch (ContentTy.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("No known SVE container for this MVT type");
+ case MVT::nxv2i8:
+ case MVT::nxv2i16:
+ case MVT::nxv2i32:
+ case MVT::nxv2i64:
+ case MVT::nxv2f32:
+ case MVT::nxv2f64:
+ return MVT::nxv2i64;
+ case MVT::nxv4i8:
+ case MVT::nxv4i16:
+ case MVT::nxv4i32:
+ case MVT::nxv4f32:
+ return MVT::nxv4i32;
+ case MVT::nxv8i8:
+ case MVT::nxv8i16:
+ case MVT::nxv8f16:
+ case MVT::nxv8bf16:
+ return MVT::nxv8i16;
+ case MVT::nxv16i8:
+ return MVT::nxv16i8;
+ }
+}
+
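Each case above maps a type to the SVE container with the same minimum lane count but full-width lanes, i.e. lanes that exactly fill a 128-bit granule, so a load can zero- or sign-extend into the container without changing the lane count. A small standalone sketch of that invariant, using plain integers rather than MVTs:

#include <cassert>

namespace sketch {
// For a scalable vector with MinLanes lanes per 128-bit granule
// (2, 4, 8 or 16), the container element width is 128 / MinLanes:
// nxv2* -> nxv2i64, nxv4* -> nxv4i32, nxv8* -> nxv8i16, nxv16i8 -> nxv16i8.
inline unsigned sveContainerEltBits(unsigned MinLanes) {
  assert((MinLanes == 2 || MinLanes == 4 || MinLanes == 8 || MinLanes == 16) &&
         "no SVE container for this lane count");
  return 128 / MinLanes;
}
} // namespace sketch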
+static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ if (VT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
+ return SDValue();
+
+ EVT ContainerVT = VT;
+ if (ContainerVT.isInteger())
+ ContainerVT = getSVEContainerType(ContainerVT);
+
+ SDVTList VTs = DAG.getVTList(ContainerVT, MVT::Other);
+ SDValue Ops[] = { N->getOperand(0), // Chain
+ N->getOperand(2), // Pg
+ N->getOperand(3), // Base
+ DAG.getValueType(VT) };
+
+ SDValue Load = DAG.getNode(Opc, DL, VTs, Ops);
+ SDValue LoadChain = SDValue(Load.getNode(), 1);
+
+ if (ContainerVT.isInteger() && (VT != ContainerVT))
+ Load = DAG.getNode(ISD::TRUNCATE, DL, VT, Load.getValue(0));
+
+ return DAG.getMergeValues({ Load, LoadChain }, DL);
+}
+
static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
EVT PtrTy = N->getOperand(3).getValueType();
+ if (VT == MVT::nxv8bf16 &&
+ !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
+ return SDValue();
+
EVT LoadVT = VT;
if (VT.isFloatingPoint())
LoadVT = VT.changeTypeToInteger();
@@ -11265,6 +12281,58 @@ static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
return L;
}
+template <unsigned Opcode>
+static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG) {
+ static_assert(Opcode == AArch64ISD::LD1RQ_MERGE_ZERO ||
+ Opcode == AArch64ISD::LD1RO_MERGE_ZERO,
+ "Unsupported opcode.");
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ EVT LoadVT = VT;
+ if (VT.isFloatingPoint())
+ LoadVT = VT.changeTypeToInteger();
+
+ SDValue Ops[] = {N->getOperand(0), N->getOperand(2), N->getOperand(3)};
+ SDValue Load = DAG.getNode(Opcode, DL, {LoadVT, MVT::Other}, Ops);
+ SDValue LoadChain = SDValue(Load.getNode(), 1);
+
+ if (VT.isFloatingPoint())
+ Load = DAG.getNode(ISD::BITCAST, DL, VT, Load.getValue(0));
+
+ return DAG.getMergeValues({Load, LoadChain}, DL);
+}
+
+static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ SDValue Data = N->getOperand(2);
+ EVT DataVT = Data.getValueType();
+ EVT HwSrcVt = getSVEContainerType(DataVT);
+ SDValue InputVT = DAG.getValueType(DataVT);
+
+ if (DataVT == MVT::nxv8bf16 &&
+ !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
+ return SDValue();
+
+ if (DataVT.isFloatingPoint())
+ InputVT = DAG.getValueType(HwSrcVt);
+
+ SDValue SrcNew;
+ if (Data.getValueType().isFloatingPoint())
+ SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Data);
+ else
+ SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Data);
+
+ SDValue Ops[] = { N->getOperand(0), // Chain
+ SrcNew,
+ N->getOperand(4), // Base
+ N->getOperand(3), // Pg
+ InputVT
+ };
+
+ return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
+}
+
static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
@@ -11272,6 +12340,10 @@ static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
EVT DataVT = Data.getValueType();
EVT PtrTy = N->getOperand(4).getValueType();
+ if (DataVT == MVT::nxv8bf16 &&
+ !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
+ return SDValue();
+
if (DataVT.isFloatingPoint())
Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data);
@@ -11301,6 +12373,10 @@ static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
SDValue StVal = St.getValue();
EVT VT = StVal.getValueType();
+ // Avoid scalarizing zero splat stores for scalable vectors.
+ if (VT.isScalableVector())
+ return SDValue();
+
// It is beneficial to scalarize a zero splat store for 2 or 3 i64 elements or
// 2, 3 or 4 i32 elements.
int NumVecElts = VT.getVectorNumElements();
@@ -11423,7 +12499,8 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SDValue StVal = S->getValue();
EVT VT = StVal.getValueType();
- if (!VT.isVector())
+
+ if (!VT.isFixedLengthVector())
return SDValue();
// If we get a splat of zeros, convert this vector store to a store of
@@ -11494,6 +12571,9 @@ static SDValue performPostLD1Combine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
+ if (VT.isScalableVector())
+ return SDValue();
+
unsigned LoadIdx = IsLaneOp ? 1 : 0;
SDNode *LD = N->getOperand(LoadIdx).getNode();
// If it is not LOAD, can not do such combine.
@@ -12333,32 +13413,57 @@ static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(MinOffset, DL, MVT::i64));
}
-// Returns an SVE type that ContentTy can be trivially sign or zero extended
-// into.
-static MVT getSVEContainerType(EVT ContentTy) {
- assert(ContentTy.isSimple() && "No SVE containers for extended types");
+// Turns the vector of indices into a vector of byte offsets by scaling Offset
+// by (BitWidth / 8).
+static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset,
+ SDLoc DL, unsigned BitWidth) {
+ assert(Offset.getValueType().isScalableVector() &&
+ "This method is only for scalable vectors of offsets");
- switch (ContentTy.getSimpleVT().SimpleTy) {
- default:
- llvm_unreachable("No known SVE container for this MVT type");
- case MVT::nxv2i8:
- case MVT::nxv2i16:
- case MVT::nxv2i32:
- case MVT::nxv2i64:
- case MVT::nxv2f32:
- case MVT::nxv2f64:
- return MVT::nxv2i64;
- case MVT::nxv4i8:
- case MVT::nxv4i16:
- case MVT::nxv4i32:
- case MVT::nxv4f32:
- return MVT::nxv4i32;
- }
+ SDValue Shift = DAG.getConstant(Log2_32(BitWidth / 8), DL, MVT::i64);
+ SDValue SplatShift = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Shift);
+
+ return DAG.getNode(ISD::SHL, DL, MVT::nxv2i64, Offset, SplatShift);
}
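Because the element size is a power of two, the scaling above is a left shift by log2(BitWidth / 8), splatted across the offset vector. A scalar sketch of the same arithmetic, with an illustrative helper rather than LLVM's Log2_32:

#include <cstdint>

namespace sketch {
// Converts an element index into a byte offset for elements of the given
// bit width, e.g. index 5 of 32-bit elements -> byte offset 20.
inline uint64_t indexToByteOffset(uint64_t Index, unsigned EltBitWidth) {
  unsigned ShiftAmt = 0;
  for (unsigned Bytes = EltBitWidth / 8; Bytes > 1; Bytes >>= 1)
    ++ShiftAmt; // equivalent to Log2_32(EltBitWidth / 8) for powers of two
  return Index << ShiftAmt;
}
} // namespace sketch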
-static SDValue performST1ScatterCombine(SDNode *N, SelectionDAG &DAG,
- unsigned Opcode,
- bool OnlyPackedOffsets = true) {
+/// Check if the value of \p OffsetInBytes can be used as an immediate for
+/// the gather load/prefetch and scatter store instructions with vector base and
+/// immediate offset addressing mode:
+///
+/// [<Zn>.[S|D]{, #<imm>}]
+///
+/// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
+
+inline static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes,
+ unsigned ScalarSizeInBytes) {
+ // The immediate is not a multiple of the scalar size.
+ if (OffsetInBytes % ScalarSizeInBytes)
+ return false;
+
+ // The immediate is out of range.
+ if (OffsetInBytes / ScalarSizeInBytes > 31)
+ return false;
+
+ return true;
+}
+
+/// Check if the value of \p Offset represents a valid immediate for the SVE
+/// gather load/prefetch and scatter store instructions with vector base and
+/// immediate offset addressing mode:
+///
+/// [<Zn>.[S|D]{, #<imm>}]
+///
+/// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
+static bool isValidImmForSVEVecImmAddrMode(SDValue Offset,
+ unsigned ScalarSizeInBytes) {
+ ConstantSDNode *OffsetConst = dyn_cast<ConstantSDNode>(Offset.getNode());
+ return OffsetConst && isValidImmForSVEVecImmAddrMode(
+ OffsetConst->getZExtValue(), ScalarSizeInBytes);
+}
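In other words, the encodable immediates for the vector-plus-immediate forms are exactly sizeof(<T>) * k for k = 0..31; anything else is routed below to the register-offset forms (SST1/GLD1, or their UXTW variants when the address vector is nxv4i32) with base and offset swapped. A standalone sketch of the check with a few worked values; the helper name is illustrative, not LLVM API:

#include <cassert>

namespace sketch {
// Valid immediates for [<Zn>.[S|D]{, #<imm>}] are sizeof(<T>) * k, k = 0..31.
inline bool isValidSVEVecImmOffset(unsigned OffsetInBytes,
                                   unsigned ScalarSizeInBytes) {
  return OffsetInBytes % ScalarSizeInBytes == 0 &&
         OffsetInBytes / ScalarSizeInBytes <= 31;
}

inline void workedExamples() {
  assert(isValidSVEVecImmOffset(0, 4));    // k = 0
  assert(isValidSVEVecImmOffset(124, 4));  // k = 31, the largest word offset
  assert(!isValidSVEVecImmOffset(128, 4)); // k = 32 is out of range
  assert(!isValidSVEVecImmOffset(6, 4));   // not a multiple of the element size
}
} // namespace sketch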
+
+static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG,
+ unsigned Opcode,
+ bool OnlyPackedOffsets = true) {
const SDValue Src = N->getOperand(2);
const EVT SrcVT = Src->getValueType(0);
assert(SrcVT.isScalableVector() &&
@@ -12378,11 +13483,46 @@ static SDValue performST1ScatterCombine(SDNode *N, SelectionDAG &DAG,
// Depending on the addressing mode, this is either a pointer or a vector of
// pointers (that fits into one register)
- const SDValue Base = N->getOperand(4);
+ SDValue Base = N->getOperand(4);
// Depending on the addressing mode, this is either a single offset or a
// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(5);
+ // For "scalar + vector of indices", just scale the indices. This only
+ // applies to non-temporal scatters because there's no instruction that takes
+ // indices.
+ if (Opcode == AArch64ISD::SSTNT1_INDEX_PRED) {
+ Offset =
+ getScaledOffsetForBitWidth(DAG, Offset, DL, SrcElVT.getSizeInBits());
+ Opcode = AArch64ISD::SSTNT1_PRED;
+ }
+
+ // In the case of non-temporal scatter stores there's only one SVE instruction
+ // per data-size: "scalar + vector", i.e.
+ // * stnt1{b|h|w|d} { z0.s }, p0, [z0.s, x0]
+ // Since we do have intrinsics that allow the arguments to be in a different
+ // order, we may need to swap them to match the spec.
+ if (Opcode == AArch64ISD::SSTNT1_PRED && Offset.getValueType().isVector())
+ std::swap(Base, Offset);
+
+ // SST1_IMM requires that the offset is an immediate that is:
+ // * a multiple of #SizeInBytes,
+ // * in the range [0, 31 x #SizeInBytes],
+ // where #SizeInBytes is the size in bytes of the stored items. For
+ // immediates outside that range and non-immediate scalar offsets use SST1 or
+ // SST1_UXTW instead.
+ if (Opcode == AArch64ISD::SST1_IMM_PRED) {
+ if (!isValidImmForSVEVecImmAddrMode(Offset,
+ SrcVT.getScalarSizeInBits() / 8)) {
+ if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
+ Opcode = AArch64ISD::SST1_UXTW_PRED;
+ else
+ Opcode = AArch64ISD::SST1_PRED;
+
+ std::swap(Base, Offset);
+ }
+ }
+
auto &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(Base.getValueType()))
return SDValue();
@@ -12400,9 +13540,9 @@ static SDValue performST1ScatterCombine(SDNode *N, SelectionDAG &DAG,
// Source value type that is representable in hardware
EVT HwSrcVt = getSVEContainerType(SrcVT);
- // Keep the original type of the input data to store - this is needed to
- // differentiate between ST1B, ST1H, ST1W and ST1D. For FP values we want the
- // integer equivalent, so just use HwSrcVt.
+ // Keep the original type of the input data to store - this is needed to
+ // select the correct instruction, e.g. ST1B, ST1H, ST1W and ST1D. For FP
+ // values we want the integer equivalent, so just use HwSrcVt.
SDValue InputVT = DAG.getValueType(SrcVT);
if (SrcVT.isFloatingPoint())
InputVT = DAG.getValueType(HwSrcVt);
@@ -12425,24 +13565,67 @@ static SDValue performST1ScatterCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(Opcode, DL, VTs, Ops);
}
-static SDValue performLD1GatherCombine(SDNode *N, SelectionDAG &DAG,
- unsigned Opcode,
- bool OnlyPackedOffsets = true) {
- EVT RetVT = N->getValueType(0);
+static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,
+ unsigned Opcode,
+ bool OnlyPackedOffsets = true) {
+ const EVT RetVT = N->getValueType(0);
assert(RetVT.isScalableVector() &&
"Gather loads are only possible for SVE vectors");
+
SDLoc DL(N);
+ // Make sure that the loaded data will fit into an SVE register
if (RetVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
return SDValue();
// Depending on the addressing mode, this is either a pointer or a vector of
// pointers (that fits into one register)
- const SDValue Base = N->getOperand(3);
+ SDValue Base = N->getOperand(3);
// Depending on the addressing mode, this is either a single offset or a
// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(4);
+ // For "scalar + vector of indices", just scale the indices. This only
+ // applies to non-temporal gathers because there's no instruction that takes
+ // indices.
+ if (Opcode == AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) {
+ Offset = getScaledOffsetForBitWidth(DAG, Offset, DL,
+ RetVT.getScalarSizeInBits());
+ Opcode = AArch64ISD::GLDNT1_MERGE_ZERO;
+ }
+
+ // In the case of non-temporal gather loads there's only one SVE instruction
+ // per data-size: "scalar + vector", i.e.
+ // * ldnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
+ // Since we do have intrinsics that allow the arguments to be in a different
+ // order, we may need to swap them to match the spec.
+ if (Opcode == AArch64ISD::GLDNT1_MERGE_ZERO &&
+ Offset.getValueType().isVector())
+ std::swap(Base, Offset);
+
+ // GLD{FF}1_IMM requires that the offset is an immediate that is:
+ // * a multiple of #SizeInBytes,
+ // * in the range [0, 31 x #SizeInBytes],
+ // where #SizeInBytes is the size in bytes of the loaded items. For
+ // immediates outside that range and non-immediate scalar offsets use
+ // GLD1_MERGE_ZERO or GLD1_UXTW_MERGE_ZERO instead.
+ if (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO ||
+ Opcode == AArch64ISD::GLDFF1_IMM_MERGE_ZERO) {
+ if (!isValidImmForSVEVecImmAddrMode(Offset,
+ RetVT.getScalarSizeInBits() / 8)) {
+ if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
+ Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
+ ? AArch64ISD::GLD1_UXTW_MERGE_ZERO
+ : AArch64ISD::GLDFF1_UXTW_MERGE_ZERO;
+ else
+ Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
+ ? AArch64ISD::GLD1_MERGE_ZERO
+ : AArch64ISD::GLDFF1_MERGE_ZERO;
+
+ std::swap(Base, Offset);
+ }
+ }
+
auto &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(Base.getValueType()))
return SDValue();
@@ -12457,10 +13640,9 @@ static SDValue performLD1GatherCombine(SDNode *N, SelectionDAG &DAG,
// Return value type that is representable in hardware
EVT HwRetVt = getSVEContainerType(RetVT);
- // Keep the original output value type around - this will better inform
- // optimisations (e.g. instruction folding when load is followed by
- // zext/sext). This will only be used for ints, so the value for FPs
- // doesn't matter.
+ // Keep the original output value type around - this is needed to select the
+ // correct instruction, e.g. LD1B, LD1H, LD1W and LD1D. For FP values we want
+ // the integer equivalent, so just use HwRetVt.
SDValue OutVT = DAG.getValueType(RetVT);
if (RetVT.isFloatingPoint())
OutVT = DAG.getValueType(HwRetVt);
@@ -12484,55 +13666,126 @@ static SDValue performLD1GatherCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getMergeValues({Load, LoadChain}, DL);
}
-
static SDValue
performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
+ SDLoc DL(N);
SDValue Src = N->getOperand(0);
unsigned Opc = Src->getOpcode();
- // Gather load nodes (e.g. AArch64ISD::GLD1) are straightforward candidates
+ // Sign extend of an unsigned unpack -> signed unpack
+ if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
+
+ unsigned SOpc = Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI
+ : AArch64ISD::SUNPKLO;
+
+ // Push the sign extend to the operand of the unpack
+ // This is necessary where, for example, the operand of the unpack
+ // is another unpack:
+ // 4i32 sign_extend_inreg (4i32 uunpklo(8i16 uunpklo (16i8 opnd)), from 4i8)
+ // ->
+ // 4i32 sunpklo (8i16 sign_extend_inreg(8i16 uunpklo (16i8 opnd), from 8i8)
+ // ->
+ // 4i32 sunpklo(8i16 sunpklo(16i8 opnd))
+ SDValue ExtOp = Src->getOperand(0);
+ auto VT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ EVT EltTy = VT.getVectorElementType();
+ (void)EltTy;
+
+ assert((EltTy == MVT::i8 || EltTy == MVT::i16 || EltTy == MVT::i32) &&
+ "Sign extending from an invalid type");
+
+ EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ VT.getVectorElementType(),
+ VT.getVectorElementCount() * 2);
+
+ SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ExtOp.getValueType(),
+ ExtOp, DAG.getValueType(ExtVT));
+
+ return DAG.getNode(SOpc, DL, N->getValueType(0), Ext);
+ }
+
+ // SVE load nodes (e.g. AArch64ISD::GLD1) are straightforward candidates
// for DAG Combine with SIGN_EXTEND_INREG. Bail out for all other nodes.
unsigned NewOpc;
+ unsigned MemVTOpNum = 4;
switch (Opc) {
- case AArch64ISD::GLD1:
- NewOpc = AArch64ISD::GLD1S;
+ case AArch64ISD::LD1_MERGE_ZERO:
+ NewOpc = AArch64ISD::LD1S_MERGE_ZERO;
+ MemVTOpNum = 3;
break;
- case AArch64ISD::GLD1_SCALED:
- NewOpc = AArch64ISD::GLD1S_SCALED;
+ case AArch64ISD::LDNF1_MERGE_ZERO:
+ NewOpc = AArch64ISD::LDNF1S_MERGE_ZERO;
+ MemVTOpNum = 3;
break;
- case AArch64ISD::GLD1_SXTW:
- NewOpc = AArch64ISD::GLD1S_SXTW;
+ case AArch64ISD::LDFF1_MERGE_ZERO:
+ NewOpc = AArch64ISD::LDFF1S_MERGE_ZERO;
+ MemVTOpNum = 3;
break;
- case AArch64ISD::GLD1_SXTW_SCALED:
- NewOpc = AArch64ISD::GLD1S_SXTW_SCALED;
+ case AArch64ISD::GLD1_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLD1S_MERGE_ZERO;
break;
- case AArch64ISD::GLD1_UXTW:
- NewOpc = AArch64ISD::GLD1S_UXTW;
+ case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
break;
- case AArch64ISD::GLD1_UXTW_SCALED:
- NewOpc = AArch64ISD::GLD1S_UXTW_SCALED;
+ case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
break;
- case AArch64ISD::GLD1_IMM:
- NewOpc = AArch64ISD::GLD1S_IMM;
+ case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
+ break;
+ case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
+ break;
+ case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
+ break;
+ case AArch64ISD::GLD1_IMM_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLD1S_IMM_MERGE_ZERO;
+ break;
+ case AArch64ISD::GLDFF1_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLDFF1S_MERGE_ZERO;
+ break;
+ case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO;
+ break;
+ case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO;
+ break;
+ case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO;
+ break;
+ case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO;
+ break;
+ case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO;
+ break;
+ case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLDFF1S_IMM_MERGE_ZERO;
+ break;
+ case AArch64ISD::GLDNT1_MERGE_ZERO:
+ NewOpc = AArch64ISD::GLDNT1S_MERGE_ZERO;
break;
default:
return SDValue();
}
EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
- EVT GLD1SrcMemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
+ EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();
- if ((SignExtSrcVT != GLD1SrcMemVT) || !Src.hasOneUse())
+ if ((SignExtSrcVT != SrcMemVT) || !Src.hasOneUse())
return SDValue();
EVT DstVT = N->getValueType(0);
SDVTList VTs = DAG.getVTList(DstVT, MVT::Other);
- SDValue Ops[] = {Src->getOperand(0), Src->getOperand(1), Src->getOperand(2),
- Src->getOperand(3), Src->getOperand(4)};
+
+ SmallVector<SDValue, 5> Ops;
+ for (unsigned I = 0; I < Src->getNumOperands(); ++I)
+ Ops.push_back(Src->getOperand(I));
SDValue ExtLoad = DAG.getNode(NewOpc, SDLoc(N), VTs, Ops);
DCI.CombineTo(N, ExtLoad);
@@ -12542,6 +13795,51 @@ performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return SDValue(N, 0);
}
+/// Legalize the gather prefetch (scalar + vector addressing mode) when the
+/// offset vector is an unpacked 32-bit scalable vector. The other cases (Offset
+/// != nxv2i32) do not need legalization.
+static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG) {
+ const unsigned OffsetPos = 4;
+ SDValue Offset = N->getOperand(OffsetPos);
+
+ // Not an unpacked vector, bail out.
+ if (Offset.getValueType().getSimpleVT().SimpleTy != MVT::nxv2i32)
+ return SDValue();
+
+ // Extend the unpacked offset vector to 64-bit lanes.
+ SDLoc DL(N);
+ Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset);
+ SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
+ // Replace the offset operand with the 64-bit one.
+ Ops[OffsetPos] = Offset;
+
+ return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
+}
+
+/// Combines a node carrying the intrinsic
+/// `aarch64_sve_prf<T>_gather_scalar_offset` into a node that uses
+/// `aarch64_sve_prfb_gather_uxtw_index` when the scalar offset passed to
+/// `aarch64_sve_prf<T>_gather_scalar_offset` is not a valid immediate for the
+/// SVE gather prefetch instruction with vector plus immediate addressing mode.
+static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG,
+ unsigned ScalarSizeInBytes) {
+ const unsigned ImmPos = 4, OffsetPos = 3;
+ // No need to combine the node if the immediate is valid...
+ if (isValidImmForSVEVecImmAddrMode(N->getOperand(ImmPos), ScalarSizeInBytes))
+ return SDValue();
+
+ // ...otherwise swap the offset base with the offset...
+ SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
+ std::swap(Ops[ImmPos], Ops[OffsetPos]);
+ // ...and remap the intrinsic `aarch64_sve_prf<T>_gather_scalar_offset` to
+ // `aarch64_sve_prfb_gather_uxtw_index`.
+ SDLoc DL(N);
+ Ops[1] = DAG.getConstant(Intrinsic::aarch64_sve_prfb_gather_uxtw_index, DL,
+ MVT::i64);
+
+ return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -12606,6 +13904,23 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
+ return combineSVEPrefetchVecBaseImmOff(N, DAG, 1 /*=ScalarSizeInBytes*/);
+ case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
+ return combineSVEPrefetchVecBaseImmOff(N, DAG, 2 /*=ScalarSizeInBytes*/);
+ case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
+ return combineSVEPrefetchVecBaseImmOff(N, DAG, 4 /*=ScalarSizeInBytes*/);
+ case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
+ return combineSVEPrefetchVecBaseImmOff(N, DAG, 8 /*=ScalarSizeInBytes*/);
+ case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
+ case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
+ case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
+ case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
+ case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
+ return legalizeSVEGatherPrefetchOffsVec(N, DAG);
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:
@@ -12630,44 +13945,180 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performNEONPostLDSTCombine(N, DCI, DAG);
case Intrinsic::aarch64_sve_ldnt1:
return performLDNT1Combine(N, DAG);
+ case Intrinsic::aarch64_sve_ld1rq:
+ return performLD1ReplicateCombine<AArch64ISD::LD1RQ_MERGE_ZERO>(N, DAG);
+ case Intrinsic::aarch64_sve_ld1ro:
+ return performLD1ReplicateCombine<AArch64ISD::LD1RO_MERGE_ZERO>(N, DAG);
+ case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
+ return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
+ case Intrinsic::aarch64_sve_ldnt1_gather:
+ return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
+ case Intrinsic::aarch64_sve_ldnt1_gather_index:
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLDNT1_INDEX_MERGE_ZERO);
+ case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
+ return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
+ case Intrinsic::aarch64_sve_ld1:
+ return performLD1Combine(N, DAG, AArch64ISD::LD1_MERGE_ZERO);
+ case Intrinsic::aarch64_sve_ldnf1:
+ return performLD1Combine(N, DAG, AArch64ISD::LDNF1_MERGE_ZERO);
+ case Intrinsic::aarch64_sve_ldff1:
+ return performLD1Combine(N, DAG, AArch64ISD::LDFF1_MERGE_ZERO);
+ case Intrinsic::aarch64_sve_st1:
+ return performST1Combine(N, DAG);
case Intrinsic::aarch64_sve_stnt1:
return performSTNT1Combine(N, DAG);
+ case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
+ return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
+ case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
+ return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
+ case Intrinsic::aarch64_sve_stnt1_scatter:
+ return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
+ case Intrinsic::aarch64_sve_stnt1_scatter_index:
+ return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_INDEX_PRED);
case Intrinsic::aarch64_sve_ld1_gather:
- return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1);
+ return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1_gather_index:
- return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SCALED);
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLD1_SCALED_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1_gather_sxtw:
- return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW,
+ return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_uxtw:
- return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW,
+ return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_UXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
- return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW_SCALED,
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
- return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW_SCALED,
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO,
+ /*OnlyPackedOffsets=*/false);
+ case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
+ return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_IMM_MERGE_ZERO);
+ case Intrinsic::aarch64_sve_ldff1_gather:
+ return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_MERGE_ZERO);
+ case Intrinsic::aarch64_sve_ldff1_gather_index:
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLDFF1_SCALED_MERGE_ZERO);
+ case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLDFF1_SXTW_MERGE_ZERO,
+ /*OnlyPackedOffsets=*/false);
+ case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLDFF1_UXTW_MERGE_ZERO,
+ /*OnlyPackedOffsets=*/false);
+ case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO,
+ /*OnlyPackedOffsets=*/false);
+ case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
- case Intrinsic::aarch64_sve_ld1_gather_imm:
- return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_IMM);
+ case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
+ return performGatherLoadCombine(N, DAG,
+ AArch64ISD::GLDFF1_IMM_MERGE_ZERO);
case Intrinsic::aarch64_sve_st1_scatter:
- return performST1ScatterCombine(N, DAG, AArch64ISD::SST1);
+ return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_PRED);
case Intrinsic::aarch64_sve_st1_scatter_index:
- return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SCALED);
+ return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SCALED_PRED);
case Intrinsic::aarch64_sve_st1_scatter_sxtw:
- return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SXTW,
- /*OnlyPackedOffsets=*/false);
+ return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SXTW_PRED,
+ /*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_uxtw:
- return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_UXTW,
- /*OnlyPackedOffsets=*/false);
+ return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_UXTW_PRED,
+ /*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
- return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SXTW_SCALED,
- /*OnlyPackedOffsets=*/false);
+ return performScatterStoreCombine(N, DAG,
+ AArch64ISD::SST1_SXTW_SCALED_PRED,
+ /*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
- return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_UXTW_SCALED,
- /*OnlyPackedOffsets=*/false);
- case Intrinsic::aarch64_sve_st1_scatter_imm:
- return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_IMM);
+ return performScatterStoreCombine(N, DAG,
+ AArch64ISD::SST1_UXTW_SCALED_PRED,
+ /*OnlyPackedOffsets=*/false);
+ case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
+ return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM_PRED);
+ case Intrinsic::aarch64_sve_tuple_get: {
+ SDLoc DL(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue Src1 = N->getOperand(2);
+ SDValue Idx = N->getOperand(3);
+
+ uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
+ EVT ResVT = N->getValueType(0);
+ uint64_t NumLanes = ResVT.getVectorElementCount().Min;
+ SDValue Val =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1,
+ DAG.getConstant(IdxConst * NumLanes, DL, MVT::i32));
+ return DAG.getMergeValues({Val, Chain}, DL);
+ }
+ case Intrinsic::aarch64_sve_tuple_set: {
+ SDLoc DL(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue Tuple = N->getOperand(2);
+ SDValue Idx = N->getOperand(3);
+ SDValue Vec = N->getOperand(4);
+
+ EVT TupleVT = Tuple.getValueType();
+ uint64_t TupleLanes = TupleVT.getVectorElementCount().Min;
+
+ uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
+ uint64_t NumLanes = Vec.getValueType().getVectorElementCount().Min;
+
+ if ((TupleLanes % NumLanes) != 0)
+ report_fatal_error("invalid tuple vector!");
+
+ uint64_t NumVecs = TupleLanes / NumLanes;
+
+ SmallVector<SDValue, 4> Opnds;
+ for (unsigned I = 0; I < NumVecs; ++I) {
+ if (I == IdxConst)
+ Opnds.push_back(Vec);
+ else {
+ Opnds.push_back(
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, Vec.getValueType(), Tuple,
+ DAG.getConstant(I * NumLanes, DL, MVT::i32)));
+ }
+ }
+ SDValue Concat =
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, Tuple.getValueType(), Opnds);
+ return DAG.getMergeValues({Concat, Chain}, DL);
+ }
+ case Intrinsic::aarch64_sve_tuple_create2:
+ case Intrinsic::aarch64_sve_tuple_create3:
+ case Intrinsic::aarch64_sve_tuple_create4: {
+ SDLoc DL(N);
+ SDValue Chain = N->getOperand(0);
+
+ SmallVector<SDValue, 4> Opnds;
+ for (unsigned I = 2; I < N->getNumOperands(); ++I)
+ Opnds.push_back(N->getOperand(I));
+
+ EVT VT = Opnds[0].getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ VT.getVectorElementCount() *
+ (N->getNumOperands() - 2));
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, DestVT, Opnds);
+ return DAG.getMergeValues({Concat, Chain}, DL);
+ }
+ case Intrinsic::aarch64_sve_ld2:
+ case Intrinsic::aarch64_sve_ld3:
+ case Intrinsic::aarch64_sve_ld4: {
+ SDLoc DL(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue Mask = N->getOperand(2);
+ SDValue BasePtr = N->getOperand(3);
+ SDValue LoadOps[] = {Chain, Mask, BasePtr};
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ SDValue Result =
+ LowerSVEStructLoad(IntrinsicID, LoadOps, N->getValueType(0), DAG, DL);
+ return DAG.getMergeValues({Result, Chain}, DL);
+ }
default:
break;
}
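
The aarch64_sve_tuple_get/_set combines above reduce to lane arithmetic on minimum element counts: a tuple of NumVecs parts of NumLanes lanes each is modelled as one wide vector, and part I starts at lane I * NumLanes, which is exactly the EXTRACT_SUBVECTOR index the combines compute. A minimal standalone sketch of that arithmetic (illustrative names only, no LLVM APIs):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Hypothetical helper mirroring the index arithmetic in the
// aarch64_sve_tuple_get/_set combines: part Idx of a tuple whose parts
// have NumLanes (minimum) lanes each starts at lane Idx * NumLanes.
static uint64_t tupleLaneOffset(uint64_t TupleLanes, uint64_t NumLanes,
                                uint64_t Idx) {
  assert(TupleLanes % NumLanes == 0 && "invalid tuple vector!");
  uint64_t NumVecs = TupleLanes / NumLanes;
  assert(Idx < NumVecs && "tuple index out of range");
  (void)NumVecs;
  return Idx * NumLanes;
}

int main() {
  // A 4-part tuple of nxv4i32 parts is modelled as nxv16i32; part 2 starts
  // at lane 2 * 4 = 8, the subvector index used by the tuple_get combine.
  std::printf("lane offset = %llu\n",
              (unsigned long long)tupleLaneOffset(16, 4, 2));
  return 0;
}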
@@ -12799,7 +14250,8 @@ static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SDLoc DL(N);
SDValue Op = N->getOperand(0);
- if (N->getValueType(0) != MVT::i16 || Op.getValueType() != MVT::f16)
+ if (N->getValueType(0) != MVT::i16 ||
+ (Op.getValueType() != MVT::f16 && Op.getValueType() != MVT::bf16))
return;
Op = SDValue(
@@ -12834,6 +14286,40 @@ static std::pair<SDValue, SDValue> splitInt128(SDValue N, SelectionDAG &DAG) {
return std::make_pair(Lo, Hi);
}
+void AArch64TargetLowering::ReplaceExtractSubVectorResults(
+ SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+ SDValue In = N->getOperand(0);
+ EVT InVT = In.getValueType();
+
+ // Common code will handle these just fine.
+ if (!InVT.isScalableVector() || !InVT.isInteger())
+ return;
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ // The following checks bail if this is not a halving operation.
+
+ ElementCount ResEC = VT.getVectorElementCount();
+
+ if (InVT.getVectorElementCount().Min != (ResEC.Min * 2))
+ return;
+
+ auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!CIndex)
+ return;
+
+ unsigned Index = CIndex->getZExtValue();
+ if ((Index != 0) && (Index != ResEC.Min))
+ return;
+
+ unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI;
+ EVT ExtendedHalfVT = VT.widenIntegerVectorElementType(*DAG.getContext());
+
+ SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Half));
+}
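
ReplaceExtractSubVectorResults above only handles extracts of exactly half of a scalable integer vector: index 0 selects the low half via UUNPKLO, an index equal to the result's minimum element count selects the high half via UUNPKHI, and the widened result is then truncated back to the requested element type. A small sketch of that decision (hypothetical helper, not the LLVM API):

#include <cstdint>
#include <cstdio>
#include <optional>
#include <string>

// Illustrative mirror of the guard logic above: return the unpack flavour
// for a halving extract, or nothing if common code should handle it.
static std::optional<std::string>
chooseUnpack(uint64_t InMinElts, uint64_t ResMinElts, uint64_t Index) {
  if (InMinElts != ResMinElts * 2)
    return std::nullopt; // not a halving extract
  if (Index == 0)
    return std::string("UUNPKLO"); // low half, truncated back afterwards
  if (Index == ResMinElts)
    return std::string("UUNPKHI"); // high half
  return std::nullopt; // any other start index is not handled here
}

int main() {
  // Extracting the high nxv2i32 half of an nxv4i32 input.
  if (auto Op = chooseUnpack(4, 2, 2))
    std::printf("use %s + truncate\n", Op->c_str());
  return 0;
}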
+
// Create an even/odd pair of X registers holding integer value V.
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
SDLoc dl(V.getNode());
@@ -12897,10 +14383,12 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
if (DAG.getDataLayout().isBigEndian())
std::swap(SubReg1, SubReg2);
- Results.push_back(DAG.getTargetExtractSubreg(SubReg1, SDLoc(N), MVT::i64,
- SDValue(CmpSwap, 0)));
- Results.push_back(DAG.getTargetExtractSubreg(SubReg2, SDLoc(N), MVT::i64,
- SDValue(CmpSwap, 0)));
+ SDValue Lo = DAG.getTargetExtractSubreg(SubReg1, SDLoc(N), MVT::i64,
+ SDValue(CmpSwap, 0));
+ SDValue Hi = DAG.getTargetExtractSubreg(SubReg2, SDLoc(N), MVT::i64,
+ SDValue(CmpSwap, 0));
+ Results.push_back(
+ DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128, Lo, Hi));
Results.push_back(SDValue(CmpSwap, 1)); // Chain out
return;
}
@@ -12916,8 +14404,8 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
- Results.push_back(SDValue(CmpSwap, 0));
- Results.push_back(SDValue(CmpSwap, 1));
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
+ SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
Results.push_back(SDValue(CmpSwap, 3));
}
@@ -12937,6 +14425,9 @@ void AArch64TargetLowering::ReplaceNodeResults(
Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG));
return;
+ case ISD::CTPOP:
+ Results.push_back(LowerCTPOP(SDValue(N, 0), DAG));
+ return;
case AArch64ISD::SADDV:
ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
return;
@@ -12984,6 +14475,9 @@ void AArch64TargetLowering::ReplaceNodeResults(
Results.append({Pair, Result.getValue(2) /* Chain */});
return;
}
+ case ISD::EXTRACT_SUBVECTOR:
+ ReplaceExtractSubVectorResults(N, Results, DAG);
+ return;
case ISD::INTRINSIC_WO_CHAIN: {
EVT VT = N->getValueType(0);
assert((VT == MVT::i8 || VT == MVT::i16) &&
@@ -13094,7 +14588,7 @@ AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
- if (getTargetMachine().getOptLevel() == 0)
+ if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
return AtomicExpansionKind::None;
return AtomicExpansionKind::LLSC;
}
@@ -13353,8 +14847,7 @@ bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// integer division, leaving the division as-is is a loss even in terms of
// size, because it will have to be scalarized, while the alternative code
// sequence can be performed in vector form.
- bool OptSize =
- Attr.hasAttribute(AttributeList::FunctionIndex, Attribute::MinSize);
+ bool OptSize = Attr.hasFnAttribute(Attribute::MinSize);
return OptSize && !VT.isVector();
}
@@ -13384,3 +14877,280 @@ void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
bool AArch64TargetLowering::needsFixedCatchObjects() const {
return false;
}
+
+bool AArch64TargetLowering::shouldLocalize(
+ const MachineInstr &MI, const TargetTransformInfo *TTI) const {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_GLOBAL_VALUE: {
+ // On Darwin, TLS global vars get selected into function calls, which
+ // we don't want localized, as they can get moved into the middle of
+ // another call sequence.
+ const GlobalValue &GV = *MI.getOperand(1).getGlobal();
+ if (GV.isThreadLocal() && Subtarget->isTargetMachO())
+ return false;
+ break;
+ }
+ // If we legalized G_GLOBAL_VALUE into ADRP + G_ADD_LOW, mark both as being
+ // localizable.
+ case AArch64::ADRP:
+ case AArch64::G_ADD_LOW:
+ return true;
+ default:
+ break;
+ }
+ return TargetLoweringBase::shouldLocalize(MI, TTI);
+}
+
+bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
+ if (isa<ScalableVectorType>(Inst.getType()))
+ return true;
+
+ for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
+ if (isa<ScalableVectorType>(Inst.getOperand(i)->getType()))
+ return true;
+
+ return false;
+}
+
+// Return the largest legal scalable vector type that matches VT's element type.
+static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT) {
+ assert(VT.isFixedLengthVector() &&
+ DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+ "Expected legal fixed length vector!");
+ switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("unexpected element type for SVE container");
+ case MVT::i8:
+ return EVT(MVT::nxv16i8);
+ case MVT::i16:
+ return EVT(MVT::nxv8i16);
+ case MVT::i32:
+ return EVT(MVT::nxv4i32);
+ case MVT::i64:
+ return EVT(MVT::nxv2i64);
+ case MVT::f16:
+ return EVT(MVT::nxv8f16);
+ case MVT::f32:
+ return EVT(MVT::nxv4f32);
+ case MVT::f64:
+ return EVT(MVT::nxv2f64);
+ }
+}
+
+// Return a PTRUE with active lanes corresponding to the extent of VT.
+static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
+ EVT VT) {
+ assert(VT.isFixedLengthVector() &&
+ DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+ "Expected legal fixed length vector!");
+
+ int PgPattern;
+ switch (VT.getVectorNumElements()) {
+ default:
+ llvm_unreachable("unexpected element count for SVE predicate");
+ case 1:
+ PgPattern = AArch64SVEPredPattern::vl1;
+ break;
+ case 2:
+ PgPattern = AArch64SVEPredPattern::vl2;
+ break;
+ case 4:
+ PgPattern = AArch64SVEPredPattern::vl4;
+ break;
+ case 8:
+ PgPattern = AArch64SVEPredPattern::vl8;
+ break;
+ case 16:
+ PgPattern = AArch64SVEPredPattern::vl16;
+ break;
+ case 32:
+ PgPattern = AArch64SVEPredPattern::vl32;
+ break;
+ case 64:
+ PgPattern = AArch64SVEPredPattern::vl64;
+ break;
+ case 128:
+ PgPattern = AArch64SVEPredPattern::vl128;
+ break;
+ case 256:
+ PgPattern = AArch64SVEPredPattern::vl256;
+ break;
+ }
+
+ // TODO: For vectors that are exactly getMaxSVEVectorSizeInBits big, we can
+ // use AArch64SVEPredPattern::all, which can enable the use of unpredicated
+ // variants of instructions when available.
+
+ MVT MaskVT;
+ switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("unexpected element type for SVE predicate");
+ case MVT::i8:
+ MaskVT = MVT::nxv16i1;
+ break;
+ case MVT::i16:
+ case MVT::f16:
+ MaskVT = MVT::nxv8i1;
+ break;
+ case MVT::i32:
+ case MVT::f32:
+ MaskVT = MVT::nxv4i1;
+ break;
+ case MVT::i64:
+ case MVT::f64:
+ MaskVT = MVT::nxv2i1;
+ break;
+ }
+
+ return DAG.getNode(AArch64ISD::PTRUE, DL, MaskVT,
+ DAG.getTargetConstant(PgPattern, DL, MVT::i64));
+}
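
getPredicateForFixedLengthVector above derives the governing predicate purely from the fixed vector's shape: the element count selects a PTRUE vl pattern and the element size selects the predicate type. A compact sketch of the element-count mapping (pattern names as in the switch above, everything else illustrative):

#include <cstdio>
#include <string>

// Illustrative mapping from a fixed-length vector's element count to the
// SVE PTRUE pattern used as the governing predicate.
static std::string vlPatternFor(unsigned NumElts) {
  switch (NumElts) {
  case 1:   return "vl1";
  case 2:   return "vl2";
  case 4:   return "vl4";
  case 8:   return "vl8";
  case 16:  return "vl16";
  case 32:  return "vl32";
  case 64:  return "vl64";
  case 128: return "vl128";
  case 256: return "vl256";
  default:  return "unsupported"; // the real code treats this as unreachable
  }
}

int main() {
  // A fixed v8i32 is governed by "ptrue pN.s, vl8" in an nxv4i1 mask.
  std::printf("v8i32 -> %s\n", vlPatternFor(8).c_str());
  return 0;
}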
+
+static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
+ EVT VT) {
+ assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+ "Expected legal scalable vector!");
+ auto PredTy = VT.changeVectorElementType(MVT::i1);
+ return getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all);
+}
+
+static SDValue getPredicateForVector(SelectionDAG &DAG, SDLoc &DL, EVT VT) {
+ if (VT.isFixedLengthVector())
+ return getPredicateForFixedLengthVector(DAG, DL, VT);
+
+ return getPredicateForScalableVector(DAG, DL, VT);
+}
+
+// Grow V to consume an entire SVE register.
+static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
+ assert(VT.isScalableVector() &&
+ "Expected to convert into a scalable vector!");
+ assert(V.getValueType().isFixedLengthVector() &&
+ "Expected a fixed length vector operand!");
+ SDLoc DL(V);
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
+}
+
+// Shrink V so it's just big enough to maintain a VT's worth of data.
+static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
+ assert(VT.isFixedLengthVector() &&
+ "Expected to convert into a fixed length vector!");
+ assert(V.getValueType().isScalableVector() &&
+ "Expected a scalable vector operand!");
+ SDLoc DL(V);
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
+}
+
+// Convert all fixed length vector loads larger than NEON to masked_loads.
+SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
+ SDValue Op, SelectionDAG &DAG) const {
+ auto Load = cast<LoadSDNode>(Op);
+
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+
+ auto NewLoad = DAG.getMaskedLoad(
+ ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
+ getPredicateForFixedLengthVector(DAG, DL, VT), DAG.getUNDEF(ContainerVT),
+ Load->getMemoryVT(), Load->getMemOperand(), Load->getAddressingMode(),
+ Load->getExtensionType());
+
+ auto Result = convertFromScalableVector(DAG, VT, NewLoad);
+ SDValue MergedValues[2] = {Result, Load->getChain()};
+ return DAG.getMergeValues(MergedValues, DL);
+}
+
+// Convert all fixed length vector stores larger than NEON to masked_stores.
+SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
+ SDValue Op, SelectionDAG &DAG) const {
+ auto Store = cast<StoreSDNode>(Op);
+
+ SDLoc DL(Op);
+ EVT VT = Store->getValue().getValueType();
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+
+ auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
+ return DAG.getMaskedStore(
+ Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
+ getPredicateForFixedLengthVector(DAG, DL, VT), Store->getMemoryVT(),
+ Store->getMemOperand(), Store->getAddressingMode(),
+ Store->isTruncatingStore());
+}
+
+SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
+ SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
+
+ SDLoc DL(Op);
+ SDValue Val = Op.getOperand(0);
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
+ Val = convertToScalableVector(DAG, ContainerVT, Val);
+
+ // Repeatedly truncate Val until the result is of the desired element type.
+ switch (ContainerVT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("unimplemented container type");
+ case MVT::nxv2i64:
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv4i32, Val);
+ Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv4i32, Val, Val);
+ if (VT.getVectorElementType() == MVT::i32)
+ break;
+ LLVM_FALLTHROUGH;
+ case MVT::nxv4i32:
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv8i16, Val);
+ Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv8i16, Val, Val);
+ if (VT.getVectorElementType() == MVT::i16)
+ break;
+ LLVM_FALLTHROUGH;
+ case MVT::nxv8i16:
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i8, Val);
+ Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv16i8, Val, Val);
+ assert(VT.getVectorElementType() == MVT::i8 && "Unexpected element type!");
+ break;
+ }
+
+ return convertFromScalableVector(DAG, VT, Val);
+}
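
LowerFixedLengthVectorTruncateToSVE above narrows one element-size step at a time: bitcast to the next narrower container, then UZP1 the vector with itself to keep the low half of every element pair, repeating until the target element width is reached. A sketch that just lists the step widths (illustrative, not the LLVM API):

#include <cstdio>
#include <vector>

// Illustrative listing of the narrowing steps performed by the lowering:
// from SrcBits-wide elements down to DstBits-wide elements, each step
// halves the element width via bitcast + UZP1.
static std::vector<int> truncationSteps(int SrcBits, int DstBits) {
  std::vector<int> Steps;
  for (int Bits = SrcBits; Bits > DstBits; Bits /= 2)
    Steps.push_back(Bits / 2); // element width after this UZP1
  return Steps;
}

int main() {
  // nxv2i64 down to i8 elements: 64 -> 32 -> 16 -> 8, i.e. three UZP1 steps.
  for (int W : truncationSteps(64, 8))
    std::printf("uzp1 to %d-bit elements\n", W);
  return 0;
}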
+
+SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
+ SelectionDAG &DAG,
+ unsigned NewOp) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ auto Pg = getPredicateForVector(DAG, DL, VT);
+
+ if (useSVEForFixedLengthVectorVT(VT)) {
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+
+ // Create a list of operands by converting the existing ones to scalable types.
+ SmallVector<SDValue, 4> Operands = {Pg};
+ for (const SDValue &V : Op->op_values()) {
+ if (isa<CondCodeSDNode>(V)) {
+ Operands.push_back(V);
+ continue;
+ }
+
+ assert(useSVEForFixedLengthVectorVT(V.getValueType()) &&
+ "Only fixed length vectors are supported!");
+ Operands.push_back(convertToScalableVector(DAG, ContainerVT, V));
+ }
+
+ auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);
+ return convertFromScalableVector(DAG, VT, ScalableRes);
+ }
+
+ assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");
+
+ SmallVector<SDValue, 4> Operands = {Pg};
+ for (const SDValue &V : Op->op_values()) {
+ assert((isa<CondCodeSDNode>(V) || V.getValueType().isScalableVector()) &&
+ "Only scalable vectors are supported!");
+ Operands.push_back(V);
+ }
+
+ return DAG.getNode(NewOp, DL, VT, Operands);
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 5ec453e274dc..4fe77481706b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -25,6 +25,26 @@ namespace llvm {
namespace AArch64ISD {
+// For predicated nodes where the result is a vector, the operation is
+// controlled by a governing predicate and the inactive lanes are explicitly
+// defined with a value; please stick to the following naming convention:
+//
+// _MERGE_OP<n> The result value is a vector with inactive lanes equal
+// to source operand OP<n>.
+//
+// _MERGE_ZERO The result value is a vector with inactive lanes
+// actively zeroed.
+//
+// _MERGE_PASSTHRU The result value is a vector with inactive lanes equal
+// to the last source operand, whose only purpose is to be
+// a passthru value.
+//
+// For other cases where no explicit action is needed to set the inactive lanes,
+// or when the result is not a vector and it is needed or helpful to
+// distinguish a node from similar unpredicated nodes, use:
+//
+// _PRED
+//
enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
@@ -52,6 +72,22 @@ enum NodeType : unsigned {
ADC,
SBC, // adc, sbc instructions
+ // Arithmetic instructions
+ ADD_PRED,
+ FADD_PRED,
+ SDIV_PRED,
+ UDIV_PRED,
+ FMA_PRED,
+ SMIN_MERGE_OP1,
+ UMIN_MERGE_OP1,
+ SMAX_MERGE_OP1,
+ UMAX_MERGE_OP1,
+ SHL_MERGE_OP1,
+ SRL_MERGE_OP1,
+ SRA_MERGE_OP1,
+
+ SETCC_MERGE_ZERO,
+
// Arithmetic instructions which write flags.
ADDS,
SUBS,
@@ -90,9 +126,9 @@ enum NodeType : unsigned {
BICi,
ORRi,
- // Vector bit select: similar to ISD::VSELECT but not all bits within an
+ // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
// element must be identical.
- BSL,
+ BSP,
// Vector arithmetic negation
NEG,
@@ -121,6 +157,10 @@ enum NodeType : unsigned {
SRSHR_I,
URSHR_I,
+ // Vector shift by constant and insert
+ VSLI,
+ VSRI,
+
// Vector comparisons
CMEQ,
CMGE,
@@ -148,6 +188,10 @@ enum NodeType : unsigned {
SADDV,
UADDV,
+ // Vector rounding halving addition
+ SRHADD,
+ URHADD,
+
// Vector across-lanes min/max
// Only the lower result lane is defined.
SMINV,
@@ -166,7 +210,7 @@ enum NodeType : unsigned {
// Vector bitwise negation
NOT,
- // Vector bitwise selection
+ // Vector bitwise insertion
BIT,
// Compare-and-branch
@@ -196,8 +240,10 @@ enum NodeType : unsigned {
UMULL,
// Reciprocal estimates and steps.
- FRECPE, FRECPS,
- FRSQRTE, FRSQRTS,
+ FRECPE,
+ FRECPS,
+ FRSQRTE,
+ FRSQRTS,
SUNPKHI,
SUNPKLO,
@@ -211,35 +257,93 @@ enum NodeType : unsigned {
REV,
TBL,
+ // Floating-point reductions.
+ FADDA_PRED,
+ FADDV_PRED,
+ FMAXV_PRED,
+ FMAXNMV_PRED,
+ FMINV_PRED,
+ FMINNMV_PRED,
+
INSR,
PTEST,
PTRUE,
+ DUP_MERGE_PASSTHRU,
+ INDEX_VECTOR,
+
+ REINTERPRET_CAST,
+
+ LD1_MERGE_ZERO,
+ LD1S_MERGE_ZERO,
+ LDNF1_MERGE_ZERO,
+ LDNF1S_MERGE_ZERO,
+ LDFF1_MERGE_ZERO,
+ LDFF1S_MERGE_ZERO,
+ LD1RQ_MERGE_ZERO,
+ LD1RO_MERGE_ZERO,
+
+ // Structured loads.
+ SVE_LD2_MERGE_ZERO,
+ SVE_LD3_MERGE_ZERO,
+ SVE_LD4_MERGE_ZERO,
+
// Unsigned gather loads.
- GLD1,
- GLD1_SCALED,
- GLD1_UXTW,
- GLD1_SXTW,
- GLD1_UXTW_SCALED,
- GLD1_SXTW_SCALED,
- GLD1_IMM,
+ GLD1_MERGE_ZERO,
+ GLD1_SCALED_MERGE_ZERO,
+ GLD1_UXTW_MERGE_ZERO,
+ GLD1_SXTW_MERGE_ZERO,
+ GLD1_UXTW_SCALED_MERGE_ZERO,
+ GLD1_SXTW_SCALED_MERGE_ZERO,
+ GLD1_IMM_MERGE_ZERO,
// Signed gather loads
- GLD1S,
- GLD1S_SCALED,
- GLD1S_UXTW,
- GLD1S_SXTW,
- GLD1S_UXTW_SCALED,
- GLD1S_SXTW_SCALED,
- GLD1S_IMM,
+ GLD1S_MERGE_ZERO,
+ GLD1S_SCALED_MERGE_ZERO,
+ GLD1S_UXTW_MERGE_ZERO,
+ GLD1S_SXTW_MERGE_ZERO,
+ GLD1S_UXTW_SCALED_MERGE_ZERO,
+ GLD1S_SXTW_SCALED_MERGE_ZERO,
+ GLD1S_IMM_MERGE_ZERO,
+
+ // Unsigned gather loads.
+ GLDFF1_MERGE_ZERO,
+ GLDFF1_SCALED_MERGE_ZERO,
+ GLDFF1_UXTW_MERGE_ZERO,
+ GLDFF1_SXTW_MERGE_ZERO,
+ GLDFF1_UXTW_SCALED_MERGE_ZERO,
+ GLDFF1_SXTW_SCALED_MERGE_ZERO,
+ GLDFF1_IMM_MERGE_ZERO,
+
+ // Signed gather loads.
+ GLDFF1S_MERGE_ZERO,
+ GLDFF1S_SCALED_MERGE_ZERO,
+ GLDFF1S_UXTW_MERGE_ZERO,
+ GLDFF1S_SXTW_MERGE_ZERO,
+ GLDFF1S_UXTW_SCALED_MERGE_ZERO,
+ GLDFF1S_SXTW_SCALED_MERGE_ZERO,
+ GLDFF1S_IMM_MERGE_ZERO,
+
+ // Non-temporal gather loads
+ GLDNT1_MERGE_ZERO,
+ GLDNT1_INDEX_MERGE_ZERO,
+ GLDNT1S_MERGE_ZERO,
+
+ // Contiguous masked store.
+ ST1_PRED,
+
// Scatter store
- SST1,
- SST1_SCALED,
- SST1_UXTW,
- SST1_SXTW,
- SST1_UXTW_SCALED,
- SST1_SXTW_SCALED,
- SST1_IMM,
+ SST1_PRED,
+ SST1_SCALED_PRED,
+ SST1_UXTW_PRED,
+ SST1_SXTW_PRED,
+ SST1_UXTW_SCALED_PRED,
+ SST1_SXTW_SCALED_PRED,
+ SST1_IMM_PRED,
+
+ // Non-temporal scatter store
+ SSTNT1_PRED,
+ SSTNT1_INDEX_PRED,
// Strict (exception-raising) floating point comparison
STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
@@ -276,7 +380,8 @@ enum NodeType : unsigned {
STZ2G,
LDP,
- STP
+ STP,
+ STNP
};
} // end namespace AArch64ISD
@@ -325,7 +430,8 @@ public:
return MVT::getIntegerVT(64);
}
- bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+ bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts,
TargetLoweringOpt &TLO) const override;
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
@@ -337,9 +443,10 @@ public:
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *Fast = nullptr) const override;
/// LLT variant.
- bool allowsMisalignedMemoryAccesses(
- LLT Ty, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
- bool *Fast = nullptr) const override;
+ bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
+ Align Alignment,
+ MachineMemOperand::Flags Flags,
+ bool *Fast = nullptr) const override;
/// Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
@@ -380,9 +487,6 @@ public:
MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
- MachineBasicBlock *BB) const;
-
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
@@ -406,7 +510,7 @@ public:
bool shouldSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const override;
- bool hasPairedLoad(EVT LoadedType, unsigned &RequiredAligment) const override;
+ bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override;
unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
@@ -422,13 +526,11 @@ public:
bool shouldConsiderGEPOffsetSplit() const override;
- EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
- bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+ EVT getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const override;
- LLT getOptimalMemOpLLT(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
- bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
- const AttributeList &FuncAttributes) const override;
+ LLT getOptimalMemOpLLT(const MemOp &Op,
+ const AttributeList &FuncAttributes) const override;
/// Return true if the addressing mode represented by AM is legal for this
/// target, for a load/store of the specified type.
@@ -467,6 +569,13 @@ public:
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const override;
+ bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
+ bool MathUsed) const override {
+ // Using overflow ops for overflow checks only should be beneficial on
+ // AArch64.
+ return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
+ }
+
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const override;
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
@@ -501,7 +610,7 @@ public:
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
- unsigned
+ Register
getExceptionPointerRegister(const Constant *PersonalityFn) const override {
// FIXME: This is a guess. Has this been defined yet?
return AArch64::X0;
@@ -509,7 +618,7 @@ public:
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
- unsigned
+ Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
// FIXME: This is a guess. Has this been defined yet?
return AArch64::X1;
@@ -615,13 +724,27 @@ public:
unsigned getNumInterleavedAccesses(VectorType *VecTy,
const DataLayout &DL) const;
- MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override;
+ MachineMemOperand::Flags getTargetMMOFlags(
+ const Instruction &I) const override;
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
CallingConv::ID CallConv,
bool isVarArg) const override;
/// Used for exception handling on Win64.
bool needsFixedCatchObjects() const override;
+
+ bool fallBackToDAGISel(const Instruction &Inst) const override;
+
+ /// SVE code generation for fixed length vectors does not custom lower
+ /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
+ /// merge. However, merging them creates a BUILD_VECTOR that is just as
+ /// illegal as the original, thus leading to an infinite legalisation loop.
+ /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
+ /// vector types this override can be removed.
+ bool mergeStoresAfterLegalization(EVT VT) const override {
+ return !useSVEForFixedLengthVectors();
+ }
+
private:
/// Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -630,6 +753,7 @@ private:
bool isExtFreeImpl(const Instruction *Ext) const override;
void addTypeForNEON(MVT VT, MVT PromotedBitwiseVT);
+ void addTypeForFixedLengthSVE(MVT VT);
void addDRTypeForNEON(MVT VT);
void addQRTypeForNEON(MVT VT);
@@ -733,7 +857,11 @@ private:
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
+ unsigned NewOp) const;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
@@ -750,6 +878,8 @@ private:
SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
@@ -757,6 +887,13 @@ private:
SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
SDValue &Size,
SelectionDAG &DAG) const;
+ SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
+ EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;
+
+ SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
+ SelectionDAG &DAG) const;
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;
@@ -811,10 +948,19 @@ private:
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
+ void ReplaceExtractSubVectorResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
void finalizeLowering(MachineFunction &MF) const override;
+
+ bool shouldLocalize(const MachineInstr &MI,
+ const TargetTransformInfo *TTI) const override;
+
+ bool useSVEForFixedLengthVectors() const;
+ bool useSVEForFixedLengthVectorVT(EVT VT) const;
};
namespace AArch64 {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 11ba69878847..6df7970f4d82 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -20,6 +20,30 @@ class Format<bits<2> val> {
def PseudoFrm : Format<0>;
def NormalFrm : Format<1>; // Do we need any others?
+// Enum describing whether an instruction is
+// destructive in its first source operand.
+class DestructiveInstTypeEnum<bits<4> val> {
+ bits<4> Value = val;
+}
+def NotDestructive : DestructiveInstTypeEnum<0>;
+// Destructive in its first operand and can be MOVPRFX'd, but has no other
+// special properties.
+def DestructiveOther : DestructiveInstTypeEnum<1>;
+def DestructiveUnary : DestructiveInstTypeEnum<2>;
+def DestructiveBinaryImm : DestructiveInstTypeEnum<3>;
+def DestructiveBinaryShImmUnpred : DestructiveInstTypeEnum<4>;
+def DestructiveBinary : DestructiveInstTypeEnum<5>;
+def DestructiveBinaryComm : DestructiveInstTypeEnum<6>;
+def DestructiveBinaryCommWithRev : DestructiveInstTypeEnum<7>;
+def DestructiveTernaryCommWithRev : DestructiveInstTypeEnum<8>;
+
+class FalseLanesEnum<bits<2> val> {
+ bits<2> Value = val;
+}
+def FalseLanesNone : FalseLanesEnum<0>;
+def FalseLanesZero : FalseLanesEnum<1>;
+def FalseLanesUndef : FalseLanesEnum<2>;
+
// AArch64 Instruction Format
class AArch64Inst<Format f, string cstr> : Instruction {
field bits<32> Inst; // Instruction encoding.
@@ -34,6 +58,16 @@ class AArch64Inst<Format f, string cstr> : Instruction {
let Namespace = "AArch64";
Format F = f;
bits<2> Form = F.Value;
+
+ // Defaults
+ FalseLanesEnum FalseLanes = FalseLanesNone;
+ DestructiveInstTypeEnum DestructiveInstType = NotDestructive;
+ ElementSizeEnum ElementSize = ElementSizeNone;
+
+ let TSFlags{8-7} = FalseLanes.Value;
+ let TSFlags{6-3} = DestructiveInstType.Value;
+ let TSFlags{2-0} = ElementSize.Value;
+
let Pattern = [];
let Constraints = cstr;
}
@@ -48,6 +82,7 @@ class Pseudo<dag oops, dag iops, list<dag> pattern, string cstr = "">
dag InOperandList = iops;
let Pattern = pattern;
let isCodeGenOnly = 1;
+ let isPseudo = 1;
}
// Real instructions (have encoding information)
@@ -56,14 +91,6 @@ class EncodedI<string cstr, list<dag> pattern> : AArch64Inst<NormalFrm, cstr> {
let Size = 4;
}
-// Enum describing whether an instruction is
-// destructive in its first source operand.
-class DestructiveInstTypeEnum<bits<1> val> {
- bits<1> Value = val;
-}
-def NotDestructive : DestructiveInstTypeEnum<0>;
-def Destructive : DestructiveInstTypeEnum<1>;
-
// Normal instructions
class I<dag oops, dag iops, string asm, string operands, string cstr,
list<dag> pattern>
@@ -71,13 +98,6 @@ class I<dag oops, dag iops, string asm, string operands, string cstr,
dag OutOperandList = oops;
dag InOperandList = iops;
let AsmString = !strconcat(asm, operands);
-
- // Destructive operations (SVE)
- DestructiveInstTypeEnum DestructiveInstType = NotDestructive;
- ElementSizeEnum ElementSize = ElementSizeB;
-
- let TSFlags{3} = DestructiveInstType.Value;
- let TSFlags{2-0} = ElementSize.Value;
}
class TriOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>;
@@ -327,6 +347,18 @@ def simm5_32b : Operand<i32>, ImmLeaf<i32, [{ return Imm >= -16 && Imm < 16; }]>
let DecoderMethod = "DecodeSImm<5>";
}
+def simm5_8b : Operand<i32>, ImmLeaf<i32, [{ return (int8_t)Imm >= -16 && (int8_t)Imm < 16; }]> {
+ let ParserMatchClass = SImm5Operand;
+ let DecoderMethod = "DecodeSImm<5>";
+ let PrintMethod = "printSImm<8>";
+}
+
+def simm5_16b : Operand<i32>, ImmLeaf<i32, [{ return (int16_t)Imm >= -16 && (int16_t)Imm < 16; }]> {
+ let ParserMatchClass = SImm5Operand;
+ let DecoderMethod = "DecodeSImm<5>";
+ let PrintMethod = "printSImm<16>";
+}
+
// simm7sN predicate - True if the immediate is a multiple of N in the range
// [-64 * N, 63 * N].
@@ -349,6 +381,8 @@ def simm7s16 : Operand<i32> {
let PrintMethod = "printImmScale<16>";
}
+def am_sve_fi : ComplexPattern<i64, 2, "SelectAddrModeFrameIndexSVE", []>;
+
def am_indexed7s8 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S8", []>;
def am_indexed7s16 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S16", []>;
def am_indexed7s32 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S32", []>;
@@ -358,6 +392,9 @@ def am_indexed7s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S128", []>;
def am_indexedu6s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedU6S128", []>;
def am_indexeds9s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedS9S128", []>;
+def UImmS1XForm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i64);
+}]>;
def UImmS2XForm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() / 2, SDLoc(N), MVT::i64);
}]>;
@@ -446,6 +483,19 @@ def uimm6s16 : Operand<i64>, ImmLeaf<i64,
let ParserMatchClass = UImm6s16Operand;
}
+def SImmS2XForm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue() / 2, SDLoc(N), MVT::i64);
+}]>;
+def SImmS3XForm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue() / 3, SDLoc(N), MVT::i64);
+}]>;
+def SImmS4XForm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue() / 4, SDLoc(N), MVT::i64);
+}]>;
+def SImmS16XForm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue() / 16, SDLoc(N), MVT::i64);
+}]>;
+
// simm6sN predicate - True if the immediate is a multiple of N in the range
// [-32 * N, 31 * N].
def SImm6s1Operand : SImmScaledMemoryIndexed<6, 1>;
@@ -461,6 +511,7 @@ def SImm4s2Operand : SImmScaledMemoryIndexed<4, 2>;
def SImm4s3Operand : SImmScaledMemoryIndexed<4, 3>;
def SImm4s4Operand : SImmScaledMemoryIndexed<4, 4>;
def SImm4s16Operand : SImmScaledMemoryIndexed<4, 16>;
+def SImm4s32Operand : SImmScaledMemoryIndexed<4, 32>;
def simm4s1 : Operand<i64>, ImmLeaf<i64,
[{ return Imm >=-8 && Imm <= 7; }]> {
@@ -469,31 +520,37 @@ def simm4s1 : Operand<i64>, ImmLeaf<i64,
}
def simm4s2 : Operand<i64>, ImmLeaf<i64,
-[{ return Imm >=-16 && Imm <= 14 && (Imm % 2) == 0x0; }]> {
+[{ return Imm >=-16 && Imm <= 14 && (Imm % 2) == 0x0; }], SImmS2XForm> {
let PrintMethod = "printImmScale<2>";
let ParserMatchClass = SImm4s2Operand;
let DecoderMethod = "DecodeSImm<4>";
}
def simm4s3 : Operand<i64>, ImmLeaf<i64,
-[{ return Imm >=-24 && Imm <= 21 && (Imm % 3) == 0x0; }]> {
+[{ return Imm >=-24 && Imm <= 21 && (Imm % 3) == 0x0; }], SImmS3XForm> {
let PrintMethod = "printImmScale<3>";
let ParserMatchClass = SImm4s3Operand;
let DecoderMethod = "DecodeSImm<4>";
}
def simm4s4 : Operand<i64>, ImmLeaf<i64,
-[{ return Imm >=-32 && Imm <= 28 && (Imm % 4) == 0x0; }]> {
+[{ return Imm >=-32 && Imm <= 28 && (Imm % 4) == 0x0; }], SImmS4XForm> {
let PrintMethod = "printImmScale<4>";
let ParserMatchClass = SImm4s4Operand;
let DecoderMethod = "DecodeSImm<4>";
}
def simm4s16 : Operand<i64>, ImmLeaf<i64,
-[{ return Imm >=-128 && Imm <= 112 && (Imm % 16) == 0x0; }]> {
+[{ return Imm >=-128 && Imm <= 112 && (Imm % 16) == 0x0; }], SImmS16XForm> {
let PrintMethod = "printImmScale<16>";
let ParserMatchClass = SImm4s16Operand;
let DecoderMethod = "DecodeSImm<4>";
}
+def simm4s32 : Operand<i64>, ImmLeaf<i64,
+[{ return Imm >=-256 && Imm <= 224 && (Imm % 32) == 0x0; }]> {
+ let PrintMethod = "printImmScale<32>";
+ let ParserMatchClass = SImm4s32Operand;
+ let DecoderMethod = "DecodeSImm<4>";
+}
def Imm1_8Operand : AsmImmRange<1, 8>;
def Imm1_16Operand : AsmImmRange<1, 16>;
@@ -647,6 +704,13 @@ def tvecshiftR32 : Operand<i32>, TImmLeaf<i32, [{
let DecoderMethod = "DecodeVecShiftR32Imm";
let ParserMatchClass = Imm1_32Operand;
}
+def tvecshiftR64 : Operand<i32>, TImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65);
+}]> {
+ let EncoderMethod = "getVecShiftR64OpValue";
+ let DecoderMethod = "DecodeVecShiftR64Imm";
+ let ParserMatchClass = Imm1_64Operand;
+}
def Imm0_1Operand : AsmImmRange<0, 1>;
def Imm0_7Operand : AsmImmRange<0, 7>;
@@ -683,6 +747,36 @@ def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_63Operand;
}
+// Same as vecshiftL#N, but use TargetConstant (TimmLeaf) instead of Constant
+// (ImmLeaf)
+def tvecshiftL8 : Operand<i32>, TImmLeaf<i32, [{
+ return (((uint32_t)Imm) < 8);
+}]> {
+ let EncoderMethod = "getVecShiftL8OpValue";
+ let DecoderMethod = "DecodeVecShiftL8Imm";
+ let ParserMatchClass = Imm0_7Operand;
+}
+def tvecshiftL16 : Operand<i32>, TImmLeaf<i32, [{
+ return (((uint32_t)Imm) < 16);
+}]> {
+ let EncoderMethod = "getVecShiftL16OpValue";
+ let DecoderMethod = "DecodeVecShiftL16Imm";
+ let ParserMatchClass = Imm0_15Operand;
+}
+def tvecshiftL32 : Operand<i32>, TImmLeaf<i32, [{
+ return (((uint32_t)Imm) < 32);
+}]> {
+ let EncoderMethod = "getVecShiftL32OpValue";
+ let DecoderMethod = "DecodeVecShiftL32Imm";
+ let ParserMatchClass = Imm0_31Operand;
+}
+def tvecshiftL64 : Operand<i32>, TImmLeaf<i32, [{
+ return (((uint32_t)Imm) < 64);
+}]> {
+ let EncoderMethod = "getVecShiftL64OpValue";
+ let DecoderMethod = "DecodeVecShiftL64Imm";
+ let ParserMatchClass = Imm0_63Operand;
+}
// Crazy immediate formats used by 32-bit and 64-bit logical immediate
// instructions for splatting repeating bit patterns across the immediate.
@@ -796,7 +890,7 @@ def imm0_31 : Operand<i64>, ImmLeaf<i64, [{
}
// timm0_31 predicate - same as imm0_31, but use TargetConstant (TimmLeaf)
-// instead of Contant (ImmLeaf)
+// instead of Constant (ImmLeaf)
def timm0_31 : Operand<i64>, TImmLeaf<i64, [{
return ((uint64_t)Imm) < 32;
}]> {
@@ -832,7 +926,7 @@ def imm0_7 : Operand<i64>, ImmLeaf<i64, [{
}
// imm32_0_7 predicate - True if the 32-bit immediate is in the range [0,7]
-def imm32_0_7 : Operand<i32>, ImmLeaf<i32, [{
+def imm32_0_7 : Operand<i32>, TImmLeaf<i32, [{
return ((uint32_t)Imm) < 8;
}]> {
let ParserMatchClass = Imm0_7Operand;
@@ -1091,29 +1185,44 @@ class AsmVectorIndex<int Min, int Max, string NamePrefix=""> : AsmOperandClass {
let RenderMethod = "addVectorIndexOperands";
}
-class AsmVectorIndexOpnd<ValueType ty, AsmOperandClass mc, code pred>
- : Operand<ty>, ImmLeaf<ty, pred> {
+class AsmVectorIndexOpnd<ValueType ty, AsmOperandClass mc>
+ : Operand<ty> {
let ParserMatchClass = mc;
let PrintMethod = "printVectorIndex";
}
+multiclass VectorIndex<ValueType ty, AsmOperandClass mc, code pred> {
+ def "" : AsmVectorIndexOpnd<ty, mc>, ImmLeaf<ty, pred>;
+ def _timm : AsmVectorIndexOpnd<ty, mc>, TImmLeaf<ty, pred>;
+}
+
def VectorIndex1Operand : AsmVectorIndex<1, 1>;
def VectorIndexBOperand : AsmVectorIndex<0, 15>;
def VectorIndexHOperand : AsmVectorIndex<0, 7>;
def VectorIndexSOperand : AsmVectorIndex<0, 3>;
def VectorIndexDOperand : AsmVectorIndex<0, 1>;
-def VectorIndex1 : AsmVectorIndexOpnd<i64, VectorIndex1Operand, [{ return ((uint64_t)Imm) == 1; }]>;
-def VectorIndexB : AsmVectorIndexOpnd<i64, VectorIndexBOperand, [{ return ((uint64_t)Imm) < 16; }]>;
-def VectorIndexH : AsmVectorIndexOpnd<i64, VectorIndexHOperand, [{ return ((uint64_t)Imm) < 8; }]>;
-def VectorIndexS : AsmVectorIndexOpnd<i64, VectorIndexSOperand, [{ return ((uint64_t)Imm) < 4; }]>;
-def VectorIndexD : AsmVectorIndexOpnd<i64, VectorIndexDOperand, [{ return ((uint64_t)Imm) < 2; }]>;
-
-def VectorIndex132b : AsmVectorIndexOpnd<i32, VectorIndex1Operand, [{ return ((uint64_t)Imm) == 1; }]>;
-def VectorIndexB32b : AsmVectorIndexOpnd<i32, VectorIndexBOperand, [{ return ((uint64_t)Imm) < 16; }]>;
-def VectorIndexH32b : AsmVectorIndexOpnd<i32, VectorIndexHOperand, [{ return ((uint64_t)Imm) < 8; }]>;
-def VectorIndexS32b : AsmVectorIndexOpnd<i32, VectorIndexSOperand, [{ return ((uint64_t)Imm) < 4; }]>;
-def VectorIndexD32b : AsmVectorIndexOpnd<i32, VectorIndexDOperand, [{ return ((uint64_t)Imm) < 2; }]>;
+defm VectorIndex1 : VectorIndex<i64, VectorIndex1Operand,
+ [{ return ((uint64_t)Imm) == 1; }]>;
+defm VectorIndexB : VectorIndex<i64, VectorIndexBOperand,
+ [{ return ((uint64_t)Imm) < 16; }]>;
+defm VectorIndexH : VectorIndex<i64, VectorIndexHOperand,
+ [{ return ((uint64_t)Imm) < 8; }]>;
+defm VectorIndexS : VectorIndex<i64, VectorIndexSOperand,
+ [{ return ((uint64_t)Imm) < 4; }]>;
+defm VectorIndexD : VectorIndex<i64, VectorIndexDOperand,
+ [{ return ((uint64_t)Imm) < 2; }]>;
+
+defm VectorIndex132b : VectorIndex<i32, VectorIndex1Operand,
+ [{ return ((uint64_t)Imm) == 1; }]>;
+defm VectorIndexB32b : VectorIndex<i32, VectorIndexBOperand,
+ [{ return ((uint64_t)Imm) < 16; }]>;
+defm VectorIndexH32b : VectorIndex<i32, VectorIndexHOperand,
+ [{ return ((uint64_t)Imm) < 8; }]>;
+defm VectorIndexS32b : VectorIndex<i32, VectorIndexSOperand,
+ [{ return ((uint64_t)Imm) < 4; }]>;
+defm VectorIndexD32b : VectorIndex<i32, VectorIndexDOperand,
+ [{ return ((uint64_t)Imm) < 2; }]>;
def SVEVectorIndexExtDupBOperand : AsmVectorIndex<0, 63, "SVE">;
def SVEVectorIndexExtDupHOperand : AsmVectorIndex<0, 31, "SVE">;
@@ -1121,16 +1230,21 @@ def SVEVectorIndexExtDupSOperand : AsmVectorIndex<0, 15, "SVE">;
def SVEVectorIndexExtDupDOperand : AsmVectorIndex<0, 7, "SVE">;
def SVEVectorIndexExtDupQOperand : AsmVectorIndex<0, 3, "SVE">;
-def sve_elm_idx_extdup_b
- : AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupBOperand, [{ return ((uint64_t)Imm) < 64; }]>;
-def sve_elm_idx_extdup_h
- : AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupHOperand, [{ return ((uint64_t)Imm) < 32; }]>;
-def sve_elm_idx_extdup_s
- : AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupSOperand, [{ return ((uint64_t)Imm) < 16; }]>;
-def sve_elm_idx_extdup_d
- : AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupDOperand, [{ return ((uint64_t)Imm) < 8; }]>;
-def sve_elm_idx_extdup_q
- : AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupQOperand, [{ return ((uint64_t)Imm) < 4; }]>;
+defm sve_elm_idx_extdup_b
+ : VectorIndex<i64, SVEVectorIndexExtDupBOperand,
+ [{ return ((uint64_t)Imm) < 64; }]>;
+defm sve_elm_idx_extdup_h
+ : VectorIndex<i64, SVEVectorIndexExtDupHOperand,
+ [{ return ((uint64_t)Imm) < 32; }]>;
+defm sve_elm_idx_extdup_s
+ : VectorIndex<i64, SVEVectorIndexExtDupSOperand,
+ [{ return ((uint64_t)Imm) < 16; }]>;
+defm sve_elm_idx_extdup_d
+ : VectorIndex<i64, SVEVectorIndexExtDupDOperand,
+ [{ return ((uint64_t)Imm) < 8; }]>;
+defm sve_elm_idx_extdup_q
+ : VectorIndex<i64, SVEVectorIndexExtDupQOperand,
+ [{ return ((uint64_t)Imm) < 4; }]>;
// 8-bit immediate for AdvSIMD where 64-bit values of the form:
// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
@@ -1533,6 +1647,8 @@ class BaseAuthLoad<bit M, bit W, dag oops, dag iops, string asm,
let Inst{10} = 1;
let Inst{9-5} = Rn;
let Inst{4-0} = Rt;
+
+ let DecoderMethod = "DecodeAuthLoadInstruction";
}
multiclass AuthLoad<bit M, string asm, Operand opr> {
@@ -4333,14 +4449,14 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
SDPatternOperator OpN> {
// Unscaled half-precision to 32-bit
def UWHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, GPR32, asm,
- [(set GPR32:$Rd, (OpN FPR16:$Rn))]> {
+ [(set GPR32:$Rd, (OpN (f16 FPR16:$Rn)))]> {
let Inst{31} = 0; // 32-bit GPR flag
let Predicates = [HasFullFP16];
}
// Unscaled half-precision to 64-bit
def UXHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, GPR64, asm,
- [(set GPR64:$Rd, (OpN FPR16:$Rn))]> {
+ [(set GPR64:$Rd, (OpN (f16 FPR16:$Rn)))]> {
let Inst{31} = 1; // 64-bit GPR flag
let Predicates = [HasFullFP16];
}
@@ -4375,7 +4491,7 @@ multiclass FPToIntegerScaled<bits<2> rmode, bits<3> opcode, string asm,
// Scaled half-precision to 32-bit
def SWHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR32,
fixedpoint_f16_i32, asm,
- [(set GPR32:$Rd, (OpN (fmul FPR16:$Rn,
+ [(set GPR32:$Rd, (OpN (fmul (f16 FPR16:$Rn),
fixedpoint_f16_i32:$scale)))]> {
let Inst{31} = 0; // 32-bit GPR flag
let scale{5} = 1;
@@ -4385,7 +4501,7 @@ multiclass FPToIntegerScaled<bits<2> rmode, bits<3> opcode, string asm,
// Scaled half-precision to 64-bit
def SXHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR64,
fixedpoint_f16_i64, asm,
- [(set GPR64:$Rd, (OpN (fmul FPR16:$Rn,
+ [(set GPR64:$Rd, (OpN (fmul (f16 FPR16:$Rn),
fixedpoint_f16_i64:$scale)))]> {
let Inst{31} = 1; // 64-bit GPR flag
let Predicates = [HasFullFP16];
@@ -4501,7 +4617,7 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDNode node> {
// Scaled
def SWHri: BaseIntegerToFP<isUnsigned, GPR32, FPR16, fixedpoint_f16_i32, asm,
- [(set FPR16:$Rd,
+ [(set (f16 FPR16:$Rd),
(fdiv (node GPR32:$Rn),
fixedpoint_f16_i32:$scale))]> {
let Inst{31} = 0; // 32-bit GPR flag
@@ -4529,7 +4645,7 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDNode node> {
}
def SXHri: BaseIntegerToFP<isUnsigned, GPR64, FPR16, fixedpoint_f16_i64, asm,
- [(set FPR16:$Rd,
+ [(set (f16 FPR16:$Rd),
(fdiv (node GPR64:$Rn),
fixedpoint_f16_i64:$scale))]> {
let Inst{31} = 1; // 64-bit GPR flag
@@ -4702,7 +4818,7 @@ class BaseFPConversion<bits<2> type, bits<2> opcode, RegisterClass dstType,
multiclass FPConversion<string asm> {
// Double-precision to Half-precision
def HDr : BaseFPConversion<0b01, 0b11, FPR16, FPR64, asm,
- [(set FPR16:$Rd, (any_fpround FPR64:$Rn))]>;
+ [(set (f16 FPR16:$Rd), (any_fpround FPR64:$Rn))]>;
// Double-precision to Single-precision
def SDr : BaseFPConversion<0b01, 0b00, FPR32, FPR64, asm,
@@ -4710,11 +4826,11 @@ multiclass FPConversion<string asm> {
// Half-precision to Double-precision
def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm,
- [(set FPR64:$Rd, (fpextend FPR16:$Rn))]>;
+ [(set FPR64:$Rd, (fpextend (f16 FPR16:$Rn)))]>;
// Half-precision to Single-precision
def SHr : BaseFPConversion<0b11, 0b00, FPR32, FPR16, asm,
- [(set FPR32:$Rd, (fpextend FPR16:$Rn))]>;
+ [(set FPR32:$Rd, (fpextend (f16 FPR16:$Rn)))]>;
// Single-precision to Double-precision
def DSr : BaseFPConversion<0b00, 0b01, FPR64, FPR32, asm,
@@ -4722,7 +4838,7 @@ multiclass FPConversion<string asm> {
// Single-precision to Half-precision
def HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm,
- [(set FPR16:$Rd, (any_fpround FPR32:$Rn))]>;
+ [(set (f16 FPR16:$Rd), (any_fpround FPR32:$Rn))]>;
}
//---
@@ -4824,7 +4940,7 @@ multiclass TwoOperandFPData<bits<4> opcode, string asm,
multiclass TwoOperandFPDataNeg<bits<4> opcode, string asm, SDNode node> {
def Hrr : BaseTwoOperandFPData<opcode, FPR16, asm,
- [(set FPR16:$Rd, (fneg (node FPR16:$Rn, (f16 FPR16:$Rm))))]> {
+ [(set (f16 FPR16:$Rd), (fneg (node (f16 FPR16:$Rn), (f16 FPR16:$Rm))))]> {
let Inst{23-22} = 0b11; // 16-bit size flag
let Predicates = [HasFullFP16];
}
@@ -4866,7 +4982,7 @@ class BaseThreeOperandFPData<bit isNegated, bit isSub,
multiclass ThreeOperandFPData<bit isNegated, bit isSub,string asm,
SDPatternOperator node> {
def Hrrr : BaseThreeOperandFPData<isNegated, isSub, FPR16, asm,
- [(set FPR16:$Rd,
+ [(set (f16 FPR16:$Rd),
(node (f16 FPR16:$Rn), (f16 FPR16:$Rm), (f16 FPR16:$Ra)))]> {
let Inst{23-22} = 0b11; // 16-bit size flag
let Predicates = [HasFullFP16];
@@ -4928,7 +5044,7 @@ multiclass FPComparison<bit signalAllNans, string asm,
SDPatternOperator OpNode = null_frag> {
let Defs = [NZCV] in {
def Hrr : BaseTwoOperandFPComparison<signalAllNans, FPR16, asm,
- [(OpNode FPR16:$Rn, (f16 FPR16:$Rm)), (implicit NZCV)]> {
+ [(OpNode (f16 FPR16:$Rn), (f16 FPR16:$Rm)), (implicit NZCV)]> {
let Inst{23-22} = 0b11;
let Predicates = [HasFullFP16];
}
@@ -5142,6 +5258,47 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode,
let Inst{4-0} = Rd;
}
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDThreeSameVectorPseudo<RegisterOperand regtype, list<dag> pattern>
+ : Pseudo<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), pattern>,
+ Sched<[WriteV]>;
+
+multiclass SIMDLogicalThreeVectorPseudo<SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDThreeSameVectorPseudo<V64,
+ [(set (v8i8 V64:$dst),
+ (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
+ def v16i8 : BaseSIMDThreeSameVectorPseudo<V128,
+ [(set (v16i8 V128:$dst),
+ (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn),
+ (v16i8 V128:$Rm)))]>;
+
+ def : Pat<(v4i16 (OpNode (v4i16 V64:$LHS), (v4i16 V64:$MHS),
+ (v4i16 V64:$RHS))),
+ (!cast<Instruction>(NAME#"v8i8")
+ V64:$LHS, V64:$MHS, V64:$RHS)>;
+ def : Pat<(v2i32 (OpNode (v2i32 V64:$LHS), (v2i32 V64:$MHS),
+ (v2i32 V64:$RHS))),
+ (!cast<Instruction>(NAME#"v8i8")
+ V64:$LHS, V64:$MHS, V64:$RHS)>;
+ def : Pat<(v1i64 (OpNode (v1i64 V64:$LHS), (v1i64 V64:$MHS),
+ (v1i64 V64:$RHS))),
+ (!cast<Instruction>(NAME#"v8i8")
+ V64:$LHS, V64:$MHS, V64:$RHS)>;
+
+ def : Pat<(v8i16 (OpNode (v8i16 V128:$LHS), (v8i16 V128:$MHS),
+ (v8i16 V128:$RHS))),
+ (!cast<Instruction>(NAME#"v16i8")
+ V128:$LHS, V128:$MHS, V128:$RHS)>;
+ def : Pat<(v4i32 (OpNode (v4i32 V128:$LHS), (v4i32 V128:$MHS),
+ (v4i32 V128:$RHS))),
+ (!cast<Instruction>(NAME#"v16i8")
+ V128:$LHS, V128:$MHS, V128:$RHS)>;
+ def : Pat<(v2i64 (OpNode (v2i64 V128:$LHS), (v2i64 V128:$MHS),
+ (v2i64 V128:$RHS))),
+ (!cast<Instruction>(NAME#"v16i8")
+ V128:$LHS, V128:$MHS, V128:$RHS)>;
+}
+
// All operand sizes distinguished in the encoding.
multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
@@ -5362,7 +5519,7 @@ multiclass SIMDLogicalThreeVector<bit U, bits<2> size, string asm,
}
multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size,
- string asm, SDPatternOperator OpNode> {
+ string asm, SDPatternOperator OpNode = null_frag> {
def v8i8 : BaseSIMDThreeSameVectorTied<0, U, {size,1}, 0b00011, V64,
asm, ".8b",
[(set (v8i8 V64:$dst),
@@ -5402,11 +5559,11 @@ multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size,
// ARMv8.2-A Dot Product Instructions (Vector): These instructions extract
// bytes from S-sized elements.
-class BaseSIMDThreeSameVectorDot<bit Q, bit U, string asm, string kind1,
+class BaseSIMDThreeSameVectorDot<bit Q, bit U, bit Mixed, string asm, string kind1,
string kind2, RegisterOperand RegType,
ValueType AccumType, ValueType InputType,
SDPatternOperator OpNode> :
- BaseSIMDThreeSameVectorTied<Q, U, 0b100, 0b10010, RegType, asm, kind1,
+ BaseSIMDThreeSameVectorTied<Q, U, 0b100, {0b1001, Mixed}, RegType, asm, kind1,
[(set (AccumType RegType:$dst),
(OpNode (AccumType RegType:$Rd),
(InputType RegType:$Rn),
@@ -5414,10 +5571,10 @@ class BaseSIMDThreeSameVectorDot<bit Q, bit U, string asm, string kind1,
let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}");
}
-multiclass SIMDThreeSameVectorDot<bit U, string asm, SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVectorDot<0, U, asm, ".2s", ".8b", V64,
+multiclass SIMDThreeSameVectorDot<bit U, bit Mixed, string asm, SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDThreeSameVectorDot<0, U, Mixed, asm, ".2s", ".8b", V64,
v2i32, v8i8, OpNode>;
- def v16i8 : BaseSIMDThreeSameVectorDot<1, U, asm, ".4s", ".16b", V128,
+ def v16i8 : BaseSIMDThreeSameVectorDot<1, U, Mixed, asm, ".4s", ".16b", V128,
v4i32, v16i8, OpNode>;
}
@@ -6581,13 +6738,13 @@ multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm,
multiclass SIMDFPThreeScalar<bit U, bit S, bits<3> opc, string asm,
SDPatternOperator OpNode = null_frag> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
- def #NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm,
+ def NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm,
[(set (f64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
- def #NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm,
+ def NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm,
[(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>;
let Predicates = [HasNEON, HasFullFP16] in {
- def #NAME#16 : BaseSIMDThreeScalar<U, {S,0b10}, {0b00,opc}, FPR16, asm,
- [(set FPR16:$Rd, (OpNode FPR16:$Rn, FPR16:$Rm))]>;
+ def NAME#16 : BaseSIMDThreeScalar<U, {S,0b10}, {0b00,opc}, FPR16, asm,
+ [(set (f16 FPR16:$Rd), (OpNode (f16 FPR16:$Rn), (f16 FPR16:$Rm)))]>;
} // Predicates = [HasNEON, HasFullFP16]
}
@@ -6598,12 +6755,12 @@ multiclass SIMDFPThreeScalar<bit U, bit S, bits<3> opc, string asm,
multiclass SIMDThreeScalarFPCmp<bit U, bit S, bits<3> opc, string asm,
SDPatternOperator OpNode = null_frag> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
- def #NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm,
+ def NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm,
[(set (i64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
- def #NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm,
+ def NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm,
[(set (i32 FPR32:$Rd), (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm)))]>;
let Predicates = [HasNEON, HasFullFP16] in {
- def #NAME#16 : BaseSIMDThreeScalar<U, {S,0b10}, {0b00,opc}, FPR16, asm,
+ def NAME#16 : BaseSIMDThreeScalar<U, {S,0b10}, {0b00,opc}, FPR16, asm,
[]>;
} // Predicates = [HasNEON, HasFullFP16]
}
@@ -6794,7 +6951,7 @@ multiclass SIMDFPTwoScalarCVT<bit U, bit S, bits<5> opc, string asm,
[(set FPR32:$Rd, (OpNode (f32 FPR32:$Rn)))]>;
let Predicates = [HasNEON, HasFullFP16] in {
def v1i16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,
- [(set FPR16:$Rd, (OpNode (f16 FPR16:$Rn)))]>;
+ [(set (f16 FPR16:$Rd), (OpNode (f16 FPR16:$Rn)))]>;
}
}
@@ -6936,10 +7093,10 @@ multiclass SIMDFPAcrossLanes<bits<5> opcode, bit sz1, string asm,
let Predicates = [HasNEON, HasFullFP16] in {
def v4i16v : BaseSIMDAcrossLanes<0, 0, {sz1, 0}, opcode, FPR16, V64,
asm, ".4h",
- [(set FPR16:$Rd, (intOp (v4f16 V64:$Rn)))]>;
+ [(set (f16 FPR16:$Rd), (intOp (v4f16 V64:$Rn)))]>;
def v8i16v : BaseSIMDAcrossLanes<1, 0, {sz1, 0}, opcode, FPR16, V128,
asm, ".8h",
- [(set FPR16:$Rd, (intOp (v8f16 V128:$Rn)))]>;
+ [(set (f16 FPR16:$Rd), (intOp (v8f16 V128:$Rn)))]>;
} // Predicates = [HasNEON, HasFullFP16]
def v4i32v : BaseSIMDAcrossLanes<1, 1, {sz1, 0}, opcode, FPR32, V128,
asm, ".4s",
@@ -7136,7 +7293,7 @@ class SIMDInsMainMovAlias<string size, Instruction inst,
(inst V128:$dst, idxtype:$idx, regtype:$src)>;
class SIMDInsElementMovAlias<string size, Instruction inst,
Operand idxtype>
- : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # size # "$idx2" #
+ : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # size # "$idx2"
# "|" # size #"\t$dst$idx, $src$idx2}",
(inst V128:$dst, idxtype:$idx, V128:$src, idxtype:$idx2)>;
@@ -7377,7 +7534,7 @@ class BaseSIMDScalarCPY<RegisterClass regtype, RegisterOperand vectype,
class SIMDScalarCPYAlias<string asm, string size, Instruction inst,
RegisterClass regtype, RegisterOperand vectype, Operand idxtype>
- : InstAlias<asm # "{\t$dst, $src" # size # "$index" #
+ : InstAlias<asm # "{\t$dst, $src" # size # "$index"
# "|\t$dst, $src$index}",
(inst regtype:$dst, vectype:$src, idxtype:$index), 0>;
@@ -7651,13 +7808,152 @@ class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
let Inst{4-0} = Rd;
}
+
+//----------------------------------------------------------------------------
+// Armv8.6 BFloat16 Extension
+//----------------------------------------------------------------------------
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in {
+
+class BaseSIMDThreeSameVectorBFDot<bit Q, bit U, string asm, string kind1,
+ string kind2, RegisterOperand RegType,
+ ValueType AccumType, ValueType InputType>
+ : BaseSIMDThreeSameVectorTied<Q, U, 0b010, 0b11111, RegType, asm, kind1, [(set (AccumType RegType:$dst),
+ (int_aarch64_neon_bfdot (AccumType RegType:$Rd),
+ (InputType RegType:$Rn),
+ (InputType RegType:$Rm)))]> {
+ let AsmString = !strconcat(asm,
+ "{\t$Rd" # kind1 # ", $Rn" # kind2 #
+ ", $Rm" # kind2 # "}");
+}
+
+multiclass SIMDThreeSameVectorBFDot<bit U, string asm> {
+ def v4bf16 : BaseSIMDThreeSameVectorBFDot<0, U, asm, ".2s", ".4h", V64,
+ v2f32, v8i8>;
+ def v8bf16 : BaseSIMDThreeSameVectorBFDot<1, U, asm, ".4s", ".8h", V128,
+ v4f32, v16i8>;
+}
+
+class BaseSIMDThreeSameVectorBF16DotI<bit Q, bit U, string asm,
+ string dst_kind, string lhs_kind,
+ string rhs_kind,
+ RegisterOperand RegType,
+ ValueType AccumType,
+ ValueType InputType>
+ : BaseSIMDIndexedTied<Q, U, 0b0, 0b01, 0b1111,
+ RegType, RegType, V128, VectorIndexS,
+ asm, "", dst_kind, lhs_kind, rhs_kind,
+ [(set (AccumType RegType:$dst),
+ (AccumType (int_aarch64_neon_bfdot
+ (AccumType RegType:$Rd),
+ (InputType RegType:$Rn),
+ (InputType (bitconvert (AccumType
+ (AArch64duplane32 (v4f32 V128:$Rm),
+ VectorIndexH:$idx)))))))]> {
+
+ bits<2> idx;
+ let Inst{21} = idx{0}; // L
+ let Inst{11} = idx{1}; // H
+}
+
+multiclass SIMDThreeSameVectorBF16DotI<bit U, string asm> {
+
+ def v4bf16 : BaseSIMDThreeSameVectorBF16DotI<0, U, asm, ".2s", ".4h",
+ ".2h", V64, v2f32, v8i8>;
+ def v8bf16 : BaseSIMDThreeSameVectorBF16DotI<1, U, asm, ".4s", ".8h",
+ ".2h", V128, v4f32, v16i8>;
+}
+
+class SIMDBF16MLAL<bit Q, string asm, SDPatternOperator OpNode>
+ : BaseSIMDThreeSameVectorTied<Q, 0b1, 0b110, 0b11111, V128, asm, ".4s",
+ [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd),
+ (v16i8 V128:$Rn),
+ (v16i8 V128:$Rm)))]> {
+ let AsmString = !strconcat(asm, "{\t$Rd.4s, $Rn.8h, $Rm.8h}");
+}
+
+class SIMDBF16MLALIndex<bit Q, string asm, SDPatternOperator OpNode>
+ : I<(outs V128:$dst),
+ (ins V128:$Rd, V128:$Rn, V128_lo:$Rm, VectorIndexH:$idx), asm,
+ "{\t$Rd.4s, $Rn.8h, $Rm.h$idx}", "$Rd = $dst",
+ [(set (v4f32 V128:$dst),
+ (v4f32 (OpNode (v4f32 V128:$Rd),
+ (v16i8 V128:$Rn),
+ (v16i8 (bitconvert (v8bf16
+ (AArch64duplane16 (v8bf16 V128_lo:$Rm),
+ VectorIndexH:$idx)))))))]>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<4> Rm;
+ bits<3> idx;
+
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29-22} = 0b00111111;
+ let Inst{21-20} = idx{1-0};
+ let Inst{19-16} = Rm;
+ let Inst{15-12} = 0b1111;
+ let Inst{11} = idx{2}; // H
+ let Inst{10} = 0;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+class SIMDThreeSameVectorBF16MatrixMul<string asm>
+ : BaseSIMDThreeSameVectorTied<1, 1, 0b010, 0b11101,
+ V128, asm, ".4s",
+ [(set (v4f32 V128:$dst),
+ (int_aarch64_neon_bfmmla (v4f32 V128:$Rd),
+ (v16i8 V128:$Rn),
+ (v16i8 V128:$Rm)))]> {
+ let AsmString = !strconcat(asm, "{\t$Rd", ".4s", ", $Rn", ".8h",
+ ", $Rm", ".8h", "}");
+}
+
+class SIMD_BFCVTN
+ : BaseSIMDMixedTwoVector<0, 0, 0b10, 0b10110, V128, V128,
+ "bfcvtn", ".4h", ".4s",
+ [(set (v8bf16 V128:$Rd),
+ (int_aarch64_neon_bfcvtn (v4f32 V128:$Rn)))]>;
+
+class SIMD_BFCVTN2
+ : BaseSIMDMixedTwoVectorTied<1, 0, 0b10, 0b10110, V128, V128,
+ "bfcvtn2", ".8h", ".4s",
+ [(set (v8bf16 V128:$dst),
+ (int_aarch64_neon_bfcvtn2 (v8bf16 V128:$Rd), (v4f32 V128:$Rn)))]>;
+
+class BF16ToSinglePrecision<string asm>
+ : I<(outs FPR16:$Rd), (ins FPR32:$Rn), asm, "\t$Rd, $Rn", "",
+ [(set (bf16 FPR16:$Rd), (int_aarch64_neon_bfcvt (f32 FPR32:$Rn)))]>,
+ Sched<[WriteFCvt]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-10} = 0b0001111001100011010000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+} // End of let mayStore = 0, mayLoad = 0, hasSideEffects = 0
+
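The BFDOT patterns in the BFloat16 block above take v16i8/v8i8 operands because the bf16 payload is re-bitcast during selection, but the operation they model is a 2-way bf16 dot product accumulated into f32 lanes. A minimal standalone sketch of one destination lane (plain C++, not LLVM code; bf16 is treated as the high half of an f32 and the architected rounding/flush details are ignored):

#include <cassert>
#include <cstdint>
#include <cstring>

// Widen a bf16 bit pattern to f32: bf16 is the high 16 bits of an IEEE f32.
float bf16ToFloat(uint16_t B) {
  uint32_t Bits = (uint32_t)B << 16;
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}

// One 32-bit destination lane of BFDOT: Acc += a0*b0 + a1*b1, where a0/a1 and
// b0/b1 are the two bf16 elements of Rn and Rm that overlap that lane.
float bfdotLane(float Acc, uint16_t A0, uint16_t A1, uint16_t B0, uint16_t B1) {
  return Acc + bf16ToFloat(A0) * bf16ToFloat(B0)
             + bf16ToFloat(A1) * bf16ToFloat(B1);
}

int main() {
  // 1.0 and 2.0 in bf16 are 0x3F80 and 0x4000.
  assert(bfdotLane(0.5f, 0x3F80, 0x4000, 0x3F80, 0x3F80) == 3.5f);
  return 0;
}
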
+//----------------------------------------------------------------------------
+// Armv8.6 Matrix Multiply Extension
+//----------------------------------------------------------------------------
+
+class SIMDThreeSameVectorMatMul<bit B, bit U, string asm, SDPatternOperator OpNode>
+ : BaseSIMDThreeSameVectorTied<1, U, 0b100, {0b1010, B}, V128, asm, ".4s",
+ [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd),
+ (v16i8 V128:$Rn),
+ (v16i8 V128:$Rm)))]> {
+ let AsmString = asm # "{\t$Rd.4s, $Rn.16b, $Rm.16b}";
+}
+
+//----------------------------------------------------------------------------
// ARMv8.2-A Dot Product Instructions (Indexed)
-class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, string asm, string dst_kind,
- string lhs_kind, string rhs_kind,
+class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, bit Mixed, bits<2> size, string asm,
+ string dst_kind, string lhs_kind, string rhs_kind,
RegisterOperand RegType,
ValueType AccumType, ValueType InputType,
SDPatternOperator OpNode> :
- BaseSIMDIndexedTied<Q, U, 0b0, 0b10, 0b1110, RegType, RegType, V128,
+ BaseSIMDIndexedTied<Q, U, 0b0, size, {0b111, Mixed}, RegType, RegType, V128,
VectorIndexS, asm, "", dst_kind, lhs_kind, rhs_kind,
[(set (AccumType RegType:$dst),
(AccumType (OpNode (AccumType RegType:$Rd),
@@ -7670,11 +7966,11 @@ class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, string asm, string dst_kind,
let Inst{11} = idx{1}; // H
}
-multiclass SIMDThreeSameVectorDotIndex<bit U, string asm,
+multiclass SIMDThreeSameVectorDotIndex<bit U, bit Mixed, bits<2> size, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVectorDotIndex<0, U, asm, ".2s", ".8b", ".4b",
+ def v8i8 : BaseSIMDThreeSameVectorDotIndex<0, U, Mixed, size, asm, ".2s", ".8b", ".4b",
V64, v2i32, v8i8, OpNode>;
- def v16i8 : BaseSIMDThreeSameVectorDotIndex<1, U, asm, ".4s", ".16b", ".4b",
+ def v16i8 : BaseSIMDThreeSameVectorDotIndex<1, U, Mixed, size, asm, ".4s", ".16b", ".4b",
V128, v4i32, v16i8, OpNode>;
}
@@ -7813,6 +8109,34 @@ multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm,
}
multiclass SIMDFPIndexedTiedPatterns<string INST, SDPatternOperator OpNode> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ // Patterns for f16: DUPLANE, DUP scalar and vector_extract.
+ def : Pat<(v8f16 (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn),
+ (AArch64duplane16 (v8f16 V128_lo:$Rm),
+ VectorIndexH:$idx))),
+ (!cast<Instruction>(INST # "v8i16_indexed")
+ V128:$Rd, V128:$Rn, V128_lo:$Rm, VectorIndexH:$idx)>;
+ def : Pat<(v8f16 (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn),
+ (AArch64dup (f16 FPR16Op_lo:$Rm)))),
+ (!cast<Instruction>(INST # "v8i16_indexed") V128:$Rd, V128:$Rn,
+ (SUBREG_TO_REG (i32 0), (f16 FPR16Op_lo:$Rm), hsub), (i64 0))>;
+
+ def : Pat<(v4f16 (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn),
+ (AArch64duplane16 (v8f16 V128_lo:$Rm),
+ VectorIndexH:$idx))),
+ (!cast<Instruction>(INST # "v4i16_indexed")
+ V64:$Rd, V64:$Rn, V128_lo:$Rm, VectorIndexH:$idx)>;
+ def : Pat<(v4f16 (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn),
+ (AArch64dup (f16 FPR16Op_lo:$Rm)))),
+ (!cast<Instruction>(INST # "v4i16_indexed") V64:$Rd, V64:$Rn,
+ (SUBREG_TO_REG (i32 0), (f16 FPR16Op_lo:$Rm), hsub), (i64 0))>;
+
+ def : Pat<(f16 (OpNode (f16 FPR16:$Rd), (f16 FPR16:$Rn),
+ (vector_extract (v8f16 V128_lo:$Rm), VectorIndexH:$idx))),
+ (!cast<Instruction>(INST # "v1i16_indexed") FPR16:$Rd, FPR16:$Rn,
+ V128_lo:$Rm, VectorIndexH:$idx)>;
+ } // Predicates = [HasNEON, HasFullFP16]
+
// 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar.
def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
(AArch64duplane32 (v4f32 V128:$Rm),
@@ -7847,15 +8171,11 @@ multiclass SIMDFPIndexedTiedPatterns<string INST, SDPatternOperator OpNode> {
(!cast<Instruction>(INST # "v2i64_indexed") V128:$Rd, V128:$Rn,
(SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;
- // 2 variants for 32-bit scalar version: extract from .2s or from .4s
+ // Covers 2 variants for 32-bit scalar version: extract from .2s or from .4s
def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
(vector_extract (v4f32 V128:$Rm), VectorIndexS:$idx))),
(!cast<Instruction>(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn,
V128:$Rm, VectorIndexS:$idx)>;
- def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
- (vector_extract (v2f32 V64:$Rm), VectorIndexS:$idx))),
- (!cast<Instruction>(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn,
- (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;
// 1 variant for 64-bit scalar version: extract from .1d or from .2d
def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
@@ -7940,6 +8260,64 @@ multiclass SIMDFPIndexedTied<bit U, bits<4> opc, string asm> {
}
}
+multiclass SIMDIndexedHSPatterns<SDPatternOperator OpNodeLane,
+ SDPatternOperator OpNodeLaneQ> {
+
+ def : Pat<(v4i16 (OpNodeLane
+ (v4i16 V64:$Rn), (v4i16 V64_lo:$Rm),
+ VectorIndexS32b:$idx)),
+ (!cast<Instruction>(NAME # v4i16_indexed) $Rn,
+ (SUBREG_TO_REG (i32 0), (v4i16 V64_lo:$Rm), dsub),
+ (UImmS1XForm $idx))>;
+
+ def : Pat<(v4i16 (OpNodeLaneQ
+ (v4i16 V64:$Rn), (v8i16 V128_lo:$Rm),
+ VectorIndexH32b:$idx)),
+ (!cast<Instruction>(NAME # v4i16_indexed) $Rn, $Rm,
+ (UImmS1XForm $idx))>;
+
+ def : Pat<(v8i16 (OpNodeLane
+ (v8i16 V128:$Rn), (v4i16 V64_lo:$Rm),
+ VectorIndexS32b:$idx)),
+ (!cast<Instruction>(NAME # v8i16_indexed) $Rn,
+ (SUBREG_TO_REG (i32 0), $Rm, dsub),
+ (UImmS1XForm $idx))>;
+
+ def : Pat<(v8i16 (OpNodeLaneQ
+ (v8i16 V128:$Rn), (v8i16 V128_lo:$Rm),
+ VectorIndexH32b:$idx)),
+ (!cast<Instruction>(NAME # v8i16_indexed) $Rn, $Rm,
+ (UImmS1XForm $idx))>;
+
+ def : Pat<(v2i32 (OpNodeLane
+ (v2i32 V64:$Rn), (v2i32 V64:$Rm),
+ VectorIndexD32b:$idx)),
+ (!cast<Instruction>(NAME # v2i32_indexed) $Rn,
+ (SUBREG_TO_REG (i32 0), (v2i32 V64_lo:$Rm), dsub),
+ (UImmS1XForm $idx))>;
+
+ def : Pat<(v2i32 (OpNodeLaneQ
+ (v2i32 V64:$Rn), (v4i32 V128:$Rm),
+ VectorIndexS32b:$idx)),
+ (!cast<Instruction>(NAME # v2i32_indexed) $Rn, $Rm,
+ (UImmS1XForm $idx))>;
+
+ def : Pat<(v4i32 (OpNodeLane
+ (v4i32 V128:$Rn), (v2i32 V64:$Rm),
+ VectorIndexD32b:$idx)),
+ (!cast<Instruction>(NAME # v4i32_indexed) $Rn,
+ (SUBREG_TO_REG (i32 0), $Rm, dsub),
+ (UImmS1XForm $idx))>;
+
+ def : Pat<(v4i32 (OpNodeLaneQ
+ (v4i32 V128:$Rn),
+ (v4i32 V128:$Rm),
+ VectorIndexS32b:$idx)),
+ (!cast<Instruction>(NAME # v4i32_indexed) $Rn, $Rm,
+ (UImmS1XForm $idx))>;
+
+}
+
multiclass SIMDIndexedHS<bit U, bits<4> opc, string asm,
SDPatternOperator OpNode> {
def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, V64, V64,
@@ -10154,15 +10532,15 @@ class ComplexRotationOperand<int Angle, int Remainder, string Type>
let DiagnosticType = "InvalidComplexRotation" # Type;
let Name = "ComplexRotation" # Type;
}
-def complexrotateop : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 270; }],
- SDNodeXForm<imm, [{
+def complexrotateop : Operand<i32>, TImmLeaf<i32, [{ return Imm >= 0 && Imm <= 270; }],
+ SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((N->getSExtValue() / 90), SDLoc(N), MVT::i32);
}]>> {
let ParserMatchClass = ComplexRotationOperand<90, 0, "Even">;
let PrintMethod = "printComplexRotationOp<90, 0>";
}
-def complexrotateopodd : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 270; }],
- SDNodeXForm<imm, [{
+def complexrotateopodd : Operand<i32>, TImmLeaf<i32, [{ return Imm >= 0 && Imm <= 270; }],
+ SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(((N->getSExtValue() - 90) / 180), SDLoc(N), MVT::i32);
}]>> {
let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd">;
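The switch to TImmLeaf above leaves the immediate transforms unchanged: the even rotation operand (used by the complex-number FCMLA-style forms) accepts 0/90/180/270 and encodes Imm/90, while the odd operand (FCADD-style) accepts 90/270 and encodes (Imm-90)/180. A small standalone sketch of that arithmetic (plain C++; function names are illustrative only):

#include <cassert>

// Mirrors the even SDNodeXForm above: 0, 90, 180, 270 -> 0, 1, 2, 3.
int encodeEvenRotation(int Imm) {
  assert(Imm >= 0 && Imm <= 270 && Imm % 90 == 0);
  return Imm / 90;
}

// Mirrors the odd SDNodeXForm above: 90, 270 -> 0, 1.
int encodeOddRotation(int Imm) {
  assert(Imm == 90 || Imm == 270);
  return (Imm - 90) / 180;
}

int main() {
  assert(encodeEvenRotation(180) == 2);
  assert(encodeOddRotation(270) == 1);
  return 0;
}
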
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrGISel.td
new file mode 100644
index 000000000000..a0e7c782f68c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -0,0 +1,124 @@
+//=----- AArch64InstrGISel.td - AArch64 GISel target pseudos -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 GlobalISel target pseudo instruction definitions. This is kept
+// separately from the other tablegen files for organizational purposes, but
+// shares the same infrastructure.
+//
+//===----------------------------------------------------------------------===//
+
+
+class AArch64GenericInstruction : GenericInstruction {
+ let Namespace = "AArch64";
+}
+
+// A pseudo to represent a relocatable add instruction as part of address
+// computation.
+def G_ADD_LOW : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src, type2:$imm);
+ let hasSideEffects = 0;
+}
+
+// Pseudo for a rev16 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_REV16 : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
+// Pseudo for a rev32 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_REV32 : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
+// Pseudo for a rev64 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_REV64 : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
+// Represents an uzp1 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_UZP1 : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$v1, type0:$v2);
+ let hasSideEffects = 0;
+}
+
+// Represents an uzp2 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_UZP2 : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$v1, type0:$v2);
+ let hasSideEffects = 0;
+}
+
+// Represents a zip1 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_ZIP1 : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$v1, type0:$v2);
+ let hasSideEffects = 0;
+}
+
+// Represents a zip2 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_ZIP2 : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$v1, type0:$v2);
+ let hasSideEffects = 0;
+}
+
+// Represents a dup instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_DUP: AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$lane);
+ let hasSideEffects = 0;
+}
+// Represents a trn1 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_TRN1 : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$v1, type0:$v2);
+ let hasSideEffects = 0;
+}
+
+// Represents a trn2 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_TRN2 : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$v1, type0:$v2);
+ let hasSideEffects = 0;
+}
+
+// Represents an ext instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_EXT: AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$v1, type0:$v2, untyped_imm_0:$imm);
+}
+
+def : GINodeEquiv<G_REV16, AArch64rev16>;
+def : GINodeEquiv<G_REV32, AArch64rev32>;
+def : GINodeEquiv<G_REV64, AArch64rev64>;
+def : GINodeEquiv<G_UZP1, AArch64uzp1>;
+def : GINodeEquiv<G_UZP2, AArch64uzp2>;
+def : GINodeEquiv<G_ZIP1, AArch64zip1>;
+def : GINodeEquiv<G_ZIP2, AArch64zip2>;
+def : GINodeEquiv<G_DUP, AArch64dup>;
+def : GINodeEquiv<G_TRN1, AArch64trn1>;
+def : GINodeEquiv<G_TRN2, AArch64trn2>;
+def : GINodeEquiv<G_EXT, AArch64ext>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 54f3f7c10132..5139ae5ccaf1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -24,9 +24,9 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -111,6 +111,14 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
// This gets lowered to an instruction sequence which takes 16 bytes
NumBytes = 16;
break;
+ case AArch64::SpeculationBarrierISBDSBEndBB:
+ // This gets lowered to 2 4-byte instructions.
+ NumBytes = 8;
+ break;
+ case AArch64::SpeculationBarrierSBEndBB:
+    // This gets lowered to a single 4-byte instruction.
+ NumBytes = 4;
+ break;
case AArch64::JumpTableDest32:
case AArch64::JumpTableDest16:
case AArch64::JumpTableDest8:
@@ -119,11 +127,25 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
case AArch64::SPACE:
NumBytes = MI.getOperand(1).getImm();
break;
+ case TargetOpcode::BUNDLE:
+ NumBytes = getInstBundleLength(MI);
+ break;
}
return NumBytes;
}
+unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
+ unsigned Size = 0;
+ MachineBasicBlock::const_instr_iterator I = MI.getIterator();
+ MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ assert(!I->isBundle() && "No nested bundle!");
+ Size += getInstSizeInBytes(*I);
+ }
+ return Size;
+}
+
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
SmallVectorImpl<MachineOperand> &Cond) {
// Block ends with fall-through condbranch.
@@ -216,6 +238,12 @@ bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
if (I == MBB.end())
return false;
+ // Skip over SpeculationBarrierEndBB terminators
+ if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
+ I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
+ --I;
+ }
+
if (!isUnpredicatedTerminator(*I))
return false;
@@ -496,8 +524,9 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Cond,
- unsigned TrueReg, unsigned FalseReg,
- int &CondCycles, int &TrueCycles,
+ Register DstReg, Register TrueReg,
+ Register FalseReg, int &CondCycles,
+ int &TrueCycles,
int &FalseCycles) const {
// Check register classes.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -506,6 +535,12 @@ bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
if (!RC)
return false;
+ // Also need to check the dest regclass, in case we're trying to optimize
+ // something like:
+ // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
+ if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
+ return false;
+
// Expanding cbz/tbz requires an extra cycle of latency on the condition.
unsigned ExtraCondLat = Cond.size() != 1;
@@ -538,9 +573,9 @@ bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DstReg,
+ const DebugLoc &DL, Register DstReg,
ArrayRef<MachineOperand> Cond,
- unsigned TrueReg, unsigned FalseReg) const {
+ Register TrueReg, Register FalseReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
// Parse the condition code, see parseCondBranch() above.
@@ -910,7 +945,7 @@ bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
}
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
+ Register &SrcReg, Register &DstReg,
unsigned &SubIdx) const {
switch (MI.getOpcode()) {
default:
@@ -935,6 +970,7 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
unsigned WidthA = 0, WidthB = 0;
+ bool OffsetAIsScalable = false, OffsetBIsScalable = false;
assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
@@ -948,9 +984,14 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
// base are identical, and the offset of a lower memory access +
// the width doesn't overlap the offset of a higher memory access,
// then the memory accesses are different.
- if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
- getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
- if (BaseOpA->isIdenticalTo(*BaseOpB)) {
+ // If OffsetAIsScalable and OffsetBIsScalable are both true, they
+ // are assumed to have the same scale (vscale).
+ if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
+ WidthA, TRI) &&
+ getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
+ WidthB, TRI)) {
+ if (BaseOpA->isIdenticalTo(*BaseOpB) &&
+ OffsetAIsScalable == OffsetBIsScalable) {
int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
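The hunk above only adds the scalable-offset plumbing; the disjointness test itself is the usual interval check on same-base accesses: the lower offset plus its width must not reach the higher offset. A standalone sketch of that check (plain C++, illustrative names, mirroring the LowOffset/HighOffset/LowWidth variables above):

#include <algorithm>
#include <cassert>
#include <cstdint>

// True when two accesses off the same base register cannot overlap.
bool accessesAreDisjoint(int64_t OffsetA, unsigned WidthA,
                         int64_t OffsetB, unsigned WidthB) {
  int64_t LowOffset  = std::min(OffsetA, OffsetB);
  int64_t HighOffset = std::max(OffsetA, OffsetB);
  unsigned LowWidth  = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + (int64_t)LowWidth <= HighOffset;
}

int main() {
  assert(accessesAreDisjoint(0, 8, 8, 8));    // [0,8) vs [8,16): disjoint
  assert(!accessesAreDisjoint(0, 16, 8, 8));  // [0,16) vs [8,16): overlap
  return 0;
}
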
@@ -984,8 +1025,8 @@ bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
-bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &CmpMask,
+bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &CmpMask,
int &CmpValue) const {
// The first operand can be a frame index where we'd normally expect a
// register.
@@ -1156,10 +1197,9 @@ static bool areCFlagsAccessedBetweenInstrs(
return MI.getIterator() == From;
}) != To->getParent()->rend());
- // We iterate backward starting \p To until we hit \p From.
- for (--To; To != From; --To) {
- const MachineInstr &Instr = *To;
-
+ // We iterate backward starting at \p To until we hit \p From.
+ for (const MachineInstr &Instr :
+ instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
if (((AccessToCheck & AK_Write) &&
Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
@@ -1180,7 +1220,7 @@ static bool areCFlagsAccessedBetweenInstrs(
/// instruction.
/// Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
- MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
+ MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask,
int CmpValue, const MachineRegisterInfo *MRI) const {
assert(CmpInstr.getParent());
assert(MRI);
@@ -1416,10 +1456,9 @@ static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
return false;
UsedNZCV NZCVUsedAfterCmp;
- for (auto I = std::next(CmpInstr->getIterator()),
- E = CmpInstr->getParent()->instr_end();
- I != E; ++I) {
- const MachineInstr &Instr = *I;
+ for (const MachineInstr &Instr :
+ instructionsWithoutDebug(std::next(CmpInstr->getIterator()),
+ CmpInstr->getParent()->instr_end())) {
if (Instr.readsRegister(AArch64::NZCV, TRI)) {
AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
@@ -1684,6 +1723,8 @@ unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
case AArch64::STRSui:
case AArch64::STRDui:
case AArch64::STRQui:
+ case AArch64::LDR_PXI:
+ case AArch64::STR_PXI:
if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
@@ -1796,9 +1837,37 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::STNPSi:
case AArch64::LDG:
case AArch64::STGPi:
+ case AArch64::LD1B_IMM:
+ case AArch64::LD1H_IMM:
+ case AArch64::LD1W_IMM:
+ case AArch64::LD1D_IMM:
+ case AArch64::ST1B_IMM:
+ case AArch64::ST1H_IMM:
+ case AArch64::ST1W_IMM:
+ case AArch64::ST1D_IMM:
+ case AArch64::LD1B_H_IMM:
+ case AArch64::LD1SB_H_IMM:
+ case AArch64::LD1H_S_IMM:
+ case AArch64::LD1SH_S_IMM:
+ case AArch64::LD1W_D_IMM:
+ case AArch64::LD1SW_D_IMM:
+ case AArch64::ST1B_H_IMM:
+ case AArch64::ST1H_S_IMM:
+ case AArch64::ST1W_D_IMM:
+ case AArch64::LD1B_S_IMM:
+ case AArch64::LD1SB_S_IMM:
+ case AArch64::LD1H_D_IMM:
+ case AArch64::LD1SH_D_IMM:
+ case AArch64::ST1B_S_IMM:
+ case AArch64::ST1H_D_IMM:
+ case AArch64::LD1B_D_IMM:
+ case AArch64::LD1SB_D_IMM:
+ case AArch64::ST1B_D_IMM:
return 3;
case AArch64::ADDG:
case AArch64::STGOffset:
+ case AArch64::LDR_PXI:
+ case AArch64::STR_PXI:
return 2;
}
}
@@ -1978,20 +2047,25 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
return true;
}
-bool AArch64InstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
- const MachineOperand *&BaseOp,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const {
+bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
if (!LdSt.mayLoadOrStore())
return false;
- unsigned Width;
- return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
+ const MachineOperand *BaseOp;
+ if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
+ Width, TRI))
+ return false;
+ BaseOps.push_back(BaseOp);
+ return true;
}
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
- unsigned &Width, const TargetRegisterInfo *TRI) const {
+ bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
// Handle only loads/stores with base register followed by immediate offset.
if (LdSt.getNumExplicitOperands() == 3) {
@@ -2010,7 +2084,7 @@ bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
// Get the scaling factor for the instruction and set the width for the
// instruction.
- unsigned Scale = 0;
+ TypeSize Scale(0U, false);
int64_t Dummy1, Dummy2;
// If this returns false, then it's an instruction we don't want to handle.
@@ -2022,12 +2096,13 @@ bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
// set to 1.
if (LdSt.getNumExplicitOperands() == 3) {
BaseOp = &LdSt.getOperand(1);
- Offset = LdSt.getOperand(2).getImm() * Scale;
+ Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinSize();
} else {
assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
BaseOp = &LdSt.getOperand(2);
- Offset = LdSt.getOperand(3).getImm() * Scale;
+ Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinSize();
}
+ OffsetIsScalable = Scale.isScalable();
if (!BaseOp->isReg() && !BaseOp->isFI())
return false;
@@ -2043,26 +2118,28 @@ AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
return OfsOp;
}
-bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
+bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
unsigned &Width, int64_t &MinOffset,
int64_t &MaxOffset) {
+ const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
switch (Opcode) {
// Not a memory operation or something we want to handle.
default:
- Scale = Width = 0;
+ Scale = TypeSize::Fixed(0);
+ Width = 0;
MinOffset = MaxOffset = 0;
return false;
case AArch64::STRWpost:
case AArch64::LDRWpost:
Width = 32;
- Scale = 4;
+ Scale = TypeSize::Fixed(4);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDURQi:
case AArch64::STURQi:
Width = 16;
- Scale = 1;
+ Scale = TypeSize::Fixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
@@ -2072,7 +2149,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::STURXi:
case AArch64::STURDi:
Width = 8;
- Scale = 1;
+ Scale = TypeSize::Fixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
@@ -2082,7 +2159,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::STURWi:
case AArch64::STURSi:
Width = 4;
- Scale = 1;
+ Scale = TypeSize::Fixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
@@ -2093,7 +2170,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::STURHi:
case AArch64::STURHHi:
Width = 2;
- Scale = 1;
+ Scale = TypeSize::Fixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
@@ -2104,7 +2181,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::STURBi:
case AArch64::STURBBi:
Width = 1;
- Scale = 1;
+ Scale = TypeSize::Fixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
@@ -2112,14 +2189,15 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::LDNPQi:
case AArch64::STPQi:
case AArch64::STNPQi:
- Scale = 16;
+ Scale = TypeSize::Fixed(16);
Width = 32;
MinOffset = -64;
MaxOffset = 63;
break;
case AArch64::LDRQui:
case AArch64::STRQui:
- Scale = Width = 16;
+ Scale = TypeSize::Fixed(16);
+ Width = 16;
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -2131,7 +2209,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::STPDi:
case AArch64::STNPXi:
case AArch64::STNPDi:
- Scale = 8;
+ Scale = TypeSize::Fixed(8);
Width = 16;
MinOffset = -64;
MaxOffset = 63;
@@ -2141,7 +2219,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::LDRDui:
case AArch64::STRXui:
case AArch64::STRDui:
- Scale = Width = 8;
+ Scale = TypeSize::Fixed(8);
+ Width = 8;
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -2153,7 +2232,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::STPSi:
case AArch64::STNPWi:
case AArch64::STNPSi:
- Scale = 4;
+ Scale = TypeSize::Fixed(4);
Width = 8;
MinOffset = -64;
MaxOffset = 63;
@@ -2163,7 +2242,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::LDRSWui:
case AArch64::STRWui:
case AArch64::STRSui:
- Scale = Width = 4;
+ Scale = TypeSize::Fixed(4);
+ Width = 4;
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -2173,7 +2253,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::LDRSHXui:
case AArch64::STRHui:
case AArch64::STRHHui:
- Scale = Width = 2;
+ Scale = TypeSize::Fixed(2);
+ Width = 2;
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -2183,18 +2264,19 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::LDRSBXui:
case AArch64::STRBui:
case AArch64::STRBBui:
- Scale = Width = 1;
+ Scale = TypeSize::Fixed(1);
+ Width = 1;
MinOffset = 0;
MaxOffset = 4095;
break;
case AArch64::ADDG:
- Scale = 16;
+ Scale = TypeSize::Fixed(16);
Width = 0;
MinOffset = 0;
MaxOffset = 63;
break;
case AArch64::TAGPstack:
- Scale = 16;
+ Scale = TypeSize::Fixed(16);
Width = 0;
// TAGP with a negative offset turns into SUBP, which has a maximum offset
// of 63 (not 64!).
@@ -2204,31 +2286,110 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
case AArch64::LDG:
case AArch64::STGOffset:
case AArch64::STZGOffset:
- Scale = Width = 16;
+ Scale = TypeSize::Fixed(16);
+ Width = 16;
MinOffset = -256;
MaxOffset = 255;
break;
+ case AArch64::STR_ZZZZXI:
+ case AArch64::LDR_ZZZZXI:
+ Scale = TypeSize::Scalable(16);
+ Width = SVEMaxBytesPerVector * 4;
+ MinOffset = -256;
+ MaxOffset = 252;
+ break;
+ case AArch64::STR_ZZZXI:
+ case AArch64::LDR_ZZZXI:
+ Scale = TypeSize::Scalable(16);
+ Width = SVEMaxBytesPerVector * 3;
+ MinOffset = -256;
+ MaxOffset = 253;
+ break;
+ case AArch64::STR_ZZXI:
+ case AArch64::LDR_ZZXI:
+ Scale = TypeSize::Scalable(16);
+ Width = SVEMaxBytesPerVector * 2;
+ MinOffset = -256;
+ MaxOffset = 254;
+ break;
case AArch64::LDR_PXI:
case AArch64::STR_PXI:
- Scale = Width = 2;
+ Scale = TypeSize::Scalable(2);
+ Width = SVEMaxBytesPerVector / 8;
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDR_ZXI:
case AArch64::STR_ZXI:
- Scale = Width = 16;
+ Scale = TypeSize::Scalable(16);
+ Width = SVEMaxBytesPerVector;
MinOffset = -256;
MaxOffset = 255;
break;
+ case AArch64::LD1B_IMM:
+ case AArch64::LD1H_IMM:
+ case AArch64::LD1W_IMM:
+ case AArch64::LD1D_IMM:
+ case AArch64::ST1B_IMM:
+ case AArch64::ST1H_IMM:
+ case AArch64::ST1W_IMM:
+ case AArch64::ST1D_IMM:
+    // A full vector's worth of data
+ // Width = mbytes * elements
+ Scale = TypeSize::Scalable(16);
+ Width = SVEMaxBytesPerVector;
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
+ case AArch64::LD1B_H_IMM:
+ case AArch64::LD1SB_H_IMM:
+ case AArch64::LD1H_S_IMM:
+ case AArch64::LD1SH_S_IMM:
+ case AArch64::LD1W_D_IMM:
+ case AArch64::LD1SW_D_IMM:
+ case AArch64::ST1B_H_IMM:
+ case AArch64::ST1H_S_IMM:
+ case AArch64::ST1W_D_IMM:
+    // A half vector's worth of data
+ // Width = mbytes * elements
+ Scale = TypeSize::Scalable(8);
+ Width = SVEMaxBytesPerVector / 2;
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
+ case AArch64::LD1B_S_IMM:
+ case AArch64::LD1SB_S_IMM:
+ case AArch64::LD1H_D_IMM:
+ case AArch64::LD1SH_D_IMM:
+ case AArch64::ST1B_S_IMM:
+ case AArch64::ST1H_D_IMM:
+    // A quarter vector's worth of data
+ // Width = mbytes * elements
+ Scale = TypeSize::Scalable(4);
+ Width = SVEMaxBytesPerVector / 4;
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
+ case AArch64::LD1B_D_IMM:
+ case AArch64::LD1SB_D_IMM:
+ case AArch64::ST1B_D_IMM:
+    // An eighth vector's worth of data
+ // Width = mbytes * elements
+ Scale = TypeSize::Scalable(2);
+ Width = SVEMaxBytesPerVector / 8;
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
case AArch64::ST2GOffset:
case AArch64::STZ2GOffset:
- Scale = 16;
+ Scale = TypeSize::Fixed(16);
Width = 32;
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STGPi:
- Scale = Width = 16;
+ Scale = TypeSize::Fixed(16);
+ Width = 16;
MinOffset = -64;
MaxOffset = 63;
break;
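Throughout this switch the scale becomes a TypeSize so that SVE opcodes can report a vscale-scaled stride. The byte offset of an access is still the immediate times the known-minimum scale; what is new is the separate flag saying whether that product is further multiplied by vscale. A minimal sketch of the idea (plain C++, not the LLVM TypeSize class; names are illustrative):

#include <cassert>
#include <cstdint>

// Tiny stand-in for a fixed/scalable size pair.
struct ScaleInfo {
  uint64_t KnownMin;  // bytes per immediate step at vscale == 1
  bool Scalable;      // true for SVE opcodes such as LD1D_IMM or STR_ZXI
};

// Offset computation as in getMemOperandWithOffsetWidth: multiply the
// immediate by the known-minimum scale and report scalability separately.
int64_t scaledOffset(int64_t Imm, const ScaleInfo &S, bool &OffsetIsScalable) {
  OffsetIsScalable = S.Scalable;
  return Imm * (int64_t)S.KnownMin;
}

int main() {
  bool Scalable = false;
  // ldr q0, [x0, #48]  (LDRQui imm 3, fixed scale 16 -> byte offset 48).
  assert(scaledOffset(3, {16, false}, Scalable) == 48 && !Scalable);
  // ld1d { z0.d }, p0/z, [x0, #3, mul vl]  (scalable scale 16 -> 48 * vscale).
  assert(scaledOffset(3, {16, true}, Scalable) == 48 && Scalable);
  return 0;
}
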
@@ -2363,9 +2524,13 @@ static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getMemOperandWithOffset returns true.
-bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
- const MachineOperand &BaseOp2,
- unsigned NumLoads) const {
+bool AArch64InstrInfo::shouldClusterMemOps(
+ ArrayRef<const MachineOperand *> BaseOps1,
+ ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
+ unsigned NumBytes) const {
+ assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
+ const MachineOperand &BaseOp1 = *BaseOps1.front();
+ const MachineOperand &BaseOp2 = *BaseOps2.front();
const MachineInstr &FirstLdSt = *BaseOp1.getParent();
const MachineInstr &SecondLdSt = *BaseOp2.getParent();
if (BaseOp1.getType() != BaseOp2.getType())
@@ -2379,7 +2544,7 @@ bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
return false;
// Only cluster up to a single pair.
- if (NumLoads > 1)
+ if (NumLoads > 2)
return false;
if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
@@ -2822,11 +2987,11 @@ static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
const MCInstrDesc &MCID,
- unsigned SrcReg, bool IsKill,
+ Register SrcReg, bool IsKill,
unsigned SubIdx0, unsigned SubIdx1, int FI,
MachineMemOperand *MMO) {
- unsigned SrcReg0 = SrcReg;
- unsigned SrcReg1 = SrcReg;
+ Register SrcReg0 = SrcReg;
+ Register SrcReg1 = SrcReg;
if (Register::isPhysicalRegister(SrcReg)) {
SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
SubIdx0 = 0;
@@ -2842,18 +3007,19 @@ static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
}
void AArch64InstrInfo::storeRegToStackSlot(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
bool isKill, int FI, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned Align = MFI.getObjectAlignment(FI);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
unsigned Opc = 0;
bool Offset = true;
+ unsigned StackID = TargetStackID::Default;
switch (TRI->getSpillSize(*RC)) {
case 1:
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
@@ -2862,6 +3028,11 @@ void AArch64InstrInfo::storeRegToStackSlot(
case 2:
if (AArch64::FPR16RegClass.hasSubClassEq(RC))
Opc = AArch64::STRHui;
+ else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ Opc = AArch64::STR_PXI;
+ StackID = TargetStackID::SVEVector;
+ }
break;
case 4:
if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
@@ -2901,6 +3072,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
get(AArch64::STPXi), SrcReg, isKill,
AArch64::sube64, AArch64::subo64, FI, MMO);
return;
+ } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ Opc = AArch64::STR_ZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
case 24:
@@ -2919,6 +3094,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Twov2d;
Offset = false;
+ } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ Opc = AArch64::STR_ZZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
case 48:
@@ -2926,6 +3105,10 @@ void AArch64InstrInfo::storeRegToStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Threev2d;
Offset = false;
+ } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ Opc = AArch64::STR_ZZZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
case 64:
@@ -2933,19 +3116,13 @@ void AArch64InstrInfo::storeRegToStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Fourv2d;
Offset = false;
+ } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
+ Opc = AArch64::STR_ZZZZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
}
- unsigned StackID = TargetStackID::Default;
- if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
- Opc = AArch64::STR_PXI;
- StackID = TargetStackID::SVEVector;
- } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
- Opc = AArch64::STR_ZXI;
- StackID = TargetStackID::SVEVector;
- }
assert(Opc && "Unknown register class");
MFI.setStackID(FI, StackID);
@@ -2962,11 +3139,11 @@ static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
const MCInstrDesc &MCID,
- unsigned DestReg, unsigned SubIdx0,
+ Register DestReg, unsigned SubIdx0,
unsigned SubIdx1, int FI,
MachineMemOperand *MMO) {
- unsigned DestReg0 = DestReg;
- unsigned DestReg1 = DestReg;
+ Register DestReg0 = DestReg;
+ Register DestReg1 = DestReg;
bool IsUndef = true;
if (Register::isPhysicalRegister(DestReg)) {
DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
@@ -2984,18 +3161,19 @@ static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
}
void AArch64InstrInfo::loadRegFromStackSlot(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
int FI, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned Align = MFI.getObjectAlignment(FI);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
unsigned Opc = 0;
bool Offset = true;
+ unsigned StackID = TargetStackID::Default;
switch (TRI->getSpillSize(*RC)) {
case 1:
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
@@ -3004,6 +3182,11 @@ void AArch64InstrInfo::loadRegFromStackSlot(
case 2:
if (AArch64::FPR16RegClass.hasSubClassEq(RC))
Opc = AArch64::LDRHui;
+ else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ Opc = AArch64::LDR_PXI;
+ StackID = TargetStackID::SVEVector;
+ }
break;
case 4:
if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
@@ -3043,6 +3226,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
get(AArch64::LDPXi), DestReg, AArch64::sube64,
AArch64::subo64, FI, MMO);
return;
+ } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ Opc = AArch64::LDR_ZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
case 24:
@@ -3061,6 +3248,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Twov2d;
Offset = false;
+ } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ Opc = AArch64::LDR_ZZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
case 48:
@@ -3068,6 +3259,10 @@ void AArch64InstrInfo::loadRegFromStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Threev2d;
Offset = false;
+ } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ Opc = AArch64::LDR_ZZZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
case 64:
@@ -3075,20 +3270,14 @@ void AArch64InstrInfo::loadRegFromStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Fourv2d;
Offset = false;
+ } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
+ Opc = AArch64::LDR_ZZZZXI;
+ StackID = TargetStackID::SVEVector;
}
break;
}
- unsigned StackID = TargetStackID::Default;
- if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
- Opc = AArch64::LDR_PXI;
- StackID = TargetStackID::SVEVector;
- } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
- Opc = AArch64::LDR_ZXI;
- StackID = TargetStackID::SVEVector;
- }
assert(Opc && "Unknown register class");
MFI.setStackID(FI, StackID);
@@ -3100,6 +3289,17 @@ void AArch64InstrInfo::loadRegFromStackSlot(
MI.addMemOperand(MMO);
}
+bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
+ const MachineInstr &UseMI,
+ const TargetRegisterInfo *TRI) {
+ return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
+ UseMI.getIterator()),
+ [TRI](const MachineInstr &I) {
+ return I.modifiesRegister(AArch64::NZCV, TRI) ||
+ I.readsRegister(AArch64::NZCV, TRI);
+ });
+}
+
// Helper function to emit a frame offset adjustment from a given
// pointer (SrcReg), stored into DestReg. This function is explicit
// in that it requires the opcode.
@@ -3146,6 +3346,10 @@ static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
// assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
+ Register TmpReg = DestReg;
+ if (TmpReg == AArch64::XZR)
+ TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
+ &AArch64::GPR64RegClass);
do {
uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
unsigned LocalShiftSize = 0;
@@ -3155,7 +3359,11 @@ static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
}
assert((ThisVal >> ShiftSize) <= MaxEncoding &&
"Encoding cannot handle value that big");
- auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
+
+ Offset -= ThisVal << LocalShiftSize;
+ if (Offset == 0)
+ TmpReg = DestReg;
+ auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
.addReg(SrcReg)
.addImm(Sign * (int)ThisVal);
if (ShiftSize)
@@ -3176,8 +3384,8 @@ static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
.addImm(Imm)
.setMIFlag(Flag);
- assert((Offset - Imm) == 0 && "Expected remaining offset to be zero to "
- "emit a single SEH directive");
+ assert(Offset == 0 && "Expected remaining offset to be zero to "
+ "emit a single SEH directive");
} else if (DestReg == AArch64::SP) {
if (HasWinCFI)
*HasWinCFI = true;
@@ -3190,8 +3398,7 @@ static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
*HasWinCFI = true;
}
- SrcReg = DestReg;
- Offset -= ThisVal << LocalShiftSize;
+ SrcReg = TmpReg;
} while (Offset);
}
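The loop above splits an arbitrary offset into add/sub-immediate chunks and, when the destination is XZR, routes the intermediate sums through a scratch register so that only the final instruction writes the (result-discarding) zero register. A standalone sketch of the chunking, assuming the usual 12-bit immediate with optional LSL #12 (plain C++; the real code derives these limits from the opcode):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

// Split a byte offset into (imm, shift) chunks the way the loop above does.
std::vector<std::pair<uint64_t, unsigned>> splitOffset(uint64_t Offset) {
  const uint64_t MaxEncoding = 0xfff;
  const unsigned ShiftSize = 12;
  const uint64_t MaxEncodableValue = MaxEncoding << ShiftSize;
  std::vector<std::pair<uint64_t, unsigned>> Chunks;
  while (Offset) {
    uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
    unsigned LocalShift = 0;
    if (ThisVal > MaxEncoding) {
      ThisVal >>= ShiftSize;
      LocalShift = ShiftSize;
    }
    Offset -= ThisVal << LocalShift;
    Chunks.push_back({ThisVal, LocalShift});
  }
  return Chunks;
}

int main() {
  // 0x101008 bytes -> "add ..., #0x101, lsl #12" followed by "add ..., #0x8".
  auto Chunks = splitOffset(0x101008);
  assert(Chunks.size() == 2);
  assert(Chunks[0].first == 0x101 && Chunks[0].second == 12);
  assert(Chunks[1].first == 0x8 && Chunks[1].second == 0);
  return 0;
}
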
@@ -3414,18 +3621,6 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
return nullptr;
}
-static bool isSVEScaledImmInstruction(unsigned Opcode) {
- switch (Opcode) {
- case AArch64::LDR_ZXI:
- case AArch64::STR_ZXI:
- case AArch64::LDR_PXI:
- case AArch64::STR_PXI:
- return true;
- default:
- return false;
- }
-}
-
int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
StackOffset &SOffset,
bool *OutUseUnscaledOp,
@@ -3458,20 +3653,23 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
case AArch64::ST1Fourv1d:
case AArch64::IRG:
case AArch64::IRGstack:
+ case AArch64::STGloop:
+ case AArch64::STZGloop:
return AArch64FrameOffsetCannotUpdate;
}
// Get the min/max offset and the scale.
- unsigned Scale, Width;
+ TypeSize ScaleValue(0U, false);
+ unsigned Width;
int64_t MinOff, MaxOff;
- if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), Scale, Width, MinOff,
+ if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
MaxOff))
llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
// Construct the complete offset.
- bool IsMulVL = isSVEScaledImmInstruction(MI.getOpcode());
- int64_t Offset =
- IsMulVL ? (SOffset.getScalableBytes()) : (SOffset.getBytes());
+ bool IsMulVL = ScaleValue.isScalable();
+ unsigned Scale = ScaleValue.getKnownMinSize();
+ int64_t Offset = IsMulVL ? SOffset.getScalableBytes() : SOffset.getBytes();
const MachineOperand &ImmOpnd =
MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
@@ -3484,9 +3682,14 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
if (useUnscaledOp &&
- !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, Scale, Width, MinOff, MaxOff))
+ !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
+ MaxOff))
llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
+ Scale = ScaleValue.getKnownMinSize();
+ assert(IsMulVL == ScaleValue.isScalable() &&
+ "Unscaled opcode has different value for scalable");
+
int64_t Remainder = Offset % Scale;
assert(!(Remainder && useUnscaledOp) &&
"Cannot have remainder when using unscaled op");
@@ -5791,6 +5994,35 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
});
+ // We check to see if CFI Instructions are present, and if they are
+ // we find the number of CFI Instructions in the candidates.
+ unsigned CFICount = 0;
+ MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
+ for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
+ Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
+ const std::vector<MCCFIInstruction> &CFIInstructions =
+ RepeatedSequenceLocs[0].getMF()->getFrameInstructions();
+ if (MBBI->isCFIInstruction()) {
+ unsigned CFIIndex = MBBI->getOperand(0).getCFIIndex();
+ MCCFIInstruction CFI = CFIInstructions[CFIIndex];
+ CFICount++;
+ }
+ MBBI++;
+ }
+
+ // We compare the number of found CFI Instructions to the number of CFI
+ // instructions in the parent function for each candidate. We must check this
+ // since if we outline one of the CFI instructions in a function, we have to
+ // outline them all for correctness. If we do not, the address offsets will be
+ // incorrect between the two sections of the program.
+ for (outliner::Candidate &C : RepeatedSequenceLocs) {
+ std::vector<MCCFIInstruction> CFIInstructions =
+ C.getMF()->getFrameInstructions();
+
+ if (CFICount > 0 && CFICount != CFIInstructions.size())
+ return outliner::OutlinedFunction();
+ }
+
// Returns true if an instructions is safe to fix up, false otherwise.
auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
if (MI.isCall())
@@ -5811,23 +6043,29 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
if (MI.mayLoadOrStore()) {
const MachineOperand *Base; // Filled with the base operand of MI.
int64_t Offset; // Filled with the offset of MI.
+ bool OffsetIsScalable;
// Does it allow us to offset the base operand and is the base the
// register SP?
- if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() ||
- Base->getReg() != AArch64::SP)
+ if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
+ !Base->isReg() || Base->getReg() != AArch64::SP)
+ return false;
+
+      // Fix-up code below assumes bytes.
+ if (OffsetIsScalable)
return false;
// Find the minimum/maximum offset for this instruction and check
// if fixing it up would be in range.
int64_t MinOffset,
MaxOffset; // Unscaled offsets for the instruction.
- unsigned Scale; // The scale to multiply the offsets by.
+ TypeSize Scale(0U, false); // The scale to multiply the offsets by.
unsigned DummyWidth;
getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
Offset += 16; // Update the offset to what it would be if we outlined.
- if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
+ if (Offset < MinOffset * (int64_t)Scale.getFixedSize() ||
+ Offset > MaxOffset * (int64_t)Scale.getFixedSize())
return false;
// It's in range, so we can outline it.
@@ -5854,7 +6092,9 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
}
else if (LastInstrOpcode == AArch64::BL ||
- (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
+ ((LastInstrOpcode == AArch64::BLR ||
+ LastInstrOpcode == AArch64::BLRNoIP) &&
+ !HasBTI)) {
// FIXME: Do we need to check if the code after this uses the value of LR?
FrameID = MachineOutlinerThunk;
NumBytesToCreateFrame = 0;
@@ -5960,6 +6200,11 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
}
}
+ // If we have CFI instructions, we can only outline if the outlined section
+ // can be a tail call
+ if (FrameID != MachineOutlinerTailCall && CFICount > 0)
+ return outliner::OutlinedFunction();
+
return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
NumBytesToCreateFrame, FrameID);
}
@@ -5986,6 +6231,10 @@ bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
if (!AFI || AFI->hasRedZone().getValueOr(true))
return false;
+ // FIXME: Teach the outliner to generate/handle Windows unwind info.
+ if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
+ return false;
+
// It's safe to outline from MF.
return true;
}
@@ -6081,6 +6330,15 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
if (FuncInfo->getLOHRelated().count(&MI))
return outliner::InstrType::Illegal;
+ // We can only outline these if we will tail call the outlined function, or
+  // fix up the CFI offsets. Currently, CFI instructions are outlined only
+  // when the outlined section is tail called.
+ //
+ // FIXME: If the proper fixups for the offset are implemented, this should be
+ // possible.
+ if (MI.isCFIInstruction())
+ return outliner::InstrType::Legal;
+
// Don't allow debug values to impact outlining type.
if (MI.isDebugInstr() || MI.isIndirectDebugValue())
return outliner::InstrType::Invisible;
@@ -6150,10 +6408,11 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
// If we don't know anything about the callee, assume it depends on the
// stack layout of the caller. In that case, it's only legal to outline
- // as a tail-call. Whitelist the call instructions we know about so we
+ // as a tail-call. Explicitly list the call instructions we know about so we
// don't get unexpected results with call pseudo-instructions.
auto UnknownCallOutlineType = outliner::InstrType::Illegal;
- if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
+ if (MI.getOpcode() == AArch64::BLR ||
+ MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
if (!Callee)
@@ -6205,26 +6464,29 @@ void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
const MachineOperand *Base;
unsigned Width;
int64_t Offset;
+ bool OffsetIsScalable;
// Is this a load or store with an immediate offset with SP as the base?
if (!MI.mayLoadOrStore() ||
- !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, &RI) ||
+ !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
+ &RI) ||
(Base->isReg() && Base->getReg() != AArch64::SP))
continue;
// It is, so we have to fix it up.
- unsigned Scale;
+ TypeSize Scale(0U, false);
int64_t Dummy1, Dummy2;
MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
assert(Scale != 0 && "Unexpected opcode!");
+ assert(!OffsetIsScalable && "Expected offset to be a byte offset");
// We've pushed the return address to the stack, so add 16 to the offset.
// This is safe, since we already checked if it would overflow when we
// checked if this instruction was legal to outline.
- int64_t NewImm = (Offset + 16) / Scale;
+ int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedSize();
StackOffsetOperand.setImm(NewImm);
}
}
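The rescaling above amounts to "add the 16-byte LR save slot, then re-divide by the access scale". A tiny standalone sketch with one worked case (plain C++; the function name is illustrative):

#include <cassert>
#include <cstdint>

// Recompute an SP-relative scaled immediate after the outliner has pushed the
// 16-byte slot that saves LR, as fixupPostOutline does above.
int64_t fixupScaledImm(int64_t ByteOffset, int64_t Scale) {
  return (ByteOffset + 16) / Scale;
}

int main() {
  // ldr x0, [sp, #24] (imm 3, scale 8) becomes ldr x0, [sp, #40] (imm 5).
  assert(fixupScaledImm(24, 8) == 5);
  return 0;
}
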
@@ -6285,15 +6547,21 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
void AArch64InstrInfo::buildOutlinedFrame(
MachineBasicBlock &MBB, MachineFunction &MF,
const outliner::OutlinedFunction &OF) const {
- // For thunk outlining, rewrite the last instruction from a call to a
- // tail-call.
- if (OF.FrameConstructionID == MachineOutlinerThunk) {
+
+ AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
+
+ if (OF.FrameConstructionID == MachineOutlinerTailCall)
+ FI->setOutliningStyle("Tail Call");
+ else if (OF.FrameConstructionID == MachineOutlinerThunk) {
+ // For thunk outlining, rewrite the last instruction from a call to a
+ // tail-call.
MachineInstr *Call = &*--MBB.instr_end();
unsigned TailOpcode;
if (Call->getOpcode() == AArch64::BL) {
TailOpcode = AArch64::TCRETURNdi;
} else {
- assert(Call->getOpcode() == AArch64::BLR);
+ assert(Call->getOpcode() == AArch64::BLR ||
+ Call->getOpcode() == AArch64::BLRNoIP);
TailOpcode = AArch64::TCRETURNriALL;
}
MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
@@ -6301,6 +6569,8 @@ void AArch64InstrInfo::buildOutlinedFrame(
.addImm(0);
MBB.insert(MBB.end(), TC);
Call->eraseFromParent();
+
+ FI->setOutliningStyle("Thunk");
}
bool IsLeafFunction = true;
@@ -6320,7 +6590,8 @@ void AArch64InstrInfo::buildOutlinedFrame(
IsLeafFunction = false;
// LR has to be a live in so that we can save it.
- MBB.addLiveIn(AArch64::LR);
+ if (!MBB.isLiveIn(AArch64::LR))
+ MBB.addLiveIn(AArch64::LR);
MachineBasicBlock::iterator It = MBB.begin();
MachineBasicBlock::iterator Et = MBB.end();
@@ -6343,7 +6614,7 @@ void AArch64InstrInfo::buildOutlinedFrame(
// Add a CFI saying the stack was moved 16 B down.
int64_t StackPosEntry =
- MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
.addCFIIndex(StackPosEntry)
.setMIFlags(MachineInstr::FrameSetup);
@@ -6351,7 +6622,7 @@ void AArch64InstrInfo::buildOutlinedFrame(
// Add a CFI saying that the LR that we want to find is now 16 B higher than
// before.
int64_t LRPosEntry =
- MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
+ MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
.addCFIIndex(LRPosEntry)
.setMIFlags(MachineInstr::FrameSetup);
@@ -6399,13 +6670,20 @@ void AArch64InstrInfo::buildOutlinedFrame(
}
// It's not a tail call, so we have to insert the return ourselves.
+
+ // LR has to be a live in so that we can return to it.
+ if (!MBB.isLiveIn(AArch64::LR))
+ MBB.addLiveIn(AArch64::LR);
+
MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
- .addReg(AArch64::LR, RegState::Undef);
+ .addReg(AArch64::LR);
MBB.insert(MBB.end(), ret);
signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
ShouldSignReturnAddrWithAKey);
+ FI->setOutliningStyle("Function");
+
// Did we have to modify the stack by saving the link register?
if (OF.FrameConstructionID != MachineOutlinerDefault)
return;
@@ -6519,7 +6797,8 @@ Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI,
// TODO: Handle cases where Reg is a super- or sub-register of the
// destination register.
- if (Reg != MI.getOperand(0).getReg())
+ const MachineOperand &Op0 = MI.getOperand(0);
+ if (!Op0.isReg() || Reg != Op0.getReg())
return None;
switch (MI.getOpcode()) {
@@ -6614,5 +6893,17 @@ AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
return TargetInstrInfo::describeLoadedValue(MI, Reg);
}
+uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
+ return get(Opc).TSFlags & AArch64::ElementSizeMask;
+}
+
+unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
+ if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
+ return AArch64::BLRNoIP;
+ else
+ return AArch64::BLR;
+}
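A hedged usage sketch, not taken from this patch: call-emitting code can ask getBLRCallOpcode for the right indirect-call opcode instead of hard-coding AArch64::BLR. MF, MBB, InsertPt, DL and CalleeReg are assumed to be in scope.

    const AArch64InstrInfo *TII =
        MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
    // Emits BLR normally, or BLRNoIP when SLS-BLR hardening is enabled.
    BuildMI(MBB, InsertPt, DL, TII->get(getBLRCallOpcode(MF)))
        .addReg(CalleeReg);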
+
#define GET_INSTRINFO_HELPERS
+#define GET_INSTRMAP_INFO
#include "AArch64GenInstrInfo.inc"
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 66e517e54903..298c04d81708 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Support/TypeSize.h"
#define GET_INSTRINFO_HEADER
#include "AArch64GenInstrInfo.inc"
@@ -51,8 +52,8 @@ public:
bool isAsCheapAsAMove(const MachineInstr &MI) const override;
- bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &DstReg, unsigned &SubIdx) const override;
+ bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg,
+ Register &DstReg, unsigned &SubIdx) const override;
bool
areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
@@ -112,14 +113,19 @@ public:
/// Hint that pairing the given load or store is unprofitable.
static void suppressLdStPair(MachineInstr &MI);
- bool getMemOperandWithOffset(const MachineInstr &MI,
- const MachineOperand *&BaseOp,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandsWithOffsetWidth(
+ const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const override;
+ /// If \p OffsetIsScalable is set to 'true', the offset is scaled by `vscale`.
+ /// This is true for some SVE instructions like ldr/str that have a
+ /// 'reg + imm' addressing mode where the immediate is an index to the
+ /// scalable vector located at 'reg + imm * vscale x #bytes'.
bool getMemOperandWithOffsetWidth(const MachineInstr &MI,
const MachineOperand *&BaseOp,
- int64_t &Offset, unsigned &Width,
+ int64_t &Offset, bool &OffsetIsScalable,
+ unsigned &Width,
const TargetRegisterInfo *TRI) const;
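A worked example of the scalable case, under the assumption of a Z-register ldr/str (granule of vscale x 16 bytes) running on a 256-bit SVE implementation (vscale == 2); both values are assumptions for illustration only:

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t Imm = 3;           // scaled immediate reported in Offset
      int64_t GranuleBytes = 16; // 128 bits per vscale unit for a Z register
      int64_t VScale = 2;        // assumed hardware vector length / 128
      // OffsetIsScalable == true: the access is at reg + Imm * vscale x 16 bytes.
      int64_t ByteOffset = Imm * VScale * GranuleBytes;
      assert(ByteOffset == 96);
      return 0;
    }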
/// Return the immediate offset of the base register in a load/store \p LdSt.
@@ -129,12 +135,12 @@ public:
/// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
///
/// For unscaled instructions, \p Scale is set to 1.
- static bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width,
+ static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, unsigned &Width,
int64_t &MinOffset, int64_t &MaxOffset);
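A small sketch of what the TypeSize-valued Scale expresses compared to the old plain unsigned: a fixed byte scale versus a vscale-scaled one (uses LLVM's TypeSize header; the factory names are assumed to match this tree):

    #include "llvm/Support/TypeSize.h"
    #include <cassert>

    int main() {
      llvm::TypeSize FixedScale = llvm::TypeSize::Fixed(8);   // ordinary 8-byte units
      llvm::TypeSize SVEScale = llvm::TypeSize::Scalable(16); // vscale x 16-byte units
      assert(!FixedScale.isScalable() && FixedScale.getFixedSize() == 8);
      assert(SVEScale.isScalable());
      return 0;
    }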
- bool shouldClusterMemOps(const MachineOperand &BaseOp1,
- const MachineOperand &BaseOp2,
- unsigned NumLoads) const override;
+ bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+ ArrayRef<const MachineOperand *> BaseOps2,
+ unsigned NumLoads, unsigned NumBytes) const override;
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
@@ -149,13 +155,13 @@ public:
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, unsigned SrcReg,
+ MachineBasicBlock::iterator MBBI, Register SrcReg,
bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, unsigned DestReg,
+ MachineBasicBlock::iterator MBBI, Register DestReg,
int FrameIndex, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
@@ -191,11 +197,12 @@ public:
bool
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
- unsigned, unsigned, int &, int &, int &) const override;
+ Register, Register, Register, int &, int &,
+ int &) const override;
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DstReg,
- ArrayRef<MachineOperand> Cond, unsigned TrueReg,
- unsigned FalseReg) const override;
+ const DebugLoc &DL, Register DstReg,
+ ArrayRef<MachineOperand> Cond, Register TrueReg,
+ Register FalseReg) const override;
void getNoop(MCInst &NopInst) const override;
bool isSchedulingBoundary(const MachineInstr &MI,
@@ -205,13 +212,13 @@ public:
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
- bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &CmpMask,
+ bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &CmpMask,
int &CmpValue) const override;
/// optimizeCompareInstr - Convert the instruction supplying the argument to
/// the comparison into one that sets the zero bit in the flags register.
- bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
- unsigned SrcReg2, int CmpMask, int CmpValue,
+ bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
+ Register SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const override;
bool optimizeCondBranch(MachineInstr &MI) const override;
@@ -264,6 +271,8 @@ public:
MachineBasicBlock::iterator &It, MachineFunction &MF,
const outliner::Candidate &C) const override;
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
+ /// Returns the vector element size (B, H, S or D) of an SVE opcode.
+ uint64_t getElementSizeForOpcode(unsigned Opc) const;
/// Returns true if the instruction has a shift by immediate that can be
/// executed in one cycle less.
static bool isFalkorShiftExtFast(const MachineInstr &MI);
@@ -288,6 +297,8 @@ protected:
isCopyInstrImpl(const MachineInstr &MI) const override;
private:
+ unsigned getInstBundleLength(const MachineInstr &MI) const;
+
/// Sets the offsets on outlined instructions in \p MBB which use SP
/// so that they will be valid post-outlining.
///
@@ -305,6 +316,12 @@ private:
unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
};
+/// Return true if there is an instruction /after/ \p DefMI and before \p UseMI
+/// which either reads or clobbers NZCV.
+bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
+ const MachineInstr &UseMI,
+ const TargetRegisterInfo *TRI);
+
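One plausible implementation sketch for this helper, an assumption rather than this tree's actual code, which scans the instructions strictly between the two given ones (both assumed to be in the same basic block):

    // Sketch only; assumes the usual CodeGen headers are available.
    bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
                                         const MachineInstr &UseMI,
                                         const TargetRegisterInfo *TRI) {
      return llvm::any_of(
          llvm::make_range(std::next(DefMI.getIterator()), UseMI.getIterator()),
          [TRI](const MachineInstr &MI) {
            return MI.readsRegister(AArch64::NZCV, TRI) ||
                   MI.modifiesRegister(AArch64::NZCV, TRI);
          });
    }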
/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
/// plus Offset. This is intended to be used from within the prolog/epilog
/// insertion (PEI) pass, where a virtual scratch register may be allocated
@@ -369,12 +386,24 @@ static inline bool isCondBranchOpcode(int Opc) {
}
static inline bool isIndirectBranchOpcode(int Opc) {
- return Opc == AArch64::BR;
+ switch (Opc) {
+ case AArch64::BR:
+ case AArch64::BRAA:
+ case AArch64::BRAB:
+ case AArch64::BRAAZ:
+ case AArch64::BRABZ:
+ return true;
+ }
+ return false;
}
+/// Return the opcode to be used for indirect calls.
+unsigned getBLRCallOpcode(const MachineFunction &MF);
+
// struct TSFlags {
#define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits
-#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 1-bit
+#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 4-bits
+#define TSFLAG_FALSE_LANE_TYPE(X) ((X) << 7) // 2-bits
// }
namespace AArch64 {
@@ -389,13 +418,31 @@ enum ElementSizeType {
};
enum DestructiveInstType {
- DestructiveInstTypeMask = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
- NotDestructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0),
- Destructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
+ DestructiveInstTypeMask = TSFLAG_DESTRUCTIVE_INST_TYPE(0xf),
+ NotDestructive = TSFLAG_DESTRUCTIVE_INST_TYPE(0x0),
+ DestructiveOther = TSFLAG_DESTRUCTIVE_INST_TYPE(0x1),
+ DestructiveUnary = TSFLAG_DESTRUCTIVE_INST_TYPE(0x2),
+ DestructiveBinaryImm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x3),
+ DestructiveBinaryShImmUnpred = TSFLAG_DESTRUCTIVE_INST_TYPE(0x4),
+ DestructiveBinary = TSFLAG_DESTRUCTIVE_INST_TYPE(0x5),
+ DestructiveBinaryComm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x6),
+ DestructiveBinaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x7),
+ DestructiveTernaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x8),
+};
+
+enum FalseLaneType {
+ FalseLanesMask = TSFLAG_FALSE_LANE_TYPE(0x3),
+ FalseLanesZero = TSFLAG_FALSE_LANE_TYPE(0x1),
+ FalseLanesUndef = TSFLAG_FALSE_LANE_TYPE(0x2),
};
#undef TSFLAG_ELEMENT_SIZE_TYPE
#undef TSFLAG_DESTRUCTIVE_INST_TYPE
+#undef TSFLAG_FALSE_LANE_TYPE
+
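To make the bit layout above concrete, a standalone mock of the three fields (the specific example encodings are assumed; real code should use the enums and masks declared above):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Mirrors the TSFLAG_* shifts: 3 bits + 4 bits + 2 bits.
      constexpr uint64_t ElementSizeMask = 0x7;                 // bits 0-2
      constexpr uint64_t DestructiveInstTypeMask = 0xfULL << 3; // bits 3-6
      constexpr uint64_t FalseLanesMask = 0x3ULL << 7;          // bits 7-8
      // Assumed flags: element size H, DestructiveBinaryImm, FalseLanesZero.
      uint64_t TSFlags = 0x2 | (0x3ULL << 3) | (0x1ULL << 7);
      assert((TSFlags & ElementSizeMask) == 0x2);
      assert(((TSFlags & DestructiveInstTypeMask) >> 3) == 0x3);
      assert(((TSFlags & FalseLanesMask) >> 7) == 0x1);
      return 0;
    }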
+int getSVEPseudoMap(uint16_t Opcode);
+int getSVERevInstr(uint16_t Opcode);
+int getSVENonRevInstr(uint16_t Opcode);
}
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 1e3bf299b265..f4a5f639e497 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -14,142 +14,154 @@
// ARM Instruction Predicate Definitions.
//
def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
- AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
+ AssemblerPredicate<(all_of HasV8_1aOps), "armv8.1a">;
def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
- AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
+ AssemblerPredicate<(all_of HasV8_2aOps), "armv8.2a">;
def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
- AssemblerPredicate<"HasV8_3aOps", "armv8.3a">;
+ AssemblerPredicate<(all_of HasV8_3aOps), "armv8.3a">;
def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
- AssemblerPredicate<"HasV8_4aOps", "armv8.4a">;
+ AssemblerPredicate<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
- AssemblerPredicate<"HasV8_5aOps", "armv8.5a">;
+ AssemblerPredicate<(all_of HasV8_5aOps), "armv8.5a">;
+def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
+ AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">;
def HasVH : Predicate<"Subtarget->hasVH()">,
- AssemblerPredicate<"FeatureVH", "vh">;
+ AssemblerPredicate<(all_of FeatureVH), "vh">;
def HasLOR : Predicate<"Subtarget->hasLOR()">,
- AssemblerPredicate<"FeatureLOR", "lor">;
+ AssemblerPredicate<(all_of FeatureLOR), "lor">;
def HasPA : Predicate<"Subtarget->hasPA()">,
- AssemblerPredicate<"FeaturePA", "pa">;
+ AssemblerPredicate<(all_of FeaturePA), "pa">;
def HasJS : Predicate<"Subtarget->hasJS()">,
- AssemblerPredicate<"FeatureJS", "jsconv">;
+ AssemblerPredicate<(all_of FeatureJS), "jsconv">;
def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">,
- AssemblerPredicate<"FeatureCCIDX", "ccidx">;
+ AssemblerPredicate<(all_of FeatureCCIDX), "ccidx">;
def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
- AssemblerPredicate<"FeatureComplxNum", "complxnum">;
+ AssemblerPredicate<(all_of FeatureComplxNum), "complxnum">;
def HasNV : Predicate<"Subtarget->hasNV()">,
- AssemblerPredicate<"FeatureNV", "nv">;
+ AssemblerPredicate<(all_of FeatureNV), "nv">;
def HasRASv8_4 : Predicate<"Subtarget->hasRASv8_4()">,
- AssemblerPredicate<"FeatureRASv8_4", "rasv8_4">;
+ AssemblerPredicate<(all_of FeatureRASv8_4), "rasv8_4">;
def HasMPAM : Predicate<"Subtarget->hasMPAM()">,
- AssemblerPredicate<"FeatureMPAM", "mpam">;
+ AssemblerPredicate<(all_of FeatureMPAM), "mpam">;
def HasDIT : Predicate<"Subtarget->hasDIT()">,
- AssemblerPredicate<"FeatureDIT", "dit">;
+ AssemblerPredicate<(all_of FeatureDIT), "dit">;
def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">,
- AssemblerPredicate<"FeatureTRACEV8_4", "tracev8.4">;
+ AssemblerPredicate<(all_of FeatureTRACEV8_4), "tracev8.4">;
def HasAM : Predicate<"Subtarget->hasAM()">,
- AssemblerPredicate<"FeatureAM", "am">;
+ AssemblerPredicate<(all_of FeatureAM), "am">;
def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
- AssemblerPredicate<"FeatureSEL2", "sel2">;
+ AssemblerPredicate<(all_of FeatureSEL2), "sel2">;
def HasPMU : Predicate<"Subtarget->hasPMU()">,
- AssemblerPredicate<"FeaturePMU", "pmu">;
+ AssemblerPredicate<(all_of FeaturePMU), "pmu">;
def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
- AssemblerPredicate<"FeatureTLB_RMI", "tlb-rmi">;
+ AssemblerPredicate<(all_of FeatureTLB_RMI), "tlb-rmi">;
def HasFMI : Predicate<"Subtarget->hasFMI()">,
- AssemblerPredicate<"FeatureFMI", "fmi">;
+ AssemblerPredicate<(all_of FeatureFMI), "fmi">;
def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPCImm()">,
- AssemblerPredicate<"FeatureRCPC_IMMO", "rcpc-immo">;
+ AssemblerPredicate<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
- AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
+ AssemblerPredicate<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
- AssemblerPredicate<"FeatureNEON", "neon">;
+ AssemblerPredicate<(all_of FeatureNEON), "neon">;
def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
- AssemblerPredicate<"FeatureCrypto", "crypto">;
+ AssemblerPredicate<(all_of FeatureCrypto), "crypto">;
def HasSM4 : Predicate<"Subtarget->hasSM4()">,
- AssemblerPredicate<"FeatureSM4", "sm4">;
+ AssemblerPredicate<(all_of FeatureSM4), "sm4">;
def HasSHA3 : Predicate<"Subtarget->hasSHA3()">,
- AssemblerPredicate<"FeatureSHA3", "sha3">;
+ AssemblerPredicate<(all_of FeatureSHA3), "sha3">;
def HasSHA2 : Predicate<"Subtarget->hasSHA2()">,
- AssemblerPredicate<"FeatureSHA2", "sha2">;
+ AssemblerPredicate<(all_of FeatureSHA2), "sha2">;
def HasAES : Predicate<"Subtarget->hasAES()">,
- AssemblerPredicate<"FeatureAES", "aes">;
+ AssemblerPredicate<(all_of FeatureAES), "aes">;
def HasDotProd : Predicate<"Subtarget->hasDotProd()">,
- AssemblerPredicate<"FeatureDotProd", "dotprod">;
+ AssemblerPredicate<(all_of FeatureDotProd), "dotprod">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
- AssemblerPredicate<"FeatureCRC", "crc">;
+ AssemblerPredicate<(all_of FeatureCRC), "crc">;
def HasLSE : Predicate<"Subtarget->hasLSE()">,
- AssemblerPredicate<"FeatureLSE", "lse">;
+ AssemblerPredicate<(all_of FeatureLSE), "lse">;
def HasRAS : Predicate<"Subtarget->hasRAS()">,
- AssemblerPredicate<"FeatureRAS", "ras">;
+ AssemblerPredicate<(all_of FeatureRAS), "ras">;
def HasRDM : Predicate<"Subtarget->hasRDM()">,
- AssemblerPredicate<"FeatureRDM", "rdm">;
+ AssemblerPredicate<(all_of FeatureRDM), "rdm">;
def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
- AssemblerPredicate<"FeatureFullFP16", "fullfp16">;
+ AssemblerPredicate<(all_of FeatureFullFP16), "fullfp16">;
def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
- AssemblerPredicate<"FeatureFP16FML", "fp16fml">;
+ AssemblerPredicate<(all_of FeatureFP16FML), "fp16fml">;
def HasSPE : Predicate<"Subtarget->hasSPE()">,
- AssemblerPredicate<"FeatureSPE", "spe">;
+ AssemblerPredicate<(all_of FeatureSPE), "spe">;
def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
- AssemblerPredicate<"FeatureFuseAES",
+ AssemblerPredicate<(all_of FeatureFuseAES),
"fuse-aes">;
def HasSVE : Predicate<"Subtarget->hasSVE()">,
- AssemblerPredicate<"FeatureSVE", "sve">;
+ AssemblerPredicate<(all_of FeatureSVE), "sve">;
def HasSVE2 : Predicate<"Subtarget->hasSVE2()">,
- AssemblerPredicate<"FeatureSVE2", "sve2">;
+ AssemblerPredicate<(all_of FeatureSVE2), "sve2">;
def HasSVE2AES : Predicate<"Subtarget->hasSVE2AES()">,
- AssemblerPredicate<"FeatureSVE2AES", "sve2-aes">;
+ AssemblerPredicate<(all_of FeatureSVE2AES), "sve2-aes">;
def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">,
- AssemblerPredicate<"FeatureSVE2SM4", "sve2-sm4">;
+ AssemblerPredicate<(all_of FeatureSVE2SM4), "sve2-sm4">;
def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">,
- AssemblerPredicate<"FeatureSVE2SHA3", "sve2-sha3">;
+ AssemblerPredicate<(all_of FeatureSVE2SHA3), "sve2-sha3">;
def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">,
- AssemblerPredicate<"FeatureSVE2BitPerm", "sve2-bitperm">;
+ AssemblerPredicate<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
- AssemblerPredicate<"FeatureRCPC", "rcpc">;
+ AssemblerPredicate<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
- AssemblerPredicate<"FeatureAltFPCmp", "altnzcv">;
+ AssemblerPredicate<(all_of FeatureAltFPCmp), "altnzcv">;
def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">,
- AssemblerPredicate<"FeatureFRInt3264", "frint3264">;
+ AssemblerPredicate<(all_of FeatureFRInt3264), "frint3264">;
def HasSB : Predicate<"Subtarget->hasSB()">,
- AssemblerPredicate<"FeatureSB", "sb">;
+ AssemblerPredicate<(all_of FeatureSB), "sb">;
def HasPredRes : Predicate<"Subtarget->hasPredRes()">,
- AssemblerPredicate<"FeaturePredRes", "predres">;
+ AssemblerPredicate<(all_of FeaturePredRes), "predres">;
def HasCCDP : Predicate<"Subtarget->hasCCDP()">,
- AssemblerPredicate<"FeatureCacheDeepPersist", "ccdp">;
+ AssemblerPredicate<(all_of FeatureCacheDeepPersist), "ccdp">;
def HasBTI : Predicate<"Subtarget->hasBTI()">,
- AssemblerPredicate<"FeatureBranchTargetId", "bti">;
+ AssemblerPredicate<(all_of FeatureBranchTargetId), "bti">;
def HasMTE : Predicate<"Subtarget->hasMTE()">,
- AssemblerPredicate<"FeatureMTE", "mte">;
+ AssemblerPredicate<(all_of FeatureMTE), "mte">;
def HasTME : Predicate<"Subtarget->hasTME()">,
- AssemblerPredicate<"FeatureTME", "tme">;
+ AssemblerPredicate<(all_of FeatureTME), "tme">;
def HasETE : Predicate<"Subtarget->hasETE()">,
- AssemblerPredicate<"FeatureETE", "ete">;
+ AssemblerPredicate<(all_of FeatureETE), "ete">;
def HasTRBE : Predicate<"Subtarget->hasTRBE()">,
- AssemblerPredicate<"FeatureTRBE", "trbe">;
+ AssemblerPredicate<(all_of FeatureTRBE), "trbe">;
+def HasBF16 : Predicate<"Subtarget->hasBF16()">,
+ AssemblerPredicate<(all_of FeatureBF16), "bf16">;
+def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">,
+ AssemblerPredicate<(all_of FeatureMatMulInt8), "i8mm">;
+def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">,
+ AssemblerPredicate<(all_of FeatureMatMulFP32), "f32mm">;
+def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">,
+ AssemblerPredicate<(all_of FeatureMatMulFP64), "f64mm">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
+def UseExperimentalZeroingPseudos
+ : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
def UseAlternateSExtLoadCVTF32
: Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
def UseNegativeImmediates
- : Predicate<"false">, AssemblerPredicate<"!FeatureNoNegativeImmediates",
+ : Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
"NegativeImmediates">;
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
@@ -227,6 +239,10 @@ def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
+def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
+ SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>;
+
def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
@@ -245,6 +261,7 @@ def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
// Generates the general dynamic sequences, i.e.
// adrp x0, :tlsdesc:var
@@ -464,10 +481,12 @@ def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
+def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
+def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;
def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>;
def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
-def AArch64bsl: SDNode<"AArch64ISD::BSL", SDT_AArch64trivec>;
+def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;
def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
@@ -535,6 +554,9 @@ def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
+def AArch64srhadd : SDNode<"AArch64ISD::SRHADD", SDT_AArch64binvec>;
+def AArch64urhadd : SDNode<"AArch64ISD::URHADD", SDT_AArch64binvec>;
+
def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
@@ -551,6 +573,7 @@ def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;
def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
@@ -571,6 +594,8 @@ let RecomputePerFunction = 1 in {
def UseBTI : Predicate<[{ MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
def NotUseBTI : Predicate<[{ !MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
+ def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
+ def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
// Toggles patterns which aren't beneficial in GlobalISel when we aren't
// optimizing. This allows us to selectively use patterns without impacting
// SelectionDAG's behaviour.
@@ -693,6 +718,14 @@ let hasSideEffects = 1, isCodeGenOnly = 1 in {
: Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>;
}
+// SpeculationBarrierEndBB must only be used after unconditional control
+// flow, i.e. after a terminator for which isBarrier is True.
+let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
+ def SpeculationBarrierISBDSBEndBB
+ : Pseudo<(outs), (ins), []>, Sched<[]>;
+ def SpeculationBarrierSBEndBB
+ : Pseudo<(outs), (ins), []>, Sched<[]>;
+}
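A hedged sketch of how such a pseudo would be appended after a block's barrier terminator; MBB, DL and TII are assumed to be in scope and the block is assumed to already end in an unconditional, isBarrier terminator:

    // Append the end-of-block speculation barrier pseudo (sketch only).
    BuildMI(&MBB, DL, TII->get(AArch64::SpeculationBarrierISBDSBEndBB));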
//===----------------------------------------------------------------------===//
// System instructions.
@@ -705,8 +738,15 @@ def : InstAlias<"wfe", (HINT 0b010)>;
def : InstAlias<"wfi", (HINT 0b011)>;
def : InstAlias<"sev", (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;
+def : InstAlias<"dgh", (HINT 0b110)>;
def : InstAlias<"esb", (HINT 0b10000)>, Requires<[HasRAS]>;
def : InstAlias<"csdb", (HINT 20)>;
+// In order to be able to write readable assembly, LLVM should accept assembly
+// inputs that use Branch Target Identification mnemonics, even with BTI disabled.
+// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
+// should not emit these mnemonics unless BTI is enabled.
+def : InstAlias<"bti", (HINT 32), 0>;
+def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>;
def : InstAlias<"bti", (HINT 32)>, Requires<[HasBTI]>;
def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>;
@@ -738,10 +778,58 @@ def TSB : CRmSystemI<barrier_op, 0b010, "tsb", []> {
// ARMv8.2-A Dot Product
let Predicates = [HasDotProd] in {
-defm SDOT : SIMDThreeSameVectorDot<0, "sdot", int_aarch64_neon_sdot>;
-defm UDOT : SIMDThreeSameVectorDot<1, "udot", int_aarch64_neon_udot>;
-defm SDOTlane : SIMDThreeSameVectorDotIndex<0, "sdot", int_aarch64_neon_sdot>;
-defm UDOTlane : SIMDThreeSameVectorDotIndex<1, "udot", int_aarch64_neon_udot>;
+defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", int_aarch64_neon_sdot>;
+defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", int_aarch64_neon_udot>;
+defm SDOTlane : SIMDThreeSameVectorDotIndex<0, 0, 0b10, "sdot", int_aarch64_neon_sdot>;
+defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", int_aarch64_neon_udot>;
+}
+
+// ARMv8.6-A BFloat
+let Predicates = [HasBF16] in {
+defm BFDOT : SIMDThreeSameVectorBFDot<1, "bfdot">;
+defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
+def BFMMLA : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
+def BFMLALB : SIMDBF16MLAL<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
+def BFMLALT : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
+def BFMLALBIdx : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
+def BFMLALTIdx : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
+def BFCVTN : SIMD_BFCVTN;
+def BFCVTN2 : SIMD_BFCVTN2;
+def BFCVT : BF16ToSinglePrecision<"bfcvt">;
+}
+
+// ARMv8.6A AArch64 matrix multiplication
+let Predicates = [HasMatMulInt8] in {
+def SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>;
+def UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>;
+def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>;
+defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", int_aarch64_neon_usdot>;
+defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_neon_usdot>;
+
+// sudot lane has a pattern where usdot is expected (there is no sudot).
+// The second operand is used in the dup operation to repeat the indexed
+// element.
+class BaseSIMDSUDOTIndex<bit Q, string dst_kind, string lhs_kind,
+ string rhs_kind, RegisterOperand RegType,
+ ValueType AccumType, ValueType InputType>
+ : BaseSIMDThreeSameVectorDotIndex<Q, 0, 1, 0b00, "sudot", dst_kind,
+ lhs_kind, rhs_kind, RegType, AccumType,
+ InputType, null_frag> {
+ let Pattern = [(set (AccumType RegType:$dst),
+ (AccumType (int_aarch64_neon_usdot (AccumType RegType:$Rd),
+ (InputType (bitconvert (AccumType
+ (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx)))),
+ (InputType RegType:$Rn))))];
+}
+
+multiclass SIMDSUDOTIndex {
+ def v8i8 : BaseSIMDSUDOTIndex<0, ".2s", ".8b", ".4b", V64, v2i32, v8i8>;
+ def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>;
+}
+
+defm SUDOTlane : SIMDSUDOTIndex;
+
}
// ARMv8.2-A FP16 Fused Multiply-Add Long
@@ -826,38 +914,56 @@ let Predicates = [HasComplxNum, HasNEON] in {
// important for compatibility with other assemblers (e.g. GAS) when building
// software compatible with both CPUs that do or don't implement PA.
let Uses = [LR], Defs = [LR] in {
- def PACIAZ : SystemNoOperands<0b000, "hint #24">;
- def PACIBZ : SystemNoOperands<0b010, "hint #26">;
+ def PACIAZ : SystemNoOperands<0b000, "hint\t#24">;
+ def PACIBZ : SystemNoOperands<0b010, "hint\t#26">;
let isAuthenticated = 1 in {
- def AUTIAZ : SystemNoOperands<0b100, "hint #28">;
- def AUTIBZ : SystemNoOperands<0b110, "hint #30">;
+ def AUTIAZ : SystemNoOperands<0b100, "hint\t#28">;
+ def AUTIBZ : SystemNoOperands<0b110, "hint\t#30">;
}
}
let Uses = [LR, SP], Defs = [LR] in {
- def PACIASP : SystemNoOperands<0b001, "hint #25">;
- def PACIBSP : SystemNoOperands<0b011, "hint #27">;
+ def PACIASP : SystemNoOperands<0b001, "hint\t#25">;
+ def PACIBSP : SystemNoOperands<0b011, "hint\t#27">;
let isAuthenticated = 1 in {
- def AUTIASP : SystemNoOperands<0b101, "hint #29">;
- def AUTIBSP : SystemNoOperands<0b111, "hint #31">;
+ def AUTIASP : SystemNoOperands<0b101, "hint\t#29">;
+ def AUTIBSP : SystemNoOperands<0b111, "hint\t#31">;
}
}
let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in {
- def PACIA1716 : SystemNoOperands<0b000, "hint #8">;
- def PACIB1716 : SystemNoOperands<0b010, "hint #10">;
+ def PACIA1716 : SystemNoOperands<0b000, "hint\t#8">;
+ def PACIB1716 : SystemNoOperands<0b010, "hint\t#10">;
let isAuthenticated = 1 in {
- def AUTIA1716 : SystemNoOperands<0b100, "hint #12">;
- def AUTIB1716 : SystemNoOperands<0b110, "hint #14">;
+ def AUTIA1716 : SystemNoOperands<0b100, "hint\t#12">;
+ def AUTIB1716 : SystemNoOperands<0b110, "hint\t#14">;
}
}
let Uses = [LR], Defs = [LR], CRm = 0b0000 in {
- def XPACLRI : SystemNoOperands<0b111, "hint #7">;
-}
+ def XPACLRI : SystemNoOperands<0b111, "hint\t#7">;
+}
+
+// In order to be able to write readable assembly, LLVM should accept assembly
+// inputs that use pointer authentication mnemonics, even with PA disabled.
+// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
+// should not emit these mnemonics unless PA is enabled.
+def : InstAlias<"paciaz", (PACIAZ), 0>;
+def : InstAlias<"pacibz", (PACIBZ), 0>;
+def : InstAlias<"autiaz", (AUTIAZ), 0>;
+def : InstAlias<"autibz", (AUTIBZ), 0>;
+def : InstAlias<"paciasp", (PACIASP), 0>;
+def : InstAlias<"pacibsp", (PACIBSP), 0>;
+def : InstAlias<"autiasp", (AUTIASP), 0>;
+def : InstAlias<"autibsp", (AUTIBSP), 0>;
+def : InstAlias<"pacia1716", (PACIA1716), 0>;
+def : InstAlias<"pacib1716", (PACIB1716), 0>;
+def : InstAlias<"autia1716", (AUTIA1716), 0>;
+def : InstAlias<"autib1716", (AUTIB1716), 0>;
+def : InstAlias<"xpaclri", (XPACLRI), 0>;
// These pointer authentication instructions require armv8.3a
let Predicates = [HasPA] in {
- // When compiling with PA, there is a better mnemonic for these instructions.
+ // When PA is enabled, a better mnemonic should be emitted.
def : InstAlias<"paciaz", (PACIAZ), 1>;
def : InstAlias<"pacibz", (PACIBZ), 1>;
def : InstAlias<"autiaz", (AUTIAZ), 1>;
@@ -891,15 +997,23 @@ let Predicates = [HasPA] in {
def PACGA : SignAuthTwoOperand<0b1100, "pacga", null_frag>;
// Combined Instructions
- def BRAA : AuthBranchTwoOperands<0, 0, "braa">;
- def BRAB : AuthBranchTwoOperands<0, 1, "brab">;
- def BLRAA : AuthBranchTwoOperands<1, 0, "blraa">;
- def BLRAB : AuthBranchTwoOperands<1, 1, "blrab">;
+ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def BRAA : AuthBranchTwoOperands<0, 0, "braa">;
+ def BRAB : AuthBranchTwoOperands<0, 1, "brab">;
+ }
+ let isCall = 1, Defs = [LR], Uses = [SP] in {
+ def BLRAA : AuthBranchTwoOperands<1, 0, "blraa">;
+ def BLRAB : AuthBranchTwoOperands<1, 1, "blrab">;
+ }
- def BRAAZ : AuthOneOperand<0b000, 0, "braaz">;
- def BRABZ : AuthOneOperand<0b000, 1, "brabz">;
- def BLRAAZ : AuthOneOperand<0b001, 0, "blraaz">;
- def BLRABZ : AuthOneOperand<0b001, 1, "blrabz">;
+ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def BRAAZ : AuthOneOperand<0b000, 0, "braaz">;
+ def BRABZ : AuthOneOperand<0b000, 1, "brabz">;
+ }
+ let isCall = 1, Defs = [LR], Uses = [SP] in {
+ def BLRAAZ : AuthOneOperand<0b001, 0, "blraaz">;
+ def BLRABZ : AuthOneOperand<0b001, 1, "blrabz">;
+ }
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RETAA : AuthReturn<0b010, 0, "retaa">;
@@ -1545,17 +1659,29 @@ def TAGPstack
// register / expression for the tagged base pointer of the current function.
def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;
-// Large STG to be expanded into a loop. $Rm is the size, $Rn is start address.
-// $Rn_wback is one past the end of the range.
+// Large STG to be expanded into a loop. $sz is the size, $Rn is the start address.
+// $Rn_wback is one past the end of the range. $Rm is the loop counter.
let isCodeGenOnly=1, mayStore=1 in {
+def STGloop_wback
+ : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
+ [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
+ Sched<[WriteAdr, WriteST]>;
+
+def STZGloop_wback
+ : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
+ [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
+ Sched<[WriteAdr, WriteST]>;
+
+// Variants of the above where $Rn2 is an independent register not tied to the input register $Rn.
+// Their purpose is to allow a FrameIndex operand as $Rn (which of course cannot be written back).
def STGloop
- : Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn),
- [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,$Rm = $Rm_wback,@earlyclobber $Rm_wback" >,
+ : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn),
+ [], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
Sched<[WriteAdr, WriteST]>;
def STZGloop
- : Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn),
- [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,$Rm = $Rm_wback,@earlyclobber $Rm_wback" >,
+ : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn),
+ [], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
Sched<[WriteAdr, WriteST]>;
}
@@ -1901,9 +2027,19 @@ def ERET : SpecialReturn<0b0100, "eret">;
def : InstAlias<"ret", (RET LR)>;
let isCall = 1, Defs = [LR], Uses = [SP] in {
-def BLR : BranchReg<0b0001, "blr", [(AArch64call GPR64:$Rn)]>;
+ def BLR : BranchReg<0b0001, "blr", []>;
+ def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
+ Sched<[WriteBrReg]>,
+ PseudoInstExpansion<(BLR GPR64:$Rn)>;
} // isCall
+def : Pat<(AArch64call GPR64:$Rn),
+ (BLR GPR64:$Rn)>,
+ Requires<[NoSLSBLRMitigation]>;
+def : Pat<(AArch64call GPR64noip:$Rn),
+ (BLRNoIP GPR64noip:$Rn)>,
+ Requires<[SLSBLRMitigation]>;
+
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch
@@ -2136,6 +2272,7 @@ let Predicates = [IsLE] in {
defm : VecROLoadPat<ro64, v8i8, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>;
+ defm : VecROLoadPat<ro64, v4bf16, LDRDroW, LDRDroX>;
}
defm : VecROLoadPat<ro64, v1i64, LDRDroW, LDRDroX>;
@@ -2150,6 +2287,7 @@ let Predicates = [IsLE] in {
defm : VecROLoadPat<ro128, v4f32, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v8i16, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v8f16, LDRQroW, LDRQroX>;
+ defm : VecROLoadPat<ro128, v8bf16, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v16i8, LDRQroW, LDRQroX>;
}
} // AddedComplexity = 10
@@ -2232,6 +2370,10 @@ defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr",
[(set (f128 FPR128Op:$Rt),
(load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;
+// bf16 load pattern
+def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
+ (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
+
// For regular load, we do not have any alignment requirement.
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
@@ -2281,6 +2423,8 @@ let Predicates = [IsLE] in {
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(v4bf16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
@@ -2304,6 +2448,8 @@ let Predicates = [IsLE] in {
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(v8bf16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
+ (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
@@ -2388,11 +2534,11 @@ def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;
def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) {
const DataLayout &DL = MF->getDataLayout();
- MaybeAlign Align = G->getGlobal()->getPointerAlignment(DL);
- return Align && *Align >= 4 && G->getOffset() % 4 == 0;
+ Align Align = G->getGlobal()->getPointerAlignment(DL);
+ return Align >= 4 && G->getOffset() % 4 == 0;
}
if (auto *C = dyn_cast<ConstantPoolSDNode>(N))
- return C->getAlignment() >= 4 && C->getOffset() % 4 == 0;
+ return C->getAlign() >= 4 && C->getOffset() % 4 == 0;
return false;
}]>;
@@ -2432,7 +2578,7 @@ defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
[(set FPR8Op:$Rt,
(load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
- [(set FPR16Op:$Rt,
+ [(set (f16 FPR16Op:$Rt),
(load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur",
[(set (f32 FPR32Op:$Rt),
@@ -2729,6 +2875,10 @@ defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;
def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
(STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;
+def : Pat<(AArch64stnp FPR128:$Rt, FPR128:$Rt2, (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
+ (STNPQi FPR128:$Rt, FPR128:$Rt2, GPR64sp:$Rn, simm7s16:$offset)>;
+
+
//---
// (Register offset)
@@ -2798,6 +2948,7 @@ let Predicates = [IsLE] in {
defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v4f16, FPR64, STRDroW, STRDroX>;
+ defm : VecROStorePat<ro64, v4bf16, FPR64, STRDroW, STRDroX>;
}
defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
@@ -2813,6 +2964,7 @@ let Predicates = [IsLE, UseSTRQro] in {
defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v8f16, FPR128, STRQroW, STRQroX>;
+ defm : VecROStorePat<ro128, v8bf16, FPR128, STRQroW, STRQroX>;
}
} // AddedComplexity = 10
@@ -2873,6 +3025,11 @@ defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1, "strb",
(am_indexed8 GPR64sp:$Rn,
uimm12s1:$offset))]>;
+// bf16 store pattern
+def : Pat<(store (bf16 FPR16Op:$Rt),
+ (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
+ (STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>;
+
let AddedComplexity = 10 in {
// Match all store 64 bits width whose type is compatible with FPR64
@@ -2900,6 +3057,9 @@ let Predicates = [IsLE] in {
def : Pat<(store (v4f16 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
(STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(store (v4bf16 FPR64:$Rt),
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
+ (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
}
// Match all store 128 bits width whose type is compatible with FPR128
@@ -2930,6 +3090,9 @@ let Predicates = [IsLE] in {
def : Pat<(store (v8f16 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(store (v8bf16 FPR128:$Rt),
+ (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
+ (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}
// truncstore i64
@@ -3037,6 +3200,9 @@ let Predicates = [IsLE] in {
def : Pat<(store (v4f16 FPR64:$Rt),
(am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
(STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v4bf16 FPR64:$Rt),
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+ (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}
// Match all store 128 bits width whose type is compatible with FPR128
@@ -3069,6 +3235,9 @@ let Predicates = [IsLE] in {
def : Pat<(store (v8f16 FPR128:$Rt),
(am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
(STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v8bf16 FPR128:$Rt),
+ (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+ (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}
} // AddedComplexity = 10
@@ -3610,10 +3779,6 @@ let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
Sched<[]>;
}
-let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1,
- usesCustomInserter = 1 in
-def CATCHPAD : Pseudo<(outs), (ins), [(catchpad)]>, Sched<[]>;
-
//===----------------------------------------------------------------------===//
// Floating point immediate move.
//===----------------------------------------------------------------------===//
@@ -3795,12 +3960,16 @@ defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>
defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;
-def : Pat<(v4f16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>;
-def : Pat<(v4f16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>;
-def : Pat<(v8f16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
-def : Pat<(v8f16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
-def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>;
-def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
+def : Pat<(v4f16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>;
+def : Pat<(v4f16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>;
+def : Pat<(v4bf16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>;
+def : Pat<(v4bf16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>;
+def : Pat<(v8f16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
+def : Pat<(v8f16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
+def : Pat<(v8bf16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
+def : Pat<(v8bf16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
+def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>;
+def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
// Patterns for vector long shift (by element width). These need to match all
// three of zext, sext and anyext so it's easier to pull the patterns out of the
@@ -3907,7 +4076,7 @@ defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrd
defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
-defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_aarch64_neon_srhadd>;
+defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", AArch64srhadd>;
defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
@@ -3924,7 +4093,7 @@ defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
-defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>;
+defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", AArch64urhadd>;
defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
@@ -3941,33 +4110,36 @@ defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;
defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
-defm BIF : SIMDLogicalThreeVector<1, 0b11, "bif">;
-defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
-defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl",
- TriOpFrag<(or (and node:$LHS, node:$MHS), (and (vnot node:$LHS), node:$RHS))>>;
defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
-
-def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
- (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
-def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
- (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
-def : Pat<(AArch64bsl (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
- (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
-def : Pat<(AArch64bsl (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
- (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
-
-def : Pat<(AArch64bsl (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
- (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
-def : Pat<(AArch64bsl (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
- (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
-def : Pat<(AArch64bsl (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
- (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
-def : Pat<(AArch64bsl (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
- (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
+// Pseudo bitwise select pattern BSP.
+// It is expanded into BSL/BIT/BIF after register allocation.
+defm BSP : SIMDLogicalThreeVectorPseudo<TriOpFrag<(or (and node:$LHS, node:$MHS),
+ (and (vnot node:$LHS), node:$RHS))>>;
+defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">;
+defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
+defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">;
+
+def : Pat<(AArch64bsp (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
+ (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
+def : Pat<(AArch64bsp (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
+ (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
+def : Pat<(AArch64bsp (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
+ (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
+def : Pat<(AArch64bsp (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
+ (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
+
+def : Pat<(AArch64bsp (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
+ (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
+def : Pat<(AArch64bsp (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
+ (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
+def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
+ (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
+def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
+ (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
(ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
@@ -4676,6 +4848,7 @@ multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
defm : ExtPat<v8i8, v16i8, 8>;
defm : ExtPat<v4i16, v8i16, 4>;
defm : ExtPat<v4f16, v8f16, 4>;
+defm : ExtPat<v4bf16, v8bf16, 4>;
defm : ExtPat<v2i32, v4i32, 2>;
defm : ExtPat<v2f32, v4f32, 2>;
defm : ExtPat<v1i64, v2i64, 1>;
@@ -4797,16 +4970,29 @@ def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
(v4f16 (DUPv4i16lane
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
(i64 0)))>;
+def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))),
+ (v4bf16 (DUPv4i16lane
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
+ (i64 0)))>;
def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
(v8f16 (DUPv8i16lane
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
(i64 0)))>;
+def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))),
+ (v8bf16 (DUPv8i16lane
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
+ (i64 0)))>;
def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
(DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
(DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;
+def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
+ (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
+def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
+ (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;
+
def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
(DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
@@ -4922,6 +5108,11 @@ def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
(INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
+def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
+ (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
+def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
+ (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
+
def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
(v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
(i32 FPR32:$Rn), ssub))>;
@@ -4938,6 +5129,11 @@ def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
(INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
+def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
+ (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
+def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
+ (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
+
def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
@@ -4963,6 +5159,23 @@ def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
(v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
(i64 0))>;
+def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn),
+ (bf16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
+ (EXTRACT_SUBREG
+ (INSvi16lane
+ (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
+ VectorIndexS:$imm,
+ (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
+ (i64 0)),
+ dsub)>;
+
+def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn),
+ (bf16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
+ (INSvi16lane
+ V128:$Rn, VectorIndexH:$imm,
+ (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
+ (i64 0))>;
+
def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
(f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
(EXTRACT_SUBREG
@@ -5044,6 +5257,7 @@ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
}
defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
+defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
@@ -5057,6 +5271,9 @@ def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
(f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
(f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
+def : Pat<(vector_extract (v8bf16 V128:$Rn), 0),
+ (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
+
def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
(f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>;
@@ -5064,6 +5281,8 @@ def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
(f32 (CPYi32 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
(f16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>;
+def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
+ (bf16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>;
// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which had just as well be
@@ -5079,6 +5298,7 @@ def : ConcatPat<v4i32, v2i32>;
def : ConcatPat<v4f32, v2f32>;
def : ConcatPat<v8i16, v4i16>;
def : ConcatPat<v8f16, v4f16>;
+def : ConcatPat<v8bf16, v4bf16>;
def : ConcatPat<v16i8, v8i8>;
// If the high lanes are undef, though, we can just ignore them:
@@ -5620,6 +5840,11 @@ def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
+defm SQDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane,
+ int_aarch64_neon_sqdmulh_laneq>;
+defm SQRDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane,
+ int_aarch64_neon_sqrdmulh_laneq>;
+
// Generated by MachineCombine
defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>;
defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>;
@@ -5787,8 +6012,8 @@ defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
-defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_aarch64_neon_vsli>;
-def : Pat<(v1i64 (int_aarch64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
+defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
+def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
(i32 vecshiftL64:$imm))),
(SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
@@ -5801,8 +6026,8 @@ defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
int_aarch64_neon_sqshrn>;
defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
int_aarch64_neon_sqshrun>;
-defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_aarch64_neon_vsri>;
-def : Pat<(v1i64 (int_aarch64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
+defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
+def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
(i32 vecshiftR64:$imm))),
(SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
@@ -6154,6 +6379,10 @@ def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
(LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
(LD1Rv8h GPR64sp:$Rn)>;
+def : Pat<(v4bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
+ (LD1Rv4h GPR64sp:$Rn)>;
+def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
+ (LD1Rv8h GPR64sp:$Rn)>;
class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction LD1>
@@ -6168,6 +6397,7 @@ def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>;
def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>;
def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>;
def : Ld1Lane128Pat<load, VectorIndexH, v8f16, f16, LD1i16>;
+def : Ld1Lane128Pat<load, VectorIndexH, v8bf16, bf16, LD1i16>;
class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction LD1>
@@ -6183,6 +6413,7 @@ def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>;
def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>;
def : Ld1Lane64Pat<load, VectorIndexH, v4f16, f16, LD1i16>;
+def : Ld1Lane64Pat<load, VectorIndexH, v4bf16, bf16, LD1i16>;
defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
@@ -6211,6 +6442,7 @@ def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>;
def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>;
def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>;
def : St1Lane128Pat<store, VectorIndexH, v8f16, f16, ST1i16>;
+def : St1Lane128Pat<store, VectorIndexH, v8bf16, bf16, ST1i16>;
let AddedComplexity = 19 in
class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
@@ -6226,6 +6458,7 @@ def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>;
def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>;
def : St1Lane64Pat<store, VectorIndexH, v4f16, f16, ST1i16>;
+def : St1Lane64Pat<store, VectorIndexH, v4bf16, bf16, ST1i16>;
multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction ST1,
@@ -6251,6 +6484,7 @@ defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>;
+defm : St1LanePost64Pat<post_store, VectorIndexH, v4bf16, bf16, ST1i16_POST, 2>;
multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction ST1,
@@ -6275,6 +6509,7 @@ defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>;
+defm : St1LanePost128Pat<post_store, VectorIndexH, v8bf16, bf16, ST1i16_POST, 2>;
let mayStore = 1, hasSideEffects = 0 in {
defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>;
@@ -6515,6 +6750,7 @@ def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4bf16 (AArch64NvCast (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
@@ -6522,12 +6758,14 @@ def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4bf16 (AArch64NvCast (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4bf16 (AArch64NvCast (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
@@ -6535,6 +6773,7 @@ def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4bf16 (AArch64NvCast (f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
@@ -6551,6 +6790,7 @@ def : Pat<(v1f64 (AArch64NvCast (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8bf16 (AArch64NvCast (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
@@ -6559,6 +6799,7 @@ def : Pat<(v2f64 (AArch64NvCast (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8bf16 (AArch64NvCast (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
@@ -6567,6 +6808,7 @@ def : Pat<(v2f64 (AArch64NvCast (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8bf16 (AArch64NvCast (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
@@ -6575,6 +6817,7 @@ def : Pat<(v2f64 (AArch64NvCast (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8bf16 (AArch64NvCast (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
@@ -6586,6 +6829,7 @@ def : Pat<(v4i32 (AArch64NvCast (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v4f32 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8bf16 (AArch64NvCast (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
@@ -6594,6 +6838,7 @@ def : Pat<(v4i32 (AArch64NvCast (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8bf16 (AArch64NvCast (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
let Predicates = [IsLE] in {
@@ -6601,6 +6846,7 @@ def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
@@ -6611,6 +6857,8 @@ def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
+ (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
@@ -6625,6 +6873,8 @@ def : Pat<(v2i32 (bitconvert GPR64:$Xn)),
(REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4f16 (bitconvert GPR64:$Xn)),
(REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
+def : Pat<(v4bf16 (bitconvert GPR64:$Xn)),
+ (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)),
(REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
@@ -6636,6 +6886,8 @@ def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
(REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
(REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
+def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
+ (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
(REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
}
@@ -6665,6 +6917,7 @@ def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
@@ -6676,6 +6929,8 @@ def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))),
(v1i64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))),
(v1i64 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))),
+ (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))),
(v1i64 (REV64v2i32 FPR64:$src))>;
}
@@ -6689,6 +6944,7 @@ def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), (v2i32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))),
@@ -6703,6 +6959,8 @@ def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))),
(v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))),
(v2i32 (REV32v4i16 FPR64:$src))>;
+def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))),
+ (v2i32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
@@ -6729,6 +6987,7 @@ def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
(v4i16 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v4bf16 FPR64:$src))), (v4i16 FPR64:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>;
@@ -6737,6 +6996,13 @@ def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>;
+
+def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), (v4bf16 FPR64:$src)>;
+def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>;
+def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>;
+def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
+def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), (v4bf16 FPR64:$src)>;
+def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))),
@@ -6751,8 +7017,22 @@ def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))),
(v4f16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))),
(v4f16 (REV64v4i16 FPR64:$src))>;
+
+def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))),
+ (v4bf16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))),
+ (v4bf16 (REV32v4i16 FPR64:$src))>;
+def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))),
+ (v4bf16 (REV16v8i8 FPR64:$src))>;
+def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))),
+ (v4bf16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))),
+ (v4bf16 (REV32v4i16 FPR64:$src))>;
+def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))),
+ (v4bf16 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4bf16 (bitconvert (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>;
@@ -6762,6 +7042,7 @@ def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))), (v8i8 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))),
@@ -6778,6 +7059,8 @@ def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))),
(v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))),
(v8i8 (REV16v8i8 FPR64:$src))>;
+def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))),
+ (v8i8 (REV16v8i8 FPR64:$src))>;
}
let Predicates = [IsLE] in {
@@ -6786,6 +7069,7 @@ def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>;
+def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))), (f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))),
@@ -6798,6 +7082,8 @@ def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))),
(f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))),
(f64 (REV64v4i16 FPR64:$src))>;
+def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))),
+ (f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
@@ -6808,6 +7094,7 @@ def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>;
+def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), (v1f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
@@ -6820,6 +7107,8 @@ def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
(v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))),
(v1f64 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))),
+ (v1f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
@@ -6831,6 +7120,7 @@ def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), (v2f32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
@@ -6845,6 +7135,8 @@ def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))),
(v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))),
(v2f32 (REV32v4i16 FPR64:$src))>;
+def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))),
+ (v2f32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
@@ -6855,6 +7147,7 @@ def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>;
+def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
}
let Predicates = [IsBE] in {
@@ -6869,6 +7162,9 @@ def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))),
(f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
(REV64v8i16 FPR128:$src), (i32 8)))>;
+def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))),
+ (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
+ (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
(f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
@@ -6884,6 +7180,7 @@ def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
}
@@ -6897,6 +7194,8 @@ def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
(v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))),
(v2f64 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))),
+ (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
(v2f64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
@@ -6908,6 +7207,7 @@ let Predicates = [IsLE] in {
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
@@ -6920,6 +7220,8 @@ def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
(v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))),
(v4f32 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))),
+ (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
(v4f32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
@@ -6936,6 +7238,7 @@ def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), (v2i64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
@@ -6951,6 +7254,8 @@ def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
(v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))),
(v2i64 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))),
+ (v2i64 (REV64v8i16 FPR128:$src))>;
}
def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
@@ -6961,6 +7266,7 @@ def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), (v4i32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))),
@@ -6977,6 +7283,8 @@ def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
(v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))),
(v4i32 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))),
+ (v4i32 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
@@ -7005,6 +7313,7 @@ def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
(v8i16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v8bf16 FPR128:$src))), (v8i16 FPR128:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>;
@@ -7013,6 +7322,13 @@ def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
+
+def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))), (v8bf16 FPR128:$src)>;
+def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>;
+def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>;
+def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>;
+def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>;
+def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))),
@@ -7029,8 +7345,24 @@ def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))),
(v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))),
(v8f16 (REV32v8i16 FPR128:$src))>;
+
+def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))),
+ (v8bf16 (EXTv16i8 (REV64v8i16 FPR128:$src),
+ (REV64v8i16 FPR128:$src),
+ (i32 8)))>;
+def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))),
+ (v8bf16 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))),
+ (v8bf16 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))),
+ (v8bf16 (REV16v16i8 FPR128:$src))>;
+def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))),
+ (v8bf16 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))),
+ (v8bf16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8bf16 (bitconvert (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
@@ -7040,6 +7372,7 @@ def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))),
@@ -7058,6 +7391,8 @@ def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
(v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
(v16i8 (REV16v16i8 FPR128:$src))>;
+def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))),
+ (v16i8 (REV16v16i8 FPR128:$src))>;
}
def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))),
@@ -7068,6 +7403,8 @@ def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))),
(EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))),
(EXTRACT_SUBREG V128:$Rn, dsub)>;
+def : Pat<(v4bf16 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))),
(EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))),
@@ -7099,6 +7436,8 @@ multiclass InsertSubvectorUndef<ValueType Ty> {
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)),
(INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+ def : Pat<(insert_subvector undef, (v4bf16 FPR64:$src), (Ty 0)),
+ (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
}
@@ -7324,3 +7663,5 @@ let AddedComplexity = 10 in {
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
+
+include "AArch64InstrGISel.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index cbca29b63b70..d975b8bd04fe 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -68,7 +69,7 @@ static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
// Enable register renaming to find additional store pairing opportunities.
static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
- cl::init(false), cl::Hidden);
+ cl::init(true), cl::Hidden);
#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
@@ -677,14 +678,14 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
"Expected promotable zero stores.");
- MachineBasicBlock::iterator NextI = I;
- ++NextI;
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator NextI = next_nodbg(I, E);
// If NextI is the second of the two instructions to be merged, we need
// to skip one further. Either way we merge will invalidate the iterator,
// and we don't need to scan the new instruction, as it's a pairwise
// instruction, which we're not considering for further action anyway.
if (NextI == MergeMI)
- ++NextI;
+ NextI = next_nodbg(NextI, E);
unsigned Opc = I->getOpcode();
bool IsScaled = !TII->isUnscaledLdSt(Opc);
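This and the following hunks in AArch64LoadStoreOptimizer.cpp replace bare ++/-- iterator stepping with next_nodbg/prev_nodbg and instructionsWithoutDebug, so that DBG_VALUE-style debug instructions do not perturb which loads and stores get merged and codegen stays the same with or without debug info. A minimal self-contained sketch of that skip-debug stepping pattern (Instr and isDebugInstr are placeholders, not the LLVM types):

#include <vector>

struct Instr {
  bool IsDebug = false;
  bool isDebugInstr() const { return IsDebug; }
};

using Iter = std::vector<Instr>::iterator;

// Advance one step, then keep advancing while the iterator rests on a debug
// instruction, never moving past End. The real helpers operate on
// MachineBasicBlock iterators but express the same idea.
Iter nextNoDebug(Iter It, Iter End) {
  do {
    ++It;
  } while (It != End && It->isDebugInstr());
  return It;
}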
@@ -747,18 +748,17 @@ static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
const TargetRegisterInfo *TRI, unsigned Limit,
std::function<bool(MachineInstr &, bool)> &Fn) {
auto MBB = MI.getParent();
- for (MachineBasicBlock::reverse_iterator I = MI.getReverseIterator(),
- E = MBB->rend();
- I != E; I++) {
+ for (MachineInstr &I :
+ instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
if (!Limit)
return false;
--Limit;
- bool isDef = any_of(I->operands(), [DefReg, TRI](MachineOperand &MOP) {
+ bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
TRI->regsOverlap(MOP.getReg(), DefReg);
});
- if (!Fn(*I, isDef))
+ if (!Fn(I, isDef))
return false;
if (isDef)
break;
@@ -782,14 +782,14 @@ MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
const LdStPairFlags &Flags) {
- MachineBasicBlock::iterator NextI = I;
- ++NextI;
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator NextI = next_nodbg(I, E);
// If NextI is the second of the two instructions to be merged, we need
// to skip one further. Either way we merge will invalidate the iterator,
// and we don't need to scan the new instruction, as it's a pairwise
// instruction, which we're not considering for further action anyway.
if (NextI == Paired)
- ++NextI;
+ NextI = next_nodbg(NextI, E);
int SExtIdx = Flags.getSExtIdx();
unsigned Opc =
@@ -1008,8 +1008,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
MachineBasicBlock::iterator StoreI) {
- MachineBasicBlock::iterator NextI = LoadI;
- ++NextI;
+ MachineBasicBlock::iterator NextI =
+ next_nodbg(LoadI, LoadI->getParent()->end());
int LoadSize = TII->getMemScale(*LoadI);
int StoreSize = TII->getMemScale(*StoreI);
@@ -1144,24 +1144,11 @@ static int alignTo(int Num, int PowOf2) {
return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
-static bool mayAlias(MachineInstr &MIa, MachineInstr &MIb,
- AliasAnalysis *AA) {
- // One of the instructions must modify memory.
- if (!MIa.mayStore() && !MIb.mayStore())
- return false;
-
- // Both instructions must be memory operations.
- if (!MIa.mayLoadOrStore() && !MIb.mayLoadOrStore())
- return false;
-
- return MIa.mayAlias(AA, MIb, /*UseTBAA*/false);
-}
-
static bool mayAlias(MachineInstr &MIa,
SmallVectorImpl<MachineInstr *> &MemInsns,
AliasAnalysis *AA) {
for (MachineInstr *MIb : MemInsns)
- if (mayAlias(MIa, *MIb, AA))
+ if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
return true;
return false;
@@ -1187,7 +1174,7 @@ bool AArch64LoadStoreOpt::findMatchingStore(
unsigned Count = 0;
do {
- --MBBI;
+ MBBI = prev_nodbg(MBBI, B);
MachineInstr &MI = *MBBI;
// Don't count transient instructions towards the search limit since there
@@ -1219,7 +1206,7 @@ bool AArch64LoadStoreOpt::findMatchingStore(
return false;
// If we encounter a store aliased with the load, return early.
- if (MI.mayStore() && mayAlias(LoadMI, MI, AA))
+ if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false))
return false;
} while (MBBI != B && Count < Limit);
return false;
@@ -1300,7 +1287,23 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI << "\n");
return false;
}
- auto canRenameMOP = [](const MachineOperand &MOP) {
+ auto canRenameMOP = [TRI](const MachineOperand &MOP) {
+ if (MOP.isReg()) {
+ auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
+ // Renaming registers with multiple disjunct sub-registers (e.g. the
+ // result of a LD3) means that all sub-registers are renamed, potentially
+ // impacting other instructions we did not check. Bail out.
+ // Note that this relies on the structure of the AArch64 register file. In
+ // particular, a subregister cannot be written without overwriting the
+ // whole register.
+ if (RegClass->HasDisjunctSubRegs) {
+ LLVM_DEBUG(
+ dbgs()
+ << " Cannot rename operands with multiple disjunct subregisters ("
+ << MOP << ")\n");
+ return false;
+ }
+ }
return MOP.isImplicit() ||
(MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
};
@@ -1439,7 +1442,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator MBBI = I;
MachineBasicBlock::iterator MBBIWithRenameReg;
MachineInstr &FirstMI = *I;
- ++MBBI;
+ MBBI = next_nodbg(MBBI, E);
bool MayLoad = FirstMI.mayLoad();
bool IsUnscaled = TII->isUnscaledLdSt(FirstMI);
@@ -1467,7 +1470,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// Remember any instructions that read/write memory between FirstMI and MI.
SmallVector<MachineInstr *, 4> MemInsns;
- for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
+ for (unsigned Count = 0; MBBI != E && Count < Limit;
+ MBBI = next_nodbg(MBBI, E)) {
MachineInstr &MI = *MBBI;
UsedInBetween.accumulate(MI);
@@ -1636,12 +1640,13 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
assert((Update->getOpcode() == AArch64::ADDXri ||
Update->getOpcode() == AArch64::SUBXri) &&
"Unexpected base register update instruction to merge!");
- MachineBasicBlock::iterator NextI = I;
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator NextI = next_nodbg(I, E);
// Return the instruction following the merged instruction, which is
// the instruction following our unmerged load. Unless that's the add/sub
// instruction we're merging, in which case it's the one after that.
- if (++NextI == Update)
- ++NextI;
+ if (NextI == Update)
+ NextI = next_nodbg(NextI, E);
int Value = Update->getOperand(2).getImm();
assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
@@ -1779,8 +1784,24 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
// insn (inclusive) and the second insn.
ModifiedRegUnits.clear();
UsedRegUnits.clear();
- ++MBBI;
- for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
+ MBBI = next_nodbg(MBBI, E);
+
+ // We can't post-increment the stack pointer if any instruction between
+ // the memory access (I) and the increment (MBBI) can access the memory
+ // region defined by [SP, MBBI].
+ const bool BaseRegSP = BaseReg == AArch64::SP;
+ if (BaseRegSP) {
+ // FIXME: For now, we always block the optimization over SP on Windows
+ // targets, as it requires adjusting the unwind/debug info; getting the
+ // unwind info wrong can actually cause a miscompile.
+ const MCAsmInfo *MAI = I->getMF()->getTarget().getMCAsmInfo();
+ if (MAI->usesWindowsCFI() &&
+ I->getMF()->getFunction().needsUnwindTableEntry())
+ return E;
+ }
+
+ for (unsigned Count = 0; MBBI != E && Count < Limit;
+ MBBI = next_nodbg(MBBI, E)) {
MachineInstr &MI = *MBBI;
// Don't count transient instructions towards the search limit since there
@@ -1797,8 +1818,11 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
// Otherwise, if the base register is used or modified, we have no match, so
// return early.
+ // If we are optimizing SP, do not allow instructions that may load or store
+ // in between the load and the optimized value update.
if (!ModifiedRegUnits.available(BaseReg) ||
- !UsedRegUnits.available(BaseReg))
+ !UsedRegUnits.available(BaseReg) ||
+ (BaseRegSP && MBBI->mayLoadOrStore()))
return E;
}
return E;
@@ -1835,7 +1859,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
UsedRegUnits.clear();
unsigned Count = 0;
do {
- --MBBI;
+ MBBI = prev_nodbg(MBBI, B);
MachineInstr &MI = *MBBI;
// Don't count transient instructions towards the search limit since there
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
new file mode 100644
index 000000000000..a37e38072554
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -0,0 +1,32 @@
+//=- AArch64MachineFunctionInfo.cpp - AArch64 Machine Function Info ---------=//
+
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements AArch64-specific per-machine-function
+/// information.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MachineFunctionInfo.h"
+
+using namespace llvm;
+
+yaml::AArch64FunctionInfo::AArch64FunctionInfo(
+ const llvm::AArch64FunctionInfo &MFI)
+ : HasRedZone(MFI.hasRedZone()) {}
+
+void yaml::AArch64FunctionInfo::mappingImpl(yaml::IO &YamlIO) {
+ MappingTraits<AArch64FunctionInfo>::mapping(YamlIO, *this);
+}
+
+void AArch64FunctionInfo::initializeBaseYamlFields(
+ const yaml::AArch64FunctionInfo &YamlMFI) {
+ if (YamlMFI.HasRedZone.hasValue())
+ HasRedZone = YamlMFI.HasRedZone;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 6ddb3fdb0046..84aa53f2bece 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/Function.h"
@@ -26,6 +27,10 @@
namespace llvm {
+namespace yaml {
+struct AArch64FunctionInfo;
+} // end namespace yaml
+
class MachineInstr;
/// AArch64FunctionInfo - This class is derived from MachineFunctionInfo and
@@ -126,6 +131,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
// stack slot.
unsigned TaggedBasePointerOffset = 0;
+ /// OutliningStyle denotes, if a function was outlined, how it was outlined,
+ /// e.g. Tail Call, Thunk, or Function if none apply.
+ Optional<std::string> OutliningStyle;
+
public:
AArch64FunctionInfo() = default;
@@ -137,6 +146,7 @@ public:
if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone))
HasRedZone = false;
}
+ void initializeBaseYamlFields(const yaml::AArch64FunctionInfo &YamlMFI);
unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; }
void setBytesInStackArgArea(unsigned bytes) { BytesInStackArgArea = bytes; }
@@ -173,6 +183,9 @@ public:
void setLocalStackSize(uint64_t Size) { LocalStackSize = Size; }
uint64_t getLocalStackSize() const { return LocalStackSize; }
+ void setOutliningStyle(std::string Style) { OutliningStyle = Style; }
+ Optional<std::string> getOutliningStyle() const { return OutliningStyle; }
+
void setCalleeSavedStackSize(unsigned Size) {
CalleeSavedStackSize = Size;
HasCalleeSavedStackSize = true;
@@ -333,6 +346,25 @@ private:
DenseMap<int, std::pair<unsigned, MCSymbol *>> JumpTableEntryInfo;
};
+namespace yaml {
+struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo {
+ Optional<bool> HasRedZone;
+
+ AArch64FunctionInfo() = default;
+ AArch64FunctionInfo(const llvm::AArch64FunctionInfo &MFI);
+
+ void mappingImpl(yaml::IO &YamlIO) override;
+ ~AArch64FunctionInfo() = default;
+};
+
+template <> struct MappingTraits<AArch64FunctionInfo> {
+ static void mapping(IO &YamlIO, AArch64FunctionInfo &MFI) {
+ YamlIO.mapOptional("hasRedZone", MFI.HasRedZone);
+ }
+};
+
+} // end namespace yaml
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AARCH64_AARCH64MACHINEFUNCTIONINFO_H
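With the MappingTraits above in place, the hasRedZone flag can round-trip through MIR serialization alongside the rest of the machine function state. A hypothetical excerpt of what the corresponding block in a .mir file would then contain (the field name is taken from the mapOptional call above; machineFunctionInfo is the generic MIR key):

machineFunctionInfo:
  hasRedZone: true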
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
index 9135f1b40122..9044c94bc4fe 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
@@ -250,6 +250,20 @@ static bool isConstantUsingVectorTy(const Type *CstTy) {
return false;
}
+// Returns true if \p C contains only ConstantData leaves and no global values,
+// block addresses or constant expressions. Traverses ConstantAggregates.
+static bool containsOnlyConstantData(const Constant *C) {
+ if (isa<ConstantData>(C))
+ return true;
+
+ if (isa<GlobalValue>(C) || isa<BlockAddress>(C) || isa<ConstantExpr>(C))
+ return false;
+
+ return all_of(C->operands(), [](const Use &U) {
+ return containsOnlyConstantData(cast<Constant>(&U));
+ });
+}
+
/// Check if the given use (Instruction + OpIdx) of Cst should be converted into
/// a load of a global variable initialized with Cst.
/// A use should be converted if it is legal to do so.
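containsOnlyConstantData walks a constant tree and accepts it only if every leaf is plain data, rejecting it as soon as a global value, block address or constant expression appears anywhere inside an aggregate. The same shape of check, reduced to a self-contained sketch with invented node types (nothing here is LLVM API):

#include <algorithm>
#include <vector>

struct Node {
  enum Kind { Data, Address, Aggregate } K;
  std::vector<Node> Operands; // only used by Aggregate nodes
};

// Accept trees whose leaves are all Data; an Address leaf anywhere (the
// analogue of GlobalValue/BlockAddress/ConstantExpr) rejects the whole tree.
bool containsOnlyData(const Node &N) {
  if (N.K == Node::Data)
    return true;
  if (N.K == Node::Address)
    return false;
  return std::all_of(N.Operands.begin(), N.Operands.end(),
                     [](const Node &Op) { return containsOnlyData(Op); });
}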
@@ -304,7 +318,7 @@ static bool shouldConvertUse(const Constant *Cst, const Instruction *Instr,
// Do not mess with inline asm.
const CallInst *CI = dyn_cast<const CallInst>(Instr);
- return !(CI && isa<const InlineAsm>(CI->getCalledValue()));
+ return !(CI && CI->isInlineAsm());
}
/// Check if the given Cst should be converted into
@@ -550,9 +564,10 @@ bool AArch64PromoteConstant::runOnFunction(Function &F,
for (Use &U : I.operands()) {
Constant *Cst = dyn_cast<Constant>(U);
// There is no point in promoting global values as they are already
- // global. Do not promote constant expressions either, as they may
- // require some code expansion.
- if (!Cst || isa<GlobalValue>(Cst) || isa<ConstantExpr>(Cst))
+ // global. Do not promote constants containing constant expressions, global
+ // values, or block addresses either, as they may require some code
+ // expansion.
+ if (!Cst || isa<GlobalValue>(Cst) || !containsOnlyConstantData(Cst))
continue;
// Check if this constant is worth promoting.
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 14f839cd4f81..886158ca4490 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -43,24 +43,27 @@ AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
- if (MF->getFunction().getCallingConv() == CallingConv::CFGuard_Check)
- return CSR_Win_AArch64_CFGuard_Check_SaveList;
- if (MF->getSubtarget<AArch64Subtarget>().isTargetWindows())
- return CSR_Win_AArch64_AAPCS_SaveList;
+
if (MF->getFunction().getCallingConv() == CallingConv::GHC)
// GHC set of callee saved regs is empty as all those regs are
// used for passing STG regs around
return CSR_AArch64_NoRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::AnyReg)
return CSR_AArch64_AllRegs_SaveList;
+
+ // Darwin has its own CSR_AArch64_AAPCS_SaveList, which means most CSR save
+ // lists depending on that will need to have their Darwin variant as well.
+ if (MF->getSubtarget<AArch64Subtarget>().isTargetDarwin())
+ return getDarwinCalleeSavedRegs(MF);
+
+ if (MF->getFunction().getCallingConv() == CallingConv::CFGuard_Check)
+ return CSR_Win_AArch64_CFGuard_Check_SaveList;
+ if (MF->getSubtarget<AArch64Subtarget>().isTargetWindows())
+ return CSR_Win_AArch64_AAPCS_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
return CSR_AArch64_AAVPCS_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_SVE_VectorCall)
return CSR_AArch64_SVE_AAPCS_SaveList;
- if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS)
- return MF->getInfo<AArch64FunctionInfo>()->isSplitCSR() ?
- CSR_AArch64_CXX_TLS_Darwin_PE_SaveList :
- CSR_AArch64_CXX_TLS_Darwin_SaveList;
if (MF->getSubtarget<AArch64Subtarget>().getTargetLowering()
->supportSwiftError() &&
MF->getFunction().getAttributes().hasAttrSomewhere(
@@ -68,17 +71,47 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_AArch64_AAPCS_SwiftError_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost)
return CSR_AArch64_RT_MostRegs_SaveList;
- if (MF->getSubtarget<AArch64Subtarget>().isTargetDarwin())
- return CSR_Darwin_AArch64_AAPCS_SaveList;
+ if (MF->getFunction().getCallingConv() == CallingConv::Win64)
+ // This is for OSes other than Windows; Windows is a separate case further
+ // above.
+ return CSR_AArch64_AAPCS_X18_SaveList;
return CSR_AArch64_AAPCS_SaveList;
}
+const MCPhysReg *
+AArch64RegisterInfo::getDarwinCalleeSavedRegs(const MachineFunction *MF) const {
+ assert(MF && "Invalid MachineFunction pointer.");
+ assert(MF->getSubtarget<AArch64Subtarget>().isTargetDarwin() &&
+ "Invalid subtarget for getDarwinCalleeSavedRegs");
+
+ if (MF->getFunction().getCallingConv() == CallingConv::CFGuard_Check)
+ report_fatal_error(
+ "Calling convention CFGuard_Check is unsupported on Darwin.");
+ if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
+ return CSR_Darwin_AArch64_AAVPCS_SaveList;
+ if (MF->getFunction().getCallingConv() == CallingConv::AArch64_SVE_VectorCall)
+ report_fatal_error(
+ "Calling convention SVE_VectorCall is unsupported on Darwin.");
+ if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS)
+ return MF->getInfo<AArch64FunctionInfo>()->isSplitCSR()
+ ? CSR_Darwin_AArch64_CXX_TLS_PE_SaveList
+ : CSR_Darwin_AArch64_CXX_TLS_SaveList;
+ if (MF->getSubtarget<AArch64Subtarget>().getTargetLowering()
+ ->supportSwiftError() &&
+ MF->getFunction().getAttributes().hasAttrSomewhere(
+ Attribute::SwiftError))
+ return CSR_Darwin_AArch64_AAPCS_SwiftError_SaveList;
+ if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost)
+ return CSR_Darwin_AArch64_RT_MostRegs_SaveList;
+ return CSR_Darwin_AArch64_AAPCS_SaveList;
+}
+
const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy(
const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
MF->getInfo<AArch64FunctionInfo>()->isSplitCSR())
- return CSR_AArch64_CXX_TLS_Darwin_ViaCopy_SaveList;
+ return CSR_Darwin_AArch64_CXX_TLS_ViaCopy_SaveList;
return nullptr;
}
@@ -113,6 +146,32 @@ AArch64RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
}
const uint32_t *
+AArch64RegisterInfo::getDarwinCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ assert(MF.getSubtarget<AArch64Subtarget>().isTargetDarwin() &&
+ "Invalid subtarget for getDarwinCallPreservedMask");
+
+ if (CC == CallingConv::CXX_FAST_TLS)
+ return CSR_Darwin_AArch64_CXX_TLS_RegMask;
+ if (CC == CallingConv::AArch64_VectorCall)
+ return CSR_Darwin_AArch64_AAVPCS_RegMask;
+ if (CC == CallingConv::AArch64_SVE_VectorCall)
+ report_fatal_error(
+ "Calling convention SVE_VectorCall is unsupported on Darwin.");
+ if (CC == CallingConv::CFGuard_Check)
+ report_fatal_error(
+ "Calling convention CFGuard_Check is unsupported on Darwin.");
+ if (MF.getSubtarget<AArch64Subtarget>()
+ .getTargetLowering()
+ ->supportSwiftError() &&
+ MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+ return CSR_Darwin_AArch64_AAPCS_SwiftError_RegMask;
+ if (CC == CallingConv::PreserveMost)
+ return CSR_Darwin_AArch64_RT_MostRegs_RegMask;
+ return CSR_Darwin_AArch64_AAPCS_RegMask;
+}
+
+const uint32_t *
AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
bool SCS = MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack);
@@ -121,9 +180,14 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
return SCS ? CSR_AArch64_NoRegs_SCS_RegMask : CSR_AArch64_NoRegs_RegMask;
if (CC == CallingConv::AnyReg)
return SCS ? CSR_AArch64_AllRegs_SCS_RegMask : CSR_AArch64_AllRegs_RegMask;
- if (CC == CallingConv::CXX_FAST_TLS)
- return SCS ? CSR_AArch64_CXX_TLS_Darwin_SCS_RegMask
- : CSR_AArch64_CXX_TLS_Darwin_RegMask;
+
+ // All the following calling conventions are handled differently on Darwin.
+ if (MF.getSubtarget<AArch64Subtarget>().isTargetDarwin()) {
+ if (SCS)
+ report_fatal_error("ShadowCallStack attribute not supported on Darwin.");
+ return getDarwinCallPreservedMask(MF, CC);
+ }
+
if (CC == CallingConv::AArch64_VectorCall)
return SCS ? CSR_AArch64_AAVPCS_SCS_RegMask : CSR_AArch64_AAVPCS_RegMask;
if (CC == CallingConv::AArch64_SVE_VectorCall)
@@ -145,7 +209,7 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
if (TT.isOSDarwin())
- return CSR_AArch64_TLS_Darwin_RegMask;
+ return CSR_Darwin_AArch64_TLS_RegMask;
assert(TT.isOSBinFormatELF() && "Invalid target");
return CSR_AArch64_TLS_ELF_RegMask;
@@ -186,6 +250,8 @@ AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
// In case that the calling convention does not use the same register for
// both, the function should return NULL (does not currently apply)
assert(CC != CallingConv::GHC && "should not be GHC calling convention.");
+ if (MF.getSubtarget<AArch64Subtarget>().isTargetDarwin())
+ return CSR_Darwin_AArch64_AAPCS_ThisReturn_RegMask;
return CSR_AArch64_AAPCS_ThisReturn_RegMask;
}
@@ -222,7 +288,7 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
- unsigned Reg) const {
+ MCRegister Reg) const {
return getReservedRegs(MF)[Reg];
}
@@ -240,11 +306,11 @@ void AArch64RegisterInfo::emitReservedArgRegCallError(
}
bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF,
- unsigned PhysReg) const {
+ MCRegister PhysReg) const {
return !isReservedReg(MF, PhysReg);
}
-bool AArch64RegisterInfo::isConstantPhysReg(unsigned PhysReg) const {
+bool AArch64RegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
return PhysReg == AArch64::WZR || PhysReg == AArch64::XZR;
}
@@ -390,12 +456,16 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
if (isFrameOffsetLegal(MI, AArch64::SP, Offset))
return false;
+ // If even offset 0 is illegal, we don't want a virtual base register.
+ if (!isFrameOffsetLegal(MI, AArch64::SP, 0))
+ return false;
+
// The offset likely isn't legal; we want to allocate a virtual base register.
return true;
}
bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
- unsigned BaseReg,
+ Register BaseReg,
int64_t Offset) const {
assert(MI && "Unable to get the legal offset for nil instruction.");
StackOffset SaveOffset(Offset, MVT::i8);
@@ -405,7 +475,7 @@ bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
/// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx
/// at the beginning of the basic block.
void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
- unsigned BaseReg,
+ Register BaseReg,
int FrameIdx,
int64_t Offset) const {
MachineBasicBlock::iterator Ins = MBB->begin();
@@ -426,7 +496,7 @@ void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
.addImm(Shifter);
}
-void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const {
// ARM doesn't need the general 64-bit offsets
StackOffset Off(Offset, MVT::i8);
@@ -445,6 +515,27 @@ void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
(void)Done;
}
+// Create a scratch register for the frame index elimination in an instruction.
+// This function has special handling of stack tagging loop pseudos, in which
+// case it can also change the instruction opcode (but not the operands).
+static Register
+createScratchRegisterForInstruction(MachineInstr &MI,
+ const AArch64InstrInfo *TII) {
+ // ST*Gloop have a reserved scratch register in operand 1. Use it, and also
+ // replace the instruction with the writeback variant because it will now
+ // satisfy the operand constraints for it.
+ if (MI.getOpcode() == AArch64::STGloop) {
+ MI.setDesc(TII->get(AArch64::STGloop_wback));
+ return MI.getOperand(1).getReg();
+ } else if (MI.getOpcode() == AArch64::STZGloop) {
+ MI.setDesc(TII->get(AArch64::STZGloop_wback));
+ return MI.getOperand(1).getReg();
+ } else {
+ return MI.getMF()->getRegInfo().createVirtualRegister(
+ &AArch64::GPR64RegClass);
+ }
+}
+
void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const {
@@ -461,7 +552,7 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
bool Tagged =
MI.getOperand(FIOperandNum).getTargetFlags() & AArch64II::MO_TAGGED;
- unsigned FrameReg;
+ Register FrameReg;
// Special handling of dbg_value, stackmap and patchpoint instructions.
if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STACKMAP ||
@@ -531,8 +622,7 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// If we get here, the immediate doesn't fit into the instruction. We folded
// as much as possible above. Handle the rest, providing a register that is
// SP+LargeImm.
- Register ScratchReg =
- MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
+ Register ScratchReg = createScratchRegisterForInstruction(MI, TII);
emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII);
MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true);
}
@@ -572,6 +662,8 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
return 32;
case AArch64::FPR128_loRegClassID:
+ case AArch64::FPR64_loRegClassID:
+ case AArch64::FPR16_loRegClassID:
return 16;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index 2c3f82c530d8..22a8ba76c611 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -34,7 +34,7 @@ public:
return getEncodingValue(i);
}
- bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+ bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const;
bool isAnyArgRegReserved(const MachineFunction &MF) const;
void emitReservedArgRegCallError(const MachineFunction &MF) const;
@@ -44,10 +44,13 @@ public:
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const MCPhysReg *getDarwinCalleeSavedRegs(const MachineFunction *MF) const;
const MCPhysReg *
getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
+ const uint32_t *getDarwinCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const;
unsigned getCSRFirstUseCost() const override {
// The cost will be compared against BlockFrequency where entry has the
@@ -83,8 +86,8 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const override;
bool isAsmClobberable(const MachineFunction &MF,
- unsigned PhysReg) const override;
- bool isConstantPhysReg(unsigned PhysReg) const override;
+ MCRegister PhysReg) const override;
+ bool isConstantPhysReg(MCRegister PhysReg) const override;
const TargetRegisterClass *
getPointerRegClass(const MachineFunction &MF,
unsigned Kind = 0) const override;
@@ -96,12 +99,12 @@ public:
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
- bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
+ bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
int64_t Offset) const override;
- void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg,
+ void materializeFrameBaseRegister(MachineBasicBlock *MBB, Register BaseReg,
int FrameIdx,
int64_t Offset) const override;
- void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned FIOperandNum,
@@ -118,10 +121,6 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
- bool trackLivenessAfterRegAlloc(const MachineFunction&) const override {
- return true;
- }
-
unsigned getLocalAddressRegister(const MachineFunction &MF) const;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index f52feab03953..bd05c56009a1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -422,25 +422,35 @@ def Q31 : AArch64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias<B31>;
def FPR8 : RegisterClass<"AArch64", [untyped], 8, (sequence "B%u", 0, 31)> {
let Size = 8;
}
-def FPR16 : RegisterClass<"AArch64", [f16], 16, (sequence "H%u", 0, 31)> {
+def FPR16 : RegisterClass<"AArch64", [f16, bf16], 16, (sequence "H%u", 0, 31)> {
+ let Size = 16;
+}
+
+def FPR16_lo : RegisterClass<"AArch64", [f16], 16, (trunc FPR16, 16)> {
let Size = 16;
}
def FPR32 : RegisterClass<"AArch64", [f32, i32], 32,(sequence "S%u", 0, 31)>;
def FPR64 : RegisterClass<"AArch64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32,
- v1i64, v4f16],
- 64, (sequence "D%u", 0, 31)>;
+ v1i64, v4f16, v4bf16],
+ 64, (sequence "D%u", 0, 31)>;
+def FPR64_lo : RegisterClass<"AArch64",
+ [v8i8, v4i16, v2i32, v1i64, v4f16, v4bf16, v2f32,
+ v1f64],
+ 64, (trunc FPR64, 16)>;
+
// We don't (yet) have an f128 legal type, so don't use that here. We
// normalize 128-bit vectors to v2f64 for arg passing and such, so use
// that here.
def FPR128 : RegisterClass<"AArch64",
[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128,
- v8f16],
+ v8f16, v8bf16],
128, (sequence "Q%u", 0, 31)>;
// The lower 16 vector registers. Some instructions can only take registers
// in this range.
def FPR128_lo : RegisterClass<"AArch64",
- [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16],
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16,
+ v8bf16],
128, (trunc FPR128, 16)>;
// Pairs, triples, and quads of 64-bit vector registers.
@@ -503,6 +513,9 @@ def VectorRegLoAsmOperand : AsmOperandClass {
let Name = "VectorRegLo";
let PredicateMethod = "isNeonVectorRegLo";
}
+def V64_lo : RegisterOperand<FPR64_lo, "printVRegOperand"> {
+ let ParserMatchClass = VectorRegLoAsmOperand;
+}
def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand"> {
let ParserMatchClass = VectorRegLoAsmOperand;
}
@@ -641,6 +654,10 @@ def FPR16Op : RegisterOperand<FPR16, "printOperand"> {
let ParserMatchClass = FPRAsmOperand<"FPR16">;
}
+def FPR16Op_lo : RegisterOperand<FPR16_lo, "printOperand"> {
+ let ParserMatchClass = FPRAsmOperand<"FPR16_lo">;
+}
+
def FPR32Op : RegisterOperand<FPR32, "printOperand"> {
let ParserMatchClass = FPRAsmOperand<"FPR32">;
}
@@ -664,11 +681,11 @@ def XSeqPairs : RegisterTuples<[sube64, subo64],
[(decimate (rotl GPR64, 0), 2),
(decimate (rotl GPR64, 1), 2)]>;
-def WSeqPairsClass : RegisterClass<"AArch64", [untyped], 32,
+def WSeqPairsClass : RegisterClass<"AArch64", [untyped], 32,
(add WSeqPairs)>{
let Size = 64;
}
-def XSeqPairsClass : RegisterClass<"AArch64", [untyped], 64,
+def XSeqPairsClass : RegisterClass<"AArch64", [untyped], 64,
(add XSeqPairs)>{
let Size = 128;
}
@@ -780,7 +797,7 @@ def Z30 : AArch64Reg<30, "z30", [Q30, Z30_HI]>, DwarfRegNum<[126]>;
def Z31 : AArch64Reg<31, "z31", [Q31, Z31_HI]>, DwarfRegNum<[127]>;
}
-// Enum descibing the element size for destructive
+// Enum describing the element size for destructive
// operations.
class ElementSizeEnum<bits<3> val> {
bits<3> Value = val;
@@ -862,6 +879,7 @@ def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, ElementSizeD, PPR_3b>;
class ZPRClass<int lastreg> : RegisterClass<"AArch64",
[nxv16i8, nxv8i16, nxv4i32, nxv2i64,
nxv2f16, nxv4f16, nxv8f16,
+ nxv2bf16, nxv4bf16, nxv8bf16,
nxv2f32, nxv4f32,
nxv2f64],
128, (sequence "Z%u", 0, lastreg)> {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
index 28a7e680849b..fc31e701d3af 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
@@ -219,7 +219,7 @@ shouldReplaceInst(MachineFunction *MF, const MCInstrDesc *InstDesc,
SmallVectorImpl<const MCInstrDesc*> &InstDescRepl) {
// Check if replacement decision is already available in the cached table.
// if so, return it.
- std::string Subtarget = SchedModel.getSubtargetInfo()->getCPU();
+ std::string Subtarget = std::string(SchedModel.getSubtargetInfo()->getCPU());
auto InstID = std::make_pair(InstDesc->getOpcode(), Subtarget);
if (SIMDInstrTable.find(InstID) != SIMDInstrTable.end())
return SIMDInstrTable[InstID];
@@ -288,7 +288,8 @@ bool AArch64SIMDInstrOpt::shouldExitEarly(MachineFunction *MF, Subpass SP) {
// For this optimization, check for all concerned instructions.
case Interleave:
- std::string Subtarget = SchedModel.getSubtargetInfo()->getCPU();
+ std::string Subtarget =
+ std::string(SchedModel.getSubtargetInfo()->getCPU());
if (InterlEarlyExit.find(Subtarget) != InterlEarlyExit.end())
return InterlEarlyExit[Subtarget];
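The two hunks above wrap the StringRef returned by getCPU() in an explicit
std::string(...) construction, consistent with the string-view type no longer
converting implicitly to std::string. A minimal, illustrative C++ sketch of
that pattern follows; MyStringRef is a made-up stand-in, not the real
llvm::StringRef, and "cortex-a57" is just an example CPU name.

#include <cstring>
#include <iostream>
#include <string>

// Stand-in for a StringRef-like, non-owning string view whose conversion to
// std::string is marked explicit.
class MyStringRef {
  const char *Data;
  std::size_t Length;

public:
  MyStringRef(const char *S) : Data(S), Length(std::strlen(S)) {}
  explicit operator std::string() const { return std::string(Data, Length); }
};

int main() {
  MyStringRef CPU("cortex-a57");
  // std::string Subtarget = CPU;           // ill-formed: conversion is explicit
  std::string Subtarget = std::string(CPU); // explicit construction, as above
  std::cout << Subtarget << '\n';
  return 0;
}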
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
new file mode 100644
index 000000000000..cb4dc8462f68
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
@@ -0,0 +1,443 @@
+//===- AArch64SLSHardening.cpp - Harden Straight Line Misspeculation ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass to insert code to mitigate side channel
+// vulnerabilities that may happen under straight line miss-speculation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/IndirectThunks.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-sls-hardening"
+
+#define AARCH64_SLS_HARDENING_NAME "AArch64 sls hardening pass"
+
+namespace {
+
+class AArch64SLSHardening : public MachineFunctionPass {
+public:
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const AArch64Subtarget *ST;
+
+ static char ID;
+
+ AArch64SLSHardening() : MachineFunctionPass(ID) {
+ initializeAArch64SLSHardeningPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ StringRef getPassName() const override { return AARCH64_SLS_HARDENING_NAME; }
+
+private:
+ bool hardenReturnsAndBRs(MachineBasicBlock &MBB) const;
+ bool hardenBLRs(MachineBasicBlock &MBB) const;
+ MachineBasicBlock &ConvertBLRToBL(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator) const;
+};
+
+} // end anonymous namespace
+
+char AArch64SLSHardening::ID = 0;
+
+INITIALIZE_PASS(AArch64SLSHardening, "aarch64-sls-hardening",
+ AARCH64_SLS_HARDENING_NAME, false, false)
+
+static void insertSpeculationBarrier(const AArch64Subtarget *ST,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL,
+ bool AlwaysUseISBDSB = false) {
+ assert(MBBI != MBB.begin() &&
+ "Must not insert SpeculationBarrierEndBB as only instruction in MBB.");
+ assert(std::prev(MBBI)->isBarrier() &&
+ "SpeculationBarrierEndBB must only follow unconditional control flow "
+ "instructions.");
+ assert(std::prev(MBBI)->isTerminator() &&
+ "SpeculationBarrierEndBB must only follow terminators.");
+ const TargetInstrInfo *TII = ST->getInstrInfo();
+ unsigned BarrierOpc = ST->hasSB() && !AlwaysUseISBDSB
+ ? AArch64::SpeculationBarrierSBEndBB
+ : AArch64::SpeculationBarrierISBDSBEndBB;
+ if (MBBI == MBB.end() ||
+ (MBBI->getOpcode() != AArch64::SpeculationBarrierSBEndBB &&
+ MBBI->getOpcode() != AArch64::SpeculationBarrierISBDSBEndBB))
+ BuildMI(MBB, MBBI, DL, TII->get(BarrierOpc));
+}
+
+bool AArch64SLSHardening::runOnMachineFunction(MachineFunction &MF) {
+ ST = &MF.getSubtarget<AArch64Subtarget>();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+
+ bool Modified = false;
+ for (auto &MBB : MF) {
+ Modified |= hardenReturnsAndBRs(MBB);
+ Modified |= hardenBLRs(MBB);
+ }
+
+ return Modified;
+}
+
+static bool isBLR(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case AArch64::BLR:
+ case AArch64::BLRNoIP:
+ return true;
+ case AArch64::BLRAA:
+ case AArch64::BLRAB:
+ case AArch64::BLRAAZ:
+ case AArch64::BLRABZ:
+ llvm_unreachable("Currently, LLVM's code generator does not support "
+ "producing BLRA* instructions. Therefore, there's no "
+ "support in this pass for those instructions.");
+ }
+ return false;
+}
+
+bool AArch64SLSHardening::hardenReturnsAndBRs(MachineBasicBlock &MBB) const {
+ if (!ST->hardenSlsRetBr())
+ return false;
+ bool Modified = false;
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(), E = MBB.end();
+ MachineBasicBlock::iterator NextMBBI;
+ for (; MBBI != E; MBBI = NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ NextMBBI = std::next(MBBI);
+ if (MI.isReturn() || isIndirectBranchOpcode(MI.getOpcode())) {
+ assert(MI.isTerminator());
+ insertSpeculationBarrier(ST, MBB, std::next(MBBI), MI.getDebugLoc());
+ Modified = true;
+ }
+ }
+ return Modified;
+}
+
+static const char SLSBLRNamePrefix[] = "__llvm_slsblr_thunk_";
+
+static const struct ThunkNameAndReg {
+ const char* Name;
+ Register Reg;
+} SLSBLRThunks[] = {
+ { "__llvm_slsblr_thunk_x0", AArch64::X0},
+ { "__llvm_slsblr_thunk_x1", AArch64::X1},
+ { "__llvm_slsblr_thunk_x2", AArch64::X2},
+ { "__llvm_slsblr_thunk_x3", AArch64::X3},
+ { "__llvm_slsblr_thunk_x4", AArch64::X4},
+ { "__llvm_slsblr_thunk_x5", AArch64::X5},
+ { "__llvm_slsblr_thunk_x6", AArch64::X6},
+ { "__llvm_slsblr_thunk_x7", AArch64::X7},
+ { "__llvm_slsblr_thunk_x8", AArch64::X8},
+ { "__llvm_slsblr_thunk_x9", AArch64::X9},
+ { "__llvm_slsblr_thunk_x10", AArch64::X10},
+ { "__llvm_slsblr_thunk_x11", AArch64::X11},
+ { "__llvm_slsblr_thunk_x12", AArch64::X12},
+ { "__llvm_slsblr_thunk_x13", AArch64::X13},
+ { "__llvm_slsblr_thunk_x14", AArch64::X14},
+ { "__llvm_slsblr_thunk_x15", AArch64::X15},
+ // X16 and X17 are deliberately missing, as the mitigation requires those
+  // registers to not be used in BLR. See comment in ConvertBLRToBL for more
+ // details.
+ { "__llvm_slsblr_thunk_x18", AArch64::X18},
+ { "__llvm_slsblr_thunk_x19", AArch64::X19},
+ { "__llvm_slsblr_thunk_x20", AArch64::X20},
+ { "__llvm_slsblr_thunk_x21", AArch64::X21},
+ { "__llvm_slsblr_thunk_x22", AArch64::X22},
+ { "__llvm_slsblr_thunk_x23", AArch64::X23},
+ { "__llvm_slsblr_thunk_x24", AArch64::X24},
+ { "__llvm_slsblr_thunk_x25", AArch64::X25},
+ { "__llvm_slsblr_thunk_x26", AArch64::X26},
+ { "__llvm_slsblr_thunk_x27", AArch64::X27},
+ { "__llvm_slsblr_thunk_x28", AArch64::X28},
+ { "__llvm_slsblr_thunk_x29", AArch64::FP},
+ // X30 is deliberately missing, for similar reasons as X16 and X17 are
+ // missing.
+ { "__llvm_slsblr_thunk_x31", AArch64::XZR},
+};
+
+namespace {
+struct SLSBLRThunkInserter : ThunkInserter<SLSBLRThunkInserter> {
+ const char *getThunkPrefix() { return SLSBLRNamePrefix; }
+ bool mayUseThunk(const MachineFunction &MF) {
+ // FIXME: This could also check if there are any BLRs in the function
+ // to more accurately reflect if a thunk will be needed.
+ return MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr();
+ }
+ void insertThunks(MachineModuleInfo &MMI);
+ void populateThunk(MachineFunction &MF);
+};
+} // namespace
+
+void SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI) {
+ // FIXME: It probably would be possible to filter which thunks to produce
+ // based on which registers are actually used in BLR instructions in this
+ // function. But would that be a worthwhile optimization?
+ for (auto T : SLSBLRThunks)
+ createThunkFunction(MMI, T.Name);
+}
+
+void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) {
+ // FIXME: How to better communicate Register number, rather than through
+ // name and lookup table?
+ assert(MF.getName().startswith(getThunkPrefix()));
+ auto ThunkIt = llvm::find_if(
+ SLSBLRThunks, [&MF](auto T) { return T.Name == MF.getName(); });
+ assert(ThunkIt != std::end(SLSBLRThunks));
+ Register ThunkReg = ThunkIt->Reg;
+
+ const TargetInstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
+ assert (MF.size() == 1);
+ MachineBasicBlock *Entry = &MF.front();
+ Entry->clear();
+
+ // These thunks need to consist of the following instructions:
+ // __llvm_slsblr_thunk_xN:
+ // BR xN
+ // barrierInsts
+ Entry->addLiveIn(ThunkReg);
+ // MOV X16, ThunkReg == ORR X16, XZR, ThunkReg, LSL #0
+ BuildMI(Entry, DebugLoc(), TII->get(AArch64::ORRXrs), AArch64::X16)
+ .addReg(AArch64::XZR)
+ .addReg(ThunkReg)
+ .addImm(0);
+ BuildMI(Entry, DebugLoc(), TII->get(AArch64::BR)).addReg(AArch64::X16);
+  // Make sure the thunks do not make use of the SB extension, in case there is
+  // a function somewhere that calls the thunk but, for some reason, has the SB
+  // extension disabled locally, even though it is enabled for the rest of the
+  // module. Therefore set AlwaysUseISBDSB to true.
+ insertSpeculationBarrier(&MF.getSubtarget<AArch64Subtarget>(), *Entry,
+ Entry->end(), DebugLoc(), true /*AlwaysUseISBDSB*/);
+}
+
+MachineBasicBlock &
+AArch64SLSHardening::ConvertBLRToBL(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const {
+ // Transform a BLR to a BL as follows:
+ // Before:
+ // |-----------------------------|
+ // | ... |
+ // | instI |
+ // | BLR xN |
+ // | instJ |
+ // | ... |
+ // |-----------------------------|
+ //
+ // After:
+ // |-----------------------------|
+ // | ... |
+ // | instI |
+ // | BL __llvm_slsblr_thunk_xN |
+ // | instJ |
+ // | ... |
+ // |-----------------------------|
+ //
+ // __llvm_slsblr_thunk_xN:
+ // |-----------------------------|
+ // | BR xN |
+ // | barrierInsts |
+ // |-----------------------------|
+ //
+ // The __llvm_slsblr_thunk_xN thunks are created by the SLSBLRThunkInserter.
+ // This function merely needs to transform BLR xN into BL
+ // __llvm_slsblr_thunk_xN.
+ //
+ // Since linkers are allowed to clobber X16 and X17 on function calls, the
+ // above mitigation only works if the original BLR instruction was not
+ // BLR X16 nor BLR X17. Code generation before must make sure that no BLR
+ // X16|X17 was produced if the mitigation is enabled.
+
+ MachineInstr &BLR = *MBBI;
+ assert(isBLR(BLR));
+ unsigned BLOpcode;
+ Register Reg;
+ bool RegIsKilled;
+ switch (BLR.getOpcode()) {
+ case AArch64::BLR:
+ case AArch64::BLRNoIP:
+ BLOpcode = AArch64::BL;
+ Reg = BLR.getOperand(0).getReg();
+ assert(Reg != AArch64::X16 && Reg != AArch64::X17 && Reg != AArch64::LR);
+ RegIsKilled = BLR.getOperand(0).isKill();
+ break;
+ case AArch64::BLRAA:
+ case AArch64::BLRAB:
+ case AArch64::BLRAAZ:
+ case AArch64::BLRABZ:
+ llvm_unreachable("BLRA instructions cannot yet be produced by LLVM, "
+ "therefore there is no need to support them for now.");
+ default:
+ llvm_unreachable("unhandled BLR");
+ }
+ DebugLoc DL = BLR.getDebugLoc();
+
+  // If we'd also like to support BLRAA and BLRAB instructions, we'd need
+  // many more kinds of thunks.
+ // For example, a
+ //
+ // BLRAA xN, xM
+ //
+ // instruction probably would need to be transformed to something like:
+ //
+ // BL __llvm_slsblraa_thunk_x<N>_x<M>
+ //
+ // __llvm_slsblraa_thunk_x<N>_x<M>:
+ // BRAA x<N>, x<M>
+ // barrierInsts
+ //
+ // Given that about 30 different values of N are possible and about 30
+ // different values of M are possible in the above, with the current way
+ // of producing indirect thunks, we'd be producing about 30 times 30, i.e.
+ // about 900 thunks (where most might not be actually called). This would
+ // multiply further by two to support both BLRAA and BLRAB variants of those
+ // instructions.
+ // If we'd want to support this, we'd probably need to look into a different
+ // way to produce thunk functions, based on which variants are actually
+ // needed, rather than producing all possible variants.
+  // So far, LLVM never produces BLRA* instructions, so let's leave this
+  // for the future, when LLVM starts producing BLRA* instructions.
+ MachineFunction &MF = *MBBI->getMF();
+ MCContext &Context = MBB.getParent()->getContext();
+ auto ThunkIt =
+ llvm::find_if(SLSBLRThunks, [Reg](auto T) { return T.Reg == Reg; });
+ assert (ThunkIt != std::end(SLSBLRThunks));
+ MCSymbol *Sym = Context.getOrCreateSymbol(ThunkIt->Name);
+
+ MachineInstr *BL = BuildMI(MBB, MBBI, DL, TII->get(BLOpcode)).addSym(Sym);
+
+ // Now copy the implicit operands from BLR to BL and copy other necessary
+ // info.
+  // However, both BLR and BL instructions implicitly use SP and implicitly
+  // define LR. Blindly copying implicit operands would result in the SP and LR
+  // operands being present multiple times. While this may not be too much of
+ // an issue, let's avoid that for cleanliness, by removing those implicit
+ // operands from the BL created above before we copy over all implicit
+ // operands from the BLR.
+ int ImpLROpIdx = -1;
+ int ImpSPOpIdx = -1;
+ for (unsigned OpIdx = BL->getNumExplicitOperands();
+ OpIdx < BL->getNumOperands(); OpIdx++) {
+ MachineOperand Op = BL->getOperand(OpIdx);
+ if (!Op.isReg())
+ continue;
+ if (Op.getReg() == AArch64::LR && Op.isDef())
+ ImpLROpIdx = OpIdx;
+ if (Op.getReg() == AArch64::SP && !Op.isDef())
+ ImpSPOpIdx = OpIdx;
+ }
+ assert(ImpLROpIdx != -1);
+ assert(ImpSPOpIdx != -1);
+ int FirstOpIdxToRemove = std::max(ImpLROpIdx, ImpSPOpIdx);
+ int SecondOpIdxToRemove = std::min(ImpLROpIdx, ImpSPOpIdx);
+ BL->RemoveOperand(FirstOpIdxToRemove);
+ BL->RemoveOperand(SecondOpIdxToRemove);
+ // Now copy over the implicit operands from the original BLR
+ BL->copyImplicitOps(MF, BLR);
+ MF.moveCallSiteInfo(&BLR, BL);
+ // Also add the register called in the BLR as being used in the called thunk.
+ BL->addOperand(MachineOperand::CreateReg(Reg, false /*isDef*/, true /*isImp*/,
+ RegIsKilled /*isKill*/));
+ // Remove BLR instruction
+ MBB.erase(MBBI);
+
+ return MBB;
+}
+
+bool AArch64SLSHardening::hardenBLRs(MachineBasicBlock &MBB) const {
+ if (!ST->hardenSlsBlr())
+ return false;
+ bool Modified = false;
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MachineBasicBlock::iterator NextMBBI;
+ for (; MBBI != E; MBBI = NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ NextMBBI = std::next(MBBI);
+ if (isBLR(MI)) {
+ ConvertBLRToBL(MBB, MBBI);
+ Modified = true;
+ }
+ }
+ return Modified;
+}
+
+FunctionPass *llvm::createAArch64SLSHardeningPass() {
+ return new AArch64SLSHardening();
+}
+
+namespace {
+class AArch64IndirectThunks : public MachineFunctionPass {
+public:
+ static char ID;
+
+ AArch64IndirectThunks() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override { return "AArch64 Indirect Thunks"; }
+
+ bool doInitialization(Module &M) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ std::tuple<SLSBLRThunkInserter> TIs;
+
+ // FIXME: When LLVM moves to C++17, these can become folds
+ template <typename... ThunkInserterT>
+ static void initTIs(Module &M,
+ std::tuple<ThunkInserterT...> &ThunkInserters) {
+ (void)std::initializer_list<int>{
+ (std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...};
+ }
+ template <typename... ThunkInserterT>
+ static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF,
+ std::tuple<ThunkInserterT...> &ThunkInserters) {
+ bool Modified = false;
+ (void)std::initializer_list<int>{
+ Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...};
+ return Modified;
+ }
+};
+
+} // end anonymous namespace
+
+char AArch64IndirectThunks::ID = 0;
+
+FunctionPass *llvm::createAArch64IndirectThunks() {
+ return new AArch64IndirectThunks();
+}
+
+bool AArch64IndirectThunks::doInitialization(Module &M) {
+ initTIs(M, TIs);
+ return false;
+}
+
+bool AArch64IndirectThunks::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << getPassName() << '\n');
+ auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+ return runTIs(MMI, MF, TIs);
+}
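Taken together, the pass above rewrites every hardened indirect call BLR xN
into a direct BL to a per-register thunk __llvm_slsblr_thunk_xN, and each
thunk branches through X16 and ends in an unconditional speculation barrier.
The sketch below is only an illustration of those shapes, not the
MachineInstr-level implementation; in particular, the "dsb sy; isb" sequence
shown for the ISB/DSB barrier pseudo is an assumption about how that pseudo
expands elsewhere in the backend, and x8 in the example is an arbitrary
register.

#include <iostream>
#include <string>

// Pick the barrier sequence the same way insertSpeculationBarrier does:
// a single SB when the extension is available and not overridden, otherwise
// the assumed ISB/DSB sequence.
static std::string barrier(bool HasSB, bool AlwaysUseISBDSB) {
  return (HasSB && !AlwaysUseISBDSB) ? "  sb\n" : "  dsb sy\n  isb\n";
}

// A hardened call site: BLR xN becomes BL __llvm_slsblr_thunk_xN.
static std::string hardenedCallSite(unsigned N) {
  return "  bl __llvm_slsblr_thunk_x" + std::to_string(N) + "\n";
}

// The matching thunk: move the target into x16, branch through it, and stop
// straight-line miss-speculation past the BR with a barrier. Thunks always
// use the ISB/DSB form (AlwaysUseISBDSB == true above).
static std::string thunkBody(unsigned N) {
  return "__llvm_slsblr_thunk_x" + std::to_string(N) + ":\n" +
         "  mov x16, x" + std::to_string(N) + "\n" +
         "  br x16\n" + barrier(/*HasSB=*/true, /*AlwaysUseISBDSB=*/true);
}

int main() {
  std::cout << "call site after hardening:\n" << hardenedCallSite(8) << '\n';
  std::cout << "matching thunk:\n" << thunkBody(8);
  return 0;
}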
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c849d7af9a40..28a54e6f7d79 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -10,65 +10,188 @@
//
//===----------------------------------------------------------------------===//
-def SDT_AArch64_GLD1 : SDTypeProfile<1, 4, [
+// For predicated nodes where the entire operation is controlled by a governing
+// predicate, please stick to a similar naming convention as used for the
+// ISD nodes:
+//
+// SDNode <=> AArch64ISD
+// -------------------------------
+// _m<n> <=> _MERGE_OP<n>
+// _mt <=> _MERGE_PASSTHRU
+// _z <=> _MERGE_ZERO
+// _p <=> _PRED
+//
+// Given the context of this file, it is not strictly necessary to use _p to
+// distinguish predicated from unpredicated nodes, since most SVE instructions
+// are predicated.
+
+// Contiguous loads - node definitions
+//
+def SDT_AArch64_LD1 : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>,
+ SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
+]>;
+
+def AArch64ld1_z : SDNode<"AArch64ISD::LD1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
+def AArch64ld1s_z : SDNode<"AArch64ISD::LD1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
+
+// Non-faulting & first-faulting loads - node definitions
+//
+def AArch64ldnf1_z : SDNode<"AArch64ISD::LDNF1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1_z : SDNode<"AArch64ISD::LDFF1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+
+def AArch64ldnf1s_z : SDNode<"AArch64ISD::LDNF1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1s_z : SDNode<"AArch64ISD::LDFF1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+
+// Contiguous load and replicate - node definitions
+//
+
+def SDT_AArch64_LD1Replicate : SDTypeProfile<1, 2, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>,
+ SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
+]>;
+
+def AArch64ld1rq_z : SDNode<"AArch64ISD::LD1RQ_MERGE_ZERO", SDT_AArch64_LD1Replicate, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1ro_z : SDNode<"AArch64ISD::LD1RO_MERGE_ZERO", SDT_AArch64_LD1Replicate, [SDNPHasChain, SDNPMayLoad]>;
+
+// Gather loads - node definitions
+//
+def SDT_AArch64_GATHER_SV : SDTypeProfile<1, 4, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisVT<4, OtherVT>,
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
]>;
-def SDT_AArch64_GLD1_IMM : SDTypeProfile<1, 4, [
+def SDT_AArch64_GATHER_VS : SDTypeProfile<1, 4, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVT<4, OtherVT>,
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
]>;
-def SDT_AArch64_SST1 : SDTypeProfile<0, 5, [
+def AArch64ld1_gather_z : SDNode<"AArch64ISD::GLD1_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1_gather_scaled_z : SDNode<"AArch64ISD::GLD1_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1_gather_uxtw_z : SDNode<"AArch64ISD::GLD1_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1_gather_sxtw_z : SDNode<"AArch64ISD::GLD1_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1_gather_imm_z : SDNode<"AArch64ISD::GLD1_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
+
+def AArch64ld1s_gather_z : SDNode<"AArch64ISD::GLD1S_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1s_gather_scaled_z : SDNode<"AArch64ISD::GLD1S_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1s_gather_uxtw_z : SDNode<"AArch64ISD::GLD1S_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1s_gather_sxtw_z : SDNode<"AArch64ISD::GLD1S_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1s_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1s_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ld1s_gather_imm_z : SDNode<"AArch64ISD::GLD1S_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
+
+def AArch64ldff1_gather_z : SDNode<"AArch64ISD::GLDFF1_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1_gather_scaled_z : SDNode<"AArch64ISD::GLDFF1_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1_gather_uxtw_z : SDNode<"AArch64ISD::GLDFF1_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1_gather_sxtw_z : SDNode<"AArch64ISD::GLDFF1_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1_gather_imm_z : SDNode<"AArch64ISD::GLDFF1_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+
+def AArch64ldff1s_gather_z : SDNode<"AArch64ISD::GLDFF1S_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1s_gather_scaled_z : SDNode<"AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1s_gather_uxtw_z : SDNode<"AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1s_gather_sxtw_z : SDNode<"AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1s_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1s_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64ldff1s_gather_imm_z : SDNode<"AArch64ISD::GLDFF1S_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>;
+
+def AArch64ldnt1_gather_z : SDNode<"AArch64ISD::GLDNT1_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
+def AArch64ldnt1s_gather_z : SDNode<"AArch64ISD::GLDNT1S_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
+
+// Contiguous stores - node definitions
+//
+def SDT_AArch64_ST1 : SDTypeProfile<0, 4, [
+ SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>,
+ SDTCVecEltisVT<2,i1>, SDTCisSameNumEltsAs<0,2>
+]>;
+
+def AArch64st1 : SDNode<"AArch64ISD::ST1_PRED", SDT_AArch64_ST1, [SDNPHasChain, SDNPMayStore]>;
+
+// Scatter stores - node definitions
+//
+def SDT_AArch64_SCATTER_SV : SDTypeProfile<0, 5, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisVT<4, OtherVT>,
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
]>;
-def SDT_AArch64_SST1_IMM : SDTypeProfile<0, 5, [
+def SDT_AArch64_SCATTER_VS : SDTypeProfile<0, 5, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVT<4, OtherVT>,
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
]>;
-def AArch64st1_scatter : SDNode<"AArch64ISD::SST1", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
-def AArch64st1_scatter_scaled : SDNode<"AArch64ISD::SST1_SCALED", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
-def AArch64st1_scatter_uxtw : SDNode<"AArch64ISD::SST1_UXTW", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
-def AArch64st1_scatter_sxtw : SDNode<"AArch64ISD::SST1_SXTW", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
-def AArch64st1_scatter_uxtw_scaled : SDNode<"AArch64ISD::SST1_UXTW_SCALED", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
-def AArch64st1_scatter_sxtw_scaled : SDNode<"AArch64ISD::SST1_SXTW_SCALED", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
-def AArch64st1_scatter_imm : SDNode<"AArch64ISD::SST1_IMM", SDT_AArch64_SST1_IMM, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
-
-def AArch64ld1_gather : SDNode<"AArch64ISD::GLD1", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
-def AArch64ld1_gather_scaled : SDNode<"AArch64ISD::GLD1_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
-def AArch64ld1_gather_uxtw : SDNode<"AArch64ISD::GLD1_UXTW", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
-def AArch64ld1_gather_sxtw : SDNode<"AArch64ISD::GLD1_SXTW", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
-def AArch64ld1_gather_uxtw_scaled : SDNode<"AArch64ISD::GLD1_UXTW_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
-def AArch64ld1_gather_sxtw_scaled : SDNode<"AArch64ISD::GLD1_SXTW_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
-def AArch64ld1_gather_imm : SDNode<"AArch64ISD::GLD1_IMM", SDT_AArch64_GLD1_IMM, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
-
-def AArch64ld1s_gather : SDNode<"AArch64ISD::GLD1S", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
-def AArch64ld1s_gather_scaled : SDNode<"AArch64ISD::GLD1S_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
-def AArch64ld1s_gather_uxtw : SDNode<"AArch64ISD::GLD1S_UXTW", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
-def AArch64ld1s_gather_sxtw : SDNode<"AArch64ISD::GLD1S_SXTW", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
-def AArch64ld1s_gather_uxtw_scaled : SDNode<"AArch64ISD::GLD1S_UXTW_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
-def AArch64ld1s_gather_sxtw_scaled : SDNode<"AArch64ISD::GLD1S_SXTW_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
-def AArch64ld1s_gather_imm : SDNode<"AArch64ISD::GLD1S_IMM", SDT_AArch64_GLD1_IMM, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
+def AArch64st1_scatter : SDNode<"AArch64ISD::SST1_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
+def AArch64st1_scatter_scaled : SDNode<"AArch64ISD::SST1_SCALED_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
+def AArch64st1_scatter_uxtw : SDNode<"AArch64ISD::SST1_UXTW_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
+def AArch64st1_scatter_sxtw : SDNode<"AArch64ISD::SST1_SXTW_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
+def AArch64st1_scatter_uxtw_scaled : SDNode<"AArch64ISD::SST1_UXTW_SCALED_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
+def AArch64st1_scatter_sxtw_scaled : SDNode<"AArch64ISD::SST1_SXTW_SCALED_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>;
+def AArch64st1_scatter_imm : SDNode<"AArch64ISD::SST1_IMM_PRED", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>;
+
+def AArch64stnt1_scatter : SDNode<"AArch64ISD::SSTNT1_PRED", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>;
+
+// AArch64 SVE/SVE2 - the remaining node definitions
+//
+
+// SVE CNT/INC/RDVL
+def sve_rdvl_imm : ComplexPattern<i32, 1, "SelectRDVLImm<-32, 31, 16>">;
+def sve_cnth_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 8>">;
+def sve_cntw_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 4>">;
+def sve_cntd_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 2>">;
+
+// SVE DEC
+def sve_cnth_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, -8>">;
+def sve_cntw_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, -4>">;
+def sve_cntd_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, -2>">;
def SDT_AArch64Reduce : SDTypeProfile<1, 2, [SDTCisVec<1>, SDTCisVec<2>]>;
+def AArch64faddv_p : SDNode<"AArch64ISD::FADDV_PRED", SDT_AArch64Reduce>;
+def AArch64fmaxv_p : SDNode<"AArch64ISD::FMAXV_PRED", SDT_AArch64Reduce>;
+def AArch64fmaxnmv_p : SDNode<"AArch64ISD::FMAXNMV_PRED", SDT_AArch64Reduce>;
+def AArch64fminv_p : SDNode<"AArch64ISD::FMINV_PRED", SDT_AArch64Reduce>;
+def AArch64fminnmv_p : SDNode<"AArch64ISD::FMINNMV_PRED", SDT_AArch64Reduce>;
+def AArch64smaxv_p : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>;
+def AArch64umaxv_p : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>;
+def AArch64sminv_p : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>;
+def AArch64uminv_p : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>;
+def AArch64orv_p : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>;
+def AArch64eorv_p : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>;
+def AArch64andv_p : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>;
+def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>;
+def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>;
+
+def SDT_AArch64Arith : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
+ SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3>
+]>;
+
+def SDT_AArch64FMA : SDTypeProfile<1, 4, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4>,
+ SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3>, SDTCisSameAs<3,4>
+]>;
-def AArch64smaxv_pred : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>;
-def AArch64umaxv_pred : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>;
-def AArch64sminv_pred : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>;
-def AArch64uminv_pred : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>;
-def AArch64orv_pred : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>;
-def AArch64eorv_pred : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>;
-def AArch64andv_pred : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>;
-def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>;
-def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>;
+// Predicated operations with the result of inactive lanes being unspecified.
+def AArch64add_p : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>;
+def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>;
+def AArch64fma_p : SDNode<"AArch64ISD::FMA_PRED", SDT_AArch64FMA>;
+def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;
+def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;
+
+// Merging op1 into the inactive lanes.
+def AArch64smin_m1 : SDNode<"AArch64ISD::SMIN_MERGE_OP1", SDT_AArch64Arith>;
+def AArch64umin_m1 : SDNode<"AArch64ISD::UMIN_MERGE_OP1", SDT_AArch64Arith>;
+def AArch64smax_m1 : SDNode<"AArch64ISD::SMAX_MERGE_OP1", SDT_AArch64Arith>;
+def AArch64umax_m1 : SDNode<"AArch64ISD::UMAX_MERGE_OP1", SDT_AArch64Arith>;
+def AArch64lsl_m1 : SDNode<"AArch64ISD::SHL_MERGE_OP1", SDT_AArch64Arith>;
+def AArch64lsr_m1 : SDNode<"AArch64ISD::SRL_MERGE_OP1", SDT_AArch64Arith>;
+def AArch64asr_m1 : SDNode<"AArch64ISD::SRA_MERGE_OP1", SDT_AArch64Arith>;
def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;
def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>;
def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>;
+def AArch64fadda_p : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>;
def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def AArch64rev : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>;
@@ -76,42 +199,57 @@ def AArch64rev : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>;
def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>;
-let Predicates = [HasSVE] in {
+def SDT_AArch64DUP_PRED : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 3>, SDTCisVec<1>, SDTCVecEltisVT<1,i1>]>;
+def AArch64dup_mt : SDNode<"AArch64ISD::DUP_MERGE_PASSTHRU", SDT_AArch64DUP_PRED>;
+
+def SDT_IndexVector : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisInt<2>]>;
+def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>;
- def RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr">;
- def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
- def RDFFR_P : sve_int_rdffr_unpred<"rdffr">;
- def SETFFR : sve_int_setffr<"setffr">;
- def WRFFR : sve_int_wrffr<"wrffr">;
+def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>;
- defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add", add>;
- defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub", sub>;
- defm SQADD_ZZZ : sve_int_bin_cons_arit_0<0b100, "sqadd", saddsat>;
- defm UQADD_ZZZ : sve_int_bin_cons_arit_0<0b101, "uqadd", uaddsat>;
- defm SQSUB_ZZZ : sve_int_bin_cons_arit_0<0b110, "sqsub", ssubsat>;
- defm UQSUB_ZZZ : sve_int_bin_cons_arit_0<0b111, "uqsub", usubsat>;
+let Predicates = [HasSVE] in {
+ defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
+ def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
+ defm RDFFR_P : sve_int_rdffr_unpred<"rdffr", int_aarch64_sve_rdffr>;
+ def SETFFR : sve_int_setffr<"setffr", int_aarch64_sve_setffr>;
+ def WRFFR : sve_int_wrffr<"wrffr", int_aarch64_sve_wrffr>;
+
+ defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add", add, null_frag>;
+ defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub", sub, null_frag>;
+ defm SQADD_ZZZ : sve_int_bin_cons_arit_0<0b100, "sqadd", saddsat, int_aarch64_sve_sqadd_x>;
+ defm UQADD_ZZZ : sve_int_bin_cons_arit_0<0b101, "uqadd", uaddsat, int_aarch64_sve_uqadd_x>;
+ defm SQSUB_ZZZ : sve_int_bin_cons_arit_0<0b110, "sqsub", ssubsat, int_aarch64_sve_sqsub_x>;
+ defm UQSUB_ZZZ : sve_int_bin_cons_arit_0<0b111, "uqsub", usubsat, int_aarch64_sve_uqsub_x>;
defm AND_ZZZ : sve_int_bin_cons_log<0b00, "and", and>;
defm ORR_ZZZ : sve_int_bin_cons_log<0b01, "orr", or>;
defm EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor", xor>;
defm BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic", null_frag>;
- defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add", int_aarch64_sve_add>;
- defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", int_aarch64_sve_sub>;
- defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", int_aarch64_sve_subr>;
+ defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add", "ADD_ZPZZ", int_aarch64_sve_add, DestructiveBinaryComm>;
+ defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", "SUB_ZPZZ", int_aarch64_sve_sub, DestructiveBinaryCommWithRev, "SUBR_ZPmZ">;
+ defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", int_aarch64_sve_subr, DestructiveBinaryCommWithRev, "SUB_ZPmZ", /*isReverseInstr*/ 1>;
+
+ defm ADD_ZPZZ : sve_int_bin_pred_bhsd<AArch64add_p>;
+
+ let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
+ defm ADD_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_add>;
+ defm SUB_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_sub>;
+ defm SUBR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_subr>;
+ }
defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", int_aarch64_sve_orr>;
defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", int_aarch64_sve_eor>;
defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", int_aarch64_sve_and>;
defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", int_aarch64_sve_bic>;
- defm ADD_ZI : sve_int_arith_imm0<0b000, "add", add>;
- defm SUB_ZI : sve_int_arith_imm0<0b001, "sub", sub>;
+ defm ADD_ZI : sve_int_arith_imm0<0b000, "add", add, null_frag>;
+ defm SUB_ZI : sve_int_arith_imm0<0b001, "sub", sub, null_frag>;
defm SUBR_ZI : sve_int_arith_imm0_subr<0b011, "subr", sub>;
- defm SQADD_ZI : sve_int_arith_imm0<0b100, "sqadd", saddsat>;
- defm UQADD_ZI : sve_int_arith_imm0<0b101, "uqadd", uaddsat>;
- defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub", ssubsat>;
- defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub", usubsat>;
+ defm SQADD_ZI : sve_int_arith_imm0<0b100, "sqadd", saddsat, int_aarch64_sve_sqadd_x>;
+ defm UQADD_ZI : sve_int_arith_imm0<0b101, "uqadd", uaddsat, int_aarch64_sve_uqadd_x>;
+ defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub", ssubsat, int_aarch64_sve_sqsub_x>;
+ defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub", usubsat, int_aarch64_sve_uqsub_x>;
defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", int_aarch64_sve_mad>;
defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb", int_aarch64_sve_msb>;
@@ -121,32 +259,45 @@ let Predicates = [HasSVE] in {
// SVE predicated integer reductions.
defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", int_aarch64_sve_saddv>;
defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv", int_aarch64_sve_uaddv, int_aarch64_sve_saddv>;
- defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv", AArch64smaxv_pred>;
- defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv", AArch64umaxv_pred>;
- defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv", AArch64sminv_pred>;
- defm UMINV_VPZ : sve_int_reduce_1<0b011, "uminv", AArch64uminv_pred>;
- defm ORV_VPZ : sve_int_reduce_2<0b000, "orv", AArch64orv_pred>;
- defm EORV_VPZ : sve_int_reduce_2<0b001, "eorv", AArch64eorv_pred>;
- defm ANDV_VPZ : sve_int_reduce_2<0b010, "andv", AArch64andv_pred>;
+ defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv", AArch64smaxv_p>;
+ defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv", AArch64umaxv_p>;
+ defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv", AArch64sminv_p>;
+ defm UMINV_VPZ : sve_int_reduce_1<0b011, "uminv", AArch64uminv_p>;
+ defm ORV_VPZ : sve_int_reduce_2<0b000, "orv", AArch64orv_p>;
+ defm EORV_VPZ : sve_int_reduce_2<0b001, "eorv", AArch64eorv_p>;
+ defm ANDV_VPZ : sve_int_reduce_2<0b010, "andv", AArch64andv_p>;
defm ORR_ZI : sve_int_log_imm<0b00, "orr", "orn", or>;
defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon", xor>;
defm AND_ZI : sve_int_log_imm<0b10, "and", "bic", and>;
- defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", smax>;
- defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", smin>;
- defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", umax>;
- defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", umin>;
+ defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_m1>;
+ defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_m1>;
+ defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_m1>;
+ defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_m1>;
- defm MUL_ZI : sve_int_arith_imm2<"mul", mul>;
- defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", int_aarch64_sve_mul>;
+ defm MUL_ZI : sve_int_arith_imm2<"mul", mul>;
+ defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", int_aarch64_sve_mul>;
defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", int_aarch64_sve_smulh>;
defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", int_aarch64_sve_umulh>;
- defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", int_aarch64_sve_sdiv>;
- defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", int_aarch64_sve_udiv>;
- defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", int_aarch64_sve_sdivr>;
- defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", int_aarch64_sve_udivr>;
+ // Add unpredicated alternative for the mul instruction.
+ def : Pat<(mul nxv16i8:$Op1, nxv16i8:$Op2),
+ (MUL_ZPmZ_B (PTRUE_B 31), $Op1, $Op2)>;
+ def : Pat<(mul nxv8i16:$Op1, nxv8i16:$Op2),
+ (MUL_ZPmZ_H (PTRUE_H 31), $Op1, $Op2)>;
+ def : Pat<(mul nxv4i32:$Op1, nxv4i32:$Op2),
+ (MUL_ZPmZ_S (PTRUE_S 31), $Op1, $Op2)>;
+ def : Pat<(mul nxv2i64:$Op1, nxv2i64:$Op2),
+ (MUL_ZPmZ_D (PTRUE_D 31), $Op1, $Op2)>;
+
+ defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", "SDIV_ZPZZ", int_aarch64_sve_sdiv, DestructiveBinaryCommWithRev, "SDIVR_ZPmZ">;
+ defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", "UDIV_ZPZZ", int_aarch64_sve_udiv, DestructiveBinaryCommWithRev, "UDIVR_ZPmZ">;
+ defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", "SDIVR_ZPZZ", int_aarch64_sve_sdivr, DestructiveBinaryCommWithRev, "SDIV_ZPmZ", /*isReverseInstr*/ 1>;
+ defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", "UDIVR_ZPZZ", int_aarch64_sve_udivr, DestructiveBinaryCommWithRev, "UDIV_ZPmZ", /*isReverseInstr*/ 1>;
+
+ defm SDIV_ZPZZ : sve_int_bin_pred_sd<AArch64sdiv_p>;
+ defm UDIV_ZPZZ : sve_int_bin_pred_sd<AArch64udiv_p>;
defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot", int_aarch64_sve_sdot>;
defm UDOT_ZZZ : sve_intx_dot<0b1, "udot", int_aarch64_sve_udot>;
@@ -166,15 +317,20 @@ let Predicates = [HasSVE] in {
defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls", int_aarch64_sve_cls>;
defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz", int_aarch64_sve_clz>;
defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt", int_aarch64_sve_cnt>;
+
+ let Predicates = [HasSVE, HasBF16] in {
+ def : SVE_3_Op_Pat<nxv8i16, int_aarch64_sve_cnt, nxv8i16, nxv8i1, nxv8bf16, !cast<Instruction>(CNT_ZPmZ_H)>;
+ }
+
defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot", int_aarch64_sve_cnot>;
defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not", int_aarch64_sve_not>;
defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", int_aarch64_sve_fabs>;
defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", int_aarch64_sve_fneg>;
- defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", int_aarch64_sve_smax>;
- defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", int_aarch64_sve_umax>;
- defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", int_aarch64_sve_smin>;
- defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", int_aarch64_sve_umin>;
+ defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", AArch64smax_m1>;
+ defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", AArch64umax_m1>;
+ defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", AArch64smin_m1>;
+ defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", AArch64umin_m1>;
defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", int_aarch64_sve_sabd>;
defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", int_aarch64_sve_uabd>;
@@ -190,19 +346,36 @@ let Predicates = [HasSVE] in {
defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", sve_fpimm_zero_one>;
defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", sve_fpimm_zero_one>;
- defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", int_aarch64_sve_fadd>;
- defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", int_aarch64_sve_fsub>;
- defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", int_aarch64_sve_fmul>;
- defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", int_aarch64_sve_fsubr>;
- defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", int_aarch64_sve_fmaxnm>;
- defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", int_aarch64_sve_fminnm>;
- defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax", int_aarch64_sve_fmax>;
- defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin", int_aarch64_sve_fmin>;
- defm FABD_ZPmZ : sve_fp_2op_p_zds<0b1000, "fabd", int_aarch64_sve_fabd>;
+ defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", int_aarch64_sve_fadd, DestructiveBinaryComm>;
+ defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", int_aarch64_sve_fsub, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">;
+ defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", "FMUL_ZPZZ", int_aarch64_sve_fmul, DestructiveBinaryComm>;
+ defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", "FSUBR_ZPZZ", int_aarch64_sve_fsubr, DestructiveBinaryCommWithRev, "FSUB_ZPmZ", /*isReverseInstr*/ 1>;
+ defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", "FMAXNM_ZPZZ", int_aarch64_sve_fmaxnm, DestructiveBinaryComm>;
+ defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", "FMINNM_ZPZZ", int_aarch64_sve_fminnm, DestructiveBinaryComm>;
+ defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax", "FMAX_ZPZZ", int_aarch64_sve_fmax, DestructiveBinaryComm>;
+ defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin", "FMIN_ZPZZ", int_aarch64_sve_fmin, DestructiveBinaryComm>;
+ defm FABD_ZPmZ : sve_fp_2op_p_zds<0b1000, "fabd", "FABD_ZPZZ", int_aarch64_sve_fabd, DestructiveBinaryComm>;
defm FSCALE_ZPmZ : sve_fp_2op_p_zds_fscale<0b1001, "fscale", int_aarch64_sve_fscale>;
- defm FMULX_ZPmZ : sve_fp_2op_p_zds<0b1010, "fmulx", int_aarch64_sve_fmulx>;
- defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr", int_aarch64_sve_fdivr>;
- defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", int_aarch64_sve_fdiv>;
+ defm FMULX_ZPmZ : sve_fp_2op_p_zds<0b1010, "fmulx", "FMULX_ZPZZ", int_aarch64_sve_fmulx, DestructiveBinaryComm>;
+ defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr", "FDIVR_ZPZZ", int_aarch64_sve_fdivr, DestructiveBinaryCommWithRev, "FDIV_ZPmZ", /*isReverseInstr*/ 1>;
+ defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", "FDIV_ZPZZ", int_aarch64_sve_fdiv, DestructiveBinaryCommWithRev, "FDIVR_ZPmZ">;
+
+ defm FADD_ZPZZ : sve_fp_bin_pred_hfd<AArch64fadd_p>;
+
+ let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
+ defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>;
+ defm FSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsub>;
+ defm FMUL_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmul>;
+ defm FSUBR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsubr>;
+ defm FMAXNM_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmaxnm>;
+ defm FMINNM_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fminnm>;
+ defm FMAX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmax>;
+ defm FMIN_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmin>;
+ defm FABD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fabd>;
+ defm FMULX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmulx>;
+ defm FDIVR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdivr>;
+ defm FDIV_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdiv>;
+ }
defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>;
defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub>;
@@ -226,6 +399,16 @@ let Predicates = [HasSVE] in {
defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad", int_aarch64_sve_fnmad>;
defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb", int_aarch64_sve_fnmsb>;
+ // Add patterns for FMA where disabled lanes are undef.
+ // FIXME: Implement a pseudo so we can choose a better instruction after
+ // regalloc.
+ def : Pat<(nxv8f16 (AArch64fma_p nxv8i1:$P, nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3)),
+ (FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>;
+ def : Pat<(nxv4f32 (AArch64fma_p nxv4i1:$P, nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3)),
+ (FMLA_ZPmZZ_S $P, $Op3, $Op1, $Op2)>;
+ def : Pat<(nxv2f64 (AArch64fma_p nxv2i1:$P, nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3)),
+ (FMLA_ZPmZZ_D $P, $Op3, $Op1, $Op2)>;
+
defm FTMAD_ZZI : sve_fp_ftmad<"ftmad", int_aarch64_sve_ftmad_x>;
defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla", int_aarch64_sve_fmla_lane>;
@@ -235,12 +418,21 @@ let Predicates = [HasSVE] in {
defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>;
// SVE floating point reductions.
- defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", int_aarch64_sve_fadda>;
- defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", int_aarch64_sve_faddv>;
- defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", int_aarch64_sve_fmaxnmv>;
- defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", int_aarch64_sve_fminnmv>;
- defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", int_aarch64_sve_fmaxv>;
- defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", int_aarch64_sve_fminv>;
+ defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", AArch64fadda_p>;
+ defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", AArch64faddv_p>;
+ defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", AArch64fmaxnmv_p>;
+ defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", AArch64fminnmv_p>;
+ defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", AArch64fmaxv_p>;
+ defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", AArch64fminv_p>;
+
+  // Use more efficient NEON instructions to extract elements within the NEON
+  // part (first 128 bits) of an SVE register.
+ def : Pat<(vector_extract (nxv8f16 ZPR:$Zs), (i64 0)),
+ (f16 (EXTRACT_SUBREG (v8f16 (EXTRACT_SUBREG ZPR:$Zs, zsub)), hsub))>;
+ def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)),
+ (f32 (EXTRACT_SUBREG (v4f32 (EXTRACT_SUBREG ZPR:$Zs, zsub)), ssub))>;
+ def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)),
+ (f64 (EXTRACT_SUBREG (v2f64 (EXTRACT_SUBREG ZPR:$Zs, zsub)), dsub))>;
// Splat immediate (unpredicated)
defm DUP_ZI : sve_int_dup_imm<"dup">;
@@ -257,18 +449,88 @@ let Predicates = [HasSVE] in {
defm DUP_ZZI : sve_int_perm_dup_i<"dup">;
// Splat scalar register (predicated)
- defm CPY_ZPmR : sve_int_perm_cpy_r<"cpy">;
- defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy">;
+ defm CPY_ZPmR : sve_int_perm_cpy_r<"cpy", AArch64dup_mt>;
+ defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy", AArch64dup_mt>;
+
+ let Predicates = [HasSVE, HasBF16] in {
+ def : Pat<(nxv8bf16 (AArch64dup_mt nxv8i1:$pg, bf16:$splat, nxv8bf16:$passthru)),
+ (CPY_ZPmV_H $passthru, $pg, $splat)>;
+ }
+
+ // Duplicate FP scalar into all vector elements
+ def : Pat<(nxv8f16 (AArch64dup (f16 FPR16:$src))),
+ (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
+ def : Pat<(nxv4f16 (AArch64dup (f16 FPR16:$src))),
+ (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
+ def : Pat<(nxv2f16 (AArch64dup (f16 FPR16:$src))),
+ (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
+ def : Pat<(nxv4f32 (AArch64dup (f32 FPR32:$src))),
+ (DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>;
+ def : Pat<(nxv2f32 (AArch64dup (f32 FPR32:$src))),
+ (DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>;
+ def : Pat<(nxv2f64 (AArch64dup (f64 FPR64:$src))),
+ (DUP_ZZI_D (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), 0)>;
+ let Predicates = [HasSVE, HasBF16] in {
+ def : Pat<(nxv8bf16 (AArch64dup (bf16 FPR16:$src))),
+ (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
+ }
+
+ // Duplicate +0.0 into all vector elements
+ def : Pat<(nxv8f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
+ def : Pat<(nxv4f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
+ def : Pat<(nxv2f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
+ def : Pat<(nxv4f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
+ def : Pat<(nxv2f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
+ def : Pat<(nxv2f64 (AArch64dup (f64 fpimm0))), (DUP_ZI_D 0, 0)>;
+ let Predicates = [HasSVE, HasBF16] in {
+ def : Pat<(nxv8bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>;
+ }
+
+ // Duplicate Int immediate into all vector elements
+ def : Pat<(nxv16i8 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),
+ (DUP_ZI_B $a, $b)>;
+ def : Pat<(nxv8i16 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),
+ (DUP_ZI_H $a, $b)>;
+ def : Pat<(nxv4i32 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),
+ (DUP_ZI_S $a, $b)>;
+ def : Pat<(nxv2i64 (AArch64dup (i64 (SVE8BitLslImm i32:$a, i32:$b)))),
+ (DUP_ZI_D $a, $b)>;
+
+ // Duplicate FP immediate into all vector elements
+ let AddedComplexity = 2 in {
+ def : Pat<(nxv8f16 (AArch64dup fpimm16:$imm8)),
+ (FDUP_ZI_H fpimm16:$imm8)>;
+ def : Pat<(nxv4f16 (AArch64dup fpimm16:$imm8)),
+ (FDUP_ZI_H fpimm16:$imm8)>;
+ def : Pat<(nxv2f16 (AArch64dup fpimm16:$imm8)),
+ (FDUP_ZI_H fpimm16:$imm8)>;
+ def : Pat<(nxv4f32 (AArch64dup fpimm32:$imm8)),
+ (FDUP_ZI_S fpimm32:$imm8)>;
+ def : Pat<(nxv2f32 (AArch64dup fpimm32:$imm8)),
+ (FDUP_ZI_S fpimm32:$imm8)>;
+ def : Pat<(nxv2f64 (AArch64dup fpimm64:$imm8)),
+ (FDUP_ZI_D fpimm64:$imm8)>;
+ }
// Select elements from either vector (predicated)
defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>;
defm SPLICE_ZPZ : sve_int_perm_splice<"splice", int_aarch64_sve_splice>;
+
+ let Predicates = [HasSVE, HasBF16] in {
+ def : SVE_3_Op_Pat<nxv8bf16, vselect, nxv8i1, nxv8bf16, nxv8bf16, SEL_ZPZZ_H>;
+ def : SVE_3_Op_Pat<nxv8bf16, int_aarch64_sve_splice, nxv8i1, nxv8bf16, nxv8bf16, SPLICE_ZPZ_H>;
+ }
+
defm COMPACT_ZPZ : sve_int_perm_compact<"compact", int_aarch64_sve_compact>;
defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>;
defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>;
defm EXT_ZZI : sve_int_perm_extract_i<"ext", AArch64ext>;
+ let Predicates = [HasSVE, HasBF16] in {
+ def : SVE_2_Op_Pat<nxv8bf16, AArch64insr, nxv8bf16, bf16, INSR_ZV_H>;
+ }
+
defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit", int_aarch64_sve_rbit>;
defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb", int_aarch64_sve_revb, bswap>;
defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", int_aarch64_sve_revh>;
@@ -277,6 +539,10 @@ let Predicates = [HasSVE] in {
defm REV_PP : sve_int_perm_reverse_p<"rev", AArch64rev>;
defm REV_ZZ : sve_int_perm_reverse_z<"rev", AArch64rev>;
+ let Predicates = [HasSVE, HasBF16] in {
+ def : SVE_1_Op_Pat<nxv8bf16, AArch64rev, nxv8bf16, REV_ZZ_H>;
+ }
+
defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo", AArch64sunpklo>;
defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi", AArch64sunpkhi>;
defm UUNPKLO_ZZ : sve_int_perm_unpk<0b10, "uunpklo", AArch64uunpklo>;
@@ -290,34 +556,34 @@ let Predicates = [HasSVE] in {
def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>;
defm FEXPA_ZZ : sve_int_bin_cons_misc_0_c_fexpa<"fexpa", int_aarch64_sve_fexpa_x>;
- def BRKPA_PPzPP : sve_int_brkp<0b00, "brkpa">;
- def BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas">;
- def BRKPB_PPzPP : sve_int_brkp<0b01, "brkpb">;
- def BRKPBS_PPzPP : sve_int_brkp<0b11, "brkpbs">;
+ defm BRKPA_PPzPP : sve_int_brkp<0b00, "brkpa", int_aarch64_sve_brkpa_z>;
+ defm BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas", null_frag>;
+ defm BRKPB_PPzPP : sve_int_brkp<0b01, "brkpb", int_aarch64_sve_brkpb_z>;
+ defm BRKPBS_PPzPP : sve_int_brkp<0b11, "brkpbs", null_frag>;
- def BRKN_PPzP : sve_int_brkn<0b0, "brkn">;
- def BRKNS_PPzP : sve_int_brkn<0b1, "brkns">;
+ defm BRKN_PPzP : sve_int_brkn<0b0, "brkn", int_aarch64_sve_brkn_z>;
+ defm BRKNS_PPzP : sve_int_brkn<0b1, "brkns", null_frag>;
- defm BRKA_PPzP : sve_int_break_z<0b000, "brka">;
- defm BRKA_PPmP : sve_int_break_m<0b001, "brka">;
- defm BRKAS_PPzP : sve_int_break_z<0b010, "brkas">;
- defm BRKB_PPzP : sve_int_break_z<0b100, "brkb">;
- defm BRKB_PPmP : sve_int_break_m<0b101, "brkb">;
- defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs">;
+ defm BRKA_PPzP : sve_int_break_z<0b000, "brka", int_aarch64_sve_brka_z>;
+ defm BRKA_PPmP : sve_int_break_m<0b001, "brka", int_aarch64_sve_brka>;
+ defm BRKAS_PPzP : sve_int_break_z<0b010, "brkas", null_frag>;
+ defm BRKB_PPzP : sve_int_break_z<0b100, "brkb", int_aarch64_sve_brkb_z>;
+ defm BRKB_PPmP : sve_int_break_m<0b101, "brkb", int_aarch64_sve_brkb>;
+ defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs", null_frag>;
def PTEST_PP : sve_int_ptest<0b010000, "ptest">;
def PFALSE : sve_int_pfalse<0b000000, "pfalse">;
defm PFIRST : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
defm PNEXT : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;
- defm AND_PPzPP : sve_int_pred_log<0b0000, "and", int_aarch64_sve_and_z>;
+ defm AND_PPzPP : sve_int_pred_log<0b0000, "and", int_aarch64_sve_and_z, and>;
defm BIC_PPzPP : sve_int_pred_log<0b0001, "bic", int_aarch64_sve_bic_z>;
- defm EOR_PPzPP : sve_int_pred_log<0b0010, "eor", int_aarch64_sve_eor_z>;
+ defm EOR_PPzPP : sve_int_pred_log<0b0010, "eor", int_aarch64_sve_eor_z, xor>;
defm SEL_PPPP : sve_int_pred_log<0b0011, "sel", vselect>;
defm ANDS_PPzPP : sve_int_pred_log<0b0100, "ands", null_frag>;
defm BICS_PPzPP : sve_int_pred_log<0b0101, "bics", null_frag>;
defm EORS_PPzPP : sve_int_pred_log<0b0110, "eors", null_frag>;
- defm ORR_PPzPP : sve_int_pred_log<0b1000, "orr", int_aarch64_sve_orr_z>;
+ defm ORR_PPzPP : sve_int_pred_log<0b1000, "orr", int_aarch64_sve_orr_z, or>;
defm ORN_PPzPP : sve_int_pred_log<0b1001, "orn", int_aarch64_sve_orn_z>;
defm NOR_PPzPP : sve_int_pred_log<0b1010, "nor", int_aarch64_sve_nor_z>;
defm NAND_PPzPP : sve_int_pred_log<0b1011, "nand", int_aarch64_sve_nand_z>;
@@ -333,11 +599,23 @@ let Predicates = [HasSVE] in {
defm CLASTA_ZPZ : sve_int_perm_clast_zz<0, "clasta", int_aarch64_sve_clasta>;
defm CLASTB_ZPZ : sve_int_perm_clast_zz<1, "clastb", int_aarch64_sve_clastb>;
+ let Predicates = [HasSVE, HasBF16] in {
+ def : SVE_3_Op_Pat<bf16, AArch64clasta_n, nxv8i1, bf16, nxv8bf16, CLASTA_VPZ_H>;
+ def : SVE_3_Op_Pat<bf16, AArch64clastb_n, nxv8i1, bf16, nxv8bf16, CLASTB_VPZ_H>;
+ def : SVE_3_Op_Pat<nxv8bf16, int_aarch64_sve_clasta, nxv8i1, nxv8bf16, nxv8bf16, CLASTA_ZPZ_H>;
+ def : SVE_3_Op_Pat<nxv8bf16, int_aarch64_sve_clastb, nxv8i1, nxv8bf16, nxv8bf16, CLASTB_ZPZ_H>;
+ }
+
defm LASTA_RPZ : sve_int_perm_last_r<0, "lasta", AArch64lasta>;
defm LASTB_RPZ : sve_int_perm_last_r<1, "lastb", AArch64lastb>;
defm LASTA_VPZ : sve_int_perm_last_v<0, "lasta", AArch64lasta>;
defm LASTB_VPZ : sve_int_perm_last_v<1, "lastb", AArch64lastb>;
+ let Predicates = [HasSVE, HasBF16] in {
+ def : SVE_2_Op_Pat<bf16, AArch64lasta, nxv8i1, nxv8bf16, LASTA_VPZ_H>;
+ def : SVE_2_Op_Pat<bf16, AArch64lastb, nxv8i1, nxv8bf16, LASTB_VPZ_H>;
+ }
+
   // Contiguous load with reg+immediate
defm LD1B_IMM : sve_mem_cld_si<0b0000, "ld1b", Z_b, ZPR8>;
defm LD1B_H_IMM : sve_mem_cld_si<0b0001, "ld1b", Z_h, ZPR16>;
@@ -468,115 +746,115 @@ let Predicates = [HasSVE] in {
// Gathers using unscaled 32-bit offsets, e.g.
// ld1h z0.s, p0/z, [x0, z0.s, uxtw]
- defm GLD1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
- defm GLDFF1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0001, "ldff1sb", null_frag, null_frag, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
- defm GLD1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
- defm GLDFF1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0011, "ldff1b", null_frag, null_frag, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
- defm GLD1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
- defm GLDFF1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0101, "ldff1sh", null_frag, null_frag, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
- defm GLD1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
- defm GLDFF1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0111, "ldff1h", null_frag, null_frag, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
- defm GLD1W : sve_mem_32b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>;
- defm GLDFF1W : sve_mem_32b_gld_vs_32_unscaled<0b1011, "ldff1w", null_frag, null_frag, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>;
+ defm GLD1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
+ defm GLDFF1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
+ defm GLD1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
+ defm GLDFF1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
+ defm GLD1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
+ defm GLDFF1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
+ defm GLD1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
+ defm GLDFF1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
+ defm GLD1W : sve_mem_32b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>;
+ defm GLDFF1W : sve_mem_32b_gld_vs_32_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>;
// Gathers using scaled 32-bit offsets, e.g.
// ld1h z0.s, p0/z, [x0, z0.s, uxtw #1]
- defm GLD1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled, AArch64ld1s_gather_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
- defm GLDFF1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0101, "ldff1sh", null_frag, null_frag, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
- defm GLD1H_S : sve_mem_32b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
- defm GLDFF1H_S : sve_mem_32b_gld_sv_32_scaled<0b0111, "ldff1h", null_frag, null_frag, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
- defm GLD1W : sve_mem_32b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>;
- defm GLDFF1W : sve_mem_32b_gld_sv_32_scaled<0b1011, "ldff1w", null_frag, null_frag, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>;
+ defm GLD1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled_z, AArch64ld1s_gather_uxtw_scaled_z, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
+ defm GLDFF1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_scaled_z, AArch64ldff1s_gather_uxtw_scaled_z, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
+ defm GLD1H_S : sve_mem_32b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
+ defm GLDFF1H_S : sve_mem_32b_gld_sv_32_scaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
+ defm GLD1W : sve_mem_32b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>;
+ defm GLDFF1W : sve_mem_32b_gld_sv_32_scaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>;
// Gathers using 32-bit pointers with scaled offset, e.g.
// ld1h z0.s, p0/z, [z0.s, #16]
- defm GLD1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm, nxv4i8>;
- defm GLDFF1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0001, "ldff1sb", imm0_31, null_frag, nxv4i8>;
- defm GLD1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm, nxv4i8>;
- defm GLDFF1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0011, "ldff1b", imm0_31, null_frag, nxv4i8>;
- defm GLD1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm, nxv4i16>;
- defm GLDFF1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0101, "ldff1sh", uimm5s2, null_frag, nxv4i16>;
- defm GLD1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm, nxv4i16>;
- defm GLDFF1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0111, "ldff1h", uimm5s2, null_frag, nxv4i16>;
- defm GLD1W : sve_mem_32b_gld_vi_32_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm, nxv4i32>;
- defm GLDFF1W : sve_mem_32b_gld_vi_32_ptrs<0b1011, "ldff1w", uimm5s4, null_frag, nxv4i32>;
+ defm GLD1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm_z, nxv4i8>;
+ defm GLDFF1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0001, "ldff1sb", imm0_31, AArch64ldff1s_gather_imm_z, nxv4i8>;
+ defm GLD1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm_z, nxv4i8>;
+ defm GLDFF1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0011, "ldff1b", imm0_31, AArch64ldff1_gather_imm_z, nxv4i8>;
+ defm GLD1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm_z, nxv4i16>;
+ defm GLDFF1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0101, "ldff1sh", uimm5s2, AArch64ldff1s_gather_imm_z, nxv4i16>;
+ defm GLD1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm_z, nxv4i16>;
+ defm GLDFF1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0111, "ldff1h", uimm5s2, AArch64ldff1_gather_imm_z, nxv4i16>;
+ defm GLD1W : sve_mem_32b_gld_vi_32_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm_z, nxv4i32>;
+ defm GLDFF1W : sve_mem_32b_gld_vi_32_ptrs<0b1011, "ldff1w", uimm5s4, AArch64ldff1_gather_imm_z, nxv4i32>;
// Gathers using 64-bit pointers with scaled offset, e.g.
// ld1h z0.d, p0/z, [z0.d, #16]
- defm GLD1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm, nxv2i8>;
- defm GLDFF1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0001, "ldff1sb", imm0_31, null_frag, nxv2i8>;
- defm GLD1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm, nxv2i8>;
- defm GLDFF1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0011, "ldff1b", imm0_31, null_frag, nxv2i8>;
- defm GLD1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm, nxv2i16>;
- defm GLDFF1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0101, "ldff1sh", uimm5s2, null_frag, nxv2i16>;
- defm GLD1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm, nxv2i16>;
- defm GLDFF1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0111, "ldff1h", uimm5s2, null_frag, nxv2i16>;
- defm GLD1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1000, "ld1sw", uimm5s4, AArch64ld1s_gather_imm, nxv2i32>;
- defm GLDFF1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1001, "ldff1sw", uimm5s4, null_frag, nxv2i32>;
- defm GLD1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm, nxv2i32>;
- defm GLDFF1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1011, "ldff1w", uimm5s4, null_frag, nxv2i32>;
- defm GLD1D : sve_mem_64b_gld_vi_64_ptrs<0b1110, "ld1d", uimm5s8, AArch64ld1_gather_imm, nxv2i64>;
- defm GLDFF1D : sve_mem_64b_gld_vi_64_ptrs<0b1111, "ldff1d", uimm5s8, null_frag, nxv2i64>;
+ defm GLD1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm_z, nxv2i8>;
+ defm GLDFF1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0001, "ldff1sb", imm0_31, AArch64ldff1s_gather_imm_z, nxv2i8>;
+ defm GLD1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm_z, nxv2i8>;
+ defm GLDFF1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0011, "ldff1b", imm0_31, AArch64ldff1_gather_imm_z, nxv2i8>;
+ defm GLD1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm_z, nxv2i16>;
+ defm GLDFF1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0101, "ldff1sh", uimm5s2, AArch64ldff1s_gather_imm_z, nxv2i16>;
+ defm GLD1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm_z, nxv2i16>;
+ defm GLDFF1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0111, "ldff1h", uimm5s2, AArch64ldff1_gather_imm_z, nxv2i16>;
+ defm GLD1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1000, "ld1sw", uimm5s4, AArch64ld1s_gather_imm_z, nxv2i32>;
+ defm GLDFF1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1001, "ldff1sw", uimm5s4, AArch64ldff1s_gather_imm_z, nxv2i32>;
+ defm GLD1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm_z, nxv2i32>;
+ defm GLDFF1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1011, "ldff1w", uimm5s4, AArch64ldff1_gather_imm_z, nxv2i32>;
+ defm GLD1D : sve_mem_64b_gld_vi_64_ptrs<0b1110, "ld1d", uimm5s8, AArch64ld1_gather_imm_z, nxv2i64>;
+ defm GLDFF1D : sve_mem_64b_gld_vi_64_ptrs<0b1111, "ldff1d", uimm5s8, AArch64ldff1_gather_imm_z, nxv2i64>;
// Gathers using unscaled 64-bit offsets, e.g.
// ld1h z0.d, p0/z, [x0, z0.d]
- defm GLD1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0000, "ld1sb", AArch64ld1s_gather, nxv2i8>;
- defm GLDFF1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0001, "ldff1sb", null_frag, nxv2i8>;
- defm GLD1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0010, "ld1b", AArch64ld1_gather, nxv2i8>;
- defm GLDFF1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0011, "ldff1b", null_frag, nxv2i8>;
- defm GLD1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0100, "ld1sh", AArch64ld1s_gather, nxv2i16>;
- defm GLDFF1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0101, "ldff1sh", null_frag, nxv2i16>;
- defm GLD1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0110, "ld1h", AArch64ld1_gather, nxv2i16>;
- defm GLDFF1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0111, "ldff1h", null_frag, nxv2i16>;
- defm GLD1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1000, "ld1sw", AArch64ld1s_gather, nxv2i32>;
- defm GLDFF1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1001, "ldff1sw", null_frag, nxv2i32>;
- defm GLD1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1010, "ld1w", AArch64ld1_gather, nxv2i32>;
- defm GLDFF1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1011, "ldff1w", null_frag, nxv2i32>;
- defm GLD1D : sve_mem_64b_gld_vs2_64_unscaled<0b1110, "ld1d", AArch64ld1_gather, nxv2i64>;
- defm GLDFF1D : sve_mem_64b_gld_vs2_64_unscaled<0b1111, "ldff1d", null_frag, nxv2i64>;
+ defm GLD1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_z, nxv2i8>;
+ defm GLDFF1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_z, nxv2i8>;
+ defm GLD1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0010, "ld1b", AArch64ld1_gather_z, nxv2i8>;
+ defm GLDFF1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_z, nxv2i8>;
+ defm GLD1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_z, nxv2i16>;
+ defm GLDFF1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_z, nxv2i16>;
+ defm GLD1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0110, "ld1h", AArch64ld1_gather_z, nxv2i16>;
+ defm GLDFF1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_z, nxv2i16>;
+ defm GLD1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1000, "ld1sw", AArch64ld1s_gather_z, nxv2i32>;
+ defm GLDFF1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1001, "ldff1sw", AArch64ldff1s_gather_z, nxv2i32>;
+ defm GLD1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1010, "ld1w", AArch64ld1_gather_z, nxv2i32>;
+ defm GLDFF1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_z, nxv2i32>;
+ defm GLD1D : sve_mem_64b_gld_vs2_64_unscaled<0b1110, "ld1d", AArch64ld1_gather_z, nxv2i64>;
+ defm GLDFF1D : sve_mem_64b_gld_vs2_64_unscaled<0b1111, "ldff1d", AArch64ldff1_gather_z, nxv2i64>;
// Gathers using scaled 64-bit offsets, e.g.
// ld1h z0.d, p0/z, [x0, z0.d, lsl #1]
- defm GLD1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0100, "ld1sh", AArch64ld1s_gather_scaled, ZPR64ExtLSL16, nxv2i16>;
- defm GLDFF1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0101, "ldff1sh", null_frag, ZPR64ExtLSL16, nxv2i16>;
- defm GLD1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0110, "ld1h", AArch64ld1_gather_scaled, ZPR64ExtLSL16, nxv2i16>;
- defm GLDFF1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0111, "ldff1h", null_frag, ZPR64ExtLSL16, nxv2i16>;
- defm GLD1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1000, "ld1sw", AArch64ld1s_gather_scaled, ZPR64ExtLSL32, nxv2i32>;
- defm GLDFF1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1001, "ldff1sw", null_frag, ZPR64ExtLSL32, nxv2i32>;
- defm GLD1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1010, "ld1w", AArch64ld1_gather_scaled, ZPR64ExtLSL32, nxv2i32>;
- defm GLDFF1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1011, "ldff1w", null_frag, ZPR64ExtLSL32, nxv2i32>;
- defm GLD1D : sve_mem_64b_gld_sv2_64_scaled<0b1110, "ld1d", AArch64ld1_gather_scaled, ZPR64ExtLSL64, nxv2i64>;
- defm GLDFF1D : sve_mem_64b_gld_sv2_64_scaled<0b1111, "ldff1d", null_frag, ZPR64ExtLSL64, nxv2i64>;
+ defm GLD1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0100, "ld1sh", AArch64ld1s_gather_scaled_z, ZPR64ExtLSL16, nxv2i16>;
+ defm GLDFF1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_scaled_z, ZPR64ExtLSL16, nxv2i16>;
+ defm GLD1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0110, "ld1h", AArch64ld1_gather_scaled_z, ZPR64ExtLSL16, nxv2i16>;
+ defm GLDFF1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0111, "ldff1h", AArch64ldff1_gather_scaled_z, ZPR64ExtLSL16, nxv2i16>;
+ defm GLD1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1000, "ld1sw", AArch64ld1s_gather_scaled_z, ZPR64ExtLSL32, nxv2i32>;
+ defm GLDFF1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1001, "ldff1sw", AArch64ldff1s_gather_scaled_z, ZPR64ExtLSL32, nxv2i32>;
+ defm GLD1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1010, "ld1w", AArch64ld1_gather_scaled_z, ZPR64ExtLSL32, nxv2i32>;
+ defm GLDFF1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1011, "ldff1w", AArch64ldff1_gather_scaled_z, ZPR64ExtLSL32, nxv2i32>;
+ defm GLD1D : sve_mem_64b_gld_sv2_64_scaled<0b1110, "ld1d", AArch64ld1_gather_scaled_z, ZPR64ExtLSL64, nxv2i64>;
+ defm GLDFF1D : sve_mem_64b_gld_sv2_64_scaled<0b1111, "ldff1d", AArch64ldff1_gather_scaled_z, ZPR64ExtLSL64, nxv2i64>;
   // Gathers using unscaled 32-bit offsets unpacked in 64-bit elements, e.g.
// ld1h z0.d, p0/z, [x0, z0.d, uxtw]
- defm GLD1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
- defm GLDFF1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0001, "ldff1sb", null_frag, null_frag, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
- defm GLD1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
- defm GLDFF1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0011, "ldff1b", null_frag, null_frag, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
- defm GLD1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
- defm GLDFF1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0101, "ldff1sh", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
- defm GLD1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
- defm GLDFF1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0111, "ldff1h", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
- defm GLD1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
- defm GLDFF1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1001, "ldff1sw", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
- defm GLD1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
- defm GLDFF1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1011, "ldff1w", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
- defm GLD1D : sve_mem_64b_gld_vs_32_unscaled<0b1110, "ld1d", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>;
- defm GLDFF1D : sve_mem_64b_gld_vs_32_unscaled<0b1111, "ldff1d", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>;
+ defm GLD1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
+ defm GLDFF1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
+ defm GLD1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
+ defm GLDFF1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
+ defm GLD1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
+ defm GLDFF1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
+ defm GLD1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
+ defm GLDFF1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
+ defm GLD1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
+ defm GLDFF1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1001, "ldff1sw", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
+ defm GLD1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
+ defm GLDFF1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
+ defm GLD1D : sve_mem_64b_gld_vs_32_unscaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>;
+ defm GLDFF1D : sve_mem_64b_gld_vs_32_unscaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>;
   // Gathers using scaled 32-bit offsets unpacked in 64-bit elements, e.g.
// ld1h z0.d, p0/z, [x0, z0.d, uxtw #1]
- defm GLD1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled, AArch64ld1s_gather_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
- defm GLDFF1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0101, "ldff1sh", null_frag, null_frag, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
- defm GLD1H_D : sve_mem_64b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
- defm GLDFF1H_D : sve_mem_64b_gld_sv_32_scaled<0b0111, "ldff1h", null_frag, null_frag, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
- defm GLD1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw_scaled, AArch64ld1s_gather_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
- defm GLDFF1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1001, "ldff1sw", null_frag, null_frag, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
- defm GLD1W_D : sve_mem_64b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
- defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", null_frag, null_frag, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
- defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
- defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", null_frag, null_frag, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
+ defm GLD1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled_z, AArch64ld1s_gather_uxtw_scaled_z, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
+ defm GLDFF1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_scaled_z, AArch64ldff1s_gather_uxtw_scaled_z, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
+ defm GLD1H_D : sve_mem_64b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
+ defm GLDFF1H_D : sve_mem_64b_gld_sv_32_scaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
+ defm GLD1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw_scaled_z, AArch64ld1s_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
+ defm GLDFF1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1001, "ldff1sw", AArch64ldff1s_gather_sxtw_scaled_z, AArch64ldff1s_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
+ defm GLD1W_D : sve_mem_64b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
+ defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
+ defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
+ defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
// Non-temporal contiguous loads (register + immediate)
defm LDNT1B_ZRI : sve_mem_cldnt_si<0b00, "ldnt1b", Z_b, ZPR8>;
@@ -640,16 +918,16 @@ let Predicates = [HasSVE] in {
// Scatters using 32/64-bit pointers with offset, e.g.
// st1h z0.s, p0, [z0.s, #16]
- defm SST1B_S : sve_mem_32b_sst_vi_ptrs<0b001, "st1b", timm0_31, AArch64st1_scatter_imm, nxv4i8>;
- defm SST1H_S : sve_mem_32b_sst_vi_ptrs<0b011, "st1h", tuimm5s2, AArch64st1_scatter_imm, nxv4i16>;
- defm SST1W : sve_mem_32b_sst_vi_ptrs<0b101, "st1w", tuimm5s4, AArch64st1_scatter_imm, nxv4i32>;
+ defm SST1B_S : sve_mem_32b_sst_vi_ptrs<0b001, "st1b", imm0_31, AArch64st1_scatter_imm, nxv4i8>;
+ defm SST1H_S : sve_mem_32b_sst_vi_ptrs<0b011, "st1h", uimm5s2, AArch64st1_scatter_imm, nxv4i16>;
+ defm SST1W : sve_mem_32b_sst_vi_ptrs<0b101, "st1w", uimm5s4, AArch64st1_scatter_imm, nxv4i32>;
// Scatters using 32/64-bit pointers with offset, e.g.
// st1h z0.d, p0, [z0.d, #16]
- defm SST1B_D : sve_mem_64b_sst_vi_ptrs<0b000, "st1b", timm0_31, AArch64st1_scatter_imm, nxv2i8>;
- defm SST1H_D : sve_mem_64b_sst_vi_ptrs<0b010, "st1h", tuimm5s2, AArch64st1_scatter_imm, nxv2i16>;
- defm SST1W_D : sve_mem_64b_sst_vi_ptrs<0b100, "st1w", tuimm5s4, AArch64st1_scatter_imm, nxv2i32>;
- defm SST1D : sve_mem_64b_sst_vi_ptrs<0b110, "st1d", tuimm5s8, AArch64st1_scatter_imm, nxv2i64>;
+ defm SST1B_D : sve_mem_64b_sst_vi_ptrs<0b000, "st1b", imm0_31, AArch64st1_scatter_imm, nxv2i8>;
+ defm SST1H_D : sve_mem_64b_sst_vi_ptrs<0b010, "st1h", uimm5s2, AArch64st1_scatter_imm, nxv2i16>;
+ defm SST1W_D : sve_mem_64b_sst_vi_ptrs<0b100, "st1w", uimm5s4, AArch64st1_scatter_imm, nxv2i32>;
+ defm SST1D : sve_mem_64b_sst_vi_ptrs<0b110, "st1d", uimm5s8, AArch64st1_scatter_imm, nxv2i64>;
// Scatters using unscaled 64-bit offsets, e.g.
// st1h z0.d, p0, [x0, z0.d]
@@ -722,47 +1000,92 @@ let Predicates = [HasSVE] in {
def PRFS_PRR : sve_mem_prfm_ss<0b101, "prfw", GPR64NoXZRshifted32>;
def PRFD_PRR : sve_mem_prfm_ss<0b111, "prfd", GPR64NoXZRshifted64>;
+multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instruction RegImmInst, Instruction RegRegInst, int scale, ComplexPattern AddrCP> {
+ // reg + imm
+ let AddedComplexity = 2 in {
+ def _reg_imm : Pat<(prefetch (PredTy PPR_3b:$gp), (am_sve_indexed_s6 GPR64sp:$base, simm6s1:$offset), (i32 sve_prfop:$prfop)),
+ (RegImmInst sve_prfop:$prfop, PPR_3b:$gp, GPR64:$base, simm6s1:$offset)>;
+ }
+
+ // reg + reg
+ let AddedComplexity = 1 in {
+ def _reg_reg : Pat<(prefetch (PredTy PPR_3b:$gp), (AddrCP GPR64sp:$base, GPR64:$index), (i32 sve_prfop:$prfop)),
+ (RegRegInst sve_prfop:$prfop, PPR_3b:$gp, GPR64:$base, GPR64:$index)>;
+ }
+
+ // default fallback
+ def _default : Pat<(prefetch (PredTy PPR_3b:$gp), GPR64:$base, (i32 sve_prfop:$prfop)),
+ (RegImmInst sve_prfop:$prfop, PPR_3b:$gp, GPR64:$base, (i64 0))>;
+ }
+
+ defm : sve_prefetch<int_aarch64_sve_prf, nxv16i1, PRFB_PRI, PRFB_PRR, 0, am_sve_regreg_lsl0>;
+ defm : sve_prefetch<int_aarch64_sve_prf, nxv8i1, PRFH_PRI, PRFH_PRR, 1, am_sve_regreg_lsl1>;
+ defm : sve_prefetch<int_aarch64_sve_prf, nxv4i1, PRFW_PRI, PRFS_PRR, 2, am_sve_regreg_lsl2>;
+ defm : sve_prefetch<int_aarch64_sve_prf, nxv2i1, PRFD_PRI, PRFD_PRR, 3, am_sve_regreg_lsl3>;
+
// Gather prefetch using scaled 32-bit offsets, e.g.
// prfh pldl1keep, p0, [x0, z0.s, uxtw #1]
- defm PRFB_S : sve_mem_32b_prfm_sv_scaled<0b00, "prfb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
- defm PRFH_S : sve_mem_32b_prfm_sv_scaled<0b01, "prfh", ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
- defm PRFW_S : sve_mem_32b_prfm_sv_scaled<0b10, "prfw", ZPR32ExtSXTW32, ZPR32ExtUXTW32>;
- defm PRFD_S : sve_mem_32b_prfm_sv_scaled<0b11, "prfd", ZPR32ExtSXTW64, ZPR32ExtUXTW64>;
+ defm PRFB_S : sve_mem_32b_prfm_sv_scaled<0b00, "prfb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, int_aarch64_sve_prfb_gather_sxtw_index, int_aarch64_sve_prfb_gather_uxtw_index>;
+ defm PRFH_S : sve_mem_32b_prfm_sv_scaled<0b01, "prfh", ZPR32ExtSXTW16, ZPR32ExtUXTW16, int_aarch64_sve_prfh_gather_sxtw_index, int_aarch64_sve_prfh_gather_uxtw_index>;
+ defm PRFW_S : sve_mem_32b_prfm_sv_scaled<0b10, "prfw", ZPR32ExtSXTW32, ZPR32ExtUXTW32, int_aarch64_sve_prfw_gather_sxtw_index, int_aarch64_sve_prfw_gather_uxtw_index>;
+ defm PRFD_S : sve_mem_32b_prfm_sv_scaled<0b11, "prfd", ZPR32ExtSXTW64, ZPR32ExtUXTW64, int_aarch64_sve_prfd_gather_sxtw_index, int_aarch64_sve_prfd_gather_uxtw_index>;
// Gather prefetch using unpacked, scaled 32-bit offsets, e.g.
// prfh pldl1keep, p0, [x0, z0.d, uxtw #1]
- defm PRFB_D : sve_mem_64b_prfm_sv_ext_scaled<0b00, "prfb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
- defm PRFH_D : sve_mem_64b_prfm_sv_ext_scaled<0b01, "prfh", ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
- defm PRFW_D : sve_mem_64b_prfm_sv_ext_scaled<0b10, "prfw", ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
- defm PRFD_D : sve_mem_64b_prfm_sv_ext_scaled<0b11, "prfd", ZPR64ExtSXTW64, ZPR64ExtUXTW64>;
+ defm PRFB_D : sve_mem_64b_prfm_sv_ext_scaled<0b00, "prfb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, int_aarch64_sve_prfb_gather_sxtw_index, int_aarch64_sve_prfb_gather_uxtw_index>;
+ defm PRFH_D : sve_mem_64b_prfm_sv_ext_scaled<0b01, "prfh", ZPR64ExtSXTW16, ZPR64ExtUXTW16, int_aarch64_sve_prfh_gather_sxtw_index, int_aarch64_sve_prfh_gather_uxtw_index>;
+ defm PRFW_D : sve_mem_64b_prfm_sv_ext_scaled<0b10, "prfw", ZPR64ExtSXTW32, ZPR64ExtUXTW32, int_aarch64_sve_prfw_gather_sxtw_index, int_aarch64_sve_prfw_gather_uxtw_index>;
+ defm PRFD_D : sve_mem_64b_prfm_sv_ext_scaled<0b11, "prfd", ZPR64ExtSXTW64, ZPR64ExtUXTW64, int_aarch64_sve_prfd_gather_sxtw_index, int_aarch64_sve_prfd_gather_uxtw_index>;
// Gather prefetch using scaled 64-bit offsets, e.g.
// prfh pldl1keep, p0, [x0, z0.d, lsl #1]
- defm PRFB_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b00, "prfb", ZPR64ExtLSL8>;
- defm PRFH_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b01, "prfh", ZPR64ExtLSL16>;
- defm PRFW_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b10, "prfw", ZPR64ExtLSL32>;
- defm PRFD_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b11, "prfd", ZPR64ExtLSL64>;
+ defm PRFB_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b00, "prfb", ZPR64ExtLSL8, int_aarch64_sve_prfb_gather_index>;
+ defm PRFH_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b01, "prfh", ZPR64ExtLSL16, int_aarch64_sve_prfh_gather_index>;
+ defm PRFW_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b10, "prfw", ZPR64ExtLSL32, int_aarch64_sve_prfw_gather_index>;
+ defm PRFD_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b11, "prfd", ZPR64ExtLSL64, int_aarch64_sve_prfd_gather_index>;
// Gather prefetch using 32/64-bit pointers with offset, e.g.
// prfh pldl1keep, p0, [z0.s, #16]
// prfh pldl1keep, p0, [z0.d, #16]
- defm PRFB_S_PZI : sve_mem_32b_prfm_vi<0b00, "prfb", imm0_31>;
- defm PRFH_S_PZI : sve_mem_32b_prfm_vi<0b01, "prfh", uimm5s2>;
- defm PRFW_S_PZI : sve_mem_32b_prfm_vi<0b10, "prfw", uimm5s4>;
- defm PRFD_S_PZI : sve_mem_32b_prfm_vi<0b11, "prfd", uimm5s8>;
+ defm PRFB_S_PZI : sve_mem_32b_prfm_vi<0b00, "prfb", imm0_31, int_aarch64_sve_prfb_gather_scalar_offset>;
+ defm PRFH_S_PZI : sve_mem_32b_prfm_vi<0b01, "prfh", uimm5s2, int_aarch64_sve_prfh_gather_scalar_offset>;
+ defm PRFW_S_PZI : sve_mem_32b_prfm_vi<0b10, "prfw", uimm5s4, int_aarch64_sve_prfw_gather_scalar_offset>;
+ defm PRFD_S_PZI : sve_mem_32b_prfm_vi<0b11, "prfd", uimm5s8, int_aarch64_sve_prfd_gather_scalar_offset>;
- defm PRFB_D_PZI : sve_mem_64b_prfm_vi<0b00, "prfb", imm0_31>;
- defm PRFH_D_PZI : sve_mem_64b_prfm_vi<0b01, "prfh", uimm5s2>;
- defm PRFW_D_PZI : sve_mem_64b_prfm_vi<0b10, "prfw", uimm5s4>;
- defm PRFD_D_PZI : sve_mem_64b_prfm_vi<0b11, "prfd", uimm5s8>;
+ defm PRFB_D_PZI : sve_mem_64b_prfm_vi<0b00, "prfb", imm0_31, int_aarch64_sve_prfb_gather_scalar_offset>;
+ defm PRFH_D_PZI : sve_mem_64b_prfm_vi<0b01, "prfh", uimm5s2, int_aarch64_sve_prfh_gather_scalar_offset>;
+ defm PRFW_D_PZI : sve_mem_64b_prfm_vi<0b10, "prfw", uimm5s4, int_aarch64_sve_prfw_gather_scalar_offset>;
+ defm PRFD_D_PZI : sve_mem_64b_prfm_vi<0b11, "prfd", uimm5s8, int_aarch64_sve_prfd_gather_scalar_offset>;
defm ADR_SXTW_ZZZ_D : sve_int_bin_cons_misc_0_a_sxtw<0b00, "adr">;
defm ADR_UXTW_ZZZ_D : sve_int_bin_cons_misc_0_a_uxtw<0b01, "adr">;
defm ADR_LSL_ZZZ_S : sve_int_bin_cons_misc_0_a_32_lsl<0b10, "adr">;
defm ADR_LSL_ZZZ_D : sve_int_bin_cons_misc_0_a_64_lsl<0b11, "adr">;
+ def : Pat<(nxv4i32 (int_aarch64_sve_adrb nxv4i32:$Op1, nxv4i32:$Op2)),
+ (ADR_LSL_ZZZ_S_0 $Op1, $Op2)>;
+ def : Pat<(nxv4i32 (int_aarch64_sve_adrh nxv4i32:$Op1, nxv4i32:$Op2)),
+ (ADR_LSL_ZZZ_S_1 $Op1, $Op2)>;
+ def : Pat<(nxv4i32 (int_aarch64_sve_adrw nxv4i32:$Op1, nxv4i32:$Op2)),
+ (ADR_LSL_ZZZ_S_2 $Op1, $Op2)>;
+ def : Pat<(nxv4i32 (int_aarch64_sve_adrd nxv4i32:$Op1, nxv4i32:$Op2)),
+ (ADR_LSL_ZZZ_S_3 $Op1, $Op2)>;
+
+ def : Pat<(nxv2i64 (int_aarch64_sve_adrb nxv2i64:$Op1, nxv2i64:$Op2)),
+ (ADR_LSL_ZZZ_D_0 $Op1, $Op2)>;
+ def : Pat<(nxv2i64 (int_aarch64_sve_adrh nxv2i64:$Op1, nxv2i64:$Op2)),
+ (ADR_LSL_ZZZ_D_1 $Op1, $Op2)>;
+ def : Pat<(nxv2i64 (int_aarch64_sve_adrw nxv2i64:$Op1, nxv2i64:$Op2)),
+ (ADR_LSL_ZZZ_D_2 $Op1, $Op2)>;
+ def : Pat<(nxv2i64 (int_aarch64_sve_adrd nxv2i64:$Op1, nxv2i64:$Op2)),
+ (ADR_LSL_ZZZ_D_3 $Op1, $Op2)>;
+
defm TBL_ZZZ : sve_int_perm_tbl<"tbl", AArch64tbl>;
+ let Predicates = [HasSVE, HasBF16] in {
+ def : SVE_2_Op_Pat<nxv8bf16, AArch64tbl, nxv8bf16, nxv8i16, TBL_ZZZ_H>;
+ }
+
defm ZIP1_ZZZ : sve_int_perm_bin_perm_zz<0b000, "zip1", AArch64zip1>;
defm ZIP2_ZZZ : sve_int_perm_bin_perm_zz<0b001, "zip2", AArch64zip2>;
defm UZP1_ZZZ : sve_int_perm_bin_perm_zz<0b010, "uzp1", AArch64uzp1>;
@@ -770,6 +1093,15 @@ let Predicates = [HasSVE] in {
defm TRN1_ZZZ : sve_int_perm_bin_perm_zz<0b100, "trn1", AArch64trn1>;
defm TRN2_ZZZ : sve_int_perm_bin_perm_zz<0b101, "trn2", AArch64trn2>;
+ let Predicates = [HasSVE, HasBF16] in {
+ def : SVE_2_Op_Pat<nxv8bf16, AArch64zip1, nxv8bf16, nxv8bf16, ZIP1_ZZZ_H>;
+ def : SVE_2_Op_Pat<nxv8bf16, AArch64zip2, nxv8bf16, nxv8bf16, ZIP2_ZZZ_H>;
+ def : SVE_2_Op_Pat<nxv8bf16, AArch64uzp1, nxv8bf16, nxv8bf16, UZP1_ZZZ_H>;
+ def : SVE_2_Op_Pat<nxv8bf16, AArch64uzp2, nxv8bf16, nxv8bf16, UZP2_ZZZ_H>;
+ def : SVE_2_Op_Pat<nxv8bf16, AArch64trn1, nxv8bf16, nxv8bf16, TRN1_ZZZ_H>;
+ def : SVE_2_Op_Pat<nxv8bf16, AArch64trn2, nxv8bf16, nxv8bf16, TRN2_ZZZ_H>;
+ }
+
defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1", AArch64zip1>;
defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2", AArch64zip2>;
defm UZP1_PPP : sve_int_perm_bin_perm_pp<0b010, "uzp1", AArch64uzp1>;
@@ -777,12 +1109,12 @@ let Predicates = [HasSVE] in {
defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1", AArch64trn1>;
defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2>;
- defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", int_aarch64_sve_cmphs, SETUGE>;
- defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", int_aarch64_sve_cmphi, SETUGT>;
- defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", int_aarch64_sve_cmpge, SETGE>;
- defm CMPGT_PPzZZ : sve_int_cmp_0<0b101, "cmpgt", int_aarch64_sve_cmpgt, SETGT>;
- defm CMPEQ_PPzZZ : sve_int_cmp_0<0b110, "cmpeq", int_aarch64_sve_cmpeq, SETEQ>;
- defm CMPNE_PPzZZ : sve_int_cmp_0<0b111, "cmpne", int_aarch64_sve_cmpne, SETNE>;
+ defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>;
+ defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>;
+ defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>;
+ defm CMPGT_PPzZZ : sve_int_cmp_0<0b101, "cmpgt", SETGT, SETLT>;
+ defm CMPEQ_PPzZZ : sve_int_cmp_0<0b110, "cmpeq", SETEQ, SETEQ>;
+ defm CMPNE_PPzZZ : sve_int_cmp_0<0b111, "cmpne", SETNE, SETNE>;
defm CMPEQ_WIDE_PPzZZ : sve_int_cmp_0_wide<0b010, "cmpeq", int_aarch64_sve_cmpeq_wide>;
defm CMPNE_WIDE_PPzZZ : sve_int_cmp_0_wide<0b011, "cmpne", int_aarch64_sve_cmpne_wide>;
@@ -795,22 +1127,22 @@ let Predicates = [HasSVE] in {
defm CMPLO_WIDE_PPzZZ : sve_int_cmp_1_wide<0b110, "cmplo", int_aarch64_sve_cmplo_wide>;
defm CMPLS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b111, "cmpls", int_aarch64_sve_cmpls_wide>;
- defm CMPGE_PPzZI : sve_int_scmp_vi<0b000, "cmpge", SETGE, int_aarch64_sve_cmpge>;
- defm CMPGT_PPzZI : sve_int_scmp_vi<0b001, "cmpgt", SETGT, int_aarch64_sve_cmpgt>;
- defm CMPLT_PPzZI : sve_int_scmp_vi<0b010, "cmplt", SETLT, null_frag, int_aarch64_sve_cmpgt>;
- defm CMPLE_PPzZI : sve_int_scmp_vi<0b011, "cmple", SETLE, null_frag, int_aarch64_sve_cmpge>;
- defm CMPEQ_PPzZI : sve_int_scmp_vi<0b100, "cmpeq", SETEQ, int_aarch64_sve_cmpeq>;
- defm CMPNE_PPzZI : sve_int_scmp_vi<0b101, "cmpne", SETNE, int_aarch64_sve_cmpne>;
- defm CMPHS_PPzZI : sve_int_ucmp_vi<0b00, "cmphs", SETUGE, int_aarch64_sve_cmphs>;
- defm CMPHI_PPzZI : sve_int_ucmp_vi<0b01, "cmphi", SETUGT, int_aarch64_sve_cmphi>;
- defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, null_frag, int_aarch64_sve_cmphi>;
- defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, null_frag, int_aarch64_sve_cmphs>;
-
- defm FCMGE_PPzZZ : sve_fp_3op_p_pd<0b000, "fcmge", int_aarch64_sve_fcmpge>;
- defm FCMGT_PPzZZ : sve_fp_3op_p_pd<0b001, "fcmgt", int_aarch64_sve_fcmpgt>;
- defm FCMEQ_PPzZZ : sve_fp_3op_p_pd<0b010, "fcmeq", int_aarch64_sve_fcmpeq>;
- defm FCMNE_PPzZZ : sve_fp_3op_p_pd<0b011, "fcmne", int_aarch64_sve_fcmpne>;
- defm FCMUO_PPzZZ : sve_fp_3op_p_pd<0b100, "fcmuo", int_aarch64_sve_fcmpuo>;
+ defm CMPGE_PPzZI : sve_int_scmp_vi<0b000, "cmpge", SETGE, SETLE>;
+ defm CMPGT_PPzZI : sve_int_scmp_vi<0b001, "cmpgt", SETGT, SETLT>;
+ defm CMPLT_PPzZI : sve_int_scmp_vi<0b010, "cmplt", SETLT, SETGT>;
+ defm CMPLE_PPzZI : sve_int_scmp_vi<0b011, "cmple", SETLE, SETGE>;
+ defm CMPEQ_PPzZI : sve_int_scmp_vi<0b100, "cmpeq", SETEQ, SETEQ>;
+ defm CMPNE_PPzZI : sve_int_scmp_vi<0b101, "cmpne", SETNE, SETEQ>;
+ defm CMPHS_PPzZI : sve_int_ucmp_vi<0b00, "cmphs", SETUGE, SETULE>;
+ defm CMPHI_PPzZI : sve_int_ucmp_vi<0b01, "cmphi", SETUGT, SETULT>;
+ defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, SETUGT>;
+ defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, SETUGE>;
+
+ defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, setoge>;
+ defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, setogt>;
+ defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, setoeq>;
+ defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, setone>;
+ defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, setuo>;
defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>;
defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>;
@@ -928,71 +1260,78 @@ let Predicates = [HasSVE] in {
defm INCP_ZP : sve_int_count_v<0b10000, "incp">;
defm DECP_ZP : sve_int_count_v<0b10100, "decp">;
- defm INDEX_RR : sve_int_index_rr<"index">;
- defm INDEX_IR : sve_int_index_ir<"index">;
- defm INDEX_RI : sve_int_index_ri<"index">;
- defm INDEX_II : sve_int_index_ii<"index">;
+ defm INDEX_RR : sve_int_index_rr<"index", index_vector>;
+ defm INDEX_IR : sve_int_index_ir<"index", index_vector>;
+ defm INDEX_RI : sve_int_index_ri<"index", index_vector>;
+ defm INDEX_II : sve_int_index_ii<"index", index_vector>;
// Unpredicated shifts
- defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr">;
- defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr">;
- defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl">;
+ defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_m1>;
+ defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", AArch64lsr_m1>;
+ defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", AArch64lsl_m1>;
defm ASR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b00, "asr">;
defm LSR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b01, "lsr">;
defm LSL_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b11, "lsl">;
// Predicated shifts
- defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0000, "asr">;
- defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0001, "lsr">;
+ defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0000, "asr", "ASR_ZPZI">;
+ defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0001, "lsr", "LSR_ZPZI">;
defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">;
- defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", int_aarch64_sve_asrd>;
+ defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;
+
+ let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
+ defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<AArch64asr_m1>;
+ defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<AArch64lsr_m1>;
+ defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<AArch64lsl_m1>;
+ defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_asrd>;
+ }
- defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", int_aarch64_sve_asr>;
- defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", int_aarch64_sve_lsr>;
- defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", int_aarch64_sve_lsl>;
- defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", null_frag>;
- defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", null_frag>;
- defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", null_frag>;
+ defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", AArch64asr_m1, "ASRR_ZPmZ">;
+ defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", AArch64lsr_m1, "LSRR_ZPmZ">;
+ defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", AArch64lsl_m1, "LSLR_ZPmZ">;
+ defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", "ASRR_ZPZZ", null_frag, "ASR_ZPmZ", /*isReverseInstr*/ 1>;
+ defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", "LSRR_ZPZZ", null_frag, "LSR_ZPmZ", /*isReverseInstr*/ 1>;
+ defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", "LSLR_ZPZZ", null_frag, "LSL_ZPmZ", /*isReverseInstr*/ 1>;
defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr", int_aarch64_sve_asr_wide>;
defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>;
defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>;
- defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, nxv8f16, nxv16i1, nxv4f32, ElementSizeS>;
- defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, nxv4f32, nxv16i1, nxv8f16, ElementSizeS>;
- defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, int_aarch64_sve_scvtf, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>;
- defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, int_aarch64_sve_scvtf, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>;
- defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, int_aarch64_sve_ucvtf, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>;
- defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, int_aarch64_sve_ucvtf, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>;
- defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, int_aarch64_sve_fcvtzs, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>;
- defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, int_aarch64_sve_fcvtzs, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>;
- defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, int_aarch64_sve_fcvtzu, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>;
- defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, int_aarch64_sve_fcvtzu, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>;
- defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, nxv8f16, nxv16i1, nxv2f64, ElementSizeD>;
- defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, nxv2f64, nxv16i1, nxv8f16, ElementSizeD>;
- defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, nxv4f32, nxv16i1, nxv2f64, ElementSizeD>;
- defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, nxv2f64, nxv16i1, nxv4f32, ElementSizeD>;
- defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, nxv2f64, nxv16i1, nxv4i32, ElementSizeD>;
- defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, nxv2f64, nxv16i1, nxv4i32, ElementSizeD>;
- defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, nxv8f16, nxv16i1, nxv4i32, ElementSizeS>;
- defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, nxv4f32, nxv16i1, nxv2i64, ElementSizeD>;
- defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, nxv8f16, nxv16i1, nxv4i32, ElementSizeS>;
- defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, nxv8f16, nxv16i1, nxv2i64, ElementSizeD>;
- defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, nxv4f32, nxv16i1, nxv2i64, ElementSizeD>;
- defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, nxv8f16, nxv16i1, nxv2i64, ElementSizeD>;
- defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, int_aarch64_sve_scvtf, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
- defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, int_aarch64_sve_ucvtf, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
- defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, nxv4i32, nxv16i1, nxv2f64, ElementSizeD>;
- defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, nxv4i32, nxv16i1, nxv2f64, ElementSizeD>;
- defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, nxv2i64, nxv16i1, nxv4f32, ElementSizeD>;
- defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, nxv4i32, nxv16i1, nxv8f16, ElementSizeS>;
- defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, nxv2i64, nxv16i1, nxv8f16, ElementSizeD>;
- defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, nxv4i32, nxv16i1, nxv8f16, ElementSizeS>;
- defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, nxv2i64, nxv16i1, nxv8f16, ElementSizeD>;
- defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, nxv2i64, nxv16i1, nxv4f32, ElementSizeD>;
- defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, int_aarch64_sve_fcvtzs, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
- defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
+ defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, nxv8f16, nxv4i1, nxv4f32, ElementSizeS>;
+ defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, nxv4f32, nxv4i1, nxv8f16, ElementSizeS>;
+ defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, int_aarch64_sve_scvtf, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>;
+ defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, int_aarch64_sve_scvtf, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>;
+ defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, int_aarch64_sve_ucvtf, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>;
+ defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, int_aarch64_sve_ucvtf, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>;
+ defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, int_aarch64_sve_fcvtzs, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>;
+ defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, int_aarch64_sve_fcvtzs, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>;
+ defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, int_aarch64_sve_fcvtzu, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>;
+ defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, int_aarch64_sve_fcvtzu, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>;
+ defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, nxv8f16, nxv2i1, nxv2f64, ElementSizeD>;
+ defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, nxv2f64, nxv2i1, nxv8f16, ElementSizeD>;
+ defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, nxv4f32, nxv2i1, nxv2f64, ElementSizeD>;
+ defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, nxv2f64, nxv2i1, nxv4f32, ElementSizeD>;
+ defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
+ defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
+ defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, nxv8f16, nxv4i1, nxv4i32, ElementSizeS>;
+ defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, nxv4f32, nxv2i1, nxv2i64, ElementSizeD>;
+ defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, nxv8f16, nxv4i1, nxv4i32, ElementSizeS>;
+ defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, nxv8f16, nxv2i1, nxv2i64, ElementSizeD>;
+ defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, nxv4f32, nxv2i1, nxv2i64, ElementSizeD>;
+ defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, nxv8f16, nxv2i1, nxv2i64, ElementSizeD>;
+ defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, int_aarch64_sve_scvtf, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
+ defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, int_aarch64_sve_ucvtf, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
+ defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
+ defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
+ defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, nxv2i64, nxv2i1, nxv4f32, ElementSizeD>;
+ defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, nxv4i32, nxv4i1, nxv8f16, ElementSizeS>;
+ defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>;
+ defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, nxv4i32, nxv4i1, nxv8f16, ElementSizeS>;
+ defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, nxv2i64, nxv2i1, nxv8f16, ElementSizeD>;
+ defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, nxv2i64, nxv2i1, nxv4f32, ElementSizeD>;
+ defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, int_aarch64_sve_fcvtzs, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
+ defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", int_aarch64_sve_frintn>;
defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", int_aarch64_sve_frintp>;
@@ -1004,6 +1343,18 @@ let Predicates = [HasSVE] in {
defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", int_aarch64_sve_frecpx>;
defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", int_aarch64_sve_fsqrt>;
+ let Predicates = [HasBF16, HasSVE] in {
+ defm BFDOT_ZZZ : sve_bfloat_dot<"bfdot", int_aarch64_sve_bfdot>;
+ defm BFDOT_ZZI : sve_bfloat_dot_indexed<"bfdot", int_aarch64_sve_bfdot_lane>;
+ defm BFMMLA_ZZZ : sve_bfloat_matmul<"bfmmla", int_aarch64_sve_bfmmla>;
+ defm BFMMLA_B_ZZZ : sve_bfloat_matmul_longvecl<0b0, "bfmlalb", int_aarch64_sve_bfmlalb>;
+ defm BFMMLA_T_ZZZ : sve_bfloat_matmul_longvecl<0b1, "bfmlalt", int_aarch64_sve_bfmlalt>;
+ defm BFMMLA_B_ZZI : sve_bfloat_matmul_longvecl_idx<0b0, "bfmlalb", int_aarch64_sve_bfmlalb_lane>;
+ defm BFMMLA_T_ZZI : sve_bfloat_matmul_longvecl_idx<0b1, "bfmlalt", int_aarch64_sve_bfmlalt_lane>;
+ defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32>;
+ defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>;
+ }
+
// InstAliases
def : InstAlias<"mov $Zd, $Zn",
(ORR_ZZZ ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zn), 1>;
@@ -1089,6 +1440,20 @@ let Predicates = [HasSVE] in {
def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
(FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
+ // Pseudo instructions representing unpredicated LDR and STR for ZPR2,3,4.
+ // These get expanded to individual LDR_ZXI/STR_ZXI instructions in
+ // AArch64ExpandPseudoInsts.
+ let mayLoad = 1, hasSideEffects = 0 in {
+ def LDR_ZZXI : Pseudo<(outs ZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def LDR_ZZZXI : Pseudo<(outs ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ }
+ let mayStore = 1, hasSideEffects = 0 in {
+ def STR_ZZXI : Pseudo<(outs), (ins ZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def STR_ZZZXI : Pseudo<(outs), (ins ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ }
+
def : Pat<(AArch64ptest (nxv16i1 PPR:$pg), (nxv16i1 PPR:$src)),
(PTEST_PP PPR:$pg, PPR:$src)>;
def : Pat<(AArch64ptest (nxv8i1 PPR:$pg), (nxv8i1 PPR:$src)),
@@ -1098,6 +1463,25 @@ let Predicates = [HasSVE] in {
def : Pat<(AArch64ptest (nxv2i1 PPR:$pg), (nxv2i1 PPR:$src)),
(PTEST_PP PPR:$pg, PPR:$src)>;
+ // LD1RQ of 128-bit masked data
+ def : Pat<(nxv16i8 (AArch64ld1rq_z PPR:$gp, GPR64:$base)),
+ (LD1RQ_B_IMM $gp, $base, (i64 0))>;
+ def : Pat<(nxv8i16 (AArch64ld1rq_z PPR:$gp, GPR64:$base)),
+ (LD1RQ_H_IMM $gp, $base, (i64 0))>;
+ def : Pat<(nxv4i32 (AArch64ld1rq_z PPR:$gp, GPR64:$base)),
+ (LD1RQ_W_IMM $gp, $base, (i64 0))>;
+ def : Pat<(nxv2i64 (AArch64ld1rq_z PPR:$gp, GPR64:$base)),
+ (LD1RQ_D_IMM $gp, $base, (i64 0))>;
+
+ def : Pat<(nxv16i8 (AArch64ld1rq_z PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))),
+ (LD1RQ_B_IMM $gp, $base, simm4s16:$imm)>;
+ def : Pat<(nxv8i16 (AArch64ld1rq_z PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))),
+ (LD1RQ_H_IMM $gp, $base, simm4s16:$imm)>;
+ def : Pat<(nxv4i32 (AArch64ld1rq_z PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))),
+ (LD1RQ_W_IMM $gp, $base, simm4s16:$imm)>;
+ def : Pat<(nxv2i64 (AArch64ld1rq_z PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))),
+ (LD1RQ_D_IMM $gp, $base, simm4s16:$imm)>;
+
def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (SXTW_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (SXTH_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i8), (SXTB_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
@@ -1105,346 +1489,899 @@ let Predicates = [HasSVE] in {
def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i8), (SXTB_ZPmZ_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
def : Pat<(sext_inreg (nxv8i16 ZPR:$Zs), nxv8i8), (SXTB_ZPmZ_H (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$Zs)>;
- def : Pat<(nxv16i8 (bitconvert (nxv8i16 ZPR:$src))), (nxv16i8 ZPR:$src)>;
- def : Pat<(nxv16i8 (bitconvert (nxv4i32 ZPR:$src))), (nxv16i8 ZPR:$src)>;
- def : Pat<(nxv16i8 (bitconvert (nxv2i64 ZPR:$src))), (nxv16i8 ZPR:$src)>;
- def : Pat<(nxv16i8 (bitconvert (nxv8f16 ZPR:$src))), (nxv16i8 ZPR:$src)>;
- def : Pat<(nxv16i8 (bitconvert (nxv4f32 ZPR:$src))), (nxv16i8 ZPR:$src)>;
- def : Pat<(nxv16i8 (bitconvert (nxv2f64 ZPR:$src))), (nxv16i8 ZPR:$src)>;
-
- def : Pat<(nxv8i16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8i16 ZPR:$src)>;
- def : Pat<(nxv8i16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8i16 ZPR:$src)>;
- def : Pat<(nxv8i16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8i16 ZPR:$src)>;
- def : Pat<(nxv8i16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8i16 ZPR:$src)>;
- def : Pat<(nxv8i16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8i16 ZPR:$src)>;
- def : Pat<(nxv8i16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8i16 ZPR:$src)>;
-
- def : Pat<(nxv4i32 (bitconvert (nxv16i8 ZPR:$src))), (nxv4i32 ZPR:$src)>;
- def : Pat<(nxv4i32 (bitconvert (nxv8i16 ZPR:$src))), (nxv4i32 ZPR:$src)>;
- def : Pat<(nxv4i32 (bitconvert (nxv2i64 ZPR:$src))), (nxv4i32 ZPR:$src)>;
- def : Pat<(nxv4i32 (bitconvert (nxv8f16 ZPR:$src))), (nxv4i32 ZPR:$src)>;
- def : Pat<(nxv4i32 (bitconvert (nxv4f32 ZPR:$src))), (nxv4i32 ZPR:$src)>;
- def : Pat<(nxv4i32 (bitconvert (nxv2f64 ZPR:$src))), (nxv4i32 ZPR:$src)>;
-
- def : Pat<(nxv2i64 (bitconvert (nxv16i8 ZPR:$src))), (nxv2i64 ZPR:$src)>;
- def : Pat<(nxv2i64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2i64 ZPR:$src)>;
- def : Pat<(nxv2i64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2i64 ZPR:$src)>;
- def : Pat<(nxv2i64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2i64 ZPR:$src)>;
- def : Pat<(nxv2i64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2i64 ZPR:$src)>;
- def : Pat<(nxv2i64 (bitconvert (nxv2f64 ZPR:$src))), (nxv2i64 ZPR:$src)>;
-
- def : Pat<(nxv8f16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8f16 ZPR:$src)>;
- def : Pat<(nxv8f16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8f16 ZPR:$src)>;
- def : Pat<(nxv8f16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8f16 ZPR:$src)>;
- def : Pat<(nxv8f16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8f16 ZPR:$src)>;
- def : Pat<(nxv8f16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8f16 ZPR:$src)>;
- def : Pat<(nxv8f16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8f16 ZPR:$src)>;
-
- def : Pat<(nxv4f32 (bitconvert (nxv16i8 ZPR:$src))), (nxv4f32 ZPR:$src)>;
- def : Pat<(nxv4f32 (bitconvert (nxv8i16 ZPR:$src))), (nxv4f32 ZPR:$src)>;
- def : Pat<(nxv4f32 (bitconvert (nxv4i32 ZPR:$src))), (nxv4f32 ZPR:$src)>;
- def : Pat<(nxv4f32 (bitconvert (nxv2i64 ZPR:$src))), (nxv4f32 ZPR:$src)>;
- def : Pat<(nxv4f32 (bitconvert (nxv8f16 ZPR:$src))), (nxv4f32 ZPR:$src)>;
- def : Pat<(nxv4f32 (bitconvert (nxv2f64 ZPR:$src))), (nxv4f32 ZPR:$src)>;
-
- def : Pat<(nxv2f64 (bitconvert (nxv16i8 ZPR:$src))), (nxv2f64 ZPR:$src)>;
- def : Pat<(nxv2f64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
- def : Pat<(nxv2f64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2f64 ZPR:$src)>;
- def : Pat<(nxv2f64 (bitconvert (nxv2i64 ZPR:$src))), (nxv2f64 ZPR:$src)>;
- def : Pat<(nxv2f64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
- def : Pat<(nxv2f64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+ // General case that we ideally never want to match.
+ def : Pat<(vscale GPR64:$scale), (MADDXrrr (UBFMXri (RDVLI_XI 1), 4, 63), $scale, XZR)>;
+
+ let AddedComplexity = 5 in {
+ def : Pat<(vscale (i64 1)), (UBFMXri (RDVLI_XI 1), 4, 63)>;
+ def : Pat<(vscale (i64 -1)), (SBFMXri (RDVLI_XI -1), 4, 63)>;
+
+ def : Pat<(vscale (sve_rdvl_imm i32:$imm)), (RDVLI_XI $imm)>;
+ def : Pat<(vscale (sve_cnth_imm i32:$imm)), (CNTH_XPiI 31, $imm)>;
+ def : Pat<(vscale (sve_cntw_imm i32:$imm)), (CNTW_XPiI 31, $imm)>;
+ def : Pat<(vscale (sve_cntd_imm i32:$imm)), (CNTD_XPiI 31, $imm)>;
+
+ def : Pat<(vscale (sve_cnth_imm_neg i32:$imm)), (SUBXrs XZR, (CNTH_XPiI 31, $imm), 0)>;
+ def : Pat<(vscale (sve_cntw_imm_neg i32:$imm)), (SUBXrs XZR, (CNTW_XPiI 31, $imm), 0)>;
+ def : Pat<(vscale (sve_cntd_imm_neg i32:$imm)), (SUBXrs XZR, (CNTD_XPiI 31, $imm), 0)>;
+ }
+
+ // FIXME: BigEndian requires an additional REV instruction to satisfy the
+ // constraint that none of the bits change when stored to memory as one
+ // type, and reloaded as another type.
+ let Predicates = [IsLE] in {
+ def : Pat<(nxv16i8 (bitconvert (nxv8i16 ZPR:$src))), (nxv16i8 ZPR:$src)>;
+ def : Pat<(nxv16i8 (bitconvert (nxv4i32 ZPR:$src))), (nxv16i8 ZPR:$src)>;
+ def : Pat<(nxv16i8 (bitconvert (nxv2i64 ZPR:$src))), (nxv16i8 ZPR:$src)>;
+ def : Pat<(nxv16i8 (bitconvert (nxv8f16 ZPR:$src))), (nxv16i8 ZPR:$src)>;
+ def : Pat<(nxv16i8 (bitconvert (nxv4f32 ZPR:$src))), (nxv16i8 ZPR:$src)>;
+ def : Pat<(nxv16i8 (bitconvert (nxv2f64 ZPR:$src))), (nxv16i8 ZPR:$src)>;
+
+ def : Pat<(nxv8i16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8i16 ZPR:$src)>;
+ def : Pat<(nxv8i16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8i16 ZPR:$src)>;
+ def : Pat<(nxv8i16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8i16 ZPR:$src)>;
+ def : Pat<(nxv8i16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8i16 ZPR:$src)>;
+ def : Pat<(nxv8i16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8i16 ZPR:$src)>;
+ def : Pat<(nxv8i16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8i16 ZPR:$src)>;
+
+ def : Pat<(nxv4i32 (bitconvert (nxv16i8 ZPR:$src))), (nxv4i32 ZPR:$src)>;
+ def : Pat<(nxv4i32 (bitconvert (nxv8i16 ZPR:$src))), (nxv4i32 ZPR:$src)>;
+ def : Pat<(nxv4i32 (bitconvert (nxv2i64 ZPR:$src))), (nxv4i32 ZPR:$src)>;
+ def : Pat<(nxv4i32 (bitconvert (nxv8f16 ZPR:$src))), (nxv4i32 ZPR:$src)>;
+ def : Pat<(nxv4i32 (bitconvert (nxv4f32 ZPR:$src))), (nxv4i32 ZPR:$src)>;
+ def : Pat<(nxv4i32 (bitconvert (nxv2f64 ZPR:$src))), (nxv4i32 ZPR:$src)>;
+
+ def : Pat<(nxv2i64 (bitconvert (nxv16i8 ZPR:$src))), (nxv2i64 ZPR:$src)>;
+ def : Pat<(nxv2i64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2i64 ZPR:$src)>;
+ def : Pat<(nxv2i64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2i64 ZPR:$src)>;
+ def : Pat<(nxv2i64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2i64 ZPR:$src)>;
+ def : Pat<(nxv2i64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2i64 ZPR:$src)>;
+ def : Pat<(nxv2i64 (bitconvert (nxv2f64 ZPR:$src))), (nxv2i64 ZPR:$src)>;
+
+ def : Pat<(nxv8f16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8f16 ZPR:$src)>;
+ def : Pat<(nxv8f16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8f16 ZPR:$src)>;
+ def : Pat<(nxv8f16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8f16 ZPR:$src)>;
+ def : Pat<(nxv8f16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8f16 ZPR:$src)>;
+ def : Pat<(nxv8f16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8f16 ZPR:$src)>;
+ def : Pat<(nxv8f16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8f16 ZPR:$src)>;
+
+ def : Pat<(nxv4f32 (bitconvert (nxv16i8 ZPR:$src))), (nxv4f32 ZPR:$src)>;
+ def : Pat<(nxv4f32 (bitconvert (nxv8i16 ZPR:$src))), (nxv4f32 ZPR:$src)>;
+ def : Pat<(nxv4f32 (bitconvert (nxv4i32 ZPR:$src))), (nxv4f32 ZPR:$src)>;
+ def : Pat<(nxv4f32 (bitconvert (nxv2i64 ZPR:$src))), (nxv4f32 ZPR:$src)>;
+ def : Pat<(nxv4f32 (bitconvert (nxv8f16 ZPR:$src))), (nxv4f32 ZPR:$src)>;
+ def : Pat<(nxv4f32 (bitconvert (nxv2f64 ZPR:$src))), (nxv4f32 ZPR:$src)>;
+
+ def : Pat<(nxv2f64 (bitconvert (nxv16i8 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+ def : Pat<(nxv2f64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+ def : Pat<(nxv2f64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+ def : Pat<(nxv2f64 (bitconvert (nxv2i64 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+ def : Pat<(nxv2f64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+ def : Pat<(nxv2f64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+
+ }
+
+ let Predicates = [IsLE, HasBF16, HasSVE] in {
+ def : Pat<(nxv2i64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2i64 ZPR:$src)>;
+ def : Pat<(nxv8bf16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+ def : Pat<(nxv8bf16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+ }
+
+ let Predicates = [IsLE, HasSVE, HasBF16] in {
+ def : Pat<(nxv8bf16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+ def : Pat<(nxv8bf16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+ def : Pat<(nxv8bf16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+ def : Pat<(nxv8bf16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+ def : Pat<(nxv8bf16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+ def : Pat<(nxv8bf16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+ def : Pat<(nxv8bf16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+
+ def : Pat<(nxv16i8 (bitconvert (nxv8bf16 ZPR:$src))), (nxv16i8 ZPR:$src)>;
+ def : Pat<(nxv8i16 (bitconvert (nxv8bf16 ZPR:$src))), (nxv8i16 ZPR:$src)>;
+ def : Pat<(nxv4i32 (bitconvert (nxv8bf16 ZPR:$src))), (nxv4i32 ZPR:$src)>;
+ def : Pat<(nxv2i64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2i64 ZPR:$src)>;
+ def : Pat<(nxv8f16 (bitconvert (nxv8bf16 ZPR:$src))), (nxv8f16 ZPR:$src)>;
+ def : Pat<(nxv4f32 (bitconvert (nxv8bf16 ZPR:$src))), (nxv4f32 ZPR:$src)>;
+ def : Pat<(nxv2f64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+ }
+
+ def : Pat<(nxv16i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv16i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv16i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv16i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv8i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv8i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv8i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv4i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv4i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv4i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv2i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv2i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv2i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+
+ def : Pat<(nxv16i1 (and PPR:$Ps1, PPR:$Ps2)),
+ (AND_PPzPP (PTRUE_B 31), PPR:$Ps1, PPR:$Ps2)>;
+ def : Pat<(nxv8i1 (and PPR:$Ps1, PPR:$Ps2)),
+ (AND_PPzPP (PTRUE_H 31), PPR:$Ps1, PPR:$Ps2)>;
+ def : Pat<(nxv4i1 (and PPR:$Ps1, PPR:$Ps2)),
+ (AND_PPzPP (PTRUE_S 31), PPR:$Ps1, PPR:$Ps2)>;
+ def : Pat<(nxv2i1 (and PPR:$Ps1, PPR:$Ps2)),
+ (AND_PPzPP (PTRUE_D 31), PPR:$Ps1, PPR:$Ps2)>;
// Add more complex addressing modes here as required
multiclass pred_load<ValueType Ty, ValueType PredTy, SDPatternOperator Load,
- Instruction RegImmInst> {
-
+ Instruction RegRegInst, Instruction RegImmInst, ComplexPattern AddrCP> {
+ // reg + reg
+ let AddedComplexity = 1 in {
+ def _reg_reg_z : Pat<(Ty (Load (AddrCP GPR64:$base, GPR64:$offset), (PredTy PPR:$gp), (SVEDup0Undef))),
+ (RegRegInst PPR:$gp, GPR64:$base, GPR64:$offset)>;
+ }
+ // reg + imm
+ let AddedComplexity = 2 in {
+ def _reg_imm_z : Pat<(Ty (Load (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset), (PredTy PPR:$gp), (SVEDup0Undef))),
+ (RegImmInst PPR:$gp, GPR64:$base, simm4s1:$offset)>;
+ }
def _default_z : Pat<(Ty (Load GPR64:$base, (PredTy PPR:$gp), (SVEDup0Undef))),
(RegImmInst PPR:$gp, GPR64:$base, (i64 0))>;
}
// 2-element contiguous loads
- defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i8, LD1B_D_IMM>;
- defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i8, LD1SB_D_IMM>;
- defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i16, LD1H_D_IMM>;
- defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i16, LD1SH_D_IMM>;
- defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i32, LD1W_D_IMM>;
- defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i32, LD1SW_D_IMM>;
- defm : pred_load<nxv2i64, nxv2i1, nonext_masked_load, LD1D_IMM>;
- defm : pred_load<nxv2f16, nxv2i1, nonext_masked_load, LD1H_D_IMM>;
- defm : pred_load<nxv2f32, nxv2i1, nonext_masked_load, LD1W_D_IMM>;
- defm : pred_load<nxv2f64, nxv2i1, nonext_masked_load, LD1D_IMM>;
+ defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i8, LD1B_D, LD1B_D_IMM, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i8, LD1SB_D, LD1SB_D_IMM, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i16, LD1H_D, LD1H_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i16, LD1SH_D, LD1SH_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i32, LD1W_D, LD1W_D_IMM, am_sve_regreg_lsl2>;
+ defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i32, LD1SW_D, LD1SW_D_IMM, am_sve_regreg_lsl2>;
+ defm : pred_load<nxv2i64, nxv2i1, nonext_masked_load, LD1D, LD1D_IMM, am_sve_regreg_lsl3>;
+ defm : pred_load<nxv2f16, nxv2i1, nonext_masked_load, LD1H_D, LD1H_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv2f32, nxv2i1, nonext_masked_load, LD1W_D, LD1W_D_IMM, am_sve_regreg_lsl2>;
+ defm : pred_load<nxv2f64, nxv2i1, nonext_masked_load, LD1D, LD1D_IMM, am_sve_regreg_lsl3>;
// 4-element contiguous loads
- defm : pred_load<nxv4i32, nxv4i1, zext_masked_load_i8, LD1B_S_IMM>;
- defm : pred_load<nxv4i32, nxv4i1, asext_masked_load_i8, LD1SB_S_IMM>;
- defm : pred_load<nxv4i32, nxv4i1, zext_masked_load_i16, LD1H_S_IMM>;
- defm : pred_load<nxv4i32, nxv4i1, asext_masked_load_i16, LD1SH_S_IMM>;
- defm : pred_load<nxv4i32, nxv4i1, nonext_masked_load, LD1W_IMM>;
- defm : pred_load<nxv4f16, nxv4i1, nonext_masked_load, LD1H_S_IMM>;
- defm : pred_load<nxv4f32, nxv4i1, nonext_masked_load, LD1W_IMM>;
+ defm : pred_load<nxv4i32, nxv4i1, zext_masked_load_i8, LD1B_S, LD1B_S_IMM, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv4i32, nxv4i1, asext_masked_load_i8, LD1SB_S, LD1SB_S_IMM, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv4i32, nxv4i1, zext_masked_load_i16, LD1H_S, LD1H_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv4i32, nxv4i1, asext_masked_load_i16, LD1SH_S, LD1SH_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv4i32, nxv4i1, nonext_masked_load, LD1W, LD1W_IMM, am_sve_regreg_lsl2>;
+ defm : pred_load<nxv4f16, nxv4i1, nonext_masked_load, LD1H_S, LD1H_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv4f32, nxv4i1, nonext_masked_load, LD1W, LD1W_IMM, am_sve_regreg_lsl2>;
// 8-element contiguous loads
- defm : pred_load<nxv8i16, nxv8i1, zext_masked_load_i8, LD1B_H_IMM>;
- defm : pred_load<nxv8i16, nxv8i1, asext_masked_load_i8, LD1SB_H_IMM>;
- defm : pred_load<nxv8i16, nxv8i1, nonext_masked_load, LD1H_IMM>;
- defm : pred_load<nxv8f16, nxv8i1, nonext_masked_load, LD1H_IMM>;
+ defm : pred_load<nxv8i16, nxv8i1, zext_masked_load_i8, LD1B_H, LD1B_H_IMM, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv8i16, nxv8i1, asext_masked_load_i8, LD1SB_H, LD1SB_H_IMM, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv8i16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv8f16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>;
+
+ let Predicates = [HasBF16, HasSVE] in {
+ defm : pred_load<nxv8bf16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>;
+ }
// 16-element contiguous loads
- defm : pred_load<nxv16i8, nxv16i1, nonext_masked_load, LD1B_IMM>;
+ defm : pred_load<nxv16i8, nxv16i1, nonext_masked_load, LD1B, LD1B_IMM, am_sve_regreg_lsl0>;
multiclass pred_store<ValueType Ty, ValueType PredTy, SDPatternOperator Store,
- Instruction RegImmInst> {
+ Instruction RegRegInst, Instruction RegImmInst, ComplexPattern AddrCP> {
+ // reg + reg
+ let AddedComplexity = 1 in {
+ def _reg_reg : Pat<(Store (Ty ZPR:$vec), (AddrCP GPR64:$base, GPR64:$offset), (PredTy PPR:$gp)),
+ (RegRegInst ZPR:$vec, PPR:$gp, GPR64:$base, GPR64:$offset)>;
+ }
+ // reg + imm
+ let AddedComplexity = 2 in {
+ def _reg_imm : Pat<(Store (Ty ZPR:$vec), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset), (PredTy PPR:$gp)),
+ (RegImmInst ZPR:$vec, PPR:$gp, GPR64:$base, simm4s1:$offset)>;
+ }
def _default : Pat<(Store (Ty ZPR:$vec), GPR64:$base, (PredTy PPR:$gp)),
(RegImmInst ZPR:$vec, PPR:$gp, GPR64:$base, (i64 0))>;
}
// 2-element contiguous stores
- defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i8, ST1B_D_IMM>;
- defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i16, ST1H_D_IMM>;
- defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i32, ST1W_D_IMM>;
- defm : pred_store<nxv2i64, nxv2i1, nontrunc_masked_store, ST1D_IMM>;
- defm : pred_store<nxv2f16, nxv2i1, nontrunc_masked_store, ST1H_D_IMM>;
- defm : pred_store<nxv2f32, nxv2i1, nontrunc_masked_store, ST1W_D_IMM>;
- defm : pred_store<nxv2f64, nxv2i1, nontrunc_masked_store, ST1D_IMM>;
+ defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i8, ST1B_D, ST1B_D_IMM, am_sve_regreg_lsl0>;
+ defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i16, ST1H_D, ST1H_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i32, ST1W_D, ST1W_D_IMM, am_sve_regreg_lsl2>;
+ defm : pred_store<nxv2i64, nxv2i1, nontrunc_masked_store, ST1D, ST1D_IMM, am_sve_regreg_lsl3>;
+ defm : pred_store<nxv2f16, nxv2i1, nontrunc_masked_store, ST1H_D, ST1H_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv2f32, nxv2i1, nontrunc_masked_store, ST1W_D, ST1W_D_IMM, am_sve_regreg_lsl2>;
+ defm : pred_store<nxv2f64, nxv2i1, nontrunc_masked_store, ST1D, ST1D_IMM, am_sve_regreg_lsl3>;
// 4-element contiguous stores
- defm : pred_store<nxv4i32, nxv4i1, trunc_masked_store_i8, ST1B_S_IMM>;
- defm : pred_store<nxv4i32, nxv4i1, trunc_masked_store_i16, ST1H_S_IMM>;
- defm : pred_store<nxv4i32, nxv4i1, nontrunc_masked_store, ST1W_IMM>;
- defm : pred_store<nxv4f16, nxv4i1, nontrunc_masked_store, ST1H_S_IMM>;
- defm : pred_store<nxv4f32, nxv4i1, nontrunc_masked_store, ST1W_IMM>;
+ defm : pred_store<nxv4i32, nxv4i1, trunc_masked_store_i8, ST1B_S, ST1B_S_IMM, am_sve_regreg_lsl0>;
+ defm : pred_store<nxv4i32, nxv4i1, trunc_masked_store_i16, ST1H_S, ST1H_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv4i32, nxv4i1, nontrunc_masked_store, ST1W, ST1W_IMM, am_sve_regreg_lsl2>;
+ defm : pred_store<nxv4f16, nxv4i1, nontrunc_masked_store, ST1H_S, ST1H_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv4f32, nxv4i1, nontrunc_masked_store, ST1W, ST1W_IMM, am_sve_regreg_lsl2>;
// 8-element contiguous stores
- defm : pred_store<nxv8i16, nxv8i1, trunc_masked_store_i8, ST1B_H_IMM>;
- defm : pred_store<nxv8i16, nxv8i1, nontrunc_masked_store, ST1H_IMM>;
- defm : pred_store<nxv8f16, nxv8i1, nontrunc_masked_store, ST1H_IMM>;
+ defm : pred_store<nxv8i16, nxv8i1, trunc_masked_store_i8, ST1B_H, ST1B_H_IMM, am_sve_regreg_lsl0>;
+ defm : pred_store<nxv8i16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv8f16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>;
+
+ let Predicates = [HasBF16, HasSVE] in {
+ defm : pred_store<nxv8bf16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>;
+ }
// 16-element contiguous stores
- defm : pred_store<nxv16i8, nxv16i1, nontrunc_masked_store, ST1B_IMM>;
+ defm : pred_store<nxv16i8, nxv16i1, nontrunc_masked_store, ST1B, ST1B_IMM, am_sve_regreg_lsl0>;
+
+ defm : pred_load<nxv16i8, nxv16i1, non_temporal_load, LDNT1B_ZRR, LDNT1B_ZRI, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv8i16, nxv8i1, non_temporal_load, LDNT1H_ZRR, LDNT1H_ZRI, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv4i32, nxv4i1, non_temporal_load, LDNT1W_ZRR, LDNT1W_ZRI, am_sve_regreg_lsl2>;
+ defm : pred_load<nxv2i64, nxv2i1, non_temporal_load, LDNT1D_ZRR, LDNT1D_ZRI, am_sve_regreg_lsl3>;
+
+ defm : pred_store<nxv16i8, nxv16i1, non_temporal_store, STNT1B_ZRR, STNT1B_ZRI, am_sve_regreg_lsl0>;
+ defm : pred_store<nxv8i16, nxv8i1, non_temporal_store, STNT1H_ZRR, STNT1H_ZRI, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv4i32, nxv4i1, non_temporal_store, STNT1W_ZRR, STNT1W_ZRI, am_sve_regreg_lsl2>;
+ defm : pred_store<nxv2i64, nxv2i1, non_temporal_store, STNT1D_ZRR, STNT1D_ZRI, am_sve_regreg_lsl3>;
+
+ multiclass unpred_store<PatFrag Store, ValueType Ty, Instruction RegImmInst,
+ Instruction PTrue> {
+ let AddedComplexity = 1 in {
+ def _imm : Pat<(Store (Ty ZPR:$val), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset)),
+ (RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
+ }
+ let AddedComplexity = 2 in {
+ def _fi : Pat<(Store (Ty ZPR:$val), (am_sve_fi GPR64sp:$base, simm4s1:$offset)),
+ (RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
+ }
+
+ def : Pat<(Store (Ty ZPR:$val), GPR64:$base),
+ (RegImmInst ZPR:$val, (PTrue 31), GPR64:$base, (i64 0))>;
+ }
+
+ defm : unpred_store< store, nxv16i8, ST1B_IMM, PTRUE_B>;
+ defm : unpred_store< truncstorevi8, nxv8i16, ST1B_H_IMM, PTRUE_H>;
+ defm : unpred_store< truncstorevi8, nxv4i32, ST1B_S_IMM, PTRUE_S>;
+ defm : unpred_store< truncstorevi8, nxv2i64, ST1B_D_IMM, PTRUE_D>;
+ defm : unpred_store< store, nxv8i16, ST1H_IMM, PTRUE_H>;
+ defm : unpred_store<truncstorevi16, nxv4i32, ST1H_S_IMM, PTRUE_S>;
+ defm : unpred_store<truncstorevi16, nxv2i64, ST1H_D_IMM, PTRUE_D>;
+ defm : unpred_store< store, nxv4i32, ST1W_IMM, PTRUE_S>;
+ defm : unpred_store<truncstorevi32, nxv2i64, ST1W_D_IMM, PTRUE_D>;
+ defm : unpred_store< store, nxv2i64, ST1D_IMM, PTRUE_D>;
+ defm : unpred_store< store, nxv8f16, ST1H_IMM, PTRUE_H>;
+ defm : unpred_store< store, nxv8bf16, ST1H_IMM, PTRUE_H>;
+ defm : unpred_store< store, nxv4f16, ST1H_S_IMM, PTRUE_S>;
+ defm : unpred_store< store, nxv2f16, ST1H_D_IMM, PTRUE_D>;
+ defm : unpred_store< store, nxv4f32, ST1W_IMM, PTRUE_S>;
+ defm : unpred_store< store, nxv4f32, ST1W_D_IMM, PTRUE_D>;
+ defm : unpred_store< store, nxv2f64, ST1D_IMM, PTRUE_D>;
+
+ multiclass unpred_load<PatFrag Load, ValueType Ty, Instruction RegImmInst,
+ Instruction PTrue> {
+ let AddedComplexity = 1 in {
+ def _imm: Pat<(Ty (Load (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset))),
+ (RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
+ }
+
+ let AddedComplexity = 2 in {
+ def _fi : Pat<(Ty (Load (am_sve_fi GPR64sp:$base, simm4s1:$offset))),
+ (RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
+ }
+
+ def : Pat<(Ty (Load GPR64:$base)),
+ (RegImmInst (PTrue 31), GPR64:$base, (i64 0))>;
+ }
+
+ defm : unpred_load< load, nxv16i8, LD1B_IMM, PTRUE_B>;
+ defm : unpred_load< zextloadvi8, nxv8i16, LD1B_H_IMM, PTRUE_H>;
+ defm : unpred_load< zextloadvi8, nxv4i32, LD1B_S_IMM, PTRUE_S>;
+ defm : unpred_load< zextloadvi8, nxv2i64, LD1B_D_IMM, PTRUE_D>;
+ defm : unpred_load< extloadvi8, nxv8i16, LD1B_H_IMM, PTRUE_H>;
+ defm : unpred_load< extloadvi8, nxv4i32, LD1B_S_IMM, PTRUE_S>;
+ defm : unpred_load< extloadvi8, nxv2i64, LD1B_D_IMM, PTRUE_D>;
+ defm : unpred_load< sextloadvi8, nxv8i16, LD1SB_H_IMM, PTRUE_H>;
+ defm : unpred_load< sextloadvi8, nxv4i32, LD1SB_S_IMM, PTRUE_S>;
+ defm : unpred_load< sextloadvi8, nxv2i64, LD1SB_D_IMM, PTRUE_D>;
+ defm : unpred_load< load, nxv8i16, LD1H_IMM, PTRUE_H>;
+ defm : unpred_load<zextloadvi16, nxv4i32, LD1H_S_IMM, PTRUE_S>;
+ defm : unpred_load<zextloadvi16, nxv2i64, LD1H_D_IMM, PTRUE_D>;
+ defm : unpred_load< extloadvi16, nxv4i32, LD1H_S_IMM, PTRUE_S>;
+ defm : unpred_load< extloadvi16, nxv2i64, LD1H_D_IMM, PTRUE_D>;
+ defm : unpred_load<sextloadvi16, nxv4i32, LD1SH_S_IMM, PTRUE_S>;
+ defm : unpred_load<sextloadvi16, nxv2i64, LD1SH_D_IMM, PTRUE_D>;
+ defm : unpred_load< load, nxv4i32, LD1W_IMM, PTRUE_S>;
+ defm : unpred_load<zextloadvi32, nxv2i64, LD1W_D_IMM, PTRUE_D>;
+ defm : unpred_load< extloadvi32, nxv2i64, LD1W_D_IMM, PTRUE_D>;
+ defm : unpred_load<sextloadvi32, nxv2i64, LD1SW_D_IMM, PTRUE_D>;
+ defm : unpred_load< load, nxv2i64, LD1D_IMM, PTRUE_D>;
+ defm : unpred_load< load, nxv8f16, LD1H_IMM, PTRUE_H>;
+ defm : unpred_load< load, nxv8bf16, LD1H_IMM, PTRUE_H>;
+ defm : unpred_load< load, nxv4f16, LD1H_S_IMM, PTRUE_S>;
+ defm : unpred_load< load, nxv2f16, LD1H_D_IMM, PTRUE_D>;
+ defm : unpred_load< load, nxv4f32, LD1W_IMM, PTRUE_S>;
+ defm : unpred_load< load, nxv2f32, LD1W_D_IMM, PTRUE_D>;
+ defm : unpred_load< load, nxv2f64, LD1D_IMM, PTRUE_D>;
+
+ multiclass unpred_store_predicate<ValueType Ty, Instruction Store> {
+ def _fi : Pat<(store (Ty PPR:$val), (am_sve_fi GPR64sp:$base, simm9:$offset)),
+ (Store PPR:$val, GPR64sp:$base, simm9:$offset)>;
+
+ def _default : Pat<(store (Ty PPR:$Val), GPR64:$base),
+ (Store PPR:$Val, GPR64:$base, (i64 0))>;
+ }
+
+ defm Pat_Store_P16 : unpred_store_predicate<nxv16i1, STR_PXI>;
+ defm Pat_Store_P8 : unpred_store_predicate<nxv8i1, STR_PXI>;
+ defm Pat_Store_P4 : unpred_store_predicate<nxv4i1, STR_PXI>;
+ defm Pat_Store_P2 : unpred_store_predicate<nxv2i1, STR_PXI>;
+
+ multiclass unpred_load_predicate<ValueType Ty, Instruction Load> {
+ def _fi : Pat<(Ty (load (am_sve_fi GPR64sp:$base, simm9:$offset))),
+ (Load GPR64sp:$base, simm9:$offset)>;
+
+ def _default : Pat<(Ty (load GPR64:$base)),
+ (Load GPR64:$base, (i64 0))>;
+ }
+
+ defm Pat_Load_P16 : unpred_load_predicate<nxv16i1, LDR_PXI>;
+ defm Pat_Load_P8 : unpred_load_predicate<nxv8i1, LDR_PXI>;
+ defm Pat_Load_P4 : unpred_load_predicate<nxv4i1, LDR_PXI>;
+ defm Pat_Load_P2 : unpred_load_predicate<nxv2i1, LDR_PXI>;
+
+ multiclass ld1<Instruction RegRegInst, Instruction RegImmInst, ValueType Ty,
+ SDPatternOperator Load, ValueType PredTy, ValueType MemVT, ComplexPattern AddrCP> {
+ // reg + reg
+ let AddedComplexity = 1 in {
+ def : Pat<(Ty (Load (PredTy PPR:$gp), (AddrCP GPR64:$base, GPR64:$offset), MemVT)),
+ (RegRegInst PPR:$gp, GPR64sp:$base, GPR64:$offset)>;
+ }
+
+ // scalar + immediate (mul vl)
+ let AddedComplexity = 2 in {
+ def : Pat<(Ty (Load (PredTy PPR:$gp), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset), MemVT)),
+ (RegImmInst PPR:$gp, GPR64sp:$base, simm4s1:$offset)>;
+ }
+
+ // base
+ def : Pat<(Ty (Load (PredTy PPR:$gp), GPR64:$base, MemVT)),
+ (RegImmInst PPR:$gp, GPR64sp:$base, (i64 0))>;
+ }
+
+ // 2-element contiguous loads
+ defm : ld1<LD1B_D, LD1B_D_IMM, nxv2i64, AArch64ld1_z, nxv2i1, nxv2i8, am_sve_regreg_lsl0>;
+ defm : ld1<LD1SB_D, LD1SB_D_IMM, nxv2i64, AArch64ld1s_z, nxv2i1, nxv2i8, am_sve_regreg_lsl0>;
+ defm : ld1<LD1H_D, LD1H_D_IMM, nxv2i64, AArch64ld1_z, nxv2i1, nxv2i16, am_sve_regreg_lsl1>;
+ defm : ld1<LD1SH_D, LD1SH_D_IMM, nxv2i64, AArch64ld1s_z, nxv2i1, nxv2i16, am_sve_regreg_lsl1>;
+ defm : ld1<LD1W_D, LD1W_D_IMM, nxv2i64, AArch64ld1_z, nxv2i1, nxv2i32, am_sve_regreg_lsl2>;
+ defm : ld1<LD1SW_D, LD1SW_D_IMM, nxv2i64, AArch64ld1s_z, nxv2i1, nxv2i32, am_sve_regreg_lsl2>;
+ defm : ld1<LD1D, LD1D_IMM, nxv2i64, AArch64ld1_z, nxv2i1, nxv2i64, am_sve_regreg_lsl3>;
+ defm : ld1<LD1D, LD1D_IMM, nxv2f64, AArch64ld1_z, nxv2i1, nxv2f64, am_sve_regreg_lsl3>;
+
+ // 4-element contiguous loads
+ defm : ld1<LD1B_S, LD1B_S_IMM, nxv4i32, AArch64ld1_z, nxv4i1, nxv4i8, am_sve_regreg_lsl0>;
+ defm : ld1<LD1SB_S, LD1SB_S_IMM, nxv4i32, AArch64ld1s_z, nxv4i1, nxv4i8, am_sve_regreg_lsl0>;
+ defm : ld1<LD1H_S, LD1H_S_IMM, nxv4i32, AArch64ld1_z, nxv4i1, nxv4i16, am_sve_regreg_lsl1>;
+ defm : ld1<LD1SH_S, LD1SH_S_IMM, nxv4i32, AArch64ld1s_z, nxv4i1, nxv4i16, am_sve_regreg_lsl1>;
+ defm : ld1<LD1W, LD1W_IMM, nxv4i32, AArch64ld1_z, nxv4i1, nxv4i32, am_sve_regreg_lsl2>;
+ defm : ld1<LD1W, LD1W_IMM, nxv4f32, AArch64ld1_z, nxv4i1, nxv4f32, am_sve_regreg_lsl2>;
+
+ // 8-element contiguous loads
+ defm : ld1<LD1B_H, LD1B_H_IMM, nxv8i16, AArch64ld1_z, nxv8i1, nxv8i8, am_sve_regreg_lsl0>;
+ defm : ld1<LD1SB_H, LD1SB_H_IMM, nxv8i16, AArch64ld1s_z, nxv8i1, nxv8i8, am_sve_regreg_lsl0>;
+ defm : ld1<LD1H, LD1H_IMM, nxv8i16, AArch64ld1_z, nxv8i1, nxv8i16, am_sve_regreg_lsl1>;
+ defm : ld1<LD1H, LD1H_IMM, nxv8f16, AArch64ld1_z, nxv8i1, nxv8f16, am_sve_regreg_lsl1>;
+
+ let Predicates = [HasBF16, HasSVE] in {
+ defm : ld1<LD1H, LD1H_IMM, nxv8bf16, AArch64ld1_z, nxv8i1, nxv8bf16, am_sve_regreg_lsl1>;
+ }
+
+ // 16-element contiguous loads
+ defm : ld1<LD1B, LD1B_IMM, nxv16i8, AArch64ld1_z, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
+
+ multiclass ldnf1<Instruction I, ValueType Ty, SDPatternOperator Load, ValueType PredTy, ValueType MemVT> {
+ // scalar + immediate (mul vl)
+ let AddedComplexity = 1 in {
+ def : Pat<(Ty (Load (PredTy PPR:$gp), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset), MemVT)),
+ (I PPR:$gp, GPR64sp:$base, simm4s1:$offset)>;
+ }
+
+ // base
+ def : Pat<(Ty (Load (PredTy PPR:$gp), GPR64:$base, MemVT)),
+ (I PPR:$gp, GPR64sp:$base, (i64 0))>;
+ }
+
+ // 2-element contiguous non-faulting loads
+ defm : ldnf1<LDNF1B_D_IMM, nxv2i64, AArch64ldnf1_z, nxv2i1, nxv2i8>;
+ defm : ldnf1<LDNF1SB_D_IMM, nxv2i64, AArch64ldnf1s_z, nxv2i1, nxv2i8>;
+ defm : ldnf1<LDNF1H_D_IMM, nxv2i64, AArch64ldnf1_z, nxv2i1, nxv2i16>;
+ defm : ldnf1<LDNF1SH_D_IMM, nxv2i64, AArch64ldnf1s_z, nxv2i1, nxv2i16>;
+ defm : ldnf1<LDNF1W_D_IMM, nxv2i64, AArch64ldnf1_z, nxv2i1, nxv2i32>;
+ defm : ldnf1<LDNF1SW_D_IMM, nxv2i64, AArch64ldnf1s_z, nxv2i1, nxv2i32>;
+ defm : ldnf1<LDNF1D_IMM, nxv2i64, AArch64ldnf1_z, nxv2i1, nxv2i64>;
+ defm : ldnf1<LDNF1D_IMM, nxv2f64, AArch64ldnf1_z, nxv2i1, nxv2f64>;
+
+ // 4-element contiguous non-faulting loads
+ defm : ldnf1<LDNF1B_S_IMM, nxv4i32, AArch64ldnf1_z, nxv4i1, nxv4i8>;
+ defm : ldnf1<LDNF1SB_S_IMM, nxv4i32, AArch64ldnf1s_z, nxv4i1, nxv4i8>;
+ defm : ldnf1<LDNF1H_S_IMM, nxv4i32, AArch64ldnf1_z, nxv4i1, nxv4i16>;
+ defm : ldnf1<LDNF1SH_S_IMM, nxv4i32, AArch64ldnf1s_z, nxv4i1, nxv4i16>;
+ defm : ldnf1<LDNF1W_IMM, nxv4i32, AArch64ldnf1_z, nxv4i1, nxv4i32>;
+ defm : ldnf1<LDNF1W_IMM, nxv4f32, AArch64ldnf1_z, nxv4i1, nxv4f32>;
+
+ // 8-element contiguous non-faulting loads
+ defm : ldnf1<LDNF1B_H_IMM, nxv8i16, AArch64ldnf1_z, nxv8i1, nxv8i8>;
+ defm : ldnf1<LDNF1SB_H_IMM, nxv8i16, AArch64ldnf1s_z, nxv8i1, nxv8i8>;
+ defm : ldnf1<LDNF1H_IMM, nxv8i16, AArch64ldnf1_z, nxv8i1, nxv8i16>;
+ defm : ldnf1<LDNF1H_IMM, nxv8f16, AArch64ldnf1_z, nxv8i1, nxv8f16>;
+
+ let Predicates = [HasBF16, HasSVE] in {
+ defm : ldnf1<LDNF1H_IMM, nxv8bf16, AArch64ldnf1_z, nxv8i1, nxv8bf16>;
+ }
+
+ // 16-element contiguous non-faulting loads
+ defm : ldnf1<LDNF1B_IMM, nxv16i8, AArch64ldnf1_z, nxv16i1, nxv16i8>;
- defm : pred_load<nxv16i8, nxv16i1, non_temporal_load, LDNT1B_ZRI>;
- defm : pred_load<nxv8i16, nxv8i1, non_temporal_load, LDNT1H_ZRI>;
- defm : pred_load<nxv4i32, nxv4i1, non_temporal_load, LDNT1W_ZRI>;
- defm : pred_load<nxv2i64, nxv2i1, non_temporal_load, LDNT1D_ZRI>;
+ multiclass ldff1<Instruction I, ValueType Ty, SDPatternOperator Load, ValueType PredTy, ValueType MemVT, ComplexPattern AddrCP> {
+ // reg + reg
+ let AddedComplexity = 1 in {
+ def : Pat<(Ty (Load (PredTy PPR:$gp), (AddrCP GPR64:$base, GPR64:$offset), MemVT)),
+ (I PPR:$gp, GPR64sp:$base, GPR64:$offset)>;
+ }
- defm : pred_store<nxv16i8, nxv16i1, non_temporal_store, STNT1B_ZRI>;
- defm : pred_store<nxv8i16, nxv8i1, non_temporal_store, STNT1H_ZRI>;
- defm : pred_store<nxv4i32, nxv4i1, non_temporal_store, STNT1W_ZRI>;
- defm : pred_store<nxv2i64, nxv2i1, non_temporal_store, STNT1D_ZRI>;
+ // Base
+ def : Pat<(Ty (Load (PredTy PPR:$gp), GPR64:$base, MemVT)),
+ (I PPR:$gp, GPR64sp:$base, XZR)>;
+ }
+
+ // 2-element contiguous first faulting loads
+ defm : ldff1<LDFF1B_D, nxv2i64, AArch64ldff1_z, nxv2i1, nxv2i8, am_sve_regreg_lsl0>;
+ defm : ldff1<LDFF1SB_D, nxv2i64, AArch64ldff1s_z, nxv2i1, nxv2i8, am_sve_regreg_lsl0>;
+ defm : ldff1<LDFF1H_D, nxv2i64, AArch64ldff1_z, nxv2i1, nxv2i16, am_sve_regreg_lsl1>;
+ defm : ldff1<LDFF1SH_D, nxv2i64, AArch64ldff1s_z, nxv2i1, nxv2i16, am_sve_regreg_lsl1>;
+ defm : ldff1<LDFF1W_D, nxv2i64, AArch64ldff1_z, nxv2i1, nxv2i32, am_sve_regreg_lsl2>;
+ defm : ldff1<LDFF1SW_D, nxv2i64, AArch64ldff1s_z, nxv2i1, nxv2i32, am_sve_regreg_lsl2>;
+ defm : ldff1<LDFF1D, nxv2i64, AArch64ldff1_z, nxv2i1, nxv2i64, am_sve_regreg_lsl3>;
+ defm : ldff1<LDFF1W_D, nxv2f32, AArch64ldff1_z, nxv2i1, nxv2f32, am_sve_regreg_lsl2>;
+ defm : ldff1<LDFF1D, nxv2f64, AArch64ldff1_z, nxv2i1, nxv2f64, am_sve_regreg_lsl3>;
+
+ // 4-element contiguous first faulting loads
+ defm : ldff1<LDFF1B_S, nxv4i32, AArch64ldff1_z, nxv4i1, nxv4i8, am_sve_regreg_lsl0>;
+ defm : ldff1<LDFF1SB_S, nxv4i32, AArch64ldff1s_z, nxv4i1, nxv4i8, am_sve_regreg_lsl0>;
+ defm : ldff1<LDFF1H_S, nxv4i32, AArch64ldff1_z, nxv4i1, nxv4i16, am_sve_regreg_lsl1>;
+ defm : ldff1<LDFF1SH_S, nxv4i32, AArch64ldff1s_z, nxv4i1, nxv4i16, am_sve_regreg_lsl1>;
+ defm : ldff1<LDFF1W, nxv4i32, AArch64ldff1_z, nxv4i1, nxv4i32, am_sve_regreg_lsl2>;
+ defm : ldff1<LDFF1W, nxv4f32, AArch64ldff1_z, nxv4i1, nxv4f32, am_sve_regreg_lsl2>;
+
+ // 8-element contiguous first faulting loads
+ defm : ldff1<LDFF1B_H, nxv8i16, AArch64ldff1_z, nxv8i1, nxv8i8, am_sve_regreg_lsl0>;
+ defm : ldff1<LDFF1SB_H, nxv8i16, AArch64ldff1s_z, nxv8i1, nxv8i8, am_sve_regreg_lsl0>;
+ defm : ldff1<LDFF1H, nxv8i16, AArch64ldff1_z, nxv8i1, nxv8i16, am_sve_regreg_lsl1>;
+ defm : ldff1<LDFF1H, nxv8f16, AArch64ldff1_z, nxv8i1, nxv8f16, am_sve_regreg_lsl1>;
+
+ let Predicates = [HasBF16, HasSVE] in {
+ defm : ldff1<LDFF1H, nxv8bf16, AArch64ldff1_z, nxv8i1, nxv8bf16, am_sve_regreg_lsl1>;
+ }
+
+ // 16-element contiguous first faulting loads
+ defm : ldff1<LDFF1B, nxv16i8, AArch64ldff1_z, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
+
+ multiclass st1<Instruction RegRegInst, Instruction RegImmInst, ValueType Ty,
+ SDPatternOperator Store, ValueType PredTy, ValueType MemVT, ComplexPattern AddrCP> {
+ // reg + reg
+ let AddedComplexity = 1 in {
+ def : Pat<(Store (Ty ZPR:$vec), (AddrCP GPR64:$base, GPR64:$offset), (PredTy PPR:$gp), MemVT),
+ (RegRegInst ZPR:$vec, PPR:$gp, GPR64sp:$base, GPR64:$offset)>;
+ }
+
+ // scalar + immediate (mul vl)
+ let AddedComplexity = 2 in {
+ def : Pat<(Store (Ty ZPR:$vec), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset), (PredTy PPR:$gp), MemVT),
+ (RegImmInst ZPR:$vec, PPR:$gp, GPR64sp:$base, simm4s1:$offset)>;
+ }
+
+ // base
+ def : Pat<(Store (Ty ZPR:$vec), GPR64:$base, (PredTy PPR:$gp), MemVT),
+ (RegImmInst ZPR:$vec, PPR:$gp, GPR64:$base, (i64 0))>;
+ }
+
+ // 2-element contiguous store
+ defm : st1<ST1B_D, ST1B_D_IMM, nxv2i64, AArch64st1, nxv2i1, nxv2i8, am_sve_regreg_lsl0>;
+ defm : st1<ST1H_D, ST1H_D_IMM, nxv2i64, AArch64st1, nxv2i1, nxv2i16, am_sve_regreg_lsl1>;
+ defm : st1<ST1W_D, ST1W_D_IMM, nxv2i64, AArch64st1, nxv2i1, nxv2i32, am_sve_regreg_lsl2>;
+ defm : st1<ST1D, ST1D_IMM, nxv2i64, AArch64st1, nxv2i1, nxv2i64, am_sve_regreg_lsl3>;
+
+ // 4-element contiguous store
+ defm : st1<ST1B_S, ST1B_S_IMM, nxv4i32, AArch64st1, nxv4i1, nxv4i8, am_sve_regreg_lsl0>;
+ defm : st1<ST1H_S, ST1H_S_IMM, nxv4i32, AArch64st1, nxv4i1, nxv4i16, am_sve_regreg_lsl1>;
+ defm : st1<ST1W, ST1W_IMM, nxv4i32, AArch64st1, nxv4i1, nxv4i32, am_sve_regreg_lsl2>;
+
+ // 8-element contiguous store
+ defm : st1<ST1B_H, ST1B_H_IMM, nxv8i16, AArch64st1, nxv8i1, nxv8i8, am_sve_regreg_lsl0>;
+ defm : st1<ST1H, ST1H_IMM, nxv8i16, AArch64st1, nxv8i1, nxv8i16, am_sve_regreg_lsl1>;
+
+ // 16-element contiguous store
+ defm : st1<ST1B, ST1B_IMM, nxv16i8, AArch64st1, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
+
+ def : Pat<(nxv16i8 (vector_insert (nxv16i8 (undef)), (i32 FPR32:$src), 0)),
+ (INSERT_SUBREG (nxv16i8 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
+ def : Pat<(nxv8i16 (vector_insert (nxv8i16 (undef)), (i32 FPR32:$src), 0)),
+ (INSERT_SUBREG (nxv8i16 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
+ def : Pat<(nxv4i32 (vector_insert (nxv4i32 (undef)), (i32 FPR32:$src), 0)),
+ (INSERT_SUBREG (nxv4i32 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
+ def : Pat<(nxv2i64 (vector_insert (nxv2i64 (undef)), (i64 FPR64:$src), 0)),
+ (INSERT_SUBREG (nxv2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+
+ // Insert scalar into vector[0]
+ def : Pat<(nxv16i8 (vector_insert (nxv16i8 ZPR:$vec), (i32 GPR32:$src), 0)),
+ (CPY_ZPmR_B ZPR:$vec, (PTRUE_B 1), GPR32:$src)>;
+ def : Pat<(nxv8i16 (vector_insert (nxv8i16 ZPR:$vec), (i32 GPR32:$src), 0)),
+ (CPY_ZPmR_H ZPR:$vec, (PTRUE_H 1), GPR32:$src)>;
+ def : Pat<(nxv4i32 (vector_insert (nxv4i32 ZPR:$vec), (i32 GPR32:$src), 0)),
+ (CPY_ZPmR_S ZPR:$vec, (PTRUE_S 1), GPR32:$src)>;
+ def : Pat<(nxv2i64 (vector_insert (nxv2i64 ZPR:$vec), (i64 GPR64:$src), 0)),
+ (CPY_ZPmR_D ZPR:$vec, (PTRUE_D 1), GPR64:$src)>;
+
+ def : Pat<(nxv8f16 (vector_insert (nxv8f16 ZPR:$vec), (f16 FPR16:$src), 0)),
+ (SEL_ZPZZ_H (PTRUE_H 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), ZPR:$vec)>;
+ def : Pat<(nxv4f32 (vector_insert (nxv4f32 ZPR:$vec), (f32 FPR32:$src), 0)),
+ (SEL_ZPZZ_S (PTRUE_S 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), ZPR:$vec)>;
+ def : Pat<(nxv2f64 (vector_insert (nxv2f64 ZPR:$vec), (f64 FPR64:$src), 0)),
+ (SEL_ZPZZ_D (PTRUE_D 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), ZPR:$vec)>;
+
+ // Insert scalar into vector with scalar index
+ def : Pat<(nxv16i8 (vector_insert (nxv16i8 ZPR:$vec), GPR32:$src, GPR64:$index)),
+ (CPY_ZPmR_B ZPR:$vec,
+ (CMPEQ_PPzZZ_B (PTRUE_B 31),
+ (INDEX_II_B 0, 1),
+ (DUP_ZR_B (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+ GPR32:$src)>;
+ def : Pat<(nxv8i16 (vector_insert (nxv8i16 ZPR:$vec), GPR32:$src, GPR64:$index)),
+ (CPY_ZPmR_H ZPR:$vec,
+ (CMPEQ_PPzZZ_H (PTRUE_H 31),
+ (INDEX_II_H 0, 1),
+ (DUP_ZR_H (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+ GPR32:$src)>;
+ def : Pat<(nxv4i32 (vector_insert (nxv4i32 ZPR:$vec), GPR32:$src, GPR64:$index)),
+ (CPY_ZPmR_S ZPR:$vec,
+ (CMPEQ_PPzZZ_S (PTRUE_S 31),
+ (INDEX_II_S 0, 1),
+ (DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+ GPR32:$src)>;
+ def : Pat<(nxv2i64 (vector_insert (nxv2i64 ZPR:$vec), GPR64:$src, GPR64:$index)),
+ (CPY_ZPmR_D ZPR:$vec,
+ (CMPEQ_PPzZZ_D (PTRUE_D 31),
+ (INDEX_II_D 0, 1),
+ (DUP_ZR_D GPR64:$index)),
+ GPR64:$src)>;
+
+ // Insert FP scalar into vector with scalar index
+ def : Pat<(nxv8f16 (vector_insert (nxv8f16 ZPR:$vec), (f16 FPR16:$src), GPR64:$index)),
+ (CPY_ZPmV_H ZPR:$vec,
+ (CMPEQ_PPzZZ_H (PTRUE_H 31),
+ (INDEX_II_H 0, 1),
+ (DUP_ZR_H (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+ $src)>;
+ def : Pat<(nxv4f32 (vector_insert (nxv4f32 ZPR:$vec), (f32 FPR32:$src), GPR64:$index)),
+ (CPY_ZPmV_S ZPR:$vec,
+ (CMPEQ_PPzZZ_S (PTRUE_S 31),
+ (INDEX_II_S 0, 1),
+ (DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+ $src)>;
+ def : Pat<(nxv2f64 (vector_insert (nxv2f64 ZPR:$vec), (f64 FPR64:$src), GPR64:$index)),
+ (CPY_ZPmV_D ZPR:$vec,
+ (CMPEQ_PPzZZ_D (PTRUE_D 31),
+ (INDEX_II_D 0, 1),
+ (DUP_ZR_D $index)),
+ $src)>;
+
+ // Extract element from vector with immediate index
+ def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_B ZPR:$vec, sve_elm_idx_extdup_b:$index), ssub)>;
+ def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), ssub)>;
+ def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), ssub)>;
+ def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;
+ def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>;
+ def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), ssub)>;
+ def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;
+
+ // Extract element from vector with scalar index
+ def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), GPR64:$index)),
+ (LASTB_RPZ_B (WHILELS_PXX_B XZR, GPR64:$index),
+ ZPR:$vec)>;
+ def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), GPR64:$index)),
+ (LASTB_RPZ_H (WHILELS_PXX_H XZR, GPR64:$index),
+ ZPR:$vec)>;
+ def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), GPR64:$index)),
+ (LASTB_RPZ_S (WHILELS_PXX_S XZR, GPR64:$index),
+ ZPR:$vec)>;
+ def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), GPR64:$index)),
+ (LASTB_RPZ_D (WHILELS_PXX_D XZR, GPR64:$index),
+ ZPR:$vec)>;
+
+ def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index),
+ ZPR:$vec)>;
+ def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_S (WHILELS_PXX_S XZR, GPR64:$index),
+ ZPR:$vec)>;
+ def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_D (WHILELS_PXX_D XZR, GPR64:$index),
+ ZPR:$vec)>;
+}
+
+let Predicates = [HasSVE, HasMatMulInt8] in {
+ defm SMMLA_ZZZ : sve_int_matmul<0b00, "smmla", int_aarch64_sve_smmla>;
+ defm UMMLA_ZZZ : sve_int_matmul<0b11, "ummla", int_aarch64_sve_ummla>;
+ defm USMMLA_ZZZ : sve_int_matmul<0b10, "usmmla", int_aarch64_sve_usmmla>;
+ defm USDOT_ZZZ : sve_int_dot_mixed<"usdot", int_aarch64_sve_usdot>;
+ defm USDOT_ZZZI : sve_int_dot_mixed_indexed<0, "usdot", int_aarch64_sve_usdot_lane>;
+ defm SUDOT_ZZZI : sve_int_dot_mixed_indexed<1, "sudot", int_aarch64_sve_sudot_lane>;
+}
+
+let Predicates = [HasSVE, HasMatMulFP32] in {
+ defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0, "fmmla", ZPR32, int_aarch64_sve_fmmla, nxv4f32>;
+}
+
+let Predicates = [HasSVE, HasMatMulFP64] in {
+ defm FMMLA_ZZZ_D : sve_fp_matrix_mla<1, "fmmla", ZPR64, int_aarch64_sve_fmmla, nxv2f64>;
+ defm LD1RO_B_IMM : sve_mem_ldor_si<0b00, "ld1rob", Z_b, ZPR8, nxv16i8, nxv16i1, AArch64ld1ro_z>;
+ defm LD1RO_H_IMM : sve_mem_ldor_si<0b01, "ld1roh", Z_h, ZPR16, nxv8i16, nxv8i1, AArch64ld1ro_z>;
+ defm LD1RO_W_IMM : sve_mem_ldor_si<0b10, "ld1row", Z_s, ZPR32, nxv4i32, nxv4i1, AArch64ld1ro_z>;
+ defm LD1RO_D_IMM : sve_mem_ldor_si<0b11, "ld1rod", Z_d, ZPR64, nxv2i64, nxv2i1, AArch64ld1ro_z>;
+ defm LD1RO_B : sve_mem_ldor_ss<0b00, "ld1rob", Z_b, ZPR8, GPR64NoXZRshifted8, nxv16i8, nxv16i1, AArch64ld1ro_z, am_sve_regreg_lsl0>;
+ defm LD1RO_H : sve_mem_ldor_ss<0b01, "ld1roh", Z_h, ZPR16, GPR64NoXZRshifted16, nxv8i16, nxv8i1, AArch64ld1ro_z, am_sve_regreg_lsl1>;
+ defm LD1RO_W : sve_mem_ldor_ss<0b10, "ld1row", Z_s, ZPR32, GPR64NoXZRshifted32, nxv4i32, nxv4i1, AArch64ld1ro_z, am_sve_regreg_lsl2>;
+ defm LD1RO_D : sve_mem_ldor_ss<0b11, "ld1rod", Z_d, ZPR64, GPR64NoXZRshifted64, nxv2i64, nxv2i1, AArch64ld1ro_z, am_sve_regreg_lsl3>;
+ defm ZIP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 0, "zip1", int_aarch64_sve_zip1q>;
+ defm ZIP2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 1, "zip2", int_aarch64_sve_zip2q>;
+ defm UZP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b01, 0, "uzp1", int_aarch64_sve_uzp1q>;
+ defm UZP2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b01, 1, "uzp2", int_aarch64_sve_uzp2q>;
+ defm TRN1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b11, 0, "trn1", int_aarch64_sve_trn1q>;
+ defm TRN2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b11, 1, "trn2", int_aarch64_sve_trn2q>;
+}
+
+let Predicates = [HasSVE, HasMatMulFP64, HasBF16] in {
+ def : SVE_2_Op_Pat<nxv8bf16, int_aarch64_sve_zip1q, nxv8bf16, nxv8bf16, ZIP1_ZZZ_Q>;
+ def : SVE_2_Op_Pat<nxv8bf16, int_aarch64_sve_zip2q, nxv8bf16, nxv8bf16, ZIP2_ZZZ_Q>;
+ def : SVE_2_Op_Pat<nxv8bf16, int_aarch64_sve_uzp1q, nxv8bf16, nxv8bf16, UZP1_ZZZ_Q>;
+ def : SVE_2_Op_Pat<nxv8bf16, int_aarch64_sve_uzp2q, nxv8bf16, nxv8bf16, UZP2_ZZZ_Q>;
+ def : SVE_2_Op_Pat<nxv8bf16, int_aarch64_sve_trn1q, nxv8bf16, nxv8bf16, TRN1_ZZZ_Q>;
+ def : SVE_2_Op_Pat<nxv8bf16, int_aarch64_sve_trn2q, nxv8bf16, nxv8bf16, TRN2_ZZZ_Q>;
}
let Predicates = [HasSVE2] in {
// SVE2 integer multiply-add (indexed)
- defm MLA_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b0, "mla">;
- defm MLS_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b1, "mls">;
+ defm MLA_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b0, "mla", int_aarch64_sve_mla_lane>;
+ defm MLS_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b1, "mls", int_aarch64_sve_mls_lane>;
// SVE2 saturating multiply-add high (indexed)
- defm SQRDMLAH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b0, "sqrdmlah">;
- defm SQRDMLSH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b1, "sqrdmlsh">;
+ defm SQRDMLAH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b0, "sqrdmlah", int_aarch64_sve_sqrdmlah_lane>;
+ defm SQRDMLSH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b1, "sqrdmlsh", int_aarch64_sve_sqrdmlsh_lane>;
// SVE2 saturating multiply-add high (vectors, unpredicated)
- defm SQRDMLAH_ZZZ : sve2_int_mla<0b0, "sqrdmlah">;
- defm SQRDMLSH_ZZZ : sve2_int_mla<0b1, "sqrdmlsh">;
+ defm SQRDMLAH_ZZZ : sve2_int_mla<0b0, "sqrdmlah", int_aarch64_sve_sqrdmlah>;
+ defm SQRDMLSH_ZZZ : sve2_int_mla<0b1, "sqrdmlsh", int_aarch64_sve_sqrdmlsh>;
// SVE2 integer multiply (indexed)
- defm MUL_ZZZI : sve2_int_mul_by_indexed_elem<0b1110, "mul">;
+ defm MUL_ZZZI : sve2_int_mul_by_indexed_elem<0b1110, "mul", int_aarch64_sve_mul_lane>;
// SVE2 saturating multiply high (indexed)
- defm SQDMULH_ZZZI : sve2_int_mul_by_indexed_elem<0b1100, "sqdmulh">;
- defm SQRDMULH_ZZZI : sve2_int_mul_by_indexed_elem<0b1101, "sqrdmulh">;
+ defm SQDMULH_ZZZI : sve2_int_mul_by_indexed_elem<0b1100, "sqdmulh", int_aarch64_sve_sqdmulh_lane>;
+ defm SQRDMULH_ZZZI : sve2_int_mul_by_indexed_elem<0b1101, "sqrdmulh", int_aarch64_sve_sqrdmulh_lane>;
// SVE2 signed saturating doubling multiply high (unpredicated)
- defm SQDMULH_ZZZ : sve2_int_mul<0b100, "sqdmulh">;
- defm SQRDMULH_ZZZ : sve2_int_mul<0b101, "sqrdmulh">;
+ defm SQDMULH_ZZZ : sve2_int_mul<0b100, "sqdmulh", int_aarch64_sve_sqdmulh>;
+ defm SQRDMULH_ZZZ : sve2_int_mul<0b101, "sqrdmulh", int_aarch64_sve_sqrdmulh>;
// SVE2 integer multiply vectors (unpredicated)
- defm MUL_ZZZ : sve2_int_mul<0b000, "mul">;
- defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh">;
- defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh">;
- def PMUL_ZZZ_B : sve2_int_mul<0b00, 0b001, "pmul", ZPR8>;
-
+ defm MUL_ZZZ : sve2_int_mul<0b000, "mul", mul>;
+ defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh", null_frag>;
+ defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh", null_frag>;
+ defm PMUL_ZZZ : sve2_int_mul_single<0b001, "pmul", int_aarch64_sve_pmul>;
+
+ // Add patterns for the unpredicated versions of smulh and umulh.
+ def : Pat<(nxv16i8 (int_aarch64_sve_smulh (nxv16i1 (AArch64ptrue 31)), nxv16i8:$Op1, nxv16i8:$Op2)),
+ (SMULH_ZZZ_B $Op1, $Op2)>;
+ def : Pat<(nxv8i16 (int_aarch64_sve_smulh (nxv8i1 (AArch64ptrue 31)), nxv8i16:$Op1, nxv8i16:$Op2)),
+ (SMULH_ZZZ_H $Op1, $Op2)>;
+ def : Pat<(nxv4i32 (int_aarch64_sve_smulh (nxv4i1 (AArch64ptrue 31)), nxv4i32:$Op1, nxv4i32:$Op2)),
+ (SMULH_ZZZ_S $Op1, $Op2)>;
+ def : Pat<(nxv2i64 (int_aarch64_sve_smulh (nxv2i1 (AArch64ptrue 31)), nxv2i64:$Op1, nxv2i64:$Op2)),
+ (SMULH_ZZZ_D $Op1, $Op2)>;
+ def : Pat<(nxv16i8 (int_aarch64_sve_umulh (nxv16i1 (AArch64ptrue 31)), nxv16i8:$Op1, nxv16i8:$Op2)),
+ (UMULH_ZZZ_B $Op1, $Op2)>;
+ def : Pat<(nxv8i16 (int_aarch64_sve_umulh (nxv8i1 (AArch64ptrue 31)), nxv8i16:$Op1, nxv8i16:$Op2)),
+ (UMULH_ZZZ_H $Op1, $Op2)>;
+ def : Pat<(nxv4i32 (int_aarch64_sve_umulh (nxv4i1 (AArch64ptrue 31)), nxv4i32:$Op1, nxv4i32:$Op2)),
+ (UMULH_ZZZ_S $Op1, $Op2)>;
+ def : Pat<(nxv2i64 (int_aarch64_sve_umulh (nxv2i1 (AArch64ptrue 31)), nxv2i64:$Op1, nxv2i64:$Op2)),
+ (UMULH_ZZZ_D $Op1, $Op2)>;
// SVE2 complex integer dot product (indexed)
- defm CDOT_ZZZI : sve2_cintx_dot_by_indexed_elem<"cdot">;
+ defm CDOT_ZZZI : sve2_cintx_dot_by_indexed_elem<"cdot", int_aarch64_sve_cdot_lane>;
// SVE2 complex integer dot product
- defm CDOT_ZZZ : sve2_cintx_dot<"cdot">;
+ defm CDOT_ZZZ : sve2_cintx_dot<"cdot", int_aarch64_sve_cdot>;
// SVE2 complex integer multiply-add (indexed)
- defm CMLA_ZZZI : sve2_cmla_by_indexed_elem<0b0, "cmla">;
+ defm CMLA_ZZZI : sve2_cmla_by_indexed_elem<0b0, "cmla", int_aarch64_sve_cmla_lane_x>;
// SVE2 complex saturating multiply-add (indexed)
- defm SQRDCMLAH_ZZZI : sve2_cmla_by_indexed_elem<0b1, "sqrdcmlah">;
+ defm SQRDCMLAH_ZZZI : sve2_cmla_by_indexed_elem<0b1, "sqrdcmlah", int_aarch64_sve_sqrdcmlah_lane_x>;
// SVE2 complex integer multiply-add
- defm CMLA_ZZZ : sve2_int_cmla<0b0, "cmla">;
- defm SQRDCMLAH_ZZZ : sve2_int_cmla<0b1, "sqrdcmlah">;
+ defm CMLA_ZZZ : sve2_int_cmla<0b0, "cmla", int_aarch64_sve_cmla_x>;
+ defm SQRDCMLAH_ZZZ : sve2_int_cmla<0b1, "sqrdcmlah", int_aarch64_sve_sqrdcmlah_x>;
// SVE2 integer multiply long (indexed)
- defm SMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b000, "smullb">;
- defm SMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b001, "smullt">;
- defm UMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b010, "umullb">;
- defm UMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b011, "umullt">;
+ defm SMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b000, "smullb", int_aarch64_sve_smullb_lane>;
+ defm SMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b001, "smullt", int_aarch64_sve_smullt_lane>;
+ defm UMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b010, "umullb", int_aarch64_sve_umullb_lane>;
+ defm UMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b011, "umullt", int_aarch64_sve_umullt_lane>;
// SVE2 saturating multiply (indexed)
- defm SQDMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b100, "sqdmullb">;
- defm SQDMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b101, "sqdmullt">;
+ defm SQDMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b100, "sqdmullb", int_aarch64_sve_sqdmullb_lane>;
+ defm SQDMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b101, "sqdmullt", int_aarch64_sve_sqdmullt_lane>;
// SVE2 integer multiply-add long (indexed)
- defm SMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1000, "smlalb">;
- defm SMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1001, "smlalt">;
- defm UMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1010, "umlalb">;
- defm UMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1011, "umlalt">;
- defm SMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1100, "smlslb">;
- defm SMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1101, "smlslt">;
- defm UMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1110, "umlslb">;
- defm UMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1111, "umlslt">;
+ defm SMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1000, "smlalb", int_aarch64_sve_smlalb_lane>;
+ defm SMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1001, "smlalt", int_aarch64_sve_smlalt_lane>;
+ defm UMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1010, "umlalb", int_aarch64_sve_umlalb_lane>;
+ defm UMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1011, "umlalt", int_aarch64_sve_umlalt_lane>;
+ defm SMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1100, "smlslb", int_aarch64_sve_smlslb_lane>;
+ defm SMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1101, "smlslt", int_aarch64_sve_smlslt_lane>;
+ defm UMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1110, "umlslb", int_aarch64_sve_umlslb_lane>;
+ defm UMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1111, "umlslt", int_aarch64_sve_umlslt_lane>;
// SVE2 integer multiply-add long (vectors, unpredicated)
- defm SMLALB_ZZZ : sve2_int_mla_long<0b10000, "smlalb">;
- defm SMLALT_ZZZ : sve2_int_mla_long<0b10001, "smlalt">;
- defm UMLALB_ZZZ : sve2_int_mla_long<0b10010, "umlalb">;
- defm UMLALT_ZZZ : sve2_int_mla_long<0b10011, "umlalt">;
- defm SMLSLB_ZZZ : sve2_int_mla_long<0b10100, "smlslb">;
- defm SMLSLT_ZZZ : sve2_int_mla_long<0b10101, "smlslt">;
- defm UMLSLB_ZZZ : sve2_int_mla_long<0b10110, "umlslb">;
- defm UMLSLT_ZZZ : sve2_int_mla_long<0b10111, "umlslt">;
+ defm SMLALB_ZZZ : sve2_int_mla_long<0b10000, "smlalb", int_aarch64_sve_smlalb>;
+ defm SMLALT_ZZZ : sve2_int_mla_long<0b10001, "smlalt", int_aarch64_sve_smlalt>;
+ defm UMLALB_ZZZ : sve2_int_mla_long<0b10010, "umlalb", int_aarch64_sve_umlalb>;
+ defm UMLALT_ZZZ : sve2_int_mla_long<0b10011, "umlalt", int_aarch64_sve_umlalt>;
+ defm SMLSLB_ZZZ : sve2_int_mla_long<0b10100, "smlslb", int_aarch64_sve_smlslb>;
+ defm SMLSLT_ZZZ : sve2_int_mla_long<0b10101, "smlslt", int_aarch64_sve_smlslt>;
+ defm UMLSLB_ZZZ : sve2_int_mla_long<0b10110, "umlslb", int_aarch64_sve_umlslb>;
+ defm UMLSLT_ZZZ : sve2_int_mla_long<0b10111, "umlslt", int_aarch64_sve_umlslt>;
// SVE2 saturating multiply-add long (indexed)
- defm SQDMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0100, "sqdmlalb">;
- defm SQDMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0101, "sqdmlalt">;
- defm SQDMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0110, "sqdmlslb">;
- defm SQDMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0111, "sqdmlslt">;
+ defm SQDMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0100, "sqdmlalb", int_aarch64_sve_sqdmlalb_lane>;
+ defm SQDMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0101, "sqdmlalt", int_aarch64_sve_sqdmlalt_lane>;
+ defm SQDMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0110, "sqdmlslb", int_aarch64_sve_sqdmlslb_lane>;
+ defm SQDMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0111, "sqdmlslt", int_aarch64_sve_sqdmlslt_lane>;
// SVE2 saturating multiply-add long (vectors, unpredicated)
- defm SQDMLALB_ZZZ : sve2_int_mla_long<0b11000, "sqdmlalb">;
- defm SQDMLALT_ZZZ : sve2_int_mla_long<0b11001, "sqdmlalt">;
- defm SQDMLSLB_ZZZ : sve2_int_mla_long<0b11010, "sqdmlslb">;
- defm SQDMLSLT_ZZZ : sve2_int_mla_long<0b11011, "sqdmlslt">;
+ defm SQDMLALB_ZZZ : sve2_int_mla_long<0b11000, "sqdmlalb", int_aarch64_sve_sqdmlalb>;
+ defm SQDMLALT_ZZZ : sve2_int_mla_long<0b11001, "sqdmlalt", int_aarch64_sve_sqdmlalt>;
+ defm SQDMLSLB_ZZZ : sve2_int_mla_long<0b11010, "sqdmlslb", int_aarch64_sve_sqdmlslb>;
+ defm SQDMLSLT_ZZZ : sve2_int_mla_long<0b11011, "sqdmlslt", int_aarch64_sve_sqdmlslt>;
// SVE2 saturating multiply-add interleaved long
- defm SQDMLALBT_ZZZ : sve2_int_mla_long<0b00010, "sqdmlalbt">;
- defm SQDMLSLBT_ZZZ : sve2_int_mla_long<0b00011, "sqdmlslbt">;
+ defm SQDMLALBT_ZZZ : sve2_int_mla_long<0b00010, "sqdmlalbt", int_aarch64_sve_sqdmlalbt>;
+ defm SQDMLSLBT_ZZZ : sve2_int_mla_long<0b00011, "sqdmlslbt", int_aarch64_sve_sqdmlslbt>;
// SVE2 integer halving add/subtract (predicated)
- defm SHADD_ZPmZ : sve2_int_arith_pred<0b100000, "shadd">;
- defm UHADD_ZPmZ : sve2_int_arith_pred<0b100010, "uhadd">;
- defm SHSUB_ZPmZ : sve2_int_arith_pred<0b100100, "shsub">;
- defm UHSUB_ZPmZ : sve2_int_arith_pred<0b100110, "uhsub">;
- defm SRHADD_ZPmZ : sve2_int_arith_pred<0b101000, "srhadd">;
- defm URHADD_ZPmZ : sve2_int_arith_pred<0b101010, "urhadd">;
- defm SHSUBR_ZPmZ : sve2_int_arith_pred<0b101100, "shsubr">;
- defm UHSUBR_ZPmZ : sve2_int_arith_pred<0b101110, "uhsubr">;
+ defm SHADD_ZPmZ : sve2_int_arith_pred<0b100000, "shadd", int_aarch64_sve_shadd>;
+ defm UHADD_ZPmZ : sve2_int_arith_pred<0b100010, "uhadd", int_aarch64_sve_uhadd>;
+ defm SHSUB_ZPmZ : sve2_int_arith_pred<0b100100, "shsub", int_aarch64_sve_shsub>;
+ defm UHSUB_ZPmZ : sve2_int_arith_pred<0b100110, "uhsub", int_aarch64_sve_uhsub>;
+ defm SRHADD_ZPmZ : sve2_int_arith_pred<0b101000, "srhadd", int_aarch64_sve_srhadd>;
+ defm URHADD_ZPmZ : sve2_int_arith_pred<0b101010, "urhadd", int_aarch64_sve_urhadd>;
+ defm SHSUBR_ZPmZ : sve2_int_arith_pred<0b101100, "shsubr", int_aarch64_sve_shsubr>;
+ defm UHSUBR_ZPmZ : sve2_int_arith_pred<0b101110, "uhsubr", int_aarch64_sve_uhsubr>;
// SVE2 integer pairwise add and accumulate long
- defm SADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<0, "sadalp">;
- defm UADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<1, "uadalp">;
+ defm SADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<0, "sadalp", int_aarch64_sve_sadalp>;
+ defm UADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<1, "uadalp", int_aarch64_sve_uadalp>;
// SVE2 integer pairwise arithmetic
- defm ADDP_ZPmZ : sve2_int_arith_pred<0b100011, "addp">;
- defm SMAXP_ZPmZ : sve2_int_arith_pred<0b101001, "smaxp">;
- defm UMAXP_ZPmZ : sve2_int_arith_pred<0b101011, "umaxp">;
- defm SMINP_ZPmZ : sve2_int_arith_pred<0b101101, "sminp">;
- defm UMINP_ZPmZ : sve2_int_arith_pred<0b101111, "uminp">;
+ defm ADDP_ZPmZ : sve2_int_arith_pred<0b100011, "addp", int_aarch64_sve_addp>;
+ defm SMAXP_ZPmZ : sve2_int_arith_pred<0b101001, "smaxp", int_aarch64_sve_smaxp>;
+ defm UMAXP_ZPmZ : sve2_int_arith_pred<0b101011, "umaxp", int_aarch64_sve_umaxp>;
+ defm SMINP_ZPmZ : sve2_int_arith_pred<0b101101, "sminp", int_aarch64_sve_sminp>;
+ defm UMINP_ZPmZ : sve2_int_arith_pred<0b101111, "uminp", int_aarch64_sve_uminp>;
// SVE2 integer unary operations (predicated)
- defm URECPE_ZPmZ : sve2_int_un_pred_arit_s<0b000, "urecpe">;
- defm URSQRTE_ZPmZ : sve2_int_un_pred_arit_s<0b001, "ursqrte">;
- defm SQABS_ZPmZ : sve2_int_un_pred_arit<0b100, "sqabs">;
- defm SQNEG_ZPmZ : sve2_int_un_pred_arit<0b101, "sqneg">;
+ defm URECPE_ZPmZ : sve2_int_un_pred_arit_s<0b000, "urecpe", int_aarch64_sve_urecpe>;
+ defm URSQRTE_ZPmZ : sve2_int_un_pred_arit_s<0b001, "ursqrte", int_aarch64_sve_ursqrte>;
+ defm SQABS_ZPmZ : sve2_int_un_pred_arit<0b100, "sqabs", int_aarch64_sve_sqabs>;
+ defm SQNEG_ZPmZ : sve2_int_un_pred_arit<0b101, "sqneg", int_aarch64_sve_sqneg>;
// SVE2 saturating add/subtract
- defm SQADD_ZPmZ : sve2_int_arith_pred<0b110000, "sqadd">;
- defm UQADD_ZPmZ : sve2_int_arith_pred<0b110010, "uqadd">;
- defm SQSUB_ZPmZ : sve2_int_arith_pred<0b110100, "sqsub">;
- defm UQSUB_ZPmZ : sve2_int_arith_pred<0b110110, "uqsub">;
- defm SUQADD_ZPmZ : sve2_int_arith_pred<0b111000, "suqadd">;
- defm USQADD_ZPmZ : sve2_int_arith_pred<0b111010, "usqadd">;
- defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr">;
- defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr">;
+ defm SQADD_ZPmZ : sve2_int_arith_pred<0b110000, "sqadd", int_aarch64_sve_sqadd>;
+ defm UQADD_ZPmZ : sve2_int_arith_pred<0b110010, "uqadd", int_aarch64_sve_uqadd>;
+ defm SQSUB_ZPmZ : sve2_int_arith_pred<0b110100, "sqsub", int_aarch64_sve_sqsub>;
+ defm UQSUB_ZPmZ : sve2_int_arith_pred<0b110110, "uqsub", int_aarch64_sve_uqsub>;
+ defm SUQADD_ZPmZ : sve2_int_arith_pred<0b111000, "suqadd", int_aarch64_sve_suqadd>;
+ defm USQADD_ZPmZ : sve2_int_arith_pred<0b111010, "usqadd", int_aarch64_sve_usqadd>;
+ defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr", int_aarch64_sve_sqsubr>;
+ defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr", int_aarch64_sve_uqsubr>;
// SVE2 saturating/rounding bitwise shift left (predicated)
- defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl">;
- defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl">;
- defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr">;
- defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr">;
- defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl">;
- defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl">;
- defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl">;
- defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl">;
- defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr">;
- defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr">;
- defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr">;
- defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr">;
+ defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl", int_aarch64_sve_srshl>;
+ defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl", int_aarch64_sve_urshl>;
+ defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr", null_frag>;
+ defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr", null_frag>;
+ defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl", int_aarch64_sve_sqshl>;
+ defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl", int_aarch64_sve_uqshl>;
+ defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl", int_aarch64_sve_sqrshl>;
+ defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl", int_aarch64_sve_uqrshl>;
+ defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr", null_frag>;
+ defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr", null_frag>;
+ defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr", null_frag>;
+ defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr", null_frag>;
+
+ let Predicates = [HasSVE2, UseExperimentalZeroingPseudos] in {
+ defm SQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<null_frag>;
+ defm UQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<null_frag>;
+ defm SRSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_srshr>;
+ defm URSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_urshr>;
+ defm SQSHLU_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<int_aarch64_sve_sqshlu>;
+ }
// SVE2 predicated shifts
- defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
- defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
- defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">;
- defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">;
- defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">;
+ defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl", "SQSHL_ZPZI">;
+ defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl", "UQSHL_ZPZI">;
+ defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr", "SRSHR_ZPZI", int_aarch64_sve_srshr>;
+ defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", "URSHR_ZPZI", int_aarch64_sve_urshr>;
+ defm SQSHLU_ZPmI : sve2_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>;
// SVE2 integer add/subtract long
- defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb">;
- defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt">;
- defm UADDLB_ZZZ : sve2_wide_int_arith_long<0b00010, "uaddlb">;
- defm UADDLT_ZZZ : sve2_wide_int_arith_long<0b00011, "uaddlt">;
- defm SSUBLB_ZZZ : sve2_wide_int_arith_long<0b00100, "ssublb">;
- defm SSUBLT_ZZZ : sve2_wide_int_arith_long<0b00101, "ssublt">;
- defm USUBLB_ZZZ : sve2_wide_int_arith_long<0b00110, "usublb">;
- defm USUBLT_ZZZ : sve2_wide_int_arith_long<0b00111, "usublt">;
- defm SABDLB_ZZZ : sve2_wide_int_arith_long<0b01100, "sabdlb">;
- defm SABDLT_ZZZ : sve2_wide_int_arith_long<0b01101, "sabdlt">;
- defm UABDLB_ZZZ : sve2_wide_int_arith_long<0b01110, "uabdlb">;
- defm UABDLT_ZZZ : sve2_wide_int_arith_long<0b01111, "uabdlt">;
+ defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb", int_aarch64_sve_saddlb>;
+ defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt", int_aarch64_sve_saddlt>;
+ defm UADDLB_ZZZ : sve2_wide_int_arith_long<0b00010, "uaddlb", int_aarch64_sve_uaddlb>;
+ defm UADDLT_ZZZ : sve2_wide_int_arith_long<0b00011, "uaddlt", int_aarch64_sve_uaddlt>;
+ defm SSUBLB_ZZZ : sve2_wide_int_arith_long<0b00100, "ssublb", int_aarch64_sve_ssublb>;
+ defm SSUBLT_ZZZ : sve2_wide_int_arith_long<0b00101, "ssublt", int_aarch64_sve_ssublt>;
+ defm USUBLB_ZZZ : sve2_wide_int_arith_long<0b00110, "usublb", int_aarch64_sve_usublb>;
+ defm USUBLT_ZZZ : sve2_wide_int_arith_long<0b00111, "usublt", int_aarch64_sve_usublt>;
+ defm SABDLB_ZZZ : sve2_wide_int_arith_long<0b01100, "sabdlb", int_aarch64_sve_sabdlb>;
+ defm SABDLT_ZZZ : sve2_wide_int_arith_long<0b01101, "sabdlt", int_aarch64_sve_sabdlt>;
+ defm UABDLB_ZZZ : sve2_wide_int_arith_long<0b01110, "uabdlb", int_aarch64_sve_uabdlb>;
+ defm UABDLT_ZZZ : sve2_wide_int_arith_long<0b01111, "uabdlt", int_aarch64_sve_uabdlt>;
// SVE2 integer add/subtract wide
- defm SADDWB_ZZZ : sve2_wide_int_arith_wide<0b000, "saddwb">;
- defm SADDWT_ZZZ : sve2_wide_int_arith_wide<0b001, "saddwt">;
- defm UADDWB_ZZZ : sve2_wide_int_arith_wide<0b010, "uaddwb">;
- defm UADDWT_ZZZ : sve2_wide_int_arith_wide<0b011, "uaddwt">;
- defm SSUBWB_ZZZ : sve2_wide_int_arith_wide<0b100, "ssubwb">;
- defm SSUBWT_ZZZ : sve2_wide_int_arith_wide<0b101, "ssubwt">;
- defm USUBWB_ZZZ : sve2_wide_int_arith_wide<0b110, "usubwb">;
- defm USUBWT_ZZZ : sve2_wide_int_arith_wide<0b111, "usubwt">;
+ defm SADDWB_ZZZ : sve2_wide_int_arith_wide<0b000, "saddwb", int_aarch64_sve_saddwb>;
+ defm SADDWT_ZZZ : sve2_wide_int_arith_wide<0b001, "saddwt", int_aarch64_sve_saddwt>;
+ defm UADDWB_ZZZ : sve2_wide_int_arith_wide<0b010, "uaddwb", int_aarch64_sve_uaddwb>;
+ defm UADDWT_ZZZ : sve2_wide_int_arith_wide<0b011, "uaddwt", int_aarch64_sve_uaddwt>;
+ defm SSUBWB_ZZZ : sve2_wide_int_arith_wide<0b100, "ssubwb", int_aarch64_sve_ssubwb>;
+ defm SSUBWT_ZZZ : sve2_wide_int_arith_wide<0b101, "ssubwt", int_aarch64_sve_ssubwt>;
+ defm USUBWB_ZZZ : sve2_wide_int_arith_wide<0b110, "usubwb", int_aarch64_sve_usubwb>;
+ defm USUBWT_ZZZ : sve2_wide_int_arith_wide<0b111, "usubwt", int_aarch64_sve_usubwt>;
// SVE2 integer multiply long
- defm SQDMULLB_ZZZ : sve2_wide_int_arith_long<0b11000, "sqdmullb">;
- defm SQDMULLT_ZZZ : sve2_wide_int_arith_long<0b11001, "sqdmullt">;
- defm SMULLB_ZZZ : sve2_wide_int_arith_long<0b11100, "smullb">;
- defm SMULLT_ZZZ : sve2_wide_int_arith_long<0b11101, "smullt">;
- defm UMULLB_ZZZ : sve2_wide_int_arith_long<0b11110, "umullb">;
- defm UMULLT_ZZZ : sve2_wide_int_arith_long<0b11111, "umullt">;
- defm PMULLB_ZZZ : sve2_pmul_long<0b0, "pmullb">;
- defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt">;
+ defm SQDMULLB_ZZZ : sve2_wide_int_arith_long<0b11000, "sqdmullb", int_aarch64_sve_sqdmullb>;
+ defm SQDMULLT_ZZZ : sve2_wide_int_arith_long<0b11001, "sqdmullt", int_aarch64_sve_sqdmullt>;
+ defm SMULLB_ZZZ : sve2_wide_int_arith_long<0b11100, "smullb", int_aarch64_sve_smullb>;
+ defm SMULLT_ZZZ : sve2_wide_int_arith_long<0b11101, "smullt", int_aarch64_sve_smullt>;
+ defm UMULLB_ZZZ : sve2_wide_int_arith_long<0b11110, "umullb", int_aarch64_sve_umullb>;
+ defm UMULLT_ZZZ : sve2_wide_int_arith_long<0b11111, "umullt", int_aarch64_sve_umullt>;
+ defm PMULLB_ZZZ : sve2_pmul_long<0b0, "pmullb", int_aarch64_sve_pmullb_pair>;
+ defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt", int_aarch64_sve_pmullt_pair>;
// SVE2 bitwise shift and insert
- defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri">;
- defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli">;
+ defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri", int_aarch64_sve_sri>;
+ defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli", int_aarch64_sve_sli>;
// SVE2 bitwise shift right and accumulate
- defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra">;
- defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra">;
- defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra">;
- defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra">;
+ defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra", int_aarch64_sve_ssra>;
+ defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra", int_aarch64_sve_usra>;
+ defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra", int_aarch64_sve_srsra>;
+ defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra", int_aarch64_sve_ursra>;
// SVE2 complex integer add
- defm CADD_ZZI : sve2_int_cadd<0b0, "cadd">;
- defm SQCADD_ZZI : sve2_int_cadd<0b1, "sqcadd">;
+ defm CADD_ZZI : sve2_int_cadd<0b0, "cadd", int_aarch64_sve_cadd_x>;
+ defm SQCADD_ZZI : sve2_int_cadd<0b1, "sqcadd", int_aarch64_sve_sqcadd_x>;
// SVE2 integer absolute difference and accumulate
- defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba">;
- defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba">;
+ defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba", int_aarch64_sve_saba>;
+ defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba", int_aarch64_sve_uaba>;
// SVE2 integer absolute difference and accumulate long
- defm SABALB_ZZZ : sve2_int_absdiff_accum_long<0b00, "sabalb">;
- defm SABALT_ZZZ : sve2_int_absdiff_accum_long<0b01, "sabalt">;
- defm UABALB_ZZZ : sve2_int_absdiff_accum_long<0b10, "uabalb">;
- defm UABALT_ZZZ : sve2_int_absdiff_accum_long<0b11, "uabalt">;
+ defm SABALB_ZZZ : sve2_int_absdiff_accum_long<0b00, "sabalb", int_aarch64_sve_sabalb>;
+ defm SABALT_ZZZ : sve2_int_absdiff_accum_long<0b01, "sabalt", int_aarch64_sve_sabalt>;
+ defm UABALB_ZZZ : sve2_int_absdiff_accum_long<0b10, "uabalb", int_aarch64_sve_uabalb>;
+ defm UABALT_ZZZ : sve2_int_absdiff_accum_long<0b11, "uabalt", int_aarch64_sve_uabalt>;
// SVE2 integer add/subtract long with carry
- defm ADCLB_ZZZ : sve2_int_addsub_long_carry<0b00, "adclb">;
- defm ADCLT_ZZZ : sve2_int_addsub_long_carry<0b01, "adclt">;
- defm SBCLB_ZZZ : sve2_int_addsub_long_carry<0b10, "sbclb">;
- defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">;
+ defm ADCLB_ZZZ : sve2_int_addsub_long_carry<0b00, "adclb", int_aarch64_sve_adclb>;
+ defm ADCLT_ZZZ : sve2_int_addsub_long_carry<0b01, "adclt", int_aarch64_sve_adclt>;
+ defm SBCLB_ZZZ : sve2_int_addsub_long_carry<0b10, "sbclb", int_aarch64_sve_sbclb>;
+ defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt", int_aarch64_sve_sbclt>;
// SVE2 bitwise shift right narrow (bottom)
defm SQSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb", int_aarch64_sve_sqshrunb>;
@@ -1489,29 +2426,29 @@ let Predicates = [HasSVE2] in {
defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow_top<0b10, "sqxtunt", int_aarch64_sve_sqxtunt>;
// SVE2 character match
- defm MATCH_PPzZZ : sve2_char_match<0b0, "match">;
- defm NMATCH_PPzZZ : sve2_char_match<0b1, "nmatch">;
+ defm MATCH_PPzZZ : sve2_char_match<0b0, "match", int_aarch64_sve_match>;
+ defm NMATCH_PPzZZ : sve2_char_match<0b1, "nmatch", int_aarch64_sve_nmatch>;
// SVE2 bitwise exclusive-or interleaved
- defm EORBT_ZZZ : sve2_bitwise_xor_interleaved<0b0, "eorbt">;
- defm EORTB_ZZZ : sve2_bitwise_xor_interleaved<0b1, "eortb">;
+ defm EORBT_ZZZ : sve2_bitwise_xor_interleaved<0b0, "eorbt", int_aarch64_sve_eorbt>;
+ defm EORTB_ZZZ : sve2_bitwise_xor_interleaved<0b1, "eortb", int_aarch64_sve_eortb>;
// SVE2 bitwise shift left long
- defm SSHLLB_ZZI : sve2_bitwise_shift_left_long<0b00, "sshllb">;
- defm SSHLLT_ZZI : sve2_bitwise_shift_left_long<0b01, "sshllt">;
- defm USHLLB_ZZI : sve2_bitwise_shift_left_long<0b10, "ushllb">;
- defm USHLLT_ZZI : sve2_bitwise_shift_left_long<0b11, "ushllt">;
+ defm SSHLLB_ZZI : sve2_bitwise_shift_left_long<0b00, "sshllb", int_aarch64_sve_sshllb>;
+ defm SSHLLT_ZZI : sve2_bitwise_shift_left_long<0b01, "sshllt", int_aarch64_sve_sshllt>;
+ defm USHLLB_ZZI : sve2_bitwise_shift_left_long<0b10, "ushllb", int_aarch64_sve_ushllb>;
+ defm USHLLT_ZZI : sve2_bitwise_shift_left_long<0b11, "ushllt", int_aarch64_sve_ushllt>;
// SVE2 integer add/subtract interleaved long
- defm SADDLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b00, "saddlbt">;
- defm SSUBLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b10, "ssublbt">;
- defm SSUBLTB_ZZZ : sve2_misc_int_addsub_long_interleaved<0b11, "ssubltb">;
+ defm SADDLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b00, "saddlbt", int_aarch64_sve_saddlbt>;
+ defm SSUBLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b10, "ssublbt", int_aarch64_sve_ssublbt>;
+ defm SSUBLTB_ZZZ : sve2_misc_int_addsub_long_interleaved<0b11, "ssubltb", int_aarch64_sve_ssubltb>;
// SVE2 histogram generation (segment)
- def HISTSEG_ZZZ : sve2_hist_gen_segment<"histseg">;
+ def HISTSEG_ZZZ : sve2_hist_gen_segment<"histseg", int_aarch64_sve_histseg>;
// SVE2 histogram generation (vector)
- defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt">;
+ defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt", int_aarch64_sve_histcnt>;
// SVE2 floating-point base 2 logarithm as integer
defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb", int_aarch64_sve_flogb>;
@@ -1542,50 +2479,57 @@ let Predicates = [HasSVE2] in {
defm FMLSLT_ZZZ_SHH : sve2_fp_mla_long<0b11, "fmlslt", int_aarch64_sve_fmlslt>;
// SVE2 bitwise ternary operations
- defm EOR3_ZZZZ_D : sve2_int_bitwise_ternary_op<0b000, "eor3">;
- defm BCAX_ZZZZ_D : sve2_int_bitwise_ternary_op<0b010, "bcax">;
- def BSL_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b001, "bsl">;
- def BSL1N_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b011, "bsl1n">;
- def BSL2N_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b101, "bsl2n">;
- def NBSL_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b111, "nbsl">;
+ defm EOR3_ZZZZ : sve2_int_bitwise_ternary_op<0b000, "eor3", int_aarch64_sve_eor3>;
+ defm BCAX_ZZZZ : sve2_int_bitwise_ternary_op<0b010, "bcax", int_aarch64_sve_bcax>;
+ defm BSL_ZZZZ : sve2_int_bitwise_ternary_op<0b001, "bsl", int_aarch64_sve_bsl>;
+ defm BSL1N_ZZZZ : sve2_int_bitwise_ternary_op<0b011, "bsl1n", int_aarch64_sve_bsl1n>;
+ defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", int_aarch64_sve_bsl2n>;
+ defm NBSL_ZZZZ : sve2_int_bitwise_ternary_op<0b111, "nbsl", int_aarch64_sve_nbsl>;
// SVE2 bitwise xor and rotate right by immediate
- defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar">;
+ defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar", int_aarch64_sve_xar>;
// SVE2 extract vector (immediate offset, constructive)
def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
// SVE2 non-temporal gather loads
- defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>;
- defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>;
- defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>;
- defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>;
- defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>;
-
- defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>;
- defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>;
- defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>;
- defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>;
- defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>;
- defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>;
- defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>;
+ defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00000, "ldnt1sb", AArch64ldnt1s_gather_z, nxv4i8>;
+ defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00001, "ldnt1b", AArch64ldnt1_gather_z, nxv4i8>;
+ defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00100, "ldnt1sh", AArch64ldnt1s_gather_z, nxv4i16>;
+ defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00101, "ldnt1h", AArch64ldnt1_gather_z, nxv4i16>;
+ defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b01001, "ldnt1w", AArch64ldnt1_gather_z, nxv4i32>;
+
+ defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10000, "ldnt1sb", AArch64ldnt1s_gather_z, nxv2i8>;
+ defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10010, "ldnt1b", AArch64ldnt1_gather_z, nxv2i8>;
+ defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10100, "ldnt1sh", AArch64ldnt1s_gather_z, nxv2i16>;
+ defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10110, "ldnt1h", AArch64ldnt1_gather_z, nxv2i16>;
+ defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11000, "ldnt1sw", AArch64ldnt1s_gather_z, nxv2i32>;
+ defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11010, "ldnt1w", AArch64ldnt1_gather_z, nxv2i32>;
+ defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11110, "ldnt1d", AArch64ldnt1_gather_z, nxv2i64>;
// SVE2 vector splice (constructive)
defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">;
// SVE2 non-temporal scatter stores
- defm STNT1B_ZZR_S : sve2_mem_sstnt_vs<0b001, "stnt1b", Z_s, ZPR32>;
- defm STNT1H_ZZR_S : sve2_mem_sstnt_vs<0b011, "stnt1h", Z_s, ZPR32>;
- defm STNT1W_ZZR_S : sve2_mem_sstnt_vs<0b101, "stnt1w", Z_s, ZPR32>;
+ defm STNT1B_ZZR_S : sve2_mem_sstnt_vs_32_ptrs<0b001, "stnt1b", AArch64stnt1_scatter, nxv4i8>;
+ defm STNT1H_ZZR_S : sve2_mem_sstnt_vs_32_ptrs<0b011, "stnt1h", AArch64stnt1_scatter, nxv4i16>;
+ defm STNT1W_ZZR_S : sve2_mem_sstnt_vs_32_ptrs<0b101, "stnt1w", AArch64stnt1_scatter, nxv4i32>;
- defm STNT1B_ZZR_D : sve2_mem_sstnt_vs<0b000, "stnt1b", Z_d, ZPR64>;
- defm STNT1H_ZZR_D : sve2_mem_sstnt_vs<0b010, "stnt1h", Z_d, ZPR64>;
- defm STNT1W_ZZR_D : sve2_mem_sstnt_vs<0b100, "stnt1w", Z_d, ZPR64>;
- defm STNT1D_ZZR_D : sve2_mem_sstnt_vs<0b110, "stnt1d", Z_d, ZPR64>;
+ defm STNT1B_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b000, "stnt1b", AArch64stnt1_scatter, nxv2i8>;
+ defm STNT1H_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b010, "stnt1h", AArch64stnt1_scatter, nxv2i16>;
+ defm STNT1W_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b100, "stnt1w", AArch64stnt1_scatter, nxv2i32>;
+ defm STNT1D_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b110, "stnt1d", AArch64stnt1_scatter, nxv2i64>;
// SVE2 table lookup (three sources)
- defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl">;
- defm TBX_ZZZ : sve2_int_perm_tbx<"tbx">;
+ defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl", int_aarch64_sve_tbl2>;
+ defm TBX_ZZZ : sve2_int_perm_tbx<"tbx", int_aarch64_sve_tbx>;
+
+ let Predicates = [HasSVE, HasBF16] in {
+ def : SVE_3_Op_Pat<nxv8bf16, int_aarch64_sve_tbx, nxv8bf16, nxv8bf16, nxv8i16, TBX_ZZZ_H>;
+ def : Pat<(nxv8bf16 (int_aarch64_sve_tbl2 nxv8bf16:$Op1, nxv8bf16:$Op2, nxv8i16:$Op3)),
+ (nxv8bf16 (TBL_ZZZZ_H (REG_SEQUENCE ZPR2, nxv8bf16:$Op1, zsub0, nxv8bf16:$Op2, zsub1),
+ nxv8i16:$Op3))>;
+ }
// SVE2 integer compare scalar count and limit
defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege", int_aarch64_sve_whilege>;
@@ -1599,43 +2543,41 @@ let Predicates = [HasSVE2] in {
defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi", int_aarch64_sve_whilehi>;
// SVE2 pointer conflict compare
- defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr">;
- defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw">;
+ defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr", "int_aarch64_sve_whilewr">;
+ defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw", "int_aarch64_sve_whilerw">;
}
let Predicates = [HasSVE2AES] in {
// SVE2 crypto destructive binary operations
- def AESE_ZZZ_B : sve2_crypto_des_bin_op<0b00, "aese", ZPR8>;
- def AESD_ZZZ_B : sve2_crypto_des_bin_op<0b01, "aesd", ZPR8>;
+ defm AESE_ZZZ_B : sve2_crypto_des_bin_op<0b00, "aese", ZPR8, int_aarch64_sve_aese, nxv16i8>;
+ defm AESD_ZZZ_B : sve2_crypto_des_bin_op<0b01, "aesd", ZPR8, int_aarch64_sve_aesd, nxv16i8>;
// SVE2 crypto unary operations
- def AESMC_ZZ_B : sve2_crypto_unary_op<0b0, "aesmc">;
- def AESIMC_ZZ_B : sve2_crypto_unary_op<0b1, "aesimc">;
+ defm AESMC_ZZ_B : sve2_crypto_unary_op<0b0, "aesmc", int_aarch64_sve_aesmc>;
+ defm AESIMC_ZZ_B : sve2_crypto_unary_op<0b1, "aesimc", int_aarch64_sve_aesimc>;
// PMULLB and PMULLT instructions which operate with 64-bit source and
// 128-bit destination elements are enabled with crypto extensions, similar
// to NEON PMULL2 instruction.
- def PMULLB_ZZZ_Q : sve2_wide_int_arith<0b00, 0b11010, "pmullb",
- ZPR128, ZPR64, ZPR64>;
- def PMULLT_ZZZ_Q : sve2_wide_int_arith<0b00, 0b11011, "pmullt",
- ZPR128, ZPR64, ZPR64>;
+ defm PMULLB_ZZZ_Q : sve2_wide_int_arith_pmul<0b00, 0b11010, "pmullb", int_aarch64_sve_pmullb_pair>;
+ defm PMULLT_ZZZ_Q : sve2_wide_int_arith_pmul<0b00, 0b11011, "pmullt", int_aarch64_sve_pmullt_pair>;
}
let Predicates = [HasSVE2SM4] in {
// SVE2 crypto constructive binary operations
- def SM4EKEY_ZZZ_S : sve2_crypto_cons_bin_op<0b0, "sm4ekey", ZPR32>;
+ defm SM4EKEY_ZZZ_S : sve2_crypto_cons_bin_op<0b0, "sm4ekey", ZPR32, int_aarch64_sve_sm4ekey, nxv4i32>;
// SVE2 crypto destructive binary operations
- def SM4E_ZZZ_S : sve2_crypto_des_bin_op<0b10, "sm4e", ZPR32>;
+ defm SM4E_ZZZ_S : sve2_crypto_des_bin_op<0b10, "sm4e", ZPR32, int_aarch64_sve_sm4e, nxv4i32>;
}
let Predicates = [HasSVE2SHA3] in {
// SVE2 crypto constructive binary operations
- def RAX1_ZZZ_D : sve2_crypto_cons_bin_op<0b1, "rax1", ZPR64>;
+ defm RAX1_ZZZ_D : sve2_crypto_cons_bin_op<0b1, "rax1", ZPR64, int_aarch64_sve_rax1, nxv2i64>;
}
let Predicates = [HasSVE2BitPerm] in {
// SVE2 bitwise permute
- defm BEXT_ZZZ : sve2_misc_bitwise<0b1100, "bext">;
- defm BDEP_ZZZ : sve2_misc_bitwise<0b1101, "bdep">;
- defm BGRP_ZZZ : sve2_misc_bitwise<0b1110, "bgrp">;
+ defm BEXT_ZZZ : sve2_misc_bitwise<0b1100, "bext", int_aarch64_sve_bext_x>;
+ defm BDEP_ZZZ : sve2_misc_bitwise<0b1101, "bdep", int_aarch64_sve_bdep_x>;
+ defm BGRP_ZZZ : sve2_misc_bitwise<0b1110, "bgrp", int_aarch64_sve_bgrp_x>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td
index a760c4319005..7c40da05c305 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td
@@ -502,7 +502,7 @@ def : InstRW<[A57Write_5cyc_2V], (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
// Q form - v16i8, v8i16, v4i32, v2i64
// ASIMD bitwise insert, Q-form
-def : InstRW<[A57Write_3cyc_2V], (instregex "^(BIF|BIT|BSL)v16i8")>;
+def : InstRW<[A57Write_3cyc_2V], (instregex "^(BIF|BIT|BSL|BSP)v16i8")>;
// ASIMD duplicate, gen reg, D-form and Q-form
def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^CPY")>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
index 5ddfe9e0e34c..8abcb804d5c7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
@@ -495,7 +495,7 @@ def : InstRW<[CyWriteV3], (instregex "SQRSHLv","UQRSHLv")>;
// WriteV includes:
// SHLL,SSHLL,USHLL
// SLI,SRI
-// BIF,BIT,BSL
+// BIF,BIT,BSL,BSP
// EXT
// CLS,CLZ,CNT,RBIT,REV16,REV32,REV64,XTN
// XTN2
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
index 3272640cb3f2..8413a06ed391 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
@@ -661,7 +661,7 @@ def : InstRW<[M3WriteNEONY], (instrs FSQRTv2f64)>;
// ASIMD miscellaneous instructions.
def : InstRW<[M3WriteNALU1], (instregex "^RBITv")>;
-def : InstRW<[M3WriteNALU1], (instregex "^(BIF|BIT|BSL)v")>;
+def : InstRW<[M3WriteNALU1], (instregex "^(BIF|BIT|BSL|BSP)v")>;
def : InstRW<[M3WriteNEONB], (instregex "^DUPv.+gpr")>;
def : InstRW<[M3WriteNSHF1], (instregex "^DUPv.+lane")>;
def : InstRW<[M3WriteNSHF1], (instregex "^EXTv")>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
index cac86491f56d..34e8beb423ce 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
@@ -804,7 +804,7 @@ def : InstRW<[M4WriteNEONY], (instrs FSQRTv2f64)>;
// ASIMD miscellaneous instructions.
def : InstRW<[M4WriteNALU1], (instregex "^RBITv")>;
-def : InstRW<[M4WriteNALU1], (instregex "^(BIF|BIT|BSL)v")>;
+def : InstRW<[M4WriteNALU1], (instregex "^(BIF|BIT|BSL|BSP)v")>;
def : InstRW<[M4WriteNALU1], (instregex "^CL[STZ]v")>;
def : InstRW<[M4WriteNEONB], (instregex "^DUPv.+gpr")>;
def : InstRW<[M4WriteNSHF1], (instregex "^CPY")>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
index 86477b81b8bf..403aac80e47b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
@@ -842,7 +842,7 @@ def : InstRW<[M5WriteNEONY], (instrs FSQRTv2f64)>;
// ASIMD miscellaneous instructions.
def : InstRW<[M5WriteNALU2], (instregex "^RBITv")>;
-def : InstRW<[M5WriteNALU2], (instregex "^(BIF|BIT|BSL)v")>;
+def : InstRW<[M5WriteNALU2], (instregex "^(BIF|BIT|BSL|BSP)v")>;
def : InstRW<[M5WriteNALU2], (instregex "^CL[STZ]v")>;
def : InstRW<[M5WriteNEONB], (instregex "^DUPv.+gpr")>;
def : InstRW<[M5WriteNSHF2], (instregex "^CPY")>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td
index 697a0f69c58c..f2cd83caffa2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td
@@ -911,7 +911,7 @@ def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^CPY(i8|i16|i32|i64)$")>;
def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>;
def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>;
-def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v8i8$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL|BSP)v8i8$")>;
def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs EXTv8i8)>;
def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; // imm fwd
def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs TBLv8i8One)>;
@@ -935,7 +935,7 @@ def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],
(instregex "^INSv(i32|i64)(gpr|lane)$")>;
def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>;
-def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v16i8$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIF|BIT|BSL|BSP)v16i8$")>;
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs EXTv16i8)>;
def : InstRW<[FalkorWr_2VXVY_0cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; // imm fwd
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs NOTv16i8)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedKryoDetails.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedKryoDetails.td
index 4c60992e6351..bc5ad0f8bece 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedKryoDetails.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedKryoDetails.td
@@ -462,13 +462,13 @@ def KryoWrite_1cyc_X_noRSV_74ln :
let Latency = 1; let NumMicroOps = 2;
}
def : InstRW<[KryoWrite_1cyc_X_noRSV_74ln],
- (instrs BIFv8i8, BITv8i8, BSLv8i8)>;
+ (instrs BIFv8i8, BITv8i8, BSLv8i8, BSPv8i8)>;
def KryoWrite_1cyc_X_X_75ln :
SchedWriteRes<[KryoUnitX, KryoUnitX]> {
let Latency = 1; let NumMicroOps = 2;
}
def : InstRW<[KryoWrite_1cyc_X_X_75ln],
- (instrs BIFv16i8, BITv16i8, BSLv16i8)>;
+ (instrs BIFv16i8, BITv16i8, BSLv16i8, BSPv16i8)>;
def KryoWrite_0cyc_noRSV_11ln :
SchedWriteRes<[]> {
let Latency = 0; let NumMicroOps = 1;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
index 233613f7be3a..95c29dd2a567 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -1482,7 +1482,7 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^RBITv")>;
// ASIMD bitwise insert, D-form
// ASIMD bitwise insert, Q-form
def : InstRW<[THX2T99Write_5Cyc_F01],
- (instregex "^BIFv", "^BITv", "^BSLv")>;
+ (instregex "^BIFv", "^BITv", "^BSLv", "^BSPv")>;
// ASIMD count, D-form
// ASIMD count, Q-form
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index ba61ed726e84..8f814d185e85 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -17,7 +17,7 @@ using namespace llvm;
SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVolatile,
+ SDValue Size, Align Alignment, bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
// Check to see if there is a specialized entry-point for memory zeroing.
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
@@ -117,7 +117,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForSetTag(
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *BaseMemOperand = MF.getMachineMemOperand(
- DstPtrInfo, MachineMemOperand::MOStore, ObjSize, 16);
+ DstPtrInfo, MachineMemOperand::MOStore, ObjSize, Align(16));
bool UseSetTagRangeLoop =
kSetTagLoopThreshold >= 0 && (int)ObjSize >= kSetTagLoopThreshold;
@@ -125,21 +125,18 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForSetTag(
return EmitUnrolledSetTag(DAG, dl, Chain, Addr, ObjSize, BaseMemOperand,
ZeroData);
- if (ObjSize % 32 != 0) {
- SDNode *St1 = DAG.getMachineNode(
- ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex, dl,
- {MVT::i64, MVT::Other},
- {Addr, Addr, DAG.getTargetConstant(1, dl, MVT::i64), Chain});
- DAG.setNodeMemRefs(cast<MachineSDNode>(St1), {BaseMemOperand});
- ObjSize -= 16;
- Addr = SDValue(St1, 0);
- Chain = SDValue(St1, 1);
- }
-
const EVT ResTys[] = {MVT::i64, MVT::i64, MVT::Other};
- SDValue Ops[] = {DAG.getConstant(ObjSize, dl, MVT::i64), Addr, Chain};
- SDNode *St = DAG.getMachineNode(
- ZeroData ? AArch64::STZGloop : AArch64::STGloop, dl, ResTys, Ops);
+
+ unsigned Opcode;
+ if (Addr.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Addr)->getIndex();
+ Addr = DAG.getTargetFrameIndex(FI, MVT::i64);
+ Opcode = ZeroData ? AArch64::STZGloop : AArch64::STGloop;
+ } else {
+ Opcode = ZeroData ? AArch64::STZGloop_wback : AArch64::STGloop_wback;
+ }
+ SDValue Ops[] = {DAG.getTargetConstant(ObjSize, dl, MVT::i64), Addr, Chain};
+ SDNode *St = DAG.getMachineNode(Opcode, dl, ResTys, Ops);
DAG.setNodeMemRefs(cast<MachineSDNode>(St), {BaseMemOperand});
return SDValue(St, 2);
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index d0967fb973cc..d94fd8471b7b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -21,7 +21,8 @@ class AArch64SelectionDAGInfo : public SelectionDAGTargetInfo {
public:
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVolatile,
+ SDValue Size, Align Alignment,
+ bool isVolatile,
MachinePointerInfo DstPtrInfo) const override;
SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Op1, SDValue Op2,
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackOffset.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackOffset.h
index f95b5dc5246e..6fa1c744f77e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackOffset.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackOffset.h
@@ -16,6 +16,7 @@
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TypeSize.h"
+#include <cassert>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
index 975502818fcd..61f27cbc3b29 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -19,10 +19,13 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -44,6 +47,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
@@ -61,6 +65,11 @@ static cl::opt<bool> ClMergeInit(
"stack-tagging-merge-init", cl::Hidden, cl::init(true), cl::ZeroOrMore,
cl::desc("merge stack variable initializers with tagging when possible"));
+static cl::opt<bool>
+ ClUseStackSafety("stack-tagging-use-stack-safety", cl::Hidden,
+ cl::init(true), cl::ZeroOrMore,
+ cl::desc("Use Stack Safety analysis results"));
+
static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
cl::init(40), cl::Hidden);
@@ -256,8 +265,9 @@ public:
Type *EltTy = VecTy->getElementType();
if (EltTy->isPointerTy()) {
uint32_t EltSize = DL->getTypeSizeInBits(EltTy);
- Type *NewTy = VectorType::get(IntegerType::get(Ctx, EltSize),
- VecTy->getNumElements());
+ auto *NewTy = FixedVectorType::get(
+ IntegerType::get(Ctx, EltSize),
+ cast<FixedVectorType>(VecTy)->getNumElements());
V = IRB.CreatePointerCast(V, NewTy);
}
}
@@ -275,15 +285,17 @@ class AArch64StackTagging : public FunctionPass {
int Tag; // -1 for non-tagged allocations
};
- bool MergeInit;
+ const bool MergeInit;
+ const bool UseStackSafety;
public:
static char ID; // Pass ID, replacement for typeid
- AArch64StackTagging(bool MergeInit = true)
+ AArch64StackTagging(bool IsOptNone = false)
: FunctionPass(ID),
- MergeInit(ClMergeInit.getNumOccurrences() > 0 ? ClMergeInit
- : MergeInit) {
+ MergeInit(ClMergeInit.getNumOccurrences() ? ClMergeInit : !IsOptNone),
+ UseStackSafety(ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety
+ : !IsOptNone) {
initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry());
}
@@ -305,13 +317,16 @@ public:
StringRef getPassName() const override { return "AArch64 Stack Tagging"; }
private:
- Function *F;
- Function *SetTagFunc;
- const DataLayout *DL;
- AAResults *AA;
+ Function *F = nullptr;
+ Function *SetTagFunc = nullptr;
+ const DataLayout *DL = nullptr;
+ AAResults *AA = nullptr;
+ const StackSafetyGlobalInfo *SSI = nullptr;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ if (UseStackSafety)
+ AU.addRequired<StackSafetyGlobalInfoWrapperPass>();
if (MergeInit)
AU.addRequired<AAResultsWrapperPass>();
}
@@ -323,11 +338,13 @@ char AArch64StackTagging::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass)
INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
false, false)
-FunctionPass *llvm::createAArch64StackTaggingPass(bool MergeInit) {
- return new AArch64StackTagging(MergeInit);
+FunctionPass *llvm::createAArch64StackTaggingPass(bool IsOptNone) {
+ return new AArch64StackTagging(IsOptNone);
}
Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst,
@@ -400,7 +417,9 @@ bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) {
// dynamic alloca instrumentation for them as well.
!AI.isUsedWithInAlloca() &&
// swifterror allocas are register promoted by ISel
- !AI.isSwiftError();
+ !AI.isSwiftError() &&
+ // safe allocas are not interesting
+ !(SSI && SSI->isSafe(AI));
return IsInteresting;
}
@@ -482,7 +501,7 @@ void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
auto *NewAI = new AllocaInst(
TypeWithPadding, Info.AI->getType()->getAddressSpace(), nullptr, "", Info.AI);
NewAI->takeName(Info.AI);
- NewAI->setAlignment(MaybeAlign(Info.AI->getAlignment()));
+ NewAI->setAlignment(Info.AI->getAlign());
NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca());
NewAI->setSwiftError(Info.AI->isSwiftError());
NewAI->copyMetadata(*Info.AI);
@@ -516,6 +535,8 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
return false;
+ if (UseStackSafety)
+ SSI = &getAnalysis<StackSafetyGlobalInfoWrapperPass>().getResult();
F = &Fn;
DL = &Fn.getParent()->getDataLayout();
if (MergeInit)
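[editor's note] The constructor change above switches AArch64StackTagging from a MergeInit argument to an IsOptNone flag: each feature (init merging, stack-safety use) defaults to enabled unless the pass was created for an opt-none pipeline, and an explicit command-line occurrence always wins. A hedged sketch of that defaulting rule, using hypothetical names instead of the cl::opt machinery:

// Sketch only: an explicit command-line occurrence wins; otherwise the
// feature defaults to enabled unless the pass was created as "opt none".
struct CmdFlag {
  bool Value = true;        // value given on the command line (if any)
  unsigned Occurrences = 0; // how many times the user set it
};

bool resolveFeature(const CmdFlag &F, bool IsOptNone) {
  return F.Occurrences ? F.Value : !IsOptNone;
}
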
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index 5deb601822b8..a94856ef4fba 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -149,7 +149,9 @@ bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) {
continue;
const MachineOperand *BaseOp;
int64_t Offset;
- if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) &&
+ bool OffsetIsScalable;
+ if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable,
+ TRI) &&
BaseOp->isReg()) {
Register BaseReg = BaseOp->getReg();
if (PrevBaseReg == BaseReg) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 079e8f1764dc..029535cb98b5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -13,12 +13,12 @@
#include "AArch64Subtarget.h"
#include "AArch64.h"
-#include "AArch64CallLowering.h"
#include "AArch64InstrInfo.h"
-#include "AArch64LegalizerInfo.h"
#include "AArch64PBQPRegAlloc.h"
-#include "AArch64RegisterBankInfo.h"
#include "AArch64TargetMachine.h"
+#include "GISel/AArch64CallLowering.h"
+#include "GISel/AArch64LegalizerInfo.h"
+#include "GISel/AArch64RegisterBankInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineScheduler.h"
@@ -47,6 +47,18 @@ static cl::opt<bool>
cl::desc("Call nonlazybind functions via direct GOT load"),
cl::init(false), cl::Hidden);
+static cl::opt<unsigned> SVEVectorBitsMax(
+ "aarch64-sve-vector-bits-max",
+ cl::desc("Assume SVE vector registers are at most this big, "
+ "with zero meaning no maximum size is assumed."),
+ cl::init(0), cl::Hidden);
+
+static cl::opt<unsigned> SVEVectorBitsMin(
+ "aarch64-sve-vector-bits-min",
+ cl::desc("Assume SVE vector registers are at least this big, "
+ "with zero meaning no minimum size is assumed."),
+ cl::init(0), cl::Hidden);
+
AArch64Subtarget &
AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
StringRef CPUString) {
@@ -68,6 +80,9 @@ void AArch64Subtarget::initializeProperties() {
switch (ARMProcFamily) {
case Others:
break;
+ case Carmel:
+ CacheLineSize = 64;
+ break;
case CortexA35:
break;
case CortexA53:
@@ -86,8 +101,16 @@ void AArch64Subtarget::initializeProperties() {
case CortexA73:
case CortexA75:
case CortexA76:
+ case CortexA77:
+ case CortexA78:
+ case CortexX1:
PrefFunctionLogAlignment = 4;
break;
+ case A64FX:
+ CacheLineSize = 256;
+ PrefFunctionLogAlignment = 5;
+ PrefLoopLogAlignment = 5;
+ break;
case AppleA7:
case AppleA10:
case AppleA11:
@@ -188,6 +211,7 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
ReserveXRegister.set(18);
CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
+ InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
Legalizer.reset(new AArch64LegalizerInfo(*this));
auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
@@ -205,6 +229,10 @@ const CallLowering *AArch64Subtarget::getCallLowering() const {
return CallLoweringInfo.get();
}
+const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const {
+ return InlineAsmLoweringInfo.get();
+}
+
InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
return InstSelector.get();
}
@@ -316,3 +344,25 @@ void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
if (!MFI.isMaxCallFrameSizeComputed())
MFI.computeMaxCallFrameSize(MF);
}
+
+unsigned AArch64Subtarget::getMaxSVEVectorSizeInBits() const {
+ assert(HasSVE && "Tried to get SVE vector length without SVE support!");
+ assert(SVEVectorBitsMax % 128 == 0 &&
+ "SVE requires vector length in multiples of 128!");
+ assert((SVEVectorBitsMax >= SVEVectorBitsMin || SVEVectorBitsMax == 0) &&
+ "Minimum SVE vector size should not be larger than its maximum!");
+ if (SVEVectorBitsMax == 0)
+ return 0;
+ return (std::max(SVEVectorBitsMin, SVEVectorBitsMax) / 128) * 128;
+}
+
+unsigned AArch64Subtarget::getMinSVEVectorSizeInBits() const {
+ assert(HasSVE && "Tried to get SVE vector length without SVE support!");
+ assert(SVEVectorBitsMin % 128 == 0 &&
+ "SVE requires vector length in multiples of 128!");
+ assert((SVEVectorBitsMax >= SVEVectorBitsMin || SVEVectorBitsMax == 0) &&
+ "Minimum SVE vector size should not be larger than its maximum!");
+ if (SVEVectorBitsMax == 0)
+ return (SVEVectorBitsMin / 128) * 128;
+ return (std::min(SVEVectorBitsMin, SVEVectorBitsMax) / 128) * 128;
+}
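[editor's note] getMaxSVEVectorSizeInBits and getMinSVEVectorSizeInBits above clamp the two command-line hints against each other and round down to the architectural 128-bit granule, with 0 meaning the corresponding bound is unknown. A self-contained sketch of the same arithmetic (hypothetical free functions mirroring the member functions in the hunk, asserts trimmed to the relevant ones):

#include <algorithm>
#include <cassert>

unsigned maxSVEVectorSizeInBits(unsigned MinBits, unsigned MaxBits) {
  assert(MaxBits % 128 == 0 && "SVE lengths come in multiples of 128 bits");
  if (MaxBits == 0)
    return 0;                                       // no upper bound known
  return (std::max(MinBits, MaxBits) / 128) * 128;  // never below the minimum
}

unsigned minSVEVectorSizeInBits(unsigned MinBits, unsigned MaxBits) {
  assert(MinBits % 128 == 0 && "SVE lengths come in multiples of 128 bits");
  if (MaxBits == 0)
    return (MinBits / 128) * 128;                   // only a lower bound known
  return (std::min(MinBits, MaxBits) / 128) * 128;  // never above the maximum
}
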
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 40eb67a153e4..b111f0016948 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -19,6 +19,7 @@
#include "AArch64RegisterInfo.h"
#include "AArch64SelectionDAGInfo.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
@@ -38,11 +39,13 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
public:
enum ARMProcFamilyEnum : uint8_t {
Others,
+ A64FX,
AppleA7,
AppleA10,
AppleA11,
AppleA12,
AppleA13,
+ Carmel,
CortexA35,
CortexA53,
CortexA55,
@@ -52,6 +55,9 @@ public:
CortexA73,
CortexA75,
CortexA76,
+ CortexA77,
+ CortexA78,
+ CortexX1,
ExynosM3,
Falkor,
Kryo,
@@ -76,6 +82,7 @@ protected:
bool HasV8_3aOps = false;
bool HasV8_4aOps = false;
bool HasV8_5aOps = false;
+ bool HasV8_6aOps = false;
bool HasFPARMv8 = false;
bool HasNEON = false;
@@ -100,6 +107,10 @@ protected:
bool HasPAN_RWV = false;
bool HasCCPP = false;
+ // SVE extensions
+ bool HasSVE = false;
+ bool UseExperimentalZeroingPseudos = false;
+
// Armv8.2 Crypto extensions
bool HasSM4 = false;
bool HasSHA3 = false;
@@ -126,8 +137,6 @@ protected:
bool HasRCPC_IMMO = false;
bool HasLSLFast = false;
- bool HasSVE = false;
- bool HasSVE2 = false;
bool HasRCPC = false;
bool HasAggressiveFMA = false;
@@ -144,7 +153,17 @@ protected:
bool HasMTE = false;
bool HasTME = false;
+ // Armv8.6-A Extensions
+ bool HasBF16 = false;
+ bool HasMatMulInt8 = false;
+ bool HasMatMulFP32 = false;
+ bool HasMatMulFP64 = false;
+ bool HasAMVS = false;
+ bool HasFineGrainedTraps = false;
+ bool HasEnhancedCounterVirtualization = false;
+
// Arm SVE2 extensions
+ bool HasSVE2 = false;
bool HasSVE2AES = false;
bool HasSVE2SM4 = false;
bool HasSVE2SHA3 = false;
@@ -197,6 +216,8 @@ protected:
bool UseEL2ForTP = false;
bool UseEL3ForTP = false;
bool AllowTaggedGlobals = false;
+ bool HardenSlsRetBr = false;
+ bool HardenSlsBlr = false;
uint8_t MaxInterleaveFactor = 2;
uint8_t VectorInsertExtractBaseCost = 3;
uint16_t CacheLineSize = 0;
@@ -226,6 +247,7 @@ protected:
/// GlobalISel related APIs.
std::unique_ptr<CallLowering> CallLoweringInfo;
+ std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
std::unique_ptr<InstructionSelector> InstSelector;
std::unique_ptr<LegalizerInfo> Legalizer;
std::unique_ptr<RegisterBankInfo> RegBankInfo;
@@ -261,6 +283,7 @@ public:
return &getInstrInfo()->getRegisterInfo();
}
const CallLowering *getCallLowering() const override;
+ const InlineAsmLowering *getInlineAsmLowering() const override;
InstructionSelector *getInstructionSelector() const override;
const LegalizerInfo *getLegalizerInfo() const override;
const RegisterBankInfo *getRegBankInfo() const override;
@@ -348,6 +371,9 @@ public:
hasFuseCCSelect() || hasFuseLiterals();
}
+ bool hardenSlsRetBr() const { return HardenSlsRetBr; }
+ bool hardenSlsBlr() const { return HardenSlsBlr; }
+
bool useEL1ForTP() const { return UseEL1ForTP; }
bool useEL2ForTP() const { return UseEL2ForTP; }
bool useEL3ForTP() const { return UseEL3ForTP; }
@@ -360,7 +386,12 @@ public:
}
unsigned getCacheLineSize() const override { return CacheLineSize; }
unsigned getPrefetchDistance() const override { return PrefetchDistance; }
- unsigned getMinPrefetchStride() const override { return MinPrefetchStride; }
+ unsigned getMinPrefetchStride(unsigned NumMemAccesses,
+ unsigned NumStridedMemAccesses,
+ unsigned NumPrefetches,
+ bool HasCall) const override {
+ return MinPrefetchStride;
+ }
unsigned getMaxPrefetchIterationsAhead() const override {
return MaxPrefetchIterationsAhead;
}
@@ -373,6 +404,10 @@ public:
unsigned getWideningBaseCost() const { return WideningBaseCost; }
+ bool useExperimentalZeroingPseudos() const {
+ return UseExperimentalZeroingPseudos;
+ }
+
/// CPU has TBI (top byte of addresses is ignored during HW address
/// translation) and OS enables it.
bool supportsAddressTopByteIgnored() const;
@@ -402,6 +437,16 @@ public:
bool hasSVE2SM4() const { return HasSVE2SM4; }
bool hasSVE2SHA3() const { return HasSVE2SHA3; }
bool hasSVE2BitPerm() const { return HasSVE2BitPerm; }
+ bool hasMatMulInt8() const { return HasMatMulInt8; }
+ bool hasMatMulFP32() const { return HasMatMulFP32; }
+ bool hasMatMulFP64() const { return HasMatMulFP64; }
+
+ // Armv8.6-A Extensions
+ bool hasBF16() const { return HasBF16; }
+ bool hasFineGrainedTraps() const { return HasFineGrainedTraps; }
+ bool hasEnhancedCounterVirtualization() const {
+ return HasEnhancedCounterVirtualization;
+ }
bool isLittleEndian() const { return IsLittle; }
@@ -439,6 +484,7 @@ public:
bool hasDIT() const { return HasDIT; }
bool hasTRACEV8_4() const { return HasTRACEV8_4; }
bool hasAM() const { return HasAM; }
+ bool hasAMVS() const { return HasAMVS; }
bool hasSEL2() const { return HasSEL2; }
bool hasPMU() const { return HasPMU; }
bool hasTLB_RMI() const { return HasTLB_RMI; }
@@ -498,6 +544,12 @@ public:
}
void mirFileLoaded(MachineFunction &MF) const override;
+
+ // Return the known range for the bit length of SVE data registers. A value
+  // of 0 means nothing is known about that particular limit beyond what's
+ // implied by the architecture.
+ unsigned getMaxSVEVectorSizeInBits() const;
+ unsigned getMinSVEVectorSizeInBits() const;
};
} // End llvm namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index 6e82d326e519..ceceabc6ff4e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -18,18 +18,18 @@ include "llvm/TableGen/SearchableTable.td"
//===----------------------------------------------------------------------===//
def HasCCPP : Predicate<"Subtarget->hasCCPP()">,
- AssemblerPredicate<"FeatureCCPP", "ccpp">;
+ AssemblerPredicate<(all_of FeatureCCPP), "ccpp">;
def HasPAN : Predicate<"Subtarget->hasPAN()">,
- AssemblerPredicate<"FeaturePAN",
+ AssemblerPredicate<(all_of FeaturePAN),
"ARM v8.1 Privileged Access-Never extension">;
def HasPsUAO : Predicate<"Subtarget->hasPsUAO()">,
- AssemblerPredicate<"FeaturePsUAO",
+ AssemblerPredicate<(all_of FeaturePsUAO),
"ARM v8.2 UAO PState extension (psuao)">;
def HasPAN_RWV : Predicate<"Subtarget->hasPAN_RWV()">,
- AssemblerPredicate<"FeaturePAN_RWV",
+ AssemblerPredicate<(all_of FeaturePAN_RWV),
"ARM v8.2 PAN AT S1E1R and AT S1E1W Variation">;
//===----------------------------------------------------------------------===//
@@ -338,7 +338,7 @@ def : PState<"PAN", 0b00100>;
// v8.2a "User Access Override" extension-specific PStates
let Requires = [{ {AArch64::FeaturePsUAO} }] in
def : PState<"UAO", 0b00011>;
-// v8.4a timining insensitivity of data processing instructions
+// v8.4a timing insensitivity of data processing instructions
let Requires = [{ {AArch64::FeatureDIT} }] in
def : PState<"DIT", 0b11010>;
// v8.5a Spectre Mitigation
@@ -844,7 +844,7 @@ def : RWSysReg<"SP_EL2", 0b11, 0b110, 0b0100, 0b0001, 0b000>;
def : RWSysReg<"SPSel", 0b11, 0b000, 0b0100, 0b0010, 0b000>;
def : RWSysReg<"NZCV", 0b11, 0b011, 0b0100, 0b0010, 0b000>;
def : RWSysReg<"DAIF", 0b11, 0b011, 0b0100, 0b0010, 0b001>;
-def : RWSysReg<"CurrentEL", 0b11, 0b000, 0b0100, 0b0010, 0b010>;
+def : ROSysReg<"CurrentEL", 0b11, 0b000, 0b0100, 0b0010, 0b010>;
def : RWSysReg<"SPSR_irq", 0b11, 0b100, 0b0100, 0b0011, 0b000>;
def : RWSysReg<"SPSR_abt", 0b11, 0b100, 0b0100, 0b0011, 0b001>;
def : RWSysReg<"SPSR_und", 0b11, 0b100, 0b0100, 0b0011, 0b010>;
@@ -1167,7 +1167,6 @@ def : RWSysReg<"ICC_SRE_EL3", 0b11, 0b110, 0b1100, 0b1100, 0b101>;
def : RWSysReg<"ICC_IGRPEN0_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b110>;
def : RWSysReg<"ICC_IGRPEN1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b111>;
def : RWSysReg<"ICC_IGRPEN1_EL3", 0b11, 0b110, 0b1100, 0b1100, 0b111>;
-def : RWSysReg<"ICC_SEIEN_EL1", 0b11, 0b000, 0b1100, 0b1101, 0b000>;
def : RWSysReg<"ICC_AP0R0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b100>;
def : RWSysReg<"ICC_AP0R1_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b101>;
def : RWSysReg<"ICC_AP0R2_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b110>;
@@ -1185,9 +1184,8 @@ def : RWSysReg<"ICH_AP1R1_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b001>;
def : RWSysReg<"ICH_AP1R2_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b010>;
def : RWSysReg<"ICH_AP1R3_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b011>;
def : RWSysReg<"ICH_HCR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b000>;
-def : RWSysReg<"ICH_MISR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b010>;
+def : ROSysReg<"ICH_MISR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b010>;
def : RWSysReg<"ICH_VMCR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b111>;
-def : RWSysReg<"ICH_VSEIR_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b100>;
def : RWSysReg<"ICH_LR0_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b000>;
def : RWSysReg<"ICH_LR1_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b001>;
def : RWSysReg<"ICH_LR2_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b010>;
@@ -1260,7 +1258,7 @@ let Requires = [{ {AArch64::FeatureSPE} }] in {
def : RWSysReg<"PMBLIMITR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b000>;
def : RWSysReg<"PMBPTR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b001>;
def : RWSysReg<"PMBSR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b011>;
-def : RWSysReg<"PMBIDR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b111>;
+def : ROSysReg<"PMBIDR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b111>;
def : RWSysReg<"PMSCR_EL2", 0b11, 0b100, 0b1001, 0b1001, 0b000>;
def : RWSysReg<"PMSCR_EL12", 0b11, 0b101, 0b1001, 0b1001, 0b000>;
def : RWSysReg<"PMSCR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b000>;
@@ -1269,7 +1267,7 @@ def : RWSysReg<"PMSIRR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b011>;
def : RWSysReg<"PMSFCR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b100>;
def : RWSysReg<"PMSEVFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b101>;
def : RWSysReg<"PMSLATFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b110>;
-def : RWSysReg<"PMSIDR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b111>;
+def : ROSysReg<"PMSIDR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b111>;
}
// v8.2a "RAS extension" registers
@@ -1333,7 +1331,6 @@ def : RWSysReg<"PMMIR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b110>;
let Requires = [{ {AArch64::FeatureRASv8_4} }] in {
def : RWSysReg<"ERXPFGCTL_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b101>;
def : RWSysReg<"ERXPFGCDN_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b110>;
-def : RWSysReg<"ERXTS_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b111>;
def : RWSysReg<"ERXMISC2_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b010>;
def : RWSysReg<"ERXMISC3_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b011>;
def : ROSysReg<"ERXPFGF_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b100>;
@@ -1360,7 +1357,7 @@ def : RWSysReg<"MPAMVPM7_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b111>;
def : ROSysReg<"MPAMIDR_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b100>;
} //FeatureMPAM
-// v8.4a Activitiy Monitor registers
+// v8.4a Activity Monitor registers
// Op0 Op1 CRn CRm Op2
let Requires = [{ {AArch64::FeatureAM} }] in {
def : RWSysReg<"AMCR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b000>;
@@ -1426,7 +1423,7 @@ def : RWSysReg<"TRFCR_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b001>;
def : RWSysReg<"TRFCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b001>;
} //FeatureTRACEV8_4
-// v8.4a Timining insensitivity of data processing instructions
+// v8.4a Timing insensitivity of data processing instructions
// DIT: Data Independent Timing instructions
// Op0 Op1 CRn CRm Op2
let Requires = [{ {AArch64::FeatureDIT} }] in {
@@ -1490,6 +1487,41 @@ def : RWSysReg<"TRBTRG_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b110>;
def : ROSysReg<"TRBIDR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b111>;
} // FeatureTRBE
+
+// v8.6a Activity Monitors Virtualization Support
+let Requires = [{ {AArch64::FeatureAMVS} }] in {
+foreach n = 0-15 in {
+ foreach x = 0-1 in {
+ def : RWSysReg<"AMEVCNTVOFF"#x#n#"_EL2",
+ 0b11, 0b100, 0b1101, 0b1000, 0b000>{
+ let Encoding{4} = x;
+ let Encoding{3-0} = n;
+ }
+ }
+}
+}
+
+// v8.6a Fine Grained Virtualization Traps
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureFineGrainedTraps} }] in {
+def : RWSysReg<"HFGRTR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b100>;
+def : RWSysReg<"HFGWTR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b101>;
+def : RWSysReg<"HFGITR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b110>;
+def : RWSysReg<"HDFGRTR_EL2", 0b11, 0b100, 0b0011, 0b0001, 0b100>;
+def : RWSysReg<"HDFGWTR_EL2", 0b11, 0b100, 0b0011, 0b0001, 0b101>;
+}
+
+// v8.6a Enhanced Counter Virtualization
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureEnhancedCounterVirtualization} }] in {
+def : RWSysReg<"CNTSCALE_EL2", 0b11, 0b100, 0b1110, 0b0000, 0b100>;
+def : RWSysReg<"CNTISCALE_EL2", 0b11, 0b100, 0b1110, 0b0000, 0b101>;
+def : RWSysReg<"CNTPOFF_EL2", 0b11, 0b100, 0b1110, 0b0000, 0b110>;
+def : RWSysReg<"CNTVFRQ_EL2", 0b11, 0b100, 0b1110, 0b0000, 0b111>;
+def : RWSysReg<"CNTPCTSS_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b101>;
+def : RWSysReg<"CNTVCTSS_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b110>;
+}
+
// Cyclone specific system registers
// Op0 Op1 CRn CRm Op2
let Requires = [{ {AArch64::ProcAppleA7} }] in
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 115a7da8a6d9..a63b9a97ada5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -11,6 +11,7 @@
#include "AArch64TargetMachine.h"
#include "AArch64.h"
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64MacroFusion.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetObjectFile.h"
@@ -26,6 +27,7 @@
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -146,6 +148,11 @@ static cl::opt<int> EnableGlobalISelAtO(
cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),
cl::init(0));
+static cl::opt<bool> EnableSVEIntrinsicOpts(
+ "aarch64-sve-intrinsic-opts", cl::Hidden,
+ cl::desc("Enable SVE intrinsic opts"),
+ cl::init(true));
+
static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",
cl::init(true), cl::Hidden);
@@ -176,13 +183,16 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
initializeAArch64LoadStoreOptPass(*PR);
initializeAArch64SIMDInstrOptPass(*PR);
initializeAArch64PreLegalizerCombinerPass(*PR);
+ initializeAArch64PostLegalizerCombinerPass(*PR);
initializeAArch64PromoteConstantPass(*PR);
initializeAArch64RedundantCopyEliminationPass(*PR);
initializeAArch64StorePairSuppressPass(*PR);
initializeFalkorHWPFFixPass(*PR);
initializeFalkorMarkStridedAccessesLegacyPass(*PR);
initializeLDTLSCleanupPass(*PR);
+ initializeSVEIntrinsicOptsPass(*PR);
initializeAArch64SpeculationHardeningPass(*PR);
+ initializeAArch64SLSHardeningPass(*PR);
initializeAArch64StackTaggingPass(*PR);
initializeAArch64StackTaggingPreRAPass(*PR);
}
@@ -236,12 +246,8 @@ getEffectiveAArch64CodeModel(const Triple &TT, Optional<CodeModel::Model> CM,
if (CM) {
if (*CM != CodeModel::Small && *CM != CodeModel::Tiny &&
*CM != CodeModel::Large) {
- if (!TT.isOSFuchsia())
- report_fatal_error(
- "Only small, tiny and large code models are allowed on AArch64");
- else if (*CM != CodeModel::Kernel)
- report_fatal_error("Only small, tiny, kernel, and large code models "
- "are allowed on AArch64");
+ report_fatal_error(
+ "Only small, tiny and large code models are allowed on AArch64");
} else if (*CM == CodeModel::Tiny && !TT.isOSBinFormatELF())
report_fatal_error("tiny code model is only supported on ELF");
return *CM;
@@ -313,6 +319,9 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
// AArch64 supports default outlining behaviour.
setSupportsDefaultOutlining(true);
+
+ // AArch64 supports the debug entry values.
+ setSupportsDebugEntryValues(true);
}
AArch64TargetMachine::~AArch64TargetMachine() = default;
@@ -403,6 +412,7 @@ public:
bool addIRTranslator() override;
void addPreLegalizeMachineIR() override;
bool addLegalizeMachineIR() override;
+ void addPreRegBankSelect() override;
bool addRegBankSelect() override;
void addPreGlobalInstructionSelect() override;
bool addGlobalInstructionSelect() override;
@@ -435,6 +445,10 @@ void AArch64PassConfig::addIRPasses() {
// ourselves.
addPass(createAtomicExpandPass());
+ // Expand any SVE vector library calls that we can't code generate directly.
+ if (EnableSVEIntrinsicOpts && TM->getOptLevel() == CodeGenOpt::Aggressive)
+ addPass(createSVEIntrinsicOptsPass());
+
// Cmpxchg instructions are often used with a subsequent comparison to
// determine whether it succeeded. We can exploit existing control-flow in
// ldrex/strex loops to simplify this, but it needs tidying up.
@@ -454,6 +468,9 @@ void AArch64PassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
+ addPass(createAArch64StackTaggingPass(
+ /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None));
+
// Match interleaved memory accesses to ldN/stN intrinsics.
if (TM->getOptLevel() != CodeGenOpt::None) {
addPass(createInterleavedLoadCombinePass());
@@ -473,9 +490,6 @@ void AArch64PassConfig::addIRPasses() {
addPass(createLICMPass());
}
- addPass(createAArch64StackTaggingPass(/* MergeInit = */ TM->getOptLevel() !=
- CodeGenOpt::None));
-
// Add Control Flow Guard checks.
if (TM->getTargetTriple().isOSWindows())
addPass(createCFGuardCheckPass());
@@ -541,6 +555,14 @@ bool AArch64PassConfig::addLegalizeMachineIR() {
return false;
}
+void AArch64PassConfig::addPreRegBankSelect() {
+ // For now we don't add this to the pipeline for -O0. We could do in future
+ // if we split the combines into separate O0/opt groupings.
+ bool IsOptNone = getOptLevel() == CodeGenOpt::None;
+ if (!IsOptNone)
+ addPass(createAArch64PostLegalizeCombiner(IsOptNone));
+}
+
bool AArch64PassConfig::addRegBankSelect() {
addPass(new RegBankSelect());
return false;
@@ -614,6 +636,9 @@ void AArch64PassConfig::addPreSched2() {
// info.
addPass(createAArch64SpeculationHardeningPass());
+ addPass(createAArch64IndirectThunks());
+ addPass(createAArch64SLSHardeningPass());
+
if (TM->getOptLevel() != CodeGenOpt::None) {
if (EnableFalkorHWPFFix)
addPass(createFalkorHWPFFixPass());
@@ -648,4 +673,28 @@ void AArch64PassConfig::addPreEmitPass() {
if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
TM->getTargetTriple().isOSBinFormatMachO())
addPass(createAArch64CollectLOHPass());
+
+ // SVE bundles move prefixes with destructive operations.
+ addPass(createUnpackMachineBundles(nullptr));
+}
+
+yaml::MachineFunctionInfo *
+AArch64TargetMachine::createDefaultFuncInfoYAML() const {
+ return new yaml::AArch64FunctionInfo();
+}
+
+yaml::MachineFunctionInfo *
+AArch64TargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const {
+ const auto *MFI = MF.getInfo<AArch64FunctionInfo>();
+ return new yaml::AArch64FunctionInfo(*MFI);
+}
+
+bool AArch64TargetMachine::parseMachineFunctionInfo(
+ const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS,
+ SMDiagnostic &Error, SMRange &SourceRange) const {
+ const auto &YamlMFI =
+ reinterpret_cast<const yaml::AArch64FunctionInfo &>(MFI);
+ MachineFunction &MF = PFS.MF;
+ MF.getInfo<AArch64FunctionInfo>()->initializeBaseYamlFields(YamlMFI);
+ return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.h
index 5264efb89b9c..7738a4229391 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -49,6 +49,14 @@ public:
return TLOF.get();
}
+ yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override;
+ yaml::MachineFunctionInfo *
+ convertFuncInfoToYAML(const MachineFunction &MF) const override;
+ bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &,
+ PerFunctionMIParsingState &PFS,
+ SMDiagnostic &Error,
+ SMRange &SourceRange) const override;
+
private:
bool isLittle;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index 54562094fcf5..dfc66f0cb4c1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -20,7 +20,6 @@ using namespace dwarf;
void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- InitializeELF(TM.Options.UseInitArray);
// AARCH64 ELF ABI does not define static relocation type for TLS offset
// within a module. Do not generate AT_location for TLS variables.
SupportDebugThreadLocalLocation = false;
@@ -43,7 +42,7 @@ const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference(
const MCExpr *Res =
MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOT, getContext());
MCSymbol *PCSym = getContext().createTempSymbol();
- Streamer.EmitLabel(PCSym);
+ Streamer.emitLabel(PCSym);
const MCExpr *PC = MCSymbolRefExpr::create(PCSym, getContext());
return MCBinaryExpr::createSub(Res, PC, getContext());
}
@@ -68,7 +67,7 @@ const MCExpr *AArch64_MachoTargetObjectFile::getIndirectSymViaGOTPCRel(
const MCExpr *Res =
MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOT, getContext());
MCSymbol *PCSym = getContext().createTempSymbol();
- Streamer.EmitLabel(PCSym);
+ Streamer.emitLabel(PCSym);
const MCExpr *PC = MCSymbolRefExpr::create(PCSym, getContext());
return MCBinaryExpr::createSub(Res, PC, getContext());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
index 1cb4c028c80d..28324c2ae608 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -18,6 +18,11 @@ class AArch64TargetMachine;
/// This implementation is used for AArch64 ELF targets (Linux in particular).
class AArch64_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+
+public:
+ AArch64_ELFTargetObjectFile() {
+ PLTRelativeVariantKind = MCSymbolRefExpr::VK_PLT;
+ }
};
/// AArch64_MachoTargetObjectFile - This TLOF implementation is used for Darwin.
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 4724d6b8daea..cf6de797727b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -57,7 +57,8 @@ int AArch64TTIImpl::getIntImmCost(int64_t Val) {
}
/// Calculate the cost of materializing the given constant.
-int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -82,7 +83,8 @@ int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
}
int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -139,16 +141,17 @@ int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
if (Idx == ImmIdx) {
int NumConstants = (BitSize + 63) / 64;
- int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
+ int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
return (Cost <= NumConstants * TTI::TCC_Basic)
? static_cast<int>(TTI::TCC_Free)
: Cost;
}
- return AArch64TTIImpl::getIntImmCost(Imm, Ty);
+ return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -161,7 +164,7 @@ int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
// selected instruction, so we compute the materialization cost for the
// immediate directly.
if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
- return AArch64TTIImpl::getIntImmCost(Imm, Ty);
+ return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
switch (IID) {
default:
@@ -174,7 +177,7 @@ int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
case Intrinsic::umul_with_overflow:
if (Idx == 1) {
int NumConstants = (BitSize + 63) / 64;
- int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
+ int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
return (Cost <= NumConstants * TTI::TCC_Basic)
? static_cast<int>(TTI::TCC_Free)
: Cost;
@@ -190,7 +193,7 @@ int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
return TTI::TCC_Free;
break;
}
- return AArch64TTIImpl::getIntImmCost(Imm, Ty);
+ return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
TargetTransformInfo::PopcntSupportKind
@@ -208,8 +211,8 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
// A helper that returns a vector type from the given type. The number of
// elements in type Ty determine the vector width.
auto toVectorTy = [&](Type *ArgTy) {
- return VectorType::get(ArgTy->getScalarType(),
- DstTy->getVectorNumElements());
+ return FixedVectorType::get(ArgTy->getScalarType(),
+ cast<FixedVectorType>(DstTy)->getNumElements());
};
// Exit early if DstTy is not a vector type whose elements are at least
@@ -251,7 +254,7 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
// Legalize the source type and ensure it can be used in a widening
// operation.
- Type *SrcTy = toVectorTy(Extend->getSrcTy());
+ auto *SrcTy = toVectorTy(Extend->getSrcTy());
auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
@@ -267,6 +270,7 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
}
int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -291,11 +295,18 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
}
}
+ // TODO: Allow non-throughput costs that aren't binary.
+ auto AdjustCost = [&CostKind](int Cost) {
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Cost == 0 ? 0 : 1;
+ return Cost;
+ };
+
EVT SrcTy = TLI->getValueType(DL, Src);
EVT DstTy = TLI->getValueType(DL, Dst);
if (!SrcTy.isSimple() || !DstTy.isSimple())
- return BaseT::getCastInstrCost(Opcode, Dst, Src);
+ return AdjustCost(BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I));
static const TypeConversionCostTblEntry
ConversionTbl[] = {
@@ -397,9 +408,9 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
DstTy.getSimpleVT(),
SrcTy.getSimpleVT()))
- return Entry->Cost;
+ return AdjustCost(Entry->Cost);
- return BaseT::getCastInstrCost(Opcode, Dst, Src);
+ return AdjustCost(BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I));
}
int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
@@ -425,17 +436,18 @@ int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
auto DstVT = TLI->getValueType(DL, Dst);
auto SrcVT = TLI->getValueType(DL, Src);
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
// If the resulting type is still a vector and the destination type is legal,
// we may get the extension for free. If not, get the default cost for the
// extend.
if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
- return Cost + getCastInstrCost(Opcode, Dst, Src);
+ return Cost + getCastInstrCost(Opcode, Dst, Src, CostKind);
// The destination type should be larger than the element type. If not, get
// the default cost for the extend.
if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
- return Cost + getCastInstrCost(Opcode, Dst, Src);
+ return Cost + getCastInstrCost(Opcode, Dst, Src, CostKind);
switch (Opcode) {
default:
@@ -454,7 +466,16 @@ int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
}
// If we are unable to perform the extend for free, get the default cost.
- return Cost + getCastInstrCost(Opcode, Dst, Src);
+ return Cost + getCastInstrCost(Opcode, Dst, Src, CostKind);
+}
+
+unsigned AArch64TTIImpl::getCFInstrCost(unsigned Opcode,
+ TTI::TargetCostKind CostKind) {
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Opcode == Instruction::PHI ? 0 : 1;
+ assert(CostKind == TTI::TCK_RecipThroughput && "unexpected CostKind");
+ // Branches are assumed to be predicted.
+ return 0;
}
int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
@@ -483,10 +504,17 @@ int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
}
int AArch64TTIImpl::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+ TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
const Instruction *CxtI) {
+ // TODO: Handle more cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+ Opd2Info, Opd1PropInfo,
+ Opd2PropInfo, Args, CxtI);
+
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
@@ -504,7 +532,8 @@ int AArch64TTIImpl::getArithmeticInstrCost(
switch (ISD) {
default:
- return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+ Opd2Info,
Opd1PropInfo, Opd2PropInfo);
case ISD::SDIV:
if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
@@ -513,16 +542,20 @@ int AArch64TTIImpl::getArithmeticInstrCost(
// normally expanded to the sequence ADD + CMP + SELECT + SRA.
// The OperandValue properties many not be same as that of previous
// operation; conservatively assume OP_None.
- Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
+ Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind,
+ Opd1Info, Opd2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
- Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
+ Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind,
+ Opd1Info, Opd2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
- Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
+ Cost += getArithmeticInstrCost(Instruction::Select, Ty, CostKind,
+ Opd1Info, Opd2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
- Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
+ Cost += getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
+ Opd1Info, Opd2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
return Cost;
@@ -535,31 +568,34 @@ int AArch64TTIImpl::getArithmeticInstrCost(
// Vector signed division by constant are expanded to the
// sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
// to MULHS + SUB + SRL + ADD + SRL.
- int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, Opd1Info,
- Opd2Info,
+ int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
+ Opd1Info, Opd2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
- int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info,
- Opd2Info,
+ int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, CostKind,
+ Opd1Info, Opd2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
- int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info,
- Opd2Info,
+ int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
+ Opd1Info, Opd2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
}
}
- Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+ Opd2Info,
Opd1PropInfo, Opd2PropInfo);
if (Ty->isVectorTy()) {
// On AArch64, vector divisions are not supported natively and are
// expanded into scalar divisions of each pair of elements.
- Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, Opd1Info,
- Opd2Info, Opd1PropInfo, Opd2PropInfo);
- Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, Opd1Info,
- Opd2Info, Opd1PropInfo, Opd2PropInfo);
+ Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, CostKind,
+ Opd1Info, Opd2Info, Opd1PropInfo,
+ Opd2PropInfo);
+ Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, CostKind,
+ Opd1Info, Opd2Info, Opd1PropInfo,
+ Opd2PropInfo);
// TODO: if one of the arguments is scalar, then it's not necessary to
// double the cost of handling the vector elements.
Cost += Cost;
@@ -574,6 +610,16 @@ int AArch64TTIImpl::getArithmeticInstrCost(
// These nodes are marked as 'custom' for combining purposes only.
// We know that they are legal. See LowerAdd in ISelLowering.
return (Cost + 1) * LT.first;
+
+ case ISD::FADD:
+ // These nodes are marked as 'custom' just to lower them to SVE.
+ // We know said lowering will incur no additional cost.
+ if (isa<FixedVectorType>(Ty) && !Ty->getScalarType()->isFP128Ty())
+ return (Cost + 2) * LT.first;
+
+ return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+ Opd2Info,
+ Opd1PropInfo, Opd2PropInfo);
}
}
@@ -596,7 +642,12 @@ int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
}
int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy, const Instruction *I) {
+ Type *CondTy,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
+ // TODO: Handle other cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// We don't lower some vector selects well that are wider than the register
@@ -623,13 +674,18 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return Entry->Cost;
}
}
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
}
AArch64TTIImpl::TTI::MemCmpExpansionOptions
AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
TTI::MemCmpExpansionOptions Options;
- Options.AllowOverlappingLoads = !ST->requiresStrictAlign();
+ if (ST->requiresStrictAlign()) {
+ // TODO: Add cost modeling for strict align. Misaligned loads expand to
+ // a bunch of instructions when strict align is enabled.
+ return Options;
+ }
+ Options.AllowOverlappingLoads = true;
Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
Options.NumLoadsPerBlock = Options.MaxNumLoads;
// TODO: Though vector loads usually perform well on AArch64, in some targets
@@ -641,7 +697,17 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
MaybeAlign Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
+ // TODO: Handle other cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return 1;
+
+ // Type legalization can't handle structs
+ if (TLI->getValueType(DL, Ty, true) == MVT::Other)
+ return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
+ CostKind);
+
auto LT = TLI->getTypeLegalizationCost(DL, Ty);
if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
@@ -656,7 +722,8 @@ int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
return LT.first * 2 * AmortizationCost;
}
- if (Ty->isVectorTy() && Ty->getVectorElementType()->isIntegerTy(8)) {
+ if (Ty->isVectorTy() &&
+ cast<VectorType>(Ty)->getElementType()->isIntegerTy(8)) {
unsigned ProfitableNumElements;
if (Opcode == Instruction::Store)
// We use a custom trunc store lowering so v.4b should be profitable.
@@ -666,8 +733,8 @@ int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
// have to promote the elements to v.2.
ProfitableNumElements = 8;
- if (Ty->getVectorNumElements() < ProfitableNumElements) {
- unsigned NumVecElts = Ty->getVectorNumElements();
+ if (cast<FixedVectorType>(Ty)->getNumElements() < ProfitableNumElements) {
+ unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
// We generate 2 instructions per vector element.
return NumVectorizableInstsToAmortize * NumVecElts * 2;
@@ -677,20 +744,18 @@ int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
return LT.first;
}
-int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- bool UseMaskForCond,
- bool UseMaskForGaps) {
+int AArch64TTIImpl::getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond, bool UseMaskForGaps) {
assert(Factor >= 2 && "Invalid interleave factor");
- assert(isa<VectorType>(VecTy) && "Expect a vector type");
+ auto *VecVTy = cast<FixedVectorType>(VecTy);
if (!UseMaskForCond && !UseMaskForGaps &&
Factor <= TLI->getMaxSupportedInterleaveFactor()) {
- unsigned NumElts = VecTy->getVectorNumElements();
- auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
+ unsigned NumElts = VecVTy->getNumElements();
+ auto *SubVecTy =
+ FixedVectorType::get(VecTy->getScalarType(), NumElts / Factor);
// ldN/stN only support legal vector types of size 64 or 128 in bits.
// Accesses having vector types that are a multiple of 128 bits can be
@@ -701,18 +766,20 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace,
+ Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);
}
int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
int Cost = 0;
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
for (auto *I : Tys) {
if (!I->isVectorTy())
continue;
- if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
- Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0) +
- getMemoryOpCost(Instruction::Load, I, Align(128), 0);
+ if (I->getScalarSizeInBits() * cast<FixedVectorType>(I)->getNumElements() ==
+ 128)
+ Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0, CostKind) +
+ getMemoryOpCost(Instruction::Load, I, Align(128), 0, CostKind);
}
return Cost;
}
@@ -792,6 +859,11 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
getFalkorUnrollingPreferences(L, SE, UP);
}
+void AArch64TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ BaseT::getPeelingPreferences(L, SE, PP);
+}
+
Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType) {
switch (Inst->getIntrinsicID()) {
@@ -902,7 +974,7 @@ bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const {
- assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
+ auto *VTy = cast<VectorType>(Ty);
unsigned ScalarBits = Ty->getScalarSizeInBits();
switch (Opcode) {
case Instruction::FAdd:
@@ -913,10 +985,10 @@ bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
case Instruction::Mul:
return false;
case Instruction::Add:
- return ScalarBits * Ty->getVectorNumElements() >= 128;
+ return ScalarBits * cast<FixedVectorType>(VTy)->getNumElements() >= 128;
case Instruction::ICmp:
return (ScalarBits < 64) &&
- (ScalarBits * Ty->getVectorNumElements() >= 128);
+ (ScalarBits * cast<FixedVectorType>(VTy)->getNumElements() >= 128);
case Instruction::FCmp:
return Flags.NoNaN;
default:
@@ -925,11 +997,14 @@ bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
return false;
}
-int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
- bool IsPairwiseForm) {
+int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode,
+ VectorType *ValTy,
+ bool IsPairwiseForm,
+ TTI::TargetCostKind CostKind) {
if (IsPairwiseForm)
- return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
+ return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
+ CostKind);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
MVT MTy = LT.second;
@@ -950,11 +1025,12 @@ int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
return LT.first * Entry->Cost;
- return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
+ return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
+ CostKind);
}
-int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) {
+int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
+ int Index, VectorType *SubTp) {
if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
static const CostTblEntry ShuffleTbl[] = {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 131219ca6944..1f029689a60e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -72,11 +72,11 @@ public:
using BaseT::getIntImmCost;
int getIntImmCost(int64_t Val);
- int getIntImmCost(const APInt &Imm, Type *Ty);
+ int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ Type *Ty, TTI::TargetCostKind CostKind);
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ Type *Ty, TTI::TargetCostKind CostKind);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
/// @}
@@ -98,6 +98,8 @@ public:
unsigned getRegisterBitWidth(bool Vector) const {
if (Vector) {
+ if (ST->hasSVE())
+ return std::max(ST->getMinSVEVectorSizeInBits(), 128u);
if (ST->hasNEON())
return 128;
return 0;
@@ -112,15 +114,19 @@ public:
unsigned getMaxInterleaveFactor(unsigned VF);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
unsigned Index);
+ unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
+
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -131,30 +137,37 @@ public:
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
- unsigned AddressSpace, const Instruction *I = nullptr);
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
+
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType);
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
- bool isLegalMaskedLoadStore(Type *DataType, MaybeAlign Alignment) {
- if (!isa<VectorType>(DataType) || !ST->hasSVE())
+ bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
+ if (!isa<ScalableVectorType>(DataType) || !ST->hasSVE())
return false;
- Type *Ty = DataType->getVectorElementType();
- if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
+ Type *Ty = cast<ScalableVectorType>(DataType)->getElementType();
+ if (Ty->isBFloatTy() || Ty->isHalfTy() ||
+ Ty->isFloatTy() || Ty->isDoubleTy())
return true;
if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
@@ -164,19 +177,37 @@ public:
return false;
}
- bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) {
+ bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
return isLegalMaskedLoadStore(DataType, Alignment);
}
- bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) {
+ bool isLegalMaskedStore(Type *DataType, Align Alignment) {
return isLegalMaskedLoadStore(DataType, Alignment);
}
- int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
- ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false);
+ bool isLegalNTStore(Type *DataType, Align Alignment) {
+ // NOTE: The logic below is mostly geared towards LV, which calls it with
+ // vectors with 2 elements. We might want to improve that, if other
+ // users show up.
+ // Nontemporal vector stores can be directly lowered to STNP, if the vector
+ // can be halved so that each half fits into a register. That's the case if
+ // the element type fits into a register and the number of elements is a
+ // power of 2 > 1.
+ if (auto *DataTypeVTy = dyn_cast<VectorType>(DataType)) {
+ unsigned NumElements =
+ cast<FixedVectorType>(DataTypeVTy)->getNumElements();
+ unsigned EltSize = DataTypeVTy->getElementType()->getScalarSizeInBits();
+ return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
+ EltSize <= 128 && isPowerOf2_64(EltSize);
+ }
+ return BaseT::isLegalNTStore(DataType, Alignment);
+ }
+
+ int getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false);
bool
shouldConsiderAddressTypePromotion(const Instruction &I,
@@ -207,10 +238,12 @@ public:
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const;
- int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwiseForm);
+ int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+ bool IsPairwiseForm,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
- int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+ int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+ VectorType *SubTp);
/// @}
};
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index be4c96022472..0ac09c4f96f0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -260,6 +260,8 @@ public:
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseDirective(AsmToken DirectiveID) override;
unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
@@ -755,12 +757,13 @@ public:
return false;
int64_t Val = MCE->getValue();
- int64_t SVal = typename std::make_signed<T>::type(Val);
- int64_t UVal = typename std::make_unsigned<T>::type(Val);
- if (Val != SVal && Val != UVal)
+ // Avoid left shift by 64 directly.
+ uint64_t Upper = UINT64_C(-1) << (sizeof(T) * 4) << (sizeof(T) * 4);
+ // Allow all-0 or all-1 in top bits to permit bitwise NOT.
+ if ((Val & Upper) && (Val & Upper) != Upper)
return false;
- return AArch64_AM::isLogicalImmediate(UVal, sizeof(T) * 8);
+ return AArch64_AM::isLogicalImmediate(Val & ~Upper, sizeof(T) * 8);
}
bool isShiftedImm() const { return Kind == k_ShiftedImm; }
@@ -852,8 +855,7 @@ public:
if (!isShiftedImm() && (!isImm() || !isa<MCConstantExpr>(getImm())))
return DiagnosticPredicateTy::NoMatch;
- bool IsByte =
- std::is_same<int8_t, typename std::make_signed<T>::type>::value;
+ bool IsByte = std::is_same<int8_t, std::make_signed_t<T>>::value;
if (auto ShiftedImm = getShiftedVal<8>())
if (!(IsByte && ShiftedImm->second) &&
AArch64_AM::isSVECpyImm<T>(uint64_t(ShiftedImm->first)
@@ -870,8 +872,7 @@ public:
if (!isShiftedImm() && (!isImm() || !isa<MCConstantExpr>(getImm())))
return DiagnosticPredicateTy::NoMatch;
- bool IsByte =
- std::is_same<int8_t, typename std::make_signed<T>::type>::value;
+ bool IsByte = std::is_same<int8_t, std::make_signed_t<T>>::value;
if (auto ShiftedImm = getShiftedVal<8>())
if (!(IsByte && ShiftedImm->second) &&
AArch64_AM::isSVEAddSubImm<T>(ShiftedImm->first
@@ -969,11 +970,15 @@ public:
bool isMOVZMovAlias() const {
if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- uint64_t Value = CE->getValue();
+ const MCExpr *E = getImm();
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(E)) {
+ uint64_t Value = CE->getValue();
- return AArch64_AM::isMOVZMovAlias(Value, Shift, RegWidth);
+ return AArch64_AM::isMOVZMovAlias(Value, Shift, RegWidth);
+ }
+ // Only supports the case of Shift being 0 if an expression is used as an
+ // operand
+ return !Shift && E;
}
template<int RegWidth, int Shift>
@@ -1033,8 +1038,10 @@ public:
bool isNeonVectorRegLo() const {
return Kind == k_Register && Reg.Kind == RegKind::NeonVector &&
- AArch64MCRegisterClasses[AArch64::FPR128_loRegClassID].contains(
- Reg.RegNum);
+ (AArch64MCRegisterClasses[AArch64::FPR128_loRegClassID].contains(
+ Reg.RegNum) ||
+ AArch64MCRegisterClasses[AArch64::FPR64_loRegClassID].contains(
+ Reg.RegNum));
}
template <unsigned Class> bool isSVEVectorReg() const {
@@ -1606,7 +1613,7 @@ public:
void addLogicalImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- typename std::make_unsigned<T>::type Val = MCE->getValue();
+ std::make_unsigned_t<T> Val = MCE->getValue();
uint64_t encoding = AArch64_AM::encodeLogicalImmediate(Val, sizeof(T) * 8);
Inst.addOperand(MCOperand::createImm(encoding));
}
@@ -1615,7 +1622,7 @@ public:
void addLogicalImmNotOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
- typename std::make_unsigned<T>::type Val = ~MCE->getValue();
+ std::make_unsigned_t<T> Val = ~MCE->getValue();
uint64_t encoding = AArch64_AM::encodeLogicalImmediate(Val, sizeof(T) * 8);
Inst.addOperand(MCOperand::createImm(encoding));
}
@@ -1771,9 +1778,13 @@ public:
void addMOVZMovAliasOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
- uint64_t Value = CE->getValue();
- Inst.addOperand(MCOperand::createImm((Value >> Shift) & 0xffff));
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (CE) {
+ uint64_t Value = CE->getValue();
+ Inst.addOperand(MCOperand::createImm((Value >> Shift) & 0xffff));
+ } else {
+ addExpr(Inst, getImm());
+ }
}
template<int Shift>
@@ -2243,10 +2254,16 @@ static unsigned matchSVEPredicateVectorRegName(StringRef Name) {
bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
+ return tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success;
+}
+
+OperandMatchResultTy AArch64AsmParser::tryParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) {
StartLoc = getLoc();
auto Res = tryParseScalarRegister(RegNo);
EndLoc = SMLoc::getFromPointer(getLoc().getPointer() - 1);
- return Res != MatchOperand_Success;
+ return Res;
}
// Matches a register name or register alias previously defined by '.req'
@@ -2404,9 +2421,9 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- Parser.Lex(); // Eat identifier token.
Operands.push_back(AArch64Operand::CreatePrefetch(
*PRFM, Tok.getString(), S, getContext()));
+ Parser.Lex(); // Eat identifier token.
return MatchOperand_Success;
}
@@ -2427,9 +2444,9 @@ AArch64AsmParser::tryParsePSBHint(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- Parser.Lex(); // Eat identifier token.
Operands.push_back(AArch64Operand::CreatePSBHint(
PSB->Encoding, Tok.getString(), S, getContext()));
+ Parser.Lex(); // Eat identifier token.
return MatchOperand_Success;
}
@@ -2450,9 +2467,9 @@ AArch64AsmParser::tryParseBTIHint(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- Parser.Lex(); // Eat identifier token.
Operands.push_back(AArch64Operand::CreateBTIHint(
BTI->Encoding, Tok.getString(), S, getContext()));
+ Parser.Lex(); // Eat identifier token.
return MatchOperand_Success;
}
@@ -2827,6 +2844,7 @@ static const struct Extension {
{"tlb-rmi", {AArch64::FeatureTLB_RMI}},
{"pan-rwv", {AArch64::FeaturePAN_RWV}},
{"ccpp", {AArch64::FeatureCCPP}},
+ {"rcpc", {AArch64::FeatureRCPC}},
{"sve", {AArch64::FeatureSVE}},
{"sve2", {AArch64::FeatureSVE2}},
{"sve2-aes", {AArch64::FeatureSVE2AES}},
@@ -2851,6 +2869,8 @@ static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
Str += "ARMv8.4a";
else if (FBS[AArch64::HasV8_5aOps])
Str += "ARMv8.5a";
+ else if (FBS[AArch64::HasV8_6aOps])
+ Str += "ARMv8.6a";
else {
auto ext = std::find_if(std::begin(ExtensionMap),
std::end(ExtensionMap),
@@ -3771,7 +3791,7 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
// First check for the AArch64-specific .req directive.
if (Parser.getTok().is(AsmToken::Identifier) &&
- Parser.getTok().getIdentifier() == ".req") {
+ Parser.getTok().getIdentifier().lower() == ".req") {
parseDirectiveReq(Name, NameLoc);
// We always return 'error' for this, as we're done with this
// statement and don't need to match the 'instruction."
@@ -4106,6 +4126,16 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst, SMLoc &IDLoc,
"unpredictable STXP instruction, status is also a source");
break;
}
+ case AArch64::LDRABwriteback:
+ case AArch64::LDRAAwriteback: {
+ unsigned Xt = Inst.getOperand(0).getReg();
+ unsigned Xn = Inst.getOperand(1).getReg();
+ if (Xt == Xn)
+ return Error(Loc[0],
+ "unpredictable LDRA instruction, writeback base"
+ " is also a destination");
+ break;
+ }
}
@@ -4235,6 +4265,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
return Error(Loc, "index must be a multiple of 4 in range [-32, 28].");
case Match_InvalidMemoryIndexed16SImm4:
return Error(Loc, "index must be a multiple of 16 in range [-128, 112].");
+ case Match_InvalidMemoryIndexed32SImm4:
+ return Error(Loc, "index must be a multiple of 32 in range [-256, 224].");
case Match_InvalidMemoryIndexed1SImm6:
return Error(Loc, "index must be an integer in range [-32, 31].");
case Match_InvalidMemoryIndexedSImm8:
@@ -4824,7 +4856,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, getSTI());
+ Out.emitInstruction(Inst, getSTI());
return false;
}
case Match_MissingFeature: {
@@ -4894,6 +4926,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidMemoryIndexed4SImm4:
case Match_InvalidMemoryIndexed1SImm6:
case Match_InvalidMemoryIndexed16SImm4:
+ case Match_InvalidMemoryIndexed32SImm4:
case Match_InvalidMemoryIndexed4SImm7:
case Match_InvalidMemoryIndexed8SImm7:
case Match_InvalidMemoryIndexed16SImm7:
@@ -5024,7 +5057,7 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
getContext().getObjectFileInfo()->getObjectFileType();
bool IsMachO = Format == MCObjectFileInfo::IsMachO;
- StringRef IDVal = DirectiveID.getIdentifier();
+ auto IDVal = DirectiveID.getIdentifier().lower();
SMLoc Loc = DirectiveID.getLoc();
if (IDVal == ".arch")
parseDirectiveArch(Loc);
@@ -5076,6 +5109,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
break;
case AArch64::ArchKind::ARMV8_4A:
case AArch64::ArchKind::ARMV8_5A:
+ case AArch64::ArchKind::ARMV8_6A:
RequestedExtensions.push_back("sm4");
RequestedExtensions.push_back("sha3");
RequestedExtensions.push_back("sha2");
@@ -5095,6 +5129,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
break;
case AArch64::ArchKind::ARMV8_4A:
case AArch64::ArchKind::ARMV8_5A:
+ case AArch64::ArchKind::ARMV8_6A:
RequestedExtensions.push_back("nosm4");
RequestedExtensions.push_back("nosha3");
RequestedExtensions.push_back("nosha2");
@@ -5314,7 +5349,7 @@ bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) {
Inst.setOpcode(AArch64::TLSDESCCALL);
Inst.addOperand(MCOperand::createExpr(Expr));
- getParser().getStreamer().EmitInstruction(Inst, getSTI());
+ getParser().getStreamer().emitInstruction(Inst, getSTI());
return false;
}
@@ -5365,7 +5400,7 @@ bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) {
"unexpected token in '" + Twine(IDVal) + "' directive"))
return true;
- getStreamer().EmitLOHDirective((MCLOHType)Kind, Args);
+ getStreamer().emitLOHDirective((MCLOHType)Kind, Args);
return false;
}
@@ -5458,7 +5493,7 @@ bool AArch64AsmParser::parseDirectiveUnreq(SMLoc L) {
bool AArch64AsmParser::parseDirectiveCFINegateRAState() {
if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
return true;
- getStreamer().EmitCFINegateRAState();
+ getStreamer().emitCFINegateRAState();
return false;
}
@@ -5468,7 +5503,7 @@ bool AArch64AsmParser::parseDirectiveCFIBKeyFrame() {
if (parseToken(AsmToken::EndOfStatement,
"unexpected token in '.cfi_b_key_frame'"))
return true;
- getStreamer().EmitCFIBKeyFrame();
+ getStreamer().emitCFIBKeyFrame();
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index d6db88603429..1ff4abb34054 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -146,6 +146,9 @@ static DecodeStatus DecodeExclusiveLdStInstruction(MCInst &Inst, uint32_t insn,
static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeAuthLoadInstruction(MCInst &Inst, uint32_t insn,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeAddSubERegInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address,
const void *Decoder);
@@ -1501,6 +1504,39 @@ static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn,
return Success;
}
+static DecodeStatus DecodeAuthLoadInstruction(MCInst &Inst, uint32_t insn,
+ uint64_t Addr,
+ const void *Decoder) {
+ unsigned Rt = fieldFromInstruction(insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(insn, 5, 5);
+ uint64_t offset = fieldFromInstruction(insn, 22, 1) << 9 |
+ fieldFromInstruction(insn, 12, 9);
+ unsigned writeback = fieldFromInstruction(insn, 11, 1);
+
+ switch (Inst.getOpcode()) {
+ default:
+ return Fail;
+ case AArch64::LDRAAwriteback:
+ case AArch64::LDRABwriteback:
+ DecodeGPR64spRegisterClass(Inst, Rn /* writeback register */, Addr,
+ Decoder);
+ break;
+ case AArch64::LDRAAindexed:
+ case AArch64::LDRABindexed:
+ break;
+ }
+
+ DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
+ DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+ DecodeSImm<10>(Inst, offset, Addr, Decoder);
+
+ if (writeback && Rt == Rn && Rn != 31) {
+ return SoftFail;
+ }
+
+ return Success;
+}
+
static DecodeStatus DecodeAddSubERegInstruction(MCInst &Inst, uint32_t insn,
uint64_t Addr,
const void *Decoder) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 76ff238234d9..11a8d5def429 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -62,10 +62,9 @@ struct IncomingArgHandler : public CallLowering::ValueHandler {
auto &MFI = MIRBuilder.getMF().getFrameInfo();
int FI = MFI.CreateFixedObject(Size, Offset, true);
MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
- Register AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 64));
- MIRBuilder.buildFrameIndex(AddrReg, FI);
+ auto AddrReg = MIRBuilder.buildFrameIndex(LLT::pointer(0, 64), FI);
StackUsed = std::max(StackUsed, Size + Offset);
- return AddrReg;
+ return AddrReg.getReg(0);
}
void assignValueToReg(Register ValVReg, Register PhysReg,
@@ -87,10 +86,10 @@ struct IncomingArgHandler : public CallLowering::ValueHandler {
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
- // FIXME: Get alignment
- auto MMO = MIRBuilder.getMF().getMachineMemOperand(
+ MachineFunction &MF = MIRBuilder.getMF();
+ auto MMO = MF.getMachineMemOperand(
MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
- 1);
+ inferAlignFromPtrInfo(MF, MPO));
MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
}
@@ -134,7 +133,7 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
int FPDiff = 0)
: ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), FPDiff(FPDiff),
- StackSize(0) {}
+ StackSize(0), SPReg(0) {}
bool isIncomingArgumentHandler() const override { return false; }
@@ -147,23 +146,20 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
if (IsTailCall) {
Offset += FPDiff;
int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
- Register FIReg = MRI.createGenericVirtualRegister(p0);
- MIRBuilder.buildFrameIndex(FIReg, FI);
+ auto FIReg = MIRBuilder.buildFrameIndex(p0, FI);
MPO = MachinePointerInfo::getFixedStack(MF, FI);
- return FIReg;
+ return FIReg.getReg(0);
}
- Register SPReg = MRI.createGenericVirtualRegister(p0);
- MIRBuilder.buildCopy(SPReg, Register(AArch64::SP));
+ if (!SPReg)
+ SPReg = MIRBuilder.buildCopy(p0, Register(AArch64::SP)).getReg(0);
- Register OffsetReg = MRI.createGenericVirtualRegister(s64);
- MIRBuilder.buildConstant(OffsetReg, Offset);
+ auto OffsetReg = MIRBuilder.buildConstant(s64, Offset);
- Register AddrReg = MRI.createGenericVirtualRegister(p0);
- MIRBuilder.buildPtrAdd(AddrReg, SPReg, OffsetReg);
+ auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);
MPO = MachinePointerInfo::getStack(MF, Offset);
- return AddrReg;
+ return AddrReg.getReg(0);
}
void assignValueToReg(Register ValVReg, Register PhysReg,
@@ -175,17 +171,33 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
- if (VA.getLocInfo() == CCValAssign::LocInfo::AExt) {
- Size = VA.getLocVT().getSizeInBits() / 8;
- ValVReg = MIRBuilder.buildAnyExt(LLT::scalar(Size * 8), ValVReg)
- ->getOperand(0)
- .getReg();
- }
- auto MMO = MIRBuilder.getMF().getMachineMemOperand(
- MPO, MachineMemOperand::MOStore, Size, 1);
+ MachineFunction &MF = MIRBuilder.getMF();
+ auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, Size,
+ inferAlignFromPtrInfo(MF, MPO));
MIRBuilder.buildStore(ValVReg, Addr, *MMO);
}
+ void assignValueToAddress(const CallLowering::ArgInfo &Arg, Register Addr,
+ uint64_t Size, MachinePointerInfo &MPO,
+ CCValAssign &VA) override {
+ unsigned MaxSize = Size * 8;
+ // For varargs, we always want to extend them to 8 bytes, in which case
+ // we disable setting a max.
+ if (!Arg.IsFixed)
+ MaxSize = 0;
+
+ Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
+ ? extendRegister(Arg.Regs[0], VA, MaxSize)
+ : Arg.Regs[0];
+
+ // If we extended we might need to adjust the MMO's Size.
+ const LLT RegTy = MRI.getType(ValVReg);
+ if (RegTy.getSizeInBytes() > Size)
+ Size = RegTy.getSizeInBytes();
+
+ assignValueToAddress(ValVReg, Addr, Size, MPO, VA);
+ }
+
bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
const CallLowering::ArgInfo &Info,
@@ -209,6 +221,9 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
/// callee's. Unused elsewhere.
int FPDiff;
uint64_t StackSize;
+
+ // Cache the SP register vreg if we need it more than once in this call site.
+ Register SPReg;
};
} // namespace
@@ -222,13 +237,13 @@ void AArch64CallLowering::splitToValueTypes(
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
LLVMContext &Ctx = OrigArg.Ty->getContext();
- if (OrigArg.Ty->isVoidTy())
- return;
-
SmallVector<EVT, 4> SplitVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
+ if (SplitVTs.size() == 0)
+ return;
+
if (SplitVTs.size() == 1) {
// No splitting to do, but we want to replace the original type (e.g. [1 x
// double] -> double).
@@ -322,8 +337,7 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
}
auto Undef = MIRBuilder.buildUndef({OldLLT});
CurVReg =
- MIRBuilder.buildMerge({NewLLT}, {CurVReg, Undef.getReg(0)})
- .getReg(0);
+ MIRBuilder.buildMerge({NewLLT}, {CurVReg, Undef}).getReg(0);
} else {
// Just do a vector extend.
CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
@@ -413,6 +427,14 @@ static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
}
}
+bool AArch64CallLowering::fallBackToDAGISel(const Function &F) const {
+ if (isa<ScalableVectorType>(F.getReturnType()))
+ return true;
+ return llvm::any_of(F.args(), [](const Argument &A) {
+ return isa<ScalableVectorType>(A.getType());
+ });
+}
+
bool AArch64CallLowering::lowerFormalArguments(
MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs) const {
@@ -424,7 +446,7 @@ bool AArch64CallLowering::lowerFormalArguments(
SmallVector<ArgInfo, 8> SplitArgs;
unsigned i = 0;
for (auto &Arg : F.args()) {
- if (DL.getTypeStoreSize(Arg.getType()) == 0)
+ if (DL.getTypeStoreSize(Arg.getType()).isZero())
continue;
ArgInfo OrigArg{VRegs[i], Arg.getType()};
@@ -759,17 +781,17 @@ bool AArch64CallLowering::isEligibleForTailCallOptimization(
return true;
}
-static unsigned getCallOpcode(const Function &CallerF, bool IsIndirect,
+static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
bool IsTailCall) {
if (!IsTailCall)
- return IsIndirect ? AArch64::BLR : AArch64::BL;
+ return IsIndirect ? getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL;
if (!IsIndirect)
return AArch64::TCRETURNdi;
// When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
// x16 or x17.
- if (CallerF.hasFnAttribute("branch-target-enforcement"))
+ if (CallerF.getFunction().hasFnAttribute("branch-target-enforcement"))
return AArch64::TCRETURNriBTI;
return AArch64::TCRETURNri;
@@ -805,7 +827,7 @@ bool AArch64CallLowering::lowerTailCall(
if (!IsSibCall)
CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
- unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), true);
+ unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true);
auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
MIB.add(Info.Callee);
@@ -863,7 +885,6 @@ bool AArch64CallLowering::lowerTailCall(
const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
// Do the actual argument marshalling.
- SmallVector<unsigned, 8> PhysRegs;
OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
AssignFnVarArg, true, FPDiff);
if (!handleAssignments(MIRBuilder, OutArgs, Handler))
@@ -965,7 +986,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Create a temporarily-floating call instruction so we can add the implicit
// uses of arg registers.
- unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), false);
+ unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
MIB.add(Info.Callee);
@@ -981,7 +1002,6 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
TRI->emitReservedArgRegCallError(MF);
// Do the actual argument marshalling.
- SmallVector<unsigned, 8> PhysRegs;
OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
AssignFnVarArg, false);
if (!handleAssignments(MIRBuilder, OutArgs, Handler))
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallLowering.h b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
index b0c601c7062c..640a86253059 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CallLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
@@ -37,6 +37,8 @@ public:
ArrayRef<Register> VRegs,
Register SwiftErrorVReg) const override;
+ bool fallBackToDAGISel(const Function &F) const override;
+
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index b9ac2657e1c5..408f0cb77e73 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -31,6 +31,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
@@ -63,6 +65,9 @@ public:
// cache it here for each run of the selector.
ProduceNonFlagSettingCondBr =
!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
+ MFReturnAddr = Register();
+
+ processPHIs(MF);
}
private:
@@ -71,23 +76,33 @@ private:
bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
// A lowering phase that runs before any selection attempts.
-
- void preISelLower(MachineInstr &I) const;
+ // Returns true if the instruction was modified.
+ bool preISelLower(MachineInstr &I);
// An early selection function that runs before the selectImpl() call.
bool earlySelect(MachineInstr &I) const;
+ // Do some preprocessing of G_PHIs before we begin selection.
+ void processPHIs(MachineFunction &MF);
+
bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
/// Eliminate same-sized cross-bank copies into stores before selectImpl().
- void contractCrossBankCopyIntoStore(MachineInstr &I,
- MachineRegisterInfo &MRI) const;
+ bool contractCrossBankCopyIntoStore(MachineInstr &I,
+ MachineRegisterInfo &MRI);
+
+ bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
+ bool tryOptAndIntoCompareBranch(MachineInstr *LHS,
+ int64_t CmpConstant,
+ const CmpInst::Predicate &Pred,
+ MachineBasicBlock *DstMBB,
+ MachineIRBuilder &MIB) const;
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
@@ -112,6 +127,8 @@ private:
const RegisterBank &RB,
MachineIRBuilder &MIRBuilder) const;
bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
+ MachineRegisterInfo &MRI) const;
bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
@@ -123,7 +140,7 @@ private:
MachineRegisterInfo &MRI) const;
bool selectIntrinsicWithSideEffects(MachineInstr &I,
MachineRegisterInfo &MRI) const;
- bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
@@ -131,17 +148,25 @@ private:
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
- unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
- MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
+ unsigned emitConstantPoolEntry(const Constant *CPVal,
+ MachineFunction &MF) const;
+ MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
MachineIRBuilder &MIRBuilder) const;
// Emit a vector concat operation.
MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
Register Op2,
MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
- MachineOperand &Predicate,
- MachineIRBuilder &MIRBuilder) const;
+
+ // Emit an integer compare between LHS and RHS, which checks for Predicate.
+ //
+ // This returns the produced compare instruction, and the predicate which
+ // was ultimately used in the compare. The predicate may differ from what
+ // is passed in \p Predicate due to optimization.
+ std::pair<MachineInstr *, CmpInst::Predicate>
+ emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
+ MachineOperand &Predicate,
+ MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
@@ -163,6 +188,13 @@ private:
MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
MachineIRBuilder &MIRBuilder) const;
+ /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
+ /// \p IsNegative is true if the test should be "not zero".
+ /// This will also optimize the test bit instruction when possible.
+ MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
+ MachineBasicBlock *DstMBB,
+ MachineIRBuilder &MIB) const;
+
// Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
// We use these manually instead of using the importer since it doesn't
// support SDNodeXForm.
@@ -194,6 +226,11 @@ private:
return selectAddrModeUnscaled(Root, 16);
}
+ /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
+ /// from complex pattern matchers like selectAddrModeIndexed().
+ ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
+ MachineRegisterInfo &MRI) const;
+
ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
unsigned Size) const;
template <int Width>
@@ -258,6 +295,8 @@ private:
/// new copy.
Register narrowExtendRegIfNeeded(Register ExtReg,
MachineIRBuilder &MIB) const;
+ Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size,
+ MachineIRBuilder &MIB) const;
ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
@@ -272,12 +311,17 @@ private:
unsigned OpFlags) const;
// Optimization methods.
- bool tryOptVectorShuffle(MachineInstr &I) const;
- bool tryOptVectorDup(MachineInstr &MI) const;
bool tryOptSelect(MachineInstr &MI) const;
MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *tryOptArithImmedIntegerCompare(MachineOperand &LHS,
+ MachineOperand &RHS,
+ CmpInst::Predicate &Predicate,
+ MachineIRBuilder &MIB) const;
+ MachineInstr *tryOptArithShiftedCompare(MachineOperand &LHS,
+ MachineOperand &RHS,
+ MachineIRBuilder &MIB) const;
/// Return true if \p MI is a load or store of \p NumBytes bytes.
bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
@@ -295,6 +339,11 @@ private:
bool ProduceNonFlagSettingCondBr = false;
+ // Some cached values used during selection.
+ // We use LR as a live-in register, and we keep track of it here as it can be
+ // clobbered by calls.
+ Register MFReturnAddr;
+
#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL
@@ -421,6 +470,39 @@ static bool getSubRegForClass(const TargetRegisterClass *RC,
return true;
}
+/// Returns the minimum size the given register bank can hold.
+static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
+ switch (RB.getID()) {
+ case AArch64::GPRRegBankID:
+ return 32;
+ case AArch64::FPRRegBankID:
+ return 8;
+ default:
+ llvm_unreachable("Tried to get minimum size for unknown register bank.");
+ }
+}
+
+static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
+ auto &MI = *Root.getParent();
+ auto &MBB = *MI.getParent();
+ auto &MF = *MBB.getParent();
+ auto &MRI = MF.getRegInfo();
+ uint64_t Immed;
+ if (Root.isImm())
+ Immed = Root.getImm();
+ else if (Root.isCImm())
+ Immed = Root.getCImm()->getZExtValue();
+ else if (Root.isReg()) {
+ auto ValAndVReg =
+ getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
+ if (!ValAndVReg)
+ return None;
+ Immed = ValAndVReg->Value;
+ } else
+ return None;
+ return Immed;
+}
+
/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
@@ -609,23 +691,20 @@ static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
}
#endif
-/// Helper function for selectCopy. Inserts a subregister copy from
-/// \p *From to \p *To, linking it up to \p I.
-///
-/// e.g, given I = "Dst = COPY SrcReg", we'll transform that into
+/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
+/// to \p *To.
///
-/// CopyReg (From class) = COPY SrcReg
-/// SubRegCopy (To class) = COPY CopyReg:SubReg
-/// Dst = COPY SubRegCopy
-static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
- const RegisterBankInfo &RBI, Register SrcReg,
- const TargetRegisterClass *From,
- const TargetRegisterClass *To,
- unsigned SubReg) {
+/// E.g "To = COPY SrcReg:SubReg"
+static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
+ const RegisterBankInfo &RBI, Register SrcReg,
+ const TargetRegisterClass *To, unsigned SubReg) {
+ assert(SrcReg.isValid() && "Expected a valid source register?");
+ assert(To && "Destination register class cannot be null");
+ assert(SubReg && "Expected a valid subregister");
+
MachineIRBuilder MIB(I);
- auto Copy = MIB.buildCopy({From}, {SrcReg});
- auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
- .addReg(Copy.getReg(0), 0, SubReg);
+ auto SubRegCopy =
+ MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
MachineOperand &RegOp = I.getOperand(1);
RegOp.setReg(SubRegCopy.getReg(0));
@@ -670,7 +749,6 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
-
Register DstReg = I.getOperand(0).getReg();
Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
@@ -703,13 +781,15 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
(!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
!Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
"No phys reg on generic operator!");
- assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
- (void)KnownValid;
- return true;
+ bool ValidCopy = true;
+#ifndef NDEBUG
+ ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
+ assert(ValidCopy && "Invalid copy.");
+#endif
+ return ValidCopy;
};
- // Is this a copy? If so, then we may need to insert a subregister copy, or
- // a SUBREG_TO_REG.
+ // Is this a copy? If so, then we may need to insert a subregister copy.
if (I.isCopy()) {
// Yes. Check if there's anything to fix up.
if (!SrcRC) {
@@ -719,48 +799,43 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
+ unsigned SubReg;
- // If we're doing a cross-bank copy on different-sized registers, we need
- // to do a bit more work.
- if (SrcSize > DstSize) {
- // We're doing a cross-bank copy into a smaller register. We need a
- // subregister copy. First, get a register class that's on the same bank
- // as the destination, but the same size as the source.
- const TargetRegisterClass *SubregRC =
- getMinClassForRegBank(DstRegBank, SrcSize, true);
- assert(SubregRC && "Didn't get a register class for subreg?");
-
- // Get the appropriate subregister for the destination.
- unsigned SubReg = 0;
- if (!getSubRegForClass(DstRC, TRI, SubReg)) {
- LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
- return false;
- }
-
- // Now, insert a subregister copy using the new register class.
- selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
- return CheckCopy();
- }
+ // If the source bank doesn't support a subregister copy small enough,
+ // then we first need to copy to the destination bank.
+ if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
+ const TargetRegisterClass *DstTempRC =
+ getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
+ getSubRegForClass(DstRC, TRI, SubReg);
- // Is this a cross-bank copy?
- if (DstRegBank.getID() != SrcRegBank.getID()) {
- if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
- SrcSize == 16) {
- // Special case for FPR16 to GPR32.
- // FIXME: This can probably be generalized like the above case.
- Register PromoteReg =
- MRI.createVirtualRegister(&AArch64::FPR32RegClass);
- BuildMI(*I.getParent(), I, I.getDebugLoc(),
- TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
- .addImm(0)
- .addUse(SrcReg)
- .addImm(AArch64::hsub);
- MachineOperand &RegOp = I.getOperand(1);
- RegOp.setReg(PromoteReg);
+ MachineIRBuilder MIB(I);
+ auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
+ copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
+ } else if (SrcSize > DstSize) {
+ // If the source register is bigger than the destination we need to
+ // perform a subregister copy.
+ const TargetRegisterClass *SubRegRC =
+ getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
+ getSubRegForClass(SubRegRC, TRI, SubReg);
+ copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
+ } else if (DstSize > SrcSize) {
+ // If the destination register is bigger than the source we need to do
+ // a promotion using SUBREG_TO_REG.
+ const TargetRegisterClass *PromotionRC =
+ getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
+ getSubRegForClass(SrcRC, TRI, SubReg);
+
+ Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
+ BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
+ .addImm(0)
+ .addUse(SrcReg)
+ .addImm(SubReg);
+ MachineOperand &RegOp = I.getOperand(1);
+ RegOp.setReg(PromoteReg);
- // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
- KnownValid = true;
- }
+ // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
+ KnownValid = true;
}
// If the destination is a physical register, then there's nothing to
@@ -977,6 +1052,216 @@ static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
}
}
+/// Return a register which can be used as a bit to test in a TB(N)Z.
+static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
+ MachineRegisterInfo &MRI) {
+ assert(Reg.isValid() && "Expected valid register!");
+ while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
+ unsigned Opc = MI->getOpcode();
+
+ if (!MI->getOperand(0).isReg() ||
+ !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
+ break;
+
+ // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
+ //
+ // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
+ // on the truncated x is the same as the bit number on x.
+ if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
+ Opc == TargetOpcode::G_TRUNC) {
+ Register NextReg = MI->getOperand(1).getReg();
+ // Did we find something worth folding?
+ if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
+ break;
+
+ // NextReg is worth folding. Keep looking.
+ Reg = NextReg;
+ continue;
+ }
+
+ // Attempt to find a suitable operation with a constant on one side.
+ Optional<uint64_t> C;
+ Register TestReg;
+ switch (Opc) {
+ default:
+ break;
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_XOR: {
+ TestReg = MI->getOperand(1).getReg();
+ Register ConstantReg = MI->getOperand(2).getReg();
+ auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+ if (!VRegAndVal) {
+ // AND commutes, check the other side for a constant.
+ // FIXME: Can we canonicalize the constant so that it's always on the
+ // same side at some point earlier?
+ std::swap(ConstantReg, TestReg);
+ VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+ }
+ if (VRegAndVal)
+ C = VRegAndVal->Value;
+ break;
+ }
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_SHL: {
+ TestReg = MI->getOperand(1).getReg();
+ auto VRegAndVal =
+ getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
+ if (VRegAndVal)
+ C = VRegAndVal->Value;
+ break;
+ }
+ }
+
+ // Didn't find a constant or viable register. Bail out of the loop.
+ if (!C || !TestReg.isValid())
+ break;
+
+ // We found a suitable instruction with a constant. Check to see if we can
+ // walk through the instruction.
+ Register NextReg;
+ unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
+ switch (Opc) {
+ default:
+ break;
+ case TargetOpcode::G_AND:
+ // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
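+ // For illustration: bit 4 of (x & 0xF8) is just bit 4 of x, because 0xF8 has bit 4 set.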
+ if ((*C >> Bit) & 1)
+ NextReg = TestReg;
+ break;
+ case TargetOpcode::G_SHL:
+ // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is non-negative and fits in
+ // the type of the register.
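+ // For illustration: bit 5 of (x << 2) is bit 3 of x, so (tbz (shl x, 2), 5) becomes (tbz x, 3).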
+ if (*C <= Bit && (Bit - *C) < TestRegSize) {
+ NextReg = TestReg;
+ Bit = Bit - *C;
+ }
+ break;
+ case TargetOpcode::G_ASHR:
+ // (tbz (ashr x, c), b) -> (tbz x, b+c), or (tbz x, msb) if b+c is >= # bits
+ // in x.
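+ // For illustration: bit 3 of (x ashr 4) is bit 7 of x; once b+c runs past the top bit, every such bit is just the sign bit of x.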
+ NextReg = TestReg;
+ Bit = Bit + *C;
+ if (Bit >= TestRegSize)
+ Bit = TestRegSize - 1;
+ break;
+ case TargetOpcode::G_LSHR:
+ // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
+ if ((Bit + *C) < TestRegSize) {
+ NextReg = TestReg;
+ Bit = Bit + *C;
+ }
+ break;
+ case TargetOpcode::G_XOR:
+ // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
+ // appropriate.
+ //
+ // e.g. If x' = xor x, c, and the b-th bit is set in c then
+ //
+ // tbz x', b -> tbnz x, b
+ //
+ // Because x' only has the b-th bit set if x does not.
+ if ((*C >> Bit) & 1)
+ Invert = !Invert;
+ NextReg = TestReg;
+ break;
+ }
+
+ // Check if we found anything worth folding.
+ if (!NextReg.isValid())
+ return Reg;
+ Reg = NextReg;
+ }
+
+ return Reg;
+}
+
+MachineInstr *AArch64InstructionSelector::emitTestBit(
+ Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
+ MachineIRBuilder &MIB) const {
+ assert(TestReg.isValid());
+ assert(ProduceNonFlagSettingCondBr &&
+ "Cannot emit TB(N)Z with speculation tracking!");
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+
+ // Attempt to optimize the test bit by walking over instructions.
+ TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
+ LLT Ty = MRI.getType(TestReg);
+ unsigned Size = Ty.getSizeInBits();
+ assert(!Ty.isVector() && "Expected a scalar!");
+ assert(Bit < 64 && "Bit is too large!");
+
+ // When the test register is a 64-bit register, we have to narrow to make
+ // TBNZW work.
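+ // Conversely, a 32-bit register has to be widened when the bit number needs the X form.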
+ bool UseWReg = Bit < 32;
+ unsigned NecessarySize = UseWReg ? 32 : 64;
+ if (Size < NecessarySize)
+ TestReg = widenGPRBankRegIfNeeded(TestReg, NecessarySize, MIB);
+ else if (Size > NecessarySize)
+ TestReg = narrowExtendRegIfNeeded(TestReg, MIB);
+
+ static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
+ {AArch64::TBZW, AArch64::TBNZW}};
+ unsigned Opc = OpcTable[UseWReg][IsNegative];
+ auto TestBitMI =
+ MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
+ constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
+ return &*TestBitMI;
+}
+
+bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
+ MachineInstr *AndInst, int64_t CmpConstant, const CmpInst::Predicate &Pred,
+ MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const {
+ // Given something like this:
+ //
+ // %x = ...Something...
+ // %one = G_CONSTANT i64 1
+ // %zero = G_CONSTANT i64 0
+ // %and = G_AND %x, %one
+ // %cmp = G_ICMP intpred(ne), %and, %zero
+ // %cmp_trunc = G_TRUNC %cmp
+ // G_BRCOND %cmp_trunc, %bb.3
+ //
+ // We want to try and fold the AND into the G_BRCOND and produce either a
+ // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
+ //
+ // In this case, we'd get
+ //
+ // TBNZ %x %bb.3
+ //
+ if (!AndInst || AndInst->getOpcode() != TargetOpcode::G_AND)
+ return false;
+
+ // Need to be comparing against 0 to fold.
+ if (CmpConstant != 0)
+ return false;
+
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+
+ // Only support EQ and NE. If we have LT, then it *is* possible to fold, but
+ // we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
+ // so folding would be redundant.
+ if (Pred != CmpInst::Predicate::ICMP_EQ &&
+ Pred != CmpInst::Predicate::ICMP_NE)
+ return false;
+
+ // Check if the AND has a constant on its RHS which we can use as a mask.
+ // If it's a power of 2, then it's the same as checking a specific bit.
+ // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
+ auto MaybeBit =
+ getConstantVRegValWithLookThrough(AndInst->getOperand(2).getReg(), MRI);
+ if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
+ return false;
+
+ uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
+ Register TestReg = AndInst->getOperand(1).getReg();
+ bool Invert = Pred == CmpInst::Predicate::ICMP_NE;
+
+ // Emit a TB(N)Z.
+ emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
+ return true;
+}
+
bool AArch64InstructionSelector::selectCompareBranch(
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
@@ -991,28 +1276,67 @@ bool AArch64InstructionSelector::selectCompareBranch(
Register LHS = CCMI->getOperand(2).getReg();
Register RHS = CCMI->getOperand(3).getReg();
auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
- if (!VRegAndVal)
+ MachineIRBuilder MIB(I);
+ CmpInst::Predicate Pred =
+ (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
+ MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI);
+
+ // When we can emit a TB(N)Z, prefer that.
+ //
+ // Handle non-commutative condition codes first.
+ // Note that we don't want to do this when we have a G_AND because it can
+ // become a tst. The tst will make the test bit in the TB(N)Z redundant.
+ if (VRegAndVal && LHSMI->getOpcode() != TargetOpcode::G_AND) {
+ int64_t C = VRegAndVal->Value;
+
+ // When we have a greater-than comparison, we can just test if the msb is
+ // zero.
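+ // (x sgt -1 holds exactly when the sign bit of x is clear, so a TBZ on the msb is enough.)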
+ if (C == -1 && Pred == CmpInst::ICMP_SGT) {
+ uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
+ emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
+ I.eraseFromParent();
+ return true;
+ }
+
+ // When we have a less than comparison, we can just test if the msb is not
+ // zero.
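+ // (x slt 0 holds exactly when the sign bit of x is set, so a TBNZ on the msb is enough.)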
+ if (C == 0 && Pred == CmpInst::ICMP_SLT) {
+ uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
+ emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
+ I.eraseFromParent();
+ return true;
+ }
+ }
+
+ if (!VRegAndVal) {
std::swap(RHS, LHS);
+ VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ LHSMI = getDefIgnoringCopies(LHS, MRI);
+ }
- VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
if (!VRegAndVal || VRegAndVal->Value != 0) {
- MachineIRBuilder MIB(I);
// If we can't select a CBZ then emit a cmp + Bcc.
- if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
- CCMI->getOperand(1), MIB))
+ MachineInstr *Cmp;
+ std::tie(Cmp, Pred) = emitIntegerCompare(
+ CCMI->getOperand(2), CCMI->getOperand(3), CCMI->getOperand(1), MIB);
+ if (!Cmp)
return false;
- const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
- (CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
+ const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred);
MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
I.eraseFromParent();
return true;
}
+ // Try to emit a TB(N)Z for an eq or ne condition.
+ if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
+ MIB)) {
+ I.eraseFromParent();
+ return true;
+ }
+
const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
if (RB.getID() != AArch64::GPRRegBankID)
return false;
-
- const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
return false;
@@ -1247,7 +1571,7 @@ void AArch64InstructionSelector::materializeLargeCMVal(
return;
}
-void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
+bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -1267,10 +1591,10 @@ void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
const LLT ShiftTy = MRI.getType(ShiftReg);
const LLT SrcTy = MRI.getType(SrcReg);
if (SrcTy.isVector())
- return;
+ return false;
assert(!ShiftTy.isVector() && "unexpected vector shift ty");
if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
- return;
+ return false;
auto *AmtMI = MRI.getVRegDef(ShiftReg);
assert(AmtMI && "could not find a vreg definition for shift amount");
if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
@@ -1281,14 +1605,65 @@ void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
I.getOperand(2).setReg(Trunc.getReg(0));
}
- return;
+ return true;
}
case TargetOpcode::G_STORE:
- contractCrossBankCopyIntoStore(I, MRI);
- return;
+ return contractCrossBankCopyIntoStore(I, MRI);
+ case TargetOpcode::G_PTR_ADD:
+ return convertPtrAddToAdd(I, MRI);
+ case TargetOpcode::G_LOAD: {
+ // For scalar loads of pointers, we try to convert the dest type from p0
+ // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
+ // conversion, this should be ok because all users should have been
+ // selected already, so the type doesn't matter for them.
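+ // For illustration: %val(p0) = G_LOAD %addr is simply retyped to %val(s64) = G_LOAD %addr.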
+ Register DstReg = I.getOperand(0).getReg();
+ const LLT DstTy = MRI.getType(DstReg);
+ if (!DstTy.isPointer())
+ return false;
+ MRI.setType(DstReg, LLT::scalar(64));
+ return true;
+ }
default:
- return;
+ return false;
+ }
+}
+
+/// This lowering tries to look for G_PTR_ADD instructions and then converts
+/// them to a standard G_ADD with a COPY on the source.
+///
+/// The motivation behind this is to expose the add semantics to the imported
+/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
+/// because the selector works bottom up, uses before defs. By the time we
+/// end up trying to select a G_PTR_ADD, we should have already attempted to
+/// fold this into addressing modes and were therefore unsuccessful.
+bool AArch64InstructionSelector::convertPtrAddToAdd(
+ MachineInstr &I, MachineRegisterInfo &MRI) {
+ assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
+ Register DstReg = I.getOperand(0).getReg();
+ Register AddOp1Reg = I.getOperand(1).getReg();
+ const LLT PtrTy = MRI.getType(DstReg);
+ if (PtrTy.getAddressSpace() != 0)
+ return false;
+
+ MachineIRBuilder MIB(I);
+ const LLT CastPtrTy = PtrTy.isVector() ? LLT::vector(2, 64) : LLT::scalar(64);
+ auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
+ // Set regbanks on the registers.
+ if (PtrTy.isVector())
+ MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
+ else
+ MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
+
+ // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
+ // %dst(intty) = G_ADD %intbase, off
+ I.setDesc(TII.get(TargetOpcode::G_ADD));
+ MRI.setType(DstReg, CastPtrTy);
+ I.getOperand(1).setReg(PtrToInt.getReg(0));
+ if (!select(*PtrToInt)) {
+ LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd\n");
+ return false;
}
+ return true;
}
bool AArch64InstructionSelector::earlySelectSHL(
@@ -1326,8 +1701,8 @@ bool AArch64InstructionSelector::earlySelectSHL(
return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
}
-void AArch64InstructionSelector::contractCrossBankCopyIntoStore(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
+bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
+ MachineInstr &I, MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
// If we're storing a scalar, it doesn't matter what register bank that
// scalar is on. All that matters is the size.
@@ -1343,10 +1718,9 @@ void AArch64InstructionSelector::contractCrossBankCopyIntoStore(
// G_STORE %x:gpr(s32)
//
// And then continue the selection process normally.
- MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI);
- if (!Def)
- return;
- Register DefDstReg = Def->getOperand(0).getReg();
+ Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
+ if (!DefDstReg.isValid())
+ return false;
LLT DefDstTy = MRI.getType(DefDstReg);
Register StoreSrcReg = I.getOperand(0).getReg();
LLT StoreSrcTy = MRI.getType(StoreSrcReg);
@@ -1354,18 +1728,19 @@ void AArch64InstructionSelector::contractCrossBankCopyIntoStore(
// If we get something strange like a physical register, then we shouldn't
// go any further.
if (!DefDstTy.isValid())
- return;
+ return false;
// Are the source and dst types the same size?
if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
- return;
+ return false;
if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
RBI.getRegBank(DefDstReg, MRI, TRI))
- return;
+ return false;
// We have a cross-bank copy, which is entering a store. Let's fold it.
I.getOperand(0).setReg(DefDstReg);
+ return true;
}
bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
@@ -1391,16 +1766,15 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
Register DefReg = I.getOperand(0).getReg();
LLT Ty = MRI.getType(DefReg);
- if (Ty != LLT::scalar(64) && Ty != LLT::scalar(32))
- return false;
-
- if (Ty == LLT::scalar(64)) {
+ if (Ty.getSizeInBits() == 64) {
I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
- } else {
+ } else if (Ty.getSizeInBits() == 32) {
I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
- }
+ } else
+ return false;
+
I.setDesc(TII.get(TargetOpcode::COPY));
return true;
}
@@ -1417,9 +1791,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
+ const AArch64Subtarget *Subtarget =
+ &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
+ if (Subtarget->requiresStrictAlign()) {
+ // We don't support this feature yet.
+ LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
+ return false;
+ }
+
unsigned Opcode = I.getOpcode();
// G_PHI requires same handling as PHI
- if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
+ if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
// Certain non-generic instructions also need some special handling.
if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
@@ -1468,7 +1850,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// Try to do some lowering before we start instruction selecting. These
// lowerings are purely transformations on the input G_MIR and so selection
// must continue after any modification of the instruction.
- preISelLower(I);
+ if (preISelLower(I)) {
+ Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
+ }
// There may be patterns where the importer can't deal with them optimally,
// but does select it to a suboptimal sequence so our custom C++ selection
@@ -1503,8 +1887,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
// instructions will not be produced, as they are conditional branch
// instructions that do not set flags.
- bool ProduceNonFlagSettingCondBr =
- !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
return true;
@@ -1540,6 +1922,31 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_BRJT:
return selectBrJT(I, MRI);
+ case AArch64::G_ADD_LOW: {
+ // This op may have been separated from its ADRP companion by the localizer
+ // or some other code motion pass. Given that many CPUs will try to
+ // macro fuse these operations anyway, select this into a MOVaddr pseudo
+ // which will later be expanded into an ADRP+ADD pair after scheduling.
+ MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
+ if (BaseMI->getOpcode() != AArch64::ADRP) {
+ I.setDesc(TII.get(AArch64::ADDXri));
+ I.addOperand(MachineOperand::CreateImm(0));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+ assert(TM.getCodeModel() == CodeModel::Small &&
+ "Expected small code model");
+ MachineIRBuilder MIB(I);
+ auto Op1 = BaseMI->getOperand(1);
+ auto Op2 = I.getOperand(2);
+ auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
+ .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
+ Op1.getTargetFlags())
+ .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
+ Op2.getTargetFlags());
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
+ }
+
case TargetOpcode::G_BSWAP: {
// Handle vector types for G_BSWAP directly.
Register DstReg = I.getOperand(0).getReg();
@@ -1644,6 +2051,20 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
if (emitFMovForFConstant(I, MRI))
return true;
+ // For 64b values, emit a constant pool load instead.
+ if (DefSize == 64) {
+ auto *FPImm = I.getOperand(1).getFPImm();
+ MachineIRBuilder MIB(I);
+ auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
+ if (!LoadMI) {
+ LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
+ return false;
+ }
+ MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
+ I.eraseFromParent();
+ return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
+ }
+
// Nope. Emit a copy and use a normal mov instead.
const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
MachineOperand &RegOp = I.getOperand(0);
@@ -2005,9 +2426,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// Add and set the set condition flag.
unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
MachineIRBuilder MIRBuilder(I);
- auto AddsMI = MIRBuilder.buildInstr(
- AddsOpc, {I.getOperand(0).getReg()},
- {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
+ auto AddsMI = MIRBuilder.buildInstr(AddsOpc, {I.getOperand(0)},
+ {I.getOperand(2), I.getOperand(3)});
constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
// Now, put the overflow result in the register given by the first operand
@@ -2023,14 +2443,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return true;
}
- case TargetOpcode::G_PTR_MASK: {
- uint64_t Align = I.getOperand(2).getImm();
- if (Align >= 64 || Align == 0)
+ case TargetOpcode::G_PTRMASK: {
+ Register MaskReg = I.getOperand(2).getReg();
+ Optional<int64_t> MaskVal = getConstantVRegVal(MaskReg, MRI);
+ // TODO: Implement arbitrary cases
+ if (!MaskVal || !isShiftedMask_64(*MaskVal))
return false;
- uint64_t Mask = ~((1ULL << Align) - 1);
+ uint64_t Mask = *MaskVal;
I.setDesc(TII.get(AArch64::ANDXri));
- I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
+ I.getOperand(2).ChangeToImmediate(
+ AArch64_AM::encodeLogicalImmediate(Mask, 64));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
@@ -2101,6 +2524,13 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
I.eraseFromParent();
return true;
}
+
+ // We might have a vector G_PTRTOINT, in which case just emit a COPY.
+ if (Opcode == TargetOpcode::G_PTRTOINT) {
+ assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
+ I.setDesc(TII.get(TargetOpcode::COPY));
+ return true;
+ }
}
return false;
@@ -2151,16 +2581,22 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_SEXT_INREG:
case TargetOpcode::G_SEXT: {
unsigned Opcode = I.getOpcode();
- const bool IsSigned = Opcode == TargetOpcode::G_SEXT;
+ const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
const Register DefReg = I.getOperand(0).getReg();
- const Register SrcReg = I.getOperand(1).getReg();
+ Register SrcReg = I.getOperand(1).getReg();
const LLT DstTy = MRI.getType(DefReg);
const LLT SrcTy = MRI.getType(SrcReg);
unsigned DstSize = DstTy.getSizeInBits();
unsigned SrcSize = SrcTy.getSizeInBits();
+ // SEXT_INREG has the same source register size as the destination; the size
+ // of the value to be extended is encoded in the immediate.
+ if (Opcode == TargetOpcode::G_SEXT_INREG)
+ SrcSize = I.getOperand(2).getImm();
+
if (DstTy.isVector())
return false; // Should be handled by imported patterns.
@@ -2179,31 +2615,65 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// %v2(s32) = G_ZEXT %v(s8)
if (!IsSigned) {
auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
- if (LoadMI &&
- RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID) {
+ bool IsGPR =
+ RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
+ if (LoadMI && IsGPR) {
const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
unsigned BytesLoaded = MemOp->getSize();
if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
return selectCopy(I, TII, MRI, TRI, RBI);
}
- }
- if (DstSize == 64) {
- // FIXME: Can we avoid manually doing this?
- if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
- LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
- << " operand\n");
- return false;
- }
-
- auto SubregToReg =
- MIB.buildInstr(AArch64::SUBREG_TO_REG, {&AArch64::GPR64RegClass}, {})
+ // If we are zero extending from 32 bits to 64 bits, it's possible that
+ // the instruction implicitly does the zero extend for us. In that case,
+ // we can just emit a SUBREG_TO_REG.
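+ // (A 32-bit instruction such as ADDWrr already zeroes bits 63:32 of the full X register, so copying into the sub_32 subregister is sufficient.)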
+ if (IsGPR && SrcSize == 32 && DstSize == 64) {
+ // Unlike with the G_LOAD case, we don't want to look through copies
+ // here.
+ MachineInstr *Def = MRI.getVRegDef(SrcReg);
+ if (Def && isDef32(*Def)) {
+ MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
.addImm(0)
.addUse(SrcReg)
.addImm(AArch64::sub_32);
+ if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
+ MRI)) {
+ LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
+ return false;
+ }
+
+ if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
+ MRI)) {
+ LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
+ return false;
+ }
+
+ I.eraseFromParent();
+ return true;
+ }
+ }
+ }
+
+ if (DstSize == 64) {
+ if (Opcode != TargetOpcode::G_SEXT_INREG) {
+ // FIXME: Can we avoid manually doing this?
+ if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
+ MRI)) {
+ LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
+ << " operand\n");
+ return false;
+ }
+ SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
+ {&AArch64::GPR64RegClass}, {})
+ .addImm(0)
+ .addUse(SrcReg)
+ .addImm(AArch64::sub_32)
+ .getReg(0);
+ }
+
ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
- {DefReg}, {SubregToReg})
+ {DefReg}, {SrcReg})
.addImm(0)
.addImm(SrcSize - 1);
} else if (DstSize <= 32) {
@@ -2236,6 +2706,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return true;
}
+ case TargetOpcode::G_FREEZE:
+ return selectCopy(I, TII, MRI, TRI, RBI);
case TargetOpcode::G_INTTOPTR:
// The importer is currently unable to import pointer types since they
@@ -2294,11 +2766,13 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
MachineIRBuilder MIRBuilder(I);
- if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
- MIRBuilder))
+ MachineInstr *Cmp;
+ CmpInst::Predicate Pred;
+ std::tie(Cmp, Pred) = emitIntegerCompare(I.getOperand(2), I.getOperand(3),
+ I.getOperand(1), MIRBuilder);
+ if (!Cmp)
return false;
- emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
- MIRBuilder);
+ emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
I.eraseFromParent();
return true;
}
@@ -2435,14 +2909,13 @@ bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
- MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
- {JTAddr, Index})
- .addJumpTableIndex(JTI);
-
+ auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
+ {TargetReg, ScratchReg}, {JTAddr, Index})
+ .addJumpTableIndex(JTI);
// Build the indirect branch.
MIB.buildInstr(AArch64::BR, {}, {TargetReg});
I.eraseFromParent();
- return true;
+ return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
}
bool AArch64InstructionSelector::selectJumpTable(
@@ -2482,7 +2955,7 @@ bool AArch64InstructionSelector::selectTLSGlobalValue(
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
- MIB.buildInstr(AArch64::BLR, {}, {Load})
+ MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
.addDef(AArch64::X0, RegState::Implicit)
.addRegMask(TRI.getTLSCallPreservedMask());
@@ -3158,19 +3631,17 @@ bool AArch64InstructionSelector::selectConcatVectors(
}
unsigned
-AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
+AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
MachineFunction &MF) const {
Type *CPTy = CPVal->getType();
- unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
- if (Align == 0)
- Align = MF.getDataLayout().getTypeAllocSize(CPTy);
+ Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
MachineConstantPool *MCP = MF.getConstantPool();
- return MCP->getConstantPoolIndex(CPVal, Align);
+ return MCP->getConstantPoolIndex(CPVal, Alignment);
}
MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
- Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
+ const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
auto Adrp =
@@ -3248,7 +3719,7 @@ AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
auto ImmFns = selectArithImmed(RHS);
unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
- auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS.getReg()});
+ auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS});
// If we matched a valid constant immediate, add those operands.
if (ImmFns) {
@@ -3274,7 +3745,7 @@ AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
- auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS.getReg()});
+ auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
// If we matched a valid constant immediate, add those operands.
if (ImmFns) {
@@ -3316,17 +3787,21 @@ AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
return &*TstMI;
}
-MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
+std::pair<MachineInstr *, CmpInst::Predicate>
+AArch64InstructionSelector::emitIntegerCompare(
MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const {
assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
+ assert(Predicate.isPredicate() && "Expected predicate?");
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
+ CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
+
// Fold the compare if possible.
MachineInstr *FoldCmp =
tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
if (FoldCmp)
- return FoldCmp;
+ return {FoldCmp, P};
// Can't fold into a CMN. Just emit a normal compare.
unsigned CmpOpc = 0;
@@ -3337,31 +3812,31 @@ MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
"Expected scalar or pointer");
if (CmpTy == LLT::scalar(32)) {
CmpOpc = AArch64::SUBSWrr;
- ZReg = AArch64::WZR;
+ ZReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
} else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
CmpOpc = AArch64::SUBSXrr;
- ZReg = AArch64::XZR;
+ ZReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
} else {
- return nullptr;
+ return {nullptr, CmpInst::Predicate::BAD_ICMP_PREDICATE};
}
// Try to match immediate forms.
- auto ImmFns = selectArithImmed(RHS);
- if (ImmFns)
- CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
-
- auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg());
- // If we matched a valid constant immediate, add those operands.
- if (ImmFns) {
- for (auto &RenderFn : *ImmFns)
- RenderFn(CmpMI);
- } else {
- CmpMI.addUse(RHS.getReg());
- }
-
+ MachineInstr *ImmedCmp =
+ tryOptArithImmedIntegerCompare(LHS, RHS, P, MIRBuilder);
+ if (ImmedCmp)
+ return {ImmedCmp, P};
+
+ // If we don't have an immediate, we may have a shift which can be folded
+ // into the compare.
+ MachineInstr *ShiftedCmp = tryOptArithShiftedCompare(LHS, RHS, MIRBuilder);
+ if (ShiftedCmp)
+ return {ShiftedCmp, P};
+
+ auto CmpMI =
+ MIRBuilder.buildInstr(CmpOpc, {ZReg}, {LHS.getReg(), RHS.getReg()});
// Make sure that we can constrain the compare that we emitted.
constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
- return &*CmpMI;
+ return {&*CmpMI, P};
}
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
@@ -3497,8 +3972,16 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
while (CondDef) {
// We can only fold if all of the defs have one use.
- if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
- return false;
+ Register CondDefReg = CondDef->getOperand(0).getReg();
+ if (!MRI.hasOneNonDBGUse(CondDefReg)) {
+ // Unless it's another select.
+ for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
+ if (CondDef == &UI)
+ continue;
+ if (UI.getOpcode() != TargetOpcode::G_SELECT)
+ return false;
+ }
+ }
// We can skip over G_TRUNC since the condition is 1-bit.
// Truncating/extending can have no impact on the value.
@@ -3524,13 +4007,21 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
AArch64CC::CondCode CondCode;
if (CondOpc == TargetOpcode::G_ICMP) {
- CondCode = changeICMPPredToAArch64CC(
- (CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
- if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
- CondDef->getOperand(1), MIB)) {
+ MachineInstr *Cmp;
+ CmpInst::Predicate Pred;
+
+ std::tie(Cmp, Pred) =
+ emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
+ CondDef->getOperand(1), MIB);
+
+ if (!Cmp) {
LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
return false;
}
+
+ // Have to collect the CondCode after emitIntegerCompare, since it can
+ // update the predicate.
+ CondCode = changeICMPPredToAArch64CC(Pred);
} else {
// Get the condition code for the select.
AArch64CC::CondCode CondCode2;
@@ -3660,119 +4151,150 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
return nullptr;
}
-bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
- // Try to match a vector splat operation into a dup instruction.
- // We're looking for this pattern:
- // %scalar:gpr(s64) = COPY $x0
- // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
- // %cst0:gpr(s32) = G_CONSTANT i32 0
- // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
- // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
- // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
- // %zerovec(<2 x s32>)
- //
- // ...into:
- // %splat = DUP %scalar
- // We use the regbank of the scalar to determine which kind of dup to use.
- MachineIRBuilder MIB(I);
+MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
+ MachineOperand &LHS, MachineOperand &RHS, CmpInst::Predicate &P,
+ MachineIRBuilder &MIB) const {
+ // Attempt to select the immediate form of an integer compare.
MachineRegisterInfo &MRI = *MIB.getMRI();
- const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
- using namespace TargetOpcode;
- using namespace MIPatternMatch;
-
- // Begin matching the insert.
- auto *InsMI =
- getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI);
- if (!InsMI)
- return false;
- // Match the undef vector operand.
- auto *UndefMI =
- getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI);
- if (!UndefMI)
- return false;
- // Match the scalar being splatted.
- Register ScalarReg = InsMI->getOperand(2).getReg();
- const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
- // Match the index constant 0.
- int64_t Index = 0;
- if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
- return false;
-
- // The shuffle's second operand doesn't matter if the mask is all zero.
- ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
- if (!all_of(Mask, [](int Elem) { return Elem == 0; }))
- return false;
+ auto Ty = MRI.getType(LHS.getReg());
+ assert(!Ty.isVector() && "Expected scalar or pointer only?");
+ unsigned Size = Ty.getSizeInBits();
+ assert((Size == 32 || Size == 64) &&
+ "Expected 32 bit or 64 bit compare only?");
+
+ // Check if this is a case we can already handle.
+ InstructionSelector::ComplexRendererFns ImmFns;
+ ImmFns = selectArithImmed(RHS);
+
+ if (!ImmFns) {
+ // We didn't get a rendering function, but we may still have a constant.
+ auto MaybeImmed = getImmedFromMO(RHS);
+ if (!MaybeImmed)
+ return nullptr;
- // We're done, now find out what kind of splat we need.
- LLT VecTy = MRI.getType(I.getOperand(0).getReg());
- LLT EltTy = VecTy.getElementType();
- if (EltTy.getSizeInBits() < 32) {
- LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 32b elts yet");
- return false;
- }
- bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
- unsigned Opc = 0;
- if (IsFP) {
- switch (EltTy.getSizeInBits()) {
- case 32:
- if (VecTy.getNumElements() == 2) {
- Opc = AArch64::DUPv2i32lane;
- } else {
- Opc = AArch64::DUPv4i32lane;
- assert(VecTy.getNumElements() == 4);
- }
+ // We have a constant, but it doesn't fit. Try adjusting it by one and
+ // updating the predicate if possible.
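+ // E.g. 0x1001 is not a legal arithmetic immediate, but x ult 0x1001 is the same as x ule 0x1000, and 0x1000 encodes as 1 LSL 12.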
+ uint64_t C = *MaybeImmed;
+ CmpInst::Predicate NewP;
+ switch (P) {
+ default:
+ return nullptr;
+ case CmpInst::ICMP_SLT:
+ case CmpInst::ICMP_SGE:
+ // Check for
+ //
+ // x slt c => x sle c - 1
+ // x sge c => x sgt c - 1
+ //
+ // When c is not the smallest possible negative number.
+ if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
+ (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
+ return nullptr;
+ NewP = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
+ C -= 1;
break;
- case 64:
- assert(VecTy.getNumElements() == 2 && "Unexpected num elts");
- Opc = AArch64::DUPv2i64lane;
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_UGE:
+ // Check for
+ //
+ // x ult c => x ule c - 1
+ // x uge c => x ugt c - 1
+ //
+ // When c is not zero.
+ if (C == 0)
+ return nullptr;
+ NewP = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
+ C -= 1;
break;
- }
- } else {
- switch (EltTy.getSizeInBits()) {
- case 32:
- if (VecTy.getNumElements() == 2) {
- Opc = AArch64::DUPv2i32gpr;
- } else {
- Opc = AArch64::DUPv4i32gpr;
- assert(VecTy.getNumElements() == 4);
- }
+ case CmpInst::ICMP_SLE:
+ case CmpInst::ICMP_SGT:
+ // Check for
+ //
+ // x sle c => x slt c + 1
+ // x sgt c => x sge c + 1
+ //
+ // When c is not the largest possible signed integer.
+ if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
+ (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
+ return nullptr;
+ NewP = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
+ C += 1;
break;
- case 64:
- assert(VecTy.getNumElements() == 2 && "Unexpected num elts");
- Opc = AArch64::DUPv2i64gpr;
+ case CmpInst::ICMP_ULE:
+ case CmpInst::ICMP_UGT:
+ // Check for
+ //
+ // x ule c => x ult c + 1
+ // x ugt c => x uge c + 1
+ //
+ // When c is not the largest possible unsigned integer.
+ if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
+ (Size == 64 && C == UINT64_MAX))
+ return nullptr;
+ NewP = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
+ C += 1;
break;
}
+
+ // Check if the new constant is valid.
+ if (Size == 32)
+ C = static_cast<uint32_t>(C);
+ ImmFns = select12BitValueWithLeftShift(C);
+ if (!ImmFns)
+ return nullptr;
+ P = NewP;
}
- assert(Opc && "Did not compute an opcode for a dup");
- // For FP splats, we need to widen the scalar reg via undef too.
- if (IsFP) {
- MachineInstr *Widen = emitScalarToVector(
- EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
- if (!Widen)
- return false;
- ScalarReg = Widen->getOperand(0).getReg();
+ // At this point, we know we can select an immediate form. Go ahead and do
+ // that.
+ Register ZReg;
+ unsigned Opc;
+ if (Size == 32) {
+ ZReg = AArch64::WZR;
+ Opc = AArch64::SUBSWri;
+ } else {
+ ZReg = AArch64::XZR;
+ Opc = AArch64::SUBSXri;
}
- auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
- if (IsFP)
- Dup.addImm(0);
- constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
- I.eraseFromParent();
- return true;
+
+ auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()});
+ for (auto &RenderFn : *ImmFns)
+ RenderFn(CmpMI);
+ constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
+ return &*CmpMI;
}
-bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
- if (TM.getOptLevel() == CodeGenOpt::None)
- return false;
- if (tryOptVectorDup(I))
- return true;
- return false;
+MachineInstr *AArch64InstructionSelector::tryOptArithShiftedCompare(
+ MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIB) const {
+ // We are looking for the following pattern:
+ //
+ // shift = G_SHL/G_ASHR/G_LSHR y, c
+ // ...
+ // cmp = G_ICMP pred, something, shift
+ //
+ // Since we will select the G_ICMP to a SUBS, we can potentially fold the
+ // shift into the subtract.
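+ // E.g. for 64-bit operands, G_ICMP eq, %x, (G_SHL %y, 2) can fold to a SUBSXrs of %x with %y shifted left by 2.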
+ static const unsigned OpcTable[2] = {AArch64::SUBSWrs, AArch64::SUBSXrs};
+ static const Register ZRegTable[2] = {AArch64::WZR, AArch64::XZR};
+ auto ImmFns = selectShiftedRegister(RHS);
+ if (!ImmFns)
+ return nullptr;
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ auto Ty = MRI.getType(LHS.getReg());
+ assert(!Ty.isVector() && "Expected scalar or pointer only?");
+ unsigned Size = Ty.getSizeInBits();
+ bool Idx = (Size == 64);
+ Register ZReg = ZRegTable[Idx];
+ unsigned Opc = OpcTable[Idx];
+ auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()});
+ for (auto &RenderFn : *ImmFns)
+ RenderFn(CmpMI);
+ constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
+ return &*CmpMI;
}
bool AArch64InstructionSelector::selectShuffleVector(
MachineInstr &I, MachineRegisterInfo &MRI) const {
- if (tryOptVectorShuffle(I))
- return true;
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
Register Src1Reg = I.getOperand(1).getReg();
const LLT Src1Ty = MRI.getType(Src1Reg);
@@ -3852,9 +4374,8 @@ bool AArch64InstructionSelector::selectShuffleVector(
.addUse(Src2Reg)
.addImm(AArch64::qsub1);
- auto TBL2 =
- MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
- {RegSeq, IndexLoad->getOperand(0).getReg()});
+ auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
+ {RegSeq, IndexLoad->getOperand(0)});
constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
I.eraseFromParent();
@@ -3968,6 +4489,44 @@ bool AArch64InstructionSelector::selectInsertElt(
return true;
}
+bool AArch64InstructionSelector::tryOptConstantBuildVec(
+ MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
+ assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+ assert(DstTy.getSizeInBits() <= 128 && "Unexpected build_vec type!");
+ if (DstTy.getSizeInBits() < 32)
+ return false;
+ // Check if we're building a constant vector, in which case we want to
+ // generate a constant pool load instead of a vector insert sequence.
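+  // For example, a G_BUILD_VECTOR whose sources are four G_CONSTANT i32
+  // values can be selected as a single load from a <4 x i32> constant pool
+  // entry instead of a chain of vector inserts.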
+ SmallVector<Constant *, 16> Csts;
+ for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
+ // Try to find G_CONSTANT or G_FCONSTANT
+ auto *OpMI =
+ getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
+ if (OpMI)
+ Csts.emplace_back(
+ const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
+ else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
+ I.getOperand(Idx).getReg(), MRI)))
+ Csts.emplace_back(
+ const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
+ else
+ return false;
+ }
+ Constant *CV = ConstantVector::get(Csts);
+ MachineIRBuilder MIB(I);
+ auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
+ if (!CPLoad) {
+    LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector\n");
+ return false;
+ }
+ MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0));
+ RBI.constrainGenericRegister(I.getOperand(0).getReg(),
+ *MRI.getRegClass(CPLoad->getOperand(0).getReg()),
+ MRI);
+ I.eraseFromParent();
+ return true;
+}
+
bool AArch64InstructionSelector::selectBuildVector(
MachineInstr &I, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
@@ -3976,6 +4535,9 @@ bool AArch64InstructionSelector::selectBuildVector(
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
unsigned EltSize = EltTy.getSizeInBits();
+
+ if (tryOptConstantBuildVec(I, DstTy, MRI))
+ return true;
if (EltSize < 16 || EltSize > 64)
return false; // Don't support all element types yet.
const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
@@ -4081,8 +4643,8 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
return true;
}
-bool AArch64InstructionSelector::selectIntrinsic(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
+bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
+ MachineRegisterInfo &MRI) {
unsigned IntrinID = findIntrinsicID(I);
if (!IntrinID)
return false;
@@ -4091,7 +4653,7 @@ bool AArch64InstructionSelector::selectIntrinsic(
switch (IntrinID) {
default:
break;
- case Intrinsic::aarch64_crypto_sha1h:
+ case Intrinsic::aarch64_crypto_sha1h: {
Register DstReg = I.getOperand(0).getReg();
Register SrcReg = I.getOperand(2).getReg();
@@ -4130,28 +4692,59 @@ bool AArch64InstructionSelector::selectIntrinsic(
I.eraseFromParent();
return true;
}
- return false;
-}
+ case Intrinsic::frameaddress:
+ case Intrinsic::returnaddress: {
+ MachineFunction &MF = *I.getParent()->getParent();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
-static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
- auto &MI = *Root.getParent();
- auto &MBB = *MI.getParent();
- auto &MF = *MBB.getParent();
- auto &MRI = MF.getRegInfo();
- uint64_t Immed;
- if (Root.isImm())
- Immed = Root.getImm();
- else if (Root.isCImm())
- Immed = Root.getCImm()->getZExtValue();
- else if (Root.isReg()) {
- auto ValAndVReg =
- getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
- if (!ValAndVReg)
- return None;
- Immed = ValAndVReg->Value;
- } else
- return None;
- return Immed;
+ unsigned Depth = I.getOperand(2).getImm();
+ Register DstReg = I.getOperand(0).getReg();
+ RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
+
+ if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
+ if (MFReturnAddr) {
+ MIRBuilder.buildCopy({DstReg}, MFReturnAddr);
+ I.eraseFromParent();
+ return true;
+ }
+ MFI.setReturnAddressIsTaken(true);
+ MF.addLiveIn(AArch64::LR, &AArch64::GPR64spRegClass);
+ // Insert the copy from LR/X30 into the entry block, before it can be
+ // clobbered by anything.
+ MachineBasicBlock &EntryBlock = *MF.begin();
+ if (!EntryBlock.isLiveIn(AArch64::LR))
+ EntryBlock.addLiveIn(AArch64::LR);
+ MachineIRBuilder EntryBuilder(MF);
+ EntryBuilder.setInstr(*EntryBlock.begin());
+ EntryBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
+ MFReturnAddr = DstReg;
+ I.eraseFromParent();
+ return true;
+ }
+
+ MFI.setFrameAddressIsTaken(true);
+ Register FrameAddr(AArch64::FP);
+ while (Depth--) {
+ Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
+ auto Ldr =
+ MIRBuilder.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr})
+ .addImm(0);
+ constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
+ FrameAddr = NextFrame;
+ }
+
+ if (IntrinID == Intrinsic::frameaddress)
+ MIRBuilder.buildCopy({DstReg}, {FrameAddr});
+ else {
+ MFI.setReturnAddressIsTaken(true);
+ MIRBuilder.buildInstr(AArch64::LDRXui, {DstReg}, {FrameAddr}).addImm(1);
+ }
+
+ I.eraseFromParent();
+ return true;
+ }
+ }
+ return false;
}
InstructionSelector::ComplexRendererFns
@@ -4271,7 +4864,7 @@ bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
MachineInstr &MI, const MachineRegisterInfo &MRI) const {
// Always fold if there is one use, or if we're optimizing for size.
Register DefReg = MI.getOperand(0).getReg();
- if (MRI.hasOneUse(DefReg) ||
+ if (MRI.hasOneNonDBGUse(DefReg) ||
MI.getParent()->getParent()->getFunction().hasMinSize())
return true;
@@ -4283,10 +4876,21 @@ bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
// We have a fastpath, so folding a shift in and potentially computing it
// many times may be beneficial. Check if this is only used in memory ops.
// If it is, then we should fold.
- return all_of(MRI.use_instructions(DefReg),
+ return all_of(MRI.use_nodbg_instructions(DefReg),
[](MachineInstr &Use) { return Use.mayLoadOrStore(); });
}
+static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
+ switch (Type) {
+ case AArch64_AM::SXTB:
+ case AArch64_AM::SXTH:
+ case AArch64_AM::SXTW:
+ return true;
+ default:
+ return false;
+ }
+}
+
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtendedSHL(
MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
@@ -4359,7 +4963,10 @@ AArch64InstructionSelector::selectExtendedSHL(
if (Ext == AArch64_AM::InvalidShiftExtend)
return None;
- SignExtend = Ext == AArch64_AM::SXTW;
+ SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
+ // We only support SXTW for signed extension here.
+ if (SignExtend && Ext != AArch64_AM::SXTW)
+ return None;
// Need a 32-bit wide register here.
MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
@@ -4441,7 +5048,7 @@ AArch64InstructionSelector::selectAddrModeRegisterOffset(
// If this is used more than once, let's not bother folding.
// TODO: Check if they are memory ops. If they are, then we can still fold
// without having to recompute anything.
- if (!MRI.hasOneUse(Gep->getOperand(0).getReg()))
+ if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
return None;
// Base is the GEP's LHS, offset is its RHS.
@@ -4595,14 +5202,46 @@ AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
return None;
}
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
+ unsigned Size,
+ MachineRegisterInfo &MRI) const {
+ if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
+ return None;
+ MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
+ if (Adrp.getOpcode() != AArch64::ADRP)
+ return None;
+
+ // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
+ // TODO: Need to check GV's offset % size if doing offset folding into globals.
+ assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global");
+ auto GV = Adrp.getOperand(1).getGlobal();
+ if (GV->isThreadLocal())
+ return None;
+
+ auto &MF = *RootDef.getParent()->getParent();
+ if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
+ return None;
+
+ unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
+ MachineIRBuilder MIRBuilder(RootDef);
+ Register AdrpReg = Adrp.getOperand(0).getReg();
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
+ [=](MachineInstrBuilder &MIB) {
+ MIB.addGlobalAddress(GV, /* Offset */ 0,
+ OpFlags | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ }}};
+}
+
/// Select a "register plus scaled unsigned 12-bit immediate" address. The
/// "Size" argument is the size in bytes of the memory reference, which
/// determines the scale.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
unsigned Size) const {
- MachineRegisterInfo &MRI =
- Root.getParent()->getParent()->getParent()->getRegInfo();
+ MachineFunction &MF = *Root.getParent()->getParent()->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
if (!Root.isReg())
return None;
@@ -4618,6 +5257,14 @@ AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
}};
}
+ CodeModel::Model CM = MF.getTarget().getCodeModel();
+ // Check if we can fold in the ADD of small code model ADRP + ADD address.
+ if (CM == CodeModel::Small) {
+ auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
+ if (OpFns)
+ return OpFns;
+ }
+
if (isBaseWithConstantOffset(Root, MRI)) {
MachineOperand &LHS = RootDef->getOperand(1);
MachineOperand &RHS = RootDef->getOperand(2);
@@ -4717,7 +5364,11 @@ AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
// Handle explicit extend instructions first.
if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
- unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ unsigned Size;
+ if (Opc == TargetOpcode::G_SEXT)
+ Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ else
+ Size = MI.getOperand(2).getImm();
assert(Size != 64 && "Extend from 64 bits?");
switch (Size) {
case 8:
@@ -4782,6 +5433,52 @@ Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
return Copy.getReg(0);
}
+Register AArch64InstructionSelector::widenGPRBankRegIfNeeded(
+ Register Reg, unsigned WideSize, MachineIRBuilder &MIB) const {
+ assert(WideSize >= 8 && "WideSize is smaller than all possible registers?");
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ unsigned NarrowSize = MRI.getType(Reg).getSizeInBits();
+ assert(WideSize >= NarrowSize &&
+ "WideSize cannot be smaller than NarrowSize!");
+
+ // If the sizes match, just return the register.
+ //
+ // If NarrowSize is an s1, then we can select it to any size, so we'll treat
+ // it as a don't care.
+ if (NarrowSize == WideSize || NarrowSize == 1)
+ return Reg;
+
+ // Now check the register classes.
+ const RegisterBank *RB = RBI.getRegBank(Reg, MRI, TRI);
+ const TargetRegisterClass *OrigRC = getMinClassForRegBank(*RB, NarrowSize);
+ const TargetRegisterClass *WideRC = getMinClassForRegBank(*RB, WideSize);
+ assert(OrigRC && "Could not determine narrow RC?");
+ assert(WideRC && "Could not determine wide RC?");
+
+ // If the sizes differ, but the register classes are the same, there is no
+ // need to insert a SUBREG_TO_REG.
+ //
+ // For example, an s8 that's supposed to be a GPR will be selected to either
+ // a GPR32 or a GPR64 register. Note that this assumes that the s8 will
+ // always end up on a GPR32.
+ if (OrigRC == WideRC)
+ return Reg;
+
+ // We have two different register classes. Insert a SUBREG_TO_REG.
+ unsigned SubReg = 0;
+ getSubRegForClass(OrigRC, TRI, SubReg);
+ assert(SubReg && "Couldn't determine subregister?");
+
+ // Build the SUBREG_TO_REG and return the new, widened register.
+ auto SubRegToReg =
+ MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {})
+ .addImm(0)
+ .addUse(Reg)
+ .addImm(SubReg);
+ constrainSelectedInstRegOperands(*SubRegToReg, TII, TRI, RBI);
+ return SubRegToReg.getReg(0);
+}
+
/// Select an "extended register" operand. This operand folds in an extend
/// followed by an optional left shift.
InstructionSelector::ComplexRendererFns
@@ -4908,6 +5605,95 @@ bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
}
}
+
+// Perform fixups on the given PHI instruction's operands to force them all
+// to be the same as the destination regbank.
+static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
+ const AArch64RegisterBankInfo &RBI) {
+ assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
+ Register DstReg = MI.getOperand(0).getReg();
+ const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
+ assert(DstRB && "Expected PHI dst to have regbank assigned");
+ MachineIRBuilder MIB(MI);
+
+ // Go through each operand and ensure it has the same regbank.
+ for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg())
+ continue;
+ Register OpReg = MO.getReg();
+ const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
+ if (RB != DstRB) {
+ // Insert a cross-bank copy.
+ auto *OpDef = MRI.getVRegDef(OpReg);
+ const LLT &Ty = MRI.getType(OpReg);
+ MIB.setInsertPt(*OpDef->getParent(), std::next(OpDef->getIterator()));
+ auto Copy = MIB.buildCopy(Ty, OpReg);
+ MRI.setRegBank(Copy.getReg(0), *DstRB);
+ MO.setReg(Copy.getReg(0));
+ }
+ }
+}
+
+void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
+  // We're looking for PHIs; build a list so we don't invalidate iterators.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ SmallVector<MachineInstr *, 32> Phis;
+ for (auto &BB : MF) {
+ for (auto &MI : BB) {
+ if (MI.getOpcode() == TargetOpcode::G_PHI)
+ Phis.emplace_back(&MI);
+ }
+ }
+
+ for (auto *MI : Phis) {
+ // We need to do some work here if the operand types are < 16 bit and they
+ // are split across fpr/gpr banks. Since all types <32b on gpr
+ // end up being assigned gpr32 regclasses, we can end up with PHIs here
+ // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
+    // be selecting heterogeneous regbanks for operands if possible, but we
+ // still need to be able to deal with it here.
+ //
+ // To fix this, if we have a gpr-bank operand < 32b in size and at least
+ // one other operand is on the fpr bank, then we add cross-bank copies
+ // to homogenize the operand banks. For simplicity the bank that we choose
+ // to settle on is whatever bank the def operand has. For example:
+ //
+ // %endbb:
+ // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
+ // =>
+ // %bb2:
+ // ...
+ // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
+ // ...
+ // %endbb:
+ // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
+ bool HasGPROp = false, HasFPROp = false;
+ for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) {
+ const auto &MO = MI->getOperand(OpIdx);
+ if (!MO.isReg())
+ continue;
+ const LLT &Ty = MRI.getType(MO.getReg());
+ if (!Ty.isValid() || !Ty.isScalar())
+ break;
+ if (Ty.getSizeInBits() >= 32)
+ break;
+ const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
+      // If for some reason we don't have a regbank yet, don't try anything.
+ if (!RB)
+ break;
+
+ if (RB->getID() == AArch64::GPRRegBankID)
+ HasGPROp = true;
+ else
+ HasFPROp = true;
+ }
+    // We have heterogeneous regbanks; we need to fix them up.
+ if (HasGPROp && HasFPROp)
+ fixupPHIOpBanks(*MI, MRI, RBI);
+ }
+}
+
namespace llvm {
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 95719a35c6da..2eaec0b970fa 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -30,7 +30,8 @@ using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;
-AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
+AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
+ : ST(&ST) {
using namespace TargetOpcode;
const LLT p0 = LLT::pointer(0, 64);
const LLT s1 = LLT::scalar(1);
@@ -52,13 +53,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
const LLT v2s64 = LLT::vector(2, 64);
const LLT v2p0 = LLT::vector(2, p0);
+ const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
+
// FIXME: support subtargets which have neon/fp-armv8 disabled.
if (!ST.hasNEON() || !ST.hasFPARMv8()) {
computeTables();
return;
}
- getActionDefinitionsBuilder(G_IMPLICIT_DEF)
+ getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
.legalFor({p0, s1, s8, s16, s32, s64, v2s32, v4s32, v2s64})
.clampScalar(0, s1, s64)
.widenScalarToNextPow2(0, 8)
@@ -105,10 +108,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
.minScalarSameAs(1, 0);
getActionDefinitionsBuilder(G_PTR_ADD)
- .legalFor({{p0, s64}})
+ .legalFor({{p0, s64}, {v2p0, v2s64}})
.clampScalar(1, s64, s64);
- getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0});
+ getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
getActionDefinitionsBuilder({G_SDIV, G_UDIV})
.legalFor({s32, s64})
@@ -375,7 +378,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();
- getActionDefinitionsBuilder(G_SEXT_INREG).lower();
+ getActionDefinitionsBuilder(G_SEXT_INREG)
+ .legalFor({s32, s64})
+ .lower();
// FP conversions
getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
@@ -413,7 +418,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
// Pointer-handling
getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
- getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
+
+ if (TM.getCodeModel() == CodeModel::Small)
+ getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
+ else
+ getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
getActionDefinitionsBuilder(G_PTRTOINT)
.legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
@@ -617,10 +626,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
verify(*ST.getInstrInfo());
}
-bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder,
- GISelChangeObserver &Observer) const {
+bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ GISelChangeObserver &Observer = Helper.Observer;
switch (MI.getOpcode()) {
default:
// No idea what to do.
@@ -634,19 +644,53 @@ bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
case TargetOpcode::G_ASHR:
case TargetOpcode::G_LSHR:
return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
+ case TargetOpcode::G_GLOBAL_VALUE:
+ return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
}
llvm_unreachable("expected switch to return");
}
+bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
+ assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
+ // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
+ // G_ADD_LOW instructions.
+ // By splitting this here, we can optimize accesses in the small code model by
+  // folding the G_ADD_LOW into the load/store offset.
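+  //
+  // For example (MIR sketch): %g:_(p0) = G_GLOBAL_VALUE @var becomes
+  //   %adrp:gpr64 = ADRP @var                 ; page address
+  //   %g:_(p0) = G_ADD_LOW %adrp(p0), @var    ; low 12 bits of the address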
+ auto GV = MI.getOperand(1).getGlobal();
+ if (GV->isThreadLocal())
+ return true; // Don't want to modify TLS vars.
+
+ auto &TM = ST->getTargetLowering()->getTargetMachine();
+ unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
+
+ if (OpFlags & AArch64II::MO_GOT)
+ return true;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
+ .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
+ // Set the regclass on the dest reg too.
+ MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
+
+ MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
+ .addGlobalAddress(GV, 0,
+ OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ MI.eraseFromParent();
+ return true;
+}
+
bool AArch64LegalizerInfo::legalizeIntrinsic(
- MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
+ LegalizerHelper &Helper, MachineInstr &MI) const {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
switch (MI.getIntrinsicID()) {
case Intrinsic::memcpy:
case Intrinsic::memset:
case Intrinsic::memmove:
- if (createMemLibcall(MIRBuilder, MRI, MI) ==
+ if (createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI) ==
LegalizerHelper::UnableToLegalize)
return false;
MI.eraseFromParent();
@@ -675,7 +719,6 @@ bool AArch64LegalizerInfo::legalizeShlAshrLshr(
if (Amount > 31)
return true; // This will have to remain a register variant.
assert(MRI.getType(AmtReg).getSizeInBits() == 32);
- MIRBuilder.setInstr(MI);
auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
MI.getOperand(2).setReg(ExtCst.getReg(0));
return true;
@@ -704,17 +747,15 @@ bool AArch64LegalizerInfo::legalizeLoadStore(
return false;
}
- MIRBuilder.setInstr(MI);
unsigned PtrSize = ValTy.getElementType().getSizeInBits();
const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
auto &MMO = **MI.memoperands_begin();
if (MI.getOpcode() == TargetOpcode::G_STORE) {
- auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
- MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
+ auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
+ MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
} else {
- Register NewReg = MRI.createGenericVirtualRegister(NewTy);
- auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
- MIRBuilder.buildBitcast({ValReg}, {NewLoad});
+ auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
+ MIRBuilder.buildBitcast(ValReg, NewLoad);
}
MI.eraseFromParent();
return true;
@@ -723,9 +764,8 @@ bool AArch64LegalizerInfo::legalizeLoadStore(
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const {
- MIRBuilder.setInstr(MI);
MachineFunction &MF = MIRBuilder.getMF();
- unsigned Align = MI.getOperand(2).getImm();
+ Align Alignment(MI.getOperand(2).getImm());
Register Dst = MI.getOperand(0).getReg();
Register ListPtr = MI.getOperand(1).getReg();
@@ -733,21 +773,19 @@ bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
- Register List = MRI.createGenericVirtualRegister(PtrTy);
- MIRBuilder.buildLoad(
- List, ListPtr,
+ const Align PtrAlign = Align(PtrSize);
+ auto List = MIRBuilder.buildLoad(
+ PtrTy, ListPtr,
*MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
- PtrSize, /* Align = */ PtrSize));
+ PtrSize, PtrAlign));
- Register DstPtr;
- if (Align > PtrSize) {
+ MachineInstrBuilder DstPtr;
+ if (Alignment > PtrAlign) {
// Realign the list to the actual required alignment.
- auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);
-
+ auto AlignMinus1 =
+ MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
-
- DstPtr = MRI.createGenericVirtualRegister(PtrTy);
- MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
+ DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
} else
DstPtr = List;
@@ -755,16 +793,16 @@ bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
MIRBuilder.buildLoad(
Dst, DstPtr,
*MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
- ValSize, std::max(Align, PtrSize)));
+ ValSize, std::max(Alignment, PtrAlign)));
- auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize));
+ auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
- MIRBuilder.buildStore(
- NewList, ListPtr,
- *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
- PtrSize, /* Align = */ PtrSize));
+ MIRBuilder.buildStore(NewList, ListPtr,
+ *MF.getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOStore,
+ PtrSize, PtrAlign));
MI.eraseFromParent();
return true;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index 15161bab466c..1cb24559c1ab 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -27,12 +27,10 @@ class AArch64LegalizerInfo : public LegalizerInfo {
public:
AArch64LegalizerInfo(const AArch64Subtarget &ST);
- bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder,
- GISelChangeObserver &Observer) const override;
+ bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override;
- bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const override;
+ bool legalizeIntrinsic(LegalizerHelper &Helper,
+ MachineInstr &MI) const override;
private:
bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -43,6 +41,11 @@ private:
bool legalizeShlAshrLshr(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const;
+
+ bool legalizeSmallCMGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const;
+ const AArch64Subtarget *ST;
};
} // End llvm namespace.
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
new file mode 100644
index 000000000000..baa8515baf3e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -0,0 +1,507 @@
+//=== lib/CodeGen/GlobalISel/AArch64PostLegalizerCombiner.cpp -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This performs post-legalization combines on generic MachineInstrs.
+//
+// Any combine that this pass performs must preserve instruction legality.
+// Combines unconcerned with legality should be handled by the
+// PreLegalizerCombiner instead.
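+//
+// For example, the combines below rewrite a G_SHUFFLE_VECTOR whose mask
+// matches a REV, TRN, UZP, ZIP, EXT or DUP pattern into the corresponding
+// AArch64 pseudo (G_REV64, G_TRN1/2, G_UZP1/2, G_ZIP1/2, G_EXT, G_DUP).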
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "aarch64-postlegalizer-combiner"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
+///
+/// Used for matching target-supported shuffles before codegen.
+struct ShuffleVectorPseudo {
+ unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
+ Register Dst; ///< Destination register.
+ SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
+ ShuffleVectorPseudo(unsigned Opc, Register Dst,
+ std::initializer_list<SrcOp> SrcOps)
+      : Opc(Opc), Dst(Dst), SrcOps(SrcOps) {}
+ ShuffleVectorPseudo() {}
+};
+
+/// \returns The splat index of a G_SHUFFLE_VECTOR \p MI when \p MI is a splat.
+/// If \p MI is not a splat, returns None.
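+///
+/// For example, the mask <1, 1, 1, 1> yields 1 and an all-undef mask yields 0,
+/// while <0, 1, 0, 1> is not a splat and yields None.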
+static Optional<int> getSplatIndex(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
+ "Only G_SHUFFLE_VECTOR can have a splat index!");
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ auto FirstDefinedIdx = find_if(Mask, [](int Elt) { return Elt >= 0; });
+
+ // If all elements are undefined, this shuffle can be considered a splat.
+  // Return 0 so callers have a better chance to simplify.
+ if (FirstDefinedIdx == Mask.end())
+ return 0;
+
+ // Make sure all remaining elements are either undef or the same
+ // as the first non-undef value.
+ int SplatValue = *FirstDefinedIdx;
+ if (any_of(make_range(std::next(FirstDefinedIdx), Mask.end()),
+ [&SplatValue](int Elt) { return Elt >= 0 && Elt != SplatValue; }))
+ return None;
+
+ return SplatValue;
+}
+
+/// Check if a vector shuffle corresponds to a REV instruction with the
+/// specified blocksize.
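+///
+/// For example, with 32-bit elements, the mask <1, 0, 3, 2> and a BlockSize of
+/// 64 reverse each 64-bit block and therefore match REV64.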
+static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
+ unsigned BlockSize) {
+ assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
+ "Only possible block sizes for REV are: 16, 32, 64");
+ assert(EltSize != 64 && "EltSize cannot be 64 for REV mask.");
+
+ unsigned BlockElts = M[0] + 1;
+
+ // If the first shuffle index is UNDEF, be optimistic.
+ if (M[0] < 0)
+ BlockElts = BlockSize / EltSize;
+
+ if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize)
+ return false;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ // Ignore undef indices.
+ if (M[i] < 0)
+ continue;
+ if (static_cast<unsigned>(M[i]) !=
+ (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
+ return false;
+ }
+
+ return true;
+}
+
+/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts.
+/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult.
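+///
+/// For example, with 4 elements, <0, 4, 2, 6> selects G_TRN1 and <1, 5, 3, 7>
+/// selects G_TRN2.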
+static bool isTRNMask(ArrayRef<int> M, unsigned NumElts,
+ unsigned &WhichResult) {
+ if (NumElts % 2 != 0)
+ return false;
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) ||
+ (M[i + 1] >= 0 &&
+ static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult))
+ return false;
+ }
+ return true;
+}
+
+/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
+/// sources of the shuffle are different.
+static Optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
+ unsigned NumElts) {
+ // Look for the first non-undef element.
+ auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
+ if (FirstRealElt == M.end())
+ return None;
+
+ // Use APInt to handle overflow when calculating expected element.
+ unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
+ APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
+
+ // The following shuffle indices must be the successive elements after the
+ // first real element.
+ if (any_of(
+ make_range(std::next(FirstRealElt), M.end()),
+ [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
+ return None;
+
+ // The index of an EXT is the first element if it is not UNDEF.
+ // Watch out for the beginning UNDEFs. The EXT index should be the expected
+ // value of the first element. E.g.
+ // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
+ // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
+ // ExpectedElt is the last mask index plus 1.
+ uint64_t Imm = ExpectedElt.getZExtValue();
+ bool ReverseExt = false;
+
+  // There are two different cases that require reversing the input vectors.
+ // For example, for vector <4 x i32> we have the following cases,
+ // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
+ // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
+ // For both cases, we finally use mask <5, 6, 7, 0>, which requires
+ // to reverse two input vectors.
+ if (Imm < NumElts)
+ ReverseExt = true;
+ else
+ Imm -= NumElts;
+ return std::make_pair(ReverseExt, Imm);
+}
+
+/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
+/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
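+///
+/// For example, with 4 elements, <0, 2, 4, 6> selects G_UZP1 and <1, 3, 5, 7>
+/// selects G_UZP2.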
+static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
+ unsigned &WhichResult) {
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ // Skip undef indices.
+ if (M[i] < 0)
+ continue;
+ if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult)
+ return false;
+ }
+ return true;
+}
+
+/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts.
+/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult.
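+///
+/// For example, with 4 elements, <0, 4, 1, 5> selects G_ZIP1 and <2, 6, 3, 7>
+/// selects G_ZIP2.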
+static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
+ unsigned &WhichResult) {
+ if (NumElts % 2 != 0)
+ return false;
+
+ // 0 means use ZIP1, 1 means use ZIP2.
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned i = 0; i != NumElts; i += 2) {
+ if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) ||
+ (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts))
+ return false;
+ Idx += 1;
+ }
+ return true;
+}
+
+/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
+/// G_REV instruction. Returns the appropriate G_REV opcode in \p MatchInfo.
+static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(Dst);
+ unsigned EltSize = Ty.getScalarSizeInBits();
+
+ // Element size for a rev cannot be 64.
+ if (EltSize == 64)
+ return false;
+
+ unsigned NumElts = Ty.getNumElements();
+
+ // Try to produce G_REV64
+ if (isREVMask(ShuffleMask, EltSize, NumElts, 64)) {
+ MatchInfo = ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src});
+ return true;
+ }
+
+ // TODO: Produce G_REV32 and G_REV16 once we have proper legalization support.
+ // This should be identical to above, but with a constant 32 and constant
+ // 16.
+ return false;
+}
+
+/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
+/// a G_TRN1 or G_TRN2 instruction.
+static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ unsigned WhichResult;
+ ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
+ Register Dst = MI.getOperand(0).getReg();
+ unsigned NumElts = MRI.getType(Dst).getNumElements();
+ if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
+ return false;
+ unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
+ Register V1 = MI.getOperand(1).getReg();
+ Register V2 = MI.getOperand(2).getReg();
+ MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
+ return true;
+}
+
+/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
+/// a G_UZP1 or G_UZP2 instruction.
+///
+/// \param [in] MI - The shuffle vector instruction.
+/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
+static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ unsigned WhichResult;
+ ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
+ Register Dst = MI.getOperand(0).getReg();
+ unsigned NumElts = MRI.getType(Dst).getNumElements();
+ if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
+ return false;
+ unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
+ Register V1 = MI.getOperand(1).getReg();
+ Register V2 = MI.getOperand(2).getReg();
+ MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
+ return true;
+}
+
+static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ unsigned WhichResult;
+ ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
+ Register Dst = MI.getOperand(0).getReg();
+ unsigned NumElts = MRI.getType(Dst).getNumElements();
+ if (!isZipMask(ShuffleMask, NumElts, WhichResult))
+ return false;
+ unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
+ Register V1 = MI.getOperand(1).getReg();
+ Register V2 = MI.getOperand(2).getReg();
+ MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
+ return true;
+}
+
+/// Helper function for matchDup.
+static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ if (Lane != 0)
+ return false;
+
+ // Try to match a vector splat operation into a dup instruction.
+ // We're looking for this pattern:
+ //
+ // %scalar:gpr(s64) = COPY $x0
+ // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
+ // %cst0:gpr(s32) = G_CONSTANT i32 0
+ // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
+ // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
+ // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef, %zerovec(<2 x s32>)
+ //
+ // ...into:
+ // %splat = G_DUP %scalar
+
+ // Begin matching the insert.
+ auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
+ MI.getOperand(1).getReg(), MRI);
+ if (!InsMI)
+ return false;
+ // Match the undef vector operand.
+ if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
+ MRI))
+ return false;
+
+ // Match the index constant 0.
+ int64_t Index = 0;
+ if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
+ return false;
+
+ MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
+ {InsMI->getOperand(2).getReg()});
+ return true;
+}
+
+/// Helper function for matchDup.
+static bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+  assert(Lane >= 0 && "Expected non-negative lane?");
+ // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
+ // lane's definition directly.
+ auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
+ MI.getOperand(1).getReg(), MRI);
+ if (!BuildVecMI)
+ return false;
+ Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
+ MatchInfo =
+ ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
+ return true;
+}
+
+static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ auto MaybeLane = getSplatIndex(MI);
+ if (!MaybeLane)
+ return false;
+ int Lane = *MaybeLane;
+  // If this is an undef splat, just generate it via a vdup, if possible.
+ if (Lane < 0)
+ Lane = 0;
+ if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
+ return true;
+ if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
+ return true;
+ return false;
+}
+
+static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ Register Dst = MI.getOperand(0).getReg();
+ auto ExtInfo = getExtMask(MI.getOperand(3).getShuffleMask(),
+ MRI.getType(Dst).getNumElements());
+ if (!ExtInfo)
+ return false;
+ bool ReverseExt;
+ uint64_t Imm;
+ std::tie(ReverseExt, Imm) = *ExtInfo;
+ Register V1 = MI.getOperand(1).getReg();
+ Register V2 = MI.getOperand(2).getReg();
+ if (ReverseExt)
+ std::swap(V1, V2);
+ uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
+ Imm *= ExtFactor;
+ MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
+ return true;
+}
+
+/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
+/// \p MatchInfo carries the opcode and operands to use. \p MI is the
+/// G_SHUFFLE_VECTOR.
+static bool applyShuffleVectorPseudo(MachineInstr &MI,
+ ShuffleVectorPseudo &MatchInfo) {
+ MachineIRBuilder MIRBuilder(MI);
+ MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
+ MI.eraseFromParent();
+ return true;
+}
+
+/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
+/// Special-cased because the constant operand must be emitted as a G_CONSTANT
+/// for the imported tablegen patterns to work.
+static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
+ MachineIRBuilder MIRBuilder(MI);
+ // Tablegen patterns expect an i32 G_CONSTANT as the final op.
+ auto Cst =
+ MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
+ MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
+ {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
+ MI.eraseFromParent();
+ return true;
+}
+
+#define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+#include "AArch64GenPostLegalizeGICombiner.inc"
+#undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+
+namespace {
+#define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+#include "AArch64GenPostLegalizeGICombiner.inc"
+#undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+
+class AArch64PostLegalizerCombinerInfo : public CombinerInfo {
+ GISelKnownBits *KB;
+ MachineDominatorTree *MDT;
+
+public:
+ AArch64GenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
+
+ AArch64PostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
+ GISelKnownBits *KB,
+ MachineDominatorTree *MDT)
+ : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
+ KB(KB), MDT(MDT) {
+ if (!GeneratedRuleCfg.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
+ }
+
+ virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const override;
+};
+
+bool AArch64PostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
+ MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ const auto *LI =
+ MI.getParent()->getParent()->getSubtarget().getLegalizerInfo();
+ CombinerHelper Helper(Observer, B, KB, MDT, LI);
+ AArch64GenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg);
+ return Generated.tryCombineAll(Observer, MI, B, Helper);
+}
+
+#define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
+#include "AArch64GenPostLegalizeGICombiner.inc"
+#undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
+
+class AArch64PostLegalizerCombiner : public MachineFunctionPass {
+public:
+ static char ID;
+
+ AArch64PostLegalizerCombiner(bool IsOptNone = false);
+
+ StringRef getPassName() const override {
+ return "AArch64PostLegalizerCombiner";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ bool IsOptNone;
+};
+} // end anonymous namespace
+
+void AArch64PostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
+ if (!IsOptNone) {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+AArch64PostLegalizerCombiner::AArch64PostLegalizerCombiner(bool IsOptNone)
+ : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
+ initializeAArch64PostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+}
+
+bool AArch64PostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ assert(MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Legalized) &&
+ "Expected a legalized function?");
+ auto *TPC = &getAnalysis<TargetPassConfig>();
+ const Function &F = MF.getFunction();
+ bool EnableOpt =
+ MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
+ GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+ MachineDominatorTree *MDT =
+ IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
+ AArch64PostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
+ F.hasMinSize(), KB, MDT);
+ Combiner C(PCInfo, TPC);
+ return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
+}
+
+char AArch64PostLegalizerCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(AArch64PostLegalizerCombiner, DEBUG_TYPE,
+ "Combine AArch64 MachineInstrs after legalization", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
+INITIALIZE_PASS_END(AArch64PostLegalizerCombiner, DEBUG_TYPE,
+ "Combine AArch64 MachineInstrs after legalization", false,
+ false)
+
+namespace llvm {
+FunctionPass *createAArch64PostLegalizeCombiner(bool IsOptNone) {
+ return new AArch64PostLegalizerCombiner(IsOptNone);
+}
+} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index 230fd514d022..9a1f200d5222 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -27,28 +27,62 @@
using namespace llvm;
using namespace MIPatternMatch;
+/// Return true if a G_FCONSTANT instruction is known to be better-represented
+/// as a G_CONSTANT.
+static bool matchFConstantToConstant(MachineInstr &MI,
+ MachineRegisterInfo &MRI) {
+ assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
+ Register DstReg = MI.getOperand(0).getReg();
+ const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
+ if (DstSize != 32 && DstSize != 64)
+ return false;
+
+ // When we're storing a value, it doesn't matter what register bank it's on.
+ // Since not all floating point constants can be materialized using a fmov,
+ // it makes more sense to just use a GPR.
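+  //
+  // For example, a G_FCONSTANT double 1.0 that only feeds stores can instead
+  // be a G_CONSTANT holding the bit pattern 0x3FF0000000000000 and stay on the
+  // GPR bank.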
+ return all_of(MRI.use_nodbg_instructions(DstReg),
+ [](const MachineInstr &Use) { return Use.mayStore(); });
+}
+
+/// Change a G_FCONSTANT into a G_CONSTANT.
+static void applyFConstantToConstant(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
+ MachineIRBuilder MIB(MI);
+ const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF();
+ MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt());
+ MI.eraseFromParent();
+}
+
+class AArch64PreLegalizerCombinerHelperState {
+protected:
+ CombinerHelper &Helper;
+
+public:
+ AArch64PreLegalizerCombinerHelperState(CombinerHelper &Helper)
+ : Helper(Helper) {}
+};
+
#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
-#include "AArch64GenGICombiner.inc"
+#include "AArch64GenPreLegalizeGICombiner.inc"
#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
namespace {
#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
-#include "AArch64GenGICombiner.inc"
+#include "AArch64GenPreLegalizeGICombiner.inc"
#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
class AArch64PreLegalizerCombinerInfo : public CombinerInfo {
GISelKnownBits *KB;
MachineDominatorTree *MDT;
+ AArch64GenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
public:
- AArch64GenPreLegalizerCombinerHelper Generated;
-
AArch64PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
GISelKnownBits *KB, MachineDominatorTree *MDT)
: CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
/*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
KB(KB), MDT(MDT) {
- if (!Generated.parseCommandLineOption())
+ if (!GeneratedRuleCfg.parseCommandLineOption())
report_fatal_error("Invalid rule identifier");
}
@@ -60,6 +94,7 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
MachineInstr &MI,
MachineIRBuilder &B) const {
CombinerHelper Helper(Observer, B, KB, MDT);
+ AArch64GenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper);
switch (MI.getOpcode()) {
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
@@ -79,7 +114,7 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
}
}
- if (Generated.tryCombineAll(Observer, MI, B, Helper))
+ if (Generated.tryCombineAll(Observer, MI, B))
return true;
switch (MI.getOpcode()) {
@@ -93,7 +128,7 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
}
#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
-#include "AArch64GenGICombiner.inc"
+#include "AArch64GenPreLegalizeGICombiner.inc"
#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
// Pass boilerplate
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 40efac261fd9..7e3ff1948dad 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -38,58 +38,58 @@ using namespace llvm;
AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
: AArch64GenRegisterBankInfo() {
- static bool AlreadyInit = false;
- // We have only one set of register banks, whatever the subtarget
- // is. Therefore, the initialization of the RegBanks table should be
- // done only once. Indeed the table of all register banks
- // (AArch64::RegBanks) is unique in the compiler. At some point, it
- // will get tablegen'ed and the whole constructor becomes empty.
- if (AlreadyInit)
- return;
- AlreadyInit = true;
-
- const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
- (void)RBGPR;
- assert(&AArch64::GPRRegBank == &RBGPR &&
- "The order in RegBanks is messed up");
-
- const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
- (void)RBFPR;
- assert(&AArch64::FPRRegBank == &RBFPR &&
- "The order in RegBanks is messed up");
-
- const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
- (void)RBCCR;
- assert(&AArch64::CCRegBank == &RBCCR && "The order in RegBanks is messed up");
-
- // The GPR register bank is fully defined by all the registers in
- // GR64all + its subclasses.
- assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
- "Subclass not added?");
- assert(RBGPR.getSize() == 64 && "GPRs should hold up to 64-bit");
-
- // The FPR register bank is fully defined by all the registers in
- // GR64all + its subclasses.
- assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
- "Subclass not added?");
- assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
- "Subclass not added?");
- assert(RBFPR.getSize() == 512 &&
- "FPRs should hold up to 512-bit via QQQQ sequence");
-
- assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
- "Class not added?");
- assert(RBCCR.getSize() == 32 && "CCR should hold up to 32-bit");
-
- // Check that the TableGen'ed like file is in sync we our expectations.
- // First, the Idx.
- assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
- {PMI_GPR32, PMI_GPR64}) &&
- "PartialMappingIdx's are incorrectly ordered");
- assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
- {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
- PMI_FPR256, PMI_FPR512}) &&
- "PartialMappingIdx's are incorrectly ordered");
+ static llvm::once_flag InitializeRegisterBankFlag;
+
+ static auto InitializeRegisterBankOnce = [&]() {
+ // We have only one set of register banks, whatever the subtarget
+ // is. Therefore, the initialization of the RegBanks table should be
+ // done only once. Indeed the table of all register banks
+ // (AArch64::RegBanks) is unique in the compiler. At some point, it
+ // will get tablegen'ed and the whole constructor becomes empty.
+
+ const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
+ (void)RBGPR;
+ assert(&AArch64::GPRRegBank == &RBGPR &&
+ "The order in RegBanks is messed up");
+
+ const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
+ (void)RBFPR;
+ assert(&AArch64::FPRRegBank == &RBFPR &&
+ "The order in RegBanks is messed up");
+
+ const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
+ (void)RBCCR;
+ assert(&AArch64::CCRegBank == &RBCCR &&
+ "The order in RegBanks is messed up");
+
+ // The GPR register bank is fully defined by all the registers in
+ // GR64all + its subclasses.
+ assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.getSize() == 64 && "GPRs should hold up to 64-bit");
+
+ // The FPR register bank is fully defined by all the registers in
+ // GR64all + its subclasses.
+ assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
+ "Subclass not added?");
+ assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
+ "Subclass not added?");
+ assert(RBFPR.getSize() == 512 &&
+ "FPRs should hold up to 512-bit via QQQQ sequence");
+
+ assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
+ "Class not added?");
+ assert(RBCCR.getSize() == 32 && "CCR should hold up to 32-bit");
+
+    // Check that the TableGen'ed file is in sync with our expectations.
+ // First, the Idx.
+ assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
+ {PMI_GPR32, PMI_GPR64}) &&
+ "PartialMappingIdx's are incorrectly ordered");
+ assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
+ {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
+ PMI_FPR256, PMI_FPR512}) &&
+ "PartialMappingIdx's are incorrectly ordered");
// Now, the content.
// Check partial mapping.
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB) \
@@ -99,14 +99,14 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
#Idx " is incorrectly initialized"); \
} while (false)
- CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
- CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
- CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
- CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
- CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
- CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
- CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
- CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);
+ CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
+ CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
+ CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
+ CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
+ CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
+ CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
+ CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
+ CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);
// Check value mapping.
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset) \
@@ -119,14 +119,14 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)
- CHECK_VALUEMAP(GPR, 32);
- CHECK_VALUEMAP(GPR, 64);
- CHECK_VALUEMAP(FPR, 16);
- CHECK_VALUEMAP(FPR, 32);
- CHECK_VALUEMAP(FPR, 64);
- CHECK_VALUEMAP(FPR, 128);
- CHECK_VALUEMAP(FPR, 256);
- CHECK_VALUEMAP(FPR, 512);
+ CHECK_VALUEMAP(GPR, 32);
+ CHECK_VALUEMAP(GPR, 64);
+ CHECK_VALUEMAP(FPR, 16);
+ CHECK_VALUEMAP(FPR, 32);
+ CHECK_VALUEMAP(FPR, 64);
+ CHECK_VALUEMAP(FPR, 128);
+ CHECK_VALUEMAP(FPR, 256);
+ CHECK_VALUEMAP(FPR, 512);
// Check the value mapping for 3-operands instructions where all the operands
// map to the same value mapping.
@@ -137,13 +137,13 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
CHECK_VALUEMAP_IMPL(RBName, Size, 2); \
} while (false)
- CHECK_VALUEMAP_3OPS(GPR, 32);
- CHECK_VALUEMAP_3OPS(GPR, 64);
- CHECK_VALUEMAP_3OPS(FPR, 32);
- CHECK_VALUEMAP_3OPS(FPR, 64);
- CHECK_VALUEMAP_3OPS(FPR, 128);
- CHECK_VALUEMAP_3OPS(FPR, 256);
- CHECK_VALUEMAP_3OPS(FPR, 512);
+ CHECK_VALUEMAP_3OPS(GPR, 32);
+ CHECK_VALUEMAP_3OPS(GPR, 64);
+ CHECK_VALUEMAP_3OPS(FPR, 32);
+ CHECK_VALUEMAP_3OPS(FPR, 64);
+ CHECK_VALUEMAP_3OPS(FPR, 128);
+ CHECK_VALUEMAP_3OPS(FPR, 256);
+ CHECK_VALUEMAP_3OPS(FPR, 512);
#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size) \
do { \
@@ -165,14 +165,14 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
\
} while (false)
- CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
- CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
- CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
- CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
- CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
- CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
- CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
- CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);
+ CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
+ CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
+ CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
+ CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
+ CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
+ CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
+ CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
+ CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);
#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize) \
do { \
@@ -193,12 +193,15 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
\
} while (false)
- CHECK_VALUEMAP_FPEXT(32, 16);
- CHECK_VALUEMAP_FPEXT(64, 16);
- CHECK_VALUEMAP_FPEXT(64, 32);
- CHECK_VALUEMAP_FPEXT(128, 64);
+ CHECK_VALUEMAP_FPEXT(32, 16);
+ CHECK_VALUEMAP_FPEXT(64, 16);
+ CHECK_VALUEMAP_FPEXT(64, 32);
+ CHECK_VALUEMAP_FPEXT(128, 64);
- assert(verify(TRI) && "Invalid register bank information");
+ assert(verify(TRI) && "Invalid register bank information");
+ };
+
+ llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}
unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
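// Illustrative sketch (not part of the patch above): the constructor hunk
// moves the partial-mapping sanity checks into a lambda and guards it with
// llvm::call_once, so the assert-only work runs a single time even when many
// subtargets are constructed concurrently. llvm::call_once mirrors
// std::call_once; a stand-alone analogue, with the names Flag and
// verifyPartialMappingsOnce invented for the example:
#include <cassert>
#include <mutex>

namespace {
std::once_flag Flag;

void verifyPartialMappingsOnce() {
  // Expensive, idempotent consistency checks live here (asserts only).
  assert(true && "PartialMappingIdx's are incorrectly ordered");
}
} // namespace

void initRegisterBankChecks() {
  // Cheap after the first call; the lambda body executes exactly once.
  std::call_once(Flag, [] { verifyPartialMappingsOnce(); });
}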
@@ -228,8 +231,11 @@ AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
switch (RC.getID()) {
case AArch64::FPR8RegClassID:
case AArch64::FPR16RegClassID:
+ case AArch64::FPR16_loRegClassID:
+ case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID:
case AArch64::FPR32RegClassID:
case AArch64::FPR64RegClassID:
+ case AArch64::FPR64_loRegClassID:
case AArch64::FPR128RegClassID:
case AArch64::FPR128_loRegClassID:
case AArch64::DDRegClassID:
@@ -495,6 +501,7 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(
const MachineInstr &MI, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
switch (MI.getOpcode()) {
+ case AArch64::G_DUP:
case TargetOpcode::G_SITOFP:
case TargetOpcode::G_UITOFP:
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
@@ -636,6 +643,16 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// Some of the floating-point instructions have mixed GPR and FPR operands:
// fine-tune the computed mapping.
switch (Opc) {
+ case AArch64::G_DUP: {
+ Register ScalarReg = MI.getOperand(1).getReg();
+ auto ScalarDef = MRI.getVRegDef(ScalarReg);
+ if (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
+ onlyDefinesFP(*ScalarDef, MRI, TRI))
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
+ else
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
+ break;
+ }
case TargetOpcode::G_TRUNC: {
LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
@@ -680,7 +697,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// In that case, we want the default mapping to be on FPR
// instead of blind map every scalar to GPR.
for (const MachineInstr &UseMI :
- MRI.use_instructions(MI.getOperand(0).getReg())) {
+ MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
// If we have at least one direct use in a FP instruction,
// assume this was a floating point load in the IR.
// If it was not, we would have had a bitcast before
@@ -727,9 +744,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
//
// %z = G_SELECT %cond %x %y
// fpr = G_FOO %z ...
- if (any_of(
- MRI.use_instructions(MI.getOperand(0).getReg()),
- [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
+ if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+ [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
++NumFP;
// Check if the defs of the source values always produce floating point
@@ -770,7 +786,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// UNMERGE into scalars from a vector should always use FPR.
// Likewise if any of the uses are FP instructions.
if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
- any_of(MRI.use_instructions(MI.getOperand(0).getReg()),
+ any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
[&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
// Set the register bank of every operand to FPR.
for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
index e956fca1aa10..e956fca1aa10 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index 05a909f1780a..9814f7625853 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -763,10 +763,10 @@ static inline bool isSVECpyImm(int64_t Imm) {
bool IsImm8 = int8_t(Imm) == Imm;
bool IsImm16 = int16_t(Imm & ~0xff) == Imm;
- if (std::is_same<int8_t, typename std::make_signed<T>::type>::value)
+ if (std::is_same<int8_t, std::make_signed_t<T>>::value)
return IsImm8 || uint8_t(Imm) == Imm;
- if (std::is_same<int16_t, typename std::make_signed<T>::type>::value)
+ if (std::is_same<int16_t, std::make_signed_t<T>>::value)
return IsImm8 || IsImm16 || uint16_t(Imm & ~0xff) == Imm;
return IsImm8 || IsImm16;
@@ -775,8 +775,7 @@ static inline bool isSVECpyImm(int64_t Imm) {
/// Returns true if Imm is valid for ADD/SUB.
template <typename T>
static inline bool isSVEAddSubImm(int64_t Imm) {
- bool IsInt8t =
- std::is_same<int8_t, typename std::make_signed<T>::type>::value;
+ bool IsInt8t = std::is_same<int8_t, std::make_signed_t<T>>::value;
return uint8_t(Imm) == Imm || (!IsInt8t && uint16_t(Imm & ~0xff) == Imm);
}
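// Illustrative sketch (not part of the patch above): the
// AArch64AddressingModes.h hunks only replace the verbose
// "typename std::make_signed<T>::type" spelling with the equivalent C++14
// alias std::make_signed_t<T>; the predicate logic is unchanged. A minimal
// stand-alone analogue of isSVEAddSubImm (isByteElementImm is an invented
// name):
#include <cstdint>
#include <type_traits>

template <typename T>
bool isByteElementImm(int64_t Imm) {
  // True when T's signed counterpart is int8_t, i.e. 8-bit elements, which
  // only accept an unshifted 8-bit immediate.
  bool IsInt8t = std::is_same<int8_t, std::make_signed_t<T>>::value;
  return uint8_t(Imm) == Imm || (!IsInt8t && uint16_t(Imm & ~0xff) == Imm);
}

// e.g. isByteElementImm<uint8_t>(0x1200) is false, isByteElementImm<uint16_t>(0x1200) is true.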
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 9db746733aa3..9f7dfdf62482 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -24,6 +24,7 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -33,6 +34,7 @@ namespace {
class AArch64AsmBackend : public MCAsmBackend {
static const unsigned PCRelFlagVal =
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits | MCFixupKindInfo::FKF_IsPCRel;
+protected:
Triple TheTriple;
public:
@@ -68,6 +70,11 @@ public:
{"fixup_aarch64_pcrel_call26", 0, 26, PCRelFlagVal},
{"fixup_aarch64_tlsdesc_call", 0, 0, 0}};
+ // Fixup kinds from .reloc directive are like R_AARCH64_NONE. They do not
+ // require any extra processing.
+ if (Kind >= FirstLiteralRelocationKind)
+ return MCAsmBackend::getFixupKindInfo(FK_NONE);
+
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
@@ -86,8 +93,8 @@ public:
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override;
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override;
+ void relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const override;
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
void HandleAssemblerFlag(MCAssemblerFlag Flag) {}
@@ -108,7 +115,6 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
- case FK_NONE:
case AArch64::fixup_aarch64_tlsdesc_call:
return 0;
@@ -237,11 +243,22 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
static_cast<AArch64MCExpr::VariantKind>(Target.getRefKind());
if (AArch64MCExpr::getSymbolLoc(RefKind) != AArch64MCExpr::VK_ABS &&
AArch64MCExpr::getSymbolLoc(RefKind) != AArch64MCExpr::VK_SABS) {
- // VK_GOTTPREL, VK_TPREL, VK_DTPREL are movw fixups, but they can't
- // ever be resolved in the assembler.
- Ctx.reportError(Fixup.getLoc(),
- "relocation for a thread-local variable points to an "
- "absolute symbol");
+ if (!RefKind) {
+ // The fixup is an expression
+ if (SignedValue > 0xFFFF || SignedValue < -0xFFFF)
+ Ctx.reportError(Fixup.getLoc(),
+ "fixup value out of range [-0xFFFF, 0xFFFF]");
+
+ // Invert the negative immediate because it will feed into a MOVN.
+ if (SignedValue < 0)
+ SignedValue = ~SignedValue;
+ Value = static_cast<uint64_t>(SignedValue);
+ } else
+ // VK_GOTTPREL, VK_TPREL, VK_DTPREL are movw fixups, but they can't
+ // ever be resolved in the assembler.
+ Ctx.reportError(Fixup.getLoc(),
+ "relocation for a thread-local variable points to an "
+ "absolute symbol");
return Value;
}
@@ -329,7 +346,6 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
if (!valueFitsIntoFixupKind(Fixup.getTargetKind(), Value))
Ctx.reportError(Fixup.getLoc(), "fixup value too large for data type!");
LLVM_FALLTHROUGH;
- case FK_NONE:
case FK_SecRel_2:
case FK_SecRel_4:
return Value;
@@ -337,9 +353,17 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
}
Optional<MCFixupKind> AArch64AsmBackend::getFixupKind(StringRef Name) const {
- if (TheTriple.isOSBinFormatELF() && Name == "R_AARCH64_NONE")
- return FK_NONE;
- return MCAsmBackend::getFixupKind(Name);
+ if (!TheTriple.isOSBinFormatELF())
+ return None;
+
+ unsigned Type = llvm::StringSwitch<unsigned>(Name)
+#define ELF_RELOC(X, Y) .Case(#X, Y)
+#include "llvm/BinaryFormat/ELFRelocs/AArch64.def"
+#undef ELF_RELOC
+ .Default(-1u);
+ if (Type == -1u)
+ return None;
+ return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
}
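// Illustrative sketch (not part of the patch above): getFixupKind now
// recognises any relocation name from BinaryFormat/ELFRelocs/AArch64.def and
// returns it offset past FirstLiteralRelocationKind, so ".reloc" directives
// can request arbitrary relocation types; getRelocType later subtracts the
// offset back out. A toy stand-alone version of the lookup-and-offset idea
// (the table contents, FirstLiteralKind and lookupRelocKind are invented):
#include <optional>
#include <string>
#include <unordered_map>

constexpr unsigned FirstLiteralKind = 0x1000; // stand-in for FirstLiteralRelocationKind

std::optional<unsigned> lookupRelocKind(const std::string &Name) {
  static const std::unordered_map<std::string, unsigned> Table = {
      {"R_AARCH64_NONE", 0}, {"R_AARCH64_ABS64", 257}, {"R_AARCH64_ABS32", 258}};
  auto It = Table.find(Name);
  if (It == Table.end())
    return std::nullopt;                // unknown name: no fixup kind
  return FirstLiteralKind + It->second; // undone again when the reloc is emitted
}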
/// getFixupKindContainereSizeInBytes - The number of bytes of the
@@ -386,9 +410,12 @@ void AArch64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
MutableArrayRef<char> Data, uint64_t Value,
bool IsResolved,
const MCSubtargetInfo *STI) const {
- unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
if (!Value)
return; // Doesn't change encoding.
+ unsigned Kind = Fixup.getKind();
+ if (Kind >= FirstLiteralRelocationKind)
+ return;
+ unsigned NumBytes = getFixupKindNumBytes(Kind);
MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
MCContext &Ctx = Asm.getContext();
int64_t SignedValue = static_cast<int64_t>(Value);
@@ -424,8 +451,9 @@ void AArch64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
// FIXME: getFixupKindInfo() and getFixupKindNumBytes() could be fixed to
// handle this more cleanly. This may affect the output of -show-mc-encoding.
AArch64MCExpr::VariantKind RefKind =
- static_cast<AArch64MCExpr::VariantKind>(Target.getRefKind());
- if (AArch64MCExpr::getSymbolLoc(RefKind) == AArch64MCExpr::VK_SABS) {
+ static_cast<AArch64MCExpr::VariantKind>(Target.getRefKind());
+ if (AArch64MCExpr::getSymbolLoc(RefKind) == AArch64MCExpr::VK_SABS ||
+ (!RefKind && Fixup.getTargetKind() == AArch64::fixup_aarch64_movw)) {
// If the immediate is negative, generate MOVN else MOVZ.
// (Bit 30 = 0) ==> MOVN, (Bit 30 = 1) ==> MOVZ.
if (SignedValue < 0)
@@ -451,9 +479,8 @@ bool AArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
return int64_t(Value) != int64_t(int8_t(Value));
}
-void AArch64AsmBackend::relaxInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI,
- MCInst &Res) const {
+void AArch64AsmBackend::relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const {
llvm_unreachable("AArch64AsmBackend::relaxInstruction() unimplemented");
}
@@ -474,7 +501,7 @@ bool AArch64AsmBackend::shouldForceRelocation(const MCAssembler &Asm,
const MCFixup &Fixup,
const MCValue &Target) {
unsigned Kind = Fixup.getKind();
- if (Kind == FK_NONE)
+ if (Kind >= FirstLiteralRelocationKind)
return true;
// The ADRP instruction adds some multiple of 0x1000 to the current PC &
@@ -544,7 +571,6 @@ enum CompactUnwindEncodings {
// FIXME: This should be in a separate file.
class DarwinAArch64AsmBackend : public AArch64AsmBackend {
const MCRegisterInfo &MRI;
- bool IsILP32;
/// Encode compact unwind stack adjustment for frameless functions.
/// See UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK in compact_unwind_encoding.h.
@@ -555,18 +581,15 @@ class DarwinAArch64AsmBackend : public AArch64AsmBackend {
public:
DarwinAArch64AsmBackend(const Target &T, const Triple &TT,
- const MCRegisterInfo &MRI, bool IsILP32)
- : AArch64AsmBackend(T, TT, /*IsLittleEndian*/ true), MRI(MRI),
- IsILP32(IsILP32) {}
+ const MCRegisterInfo &MRI)
+ : AArch64AsmBackend(T, TT, /*IsLittleEndian*/ true), MRI(MRI) {}
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
- if (IsILP32)
- return createAArch64MachObjectWriter(
- MachO::CPU_TYPE_ARM64_32, MachO::CPU_SUBTYPE_ARM64_32_V8, true);
- else
- return createAArch64MachObjectWriter(MachO::CPU_TYPE_ARM64,
- MachO::CPU_SUBTYPE_ARM64_ALL, false);
+ uint32_t CPUType = cantFail(MachO::getCPUType(TheTriple));
+ uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TheTriple));
+ return createAArch64MachObjectWriter(CPUType, CPUSubType,
+ TheTriple.isArch32Bit());
}
/// Generate the compact unwind encoding from the CFI directives.
@@ -749,8 +772,7 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
const MCTargetOptions &Options) {
const Triple &TheTriple = STI.getTargetTriple();
if (TheTriple.isOSBinFormatMachO()) {
- const bool IsILP32 = TheTriple.isArch32Bit();
- return new DarwinAArch64AsmBackend(T, TheTriple, MRI, IsILP32);
+ return new DarwinAArch64AsmBackend(T, TheTriple, MRI);
}
if (TheTriple.isOSBinFormatCOFF())
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 0fd1ca187be7..e5637dcab941 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -106,13 +106,17 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
+ unsigned Kind = Fixup.getTargetKind();
+ if (Kind >= FirstLiteralRelocationKind)
+ return Kind - FirstLiteralRelocationKind;
AArch64MCExpr::VariantKind RefKind =
static_cast<AArch64MCExpr::VariantKind>(Target.getRefKind());
AArch64MCExpr::VariantKind SymLoc = AArch64MCExpr::getSymbolLoc(RefKind);
bool IsNC = AArch64MCExpr::isNotChecked(RefKind);
assert((!Target.getSymA() ||
- Target.getSymA()->getKind() == MCSymbolRefExpr::VK_None) &&
+ Target.getSymA()->getKind() == MCSymbolRefExpr::VK_None ||
+ Target.getSymA()->getKind() == MCSymbolRefExpr::VK_PLT) &&
"Should only be expression-level modifiers here");
assert((!Target.getSymB() ||
@@ -120,14 +124,17 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
"Should only be expression-level modifiers here");
if (IsPCRel) {
- switch (Fixup.getTargetKind()) {
+ switch (Kind) {
case FK_Data_1:
Ctx.reportError(Fixup.getLoc(), "1-byte data relocations not supported");
return ELF::R_AARCH64_NONE;
case FK_Data_2:
return R_CLS(PREL16);
- case FK_Data_4:
- return R_CLS(PREL32);
+ case FK_Data_4: {
+ return Target.getAccessVariant() == MCSymbolRefExpr::VK_PLT
+ ? R_CLS(PLT32)
+ : R_CLS(PREL32);
+ }
case FK_Data_8:
if (IsILP32) {
Ctx.reportError(Fixup.getLoc(),
@@ -185,8 +192,6 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
if (IsILP32 && isNonILP32reloc(Fixup, RefKind, Ctx))
return ELF::R_AARCH64_NONE;
switch (Fixup.getTargetKind()) {
- case FK_NONE:
- return ELF::R_AARCH64_NONE;
case FK_Data_1:
Ctx.reportError(Fixup.getLoc(), "1-byte data relocations not supported");
return ELF::R_AARCH64_NONE;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index c33f7e957b54..fe4c34be1519 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -81,14 +81,14 @@ public:
std::move(Emitter)),
MappingSymbolCounter(0), LastEMS(EMS_None) {}
- void ChangeSection(MCSection *Section, const MCExpr *Subsection) override {
+ void changeSection(MCSection *Section, const MCExpr *Subsection) override {
// We have to keep track of the mapping symbol state of any sections we
// use. Each one should start off as EMS_None, which is provided as the
// default constructor by DenseMap::lookup.
LastMappingSymbols[getPreviousSection().first] = LastEMS;
LastEMS = LastMappingSymbols.lookup(Section);
- MCELFStreamer::ChangeSection(Section, Subsection);
+ MCELFStreamer::changeSection(Section, Subsection);
}
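// Illustrative sketch (not part of the patch above): beyond the rename to
// changeSection, the streamer still swaps the ELF mapping-symbol state
// ($x vs $d) when switching sections, saving the state of the section being
// left and restoring the state of the one being entered, with EMS_None for
// sections seen for the first time. A toy version with std::map (operator[]
// value-initialises to EMS_None, much like DenseMap::lookup); all names here
// are invented:
#include <map>

enum ElfMappingSymbol { EMS_None = 0, EMS_A64, EMS_Data };

struct SectionTracker {
  std::map<const void *, ElfMappingSymbol> LastMappingSymbols;
  ElfMappingSymbol LastEMS = EMS_None;
  const void *Current = nullptr;

  void changeSection(const void *Next) {
    LastMappingSymbols[Current] = LastEMS; // remember the state of the old section
    LastEMS = LastMappingSymbols[Next];    // missing entries read as EMS_None
    Current = Next;
  }
};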
// Reset state between object emissions
@@ -102,10 +102,10 @@ public:
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to add the appropriate mapping symbol if
/// necessary.
- void EmitInstruction(const MCInst &Inst,
+ void emitInstruction(const MCInst &Inst,
const MCSubtargetInfo &STI) override {
EmitA64MappingSymbol();
- MCELFStreamer::EmitInstruction(Inst, STI);
+ MCELFStreamer::emitInstruction(Inst, STI);
}
/// Emit a 32-bit value as an instruction. This is only used for the .inst
@@ -122,28 +122,28 @@ public:
}
EmitA64MappingSymbol();
- MCELFStreamer::EmitBytes(StringRef(Buffer, 4));
+ MCELFStreamer::emitBytes(StringRef(Buffer, 4));
}
/// This is one of the functions used to emit data into an ELF section, so the
/// AArch64 streamer overrides it to add the appropriate mapping symbol ($d)
/// if necessary.
- void EmitBytes(StringRef Data) override {
- EmitDataMappingSymbol();
- MCELFStreamer::EmitBytes(Data);
+ void emitBytes(StringRef Data) override {
+ emitDataMappingSymbol();
+ MCELFStreamer::emitBytes(Data);
}
/// This is one of the functions used to emit data into an ELF section, so the
/// AArch64 streamer overrides it to add the appropriate mapping symbol ($d)
/// if necessary.
- void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override {
- EmitDataMappingSymbol();
- MCELFStreamer::EmitValueImpl(Value, Size, Loc);
+ void emitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override {
+ emitDataMappingSymbol();
+ MCELFStreamer::emitValueImpl(Value, Size, Loc);
}
void emitFill(const MCExpr &NumBytes, uint64_t FillValue,
SMLoc Loc) override {
- EmitDataMappingSymbol();
+ emitDataMappingSymbol();
MCObjectStreamer::emitFill(NumBytes, FillValue, Loc);
}
private:
@@ -153,7 +153,7 @@ private:
EMS_Data
};
- void EmitDataMappingSymbol() {
+ void emitDataMappingSymbol() {
if (LastEMS == EMS_Data)
return;
EmitMappingSymbol("$d");
@@ -170,7 +170,7 @@ private:
void EmitMappingSymbol(StringRef Name) {
auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
Name + "." + Twine(MappingSymbolCounter++)));
- EmitLabel(Symbol);
+ emitLabel(Symbol);
Symbol->setType(ELF::STT_NOTYPE);
Symbol->setBinding(ELF::STB_LOCAL);
Symbol->setExternal(false);
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 469892213ef8..38474d31460d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -283,7 +283,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
}
if (Opcode == AArch64::SPACE) {
- O << '\t' << MAI.getCommentString() << " SPACE";
+ O << '\t' << MAI.getCommentString() << " SPACE "
+ << MI->getOperand(1).getImm();
printAnnotation(O, Annot);
return;
}
@@ -295,7 +296,7 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
return;
}
- if (!printAliasInstr(MI, STI, O))
+ if (!printAliasInstr(MI, Address, STI, O))
printInstruction(MI, Address, STI, O);
printAnnotation(O, Annot);
@@ -900,6 +901,19 @@ void AArch64InstPrinter::printImmHex(const MCInst *MI, unsigned OpNo,
O << format("#%#llx", Op.getImm());
}
+template<int Size>
+void AArch64InstPrinter::printSImm(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Size == 8)
+ O << "#" << formatImm((signed char)Op.getImm());
+ else if (Size == 16)
+ O << "#" << formatImm((signed short)Op.getImm());
+ else
+ O << "#" << formatImm(Op.getImm());
+}
+
void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo,
unsigned Imm, raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
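// Illustrative sketch (not part of the patch above): the new printSImm<Size>
// helper narrows the operand to a signed 8- or 16-bit value before printing,
// so e.g. an encoded 0xff byte immediate is shown as #-1 rather than #255.
// A stand-alone analogue (printSignedImm is an invented name):
#include <cstdint>
#include <ostream>

template <int Size>
void printSignedImm(int64_t Imm, std::ostream &OS) {
  if (Size == 8)
    OS << "#" << static_cast<int>(static_cast<int8_t>(Imm)); // sign-extend from 8 bits
  else if (Size == 16)
    OS << "#" << static_cast<int16_t>(Imm);                  // sign-extend from 16 bits
  else
    OS << "#" << Imm;
}

// printSignedImm<8>(0xff, std::cout) writes "#-1".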
@@ -1334,7 +1348,8 @@ void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
O << "[" << MI->getOperand(OpNum).getImm() << "]";
}
-void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum,
+void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, uint64_t Address,
+ unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
@@ -1342,17 +1357,20 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum,
// If the label has already been resolved to an immediate offset (say, when
// we're running the disassembler), just print the immediate.
if (Op.isImm()) {
- O << "#" << formatImm(Op.getImm() * 4);
+ int64_t Offset = Op.getImm() * 4;
+ if (PrintBranchImmAsAddress)
+ O << formatHex(Address + Offset);
+ else
+ O << "#" << formatImm(Offset);
return;
}
// If the branch target is simply an address then print it in hex.
const MCConstantExpr *BranchTarget =
dyn_cast<MCConstantExpr>(MI->getOperand(OpNum).getExpr());
- int64_t Address;
- if (BranchTarget && BranchTarget->evaluateAsAbsolute(Address)) {
- O << "0x";
- O.write_hex(Address);
+ int64_t TargetAddress;
+ if (BranchTarget && BranchTarget->evaluateAsAbsolute(TargetAddress)) {
+ O << formatHex(TargetAddress);
} else {
// Otherwise, just print the expression.
MI->getOperand(OpNum).getExpr()->print(O, &MAI);
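// Illustrative sketch (not part of the patch above): with
// PrintBranchImmAsAddress set, printAlignedLabel now renders a resolved
// branch operand as the absolute target address (instruction address plus
// imm * 4) instead of a raw "#offset". The arithmetic in isolation
// (printBranchTarget is an invented name):
#include <cstdint>
#include <cstdio>

void printBranchTarget(uint64_t Address, int64_t Imm, bool PrintAsAddress) {
  int64_t Offset = Imm * 4; // AArch64 branch immediates are scaled by 4
  if (PrintAsAddress)
    std::printf("0x%llx", static_cast<unsigned long long>(Address + Offset));
  else
    std::printf("#%lld", static_cast<long long>(Offset));
}

// printBranchTarget(0x400000, -2, true) prints "0x3ffff8".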
@@ -1411,6 +1429,12 @@ void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo,
return;
}
+ // Horrible hack for two different registers having the same encoding.
+ if (Val == AArch64SysReg::TRCEXTINSELR) {
+ O << "TRCEXTINSELR";
+ return;
+ }
+
const AArch64SysReg::SysReg *Reg = AArch64SysReg::lookupSysRegByEncoding(Val);
if (Reg && Reg->Readable && Reg->haveFeatures(STI.getFeatureBits()))
O << Reg->Name;
@@ -1431,6 +1455,12 @@ void AArch64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo,
return;
}
+ // Horrible hack for two different registers having the same encoding.
+ if (Val == AArch64SysReg::TRCEXTINSELR) {
+ O << "TRCEXTINSELR";
+ return;
+ }
+
const AArch64SysReg::SysReg *Reg = AArch64SysReg::lookupSysRegByEncoding(Val);
if (Reg && Reg->Writeable && Reg->haveFeatures(STI.getFeatureBits()))
O << Reg->Name;
@@ -1499,7 +1529,7 @@ void AArch64InstPrinter::printSVERegOp(const MCInst *MI, unsigned OpNum,
template <typename T>
void AArch64InstPrinter::printImmSVE(T Value, raw_ostream &O) {
- typename std::make_unsigned<T>::type HexValue = Value;
+ std::make_unsigned_t<T> HexValue = Value;
if (getPrintImmHex())
O << '#' << formatHex((uint64_t)HexValue);
@@ -1544,8 +1574,8 @@ template <typename T>
void AArch64InstPrinter::printSVELogicalImm(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- typedef typename std::make_signed<T>::type SignedT;
- typedef typename std::make_unsigned<T>::type UnsignedT;
+ typedef std::make_signed_t<T> SignedT;
+ typedef std::make_unsigned_t<T> UnsignedT;
uint64_t Val = MI->getOperand(OpNum).getImm();
UnsignedT PrintVal = AArch64_AM::decodeLogicalImmediate(Val, 64);
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
index 993f379b5343..6da5f0e81c80 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
@@ -32,10 +32,10 @@ public:
// Autogenerated by tblgen.
virtual void printInstruction(const MCInst *MI, uint64_t Address,
const MCSubtargetInfo &STI, raw_ostream &O);
- virtual bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
- raw_ostream &O);
- virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx,
+ virtual bool printAliasInstr(const MCInst *MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ virtual void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpIdx, unsigned PrintMethodIdx,
const MCSubtargetInfo &STI,
raw_ostream &O);
@@ -56,6 +56,9 @@ protected:
raw_ostream &O);
void printImmHex(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ template <int Size>
+ void printSImm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
template <typename T> void printImmSVE(T Value, raw_ostream &O);
void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm,
raw_ostream &O);
@@ -97,7 +100,7 @@ protected:
const MCSubtargetInfo &STI, raw_ostream &O);
void printInverseCondCode(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
- void printAlignedLabel(const MCInst *MI, unsigned OpNum,
+ void printAlignedLabel(const MCInst *MI, uint64_t Address, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
void printUImm12Offset(const MCInst *MI, unsigned OpNum, unsigned Scale,
raw_ostream &O);
@@ -202,10 +205,10 @@ public:
void printInstruction(const MCInst *MI, uint64_t Address,
const MCSubtargetInfo &STI, raw_ostream &O) override;
- bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
- raw_ostream &O) override;
- void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx,
+ bool printAliasInstr(const MCInst *MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
+ void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpIdx, unsigned PrintMethodIdx,
const MCSubtargetInfo &STI,
raw_ostream &O) override;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 5926a4f81616..9a63e26dec19 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -60,7 +60,7 @@ const MCExpr *AArch64MCAsmInfoDarwin::getExprForPersonalitySymbol(
const MCExpr *Res =
MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOT, Context);
MCSymbol *PCSym = Context.createTempSymbol();
- Streamer.EmitLabel(PCSym);
+ Streamer.emitLabel(PCSym);
const MCExpr *PC = MCSymbolRefExpr::create(PCSym, Context);
return MCBinaryExpr::createSub(Res, PC, Context);
}
@@ -96,8 +96,6 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) {
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
- UseIntegratedAssembler = true;
-
HasIdentDirective = true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 8f4d9cb94d60..da8f511c650f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -569,23 +569,24 @@ unsigned AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue,
if (UImm16MO.isImm())
return EncodedValue;
- const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr());
- switch (A64E->getKind()) {
- case AArch64MCExpr::VK_DTPREL_G2:
- case AArch64MCExpr::VK_DTPREL_G1:
- case AArch64MCExpr::VK_DTPREL_G0:
- case AArch64MCExpr::VK_GOTTPREL_G1:
- case AArch64MCExpr::VK_TPREL_G2:
- case AArch64MCExpr::VK_TPREL_G1:
- case AArch64MCExpr::VK_TPREL_G0:
- return EncodedValue & ~(1u << 30);
- default:
- // Nothing to do for an unsigned fixup.
- return EncodedValue;
+ const MCExpr *E = UImm16MO.getExpr();
+ if (const AArch64MCExpr *A64E = dyn_cast<AArch64MCExpr>(E)) {
+ switch (A64E->getKind()) {
+ case AArch64MCExpr::VK_DTPREL_G2:
+ case AArch64MCExpr::VK_DTPREL_G1:
+ case AArch64MCExpr::VK_DTPREL_G0:
+ case AArch64MCExpr::VK_GOTTPREL_G1:
+ case AArch64MCExpr::VK_TPREL_G2:
+ case AArch64MCExpr::VK_TPREL_G1:
+ case AArch64MCExpr::VK_TPREL_G0:
+ return EncodedValue & ~(1u << 30);
+ default:
+ // Nothing to do for an unsigned fixup.
+ return EncodedValue;
+ }
}
-
- return EncodedValue & ~(1u << 30);
+ return EncodedValue;
}
void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
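// Illustrative sketch (not part of the patch above): fixMOVZ now uses
// dyn_cast instead of cast, so a plain (non-target) MCExpr such as a literal
// symbol expression no longer trips an assertion and simply keeps the MOVZ
// encoding; only the listed TLS variants clear bit 30 to select the MOVN
// form. The same "downcast or bail out" guard, written with standard C++
// RTTI and invented types as a loose analogue of dyn_cast:
struct Expr { virtual ~Expr() = default; };
struct TargetExpr : Expr {
  bool IsSignedTLSKind = false; // stand-in for the DTPREL/TPREL/GOTTPREL cases
};

unsigned fixEncoding(const Expr &E, unsigned Encoded) {
  if (const auto *TE = dynamic_cast<const TargetExpr *>(&E)) {
    if (TE->IsSignedTLSKind)
      return Encoded & ~(1u << 30); // force the MOVN form
    return Encoded;                 // unsigned fixup: nothing to do
  }
  return Encoded;                   // not a target expression: leave as-is
}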
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 7dc3665baabc..209bff3a2311 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -254,7 +254,7 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI,
// Initial state of the frame pointer is SP.
unsigned Reg = MRI.getDwarfRegNum(AArch64::SP, true);
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 0);
+ MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, Reg, 0);
MAI->addInitialFrameState(Inst);
return MAI;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index fc04d37eb362..b0f414bd27ed 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -139,7 +139,7 @@ static bool canUseLocalRelocation(const MCSectionMachO &Section,
return false;
if (RefSec.getSegmentName() == "__DATA" &&
- RefSec.getSectionName() == "__objc_classrefs")
+ RefSec.getName() == "__objc_classrefs")
return false;
// FIXME: ld64 currently handles internal pointer-sized relocations
@@ -407,5 +407,5 @@ std::unique_ptr<MCObjectTargetWriter>
llvm::createAArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype,
bool IsILP32) {
return std::make_unique<AArch64MachObjectWriter>(CPUType, CPUSubtype,
- IsILP32);
+ IsILP32);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index f70752f5303f..48ed68f49263 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -51,7 +51,7 @@ void AArch64TargetStreamer::emitInst(uint32_t Inst) {
Inst >>= 8;
}
- getStreamer().EmitBytes(StringRef(Buffer, 4));
+ getStreamer().emitBytes(StringRef(Buffer, 4));
}
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
index 37c6fbb03908..03fbab5142a2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
@@ -28,7 +28,7 @@ public:
void EmitWinEHHandlerData(SMLoc Loc) override;
void EmitWindowsUnwindTables() override;
- void FinishImpl() override;
+ void finishImpl() override;
};
void AArch64WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) {
@@ -45,11 +45,11 @@ void AArch64WinCOFFStreamer::EmitWindowsUnwindTables() {
EHStreamer.Emit(*this);
}
-void AArch64WinCOFFStreamer::FinishImpl() {
- EmitFrames(nullptr);
+void AArch64WinCOFFStreamer::finishImpl() {
+ emitFrames(nullptr);
EmitWindowsUnwindTables();
- MCWinCOFFStreamer::FinishImpl();
+ MCWinCOFFStreamer::finishImpl();
}
} // end anonymous namespace
@@ -68,7 +68,7 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinUnwindCode(unsigned UnwindCode,
WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
if (!CurFrame)
return;
- MCSymbol *Label = S.EmitCFILabel();
+ MCSymbol *Label = S.emitCFILabel();
auto Inst = WinEH::Instruction(UnwindCode, Label, Reg, Offset);
if (InEpilogCFI)
CurFrame->EpilogMap[CurrentEpilog].push_back(Inst);
@@ -158,7 +158,7 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIPrologEnd() {
if (!CurFrame)
return;
- MCSymbol *Label = S.EmitCFILabel();
+ MCSymbol *Label = S.emitCFILabel();
CurFrame->PrologEnd = Label;
WinEH::Instruction Inst = WinEH::Instruction(Win64EH::UOP_End, Label, -1, 0);
auto it = CurFrame->Instructions.begin();
@@ -172,7 +172,7 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIEpilogStart() {
return;
InEpilogCFI = true;
- CurrentEpilog = S.EmitCFILabel();
+ CurrentEpilog = S.emitCFILabel();
}
void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIEpilogEnd() {
@@ -182,7 +182,7 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIEpilogEnd() {
return;
InEpilogCFI = false;
- MCSymbol *Label = S.EmitCFILabel();
+ MCSymbol *Label = S.emitCFILabel();
WinEH::Instruction Inst = WinEH::Instruction(Win64EH::UOP_End, Label, -1, 0);
CurFrame->EpilogMap[CurrentEpilog].push_back(Inst);
CurrentEpilog = nullptr;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td
index a172b8d7e6b0..a005d1e65abe 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -10,6 +10,14 @@
//
//===----------------------------------------------------------------------===//
+def SDT_AArch64Setcc : SDTypeProfile<1, 4, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
+ SDTCVecEltisVT<0, i1>, SDTCVecEltisVT<1, i1>, SDTCisSameAs<2, 3>,
+ SDTCisVT<4, OtherVT>
+]>;
+
+def AArch64setcc_z : SDNode<"AArch64ISD::SETCC_MERGE_ZERO", SDT_AArch64Setcc>;
+
def SVEPatternOperand : AsmOperandClass {
let Name = "SVEPattern";
let ParserMethod = "tryParseSVEPattern";
@@ -33,7 +41,7 @@ def SVEPrefetchOperand : AsmOperandClass {
let RenderMethod = "addPrefetchOperands";
}
-def sve_prfop : Operand<i32>, ImmLeaf<i32, [{
+def sve_prfop : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) <= 15);
}]> {
let PrintMethod = "printPrefetchOp<true>";
@@ -167,8 +175,8 @@ def SVEAddSubImmOperand32 : SVEShiftedImmOperand<32, "AddSub", "isSVEAddSubImm<i
def SVEAddSubImmOperand64 : SVEShiftedImmOperand<64, "AddSub", "isSVEAddSubImm<int64_t>">;
class imm8_opt_lsl<int ElementWidth, string printType,
- AsmOperandClass OpndClass, code Predicate>
- : Operand<i32>, ImmLeaf<i32, Predicate> {
+ AsmOperandClass OpndClass>
+ : Operand<i32> {
let EncoderMethod = "getImm8OptLsl";
let DecoderMethod = "DecodeImm8OptLsl<" # ElementWidth # ">";
let PrintMethod = "printImm8OptLsl<" # printType # ">";
@@ -176,31 +184,15 @@ class imm8_opt_lsl<int ElementWidth, string printType,
let MIOperandInfo = (ops i32imm, i32imm);
}
-def cpy_imm8_opt_lsl_i8 : imm8_opt_lsl<8, "int8_t", SVECpyImmOperand8, [{
- return AArch64_AM::isSVECpyImm<int8_t>(Imm);
-}]>;
-def cpy_imm8_opt_lsl_i16 : imm8_opt_lsl<16, "int16_t", SVECpyImmOperand16, [{
- return AArch64_AM::isSVECpyImm<int16_t>(Imm);
-}]>;
-def cpy_imm8_opt_lsl_i32 : imm8_opt_lsl<32, "int32_t", SVECpyImmOperand32, [{
- return AArch64_AM::isSVECpyImm<int32_t>(Imm);
-}]>;
-def cpy_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "int64_t", SVECpyImmOperand64, [{
- return AArch64_AM::isSVECpyImm<int64_t>(Imm);
-}]>;
-
-def addsub_imm8_opt_lsl_i8 : imm8_opt_lsl<8, "uint8_t", SVEAddSubImmOperand8, [{
- return AArch64_AM::isSVEAddSubImm<int8_t>(Imm);
-}]>;
-def addsub_imm8_opt_lsl_i16 : imm8_opt_lsl<16, "uint16_t", SVEAddSubImmOperand16, [{
- return AArch64_AM::isSVEAddSubImm<int16_t>(Imm);
-}]>;
-def addsub_imm8_opt_lsl_i32 : imm8_opt_lsl<32, "uint32_t", SVEAddSubImmOperand32, [{
- return AArch64_AM::isSVEAddSubImm<int32_t>(Imm);
-}]>;
-def addsub_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "uint64_t", SVEAddSubImmOperand64, [{
- return AArch64_AM::isSVEAddSubImm<int64_t>(Imm);
-}]>;
+def cpy_imm8_opt_lsl_i8 : imm8_opt_lsl<8, "int8_t", SVECpyImmOperand8>;
+def cpy_imm8_opt_lsl_i16 : imm8_opt_lsl<16, "int16_t", SVECpyImmOperand16>;
+def cpy_imm8_opt_lsl_i32 : imm8_opt_lsl<32, "int32_t", SVECpyImmOperand32>;
+def cpy_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "int64_t", SVECpyImmOperand64>;
+
+def addsub_imm8_opt_lsl_i8 : imm8_opt_lsl<8, "uint8_t", SVEAddSubImmOperand8>;
+def addsub_imm8_opt_lsl_i16 : imm8_opt_lsl<16, "uint16_t", SVEAddSubImmOperand16>;
+def addsub_imm8_opt_lsl_i32 : imm8_opt_lsl<32, "uint32_t", SVEAddSubImmOperand32>;
+def addsub_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "uint64_t", SVEAddSubImmOperand64>;
def SVEAddSubImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i8>", []>;
def SVEAddSubImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16>", []>;
@@ -212,9 +204,13 @@ def SVELogicalImm16Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i16>",
def SVELogicalImm32Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i32>", []>;
def SVELogicalImm64Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64>", []>;
+def SVE8BitLslImm : ComplexPattern<i32, 2, "SelectSVE8BitLslImm", [imm]>;
+
def SVEArithUImmPat : ComplexPattern<i32, 1, "SelectSVEArithImm", []>;
def SVEArithSImmPat : ComplexPattern<i32, 1, "SelectSVESignedArithImm", []>;
+def SVEShiftImm64 : ComplexPattern<i32, 1, "SelectSVEShiftImm64<0, 64>", []>;
+
class SVEExactFPImm<string Suffix, string ValA, string ValB> : AsmOperandClass {
let Name = "SVEExactFPImmOperand" # Suffix;
let DiagnosticType = "Invalid" # Name;
@@ -324,6 +320,16 @@ class SVE_1_Op_Imm_Arith_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
: Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm)))))),
(inst $Op1, i32:$imm)>;
+class SVE_1_Op_Imm_Shift_Pred_Pat<ValueType vt, ValueType pt, SDPatternOperator op,
+ ZPRRegOp zprty, Operand ImmTy, Instruction inst>
+ : Pat<(vt (op (pt (AArch64ptrue 31)), (vt zprty:$Op1), (vt (AArch64dup (ImmTy:$imm))))),
+ (inst $Op1, ImmTy:$imm)>;
+
+class SVE_1_Op_Imm_Arith_Pred_Pat<ValueType vt, ValueType pt, SDPatternOperator op,
+ ZPRRegOp zprty, ValueType it, ComplexPattern cpx, Instruction inst>
+ : Pat<(vt (op (pt (AArch64ptrue 31)), (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm)))))),
+ (inst $Op1, i32:$imm)>;
+
class SVE_1_Op_Imm_Log_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
ValueType it, ComplexPattern cpx, Instruction inst>
: Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i64:$imm)))))),
@@ -367,8 +373,22 @@ class SVE_4_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, (vt4 ImmTy:$Op4))),
(inst $Op1, $Op2, $Op3, ImmTy:$Op4)>;
+def SVEDup0 : ComplexPattern<i64, 0, "SelectDupZero", []>;
def SVEDup0Undef : ComplexPattern<i64, 0, "SelectDupZeroOrUndef", []>;
+let AddedComplexity = 1 in {
+class SVE_3_Op_Pat_SelZero<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ ValueType vt2, ValueType vt3, Instruction inst>
+: Pat<(vtd (vtd (op vt1:$Op1, (vselect vt1:$Op1, vt2:$Op2, (SVEDup0)), vt3:$Op3))),
+ (inst $Op1, $Op2, $Op3)>;
+
+class SVE_3_Op_Pat_Shift_Imm_SelZero<ValueType vtd, SDPatternOperator op,
+ ValueType vt1, ValueType vt2,
+ Operand vt3, Instruction inst>
+: Pat<(vtd (op vt1:$Op1, (vselect vt1:$Op1, vt2:$Op2, (SVEDup0)), (i32 (vt3:$Op3)))),
+ (inst $Op1, $Op2, vt3:$Op3)>;
+}
+
//
// Common but less generic patterns.
//
@@ -378,6 +398,69 @@ class SVE_1_Op_AllActive_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1)),
(inst (IMPLICIT_DEF), (ptrue 31), $Op1)>;
+class SVE_2_Op_AllActive_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ ValueType vt2, Instruction inst, Instruction ptrue>
+: Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
+ (inst (ptrue 31), $Op1, $Op2)>;
+
+//
+// Pseudo -> Instruction mappings
+//
+def getSVEPseudoMap : InstrMapping {
+ let FilterClass = "SVEPseudo2Instr";
+ let RowFields = ["PseudoName"];
+ let ColFields = ["IsInstr"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+class SVEPseudo2Instr<string name, bit instr> {
+ string PseudoName = name;
+ bit IsInstr = instr;
+}
+
+// Lookup e.g. DIV -> DIVR
+def getSVERevInstr : InstrMapping {
+ let FilterClass = "SVEInstr2Rev";
+ let RowFields = ["InstrName"];
+ let ColFields = ["isReverseInstr"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+// Lookup e.g. DIVR -> DIV
+def getSVENonRevInstr : InstrMapping {
+ let FilterClass = "SVEInstr2Rev";
+ let RowFields = ["InstrName"];
+ let ColFields = ["isReverseInstr"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
+
+class SVEInstr2Rev<string name1, string name2, bit name1IsReverseInstr> {
+ string InstrName = !if(name1IsReverseInstr, name1, name2);
+ bit isReverseInstr = name1IsReverseInstr;
+}
+
+//
+// Pseudos for destructive operands
+//
+let hasNoSchedulingInfo = 1 in {
+ class PredTwoOpPseudo<string name, ZPRRegOp zprty,
+ FalseLanesEnum flags = FalseLanesNone>
+ : SVEPseudo2Instr<name, 0>,
+ Pseudo<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zs1, zprty:$Zs2), []> {
+ let FalseLanes = flags;
+ }
+
+ class PredTwoOpImmPseudo<string name, ZPRRegOp zprty, Operand immty,
+ FalseLanesEnum flags = FalseLanesNone>
+ : SVEPseudo2Instr<name, 0>,
+ Pseudo<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zs1, immty:$imm), []> {
+ let FalseLanes = flags;
+ }
+}
+
//===----------------------------------------------------------------------===//
// SVE Predicate Misc Group
//===----------------------------------------------------------------------===//
@@ -566,7 +649,7 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
@@ -680,7 +763,7 @@ class sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
@@ -941,11 +1024,46 @@ multiclass sve_int_perm_tbl<string asm, SDPatternOperator op> {
def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve2_int_perm_tbl<string asm> {
+multiclass sve2_int_perm_tbl<string asm, SDPatternOperator op> {
def _B : sve_int_perm_tbl<0b00, 0b01, asm, ZPR8, ZZ_b>;
def _H : sve_int_perm_tbl<0b01, 0b01, asm, ZPR16, ZZ_h>;
def _S : sve_int_perm_tbl<0b10, 0b01, asm, ZPR32, ZZ_s>;
def _D : sve_int_perm_tbl<0b11, 0b01, asm, ZPR64, ZZ_d>;
+
+ def : Pat<(nxv16i8 (op nxv16i8:$Op1, nxv16i8:$Op2, nxv16i8:$Op3)),
+ (nxv16i8 (!cast<Instruction>(NAME # _B) (REG_SEQUENCE ZPR2, nxv16i8:$Op1, zsub0,
+ nxv16i8:$Op2, zsub1),
+ nxv16i8:$Op3))>;
+
+ def : Pat<(nxv8i16 (op nxv8i16:$Op1, nxv8i16:$Op2, nxv8i16:$Op3)),
+ (nxv8i16 (!cast<Instruction>(NAME # _H) (REG_SEQUENCE ZPR2, nxv8i16:$Op1, zsub0,
+ nxv8i16:$Op2, zsub1),
+ nxv8i16:$Op3))>;
+
+ def : Pat<(nxv4i32 (op nxv4i32:$Op1, nxv4i32:$Op2, nxv4i32:$Op3)),
+ (nxv4i32 (!cast<Instruction>(NAME # _S) (REG_SEQUENCE ZPR2, nxv4i32:$Op1, zsub0,
+ nxv4i32:$Op2, zsub1),
+ nxv4i32:$Op3))>;
+
+ def : Pat<(nxv2i64 (op nxv2i64:$Op1, nxv2i64:$Op2, nxv2i64:$Op3)),
+ (nxv2i64 (!cast<Instruction>(NAME # _D) (REG_SEQUENCE ZPR2, nxv2i64:$Op1, zsub0,
+ nxv2i64:$Op2, zsub1),
+ nxv2i64:$Op3))>;
+
+ def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8i16:$Op3)),
+ (nxv8f16 (!cast<Instruction>(NAME # _H) (REG_SEQUENCE ZPR2, nxv8f16:$Op1, zsub0,
+ nxv8f16:$Op2, zsub1),
+ nxv8i16:$Op3))>;
+
+ def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4i32:$Op3)),
+ (nxv4f32 (!cast<Instruction>(NAME # _S) (REG_SEQUENCE ZPR2, nxv4f32:$Op1, zsub0,
+ nxv4f32:$Op2, zsub1),
+ nxv4i32:$Op3))>;
+
+ def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, nxv2i64:$Op3)),
+ (nxv2f64 (!cast<Instruction>(NAME # _D) (REG_SEQUENCE ZPR2, nxv2f64:$Op1, zsub0,
+ nxv2f64:$Op2, zsub1),
+ nxv2i64:$Op3))>;
}
class sve2_int_perm_tbx<bits<2> sz8_64, string asm, ZPRRegOp zprty>
@@ -967,11 +1085,20 @@ class sve2_int_perm_tbx<bits<2> sz8_64, string asm, ZPRRegOp zprty>
let Constraints = "$Zd = $_Zd";
}
-multiclass sve2_int_perm_tbx<string asm> {
+multiclass sve2_int_perm_tbx<string asm, SDPatternOperator op> {
def _B : sve2_int_perm_tbx<0b00, asm, ZPR8>;
def _H : sve2_int_perm_tbx<0b01, asm, ZPR16>;
def _S : sve2_int_perm_tbx<0b10, asm, ZPR32>;
def _D : sve2_int_perm_tbx<0b11, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_3_Op_Pat<nxv8f16, op, nxv8f16, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
class sve_int_perm_reverse_z<bits<2> sz8_64, string asm, ZPRRegOp zprty>
@@ -1072,7 +1199,7 @@ class sve_int_perm_insrs<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
}
multiclass sve_int_perm_insrs<string asm, SDPatternOperator op> {
@@ -1102,7 +1229,7 @@ class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
}
multiclass sve_int_perm_insrv<string asm, SDPatternOperator op> {
@@ -1135,7 +1262,7 @@ class sve_int_perm_extract_i<string asm>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
@@ -1244,13 +1371,22 @@ class sve_int_pred_log<bits<4> opc, string asm>
}
-multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op> {
+multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op,
+ SDPatternOperator op_nopred = null_frag> {
def NAME : sve_int_pred_log<opc, asm>;
def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i1, nxv16i1, !cast<Instruction>(NAME)>;
def : SVE_3_Op_Pat<nxv8i1, op, nxv8i1, nxv8i1, nxv8i1, !cast<Instruction>(NAME)>;
def : SVE_3_Op_Pat<nxv4i1, op, nxv4i1, nxv4i1, nxv4i1, !cast<Instruction>(NAME)>;
def : SVE_3_Op_Pat<nxv2i1, op, nxv2i1, nxv2i1, nxv2i1, !cast<Instruction>(NAME)>;
+ def : SVE_2_Op_AllActive_Pat<nxv16i1, op_nopred, nxv16i1, nxv16i1,
+ !cast<Instruction>(NAME), PTRUE_B>;
+ def : SVE_2_Op_AllActive_Pat<nxv8i1, op_nopred, nxv8i1, nxv8i1,
+ !cast<Instruction>(NAME), PTRUE_H>;
+ def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4i1, nxv4i1,
+ !cast<Instruction>(NAME), PTRUE_S>;
+ def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2i1, nxv2i1,
+ !cast<Instruction>(NAME), PTRUE_D>;
}
@@ -1272,7 +1408,7 @@ class sve_int_log_imm<bits<2> opc, string asm>
let Constraints = "$Zdn = $_Zdn";
let DecoderMethod = "DecodeSVELogicalImmInstruction";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
@@ -1357,7 +1493,8 @@ class sve_int_bin_cons_arit_0<bits<2> sz8_64, bits<3> opc, string asm,
let Inst{4-0} = Zd;
}
-multiclass sve_int_bin_cons_arit_0<bits<3> opc, string asm, SDPatternOperator op> {
+multiclass sve_int_bin_cons_arit_0<bits<3> opc, string asm,
+ SDPatternOperator op, SDPatternOperator int_op> {
def _B : sve_int_bin_cons_arit_0<0b00, opc, asm, ZPR8>;
def _H : sve_int_bin_cons_arit_0<0b01, opc, asm, ZPR16>;
def _S : sve_int_bin_cons_arit_0<0b10, opc, asm, ZPR32>;
@@ -1367,6 +1504,12 @@ multiclass sve_int_bin_cons_arit_0<bits<3> opc, string asm, SDPatternOperator op
def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ // Intrinsic version
+ def : SVE_2_Op_Pat<nxv16i8, int_op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i16, int_op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, int_op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, int_op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -1394,7 +1537,7 @@ class sve_fp_2op_i_p_zds<bits<2> sz, bits<3> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
}
@@ -1423,15 +1566,21 @@ class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
}
-multiclass sve_fp_2op_p_zds<bits<4> opc, string asm,
- SDPatternOperator op> {
- def _H : sve_fp_2op_p_zds<0b01, opc, asm, ZPR16>;
- def _S : sve_fp_2op_p_zds<0b10, opc, asm, ZPR32>;
- def _D : sve_fp_2op_p_zds<0b11, opc, asm, ZPR64>;
+multiclass sve_fp_2op_p_zds<bits<4> opc, string asm, string Ps,
+ SDPatternOperator op, DestructiveInstTypeEnum flags,
+ string revname="", bit isReverseInstr=0> {
+ let DestructiveInstType = flags in {
+ def _H : sve_fp_2op_p_zds<0b01, opc, asm, ZPR16>,
+ SVEPseudo2Instr<Ps # _H, 1>, SVEInstr2Rev<NAME # _H, revname # _H, isReverseInstr>;
+ def _S : sve_fp_2op_p_zds<0b10, opc, asm, ZPR32>,
+ SVEPseudo2Instr<Ps # _S, 1>, SVEInstr2Rev<NAME # _S, revname # _S, isReverseInstr>;
+ def _D : sve_fp_2op_p_zds<0b11, opc, asm, ZPR64>,
+ SVEPseudo2Instr<Ps # _D, 1>, SVEInstr2Rev<NAME # _D, revname # _D, isReverseInstr>;
+ }
def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
@@ -1449,6 +1598,16 @@ multiclass sve_fp_2op_p_zds_fscale<bits<4> opc, string asm,
def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
+multiclass sve_fp_2op_p_zds_zeroing_hsd<SDPatternOperator op> {
+ def _ZERO_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesZero>;
+ def _ZERO_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesZero>;
+ def _ZERO_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesZero>;
+
+ def : SVE_3_Op_Pat_SelZero<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Pseudo>(NAME # _ZERO_H)>;
+ def : SVE_3_Op_Pat_SelZero<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Pseudo>(NAME # _ZERO_S)>;
+ def : SVE_3_Op_Pat_SelZero<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Pseudo>(NAME # _ZERO_D)>;
+}
+
class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, imm32_0_7:$imm3),
asm, "\t$Zdn, $_Zdn, $Zm, $imm3",
@@ -1466,7 +1625,7 @@ class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
@@ -1551,7 +1710,7 @@ class sve_fp_3op_p_zds_a<bits<2> sz, bits<2> opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
}
@@ -1586,7 +1745,7 @@ class sve_fp_3op_p_zds_b<bits<2> sz, bits<2> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
}
@@ -1620,7 +1779,7 @@ class sve_fp_fma_by_indexed_elem<bits<2> sz, bit opc, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
@@ -1646,12 +1805,12 @@ multiclass sve_fp_fma_by_indexed_elem<bit opc, string asm,
let Inst{19-16} = Zm;
}
- def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexH32b:$idx))),
- (!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, VectorIndexH32b:$idx)>;
- def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexS32b:$idx))),
- (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b:$idx)>;
- def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, (i32 VectorIndexD32b:$idx))),
- (!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b:$idx)>;
+ def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexH32b_timm:$idx))),
+ (!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, VectorIndexH32b_timm:$idx)>;
+ def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexS32b_timm:$idx))),
+ (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b_timm:$idx)>;
+ def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3, (i32 VectorIndexD32b_timm:$idx))),
+ (!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b_timm:$idx)>;
}
@@ -1694,12 +1853,12 @@ multiclass sve_fp_fmul_by_indexed_elem<string asm, SDPatternOperator op> {
let Inst{19-16} = Zm;
}
- def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, (i32 VectorIndexH32b:$idx))),
- (!cast<Instruction>(NAME # _H) $Op1, $Op2, VectorIndexH32b:$idx)>;
- def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, (i32 VectorIndexS32b:$idx))),
- (!cast<Instruction>(NAME # _S) $Op1, $Op2, VectorIndexS32b:$idx)>;
- def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, (i32 VectorIndexD32b:$idx))),
- (!cast<Instruction>(NAME # _D) $Op1, $Op2, VectorIndexD32b:$idx)>;
+ def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, (i32 VectorIndexH32b_timm:$idx))),
+ (!cast<Instruction>(NAME # _H) $Op1, $Op2, VectorIndexH32b_timm:$idx)>;
+ def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, (i32 VectorIndexS32b_timm:$idx))),
+ (!cast<Instruction>(NAME # _S) $Op1, $Op2, VectorIndexS32b_timm:$idx)>;
+ def : Pat<(nxv2f64 (op nxv2f64:$Op1, nxv2f64:$Op2, (i32 VectorIndexD32b_timm:$idx))),
+ (!cast<Instruction>(NAME # _D) $Op1, $Op2, VectorIndexD32b_timm:$idx)>;
}
//===----------------------------------------------------------------------===//
@@ -1727,7 +1886,7 @@ class sve_fp_fcmla<bits<2> sz, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
}
@@ -1767,7 +1926,7 @@ class sve_fp_fcmla_by_indexed_elem<bits<2> sz, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
@@ -1785,10 +1944,10 @@ multiclass sve_fp_fcmla_by_indexed_elem<string asm, SDPatternOperator op> {
let Inst{19-16} = Zm;
}
- def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexS32b:$idx), (i32 complexrotateop:$imm))),
- (!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, VectorIndexS32b:$idx, complexrotateop:$imm)>;
- def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexD32b:$idx), (i32 complexrotateop:$imm))),
- (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, VectorIndexD32b:$idx, complexrotateop:$imm)>;
+ def : Pat<(nxv8f16 (op nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3, (i32 VectorIndexS32b_timm:$idx), (i32 complexrotateop:$imm))),
+ (!cast<Instruction>(NAME # _H) $Op1, $Op2, $Op3, VectorIndexS32b_timm:$idx, complexrotateop:$imm)>;
+ def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3, (i32 VectorIndexD32b_timm:$idx), (i32 complexrotateop:$imm))),
+ (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, VectorIndexD32b_timm:$idx, complexrotateop:$imm)>;
}
//===----------------------------------------------------------------------===//
@@ -1815,7 +1974,7 @@ class sve_fp_fcadd<bits<2> sz, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
}
@@ -1861,22 +2020,22 @@ multiclass sve2_fp_convert_down_narrow<string asm, string op> {
def _StoH : sve2_fp_convert_precision<0b1000, asm, ZPR16, ZPR32>;
def _DtoS : sve2_fp_convert_precision<0b1110, asm, ZPR32, ZPR64>;
- def : SVE_3_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv16i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
- def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv16i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
+ def : SVE_3_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
+ def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
}
multiclass sve2_fp_convert_up_long<string asm, string op> {
def _HtoS : sve2_fp_convert_precision<0b1001, asm, ZPR32, ZPR16>;
def _StoD : sve2_fp_convert_precision<0b1111, asm, ZPR64, ZPR32>;
- def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv16i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
- def : SVE_3_Op_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv16i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
+ def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
+ def : SVE_3_Op_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
}
multiclass sve2_fp_convert_down_odd_rounding_top<string asm, string op> {
def _DtoS : sve2_fp_convert_precision<0b0010, asm, ZPR32, ZPR64>;
- def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv16i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
+ def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
}
//===----------------------------------------------------------------------===//
@@ -1902,7 +2061,7 @@ class sve2_fp_pairwise_pred<bits<2> sz, bits<3> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
}
@@ -1942,14 +2101,14 @@ class sve2_fp_mla_long_by_indexed_elem<bits<2> opc, string asm>
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
multiclass sve2_fp_mla_long_by_indexed_elem<bits<2> opc, string asm,
SDPatternOperator op> {
def NAME : sve2_fp_mla_long_by_indexed_elem<opc, asm>;
- def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, nxv8f16, nxv8f16, i32, VectorIndexH32b, !cast<Instruction>(NAME)>;
+ def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, nxv8f16, nxv8f16, i32, VectorIndexH32b_timm, !cast<Instruction>(NAME)>;
}
//===----------------------------------------------------------------------===//
@@ -1974,7 +2133,7 @@ class sve2_fp_mla_long<bits<2> opc, string asm>
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
@@ -2084,7 +2243,7 @@ class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = size;
}
@@ -2120,7 +2279,7 @@ multiclass sve2_fp_flogb<string asm, SDPatternOperator op> {
multiclass sve2_fp_convert_down_odd_rounding<string asm, string op> {
def _DtoS : sve_fp_2op_p_zd<0b0001010, asm, ZPR64, ZPR32, ElementSizeD>;
- def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv16i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
+ def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
}
//===----------------------------------------------------------------------===//
@@ -2176,7 +2335,7 @@ class sve_int_bin_pred_arit_log<bits<2> sz8_64, bits<2> fmt, bits<3> opc,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
}
@@ -2192,11 +2351,20 @@ multiclass sve_int_bin_pred_log<bits<3> opc, string asm, SDPatternOperator op> {
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_bin_pred_arit_0<bits<3> opc, string asm, SDPatternOperator op> {
- def _B : sve_int_bin_pred_arit_log<0b00, 0b00, opc, asm, ZPR8>;
- def _H : sve_int_bin_pred_arit_log<0b01, 0b00, opc, asm, ZPR16>;
- def _S : sve_int_bin_pred_arit_log<0b10, 0b00, opc, asm, ZPR32>;
- def _D : sve_int_bin_pred_arit_log<0b11, 0b00, opc, asm, ZPR64>;
+multiclass sve_int_bin_pred_arit_0<bits<3> opc, string asm, string Ps,
+ SDPatternOperator op,
+ DestructiveInstTypeEnum flags,
+ string revname="", bit isReverseInstr=0> {
+ let DestructiveInstType = flags in {
+ def _B : sve_int_bin_pred_arit_log<0b00, 0b00, opc, asm, ZPR8>,
+ SVEPseudo2Instr<Ps # _B, 1>, SVEInstr2Rev<NAME # _B, revname # _B, isReverseInstr>;
+ def _H : sve_int_bin_pred_arit_log<0b01, 0b00, opc, asm, ZPR16>,
+ SVEPseudo2Instr<Ps # _H, 1>, SVEInstr2Rev<NAME # _H, revname # _H, isReverseInstr>;
+ def _S : sve_int_bin_pred_arit_log<0b10, 0b00, opc, asm, ZPR32>,
+ SVEPseudo2Instr<Ps # _S, 1>, SVEInstr2Rev<NAME # _S, revname # _S, isReverseInstr>;
+ def _D : sve_int_bin_pred_arit_log<0b11, 0b00, opc, asm, ZPR64>,
+ SVEPseudo2Instr<Ps # _D, 1>, SVEInstr2Rev<NAME # _D, revname # _D, isReverseInstr>;
+ }
def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
@@ -2229,9 +2397,16 @@ multiclass sve_int_bin_pred_arit_2<bits<3> opc, string asm, SDPatternOperator op
}
// Special case for divides which are not defined for 8b/16b elements.
-multiclass sve_int_bin_pred_arit_2_div<bits<3> opc, string asm, SDPatternOperator op> {
- def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>;
- def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>;
+multiclass sve_int_bin_pred_arit_2_div<bits<3> opc, string asm, string Ps,
+ SDPatternOperator op,
+ DestructiveInstTypeEnum flags,
+ string revname="", bit isReverseInstr=0> {
+ let DestructiveInstType = flags in {
+ def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>,
+ SVEPseudo2Instr<Ps # _S, 1>, SVEInstr2Rev<NAME # _S, revname # _S, isReverseInstr>;
+ def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>,
+ SVEPseudo2Instr<Ps # _D, 1>, SVEInstr2Rev<NAME # _D, revname # _D, isReverseInstr>;
+ }
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
@@ -2262,7 +2437,7 @@ class sve_int_mladdsub_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
}
@@ -2299,7 +2474,7 @@ class sve_int_mlas_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
}
@@ -2336,21 +2511,30 @@ class sve2_int_mla<bits<2> sz, bits<5> opc, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
-multiclass sve2_int_mla<bit S, string asm> {
+multiclass sve2_int_mla<bit S, string asm, SDPatternOperator op> {
def _B : sve2_int_mla<0b00, { 0b1110, S }, asm, ZPR8, ZPR8>;
def _H : sve2_int_mla<0b01, { 0b1110, S }, asm, ZPR16, ZPR16>;
def _S : sve2_int_mla<0b10, { 0b1110, S }, asm, ZPR32, ZPR32>;
def _D : sve2_int_mla<0b11, { 0b1110, S }, asm, ZPR64, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve2_int_mla_long<bits<5> opc, string asm> {
+multiclass sve2_int_mla_long<bits<5> opc, string asm, SDPatternOperator op> {
def _H : sve2_int_mla<0b01, opc, asm, ZPR16, ZPR8>;
def _S : sve2_int_mla<0b10, opc, asm, ZPR32, ZPR16>;
def _D : sve2_int_mla<0b11, opc, asm, ZPR64, ZPR32>;
+
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -2372,39 +2556,44 @@ class sve2_int_mla_by_indexed_elem<bits<2> sz, bits<6> opc, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
-multiclass sve2_int_mla_by_indexed_elem<bits<2> opc, bit S, string asm> {
- def _H : sve2_int_mla_by_indexed_elem<{0, ?}, { 0b000, opc, S }, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexH> {
+multiclass sve2_int_mla_by_indexed_elem<bits<2> opc, bit S, string asm,
+ SDPatternOperator op> {
+ def _H : sve2_int_mla_by_indexed_elem<{0, ?}, { 0b000, opc, S }, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexH32b> {
bits<3> Zm;
bits<3> iop;
let Inst{22} = iop{2};
let Inst{20-19} = iop{1-0};
let Inst{18-16} = Zm;
}
- def _S : sve2_int_mla_by_indexed_elem<0b10, { 0b000, opc, S }, asm, ZPR32, ZPR32, ZPR3b32, VectorIndexS> {
+ def _S : sve2_int_mla_by_indexed_elem<0b10, { 0b000, opc, S }, asm, ZPR32, ZPR32, ZPR3b32, VectorIndexS32b> {
bits<3> Zm;
bits<2> iop;
let Inst{20-19} = iop;
let Inst{18-16} = Zm;
}
- def _D : sve2_int_mla_by_indexed_elem<0b11, { 0b000, opc, S }, asm, ZPR64, ZPR64, ZPR4b64, VectorIndexD> {
+ def _D : sve2_int_mla_by_indexed_elem<0b11, { 0b000, opc, S }, asm, ZPR64, ZPR64, ZPR4b64, VectorIndexD32b> {
bits<4> Zm;
bit iop;
let Inst{20} = iop;
let Inst{19-16} = Zm;
}
+
+ def : SVE_4_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, i32, VectorIndexH32b_timm, !cast<Instruction>(NAME # _H)>;
+ def : SVE_4_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME # _S)>;
+ def : SVE_4_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, i32, VectorIndexD32b_timm, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
// SVE2 Integer Multiply-Add Long - Indexed Group
//===----------------------------------------------------------------------===//
-multiclass sve2_int_mla_long_by_indexed_elem<bits<4> opc, string asm> {
+multiclass sve2_int_mla_long_by_indexed_elem<bits<4> opc, string asm, SDPatternOperator op> {
def _S : sve2_int_mla_by_indexed_elem<0b10, { opc{3}, 0b0, opc{2-1}, ?, opc{0} },
- asm, ZPR32, ZPR16, ZPR3b16, VectorIndexH> {
+ asm, ZPR32, ZPR16, ZPR3b16, VectorIndexH32b> {
bits<3> Zm;
bits<3> iop;
let Inst{20-19} = iop{2-1};
@@ -2412,13 +2601,16 @@ multiclass sve2_int_mla_long_by_indexed_elem<bits<4> opc, string asm> {
let Inst{11} = iop{0};
}
def _D : sve2_int_mla_by_indexed_elem<0b11, { opc{3}, 0b0, opc{2-1}, ?, opc{0} },
- asm, ZPR64, ZPR32, ZPR4b32, VectorIndexS> {
+ asm, ZPR64, ZPR32, ZPR4b32, VectorIndexS32b> {
bits<4> Zm;
bits<2> iop;
let Inst{20} = iop{1};
let Inst{19-16} = Zm;
let Inst{11} = iop{0};
}
+
+ def : SVE_4_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv8i16, nxv8i16, i32, VectorIndexH32b_timm, !cast<Instruction>(NAME # _S)>;
+ def : SVE_4_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv4i32, nxv4i32, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -2442,7 +2634,7 @@ class sve_intx_dot<bit sz, bit U, string asm, ZPRRegOp zprty1,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
}
multiclass sve_intx_dot<bit opc, string asm, SDPatternOperator op> {
@@ -2474,28 +2666,28 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
}
multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm,
SDPatternOperator op> {
- def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b> {
+ def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b_timm> {
bits<2> iop;
bits<3> Zm;
let Inst{20-19} = iop;
let Inst{18-16} = Zm;
}
- def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b> {
+ def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b_timm> {
bits<1> iop;
bits<4> Zm;
let Inst{20} = iop;
let Inst{19-16} = Zm;
}
- def : Pat<(nxv4i32 (op nxv4i32:$Op1, nxv16i8:$Op2, nxv16i8:$Op3, (i32 VectorIndexS32b:$idx))),
- (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b:$idx)>;
- def : Pat<(nxv2i64 (op nxv2i64:$Op1, nxv8i16:$Op2, nxv8i16:$Op3, (i32 VectorIndexD32b:$idx))),
- (!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b:$idx)>;
+ def : Pat<(nxv4i32 (op nxv4i32:$Op1, nxv16i8:$Op2, nxv16i8:$Op3, (i32 VectorIndexS32b_timm:$idx))),
+ (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b_timm:$idx)>;
+ def : Pat<(nxv2i64 (op nxv2i64:$Op1, nxv8i16:$Op2, nxv8i16:$Op3, (i32 VectorIndexD32b_timm:$idx))),
+ (!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b_timm:$idx)>;
}
//===----------------------------------------------------------------------===//
@@ -2521,24 +2713,36 @@ class sve2_complex_int_arith<bits<2> sz, bits<4> opc, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
-multiclass sve2_cintx_dot<string asm> {
+multiclass sve2_cintx_dot<string asm, SDPatternOperator op> {
def _S : sve2_complex_int_arith<0b10, 0b0001, asm, ZPR32, ZPR8>;
def _D : sve2_complex_int_arith<0b11, 0b0001, asm, ZPR64, ZPR16>;
+
+ def : Pat<(nxv4i32 (op (nxv4i32 ZPR32:$Op1), (nxv16i8 ZPR8:$Op2), (nxv16i8 ZPR8:$Op3),
+ (i32 complexrotateop:$imm))),
+ (!cast<Instruction>(NAME # "_S") ZPR32:$Op1, ZPR8:$Op2, ZPR8:$Op3, complexrotateop:$imm)>;
+ def : Pat<(nxv2i64 (op (nxv2i64 ZPR64:$Op1), (nxv8i16 ZPR16:$Op2), (nxv8i16 ZPR16:$Op3),
+ (i32 complexrotateop:$imm))),
+ (!cast<Instruction>(NAME # "_D") ZPR64:$Op1, ZPR16:$Op2, ZPR16:$Op3, complexrotateop:$imm)>;
}
//===----------------------------------------------------------------------===//
// SVE2 Complex Multiply-Add Group
//===----------------------------------------------------------------------===//
-multiclass sve2_int_cmla<bit opc, string asm> {
+multiclass sve2_int_cmla<bit opc, string asm, SDPatternOperator op> {
def _B : sve2_complex_int_arith<0b00, { 0b001, opc }, asm, ZPR8, ZPR8>;
def _H : sve2_complex_int_arith<0b01, { 0b001, opc }, asm, ZPR16, ZPR16>;
def _S : sve2_complex_int_arith<0b10, { 0b001, opc }, asm, ZPR32, ZPR32>;
def _D : sve2_complex_int_arith<0b11, { 0b001, opc }, asm, ZPR64, ZPR64>;
+
+ def : SVE_4_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, i32, complexrotateop, !cast<Instruction>(NAME # _B)>;
+ def : SVE_4_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, i32, complexrotateop, !cast<Instruction>(NAME # _H)>;
+ def : SVE_4_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, i32, complexrotateop, !cast<Instruction>(NAME # _S)>;
+ def : SVE_4_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, i32, complexrotateop, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -2563,42 +2767,58 @@ class sve2_complex_int_arith_indexed<bits<2> sz, bits<4> opc, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
-multiclass sve2_cintx_dot_by_indexed_elem<string asm> {
- def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> {
+multiclass sve2_cintx_dot_by_indexed_elem<string asm, SDPatternOperator op> {
+ def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b> {
bits<2> iop;
bits<3> Zm;
let Inst{20-19} = iop;
let Inst{18-16} = Zm;
}
- def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> {
+ def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b> {
bit iop;
bits<4> Zm;
let Inst{20} = iop;
let Inst{19-16} = Zm;
}
+
+ def : Pat<(nxv4i32 (op (nxv4i32 ZPR32:$Op1), (nxv16i8 ZPR8:$Op2), (nxv16i8 ZPR8:$Op3),
+ (i32 VectorIndexS32b_timm:$idx), (i32 complexrotateop:$imm))),
+ (!cast<Instruction>(NAME # "_S") ZPR32:$Op1, ZPR8:$Op2, ZPR8:$Op3, VectorIndexS32b_timm:$idx, complexrotateop:$imm)>;
+ def : Pat<(nxv2i64 (op (nxv2i64 ZPR64:$Op1), (nxv8i16 ZPR16:$Op2), (nxv8i16 ZPR16:$Op3),
+ (i32 VectorIndexD32b_timm:$idx), (i32 complexrotateop:$imm))),
+ (!cast<Instruction>(NAME # "_D") ZPR64:$Op1, ZPR16:$Op2, ZPR16:$Op3, VectorIndexD32b_timm:$idx, complexrotateop:$imm)>;
}
//===----------------------------------------------------------------------===//
// SVE2 Complex Multiply-Add - Indexed Group
//===----------------------------------------------------------------------===//
-multiclass sve2_cmla_by_indexed_elem<bit opc, string asm> {
- def _H : sve2_complex_int_arith_indexed<0b10, { 0b011, opc }, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexS> {
+multiclass sve2_cmla_by_indexed_elem<bit opc, string asm,
+ SDPatternOperator op> {
+ def _H : sve2_complex_int_arith_indexed<0b10, { 0b011, opc }, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexS32b> {
bits<2> iop;
bits<3> Zm;
let Inst{20-19} = iop;
let Inst{18-16} = Zm;
}
- def _S : sve2_complex_int_arith_indexed<0b11, { 0b011, opc }, asm, ZPR32, ZPR32, ZPR4b32, VectorIndexD> {
+ def _S : sve2_complex_int_arith_indexed<0b11, { 0b011, opc }, asm, ZPR32, ZPR32, ZPR4b32, VectorIndexD32b> {
bit iop;
bits<4> Zm;
let Inst{20} = iop;
let Inst{19-16} = Zm;
}
+
+ def : Pat<(nxv8i16 (op (nxv8i16 ZPR16:$Op1), (nxv8i16 ZPR16:$Op2), (nxv8i16 ZPR16:$Op3),
+ (i32 VectorIndexS32b_timm:$idx), (i32 complexrotateop:$imm))),
+ (!cast<Instruction>(NAME # "_H") ZPR16:$Op1, ZPR16:$Op2, ZPR16:$Op3, VectorIndexS32b_timm:$idx, complexrotateop:$imm)>;
+
+ def : Pat<(nxv4i32 (op (nxv4i32 ZPR32:$Op1), (nxv4i32 ZPR32:$Op2), (nxv4i32 ZPR32:$Op3),
+ (i32 VectorIndexD32b_timm:$idx), (i32 complexrotateop:$imm))),
+ (!cast<Instruction>(NAME # "_S") ZPR32:$Op1, ZPR32:$Op2, ZPR32:$Op3, VectorIndexD32b_timm:$idx, complexrotateop:$imm)>;
}
//===----------------------------------------------------------------------===//
@@ -2621,11 +2841,22 @@ class sve2_int_mul<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zd;
}
-multiclass sve2_int_mul<bits<3> opc, string asm> {
+multiclass sve2_int_mul<bits<3> opc, string asm, SDPatternOperator op> {
def _B : sve2_int_mul<0b00, opc, asm, ZPR8>;
def _H : sve2_int_mul<0b01, opc, asm, ZPR16>;
def _S : sve2_int_mul<0b10, opc, asm, ZPR32>;
def _D : sve2_int_mul<0b11, opc, asm, ZPR64>;
+
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+}
+
+multiclass sve2_int_mul_single<bits<3> opc, string asm, SDPatternOperator op> {
+ def _B : sve2_int_mul<0b00, opc, asm, ZPR8>;
+
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
}
//===----------------------------------------------------------------------===//
@@ -2648,31 +2879,37 @@ class sve2_int_mul_by_indexed_elem<bits<2> sz, bits<4> opc, string asm,
let Inst{4-0} = Zd;
}
-multiclass sve2_int_mul_by_indexed_elem<bits<4> opc, string asm> {
- def _H : sve2_int_mul_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexH> {
+multiclass sve2_int_mul_by_indexed_elem<bits<4> opc, string asm,
+ SDPatternOperator op> {
+ def _H : sve2_int_mul_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexH32b> {
bits<3> Zm;
bits<3> iop;
let Inst{22} = iop{2};
let Inst{20-19} = iop{1-0};
let Inst{18-16} = Zm;
}
- def _S : sve2_int_mul_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR32, ZPR3b32, VectorIndexS> {
+ def _S : sve2_int_mul_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR32, ZPR3b32, VectorIndexS32b> {
bits<3> Zm;
bits<2> iop;
let Inst{20-19} = iop;
let Inst{18-16} = Zm;
}
- def _D : sve2_int_mul_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR64, ZPR4b64, VectorIndexD> {
+ def _D : sve2_int_mul_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR64, ZPR4b64, VectorIndexD32b> {
bits<4> Zm;
bit iop;
let Inst{20} = iop;
let Inst{19-16} = Zm;
}
+
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, VectorIndexH32b_timm, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, VectorIndexD32b_timm, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve2_int_mul_long_by_indexed_elem<bits<3> opc, string asm> {
+multiclass sve2_int_mul_long_by_indexed_elem<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _S : sve2_int_mul_by_indexed_elem<0b10, { opc{2-1}, ?, opc{0} }, asm,
- ZPR32, ZPR16, ZPR3b16, VectorIndexH> {
+ ZPR32, ZPR16, ZPR3b16, VectorIndexH32b> {
bits<3> Zm;
bits<3> iop;
let Inst{20-19} = iop{2-1};
@@ -2680,13 +2917,16 @@ multiclass sve2_int_mul_long_by_indexed_elem<bits<3> opc, string asm> {
let Inst{11} = iop{0};
}
def _D : sve2_int_mul_by_indexed_elem<0b11, { opc{2-1}, ?, opc{0} }, asm,
- ZPR64, ZPR32, ZPR4b32, VectorIndexS> {
+ ZPR64, ZPR32, ZPR4b32, VectorIndexS32b> {
bits<4> Zm;
bits<2> iop;
let Inst{20} = iop{1};
let Inst{19-16} = Zm;
let Inst{11} = iop{0};
}
+
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv8i16, nxv8i16, i32, VectorIndexH32b_timm, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv4i32, nxv4i32, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -2702,7 +2942,7 @@ class sve2_int_arith_pred<bits<2> sz, bits<6> opc, string asm,
bits<5> Zdn;
let Inst{31-24} = 0b01000100;
let Inst{23-22} = sz;
- let Inst{21} = 0b0;
+ let Inst{21-20} = 0b01;
let Inst{20-16} = opc{5-1};
let Inst{15-14} = 0b10;
let Inst{13} = opc{0};
@@ -2711,15 +2951,20 @@ class sve2_int_arith_pred<bits<2> sz, bits<6> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
}
-multiclass sve2_int_arith_pred<bits<6> opc, string asm> {
+multiclass sve2_int_arith_pred<bits<6> opc, string asm, SDPatternOperator op> {
def _B : sve2_int_arith_pred<0b00, opc, asm, ZPR8>;
def _H : sve2_int_arith_pred<0b01, opc, asm, ZPR16>;
def _S : sve2_int_arith_pred<0b10, opc, asm, ZPR32>;
def _D : sve2_int_arith_pred<0b11, opc, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
class sve2_int_sadd_long_accum_pairwise<bits<2> sz, bit U, string asm,
@@ -2739,14 +2984,18 @@ class sve2_int_sadd_long_accum_pairwise<bits<2> sz, bit U, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = zprty1.ElementSize;
}
-multiclass sve2_int_sadd_long_accum_pairwise<bit U, string asm> {
+multiclass sve2_int_sadd_long_accum_pairwise<bit U, string asm, SDPatternOperator op> {
def _H : sve2_int_sadd_long_accum_pairwise<0b01, U, asm, ZPR16, ZPR8>;
def _S : sve2_int_sadd_long_accum_pairwise<0b10, U, asm, ZPR32, ZPR16>;
def _D : sve2_int_sadd_long_accum_pairwise<0b11, U, asm, ZPR64, ZPR32>;
+
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv16i8, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv8i16, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv4i32, !cast<Instruction>(NAME # _D)>;
}
class sve2_int_un_pred_arit<bits<2> sz, bit Q, bits<2> opc,
@@ -2770,19 +3019,26 @@ class sve2_int_un_pred_arit<bits<2> sz, bit Q, bits<2> opc,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
}
-multiclass sve2_int_un_pred_arit_s<bits<3> opc, string asm> {
+multiclass sve2_int_un_pred_arit_s<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
}
-multiclass sve2_int_un_pred_arit<bits<3> opc, string asm> {
+multiclass sve2_int_un_pred_arit<bits<3> opc, string asm, SDPatternOperator op> {
def _B : sve2_int_un_pred_arit<0b00, opc{2}, opc{1-0}, asm, ZPR8>;
def _H : sve2_int_un_pred_arit<0b01, opc{2}, opc{1-0}, asm, ZPR16>;
def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>;
def _D : sve2_int_un_pred_arit<0b11, opc{2}, opc{1-0}, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -2806,21 +3062,47 @@ class sve2_wide_int_arith<bits<2> sz, bits<5> opc, string asm,
let Inst{4-0} = Zd;
}
-multiclass sve2_wide_int_arith_long<bits<5> opc, string asm> {
+multiclass sve2_wide_int_arith_long<bits<5> opc, string asm,
+ SDPatternOperator op> {
def _H : sve2_wide_int_arith<0b01, opc, asm, ZPR16, ZPR8, ZPR8>;
def _S : sve2_wide_int_arith<0b10, opc, asm, ZPR32, ZPR16, ZPR16>;
def _D : sve2_wide_int_arith<0b11, opc, asm, ZPR64, ZPR32, ZPR32>;
+
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve2_wide_int_arith_wide<bits<3> opc, string asm> {
+multiclass sve2_wide_int_arith_wide<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _H : sve2_wide_int_arith<0b01, { 0b10, opc }, asm, ZPR16, ZPR16, ZPR8>;
def _S : sve2_wide_int_arith<0b10, { 0b10, opc }, asm, ZPR32, ZPR32, ZPR16>;
def _D : sve2_wide_int_arith<0b11, { 0b10, opc }, asm, ZPR64, ZPR64, ZPR32>;
+
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv16i8, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv8i16, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv4i32, !cast<Instruction>(NAME # _D)>;
+}
+
+multiclass sve2_wide_int_arith_pmul<bits<2> sz, bits<5> opc, string asm,
+ SDPatternOperator op> {
+ def NAME : sve2_wide_int_arith<sz, opc, asm, ZPR128, ZPR64, ZPR64>;
+
+ // To avoid using 128 bit elements in the IR, the pattern below works with
+ // llvm intrinsics with the _pair suffix, to reflect that
+ // _Q is implemented as a pair of _D.
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME)>;
}
-multiclass sve2_pmul_long<bits<1> opc, string asm> {
+multiclass sve2_pmul_long<bits<1> opc, string asm, SDPatternOperator op> {
def _H : sve2_wide_int_arith<0b01, {0b1101, opc}, asm, ZPR16, ZPR8, ZPR8>;
def _D : sve2_wide_int_arith<0b11, {0b1101, opc}, asm, ZPR64, ZPR32, ZPR32>;
+
+ // To avoid using 128 bit elements in the IR, the patterns below work with
+ // llvm intrinsics with the _pair suffix, to reflect that
+ // _H is implemented as a pair of _B and _D is implemented as a pair of _S.
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -2844,17 +3126,27 @@ class sve2_misc<bits<2> sz, bits<4> opc, string asm,
let Inst{4-0} = Zd;
}
-multiclass sve2_misc_bitwise<bits<4> opc, string asm> {
+multiclass sve2_misc_bitwise<bits<4> opc, string asm, SDPatternOperator op> {
def _B : sve2_misc<0b00, opc, asm, ZPR8, ZPR8>;
def _H : sve2_misc<0b01, opc, asm, ZPR16, ZPR16>;
def _S : sve2_misc<0b10, opc, asm, ZPR32, ZPR32>;
def _D : sve2_misc<0b11, opc, asm, ZPR64, ZPR64>;
+
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve2_misc_int_addsub_long_interleaved<bits<2> opc, string asm> {
+multiclass sve2_misc_int_addsub_long_interleaved<bits<2> opc, string asm,
+ SDPatternOperator op> {
def _H : sve2_misc<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>;
def _S : sve2_misc<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>;
def _D : sve2_misc<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>;
+
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _D)>;
}
class sve2_bitwise_xor_interleaved<bits<2> sz, bits<1> opc, string asm,
@@ -2874,15 +3166,21 @@ class sve2_bitwise_xor_interleaved<bits<2> sz, bits<1> opc, string asm,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
-multiclass sve2_bitwise_xor_interleaved<bit opc, string asm> {
+multiclass sve2_bitwise_xor_interleaved<bit opc, string asm,
+ SDPatternOperator op> {
def _B : sve2_bitwise_xor_interleaved<0b00, opc, asm, ZPR8, ZPR8>;
def _H : sve2_bitwise_xor_interleaved<0b01, opc, asm, ZPR16, ZPR16>;
def _S : sve2_bitwise_xor_interleaved<0b10, opc, asm, ZPR32, ZPR32>;
def _D : sve2_bitwise_xor_interleaved<0b11, opc, asm, ZPR64, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
class sve2_bitwise_shift_left_long<bits<3> tsz8_64, bits<2> opc, string asm,
@@ -2905,7 +3203,8 @@ class sve2_bitwise_shift_left_long<bits<3> tsz8_64, bits<2> opc, string asm,
let Inst{4-0} = Zd;
}
-multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm> {
+multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm,
+ SDPatternOperator op> {
def _H : sve2_bitwise_shift_left_long<{0,0,1}, opc, asm,
ZPR16, ZPR8, vecshiftL8>;
def _S : sve2_bitwise_shift_left_long<{0,1,?}, opc, asm,
@@ -2916,6 +3215,9 @@ multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm> {
ZPR64, ZPR32, vecshiftL32> {
let Inst{20-19} = imm{4-3};
}
+ def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Imm_Pat<nxv2i64, op, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -2943,7 +3245,8 @@ class sve2_int_bin_shift_imm<bits<4> tsz8_64, bit opc, string asm,
let Constraints = "$Zd = $_Zd";
}
-multiclass sve2_int_bin_shift_imm_left<bit opc, string asm> {
+multiclass sve2_int_bin_shift_imm_left<bit opc, string asm,
+ SDPatternOperator op> {
def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
let Inst{19} = imm{3};
@@ -2955,9 +3258,15 @@ multiclass sve2_int_bin_shift_imm_left<bit opc, string asm> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
+
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve2_int_bin_shift_imm_right<bit opc, string asm> {
+multiclass sve2_int_bin_shift_imm_right<bit opc, string asm,
+ SDPatternOperator op> {
def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
let Inst{19} = imm{3};
@@ -2969,6 +3278,11 @@ multiclass sve2_int_bin_shift_imm_right<bit opc, string asm> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
+
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
}
class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
@@ -2990,11 +3304,12 @@ class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
-multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm> {
+multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm,
+ SDPatternOperator op> {
def _B : sve2_int_bin_accum_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
def _H : sve2_int_bin_accum_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
let Inst{19} = imm{3};
@@ -3006,6 +3321,11 @@ multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
+
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
}
class sve2_int_cadd<bits<2> sz, bit opc, string asm, ZPRRegOp zprty>
@@ -3024,15 +3344,20 @@ class sve2_int_cadd<bits<2> sz, bit opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
-multiclass sve2_int_cadd<bit opc, string asm> {
+multiclass sve2_int_cadd<bit opc, string asm, SDPatternOperator op> {
def _B : sve2_int_cadd<0b00, opc, asm, ZPR8>;
def _H : sve2_int_cadd<0b01, opc, asm, ZPR16>;
def _S : sve2_int_cadd<0b10, opc, asm, ZPR32>;
def _D : sve2_int_cadd<0b11, opc, asm, ZPR64>;
+
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, complexrotateopodd, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, complexrotateopodd, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, complexrotateopodd, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, complexrotateopodd, !cast<Instruction>(NAME # _D)>;
}
class sve2_int_absdiff_accum<bits<2> sz, bits<4> opc, string asm,
@@ -3052,28 +3377,41 @@ class sve2_int_absdiff_accum<bits<2> sz, bits<4> opc, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
-multiclass sve2_int_absdiff_accum<bit opc, string asm> {
+multiclass sve2_int_absdiff_accum<bit opc, string asm, SDPatternOperator op> {
def _B : sve2_int_absdiff_accum<0b00, { 0b111, opc }, asm, ZPR8, ZPR8>;
def _H : sve2_int_absdiff_accum<0b01, { 0b111, opc }, asm, ZPR16, ZPR16>;
def _S : sve2_int_absdiff_accum<0b10, { 0b111, opc }, asm, ZPR32, ZPR32>;
def _D : sve2_int_absdiff_accum<0b11, { 0b111, opc }, asm, ZPR64, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve2_int_absdiff_accum_long<bits<2> opc, string asm> {
+multiclass sve2_int_absdiff_accum_long<bits<2> opc, string asm,
+ SDPatternOperator op> {
def _H : sve2_int_absdiff_accum<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>;
def _S : sve2_int_absdiff_accum<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>;
def _D : sve2_int_absdiff_accum<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>;
+
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm> {
+multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm, SDPatternOperator op> {
def _S : sve2_int_absdiff_accum<{ opc{1}, 0b0 }, { 0b010, opc{0} }, asm,
ZPR32, ZPR32>;
def _D : sve2_int_absdiff_accum<{ opc{1}, 0b1 }, { 0b010, opc{0} }, asm,
ZPR64, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -3300,7 +3638,7 @@ class sve_int_un_pred_arit<bits<2> sz8_64, bits<4> opc,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
}
@@ -3465,11 +3803,12 @@ class sve_int_arith_imm0<bits<2> sz8_64, bits<3> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
-multiclass sve_int_arith_imm0<bits<3> opc, string asm, SDPatternOperator op> {
+multiclass sve_int_arith_imm0<bits<3> opc, string asm,
+ SDPatternOperator op, SDPatternOperator int_op> {
def _B : sve_int_arith_imm0<0b00, opc, asm, ZPR8, addsub_imm8_opt_lsl_i8>;
def _H : sve_int_arith_imm0<0b01, opc, asm, ZPR16, addsub_imm8_opt_lsl_i16>;
def _S : sve_int_arith_imm0<0b10, opc, asm, ZPR32, addsub_imm8_opt_lsl_i32>;
@@ -3479,6 +3818,12 @@ multiclass sve_int_arith_imm0<bits<3> opc, string asm, SDPatternOperator op> {
def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, op, ZPR16, i32, SVEAddSubImm16Pat, !cast<Instruction>(NAME # _H)>;
def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, op, ZPR32, i32, SVEAddSubImm32Pat, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubImm64Pat, !cast<Instruction>(NAME # _D)>;
+
+ // Intrinsic version
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv16i8, int_op, ZPR8, i32, SVEAddSubImm8Pat, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, int_op, ZPR16, i32, SVEAddSubImm16Pat, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, int_op, ZPR32, i32, SVEAddSubImm32Pat, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, int_op, ZPR64, i64, SVEAddSubImm64Pat, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_arith_imm0_subr<bits<3> opc, string asm, SDPatternOperator op> {
@@ -3509,7 +3854,7 @@ class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
@@ -3519,10 +3864,10 @@ multiclass sve_int_arith_imm1<bits<2> opc, string asm, SDPatternOperator op> {
def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, simm8>;
def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, simm8>;
- def : SVE_1_Op_Imm_Arith_Pat<nxv16i8, op, ZPR8, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Imm_Arith_Pat<nxv8i16, op, ZPR16, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_Arith_Pat<nxv4i32, op, ZPR32, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_Arith_Pat<nxv2i64, op, ZPR64, i64, SVEArithSImmPat, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithSImmPat, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_arith_imm1_unsigned<bits<2> opc, string asm, SDPatternOperator op> {
@@ -3531,10 +3876,10 @@ multiclass sve_int_arith_imm1_unsigned<bits<2> opc, string asm, SDPatternOperato
def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, imm0_255>;
def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, imm0_255>;
- def : SVE_1_Op_Imm_Arith_Pat<nxv16i8, op, ZPR8, i32, SVEArithUImmPat, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Imm_Arith_Pat<nxv8i16, op, ZPR16, i32, SVEArithUImmPat, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_Arith_Pat<nxv4i32, op, ZPR32, i32, SVEArithUImmPat, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_Arith_Pat<nxv2i64, op, ZPR64, i64, SVEArithUImmPat, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithUImmPat, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithUImmPat, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithUImmPat, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithUImmPat, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_arith_imm2<string asm, SDPatternOperator op> {
@@ -3604,11 +3949,11 @@ class sve2_int_bitwise_ternary_op_d<bits<3> opc, string asm>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
-multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm> {
+multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm, SDPatternOperator op> {
def NAME : sve2_int_bitwise_ternary_op_d<opc, asm>;
def : InstAlias<asm # "\t$Zdn, $Zdn, $Zm, $Zk",
@@ -3617,6 +3962,11 @@ multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm> {
(!cast<Instruction>(NAME) ZPR16:$Zdn, ZPR16:$Zm, ZPR16:$Zk), 1>;
def : InstAlias<asm # "\t$Zdn, $Zdn, $Zm, $Zk",
(!cast<Instruction>(NAME) ZPR32:$Zdn, ZPR32:$Zm, ZPR32:$Zk), 1>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME)>;
}
class sve2_int_rotate_right_imm<bits<4> tsz8_64, string asm,
@@ -3638,11 +3988,11 @@ class sve2_int_rotate_right_imm<bits<4> tsz8_64, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
-multiclass sve2_int_rotate_right_imm<string asm> {
+multiclass sve2_int_rotate_right_imm<string asm, SDPatternOperator op> {
def _B : sve2_int_rotate_right_imm<{0,0,0,1}, asm, ZPR8, vecshiftR8>;
def _H : sve2_int_rotate_right_imm<{0,0,1,?}, asm, ZPR16, vecshiftR16> {
let Inst{19} = imm{3};
@@ -3654,6 +4004,10 @@ multiclass sve2_int_rotate_right_imm<string asm> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -3678,7 +4032,7 @@ class sve_int_dup_fpimm_pred<bits<2> sz, Operand fpimmtype,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
}
@@ -3713,26 +4067,34 @@ class sve_int_dup_imm_pred<bits<2> sz8_64, bit m, string asm,
let Inst{12-5} = imm{7-0}; // imm8
let Inst{4-0} = Zd;
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
}
-multiclass sve_int_dup_imm_pred_merge<string asm> {
- let Constraints = "$Zd = $_Zd" in {
- def _B : sve_int_dup_imm_pred<0b00, 1, asm, ZPR8, "/m", (ins ZPR8:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm)>;
- def _H : sve_int_dup_imm_pred<0b01, 1, asm, ZPR16, "/m", (ins ZPR16:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm)>;
- def _S : sve_int_dup_imm_pred<0b10, 1, asm, ZPR32, "/m", (ins ZPR32:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm)>;
- def _D : sve_int_dup_imm_pred<0b11, 1, asm, ZPR64, "/m", (ins ZPR64:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm)>;
- }
-
- def : InstAlias<"mov $Zd, $Pg/m, $imm",
- (!cast<Instruction>(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm), 1>;
- def : InstAlias<"mov $Zd, $Pg/m, $imm",
- (!cast<Instruction>(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm), 1>;
+multiclass sve_int_dup_imm_pred_merge_inst<
+ bits<2> sz8_64, string asm, ZPRRegOp zprty, ValueType intty,
+ ValueType predty, ValueType scalarty, imm8_opt_lsl cpyimm> {
+ let Constraints = "$Zd = $_Zd" in
+ def NAME : sve_int_dup_imm_pred<sz8_64, 1, asm, zprty, "/m",
+ (ins zprty:$_Zd, PPRAny:$Pg, cpyimm:$imm)>;
def : InstAlias<"mov $Zd, $Pg/m, $imm",
- (!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm), 1>;
- def : InstAlias<"mov $Zd, $Pg/m, $imm",
- (!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm), 1>;
+ (!cast<Instruction>(NAME) zprty:$Zd, PPRAny:$Pg, cpyimm:$imm), 1>;
+ def : Pat<(intty
+ (vselect predty:$Pg,
+ (intty (AArch64dup (scalarty (SVE8BitLslImm i32:$imm, i32:$shift)))),
+ intty:$Zd)),
+ (!cast<Instruction>(NAME) zprty:$Zd, $Pg, i32:$imm, i32:$shift)>;
+}
+
+multiclass sve_int_dup_imm_pred_merge<string asm> {
+ defm _B : sve_int_dup_imm_pred_merge_inst<0b00, asm, ZPR8, nxv16i8, nxv16i1,
+ i32, cpy_imm8_opt_lsl_i8>;
+ defm _H : sve_int_dup_imm_pred_merge_inst<0b01, asm, ZPR16, nxv8i16, nxv8i1,
+ i32, cpy_imm8_opt_lsl_i16>;
+ defm _S : sve_int_dup_imm_pred_merge_inst<0b10, asm, ZPR32, nxv4i32, nxv4i1,
+ i32, cpy_imm8_opt_lsl_i32>;
+ defm _D : sve_int_dup_imm_pred_merge_inst<0b11, asm, ZPR64, nxv2i64, nxv2i1,
+ i64, cpy_imm8_opt_lsl_i64>;
def : InstAlias<"fmov $Zd, $Pg/m, #0.0",
(!cast<Instruction>(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, 0, 0), 0>;
@@ -3742,20 +4104,35 @@ multiclass sve_int_dup_imm_pred_merge<string asm> {
(!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, 0, 0), 0>;
}
-multiclass sve_int_dup_imm_pred_zero<string asm> {
- def _B : sve_int_dup_imm_pred<0b00, 0, asm, ZPR8, "/z", (ins PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm)>;
- def _H : sve_int_dup_imm_pred<0b01, 0, asm, ZPR16, "/z", (ins PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm)>;
- def _S : sve_int_dup_imm_pred<0b10, 0, asm, ZPR32, "/z", (ins PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm)>;
- def _D : sve_int_dup_imm_pred<0b11, 0, asm, ZPR64, "/z", (ins PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm)>;
-
- def : InstAlias<"mov $Zd, $Pg/z, $imm",
- (!cast<Instruction>(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm), 1>;
- def : InstAlias<"mov $Zd, $Pg/z, $imm",
- (!cast<Instruction>(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm), 1>;
+multiclass sve_int_dup_imm_pred_zero_inst<
+ bits<2> sz8_64, string asm, ZPRRegOp zprty, ValueType intty,
+ ValueType predty, ValueType scalarty, imm8_opt_lsl cpyimm> {
+ def NAME : sve_int_dup_imm_pred<sz8_64, 0, asm, zprty, "/z",
+ (ins PPRAny:$Pg, cpyimm:$imm)>;
def : InstAlias<"mov $Zd, $Pg/z, $imm",
- (!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm), 1>;
- def : InstAlias<"mov $Zd, $Pg/z, $imm",
- (!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm), 1>;
+ (!cast<Instruction>(NAME) zprty:$Zd, PPRAny:$Pg, cpyimm:$imm), 1>;
+ def : Pat<(intty (zext (predty PPRAny:$Ps1))),
+ (!cast<Instruction>(NAME) PPRAny:$Ps1, 1, 0)>;
+ def : Pat<(intty (sext (predty PPRAny:$Ps1))),
+ (!cast<Instruction>(NAME) PPRAny:$Ps1, -1, 0)>;
+ def : Pat<(intty (anyext (predty PPRAny:$Ps1))),
+ (!cast<Instruction>(NAME) PPRAny:$Ps1, 1, 0)>;
+ def : Pat<(intty
+ (vselect predty:$Pg,
+ (intty (AArch64dup (scalarty (SVE8BitLslImm i32:$imm, i32:$shift)))),
+ (intty (AArch64dup (scalarty 0))))),
+ (!cast<Instruction>(NAME) $Pg, i32:$imm, i32:$shift)>;
+}
+
+multiclass sve_int_dup_imm_pred_zero<string asm> {
+ defm _B : sve_int_dup_imm_pred_zero_inst<0b00, asm, ZPR8, nxv16i8, nxv16i1,
+ i32, cpy_imm8_opt_lsl_i8>;
+ defm _H : sve_int_dup_imm_pred_zero_inst<0b01, asm, ZPR16, nxv8i16, nxv8i1,
+ i32, cpy_imm8_opt_lsl_i16>;
+ defm _S : sve_int_dup_imm_pred_zero_inst<0b10, asm, ZPR32, nxv4i32, nxv4i1,
+ i32, cpy_imm8_opt_lsl_i32>;
+ defm _D : sve_int_dup_imm_pred_zero_inst<0b11, asm, ZPR64, nxv2i64, nxv2i1,
+ i64, cpy_imm8_opt_lsl_i64>;
}
//===----------------------------------------------------------------------===//
@@ -3787,17 +4164,24 @@ class sve_int_cmp<bit cmp_1, bits<2> sz8_64, bits<3> opc, string asm,
let Defs = [NZCV];
}
-multiclass sve_int_cmp_0<bits<3> opc, string asm, SDPatternOperator op,
- CondCode cc> {
+multiclass SVE_SETCC_Pat<CondCode cc, CondCode invcc, ValueType predvt,
+ ValueType intvt, sve_int_cmp cmp> {
+ def : Pat<(predvt (AArch64setcc_z predvt:$Op1, intvt:$Op2, intvt:$Op3, cc)),
+ (cmp $Op1, $Op2, $Op3)>;
+ def : Pat<(predvt (AArch64setcc_z predvt:$Op1, intvt:$Op2, intvt:$Op3, invcc)),
+ (cmp $Op1, $Op3, $Op2)>;
+}
+
+multiclass sve_int_cmp_0<bits<3> opc, string asm, CondCode cc, CondCode invcc> {
def _B : sve_int_cmp<0b0, 0b00, opc, asm, PPR8, ZPR8, ZPR8>;
def _H : sve_int_cmp<0b0, 0b01, opc, asm, PPR16, ZPR16, ZPR16>;
def _S : sve_int_cmp<0b0, 0b10, opc, asm, PPR32, ZPR32, ZPR32>;
def _D : sve_int_cmp<0b0, 0b11, opc, asm, PPR64, ZPR64, ZPR64>;
- def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Pat<nxv8i1, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Pat<nxv4i1, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Pat<nxv2i1, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ defm : SVE_SETCC_Pat<cc, invcc, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ defm : SVE_SETCC_Pat<cc, invcc, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Pat<cc, invcc, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_SETCC_Pat<cc, invcc, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_cmp_0_wide<bits<3> opc, string asm, SDPatternOperator op> {
@@ -3852,67 +4236,35 @@ class sve_int_scmp_vi<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty,
let ElementSize = pprty.ElementSize;
}
-multiclass sve_int_scmp_vi<bits<3> opc, string asm, CondCode cc,
- SDPatternOperator op = null_frag,
- SDPatternOperator inv_op = null_frag> {
+multiclass SVE_SETCC_Imm_Pat<CondCode cc, CondCode commuted_cc,
+ ValueType predvt, ValueType intvt,
+ Operand immtype, Instruction cmp> {
+ def : Pat<(predvt (AArch64setcc_z (predvt PPR_3b:$Pg),
+ (intvt ZPR:$Zs1),
+ (intvt (AArch64dup (immtype:$imm))),
+ cc)),
+ (cmp $Pg, $Zs1, immtype:$imm)>;
+ def : Pat<(predvt (AArch64setcc_z (predvt PPR_3b:$Pg),
+ (intvt (AArch64dup (immtype:$imm))),
+ (intvt ZPR:$Zs1),
+ commuted_cc)),
+ (cmp $Pg, $Zs1, immtype:$imm)>;
+}
+
+multiclass sve_int_scmp_vi<bits<3> opc, string asm, CondCode cc, CondCode commuted_cc> {
def _B : sve_int_scmp_vi<0b00, opc, asm, PPR8, ZPR8, simm5_32b>;
def _H : sve_int_scmp_vi<0b01, opc, asm, PPR16, ZPR16, simm5_32b>;
def _S : sve_int_scmp_vi<0b10, opc, asm, PPR32, ZPR32, simm5_32b>;
def _D : sve_int_scmp_vi<0b11, opc, asm, PPR64, ZPR64, simm5_64b>;
- // IR version
- def : Pat<(nxv16i1 (setcc (nxv16i8 ZPR:$Zs1),
- (nxv16i8 (AArch64dup (simm5_32b:$imm))),
- cc)),
- (!cast<Instruction>(NAME # "_B") (PTRUE_B 31), ZPR:$Zs1, simm5_32b:$imm)>;
- def : Pat<(nxv8i1 (setcc (nxv8i16 ZPR:$Zs1),
- (nxv8i16 (AArch64dup (simm5_32b:$imm))),
- cc)),
- (!cast<Instruction>(NAME # "_H") (PTRUE_H 31), ZPR:$Zs1, simm5_32b:$imm)>;
- def : Pat<(nxv4i1 (setcc (nxv4i32 ZPR:$Zs1),
- (nxv4i32 (AArch64dup (simm5_32b:$imm))),
- cc)),
- (!cast<Instruction>(NAME # "_S") (PTRUE_S 31), ZPR:$Zs1, simm5_32b:$imm)>;
- def : Pat<(nxv2i1 (setcc (nxv2i64 ZPR:$Zs1),
- (nxv2i64 (AArch64dup (simm5_64b:$imm))),
- cc)),
- (!cast<Instruction>(NAME # "_D") (PTRUE_D 31), ZPR:$Zs1, simm5_64b:$imm)>;
-
- // Intrinsic version
- def : Pat<(nxv16i1 (op (nxv16i1 PPR_3b:$Pg),
- (nxv16i8 ZPR:$Zs1),
- (nxv16i8 (AArch64dup (simm5_32b:$imm))))),
- (!cast<Instruction>(NAME # "_B") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
- def : Pat<(nxv8i1 (op (nxv8i1 PPR_3b:$Pg),
- (nxv8i16 ZPR:$Zs1),
- (nxv8i16 (AArch64dup (simm5_32b:$imm))))),
- (!cast<Instruction>(NAME # "_H") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
- def : Pat<(nxv4i1 (op (nxv4i1 PPR_3b:$Pg),
- (nxv4i32 ZPR:$Zs1),
- (nxv4i32 (AArch64dup (simm5_32b:$imm))))),
- (!cast<Instruction>(NAME # "_S") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
- def : Pat<(nxv2i1 (op (nxv2i1 PPR_3b:$Pg),
- (nxv2i64 ZPR:$Zs1),
- (nxv2i64 (AArch64dup (simm5_64b:$imm))))),
- (!cast<Instruction>(NAME # "_D") PPR_3b:$Pg, ZPR:$Zs1, simm5_64b:$imm)>;
-
- // Inverted intrinsic version
- def : Pat<(nxv16i1 (inv_op (nxv16i1 PPR_3b:$Pg),
- (nxv16i8 (AArch64dup (simm5_32b:$imm))),
- (nxv16i8 ZPR:$Zs1))),
- (!cast<Instruction>(NAME # "_B") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
- def : Pat<(nxv8i1 (inv_op (nxv8i1 PPR_3b:$Pg),
- (nxv8i16 (AArch64dup (simm5_32b:$imm))),
- (nxv8i16 ZPR:$Zs1))),
- (!cast<Instruction>(NAME # "_H") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
- def : Pat<(nxv4i1 (inv_op (nxv4i1 PPR_3b:$Pg),
- (nxv4i32 (AArch64dup (simm5_32b:$imm))),
- (nxv4i32 ZPR:$Zs1))),
- (!cast<Instruction>(NAME # "_S") PPR_3b:$Pg, ZPR:$Zs1, simm5_32b:$imm)>;
- def : Pat<(nxv2i1 (inv_op (nxv2i1 PPR_3b:$Pg),
- (nxv2i64 (AArch64dup (simm5_64b:$imm))),
- (nxv2i64 ZPR:$Zs1))),
- (!cast<Instruction>(NAME # "_D") PPR_3b:$Pg, ZPR:$Zs1, simm5_64b:$imm)>;
+ defm : SVE_SETCC_Imm_Pat<cc, commuted_cc, nxv16i1, nxv16i8, simm5_32b,
+ !cast<Instruction>(NAME # _B)>;
+ defm : SVE_SETCC_Imm_Pat<cc, commuted_cc, nxv8i1, nxv8i16, simm5_32b,
+ !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Imm_Pat<cc, commuted_cc, nxv4i1, nxv4i32, simm5_32b,
+ !cast<Instruction>(NAME # _S)>;
+ defm : SVE_SETCC_Imm_Pat<cc, commuted_cc, nxv2i1, nxv2i64, simm5_64b,
+ !cast<Instruction>(NAME # _D)>;
}
@@ -3944,66 +4296,20 @@ class sve_int_ucmp_vi<bits<2> sz8_64, bits<2> opc, string asm, PPRRegOp pprty,
}
multiclass sve_int_ucmp_vi<bits<2> opc, string asm, CondCode cc,
- SDPatternOperator op = null_frag,
- SDPatternOperator inv_op = null_frag> {
+ CondCode commuted_cc> {
def _B : sve_int_ucmp_vi<0b00, opc, asm, PPR8, ZPR8, imm0_127>;
def _H : sve_int_ucmp_vi<0b01, opc, asm, PPR16, ZPR16, imm0_127>;
def _S : sve_int_ucmp_vi<0b10, opc, asm, PPR32, ZPR32, imm0_127>;
def _D : sve_int_ucmp_vi<0b11, opc, asm, PPR64, ZPR64, imm0_127_64b>;
- // IR version
- def : Pat<(nxv16i1 (setcc (nxv16i8 ZPR:$Zs1),
- (nxv16i8 (AArch64dup (imm0_127:$imm))),
- cc)),
- (!cast<Instruction>(NAME # "_B") (PTRUE_B 31), ZPR:$Zs1, imm0_127:$imm)>;
- def : Pat<(nxv8i1 (setcc (nxv8i16 ZPR:$Zs1),
- (nxv8i16 (AArch64dup (imm0_127:$imm))),
- cc)),
- (!cast<Instruction>(NAME # "_H") (PTRUE_H 31), ZPR:$Zs1, imm0_127:$imm)>;
- def : Pat<(nxv4i1 (setcc (nxv4i32 ZPR:$Zs1),
- (nxv4i32 (AArch64dup (imm0_127:$imm))),
- cc)),
- (!cast<Instruction>(NAME # "_S") (PTRUE_S 31), ZPR:$Zs1, imm0_127:$imm)>;
- def : Pat<(nxv2i1 (setcc (nxv2i64 ZPR:$Zs1),
- (nxv2i64 (AArch64dup (imm0_127_64b:$imm))),
- cc)),
- (!cast<Instruction>(NAME # "_D") (PTRUE_D 31), ZPR:$Zs1, imm0_127_64b:$imm)>;
-
- // Intrinsic version
- def : Pat<(nxv16i1 (op (nxv16i1 PPR_3b:$Pg),
- (nxv16i8 ZPR:$Zs1),
- (nxv16i8 (AArch64dup (imm0_127:$imm))))),
- (!cast<Instruction>(NAME # "_B") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
- def : Pat<(nxv8i1 (op (nxv8i1 PPR_3b:$Pg),
- (nxv8i16 ZPR:$Zs1),
- (nxv8i16 (AArch64dup (imm0_127:$imm))))),
- (!cast<Instruction>(NAME # "_H") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
- def : Pat<(nxv4i1 (op (nxv4i1 PPR_3b:$Pg),
- (nxv4i32 ZPR:$Zs1),
- (nxv4i32 (AArch64dup (imm0_127:$imm))))),
- (!cast<Instruction>(NAME # "_S") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
- def : Pat<(nxv2i1 (op (nxv2i1 PPR_3b:$Pg),
- (nxv2i64 ZPR:$Zs1),
- (nxv2i64 (AArch64dup (imm0_127_64b:$imm))))),
- (!cast<Instruction>(NAME # "_D") PPR_3b:$Pg, ZPR:$Zs1, imm0_127_64b:$imm)>;
-
- // Inverted intrinsic version
- def : Pat<(nxv16i1 (inv_op (nxv16i1 PPR_3b:$Pg),
- (nxv16i8 (AArch64dup (imm0_127:$imm))),
- (nxv16i8 ZPR:$Zs1))),
- (!cast<Instruction>(NAME # "_B") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
- def : Pat<(nxv8i1 (inv_op (nxv8i1 PPR_3b:$Pg),
- (nxv8i16 (AArch64dup (imm0_127:$imm))),
- (nxv8i16 ZPR:$Zs1))),
- (!cast<Instruction>(NAME # "_H") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
- def : Pat<(nxv4i1 (inv_op (nxv4i1 PPR_3b:$Pg),
- (nxv4i32 (AArch64dup (imm0_127:$imm))),
- (nxv4i32 ZPR:$Zs1))),
- (!cast<Instruction>(NAME # "_S") PPR_3b:$Pg, ZPR:$Zs1, imm0_127:$imm)>;
- def : Pat<(nxv2i1 (inv_op (nxv2i1 PPR_3b:$Pg),
- (nxv2i64 (AArch64dup (imm0_127_64b:$imm))),
- (nxv2i64 ZPR:$Zs1))),
- (!cast<Instruction>(NAME # "_D") PPR_3b:$Pg, ZPR:$Zs1, imm0_127_64b:$imm)>;
+ defm : SVE_SETCC_Imm_Pat<cc, commuted_cc, nxv16i1, nxv16i8, imm0_127,
+ !cast<Instruction>(NAME # _B)>;
+ defm : SVE_SETCC_Imm_Pat<cc, commuted_cc, nxv8i1, nxv8i16, imm0_127,
+ !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Imm_Pat<cc, commuted_cc, nxv4i1, nxv4i32, imm0_127,
+ !cast<Instruction>(NAME # _S)>;
+ defm : SVE_SETCC_Imm_Pat<cc, commuted_cc, nxv2i1, nxv2i64, imm0_127_64b,
+ !cast<Instruction>(NAME # _D)>;
}
@@ -4096,11 +4402,17 @@ class sve2_int_while_rr<bits<2> sz8_64, bits<1> rw, string asm,
let Defs = [NZCV];
}
-multiclass sve2_int_while_rr<bits<1> rw, string asm> {
+multiclass sve2_int_while_rr<bits<1> rw, string asm, string op> {
def _B : sve2_int_while_rr<0b00, rw, asm, PPR8>;
def _H : sve2_int_while_rr<0b01, rw, asm, PPR16>;
def _S : sve2_int_while_rr<0b10, rw, asm, PPR32>;
def _D : sve2_int_while_rr<0b11, rw, asm, PPR64>;
+
+ def : SVE_2_Op_Pat<nxv16i1, !cast<SDPatternOperator>(op # _b), i64, i64, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i1, !cast<SDPatternOperator>(op # _h), i64, i64, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i1, !cast<SDPatternOperator>(op # _s), i64, i64, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i1, !cast<SDPatternOperator>(op # _d), i64, i64, !cast<Instruction>(NAME # _D)>;
+
}
//===----------------------------------------------------------------------===//
@@ -4108,8 +4420,8 @@ multiclass sve2_int_while_rr<bits<1> rw, string asm> {
//===----------------------------------------------------------------------===//
class sve_fp_fast_red<bits<2> sz, bits<3> opc, string asm,
- ZPRRegOp zprty, RegisterClass dstRegClass>
-: I<(outs dstRegClass:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn),
+ ZPRRegOp zprty, FPRasZPROperand dstOpType>
+: I<(outs dstOpType:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn),
asm, "\t$Vd, $Pg, $Zn",
"",
[]>, Sched<[]> {
@@ -4127,13 +4439,13 @@ class sve_fp_fast_red<bits<2> sz, bits<3> opc, string asm,
}
multiclass sve_fp_fast_red<bits<3> opc, string asm, SDPatternOperator op> {
- def _H : sve_fp_fast_red<0b01, opc, asm, ZPR16, FPR16>;
- def _S : sve_fp_fast_red<0b10, opc, asm, ZPR32, FPR32>;
- def _D : sve_fp_fast_red<0b11, opc, asm, ZPR64, FPR64>;
+ def _H : sve_fp_fast_red<0b01, opc, asm, ZPR16, FPR16asZPR>;
+ def _S : sve_fp_fast_red<0b10, opc, asm, ZPR32, FPR32asZPR>;
+ def _D : sve_fp_fast_red<0b11, opc, asm, ZPR64, FPR64asZPR>;
- def : SVE_2_Op_Pat<f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Pat<f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_2_Op_Pat<f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
@@ -4142,8 +4454,8 @@ multiclass sve_fp_fast_red<bits<3> opc, string asm, SDPatternOperator op> {
//===----------------------------------------------------------------------===//
class sve_fp_2op_p_vd<bits<2> sz, bits<3> opc, string asm,
- ZPRRegOp zprty, RegisterClass dstRegClass>
-: I<(outs dstRegClass:$Vdn), (ins PPR3bAny:$Pg, dstRegClass:$_Vdn, zprty:$Zm),
+ ZPRRegOp zprty, FPRasZPROperand dstOpType>
+: I<(outs dstOpType:$Vdn), (ins PPR3bAny:$Pg, dstOpType:$_Vdn, zprty:$Zm),
asm, "\t$Vdn, $Pg, $_Vdn, $Zm",
"",
[]>,
@@ -4164,13 +4476,13 @@ class sve_fp_2op_p_vd<bits<2> sz, bits<3> opc, string asm,
}
multiclass sve_fp_2op_p_vd<bits<3> opc, string asm, SDPatternOperator op> {
- def _H : sve_fp_2op_p_vd<0b01, opc, asm, ZPR16, FPR16>;
- def _S : sve_fp_2op_p_vd<0b10, opc, asm, ZPR32, FPR32>;
- def _D : sve_fp_2op_p_vd<0b11, opc, asm, ZPR64, FPR64>;
+ def _H : sve_fp_2op_p_vd<0b01, opc, asm, ZPR16, FPR16asZPR>;
+ def _S : sve_fp_2op_p_vd<0b10, opc, asm, ZPR32, FPR32asZPR>;
+ def _D : sve_fp_2op_p_vd<0b11, opc, asm, ZPR64, FPR64asZPR>;
- def : SVE_3_Op_Pat<f16, op, nxv8i1, f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Pat<f32, op, nxv4i1, f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Pat<f64, op, nxv2i1, f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -4210,6 +4522,22 @@ multiclass sve_fp_3op_p_pd<bits<3> opc, string asm, SDPatternOperator op> {
def : SVE_3_Op_Pat<nxv2i1, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
+multiclass sve_fp_3op_p_pd_cc<bits<3> opc, string asm, SDPatternOperator op,
+ SDPatternOperator op_nopred>
+: sve_fp_3op_p_pd<opc, asm, op> {
+ def : SVE_2_Op_AllActive_Pat<nxv8i1, op_nopred, nxv8f16, nxv8f16,
+ !cast<Instruction>(NAME # _H), PTRUE_H>;
+ def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4f16, nxv4f16,
+ !cast<Instruction>(NAME # _H), PTRUE_S>;
+ def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f16, nxv2f16,
+ !cast<Instruction>(NAME # _H), PTRUE_D>;
+ def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4f32, nxv4f32,
+ !cast<Instruction>(NAME # _S), PTRUE_S>;
+ def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f32, nxv2f32,
+ !cast<Instruction>(NAME # _S), PTRUE_D>;
+ def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f64, nxv2f64,
+ !cast<Instruction>(NAME # _D), PTRUE_D>;
+}
//===----------------------------------------------------------------------===//
// SVE Floating Point Compare - with Zero Group
@@ -4263,11 +4591,20 @@ class sve_int_index_ii<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
}
-multiclass sve_int_index_ii<string asm> {
- def _B : sve_int_index_ii<0b00, asm, ZPR8, simm5_32b>;
- def _H : sve_int_index_ii<0b01, asm, ZPR16, simm5_32b>;
+multiclass sve_int_index_ii<string asm, SDPatternOperator op> {
+ def _B : sve_int_index_ii<0b00, asm, ZPR8, simm5_8b>;
+ def _H : sve_int_index_ii<0b01, asm, ZPR16, simm5_16b>;
def _S : sve_int_index_ii<0b10, asm, ZPR32, simm5_32b>;
def _D : sve_int_index_ii<0b11, asm, ZPR64, simm5_64b>;
+
+ def : Pat<(nxv16i8 (op simm5_8b:$imm5, simm5_8b:$imm5b)),
+ (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, simm5_8b:$imm5b)>;
+ def : Pat<(nxv8i16 (op simm5_16b:$imm5, simm5_16b:$imm5b)),
+ (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, simm5_16b:$imm5b)>;
+ def : Pat<(nxv4i32 (op simm5_32b:$imm5, simm5_32b:$imm5b)),
+ (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, simm5_32b:$imm5b)>;
+ def : Pat<(nxv2i64 (op simm5_64b:$imm5, simm5_64b:$imm5b)),
+ (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, simm5_64b:$imm5b)>;
}
class sve_int_index_ir<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -4287,11 +4624,20 @@ class sve_int_index_ir<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
}
-multiclass sve_int_index_ir<string asm> {
- def _B : sve_int_index_ir<0b00, asm, ZPR8, GPR32, simm5_32b>;
- def _H : sve_int_index_ir<0b01, asm, ZPR16, GPR32, simm5_32b>;
+multiclass sve_int_index_ir<string asm, SDPatternOperator op> {
+ def _B : sve_int_index_ir<0b00, asm, ZPR8, GPR32, simm5_8b>;
+ def _H : sve_int_index_ir<0b01, asm, ZPR16, GPR32, simm5_16b>;
def _S : sve_int_index_ir<0b10, asm, ZPR32, GPR32, simm5_32b>;
def _D : sve_int_index_ir<0b11, asm, ZPR64, GPR64, simm5_64b>;
+
+ def : Pat<(nxv16i8 (op simm5_8b:$imm5, GPR32:$Rm)),
+ (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, GPR32:$Rm)>;
+ def : Pat<(nxv8i16 (op simm5_16b:$imm5, GPR32:$Rm)),
+ (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, GPR32:$Rm)>;
+ def : Pat<(nxv4i32 (op simm5_32b:$imm5, GPR32:$Rm)),
+ (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, GPR32:$Rm)>;
+ def : Pat<(nxv2i64 (op simm5_64b:$imm5, GPR64:$Rm)),
+ (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, GPR64:$Rm)>;
}
class sve_int_index_ri<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -4311,11 +4657,20 @@ class sve_int_index_ri<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
}
-multiclass sve_int_index_ri<string asm> {
- def _B : sve_int_index_ri<0b00, asm, ZPR8, GPR32, simm5_32b>;
- def _H : sve_int_index_ri<0b01, asm, ZPR16, GPR32, simm5_32b>;
+multiclass sve_int_index_ri<string asm, SDPatternOperator op> {
+ def _B : sve_int_index_ri<0b00, asm, ZPR8, GPR32, simm5_8b>;
+ def _H : sve_int_index_ri<0b01, asm, ZPR16, GPR32, simm5_16b>;
def _S : sve_int_index_ri<0b10, asm, ZPR32, GPR32, simm5_32b>;
def _D : sve_int_index_ri<0b11, asm, ZPR64, GPR64, simm5_64b>;
+
+ def : Pat<(nxv16i8 (op GPR32:$Rm, simm5_8b:$imm5)),
+ (!cast<Instruction>(NAME # "_B") GPR32:$Rm, simm5_8b:$imm5)>;
+ def : Pat<(nxv8i16 (op GPR32:$Rm, simm5_16b:$imm5)),
+ (!cast<Instruction>(NAME # "_H") GPR32:$Rm, simm5_16b:$imm5)>;
+ def : Pat<(nxv4i32 (op GPR32:$Rm, simm5_32b:$imm5)),
+ (!cast<Instruction>(NAME # "_S") GPR32:$Rm, simm5_32b:$imm5)>;
+ def : Pat<(nxv2i64 (op GPR64:$Rm, simm5_64b:$imm5)),
+ (!cast<Instruction>(NAME # "_D") GPR64:$Rm, simm5_64b:$imm5)>;
}
class sve_int_index_rr<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -4335,19 +4690,23 @@ class sve_int_index_rr<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
}
-multiclass sve_int_index_rr<string asm> {
+multiclass sve_int_index_rr<string asm, SDPatternOperator op> {
def _B : sve_int_index_rr<0b00, asm, ZPR8, GPR32>;
def _H : sve_int_index_rr<0b01, asm, ZPR16, GPR32>;
def _S : sve_int_index_rr<0b10, asm, ZPR32, GPR32>;
def _D : sve_int_index_rr<0b11, asm, ZPR64, GPR64>;
+
+ def : SVE_2_Op_Pat<nxv16i8, op, i32, i32, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i16, op, i32, i32, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, i32, i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, i64, i64, !cast<Instruction>(NAME # _D)>;
}
//
//===----------------------------------------------------------------------===//
// SVE Bitwise Shift - Predicated Group
//===----------------------------------------------------------------------===//
class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<4> opc, string asm,
- ZPRRegOp zprty, Operand immtype,
- ElementSizeEnum size>
+ ZPRRegOp zprty, Operand immtype>
: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, immtype:$imm),
asm, "\t$Zdn, $Pg/m, $_Zdn, $imm",
"",
@@ -4366,50 +4725,99 @@ class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<4> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
- let ElementSize = size;
+ let DestructiveInstType = DestructiveBinaryImm;
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string psName=""> {
+ def _B : SVEPseudo2Instr<psName # _B, 1>,
+ sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
+ def _H : SVEPseudo2Instr<psName # _H, 1>,
+ sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
+ let Inst{8} = imm{3};
+ }
+ def _S : SVEPseudo2Instr<psName # _S, 1>,
+ sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
+ let Inst{9-8} = imm{4-3};
+ }
+ def _D : SVEPseudo2Instr<psName # _D, 1>,
+ sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
+ let Inst{22} = imm{5};
+ let Inst{9-8} = imm{4-3};
+ }
}
-multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm> {
- def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8,
- ElementSizeB>;
- def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16,
- ElementSizeH> {
+multiclass sve2_int_bin_pred_shift_imm_left<bits<4> opc, string asm,
+ string psName,
+ SDPatternOperator op> {
+
+ def _B : SVEPseudo2Instr<psName # _B, 1>, sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
+ def _H : SVEPseudo2Instr<psName # _H, 1>,
+ sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
let Inst{8} = imm{3};
}
- def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32,
- ElementSizeS> {
+ def _S : SVEPseudo2Instr<psName # _S, 1>,
+ sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
let Inst{9-8} = imm{4-3};
}
- def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64,
- ElementSizeD> {
+ def _D : SVEPseudo2Instr<psName # _D, 1>,
+ sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
let Inst{22} = imm{5};
let Inst{9-8} = imm{4-3};
}
+
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm,
+multiclass sve_int_bin_pred_shift_imm_left_zeroing_bhsd<SDPatternOperator op> {
+ def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, tvecshiftL8, FalseLanesZero>;
+ def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, tvecshiftL16, FalseLanesZero>;
+ def _ZERO_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, tvecshiftL32, FalseLanesZero>;
+ def _ZERO_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, tvecshiftL64, FalseLanesZero>;
+
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftL8, !cast<Pseudo>(NAME # _ZERO_B)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftL16, !cast<Pseudo>(NAME # _ZERO_H)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftL32, !cast<Pseudo>(NAME # _ZERO_S)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftL64, !cast<Pseudo>(NAME # _ZERO_D)>;
+}
+
+multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps,
SDPatternOperator op = null_frag> {
- def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8,
- ElementSizeB>;
- def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16,
- ElementSizeH> {
+ def _B : SVEPseudo2Instr<Ps # _B, 1>,
+ sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
+ def _H : SVEPseudo2Instr<Ps # _H, 1>,
+ sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
let Inst{8} = imm{3};
}
- def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32,
- ElementSizeS> {
+ def _S : SVEPseudo2Instr<Ps # _S, 1>,
+ sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
let Inst{9-8} = imm{4-3};
}
- def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64,
- ElementSizeD> {
+ def _D : SVEPseudo2Instr<Ps # _D, 1>,
+ sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
let Inst{22} = imm{5};
let Inst{9-8} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
+}
+
+multiclass sve_int_bin_pred_shift_imm_right_zeroing_bhsd<SDPatternOperator op = null_frag> {
+ def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, vecshiftR8, FalseLanesZero>;
+ def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, vecshiftR16, FalseLanesZero>;
+ def _ZERO_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftR32, FalseLanesZero>;
+ def _ZERO_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, vecshiftR64, FalseLanesZero>;
+
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftR8, !cast<Pseudo>(NAME # _ZERO_B)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftR16, !cast<Pseudo>(NAME # _ZERO_H)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftR32, !cast<Pseudo>(NAME # _ZERO_S)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftR64, !cast<Pseudo>(NAME # _ZERO_D)>;
}
class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
@@ -4432,23 +4840,40 @@ class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
}
-multiclass sve_int_bin_pred_shift<bits<3> opc, string asm,
- SDPatternOperator op> {
- def _B : sve_int_bin_pred_shift<0b00, 0b0, opc, asm, ZPR8, ZPR8>;
- def _H : sve_int_bin_pred_shift<0b01, 0b0, opc, asm, ZPR16, ZPR16>;
- def _S : sve_int_bin_pred_shift<0b10, 0b0, opc, asm, ZPR32, ZPR32>;
- def _D : sve_int_bin_pred_shift<0b11, 0b0, opc, asm, ZPR64, ZPR64>;
-
+multiclass sve_int_bin_pred_shift<bits<3> opc, string asm, string Ps,
+ SDPatternOperator op, string revname, bit isReverseInstr = 0> {
+ let DestructiveInstType = DestructiveBinaryCommWithRev in {
+ def _B : sve_int_bin_pred_shift<0b00, 0b0, opc, asm, ZPR8, ZPR8>,
+ SVEPseudo2Instr<Ps # _B, 1>, SVEInstr2Rev<NAME # _B, revname # _B, isReverseInstr>;
+ def _H : sve_int_bin_pred_shift<0b01, 0b0, opc, asm, ZPR16, ZPR16>,
+ SVEPseudo2Instr<Ps # _H, 1>, SVEInstr2Rev<NAME # _H, revname # _H, isReverseInstr>;
+ def _S : sve_int_bin_pred_shift<0b10, 0b0, opc, asm, ZPR32, ZPR32>,
+ SVEPseudo2Instr<Ps # _S, 1>, SVEInstr2Rev<NAME # _S, revname # _S, isReverseInstr>;
+ def _D : sve_int_bin_pred_shift<0b11, 0b0, opc, asm, ZPR64, ZPR64>,
+ SVEPseudo2Instr<Ps # _D, 1>, SVEInstr2Rev<NAME # _D, revname # _D, isReverseInstr>;
+ }
def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
+multiclass sve_int_bin_pred_zeroing_bhsd<SDPatternOperator op> {
+ def _ZERO_B : PredTwoOpPseudo<NAME # _B, ZPR8, FalseLanesZero>;
+ def _ZERO_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesZero>;
+ def _ZERO_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesZero>;
+ def _ZERO_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesZero>;
+
+ def : SVE_3_Op_Pat_SelZero<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Pseudo>(NAME # _ZERO_B)>;
+ def : SVE_3_Op_Pat_SelZero<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Pseudo>(NAME # _ZERO_H)>;
+ def : SVE_3_Op_Pat_SelZero<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _ZERO_S)>;
+ def : SVE_3_Op_Pat_SelZero<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _ZERO_D)>;
+}
+
multiclass sve_int_bin_pred_shift_wide<bits<3> opc, string asm,
SDPatternOperator op> {
def _B : sve_int_bin_pred_shift<0b00, 0b1, opc, asm, ZPR8, ZPR64>;
@@ -4493,7 +4918,8 @@ class sve_int_bin_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
ZPRRegOp zprty, Operand immtype>
: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$imm),
asm, "\t$Zd, $Zn, $imm",
- "", []>, Sched<[]> {
+ "",
+ []>, Sched<[]> {
bits<5> Zd;
bits<5> Zn;
bits<6> imm;
@@ -4508,7 +4934,8 @@ class sve_int_bin_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
let Inst{4-0} = Zd;
}
-multiclass sve_int_bin_cons_shift_imm_left<bits<2> opc, string asm> {
+multiclass sve_int_bin_cons_shift_imm_left<bits<2> opc, string asm,
+ SDPatternOperator op> {
def _B : sve_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
def _H : sve_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
let Inst{19} = imm{3};
@@ -4520,9 +4947,15 @@ multiclass sve_int_bin_cons_shift_imm_left<bits<2> opc, string asm> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
+
+ def : SVE_1_Op_Imm_Shift_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, vecshiftL8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_Shift_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, vecshiftL16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_Shift_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, vecshiftL32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEShiftImm64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_bin_cons_shift_imm_right<bits<2> opc, string asm> {
+multiclass sve_int_bin_cons_shift_imm_right<bits<2> opc, string asm,
+ SDPatternOperator op> {
def _B : sve_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
def _H : sve_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
let Inst{19} = imm{3};
@@ -4534,6 +4967,11 @@ multiclass sve_int_bin_cons_shift_imm_right<bits<2> opc, string asm> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
+
+ def : SVE_1_Op_Imm_Shift_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_Shift_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_Shift_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEShiftImm64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
// SVE Memory - Store Group
@@ -4743,16 +5181,36 @@ class sve2_mem_sstnt_vs_base<bits<3> opc, string asm,
let mayStore = 1;
}
-multiclass sve2_mem_sstnt_vs<bits<3> opc, string asm,
- RegisterOperand listty, ZPRRegOp zprty> {
- def _REAL : sve2_mem_sstnt_vs_base<opc, asm, listty, zprty>;
+multiclass sve2_mem_sstnt_vs_32_ptrs<bits<3> opc, string asm,
+ SDPatternOperator op,
+ ValueType vt> {
+ def _REAL : sve2_mem_sstnt_vs_base<opc, asm, Z_s, ZPR32>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
+ (!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
+ (!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
+ (!cast<Instruction>(NAME # _REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 1>;
+
+ def : Pat <(op (nxv4i32 ZPR32:$Zt), (nxv4i1 PPR3bAny:$Pg), (nxv4i32 ZPR32:$Zn), (i64 GPR64:$Rm), vt),
+ (!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm)>;
+}
+
+multiclass sve2_mem_sstnt_vs_64_ptrs<bits<3> opc, string asm,
+ SDPatternOperator op,
+ ValueType vt> {
+ def _REAL : sve2_mem_sstnt_vs_base<opc, asm, Z_d, ZPR64>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
- (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
+ (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
- (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;
+ (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
- (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;
+ (!cast<Instruction>(NAME # _REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>;
+
+ def : Pat <(op (nxv2i64 ZPR64:$Zt), (nxv2i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64:$Rm), vt),
+ (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
}
class sve_mem_sst_sv<bits<3> opc, bit xs, bit scaled, string asm,
@@ -5094,6 +5552,17 @@ class sve_int_rdffr_pred<bit s, string asm>
let Uses = [FFR];
}
+multiclass sve_int_rdffr_pred<bit s, string asm, SDPatternOperator op> {
+ def _REAL : sve_int_rdffr_pred<s, asm>;
+
+ // We need a layer of indirection because early machine code passes balk at
+ // physical register (i.e. FFR) uses that have no previous definition.
+ let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
+ def "" : Pseudo<(outs PPR8:$Pd), (ins PPRAny:$Pg), [(set (nxv16i1 PPR8:$Pd), (op (nxv16i1 PPRAny:$Pg)))]>,
+ PseudoInstExpansion<(!cast<Instruction>(NAME # _REAL) PPR8:$Pd, PPRAny:$Pg)>;
+ }
+}
+
class sve_int_rdffr_unpred<string asm> : I<
(outs PPR8:$Pd), (ins),
asm, "\t$Pd",
@@ -5106,11 +5575,22 @@ class sve_int_rdffr_unpred<string asm> : I<
let Uses = [FFR];
}
-class sve_int_wrffr<string asm>
+multiclass sve_int_rdffr_unpred<string asm, SDPatternOperator op> {
+ def _REAL : sve_int_rdffr_unpred<asm>;
+
+ // We need a layer of indirection because early machine code passes balk at
+ // physical register (i.e. FFR) uses that have no previous definition.
+ let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
+ def "" : Pseudo<(outs PPR8:$Pd), (ins), [(set (nxv16i1 PPR8:$Pd), (op))]>,
+ PseudoInstExpansion<(!cast<Instruction>(NAME # _REAL) PPR8:$Pd)>;
+ }
+}
+
+class sve_int_wrffr<string asm, SDPatternOperator op>
: I<(outs), (ins PPR8:$Pn),
asm, "\t$Pn",
"",
- []>, Sched<[]> {
+ [(op (nxv16i1 PPR8:$Pn))]>, Sched<[]> {
bits<4> Pn;
let Inst{31-9} = 0b00100101001010001001000;
let Inst{8-5} = Pn;
@@ -5120,11 +5600,11 @@ class sve_int_wrffr<string asm>
let Defs = [FFR];
}
-class sve_int_setffr<string asm>
+class sve_int_setffr<string asm, SDPatternOperator op>
: I<(outs), (ins),
asm, "",
"",
- []>, Sched<[]> {
+ [(op)]>, Sched<[]> {
let Inst{31-0} = 0b00100101001011001001000000000000;
let hasSideEffects = 1;
@@ -5219,7 +5699,7 @@ class sve_int_perm_clast_zz<bits<2> sz8_64, bit ab, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
@@ -5317,7 +5797,7 @@ class sve_int_perm_splice<bits<2> sz8_64, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
}
@@ -5332,9 +5812,9 @@ multiclass sve_int_perm_splice<string asm, SDPatternOperator op> {
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
- def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
class sve2_int_perm_splice_cons<bits<2> sz8_64, string asm,
@@ -5380,7 +5860,7 @@ class sve_int_perm_rev<bits<2> sz8_64, bits<2> opc, string asm,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
}
@@ -5443,11 +5923,11 @@ class sve_int_perm_cpy_r<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
}
-multiclass sve_int_perm_cpy_r<string asm> {
+multiclass sve_int_perm_cpy_r<string asm, SDPatternOperator op> {
def _B : sve_int_perm_cpy_r<0b00, asm, ZPR8, GPR32sp>;
def _H : sve_int_perm_cpy_r<0b01, asm, ZPR16, GPR32sp>;
def _S : sve_int_perm_cpy_r<0b10, asm, ZPR32, GPR32sp>;
@@ -5461,6 +5941,15 @@ multiclass sve_int_perm_cpy_r<string asm> {
(!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPR3bAny:$Pg, GPR32sp:$Rn), 1>;
def : InstAlias<"mov $Zd, $Pg/m, $Rn",
(!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPR3bAny:$Pg, GPR64sp:$Rn), 1>;
+
+ def : Pat<(nxv16i8 (op nxv16i1:$pg, i32:$splat, nxv16i8:$passthru)),
+ (!cast<Instruction>(NAME # _B) $passthru, $pg, $splat)>;
+ def : Pat<(nxv8i16 (op nxv8i1:$pg, i32:$splat, nxv8i16:$passthru)),
+ (!cast<Instruction>(NAME # _H) $passthru, $pg, $splat)>;
+ def : Pat<(nxv4i32 (op nxv4i1:$pg, i32:$splat, nxv4i32:$passthru)),
+ (!cast<Instruction>(NAME # _S) $passthru, $pg, $splat)>;
+ def : Pat<(nxv2i64 (op nxv2i1:$pg, i64:$splat, nxv2i64:$passthru)),
+ (!cast<Instruction>(NAME # _D) $passthru, $pg, $splat)>;
}
class sve_int_perm_cpy_v<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -5480,11 +5969,11 @@ class sve_int_perm_cpy_v<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
- let DestructiveInstType = Destructive;
+ let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
}
-multiclass sve_int_perm_cpy_v<string asm> {
+multiclass sve_int_perm_cpy_v<string asm, SDPatternOperator op> {
def _B : sve_int_perm_cpy_v<0b00, asm, ZPR8, FPR8>;
def _H : sve_int_perm_cpy_v<0b01, asm, ZPR16, FPR16>;
def _S : sve_int_perm_cpy_v<0b10, asm, ZPR32, FPR32>;
@@ -5498,6 +5987,16 @@ multiclass sve_int_perm_cpy_v<string asm> {
(!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPR3bAny:$Pg, FPR32:$Vn), 1>;
def : InstAlias<"mov $Zd, $Pg/m, $Vn",
(!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPR3bAny:$Pg, FPR64:$Vn), 1>;
+
+
+ def : Pat<(nxv8f16 (op nxv8i1:$pg, f16:$splat, nxv8f16:$passthru)),
+ (!cast<Instruction>(NAME # _H) $passthru, $pg, $splat)>;
+ def : Pat<(nxv2f32 (op nxv2i1:$pg, f32:$splat, nxv2f32:$passthru)),
+ (!cast<Instruction>(NAME # _S) $passthru, $pg, $splat)>;
+ def : Pat<(nxv4f32 (op nxv4i1:$pg, f32:$splat, nxv4f32:$passthru)),
+ (!cast<Instruction>(NAME # _S) $passthru, $pg, $splat)>;
+ def : Pat<(nxv2f64 (op nxv2i1:$pg, f64:$splat, nxv2f64:$passthru)),
+ (!cast<Instruction>(NAME # _D) $passthru, $pg, $splat)>;
}
class sve_int_perm_compact<bit sz, string asm, ZPRRegOp zprty>
@@ -5557,14 +6056,21 @@ class sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
multiclass sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
RegisterOperand listty, ZPRRegOp zprty> {
- def "" : sve_mem_cld_si_base<dtype, nf, asm, listty>;
+ def _REAL : sve_mem_cld_si_base<dtype, nf, asm, listty>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
- (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>;
+ (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]",
- (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>;
+ (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
- (!cast<Instruction>(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
+ (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
+
+ // We need a layer of indirection because early machine code passes balk at
+ // physical register (i.e. FFR) uses that have no previous definition.
+ let hasSideEffects = 1, hasNoSchedulingInfo = 1, mayLoad = 1 in {
+ def "" : Pseudo<(outs listty:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), []>,
+ PseudoInstExpansion<(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4)>;
+ }
}
multiclass sve_mem_cld_si<bits<4> dtype, string asm, RegisterOperand listty,
@@ -5773,6 +6279,13 @@ multiclass sve_mem_cldff_ss<bits<4> dtype, string asm, RegisterOperand listty,
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
+
+ // We need a layer of indirection because early machine code passes balk at
+ // physical register (i.e. FFR) uses that have no previous definition.
+ let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
+ def "" : Pseudo<(outs listty:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), []>,
+ PseudoInstExpansion<(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm)>;
+ }
}
multiclass sve_mem_cldnf_si<bits<4> dtype, string asm, RegisterOperand listty,
@@ -5878,10 +6391,19 @@ multiclass sve_mem_32b_gld_sv_32_scaled<bits<4> opc, string asm,
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _SXTW_SCALED_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
+ // We need a layer of indirection because early machine code passes balk at
+ // physical register (i.e. FFR) uses that have no previous definition.
+ let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
+ def _UXTW_SCALED : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), []>,
+ PseudoInstExpansion<(!cast<Instruction>(NAME # _UXTW_SCALED_REAL) Z_s:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>;
+ def _SXTW_SCALED : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), []>,
+ PseudoInstExpansion<(!cast<Instruction>(NAME # _SXTW_SCALED_REAL) Z_s:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>;
+ }
+
def : Pat<(nxv4i32 (uxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$indices), vt)),
- (!cast<Instruction>(NAME # _UXTW_SCALED_REAL) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
+ (!cast<Instruction>(NAME # _UXTW_SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
def : Pat<(nxv4i32 (sxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$indices), vt)),
- (!cast<Instruction>(NAME # _SXTW_SCALED_REAL) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
+ (!cast<Instruction>(NAME # _SXTW_SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
}
multiclass sve_mem_32b_gld_vs_32_unscaled<bits<4> opc, string asm,
@@ -5898,10 +6420,19 @@ multiclass sve_mem_32b_gld_vs_32_unscaled<bits<4> opc, string asm,
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _SXTW_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
+ // We need a layer of indirection because early machine code passes balk at
+ // physical register (i.e. FFR) uses that have no previous definition.
+ let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
+ def _UXTW : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), []>,
+ PseudoInstExpansion<(!cast<Instruction>(NAME # _UXTW_REAL) Z_s:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>;
+ def _SXTW : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), []>,
+ PseudoInstExpansion<(!cast<Instruction>(NAME # _SXTW_REAL) Z_s:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>;
+ }
+
def : Pat<(nxv4i32 (uxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt)),
- (!cast<Instruction>(NAME # _UXTW_REAL) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
+ (!cast<Instruction>(NAME # _UXTW) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
def : Pat<(nxv4i32 (sxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt)),
- (!cast<Instruction>(NAME # _SXTW_REAL) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
+ (!cast<Instruction>(NAME # _SXTW) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
}
@@ -5940,8 +6471,15 @@ multiclass sve_mem_32b_gld_vi_32_ptrs<bits<4> opc, string asm, Operand imm_ty,
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
(!cast<Instruction>(NAME # _IMM_REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>;
+ // We need a layer of indirection because early machine code passes balk at
+ // physical register (i.e. FFR) uses that have no previous definition.
+ let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
+ def _IMM : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), []>,
+ PseudoInstExpansion<(!cast<Instruction>(NAME # _IMM_REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5)>;
+ }
+
def : Pat<(nxv4i32 (op (nxv4i1 PPR:$gp), (nxv4i32 ZPR:$ptrs), imm_ty:$index, vt)),
- (!cast<Instruction>(NAME # _IMM_REAL) PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
+ (!cast<Instruction>(NAME # _IMM) PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
}
class sve_mem_prfm_si<bits<2> msz, string asm>
@@ -6022,9 +6560,17 @@ class sve_mem_32b_prfm_sv<bits<2> msz, bit xs, string asm,
multiclass sve_mem_32b_prfm_sv_scaled<bits<2> msz, string asm,
RegisterOperand sxtw_opnd,
- RegisterOperand uxtw_opnd> {
+ RegisterOperand uxtw_opnd,
+ PatFrag op_sxtw,
+ PatFrag op_uxtw> {
def _UXTW_SCALED : sve_mem_32b_prfm_sv<msz, 0, asm, uxtw_opnd>;
def _SXTW_SCALED : sve_mem_32b_prfm_sv<msz, 1, asm, sxtw_opnd>;
+
+ def : Pat<(op_uxtw (nxv4i1 PPR3bAny:$Pg), (i64 GPR64sp:$Rn), (nxv4i32 uxtw_opnd:$Zm), (i32 sve_prfop:$prfop)),
+ (!cast<Instruction>(NAME # _UXTW_SCALED) sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>;
+
+ def : Pat<(op_sxtw (nxv4i1 PPR3bAny:$Pg), (i64 GPR64sp:$Rn), (nxv4i32 sxtw_opnd:$Zm), (i32 sve_prfop:$prfop)),
+ (!cast<Instruction>(NAME # _SXTW_SCALED) sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>;
}
class sve_mem_32b_prfm_vi<bits<2> msz, string asm, Operand imm_ty>
@@ -6047,11 +6593,14 @@ class sve_mem_32b_prfm_vi<bits<2> msz, string asm, Operand imm_ty>
let Inst{3-0} = prfop;
}
-multiclass sve_mem_32b_prfm_vi<bits<2> msz, string asm, Operand imm_ty> {
+multiclass sve_mem_32b_prfm_vi<bits<2> msz, string asm, Operand imm_ty, SDPatternOperator op> {
def NAME : sve_mem_32b_prfm_vi<msz, asm, imm_ty>;
def : InstAlias<asm # "\t$prfop, $Pg, [$Zn]",
(!cast<Instruction>(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>;
+
+ def : Pat<(op (nxv4i1 PPR_3b:$Pg), (nxv4i32 ZPR32:$Zn), (i64 imm_ty:$imm), (i32 sve_prfop:$prfop)),
+ (!cast<Instruction>(NAME) sve_prfop:$prfop, PPR_3b:$Pg, ZPR32:$Zn, imm_ty:$imm)>;
}
class sve_mem_z_fill<string asm>
@@ -6130,17 +6679,38 @@ class sve2_mem_gldnt_vs_base<bits<5> opc, dag iops, string asm,
let mayLoad = 1;
}
-multiclass sve2_mem_gldnt_vs<bits<5> opc, string asm,
- RegisterOperand listty, ZPRRegOp zprty> {
- def _REAL : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
- asm, listty>;
+multiclass sve2_mem_gldnt_vs_32_ptrs<bits<5> opc, string asm,
+ SDPatternOperator op,
+ ValueType vt> {
+ def _REAL : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm),
+ asm, Z_s>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
+ (!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
+ (!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
+ (!cast<Instruction>(NAME # _REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 1>;
+
+ def : Pat <(nxv4i32 (op (nxv4i1 PPR3bAny:$Pg), (nxv4i32 ZPR32:$Zd), (i64 GPR64:$Rm), vt)),
+ (!cast<Instruction>(NAME # _REAL) PPR3bAny:$Pg, ZPR32:$Zd, GPR64:$Rm)>;
+}
+
+multiclass sve2_mem_gldnt_vs_64_ptrs<bits<5> opc, string asm,
+ SDPatternOperator op,
+ ValueType vt> {
+ def _REAL : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm),
+ asm, Z_d>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
- (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
+ (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
- (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;
+ (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
- (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;
+ (!cast<Instruction>(NAME # _REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>;
+
+ def : Pat <(nxv2i64 (op (nxv2i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zd), (i64 GPR64:$Rm), vt)),
+ (!cast<Instruction>(NAME # _REAL) PPR3bAny:$Pg, ZPR64:$Zd, GPR64:$Rm)>;
}
//===----------------------------------------------------------------------===//
@@ -6190,10 +6760,19 @@ multiclass sve_mem_64b_gld_sv_32_scaled<bits<4> opc, string asm,
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _SXTW_SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
+ // We need a layer of indirection because early machine code passes balk at
+ // physical register (i.e. FFR) uses that have no previous definition.
+ let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
+ def _UXTW_SCALED : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), []>,
+ PseudoInstExpansion<(!cast<Instruction>(NAME # _UXTW_SCALED_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>;
+ def _SXTW_SCALED : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), []>,
+ PseudoInstExpansion<(!cast<Instruction>(NAME # _SXTW_SCALED_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>;
+ }
+
def : Pat<(nxv2i64 (uxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt)),
- (!cast<Instruction>(NAME # _UXTW_SCALED_REAL) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
+ (!cast<Instruction>(NAME # _UXTW_SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
def : Pat<(nxv2i64 (sxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt)),
- (!cast<Instruction>(NAME # _SXTW_SCALED_REAL) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
+ (!cast<Instruction>(NAME # _SXTW_SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
}
multiclass sve_mem_64b_gld_vs_32_unscaled<bits<4> opc, string asm,
@@ -6210,10 +6789,19 @@ multiclass sve_mem_64b_gld_vs_32_unscaled<bits<4> opc, string asm,
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _SXTW_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
+ // We need a layer of indirection because early machine code passes balk at
+ // physical register (i.e. FFR) uses that have no previous definition.
+ let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
+ def _UXTW : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), []>,
+ PseudoInstExpansion<(!cast<Instruction>(NAME # _UXTW_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>;
+ def _SXTW : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), []>,
+ PseudoInstExpansion<(!cast<Instruction>(NAME # _SXTW_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>;
+ }
+
def : Pat<(nxv2i64 (uxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt)),
- (!cast<Instruction>(NAME # _UXTW_REAL) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
+ (!cast<Instruction>(NAME # _UXTW) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
def : Pat<(nxv2i64 (sxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt)),
- (!cast<Instruction>(NAME # _SXTW_REAL) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
+ (!cast<Instruction>(NAME # _SXTW) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
}
multiclass sve_mem_64b_gld_sv2_64_scaled<bits<4> opc, string asm,
@@ -6224,8 +6812,15 @@ multiclass sve_mem_64b_gld_sv2_64_scaled<bits<4> opc, string asm,
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>;
+ // We need a layer of indirection because early machine code passes balk at
+ // physical register (i.e. FFR) uses that have no previous definition.
+ let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
+ def _SCALED : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), []>,
+ PseudoInstExpansion<(!cast<Instruction>(NAME # _SCALED_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm)>;
+ }
+
def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt)),
- (!cast<Instruction>(NAME # _SCALED_REAL) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
+ (!cast<Instruction>(NAME # _SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
}
multiclass sve_mem_64b_gld_vs2_64_unscaled<bits<4> opc, string asm,
@@ -6235,8 +6830,15 @@ multiclass sve_mem_64b_gld_vs2_64_unscaled<bits<4> opc, string asm,
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>;
+ // We need a layer of indirection because early machine code passes balk at
+ // physical register (i.e. FFR) uses that have no previous definition.
+ let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
+ def "" : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), []>,
+ PseudoInstExpansion<(!cast<Instruction>(NAME # _REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm)>;
+ }
+
def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt)),
- (!cast<Instruction>(NAME # _REAL) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
+ (!cast<Instruction>(NAME) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
}
class sve_mem_64b_gld_vi<bits<4> opc, string asm, Operand imm_ty>
@@ -6274,8 +6876,15 @@ multiclass sve_mem_64b_gld_vi_64_ptrs<bits<4> opc, string asm, Operand imm_ty,
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
(!cast<Instruction>(NAME # _IMM_REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>;
+ // We need a layer of indirection because early machine code passes balk at
+ // physical register (i.e. FFR) uses that have no previous definition.
+ let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
+ def _IMM : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), []>,
+ PseudoInstExpansion<(!cast<Instruction>(NAME # _IMM_REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5)>;
+ }
+
def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), (nxv2i64 ZPR:$ptrs), imm_ty:$index, vt)),
- (!cast<Instruction>(NAME # _IMM_REAL) PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
+ (!cast<Instruction>(NAME # _IMM) PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
}
// bit lsl is '0' if the offsets are extended (uxtw/sxtw), '1' if shifted (lsl)
@@ -6305,14 +6914,27 @@ class sve_mem_64b_prfm_sv<bits<2> msz, bit xs, bit lsl, string asm,
multiclass sve_mem_64b_prfm_sv_ext_scaled<bits<2> msz, string asm,
RegisterOperand sxtw_opnd,
- RegisterOperand uxtw_opnd> {
+ RegisterOperand uxtw_opnd,
+ PatFrag op_sxtw,
+ PatFrag op_uxtw> {
def _UXTW_SCALED : sve_mem_64b_prfm_sv<msz, 0, 0, asm, uxtw_opnd>;
def _SXTW_SCALED : sve_mem_64b_prfm_sv<msz, 1, 0, asm, sxtw_opnd>;
+
+ def : Pat<(op_uxtw (nxv2i1 PPR3bAny:$Pg), (i64 GPR64sp:$Rn), (nxv2i64 uxtw_opnd:$Zm), (i32 sve_prfop:$prfop)),
+ (!cast<Instruction>(NAME # _UXTW_SCALED) sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>;
+
+ def : Pat<(op_sxtw (nxv2i1 PPR3bAny:$Pg), (i64 GPR64sp:$Rn), (nxv2i64 sxtw_opnd:$Zm), (i32 sve_prfop:$prfop)),
+ (!cast<Instruction>(NAME # _SXTW_SCALED) sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>;
+
}
multiclass sve_mem_64b_prfm_sv_lsl_scaled<bits<2> msz, string asm,
- RegisterOperand zprext> {
+ RegisterOperand zprext, PatFrag frag> {
def NAME : sve_mem_64b_prfm_sv<msz, 1, 1, asm, zprext>;
+
+ def : Pat<(frag (nxv2i1 PPR3bAny:$Pg), (i64 GPR64sp:$Rn), (nxv2i64 zprext:$Zm), (i32 sve_prfop:$prfop)),
+ (!cast<Instruction>(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm)>;
+
}
@@ -6338,13 +6960,15 @@ class sve_mem_64b_prfm_vi<bits<2> msz, string asm, Operand imm_ty>
let hasSideEffects = 1;
}
-multiclass sve_mem_64b_prfm_vi<bits<2> msz, string asm, Operand imm_ty> {
+multiclass sve_mem_64b_prfm_vi<bits<2> msz, string asm, Operand imm_ty, SDPatternOperator op> {
def NAME : sve_mem_64b_prfm_vi<msz, asm, imm_ty>;
def : InstAlias<asm # "\t$prfop, $Pg, [$Zn]",
(!cast<Instruction>(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>;
-}
+ def : Pat<(op (nxv2i1 PPR_3b:$Pg), (nxv2i64 ZPR32:$Zn), (i64 imm_ty:$imm), (i32 sve_prfop:$prfop)),
+ (!cast<Instruction>(NAME) sve_prfop:$prfop, PPR_3b:$Pg, ZPR32:$Zn, imm_ty:$imm)>;
+}
//===----------------------------------------------------------------------===//
// SVE Compute Vector Address Group
@@ -6600,6 +7224,12 @@ class sve_int_brkp<bits<2> opc, string asm>
let Defs = !if(!eq (opc{1}, 1), [NZCV], []);
}
+multiclass sve_int_brkp<bits<2> opc, string asm, SDPatternOperator op> {
+ def NAME : sve_int_brkp<opc, asm>;
+
+ def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i1, nxv16i1, !cast<Instruction>(NAME)>;
+}
+
//===----------------------------------------------------------------------===//
// SVE Partition Break Group
@@ -6626,6 +7256,12 @@ class sve_int_brkn<bit S, string asm>
let Defs = !if(!eq (S, 0b1), [NZCV], []);
}
+multiclass sve_int_brkn<bits<1> opc, string asm, SDPatternOperator op> {
+ def NAME : sve_int_brkn<opc, asm>;
+
+ def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i1, nxv16i1, !cast<Instruction>(NAME)>;
+}
+
class sve_int_break<bits<3> opc, string asm, string suffix, dag iops>
: I<(outs PPR8:$Pd), iops,
asm, "\t$Pd, $Pg"#suffix#", $Pn",
@@ -6648,12 +7284,16 @@ class sve_int_break<bits<3> opc, string asm, string suffix, dag iops>
}
-multiclass sve_int_break_m<bits<3> opc, string asm> {
+multiclass sve_int_break_m<bits<3> opc, string asm, SDPatternOperator op> {
def NAME : sve_int_break<opc, asm, "/m", (ins PPR8:$_Pd, PPRAny:$Pg, PPR8:$Pn)>;
+
+ def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i1, nxv16i1, !cast<Instruction>(NAME)>;
}
-multiclass sve_int_break_z<bits<3> opc, string asm> {
+multiclass sve_int_break_z<bits<3> opc, string asm, SDPatternOperator op> {
def NAME : sve_int_break<opc, asm, "/z", (ins PPRAny:$Pg, PPR8:$Pn)>;
+
+ def : SVE_2_Op_Pat<nxv16i1, op, nxv16i1, nxv16i1, !cast<Instruction>(NAME)>;
}
//===----------------------------------------------------------------------===//
@@ -6683,20 +7323,23 @@ class sve2_char_match<bit sz, bit opc, string asm,
let Defs = [NZCV];
}
-multiclass sve2_char_match<bit opc, string asm> {
+multiclass sve2_char_match<bit opc, string asm, SDPatternOperator op> {
def _B : sve2_char_match<0b0, opc, asm, PPR8, ZPR8>;
def _H : sve2_char_match<0b1, opc, asm, PPR16, ZPR16>;
+
+ def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i1, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
}
//===----------------------------------------------------------------------===//
// SVE2 Histogram Computation - Segment Group
//===----------------------------------------------------------------------===//
-class sve2_hist_gen_segment<string asm>
+class sve2_hist_gen_segment<string asm, SDPatternOperator op>
: I<(outs ZPR8:$Zd), (ins ZPR8:$Zn, ZPR8:$Zm),
asm, "\t$Zd, $Zn, $Zm",
"",
- []>, Sched<[]> {
+ [(set nxv16i8:$Zd, (op nxv16i8:$Zn, nxv16i8:$Zm))]>, Sched<[]> {
bits<5> Zd;
bits<5> Zn;
bits<5> Zm;
@@ -6730,9 +7373,12 @@ class sve2_hist_gen_vector<bit sz, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zd;
}
-multiclass sve2_hist_gen_vector<string asm> {
+multiclass sve2_hist_gen_vector<string asm, SDPatternOperator op> {
def _S : sve2_hist_gen_vector<0b0, asm, ZPR32>;
def _D : sve2_hist_gen_vector<0b1, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -6755,6 +7401,12 @@ class sve2_crypto_cons_bin_op<bit opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zd;
}
+multiclass sve2_crypto_cons_bin_op<bit opc, string asm, ZPRRegOp zprty,
+ SDPatternOperator op, ValueType vt> {
+ def NAME : sve2_crypto_cons_bin_op<opc, asm, zprty>;
+ def : SVE_2_Op_Pat<vt, op, vt, vt, !cast<Instruction>(NAME)>;
+}
+
class sve2_crypto_des_bin_op<bits<2> opc, string asm, ZPRRegOp zprty>
: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm),
asm, "\t$Zdn, $_Zdn, $Zm",
@@ -6772,8 +7424,14 @@ class sve2_crypto_des_bin_op<bits<2> opc, string asm, ZPRRegOp zprty>
let Constraints = "$Zdn = $_Zdn";
}
-class sve2_crypto_unary_op<bit opc, string asm>
-: I<(outs ZPR8:$Zdn), (ins ZPR8:$_Zdn),
+multiclass sve2_crypto_des_bin_op<bits<2> opc, string asm, ZPRRegOp zprty,
+ SDPatternOperator op, ValueType vt> {
+ def NAME : sve2_crypto_des_bin_op<opc, asm, zprty>;
+ def : SVE_2_Op_Pat<vt, op, vt, vt, !cast<Instruction>(NAME)>;
+}
+
+class sve2_crypto_unary_op<bit opc, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn),
asm, "\t$Zdn, $_Zdn",
"",
[]>, Sched<[]> {
@@ -6785,3 +7443,389 @@ class sve2_crypto_unary_op<bit opc, string asm>
let Constraints = "$Zdn = $_Zdn";
}
+
+multiclass sve2_crypto_unary_op<bit opc, string asm, SDPatternOperator op> {
+ def NAME : sve2_crypto_unary_op<opc, asm, ZPR8>;
+ def : SVE_1_Op_Pat<nxv16i8, op, nxv16i8, !cast<Instruction>(NAME)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE BFloat16 Group
+//===----------------------------------------------------------------------===//
+
+class sve_bfloat_dot_base<bits<2> opc, string asm, string ops, dag iops>
+: I<(outs ZPR32:$Zda), iops, asm, ops, "", []>, Sched<[]> {
+ bits<5> Zda;
+ bits<5> Zn;
+ let Inst{31-21} = 0b01100100011;
+ let Inst{15-14} = opc;
+ let Inst{13-10} = 0b0000;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = DestructiveOther;
+ let ElementSize = ElementSizeH;
+}
+
+class sve_bfloat_dot<string asm>
+: sve_bfloat_dot_base<0b10, asm, "\t$Zda, $Zn, $Zm",
+ (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm)> {
+ bits<5> Zm;
+ let Inst{20-16} = Zm;
+}
+
+multiclass sve_bfloat_dot<string asm, SDPatternOperator op> {
+ def NAME : sve_bfloat_dot<asm>;
+ def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16 ,!cast<Instruction>(NAME)>;
+}
+
+class sve_bfloat_dot_indexed<string asm>
+: sve_bfloat_dot_base<0b01, asm, "\t$Zda, $Zn, $Zm$iop",
+ (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS:$iop)> {
+ bits<2> iop;
+ bits<3> Zm;
+ let Inst{20-19} = iop;
+ let Inst{18-16} = Zm;
+}
+
+multiclass sve_bfloat_dot_indexed<string asm, SDPatternOperator op> {
+ def NAME : sve_bfloat_dot_indexed<asm>;
+ def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16, i64, VectorIndexS_timm, !cast<Instruction>(NAME)>;
+}
+
+class sve_bfloat_matmul<string asm>
+: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm),
+ asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
+ bits<5> Zm;
+ bits<5> Zda;
+ bits<5> Zn;
+ let Inst{31-21} = 0b01100100011;
+ let Inst{20-16} = Zm;
+ let Inst{15-10} = 0b111001;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = DestructiveOther;
+ let ElementSize = ElementSizeH;
+}
+
+multiclass sve_bfloat_matmul<string asm, SDPatternOperator op> {
+ def NAME : sve_bfloat_matmul<asm>;
+ def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME)>;
+}
+
+class sve_bfloat_matmul_longvecl<bit BT, string asm>
+: sve_bfloat_matmul<asm> {
+ let Inst{23} = 0b1;
+ let Inst{14-13} = 0b00;
+ let Inst{10} = BT;
+}
+
+multiclass sve_bfloat_matmul_longvecl<bit BT, string asm, SDPatternOperator op> {
+ def NAME : sve_bfloat_matmul_longvecl<BT, asm>;
+ def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME)>;
+}
+
+class sve_bfloat_matmul_longvecl_idx<bit BT, string asm>
+: sve_bfloat_dot_base<0b01, asm, "\t$Zda, $Zn, $Zm$iop",
+ (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexH:$iop)> {
+ bits<3> iop;
+ bits<3> Zm;
+ let Inst{23} = 0b1;
+ let Inst{20-19} = iop{2-1};
+ let Inst{18-16} = Zm;
+ let Inst{11} = iop{0};
+ let Inst{10} = BT;
+}
+
+multiclass sve_bfloat_matmul_longvecl_idx<bit BT, string asm, SDPatternOperator op> {
+ def NAME : sve_bfloat_matmul_longvecl_idx<BT, asm>;
+ def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16, i64, VectorIndexH_timm, !cast<Instruction>(NAME)>;
+}
+
+class sve_bfloat_convert<bit N, string asm>
+: I<(outs ZPR16:$Zd), (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn),
+ asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<3> Pg;
+ bits<5> Zn;
+ let Inst{31-25} = 0b0110010;
+ let Inst{24} = N;
+ let Inst{23-13} = 0b10001010101;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = DestructiveOther;
+ let hasSideEffects = 1;
+ let ElementSize = ElementSizeS;
+}
+
+multiclass sve_bfloat_convert<bit N, string asm, SDPatternOperator op> {
+ def NAME : sve_bfloat_convert<N, asm>;
+ def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8i1, nxv4f32, !cast<Instruction>(NAME)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Integer Matrix Multiply Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_matmul<bits<2> uns, string asm>
+: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR8:$Zn, ZPR8:$Zm), asm,
+ "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
+ bits<5> Zda;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b01000101;
+ let Inst{23-22} = uns;
+ let Inst{21} = 0;
+ let Inst{20-16} = Zm;
+ let Inst{15-10} = 0b100110;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = DestructiveOther;
+ let ElementSize = ZPR32.ElementSize;
+}
+
+multiclass sve_int_matmul<bits<2> uns, string asm, SDPatternOperator op> {
+ def NAME : sve_int_matmul<uns, asm>;
+
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv16i8, nxv16i8, !cast<Instruction>(NAME)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Integer Dot Product Mixed Sign Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_dot_mixed<string asm>
+: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR8:$Zn, ZPR8:$Zm), asm,
+ "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
+ bits<5> Zda;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-21} = 0b01000100100;
+ let Inst{20-16} = Zm;
+ let Inst{15-10} = 0b011110;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = DestructiveOther;
+ let ElementSize = ZPR32.ElementSize;
+}
+
+multiclass sve_int_dot_mixed<string asm, SDPatternOperator op> {
+ def NAME : sve_int_dot_mixed<asm>;
+
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv16i8, nxv16i8, !cast<Instruction>(NAME)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Integer Dot Product Mixed Sign - Indexed Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_dot_mixed_indexed<bit U, string asm>
+: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR8:$Zn, ZPR3b8:$Zm, VectorIndexS32b:$idx),
+ asm, "\t$Zda, $Zn, $Zm$idx", "", []>, Sched<[]> {
+ bits<5> Zda;
+ bits<5> Zn;
+ bits<3> Zm;
+ bits<2> idx;
+ let Inst{31-21} = 0b01000100101;
+ let Inst{20-19} = idx;
+ let Inst{18-16} = Zm;
+ let Inst{15-11} = 0b00011;
+ let Inst{10} = U;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = DestructiveOther;
+ let ElementSize = ZPR32.ElementSize;
+}
+
+multiclass sve_int_dot_mixed_indexed<bit U, string asm, SDPatternOperator op> {
+ def NAME : sve_int_dot_mixed_indexed<U, asm>;
+
+ def : SVE_4_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv16i8, nxv16i8, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Floating Point Matrix Multiply Accumulate Group
+//===----------------------------------------------------------------------===//
+
+class sve_fp_matrix_mla<bit sz, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, zprty:$Zm),
+ asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
+ bits<5> Zda;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-23} = 0b011001001;
+ let Inst{22} = sz;
+ let Inst{21} = 1;
+ let Inst{20-16} = Zm;
+ let Inst{15-10} = 0b111001;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = DestructiveOther;
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve_fp_matrix_mla<bit sz, string asm, ZPRRegOp zprty, SDPatternOperator op, ValueType vt> {
+ def NAME : sve_fp_matrix_mla<sz, asm, zprty>;
+
+ def : SVE_3_Op_Pat<vt, op, vt, vt, vt, !cast<Instruction>(NAME)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Memory - Contiguous Load And Replicate 256-bit Group
+//===----------------------------------------------------------------------===//
+
+class sve_mem_ldor_si<bits<2> sz, string asm, RegisterOperand VecList>
+: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s32:$imm4),
+ asm, "\t$Zt, $Pg/z, [$Rn, $imm4]", "", []>, Sched<[]> {
+ bits<5> Zt;
+ bits<5> Rn;
+ bits<3> Pg;
+ bits<4> imm4;
+ let Inst{31-25} = 0b1010010;
+ let Inst{24-23} = sz;
+ let Inst{22-20} = 0b010;
+ let Inst{19-16} = imm4;
+ let Inst{15-13} = 0b001;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayLoad = 1;
+}
+
+multiclass sve_mem_ldor_si<bits<2> sz, string asm, RegisterOperand listty,
+ ZPRRegOp zprty, ValueType Ty, ValueType PredTy, SDNode Ld1ro> {
+ def NAME : sve_mem_ldor_si<sz, asm, listty>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
+ (!cast<Instruction>(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $imm4]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s32:$imm4), 0>;
+
+ // Base addressing mode
+ def : Pat<(Ty (Ld1ro (PredTy PPR3bAny:$gp), GPR64sp:$base)),
+ (!cast<Instruction>(NAME) PPR3bAny:$gp, GPR64sp:$base, (i64 0))>;
+
+}
+
+class sve_mem_ldor_ss<bits<2> sz, string asm, RegisterOperand VecList,
+ RegisterOperand gprty>
+: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm),
+ asm, "\t$Zt, $Pg/z, [$Rn, $Rm]", "", []>, Sched<[]> {
+ bits<5> Zt;
+ bits<3> Pg;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31-25} = 0b1010010;
+ let Inst{24-23} = sz;
+ let Inst{22-21} = 0b01;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = 0;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayLoad = 1;
+}
+
+multiclass sve_mem_ldor_ss<bits<2> sz, string asm, RegisterOperand listty,
+ ZPRRegOp zprty, RegisterOperand gprty, ValueType Ty,
+ ValueType PredTy, SDNode Ld1ro, ComplexPattern AddrCP> {
+ def NAME : sve_mem_ldor_ss<sz, asm, listty, gprty>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Rm]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
+
+ def : Pat<(Ty (Ld1ro (PredTy PPR3bAny:$gp), (AddrCP GPR64sp:$base, gprty:$offset))),
+ (!cast<Instruction>(NAME) PPR3bAny:$gp, GPR64sp:$base, gprty:$offset)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Interleave 128-bit Elements Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_perm_bin_perm_128_zz<bits<2> opc, bit P, string asm>
+: I<(outs ZPR128:$Zd), (ins ZPR128:$Zn, ZPR128:$Zm),
+ asm, "\t$Zd, $Zn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zm;
+ bits<5> Zn;
+ let Inst{31-21} = 0b00000101101;
+ let Inst{20-16} = Zm;
+ let Inst{15-13} = 0b000;
+ let Inst{12-11} = opc;
+ let Inst{10} = P;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_perm_bin_perm_128_zz<bits<2> opc, bit P, string asm, SDPatternOperator op> {
+ def NAME : sve_int_perm_bin_perm_128_zz<opc, P, asm>;
+
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME)>;
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME)>;
+ def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8f16, !cast<Instruction>(NAME)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME)>;
+ def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, !cast<Instruction>(NAME)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME)>;
+ def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME)>;
+}
+
+/// Addressing modes
+def am_sve_indexed_s4 : ComplexPattern<i64, 2, "SelectAddrModeIndexedSVE<-8,7>", [], [SDNPWantRoot]>;
+def am_sve_indexed_s6 : ComplexPattern<i64, 2, "SelectAddrModeIndexedSVE<-32,31>", [], [SDNPWantRoot]>;
+
+def am_sve_regreg_lsl0 : ComplexPattern<i64, 2, "SelectSVERegRegAddrMode<0>", []>;
+def am_sve_regreg_lsl1 : ComplexPattern<i64, 2, "SelectSVERegRegAddrMode<1>", []>;
+def am_sve_regreg_lsl2 : ComplexPattern<i64, 2, "SelectSVERegRegAddrMode<2>", []>;
+def am_sve_regreg_lsl3 : ComplexPattern<i64, 2, "SelectSVERegRegAddrMode<3>", []>;
+
+// Predicated pseudo floating point two operand instructions.
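+//
+// Illustrative usage only (the operator and record names below are
+// hypothetical and not defined by this patch): a backend would typically
+// instantiate the multiclass next to the real instruction definitions, e.g.
+//   defm FADD_ZPZZ : sve_fp_bin_pred_hfd<AArch64fadd_p>;
+// which creates the _UNDEF_H/_S/_D pseudos plus their selection patterns.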
+multiclass sve_fp_bin_pred_hfd<SDPatternOperator op> {
+ def _UNDEF_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
+ def _UNDEF_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
+ def _UNDEF_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
+
+ def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+ def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+ def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Pseudo>(NAME # _UNDEF_D)>;
+}
+
+// Predicated pseudo integer two operand instructions.
+multiclass sve_int_bin_pred_bhsd<SDPatternOperator op> {
+ def _UNDEF_B : PredTwoOpPseudo<NAME # _B, ZPR8, FalseLanesUndef>;
+ def _UNDEF_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
+ def _UNDEF_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
+ def _UNDEF_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Pseudo>(NAME # _UNDEF_B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
+}
+
+// As sve_int_bin_pred but when only i32 and i64 vector types are required.
+multiclass sve_int_bin_pred_sd<SDPatternOperator op> {
+ def _UNDEF_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
+ def _UNDEF_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
+
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
new file mode 100644
index 000000000000..74fe0cdd1ea7
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
@@ -0,0 +1,265 @@
+//===----- SVEIntrinsicOpts - SVE ACLE Intrinsics Opts --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Performs general IR level optimizations on SVE intrinsics.
+//
+// The main goal of this pass is to remove unnecessary reinterpret
+// intrinsics (llvm.aarch64.sve.convert.[to|from].svbool), e.g.:
+//
+// %1 = @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
+// %2 = @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
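+//
+// When the predicate element counts match, as above, the pair is redundant:
+// every use of %2 can be rewritten to use %a directly, and the dead calls
+// are then erased.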
+//
+// This pass also looks for ptest intrinsics & phi instructions where the
+// operands are being needlessly converted to and from svbool_t.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "sve-intrinsic-opts"
+
+namespace llvm {
+void initializeSVEIntrinsicOptsPass(PassRegistry &);
+}
+
+namespace {
+struct SVEIntrinsicOpts : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ SVEIntrinsicOpts() : ModulePass(ID) {
+ initializeSVEIntrinsicOptsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ static IntrinsicInst *isReinterpretToSVBool(Value *V);
+
+ static bool optimizeIntrinsic(Instruction *I);
+
+ bool optimizeFunctions(SmallSetVector<Function *, 4> &Functions);
+
+ static bool optimizeConvertFromSVBool(IntrinsicInst *I);
+ static bool optimizePTest(IntrinsicInst *I);
+
+ static bool processPhiNode(IntrinsicInst *I);
+};
+} // end anonymous namespace
+
+void SVEIntrinsicOpts::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.setPreservesCFG();
+}
+
+char SVEIntrinsicOpts::ID = 0;
+static const char *name = "SVE intrinsics optimizations";
+INITIALIZE_PASS_BEGIN(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
+INITIALIZE_PASS_END(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false)
+
+namespace llvm {
+ModulePass *createSVEIntrinsicOptsPass() { return new SVEIntrinsicOpts(); }
+} // namespace llvm
+
+/// Returns V if it's a reinterpret cast to <vscale x 16 x i1> (aka svbool_t)
+/// from a narrower predicate type, nullptr otherwise.
+IntrinsicInst *SVEIntrinsicOpts::isReinterpretToSVBool(Value *V) {
+ IntrinsicInst *I = dyn_cast<IntrinsicInst>(V);
+ if (!I)
+ return nullptr;
+
+ if (I->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
+ return nullptr;
+
+ return I;
+}
+
+/// The function removes redundant reinterpret casts in the presence of
+/// control flow.
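+///
+/// For example (illustrative IR only, not taken from this patch), given a
+/// merge point such as:
+///
+///   %s1 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a1)
+///   %s2 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a2)
+///   %p = phi <vscale x 16 x i1> [ %s1, %bb1 ], [ %s2, %bb2 ]
+///   %r = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %p)
+///
+/// the pass builds a new phi directly over %a1 and %a2, replaces %r with it,
+/// and erases the conversions once they become unused.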
+bool SVEIntrinsicOpts::processPhiNode(IntrinsicInst *X) {
+
+ SmallVector<Instruction *, 32> Worklist;
+ auto RequiredType = X->getType();
+
+ auto *PN = dyn_cast<PHINode>(X->getArgOperand(0));
+ assert(PN && "Expected Phi Node!");
+
+ // Don't create a new Phi unless we can remove the old one.
+ if (!PN->hasOneUse())
+ return false;
+
+ for (Value *IncValPhi : PN->incoming_values()) {
+ auto *Reinterpret = isReinterpretToSVBool(IncValPhi);
+ if (!Reinterpret ||
+ RequiredType != Reinterpret->getArgOperand(0)->getType())
+ return false;
+ }
+
+ // Create the new Phi
+ LLVMContext &Ctx = PN->getContext();
+ IRBuilder<> Builder(Ctx);
+ Builder.SetInsertPoint(PN);
+ PHINode *NPN = Builder.CreatePHI(RequiredType, PN->getNumIncomingValues());
+ Worklist.push_back(PN);
+
+ for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
+ auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(I));
+ NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
+ Worklist.push_back(Reinterpret);
+ }
+
+ // Cleanup Phi Node and reinterprets
+ X->replaceAllUsesWith(NPN);
+ X->eraseFromParent();
+
+ for (auto &I : Worklist)
+ if (I->use_empty())
+ I->eraseFromParent();
+
+ return true;
+}
+
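+// Fold a ptest whose operands were both widened to svbool_t from the same
+// predicate type, by re-issuing the ptest directly on the original
+// (narrower) predicates. Illustrative IR only, not taken from this patch:
+//
+//   %g = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %pg)
+//   %o = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %op)
+//   %t = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %g, <vscale x 16 x i1> %o)
+//
+// becomes a ptest.any call overloaded on <vscale x 4 x i1> taking %pg and
+// %op, after which the unused converts are erased.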
+bool SVEIntrinsicOpts::optimizePTest(IntrinsicInst *I) {
+ IntrinsicInst *Op1 = dyn_cast<IntrinsicInst>(I->getArgOperand(0));
+ IntrinsicInst *Op2 = dyn_cast<IntrinsicInst>(I->getArgOperand(1));
+
+ if (Op1 && Op2 &&
+ Op1->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
+ Op2->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
+ Op1->getArgOperand(0)->getType() == Op2->getArgOperand(0)->getType()) {
+
+ Value *Ops[] = {Op1->getArgOperand(0), Op2->getArgOperand(0)};
+ Type *Tys[] = {Op1->getArgOperand(0)->getType()};
+ Module *M = I->getParent()->getParent()->getParent();
+
+ auto Fn = Intrinsic::getDeclaration(M, I->getIntrinsicID(), Tys);
+ auto CI = CallInst::Create(Fn, Ops, I->getName(), I);
+
+ I->replaceAllUsesWith(CI);
+ I->eraseFromParent();
+ if (Op1->use_empty())
+ Op1->eraseFromParent();
+ if (Op2->use_empty())
+ Op2->eraseFromParent();
+
+ return true;
+ }
+
+ return false;
+}
+
+bool SVEIntrinsicOpts::optimizeConvertFromSVBool(IntrinsicInst *I) {
+ assert(I->getIntrinsicID() == Intrinsic::aarch64_sve_convert_from_svbool &&
+ "Unexpected opcode");
+
+ // If the reinterpret instruction operand is a PHI Node
+ if (isa<PHINode>(I->getArgOperand(0)))
+ return processPhiNode(I);
+
+ // If we have a reinterpret intrinsic I of type A which is converting from
+ // another reinterpret Y of type B, and the source type of Y is A, then we can
+ // elide away both reinterprets if there are no other users of Y.
+ auto *Y = isReinterpretToSVBool(I->getArgOperand(0));
+ if (!Y)
+ return false;
+
+ Value *SourceVal = Y->getArgOperand(0);
+ if (I->getType() != SourceVal->getType())
+ return false;
+
+ I->replaceAllUsesWith(SourceVal);
+ I->eraseFromParent();
+ if (Y->use_empty())
+ Y->eraseFromParent();
+
+ return true;
+}
+
+bool SVEIntrinsicOpts::optimizeIntrinsic(Instruction *I) {
+ IntrinsicInst *IntrI = dyn_cast<IntrinsicInst>(I);
+ if (!IntrI)
+ return false;
+
+ switch (IntrI->getIntrinsicID()) {
+ case Intrinsic::aarch64_sve_convert_from_svbool:
+ return optimizeConvertFromSVBool(IntrI);
+ case Intrinsic::aarch64_sve_ptest_any:
+ case Intrinsic::aarch64_sve_ptest_first:
+ case Intrinsic::aarch64_sve_ptest_last:
+ return optimizePTest(IntrI);
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+bool SVEIntrinsicOpts::optimizeFunctions(
+ SmallSetVector<Function *, 4> &Functions) {
+ bool Changed = false;
+ for (auto *F : Functions) {
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree();
+
+ // Traverse the DT with an rpo walk so we see defs before uses, allowing
+ // simplification to be done incrementally.
+ BasicBlock *Root = DT->getRoot();
+ ReversePostOrderTraversal<BasicBlock *> RPOT(Root);
+ for (auto *BB : RPOT)
+ for (Instruction &I : make_early_inc_range(*BB))
+ Changed |= optimizeIntrinsic(&I);
+ }
+ return Changed;
+}
+
+bool SVEIntrinsicOpts::runOnModule(Module &M) {
+ bool Changed = false;
+ SmallSetVector<Function *, 4> Functions;
+
+ // Check for SVE intrinsic declarations first so that we only iterate over
+ // relevant functions. Where an appropriate declaration is found, store the
+ // function(s) where it is used so we can target these only.
+ for (auto &F : M.getFunctionList()) {
+ if (!F.isDeclaration())
+ continue;
+
+ switch (F.getIntrinsicID()) {
+ case Intrinsic::aarch64_sve_convert_from_svbool:
+ case Intrinsic::aarch64_sve_ptest_any:
+ case Intrinsic::aarch64_sve_ptest_first:
+ case Intrinsic::aarch64_sve_ptest_last:
+ for (auto I = F.user_begin(), E = F.user_end(); I != E;) {
+ auto *Inst = dyn_cast<Instruction>(*I++);
+ Functions.insert(Inst->getFunction());
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!Functions.empty())
+ Changed |= optimizeFunctions(Functions);
+
+ return Changed;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 87980cddb7c0..4e289fbe2325 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -658,6 +658,7 @@ namespace AArch64 {
// in index i*P of a <n x (M*P) x t> vector. The other elements of the
// <n x (M*P) x t> vector (such as index 1) are undefined.
static constexpr unsigned SVEBitsPerBlock = 128;
+static constexpr unsigned SVEMaxBitsPerVector = 2048;
const unsigned NeonBitsPerVector = 128;
} // end namespace AArch64
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h
index a55a1747cafe..88c79665be60 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -10,15 +10,16 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
-#include "llvm/Target/TargetMachine.h"
#include "llvm/IR/IntrinsicsR600.h" // TODO: Sink this.
#include "llvm/IR/IntrinsicsAMDGPU.h" // TODO: Sink this.
+#include "llvm/Support/CodeGen.h"
namespace llvm {
class AMDGPUTargetMachine;
class FunctionPass;
class GCNTargetMachine;
+class ImmutablePass;
class ModulePass;
class Pass;
class Target;
@@ -27,6 +28,14 @@ class TargetOptions;
class PassRegistry;
class Module;
+// GlobalISel passes
+void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &);
+FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone);
+void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &);
+FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone);
+FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone);
+void initializeAMDGPURegBankCombinerPass(PassRegistry &);
+
// R600 Passes
FunctionPass *createR600VectorRegMerger();
FunctionPass *createR600ExpandSpecialInstrsPass();
@@ -55,8 +64,9 @@ FunctionPass *createSIMemoryLegalizerPass();
FunctionPass *createSIInsertWaitcntsPass();
FunctionPass *createSIPreAllocateWWMRegsPass();
FunctionPass *createSIFormMemoryClausesPass();
-FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetOptions &,
- const TargetMachine *);
+
+FunctionPass *createSIPostRABundlerPass();
+FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *);
FunctionPass *createAMDGPUUseNativeCallsPass();
FunctionPass *createAMDGPUCodeGenPreparePass();
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
@@ -156,6 +166,12 @@ extern char &SIWholeQuadModeID;
void initializeSILowerControlFlowPass(PassRegistry &);
extern char &SILowerControlFlowID;
+void initializeSIRemoveShortExecBranchesPass(PassRegistry &);
+extern char &SIRemoveShortExecBranchesID;
+
+void initializeSIPreEmitPeepholePass(PassRegistry &);
+extern char &SIPreEmitPeepholeID;
+
void initializeSIInsertSkipsPass(PassRegistry &);
extern char &SIInsertSkipsPassID;
@@ -182,6 +198,10 @@ FunctionPass *createAMDGPUPromoteAlloca();
void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
extern char &AMDGPUPromoteAllocaID;
+FunctionPass *createAMDGPUPromoteAllocaToVector();
+void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&);
+extern char &AMDGPUPromoteAllocaToVectorID;
+
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUISelDag(
TargetMachine *TM = nullptr,
@@ -216,12 +236,18 @@ extern char &SIMemoryLegalizerID;
void initializeSIModeRegisterPass(PassRegistry&);
extern char &SIModeRegisterID;
+void initializeSIInsertHardClausesPass(PassRegistry &);
+extern char &SIInsertHardClausesID;
+
void initializeSIInsertWaitcntsPass(PassRegistry&);
extern char &SIInsertWaitcntsID;
void initializeSIFormMemoryClausesPass(PassRegistry&);
extern char &SIFormMemoryClausesID;
+void initializeSIPostRABundlerPass(PassRegistry&);
+extern char &SIPostRABundlerID;
+
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
extern char &AMDGPUUnifyDivergentExitNodesID;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td
index 42b477e07b3b..e32f0fcc4771 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -33,6 +33,12 @@ def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
"Assuming f32 fma is at least as fast as mul + add"
>;
+def FeatureFastDenormalF32 : SubtargetFeature<"fast-denormal-f32",
+ "FastDenormalF32",
+ "true",
+ "Enabling denormals does not cause f32 instructions to run at f64 rates"
+>;
+
def FeatureMIMG_R128 : SubtargetFeature<"mimg-r128",
"MIMG_R128",
"true",
@@ -254,6 +260,12 @@ def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
"Additional instructions for GFX10+"
>;
+def FeatureGFX10_3Insts : SubtargetFeature<"gfx10-3-insts",
+ "GFX10_3Insts",
+ "true",
+ "Additional instructions for GFX10.3"
+>;
+
def FeatureGFX7GFX8GFX9Insts : SubtargetFeature<"gfx7-gfx8-gfx9-insts",
"GFX7GFX8GFX9Insts",
"true",
@@ -360,7 +372,19 @@ def FeatureDPP8 : SubtargetFeature<"dpp8",
def FeatureR128A16 : SubtargetFeature<"r128-a16",
"HasR128A16",
"true",
- "Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9"
+ "Support gfx9-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands, where a16 is aliased with r128"
+>;
+
+def FeatureGFX10A16 : SubtargetFeature<"a16",
+ "HasGFX10A16",
+ "true",
+ "Support gfx10-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands"
+>;
+
+def FeatureG16 : SubtargetFeature<"g16",
+ "HasG16",
+ "true",
+ "Support G16 for 16-bit gradient image operands"
>;
def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding",
@@ -369,6 +393,12 @@ def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding",
"Support NSA encoding for image instructions"
>;
+def FeatureGFX10_BEncoding : SubtargetFeature<"gfx10_b-encoding",
+ "GFX10_BEncoding",
+ "true",
+ "Encoding format GFX10_B"
+>;
+
def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",
"HasIntClamp",
"true",
@@ -439,7 +469,8 @@ def FeatureAtomicFaddInsts : SubtargetFeature<"atomic-fadd-insts",
"HasAtomicFaddInsts",
"true",
"Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, "
- "global_atomic_pk_add_f16 instructions"
+ "global_atomic_pk_add_f16 instructions",
+ [FeatureFlatGlobalInsts]
>;
def FeatureDoesNotSupportSRAMECC : SubtargetFeature<"no-sram-ecc-support",
@@ -466,6 +497,30 @@ def FeatureVscnt : SubtargetFeature<"vscnt",
"Has separate store vscnt counter"
>;
+def FeatureGetWaveIdInst : SubtargetFeature<"get-wave-id-inst",
+ "HasGetWaveIdInst",
+ "true",
+ "Has s_get_waveid_in_workgroup instruction"
+>;
+
+def FeatureSMemTimeInst : SubtargetFeature<"s-memtime-inst",
+ "HasSMemTimeInst",
+ "true",
+ "Has s_memtime instruction"
+>;
+
+def FeatureMadMacF32Insts : SubtargetFeature<"mad-mac-f32-insts",
+ "HasMadMacF32Insts",
+ "true",
+ "Has v_mad_f32/v_mac_f32/v_madak_f32/v_madmk_f32 instructions"
+>;
+
+def FeatureDsSrc2Insts : SubtargetFeature<"ds-src2-insts",
+ "HasDsSrc2Insts",
+ "true",
+ "Has ds_*_src2 instructions"
+>;
+
def FeatureRegisterBanking : SubtargetFeature<"register-banking",
"HasRegisterBanking",
"true",
@@ -488,36 +543,6 @@ def FeatureNoDataDepHazard : SubtargetFeature<"no-data-dep-hazard",
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
-// Denormal handling for fp64 and fp16 is controlled by the same
-// config register when fp16 supported.
-// TODO: Do we need a separate f16 setting when not legal?
-def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals",
- "FP64FP16Denormals",
- "true",
- "Enable double and half precision denormal handling",
- [FeatureFP64]
->;
-
-def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
- "FP64FP16Denormals",
- "true",
- "Enable double and half precision denormal handling",
- [FeatureFP64, FeatureFP64FP16Denormals]
->;
-
-def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
- "FP64FP16Denormals",
- "true",
- "Enable half precision denormal handling",
- [FeatureFP64FP16Denormals]
->;
-
-def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
- "FPExceptions",
- "true",
- "Enable floating point exceptions"
->;
-
class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
"max-private-element-size-"#size,
"MaxPrivateElementSize",
@@ -628,9 +653,10 @@ class GCNSubtargetFeatureGeneration <string Value,
def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
"southern-islands",
[FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
- FeatureWavefrontSize64,
- FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange,
- FeatureDoesNotSupportSRAMECC, FeatureDoesNotSupportXNACK]
+ FeatureWavefrontSize64, FeatureSMemTimeInst, FeatureMadMacF32Insts,
+ FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel,
+ FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC,
+ FeatureDoesNotSupportXNACK]
>;
def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
@@ -638,7 +664,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureFlatAddressSpace,
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
- FeatureGFX7GFX8GFX9Insts, FeatureDoesNotSupportSRAMECC]
+ FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
+ FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC]
>;
def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
@@ -649,8 +676,9 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
FeatureScalarStores, FeatureInv2PiInlineImm,
FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
- FeatureIntClamp, FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC,
- FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts
+ FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts,
+ FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
+ FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC, FeatureFastDenormalF32
]
>;
@@ -665,7 +693,9 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
- FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16
+ FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
+ FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts,
+ FeatureFastDenormalF32
]
>;
@@ -682,7 +712,8 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
FeatureVOP3Literal, FeatureDPP8,
- FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC
+ FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC,
+ FeatureGFX10A16, FeatureFastDenormalF32, FeatureG16
]
>;
@@ -853,6 +884,10 @@ def FeatureISAVersion10_1_0 : FeatureSet<
FeatureScalarStores,
FeatureScalarAtomics,
FeatureScalarFlatScratchInsts,
+ FeatureGetWaveIdInst,
+ FeatureSMemTimeInst,
+ FeatureMadMacF32Insts,
+ FeatureDsSrc2Insts,
FeatureLdsMisalignedBug,
FeatureDoesNotSupportXNACK,
FeatureCodeObjectV3])>;
@@ -871,6 +906,10 @@ def FeatureISAVersion10_1_1 : FeatureSet<
FeatureScalarStores,
FeatureScalarAtomics,
FeatureScalarFlatScratchInsts,
+ FeatureGetWaveIdInst,
+ FeatureSMemTimeInst,
+ FeatureMadMacF32Insts,
+ FeatureDsSrc2Insts,
FeatureDoesNotSupportXNACK,
FeatureCodeObjectV3])>;
@@ -888,10 +927,29 @@ def FeatureISAVersion10_1_2 : FeatureSet<
FeatureScalarStores,
FeatureScalarAtomics,
FeatureScalarFlatScratchInsts,
+ FeatureGetWaveIdInst,
+ FeatureSMemTimeInst,
+ FeatureMadMacF32Insts,
+ FeatureDsSrc2Insts,
FeatureLdsMisalignedBug,
FeatureDoesNotSupportXNACK,
FeatureCodeObjectV3])>;
+def FeatureISAVersion10_3_0 : FeatureSet<
+ [FeatureGFX10,
+ FeatureGFX10_BEncoding,
+ FeatureGFX10_3Insts,
+ FeatureLDSBankCount32,
+ FeatureDLInsts,
+ FeatureDot1Insts,
+ FeatureDot2Insts,
+ FeatureDot5Insts,
+ FeatureDot6Insts,
+ FeatureNSAEncoding,
+ FeatureWavefrontSize32,
+ FeatureDoesNotSupportXNACK,
+ FeatureCodeObjectV3]>;
+
//===----------------------------------------------------------------------===//
def AMDGPUInstrInfo : InstrInfo {
@@ -973,190 +1031,222 @@ def NullALU : InstrItinClass;
def isGFX6 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS">,
- AssemblerPredicate<"FeatureSouthernIslands">;
+ AssemblerPredicate<(all_of FeatureSouthernIslands)>;
def isGFX6GFX7 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
- AssemblerPredicate<"!FeatureGCN3Encoding,!FeatureGFX10Insts">;
+ AssemblerPredicate<(all_of (not FeatureGCN3Encoding), (not FeatureGFX10Insts))>;
def isGFX6GFX7GFX10 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
- AssemblerPredicate<"!FeatureGCN3Encoding">;
+ AssemblerPredicate<(all_of (not FeatureGCN3Encoding))>;
def isGFX7Only :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
- AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts,!FeatureGFX10Insts">;
+ AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts, (not FeatureGFX10Insts))>;
def isGFX7GFX10 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
- AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts">;
+ AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts)>;
def isGFX7GFX8GFX9 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
- AssemblerPredicate<"FeatureGFX7GFX8GFX9Insts">;
+ AssemblerPredicate<(all_of FeatureGFX7GFX8GFX9Insts)>;
def isGFX6GFX7GFX8GFX9 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
- AssemblerPredicate<"!FeatureGFX10Insts">;
+ AssemblerPredicate<(all_of (not FeatureGFX10Insts))>;
def isGFX7Plus :
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
- AssemblerPredicate<"FeatureCIInsts">;
+ AssemblerPredicate<(all_of FeatureCIInsts)>;
def isGFX8Plus :
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
- AssemblerPredicate<"FeatureGFX8Insts">;
+ AssemblerPredicate<(all_of FeatureGFX8Insts)>;
def isGFX8Only : Predicate<"Subtarget->getGeneration() =="
"AMDGPUSubtarget::VOLCANIC_ISLANDS">,
- AssemblerPredicate <"FeatureVolcanicIslands">;
+ AssemblerPredicate <(all_of FeatureVolcanicIslands)>;
def isGFX9Plus :
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
- AssemblerPredicate<"FeatureGFX9Insts">;
+ AssemblerPredicate<(all_of FeatureGFX9Insts)>;
def isGFX9Only : Predicate <
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
- AssemblerPredicate<"FeatureGCN3Encoding,FeatureGFX9Insts">;
+ AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureGFX9Insts)>;
def isGFX8GFX9 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
- AssemblerPredicate<"FeatureGFX8Insts,FeatureGCN3Encoding">;
+ AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding)>;
def isGFX10Plus :
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
- AssemblerPredicate<"FeatureGFX10Insts">;
+ AssemblerPredicate<(all_of FeatureGFX10Insts)>;
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
- AssemblerPredicate<"FeatureFlatAddressSpace">;
+ AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;
def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">,
- AssemblerPredicate<"FeatureFlatGlobalInsts">;
+ AssemblerPredicate<(all_of FeatureFlatGlobalInsts)>;
def HasFlatScratchInsts : Predicate<"Subtarget->hasFlatScratchInsts()">,
- AssemblerPredicate<"FeatureFlatScratchInsts">;
+ AssemblerPredicate<(all_of FeatureFlatScratchInsts)>;
def HasScalarFlatScratchInsts : Predicate<"Subtarget->hasScalarFlatScratchInsts()">,
- AssemblerPredicate<"FeatureScalarFlatScratchInsts">;
+ AssemblerPredicate<(all_of FeatureScalarFlatScratchInsts)>;
def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">,
- AssemblerPredicate<"FeatureGFX9Insts">;
+ AssemblerPredicate<(all_of FeatureGFX9Insts)>;
+
+def HasGFX10_BEncoding : Predicate<"Subtarget->hasGFX10_BEncoding()">,
+ AssemblerPredicate<(all_of FeatureGFX10_BEncoding)>;
def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">,
- AssemblerPredicate<"FeatureUnpackedD16VMem">;
+ AssemblerPredicate<(all_of FeatureUnpackedD16VMem)>;
def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">,
- AssemblerPredicate<"!FeatureUnpackedD16VMem">;
+ AssemblerPredicate<(all_of (not FeatureUnpackedD16VMem))>;
def D16PreservesUnusedBits :
Predicate<"Subtarget->d16PreservesUnusedBits()">,
- AssemblerPredicate<"FeatureGFX9Insts,!FeatureSRAMECC">;
+ AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureSRAMECC))>;
def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">;
def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;
def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
- AssemblerPredicate<"FeatureGFX9Insts">;
+ AssemblerPredicate<(all_of FeatureGFX9Insts)>;
+
+def HasLDSFPAtomics : Predicate<"Subtarget->hasLDSFPAtomics()">,
+ AssemblerPredicate<(all_of FeatureGFX8Insts)>;
def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">,
- AssemblerPredicate<"FeatureAddNoCarryInsts">;
+ AssemblerPredicate<(all_of FeatureAddNoCarryInsts)>;
def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">;
def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
- AssemblerPredicate<"Feature16BitInsts">;
+ AssemblerPredicate<(all_of Feature16BitInsts)>;
def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
- AssemblerPredicate<"FeatureVOP3P">;
+ AssemblerPredicate<(all_of FeatureVOP3P)>;
+
+def HasMinMaxDenormModes : Predicate<"Subtarget->supportsMinMaxDenormModes()">;
+def NotHasMinMaxDenormModes : Predicate<"!Subtarget->supportsMinMaxDenormModes()">;
def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
- AssemblerPredicate<"FeatureSDWA,FeatureVolcanicIslands">;
+ AssemblerPredicate<(all_of FeatureSDWA, FeatureVolcanicIslands)>;
def HasSDWA9 :
Predicate<"Subtarget->hasSDWA()">,
- AssemblerPredicate<"FeatureGCN3Encoding,FeatureGFX9Insts,FeatureSDWA">;
+ AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureGFX9Insts, FeatureSDWA)>;
def HasSDWA10 :
Predicate<"Subtarget->hasSDWA()">,
- AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureSDWA">;
+ AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts, FeatureSDWA)>;
def HasDPP : Predicate<"Subtarget->hasDPP()">,
- AssemblerPredicate<"FeatureGCN3Encoding,FeatureDPP">;
+ AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureDPP)>;
def HasDPP8 : Predicate<"Subtarget->hasDPP8()">,
- AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP8">;
+ AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts, FeatureDPP8)>;
def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
- AssemblerPredicate<"FeatureR128A16">;
+ AssemblerPredicate<(all_of FeatureR128A16)>;
+
+def HasGFX10A16 : Predicate<"Subtarget->hasGFX10A16()">,
+ AssemblerPredicate<(all_of FeatureGFX10A16)>;
+
+def HasG16 : Predicate<"Subtarget->hasG16()">,
+ AssemblerPredicate<(all_of FeatureG16)>;
def HasDPP16 : Predicate<"Subtarget->hasDPP()">,
- AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP">;
+ AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts, FeatureDPP)>;
def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
- AssemblerPredicate<"FeatureIntClamp">;
+ AssemblerPredicate<(all_of FeatureIntClamp)>;
def HasMadMixInsts : Predicate<"Subtarget->hasMadMixInsts()">,
- AssemblerPredicate<"FeatureMadMixInsts">;
+ AssemblerPredicate<(all_of FeatureMadMixInsts)>;
def HasScalarStores : Predicate<"Subtarget->hasScalarStores()">,
- AssemblerPredicate<"FeatureScalarStores">;
+ AssemblerPredicate<(all_of FeatureScalarStores)>;
def HasScalarAtomics : Predicate<"Subtarget->hasScalarAtomics()">,
- AssemblerPredicate<"FeatureScalarAtomics">;
+ AssemblerPredicate<(all_of FeatureScalarAtomics)>;
def HasNoSdstCMPX : Predicate<"Subtarget->hasNoSdstCMPX()">,
- AssemblerPredicate<"FeatureNoSdstCMPX">;
+ AssemblerPredicate<(all_of FeatureNoSdstCMPX)>;
def HasSdstCMPX : Predicate<"!Subtarget->hasNoSdstCMPX()">,
- AssemblerPredicate<"!FeatureNoSdstCMPX">;
+ AssemblerPredicate<(all_of (not FeatureNoSdstCMPX))>;
def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">,
- AssemblerPredicate<"FeatureVGPRIndexMode">;
+ AssemblerPredicate<(all_of FeatureVGPRIndexMode)>;
def HasMovrel : Predicate<"Subtarget->hasMovrel()">,
- AssemblerPredicate<"FeatureMovrel">;
+ AssemblerPredicate<(all_of FeatureMovrel)>;
def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">,
- AssemblerPredicate<"FeatureFmaMixInsts">;
+ AssemblerPredicate<(all_of FeatureFmaMixInsts)>;
def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
- AssemblerPredicate<"FeatureDLInsts">;
+ AssemblerPredicate<(all_of FeatureDLInsts)>;
def HasDot1Insts : Predicate<"Subtarget->hasDot1Insts()">,
- AssemblerPredicate<"FeatureDot1Insts">;
+ AssemblerPredicate<(all_of FeatureDot1Insts)>;
def HasDot2Insts : Predicate<"Subtarget->hasDot2Insts()">,
- AssemblerPredicate<"FeatureDot2Insts">;
+ AssemblerPredicate<(all_of FeatureDot2Insts)>;
def HasDot3Insts : Predicate<"Subtarget->hasDot3Insts()">,
- AssemblerPredicate<"FeatureDot3Insts">;
+ AssemblerPredicate<(all_of FeatureDot3Insts)>;
def HasDot4Insts : Predicate<"Subtarget->hasDot4Insts()">,
- AssemblerPredicate<"FeatureDot4Insts">;
+ AssemblerPredicate<(all_of FeatureDot4Insts)>;
def HasDot5Insts : Predicate<"Subtarget->hasDot5Insts()">,
- AssemblerPredicate<"FeatureDot5Insts">;
+ AssemblerPredicate<(all_of FeatureDot5Insts)>;
def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">,
- AssemblerPredicate<"FeatureDot6Insts">;
+ AssemblerPredicate<(all_of FeatureDot6Insts)>;
+
+def HasGetWaveIdInst : Predicate<"Subtarget->hasGetWaveIdInst()">,
+ AssemblerPredicate<(all_of FeatureGetWaveIdInst)>;
def HasMAIInsts : Predicate<"Subtarget->hasMAIInsts()">,
- AssemblerPredicate<"FeatureMAIInsts">;
+ AssemblerPredicate<(all_of FeatureMAIInsts)>;
+
+def HasSMemTimeInst : Predicate<"Subtarget->hasSMemTimeInst()">,
+ AssemblerPredicate<(all_of FeatureSMemTimeInst)>;
+
+def HasNoSMemTimeInst : Predicate<"!Subtarget->hasSMemTimeInst()">;
def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">,
- AssemblerPredicate<"FeaturePkFmacF16Inst">;
+ AssemblerPredicate<(all_of FeaturePkFmacF16Inst)>;
+
+def HasMadMacF32Insts : Predicate<"Subtarget->hasMadMacF32Insts()">,
+ AssemblerPredicate<(all_of FeatureMadMacF32Insts)>;
def HasAtomicFaddInsts : Predicate<"Subtarget->hasAtomicFaddInsts()">,
- AssemblerPredicate<"FeatureAtomicFaddInsts">;
+ AssemblerPredicate<(all_of FeatureAtomicFaddInsts)>;
+
+def HasNoMadMacF32Insts : Predicate<"!Subtarget->hasMadMacF32Insts()">,
+ AssemblerPredicate<(all_of (not FeatureMadMacF32Insts))>;
+
+def HasDsSrc2Insts : Predicate<"!Subtarget->hasDsSrc2Insts()">,
+ AssemblerPredicate<(all_of FeatureDsSrc2Insts)>;
def HasOffset3fBug : Predicate<"!Subtarget->hasOffset3fBug()">,
- AssemblerPredicate<"FeatureOffset3fBug">;
+ AssemblerPredicate<(all_of FeatureOffset3fBug)>;
def EnableLateCFGStructurize : Predicate<
"EnableLateStructurizeCFG">;
@@ -1165,7 +1255,7 @@ def EnableLateCFGStructurize : Predicate<
include "SISchedule.td"
include "GCNProcessors.td"
include "AMDGPUInstrInfo.td"
-include "AMDGPURegisterInfo.td"
+include "SIRegisterInfo.td"
include "AMDGPURegisterBanks.td"
include "AMDGPUInstructions.td"
include "SIInstrInfo.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index bba132c3bc46..bb2aba044974 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -91,12 +91,16 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
AAQueryInfo &AAQI, bool OrLocal) {
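+ // A pointer that is itself in a constant address space can be answered
+ // directly, without first stripping down to the underlying object.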
+ unsigned AS = Loc.Ptr->getType()->getPointerAddressSpace();
+ if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
+ return true;
+
const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);
- unsigned AS = Base->getType()->getPointerAddressSpace();
+ AS = Base->getType()->getPointerAddressSpace();
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
return true;
- }
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
if (GV->isConstant())
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
index fb722920900f..fd8889ea5c0d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
@@ -48,10 +48,6 @@ public:
AAQueryInfo &AAQI);
bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
bool OrLocal);
-
-private:
- bool Aliases(const MDNode *A, const MDNode *B) const;
- bool PathAliases(const MDNode *A, const MDNode *B) const;
};
/// Analysis pass providing a never-invalidated alias analysis result.
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index ff2bda6bed53..22947544ac07 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -71,6 +71,13 @@ void AMDGPUAlwaysInline::recursivelyVisitUsers(
if (Instruction *I = dyn_cast<Instruction>(U)) {
Function *F = I->getParent()->getParent();
if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
+ // FIXME: This is a horrible hack. We should always respect noinline,
+ // and just let us hit the error when we can't handle this.
+ //
+ // Unfortunately, clang adds noinline to all functions at -O0. We have
+ // to override this here until that's fixed.
+ F->removeFnAttr(Attribute::NoInline);
+
FuncsToAlwaysInline.insert(F);
Stack.push_back(F);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index e72b3f4fde63..625074569cfa 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -21,7 +21,6 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -71,7 +70,8 @@ public:
static bool visitConstantExpr(const ConstantExpr *CE);
static bool visitConstantExprsRecursively(
const Constant *EntryC,
- SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
+ SmallPtrSet<const Constant *, 8> &ConstantExprVisited, bool IsFunc,
+ bool HasApertureRegs);
};
} // end anonymous namespace
@@ -93,6 +93,14 @@ static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
return castRequiresQueuePtr(ASC->getSrcAddressSpace());
}
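+
+// Returns true if the given constant is a global variable in the LDS (local)
+// or region address space.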
+static bool isDSAddress(const Constant *C) {
+ const GlobalValue *GV = dyn_cast<GlobalValue>(C);
+ if (!GV)
+ return false;
+ unsigned AS = GV->getAddressSpace();
+ return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
+}
+
bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
if (CE->getOpcode() == Instruction::AddrSpaceCast) {
unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
@@ -104,7 +112,8 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
const Constant *EntryC,
- SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
+ SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
+ bool IsFunc, bool HasApertureRegs) {
if (!ConstantExprVisited.insert(EntryC).second)
return false;
@@ -115,9 +124,13 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
while (!Stack.empty()) {
const Constant *C = Stack.pop_back_val();
+ // We need to trap on DS globals in non-entry functions.
+ if (IsFunc && isDSAddress(C))
+ return true;
+
// Check this constant expression.
if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
- if (visitConstantExpr(CE))
+ if (!HasApertureRegs && visitConstantExpr(CE))
return true;
}
@@ -202,7 +215,7 @@ static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
"amdgpu-work-item-id-z", "amdgpu-work-group-id-x",
"amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
"amdgpu-dispatch-ptr", "amdgpu-dispatch-id",
- "amdgpu-kernarg-segment-ptr", "amdgpu-implicitarg-ptr"};
+ "amdgpu-implicitarg-ptr"};
if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
NeedQueuePtr = true;
@@ -263,10 +276,10 @@ bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
- bool HasFlat = ST.hasFlatAddressSpace();
bool HasApertureRegs = ST.hasApertureRegs();
SmallPtrSet<const Constant *, 8> ConstantExprVisited;
+ bool HaveStackObjects = false;
bool Changed = false;
bool NeedQueuePtr = false;
bool HaveCall = false;
@@ -274,13 +287,18 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
- CallSite CS(&I);
- if (CS) {
- Function *Callee = CS.getCalledFunction();
+ if (isa<AllocaInst>(I)) {
+ HaveStackObjects = true;
+ continue;
+ }
+
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ const Function *Callee =
+ dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
// TODO: Do something with indirect calls.
if (!Callee) {
- if (!CS.isInlineAsm())
+ if (!CB->isInlineAsm())
HaveCall = true;
continue;
}
@@ -292,20 +310,25 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
Changed = true;
} else {
bool NonKernelOnly = false;
- StringRef AttrName = intrinsicToAttrName(IID,
- NonKernelOnly, NeedQueuePtr);
- if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
- F.addFnAttr(AttrName);
- Changed = true;
+
+ if (!IsFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
+ F.addFnAttr("amdgpu-kernarg-segment-ptr");
+ } else {
+ StringRef AttrName = intrinsicToAttrName(IID, NonKernelOnly,
+ NeedQueuePtr);
+ if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
+ F.addFnAttr(AttrName);
+ Changed = true;
+ }
}
}
}
- if (NeedQueuePtr || HasApertureRegs)
+ if (NeedQueuePtr || (!IsFunc && HasApertureRegs))
continue;
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
- if (castRequiresQueuePtr(ASC)) {
+ if (!HasApertureRegs && castRequiresQueuePtr(ASC)) {
NeedQueuePtr = true;
continue;
}
@@ -316,7 +339,8 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
if (!OpC)
continue;
- if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) {
+ if (visitConstantExprsRecursively(OpC, ConstantExprVisited, IsFunc,
+ HasApertureRegs)) {
NeedQueuePtr = true;
break;
}
@@ -332,8 +356,13 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
// TODO: We could refine this to captured pointers that could possibly be
// accessed by flat instructions. For now this is mostly a poor way of
// estimating whether there are calls before argument lowering.
- if (HasFlat && !IsFunc && HaveCall) {
- F.addFnAttr("amdgpu-flat-scratch");
+ if (!IsFunc && HaveCall) {
+ F.addFnAttr("amdgpu-calls");
+ Changed = true;
+ }
+
+ if (HaveStackObjects) {
+ F.addFnAttr("amdgpu-stack-objects");
Changed = true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index 6fb507083cef..b09e92c07f9b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -35,7 +36,7 @@ class AMDGPUAnnotateUniformValues : public FunctionPass,
MemoryDependenceResults *MDR;
LoopInfo *LI;
DenseMap<Value*, GetElementPtrInst*> noClobberClones;
- bool isKernelFunc;
+ bool isEntryFunc;
public:
static char ID;
@@ -127,11 +128,10 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
auto isGlobalLoad = [&](LoadInst &Load)->bool {
return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
};
- // We're tracking up to the Function boundaries
- // We cannot go beyond because of FunctionPass restrictions
- // Thus we can ensure that memory not clobbered for memory
- // operations that live in kernel only.
- bool NotClobbered = isKernelFunc && !isClobberedInFunction(&I);
+ // We're tracking up to the Function boundaries, and cannot go beyond because
+ // of FunctionPass restrictions. We can therefore only ensure that memory is
+ // not clobbered for memory operations that live in entry functions.
+ bool NotClobbered = isEntryFunc && !isClobberedInFunction(&I);
Instruction *PtrI = dyn_cast<Instruction>(Ptr);
if (!PtrI && NotClobbered && isGlobalLoad(I)) {
if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
@@ -170,7 +170,7 @@ bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
DA = &getAnalysis<LegacyDivergenceAnalysis>();
MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- isKernelFunc = F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
+ isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv());
visit(F);
noClobberClones.clear();
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index 99a01ca3a2fd..d078fc147a36 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -8,6 +8,8 @@
#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/raw_ostream.h"
@@ -43,6 +45,10 @@ char AMDGPUArgumentUsageInfo::ID = 0;
const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::ExternFunctionInfo{};
+// Hardcoded registers from fixed function ABI
+const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::FixedABIFunctionInfo
+ = AMDGPUFunctionArgInfo::fixedABILayout();
+
bool AMDGPUArgumentUsageInfo::doInitialization(Module &M) {
return false;
}
@@ -77,59 +83,102 @@ void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
}
}
-std::pair<const ArgDescriptor *, const TargetRegisterClass *>
+std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
AMDGPUFunctionArgInfo::getPreloadedValue(
- AMDGPUFunctionArgInfo::PreloadedValue Value) const {
+ AMDGPUFunctionArgInfo::PreloadedValue Value) const {
switch (Value) {
case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER: {
- return std::make_pair(
- PrivateSegmentBuffer ? &PrivateSegmentBuffer : nullptr,
- &AMDGPU::SGPR_128RegClass);
+ return std::make_tuple(PrivateSegmentBuffer ? &PrivateSegmentBuffer
+ : nullptr,
+ &AMDGPU::SGPR_128RegClass, LLT::vector(4, 32));
}
case AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR:
- return std::make_pair(ImplicitBufferPtr ? &ImplicitBufferPtr : nullptr,
- &AMDGPU::SGPR_64RegClass);
+ return std::make_tuple(ImplicitBufferPtr ? &ImplicitBufferPtr : nullptr,
+ &AMDGPU::SGPR_64RegClass,
+ LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
- return std::make_pair(WorkGroupIDX ? &WorkGroupIDX : nullptr,
- &AMDGPU::SGPR_32RegClass);
-
+ return std::make_tuple(WorkGroupIDX ? &WorkGroupIDX : nullptr,
+ &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y:
- return std::make_pair(WorkGroupIDY ? &WorkGroupIDY : nullptr,
- &AMDGPU::SGPR_32RegClass);
+ return std::make_tuple(WorkGroupIDY ? &WorkGroupIDY : nullptr,
+ &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
- return std::make_pair(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
- &AMDGPU::SGPR_32RegClass);
+ return std::make_tuple(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
+ &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
- return std::make_pair(
- PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
- &AMDGPU::SGPR_32RegClass);
+ return std::make_tuple(
+ PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
+ &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
case AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR:
- return std::make_pair(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
- &AMDGPU::SGPR_64RegClass);
+ return std::make_tuple(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
+ &AMDGPU::SGPR_64RegClass,
+ LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
case AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR:
- return std::make_pair(ImplicitArgPtr ? &ImplicitArgPtr : nullptr,
- &AMDGPU::SGPR_64RegClass);
+ return std::make_tuple(ImplicitArgPtr ? &ImplicitArgPtr : nullptr,
+ &AMDGPU::SGPR_64RegClass,
+ LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
case AMDGPUFunctionArgInfo::DISPATCH_ID:
- return std::make_pair(DispatchID ? &DispatchID : nullptr,
- &AMDGPU::SGPR_64RegClass);
+ return std::make_tuple(DispatchID ? &DispatchID : nullptr,
+ &AMDGPU::SGPR_64RegClass, LLT::scalar(64));
case AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT:
- return std::make_pair(FlatScratchInit ? &FlatScratchInit : nullptr,
- &AMDGPU::SGPR_64RegClass);
+ return std::make_tuple(FlatScratchInit ? &FlatScratchInit : nullptr,
+ &AMDGPU::SGPR_64RegClass, LLT::scalar(64));
case AMDGPUFunctionArgInfo::DISPATCH_PTR:
- return std::make_pair(DispatchPtr ? &DispatchPtr : nullptr,
- &AMDGPU::SGPR_64RegClass);
+ return std::make_tuple(DispatchPtr ? &DispatchPtr : nullptr,
+ &AMDGPU::SGPR_64RegClass,
+ LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
case AMDGPUFunctionArgInfo::QUEUE_PTR:
- return std::make_pair(QueuePtr ? &QueuePtr : nullptr,
- &AMDGPU::SGPR_64RegClass);
+ return std::make_tuple(QueuePtr ? &QueuePtr : nullptr,
+ &AMDGPU::SGPR_64RegClass,
+ LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
case AMDGPUFunctionArgInfo::WORKITEM_ID_X:
- return std::make_pair(WorkItemIDX ? &WorkItemIDX : nullptr,
- &AMDGPU::VGPR_32RegClass);
+ return std::make_tuple(WorkItemIDX ? &WorkItemIDX : nullptr,
+ &AMDGPU::VGPR_32RegClass, LLT::scalar(32));
case AMDGPUFunctionArgInfo::WORKITEM_ID_Y:
- return std::make_pair(WorkItemIDY ? &WorkItemIDY : nullptr,
- &AMDGPU::VGPR_32RegClass);
+ return std::make_tuple(WorkItemIDY ? &WorkItemIDY : nullptr,
+ &AMDGPU::VGPR_32RegClass, LLT::scalar(32));
case AMDGPUFunctionArgInfo::WORKITEM_ID_Z:
- return std::make_pair(WorkItemIDZ ? &WorkItemIDZ : nullptr,
- &AMDGPU::VGPR_32RegClass);
+ return std::make_tuple(WorkItemIDZ ? &WorkItemIDZ : nullptr,
+ &AMDGPU::VGPR_32RegClass, LLT::scalar(32));
}
llvm_unreachable("unexpected preloaded value type");
}
+
+constexpr AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::fixedABILayout() {
+ AMDGPUFunctionArgInfo AI;
+ AI.PrivateSegmentBuffer
+ = ArgDescriptor::createRegister(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3);
+ AI.DispatchPtr = ArgDescriptor::createRegister(AMDGPU::SGPR4_SGPR5);
+ AI.QueuePtr = ArgDescriptor::createRegister(AMDGPU::SGPR6_SGPR7);
+
+  // Do not pass the kernarg segment pointer; only pass the incremented
+  // version in its place.
+ AI.ImplicitArgPtr = ArgDescriptor::createRegister(AMDGPU::SGPR8_SGPR9);
+ AI.DispatchID = ArgDescriptor::createRegister(AMDGPU::SGPR10_SGPR11);
+
+ // Skip FlatScratchInit/PrivateSegmentSize
+ AI.WorkGroupIDX = ArgDescriptor::createRegister(AMDGPU::SGPR12);
+ AI.WorkGroupIDY = ArgDescriptor::createRegister(AMDGPU::SGPR13);
+ AI.WorkGroupIDZ = ArgDescriptor::createRegister(AMDGPU::SGPR14);
+
+ const unsigned Mask = 0x3ff;
+ AI.WorkItemIDX = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask);
+ AI.WorkItemIDY = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask << 10);
+ AI.WorkItemIDZ = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask << 20);
+ return AI;
+}
+
+const AMDGPUFunctionArgInfo &
+AMDGPUArgumentUsageInfo::lookupFuncArgInfo(const Function &F) const {
+ auto I = ArgInfoMap.find(&F);
+ if (I == ArgInfoMap.end()) {
+ if (AMDGPUTargetMachine::EnableFixedFunctionABI)
+ return FixedABIFunctionInfo;
+
+ // Without the fixed ABI, we assume no function has special inputs.
+ assert(F.isDeclaration());
+ return ExternFunctionInfo;
+ }
+
+ return I->second;
+}
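fixedABILayout packs the three workitem IDs into a single VGPR (v31) as 10-bit fields selected by the 0x3ff masks at bit offsets 0, 10 and 20. A self-contained sketch (helper names are hypothetical) of how a packed value decomposes under that layout:

// Sketch: decode the three 10-bit workitem IDs packed into one 32-bit value,
// matching the Mask / Mask << 10 / Mask << 20 layout above.
#include <cstdint>
#include <cstdio>

struct WorkItemIDs { uint32_t X, Y, Z; };

static WorkItemIDs unpackWorkItemIDs(uint32_t Packed) {
  const uint32_t Mask = 0x3ff;            // 10 bits per ID
  return { Packed & Mask,                 // bits  0..9  -> X
           (Packed >> 10) & Mask,         // bits 10..19 -> Y
           (Packed >> 20) & Mask };       // bits 20..29 -> Z
}

int main() {
  WorkItemIDs IDs = unpackWorkItemIDs((5u << 20) | (3u << 10) | 7u);
  std::printf("X=%u Y=%u Z=%u\n", IDs.X, IDs.Y, IDs.Z); // X=7 Y=3 Z=5
  return 0;
}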
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index f0e7ee910f95..576e6cfe929e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -11,15 +11,13 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/Register.h"
-#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
namespace llvm {
class Function;
class raw_ostream;
-class GCNSubtarget;
-class TargetMachine;
class TargetRegisterClass;
class TargetRegisterInfo;
@@ -40,19 +38,22 @@ private:
bool IsSet : 1;
public:
- ArgDescriptor(unsigned Val = 0, unsigned Mask = ~0u,
+ constexpr ArgDescriptor(unsigned Val = 0, unsigned Mask = ~0u,
bool IsStack = false, bool IsSet = false)
: Reg(Val), Mask(Mask), IsStack(IsStack), IsSet(IsSet) {}
- static ArgDescriptor createRegister(Register Reg, unsigned Mask = ~0u) {
+ static constexpr ArgDescriptor createRegister(Register Reg,
+ unsigned Mask = ~0u) {
return ArgDescriptor(Reg, Mask, false, true);
}
- static ArgDescriptor createStack(unsigned Offset, unsigned Mask = ~0u) {
+ static constexpr ArgDescriptor createStack(unsigned Offset,
+ unsigned Mask = ~0u) {
return ArgDescriptor(Offset, Mask, true, true);
}
- static ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask) {
+ static constexpr ArgDescriptor createArg(const ArgDescriptor &Arg,
+ unsigned Mask) {
return ArgDescriptor(Arg.Reg, Mask, Arg.IsStack, Arg.IsSet);
}
@@ -141,25 +142,29 @@ struct AMDGPUFunctionArgInfo {
ArgDescriptor ImplicitArgPtr;
// Input registers for non-HSA ABI
- ArgDescriptor ImplicitBufferPtr = 0;
+ ArgDescriptor ImplicitBufferPtr;
// VGPRs inputs. These are always v0, v1 and v2 for entry functions.
ArgDescriptor WorkItemIDX;
ArgDescriptor WorkItemIDY;
ArgDescriptor WorkItemIDZ;
- std::pair<const ArgDescriptor *, const TargetRegisterClass *>
+ std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
getPreloadedValue(PreloadedValue Value) const;
+
+ static constexpr AMDGPUFunctionArgInfo fixedABILayout();
};
class AMDGPUArgumentUsageInfo : public ImmutablePass {
private:
- static const AMDGPUFunctionArgInfo ExternFunctionInfo;
DenseMap<const Function *, AMDGPUFunctionArgInfo> ArgInfoMap;
public:
static char ID;
+ static const AMDGPUFunctionArgInfo ExternFunctionInfo;
+ static const AMDGPUFunctionArgInfo FixedABIFunctionInfo;
+
AMDGPUArgumentUsageInfo() : ImmutablePass(ID) { }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -175,15 +180,7 @@ public:
ArgInfoMap[&F] = ArgInfo;
}
- const AMDGPUFunctionArgInfo &lookupFuncArgInfo(const Function &F) const {
- auto I = ArgInfoMap.find(&F);
- if (I == ArgInfoMap.end()) {
- assert(F.isDeclaration());
- return ExternFunctionInfo;
- }
-
- return I->second;
- }
+ const AMDGPUFunctionArgInfo &lookupFuncArgInfo(const Function &F) const;
};
} // end namespace llvm
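getPreloadedValue now returns a tuple that also carries the expected LLT, so GlobalISel callers can destructure the register class and type together. A usage fragment (not a standalone program; ArgInfo is assumed to be an AMDGPUFunctionArgInfo in scope, and the chosen enumerator is only an example):

// Sketch: consuming the tuple-returning getPreloadedValue().
const ArgDescriptor *Arg;
const TargetRegisterClass *RC;
LLT ArgTy;
std::tie(Arg, RC, ArgTy) =
    ArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::QUEUE_PTR);
if (Arg) {
  // Per the .cpp hunk above, QUEUE_PTR yields &AMDGPU::SGPR_64RegClass and
  // LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64).
}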
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 9e07b4d252b7..eef8fe2fc3b7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -49,9 +49,25 @@ using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::AMDGPU::HSAMD;
-// TODO: This should get the default rounding mode from the kernel. We just set
-// the default here, but this could change if the OpenCL rounding mode pragmas
-// are used.
+// We need to tell the runtime some amount ahead of time if we don't know the
+// true stack size. Assume a smaller number if this is only due to dynamic /
+// non-entry block allocas.
+static cl::opt<uint32_t> AssumedStackSizeForExternalCall(
+ "amdgpu-assume-external-call-stack-size",
+ cl::desc("Assumed stack use of any external call (in bytes)"),
+ cl::Hidden,
+ cl::init(16384));
+
+static cl::opt<uint32_t> AssumedStackSizeForDynamicSizeObjects(
+ "amdgpu-assume-dynamic-stack-object-size",
+ cl::desc("Assumed extra stack use if there are any "
+ "variable sized objects (in bytes)"),
+ cl::Hidden,
+ cl::init(4096));
+
+// This should get the default rounding mode from the kernel. We just set the
+// default here, but this could change if the OpenCL rounding mode pragmas are
+// used.
//
// The denormal mode here should match what is reported by the OpenCL runtime
// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
@@ -70,18 +86,10 @@ using namespace llvm::AMDGPU::HSAMD;
// instructions to run at the double precision rate for the device so it's
// probably best to just report no single precision denormals.
static uint32_t getFPMode(AMDGPU::SIModeRegisterDefaults Mode) {
-
- // TODO: Is there any real use for the flush in only / flush out only modes?
- uint32_t FP32Denormals =
- Mode.FP32Denormals ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
-
- uint32_t FP64Denormals =
- Mode.FP64FP16Denormals ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
-
return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
- FP_DENORM_MODE_SP(FP32Denormals) |
- FP_DENORM_MODE_DP(FP64Denormals);
+ FP_DENORM_MODE_SP(Mode.fpDenormModeSPValue()) |
+ FP_DENORM_MODE_DP(Mode.fpDenormModeDPValue());
}
static AsmPrinter *
@@ -120,7 +128,7 @@ AMDGPUTargetStreamer* AMDGPUAsmPrinter::getTargetStreamer() const {
return static_cast<AMDGPUTargetStreamer*>(OutStreamer->getTargetStreamer());
}
-void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
+void AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) {
if (IsaInfo::hasCodeObjectV3(getGlobalSTI())) {
std::string ExpectedTarget;
raw_string_ostream ExpectedTargetOS(ExpectedTarget);
@@ -152,7 +160,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
}
-void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
+void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) {
// Following code requires TargetStreamer to be present.
if (!getTargetStreamer())
return;
@@ -188,7 +196,7 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
}
-void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
+void AMDGPUAsmPrinter::emitFunctionBodyStart() {
const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
if (!MFI.isEntryFunction())
return;
@@ -207,7 +215,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
}
-void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
+void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
if (!MFI.isEntryFunction())
return;
@@ -226,7 +234,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
// CP microcode requires the kernel descriptor to be allocated on 64 byte
// alignment.
- Streamer.EmitValueToAlignment(64, 0, 1, 0);
+ Streamer.emitValueToAlignment(64, 0, 1, 0);
if (ReadOnlySection.getAlignment() < 64)
ReadOnlySection.setAlignment(Align(64));
@@ -247,10 +255,10 @@ void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
Streamer.PopSection();
}
-void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
+void AMDGPUAsmPrinter::emitFunctionEntryLabel() {
if (IsaInfo::hasCodeObjectV3(getGlobalSTI()) &&
TM.getTargetTriple().getOS() == Triple::AMDHSA) {
- AsmPrinter::EmitFunctionEntryLabel();
+ AsmPrinter::emitFunctionEntryLabel();
return;
}
@@ -269,10 +277,10 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
HexLines.push_back("");
}
- AsmPrinter::EmitFunctionEntryLabel();
+ AsmPrinter::emitFunctionEntryLabel();
}
-void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) {
+void AMDGPUAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
if (DumpCodeInstEmitter && !isBlockOnlyReachableByFallthrough(&MBB)) {
// Write a line for the basic block label if it is not only fallthrough.
DisasmLines.push_back(
@@ -281,10 +289,10 @@ void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) {
DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLines.back().size());
HexLines.push_back("");
}
- AsmPrinter::EmitBasicBlockStart(MBB);
+ AsmPrinter::emitBasicBlockStart(MBB);
}
-void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+void AMDGPUAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
if (GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
OutContext.reportError({},
@@ -307,18 +315,16 @@ void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
const DataLayout &DL = GV->getParent()->getDataLayout();
uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
- unsigned Align = GV->getAlignment();
- if (!Align)
- Align = 4;
+ Align Alignment = GV->getAlign().getValueOr(Align(4));
- EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
- EmitLinkage(GV, GVSym);
+ emitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
+ emitLinkage(GV, GVSym);
if (auto TS = getTargetStreamer())
- TS->emitAMDGPULDS(GVSym, Size, Align);
+ TS->emitAMDGPULDS(GVSym, Size, Alignment);
return;
}
- AsmPrinter::EmitGlobalVariable(GV);
+ AsmPrinter::emitGlobalVariable(GV);
}
bool AMDGPUAsmPrinter::doFinalization(Module &M) {
@@ -468,7 +474,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
HexLines.clear();
DisasmLineMaxLen = 0;
- EmitFunctionBody();
+ emitFunctionBody();
if (isVerbose()) {
MCSectionELF *CommentSection =
@@ -549,7 +555,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
if (DumpCodeInstEmitter) {
OutStreamer->SwitchSection(
- Context.getELFSection(".AMDGPU.disasm", ELF::SHT_NOTE, 0));
+ Context.getELFSection(".AMDGPU.disasm", ELF::SHT_PROGBITS, 0));
for (size_t i = 0; i < DisasmLines.size(); ++i) {
std::string Comment = "\n";
@@ -558,8 +564,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
Comment += " ; " + HexLines[i] + "\n";
}
- OutStreamer->EmitBytes(StringRef(DisasmLines[i]));
- OutStreamer->EmitBytes(StringRef(Comment));
+ OutStreamer->emitBytes(StringRef(DisasmLines[i]));
+ OutStreamer->emitBytes(StringRef(Comment));
}
}
@@ -609,6 +615,15 @@ int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumVGPRs(
return std::max(NumVGPR, NumAGPR);
}
+static const Function *getCalleeFunction(const MachineOperand &Op) {
+ if (Op.isImm()) {
+ assert(Op.getImm() == 0);
+ return nullptr;
+ }
+
+ return cast<Function>(Op.getGlobal());
+}
+
AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
const MachineFunction &MF) const {
SIFunctionResourceInfo Info;
@@ -636,11 +651,15 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Info.UsesFlatScratch = false;
}
- Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
Info.PrivateSegmentSize = FrameInfo.getStackSize();
- if (MFI->isStackRealigned())
- Info.PrivateSegmentSize += FrameInfo.getMaxAlignment();
+ // Assume a big number if there are any unknown sized objects.
+ Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
+ if (Info.HasDynamicallySizedStack)
+ Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;
+
+ if (MFI->isStackRealigned())
+ Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();
Info.UsesVCC = MRI.isPhysRegUsed(AMDGPU::VCC_LO) ||
MRI.isPhysRegUsed(AMDGPU::VCC_HI);
@@ -715,6 +734,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
case AMDGPU::SRC_PRIVATE_BASE:
case AMDGPU::SRC_PRIVATE_LIMIT:
case AMDGPU::SGPR_NULL:
+ case AMDGPU::MODE:
continue;
case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
@@ -727,6 +747,10 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
case AMDGPU::VCC:
case AMDGPU::VCC_LO:
case AMDGPU::VCC_HI:
+ case AMDGPU::VCC_LO_LO16:
+ case AMDGPU::VCC_LO_HI16:
+ case AMDGPU::VCC_HI_LO16:
+ case AMDGPU::VCC_HI_HI16:
Info.UsesVCC = true;
continue;
@@ -764,15 +788,20 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
break;
}
- if (AMDGPU::SReg_32RegClass.contains(Reg)) {
+ if (AMDGPU::SReg_32RegClass.contains(Reg) ||
+ AMDGPU::SReg_LO16RegClass.contains(Reg) ||
+ AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
"trap handler registers should not be used");
IsSGPR = true;
Width = 1;
- } else if (AMDGPU::VGPR_32RegClass.contains(Reg)) {
+ } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
+ AMDGPU::VGPR_LO16RegClass.contains(Reg) ||
+ AMDGPU::VGPR_HI16RegClass.contains(Reg)) {
IsSGPR = false;
Width = 1;
- } else if (AMDGPU::AGPR_32RegClass.contains(Reg)) {
+ } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
+ AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
IsSGPR = false;
IsAGPR = true;
Width = 1;
@@ -794,6 +823,10 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
} else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
IsSGPR = true;
Width = 3;
+ } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
+ IsSGPR = false;
+ IsAGPR = true;
+ Width = 3;
} else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
"trap handler registers should not be used");
@@ -812,6 +845,20 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
} else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
IsSGPR = true;
Width = 5;
+ } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
+ IsSGPR = false;
+ IsAGPR = true;
+ Width = 5;
+ } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 6;
+ } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 6;
+ } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
+ IsSGPR = false;
+ IsAGPR = true;
+ Width = 6;
} else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
"trap handler registers should not be used");
@@ -820,6 +867,10 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
} else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
IsSGPR = false;
Width = 8;
+ } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
+ IsSGPR = false;
+ IsAGPR = true;
+ Width = 8;
} else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
"trap handler registers should not be used");
@@ -862,8 +913,9 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
const MachineOperand *CalleeOp
= TII->getNamedOperand(MI, AMDGPU::OpName::callee);
- const Function *Callee = cast<Function>(CalleeOp->getGlobal());
- if (Callee->isDeclaration()) {
+
+ const Function *Callee = getCalleeFunction(*CalleeOp);
+ if (!Callee || Callee->isDeclaration()) {
// If this is a call to an external function, we can't do much. Make
// conservative guesses.
@@ -874,7 +926,9 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
MaxVGPR = std::max(MaxVGPR, 23);
MaxAGPR = std::max(MaxAGPR, 23);
- CalleeFrameSize = std::max(CalleeFrameSize, UINT64_C(16384));
+ CalleeFrameSize = std::max(CalleeFrameSize,
+ static_cast<uint64_t>(AssumedStackSizeForExternalCall));
+
Info.UsesVCC = true;
Info.UsesFlatScratch = ST.hasFlatAddressSpace();
Info.HasDynamicallySizedStack = true;
@@ -906,7 +960,8 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Info.HasRecursion |= I->second.HasRecursion;
}
- if (!Callee->doesNotRecurse())
+ // FIXME: Call site could have norecurse on it
+ if (!Callee || !Callee->doesNotRecurse())
Info.HasRecursion = true;
}
}
@@ -1108,7 +1163,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) |
S_00B84C_EXCP_EN(0);
- ProgInfo.Occupancy = STM.computeOccupancy(MF, ProgInfo.LDSSize,
+ ProgInfo.Occupancy = STM.computeOccupancy(MF.getFunction(), ProgInfo.LDSSize,
ProgInfo.NumSGPRsForWavesPerEU,
ProgInfo.NumVGPRsForWavesPerEU);
}
@@ -1132,40 +1187,41 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
- OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
+ OutStreamer->emitInt32(R_00B848_COMPUTE_PGM_RSRC1);
- OutStreamer->EmitIntValue(CurrentProgramInfo.ComputePGMRSrc1, 4);
+ OutStreamer->emitInt32(CurrentProgramInfo.ComputePGMRSrc1);
- OutStreamer->EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
- OutStreamer->EmitIntValue(CurrentProgramInfo.ComputePGMRSrc2, 4);
+ OutStreamer->emitInt32(R_00B84C_COMPUTE_PGM_RSRC2);
+ OutStreamer->emitInt32(CurrentProgramInfo.ComputePGMRSrc2);
- OutStreamer->EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
- OutStreamer->EmitIntValue(S_00B860_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
+ OutStreamer->emitInt32(R_00B860_COMPUTE_TMPRING_SIZE);
+ OutStreamer->emitInt32(S_00B860_WAVESIZE(CurrentProgramInfo.ScratchBlocks));
// TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
// 0" comment but I don't see a corresponding field in the register spec.
} else {
- OutStreamer->EmitIntValue(RsrcReg, 4);
- OutStreamer->EmitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
+ OutStreamer->emitInt32(RsrcReg);
+ OutStreamer->emitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
- OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
- OutStreamer->EmitIntValue(
+ OutStreamer->emitInt32(R_0286E8_SPI_TMPRING_SIZE);
+ OutStreamer->emitIntValue(
S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
}
if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
- OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
- OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks), 4);
- OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
- OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4);
- OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4);
- OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4);
+ OutStreamer->emitInt32(R_00B02C_SPI_SHADER_PGM_RSRC2_PS);
+ OutStreamer->emitInt32(
+ S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks));
+ OutStreamer->emitInt32(R_0286CC_SPI_PS_INPUT_ENA);
+ OutStreamer->emitInt32(MFI->getPSInputEnable());
+ OutStreamer->emitInt32(R_0286D0_SPI_PS_INPUT_ADDR);
+ OutStreamer->emitInt32(MFI->getPSInputAddr());
}
- OutStreamer->EmitIntValue(R_SPILLED_SGPRS, 4);
- OutStreamer->EmitIntValue(MFI->getNumSpilledSGPRs(), 4);
- OutStreamer->EmitIntValue(R_SPILLED_VGPRS, 4);
- OutStreamer->EmitIntValue(MFI->getNumSpilledVGPRs(), 4);
+ OutStreamer->emitInt32(R_SPILLED_SGPRS);
+ OutStreamer->emitInt32(MFI->getNumSpilledSGPRs());
+ OutStreamer->emitInt32(R_SPILLED_VGPRS);
+ OutStreamer->emitInt32(MFI->getNumSpilledVGPRs());
}
// This is the equivalent of EmitProgramInfoSI above, but for when the OS type
@@ -1304,7 +1360,18 @@ bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
AMDGPUInstPrinter::printRegOperand(MO.getReg(), O,
*MF->getSubtarget().getRegisterInfo());
return false;
+ } else if (MO.isImm()) {
+ int64_t Val = MO.getImm();
+ if (AMDGPU::isInlinableIntLiteral(Val)) {
+ O << Val;
+ } else if (isUInt<16>(Val)) {
+ O << format("0x%" PRIx16, static_cast<uint16_t>(Val));
+ } else if (isUInt<32>(Val)) {
+ O << format("0x%" PRIx32, static_cast<uint32_t>(Val));
+ } else {
+ O << format("0x%" PRIx64, static_cast<uint64_t>(Val));
+ }
+ return false;
}
-
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index c50c19a4609c..54e8338ab4b0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -121,21 +121,21 @@ public:
const MachineInstr *MI);
/// Implemented in AMDGPUMCInstLower.cpp
- void EmitInstruction(const MachineInstr *MI) override;
+ void emitInstruction(const MachineInstr *MI) override;
- void EmitFunctionBodyStart() override;
+ void emitFunctionBodyStart() override;
- void EmitFunctionBodyEnd() override;
+ void emitFunctionBodyEnd() override;
- void EmitFunctionEntryLabel() override;
+ void emitFunctionEntryLabel() override;
- void EmitBasicBlockStart(const MachineBasicBlock &MBB) override;
+ void emitBasicBlockStart(const MachineBasicBlock &MBB) override;
- void EmitGlobalVariable(const GlobalVariable *GV) override;
+ void emitGlobalVariable(const GlobalVariable *GV) override;
- void EmitStartOfAsmFile(Module &M) override;
+ void emitStartOfAsmFile(Module &M) override;
- void EmitEndOfAsmFile(Module &M) override;
+ void emitEndOfAsmFile(Module &M) override;
bool isBlockOnlyReachableByFallthrough(
const MachineBasicBlock *MBB) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index 59aa0ea98aa7..c9d25d4250d5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -438,7 +438,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
Type *const Ty = I.getType();
const unsigned TyBitWidth = DL->getTypeSizeInBits(Ty);
- Type *const VecTy = VectorType::get(B.getInt32Ty(), 2);
+ auto *const VecTy = FixedVectorType::get(B.getInt32Ty(), 2);
// This is the value in the atomic operation we need to combine in order to
// reduce the number of atomic operations.
@@ -447,9 +447,8 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
// We need to know how many lanes are active within the wavefront, and we do
// this by doing a ballot of active lanes.
Type *const WaveTy = B.getIntNTy(ST->getWavefrontSize());
- CallInst *const Ballot = B.CreateIntrinsic(
- Intrinsic::amdgcn_icmp, {WaveTy, B.getInt32Ty()},
- {B.getInt32(1), B.getInt32(0), B.getInt32(CmpInst::ICMP_NE)});
+ CallInst *const Ballot =
+ B.CreateIntrinsic(Intrinsic::amdgcn_ballot, WaveTy, B.getTrue());
// We need to know how many lanes are active within the wavefront that are
// below us. If we counted each lane linearly starting from 0, a lane is
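The new amdgcn.ballot call feeds the computation described in the trailing comment: how many active lanes sit below the current one. That reduces to a masked popcount of the ballot; a standalone sketch for a 64-lane wave (C++20 <bit>):

// Sketch: count the active lanes with a lower lane id than LaneIdx, given a
// 64-bit ballot of active lanes (the mbcnt-style prefix count the pass uses).
#include <bit>
#include <cstdint>

static unsigned activeLanesBelow(uint64_t Ballot, unsigned LaneIdx) {
  uint64_t LowerMask = (LaneIdx == 0) ? 0 : (~uint64_t(0) >> (64 - LaneIdx));
  return std::popcount(Ballot & LowerMask);
}
// activeLanesBelow(0b1011, 3) == 2: lanes 0 and 1 are active below lane 3.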
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index c657ca71bfdf..05a4e3462a26 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -16,6 +16,7 @@
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
+#include "AMDGPUTargetMachine.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
@@ -59,6 +60,18 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
} else
ExtReg = extendRegister(ValVReg, VA);
+ // If this is a scalar return, insert a readfirstlane just in case the value
+ // ends up in a VGPR.
+ // FIXME: Assert this is a shader return.
+ const SIRegisterInfo *TRI
+ = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
+ if (TRI->isSGPRReg(MRI, PhysReg)) {
+ auto ToSGPR = MIRBuilder.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
+ {MRI.getType(ExtReg)}, false)
+ .addReg(ExtReg);
+ ExtReg = ToSGPR.getReg(0);
+ }
+
MIRBuilder.buildCopy(PhysReg, ExtReg);
MIB.addUse(PhysReg, RegState::Implicit);
}
@@ -84,11 +97,10 @@ struct IncomingArgHandler : public CallLowering::ValueHandler {
auto &MFI = MIRBuilder.getMF().getFrameInfo();
int FI = MFI.CreateFixedObject(Size, Offset, true);
MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
- Register AddrReg = MRI.createGenericVirtualRegister(
- LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32));
- MIRBuilder.buildFrameIndex(AddrReg, FI);
+ auto AddrReg = MIRBuilder.buildFrameIndex(
+ LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32), FI);
StackUsed = std::max(StackUsed, Size + Offset);
- return AddrReg;
+ return AddrReg.getReg(0);
}
void assignValueToReg(Register ValVReg, Register PhysReg,
@@ -119,9 +131,12 @@ struct IncomingArgHandler : public CallLowering::ValueHandler {
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
+ MachineFunction &MF = MIRBuilder.getMF();
+
// FIXME: Get alignment
- auto MMO = MIRBuilder.getMF().getMachineMemOperand(
- MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size, 1);
+ auto MMO = MF.getMachineMemOperand(
+ MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
+ inferAlignFromPtrInfo(MF, MPO));
MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
}
@@ -150,10 +165,26 @@ AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
: CallLowering(&TLI) {
}
+// FIXME: Compatibility shim
+static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
+ switch (MIOpc) {
+ case TargetOpcode::G_SEXT:
+ return ISD::SIGN_EXTEND;
+ case TargetOpcode::G_ZEXT:
+ return ISD::ZERO_EXTEND;
+ case TargetOpcode::G_ANYEXT:
+ return ISD::ANY_EXTEND;
+ default:
+ llvm_unreachable("not an extend opcode");
+ }
+}
+
void AMDGPUCallLowering::splitToValueTypes(
- const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
- const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
- SplitArgTy PerformArgSplit) const {
+ MachineIRBuilder &B,
+ const ArgInfo &OrigArg, unsigned OrigArgIdx,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL, CallingConv::ID CallConv,
+ SplitArgTy PerformArgSplit) const {
const SITargetLowering &TLI = *getTLI<SITargetLowering>();
LLVMContext &Ctx = OrigArg.Ty->getContext();
@@ -167,28 +198,46 @@ void AMDGPUCallLowering::splitToValueTypes(
int SplitIdx = 0;
for (EVT VT : SplitVTs) {
- unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
+ Register Reg = OrigArg.Regs[SplitIdx];
Type *Ty = VT.getTypeForEVT(Ctx);
+ LLT LLTy = getLLTForType(*Ty, DL);
+ if (OrigArgIdx == AttributeList::ReturnIndex && VT.isScalarInteger()) {
+ unsigned ExtendOp = TargetOpcode::G_ANYEXT;
+ if (OrigArg.Flags[0].isSExt()) {
+ assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
+ ExtendOp = TargetOpcode::G_SEXT;
+ } else if (OrigArg.Flags[0].isZExt()) {
+ assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
+ ExtendOp = TargetOpcode::G_ZEXT;
+ }
+ EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
+ extOpcodeToISDExtOpcode(ExtendOp));
+ if (ExtVT != VT) {
+ VT = ExtVT;
+ Ty = ExtVT.getTypeForEVT(Ctx);
+ LLTy = getLLTForType(*Ty, DL);
+ Reg = B.buildInstr(ExtendOp, {LLTy}, {Reg}).getReg(0);
+ }
+ }
+
+ unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
+ MVT RegVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
if (NumParts == 1) {
// No splitting to do, but we want to replace the original type (e.g. [1 x
// double] -> double).
- SplitArgs.emplace_back(OrigArg.Regs[SplitIdx], Ty,
- OrigArg.Flags, OrigArg.IsFixed);
+ SplitArgs.emplace_back(Reg, Ty, OrigArg.Flags, OrigArg.IsFixed);
++SplitIdx;
continue;
}
- LLT LLTy = getLLTForType(*Ty, DL);
-
SmallVector<Register, 8> SplitRegs;
-
- EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
- Type *PartTy = PartVT.getTypeForEVT(Ctx);
+ Type *PartTy = EVT(RegVT).getTypeForEVT(Ctx);
LLT PartLLT = getLLTForType(*PartTy, DL);
+ MachineRegisterInfo &MRI = *B.getMRI();
// FIXME: Should we be reporting all of the part registers for a single
// argument, and let handleAssignments take care of the repacking?
@@ -198,7 +247,7 @@ void AMDGPUCallLowering::splitToValueTypes(
SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
}
- PerformArgSplit(SplitRegs, LLTy, PartLLT, SplitIdx);
+ PerformArgSplit(SplitRegs, Reg, LLTy, PartLLT, SplitIdx);
++SplitIdx;
}
@@ -218,13 +267,11 @@ static LLT getMultipleType(LLT OrigTy, int Factor) {
static void unpackRegsToOrigType(MachineIRBuilder &B,
ArrayRef<Register> DstRegs,
Register SrcReg,
+ const CallLowering::ArgInfo &Info,
LLT SrcTy,
LLT PartTy) {
assert(DstRegs.size() > 1 && "Nothing to unpack");
- MachineFunction &MF = B.getMF();
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
const unsigned SrcSize = SrcTy.getSizeInBits();
const unsigned PartSize = PartTy.getSizeInBits();
@@ -248,12 +295,11 @@ static void unpackRegsToOrigType(MachineIRBuilder &B,
LLT BigTy = getMultipleType(PartTy, NumRoundedParts);
auto ImpDef = B.buildUndef(BigTy);
- Register BigReg = MRI.createGenericVirtualRegister(BigTy);
- B.buildInsert(BigReg, ImpDef.getReg(0), SrcReg, 0).getReg(0);
+ auto Big = B.buildInsert(BigTy, ImpDef.getReg(0), SrcReg, 0).getReg(0);
int64_t Offset = 0;
for (unsigned i = 0, e = DstRegs.size(); i != e; ++i, Offset += PartSize)
- B.buildExtract(DstRegs[i], BigReg, Offset);
+ B.buildExtract(DstRegs[i], Big, Offset);
}
/// Lower the return value for the already existing \p Ret. This assumes that
@@ -267,24 +313,26 @@ bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B,
auto &MF = B.getMF();
const auto &F = MF.getFunction();
const DataLayout &DL = MF.getDataLayout();
+ MachineRegisterInfo *MRI = B.getMRI();
CallingConv::ID CC = F.getCallingConv();
const SITargetLowering &TLI = *getTLI<SITargetLowering>();
- MachineRegisterInfo &MRI = MF.getRegInfo();
ArgInfo OrigRetInfo(VRegs, Val->getType());
setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
SmallVector<ArgInfo, 4> SplitRetInfos;
splitToValueTypes(
- OrigRetInfo, SplitRetInfos, DL, MRI, CC,
- [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
- unpackRegsToOrigType(B, Regs, VRegs[VTSplitIdx], LLTy, PartLLT);
+ B, OrigRetInfo, AttributeList::ReturnIndex, SplitRetInfos, DL, CC,
+ [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy, LLT PartLLT,
+ int VTSplitIdx) {
+ unpackRegsToOrigType(B, Regs, SrcReg,
+ SplitRetInfos[VTSplitIdx],
+ LLTy, PartLLT);
});
CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
-
- OutgoingValueHandler RetHandler(B, MF.getRegInfo(), Ret, AssignFn);
+ OutgoingValueHandler RetHandler(B, *MRI, Ret, AssignFn);
return handleAssignments(B, SplitRetInfos, RetHandler);
}
@@ -309,7 +357,7 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B,
return true;
}
- auto const &ST = B.getMF().getSubtarget<GCNSubtarget>();
+ auto const &ST = MF.getSubtarget<GCNSubtarget>();
unsigned ReturnOpc =
IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return;
@@ -348,22 +396,17 @@ Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &B,
const DataLayout &DL = F.getParent()->getDataLayout();
PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
LLT PtrType = getLLTForType(*PtrTy, DL);
- Register DstReg = MRI.createGenericVirtualRegister(PtrType);
Register KernArgSegmentPtr =
MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
- Register OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
- B.buildConstant(OffsetReg, Offset);
+ auto OffsetReg = B.buildConstant(LLT::scalar(64), Offset);
- B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);
-
- return DstReg;
+ return B.buildPtrAdd(PtrType, KernArgSegmentVReg, OffsetReg).getReg(0);
}
-void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B,
- Type *ParamTy, uint64_t Offset,
- unsigned Align,
+void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, Type *ParamTy,
+ uint64_t Offset, Align Alignment,
Register DstReg) const {
MachineFunction &MF = B.getMF();
const Function &F = MF.getFunction();
@@ -372,11 +415,11 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B,
unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
Register PtrReg = lowerParameterPtr(B, ParamTy, Offset);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
- MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant,
- TypeSize, Align);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo,
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ TypeSize, Alignment);
B.buildLoad(DstReg, PtrReg, *MMO);
}
@@ -389,19 +432,19 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
SIMachineFunctionInfo &Info) {
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
if (Info.hasPrivateSegmentBuffer()) {
- unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
+ Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
CCInfo.AllocateReg(PrivateSegmentBufferReg);
}
if (Info.hasDispatchPtr()) {
- unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
+ Register DispatchPtrReg = Info.addDispatchPtr(TRI);
MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchPtrReg);
}
if (Info.hasQueuePtr()) {
- unsigned QueuePtrReg = Info.addQueuePtr(TRI);
+ Register QueuePtrReg = Info.addQueuePtr(TRI);
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(QueuePtrReg);
}
@@ -418,13 +461,13 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
}
if (Info.hasDispatchID()) {
- unsigned DispatchIDReg = Info.addDispatchID(TRI);
+ Register DispatchIDReg = Info.addDispatchID(TRI);
MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchIDReg);
}
if (Info.hasFlatScratchInit()) {
- unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
+ Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(FlatScratchInitReg);
}
@@ -451,7 +494,7 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
allocateHSAUserSGPRs(CCInfo, B, MF, *TRI, *Info);
unsigned i = 0;
- const unsigned KernArgBaseAlign = 16;
+ const Align KernArgBaseAlign(16);
const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
uint64_t ExplicitArgOffset = 0;
@@ -462,19 +505,24 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
if (AllocSize == 0)
continue;
- unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
+ Align ABIAlign = DL.getABITypeAlign(ArgTy);
uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
+ if (Arg.use_empty()) {
+ ++i;
+ continue;
+ }
+
ArrayRef<Register> OrigArgRegs = VRegs[i];
Register ArgReg =
OrigArgRegs.size() == 1
? OrigArgRegs[0]
: MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
- unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
- ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
- lowerParameter(B, ArgTy, ArgOffset, Align, ArgReg);
+
+ Align Alignment = commonAlignment(KernArgBaseAlign, ArgOffset);
+ lowerParameter(B, ArgTy, ArgOffset, Alignment, ArgReg);
if (OrigArgRegs.size() > 1)
unpackRegs(OrigArgRegs, ArgReg, ArgTy, B);
++i;
@@ -485,38 +533,72 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
return true;
}
+/// Pack values \p SrcRegs to cover the vector type result \p DstRegs.
+static MachineInstrBuilder mergeVectorRegsToResultRegs(
+ MachineIRBuilder &B, ArrayRef<Register> DstRegs, ArrayRef<Register> SrcRegs) {
+ MachineRegisterInfo &MRI = *B.getMRI();
+ LLT LLTy = MRI.getType(DstRegs[0]);
+ LLT PartLLT = MRI.getType(SrcRegs[0]);
+
+ // Deal with v3s16 split into v2s16
+ LLT LCMTy = getLCMType(LLTy, PartLLT);
+ if (LCMTy == LLTy) {
+ // Common case where no padding is needed.
+ assert(DstRegs.size() == 1);
+ return B.buildConcatVectors(DstRegs[0], SrcRegs);
+ }
+
+ const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits();
+ Register Undef = B.buildUndef(PartLLT).getReg(0);
+
+ // Build vector of undefs.
+ SmallVector<Register, 8> WidenedSrcs(NumWide, Undef);
+
+ // Replace the first sources with the real registers.
+ std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin());
+
+ auto Widened = B.buildConcatVectors(LCMTy, WidenedSrcs);
+ int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits();
+
+ SmallVector<Register, 8> PadDstRegs(NumDst);
+ std::copy(DstRegs.begin(), DstRegs.end(), PadDstRegs.begin());
+
+ // Create the excess dead defs for the unmerge.
+ for (int I = DstRegs.size(); I != NumDst; ++I)
+ PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);
+
+ return B.buildUnmerge(PadDstRegs, Widened);
+}
+
// TODO: Move this to generic code
static void packSplitRegsToOrigType(MachineIRBuilder &B,
ArrayRef<Register> OrigRegs,
ArrayRef<Register> Regs,
LLT LLTy,
LLT PartLLT) {
- if (!LLTy.isVector() && !PartLLT.isVector()) {
- B.buildMerge(OrigRegs[0], Regs);
- return;
- }
+ MachineRegisterInfo &MRI = *B.getMRI();
- if (LLTy.isVector() && PartLLT.isVector()) {
- assert(LLTy.getElementType() == PartLLT.getElementType());
+ if (!LLTy.isVector() && !PartLLT.isVector()) {
+ assert(OrigRegs.size() == 1);
+ LLT OrigTy = MRI.getType(OrigRegs[0]);
- int DstElts = LLTy.getNumElements();
- int PartElts = PartLLT.getNumElements();
- if (DstElts % PartElts == 0)
- B.buildConcatVectors(OrigRegs[0], Regs);
+ unsigned SrcSize = PartLLT.getSizeInBits() * Regs.size();
+ if (SrcSize == OrigTy.getSizeInBits())
+ B.buildMerge(OrigRegs[0], Regs);
else {
- // Deal with v3s16 split into v2s16
- assert(PartElts == 2 && DstElts % 2 != 0);
- int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts);
-
- LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType());
- auto RoundedConcat = B.buildConcatVectors(RoundedDestTy, Regs);
- B.buildExtract(OrigRegs[0], RoundedConcat, 0);
+ auto Widened = B.buildMerge(LLT::scalar(SrcSize), Regs);
+ B.buildTrunc(OrigRegs[0], Widened);
}
return;
}
- MachineRegisterInfo &MRI = *B.getMRI();
+ if (LLTy.isVector() && PartLLT.isVector()) {
+ assert(OrigRegs.size() == 1);
+ assert(LLTy.getElementType() == PartLLT.getElementType());
+ mergeVectorRegsToResultRegs(B, OrigRegs, Regs);
+ return;
+ }
assert(LLTy.isVector() && !PartLLT.isVector());
@@ -644,13 +726,16 @@ bool AMDGPUCallLowering::lowerFormalArguments(
}
ArgInfo OrigArg(VRegs[Idx], Arg.getType());
- setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);
+ const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
+ setArgFlags(OrigArg, OrigArgIdx, DL, F);
splitToValueTypes(
- OrigArg, SplitArgs, DL, MRI, CC,
+ B, OrigArg, OrigArgIdx, SplitArgs, DL, CC,
// FIXME: We should probably be passing multiple registers to
// handleAssignments to do this
- [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
+ [&](ArrayRef<Register> Regs, Register DstReg,
+ LLT LLTy, LLT PartLLT, int VTSplitIdx) {
+ assert(DstReg == VRegs[Idx][VTSplitIdx]);
packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs,
LLTy, PartLLT);
});
@@ -705,11 +790,17 @@ bool AMDGPUCallLowering::lowerFormalArguments(
if (!MBB.empty())
B.setInstr(*MBB.begin());
+ if (!IsEntryFunc) {
+ // For the fixed ABI, pass workitem IDs in the last argument register.
+ if (AMDGPUTargetMachine::EnableFixedFunctionABI)
+ TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
+ }
+
FormalArgHandler Handler(B, MRI, AssignFn);
if (!handleAssignments(CCInfo, ArgLocs, B, SplitArgs, Handler))
return false;
- if (!IsEntryFunc) {
+ if (!IsEntryFunc && !AMDGPUTargetMachine::EnableFixedFunctionABI) {
// Special inputs come after user arguments.
TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
}
@@ -719,8 +810,6 @@ bool AMDGPUCallLowering::lowerFormalArguments(
TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
} else {
CCInfo.AllocateReg(Info->getScratchRSrcReg());
- CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
- CCInfo.AllocateReg(Info->getFrameOffsetReg());
TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
}
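mergeVectorRegsToResultRegs handles results such as v3s16 that arrive as v2s16 pieces by concatenating up to the least-common-multiple type and then unmerging, with the excess results left dead. The index arithmetic is easy to verify on its own; a small sketch of the v3s16/v2s16 case:

// Sketch: the widening arithmetic used when a v3s16 result arrives as v2s16
// parts. LCM(v3s16, v2s16) is v6s16 (96 bits), so three v2s16 sources are
// concatenated (the third being undef) and the result is unmerged into two
// v3s16 values, only the first of which is used.
#include <cstdio>
#include <numeric>

int main() {
  const unsigned DstBits  = 3 * 16;                      // v3s16
  const unsigned PartBits = 2 * 16;                      // v2s16
  const unsigned LCMBits  = std::lcm(DstBits, PartBits); // 96
  std::printf("NumWide (concat sources)  = %u\n", LCMBits / PartBits); // 3
  std::printf("NumDst  (unmerge results) = %u\n", LCMBits / DstBits);  // 2
  return 0;
}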
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
index 53a562586bc0..446619d1502e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -27,14 +27,16 @@ class AMDGPUCallLowering: public CallLowering {
uint64_t Offset) const;
void lowerParameter(MachineIRBuilder &B, Type *ParamTy, uint64_t Offset,
- unsigned Align, Register DstReg) const;
+ Align Alignment, Register DstReg) const;
/// A function of this type is used to perform value split action.
- using SplitArgTy = std::function<void(ArrayRef<Register>, LLT, LLT, int)>;
+ using SplitArgTy = std::function<void(ArrayRef<Register>, Register, LLT, LLT, int)>;
- void splitToValueTypes(const ArgInfo &OrigArgInfo,
+ void splitToValueTypes(MachineIRBuilder &B,
+ const ArgInfo &OrigArgInfo,
+ unsigned OrigArgIdx,
SmallVectorImpl<ArgInfo> &SplitArgs,
- const DataLayout &DL, MachineRegisterInfo &MRI,
+ const DataLayout &DL,
CallingConv::ID CallConv,
SplitArgTy SplitArg) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index f8a54a61aac2..7c83b6dcb44b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -18,7 +18,7 @@ class CCIfExtend<CCAction A>
// Calling convention for SI
def CC_SI : CallingConv<[
- CCIfInReg<CCIfType<[f32, i32, f16, v2i16, v2f16] , CCAssignToReg<[
+ CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
@@ -28,7 +28,7 @@ def CC_SI : CallingConv<[
]>>>,
// 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
- CCIfNotInReg<CCIfType<[f32, i32, f16, v2i16, v2f16] , CCAssignToReg<[
+ CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
@@ -50,7 +50,7 @@ def CC_SI : CallingConv<[
]>;
def RetCC_SI_Shader : CallingConv<[
- CCIfType<[i32] , CCAssignToReg<[
+ CCIfType<[i32, i16] , CCAssignToReg<[
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
@@ -89,6 +89,24 @@ def CSR_AMDGPU_VGPRs_32_255 : CalleeSavedRegs<
(sequence "VGPR%u", 32, 255)
>;
+def CSR_AMDGPU_VGPRs : CalleeSavedRegs<
+ // The CSRs & scratch-registers are interleaved at a split boundary of 8.
+ (add (sequence "VGPR%u", 40, 47),
+ (sequence "VGPR%u", 56, 63),
+ (sequence "VGPR%u", 72, 79),
+ (sequence "VGPR%u", 88, 95),
+ (sequence "VGPR%u", 104, 111),
+ (sequence "VGPR%u", 120, 127),
+ (sequence "VGPR%u", 136, 143),
+ (sequence "VGPR%u", 152, 159),
+ (sequence "VGPR%u", 168, 175),
+ (sequence "VGPR%u", 184, 191),
+ (sequence "VGPR%u", 200, 207),
+ (sequence "VGPR%u", 216, 223),
+ (sequence "VGPR%u", 232, 239),
+ (sequence "VGPR%u", 248, 255))
+>;
+
def CSR_AMDGPU_SGPRs_32_105 : CalleeSavedRegs<
(sequence "SGPR%u", 32, 105)
>;
@@ -104,7 +122,7 @@ def CSR_AMDGPU_AllAllocatableSRegs : CalleeSavedRegs<
>;
def CSR_AMDGPU_HighRegs : CalleeSavedRegs<
- (add CSR_AMDGPU_VGPRs_32_255, CSR_AMDGPU_SGPRs_32_105)
+ (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs_32_105)
>;
// Calling convention for leaf functions
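CSR_AMDGPU_VGPRs interleaves callee-saved and scratch VGPRs in blocks of eight, with the first saved block at v40. The explicit ranges above amount to every other block of 8 between v40 and v255; a small check of that pattern:

// Sketch: regenerate the CSR VGPR ranges listed in CSR_AMDGPU_VGPRs above.
// Blocks of 8 registers alternate between scratch and callee-saved; the saved
// blocks start at v40, v56, v72, ..., v248.
#include <cstdio>

int main() {
  for (unsigned Base = 40; Base <= 248; Base += 16)
    std::printf("(sequence \"VGPR%%u\", %u, %u)\n", Base, Base + 7);
  return 0;
}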
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index cf908766caa0..a79549301740 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -15,8 +15,10 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
+#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -26,6 +28,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
@@ -41,6 +44,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/IntegerDivision.h"
#include <cassert>
#include <iterator>
@@ -54,7 +58,7 @@ static cl::opt<bool> WidenLoads(
"amdgpu-codegenprepare-widen-constant-loads",
cl::desc("Widen sub-dword constant address space loads in AMDGPUCodeGenPrepare"),
cl::ReallyHidden,
- cl::init(true));
+ cl::init(false));
static cl::opt<bool> UseMul24Intrin(
"amdgpu-codegenprepare-mul24",
@@ -62,10 +66,26 @@ static cl::opt<bool> UseMul24Intrin(
cl::ReallyHidden,
cl::init(true));
+// Legalize 64-bit division by using the generic IR expansion.
+static cl::opt<bool> ExpandDiv64InIR(
+ "amdgpu-codegenprepare-expand-div64",
+ cl::desc("Expand 64-bit division in AMDGPUCodeGenPrepare"),
+ cl::ReallyHidden,
+ cl::init(false));
+
+// Leave all division operations as they are. This supersedes ExpandDiv64InIR
+// and is used for testing the legalizer.
+static cl::opt<bool> DisableIDivExpand(
+ "amdgpu-codegenprepare-disable-idiv-expansion",
+ cl::desc("Prevent expanding integer division in AMDGPUCodeGenPrepare"),
+ cl::ReallyHidden,
+ cl::init(false));
+
class AMDGPUCodeGenPrepare : public FunctionPass,
public InstVisitor<AMDGPUCodeGenPrepare, bool> {
const GCNSubtarget *ST = nullptr;
AssumptionCache *AC = nullptr;
+ DominatorTree *DT = nullptr;
LegacyDivergenceAnalysis *DA = nullptr;
Module *Mod = nullptr;
const DataLayout *DL = nullptr;
@@ -152,15 +172,33 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
/// SelectionDAG has an issue where an and asserting the bits are known
bool replaceMulWithMul24(BinaryOperator &I) const;
+  /// Perform the same fold as the equivalently named function in DAGCombiner.
+  /// Since we expand some divisions here, we need to perform this fold before
+  /// the expansion obscures the pattern.
+ bool foldBinOpIntoSelect(BinaryOperator &I) const;
+
+ bool divHasSpecialOptimization(BinaryOperator &I,
+ Value *Num, Value *Den) const;
+ int getDivNumBits(BinaryOperator &I,
+ Value *Num, Value *Den,
+ unsigned AtLeast, bool Signed) const;
+
/// Expands 24 bit div or rem.
Value* expandDivRem24(IRBuilder<> &Builder, BinaryOperator &I,
Value *Num, Value *Den,
bool IsDiv, bool IsSigned) const;
+ Value *expandDivRem24Impl(IRBuilder<> &Builder, BinaryOperator &I,
+ Value *Num, Value *Den, unsigned NumBits,
+ bool IsDiv, bool IsSigned) const;
+
/// Expands 32 bit div or rem.
Value* expandDivRem32(IRBuilder<> &Builder, BinaryOperator &I,
Value *Num, Value *Den) const;
+ Value *shrinkDivRem64(IRBuilder<> &Builder, BinaryOperator &I,
+ Value *Num, Value *Den) const;
+ void expandDivRem64(BinaryOperator &I) const;
+
/// Widen a scalar load.
///
/// \details \p Widen scalar load for uniform, small type loads from constant
@@ -195,7 +233,10 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<LegacyDivergenceAnalysis>();
- AU.setPreservesAll();
+
+ // FIXME: Division expansion needs to preserve the dominator tree.
+ if (!ExpandDiv64InIR)
+ AU.setPreservesAll();
}
};
@@ -214,7 +255,7 @@ Type *AMDGPUCodeGenPrepare::getI32Ty(IRBuilder<> &B, const Type *T) const {
if (T->isIntegerTy())
return B.getInt32Ty();
- return VectorType::get(B.getInt32Ty(), cast<VectorType>(T)->getNumElements());
+ return FixedVectorType::get(B.getInt32Ty(), cast<FixedVectorType>(T));
}
bool AMDGPUCodeGenPrepare::isSigned(const BinaryOperator &I) const {
@@ -276,10 +317,9 @@ bool AMDGPUCodeGenPrepare::canWidenScalarExtLoad(LoadInst &I) const {
Type *Ty = I.getType();
const DataLayout &DL = Mod->getDataLayout();
int TySize = DL.getTypeSizeInBits(Ty);
- unsigned Align = I.getAlignment() ?
- I.getAlignment() : DL.getABITypeAlignment(Ty);
+ Align Alignment = DL.getValueOrABITypeAlignment(I.getAlign(), Ty);
- return I.isSimple() && TySize < 32 && Align >= 4 && DA->isUniform(&I);
+ return I.isSimple() && TySize < 32 && Alignment >= 4 && DA->isUniform(&I);
}
bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
@@ -436,7 +476,7 @@ bool AMDGPUCodeGenPrepare::isU24(Value *V, unsigned ScalarSize) const {
static void extractValues(IRBuilder<> &Builder,
SmallVectorImpl<Value *> &Values, Value *V) {
- VectorType *VT = dyn_cast<VectorType>(V->getType());
+ auto *VT = dyn_cast<FixedVectorType>(V->getType());
if (!VT) {
Values.push_back(V);
return;
@@ -525,58 +565,218 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
return true;
}
-static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv, bool HasDenormals) {
- const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
- if (!CNum)
- return HasDenormals;
+// Find a select instruction, which may have been cast. This is mostly to deal
+// with cases where i16 selects were promoted here to i32.
+static SelectInst *findSelectThroughCast(Value *V, CastInst *&Cast) {
+ Cast = nullptr;
+ if (SelectInst *Sel = dyn_cast<SelectInst>(V))
+ return Sel;
- if (UnsafeDiv)
- return true;
+ if ((Cast = dyn_cast<CastInst>(V))) {
+ if (SelectInst *Sel = dyn_cast<SelectInst>(Cast->getOperand(0)))
+ return Sel;
+ }
+
+ return nullptr;
+}
+
+bool AMDGPUCodeGenPrepare::foldBinOpIntoSelect(BinaryOperator &BO) const {
+ // Don't do this unless the old select is going away. We want to eliminate the
+ // binary operator, not replace a binop with a select.
+ int SelOpNo = 0;
+
+ CastInst *CastOp;
+
+ // TODO: Should probably try to handle some cases with multiple
+ // users. Duplicating the select may be profitable for division.
+ SelectInst *Sel = findSelectThroughCast(BO.getOperand(0), CastOp);
+ if (!Sel || !Sel->hasOneUse()) {
+ SelOpNo = 1;
+ Sel = findSelectThroughCast(BO.getOperand(1), CastOp);
+ }
+
+ if (!Sel || !Sel->hasOneUse())
+ return false;
+
+ Constant *CT = dyn_cast<Constant>(Sel->getTrueValue());
+ Constant *CF = dyn_cast<Constant>(Sel->getFalseValue());
+ Constant *CBO = dyn_cast<Constant>(BO.getOperand(SelOpNo ^ 1));
+ if (!CBO || !CT || !CF)
+ return false;
+
+ if (CastOp) {
+ if (!CastOp->hasOneUse())
+ return false;
+ CT = ConstantFoldCastOperand(CastOp->getOpcode(), CT, BO.getType(), *DL);
+ CF = ConstantFoldCastOperand(CastOp->getOpcode(), CF, BO.getType(), *DL);
+ }
+
+ // TODO: Handle special 0/-1 cases DAG combine does, although we only really
+ // need to handle divisions here.
+ Constant *FoldedT = SelOpNo ?
+ ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CT, *DL) :
+ ConstantFoldBinaryOpOperands(BO.getOpcode(), CT, CBO, *DL);
+ if (isa<ConstantExpr>(FoldedT))
+ return false;
+
+ Constant *FoldedF = SelOpNo ?
+ ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CF, *DL) :
+ ConstantFoldBinaryOpOperands(BO.getOpcode(), CF, CBO, *DL);
+ if (isa<ConstantExpr>(FoldedF))
+ return false;
+
+ IRBuilder<> Builder(&BO);
+ Builder.SetCurrentDebugLocation(BO.getDebugLoc());
+ if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&BO))
+ Builder.setFastMathFlags(FPOp->getFastMathFlags());
+
+ Value *NewSelect = Builder.CreateSelect(Sel->getCondition(),
+ FoldedT, FoldedF);
+ NewSelect->takeName(&BO);
+ BO.replaceAllUsesWith(NewSelect);
+ BO.eraseFromParent();
+ if (CastOp)
+ CastOp->eraseFromParent();
+ Sel->eraseFromParent();
+ return true;
+}
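
The transform above is easiest to see on plain integers: the binary operator disappears and only a select between two folded constants survives. A minimal standalone sketch of that identity (not the pass itself; names are illustrative):

#include <cstdio>

// Original shape: a binop applied to the result of a one-use select.
static int selectThenAdd(bool Cond, int T, int F, int K) {
  return (Cond ? T : F) + K;
}

// Folded shape: the constant operand is folded into each arm and the
// select alone remains, matching what foldBinOpIntoSelect produces.
static int foldedSelect(bool Cond, int T, int F, int K) {
  return Cond ? (T + K) : (F + K);
}

int main() {
  for (bool Cond : {false, true})
    std::printf("%d == %d\n", selectThenAdd(Cond, 3, 7, 10),
                foldedSelect(Cond, 3, 7, 10));
  return 0;
}
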
+
+// Optimize fdiv with rcp:
+//
+// 1/x -> rcp(x) when rcp is sufficiently accurate, or when an inaccurate rcp
+// is allowed with unsafe-fp-math or afn.
+//
+// a/b -> a*rcp(b) when inaccurate rcp is allowed with unsafe-fp-math or afn.
+static Value *optimizeWithRcp(Value *Num, Value *Den, bool AllowInaccurateRcp,
+ bool RcpIsAccurate, IRBuilder<> &Builder,
+ Module *Mod) {
+
+ if (!AllowInaccurateRcp && !RcpIsAccurate)
+ return nullptr;
+
+ Type *Ty = Den->getType();
+ if (const ConstantFP *CLHS = dyn_cast<ConstantFP>(Num)) {
+ if (AllowInaccurateRcp || RcpIsAccurate) {
+ if (CLHS->isExactlyValue(1.0)) {
+ Function *Decl = Intrinsic::getDeclaration(
+ Mod, Intrinsic::amdgcn_rcp, Ty);
+
+ // v_rcp_f32 and v_rsq_f32 do not support denormals, and according to
+ // the CI documentation have a worst-case error of 1 ulp.
+ // OpenCL requires <= 2.5 ulp for 1.0 / x, so it should always be OK to
+ // use it as long as we aren't trying to use denormals.
+ //
+ // v_rcp_f16 and v_rsq_f16 DO support denormals.
+
+ // NOTE: v_sqrt and v_rcp will be combined to v_rsq later. So we don't
+ // insert rsq intrinsic here.
+
+ // 1.0 / x -> rcp(x)
+ return Builder.CreateCall(Decl, { Den });
+ }
+
+ // Same as for 1.0, but expand the sign out of the constant.
+ if (CLHS->isExactlyValue(-1.0)) {
+ Function *Decl = Intrinsic::getDeclaration(
+ Mod, Intrinsic::amdgcn_rcp, Ty);
+
+ // -1.0 / x -> rcp (fneg x)
+ Value *FNeg = Builder.CreateFNeg(Den);
+ return Builder.CreateCall(Decl, { FNeg });
+ }
+ }
+ }
- bool IsOne = CNum->isExactlyValue(+1.0) || CNum->isExactlyValue(-1.0);
+ if (AllowInaccurateRcp) {
+ Function *Decl = Intrinsic::getDeclaration(
+ Mod, Intrinsic::amdgcn_rcp, Ty);
- // Reciprocal f32 is handled separately without denormals.
- return HasDenormals ^ IsOne;
+ // Turn into multiply by the reciprocal.
+ // x / y -> x * (1.0 / y)
+ Value *Recip = Builder.CreateCall(Decl, { Den });
+ return Builder.CreateFMul(Num, Recip);
+ }
+ return nullptr;
+}
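
As a rough scalar-float model of the three rewrites above (illustrative only; rcpf merely stands in for the amdgcn.rcp intrinsic and has none of its accuracy or denormal caveats):

static float rcpf(float X) { return 1.0f / X; }        // stand-in for v_rcp_f32

static float divOne(float X)              { return rcpf(X); }      // 1.0 / x  -> rcp(x)
static float divMinusOne(float X)         { return rcpf(-X); }     // -1.0 / x -> rcp(fneg x)
static float divGeneral(float A, float B) { return A * rcpf(B); }  // a / b    -> a * rcp(b), afn/unsafe only
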
+
+// Optimize with fdiv.fast:
+//
+// a/b -> fdiv.fast(a, b) when !fpmath >= 2.5ulp with denormals flushed.
+//
+// 1/x -> fdiv.fast(1,x) when !fpmath >= 2.5ulp.
+//
+// NOTE: optimizeWithRcp should be tried first because rcp is preferred.
+static Value *optimizeWithFDivFast(Value *Num, Value *Den, float ReqdAccuracy,
+ bool HasDenormals, IRBuilder<> &Builder,
+ Module *Mod) {
+ // fdiv.fast can achieve 2.5 ULP accuracy.
+ if (ReqdAccuracy < 2.5f)
+ return nullptr;
+
+ // Only have fdiv.fast for f32.
+ Type *Ty = Den->getType();
+ if (!Ty->isFloatTy())
+ return nullptr;
+
+ bool NumIsOne = false;
+ if (const ConstantFP *CNum = dyn_cast<ConstantFP>(Num)) {
+ if (CNum->isExactlyValue(+1.0) || CNum->isExactlyValue(-1.0))
+ NumIsOne = true;
+ }
+
+ // fdiv.fast does not support denormals, but it is always fine to use for 1.0/x.
+ if (HasDenormals && !NumIsOne)
+ return nullptr;
+
+ Function *Decl = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_fdiv_fast);
+ return Builder.CreateCall(Decl, { Num, Den });
}
-// Insert an intrinsic for fast fdiv for safe math situations where we can
-// reduce precision. Leave fdiv for situations where the generic node is
-// expected to be optimized.
+// Optimization is performed based on fpmath, fast-math flags and denormals to
+// rewrite fdiv with either rcp or fdiv.fast.
+//
+// With rcp:
+// 1/x -> rcp(x) when rcp is sufficiently accurate, or when an inaccurate rcp
+// is allowed with unsafe-fp-math or afn.
+//
+// a/b -> a*rcp(b) when inaccurate rcp is allowed with unsafe-fp-math or afn.
+//
+// With fdiv.fast:
+// a/b -> fdiv.fast(a, b) when !fpmath >= 2.5ulp with denormals flushed.
+//
+// 1/x -> fdiv.fast(1,x) when !fpmath >= 2.5ulp.
+//
+// NOTE: rcp is preferred in cases where both are legal.
bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
- Type *Ty = FDiv.getType();
- if (!Ty->getScalarType()->isFloatTy())
- return false;
+ Type *Ty = FDiv.getType()->getScalarType();
- MDNode *FPMath = FDiv.getMetadata(LLVMContext::MD_fpmath);
- if (!FPMath)
+ // No intrinsic for fdiv16 if target does not support f16.
+ if (Ty->isHalfTy() && !ST->has16BitInsts())
return false;
const FPMathOperator *FPOp = cast<const FPMathOperator>(&FDiv);
- float ULP = FPOp->getFPAccuracy();
- if (ULP < 2.5f)
- return false;
+ const float ReqdAccuracy = FPOp->getFPAccuracy();
+ // Inaccurate rcp is allowed with unsafe-fp-math or afn.
FastMathFlags FMF = FPOp->getFastMathFlags();
- bool UnsafeDiv = HasUnsafeFPMath || FMF.isFast() ||
- FMF.allowReciprocal();
+ const bool AllowInaccurateRcp = HasUnsafeFPMath || FMF.approxFunc();
- // With UnsafeDiv node will be optimized to just rcp and mul.
- if (UnsafeDiv)
- return false;
+ // rcp_f16 is accurate for !fpmath >= 1.0ulp.
+ // rcp_f32 is accurate for !fpmath >= 1.0ulp and denormals are flushed.
+ // rcp_f64 is never accurate.
+ const bool RcpIsAccurate = (Ty->isHalfTy() && ReqdAccuracy >= 1.0f) ||
+ (Ty->isFloatTy() && !HasFP32Denormals && ReqdAccuracy >= 1.0f);
- IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath);
+ IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()));
Builder.setFastMathFlags(FMF);
Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());
- Function *Decl = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_fdiv_fast);
-
Value *Num = FDiv.getOperand(0);
Value *Den = FDiv.getOperand(1);
Value *NewFDiv = nullptr;
-
- if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
+ if (auto *VT = dyn_cast<FixedVectorType>(FDiv.getType())) {
NewFDiv = UndefValue::get(VT);
// FIXME: Doesn't do the right thing for cases where the vector is partially
@@ -584,19 +784,25 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
for (unsigned I = 0, E = VT->getNumElements(); I != E; ++I) {
Value *NumEltI = Builder.CreateExtractElement(Num, I);
Value *DenEltI = Builder.CreateExtractElement(Den, I);
- Value *NewElt;
-
- if (shouldKeepFDivF32(NumEltI, UnsafeDiv, HasFP32Denormals)) {
+ // Try rcp first.
+ Value *NewElt = optimizeWithRcp(NumEltI, DenEltI, AllowInaccurateRcp,
+ RcpIsAccurate, Builder, Mod);
+ if (!NewElt) // Try fdiv.fast.
+ NewElt = optimizeWithFDivFast(NumEltI, DenEltI, ReqdAccuracy,
+ HasFP32Denormals, Builder, Mod);
+ if (!NewElt) // Keep the original.
NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
- } else {
- NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI });
- }
NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I);
}
- } else {
- if (!shouldKeepFDivF32(Num, UnsafeDiv, HasFP32Denormals))
- NewFDiv = Builder.CreateCall(Decl, { Num, Den });
+ } else { // Scalar FDiv.
+ // Try rcp first.
+ NewFDiv = optimizeWithRcp(Num, Den, AllowInaccurateRcp, RcpIsAccurate,
+ Builder, Mod);
+ if (!NewFDiv) { // Try fdiv.fast.
+ NewFDiv = optimizeWithFDivFast(Num, Den, ReqdAccuracy, HasFP32Denormals,
+ Builder, Mod);
+ }
}
if (NewFDiv) {
@@ -631,31 +837,49 @@ static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) {
return getMul64(Builder, LHS, RHS).second;
}
-// The fractional part of a float is enough to accurately represent up to
-// a 24-bit signed integer.
-Value* AMDGPUCodeGenPrepare::expandDivRem24(IRBuilder<> &Builder,
- BinaryOperator &I,
- Value *Num, Value *Den,
- bool IsDiv, bool IsSigned) const {
- assert(Num->getType()->isIntegerTy(32));
-
+/// Figure out how many bits are really needed for this division. \p AtLeast is
+/// an optimization hint to bypass the second ComputeNumSignBits call if the
+/// first one is insufficient. Returns -1 on failure.
+int AMDGPUCodeGenPrepare::getDivNumBits(BinaryOperator &I,
+ Value *Num, Value *Den,
+ unsigned AtLeast, bool IsSigned) const {
const DataLayout &DL = Mod->getDataLayout();
unsigned LHSSignBits = ComputeNumSignBits(Num, DL, 0, AC, &I);
- if (LHSSignBits < 9)
- return nullptr;
+ if (LHSSignBits < AtLeast)
+ return -1;
unsigned RHSSignBits = ComputeNumSignBits(Den, DL, 0, AC, &I);
- if (RHSSignBits < 9)
- return nullptr;
-
+ if (RHSSignBits < AtLeast)
+ return -1;
unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
- unsigned DivBits = 32 - SignBits;
+ unsigned DivBits = Num->getType()->getScalarSizeInBits() - SignBits;
if (IsSigned)
++DivBits;
+ return DivBits;
+}
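
A rough model of that computation, assuming 32-bit operands and using the GCC/Clang builtin __builtin_clrsb as a stand-in for ComputeNumSignBits; a sketch only, not the LLVM helper:

static int divNumBits32(int Num, int Den, unsigned AtLeast, bool IsSigned) {
  // __builtin_clrsb counts redundant sign bits, so +1 gives the sign-bit count.
  unsigned LHSSignBits = __builtin_clrsb(Num) + 1;
  if (LHSSignBits < AtLeast)
    return -1;
  unsigned RHSSignBits = __builtin_clrsb(Den) + 1;
  if (RHSSignBits < AtLeast)
    return -1;
  unsigned SignBits = LHSSignBits < RHSSignBits ? LHSSignBits : RHSSignBits;
  unsigned DivBits = 32 - SignBits;        // bits the quotient can actually need
  return IsSigned ? DivBits + 1 : DivBits; // one extra bit for the sign
}
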
- Type *Ty = Num->getType();
+// The fractional part of a float is enough to accurately represent up to
+// a 24-bit signed integer.
+Value *AMDGPUCodeGenPrepare::expandDivRem24(IRBuilder<> &Builder,
+ BinaryOperator &I,
+ Value *Num, Value *Den,
+ bool IsDiv, bool IsSigned) const {
+ int DivBits = getDivNumBits(I, Num, Den, 9, IsSigned);
+ if (DivBits == -1)
+ return nullptr;
+ return expandDivRem24Impl(Builder, I, Num, Den, DivBits, IsDiv, IsSigned);
+}
+
+Value *AMDGPUCodeGenPrepare::expandDivRem24Impl(IRBuilder<> &Builder,
+ BinaryOperator &I,
+ Value *Num, Value *Den,
+ unsigned DivBits,
+ bool IsDiv, bool IsSigned) const {
Type *I32Ty = Builder.getInt32Ty();
+ Num = Builder.CreateTrunc(Num, I32Ty);
+ Den = Builder.CreateTrunc(Den, I32Ty);
+
Type *F32Ty = Builder.getFloatTy();
ConstantInt *One = Builder.getInt32(1);
Value *JQ = One;
@@ -685,7 +909,9 @@ Value* AMDGPUCodeGenPrepare::expandDivRem24(IRBuilder<> &Builder,
Value *FB = IsSigned ? Builder.CreateSIToFP(IB,F32Ty)
: Builder.CreateUIToFP(IB,F32Ty);
- Value *RCP = Builder.CreateFDiv(ConstantFP::get(F32Ty, 1.0), FB);
+ Function *RcpDecl = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_rcp,
+ Builder.getFloatTy());
+ Value *RCP = Builder.CreateCall(RcpDecl, { FB });
Value *FQM = Builder.CreateFMul(FA, RCP);
// fq = trunc(fqm);
@@ -696,7 +922,10 @@ Value* AMDGPUCodeGenPrepare::expandDivRem24(IRBuilder<> &Builder,
Value *FQNeg = Builder.CreateFNeg(FQ);
// float fr = mad(fqneg, fb, fa);
- Value *FR = Builder.CreateIntrinsic(Intrinsic::amdgcn_fmad_ftz,
+ auto FMAD = !ST->hasMadMacF32Insts()
+ ? Intrinsic::fma
+ : (Intrinsic::ID)Intrinsic::amdgcn_fmad_ftz;
+ Value *FR = Builder.CreateIntrinsic(FMAD,
{FQNeg->getType()}, {FQNeg, FB, FA}, FQ);
// int iq = (int)fq;
@@ -725,21 +954,72 @@ Value* AMDGPUCodeGenPrepare::expandDivRem24(IRBuilder<> &Builder,
Res = Builder.CreateSub(Num, Rem);
}
- // Truncate to number of bits this divide really is.
- if (IsSigned) {
- Res = Builder.CreateTrunc(Res, Builder.getIntNTy(DivBits));
- Res = Builder.CreateSExt(Res, Ty);
- } else {
- ConstantInt *TruncMask = Builder.getInt32((UINT64_C(1) << DivBits) - 1);
- Res = Builder.CreateAnd(Res, TruncMask);
+ if (DivBits != 0 && DivBits < 32) {
+ // Extend in register from the number of bits this divide really is.
+ if (IsSigned) {
+ int InRegBits = 32 - DivBits;
+
+ Res = Builder.CreateShl(Res, InRegBits);
+ Res = Builder.CreateAShr(Res, InRegBits);
+ } else {
+ ConstantInt *TruncMask
+ = Builder.getInt32((UINT64_C(1) << DivBits) - 1);
+ Res = Builder.CreateAnd(Res, TruncMask);
+ }
}
return Res;
}
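
The float-based trick is easier to follow as ordinary C math. This sketch mirrors the structure of the emitted IR for the unsigned case only; the real expansion uses v_rcp_f32 and a fused mad, and is applied only when getDivNumBits proves both operands fit in 24 bits:

#include <cmath>
#include <cstdint>

static uint32_t udiv24(uint32_t Num, uint32_t Den) {
  float FA = (float)Num;
  float FB = (float)Den;
  float RCP = 1.0f / FB;             // stand-in for rcp(fb)
  float FQ = truncf(FA * RCP);       // fq = trunc(fa * rcp(fb))
  float FR = fmaf(-FQ, FB, FA);      // fr = mad(-fq, fb, fa)
  uint32_t IQ = (uint32_t)FQ;
  // Correct the quotient if the remainder is still at least |fb|.
  return IQ + (fabsf(FR) >= fabsf(FB) ? 1u : 0u);
}
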
-Value* AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder,
- BinaryOperator &I,
- Value *Num, Value *Den) const {
+// Try to recognize special cases where the DAG will emit better expansions
+// than the general expansion we do here.
+
+// TODO: It would be better to just directly handle those optimizations here.
+bool AMDGPUCodeGenPrepare::divHasSpecialOptimization(
+ BinaryOperator &I, Value *Num, Value *Den) const {
+ if (Constant *C = dyn_cast<Constant>(Den)) {
+ // Arbitrary constants get a better expansion as long as a wider mulhi is
+ // legal.
+ if (C->getType()->getScalarSizeInBits() <= 32)
+ return true;
+
+ // TODO: The SDiv expansion checks for 'not exact' for some reason.
+
+ // If there's no wider mulhi, there's only a better expansion for powers of
+ // two.
+ // TODO: Should really know for each vector element.
+ if (isKnownToBeAPowerOfTwo(C, *DL, true, 0, AC, &I, DT))
+ return true;
+
+ return false;
+ }
+
+ if (BinaryOperator *BinOpDen = dyn_cast<BinaryOperator>(Den)) {
+ // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
+ if (BinOpDen->getOpcode() == Instruction::Shl &&
+ isa<Constant>(BinOpDen->getOperand(0)) &&
+ isKnownToBeAPowerOfTwo(BinOpDen->getOperand(0), *DL, true,
+ 0, AC, &I, DT)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout *DL) {
+ // Check whether the sign can be determined statically.
+ KnownBits Known = computeKnownBits(V, *DL);
+ if (Known.isNegative())
+ return Constant::getAllOnesValue(V->getType());
+ if (Known.isNonNegative())
+ return Constant::getNullValue(V->getType());
+ return Builder.CreateAShr(V, Builder.getInt32(31));
+}
+
+Value *AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder,
+ BinaryOperator &I, Value *X,
+ Value *Y) const {
Instruction::BinaryOps Opc = I.getOpcode();
assert(Opc == Instruction::URem || Opc == Instruction::UDiv ||
Opc == Instruction::SRem || Opc == Instruction::SDiv);
@@ -748,142 +1028,171 @@ Value* AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder,
FMF.setFast();
Builder.setFastMathFlags(FMF);
- if (isa<Constant>(Den))
- return nullptr; // Keep it for optimization
+ if (divHasSpecialOptimization(I, X, Y))
+ return nullptr; // Keep it for later optimization.
bool IsDiv = Opc == Instruction::UDiv || Opc == Instruction::SDiv;
bool IsSigned = Opc == Instruction::SRem || Opc == Instruction::SDiv;
- Type *Ty = Num->getType();
+ Type *Ty = X->getType();
Type *I32Ty = Builder.getInt32Ty();
Type *F32Ty = Builder.getFloatTy();
if (Ty->getScalarSizeInBits() < 32) {
if (IsSigned) {
- Num = Builder.CreateSExt(Num, I32Ty);
- Den = Builder.CreateSExt(Den, I32Ty);
+ X = Builder.CreateSExt(X, I32Ty);
+ Y = Builder.CreateSExt(Y, I32Ty);
} else {
- Num = Builder.CreateZExt(Num, I32Ty);
- Den = Builder.CreateZExt(Den, I32Ty);
+ X = Builder.CreateZExt(X, I32Ty);
+ Y = Builder.CreateZExt(Y, I32Ty);
}
}
- if (Value *Res = expandDivRem24(Builder, I, Num, Den, IsDiv, IsSigned)) {
- Res = Builder.CreateTrunc(Res, Ty);
- return Res;
+ if (Value *Res = expandDivRem24(Builder, I, X, Y, IsDiv, IsSigned)) {
+ return IsSigned ? Builder.CreateSExtOrTrunc(Res, Ty) :
+ Builder.CreateZExtOrTrunc(Res, Ty);
}
ConstantInt *Zero = Builder.getInt32(0);
ConstantInt *One = Builder.getInt32(1);
- ConstantInt *MinusOne = Builder.getInt32(~0);
Value *Sign = nullptr;
if (IsSigned) {
- ConstantInt *K31 = Builder.getInt32(31);
- Value *LHSign = Builder.CreateAShr(Num, K31);
- Value *RHSign = Builder.CreateAShr(Den, K31);
+ Value *SignX = getSign32(X, Builder, DL);
+ Value *SignY = getSign32(Y, Builder, DL);
// Remainder sign is the same as LHS
- Sign = IsDiv ? Builder.CreateXor(LHSign, RHSign) : LHSign;
+ Sign = IsDiv ? Builder.CreateXor(SignX, SignY) : SignX;
- Num = Builder.CreateAdd(Num, LHSign);
- Den = Builder.CreateAdd(Den, RHSign);
+ X = Builder.CreateAdd(X, SignX);
+ Y = Builder.CreateAdd(Y, SignY);
- Num = Builder.CreateXor(Num, LHSign);
- Den = Builder.CreateXor(Den, RHSign);
+ X = Builder.CreateXor(X, SignX);
+ Y = Builder.CreateXor(Y, SignY);
}
- // RCP = URECIP(Den) = 2^32 / Den + e
- // e is rounding error.
- Value *DEN_F32 = Builder.CreateUIToFP(Den, F32Ty);
- Value *RCP_F32 = Builder.CreateFDiv(ConstantFP::get(F32Ty, 1.0), DEN_F32);
- Constant *UINT_MAX_PLUS_1 = ConstantFP::get(F32Ty, BitsToFloat(0x4f800000));
- Value *RCP_SCALE = Builder.CreateFMul(RCP_F32, UINT_MAX_PLUS_1);
- Value *RCP = Builder.CreateFPToUI(RCP_SCALE, I32Ty);
-
- // RCP_LO, RCP_HI = mul(RCP, Den) */
- Value *RCP_LO, *RCP_HI;
- std::tie(RCP_LO, RCP_HI) = getMul64(Builder, RCP, Den);
-
- // NEG_RCP_LO = -RCP_LO
- Value *NEG_RCP_LO = Builder.CreateNeg(RCP_LO);
-
- // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
- Value *RCP_HI_0_CC = Builder.CreateICmpEQ(RCP_HI, Zero);
- Value *ABS_RCP_LO = Builder.CreateSelect(RCP_HI_0_CC, NEG_RCP_LO, RCP_LO);
-
- // Calculate the rounding error from the URECIP instruction
- // E = mulhu(ABS_RCP_LO, RCP)
- Value *E = getMulHu(Builder, ABS_RCP_LO, RCP);
-
- // RCP_A_E = RCP + E
- Value *RCP_A_E = Builder.CreateAdd(RCP, E);
-
- // RCP_S_E = RCP - E
- Value *RCP_S_E = Builder.CreateSub(RCP, E);
-
- // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
- Value *Tmp0 = Builder.CreateSelect(RCP_HI_0_CC, RCP_A_E, RCP_S_E);
-
- // Quotient = mulhu(Tmp0, Num)
- Value *Quotient = getMulHu(Builder, Tmp0, Num);
-
- // Num_S_Remainder = Quotient * Den
- Value *Num_S_Remainder = Builder.CreateMul(Quotient, Den);
+ // The algorithm here is based on ideas from "Software Integer Division", Tom
+ // Rodeheffer, August 2008.
+ //
+ // unsigned udiv(unsigned x, unsigned y) {
+ // // Initial estimate of inv(y). The constant is less than 2^32 to ensure
+ // // that this is a lower bound on inv(y), even if some of the calculations
+ // // round up.
+ // unsigned z = (unsigned)((4294967296.0 - 512.0) * v_rcp_f32((float)y));
+ //
+ // // One round of UNR (Unsigned integer Newton-Raphson) to improve z.
+ // // Empirically this is guaranteed to give a "two-y" lower bound on
+ // // inv(y).
+ // z += umulh(z, -y * z);
+ //
+ // // Quotient/remainder estimate.
+ // unsigned q = umulh(x, z);
+ // unsigned r = x - q * y;
+ //
+ // // Two rounds of quotient/remainder refinement.
+ // if (r >= y) {
+ // ++q;
+ // r -= y;
+ // }
+ // if (r >= y) {
+ // ++q;
+ // r -= y;
+ // }
+ //
+ // return q;
+ // }
+
+ // Initial estimate of inv(y).
+ Value *FloatY = Builder.CreateUIToFP(Y, F32Ty);
+ Function *Rcp = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_rcp, F32Ty);
+ Value *RcpY = Builder.CreateCall(Rcp, {FloatY});
+ Constant *Scale = ConstantFP::get(F32Ty, BitsToFloat(0x4F7FFFFE));
+ Value *ScaledY = Builder.CreateFMul(RcpY, Scale);
+ Value *Z = Builder.CreateFPToUI(ScaledY, I32Ty);
+
+ // One round of UNR.
+ Value *NegY = Builder.CreateSub(Zero, Y);
+ Value *NegYZ = Builder.CreateMul(NegY, Z);
+ Z = Builder.CreateAdd(Z, getMulHu(Builder, Z, NegYZ));
+
+ // Quotient/remainder estimate.
+ Value *Q = getMulHu(Builder, X, Z);
+ Value *R = Builder.CreateSub(X, Builder.CreateMul(Q, Y));
+
+ // First quotient/remainder refinement.
+ Value *Cond = Builder.CreateICmpUGE(R, Y);
+ if (IsDiv)
+ Q = Builder.CreateSelect(Cond, Builder.CreateAdd(Q, One), Q);
+ R = Builder.CreateSelect(Cond, Builder.CreateSub(R, Y), R);
+
+ // Second quotient/remainder refinement.
+ Cond = Builder.CreateICmpUGE(R, Y);
+ Value *Res;
+ if (IsDiv)
+ Res = Builder.CreateSelect(Cond, Builder.CreateAdd(Q, One), Q);
+ else
+ Res = Builder.CreateSelect(Cond, Builder.CreateSub(R, Y), R);
- // Remainder = Num - Num_S_Remainder
- Value *Remainder = Builder.CreateSub(Num, Num_S_Remainder);
+ if (IsSigned) {
+ Res = Builder.CreateXor(Res, Sign);
+ Res = Builder.CreateSub(Res, Sign);
+ }
- // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
- Value *Rem_GE_Den_CC = Builder.CreateICmpUGE(Remainder, Den);
- Value *Remainder_GE_Den = Builder.CreateSelect(Rem_GE_Den_CC, MinusOne, Zero);
+ Res = Builder.CreateTrunc(Res, Ty);
- // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
- Value *Num_GE_Num_S_Rem_CC = Builder.CreateICmpUGE(Num, Num_S_Remainder);
- Value *Remainder_GE_Zero = Builder.CreateSelect(Num_GE_Num_S_Rem_CC,
- MinusOne, Zero);
+ return Res;
+}
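
The pseudocode in the comment above compiles almost verbatim. A directly runnable version, with umulh spelled out through 64-bit arithmetic and an ordinary double reciprocal standing in for v_rcp_f32 (illustrative only):

#include <cstdint>

static uint32_t umulh(uint32_t A, uint32_t B) {
  return (uint32_t)(((uint64_t)A * B) >> 32);
}

static uint32_t udiv32(uint32_t X, uint32_t Y) {
  // Initial estimate of inv(y); the scale is just under 2^32 so the estimate
  // remains a lower bound even when the intermediate math rounds up.
  uint32_t Z = (uint32_t)((4294967296.0 - 512.0) * (1.0 / (double)Y));

  // One Newton-Raphson step to improve z.
  Z += umulh(Z, (uint32_t)(0u - Y) * Z);

  // Quotient/remainder estimate followed by two refinement steps.
  uint32_t Q = umulh(X, Z);
  uint32_t R = X - Q * Y;
  if (R >= Y) { ++Q; R -= Y; }
  if (R >= Y) { ++Q; R -= Y; }
  return Q;
}
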
- // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
- Value *Tmp1 = Builder.CreateAnd(Remainder_GE_Den, Remainder_GE_Zero);
- Value *Tmp1_0_CC = Builder.CreateICmpEQ(Tmp1, Zero);
+Value *AMDGPUCodeGenPrepare::shrinkDivRem64(IRBuilder<> &Builder,
+ BinaryOperator &I,
+ Value *Num, Value *Den) const {
+ if (!ExpandDiv64InIR && divHasSpecialOptimization(I, Num, Den))
+ return nullptr; // Keep it for later optimization.
- Value *Res;
- if (IsDiv) {
- // Quotient_A_One = Quotient + 1
- Value *Quotient_A_One = Builder.CreateAdd(Quotient, One);
+ Instruction::BinaryOps Opc = I.getOpcode();
- // Quotient_S_One = Quotient - 1
- Value *Quotient_S_One = Builder.CreateSub(Quotient, One);
+ bool IsDiv = Opc == Instruction::SDiv || Opc == Instruction::UDiv;
+ bool IsSigned = Opc == Instruction::SDiv || Opc == Instruction::SRem;
- // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
- Value *Div = Builder.CreateSelect(Tmp1_0_CC, Quotient, Quotient_A_One);
+ int NumDivBits = getDivNumBits(I, Num, Den, 32, IsSigned);
+ if (NumDivBits == -1)
+ return nullptr;
- // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
- Res = Builder.CreateSelect(Num_GE_Num_S_Rem_CC, Div, Quotient_S_One);
- } else {
- // Remainder_S_Den = Remainder - Den
- Value *Remainder_S_Den = Builder.CreateSub(Remainder, Den);
+ Value *Narrowed = nullptr;
+ if (NumDivBits <= 24) {
+ Narrowed = expandDivRem24Impl(Builder, I, Num, Den, NumDivBits,
+ IsDiv, IsSigned);
+ } else if (NumDivBits <= 32) {
+ Narrowed = expandDivRem32(Builder, I, Num, Den);
+ }
- // Remainder_A_Den = Remainder + Den
- Value *Remainder_A_Den = Builder.CreateAdd(Remainder, Den);
+ if (Narrowed) {
+ return IsSigned ? Builder.CreateSExt(Narrowed, Num->getType()) :
+ Builder.CreateZExt(Narrowed, Num->getType());
+ }
- // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
- Value *Rem = Builder.CreateSelect(Tmp1_0_CC, Remainder, Remainder_S_Den);
+ return nullptr;
+}
- // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
- Res = Builder.CreateSelect(Num_GE_Num_S_Rem_CC, Rem, Remainder_A_Den);
+void AMDGPUCodeGenPrepare::expandDivRem64(BinaryOperator &I) const {
+ Instruction::BinaryOps Opc = I.getOpcode();
+ // Do the general expansion.
+ if (Opc == Instruction::UDiv || Opc == Instruction::SDiv) {
+ expandDivisionUpTo64Bits(&I);
+ return;
}
- if (IsSigned) {
- Res = Builder.CreateXor(Res, Sign);
- Res = Builder.CreateSub(Res, Sign);
+ if (Opc == Instruction::URem || Opc == Instruction::SRem) {
+ expandRemainderUpTo64Bits(&I);
+ return;
}
- Res = Builder.CreateTrunc(Res, Ty);
-
- return Res;
+ llvm_unreachable("not a division");
}
bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
+ if (foldBinOpIntoSelect(I))
+ return true;
+
if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
DA->isUniform(&I) && promoteUniformOpToI32(I))
return true;
@@ -895,27 +1204,54 @@ bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
Instruction::BinaryOps Opc = I.getOpcode();
Type *Ty = I.getType();
Value *NewDiv = nullptr;
+ unsigned ScalarSize = Ty->getScalarSizeInBits();
+
+ SmallVector<BinaryOperator *, 8> Div64ToExpand;
+
if ((Opc == Instruction::URem || Opc == Instruction::UDiv ||
Opc == Instruction::SRem || Opc == Instruction::SDiv) &&
- Ty->getScalarSizeInBits() <= 32) {
+ ScalarSize <= 64 &&
+ !DisableIDivExpand) {
Value *Num = I.getOperand(0);
Value *Den = I.getOperand(1);
IRBuilder<> Builder(&I);
Builder.SetCurrentDebugLocation(I.getDebugLoc());
- if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
+ if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
NewDiv = UndefValue::get(VT);
for (unsigned N = 0, E = VT->getNumElements(); N != E; ++N) {
Value *NumEltN = Builder.CreateExtractElement(Num, N);
Value *DenEltN = Builder.CreateExtractElement(Den, N);
- Value *NewElt = expandDivRem32(Builder, I, NumEltN, DenEltN);
- if (!NewElt)
- NewElt = Builder.CreateBinOp(Opc, NumEltN, DenEltN);
+
+ Value *NewElt;
+ if (ScalarSize <= 32) {
+ NewElt = expandDivRem32(Builder, I, NumEltN, DenEltN);
+ if (!NewElt)
+ NewElt = Builder.CreateBinOp(Opc, NumEltN, DenEltN);
+ } else {
+ // See if this 64-bit division can be shrunk to 32/24-bits before
+ // producing the general expansion.
+ NewElt = shrinkDivRem64(Builder, I, NumEltN, DenEltN);
+ if (!NewElt) {
+ // The general 64-bit expansion introduces control flow and doesn't
+ // return the new value. Just insert a scalar copy and defer
+ // expanding it.
+ NewElt = Builder.CreateBinOp(Opc, NumEltN, DenEltN);
+ Div64ToExpand.push_back(cast<BinaryOperator>(NewElt));
+ }
+ }
+
NewDiv = Builder.CreateInsertElement(NewDiv, NewElt, N);
}
} else {
- NewDiv = expandDivRem32(Builder, I, Num, Den);
+ if (ScalarSize <= 32)
+ NewDiv = expandDivRem32(Builder, I, Num, Den);
+ else {
+ NewDiv = shrinkDivRem64(Builder, I, Num, Den);
+ if (!NewDiv)
+ Div64ToExpand.push_back(&I);
+ }
}
if (NewDiv) {
@@ -925,6 +1261,14 @@ bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
}
}
+ if (ExpandDiv64InIR) {
+ // TODO: We get much worse code in specially handled constant cases.
+ for (BinaryOperator *Div : Div64ToExpand) {
+ expandDivRem64(*Div);
+ Changed = true;
+ }
+ }
+
return Changed;
}
@@ -1033,16 +1377,36 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
ST = &TM.getSubtarget<GCNSubtarget>(F);
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DA = &getAnalysis<LegacyDivergenceAnalysis>();
+
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
+
HasUnsafeFPMath = hasUnsafeFPMath(F);
- HasFP32Denormals = ST->hasFP32Denormals(F);
+
+ AMDGPU::SIModeRegisterDefaults Mode(F);
+ HasFP32Denormals = Mode.allFP32Denormals();
bool MadeChange = false;
- for (BasicBlock &BB : F) {
+ Function::iterator NextBB;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; FI = NextBB) {
+ BasicBlock *BB = &*FI;
+ NextBB = std::next(FI);
+
BasicBlock::iterator Next;
- for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; I = Next) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; I = Next) {
Next = std::next(I);
+
MadeChange |= visit(*I);
+
+ if (Next != E) { // Control flow changed
+ BasicBlock *NextInstBB = Next->getParent();
+ if (NextInstBB != BB) {
+ BB = NextInstBB;
+ E = BB->end();
+ FE = F.end();
+ }
+ }
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
new file mode 100644
index 000000000000..faaf9168d0dd
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -0,0 +1,69 @@
+//=- AMDGPUCombine.td - Define AMDGPU Combine Rules ----------*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/GlobalISel/Combine.td"
+
+// TODO: This really belongs after legalization after scalarization.
+// TODO: GICombineRules should accept subtarget predicates
+
+def fmin_fmax_legacy_matchdata : GIDefMatchData<"FMinFMaxLegacyInfo">;
+
+def fcmp_select_to_fmin_fmax_legacy : GICombineRule<
+ (defs root:$select, fmin_fmax_legacy_matchdata:$matchinfo),
+ (match (wip_match_opcode G_SELECT):$select,
+ [{ return matchFMinFMaxLegacy(*${select}, MRI, *MF, ${matchinfo}); }]),
+ (apply [{ applySelectFCmpToFMinToFMaxLegacy(*${select}, ${matchinfo}); }])>;
+
+
+def uchar_to_float : GICombineRule<
+ (defs root:$itofp),
+ (match (wip_match_opcode G_UITOFP, G_SITOFP):$itofp,
+ [{ return matchUCharToFloat(*${itofp}, MRI, *MF, Helper); }]),
+ (apply [{ applyUCharToFloat(*${itofp}); }])>;
+
+def cvt_f32_ubyteN_matchdata : GIDefMatchData<"CvtF32UByteMatchInfo">;
+
+def cvt_f32_ubyteN : GICombineRule<
+ (defs root:$cvt_f32_ubyteN, cvt_f32_ubyteN_matchdata:$matchinfo),
+ (match (wip_match_opcode G_AMDGPU_CVT_F32_UBYTE0,
+ G_AMDGPU_CVT_F32_UBYTE1,
+ G_AMDGPU_CVT_F32_UBYTE2,
+ G_AMDGPU_CVT_F32_UBYTE3):$cvt_f32_ubyteN,
+ [{ return matchCvtF32UByteN(*${cvt_f32_ubyteN}, MRI, *MF, ${matchinfo}); }]),
+ (apply [{ applyCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }])>;
+
+// Combines which should only apply on SI/VI
+def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
+
+
+def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
+ "AMDGPUGenPreLegalizerCombinerHelper", [all_combines,
+ elide_br_by_inverting_cond]> {
+ let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule";
+}
+
+
+// FIXME: combines_for_extload can introduce illegal extloads which
+// aren't re-legalized.
+// FIXME: Is there a way to remove a single item from all_combines?
+def all_combines_minus_extload : GICombineGroup<[trivial_combines,
+ ptr_add_immed_chain, combine_indexed_load_store, undef_combines,
+ identity_combines]
+>;
+
+def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
+ "AMDGPUGenPostLegalizerCombinerHelper",
+ [all_combines_minus_extload, gfx6gfx7_combines,
+ uchar_to_float, cvt_f32_ubyteN]> {
+ let DisableRuleOption = "amdgpupostlegalizercombiner-disable-rule";
+}
+
+def AMDGPURegBankCombinerHelper : GICombinerHelper<
+ "AMDGPUGenRegBankCombinerHelper", []> {
+ let DisableRuleOption = "amdgpuregbankcombiner-disable-rule";
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
new file mode 100644
index 000000000000..25c82ed61fc2
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
@@ -0,0 +1,150 @@
+//===--- AMDGPUExportClustering.cpp - AMDGPU Export Clustering -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains a DAG scheduling mutation to cluster shader
+/// exports.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUExportClustering.h"
+#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class ExportClustering : public ScheduleDAGMutation {
+public:
+ ExportClustering() {}
+ void apply(ScheduleDAGInstrs *DAG) override;
+};
+
+static bool isExport(const SUnit &SU) {
+ const MachineInstr *MI = SU.getInstr();
+ return MI->getOpcode() == AMDGPU::EXP ||
+ MI->getOpcode() == AMDGPU::EXP_DONE;
+}
+
+static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {
+ const MachineInstr *MI = SU->getInstr();
+ int Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();
+ return Imm >= 12 && Imm <= 15;
+}
+
+static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain,
+ unsigned PosCount) {
+ if (!PosCount || PosCount == Chain.size())
+ return;
+
+ // Position exports should occur as soon as possible in the shader
+ // for optimal performance. This moves position exports before
+ // other exports while preserving the order within different export
+ // types (pos or other).
+ SmallVector<SUnit *, 8> Copy(Chain);
+ unsigned PosIdx = 0;
+ unsigned OtherIdx = PosCount;
+ for (SUnit *SU : Copy) {
+ if (isPositionExport(TII, SU))
+ Chain[PosIdx++] = SU;
+ else
+ Chain[OtherIdx++] = SU;
+ }
+}
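
The reordering performed by sortChain is effectively a stable partition: position exports move to the front while the relative order inside each group is preserved. A standard-library equivalent, for illustration only:

#include <algorithm>
#include <vector>

template <typename T, typename Pred>
static void movePositionExportsFirst(std::vector<T> &Chain, Pred IsPos) {
  // Keeps the relative order of both groups, like the copy loop above.
  std::stable_partition(Chain.begin(), Chain.end(), IsPos);
}
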
+
+static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) {
+ SUnit *ChainHead = Exports.front();
+
+ // Now construct cluster from chain by adding new edges.
+ for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) {
+ SUnit *SUa = Exports[Idx];
+ SUnit *SUb = Exports[Idx + 1];
+
+ // Copy all dependencies to the head of the chain to avoid any
+ // computation being inserted into the chain.
+ for (const SDep &Pred : SUb->Preds) {
+ SUnit *PredSU = Pred.getSUnit();
+ if (!isExport(*PredSU) && !Pred.isWeak())
+ DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial));
+ }
+
+ // New barrier edge ordering exports
+ DAG->addEdge(SUb, SDep(SUa, SDep::Barrier));
+ // Also add cluster edge
+ DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));
+ }
+}
+
+static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) {
+ SmallVector<SDep, 2> ToAdd, ToRemove;
+
+ for (const SDep &Pred : SU.Preds) {
+ SUnit *PredSU = Pred.getSUnit();
+ if (Pred.isBarrier() && isExport(*PredSU)) {
+ ToRemove.push_back(Pred);
+ if (isExport(SU))
+ continue;
+
+ // If we remove a barrier we need to copy dependencies
+ // from the predecessor to maintain order.
+ for (const SDep &ExportPred : PredSU->Preds) {
+ SUnit *ExportPredSU = ExportPred.getSUnit();
+ if (ExportPred.isBarrier() && !isExport(*ExportPredSU))
+ ToAdd.push_back(SDep(ExportPredSU, SDep::Barrier));
+ }
+ }
+ }
+
+ for (SDep Pred : ToRemove)
+ SU.removePred(Pred);
+ for (SDep Pred : ToAdd)
+ DAG->addEdge(&SU, Pred);
+}
+
+void ExportClustering::apply(ScheduleDAGInstrs *DAG) {
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
+
+ SmallVector<SUnit *, 8> Chain;
+
+ // Pass through DAG gathering a list of exports and removing barrier edges
+ // creating dependencies on exports. Freeing exports of successor edges
+ // allows more scheduling freedom, and nothing should be order dependent
+ // on exports. Edges will be added later to order the exports.
+ unsigned PosCount = 0;
+ for (SUnit &SU : DAG->SUnits) {
+ if (!isExport(SU))
+ continue;
+
+ Chain.push_back(&SU);
+ if (isPositionExport(TII, &SU))
+ PosCount++;
+
+ removeExportDependencies(DAG, SU);
+
+ SmallVector<SDep, 4> Succs(SU.Succs);
+ for (SDep Succ : Succs)
+ removeExportDependencies(DAG, *Succ.getSUnit());
+ }
+
+ // Apply clustering if there are multiple exports
+ if (Chain.size() > 1) {
+ sortChain(TII, Chain, PosCount);
+ buildCluster(Chain, DAG);
+ }
+}
+
+} // end namespace
+
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation() {
+ return std::make_unique<ExportClustering>();
+}
+
+} // end namespace llvm
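
A hedged sketch of how a target would typically wire this mutation into its machine scheduler; createGenericSchedLive and addMutation are the generic MachineScheduler hooks, but the wrapper function below is illustrative and not a quote of AMDGPUTargetMachine:

#include "AMDGPUExportClustering.h"
#include "llvm/CodeGen/MachineScheduler.h"

using namespace llvm;

static ScheduleDAGInstrs *
createSchedulerWithExportClustering(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  // Cluster shader exports on top of the generic scheduling strategy.
  DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
  return DAG;
}
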
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.h
new file mode 100644
index 000000000000..58491d0671e4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.h
@@ -0,0 +1,15 @@
+//===- AMDGPUExportClustering.h - AMDGPU Export Clustering ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation();
+
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
index ea3952c316e4..db00f8f711a3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
@@ -18,15 +18,6 @@ def FeatureFMA : SubtargetFeature<"fmaf",
"Enable single precision FMA (not as fast as mul+add, but fused)"
>;
-// Some instructions do not support denormals despite this flag. Using
-// fp32 denormals also causes instructions to run at the double
-// precision rate for the device.
-def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
- "FP32Denormals",
- "true",
- "Enable single precision denormal handling"
->;
-
class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
"localmemorysize"#Value,
"LocalMemorySize",
@@ -38,16 +29,16 @@ def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
-class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
- "wavefrontsize"#Value,
- "WavefrontSize",
- !cast<string>(Value),
+class SubtargetFeatureWavefrontSize <int ValueLog2> : SubtargetFeature<
+ "wavefrontsize"#!shl(1, ValueLog2),
+ "WavefrontSizeLog2",
+ !cast<string>(ValueLog2),
"The number of threads per wavefront"
>;
-def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
-def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
-def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
+def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<4>;
+def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<5>;
+def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<6>;
class SubtargetFeatureGeneration <string Value, string FeatureName,
string Subtarget,
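
With the wavefront size now stored as a log2 value in the feature above, the subtarget side only needs a shift to recover the actual size. A sketch of the expected accessor shape; the class and member names here are assumptions, not quotes of the subtarget headers:

class ExampleSubtarget {
  unsigned WavefrontSizeLog2 = 5;   // filled in from the subtarget feature bits
public:
  unsigned getWavefrontSizeLog2() const { return WavefrontSizeLog2; }
  unsigned getWavefrontSize() const { return 1u << WavefrontSizeLog2; } // 32
};
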
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp
index 9ba04d113c70..ea6c6d0fd212 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp
@@ -15,6 +15,7 @@
#include "AMDGPU.h"
#include "llvm/IR/InstVisitor.h"
+#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
using namespace llvm;
@@ -31,12 +32,13 @@ class AMDGPUFixFunctionBitcasts final
bool Modified;
public:
- void visitCallSite(CallSite CS) {
- if (CS.getCalledFunction())
+ void visitCallBase(CallBase &CB) {
+ if (CB.getCalledFunction())
return;
- auto Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
- if (Callee && isLegalToPromote(CS, Callee)) {
- promoteCall(CS, Callee);
+ auto *Callee =
+ dyn_cast<Function>(CB.getCalledOperand()->stripPointerCasts());
+ if (Callee && isLegalToPromote(CB, Callee)) {
+ promoteCall(CB, Callee);
Modified = true;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h
index 92e256cf2829..260a18e278cf 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h
@@ -26,7 +26,7 @@ namespace llvm {
class AMDGPUFrameLowering : public TargetFrameLowering {
public:
AMDGPUFrameLowering(StackDirection D, Align StackAl, int LAO,
- Align TransAl = Align::None());
+ Align TransAl = Align(1));
~AMDGPUFrameLowering() override;
/// \returns The number of 32-bit sub-registers that are used when storing
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index d420aa02ac28..3f12addbcc79 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
include "AMDGPU.td"
+include "AMDGPUCombine.td"
def sd_vsrc0 : ComplexPattern<i32, 1, "">;
def gi_vsrc0 :
@@ -30,6 +31,10 @@ def gi_vop3mods :
GIComplexOperandMatcher<s32, "selectVOP3Mods">,
GIComplexPatternEquiv<VOP3Mods>;
+def gi_vop3_no_mods :
+ GIComplexOperandMatcher<s32, "selectVOP3NoMods">,
+ GIComplexPatternEquiv<VOP3NoMods>;
+
def gi_vop3mods_nnan :
GIComplexOperandMatcher<s32, "selectVOP3Mods_nnan">,
GIComplexPatternEquiv<VOP3Mods_nnan>;
@@ -38,9 +43,9 @@ def gi_vop3omods :
GIComplexOperandMatcher<s32, "selectVOP3OMods">,
GIComplexPatternEquiv<VOP3OMods>;
-def gi_vop3opselmods0 :
- GIComplexOperandMatcher<s32, "selectVOP3OpSelMods0">,
- GIComplexPatternEquiv<VOP3OpSelMods0>;
+def gi_vop3pmods :
+ GIComplexOperandMatcher<s32, "selectVOP3PMods">,
+ GIComplexPatternEquiv<VOP3PMods>;
def gi_vop3opselmods :
GIComplexOperandMatcher<s32, "selectVOP3OpSelMods">,
@@ -83,6 +88,33 @@ def gi_ds_1addr_1offset :
GIComplexOperandMatcher<s32, "selectDS1Addr1Offset">,
GIComplexPatternEquiv<DS1Addr1Offset>;
+def gi_ds_64bit_4byte_aligned :
+ GIComplexOperandMatcher<s64, "selectDS64Bit4ByteAligned">,
+ GIComplexPatternEquiv<DS64Bit4ByteAligned>;
+
+def gi_mubuf_addr64 :
+ GIComplexOperandMatcher<s64, "selectMUBUFAddr64">,
+ GIComplexPatternEquiv<MUBUFAddr64>;
+
+def gi_mubuf_offset :
+ GIComplexOperandMatcher<s64, "selectMUBUFOffset">,
+ GIComplexPatternEquiv<MUBUFOffset>;
+
+def gi_mubuf_addr64_atomic :
+ GIComplexOperandMatcher<s64, "selectMUBUFAddr64Atomic">,
+ GIComplexPatternEquiv<MUBUFAddr64Atomic>;
+
+def gi_mubuf_offset_atomic :
+ GIComplexOperandMatcher<s64, "selectMUBUFOffsetAtomic">,
+ GIComplexPatternEquiv<MUBUFOffsetAtomic>;
+
+def gi_smrd_buffer_imm :
+ GIComplexOperandMatcher<s64, "selectSMRDBufferImm">,
+ GIComplexPatternEquiv<SMRDBufferImm>;
+
+def gi_smrd_buffer_imm32 :
+ GIComplexOperandMatcher<s64, "selectSMRDBufferImm32">,
+ GIComplexPatternEquiv<SMRDBufferImm32>;
// Separate load nodes are defined to glue m0 initialization in
// SelectionDAG. The GISel selector can just insert m0 initialization
@@ -116,9 +148,54 @@ def : GINodeEquiv<G_ATOMICRMW_UMIN, atomic_load_umin_glue>;
def : GINodeEquiv<G_ATOMICRMW_UMAX, atomic_load_umax_glue>;
def : GINodeEquiv<G_ATOMICRMW_FADD, atomic_load_fadd_glue>;
-def : GINodeEquiv<G_AMDGPU_FFBH_U32, AMDGPUffbh_u32>;
-def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
+def : GINodeEquiv<G_AMDGPU_FFBH_U32, AMDGPUffbh_u32_impl>;
+def : GINodeEquiv<G_AMDGPU_FMIN_LEGACY, AMDGPUfmin_legacy>;
+def : GINodeEquiv<G_AMDGPU_FMAX_LEGACY, AMDGPUfmax_legacy>;
+def : GINodeEquiv<G_AMDGPU_RCP_IFLAG, AMDGPUrcp_iflag>;
+def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE0, AMDGPUcvt_f32_ubyte0>;
+def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE1, AMDGPUcvt_f32_ubyte1>;
+def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE2, AMDGPUcvt_f32_ubyte2>;
+def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE3, AMDGPUcvt_f32_ubyte3>;
+
+def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD, SIbuffer_load>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_USHORT, SIbuffer_load_ushort>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_UBYTE, SIbuffer_load_ubyte>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_SSHORT, SIbuffer_load_short>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_SBYTE, SIbuffer_load_byte>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_FORMAT, SIbuffer_load_format>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_FORMAT_D16, SIbuffer_load_format_d16>;
+def : GINodeEquiv<G_AMDGPU_TBUFFER_LOAD_FORMAT, SItbuffer_load>;
+def : GINodeEquiv<G_AMDGPU_TBUFFER_LOAD_FORMAT_D16, SItbuffer_load_d16>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_STORE, SIbuffer_store>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_STORE_SHORT, SIbuffer_store_short>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_STORE_BYTE, SIbuffer_store_byte>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_STORE_FORMAT, SIbuffer_store_format>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_STORE_FORMAT_D16, SIbuffer_store_format_d16>;
+def : GINodeEquiv<G_AMDGPU_TBUFFER_STORE_FORMAT, SItbuffer_store>;
+def : GINodeEquiv<G_AMDGPU_TBUFFER_STORE_FORMAT_D16, SItbuffer_store_d16>;
+
+// FIXME: Check MMO is atomic
+def : GINodeEquiv<G_AMDGPU_ATOMIC_INC, SIatomic_inc>;
+def : GINodeEquiv<G_AMDGPU_ATOMIC_DEC, SIatomic_dec>;
+def : GINodeEquiv<G_AMDGPU_ATOMIC_INC, atomic_inc_glue>;
+def : GINodeEquiv<G_AMDGPU_ATOMIC_DEC, atomic_dec_glue>;
+
+def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_SWAP, SIbuffer_atomic_swap>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_ADD, SIbuffer_atomic_add>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_SUB, SIbuffer_atomic_sub>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_SMIN, SIbuffer_atomic_smin>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_UMIN, SIbuffer_atomic_umin>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_SMAX, SIbuffer_atomic_smax>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_UMAX, SIbuffer_atomic_umax>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_AND, SIbuffer_atomic_and>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_OR, SIbuffer_atomic_or>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_XOR, SIbuffer_atomic_xor>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_INC, SIbuffer_atomic_inc>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_DEC, SIbuffer_atomic_dec>;
+def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_CMPSWAP, SIbuffer_atomic_cmpswap>;
+def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD, SIsbuffer_load>;
class GISelSop2Pat <
SDPatternOperator node,
@@ -188,16 +265,13 @@ multiclass GISelVop2IntrPat <
def : GISelVop2Pat <node, inst, dst_vt, src_vt>;
- // FIXME: Intrinsics aren't marked as commutable, so we need to add an explcit
+ // FIXME: Intrinsics aren't marked as commutable, so we need to add an explicit
// pattern to handle commuting. This is another reason why legalizing to a
// generic machine instruction may be better that matching the intrinsic
// directly.
def : GISelVop2CommutePat <node, inst, dst_vt, src_vt>;
}
-def : GISelSop2Pat <or, S_OR_B32, i32>;
-def : GISelVop2Pat <or, V_OR_B32_e32, i32>;
-
// Since GlobalISel is more flexible than SelectionDAG, I think we can get
// away with adding patterns for integer types and not legalizing all
// loads and stores to vector types. This should help simplify the load/store
@@ -206,12 +280,18 @@ foreach Ty = [i64, p0, p1, p4] in {
defm : SMRD_Pattern <"S_LOAD_DWORDX2", Ty>;
}
-def gi_as_i32timm : GICustomOperandRenderer<"renderTruncImm32">,
+def gi_as_i32timm : GICustomOperandRenderer<"renderTruncTImm32">,
GISDNodeXFormEquiv<as_i32timm>;
-def gi_as_i16timm : GICustomOperandRenderer<"renderTruncTImm">,
+def gi_as_i16timm : GICustomOperandRenderer<"renderTruncTImm16">,
GISDNodeXFormEquiv<as_i16timm>;
+def gi_as_i8timm : GICustomOperandRenderer<"renderTruncTImm8">,
+ GISDNodeXFormEquiv<as_i8timm>;
+
+def gi_as_i1timm : GICustomOperandRenderer<"renderTruncTImm1">,
+ GISDNodeXFormEquiv<as_i1timm>;
+
def gi_NegateImm : GICustomOperandRenderer<"renderNegateImm">,
GISDNodeXFormEquiv<NegateImm>;
@@ -220,3 +300,15 @@ def gi_bitcast_fpimm_to_i32 : GICustomOperandRenderer<"renderBitcastImm">,
def gi_IMMPopCount : GICustomOperandRenderer<"renderPopcntImm">,
GISDNodeXFormEquiv<IMMPopCount>;
+
+def gi_extract_glc : GICustomOperandRenderer<"renderExtractGLC">,
+ GISDNodeXFormEquiv<extract_glc>;
+
+def gi_extract_slc : GICustomOperandRenderer<"renderExtractSLC">,
+ GISDNodeXFormEquiv<extract_slc>;
+
+def gi_extract_dlc : GICustomOperandRenderer<"renderExtractDLC">,
+ GISDNodeXFormEquiv<extract_dlc>;
+
+def gi_extract_swz : GICustomOperandRenderer<"renderExtractSWZ">,
+ GISDNodeXFormEquiv<extract_swz>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
index 2e92ae51660b..600b351f9ea1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
@@ -132,7 +132,8 @@ const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] {
};
-// For some instructions which can operate 64-bit only for the scalar version.
+// Some instructions can operate on 64-bit values only in the scalar
+// version. Otherwise, they need to be split into 2 32-bit operations.
const RegisterBankInfo::ValueMapping ValMappingsSGPR64OnlyVGPR32[] {
/*32-bit sgpr*/ {&SGPROnly64BreakDown[0], 1},
/*2 x 32-bit sgpr*/ {&SGPROnly64BreakDown[1], 2},
@@ -207,75 +208,16 @@ const RegisterBankInfo::ValueMapping *getValueMappingSGPR64Only(unsigned BankID,
return &ValMappingsSGPR64OnlyVGPR32[2];
}
-const RegisterBankInfo::PartialMapping LoadSGPROnlyBreakDown[] {
- /* 256-bit load */ {0, 256, SGPRRegBank},
- /* 512-bit load */ {0, 512, SGPRRegBank},
- /* 8 32-bit loads */ {0, 32, VGPRRegBank}, {32, 32, VGPRRegBank},
- {64, 32, VGPRRegBank}, {96, 32, VGPRRegBank},
- {128, 32, VGPRRegBank}, {160, 32, VGPRRegBank},
- {192, 32, VGPRRegBank}, {224, 32, VGPRRegBank},
- /* 16 32-bit loads */ {0, 32, VGPRRegBank}, {32, 32, VGPRRegBank},
- {64, 32, VGPRRegBank}, {96, 32, VGPRRegBank},
- {128, 32, VGPRRegBank}, {160, 32, VGPRRegBank},
- {192, 32, VGPRRegBank}, {224, 32, VGPRRegBank},
- {256, 32, VGPRRegBank}, {288, 32, VGPRRegBank},
- {320, 32, VGPRRegBank}, {352, 32, VGPRRegBank},
- {384, 32, VGPRRegBank}, {416, 32, VGPRRegBank},
- {448, 32, VGPRRegBank}, {480, 32, VGPRRegBank},
- /* 4 64-bit loads */ {0, 64, VGPRRegBank}, {64, 64, VGPRRegBank},
- {128, 64, VGPRRegBank}, {192, 64, VGPRRegBank},
- /* 8 64-bit loads */ {0, 64, VGPRRegBank}, {64, 64, VGPRRegBank},
- {128, 64, VGPRRegBank}, {192, 64, VGPRRegBank},
- {256, 64, VGPRRegBank}, {320, 64, VGPRRegBank},
- {384, 64, VGPRRegBank}, {448, 64, VGPRRegBank},
-
- /* FIXME: The generic register bank select does not support complex
- * break downs where the number of vector elements does not equal the
- * number of breakdowns.
- * FIXME: register bank select now tries to handle complex break downs,
- * but it emits an illegal instruction:
- * %1:vgpr(<8 x s32>) = G_CONCAT_VECTORS %2:vgpr(s128), %3:vgpr(s128)
- */
- /* 2 128-bit loads */ {0, 128, VGPRRegBank}, {128, 128, VGPRRegBank},
- /* 4 128-bit loads */ {0, 128, VGPRRegBank}, {128, 128, VGPRRegBank},
- {256, 128, VGPRRegBank}, {384, 128, VGPRRegBank}
-};
-
-const RegisterBankInfo::ValueMapping ValMappingsLoadSGPROnly[] {
- /* 256-bit load */ {&LoadSGPROnlyBreakDown[0], 1},
- /* 512-bit load */ {&LoadSGPROnlyBreakDown[1], 1},
- /* <8 x i32> load */ {&LoadSGPROnlyBreakDown[2], 8},
- /* <16 x i32> load */ {&LoadSGPROnlyBreakDown[10], 16},
- /* <4 x i64> load */ {&LoadSGPROnlyBreakDown[26], 4},
- /* <8 x i64> load */ {&LoadSGPROnlyBreakDown[30], 8}
-};
-
-const RegisterBankInfo::ValueMapping *
-getValueMappingLoadSGPROnly(unsigned BankID, LLT SizeTy) {
- unsigned Size = SizeTy.getSizeInBits();
- if (Size < 256 || BankID == AMDGPU::SGPRRegBankID)
- return getValueMapping(BankID, Size);
-
- assert((Size == 256 || Size == 512) && BankID == AMDGPU::VGPRRegBankID);
-
- // Default to using the non-split ValueMappings, we will use these if
- // the register bank is SGPR or if we don't know how to handle the vector
- // type.
- unsigned Idx = Size == 256 ? 0 : 1;
-
- // We need to split this load if it has a vgpr pointer.
- if (BankID == AMDGPU::VGPRRegBankID) {
- if (SizeTy == LLT::vector(8, 32))
- Idx = 2;
- else if (SizeTy == LLT::vector(16, 32))
- Idx = 3;
- else if (SizeTy == LLT::vector(4, 64))
- Idx = 4;
- else if (SizeTy == LLT::vector(8, 64))
- Idx = 5;
- }
+/// Split any 64-bit value into 2 32-bit pieces. Unlike
+/// getValueMappingSGPR64Only, this splits both VGPRs and SGPRs.
+const RegisterBankInfo::ValueMapping *getValueMappingSplit64(unsigned BankID,
+ unsigned Size) {
+ assert(Size == 64);
+ if (BankID == AMDGPU::VGPRRegBankID)
+ return &ValMappingsSGPR64OnlyVGPR32[4];
- return &ValMappingsLoadSGPROnly[Idx];
+ assert(BankID == AMDGPU::SGPRRegBankID);
+ return &ValMappingsSGPR64OnlyVGPR32[1];
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
index 16d7f2c4f9e5..989937a597fb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -43,3 +43,12 @@ AMDGPU::getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
return std::make_tuple(Reg, 0, Def);
}
+
+bool AMDGPU::isLegalVOP3PShuffleMask(ArrayRef<int> Mask) {
+ assert(Mask.size() == 2);
+
+ // If one half is undef, the other is trivially in the same reg.
+ if (Mask[0] == -1 || Mask[1] == -1)
+ return true;
+ return (Mask[0] & 2) == (Mask[1] & 2);
+}
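
The mask rule above just requires both defined lanes to index the same 64-bit half. A standalone restatement with a few checks (not the LLVM helper itself):

#include <array>
#include <cassert>

static bool sameHalf(std::array<int, 2> Mask) {
  if (Mask[0] == -1 || Mask[1] == -1)
    return true;                          // an undef lane imposes no constraint
  return (Mask[0] & 2) == (Mask[1] & 2);  // both indices < 2, or both >= 2
}

int main() {
  assert(sameHalf({0, 1}));    // both lanes from the low half
  assert(sameHalf({2, 3}));    // both lanes from the high half
  assert(!sameHalf({1, 2}));   // crosses the 64-bit boundary
  assert(sameHalf({-1, 3}));   // undef lane is always fine
  return 0;
}
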
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
index 1507ade79547..766750758efc 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUGLOBALISELUTILS_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUGLOBALISELUTILS_H
+#include "AMDGPUInstrInfo.h"
#include "llvm/CodeGen/Register.h"
#include <tuple>
@@ -23,6 +24,38 @@ namespace AMDGPU {
std::tuple<Register, unsigned, MachineInstr *>
getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg);
+bool isLegalVOP3PShuffleMask(ArrayRef<int> Mask);
+
+/// Return the number of address arguments and the number of gradients for an
+/// image intrinsic.
+inline std::pair<int, int>
+getImageNumVAddr(const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode) {
+ const AMDGPU::MIMGDimInfo *DimInfo
+ = AMDGPU::getMIMGDimInfo(ImageDimIntr->Dim);
+
+ int NumGradients = BaseOpcode->Gradients ? DimInfo->NumGradients : 0;
+ int NumCoords = BaseOpcode->Coordinates ? DimInfo->NumCoords : 0;
+ int NumLCM = BaseOpcode->LodOrClampOrMip ? 1 : 0;
+ int NumVAddr = BaseOpcode->NumExtraArgs + NumGradients + NumCoords + NumLCM;
+ return {NumVAddr, NumGradients};
+}
+
+/// Return the index of the dmask operand in a gMIR image intrinsic.
+inline int getDMaskIdx(const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode,
+ int NumDefs) {
+ assert(!BaseOpcode->Atomic);
+ return NumDefs + 1 + (BaseOpcode->Store ? 1 : 0);
+}
+
+/// Return first address operand index in a gMIR image intrinsic.
+inline int getImageVAddrIdxBegin(const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode,
+ int NumDefs) {
+ if (BaseOpcode->Atomic)
+ return NumDefs + 1 + (BaseOpcode->AtomicX2 ? 2 : 1);
+ return getDMaskIdx(BaseOpcode, NumDefs) + 1;
+}
+
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 511d62943189..c6f6a3b84e36 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -47,7 +47,7 @@ void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
errs() << "AMDGPU HSA Metadata Parser Test: ";
HSAMD::Metadata FromHSAMetadataString;
- if (fromString(HSAMetadataString, FromHSAMetadataString)) {
+ if (fromString(std::string(HSAMetadataString), FromHSAMetadataString)) {
errs() << "FAIL\n";
return;
}
@@ -127,38 +127,6 @@ ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
ValueKind::ByValue);
}
-ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const {
- switch (Ty->getTypeID()) {
- case Type::IntegerTyID: {
- auto Signed = !TypeName.startswith("u");
- switch (Ty->getIntegerBitWidth()) {
- case 8:
- return Signed ? ValueType::I8 : ValueType::U8;
- case 16:
- return Signed ? ValueType::I16 : ValueType::U16;
- case 32:
- return Signed ? ValueType::I32 : ValueType::U32;
- case 64:
- return Signed ? ValueType::I64 : ValueType::U64;
- default:
- return ValueType::Struct;
- }
- }
- case Type::HalfTyID:
- return ValueType::F16;
- case Type::FloatTyID:
- return ValueType::F32;
- case Type::DoubleTyID:
- return ValueType::F64;
- case Type::PointerTyID:
- return getValueType(Ty->getPointerElementType(), TypeName);
- case Type::VectorTyID:
- return getValueType(Ty->getVectorElementType(), TypeName);
- default:
- return ValueType::Struct;
- }
-}
-
std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
switch (Ty->getTypeID()) {
case Type::IntegerTyID: {
@@ -185,10 +153,10 @@ std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
return "float";
case Type::DoubleTyID:
return "double";
- case Type::VectorTyID: {
- auto VecTy = cast<VectorType>(Ty);
+ case Type::FixedVectorTyID: {
+ auto VecTy = cast<FixedVectorType>(Ty);
auto ElTy = VecTy->getElementType();
- auto NumElements = VecTy->getVectorNumElements();
+ auto NumElements = VecTy->getNumElements();
return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
}
default:
@@ -259,7 +227,8 @@ void MetadataStreamerV2::emitPrintf(const Module &Mod) {
for (auto Op : Node->operands())
if (Op->getNumOperands())
- Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
+ Printf.push_back(
+ std::string(cast<MDString>(Op->getOperand(0))->getString()));
}
void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
@@ -345,12 +314,11 @@ void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
Type *Ty = Arg.getType();
const DataLayout &DL = Func->getParent()->getDataLayout();
- unsigned PointeeAlign = 0;
+ MaybeAlign PointeeAlign;
if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
- PointeeAlign = Arg.getParamAlignment();
- if (PointeeAlign == 0)
- PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
+ PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
+ PtrTy->getElementType());
}
}
@@ -360,20 +328,19 @@ void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
ValueKind ValueKind,
- unsigned PointeeAlign, StringRef Name,
+ MaybeAlign PointeeAlign, StringRef Name,
StringRef TypeName,
StringRef BaseTypeName,
StringRef AccQual, StringRef TypeQual) {
HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
- Arg.mName = Name;
- Arg.mTypeName = TypeName;
+ Arg.mName = std::string(Name);
+ Arg.mTypeName = std::string(TypeName);
Arg.mSize = DL.getTypeAllocSize(Ty);
- Arg.mAlign = DL.getABITypeAlignment(Ty);
+ Arg.mAlign = DL.getABITypeAlign(Ty).value();
Arg.mValueKind = ValueKind;
- Arg.mValueType = getValueType(Ty, BaseTypeName);
- Arg.mPointeeAlign = PointeeAlign;
+ Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
if (auto PtrTy = dyn_cast<PointerType>(Ty))
Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
@@ -479,7 +446,7 @@ void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
HSAMetadata.mKernels.push_back(Kernel::Metadata());
auto &Kernel = HSAMetadata.mKernels.back();
- Kernel.mName = Func.getName();
+ Kernel.mName = std::string(Func.getName());
Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
emitKernelLanguage(Func);
emitKernelAttrs(Func);
@@ -573,38 +540,6 @@ StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
: "by_value");
}
-StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const {
- switch (Ty->getTypeID()) {
- case Type::IntegerTyID: {
- auto Signed = !TypeName.startswith("u");
- switch (Ty->getIntegerBitWidth()) {
- case 8:
- return Signed ? "i8" : "u8";
- case 16:
- return Signed ? "i16" : "u16";
- case 32:
- return Signed ? "i32" : "u32";
- case 64:
- return Signed ? "i64" : "u64";
- default:
- return "struct";
- }
- }
- case Type::HalfTyID:
- return "f16";
- case Type::FloatTyID:
- return "f32";
- case Type::DoubleTyID:
- return "f64";
- case Type::PointerTyID:
- return getValueType(Ty->getPointerElementType(), TypeName);
- case Type::VectorTyID:
- return getValueType(Ty->getVectorElementType(), TypeName);
- default:
- return "struct";
- }
-}
-
std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
switch (Ty->getTypeID()) {
case Type::IntegerTyID: {
@@ -631,10 +566,10 @@ std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
return "float";
case Type::DoubleTyID:
return "double";
- case Type::VectorTyID: {
- auto VecTy = cast<VectorType>(Ty);
+ case Type::FixedVectorTyID: {
+ auto VecTy = cast<FixedVectorType>(Ty);
auto ElTy = VecTy->getElementType();
- auto NumElements = VecTy->getVectorNumElements();
+ auto NumElements = VecTy->getNumElements();
return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
}
default:
@@ -767,12 +702,11 @@ void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
Type *Ty = Arg.getType();
const DataLayout &DL = Func->getParent()->getDataLayout();
- unsigned PointeeAlign = 0;
+ MaybeAlign PointeeAlign;
if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
- PointeeAlign = Arg.getParamAlignment();
- if (PointeeAlign == 0)
- PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
+ PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
+ PtrTy->getElementType());
}
}
@@ -785,7 +719,7 @@ void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
StringRef ValueKind, unsigned &Offset,
msgpack::ArrayDocNode Args,
- unsigned PointeeAlign, StringRef Name,
+ MaybeAlign PointeeAlign, StringRef Name,
StringRef TypeName,
StringRef BaseTypeName,
StringRef AccQual, StringRef TypeQual) {
@@ -796,16 +730,14 @@ void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
if (!TypeName.empty())
Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
auto Size = DL.getTypeAllocSize(Ty);
- auto Align = DL.getABITypeAlignment(Ty);
+ Align Alignment = DL.getABITypeAlign(Ty);
Arg[".size"] = Arg.getDocument()->getNode(Size);
- Offset = alignTo(Offset, Align);
+ Offset = alignTo(Offset, Alignment);
Arg[".offset"] = Arg.getDocument()->getNode(Offset);
Offset += Size;
Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
- Arg[".value_type"] =
- Arg.getDocument()->getNode(getValueType(Ty, BaseTypeName), /*Copy=*/true);
if (PointeeAlign)
- Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign);
+ Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
if (auto PtrTy = dyn_cast<PointerType>(Ty))
if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index 80ac8ca67bcd..9534fffd228d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/Support/AMDGPUMetadata.h"
+#include "llvm/Support/Alignment.h"
namespace llvm {
@@ -27,6 +28,7 @@ class AMDGPUTargetStreamer;
class Argument;
class DataLayout;
class Function;
+class MachineFunction;
class MDNode;
class Module;
struct SIProgramInfo;
@@ -65,8 +67,6 @@ private:
StringRef getValueKind(Type *Ty, StringRef TypeQual,
StringRef BaseTypeName) const;
- StringRef getValueType(Type *Ty, StringRef TypeName) const;
-
std::string getTypeName(Type *Ty, bool Signed) const;
msgpack::ArrayDocNode getWorkGroupDimensions(MDNode *Node) const;
@@ -89,7 +89,7 @@ private:
void emitKernelArg(const DataLayout &DL, Type *Ty, StringRef ValueKind,
unsigned &Offset, msgpack::ArrayDocNode Args,
- unsigned PointeeAlign = 0, StringRef Name = "",
+ MaybeAlign PointeeAlign = None, StringRef Name = "",
StringRef TypeName = "", StringRef BaseTypeName = "",
StringRef AccQual = "", StringRef TypeQual = "");
@@ -133,8 +133,6 @@ private:
ValueKind getValueKind(Type *Ty, StringRef TypeQual,
StringRef BaseTypeName) const;
- ValueType getValueType(Type *Ty, StringRef TypeName) const;
-
std::string getTypeName(Type *Ty, bool Signed) const;
std::vector<uint32_t> getWorkGroupDimensions(MDNode *Node) const;
@@ -159,10 +157,9 @@ private:
void emitKernelArg(const Argument &Arg);
void emitKernelArg(const DataLayout &DL, Type *Ty, ValueKind ValueKind,
- unsigned PointeeAlign = 0,
- StringRef Name = "", StringRef TypeName = "",
- StringRef BaseTypeName = "", StringRef AccQual = "",
- StringRef TypeQual = "");
+ MaybeAlign PointeeAlign = None, StringRef Name = "",
+ StringRef TypeName = "", StringRef BaseTypeName = "",
+ StringRef AccQual = "", StringRef TypeQual = "");
void emitHiddenKernelArgs(const Function &Func);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 2b6308dc1549..aaf448346b53 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -16,7 +16,6 @@
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPUPerfHintAnalysis.h"
-#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -29,6 +28,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -252,7 +252,6 @@ private:
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3Mods_f32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
@@ -265,16 +264,10 @@ private:
SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Clamp) const;
bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Clamp) const;
bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Clamp) const;
bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
@@ -286,7 +279,6 @@ private:
void SelectAddcSubb(SDNode *N);
void SelectUADDO_USUBO(SDNode *N);
void SelectDIV_SCALE(SDNode *N);
- void SelectDIV_FMAS(SDNode *N);
void SelectMAD_64_32(SDNode *N);
void SelectFMA_W_CHAIN(SDNode *N);
void SelectFMUL_W_CHAIN(SDNode *N);
@@ -301,6 +293,7 @@ private:
void SelectATOMIC_CMP_SWAP(SDNode *N);
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
void SelectDS_GWS(SDNode *N, unsigned IntrID);
+ void SelectInterpP1F16(SDNode *N);
void SelectINTRINSIC_W_CHAIN(SDNode *N);
void SelectINTRINSIC_WO_CHAIN(SDNode *N);
void SelectINTRINSIC_VOID(SDNode *N);
@@ -409,7 +402,7 @@ bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
}
#endif
Subtarget = &MF.getSubtarget<GCNSubtarget>();
- Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction(), *Subtarget);
+ Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
return SelectionDAGISel::runOnMachineFunction(MF);
}
@@ -655,29 +648,6 @@ MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}
-static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
- switch (NumVectorElts) {
- case 1:
- return AMDGPU::SReg_32RegClassID;
- case 2:
- return AMDGPU::SReg_64RegClassID;
- case 3:
- return AMDGPU::SGPR_96RegClassID;
- case 4:
- return AMDGPU::SGPR_128RegClassID;
- case 5:
- return AMDGPU::SGPR_160RegClassID;
- case 8:
- return AMDGPU::SReg_256RegClassID;
- case 16:
- return AMDGPU::SReg_512RegClassID;
- case 32:
- return AMDGPU::SReg_1024RegClassID;
- }
-
- llvm_unreachable("invalid vector size");
-}
-
void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
EVT VT = N->getValueType(0);
unsigned NumVectorElts = VT.getVectorNumElements();
@@ -698,6 +668,8 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
// 1 = Vector Register Class
SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
+ bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
+ Triple::amdgcn;
RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
bool IsRegSeq = true;
unsigned NOps = N->getNumOperands();
@@ -707,7 +679,8 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
IsRegSeq = false;
break;
}
- unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
+ unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
+ : R600RegisterInfo::getSubRegFromChannel(i);
RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
}
@@ -717,7 +690,8 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, EltVT);
for (unsigned i = NOps; i < NumVectorElts; ++i) {
- unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
+ unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
+ : R600RegisterInfo::getSubRegFromChannel(i);
RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
RegSeqArgs[1 + (2 * i) + 1] =
CurDAG->getTargetConstant(Sub, DL, MVT::i32);
@@ -742,7 +716,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
(Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
Opc == ISD::ATOMIC_LOAD_FADD ||
Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
- Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
+ Opc == AMDGPUISD::ATOMIC_LOAD_FMAX ||
+ Opc == AMDGPUISD::ATOMIC_LOAD_CSUB)) {
N = glueCopyToM0LDSInit(N);
SelectCode(N);
return;
@@ -801,7 +776,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
assert(VT.getVectorElementType().bitsEq(MVT::i32));
- unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
+ unsigned RegClassID =
+ SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
SelectBuildVector(N, RegClassID);
return;
}
@@ -874,10 +850,6 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectDIV_SCALE(N);
return;
}
- case AMDGPUISD::DIV_FMAS: {
- SelectDIV_FMAS(N);
- return;
- }
case AMDGPUISD::MAD_I64_I32:
case AMDGPUISD::MAD_U64_U32: {
SelectMAD_64_32(N);
@@ -1020,8 +992,14 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
- unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
- unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
+ static const unsigned OpcMap[2][2][2] = {
+ {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
+ {AMDGPU::V_SUB_I32_e32, AMDGPU::V_ADD_I32_e32}},
+ {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
+ {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
+
+ unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
+ unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
SDNode *AddLo;
if (!ConsumeCarry) {
@@ -1063,24 +1041,51 @@ void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
SDValue RHS = N->getOperand(1);
SDValue CI = N->getOperand(2);
- unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
- : AMDGPU::V_SUBB_U32_e64;
- CurDAG->SelectNodeTo(
- N, Opc, N->getVTList(),
- {LHS, RHS, CI, CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
+ if (N->isDivergent()) {
+ unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
+ : AMDGPU::V_SUBB_U32_e64;
+ CurDAG->SelectNodeTo(
+ N, Opc, N->getVTList(),
+ {LHS, RHS, CI,
+ CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
+ } else {
+ unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
+ : AMDGPU::S_SUB_CO_PSEUDO;
+ CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
+ }
}
void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
// The names of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
// carry out despite the _i32 name. These were renamed in VI to _U32.
// FIXME: We should probably rename the opcodes here.
- unsigned Opc = N->getOpcode() == ISD::UADDO ?
- AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
+ bool IsAdd = N->getOpcode() == ISD::UADDO;
+ bool IsVALU = N->isDivergent();
+
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
+ ++UI)
+ if (UI.getUse().getResNo() == 1) {
+ if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) ||
+ (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) {
+ IsVALU = true;
+ break;
+ }
+ }
+
+ if (IsVALU) {
+ unsigned Opc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
+
+ CurDAG->SelectNodeTo(
+ N, Opc, N->getVTList(),
+ {N->getOperand(0), N->getOperand(1),
+ CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
+ } else {
+ unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
+ : AMDGPU::S_USUBO_PSEUDO;
- CurDAG->SelectNodeTo(
- N, Opc, N->getVTList(),
- {N->getOperand(0), N->getOperand(1),
- CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
+ CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
+ {N->getOperand(0), N->getOperand(1)});
+ }
}
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
@@ -1125,35 +1130,6 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
-void AMDGPUDAGToDAGISel::SelectDIV_FMAS(SDNode *N) {
- const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
- const SIRegisterInfo *TRI = ST->getRegisterInfo();
-
- SDLoc SL(N);
- EVT VT = N->getValueType(0);
-
- assert(VT == MVT::f32 || VT == MVT::f64);
-
- unsigned Opc
- = (VT == MVT::f64) ? AMDGPU::V_DIV_FMAS_F64 : AMDGPU::V_DIV_FMAS_F32;
-
- SDValue CarryIn = N->getOperand(3);
- // V_DIV_FMAS implicitly reads VCC.
- SDValue VCC = CurDAG->getCopyToReg(CurDAG->getEntryNode(), SL,
- TRI->getVCC(), CarryIn, SDValue());
-
- SDValue Ops[10];
-
- SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
- SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
- SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
-
- Ops[8] = VCC;
- Ops[9] = VCC.getValue(1);
-
- CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
-}
-
// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
@@ -1343,6 +1319,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
SDValue &TFE, SDValue &DLC,
SDValue &SWZ) const {
// Subtarget prefers to use flat instruction
+ // FIXME: This should be a pattern predicate and not reach here
if (Subtarget->useFlatForGlobal())
return false;
@@ -1438,6 +1415,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue Ptr, Offen, Idxen, Addr64;
// addr64 bit was removed for volcanic islands.
+ // FIXME: This should be a pattern predicate and not reach here
if (!Subtarget->hasAddr64())
return false;
@@ -1475,6 +1453,7 @@ static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
}
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
+ SDLoc DL(N);
const MachineFunction &MF = CurDAG->getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
@@ -1489,9 +1468,8 @@ std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const
}
// If we don't know this private access is a local stack object, it needs to
- // be relative to the entry point's scratch wave offset register.
- return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
- MVT::i32));
+ // be relative to the entry point's scratch wave offset.
+ return std::make_pair(N, CurDAG->getTargetConstant(0, DL, MVT::i32));
}
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
@@ -1506,22 +1484,26 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
- unsigned Imm = CAddr->getZExtValue();
-
- SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
- MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
- DL, MVT::i32, HighBits);
- VAddr = SDValue(MovHighBits, 0);
-
- // In a call sequence, stores to the argument stack area are relative to the
- // stack pointer.
- const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
- unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
- Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
-
- SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
- ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
- return true;
+ int64_t Imm = CAddr->getSExtValue();
+ const int64_t NullPtr =
+ AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
+ // Don't fold null pointer.
+ if (Imm != NullPtr) {
+ SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
+ MachineSDNode *MovHighBits = CurDAG->getMachineNode(
+ AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
+ VAddr = SDValue(MovHighBits, 0);
+
+ // In a call sequence, stores to the argument stack area are relative to the
+ // stack pointer.
+ const MachinePointerInfo &PtrInfo
+ = cast<MemSDNode>(Parent)->getPointerInfo();
+ SOffset = isStackPtrRelative(PtrInfo)
+ ? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32)
+ : CurDAG->getTargetConstant(0, DL, MVT::i32);
+ ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
+ return true;
+ }
}
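The constant-address case above splits a private (scratch) address in two: everything above the low 12 bits is materialized into a VGPR with V_MOV_B32, and the low 12 bits go into the MUBUF immediate offset field (0..4095, hence the & 4095 / & ~4095 masks). A minimal standalone sketch of that split; the struct and function names are illustrative, not part of the selector:

#include <cstdint>

struct ScratchAddrSplit {
  uint32_t HighBits;  // materialized into a VGPR via v_mov_b32
  uint16_t ImmOffset; // encoded directly in the MUBUF offset field
};

ScratchAddrSplit splitScratchAddr(uint32_t Addr) {
  return {Addr & ~4095u,           // bits above the 12-bit immediate range
          uint16_t(Addr & 4095u)}; // 0..4095
}
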
if (CurDAG->isBaseWithConstantOffset(Addr)) {
@@ -1577,12 +1559,12 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
- unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
- Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
// FIXME: Get from MachinePointerInfo? We should only be using the frame
// offset if we know this is in a call sequence.
- SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
+ SOffset = isStackPtrRelative(PtrInfo)
+ ? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32)
+ : CurDAG->getTargetConstant(0, DL, MVT::i32);
Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
return true;
@@ -1646,6 +1628,37 @@ static MemSDNode* findMemSDNode(SDNode *N) {
llvm_unreachable("cannot find MemSDNode in the pattern!");
}
+static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
+ SDValue &N0, SDValue &N1) {
+ if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
+ Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ // As we split 64-bit `or` earlier, it's a complicated pattern to match, i.e.
+ // (i64 (bitcast (v2i32 (build_vector
+ // (or (extract_vector_elt V, 0), OFFSET),
+ // (extract_vector_elt V, 1)))))
+ SDValue Lo = Addr.getOperand(0).getOperand(0);
+ if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
+ SDValue BaseLo = Lo.getOperand(0);
+ SDValue BaseHi = Addr.getOperand(0).getOperand(1);
+ // Check that the split base (Lo and Hi) is extracted from the same vector.
+ if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
+ // Lo is statically extracted from index 0.
+ isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
+ BaseLo.getConstantOperandVal(1) == 0 &&
+ // Hi is statically extracted from index 1.
+ isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
+ BaseHi.getConstantOperandVal(1) == 1) {
+ N0 = BaseLo.getOperand(0).getOperand(0);
+ N1 = Lo.getOperand(1);
+ return true;
+ }
+ }
+ }
+ return false;
+}
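The helper above leans on isBaseWithConstantOffset, which treats an `or` as an addition only when the DAG can prove the constant's set bits are already zero in the base. A tiny illustration of the identity being exploited (hypothetical names, not selector code):

#include <cassert>
#include <cstdint>

// When the set bits of Base and Offset are disjoint, or-ing them equals
// adding them, so the split 64-bit address can be re-read as base + offset.
uint64_t combineDisjoint(uint64_t Base, uint64_t Offset) {
  assert((Base & Offset) == 0 && "bits must not overlap");
  return Base | Offset; // == Base + Offset under the assertion above
}
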
+
template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
SDValue Addr,
@@ -1656,84 +1669,91 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
if (Subtarget->hasFlatInstOffsets() &&
(!Subtarget->hasFlatSegmentOffsetBug() ||
- findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) &&
- CurDAG->isBaseWithConstantOffset(Addr)) {
- SDValue N0 = Addr.getOperand(0);
- SDValue N1 = Addr.getOperand(1);
- uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
-
- const SIInstrInfo *TII = Subtarget->getInstrInfo();
- unsigned AS = findMemSDNode(N)->getAddressSpace();
- if (TII->isLegalFLATOffset(COffsetVal, AS, IsSigned)) {
- Addr = N0;
- OffsetVal = COffsetVal;
- } else {
- // If the offset doesn't fit, put the low bits into the offset field and
- // add the rest.
-
- SDLoc DL(N);
- uint64_t ImmField;
- const unsigned NumBits = TII->getNumFlatOffsetBits(AS, IsSigned);
- if (IsSigned) {
- ImmField = SignExtend64(COffsetVal, NumBits);
-
- // Don't use a negative offset field if the base offset is positive.
- // Since the scheduler currently relies on the offset field, doing so
- // could result in strange scheduling decisions.
-
- // TODO: Should we not do this in the opposite direction as well?
- if (static_cast<int64_t>(COffsetVal) > 0) {
- if (static_cast<int64_t>(ImmField) < 0) {
- const uint64_t OffsetMask = maskTrailingOnes<uint64_t>(NumBits - 1);
- ImmField = COffsetVal & OffsetMask;
+ findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)) {
+ SDValue N0, N1;
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ N0 = Addr.getOperand(0);
+ N1 = Addr.getOperand(1);
+ } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
+ assert(N0 && N1 && isa<ConstantSDNode>(N1));
+ }
+ if (N0 && N1) {
+ uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
+
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
+ unsigned AS = findMemSDNode(N)->getAddressSpace();
+ if (TII->isLegalFLATOffset(COffsetVal, AS, IsSigned)) {
+ Addr = N0;
+ OffsetVal = COffsetVal;
+ } else {
+ // If the offset doesn't fit, put the low bits into the offset field and
+ // add the rest.
+
+ SDLoc DL(N);
+ uint64_t ImmField;
+ const unsigned NumBits = TII->getNumFlatOffsetBits(AS, IsSigned);
+ if (IsSigned) {
+ ImmField = SignExtend64(COffsetVal, NumBits);
+
+ // Don't use a negative offset field if the base offset is positive.
+ // Since the scheduler currently relies on the offset field, doing so
+ // could result in strange scheduling decisions.
+
+ // TODO: Should we not do this in the opposite direction as well?
+ if (static_cast<int64_t>(COffsetVal) > 0) {
+ if (static_cast<int64_t>(ImmField) < 0) {
+ const uint64_t OffsetMask =
+ maskTrailingOnes<uint64_t>(NumBits - 1);
+ ImmField = COffsetVal & OffsetMask;
+ }
}
+ } else {
+ // TODO: Should we do this for a negative offset?
+ const uint64_t OffsetMask = maskTrailingOnes<uint64_t>(NumBits);
+ ImmField = COffsetVal & OffsetMask;
}
- } else {
- // TODO: Should we do this for a negative offset?
- const uint64_t OffsetMask = maskTrailingOnes<uint64_t>(NumBits);
- ImmField = COffsetVal & OffsetMask;
- }
- uint64_t RemainderOffset = COffsetVal - ImmField;
+ uint64_t RemainderOffset = COffsetVal - ImmField;
- assert(TII->isLegalFLATOffset(ImmField, AS, IsSigned));
- assert(RemainderOffset + ImmField == COffsetVal);
+ assert(TII->isLegalFLATOffset(ImmField, AS, IsSigned));
+ assert(RemainderOffset + ImmField == COffsetVal);
- OffsetVal = ImmField;
+ OffsetVal = ImmField;
- // TODO: Should this try to use a scalar add pseudo if the base address is
- // uniform and saddr is usable?
- SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
- SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
+ // TODO: Should this try to use a scalar add pseudo if the base address
+ // is uniform and saddr is usable?
+ SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
+ SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
- SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- DL, MVT::i32, N0, Sub0);
- SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- DL, MVT::i32, N0, Sub1);
+ SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ MVT::i32, N0, Sub0);
+ SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ MVT::i32, N0, Sub1);
- SDValue AddOffsetLo
- = getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
- SDValue AddOffsetHi
- = getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
+ SDValue AddOffsetLo =
+ getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
+ SDValue AddOffsetHi =
+ getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
- SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
- SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
+ SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
- SDNode *Add = CurDAG->getMachineNode(
- AMDGPU::V_ADD_I32_e64, DL, VTs,
- {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
+ SDNode *Add =
+ CurDAG->getMachineNode(AMDGPU::V_ADD_I32_e64, DL, VTs,
+ {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
- SDNode *Addc = CurDAG->getMachineNode(
- AMDGPU::V_ADDC_U32_e64, DL, VTs,
- {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
+ SDNode *Addc = CurDAG->getMachineNode(
+ AMDGPU::V_ADDC_U32_e64, DL, VTs,
+ {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
- SDValue RegSequenceArgs[] = {
- CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
- SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1
- };
+ SDValue RegSequenceArgs[] = {
+ CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
+ SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};
- Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
- MVT::i64, RegSequenceArgs), 0);
+ Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
+ MVT::i64, RegSequenceArgs),
+ 0);
+ }
}
}
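When the constant does not fit the flat-instruction offset field, the code above keeps the encodable low bits in the immediate and adds the remainder back to the base with a 64-bit VALU add (the V_ADD_I32/V_ADDC_U32/REG_SEQUENCE chain). A hedged scalar sketch of just that arithmetic split, with NumBits standing in for getNumFlatOffsetBits; the helper names are illustrative:

#include <cstdint>

static uint64_t lowBits(uint64_t V, unsigned N) { // assumes 0 < N < 64
  return V & ((1ull << N) - 1);
}

static int64_t signExtendBits(uint64_t V, unsigned N) {
  uint64_t Low = lowBits(V, N);
  if (Low & (1ull << (N - 1)))                    // negative in N-bit two's complement
    return (int64_t)Low - (int64_t)(1ull << N);
  return (int64_t)Low;
}

struct FlatOffsetSplit {
  uint64_t ImmField;  // encoded in the instruction
  uint64_t Remainder; // added to the base with a 64-bit VALU add
};

FlatOffsetSplit splitFlatOffset(uint64_t Offset, unsigned NumBits, bool Signed) {
  uint64_t Imm;
  if (Signed) {
    Imm = (uint64_t)signExtendBits(Offset, NumBits);
    // Avoid a negative field when the whole offset is positive, mirroring the
    // scheduler-related workaround in the selector above.
    if ((int64_t)Offset > 0 && (int64_t)Imm < 0)
      Imm = lowBits(Offset, NumBits - 1);
  } else {
    Imm = lowBits(Offset, NumBits);
  }
  return {Imm, Offset - Imm}; // ImmField + Remainder == Offset
}
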
@@ -1761,35 +1781,52 @@ bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
SDValue &Offset, bool &Imm) const {
-
- // FIXME: Handle non-constant offsets.
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
- if (!C)
+ if (!C) {
+ if (ByteOffsetNode.getValueType().isScalarInteger() &&
+ ByteOffsetNode.getValueType().getSizeInBits() == 32) {
+ Offset = ByteOffsetNode;
+ Imm = false;
+ return true;
+ }
+ if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
+ if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
+ Offset = ByteOffsetNode.getOperand(0);
+ Imm = false;
+ return true;
+ }
+ }
return false;
+ }
SDLoc SL(ByteOffsetNode);
- GCNSubtarget::Generation Gen = Subtarget->getGeneration();
+ // GFX9 and GFX10 have signed byte immediate offsets.
int64_t ByteOffset = C->getSExtValue();
- int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
-
- if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
- Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
+ Optional<int64_t> EncodedOffset =
+ AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false);
+ if (EncodedOffset) {
+ Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
Imm = true;
return true;
}
- if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
+ // SGPR and literal offsets are unsigned.
+ if (ByteOffset < 0)
return false;
- if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
- // 32-bit Immediates are supported on Sea Islands.
- Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
- } else {
- SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
- Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
- C32Bit), 0);
+ EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
+ if (EncodedOffset) {
+ Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
+ return true;
}
- Imm = false;
+
+ if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
+ return false;
+
+ SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
+ Offset = SDValue(
+ CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
+
return true;
}
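For a constant offset, the rewritten routine tries the forms in a fixed order: a natively encoded immediate, then (for non-negative values only) a 32-bit literal, and finally a constant materialized into an SGPR with S_MOV_B32. A rough summary of that order, with boolean parameters standing in for the AMDGPU::getSMRDEncoded* queries; the enum and function are illustrative, not the in-tree API:

#include <cstdint>

enum class SMRDOffsetKind { EncodedImm, Literal32, MaterializedSGPR, Reject };

SMRDOffsetKind classifyConstantSMRDOffset(int64_t ByteOffset,
                                          bool FitsEncodedImm,
                                          bool FitsLiteral32) {
  if (FitsEncodedImm)   // AMDGPU::getSMRDEncodedOffset returned a value
    return SMRDOffsetKind::EncodedImm;
  if (ByteOffset < 0)   // SGPR and literal offsets are unsigned
    return SMRDOffsetKind::Reject;
  if (FitsLiteral32)    // getSMRDEncodedLiteralOffset32 returned a value
    return SMRDOffsetKind::Literal32;
  // The selector also rejects constants that do not fit in 32 bits before
  // falling back to an S_MOV_B32-materialized SGPR offset.
  return SMRDOffsetKind::MaterializedSGPR;
}
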
@@ -1825,14 +1862,21 @@ bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
// A 32-bit (address + offset) should not cause unsigned 32-bit integer
// wraparound, because s_load instructions perform the addition in 64 bits.
if ((Addr.getValueType() != MVT::i32 ||
- Addr->getFlags().hasNoUnsignedWrap()) &&
- CurDAG->isBaseWithConstantOffset(Addr)) {
- SDValue N0 = Addr.getOperand(0);
- SDValue N1 = Addr.getOperand(1);
-
- if (SelectSMRDOffset(N1, Offset, Imm)) {
- SBase = Expand32BitAddress(N0);
- return true;
+ Addr->getFlags().hasNoUnsignedWrap())) {
+ SDValue N0, N1;
+ // Extract the base and offset if possible.
+ if (CurDAG->isBaseWithConstantOffset(Addr) ||
+ Addr.getOpcode() == ISD::ADD) {
+ N0 = Addr.getOperand(0);
+ N1 = Addr.getOperand(1);
+ } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
+ assert(N0 && N1 && isa<ConstantSDNode>(N1));
+ }
+ if (N0 && N1) {
+ if (SelectSMRDOffset(N1, Offset, Imm)) {
+ SBase = Expand32BitAddress(N0);
+ return true;
+ }
}
}
SBase = Expand32BitAddress(Addr);
@@ -1843,17 +1887,16 @@ bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
SDValue &Offset) const {
- bool Imm;
+ bool Imm = false;
return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}
bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
SDValue &Offset) const {
- if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
- return false;
+ assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
- bool Imm;
+ bool Imm = false;
if (!SelectSMRD(Addr, SBase, Offset, Imm))
return false;
@@ -1862,27 +1905,38 @@ bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
SDValue &Offset) const {
- bool Imm;
+ bool Imm = false;
return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
!isa<ConstantSDNode>(Offset);
}
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
SDValue &Offset) const {
- bool Imm;
- return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
+ // The immediate offset for S_BUFFER instructions is unsigned.
+ if (auto Imm =
+ AMDGPU::getSMRDEncodedOffset(*Subtarget, C->getZExtValue(), true)) {
+ Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
+ return true;
+ }
+ }
+
+ return false;
}
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
SDValue &Offset) const {
- if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
- return false;
+ assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
- bool Imm;
- if (!SelectSMRDOffset(Addr, Offset, Imm))
- return false;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
+ if (auto Imm = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget,
+ C->getZExtValue())) {
+ Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
+ return true;
+ }
+ }
- return !Imm && isa<ConstantSDNode>(Offset);
+ return false;
}
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
@@ -1898,7 +1952,9 @@ bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
// (add n0, c0)
// Don't peel off the offset (c0) if doing so could possibly lead
// the base (n0) to be negative.
- if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0)) {
+ // (or n0, |c0|) can never change a sign given isBaseWithConstantOffset.
+ if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) ||
+ (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) {
Base = N0;
Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
return true;
@@ -2066,7 +2122,7 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
- unsigned CondReg = UseSCCBr ? (unsigned)AMDGPU::SCC : TRI->getVCC();
+ Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
SDLoc SL(N);
if (!UseSCCBr) {
@@ -2121,7 +2177,7 @@ void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
- assert((IsFMA || !Mode.FP32Denormals) &&
+ assert((IsFMA || !Mode.allFP32Denormals()) &&
"fmad selected with denormals enabled");
// TODO: We can select this with f32 denormals enabled if all the sources are
// converted from f16 (in which case fmad isn't legal).
@@ -2338,6 +2394,64 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}
+void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
+ if (Subtarget->getLDSBankCount() != 16) {
+ // This is a single instruction with a pattern.
+ SelectCode(N);
+ return;
+ }
+
+ SDLoc DL(N);
+
+ // This requires 2 instructions. It is possible to write a pattern to support
+ // this, but the generated isel emitter doesn't correctly deal with multiple
+ // output instructions using the same physical register input. The copy to m0
+ // is incorrectly placed before the second instruction.
+ //
+ // TODO: Match source modifiers.
+ //
+ // def : Pat <
+ // (int_amdgcn_interp_p1_f16
+ // (VOP3Mods f32:$src0, i32:$src0_modifiers),
+ // (i32 timm:$attrchan), (i32 timm:$attr),
+ // (i1 timm:$high), M0),
+ // (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
+ // timm:$attrchan, 0,
+ // (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
+ // let Predicates = [has16BankLDS];
+ // }
+
+ // 16 bank LDS
+ SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
+ N->getOperand(5), SDValue());
+
+ SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
+
+ SDNode *InterpMov =
+ CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
+ CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
+ N->getOperand(3), // Attr
+ N->getOperand(2), // Attrchan
+ ToM0.getValue(1) // In glue
+ });
+
+ SDNode *InterpP1LV =
+ CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
+ CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
+ N->getOperand(1), // Src0
+ N->getOperand(3), // Attr
+ N->getOperand(2), // Attrchan
+ CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
+ SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
+ N->getOperand(4), // high
+ CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
+ CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
+ SDValue(InterpMov, 1)
+ });
+
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
+}
+
void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntrID) {
@@ -2366,6 +2480,9 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
case Intrinsic::amdgcn_wwm:
Opcode = AMDGPU::WWM;
break;
+ case Intrinsic::amdgcn_interp_p1_f16:
+ SelectInterpP1F16(N);
+ return;
default:
SelectCode(N);
return;
@@ -2428,15 +2545,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
return isNoNanSrc(Src);
}
-bool AMDGPUDAGToDAGISel::SelectVOP3Mods_f32(SDValue In, SDValue &Src,
- SDValue &SrcMods) const {
- if (In.getValueType() == MVT::f32)
- return SelectVOP3Mods(In, Src, SrcMods);
- Src = In;
- SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);;
- return true;
-}
-
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
return false;
@@ -2520,17 +2628,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
return true;
}
-bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
- SDValue &SrcMods,
- SDValue &Clamp) const {
- SDLoc SL(In);
-
- // FIXME: Handle clamp and op_sel
- Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
-
- return SelectVOP3PMods(In, Src, SrcMods);
-}
-
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
Src = In;
@@ -2539,34 +2636,12 @@ bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
return true;
}
-bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
- SDValue &SrcMods,
- SDValue &Clamp) const {
- SDLoc SL(In);
-
- // FIXME: Handle clamp
- Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
-
- return SelectVOP3OpSel(In, Src, SrcMods);
-}
-
bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
// FIXME: Handle op_sel
return SelectVOP3Mods(In, Src, SrcMods);
}
-bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
- SDValue &SrcMods,
- SDValue &Clamp) const {
- SDLoc SL(In);
-
- // FIXME: Handle clamp
- Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
-
- return SelectVOP3OpSelMods(In, Src, SrcMods);
-}
-
// The return value is not whether the match is possible (which it always is),
// but whether or not a conversion is really used.
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
@@ -2705,7 +2780,7 @@ bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
(
Subtarget->getScalarizeGlobalBehavior() &&
Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
- !Ld->isVolatile() &&
+ Ld->isSimple() &&
!N->isDivergent() &&
static_cast<const SITargetLowering *>(
getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 23cc9404532d..940ec6f31c69 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -16,7 +16,6 @@
#include "AMDGPU.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUFrameLowering.h"
-#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
@@ -38,6 +37,11 @@ using namespace llvm;
#include "AMDGPUGenCallingConv.inc"
+static cl::opt<bool> AMDGPUBypassSlowDiv(
+ "amdgpu-bypass-slow-div",
+ cl::desc("Skip 64-bit divide for dynamic 32-bit values"),
+ cl::init(true));
+
// Find a larger type to do a load / store of a vector with.
EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
unsigned StoreSize = VT.getStoreSizeInBits();
@@ -103,6 +107,24 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::v2f64, Promote);
AddPromotedToType(ISD::LOAD, MVT::v2f64, MVT::v4i32);
+ setOperationAction(ISD::LOAD, MVT::v4i64, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v4i64, MVT::v8i32);
+
+ setOperationAction(ISD::LOAD, MVT::v4f64, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v4f64, MVT::v8i32);
+
+ setOperationAction(ISD::LOAD, MVT::v8i64, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v8i64, MVT::v16i32);
+
+ setOperationAction(ISD::LOAD, MVT::v8f64, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v8f64, MVT::v16i32);
+
+ setOperationAction(ISD::LOAD, MVT::v16i64, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v16i64, MVT::v32i32);
+
+ setOperationAction(ISD::LOAD, MVT::v16f64, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v16f64, MVT::v32i32);
+
// There are no 64-bit extloads. These should be done as a 32-bit extload and
// an extension to 64-bit.
for (MVT VT : MVT::integer_valuetypes()) {
@@ -161,11 +183,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f16, Expand);
setOperationAction(ISD::STORE, MVT::f32, Promote);
AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
@@ -203,6 +227,24 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STORE, MVT::v2f64, Promote);
AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v4i32);
+ setOperationAction(ISD::STORE, MVT::v4i64, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v4i64, MVT::v8i32);
+
+ setOperationAction(ISD::STORE, MVT::v4f64, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v4f64, MVT::v8i32);
+
+ setOperationAction(ISD::STORE, MVT::v8i64, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v8i64, MVT::v16i32);
+
+ setOperationAction(ISD::STORE, MVT::v8f64, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v8f64, MVT::v16i32);
+
+ setOperationAction(ISD::STORE, MVT::v16i64, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v16i64, MVT::v32i32);
+
+ setOperationAction(ISD::STORE, MVT::v16f64, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v16f64, MVT::v32i32);
+
setTruncStoreAction(MVT::i64, MVT::i1, Expand);
setTruncStoreAction(MVT::i64, MVT::i8, Expand);
setTruncStoreAction(MVT::i64, MVT::i16, Expand);
@@ -227,12 +269,21 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
setTruncStoreAction(MVT::v2f64, MVT::v2f16, Expand);
+ setTruncStoreAction(MVT::v4i64, MVT::v4i32, Expand);
+ setTruncStoreAction(MVT::v4i64, MVT::v4i16, Expand);
setTruncStoreAction(MVT::v4f64, MVT::v4f32, Expand);
setTruncStoreAction(MVT::v4f64, MVT::v4f16, Expand);
setTruncStoreAction(MVT::v8f64, MVT::v8f32, Expand);
setTruncStoreAction(MVT::v8f64, MVT::v8f16, Expand);
+ setTruncStoreAction(MVT::v16f64, MVT::v16f32, Expand);
+ setTruncStoreAction(MVT::v16f64, MVT::v16f16, Expand);
+ setTruncStoreAction(MVT::v16i64, MVT::v16i16, Expand);
+ setTruncStoreAction(MVT::v16i64, MVT::v16i8, Expand);
+ setTruncStoreAction(MVT::v16i64, MVT::v16i1, Expand);
setOperationAction(ISD::Constant, MVT::i32, Legal);
setOperationAction(ISD::Constant, MVT::i64, Legal);
@@ -297,6 +348,14 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f64, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i64, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f64, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i64, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f64, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i64, Custom);
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
@@ -329,6 +388,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SUBE, VT, Legal);
}
+ // The hardware supports 32-bit FSHR, but not FSHL.
+ setOperationAction(ISD::FSHR, MVT::i32, Legal);
+
// The hardware supports 32-bit ROTR, but not ROTL.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
@@ -381,7 +443,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
- setOperationAction(ISD::SDIVREM, VT, Custom);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
@@ -483,6 +545,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
MaxStoresPerMemmove = 0xffffffff;
MaxStoresPerMemset = 0xffffffff;
+ // The expansion for 64-bit division is enormous.
+ if (AMDGPUBypassSlowDiv)
+ addBypassSlowDiv(64, 32);
+
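The addBypassSlowDiv(64, 32) call asks CodeGenPrepare to guard 64-bit divisions with a runtime width check, since the full 64-bit expansion is very large. Roughly the shape of the bypass, as a hand-written illustration rather than the generated code:

#include <cstdint>

uint64_t udiv64MaybeNarrow(uint64_t A, uint64_t B) {
  if (((A | B) >> 32) == 0)                     // both operands fit in 32 bits
    return uint64_t(uint32_t(A) / uint32_t(B)); // cheap 32-bit divide
  return A / B;                                 // full 64-bit expansion
}
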
setTargetDAGCombine(ISD::BITCAST);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
@@ -609,6 +675,17 @@ bool AMDGPUTargetLowering::allUsesHaveSourceMods(const SDNode *N,
return true;
}
+EVT AMDGPUTargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
+ ISD::NodeType ExtendKind) const {
+ assert(!VT.isVector() && "only scalar expected");
+
+ // Round to the next multiple of 32-bits.
+ unsigned Size = VT.getSizeInBits();
+ if (Size <= 32)
+ return MVT::i32;
+ return EVT::getIntegerVT(Context, 32 * ((Size + 31) / 32));
+}
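For illustration, a 24-bit scalar return widens to i32, while a 48-bit one rounds up to i64 (32 * ((48 + 31) / 32) = 64).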
+
MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const {
return MVT::i32;
}
@@ -641,8 +718,9 @@ bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
unsigned NewSize = NewVT.getStoreSizeInBits();
- // If we are reducing to a 32-bit load, this is always better.
- if (NewSize == 32)
+ // If we are reducing to a 32-bit load or a smaller multi-dword load,
+ // this is always better.
+ if (NewSize >= 32)
return true;
EVT OldVT = N->getValueType(0);
@@ -733,6 +811,26 @@ bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode * N) const {
}
}
+SDValue AMDGPUTargetLowering::getNegatedExpression(
+ SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize,
+ NegatibleCost &Cost, unsigned Depth) const {
+
+ switch (Op.getOpcode()) {
+ case ISD::FMA:
+ case ISD::FMAD: {
+ // Negating a fma is not free if it has users without source mods.
+ if (!allUsesHaveSourceMods(Op.getNode()))
+ return SDValue();
+ break;
+ }
+ default:
+ break;
+ }
+
+ return TargetLowering::getNegatedExpression(Op, DAG, LegalOperations,
+ ForCodeSize, Cost, Depth);
+}
+
//===---------------------------------------------------------------------===//
// Target Properties
//===---------------------------------------------------------------------===//
@@ -912,7 +1010,7 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset(Fn);
CallingConv::ID CC = Fn.getCallingConv();
- unsigned MaxAlign = 1;
+ Align MaxAlign = Align(1);
uint64_t ExplicitArgOffset = 0;
const DataLayout &DL = Fn.getParent()->getDataLayout();
@@ -920,12 +1018,12 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
for (const Argument &Arg : Fn.args()) {
Type *BaseArgTy = Arg.getType();
- unsigned Align = DL.getABITypeAlignment(BaseArgTy);
- MaxAlign = std::max(Align, MaxAlign);
+ Align Alignment = DL.getABITypeAlign(BaseArgTy);
+ MaxAlign = std::max(Alignment, MaxAlign);
unsigned AllocSize = DL.getTypeAllocSize(BaseArgTy);
- uint64_t ArgOffset = alignTo(ExplicitArgOffset, Align) + ExplicitOffset;
- ExplicitArgOffset = alignTo(ExplicitArgOffset, Align) + AllocSize;
+ uint64_t ArgOffset = alignTo(ExplicitArgOffset, Alignment) + ExplicitOffset;
+ ExplicitArgOffset = alignTo(ExplicitArgOffset, Alignment) + AllocSize;
// We're basically throwing away everything passed into us and starting over
// to get accurate in-memory offsets. The "PartOffset" is completely useless
@@ -999,6 +1097,8 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
assert(MemVT.getVectorNumElements() == 3 ||
MemVT.getVectorNumElements() == 5);
MemVT = MemVT.getPow2VectorType(State.getContext());
+ } else if (!MemVT.isSimple() && !MemVT.isVector()) {
+ MemVT = MemVT.getRoundIntegerType(State.getContext());
}
unsigned PartOffset = 0;
@@ -1140,7 +1240,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
case ISD::FROUND: return LowerFROUND(Op, DAG);
case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
case ISD::FLOG:
- return LowerFLOG(Op, DAG, 1.0F / numbers::log2ef);
+ return LowerFLOG(Op, DAG, numbers::ln2f);
case ISD::FLOG10:
return LowerFLOG(Op, DAG, numbers::ln2f / numbers::ln10f);
case ISD::FEXP:
@@ -1196,10 +1296,23 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
if (!MFI->isEntryFunction()) {
+ SDLoc DL(Op);
const Function &Fn = DAG.getMachineFunction().getFunction();
DiagnosticInfoUnsupported BadLDSDecl(
- Fn, "local memory global used by non-kernel function", SDLoc(Op).getDebugLoc());
+ Fn, "local memory global used by non-kernel function",
+ DL.getDebugLoc(), DS_Warning);
DAG.getContext()->diagnose(BadLDSDecl);
+
+ // We currently don't have a way to correctly allocate LDS objects that
+ // aren't directly associated with a kernel. We do force inlining of
+ // functions that use local objects. However, if these dead functions are
+ // not eliminated, we don't want a compile time error. Just emit a warning
+ // and a trap, since there should be no callable path here.
+ SDValue Trap = DAG.getNode(ISD::TRAP, DL, MVT::Other, DAG.getEntryNode());
+ SDValue OutputChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ Trap, DAG.getRoot());
+ DAG.setRoot(OutputChain);
+ return DAG.getUNDEF(Op.getValueType());
}
// XXX: What does the value of G->getOffset() mean?
@@ -1208,7 +1321,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
// TODO: We could emit code to handle the initialization somewhere.
if (!hasDefinedInitializer(GV)) {
- unsigned Offset = MFI->allocateLDSGlobal(DL, *GV);
+ unsigned Offset = MFI->allocateLDSGlobal(DL, *cast<GlobalVariable>(GV));
return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
}
}
@@ -1383,12 +1496,11 @@ AMDGPUTargetLowering::splitVector(const SDValue &N, const SDLoc &DL,
(HiVT.isVector() ? HiVT.getVectorNumElements() : 1) <=
N.getValueType().getVectorNumElements() &&
"More vector elements requested than available!");
- auto IdxTy = getVectorIdxTy(DAG.getDataLayout());
SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N,
- DAG.getConstant(0, DL, IdxTy));
+ DAG.getVectorIdxConstant(0, DL));
SDValue Hi = DAG.getNode(
HiVT.isVector() ? ISD::EXTRACT_SUBVECTOR : ISD::EXTRACT_VECTOR_ELT, DL,
- HiVT, N, DAG.getConstant(LoVT.getVectorNumElements(), DL, IdxTy));
+ HiVT, N, DAG.getVectorIdxConstant(LoVT.getVectorNumElements(), DL));
return std::make_pair(Lo, Hi);
}
@@ -1433,18 +1545,17 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()),
HiMemVT, HiAlign, Load->getMemOperand()->getFlags());
- auto IdxTy = getVectorIdxTy(DAG.getDataLayout());
SDValue Join;
if (LoVT == HiVT) {
// This is the case that the vector is power of two so was evenly split.
Join = DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad);
} else {
Join = DAG.getNode(ISD::INSERT_SUBVECTOR, SL, VT, DAG.getUNDEF(VT), LoLoad,
- DAG.getConstant(0, SL, IdxTy));
- Join = DAG.getNode(HiVT.isVector() ? ISD::INSERT_SUBVECTOR
- : ISD::INSERT_VECTOR_ELT,
- SL, VT, Join, HiLoad,
- DAG.getConstant(LoVT.getVectorNumElements(), SL, IdxTy));
+ DAG.getVectorIdxConstant(0, SL));
+ Join = DAG.getNode(
+ HiVT.isVector() ? ISD::INSERT_SUBVECTOR : ISD::INSERT_VECTOR_ELT, SL,
+ VT, Join, HiLoad,
+ DAG.getVectorIdxConstant(LoVT.getVectorNumElements(), SL));
}
SDValue Ops[] = {Join, DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
@@ -1474,7 +1585,7 @@ SDValue AMDGPUTargetLowering::WidenVectorLoad(SDValue Op,
WideMemVT, BaseAlign, Load->getMemOperand()->getFlags());
return DAG.getMergeValues(
{DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, VT, WideLoad,
- DAG.getConstant(0, SL, getVectorIdxTy(DAG.getDataLayout()))),
+ DAG.getVectorIdxConstant(0, SL)),
WideLoad.getValue(1)},
SL);
}
@@ -1588,9 +1699,11 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG,
const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
// float fr = mad(fqneg, fb, fa);
- unsigned OpCode = MFI->getMode().FP32Denormals ?
- (unsigned)AMDGPUISD::FMAD_FTZ :
- (unsigned)ISD::FMAD;
+ unsigned OpCode = !Subtarget->hasMadMacF32Insts() ?
+ (unsigned)ISD::FMA :
+ !MFI->getMode().allFP32Denormals() ?
+ (unsigned)ISD::FMAD :
+ (unsigned)AMDGPUISD::FMAD_FTZ;
SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa);
// int iq = (int)fq;
@@ -1673,9 +1786,11 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// Compute denominator reciprocal.
- unsigned FMAD = MFI->getMode().FP32Denormals ?
- (unsigned)AMDGPUISD::FMAD_FTZ :
- (unsigned)ISD::FMAD;
+ unsigned FMAD = !Subtarget->hasMadMacF32Insts() ?
+ (unsigned)ISD::FMA :
+ !MFI->getMode().allFP32Denormals() ?
+ (unsigned)ISD::FMAD :
+ (unsigned)AMDGPUISD::FMAD_FTZ;
SDValue Cvt_Lo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Lo);
SDValue Cvt_Hi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Hi);
@@ -1861,103 +1976,43 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
return Res;
}
- SDValue Num = Op.getOperand(0);
- SDValue Den = Op.getOperand(1);
-
- // RCP = URECIP(Den) = 2^32 / Den + e
- // e is rounding error.
- SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
-
- // RCP_LO = mul(RCP, Den) */
- SDValue RCP_LO = DAG.getNode(ISD::MUL, DL, VT, RCP, Den);
-
- // RCP_HI = mulhu (RCP, Den) */
- SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
-
- // NEG_RCP_LO = -RCP_LO
- SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
- RCP_LO);
-
- // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
- SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, DL, VT),
- NEG_RCP_LO, RCP_LO,
- ISD::SETEQ);
- // Calculate the rounding error from the URECIP instruction
- // E = mulhu(ABS_RCP_LO, RCP)
- SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
-
- // RCP_A_E = RCP + E
- SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
-
- // RCP_S_E = RCP - E
- SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
-
- // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
- SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, DL, VT),
- RCP_A_E, RCP_S_E,
- ISD::SETEQ);
- // Quotient = mulhu(Tmp0, Num)
- SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
-
- // Num_S_Remainder = Quotient * Den
- SDValue Num_S_Remainder = DAG.getNode(ISD::MUL, DL, VT, Quotient, Den);
-
- // Remainder = Num - Num_S_Remainder
- SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
-
- // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
- SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
- DAG.getConstant(-1, DL, VT),
- DAG.getConstant(0, DL, VT),
- ISD::SETUGE);
- // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
- SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
- Num_S_Remainder,
- DAG.getConstant(-1, DL, VT),
- DAG.getConstant(0, DL, VT),
- ISD::SETUGE);
- // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
- SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
- Remainder_GE_Zero);
-
- // Calculate Division result:
-
- // Quotient_A_One = Quotient + 1
- SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
- DAG.getConstant(1, DL, VT));
-
- // Quotient_S_One = Quotient - 1
- SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
- DAG.getConstant(1, DL, VT));
-
- // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
- SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, DL, VT),
- Quotient, Quotient_A_One, ISD::SETEQ);
-
- // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
- Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, DL, VT),
- Quotient_S_One, Div, ISD::SETEQ);
-
- // Calculate Rem result:
-
- // Remainder_S_Den = Remainder - Den
- SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
-
- // Remainder_A_Den = Remainder + Den
- SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
-
- // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
- SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, DL, VT),
- Remainder, Remainder_S_Den, ISD::SETEQ);
-
- // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
- Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, DL, VT),
- Remainder_A_Den, Rem, ISD::SETEQ);
- SDValue Ops[2] = {
- Div,
- Rem
- };
- return DAG.getMergeValues(Ops, DL);
+ SDValue X = Op.getOperand(0);
+ SDValue Y = Op.getOperand(1);
+
+ // See AMDGPUCodeGenPrepare::expandDivRem32 for a description of the
+ // algorithm used here.
+
+ // Initial estimate of inv(y).
+ SDValue Z = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Y);
+
+ // One round of UNR.
+ SDValue NegY = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Y);
+ SDValue NegYZ = DAG.getNode(ISD::MUL, DL, VT, NegY, Z);
+ Z = DAG.getNode(ISD::ADD, DL, VT, Z,
+ DAG.getNode(ISD::MULHU, DL, VT, Z, NegYZ));
+
+ // Quotient/remainder estimate.
+ SDValue Q = DAG.getNode(ISD::MULHU, DL, VT, X, Z);
+ SDValue R =
+ DAG.getNode(ISD::SUB, DL, VT, X, DAG.getNode(ISD::MUL, DL, VT, Q, Y));
+
+ // First quotient/remainder refinement.
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue One = DAG.getConstant(1, DL, VT);
+ SDValue Cond = DAG.getSetCC(DL, CCVT, R, Y, ISD::SETUGE);
+ Q = DAG.getNode(ISD::SELECT, DL, VT, Cond,
+ DAG.getNode(ISD::ADD, DL, VT, Q, One), Q);
+ R = DAG.getNode(ISD::SELECT, DL, VT, Cond,
+ DAG.getNode(ISD::SUB, DL, VT, R, Y), R);
+
+ // Second quotient/remainder refinement.
+ Cond = DAG.getSetCC(DL, CCVT, R, Y, ISD::SETUGE);
+ Q = DAG.getNode(ISD::SELECT, DL, VT, Cond,
+ DAG.getNode(ISD::ADD, DL, VT, Q, One), Q);
+ R = DAG.getNode(ISD::SELECT, DL, VT, Cond,
+ DAG.getNode(ISD::SUB, DL, VT, R, Y), R);
+
+ return DAG.getMergeValues({Q, R}, DL);
}
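For readers without AMDGPUCodeGenPrepare at hand, here is a rough scalar model of the expansion built above, with MULHU written as the high 32 bits of a 64-bit product. The initial estimate is an assumed stand-in for AMDGPUISD::URECIP; the real hardware reciprocal is less precise, which is why the Newton-Raphson round and the two conditional refinements exist.

#include <cstdint>

// Assumed stand-in for AMDGPUISD::URECIP: an estimate of 2^32 / y.
static uint32_t urecipEstimate(uint32_t y) {
  return static_cast<uint32_t>(0x100000000ULL / y);
}

static void udivrem32(uint32_t x, uint32_t y, uint32_t &q, uint32_t &r) {
  uint32_t z = urecipEstimate(y);                           // initial estimate of inv(y)
  uint32_t negYZ = (0u - y) * z;                            // NegY * Z (mod 2^32)
  z += static_cast<uint32_t>((uint64_t(z) * negYZ) >> 32);  // one UNR round
  q = static_cast<uint32_t>((uint64_t(x) * z) >> 32);       // quotient estimate (MULHU)
  r = x - q * y;                                            // remainder estimate
  if (r >= y) { ++q; r -= y; }                              // first refinement
  if (r >= y) { ++q; r -= y; }                              // second refinement
}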
SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
@@ -2164,8 +2219,7 @@ SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) con
// Don't handle v2f16. The extra instructions to scalarize and repack around the
// compare and vselect end up producing worse code than scalarizing the whole
// operation.
-SDValue AMDGPUTargetLowering::LowerFROUND_LegalFTRUNC(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue X = Op.getOperand(0);
EVT VT = Op.getValueType();
@@ -2194,75 +2248,6 @@ SDValue AMDGPUTargetLowering::LowerFROUND_LegalFTRUNC(SDValue Op,
return DAG.getNode(ISD::FADD, SL, VT, T, Sel);
}
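The body of this function is largely elided by the hunk, but the visible return of T plus Sel matches the usual round-half-away-from-zero construction on top of a legal ftrunc. A hedged scalar sketch of that construction (assuming the standard formulation rather than quoting the elided DAG code):

#include <cmath>

// round(x) built from trunc: add copysign(1.0, x) when the dropped
// fractional part is at least 0.5 in magnitude, otherwise add 0.
static double roundViaTrunc(double x) {
  double t = std::trunc(x);                 // T
  double diff = x - t;                      // fractional part, keeps x's sign
  double sel = (std::fabs(diff) >= 0.5) ? std::copysign(1.0, x) : 0.0;
  return t + sel;                           // FADD T, Sel
}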
-SDValue AMDGPUTargetLowering::LowerFROUND64(SDValue Op, SelectionDAG &DAG) const {
- SDLoc SL(Op);
- SDValue X = Op.getOperand(0);
-
- SDValue L = DAG.getNode(ISD::BITCAST, SL, MVT::i64, X);
-
- const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
- const SDValue One = DAG.getConstant(1, SL, MVT::i32);
- const SDValue NegOne = DAG.getConstant(-1, SL, MVT::i32);
- const SDValue FiftyOne = DAG.getConstant(51, SL, MVT::i32);
- EVT SetCCVT =
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32);
-
- SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
-
- SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC, One);
-
- SDValue Exp = extractF64Exponent(Hi, SL, DAG);
-
- const SDValue Mask = DAG.getConstant(INT64_C(0x000fffffffffffff), SL,
- MVT::i64);
-
- SDValue M = DAG.getNode(ISD::SRA, SL, MVT::i64, Mask, Exp);
- SDValue D = DAG.getNode(ISD::SRA, SL, MVT::i64,
- DAG.getConstant(INT64_C(0x0008000000000000), SL,
- MVT::i64),
- Exp);
-
- SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, L, M);
- SDValue Tmp1 = DAG.getSetCC(SL, SetCCVT,
- DAG.getConstant(0, SL, MVT::i64), Tmp0,
- ISD::SETNE);
-
- SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, Tmp1,
- D, DAG.getConstant(0, SL, MVT::i64));
- SDValue K = DAG.getNode(ISD::ADD, SL, MVT::i64, L, Tmp2);
-
- K = DAG.getNode(ISD::AND, SL, MVT::i64, K, DAG.getNOT(SL, M, MVT::i64));
- K = DAG.getNode(ISD::BITCAST, SL, MVT::f64, K);
-
- SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
- SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
- SDValue ExpEqNegOne = DAG.getSetCC(SL, SetCCVT, NegOne, Exp, ISD::SETEQ);
-
- SDValue Mag = DAG.getNode(ISD::SELECT, SL, MVT::f64,
- ExpEqNegOne,
- DAG.getConstantFP(1.0, SL, MVT::f64),
- DAG.getConstantFP(0.0, SL, MVT::f64));
-
- SDValue S = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, Mag, X);
-
- K = DAG.getNode(ISD::SELECT, SL, MVT::f64, ExpLt0, S, K);
- K = DAG.getNode(ISD::SELECT, SL, MVT::f64, ExpGt51, X, K);
-
- return K;
-}
-
-SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
-
- if (isOperationLegal(ISD::FTRUNC, VT))
- return LowerFROUND_LegalFTRUNC(Op, DAG);
-
- if (VT == MVT::f64)
- return LowerFROUND64(Op, DAG);
-
- llvm_unreachable("unhandled type");
-}
-
SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
@@ -2793,6 +2778,7 @@ static bool isI24(SDValue Op, SelectionDAG &DAG) {
static SDValue simplifyI24(SDNode *Node24,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
bool IsIntrin = Node24->getOpcode() == ISD::INTRINSIC_WO_CHAIN;
SDValue LHS = IsIntrin ? Node24->getOperand(1) : Node24->getOperand(0);
@@ -2806,11 +2792,11 @@ static SDValue simplifyI24(SDNode *Node24,
APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24);
- // First try to simplify using GetDemandedBits which allows the operands to
- // have other uses, but will only perform simplifications that involve
- // bypassing some nodes for this user.
- SDValue DemandedLHS = DAG.GetDemandedBits(LHS, Demanded);
- SDValue DemandedRHS = DAG.GetDemandedBits(RHS, Demanded);
+ // First try to simplify using SimplifyMultipleUseDemandedBits which allows
+ // the operands to have other uses, but will only perform simplifications that
+ // involve bypassing some nodes for this user.
+ SDValue DemandedLHS = TLI.SimplifyMultipleUseDemandedBits(LHS, Demanded, DAG);
+ SDValue DemandedRHS = TLI.SimplifyMultipleUseDemandedBits(RHS, Demanded, DAG);
if (DemandedLHS || DemandedRHS)
return DAG.getNode(NewOpcode, SDLoc(Node24), Node24->getVTList(),
DemandedLHS ? DemandedLHS : LHS,
@@ -2818,7 +2804,6 @@ static SDValue simplifyI24(SDNode *Node24,
// Now try SimplifyDemandedBits which can simplify the nodes used by our
// operands if this node is the only user.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedBits(LHS, Demanded, DCI))
return SDValue(Node24, 0);
if (TLI.SimplifyDemandedBits(RHS, Demanded, DCI))
@@ -2877,7 +2862,7 @@ SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
return SDValue();
LoadSDNode *LN = cast<LoadSDNode>(N);
- if (LN->isVolatile() || !ISD::isNormalLoad(LN) || hasVolatileUser(LN))
+ if (!LN->isSimple() || !ISD::isNormalLoad(LN) || hasVolatileUser(LN))
return SDValue();
SDLoc SL(N);
@@ -2885,16 +2870,17 @@ SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
EVT VT = LN->getMemoryVT();
unsigned Size = VT.getStoreSize();
- unsigned Align = LN->getAlignment();
- if (Align < Size && isTypeLegal(VT)) {
+ Align Alignment = LN->getAlign();
+ if (Alignment < Size && isTypeLegal(VT)) {
bool IsFast;
unsigned AS = LN->getAddressSpace();
// Expand unaligned loads earlier than legalization. Due to visitation order
// problems during legalization, the emitted instructions to pack and unpack
// the bytes again are not eliminated in the case of an unaligned copy.
- if (!allowsMisalignedMemoryAccesses(
- VT, AS, Align, LN->getMemOperand()->getFlags(), &IsFast)) {
+ if (!allowsMisalignedMemoryAccesses(VT, AS, Alignment.value(),
+ LN->getMemOperand()->getFlags(),
+ &IsFast)) {
SDValue Ops[2];
if (VT.isVector())
@@ -2931,7 +2917,7 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
return SDValue();
StoreSDNode *SN = cast<StoreSDNode>(N);
- if (SN->isVolatile() || !ISD::isNormalStore(SN))
+ if (!SN->isSimple() || !ISD::isNormalStore(SN))
return SDValue();
EVT VT = SN->getMemoryVT();
@@ -2939,8 +2925,8 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
SDLoc SL(N);
SelectionDAG &DAG = DCI.DAG;
- unsigned Align = SN->getAlignment();
- if (Align < Size && isTypeLegal(VT)) {
+ Align Alignment = SN->getAlign();
+ if (Alignment < Size && isTypeLegal(VT)) {
bool IsFast;
unsigned AS = SN->getAddressSpace();
@@ -2948,8 +2934,9 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
// order problems during legalization, the emitted instructions to pack and
// unpack the bytes again are not eliminated in the case of an unaligned
// copy.
- if (!allowsMisalignedMemoryAccesses(
- VT, AS, Align, SN->getMemOperand()->getFlags(), &IsFast)) {
+ if (!allowsMisalignedMemoryAccesses(VT, AS, Alignment.value(),
+ SN->getMemOperand()->getFlags(),
+ &IsFast)) {
if (VT.isVector())
return scalarizeVectorStore(SN, DAG);
@@ -3012,6 +2999,16 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
case Intrinsic::amdgcn_mul_i24:
case Intrinsic::amdgcn_mul_u24:
return simplifyI24(N, DCI);
+ case Intrinsic::amdgcn_fract:
+ case Intrinsic::amdgcn_rsq:
+ case Intrinsic::amdgcn_rcp_legacy:
+ case Intrinsic::amdgcn_rsq_legacy:
+ case Intrinsic::amdgcn_rsq_clamp:
+ case Intrinsic::amdgcn_ldexp: {
+ // FIXME: This is probably wrong. If src is an sNaN, it won't be quieted
+ SDValue Src = N->getOperand(1);
+ return Src.isUndef() ? Src : SDValue();
+ }
default:
return SDValue();
}
@@ -3465,24 +3462,24 @@ SDValue AMDGPUTargetLowering::performCtlz_CttzCombine(const SDLoc &SL, SDValue C
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
SDValue CmpLHS = Cond.getOperand(0);
- unsigned Opc = isCttzOpc(RHS.getOpcode()) ? AMDGPUISD::FFBL_B32 :
- AMDGPUISD::FFBH_U32;
-
// select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x
// select (setcc x, 0, eq), -1, (cttz_zero_undef x) -> ffbl_u32 x
if (CCOpcode == ISD::SETEQ &&
(isCtlzOpc(RHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) &&
- RHS.getOperand(0) == CmpLHS &&
- isNegativeOne(LHS)) {
+ RHS.getOperand(0) == CmpLHS && isNegativeOne(LHS)) {
+ unsigned Opc =
+ isCttzOpc(RHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;
return getFFBX_U32(DAG, CmpLHS, SL, Opc);
}
// select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x
// select (setcc x, 0, ne), (cttz_zero_undef x), -1 -> ffbl_u32 x
if (CCOpcode == ISD::SETNE &&
- (isCtlzOpc(LHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) &&
- LHS.getOperand(0) == CmpLHS &&
- isNegativeOne(RHS)) {
+ (isCtlzOpc(LHS.getOpcode()) || isCttzOpc(LHS.getOpcode())) &&
+ LHS.getOperand(0) == CmpLHS && isNegativeOne(RHS)) {
+ unsigned Opc =
+ isCttzOpc(LHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;
+
return getFFBX_U32(DAG, CmpLHS, SL, Opc);
}
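Both rewrites lean on the fact that the hardware find-first-bit instructions already return -1 (all ones) for a zero input, so the explicit compare-against-zero select becomes redundant. A small model of the unsigned count-leading-zeros case; the -1-on-zero behaviour is the assumption being relied upon here:

#include <cstdint>

// Models AMDGPUISD::FFBH_U32: index of the highest set bit counted from the
// MSB, or all-ones when the input is zero.
static uint32_t ffbh_u32(uint32_t x) {
  if (x == 0)
    return 0xffffffffu;                     // matches the select's -1 arm
  uint32_t n = 0;
  while (!(x & 0x80000000u)) {
    x <<= 1;
    ++n;
  }
  return n;
}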
@@ -4117,12 +4114,12 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
const TargetRegisterClass *RC,
- unsigned Reg, EVT VT,
+ Register Reg, EVT VT,
const SDLoc &SL,
bool RawReg) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
- unsigned VReg;
+ Register VReg;
if (!MRI.isLiveIn(Reg)) {
VReg = MRI.createVirtualRegister(RC);
@@ -4266,11 +4263,9 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(DIV_FMAS)
NODE_NAME_CASE(DIV_FIXUP)
NODE_NAME_CASE(FMAD_FTZ)
- NODE_NAME_CASE(TRIG_PREOP)
NODE_NAME_CASE(RCP)
NODE_NAME_CASE(RSQ)
NODE_NAME_CASE(RCP_LEGACY)
- NODE_NAME_CASE(RSQ_LEGACY)
NODE_NAME_CASE(RCP_IFLAG)
NODE_NAME_CASE(FMUL_LEGACY)
NODE_NAME_CASE(RSQ_CLAMP)
@@ -4298,8 +4293,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(MAD_U64_U32)
NODE_NAME_CASE(PERM)
NODE_NAME_CASE(TEXTURE_FETCH)
- NODE_NAME_CASE(EXPORT)
- NODE_NAME_CASE(EXPORT_DONE)
NODE_NAME_CASE(R600_EXPORT)
NODE_NAME_CASE(CONST_ADDRESS)
NODE_NAME_CASE(REGISTER_LOAD)
@@ -4323,12 +4316,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CONST_DATA_PTR)
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
NODE_NAME_CASE(LDS)
- NODE_NAME_CASE(KILL)
NODE_NAME_CASE(DUMMY_CHAIN)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
- NODE_NAME_CASE(INTERP_P1LL_F16)
- NODE_NAME_CASE(INTERP_P1LV_F16)
- NODE_NAME_CASE(INTERP_P2_F16)
NODE_NAME_CASE(LOAD_D16_HI)
NODE_NAME_CASE(LOAD_D16_LO)
NODE_NAME_CASE(LOAD_D16_HI_I8)
@@ -4347,6 +4336,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(ATOMIC_DEC)
NODE_NAME_CASE(ATOMIC_LOAD_FMIN)
NODE_NAME_CASE(ATOMIC_LOAD_FMAX)
+ NODE_NAME_CASE(ATOMIC_LOAD_CSUB)
NODE_NAME_CASE(BUFFER_LOAD)
NODE_NAME_CASE(BUFFER_LOAD_UBYTE)
NODE_NAME_CASE(BUFFER_LOAD_USHORT)
@@ -4373,6 +4363,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BUFFER_ATOMIC_INC)
NODE_NAME_CASE(BUFFER_ATOMIC_DEC)
NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP)
+ NODE_NAME_CASE(BUFFER_ATOMIC_CSUB)
NODE_NAME_CASE(BUFFER_ATOMIC_FADD)
NODE_NAME_CASE(BUFFER_ATOMIC_PK_FADD)
NODE_NAME_CASE(ATOMIC_PK_FADD)
@@ -4539,11 +4530,10 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
}
case AMDGPUISD::LDS: {
auto GA = cast<GlobalAddressSDNode>(Op.getOperand(0).getNode());
- unsigned Align = GA->getGlobal()->getAlignment();
+ Align Alignment = GA->getGlobal()->getPointerAlignment(DAG.getDataLayout());
Known.Zero.setHighBits(16);
- if (Align)
- Known.Zero.setLowBits(Log2_32(Align));
+ Known.Zero.setLowBits(Log2(Alignment));
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
@@ -4607,6 +4597,29 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
}
}
+unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr(
+ GISelKnownBits &Analysis, Register R,
+ const APInt &DemandedElts, const MachineRegisterInfo &MRI,
+ unsigned Depth) const {
+ const MachineInstr *MI = MRI.getVRegDef(R);
+ if (!MI)
+ return 1;
+
+ // TODO: Check range metadata on MMO.
+ switch (MI->getOpcode()) {
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
+ return 25;
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
+ return 17;
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
+ return 24;
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
+ return 16;
+ default:
+ return 1;
+ }
+}
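The constants follow from widening an N-bit memory value to 32 bits: a sign-extending load replicates the sign bit, giving 32 - N + 1 known sign bits, while a zero-extending load clears the top 32 - N bits. A tiny sketch of that arithmetic (illustrative only, not part of the patch):

// Known sign bits of an N-bit load widened to 32 bits.
constexpr unsigned signBitsSExtLoad(unsigned N) { return 32 - N + 1; } // 8 -> 25, 16 -> 17
constexpr unsigned signBitsZExtLoad(unsigned N) { return 32 - N; }     // 8 -> 24, 16 -> 16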
+
bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
bool SNaN,
@@ -4648,7 +4661,6 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
case AMDGPUISD::RCP:
case AMDGPUISD::RSQ:
case AMDGPUISD::RCP_LEGACY:
- case AMDGPUISD::RSQ_LEGACY:
case AMDGPUISD::RSQ_CLAMP: {
if (SNaN)
return true;
@@ -4665,7 +4677,6 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
case AMDGPUISD::DIV_SCALE:
case AMDGPUISD::DIV_FMAS:
case AMDGPUISD::DIV_FIXUP:
- case AMDGPUISD::TRIG_PREOP:
// TODO: Refine on operands.
return SNaN;
case AMDGPUISD::SIN_HW:
@@ -4692,6 +4703,18 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
}
+ case Intrinsic::amdgcn_rcp:
+ case Intrinsic::amdgcn_rsq:
+ case Intrinsic::amdgcn_rcp_legacy:
+ case Intrinsic::amdgcn_rsq_legacy:
+ case Intrinsic::amdgcn_rsq_clamp: {
+ if (SNaN)
+ return true;
+
+ // TODO: Need is known positive check.
+ return false;
+ }
+ case Intrinsic::amdgcn_trig_preop:
case Intrinsic::amdgcn_fdot2:
// TODO: Refine on operand
return SNaN;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index a90b7f5653dc..85f23c81db17 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -18,6 +18,7 @@
#include "AMDGPU.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -52,8 +53,6 @@ protected:
SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFROUND_LegalFTRUNC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLOG(SDValue Op, SelectionDAG &DAG,
@@ -172,8 +171,16 @@ public:
bool isZExtFree(EVT Src, EVT Dest) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
+ SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations, bool ForCodeSize,
+ NegatibleCost &Cost,
+ unsigned Depth) const override;
+
bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
+ EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
+ ISD::NodeType ExtendKind) const override;
+
MVT getVectorIdxTy(const DataLayout &) const override;
bool isSelectSupported(SelectSupportKind) const override;
@@ -264,6 +271,12 @@ public:
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
+ unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis,
+ Register R,
+ const APInt &DemandedElts,
+ const MachineRegisterInfo &MRI,
+ unsigned Depth = 0) const override;
+
bool isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
bool SNaN = false,
@@ -276,19 +289,19 @@ public:
/// a copy from the register.
SDValue CreateLiveInRegister(SelectionDAG &DAG,
const TargetRegisterClass *RC,
- unsigned Reg, EVT VT,
+ Register Reg, EVT VT,
const SDLoc &SL,
bool RawReg = false) const;
SDValue CreateLiveInRegister(SelectionDAG &DAG,
const TargetRegisterClass *RC,
- unsigned Reg, EVT VT) const {
+ Register Reg, EVT VT) const {
return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()));
}
// Returns the raw live in register rather than a copy from it.
SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG,
const TargetRegisterClass *RC,
- unsigned Reg, EVT VT) const {
+ Register Reg, EVT VT) const {
return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()), true);
}
@@ -398,14 +411,12 @@ enum NodeType : unsigned {
// For emitting ISD::FMAD when f32 denormals are enabled because mac/mad is
// treated as an illegal operation.
FMAD_FTZ,
- TRIG_PREOP, // 1 ULP max error for f64
// RCP, RSQ - For f32, 1 ULP max error, no denormal handling.
// For f64, max error 2^29 ULP, handles denormals.
RCP,
RSQ,
RCP_LEGACY,
- RSQ_LEGACY,
RCP_IFLAG,
FMUL_LEGACY,
RSQ_CLAMP,
@@ -433,8 +444,6 @@ enum NodeType : unsigned {
MUL_LOHI_U24,
PERM,
TEXTURE_FETCH,
- EXPORT, // exp on SI+
- EXPORT_DONE, // exp on SI+ with done bit set
R600_EXPORT,
CONST_ADDRESS,
REGISTER_LOAD,
@@ -476,12 +485,8 @@ enum NodeType : unsigned {
BUILD_VERTICAL_VECTOR,
/// Pointer to the start of the shader's constant data.
CONST_DATA_PTR,
- INTERP_P1LL_F16,
- INTERP_P1LV_F16,
- INTERP_P2_F16,
PC_ADD_REL_OFFSET,
LDS,
- KILL,
DUMMY_CHAIN,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
LOAD_D16_HI,
@@ -503,6 +508,7 @@ enum NodeType : unsigned {
ATOMIC_DEC,
ATOMIC_LOAD_FMIN,
ATOMIC_LOAD_FMAX,
+ ATOMIC_LOAD_CSUB,
BUFFER_LOAD,
BUFFER_LOAD_UBYTE,
BUFFER_LOAD_USHORT,
@@ -529,6 +535,7 @@ enum NodeType : unsigned {
BUFFER_ATOMIC_INC,
BUFFER_ATOMIC_DEC,
BUFFER_ATOMIC_CMPSWAP,
+ BUFFER_ATOMIC_CSUB,
BUFFER_ATOMIC_FADD,
BUFFER_ATOMIC_PK_FADD,
ATOMIC_PK_FADD,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
index 64d761997b0c..3b5d91133a2f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -23,7 +23,6 @@
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -67,9 +66,9 @@ public:
static char ID; // Pass identification, replacement for typeid
- unsigned getInlineThreshold(CallSite CS) const;
+ unsigned getInlineThreshold(CallBase &CB) const;
- InlineCost getInlineCost(CallSite CS) override;
+ InlineCost getInlineCost(CallBase &CB) override;
bool runOnSCC(CallGraphSCC &SCC) override;
@@ -106,13 +105,13 @@ void AMDGPUInliner::getAnalysisUsage(AnalysisUsage &AU) const {
LegacyInlinerBase::getAnalysisUsage(AU);
}
-unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
+unsigned AMDGPUInliner::getInlineThreshold(CallBase &CB) const {
int Thres = Params.DefaultThreshold;
- Function *Caller = CS.getCaller();
+ Function *Caller = CB.getCaller();
// Listen to the inlinehint attribute when it would increase the threshold
// and the caller does not need to minimize its size.
- Function *Callee = CS.getCalledFunction();
+ Function *Callee = CB.getCalledFunction();
bool InlineHint = Callee && !Callee->isDeclaration() &&
Callee->hasFnAttribute(Attribute::InlineHint);
if (InlineHint && Params.HintThreshold && Params.HintThreshold > Thres
@@ -129,7 +128,7 @@ unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
  // Increase the inline threshold to allow inlining in this case.
uint64_t AllocaSize = 0;
SmallPtrSet<const AllocaInst *, 8> AIVisited;
- for (Value *PtrArg : CS.args()) {
+ for (Value *PtrArg : CB.args()) {
PointerType *Ty = dyn_cast<PointerType>(PtrArg->getType());
if (!Ty || (Ty->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS &&
Ty->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS))
@@ -156,8 +155,8 @@ unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
// Check if call is just a wrapper around another call.
// In this case we only have call and ret instructions.
-static bool isWrapperOnlyCall(CallSite CS) {
- Function *Callee = CS.getCalledFunction();
+static bool isWrapperOnlyCall(CallBase &CB) {
+ Function *Callee = CB.getCalledFunction();
if (!Callee || Callee->size() != 1)
return false;
const BasicBlock &BB = Callee->getEntryBlock();
@@ -174,32 +173,32 @@ static bool isWrapperOnlyCall(CallSite CS) {
return false;
}
-InlineCost AMDGPUInliner::getInlineCost(CallSite CS) {
- Function *Callee = CS.getCalledFunction();
- Function *Caller = CS.getCaller();
+InlineCost AMDGPUInliner::getInlineCost(CallBase &CB) {
+ Function *Callee = CB.getCalledFunction();
+ Function *Caller = CB.getCaller();
if (!Callee || Callee->isDeclaration())
return llvm::InlineCost::getNever("undefined callee");
- if (CS.isNoInline())
+ if (CB.isNoInline())
return llvm::InlineCost::getNever("noinline");
TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
if (!TTI.areInlineCompatible(Caller, Callee))
return llvm::InlineCost::getNever("incompatible");
- if (CS.hasFnAttr(Attribute::AlwaysInline)) {
+ if (CB.hasFnAttr(Attribute::AlwaysInline)) {
auto IsViable = isInlineViable(*Callee);
- if (IsViable)
+ if (IsViable.isSuccess())
return llvm::InlineCost::getAlways("alwaysinline viable");
- return llvm::InlineCost::getNever(IsViable.message);
+ return llvm::InlineCost::getNever(IsViable.getFailureReason());
}
- if (isWrapperOnlyCall(CS))
+ if (isWrapperOnlyCall(CB))
return llvm::InlineCost::getAlways("wrapper-only call");
InlineParams LocalParams = Params;
- LocalParams.DefaultThreshold = (int)getInlineThreshold(CS);
+ LocalParams.DefaultThreshold = (int)getInlineThreshold(CB);
bool RemarksEnabled = false;
const auto &BBs = Caller->getBasicBlockList();
if (!BBs.empty()) {
@@ -209,14 +208,13 @@ InlineCost AMDGPUInliner::getInlineCost(CallSite CS) {
}
OptimizationRemarkEmitter ORE(Caller);
- std::function<AssumptionCache &(Function &)> GetAssumptionCache =
- [this](Function &F) -> AssumptionCache & {
+ auto GetAssumptionCache = [this](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
};
- auto IC = llvm::getInlineCost(cast<CallBase>(*CS.getInstruction()), Callee,
- LocalParams, TTI, GetAssumptionCache, None, PSI,
- RemarksEnabled ? &ORE : nullptr);
+ auto IC = llvm::getInlineCost(CB, Callee, LocalParams, TTI,
+ GetAssumptionCache, GetTLI, nullptr, PSI,
+ RemarksEnabled ? &ORE : nullptr);
if (IC && !IC.isAlways() && !Callee->hasFnAttribute(Attribute::InlineHint)) {
// Single BB does not increase total BB amount, thus subtract 1
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 9951cbf2326e..6c13bc8599db 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUInstrInfo.h"
-#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 698189e14c21..61b78acad3f4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -55,6 +55,9 @@ struct ImageDimIntrinsicInfo {
};
const ImageDimIntrinsicInfo *getImageDimIntrinsicInfo(unsigned Intr);
+const ImageDimIntrinsicInfo *getImageDimInstrinsicByBaseOpcode(unsigned BaseOpcode,
+ unsigned Dim);
+
} // end AMDGPU namespace
} // End llvm namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 50c451be4b86..894677ec68b6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains DAG node defintions for the AMDGPU target.
+// This file contains DAG node definitions for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
@@ -18,10 +18,6 @@ def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
]>;
-def AMDGPUTrigPreOp : SDTypeProfile<1, 2,
- [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
->;
-
def AMDGPULdExpOp : SDTypeProfile<1, 2,
[SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
>;
@@ -121,8 +117,6 @@ def AMDGPUrcp_impl : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;
// out = 1.0 / sqrt(a)
def AMDGPUrsq_impl : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;
-// out = 1.0 / sqrt(a)
-def AMDGPUrsq_legacy_impl : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;
def AMDGPUrcp_legacy_impl : SDNode<"AMDGPUISD::RCP_LEGACY", SDTFPUnaryOp>;
def AMDGPUrcp_iflag : SDNode<"AMDGPUISD::RCP_IFLAG", SDTFPUnaryOp>;
@@ -151,7 +145,7 @@ def AMDGPUfmax_legacy : SDNode<"AMDGPUISD::FMAX_LEGACY", SDTFPBinOp,
[]
>;
-def AMDGPUfmul_legacy : SDNode<"AMDGPUISD::FMUL_LEGACY", SDTFPBinOp,
+def AMDGPUfmul_legacy_impl : SDNode<"AMDGPUISD::FMUL_LEGACY", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]
>;
@@ -204,13 +198,6 @@ def AMDGPUSetCCOp : SDTypeProfile<1, 3, [ // setcc
def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;
-def AMDGPUSetRegOp : SDTypeProfile<0, 2, [
- SDTCisInt<0>, SDTCisInt<1>
-]>;
-
-def AMDGPUsetreg : SDNode<"AMDGPUISD::SETREG", AMDGPUSetRegOp, [
- SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>;
-
def AMDGPUfma : SDNode<"AMDGPUISD::FMA_W_CHAIN", SDTFPTernaryOp, [
SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
@@ -238,7 +225,7 @@ def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;
// Special case divide FMA with scale and flags (src0 = Quotient,
// src1 = Denominator, src2 = Numerator).
-def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp,
+def AMDGPUdiv_fmas_impl : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp,
[SDNPOptInGlue]>;
// Single or double precision division fixup.
@@ -248,9 +235,6 @@ def AMDGPUdiv_fixup_impl : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;
def AMDGPUfmad_ftz_impl : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;
-// Look Up 2.0 / pi src0 with segment select src1[4:0]
-def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;
-
def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
[SDNPHasChain, SDNPMayLoad]>;
@@ -278,18 +262,18 @@ def AMDGPUatomic_cmp_swap : SDNode<"AMDGPUISD::ATOMIC_CMP_SWAP",
def AMDGPUround : SDNode<"ISD::FROUND",
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>>;
-def AMDGPUbfe_u32 : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;
-def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
+def AMDGPUbfe_u32_impl : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;
+def AMDGPUbfe_i32_impl : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
-def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>;
-def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntUnaryOp>;
+def AMDGPUffbh_u32_impl : SDNode<"AMDGPUISD::FFBH_U32", SDTIntBitCountUnaryOp>;
+def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntBitCountUnaryOp>;
-def AMDGPUffbl_b32 : SDNode<"AMDGPUISD::FFBL_B32", SDTIntUnaryOp>;
+def AMDGPUffbl_b32_impl : SDNode<"AMDGPUISD::FFBL_B32", SDTIntBitCountUnaryOp>;
 // Signed and unsigned 24-bit multiply. The highest 8 bits are ignored
-// when performing the mulitply. The result is a 32-bit value.
+// when performing the multiply. The result is a 32-bit value.
def AMDGPUmul_u24_impl : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
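Although this is a TableGen file, a quick C++ model of the 24-bit multiply semantics described in the comment may help. The masking and sign-extension details below restate the documented behaviour and are not taken from this patch:

#include <cstdint>

// Unsigned 24 x 24 -> low 32 bits of the product.
static uint32_t mul_u24(uint32_t a, uint32_t b) {
  return (a & 0xffffffu) * (b & 0xffffffu);
}

// Signed variant: sign-extend bit 23 of each operand first.
static int32_t sext24(uint32_t v) {
  v &= 0xffffffu;
  return (v & 0x800000u)
             ? static_cast<int32_t>(static_cast<int64_t>(v) - 0x1000000)
             : static_cast<int32_t>(v);
}

static int32_t mul_i24(uint32_t a, uint32_t b) {
  int64_t p = static_cast<int64_t>(sext24(a)) * sext24(b);
  return static_cast<int32_t>(static_cast<uint32_t>(p)); // low 32 bits
}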
@@ -321,7 +305,7 @@ def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp,
def AMDGPUfmed3_impl : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>;
-def AMDGPUfdot2 : SDNode<"AMDGPUISD::FDOT2",
+def AMDGPUfdot2_impl : SDNode<"AMDGPUISD::FDOT2",
SDTypeProfile<1, 4, [SDTCisSameAs<0, 3>, SDTCisSameAs<1, 2>,
SDTCisFP<0>, SDTCisVec<1>,
SDTCisInt<4>]>,
@@ -329,21 +313,6 @@ def AMDGPUfdot2 : SDNode<"AMDGPUISD::FDOT2",
def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
-def AMDGPUinterp_p1ll_f16 : SDNode<"AMDGPUISD::INTERP_P1LL_F16",
- SDTypeProfile<1, 7, [SDTCisFP<0>]>,
- [SDNPInGlue, SDNPOutGlue]>;
-
-def AMDGPUinterp_p1lv_f16 : SDNode<"AMDGPUISD::INTERP_P1LV_F16",
- SDTypeProfile<1, 9, [SDTCisFP<0>]>,
- [SDNPInGlue, SDNPOutGlue]>;
-
-def AMDGPUinterp_p2_f16 : SDNode<"AMDGPUISD::INTERP_P2_F16",
- SDTypeProfile<1, 8, [SDTCisFP<0>]>,
- [SDNPInGlue]>;
-
-def AMDGPUkill : SDNode<"AMDGPUISD::KILL", AMDGPUKillSDT,
- [SDNPHasChain, SDNPSideEffect]>;
-
// SI+ export
def AMDGPUExportOp : SDTypeProfile<0, 8, [
SDTCisInt<0>, // i8 tgt
@@ -358,12 +327,6 @@ def AMDGPUExportOp : SDTypeProfile<0, 8, [
]>;
-def AMDGPUexport: SDNode<"AMDGPUISD::EXPORT", AMDGPUExportOp,
- [SDNPHasChain, SDNPMayStore]>;
-
-def AMDGPUexport_done: SDNode<"AMDGPUISD::EXPORT_DONE", AMDGPUExportOp,
- [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
-
def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
@@ -398,7 +361,7 @@ def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPt
//===----------------------------------------------------------------------===//
-// Intrinsic/Custom node compatability PatFrags
+// Intrinsic/Custom node compatibility PatFrags
//===----------------------------------------------------------------------===//
def AMDGPUrcp : PatFrags<(ops node:$src), [(int_amdgcn_rcp node:$src),
@@ -406,9 +369,6 @@ def AMDGPUrcp : PatFrags<(ops node:$src), [(int_amdgcn_rcp node:$src),
def AMDGPUrcp_legacy : PatFrags<(ops node:$src), [(int_amdgcn_rcp_legacy node:$src),
(AMDGPUrcp_legacy_impl node:$src)]>;
-def AMDGPUrsq_legacy : PatFrags<(ops node:$src), [(int_amdgcn_rsq_legacy node:$src),
- (AMDGPUrsq_legacy_impl node:$src)]>;
-
def AMDGPUrsq : PatFrags<(ops node:$src), [(int_amdgcn_rsq node:$src),
(AMDGPUrsq_impl node:$src)]>;
@@ -442,6 +402,14 @@ def AMDGPUffbh_i32 : PatFrags<(ops node:$src),
[(int_amdgcn_sffbh node:$src),
(AMDGPUffbh_i32_impl node:$src)]>;
+def AMDGPUffbh_u32 : PatFrags<(ops node:$src),
+ [(ctlz_zero_undef node:$src),
+ (AMDGPUffbh_u32_impl node:$src)]>;
+
+def AMDGPUffbl_b32 : PatFrags<(ops node:$src),
+ [(cttz_zero_undef node:$src),
+ (AMDGPUffbl_b32_impl node:$src)]>;
+
def AMDGPUpkrtz_f16_f32 : PatFrags<(ops node:$src0, node:$src1),
[(int_amdgcn_cvt_pkrtz node:$src0, node:$src1),
(AMDGPUpkrtz_f16_f32_impl node:$src0, node:$src1)]>;
@@ -473,3 +441,23 @@ def AMDGPUmul_u24 : PatFrags<(ops node:$src0, node:$src1),
def AMDGPUmul_i24 : PatFrags<(ops node:$src0, node:$src1),
[(int_amdgcn_mul_i24 node:$src0, node:$src1),
(AMDGPUmul_i24_impl node:$src0, node:$src1)]>;
+
+def AMDGPUbfe_i32 : PatFrags<(ops node:$src0, node:$src1, node:$src2),
+ [(int_amdgcn_sbfe node:$src0, node:$src1, node:$src2),
+ (AMDGPUbfe_i32_impl node:$src0, node:$src1, node:$src2)]>;
+
+def AMDGPUbfe_u32 : PatFrags<(ops node:$src0, node:$src1, node:$src2),
+ [(int_amdgcn_ubfe node:$src0, node:$src1, node:$src2),
+ (AMDGPUbfe_u32_impl node:$src0, node:$src1, node:$src2)]>;
+
+def AMDGPUfmul_legacy : PatFrags<(ops node:$src0, node:$src1),
+ [(int_amdgcn_fmul_legacy node:$src0, node:$src1),
+ (AMDGPUfmul_legacy_impl node:$src0, node:$src1)]>;
+
+def AMDGPUfdot2 : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$clamp),
+ [(int_amdgcn_fdot2 node:$src0, node:$src1, node:$src2, node:$clamp),
+ (AMDGPUfdot2_impl node:$src0, node:$src1, node:$src2, node:$clamp)]>;
+
+def AMDGPUdiv_fmas : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$vcc),
+ [(int_amdgcn_div_fmas node:$src0, node:$src1, node:$src2, node:$vcc),
+ (AMDGPUdiv_fmas_impl node:$src0, node:$src1, node:$src2, node:$vcc)]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index c0ea35817ec8..2025c0fa5d21 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -15,7 +15,6 @@
#include "AMDGPUInstrInfo.h"
#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPURegisterBankInfo.h"
-#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -40,6 +39,12 @@
using namespace llvm;
using namespace MIPatternMatch;
+static cl::opt<bool> AllowRiskySelect(
+ "amdgpu-global-isel-risky-select",
+ cl::desc("Allow GlobalISel to select cases that are likely to not work yet"),
+ cl::init(false),
+ cl::ReallyHidden);
+
#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
@@ -88,6 +93,30 @@ bool AMDGPUInstructionSelector::isVCC(Register Reg,
return RB->getID() == AMDGPU::VCCRegBankID;
}
+bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
+ unsigned NewOpc) const {
+ MI.setDesc(TII.get(NewOpc));
+ MI.RemoveOperand(1); // Remove intrinsic ID.
+ MI.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
+
+ MachineOperand &Dst = MI.getOperand(0);
+ MachineOperand &Src = MI.getOperand(1);
+
+ // TODO: This should be legalized to s32 if needed
+ if (MRI->getType(Dst.getReg()) == LLT::scalar(1))
+ return false;
+
+ const TargetRegisterClass *DstRC
+ = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
+ const TargetRegisterClass *SrcRC
+ = TRI.getConstrainedRegClassForOperand(Src, *MRI);
+ if (!DstRC || DstRC != SrcRC)
+ return false;
+
+ return RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) &&
+ RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI);
+}
+
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
const DebugLoc &DL = I.getDebugLoc();
MachineBasicBlock *BB = I.getParent();
@@ -173,6 +202,14 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
const Register DefReg = I.getOperand(0).getReg();
const LLT DefTy = MRI->getType(DefReg);
+ if (DefTy == LLT::scalar(1)) {
+ if (!AllowRiskySelect) {
+ LLVM_DEBUG(dbgs() << "Skipping risky boolean phi\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Selecting risky boolean phi\n");
+ }
// TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)
@@ -261,6 +298,11 @@ bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
RC == &AMDGPU::SReg_64RegClass);
I.setDesc(TII.get(InstOpc));
+ // Dead implicit-def of scc
+ I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef
+ true, // isImp
+ false, // isKill
+ true)); // isDead
// FIXME: Hack to avoid turning the register bank into a register class.
// The selector for G_ICMP relies on seeing the register bank for the result
@@ -295,7 +337,11 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
MachineFunction *MF = BB->getParent();
Register DstReg = I.getOperand(0).getReg();
const DebugLoc &DL = I.getDebugLoc();
- unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
+ LLT Ty = MRI->getType(DstReg);
+ if (Ty.isVector())
+ return false;
+
+ unsigned Size = Ty.getSizeInBits();
const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;
@@ -445,6 +491,7 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
return true;
}
+// TODO: We should probably legalize these to only using 32-bit results.
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
Register DstReg = I.getOperand(0).getReg();
@@ -452,11 +499,21 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
LLT DstTy = MRI->getType(DstReg);
LLT SrcTy = MRI->getType(SrcReg);
const unsigned SrcSize = SrcTy.getSizeInBits();
- const unsigned DstSize = DstTy.getSizeInBits();
+ unsigned DstSize = DstTy.getSizeInBits();
// TODO: Should handle any multiple of 32 offset.
unsigned Offset = I.getOperand(2).getImm();
- if (Offset % DstSize != 0)
+ if (Offset % 32 != 0 || DstSize > 128)
+ return false;
+
+ // 16-bit operations really use 32-bit registers.
+ // FIXME: Probably should not allow 16-bit G_EXTRACT results.
+ if (DstSize == 16)
+ DstSize = 32;
+
+ const TargetRegisterClass *DstRC =
+ TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
+ if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
return false;
const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
@@ -464,20 +521,18 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, *MRI);
if (!SrcRC)
return false;
+ unsigned SubReg = SIRegisterInfo::getSubRegFromChannel(Offset / 32,
+ DstSize / 32);
+ SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
+ if (!SrcRC)
+ return false;
- ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
-
+ SrcReg = constrainOperandRegClass(*MF, TRI, *MRI, TII, RBI, I,
+ *SrcRC, I.getOperand(1));
const DebugLoc &DL = I.getDebugLoc();
- MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
- .addReg(SrcReg, 0, SubRegs[Offset / DstSize]);
+ BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
+ .addReg(SrcReg, 0, SubReg);
- for (const MachineOperand &MO : Copy->operands()) {
- const TargetRegisterClass *RC =
- TRI.getConstrainedRegClassForOperand(MO, *MRI);
- if (!RC)
- continue;
- RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
- }
I.eraseFromParent();
return true;
}
@@ -563,6 +618,90 @@ bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
return true;
}
+static bool isZero(Register Reg, const MachineRegisterInfo &MRI) {
+ int64_t Val;
+ return mi_match(Reg, MRI, m_ICst(Val)) && Val == 0;
+}
+
+bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
+ MachineInstr &MI) const {
+ if (selectImpl(MI, *CoverageInfo))
+ return true;
+
+ const LLT S32 = LLT::scalar(32);
+ const LLT V2S16 = LLT::vector(2, 16);
+
+ Register Dst = MI.getOperand(0).getReg();
+ if (MRI->getType(Dst) != V2S16)
+ return false;
+
+ const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
+ if (DstBank->getID() != AMDGPU::SGPRRegBankID)
+ return false;
+
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Src1 = MI.getOperand(2).getReg();
+ if (MRI->getType(Src0) != S32)
+ return false;
+
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock *BB = MI.getParent();
+
+ // TODO: This should probably be a combine somewhere
+  // (build_vector_trunc $src0, undef) -> copy $src0
+ MachineInstr *Src1Def = getDefIgnoringCopies(Src1, *MRI);
+ if (Src1Def && Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
+ MI.setDesc(TII.get(AMDGPU::COPY));
+ MI.RemoveOperand(2);
+ return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI) &&
+ RBI.constrainGenericRegister(Src0, AMDGPU::SReg_32RegClass, *MRI);
+ }
+
+ Register ShiftSrc0;
+ Register ShiftSrc1;
+ int64_t ShiftAmt;
+
+ // With multiple uses of the shift, this will duplicate the shift and
+ // increase register pressure.
+ //
+  // (build_vector_trunc (lshr_oneuse $src0, 16), (lshr_oneuse $src1, 16))
+ // => (S_PACK_HH_B32_B16 $src0, $src1)
+ // (build_vector_trunc $src0, (lshr_oneuse SReg_32:$src1, 16))
+ // => (S_PACK_LH_B32_B16 $src0, $src1)
+ // (build_vector_trunc $src0, $src1)
+ // => (S_PACK_LL_B32_B16 $src0, $src1)
+
+ // FIXME: This is an inconvenient way to check a specific value
+ bool Shift0 = mi_match(
+ Src0, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc0), m_ICst(ShiftAmt)))) &&
+ ShiftAmt == 16;
+
+ bool Shift1 = mi_match(
+ Src1, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc1), m_ICst(ShiftAmt)))) &&
+ ShiftAmt == 16;
+
+ unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
+ if (Shift0 && Shift1) {
+ Opc = AMDGPU::S_PACK_HH_B32_B16;
+ MI.getOperand(1).setReg(ShiftSrc0);
+ MI.getOperand(2).setReg(ShiftSrc1);
+ } else if (Shift1) {
+ Opc = AMDGPU::S_PACK_LH_B32_B16;
+ MI.getOperand(2).setReg(ShiftSrc1);
+ } else if (Shift0 && isZero(Src1, *MRI)) {
+ // build_vector_trunc (lshr $src0, 16), 0 -> s_lshr_b32 $src0, 16
+ auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
+ .addReg(ShiftSrc0)
+ .addImm(16);
+
+ MI.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+ }
+
+ MI.setDesc(TII.get(Opc));
+ return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
+}
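A scalar model of the three pack instructions chosen above may make the shift matching clearer. The semantics below (one 16-bit half from each source packed into a 32-bit result) are an assumption based on the rewrite comments, not text from this patch:

#include <cstdint>

// S_PACK_LL_B32_B16: low half of a in bits [15:0], low half of b in [31:16].
static uint32_t s_pack_ll(uint32_t a, uint32_t b) {
  return (a & 0xffffu) | (b << 16);
}

// S_PACK_LH_B32_B16: low half of a, high half of b.
static uint32_t s_pack_lh(uint32_t a, uint32_t b) {
  return (a & 0xffffu) | (b & 0xffff0000u);
}

// S_PACK_HH_B32_B16: high half of a, high half of b.
static uint32_t s_pack_hh(uint32_t a, uint32_t b) {
  return (a >> 16) | (b & 0xffff0000u);
}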
+
bool AMDGPUInstructionSelector::selectG_PTR_ADD(MachineInstr &I) const {
return selectG_ADD_SUB(I);
}
@@ -594,7 +733,9 @@ bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
unsigned InsSize = Src1Ty.getSizeInBits();
int64_t Offset = I.getOperand(3).getImm();
- if (Offset % 32 != 0)
+
+ // FIXME: These cases should have been illegal and unnecessary to check here.
+ if (Offset % 32 != 0 || InsSize % 32 != 0)
return false;
unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
@@ -617,7 +758,7 @@ bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
// Deal with weird cases where the class only partially supports the subreg
// index.
Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
- if (!Src0RC)
+ if (!Src0RC || !Src1RC)
return false;
if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
@@ -635,6 +776,85 @@ bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
return true;
}
+bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
+ if (STI.getLDSBankCount() != 16)
+ return selectImpl(MI, *CoverageInfo);
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(2).getReg();
+ Register M0Val = MI.getOperand(6).getReg();
+ if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI) ||
+ !RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) ||
+ !RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_32RegClass, *MRI))
+ return false;
+
+ // This requires 2 instructions. It is possible to write a pattern to support
+ // this, but the generated isel emitter doesn't correctly deal with multiple
+ // output instructions using the same physical register input. The copy to m0
+ // is incorrectly placed before the second instruction.
+ //
+ // TODO: Match source modifiers.
+
+ Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock *MBB = MI.getParent();
+
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+ .addReg(M0Val);
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
+ .addImm(2)
+ .addImm(MI.getOperand(4).getImm()) // $attr
+ .addImm(MI.getOperand(3).getImm()); // $attrchan
+
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_P1LV_F16), Dst)
+ .addImm(0) // $src0_modifiers
+ .addReg(Src0) // $src0
+ .addImm(MI.getOperand(4).getImm()) // $attr
+ .addImm(MI.getOperand(3).getImm()) // $attrchan
+ .addImm(0) // $src2_modifiers
+ .addReg(InterpMov) // $src2 - 2 f16 values selected by high
+ .addImm(MI.getOperand(5).getImm()) // $high
+ .addImm(0) // $clamp
+ .addImm(0); // $omod
+
+ MI.eraseFromParent();
+ return true;
+}
+
+// We need to handle this here because tablegen doesn't support matching
+// instructions with multiple outputs.
+bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
+ Register Dst0 = MI.getOperand(0).getReg();
+ Register Dst1 = MI.getOperand(1).getReg();
+
+ LLT Ty = MRI->getType(Dst0);
+ unsigned Opc;
+ if (Ty == LLT::scalar(32))
+ Opc = AMDGPU::V_DIV_SCALE_F32;
+ else if (Ty == LLT::scalar(64))
+ Opc = AMDGPU::V_DIV_SCALE_F64;
+ else
+ return false;
+
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock *MBB = MI.getParent();
+
+ Register Numer = MI.getOperand(3).getReg();
+ Register Denom = MI.getOperand(4).getReg();
+ unsigned ChooseDenom = MI.getOperand(5).getImm();
+
+ Register Src0 = ChooseDenom != 0 ? Numer : Denom;
+
+ auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), Dst0)
+ .addDef(Dst1)
+ .addUse(Src0)
+ .addUse(Denom)
+ .addUse(Numer);
+
+ MI.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+}
+
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
unsigned IntrinsicID = I.getIntrinsicID();
switch (IntrinsicID) {
@@ -659,6 +879,20 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
return true;
}
+ case Intrinsic::amdgcn_interp_p1_f16:
+ return selectInterpP1F16(I);
+ case Intrinsic::amdgcn_wqm:
+ return constrainCopyLikeIntrin(I, AMDGPU::WQM);
+ case Intrinsic::amdgcn_softwqm:
+ return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
+ case Intrinsic::amdgcn_wwm:
+ return constrainCopyLikeIntrin(I, AMDGPU::WWM);
+ case Intrinsic::amdgcn_div_scale:
+ return selectDivScale(I);
+ case Intrinsic::amdgcn_icmp:
+ return selectIntrinsicIcmp(I);
+ case Intrinsic::amdgcn_ballot:
+ return selectBallot(I);
default:
return selectImpl(I, *CoverageInfo);
}
@@ -779,247 +1013,79 @@ bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
return Ret;
}
-static MachineInstr *
-buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
- unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
- unsigned VM, bool Compr, unsigned Enabled, bool Done) {
- const DebugLoc &DL = Insert->getDebugLoc();
- MachineBasicBlock &BB = *Insert->getParent();
- unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
- return BuildMI(BB, Insert, DL, TII.get(Opcode))
- .addImm(Tgt)
- .addReg(Reg0)
- .addReg(Reg1)
- .addReg(Reg2)
- .addReg(Reg3)
- .addImm(VM)
- .addImm(Compr)
- .addImm(Enabled);
-}
-
-static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
- int64_t C;
- if (mi_match(Reg, MRI, m_ICst(C)) && C == 0)
- return true;
-
- // FIXME: matcher should ignore copies
- return mi_match(Reg, MRI, m_Copy(m_ICst(C))) && C == 0;
-}
+bool AMDGPUInstructionSelector::selectIntrinsicIcmp(MachineInstr &I) const {
+ Register Dst = I.getOperand(0).getReg();
+ if (isVCC(Dst, *MRI))
+ return false;
-static unsigned extractGLC(unsigned AuxiliaryData) {
- return AuxiliaryData & 1;
-}
+ if (MRI->getType(Dst).getSizeInBits() != STI.getWavefrontSize())
+ return false;
-static unsigned extractSLC(unsigned AuxiliaryData) {
- return (AuxiliaryData >> 1) & 1;
-}
+ MachineBasicBlock *BB = I.getParent();
+ const DebugLoc &DL = I.getDebugLoc();
+ Register SrcReg = I.getOperand(2).getReg();
+ unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
+ auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(4).getImm());
-static unsigned extractDLC(unsigned AuxiliaryData) {
- return (AuxiliaryData >> 2) & 1;
-}
+ int Opcode = getV_CMPOpcode(Pred, Size);
+ if (Opcode == -1)
+ return false;
-static unsigned extractSWZ(unsigned AuxiliaryData) {
- return (AuxiliaryData >> 3) & 1;
+ MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst)
+ .add(I.getOperand(2))
+ .add(I.getOperand(3));
+ RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(), *TRI.getBoolRC(),
+ *MRI);
+ bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
+ I.eraseFromParent();
+ return Ret;
}
-static unsigned getBufferStoreOpcode(LLT Ty,
- const unsigned MemSize,
- const bool Offen) {
- const int Size = Ty.getSizeInBits();
- switch (8 * MemSize) {
- case 8:
- return Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact :
- AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact;
- case 16:
- return Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact :
- AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact;
- default:
- unsigned Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact :
- AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact;
- if (Size > 32)
- Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32);
- return Opc;
- }
-}
-
-static unsigned getBufferStoreFormatOpcode(LLT Ty,
- const unsigned MemSize,
- const bool Offen) {
- bool IsD16Packed = Ty.getScalarSizeInBits() == 16;
- bool IsD16Unpacked = 8 * MemSize < Ty.getSizeInBits();
- int NumElts = Ty.isVector() ? Ty.getNumElements() : 1;
-
- if (IsD16Packed) {
- switch (NumElts) {
- case 1:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
- case 2:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFSET_exact;
- case 3:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFSET_exact;
- case 4:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFSET_exact;
- default:
- return -1;
- }
- }
-
- if (IsD16Unpacked) {
- switch (NumElts) {
- case 1:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
- case 2:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFSET_exact;
- case 3:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFSET_exact;
- case 4:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFSET_exact;
- default:
- return -1;
- }
- }
-
- switch (NumElts) {
- case 1:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_X_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_X_OFFSET_exact;
- case 2:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XY_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_XY_OFFSET_exact;
- case 3:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFSET_exact;
- case 4:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFSET_exact;
- default:
- return -1;
- }
+bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ const DebugLoc &DL = I.getDebugLoc();
+ Register DstReg = I.getOperand(0).getReg();
+ const unsigned Size = MRI->getType(DstReg).getSizeInBits();
+ const bool Is64 = Size == 64;
- llvm_unreachable("unhandled buffer store");
-}
-
-// TODO: Move this to combiner
-// Returns base register, imm offset, total constant offset.
-std::tuple<Register, unsigned, unsigned>
-AMDGPUInstructionSelector::splitBufferOffsets(MachineIRBuilder &B,
- Register OrigOffset) const {
- const unsigned MaxImm = 4095;
- Register BaseReg;
- unsigned TotalConstOffset;
- MachineInstr *OffsetDef;
-
- std::tie(BaseReg, TotalConstOffset, OffsetDef)
- = AMDGPU::getBaseWithConstantOffset(*MRI, OrigOffset);
-
- unsigned ImmOffset = TotalConstOffset;
-
- // If the immediate value is too big for the immoffset field, put the value
- // and -4096 into the immoffset field so that the value that is copied/added
- // for the voffset field is a multiple of 4096, and it stands more chance
- // of being CSEd with the copy/add for another similar load/store.f
- // However, do not do that rounding down to a multiple of 4096 if that is a
- // negative number, as it appears to be illegal to have a negative offset
- // in the vgpr, even if adding the immediate offset makes it positive.
- unsigned Overflow = ImmOffset & ~MaxImm;
- ImmOffset -= Overflow;
- if ((int32_t)Overflow < 0) {
- Overflow += ImmOffset;
- ImmOffset = 0;
- }
-
- if (Overflow != 0) {
- // In case this is in a waterfall loop, insert offset code at the def point
- // of the offset, not inside the loop.
- MachineBasicBlock::iterator OldInsPt = B.getInsertPt();
- MachineBasicBlock &OldMBB = B.getMBB();
- B.setInstr(*OffsetDef);
-
- if (!BaseReg) {
- BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- B.buildInstr(AMDGPU::V_MOV_B32_e32)
- .addDef(BaseReg)
- .addImm(Overflow);
- } else {
- Register OverflowVal = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- B.buildInstr(AMDGPU::V_MOV_B32_e32)
- .addDef(OverflowVal)
- .addImm(Overflow);
-
- Register NewBaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- TII.getAddNoCarry(B.getMBB(), B.getInsertPt(), B.getDebugLoc(), NewBaseReg)
- .addReg(BaseReg)
- .addReg(OverflowVal, RegState::Kill)
- .addImm(0);
- BaseReg = NewBaseReg;
- }
+ if (Size != STI.getWavefrontSize())
+ return false;
- B.setInsertPt(OldMBB, OldInsPt);
+ Optional<ValueAndVReg> Arg =
+ getConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI, true);
+
+ if (Arg.hasValue()) {
+ const int64_t Value = Arg.getValue().Value;
+ if (Value == 0) {
+ unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
+ BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg).addImm(0);
+ } else if (Value == -1) { // all ones
+ Register SrcReg = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg).addReg(SrcReg);
+ } else
+ return false;
+ } else {
+ Register SrcReg = I.getOperand(2).getReg();
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg).addReg(SrcReg);
}
- return std::make_tuple(BaseReg, ImmOffset, TotalConstOffset);
+ I.eraseFromParent();
+ return true;
}
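The splitBufferOffsets helper removed above carried a useful algorithmic comment: keep only what fits the 12-bit immediate field, and round the part moved to the voffset register down to a multiple of 4096 unless that would leave a negative value in the register. A scalar sketch mirroring that removed logic (the 4095 limit comes from the removed code itself):

#include <cstdint>

static void splitBufferOffset(uint32_t Total, uint32_t &ImmOffset,
                              uint32_t &Overflow) {
  const uint32_t MaxImm = 4095;
  ImmOffset = Total;
  Overflow = ImmOffset & ~MaxImm;   // multiple of 4096 moved to the register
  ImmOffset -= Overflow;
  if (static_cast<int32_t>(Overflow) < 0) { // avoid a negative voffset value
    Overflow += ImmOffset;
    ImmOffset = 0;
  }
}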
-bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI,
- bool IsFormat) const {
- MachineIRBuilder B(MI);
- MachineFunction &MF = B.getMF();
- Register VData = MI.getOperand(1).getReg();
- LLT Ty = MRI->getType(VData);
-
- int Size = Ty.getSizeInBits();
- if (Size % 32 != 0)
- return false;
-
- // FIXME: Verifier should enforce 1 MMO for these intrinsics.
- MachineMemOperand *MMO = *MI.memoperands_begin();
- const int MemSize = MMO->getSize();
-
- Register RSrc = MI.getOperand(2).getReg();
- Register VOffset = MI.getOperand(3).getReg();
- Register SOffset = MI.getOperand(4).getReg();
- unsigned AuxiliaryData = MI.getOperand(5).getImm();
- unsigned ImmOffset;
- unsigned TotalOffset;
-
- std::tie(VOffset, ImmOffset, TotalOffset) = splitBufferOffsets(B, VOffset);
- if (TotalOffset != 0)
- MMO = MF.getMachineMemOperand(MMO, TotalOffset, MemSize);
-
- const bool Offen = !isZero(VOffset, *MRI);
-
- int Opc = IsFormat ? getBufferStoreFormatOpcode(Ty, MemSize, Offen) :
- getBufferStoreOpcode(Ty, MemSize, Offen);
- if (Opc == -1)
- return false;
-
- MachineInstrBuilder MIB = B.buildInstr(Opc)
- .addUse(VData);
-
- if (Offen)
- MIB.addUse(VOffset);
-
- MIB.addUse(RSrc)
- .addUse(SOffset)
- .addImm(ImmOffset)
- .addImm(extractGLC(AuxiliaryData))
- .addImm(extractSLC(AuxiliaryData))
- .addImm(0) // tfe: FIXME: Remove from inst
- .addImm(extractDLC(AuxiliaryData))
- .addImm(extractSWZ(AuxiliaryData))
- .addMemOperand(MMO);
+bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
+ // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
+ // SelectionDAG uses for wave32 vs wave64.
+ MachineBasicBlock *BB = MI.getParent();
+ BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
+ .add(MI.getOperand(1));
+ Register Reg = MI.getOperand(1).getReg();
MI.eraseFromParent();
- return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+ if (!MRI->getRegClassOrNull(Reg))
+ MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
+ return true;
}
static unsigned getDSShaderTypeValue(const MachineFunction &MF) {
@@ -1106,70 +1172,458 @@ bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
return Ret;
}
-bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
- MachineInstr &I) const {
- MachineBasicBlock *BB = I.getParent();
- unsigned IntrinsicID = I.getIntrinsicID();
- switch (IntrinsicID) {
- case Intrinsic::amdgcn_exp: {
- int64_t Tgt = I.getOperand(1).getImm();
- int64_t Enabled = I.getOperand(2).getImm();
- int64_t Done = I.getOperand(7).getImm();
- int64_t VM = I.getOperand(8).getImm();
-
- MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
- I.getOperand(4).getReg(),
- I.getOperand(5).getReg(),
- I.getOperand(6).getReg(),
- VM, false, Enabled, Done);
+static unsigned gwsIntrinToOpcode(unsigned IntrID) {
+ switch (IntrID) {
+ case Intrinsic::amdgcn_ds_gws_init:
+ return AMDGPU::DS_GWS_INIT;
+ case Intrinsic::amdgcn_ds_gws_barrier:
+ return AMDGPU::DS_GWS_BARRIER;
+ case Intrinsic::amdgcn_ds_gws_sema_v:
+ return AMDGPU::DS_GWS_SEMA_V;
+ case Intrinsic::amdgcn_ds_gws_sema_br:
+ return AMDGPU::DS_GWS_SEMA_BR;
+ case Intrinsic::amdgcn_ds_gws_sema_p:
+ return AMDGPU::DS_GWS_SEMA_P;
+ case Intrinsic::amdgcn_ds_gws_sema_release_all:
+ return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
+ default:
+ llvm_unreachable("not a gws intrinsic");
+ }
+}
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
+bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
+ Intrinsic::ID IID) const {
+ if (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
+ !STI.hasGWSSemaReleaseAll())
+ return false;
+
+ // intrinsic ID, vsrc, offset
+ const bool HasVSrc = MI.getNumOperands() == 3;
+ assert(HasVSrc || MI.getNumOperands() == 2);
+
+ Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
+ const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
+ if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)
+ return false;
+
+ MachineInstr *OffsetDef = getDefIgnoringCopies(BaseOffset, *MRI);
+ assert(OffsetDef);
+
+ unsigned ImmOffset;
+
+ MachineBasicBlock *MBB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ MachineInstr *Readfirstlane = nullptr;
+
+ // If we legalized the VGPR input, strip out the readfirstlane to analyze the
+ // incoming offset, in case there's an add of a constant. We'll have to put it
+ // back later.
+ if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
+ Readfirstlane = OffsetDef;
+ BaseOffset = OffsetDef->getOperand(1).getReg();
+ OffsetDef = getDefIgnoringCopies(BaseOffset, *MRI);
}
- case Intrinsic::amdgcn_exp_compr: {
- const DebugLoc &DL = I.getDebugLoc();
- int64_t Tgt = I.getOperand(1).getImm();
- int64_t Enabled = I.getOperand(2).getImm();
- Register Reg0 = I.getOperand(3).getReg();
- Register Reg1 = I.getOperand(4).getReg();
- Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- int64_t Done = I.getOperand(5).getImm();
- int64_t VM = I.getOperand(6).getImm();
-
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
- MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
- true, Enabled, Done);
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
+ if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {
+ // If we have a constant offset, try to use the 0 in m0 as the base.
+ // TODO: Look into changing the default m0 initialization value. If the
+ // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
+ // the immediate offset.
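+ // As an illustration (hypothetical constant): for an offset of 5, m0 is
+ // simply zeroed here and the DS_GWS instruction's offset field added to the
+ // builder below carries the 5 directly.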
+
+ ImmOffset = OffsetDef->getOperand(1).getCImm()->getZExtValue();
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addImm(0);
+ } else {
+ std::tie(BaseOffset, ImmOffset, OffsetDef)
+ = AMDGPU::getBaseWithConstantOffset(*MRI, BaseOffset);
+
+ if (Readfirstlane) {
+ // We have the constant offset now, so put the readfirstlane back on the
+ // variable component.
+ if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI))
+ return false;
+
+ Readfirstlane->getOperand(1).setReg(BaseOffset);
+ BaseOffset = Readfirstlane->getOperand(0).getReg();
+ } else {
+ if (!RBI.constrainGenericRegister(BaseOffset,
+ AMDGPU::SReg_32RegClass, *MRI))
+ return false;
+ }
+
+ Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_LSHL_B32), M0Base)
+ .addReg(BaseOffset)
+ .addImm(16);
+
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+ .addReg(M0Base);
}
- case Intrinsic::amdgcn_end_cf: {
- // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
- // SelectionDAG uses for wave32 vs wave64.
- BuildMI(*BB, &I, I.getDebugLoc(),
- TII.get(AMDGPU::SI_END_CF))
- .add(I.getOperand(1));
- Register Reg = I.getOperand(1).getReg();
- I.eraseFromParent();
+ // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
+ // offset field) % 64. Some versions of the programming guide omit the m0
+ // part, or claim it's from offset 0.
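+ // As an illustration (hypothetical value): with a variable base offset of
+ // 0x123, the S_LSHL_B32 by 16 above writes 0x1230000 to m0, so M0[21:16]
+ // holds 0x23 (the low 6 bits of 0x123) and the resource id becomes
+ // (<isa opaque base> + 0x23 + offset field) % 64.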
+ auto MIB = BuildMI(*MBB, &MI, DL, TII.get(gwsIntrinToOpcode(IID)));
- if (!MRI->getRegClassOrNull(Reg))
- MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
- return true;
+ if (HasVSrc) {
+ Register VSrc = MI.getOperand(1).getReg();
+ MIB.addReg(VSrc);
+ if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI))
+ return false;
+ }
+
+ MIB.addImm(ImmOffset)
+ .addImm(-1) // $gds
+ .cloneMemRefs(MI);
+
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
+ bool IsAppend) const {
+ Register PtrBase = MI.getOperand(2).getReg();
+ LLT PtrTy = MRI->getType(PtrBase);
+ bool IsGDS = PtrTy.getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
+
+ unsigned Offset;
+ std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));
+
+ // TODO: Should this try to look through readfirstlane like GWS?
+ if (!isDSOffsetLegal(PtrBase, Offset, 16)) {
+ PtrBase = MI.getOperand(2).getReg();
+ Offset = 0;
+ }
+
+ MachineBasicBlock *MBB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
+
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+ .addReg(PtrBase);
+ BuildMI(*MBB, &MI, DL, TII.get(Opc), MI.getOperand(0).getReg())
+ .addImm(Offset)
+ .addImm(IsGDS ? -1 : 0)
+ .cloneMemRefs(MI);
+ MI.eraseFromParent();
+ return true;
+}
+
+static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE,
+ bool &IsTexFail) {
+ if (TexFailCtrl)
+ IsTexFail = true;
+
+ TFE = (TexFailCtrl & 0x1) ? 1 : 0;
+ TexFailCtrl &= ~(uint64_t)0x1;
+ LWE = (TexFailCtrl & 0x2) ? 1 : 0;
+ TexFailCtrl &= ~(uint64_t)0x2;
+
+ return TexFailCtrl == 0;
+}
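+
+// As an illustration (hypothetical inputs): parseTexFail(0x3, TFE, LWE,
+// IsTexFail) sets both TFE and LWE and flags a texfail request, while a value
+// such as 0x5 leaves bit 2 set after the two known bits are masked off, so the
+// helper returns false and image selection bails out.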
+
+static bool parseCachePolicy(uint64_t Value,
+ bool *GLC, bool *SLC, bool *DLC) {
+ if (GLC) {
+ *GLC = (Value & 0x1) ? 1 : 0;
+ Value &= ~(uint64_t)0x1;
+ }
+ if (SLC) {
+ *SLC = (Value & 0x2) ? 1 : 0;
+ Value &= ~(uint64_t)0x2;
+ }
+ if (DLC) {
+ *DLC = (Value & 0x4) ? 1 : 0;
+ Value &= ~(uint64_t)0x4;
+ }
+
+ return Value == 0;
+}
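+
+// As an illustration (hypothetical inputs): parseCachePolicy(0x5, &GLC, &SLC,
+// &DLC) yields GLC = true, SLC = false, DLC = true and returns true; with a
+// null DLC pointer the same value leaves bit 2 unconsumed and the call returns
+// false, rejecting the cache policy.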
+
+bool AMDGPUInstructionSelector::selectImageIntrinsic(
+ MachineInstr &MI, const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
+ MachineBasicBlock *MBB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+ AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
+
+ const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
+ const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
+ AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
+ const AMDGPU::MIMGMIPMappingInfo *MIPMappingInfo =
+ AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode);
+ unsigned IntrOpcode = Intr->BaseOpcode;
+ const bool IsGFX10 = STI.getGeneration() >= AMDGPUSubtarget::GFX10;
+
+ const int VAddrIdx = getImageVAddrIdxBegin(BaseOpcode,
+ MI.getNumExplicitDefs());
+ int NumVAddr, NumGradients;
+ std::tie(NumVAddr, NumGradients) = getImageNumVAddr(Intr, BaseOpcode);
+
+ Register VDataIn, VDataOut;
+ LLT VDataTy;
+ int NumVDataDwords = -1;
+ bool IsD16 = false;
+
+ // XXX - Can we just get the second to last argument for ctrl?
+ unsigned CtrlIdx; // Index of texfailctrl argument
+ bool Unorm;
+ if (!BaseOpcode->Sampler) {
+ Unorm = true;
+ CtrlIdx = VAddrIdx + NumVAddr + 1;
+ } else {
+ Unorm = MI.getOperand(VAddrIdx + NumVAddr + 2).getImm() != 0;
+ CtrlIdx = VAddrIdx + NumVAddr + 3;
+ }
+
+ bool TFE;
+ bool LWE;
+ bool IsTexFail = false;
+ if (!parseTexFail(MI.getOperand(CtrlIdx).getImm(), TFE, LWE, IsTexFail))
+ return false;
+
+ const int Flags = MI.getOperand(CtrlIdx + 2).getImm();
+ const bool IsA16 = (Flags & 1) != 0;
+ const bool IsG16 = (Flags & 2) != 0;
+
+ // A16 implies 16 bit gradients
+ if (IsA16 && !IsG16)
+ return false;
+
+ unsigned DMask = 0;
+ unsigned DMaskLanes = 0;
+
+ if (BaseOpcode->Atomic) {
+ VDataOut = MI.getOperand(0).getReg();
+ VDataIn = MI.getOperand(2).getReg();
+ LLT Ty = MRI->getType(VDataIn);
+
+ // Be careful to allow atomic swap on 16-bit element vectors.
+ const bool Is64Bit = BaseOpcode->AtomicX2 ?
+ Ty.getSizeInBits() == 128 :
+ Ty.getSizeInBits() == 64;
+
+ if (BaseOpcode->AtomicX2) {
+ assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);
+
+ DMask = Is64Bit ? 0xf : 0x3;
+ NumVDataDwords = Is64Bit ? 4 : 2;
+ } else {
+ DMask = Is64Bit ? 0x3 : 0x1;
+ NumVDataDwords = Is64Bit ? 2 : 1;
+ }
+ } else {
+ const int DMaskIdx = 2; // Input/output + intrinsic ID.
+
+ DMask = MI.getOperand(DMaskIdx).getImm();
+ DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask);
+
+ if (BaseOpcode->Store) {
+ VDataIn = MI.getOperand(1).getReg();
+ VDataTy = MRI->getType(VDataIn);
+ NumVDataDwords = (VDataTy.getSizeInBits() + 31) / 32;
+ } else {
+ VDataOut = MI.getOperand(0).getReg();
+ VDataTy = MRI->getType(VDataOut);
+ NumVDataDwords = DMaskLanes;
+
+ // One memoperand is mandatory, except for getresinfo.
+ // FIXME: Check this in verifier.
+ if (!MI.memoperands_empty()) {
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+
+ // Infer d16 from the memory size, as the register type will be mangled by
+ // unpacked subtargets, or by TFE.
+ IsD16 = ((8 * MMO->getSize()) / DMaskLanes) < 32;
+
+ if (IsD16 && !STI.hasUnpackedD16VMem())
+ NumVDataDwords = (DMaskLanes + 1) / 2;
+ }
+ }
+ }
+
+ // Optimize _L to _LZ when _L is zero
+ if (LZMappingInfo) {
+ // The legalizer replaced the register with an immediate 0 if we need to
+ // change the opcode.
+ const MachineOperand &Lod = MI.getOperand(VAddrIdx + NumVAddr - 1);
+ if (Lod.isImm()) {
+ assert(Lod.getImm() == 0);
+ IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l
+ }
+ }
+
+ // Optimize _mip away, when 'lod' is zero
+ if (MIPMappingInfo) {
+ const MachineOperand &Lod = MI.getOperand(VAddrIdx + NumVAddr - 1);
+ if (Lod.isImm()) {
+ assert(Lod.getImm() == 0);
+ IntrOpcode = MIPMappingInfo->NONMIP; // set new opcode to variant without _mip
+ }
+ }
+
+ // Set G16 opcode
+ if (IsG16 && !IsA16) {
+ const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
+ AMDGPU::getMIMGG16MappingInfo(Intr->BaseOpcode);
+ assert(G16MappingInfo);
+ IntrOpcode = G16MappingInfo->G16; // set opcode to variant with _g16
+ }
+
+ // TODO: Check this in verifier.
+ assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");
+
+ bool GLC = false;
+ bool SLC = false;
+ bool DLC = false;
+ if (BaseOpcode->Atomic) {
+ GLC = true; // TODO no-return optimization
+ if (!parseCachePolicy(MI.getOperand(CtrlIdx + 1).getImm(), nullptr, &SLC,
+ IsGFX10 ? &DLC : nullptr))
+ return false;
+ } else {
+ if (!parseCachePolicy(MI.getOperand(CtrlIdx + 1).getImm(), &GLC, &SLC,
+ IsGFX10 ? &DLC : nullptr))
+ return false;
+ }
+
+ int NumVAddrRegs = 0;
+ int NumVAddrDwords = 0;
+ for (int I = 0; I < NumVAddr; ++I) {
+ // Skip the $noregs and 0s inserted during legalization.
+ MachineOperand &AddrOp = MI.getOperand(VAddrIdx + I);
+ if (!AddrOp.isReg())
+ continue; // XXX - Break?
+
+ Register Addr = AddrOp.getReg();
+ if (!Addr)
+ break;
+
+ ++NumVAddrRegs;
+ NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
+ }
+
+ // The legalizer preprocessed the intrinsic arguments. If we aren't using
+ // NSA, these should have been packed into a single value in the first
+ // address register.
+ const bool UseNSA = NumVAddrRegs != 1 && NumVAddrDwords == NumVAddrRegs;
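+ // As an illustration (hypothetical operand shapes): three separate 32-bit
+ // address registers give NumVAddrRegs == NumVAddrDwords == 3, so the NSA
+ // encoding is chosen; a single packed 96-bit register gives NumVAddrRegs == 1
+ // and the default encoding is used instead.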
+ if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
+ LLVM_DEBUG(dbgs() << "Trying to use NSA on non-NSA target\n");
+ return false;
+ }
+
+ if (IsTexFail)
+ ++NumVDataDwords;
+
+ int Opcode = -1;
+ if (IsGFX10) {
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
+ UseNSA ? AMDGPU::MIMGEncGfx10NSA
+ : AMDGPU::MIMGEncGfx10Default,
+ NumVDataDwords, NumVAddrDwords);
+ } else {
+ if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
+ NumVDataDwords, NumVAddrDwords);
+ if (Opcode == -1)
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
+ NumVDataDwords, NumVAddrDwords);
+ }
+ assert(Opcode != -1);
+
+ auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opcode))
+ .cloneMemRefs(MI);
+
+ if (VDataOut) {
+ if (BaseOpcode->AtomicX2) {
+ const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;
+
+ Register TmpReg = MRI->createVirtualRegister(
+ Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
+ unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
+
+ MIB.addDef(TmpReg);
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), VDataOut)
+ .addReg(TmpReg, RegState::Kill, SubReg);
+
+ } else {
+ MIB.addDef(VDataOut); // vdata output
+ }
}
- case Intrinsic::amdgcn_raw_buffer_store:
- return selectStoreIntrinsic(I, false);
- case Intrinsic::amdgcn_raw_buffer_store_format:
- return selectStoreIntrinsic(I, true);
+
+ if (VDataIn)
+ MIB.addReg(VDataIn); // vdata input
+
+ for (int i = 0; i != NumVAddrRegs; ++i) {
+ MachineOperand &SrcOp = MI.getOperand(VAddrIdx + i);
+ if (SrcOp.isReg()) {
+ assert(SrcOp.getReg() != 0);
+ MIB.addReg(SrcOp.getReg());
+ }
+ }
+
+ MIB.addReg(MI.getOperand(VAddrIdx + NumVAddr).getReg()); // rsrc
+ if (BaseOpcode->Sampler)
+ MIB.addReg(MI.getOperand(VAddrIdx + NumVAddr + 1).getReg()); // sampler
+
+ MIB.addImm(DMask); // dmask
+
+ if (IsGFX10)
+ MIB.addImm(DimInfo->Encoding);
+ MIB.addImm(Unorm);
+ if (IsGFX10)
+ MIB.addImm(DLC);
+
+ MIB.addImm(GLC);
+ MIB.addImm(SLC);
+ MIB.addImm(IsA16 && // a16 or r128
+ STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
+ if (IsGFX10)
+ MIB.addImm(IsA16 ? -1 : 0);
+
+ MIB.addImm(TFE); // tfe
+ MIB.addImm(LWE); // lwe
+ if (!IsGFX10)
+ MIB.addImm(DimInfo->DA ? -1 : 0);
+ if (BaseOpcode->HasD16)
+ MIB.addImm(IsD16 ? -1 : 0);
+
+ MI.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+}
+
+bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
+ MachineInstr &I) const {
+ unsigned IntrinsicID = I.getIntrinsicID();
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_end_cf:
+ return selectEndCfIntrinsic(I);
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap:
return selectDSOrderedIntrinsic(I, IntrinsicID);
- default:
+ case Intrinsic::amdgcn_ds_gws_init:
+ case Intrinsic::amdgcn_ds_gws_barrier:
+ case Intrinsic::amdgcn_ds_gws_sema_v:
+ case Intrinsic::amdgcn_ds_gws_sema_br:
+ case Intrinsic::amdgcn_ds_gws_sema_p:
+ case Intrinsic::amdgcn_ds_gws_sema_release_all:
+ return selectDSGWSIntrinsic(I, IntrinsicID);
+ case Intrinsic::amdgcn_ds_append:
+ return selectDSAppendConsume(I, true);
+ case Intrinsic::amdgcn_ds_consume:
+ return selectDSAppendConsume(I, false);
+ default: {
return selectImpl(I, *CoverageInfo);
}
+ }
}
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
+ if (selectImpl(I, *CoverageInfo))
+ return true;
+
MachineBasicBlock *BB = I.getParent();
const DebugLoc &DL = I.getDebugLoc();
@@ -1247,9 +1701,6 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
Register SrcReg = I.getOperand(1).getReg();
const LLT DstTy = MRI->getType(DstReg);
const LLT SrcTy = MRI->getType(SrcReg);
- if (!DstTy.isScalar())
- return false;
-
const LLT S1 = LLT::scalar(1);
const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
@@ -1264,6 +1715,8 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
return false;
}
+ const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
+
unsigned DstSize = DstTy.getSizeInBits();
unsigned SrcSize = SrcTy.getSizeInBits();
@@ -1271,6 +1724,73 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
= TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, *MRI);
const TargetRegisterClass *DstRC
= TRI.getRegClassForSizeOnBank(DstSize, *DstRB, *MRI);
+ if (!SrcRC || !DstRC)
+ return false;
+
+ if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
+ !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {
+ LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
+ return false;
+ }
+
+ if (DstTy == LLT::vector(2, 16) && SrcTy == LLT::vector(2, 32)) {
+ MachineBasicBlock *MBB = I.getParent();
+ const DebugLoc &DL = I.getDebugLoc();
+
+ Register LoReg = MRI->createVirtualRegister(DstRC);
+ Register HiReg = MRI->createVirtualRegister(DstRC);
+ BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), LoReg)
+ .addReg(SrcReg, 0, AMDGPU::sub0);
+ BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), HiReg)
+ .addReg(SrcReg, 0, AMDGPU::sub1);
+
+ if (IsVALU && STI.hasSDWA()) {
+ // Write the low 16-bits of the high element into the high 16-bits of the
+ // low element.
+ MachineInstr *MovSDWA =
+ BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
+ .addImm(0) // $src0_modifiers
+ .addReg(HiReg) // $src0
+ .addImm(0) // $clamp
+ .addImm(AMDGPU::SDWA::WORD_1) // $dst_sel
+ .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused
+ .addImm(AMDGPU::SDWA::WORD_0) // $src0_sel
+ .addReg(LoReg, RegState::Implicit);
+ MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1);
+ } else {
+ Register TmpReg0 = MRI->createVirtualRegister(DstRC);
+ Register TmpReg1 = MRI->createVirtualRegister(DstRC);
+ Register ImmReg = MRI->createVirtualRegister(DstRC);
+ if (IsVALU) {
+ BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
+ .addImm(16)
+ .addReg(HiReg);
+ } else {
+ BuildMI(*MBB, I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg0)
+ .addReg(HiReg)
+ .addImm(16);
+ }
+
+ unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
+ unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
+ unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
+
+ BuildMI(*MBB, I, DL, TII.get(MovOpc), ImmReg)
+ .addImm(0xffff);
+ BuildMI(*MBB, I, DL, TII.get(AndOpc), TmpReg1)
+ .addReg(LoReg)
+ .addReg(ImmReg);
+ BuildMI(*MBB, I, DL, TII.get(OrOpc), DstReg)
+ .addReg(TmpReg0)
+ .addReg(TmpReg1);
+ }
+
+ I.eraseFromParent();
+ return true;
+ }
+
+ if (!DstTy.isScalar())
+ return false;
if (SrcSize > 32) {
int SubRegIdx = sizeToSubRegIndex(DstSize);
@@ -1279,17 +1799,17 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
// Deal with weird cases where the class only partially supports the subreg
// index.
- SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
- if (!SrcRC)
+ const TargetRegisterClass *SrcWithSubRC
+ = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
+ if (!SrcWithSubRC)
return false;
- I.getOperand(1).setSubReg(SubRegIdx);
- }
+ if (SrcWithSubRC != SrcRC) {
+ if (!RBI.constrainGenericRegister(SrcReg, *SrcWithSubRC, *MRI))
+ return false;
+ }
- if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
- !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {
- LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
- return false;
+ I.getOperand(1).setSubReg(SubRegIdx);
}
I.setDesc(TII.get(TargetOpcode::COPY));
@@ -1318,7 +1838,8 @@ const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
}
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
- bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
+ bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
+ bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;
const DebugLoc &DL = I.getDebugLoc();
MachineBasicBlock &MBB = *I.getParent();
const Register DstReg = I.getOperand(0).getReg();
@@ -1326,7 +1847,8 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
const LLT DstTy = MRI->getType(DstReg);
const LLT SrcTy = MRI->getType(SrcReg);
- const unsigned SrcSize = SrcTy.getSizeInBits();
+ const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?
+ I.getOperand(2).getImm() : SrcTy.getSizeInBits();
const unsigned DstSize = DstTy.getSizeInBits();
if (!DstTy.isScalar())
return false;
@@ -1362,7 +1884,9 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
}
if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
- if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
+ const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
+ AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
+ if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))
return false;
if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
@@ -1378,13 +1902,15 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
// Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width.
- if (DstSize > 32 && SrcSize <= 32) {
+ if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
// We need a 64-bit register source, but the high bits don't matter.
Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ unsigned SubReg = InReg ? AMDGPU::sub0 : 0;
+
BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
- .addReg(SrcReg)
+ .addReg(SrcReg, 0, SubReg)
.addImm(AMDGPU::sub0)
.addReg(UndefReg)
.addImm(AMDGPU::sub1);
@@ -1487,6 +2013,103 @@ bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI);
}
+bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
+ // Only manually handle the f64 SGPR case.
+ //
+ // FIXME: This is a workaround for 2.5 different tablegen problems. Because
+ // the bit ops theoretically have a second result due to the implicit def of
+ // SCC, the GlobalISelEmitter is overly conservative and rejects it. Fixing
+ // that is easy by disabling the check. The result works, but uses a
+ // nonsensical sreg32orlds_and_sreg_1 regclass.
+ //
+ // The DAG emitter is more problematic, and incorrectly adds both S_XOR_B32 to
+ // the variadic REG_SEQUENCE operands.
+
+ Register Dst = MI.getOperand(0).getReg();
+ const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
+ if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
+ MRI->getType(Dst) != LLT::scalar(64))
+ return false;
+
+ Register Src = MI.getOperand(1).getReg();
+ MachineInstr *Fabs = getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI);
+ if (Fabs)
+ Src = Fabs->getOperand(1).getReg();
+
+ if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
+ !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
+ return false;
+
+ MachineBasicBlock *BB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
+ .addReg(Src, 0, AMDGPU::sub0);
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
+ .addReg(Src, 0, AMDGPU::sub1);
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
+ .addImm(0x80000000);
+
+ // Set or toggle sign bit.
+ unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
+ BuildMI(*BB, &MI, DL, TII.get(Opc), OpReg)
+ .addReg(HiReg)
+ .addReg(ConstReg);
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
+ .addReg(LoReg)
+ .addImm(AMDGPU::sub0)
+ .addReg(OpReg)
+ .addImm(AMDGPU::sub1);
+ MI.eraseFromParent();
+ return true;
+}
+
+// FIXME: This is a workaround for the same tablegen problems as G_FNEG
+bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
+ Register Dst = MI.getOperand(0).getReg();
+ const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
+ if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
+ MRI->getType(Dst) != LLT::scalar(64))
+ return false;
+
+ Register Src = MI.getOperand(1).getReg();
+ MachineBasicBlock *BB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+ if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
+ !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
+ return false;
+
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
+ .addReg(Src, 0, AMDGPU::sub0);
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
+ .addReg(Src, 0, AMDGPU::sub1);
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
+ .addImm(0x7fffffff);
+
+ // Clear sign bit.
+ // TODO: Should this use S_BITSET0_*?
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
+ .addReg(HiReg)
+ .addReg(ConstReg);
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
+ .addReg(LoReg)
+ .addImm(AMDGPU::sub0)
+ .addReg(OpReg)
+ .addImm(AMDGPU::sub1);
+
+ MI.eraseFromParent();
+ return true;
+}
+
static bool isConstant(const MachineInstr &MI) {
return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}
@@ -1573,6 +2196,65 @@ bool AMDGPUInstructionSelector::selectG_LOAD_ATOMICRMW(MachineInstr &I) const {
return selectImpl(I, *CoverageInfo);
}
+// TODO: No rtn optimization.
+bool AMDGPUInstructionSelector::selectG_AMDGPU_ATOMIC_CMPXCHG(
+ MachineInstr &MI) const {
+ Register PtrReg = MI.getOperand(1).getReg();
+ const LLT PtrTy = MRI->getType(PtrReg);
+ if (PtrTy.getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
+ STI.useFlatForGlobal())
+ return selectImpl(MI, *CoverageInfo);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ const LLT Ty = MRI->getType(DstReg);
+ const bool Is64 = Ty.getSizeInBits() == 64;
+ const unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
+ Register TmpReg = MRI->createVirtualRegister(
+ Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
+
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock *BB = MI.getParent();
+
+ Register VAddr, RSrcReg, SOffset;
+ int64_t Offset = 0;
+
+ unsigned Opcode;
+ if (selectMUBUFOffsetImpl(MI.getOperand(1), RSrcReg, SOffset, Offset)) {
+ Opcode = Is64 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN :
+ AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN;
+ } else if (selectMUBUFAddr64Impl(MI.getOperand(1), VAddr,
+ RSrcReg, SOffset, Offset)) {
+ Opcode = Is64 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN :
+ AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN;
+ } else
+ return selectImpl(MI, *CoverageInfo);
+
+ auto MIB = BuildMI(*BB, &MI, DL, TII.get(Opcode), TmpReg)
+ .addReg(MI.getOperand(2).getReg());
+
+ if (VAddr)
+ MIB.addReg(VAddr);
+
+ MIB.addReg(RSrcReg);
+ if (SOffset)
+ MIB.addReg(SOffset);
+ else
+ MIB.addImm(0);
+
+ MIB.addImm(Offset);
+ MIB.addImm(0); // slc
+ MIB.cloneMemRefs(MI);
+
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), DstReg)
+ .addReg(TmpReg, RegState::Kill, SubReg);
+
+ MI.eraseFromParent();
+
+ MRI->setRegClass(
+ DstReg, Is64 ? &AMDGPU::VReg_64RegClass : &AMDGPU::VGPR_32RegClass);
+ return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+}
+
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineOperand &CondOp = I.getOperand(0);
@@ -1619,7 +2301,8 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
return true;
}
-bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
+bool AMDGPUInstructionSelector::selectG_FRAME_INDEX_GLOBAL_VALUE(
+ MachineInstr &I) const {
Register DstReg = I.getOperand(0).getReg();
const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
@@ -1631,67 +2314,134 @@ bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
}
-bool AMDGPUInstructionSelector::selectG_PTR_MASK(MachineInstr &I) const {
- uint64_t Align = I.getOperand(2).getImm();
- const uint64_t Mask = ~((UINT64_C(1) << Align) - 1);
-
- MachineBasicBlock *BB = I.getParent();
-
+bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
Register DstReg = I.getOperand(0).getReg();
Register SrcReg = I.getOperand(1).getReg();
+ Register MaskReg = I.getOperand(2).getReg();
+ LLT Ty = MRI->getType(DstReg);
+ LLT MaskTy = MRI->getType(MaskReg);
const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
+ const RegisterBank *MaskRB = RBI.getRegBank(MaskReg, *MRI, TRI);
const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
+ if (DstRB != SrcRB) // Should only happen for hand written MIR.
+ return false;
+
unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
- unsigned MovOpc = IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
const TargetRegisterClass &RegRC
= IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
- LLT Ty = MRI->getType(DstReg);
-
const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB,
*MRI);
const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB,
*MRI);
+ const TargetRegisterClass *MaskRC =
+ TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB, *MRI);
+
if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
- !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
+ !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
+ !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))
return false;
+ MachineBasicBlock *BB = I.getParent();
const DebugLoc &DL = I.getDebugLoc();
- Register ImmReg = MRI->createVirtualRegister(&RegRC);
- BuildMI(*BB, &I, DL, TII.get(MovOpc), ImmReg)
- .addImm(Mask);
-
if (Ty.getSizeInBits() == 32) {
+ assert(MaskTy.getSizeInBits() == 32 &&
+ "ptrmask should have been narrowed during legalize");
+
BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
.addReg(SrcReg)
- .addReg(ImmReg);
+ .addReg(MaskReg);
I.eraseFromParent();
return true;
}
Register HiReg = MRI->createVirtualRegister(&RegRC);
Register LoReg = MRI->createVirtualRegister(&RegRC);
- Register MaskLo = MRI->createVirtualRegister(&RegRC);
+ // Extract the subregisters from the source pointer.
BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
.addReg(SrcReg, 0, AMDGPU::sub0);
BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
.addReg(SrcReg, 0, AMDGPU::sub1);
- BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskLo)
- .addReg(LoReg)
- .addReg(ImmReg);
+ Register MaskedLo, MaskedHi;
+
+ // Try to avoid emitting a bit operation when we only need to touch half of
+ // the 64-bit pointer.
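+ // As an illustration (hypothetical mask): a compile-time constant mask of
+ // 0xffffffffffff0000 has all high 32 bits known to be one, so the high half
+ // below degrades to a plain COPY and only the low half needs an AND.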
+ APInt MaskOnes = KnownBits->getKnownOnes(MaskReg).zextOrSelf(64);
+
+ const APInt MaskHi32 = APInt::getHighBitsSet(64, 32);
+ const APInt MaskLo32 = APInt::getLowBitsSet(64, 32);
+ if ((MaskOnes & MaskLo32) == MaskLo32) {
+ // If all the bits in the low half are 1, we only need a copy for it.
+ MaskedLo = LoReg;
+ } else {
+ // Extract the mask subregister and apply the and.
+ Register MaskLo = MRI->createVirtualRegister(&RegRC);
+ MaskedLo = MRI->createVirtualRegister(&RegRC);
+
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
+ .addReg(MaskReg, 0, AMDGPU::sub0);
+ BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
+ .addReg(LoReg)
+ .addReg(MaskLo);
+ }
+
+ if ((MaskOnes & MaskHi32) == MaskHi32) {
+ // If all the bits in the high half are 1, we only need a copy for it.
+ MaskedHi = HiReg;
+ } else {
+ Register MaskHi = MRI->createVirtualRegister(&RegRC);
+ MaskedHi = MRI->createVirtualRegister(&RegRC);
+
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
+ .addReg(MaskReg, 0, AMDGPU::sub1);
+ BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
+ .addReg(HiReg)
+ .addReg(MaskHi);
+ }
+
BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
- .addReg(MaskLo)
+ .addReg(MaskedLo)
.addImm(AMDGPU::sub0)
- .addReg(HiReg)
+ .addReg(MaskedHi)
.addImm(AMDGPU::sub1);
I.eraseFromParent();
return true;
}
+/// Return the register to use for the index value, and the subregister to use
+/// for the indirectly accessed register.
+static std::pair<Register, unsigned>
+computeIndirectRegIndex(MachineRegisterInfo &MRI,
+ const SIRegisterInfo &TRI,
+ const TargetRegisterClass *SuperRC,
+ Register IdxReg,
+ unsigned EltSize) {
+ Register IdxBaseReg;
+ int Offset;
+ MachineInstr *Unused;
+
+ std::tie(IdxBaseReg, Offset, Unused)
+ = AMDGPU::getBaseWithConstantOffset(MRI, IdxReg);
+ if (IdxBaseReg == AMDGPU::NoRegister) {
+ // This will happen if the index is a known constant. This should ordinarily
+ // be legalized out, but handle it as a register just in case.
+ assert(Offset == 0);
+ IdxBaseReg = IdxReg;
+ }
+
+ ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SuperRC, EltSize);
+
+ // Skip out of bounds offsets, or else we would end up using an undefined
+ // register.
+ if (static_cast<unsigned>(Offset) >= SubRegs.size())
+ return std::make_pair(IdxReg, SubRegs[0]);
+ return std::make_pair(IdxBaseReg, SubRegs[Offset]);
+}
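+
+// As an illustration (hypothetical operands): for a 128-bit vector register
+// class with EltSize == 4 and an index defined as (add %base, 2),
+// getRegSplitParts is expected to yield {sub0, sub1, sub2, sub3}, so the
+// helper returns {%base, sub2}; an out-of-range constant offset instead falls
+// back to {IdxReg, sub0}.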
+
bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
MachineInstr &MI) const {
Register DstReg = MI.getOperand(0).getReg();
@@ -1714,6 +2464,8 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
*MRI);
const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(DstTy, *DstRB,
*MRI);
+ if (!SrcRC || !DstRC)
+ return false;
if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
!RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
@@ -1723,7 +2475,9 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
const DebugLoc &DL = MI.getDebugLoc();
const bool Is64 = DstTy.getSizeInBits() == 64;
- unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
+ unsigned SubReg;
+ std::tie(IdxReg, SubReg) = computeIndirectRegIndex(*MRI, TRI, SrcRC, IdxReg,
+ DstTy.getSizeInBits() / 8);
if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
if (DstTy.getSizeInBits() != 32 && !Is64)
@@ -1766,6 +2520,237 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
return true;
}
+// TODO: Fold insert_vector_elt (extract_vector_elt) into movrelsd
+bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
+ MachineInstr &MI) const {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register VecReg = MI.getOperand(1).getReg();
+ Register ValReg = MI.getOperand(2).getReg();
+ Register IdxReg = MI.getOperand(3).getReg();
+
+ LLT VecTy = MRI->getType(DstReg);
+ LLT ValTy = MRI->getType(ValReg);
+ unsigned VecSize = VecTy.getSizeInBits();
+ unsigned ValSize = ValTy.getSizeInBits();
+
+ const RegisterBank *VecRB = RBI.getRegBank(VecReg, *MRI, TRI);
+ const RegisterBank *ValRB = RBI.getRegBank(ValReg, *MRI, TRI);
+ const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
+
+ assert(VecTy.getElementType() == ValTy);
+
+ // The index must be scalar. If it wasn't RegBankSelect should have moved this
+ // into a waterfall loop.
+ if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
+ return false;
+
+ const TargetRegisterClass *VecRC = TRI.getRegClassForTypeOnBank(VecTy, *VecRB,
+ *MRI);
+ const TargetRegisterClass *ValRC = TRI.getRegClassForTypeOnBank(ValTy, *ValRB,
+ *MRI);
+
+ if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) ||
+ !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) ||
+ !RBI.constrainGenericRegister(ValReg, *ValRC, *MRI) ||
+ !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
+ return false;
+
+ if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
+ return false;
+
+ unsigned SubReg;
+ std::tie(IdxReg, SubReg) = computeIndirectRegIndex(*MRI, TRI, VecRC, IdxReg,
+ ValSize / 8);
+
+ const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
+ STI.useVGPRIndexMode();
+
+ MachineBasicBlock *BB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ if (IndexMode) {
+ BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_ON))
+ .addReg(IdxReg)
+ .addImm(AMDGPU::VGPRIndexMode::DST_ENABLE);
+ } else {
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+ .addReg(IdxReg);
+ }
+
+ const MCInstrDesc &RegWriteOp
+ = TII.getIndirectRegWritePseudo(VecSize, ValSize,
+ VecRB->getID() == AMDGPU::SGPRRegBankID);
+ BuildMI(*BB, MI, DL, RegWriteOp, DstReg)
+ .addReg(VecReg)
+ .addReg(ValReg)
+ .addImm(SubReg);
+
+ if (IndexMode)
+ BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_OFF));
+
+ MI.eraseFromParent();
+ return true;
+}
+
+static bool isZeroOrUndef(int X) {
+ return X == 0 || X == -1;
+}
+
+static bool isOneOrUndef(int X) {
+ return X == 1 || X == -1;
+}
+
+static bool isZeroOrOneOrUndef(int X) {
+ return X == 0 || X == 1 || X == -1;
+}
+
+// Normalize a VOP3P shuffle mask to refer to the low/high half of a single
+// 32-bit register.
+static Register normalizeVOP3PMask(int NewMask[2], Register Src0, Register Src1,
+ ArrayRef<int> Mask) {
+ NewMask[0] = Mask[0];
+ NewMask[1] = Mask[1];
+ if (isZeroOrOneOrUndef(Mask[0]) && isZeroOrOneOrUndef(Mask[1]))
+ return Src0;
+
+ assert(NewMask[0] == 2 || NewMask[0] == 3 || NewMask[0] == -1);
+ assert(NewMask[1] == 2 || NewMask[1] == 3 || NewMask[1] == -1);
+
+ // Shift the mask inputs to be 0/1.
+ NewMask[0] = NewMask[0] == -1 ? -1 : NewMask[0] - 2;
+ NewMask[1] = NewMask[1] == -1 ? -1 : NewMask[1] - 2;
+ return Src1;
+}
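+
+// As an illustration (hypothetical masks): a shuffle mask of <2, 3> reads only
+// Src1 and is rewritten to NewMask = {0, 1}; <0, -1> stays on Src0 with the
+// mask unchanged. Masks that read both sources, such as <3, 0>, are expected
+// to have been rejected by isLegalVOP3PShuffleMask before this is called.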
+
+// This is only legal with VOP3P instructions as an aid to op_sel matching.
+bool AMDGPUInstructionSelector::selectG_SHUFFLE_VECTOR(
+ MachineInstr &MI) const {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src0Reg = MI.getOperand(1).getReg();
+ Register Src1Reg = MI.getOperand(2).getReg();
+ ArrayRef<int> ShufMask = MI.getOperand(3).getShuffleMask();
+
+ const LLT V2S16 = LLT::vector(2, 16);
+ if (MRI->getType(DstReg) != V2S16 || MRI->getType(Src0Reg) != V2S16)
+ return false;
+
+ if (!AMDGPU::isLegalVOP3PShuffleMask(ShufMask))
+ return false;
+
+ assert(ShufMask.size() == 2);
+ assert(STI.hasSDWA() && "no target has VOP3P but not SDWA");
+
+ MachineBasicBlock *MBB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
+ const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
+ const TargetRegisterClass &RC = IsVALU ?
+ AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
+
+ // Handle the degenerate case which should have folded out.
+ if (ShufMask[0] == -1 && ShufMask[1] == -1) {
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::IMPLICIT_DEF), DstReg);
+
+ MI.eraseFromParent();
+ return RBI.constrainGenericRegister(DstReg, RC, *MRI);
+ }
+
+ // A legal VOP3P mask only reads one of the sources.
+ int Mask[2];
+ Register SrcVec = normalizeVOP3PMask(Mask, Src0Reg, Src1Reg, ShufMask);
+
+ if (!RBI.constrainGenericRegister(DstReg, RC, *MRI) ||
+ !RBI.constrainGenericRegister(SrcVec, RC, *MRI))
+ return false;
+
+ // TODO: This also should have been folded out
+ if (isZeroOrUndef(Mask[0]) && isOneOrUndef(Mask[1])) {
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::COPY), DstReg)
+ .addReg(SrcVec);
+
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (Mask[0] == 1 && Mask[1] == -1) {
+ if (IsVALU) {
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
+ .addImm(16)
+ .addReg(SrcVec);
+ } else {
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), DstReg)
+ .addReg(SrcVec)
+ .addImm(16);
+ }
+ } else if (Mask[0] == -1 && Mask[1] == 0) {
+ if (IsVALU) {
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), DstReg)
+ .addImm(16)
+ .addReg(SrcVec);
+ } else {
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHL_B32), DstReg)
+ .addReg(SrcVec)
+ .addImm(16);
+ }
+ } else if (Mask[0] == 0 && Mask[1] == 0) {
+ if (IsVALU) {
+ // Write low half of the register into the high half.
+ MachineInstr *MovSDWA =
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
+ .addImm(0) // $src0_modifiers
+ .addReg(SrcVec) // $src0
+ .addImm(0) // $clamp
+ .addImm(AMDGPU::SDWA::WORD_1) // $dst_sel
+ .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused
+ .addImm(AMDGPU::SDWA::WORD_0) // $src0_sel
+ .addReg(SrcVec, RegState::Implicit);
+ MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1);
+ } else {
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_LL_B32_B16), DstReg)
+ .addReg(SrcVec)
+ .addReg(SrcVec);
+ }
+ } else if (Mask[0] == 1 && Mask[1] == 1) {
+ if (IsVALU) {
+ // Write high half of the register into the low half.
+ MachineInstr *MovSDWA =
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
+ .addImm(0) // $src0_modifiers
+ .addReg(SrcVec) // $src0
+ .addImm(0) // $clamp
+ .addImm(AMDGPU::SDWA::WORD_0) // $dst_sel
+ .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused
+ .addImm(AMDGPU::SDWA::WORD_1) // $src0_sel
+ .addReg(SrcVec, RegState::Implicit);
+ MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1);
+ } else {
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_HH_B32_B16), DstReg)
+ .addReg(SrcVec)
+ .addReg(SrcVec);
+ }
+ } else if (Mask[0] == 1 && Mask[1] == 0) {
+ if (IsVALU) {
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_ALIGNBIT_B32), DstReg)
+ .addReg(SrcVec)
+ .addReg(SrcVec)
+ .addImm(16);
+ } else {
+ Register TmpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), TmpReg)
+ .addReg(SrcVec)
+ .addImm(16);
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_LL_B32_B16), DstReg)
+ .addReg(TmpReg)
+ .addReg(SrcVec);
+ }
+ } else
+ llvm_unreachable("all shuffle masks should be handled");
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPUInstructionSelector::select(MachineInstr &I) {
if (I.isPHI())
return selectPHI(I);
@@ -1780,9 +2765,9 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_AND:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR:
- if (selectG_AND_OR_XOR(I))
+ if (selectImpl(I, *CoverageInfo))
return true;
- return selectImpl(I, *CoverageInfo);
+ return selectG_AND_OR_XOR(I);
case TargetOpcode::G_ADD:
case TargetOpcode::G_SUB:
if (selectImpl(I, *CoverageInfo))
@@ -1800,6 +2785,14 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_CONSTANT:
case TargetOpcode::G_FCONSTANT:
return selectG_CONSTANT(I);
+ case TargetOpcode::G_FNEG:
+ if (selectImpl(I, *CoverageInfo))
+ return true;
+ return selectG_FNEG(I);
+ case TargetOpcode::G_FABS:
+ if (selectImpl(I, *CoverageInfo))
+ return true;
+ return selectG_FABS(I);
case TargetOpcode::G_EXTRACT:
return selectG_EXTRACT(I);
case TargetOpcode::G_MERGE_VALUES:
@@ -1808,6 +2801,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
return selectG_MERGE_VALUES(I);
case TargetOpcode::G_UNMERGE_VALUES:
return selectG_UNMERGE_VALUES(I);
+ case TargetOpcode::G_BUILD_VECTOR_TRUNC:
+ return selectG_BUILD_VECTOR_TRUNC(I);
case TargetOpcode::G_PTR_ADD:
return selectG_PTR_ADD(I);
case TargetOpcode::G_IMPLICIT_DEF:
@@ -1836,6 +2831,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_ATOMICRMW_UMAX:
case TargetOpcode::G_ATOMICRMW_FADD:
return selectG_LOAD_ATOMICRMW(I);
+ case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG:
+ return selectG_AMDGPU_ATOMIC_CMPXCHG(I);
case TargetOpcode::G_SELECT:
return selectG_SELECT(I);
case TargetOpcode::G_STORE:
@@ -1845,17 +2842,34 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_SEXT_INREG:
if (selectImpl(I, *CoverageInfo))
return true;
return selectG_SZA_EXT(I);
case TargetOpcode::G_BRCOND:
return selectG_BRCOND(I);
case TargetOpcode::G_FRAME_INDEX:
- return selectG_FRAME_INDEX(I);
- case TargetOpcode::G_PTR_MASK:
- return selectG_PTR_MASK(I);
+ case TargetOpcode::G_GLOBAL_VALUE:
+ return selectG_FRAME_INDEX_GLOBAL_VALUE(I);
+ case TargetOpcode::G_PTRMASK:
+ return selectG_PTRMASK(I);
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
return selectG_EXTRACT_VECTOR_ELT(I);
+ case TargetOpcode::G_INSERT_VECTOR_ELT:
+ return selectG_INSERT_VECTOR_ELT(I);
+ case TargetOpcode::G_SHUFFLE_VECTOR:
+ return selectG_SHUFFLE_VECTOR(I);
+ case AMDGPU::G_AMDGPU_ATOMIC_INC:
+ case AMDGPU::G_AMDGPU_ATOMIC_DEC:
+ initM0(I);
+ return selectImpl(I, *CoverageInfo);
+ case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
+ case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE: {
+ const AMDGPU::ImageDimIntrinsicInfo *Intr
+ = AMDGPU::getImageDimIntrinsicInfo(I.getIntrinsicID());
+ assert(Intr && "not an image intrinsic with image pseudo");
+ return selectImageIntrinsic(I, Intr);
+ }
default:
return selectImpl(I, *CoverageInfo);
}
@@ -1871,15 +2885,16 @@ AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
}
std::pair<Register, unsigned>
-AMDGPUInstructionSelector::selectVOP3ModsImpl(
- Register Src) const {
+AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root) const {
+ Register Src = Root.getReg();
+ Register OrigSrc = Src;
unsigned Mods = 0;
- MachineInstr *MI = MRI->getVRegDef(Src);
+ MachineInstr *MI = getDefIgnoringCopies(Src, *MRI);
if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
Src = MI->getOperand(1).getReg();
Mods |= SISrcMods::NEG;
- MI = MRI->getVRegDef(Src);
+ MI = getDefIgnoringCopies(Src, *MRI);
}
if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
@@ -1887,6 +2902,20 @@ AMDGPUInstructionSelector::selectVOP3ModsImpl(
Mods |= SISrcMods::ABS;
}
+ if (Mods != 0 &&
+ RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
+ MachineInstr *UseMI = Root.getParent();
+
+ // If we looked through copies to find source modifiers on an SGPR operand,
+ // we now have an SGPR register source. To avoid potentially violating the
+ // constant bus restriction, we need to insert a copy to a VGPR.
+ Register VGPRSrc = MRI->cloneVirtualRegister(OrigSrc);
+ BuildMI(*UseMI->getParent(), UseMI, UseMI->getDebugLoc(),
+ TII.get(AMDGPU::COPY), VGPRSrc)
+ .addReg(Src);
+ Src = VGPRSrc;
+ }
+
return std::make_pair(Src, Mods);
}
@@ -1904,7 +2933,7 @@ InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
Register Src;
unsigned Mods;
- std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
return {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
@@ -1927,7 +2956,7 @@ InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
Register Src;
unsigned Mods;
- std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
return {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
@@ -1936,12 +2965,48 @@ AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
}
InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectVOP3Mods_nnan(MachineOperand &Root) const {
+AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
+ Register Reg = Root.getReg();
+ const MachineInstr *Def = getDefIgnoringCopies(Reg, *MRI);
+ if (Def && (Def->getOpcode() == AMDGPU::G_FNEG ||
+ Def->getOpcode() == AMDGPU::G_FABS))
+ return {};
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
+ }};
+}
+
+std::pair<Register, unsigned>
+AMDGPUInstructionSelector::selectVOP3PModsImpl(
+ Register Src, const MachineRegisterInfo &MRI) const {
+ unsigned Mods = 0;
+ MachineInstr *MI = MRI.getVRegDef(Src);
+
+ if (MI && MI->getOpcode() == AMDGPU::G_FNEG &&
+ // It's possible to see an f32 fneg here, but unlikely.
+ // TODO: Treat f32 fneg as only high bit.
+ MRI.getType(Src) == LLT::vector(2, 16)) {
+ Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
+ Src = MI->getOperand(1).getReg();
+ MI = MRI.getVRegDef(Src);
+ }
+
+ // TODO: Match op_sel through g_build_vector_trunc and g_shuffle_vector.
+
+ // Packed instructions do not have abs modifiers.
+ Mods |= SISrcMods::OP_SEL_1;
+
+ return std::make_pair(Src, Mods);
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
+ MachineRegisterInfo &MRI
+ = Root.getParent()->getParent()->getParent()->getRegInfo();
+
Register Src;
unsigned Mods;
- std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
- if (!TM.Options.NoNaNsFPMath && !isKnownNeverNaN(Src, *MRI))
- return None;
+ std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI);
return {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
@@ -1950,12 +3015,16 @@ AMDGPUInstructionSelector::selectVOP3Mods_nnan(MachineOperand &Root) const {
}
InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectVOP3OpSelMods0(MachineOperand &Root) const {
- // FIXME: Handle clamp and op_sel
+AMDGPUInstructionSelector::selectVOP3Mods_nnan(MachineOperand &Root) const {
+ Register Src;
+ unsigned Mods;
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
+ if (!TM.Options.NoNaNsFPMath && !isKnownNeverNaN(Src, *MRI))
+ return None;
+
return {{
- [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src_mods
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // clamp
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
}};
}
@@ -1977,15 +3046,15 @@ AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
return None;
const GEPInfo &GEPInfo = AddrInfo[0];
-
- if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
+ Optional<int64_t> EncodedImm =
+ AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm, false);
+ if (!EncodedImm)
return None;
unsigned PtrReg = GEPInfo.SgprParts[0];
- int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
return {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }
}};
}
@@ -1998,14 +3067,15 @@ AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
return None;
const GEPInfo &GEPInfo = AddrInfo[0];
- unsigned PtrReg = GEPInfo.SgprParts[0];
- int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
- if (!isUInt<32>(EncodedImm))
+ Register PtrReg = GEPInfo.SgprParts[0];
+ Optional<int64_t> EncodedImm =
+ AMDGPU::getSMRDEncodedLiteralOffset32(STI, GEPInfo.Imm);
+ if (!EncodedImm)
return None;
return {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }
}};
}
@@ -2023,14 +3093,15 @@ AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
return None;
const GEPInfo &GEPInfo = AddrInfo[0];
- if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
+ // SGPR offset is unsigned.
+ if (!GEPInfo.Imm || GEPInfo.Imm < 0 || !isUInt<32>(GEPInfo.Imm))
return None;
// If we make it this far we have a load with a 32-bit immediate offset.
// It is OK to select this using a sgpr offset, because we have already
// failed trying to select this load into one of the _IMM variants since
// the _IMM Patterns are considered before the _SGPR patterns.
- unsigned PtrReg = GEPInfo.SgprParts[0];
+ Register PtrReg = GEPInfo.SgprParts[0];
Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
.addImm(GEPInfo.Imm);
@@ -2099,7 +3170,8 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
int64_t Offset = 0;
- if (mi_match(Root.getReg(), *MRI, m_ICst(Offset))) {
+ if (mi_match(Root.getReg(), *MRI, m_ICst(Offset)) &&
+ Offset != TM.getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS)) {
Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
// TODO: Should this be inside the render function? The iterator seems to
@@ -2118,17 +3190,17 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
const MachineMemOperand *MMO = *MI->memoperands_begin();
const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
- Register SOffsetReg = isStackPtrRelative(PtrInfo)
- ? Info->getStackPtrOffsetReg()
- : Info->getScratchWaveOffsetReg();
- MIB.addReg(SOffsetReg);
+ if (isStackPtrRelative(PtrInfo))
+ MIB.addReg(Info->getStackPtrOffsetReg());
+ else
+ MIB.addImm(0);
},
[=](MachineInstrBuilder &MIB) { // offset
MIB.addImm(Offset & 4095);
}}};
}
- assert(Offset == 0);
+ assert(Offset == 0 || Offset == -1);
// Try to fold a frame index directly into the MUBUF vaddr field, and any
// offsets.
@@ -2158,13 +3230,6 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
}
}
- // If we don't know this private access is a local stack object, it needs to
- // be relative to the entry point's scratch wave offset register.
- // TODO: Should split large offsets that don't fit like above.
- // TODO: Don't use scratch wave offset just because the offset didn't fit.
- Register SOffset = FI.hasValue() ? Info->getStackPtrOffsetReg()
- : Info->getScratchWaveOffsetReg();
-
return {{[=](MachineInstrBuilder &MIB) { // rsrc
MIB.addReg(Info->getScratchRSrcReg());
},
@@ -2175,15 +3240,22 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
MIB.addReg(VAddr);
},
[=](MachineInstrBuilder &MIB) { // soffset
- MIB.addReg(SOffset);
+ // If we don't know this private access is a local stack object, it
+ // needs to be relative to the entry point's scratch wave offset.
+ // TODO: Should split large offsets that don't fit like above.
+ // TODO: Don't use scratch wave offset just because the offset
+ // didn't fit.
+ if (!Info->isEntryFunction() && FI.hasValue())
+ MIB.addReg(Info->getStackPtrOffsetReg());
+ else
+ MIB.addImm(0);
},
[=](MachineInstrBuilder &MIB) { // offset
MIB.addImm(Offset);
}}};
}
-bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI,
- const MachineOperand &Base,
+bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
int64_t Offset,
unsigned OffsetBits) const {
if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
@@ -2195,7 +3267,7 @@ bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI,
// On Southern Islands, instructions with a negative base value and an offset
// don't seem to work.
- return KnownBits->signBitIsZero(Base.getReg());
+ return KnownBits->signBitIsZero(Base);
}
InstructionSelector::ComplexRendererFns
@@ -2214,68 +3286,485 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset(
const MachineMemOperand *MMO = *MI->memoperands_begin();
const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
- Register SOffsetReg = isStackPtrRelative(PtrInfo)
- ? Info->getStackPtrOffsetReg()
- : Info->getScratchWaveOffsetReg();
return {{
- [=](MachineInstrBuilder &MIB) {
+ [=](MachineInstrBuilder &MIB) { // rsrc
MIB.addReg(Info->getScratchRSrcReg());
- }, // rsrc
- [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); }, // soffset
- [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset
+ },
+ [=](MachineInstrBuilder &MIB) { // soffset
+ if (isStackPtrRelative(PtrInfo))
+ MIB.addReg(Info->getStackPtrOffsetReg());
+ else
+ MIB.addImm(0);
+ },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset
}};
}
+std::pair<Register, unsigned>
+AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {
+ const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
+ if (!RootDef)
+ return std::make_pair(Root.getReg(), 0);
+
+ int64_t ConstAddr = 0;
+
+ Register PtrBase;
+ int64_t Offset;
+ std::tie(PtrBase, Offset) =
+ getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
+
+ if (Offset) {
+ if (isDSOffsetLegal(PtrBase, Offset, 16)) {
+ // (add n0, c0)
+ return std::make_pair(PtrBase, Offset);
+ }
+ } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
+ // TODO
+
+ } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
+ // TODO
+
+ }
+
+ return std::make_pair(Root.getReg(), 0);
+}
+
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
+ Register Reg;
+ unsigned Offset;
+ std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const {
+ Register Reg;
+ unsigned Offset;
+ std::tie(Reg, Offset) = selectDS64Bit4ByteAlignedImpl(Root);
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset+1); }
+ }};
+}
+
+std::pair<Register, unsigned>
+AMDGPUInstructionSelector::selectDS64Bit4ByteAlignedImpl(MachineOperand &Root) const {
const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
- if (!RootDef) {
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
- }};
- }
+ if (!RootDef)
+ return std::make_pair(Root.getReg(), 0);
int64_t ConstAddr = 0;
- if (isBaseWithConstantOffset(Root, *MRI)) {
- const MachineOperand &LHS = RootDef->getOperand(1);
- const MachineOperand &RHS = RootDef->getOperand(2);
- const MachineInstr *LHSDef = MRI->getVRegDef(LHS.getReg());
- const MachineInstr *RHSDef = MRI->getVRegDef(RHS.getReg());
- if (LHSDef && RHSDef) {
- int64_t PossibleOffset =
- RHSDef->getOperand(1).getCImm()->getSExtValue();
- if (isDSOffsetLegal(*MRI, LHS, PossibleOffset, 16)) {
- // (add n0, c0)
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); }
- }};
- }
+
+ Register PtrBase;
+ int64_t Offset;
+ std::tie(PtrBase, Offset) =
+ getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
+
+ if (Offset) {
+ int64_t DWordOffset0 = Offset / 4;
+ int64_t DWordOffset1 = DWordOffset0 + 1;
+ if (isDSOffsetLegal(PtrBase, DWordOffset1, 8)) {
+ // (add n0, c0)
+ return std::make_pair(PtrBase, DWordOffset0);
}
} else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
+ // TODO
+ } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
+ // TODO
+ }
- } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
+ return std::make_pair(Root.getReg(), 0);
+}
+
+/// If \p Root is a G_PTR_ADD with a G_CONSTANT on the right hand side, return
+/// the base value with the constant offset. There may be intervening copies
+/// between \p Root and the identified constant. Returns \p Root, 0 if this does
+/// not match the pattern.
+std::pair<Register, int64_t>
+AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
+ Register Root, const MachineRegisterInfo &MRI) const {
+ MachineInstr *RootI = MRI.getVRegDef(Root);
+ if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
+ return {Root, 0};
+
+ MachineOperand &RHS = RootI->getOperand(2);
+ Optional<ValueAndVReg> MaybeOffset
+ = getConstantVRegValWithLookThrough(RHS.getReg(), MRI, true);
+ if (!MaybeOffset)
+ return {Root, 0};
+ return {RootI->getOperand(1).getReg(), MaybeOffset->Value};
+}
+
+static void addZeroImm(MachineInstrBuilder &MIB) {
+ MIB.addImm(0);
+}
+
+/// Return a resource descriptor for use with an arbitrary 64-bit pointer. If \p
+/// BasePtr is not valid, a null base pointer will be used.
+static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+ uint32_t FormatLo, uint32_t FormatHi,
+ Register BasePtr) {
+ Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
+
+ B.buildInstr(AMDGPU::S_MOV_B32)
+ .addDef(RSrc2)
+ .addImm(FormatLo);
+ B.buildInstr(AMDGPU::S_MOV_B32)
+ .addDef(RSrc3)
+ .addImm(FormatHi);
+
+ // Build the half of the subregister with the constants before building the
+ // full 128-bit register. If we are building multiple resource descriptors,
+ // this will allow CSEing of the 2-component register.
+ B.buildInstr(AMDGPU::REG_SEQUENCE)
+ .addDef(RSrcHi)
+ .addReg(RSrc2)
+ .addImm(AMDGPU::sub0)
+ .addReg(RSrc3)
+ .addImm(AMDGPU::sub1);
+
+ Register RSrcLo = BasePtr;
+ if (!BasePtr) {
+ RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ B.buildInstr(AMDGPU::S_MOV_B64)
+ .addDef(RSrcLo)
+ .addImm(0);
+ }
+
+ B.buildInstr(AMDGPU::REG_SEQUENCE)
+ .addDef(RSrc)
+ .addReg(RSrcLo)
+ .addImm(AMDGPU::sub0_sub1)
+ .addReg(RSrcHi)
+ .addImm(AMDGPU::sub2_sub3);
+
+ return RSrc;
+}
+
+static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+ const SIInstrInfo &TII, Register BasePtr) {
+ uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
+
+ // FIXME: Why are half the "default" bits ignored based on the addressing
+ // mode?
+ return buildRSRC(B, MRI, 0, Hi_32(DefaultFormat), BasePtr);
+}
+
+static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+ const SIInstrInfo &TII, Register BasePtr) {
+ uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
+
+ // FIXME: Why are half the "default" bits ignored based on the addressing
+ // mode?
+ return buildRSRC(B, MRI, -1, Hi_32(DefaultFormat), BasePtr);
+}
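For orientation, a sketch of the word layout these helpers assemble, as implied by the REG_SEQUENCE in buildRSRC: the 64-bit base pointer (or zero) lands in sub0_sub1 and the two format words in sub2/sub3. Plain C++ illustration; the word names are descriptive, not SRD field definitions.

#include <array>
#include <cstdint>

static std::array<uint32_t, 4> makeRsrcWords(uint64_t BasePtr, uint32_t FormatLo,
                                             uint32_t FormatHi) {
  std::array<uint32_t, 4> Words = {
      static_cast<uint32_t>(BasePtr),        // sub0: base pointer, low half
      static_cast<uint32_t>(BasePtr >> 32),  // sub1: base pointer, high half
      FormatLo,                              // sub2: FormatLo (-1 for the offset form)
      FormatHi};                             // sub3: FormatHi (high default format bits)
  return Words;
}

int main() {
  std::array<uint32_t, 4> Words =
      makeRsrcWords(/*BasePtr=*/0, /*FormatLo=*/~0u, /*FormatHi=*/0);
  return Words[2] == ~0u ? 0 : 1;
}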
+
+AMDGPUInstructionSelector::MUBUFAddressData
+AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
+ MUBUFAddressData Data;
+ Data.N0 = Src;
+
+ Register PtrBase;
+ int64_t Offset;
+
+ std::tie(PtrBase, Offset) = getPtrBaseWithConstantOffset(Src, *MRI);
+ if (isUInt<32>(Offset)) {
+ Data.N0 = PtrBase;
+ Data.Offset = Offset;
+ }
+
+ if (MachineInstr *InputAdd
+ = getOpcodeDef(TargetOpcode::G_PTR_ADD, Data.N0, *MRI)) {
+ Data.N2 = InputAdd->getOperand(1).getReg();
+ Data.N3 = InputAdd->getOperand(2).getReg();
+
+    // FIXME: Need to fix extra SGPR->VGPR copies inserted
+    // FIXME: Don't know that this was defined by operand 0
+ //
+ // TODO: Remove this when we have copy folding optimizations after
+ // RegBankSelect.
+ Data.N2 = getDefIgnoringCopies(Data.N2, *MRI)->getOperand(0).getReg();
+ Data.N3 = getDefIgnoringCopies(Data.N3, *MRI)->getOperand(0).getReg();
+ }
+
+ return Data;
+}
+
+/// Return true if the addr64 mubuf mode should be used for the given address.
+bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
+ // (ptr_add N2, N3) -> addr64, or
+ // (ptr_add (ptr_add N2, N3), C1) -> addr64
+ if (Addr.N2)
+ return true;
+
+ const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
+ return N0Bank->getID() == AMDGPU::VGPRRegBankID;
+}
+/// Split an immediate offset \p ImmOffset depending on whether it fits in the
+/// immediate field. Modifies \p ImmOffset and sets \p SOffset to the variable
+/// component.
+void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
+ MachineIRBuilder &B, Register &SOffset, int64_t &ImmOffset) const {
+ if (SIInstrInfo::isLegalMUBUFImmOffset(ImmOffset))
+ return;
+
+ // Illegal offset, store it in soffset.
+ SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ B.buildInstr(AMDGPU::S_MOV_B32)
+ .addDef(SOffset)
+ .addImm(ImmOffset);
+ ImmOffset = 0;
+}
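A minimal standalone illustration of the fallback above, assuming a 12-bit unsigned immediate field: an offset that does not fit moves wholesale into soffset and the immediate becomes zero.

#include <cstdint>

struct SplitResult {
  bool NeedsSOffsetReg; // stands in for emitting an S_MOV_B32 of the value
  int64_t SOffsetVal;
  int64_t ImmOffset;
};

static SplitResult splitOffset(int64_t ImmOffset) {
  const bool FitsImmField = ImmOffset >= 0 && ImmOffset < 4096; // assumed width
  if (FitsImmField)
    return {false, 0, ImmOffset};
  return {true, ImmOffset, 0};
}

int main() {
  return splitOffset(5000).ImmOffset == 0 ? 0 : 1;
}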
+bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
+ MachineOperand &Root, Register &VAddr, Register &RSrcReg,
+ Register &SOffset, int64_t &Offset) const {
+ // FIXME: Predicates should stop this from reaching here.
+ // addr64 bit was removed for volcanic islands.
+ if (!STI.hasAddr64() || STI.useFlatForGlobal())
+ return false;
+
+ MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
+ if (!shouldUseAddr64(AddrData))
+ return false;
+
+ Register N0 = AddrData.N0;
+ Register N2 = AddrData.N2;
+ Register N3 = AddrData.N3;
+ Offset = AddrData.Offset;
+
+ // Base pointer for the SRD.
+ Register SRDPtr;
+
+ if (N2) {
+ if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
+ assert(N3);
+ if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
+ // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
+ // addr64, and construct the default resource from a 0 address.
+ VAddr = N0;
+ } else {
+ SRDPtr = N3;
+ VAddr = N2;
+ }
+ } else {
+ // N2 is not divergent.
+ SRDPtr = N2;
+ VAddr = N3;
+ }
+ } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
+ // Use the default null pointer in the resource
+ VAddr = N0;
+ } else {
+ // N0 -> offset, or
+ // (N0 + C1) -> offset
+ SRDPtr = N0;
}
+ MachineIRBuilder B(*Root.getParent());
+ RSrcReg = buildAddr64RSrc(B, *MRI, TII, SRDPtr);
+ splitIllegalMUBUFOffset(B, SOffset, Offset);
+ return true;
+}
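The operand assignment above reduces to a small decision on which inputs are divergent; here is a sketch of that decision with register banks collapsed to booleans (illustration only, mirroring the branch structure of selectMUBUFAddr64Impl).

#include <cassert>

struct Operands { bool VAddrIsN0, SrdBaseIsN2, SrdBaseIsN3; };

static Operands pickAddr64Operands(bool HasN2, bool N2Divergent,
                                   bool N3Divergent, bool N0Divergent) {
  if (HasN2) {
    if (N2Divergent) {
      if (N3Divergent)
        return {true, false, false};   // both VGPR: vaddr = N0, null SRD base
      return {false, false, true};     // vaddr = N2, SRD base = N3
    }
    return {false, true, false};       // vaddr = N3, SRD base = N2
  }
  if (N0Divergent)
    return {true, false, false};       // lone VGPR pointer: vaddr = N0
  return {false, false, false};        // uniform pointer: SRD base = N0
}

int main() {
  assert(pickAddr64Operands(true, true, false, true).SrdBaseIsN3);
  return 0;
}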
+
+bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
+ MachineOperand &Root, Register &RSrcReg, Register &SOffset,
+ int64_t &Offset) const {
+ MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
+ if (shouldUseAddr64(AddrData))
+ return false;
+
+ // N0 -> offset, or
+ // (N0 + C1) -> offset
+ Register SRDPtr = AddrData.N0;
+ Offset = AddrData.Offset;
+
+ // TODO: Look through extensions for 32-bit soffset.
+ MachineIRBuilder B(*Root.getParent());
+
+ RSrcReg = buildOffsetSrc(B, *MRI, TII, SRDPtr);
+ splitIllegalMUBUFOffset(B, SOffset, Offset);
+ return true;
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
+ Register VAddr;
+ Register RSrcReg;
+ Register SOffset;
+ int64_t Offset = 0;
+
+ if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
+ return {};
+
+ // FIXME: Use defaulted operands for trailing 0s and remove from the complex
+ // pattern.
return {{
- [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
+ [=](MachineInstrBuilder &MIB) { // rsrc
+ MIB.addReg(RSrcReg);
+ },
+ [=](MachineInstrBuilder &MIB) { // vaddr
+ MIB.addReg(VAddr);
+ },
+ [=](MachineInstrBuilder &MIB) { // soffset
+ if (SOffset)
+ MIB.addReg(SOffset);
+ else
+ MIB.addImm(0);
+ },
+ [=](MachineInstrBuilder &MIB) { // offset
+ MIB.addImm(Offset);
+ },
+ addZeroImm, // glc
+ addZeroImm, // slc
+ addZeroImm, // tfe
+ addZeroImm, // dlc
+ addZeroImm // swz
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
+ Register RSrcReg;
+ Register SOffset;
+ int64_t Offset = 0;
+
+ if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
+ return {};
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { // rsrc
+ MIB.addReg(RSrcReg);
+ },
+ [=](MachineInstrBuilder &MIB) { // soffset
+ if (SOffset)
+ MIB.addReg(SOffset);
+ else
+ MIB.addImm(0);
+ },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
+ addZeroImm, // glc
+ addZeroImm, // slc
+ addZeroImm, // tfe
+ addZeroImm, // dlc
+ addZeroImm // swz
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectMUBUFAddr64Atomic(MachineOperand &Root) const {
+ Register VAddr;
+ Register RSrcReg;
+ Register SOffset;
+ int64_t Offset = 0;
+
+ if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
+ return {};
+
+ // FIXME: Use defaulted operands for trailing 0s and remove from the complex
+ // pattern.
+ return {{
+ [=](MachineInstrBuilder &MIB) { // rsrc
+ MIB.addReg(RSrcReg);
+ },
+ [=](MachineInstrBuilder &MIB) { // vaddr
+ MIB.addReg(VAddr);
+ },
+ [=](MachineInstrBuilder &MIB) { // soffset
+ if (SOffset)
+ MIB.addReg(SOffset);
+ else
+ MIB.addImm(0);
+ },
+ [=](MachineInstrBuilder &MIB) { // offset
+ MIB.addImm(Offset);
+ },
+ addZeroImm // slc
}};
}
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectMUBUFOffsetAtomic(MachineOperand &Root) const {
+ Register RSrcReg;
+ Register SOffset;
+ int64_t Offset = 0;
+
+ if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
+ return {};
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { // rsrc
+ MIB.addReg(RSrcReg);
+ },
+ [=](MachineInstrBuilder &MIB) { // soffset
+ if (SOffset)
+ MIB.addReg(SOffset);
+ else
+ MIB.addImm(0);
+ },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
+ addZeroImm // slc
+ }};
+}
+
+/// Get an immediate that must be 32 bits, and is treated as zero-extended.
+static Optional<uint64_t> getConstantZext32Val(Register Reg,
+ const MachineRegisterInfo &MRI) {
+  // getConstantVRegVal sign-extends any value, so check whether that matters.
+ Optional<int64_t> OffsetVal = getConstantVRegVal(Reg, MRI);
+ if (!OffsetVal || !isInt<32>(*OffsetVal))
+ return None;
+ return Lo_32(*OffsetVal);
+}
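Why the extra check matters, in isolation: a sign-extending constant lookup reports a 32-bit all-ones pattern as -1, and taking the low 32 bits recovers the zero-extended immediate that the callers below treat as unsigned. A standalone sketch:

#include <cassert>
#include <cstdint>

int main() {
  const int64_t SExtVal = -1;                               // how a sext lookup reports 0xffffffff
  const uint64_t ZExtImm = static_cast<uint32_t>(SExtVal);  // the Lo_32() step
  assert(ZExtImm == 0xffffffffu);
  return 0;
}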
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
+ Optional<uint64_t> OffsetVal = getConstantZext32Val(Root.getReg(), *MRI);
+ if (!OffsetVal)
+ return {};
+
+ Optional<int64_t> EncodedImm =
+ AMDGPU::getSMRDEncodedOffset(STI, *OffsetVal, true);
+ if (!EncodedImm)
+ return {};
+
+ return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
+ assert(STI.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
+
+ Optional<uint64_t> OffsetVal = getConstantZext32Val(Root.getReg(), *MRI);
+ if (!OffsetVal)
+ return {};
+
+ Optional<int64_t> EncodedImm
+ = AMDGPU::getSMRDEncodedLiteralOffset32(STI, *OffsetVal);
+ if (!EncodedImm)
+ return {};
+
+ return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }};
+}
+
void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
"Expected G_CONSTANT");
- Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), *MRI);
- assert(CstVal && "Expected constant value");
- MIB.addImm(CstVal.getValue());
+ MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());
}
void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,
@@ -2316,6 +3805,34 @@ void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
MIB.addImm(MI.getOperand(OpIdx).getImm());
}
+void AMDGPUInstructionSelector::renderExtractGLC(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(OpIdx >= 0 && "expected to match an immediate operand");
+ MIB.addImm(MI.getOperand(OpIdx).getImm() & 1);
+}
+
+void AMDGPUInstructionSelector::renderExtractSLC(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(OpIdx >= 0 && "expected to match an immediate operand");
+ MIB.addImm((MI.getOperand(OpIdx).getImm() >> 1) & 1);
+}
+
+void AMDGPUInstructionSelector::renderExtractDLC(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(OpIdx >= 0 && "expected to match an immediate operand");
+ MIB.addImm((MI.getOperand(OpIdx).getImm() >> 2) & 1);
+}
+
+void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(OpIdx >= 0 && "expected to match an immediate operand");
+ MIB.addImm((MI.getOperand(OpIdx).getImm() >> 3) & 1);
+}
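The four renderers extract single bits from one packed cachepolicy immediate; the layout they imply (glc in bit 0, slc in bit 1, dlc in bit 2, swz in bit 3) can be checked with a few lines of plain C++:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t CachePolicy = 0b0101;    // glc=1, slc=0, dlc=1, swz=0
  assert(((CachePolicy >> 0) & 1) == 1);  // glc
  assert(((CachePolicy >> 1) & 1) == 0);  // slc
  assert(((CachePolicy >> 2) & 1) == 1);  // dlc
  assert(((CachePolicy >> 3) & 1) == 0);  // swz
  return 0;
}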
+
bool AMDGPUInstructionSelector::isInlineImmediate16(int64_t Imm) const {
return AMDGPU::isInlinableLiteral16(Imm, STI.hasInv2PiInlineImm());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 38ca7fd4104b..1fe80958917d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -31,6 +31,10 @@ namespace {
namespace llvm {
+namespace AMDGPU {
+struct ImageDimIntrinsicInfo;
+}
+
class AMDGPUInstrInfo;
class AMDGPURegisterBankInfo;
class GCNSubtarget;
@@ -80,28 +84,39 @@ private:
MachineOperand getSubOperand64(MachineOperand &MO,
const TargetRegisterClass &SubRC,
unsigned SubIdx) const;
+
+ bool constrainCopyLikeIntrin(MachineInstr &MI, unsigned NewOpc) const;
bool selectCOPY(MachineInstr &I) const;
bool selectPHI(MachineInstr &I) const;
bool selectG_TRUNC(MachineInstr &I) const;
bool selectG_SZA_EXT(MachineInstr &I) const;
bool selectG_CONSTANT(MachineInstr &I) const;
+ bool selectG_FNEG(MachineInstr &I) const;
+ bool selectG_FABS(MachineInstr &I) const;
bool selectG_AND_OR_XOR(MachineInstr &I) const;
bool selectG_ADD_SUB(MachineInstr &I) const;
bool selectG_UADDO_USUBO_UADDE_USUBE(MachineInstr &I) const;
bool selectG_EXTRACT(MachineInstr &I) const;
bool selectG_MERGE_VALUES(MachineInstr &I) const;
bool selectG_UNMERGE_VALUES(MachineInstr &I) const;
+ bool selectG_BUILD_VECTOR_TRUNC(MachineInstr &I) const;
bool selectG_PTR_ADD(MachineInstr &I) const;
bool selectG_IMPLICIT_DEF(MachineInstr &I) const;
bool selectG_INSERT(MachineInstr &I) const;
- bool selectG_INTRINSIC(MachineInstr &I) const;
- std::tuple<Register, unsigned, unsigned>
- splitBufferOffsets(MachineIRBuilder &B, Register OrigOffset) const;
+ bool selectInterpP1F16(MachineInstr &MI) const;
+ bool selectDivScale(MachineInstr &MI) const;
+ bool selectIntrinsicIcmp(MachineInstr &MI) const;
+ bool selectBallot(MachineInstr &I) const;
+ bool selectG_INTRINSIC(MachineInstr &I) const;
- bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const;
+ bool selectEndCfIntrinsic(MachineInstr &MI) const;
bool selectDSOrderedIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
+ bool selectDSGWSIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
+ bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const;
+ bool selectImageIntrinsic(MachineInstr &MI,
+ const AMDGPU::ImageDimIntrinsicInfo *Intr) const;
bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const;
int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const;
bool selectG_ICMP(MachineInstr &I) const;
@@ -112,15 +127,18 @@ private:
void initM0(MachineInstr &I) const;
bool selectG_LOAD_ATOMICRMW(MachineInstr &I) const;
+ bool selectG_AMDGPU_ATOMIC_CMPXCHG(MachineInstr &I) const;
bool selectG_STORE(MachineInstr &I) const;
bool selectG_SELECT(MachineInstr &I) const;
bool selectG_BRCOND(MachineInstr &I) const;
- bool selectG_FRAME_INDEX(MachineInstr &I) const;
- bool selectG_PTR_MASK(MachineInstr &I) const;
+ bool selectG_FRAME_INDEX_GLOBAL_VALUE(MachineInstr &I) const;
+ bool selectG_PTRMASK(MachineInstr &I) const;
bool selectG_EXTRACT_VECTOR_ELT(MachineInstr &I) const;
+ bool selectG_INSERT_VECTOR_ELT(MachineInstr &I) const;
+ bool selectG_SHUFFLE_VECTOR(MachineInstr &I) const;
std::pair<Register, unsigned>
- selectVOP3ModsImpl(Register Src) const;
+ selectVOP3ModsImpl(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectVCSRC(MachineOperand &Root) const;
@@ -134,11 +152,18 @@ private:
selectVOP3OMods(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectVOP3Mods(MachineOperand &Root) const;
+
+ ComplexRendererFns selectVOP3NoMods(MachineOperand &Root) const;
+
InstructionSelector::ComplexRendererFns
selectVOP3Mods_nnan(MachineOperand &Root) const;
+ std::pair<Register, unsigned>
+ selectVOP3PModsImpl(Register Src, const MachineRegisterInfo &MRI) const;
+
InstructionSelector::ComplexRendererFns
- selectVOP3OpSelMods0(MachineOperand &Root) const;
+ selectVOP3PMods(MachineOperand &Root) const;
+
InstructionSelector::ComplexRendererFns
selectVOP3OpSelMods(MachineOperand &Root) const;
@@ -163,19 +188,86 @@ private:
InstructionSelector::ComplexRendererFns
selectMUBUFScratchOffset(MachineOperand &Root) const;
- bool isDSOffsetLegal(const MachineRegisterInfo &MRI,
- const MachineOperand &Base,
- int64_t Offset, unsigned OffsetBits) const;
+ bool isDSOffsetLegal(Register Base, int64_t Offset,
+ unsigned OffsetBits) const;
+ std::pair<Register, unsigned>
+ selectDS1Addr1OffsetImpl(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectDS1Addr1Offset(MachineOperand &Root) const;
+ std::pair<Register, unsigned>
+ selectDS64Bit4ByteAlignedImpl(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectDS64Bit4ByteAligned(MachineOperand &Root) const;
+
+ std::pair<Register, int64_t>
+ getPtrBaseWithConstantOffset(Register Root,
+ const MachineRegisterInfo &MRI) const;
+
+ // Parse out a chain of up to two g_ptr_add instructions.
+ // g_ptr_add (n0, _)
+ // g_ptr_add (n0, (n1 = g_ptr_add n2, n3))
+ struct MUBUFAddressData {
+ Register N0, N2, N3;
+ int64_t Offset = 0;
+ };
+
+ bool shouldUseAddr64(MUBUFAddressData AddrData) const;
+
+ void splitIllegalMUBUFOffset(MachineIRBuilder &B,
+ Register &SOffset, int64_t &ImmOffset) const;
+
+ MUBUFAddressData parseMUBUFAddress(Register Src) const;
+
+ bool selectMUBUFAddr64Impl(MachineOperand &Root, Register &VAddr,
+ Register &RSrcReg, Register &SOffset,
+ int64_t &Offset) const;
+
+ bool selectMUBUFOffsetImpl(MachineOperand &Root, Register &RSrcReg,
+ Register &SOffset, int64_t &Offset) const;
+
+ InstructionSelector::ComplexRendererFns
+ selectMUBUFAddr64(MachineOperand &Root) const;
+
+ InstructionSelector::ComplexRendererFns
+ selectMUBUFOffset(MachineOperand &Root) const;
+
+ InstructionSelector::ComplexRendererFns
+ selectMUBUFOffsetAtomic(MachineOperand &Root) const;
+
+ InstructionSelector::ComplexRendererFns
+ selectMUBUFAddr64Atomic(MachineOperand &Root) const;
+
+ ComplexRendererFns selectSMRDBufferImm(MachineOperand &Root) const;
+ ComplexRendererFns selectSMRDBufferImm32(MachineOperand &Root) const;
+
void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const;
void renderTruncTImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
+ void renderTruncTImm1(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const {
+ renderTruncTImm(MIB, MI, OpIdx);
+ }
+
+ void renderTruncTImm8(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const {
+ renderTruncTImm(MIB, MI, OpIdx);
+ }
+
+ void renderTruncTImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const {
+ renderTruncTImm(MIB, MI, OpIdx);
+ }
+
+ void renderTruncTImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const {
+ renderTruncTImm(MIB, MI, OpIdx);
+ }
+
void renderNegateImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
@@ -184,6 +276,14 @@ private:
void renderPopcntImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
+ void renderExtractGLC(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+ void renderExtractSLC(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+ void renderExtractDLC(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+ void renderExtractSWZ(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
bool isInlineImmediate16(int64_t Imm) const;
bool isInlineImmediate32(int64_t Imm) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 7e71dbdd1240..5cb7ac320d2f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -77,6 +77,9 @@ class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
def TruePredicate : Predicate<"">;
+// FIXME: Tablegen should specially support this
+def FalsePredicate : Predicate<"false">;
+
// Add a predicate to the list if it does not already exist, to deduplicate it.
class PredConcat<list<Predicate> lst, Predicate pred> {
list<Predicate> ret =
@@ -101,12 +104,12 @@ class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
PredicateControl;
let RecomputePerFunction = 1 in {
-def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals">;
-def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals">;
-def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals">;
-def NoFP16Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals">;
-def NoFP32Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals">;
-def NoFP64Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals">;
+def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
+def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
+def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
+def NoFP16Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
+def NoFP32Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
+def NoFP64Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
}
@@ -408,7 +411,12 @@ def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
let IsAtomic = 1;
let MemoryVT = i64;
}
+} // End let AddressSpaces
+} // End foreach as
+
+foreach as = [ "global", "flat", "local", "private", "region" ] in {
+let AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {
def store_#as : PatFrag<(ops node:$val, node:$ptr),
(unindexedstore node:$val, node:$ptr)> {
let IsStore = 1;
@@ -444,8 +452,8 @@ def truncstorei16_hi16_#as : StoreHi16<truncstorei16>;
defm atomic_store_#as : binary_atomic_op<atomic_store>;
-} // End let AddressSpaces = ...
-} // End foreach AddrSpace
+} // End let AddressSpaces
+} // End foreach as
multiclass ret_noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
@@ -520,7 +528,7 @@ class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
-int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
+int FP_4294966784 = 0x4f7ffffe; // 4294966784 = 4294967296 - 512 = 2^32 - 2^9
int FP16_ONE = 0x3C00;
int FP16_NEG_ONE = 0xBC00;
int FP32_ONE = 0x3f800000;
@@ -731,6 +739,12 @@ multiclass BFEPattern <Instruction UBFE, Instruction SBFE, Instruction MOV> {
>;
}
+// fshr pattern
+class FSHRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
+ (fshr i32:$src0, i32:$src1, i32:$src2),
+ (BIT_ALIGN $src0, $src1, $src2)
+>;
+
// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
(rotr i32:$src0, i32:$src1),
@@ -796,3 +810,13 @@ def fmaxnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
[(fmaxnum_ieee_oneuse node:$src0, node:$src1),
(fmaxnum_oneuse node:$src0, node:$src1)]
>;
+
+def any_fmad : PatFrags<(ops node:$src0, node:$src1, node:$src2),
+ [(fmad node:$src0, node:$src1, node:$src2),
+ (AMDGPUfmad_ftz node:$src0, node:$src1, node:$src2)]
+>;
+
+// FIXME: fsqrt should not select directly
+def any_amdgcn_sqrt : PatFrags<(ops node:$src0),
+ [(fsqrt node:$src0), (int_amdgcn_sqrt node:$src0)]
+>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 3f99d5cfb7f9..2976794b49c3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -11,19 +11,16 @@
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
-#if defined(_MSC_VER) || defined(__MINGW32__)
-// According to Microsoft, one must set _USE_MATH_DEFINES in order to get M_PI
-// from the Visual C++ cmath / math.h headers:
-// https://docs.microsoft.com/en-us/cpp/c-runtime-library/math-constants?view=vs-2019
-#define _USE_MATH_DEFINES
-#endif
+#include "AMDGPULegalizerInfo.h"
#include "AMDGPU.h"
-#include "AMDGPULegalizerInfo.h"
+#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
@@ -37,21 +34,30 @@ using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;
-
-
-static LegalityPredicate isMultiple32(unsigned TypeIdx,
- unsigned MaxSize = 1024) {
- return [=](const LegalityQuery &Query) {
- const LLT Ty = Query.Types[TypeIdx];
- const LLT EltTy = Ty.getScalarType();
- return Ty.getSizeInBits() <= MaxSize && EltTy.getSizeInBits() % 32 == 0;
- };
+using namespace MIPatternMatch;
+
+// Hack until load/store selection patterns support any tuple of legal types.
+static cl::opt<bool> EnableNewLegality(
+ "amdgpu-global-isel-new-legality",
+  cl::desc("Use GlobalISel's desired legality, rather than try to use "
+ "rules compatible with selection patterns"),
+ cl::init(false),
+ cl::ReallyHidden);
+
+static constexpr unsigned MaxRegisterSize = 1024;
+
+// Round the number of elements to the next power of two elements
+static LLT getPow2VectorType(LLT Ty) {
+ unsigned NElts = Ty.getNumElements();
+ unsigned Pow2NElts = 1 << Log2_32_Ceil(NElts);
+ return Ty.changeNumElements(Pow2NElts);
}
-static LegalityPredicate sizeIs(unsigned TypeIdx, unsigned Size) {
- return [=](const LegalityQuery &Query) {
- return Query.Types[TypeIdx].getSizeInBits() == Size;
- };
+// Round the scalar width in bits up to the next power of two
+static LLT getPow2ScalarType(LLT Ty) {
+ unsigned Bits = Ty.getSizeInBits();
+ unsigned Pow2Bits = 1 << Log2_32_Ceil(Bits);
+ return LLT::scalar(Pow2Bits);
}
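Both helpers round up to the next power of two via Log2_32_Ceil and a shift; an equivalent without the LLVM support library, for reference:

#include <cassert>

static unsigned nextPow2(unsigned N) {
  unsigned P = 1;
  while (P < N)
    P <<= 1;
  return P;
}

int main() {
  assert(nextPow2(3) == 4);   // a 3-element vector widens to 4 elements
  assert(nextPow2(24) == 32); // s24 widens to s32
  assert(nextPow2(32) == 32); // already a power of two
  return 0;
}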
static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
@@ -109,6 +115,23 @@ static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) {
};
}
+static LegalizeMutation bitcastToRegisterType(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[TypeIdx];
+ unsigned Size = Ty.getSizeInBits();
+
+ LLT CoercedTy;
+ if (Size <= 32) {
+ // <2 x s8> -> s16
+ // <4 x s8> -> s32
+ CoercedTy = LLT::scalar(Size);
+ } else
+ CoercedTy = LLT::scalarOrVector(Size / 32, 32);
+
+ return std::make_pair(TypeIdx, CoercedTy);
+ };
+}
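Restating the coercion rule as plain arithmetic (illustration only): 32 bits or fewer becomes a scalar of that width, anything larger becomes N x s32, so for example <4 x s8> coerces to s32 and <6 x s16> to <3 x s32>.

#include <cassert>

struct CoercedTy { unsigned NumElts, EltBits; }; // NumElts == 1 means a scalar

static CoercedTy coerceToRegisterType(unsigned SizeInBits) {
  if (SizeInBits <= 32)
    return {1, SizeInBits};       // e.g. <4 x s8> -> s32, <2 x s8> -> s16
  return {SizeInBits / 32, 32};   // e.g. <6 x s16> (96 bits) -> <3 x s32>
}

int main() {
  assert(coerceToRegisterType(32).EltBits == 32);
  assert(coerceToRegisterType(96).NumElts == 3);
  return 0;
}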
+
static LegalityPredicate vectorSmallerThan(unsigned TypeIdx, unsigned Size) {
return [=](const LegalityQuery &Query) {
const LLT QueryTy = Query.Types[TypeIdx];
@@ -130,25 +153,47 @@ static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
};
}
-// Any combination of 32 or 64-bit elements up to 1024 bits, and multiples of
-// v2s16.
+static bool isRegisterSize(unsigned Size) {
+ return Size % 32 == 0 && Size <= MaxRegisterSize;
+}
+
+static bool isRegisterVectorElementType(LLT EltTy) {
+ const int EltSize = EltTy.getSizeInBits();
+ return EltSize == 16 || EltSize % 32 == 0;
+}
+
+static bool isRegisterVectorType(LLT Ty) {
+ const int EltSize = Ty.getElementType().getSizeInBits();
+ return EltSize == 32 || EltSize == 64 ||
+ (EltSize == 16 && Ty.getNumElements() % 2 == 0) ||
+ EltSize == 128 || EltSize == 256;
+}
+
+static bool isRegisterType(LLT Ty) {
+ if (!isRegisterSize(Ty.getSizeInBits()))
+ return false;
+
+ if (Ty.isVector())
+ return isRegisterVectorType(Ty);
+
+ return true;
+}
+
+// Any combination of 32 or 64-bit elements up to the maximum register size,
+// and multiples of v2s16.
static LegalityPredicate isRegisterType(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
- const LLT Ty = Query.Types[TypeIdx];
- if (Ty.isVector()) {
- const int EltSize = Ty.getElementType().getSizeInBits();
- return EltSize == 32 || EltSize == 64 ||
- (EltSize == 16 && Ty.getNumElements() % 2 == 0) ||
- EltSize == 128 || EltSize == 256;
- }
-
- return Ty.getSizeInBits() % 32 == 0 && Ty.getSizeInBits() <= 1024;
+ return isRegisterType(Query.Types[TypeIdx]);
};
}
-static LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT Type) {
+static LegalityPredicate elementTypeIsLegal(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
- return Query.Types[TypeIdx].getElementType() == Type;
+ const LLT QueryTy = Query.Types[TypeIdx];
+ if (!QueryTy.isVector())
+ return false;
+ const LLT EltTy = QueryTy.getElementType();
+ return EltTy == LLT::scalar(16) || EltTy.getSizeInBits() >= 32;
};
}
@@ -160,6 +205,120 @@ static LegalityPredicate isWideScalarTruncStore(unsigned TypeIdx) {
};
}
+// TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we
+// handle some operations by just promoting the register during
+// selection. There are also d16 loads on GFX9+ which preserve the high bits.
+static unsigned maxSizeForAddrSpace(const GCNSubtarget &ST, unsigned AS,
+ bool IsLoad) {
+ switch (AS) {
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ // FIXME: Private element size.
+ return 32;
+ case AMDGPUAS::LOCAL_ADDRESS:
+ return ST.useDS128() ? 128 : 64;
+ case AMDGPUAS::GLOBAL_ADDRESS:
+ case AMDGPUAS::CONSTANT_ADDRESS:
+ case AMDGPUAS::CONSTANT_ADDRESS_32BIT:
+ // Treat constant and global as identical. SMRD loads are sometimes usable for
+ // global loads (ideally constant address space should be eliminated)
+ // depending on the context. Legality cannot be context dependent, but
+ // RegBankSelect can split the load as necessary depending on the pointer
+ // register bank/uniformity and if the memory is invariant or not written in a
+ // kernel.
+ return IsLoad ? 512 : 128;
+ default:
+ // Flat addresses may contextually need to be split to 32-bit parts if they
+ // may alias scratch depending on the subtarget.
+ return 128;
+ }
+}
+
+static bool isLoadStoreSizeLegal(const GCNSubtarget &ST,
+ const LegalityQuery &Query,
+ unsigned Opcode) {
+ const LLT Ty = Query.Types[0];
+
+ // Handle G_LOAD, G_ZEXTLOAD, G_SEXTLOAD
+ const bool IsLoad = Opcode != AMDGPU::G_STORE;
+
+ unsigned RegSize = Ty.getSizeInBits();
+ unsigned MemSize = Query.MMODescrs[0].SizeInBits;
+ unsigned Align = Query.MMODescrs[0].AlignInBits;
+ unsigned AS = Query.Types[1].getAddressSpace();
+
+ // All of these need to be custom lowered to cast the pointer operand.
+ if (AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
+ return false;
+
+ // TODO: We should be able to widen loads if the alignment is high enough, but
+ // we also need to modify the memory access size.
+#if 0
+ // Accept widening loads based on alignment.
+ if (IsLoad && MemSize < Size)
+ MemSize = std::max(MemSize, Align);
+#endif
+
+ // Only 1-byte and 2-byte to 32-bit extloads are valid.
+ if (MemSize != RegSize && RegSize != 32)
+ return false;
+
+ if (MemSize > maxSizeForAddrSpace(ST, AS, IsLoad))
+ return false;
+
+ switch (MemSize) {
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ break;
+ case 96:
+ if (!ST.hasDwordx3LoadStores())
+ return false;
+ break;
+ case 256:
+ case 512:
+ // These may contextually need to be broken down.
+ break;
+ default:
+ return false;
+ }
+
+ assert(RegSize >= MemSize);
+
+ if (Align < MemSize) {
+ const SITargetLowering *TLI = ST.getTargetLowering();
+ if (!TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS, Align / 8))
+ return false;
+ }
+
+ return true;
+}
+
+// The current selector can't handle <6 x s16>, <8 x s16>, s96, s128 etc., so
+// work around this. Eventually it should ignore the type for loads and only
+// care about the size. Return true in cases where we will work around this for
+// now by bitcasting.
+static bool loadStoreBitcastWorkaround(const LLT Ty) {
+ if (EnableNewLegality)
+ return false;
+
+ const unsigned Size = Ty.getSizeInBits();
+ if (Size <= 64)
+ return false;
+ if (!Ty.isVector())
+ return true;
+ unsigned EltSize = Ty.getElementType().getSizeInBits();
+ return EltSize != 32 && EltSize != 64;
+}
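In practice the predicate above flags anything wider than 64 bits whose element type is not s32/s64, including plain wide scalars such as s96, unless the new-legality flag is set; a standalone restatement (the flag is ignored here):

#include <cassert>

static bool needsBitcastWorkaround(unsigned SizeInBits, bool IsVector,
                                   unsigned EltBits) {
  if (SizeInBits <= 64)
    return false;
  if (!IsVector)
    return true;                          // e.g. s96, s128
  return EltBits != 32 && EltBits != 64;  // e.g. <6 x s16>, <8 x s16>
}

int main() {
  assert(needsBitcastWorkaround(96, /*IsVector=*/false, 0));   // s96
  assert(needsBitcastWorkaround(128, /*IsVector=*/true, 16));  // <8 x s16>
  assert(!needsBitcastWorkaround(128, /*IsVector=*/true, 32)); // <4 x s32>
  return 0;
}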
+
+static bool isLoadStoreLegal(const GCNSubtarget &ST, const LegalityQuery &Query,
+ unsigned Opcode) {
+ const LLT Ty = Query.Types[0];
+ return isRegisterType(Ty) && isLoadStoreSizeLegal(ST, Query, Opcode) &&
+ !loadStoreBitcastWorkaround(Ty);
+}
+
AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const GCNTargetMachine &TM)
: ST(ST_) {
@@ -170,14 +329,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
};
const LLT S1 = LLT::scalar(1);
- const LLT S8 = LLT::scalar(8);
const LLT S16 = LLT::scalar(16);
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
- const LLT S96 = LLT::scalar(96);
const LLT S128 = LLT::scalar(128);
const LLT S256 = LLT::scalar(256);
- const LLT S1024 = LLT::scalar(1024);
+ const LLT S512 = LLT::scalar(512);
+ const LLT MaxScalar = LLT::scalar(MaxRegisterSize);
const LLT V2S16 = LLT::vector(2, 16);
const LLT V4S16 = LLT::vector(4, 16);
@@ -244,6 +402,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
S32, S64, S16, V2S16
};
+ const LLT MinScalarFPTy = ST.has16BitInsts() ? S16 : S32;
+
setAction({G_BRCOND, S1}, Legal); // VCC branches
setAction({G_BRCOND, S32}, Legal); // SCC branches
@@ -261,11 +421,19 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
.legalIf(isPointer(0));
- if (ST.has16BitInsts()) {
+ if (ST.hasVOP3PInsts()) {
+ getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
+ .legalFor({S32, S16, V2S16})
+ .clampScalar(0, S16, S32)
+ .clampMaxNumElements(0, S16, 2)
+ .scalarize(0)
+ .widenScalarToNextPow2(0, 32);
+ } else if (ST.has16BitInsts()) {
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
.legalFor({S32, S16})
.clampScalar(0, S16, S32)
- .scalarize(0);
+ .scalarize(0)
+ .widenScalarToNextPow2(0, 32);
} else {
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
.legalFor({S32})
@@ -275,7 +443,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// FIXME: Not really legal. Placeholder for custom lowering.
getActionDefinitionsBuilder({G_SDIV, G_UDIV, G_SREM, G_UREM})
- .legalFor({S32, S64})
+ .customFor({S32, S64})
.clampScalar(0, S32, S64)
.widenScalarToNextPow2(0, 32)
.scalarize(0);
@@ -298,35 +466,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder({G_UADDO, G_USUBO,
G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
.legalFor({{S32, S1}, {S32, S32}})
- .clampScalar(0, S32, S32)
- .scalarize(0); // TODO: Implement.
-
- getActionDefinitionsBuilder({G_SADDO, G_SSUBO})
+ .minScalar(0, S32)
+ // TODO: .scalarize(0)
.lower();
getActionDefinitionsBuilder(G_BITCAST)
// Don't worry about the size constraint.
.legalIf(all(isRegisterType(0), isRegisterType(1)))
- // FIXME: Testing hack
- .legalForCartesianProduct({S16, LLT::vector(2, 8), });
-
- getActionDefinitionsBuilder(G_FCONSTANT)
- .legalFor({S32, S64, S16})
- .clampScalar(0, S16, S64);
-
- getActionDefinitionsBuilder(G_IMPLICIT_DEF)
- .legalFor({S1, S32, S64, S16, V2S32, V4S32, V2S16, V4S16, GlobalPtr,
- ConstantPtr, LocalPtr, FlatPtr, PrivatePtr})
- .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
- .clampScalarOrElt(0, S32, S1024)
- .legalIf(isMultiple32(0))
- .widenScalarToNextPow2(0, 32)
- .clampMaxNumElements(0, S32, 16);
+ .lower();
- // FIXME: i1 operands to intrinsics should always be legal, but other i1
- // values may not be legal. We need to figure out how to distinguish
- // between these two scenarios.
getActionDefinitionsBuilder(G_CONSTANT)
.legalFor({S1, S32, S64, S16, GlobalPtr,
LocalPtr, ConstantPtr, PrivatePtr, FlatPtr })
@@ -334,10 +483,31 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.widenScalarToNextPow2(0)
.legalIf(isPointer(0));
+ getActionDefinitionsBuilder(G_FCONSTANT)
+ .legalFor({S32, S64, S16})
+ .clampScalar(0, S16, S64);
+
+ getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
+ .legalIf(isRegisterType(0))
+ // s1 and s16 are special cases because they have legal operations on
+ // them, but don't really occupy registers in the normal way.
+ .legalFor({S1, S16})
+ .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
+ .clampScalarOrElt(0, S32, MaxScalar)
+ .widenScalarToNextPow2(0, 32)
+ .clampMaxNumElements(0, S32, 16);
+
setAction({G_FRAME_INDEX, PrivatePtr}, Legal);
- getActionDefinitionsBuilder(G_GLOBAL_VALUE)
- .customFor({LocalPtr, GlobalPtr, ConstantPtr, Constant32Ptr});
+ // If the amount is divergent, we have to do a wave reduction to get the
+ // maximum value, so this is expanded during RegBankSelect.
+ getActionDefinitionsBuilder(G_DYN_STACKALLOC)
+ .legalFor({{PrivatePtr, S32}});
+
+ getActionDefinitionsBuilder(G_GLOBAL_VALUE)
+ .unsupportedFor({PrivatePtr})
+ .custom();
+ setAction({G_BLOCK_ADDR, CodePtr}, Legal);
auto &FPOpActions = getActionDefinitionsBuilder(
{ G_FADD, G_FMUL, G_FMA, G_FCANONICALIZE})
@@ -397,33 +567,41 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0)
.clampScalar(0, S16, S64);
- // TODO: Implement
- getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();
-
if (ST.has16BitInsts()) {
getActionDefinitionsBuilder({G_FSQRT, G_FFLOOR})
.legalFor({S32, S64, S16})
.scalarize(0)
.clampScalar(0, S16, S64);
} else {
- getActionDefinitionsBuilder({G_FSQRT, G_FFLOOR})
+ getActionDefinitionsBuilder(G_FSQRT)
.legalFor({S32, S64})
.scalarize(0)
.clampScalar(0, S32, S64);
+
+ if (ST.hasFractBug()) {
+ getActionDefinitionsBuilder(G_FFLOOR)
+ .customFor({S64})
+ .legalFor({S32, S64})
+ .scalarize(0)
+ .clampScalar(0, S32, S64);
+ } else {
+ getActionDefinitionsBuilder(G_FFLOOR)
+ .legalFor({S32, S64})
+ .scalarize(0)
+ .clampScalar(0, S32, S64);
+ }
}
getActionDefinitionsBuilder(G_FPTRUNC)
.legalFor({{S32, S64}, {S16, S32}})
- .scalarize(0);
+ .scalarize(0)
+ .lower();
getActionDefinitionsBuilder(G_FPEXT)
.legalFor({{S64, S32}, {S32, S16}})
.lowerFor({{S64, S16}}) // FIXME: Implement
.scalarize(0);
- // TODO: Verify V_BFI_B32 is generated from expanded bit ops.
- getActionDefinitionsBuilder(G_FCOPYSIGN).lower();
-
getActionDefinitionsBuilder(G_FSUB)
// Use actual fsub instruction
.legalFor({S32})
@@ -434,22 +612,32 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// Whether this is legal depends on the floating point mode for the function.
auto &FMad = getActionDefinitionsBuilder(G_FMAD);
- if (ST.hasMadF16())
+ if (ST.hasMadF16() && ST.hasMadMacF32Insts())
FMad.customFor({S32, S16});
- else
+ else if (ST.hasMadMacF32Insts())
FMad.customFor({S32});
+ else if (ST.hasMadF16())
+ FMad.customFor({S16});
FMad.scalarize(0)
.lower();
+ // TODO: Do we need to clamp maximum bitwidth?
+ getActionDefinitionsBuilder(G_TRUNC)
+ .legalIf(isScalar(0))
+ .legalFor({{V2S16, V2S32}})
+ .clampMaxNumElements(0, S16, 2)
+    // Avoid scalarizing in cases that should be truly illegal. In unresolvable
+    // situations (like an invalid implicit use), we don't want to loop forever
+    // in the legalizer.
+ .fewerElementsIf(elementTypeIsLegal(0), LegalizeMutations::scalarize(0))
+ .alwaysLegal();
+
getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
.legalFor({{S64, S32}, {S32, S16}, {S64, S16},
- {S32, S1}, {S64, S1}, {S16, S1},
- {S96, S32},
- // FIXME: Hack
- {S64, LLT::scalar(33)},
- {S32, S8}, {S32, LLT::scalar(24)}})
+ {S32, S1}, {S64, S1}, {S16, S1}})
.scalarize(0)
- .clampScalar(0, S32, S64);
+ .clampScalar(0, S32, S64)
+ .widenScalarToNextPow2(1, 32);
// TODO: Split s1->s64 during regbankselect for VALU.
auto &IToFP = getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
@@ -460,17 +648,20 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (ST.has16BitInsts())
IToFP.legalFor({{S16, S16}});
IToFP.clampScalar(1, S32, S64)
- .scalarize(0);
+ .scalarize(0)
+ .widenScalarToNextPow2(1);
auto &FPToI = getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
- .legalFor({{S32, S32}, {S32, S64}, {S32, S16}});
+ .legalFor({{S32, S32}, {S32, S64}, {S32, S16}})
+ .customFor({{S64, S64}});
if (ST.has16BitInsts())
FPToI.legalFor({{S16, S16}});
else
FPToI.minScalar(1, S32);
FPToI.minScalar(0, S32)
- .scalarize(0);
+ .scalarize(0)
+ .lower();
getActionDefinitionsBuilder(G_INTRINSIC_ROUND)
.scalarize(0)
@@ -494,16 +685,17 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0);
}
+ // FIXME: Clamp offset operand.
getActionDefinitionsBuilder(G_PTR_ADD)
- .legalForCartesianProduct(AddrSpaces64, {S64})
- .legalForCartesianProduct(AddrSpaces32, {S32})
+ .legalIf(isPointer(0))
.scalarize(0);
- getActionDefinitionsBuilder(G_PTR_MASK)
- .scalarize(0)
- .alwaysLegal();
-
- setAction({G_BLOCK_ADDR, CodePtr}, Legal);
+ getActionDefinitionsBuilder(G_PTRMASK)
+ .legalIf(typeInSet(1, {S64, S32}))
+ .minScalar(1, S32)
+ .maxScalarIf(sizeIs(0, 32), 1, S32)
+ .maxScalarIf(sizeIs(0, 64), 1, S64)
+ .scalarize(0);
auto &CmpBuilder =
getActionDefinitionsBuilder(G_ICMP)
@@ -537,16 +729,45 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.clampScalar(1, S32, S64)
.scalarize(0);
- // FIXME: fexp, flog2, flog10 needs to be custom lowered.
- getActionDefinitionsBuilder({G_FPOW, G_FEXP, G_FEXP2,
- G_FLOG, G_FLOG2, G_FLOG10})
- .legalFor({S32})
- .scalarize(0);
+ // FIXME: fpow has a selection pattern that should move to custom lowering.
+ auto &Exp2Ops = getActionDefinitionsBuilder({G_FEXP2, G_FLOG2});
+ if (ST.has16BitInsts())
+ Exp2Ops.legalFor({S32, S16});
+ else
+ Exp2Ops.legalFor({S32});
+ Exp2Ops.clampScalar(0, MinScalarFPTy, S32);
+ Exp2Ops.scalarize(0);
+
+ auto &ExpOps = getActionDefinitionsBuilder({G_FEXP, G_FLOG, G_FLOG10, G_FPOW});
+ if (ST.has16BitInsts())
+ ExpOps.customFor({{S32}, {S16}});
+ else
+ ExpOps.customFor({S32});
+ ExpOps.clampScalar(0, MinScalarFPTy, S32)
+ .scalarize(0);
+
+ // The 64-bit versions produce 32-bit results, but only on the SALU.
+ getActionDefinitionsBuilder(G_CTPOP)
+ .legalFor({{S32, S32}, {S32, S64}})
+ .clampScalar(0, S32, S32)
+ .clampScalar(1, S32, S64)
+ .scalarize(0)
+ .widenScalarToNextPow2(0, 32)
+ .widenScalarToNextPow2(1, 32);
+
+ // The hardware instructions return a different result on 0 than the generic
+ // instructions expect. The hardware produces -1, but these produce the
+ // bitwidth.
+ getActionDefinitionsBuilder({G_CTLZ, G_CTTZ})
+ .scalarize(0)
+ .clampScalar(0, S32, S32)
+ .clampScalar(1, S32, S64)
+ .widenScalarToNextPow2(0, 32)
+ .widenScalarToNextPow2(1, 32)
+ .lower();
// The 64-bit versions produce 32-bit results, but only on the SALU.
- getActionDefinitionsBuilder({G_CTLZ, G_CTLZ_ZERO_UNDEF,
- G_CTTZ, G_CTTZ_ZERO_UNDEF,
- G_CTPOP})
+ getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF})
.legalFor({{S32, S32}, {S32, S64}})
.clampScalar(0, S32, S32)
.clampScalar(1, S32, S64)
@@ -554,50 +775,58 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.widenScalarToNextPow2(0, 32)
.widenScalarToNextPow2(1, 32);
- // TODO: Expand for > s32
- getActionDefinitionsBuilder({G_BSWAP, G_BITREVERSE})
+ getActionDefinitionsBuilder(G_BITREVERSE)
.legalFor({S32})
.clampScalar(0, S32, S32)
.scalarize(0);
if (ST.has16BitInsts()) {
+ getActionDefinitionsBuilder(G_BSWAP)
+ .legalFor({S16, S32, V2S16})
+ .clampMaxNumElements(0, S16, 2)
+      // FIXME: Fixing non-power-of-2 before clamp is a workaround for a
+      // narrowScalar limitation.
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, S16, S32)
+ .scalarize(0);
+
if (ST.hasVOP3PInsts()) {
getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
.legalFor({S32, S16, V2S16})
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
.clampMaxNumElements(0, S16, 2)
- .clampScalar(0, S16, S32)
+ .minScalar(0, S16)
.widenScalarToNextPow2(0)
- .scalarize(0);
+ .scalarize(0)
+ .lower();
} else {
getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
.legalFor({S32, S16})
.widenScalarToNextPow2(0)
- .clampScalar(0, S16, S32)
- .scalarize(0);
+ .minScalar(0, S16)
+ .scalarize(0)
+ .lower();
}
} else {
+    // TODO: Should have the same legality without v_perm_b32
+ getActionDefinitionsBuilder(G_BSWAP)
+ .legalFor({S32})
+ .lowerIf(scalarNarrowerThan(0, 32))
+      // FIXME: Fixing non-power-of-2 before clamp is a workaround for a
+      // narrowScalar limitation.
+ .widenScalarToNextPow2(0)
+ .maxScalar(0, S32)
+ .scalarize(0)
+ .lower();
+
getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
.legalFor({S32})
- .clampScalar(0, S32, S32)
+ .minScalar(0, S32)
.widenScalarToNextPow2(0)
- .scalarize(0);
+ .scalarize(0)
+ .lower();
}
- auto smallerThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
- return [=](const LegalityQuery &Query) {
- return Query.Types[TypeIdx0].getSizeInBits() <
- Query.Types[TypeIdx1].getSizeInBits();
- };
- };
-
- auto greaterThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
- return [=](const LegalityQuery &Query) {
- return Query.Types[TypeIdx0].getSizeInBits() >
- Query.Types[TypeIdx1].getSizeInBits();
- };
- };
-
getActionDefinitionsBuilder(G_INTTOPTR)
// List the common cases
.legalForCartesianProduct(AddrSpaces64, {S64})
@@ -609,7 +838,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
[](const LegalityQuery &Query) {
return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
})
- .narrowScalarIf(greaterThan(1, 0),
+ .narrowScalarIf(largerThan(1, 0),
[](const LegalityQuery &Query) {
return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
});
@@ -626,7 +855,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
})
.narrowScalarIf(
- greaterThan(0, 1),
+ largerThan(0, 1),
[](const LegalityQuery &Query) {
return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
});
@@ -635,33 +864,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0)
.custom();
- // TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we
- // handle some operations by just promoting the register during
- // selection. There are also d16 loads on GFX9+ which preserve the high bits.
- auto maxSizeForAddrSpace = [this](unsigned AS) -> unsigned {
- switch (AS) {
- // FIXME: Private element size.
- case AMDGPUAS::PRIVATE_ADDRESS:
- return 32;
- // FIXME: Check subtarget
- case AMDGPUAS::LOCAL_ADDRESS:
- return ST.useDS128() ? 128 : 64;
-
- // Treat constant and global as identical. SMRD loads are sometimes usable
- // for global loads (ideally constant address space should be eliminated)
- // depending on the context. Legality cannot be context dependent, but
- // RegBankSelect can split the load as necessary depending on the pointer
- // register bank/uniformity and if the memory is invariant or not written in
- // a kernel.
- case AMDGPUAS::CONSTANT_ADDRESS:
- case AMDGPUAS::GLOBAL_ADDRESS:
- return 512;
- default:
- return 128;
- }
- };
-
- const auto needToSplitLoad = [=](const LegalityQuery &Query) -> bool {
+ const auto needToSplitMemOp = [=](const LegalityQuery &Query,
+ bool IsLoad) -> bool {
const LLT DstTy = Query.Types[0];
// Split vector extloads.
@@ -676,14 +880,20 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const LLT PtrTy = Query.Types[1];
unsigned AS = PtrTy.getAddressSpace();
- if (MemSize > maxSizeForAddrSpace(AS))
+ if (MemSize > maxSizeForAddrSpace(ST, AS, IsLoad))
return true;
// Catch weird sized loads that don't evenly divide into the access sizes
// TODO: May be able to widen depending on alignment etc.
- unsigned NumRegs = MemSize / 32;
- if (NumRegs == 3 && !ST.hasDwordx3LoadStores())
- return true;
+ unsigned NumRegs = (MemSize + 31) / 32;
+ if (NumRegs == 3) {
+ if (!ST.hasDwordx3LoadStores())
+ return true;
+ } else {
+ // If the alignment allows, these should have been widened.
+ if (!isPowerOf2_32(NumRegs))
+ return true;
+ }
if (Align < MemSize) {
const SITargetLowering *TLI = ST.getTargetLowering();
@@ -693,6 +903,24 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
return false;
};
+ const auto shouldWidenLoadResult = [=](const LegalityQuery &Query,
+ unsigned Opc) -> bool {
+ unsigned Size = Query.Types[0].getSizeInBits();
+ if (isPowerOf2_32(Size))
+ return false;
+
+ if (Size == 96 && ST.hasDwordx3LoadStores())
+ return false;
+
+ unsigned AddrSpace = Query.Types[1].getAddressSpace();
+ if (Size >= maxSizeForAddrSpace(ST, AddrSpace, Opc))
+ return false;
+
+ unsigned Align = Query.MMODescrs[0].AlignInBits;
+ unsigned RoundedSize = NextPowerOf2(Size);
+ return (Align >= RoundedSize);
+ };
+
unsigned GlobalAlign32 = ST.hasUnalignedBufferAccess() ? 0 : 32;
unsigned GlobalAlign16 = ST.hasUnalignedBufferAccess() ? 0 : 16;
unsigned GlobalAlign8 = ST.hasUnalignedBufferAccess() ? 0 : 8;
@@ -705,17 +933,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const bool IsStore = Op == G_STORE;
auto &Actions = getActionDefinitionsBuilder(Op);
- // Whitelist the common cases.
- // TODO: Pointer loads
- // TODO: Wide constant loads
- // TODO: Only CI+ has 3x loads
- // TODO: Loads to s16 on gfx9
+ // Explicitly list some common cases.
+ // TODO: Does this help compile time at all?
Actions.legalForTypesWithMemDesc({{S32, GlobalPtr, 32, GlobalAlign32},
{V2S32, GlobalPtr, 64, GlobalAlign32},
- {V3S32, GlobalPtr, 96, GlobalAlign32},
- {S96, GlobalPtr, 96, GlobalAlign32},
{V4S32, GlobalPtr, 128, GlobalAlign32},
- {S128, GlobalPtr, 128, GlobalAlign32},
{S64, GlobalPtr, 64, GlobalAlign32},
{V2S64, GlobalPtr, 128, GlobalAlign32},
{V2S16, GlobalPtr, 32, GlobalAlign32},
@@ -734,23 +956,60 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
{S32, PrivatePtr, 16, 16},
{V2S16, PrivatePtr, 32, 32},
- {S32, FlatPtr, 32, GlobalAlign32},
- {S32, FlatPtr, 16, GlobalAlign16},
- {S32, FlatPtr, 8, GlobalAlign8},
- {V2S16, FlatPtr, 32, GlobalAlign32},
-
{S32, ConstantPtr, 32, GlobalAlign32},
{V2S32, ConstantPtr, 64, GlobalAlign32},
- {V3S32, ConstantPtr, 96, GlobalAlign32},
{V4S32, ConstantPtr, 128, GlobalAlign32},
{S64, ConstantPtr, 64, GlobalAlign32},
- {S128, ConstantPtr, 128, GlobalAlign32},
{V2S32, ConstantPtr, 32, GlobalAlign32}});
+ Actions.legalIf(
+ [=](const LegalityQuery &Query) -> bool {
+ return isLoadStoreLegal(ST, Query, Op);
+ });
+
+  // The constant 32-bit address space is handled by addrspacecasting the
+  // 32-bit pointer to 64 bits.
+ //
+ // TODO: Should generalize bitcast action into coerce, which will also cover
+ // inserting addrspacecasts.
+ Actions.customIf(typeIs(1, Constant32Ptr));
+
+ // Turn any illegal element vectors into something easier to deal
+ // with. These will ultimately produce 32-bit scalar shifts to extract the
+ // parts anyway.
+ //
+ // For odd 16-bit element vectors, prefer to split those into pieces with
+ // 16-bit vector parts.
+ Actions.bitcastIf(
+ [=](const LegalityQuery &Query) -> bool {
+ const LLT Ty = Query.Types[0];
+ const unsigned Size = Ty.getSizeInBits();
+
+ if (Size != Query.MMODescrs[0].SizeInBits)
+ return Size <= 32 && Ty.isVector();
+
+ if (loadStoreBitcastWorkaround(Ty) && isRegisterType(Ty))
+ return true;
+ return Ty.isVector() && (Size <= 32 || isRegisterSize(Size)) &&
+ !isRegisterVectorElementType(Ty.getElementType());
+ }, bitcastToRegisterType(0));
+
Actions
.customIf(typeIs(1, Constant32Ptr))
+ // Widen suitably aligned loads by loading extra elements.
+ .moreElementsIf([=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[0];
+ return Op == G_LOAD && Ty.isVector() &&
+ shouldWidenLoadResult(Query, Op);
+ }, moreElementsToNextPow2(0))
+ .widenScalarIf([=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[0];
+ return Op == G_LOAD && !Ty.isVector() &&
+ shouldWidenLoadResult(Query, Op);
+ }, widenScalarOrEltToNextPow2(0))
.narrowScalarIf(
[=](const LegalityQuery &Query) -> bool {
- return !Query.Types[0].isVector() && needToSplitLoad(Query);
+ return !Query.Types[0].isVector() &&
+ needToSplitMemOp(Query, Op == G_LOAD);
},
[=](const LegalityQuery &Query) -> std::pair<unsigned, LLT> {
const LLT DstTy = Query.Types[0];
@@ -763,13 +1022,23 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (DstSize > MemSize)
return std::make_pair(0, LLT::scalar(MemSize));
+ if (!isPowerOf2_32(DstSize)) {
+ // We're probably decomposing an odd sized store. Try to split
+ // to the widest type. TODO: Account for alignment. As-is it
+ // should be OK, since the new parts will be further legalized.
+ unsigned FloorSize = PowerOf2Floor(DstSize);
+ return std::make_pair(0, LLT::scalar(FloorSize));
+ }
+
if (DstSize > 32 && (DstSize % 32 != 0)) {
// FIXME: Need a way to specify non-extload of larger size if
// suitably aligned.
return std::make_pair(0, LLT::scalar(32 * (DstSize / 32)));
}
- unsigned MaxSize = maxSizeForAddrSpace(PtrTy.getAddressSpace());
+ unsigned MaxSize = maxSizeForAddrSpace(ST,
+ PtrTy.getAddressSpace(),
+ Op == G_LOAD);
if (MemSize > MaxSize)
return std::make_pair(0, LLT::scalar(MaxSize));
@@ -778,18 +1047,32 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
})
.fewerElementsIf(
[=](const LegalityQuery &Query) -> bool {
- return Query.Types[0].isVector() && needToSplitLoad(Query);
+ return Query.Types[0].isVector() &&
+ needToSplitMemOp(Query, Op == G_LOAD);
},
[=](const LegalityQuery &Query) -> std::pair<unsigned, LLT> {
const LLT DstTy = Query.Types[0];
const LLT PtrTy = Query.Types[1];
LLT EltTy = DstTy.getElementType();
- unsigned MaxSize = maxSizeForAddrSpace(PtrTy.getAddressSpace());
+ unsigned MaxSize = maxSizeForAddrSpace(ST,
+ PtrTy.getAddressSpace(),
+ Op == G_LOAD);
+
+ // FIXME: Handle widened to power of 2 results better. This ends
+ // up scalarizing.
+ // FIXME: 3 element stores scalarized on SI
// Split if it's too large for the address space.
if (Query.MMODescrs[0].SizeInBits > MaxSize) {
unsigned NumElts = DstTy.getNumElements();
+ unsigned EltSize = EltTy.getSizeInBits();
+
+ if (MaxSize % EltSize == 0) {
+ return std::make_pair(
+ 0, LLT::scalarOrVector(MaxSize / EltSize, EltTy));
+ }
+
unsigned NumPieces = Query.MMODescrs[0].SizeInBits / MaxSize;
// FIXME: Refine when odd breakdowns handled
@@ -802,9 +1085,24 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
LLT::vector(NumElts / NumPieces, EltTy));
}
+ // FIXME: We could probably handle weird extending loads better.
+ unsigned MemSize = Query.MMODescrs[0].SizeInBits;
+ if (DstTy.getSizeInBits() > MemSize)
+ return std::make_pair(0, EltTy);
+
+ unsigned EltSize = EltTy.getSizeInBits();
+ unsigned DstSize = DstTy.getSizeInBits();
+ if (!isPowerOf2_32(DstSize)) {
+ // We're probably decomposing an odd sized store. Try to split
+ // to the widest type. TODO: Account for alignment. As-is it
+ // should be OK, since the new parts will be further legalized.
+ unsigned FloorSize = PowerOf2Floor(DstSize);
+ return std::make_pair(
+ 0, LLT::scalarOrVector(FloorSize / EltSize, EltTy));
+ }
+
// Need to split because of alignment.
unsigned Align = Query.MMODescrs[0].AlignInBits;
- unsigned EltSize = EltTy.getSizeInBits();
if (EltSize > Align &&
(EltSize / Align < DstTy.getNumElements())) {
return std::make_pair(0, LLT::vector(EltSize / Align, EltTy));
@@ -820,39 +1118,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// TODO: Need a bitcast lower option?
Actions
- .legalIf([=](const LegalityQuery &Query) {
- const LLT Ty0 = Query.Types[0];
- unsigned Size = Ty0.getSizeInBits();
- unsigned MemSize = Query.MMODescrs[0].SizeInBits;
- unsigned Align = Query.MMODescrs[0].AlignInBits;
-
- // FIXME: Widening store from alignment not valid.
- if (MemSize < Size)
- MemSize = std::max(MemSize, Align);
-
- // No extending vector loads.
- if (Size > MemSize && Ty0.isVector())
- return false;
-
- switch (MemSize) {
- case 8:
- case 16:
- return Size == 32;
- case 32:
- case 64:
- case 128:
- return true;
- case 96:
- return ST.hasDwordx3LoadStores();
- case 256:
- case 512:
- return true;
- default:
- return false;
- }
- })
.widenScalarToNextPow2(0)
- // TODO: v3s32->v4s32 with alignment
.moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0));
}
@@ -886,8 +1152,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
}
- getActionDefinitionsBuilder(G_ATOMICRMW_FADD)
- .legalFor({{S32, LocalPtr}});
+ if (ST.hasLDSFPAtomics()) {
+ getActionDefinitionsBuilder(G_ATOMICRMW_FADD)
+ .legalFor({{S32, LocalPtr}});
+ }
// BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling, and output
// demarshalling
@@ -896,10 +1164,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
{S32, FlatPtr}, {S64, FlatPtr}})
.legalFor({{S32, LocalPtr}, {S64, LocalPtr},
{S32, RegionPtr}, {S64, RegionPtr}});
-
- getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
- .lower();
-
// TODO: Pointer types, any 32-bit or 64-bit vector
// Condition should be s32 for scalar, s1 for vector.
@@ -908,9 +1172,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
GlobalPtr, LocalPtr, FlatPtr, PrivatePtr,
LLT::vector(2, LocalPtr), LLT::vector(2, PrivatePtr)}, {S1, S32})
.clampScalar(0, S16, S64)
+ .scalarize(1)
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
.fewerElementsIf(numElementsNotEven(0), scalarize(0))
- .scalarize(1)
.clampMaxNumElements(0, S32, 2)
.clampMaxNumElements(0, LocalPtr, 2)
.clampMaxNumElements(0, PrivatePtr, 2)
@@ -924,12 +1188,22 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.legalFor({{S32, S32}, {S64, S32}});
if (ST.has16BitInsts()) {
if (ST.hasVOP3PInsts()) {
- Shifts.legalFor({{S16, S32}, {S16, S16}, {V2S16, V2S16}})
+ Shifts.legalFor({{S16, S16}, {V2S16, V2S16}})
.clampMaxNumElements(0, S16, 2);
} else
- Shifts.legalFor({{S16, S32}, {S16, S16}});
+ Shifts.legalFor({{S16, S16}});
- // TODO: Support 16-bit shift amounts
+ // TODO: Support 16-bit shift amounts for all types
+ Shifts.widenScalarIf(
+ [=](const LegalityQuery &Query) {
+ // Use 16-bit shift amounts for any 16-bit shift. Otherwise we want a
+ // 32-bit amount.
+ const LLT ValTy = Query.Types[0];
+ const LLT AmountTy = Query.Types[1];
+ return ValTy.getSizeInBits() <= 16 &&
+ AmountTy.getSizeInBits() < 16;
+ }, changeTo(1, S16));
+ Shifts.maxScalarIf(typeIs(0, S16), 1, S16);
Shifts.clampScalar(1, S32, S32);
Shifts.clampScalar(0, S16, S64);
Shifts.widenScalarToNextPow2(0, 16);
@@ -956,7 +1230,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
return (EltTy.getSizeInBits() == 16 ||
EltTy.getSizeInBits() % 32 == 0) &&
VecTy.getSizeInBits() % 32 == 0 &&
- VecTy.getSizeInBits() <= 1024 &&
+ VecTy.getSizeInBits() <= MaxRegisterSize &&
IdxTy.getSizeInBits() == 32;
})
.clampScalar(EltTypeIdx, S32, S64)
@@ -1008,28 +1282,40 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.clampNumElements(0, V2S64, V16S64)
.fewerElementsIf(isWideVec16(0), changeTo(0, V2S16));
- if (ST.hasScalarPackInsts())
- BuildVector.legalFor({V2S16, S32});
-
- BuildVector
- .minScalarSameAs(1, 0)
- .legalIf(isRegisterType(0))
- .minScalarOrElt(0, S32);
-
if (ST.hasScalarPackInsts()) {
+ BuildVector
+ // FIXME: Should probably widen s1 vectors straight to s32
+ .minScalarOrElt(0, S16)
+ // Widen source elements and produce a G_BUILD_VECTOR_TRUNC
+ .minScalar(1, S32);
+
getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC)
.legalFor({V2S16, S32})
.lower();
+ BuildVector.minScalarOrElt(0, S32);
} else {
+ BuildVector.customFor({V2S16, S16});
+ BuildVector.minScalarOrElt(0, S32);
+
getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC)
+ .customFor({V2S16, S32})
.lower();
}
+ BuildVector.legalIf(isRegisterType(0));
+
+ // FIXME: Clamp maximum size
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
.legalIf(isRegisterType(0));
- // TODO: Don't fully scalarize v2s16 pieces
- getActionDefinitionsBuilder(G_SHUFFLE_VECTOR).lower();
+  // TODO: Don't fully scalarize v2s16 pieces? Or combine those out
+  // pre-legalize.
+ if (ST.hasVOP3PInsts()) {
+ getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
+ .customFor({V2S16, V2S16})
+ .lower();
+ } else
+ getActionDefinitionsBuilder(G_SHUFFLE_VECTOR).lower();
// Merge/Unmerge
for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
@@ -1037,10 +1323,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
auto notValidElt = [=](const LegalityQuery &Query, unsigned TypeIdx) {
- const LLT &Ty = Query.Types[TypeIdx];
+ const LLT Ty = Query.Types[TypeIdx];
if (Ty.isVector()) {
const LLT &EltTy = Ty.getElementType();
- if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
+ if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 512)
return true;
if (!isPowerOf2_32(EltTy.getSizeInBits()))
return true;
@@ -1049,25 +1335,32 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
};
auto &Builder = getActionDefinitionsBuilder(Op)
+ .lowerFor({{S16, V2S16}})
+ .lowerIf([=](const LegalityQuery &Query) {
+ const LLT BigTy = Query.Types[BigTyIdx];
+ return BigTy.getSizeInBits() == 32;
+ })
+ // Try to widen to s16 first for small types.
+ // TODO: Only do this on targets with legal s16 shifts
+ .minScalarOrEltIf(scalarNarrowerThan(LitTyIdx, 16), LitTyIdx, S16)
.widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
- // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
- // worth considering the multiples of 64 since 2*192 and 2*384 are not
- // valid.
- .clampScalar(LitTyIdx, S16, S256)
- .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)
.moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx))
.fewerElementsIf(all(typeIs(0, S16), vectorWiderThan(1, 32),
elementTypeIs(1, S16)),
changeTo(1, V2S16))
+ // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
+ // worth considering the multiples of 64 since 2*192 and 2*384 are not
+ // valid.
+ .clampScalar(LitTyIdx, S32, S512)
+ .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)
// Break up vectors with weird elements into scalars
.fewerElementsIf(
- [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
+ [=](const LegalityQuery &Query) { return notValidElt(Query, LitTyIdx); },
scalarize(0))
.fewerElementsIf(
- [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
+ [=](const LegalityQuery &Query) { return notValidElt(Query, BigTyIdx); },
scalarize(1))
- .clampScalar(BigTyIdx, S32, S1024)
- .lowerFor({{S16, V2S16}});
+ .clampScalar(BigTyIdx, S32, MaxScalar);
if (Op == G_MERGE_VALUES) {
Builder.widenScalarIf(
@@ -1108,22 +1401,68 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
return BigTy.getSizeInBits() % 16 == 0 &&
LitTy.getSizeInBits() % 16 == 0 &&
- BigTy.getSizeInBits() <= 1024;
+ BigTy.getSizeInBits() <= MaxRegisterSize;
})
// Any vectors left are the wrong size. Scalarize them.
.scalarize(0)
.scalarize(1);
}
- getActionDefinitionsBuilder(G_SEXT_INREG).lower();
+ // S64 is only legal on SALU, and needs to be broken into 32-bit elements in
+ // RegBankSelect.
+ auto &SextInReg = getActionDefinitionsBuilder(G_SEXT_INREG)
+ .legalFor({{S32}, {S64}});
+
+ if (ST.hasVOP3PInsts()) {
+ SextInReg.lowerFor({{V2S16}})
+ // Prefer to reduce vector widths for 16-bit vectors before lowering, to
+ // get more vector shift opportunities, since we'll get those when
+ // expanded.
+ .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16));
+ } else if (ST.has16BitInsts()) {
+ SextInReg.lowerFor({{S32}, {S64}, {S16}});
+ } else {
+ // Prefer to promote to s32 before lowering if we don't have 16-bit
+    // shifts. This avoids a lot of intermediate truncate and extend operations.
+ SextInReg.lowerFor({{S32}, {S64}});
+ }
+
+ // FIXME: Placeholder rule. Really depends on whether the clamp modifier is
+ // available, and is selectively legal for s16, s32, v2s16.
+ getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT, G_UADDSAT, G_USUBSAT})
+ .scalarize(0)
+ .clampScalar(0, S16, S32);
- getActionDefinitionsBuilder({G_READ_REGISTER, G_WRITE_REGISTER}).lower();
+ SextInReg
+ .scalarize(0)
+ .clampScalar(0, S32, S64)
+ .lower();
+
+ getActionDefinitionsBuilder(G_FSHR)
+ .legalFor({{S32, S32}})
+ .scalarize(0)
+ .lower();
getActionDefinitionsBuilder(G_READCYCLECOUNTER)
.legalFor({S64});
+ getActionDefinitionsBuilder({
+ // TODO: Verify V_BFI_B32 is generated from expanded bit ops
+ G_FCOPYSIGN,
+
+ G_ATOMIC_CMPXCHG_WITH_SUCCESS,
+ G_READ_REGISTER,
+ G_WRITE_REGISTER,
+
+ G_SADDO, G_SSUBO,
+
+ // TODO: Implement
+ G_FMINIMUM, G_FMAXIMUM,
+ G_FSHL
+ }).lower();
+
getActionDefinitionsBuilder({G_VASTART, G_VAARG, G_BRJT, G_JUMP_TABLE,
- G_DYN_STACKALLOC, G_INDEXED_LOAD, G_INDEXED_SEXTLOAD,
+ G_INDEXED_LOAD, G_INDEXED_SEXTLOAD,
G_INDEXED_ZEXTLOAD, G_INDEXED_STORE})
.unsupported();
@@ -1131,10 +1470,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
verify(*ST.getInstrInfo());
}
-bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &B,
- GISelChangeObserver &Observer) const {
+bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
+ MachineIRBuilder &B = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *B.getMRI();
+ GISelChangeObserver &Observer = Helper.Observer;
+
switch (MI.getOpcode()) {
case TargetOpcode::G_ADDRSPACE_CAST:
return legalizeAddrSpaceCast(MI, MRI, B);
@@ -1148,15 +1489,21 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
return legalizeITOFP(MI, MRI, B, true);
case TargetOpcode::G_UITOFP:
return legalizeITOFP(MI, MRI, B, false);
+ case TargetOpcode::G_FPTOSI:
+ return legalizeFPTOI(MI, MRI, B, true);
+ case TargetOpcode::G_FPTOUI:
+ return legalizeFPTOI(MI, MRI, B, false);
case TargetOpcode::G_FMINNUM:
case TargetOpcode::G_FMAXNUM:
case TargetOpcode::G_FMINNUM_IEEE:
case TargetOpcode::G_FMAXNUM_IEEE:
- return legalizeMinNumMaxNum(MI, MRI, B);
+ return legalizeMinNumMaxNum(Helper, MI);
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
return legalizeExtractVectorElt(MI, MRI, B);
case TargetOpcode::G_INSERT_VECTOR_ELT:
return legalizeInsertVectorElt(MI, MRI, B);
+ case TargetOpcode::G_SHUFFLE_VECTOR:
+ return legalizeShuffleVector(MI, MRI, B);
case TargetOpcode::G_FSIN:
case TargetOpcode::G_FCOS:
return legalizeSinCos(MI, MRI, B);
@@ -1168,8 +1515,26 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
return legalizeFMad(MI, MRI, B);
case TargetOpcode::G_FDIV:
return legalizeFDIV(MI, MRI, B);
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_UREM:
+ return legalizeUDIV_UREM(MI, MRI, B);
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_SREM:
+ return legalizeSDIV_SREM(MI, MRI, B);
case TargetOpcode::G_ATOMIC_CMPXCHG:
return legalizeAtomicCmpXChg(MI, MRI, B);
+ case TargetOpcode::G_FLOG:
+ return legalizeFlog(MI, B, numbers::ln2f);
+ case TargetOpcode::G_FLOG10:
+ return legalizeFlog(MI, B, numbers::ln2f / numbers::ln10f);
+ case TargetOpcode::G_FEXP:
+ return legalizeFExp(MI, B);
+ case TargetOpcode::G_FPOW:
+ return legalizeFPow(MI, B);
+ case TargetOpcode::G_FFLOOR:
+ return legalizeFFloor(MI, MRI, B);
+ case TargetOpcode::G_BUILD_VECTOR:
+ return legalizeBuildVector(MI, MRI, B);
default:
return false;
}
@@ -1201,7 +1566,6 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;
- Register ApertureReg = MRI.createGenericVirtualRegister(S32);
Register GetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
B.buildInstr(AMDGPU::S_GETREG_B32)
@@ -1210,12 +1574,7 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
MRI.setType(GetReg, S32);
auto ShiftAmt = B.buildConstant(S32, WidthM1 + 1);
- B.buildInstr(TargetOpcode::G_SHL)
- .addDef(ApertureReg)
- .addUse(GetReg)
- .addUse(ShiftAmt.getReg(0));
-
- return ApertureReg;
+ return B.buildShl(S32, GetReg, ShiftAmt).getReg(0);
}
Register QueuePtr = MRI.createGenericVirtualRegister(
@@ -1232,19 +1591,15 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
// TODO: can we be smarter about machine pointer info?
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
MachineMemOperand *MMO = MF.getMachineMemOperand(
- PtrInfo,
- MachineMemOperand::MOLoad |
- MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant,
- 4,
- MinAlign(64, StructOffset));
-
- Register LoadResult = MRI.createGenericVirtualRegister(S32);
+ PtrInfo,
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ 4, commonAlignment(Align(64), StructOffset));
+
Register LoadAddr;
B.materializePtrAdd(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset);
- B.buildLoad(LoadResult, LoadAddr, *MMO);
- return LoadResult;
+ return B.buildLoad(S32, LoadAddr, *MMO).getReg(0);
}
bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
@@ -1252,8 +1607,6 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
MachineIRBuilder &B) const {
MachineFunction &MF = B.getMF();
- B.setInstr(MI);
-
const LLT S32 = LLT::scalar(32);
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
@@ -1292,7 +1645,7 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
// extra ptrtoint would be kind of pointless.
auto HighAddr = B.buildConstant(
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS_32BIT, 32), AddrHiVal);
- B.buildMerge(Dst, {Src, HighAddr.getReg(0)});
+ B.buildMerge(Dst, {Src, HighAddr});
MI.eraseFromParent();
return true;
}
@@ -1305,13 +1658,11 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
auto SegmentNull = B.buildConstant(DstTy, NullVal);
auto FlatNull = B.buildConstant(SrcTy, 0);
- Register PtrLo32 = MRI.createGenericVirtualRegister(DstTy);
-
// Extract low 32-bits of the pointer.
- B.buildExtract(PtrLo32, Src, 0);
+ auto PtrLo32 = B.buildExtract(DstTy, Src, 0);
- Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
- B.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNull.getReg(0));
+ auto CmpRes =
+ B.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Src, FlatNull.getReg(0));
B.buildSelect(Dst, CmpRes, PtrLo32, SegmentNull.getReg(0));
MI.eraseFromParent();
@@ -1333,21 +1684,16 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
if (!ApertureReg.isValid())
return false;
- Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
- B.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNull.getReg(0));
-
- Register BuildPtr = MRI.createGenericVirtualRegister(DstTy);
+ auto CmpRes =
+ B.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Src, SegmentNull.getReg(0));
// Coerce the type of the low half of the result so we can use merge_values.
- Register SrcAsInt = MRI.createGenericVirtualRegister(S32);
- B.buildInstr(TargetOpcode::G_PTRTOINT)
- .addDef(SrcAsInt)
- .addUse(Src);
+ Register SrcAsInt = B.buildPtrToInt(S32, Src).getReg(0);
// TODO: Should we allow mismatched types but matching sizes in merges to
// avoid the ptrtoint?
- B.buildMerge(BuildPtr, {SrcAsInt, ApertureReg});
- B.buildSelect(Dst, CmpRes, BuildPtr, FlatNull.getReg(0));
+ auto BuildPtr = B.buildMerge(DstTy, {SrcAsInt, ApertureReg});
+ B.buildSelect(Dst, CmpRes, BuildPtr, FlatNull);
MI.eraseFromParent();
return true;
@@ -1356,8 +1702,6 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
bool AMDGPULegalizerInfo::legalizeFrint(
MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
- B.setInstr(MI);
-
Register Src = MI.getOperand(1).getReg();
LLT Ty = MRI.getType(Src);
assert(Ty.isScalar() && Ty.getSizeInBits() == 64);
@@ -1383,7 +1727,6 @@ bool AMDGPULegalizerInfo::legalizeFrint(
bool AMDGPULegalizerInfo::legalizeFceil(
MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
- B.setInstr(MI);
const LLT S1 = LLT::scalar(1);
const LLT S64 = LLT::scalar(64);
@@ -1395,7 +1738,7 @@ bool AMDGPULegalizerInfo::legalizeFceil(
// if (src > 0.0 && src != result)
// result += 1.0
- auto Trunc = B.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC, {S64}, {Src});
+ auto Trunc = B.buildIntrinsicTrunc(S64, Src);
const auto Zero = B.buildFConstant(S64, 0.0);
const auto One = B.buildFConstant(S64, 1.0);
@@ -1428,8 +1771,6 @@ static MachineInstrBuilder extractF64Exponent(unsigned Hi,
bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc(
MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
- B.setInstr(MI);
-
const LLT S1 = LLT::scalar(1);
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
@@ -1456,7 +1797,7 @@ bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc(
const auto Zero32 = B.buildConstant(S32, 0);
// Extend back to 64-bits.
- auto SignBit64 = B.buildMerge(S64, {Zero32.getReg(0), SignBit.getReg(0)});
+ auto SignBit64 = B.buildMerge(S64, {Zero32, SignBit});
auto Shr = B.buildAShr(S64, FractMask, Exp);
auto Not = B.buildNot(S64, Shr);
@@ -1474,7 +1815,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc(
bool AMDGPULegalizerInfo::legalizeITOFP(
MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, bool Signed) const {
- B.setInstr(MI);
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
@@ -1503,10 +1843,44 @@ bool AMDGPULegalizerInfo::legalizeITOFP(
return true;
}
-bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(
+// TODO: Copied from DAG implementation. Verify logic and document how this
+// actually works.
+bool AMDGPULegalizerInfo::legalizeFPTOI(
MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B) const {
- MachineFunction &MF = B.getMF();
+ MachineIRBuilder &B, bool Signed) const {
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+
+ assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
+
+ unsigned Flags = MI.getFlags();
+
+ auto Trunc = B.buildIntrinsicTrunc(S64, Src, Flags);
+ auto K0 = B.buildFConstant(S64, BitsToDouble(UINT64_C(0x3df0000000000000)));
+ auto K1 = B.buildFConstant(S64, BitsToDouble(UINT64_C(0xc1f0000000000000)));
+
+ auto Mul = B.buildFMul(S64, Trunc, K0, Flags);
+ auto FloorMul = B.buildFFloor(S64, Mul, Flags);
+ auto Fma = B.buildFMA(S64, FloorMul, K1, Trunc, Flags);
+
+ auto Hi = Signed ?
+ B.buildFPTOSI(S32, FloorMul) :
+ B.buildFPTOUI(S32, FloorMul);
+ auto Lo = B.buildFPTOUI(S32, Fma);
+
+ B.buildMerge(Dst, { Lo, Hi });
+ MI.eraseFromParent();
+
+ return true;
+}
+
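As an aside (not part of this patch), the two constants above encode a split of the 64-bit result into 32-bit halves: the high word comes from floor(trunc(x) * 2^-32) and the low word from the fma remainder. A minimal scalar sketch for the unsigned case, with an illustrative helper name:

#include <cmath>
#include <cstdint>

// Scalar model of the G_FPTOUI expansion above (unsigned case only).
static uint64_t fptoui64Sketch(double X) {
  double T  = std::trunc(X);              // G_INTRINSIC_TRUNC
  double Hi = std::floor(T * 0x1p-32);    // K0 = 0x3df0000000000000 = 2^-32
  double Lo = std::fma(Hi, -0x1p32, T);   // K1 = 0xc1f0000000000000 = -2^32
  return ((uint64_t)(uint32_t)Hi << 32) | (uint64_t)(uint32_t)Lo;
}

For G_FPTOSI the high half would be converted with a signed conversion instead, exactly as the Signed flag selects above.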
+bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
+ MachineFunction &MF = Helper.MIRBuilder.getMF();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const bool IsIEEEOp = MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE ||
@@ -1520,10 +1894,6 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(
if (IsIEEEOp)
return true;
- MachineIRBuilder HelperBuilder(MI);
- GISelObserverWrapper DummyObserver;
- LegalizerHelper Helper(MF, DummyObserver, HelperBuilder);
- HelperBuilder.setInstr(MI);
return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized;
}
@@ -1533,8 +1903,12 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
// TODO: Should move some of this into LegalizerHelper.
// TODO: Promote dynamic indexing of s16 to s32
- // TODO: Dynamic s64 indexing is only legal for SGPR.
- Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(2).getReg(), MRI);
+
+ // FIXME: Artifact combiner probably should have replaced the truncated
+ // constant before this, so we shouldn't need
+ // getConstantVRegValWithLookThrough.
+ Optional<ValueAndVReg> IdxVal = getConstantVRegValWithLookThrough(
+ MI.getOperand(2).getReg(), MRI);
if (!IdxVal) // Dynamic case will be selected to register indexing.
return true;
@@ -1545,10 +1919,8 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
LLT EltTy = VecTy.getElementType();
assert(EltTy == MRI.getType(Dst));
- B.setInstr(MI);
-
- if (IdxVal.getValue() < VecTy.getNumElements())
- B.buildExtract(Dst, Vec, IdxVal.getValue() * EltTy.getSizeInBits());
+ if (IdxVal->Value < VecTy.getNumElements())
+ B.buildExtract(Dst, Vec, IdxVal->Value * EltTy.getSizeInBits());
else
B.buildUndef(Dst);
@@ -1562,8 +1934,12 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
// TODO: Should move some of this into LegalizerHelper.
// TODO: Promote dynamic indexing of s16 to s32
- // TODO: Dynamic s64 indexing is only legal for SGPR.
- Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(3).getReg(), MRI);
+
+ // FIXME: Artifact combiner probably should have replaced the truncated
+ // constant before this, so we shouldn't need
+ // getConstantVRegValWithLookThrough.
+ Optional<ValueAndVReg> IdxVal = getConstantVRegValWithLookThrough(
+ MI.getOperand(3).getReg(), MRI);
if (!IdxVal) // Dynamic case will be selected to register indexing.
return true;
@@ -1575,10 +1951,8 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
LLT EltTy = VecTy.getElementType();
assert(EltTy == MRI.getType(Ins));
- B.setInstr(MI);
-
- if (IdxVal.getValue() < VecTy.getNumElements())
- B.buildInsert(Dst, Vec, Ins, IdxVal.getValue() * EltTy.getSizeInBits());
+ if (IdxVal->Value < VecTy.getNumElements())
+ B.buildInsert(Dst, Vec, Ins, IdxVal->Value * EltTy.getSizeInBits());
else
B.buildUndef(Dst);
@@ -1586,10 +1960,29 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
return true;
}
+bool AMDGPULegalizerInfo::legalizeShuffleVector(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ const LLT V2S16 = LLT::vector(2, 16);
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src0);
+
+ if (SrcTy == V2S16 && DstTy == V2S16 &&
+ AMDGPU::isLegalVOP3PShuffleMask(MI.getOperand(3).getShuffleMask()))
+ return true;
+
+ MachineIRBuilder HelperBuilder(MI);
+ GISelObserverWrapper DummyObserver;
+ LegalizerHelper Helper(B.getMF(), DummyObserver, HelperBuilder);
+ return Helper.lowerShuffleVector(MI) == LegalizerHelper::Legalized;
+}
+
bool AMDGPULegalizerInfo::legalizeSinCos(
MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
- B.setInstr(MI);
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
@@ -1597,7 +1990,7 @@ bool AMDGPULegalizerInfo::legalizeSinCos(
unsigned Flags = MI.getFlags();
Register TrigVal;
- auto OneOver2Pi = B.buildFConstant(Ty, 0.5 / M_PI);
+ auto OneOver2Pi = B.buildFConstant(Ty, 0.5 * numbers::inv_pi);
if (ST.hasTrigReducedRange()) {
auto MulVal = B.buildFMul(Ty, SrcReg, OneOver2Pi, Flags);
TrigVal = B.buildIntrinsic(Intrinsic::amdgcn_fract, {Ty}, false)
@@ -1615,10 +2008,12 @@ bool AMDGPULegalizerInfo::legalizeSinCos(
return true;
}
-bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(
- Register DstReg, LLT PtrTy,
- MachineIRBuilder &B, const GlobalValue *GV,
- unsigned Offset, unsigned GAFlags) const {
+bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(Register DstReg, LLT PtrTy,
+ MachineIRBuilder &B,
+ const GlobalValue *GV,
+ int64_t Offset,
+ unsigned GAFlags) const {
+ assert(isInt<32>(Offset + 4) && "32-bit offset is expected!");
// In order to support pc-relative addressing, SI_PC_ADD_REL_OFFSET is lowered
// to the following code sequence:
//
@@ -1681,19 +2076,37 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
const GlobalValue *GV = MI.getOperand(1).getGlobal();
MachineFunction &MF = B.getMF();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- B.setInstr(MI);
if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
if (!MFI->isEntryFunction()) {
const Function &Fn = MF.getFunction();
DiagnosticInfoUnsupported BadLDSDecl(
- Fn, "local memory global used by non-kernel function", MI.getDebugLoc());
+ Fn, "local memory global used by non-kernel function", MI.getDebugLoc(),
+ DS_Warning);
Fn.getContext().diagnose(BadLDSDecl);
+
+ // We currently don't have a way to correctly allocate LDS objects that
+ // aren't directly associated with a kernel. We do force inlining of
+ // functions that use local objects. However, if these dead functions are
+ // not eliminated, we don't want a compile time error. Just emit a warning
+ // and a trap, since there should be no callable path here.
+ B.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>(), true);
+ B.buildUndef(DstReg);
+ MI.eraseFromParent();
+ return true;
}
// TODO: We could emit code to handle the initialization somewhere.
if (!AMDGPUTargetLowering::hasDefinedInitializer(GV)) {
- B.buildConstant(DstReg, MFI->allocateLDSGlobal(B.getDataLayout(), *GV));
+ const SITargetLowering *TLI = ST.getTargetLowering();
+ if (!TLI->shouldUseLDSConstAddress(GV)) {
+ MI.getOperand(1).setTargetFlags(SIInstrInfo::MO_ABS32_LO);
+        return true; // Leave in place.
+ }
+
+ B.buildConstant(
+ DstReg,
+ MFI->allocateLDSGlobal(B.getDataLayout(), *cast<GlobalVariable>(GV)));
MI.eraseFromParent();
return true;
}
@@ -1723,10 +2136,10 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
Register GOTAddr = MRI.createGenericVirtualRegister(PtrTy);
MachineMemOperand *GOTMMO = MF.getMachineMemOperand(
- MachinePointerInfo::getGOT(MF),
- MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant,
- 8 /*Size*/, 8 /*Align*/);
+ MachinePointerInfo::getGOT(MF),
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ 8 /*Size*/, Align(8));
buildPCRelGlobalAddress(GOTAddr, PtrTy, B, GV, 0, SIInstrInfo::MO_GOTPCREL32);
@@ -1744,7 +2157,6 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
bool AMDGPULegalizerInfo::legalizeLoad(
MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, GISelChangeObserver &Observer) const {
- B.setInstr(MI);
LLT ConstPtr = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
auto Cast = B.buildAddrSpaceCast(ConstPtr, MI.getOperand(1).getReg());
Observer.changingInstr(MI);
@@ -1763,16 +2175,15 @@ bool AMDGPULegalizerInfo::legalizeFMad(
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// TODO: Always legal with future ftz flag.
- if (Ty == LLT::scalar(32) && !MFI->getMode().FP32Denormals)
+ // FIXME: Do we need just output?
+ if (Ty == LLT::scalar(32) && !MFI->getMode().allFP32Denormals())
return true;
- if (Ty == LLT::scalar(16) && !MFI->getMode().FP64FP16Denormals)
+ if (Ty == LLT::scalar(16) && !MFI->getMode().allFP64FP16Denormals())
return true;
-
MachineIRBuilder HelperBuilder(MI);
GISelObserverWrapper DummyObserver;
LegalizerHelper Helper(MF, DummyObserver, HelperBuilder);
- HelperBuilder.setMBB(*MI.getParent());
return Helper.lowerFMad(MI) == LegalizerHelper::Legalized;
}
@@ -1790,7 +2201,6 @@ bool AMDGPULegalizerInfo::legalizeAtomicCmpXChg(
LLT ValTy = MRI.getType(CmpVal);
LLT VecTy = LLT::vector(2, ValTy);
- B.setInstr(MI);
Register PackedVal = B.buildBuildVector(VecTy, { NewVal, CmpVal }).getReg(0);
B.buildInstr(AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG)
@@ -1803,39 +2213,248 @@ bool AMDGPULegalizerInfo::legalizeAtomicCmpXChg(
return true;
}
+bool AMDGPULegalizerInfo::legalizeFlog(
+ MachineInstr &MI, MachineIRBuilder &B, double Log2BaseInverted) const {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT Ty = B.getMRI()->getType(Dst);
+ unsigned Flags = MI.getFlags();
+
+ auto Log2Operand = B.buildFLog2(Ty, Src, Flags);
+ auto Log2BaseInvertedOperand = B.buildFConstant(Ty, Log2BaseInverted);
+
+ B.buildFMul(Dst, Log2Operand, Log2BaseInvertedOperand, Flags);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ unsigned Flags = MI.getFlags();
+ LLT Ty = B.getMRI()->getType(Dst);
+
+ auto K = B.buildFConstant(Ty, numbers::log2e);
+ auto Mul = B.buildFMul(Ty, Src, K, Flags);
+ B.buildFExp2(Dst, Mul, Flags);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeFPow(MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Src1 = MI.getOperand(2).getReg();
+ unsigned Flags = MI.getFlags();
+ LLT Ty = B.getMRI()->getType(Dst);
+ const LLT S16 = LLT::scalar(16);
+ const LLT S32 = LLT::scalar(32);
+
+ if (Ty == S32) {
+ auto Log = B.buildFLog2(S32, Src0, Flags);
+ auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32}, false)
+ .addUse(Log.getReg(0))
+ .addUse(Src1)
+ .setMIFlags(Flags);
+ B.buildFExp2(Dst, Mul, Flags);
+ } else if (Ty == S16) {
+ // There's no f16 fmul_legacy, so we need to convert for it.
+ auto Log = B.buildFLog2(S16, Src0, Flags);
+ auto Ext0 = B.buildFPExt(S32, Log, Flags);
+ auto Ext1 = B.buildFPExt(S32, Src1, Flags);
+ auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32}, false)
+ .addUse(Ext0.getReg(0))
+ .addUse(Ext1.getReg(0))
+ .setMIFlags(Flags);
+
+ B.buildFExp2(Dst, B.buildFPTrunc(S16, Mul), Flags);
+ } else
+ return false;
+
+ MI.eraseFromParent();
+ return true;
+}
+
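For reference (not from the patch), the f32 path above is the usual identity pow(x, y) = exp2(log2(x) * y); on the GPU the multiply is fmul_legacy (0 * inf = 0), which the standard-library sketch below does not model:

#include <cmath>

// Scalar model of the f32 G_FPOW expansion; legacy-multiply semantics omitted.
static float powF32Sketch(float X, float Y) {
  return std::exp2(std::log2(X) * Y);
}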
+// Find a source register, ignoring any possible source modifiers.
+static Register stripAnySourceMods(Register OrigSrc, MachineRegisterInfo &MRI) {
+ Register ModSrc = OrigSrc;
+ if (MachineInstr *SrcFNeg = getOpcodeDef(AMDGPU::G_FNEG, ModSrc, MRI)) {
+ ModSrc = SrcFNeg->getOperand(1).getReg();
+ if (MachineInstr *SrcFAbs = getOpcodeDef(AMDGPU::G_FABS, ModSrc, MRI))
+ ModSrc = SrcFAbs->getOperand(1).getReg();
+ } else if (MachineInstr *SrcFAbs = getOpcodeDef(AMDGPU::G_FABS, ModSrc, MRI))
+ ModSrc = SrcFAbs->getOperand(1).getReg();
+ return ModSrc;
+}
+
+bool AMDGPULegalizerInfo::legalizeFFloor(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+
+ const LLT S1 = LLT::scalar(1);
+ const LLT S64 = LLT::scalar(64);
+ Register Dst = MI.getOperand(0).getReg();
+ Register OrigSrc = MI.getOperand(1).getReg();
+ unsigned Flags = MI.getFlags();
+ assert(ST.hasFractBug() && MRI.getType(Dst) == S64 &&
+ "this should not have been custom lowered");
+
+ // V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x))
+ // is used instead. However, SI doesn't have V_FLOOR_F64, so the most
+ // efficient way to implement it is using V_FRACT_F64. The workaround for the
+ // V_FRACT bug is:
+ // fract(x) = isnan(x) ? x : min(V_FRACT(x), 0.99999999999999999)
+ //
+ // Convert floor(x) to (x - fract(x))
+
+ auto Fract = B.buildIntrinsic(Intrinsic::amdgcn_fract, {S64}, false)
+ .addUse(OrigSrc)
+ .setMIFlags(Flags);
+
+ // Give source modifier matching some assistance before obscuring a foldable
+ // pattern.
+
+ // TODO: We can avoid the neg on the fract? The input sign to fract
+ // shouldn't matter?
+ Register ModSrc = stripAnySourceMods(OrigSrc, MRI);
+
+ auto Const = B.buildFConstant(S64, BitsToDouble(0x3fefffffffffffff));
+
+ Register Min = MRI.createGenericVirtualRegister(S64);
+
+ // We don't need to concern ourselves with the snan handling difference, so
+ // use the one which will directly select.
+ const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
+ if (MFI->getMode().IEEE)
+ B.buildFMinNumIEEE(Min, Fract, Const, Flags);
+ else
+ B.buildFMinNum(Min, Fract, Const, Flags);
+
+ Register CorrectedFract = Min;
+ if (!MI.getFlag(MachineInstr::FmNoNans)) {
+ auto IsNan = B.buildFCmp(CmpInst::FCMP_ORD, S1, ModSrc, ModSrc, Flags);
+ CorrectedFract = B.buildSelect(S64, IsNan, ModSrc, Min, Flags).getReg(0);
+ }
+
+ auto NegFract = B.buildFNeg(S64, CorrectedFract, Flags);
+ B.buildFAdd(Dst, OrigSrc, NegFract, Flags);
+
+ MI.eraseFromParent();
+ return true;
+}
+
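For orientation (not part of the patch), the sequence above is the identity floor(x) = x - fract(x) plus the SI V_FRACT workaround spelled out in the comment: clamp the fraction to the largest double below 1.0 and pass NaN inputs through. A scalar sketch, with std::floor standing in for what V_FRACT_F64 returns:

#include <cmath>

// Scalar model of the f64 G_FFLOOR expansion above.
static double floorF64Sketch(double X) {
  double Fract = X - std::floor(X);                // V_FRACT_F64(X)
  Fract = std::fmin(Fract, 0x1.fffffffffffffp-1);  // clamp: 0x3fefffffffffffff
  if (std::isnan(X))
    Fract = X;                                     // isnan(x) ? x : min(...)
  return X - Fract;
}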
+// Turn an illegal packed v2s16 build vector into bit operations.
+// TODO: This should probably be a bitcast action in LegalizerHelper.
+bool AMDGPULegalizerInfo::legalizeBuildVector(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
+ Register Dst = MI.getOperand(0).getReg();
+ const LLT S32 = LLT::scalar(32);
+ assert(MRI.getType(Dst) == LLT::vector(2, 16));
+
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Src1 = MI.getOperand(2).getReg();
+ assert(MRI.getType(Src0) == LLT::scalar(16));
+
+ auto Merge = B.buildMerge(S32, {Src0, Src1});
+ B.buildBitcast(Dst, Merge);
+
+ MI.eraseFromParent();
+ return true;
+}
+
// Return the use branch instruction, otherwise null if the usage is invalid.
static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
MachineRegisterInfo &MRI,
- MachineInstr *&Br) {
+ MachineInstr *&Br,
+ MachineBasicBlock *&UncondBrTarget) {
Register CondDef = MI.getOperand(0).getReg();
if (!MRI.hasOneNonDBGUse(CondDef))
return nullptr;
+ MachineBasicBlock *Parent = MI.getParent();
MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef);
- if (UseMI.getParent() != MI.getParent() ||
+ if (UseMI.getParent() != Parent ||
UseMI.getOpcode() != AMDGPU::G_BRCOND)
return nullptr;
- // Make sure the cond br is followed by a G_BR
+ // Make sure the cond br is followed by a G_BR, or is the last instruction.
MachineBasicBlock::iterator Next = std::next(UseMI.getIterator());
- if (Next != MI.getParent()->end()) {
+ if (Next == Parent->end()) {
+ MachineFunction::iterator NextMBB = std::next(Parent->getIterator());
+ if (NextMBB == Parent->getParent()->end()) // Illegal intrinsic use.
+ return nullptr;
+ UncondBrTarget = &*NextMBB;
+ } else {
if (Next->getOpcode() != AMDGPU::G_BR)
return nullptr;
Br = &*Next;
+ UncondBrTarget = Br->getOperand(0).getMBB();
}
return &UseMI;
}
-Register AMDGPULegalizerInfo::getLiveInRegister(MachineRegisterInfo &MRI,
- Register Reg, LLT Ty) const {
- Register LiveIn = MRI.getLiveInVirtReg(Reg);
- if (LiveIn)
+Register AMDGPULegalizerInfo::insertLiveInCopy(MachineIRBuilder &B,
+ MachineRegisterInfo &MRI,
+ Register LiveIn,
+ Register PhyReg) const {
+ assert(PhyReg.isPhysical() && "Physical register expected");
+
+ // Insert the live-in copy, if required, by defining destination virtual
+ // register.
+ // FIXME: It seems EmitLiveInCopies isn't called anywhere?
+ if (!MRI.getVRegDef(LiveIn)) {
+ // FIXME: Should have scoped insert pt
+ MachineBasicBlock &OrigInsBB = B.getMBB();
+ auto OrigInsPt = B.getInsertPt();
+
+ MachineBasicBlock &EntryMBB = B.getMF().front();
+ EntryMBB.addLiveIn(PhyReg);
+ B.setInsertPt(EntryMBB, EntryMBB.begin());
+ B.buildCopy(LiveIn, PhyReg);
+
+ B.setInsertPt(OrigInsBB, OrigInsPt);
+ }
+
+ return LiveIn;
+}
+
+Register AMDGPULegalizerInfo::getLiveInRegister(MachineIRBuilder &B,
+ MachineRegisterInfo &MRI,
+ Register PhyReg, LLT Ty,
+ bool InsertLiveInCopy) const {
+ assert(PhyReg.isPhysical() && "Physical register expected");
+
+  // Get or create the virtual live-in register.
+ Register LiveIn = MRI.getLiveInVirtReg(PhyReg);
+ if (!LiveIn) {
+ LiveIn = MRI.createGenericVirtualRegister(Ty);
+ MRI.addLiveIn(PhyReg, LiveIn);
+ }
+
+  // When the actual copy required is from a virtual register to a physical
+  // register (to be inserted later), inserting a live-in copy from the
+  // physical register to a virtual register is not required.
+ if (!InsertLiveInCopy)
return LiveIn;
- Register NewReg = MRI.createGenericVirtualRegister(Ty);
- MRI.addLiveIn(Reg, NewReg);
- return NewReg;
+ return insertLiveInCopy(B, MRI, LiveIn, PhyReg);
+}
+
+const ArgDescriptor *AMDGPULegalizerInfo::getArgDescriptor(
+ MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
+ const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
+ const ArgDescriptor *Arg;
+ const TargetRegisterClass *RC;
+ LLT ArgTy;
+ std::tie(Arg, RC, ArgTy) = MFI->getPreloadedValue(ArgType);
+ if (!Arg) {
+ LLVM_DEBUG(dbgs() << "Required arg register missing\n");
+ return nullptr;
+ }
+ return Arg;
}
bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
@@ -1843,12 +2462,14 @@ bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
if (!Arg->isRegister() || !Arg->getRegister().isValid())
return false; // TODO: Handle these
- assert(Arg->getRegister().isPhysical());
+ Register SrcReg = Arg->getRegister();
+ assert(SrcReg.isPhysical() && "Physical register expected");
+ assert(DstReg.isVirtual() && "Virtual register expected");
MachineRegisterInfo &MRI = *B.getMRI();
LLT Ty = MRI.getType(DstReg);
- Register LiveIn = getLiveInRegister(MRI, Arg->getRegister(), Ty);
+ Register LiveIn = getLiveInRegister(B, MRI, SrcReg, Ty);
if (Arg->isMasked()) {
// TODO: Should we try to emit this once in the entry block?
@@ -1864,56 +2485,31 @@ bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
}
B.buildAnd(DstReg, AndMaskSrc, B.buildConstant(S32, Mask >> Shift));
- } else
+ } else {
B.buildCopy(DstReg, LiveIn);
-
- // Insert the argument copy if it doens't already exist.
- // FIXME: It seems EmitLiveInCopies isn't called anywhere?
- if (!MRI.getVRegDef(LiveIn)) {
- // FIXME: Should have scoped insert pt
- MachineBasicBlock &OrigInsBB = B.getMBB();
- auto OrigInsPt = B.getInsertPt();
-
- MachineBasicBlock &EntryMBB = B.getMF().front();
- EntryMBB.addLiveIn(Arg->getRegister());
- B.setInsertPt(EntryMBB, EntryMBB.begin());
- B.buildCopy(LiveIn, Arg->getRegister());
-
- B.setInsertPt(OrigInsBB, OrigInsPt);
}
return true;
}
bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
- MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &B,
- AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
- B.setInstr(MI);
-
- const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
+ AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
- const ArgDescriptor *Arg;
- const TargetRegisterClass *RC;
- std::tie(Arg, RC) = MFI->getPreloadedValue(ArgType);
- if (!Arg) {
- LLVM_DEBUG(dbgs() << "Required arg register missing\n");
+ const ArgDescriptor *Arg = getArgDescriptor(B, ArgType);
+ if (!Arg)
return false;
- }
- if (loadInputValue(MI.getOperand(0).getReg(), B, Arg)) {
- MI.eraseFromParent();
- return true;
- }
+ if (!loadInputValue(MI.getOperand(0).getReg(), B, Arg))
+ return false;
- return false;
+ MI.eraseFromParent();
+ return true;
}
bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
- B.setInstr(MI);
Register Dst = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(Dst);
LLT S16 = LLT::scalar(16);
@@ -1933,6 +2529,284 @@ bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI,
return false;
}
+void AMDGPULegalizerInfo::legalizeUDIV_UREM32Impl(MachineIRBuilder &B,
+ Register DstReg,
+ Register X,
+ Register Y,
+ bool IsDiv) const {
+ const LLT S1 = LLT::scalar(1);
+ const LLT S32 = LLT::scalar(32);
+
+ // See AMDGPUCodeGenPrepare::expandDivRem32 for a description of the
+ // algorithm used here.
+
+ // Initial estimate of inv(y).
+ auto FloatY = B.buildUITOFP(S32, Y);
+ auto RcpIFlag = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {S32}, {FloatY});
+ auto Scale = B.buildFConstant(S32, BitsToFloat(0x4f7ffffe));
+ auto ScaledY = B.buildFMul(S32, RcpIFlag, Scale);
+ auto Z = B.buildFPTOUI(S32, ScaledY);
+
+ // One round of UNR.
+ auto NegY = B.buildSub(S32, B.buildConstant(S32, 0), Y);
+ auto NegYZ = B.buildMul(S32, NegY, Z);
+ Z = B.buildAdd(S32, Z, B.buildUMulH(S32, Z, NegYZ));
+
+ // Quotient/remainder estimate.
+ auto Q = B.buildUMulH(S32, X, Z);
+ auto R = B.buildSub(S32, X, B.buildMul(S32, Q, Y));
+
+ // First quotient/remainder refinement.
+ auto One = B.buildConstant(S32, 1);
+ auto Cond = B.buildICmp(CmpInst::ICMP_UGE, S1, R, Y);
+ if (IsDiv)
+ Q = B.buildSelect(S32, Cond, B.buildAdd(S32, Q, One), Q);
+ R = B.buildSelect(S32, Cond, B.buildSub(S32, R, Y), R);
+
+ // Second quotient/remainder refinement.
+ Cond = B.buildICmp(CmpInst::ICMP_UGE, S1, R, Y);
+ if (IsDiv)
+ B.buildSelect(DstReg, Cond, B.buildAdd(S32, Q, One), Q);
+ else
+ B.buildSelect(DstReg, Cond, B.buildSub(S32, R, Y), R);
+}
+
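For readers unfamiliar with the expandDivRem32 scheme referenced above, a scalar sketch (not from the patch; assumes Y != 0 and uses an exact float reciprocal where the hardware uses V_RCP_IFLAG_F32):

#include <cstdint>

// 32-bit unsigned div/rem via a reciprocal estimate, one Newton-Raphson
// round, and two quotient/remainder refinement steps.
static uint32_t udivrem32Sketch(uint32_t X, uint32_t Y, bool IsDiv) {
  // Initial estimate of 2^32 / Y; 4294966784.0f is the float 0x4f7ffffe.
  uint32_t Z = (uint32_t)((1.0f / (float)Y) * 4294966784.0f);
  uint32_t NegYZ = (0u - Y) * Z;
  Z += (uint32_t)(((uint64_t)Z * NegYZ) >> 32);      // Z += umulh(Z, -Y*Z)
  uint32_t Q = (uint32_t)(((uint64_t)X * Z) >> 32);  // quotient estimate
  uint32_t R = X - Q * Y;                            // remainder estimate
  if (R >= Y) { ++Q; R -= Y; }                       // first refinement
  if (R >= Y) { ++Q; R -= Y; }                       // second refinement
  return IsDiv ? Q : R;
}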
+bool AMDGPULegalizerInfo::legalizeUDIV_UREM32(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ const bool IsDiv = MI.getOpcode() == AMDGPU::G_UDIV;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Num = MI.getOperand(1).getReg();
+ Register Den = MI.getOperand(2).getReg();
+ legalizeUDIV_UREM32Impl(B, DstReg, Num, Den, IsDiv);
+ MI.eraseFromParent();
+ return true;
+}
+
+// Build an integer reciprocal sequence around V_RCP_IFLAG_F32
+//
+// Return lo, hi of result
+//
+// %cvt.lo = G_UITOFP Val.lo
+// %cvt.hi = G_UITOFP Val.hi
+// %mad = G_FMAD %cvt.hi, 2**32, %cvt.lo
+// %rcp = G_AMDGPU_RCP_IFLAG %mad
+// %mul1 = G_FMUL %rcp, 0x5f7ffffc
+// %mul2 = G_FMUL %mul1, 2**(-32)
+// %trunc = G_INTRINSIC_TRUNC %mul2
+// %mad2 = G_FMAD %trunc, -(2**32), %mul1
+// return {G_FPTOUI %mad2, G_FPTOUI %trunc}
+static std::pair<Register, Register> emitReciprocalU64(MachineIRBuilder &B,
+ Register Val) {
+ const LLT S32 = LLT::scalar(32);
+ auto Unmerge = B.buildUnmerge(S32, Val);
+
+ auto CvtLo = B.buildUITOFP(S32, Unmerge.getReg(0));
+ auto CvtHi = B.buildUITOFP(S32, Unmerge.getReg(1));
+
+ auto Mad = B.buildFMAD(S32, CvtHi, // 2**32
+ B.buildFConstant(S32, BitsToFloat(0x4f800000)), CvtLo);
+
+ auto Rcp = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {S32}, {Mad});
+ auto Mul1 =
+ B.buildFMul(S32, Rcp, B.buildFConstant(S32, BitsToFloat(0x5f7ffffc)));
+
+ // 2**(-32)
+ auto Mul2 =
+ B.buildFMul(S32, Mul1, B.buildFConstant(S32, BitsToFloat(0x2f800000)));
+ auto Trunc = B.buildIntrinsicTrunc(S32, Mul2);
+
+ // -(2**32)
+ auto Mad2 = B.buildFMAD(S32, Trunc,
+ B.buildFConstant(S32, BitsToFloat(0xcf800000)), Mul1);
+
+ auto ResultLo = B.buildFPTOUI(S32, Mad2);
+ auto ResultHi = B.buildFPTOUI(S32, Trunc);
+
+ return {ResultLo.getReg(0), ResultHi.getReg(0)};
+}
+
+void AMDGPULegalizerInfo::legalizeUDIV_UREM64Impl(MachineIRBuilder &B,
+ Register DstReg,
+ Register Numer,
+ Register Denom,
+ bool IsDiv) const {
+ const LLT S32 = LLT::scalar(32);
+ const LLT S64 = LLT::scalar(64);
+ const LLT S1 = LLT::scalar(1);
+ Register RcpLo, RcpHi;
+
+ std::tie(RcpLo, RcpHi) = emitReciprocalU64(B, Denom);
+
+ auto Rcp = B.buildMerge(S64, {RcpLo, RcpHi});
+
+ auto Zero64 = B.buildConstant(S64, 0);
+ auto NegDenom = B.buildSub(S64, Zero64, Denom);
+
+ auto MulLo1 = B.buildMul(S64, NegDenom, Rcp);
+ auto MulHi1 = B.buildUMulH(S64, Rcp, MulLo1);
+
+ auto UnmergeMulHi1 = B.buildUnmerge(S32, MulHi1);
+ Register MulHi1_Lo = UnmergeMulHi1.getReg(0);
+ Register MulHi1_Hi = UnmergeMulHi1.getReg(1);
+
+ auto Add1_Lo = B.buildUAddo(S32, S1, RcpLo, MulHi1_Lo);
+ auto Add1_Hi = B.buildUAdde(S32, S1, RcpHi, MulHi1_Hi, Add1_Lo.getReg(1));
+ auto Add1_HiNc = B.buildAdd(S32, RcpHi, MulHi1_Hi);
+ auto Add1 = B.buildMerge(S64, {Add1_Lo, Add1_Hi});
+
+ auto MulLo2 = B.buildMul(S64, NegDenom, Add1);
+ auto MulHi2 = B.buildUMulH(S64, Add1, MulLo2);
+ auto UnmergeMulHi2 = B.buildUnmerge(S32, MulHi2);
+ Register MulHi2_Lo = UnmergeMulHi2.getReg(0);
+ Register MulHi2_Hi = UnmergeMulHi2.getReg(1);
+
+ auto Zero32 = B.buildConstant(S32, 0);
+ auto Add2_Lo = B.buildUAddo(S32, S1, Add1_Lo, MulHi2_Lo);
+ auto Add2_HiC =
+ B.buildUAdde(S32, S1, Add1_HiNc, MulHi2_Hi, Add1_Lo.getReg(1));
+ auto Add2_Hi = B.buildUAdde(S32, S1, Add2_HiC, Zero32, Add2_Lo.getReg(1));
+ auto Add2 = B.buildMerge(S64, {Add2_Lo, Add2_Hi});
+
+ auto UnmergeNumer = B.buildUnmerge(S32, Numer);
+ Register NumerLo = UnmergeNumer.getReg(0);
+ Register NumerHi = UnmergeNumer.getReg(1);
+
+ auto MulHi3 = B.buildUMulH(S64, Numer, Add2);
+ auto Mul3 = B.buildMul(S64, Denom, MulHi3);
+ auto UnmergeMul3 = B.buildUnmerge(S32, Mul3);
+ Register Mul3_Lo = UnmergeMul3.getReg(0);
+ Register Mul3_Hi = UnmergeMul3.getReg(1);
+ auto Sub1_Lo = B.buildUSubo(S32, S1, NumerLo, Mul3_Lo);
+ auto Sub1_Hi = B.buildUSube(S32, S1, NumerHi, Mul3_Hi, Sub1_Lo.getReg(1));
+ auto Sub1_Mi = B.buildSub(S32, NumerHi, Mul3_Hi);
+ auto Sub1 = B.buildMerge(S64, {Sub1_Lo, Sub1_Hi});
+
+ auto UnmergeDenom = B.buildUnmerge(S32, Denom);
+ Register DenomLo = UnmergeDenom.getReg(0);
+ Register DenomHi = UnmergeDenom.getReg(1);
+
+ auto CmpHi = B.buildICmp(CmpInst::ICMP_UGE, S1, Sub1_Hi, DenomHi);
+ auto C1 = B.buildSExt(S32, CmpHi);
+
+ auto CmpLo = B.buildICmp(CmpInst::ICMP_UGE, S1, Sub1_Lo, DenomLo);
+ auto C2 = B.buildSExt(S32, CmpLo);
+
+ auto CmpEq = B.buildICmp(CmpInst::ICMP_EQ, S1, Sub1_Hi, DenomHi);
+ auto C3 = B.buildSelect(S32, CmpEq, C2, C1);
+
+  // TODO: Here and below, portions of the code can be enclosed in if/endif.
+ // Currently control flow is unconditional and we have 4 selects after
+ // potential endif to substitute PHIs.
+
+ // if C3 != 0 ...
+ auto Sub2_Lo = B.buildUSubo(S32, S1, Sub1_Lo, DenomLo);
+ auto Sub2_Mi = B.buildUSube(S32, S1, Sub1_Mi, DenomHi, Sub1_Lo.getReg(1));
+ auto Sub2_Hi = B.buildUSube(S32, S1, Sub2_Mi, Zero32, Sub2_Lo.getReg(1));
+ auto Sub2 = B.buildMerge(S64, {Sub2_Lo, Sub2_Hi});
+
+ auto One64 = B.buildConstant(S64, 1);
+ auto Add3 = B.buildAdd(S64, MulHi3, One64);
+
+ auto C4 =
+ B.buildSExt(S32, B.buildICmp(CmpInst::ICMP_UGE, S1, Sub2_Hi, DenomHi));
+ auto C5 =
+ B.buildSExt(S32, B.buildICmp(CmpInst::ICMP_UGE, S1, Sub2_Lo, DenomLo));
+ auto C6 = B.buildSelect(
+ S32, B.buildICmp(CmpInst::ICMP_EQ, S1, Sub2_Hi, DenomHi), C5, C4);
+
+ // if (C6 != 0)
+ auto Add4 = B.buildAdd(S64, Add3, One64);
+ auto Sub3_Lo = B.buildUSubo(S32, S1, Sub2_Lo, DenomLo);
+
+ auto Sub3_Mi = B.buildUSube(S32, S1, Sub2_Mi, DenomHi, Sub2_Lo.getReg(1));
+ auto Sub3_Hi = B.buildUSube(S32, S1, Sub3_Mi, Zero32, Sub3_Lo.getReg(1));
+ auto Sub3 = B.buildMerge(S64, {Sub3_Lo, Sub3_Hi});
+
+ // endif C6
+ // endif C3
+
+ if (IsDiv) {
+ auto Sel1 = B.buildSelect(
+ S64, B.buildICmp(CmpInst::ICMP_NE, S1, C6, Zero32), Add4, Add3);
+ B.buildSelect(DstReg,
+ B.buildICmp(CmpInst::ICMP_NE, S1, C3, Zero32), Sel1, MulHi3);
+ } else {
+ auto Sel2 = B.buildSelect(
+ S64, B.buildICmp(CmpInst::ICMP_NE, S1, C6, Zero32), Sub3, Sub2);
+ B.buildSelect(DstReg,
+ B.buildICmp(CmpInst::ICMP_NE, S1, C3, Zero32), Sel2, Sub1);
+ }
+}
+
+bool AMDGPULegalizerInfo::legalizeUDIV_UREM(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+ const bool IsDiv = MI.getOpcode() == AMDGPU::G_UDIV;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Num = MI.getOperand(1).getReg();
+ Register Den = MI.getOperand(2).getReg();
+ LLT Ty = MRI.getType(DstReg);
+
+ if (Ty == S32)
+ legalizeUDIV_UREM32Impl(B, DstReg, Num, Den, IsDiv);
+ else if (Ty == S64)
+ legalizeUDIV_UREM64Impl(B, DstReg, Num, Den, IsDiv);
+ else
+ return false;
+
+ MI.eraseFromParent();
+ return true;
+
+}
+
+bool AMDGPULegalizerInfo::legalizeSDIV_SREM(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ const LLT Ty = MRI.getType(DstReg);
+ if (Ty != S32 && Ty != S64)
+ return false;
+
+ const bool IsDiv = MI.getOpcode() == AMDGPU::G_SDIV;
+
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+
+ auto SignBitOffset = B.buildConstant(S32, Ty.getSizeInBits() - 1);
+ auto LHSign = B.buildAShr(Ty, LHS, SignBitOffset);
+ auto RHSign = B.buildAShr(Ty, RHS, SignBitOffset);
+
+ LHS = B.buildAdd(Ty, LHS, LHSign).getReg(0);
+ RHS = B.buildAdd(Ty, RHS, RHSign).getReg(0);
+
+ LHS = B.buildXor(Ty, LHS, LHSign).getReg(0);
+ RHS = B.buildXor(Ty, RHS, RHSign).getReg(0);
+
+ Register UDivRem = MRI.createGenericVirtualRegister(Ty);
+ if (Ty == S32)
+ legalizeUDIV_UREM32Impl(B, UDivRem, LHS, RHS, IsDiv);
+ else
+ legalizeUDIV_UREM64Impl(B, UDivRem, LHS, RHS, IsDiv);
+
+ Register Sign;
+ if (IsDiv)
+ Sign = B.buildXor(Ty, LHSign, RHSign).getReg(0);
+ else
+ Sign = LHSign.getReg(0); // Remainder sign is the same as LHS
+
+ UDivRem = B.buildXor(Ty, UDivRem, Sign).getReg(0);
+ B.buildSub(DstReg, UDivRem, Sign);
+
+ MI.eraseFromParent();
+ return true;
+}
+
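The signed wrapper above reuses the unsigned expansion after a branch-free sign fixup; a scalar sketch of that fixup (not from the patch; the library / and % stand in for the unsigned expansion):

#include <cstdint>

// abs(v) computed as (v + s) ^ s with s = v >> 31, then the sign re-applied
// as (u ^ s) - s; the remainder takes the sign of the dividend.
static int32_t sdivrem32Sketch(int32_t X, int32_t Y, bool IsDiv) {
  uint32_t SX = (uint32_t)(X >> 31);       // 0 or 0xffffffff (arithmetic shift)
  uint32_t SY = (uint32_t)(Y >> 31);
  uint32_t AX = ((uint32_t)X + SX) ^ SX;   // |X| (wraps for INT_MIN)
  uint32_t AY = ((uint32_t)Y + SY) ^ SY;   // |Y|
  uint32_t U  = IsDiv ? AX / AY : AX % AY;
  uint32_t S  = IsDiv ? (SX ^ SY) : SX;
  return (int32_t)((U ^ S) - S);
}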
bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
@@ -1954,7 +2828,7 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
return false;
if (!Unsafe && ResTy == S32 &&
- MF.getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals)
+ MF.getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals())
return false;
if (auto CLHS = getConstantFPVRegVal(LHS, MRI)) {
@@ -1997,7 +2871,6 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
- B.setInstr(MI);
Register Res = MI.getOperand(0).getReg();
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
@@ -2035,15 +2908,13 @@ static void toggleSPDenormMode(bool Enable,
AMDGPU::SIModeRegisterDefaults Mode) {
// Set SP denorm mode to this value.
unsigned SPDenormMode =
- Enable ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
+ Enable ? FP_DENORM_FLUSH_NONE : Mode.fpDenormModeSPValue();
if (ST.hasDenormModeInst()) {
// Preserve default FP64FP16 denorm mode while updating FP32 mode.
- unsigned DPDenormModeDefault = Mode.FP64FP16Denormals
- ? FP_DENORM_FLUSH_NONE
- : FP_DENORM_FLUSH_IN_FLUSH_OUT;
+ uint32_t DPDenormModeDefault = Mode.fpDenormModeDPValue();
- unsigned NewDenormModeValue = SPDenormMode | (DPDenormModeDefault << 2);
+ uint32_t NewDenormModeValue = SPDenormMode | (DPDenormModeDefault << 2);
B.buildInstr(AMDGPU::S_DENORM_MODE)
.addImm(NewDenormModeValue);
@@ -2062,7 +2933,6 @@ static void toggleSPDenormMode(bool Enable,
bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
- B.setInstr(MI);
Register Res = MI.getOperand(0).getReg();
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
@@ -2078,15 +2948,15 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
auto DenominatorScaled =
B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1}, false)
- .addUse(RHS)
.addUse(LHS)
- .addImm(1)
+ .addUse(RHS)
+ .addImm(0)
.setMIFlags(Flags);
auto NumeratorScaled =
B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1}, false)
.addUse(LHS)
.addUse(RHS)
- .addImm(0)
+ .addImm(1)
.setMIFlags(Flags);
auto ApproxRcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
@@ -2096,7 +2966,7 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
// FIXME: Doesn't correctly model the FP mode switch, and the FP operations
// aren't modeled as reading it.
- if (!Mode.FP32Denormals)
+ if (!Mode.allFP32Denormals())
toggleSPDenormMode(true, B, ST, Mode);
auto Fma0 = B.buildFMA(S32, NegDivScale0, ApproxRcp, One, Flags);
@@ -2106,7 +2976,7 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
auto Fma3 = B.buildFMA(S32, Fma2, Fma1, Mul, Flags);
auto Fma4 = B.buildFMA(S32, NegDivScale0, Fma3, NumeratorScaled, Flags);
- if (!Mode.FP32Denormals)
+ if (!Mode.allFP32Denormals())
toggleSPDenormMode(false, B, ST, Mode);
auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32}, false)
@@ -2129,7 +2999,6 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
- B.setInstr(MI);
Register Res = MI.getOperand(0).getReg();
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
@@ -2144,7 +3013,7 @@ bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI,
auto DivScale0 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1}, false)
.addUse(LHS)
.addUse(RHS)
- .addImm(1)
+ .addImm(0)
.setMIFlags(Flags);
auto NegDivScale0 = B.buildFNeg(S64, DivScale0.getReg(0), Flags);
@@ -2160,11 +3029,11 @@ bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI,
auto DivScale1 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1}, false)
.addUse(LHS)
.addUse(RHS)
- .addImm(0)
+ .addImm(1)
.setMIFlags(Flags);
auto Fma3 = B.buildFMA(S64, Fma1, Fma2, Fma1, Flags);
- auto Mul = B.buildMul(S64, DivScale1.getReg(0), Fma3, Flags);
+ auto Mul = B.buildFMul(S64, DivScale1.getReg(0), Fma3, Flags);
auto Fma4 = B.buildFMA(S64, NegDivScale0, Mul, DivScale1.getReg(0), Flags);
Register Scale;
@@ -2172,8 +3041,6 @@ bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI,
// Workaround a hardware bug on SI where the condition output from div_scale
// is not usable.
- Scale = MRI.createGenericVirtualRegister(S1);
-
LLT S32 = LLT::scalar(32);
auto NumUnmerge = B.buildUnmerge(S32, LHS);
@@ -2185,7 +3052,7 @@ bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI,
Scale1Unmerge.getReg(1));
auto CmpDen = B.buildICmp(ICmpInst::ICMP_EQ, S1, DenUnmerge.getReg(1),
Scale0Unmerge.getReg(1));
- B.buildXor(Scale, CmpNum, CmpDen);
+ Scale = B.buildXor(S1, CmpNum, CmpDen).getReg(0);
} else {
Scale = DivScale1.getReg(1);
}
@@ -2210,7 +3077,6 @@ bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI,
bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
- B.setInstr(MI);
Register Res = MI.getOperand(0).getReg();
Register LHS = MI.getOperand(2).getReg();
Register RHS = MI.getOperand(3).getReg();
@@ -2252,8 +3118,6 @@ bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
}
- B.setInstr(MI);
-
uint64_t Offset =
ST.getTargetLowering()->getImplicitParameterOffset(
B.getMF(), AMDGPUTargetLowering::FIRST_IMPLICIT);
@@ -2263,8 +3127,9 @@ bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
const ArgDescriptor *Arg;
const TargetRegisterClass *RC;
- std::tie(Arg, RC)
- = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
+ LLT ArgTy;
+ std::tie(Arg, RC, ArgTy) =
+ MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
if (!Arg)
return false;
@@ -2281,7 +3146,6 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B,
unsigned AddrSpace) const {
- B.setInstr(MI);
Register ApertureReg = getSegmentAperture(AddrSpace, MRI, B);
auto Hi32 = B.buildExtract(LLT::scalar(32), MI.getOperand(2).getReg(), 32);
B.buildICmp(ICmpInst::ICMP_EQ, MI.getOperand(0), Hi32, ApertureReg);
@@ -2289,6 +3153,55 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
return true;
}
+// The raw.(t)buffer and struct.(t)buffer intrinsics have two offset args:
+// offset (the offset that is included in bounds checking and swizzling, to be
+// split between the instruction's voffset and immoffset fields) and soffset
+// (the offset that is excluded from bounds checking and swizzling, to go in
+// the instruction's soffset field). This function takes the first kind of
+// offset and figures out how to split it between voffset and immoffset.
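+// Returns the register to use as the variable offset base, the value for the
+// instruction's immediate offset field, and the total constant offset that was
+// folded out (which callers use to adjust the MachineMemOperand).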
+std::tuple<Register, unsigned, unsigned>
+AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B,
+ Register OrigOffset) const {
+ const unsigned MaxImm = 4095;
+ Register BaseReg;
+ unsigned TotalConstOffset;
+ MachineInstr *OffsetDef;
+ const LLT S32 = LLT::scalar(32);
+
+ std::tie(BaseReg, TotalConstOffset, OffsetDef)
+ = AMDGPU::getBaseWithConstantOffset(*B.getMRI(), OrigOffset);
+
+ unsigned ImmOffset = TotalConstOffset;
+
+  // If the immediate value is too big for the immoffset field, keep only its
+  // low 12 bits (value & 4095) in that field, so that the remainder that is
+  // copied/added into the voffset register is a multiple of 4096 and stands
+  // more chance of being CSEd with the copy/add for another similar load/store.
+ // However, do not do that rounding down to a multiple of 4096 if that is a
+ // negative number, as it appears to be illegal to have a negative offset
+ // in the vgpr, even if adding the immediate offset makes it positive.
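+  // For example, a combined constant offset of 8196 yields Overflow = 8192
+  // (materialized or added into the voffset register below) and ImmOffset = 4,
+  // while an offset of 4095 or less stays entirely in ImmOffset.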
+ unsigned Overflow = ImmOffset & ~MaxImm;
+ ImmOffset -= Overflow;
+ if ((int32_t)Overflow < 0) {
+ Overflow += ImmOffset;
+ ImmOffset = 0;
+ }
+
+ if (Overflow != 0) {
+ if (!BaseReg) {
+ BaseReg = B.buildConstant(S32, Overflow).getReg(0);
+ } else {
+ auto OverflowVal = B.buildConstant(S32, Overflow);
+ BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0);
+ }
+ }
+
+ if (!BaseReg)
+ BaseReg = B.buildConstant(S32, 0).getReg(0);
+
+ return std::make_tuple(BaseReg, ImmOffset, TotalConstOffset);
+}
+
/// Handle register layout difference for f16 images for some subtargets.
Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
MachineRegisterInfo &MRI,
@@ -2312,75 +3225,969 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
return B.buildBuildVector(LLT::vector(NumElts, S32), WideRegs).getReg(0);
}
-bool AMDGPULegalizerInfo::legalizeRawBufferStore(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &B,
- bool IsFormat) const {
- // TODO: Reject f16 format on targets where unsupported.
- Register VData = MI.getOperand(1).getReg();
- LLT Ty = MRI.getType(VData);
+Register AMDGPULegalizerInfo::fixStoreSourceType(
+ MachineIRBuilder &B, Register VData, bool IsFormat) const {
+ MachineRegisterInfo *MRI = B.getMRI();
+ LLT Ty = MRI->getType(VData);
- B.setInstr(MI);
-
- const LLT S32 = LLT::scalar(32);
const LLT S16 = LLT::scalar(16);
// Fixup illegal register types for i8 stores.
if (Ty == LLT::scalar(8) || Ty == S16) {
Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0);
- MI.getOperand(1).setReg(AnyExt);
- return true;
+ return AnyExt;
}
if (Ty.isVector()) {
if (Ty.getElementType() == S16 && Ty.getNumElements() <= 4) {
if (IsFormat)
- MI.getOperand(1).setReg(handleD16VData(B, MRI, VData));
+ return handleD16VData(B, *MRI, VData);
+ }
+ }
+
+ return VData;
+}
+
+bool AMDGPULegalizerInfo::legalizeBufferStore(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ bool IsTyped,
+ bool IsFormat) const {
+ Register VData = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(VData);
+ LLT EltTy = Ty.getScalarType();
+ const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
+ const LLT S32 = LLT::scalar(32);
+
+ VData = fixStoreSourceType(B, VData, IsFormat);
+ Register RSrc = MI.getOperand(2).getReg();
+
+ MachineMemOperand *MMO = *MI.memoperands_begin();
+ const int MemSize = MMO->getSize();
+
+ unsigned ImmOffset;
+ unsigned TotalOffset;
+
+ // The typed intrinsics add an immediate after the registers.
+ const unsigned NumVIndexOps = IsTyped ? 8 : 7;
+
+ // The struct intrinsic variants add one additional operand over raw.
+ const bool HasVIndex = MI.getNumOperands() == NumVIndexOps;
+ Register VIndex;
+ int OpOffset = 0;
+ if (HasVIndex) {
+ VIndex = MI.getOperand(3).getReg();
+ OpOffset = 1;
+ }
+
+ Register VOffset = MI.getOperand(3 + OpOffset).getReg();
+ Register SOffset = MI.getOperand(4 + OpOffset).getReg();
+
+ unsigned Format = 0;
+ if (IsTyped) {
+ Format = MI.getOperand(5 + OpOffset).getImm();
+ ++OpOffset;
+ }
+
+ unsigned AuxiliaryData = MI.getOperand(5 + OpOffset).getImm();
+
+ std::tie(VOffset, ImmOffset, TotalOffset) = splitBufferOffsets(B, VOffset);
+ if (TotalOffset != 0)
+ MMO = B.getMF().getMachineMemOperand(MMO, TotalOffset, MemSize);
+
+ unsigned Opc;
+ if (IsTyped) {
+ Opc = IsD16 ? AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16 :
+ AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT;
+ } else if (IsFormat) {
+ Opc = IsD16 ? AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16 :
+ AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT;
+ } else {
+ switch (MemSize) {
+ case 1:
+ Opc = AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE;
+ break;
+ case 2:
+ Opc = AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT;
+ break;
+ default:
+ Opc = AMDGPU::G_AMDGPU_BUFFER_STORE;
+ break;
+ }
+ }
+
+ if (!VIndex)
+ VIndex = B.buildConstant(S32, 0).getReg(0);
+
+ auto MIB = B.buildInstr(Opc)
+ .addUse(VData) // vdata
+ .addUse(RSrc) // rsrc
+ .addUse(VIndex) // vindex
+ .addUse(VOffset) // voffset
+ .addUse(SOffset) // soffset
+ .addImm(ImmOffset); // offset(imm)
+
+ if (IsTyped)
+ MIB.addImm(Format);
+
+ MIB.addImm(AuxiliaryData) // cachepolicy, swizzled buffer(imm)
+ .addImm(HasVIndex ? -1 : 0) // idxen(imm)
+ .addMemOperand(MMO);
+
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ bool IsFormat,
+ bool IsTyped) const {
+ // FIXME: Verifier should enforce 1 MMO for these intrinsics.
+ MachineMemOperand *MMO = *MI.memoperands_begin();
+ const int MemSize = MMO->getSize();
+ const LLT S32 = LLT::scalar(32);
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register RSrc = MI.getOperand(2).getReg();
+
+ // The typed intrinsics add an immediate after the registers.
+ const unsigned NumVIndexOps = IsTyped ? 8 : 7;
+
+ // The struct intrinsic variants add one additional operand over raw.
+ const bool HasVIndex = MI.getNumOperands() == NumVIndexOps;
+ Register VIndex;
+ int OpOffset = 0;
+ if (HasVIndex) {
+ VIndex = MI.getOperand(3).getReg();
+ OpOffset = 1;
+ }
+
+ Register VOffset = MI.getOperand(3 + OpOffset).getReg();
+ Register SOffset = MI.getOperand(4 + OpOffset).getReg();
+
+ unsigned Format = 0;
+ if (IsTyped) {
+ Format = MI.getOperand(5 + OpOffset).getImm();
+ ++OpOffset;
+ }
+
+ unsigned AuxiliaryData = MI.getOperand(5 + OpOffset).getImm();
+ unsigned ImmOffset;
+ unsigned TotalOffset;
+
+ LLT Ty = MRI.getType(Dst);
+ LLT EltTy = Ty.getScalarType();
+ const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
+ const bool Unpacked = ST.hasUnpackedD16VMem();
+
+ std::tie(VOffset, ImmOffset, TotalOffset) = splitBufferOffsets(B, VOffset);
+ if (TotalOffset != 0)
+ MMO = B.getMF().getMachineMemOperand(MMO, TotalOffset, MemSize);
+
+ unsigned Opc;
+
+ if (IsTyped) {
+ Opc = IsD16 ? AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16 :
+ AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT;
+ } else if (IsFormat) {
+ Opc = IsD16 ? AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16 :
+ AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT;
+ } else {
+ switch (MemSize) {
+ case 1:
+ Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE;
+ break;
+ case 2:
+ Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
+ break;
+ default:
+ Opc = AMDGPU::G_AMDGPU_BUFFER_LOAD;
+ break;
+ }
+ }
+
+ Register LoadDstReg;
+
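+  // Sub-dword non-d16 results and scalar d16 results are loaded into a wider
+  // s32 register and truncated afterwards; unpacked d16 vector results are
+  // loaded with 32-bit elements and repacked below.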
+ bool IsExtLoad = (!IsD16 && MemSize < 4) || (IsD16 && !Ty.isVector());
+ LLT UnpackedTy = Ty.changeElementSize(32);
+
+ if (IsExtLoad)
+ LoadDstReg = B.getMRI()->createGenericVirtualRegister(S32);
+ else if (Unpacked && IsD16 && Ty.isVector())
+ LoadDstReg = B.getMRI()->createGenericVirtualRegister(UnpackedTy);
+ else
+ LoadDstReg = Dst;
+
+ if (!VIndex)
+ VIndex = B.buildConstant(S32, 0).getReg(0);
+
+ auto MIB = B.buildInstr(Opc)
+ .addDef(LoadDstReg) // vdata
+ .addUse(RSrc) // rsrc
+ .addUse(VIndex) // vindex
+ .addUse(VOffset) // voffset
+ .addUse(SOffset) // soffset
+ .addImm(ImmOffset); // offset(imm)
+
+ if (IsTyped)
+ MIB.addImm(Format);
+
+ MIB.addImm(AuxiliaryData) // cachepolicy, swizzled buffer(imm)
+ .addImm(HasVIndex ? -1 : 0) // idxen(imm)
+ .addMemOperand(MMO);
+
+ if (LoadDstReg != Dst) {
+ B.setInsertPt(B.getMBB(), ++B.getInsertPt());
+
+    // The result was widened for an extending load; truncate it back down.
+ if (IsExtLoad)
+ B.buildTrunc(Dst, LoadDstReg);
+ else {
+ // Repack to original 16-bit vector result
+ // FIXME: G_TRUNC should work, but legalization currently fails
+ auto Unmerge = B.buildUnmerge(S32, LoadDstReg);
+ SmallVector<Register, 4> Repack;
+ for (unsigned I = 0, N = Unmerge->getNumOperands() - 1; I != N; ++I)
+ Repack.push_back(B.buildTrunc(EltTy, Unmerge.getReg(I)).getReg(0));
+ B.buildMerge(Dst, Repack);
+ }
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeAtomicIncDec(MachineInstr &MI,
+ MachineIRBuilder &B,
+ bool IsInc) const {
+ unsigned Opc = IsInc ? AMDGPU::G_AMDGPU_ATOMIC_INC :
+ AMDGPU::G_AMDGPU_ATOMIC_DEC;
+ B.buildInstr(Opc)
+ .addDef(MI.getOperand(0).getReg())
+ .addUse(MI.getOperand(2).getReg())
+ .addUse(MI.getOperand(3).getReg())
+ .cloneMemRefs(MI);
+ MI.eraseFromParent();
+ return true;
+}
+
+static unsigned getBufferAtomicPseudo(Intrinsic::ID IntrID) {
+ switch (IntrID) {
+ case Intrinsic::amdgcn_raw_buffer_atomic_swap:
+ case Intrinsic::amdgcn_struct_buffer_atomic_swap:
+ return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP;
+ case Intrinsic::amdgcn_raw_buffer_atomic_add:
+ case Intrinsic::amdgcn_struct_buffer_atomic_add:
+ return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD;
+ case Intrinsic::amdgcn_raw_buffer_atomic_sub:
+ case Intrinsic::amdgcn_struct_buffer_atomic_sub:
+ return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB;
+ case Intrinsic::amdgcn_raw_buffer_atomic_smin:
+ case Intrinsic::amdgcn_struct_buffer_atomic_smin:
+ return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN;
+ case Intrinsic::amdgcn_raw_buffer_atomic_umin:
+ case Intrinsic::amdgcn_struct_buffer_atomic_umin:
+ return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN;
+ case Intrinsic::amdgcn_raw_buffer_atomic_smax:
+ case Intrinsic::amdgcn_struct_buffer_atomic_smax:
+ return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX;
+ case Intrinsic::amdgcn_raw_buffer_atomic_umax:
+ case Intrinsic::amdgcn_struct_buffer_atomic_umax:
+ return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX;
+ case Intrinsic::amdgcn_raw_buffer_atomic_and:
+ case Intrinsic::amdgcn_struct_buffer_atomic_and:
+ return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND;
+ case Intrinsic::amdgcn_raw_buffer_atomic_or:
+ case Intrinsic::amdgcn_struct_buffer_atomic_or:
+ return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR;
+ case Intrinsic::amdgcn_raw_buffer_atomic_xor:
+ case Intrinsic::amdgcn_struct_buffer_atomic_xor:
+ return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR;
+ case Intrinsic::amdgcn_raw_buffer_atomic_inc:
+ case Intrinsic::amdgcn_struct_buffer_atomic_inc:
+ return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC;
+ case Intrinsic::amdgcn_raw_buffer_atomic_dec:
+ case Intrinsic::amdgcn_struct_buffer_atomic_dec:
+ return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC;
+ case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap:
+ case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap:
+ return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP;
+ default:
+ llvm_unreachable("unhandled atomic opcode");
+ }
+}
+
+bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI,
+ MachineIRBuilder &B,
+ Intrinsic::ID IID) const {
+ const bool IsCmpSwap = IID == Intrinsic::amdgcn_raw_buffer_atomic_cmpswap ||
+ IID == Intrinsic::amdgcn_struct_buffer_atomic_cmpswap;
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register VData = MI.getOperand(2).getReg();
+
+ Register CmpVal;
+ int OpOffset = 0;
+
+ if (IsCmpSwap) {
+ CmpVal = MI.getOperand(3 + OpOffset).getReg();
+ ++OpOffset;
+ }
+
+ Register RSrc = MI.getOperand(3 + OpOffset).getReg();
+ const unsigned NumVIndexOps = IsCmpSwap ? 9 : 8;
+
+ // The struct intrinsic variants add one additional operand over raw.
+ const bool HasVIndex = MI.getNumOperands() == NumVIndexOps;
+ Register VIndex;
+ if (HasVIndex) {
+ VIndex = MI.getOperand(4 + OpOffset).getReg();
+ ++OpOffset;
+ }
+
+ Register VOffset = MI.getOperand(4 + OpOffset).getReg();
+ Register SOffset = MI.getOperand(5 + OpOffset).getReg();
+ unsigned AuxiliaryData = MI.getOperand(6 + OpOffset).getImm();
+
+ MachineMemOperand *MMO = *MI.memoperands_begin();
+
+ unsigned ImmOffset;
+ unsigned TotalOffset;
+ std::tie(VOffset, ImmOffset, TotalOffset) = splitBufferOffsets(B, VOffset);
+ if (TotalOffset != 0)
+ MMO = B.getMF().getMachineMemOperand(MMO, TotalOffset, MMO->getSize());
+
+ if (!VIndex)
+ VIndex = B.buildConstant(LLT::scalar(32), 0).getReg(0);
+
+ auto MIB = B.buildInstr(getBufferAtomicPseudo(IID))
+ .addDef(Dst)
+ .addUse(VData); // vdata
+
+ if (IsCmpSwap)
+ MIB.addReg(CmpVal);
+
+ MIB.addUse(RSrc) // rsrc
+ .addUse(VIndex) // vindex
+ .addUse(VOffset) // voffset
+ .addUse(SOffset) // soffset
+ .addImm(ImmOffset) // offset(imm)
+ .addImm(AuxiliaryData) // cachepolicy, swizzled buffer(imm)
+ .addImm(HasVIndex ? -1 : 0) // idxen(imm)
+ .addMemOperand(MMO);
+
+ MI.eraseFromParent();
+ return true;
+}
+
+/// Turn the set of s16 typed address operands of \p MI (from \p AddrIdx up to
+/// \p EndIdx) into dword sized vectors with s16 typed elements, appending the
+/// packed registers to \p PackedAddrs.
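+/// For example, a 2-D coordinate pair (s, t) becomes a single <2 x s16>
+/// build_vector, and with an odd number of coordinates the final element is
+/// padded with undef.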
+static void packImageA16AddressToDwords(MachineIRBuilder &B, MachineInstr &MI,
+ SmallVectorImpl<Register> &PackedAddrs,
+ int AddrIdx, int DimIdx, int EndIdx,
+ int NumGradients) {
+ const LLT S16 = LLT::scalar(16);
+ const LLT V2S16 = LLT::vector(2, 16);
+
+ for (int I = AddrIdx; I < EndIdx; ++I) {
+ MachineOperand &SrcOp = MI.getOperand(I);
+ if (!SrcOp.isReg())
+ continue; // _L to _LZ may have eliminated this.
+
+ Register AddrReg = SrcOp.getReg();
+
+ if (I < DimIdx) {
+ AddrReg = B.buildBitcast(V2S16, AddrReg).getReg(0);
+ PackedAddrs.push_back(AddrReg);
+ } else {
+ // Dz/dh, dz/dv and the last odd coord are packed with undef. Also, in 1D,
+ // derivatives dx/dh and dx/dv are packed with undef.
+ if (((I + 1) >= EndIdx) ||
+ ((NumGradients / 2) % 2 == 1 &&
+ (I == DimIdx + (NumGradients / 2) - 1 ||
+ I == DimIdx + NumGradients - 1)) ||
+ // Check for _L to _LZ optimization
+ !MI.getOperand(I + 1).isReg()) {
+ PackedAddrs.push_back(
+ B.buildBuildVector(V2S16, {AddrReg, B.buildUndef(S16).getReg(0)})
+ .getReg(0));
+ } else {
+ PackedAddrs.push_back(
+ B.buildBuildVector(V2S16, {AddrReg, MI.getOperand(I + 1).getReg()})
+ .getReg(0));
+ ++I;
+ }
+ }
+ }
+}
+
+/// Convert from separate vaddr components to a single vector address register,
+/// and replace the remaining operands with $noreg.
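+/// For example, three s32 components become one <3 x s32> build_vector, while
+/// five to seven components are first padded with undef up to eight elements.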
+static void convertImageAddrToPacked(MachineIRBuilder &B, MachineInstr &MI,
+ int DimIdx, int NumVAddrs) {
+ const LLT S32 = LLT::scalar(32);
+
+ SmallVector<Register, 8> AddrRegs;
+ for (int I = 0; I != NumVAddrs; ++I) {
+ MachineOperand &SrcOp = MI.getOperand(DimIdx + I);
+ if (SrcOp.isReg()) {
+ AddrRegs.push_back(SrcOp.getReg());
+ assert(B.getMRI()->getType(SrcOp.getReg()) == S32);
+ }
+ }
+
+ int NumAddrRegs = AddrRegs.size();
+ if (NumAddrRegs != 1) {
+ // Round up to 8 elements for v5-v7
+ // FIXME: Missing intermediate sized register classes and instructions.
+ if (NumAddrRegs > 4 && !isPowerOf2_32(NumAddrRegs)) {
+ const int RoundedNumRegs = NextPowerOf2(NumAddrRegs);
+ auto Undef = B.buildUndef(S32);
+ AddrRegs.append(RoundedNumRegs - NumAddrRegs, Undef.getReg(0));
+ NumAddrRegs = RoundedNumRegs;
+ }
+
+ auto VAddr = B.buildBuildVector(LLT::vector(NumAddrRegs, 32), AddrRegs);
+ MI.getOperand(DimIdx).setReg(VAddr.getReg(0));
+ }
+
+ for (int I = 1; I != NumVAddrs; ++I) {
+ MachineOperand &SrcOp = MI.getOperand(DimIdx + I);
+ if (SrcOp.isReg())
+ MI.getOperand(DimIdx + I).setReg(AMDGPU::NoRegister);
+ }
+}
+
+/// Rewrite image intrinsics to use register layouts expected by the subtarget.
+///
+/// Depending on the subtarget, load/store with 16-bit element data need to be
+/// rewritten to use the low half of 32-bit registers, or directly use a packed
+/// layout. 16-bit addresses should also sometimes be packed into 32-bit
+/// registers.
+///
+/// We don't want to directly select image instructions just yet, but also want
+/// to expose all register repacking to the legalizer/combiners. We also don't
+/// want a selected instruction entering RegBankSelect. In order to avoid
+/// defining a multitude of intermediate image instructions, directly hack on
+/// the intrinsic's arguments. In cases like a16 addresses, this requires padding
+/// now unnecessary arguments with $noreg.
+bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
+ MachineInstr &MI, MachineIRBuilder &B,
+ GISelChangeObserver &Observer,
+ const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) const {
+
+ const int NumDefs = MI.getNumExplicitDefs();
+ bool IsTFE = NumDefs == 2;
+ // We are only processing the operands of d16 image operations on subtargets
+ // that use the unpacked register layout, or need to repack the TFE result.
+
+ // TODO: Do we need to guard against already legalized intrinsics?
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+ AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode);
+
+ MachineRegisterInfo *MRI = B.getMRI();
+ const LLT S32 = LLT::scalar(32);
+ const LLT S16 = LLT::scalar(16);
+ const LLT V2S16 = LLT::vector(2, 16);
+
+ // Index of first address argument
+ const int AddrIdx = getImageVAddrIdxBegin(BaseOpcode, NumDefs);
+
+ int NumVAddrs, NumGradients;
+ std::tie(NumVAddrs, NumGradients) = getImageNumVAddr(ImageDimIntr, BaseOpcode);
+ const int DMaskIdx = BaseOpcode->Atomic ? -1 :
+ getDMaskIdx(BaseOpcode, NumDefs);
+ unsigned DMask = 0;
+
+ // Check for 16 bit addresses and pack if true.
+ int DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
+ LLT GradTy = MRI->getType(MI.getOperand(DimIdx).getReg());
+ LLT AddrTy = MRI->getType(MI.getOperand(DimIdx + NumGradients).getReg());
+ const bool IsG16 = GradTy == S16;
+ const bool IsA16 = AddrTy == S16;
+
+ int DMaskLanes = 0;
+ if (!BaseOpcode->Atomic) {
+ DMask = MI.getOperand(DMaskIdx).getImm();
+ if (BaseOpcode->Gather4) {
+ DMaskLanes = 4;
+ } else if (DMask != 0) {
+ DMaskLanes = countPopulation(DMask);
+ } else if (!IsTFE && !BaseOpcode->Store) {
+ // If dmask is 0, this is a no-op load. This can be eliminated.
+ B.buildUndef(MI.getOperand(0));
+ MI.eraseFromParent();
return true;
}
+ }
+
+ Observer.changingInstr(MI);
+ auto ChangedInstr = make_scope_exit([&] { Observer.changedInstr(MI); });
+
+ unsigned NewOpcode = NumDefs == 0 ?
+ AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE : AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD;
+
+ // Track that we legalized this
+ MI.setDesc(B.getTII().get(NewOpcode));
+
+  // We expect an error flag since TFE is on and dmask is 0. Force dmask to be
+  // at least 1, otherwise the instruction will fail.
+ if (IsTFE && DMask == 0) {
+ DMask = 0x1;
+ DMaskLanes = 1;
+ MI.getOperand(DMaskIdx).setImm(DMask);
+ }
+
+ if (BaseOpcode->Atomic) {
+ Register VData0 = MI.getOperand(2).getReg();
+ LLT Ty = MRI->getType(VData0);
+
+ // TODO: Allow atomic swap and bit ops for v2s16/v4s16
+ if (Ty.isVector())
+ return false;
+
+ if (BaseOpcode->AtomicX2) {
+ Register VData1 = MI.getOperand(3).getReg();
+ // The two values are packed in one register.
+ LLT PackedTy = LLT::vector(2, Ty);
+ auto Concat = B.buildBuildVector(PackedTy, {VData0, VData1});
+ MI.getOperand(2).setReg(Concat.getReg(0));
+ MI.getOperand(3).setReg(AMDGPU::NoRegister);
+ }
+ }
- return Ty.getElementType() == S32 && Ty.getNumElements() <= 4;
+ int CorrectedNumVAddrs = NumVAddrs;
+
+ // Optimize _L to _LZ when _L is zero
+ if (const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
+ AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
+ const ConstantFP *ConstantLod;
+ const int LodIdx = AddrIdx + NumVAddrs - 1;
+
+ if (mi_match(MI.getOperand(LodIdx).getReg(), *MRI, m_GFCst(ConstantLod))) {
+ if (ConstantLod->isZero() || ConstantLod->isNegative()) {
+ // Set new opcode to _lz variant of _l, and change the intrinsic ID.
+ ImageDimIntr = AMDGPU::getImageDimInstrinsicByBaseOpcode(
+ LZMappingInfo->LZ, ImageDimIntr->Dim);
+
+ // The starting indexes should remain in the same place.
+ --NumVAddrs;
+ --CorrectedNumVAddrs;
+
+ MI.getOperand(MI.getNumExplicitDefs()).setIntrinsicID(
+ static_cast<Intrinsic::ID>(ImageDimIntr->Intr));
+ MI.RemoveOperand(LodIdx);
+ }
+ }
}
- return Ty == S32;
+ // Optimize _mip away, when 'lod' is zero
+ if (AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) {
+ int64_t ConstantLod;
+ const int LodIdx = AddrIdx + NumVAddrs - 1;
+
+ if (mi_match(MI.getOperand(LodIdx).getReg(), *MRI, m_ICst(ConstantLod))) {
+ if (ConstantLod == 0) {
+        // TODO: Change the intrinsic opcode and remove the operand instead of
+        // replacing it with 0, as is done for the _L to _LZ handling above.
+ MI.getOperand(LodIdx).ChangeToImmediate(0);
+ --CorrectedNumVAddrs;
+ }
+ }
+ }
+
+ // Rewrite the addressing register layout before doing anything else.
+ if (IsA16 || IsG16) {
+ if (IsA16) {
+ // Target must support the feature and gradients need to be 16 bit too
+ if (!ST.hasA16() || !IsG16)
+ return false;
+ } else if (!ST.hasG16())
+ return false;
+
+ if (NumVAddrs > 1) {
+ SmallVector<Register, 4> PackedRegs;
+ // Don't compress addresses for G16
+ const int PackEndIdx =
+ IsA16 ? (AddrIdx + NumVAddrs) : (DimIdx + NumGradients);
+ packImageA16AddressToDwords(B, MI, PackedRegs, AddrIdx, DimIdx,
+ PackEndIdx, NumGradients);
+
+ if (!IsA16) {
+ // Add uncompressed address
+ for (int I = DimIdx + NumGradients; I != AddrIdx + NumVAddrs; ++I) {
+ int AddrReg = MI.getOperand(I).getReg();
+ assert(B.getMRI()->getType(AddrReg) == LLT::scalar(32));
+ PackedRegs.push_back(AddrReg);
+ }
+ }
+
+ // See also below in the non-a16 branch
+ const bool UseNSA = PackedRegs.size() >= 3 && ST.hasNSAEncoding();
+
+ if (!UseNSA && PackedRegs.size() > 1) {
+ LLT PackedAddrTy = LLT::vector(2 * PackedRegs.size(), 16);
+ auto Concat = B.buildConcatVectors(PackedAddrTy, PackedRegs);
+ PackedRegs[0] = Concat.getReg(0);
+ PackedRegs.resize(1);
+ }
+
+ const int NumPacked = PackedRegs.size();
+ for (int I = 0; I != NumVAddrs; ++I) {
+ MachineOperand &SrcOp = MI.getOperand(AddrIdx + I);
+ if (!SrcOp.isReg()) {
+ assert(SrcOp.isImm() && SrcOp.getImm() == 0);
+ continue;
+ }
+
+ assert(SrcOp.getReg() != AMDGPU::NoRegister);
+
+ if (I < NumPacked)
+ SrcOp.setReg(PackedRegs[I]);
+ else
+ SrcOp.setReg(AMDGPU::NoRegister);
+ }
+ }
+ } else {
+ // If the register allocator cannot place the address registers contiguously
+ // without introducing moves, then using the non-sequential address encoding
+ // is always preferable, since it saves VALU instructions and is usually a
+ // wash in terms of code size or even better.
+ //
+ // However, we currently have no way of hinting to the register allocator
+ // that MIMG addresses should be placed contiguously when it is possible to
+ // do so, so force non-NSA for the common 2-address case as a heuristic.
+ //
+ // SIShrinkInstructions will convert NSA encodings to non-NSA after register
+ // allocation when possible.
+ const bool UseNSA = CorrectedNumVAddrs >= 3 && ST.hasNSAEncoding();
+
+ if (!UseNSA && NumVAddrs > 1)
+ convertImageAddrToPacked(B, MI, AddrIdx, NumVAddrs);
+ }
+
+ int Flags = 0;
+ if (IsA16)
+ Flags |= 1;
+ if (IsG16)
+ Flags |= 2;
+ MI.addOperand(MachineOperand::CreateImm(Flags));
+
+ if (BaseOpcode->Store) { // No TFE for stores?
+ // TODO: Handle dmask trim
+ Register VData = MI.getOperand(1).getReg();
+ LLT Ty = MRI->getType(VData);
+ if (!Ty.isVector() || Ty.getElementType() != S16)
+ return true;
+
+ Register RepackedReg = handleD16VData(B, *MRI, VData);
+ if (RepackedReg != VData) {
+ MI.getOperand(1).setReg(RepackedReg);
+ }
+
+ return true;
+ }
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT Ty = MRI->getType(DstReg);
+ const LLT EltTy = Ty.getScalarType();
+ const bool IsD16 = Ty.getScalarType() == S16;
+ const int NumElts = Ty.isVector() ? Ty.getNumElements() : 1;
+
+ // Confirm that the return type is large enough for the dmask specified
+ if (NumElts < DMaskLanes)
+ return false;
+
+ if (NumElts > 4 || DMaskLanes > 4)
+ return false;
+
+ const unsigned AdjustedNumElts = DMaskLanes == 0 ? 1 : DMaskLanes;
+ const LLT AdjustedTy = Ty.changeNumElements(AdjustedNumElts);
+
+ // The raw dword aligned data component of the load. The only legal cases
+ // where this matters should be when using the packed D16 format, for
+  // s16 -> <2 x s16>, and <3 x s16> -> <4 x s16>.
+ LLT RoundedTy;
+
+  // S32 vector to cover all data, plus the TFE result element.
+ LLT TFETy;
+
+ // Register type to use for each loaded component. Will be S32 or V2S16.
+ LLT RegTy;
+
+ if (IsD16 && ST.hasUnpackedD16VMem()) {
+ RoundedTy = LLT::scalarOrVector(AdjustedNumElts, 32);
+ TFETy = LLT::vector(AdjustedNumElts + 1, 32);
+ RegTy = S32;
+ } else {
+ unsigned EltSize = EltTy.getSizeInBits();
+ unsigned RoundedElts = (AdjustedTy.getSizeInBits() + 31) / 32;
+ unsigned RoundedSize = 32 * RoundedElts;
+ RoundedTy = LLT::scalarOrVector(RoundedSize / EltSize, EltSize);
+ TFETy = LLT::vector(RoundedSize / 32 + 1, S32);
+ RegTy = !IsTFE && EltSize == 16 ? V2S16 : S32;
+ }
+
+ // The return type does not need adjustment.
+ // TODO: Should we change s16 case to s32 or <2 x s16>?
+ if (!IsTFE && (RoundedTy == Ty || !Ty.isVector()))
+ return true;
+
+ Register Dst1Reg;
+
+ // Insert after the instruction.
+ B.setInsertPt(*MI.getParent(), ++MI.getIterator());
+
+ // TODO: For TFE with d16, if we used a TFE type that was a multiple of <2 x
+ // s16> instead of s32, we would only need 1 bitcast instead of multiple.
+ const LLT LoadResultTy = IsTFE ? TFETy : RoundedTy;
+ const int ResultNumRegs = LoadResultTy.getSizeInBits() / 32;
+
+ Register NewResultReg = MRI->createGenericVirtualRegister(LoadResultTy);
+
+ MI.getOperand(0).setReg(NewResultReg);
+
+ // In the IR, TFE is supposed to be used with a 2 element struct return
+  // type. The instruction really returns these two values in one contiguous
+ // register, with one additional dword beyond the loaded data. Rewrite the
+ // return type to use a single register result.
+
+ if (IsTFE) {
+ Dst1Reg = MI.getOperand(1).getReg();
+ if (MRI->getType(Dst1Reg) != S32)
+ return false;
+
+ // TODO: Make sure the TFE operand bit is set.
+ MI.RemoveOperand(1);
+
+ // Handle the easy case that requires no repack instructions.
+ if (Ty == S32) {
+ B.buildUnmerge({DstReg, Dst1Reg}, NewResultReg);
+ return true;
+ }
+ }
+
+ // Now figure out how to copy the new result register back into the old
+ // result.
+ SmallVector<Register, 5> ResultRegs(ResultNumRegs, Dst1Reg);
+
+ const int NumDataRegs = IsTFE ? ResultNumRegs - 1 : ResultNumRegs;
+
+ if (ResultNumRegs == 1) {
+ assert(!IsTFE);
+ ResultRegs[0] = NewResultReg;
+ } else {
+ // We have to repack into a new vector of some kind.
+ for (int I = 0; I != NumDataRegs; ++I)
+ ResultRegs[I] = MRI->createGenericVirtualRegister(RegTy);
+ B.buildUnmerge(ResultRegs, NewResultReg);
+
+ // Drop the final TFE element to get the data part. The TFE result is
+ // directly written to the right place already.
+ if (IsTFE)
+ ResultRegs.resize(NumDataRegs);
+ }
+
+ // For an s16 scalar result, we form an s32 result with a truncate regardless
+ // of packed vs. unpacked.
+ if (IsD16 && !Ty.isVector()) {
+ B.buildTrunc(DstReg, ResultRegs[0]);
+ return true;
+ }
+
+ // Avoid a build/concat_vector of 1 entry.
+ if (Ty == V2S16 && NumDataRegs == 1 && !ST.hasUnpackedD16VMem()) {
+ B.buildBitcast(DstReg, ResultRegs[0]);
+ return true;
+ }
+
+ assert(Ty.isVector());
+
+ if (IsD16) {
+ // For packed D16 results with TFE enabled, all the data components are
+ // S32. Cast back to the expected type.
+ //
+    // TODO: We don't really need to load s32 elements. We would only need one
+ // cast for the TFE result if a multiple of v2s16 was used.
+ if (RegTy != V2S16 && !ST.hasUnpackedD16VMem()) {
+ for (Register &Reg : ResultRegs)
+ Reg = B.buildBitcast(V2S16, Reg).getReg(0);
+ } else if (ST.hasUnpackedD16VMem()) {
+ for (Register &Reg : ResultRegs)
+ Reg = B.buildTrunc(S16, Reg).getReg(0);
+ }
+ }
+
+ auto padWithUndef = [&](LLT Ty, int NumElts) {
+ if (NumElts == 0)
+ return;
+ Register Undef = B.buildUndef(Ty).getReg(0);
+ for (int I = 0; I != NumElts; ++I)
+ ResultRegs.push_back(Undef);
+ };
+
+ // Pad out any elements eliminated due to the dmask.
+ LLT ResTy = MRI->getType(ResultRegs[0]);
+ if (!ResTy.isVector()) {
+ padWithUndef(ResTy, NumElts - ResultRegs.size());
+ B.buildBuildVector(DstReg, ResultRegs);
+ return true;
+ }
+
+ assert(!ST.hasUnpackedD16VMem() && ResTy == V2S16);
+ const int RegsToCover = (Ty.getSizeInBits() + 31) / 32;
+
+ // Deal with the one annoying legal case.
+ const LLT V3S16 = LLT::vector(3, 16);
+ if (Ty == V3S16) {
+ padWithUndef(ResTy, RegsToCover - ResultRegs.size() + 1);
+ auto Concat = B.buildConcatVectors(LLT::vector(6, 16), ResultRegs);
+ B.buildUnmerge({DstReg, MRI->createGenericVirtualRegister(V3S16)}, Concat);
+ return true;
+ }
+
+ padWithUndef(ResTy, RegsToCover - ResultRegs.size());
+ B.buildConcatVectors(DstReg, ResultRegs);
+ return true;
}
-bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &B) const {
+bool AMDGPULegalizerInfo::legalizeSBufferLoad(
+ MachineInstr &MI, MachineIRBuilder &B,
+ GISelChangeObserver &Observer) const {
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = B.getMRI()->getType(Dst);
+ unsigned Size = Ty.getSizeInBits();
+ MachineFunction &MF = B.getMF();
+
+ Observer.changingInstr(MI);
+
+ // FIXME: We don't really need this intermediate instruction. The intrinsic
+ // should be fixed to have a memory operand. Since it's readnone, we're not
+ // allowed to add one.
+ MI.setDesc(B.getTII().get(AMDGPU::G_AMDGPU_S_BUFFER_LOAD));
+ MI.RemoveOperand(1); // Remove intrinsic ID
+
+ // FIXME: When intrinsic definition is fixed, this should have an MMO already.
+ // TODO: Should this use datalayout alignment?
+ const unsigned MemSize = (Size + 7) / 8;
+ const Align MemAlign(4);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ MemSize, MemAlign);
+ MI.addMemOperand(MF, MMO);
+
+ // There are no 96-bit result scalar loads, but widening to 128-bit should
+ // always be legal. We may need to restore this to a 96-bit result if it turns
+ // out this needs to be converted to a vector load during RegBankSelect.
+ if (!isPowerOf2_32(Size)) {
+ LegalizerHelper Helper(MF, *this, Observer, B);
+
+ if (Ty.isVector())
+ Helper.moreElementsVectorDst(MI, getPow2VectorType(Ty), 0);
+ else
+ Helper.widenScalarDst(MI, getPow2ScalarType(Ty), 0);
+ }
+
+ Observer.changedInstr(MI);
+ return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+  // On a non-HSA path, or if the trap handler is disabled, just insert an
+  // s_endpgm instruction.
+ if (ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa ||
+ !ST.isTrapHandlerEnabled()) {
+ B.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
+ } else {
+ // Pass queue pointer to trap handler as input, and insert trap instruction
+ // Reference: https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
+ const ArgDescriptor *Arg =
+ getArgDescriptor(B, AMDGPUFunctionArgInfo::QUEUE_PTR);
+ if (!Arg)
+ return false;
+ MachineRegisterInfo &MRI = *B.getMRI();
+ Register SGPR01(AMDGPU::SGPR0_SGPR1);
+ Register LiveIn = getLiveInRegister(
+ B, MRI, SGPR01, LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64),
+ /*InsertLiveInCopy=*/false);
+ if (!loadInputValue(LiveIn, B, Arg))
+ return false;
+ B.buildCopy(SGPR01, LiveIn);
+ B.buildInstr(AMDGPU::S_TRAP)
+ .addImm(GCNSubtarget::TrapIDLLVMTrap)
+ .addReg(SGPR01, RegState::Implicit);
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeDebugTrapIntrinsic(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
+  // On a non-HSA path, or if the trap handler is disabled, report a warning
+  // instead of emitting a trap.
+ if (ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa ||
+ !ST.isTrapHandlerEnabled()) {
+ DiagnosticInfoUnsupported NoTrap(B.getMF().getFunction(),
+ "debugtrap handler not supported",
+ MI.getDebugLoc(), DS_Warning);
+ LLVMContext &Ctx = B.getMF().getFunction().getContext();
+ Ctx.diagnose(NoTrap);
+ } else {
+ // Insert debug-trap instruction
+ B.buildInstr(AMDGPU::S_TRAP).addImm(GCNSubtarget::TrapIDLLVMDebugTrap);
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
+ MachineIRBuilder &B = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *B.getMRI();
+
// Replace the use G_BRCOND with the exec manipulate and branch pseudos.
auto IntrID = MI.getIntrinsicID();
switch (IntrID) {
case Intrinsic::amdgcn_if:
case Intrinsic::amdgcn_else: {
MachineInstr *Br = nullptr;
- if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br)) {
+ MachineBasicBlock *UncondBrTarget = nullptr;
+ if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget)) {
const SIRegisterInfo *TRI
= static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
- B.setInstr(*BrCond);
Register Def = MI.getOperand(1).getReg();
Register Use = MI.getOperand(3).getReg();
- MachineBasicBlock *BrTarget = BrCond->getOperand(1).getMBB();
- if (Br)
- BrTarget = Br->getOperand(0).getMBB();
-
+ MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
+ B.setInsertPt(B.getMBB(), BrCond->getIterator());
if (IntrID == Intrinsic::amdgcn_if) {
B.buildInstr(AMDGPU::SI_IF)
.addDef(Def)
.addUse(Use)
- .addMBB(BrTarget);
+ .addMBB(UncondBrTarget);
} else {
B.buildInstr(AMDGPU::SI_ELSE)
.addDef(Def)
.addUse(Use)
- .addMBB(BrTarget)
+ .addMBB(UncondBrTarget)
.addImm(0);
}
- if (Br)
- Br->getOperand(0).setMBB(BrCond->getOperand(1).getMBB());
+ if (Br) {
+ Br->getOperand(0).setMBB(CondBrTarget);
+ } else {
+ // The IRTranslator skips inserting the G_BR for fallthrough cases, but
+ // since we're swapping branch targets it needs to be reinserted.
+ // FIXME: IRTranslator should probably not do this
+ B.buildBr(*CondBrTarget);
+ }
MRI.setRegClass(Def, TRI->getWaveMaskRegClass());
MRI.setRegClass(Use, TRI->getWaveMaskRegClass());
@@ -2393,17 +4200,24 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
}
case Intrinsic::amdgcn_loop: {
MachineInstr *Br = nullptr;
- if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br)) {
+ MachineBasicBlock *UncondBrTarget = nullptr;
+ if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget)) {
const SIRegisterInfo *TRI
= static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
- B.setInstr(*BrCond);
-
- // FIXME: Need to adjust branch targets based on unconditional branch.
+ MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
Register Reg = MI.getOperand(2).getReg();
+
+ B.setInsertPt(B.getMBB(), BrCond->getIterator());
B.buildInstr(AMDGPU::SI_LOOP)
.addUse(Reg)
- .addMBB(BrCond->getOperand(1).getMBB());
+ .addMBB(UncondBrTarget);
+
+ if (Br)
+ Br->getOperand(0).setMBB(CondBrTarget);
+ else
+ B.buildBr(*CondBrTarget);
+
MI.eraseFromParent();
BrCond->eraseFromParent();
MRI.setRegClass(Reg, TRI->getWaveMaskRegClass());
@@ -2413,6 +4227,13 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
return false;
}
case Intrinsic::amdgcn_kernarg_segment_ptr:
+ if (!AMDGPU::isKernel(B.getMF().getFunction().getCallingConv())) {
+ // This only makes sense to call in a kernel, so just lower to null.
+ B.buildConstant(MI.getOperand(0).getReg(), 0);
+ MI.eraseFromParent();
+ return true;
+ }
+
return legalizePreloadedArgIntrin(
MI, MRI, B, AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
case Intrinsic::amdgcn_implicitarg_ptr:
@@ -2454,18 +4275,72 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
case Intrinsic::amdgcn_is_private:
return legalizeIsAddrSpace(MI, MRI, B, AMDGPUAS::PRIVATE_ADDRESS);
case Intrinsic::amdgcn_wavefrontsize: {
- B.setInstr(MI);
B.buildConstant(MI.getOperand(0), ST.getWavefrontSize());
MI.eraseFromParent();
return true;
}
+ case Intrinsic::amdgcn_s_buffer_load:
+ return legalizeSBufferLoad(MI, B, Helper.Observer);
case Intrinsic::amdgcn_raw_buffer_store:
- return legalizeRawBufferStore(MI, MRI, B, false);
+ case Intrinsic::amdgcn_struct_buffer_store:
+ return legalizeBufferStore(MI, MRI, B, false, false);
case Intrinsic::amdgcn_raw_buffer_store_format:
- return legalizeRawBufferStore(MI, MRI, B, true);
- default:
+ case Intrinsic::amdgcn_struct_buffer_store_format:
+ return legalizeBufferStore(MI, MRI, B, false, true);
+ case Intrinsic::amdgcn_raw_tbuffer_store:
+ case Intrinsic::amdgcn_struct_tbuffer_store:
+ return legalizeBufferStore(MI, MRI, B, true, true);
+ case Intrinsic::amdgcn_raw_buffer_load:
+ case Intrinsic::amdgcn_struct_buffer_load:
+ return legalizeBufferLoad(MI, MRI, B, false, false);
+ case Intrinsic::amdgcn_raw_buffer_load_format:
+ case Intrinsic::amdgcn_struct_buffer_load_format:
+ return legalizeBufferLoad(MI, MRI, B, true, false);
+ case Intrinsic::amdgcn_raw_tbuffer_load:
+ case Intrinsic::amdgcn_struct_tbuffer_load:
+ return legalizeBufferLoad(MI, MRI, B, true, true);
+ case Intrinsic::amdgcn_raw_buffer_atomic_swap:
+ case Intrinsic::amdgcn_struct_buffer_atomic_swap:
+ case Intrinsic::amdgcn_raw_buffer_atomic_add:
+ case Intrinsic::amdgcn_struct_buffer_atomic_add:
+ case Intrinsic::amdgcn_raw_buffer_atomic_sub:
+ case Intrinsic::amdgcn_struct_buffer_atomic_sub:
+ case Intrinsic::amdgcn_raw_buffer_atomic_smin:
+ case Intrinsic::amdgcn_struct_buffer_atomic_smin:
+ case Intrinsic::amdgcn_raw_buffer_atomic_umin:
+ case Intrinsic::amdgcn_struct_buffer_atomic_umin:
+ case Intrinsic::amdgcn_raw_buffer_atomic_smax:
+ case Intrinsic::amdgcn_struct_buffer_atomic_smax:
+ case Intrinsic::amdgcn_raw_buffer_atomic_umax:
+ case Intrinsic::amdgcn_struct_buffer_atomic_umax:
+ case Intrinsic::amdgcn_raw_buffer_atomic_and:
+ case Intrinsic::amdgcn_struct_buffer_atomic_and:
+ case Intrinsic::amdgcn_raw_buffer_atomic_or:
+ case Intrinsic::amdgcn_struct_buffer_atomic_or:
+ case Intrinsic::amdgcn_raw_buffer_atomic_xor:
+ case Intrinsic::amdgcn_struct_buffer_atomic_xor:
+ case Intrinsic::amdgcn_raw_buffer_atomic_inc:
+ case Intrinsic::amdgcn_struct_buffer_atomic_inc:
+ case Intrinsic::amdgcn_raw_buffer_atomic_dec:
+ case Intrinsic::amdgcn_struct_buffer_atomic_dec:
+ case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap:
+ case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap:
+ return legalizeBufferAtomic(MI, B, IntrID);
+ case Intrinsic::amdgcn_atomic_inc:
+ return legalizeAtomicIncDec(MI, B, true);
+ case Intrinsic::amdgcn_atomic_dec:
+ return legalizeAtomicIncDec(MI, B, false);
+ case Intrinsic::trap:
+ return legalizeTrapIntrinsic(MI, MRI, B);
+ case Intrinsic::debugtrap:
+ return legalizeDebugTrapIntrinsic(MI, MRI, B);
+ default: {
+ if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
+ AMDGPU::getImageDimIntrinsicInfo(IntrID))
+ return legalizeImageIntrinsic(MI, B, Helper.Observer, ImageDimIntr);
return true;
}
+ }
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 4b1405a92787..ce32bbf76b34 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -32,9 +32,7 @@ public:
AMDGPULegalizerInfo(const GCNSubtarget &ST,
const GCNTargetMachine &TM);
- bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B,
- GISelChangeObserver &Observer) const override;
+ bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override;
Register getSegmentAperture(unsigned AddrSpace,
MachineRegisterInfo &MRI,
@@ -50,18 +48,22 @@ public:
MachineIRBuilder &B) const;
bool legalizeITOFP(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, bool Signed) const;
- bool legalizeMinNumMaxNum(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B) const;
+ bool legalizeFPTOI(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, bool Signed) const;
+ bool legalizeMinNumMaxNum(LegalizerHelper &Helper, MachineInstr &MI) const;
bool legalizeExtractVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+ bool legalizeShuffleVector(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+
bool legalizeSinCos(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
- bool buildPCRelGlobalAddress(
- Register DstReg, LLT PtrTy, MachineIRBuilder &B, const GlobalValue *GV,
- unsigned Offset, unsigned GAFlags = SIInstrInfo::MO_NONE) const;
+ bool buildPCRelGlobalAddress(Register DstReg, LLT PtrTy, MachineIRBuilder &B,
+ const GlobalValue *GV, int64_t Offset,
+ unsigned GAFlags = SIInstrInfo::MO_NONE) const;
bool legalizeGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
@@ -74,16 +76,50 @@ public:
bool legalizeAtomicCmpXChg(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+ bool legalizeFlog(MachineInstr &MI, MachineIRBuilder &B,
+ double Log2BaseInverted) const;
+ bool legalizeFExp(MachineInstr &MI, MachineIRBuilder &B) const;
+ bool legalizeFPow(MachineInstr &MI, MachineIRBuilder &B) const;
+ bool legalizeFFloor(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
- Register getLiveInRegister(MachineRegisterInfo &MRI,
- Register Reg, LLT Ty) const;
+ bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+ Register getLiveInRegister(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+ Register PhyReg, LLT Ty,
+ bool InsertLiveInCopy = true) const;
+ Register insertLiveInCopy(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+ Register LiveIn, Register PhyReg) const;
+ const ArgDescriptor *
+ getArgDescriptor(MachineIRBuilder &B,
+ AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
bool loadInputValue(Register DstReg, MachineIRBuilder &B,
const ArgDescriptor *Arg) const;
bool legalizePreloadedArgIntrin(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
+ bool legalizeUDIV_UREM(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+
+ void legalizeUDIV_UREM32Impl(MachineIRBuilder &B,
+ Register DstReg, Register Num, Register Den,
+ bool IsRem) const;
+ bool legalizeUDIV_UREM32(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+ bool legalizeSDIV_SREM32(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+
+ void legalizeUDIV_UREM64Impl(MachineIRBuilder &B,
+ Register DstReg, Register Numer, Register Denom,
+ bool IsDiv) const;
+
+ bool legalizeUDIV_UREM64(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+ bool legalizeSDIV_SREM(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+
bool legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeFDIV16(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -102,13 +138,46 @@ public:
bool legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, unsigned AddrSpace) const;
+ std::tuple<Register, unsigned, unsigned>
+ splitBufferOffsets(MachineIRBuilder &B, Register OrigOffset) const;
+
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
Register Reg) const;
bool legalizeRawBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, bool IsFormat) const;
- bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B) const override;
+ bool legalizeRawBufferLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, bool IsFormat) const;
+ Register fixStoreSourceType(MachineIRBuilder &B, Register VData,
+ bool IsFormat) const;
+
+ bool legalizeBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, bool IsTyped,
+ bool IsFormat) const;
+ bool legalizeBufferLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, bool IsTyped,
+ bool IsFormat) const;
+ bool legalizeBufferAtomic(MachineInstr &MI, MachineIRBuilder &B,
+ Intrinsic::ID IID) const;
+
+ bool legalizeImageIntrinsic(
+ MachineInstr &MI, MachineIRBuilder &B,
+ GISelChangeObserver &Observer,
+ const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) const;
+
+ bool legalizeSBufferLoad(
+ MachineInstr &MI, MachineIRBuilder &B,
+ GISelChangeObserver &Observer) const;
+
+ bool legalizeAtomicIncDec(MachineInstr &MI, MachineIRBuilder &B,
+ bool IsInc) const;
+
+ bool legalizeTrapIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+ bool legalizeDebugTrapIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+ bool legalizeIntrinsic(LegalizerHelper &Helper,
+ MachineInstr &MI) const override;
};
} // End llvm namespace.
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 0c56927dea02..4a14259f1bdb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -32,7 +32,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include <cmath>
#include <vector>
@@ -170,16 +169,13 @@ namespace {
class AMDGPUSimplifyLibCalls : public FunctionPass {
- const TargetOptions Options;
-
AMDGPULibCalls Simplifier;
public:
static char ID; // Pass identification
- AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions(),
- const TargetMachine *TM = nullptr)
- : FunctionPass(ID), Options(Opt), Simplifier(TM) {
+ AMDGPUSimplifyLibCalls(const TargetMachine *TM = nullptr)
+ : FunctionPass(ID), Simplifier(TM) {
initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
}
@@ -585,7 +581,7 @@ bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
auto *M = Callee->getParent();
auto &Ctx = M->getContext();
- std::string Name = Callee->getName();
+ std::string Name = std::string(Callee->getName());
auto NumArg = CI->getNumArgOperands();
if (NumArg != 4 && NumArg != 6)
return false;
@@ -594,15 +590,15 @@ bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign))
return false;
unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue();
- unsigned Align = cast<ConstantInt>(PacketAlign)->getZExtValue();
- if (Size != Align || !isPowerOf2_32(Size))
+ Align Alignment = cast<ConstantInt>(PacketAlign)->getAlignValue();
+ if (Alignment != Size)
return false;
Type *PtrElemTy;
if (Size <= 8)
PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
else
- PtrElemTy = VectorType::get(Type::getInt64Ty(Ctx), Size / 8);
+ PtrElemTy = FixedVectorType::get(Type::getInt64Ty(Ctx), Size / 8);
unsigned PtrArgLoc = CI->getNumArgOperands() - 3;
auto PtrArg = CI->getArgOperand(PtrArgLoc);
unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
@@ -1130,8 +1126,8 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
Type* rTy = opr0->getType();
Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
Type *nTy = nTyS;
- if (const VectorType *vTy = dyn_cast<VectorType>(rTy))
- nTy = VectorType::get(nTyS, vTy->getNumElements());
+ if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
+ nTy = FixedVectorType::get(nTyS, vTy);
unsigned size = nTy->getScalarSizeInBits();
opr_n = CI->getArgOperand(1);
if (opr_n->getType()->isIntegerTy())
@@ -1420,8 +1416,8 @@ AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
B.SetInsertPoint(&*ItNew);
AllocaInst *Alloc = B.CreateAlloca(RetType, 0,
std::string(prefix) + UI->getName());
- Alloc->setAlignment(MaybeAlign(
- UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType)));
+ Alloc->setAlignment(
+ Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType)));
return Alloc;
}
@@ -1711,35 +1707,14 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
}
// Public interface to the Simplify LibCalls pass.
-FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt,
- const TargetMachine *TM) {
- return new AMDGPUSimplifyLibCalls(Opt, TM);
+FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetMachine *TM) {
+ return new AMDGPUSimplifyLibCalls(TM);
}
FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
return new AMDGPUUseNativeCalls();
}
-static bool setFastFlags(Function &F, const TargetOptions &Options) {
- AttrBuilder B;
-
- if (Options.UnsafeFPMath || Options.NoInfsFPMath)
- B.addAttribute("no-infs-fp-math", "true");
- if (Options.UnsafeFPMath || Options.NoNaNsFPMath)
- B.addAttribute("no-nans-fp-math", "true");
- if (Options.UnsafeFPMath) {
- B.addAttribute("less-precise-fpmad", "true");
- B.addAttribute("unsafe-fp-math", "true");
- }
-
- if (!B.hasAttributes())
- return false;
-
- F.addAttributes(AttributeList::FunctionIndex, B);
-
- return true;
-}
-
bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
@@ -1750,15 +1725,14 @@ bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
LLVM_DEBUG(dbgs() << "AMDIC: process function ";
F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
- if (!EnablePreLink)
- Changed |= setFastFlags(F, Options);
-
for (auto &BB : F) {
for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
// Ignore non-calls.
CallInst *CI = dyn_cast<CallInst>(I);
++I;
- if (!CI) continue;
+ // Ignore intrinsics that do not become real instructions.
+ if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
+ continue;
// Ignore indirect calls.
Function *Callee = CI->getCalledFunction();
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
index e1ae496d9cbc..2b5143ba7506 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
@@ -10,17 +10,18 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
#include "AMDGPULibFunc.h"
-#include <llvm/ADT/SmallString.h>
-#include <llvm/ADT/SmallVector.h>
-#include <llvm/ADT/StringSwitch.h>
+#include "AMDGPU.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueSymbolTable.h"
-#include <llvm/Support/raw_ostream.h>
+#include "llvm/Support/raw_ostream.h"
#include <string>
using namespace llvm;
@@ -479,8 +480,6 @@ static bool eatTerm(StringRef& mangledName, const char (&str)[N]) {
return false;
}
-static inline bool isDigit(char c) { return c >= '0' && c <= '9'; }
-
static int eatNumber(StringRef& s) {
size_t const savedSize = s.size();
int n = 0;
@@ -605,7 +604,7 @@ bool ItaniumParamParser::parseItaniumParam(StringRef& param,
// parse type
char const TC = param.front();
- if (::isDigit(TC)) {
+ if (isDigit(TC)) {
res.ArgType = StringSwitch<AMDGPULibFunc::EType>
(eatLengthPrefixedName(param))
.Case("ocl_image1darray" , AMDGPULibFunc::IMG1DA)
@@ -863,7 +862,7 @@ std::string AMDGPUMangledLibFunc::mangleNameItanium() const {
Param P;
while ((P = I.getNextParam()).ArgType != 0)
Mangler(S, P);
- return S.str();
+ return std::string(S.str());
}
///////////////////////////////////////////////////////////////////////////////
@@ -903,7 +902,7 @@ static Type* getIntrinsicParamType(
return nullptr;
}
if (P.VectorSize > 1)
- T = VectorType::get(T, P.VectorSize);
+ T = FixedVectorType::get(T, P.VectorSize);
if (P.PtrKind != AMDGPULibFunc::BYVALUE)
T = useAddrSpace ? T->getPointerTo((P.PtrKind & AMDGPULibFunc::ADDR_SPACE)
- 1)
@@ -936,7 +935,7 @@ std::string AMDGPUMangledLibFunc::getName() const {
SmallString<128> Buf;
raw_svector_ostream OS(Buf);
writeName(OS);
- return OS.str();
+ return std::string(OS.str());
}
Function *AMDGPULibFunc::getFunction(Module *M, const AMDGPULibFunc &fInfo) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h
index 2354ed7df205..c97223b047e8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h
@@ -13,6 +13,7 @@
namespace llvm {
+class FunctionCallee;
class FunctionType;
class Function;
class Module;
@@ -341,7 +342,7 @@ public:
/// and unmangled function name for unmangled library functions.
virtual std::string mangle() const = 0;
- void setName(StringRef N) { Name = N; }
+ void setName(StringRef N) { Name = std::string(N); }
void setPrefix(ENamePrefix pfx) { FKind = pfx; }
virtual FunctionType *getFunctionType(Module &M) const = 0;
@@ -438,7 +439,7 @@ class AMDGPUUnmangledLibFunc : public AMDGPULibFuncImpl {
public:
explicit AMDGPUUnmangledLibFunc();
explicit AMDGPUUnmangledLibFunc(StringRef FName, FunctionType *FT) {
- Name = FName;
+ Name = std::string(FName);
FuncTy = FT;
}
std::string getName() const override { return Name; }
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
index 15032969890e..54c15e4e4d39 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
@@ -22,7 +22,15 @@ using namespace llvm;
namespace {
-const unsigned MaxStaticSize = 1024;
+static int MaxStaticSize;
+
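+// For example, passing -amdgpu-mem-intrinsic-expand-size=2048 (e.g. to llc)
+// would expand only those memory intrinsics whose constant size exceeds 2048
+// bytes, or whose size is not a compile-time constant.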
+static cl::opt<int, true> MemIntrinsicExpandSizeThresholdOpt(
+ "amdgpu-mem-intrinsic-expand-size",
+ cl::desc("Set minimum mem intrinsic size to expand in IR"),
+ cl::location(MaxStaticSize),
+ cl::init(1024),
+ cl::Hidden);
+
class AMDGPULowerIntrinsics : public ModulePass {
private:
@@ -57,7 +65,7 @@ INITIALIZE_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE, "Lower intrinsics", false,
// require splitting based on alignment)
static bool shouldExpandOperationWithSize(Value *Size) {
ConstantInt *CI = dyn_cast<ConstantInt>(Size);
- return !CI || (CI->getZExtValue() > MaxStaticSize);
+ return !CI || (CI->getSExtValue() > MaxStaticSize);
}
bool AMDGPULowerIntrinsics::expandMemIntrinsicUses(Function &F) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index e64542a395f0..62ab5bb55a16 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -58,6 +58,21 @@ public:
} // end anonymous namespace
+// Skip past static allocas at the start of the entry block when choosing the
+// kernarg load insertion point.
+static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
+ BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
+ for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
+
+ // If this is a dynamic alloca, the value may depend on the loaded kernargs,
+ // so loads will need to be inserted before it.
+ if (!AI || !AI->isStaticAlloca())
+ break;
+ }
+
+ return InsPt;
+}
+
bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
CallingConv::ID CC = F.getCallingConv();
if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
@@ -70,7 +85,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
LLVMContext &Ctx = F.getParent()->getContext();
const DataLayout &DL = F.getParent()->getDataLayout();
BasicBlock &EntryBlock = *F.begin();
- IRBuilder<> Builder(&*EntryBlock.begin());
+ IRBuilder<> Builder(&*getInsertPt(EntryBlock));
const Align KernArgBaseAlign(16); // FIXME: Increase if necessary
const uint64_t BaseOffset = ST.getExplicitKernelArgOffset(F);
@@ -94,7 +109,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
for (Argument &Arg : F.args()) {
Type *ArgTy = Arg.getType();
- unsigned ABITypeAlign = DL.getABITypeAlignment(ArgTy);
+ Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
unsigned Size = DL.getTypeSizeInBits(ArgTy);
unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
@@ -120,7 +135,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
continue;
}
- VectorType *VT = dyn_cast<VectorType>(ArgTy);
+ auto *VT = dyn_cast<FixedVectorType>(ArgTy);
bool IsV3 = VT && VT->getNumElements() == 3;
bool DoShiftOpt = Size < 32 && !ArgTy->isAggregateType();
@@ -152,7 +167,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
}
if (IsV3 && Size >= 32) {
- V4Ty = VectorType::get(VT->getVectorElementType(), 4);
+ V4Ty = FixedVectorType::get(VT->getElementType(), 4);
// Use the hack that clang uses to avoid SelectionDAG ruining v3 loads
AdjustedArgTy = V4Ty;
}
@@ -160,7 +175,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
ArgPtr = Builder.CreateBitCast(ArgPtr, AdjustedArgTy->getPointerTo(AS),
ArgPtr->getName() + ".cast");
LoadInst *Load =
- Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign.value());
+ Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign);
Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));
MDBuilder MDB(Ctx);
@@ -210,7 +225,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
Arg.replaceAllUsesWith(NewVal);
} else if (IsV3) {
Value *Shuf = Builder.CreateShuffleVector(Load, UndefValue::get(V4Ty),
- {0, 1, 2},
+ ArrayRef<int>{0, 1, 2},
Arg.getName() + ".load");
Arg.replaceAllUsesWith(Shuf);
} else {
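
The alignment changes above feed into how kernel-argument loads are laid out from the 16-byte-aligned kernarg base. A standalone sketch of the alignTo arithmetic involved (the argument sizes and alignments below are made up for illustration):

#include <cstdint>
#include <cstdio>

// Same rounding LLVM's alignTo performs.
static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

int main() {
  // An i32 (size 4, align 4) followed by a <4 x float> (size 16, align 16):
  uint64_t Offset = 0;
  Offset = alignTo(Offset, 4);            // i32 lands at offset 0
  Offset += 4;
  Offset = alignTo(Offset, 16);           // vector is padded up to offset 16
  uint64_t VecOffset = Offset;
  Offset += 16;
  std::printf("vector at %llu, total %llu\n",
              (unsigned long long)VecOffset,
              (unsigned long long)Offset);  // vector at 16, total 32
  return 0;
}
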
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index ce7286dabcc8..99d229c9b74e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -254,7 +254,7 @@ const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) {
return AsmPrinter::lowerConstant(CV);
}
-void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
if (emitPseudoExpansionLowering(*OutStreamer, MI))
return;
@@ -272,7 +272,7 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineBasicBlock *MBB = MI->getParent();
MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
while (I != MBB->instr_end() && I->isInsideBundle()) {
- EmitInstruction(&*I);
+ emitInstruction(&*I);
++I;
}
} else {
@@ -381,7 +381,7 @@ void R600MCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
}
}
-void R600AsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void R600AsmPrinter::emitInstruction(const MachineInstr *MI) {
const R600Subtarget &STI = MF->getSubtarget<R600Subtarget>();
R600MCInstLower MCInstLowering(OutContext, STI, *this);
@@ -396,7 +396,7 @@ void R600AsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineBasicBlock *MBB = MI->getParent();
MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
while (I != MBB->instr_end() && I->isInsideBundle()) {
- EmitInstruction(&*I);
+ emitInstruction(&*I);
++I;
}
} else {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 940ddff85d73..64acd6efe028 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -15,14 +15,9 @@ using namespace llvm;
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MachineFunctionInfo(),
- LocalMemoryObjects(),
- ExplicitKernArgSize(0),
- LDSSize(0),
- Mode(MF.getFunction(), MF.getSubtarget<GCNSubtarget>()),
+ Mode(MF.getFunction()),
IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
- NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath),
- MemoryBound(false),
- WaveLimiter(false) {
+ NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
// FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
@@ -43,19 +38,18 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
}
unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
- const GlobalValue &GV) {
+ const GlobalVariable &GV) {
auto Entry = LocalMemoryObjects.insert(std::make_pair(&GV, 0));
if (!Entry.second)
return Entry.first->second;
- unsigned Align = GV.getAlignment();
- if (Align == 0)
- Align = DL.getABITypeAlignment(GV.getValueType());
+ Align Alignment =
+ DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
/// TODO: We should sort these to minimize wasted space due to alignment
/// padding. Currently the padding is decided by the first encountered use
/// during lowering.
- unsigned Offset = LDSSize = alignTo(LDSSize, Align);
+ unsigned Offset = LDSSize = alignTo(LDSSize, Alignment);
Entry.first->second = Offset;
LDSSize += DL.getTypeAllocSize(GV.getValueType());
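
allocateLDSGlobal above assigns each global its offset once, at the aligned end of the current LDS size, and caches it; later queries return the cached value while LDSSize keeps growing. A standalone sketch of that scheme, with std::map standing in for SmallDenseMap and made-up sizes:

#include <cstdio>
#include <map>
#include <string>

static unsigned LDSSize = 0;
static std::map<std::string, unsigned> Offsets;

static unsigned allocate(const std::string &Name, unsigned Size, unsigned Align) {
  auto It = Offsets.insert({Name, 0});
  if (!It.second)
    return It.first->second;                           // already placed
  unsigned Offset = LDSSize = (LDSSize + Align - 1) / Align * Align;
  It.first->second = Offset;
  LDSSize += Size;
  return Offset;
}

int main() {
  std::printf("%u\n", allocate("a", 4, 4));   // 0
  std::printf("%u\n", allocate("b", 16, 16)); // 16 (aligned up from 4)
  std::printf("%u\n", allocate("a", 4, 4));   // 0 again, from the cache
  return 0;
}
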
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 1933e41c66f3..c504dd76bc65 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -23,26 +23,26 @@ class AMDGPUMachineFunction : public MachineFunctionInfo {
SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects;
protected:
- uint64_t ExplicitKernArgSize; // Cache for this.
+ uint64_t ExplicitKernArgSize = 0; // Cache for this.
Align MaxKernArgAlign; // Cache for this.
/// Number of bytes in the LDS that are being used.
- unsigned LDSSize;
+ unsigned LDSSize = 0;
// State of MODE register, assumed FP mode.
AMDGPU::SIModeRegisterDefaults Mode;
// Kernels + shaders. i.e. functions called by the driver and not called
// by other functions.
- bool IsEntryFunction;
+ bool IsEntryFunction = false;
- bool NoSignedZerosFPMath;
+ bool NoSignedZerosFPMath = false;
// Function may be memory bound.
- bool MemoryBound;
+ bool MemoryBound = false;
// Kernel may need limited waves per EU for better performance.
- bool WaveLimiter;
+ bool WaveLimiter = false;
public:
AMDGPUMachineFunction(const MachineFunction &MF);
@@ -77,7 +77,7 @@ public:
return WaveLimiter;
}
- unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalValue &GV);
+ unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV);
};
}
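
The header change above moves the scalar members to in-class default initializers, so a constructor only has to mention the fields it actually computes. A minimal illustration of that C++ idiom (the Info type is illustrative, not the real MachineFunctionInfo):

#include <cstdio>

struct Info {
  unsigned LDSSize = 0;          // defaults apply whenever a ctor omits them
  bool IsEntryFunction = false;
  bool WaveLimiter = false;
  Info() = default;
  explicit Info(bool Entry) : IsEntryFunction(Entry) {} // others stay defaulted
};

int main() {
  Info I(true);
  std::printf("%u %d %d\n", I.LDSSize, I.IsEntryFunction, I.WaveLimiter); // 0 1 0
  return 0;
}
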
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
index 8c11230f411a..b05855d1afc6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
@@ -34,6 +34,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII_,
switch (SecondMI.getOpcode()) {
case AMDGPU::V_ADDC_U32_e64:
case AMDGPU::V_SUBB_U32_e64:
+ case AMDGPU::V_SUBBREV_U32_e64:
case AMDGPU::V_CNDMASK_B32_e64: {
// Try to cluster defs of condition registers to their uses. This improves
// the chance VCC will be available which will allow shrinking to VOP2
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
index f7231471c107..4f9ffa11bc73 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
@@ -33,6 +33,7 @@
#include "AMDGPU.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
index 9613d5a843b3..93079738ef99 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -220,9 +221,8 @@ AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) {
++FI.InstCount;
continue;
}
- CallSite CS(const_cast<Instruction *>(&I));
- if (CS) {
- Function *Callee = CS.getCalledFunction();
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ Function *Callee = CB->getCalledFunction();
if (!Callee || Callee->isDeclaration()) {
++FI.InstCount;
continue;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
new file mode 100644
index 000000000000..098b0e993886
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -0,0 +1,359 @@
+//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does combining of machine instructions at the generic MI level,
+// after the legalizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetMachine.h"
+#include "AMDGPULegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/Debug.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+
+#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+struct FMinFMaxLegacyInfo {
+ Register LHS;
+ Register RHS;
+ Register True;
+ Register False;
+ CmpInst::Predicate Pred;
+};
+
+// TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
+static bool matchFMinFMaxLegacy(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineFunction &MF, FMinFMaxLegacyInfo &Info) {
+ // FIXME: Combines should have subtarget predicates, and we shouldn't need
+ // this here.
+ if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
+ return false;
+
+ // FIXME: Type predicate on pattern
+ if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
+ return false;
+
+ Register Cond = MI.getOperand(1).getReg();
+ if (!MRI.hasOneNonDBGUse(Cond) ||
+ !mi_match(Cond, MRI,
+ m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
+ return false;
+
+ Info.True = MI.getOperand(2).getReg();
+ Info.False = MI.getOperand(3).getReg();
+
+ if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
+ !(Info.LHS == Info.False && Info.RHS == Info.True))
+ return false;
+
+ switch (Info.Pred) {
+ case CmpInst::FCMP_FALSE:
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_ORD:
+ case CmpInst::FCMP_UNO:
+ case CmpInst::FCMP_UEQ:
+ case CmpInst::FCMP_UNE:
+ case CmpInst::FCMP_TRUE:
+ return false;
+ default:
+ return true;
+ }
+}
+
+static void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
+ const FMinFMaxLegacyInfo &Info) {
+
+ auto buildNewInst = [&MI](unsigned Opc, Register X, Register Y) {
+ MachineIRBuilder MIB(MI);
+ MIB.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
+ };
+
+ switch (Info.Pred) {
+ case CmpInst::FCMP_ULT:
+ case CmpInst::FCMP_ULE:
+ if (Info.LHS == Info.True)
+ buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
+ else
+ buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
+ break;
+ case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_OLT: {
+ // We need to permute the operands to get the correct NaN behavior. The
+ // selected operand is the second one based on the failing compare with NaN,
+ // so permute it based on the compare type the hardware uses.
+ if (Info.LHS == Info.True)
+ buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
+ else
+ buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
+ break;
+ }
+ case CmpInst::FCMP_UGE:
+ case CmpInst::FCMP_UGT: {
+ if (Info.LHS == Info.True)
+ buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
+ else
+ buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
+ break;
+ }
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_OGE: {
+ if (Info.LHS == Info.True)
+ buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
+ else
+ buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
+ break;
+ }
+ default:
+ llvm_unreachable("predicate should not have matched");
+ }
+
+ MI.eraseFromParent();
+}
+
+static bool matchUCharToFloat(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineFunction &MF, CombinerHelper &Helper) {
+ Register DstReg = MI.getOperand(0).getReg();
+
+ // TODO: We could try to match extracting the higher bytes, which would be
+ // easier if i8 vectors weren't promoted to i32 vectors, particularly after
+ // types are legalized. v4i8 -> v4f32 is probably the only case to worry
+ // about in practice.
+ LLT Ty = MRI.getType(DstReg);
+ if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
+ Register SrcReg = MI.getOperand(1).getReg();
+ unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
+ assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
+ const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
+ return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
+ }
+
+ return false;
+}
+
+static void applyUCharToFloat(MachineInstr &MI) {
+ MachineIRBuilder B(MI);
+
+ const LLT S32 = LLT::scalar(32);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT Ty = B.getMRI()->getType(DstReg);
+ LLT SrcTy = B.getMRI()->getType(SrcReg);
+ if (SrcTy != S32)
+ SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);
+
+ if (Ty == S32) {
+ B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg},
+ {SrcReg}, MI.getFlags());
+ } else {
+ auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32},
+ {SrcReg}, MI.getFlags());
+ B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
+ }
+
+ MI.eraseFromParent();
+}
+
+// FIXME: Should be able to have 2 separate matchdatas rather than custom struct
+// boilerplate.
+struct CvtF32UByteMatchInfo {
+ Register CvtVal;
+ unsigned ShiftOffset;
+};
+
+static bool matchCvtF32UByteN(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineFunction &MF,
+ CvtF32UByteMatchInfo &MatchInfo) {
+ Register SrcReg = MI.getOperand(1).getReg();
+
+ // Look through G_ZEXT.
+ mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));
+
+ Register Src0;
+ int64_t ShiftAmt;
+ bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
+ if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
+ const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;
+
+ unsigned ShiftOffset = 8 * Offset;
+ if (IsShr)
+ ShiftOffset += ShiftAmt;
+ else
+ ShiftOffset -= ShiftAmt;
+
+ MatchInfo.CvtVal = Src0;
+ MatchInfo.ShiftOffset = ShiftOffset;
+ return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
+ }
+
+ // TODO: Simplify demanded bits.
+ return false;
+}
+
+static void applyCvtF32UByteN(MachineInstr &MI,
+ const CvtF32UByteMatchInfo &MatchInfo) {
+ MachineIRBuilder B(MI);
+ unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;
+
+ const LLT S32 = LLT::scalar(32);
+ Register CvtSrc = MatchInfo.CvtVal;
+ LLT SrcTy = B.getMRI()->getType(MatchInfo.CvtVal);
+ if (SrcTy != S32) {
+ assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
+ CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
+ }
+
+ assert(MI.getOpcode() != NewOpc);
+ B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
+ MI.eraseFromParent();
+}
+
+#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+#include "AMDGPUGenPostLegalizeGICombiner.inc"
+#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+
+namespace {
+#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+#include "AMDGPUGenPostLegalizeGICombiner.inc"
+#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+
+class AMDGPUPostLegalizerCombinerInfo : public CombinerInfo {
+ GISelKnownBits *KB;
+ MachineDominatorTree *MDT;
+
+public:
+ AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
+
+ AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
+ const AMDGPULegalizerInfo *LI,
+ GISelKnownBits *KB, MachineDominatorTree *MDT)
+ : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
+ /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
+ KB(KB), MDT(MDT) {
+ if (!GeneratedRuleCfg.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
+ }
+
+ bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const override;
+};
+
+bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
+ MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ CombinerHelper Helper(Observer, B, KB, MDT);
+ AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg);
+
+ if (Generated.tryCombineAll(Observer, MI, B, Helper))
+ return true;
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_ASHR:
+ // On some subtargets, 64-bit shift is a quarter rate instruction. In the
+ // common case, splitting this into a move and a 32-bit shift is faster and
+ // the same code size.
+ return Helper.tryCombineShiftToUnmerge(MI, 32);
+ }
+
+ return false;
+}
+
+#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
+#include "AMDGPUGenPostLegalizeGICombiner.inc"
+#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
+
+// Pass boilerplate
+// ================
+
+class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
+public:
+ static char ID;
+
+ AMDGPUPostLegalizerCombiner(bool IsOptNone = false);
+
+ StringRef getPassName() const override {
+ return "AMDGPUPostLegalizerCombiner";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+private:
+ bool IsOptNone;
+};
+} // end anonymous namespace
+
+void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
+ if (!IsOptNone) {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
+ : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
+ initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+}
+
+bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ auto *TPC = &getAnalysis<TargetPassConfig>();
+ const Function &F = MF.getFunction();
+ bool EnableOpt =
+ MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
+
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const AMDGPULegalizerInfo *LI
+ = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
+
+ GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+ MachineDominatorTree *MDT =
+ IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
+ AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
+ F.hasMinSize(), LI, KB, MDT);
+ Combiner C(PCInfo, TPC);
+ return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
+}
+
+char AMDGPUPostLegalizerCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
+ "Combine AMDGPU machine instrs after legalization",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
+INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
+ "Combine AMDGPU machine instrs after legalization", false,
+ false)
+
+namespace llvm {
+FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
+ return new AMDGPUPostLegalizerCombiner(IsOptNone);
+}
+} // end namespace llvm
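
matchCvtF32UByteN above folds a constant shift feeding G_AMDGPU_CVT_F32_UBYTEn by adjusting which byte the conversion reads. A standalone sketch of that offset arithmetic with plain integers (no GlobalISel types; the values are illustrative):

#include <cassert>
#include <cstdio>

int main() {
  // G_AMDGPU_CVT_F32_UBYTE0 applied to (Src lshr 16):
  unsigned OpcodeByte  = 0;                          // ...UBYTE0 reads byte 0
  unsigned ShiftAmt    = 16;                         // logical shift right
  unsigned ShiftOffset = 8 * OpcodeByte + ShiftAmt;  // 16 bits = byte 2
  // Same sanity check as the matcher: inside the word, byte-aligned, nonzero.
  assert(ShiftOffset < 32 && ShiftOffset >= 8 && ShiftOffset % 8 == 0);
  std::printf("rewrite to CVT_F32_UBYTE%u of Src\n", ShiftOffset / 8); // UBYTE2
  return 0;
}
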
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
new file mode 100644
index 000000000000..800ad2039f0e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -0,0 +1,153 @@
+//=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does combining of machine instructions at the generic MI level,
+// before the legalizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/Debug.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+
+#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+#include "AMDGPUGenPreLegalizeGICombiner.inc"
+#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+
+namespace {
+#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+#include "AMDGPUGenPreLegalizeGICombiner.inc"
+#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+
+class AMDGPUPreLegalizerCombinerInfo : public CombinerInfo {
+ GISelKnownBits *KB;
+ MachineDominatorTree *MDT;
+
+public:
+ AMDGPUGenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
+
+ AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
+ GISelKnownBits *KB, MachineDominatorTree *MDT)
+ : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
+ KB(KB), MDT(MDT) {
+ if (!GeneratedRuleCfg.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
+ }
+
+ virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const override;
+};
+
+bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
+ MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ CombinerHelper Helper(Observer, B, KB, MDT);
+ AMDGPUGenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg);
+
+ if (Generated.tryCombineAll(Observer, MI, B, Helper))
+ return true;
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_CONCAT_VECTORS:
+ return Helper.tryCombineConcatVectors(MI);
+ case TargetOpcode::G_SHUFFLE_VECTOR:
+ return Helper.tryCombineShuffleVector(MI);
+ }
+
+ return false;
+}
+
+#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
+#include "AMDGPUGenPreLegalizeGICombiner.inc"
+#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
+
+// Pass boilerplate
+// ================
+
+class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
+public:
+ static char ID;
+
+ AMDGPUPreLegalizerCombiner(bool IsOptNone = false);
+
+ StringRef getPassName() const override {
+ return "AMDGPUPreLegalizerCombiner";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+private:
+ bool IsOptNone;
+};
+} // end anonymous namespace
+
+void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
+ if (!IsOptNone) {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
+ : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
+ initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+}
+
+bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ auto *TPC = &getAnalysis<TargetPassConfig>();
+ const Function &F = MF.getFunction();
+ bool EnableOpt =
+ MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
+ GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+ MachineDominatorTree *MDT =
+ IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
+ AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
+ F.hasMinSize(), KB, MDT);
+ Combiner C(PCInfo, TPC);
+ return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
+}
+
+char AMDGPUPreLegalizerCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
+ "Combine AMDGPU machine instrs before legalization",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
+INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
+ "Combine AMDGPU machine instrs before legalization", false,
+ false)
+
+namespace llvm {
+FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) {
+ return new AMDGPUPreLegalizerCombiner(IsOptNone);
+}
+} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
index 511de96b5f7c..524a34be876f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@@ -218,10 +218,10 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu(
//
if (ArgSize % DWORD_ALIGN != 0) {
llvm::Type *ResType = llvm::Type::getInt32Ty(Ctx);
- VectorType *LLVMVecType = llvm::dyn_cast<llvm::VectorType>(ArgType);
+ auto *LLVMVecType = llvm::dyn_cast<llvm::FixedVectorType>(ArgType);
int NumElem = LLVMVecType ? LLVMVecType->getNumElements() : 1;
if (LLVMVecType && NumElem > 1)
- ResType = llvm::VectorType::get(ResType, NumElem);
+ ResType = llvm::FixedVectorType::get(ResType, NumElem);
Builder.SetInsertPoint(CI);
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
if (OpConvSpecifiers[ArgCount - 1] == 'x' ||
@@ -387,9 +387,7 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu(
Value *id_gep_cast =
new BitCastInst(BufferIdx, idPointer, "PrintBuffIdCast", Brnch);
- StoreInst *stbuff =
- new StoreInst(ConstantInt::get(I32Ty, UniqID), id_gep_cast);
- stbuff->insertBefore(Brnch); // to Remove unused variable warning
+ new StoreInst(ConstantInt::get(I32Ty, UniqID), id_gep_cast, Brnch);
SmallVector<Value *, 2> FourthIdxList;
ConstantInt *fourInt =
@@ -408,8 +406,7 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu(
Value *Arg = CI->getArgOperand(ArgCount);
Type *ArgType = Arg->getType();
SmallVector<Value *, 32> WhatToStore;
- if (ArgType->isFPOrFPVectorTy() &&
- (ArgType->getTypeID() != Type::VectorTyID)) {
+ if (ArgType->isFPOrFPVectorTy() && !isa<VectorType>(ArgType)) {
Type *IType = (ArgType->isFloatTy()) ? Int32Ty : Int64Ty;
if (OpConvSpecifiers[ArgCount - 1] == 'f') {
ConstantFP *fpCons = dyn_cast<ConstantFP>(Arg);
@@ -478,18 +475,14 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu(
Arg = new PtrToIntInst(Arg, DstType, "PrintArgPtr", Brnch);
WhatToStore.push_back(Arg);
}
- } else if (ArgType->getTypeID() == Type::VectorTyID) {
+ } else if (isa<FixedVectorType>(ArgType)) {
Type *IType = NULL;
- uint32_t EleCount = cast<VectorType>(ArgType)->getNumElements();
+ uint32_t EleCount = cast<FixedVectorType>(ArgType)->getNumElements();
uint32_t EleSize = ArgType->getScalarSizeInBits();
uint32_t TotalSize = EleCount * EleSize;
if (EleCount == 3) {
- IntegerType *Int32Ty = Type::getInt32Ty(ArgType->getContext());
- Constant *Indices[4] = {
- ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 1),
- ConstantInt::get(Int32Ty, 2), ConstantInt::get(Int32Ty, 2)};
- Constant *Mask = ConstantVector::get(Indices);
- ShuffleVectorInst *Shuffle = new ShuffleVectorInst(Arg, Arg, Mask);
+ ShuffleVectorInst *Shuffle =
+ new ShuffleVectorInst(Arg, Arg, ArrayRef<int>{0, 1, 2, 2});
Shuffle->insertBefore(Brnch);
Arg = Shuffle;
ArgType = Arg->getType();
@@ -523,7 +516,7 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu(
break;
}
if (EleCount > 1) {
- IType = dyn_cast<Type>(VectorType::get(IType, EleCount));
+ IType = FixedVectorType::get(IType, EleCount);
}
Arg = new BitCastInst(Arg, IType, "PrintArgVect", Brnch);
WhatToStore.push_back(Arg);
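
The printf lowering above widens a 3-element vector argument to 4 elements with shuffle mask {0, 1, 2, 2} before bitcasting and storing it. A standalone sketch of what that mask does, using plain arrays instead of IR vectors:

#include <array>
#include <cstdio>

int main() {
  std::array<float, 3> V3 = {1.0f, 2.0f, 3.0f};
  int Mask[4] = {0, 1, 2, 2};      // last lane duplicates element 2 as padding
  std::array<float, 4> V4;
  for (int I = 0; I < 4; ++I)
    V4[I] = V3[Mask[I]];
  std::printf("%g %g %g %g\n", V4[0], V4[1], V4[2], V4[3]); // 1 2 3 3
  return 0;
}
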
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 14958a180ce3..727f71b35049 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -76,6 +76,11 @@ static cl::opt<bool> DisablePromoteAllocaToLDS(
cl::desc("Disable promote alloca to LDS"),
cl::init(false));
+static cl::opt<unsigned> PromoteAllocaToVectorLimit(
+ "amdgpu-promote-alloca-to-vector-limit",
+ cl::desc("Maximum byte size to consider promote alloca to vector"),
+ cl::init(0));
+
// FIXME: This can create globals so should be a module pass.
class AMDGPUPromoteAlloca : public FunctionPass {
private:
@@ -86,6 +91,7 @@ private:
// FIXME: This should be per-kernel.
uint32_t LocalMemLimit = 0;
uint32_t CurrentLocalMemUsage = 0;
+ unsigned MaxVGPRs;
bool IsAMDGCN = false;
bool IsAMDHSA = false;
@@ -128,14 +134,42 @@ public:
}
};
+class AMDGPUPromoteAllocaToVector : public FunctionPass {
+private:
+ unsigned MaxVGPRs;
+
+public:
+ static char ID;
+
+ AMDGPUPromoteAllocaToVector() : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &F) override;
+
+ StringRef getPassName() const override {
+ return "AMDGPU Promote Alloca to vector";
+ }
+
+ bool handleAlloca(AllocaInst &I);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+};
+
} // end anonymous namespace
char AMDGPUPromoteAlloca::ID = 0;
+char AMDGPUPromoteAllocaToVector::ID = 0;
INITIALIZE_PASS(AMDGPUPromoteAlloca, DEBUG_TYPE,
"AMDGPU promote alloca to vector or LDS", false, false)
+INITIALIZE_PASS(AMDGPUPromoteAllocaToVector, DEBUG_TYPE "-to-vector",
+ "AMDGPU promote alloca to vector", false, false)
+
char &llvm::AMDGPUPromoteAllocaID = AMDGPUPromoteAlloca::ID;
+char &llvm::AMDGPUPromoteAllocaToVectorID = AMDGPUPromoteAllocaToVector::ID;
bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
Mod = &M;
@@ -161,6 +195,13 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
if (!ST.isPromoteAllocaEnabled())
return false;
+ if (IsAMDGCN) {
+ const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
+ MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
+ } else {
+ MaxVGPRs = 128;
+ }
+
bool SufficientLDS = hasSufficientLocalMem(F);
bool Changed = false;
BasicBlock &EntryBB = *F.begin();
@@ -251,10 +292,10 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
// 32-bit and extract sequence is already present, and it is probably easier
  // to CSE this. The loads should be mergeable later anyway.
Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 1);
- LoadInst *LoadXY = Builder.CreateAlignedLoad(I32Ty, GEPXY, 4);
+ LoadInst *LoadXY = Builder.CreateAlignedLoad(I32Ty, GEPXY, Align(4));
Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 2);
- LoadInst *LoadZU = Builder.CreateAlignedLoad(I32Ty, GEPZU, 4);
+ LoadInst *LoadZU = Builder.CreateAlignedLoad(I32Ty, GEPZU, Align(4));
MDNode *MD = MDNode::get(Mod->getContext(), None);
LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD);
@@ -297,15 +338,26 @@ Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
return CI;
}
-static VectorType *arrayTypeToVecType(ArrayType *ArrayTy) {
- return VectorType::get(ArrayTy->getElementType(),
- ArrayTy->getNumElements());
+static FixedVectorType *arrayTypeToVecType(ArrayType *ArrayTy) {
+ return FixedVectorType::get(ArrayTy->getElementType(),
+ ArrayTy->getNumElements());
+}
+
+static Value *stripBitcasts(Value *V) {
+ while (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (I->getOpcode() != Instruction::BitCast)
+ break;
+ V = I->getOperand(0);
+ }
+ return V;
}
static Value *
calculateVectorIndex(Value *Ptr,
const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
- GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(stripBitcasts(Ptr));
+ if (!GEP)
+ return nullptr;
auto I = GEPIdx.find(GEP);
return I == GEPIdx.end() ? nullptr : I->second;
@@ -327,7 +379,8 @@ static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
//
// TODO: Check isTriviallyVectorizable for calls and handle other
// instructions.
-static bool canVectorizeInst(Instruction *Inst, User *User) {
+static bool canVectorizeInst(Instruction *Inst, User *User,
+ const DataLayout &DL) {
switch (Inst->getOpcode()) {
case Instruction::Load: {
// Currently only handle the case where the Pointer Operand is a GEP.
@@ -337,7 +390,14 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
LI->getPointerOperandType() == User->getType() &&
isa<VectorType>(LI->getType()))
return true;
- return isa<GetElementPtrInst>(LI->getPointerOperand()) && LI->isSimple();
+
+ Instruction *PtrInst = dyn_cast<Instruction>(LI->getPointerOperand());
+ if (!PtrInst)
+ return false;
+
+ return (PtrInst->getOpcode() == Instruction::GetElementPtr ||
+ PtrInst->getOpcode() == Instruction::BitCast) &&
+ LI->isSimple();
}
case Instruction::BitCast:
return true;
@@ -350,22 +410,46 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
SI->getPointerOperandType() == User->getType() &&
isa<VectorType>(SI->getValueOperand()->getType()))
return true;
- return (SI->getPointerOperand() == User) && isa<GetElementPtrInst>(User) && SI->isSimple();
+
+ Instruction *UserInst = dyn_cast<Instruction>(User);
+ if (!UserInst)
+ return false;
+
+ return (SI->getPointerOperand() == User) &&
+ (UserInst->getOpcode() == Instruction::GetElementPtr ||
+ UserInst->getOpcode() == Instruction::BitCast) &&
+ SI->isSimple();
}
default:
return false;
}
}
-static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
+static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
+ unsigned MaxVGPRs) {
if (DisablePromoteAllocaToVector) {
    LLVM_DEBUG(dbgs() << "  Promotion of alloca to vector is disabled\n");
return false;
}
- Type *AT = Alloca->getAllocatedType();
- SequentialType *AllocaTy = dyn_cast<SequentialType>(AT);
+ Type *AllocaTy = Alloca->getAllocatedType();
+ auto *VectorTy = dyn_cast<FixedVectorType>(AllocaTy);
+ if (auto *ArrayTy = dyn_cast<ArrayType>(AllocaTy)) {
+ if (VectorType::isValidElementType(ArrayTy->getElementType()) &&
+ ArrayTy->getNumElements() > 0)
+ VectorTy = arrayTypeToVecType(ArrayTy);
+ }
+
+ // Use up to 1/4 of available register budget for vectorization.
+ unsigned Limit = PromoteAllocaToVectorLimit ? PromoteAllocaToVectorLimit * 8
+ : (MaxVGPRs * 32);
+
+ if (DL.getTypeSizeInBits(AllocaTy) * 4 > Limit) {
+ LLVM_DEBUG(dbgs() << " Alloca too big for vectorization with "
+ << MaxVGPRs << " registers available\n");
+ return false;
+ }
LLVM_DEBUG(dbgs() << "Alloca candidate for vectorization\n");
@@ -373,22 +457,44 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
// are just being conservative for now.
  // FIXME: We also reject allocas of the form [ 2 x [ 2 x i32 ]] or equivalent. Potentially these
// could also be promoted but we don't currently handle this case
- if (!AllocaTy ||
- AllocaTy->getNumElements() > 16 ||
- AllocaTy->getNumElements() < 2 ||
- !VectorType::isValidElementType(AllocaTy->getElementType())) {
+ if (!VectorTy || VectorTy->getNumElements() > 16 ||
+ VectorTy->getNumElements() < 2) {
LLVM_DEBUG(dbgs() << " Cannot convert type to vector\n");
return false;
}
std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
- std::vector<Value*> WorkList;
- for (User *AllocaUser : Alloca->users()) {
+ std::vector<Value *> WorkList;
+ SmallVector<User *, 8> Users(Alloca->users());
+ SmallVector<User *, 8> UseUsers(Users.size(), Alloca);
+ Type *VecEltTy = VectorTy->getElementType();
+ while (!Users.empty()) {
+ User *AllocaUser = Users.pop_back_val();
+ User *UseUser = UseUsers.pop_back_val();
+ Instruction *Inst = dyn_cast<Instruction>(AllocaUser);
+
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
if (!GEP) {
- if (!canVectorizeInst(cast<Instruction>(AllocaUser), Alloca))
+ if (!canVectorizeInst(Inst, UseUser, DL))
return false;
+ if (Inst->getOpcode() == Instruction::BitCast) {
+ Type *FromTy = Inst->getOperand(0)->getType()->getPointerElementType();
+ Type *ToTy = Inst->getType()->getPointerElementType();
+ if (FromTy->isAggregateType() || ToTy->isAggregateType() ||
+ DL.getTypeSizeInBits(FromTy) != DL.getTypeSizeInBits(ToTy))
+ continue;
+
+ for (User *CastUser : Inst->users()) {
+ if (isAssumeLikeIntrinsic(cast<Instruction>(CastUser)))
+ continue;
+ Users.push_back(CastUser);
+ UseUsers.push_back(Inst);
+ }
+
+ continue;
+ }
+
WorkList.push_back(AllocaUser);
continue;
}
@@ -404,18 +510,10 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
}
GEPVectorIdx[GEP] = Index;
- for (User *GEPUser : AllocaUser->users()) {
- if (!canVectorizeInst(cast<Instruction>(GEPUser), AllocaUser))
- return false;
-
- WorkList.push_back(GEPUser);
- }
+ Users.append(GEP->user_begin(), GEP->user_end());
+ UseUsers.append(GEP->getNumUses(), GEP);
}
- VectorType *VectorTy = dyn_cast<VectorType>(AllocaTy);
- if (!VectorTy)
- VectorTy = arrayTypeToVecType(cast<ArrayType>(AllocaTy));
-
LLVM_DEBUG(dbgs() << " Converting alloca to vector " << *AllocaTy << " -> "
<< *VectorTy << '\n');
@@ -424,40 +522,46 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
IRBuilder<> Builder(Inst);
switch (Inst->getOpcode()) {
case Instruction::Load: {
- if (Inst->getType() == AT)
+ if (Inst->getType() == AllocaTy || Inst->getType()->isVectorTy())
break;
- Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
+ if (!Index)
+ break;
+ Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
+ if (Inst->getType() != VecEltTy)
+ ExtractElement = Builder.CreateBitOrPointerCast(ExtractElement, Inst->getType());
Inst->replaceAllUsesWith(ExtractElement);
Inst->eraseFromParent();
break;
}
case Instruction::Store: {
StoreInst *SI = cast<StoreInst>(Inst);
- if (SI->getValueOperand()->getType() == AT)
+ if (SI->getValueOperand()->getType() == AllocaTy ||
+ SI->getValueOperand()->getType()->isVectorTy())
break;
- Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
Value *Ptr = SI->getPointerOperand();
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
+ if (!Index)
+ break;
+
+ Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
- Value *NewVecValue = Builder.CreateInsertElement(VecValue,
- SI->getValueOperand(),
- Index);
+ Value *Elt = SI->getValueOperand();
+ if (Elt->getType() != VecEltTy)
+ Elt = Builder.CreateBitOrPointerCast(Elt, VecEltTy);
+ Value *NewVecValue = Builder.CreateInsertElement(VecValue, Elt, Index);
Builder.CreateStore(NewVecValue, BitCast);
Inst->eraseFromParent();
break;
}
- case Instruction::BitCast:
- case Instruction::AddrSpaceCast:
- break;
default:
llvm_unreachable("Inconsistency in instructions promotable to vector");
@@ -659,16 +763,15 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
continue;
if (Use->getParent()->getParent() == &F) {
- unsigned Align = GV.getAlignment();
- if (Align == 0)
- Align = DL.getABITypeAlignment(GV.getValueType());
+ Align Alignment =
+ DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
// FIXME: Try to account for padding here. The padding is currently
// determined from the inverse order of uses in the function. I'm not
// sure if the use list order is in any way connected to this, so the
// total reported size is likely incorrect.
uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
- CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Align);
+ CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Alignment);
CurrentLocalMemUsage += AllocSize;
break;
}
@@ -722,6 +825,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
if (!I.isStaticAlloca() || I.isArrayAllocation())
return false;
+ const DataLayout &DL = Mod->getDataLayout();
IRBuilder<> Builder(&I);
// First try to replace the alloca with a vector
@@ -729,7 +833,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
LLVM_DEBUG(dbgs() << "Trying to promote " << I << '\n');
- if (tryPromoteAllocaToVector(&I))
+ if (tryPromoteAllocaToVector(&I, DL, MaxVGPRs))
return true; // Promoted to vector.
if (DisablePromoteAllocaToLDS)
@@ -759,11 +863,8 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, ContainingFunction);
unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;
- const DataLayout &DL = Mod->getDataLayout();
-
- unsigned Align = I.getAlignment();
- if (Align == 0)
- Align = DL.getABITypeAlignment(I.getAllocatedType());
+ Align Alignment =
+ DL.getValueOrABITypeAlignment(I.getAlign(), I.getAllocatedType());
// FIXME: This computed padding is likely wrong since it depends on inverse
// usage order.
@@ -771,7 +872,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
// FIXME: It is also possible that if we're allowed to use all of the memory
  // we could end up using more than the maximum due to alignment padding.
- uint32_t NewSize = alignTo(CurrentLocalMemUsage, Align);
+ uint32_t NewSize = alignTo(CurrentLocalMemUsage, Alignment);
uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);
NewSize += AllocSize;
@@ -938,6 +1039,60 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
return true;
}
+bool AMDGPUPromoteAllocaToVector::runOnFunction(Function &F) {
+ if (skipFunction(F) || DisablePromoteAllocaToVector)
+ return false;
+
+ const TargetMachine *TM;
+ if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
+ TM = &TPC->getTM<TargetMachine>();
+ else
+ return false;
+
+ const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, F);
+ if (!ST.isPromoteAllocaEnabled())
+ return false;
+
+ if (TM->getTargetTriple().getArch() == Triple::amdgcn) {
+ const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
+ MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
+ } else {
+ MaxVGPRs = 128;
+ }
+
+ bool Changed = false;
+ BasicBlock &EntryBB = *F.begin();
+
+ SmallVector<AllocaInst *, 16> Allocas;
+ for (Instruction &I : EntryBB) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
+ Allocas.push_back(AI);
+ }
+
+ for (AllocaInst *AI : Allocas) {
+ if (handleAlloca(*AI))
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool AMDGPUPromoteAllocaToVector::handleAlloca(AllocaInst &I) {
+ // Array allocations are probably not worth handling, since an allocation of
+ // the array type is the canonical form.
+ if (!I.isStaticAlloca() || I.isArrayAllocation())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Trying to promote " << I << '\n');
+
+ Module *Mod = I.getParent()->getParent()->getParent();
+ return tryPromoteAllocaToVector(&I, Mod->getDataLayout(), MaxVGPRs);
+}
+
FunctionPass *llvm::createAMDGPUPromoteAlloca() {
return new AMDGPUPromoteAlloca();
}
+
+FunctionPass *llvm::createAMDGPUPromoteAllocaToVector() {
+ return new AMDGPUPromoteAllocaToVector();
+}
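
tryPromoteAllocaToVector now caps promotion at a quarter of the VGPR budget: each VGPR holds 32 bits per lane, so the default limit is MaxVGPRs * 32 bits, unless amdgpu-promote-alloca-to-vector-limit overrides it (that option is in bytes, hence the * 8 in the patch). A standalone sketch of the size check with made-up numbers:

#include <cstdio>

int main() {
  unsigned MaxVGPRs   = 256;                  // illustrative budget for the kernel
  unsigned LimitBits  = MaxVGPRs * 32;        // 8192 bits of VGPR space
  unsigned AllocaBits = 16 * 32;              // a [16 x i32] alloca = 512 bits
  // Reject if the alloca would consume more than 1/4 of the budget.
  bool TooBig = AllocaBits * 4 > LimitBits;   // 2048 > 8192 ? no
  std::printf("promote to vector: %s\n", TooBig ? "no" : "yes");
  return 0;
}
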
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
index 7a7addd0f5cf..982aae374884 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
@@ -48,19 +48,62 @@ extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
namespace {
+// Target features to propagate.
+static constexpr const FeatureBitset TargetFeatures = {
+ AMDGPU::FeatureWavefrontSize16,
+ AMDGPU::FeatureWavefrontSize32,
+ AMDGPU::FeatureWavefrontSize64
+};
+
+// Attributes to propagate.
+static constexpr const char* AttributeNames[] = {
+ "amdgpu-waves-per-eu"
+};
+
+static constexpr unsigned NumAttr =
+ sizeof(AttributeNames) / sizeof(AttributeNames[0]);
+
class AMDGPUPropagateAttributes {
- const FeatureBitset TargetFeatures = {
- AMDGPU::FeatureWavefrontSize16,
- AMDGPU::FeatureWavefrontSize32,
- AMDGPU::FeatureWavefrontSize64
+
+ class FnProperties {
+ private:
+ explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
+
+ public:
+ explicit FnProperties(const TargetMachine &TM, const Function &F) {
+ Features = TM.getSubtargetImpl(F)->getFeatureBits();
+
+ for (unsigned I = 0; I < NumAttr; ++I)
+ if (F.hasFnAttribute(AttributeNames[I]))
+ Attributes[I] = F.getFnAttribute(AttributeNames[I]);
+ }
+
+ bool operator == (const FnProperties &Other) const {
+ if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
+ return false;
+ for (unsigned I = 0; I < NumAttr; ++I)
+ if (Attributes[I] != Other.Attributes[I])
+ return false;
+ return true;
+ }
+
+ FnProperties adjustToCaller(const FnProperties &CallerProps) const {
+ FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
+ for (unsigned I = 0; I < NumAttr; ++I)
+ New.Attributes[I] = CallerProps.Attributes[I];
+ return New;
+ }
+
+ FeatureBitset Features;
+ Optional<Attribute> Attributes[NumAttr];
};
- class Clone{
+ class Clone {
public:
- Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) :
- FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {}
+ Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
+ Properties(Props), OrigF(OrigF), NewF(NewF) {}
- FeatureBitset FeatureMask;
+ FnProperties Properties;
Function *OrigF;
Function *NewF;
};
@@ -77,17 +120,19 @@ class AMDGPUPropagateAttributes {
SmallVector<Clone, 32> Clones;
// Find a clone with required features.
- Function *findFunction(const FeatureBitset &FeaturesNeeded,
+ Function *findFunction(const FnProperties &PropsNeeded,
Function *OrigF);
- // Clone function F and set NewFeatures on the clone.
+ // Clone function \p F and set \p NewProps on the clone.
  // The clone takes the name of the original function.
- Function *cloneWithFeatures(Function &F,
- const FeatureBitset &NewFeatures);
+ Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
// Set new function's features in place.
void setFeatures(Function &F, const FeatureBitset &NewFeatures);
+ // Set new function's attributes in place.
+ void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);
+
std::string getFeatureString(const FeatureBitset &Features) const;
// Propagate attributes from Roots.
@@ -155,11 +200,11 @@ INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
false, false)
Function *
-AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded,
+AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
Function *OrigF) {
// TODO: search for clone's clones.
for (Clone &C : Clones)
- if (C.OrigF == OrigF && FeaturesNeeded == C.FeatureMask)
+ if (C.OrigF == OrigF && PropsNeeded == C.Properties)
return C.NewF;
return nullptr;
@@ -192,12 +237,12 @@ bool AMDGPUPropagateAttributes::process() {
NewRoots.clear();
for (auto &F : M.functions()) {
- if (F.isDeclaration() || Roots.count(&F) || Roots.count(&F))
+ if (F.isDeclaration())
continue;
- const FeatureBitset &CalleeBits =
- TM->getSubtargetImpl(F)->getFeatureBits();
+ const FnProperties CalleeProps(*TM, F);
SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
+ SmallSet<CallBase *, 32> Visited;
for (User *U : F.users()) {
Instruction *I = dyn_cast<Instruction>(U);
@@ -207,36 +252,36 @@ bool AMDGPUPropagateAttributes::process() {
if (!CI)
continue;
Function *Caller = CI->getCaller();
- if (!Caller)
+ if (!Caller || !Visited.insert(CI).second)
continue;
- if (!Roots.count(Caller))
+ if (!Roots.count(Caller) && !NewRoots.count(Caller))
continue;
- const FeatureBitset &CallerBits =
- TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures;
+ const FnProperties CallerProps(*TM, *Caller);
- if (CallerBits == (CalleeBits & TargetFeatures)) {
- NewRoots.insert(&F);
+ if (CalleeProps == CallerProps) {
+ if (!Roots.count(&F))
+ NewRoots.insert(&F);
continue;
}
- Function *NewF = findFunction(CallerBits, &F);
+ Function *NewF = findFunction(CallerProps, &F);
if (!NewF) {
- FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) |
- CallerBits);
+ const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
if (!AllowClone) {
        // This may set different features on different iterations if
// there is a contradiction in callers' attributes. In this case
// we rely on a second pass running on Module, which is allowed
// to clone.
- setFeatures(F, NewFeatures);
+ setFeatures(F, NewProps.Features);
+ setAttributes(F, NewProps.Attributes);
NewRoots.insert(&F);
Changed = true;
break;
}
- NewF = cloneWithFeatures(F, NewFeatures);
- Clones.push_back(Clone(CallerBits, &F, NewF));
+ NewF = cloneWithProperties(F, NewProps);
+ Clones.push_back(Clone(CallerProps, &F, NewF));
NewRoots.insert(NewF);
}
@@ -258,28 +303,30 @@ bool AMDGPUPropagateAttributes::process() {
F->eraseFromParent();
}
+ Roots.clear();
+ Clones.clear();
+
return Changed;
}
Function *
-AMDGPUPropagateAttributes::cloneWithFeatures(Function &F,
- const FeatureBitset &NewFeatures) {
+AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
+ const FnProperties &NewProps) {
LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
ValueToValueMapTy dummy;
Function *NewF = CloneFunction(&F, dummy);
- setFeatures(*NewF, NewFeatures);
+ setFeatures(*NewF, NewProps.Features);
+ setAttributes(*NewF, NewProps.Attributes);
+ NewF->setVisibility(GlobalValue::DefaultVisibility);
+ NewF->setLinkage(GlobalValue::InternalLinkage);
// Swap names. If that is the only clone it will retain the name of now
- // dead value.
- if (F.hasName()) {
- std::string NewName = NewF->getName();
+ // dead value. Preserve original name for externally visible functions.
+ if (F.hasName() && F.hasLocalLinkage()) {
+ std::string NewName = std::string(NewF->getName());
NewF->takeName(&F);
F.setName(NewName);
-
- // Name has changed, it does not need an external symbol.
- F.setVisibility(GlobalValue::DefaultVisibility);
- F.setLinkage(GlobalValue::InternalLinkage);
}
return NewF;
@@ -297,6 +344,18 @@ void AMDGPUPropagateAttributes::setFeatures(Function &F,
F.addFnAttr("target-features", NewFeatureStr);
}
+void AMDGPUPropagateAttributes::setAttributes(Function &F,
+ const ArrayRef<Optional<Attribute>> NewAttrs) {
+ LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
+ for (unsigned I = 0; I < NumAttr; ++I) {
+ F.removeFnAttr(AttributeNames[I]);
+ if (NewAttrs[I]) {
+ LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n');
+ F.addFnAttr(*NewAttrs[I]);
+ }
+ }
+}
+
std::string
AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
{
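
FnProperties::adjustToCaller above keeps the callee's bits outside TargetFeatures and adopts the caller's wavefront-size bits. A simplified standalone sketch of that intent with plain integer masks (FeatureBitset and the real feature numbering are not modeled, and the patch ORs in the caller's full feature set; the caller mask here just keeps the example focused on the propagated bits):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t TargetFeatures = 0b0111; // stand-in for the wavefront-size bits
  uint64_t Callee         = 0b1100; // wave64 (bit 2) plus an unrelated bit 3
  uint64_t Caller         = 0b0001; // wave16 (bit 0)
  // Keep the callee's unrelated bits, adopt the caller's target-feature bits.
  uint64_t Adjusted = (Callee & ~TargetFeatures) | (Caller & TargetFeatures);
  std::printf("0x%llx\n", (unsigned long long)Adjusted); // 0x9: bit 3 + wave16
  return 0;
}
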
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
new file mode 100644
index 000000000000..71d82679b3ff
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -0,0 +1,154 @@
+//=== lib/CodeGen/GlobalISel/AMDGPURegBankCombiner.cpp ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does combining of machine instructions at the generic MI level,
+// after register banks are known.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetMachine.h"
+#include "AMDGPULegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/Debug.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+
+#define DEBUG_TYPE "amdgpu-regbank-combiner"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+
+#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+#include "AMDGPUGenRegBankGICombiner.inc"
+#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+
+namespace {
+#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
+#include "AMDGPUGenRegBankGICombiner.inc"
+#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
+
+class AMDGPURegBankCombinerInfo : public CombinerInfo {
+ GISelKnownBits *KB;
+ MachineDominatorTree *MDT;
+
+public:
+ AMDGPUGenRegBankCombinerHelperRuleConfig GeneratedRuleCfg;
+
+ AMDGPURegBankCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
+ const AMDGPULegalizerInfo *LI,
+ GISelKnownBits *KB, MachineDominatorTree *MDT)
+ : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
+ /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
+ KB(KB), MDT(MDT) {
+ if (!GeneratedRuleCfg.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
+ }
+
+ bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const override;
+};
+
+bool AMDGPURegBankCombinerInfo::combine(GISelChangeObserver &Observer,
+ MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ CombinerHelper Helper(Observer, B, KB, MDT);
+ AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg);
+
+ if (Generated.tryCombineAll(Observer, MI, B, Helper))
+ return true;
+
+ return false;
+}
+
+#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
+#include "AMDGPUGenRegBankGICombiner.inc"
+#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
+
+// Pass boilerplate
+// ================
+
+class AMDGPURegBankCombiner : public MachineFunctionPass {
+public:
+ static char ID;
+
+ AMDGPURegBankCombiner(bool IsOptNone = false);
+
+ StringRef getPassName() const override {
+ return "AMDGPURegBankCombiner";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+private:
+ bool IsOptNone;
+};
+} // end anonymous namespace
+
+void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
+ if (!IsOptNone) {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
+ : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
+ initializeAMDGPURegBankCombinerPass(*PassRegistry::getPassRegistry());
+}
+
+bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ auto *TPC = &getAnalysis<TargetPassConfig>();
+ const Function &F = MF.getFunction();
+ bool EnableOpt =
+ MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
+
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const AMDGPULegalizerInfo *LI
+ = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
+
+ GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+ MachineDominatorTree *MDT =
+ IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
+ AMDGPURegBankCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
+ F.hasMinSize(), LI, KB, MDT);
+ Combiner C(PCInfo, TPC);
+ return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
+}
+
+char AMDGPURegBankCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE,
+ "Combine AMDGPU machine instrs after regbankselect",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
+INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE,
+ "Combine AMDGPU machine instrs after regbankselect", false,
+ false)
+
+namespace llvm {
+FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone) {
+ return new AMDGPURegBankCombiner(IsOptNone);
+}
+} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 1bb01dc8fa11..dfaf97bfb08e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -8,10 +8,69 @@
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AMDGPU.
-/// \todo This should be generated by TableGen.
+///
+/// \par
+///
+/// AMDGPU has unique register bank constraints that require special high level
+/// strategies to deal with. There are two main true physical register banks:
+/// VGPR (vector) and SGPR (scalar). Additionally, the VCC register bank is a
+/// sort of pseudo-register bank needed to represent SGPRs used in a vector
+/// boolean context. There is also the AGPR bank, which is a special purpose
+/// physical register bank present on some subtargets.
+///
+/// Copying from VGPR to SGPR is generally illegal, unless the value is known to
+/// be uniform. It is generally not valid to legalize operands by inserting
+/// copies as on other targets. Operations which require uniform, SGPR operands
+/// generally require scalarization by repeatedly executing the instruction,
+/// activating each set of lanes using a unique set of input values. This is
+/// referred to as a waterfall loop.
+///
+/// \par Booleans
+///
+/// Booleans (s1 values) require special consideration. A vector compare result
+/// is naturally a bitmask with one bit per lane, in a 32 or 64-bit
+/// register. These are represented with the VCC bank. During selection, we need
+/// to be able to unambiguously go back from a register class to a register
+/// bank. To distinguish whether an SGPR should use the SGPR or VCC register
+/// bank, we need to know the use context type. An SGPR s1 value always means a
+/// VCC bank value, otherwise it will be the SGPR bank. A scalar compare sets
+/// SCC, which is a 1-bit unaddressable register. This will need to be copied to
+/// a 32-bit virtual register. Taken together, this means we need to adjust the
+/// type of boolean operations to be regbank legal. All SALU booleans need to be
+/// widened to 32-bits, and all VALU booleans need to be s1 values.
+///
+/// A noteworthy exception to the s1-means-vcc rule is for legalization artifact
+/// casts. G_TRUNC s1 results, and G_SEXT/G_ZEXT/G_ANYEXT sources are never vcc
+/// bank. A non-boolean source (such as a truncate from a 1-bit load from
+/// memory) will require a copy to the VCC bank which will require clearing the
+/// high bits and inserting a compare.
+///
+/// \par Constant bus restriction
+///
+/// VALU instructions have a limitation known as the constant bus
+/// restriction. Most VALU instructions can use SGPR operands, but may read at
+/// most 1 SGPR or constant literal value (this increases to 2 in gfx10 for
+/// most instructions). The limit counts unique SGPRs, so the same SGPR may be
+/// used for multiple operands. From a register bank perspective, any
+/// combination of operands should be legal as an SGPR, but this is
+/// contextually dependent on the SGPR operands all being the same register. It
+/// is therefore optimal to choose the SGPR with the most uses to minimize the
+/// number of copies.
+///
+/// We avoid trying to solve this problem in RegBankSelect. Any VALU G_*
+/// operation should have its source operands all mapped to VGPRs (except for
+/// VCC), inserting copies from any SGPR operands. This is the most trivial legal
+/// mapping. Anything beyond the simplest 1:1 instruction selection would be too
+/// complicated to solve here. Every optimization pattern or instruction
+/// selected to multiple outputs would have to enforce this rule, and there
+/// would be additional complexity in tracking this rule for every G_*
+/// operation. By forcing all inputs to VGPRs, it also simplifies the task of
+/// picking the optimal operand combination from a post-isel optimization pass.
+///
//===----------------------------------------------------------------------===//
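// A minimal standalone sketch of the constant bus rule described above
// (illustrative only, not part of the patch; the SrcOperand struct and the
// fitsConstantBus helper are invented, and real operand legality also depends
// on the instruction encoding). The point it demonstrates: only *distinct*
// scalar sources count against the limit, so reusing one SGPR for several
// operands is free.
#include <cassert>
#include <cstdint>
#include <set>
#include <vector>

struct SrcOperand {
  bool IsScalar; // SGPR or literal constant, as opposed to a VGPR
  uint32_t Id;   // register number, or a key identifying the literal
};

// Limit is 1 on most subtargets, 2 on gfx10 for most instructions.
static bool fitsConstantBus(const std::vector<SrcOperand> &Srcs,
                            unsigned Limit) {
  std::set<uint32_t> DistinctScalars;
  for (const SrcOperand &S : Srcs)
    if (S.IsScalar)
      DistinctScalars.insert(S.Id);
  return DistinctScalars.size() <= Limit;
}

int main() {
  assert(fitsConstantBus({{true, 0}, {true, 0}}, 1));   // same SGPR twice: ok
  assert(!fitsConstantBus({{true, 0}, {true, 1}}, 1));  // two SGPRs: not ok
  assert(fitsConstantBus({{true, 0}, {true, 1}}, 2));   // ok on gfx10
  assert(fitsConstantBus({{false, 2}, {false, 3}}, 1)); // VGPRs are unlimited
}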
#include "AMDGPURegisterBankInfo.h"
+
+#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -19,8 +78,8 @@
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -101,8 +160,9 @@ public:
if (!Op.isReg())
continue;
+ // We may see physical registers if building a real MI
Register Reg = Op.getReg();
- if (MRI.getRegClassOrRegBank(Reg))
+ if (Reg.isPhysical() || MRI.getRegClassOrRegBank(Reg))
continue;
const RegisterBank *RB = NewBank;
@@ -138,15 +198,16 @@ AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST)
TII(Subtarget.getInstrInfo()) {
// HACK: Until this is fully tablegen'd.
- static bool AlreadyInit = false;
- if (AlreadyInit)
- return;
+ static llvm::once_flag InitializeRegisterBankFlag;
- AlreadyInit = true;
+ static auto InitializeRegisterBankOnce = [this]() {
+ assert(&getRegBank(AMDGPU::SGPRRegBankID) == &AMDGPU::SGPRRegBank &&
+ &getRegBank(AMDGPU::VGPRRegBankID) == &AMDGPU::VGPRRegBank &&
+ &getRegBank(AMDGPU::AGPRRegBankID) == &AMDGPU::AGPRRegBank);
+ (void)this;
+ };
- assert(&getRegBank(AMDGPU::SGPRRegBankID) == &AMDGPU::SGPRRegBank &&
- &getRegBank(AMDGPU::VGPRRegBankID) == &AMDGPU::VGPRRegBank &&
- &getRegBank(AMDGPU::AGPRRegBankID) == &AMDGPU::AGPRRegBank);
+ llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}
static bool isVectorRegisterBank(const RegisterBank &Bank) {
@@ -159,7 +220,7 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
unsigned Size) const {
// TODO: Should there be a UniformVGPRRegBank which can use readfirstlane?
if (Dst.getID() == AMDGPU::SGPRRegBankID &&
- isVectorRegisterBank(Src)) {
+ (isVectorRegisterBank(Src) || Src.getID() == AMDGPU::VCCRegBankID)) {
return std::numeric_limits<unsigned>::max();
}
@@ -177,9 +238,6 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
Src.getID() == AMDGPU::VCCRegBankID))
return std::numeric_limits<unsigned>::max();
- if (Src.getID() == AMDGPU::VCCRegBankID)
- return std::numeric_limits<unsigned>::max();
-
// There is no direct copy between AGPRs.
if (Dst.getID() == AMDGPU::AGPRRegBankID &&
Src.getID() == AMDGPU::AGPRRegBankID)
@@ -317,22 +375,6 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
switch (MI.getIntrinsicID()) {
- case Intrinsic::amdgcn_buffer_load: {
- static const OpRegBankEntry<3> Table[4] = {
- // Perfectly legal.
- { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
- { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
-
- // Waterfall loop needed for rsrc. In the worst case this will execute
- // approximately an extra 10 * wavesize + 2 instructions.
- { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
- { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1000 }
- };
-
- // rsrc, voffset, offset
- const std::array<unsigned, 3> RegSrcOpIdx = { { 2, 3, 4 } };
- return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
- }
case Intrinsic::amdgcn_s_buffer_load: {
static const OpRegBankEntry<2> Table[4] = {
// Perfectly legal.
@@ -402,15 +444,15 @@ static bool isScalarLoadLegal(const MachineInstr &MI) {
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
// There are no extending SMRD/SMEM loads, and they require 4-byte alignment.
- return MMO->getSize() >= 4 && MMO->getAlignment() >= 4 &&
- // Can't do a scalar atomic load.
- !MMO->isAtomic() &&
- // Don't use scalar loads for volatile accesses to non-constant address
- // spaces.
- (IsConst || !MMO->isVolatile()) &&
- // Memory must be known constant, or not written before this load.
- (IsConst || MMO->isInvariant() || memOpHasNoClobbered(MMO)) &&
- AMDGPUInstrInfo::isUniformMMO(MMO);
+ return MMO->getSize() >= 4 && MMO->getAlign() >= Align(4) &&
+ // Can't do a scalar atomic load.
+ !MMO->isAtomic() &&
+ // Don't use scalar loads for volatile accesses to non-constant address
+ // spaces.
+ (IsConst || !MMO->isVolatile()) &&
+ // Memory must be known constant, or not written before this load.
+ (IsConst || MMO->isInvariant() || memOpHasNoClobbered(MMO)) &&
+ AMDGPUInstrInfo::isUniformMMO(MMO);
}
RegisterBankInfo::InstructionMappings
@@ -490,24 +532,6 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
3); // Num Operands
AltMappings.push_back(&VVMapping);
-
- const InstructionMapping &SVMapping = getInstructionMapping(
- 3, 3, getOperandsMapping(
- {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
- AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size),
- AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
- 3); // Num Operands
- AltMappings.push_back(&SVMapping);
-
- // SGPR in LHS is slightly preferrable, so make it VS more expensive than
- // SV.
- const InstructionMapping &VSMapping = getInstructionMapping(
- 3, 4, getOperandsMapping(
- {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
- AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
- AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size)}),
- 3); // Num Operands
- AltMappings.push_back(&VSMapping);
break;
}
case TargetOpcode::G_LOAD:
@@ -517,7 +541,6 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
unsigned PtrSize = PtrTy.getSizeInBits();
unsigned AS = PtrTy.getAddressSpace();
- LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
if ((AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
AS != AMDGPUAS::PRIVATE_ADDRESS) &&
@@ -531,9 +554,10 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
}
const InstructionMapping &VVMapping = getInstructionMapping(
- 2, 1, getOperandsMapping(
- {AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy),
- AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),
+ 2, 1,
+ getOperandsMapping(
+ {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),
2); // Num Operands
AltMappings.push_back(&VVMapping);
@@ -546,43 +570,6 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
return AltMappings;
}
- case TargetOpcode::G_ICMP: {
- // TODO: Should report 32-bit for scalar output type.
- unsigned Size = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
- const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
- getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
- nullptr, // Predicate operand.
- AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
- AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
- 4); // Num Operands
- AltMappings.push_back(&SSMapping);
-
- const InstructionMapping &SVMapping = getInstructionMapping(2, 1,
- getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
- nullptr, // Predicate operand.
- AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
- AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
- 4); // Num Operands
- AltMappings.push_back(&SVMapping);
-
- const InstructionMapping &VSMapping = getInstructionMapping(3, 1,
- getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
- nullptr, // Predicate operand.
- AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
- AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
- 4); // Num Operands
- AltMappings.push_back(&VSMapping);
-
- const InstructionMapping &VVMapping = getInstructionMapping(4, 1,
- getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
- nullptr, // Predicate operand.
- AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
- AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
- 4); // Num Operands
- AltMappings.push_back(&VVMapping);
-
- return AltMappings;
- }
case TargetOpcode::G_SELECT: {
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
@@ -607,10 +594,8 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
case TargetOpcode::G_SMAX:
case TargetOpcode::G_UMIN:
case TargetOpcode::G_UMAX: {
- static const OpRegBankEntry<3> Table[4] = {
+ static const OpRegBankEntry<3> Table[2] = {
{ { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
- { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
- { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
// Scalar requires cmp+select, and extends if 16-bit.
// FIXME: Should there be separate costs for 32 and 16-bit
@@ -740,6 +725,10 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
SmallVector<Register, 4> InitResultRegs;
SmallVector<Register, 4> PhiRegs;
+ // Track use registers which have already been expanded with a readfirstlane
+  // sequence. A register may have multiple uses when a whole sequence is moved.
+ DenseMap<Register, Register> WaterfalledRegMap;
+
MachineBasicBlock &MBB = B.getMBB();
MachineFunction *MF = &B.getMF();
@@ -755,6 +744,10 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
const unsigned ExecReg = Subtarget.isWave32() ?
AMDGPU::EXEC_LO : AMDGPU::EXEC;
+#ifndef NDEBUG
+ const int OrigRangeSize = std::distance(Range.begin(), Range.end());
+#endif
+
for (MachineInstr &MI : Range) {
for (MachineOperand &Def : MI.defs()) {
LLT ResTy = MRI.getType(Def.getReg());
@@ -820,13 +813,14 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
const DebugLoc &DL = B.getDL();
- // Figure out the iterator range after splicing the instructions.
- auto NewBegin = std::prev(LoopBB->end());
+ MachineInstr &FirstInst = *Range.begin();
// Move the instruction into the loop. Note we moved everything after
// Range.end() already into a new block, so Range.end() is no longer valid.
LoopBB->splice(LoopBB->end(), &MBB, Range.begin(), MBB.end());
+ // Figure out the iterator range after splicing the instructions.
+ MachineBasicBlock::iterator NewBegin = FirstInst.getIterator();
auto NewEnd = LoopBB->end();
MachineBasicBlock::iterator I = Range.begin();
@@ -834,39 +828,145 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
Register CondReg;
+ assert(std::distance(NewBegin, NewEnd) == OrigRangeSize);
+
for (MachineInstr &MI : make_range(NewBegin, NewEnd)) {
for (MachineOperand &Op : MI.uses()) {
if (!Op.isReg() || Op.isDef())
continue;
- if (SGPROperandRegs.count(Op.getReg())) {
- LLT OpTy = MRI.getType(Op.getReg());
- unsigned OpSize = OpTy.getSizeInBits();
+ Register OldReg = Op.getReg();
+ if (!SGPROperandRegs.count(OldReg))
+ continue;
+
+ // See if we already processed this register in another instruction in the
+ // sequence.
+ auto OldVal = WaterfalledRegMap.find(OldReg);
+ if (OldVal != WaterfalledRegMap.end()) {
+ Op.setReg(OldVal->second);
+ continue;
+ }
+
+ LLT OpTy = MRI.getType(Op.getReg());
+ unsigned OpSize = OpTy.getSizeInBits();
+
+ // Can only do a readlane of 32-bit pieces.
+ if (OpSize == 32) {
+ // Avoid extra copies in the simple case of one 32-bit register.
+ Register CurrentLaneOpReg
+ = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ MRI.setType(CurrentLaneOpReg, OpTy);
+
+ constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
+ // Read the next variant <- also loop target.
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ CurrentLaneOpReg)
+ .addReg(Op.getReg());
+
+ Register NewCondReg = MRI.createVirtualRegister(WaveRC);
+ bool First = CondReg == AMDGPU::NoRegister;
+ if (First)
+ CondReg = NewCondReg;
+
+ // Compare the just read M0 value to all possible Idx values.
+ B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
+ .addDef(NewCondReg)
+ .addReg(CurrentLaneOpReg)
+ .addReg(Op.getReg());
+ Op.setReg(CurrentLaneOpReg);
+
+ if (!First) {
+ Register AndReg = MRI.createVirtualRegister(WaveRC);
+
+          // If there are multiple operands to consider, AND the conditions together.
+ B.buildInstr(WaveAndOpc)
+ .addDef(AndReg)
+ .addReg(NewCondReg)
+ .addReg(CondReg);
+ CondReg = AndReg;
+ }
+ } else {
+ LLT S32 = LLT::scalar(32);
+ SmallVector<Register, 8> ReadlanePieces;
+
+ // The compares can be done as 64-bit, but the extract needs to be done
+ // in 32-bit pieces.
+
+ bool Is64 = OpSize % 64 == 0;
+
+ LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32);
+ unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64
+ : AMDGPU::V_CMP_EQ_U32_e64;
+
+ // The compares can be done as 64-bit, but the extract needs to be done
+ // in 32-bit pieces.
+
+ // Insert the unmerge before the loop.
+
+ B.setMBB(MBB);
+ auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg());
+ B.setInstr(*I);
+
+ unsigned NumPieces = Unmerge->getNumOperands() - 1;
+ for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) {
+ Register UnmergePiece = Unmerge.getReg(PieceIdx);
+
+ Register CurrentLaneOpReg;
+ if (Is64) {
+ Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32);
+ Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32);
+
+ MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass);
+ MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass);
+ MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass);
+
+ // Read the next variant <- also loop target.
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ CurrentLaneOpRegLo)
+ .addReg(UnmergePiece, 0, AMDGPU::sub0);
+
+ // Read the next variant <- also loop target.
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ CurrentLaneOpRegHi)
+ .addReg(UnmergePiece, 0, AMDGPU::sub1);
+
+ CurrentLaneOpReg =
+ B.buildMerge(LLT::scalar(64),
+ {CurrentLaneOpRegLo, CurrentLaneOpRegHi})
+ .getReg(0);
- // Can only do a readlane of 32-bit pieces.
- if (OpSize == 32) {
- // Avoid extra copies in the simple case of one 32-bit register.
- Register CurrentLaneOpReg
- = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- MRI.setType(CurrentLaneOpReg, OpTy);
+ MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass);
- constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
- // Read the next variant <- also loop target.
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- CurrentLaneOpReg)
- .addReg(Op.getReg());
+ if (OpTy.getScalarSizeInBits() == 64) {
+              // If we need to produce a 64-bit element vector, use the
+              // merged pieces.
+ ReadlanePieces.push_back(CurrentLaneOpReg);
+ } else {
+ // 32-bit element type.
+ ReadlanePieces.push_back(CurrentLaneOpRegLo);
+ ReadlanePieces.push_back(CurrentLaneOpRegHi);
+ }
+ } else {
+ CurrentLaneOpReg = MRI.createGenericVirtualRegister(S32);
+ MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass);
+ MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass);
+
+ // Read the next variant <- also loop target.
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ CurrentLaneOpReg)
+ .addReg(UnmergePiece);
+ ReadlanePieces.push_back(CurrentLaneOpReg);
+ }
Register NewCondReg = MRI.createVirtualRegister(WaveRC);
bool First = CondReg == AMDGPU::NoRegister;
if (First)
CondReg = NewCondReg;
- // Compare the just read M0 value to all possible Idx values.
- B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
+ B.buildInstr(CmpOp)
.addDef(NewCondReg)
.addReg(CurrentLaneOpReg)
- .addReg(Op.getReg());
- Op.setReg(CurrentLaneOpReg);
+ .addReg(UnmergePiece);
if (!First) {
Register AndReg = MRI.createVirtualRegister(WaveRC);
@@ -878,114 +978,23 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
.addReg(CondReg);
CondReg = AndReg;
}
- } else {
- LLT S32 = LLT::scalar(32);
- SmallVector<Register, 8> ReadlanePieces;
-
- // The compares can be done as 64-bit, but the extract needs to be done
- // in 32-bit pieces.
-
- bool Is64 = OpSize % 64 == 0;
-
- LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32);
- unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64
- : AMDGPU::V_CMP_EQ_U32_e64;
-
- // The compares can be done as 64-bit, but the extract needs to be done
- // in 32-bit pieces.
-
- // Insert the unmerge before the loop.
-
- B.setMBB(MBB);
- auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg());
- B.setInstr(*I);
-
- unsigned NumPieces = Unmerge->getNumOperands() - 1;
- for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) {
- Register UnmergePiece = Unmerge.getReg(PieceIdx);
-
- Register CurrentLaneOpReg;
- if (Is64) {
- Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32);
- Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32);
-
- MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass);
- MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass);
- MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass);
-
- // Read the next variant <- also loop target.
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- CurrentLaneOpRegLo)
- .addReg(UnmergePiece, 0, AMDGPU::sub0);
-
- // Read the next variant <- also loop target.
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- CurrentLaneOpRegHi)
- .addReg(UnmergePiece, 0, AMDGPU::sub1);
-
- CurrentLaneOpReg =
- B.buildMerge(LLT::scalar(64),
- {CurrentLaneOpRegLo, CurrentLaneOpRegHi})
- .getReg(0);
-
- MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass);
-
- if (OpTy.getScalarSizeInBits() == 64) {
- // If we need to produce a 64-bit element vector, so use the
- // merged pieces
- ReadlanePieces.push_back(CurrentLaneOpReg);
- } else {
- // 32-bit element type.
- ReadlanePieces.push_back(CurrentLaneOpRegLo);
- ReadlanePieces.push_back(CurrentLaneOpRegHi);
- }
- } else {
- CurrentLaneOpReg = MRI.createGenericVirtualRegister(S32);
- MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass);
- MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass);
-
- // Read the next variant <- also loop target.
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- CurrentLaneOpReg)
- .addReg(UnmergePiece);
- ReadlanePieces.push_back(CurrentLaneOpReg);
- }
-
- Register NewCondReg = MRI.createVirtualRegister(WaveRC);
- bool First = CondReg == AMDGPU::NoRegister;
- if (First)
- CondReg = NewCondReg;
-
- B.buildInstr(CmpOp)
- .addDef(NewCondReg)
- .addReg(CurrentLaneOpReg)
- .addReg(UnmergePiece);
-
- if (!First) {
- Register AndReg = MRI.createVirtualRegister(WaveRC);
-
- // If there are multiple operands to consider, and the conditions.
- B.buildInstr(WaveAndOpc)
- .addDef(AndReg)
- .addReg(NewCondReg)
- .addReg(CondReg);
- CondReg = AndReg;
- }
- }
-
- // FIXME: Build merge seems to switch to CONCAT_VECTORS but not
- // BUILD_VECTOR
- if (OpTy.isVector()) {
- auto Merge = B.buildBuildVector(OpTy, ReadlanePieces);
- Op.setReg(Merge.getReg(0));
- } else {
- auto Merge = B.buildMerge(OpTy, ReadlanePieces);
- Op.setReg(Merge.getReg(0));
- }
+ }
- MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank);
+ // FIXME: Build merge seems to switch to CONCAT_VECTORS but not
+ // BUILD_VECTOR
+ if (OpTy.isVector()) {
+ auto Merge = B.buildBuildVector(OpTy, ReadlanePieces);
+ Op.setReg(Merge.getReg(0));
+ } else {
+ auto Merge = B.buildMerge(OpTy, ReadlanePieces);
+ Op.setReg(Merge.getReg(0));
}
+
+ MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank);
}
+
+ // Make sure we don't re-process this register again.
+ WaterfalledRegMap.insert(std::make_pair(OldReg, Op.getReg()));
}
}
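// A scalar model of the waterfall loop built above, assuming a hypothetical
// 8-lane wave (illustrative only, not part of the patch): readfirstlane the
// operand that must be uniform, run the instruction for every lane that holds
// the same value, clear those lanes from exec, and repeat until exec is empty.
#include <array>
#include <cstdint>
#include <cstdio>

int main() {
  // Per-lane values of an operand the instruction needs in an SGPR.
  std::array<uint32_t, 8> LaneValues = {3, 3, 7, 3, 9, 7, 7, 9};
  uint8_t Exec = 0xff; // all 8 lanes start active

  while (Exec) {
    // V_READFIRSTLANE_B32: take the value of the lowest active lane.
    unsigned FirstLane = 0;
    while (!(Exec & (1u << FirstLane)))
      ++FirstLane;
    uint32_t Uniform = LaneValues[FirstLane];

    // V_CMP_EQ + S_AND: mask of all active lanes holding that same value.
    uint8_t Match = 0;
    for (unsigned L = 0; L != 8; ++L)
      if ((Exec & (1u << L)) && LaneValues[L] == Uniform)
        Match |= (uint8_t)(1u << L);

    // Execute the instruction for exactly those lanes with the uniform value...
    std::printf("iteration: value %u, lane mask 0x%02x\n", Uniform, Match);

    // ...then drop them from exec and branch back while any lanes remain.
    Exec &= (uint8_t)~Match;
  }
}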
@@ -1093,53 +1102,89 @@ void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
MI.getOperand(OpIdx).setReg(SGPR);
}
-// When regbankselect repairs registers, it will insert a repair instruction
-// which defines the repaired register. Then it calls applyMapping and expects
-// that the targets will either delete or rewrite the originally wrote to the
-// repaired registers. Beccause of this, we end up in a situation where
-// we have 2 instructions defining the same registers.
-static MachineInstr *getOtherVRegDef(const MachineRegisterInfo &MRI,
- Register Reg,
- const MachineInstr &MI) {
- // Is there some way we can assert that there are exactly 2 def instructions?
- for (MachineInstr &Other : MRI.def_instructions(Reg)) {
- if (&Other != &MI)
- return &Other;
- }
-
- return nullptr;
+/// Split \p Ty into 2 pieces. The first will have \p FirstSize bits, and the
+/// rest will be in the remainder.
+static std::pair<LLT, LLT> splitUnequalType(LLT Ty, unsigned FirstSize) {
+ unsigned TotalSize = Ty.getSizeInBits();
+ if (!Ty.isVector())
+ return {LLT::scalar(FirstSize), LLT::scalar(TotalSize - FirstSize)};
+
+ LLT EltTy = Ty.getElementType();
+ unsigned EltSize = EltTy.getSizeInBits();
+ assert(FirstSize % EltSize == 0);
+
+ unsigned FirstPartNumElts = FirstSize / EltSize;
+ unsigned RemainderElts = (TotalSize - FirstSize) / EltSize;
+
+ return {LLT::scalarOrVector(FirstPartNumElts, EltTy),
+ LLT::scalarOrVector(RemainderElts, EltTy)};
+}
+
+static LLT widen96To128(LLT Ty) {
+ if (!Ty.isVector())
+ return LLT::scalar(128);
+
+ LLT EltTy = Ty.getElementType();
+ assert(128 % EltTy.getSizeInBits() == 0);
+ return LLT::vector(128 / EltTy.getSizeInBits(), EltTy);
}
-bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
+bool AMDGPURegisterBankInfo::applyMappingLoad(MachineInstr &MI,
const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
MachineRegisterInfo &MRI) const {
Register DstReg = MI.getOperand(0).getReg();
- const LLT LoadTy = MRI.getType(DstReg);
+ const LLT LoadTy = MRI.getType(DstReg);
unsigned LoadSize = LoadTy.getSizeInBits();
const unsigned MaxNonSmrdLoadSize = 128;
+
+ const RegisterBank *PtrBank =
+ OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
+ if (PtrBank == &AMDGPU::SGPRRegBank) {
+ // If the pointer is an SGPR, we ordinarily have nothing to do.
+ if (LoadSize != 96)
+ return false;
+
+ MachineMemOperand *MMO = *MI.memoperands_begin();
+ Register PtrReg = MI.getOperand(1).getReg();
+ // 96-bit loads are only available for vector loads. We need to split this
+    // into a 64-bit part and a 32-bit part (unless we can widen to a 128-bit
+    // load).
+
+ MachineIRBuilder B(MI);
+ ApplyRegBankMapping O(*this, MRI, &AMDGPU::SGPRRegBank);
+ GISelObserverWrapper Observer(&O);
+ B.setChangeObserver(Observer);
+
+ if (MMO->getAlign() < Align(16)) {
+ LLT Part64, Part32;
+ std::tie(Part64, Part32) = splitUnequalType(LoadTy, 64);
+ auto Load0 = B.buildLoadFromOffset(Part64, PtrReg, *MMO, 0);
+ auto Load1 = B.buildLoadFromOffset(Part32, PtrReg, *MMO, 8);
+
+ auto Undef = B.buildUndef(LoadTy);
+ auto Ins0 = B.buildInsert(LoadTy, Undef, Load0, 0);
+ B.buildInsert(MI.getOperand(0), Ins0, Load1, 64);
+ } else {
+ LLT WiderTy = widen96To128(LoadTy);
+ auto WideLoad = B.buildLoadFromOffset(WiderTy, PtrReg, *MMO, 0);
+ B.buildExtract(MI.getOperand(0), WideLoad, 0);
+ }
+
+ MI.eraseFromParent();
+ return true;
+ }
+
// 128-bit loads are supported for all instruction types.
if (LoadSize <= MaxNonSmrdLoadSize)
return false;
- SmallVector<unsigned, 16> DefRegs(OpdMapper.getVRegs(0));
- SmallVector<unsigned, 1> SrcRegs(OpdMapper.getVRegs(1));
+ SmallVector<Register, 16> DefRegs(OpdMapper.getVRegs(0));
+ SmallVector<Register, 1> SrcRegs(OpdMapper.getVRegs(1));
- // If the pointer is an SGPR, we have nothing to do.
- if (SrcRegs.empty()) {
- const RegisterBank *PtrBank =
- OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
- if (PtrBank == &AMDGPU::SGPRRegBank)
- return false;
+ if (SrcRegs.empty())
SrcRegs.push_back(MI.getOperand(1).getReg());
- }
assert(LoadSize % MaxNonSmrdLoadSize == 0);
- // We want to get the repair instruction now, because it will help us
- // determine which instruction the legalizer inserts that will also
- // write to DstReg.
- MachineInstr *RepairInst = getOtherVRegDef(MRI, DstReg, MI);
-
// RegBankSelect only emits scalar types, so we need to reset the pointer
// operand to a pointer type.
Register BasePtrReg = SrcRegs[0];
@@ -1148,38 +1193,72 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
MachineIRBuilder B(MI);
- unsigned SplitElts =
- MaxNonSmrdLoadSize / LoadTy.getScalarType().getSizeInBits();
- const LLT LoadSplitTy = LLT::vector(SplitElts, LoadTy.getScalarType());
+ unsigned NumSplitParts = LoadTy.getSizeInBits() / MaxNonSmrdLoadSize;
+ const LLT LoadSplitTy = LoadTy.divide(NumSplitParts);
ApplyRegBankMapping O(*this, MRI, &AMDGPU::VGPRRegBank);
GISelObserverWrapper Observer(&O);
B.setChangeObserver(Observer);
LegalizerHelper Helper(B.getMF(), Observer, B);
- if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized)
+
+ if (LoadTy.isVector()) {
+ if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized)
+ return false;
+ } else {
+ if (Helper.narrowScalar(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized)
+ return false;
+ }
+
+ MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
+ return true;
+}
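// A byte-level model of the 96-bit scalar-load split in applyMappingLoad above
// (illustrative only, not part of the patch; assumes the widened 128-bit access
// stays within dereferenceable memory, which is what justifies the widening in
// the real code): with 16-byte alignment a single 128-bit load is used and the
// top word discarded, otherwise the access is split into a 64-bit piece at
// offset 0 and a 32-bit piece at offset 8.
#include <cassert>
#include <cstdint>
#include <cstring>

struct V3I32 { uint32_t W[3]; };

static V3I32 load96(const uint8_t *Ptr, unsigned AlignInBytes) {
  V3I32 R;
  if (AlignInBytes >= 16) {
    uint32_t Wide[4]; // one 128-bit load; bits [127:96] are thrown away
    std::memcpy(Wide, Ptr, 16);
    R.W[0] = Wide[0];
    R.W[1] = Wide[1];
    R.W[2] = Wide[2];
  } else {
    std::memcpy(&R.W[0], Ptr, 8);     // 64-bit piece at offset 0
    std::memcpy(&R.W[2], Ptr + 8, 4); // 32-bit piece at offset 8
  }
  return R;
}

int main() {
  alignas(16) uint32_t Buf[4] = {1, 2, 3, 4};
  V3I32 A = load96(reinterpret_cast<const uint8_t *>(Buf), 16);
  V3I32 B = load96(reinterpret_cast<const uint8_t *>(Buf), 4);
  assert(A.W[0] == 1 && A.W[1] == 2 && A.W[2] == 3);
  assert(B.W[0] == 1 && B.W[1] == 2 && B.W[2] == 3);
}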
+
+bool AMDGPURegisterBankInfo::applyMappingDynStackAlloc(
+ MachineInstr &MI,
+ const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
+ MachineRegisterInfo &MRI) const {
+ const MachineFunction &MF = *MI.getMF();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const auto &TFI = *ST.getFrameLowering();
+
+ // Guard in case the stack growth direction ever changes with scratch
+ // instructions.
+ if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown)
return false;
- // At this point, the legalizer has split the original load into smaller
- // loads. At the end of lowering, it inserts an instruction (LegalizedInst)
- // that combines the outputs of the lower loads and writes it to DstReg.
- // The register bank selector has also added the RepairInst which writes to
- // DstReg as well.
+ Register Dst = MI.getOperand(0).getReg();
+ Register AllocSize = MI.getOperand(1).getReg();
+ Align Alignment = assumeAligned(MI.getOperand(2).getImm());
- MachineInstr *LegalizedInst = getOtherVRegDef(MRI, DstReg, *RepairInst);
+ const RegisterBank *SizeBank = getRegBank(AllocSize, MRI, *TRI);
- // Replace the output of the LegalizedInst with a temporary register, since
- // RepairInst already defines DstReg.
- Register TmpReg = MRI.createGenericVirtualRegister(MRI.getType(DstReg));
- LegalizedInst->getOperand(0).setReg(TmpReg);
- B.setInsertPt(*RepairInst->getParent(), RepairInst);
+ // TODO: Need to emit a wave reduction to get the maximum size.
+ if (SizeBank != &AMDGPU::SGPRRegBank)
+ return false;
- for (unsigned DefIdx = 0, e = DefRegs.size(); DefIdx != e; ++DefIdx) {
- Register IdxReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
- B.buildConstant(IdxReg, DefIdx);
- MRI.setRegBank(IdxReg, AMDGPU::VGPRRegBank);
- B.buildExtractVectorElement(DefRegs[DefIdx], TmpReg, IdxReg);
+ LLT PtrTy = MRI.getType(Dst);
+ LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
+
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ Register SPReg = Info->getStackPtrOffsetReg();
+ ApplyRegBankMapping ApplyBank(*this, MRI, &AMDGPU::SGPRRegBank);
+ GISelObserverWrapper Observer(&ApplyBank);
+
+ MachineIRBuilder B(MI);
+ B.setChangeObserver(Observer);
+
+ auto WaveSize = B.buildConstant(LLT::scalar(32), ST.getWavefrontSizeLog2());
+ auto ScaledSize = B.buildShl(IntPtrTy, AllocSize, WaveSize);
+
+ auto SPCopy = B.buildCopy(PtrTy, SPReg);
+ if (Alignment > TFI.getStackAlign()) {
+ auto PtrAdd = B.buildPtrAdd(PtrTy, SPCopy, ScaledSize);
+ B.buildMaskLowPtrBits(Dst, PtrAdd,
+ Log2(Alignment) + ST.getWavefrontSizeLog2());
+ } else {
+ B.buildPtrAdd(Dst, SPCopy, ScaledSize);
}
- MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
+ MI.eraseFromParent();
return true;
}
@@ -1210,6 +1289,281 @@ bool AMDGPURegisterBankInfo::applyMappingImage(
return true;
}
+static Register getSrcRegIgnoringCopies(const MachineRegisterInfo &MRI,
+ Register Reg) {
+ MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
+ if (!Def)
+ return Reg;
+
+ // TODO: Guard against this being an implicit def
+ return Def->getOperand(0).getReg();
+}
+
+// Analyze a combined offset from an llvm.amdgcn.s.buffer intrinsic and store
+// the three offsets (voffset, soffset and instoffset)
+static unsigned setBufferOffsets(MachineIRBuilder &B,
+ const AMDGPURegisterBankInfo &RBI,
+ Register CombinedOffset, Register &VOffsetReg,
+ Register &SOffsetReg, int64_t &InstOffsetVal,
+ Align Alignment) {
+ const LLT S32 = LLT::scalar(32);
+ MachineRegisterInfo *MRI = B.getMRI();
+
+ if (Optional<int64_t> Imm = getConstantVRegVal(CombinedOffset, *MRI)) {
+ uint32_t SOffset, ImmOffset;
+ if (AMDGPU::splitMUBUFOffset(*Imm, SOffset, ImmOffset, &RBI.Subtarget,
+ Alignment)) {
+ VOffsetReg = B.buildConstant(S32, 0).getReg(0);
+ SOffsetReg = B.buildConstant(S32, SOffset).getReg(0);
+ InstOffsetVal = ImmOffset;
+
+ B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
+ B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
+ return SOffset + ImmOffset;
+ }
+ }
+
+ Register Base;
+ unsigned Offset;
+ MachineInstr *Unused;
+
+ std::tie(Base, Offset, Unused)
+ = AMDGPU::getBaseWithConstantOffset(*MRI, CombinedOffset);
+
+ uint32_t SOffset, ImmOffset;
+ if (Offset > 0 && AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset,
+ &RBI.Subtarget, Alignment)) {
+ if (RBI.getRegBank(Base, *MRI, *RBI.TRI) == &AMDGPU::VGPRRegBank) {
+ VOffsetReg = Base;
+ SOffsetReg = B.buildConstant(S32, SOffset).getReg(0);
+ B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
+ InstOffsetVal = ImmOffset;
+ return 0; // XXX - Why is this 0?
+ }
+
+ // If we have SGPR base, we can use it for soffset.
+ if (SOffset == 0) {
+ VOffsetReg = B.buildConstant(S32, 0).getReg(0);
+ B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
+ SOffsetReg = Base;
+ InstOffsetVal = ImmOffset;
+ return 0; // XXX - Why is this 0?
+ }
+ }
+
+ // Handle the variable sgpr + vgpr case.
+ if (MachineInstr *Add = getOpcodeDef(AMDGPU::G_ADD, CombinedOffset, *MRI)) {
+ Register Src0 = getSrcRegIgnoringCopies(*MRI, Add->getOperand(1).getReg());
+ Register Src1 = getSrcRegIgnoringCopies(*MRI, Add->getOperand(2).getReg());
+
+ const RegisterBank *Src0Bank = RBI.getRegBank(Src0, *MRI, *RBI.TRI);
+ const RegisterBank *Src1Bank = RBI.getRegBank(Src1, *MRI, *RBI.TRI);
+
+ if (Src0Bank == &AMDGPU::VGPRRegBank && Src1Bank == &AMDGPU::SGPRRegBank) {
+ VOffsetReg = Src0;
+ SOffsetReg = Src1;
+ return 0;
+ }
+
+ if (Src0Bank == &AMDGPU::SGPRRegBank && Src1Bank == &AMDGPU::VGPRRegBank) {
+ VOffsetReg = Src1;
+ SOffsetReg = Src0;
+ return 0;
+ }
+ }
+
+ // Ensure we have a VGPR for the combined offset. This could be an issue if we
+ // have an SGPR offset and a VGPR resource.
+ if (RBI.getRegBank(CombinedOffset, *MRI, *RBI.TRI) == &AMDGPU::VGPRRegBank) {
+ VOffsetReg = CombinedOffset;
+ } else {
+ VOffsetReg = B.buildCopy(S32, CombinedOffset).getReg(0);
+ B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
+ }
+
+ SOffsetReg = B.buildConstant(S32, 0).getReg(0);
+ B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
+ return 0;
+}
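// A simplified sketch of the decomposition setBufferOffsets above aims for
// (illustrative only, not part of the patch; the 12-bit immediate width and the
// splitting policy are stand-ins for AMDGPU::splitMUBUFOffset, which also
// accounts for alignment and subtarget differences). Whatever is placed in
// voffset, soffset and the instruction immediate must add back up to the
// original combined offset.
#include <cassert>
#include <cstdint>
#include <initializer_list>

struct BufferOffsets {
  uint32_t VOffset;    // VGPR component
  uint32_t SOffset;    // SGPR component
  uint32_t InstOffset; // immediate encoded in the instruction
};

static BufferOffsets splitConstantOffset(uint32_t Combined) {
  const uint32_t MaxImm = (1u << 12) - 1; // placeholder immediate width
  BufferOffsets R;
  R.VOffset = 0;
  R.InstOffset = Combined & MaxImm;
  R.SOffset = Combined - R.InstOffset;
  return R;
}

int main() {
  for (uint32_t Off : {0u, 100u, 5000u, 0x12345u}) {
    BufferOffsets R = splitConstantOffset(Off);
    assert(R.VOffset + R.SOffset + R.InstOffset == Off);
  }
}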
+
+bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
+ const OperandsMapper &OpdMapper) const {
+ MachineInstr &MI = OpdMapper.getMI();
+ MachineRegisterInfo &MRI = OpdMapper.getMRI();
+
+ const LLT S32 = LLT::scalar(32);
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+
+ const RegisterBank *RSrcBank =
+ OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
+ const RegisterBank *OffsetBank =
+ OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
+ if (RSrcBank == &AMDGPU::SGPRRegBank &&
+ OffsetBank == &AMDGPU::SGPRRegBank)
+ return true; // Legal mapping
+
+  // FIXME: The 96-bit case was widened during legalization. We need to narrow
+  // it back here but don't have an MMO.
+
+ unsigned LoadSize = Ty.getSizeInBits();
+ int NumLoads = 1;
+ if (LoadSize == 256 || LoadSize == 512) {
+ NumLoads = LoadSize / 128;
+ Ty = Ty.divide(NumLoads);
+ }
+
+ // Use the alignment to ensure that the required offsets will fit into the
+ // immediate offsets.
+ const Align Alignment = NumLoads > 1 ? Align(16 * NumLoads) : Align(1);
+
+ MachineIRBuilder B(MI);
+ MachineFunction &MF = B.getMF();
+
+ Register SOffset;
+ Register VOffset;
+ int64_t ImmOffset = 0;
+
+ unsigned MMOOffset = setBufferOffsets(B, *this, MI.getOperand(2).getReg(),
+ VOffset, SOffset, ImmOffset, Alignment);
+
+ // TODO: 96-bit loads were widened to 128-bit results. Shrink the result if we
+  // can, but we need to track an MMO for that.
+ const unsigned MemSize = (Ty.getSizeInBits() + 7) / 8;
+ const Align MemAlign(4); // FIXME: ABI type alignment?
+ MachineMemOperand *BaseMMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ MemSize, MemAlign);
+ if (MMOOffset != 0)
+ BaseMMO = MF.getMachineMemOperand(BaseMMO, MMOOffset, MemSize);
+
+ // If only the offset is divergent, emit a MUBUF buffer load instead. We can
+ // assume that the buffer is unswizzled.
+
+ Register RSrc = MI.getOperand(1).getReg();
+ Register VIndex = B.buildConstant(S32, 0).getReg(0);
+ B.getMRI()->setRegBank(VIndex, AMDGPU::VGPRRegBank);
+
+ SmallVector<Register, 4> LoadParts(NumLoads);
+
+ MachineBasicBlock::iterator MII = MI.getIterator();
+ MachineInstrSpan Span(MII, &B.getMBB());
+
+ for (int i = 0; i < NumLoads; ++i) {
+ if (NumLoads == 1) {
+ LoadParts[i] = Dst;
+ } else {
+ LoadParts[i] = MRI.createGenericVirtualRegister(Ty);
+ MRI.setRegBank(LoadParts[i], AMDGPU::VGPRRegBank);
+ }
+
+ MachineMemOperand *MMO = BaseMMO;
+ if (i != 0)
+ BaseMMO = MF.getMachineMemOperand(BaseMMO, MMOOffset + 16 * i, MemSize);
+
+ B.buildInstr(AMDGPU::G_AMDGPU_BUFFER_LOAD)
+ .addDef(LoadParts[i]) // vdata
+ .addUse(RSrc) // rsrc
+ .addUse(VIndex) // vindex
+ .addUse(VOffset) // voffset
+ .addUse(SOffset) // soffset
+ .addImm(ImmOffset + 16 * i) // offset(imm)
+ .addImm(0) // cachepolicy, swizzled buffer(imm)
+ .addImm(0) // idxen(imm)
+ .addMemOperand(MMO);
+ }
+
+ // TODO: If only the resource is a VGPR, it may be better to execute the
+ // scalar load in the waterfall loop if the resource is expected to frequently
+ // be dynamically uniform.
+ if (RSrcBank != &AMDGPU::SGPRRegBank) {
+ // Remove the original instruction to avoid potentially confusing the
+ // waterfall loop logic.
+ B.setInstr(*Span.begin());
+ MI.eraseFromParent();
+
+ SmallSet<Register, 4> OpsToWaterfall;
+
+ OpsToWaterfall.insert(RSrc);
+ executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()),
+ OpsToWaterfall, MRI);
+ }
+
+ if (NumLoads != 1) {
+ if (Ty.isVector())
+ B.buildConcatVectors(Dst, LoadParts);
+ else
+ B.buildMerge(Dst, LoadParts);
+ }
+
+ // We removed the instruction earlier with a waterfall loop.
+ if (RSrcBank == &AMDGPU::SGPRRegBank)
+ MI.eraseFromParent();
+
+ return true;
+}
+
+bool AMDGPURegisterBankInfo::applyMappingBFEIntrinsic(
+ const OperandsMapper &OpdMapper, bool Signed) const {
+ MachineInstr &MI = OpdMapper.getMI();
+ MachineRegisterInfo &MRI = OpdMapper.getMRI();
+
+ // Insert basic copies
+ applyDefaultMapping(OpdMapper);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DstReg);
+
+ const LLT S32 = LLT::scalar(32);
+
+ const RegisterBank *DstBank =
+ OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
+ if (DstBank == &AMDGPU::VGPRRegBank) {
+ if (Ty == S32)
+ return true;
+
+ // TODO: 64-bit version is scalar only, so we need to expand this.
+ return false;
+ }
+
+ Register SrcReg = MI.getOperand(2).getReg();
+ Register OffsetReg = MI.getOperand(3).getReg();
+ Register WidthReg = MI.getOperand(4).getReg();
+
+ // The scalar form packs the offset and width in a single operand.
+
+ ApplyRegBankMapping ApplyBank(*this, MRI, &AMDGPU::SGPRRegBank);
+ GISelObserverWrapper Observer(&ApplyBank);
+ MachineIRBuilder B(MI);
+ B.setChangeObserver(Observer);
+
+ // Ensure the high bits are clear to insert the offset.
+ auto OffsetMask = B.buildConstant(S32, maskTrailingOnes<unsigned>(6));
+ auto ClampOffset = B.buildAnd(S32, OffsetReg, OffsetMask);
+
+  // Shifting left by 16 zeros out the low bits, so don't bother clamping the
+  // input value.
+ auto ShiftWidth = B.buildShl(S32, WidthReg, B.buildConstant(S32, 16));
+
+ // Transformation function, pack the offset and width of a BFE into
+ // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
+ // source, bits [5:0] contain the offset and bits [22:16] the width.
+ auto MergedInputs = B.buildOr(S32, ClampOffset, ShiftWidth);
+
+ // TODO: It might be worth using a pseudo here to avoid scc clobber and
+ // register class constraints.
+ unsigned Opc = Ty == S32 ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
+ (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
+
+ auto MIB = B.buildInstr(Opc, {DstReg}, {SrcReg, MergedInputs});
+ if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this))
+ llvm_unreachable("failed to constrain BFE");
+
+ MI.eraseFromParent();
+ return true;
+}
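// A minimal sketch of the operand packing done in applyMappingBFEIntrinsic
// above (illustrative only, not part of the patch; packScalarBFEOperand is an
// invented name): the scalar BFE takes one packed source with the bit offset in
// bits [5:0] and the field width in bits [22:16]; the offset is masked so it
// cannot spill into the width field, while the width only needs shifting.
#include <cassert>
#include <cstdint>

static uint32_t packScalarBFEOperand(uint32_t Offset, uint32_t Width) {
  return (Offset & 0x3fu) | (Width << 16);
}

int main() {
  // Extract an 8-bit field starting at bit 4.
  assert(packScalarBFEOperand(4, 8) == ((8u << 16) | 4u));
  // High offset bits are dropped rather than corrupting the width field.
  assert(packScalarBFEOperand(0xc4, 8) == ((8u << 16) | 4u));
}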
+
// FIXME: Duplicated from LegalizerHelper
static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
switch (Opc) {
@@ -1226,6 +1580,51 @@ static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
}
}
+static unsigned minMaxToExtend(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_SMAX:
+ return TargetOpcode::G_SEXT;
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_UMAX:
+ return TargetOpcode::G_ZEXT;
+ default:
+ llvm_unreachable("not in integer min/max");
+ }
+}
+
+// Emit a legalized extension from <2 x s16> to 2 32-bit components, avoiding
+// any illegal vector extend or unmerge operations.
+static std::pair<Register, Register>
+unpackV2S16ToS32(MachineIRBuilder &B, Register Src, unsigned ExtOpcode) {
+ const LLT S32 = LLT::scalar(32);
+ auto Bitcast = B.buildBitcast(S32, Src);
+
+ if (ExtOpcode == TargetOpcode::G_SEXT) {
+ auto ExtLo = B.buildSExtInReg(S32, Bitcast, 16);
+ auto ShiftHi = B.buildAShr(S32, Bitcast, B.buildConstant(S32, 16));
+ return std::make_pair(ExtLo.getReg(0), ShiftHi.getReg(0));
+ }
+
+ auto ShiftHi = B.buildLShr(S32, Bitcast, B.buildConstant(S32, 16));
+ if (ExtOpcode == TargetOpcode::G_ZEXT) {
+ auto ExtLo = B.buildAnd(S32, Bitcast, B.buildConstant(S32, 0xffff));
+ return std::make_pair(ExtLo.getReg(0), ShiftHi.getReg(0));
+ }
+
+ assert(ExtOpcode == TargetOpcode::G_ANYEXT);
+ return std::make_pair(Bitcast.getReg(0), ShiftHi.getReg(0));
+}
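// The same <2 x s16> -> 2 x s32 unpacking that unpackV2S16ToS32 builds with
// shift instructions, restated on a plain 32-bit integer holding the low half
// in bits [15:0] and the high half in bits [31:16] (illustrative only, not part
// of the patch).
#include <cassert>
#include <cstdint>

static void unpackSExt(uint32_t Packed, int32_t &Lo, int32_t &Hi) {
  Lo = (int32_t)(Packed << 16) >> 16; // G_SEXT_INREG ..., 16
  Hi = (int32_t)Packed >> 16;         // G_ASHR ..., 16
}

static void unpackZExt(uint32_t Packed, uint32_t &Lo, uint32_t &Hi) {
  Lo = Packed & 0xffffu; // G_AND ..., 0xffff
  Hi = Packed >> 16;     // G_LSHR ..., 16
}

int main() {
  int32_t SLo, SHi;
  unpackSExt(0xfffe0001u, SLo, SHi); // halves are 0x0001 and 0xfffe
  assert(SLo == 1 && SHi == -2);

  uint32_t ULo, UHi;
  unpackZExt(0xfffe0001u, ULo, UHi);
  assert(ULo == 1 && UHi == 0xfffeu);
}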
+
+static MachineInstr *buildExpandedScalarMinMax(MachineIRBuilder &B,
+ CmpInst::Predicate Pred,
+ Register Dst, Register Src0,
+ Register Src1) {
+ const LLT CmpType = LLT::scalar(32);
+ auto Cmp = B.buildICmp(Pred, CmpType, Src0, Src1);
+ return B.buildSelect(Dst, Cmp, Src0, Src1);
+}
+
// FIXME: Duplicated from LegalizerHelper, except changing the boolean type.
void AMDGPURegisterBankInfo::lowerScalarMinMax(MachineIRBuilder &B,
MachineInstr &MI) const {
@@ -1234,24 +1633,25 @@ void AMDGPURegisterBankInfo::lowerScalarMinMax(MachineIRBuilder &B,
Register Src1 = MI.getOperand(2).getReg();
const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
- LLT CmpType = LLT::scalar(32);
-
- auto Cmp = B.buildICmp(Pred, CmpType, Src0, Src1);
- B.buildSelect(Dst, Cmp, Src0, Src1);
+ MachineInstr *Sel = buildExpandedScalarMinMax(B, Pred, Dst, Src0, Src1);
- B.getMRI()->setRegBank(Cmp.getReg(0), AMDGPU::SGPRRegBank);
+ Register CmpReg = Sel->getOperand(1).getReg();
+ B.getMRI()->setRegBank(CmpReg, AMDGPU::SGPRRegBank);
MI.eraseFromParent();
}
// For cases where only a single copy is inserted for matching register banks.
// Replace the register in the instruction operand
-static void substituteSimpleCopyRegs(
+static bool substituteSimpleCopyRegs(
const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx) {
SmallVector<unsigned, 1> SrcReg(OpdMapper.getVRegs(OpIdx));
if (!SrcReg.empty()) {
assert(SrcReg.size() == 1);
OpdMapper.getMI().getOperand(OpIdx).setReg(SrcReg[0]);
+ return true;
}
+
+ return false;
}
/// Handle register layout difference for f16 images for some subtargets.
@@ -1465,6 +1865,223 @@ bool AMDGPURegisterBankInfo::buildVCopy(MachineIRBuilder &B, Register DstReg,
constrainGenericRegister(DstReg, AMDGPU::VReg_64RegClass, MRI);
}
+/// Utility function for pushing dynamic vector indexes with a constant offset
+/// into waterfall loops.
+static void reinsertVectorIndexAdd(MachineIRBuilder &B,
+ MachineInstr &IdxUseInstr,
+ unsigned OpIdx,
+ unsigned ConstOffset) {
+ MachineRegisterInfo &MRI = *B.getMRI();
+ const LLT S32 = LLT::scalar(32);
+ Register WaterfallIdx = IdxUseInstr.getOperand(OpIdx).getReg();
+ B.setInsertPt(*IdxUseInstr.getParent(), IdxUseInstr.getIterator());
+
+ auto MaterializedOffset = B.buildConstant(S32, ConstOffset);
+
+ auto Add = B.buildAdd(S32, WaterfallIdx, MaterializedOffset);
+ MRI.setRegBank(MaterializedOffset.getReg(0), AMDGPU::SGPRRegBank);
+ MRI.setRegBank(Add.getReg(0), AMDGPU::SGPRRegBank);
+ IdxUseInstr.getOperand(OpIdx).setReg(Add.getReg(0));
+}
+
+/// Implement extending a 32-bit value to a 64-bit value. \p Lo32Reg is the
+/// original 32-bit source value (to be inserted in the low part of the combined
+/// 64-bit result), and \p Hi32Reg is the high half of the combined 64-bit
+/// value.
+static void extendLow32IntoHigh32(MachineIRBuilder &B,
+ Register Hi32Reg, Register Lo32Reg,
+ unsigned ExtOpc,
+ const RegisterBank &RegBank,
+ bool IsBooleanSrc = false) {
+ if (ExtOpc == AMDGPU::G_ZEXT) {
+ B.buildConstant(Hi32Reg, 0);
+ } else if (ExtOpc == AMDGPU::G_SEXT) {
+ if (IsBooleanSrc) {
+ // If we know the original source was an s1, the high half is the same as
+ // the low.
+ B.buildCopy(Hi32Reg, Lo32Reg);
+ } else {
+ // Replicate sign bit from 32-bit extended part.
+ auto ShiftAmt = B.buildConstant(LLT::scalar(32), 31);
+ B.getMRI()->setRegBank(ShiftAmt.getReg(0), RegBank);
+ B.buildAShr(Hi32Reg, Lo32Reg, ShiftAmt);
+ }
+ } else {
+ assert(ExtOpc == AMDGPU::G_ANYEXT && "not an integer extension");
+ B.buildUndef(Hi32Reg);
+ }
+}
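// The high-half rule implemented by extendLow32IntoHigh32 above, restated on
// plain integers (illustrative only, not part of the patch): zext produces a
// zero high half, sext replicates bit 31 of the low half, and a source known to
// be a sign-extended s1 (0 or all-ones) can simply be copied.
#include <cassert>
#include <cstdint>

static uint32_t highHalfZExt(uint32_t) { return 0; }

static uint32_t highHalfSExt(uint32_t Lo) {
  return (uint32_t)((int32_t)Lo >> 31); // replicate the sign bit
}

int main() {
  uint32_t Lo = 0x80000001u; // negative when read as a signed 32-bit value
  assert((((uint64_t)highHalfSExt(Lo) << 32) | Lo) == 0xffffffff80000001ull);
  assert((((uint64_t)highHalfZExt(Lo) << 32) | Lo) == 0x0000000080000001ull);
  // For a boolean source the high half equals the low half, so a copy suffices.
  assert(highHalfSExt(0xffffffffu) == 0xffffffffu && highHalfSExt(0u) == 0u);
}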
+
+bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ const OperandsMapper &OpdMapper) const {
+
+ Register VecReg = MI.getOperand(1).getReg();
+ Register Idx = MI.getOperand(2).getReg();
+
+ const RegisterBank &IdxBank =
+ *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
+
+ bool IsDivergentIdx = IdxBank == AMDGPU::VGPRRegBank;
+
+ LLT VecTy = MRI.getType(VecReg);
+ unsigned EltSize = VecTy.getScalarSizeInBits();
+ unsigned NumElem = VecTy.getNumElements();
+
+ if (!SITargetLowering::shouldExpandVectorDynExt(EltSize, NumElem,
+ IsDivergentIdx))
+ return false;
+
+ MachineIRBuilder B(MI);
+ LLT S32 = LLT::scalar(32);
+
+ const RegisterBank &DstBank =
+ *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
+ const RegisterBank &SrcBank =
+ *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
+
+ const RegisterBank &CCBank =
+ (DstBank == AMDGPU::SGPRRegBank &&
+ SrcBank == AMDGPU::SGPRRegBank &&
+ IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
+ : AMDGPU::VCCRegBank;
+ LLT CCTy = (CCBank == AMDGPU::SGPRRegBank) ? S32 : LLT::scalar(1);
+
+ if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
+ Idx = B.buildCopy(S32, Idx)->getOperand(0).getReg();
+ MRI.setRegBank(Idx, AMDGPU::VGPRRegBank);
+ }
+
+ LLT EltTy = VecTy.getScalarType();
+ SmallVector<Register, 2> DstRegs(OpdMapper.getVRegs(0));
+ unsigned NumLanes = DstRegs.size();
+ if (!NumLanes)
+ NumLanes = 1;
+ else
+ EltTy = MRI.getType(DstRegs[0]);
+
+ auto UnmergeToEltTy = B.buildUnmerge(EltTy, VecReg);
+ SmallVector<Register, 2> Res(NumLanes);
+ for (unsigned L = 0; L < NumLanes; ++L)
+ Res[L] = UnmergeToEltTy.getReg(L);
+
+ for (unsigned I = 1; I < NumElem; ++I) {
+ auto IC = B.buildConstant(S32, I);
+ MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);
+ auto Cmp = B.buildICmp(CmpInst::ICMP_EQ, CCTy, Idx, IC);
+ MRI.setRegBank(Cmp->getOperand(0).getReg(), CCBank);
+
+ for (unsigned L = 0; L < NumLanes; ++L) {
+ auto S = B.buildSelect(EltTy, Cmp,
+ UnmergeToEltTy.getReg(I * NumLanes + L), Res[L]);
+
+ for (unsigned N : { 0, 2, 3 })
+ MRI.setRegBank(S->getOperand(N).getReg(), DstBank);
+
+ Res[L] = S->getOperand(0).getReg();
+ }
+ }
+
+ for (unsigned L = 0; L < NumLanes; ++L) {
+ Register DstReg = (NumLanes == 1) ? MI.getOperand(0).getReg() : DstRegs[L];
+ B.buildCopy(DstReg, Res[L]);
+ MRI.setRegBank(DstReg, DstBank);
+ }
+
+ MRI.setRegBank(MI.getOperand(0).getReg(), DstBank);
+ MI.eraseFromParent();
+
+ return true;
+}
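// A sketch of the compare+select expansion performed by
// foldExtractEltToCmpSelect above (illustrative only, not part of the patch):
// a dynamic extract from a small vector becomes a chain of
// "select(idx == i, vec[i], acc)" steps, so no real indexed access (and no
// waterfall loop) is needed. The insert-element fold below is the analogous
// construction in the other direction.
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

template <std::size_t N>
static uint32_t extractByCmpSelect(const std::array<uint32_t, N> &Vec,
                                   uint32_t Idx) {
  uint32_t Acc = Vec[0];
  for (uint32_t I = 1; I < N; ++I)
    Acc = (Idx == I) ? Vec[I] : Acc; // one icmp + select per element
  return Acc;
}

int main() {
  std::array<uint32_t, 4> V = {10, 20, 30, 40};
  assert(extractByCmpSelect(V, 2) == 30);
  assert(extractByCmpSelect(V, 0) == 10);
}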
+
+bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ const OperandsMapper &OpdMapper) const {
+
+ Register VecReg = MI.getOperand(1).getReg();
+ Register Idx = MI.getOperand(3).getReg();
+
+ const RegisterBank &IdxBank =
+ *OpdMapper.getInstrMapping().getOperandMapping(3).BreakDown[0].RegBank;
+
+ bool IsDivergentIdx = IdxBank == AMDGPU::VGPRRegBank;
+
+ LLT VecTy = MRI.getType(VecReg);
+ unsigned EltSize = VecTy.getScalarSizeInBits();
+ unsigned NumElem = VecTy.getNumElements();
+
+ if (!SITargetLowering::shouldExpandVectorDynExt(EltSize, NumElem,
+ IsDivergentIdx))
+ return false;
+
+ MachineIRBuilder B(MI);
+ LLT S32 = LLT::scalar(32);
+
+ const RegisterBank &DstBank =
+ *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
+ const RegisterBank &SrcBank =
+ *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
+ const RegisterBank &InsBank =
+ *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
+
+ const RegisterBank &CCBank =
+ (DstBank == AMDGPU::SGPRRegBank &&
+ SrcBank == AMDGPU::SGPRRegBank &&
+ InsBank == AMDGPU::SGPRRegBank &&
+ IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
+ : AMDGPU::VCCRegBank;
+ LLT CCTy = (CCBank == AMDGPU::SGPRRegBank) ? S32 : LLT::scalar(1);
+
+ if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
+ Idx = B.buildCopy(S32, Idx)->getOperand(0).getReg();
+ MRI.setRegBank(Idx, AMDGPU::VGPRRegBank);
+ }
+
+ LLT EltTy = VecTy.getScalarType();
+ SmallVector<Register, 2> InsRegs(OpdMapper.getVRegs(2));
+ unsigned NumLanes = InsRegs.size();
+ if (!NumLanes) {
+ NumLanes = 1;
+ InsRegs.push_back(MI.getOperand(2).getReg());
+ } else {
+ EltTy = MRI.getType(InsRegs[0]);
+ }
+
+ auto UnmergeToEltTy = B.buildUnmerge(EltTy, VecReg);
+ SmallVector<Register, 16> Ops(NumElem * NumLanes);
+
+ for (unsigned I = 0; I < NumElem; ++I) {
+ auto IC = B.buildConstant(S32, I);
+ MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);
+ auto Cmp = B.buildICmp(CmpInst::ICMP_EQ, CCTy, Idx, IC);
+ MRI.setRegBank(Cmp->getOperand(0).getReg(), CCBank);
+
+ for (unsigned L = 0; L < NumLanes; ++L) {
+ auto S = B.buildSelect(EltTy, Cmp, InsRegs[L],
+ UnmergeToEltTy.getReg(I * NumLanes + L));
+
+ for (unsigned N : { 0, 2, 3 })
+ MRI.setRegBank(S->getOperand(N).getReg(), DstBank);
+
+ Ops[I * NumLanes + L] = S->getOperand(0).getReg();
+ }
+ }
+
+ LLT MergeTy = LLT::vector(Ops.size(), EltTy);
+ if (MergeTy == MRI.getType(MI.getOperand(0).getReg())) {
+ B.buildBuildVector(MI.getOperand(0), Ops);
+ } else {
+ auto Vec = B.buildBuildVector(MergeTy, Ops);
+ MRI.setRegBank(Vec->getOperand(0).getReg(), DstBank);
+ B.buildBitcast(MI.getOperand(0).getReg(), Vec);
+ }
+
+ MRI.setRegBank(MI.getOperand(0).getReg(), DstBank);
+ MI.eraseFromParent();
+
+ return true;
+}
+
void AMDGPURegisterBankInfo::applyMappingImpl(
const OperandsMapper &OpdMapper) const {
MachineInstr &MI = OpdMapper.getMI();
@@ -1555,7 +2172,13 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
MachineBasicBlock *MBB = MI.getParent();
B.setInsertPt(*MBB, std::next(MI.getIterator()));
- B.buildTrunc(DstReg, NewDstReg);
+
+ // If we had a constrained VCC result register, a copy was inserted to VCC
+ // from SGPR.
+ SmallVector<Register, 1> DefRegs(OpdMapper.getVRegs(0));
+ if (DefRegs.empty())
+ DefRegs.push_back(DstReg);
+ B.buildTrunc(DefRegs[0], NewDstReg);
return;
}
case AMDGPU::G_SELECT: {
@@ -1712,10 +2335,16 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
}
case AMDGPU::G_ADD:
case AMDGPU::G_SUB:
- case AMDGPU::G_MUL: {
+ case AMDGPU::G_MUL:
+ case AMDGPU::G_SHL:
+ case AMDGPU::G_LSHR:
+ case AMDGPU::G_ASHR: {
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
- if (DstTy != LLT::scalar(16))
+
+ // 16-bit operations are VALU only, but can be promoted to 32-bit SALU.
+ // Packed 16-bit operations need to be scalarized and promoted.
+ if (DstTy != LLT::scalar(16) && DstTy != LLT::vector(2, 16))
break;
const RegisterBank *DstBank =
@@ -1723,16 +2352,42 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
if (DstBank == &AMDGPU::VGPRRegBank)
break;
- // 16-bit operations are VALU only, but can be promoted to 32-bit SALU.
- MachineFunction *MF = MI.getParent()->getParent();
+ const LLT S32 = LLT::scalar(32);
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineFunction *MF = MBB->getParent();
MachineIRBuilder B(MI);
ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank);
GISelObserverWrapper Observer(&ApplySALU);
- LegalizerHelper Helper(*MF, Observer, B);
- if (Helper.widenScalar(MI, 0, LLT::scalar(32)) !=
- LegalizerHelper::Legalized)
- llvm_unreachable("widen scalar should have succeeded");
+ if (DstTy.isVector()) {
+ B.setChangeObserver(Observer);
+
+ Register WideSrc0Lo, WideSrc0Hi;
+ Register WideSrc1Lo, WideSrc1Hi;
+
+ std::tie(WideSrc0Lo, WideSrc0Hi)
+ = unpackV2S16ToS32(B, MI.getOperand(1).getReg(), AMDGPU::G_ANYEXT);
+ std::tie(WideSrc1Lo, WideSrc1Hi)
+ = unpackV2S16ToS32(B, MI.getOperand(2).getReg(), AMDGPU::G_ANYEXT);
+ auto Lo = B.buildInstr(MI.getOpcode(), {S32}, {WideSrc0Lo, WideSrc1Lo});
+ auto Hi = B.buildInstr(MI.getOpcode(), {S32}, {WideSrc0Hi, WideSrc1Hi});
+ B.buildBuildVectorTrunc(DstReg, {Lo.getReg(0), Hi.getReg(0)});
+ MI.eraseFromParent();
+ } else {
+ LegalizerHelper Helper(*MF, Observer, B);
+
+ if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
+ llvm_unreachable("widen scalar should have succeeded");
+
+ // FIXME: s16 shift amounts should be legal.
+ if (Opc == AMDGPU::G_SHL || Opc == AMDGPU::G_LSHR ||
+ Opc == AMDGPU::G_ASHR) {
+ B.setInsertPt(*MBB, MI.getIterator());
+ if (Helper.widenScalar(MI, 1, S32) != LegalizerHelper::Legalized)
+ llvm_unreachable("widen scalar should have succeeded");
+ }
+ }
+
return;
}
case AMDGPU::G_SMIN:
@@ -1750,10 +2405,44 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// Turn scalar min/max into a compare and select.
LLT Ty = MRI.getType(DstReg);
- LLT S32 = LLT::scalar(32);
- LLT S16 = LLT::scalar(16);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S16 = LLT::scalar(16);
+ const LLT V2S16 = LLT::vector(2, 16);
- if (Ty == S16) {
+ if (Ty == V2S16) {
+ ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank);
+ GISelObserverWrapper Observer(&ApplySALU);
+ B.setChangeObserver(Observer);
+
+      // Need to widen to s32, expand as cmp + select, and avoid producing
+ // illegal vector extends or unmerges that would need further
+ // legalization.
+ //
+ // TODO: Should we just readfirstlane? That should probably be handled
+ // with a UniformVGPR register bank that wouldn't need special
+ // consideration here.
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Src1 = MI.getOperand(2).getReg();
+
+ Register WideSrc0Lo, WideSrc0Hi;
+ Register WideSrc1Lo, WideSrc1Hi;
+
+ unsigned ExtendOp = minMaxToExtend(MI.getOpcode());
+
+ std::tie(WideSrc0Lo, WideSrc0Hi) = unpackV2S16ToS32(B, Src0, ExtendOp);
+ std::tie(WideSrc1Lo, WideSrc1Hi) = unpackV2S16ToS32(B, Src1, ExtendOp);
+
+ Register Lo = MRI.createGenericVirtualRegister(S32);
+ Register Hi = MRI.createGenericVirtualRegister(S32);
+ const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
+ buildExpandedScalarMinMax(B, Pred, Lo, WideSrc0Lo, WideSrc1Lo);
+ buildExpandedScalarMinMax(B, Pred, Hi, WideSrc0Hi, WideSrc1Hi);
+
+ B.buildBuildVectorTrunc(Dst, {Lo, Hi});
+ MI.eraseFromParent();
+ } else if (Ty == S16) {
ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank);
GISelObserverWrapper Observer(&ApplySALU);
LegalizerHelper Helper(*MF, Observer, B);
@@ -1769,11 +2458,77 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
return;
}
+ case AMDGPU::G_SEXT_INREG: {
+ SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
+ if (SrcRegs.empty())
+ break; // Nothing to repair
+
+ const LLT S32 = LLT::scalar(32);
+ MachineIRBuilder B(MI);
+ ApplyRegBankMapping O(*this, MRI, &AMDGPU::VGPRRegBank);
+ GISelObserverWrapper Observer(&O);
+ B.setChangeObserver(Observer);
+
+ // Don't use LegalizerHelper's narrowScalar. It produces unwanted G_SEXTs
+ // we would need to further expand, and doesn't let us directly set the
+ // result registers.
+ SmallVector<Register, 2> DstRegs(OpdMapper.getVRegs(0));
+
+ int Amt = MI.getOperand(2).getImm();
+ if (Amt <= 32) {
+ if (Amt == 32) {
+ // The low bits are unchanged.
+ B.buildCopy(DstRegs[0], SrcRegs[0]);
+ } else {
+ // Extend in the low bits and propagate the sign bit to the high half.
+ B.buildSExtInReg(DstRegs[0], SrcRegs[0], Amt);
+ }
+
+ B.buildAShr(DstRegs[1], DstRegs[0], B.buildConstant(S32, 31));
+ } else {
+ // The low bits are unchanged, and extend in the high bits.
+ B.buildCopy(DstRegs[0], SrcRegs[0]);
+ B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);
+ }
+
+ Register DstReg = MI.getOperand(0).getReg();
+ MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
+ MI.eraseFromParent();
+ return;
+ }
+ case AMDGPU::G_CTPOP:
+ case AMDGPU::G_CTLZ_ZERO_UNDEF:
+ case AMDGPU::G_CTTZ_ZERO_UNDEF: {
+ MachineIRBuilder B(MI);
+ MachineFunction &MF = B.getMF();
+
+ const RegisterBank *DstBank =
+ OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
+ if (DstBank == &AMDGPU::SGPRRegBank)
+ break;
+
+ Register SrcReg = MI.getOperand(1).getReg();
+ const LLT S32 = LLT::scalar(32);
+ LLT Ty = MRI.getType(SrcReg);
+ if (Ty == S32)
+ break;
+
+ ApplyRegBankMapping ApplyVALU(*this, MRI, &AMDGPU::VGPRRegBank);
+ GISelObserverWrapper Observer(&ApplyVALU);
+ LegalizerHelper Helper(MF, Observer, B);
+
+ if (Helper.narrowScalar(MI, 1, S32) != LegalizerHelper::Legalized)
+ llvm_unreachable("narrowScalar should have succeeded");
+ return;
+ }
case AMDGPU::G_SEXT:
- case AMDGPU::G_ZEXT: {
+ case AMDGPU::G_ZEXT:
+ case AMDGPU::G_ANYEXT: {
Register SrcReg = MI.getOperand(1).getReg();
LLT SrcTy = MRI.getType(SrcReg);
- bool Signed = Opc == AMDGPU::G_SEXT;
+ const bool Signed = Opc == AMDGPU::G_SEXT;
+
+ assert(empty(OpdMapper.getVRegs(1)));
MachineIRBuilder B(MI);
const RegisterBank *SrcBank =
@@ -1788,23 +2543,19 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// breakdowns supported.
DstTy.getSizeInBits() == 64 &&
SrcTy.getSizeInBits() <= 32) {
- const LLT S32 = LLT::scalar(32);
SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
// Extend to 32-bit, and then extend the low half.
if (Signed) {
// TODO: Should really be buildSExtOrCopy
B.buildSExtOrTrunc(DefRegs[0], SrcReg);
-
- // Replicate sign bit from 32-bit extended part.
- auto ShiftAmt = B.buildConstant(S32, 31);
- MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
- B.buildAShr(DefRegs[1], DefRegs[0], ShiftAmt);
- } else {
+ } else if (Opc == AMDGPU::G_ZEXT) {
B.buildZExtOrTrunc(DefRegs[0], SrcReg);
- B.buildConstant(DefRegs[1], 0);
+ } else {
+ B.buildAnyExtOrTrunc(DefRegs[0], SrcReg);
}
+ extendLow32IntoHigh32(B, DefRegs[1], DefRegs[0], Opc, *SrcBank);
MRI.setRegBank(DstReg, *SrcBank);
MI.eraseFromParent();
return;
@@ -1813,6 +2564,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
if (SrcTy != LLT::scalar(1))
return;
+ // It is not legal to have a legalization artifact with a VCC source. Rather
+ // than introducing a copy, insert the select we would have to select the
+ // copy to.
if (SrcBank == &AMDGPU::VCCRegBank) {
SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
@@ -1834,7 +2588,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
if (DstSize > 32) {
B.buildSelect(DefRegs[0], SrcReg, True, False);
- B.buildCopy(DefRegs[1], DefRegs[0]);
+ extendLow32IntoHigh32(B, DefRegs[1], DefRegs[0], Opc, *SrcBank, true);
} else if (DstSize < 32) {
auto Sel = B.buildSelect(SelType, SrcReg, True, False);
MRI.setRegBank(Sel.getReg(0), *DstBank);
@@ -1847,24 +2601,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
return;
}
- // Fixup the case with an s1 src that isn't a condition register. Use shifts
- // instead of introducing a compare to avoid an unnecessary condition
- // register (and since there's no scalar 16-bit compares).
- auto Ext = B.buildAnyExt(DstTy, SrcReg);
- auto ShiftAmt = B.buildConstant(LLT::scalar(32), DstTy.getSizeInBits() - 1);
- auto Shl = B.buildShl(DstTy, Ext, ShiftAmt);
-
- if (MI.getOpcode() == AMDGPU::G_SEXT)
- B.buildAShr(DstReg, Shl, ShiftAmt);
- else
- B.buildLShr(DstReg, Shl, ShiftAmt);
-
- MRI.setRegBank(DstReg, *SrcBank);
- MRI.setRegBank(Ext.getReg(0), *SrcBank);
- MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
- MRI.setRegBank(Shl.getReg(0), *SrcBank);
- MI.eraseFromParent();
- return;
+ break;
}
case AMDGPU::G_BUILD_VECTOR:
case AMDGPU::G_BUILD_VECTOR_TRUNC: {
@@ -1934,7 +2671,16 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
assert(OpdMapper.getVRegs(1).empty() && OpdMapper.getVRegs(2).empty());
- LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+
+ const LLT S32 = LLT::scalar(32);
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ if (foldExtractEltToCmpSelect(MI, MRI, OpdMapper))
+ return;
+
MachineIRBuilder B(MI);
const ValueMapping &DstMapping
@@ -1942,10 +2688,26 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
const RegisterBank *DstBank = DstMapping.BreakDown[0].RegBank;
const RegisterBank *SrcBank =
OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
-
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- Register IdxReg = MI.getOperand(2).getReg();
+ const RegisterBank *IdxBank =
+ OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
+
+ Register BaseIdxReg;
+ unsigned ConstOffset;
+ MachineInstr *OffsetDef;
+ std::tie(BaseIdxReg, ConstOffset, OffsetDef) =
+ AMDGPU::getBaseWithConstantOffset(MRI, MI.getOperand(2).getReg());
+
+ // See if the index is an add of a constant which will be foldable by moving
+ // the base register of the index later if this is going to be executed in a
+ // waterfall loop. This is essentially to reassociate the add of a constant
+ // with the readfirstlane.
+ bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
+ ConstOffset > 0 &&
+ ConstOffset < SrcTy.getNumElements();
+
+ // Move the base register. We'll re-insert the add later.
+ if (ShouldMoveIndexIntoLoop)
+ MI.getOperand(2).setReg(BaseIdxReg);
// If this is a VGPR result only because the index was a VGPR result, the
// actual indexing will be done on the SGPR source vector, which will
@@ -1969,26 +2731,30 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
buildVCopy(B, DstReg, TmpReg);
}
+ // Re-insert the constant offset add inside the waterfall loop.
+ if (ShouldMoveIndexIntoLoop)
+ reinsertVectorIndexAdd(B, MI, 2, ConstOffset);
+
return;
}
assert(DstTy.getSizeInBits() == 64);
- LLT SrcTy = MRI.getType(SrcReg);
- const LLT S32 = LLT::scalar(32);
LLT Vec32 = LLT::vector(2 * SrcTy.getNumElements(), 32);
auto CastSrc = B.buildBitcast(Vec32, SrcReg);
auto One = B.buildConstant(S32, 1);
+ MachineBasicBlock::iterator MII = MI.getIterator();
+
// Split the vector index into 32-bit pieces. Prepare to move all of the
// new instructions into a waterfall loop if necessary.
//
// Don't put the bitcast or constant in the loop.
- MachineInstrSpan Span(MachineBasicBlock::iterator(&MI), &B.getMBB());
+ MachineInstrSpan Span(MII, &B.getMBB());
// Compute 32-bit element indices, (2 * OrigIdx, 2 * OrigIdx + 1).
- auto IdxLo = B.buildShl(S32, IdxReg, One);
+ auto IdxLo = B.buildShl(S32, BaseIdxReg, One);
auto IdxHi = B.buildAdd(S32, IdxLo, One);
auto Extract0 = B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo);
@@ -2029,33 +2795,70 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
buildVCopy(B, DstRegs[1], TmpReg1);
}
+ if (ShouldMoveIndexIntoLoop)
+ reinsertVectorIndexAdd(B, *IdxLo, 1, ConstOffset);
+
return;
}
case AMDGPU::G_INSERT_VECTOR_ELT: {
SmallVector<Register, 2> InsRegs(OpdMapper.getVRegs(2));
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT VecTy = MRI.getType(DstReg);
+
assert(OpdMapper.getVRegs(0).empty());
- assert(OpdMapper.getVRegs(1).empty());
assert(OpdMapper.getVRegs(3).empty());
- if (InsRegs.empty()) {
- applyDefaultMapping(OpdMapper);
- executeInWaterfallLoop(MI, MRI, { 3 });
+ if (substituteSimpleCopyRegs(OpdMapper, 1))
+ MRI.setType(MI.getOperand(1).getReg(), VecTy);
+
+ if (foldInsertEltToCmpSelect(MI, MRI, OpdMapper))
return;
- }
- Register DstReg = MI.getOperand(0).getReg();
+ const RegisterBank *IdxBank =
+ OpdMapper.getInstrMapping().getOperandMapping(3).BreakDown[0].RegBank;
+
Register SrcReg = MI.getOperand(1).getReg();
Register InsReg = MI.getOperand(2).getReg();
- Register IdxReg = MI.getOperand(3).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
LLT InsTy = MRI.getType(InsReg);
(void)InsTy;
+ Register BaseIdxReg;
+ unsigned ConstOffset;
+ MachineInstr *OffsetDef;
+ std::tie(BaseIdxReg, ConstOffset, OffsetDef) =
+ AMDGPU::getBaseWithConstantOffset(MRI, MI.getOperand(3).getReg());
+
+ // See if the index is an add of a constant which will be foldable by moving
+ // the base register of the index later if this is going to be executed in a
+ // waterfall loop. This is essentially to reassociate the add of a constant
+ // with the readfirstlane.
+ bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
+ ConstOffset > 0 &&
+ ConstOffset < VecTy.getNumElements();
+
+ // Move the base register. We'll re-insert the add later.
+ if (ShouldMoveIndexIntoLoop)
+ MI.getOperand(3).setReg(BaseIdxReg);
+
+ if (InsRegs.empty()) {
+ executeInWaterfallLoop(MI, MRI, { 3 });
+
+ // Re-insert the constant offset add inside the waterfall loop.
+ if (ShouldMoveIndexIntoLoop) {
+ MachineIRBuilder B(MI);
+ reinsertVectorIndexAdd(B, MI, 3, ConstOffset);
+ }
+
+ return;
+ }
+
assert(InsTy.getSizeInBits() == 64);
const LLT S32 = LLT::scalar(32);
- LLT Vec32 = LLT::vector(2 * SrcTy.getNumElements(), 32);
+ LLT Vec32 = LLT::vector(2 * VecTy.getNumElements(), 32);
MachineIRBuilder B(MI);
auto CastSrc = B.buildBitcast(Vec32, SrcReg);
@@ -2068,12 +2871,11 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
MachineInstrSpan Span(MachineBasicBlock::iterator(&MI), &B.getMBB());
// Compute 32-bit element indices, (2 * OrigIdx, 2 * OrigIdx + 1).
- auto IdxLo = B.buildShl(S32, IdxReg, One);
+ auto IdxLo = B.buildShl(S32, BaseIdxReg, One);
auto IdxHi = B.buildAdd(S32, IdxLo, One);
auto InsLo = B.buildInsertVectorElement(Vec32, CastSrc, InsRegs[0], IdxLo);
auto InsHi = B.buildInsertVectorElement(Vec32, InsLo, InsRegs[1], IdxHi);
- B.buildBitcast(DstReg, InsHi);
const RegisterBank *DstBank =
OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
@@ -2093,6 +2895,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
SmallSet<Register, 4> OpsToWaterfall;
if (!collectWaterfallOperands(OpsToWaterfall, MI, MRI, { 3 })) {
+ B.setInsertPt(B.getMBB(), MI);
+ B.buildBitcast(DstReg, InsHi);
MI.eraseFromParent();
return;
}
@@ -2100,17 +2904,70 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
B.setInstr(*Span.begin());
MI.eraseFromParent();
+ // Figure out the point after the waterfall loop before mangling the control
+ // flow.
executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()),
OpsToWaterfall, MRI);
+
+ // The insertion point is now right after the original instruction.
+ //
+ // Keep the bitcast to the original vector type out of the loop. Doing this
+  // saves an extra phi we don't need inside the loop.
+ B.buildBitcast(DstReg, InsHi);
+
+ // Re-insert the constant offset add inside the waterfall loop.
+ if (ShouldMoveIndexIntoLoop)
+ reinsertVectorIndexAdd(B, *IdxLo, 1, ConstOffset);
+
+ return;
+ }
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD:
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
+ case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
+ case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
+ case AMDGPU::G_AMDGPU_BUFFER_STORE:
+ case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
+ case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
+ case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
+ case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16:
+ case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
+ case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16: {
+ applyDefaultMapping(OpdMapper);
+ executeInWaterfallLoop(MI, MRI, {1, 4});
+ return;
+ }
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC: {
+ applyDefaultMapping(OpdMapper);
+ executeInWaterfallLoop(MI, MRI, {2, 5});
+ return;
+ }
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
+ applyDefaultMapping(OpdMapper);
+ executeInWaterfallLoop(MI, MRI, {3, 6});
+ return;
+ }
+ case AMDGPU::G_AMDGPU_S_BUFFER_LOAD: {
+ applyMappingSBufferLoad(OpdMapper);
return;
}
case AMDGPU::G_INTRINSIC: {
switch (MI.getIntrinsicID()) {
- case Intrinsic::amdgcn_s_buffer_load: {
- // FIXME: Move to G_INTRINSIC_W_SIDE_EFFECTS
- executeInWaterfallLoop(MI, MRI, { 2, 3 });
- return;
- }
case Intrinsic::amdgcn_readlane: {
substituteSimpleCopyRegs(OpdMapper, 2);
@@ -2132,18 +2989,51 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
constrainOpWithReadfirstlane(MI, MRI, 3); // Index
return;
}
- default:
- break;
+ case Intrinsic::amdgcn_ballot:
+ case Intrinsic::amdgcn_interp_p1:
+ case Intrinsic::amdgcn_interp_p2:
+ case Intrinsic::amdgcn_interp_mov:
+ case Intrinsic::amdgcn_interp_p1_f16:
+ case Intrinsic::amdgcn_interp_p2_f16: {
+ applyDefaultMapping(OpdMapper);
+
+ // Readlane for m0 value, which is always the last operand.
+ // FIXME: Should this be a waterfall loop instead?
+ constrainOpWithReadfirstlane(MI, MRI, MI.getNumOperands() - 1); // Index
+ return;
+ }
+ case Intrinsic::amdgcn_permlane16:
+ case Intrinsic::amdgcn_permlanex16: {
+ // Doing a waterfall loop over these wouldn't make any sense.
+ substituteSimpleCopyRegs(OpdMapper, 2);
+ substituteSimpleCopyRegs(OpdMapper, 3);
+ constrainOpWithReadfirstlane(MI, MRI, 4);
+ constrainOpWithReadfirstlane(MI, MRI, 5);
+ return;
+ }
+ case Intrinsic::amdgcn_sbfe:
+ applyMappingBFEIntrinsic(OpdMapper, true);
+ return;
+ case Intrinsic::amdgcn_ubfe:
+ applyMappingBFEIntrinsic(OpdMapper, false);
+ return;
}
break;
}
+ case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
+ case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE: {
+ const AMDGPU::RsrcIntrinsic *RSrcIntrin
+ = AMDGPU::lookupRsrcIntrinsic(MI.getIntrinsicID());
+ assert(RSrcIntrin && RSrcIntrin->IsImage);
+ // Non-images can have complications from operands that allow both SGPR
+ // and VGPR. For now it's too complicated to figure out the final opcode
+ // to derive the register bank from the MCInstrDesc.
+ applyMappingImage(MI, OpdMapper, MRI, RSrcIntrin->RsrcArg);
+ return;
+ }
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
auto IntrID = MI.getIntrinsicID();
switch (IntrID) {
- case Intrinsic::amdgcn_buffer_load: {
- executeInWaterfallLoop(MI, MRI, { 2 });
- return;
- }
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap: {
// This is only allowed to execute with 1 lane, so readfirstlane is safe.
@@ -2167,28 +3057,19 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
constrainOpWithReadfirstlane(MI, MRI, 1); // M0
return;
}
+ case Intrinsic::amdgcn_ds_append:
+ case Intrinsic::amdgcn_ds_consume: {
+ constrainOpWithReadfirstlane(MI, MRI, 2); // M0
+ return;
+ }
case Intrinsic::amdgcn_s_sendmsg:
case Intrinsic::amdgcn_s_sendmsghalt: {
// FIXME: Should this use a waterfall loop?
constrainOpWithReadfirstlane(MI, MRI, 2); // M0
return;
}
- case Intrinsic::amdgcn_raw_buffer_load:
- case Intrinsic::amdgcn_raw_buffer_load_format:
- case Intrinsic::amdgcn_raw_tbuffer_load:
- case Intrinsic::amdgcn_raw_buffer_store:
- case Intrinsic::amdgcn_raw_buffer_store_format:
- case Intrinsic::amdgcn_raw_tbuffer_store: {
- applyDefaultMapping(OpdMapper);
- executeInWaterfallLoop(MI, MRI, {2, 4});
- return;
- }
- case Intrinsic::amdgcn_struct_buffer_load:
- case Intrinsic::amdgcn_struct_buffer_store:
- case Intrinsic::amdgcn_struct_tbuffer_load:
- case Intrinsic::amdgcn_struct_tbuffer_store: {
- applyDefaultMapping(OpdMapper);
- executeInWaterfallLoop(MI, MRI, {2, 5});
+ case Intrinsic::amdgcn_s_setreg: {
+ constrainOpWithReadfirstlane(MI, MRI, 2);
return;
}
default: {
@@ -2211,10 +3092,13 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case AMDGPU::G_LOAD:
case AMDGPU::G_ZEXTLOAD:
case AMDGPU::G_SEXTLOAD: {
- if (applyMappingWideLoad(MI, OpdMapper, MRI))
+ if (applyMappingLoad(MI, OpdMapper, MRI))
return;
break;
}
+ case AMDGPU::G_DYN_STACKALLOC:
+ applyMappingDynStackAlloc(MI, OpdMapper, MRI);
+ return;
default:
break;
}
@@ -2244,7 +3128,11 @@ AMDGPURegisterBankInfo::getDefaultMappingSOP(const MachineInstr &MI) const {
SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
+ const MachineOperand &SrcOp = MI.getOperand(i);
+ if (!SrcOp.isReg())
+ continue;
+
+ unsigned Size = getSizeInBits(SrcOp.getReg(), MRI, *TRI);
OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
}
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
@@ -2256,31 +3144,19 @@ AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
- unsigned OpdIdx = 0;
-
- unsigned Size0 = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
- OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0);
-
- if (MI.getOperand(OpdIdx).isIntrinsicID())
- OpdsMapping[OpdIdx++] = nullptr;
- Register Reg1 = MI.getOperand(OpdIdx).getReg();
- unsigned Size1 = getSizeInBits(Reg1, MRI, *TRI);
-
- unsigned DefaultBankID = Size1 == 1 ?
- AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
- unsigned Bank1 = getRegBankID(Reg1, MRI, *TRI, DefaultBankID);
-
- OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(Bank1, Size1);
-
- for (unsigned e = MI.getNumOperands(); OpdIdx != e; ++OpdIdx) {
- const MachineOperand &MO = MI.getOperand(OpdIdx);
- if (!MO.isReg())
+ // Even though we technically could use SGPRs, this would require knowledge of
+ // the constant bus restriction. Force all sources to VGPR (except for VCC).
+ //
+ // TODO: Unary ops are trivially OK, so accept SGPRs?
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &Src = MI.getOperand(i);
+ if (!Src.isReg())
continue;
- unsigned Size = getSizeInBits(MO.getReg(), MRI, *TRI);
+ unsigned Size = getSizeInBits(Src.getReg(), MRI, *TRI);
unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
- OpdsMapping[OpdIdx] = AMDGPU::getValueMapping(BankID, Size);
+ OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
}
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
@@ -2324,6 +3200,10 @@ AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI,
continue;
Register OpReg = MI.getOperand(I).getReg();
+ // We replace some dead address operands with $noreg
+ if (!OpReg)
+ continue;
+
unsigned Size = getSizeInBits(OpReg, MRI, *TRI);
// FIXME: Probably need a new intrinsic register bank searchable table to
@@ -2345,6 +3225,22 @@ AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI,
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps);
}
+/// Return the mapping for a pointer argument.
+const RegisterBankInfo::ValueMapping *
+AMDGPURegisterBankInfo::getValueMappingForPtr(const MachineRegisterInfo &MRI,
+ Register PtrReg) const {
+ LLT PtrTy = MRI.getType(PtrReg);
+ unsigned Size = PtrTy.getSizeInBits();
+ if (Subtarget.useFlatForGlobal() ||
+ !SITargetLowering::isFlatGlobalAddrSpace(PtrTy.getAddressSpace()))
+ return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+
+ // If we're using MUBUF instructions for global memory, an SGPR base register
+ // is possible. Otherwise this needs to be a VGPR.
+ const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI);
+ return AMDGPU::getValueMapping(PtrBank->getID(), Size);
+}
+
const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
@@ -2352,7 +3248,6 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
const MachineRegisterInfo &MRI = MF.getRegInfo();
SmallVector<const ValueMapping*, 2> OpdsMapping(2);
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
- LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
Register PtrReg = MI.getOperand(1).getReg();
LLT PtrTy = MRI.getType(PtrReg);
unsigned AS = PtrTy.getAddressSpace();
@@ -2364,14 +3259,23 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI);
if (PtrBank == &AMDGPU::SGPRRegBank &&
- (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
- AS != AMDGPUAS::PRIVATE_ADDRESS) &&
- isScalarLoadLegal(MI)) {
- // We have a uniform instruction so we want to use an SMRD load
- ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
- PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
+ SITargetLowering::isFlatGlobalAddrSpace(AS)) {
+ if (isScalarLoadLegal(MI)) {
+ // We have a uniform instruction so we want to use an SMRD load
+ ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
+ } else {
+ ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+
+ // If we're using MUBUF instructions for global memory, an SGPR base
+ // register is possible. Otherwise this needs to be a VGPR.
+ unsigned PtrBankID = Subtarget.useFlatForGlobal() ?
+ AMDGPU::VGPRRegBankID : AMDGPU::SGPRRegBankID;
+
+ PtrMapping = AMDGPU::getValueMapping(PtrBankID, PtrSize);
+ }
} else {
- ValMapping = AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy);
+ ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
}
@@ -2449,11 +3353,35 @@ AMDGPURegisterBankInfo::getAGPROpMapping(Register Reg,
/// in RegBankSelect::Mode::Fast. Any mapping that would cause a
/// VGPR to SGPR generated is illegal.
///
+// Operands that must be SGPRs must accept potentially divergent VGPRs as
+// legal. These will be dealt with in applyMappingImpl.
+//
const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (MI.isCopy()) {
+ // The default logic bothers to analyze impossible alternative mappings. We
+ // want the most straightforward mapping, so just directly handle this.
+ const RegisterBank *DstBank = getRegBank(MI.getOperand(0).getReg(), MRI,
+ *TRI);
+ const RegisterBank *SrcBank = getRegBank(MI.getOperand(1).getReg(), MRI,
+ *TRI);
+ assert(SrcBank && "src bank should have been assigned already");
+ if (!DstBank)
+ DstBank = SrcBank;
+
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ if (cannotCopy(*DstBank, *SrcBank, Size))
+ return getInvalidInstructionMapping();
+
+ const ValueMapping &ValMap = getValueMapping(0, Size, *DstBank);
+ return getInstructionMapping(
+ 1, /*Cost*/ 1,
+ /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1);
+ }
+
if (MI.isRegSequence()) {
// If any input is a VGPR, the result must be a VGPR. The default handling
// assumes any copy between banks is legal.
@@ -2592,6 +3520,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
LLVM_FALLTHROUGH;
}
case AMDGPU::G_PTR_ADD:
+ case AMDGPU::G_PTRMASK:
case AMDGPU::G_ADD:
case AMDGPU::G_SUB:
case AMDGPU::G_MUL:
@@ -2608,6 +3537,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_SMAX:
case AMDGPU::G_UMIN:
case AMDGPU::G_UMAX:
+ case AMDGPU::G_SHUFFLE_VECTOR:
if (isSALUMapping(MI))
return getDefaultMappingSOP(MI);
LLVM_FALLTHROUGH;
@@ -2635,7 +3565,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_FMAXNUM_IEEE:
case AMDGPU::G_FCANONICALIZE:
case AMDGPU::G_INTRINSIC_TRUNC:
+ case AMDGPU::G_BSWAP: // TODO: Somehow expand for scalar?
+ case AMDGPU::G_FSHR: // TODO: Expand for scalar
case AMDGPU::G_AMDGPU_FFBH_U32:
+ case AMDGPU::G_AMDGPU_FMIN_LEGACY:
+ case AMDGPU::G_AMDGPU_FMAX_LEGACY:
+ case AMDGPU::G_AMDGPU_RCP_IFLAG:
+ case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:
+ case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
+ case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
+ case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
return getDefaultMappingVOP(MI);
case AMDGPU::G_UMULH:
case AMDGPU::G_SMULH: {
@@ -2664,6 +3603,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
break;
}
+ case AMDGPU::G_DYN_STACKALLOC: {
+ // Result is always uniform, and a wave reduction is needed for the source.
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
+ unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
+ OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, 32);
+ break;
+ }
case AMDGPU::G_INSERT: {
unsigned BankID = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
AMDGPU::VGPRRegBankID;
@@ -2719,12 +3665,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_BITCAST:
case AMDGPU::G_INTTOPTR:
case AMDGPU::G_PTRTOINT:
- case AMDGPU::G_CTLZ:
- case AMDGPU::G_CTLZ_ZERO_UNDEF:
- case AMDGPU::G_CTTZ:
- case AMDGPU::G_CTTZ_ZERO_UNDEF:
- case AMDGPU::G_CTPOP:
- case AMDGPU::G_BSWAP:
case AMDGPU::G_BITREVERSE:
case AMDGPU::G_FABS:
case AMDGPU::G_FNEG: {
@@ -2733,21 +3673,33 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
break;
}
+ case AMDGPU::G_CTLZ_ZERO_UNDEF:
+ case AMDGPU::G_CTTZ_ZERO_UNDEF:
+ case AMDGPU::G_CTPOP: {
+ unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
+ OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
+
+ // This should really be getValueMappingSGPR64Only, but allowing the generic
+ // code to handle the register split just makes using LegalizerHelper more
+ // difficult.
+ OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
+ break;
+ }
case AMDGPU::G_TRUNC: {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
unsigned Bank = getRegBankID(Src, MRI, *TRI);
unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
- OpdsMapping[0] = DstSize == 1 && Bank != AMDGPU::SGPRRegBankID ?
- AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize) :
- AMDGPU::getValueMapping(Bank, DstSize);
+ OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
break;
}
case AMDGPU::G_ZEXT:
case AMDGPU::G_SEXT:
- case AMDGPU::G_ANYEXT: {
+ case AMDGPU::G_ANYEXT:
+ case AMDGPU::G_SEXT_INREG: {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
@@ -2765,17 +3717,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
- // TODO: Should anyext be split into 32-bit part as well?
- if (MI.getOpcode() == AMDGPU::G_ANYEXT) {
- OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
- OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
- } else {
- // Scalar extend can use 64-bit BFE, but VGPRs require extending to
- // 32-bits, and then to 64.
- OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
- OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
- SrcSize);
- }
+ // Scalar extend can use 64-bit BFE, but VGPRs require extending to
+ // 32-bits, and then to 64.
+ OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
+ OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
+ SrcSize);
break;
}
case AMDGPU::G_FCMP: {
@@ -2790,43 +3736,43 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_STORE: {
assert(MI.getOperand(0).isReg());
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- // FIXME: We need to specify a different reg bank once scalar stores
- // are supported.
+
+ // FIXME: We need to specify a different reg bank once scalar stores are
+ // supported.
const ValueMapping *ValMapping =
AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
- // FIXME: Depending on the type of store, the pointer could be in
- // the SGPR Reg bank.
- // FIXME: Pointer size should be based on the address space.
- const ValueMapping *PtrMapping =
- AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
-
OpdsMapping[0] = ValMapping;
- OpdsMapping[1] = PtrMapping;
+ OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());
break;
}
-
case AMDGPU::G_ICMP: {
auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+
+ // See if the result register has already been constrained to vcc, which may
+ // happen due to control flow intrinsic lowering.
+ unsigned DstBank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI,
+ AMDGPU::SGPRRegBankID);
unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
- bool CanUseSCC = Op2Bank == AMDGPU::SGPRRegBankID &&
+ bool CanUseSCC = DstBank == AMDGPU::SGPRRegBankID &&
+ Op2Bank == AMDGPU::SGPRRegBankID &&
Op3Bank == AMDGPU::SGPRRegBankID &&
(Size == 32 || (Size == 64 &&
(Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
Subtarget.hasScalarCompareEq64()));
- unsigned Op0Bank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
+ DstBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
+ unsigned SrcBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
// TODO: Use 32-bit for scalar output size.
// SCC results will need to be copied to a 32-bit SGPR virtual register.
const unsigned ResultSize = 1;
- OpdsMapping[0] = AMDGPU::getValueMapping(Op0Bank, ResultSize);
- OpdsMapping[1] = nullptr; // Predicate Operand.
- OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
- OpdsMapping[3] = AMDGPU::getValueMapping(Op3Bank, Size);
+ OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, ResultSize);
+ OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, Size);
+ OpdsMapping[3] = AMDGPU::getValueMapping(SrcBank, Size);
break;
}
case AMDGPU::G_EXTRACT_VECTOR_ELT: {
@@ -2852,15 +3798,22 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
- unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
unsigned InsertEltBankID = getRegBankID(MI.getOperand(2).getReg(),
MRI, *TRI);
unsigned IdxBankID = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
- OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, VecSize);
- OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(InsertEltBankID,
- InsertSize);
+ OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
+
+ // This is a weird case, because we need to break down the mapping based on
+ // the register bank of a different operand.
+ if (InsertSize == 64 && OutputBankID == AMDGPU::VGPRRegBankID) {
+ OpdsMapping[2] = AMDGPU::getValueMappingSplit64(InsertEltBankID,
+ InsertSize);
+ } else {
+ assert(InsertSize == 32 || InsertSize == 64);
+ OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBankID, InsertSize);
+ }
// The index can be either if the source vector is VGPR.
OpdsMapping[3] = AMDGPU::getValueMapping(IdxBankID, IdxSize);
@@ -2878,6 +3831,116 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
break;
}
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD:
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
+ case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
+ case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
+ case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
+ case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16:
+ case AMDGPU::G_AMDGPU_BUFFER_STORE:
+ case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
+ case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
+ case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
+ case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16: {
+ OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+
+ // rsrc
+ OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+
+ // vindex
+ OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+
+ // voffset
+ OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
+
+ // soffset
+ OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+
+ // Any remaining operands are immediates and were correctly null
+ // initialized.
+ break;
+ }
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC: {
+ // vdata_out
+ OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+
+ // vdata_in
+ OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+
+ // rsrc
+ OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+
+ // vindex
+ OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
+
+ // voffset
+ OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+
+ // soffset
+ OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
+
+ // Any remaining operands are immediates and were correctly null
+ // initialized.
+ break;
+ }
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
+ // vdata_out
+ OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+
+ // vdata_in
+ OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+
+ // cmp
+ OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+
+ // rsrc
+ OpdsMapping[3] = getSGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
+
+ // vindex
+ OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+
+ // voffset
+ OpdsMapping[5] = getVGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
+
+ // soffset
+ OpdsMapping[6] = getSGPROpMapping(MI.getOperand(6).getReg(), MRI, *TRI);
+
+ // Any remaining operands are immediates and were correctly null
+ // initialized.
+ break;
+ }
+ case AMDGPU::G_AMDGPU_S_BUFFER_LOAD: {
+ // Lie and claim everything is legal, even though some need to be
+ // SGPRs. applyMapping will have to deal with it as a waterfall loop.
+ OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+ OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+
+    // We need to convert this to a MUBUF if either the resource or offset is
+ // VGPR.
+ unsigned RSrcBank = OpdsMapping[1]->BreakDown[0].RegBank->getID();
+ unsigned OffsetBank = OpdsMapping[2]->BreakDown[0].RegBank->getID();
+ unsigned ResultBank = regBankUnion(RSrcBank, OffsetBank);
+
+ unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ OpdsMapping[0] = AMDGPU::getValueMapping(ResultBank, Size0);
+ break;
+ }
case AMDGPU::G_INTRINSIC: {
switch (MI.getIntrinsicID()) {
default:
@@ -2890,9 +3953,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_log_clamp:
case Intrinsic::amdgcn_rcp:
case Intrinsic::amdgcn_rcp_legacy:
+ case Intrinsic::amdgcn_sqrt:
case Intrinsic::amdgcn_rsq:
case Intrinsic::amdgcn_rsq_legacy:
case Intrinsic::amdgcn_rsq_clamp:
+ case Intrinsic::amdgcn_fmul_legacy:
case Intrinsic::amdgcn_ldexp:
case Intrinsic::amdgcn_frexp_mant:
case Intrinsic::amdgcn_frexp_exp:
@@ -2911,8 +3976,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_fmad_ftz:
case Intrinsic::amdgcn_mbcnt_lo:
case Intrinsic::amdgcn_mbcnt_hi:
- case Intrinsic::amdgcn_ubfe:
- case Intrinsic::amdgcn_sbfe:
case Intrinsic::amdgcn_mul_u24:
case Intrinsic::amdgcn_mul_i24:
case Intrinsic::amdgcn_lerp:
@@ -2933,13 +3996,21 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_udot4:
case Intrinsic::amdgcn_sdot8:
case Intrinsic::amdgcn_udot8:
- case Intrinsic::amdgcn_wwm:
- case Intrinsic::amdgcn_wqm:
+ return getDefaultMappingVOP(MI);
+ case Intrinsic::amdgcn_sbfe:
+ case Intrinsic::amdgcn_ubfe:
+ if (isSALUMapping(MI))
+ return getDefaultMappingSOP(MI);
return getDefaultMappingVOP(MI);
case Intrinsic::amdgcn_ds_swizzle:
case Intrinsic::amdgcn_ds_permute:
case Intrinsic::amdgcn_ds_bpermute:
case Intrinsic::amdgcn_update_dpp:
+ case Intrinsic::amdgcn_mov_dpp8:
+ case Intrinsic::amdgcn_mov_dpp:
+ case Intrinsic::amdgcn_wwm:
+ case Intrinsic::amdgcn_wqm:
+ case Intrinsic::amdgcn_softwqm:
return getDefaultMappingAllVGPR(MI);
case Intrinsic::amdgcn_kernarg_segment_ptr:
case Intrinsic::amdgcn_s_getpc:
@@ -2954,26 +4025,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
= AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size);
break;
}
- case Intrinsic::amdgcn_s_buffer_load: {
- // FIXME: This should be moved to G_INTRINSIC_W_SIDE_EFFECTS
- Register RSrc = MI.getOperand(2).getReg(); // SGPR
- Register Offset = MI.getOperand(3).getReg(); // SGPR/imm
-
- unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
- unsigned Size3 = MRI.getType(Offset).getSizeInBits();
-
- unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
- unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);
-
- OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size0);
- OpdsMapping[1] = nullptr; // intrinsic id
-
- // Lie and claim everything is legal, even though some need to be
- // SGPRs. applyMapping will have to deal with it as a waterfall loop.
- OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
- OpdsMapping[3] = AMDGPU::getValueMapping(OffsetBank, Size3);
- OpdsMapping[4] = nullptr;
+ case Intrinsic::amdgcn_ps_live: {
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
break;
}
case Intrinsic::amdgcn_div_scale: {
@@ -2983,11 +4036,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);
unsigned SrcSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
- OpdsMapping[3] = AMDGPU::getValueMapping(
- getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI), SrcSize);
- OpdsMapping[4] = AMDGPU::getValueMapping(
- getRegBankID(MI.getOperand(4).getReg(), MRI, *TRI), SrcSize);
-
+ OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
+ OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
break;
}
case Intrinsic::amdgcn_class: {
@@ -2997,10 +4047,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
unsigned Src1Size = MRI.getType(Src1Reg).getSizeInBits();
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
- OpdsMapping[2] = AMDGPU::getValueMapping(getRegBankID(Src0Reg, MRI, *TRI),
- Src0Size);
- OpdsMapping[3] = AMDGPU::getValueMapping(getRegBankID(Src1Reg, MRI, *TRI),
- Src1Size);
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src0Size);
+ OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src1Size);
break;
}
case Intrinsic::amdgcn_icmp:
@@ -3009,10 +4057,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// This is not VCCRegBank because this is not used in boolean contexts.
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
unsigned OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
- unsigned Op1Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
- unsigned Op2Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
- OpdsMapping[2] = AMDGPU::getValueMapping(Op1Bank, OpSize);
- OpdsMapping[3] = AMDGPU::getValueMapping(Op2Bank, OpSize);
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
+ OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
break;
}
case Intrinsic::amdgcn_readlane: {
@@ -3054,6 +4100,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
}
+ case Intrinsic::amdgcn_permlane16:
+ case Intrinsic::amdgcn_permlanex16: {
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ OpdsMapping[4] = getSGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
+ OpdsMapping[5] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+ break;
+ }
case Intrinsic::amdgcn_mfma_f32_4x4x1f32:
case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
case Intrinsic::amdgcn_mfma_i32_4x4x4i8:
@@ -3086,9 +4142,46 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[4] = getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
break;
}
+ case Intrinsic::amdgcn_interp_p1:
+ case Intrinsic::amdgcn_interp_p2:
+ case Intrinsic::amdgcn_interp_mov:
+ case Intrinsic::amdgcn_interp_p1_f16:
+ case Intrinsic::amdgcn_interp_p2_f16: {
+ const int M0Idx = MI.getNumOperands() - 1;
+ Register M0Reg = MI.getOperand(M0Idx).getReg();
+ unsigned M0Bank = getRegBankID(M0Reg, MRI, *TRI, AMDGPU::SGPRRegBankID);
+ unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
+ for (int I = 2; I != M0Idx && MI.getOperand(I).isReg(); ++I)
+ OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+
+    // This must be an SGPR, but take whatever the original bank is and fix it
+    // later.
+ OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
+ break;
+ }
+ case Intrinsic::amdgcn_ballot: {
+ unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, SrcSize);
+ break;
+ }
}
break;
}
+ case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
+ case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE: {
+ auto IntrID = MI.getIntrinsicID();
+ const AMDGPU::RsrcIntrinsic *RSrcIntrin = AMDGPU::lookupRsrcIntrinsic(IntrID);
+ assert(RSrcIntrin && "missing RsrcIntrinsic for image intrinsic");
+ // Non-images can have complications from operands that allow both SGPR
+ // and VGPR. For now it's too complicated to figure out the final opcode
+ // to derive the register bank from the MCInstrDesc.
+ assert(RSrcIntrin->IsImage);
+ return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
+ }
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
auto IntrID = MI.getIntrinsicID();
switch (IntrID) {
@@ -3100,13 +4193,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
}
- case Intrinsic::amdgcn_ds_append:
- case Intrinsic::amdgcn_ds_consume:
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax:
- case Intrinsic::amdgcn_atomic_inc:
- case Intrinsic::amdgcn_atomic_dec:
return getDefaultMappingAllVGPR(MI);
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap: {
@@ -3118,17 +4207,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
break;
}
+ case Intrinsic::amdgcn_ds_append:
+ case Intrinsic::amdgcn_ds_consume: {
+ unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
+ OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ break;
+ }
case Intrinsic::amdgcn_exp_compr:
- OpdsMapping[0] = nullptr; // IntrinsicID
- // FIXME: These are immediate values which can't be read from registers.
- OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
- OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
- // FIXME: Could we support packed types here?
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
- // FIXME: These are immediate values which can't be read from registers.
- OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
- OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
break;
case Intrinsic::amdgcn_exp:
// FIXME: Could we support packed types here?
@@ -3137,31 +4225,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
break;
- case Intrinsic::amdgcn_buffer_load: {
- Register RSrc = MI.getOperand(2).getReg(); // SGPR
- Register VIndex = MI.getOperand(3).getReg(); // VGPR
- Register Offset = MI.getOperand(4).getReg(); // SGPR/VGPR/imm
-
- unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
- unsigned Size3 = MRI.getType(VIndex).getSizeInBits();
- unsigned Size4 = MRI.getType(Offset).getSizeInBits();
-
- unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
- unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);
-
- OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0);
- OpdsMapping[1] = nullptr; // intrinsic id
-
- // Lie and claim everything is legal, even though some need to be
- // SGPRs. applyMapping will have to deal with it as a waterfall loop.
- OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
- OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size3);
- OpdsMapping[4] = AMDGPU::getValueMapping(OffsetBank, Size4);
- OpdsMapping[5] = nullptr;
- OpdsMapping[6] = nullptr;
- break;
- }
case Intrinsic::amdgcn_s_sendmsg:
case Intrinsic::amdgcn_s_sendmsghalt: {
// This must be an SGPR, but accept a VGPR.
@@ -3170,8 +4233,14 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
break;
}
- case Intrinsic::amdgcn_end_cf:
- case Intrinsic::amdgcn_init_exec: {
+ case Intrinsic::amdgcn_s_setreg: {
+ // This must be an SGPR, but accept a VGPR.
+ unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+ AMDGPU::SGPRRegBankID);
+ OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
+ break;
+ }
+ case Intrinsic::amdgcn_end_cf: {
unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
@@ -3227,7 +4296,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_init_exec_from_input: {
unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
- OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
}
case Intrinsic::amdgcn_ds_gws_init:
@@ -3251,15 +4319,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
default:
- if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
- AMDGPU::lookupRsrcIntrinsic(IntrID)) {
- // Non-images can have complications from operands that allow both SGPR
- // and VGPR. For now it's too complicated to figure out the final opcode
- // to derive the register bank from the MCInstrDesc.
- if (RSrcIntrin->IsImage)
- return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
- }
-
return getInvalidInstructionMapping();
}
break;
@@ -3319,9 +4378,20 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_ATOMICRMW_UMAX:
case AMDGPU::G_ATOMICRMW_UMIN:
case AMDGPU::G_ATOMICRMW_FADD:
- case AMDGPU::G_ATOMIC_CMPXCHG:
- case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: {
- return getDefaultMappingAllVGPR(MI);
+ case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG:
+ case AMDGPU::G_AMDGPU_ATOMIC_INC:
+ case AMDGPU::G_AMDGPU_ATOMIC_DEC: {
+ OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());
+ OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ break;
+ }
+ case AMDGPU::G_ATOMIC_CMPXCHG: {
+ OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());
+ OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
+ break;
}
case AMDGPU::G_BRCOND: {
unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index 1ac7d3652a8b..8f38ec4eeb3a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -69,13 +69,20 @@ public:
void constrainOpWithReadfirstlane(MachineInstr &MI, MachineRegisterInfo &MRI,
unsigned OpIdx) const;
- bool applyMappingWideLoad(MachineInstr &MI,
- const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
- MachineRegisterInfo &MRI) const;
+ bool applyMappingDynStackAlloc(MachineInstr &MI,
+ const OperandsMapper &OpdMapper,
+ MachineRegisterInfo &MRI) const;
+ bool applyMappingLoad(MachineInstr &MI,
+ const OperandsMapper &OpdMapper,
+ MachineRegisterInfo &MRI) const;
bool
applyMappingImage(MachineInstr &MI,
- const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
+ const OperandsMapper &OpdMapper,
MachineRegisterInfo &MRI, int RSrcIdx) const;
+ bool applyMappingSBufferLoad(const OperandsMapper &OpdMapper) const;
+
+ bool applyMappingBFEIntrinsic(const OperandsMapper &OpdMapper,
+ bool Signed) const;
void lowerScalarMinMax(MachineIRBuilder &B, MachineInstr &MI) const;
@@ -91,6 +98,9 @@ public:
/// See RegisterBankInfo::applyMapping.
void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
+ const ValueMapping *getValueMappingForPtr(const MachineRegisterInfo &MRI,
+ Register Ptr) const;
+
const RegisterBankInfo::InstructionMapping &
getInstrMappingForLoad(const MachineInstr &MI) const;
@@ -168,6 +178,15 @@ public:
const InstructionMapping &
getInstrMapping(const MachineInstr &MI) const override;
+
+private:
+
+ bool foldExtractEltToCmpSelect(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ const OperandsMapper &OpdMapper) const;
+ bool foldInsertEltToCmpSelect(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ const OperandsMapper &OpdMapper) const;
};
} // End llvm namespace.
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
index c495316c5bce..9f6ebd00cd97 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
@@ -7,16 +7,16 @@
//===----------------------------------------------------------------------===//
def SGPRRegBank : RegisterBank<"SGPR",
- [SReg_32, SReg_64, SReg_128, SReg_256, SReg_512, SReg_1024]
+ [SReg_LO16, SReg_32, SReg_64, SReg_128, SReg_160, SReg_192, SReg_256, SReg_512, SReg_1024]
>;
def VGPRRegBank : RegisterBank<"VGPR",
- [VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512, VReg_1024]
+ [VGPR_LO16, VGPR_HI16, VGPR_32, VReg_64, VReg_96, VReg_128, VReg_160, VReg_192, VReg_256, VReg_512, VReg_1024]
>;
// It is helpful to distinguish conditions from ordinary SGPRs.
def VCCRegBank : RegisterBank <"VCC", [SReg_1]>;
def AGPRRegBank : RegisterBank <"AGPR",
- [AGPR_32, AReg_64, AReg_128, AReg_512, AReg_1024]
+ [AGPR_LO16, AGPR_32, AReg_64, AReg_96, AReg_128, AReg_160, AReg_192, AReg_256, AReg_512, AReg_1024]
>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
deleted file mode 100644
index 9806e6b0714f..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
+++ /dev/null
@@ -1,142 +0,0 @@
-//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// Parent TargetRegisterInfo class common to all hw codegen targets.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPURegisterInfo.h"
-#include "AMDGPUTargetMachine.h"
-#include "SIMachineFunctionInfo.h"
-#include "SIRegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-
-using namespace llvm;
-
-AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {}
-
-//===----------------------------------------------------------------------===//
-// Function handling callbacks - Functions are a seldom used feature of GPUS, so
-// they are not supported at this time.
-//===----------------------------------------------------------------------===//
-
-// Table of NumRegs sized pieces at every 32-bit offset.
-static const uint16_t SubRegFromChannelTable[][32] = {
- { AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
- AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
- AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
- AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
- AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
- AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
- AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
- AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31
- },
- {
- AMDGPU::sub0_sub1, AMDGPU::sub1_sub2, AMDGPU::sub2_sub3, AMDGPU::sub3_sub4,
- AMDGPU::sub4_sub5, AMDGPU::sub5_sub6, AMDGPU::sub6_sub7, AMDGPU::sub7_sub8,
- AMDGPU::sub8_sub9, AMDGPU::sub9_sub10, AMDGPU::sub10_sub11, AMDGPU::sub11_sub12,
- AMDGPU::sub12_sub13, AMDGPU::sub13_sub14, AMDGPU::sub14_sub15, AMDGPU::sub15_sub16,
- AMDGPU::sub16_sub17, AMDGPU::sub17_sub18, AMDGPU::sub18_sub19, AMDGPU::sub19_sub20,
- AMDGPU::sub20_sub21, AMDGPU::sub21_sub22, AMDGPU::sub22_sub23, AMDGPU::sub23_sub24,
- AMDGPU::sub24_sub25, AMDGPU::sub25_sub26, AMDGPU::sub26_sub27, AMDGPU::sub27_sub28,
- AMDGPU::sub28_sub29, AMDGPU::sub29_sub30, AMDGPU::sub30_sub31, AMDGPU::NoSubRegister
- },
- {
- AMDGPU::sub0_sub1_sub2, AMDGPU::sub1_sub2_sub3, AMDGPU::sub2_sub3_sub4, AMDGPU::sub3_sub4_sub5,
- AMDGPU::sub4_sub5_sub6, AMDGPU::sub5_sub6_sub7, AMDGPU::sub6_sub7_sub8, AMDGPU::sub7_sub8_sub9,
- AMDGPU::sub8_sub9_sub10, AMDGPU::sub9_sub10_sub11, AMDGPU::sub10_sub11_sub12, AMDGPU::sub11_sub12_sub13,
- AMDGPU::sub12_sub13_sub14, AMDGPU::sub13_sub14_sub15, AMDGPU::sub14_sub15_sub16, AMDGPU::sub15_sub16_sub17,
- AMDGPU::sub16_sub17_sub18, AMDGPU::sub17_sub18_sub19, AMDGPU::sub18_sub19_sub20, AMDGPU::sub19_sub20_sub21,
- AMDGPU::sub20_sub21_sub22, AMDGPU::sub21_sub22_sub23, AMDGPU::sub22_sub23_sub24, AMDGPU::sub23_sub24_sub25,
- AMDGPU::sub24_sub25_sub26, AMDGPU::sub25_sub26_sub27, AMDGPU::sub26_sub27_sub28, AMDGPU::sub27_sub28_sub29,
- AMDGPU::sub28_sub29_sub30, AMDGPU::sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister
- },
- {
- AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3_sub4, AMDGPU::sub2_sub3_sub4_sub5, AMDGPU::sub3_sub4_sub5_sub6,
- AMDGPU::sub4_sub5_sub6_sub7, AMDGPU::sub5_sub6_sub7_sub8, AMDGPU::sub6_sub7_sub8_sub9, AMDGPU::sub7_sub8_sub9_sub10,
- AMDGPU::sub8_sub9_sub10_sub11, AMDGPU::sub9_sub10_sub11_sub12, AMDGPU::sub10_sub11_sub12_sub13, AMDGPU::sub11_sub12_sub13_sub14,
- AMDGPU::sub12_sub13_sub14_sub15, AMDGPU::sub13_sub14_sub15_sub16, AMDGPU::sub14_sub15_sub16_sub17, AMDGPU::sub15_sub16_sub17_sub18,
- AMDGPU::sub16_sub17_sub18_sub19, AMDGPU::sub17_sub18_sub19_sub20, AMDGPU::sub18_sub19_sub20_sub21, AMDGPU::sub19_sub20_sub21_sub22,
- AMDGPU::sub20_sub21_sub22_sub23, AMDGPU::sub21_sub22_sub23_sub24, AMDGPU::sub22_sub23_sub24_sub25, AMDGPU::sub23_sub24_sub25_sub26,
- AMDGPU::sub24_sub25_sub26_sub27, AMDGPU::sub25_sub26_sub27_sub28, AMDGPU::sub26_sub27_sub28_sub29, AMDGPU::sub27_sub28_sub29_sub30,
- AMDGPU::sub28_sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister
- }
-};
-
-// FIXME: TableGen should generate something to make this manageable for all
-// register classes. At a minimum we could use the opposite of
-// composeSubRegIndices and go up from the base 32-bit subreg.
-unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel, unsigned NumRegs) {
- const unsigned NumRegIndex = NumRegs - 1;
-
- assert(NumRegIndex < array_lengthof(SubRegFromChannelTable) &&
- "Not implemented");
- assert(Channel < array_lengthof(SubRegFromChannelTable[0]));
- return SubRegFromChannelTable[NumRegIndex][Channel];
-}
-
-void AMDGPURegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
- MCRegAliasIterator R(Reg, this, true);
-
- for (; R.isValid(); ++R)
- Reserved.set(*R);
-}
-
-#define GET_REGINFO_TARGET_DESC
-#include "AMDGPUGenRegisterInfo.inc"
-
-// Forced to be here by one .inc
-const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
- const MachineFunction *MF) const {
- CallingConv::ID CC = MF->getFunction().getCallingConv();
- switch (CC) {
- case CallingConv::C:
- case CallingConv::Fast:
- case CallingConv::Cold:
- return CSR_AMDGPU_HighRegs_SaveList;
- default: {
- // Dummy to not crash RegisterClassInfo.
- static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
- return &NoCalleeSavedReg;
- }
- }
-}
-
-const MCPhysReg *
-SIRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
- return nullptr;
-}
-
-const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
- CallingConv::ID CC) const {
- switch (CC) {
- case CallingConv::C:
- case CallingConv::Fast:
- case CallingConv::Cold:
- return CSR_AMDGPU_HighRegs_RegMask;
- default:
- return nullptr;
- }
-}
-
-Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const SIFrameLowering *TFI =
- MF.getSubtarget<GCNSubtarget>().getFrameLowering();
- const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
- return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
- : FuncInfo->getStackPtrOffsetReg();
-}
-
-const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const {
- return CSR_AMDGPU_AllVGPRs_RegMask;
-}
-
-const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
- return CSR_AMDGPU_AllAllocatableSRegs_RegMask;
-}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
deleted file mode 100644
index 9e713ca804a1..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// TargetRegisterInfo interface that is implemented by all hw codegen
-/// targets.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERINFO_H
-#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERINFO_H
-
-#define GET_REGINFO_HEADER
-#include "AMDGPUGenRegisterInfo.inc"
-
-namespace llvm {
-
-class GCNSubtarget;
-class TargetInstrInfo;
-
-struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
- AMDGPURegisterInfo();
-
- /// \returns the sub reg enum value for the given \p Channel
- /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
- static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);
-
- void reserveRegisterTuples(BitVector &, unsigned Reg) const;
-};
-
-} // End namespace llvm
-
-#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td
deleted file mode 100644
index ab71b7aa8a57..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Tablegen register definitions common to all hw codegen targets.
-//
-//===----------------------------------------------------------------------===//
-
-let Namespace = "AMDGPU" in {
-
-foreach Index = 0-31 in {
- def sub#Index : SubRegIndex<32, !shl(Index, 5)>;
-}
-
-}
-
-include "SIRegisterInfo.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
index 9a1e2fc42ed5..9c3d96de6d68 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
@@ -208,8 +208,8 @@ bool AMDGPURewriteOutArguments::doInitialization(Module &M) {
#ifndef NDEBUG
bool AMDGPURewriteOutArguments::isVec3ToVec4Shuffle(Type *Ty0, Type* Ty1) const {
- VectorType *VT0 = dyn_cast<VectorType>(Ty0);
- VectorType *VT1 = dyn_cast<VectorType>(Ty1);
+ auto *VT0 = dyn_cast<FixedVectorType>(Ty0);
+ auto *VT1 = dyn_cast<FixedVectorType>(Ty1);
if (!VT0 || !VT1)
return false;
@@ -409,7 +409,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
DL->getTypeSizeInBits(Val->getType())) {
assert(isVec3ToVec4Shuffle(EffectiveEltTy, Val->getType()));
Val = B.CreateShuffleVector(Val, UndefValue::get(Val->getType()),
- { 0, 1, 2 });
+ ArrayRef<int>{0, 1, 2});
}
Val = B.CreateBitCast(Val, EffectiveEltTy);
@@ -453,9 +453,8 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
PointerType *ArgType = cast<PointerType>(Arg.getType());
auto *EltTy = ArgType->getElementType();
- unsigned Align = Arg.getParamAlignment();
- if (Align == 0)
- Align = DL->getABITypeAlignment(EltTy);
+ const auto Align =
+ DL->getValueOrABITypeAlignment(Arg.getParamAlign(), EltTy);
Value *Val = B.CreateExtractValue(StubCall, RetIdx++);
Type *PtrTy = Val->getType()->getPointerTo(ArgType->getAddressSpace());
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index 8d70536ec21c..bc68310b2f5c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -198,6 +198,7 @@ def : SourceOfDivergence<int_r600_read_tidig_y>;
def : SourceOfDivergence<int_r600_read_tidig_z>;
def : SourceOfDivergence<int_amdgcn_atomic_inc>;
def : SourceOfDivergence<int_amdgcn_atomic_dec>;
+def : SourceOfDivergence<int_amdgcn_global_atomic_csub>;
def : SourceOfDivergence<int_amdgcn_ds_fadd>;
def : SourceOfDivergence<int_amdgcn_ds_fmin>;
def : SourceOfDivergence<int_amdgcn_ds_fmax>;
@@ -238,6 +239,7 @@ def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_xor>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_inc>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_dec>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cmpswap>;
+def : SourceOfDivergence<int_amdgcn_buffer_atomic_csub>;
def : SourceOfDivergence<int_amdgcn_ps_live>;
def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
@@ -247,6 +249,7 @@ def : SourceOfDivergence<int_amdgcn_permlanex16>;
def : SourceOfDivergence<int_amdgcn_mov_dpp>;
def : SourceOfDivergence<int_amdgcn_mov_dpp8>;
def : SourceOfDivergence<int_amdgcn_update_dpp>;
+def : SourceOfDivergence<int_amdgcn_writelane>;
def : SourceOfDivergence<int_amdgcn_mfma_f32_4x4x1f32>;
def : SourceOfDivergence<int_amdgcn_mfma_f32_4x4x1f32>;
@@ -270,5 +273,13 @@ def : SourceOfDivergence<int_amdgcn_mfma_i32_32x32x8i8>;
def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x2bf16>;
def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x4bf16>;
+// The dummy boolean output is divergent from the IR's perspective,
+// but the mask results are uniform. These produce a divergent and
+// uniform result, so the returned struct is collectively divergent.
+// isAlwaysUniform can override the extract of the uniform component.
+def : SourceOfDivergence<int_amdgcn_if>;
+def : SourceOfDivergence<int_amdgcn_else>;
+def : SourceOfDivergence<int_amdgcn_loop>;
+
foreach intr = AMDGPUImageDimAtomicIntrinsics in
def : SourceOfDivergence<intr>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 445e91092499..213788ae0f67 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -59,13 +59,6 @@ R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
FullFS += FS;
ParseSubtargetFeatures(GPU, FullFS);
- // FIXME: I don't think think Evergreen has any useful support for
- // denormals, but should be checked. Should we issue a warning somewhere
- // if someone tries to enable these?
- if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- FP32Denormals = false;
- }
-
HasMulU24 = getGeneration() >= EVERGREEN;
HasMulI24 = hasCaymanISA();
@@ -76,9 +69,6 @@ GCNSubtarget &
GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS) {
// Determine default and user-specified characteristics
- // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
- // enabled, but some instructions do not respect them and they run at the
- // double precision rate, so don't enable by default.
//
// We want to be able to turn these off, but making this a subtarget feature
// for SI has the unhelpful behavior that it unsets everything else if you
@@ -88,20 +78,11 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
// unset everything else if it is disabled
// Assuming ECC is enabled is the conservative default.
- SmallString<256> FullFS("+promote-alloca,+load-store-opt,+sram-ecc,+xnack,");
+ SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,+sram-ecc,+xnack,");
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
- // FIXME: I don't think think Evergreen has any useful support for
- // denormals, but should be checked. Should we issue a warning somewhere
- // if someone tries to enable these?
- if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- FullFS += "+fp64-fp16-denormals,";
- } else {
- FullFS += "-fp32-denormals,";
- }
-
FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
// Disable mutually exclusive bits.
@@ -145,12 +126,14 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
}
// Don't crash on invalid devices.
- if (WavefrontSize == 0)
- WavefrontSize = 64;
+ if (WavefrontSizeLog2 == 0)
+ WavefrontSizeLog2 = 5;
HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
- if (DoesNotSupportXNACK && EnableXNACK) {
+ // Disable XNACK on targets where it is not enabled by default unless it is
+ // explicitly requested.
+ if (!FS.contains("+xnack") && DoesNotSupportXNACK && EnableXNACK) {
ToggleFeature(AMDGPU::FeatureXNACK);
EnableXNACK = false;
}
@@ -170,8 +153,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
TargetTriple(TT),
Has16BitInsts(false),
HasMadMixInsts(false),
- FP32Denormals(false),
- FPExceptions(false),
+ HasMadMacF32Insts(false),
+ HasDsSrc2Insts(false),
HasSDWA(false),
HasVOP3PInsts(false),
HasMulI24(true),
@@ -182,7 +165,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
HasTrigReducedRange(false),
MaxWavesPerEU(10),
LocalMemorySize(0),
- WavefrontSize(0)
+ WavefrontSizeLog2(0)
{ }
GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
@@ -196,9 +179,9 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
MaxPrivateElementSize(0),
FastFMAF32(false),
+ FastDenormalF32(false),
HalfRate64Ops(false),
- FP64FP16Denormals(false),
FlatForGlobal(false),
AutoWaitcntBeforeBarrier(false),
CodeObjectV3(false),
@@ -224,6 +207,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
GFX8Insts(false),
GFX9Insts(false),
GFX10Insts(false),
+ GFX10_3Insts(false),
GFX7GFX8GFX9Insts(false),
SGPRInitBug(false),
HasSMemRealTime(false),
@@ -241,7 +225,10 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasDPP(false),
HasDPP8(false),
HasR128A16(false),
+ HasGFX10A16(false),
+ HasG16(false),
HasNSAEncoding(false),
+ GFX10_BEncoding(false),
HasDLInsts(false),
HasDot1Insts(false),
HasDot2Insts(false),
@@ -256,6 +243,8 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
DoesNotSupportSRAMECC(false),
HasNoSdstCMPX(false),
HasVscnt(false),
+ HasGetWaveIdInst(false),
+ HasSMemTimeInst(false),
HasRegisterBanking(false),
HasVOP3Literal(false),
HasNoDataDepHazard(false),
@@ -287,6 +276,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this);
CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
+ InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
RegBankInfo.reset(new AMDGPURegisterBankInfo(*this));
InstSelector.reset(new AMDGPUInstructionSelector(
@@ -325,18 +315,41 @@ unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
}
+// FIXME: Should return min,max range.
unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
const Function &F) const {
- unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
- unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
- if (!WorkGroupsPerCu)
+ const unsigned MaxWorkGroupSize = getFlatWorkGroupSizes(F).second;
+ const unsigned MaxWorkGroupsPerCu = getMaxWorkGroupsPerCU(MaxWorkGroupSize);
+ if (!MaxWorkGroupsPerCu)
return 0;
- unsigned MaxWaves = getMaxWavesPerEU();
- unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu;
- unsigned NumWaves = Limit / (Bytes ? Bytes : 1u);
- NumWaves = std::min(NumWaves, MaxWaves);
- NumWaves = std::max(NumWaves, 1u);
- return NumWaves;
+
+ const unsigned WaveSize = getWavefrontSize();
+
+  // FIXME: Do we need to account for the alignment requirement of LDS by
+  // rounding the size up?
+ // Compute restriction based on LDS usage
+ unsigned NumGroups = getLocalMemorySize() / (Bytes ? Bytes : 1u);
+
+ // This can be queried with more LDS than is possible, so just assume the
+ // worst.
+ if (NumGroups == 0)
+ return 1;
+
+ NumGroups = std::min(MaxWorkGroupsPerCu, NumGroups);
+
+ // Round to the number of waves.
+ const unsigned MaxGroupNumWaves = (MaxWorkGroupSize + WaveSize - 1) / WaveSize;
+ unsigned MaxWaves = NumGroups * MaxGroupNumWaves;
+
+ // Clamp to the maximum possible number of waves.
+ MaxWaves = std::min(MaxWaves, getMaxWavesPerEU());
+
+ // FIXME: Needs to be a multiple of the group size?
+ //MaxWaves = MaxGroupNumWaves * (MaxWaves / MaxGroupNumWaves);
+
+ assert(MaxWaves > 0 && MaxWaves <= getMaxWavesPerEU() &&
+ "computed invalid occupancy");
+ return MaxWaves;
}
unsigned
@@ -396,13 +409,10 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
// number of waves per execution unit to values implied by requested
// minimum/maximum flat work group sizes.
unsigned MinImpliedByFlatWorkGroupSize =
- getMaxWavesPerEU(FlatWorkGroupSizes.second);
- bool RequestedFlatWorkGroupSize = false;
-
- if (F.hasFnAttribute("amdgpu-flat-work-group-size")) {
- Default.first = MinImpliedByFlatWorkGroupSize;
- RequestedFlatWorkGroupSize = true;
- }
+ getWavesPerEUForWorkGroup(FlatWorkGroupSizes.second);
+ Default.first = MinImpliedByFlatWorkGroupSize;
+ bool RequestedFlatWorkGroupSize =
+ F.hasFnAttribute("amdgpu-flat-work-group-size");
// Requested minimum/maximum number of waves per execution unit.
std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
@@ -414,9 +424,7 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
// Make sure requested values do not violate subtarget's specifications.
if (Requested.first < getMinWavesPerEU() ||
- Requested.first > getMaxWavesPerEU())
- return Default;
- if (Requested.second > getMaxWavesPerEU())
+ Requested.second > getMaxWavesPerEU())
return Default;
// Make sure requested values are compatible with values implied by requested
@@ -497,12 +505,12 @@ uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
const DataLayout &DL = F.getParent()->getDataLayout();
uint64_t ExplicitArgBytes = 0;
- MaxAlign = Align::None();
+ MaxAlign = Align(1);
for (const Argument &Arg : F.args()) {
Type *ArgTy = Arg.getType();
- const Align Alignment(DL.getABITypeAlignment(ArgTy));
+ const Align Alignment = DL.getABITypeAlign(ArgTy);
uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
ExplicitArgBytes = alignTo(ExplicitArgBytes, Alignment) + AllocSize;
MaxAlign = std::max(MaxAlign, Alignment);
@@ -622,13 +630,12 @@ unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
return 2; // VCC.
}
-unsigned GCNSubtarget::computeOccupancy(const MachineFunction &MF,
- unsigned LDSSize,
+unsigned GCNSubtarget::computeOccupancy(const Function &F, unsigned LDSSize,
unsigned NumSGPRs,
unsigned NumVGPRs) const {
unsigned Occupancy =
std::min(getMaxWavesPerEU(),
- getOccupancyWithLocalMemSize(LDSSize, MF.getFunction()));
+ getOccupancyWithLocalMemSize(LDSSize, F));
if (NumSGPRs)
Occupancy = std::min(Occupancy, getOccupancyWithNumSGPRs(NumSGPRs));
if (NumVGPRs)
@@ -716,20 +723,20 @@ unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
return MaxNumVGPRs;
}
-void GCNSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
- SDep &Dep) const {
+void GCNSubtarget::adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use,
+ int UseOpIdx, SDep &Dep) const {
if (Dep.getKind() != SDep::Kind::Data || !Dep.getReg() ||
- !Src->isInstr() || !Dst->isInstr())
+ !Def->isInstr() || !Use->isInstr())
return;
- MachineInstr *SrcI = Src->getInstr();
- MachineInstr *DstI = Dst->getInstr();
+ MachineInstr *DefI = Def->getInstr();
+ MachineInstr *UseI = Use->getInstr();
- if (SrcI->isBundle()) {
+ if (DefI->isBundle()) {
const SIRegisterInfo *TRI = getRegisterInfo();
auto Reg = Dep.getReg();
- MachineBasicBlock::const_instr_iterator I(SrcI->getIterator());
- MachineBasicBlock::const_instr_iterator E(SrcI->getParent()->instr_end());
+ MachineBasicBlock::const_instr_iterator I(DefI->getIterator());
+ MachineBasicBlock::const_instr_iterator E(DefI->getParent()->instr_end());
unsigned Lat = 0;
for (++I; I != E && I->isBundledWithPred(); ++I) {
if (I->modifiesRegister(Reg, TRI))
@@ -738,12 +745,12 @@ void GCNSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
--Lat;
}
Dep.setLatency(Lat);
- } else if (DstI->isBundle()) {
+ } else if (UseI->isBundle()) {
const SIRegisterInfo *TRI = getRegisterInfo();
auto Reg = Dep.getReg();
- MachineBasicBlock::const_instr_iterator I(DstI->getIterator());
- MachineBasicBlock::const_instr_iterator E(DstI->getParent()->instr_end());
- unsigned Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *SrcI);
+ MachineBasicBlock::const_instr_iterator I(UseI->getIterator());
+ MachineBasicBlock::const_instr_iterator E(UseI->getParent()->instr_end());
+ unsigned Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *DefI);
for (++I; I != E && I->isBundledWithPred() && Lat; ++I) {
if (I->readsRegister(Reg, TRI))
break;
@@ -754,53 +761,6 @@ void GCNSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
}
namespace {
-struct MemOpClusterMutation : ScheduleDAGMutation {
- const SIInstrInfo *TII;
-
- MemOpClusterMutation(const SIInstrInfo *tii) : TII(tii) {}
-
- void apply(ScheduleDAGInstrs *DAG) override {
- SUnit *SUa = nullptr;
- // Search for two consequent memory operations and link them
- // to prevent scheduler from moving them apart.
- // In DAG pre-process SUnits are in the original order of
- // the instructions before scheduling.
- for (SUnit &SU : DAG->SUnits) {
- MachineInstr &MI2 = *SU.getInstr();
- if (!MI2.mayLoad() && !MI2.mayStore()) {
- SUa = nullptr;
- continue;
- }
- if (!SUa) {
- SUa = &SU;
- continue;
- }
-
- MachineInstr &MI1 = *SUa->getInstr();
- if ((TII->isVMEM(MI1) && TII->isVMEM(MI2)) ||
- (TII->isFLAT(MI1) && TII->isFLAT(MI2)) ||
- (TII->isSMRD(MI1) && TII->isSMRD(MI2)) ||
- (TII->isDS(MI1) && TII->isDS(MI2))) {
- SU.addPredBarrier(SUa);
-
- for (const SDep &SI : SU.Preds) {
- if (SI.getSUnit() != SUa)
- SUa->addPred(SDep(SI.getSUnit(), SDep::Artificial));
- }
-
- if (&SU != &DAG->ExitSU) {
- for (const SDep &SI : SUa->Succs) {
- if (SI.getSUnit() != &SU)
- SI.getSUnit()->addPred(SDep(&SU, SDep::Artificial));
- }
- }
- }
-
- SUa = &SU;
- }
- }
-};
-
struct FillMFMAShadowMutation : ScheduleDAGMutation {
const SIInstrInfo *TII;
@@ -927,7 +887,6 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
void GCNSubtarget::getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
- Mutations.push_back(std::make_unique<MemOpClusterMutation>(&InstrInfo));
Mutations.push_back(std::make_unique<FillMFMAShadowMutation>(&InstrInfo));
}
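Editorial aside (not part of the patch): the rewritten AMDGPUSubtarget::getOccupancyWithLocalMemSize above now derives occupancy from how many work groups fit in LDS, rounds each group up to whole waves, and clamps to the per-EU maximum. Below is a minimal standalone C++ sketch of that formula; the helper name and every number in main() are made-up illustrations, not real subtarget parameters.

#include <algorithm>
#include <cstdio>

// Sketch of the LDS-limited occupancy computation: groups that fit in LDS,
// times waves per group, clamped to the hardware's per-EU wave limit.
static unsigned occupancyWithLocalMemSize(unsigned Bytes, unsigned LocalMemorySize,
                                          unsigned MaxWorkGroupSize,
                                          unsigned MaxWorkGroupsPerCu,
                                          unsigned WaveSize, unsigned MaxWavesPerEU) {
  // Number of work groups whose LDS demand fits, guarding against Bytes == 0.
  unsigned NumGroups = LocalMemorySize / (Bytes ? Bytes : 1u);
  if (NumGroups == 0)
    return 1; // More LDS requested than exists; assume the worst case.
  NumGroups = std::min(MaxWorkGroupsPerCu, NumGroups);
  // Waves needed by one work group, rounded up to a whole wave.
  unsigned MaxGroupNumWaves = (MaxWorkGroupSize + WaveSize - 1) / WaveSize;
  unsigned MaxWaves = NumGroups * MaxGroupNumWaves;
  return std::min(MaxWaves, MaxWavesPerEU);
}

int main() {
  // Example: 16 KiB of LDS per group, 64 KiB of LDS total, 256-lane groups,
  // wave64, at most 16 groups per CU and 10 waves per EU (illustrative values).
  unsigned Waves = occupancyWithLocalMemSize(16384, 65536, 256, 16, 64, 10);
  printf("occupancy: %u waves\n", Waves); // 4 groups * 4 waves = 16, clamped to 10
  return 0;
}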
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 19a240800ba1..c833bfbcf936 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -16,6 +16,7 @@
#include "AMDGPU.h"
#include "AMDGPUCallLowering.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "R600FrameLowering.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
@@ -24,6 +25,7 @@
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
@@ -65,8 +67,8 @@ private:
protected:
bool Has16BitInsts;
bool HasMadMixInsts;
- bool FP32Denormals;
- bool FPExceptions;
+ bool HasMadMacF32Insts;
+ bool HasDsSrc2Insts;
bool HasSDWA;
bool HasVOP3PInsts;
bool HasMulI24;
@@ -77,7 +79,7 @@ protected:
bool HasTrigReducedRange;
unsigned MaxWavesPerEU;
int LocalMemorySize;
- unsigned WavefrontSize;
+ char WavefrontSizeLog2;
public:
AMDGPUSubtarget(const Triple &TT);
@@ -140,6 +142,10 @@ public:
return isAmdHsaOS() || isMesaKernel(F);
}
+ bool isGCN() const {
+ return TargetTriple.getArch() == Triple::amdgcn;
+ }
+
bool has16BitInsts() const {
return Has16BitInsts;
}
@@ -148,17 +154,12 @@ public:
return HasMadMixInsts;
}
- bool hasFP32Denormals(const Function &F) const {
- // FIXME: This should not be a property of the subtarget. This should be a
- // property with a default set by the calling convention which can be
- // overridden by attributes. For now, use the subtarget feature as a
- // placeholder attribute. The function arguments only purpose is to
- // discourage use without a function context until this is removed.
- return FP32Denormals;
+ bool hasMadMacF32Insts() const {
+ return HasMadMacF32Insts || !isGCN();
}
- bool hasFPExceptions() const {
- return FPExceptions;
+ bool hasDsSrc2Insts() const {
+ return HasDsSrc2Insts;
}
bool hasSDWA() const {
@@ -194,7 +195,11 @@ public:
}
unsigned getWavefrontSize() const {
- return WavefrontSize;
+ return 1 << WavefrontSizeLog2;
+ }
+
+ unsigned getWavefrontSizeLog2() const {
+ return WavefrontSizeLog2;
}
int getLocalMemorySize() const {
@@ -221,9 +226,10 @@ public:
/// \returns Maximum flat work group size supported by the subtarget.
virtual unsigned getMaxFlatWorkGroupSize() const = 0;
- /// \returns Maximum number of waves per execution unit supported by the
- /// subtarget and limited by given \p FlatWorkGroupSize.
- virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
+ /// \returns Number of waves per execution unit required to support the given
+ /// \p FlatWorkGroupSize.
+ virtual unsigned
+ getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0;
/// \returns Minimum number of waves per execution unit supported by the
/// subtarget.
@@ -246,6 +252,13 @@ public:
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
+  /// \returns Corresponding DWARF register number mapping flavour for the
+ /// \p WavefrontSize.
+ AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const {
+ return getWavefrontSize() == 32 ? AMDGPUDwarfFlavour::Wave32
+ : AMDGPUDwarfFlavour::Wave64;
+ }
+
virtual ~AMDGPUSubtarget() {}
};
@@ -278,6 +291,7 @@ public:
private:
/// GlobalISel related APIs.
std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
+ std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
std::unique_ptr<InstructionSelector> InstSelector;
std::unique_ptr<LegalizerInfo> Legalizer;
std::unique_ptr<RegisterBankInfo> RegBankInfo;
@@ -292,10 +306,10 @@ protected:
// Possibly statically set by tablegen, but may want to be overridden.
bool FastFMAF32;
+ bool FastDenormalF32;
bool HalfRate64Ops;
// Dynamially set bits that enable features.
- bool FP64FP16Denormals;
bool FlatForGlobal;
bool AutoWaitcntBeforeBarrier;
bool CodeObjectV3;
@@ -325,6 +339,7 @@ protected:
bool GFX8Insts;
bool GFX9Insts;
bool GFX10Insts;
+ bool GFX10_3Insts;
bool GFX7GFX8GFX9Insts;
bool SGPRInitBug;
bool HasSMemRealTime;
@@ -342,7 +357,10 @@ protected:
bool HasDPP;
bool HasDPP8;
bool HasR128A16;
+ bool HasGFX10A16;
+ bool HasG16;
bool HasNSAEncoding;
+ bool GFX10_BEncoding;
bool HasDLInsts;
bool HasDot1Insts;
bool HasDot2Insts;
@@ -357,6 +375,8 @@ protected:
bool DoesNotSupportSRAMECC;
bool HasNoSdstCMPX;
bool HasVscnt;
+ bool HasGetWaveIdInst;
+ bool HasSMemTimeInst;
bool HasRegisterBanking;
bool HasVOP3Literal;
bool HasNoDataDepHazard;
@@ -426,6 +446,10 @@ public:
return CallLoweringInfo.get();
}
+ const InlineAsmLowering *getInlineAsmLowering() const override {
+ return InlineAsmLoweringInfo.get();
+ }
+
InstructionSelector *getInstructionSelector() const override {
return InstSelector.get();
}
@@ -453,10 +477,6 @@ public:
return (Generation)Gen;
}
- unsigned getWavefrontSizeLog2() const {
- return Log2_32(WavefrontSize);
- }
-
/// Return the number of high bits known to be zero for a frame index.
unsigned getKnownHighZeroBitsForFrameIndex() const {
return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
@@ -506,6 +526,10 @@ public:
return getGeneration() >= VOLCANIC_ISLANDS;
}
+ bool hasFractBug() const {
+ return getGeneration() == SOUTHERN_ISLANDS;
+ }
+
bool hasBFE() const {
return true;
}
@@ -587,6 +611,11 @@ public:
return getGeneration() <= SEA_ISLANDS;
}
+ /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
+ bool partialVCCWritesUpdateVCCZ() const {
+ return getGeneration() >= GFX10;
+ }
+
/// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
/// was written by a VALU instruction.
bool hasSMRDReadVALUDefHazard() const {
@@ -617,20 +646,6 @@ public:
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
const Function &) const;
- /// Alias for hasFP64FP16Denormals
- bool hasFP16Denormals(const Function &F) const {
- return FP64FP16Denormals;
- }
-
- /// Alias for hasFP64FP16Denormals
- bool hasFP64Denormals(const Function &F) const {
- return FP64FP16Denormals;
- }
-
- bool hasFP64FP16Denormals(const Function &F) const {
- return FP64FP16Denormals;
- }
-
bool supportsMinMaxDenormModes() const {
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
@@ -724,6 +739,18 @@ public:
return ScalarFlatScratchInsts;
}
+ bool hasGlobalAddTidInsts() const {
+ return GFX10_BEncoding;
+ }
+
+ bool hasAtomicCSub() const {
+ return GFX10_BEncoding;
+ }
+
+ bool hasMultiDwordFlatScratchAddressing() const {
+ return getGeneration() >= GFX9;
+ }
+
bool hasFlatSegmentOffsetBug() const {
return HasFlatSegmentOffsetBug;
}
@@ -853,6 +880,14 @@ public:
return HasVscnt;
}
+ bool hasGetWaveIdInst() const {
+ return HasGetWaveIdInst;
+ }
+
+ bool hasSMemTimeInst() const {
+ return HasSMemTimeInst;
+ }
+
bool hasRegisterBanking() const {
return HasRegisterBanking;
}
@@ -890,30 +925,6 @@ public:
void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
- /// \returns Number of execution units per compute unit supported by the
- /// subtarget.
- unsigned getEUsPerCU() const {
- return AMDGPU::IsaInfo::getEUsPerCU(this);
- }
-
- /// \returns Maximum number of waves per compute unit supported by the
- /// subtarget without any kind of limitation.
- unsigned getMaxWavesPerCU() const {
- return AMDGPU::IsaInfo::getMaxWavesPerCU(this);
- }
-
- /// \returns Maximum number of waves per compute unit supported by the
- /// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
- }
-
- /// \returns Number of waves per work group supported by the subtarget and
- /// limited by given \p FlatWorkGroupSize.
- unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
- }
-
// static wrappers
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
@@ -979,6 +990,14 @@ public:
return HasR128A16;
}
+ bool hasGFX10A16() const {
+ return HasGFX10A16;
+ }
+
+ bool hasA16() const { return hasR128A16() || hasGFX10A16(); }
+
+ bool hasG16() const { return HasG16; }
+
bool hasOffset3fBug() const {
return HasOffset3fBug;
}
@@ -987,6 +1006,14 @@ public:
return HasNSAEncoding;
}
+ bool hasGFX10_BEncoding() const {
+ return GFX10_BEncoding;
+ }
+
+ bool hasGFX10_3Insts() const {
+ return GFX10_3Insts;
+ }
+
bool hasMadF16() const;
bool enableSIScheduler() const {
@@ -1059,6 +1086,8 @@ public:
return HasNSAtoVMEMBug;
}
+ bool hasHardClauses() const { return getGeneration() >= GFX10; }
+
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
/// SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
@@ -1071,7 +1100,7 @@ public:
/// registers if provided.
/// Note, occupancy can be affected by the scratch allocation as well, but
/// we do not have enough information to compute it.
- unsigned computeOccupancy(const MachineFunction &MF, unsigned LDSSize = 0,
+ unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
/// \returns true if the flat_scratch register should be initialized with the
@@ -1178,7 +1207,7 @@ public:
const override;
bool isWave32() const {
- return WavefrontSize == 32;
+ return getWavefrontSize() == 32;
}
const TargetRegisterClass *getBoolRC() const {
@@ -1201,10 +1230,11 @@ public:
return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
}
- /// \returns Maximum number of waves per execution unit supported by the
- /// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
- return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
+ /// \returns Number of waves per execution unit required to support the given
+ /// \p FlatWorkGroupSize.
+ unsigned
+ getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
}
/// \returns Minimum number of waves per execution unit supported by the
@@ -1213,7 +1243,8 @@ public:
return AMDGPU::IsaInfo::getMinWavesPerEU(this);
}
- void adjustSchedDependency(SUnit *Src, SUnit *Dst, SDep &Dep) const override;
+ void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
+ SDep &Dep) const override;
};
class R600Subtarget final : public R600GenSubtargetInfo,
@@ -1338,10 +1369,11 @@ public:
return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
}
- /// \returns Maximum number of waves per execution unit supported by the
- /// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
- return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
+ /// \returns Number of waves per execution unit required to support the given
+ /// \p FlatWorkGroupSize.
+ unsigned
+ getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
}
/// \returns Minimum number of waves per execution unit supported by the
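Editorial aside (not part of the patch): AMDGPUSubtarget now stores only the log2 of the wavefront size (WavefrontSizeLog2) and derives the actual size, the wave32 check and the DWARF flavour from it. A tiny standalone C++ sketch of that encoding; the struct below is invented for illustration and is not the real class.

#include <cstdio>

struct WaveInfo {
  char WavefrontSizeLog2;                                    // 5 -> wave32, 6 -> wave64
  unsigned getWavefrontSize() const { return 1u << WavefrontSizeLog2; }
  bool isWave32() const { return getWavefrontSize() == 32; }
};

int main() {
  WaveInfo W32{5}, W64{6};
  printf("%u %u\n", W32.getWavefrontSize(), W64.getWavefrontSize()); // 32 64
  printf("wave32? %d %d\n", W32.isWave32(), W64.isWave32());         // 1 0
  return 0;
}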
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index c8dc6f6e3bf4..b4b10835837c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -16,6 +16,7 @@
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUCallLowering.h"
+#include "AMDGPUExportClustering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUMacroFusion.h"
@@ -23,6 +24,7 @@
#include "AMDGPUTargetTransformInfo.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "R600MachineScheduler.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
@@ -30,6 +32,7 @@
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/Passes.h"
@@ -138,6 +141,13 @@ static cl::opt<bool, true> EnableAMDGPUFunctionCallsOpt(
cl::init(true),
cl::Hidden);
+static cl::opt<bool, true> EnableAMDGPUFixedFunctionABIOpt(
+ "amdgpu-fixed-function-abi",
+ cl::desc("Enable all implicit function arguments"),
+ cl::location(AMDGPUTargetMachine::EnableFixedFunctionABI),
+ cl::init(false),
+ cl::Hidden);
+
// Enable lib calls simplifications
static cl::opt<bool> EnableLibCallSimplify(
"amdgpu-simplify-libcall",
@@ -183,6 +193,11 @@ static cl::opt<bool> EnableScalarIRPasses(
cl::init(true),
cl::Hidden);
+static cl::opt<bool> EnableStructurizerWorkarounds(
+ "amdgpu-enable-structurizer-workarounds",
+ cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
+ cl::Hidden);
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
@@ -217,22 +232,29 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPULowerKernelAttributesPass(*PR);
initializeAMDGPULowerIntrinsicsPass(*PR);
initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);
+ initializeAMDGPUPostLegalizerCombinerPass(*PR);
+ initializeAMDGPUPreLegalizerCombinerPass(*PR);
initializeAMDGPUPromoteAllocaPass(*PR);
+ initializeAMDGPUPromoteAllocaToVectorPass(*PR);
initializeAMDGPUCodeGenPreparePass(*PR);
initializeAMDGPUPropagateAttributesEarlyPass(*PR);
initializeAMDGPUPropagateAttributesLatePass(*PR);
initializeAMDGPURewriteOutArgumentsPass(*PR);
initializeAMDGPUUnifyMetadataPass(*PR);
initializeSIAnnotateControlFlowPass(*PR);
+ initializeSIInsertHardClausesPass(*PR);
initializeSIInsertWaitcntsPass(*PR);
initializeSIModeRegisterPass(*PR);
initializeSIWholeQuadModePass(*PR);
initializeSILowerControlFlowPass(*PR);
+ initializeSIRemoveShortExecBranchesPass(*PR);
+ initializeSIPreEmitPeepholePass(*PR);
initializeSIInsertSkipsPass(*PR);
initializeSIMemoryLegalizerPass(*PR);
initializeSIOptimizeExecMaskingPass(*PR);
initializeSIPreAllocateWWMRegsPass(*PR);
initializeSIFormMemoryClausesPass(*PR);
+ initializeSIPostRABundlerPass(*PR);
initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
initializeAMDGPUAAWrapperPassPass(*PR);
initializeAMDGPUExternalAAWrapperPass(*PR);
@@ -242,6 +264,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUPrintfRuntimeBindingPass(*PR);
initializeGCNRegBankReassignPass(*PR);
initializeGCNNSAReassignPass(*PR);
+ initializeSIAddIMGInitPass(*PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -263,6 +286,7 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
+ DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
return DAG;
}
@@ -362,10 +386,17 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
TLOF(createTLOF(getTargetTriple())) {
initAsmInfo();
+ if (TT.getArch() == Triple::amdgcn) {
+ if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize64"))
+ MRI.reset(llvm::createGCNMCRegisterInfo(AMDGPUDwarfFlavour::Wave64));
+ else if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize32"))
+ MRI.reset(llvm::createGCNMCRegisterInfo(AMDGPUDwarfFlavour::Wave32));
+ }
}
bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
bool AMDGPUTargetMachine::EnableFunctionCalls = false;
+bool AMDGPUTargetMachine::EnableFixedFunctionABI = false;
AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
@@ -415,20 +446,19 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
}
PM.add(createAMDGPUUnifyMetadataPass());
PM.add(createAMDGPUPrintfRuntimeBinding());
- PM.add(createAMDGPUPropagateAttributesLatePass(this));
- if (Internalize) {
+ if (Internalize)
PM.add(createInternalizePass(mustPreserveGV));
+ PM.add(createAMDGPUPropagateAttributesLatePass(this));
+ if (Internalize)
PM.add(createGlobalDCEPass());
- }
if (EarlyInline)
PM.add(createAMDGPUAlwaysInlinePass(false));
});
- const auto &Opt = Options;
Builder.addExtension(
PassManagerBuilder::EP_EarlyAsPossible,
- [AMDGPUAA, LibCallSimplify, &Opt, this](const PassManagerBuilder &,
- legacy::PassManagerBase &PM) {
+ [AMDGPUAA, LibCallSimplify, this](const PassManagerBuilder &,
+ legacy::PassManagerBase &PM) {
if (AMDGPUAA) {
PM.add(createAMDGPUAAWrapperPass());
PM.add(createAMDGPUExternalAAWrapperPass());
@@ -436,12 +466,12 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
PM.add(llvm::createAMDGPUPropagateAttributesEarlyPass(this));
PM.add(llvm::createAMDGPUUseNativeCallsPass());
if (LibCallSimplify)
- PM.add(llvm::createAMDGPUSimplifyLibCallsPass(Opt, this));
+ PM.add(llvm::createAMDGPUSimplifyLibCallsPass(this));
});
Builder.addExtension(
PassManagerBuilder::EP_CGSCCOptimizerLate,
- [](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+ [EnableOpt](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
// Add infer address spaces pass to the opt pipeline after inlining
// but before SROA to increase SROA opportunities.
PM.add(createInferAddressSpacesPass());
@@ -449,6 +479,11 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
// This should run after inlining to have any chance of doing anything,
// and before other cleanup optimizations.
PM.add(createAMDGPULowerKernelAttributesPass());
+
+      // Promote alloca to vector before SROA and loop unrolling. If we manage
+      // to eliminate allocas before unrolling, we may choose to unroll less.
+ if (EnableOpt)
+ PM.add(createAMDGPUPromoteAllocaToVector());
});
}
@@ -616,7 +651,9 @@ public:
bool addILPOpts() override;
bool addInstSelector() override;
bool addIRTranslator() override;
+ void addPreLegalizeMachineIR() override;
bool addLegalizeMachineIR() override;
+ void addPreRegBankSelect() override;
bool addRegBankSelect() override;
bool addGlobalInstructionSelect() override;
void addFastRegAlloc() override;
@@ -750,10 +787,15 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
if (EnableLoadStoreVectorizer)
addPass(createLoadStoreVectorizerPass());
+
+  // The LowerSwitch pass may introduce unreachable blocks that can cause
+  // unexpected behavior for subsequent passes. Placing it here lets those
+  // blocks be cleaned up by UnreachableBlockElim, which is inserted next in
+  // the pass flow.
+ addPass(createLowerSwitchPass());
}
bool AMDGPUPassConfig::addPreISel() {
- addPass(createLowerSwitchPass());
addPass(createFlattenCFGPass());
return false;
}
@@ -835,7 +877,11 @@ bool GCNPassConfig::addPreISel() {
// regions formed by them.
addPass(&AMDGPUUnifyDivergentExitNodesID);
if (!LateCFGStructurize) {
- addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
+ if (EnableStructurizerWorkarounds) {
+ addPass(createFixIrreduciblePass());
+ addPass(createUnifyLoopExitsPass());
+ }
+ addPass(createStructurizeCFGPass(false)); // true -> SkipUniformRegions
}
addPass(createSinkingPass());
addPass(createAMDGPUAnnotateUniformValues());
@@ -884,6 +930,12 @@ bool GCNPassConfig::addInstSelector() {
AMDGPUPassConfig::addInstSelector();
addPass(&SIFixSGPRCopiesID);
addPass(createSILowerI1CopiesPass());
+  // TODO: We have to add FinalizeISel to expand V_ADD/SUB_U64_PSEUDO before
+  // SIFixupVectorISel, which expects the V_ADD/SUB -> A_ADDC/SUBB pairs to be
+  // expanded already. This will be removed as soon as SIFixupVectorISel is
+  // changed to work with V_ADD/SUB_U64_PSEUDO instead.
+ addPass(&FinalizeISelID);
addPass(createSIFixupVectorISelPass());
addPass(createSIAddIMGInitPass());
return false;
@@ -894,11 +946,22 @@ bool GCNPassConfig::addIRTranslator() {
return false;
}
+void GCNPassConfig::addPreLegalizeMachineIR() {
+ bool IsOptNone = getOptLevel() == CodeGenOpt::None;
+ addPass(createAMDGPUPreLegalizeCombiner(IsOptNone));
+ addPass(new Localizer());
+}
+
bool GCNPassConfig::addLegalizeMachineIR() {
addPass(new Legalizer());
return false;
}
+void GCNPassConfig::addPreRegBankSelect() {
+ bool IsOptNone = getOptLevel() == CodeGenOpt::None;
+ addPass(createAMDGPUPostLegalizeCombiner(IsOptNone));
+}
+
bool GCNPassConfig::addRegBankSelect() {
addPass(new RegBankSelect());
return false;
@@ -932,12 +995,9 @@ void GCNPassConfig::addFastRegAlloc() {
}
void GCNPassConfig::addOptimizedRegAlloc() {
- if (OptExecMaskPreRA) {
+ if (OptExecMaskPreRA)
insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
- insertPass(&SIOptimizeExecMaskingPreRAID, &SIFormMemoryClausesID);
- } else {
- insertPass(&MachineSchedulerID, &SIFormMemoryClausesID);
- }
+ insertPass(&MachineSchedulerID, &SIFormMemoryClausesID);
// This must be run immediately after phi elimination and before
// TwoAddressInstructions, otherwise the processing of the tied operand of
@@ -972,6 +1032,7 @@ void GCNPassConfig::addPostRegAlloc() {
}
void GCNPassConfig::addPreSched2() {
+ addPass(&SIPostRABundlerID);
}
void GCNPassConfig::addPreEmitPass() {
@@ -992,8 +1053,12 @@ void GCNPassConfig::addPreEmitPass() {
// FIXME: This stand-alone pass will emit indiv. S_NOP 0, as needed. It would
// be better for it to emit S_NOP <N> when possible.
addPass(&PostRAHazardRecognizerID);
+ if (getOptLevel() > CodeGenOpt::None)
+ addPass(&SIInsertHardClausesID);
+ addPass(&SIRemoveShortExecBranchesID);
addPass(&SIInsertSkipsPassID);
+ addPass(&SIPreEmitPeepholeID);
addPass(&BranchRelaxationPassID);
}
@@ -1022,11 +1087,13 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
MFI->initializeBaseYamlFields(YamlMFI);
- auto parseRegister = [&](const yaml::StringValue &RegName, unsigned &RegVal) {
- if (parseNamedRegisterReference(PFS, RegVal, RegName.Value, Error)) {
+ auto parseRegister = [&](const yaml::StringValue &RegName, Register &RegVal) {
+ Register TempReg;
+ if (parseNamedRegisterReference(PFS, TempReg, RegName.Value, Error)) {
SourceRange = RegName.SourceRange;
return true;
}
+ RegVal = TempReg;
return false;
};
@@ -1044,7 +1111,6 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
};
if (parseRegister(YamlMFI.ScratchRSrcReg, MFI->ScratchRSrcReg) ||
- parseRegister(YamlMFI.ScratchWaveOffsetReg, MFI->ScratchWaveOffsetReg) ||
parseRegister(YamlMFI.FrameOffsetReg, MFI->FrameOffsetReg) ||
parseRegister(YamlMFI.StackPtrOffsetReg, MFI->StackPtrOffsetReg))
return true;
@@ -1054,11 +1120,6 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg);
}
- if (MFI->ScratchWaveOffsetReg != AMDGPU::SCRATCH_WAVE_OFFSET_REG &&
- !AMDGPU::SGPR_32RegClass.contains(MFI->ScratchWaveOffsetReg)) {
- return diagnoseRegisterClass(YamlMFI.ScratchWaveOffsetReg);
- }
-
if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&
!AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {
return diagnoseRegisterClass(YamlMFI.FrameOffsetReg);
@@ -1078,7 +1139,7 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
return false;
if (A->IsRegister) {
- unsigned Reg;
+ Register Reg;
if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) {
SourceRange = A->RegisterName.SourceRange;
return true;
@@ -1152,8 +1213,10 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
- MFI->Mode.FP32Denormals = YamlMFI.Mode.FP32Denormals;
- MFI->Mode.FP64FP16Denormals = YamlMFI.Mode.FP64FP16Denormals;
+ MFI->Mode.FP32InputDenormals = YamlMFI.Mode.FP32InputDenormals;
+ MFI->Mode.FP32OutputDenormals = YamlMFI.Mode.FP32OutputDenormals;
+ MFI->Mode.FP64FP16InputDenormals = YamlMFI.Mode.FP64FP16InputDenormals;
+ MFI->Mode.FP64FP16OutputDenormals = YamlMFI.Mode.FP64FP16OutputDenormals;
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 70fa3961236f..e223fecc8819 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -39,6 +39,7 @@ protected:
public:
static bool EnableLateStructurizeCFG;
static bool EnableFunctionCalls;
+ static bool EnableFixedFunctionABI;
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, TargetOptions Options,
@@ -56,8 +57,9 @@ public:
void adjustPassManager(PassManagerBuilder &) override;
/// Get the integer value of a null pointer in the given address space.
- uint64_t getNullPointerValue(unsigned AddrSpace) const {
+ static int64_t getNullPointerValue(unsigned AddrSpace) {
return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
AddrSpace == AMDGPUAS::REGION_ADDRESS) ? -1 : 0;
}
};
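Editorial aside (not part of the patch): with this change getNullPointerValue is static and treats the private address space like local and region, so the null sentinel for those spaces is all-ones (-1) rather than 0. A minimal C++ sketch of that rule; the address-space numbers below are assumptions standing in for the AMDGPUAS enum.

#include <cstdint>
#include <cstdio>

// Assumed numbering for illustration only; the real values come from AMDGPUAS.
enum : unsigned { GLOBAL = 1, REGION = 2, LOCAL = 3, PRIVATE = 5 };

static int64_t nullPointerValue(unsigned AddrSpace) {
  return (AddrSpace == LOCAL || AddrSpace == PRIVATE || AddrSpace == REGION) ? -1 : 0;
}

int main() {
  printf("%lld %lld\n", (long long)nullPointerValue(LOCAL),
         (long long)nullPointerValue(GLOBAL)); // -1 0
  return 0;
}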
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
index 819bebb7932d..ed564ec1ad54 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
@@ -15,9 +15,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H
-#include "AMDGPU.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Target/TargetMachine.h"
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index c4eeb81c5133..542a5f006c0f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -69,6 +69,21 @@ static cl::opt<unsigned> UnrollThresholdIf(
cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"),
cl::init(150), cl::Hidden);
+static cl::opt<bool> UnrollRuntimeLocal(
+ "amdgpu-unroll-runtime-local",
+ cl::desc("Allow runtime unroll for AMDGPU if local memory used in a loop"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool> UseLegacyDA(
+ "amdgpu-use-legacy-divergence-analysis",
+ cl::desc("Enable legacy divergence analysis for AMDGPU"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned> UnrollMaxBlockToAnalyze(
+ "amdgpu-unroll-max-block-to-analyze",
+ cl::desc("Inner loop block size threshold to analyze in unroll for AMDGPU"),
+ cl::init(20), cl::Hidden);
+
static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
unsigned Depth = 0) {
const Instruction *I = dyn_cast<Instruction>(Cond);
@@ -172,6 +187,9 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
(!isa<GlobalVariable>(GEP->getPointerOperand()) &&
!isa<Argument>(GEP->getPointerOperand())))
continue;
+ LLVM_DEBUG(dbgs() << "Allow unroll runtime for loop:\n"
+ << *L << " due to LDS use.\n");
+ UP.Runtime = UnrollRuntimeLocal;
}
// Check if GEP depends on a value defined by this loop itself.
@@ -210,13 +228,22 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
if (UP.Threshold >= MaxBoost)
return;
}
+
+  // If we got a GEP in a small BB from an inner loop, then increase the max
+  // trip count to analyze, for a better cost estimation in the unroller.
+ if (L->empty() && BB->size() < UnrollMaxBlockToAnalyze)
+ UP.MaxIterationsCountToAnalyze = 32;
}
}
+void AMDGPUTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ BaseT::getPeelingPreferences(L, SE, PP);
+}
unsigned GCNTTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
// The concept of vector registers doesn't really exist. Some packed vector
// operations operate on the normal 32-bit registers.
- return 256;
+ return MaxVGPRs;
}
unsigned GCNTTIImpl::getNumberOfRegisters(bool Vec) const {
@@ -225,6 +252,13 @@ unsigned GCNTTIImpl::getNumberOfRegisters(bool Vec) const {
return getHardwareNumberOfRegisters(Vec) >> 3;
}
+unsigned GCNTTIImpl::getNumberOfRegisters(unsigned RCID) const {
+ const SIRegisterInfo *TRI = ST->getRegisterInfo();
+ const TargetRegisterClass *RC = TRI->getRegClass(RCID);
+ unsigned NumVGPRs = (TRI->getRegSizeInBits(*RC) + 31) / 32;
+ return getHardwareNumberOfRegisters(false) / NumVGPRs;
+}
+
unsigned GCNTTIImpl::getRegisterBitWidth(bool Vector) const {
return 32;
}
@@ -234,8 +268,8 @@ unsigned GCNTTIImpl::getMinVectorRegisterBitWidth() const {
}
unsigned GCNTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize,
- unsigned ChainSizeInBytes,
- VectorType *VecTy) const {
+ unsigned ChainSizeInBytes,
+ VectorType *VecTy) const {
unsigned VecRegBitWidth = VF * LoadSize;
if (VecRegBitWidth > 128 && VecTy->getScalarSizeInBits() < 32)
// TODO: Support element-size less than 32bit?
@@ -262,20 +296,16 @@ unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
return 512;
}
- if (AddrSpace == AMDGPUAS::FLAT_ADDRESS ||
- AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
- AddrSpace == AMDGPUAS::REGION_ADDRESS)
- return 128;
-
if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
return 8 * ST->getMaxPrivateElementSize();
- llvm_unreachable("unhandled address space");
+ // Common to flat, global, local and region. Assume for unknown addrspace.
+ return 128;
}
bool GCNTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
- unsigned AddrSpace) const {
+ Align Alignment,
+ unsigned AddrSpace) const {
// We allow vectorization of flat stores, even though we may need to decompose
// them later if they may access private memory. We don't have enough context
// here, and legalization can handle it.
@@ -287,17 +317,87 @@ bool GCNTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
}
bool GCNTTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
- unsigned AddrSpace) const {
+ Align Alignment,
+ unsigned AddrSpace) const {
return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}
bool GCNTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
- unsigned AddrSpace) const {
+ Align Alignment,
+ unsigned AddrSpace) const {
return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}
+// FIXME: Really we would like to issue multiple 128-bit loads and stores per
+// iteration. Should we report a larger size and let it legalize?
+//
+// FIXME: Should we use narrower types for local/region, or account for when
+// unaligned access is legal?
+//
+// FIXME: This could use fine tuning and microbenchmarks.
+Type *GCNTTIImpl::getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
+ unsigned SrcAddrSpace,
+ unsigned DestAddrSpace,
+ unsigned SrcAlign,
+ unsigned DestAlign) const {
+ unsigned MinAlign = std::min(SrcAlign, DestAlign);
+
+ // A (multi-)dword access at an address == 2 (mod 4) will be decomposed by the
+ // hardware into byte accesses. If you assume all alignments are equally
+ // probable, it's more efficient on average to use short accesses for this
+ // case.
+ if (MinAlign == 2)
+ return Type::getInt16Ty(Context);
+
+ // Not all subtargets have 128-bit DS instructions, and we currently don't
+ // form them by default.
+ if (SrcAddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ SrcAddrSpace == AMDGPUAS::REGION_ADDRESS ||
+ DestAddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ DestAddrSpace == AMDGPUAS::REGION_ADDRESS) {
+ return FixedVectorType::get(Type::getInt32Ty(Context), 2);
+ }
+
+ // Global memory works best with 16-byte accesses. Private memory will also
+ // hit this, although they'll be decomposed.
+ return FixedVectorType::get(Type::getInt32Ty(Context), 4);
+}
+
+void GCNTTIImpl::getMemcpyLoopResidualLoweringType(
+ SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
+ unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
+ unsigned SrcAlign, unsigned DestAlign) const {
+ assert(RemainingBytes < 16);
+
+ unsigned MinAlign = std::min(SrcAlign, DestAlign);
+
+ if (MinAlign != 2) {
+ Type *I64Ty = Type::getInt64Ty(Context);
+ while (RemainingBytes >= 8) {
+ OpsOut.push_back(I64Ty);
+ RemainingBytes -= 8;
+ }
+
+ Type *I32Ty = Type::getInt32Ty(Context);
+ while (RemainingBytes >= 4) {
+ OpsOut.push_back(I32Ty);
+ RemainingBytes -= 4;
+ }
+ }
+
+ Type *I16Ty = Type::getInt16Ty(Context);
+ while (RemainingBytes >= 2) {
+ OpsOut.push_back(I16Ty);
+ RemainingBytes -= 2;
+ }
+
+ Type *I8Ty = Type::getInt8Ty(Context);
+ while (RemainingBytes) {
+ OpsOut.push_back(I8Ty);
+ --RemainingBytes;
+ }
+}
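
Taken together, the two hooks above choose a wide main-loop access type and then cover the tail greedily with progressively narrower scalars. A minimal standalone sketch of that selection logic, assuming the AMDGPU address-space numbering (2 = region, 3 = local/LDS) and leaving out the real TTI/Type plumbing:

// Illustrative model only; it mirrors the decisions above without LLVM's
// Type*/LLVMContext machinery or the TargetTransformInfo interface.
#include <algorithm>
#include <cstdio>
#include <vector>

enum : unsigned { REGION_AS = 2, LOCAL_AS = 3 };

static const char *mainLoopType(unsigned SrcAS, unsigned DstAS,
                                unsigned SrcAlign, unsigned DstAlign) {
  if (std::min(SrcAlign, DstAlign) == 2)
    return "i16";        // an access at addr % 4 == 2 would split into bytes
  if (SrcAS == LOCAL_AS || SrcAS == REGION_AS ||
      DstAS == LOCAL_AS || DstAS == REGION_AS)
    return "<2 x i32>";  // 128-bit DS accesses are not formed by default
  return "<4 x i32>";    // global/private prefer 16-byte accesses
}

static std::vector<unsigned> residualChunks(unsigned Bytes, unsigned MinAlign) {
  std::vector<unsigned> Chunks;
  if (MinAlign != 2) {
    while (Bytes >= 8) { Chunks.push_back(8); Bytes -= 8; }
    while (Bytes >= 4) { Chunks.push_back(4); Bytes -= 4; }
  }
  while (Bytes >= 2) { Chunks.push_back(2); Bytes -= 2; }
  while (Bytes >= 1) { Chunks.push_back(1); Bytes -= 1; }
  return Chunks;
}

int main() {
  std::printf("%s\n", mainLoopType(1, 3, 4, 4));  // "<2 x i32>" (LDS involved)
  for (unsigned C : residualChunks(13, 4))        // 8 4 1
    std::printf("%u ", C);
  std::printf("\n");
}
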
+
unsigned GCNTTIImpl::getMaxInterleaveFactor(unsigned VF) {
// Disable unrolling if the loop is not vectorized.
// TODO: Enable this again.
@@ -339,6 +439,7 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
}
int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info,
TTI::OperandValueProperties Opd1PropInfo,
@@ -347,7 +448,11 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
const Instruction *CxtI) {
EVT OrigTy = TLI->getValueType(DL, Ty);
if (!OrigTy.isSimple()) {
- return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ // FIXME: We're having to query the throughput cost so that the basic
+  // implementation tries to generate legalization and scalarization costs. Maybe
+ // we could hoist the scalarization code here?
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, TTI::TCK_RecipThroughput,
+ Opd1Info, Opd2Info,
Opd1PropInfo, Opd2PropInfo);
}
@@ -455,24 +560,44 @@ int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
return LT.first * NElts * Cost;
}
break;
+ case ISD::FNEG:
+    // Use the backend's estimation. If fneg is not free, each element will cost
+    // one additional instruction.
+ return TLI->isFNegFree(SLT) ? 0 : NElts;
default:
break;
}
- return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+ Opd2Info,
Opd1PropInfo, Opd2PropInfo);
}
-template <typename T>
-int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<T *> Args,
- FastMathFlags FMF, unsigned VF) {
- if (ID != Intrinsic::fma)
- return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
+// Return true if there's a potential benefit from using v2f16 instructions for
+// an intrinsic, even if it requires nontrivial legalization.
+static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) {
+ switch (ID) {
+ case Intrinsic::fma: // TODO: fmuladd
+ // There's a small benefit to using vector ops in the legalized code.
+ case Intrinsic::round:
+ return true;
+ default:
+ return false;
+ }
+}
+
+int GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) {
+ if (ICA.getID() == Intrinsic::fabs)
+ return 0;
+ if (!intrinsicHasPackedVectorBenefit(ICA.getID()))
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+
+ Type *RetTy = ICA.getReturnType();
EVT OrigTy = TLI->getValueType(DL, RetTy);
if (!OrigTy.isSimple()) {
- return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
// Legalize the type.
@@ -489,36 +614,34 @@ int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
if (ST->has16BitInsts() && SLT == MVT::f16)
NElts = (NElts + 1) / 2;
- return LT.first * NElts * (ST->hasFastFMAF32() ? getHalfRateInstrCost()
- : getQuarterRateInstrCost());
-}
+ // TODO: Get more refined intrinsic costs?
+ unsigned InstRate = getQuarterRateInstrCost();
+ if (ICA.getID() == Intrinsic::fma) {
+ InstRate = ST->hasFastFMAF32() ? getHalfRateInstrCost()
+ : getQuarterRateInstrCost();
+ }
-int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value*> Args, FastMathFlags FMF,
- unsigned VF) {
- return getIntrinsicInstrCost<Value>(ID, RetTy, Args, FMF, VF);
+ return LT.first * NElts * InstRate;
}
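
The resulting cost is simply LT.first * NElts * InstRate, with the element count halved for f16 once 16-bit (packed) instructions are available. A hedged arithmetic sketch; the rate constants below are illustrative stand-ins for getHalfRateInstrCost()/getQuarterRateInstrCost(), not values taken from the subtarget:

// Illustrative cost model for the packed-f16 fma case above.
static unsigned packedFMACost(unsigned NumElts, bool HasPackedF16,
                              bool HasFastFMAF32, unsigned LegalizedParts) {
  unsigned NElts = NumElts;
  if (HasPackedF16)                               // two f16 lanes per v2f16 op
    NElts = (NElts + 1) / 2;
  const unsigned HalfRate = 2, QuarterRate = 4;   // assumed rates
  unsigned InstRate = HasFastFMAF32 ? HalfRate : QuarterRate;
  return LegalizedParts * NElts * InstRate;       // LT.first * NElts * InstRate
}
// e.g. fma on <4 x half> with packed f16 and fast FMA:
//   packedFMACost(4, true, true, 1) == 1 * 2 * 2 == 4
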
-int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF,
- unsigned ScalarizationCostPassed) {
- return getIntrinsicInstrCost<Type>(ID, RetTy, Tys, FMF,
- ScalarizationCostPassed);
-}
+unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode,
+ TTI::TargetCostKind CostKind) {
+ if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
+ return Opcode == Instruction::PHI ? 0 : 1;
-unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode) {
// XXX - For some reason this isn't called for switch.
switch (Opcode) {
case Instruction::Br:
case Instruction::Ret:
return 10;
default:
- return BaseT::getCFInstrCost(Opcode);
+ return BaseT::getCFInstrCost(Opcode, CostKind);
}
}
-int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwise) {
+int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+ bool IsPairwise,
+ TTI::TargetCostKind CostKind) {
EVT OrigTy = TLI->getValueType(DL, Ty);
// Computes cost on targets that have packed math instructions (which support
@@ -526,15 +649,15 @@ int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
if (IsPairwise ||
!ST->hasVOP3PInsts() ||
OrigTy.getScalarSizeInBits() != 16)
- return BaseT::getArithmeticReductionCost(Opcode, Ty, IsPairwise);
+ return BaseT::getArithmeticReductionCost(Opcode, Ty, IsPairwise, CostKind);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
return LT.first * getFullRateInstrCost();
}
-int GCNTTIImpl::getMinMaxReductionCost(Type *Ty, Type *CondTy,
- bool IsPairwise,
- bool IsUnsigned) {
+int GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+ bool IsPairwise, bool IsUnsigned,
+ TTI::TargetCostKind CostKind) {
EVT OrigTy = TLI->getValueType(DL, Ty);
// Computes cost on targets that have packed math instructions (which support
@@ -542,7 +665,8 @@ int GCNTTIImpl::getMinMaxReductionCost(Type *Ty, Type *CondTy,
if (IsPairwise ||
!ST->hasVOP3PInsts() ||
OrigTy.getScalarSizeInBits() != 16)
- return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned);
+ return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned,
+ CostKind);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
return LT.first * getHalfRateInstrCost();
@@ -573,8 +697,6 @@ int GCNTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
}
}
-
-
static bool isArgPassedInSGPR(const Argument *A) {
const Function *F = A->getParent();
@@ -601,6 +723,58 @@ static bool isArgPassedInSGPR(const Argument *A) {
}
}
+/// Analyze whether the results of inline asm are divergent. If \p Indices is empty,
+/// this is analyzing the collective result of all output registers. Otherwise,
+/// this is only querying a specific result index if this returns multiple
+/// registers in a struct.
+bool GCNTTIImpl::isInlineAsmSourceOfDivergence(
+ const CallInst *CI, ArrayRef<unsigned> Indices) const {
+ // TODO: Handle complex extract indices
+ if (Indices.size() > 1)
+ return true;
+
+ const DataLayout &DL = CI->getModule()->getDataLayout();
+ const SIRegisterInfo *TRI = ST->getRegisterInfo();
+ TargetLowering::AsmOperandInfoVector TargetConstraints =
+ TLI->ParseConstraints(DL, ST->getRegisterInfo(), *CI);
+
+ const int TargetOutputIdx = Indices.empty() ? -1 : Indices[0];
+
+ int OutputIdx = 0;
+ for (auto &TC : TargetConstraints) {
+ if (TC.Type != InlineAsm::isOutput)
+ continue;
+
+ // Skip outputs we don't care about.
+ if (TargetOutputIdx != -1 && TargetOutputIdx != OutputIdx++)
+ continue;
+
+ TLI->ComputeConstraintToUse(TC, SDValue());
+
+ Register AssignedReg;
+ const TargetRegisterClass *RC;
+ std::tie(AssignedReg, RC) = TLI->getRegForInlineAsmConstraint(
+ TRI, TC.ConstraintCode, TC.ConstraintVT);
+ if (AssignedReg) {
+ // FIXME: This is a workaround for getRegForInlineAsmConstraint
+ // returning VS_32
+ RC = TRI->getPhysRegClass(AssignedReg);
+ }
+
+    // For AGPR constraints, null is returned on subtargets without AGPRs, so
+ // assume divergent for null.
+ if (!RC || !TRI->isSGPRClass(RC))
+ return true;
+ }
+
+ return false;
+}
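
In effect, an asm block whose outputs are all constrained to SGPR classes is treated as uniform, and anything else is conservatively divergent. A simplified stand-in for the constraint walk above, using bare constraint letters instead of TargetLowering's parsed constraints (an assumption made purely for illustration):

#include <string>
#include <vector>

// "s" outputs land in SGPRs (uniform); "v"/"a" (VGPR/AGPR) or anything
// unrecognized is conservatively treated as producing a divergent result.
static bool asmOutputsAreDivergent(const std::vector<std::string> &OutConstraints) {
  for (const std::string &C : OutConstraints)
    if (C != "s")
      return true;
  return false;
}
// asmOutputsAreDivergent({"s", "s"}) -> false  (all-SGPR result: uniform)
// asmOutputsAreDivergent({"s", "v"}) -> true   (mixed result: divergent overall)
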
+
+/// \returns true if the new GPU divergence analysis is enabled.
+bool GCNTTIImpl::useGPUDivergenceAnalysis() const {
+ return !UseLegacyDA;
+}
+
/// \returns true if the result of the value could potentially be
/// different across workitems in a wavefront.
bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const {
@@ -628,7 +802,14 @@ bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const {
return AMDGPU::isIntrinsicSourceOfDivergence(Intrinsic->getIntrinsicID());
// Assume all function calls are a source of divergence.
- if (isa<CallInst>(V) || isa<InvokeInst>(V))
+ if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+ if (CI->isInlineAsm())
+ return isInlineAsmSourceOfDivergence(CI);
+ return true;
+ }
+
+ // Assume all function calls are a source of divergence.
+ if (isa<InvokeInst>(V))
return true;
return false;
@@ -643,9 +824,44 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
case Intrinsic::amdgcn_readlane:
case Intrinsic::amdgcn_icmp:
case Intrinsic::amdgcn_fcmp:
+ case Intrinsic::amdgcn_ballot:
+ case Intrinsic::amdgcn_if_break:
return true;
}
}
+
+ if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+ if (CI->isInlineAsm())
+ return !isInlineAsmSourceOfDivergence(CI);
+ return false;
+ }
+
+ const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V);
+ if (!ExtValue)
+ return false;
+
+ const CallInst *CI = dyn_cast<CallInst>(ExtValue->getOperand(0));
+ if (!CI)
+ return false;
+
+ if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(CI)) {
+ switch (Intrinsic->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::amdgcn_if:
+ case Intrinsic::amdgcn_else: {
+ ArrayRef<unsigned> Indices = ExtValue->getIndices();
+ return Indices.size() == 1 && Indices[0] == 1;
+ }
+ }
+ }
+
+ // If we have inline asm returning mixed SGPR and VGPR results, we inferred
+  // divergence for the overall struct return. We need to override that in the
+  // case where we're extracting an SGPR component here.
+ if (CI->isInlineAsm())
+ return !isInlineAsmSourceOfDivergence(CI, ExtValue->getIndices());
+
return false;
}
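
The extractvalue handling complements this: only field 1 of the struct returned by amdgcn.if/amdgcn.else (the saved exec mask) is uniform, and extracting an SGPR-constrained field of a mixed inline-asm struct overrides the conservative whole-struct answer. A tiny predicate capturing the index rule, as a sketch rather than the LLVM API:

#include <vector>

// Field 0 of the { i1, iN } returned by amdgcn.if/else is the divergent
// branch condition; field 1 is the saved exec mask, which is uniform.
static bool ifElseExtractIsUniform(const std::vector<unsigned> &Indices) {
  return Indices.size() == 1 && Indices[0] == 1;
}
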
@@ -666,8 +882,9 @@ bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
}
}
-bool GCNTTIImpl::rewriteIntrinsicWithAddressSpace(
- IntrinsicInst *II, Value *OldV, Value *NewV) const {
+Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
+ Value *OldV,
+ Value *NewV) const {
auto IntrID = II->getIntrinsicID();
switch (IntrID) {
case Intrinsic::amdgcn_atomic_inc:
@@ -677,7 +894,7 @@ bool GCNTTIImpl::rewriteIntrinsicWithAddressSpace(
case Intrinsic::amdgcn_ds_fmax: {
const ConstantInt *IsVolatile = cast<ConstantInt>(II->getArgOperand(4));
if (!IsVolatile->isZero())
- return false;
+ return nullptr;
Module *M = II->getParent()->getParent()->getParent();
Type *DestTy = II->getType();
Type *SrcTy = NewV->getType();
@@ -685,7 +902,7 @@ bool GCNTTIImpl::rewriteIntrinsicWithAddressSpace(
Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
II->setArgOperand(0, NewV);
II->setCalledFunction(NewDecl);
- return true;
+ return II;
}
case Intrinsic::amdgcn_is_shared:
case Intrinsic::amdgcn_is_private: {
@@ -695,20 +912,49 @@ bool GCNTTIImpl::rewriteIntrinsicWithAddressSpace(
LLVMContext &Ctx = NewV->getType()->getContext();
ConstantInt *NewVal = (TrueAS == NewAS) ?
ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
- II->replaceAllUsesWith(NewVal);
- II->eraseFromParent();
- return true;
+ return NewVal;
+ }
+ case Intrinsic::ptrmask: {
+ unsigned OldAS = OldV->getType()->getPointerAddressSpace();
+ unsigned NewAS = NewV->getType()->getPointerAddressSpace();
+ Value *MaskOp = II->getArgOperand(1);
+ Type *MaskTy = MaskOp->getType();
+
+ bool DoTruncate = false;
+ if (!getTLI()->isNoopAddrSpaceCast(OldAS, NewAS)) {
+ // All valid 64-bit to 32-bit casts work by chopping off the high
+      // bits. Any mask that only clears the low bits will also apply in the new
+      // address space.
+ if (DL.getPointerSizeInBits(OldAS) != 64 ||
+ DL.getPointerSizeInBits(NewAS) != 32)
+ return nullptr;
+
+ // TODO: Do we need to thread more context in here?
+ KnownBits Known = computeKnownBits(MaskOp, DL, 0, nullptr, II);
+ if (Known.countMinLeadingOnes() < 32)
+ return nullptr;
+
+ DoTruncate = true;
+ }
+
+ IRBuilder<> B(II);
+ if (DoTruncate) {
+ MaskTy = B.getInt32Ty();
+ MaskOp = B.CreateTrunc(MaskOp, MaskTy);
+ }
+
+ return B.CreateIntrinsic(Intrinsic::ptrmask, {NewV->getType(), MaskTy},
+ {NewV, MaskOp});
}
default:
- return false;
+ return nullptr;
}
}
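
The ptrmask case is the subtle one: when a 64-bit flat pointer is being rewritten to a 32-bit address space, the mask can only be carried over if its high 32 bits are known to be all ones, and it is then truncated to the narrower width. A hedged sketch of that decision, modelling the KnownBits query with a plain integer:

#include <cstdint>
#include <optional>

// Models the non-noop 64-bit -> 32-bit case above: the rewrite is only
// legal when the mask's upper 32 bits are all ones (it must not clear
// address bits), and the mask is then truncated to the new pointer width.
static std::optional<uint32_t> truncatedPtrMask(uint64_t Mask) {
  if ((Mask >> 32) != 0xffffffffu)
    return std::nullopt;                // would clear high bits: bail out
  return static_cast<uint32_t>(Mask);   // safe to truncate to 32 bits
}
// truncatedPtrMask(0xffffffffffffff00ULL) -> 0xffffff00 (alignment-style mask)
// truncatedPtrMask(0x00000000ffffff00ULL) -> nullopt
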
-unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) {
+unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *VT,
+ int Index, VectorType *SubTp) {
if (ST->hasVOP3PInsts()) {
- VectorType *VT = cast<VectorType>(Tp);
- if (VT->getNumElements() == 2 &&
+ if (cast<FixedVectorType>(VT)->getNumElements() == 2 &&
DL.getTypeSizeInBits(VT->getElementType()) == 16) {
// With op_sel, VOP3P instructions can freely access the low half or high
// half of a register, so any swizzle is free.
@@ -724,7 +970,7 @@ unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
}
}
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, VT, Index, SubTp);
}
bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
@@ -745,8 +991,8 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
// FIXME: dx10_clamp can just take the caller setting, but there seems to be
// no way to support merge for backend defined attributes.
- AMDGPU::SIModeRegisterDefaults CallerMode(*Caller, *CallerST);
- AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee, *CalleeST);
+ AMDGPU::SIModeRegisterDefaults CallerMode(*Caller);
+ AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee);
return CallerMode.isInlineCompatible(CalleeMode);
}
@@ -755,117 +1001,9 @@ void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
CommonTTI.getUnrollingPreferences(L, SE, UP);
}
-unsigned GCNTTIImpl::getUserCost(const User *U,
- ArrayRef<const Value *> Operands) {
- const Instruction *I = dyn_cast<Instruction>(U);
- if (!I)
- return BaseT::getUserCost(U, Operands);
-
- // Estimate different operations to be optimized out
- switch (I->getOpcode()) {
- case Instruction::ExtractElement: {
- ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
- unsigned Idx = -1;
- if (CI)
- Idx = CI->getZExtValue();
- return getVectorInstrCost(I->getOpcode(), I->getOperand(0)->getType(), Idx);
- }
- case Instruction::InsertElement: {
- ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
- unsigned Idx = -1;
- if (CI)
- Idx = CI->getZExtValue();
- return getVectorInstrCost(I->getOpcode(), I->getType(), Idx);
- }
- case Instruction::Call: {
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
- SmallVector<Value *, 4> Args(II->arg_operands());
- FastMathFlags FMF;
- if (auto *FPMO = dyn_cast<FPMathOperator>(II))
- FMF = FPMO->getFastMathFlags();
- return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
- FMF);
- } else {
- return BaseT::getUserCost(U, Operands);
- }
- }
- case Instruction::ShuffleVector: {
- const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
- Type *Ty = Shuffle->getType();
- Type *SrcTy = Shuffle->getOperand(0)->getType();
-
- // TODO: Identify and add costs for insert subvector, etc.
- int SubIndex;
- if (Shuffle->isExtractSubvectorMask(SubIndex))
- return getShuffleCost(TTI::SK_ExtractSubvector, SrcTy, SubIndex, Ty);
-
- if (Shuffle->changesLength())
- return BaseT::getUserCost(U, Operands);
-
- if (Shuffle->isIdentity())
- return 0;
-
- if (Shuffle->isReverse())
- return getShuffleCost(TTI::SK_Reverse, Ty, 0, nullptr);
-
- if (Shuffle->isSelect())
- return getShuffleCost(TTI::SK_Select, Ty, 0, nullptr);
-
- if (Shuffle->isTranspose())
- return getShuffleCost(TTI::SK_Transpose, Ty, 0, nullptr);
-
- if (Shuffle->isZeroEltSplat())
- return getShuffleCost(TTI::SK_Broadcast, Ty, 0, nullptr);
-
- if (Shuffle->isSingleSource())
- return getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, nullptr);
-
- return getShuffleCost(TTI::SK_PermuteTwoSrc, Ty, 0, nullptr);
- }
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::BitCast:
- case Instruction::AddrSpaceCast: {
- return getCastInstrCost(I->getOpcode(), I->getType(),
- I->getOperand(0)->getType(), I);
- }
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::FNeg: {
- return getArithmeticInstrCost(I->getOpcode(), I->getType(),
- TTI::OK_AnyValue, TTI::OK_AnyValue,
- TTI::OP_None, TTI::OP_None, Operands, I);
- }
- default:
- break;
- }
-
- return BaseT::getUserCost(U, Operands);
+void GCNTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ CommonTTI.getPeelingPreferences(L, SE, PP);
}
unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
@@ -903,7 +1041,7 @@ unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
}
bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ Align Alignment,
unsigned AddrSpace) const {
// We allow vectorization of flat stores, even though we may need to decompose
// them later if they may access private memory. We don't have enough context
@@ -912,13 +1050,13 @@ bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
}
bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ Align Alignment,
unsigned AddrSpace) const {
return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}
bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ Align Alignment,
unsigned AddrSpace) const {
return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}
@@ -932,14 +1070,18 @@ unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
return 8;
}
-unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode) {
+unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode,
+ TTI::TargetCostKind CostKind) {
+ if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
+ return Opcode == Instruction::PHI ? 0 : 1;
+
// XXX - For some reason this isn't called for switch.
switch (Opcode) {
case Instruction::Br:
case Instruction::Ret:
return 10;
default:
- return BaseT::getCFInstrCost(Opcode);
+ return BaseT::getCFInstrCost(Opcode, CostKind);
}
}
@@ -970,3 +1112,8 @@ void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
CommonTTI.getUnrollingPreferences(L, SE, UP);
}
+
+void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ CommonTTI.getPeelingPreferences(L, SE, PP);
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 0b48f9f602b7..3364a9bcaccb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -61,6 +61,9 @@ public:
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
};
class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
@@ -70,10 +73,11 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
friend BaseT;
const GCNSubtarget *ST;
- const AMDGPUTargetLowering *TLI;
+ const SITargetLowering *TLI;
AMDGPUTTIImpl CommonTTI;
bool IsGraphicsShader;
bool HasFP32Denormals;
+ unsigned MaxVGPRs;
const FeatureBitset InlineFeatureIgnoreList = {
// Codegen control options which don't matter.
@@ -133,13 +137,21 @@ public:
TLI(ST->getTargetLowering()),
CommonTTI(TM, F),
IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())),
- HasFP32Denormals(ST->hasFP32Denormals(F)) { }
+ HasFP32Denormals(AMDGPU::SIModeRegisterDefaults(F).allFP32Denormals()),
+ MaxVGPRs(ST->getMaxNumVGPRs(
+ std::max(ST->getWavesPerEU(F).first,
+ ST->getWavesPerEUForWorkGroup(
+ ST->getFlatWorkGroupSizes(F).second)))) {}
bool hasBranchDivergence() { return true; }
+ bool useGPUDivergenceAnalysis() const;
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
+
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
return TTI::PSK_FastHardware;
@@ -147,6 +159,7 @@ public:
unsigned getHardwareNumberOfRegisters(bool Vector) const;
unsigned getNumberOfRegisters(bool Vector) const;
+ unsigned getNumberOfRegisters(unsigned RCID) const;
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getMinVectorRegisterBitWidth() const;
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
@@ -157,22 +170,30 @@ public:
VectorType *VecTy) const;
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
- bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
- bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
- bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
-
+ Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
+ unsigned SrcAddrSpace, unsigned DestAddrSpace,
+ unsigned SrcAlign, unsigned DestAlign) const;
+
+ void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
+ LLVMContext &Context,
+ unsigned RemainingBytes,
+ unsigned SrcAddrSpace,
+ unsigned DestAddrSpace,
+ unsigned SrcAlign,
+ unsigned DestAlign) const;
unsigned getMaxInterleaveFactor(unsigned VF);
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -180,7 +201,10 @@ public:
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
- unsigned getCFInstrCost(unsigned Opcode);
+ unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
+
+ bool isInlineAsmSourceOfDivergence(const CallInst *CI,
+ ArrayRef<unsigned> Indices = {}) const;
int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
bool isSourceOfDivergence(const Value *V) const;
@@ -196,13 +220,13 @@ public:
bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const;
- bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
- Value *OldV, Value *NewV) const;
+ Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
+ Value *NewV) const;
unsigned getVectorSplitCost() { return 0; }
- unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp);
+ unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+ VectorType *SubTp);
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
@@ -211,23 +235,17 @@ public:
int getInlinerVectorBonusPercent() { return 0; }
- int getArithmeticReductionCost(unsigned Opcode,
- Type *Ty,
- bool IsPairwise);
- template <typename T>
- int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<T *> Args, FastMathFlags FMF,
- unsigned VF);
- int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF,
- unsigned ScalarizationCostPassed = UINT_MAX);
- int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF,
- unsigned VF = 1);
- int getMinMaxReductionCost(Type *Ty, Type *CondTy,
- bool IsPairwiseForm,
- bool IsUnsigned);
- unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
+ int getArithmeticReductionCost(
+ unsigned Opcode,
+ VectorType *Ty,
+ bool IsPairwise,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
+
+ int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind);
+ int getMinMaxReductionCost(
+ VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
};
class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
@@ -245,28 +263,28 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()),
ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
TLI(ST->getTargetLowering()),
- CommonTTI(TM, F) {}
+ CommonTTI(TM, F) {}
const R600Subtarget *getST() const { return ST; }
const AMDGPUTargetLowering *getTLI() const { return TLI; }
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
unsigned getHardwareNumberOfRegisters(bool Vec) const;
unsigned getNumberOfRegisters(bool Vec) const;
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getMinVectorRegisterBitWidth() const;
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
- bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
+ bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
- bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
- bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
unsigned getMaxInterleaveFactor(unsigned VF);
- unsigned getCFInstrCost(unsigned Opcode);
+ unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 01bb60f07f2e..418296684d76 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -195,7 +195,11 @@ static BasicBlock *unifyReturnBlockSet(Function &F,
bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- if (PDT.getRoots().size() <= 1)
+
+ // If there's only one exit, we don't need to do anything, unless this is a
+ // pixel shader and that exit is an infinite loop, since we still have to
+ // insert an export in that case.
+ if (PDT.root_size() <= 1 && F.getCallingConv() != CallingConv::AMDGPU_PS)
return false;
LegacyDivergenceAnalysis &DA = getAnalysis<LegacyDivergenceAnalysis>();
@@ -203,6 +207,7 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
// Loop over all of the blocks in a function, tracking all of the blocks that
// return.
SmallVector<BasicBlock *, 4> ReturningBlocks;
+ SmallVector<BasicBlock *, 4> UniformlyReachedRetBlocks;
SmallVector<BasicBlock *, 4> UnreachableBlocks;
// Dummy return block for infinite loop.
@@ -210,10 +215,13 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
bool InsertExport = false;
- for (BasicBlock *BB : PDT.getRoots()) {
+ bool Changed = false;
+ for (BasicBlock *BB : PDT.roots()) {
if (isa<ReturnInst>(BB->getTerminator())) {
if (!isUniformlyReached(DA, *BB))
ReturningBlocks.push_back(BB);
+ else
+ UniformlyReachedRetBlocks.push_back(BB);
} else if (isa<UnreachableInst>(BB->getTerminator())) {
if (!isUniformlyReached(DA, *BB))
UnreachableBlocks.push_back(BB);
@@ -273,6 +281,7 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
BB->getTerminator()->eraseFromParent();
BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB);
}
+ Changed = true;
}
}
@@ -291,6 +300,7 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
BB->getTerminator()->eraseFromParent();
BranchInst::Create(UnreachableBlock, BB);
}
+ Changed = true;
}
if (!ReturningBlocks.empty()) {
@@ -314,19 +324,32 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
// actually reached here.
ReturnInst::Create(F.getContext(), RetVal, UnreachableBlock);
ReturningBlocks.push_back(UnreachableBlock);
+ Changed = true;
}
}
// Now handle return blocks.
if (ReturningBlocks.empty())
- return false; // No blocks return
+ return Changed; // No blocks return
- if (ReturningBlocks.size() == 1)
- return false; // Already has a single return block
+ if (ReturningBlocks.size() == 1 && !InsertExport)
+ return Changed; // Already has a single return block
const TargetTransformInfo &TTI
= getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- unifyReturnBlockSet(F, ReturningBlocks, InsertExport, TTI, "UnifiedReturnBlock");
+  // Unify returning blocks. If we are going to insert the export, it is also
+  // necessary to include blocks that are uniformly reached, because in addition
+  // to inserting the export, the "done" bits on existing exports will be cleared,
+ // and we do not want to end up with the normal export in a non-unified,
+ // uniformly reached block with the "done" bit cleared.
+ auto BlocksToUnify = std::move(ReturningBlocks);
+ if (InsertExport) {
+ BlocksToUnify.insert(BlocksToUnify.end(), UniformlyReachedRetBlocks.begin(),
+ UniformlyReachedRetBlocks.end());
+ }
+
+ unifyReturnBlockSet(F, BlocksToUnify, InsertExport, TTI,
+ "UnifiedReturnBlock");
return true;
}
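
The net policy: divergently reached returns are always funneled into one block, and uniformly reached returns are pulled in as well only when the pixel-shader export has to be inserted, so the cleared "done" bit never ends up on an export outside the unified block. A small sketch of that block-selection rule, with the analysis results reduced to booleans for illustration:

#include <string>
#include <vector>

struct ExitBlock {
  std::string Name;
  bool UniformlyReached;
};

// Names of the return blocks that get merged into the unified return block.
static std::vector<std::string>
blocksToUnify(const std::vector<ExitBlock> &Returns, bool InsertExport) {
  std::vector<std::string> Out;
  for (const ExitBlock &B : Returns)
    if (!B.UniformlyReached || InsertExport)
      Out.push_back(B.Name);
  return Out;
}
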
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index f3aa1a582368..013b7a0cf25d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -163,6 +163,7 @@ public:
ImmTyUNorm,
ImmTyDA,
ImmTyR128A16,
+ ImmTyA16,
ImmTyLWE,
ImmTyExpTgt,
ImmTyExpCompr,
@@ -277,6 +278,7 @@ public:
isRegClass(AMDGPU::VReg_96RegClassID) ||
isRegClass(AMDGPU::VReg_128RegClassID) ||
isRegClass(AMDGPU::VReg_160RegClassID) ||
+ isRegClass(AMDGPU::VReg_192RegClassID) ||
isRegClass(AMDGPU::VReg_256RegClassID) ||
isRegClass(AMDGPU::VReg_512RegClassID) ||
isRegClass(AMDGPU::VReg_1024RegClassID);
@@ -315,6 +317,7 @@ public:
bool isUNorm() const { return isImmTy(ImmTyUNorm); }
bool isDA() const { return isImmTy(ImmTyDA); }
bool isR128A16() const { return isImmTy(ImmTyR128A16); }
+ bool isGFX10A16() const { return isImmTy(ImmTyA16); }
bool isLWE() const { return isImmTy(ImmTyLWE); }
bool isOff() const { return isImmTy(ImmTyOff); }
bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
@@ -486,7 +489,7 @@ public:
}
bool isVSrcB16() const {
- return isVCSrcF16() || isLiteralImm(MVT::i16);
+ return isVCSrcB16() || isLiteralImm(MVT::i16);
}
bool isVSrcV2B16() const {
@@ -654,7 +657,7 @@ public:
bool isSendMsg() const;
bool isSwizzle() const;
bool isSMRDOffset8() const;
- bool isSMRDOffset20() const;
+ bool isSMEMOffset() const;
bool isSMRDLiteralOffset() const;
bool isDPP8() const;
bool isDPPCtrl() const;
@@ -847,6 +850,7 @@ public:
case ImmTyUNorm: OS << "UNorm"; break;
case ImmTyDA: OS << "DA"; break;
case ImmTyR128A16: OS << "R128A16"; break;
+ case ImmTyA16: OS << "A16"; break;
case ImmTyLWE: OS << "LWE"; break;
case ImmTyOff: OS << "Off"; break;
case ImmTyExpTgt: OS << "ExpTgt"; break;
@@ -1062,17 +1066,20 @@ private:
bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
RegisterKind RegKind, unsigned Reg1);
- bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
- unsigned& RegNum, unsigned& RegWidth);
- unsigned ParseRegularReg(RegisterKind &RegKind,
- unsigned &RegNum,
- unsigned &RegWidth);
- unsigned ParseSpecialReg(RegisterKind &RegKind,
- unsigned &RegNum,
- unsigned &RegWidth);
- unsigned ParseRegList(RegisterKind &RegKind,
- unsigned &RegNum,
- unsigned &RegWidth);
+ bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
+ unsigned &RegNum, unsigned &RegWidth,
+ bool RestoreOnFailure = false);
+ bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
+ unsigned &RegNum, unsigned &RegWidth,
+ SmallVectorImpl<AsmToken> &Tokens);
+ unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
+ unsigned &RegWidth,
+ SmallVectorImpl<AsmToken> &Tokens);
+ unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
+ unsigned &RegWidth,
+ SmallVectorImpl<AsmToken> &Tokens);
+ unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
+ unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
bool ParseRegRange(unsigned& Num, unsigned& Width);
unsigned getRegularReg(RegisterKind RegKind,
unsigned RegNum,
@@ -1157,6 +1164,10 @@ public:
return AMDGPU::hasPackedD16(getSTI());
}
+ bool hasGFX10A16() const {
+ return AMDGPU::hasGFX10A16(getSTI());
+ }
+
bool isSI() const {
return AMDGPU::isSI(getSTI());
}
@@ -1177,6 +1188,10 @@ public:
return AMDGPU::isGFX10(getSTI());
}
+ bool isGFX10_BEncoding() const {
+ return AMDGPU::isGFX10_BEncoding(getSTI());
+ }
+
bool hasInv2PiInlineImm() const {
return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
}
@@ -1226,8 +1241,12 @@ public:
bool isForcedSDWA() const { return ForcedSDWA; }
ArrayRef<unsigned> getMatchedVariants() const;
- std::unique_ptr<AMDGPUOperand> parseRegister();
+ std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
+ bool RestoreOnFailure);
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
unsigned checkTargetMatchPredicate(MCInst &Inst) override;
unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
@@ -1311,9 +1330,11 @@ private:
void errorExpTgt();
OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
+ SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
+ bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
bool validateSOPLiteral(const MCInst &Inst) const;
bool validateConstantBusLimitations(const MCInst &Inst);
bool validateEarlyClobberLimitations(const MCInst &Inst);
@@ -1329,6 +1350,7 @@ private:
bool validateOpSel(const MCInst &Inst);
bool validateVccOperand(unsigned Reg) const;
bool validateVOP3Literal(const MCInst &Inst) const;
+ bool validateMAIAccWrite(const MCInst &Inst);
unsigned getConstantBusLimit(unsigned Opcode) const;
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
@@ -1390,7 +1412,7 @@ public:
AMDGPUOperand::Ptr defaultSLC() const;
AMDGPUOperand::Ptr defaultSMRDOffset8() const;
- AMDGPUOperand::Ptr defaultSMRDOffset20() const;
+ AMDGPUOperand::Ptr defaultSMEMOffset() const;
AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
AMDGPUOperand::Ptr defaultFlatOffset() const;
@@ -1524,6 +1546,16 @@ static bool isSafeTruncation(int64_t Val, unsigned Size) {
return isUIntN(Size, Val) || isIntN(Size, Val);
}
+static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
+ if (VT.getScalarType() == MVT::i16) {
+ // FP immediate values are broken.
+ return isInlinableIntLiteral(Val);
+ }
+
+ // f16/v2f16 operands work correctly for all values.
+ return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
+}
+
bool AMDGPUOperand::isInlinableImm(MVT type) const {
// This is a hack to enable named inline values like
@@ -1555,9 +1587,9 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const {
return false;
if (type.getScalarSizeInBits() == 16) {
- return AMDGPU::isInlinableLiteral16(
+ return isInlineableLiteralOp16(
static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
- AsmParser->hasInv2PiInlineImm());
+ type, AsmParser->hasInv2PiInlineImm());
}
// Check if single precision literal is inlinable
@@ -1577,9 +1609,9 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const {
}
if (type.getScalarSizeInBits() == 16) {
- return AMDGPU::isInlinableLiteral16(
+ return isInlineableLiteralOp16(
static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
- AsmParser->hasInv2PiInlineImm());
+ type, AsmParser->hasInv2PiInlineImm());
}
return AMDGPU::isInlinableLiteral32(
@@ -1901,6 +1933,7 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
case 3: return AMDGPU::VReg_96RegClassID;
case 4: return AMDGPU::VReg_128RegClassID;
case 5: return AMDGPU::VReg_160RegClassID;
+ case 6: return AMDGPU::VReg_192RegClassID;
case 8: return AMDGPU::VReg_256RegClassID;
case 16: return AMDGPU::VReg_512RegClassID;
case 32: return AMDGPU::VReg_1024RegClassID;
@@ -1919,7 +1952,10 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
default: return -1;
case 1: return AMDGPU::SGPR_32RegClassID;
case 2: return AMDGPU::SGPR_64RegClassID;
+ case 3: return AMDGPU::SGPR_96RegClassID;
case 4: return AMDGPU::SGPR_128RegClassID;
+ case 5: return AMDGPU::SGPR_160RegClassID;
+ case 6: return AMDGPU::SGPR_192RegClassID;
case 8: return AMDGPU::SGPR_256RegClassID;
case 16: return AMDGPU::SGPR_512RegClassID;
}
@@ -1928,7 +1964,11 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
default: return -1;
case 1: return AMDGPU::AGPR_32RegClassID;
case 2: return AMDGPU::AReg_64RegClassID;
+ case 3: return AMDGPU::AReg_96RegClassID;
case 4: return AMDGPU::AReg_128RegClassID;
+ case 5: return AMDGPU::AReg_160RegClassID;
+ case 6: return AMDGPU::AReg_192RegClassID;
+ case 8: return AMDGPU::AReg_256RegClassID;
case 16: return AMDGPU::AReg_512RegClassID;
case 32: return AMDGPU::AReg_1024RegClassID;
}
@@ -1975,12 +2015,13 @@ static unsigned getSpecialRegForName(StringRef RegName) {
.Case("tma_hi", AMDGPU::TMA_HI)
.Case("tba_lo", AMDGPU::TBA_LO)
.Case("tba_hi", AMDGPU::TBA_HI)
+ .Case("pc", AMDGPU::PC_REG)
.Case("null", AMDGPU::SGPR_NULL)
.Default(AMDGPU::NoRegister);
}
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) {
+ SMLoc &EndLoc, bool RestoreOnFailure) {
auto R = parseRegister();
if (!R) return true;
assert(R->isReg());
@@ -1990,6 +2031,25 @@ bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
return false;
}
+bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
+}
+
+OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ bool Result =
+ ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
+ bool PendingErrors = getParser().hasPendingError();
+ getParser().clearPendingErrors();
+ if (PendingErrors)
+ return MatchOperand_ParseFail;
+ if (Result)
+ return MatchOperand_NoMatch;
+ return MatchOperand_Success;
+}
+
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
RegisterKind RegKind, unsigned Reg1) {
switch (RegKind) {
@@ -2166,31 +2226,31 @@ AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
return true;
}
-unsigned
-AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
- unsigned &RegNum,
- unsigned &RegWidth) {
+unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
+ unsigned &RegNum, unsigned &RegWidth,
+ SmallVectorImpl<AsmToken> &Tokens) {
assert(isToken(AsmToken::Identifier));
unsigned Reg = getSpecialRegForName(getTokenStr());
if (Reg) {
RegNum = 0;
RegWidth = 1;
RegKind = IS_SPECIAL;
+ Tokens.push_back(getToken());
lex(); // skip register name
}
return Reg;
}
-unsigned
-AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
- unsigned &RegNum,
- unsigned &RegWidth) {
+unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
+ unsigned &RegNum, unsigned &RegWidth,
+ SmallVectorImpl<AsmToken> &Tokens) {
assert(isToken(AsmToken::Identifier));
StringRef RegName = getTokenStr();
const RegInfo *RI = getRegularRegInfo(RegName);
if (!RI)
return AMDGPU::NoRegister;
+ Tokens.push_back(getToken());
lex(); // skip register name
RegKind = RI->Kind;
@@ -2209,10 +2269,9 @@ AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
return getRegularReg(RegKind, RegNum, RegWidth);
}
-unsigned
-AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
- unsigned &RegNum,
- unsigned &RegWidth) {
+unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
+ unsigned &RegWidth,
+ SmallVectorImpl<AsmToken> &Tokens) {
unsigned Reg = AMDGPU::NoRegister;
if (!trySkipToken(AsmToken::LBrac))
@@ -2229,7 +2288,8 @@ AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
RegisterKind NextRegKind;
unsigned NextReg, NextRegNum, NextRegWidth;
- if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth))
+ if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth,
+ Tokens))
return AMDGPU::NoRegister;
if (NextRegWidth != 1)
return AMDGPU::NoRegister;
@@ -2248,24 +2308,40 @@ AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
return Reg;
}
-bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
- unsigned &Reg,
- unsigned &RegNum,
- unsigned &RegWidth) {
+bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
+ unsigned &RegNum, unsigned &RegWidth,
+ SmallVectorImpl<AsmToken> &Tokens) {
Reg = AMDGPU::NoRegister;
if (isToken(AsmToken::Identifier)) {
- Reg = ParseSpecialReg(RegKind, RegNum, RegWidth);
+ Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
if (Reg == AMDGPU::NoRegister)
- Reg = ParseRegularReg(RegKind, RegNum, RegWidth);
+ Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
} else {
- Reg = ParseRegList(RegKind, RegNum, RegWidth);
+ Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
}
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
}
+bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
+ unsigned &RegNum, unsigned &RegWidth,
+ bool RestoreOnFailure) {
+ Reg = AMDGPU::NoRegister;
+
+ SmallVector<AsmToken, 1> Tokens;
+ if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
+ if (RestoreOnFailure) {
+ while (!Tokens.empty()) {
+ getLexer().UnLex(Tokens.pop_back_val());
+ }
+ }
+ return true;
+ }
+ return false;
+}
+
Optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
switch (RegKind) {
@@ -2314,7 +2390,8 @@ bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
return true;
}
-std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
+std::unique_ptr<AMDGPUOperand>
+AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
const auto &Tok = Parser.getTok();
SMLoc StartLoc = Tok.getLoc();
SMLoc EndLoc = Tok.getEndLoc();
@@ -2758,16 +2835,22 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
case 2: {
const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
+ if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
+ OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
+ OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
+ return AMDGPU::isInlinableIntLiteral(Val);
+
if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
- OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
+ OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
+ return AMDGPU::isInlinableIntLiteralV216(Val);
+
+ if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
- OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
- OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
+ OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
- } else {
- return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
- }
+
+ return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
}
default:
llvm_unreachable("invalid operand size");
@@ -3085,6 +3168,30 @@ bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
return !isSGPR(mc2PseudoReg(Reg), TRI);
}
+bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) {
+
+ const unsigned Opc = Inst.getOpcode();
+
+ if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
+ return true;
+
+ const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+ assert(Src0Idx != -1);
+
+ const MCOperand &Src0 = Inst.getOperand(Src0Idx);
+ if (!Src0.isReg())
+ return true;
+
+ auto Reg = Src0.getReg();
+ const MCRegisterInfo *TRI = getContext().getRegisterInfo();
+ if (isSGPR(mc2PseudoReg(Reg), TRI)) {
+ Error(getLoc(), "source operand must be either a VGPR or an inline constant");
+ return false;
+ }
+
+ return true;
+}
+
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
@@ -3335,6 +3442,46 @@ bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
return true;
}
+SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+ if (Op.isSMEMOffset())
+ return Op.getStartLoc();
+ }
+ return getLoc();
+}
+
+bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
+ const OperandVector &Operands) {
+ if (isCI() || isSI())
+ return true;
+
+ uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+ if ((TSFlags & SIInstrFlags::SMRD) == 0)
+ return true;
+
+ auto Opcode = Inst.getOpcode();
+ auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
+ if (OpNum == -1)
+ return true;
+
+ const auto &Op = Inst.getOperand(OpNum);
+ if (!Op.isImm())
+ return true;
+
+ uint64_t Offset = Op.getImm();
+ bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
+ if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
+ AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
+ return true;
+
+ Error(getSMEMOffsetLoc(Operands),
+ (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
+ "expected a 21-bit signed offset");
+
+ return false;
+}
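
The legality test reduces to the encodable immediate range: a 20-bit unsigned offset on VI and for the buffer forms, a 21-bit signed offset for GFX9+ scalar memory. A small checker mirroring the error message above, with the subtarget/buffer queries collapsed into one flag (an assumption for the sketch):

#include <cstdint>

// True if Offset fits the SMEM offset field: [0, 2^20) for the unsigned
// 20-bit encoding, [-2^20, 2^20) for the signed 21-bit GFX9+ encoding.
static bool smemOffsetIsLegal(int64_t Offset, bool Unsigned20BitOnly) {
  const int64_t Lim = int64_t(1) << 20;
  if (Unsigned20BitOnly)
    return Offset >= 0 && Offset < Lim;
  return Offset >= -Lim && Offset < Lim;
}
// smemOffsetIsLegal(0xFFFFF, true)  -> true   (largest 20-bit unsigned offset)
// smemOffsetIsLegal(-4, true)       -> false
// smemOffsetIsLegal(-4, false)      -> true   (negative offsets need the signed field)
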
+
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opcode);
@@ -3512,6 +3659,12 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
if (!validateFlatOffset(Inst, Operands)) {
return false;
}
+ if (!validateSMEMOffset(Inst, Operands)) {
+ return false;
+ }
+ if (!validateMAIAccWrite(Inst)) {
+ return false;
+ }
return true;
}
@@ -3556,7 +3709,7 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
}
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, getSTI());
+ Out.emitInstruction(Inst, getSTI());
return false;
case Match_MissingFeature:
@@ -4307,19 +4460,19 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
if (Size > LocalMemorySize)
return Error(SizeLoc, "size is too large");
- int64_t Align = 4;
+ int64_t Alignment = 4;
if (getLexer().is(AsmToken::Comma)) {
Lex();
SMLoc AlignLoc = getLexer().getLoc();
- if (getParser().parseAbsoluteExpression(Align))
+ if (getParser().parseAbsoluteExpression(Alignment))
return true;
- if (Align < 0 || !isPowerOf2_64(Align))
+ if (Alignment < 0 || !isPowerOf2_64(Alignment))
return Error(AlignLoc, "alignment must be a power of two");
// Alignment larger than the size of LDS is possible in theory, as long
// as the linker manages to place the symbol at address 0, but we do want
// to make sure the alignment fits nicely into a 32-bit integer.
- if (Align >= 1u << 31)
+ if (Alignment >= 1u << 31)
return Error(AlignLoc, "alignment is too large");
}
@@ -4331,7 +4484,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
if (!Symbol->isUndefined())
return Error(NameLoc, "invalid symbol redefinition");
- getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
+ getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
return false;
}
@@ -4650,9 +4803,9 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
case AsmToken::Identifier: {
StringRef Tok = Parser.getTok().getString();
if (Tok == Name) {
- if (Tok == "r128" && isGFX9())
+ if (Tok == "r128" && !hasMIMG_R128())
Error(S, "r128 modifier is not supported on this GPU");
- if (Tok == "a16" && !isGFX9() && !isGFX10())
+ if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
Error(S, "a16 modifier is not supported on this GPU");
Bit = 1;
Parser.Lex();
@@ -4672,6 +4825,9 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
return MatchOperand_ParseFail;
+ if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
+ ImmTy = AMDGPUOperand::ImmTyR128A16;
+
Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
return MatchOperand_Success;
}
@@ -5987,6 +6143,8 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
+ if (IsGFX10)
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
if (!IsGFX10)
@@ -6006,8 +6164,8 @@ bool AMDGPUOperand::isSMRDOffset8() const {
return isImm() && isUInt<8>(getImm());
}
-bool AMDGPUOperand::isSMRDOffset20() const {
- return isImm() && isUInt<20>(getImm());
+bool AMDGPUOperand::isSMEMOffset() const {
+ return isImm(); // Offset range is checked later by validator.
}
bool AMDGPUOperand::isSMRDLiteralOffset() const {
@@ -6020,7 +6178,7 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
@@ -6096,7 +6254,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
{"da", AMDGPUOperand::ImmTyDA, true, nullptr},
{"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
- {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
+ {"a16", AMDGPUOperand::ImmTyA16, true, nullptr},
{"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
{"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
@@ -6499,7 +6657,7 @@ OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
std::string Token;
if (getLexer().is(AsmToken::Integer)) {
SMLoc Loc = getLexer().getTok().getEndLoc();
- Token = getLexer().getTok().getString();
+ Token = std::string(getLexer().getTok().getString());
Parser.Lex();
if (getLexer().getTok().getLoc() != Loc)
return MatchOperand_ParseFail;
@@ -7032,6 +7190,8 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
case MCK_AttrChan:
return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
+ case MCK_ImmSMEMOffset:
+ return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
case MCK_SReg_64:
case MCK_SReg_64_XEXEC:
// Null is defined as a 32-bit register but
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/BUFInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 691aff4ecbb8..fa42ddc54b56 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1,4 +1,4 @@
-//===-- BUFInstructions.td - Buffer Instruction Defintions ----------------===//
+//===-- BUFInstructions.td - Buffer Instruction Definitions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -374,7 +374,8 @@ class MUBUF_Invalidate <string opName, SDPatternOperator node = null_frag> :
let AsmMatchConverter = "";
let hasSideEffects = 1;
- let mayStore = 1;
+ let mayLoad = 0;
+ let mayStore = 0;
// Set everything to 0.
let offen = 0;
@@ -1003,6 +1004,11 @@ defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics <
"buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global_64
>;
+let SubtargetPredicate = HasGFX10_BEncoding in
+defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN <
+ "buffer_atomic_csub", VGPR_32, i32, atomic_csub_global_32
+>;
+
let SubtargetPredicate = isGFX8GFX9 in {
def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">;
}
@@ -1152,22 +1158,6 @@ let SubtargetPredicate = isGFX10Plus in {
// MUBUF Patterns
//===----------------------------------------------------------------------===//
-def extract_glc : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i8);
-}]>;
-
-def extract_slc : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8);
-}]>;
-
-def extract_dlc : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8);
-}]>;
-
-def extract_swz : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8);
-}]>;
-
//===----------------------------------------------------------------------===//
// buffer_load/store_format patterns
//===----------------------------------------------------------------------===//
@@ -1177,24 +1167,24 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
def : GCNPat<
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
timm:$auxiliary, 0)),
- (!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary))
>;
def : GCNPat<
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
timm:$auxiliary, 0)),
- (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary))
>;
def : GCNPat<
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
timm:$auxiliary, timm)),
- (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
+ (!cast<MUBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary))
>;
@@ -1202,9 +1192,9 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
timm:$auxiliary, timm)),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
- (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
+ (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
+ SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary))
>;
}
@@ -1221,6 +1211,7 @@ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4i32, "BUFFER_LOAD_FORMAT_X
let SubtargetPredicate = HasUnpackedD16VMem in {
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i32, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i32, "BUFFER_LOAD_FORMAT_D16_XY_gfx80">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i32, "BUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
@@ -1228,6 +1219,7 @@ let SubtargetPredicate = HasUnpackedD16VMem in {
let SubtargetPredicate = HasPackedD16VMem in {
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i16, "BUFFER_LOAD_FORMAT_D16_X">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i32, "BUFFER_LOAD_FORMAT_D16_X">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2f16, "BUFFER_LOAD_FORMAT_D16_XY">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i16, "BUFFER_LOAD_FORMAT_D16_XY">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4f16, "BUFFER_LOAD_FORMAT_D16_XYZW">;
@@ -1256,7 +1248,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
timm:$auxiliary, 0),
- (!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) getVregSrcForVT<vt>.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary))
>;
@@ -1264,8 +1256,8 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
timm:$auxiliary, 0),
- (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
- (as_i16imm $offset), (extract_glc $auxiliary),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
+ (as_i16timm $offset), (extract_glc $auxiliary),
(extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary))
>;
@@ -1273,8 +1265,8 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
timm:$auxiliary, timm),
- (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
- (as_i16imm $offset), (extract_glc $auxiliary),
+ (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
+ (as_i16timm $offset), (extract_glc $auxiliary),
(extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary))
>;
@@ -1283,9 +1275,9 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
timm:$auxiliary, timm),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
- $vdata,
- (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset), (extract_glc $auxiliary),
+ getVregSrcForVT<vt>.ret:$vdata,
+ (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
+ SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_glc $auxiliary),
(extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary))
>;
@@ -1303,6 +1295,7 @@ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4i32, "BUFFER_STORE_FORMA
let SubtargetPredicate = HasUnpackedD16VMem in {
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, f16, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i16, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i32, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2i32, "BUFFER_STORE_FORMAT_D16_XY_gfx80">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4i32, "BUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
@@ -1310,6 +1303,7 @@ let SubtargetPredicate = HasUnpackedD16VMem in {
let SubtargetPredicate = HasPackedD16VMem in {
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, f16, "BUFFER_STORE_FORMAT_D16_X">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i16, "BUFFER_STORE_FORMAT_D16_X">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i32, "BUFFER_STORE_FORMAT_D16_X">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2f16, "BUFFER_STORE_FORMAT_D16_XY">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2i16, "BUFFER_STORE_FORMAT_D16_XY">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4f16, "BUFFER_STORE_FORMAT_D16_XYZW">;
@@ -1338,37 +1332,37 @@ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_short, i32, "BUFFER_STORE_SHORT">;
multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
- (vt (name vt:$vdata_in, v4i32:$rsrc, 0,
- 0, i32:$soffset, timm:$offset,
- timm:$cachepolicy, 0)),
- (!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN) $vdata_in, $rsrc, $soffset,
- (as_i16imm $offset), (extract_slc $cachepolicy))
+ (vt (name vt:$vdata_in, v4i32:$rsrc, 0, 0, i32:$soffset,
+ timm:$offset, timm:$cachepolicy, 0)),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN)
+ getVregSrcForVT<vt>.ret:$vdata_in, SReg_128:$rsrc, SCSrc_b32:$soffset,
+ (as_i16timm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
- (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
- 0, i32:$soffset, timm:$offset,
- timm:$cachepolicy, timm)),
- (!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) $vdata_in, $vindex, $rsrc, $soffset,
- (as_i16imm $offset), (extract_slc $cachepolicy))
+ (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset,
+ timm:$offset, timm:$cachepolicy, timm)),
+ (!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) getVregSrcForVT<vt>.ret:$vdata_in,
+ VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
+ (as_i16timm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
- (vt (name vt:$vdata_in, v4i32:$rsrc, 0,
- i32:$voffset, i32:$soffset, timm:$offset,
- timm:$cachepolicy, 0)),
- (!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) $vdata_in, $voffset, $rsrc, $soffset,
- (as_i16imm $offset), (extract_slc $cachepolicy))
+ (vt (name vt:$vdata_in, v4i32:$rsrc, 0, i32:$voffset,
+ i32:$soffset, timm:$offset, timm:$cachepolicy, 0)),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) getVregSrcForVT<vt>.ret:$vdata_in,
+ VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
+ (as_i16timm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
- (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
- i32:$voffset, i32:$soffset, timm:$offset,
- timm:$cachepolicy, timm)),
+ (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, i32:$voffset,
+ i32:$soffset, timm:$offset, timm:$cachepolicy, timm)),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_RTN)
- $vdata_in,
- (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy))
+ getVregSrcForVT<vt>.ret:$vdata_in,
+ (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
+ SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ (extract_slc $cachepolicy))
>;
}
@@ -1384,6 +1378,7 @@ defm : BufferAtomicPatterns<SIbuffer_atomic_or, i32, "BUFFER_ATOMIC_OR">;
defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i32, "BUFFER_ATOMIC_XOR">;
defm : BufferAtomicPatterns<SIbuffer_atomic_inc, i32, "BUFFER_ATOMIC_INC">;
defm : BufferAtomicPatterns<SIbuffer_atomic_dec, i32, "BUFFER_ATOMIC_DEC">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_csub, i32, "BUFFER_ATOMIC_CSUB">;
defm : BufferAtomicPatterns<SIbuffer_atomic_swap, i64, "BUFFER_ATOMIC_SWAP_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_add, i64, "BUFFER_ATOMIC_ADD_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_sub, i64, "BUFFER_ATOMIC_SUB_X2">;
@@ -1434,19 +1429,20 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
>;
}
+let SubtargetPredicate = HasAtomicFaddInsts in {
defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_fadd, f32, "BUFFER_ATOMIC_ADD_F32">;
defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_pk_fadd, v2f16, "BUFFER_ATOMIC_PK_ADD_F16">;
+}
def : GCNPat<
(SIbuffer_atomic_cmpswap
- i32:$data, i32:$cmp, v4i32:$rsrc, 0,
- 0, i32:$soffset, timm:$offset,
- timm:$cachepolicy, 0),
+ i32:$data, i32:$cmp, v4i32:$rsrc, 0, 0, i32:$soffset,
+ timm:$offset, timm:$cachepolicy, 0),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN
- (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
- $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)),
- sub0)
+ (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
+ SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ (extract_slc $cachepolicy)), sub0)
>;
def : GCNPat<
@@ -1456,8 +1452,8 @@ def : GCNPat<
timm:$cachepolicy, timm),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN
- (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
- $vindex, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)),
+ (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
+ VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_slc $cachepolicy)),
sub0)
>;
@@ -1468,8 +1464,8 @@ def : GCNPat<
timm:$cachepolicy, 0),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN
- (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
- $voffset, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)),
+ (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
+ VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_slc $cachepolicy)),
sub0)
>;
@@ -1480,9 +1476,9 @@ def : GCNPat<
timm:$cachepolicy, timm),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN
- (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
- (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)),
+ (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
+ (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
+ SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_slc $cachepolicy)),
sub0)
>;
@@ -1584,7 +1580,7 @@ defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET,
defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i16, load_private>;
foreach vt = Reg32Types.types in {
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_OFFSET, i32, load_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_OFFSET, vt, load_private>;
}
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, BUFFER_LOAD_DWORDX2_OFFSET, v2i32, load_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX3_OFFEN, BUFFER_LOAD_DWORDX3_OFFSET, v3i32, load_private>;
@@ -1692,8 +1688,8 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
def : GCNPat<
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
timm:$format, timm:$auxiliary, 0)),
- (!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $format),
+ (!cast<MTBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ (as_i8timm $format),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary))
>;
@@ -1701,8 +1697,8 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
def : GCNPat<
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
timm:$format, timm:$auxiliary, timm)),
- (!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $format),
+ (!cast<MTBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ (as_i8timm $format),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary))
>;
@@ -1710,8 +1706,8 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
def : GCNPat<
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
timm:$format, timm:$auxiliary, 0)),
- (!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $format),
+ (!cast<MTBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ (as_i8timm $format),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary))
>;
@@ -1720,9 +1716,9 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
timm:$format, timm:$auxiliary, timm)),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
- (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $format),
+ (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
+ SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ (as_i8timm $format),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary))
>;
@@ -1739,12 +1735,14 @@ defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4f32, "TBUFFER_LOAD_FORMAT_XYZW">
let SubtargetPredicate = HasUnpackedD16VMem in {
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, f16, "TBUFFER_LOAD_FORMAT_D16_X_gfx80">;
+ defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, i32, "TBUFFER_LOAD_FORMAT_D16_X_gfx80">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v2i32, "TBUFFER_LOAD_FORMAT_D16_XY_gfx80">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v4i32, "TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
let SubtargetPredicate = HasPackedD16VMem in {
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, f16, "TBUFFER_LOAD_FORMAT_D16_X">;
+ defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, i32, "TBUFFER_LOAD_FORMAT_D16_X">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v2f16, "TBUFFER_LOAD_FORMAT_D16_XY">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v4f16, "TBUFFER_LOAD_FORMAT_D16_XYZW">;
} // End HasPackedD16VMem.
@@ -1754,8 +1752,8 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
timm:$format, timm:$auxiliary, 0),
- (!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
- (as_i16imm $offset), (as_i8imm $format),
+ (!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) getVregSrcForVT<vt>.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset,
+ (as_i16timm $offset), (as_i8timm $format),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary))
>;
@@ -1763,8 +1761,8 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
timm:$format, timm:$auxiliary, timm),
- (!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
- (as_i16imm $offset), (as_i8imm $format),
+ (!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
+ (as_i16timm $offset), (as_i8timm $format),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary))
>;
@@ -1772,8 +1770,8 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
timm:$format, timm:$auxiliary, 0),
- (!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
- (as_i16imm $offset), (as_i8imm $format),
+ (!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
+ (as_i16timm $offset), (as_i8timm $format),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary))
>;
@@ -1782,9 +1780,9 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
timm:$offset, timm:$format, timm:$auxiliary, timm),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
- $vdata,
- (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format),
+ getVregSrcForVT<vt>.ret:$vdata,
+ (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
+ SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary))
>;
@@ -1801,12 +1799,14 @@ defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4f32, "TBUFFER_STORE_FORMAT_XYZ
let SubtargetPredicate = HasUnpackedD16VMem in {
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, f16, "TBUFFER_STORE_FORMAT_D16_X_gfx80">;
+ defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, i32, "TBUFFER_STORE_FORMAT_D16_X_gfx80">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v2i32, "TBUFFER_STORE_FORMAT_D16_XY_gfx80">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v4i32, "TBUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
let SubtargetPredicate = HasPackedD16VMem in {
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, f16, "TBUFFER_STORE_FORMAT_D16_X">;
+ defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, i32, "TBUFFER_STORE_FORMAT_D16_X">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v2f16, "TBUFFER_STORE_FORMAT_D16_XY">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v4f16, "TBUFFER_STORE_FORMAT_D16_XYZW">;
} // End HasPackedD16VMem.
@@ -1888,8 +1888,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
def _LDS_BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
MUBUFLdsTable<1, NAME # "_BOTHEN_gfx10">;
}
- multiclass MUBUF_Real_Atomics_gfx10<bits<8> op> :
- MUBUF_Real_AllAddr_gfx10<op> {
+ multiclass MUBUF_Real_Atomics_RTN_gfx10<bits<8> op> {
def _BOTHEN_RTN_gfx10 :
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
def _IDXEN_RTN_gfx10 :
@@ -1899,6 +1898,8 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
def _OFFSET_RTN_gfx10 :
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
}
+ multiclass MUBUF_Real_Atomics_gfx10<bits<8> op> :
+ MUBUF_Real_AllAddr_gfx10<op>, MUBUF_Real_Atomics_RTN_gfx10<op>;
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x019>;
@@ -2063,6 +2064,8 @@ defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;
+defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomics_RTN_gfx10<0x034>;
+
defm BUFFER_WBINVL1_SC : MUBUF_Real_gfx6<0x070>;
defm BUFFER_WBINVL1_VOL : MUBUF_Real_gfx7<0x070>;
def BUFFER_WBINVL1_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<0x071, BUFFER_WBINVL1>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/CaymanInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/CaymanInstructions.td
index e2978624811d..f4ddbf1131c3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/CaymanInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/CaymanInstructions.td
@@ -57,11 +57,12 @@ def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
// RECIP_UINT emulation for Cayman
-// The multiplication scales from [0,1] to the unsigned integer range
+// The multiplication scales from [0,1) to the unsigned integer range,
+// rounding down a bit to avoid unwanted overflow.
def : R600Pat <
(AMDGPUurecip i32:$src0),
(FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)),
- (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
+ (MOV_IMM_I32 CONST.FP_4294966784)))
>;
def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td
index fe7faca8b157..beb01b1abf0f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -1,4 +1,4 @@
-//===-- DSInstructions.td - DS Instruction Defintions ---------------------===//
+//===-- DSInstructions.td - DS Instruction Definitions --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -388,7 +388,12 @@ defm DS_MAX_U32 : DS_1A1D_NORET_mc<"ds_max_u32">;
defm DS_AND_B32 : DS_1A1D_NORET_mc<"ds_and_b32">;
defm DS_OR_B32 : DS_1A1D_NORET_mc<"ds_or_b32">;
defm DS_XOR_B32 : DS_1A1D_NORET_mc<"ds_xor_b32">;
+
+let SubtargetPredicate = HasLDSFPAtomics in {
defm DS_ADD_F32 : DS_1A1D_NORET_mc<"ds_add_f32">;
+}
+
+// FIXME: Are these really present pre-gfx8?
defm DS_MIN_F32 : DS_1A1D_NORET_mc<"ds_min_f32">;
defm DS_MAX_F32 : DS_1A1D_NORET_mc<"ds_max_f32">;
@@ -443,7 +448,10 @@ defm DS_MIN_F64 : DS_1A1D_NORET_mc<"ds_min_f64", VReg_64>;
defm DS_MAX_F64 : DS_1A1D_NORET_mc<"ds_max_f64", VReg_64>;
defm DS_ADD_RTN_U32 : DS_1A1D_RET_mc<"ds_add_rtn_u32", VGPR_32, "ds_add_u32">;
+
+let SubtargetPredicate = HasLDSFPAtomics in {
defm DS_ADD_RTN_F32 : DS_1A1D_RET_mc<"ds_add_rtn_f32", VGPR_32, "ds_add_f32">;
+}
defm DS_SUB_RTN_U32 : DS_1A1D_RET_mc<"ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
defm DS_RSUB_RTN_U32 : DS_1A1D_RET_mc<"ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">;
defm DS_INC_RTN_U32 : DS_1A1D_RET_mc<"ds_inc_rtn_u32", VGPR_32, "ds_inc_u32">;
@@ -497,6 +505,7 @@ def DS_GWS_SEMA_P : DS_GWS_0D<"ds_gws_sema_p">;
def DS_GWS_BARRIER : DS_GWS_1D<"ds_gws_barrier">;
}
+let SubtargetPredicate = HasDsSrc2Insts in {
def DS_ADD_SRC2_U32 : DS_1A<"ds_add_src2_u32">;
def DS_SUB_SRC2_U32 : DS_1A<"ds_sub_src2_u32">;
def DS_RSUB_SRC2_U32 : DS_1A<"ds_rsub_src2_u32">;
@@ -529,6 +538,7 @@ def DS_MAX_SRC2_F64 : DS_1A<"ds_max_src2_f64">;
def DS_WRITE_SRC2_B32 : DS_1A<"ds_write_src2_b32">;
def DS_WRITE_SRC2_B64 : DS_1A<"ds_write_src2_b64">;
+} // End SubtargetPredicate = HasDsSrc2Insts
let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1 in {
def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, 0, SwizzleImm>;
@@ -609,10 +619,12 @@ def DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <"ds_bpermute_b32",
int_amdgcn_ds_bpermute>;
}
-def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
-
} // let SubtargetPredicate = isGFX8Plus
+let SubtargetPredicate = HasLDSFPAtomics, OtherPredicates = [HasDsSrc2Insts] in {
+def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
+}
+
//===----------------------------------------------------------------------===//
// DS Patterns
//===----------------------------------------------------------------------===//
@@ -725,7 +737,7 @@ defm : DSWritePat_mc <DS_WRITE_B16, i32, "truncstorei16_local">;
defm : DSWritePat_mc <DS_WRITE_B8, i16, "truncstorei8_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i16, "store_local">;
-foreach vt = VGPR_32.RegTypes in {
+foreach vt = Reg32Types.types in {
defm : DSWritePat_mc <DS_WRITE_B32, vt, "store_local">;
}
@@ -737,31 +749,35 @@ def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_hi16_local>;
def : DSWritePat <DS_WRITE_B8_D16_HI, i32, truncstorei8_hi16_local>;
}
-
-class DS64Bit4ByteAlignedReadPat<DS_Pseudo inst, PatFrag frag> : GCNPat <
- (v2i32 (frag (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))),
+class DS64Bit4ByteAlignedReadPat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
+ (vt:$value (frag (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))),
(inst $ptr, $offset0, $offset1, (i1 0))
>;
-class DS64Bit4ByteAlignedWritePat<DS_Pseudo inst, PatFrag frag> : GCNPat<
- (frag v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)),
- (inst $ptr, (i32 (EXTRACT_SUBREG $value, sub0)),
- (i32 (EXTRACT_SUBREG $value, sub1)), $offset0, $offset1,
+class DS64Bit4ByteAlignedWritePat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat<
+ (frag vt:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)),
+ (inst $ptr, (i32 (EXTRACT_SUBREG VReg_64:$value, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$value, sub1)), $offset0, $offset1,
(i1 0))
>;
-// v2i32 loads are split into i32 loads on SI during lowering, due to a bug
-// related to bounds checking.
-let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in {
-def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32, load_local_m0>;
-def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32, store_local_m0>;
-}
+multiclass DS64Bit4ByteAlignedPat_mc<ValueType vt> {
+ let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in {
+ def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32, vt, load_local_m0>;
+ def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32, vt, store_local_m0>;
+ }
-let OtherPredicates = [NotLDSRequiresM0Init] in {
-def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32_gfx9, load_local>;
-def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32_gfx9, store_local>;
+ let OtherPredicates = [NotLDSRequiresM0Init] in {
+ def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32_gfx9, vt, load_local>;
+ def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32_gfx9, vt, store_local>;
+ }
}
+// v2i32 loads are split into i32 loads on SI during lowering, due to a bug
+// related to bounds checking.
+foreach vt = VReg_64.RegTypes in {
+defm : DS64Bit4ByteAlignedPat_mc<vt>;
+}
let AddedComplexity = 100 in {
@@ -826,9 +842,12 @@ defm : DSAtomicRetPat_mc<DS_MAX_RTN_I32, i32, "atomic_load_max">;
defm : DSAtomicRetPat_mc<DS_MIN_RTN_U32, i32, "atomic_load_umin">;
defm : DSAtomicRetPat_mc<DS_MAX_RTN_U32, i32, "atomic_load_umax">;
defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B32, i32, "atomic_cmp_swap">;
+
+let SubtargetPredicate = HasLDSFPAtomics in {
defm : DSAtomicRetPat_mc<DS_MIN_RTN_F32, f32, "atomic_load_fmin">;
defm : DSAtomicRetPat_mc<DS_MAX_RTN_F32, f32, "atomic_load_fmax">;
defm : DSAtomicRetPat_mc<DS_ADD_RTN_F32, f32, "atomic_load_fadd">;
+}
// 64-bit atomics.
defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap">;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 419513bdc248..9c2f2e7eecd1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -18,7 +18,6 @@
#include "Disassembler/AMDGPUDisassembler.h"
#include "AMDGPU.h"
-#include "AMDGPURegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
@@ -101,6 +100,18 @@ static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
return addOperand(Inst, MCOperand::createImm(Imm));
}
+static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ int64_t Offset;
+ if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
+ Offset = Imm & 0xFFFFF;
+ } else { // GFX9+ supports 21-bit signed offsets.
+ Offset = SignExtend64<21>(Imm);
+ }
+ return addOperand(Inst, MCOperand::createImm(Offset));
+}
+
static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val,
uint64_t Addr, const void *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
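The decodeSMEMOffset helper added in the hunk above treats the raw offset field differently per generation: 20-bit unsigned on VI, 21-bit signed (hence SignExtend64<21>) on GFX9 and later. A minimal standalone sketch of that sign extension, using a hypothetical signExtend helper for illustration rather than the LLVM one:

#include <cstdint>
#include <cstdio>

// Sign-extend the low Bits bits of Imm (mirrors the effect of SignExtend64<Bits>).
static int64_t signExtend(uint64_t Imm, unsigned Bits) {
  uint64_t Sign = 1ULL << (Bits - 1);
  uint64_t Low = Imm & ((1ULL << Bits) - 1);
  return static_cast<int64_t>((Low ^ Sign) - Sign);
}

int main() {
  printf("VI,    raw 0xFFFFF  -> %llu\n", 0xFFFFFULL);                          // kept unsigned
  printf("GFX9+, raw 0x1FFFFF -> %lld\n", (long long)signExtend(0x1FFFFF, 21)); // decodes to -1
  printf("GFX9+, raw 0x0FFFFF -> %lld\n", (long long)signExtend(0x0FFFFF, 21)); // stays positive
  return 0;
}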
@@ -285,6 +296,18 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Bytes.size() >= 8) {
const uint64_t QW = eatBytes<uint64_t>(Bytes);
+ if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) {
+ Res = tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address);
+ if (Res) {
+ if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8)
+ == -1)
+ break;
+ if (convertDPP8Inst(MI) == MCDisassembler::Success)
+ break;
+ MI = MCInst(); // clear
+ }
+ }
+
Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
@@ -334,6 +357,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
if (Res) break;
+ if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) {
+ Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address);
+ if (Res) break;
+ }
+
Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address);
if (Res) break;
@@ -351,13 +379,6 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address);
} while (false);
- if (Res && (MaxInstBytesNum - Bytes.size()) == 12 && (!HasLiteral ||
- !(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3))) {
- MaxInstBytesNum = 8;
- Bytes = Bytes_.slice(0, MaxInstBytesNum);
- eatBytes<uint64_t>(Bytes);
- }
-
if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx10 ||
@@ -931,6 +952,7 @@ unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
return AGPR_32RegClassID;
case OPW64: return AReg_64RegClassID;
case OPW128: return AReg_128RegClassID;
+ case OPW256: return AReg_256RegClassID;
case OPW512: return AReg_512RegClassID;
case OPW1024: return AReg_1024RegClassID;
}
@@ -1202,8 +1224,6 @@ bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
raw_ostream &/*cStream*/, int64_t Value,
uint64_t /*Address*/, bool IsBranch,
uint64_t /*Offset*/, uint64_t /*InstSize*/) {
- using SymbolInfoTy = std::tuple<uint64_t, StringRef, uint8_t>;
- using SectionSymbolsTy = std::vector<SymbolInfoTy>;
if (!IsBranch) {
return false;
@@ -1215,11 +1235,11 @@ bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
auto Result = std::find_if(Symbols->begin(), Symbols->end(),
[Value](const SymbolInfoTy& Val) {
- return std::get<0>(Val) == static_cast<uint64_t>(Value)
- && std::get<2>(Val) == ELF::STT_NOTYPE;
+ return Val.Addr == static_cast<uint64_t>(Value)
+ && Val.Type == ELF::STT_NOTYPE;
});
if (Result != Symbols->end()) {
- auto *Sym = Ctx.getOrCreateSymbol(std::get<1>(*Result));
+ auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
Inst.addOperand(MCOperand::createExpr(Add));
return true;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
index 88e554ae0bcc..97104a242d8c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -69,11 +69,11 @@ multiclass RAT_ATOMIC<bits<6> op_ret, bits<6> op_noret, string name> {
def _RTN: CF_MEM_RAT <op_ret, 0, 0xf,
(ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
(outs R600_Reg128:$out_gpr),
- name ## "_RTN" ## " $rw_gpr, $index_gpr", [] >;
+ name # "_RTN" # " $rw_gpr, $index_gpr", [] >;
def _NORET: CF_MEM_RAT <op_noret, 0, 0xf,
(ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
(outs R600_Reg128:$out_gpr),
- name ## " $rw_gpr, $index_gpr", [] >;
+ name # " $rw_gpr, $index_gpr", [] >;
}
}
@@ -422,6 +422,7 @@ def MULADD_UINT24_eg : R600_3OP <0x10, "MULADD_UINT24",
def : UMad24Pat<MULADD_UINT24_eg>;
def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>;
+def : FSHRPattern <BIT_ALIGN_INT_eg>;
def : ROTRPattern <BIT_ALIGN_INT_eg>;
def MULADD_eg : MULADD_Common<0x14>;
def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
@@ -571,7 +572,7 @@ class R600_LDS_1A1D_NORET <bits<6> lds_op, string name, list<dag> pattern> :
}
class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> :
- R600_LDS_1A1D <lds_op, (outs R600_Reg32:$dst), name##"_RET", pattern, "OQAP, "> {
+ R600_LDS_1A1D <lds_op, (outs R600_Reg32:$dst), name#"_RET", pattern, "OQAP, "> {
let BaseOp = name;
let usesCustomInserter = 1;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 2057cac346d4..69facada2e96 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -1,4 +1,4 @@
-//===-- FLATInstructions.td - FLAT Instruction Defintions -----------------===//
+//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -100,7 +100,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
!if(ps.is_flat_scratch, 0b01, 0));
// Signed offset. Highest bit ignored for flat and treated as 12-bit
- // unsigned for flat acceses.
+ // unsigned for flat accesses.
bits<13> offset;
bits<1> nv = 0; // XXX - What does this actually do?
@@ -175,7 +175,7 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
}
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
- let is_flat_global = 1 in {
+ let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
GlobalSaddrTable<0, opName>;
def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
@@ -183,8 +183,27 @@ multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit Ha
}
}
+class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
+ bit HasTiedOutput = 0, bit HasSignedOffset = 0> : FLAT_Pseudo<
+ opName,
+ (outs regClass:$vdst),
+ !con((ins SReg_64:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
+ !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
+ " $vdst, $saddr$offset$glc$slc$dlc"> {
+ let is_flat_global = 1;
+ let has_data = 0;
+ let mayLoad = 1;
+ let has_vaddr = 0;
+ let has_saddr = 1;
+ let enabled_saddr = 1;
+ let maybeAtomic = 1;
+
+ let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
+ let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
+}
+
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
- let is_flat_global = 1 in {
+ let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
GlobalSaddrTable<0, opName>;
def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
@@ -192,6 +211,24 @@ multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
}
}
+class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
+ bit HasSignedOffset = 0> : FLAT_Pseudo<
+ opName,
+ (outs),
+ !con(
+ (ins vdataClass:$vdata, SReg_64:$saddr),
+ (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
+ " $vdata, $saddr$offset$glc$slc$dlc"> {
+ let is_flat_global = 1;
+ let mayLoad = 0;
+ let mayStore = 1;
+ let has_vdst = 0;
+ let has_vaddr = 0;
+ let has_saddr = 1;
+ let enabled_saddr = 1;
+ let maybeAtomic = 1;
+}
+
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
bit EnableSaddr = 0>: FLAT_Pseudo<
opName,
@@ -279,6 +316,7 @@ multiclass FLAT_Atomic_Pseudo<
AtomicNoRet <opName, 0> {
let PseudoInstr = NAME;
let FPAtomic = isFP;
+ let AddedComplexity = -1; // Prefer global atomics if available
}
def _RTN : FLAT_AtomicRet_Pseudo <opName,
@@ -290,6 +328,7 @@ multiclass FLAT_Atomic_Pseudo<
GlobalSaddrTable<0, opName#"_rtn">,
AtomicNoRet <opName, 1>{
let FPAtomic = isFP;
+ let AddedComplexity = -1; // Prefer global atomics if available
}
}
@@ -367,10 +406,12 @@ multiclass FLAT_Global_Atomic_Pseudo<
SDPatternOperator atomic_rtn = null_frag,
SDPatternOperator atomic_no_rtn = null_frag,
ValueType data_vt = vt,
- RegisterClass data_rc = vdst_rc> :
- FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic_no_rtn, data_vt, data_rc>,
- FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;
-
+ RegisterClass data_rc = vdst_rc> {
+ let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
+ defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic_no_rtn, data_vt, data_rc>;
+ defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;
+ }
+}
//===----------------------------------------------------------------------===//
// Flat Instructions
@@ -507,7 +548,6 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
} // End SubtargetPredicate = isGFX7GFX10
-let SubtargetPredicate = HasFlatGlobalInsts in {
defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
@@ -523,6 +563,8 @@ defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16"
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;
+let OtherPredicates = [HasGFX10_BEncoding] in
+def GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;
defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
@@ -530,6 +572,8 @@ defm GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR
defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;
+let OtherPredicates = [HasGFX10_BEncoding] in
+def GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;
defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;
@@ -615,9 +659,12 @@ defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2",
defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
VReg_64, i64, atomic_dec_global_64>;
+
+let SubtargetPredicate = HasGFX10_BEncoding in
+defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub",
+ VGPR_32, i32, atomic_csub_global_32>;
} // End is_flat_global = 1
-} // End SubtargetPredicate = HasFlatGlobalInsts
let SubtargetPredicate = HasFlatScratchInsts in {
@@ -912,6 +959,7 @@ def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CSUB_RTN, atomic_csub_global_32, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>;
@@ -1212,6 +1260,9 @@ multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> :
FLAT_Real_RTN_gfx10<op>,
FLAT_Real_SADDR_RTN_gfx10<op>;
+multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op> :
+ FLAT_Real_RTN_gfx10<op>,
+ FLAT_Real_SADDR_RTN_gfx10<op>;
// ENC_FLAT.
defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx10<0x008>;
@@ -1297,6 +1348,7 @@ defm GLOBAL_ATOMIC_SWAP : FLAT_Real_GlblAtomics_gfx10<0x030>;
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x031>;
defm GLOBAL_ATOMIC_ADD : FLAT_Real_GlblAtomics_gfx10<0x032>;
defm GLOBAL_ATOMIC_SUB : FLAT_Real_GlblAtomics_gfx10<0x033>;
+defm GLOBAL_ATOMIC_CSUB : FLAT_Real_GlblAtomics_RTN_gfx10<0x034>;
defm GLOBAL_ATOMIC_SMIN : FLAT_Real_GlblAtomics_gfx10<0x035>;
defm GLOBAL_ATOMIC_UMIN : FLAT_Real_GlblAtomics_gfx10<0x036>;
defm GLOBAL_ATOMIC_SMAX : FLAT_Real_GlblAtomics_gfx10<0x037>;
@@ -1325,7 +1377,8 @@ defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05d>;
defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>;
defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x05f>;
defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060>;
-
+defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Real_Base_gfx10<0x016>;
+defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_Base_gfx10<0x017>;
// ENC_FLAT_SCRATCH.
defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 10e2c3a263f1..719a968b8314 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -105,6 +105,11 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties()
+ .set(MachineFunctionProperties::Property::IsSSA);
+ }
+
private:
int getDPPOp(unsigned Op) const;
};
@@ -168,7 +173,9 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
}
auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
- OrigMI.getDebugLoc(), TII->get(DPPOp));
+ OrigMI.getDebugLoc(), TII->get(DPPOp))
+ .setMIFlags(OrigMI.getFlags());
+
bool Fail = false;
do {
auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
@@ -506,15 +513,32 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
break;
}
+ auto *Src0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0);
+ auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
+ if (Use != Src0 && !(Use == Src1 && OrigMI.isCommutable())) { // [1]
+ LLVM_DEBUG(dbgs() << " failed: no suitable operands\n");
+ break;
+ }
+
+ assert(Src0 && "Src1 without Src0?");
+ if (Src1 && Src1->isIdenticalTo(*Src0)) {
+ assert(Src1->isReg());
+ LLVM_DEBUG(
+ dbgs()
+ << " " << OrigMI
+ << " failed: DPP register is used more than once per instruction\n");
+ break;
+ }
+
LLVM_DEBUG(dbgs() << " combining: " << OrigMI);
- if (Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
+ if (Use == Src0) {
if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
OldOpndValue, CombBCZ)) {
DPPMIs.push_back(DPPInst);
Rollback = false;
}
- } else if (OrigMI.isCommutable() &&
- Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
+ } else {
+ assert(Use == Src1 && OrigMI.isCommutable()); // by check [1]
auto *BB = OrigMI.getParent();
auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
BB->insert(OrigMI, NewMI);
@@ -528,8 +552,7 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
} else
LLVM_DEBUG(dbgs() << " failed: cannot be commuted\n");
NewMI->eraseFromParent();
- } else
- LLVM_DEBUG(dbgs() << " failed: no suitable operands\n");
+ }
if (Rollback)
break;
OrigMIs.push_back(&OrigMI);
@@ -562,8 +585,6 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
TII = ST.getInstrInfo();
- assert(MRI->isSSA() && "Must be run on SSA");
-
bool Changed = false;
for (auto &MBB : MF) {
for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 3ef5a77af45e..8482dbfec250 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -228,11 +228,6 @@ void GCNHazardRecognizer::processBundle() {
CurrCycleInstr = nullptr;
}
-unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
- IsHazardRecognizerMode = false;
- return PreEmitNoopsCommon(SU->getInstr());
-}
-
unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
IsHazardRecognizerMode = true;
CurrCycleInstr = MI;
@@ -486,6 +481,14 @@ void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
addRegsToSet(TRI, MI.uses(), ClauseUses);
}
+static bool breaksSMEMSoftClause(MachineInstr *MI) {
+ return !SIInstrInfo::isSMRD(*MI);
+}
+
+static bool breaksVMEMSoftClause(MachineInstr *MI) {
+ return !SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI);
+}
+
int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
  // SMEM soft clauses are only present on VI+, and only matter if xnack is
// enabled.
@@ -512,7 +515,7 @@ int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
if (!MI)
break;
- if (IsSMRD != SIInstrInfo::isSMRD(*MI))
+ if (IsSMRD ? breaksSMEMSoftClause(MI) : breaksVMEMSoftClause(MI))
break;
addClauseInst(*MI);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index 6aa2e70dfbfb..cd17f2755bd1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -105,7 +105,6 @@ public:
void EmitInstruction(MachineInstr *MI) override;
HazardType getHazardType(SUnit *SU, int Stalls) override;
void EmitNoop() override;
- unsigned PreEmitNoops(SUnit *SU) override;
unsigned PreEmitNoops(MachineInstr *) override;
unsigned PreEmitNoopsCommon(MachineInstr *);
void AdvanceCycle() override;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 90ab6a14ce20..75a02c839034 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -5,6 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the class GCNIterativeScheduler.
+///
+//===----------------------------------------------------------------------===//
#include "GCNIterativeScheduler.h"
#include "AMDGPUSubtarget.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.h
index e6f83914af5b..a0d4f432aa48 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.h
@@ -5,6 +5,14 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the class GCNIterativeScheduler, which uses an iterative
+/// approach to find the best schedule for the GCN architecture. It builds a
+/// number of lightweight candidate schedules, scores them, chooses the best
+/// one based on those scores, and finally applies the chosen schedule.
+///
+//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_GCNITERATIVESCHEDULER_H
#define LLVM_LIB_TARGET_AMDGPU_GCNITERATIVESCHEDULER_H
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
index c469cf290e26..884b2e17289c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
@@ -5,6 +5,13 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines and implements the class GCNMinRegScheduler, an
+/// experimental, simple scheduler whose main goal is to explore ways of
+/// using as few registers as possible for a region.
+///
+//===----------------------------------------------------------------------===//
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -207,9 +214,8 @@ void GCNMinRegScheduler::bumpPredsPriority(const SUnit *SchedSU, int Priority) {
LLVM_DEBUG(dbgs() << "Make the predecessors of SU(" << SchedSU->NodeNum
<< ")'s non-ready successors of " << Priority
<< " priority in ready queue: ");
- const auto SetEnd = Set.end();
for (auto &C : RQ) {
- if (Set.find(C.SU) != SetEnd) {
+ if (Set.count(C.SU)) {
C.Priority = Priority;
LLVM_DEBUG(dbgs() << " SU(" << C.SU->NodeNum << ')');
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
index f6023f3a40a2..57346087d017 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -286,8 +286,15 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
}
Intervals.push_back(LI);
OrigRegs.push_back(VRM->getPhys(Reg));
- MinInd = I ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
- MaxInd = I ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
+ if (LI->empty()) {
+ // The address input is undef, so it doesn't contribute to the relevant
+ // range. Seed a reasonable index range if required.
+ if (I == 0)
+ MinInd = MaxInd = LIS->getInstructionIndex(*MI);
+ continue;
+ }
+ MinInd = I != 0 ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
+ MaxInd = I != 0 ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
}
if (Intervals.empty())
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNProcessors.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNProcessors.td
index b926041afb2f..17e6098d880d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -183,3 +183,7 @@ def : ProcessorModel<"gfx1011", GFX10SpeedModel,
def : ProcessorModel<"gfx1012", GFX10SpeedModel,
FeatureISAVersion10_1_2.Features
>;
+
+def : ProcessorModel<"gfx1030", GFX10SpeedModel,
+ FeatureISAVersion10_3_0.Features
+>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp
index 76593bc0e5ac..98d971630ca4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp
@@ -168,13 +168,15 @@ private:
// 8 banks for SGPRs.
// Registers already processed and recorded in RegsUsed are excluded.
// If Bank is not -1 assume Reg:SubReg to belong to that Bank.
- unsigned getRegBankMask(unsigned Reg, unsigned SubReg, int Bank);
+ uint32_t getRegBankMask(unsigned Reg, unsigned SubReg, int Bank);
- // Return number of stalls in the instructions.
- // UsedBanks has bits set for the banks used by all operands.
- // If Reg and Bank provided substitute the Reg with the Bank.
- unsigned analyzeInst(const MachineInstr& MI, unsigned& UsedBanks,
- unsigned Reg = AMDGPU::NoRegister, int Bank = -1);
+ // Analyze one instruction returning the number of stalls and a mask of the
+ // banks used by all operands.
+ // If Reg and Bank are provided, assume all uses of Reg will be replaced with
+ // a register chosen from Bank.
+ std::pair<unsigned, unsigned> analyzeInst(const MachineInstr &MI,
+ unsigned Reg = AMDGPU::NoRegister,
+ int Bank = -1);
// Return true if register is regular VGPR or SGPR or their tuples.
// Returns false for special registers like m0, vcc etc.
@@ -280,7 +282,9 @@ unsigned GCNRegBankReassign::getPhysRegBank(unsigned Reg) const {
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
unsigned Size = TRI->getRegSizeInBits(*RC);
- if (Size > 32)
+ if (Size == 16)
+ Reg = TRI->get32BitRegister(Reg);
+ else if (Size > 32)
Reg = TRI->getSubReg(Reg, AMDGPU::sub0);
if (TRI->hasVGPRs(RC)) {
@@ -292,7 +296,7 @@ unsigned GCNRegBankReassign::getPhysRegBank(unsigned Reg) const {
return Reg % NUM_SGPR_BANKS + SGPR_BANK_OFFSET;
}
-unsigned GCNRegBankReassign::getRegBankMask(unsigned Reg, unsigned SubReg,
+uint32_t GCNRegBankReassign::getRegBankMask(unsigned Reg, unsigned SubReg,
int Bank) {
if (Register::isVirtualRegister(Reg)) {
if (!VRM->isAssignedReg(Reg))
@@ -306,14 +310,21 @@ unsigned GCNRegBankReassign::getRegBankMask(unsigned Reg, unsigned SubReg,
}
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- unsigned Size = TRI->getRegSizeInBits(*RC) / 32;
- if (Size > 1)
- Reg = TRI->getSubReg(Reg, AMDGPU::sub0);
+ unsigned Size = TRI->getRegSizeInBits(*RC);
+
+ if (Size == 16) {
+ Reg = TRI->get32BitRegister(Reg);
+ Size = 1;
+ } else {
+ Size /= 32;
+ if (Size > 1)
+ Reg = TRI->getSubReg(Reg, AMDGPU::sub0);
+ }
if (TRI->hasVGPRs(RC)) {
// VGPRs have 4 banks assigned in a round-robin fashion.
Reg -= AMDGPU::VGPR0;
- unsigned Mask = (1 << Size) - 1;
+ uint32_t Mask = maskTrailingOnes<uint32_t>(Size);
unsigned Used = 0;
// Bitmask lacks an extract method
for (unsigned I = 0; I < Size; ++I)
@@ -321,7 +332,7 @@ unsigned GCNRegBankReassign::getRegBankMask(unsigned Reg, unsigned SubReg,
Used |= 1 << I;
RegsUsed.set(Reg, Reg + Size);
Mask &= ~Used;
- Mask <<= (Bank == -1) ? Reg % NUM_VGPR_BANKS : unsigned(Bank);
+ Mask <<= (Bank == -1) ? Reg % NUM_VGPR_BANKS : uint32_t(Bank);
return (Mask | (Mask >> NUM_VGPR_BANKS)) & VGPR_BANK_MASK;
}
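One detail in this hunk: the VGPR bank mask is now built with maskTrailingOnes<uint32_t>(Size) instead of (1 << Size) - 1, which would shift by the full type width for a tuple covering 32 registers. A small standalone sketch of an equivalent helper (illustrative only, not the LLVM implementation):

#include <cstdint>
#include <cstdio>

// Build a mask with the N lowest bits set; well defined for N == 0 and N == 32,
// unlike (1u << N) - 1, which is undefined when N equals the type width.
static uint32_t lowBitsSet(unsigned N) {
  return N == 0 ? 0u : (~0u >> (32 - N));
}

int main() {
  printf("N=4  -> 0x%08X\n", lowBitsSet(4));   // 0x0000000F
  printf("N=32 -> 0x%08X\n", lowBitsSet(32));  // 0xFFFFFFFF
  return 0;
}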
@@ -347,15 +358,14 @@ unsigned GCNRegBankReassign::getRegBankMask(unsigned Reg, unsigned SubReg,
return Mask << SGPR_BANK_OFFSET;
}
-unsigned GCNRegBankReassign::analyzeInst(const MachineInstr& MI,
- unsigned& UsedBanks,
- unsigned Reg,
- int Bank) {
+std::pair<unsigned, unsigned>
+GCNRegBankReassign::analyzeInst(const MachineInstr &MI, unsigned Reg,
+ int Bank) {
unsigned StallCycles = 0;
- UsedBanks = 0;
+ unsigned UsedBanks = 0;
if (MI.isDebugValue())
- return 0;
+ return std::make_pair(StallCycles, UsedBanks);
RegsUsed.reset();
OperandMasks.clear();
@@ -372,30 +382,30 @@ unsigned GCNRegBankReassign::analyzeInst(const MachineInstr& MI,
unsigned ShiftedBank = Bank;
if (Bank != -1 && R == Reg && Op.getSubReg()) {
- unsigned LM = TRI->getSubRegIndexLaneMask(Op.getSubReg()).getAsInteger();
- if (!(LM & 1) && (Bank < NUM_VGPR_BANKS)) {
+ unsigned Offset = TRI->getChannelFromSubReg(Op.getSubReg());
+ LaneBitmask LM = TRI->getSubRegIndexLaneMask(Op.getSubReg());
+ if (Offset && Bank < NUM_VGPR_BANKS) {
// If a register spans all banks we cannot shift it to avoid conflict.
- if (countPopulation(LM) >= NUM_VGPR_BANKS)
+ if (TRI->getNumCoveredRegs(LM) >= NUM_VGPR_BANKS)
continue;
- ShiftedBank = (Bank + countTrailingZeros(LM)) % NUM_VGPR_BANKS;
- } else if (!(LM & 3) && (Bank >= SGPR_BANK_OFFSET)) {
+ ShiftedBank = (Bank + Offset) % NUM_VGPR_BANKS;
+ } else if (Offset > 1 && Bank >= SGPR_BANK_OFFSET) {
// If a register spans all banks we cannot shift it to avoid conflict.
- if (countPopulation(LM) / 2 >= NUM_SGPR_BANKS)
+ if (TRI->getNumCoveredRegs(LM) / 2 >= NUM_SGPR_BANKS)
continue;
- ShiftedBank = SGPR_BANK_OFFSET + (Bank - SGPR_BANK_OFFSET +
- (countTrailingZeros(LM) >> 1)) %
- NUM_SGPR_BANKS;
+ ShiftedBank = SGPR_BANK_OFFSET +
+ (Bank - SGPR_BANK_OFFSET + (Offset >> 1)) % NUM_SGPR_BANKS;
}
}
- unsigned Mask = getRegBankMask(R, Op.getSubReg(),
+ uint32_t Mask = getRegBankMask(R, Op.getSubReg(),
(Reg == R) ? ShiftedBank : -1);
StallCycles += countPopulation(UsedBanks & Mask);
UsedBanks |= Mask;
OperandMasks.push_back(OperandMask(Op.getReg(), Op.getSubReg(), Mask));
}
- return StallCycles;
+ return std::make_pair(StallCycles, UsedBanks);
}
unsigned GCNRegBankReassign::getOperandGatherWeight(const MachineInstr& MI,
@@ -440,10 +450,19 @@ bool GCNRegBankReassign::isReassignable(unsigned Reg) const {
}
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg);
+ unsigned Size = TRI->getRegSizeInBits(*RC);
+
+ // TODO: Support 16 bit registers. Those need to be moved with their
+ // parent VGPR_32 and potentially a sibling 16 bit sub-register.
+ if (Size < 32)
+ return false;
+
if (TRI->hasVGPRs(RC))
return true;
- unsigned Size = TRI->getRegSizeInBits(*RC);
+ if (Size == 16)
+ return AMDGPU::SGPR_LO16RegClass.contains(PhysReg);
+
if (Size > 32)
PhysReg = TRI->getSubReg(PhysReg, AMDGPU::sub0);
@@ -496,16 +515,16 @@ unsigned GCNRegBankReassign::getFreeBanks(unsigned Reg,
unsigned FreeBanks = getFreeBanks(Mask, UsedBanks);
- unsigned LM = TRI->getSubRegIndexLaneMask(SubReg).getAsInteger();
- if (!(LM & 1) && (Mask & VGPR_BANK_MASK)) {
- unsigned Shift = countTrailingZeros(LM);
+ unsigned Offset = TRI->getChannelFromSubReg(SubReg);
+ if (Offset && (Mask & VGPR_BANK_MASK)) {
+ unsigned Shift = Offset;
if (Shift >= NUM_VGPR_BANKS)
return 0;
unsigned VB = FreeBanks & VGPR_BANK_MASK;
FreeBanks = ((VB >> Shift) | (VB << (NUM_VGPR_BANKS - Shift))) &
VGPR_BANK_MASK;
- } else if (!(LM & 3) && (Mask & SGPR_BANK_MASK)) {
- unsigned Shift = countTrailingZeros(LM) >> 1;
+ } else if (Offset > 1 && (Mask & SGPR_BANK_MASK)) {
+ unsigned Shift = Offset >> 1;
if (Shift >= NUM_SGPR_BANKS)
return 0;
unsigned SB = FreeBanks >> SGPR_BANK_OFFSET;
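
The getFreeBanks hunk derives the sub-register's starting channel directly (getChannelFromSubReg) and rotates the free-bank mask by that amount. A small stand-alone illustration of the rotation itself; the bank count of 4 is an assumption for the example, not taken from this hunk:

    #include <cstdio>

    // Rotate the low NumBanks bits of a free-bank mask right by Shift, as done
    // for sub-register operands whose first channel is not channel 0.
    static unsigned rotateBanks(unsigned FreeBanks, unsigned Shift,
                                unsigned NumBanks) {
      unsigned Mask = (1u << NumBanks) - 1;
      FreeBanks &= Mask;
      if (Shift >= NumBanks)
        return 0; // a shift past the last bank leaves no usable free bank
      return ((FreeBanks >> Shift) | (FreeBanks << (NumBanks - Shift))) & Mask;
    }

    int main() {
      // Banks 0 and 2 free, operand starts at channel 1: the free set rotates.
      std::printf("0x%x\n", rotateBanks(0x5, 1, 4)); // prints 0xa
    }
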
@@ -570,7 +589,6 @@ unsigned GCNRegBankReassign::computeStallCycles(unsigned SrcReg,
unsigned Reg, int Bank,
bool Collect) {
unsigned TotalStallCycles = 0;
- unsigned UsedBanks = 0;
SmallSet<const MachineInstr *, 16> Visited;
for (auto &MI : MRI->use_nodbg_instructions(SrcReg)) {
@@ -578,7 +596,9 @@ unsigned GCNRegBankReassign::computeStallCycles(unsigned SrcReg,
continue;
if (!Visited.insert(&MI).second)
continue;
- unsigned StallCycles = analyzeInst(MI, UsedBanks, Reg, Bank);
+ unsigned StallCycles;
+ unsigned UsedBanks;
+ std::tie(StallCycles, UsedBanks) = analyzeInst(MI, Reg, Bank);
TotalStallCycles += StallCycles;
if (Collect)
collectCandidates(MI, UsedBanks, StallCycles);
@@ -636,7 +656,11 @@ unsigned GCNRegBankReassign::tryReassign(Candidate &C) {
struct BankStall {
BankStall(unsigned b, unsigned s) : Bank(b), Stalls(s) {};
- bool operator< (const BankStall &RHS) const { return Stalls > RHS.Stalls; }
+ bool operator<(const BankStall &RHS) const {
+ if (Stalls == RHS.Stalls)
+ return Bank < RHS.Bank;
+ return Stalls > RHS.Stalls;
+ }
unsigned Bank;
unsigned Stalls;
};
@@ -653,7 +677,7 @@ unsigned GCNRegBankReassign::tryReassign(Candidate &C) {
}
}
}
- std::sort(BankStalls.begin(), BankStalls.end());
+ llvm::sort(BankStalls);
Register OrigReg = VRM->getPhys(C.Reg);
LRM->unassign(LI);
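
The BankStall comparator gains a tie-break on the bank number and the container is sorted with llvm::sort; the goal is an ordering that does not depend on the initial order of equal elements. A plain-C++ sketch of the same ordering, with std::sort standing in for llvm::sort:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct BankStall {
      unsigned Bank;
      unsigned Stalls;
      // Sort by decreasing stall count; break ties by bank number so the result
      // is deterministic regardless of the input order of equal elements.
      bool operator<(const BankStall &RHS) const {
        if (Stalls == RHS.Stalls)
          return Bank < RHS.Bank;
        return Stalls > RHS.Stalls;
      }
    };

    int main() {
      std::vector<BankStall> BankStalls{{2, 3}, {0, 5}, {1, 3}};
      std::sort(BankStalls.begin(), BankStalls.end());
      for (const BankStall &BS : BankStalls)
        std::printf("bank %u: %u stalls\n", BS.Bank, BS.Stalls);
      // Prints bank 0 (5 stalls) first, then banks 1 and 2 (3 stalls each).
    }
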
@@ -695,8 +719,9 @@ unsigned GCNRegBankReassign::collectCandidates(MachineFunction &MF,
if (MI.isBundle())
continue; // we analyze the instructions inside the bundle individually
- unsigned UsedBanks = 0;
- unsigned StallCycles = analyzeInst(MI, UsedBanks);
+ unsigned StallCycles;
+ unsigned UsedBanks;
+ std::tie(StallCycles, UsedBanks) = analyzeInst(MI);
if (Collect)
collectCandidates(MI, UsedBanks, StallCycles);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index d593204cba05..86a3cb9af32f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -5,6 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the GCNRegPressure class.
+///
+//===----------------------------------------------------------------------===//
#include "GCNRegPressure.h"
#include "AMDGPUSubtarget.h"
@@ -98,7 +103,8 @@ void GCNRegPressure::inc(unsigned Reg,
LaneBitmask PrevMask,
LaneBitmask NewMask,
const MachineRegisterInfo &MRI) {
- if (NewMask == PrevMask)
+ if (SIRegisterInfo::getNumCoveredRegs(NewMask) ==
+ SIRegisterInfo::getNumCoveredRegs(PrevMask))
return;
int Sign = 1;
@@ -106,25 +112,21 @@ void GCNRegPressure::inc(unsigned Reg,
std::swap(NewMask, PrevMask);
Sign = -1;
}
-#ifndef NDEBUG
- const auto MaxMask = MRI.getMaxLaneMaskForVReg(Reg);
-#endif
+
switch (auto Kind = getRegKind(Reg, MRI)) {
case SGPR32:
case VGPR32:
case AGPR32:
- assert(PrevMask.none() && NewMask == MaxMask);
Value[Kind] += Sign;
break;
case SGPR_TUPLE:
case VGPR_TUPLE:
case AGPR_TUPLE:
- assert(NewMask < MaxMask || NewMask == MaxMask);
assert(PrevMask < NewMask);
Value[Kind == SGPR_TUPLE ? SGPR32 : Kind == AGPR_TUPLE ? AGPR32 : VGPR32] +=
- Sign * (~PrevMask & NewMask).getNumLanes();
+ Sign * SIRegisterInfo::getNumCoveredRegs(~PrevMask & NewMask);
if (PrevMask.none()) {
assert(NewMask.any());
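
The hunks above count covered 32-bit registers instead of raw lanes, since a 32-bit register can now expose 16-bit halves as separate lane bits. A stand-alone sketch of that counting; it assumes two adjacent lane bits per 32-bit register, which is an illustration only (the real mapping comes from the target's lane-mask tables):

    #include <bitset>
    #include <cstdint>
    #include <cstdio>

    // Assume each 32-bit register owns two adjacent lane bits (low = .l half,
    // high = .h half). A register counts as covered if either bit is set.
    static unsigned getNumCoveredRegs(uint64_t LaneMask) {
      uint64_t PerRegBit = (LaneMask | (LaneMask >> 1)) & 0x5555555555555555ull;
      return std::bitset<64>(PerRegBit).count();
    }

    int main() {
      std::printf("%u\n", getNumCoveredRegs(0x3)); // one register, both halves -> 1
      std::printf("%u\n", getNumCoveredRegs(0x2)); // one register, high half   -> 1
      std::printf("%u\n", getNumCoveredRegs(0xF)); // two registers             -> 2
    }
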
@@ -216,7 +218,7 @@ static LaneBitmask getUsedRegMask(const MachineOperand &MO,
return MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(SubReg);
auto MaxMask = MRI.getMaxLaneMaskForVReg(MO.getReg());
- if (MaxMask == LaneBitmask::getLane(0)) // cannot have subregs
+ if (SIRegisterInfo::getNumCoveredRegs(MaxMask) > 1) // cannot have subregs
return MaxMask;
// For a tentative schedule LIS isn't updated yet but livemask should remain
@@ -327,8 +329,9 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
// update max pressure
MaxPressure = max(AtMIPressure, MaxPressure);
- for (const auto &MO : MI.defs()) {
- if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()) || MO.isDead())
+ for (const auto &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef() ||
+ !Register::isVirtualRegister(MO.getReg()) || MO.isDead())
continue;
auto Reg = MO.getReg();
@@ -403,8 +406,8 @@ void GCNDownwardRPTracker::advanceToNext() {
LastTrackedMI = &*NextMI++;
// Add new registers or mask bits.
- for (const auto &MO : LastTrackedMI->defs()) {
- if (!MO.isReg())
+ for (const auto &MO : LastTrackedMI->operands()) {
+ if (!MO.isReg() || !MO.isDef())
continue;
Register Reg = MO.getReg();
if (!Register::isVirtualRegister(Reg))
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 5862cdb04166..2ef79410719f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -5,6 +5,14 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the GCNRegPressure class, which tracks register pressure
+/// by bookkeeping the number of SGPRs/VGPRs used and weights for large
+/// SGPRs/VGPRs. It also implements a compare function, which compares
+/// different register pressures and declares the one with the higher
+/// occupancy as the winner.
+///
+//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
#define LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
@@ -208,7 +216,7 @@ getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) {
auto SI = SII.getInstructionIndex(*I);
Indexes.push_back(After ? SI.getDeadSlot() : SI.getBaseIndex());
}
- std::sort(Indexes.begin(), Indexes.end());
+ llvm::sort(Indexes);
auto &MRI = (*R.begin())->getParent()->getParent()->getRegInfo();
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> LiveRegMap;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index e109eed5f607..deed50b6db7d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -50,9 +50,9 @@ void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
VGPRCriticalLimit = ST.getMaxNumVGPRs(TargetOccupancy);
} else {
SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
- SRI->getSGPRPressureSet());
+ AMDGPU::RegisterPressureSets::SReg_32);
VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
- SRI->getVGPRPressureSet());
+ AMDGPU::RegisterPressureSets::VGPR_32);
}
SGPRCriticalLimit -= ErrorMargin;
@@ -83,8 +83,8 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
}
- unsigned NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()];
- unsigned NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()];
+ unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
+ unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
// If two instructions increase the pressure of different register sets
// by the same amount, the generic scheduler will prefer to schedule the
@@ -109,12 +109,12 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
// marked as RegExcess in tryCandidate() when they are compared with
// instructions that increase the register pressure.
if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
- Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet());
+ Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
}
if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
- Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet());
+ Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
}
@@ -128,10 +128,12 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
if (SGPRDelta >= 0 || VGPRDelta >= 0) {
if (SGPRDelta > VGPRDelta) {
- Cand.RPDelta.CriticalMax = PressureChange(SRI->getSGPRPressureSet());
+ Cand.RPDelta.CriticalMax =
+ PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
} else {
- Cand.RPDelta.CriticalMax = PressureChange(SRI->getVGPRPressureSet());
+ Cand.RPDelta.CriticalMax =
+ PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
}
}
@@ -145,8 +147,8 @@ void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
SchedCandidate &Cand) {
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
- unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()];
- unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()];
+ unsigned SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
+ unsigned VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
ReadyQueue &Q = Zone.Available;
for (SUnit *SU : Q) {
@@ -231,33 +233,11 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
// Pick best from BotCand and TopCand.
LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
dbgs() << "Bot Cand: "; traceCandidate(BotCand););
- SchedCandidate Cand;
- if (TopCand.Reason == BotCand.Reason) {
- Cand = BotCand;
- GenericSchedulerBase::CandReason TopReason = TopCand.Reason;
- TopCand.Reason = NoCand;
- GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
- if (TopCand.Reason != NoCand) {
- Cand.setBest(TopCand);
- } else {
- TopCand.Reason = TopReason;
- }
- } else {
- if (TopCand.Reason == RegExcess && TopCand.RPDelta.Excess.getUnitInc() <= 0) {
- Cand = TopCand;
- } else if (BotCand.Reason == RegExcess && BotCand.RPDelta.Excess.getUnitInc() <= 0) {
- Cand = BotCand;
- } else if (TopCand.Reason == RegCritical && TopCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
- Cand = TopCand;
- } else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
- Cand = BotCand;
- } else {
- if (BotCand.Reason > TopCand.Reason) {
- Cand = TopCand;
- } else {
- Cand = BotCand;
- }
- }
+ SchedCandidate Cand = BotCand;
+ TopCand.Reason = NoCand;
+ GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
+ if (TopCand.Reason != NoCand) {
+ Cand.setBest(TopCand);
}
LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
@@ -316,13 +296,13 @@ GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
ST(MF.getSubtarget<GCNSubtarget>()),
MFI(*MF.getInfo<SIMachineFunctionInfo>()),
StartingOccupancy(MFI.getOccupancy()),
- MinOccupancy(StartingOccupancy), Stage(0), RegionIdx(0) {
+ MinOccupancy(StartingOccupancy), Stage(Collect), RegionIdx(0) {
LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
}
void GCNScheduleDAGMILive::schedule() {
- if (Stage == 0) {
+ if (Stage == Collect) {
// Just record regions at the first pass.
Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
return;
@@ -348,6 +328,7 @@ void GCNScheduleDAGMILive::schedule() {
ScheduleDAGMILive::schedule();
Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
+ RescheduleRegions[RegionIdx] = false;
if (!LIS)
return;
@@ -389,20 +370,28 @@ void GCNScheduleDAGMILive::schedule() {
<< MinOccupancy << ".\n");
}
+ unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
+ unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
+ if (PressureAfter.getVGPRNum() > MaxVGPRs ||
+ PressureAfter.getSGPRNum() > MaxSGPRs)
+ RescheduleRegions[RegionIdx] = true;
+
if (WavesAfter >= MinOccupancy) {
- unsigned TotalVGPRs = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
- unsigned TotalSGPRs = AMDGPU::IsaInfo::getAddressableNumSGPRs(&ST);
- if (WavesAfter > MFI.getMinWavesPerEU() ||
+ if (Stage == UnclusteredReschedule &&
+ !PressureAfter.less(ST, PressureBefore)) {
+ LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
+ } else if (WavesAfter > MFI.getMinWavesPerEU() ||
PressureAfter.less(ST, PressureBefore) ||
- (TotalVGPRs >= PressureAfter.getVGPRNum() &&
- TotalSGPRs >= PressureAfter.getSGPRNum())) {
+ !RescheduleRegions[RegionIdx]) {
Pressure[RegionIdx] = PressureAfter;
return;
+ } else {
+ LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
}
- LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
}
LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
+ RescheduleRegions[RegionIdx] = true;
RegionEnd = RegionBegin;
for (MachineInstr *MI : Unsched) {
if (MI->isDebugInstr())
@@ -532,33 +521,55 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
LiveIns.resize(Regions.size());
Pressure.resize(Regions.size());
+ RescheduleRegions.resize(Regions.size());
+ RescheduleRegions.set();
if (!Regions.empty())
BBLiveInMap = getBBLiveInMap();
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
+
do {
Stage++;
RegionIdx = 0;
MachineBasicBlock *MBB = nullptr;
- if (Stage > 1) {
+ if (Stage > InitialSchedule) {
+ if (!LIS)
+ break;
+
// Retry function scheduling if we found resulting occupancy and it is
// lower than used for first pass scheduling. This will give more freedom
// to schedule low register pressure blocks.
// Code is partially copied from MachineSchedulerBase::scheduleRegions().
- if (!LIS || StartingOccupancy <= MinOccupancy)
- break;
+ if (Stage == UnclusteredReschedule) {
+ if (RescheduleRegions.none())
+ continue;
+ LLVM_DEBUG(dbgs() <<
+ "Retrying function scheduling without clustering.\n");
+ }
+
+ if (Stage == ClusteredLowOccupancyReschedule) {
+ if (StartingOccupancy <= MinOccupancy)
+ break;
- LLVM_DEBUG(
- dbgs()
- << "Retrying function scheduling with lowest recorded occupancy "
- << MinOccupancy << ".\n");
+ LLVM_DEBUG(
+ dbgs()
+ << "Retrying function scheduling with lowest recorded occupancy "
+ << MinOccupancy << ".\n");
- S.setTargetOccupancy(MinOccupancy);
+ S.setTargetOccupancy(MinOccupancy);
+ }
}
+ if (Stage == UnclusteredReschedule)
+ SavedMutations.swap(Mutations);
+
for (auto Region : Regions) {
+ if (Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx])
+ continue;
+
RegionBegin = Region.first;
RegionEnd = Region.second;
@@ -566,7 +577,7 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
if (MBB) finishBlock();
MBB = RegionBegin->getParent();
startBlock(MBB);
- if (Stage == 1)
+ if (Stage == InitialSchedule)
computeBlockPressure(MBB);
}
@@ -594,5 +605,7 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
}
finishBlock();
- } while (Stage < 2);
+ if (Stage == UnclusteredReschedule)
+ SavedMutations.swap(Mutations);
+ } while (Stage != LastStage);
}
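
finalizeSchedule now walks a fixed sequence of named stages and, in the unclustered pass, only revisits regions still flagged in RescheduleRegions. A compressed stand-alone sketch of that control flow, with std::bitset standing in for llvm::BitVector and the actual scheduling stubbed out:

    #include <bitset>
    #include <cstdio>

    enum Stage : unsigned {
      Collect,
      InitialSchedule,
      UnclusteredReschedule,
      ClusteredLowOccupancyReschedule,
      LastStage = ClusteredLowOccupancyReschedule
    };

    int main() {
      constexpr unsigned NumRegions = 4;
      std::bitset<NumRegions> RescheduleRegions; // stand-in for llvm::BitVector
      RescheduleRegions.set();                   // every region starts flagged

      unsigned StageVal = Collect;
      do {
        ++StageVal;
        for (unsigned RegionIdx = 0; RegionIdx != NumRegions; ++RegionIdx) {
          // The unclustered pass only touches regions still flagged for rework.
          if (StageVal == UnclusteredReschedule && !RescheduleRegions[RegionIdx])
            continue;
          std::printf("stage %u: scheduling region %u\n", StageVal, RegionIdx);
          // Pretend scheduling succeeded without raising register pressure.
          RescheduleRegions[RegionIdx] = false;
        }
      } while (StageVal != LastStage);
    }
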
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index dd687a930c79..2d81d9977c31 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -64,6 +64,14 @@ public:
class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
+ enum : unsigned {
+ Collect,
+ InitialSchedule,
+ UnclusteredReschedule,
+ ClusteredLowOccupancyReschedule,
+ LastStage = ClusteredLowOccupancyReschedule
+ };
+
const GCNSubtarget &ST;
SIMachineFunctionInfo &MFI;
@@ -84,6 +92,10 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
SmallVector<std::pair<MachineBasicBlock::iterator,
MachineBasicBlock::iterator>, 32> Regions;
+ // Records whether a region is not yet scheduled, its schedule has been
+ // reverted, or we generally desire to reschedule it.
+ BitVector RescheduleRegions;
+
// Region live-in cache.
SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 1f94ab799122..ea6e9038fd1e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -17,6 +17,7 @@
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/EndianStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "Utils/AMDGPUBaseInfo.h"
@@ -39,8 +40,8 @@ public:
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override;
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override;
+ void relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const override;
bool mayNeedRelaxation(const MCInst &Inst,
const MCSubtargetInfo &STI) const override;
@@ -53,12 +54,13 @@ public:
} //End anonymous namespace
-void AMDGPUAsmBackend::relaxInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI,
- MCInst &Res) const {
+void AMDGPUAsmBackend::relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const {
+ MCInst Res;
unsigned RelaxedOpcode = AMDGPU::getSOPPWithRelaxation(Inst.getOpcode());
Res.setOpcode(RelaxedOpcode);
Res.addOperand(Inst.getOperand(0));
+ Inst = std::move(Res);
return;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index d352219a7a98..619fde74e88d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -6,8 +6,10 @@
//
//===----------------------------------------------------------------------===//
+#include "AMDGPUFixupKinds.h"
#include "AMDGPUMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
@@ -80,6 +82,15 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_AMDGPU_ABS64;
}
+ if (Fixup.getTargetKind() == AMDGPU::fixup_si_sopp_br) {
+ const auto *SymA = Target.getSymA();
+ assert(SymA);
+
+ Ctx.reportError(Fixup.getLoc(),
+ Twine("undefined label '") + SymA->getSymbol().getName() + "'");
+ return ELF::R_AMDGPU_NONE;
+ }
+
llvm_unreachable("unhandled relocation type");
}
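
The new branch-fixup case reports a user-facing diagnostic through the MC context and returns a harmless relocation instead of reaching llvm_unreachable. A stand-alone sketch of that pattern, with a small callback standing in for MCContext::reportError and hypothetical fixup names:

    #include <cstdio>
    #include <functional>
    #include <string>

    enum RelocType { R_NONE, R_ABS32 };

    // Hypothetical fixup kinds for the sketch; fixup_sopp_br marks a short
    // branch offset that cannot be expressed as a relocation.
    enum FixupKind { fixup_abs32, fixup_sopp_br };

    static RelocType getRelocType(FixupKind Kind, const std::string &SymName,
                                  const std::function<void(const std::string &)> &ReportError) {
      switch (Kind) {
      case fixup_abs32:
        return R_ABS32;
      case fixup_sopp_br:
        // Report an error and return a neutral relocation so the assembler can
        // finish and print the diagnostic rather than asserting.
        ReportError("undefined label '" + SymName + "'");
        return R_NONE;
      }
      return R_NONE;
    }

    int main() {
      getRelocType(fixup_sopp_br, "loop_top", [](const std::string &Msg) {
        std::fprintf(stderr, "error: %s\n", Msg.c_str());
      });
    }
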
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index f65dc25d7eec..fe063d33ea3e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -18,6 +18,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -26,6 +27,28 @@
using namespace llvm;
using namespace llvm::AMDGPU;
+static cl::opt<bool> Keep16BitSuffixes(
+ "amdgpu-keep-16-bit-reg-suffixes",
+ cl::desc("Keep .l and .h suffixes in asm for debugging purposes"),
+ cl::init(false),
+ cl::ReallyHidden);
+
+void AMDGPUInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ // FIXME: The current implementation of
+ // AsmParser::parseRegisterOrRegisterNumber in MC implies we either emit this
+ // as an integer or we provide a name which represents a physical register.
+ // For CFI instructions we really want to emit a name for the DWARF register
+ // instead, because there may be multiple DWARF registers corresponding to a
+ // single physical register. One case where this problem manifests is with
+ // wave32/wave64 where using the physical register name is ambiguous: if we
+ // write e.g. `.cfi_undefined v0` we lose information about the wavefront
+ // size which we need to encode the register in the final DWARF. Ideally we
+ // would extend MC to support parsing DWARF register names so we could do
+ // something like `.cfi_undefined dwarf_wave32_v0`. For now we just live with
+ // non-pretty DWARF register names in assembly text.
+ OS << RegNo;
+}
+
void AMDGPUInstPrinter::printInst(const MCInst *MI, uint64_t Address,
StringRef Annot, const MCSubtargetInfo &STI,
raw_ostream &OS) {
@@ -164,10 +187,10 @@ void AMDGPUInstPrinter::printSMRDOffset8(const MCInst *MI, unsigned OpNo,
printU32ImmOperand(MI, OpNo, STI, O);
}
-void AMDGPUInstPrinter::printSMRDOffset20(const MCInst *MI, unsigned OpNo,
+void AMDGPUInstPrinter::printSMEMOffset(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- printU32ImmOperand(MI, OpNo, STI, O);
+ O << formatHex(MI->getOperand(OpNo).getImm());
}
void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
@@ -244,6 +267,11 @@ void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
printNamedBit(MI, OpNo, O, "r128");
}
+void AMDGPUInstPrinter::printGFX10A16(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ printNamedBit(MI, OpNo, O, "a16");
+}
+
void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "lwe");
@@ -287,7 +315,6 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
switch (RegNo) {
case AMDGPU::FP_REG:
case AMDGPU::SP_REG:
- case AMDGPU::SCRATCH_WAVE_OFFSET_REG:
case AMDGPU::PRIVATE_RSRC_REG:
llvm_unreachable("pseudo-register should not ever be emitted");
case AMDGPU::SCC:
@@ -297,7 +324,12 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
}
#endif
- O << getRegisterName(RegNo);
+ StringRef RegName(getRegisterName(RegNo));
+ if (!Keep16BitSuffixes)
+ if (!RegName.consume_back(".l"))
+ RegName.consume_back(".h");
+
+ O << RegName;
}
void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
@@ -346,11 +378,21 @@ void AMDGPUInstPrinter::printVINTRPDst(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo, STI, O);
}
+void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ int16_t SImm = static_cast<int16_t>(Imm);
+ if (isInlinableIntLiteral(SImm))
+ O << SImm;
+ else
+ O << formatHex(static_cast<uint64_t>(Imm));
+}
+
void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
int16_t SImm = static_cast<int16_t>(Imm);
- if (SImm >= -16 && SImm <= 64) {
+ if (isInlinableIntLiteral(SImm)) {
O << SImm;
return;
}
@@ -518,7 +560,8 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (Op.isReg()) {
printRegOperand(Op.getReg(), O, MRI);
} else if (Op.isImm()) {
- switch (Desc.OpInfo[OpNo].OperandType) {
+ const uint8_t OpTy = Desc.OpInfo[OpNo].OperandType;
+ switch (OpTy) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
@@ -535,10 +578,12 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
printImmediate64(Op.getImm(), STI, O);
break;
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
- case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
case AMDGPU::OPERAND_REG_IMM_INT16:
+ printImmediateInt16(Op.getImm(), STI, O);
+ break;
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16:
printImmediate16(Op.getImm(), STI, O);
break;
@@ -549,11 +594,19 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
printImmediate32(Op.getImm(), STI, O);
break;
}
+
+ // Deal with 16-bit FP inline immediates not working.
+ if (OpTy == AMDGPU::OPERAND_REG_IMM_V2FP16) {
+ printImmediate16(static_cast<uint16_t>(Op.getImm()), STI, O);
+ break;
+ }
LLVM_FALLTHROUGH;
- case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
- case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+ printImmediateInt16(static_cast<uint16_t>(Op.getImm()), STI, O);
+ break;
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
printImmediateV216(Op.getImm(), STI, O);
break;
case MCOI::OPERAND_UNKNOWN:
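
printImmediateInt16 gives integer 16-bit operands their own path: inlinable values print in decimal, everything else as a hex literal. A stand-alone sketch; the range check inside isInlinableIntLiteral is assumed to match the -16..64 test removed above:

    #include <cstdint>
    #include <cstdio>

    // The old code tested -16 <= SImm <= 64 inline; this helper is assumed to
    // perform the same check.
    static bool isInlinableIntLiteral(int16_t Imm) {
      return Imm >= -16 && Imm <= 64;
    }

    static void printImmediateInt16(uint32_t Imm) {
      int16_t SImm = static_cast<int16_t>(Imm);
      if (isInlinableIntLiteral(SImm))
        std::printf("%d", SImm);
      else
        std::printf("0x%x", Imm); // non-inlinable values fall back to hex
    }

    int main() {
      printImmediateInt16(64);     std::printf("\n"); // 64
      printImmediateInt16(0xfff0); std::printf("\n"); // -16, still inlinable
      printImmediateInt16(0x1234); std::printf("\n"); // 0x1234
    }
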
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index ba53003e9041..6dfd23ea72e6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -23,6 +23,7 @@ public:
: MCInstPrinter(MAI, MII, MRI) {}
//Autogenerated by tblgen
+ void printRegName(raw_ostream &OS, unsigned RegNo) const override;
void printInstruction(const MCInst *MI, uint64_t Address,
const MCSubtargetInfo &STI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
@@ -60,7 +61,7 @@ private:
raw_ostream &O);
void printSMRDOffset8(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
- void printSMRDOffset20(const MCInst *MI, unsigned OpNo,
+ void printSMEMOffset(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
@@ -86,6 +87,8 @@ private:
raw_ostream &O);
void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printGFX10A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printLWE(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printD16(const MCInst *MI, unsigned OpNo,
@@ -102,8 +105,12 @@ private:
raw_ostream &O);
void printVINTRPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printImmediateIntV216(uint32_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printImmediateV216(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
@@ -112,6 +119,10 @@ private:
raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ printOperand(MI, OpNum, STI, O);
+ }
void printOperandAndFPInputMods(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printOperandAndIntInputMods(const MCInst *MI, unsigned OpNo,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 9644e66fda4e..687cfef4559f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -43,6 +43,9 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT,
WeakRefDirective = ".weakref\t";
//===--- Dwarf Emission Directives -----------------------------------===//
SupportsDebugInformation = true;
+ DwarfRegNumForCFI = true;
+
+ UseIntegratedAssembler = false;
}
bool AMDGPUMCAsmInfo::shouldOmitSectionDirective(StringRef SectionName) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
index 62757a707890..d7d8c8181b02 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -51,6 +51,12 @@ public:
return 0;
}
+ virtual unsigned getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return 0;
+ }
+
virtual unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 9507836c64c2..7d3235efc59e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -61,7 +61,13 @@ static MCRegisterInfo *createAMDGPUMCRegisterInfo(const Triple &TT) {
if (TT.getArch() == Triple::r600)
InitR600MCRegisterInfo(X, 0);
else
- InitAMDGPUMCRegisterInfo(X, 0);
+ InitAMDGPUMCRegisterInfo(X, AMDGPU::PC_REG);
+ return X;
+}
+
+MCRegisterInfo *llvm::createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitAMDGPUMCRegisterInfo(X, AMDGPU::PC_REG, DwarfFlavour);
return X;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
index 9754d31fee60..b9cdbc6502e5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -33,6 +33,10 @@ class Target;
class Triple;
class raw_pwrite_stream;
+enum AMDGPUDwarfFlavour { Wave64 = 0, Wave32 = 1 };
+
+MCRegisterInfo *createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour);
+
MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index fef665c2900e..3d202d7960d6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -43,7 +43,7 @@ using namespace llvm::AMDGPU::HSAMD;
bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
HSAMD::Metadata HSAMetadata;
- if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
+ if (HSAMD::fromString(std::string(HSAMetadataString), HSAMetadata))
return false;
return EmitHSAMetadata(HSAMetadata);
@@ -97,6 +97,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030; break;
case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
}
@@ -148,6 +149,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
+ case GK_GFX1030: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030;
case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
}
@@ -210,9 +212,9 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
}
void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
- unsigned Align) {
- OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", " << Align
- << '\n';
+ Align Alignment) {
+ OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", "
+ << Alignment.value() << '\n';
}
bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) {
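
The emitAMDGPULDS interface now takes an Align value rather than a raw unsigned, so the power-of-two invariant travels with the type instead of being re-asserted at each call site. A stripped-down Align-like wrapper, purely as a sketch of the idea (llvm::Align itself has a richer interface):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Minimal stand-in for llvm::Align: constructed from a power of two, so
    // .value() is always a valid alignment and callers need no extra asserts.
    class Align {
      uint64_t Value;
    public:
      explicit Align(uint64_t V) : Value(V) {
        assert(V != 0 && (V & (V - 1)) == 0 && "alignment must be a power of two");
      }
      uint64_t value() const { return Value; }
    };

    static void emitLDSDirective(const char *Name, unsigned Size, Align Alignment) {
      std::printf("\t.amdgpu_lds %s, %u, %llu\n", Name, Size,
                  static_cast<unsigned long long>(Alignment.value()));
    }

    int main() {
      emitLDSDirective("lds_var", 64, Align(16));
    }
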
@@ -393,9 +395,9 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
// AMDGPUTargetELFStreamer
//===----------------------------------------------------------------------===//
-AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(
- MCStreamer &S, const MCSubtargetInfo &STI)
- : AMDGPUTargetStreamer(S), Streamer(S) {
+AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI)
+ : AMDGPUTargetStreamer(S), Streamer(S), Os(STI.getTargetTriple().getOS()) {
MCAssembler &MCA = getStreamer().getAssembler();
unsigned EFlags = MCA.getELFHeaderEFlags();
@@ -427,7 +429,7 @@ void AMDGPUTargetELFStreamer::finish() {
if (Blob.empty())
return;
EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type,
- [&](MCELFStreamer &OS) { OS.EmitBytes(Blob); });
+ [&](MCELFStreamer &OS) { OS.emitBytes(Blob); });
}
void AMDGPUTargetELFStreamer::EmitNote(
@@ -438,16 +440,22 @@ void AMDGPUTargetELFStreamer::EmitNote(
auto NameSZ = Name.size() + 1;
+ unsigned NoteFlags = 0;
+ // TODO Apparently, this is currently needed for OpenCL as mentioned in
+ // https://reviews.llvm.org/D74995
+ if (Os == Triple::AMDHSA)
+ NoteFlags = ELF::SHF_ALLOC;
+
S.PushSection();
- S.SwitchSection(Context.getELFSection(
- ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
- S.EmitIntValue(NameSZ, 4); // namesz
- S.EmitValue(DescSZ, 4); // descz
- S.EmitIntValue(NoteType, 4); // type
- S.EmitBytes(Name); // name
- S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
+ S.SwitchSection(
+ Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags));
+ S.emitInt32(NameSZ); // namesz
+ S.emitValue(DescSZ, 4); // descz
+ S.emitInt32(NoteType); // type
+ S.emitBytes(Name); // name
+ S.emitValueToAlignment(4, 0, 1, 0); // padding 0
EmitDesc(S); // desc
- S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
+ S.emitValueToAlignment(4, 0, 1, 0); // padding 0
S.PopSection();
}
@@ -458,8 +466,8 @@ void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()),
ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) {
- OS.EmitIntValue(Major, 4);
- OS.EmitIntValue(Minor, 4);
+ OS.emitInt32(Major);
+ OS.emitInt32(Minor);
});
}
@@ -478,15 +486,15 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()),
ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) {
- OS.EmitIntValue(VendorNameSize, 2);
- OS.EmitIntValue(ArchNameSize, 2);
- OS.EmitIntValue(Major, 4);
- OS.EmitIntValue(Minor, 4);
- OS.EmitIntValue(Stepping, 4);
- OS.EmitBytes(VendorName);
- OS.EmitIntValue(0, 1); // NULL terminate VendorName
- OS.EmitBytes(ArchName);
- OS.EmitIntValue(0, 1); // NULL terminte ArchName
+ OS.emitInt16(VendorNameSize);
+ OS.emitInt16(ArchNameSize);
+ OS.emitInt32(Major);
+ OS.emitInt32(Minor);
+ OS.emitInt32(Stepping);
+ OS.emitBytes(VendorName);
+ OS.emitInt8(0); // NULL terminate VendorName
+ OS.emitBytes(ArchName);
+ OS.emitInt8(0); // NULL terminate ArchName
});
}
@@ -495,7 +503,7 @@ AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
MCStreamer &OS = getStreamer();
OS.PushSection();
- OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
+ OS.emitBytes(StringRef((const char*)&Header, sizeof(Header)));
OS.PopSection();
}
@@ -507,9 +515,7 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
}
void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
- unsigned Align) {
- assert(isPowerOf2_32(Align));
-
+ Align Alignment) {
MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol);
SymbolELF->setType(ELF::STT_OBJECT);
@@ -518,7 +524,7 @@ void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
SymbolELF->setExternal(true);
}
- if (SymbolELF->declareCommon(Size, Align, true)) {
+ if (SymbolELF->declareCommon(Size, Alignment.value(), true)) {
report_fatal_error("Symbol: " + Symbol->getName() +
" redeclared as different type");
}
@@ -539,9 +545,9 @@ bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA,
[&](MCELFStreamer &OS) {
- OS.EmitLabel(DescBegin);
- OS.EmitBytes(IsaVersionString);
- OS.EmitLabel(DescEnd);
+ OS.emitLabel(DescBegin);
+ OS.emitBytes(IsaVersionString);
+ OS.emitLabel(DescEnd);
});
return true;
}
@@ -566,9 +572,9 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
[&](MCELFStreamer &OS) {
- OS.EmitLabel(DescBegin);
- OS.EmitBytes(HSAMetadataString);
- OS.EmitLabel(DescEnd);
+ OS.emitLabel(DescBegin);
+ OS.emitBytes(HSAMetadataString);
+ OS.emitLabel(DescEnd);
});
return true;
}
@@ -590,9 +596,9 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA,
[&](MCELFStreamer &OS) {
- OS.EmitLabel(DescBegin);
- OS.EmitBytes(HSAMetadataString);
- OS.EmitLabel(DescEnd);
+ OS.emitLabel(DescBegin);
+ OS.emitBytes(HSAMetadataString);
+ OS.emitLabel(DescEnd);
});
return true;
}
@@ -602,9 +608,9 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd() {
MCStreamer &OS = getStreamer();
OS.PushSection();
- OS.EmitValueToAlignment(64, Encoded_s_code_end, 4);
+ OS.emitValueToAlignment(64, Encoded_s_code_end, 4);
for (unsigned I = 0; I < 48; ++I)
- OS.EmitIntValue(Encoded_s_code_end, 4);
+ OS.emitInt32(Encoded_s_code_end);
OS.PopSection();
return true;
}
@@ -637,22 +643,22 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);
- Streamer.EmitLabel(KernelDescriptorSymbol);
- Streamer.EmitBytes(StringRef(
+ Streamer.emitLabel(KernelDescriptorSymbol);
+ Streamer.emitBytes(StringRef(
(const char*)&(KernelDescriptor),
offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)));
// FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
// expression being created is:
// (start of kernel code) - (start of kernel descriptor)
// It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
- Streamer.EmitValue(MCBinaryExpr::createSub(
+ Streamer.emitValue(MCBinaryExpr::createSub(
MCSymbolRefExpr::create(
KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
MCSymbolRefExpr::create(
KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context),
Context),
sizeof(KernelDescriptor.kernel_code_entry_byte_offset));
- Streamer.EmitBytes(StringRef(
+ Streamer.emitBytes(StringRef(
(const char*)&(KernelDescriptor) +
offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) +
sizeof(KernelDescriptor.kernel_code_entry_byte_offset),
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 683b3e363b9a..a19d4646deb2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -54,7 +54,7 @@ public:
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0;
virtual void emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
- unsigned Align) = 0;
+ Align Alignment) = 0;
/// \returns True on success, false on failure.
virtual bool EmitISAVersion(StringRef IsaVersionString) = 0;
@@ -110,7 +110,7 @@ public:
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
- void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, unsigned Align) override;
+ void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override;
/// \returns True on success, false on failure.
bool EmitISAVersion(StringRef IsaVersionString) override;
@@ -133,6 +133,7 @@ public:
class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
MCStreamer &Streamer;
+ Triple::OSType Os;
void EmitNote(StringRef Name, const MCExpr *DescSize, unsigned NoteType,
function_ref<void(MCELFStreamer &)> EmitDesc);
@@ -157,7 +158,7 @@ public:
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
- void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, unsigned Align) override;
+ void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override;
/// \returns True on success, false on failure.
bool EmitISAVersion(StringRef IsaVersionString) override;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index 2f1f4e7a0392..f61470573050 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -47,7 +47,7 @@ public:
/// Encode the instruction and write it to the OS.
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ const MCSubtargetInfo &STI) const override;
/// \returns the encoding for an MCOperand.
uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index f8ec3c36f019..2cd6c3a81d2b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPURegisterInfo.h"
#include "MCTargetDesc/AMDGPUFixupKinds.h"
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -71,6 +70,10 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
+ unsigned getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+
unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -105,6 +108,11 @@ static uint32_t getIntInlineImmEncoding(IntTy Imm) {
return 0;
}
+static uint32_t getLit16IntEncoding(uint16_t Val, const MCSubtargetInfo &STI) {
+ uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
+ return IntImm == 0 ? 255 : IntImm;
+}
+
static uint32_t getLit16Encoding(uint16_t Val, const MCSubtargetInfo &STI) {
uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
if (IntImm != 0)
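
getLit16IntEncoding keeps integer operands on the integer inline-constant path and falls back to the literal marker 255 otherwise. A stand-alone sketch of that selection; the concrete inline-constant encodings below (0..64 and -1..-16) are assumptions made for the example:

    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    // Assumed encoding of integer inline constants for this sketch:
    // 0..64 -> 128..192, -1..-16 -> 193..208, anything else -> 0 (not inlinable).
    static uint32_t getIntInlineImmEncoding(int16_t Imm) {
      if (Imm >= 0 && Imm <= 64)
        return 128 + Imm;
      if (Imm >= -16 && Imm <= -1)
        return 192 + std::abs(Imm);
      return 0;
    }

    // Integer 16-bit operands: use the inline constant when one exists,
    // otherwise emit the "literal follows" marker 255.
    static uint32_t getLit16IntEncoding(uint16_t Val) {
      uint32_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
      return IntImm == 0 ? 255 : IntImm;
    }

    int main() {
      std::printf("%u\n", getLit16IntEncoding(4));      // 132: inline constant
      std::printf("%u\n", getLit16IntEncoding(0xffff)); // 193: inline -1
      std::printf("%u\n", getLit16IntEncoding(1000));   // 255: needs a literal
    }
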
@@ -249,23 +257,27 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
return getLit64Encoding(static_cast<uint64_t>(Imm), STI);
case AMDGPU::OPERAND_REG_IMM_INT16:
- case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+ return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
// FIXME Is this correct? What do inline immediates do on SI for f16 src
// which does not have f16 support?
return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
-
case AMDGPU::OPERAND_REG_IMM_V2INT16:
- case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16: {
if (!isUInt<16>(Imm) && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal])
return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
+ if (OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
+ return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
LLVM_FALLTHROUGH;
+ }
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+ return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
uint16_t Lo16 = static_cast<uint16_t>(Imm);
uint32_t Encoding = getLit16Encoding(Lo16, STI);
@@ -359,6 +371,15 @@ unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
return getMachineOpValue(MI, MO, Fixups, STI);
}
+unsigned SIMCCodeEmitter::getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ auto Offset = MI.getOperand(OpNo).getImm();
+ // VI only supports 20-bit unsigned offsets.
+ assert(!AMDGPU::isVI(STI) || isUInt<20>(Offset));
+ return Offset;
+}
+
unsigned
SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
@@ -419,7 +440,13 @@ SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
// instructions use acc[0:1] modifier bits to distinguish. These bits are
// encoded as a virtual 9th bit of the register for these operands.
if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Reg) ||
- MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg))
+ MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_96RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_160RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_192RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AGPR_LO16RegClassID).contains(Reg))
Enc |= 512;
return Enc;
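
getAVOperandEncoding now recognizes the wider AGPR tuple classes and AGPR_LO16 before setting the extra bit; as the comment above notes, the mechanism is a virtual ninth encoding bit that marks accumulator operands. A stand-alone sketch with the register-class query reduced to a flag:

    #include <cstdint>
    #include <cstdio>

    // Illustrative only: real register numbers come from the MCRegisterInfo
    // tables. Here an operand just carries its base encoding and an "is AGPR"
    // flag, and accumulator operands get the virtual 9th bit (value 512).
    struct AVOperand {
      unsigned BaseEncoding; // low bits of the hardware register encoding
      bool IsAGPR;
    };

    static uint64_t getAVOperandEncoding(const AVOperand &Op) {
      uint64_t Enc = Op.BaseEncoding;
      if (Op.IsAGPR)
        Enc |= 512; // acc modifier bit distinguishing AGPRs from VGPRs
      return Enc;
    }

    int main() {
      std::printf("%llu\n", (unsigned long long)getAVOperandEncoding({5, false})); // 5
      std::printf("%llu\n", (unsigned long long)getAVOperandEncoding({5, true}));  // 517
    }
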
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 4006a6205fb8..2bfc2d579533 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -1,4 +1,4 @@
-//===-- MIMGInstructions.td - MIMG Instruction Defintions -----------------===//
+//===-- MIMGInstructions.td - MIMG Instruction Definitions ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -35,6 +35,7 @@ class MIMGBaseOpcode : PredicateControl {
bit Gather4 = 0;
bits<8> NumExtraArgs = 0;
bit Gradients = 0;
+ bit G16 = 0;
bit Coordinates = 1;
bit LodOrClampOrMip = 0;
bit HasD16 = 0;
@@ -47,9 +48,9 @@ def MIMGBaseOpcode : GenericEnum {
def MIMGBaseOpcodesTable : GenericTable {
let FilterClass = "MIMGBaseOpcode";
let CppTypeName = "MIMGBaseOpcodeInfo";
- let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler", "Gather4",
- "NumExtraArgs", "Gradients", "Coordinates", "LodOrClampOrMip",
- "HasD16"];
+ let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler",
+ "Gather4", "NumExtraArgs", "Gradients", "G16", "Coordinates",
+ "LodOrClampOrMip", "HasD16"];
GenericEnum TypeOf_BaseOpcode = MIMGBaseOpcode;
let PrimaryKey = ["BaseOpcode"];
@@ -117,6 +118,22 @@ def MIMGMIPMappingTable : GenericTable {
let PrimaryKeyName = "getMIMGMIPMappingInfo";
}
+class MIMGG16Mapping<MIMGBaseOpcode g, MIMGBaseOpcode g16> {
+ MIMGBaseOpcode G = g;
+ MIMGBaseOpcode G16 = g16;
+}
+
+def MIMGG16MappingTable : GenericTable {
+ let FilterClass = "MIMGG16Mapping";
+ let CppTypeName = "MIMGG16MappingInfo";
+ let Fields = ["G", "G16"];
+ GenericEnum TypeOf_G = MIMGBaseOpcode;
+ GenericEnum TypeOf_G16 = MIMGBaseOpcode;
+
+ let PrimaryKey = ["G"];
+ let PrimaryKeyName = "getMIMGG16MappingInfo";
+}
+
class MIMG_Base <dag outs, string dns = "">
: InstSI <outs, (ins), "", []> {
@@ -132,7 +149,6 @@ class MIMG_Base <dag outs, string dns = "">
let DecoderNamespace = dns;
let isAsmParserOnly = !if(!eq(dns,""), 1, 0);
- let usesCustomInserter = 1;
}
class MIMG <dag outs, string dns = "">
@@ -238,9 +254,9 @@ class MIMG_NoSampler_gfx10<int op, string opcode,
: MIMG_gfx10<op, (outs DataRC:$vdata), dns> {
let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
- SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
- let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -251,9 +267,9 @@ class MIMG_NoSampler_nsa_gfx10<int op, string opcode,
let InOperandList = !con(AddrIns,
(ins SReg_256:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
- SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
- let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -331,9 +347,9 @@ class MIMG_Store_gfx10<int op, string opcode,
: MIMG_gfx10<op, (outs), dns> {
let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
- GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
- let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -345,9 +361,9 @@ class MIMG_Store_nsa_gfx10<int op, string opcode,
AddrIns,
(ins SReg_256:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
- SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
- let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -436,8 +452,8 @@ class MIMG_Atomic_gfx10<mimg op, string opcode,
let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
- GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe);
- let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe";
+ GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe);
+ let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe";
}
class MIMG_Atomic_nsa_gfx10<mimg op, string opcode,
@@ -452,8 +468,8 @@ class MIMG_Atomic_nsa_gfx10<mimg op, string opcode,
AddrIns,
(ins SReg_256:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
- SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe));
- let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe";
+ SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe";
}
multiclass MIMG_Atomic_Addr_Helper_m <mimg op, string asm,
@@ -522,10 +538,10 @@ class MIMG_Sampler_gfx10<int op, string opcode,
: MIMG_gfx10<op, (outs DataRC:$vdata), dns> {
let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp,
DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
- GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, $vaddr0, $srsrc, $ssamp$dmask$dim$unorm"
- #"$dlc$glc$slc$r128$tfe$lwe"
+ #"$dlc$glc$slc$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -536,10 +552,10 @@ class MIMG_Sampler_nsa_gfx10<int op, string opcode,
let InOperandList = !con(AddrIns,
(ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask,
Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
- SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc, $ssamp$dmask$dim$unorm"
- #"$dlc$glc$slc$r128$tfe$lwe"
+ #"$dlc$glc$slc$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@@ -646,10 +662,11 @@ class MIMG_Sampler_BaseOpcode<AMDGPUSampleVariant sample>
}
multiclass MIMG_Sampler <bits<8> op, AMDGPUSampleVariant sample, bit wqm = 0,
- bit isGetLod = 0,
- string asm = "image_sample"#sample.LowerCaseMod> {
+ bit isG16 = 0, bit isGetLod = 0,
+ string asm = "image_sample"#sample.LowerCaseMod#!if(isG16, "_g16", "")> {
def "" : MIMG_Sampler_BaseOpcode<sample> {
let HasD16 = !if(isGetLod, 0, 1);
+ let G16 = isG16;
}
let BaseOpcode = !cast<MIMGBaseOpcode>(NAME), WQM = wqm,
@@ -726,76 +743,95 @@ defm IMAGE_ATOMIC_DEC : MIMG_Atomic <mimg<0x1c>, "image_atomic_dec">;
//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>; -- not on VI
//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>; -- not on VI
//} // End let FPAtomic = 1
-defm IMAGE_SAMPLE : MIMG_Sampler_WQM <0x00000020, AMDGPUSample>;
-defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <0x00000021, AMDGPUSample_cl>;
-defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, AMDGPUSample_d>;
-defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <0x00000023, AMDGPUSample_d_cl>;
-defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, AMDGPUSample_l>;
-defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM <0x00000025, AMDGPUSample_b>;
-defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM <0x00000026, AMDGPUSample_b_cl>;
-defm IMAGE_SAMPLE_LZ : MIMG_Sampler <0x00000027, AMDGPUSample_lz>;
-defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM <0x00000028, AMDGPUSample_c>;
-defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM <0x00000029, AMDGPUSample_c_cl>;
-defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, AMDGPUSample_c_d>;
-defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <0x0000002b, AMDGPUSample_c_d_cl>;
-defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, AMDGPUSample_c_l>;
-defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM <0x0000002d, AMDGPUSample_c_b>;
-defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM <0x0000002e, AMDGPUSample_c_b_cl>;
-defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <0x0000002f, AMDGPUSample_c_lz>;
-defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM <0x00000030, AMDGPUSample_o>;
-defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM <0x00000031, AMDGPUSample_cl_o>;
-defm IMAGE_SAMPLE_D_O : MIMG_Sampler <0x00000032, AMDGPUSample_d_o>;
-defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <0x00000033, AMDGPUSample_d_cl_o>;
-defm IMAGE_SAMPLE_L_O : MIMG_Sampler <0x00000034, AMDGPUSample_l_o>;
-defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM <0x00000035, AMDGPUSample_b_o>;
-defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM <0x00000036, AMDGPUSample_b_cl_o>;
-defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <0x00000037, AMDGPUSample_lz_o>;
-defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM <0x00000038, AMDGPUSample_c_o>;
-defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM <0x00000039, AMDGPUSample_c_cl_o>;
-defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <0x0000003a, AMDGPUSample_c_d_o>;
-defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <0x0000003b, AMDGPUSample_c_d_cl_o>;
-defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <0x0000003c, AMDGPUSample_c_l_o>;
-defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM <0x0000003e, AMDGPUSample_c_b_cl_o>;
-defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM <0x0000003d, AMDGPUSample_c_b_o>;
-defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <0x0000003f, AMDGPUSample_c_lz_o>;
-defm IMAGE_GATHER4 : MIMG_Gather_WQM <0x00000040, AMDGPUSample>;
-defm IMAGE_GATHER4_CL : MIMG_Gather_WQM <0x00000041, AMDGPUSample_cl>;
-defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, AMDGPUSample_l>;
-defm IMAGE_GATHER4_B : MIMG_Gather_WQM <0x00000045, AMDGPUSample_b>;
-defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM <0x00000046, AMDGPUSample_b_cl>;
-defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, AMDGPUSample_lz>;
-defm IMAGE_GATHER4_C : MIMG_Gather_WQM <0x00000048, AMDGPUSample_c>;
-defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM <0x00000049, AMDGPUSample_c_cl>;
-defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, AMDGPUSample_c_l>;
-defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM <0x0000004d, AMDGPUSample_c_b>;
-defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM <0x0000004e, AMDGPUSample_c_b_cl>;
-defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, AMDGPUSample_c_lz>;
-defm IMAGE_GATHER4_O : MIMG_Gather_WQM <0x00000050, AMDGPUSample_o>;
-defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM <0x00000051, AMDGPUSample_cl_o>;
-defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, AMDGPUSample_l_o>;
-defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM <0x00000055, AMDGPUSample_b_o>;
-defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, AMDGPUSample_b_cl_o>;
-defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, AMDGPUSample_lz_o>;
-defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM <0x00000058, AMDGPUSample_c_o>;
-defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM <0x00000059, AMDGPUSample_c_cl_o>;
-defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, AMDGPUSample_c_l_o>;
-defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM <0x0000005d, AMDGPUSample_c_b_o>;
-defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <0x0000005e, AMDGPUSample_c_b_cl_o>;
-defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, AMDGPUSample_c_lz_o>;
-
-defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, AMDGPUSample, 1, 1, "image_get_lod">;
-
-defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, AMDGPUSample_cd>;
-defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <0x00000069, AMDGPUSample_cd_cl>;
-defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <0x0000006a, AMDGPUSample_c_cd>;
-defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler <0x0000006b, AMDGPUSample_c_cd_cl>;
-defm IMAGE_SAMPLE_CD_O : MIMG_Sampler <0x0000006c, AMDGPUSample_cd_o>;
-defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler <0x0000006d, AMDGPUSample_cd_cl_o>;
-defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler <0x0000006e, AMDGPUSample_c_cd_o>;
-defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, AMDGPUSample_c_cd_cl_o>;
+defm IMAGE_SAMPLE : MIMG_Sampler_WQM <0x00000020, AMDGPUSample>;
+defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <0x00000021, AMDGPUSample_cl>;
+defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, AMDGPUSample_d>;
+defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <0x00000023, AMDGPUSample_d_cl>;
+defm IMAGE_SAMPLE_D_G16 : MIMG_Sampler <0x000000a2, AMDGPUSample_d, 0, 1>;
+defm IMAGE_SAMPLE_D_CL_G16 : MIMG_Sampler <0x000000a3, AMDGPUSample_d_cl, 0, 1>;
+defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, AMDGPUSample_l>;
+defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM <0x00000025, AMDGPUSample_b>;
+defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM <0x00000026, AMDGPUSample_b_cl>;
+defm IMAGE_SAMPLE_LZ : MIMG_Sampler <0x00000027, AMDGPUSample_lz>;
+defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM <0x00000028, AMDGPUSample_c>;
+defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM <0x00000029, AMDGPUSample_c_cl>;
+defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, AMDGPUSample_c_d>;
+defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <0x0000002b, AMDGPUSample_c_d_cl>;
+defm IMAGE_SAMPLE_C_D_G16 : MIMG_Sampler <0x000000aa, AMDGPUSample_c_d, 0, 1>;
+defm IMAGE_SAMPLE_C_D_CL_G16 : MIMG_Sampler <0x000000ab, AMDGPUSample_c_d_cl, 0, 1>;
+defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, AMDGPUSample_c_l>;
+defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM <0x0000002d, AMDGPUSample_c_b>;
+defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM <0x0000002e, AMDGPUSample_c_b_cl>;
+defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <0x0000002f, AMDGPUSample_c_lz>;
+defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM <0x00000030, AMDGPUSample_o>;
+defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM <0x00000031, AMDGPUSample_cl_o>;
+defm IMAGE_SAMPLE_D_O : MIMG_Sampler <0x00000032, AMDGPUSample_d_o>;
+defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <0x00000033, AMDGPUSample_d_cl_o>;
+defm IMAGE_SAMPLE_D_O_G16 : MIMG_Sampler <0x000000b2, AMDGPUSample_d_o, 0, 1>;
+defm IMAGE_SAMPLE_D_CL_O_G16 : MIMG_Sampler <0x000000b3, AMDGPUSample_d_cl_o, 0, 1>;
+defm IMAGE_SAMPLE_L_O : MIMG_Sampler <0x00000034, AMDGPUSample_l_o>;
+defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM <0x00000035, AMDGPUSample_b_o>;
+defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM <0x00000036, AMDGPUSample_b_cl_o>;
+defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <0x00000037, AMDGPUSample_lz_o>;
+defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM <0x00000038, AMDGPUSample_c_o>;
+defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM <0x00000039, AMDGPUSample_c_cl_o>;
+defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <0x0000003a, AMDGPUSample_c_d_o>;
+defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <0x0000003b, AMDGPUSample_c_d_cl_o>;
+defm IMAGE_SAMPLE_C_D_O_G16 : MIMG_Sampler <0x000000ba, AMDGPUSample_c_d_o, 0, 1>;
+defm IMAGE_SAMPLE_C_D_CL_O_G16 : MIMG_Sampler <0x000000bb, AMDGPUSample_c_d_cl_o, 0, 1>;
+defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <0x0000003c, AMDGPUSample_c_l_o>;
+defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM <0x0000003e, AMDGPUSample_c_b_cl_o>;
+defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM <0x0000003d, AMDGPUSample_c_b_o>;
+defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <0x0000003f, AMDGPUSample_c_lz_o>;
+defm IMAGE_GATHER4 : MIMG_Gather_WQM <0x00000040, AMDGPUSample>;
+defm IMAGE_GATHER4_CL : MIMG_Gather_WQM <0x00000041, AMDGPUSample_cl>;
+defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, AMDGPUSample_l>;
+defm IMAGE_GATHER4_B : MIMG_Gather_WQM <0x00000045, AMDGPUSample_b>;
+defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM <0x00000046, AMDGPUSample_b_cl>;
+defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, AMDGPUSample_lz>;
+defm IMAGE_GATHER4_C : MIMG_Gather_WQM <0x00000048, AMDGPUSample_c>;
+defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM <0x00000049, AMDGPUSample_c_cl>;
+defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, AMDGPUSample_c_l>;
+defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM <0x0000004d, AMDGPUSample_c_b>;
+defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM <0x0000004e, AMDGPUSample_c_b_cl>;
+defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, AMDGPUSample_c_lz>;
+defm IMAGE_GATHER4_O : MIMG_Gather_WQM <0x00000050, AMDGPUSample_o>;
+defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM <0x00000051, AMDGPUSample_cl_o>;
+defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, AMDGPUSample_l_o>;
+defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM <0x00000055, AMDGPUSample_b_o>;
+defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, AMDGPUSample_b_cl_o>;
+defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, AMDGPUSample_lz_o>;
+defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM <0x00000058, AMDGPUSample_c_o>;
+defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM <0x00000059, AMDGPUSample_c_cl_o>;
+defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, AMDGPUSample_c_l_o>;
+defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM <0x0000005d, AMDGPUSample_c_b_o>;
+defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <0x0000005e, AMDGPUSample_c_b_cl_o>;
+defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, AMDGPUSample_c_lz_o>;
+
+defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, AMDGPUSample, 1, 0, 1, "image_get_lod">;
+
+defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, AMDGPUSample_cd>;
+defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <0x00000069, AMDGPUSample_cd_cl>;
+defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <0x0000006a, AMDGPUSample_c_cd>;
+defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler <0x0000006b, AMDGPUSample_c_cd_cl>;
+defm IMAGE_SAMPLE_CD_O : MIMG_Sampler <0x0000006c, AMDGPUSample_cd_o>;
+defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler <0x0000006d, AMDGPUSample_cd_cl_o>;
+defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler <0x0000006e, AMDGPUSample_c_cd_o>;
+defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, AMDGPUSample_c_cd_cl_o>;
+defm IMAGE_SAMPLE_CD_G16 : MIMG_Sampler <0x000000e8, AMDGPUSample_cd, 0, 1>;
+defm IMAGE_SAMPLE_CD_CL_G16 : MIMG_Sampler <0x000000e9, AMDGPUSample_cd_cl, 0, 1>;
+defm IMAGE_SAMPLE_C_CD_G16 : MIMG_Sampler <0x000000ea, AMDGPUSample_c_cd, 0, 1>;
+defm IMAGE_SAMPLE_C_CD_CL_G16 : MIMG_Sampler <0x000000eb, AMDGPUSample_c_cd_cl, 0, 1>;
+defm IMAGE_SAMPLE_CD_O_G16 : MIMG_Sampler <0x000000ec, AMDGPUSample_cd_o, 0, 1>;
+defm IMAGE_SAMPLE_CD_CL_O_G16 : MIMG_Sampler <0x000000ed, AMDGPUSample_cd_cl_o, 0, 1>;
+defm IMAGE_SAMPLE_C_CD_O_G16 : MIMG_Sampler <0x000000ee, AMDGPUSample_c_cd_o, 0, 1>;
+defm IMAGE_SAMPLE_C_CD_CL_O_G16 : MIMG_Sampler <0x000000ef, AMDGPUSample_c_cd_cl_o, 0, 1>;
//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", 0x0000007e>;
//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>;
+let SubtargetPredicate = HasGFX10_BEncoding in
+defm IMAGE_MSAA_LOAD : MIMG_NoSampler <0x00000080, "image_msaa_load", 1>;
+
/********** ========================================= **********/
/********** Table of dimension-aware image intrinsics **********/
/********** ========================================= **********/
@@ -817,6 +853,11 @@ def ImageDimIntrinsicTable : GenericTable {
let PrimaryKeyEarlyOut = 1;
}
+def getImageDimInstrinsicByBaseOpcode : SearchIndex {
+ let Table = ImageDimIntrinsicTable;
+ let Key = ["BaseOpcode", "Dim"];
+}
+
foreach intr = !listconcat(AMDGPUImageDimIntrinsics,
AMDGPUImageDimAtomicIntrinsics) in {
def : ImageDimIntrinsicInfo<intr>;
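
The new SearchIndex above asks TableGen to emit a lookup over ImageDimIntrinsicTable keyed on (BaseOpcode, Dim). As a rough, hedged illustration of the kind of lookup such an index produces (the struct name, field values, and function name below are invented placeholders, not the generated code), a two-key binary search over a sorted table looks like this:

#include <algorithm>
#include <cassert>
#include <iterator>
#include <tuple>

struct ImageDimIntrinsicInfoSketch {
  unsigned Intr;       // intrinsic ID (placeholder value)
  unsigned BaseOpcode; // search key, field 1
  unsigned Dim;        // search key, field 2
};

// Rows must be sorted by (BaseOpcode, Dim); the values are invented.
static const ImageDimIntrinsicInfoSketch Table[] = {
    {100, 0x20, 1}, {101, 0x20, 2}, {102, 0x22, 1}};

static const ImageDimIntrinsicInfoSketch *
lookupByBaseOpcodeSketch(unsigned BaseOpcode, unsigned Dim) {
  const auto Key = std::make_tuple(BaseOpcode, Dim);
  const auto *It = std::lower_bound(
      std::begin(Table), std::end(Table), Key,
      [](const ImageDimIntrinsicInfoSketch &Row,
         const std::tuple<unsigned, unsigned> &K) {
        return std::make_tuple(Row.BaseOpcode, Row.Dim) < K;
      });
  if (It == std::end(Table) || std::make_tuple(It->BaseOpcode, It->Dim) != Key)
    return nullptr;
  return It;
}

int main() {
  assert(lookupByBaseOpcodeSketch(0x20, 2)->Intr == 101);
  assert(lookupByBaseOpcodeSketch(0x21, 1) == nullptr);
  return 0;
}
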
@@ -835,3 +876,21 @@ def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>;
// MIP to NONMIP Optimization Mapping
def : MIMGMIPMapping<IMAGE_LOAD_MIP, IMAGE_LOAD>;
def : MIMGMIPMapping<IMAGE_STORE_MIP, IMAGE_STORE>;
+
+// G to G16 Optimization Mapping
+def : MIMGG16Mapping<IMAGE_SAMPLE_D, IMAGE_SAMPLE_D_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL, IMAGE_SAMPLE_D_CL_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_D, IMAGE_SAMPLE_C_D_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_CL, IMAGE_SAMPLE_C_D_CL_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_D_O, IMAGE_SAMPLE_D_O_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL_O, IMAGE_SAMPLE_D_CL_O_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_O, IMAGE_SAMPLE_C_D_O_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_CL_O, IMAGE_SAMPLE_C_D_CL_O_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_CD, IMAGE_SAMPLE_CD_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_CD_CL, IMAGE_SAMPLE_CD_CL_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD, IMAGE_SAMPLE_C_CD_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD_CL, IMAGE_SAMPLE_C_CD_CL_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_CD_O, IMAGE_SAMPLE_CD_O_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_CD_CL_O, IMAGE_SAMPLE_CD_CL_O_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD_O, IMAGE_SAMPLE_C_CD_O_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD_CL_O, IMAGE_SAMPLE_C_CD_CL_O_G16>;
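
A pattern worth noting in the new *_G16 encodings above: each G16 variant's MIMG opcode is its base sampler opcode with bit 7 set (0x22 to 0xa2, 0x2a to 0xaa, 0x32 to 0xb2, 0x68 to 0xe8, and so on). The standalone check below only restates the pairs visible in the defm lines; it is an observation about the table, not something generated from LLVM:

#include <cassert>
#include <cstdio>

int main() {
  // (base opcode, G16 opcode) pairs copied from the defm lines above.
  const unsigned Pairs[][2] = {
      {0x22, 0xa2}, {0x23, 0xa3}, {0x2a, 0xaa}, {0x2b, 0xab},
      {0x32, 0xb2}, {0x33, 0xb3}, {0x3a, 0xba}, {0x3b, 0xbb},
      {0x68, 0xe8}, {0x69, 0xe9}, {0x6a, 0xea}, {0x6b, 0xeb},
      {0x6c, 0xec}, {0x6d, 0xed}, {0x6e, 0xee}, {0x6f, 0xef}};
  for (const auto &P : Pairs)
    assert((P[0] | 0x80u) == P[1]);
  std::puts("every G16 opcode in the table is base | 0x80");
  return 0;
}
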
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
index ed23c8ea814b..d363baa15507 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
@@ -88,15 +88,15 @@ void R600AsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
}
}
- OutStreamer->EmitIntValue(RsrcReg, 4);
- OutStreamer->EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
+ OutStreamer->emitInt32(RsrcReg);
+ OutStreamer->emitIntValue(S_NUM_GPRS(MaxGPR + 1) |
S_STACK_SIZE(MFI->CFStackSize), 4);
- OutStreamer->EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
- OutStreamer->EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
+ OutStreamer->emitInt32(R_02880C_DB_SHADER_CONTROL);
+ OutStreamer->emitInt32(S_02880C_KILL_ENABLE(killPixel));
if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
- OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
- OutStreamer->EmitIntValue(alignTo(MFI->getLDSSize(), 4) >> 2, 4);
+ OutStreamer->emitInt32(R_0288E8_SQ_LDS_ALLOC);
+ OutStreamer->emitIntValue(alignTo(MFI->getLDSSize(), 4) >> 2, 4);
}
}
@@ -115,7 +115,7 @@ bool R600AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
EmitProgramInfoR600(MF);
- EmitFunctionBody();
+ emitFunctionBody();
if (isVerbose()) {
MCSectionELF *CommentSection =
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600AsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600AsmPrinter.h
index 0da9526d716e..552d01f81b66 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600AsmPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600AsmPrinter.h
@@ -26,7 +26,7 @@ public:
StringRef getPassName() const override;
bool runOnMachineFunction(MachineFunction &MF) override;
/// Implemented in AMDGPUMCInstLower.cpp
- void EmitInstruction(const MachineInstr *MI) override;
+ void emitInstruction(const MachineInstr *MI) override;
/// Lower the specified LLVM Constant to an MCExpr.
/// The AsmPrinter::lowerConstantof does not know how to lower
/// addrspacecast, therefore they should be lowered by this function.
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index e4160ac11c86..8124df68f688 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -159,8 +159,7 @@ unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
}
void CFStack::updateMaxStackSize() {
- unsigned CurrentStackSize =
- CurrentEntries + (alignTo(CurrentSubEntries, 4) / 4);
+ unsigned CurrentStackSize = CurrentEntries + divideCeil(CurrentSubEntries, 4);
MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
}
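
The rewrite in updateMaxStackSize replaces alignTo(CurrentSubEntries, 4) / 4 with divideCeil(CurrentSubEntries, 4); both compute the number of 4-sub-entry groups, i.e. a ceiling division, so the change is purely cosmetic. A minimal standalone check of that equivalence, written with plain arithmetic rather than the LLVM MathExtras helpers:

#include <cassert>

// alignTo(x, 4) / 4 == divideCeil(x, 4) == (x + 3) / 4 for unsigned x.
static unsigned alignTo4(unsigned X) { return (X + 3u) & ~3u; }
static unsigned divideCeil4(unsigned X) { return (X + 3u) / 4u; }

int main() {
  for (unsigned X = 0; X < 1024; ++X)
    assert(alignTo4(X) / 4u == divideCeil4(X));
  return 0;
}
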
@@ -308,7 +307,7 @@ private:
DstMI = Reg;
else
DstMI = TRI->getMatchingSuperReg(Reg,
- AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
+ R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
&R600::R600_Reg128RegClass);
}
if (MO.isUse()) {
@@ -317,7 +316,7 @@ private:
SrcMI = Reg;
else
SrcMI = TRI->getMatchingSuperReg(Reg,
- AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
+ R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
&R600::R600_Reg128RegClass);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
index fd75c41040e1..5f682d86d26e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
@@ -219,13 +219,13 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
}
}
if (IsReduction) {
- unsigned SubRegIndex = AMDGPURegisterInfo::getSubRegFromChannel(Chan);
+ unsigned SubRegIndex = R600RegisterInfo::getSubRegFromChannel(Chan);
Src0 = TRI.getSubReg(Src0, SubRegIndex);
Src1 = TRI.getSubReg(Src1, SubRegIndex);
} else if (IsCube) {
static const int CubeSrcSwz[] = {2, 2, 0, 1};
- unsigned SubRegIndex0 = AMDGPURegisterInfo::getSubRegFromChannel(CubeSrcSwz[Chan]);
- unsigned SubRegIndex1 = AMDGPURegisterInfo::getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
+ unsigned SubRegIndex0 = R600RegisterInfo::getSubRegFromChannel(CubeSrcSwz[Chan]);
+ unsigned SubRegIndex1 = R600RegisterInfo::getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
Src1 = TRI.getSubReg(Src0, SubRegIndex1);
Src0 = TRI.getSubReg(Src0, SubRegIndex0);
}
@@ -234,7 +234,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
bool Mask = false;
bool NotLast = true;
if (IsCube) {
- unsigned SubRegIndex = AMDGPURegisterInfo::getSubRegFromChannel(Chan);
+ unsigned SubRegIndex = R600RegisterInfo::getSubRegFromChannel(Chan);
DstReg = TRI.getSubReg(DstReg, SubRegIndex);
} else {
// Mask the write if the original instruction does not write to
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp
index d9aa9ebe878d..c568a4aa61c3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp
@@ -18,9 +18,8 @@ using namespace llvm;
R600FrameLowering::~R600FrameLowering() = default;
/// \returns The number of registers allocated for \p FI.
-int R600FrameLowering::getFrameIndexReference(const MachineFunction &MF,
- int FI,
- unsigned &FrameReg) const {
+int R600FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const R600RegisterInfo *RI
= MF.getSubtarget<R600Subtarget>().getRegisterInfo();
@@ -35,15 +34,15 @@ int R600FrameLowering::getFrameIndexReference(const MachineFunction &MF,
int UpperBound = FI == -1 ? MFI.getNumObjects() : FI;
for (int i = MFI.getObjectIndexBegin(); i < UpperBound; ++i) {
- OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(i));
+ OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlign(i));
OffsetBytes += MFI.getObjectSize(i);
// Each register holds 4 bytes, so we must always align the offset to at
// least 4 bytes, so that 2 frame objects won't share the same register.
- OffsetBytes = alignTo(OffsetBytes, 4);
+ OffsetBytes = alignTo(OffsetBytes, Align(4));
}
if (FI != -1)
- OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(FI));
+ OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlign(FI));
return OffsetBytes / (getStackWidth(MF) * 4);
}
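
The R600 frame layout above works in whole registers: the byte offset is rounded up to each object's alignment, the object's size is added, and the running offset is rounded up to 4 bytes so two objects never share one 4-byte register; the final result is divided by getStackWidth(MF) * 4. A small sketch of that bookkeeping, with the object sizes, alignments, and stack width as made-up inputs:

#include <cstdio>

static unsigned alignTo(unsigned Value, unsigned Align) {
  return (Value + Align - 1) / Align * Align;
}

int main() {
  struct Obj { unsigned Size, Align; };
  const Obj Objects[] = {{4, 4}, {1, 1}, {8, 8}}; // invented frame objects
  const unsigned StackWidth = 1;                  // registers per stack row

  unsigned OffsetBytes = 0;
  for (const Obj &O : Objects) {
    OffsetBytes = alignTo(OffsetBytes, O.Align);
    OffsetBytes += O.Size;
    // Each register holds 4 bytes, so round up so that two frame objects
    // never share the same register.
    OffsetBytes = alignTo(OffsetBytes, 4);
  }
  std::printf("frame index in registers: %u\n",
              OffsetBytes / (StackWidth * 4));
  return 0;
}
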
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600FrameLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600FrameLowering.h
index 283e4d1935ea..b877ecd29829 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600FrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600FrameLowering.h
@@ -16,7 +16,7 @@ namespace llvm {
class R600FrameLowering : public AMDGPUFrameLowering {
public:
R600FrameLowering(StackDirection D, Align StackAl, int LAO,
- Align TransAl = Align::None())
+ Align TransAl = Align(1))
: AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {}
~R600FrameLowering() override;
@@ -25,7 +25,7 @@ public:
void emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const override {}
int getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const override;
+ Register &FrameReg) const override;
bool hasFP(const MachineFunction &MF) const override {
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 1b1f5f9a404a..dc2e73e1f94e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -615,21 +615,27 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
return LowerImplicitParameter(DAG, VT, DL, 8);
case Intrinsic::r600_read_tgid_x:
+ case Intrinsic::amdgcn_workgroup_id_x:
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T1_X, VT);
case Intrinsic::r600_read_tgid_y:
+ case Intrinsic::amdgcn_workgroup_id_y:
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T1_Y, VT);
case Intrinsic::r600_read_tgid_z:
+ case Intrinsic::amdgcn_workgroup_id_z:
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T1_Z, VT);
case Intrinsic::r600_read_tidig_x:
+ case Intrinsic::amdgcn_workitem_id_x:
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T0_X, VT);
case Intrinsic::r600_read_tidig_y:
+ case Intrinsic::amdgcn_workitem_id_y:
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T0_Y, VT);
case Intrinsic::r600_read_tidig_z:
+ case Intrinsic::amdgcn_workitem_id_z:
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T0_Z, VT);
@@ -699,9 +705,8 @@ SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
SmallVector<SDValue, 8> Args;
for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
- Args.push_back(DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
- DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
+ Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
+ DAG.getVectorIdxConstant(i, DL)));
}
return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
@@ -1260,10 +1265,11 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return scalarizeVectorStore(StoreNode, DAG);
}
- unsigned Align = StoreNode->getAlignment();
- if (Align < MemVT.getStoreSize() &&
- !allowsMisalignedMemoryAccesses(
- MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) {
+ Align Alignment = StoreNode->getAlign();
+ if (Alignment < MemVT.getStoreSize() &&
+ !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment.value(),
+ StoreNode->getMemOperand()->getFlags(),
+ nullptr)) {
return expandUnalignedStore(StoreNode, DAG);
}
@@ -1543,7 +1549,7 @@ SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
unsigned FrameIndex = FIN->getIndex();
- unsigned IgnoredFrameReg;
+ Register IgnoredFrameReg;
unsigned Offset =
TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
index 346296c77377..088cf16d8ed2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -77,7 +77,7 @@ void R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (VectorComponents > 0) {
for (unsigned I = 0; I < VectorComponents; I++) {
- unsigned SubRegIndex = AMDGPURegisterInfo::getSubRegFromChannel(I);
+ unsigned SubRegIndex = R600RegisterInfo::getSubRegFromChannel(I);
buildDefaultInstruction(MBB, MI, R600::MOV,
RI.getSubReg(DestReg, SubRegIndex),
RI.getSubReg(SrcReg, SubRegIndex))
@@ -541,7 +541,7 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
std::vector<std::vector<std::pair<int, unsigned>>> IGSrcs;
ValidSwizzle.clear();
- unsigned ConstCount = 0;
+ unsigned ConstCount;
BankSwizzle TransBS = ALU_VEC_012_SCL_210;
for (unsigned i = 0, e = IG.size(); i < e; ++i) {
IGSrcs.push_back(ExtractSrcs(*IG[i], PV, ConstCount));
@@ -676,7 +676,7 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
- // Most of the following comes from the ARM implementation of AnalyzeBranch
+ // Most of the following comes from the ARM implementation of analyzeBranch
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
@@ -1224,7 +1224,7 @@ int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
const R600FrameLowering *TFL = ST.getFrameLowering();
- unsigned IgnoredFrameReg;
+ Register IgnoredFrameReg;
Offset = TFL->getFrameIndexReference(MF, -1, IgnoredFrameReg);
return getIndirectIndexBegin(MF) + Offset;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td
index 869c183e2245..2cc21364c439 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -1006,7 +1006,7 @@ class MULADD_Common <bits<5> inst> : R600_3OP <
class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
inst, "MULADD_IEEE",
- [(set f32:$dst, (fmad f32:$src0, f32:$src1, f32:$src2))]
+ [(set f32:$dst, (any_fmad f32:$src0, f32:$src1, f32:$src2))]
>;
class FMA_Common <bits<5> inst> : R600_3OP <
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
index cec7f563f480..b0620663a230 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
@@ -56,9 +56,8 @@ using namespace llvm;
#define DEBUG_TYPE "vec-merger"
-static bool isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
- assert(MRI.isSSA());
- if (Register::isPhysicalRegister(Reg))
+static bool isImplicitlyDef(MachineRegisterInfo &MRI, Register Reg) {
+ if (Reg.isPhysical())
return false;
const MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
return MI && MI->isImplicitDef();
@@ -69,8 +68,8 @@ namespace {
class RegSeqInfo {
public:
MachineInstr *Instr;
- DenseMap<unsigned, unsigned> RegToChan;
- std::vector<unsigned> UndefReg;
+ DenseMap<Register, unsigned> RegToChan;
+ std::vector<Register> UndefReg;
RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) {
assert(MI->getOpcode() == R600::REG_SEQUENCE);
@@ -102,7 +101,7 @@ private:
InstructionSetMap PreviousRegSeqByUndefCount;
bool canSwizzle(const MachineInstr &MI) const;
- bool areAllUsesSwizzeable(unsigned Reg) const;
+ bool areAllUsesSwizzeable(Register Reg) const;
void SwizzleInput(MachineInstr &,
const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const;
bool tryMergeVector(const RegSeqInfo *Untouched, RegSeqInfo *ToMerge,
@@ -130,6 +129,11 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties()
+ .set(MachineFunctionProperties::Property::IsSSA);
+ }
+
StringRef getPassName() const override {
return "R600 Vector Registers Merge Pass";
}
@@ -165,9 +169,9 @@ bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched,
RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned>> &Remap)
const {
unsigned CurrentUndexIdx = 0;
- for (DenseMap<unsigned, unsigned>::iterator It = ToMerge->RegToChan.begin(),
+ for (DenseMap<Register, unsigned>::iterator It = ToMerge->RegToChan.begin(),
E = ToMerge->RegToChan.end(); It != E; ++It) {
- DenseMap<unsigned, unsigned>::const_iterator PosInUntouched =
+ DenseMap<Register, unsigned>::const_iterator PosInUntouched =
Untouched->RegToChan.find((*It).first);
if (PosInUntouched != Untouched->RegToChan.end()) {
Remap.push_back(std::pair<unsigned, unsigned>
@@ -203,9 +207,9 @@ MachineInstr *R600VectorRegMerger::RebuildVector(
DebugLoc DL = Pos->getDebugLoc();
Register SrcVec = BaseRSI->Instr->getOperand(0).getReg();
- DenseMap<unsigned, unsigned> UpdatedRegToChan = BaseRSI->RegToChan;
- std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg;
- for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(),
+ DenseMap<Register, unsigned> UpdatedRegToChan = BaseRSI->RegToChan;
+ std::vector<Register> UpdatedUndef = BaseRSI->UndefReg;
+ for (DenseMap<Register, unsigned>::iterator It = RSI->RegToChan.begin(),
E = RSI->RegToChan.end(); It != E; ++It) {
Register DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass);
unsigned SubReg = (*It).first;
@@ -218,7 +222,7 @@ MachineInstr *R600VectorRegMerger::RebuildVector(
.addReg(SubReg)
.addImm(Chan);
UpdatedRegToChan[SubReg] = Chan;
- std::vector<unsigned>::iterator ChanPos = llvm::find(UpdatedUndef, Chan);
+ std::vector<Register>::iterator ChanPos = llvm::find(UpdatedUndef, Chan);
if (ChanPos != UpdatedUndef.end())
UpdatedUndef.erase(ChanPos);
assert(!is_contained(UpdatedUndef, Chan) &&
@@ -279,7 +283,7 @@ void R600VectorRegMerger::SwizzleInput(MachineInstr &MI,
}
}
-bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const {
+bool R600VectorRegMerger::areAllUsesSwizzeable(Register Reg) const {
for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg),
E = MRI->use_instr_end(); It != E; ++It) {
if (!canSwizzle(*It))
@@ -322,7 +326,7 @@ bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI,
}
void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) {
- for (DenseMap<unsigned, unsigned>::const_iterator
+ for (DenseMap<Register, unsigned>::const_iterator
It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) {
PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
index ef12c1d24594..78ef71cdf8e3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
@@ -20,14 +20,21 @@
using namespace llvm;
-R600RegisterInfo::R600RegisterInfo() : R600GenRegisterInfo(0) {
- RCW.RegWeight = 0;
- RCW.WeightLimit = 0;
-}
-
#define GET_REGINFO_TARGET_DESC
#include "R600GenRegisterInfo.inc"
+unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) {
+ static const uint16_t SubRegFromChannelTable[] = {
+ R600::sub0, R600::sub1, R600::sub2, R600::sub3,
+ R600::sub4, R600::sub5, R600::sub6, R600::sub7,
+ R600::sub8, R600::sub9, R600::sub10, R600::sub11,
+ R600::sub12, R600::sub13, R600::sub14, R600::sub15
+ };
+
+ assert(Channel < array_lengthof(SubRegFromChannelTable));
+ return SubRegFromChannelTable[Channel];
+}
+
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
@@ -87,11 +94,6 @@ const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
}
}
-const RegClassWeight &R600RegisterInfo::getRegClassWeight(
- const TargetRegisterClass *RC) const {
- return RCW;
-}
-
bool R600RegisterInfo::isPhysRegLiveAcrossClauses(unsigned Reg) const {
assert(!Register::isVirtualRegister(Reg));
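
The getSubRegFromChannel helper above is now a bounds-checked static array lookup from channel number to sub-register index. The shape of that lookup, as a self-contained sketch (the enum values below are invented placeholders, not the real R600::subN encodings):

#include <cassert>
#include <cstdint>

enum SubRegSketch : uint16_t { sub0 = 1, sub1, sub2, sub3 }; // placeholders

static uint16_t getSubRegFromChannelSketch(unsigned Channel) {
  static const uint16_t Table[] = {sub0, sub1, sub2, sub3};
  assert(Channel < sizeof(Table) / sizeof(Table[0]) && "channel out of range");
  return Table[Channel];
}

int main() {
  assert(getSubRegFromChannelSketch(2) == sub2);
  return 0;
}
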
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.h
index 9378b70ca580..06981c4cf9c5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.h
@@ -20,9 +20,11 @@
namespace llvm {
struct R600RegisterInfo final : public R600GenRegisterInfo {
- RegClassWeight RCW;
+ R600RegisterInfo() : R600GenRegisterInfo(0) {}
- R600RegisterInfo();
+ /// \returns the sub reg enum value for the given \p Channel
+ /// (e.g. getSubRegFromChannel(0) -> R600::sub0)
+ static unsigned getSubRegFromChannel(unsigned Channel);
BitVector getReservedRegs(const MachineFunction &MF) const override;
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
@@ -37,8 +39,9 @@ struct R600RegisterInfo final : public R600GenRegisterInfo {
/// CFGStructurizer
const TargetRegisterClass *getCFGStructurizerRegClass(MVT VT) const;
- const RegClassWeight &
- getRegClassWeight(const TargetRegisterClass *RC) const override;
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
+ return false;
+ }
// \returns true if \p Reg can be defined in one ALU clause and used in
// another.
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.td
index 02164b74a01b..fdff7541edec 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600RegisterInfo.td
@@ -150,13 +150,16 @@ def AR_X : R600Reg<"AR.x", 0>;
def INDIRECT_BASE_ADDR : R600Reg <"INDIRECT_BASE_ADDR", 0>;
def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (sequence "ArrayBase%u", 448, 480))>;
+ (add (sequence "ArrayBase%u", 448, 480))> {
+ let Weight = 0;
+}
// special registers for ALU src operands
// const buffer reference, SRCx_SEL contains index
def ALU_CONST : R600Reg<"CBuf", 0>;
// interpolation param reference, SRCx_SEL contains index
def ALU_PARAM : R600Reg<"Param", 0>;
+let Weight = 0 in {
let isAllocatable = 0 in {
def R600_Addr : RegisterClass <"AMDGPU", [i32], 32, (add (sequence "Addr%u_X", 0, 127))>;
@@ -251,3 +254,4 @@ def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32, i64, f64], 64,
def R600_Reg64Vertical : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,
(add V01_X, V01_Y, V01_Z, V01_W,
V23_X, V23_Y, V23_Z, V23_W)>;
+} // End let Weight = 0
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp
index ee011286b8ff..90e48c63b5dc 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp
@@ -111,10 +111,6 @@ bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
unsigned ActiveLanes =
TII->isGather4(Opcode) ? 4 : countPopulation(dmask);
- // Subreg indices are counted from 1
- // When D16 then we want next whole VGPR after write data.
- static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected");
-
bool Packed = !ST.hasUnpackedD16VMem();
unsigned InitIdx =
@@ -137,7 +133,7 @@ bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
// all the result registers to 0, otherwise just the error indication
// register (VGPRn+1)
unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
- unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx;
+ unsigned CurrIdx = ST.usePRTStrictNull() ? 0 : (InitIdx - 1);
if (DstSize == 1) {
// In this case we can just initialize the result directly
@@ -158,7 +154,7 @@ bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
.addReg(PrevDst)
.addReg(SubReg)
- .addImm(CurrIdx);
+ .addImm(SIRegisterInfo::getSubRegFromChannel(CurrIdx));
PrevDst = NewDst;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index 27320472cacb..3c41bf1fef5e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -153,7 +153,7 @@ void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) {
Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else,
{ IntMask, IntMask });
IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break,
- { IntMask, IntMask });
+ { IntMask });
Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop, { IntMask });
EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf, { IntMask });
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h
index 23ef56afc39c..4f7d255eb450 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -333,7 +333,9 @@ enum Id { // HwRegCode, (6) [5:0]
ID_FLAT_SCR_HI = 21,
ID_XNACK_MASK = 22,
ID_POPS_PACKER = 25,
- ID_SYMBOLIC_LAST_ = 26,
+ ID_SHADER_CYCLES = 29,
+ ID_SYMBOLIC_FIRST_GFX1030_ = ID_SHADER_CYCLES,
+ ID_SYMBOLIC_LAST_ = 30,
ID_SHIFT_ = 0,
ID_WIDTH_ = 6,
ID_MASK_ = (((1 << ID_WIDTH_) - 1) << ID_SHIFT_)
@@ -366,6 +368,28 @@ enum Width : unsigned {
WIDTH_DEFAULT_ = WIDTH_M1_DEFAULT_ + 1,
};
+enum ModeRegisterMasks : uint32_t {
+ FP_ROUND_MASK = 0xf << 0, // Bits 0..3
+ FP_DENORM_MASK = 0xf << 4, // Bits 4..7
+ DX10_CLAMP_MASK = 1 << 8,
+ IEEE_MODE_MASK = 1 << 9,
+ LOD_CLAMP_MASK = 1 << 10,
+ DEBUG_MASK = 1 << 11,
+
+ // EXCP_EN fields.
+ EXCP_EN_INVALID_MASK = 1 << 12,
+ EXCP_EN_INPUT_DENORMAL_MASK = 1 << 13,
+ EXCP_EN_FLOAT_DIV0_MASK = 1 << 14,
+ EXCP_EN_OVERFLOW_MASK = 1 << 15,
+ EXCP_EN_UNDERFLOW_MASK = 1 << 16,
+ EXCP_EN_INEXACT_MASK = 1 << 17,
+ EXCP_EN_INT_DIV0_MASK = 1 << 18,
+
+ GPR_IDX_EN_MASK = 1 << 27,
+ VSKIP_MASK = 1 << 28,
+ CSP_MASK = 0x7u << 29 // Bits 29..31
+};
+
} // namespace Hwreg
namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32.
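
The new ModeRegisterMasks values spell out the bit layout of the hardware MODE register: FP_ROUND in bits 0..3, FP_DENORM in bits 4..7, single-bit flags from DX10_CLAMP (bit 8) through the EXCP_EN group, GPR_IDX_EN and VSKIP near the top, and CSP in bits 29..31. A short sketch of decoding a MODE value with these masks; the mask constants are copied from the enum above, and the sample register value is arbitrary:

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t FP_ROUND_MASK = 0xfu << 0;  // bits 0..3
  const uint32_t FP_DENORM_MASK = 0xfu << 4; // bits 4..7
  const uint32_t IEEE_MODE_MASK = 1u << 9;

  const uint32_t Mode = 0x2f3u; // example value only
  std::printf("fp_round  = %u\n", (unsigned)(Mode & FP_ROUND_MASK));
  std::printf("fp_denorm = %u\n", (unsigned)((Mode & FP_DENORM_MASK) >> 4));
  std::printf("ieee      = %u\n", (unsigned)((Mode & IEEE_MODE_MASK) != 0));
  return 0;
}
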
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 914d2a5ef148..ef64c5674bd1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -587,6 +587,11 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
}
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
+ // Only need to run this in SelectionDAG path.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Selected))
+ return false;
+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
MRI = &MF.getRegInfo();
TRI = ST.getRegisterInfo();
@@ -761,6 +766,7 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
bool AllAGPRUses = true;
SetVector<const MachineInstr *> worklist;
SmallSet<const MachineInstr *, 4> Visited;
+ SetVector<MachineInstr *> PHIOperands;
worklist.insert(&MI);
Visited.insert(&MI);
while (!worklist.empty()) {
@@ -805,6 +811,11 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
if (AllAGPRUses && numVGPRUses && !TRI->hasAGPRs(RC0)) {
LLVM_DEBUG(dbgs() << "Moving PHI to AGPR: " << MI);
MRI->setRegClass(PHIRes, TRI->getEquivalentAGPRClass(RC0));
+ for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
+ MachineInstr *DefMI = MRI->getVRegDef(MI.getOperand(I).getReg());
+ if (DefMI && DefMI->isPHI())
+ PHIOperands.insert(DefMI);
+ }
}
bool hasVGPRInput = false;
@@ -824,8 +835,22 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
}
else if (Def->isCopy() &&
TRI->isVectorRegister(*MRI, Def->getOperand(1).getReg())) {
- hasVGPRInput = true;
- break;
+ Register SrcReg = Def->getOperand(1).getReg();
+ MachineInstr *SrcDef = MRI->getVRegDef(SrcReg);
+ unsigned SMovOp;
+ int64_t Imm;
+ if (!isSafeToFoldImmIntoCopy(Def, SrcDef, TII, SMovOp, Imm)) {
+ hasVGPRInput = true;
+ break;
+ } else {
+ // Formally, if we did not do this right away
+ // it would be done on the next iteration of the
+ // runOnMachineFunction main loop. But why not if we can?
+ MachineFunction *MF = MI.getParent()->getParent();
+ Def->getOperand(1).ChangeToImmediate(Imm);
+ Def->addImplicitDefUseOperands(*MF);
+ Def->setDesc(TII->get(SMovOp));
+ }
}
}
@@ -840,4 +865,8 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
TII->legalizeOperands(MI, MDT);
}
+ // Propagate register class back to PHI operands which are PHI themselves.
+ while (!PHIOperands.empty()) {
+ processPHINode(*PHIOperands.pop_back_val());
+ }
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp
index a0119297b112..8e3402b537b3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp
@@ -217,6 +217,11 @@ static bool fixupGlobalSaddr(MachineBasicBlock &MBB,
}
bool SIFixupVectorISel::runOnMachineFunction(MachineFunction &MF) {
+ // Only need to run this in SelectionDAG path.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Selected))
+ return false;
+
if (skipFunction(MF.getFunction()))
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 2ff8baf29394..ffcf4c30bc70 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -282,6 +282,9 @@ static bool updateOperand(FoldCandidate &Fold,
assert(!Fold.needsShrink() && "not handled");
if (Fold.isImm()) {
+ // FIXME: ChangeToImmediate should probably clear the subreg flags. It's
+ // reinterpreted as TargetFlags.
+ Old.setSubReg(0);
Old.ChangeToImmediate(Fold.ImmToFold);
return true;
}
@@ -612,19 +615,26 @@ void SIFoldOperands::foldOperand(
if (frameIndexMayFold(TII, *UseMI, UseOpIdx, OpToFold)) {
// Sanity check that this is a stack access.
// FIXME: Should probably use stack pseudos before frame lowering.
- MachineOperand *SOff = TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);
- if (!SOff->isReg() || (SOff->getReg() != MFI->getScratchWaveOffsetReg() &&
- SOff->getReg() != MFI->getStackPtrOffsetReg()))
- return;
if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
MFI->getScratchRSrcReg())
return;
+ // Ensure this is either relative to the current frame or the current wave.
+ MachineOperand &SOff =
+ *TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);
+ if ((!SOff.isReg() || SOff.getReg() != MFI->getStackPtrOffsetReg()) &&
+ (!SOff.isImm() || SOff.getImm() != 0))
+ return;
+
// A frame index will resolve to a positive constant, so it should always be
// safe to fold the addressing mode, even pre-GFX9.
UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex());
- SOff->setReg(MFI->getStackPtrOffsetReg());
+
+ // If this is relative to the current wave, update it to be relative to the
+ // current frame.
+ if (SOff.isImm())
+ SOff.ChangeToRegister(MFI->getStackPtrOffsetReg(), false);
return;
}
@@ -907,6 +917,21 @@ static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
case AMDGPU::S_XOR_B32:
Result = LHS ^ RHS;
return true;
+ case AMDGPU::S_XNOR_B32:
+ Result = ~(LHS ^ RHS);
+ return true;
+ case AMDGPU::S_NAND_B32:
+ Result = ~(LHS & RHS);
+ return true;
+ case AMDGPU::S_NOR_B32:
+ Result = ~(LHS | RHS);
+ return true;
+ case AMDGPU::S_ANDN2_B32:
+ Result = LHS & ~RHS;
+ return true;
+ case AMDGPU::S_ORN2_B32:
+ Result = LHS | ~RHS;
+ return true;
case AMDGPU::V_LSHL_B32_e64:
case AMDGPU::V_LSHL_B32_e32:
case AMDGPU::S_LSHL_B32:
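
The added cases extend scalar constant folding to the complemented bitwise ops, and the folded results are the plain C bitwise identities: xnor = ~(a ^ b), nand = ~(a & b), nor = ~(a | b), andn2 = a & ~b, orn2 = a | ~b. A standalone check of those identities on 32-bit operands; the opcode names below are local stand-ins, not the AMDGPU:: enum values:

#include <cassert>
#include <cstdint>

enum OpSketch { XNOR, NAND, NOR, ANDN2, ORN2 };

static uint32_t evalSketch(OpSketch Op, uint32_t L, uint32_t R) {
  switch (Op) {
  case XNOR:  return ~(L ^ R);
  case NAND:  return ~(L & R);
  case NOR:   return ~(L | R);
  case ANDN2: return L & ~R;
  case ORN2:  return L | ~R;
  }
  return 0;
}

int main() {
  assert(evalSketch(XNOR, 0xffffffffu, 0x0000ffffu) == 0x0000ffffu);
  assert(evalSketch(NAND, 0xff00ff00u, 0x0f0f0f0fu) == 0xf0fff0ffu);
  assert(evalSketch(NOR, 0xff000000u, 0x000000ffu) == 0x00ffff00u);
  assert(evalSketch(ANDN2, 0xffffffffu, 0x0000ffffu) == 0xffff0000u);
  assert(evalSketch(ORN2, 0x00000000u, 0xffff0000u) == 0x0000ffffu);
  return 0;
}
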
@@ -1007,10 +1032,16 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
if (!Src0->isImm() && !Src1->isImm())
return false;
- if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32) {
+ if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32 ||
+ MI->getOpcode() == AMDGPU::V_LSHL_ADD_U32 ||
+ MI->getOpcode() == AMDGPU::V_AND_OR_B32) {
if (Src0->isImm() && Src0->getImm() == 0) {
// v_lshl_or_b32 0, X, Y -> copy Y
// v_lshl_or_b32 0, X, K -> v_mov_b32 K
+ // v_lshl_add_b32 0, X, Y -> copy Y
+ // v_lshl_add_b32 0, X, K -> v_mov_b32 K
+ // v_and_or_b32 0, X, Y -> copy Y
+ // v_and_or_b32 0, X, K -> v_mov_b32 K
bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg();
MI->RemoveOperand(Src1Idx);
MI->RemoveOperand(Src0Idx);
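
The widened fold above rests on the same zero identity for all three opcodes: with src0 == 0, v_lshl_or_b32 computes (0 << x) | y == y, v_lshl_add_u32 computes (0 << x) + y == y, and v_and_or_b32 computes (0 & x) | y == y, so the instruction reduces to a copy of src2 (or a v_mov_b32 when src2 is a constant). A trivial standalone check of those identities:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 64; ++X)
    for (uint32_t Y = 0; Y < 64; ++Y) {
      assert(((0u << (X & 31)) | Y) == Y); // v_lshl_or_b32  0, X, Y
      assert(((0u << (X & 31)) + Y) == Y); // v_lshl_add_u32 0, X, Y
      assert(((0u & X) | Y) == Y);         // v_and_or_b32   0, X, Y
    }
  return 0;
}
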
@@ -1381,8 +1412,8 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
case AMDGPU::V_MUL_F32_e64:
case AMDGPU::V_MUL_F16_e64: {
// If output denormals are enabled, omod is ignored.
- if ((Op == AMDGPU::V_MUL_F32_e64 && MFI->getMode().FP32Denormals) ||
- (Op == AMDGPU::V_MUL_F16_e64 && MFI->getMode().FP64FP16Denormals))
+ if ((Op == AMDGPU::V_MUL_F32_e64 && MFI->getMode().FP32OutputDenormals) ||
+ (Op == AMDGPU::V_MUL_F16_e64 && MFI->getMode().FP64FP16OutputDenormals))
return std::make_pair(nullptr, SIOutMods::NONE);
const MachineOperand *RegOp = nullptr;
@@ -1411,8 +1442,8 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
case AMDGPU::V_ADD_F32_e64:
case AMDGPU::V_ADD_F16_e64: {
// If output denormals are enabled, omod is ignored.
- if ((Op == AMDGPU::V_ADD_F32_e64 && MFI->getMode().FP32Denormals) ||
- (Op == AMDGPU::V_ADD_F16_e64 && MFI->getMode().FP64FP16Denormals))
+ if ((Op == AMDGPU::V_ADD_F32_e64 && MFI->getMode().FP32OutputDenormals) ||
+ (Op == AMDGPU::V_ADD_F16_e64 && MFI->getMode().FP64FP16OutputDenormals))
return std::make_pair(nullptr, SIOutMods::NONE);
// Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 8364665dda04..a2e802009d09 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -24,18 +24,6 @@ using namespace llvm;
#define DEBUG_TYPE "frame-info"
-static ArrayRef<MCPhysReg> getAllSGPR128(const GCNSubtarget &ST,
- const MachineFunction &MF) {
- return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
- ST.getMaxNumSGPRs(MF) / 4);
-}
-
-static ArrayRef<MCPhysReg> getAllSGPRs(const GCNSubtarget &ST,
- const MachineFunction &MF) {
- return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
- ST.getMaxNumSGPRs(MF));
-}
-
// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
@@ -47,10 +35,10 @@ static ArrayRef<MCPhysReg> getAllSGPRs(const GCNSubtarget &ST,
// but we would then have to make sure that we were in fact saving at least one
// callee-save register in the prologue, which is additional complexity that
// doesn't seem worth the benefit.
-static unsigned findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
- LivePhysRegs &LiveRegs,
- const TargetRegisterClass &RC,
- bool Unused = false) {
+static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
+ LivePhysRegs &LiveRegs,
+ const TargetRegisterClass &RC,
+ bool Unused = false) {
// Mark callee saved registers as used so we will not choose them.
const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
for (unsigned i = 0; CSRegs[i]; ++i)
@@ -59,12 +47,12 @@ static unsigned findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
if (Unused) {
// We are looking for a register that can be used throughout the entire
// function, so any use is unacceptable.
- for (unsigned Reg : RC) {
+ for (MCRegister Reg : RC) {
if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
return Reg;
}
} else {
- for (unsigned Reg : RC) {
+ for (MCRegister Reg : RC) {
if (LiveRegs.available(MRI, Reg))
return Reg;
}
@@ -76,14 +64,67 @@ static unsigned findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
if (!Unused)
report_fatal_error("failed to find free scratch register");
- return AMDGPU::NoRegister;
+ return MCRegister();
}
-static MCPhysReg findUnusedSGPRNonCalleeSaved(MachineRegisterInfo &MRI) {
- LivePhysRegs LiveRegs;
- LiveRegs.init(*MRI.getTargetRegisterInfo());
- return findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
+static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
+ LivePhysRegs &LiveRegs,
+ Register &TempSGPR,
+ Optional<int> &FrameIndex,
+ bool IsFP) {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+
+#ifndef NDEBUG
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+#endif
+
+ // We need to save and restore the current FP/BP.
+
+ // 1: If there is already a VGPR with free lanes, use it. We
+ // may already have to pay the penalty for spilling a CSR VGPR.
+ if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
+ int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
+ TargetStackID::SGPRSpill);
+
+ if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
+ llvm_unreachable("allocate SGPR spill should have worked");
+
+ FrameIndex = NewFI;
+
+ LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
+ dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to "
+ << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
+ << '\n');
+ return;
+ }
+
+ // 2: Next, try to save the FP/BP in an unused SGPR.
+ TempSGPR = findScratchNonCalleeSaveRegister(
+ MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
+
+ if (!TempSGPR) {
+ int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
+ TargetStackID::SGPRSpill);
+
+ if (MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
+ // 3: There's no free lane to spill, and no free register to save FP/BP,
+ // so we're forced to spill another VGPR to use for the spill.
+ FrameIndex = NewFI;
+ } else {
+ // 4: If all else fails, spill the FP/BP to memory.
+ FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
+ }
+
+ LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
+ dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
+ << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
+ << '\n';);
+ } else {
+ LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
+ << printReg(TempSGPR, TRI) << '\n');
+ }
}
// We need to specially emit stack operations here because a different frame
@@ -91,8 +132,8 @@ static MCPhysReg findUnusedSGPRNonCalleeSaved(MachineRegisterInfo &MRI) {
// use.
static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const SIInstrInfo *TII, unsigned SpillReg,
- unsigned ScratchRsrcReg, unsigned SPReg, int FI) {
+ const SIInstrInfo *TII, Register SpillReg,
+ Register ScratchRsrcReg, Register SPReg, int FI) {
MachineFunction *MF = MBB.getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
@@ -100,7 +141,7 @@ static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4,
- MFI.getObjectAlignment(FI));
+ MFI.getObjectAlign(FI));
if (isUInt<12>(Offset)) {
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET))
@@ -139,15 +180,15 @@ static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const SIInstrInfo *TII, unsigned SpillReg,
- unsigned ScratchRsrcReg, unsigned SPReg, int FI) {
+ const SIInstrInfo *TII, Register SpillReg,
+ Register ScratchRsrcReg, Register SPReg, int FI) {
MachineFunction *MF = MBB.getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
int64_t Offset = MFI.getObjectOffset(FI);
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 4,
- MFI.getObjectAlignment(FI));
+ MFI.getObjectAlign(FI));
if (isUInt<12>(Offset)) {
BuildMI(MBB, I, DebugLoc(),
@@ -184,11 +225,13 @@ static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
.addMemOperand(MMO);
}
-void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,
- MachineFunction &MF,
- MachineBasicBlock &MBB) const {
+// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
+void SIFrameLowering::emitEntryFunctionFlatScratchInit(
+ MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
- const SIRegisterInfo* TRI = &TII->getRegisterInfo();
+ const SIRegisterInfo *TRI = &TII->getRegisterInfo();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// We don't need this if we only have spills since there is no user facing
@@ -201,11 +244,6 @@ void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,
// pointer. Because we only detect if flat instructions are used at all,
// this will be used more often than necessary on VI.
- // Debug location must be unknown since the first debug location is used to
- // determine the end of the prologue.
- DebugLoc DL;
- MachineBasicBlock::iterator I = MBB.begin();
-
Register FlatScratchInitReg =
MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
@@ -216,8 +254,6 @@ void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,
Register FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
Register FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
- unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
-
// Do a 64-bit pointer add.
if (ST.flatScratchIsPointer()) {
if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
@@ -266,19 +302,22 @@ void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,
.addImm(8);
}
-unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
- const GCNSubtarget &ST,
- const SIInstrInfo *TII,
- const SIRegisterInfo *TRI,
- SIMachineFunctionInfo *MFI,
- MachineFunction &MF) const {
+// Shift down registers reserved for the scratch RSRC.
+Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
+ MachineFunction &MF) const {
+
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo *TRI = &TII->getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ assert(MFI->isEntryFunction());
+
+ Register ScratchRsrcReg = MFI->getScratchRSrcReg();
- // We need to insert initialization of the scratch resource descriptor.
- unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
- if (ScratchRsrcReg == AMDGPU::NoRegister ||
- !MRI.isPhysRegUsed(ScratchRsrcReg))
- return AMDGPU::NoRegister;
+ if (!ScratchRsrcReg || !MRI.isPhysRegUsed(ScratchRsrcReg))
+ return Register();
if (ST.hasSGPRInitBug() ||
ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
@@ -293,18 +332,19 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
// cannot do this for the resources required for scratch access. For now we
// skip over user SGPRs and may leave unused holes.
- // We find the resource first because it has an alignment requirement.
-
unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
- ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
+ ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
// Skip the last N reserved elements because they should have already been
// reserved for VCC etc.
+ Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
for (MCPhysReg Reg : AllSGPR128s) {
// Pick the first unallocated one. Make sure we don't clobber the other
- // reserved input we needed.
- if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
+ // reserved input we needed. Also for PAL, make sure we don't clobber
+ // the GIT pointer passed in SGPR0 or SGPR8.
+ if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
+ !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
MRI.replaceRegWith(ScratchRsrcReg, Reg);
MFI->setScratchRSrcReg(Reg);
return Reg;
@@ -314,231 +354,138 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
return ScratchRsrcReg;
}
-// Shift down registers reserved for the scratch wave offset.
-std::pair<unsigned, bool>
-SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
- const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
- SIMachineFunctionInfo *MFI, MachineFunction &MF) const {
- MachineRegisterInfo &MRI = MF.getRegInfo();
- unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
-
- assert(MFI->isEntryFunction());
-
- // No replacement necessary.
- if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
- (!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) {
- return std::make_pair(AMDGPU::NoRegister, false);
- }
-
- if (ST.hasSGPRInitBug())
- return std::make_pair(ScratchWaveOffsetReg, false);
-
- unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
-
- ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
- if (NumPreloaded > AllSGPRs.size())
- return std::make_pair(ScratchWaveOffsetReg, false);
-
- AllSGPRs = AllSGPRs.slice(NumPreloaded);
-
- // We need to drop register from the end of the list that we cannot use
- // for the scratch wave offset.
- // + 2 s102 and s103 do not exist on VI.
- // + 2 for vcc
- // + 2 for xnack_mask
- // + 2 for flat_scratch
- // + 4 for registers reserved for scratch resource register
- // + 1 for register reserved for scratch wave offset. (By exluding this
- // register from the list to consider, it means that when this
- // register is being used for the scratch wave offset and there
- // are no other free SGPRs, then the value will stay in this register.
- // + 1 if stack pointer is used.
- // ----
- // 13 (+1)
- unsigned ReservedRegCount = 13;
-
- if (AllSGPRs.size() < ReservedRegCount)
- return std::make_pair(ScratchWaveOffsetReg, false);
-
- bool HandledScratchWaveOffsetReg =
- ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
- bool FPAdjusted = false;
-
- for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
- // Pick the first unallocated SGPR. Be careful not to pick an alias of the
- // scratch descriptor, since we haven’t added its uses yet.
- if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
- if (!HandledScratchWaveOffsetReg) {
- HandledScratchWaveOffsetReg = true;
-
- MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
- if (MFI->getScratchWaveOffsetReg() == MFI->getStackPtrOffsetReg()) {
- assert(!hasFP(MF));
- MFI->setStackPtrOffsetReg(Reg);
- }
-
- MFI->setScratchWaveOffsetReg(Reg);
- MFI->setFrameOffsetReg(Reg);
- ScratchWaveOffsetReg = Reg;
- FPAdjusted = true;
- break;
- }
- }
- }
-
- return std::make_pair(ScratchWaveOffsetReg, FPAdjusted);
-}
-
void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
- SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-
- // If we only have SGPR spills, we won't actually be using scratch memory
- // since these spill to VGPRs.
- //
- // FIXME: We should be cleaning up these unused SGPR spill frame indices
- // somewhere.
-
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const SIInstrInfo *TII = ST.getInstrInfo();
- const SIRegisterInfo *TRI = &TII->getRegisterInfo();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const Function &F = MF.getFunction();
-
- // We need to do the replacement of the private segment buffer and wave offset
- // register even if there are no stack objects. There could be stores to undef
- // or a constant without an associated object.
+ // FIXME: If we only have SGPR spills, we won't actually be using scratch
+ // memory since these spill to VGPRs. We should be cleaning up these unused
+ // SGPR spill frame indices somewhere.
// FIXME: We still have implicit uses on SGPR spill instructions in case they
// need to spill to vector memory. It's likely that will not happen, but at
// this point it appears we need the setup. This part of the prolog should be
// emitted after frame indices are eliminated.
- if (MFI->hasFlatScratchInit())
- emitFlatScratchInit(ST, MF, MBB);
+ // FIXME: Remove all of the isPhysRegUsed checks
- unsigned ScratchRsrcReg
- = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo *TRI = &TII->getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const Function &F = MF.getFunction();
- unsigned ScratchWaveOffsetReg;
- bool FPAdjusted;
- std::tie(ScratchWaveOffsetReg, FPAdjusted) =
- getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
+ assert(MFI->isEntryFunction());
- // We need to insert initialization of the scratch resource descriptor.
Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
-
- unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
- if (ST.isAmdHsaOrMesa(F)) {
- PreloadedPrivateBufferReg = MFI->getPreloadedReg(
- AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
- }
-
- bool OffsetRegUsed = ScratchWaveOffsetReg != AMDGPU::NoRegister &&
- MRI.isPhysRegUsed(ScratchWaveOffsetReg);
- bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister &&
- MRI.isPhysRegUsed(ScratchRsrcReg);
-
// FIXME: Hack to not crash in situations which emitted an error.
- if (PreloadedScratchWaveOffsetReg == AMDGPU::NoRegister)
+ if (!PreloadedScratchWaveOffsetReg)
return;
- // We added live-ins during argument lowering, but since they were not used
- // they were deleted. We're adding the uses now, so add them back.
- MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
- MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
-
- if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
- assert(ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F));
- MRI.addLiveIn(PreloadedPrivateBufferReg);
- MBB.addLiveIn(PreloadedPrivateBufferReg);
+ // We need to do the replacement of the private segment buffer register even
+ // if there are no stack objects. There could be stores to undef or a
+ // constant without an associated object.
+ //
+ // This will return `Register()` in cases where there are no actual
+ // uses of the SRSRC.
+ Register ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
+
+ // Make the selected register live throughout the function.
+ if (ScratchRsrcReg) {
+ for (MachineBasicBlock &OtherBB : MF) {
+ if (&OtherBB != &MBB) {
+ OtherBB.addLiveIn(ScratchRsrcReg);
+ }
+ }
}
- // Make the register selected live throughout the function.
- for (MachineBasicBlock &OtherBB : MF) {
- if (&OtherBB == &MBB)
- continue;
-
- if (OffsetRegUsed || FPAdjusted)
- OtherBB.addLiveIn(ScratchWaveOffsetReg);
-
- if (ResourceRegUsed)
- OtherBB.addLiveIn(ScratchRsrcReg);
+ // Now that we have fixed the reserved SRSRC we need to locate the
+ // (potentially) preloaded SRSRC.
+ Register PreloadedScratchRsrcReg;
+ if (ST.isAmdHsaOrMesa(F)) {
+ PreloadedScratchRsrcReg =
+ MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
+ if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
+ // We added live-ins during argument lowering, but since they were not
+ // used they were deleted. We're adding the uses now, so add them back.
+ MRI.addLiveIn(PreloadedScratchRsrcReg);
+ MBB.addLiveIn(PreloadedScratchRsrcReg);
+ }
}
+ // Debug location must be unknown since the first debug location is used to
+ // determine the end of the prologue.
DebugLoc DL;
MachineBasicBlock::iterator I = MBB.begin();
- // If we reserved the original input registers, we don't need to copy to the
- // reserved registers.
-
- bool CopyBuffer = ResourceRegUsed &&
- PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
- ST.isAmdHsaOrMesa(F) &&
- ScratchRsrcReg != PreloadedPrivateBufferReg;
-
- // This needs to be careful of the copying order to avoid overwriting one of
- // the input registers before it's been copied to it's final
- // destination. Usually the offset should be copied first.
- bool CopyBufferFirst = TRI->isSubRegisterEq(PreloadedPrivateBufferReg,
- ScratchWaveOffsetReg);
- if (CopyBuffer && CopyBufferFirst) {
- BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
- .addReg(PreloadedPrivateBufferReg, RegState::Kill);
+ // We found the SRSRC first because it needs four registers and has an
+  // alignment requirement. If the SRSRC that we found clobbers the scratch
+  // wave offset, which may be in a fixed SGPR or a free SGPR chosen by
+  // SITargetLowering::allocateSystemSGPRs, COPY the scratch wave offset to
+  // a free SGPR.
+ Register ScratchWaveOffsetReg;
+ if (TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
+ ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
+ unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
+ AllSGPRs = AllSGPRs.slice(
+ std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
+ Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
+ for (MCPhysReg Reg : AllSGPRs) {
+ if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
+ !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
+ ScratchWaveOffsetReg = Reg;
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
+ .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
+ break;
+ }
+ }
+ } else {
+ ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
}
+ assert(ScratchWaveOffsetReg);
- unsigned SPReg = MFI->getStackPtrOffsetReg();
- assert(SPReg != AMDGPU::SP_REG);
-
- // FIXME: Remove the isPhysRegUsed checks
- const bool HasFP = hasFP(MF);
-
- if (HasFP || OffsetRegUsed) {
- assert(ScratchWaveOffsetReg);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
- .addReg(PreloadedScratchWaveOffsetReg, HasFP ? RegState::Kill : 0);
+ if (MF.getFrameInfo().hasCalls()) {
+ Register SPReg = MFI->getStackPtrOffsetReg();
+ assert(SPReg != AMDGPU::SP_REG);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
+ .addImm(MF.getFrameInfo().getStackSize() * ST.getWavefrontSize());
}
- if (CopyBuffer && !CopyBufferFirst) {
- BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
- .addReg(PreloadedPrivateBufferReg, RegState::Kill);
+ if (hasFP(MF)) {
+ Register FPReg = MFI->getFrameOffsetReg();
+ assert(FPReg != AMDGPU::FP_REG);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
}
- if (ResourceRegUsed) {
- emitEntryFunctionScratchSetup(ST, MF, MBB, MFI, I,
- PreloadedPrivateBufferReg, ScratchRsrcReg);
+ if (MFI->hasFlatScratchInit() || ScratchRsrcReg) {
+ MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
+ MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
}
- if (HasFP) {
- DebugLoc DL;
- const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
- int64_t StackSize = FrameInfo.getStackSize();
+ if (MFI->hasFlatScratchInit()) {
+ emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
+ }
- // On kernel entry, the private scratch wave offset is the SP value.
- if (StackSize == 0) {
- BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), SPReg)
- .addReg(MFI->getScratchWaveOffsetReg());
- } else {
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
- .addReg(MFI->getScratchWaveOffsetReg())
- .addImm(StackSize * ST.getWavefrontSize());
- }
+ if (ScratchRsrcReg) {
+ emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
+ PreloadedScratchRsrcReg,
+ ScratchRsrcReg, ScratchWaveOffsetReg);
}
}
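With the rework above, an entry function's prologue no longer copies the preloaded wave offset into SP; when the kernel makes calls it simply materializes SP as the frame size scaled by the wavefront size (the SGPR stack pointer counts bytes for the whole wave, while frame objects are sized per lane), and FP, when needed, starts at zero. A minimal arithmetic sketch of that scaling, using hypothetical constants rather than the MachineFrameInfo/GCNSubtarget accessors:

  #include <cstdint>
  #include <iostream>

  // Stand-ins for MachineFrameInfo::getStackSize() and
  // GCNSubtarget::getWavefrontSize(); values are made up for illustration.
  constexpr uint64_t StackSizePerLane = 256; // bytes of scratch per lane
  constexpr uint64_t WavefrontSize = 64;     // lanes per wave (wave64 assumed)

  int main() {
    // Mirrors the S_MOV_B32 SPReg, StackSize * WavefrontSize emitted above:
    // the wave-level SP is the per-lane frame size scaled by the wave width.
    uint64_t InitialSP = StackSizePerLane * WavefrontSize;
    uint64_t InitialFP = 0; // kernels enter with an empty frame
    std::cout << "SP = " << InitialSP << ", FP = " << InitialFP << "\n";
    return 0;
  }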
-// Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
-void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
- MachineFunction &MF, MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI,
- MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg,
- unsigned ScratchRsrcReg) const {
+// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
+void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
+ MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ const DebugLoc &DL, Register PreloadedScratchRsrcReg,
+ Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo *TRI = &TII->getRegisterInfo();
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const Function &Fn = MF.getFunction();
- DebugLoc DL;
if (ST.isAmdPalOS()) {
// The pointer to the GIT is formed from the offset passed in and either
@@ -557,19 +504,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
BuildMI(MBB, I, DL, GetPC64, Rsrc01);
}
- auto GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
- if (ST.hasMergedShaders()) {
- switch (MF.getFunction().getCallingConv()) {
- case CallingConv::AMDGPU_HS:
- case CallingConv::AMDGPU_GS:
- // Low GIT address is passed in s8 rather than s0 for an LS+HS or
- // ES+GS merged shader on gfx9+.
- GitPtrLo = AMDGPU::SGPR8;
- break;
- default:
- break;
- }
- }
+ Register GitPtrLo = MFI->getGITPtrLoReg(MF);
MF.getRegInfo().addLiveIn(GitPtrLo);
MBB.addLiveIn(GitPtrLo);
BuildMI(MBB, I, DL, SMovB32, RsrcLo)
@@ -582,12 +517,12 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
auto MMO = MF.getMachineMemOperand(PtrInfo,
MachineMemOperand::MOLoad |
- MachineMemOperand::MOInvariant |
- MachineMemOperand::MODereferenceable,
- 16, 4);
+ MachineMemOperand::MOInvariant |
+ MachineMemOperand::MODereferenceable,
+ 16, Align(4));
unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
- unsigned EncodedOffset = AMDGPU::getSMRDEncodedOffset(Subtarget, Offset);
+ unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
.addReg(Rsrc01)
.addImm(EncodedOffset) // offset
@@ -595,10 +530,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
.addImm(0) // dlc
.addReg(ScratchRsrcReg, RegState::ImplicitDefine)
.addMemOperand(MMO);
- return;
- }
- if (ST.isMesaGfxShader(Fn)
- || (PreloadedPrivateBufferReg == AMDGPU::NoRegister)) {
+ } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
assert(!ST.isAmdHsaOrMesa(Fn));
const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
@@ -621,11 +553,11 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
- auto MMO = MF.getMachineMemOperand(PtrInfo,
- MachineMemOperand::MOLoad |
- MachineMemOperand::MOInvariant |
- MachineMemOperand::MODereferenceable,
- 8, 4);
+ auto MMO = MF.getMachineMemOperand(
+ PtrInfo,
+ MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
+ MachineMemOperand::MODereferenceable,
+ 8, Align(4));
BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
.addReg(MFI->getImplicitBufferPtrUserSGPR())
.addImm(0) // offset
@@ -658,7 +590,37 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
BuildMI(MBB, I, DL, SMovB32, Rsrc3)
.addImm(Rsrc23 >> 32)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ } else if (ST.isAmdHsaOrMesa(Fn)) {
+ assert(PreloadedScratchRsrcReg);
+
+ if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
+ .addReg(PreloadedScratchRsrcReg, RegState::Kill);
+ }
}
+
+ // Add the scratch wave offset into the scratch RSRC.
+ //
+ // We only want to update the first 48 bits, which is the base address
+ // pointer, without touching the adjacent 16 bits of flags. We know this add
+ // cannot carry-out from bit 47, otherwise the scratch allocation would be
+ // impossible to fit in the 48-bit global address space.
+ //
+ // TODO: Evaluate if it is better to just construct an SRD using the flat
+ // scratch init and some constants rather than update the one we are passed.
+ Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+ Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+
+ // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
+ // the kernel body via inreg arguments.
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
+ .addReg(ScratchRsrcSub0)
+ .addReg(ScratchWaveOffsetReg)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
+ .addReg(ScratchRsrcSub1)
+ .addImm(0)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
}
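The S_ADD_U32 / S_ADDC_U32 pair above folds the 32-bit scratch wave offset into the 48-bit base address held in the low two dwords of the resource descriptor, leaving the flag bits above bit 47 untouched; the carry cannot escape bit 47 because the allocation must fit in the 48-bit address space. A standalone sketch of that split add (illustrative values, not the generated machine code):

  #include <cassert>
  #include <cstdint>
  #include <iostream>

  // Add a 32-bit byte offset into a 48-bit base split across two dwords,
  // mirroring S_ADD_U32 on the low dword followed by S_ADDC_U32 on the high
  // dword. Given the 48-bit constraint, the carry never reaches the SRD flag
  // bits in Hi[31:16].
  static void addOffsetToSrdBase(uint32_t &Lo, uint32_t &Hi, uint32_t Offset) {
    uint64_t Sum = uint64_t(Lo) + Offset;
    Lo = uint32_t(Sum);
    Hi += uint32_t(Sum >> 32); // propagate the carry into the high dword
    assert((((uint64_t(Hi & 0xFFFF) << 32) | Lo) < (1ULL << 48)) &&
           "base + offset must still fit in 48 bits");
  }

  int main() {
    uint32_t Lo = 0xFFFFF000u, Hi = 0x00000123u; // example 48-bit base
    addOffsetToSrdBase(Lo, Hi, 0x2000u);
    std::cout << std::hex << Hi << ":" << Lo << "\n"; // prints 124:1000
  }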
bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
@@ -673,6 +635,50 @@ bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
llvm_unreachable("Invalid TargetStackID::Value");
}
+// Activate all lanes, returns saved exec.
+static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
+ MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool IsProlog) {
+ Register ScratchExecCopy;
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ DebugLoc DL;
+
+ if (LiveRegs.empty()) {
+ if (IsProlog) {
+ LiveRegs.init(TRI);
+ LiveRegs.addLiveIns(MBB);
+ if (FuncInfo->SGPRForFPSaveRestoreCopy)
+ LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
+
+ if (FuncInfo->SGPRForBPSaveRestoreCopy)
+ LiveRegs.removeReg(FuncInfo->SGPRForBPSaveRestoreCopy);
+ } else {
+ // In epilog.
+ LiveRegs.init(*ST.getRegisterInfo());
+ LiveRegs.addLiveOuts(MBB);
+ LiveRegs.stepBackward(*MBBI);
+ }
+ }
+
+ ScratchExecCopy = findScratchNonCalleeSaveRegister(
+ MRI, LiveRegs, *TRI.getWaveMaskRegClass());
+
+ if (!IsProlog)
+ LiveRegs.removeReg(ScratchExecCopy);
+
+ const unsigned OrSaveExec =
+ ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
+ BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1);
+
+ return ScratchExecCopy;
+}
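buildScratchExecCopy captures the live exec mask in a scratch SGPR (pair) and then turns every lane on, so that whole-VGPR spills and reloads also cover lanes that were inactive on entry; the caller later restores exec from the copy. A small host-side simulation of that S_OR_SAVEEXEC idiom, assuming wave64 (plain C++, not MIR):

  #include <cstdint>
  #include <iostream>

  struct WaveState {
    uint64_t Exec; // one bit per lane (wave64 assumed)
  };

  // S_OR_SAVEEXEC_B64 Dst, Src behaves as: Dst = EXEC; EXEC |= Src.
  // Passing Src = -1 therefore saves the old mask and enables all lanes.
  static uint64_t orSaveExec(WaveState &W, uint64_t Src) {
    uint64_t Saved = W.Exec;
    W.Exec |= Src;
    return Saved;
  }

  int main() {
    WaveState W{0x00000000FFFF0000ull};    // only lanes 16-31 active
    uint64_t Saved = orSaveExec(W, ~0ull); // prologue/epilogue: all lanes on
    // ... a whole-VGPR spill or reload would be emitted here ...
    W.Exec = Saved;                        // S_MOV_B64 EXEC, Saved
    std::cout << std::hex << W.Exec << "\n"; // ffff0000
  }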
+
void SIFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
@@ -687,51 +693,81 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
- unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
- unsigned FramePtrReg = FuncInfo->getFrameOffsetReg();
+ Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
+ Register FramePtrReg = FuncInfo->getFrameOffsetReg();
+ Register BasePtrReg =
+ TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
LivePhysRegs LiveRegs;
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc DL;
bool HasFP = false;
+ bool HasBP = false;
uint32_t NumBytes = MFI.getStackSize();
uint32_t RoundedSize = NumBytes;
// To avoid clobbering VGPRs in lanes that weren't active on function entry,
// turn on all lanes before doing the spill to memory.
- unsigned ScratchExecCopy = AMDGPU::NoRegister;
+ Register ScratchExecCopy;
+
+ bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue();
+ bool SpillFPToMemory = false;
+ // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
+ // Otherwise we are spilling the FP to memory.
+ if (HasFPSaveIndex) {
+ SpillFPToMemory = MFI.getStackID(*FuncInfo->FramePointerSaveIndex) !=
+ TargetStackID::SGPRSpill;
+ }
+
+ bool HasBPSaveIndex = FuncInfo->BasePointerSaveIndex.hasValue();
+ bool SpillBPToMemory = false;
+ // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
+ // Otherwise we are spilling the BP to memory.
+ if (HasBPSaveIndex) {
+ SpillBPToMemory = MFI.getStackID(*FuncInfo->BasePointerSaveIndex) !=
+ TargetStackID::SGPRSpill;
+ }
// Emit the copy if we need an FP, and are using a free SGPR to save it.
- if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
+ if (FuncInfo->SGPRForFPSaveRestoreCopy) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy)
.addReg(FramePtrReg)
.setMIFlag(MachineInstr::FrameSetup);
}
+ // Emit the copy if we need a BP, and are using a free SGPR to save it.
+ if (FuncInfo->SGPRForBPSaveRestoreCopy) {
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
+ FuncInfo->SGPRForBPSaveRestoreCopy)
+ .addReg(BasePtrReg)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+  // If a copy has been emitted for FP and/or BP, make the SGPRs
+ // used in the copy instructions live throughout the function.
+ SmallVector<MCPhysReg, 2> TempSGPRs;
+ if (FuncInfo->SGPRForFPSaveRestoreCopy)
+ TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);
+
+ if (FuncInfo->SGPRForBPSaveRestoreCopy)
+ TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);
+
+ if (!TempSGPRs.empty()) {
+ for (MachineBasicBlock &MBB : MF) {
+ for (MCPhysReg Reg : TempSGPRs)
+ MBB.addLiveIn(Reg);
+
+ MBB.sortUniqueLiveIns();
+ }
+ }
+
for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
: FuncInfo->getSGPRSpillVGPRs()) {
if (!Reg.FI.hasValue())
continue;
- if (ScratchExecCopy == AMDGPU::NoRegister) {
- if (LiveRegs.empty()) {
- LiveRegs.init(TRI);
- LiveRegs.addLiveIns(MBB);
- if (FuncInfo->SGPRForFPSaveRestoreCopy)
- LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
- }
-
- ScratchExecCopy
- = findScratchNonCalleeSaveRegister(MRI, LiveRegs,
- *TRI.getWaveMaskRegClass());
- assert(FuncInfo->SGPRForFPSaveRestoreCopy != ScratchExecCopy);
-
- const unsigned OrSaveExec = ST.isWave32() ?
- AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
- BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec),
- ScratchExecCopy)
- .addImm(-1);
- }
+ if (!ScratchExecCopy)
+ ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
FuncInfo->getScratchRSrcReg(),
@@ -739,84 +775,153 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
Reg.FI.getValue());
}
- if (ScratchExecCopy != AMDGPU::NoRegister) {
+ if (HasFPSaveIndex && SpillFPToMemory) {
+ assert(!MFI.isDeadObjectIndex(FuncInfo->FramePointerSaveIndex.getValue()));
+
+ if (!ScratchExecCopy)
+ ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
+
+ MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
+ MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
+ .addReg(FramePtrReg);
+
+ buildPrologSpill(LiveRegs, MBB, MBBI, TII, TmpVGPR,
+ FuncInfo->getScratchRSrcReg(), StackPtrReg,
+ FuncInfo->FramePointerSaveIndex.getValue());
+ }
+
+ if (HasBPSaveIndex && SpillBPToMemory) {
+ assert(!MFI.isDeadObjectIndex(*FuncInfo->BasePointerSaveIndex));
+
+ if (!ScratchExecCopy)
+ ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
+
+ MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
+ MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
+ .addReg(BasePtrReg);
+
+ buildPrologSpill(LiveRegs, MBB, MBBI, TII, TmpVGPR,
+ FuncInfo->getScratchRSrcReg(), StackPtrReg,
+ *FuncInfo->BasePointerSaveIndex);
+ }
+
+ if (ScratchExecCopy) {
// FIXME: Split block and make terminator.
unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
- unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
- .addReg(ScratchExecCopy, RegState::Kill);
+ .addReg(ScratchExecCopy, RegState::Kill);
LiveRegs.addReg(ScratchExecCopy);
}
-
- if (FuncInfo->FramePointerSaveIndex) {
+ // In this case, spill the FP to a reserved VGPR.
+ if (HasFPSaveIndex && !SpillFPToMemory) {
const int FI = FuncInfo->FramePointerSaveIndex.getValue();
- assert(!MFI.isDeadObjectIndex(FI) &&
- MFI.getStackID(FI) == TargetStackID::SGPRSpill);
- ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
- = FuncInfo->getSGPRToVGPRSpills(FI);
+ assert(!MFI.isDeadObjectIndex(FI));
+
+ assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+ ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
+ FuncInfo->getSGPRToVGPRSpills(FI);
assert(Spill.size() == 1);
// Save FP before setting it up.
// FIXME: This should respect spillSGPRToVGPR;
BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
Spill[0].VGPR)
- .addReg(FramePtrReg)
- .addImm(Spill[0].Lane)
- .addReg(Spill[0].VGPR, RegState::Undef);
+ .addReg(FramePtrReg)
+ .addImm(Spill[0].Lane)
+ .addReg(Spill[0].VGPR, RegState::Undef);
+ }
+
+ // In this case, spill the BP to a reserved VGPR.
+ if (HasBPSaveIndex && !SpillBPToMemory) {
+ const int BasePtrFI = *FuncInfo->BasePointerSaveIndex;
+ assert(!MFI.isDeadObjectIndex(BasePtrFI));
+
+ assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
+ ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
+ FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
+ assert(Spill.size() == 1);
+
+ // Save BP before setting it up.
+ // FIXME: This should respect spillSGPRToVGPR;
+ BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
+ Spill[0].VGPR)
+ .addReg(BasePtrReg)
+ .addImm(Spill[0].Lane)
+ .addReg(Spill[0].VGPR, RegState::Undef);
}
if (TRI.needsStackRealignment(MF)) {
HasFP = true;
- const unsigned Alignment = MFI.getMaxAlignment();
+ const unsigned Alignment = MFI.getMaxAlign().value();
RoundedSize += Alignment;
if (LiveRegs.empty()) {
LiveRegs.init(TRI);
LiveRegs.addLiveIns(MBB);
LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
+ LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
}
- unsigned ScratchSPReg = findScratchNonCalleeSaveRegister(
+ Register ScratchSPReg = findScratchNonCalleeSaveRegister(
MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass);
- assert(ScratchSPReg != AMDGPU::NoRegister &&
- ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy);
+ assert(ScratchSPReg && ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy &&
+ ScratchSPReg != FuncInfo->SGPRForBPSaveRestoreCopy);
// s_add_u32 tmp_reg, s32, NumBytes
// s_and_b32 s32, tmp_reg, 0b111...0000
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg)
- .addReg(StackPtrReg)
- .addImm((Alignment - 1) * ST.getWavefrontSize())
- .setMIFlag(MachineInstr::FrameSetup);
+ .addReg(StackPtrReg)
+ .addImm((Alignment - 1) * ST.getWavefrontSize())
+ .setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
- .addReg(ScratchSPReg, RegState::Kill)
- .addImm(-Alignment * ST.getWavefrontSize())
- .setMIFlag(MachineInstr::FrameSetup);
+ .addReg(ScratchSPReg, RegState::Kill)
+ .addImm(-Alignment * ST.getWavefrontSize())
+ .setMIFlag(MachineInstr::FrameSetup);
FuncInfo->setIsStackRealigned(true);
} else if ((HasFP = hasFP(MF))) {
- // If we need a base pointer, set it up here. It's whatever the value of
- // the stack pointer is at this point. Any variable size objects will be
- // allocated after this, so we can still use the base pointer to reference
- // locals.
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
- .addReg(StackPtrReg)
- .setMIFlag(MachineInstr::FrameSetup);
+ .addReg(StackPtrReg)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ // If we need a base pointer, set it up here. It's whatever the value of
+ // the stack pointer is at this point. Any variable size objects will be
+ // allocated after this, so we can still use the base pointer to reference
+ // the incoming arguments.
+ if ((HasBP = TRI.hasBasePointer(MF))) {
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
+ .addReg(StackPtrReg)
+ .setMIFlag(MachineInstr::FrameSetup);
}
if (HasFP && RoundedSize != 0) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
- .addReg(StackPtrReg)
- .addImm(RoundedSize * ST.getWavefrontSize())
- .setMIFlag(MachineInstr::FrameSetup);
+ .addReg(StackPtrReg)
+ .addImm(RoundedSize * ST.getWavefrontSize())
+ .setMIFlag(MachineInstr::FrameSetup);
}
- assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister ||
+ assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
FuncInfo->FramePointerSaveIndex)) &&
"Needed to save FP but didn't save it anywhere");
- assert((HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy == AMDGPU::NoRegister &&
+ assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
!FuncInfo->FramePointerSaveIndex)) &&
"Saved FP but didn't need it");
+
+ assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy ||
+ FuncInfo->BasePointerSaveIndex)) &&
+ "Needed to save BP but didn't save it anywhere");
+
+ assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy &&
+ !FuncInfo->BasePointerSaveIndex)) &&
+ "Saved BP but didn't need it");
}
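The realignment path above rounds the wave-scaled stack pointer up to the requested per-lane alignment with an add and a mask: s_add_u32 tmp, s32, (Align-1)*WaveSize followed by s_and_b32 fp, tmp, -(Align*WaveSize). Because the SGPR stack pointer is always a multiple of the wavefront size, adding (Align-1)*WaveSize before masking is enough. A sketch of the same arithmetic with assumed example values:

  #include <cassert>
  #include <cstdint>
  #include <iostream>

  // Round a wave-scaled SP up to a power-of-two per-lane alignment, mirroring
  // the s_add_u32 / s_and_b32 pair in the prologue realignment path.
  static uint32_t alignWaveSP(uint32_t SP, uint32_t Alignment,
                              uint32_t WavefrontSize) {
    assert((Alignment & (Alignment - 1)) == 0 && "alignment must be a power of 2");
    assert(SP % WavefrontSize == 0 && "wave-scaled SP is a multiple of the wave size");
    uint32_t Scaled = Alignment * WavefrontSize;
    return (SP + (Alignment - 1) * WavefrontSize) & ~(Scaled - 1); // ~(Scaled-1) == -Scaled
  }

  int main() {
    // SP = 0x1240 wave bytes, 16-byte per-lane alignment, wave64.
    std::cout << std::hex << alignWaveSP(0x1240, 16, 64) << "\n"; // 0x1400
  }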
void SIFrameLowering::emitEpilogue(MachineFunction &MF,
@@ -828,81 +933,126 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
LivePhysRegs LiveRegs;
DebugLoc DL;
const MachineFrameInfo &MFI = MF.getFrameInfo();
uint32_t NumBytes = MFI.getStackSize();
- uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
- NumBytes + MFI.getMaxAlignment() : NumBytes;
+ uint32_t RoundedSize = FuncInfo->isStackRealigned()
+ ? NumBytes + MFI.getMaxAlign().value()
+ : NumBytes;
+ const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
+ const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
+ const Register BasePtrReg =
+ TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
+
+ bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue();
+ bool SpillFPToMemory = false;
+ if (HasFPSaveIndex) {
+ SpillFPToMemory = MFI.getStackID(*FuncInfo->FramePointerSaveIndex) !=
+ TargetStackID::SGPRSpill;
+ }
+
+ bool HasBPSaveIndex = FuncInfo->BasePointerSaveIndex.hasValue();
+ bool SpillBPToMemory = false;
+ if (HasBPSaveIndex) {
+ SpillBPToMemory = MFI.getStackID(*FuncInfo->BasePointerSaveIndex) !=
+ TargetStackID::SGPRSpill;
+ }
if (RoundedSize != 0 && hasFP(MF)) {
- const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
.addReg(StackPtrReg)
.addImm(RoundedSize * ST.getWavefrontSize())
.setMIFlag(MachineInstr::FrameDestroy);
}
- if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->getFrameOffsetReg())
- .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
- .setMIFlag(MachineInstr::FrameSetup);
+ if (FuncInfo->SGPRForFPSaveRestoreCopy) {
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
+ .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
+ .setMIFlag(MachineInstr::FrameSetup);
}
- if (FuncInfo->FramePointerSaveIndex) {
- const int FI = FuncInfo->FramePointerSaveIndex.getValue();
+ if (FuncInfo->SGPRForBPSaveRestoreCopy) {
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
+ .addReg(FuncInfo->SGPRForBPSaveRestoreCopy)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
- assert(!MF.getFrameInfo().isDeadObjectIndex(FI) &&
- MF.getFrameInfo().getStackID(FI) == TargetStackID::SGPRSpill);
+ Register ScratchExecCopy;
+ if (HasFPSaveIndex) {
+ const int FI = FuncInfo->FramePointerSaveIndex.getValue();
+ assert(!MFI.isDeadObjectIndex(FI));
+ if (SpillFPToMemory) {
+ if (!ScratchExecCopy)
+ ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
+
+ MCPhysReg TempVGPR = findScratchNonCalleeSaveRegister(
+ MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+ buildEpilogReload(LiveRegs, MBB, MBBI, TII, TempVGPR,
+ FuncInfo->getScratchRSrcReg(), StackPtrReg, FI);
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
+ .addReg(TempVGPR, RegState::Kill);
+ } else {
+ // Reload from VGPR spill.
+ assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+ ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
+ FuncInfo->getSGPRToVGPRSpills(FI);
+ assert(Spill.size() == 1);
+ BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+ FramePtrReg)
+ .addReg(Spill[0].VGPR)
+ .addImm(Spill[0].Lane);
+ }
+ }
- ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
- = FuncInfo->getSGPRToVGPRSpills(FI);
- assert(Spill.size() == 1);
- BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
- FuncInfo->getFrameOffsetReg())
- .addReg(Spill[0].VGPR)
- .addImm(Spill[0].Lane);
+ if (HasBPSaveIndex) {
+ const int BasePtrFI = *FuncInfo->BasePointerSaveIndex;
+ assert(!MFI.isDeadObjectIndex(BasePtrFI));
+ if (SpillBPToMemory) {
+ if (!ScratchExecCopy)
+ ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
+
+ MCPhysReg TempVGPR = findScratchNonCalleeSaveRegister(
+ MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+ buildEpilogReload(LiveRegs, MBB, MBBI, TII, TempVGPR,
+ FuncInfo->getScratchRSrcReg(), StackPtrReg, BasePtrFI);
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
+ .addReg(TempVGPR, RegState::Kill);
+ } else {
+ // Reload from VGPR spill.
+ assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
+ ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
+ FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
+ assert(Spill.size() == 1);
+ BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+ BasePtrReg)
+ .addReg(Spill[0].VGPR)
+ .addImm(Spill[0].Lane);
+ }
}
- unsigned ScratchExecCopy = AMDGPU::NoRegister;
- for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
- : FuncInfo->getSGPRSpillVGPRs()) {
+ for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg :
+ FuncInfo->getSGPRSpillVGPRs()) {
if (!Reg.FI.hasValue())
continue;
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
- if (ScratchExecCopy == AMDGPU::NoRegister) {
- // See emitPrologue
- if (LiveRegs.empty()) {
- LiveRegs.init(*ST.getRegisterInfo());
- LiveRegs.addLiveOuts(MBB);
- LiveRegs.stepBackward(*MBBI);
- }
-
- ScratchExecCopy = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, *TRI.getWaveMaskRegClass());
- LiveRegs.removeReg(ScratchExecCopy);
-
- const unsigned OrSaveExec =
- ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
-
- BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
- .addImm(-1);
- }
+ if (!ScratchExecCopy)
+ ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
buildEpilogReload(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
- FuncInfo->getScratchRSrcReg(),
- FuncInfo->getStackPtrOffsetReg(), Reg.FI.getValue());
+ FuncInfo->getScratchRSrcReg(), StackPtrReg,
+ Reg.FI.getValue());
}
- if (ScratchExecCopy != AMDGPU::NoRegister) {
+ if (ScratchExecCopy) {
// FIXME: Split block and make terminator.
unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
- unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
- .addReg(ScratchExecCopy, RegState::Kill);
+ .addReg(ScratchExecCopy, RegState::Kill);
}
}
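The epilogue mirrors the prologue's three ways of preserving the FP (and, with this change, the BP): restore from the temporary SGPR copy, V_READLANE from the reserved spill VGPR, or reload from scratch memory followed by V_READFIRSTLANE. A deliberately simplified classifier of those paths, with hypothetical field names rather than the SIMachineFunctionInfo members:

  #include <iostream>
  #include <optional>

  enum class RestorePath { FromSGPRCopy, FromVGPRLane, FromMemory, None };

  struct SavedPointerInfo {
    bool HasSGPRCopy = false;                 // an SGPRFor*SaveRestoreCopy was picked
    std::optional<int> SaveIndex;             // a *PointerSaveIndex frame index exists
    bool SaveIndexIsSGPRSpillStackID = false; // StackID == TargetStackID::SGPRSpill
  };

  // The two mechanisms are mutually exclusive in practice; the copy register is
  // checked first, then the frame index's stack ID decides lane vs. memory.
  static RestorePath pickRestorePath(const SavedPointerInfo &I) {
    if (I.HasSGPRCopy)
      return RestorePath::FromSGPRCopy;       // plain COPY back from the temp SGPR
    if (I.SaveIndex)
      return I.SaveIndexIsSGPRSpillStackID
                 ? RestorePath::FromVGPRLane  // V_READLANE from the spill VGPR
                 : RestorePath::FromMemory;   // buildEpilogReload + V_READFIRSTLANE
    return RestorePath::None;                 // the pointer was never clobbered
  }

  int main() {
    SavedPointerInfo I;
    I.SaveIndex = 0; // saved to a regular (memory) stack slot
    std::cout << static_cast<int>(pickRestorePath(I)) << "\n"; // 2 = FromMemory
  }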
@@ -920,12 +1070,14 @@ static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
- Optional<int> FramePointerSaveIndex) {
+ Optional<int> FramePointerSaveIndex,
+ Optional<int> BasePointerSaveIndex) {
for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
I != E; ++I) {
if (!MFI.isDeadObjectIndex(I) &&
MFI.getStackID(I) == TargetStackID::SGPRSpill &&
- FramePointerSaveIndex && I != FramePointerSaveIndex) {
+ ((FramePointerSaveIndex && I != FramePointerSaveIndex) ||
+ (BasePointerSaveIndex && I != BasePointerSaveIndex))) {
return false;
}
}
@@ -935,7 +1087,7 @@ static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
#endif
int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const {
+ Register &FrameReg) const {
const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
FrameReg = RI->getFrameRegister(MF);
@@ -952,7 +1104,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
FuncInfo->removeDeadFrameIndices(MFI);
- assert(allSGPRSpillsAreDead(MFI, None) &&
+ assert(allSGPRSpillsAreDead(MFI, None, None) &&
"SGPR spill should have been removed in SILowerSGPRSpills");
// FIXME: The other checks should be redundant with allStackObjectsAreDead,
@@ -967,9 +1119,8 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
RS->addScavengingFrameIndex(ScavengeFI);
} else {
int ScavengeFI = MFI.CreateStackObject(
- TRI->getSpillSize(AMDGPU::SGPR_32RegClass),
- TRI->getSpillAlignment(AMDGPU::SGPR_32RegClass),
- false);
+ TRI->getSpillSize(AMDGPU::SGPR_32RegClass),
+ TRI->getSpillAlign(AMDGPU::SGPR_32RegClass), false);
RS->addScavengingFrameIndex(ScavengeFI);
}
}
@@ -984,7 +1135,7 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (MFI->isEntryFunction())
return;
- const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ MachineFrameInfo &FrameInfo = MF.getFrameInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
@@ -1008,46 +1159,19 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
for (auto SSpill : MFI->getSGPRSpillVGPRs())
SavedVGPRs.reset(SSpill.VGPR);
- const bool HasFP = WillHaveFP || hasFP(MF);
- if (!HasFP)
- return;
-
- if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
- int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
- TargetStackID::SGPRSpill);
-
- // If there is already a VGPR with free lanes, use it. We may already have
- // to pay the penalty for spilling a CSR VGPR.
- if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
- llvm_unreachable("allocate SGPR spill should have worked");
-
- MFI->FramePointerSaveIndex = NewFI;
+ LivePhysRegs LiveRegs;
+ LiveRegs.init(*TRI);
- LLVM_DEBUG(
- auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
- dbgs() << "Spilling FP to " << printReg(Spill.VGPR, TRI)
- << ':' << Spill.Lane << '\n');
- return;
+ if (WillHaveFP || hasFP(MF)) {
+ getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy,
+ MFI->FramePointerSaveIndex, true);
}
- MFI->SGPRForFPSaveRestoreCopy = findUnusedSGPRNonCalleeSaved(MF.getRegInfo());
-
- if (!MFI->SGPRForFPSaveRestoreCopy) {
- // There's no free lane to spill, and no free register to save FP, so we're
- // forced to spill another VGPR to use for the spill.
- int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
- TargetStackID::SGPRSpill);
- if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
- llvm_unreachable("allocate SGPR spill should have worked");
- MFI->FramePointerSaveIndex = NewFI;
-
- LLVM_DEBUG(
- auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
- dbgs() << "FP requires fallback spill to " << printReg(Spill.VGPR, TRI)
- << ':' << Spill.Lane << '\n';);
- } else {
- LLVM_DEBUG(dbgs() << "Saving FP with copy to " <<
- printReg(MFI->SGPRForFPSaveRestoreCopy, TRI) << '\n');
+ if (TRI->hasBasePointer(MF)) {
+ if (MFI->SGPRForFPSaveRestoreCopy)
+ LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy);
+ getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy,
+ MFI->BasePointerSaveIndex, false);
}
}
@@ -1074,14 +1198,31 @@ bool SIFrameLowering::assignCalleeSavedSpillSlots(
return true; // Early exit if no callee saved registers are modified!
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
- if (!FuncInfo->SGPRForFPSaveRestoreCopy)
+ if (!FuncInfo->SGPRForFPSaveRestoreCopy &&
+ !FuncInfo->SGPRForBPSaveRestoreCopy)
return false;
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *RI = ST.getRegisterInfo();
+ Register FramePtrReg = FuncInfo->getFrameOffsetReg();
+ Register BasePtrReg = RI->getBaseRegister();
+ unsigned NumModifiedRegs = 0;
+
+ if (FuncInfo->SGPRForFPSaveRestoreCopy)
+ NumModifiedRegs++;
+ if (FuncInfo->SGPRForBPSaveRestoreCopy)
+ NumModifiedRegs++;
+
for (auto &CS : CSI) {
- if (CS.getReg() == FuncInfo->getFrameOffsetReg()) {
- if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister)
- CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
- break;
+ if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) {
+ CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
+ if (--NumModifiedRegs)
+ break;
+ } else if (CS.getReg() == BasePtrReg &&
+ FuncInfo->SGPRForBPSaveRestoreCopy) {
+ CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy);
+ if (--NumModifiedRegs)
+ break;
}
}
@@ -1104,12 +1245,10 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
if (!hasReservedCallFrame(MF)) {
- unsigned Align = getStackAlignment();
-
- Amount = alignTo(Amount, Align);
+ Amount = alignTo(Amount, getStackAlign());
assert(isUInt<32>(Amount) && "exceeded stack address space size");
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- unsigned SPReg = MFI->getStackPtrOffsetReg();
+ Register SPReg = MFI->getStackPtrOffsetReg();
unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
BuildMI(MBB, I, DL, TII->get(Op), SPReg)
@@ -1124,19 +1263,17 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
- if (MFI.hasCalls()) {
+
+ // For entry functions we can use an immediate offset in most cases, so the
+ // presence of calls doesn't imply we need a distinct frame pointer.
+ if (MFI.hasCalls() &&
+ !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
// All offsets are unsigned, so need to be addressed in the same direction
// as stack growth.
// FIXME: This function is pretty broken, since it can be called before the
// frame layout is determined or CSR spills are inserted.
- if (MFI.getStackSize() != 0)
- return true;
-
- // For the entry point, the input wave scratch offset must be copied to the
- // API SP if there are calls.
- if (MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction())
- return true;
+ return MFI.getStackSize() != 0;
}
return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.h
index d9970fd6b4b8..e89432040661 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -21,7 +21,7 @@ class GCNSubtarget;
class SIFrameLowering final : public AMDGPUFrameLowering {
public:
SIFrameLowering(StackDirection D, Align StackAl, int LAO,
- Align TransAl = Align::None())
+ Align TransAl = Align(1))
: AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {}
~SIFrameLowering() override = default;
@@ -32,7 +32,7 @@ public:
void emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const override;
int getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const override;
+ Register &FrameReg) const override;
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS = nullptr) const override;
@@ -55,26 +55,19 @@ public:
MachineBasicBlock::iterator MI) const override;
private:
- void emitFlatScratchInit(const GCNSubtarget &ST,
- MachineFunction &MF,
- MachineBasicBlock &MBB) const;
-
- unsigned getReservedPrivateSegmentBufferReg(
- const GCNSubtarget &ST,
- const SIInstrInfo *TII,
- const SIRegisterInfo *TRI,
- SIMachineFunctionInfo *MFI,
- MachineFunction &MF) const;
-
- std::pair<unsigned, bool> getReservedPrivateSegmentWaveByteOffsetReg(
- const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
- SIMachineFunctionInfo *MFI, MachineFunction &MF) const;
-
- // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
- void emitEntryFunctionScratchSetup(const GCNSubtarget &ST, MachineFunction &MF,
- MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI,
- MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg,
- unsigned ScratchRsrcReg) const;
+ void emitEntryFunctionFlatScratchInit(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL,
+ Register ScratchWaveOffsetReg) const;
+
+ Register getEntryFunctionReservedScratchRsrcReg(MachineFunction &MF) const;
+
+ void emitEntryFunctionScratchRsrcRegSetup(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, const DebugLoc &DL,
+ Register PreloadedPrivateBufferReg, Register ScratchRsrcReg,
+ Register ScratchWaveOffsetReg) const;
public:
bool hasFP(const MachineFunction &MF) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e73d87cd66af..d035aa8f72bd 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -11,11 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#if defined(_MSC_VER) || defined(__MINGW32__)
-// Provide M_PI.
-#define _USE_MATH_DEFINES
-#endif
-
#include "SIISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
@@ -40,6 +35,7 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -95,14 +91,24 @@ static cl::opt<bool> DisableLoopAlignment(
cl::desc("Do not align and prefetch loops"),
cl::init(false));
+static cl::opt<bool> VGPRReserveforSGPRSpill(
+ "amdgpu-reserve-vgpr-for-sgpr-spill",
+ cl::desc("Allocates one VGPR for future SGPR Spill"), cl::init(true));
+
+static cl::opt<bool> UseDivergentRegisterIndexing(
+ "amdgpu-use-divergent-register-indexing",
+ cl::Hidden,
+ cl::desc("Use indirect register addressing for divergent indexes"),
+ cl::init(false));
+
static bool hasFP32Denormals(const MachineFunction &MF) {
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- return Info->getMode().FP32Denormals;
+ return Info->getMode().allFP32Denormals();
}
static bool hasFP64FP16Denormals(const MachineFunction &MF) {
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- return Info->getMode().FP64FP16Denormals;
+ return Info->getMode().allFP64FP16Denormals();
}
static unsigned findFirstFreeSGPR(CCState &CCInfo) {
@@ -141,12 +147,21 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass);
addRegisterClass(MVT::v5f32, &AMDGPU::VReg_160RegClass);
- addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
+ addRegisterClass(MVT::v8i32, &AMDGPU::SGPR_256RegClass);
addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
- addRegisterClass(MVT::v16i32, &AMDGPU::SReg_512RegClass);
+ addRegisterClass(MVT::v4i64, &AMDGPU::SGPR_256RegClass);
+ addRegisterClass(MVT::v4f64, &AMDGPU::VReg_256RegClass);
+
+ addRegisterClass(MVT::v16i32, &AMDGPU::SGPR_512RegClass);
addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
+ addRegisterClass(MVT::v8i64, &AMDGPU::SGPR_512RegClass);
+ addRegisterClass(MVT::v8f64, &AMDGPU::VReg_512RegClass);
+
+ addRegisterClass(MVT::v16i64, &AMDGPU::SGPR_1024RegClass);
+ addRegisterClass(MVT::v16f64, &AMDGPU::VReg_1024RegClass);
+
if (Subtarget->has16BitInsts()) {
addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass);
addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass);
@@ -158,10 +173,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass);
}
- if (Subtarget->hasMAIInsts()) {
- addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
- addRegisterClass(MVT::v32f32, &AMDGPU::VReg_1024RegClass);
- }
+ addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
+ addRegisterClass(MVT::v32f32, &AMDGPU::VReg_1024RegClass);
computeRegisterProperties(Subtarget->getRegisterInfo());
@@ -202,6 +215,17 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v8i32, MVT::v8i8, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
setTruncStoreAction(MVT::v32i32, MVT::v32i8, Expand);
+ setTruncStoreAction(MVT::v2i16, MVT::v2i8, Expand);
+ setTruncStoreAction(MVT::v4i16, MVT::v4i8, Expand);
+ setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand);
+ setTruncStoreAction(MVT::v16i16, MVT::v16i8, Expand);
+ setTruncStoreAction(MVT::v32i16, MVT::v32i8, Expand);
+
+ setTruncStoreAction(MVT::v4i64, MVT::v4i8, Expand);
+ setTruncStoreAction(MVT::v8i64, MVT::v8i8, Expand);
+ setTruncStoreAction(MVT::v8i64, MVT::v8i16, Expand);
+ setTruncStoreAction(MVT::v8i64, MVT::v8i32, Expand);
+ setTruncStoreAction(MVT::v16i64, MVT::v16i32, Expand);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
@@ -224,6 +248,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::TRUNCATE, MVT::v2i32, Expand);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::v4f32, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::v8f32, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::v16f32, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom);
@@ -260,7 +290,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// with > 4 elements.
for (MVT VT : { MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32,
MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16,
- MVT::v32i32, MVT::v32f32 }) {
+ MVT::v4i64, MVT::v4f64, MVT::v8i64, MVT::v8f64,
+ MVT::v16i64, MVT::v16f64, MVT::v32i32, MVT::v32f32 }) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch (Op) {
case ISD::LOAD:
@@ -304,6 +335,48 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v4i32);
}
+ for (MVT Vec64 : { MVT::v4i64, MVT::v4f64 }) {
+ setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
+ AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v8i32);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
+ AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v8i32);
+
+ setOperationAction(ISD::INSERT_VECTOR_ELT, Vec64, Promote);
+ AddPromotedToType(ISD::INSERT_VECTOR_ELT, Vec64, MVT::v8i32);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, Vec64, Promote);
+ AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v8i32);
+ }
+
+ for (MVT Vec64 : { MVT::v8i64, MVT::v8f64 }) {
+ setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
+ AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v16i32);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
+ AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v16i32);
+
+ setOperationAction(ISD::INSERT_VECTOR_ELT, Vec64, Promote);
+ AddPromotedToType(ISD::INSERT_VECTOR_ELT, Vec64, MVT::v16i32);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, Vec64, Promote);
+ AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v16i32);
+ }
+
+ for (MVT Vec64 : { MVT::v16i64, MVT::v16f64 }) {
+ setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
+ AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v32i32);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
+ AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v32i32);
+
+ setOperationAction(ISD::INSERT_VECTOR_ELT, Vec64, Promote);
+ AddPromotedToType(ISD::INSERT_VECTOR_ELT, Vec64, MVT::v32i32);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, Vec64, Promote);
+ AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v32i32);
+ }
+
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
@@ -361,9 +434,14 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
}
- setOperationAction(ISD::BSWAP, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
+ // FIXME: This should be narrowed to i32, but that only happens if i64 is
+ // illegal.
+ // FIXME: Should lower sub-i32 bswaps to bit-ops without v_perm_b32.
+ setOperationAction(ISD::BSWAP, MVT::i64, Legal);
+ setOperationAction(ISD::BSWAP, MVT::i32, Legal);
+
// On SI this is s_memtime and s_memrealtime on VI.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
setOperationAction(ISD::TRAP, MVT::Other, Custom);
@@ -376,10 +454,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FLOG10, MVT::f16, Custom);
}
- // v_mad_f32 does not support denormals. We report it as unconditionally
- // legal, and the context where it is formed will disallow it when fp32
- // denormals are enabled.
- setOperationAction(ISD::FMAD, MVT::f32, Legal);
+ if (Subtarget->hasMadMacF32Insts())
+ setOperationAction(ISD::FMAD, MVT::f32, Legal);
if (!Subtarget->hasBFI()) {
// fcopysign can be done in a single instruction with BFI.
@@ -463,7 +539,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SREM, MVT::i16, Promote);
setOperationAction(ISD::UREM, MVT::i16, Promote);
- setOperationAction(ISD::BSWAP, MVT::i16, Promote);
setOperationAction(ISD::BITREVERSE, MVT::i16, Promote);
setOperationAction(ISD::CTTZ, MVT::i16, Promote);
@@ -499,8 +574,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// F16 - VOP1 Actions.
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
- setOperationAction(ISD::FCOS, MVT::f16, Promote);
- setOperationAction(ISD::FSIN, MVT::f16, Promote);
+ setOperationAction(ISD::FCOS, MVT::f16, Custom);
+ setOperationAction(ISD::FSIN, MVT::f16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Custom);
@@ -545,6 +620,11 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
}
}
+ // v_perm_b32 can handle either of these.
+ setOperationAction(ISD::BSWAP, MVT::i16, Legal);
+ setOperationAction(ISD::BSWAP, MVT::v2i16, Legal);
+ setOperationAction(ISD::BSWAP, MVT::v4i16, Custom);
+
// XXX - Do these do anything? Vector constants turn into build_vector.
setOperationAction(ISD::Constant, MVT::v2i16, Legal);
setOperationAction(ISD::ConstantFP, MVT::v2f16, Legal);
@@ -686,6 +766,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, VT, Custom);
}
+ setOperationAction(ISD::SMULO, MVT::i64, Custom);
+ setOperationAction(ISD::UMULO, MVT::i64, Custom);
+
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
@@ -762,6 +845,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::ATOMIC_LOAD_UMAX);
setTargetDAGCombine(ISD::ATOMIC_LOAD_FADD);
+ // FIXME: In other contexts we pretend this is a per-function property.
+ setStackPointerRegisterToSaveRestore(AMDGPU::SGPR32);
+
setSchedulingPreference(Sched::RegPressure);
}
@@ -783,6 +869,7 @@ bool SITargetLowering::isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
(Opcode == ISD::FMA && Subtarget->hasFmaMixInsts())) &&
DestVT.getScalarType() == MVT::f32 &&
SrcVT.getScalarType() == MVT::f16 &&
+ // TODO: This probably only requires no input flushing?
!hasFP32Denormals(DAG.getMachineFunction());
}
@@ -877,45 +964,33 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
}
-static MVT memVTFromAggregate(Type *Ty) {
- // Only limited forms of aggregate type currently expected.
- assert(Ty->isStructTy() && "Expected struct type");
-
+static EVT memVTFromImageData(Type *Ty, unsigned DMaskLanes) {
+ assert(DMaskLanes != 0);
- Type *ElementType = nullptr;
- unsigned NumElts;
- if (Ty->getContainedType(0)->isVectorTy()) {
- VectorType *VecComponent = cast<VectorType>(Ty->getContainedType(0));
- ElementType = VecComponent->getElementType();
- NumElts = VecComponent->getNumElements();
- } else {
- ElementType = Ty->getContainedType(0);
- NumElts = 1;
+ if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
+ unsigned NumElts = std::min(DMaskLanes, VT->getNumElements());
+ return EVT::getVectorVT(Ty->getContext(),
+ EVT::getEVT(VT->getElementType()),
+ NumElts);
}
- assert((Ty->getContainedType(1) && Ty->getContainedType(1)->isIntegerTy(32)) && "Expected int32 type");
+ return EVT::getEVT(Ty);
+}
- // Calculate the size of the memVT type from the aggregate
- unsigned Pow2Elts = 0;
- unsigned ElementSize;
- switch (ElementType->getTypeID()) {
- default:
- llvm_unreachable("Unknown type!");
- case Type::IntegerTyID:
- ElementSize = cast<IntegerType>(ElementType)->getBitWidth();
- break;
- case Type::HalfTyID:
- ElementSize = 16;
- break;
- case Type::FloatTyID:
- ElementSize = 32;
- break;
- }
- unsigned AdditionalElts = ElementSize == 16 ? 2 : 1;
- Pow2Elts = 1 << Log2_32_Ceil(NumElts + AdditionalElts);
+// Peek through TFE struct returns to only use the data size.
+static EVT memVTFromImageReturn(Type *Ty, unsigned DMaskLanes) {
+ auto *ST = dyn_cast<StructType>(Ty);
+ if (!ST)
+ return memVTFromImageData(Ty, DMaskLanes);
- return MVT::getVectorVT(MVT::getVT(ElementType, false),
- Pow2Elts);
+ // Some intrinsics return an aggregate type - special case to work out the
+ // correct memVT.
+ //
+ // Only limited forms of aggregate type currently expected.
+ if (ST->getNumContainedTypes() != 2 ||
+ !ST->getContainedType(1)->isIntegerTy(32))
+ return EVT();
+ return memVTFromImageData(ST->getContainedType(0), DMaskLanes);
}
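memVTFromImageData/memVTFromImageReturn replace the old aggregate-size guesswork: the number of elements an image load actually touches is derived from the dmask's population count (with gather4 and dmask==0 special-cased), then the IR result type is clamped to that many elements, peeking through the { data, i32 } TFE struct if present. A compact restatement with plain integers standing in for EVT (C++20 for std::popcount):

  #include <algorithm>
  #include <bit>
  #include <cstdint>
  #include <iostream>

  // Lanes of data an image instruction really reads or writes.
  static unsigned dmaskToLanes(uint32_t DMask, bool IsGather4) {
    if (IsGather4)
      return 4;                           // gather4 always returns 4 components
    return DMask == 0 ? 1 : unsigned(std::popcount(DMask));
  }

  int main() {
    unsigned IRVectorElts = 4; // e.g. the intrinsic is declared as <4 x float>
    unsigned Lanes = dmaskToLanes(/*DMask=*/0b0101, /*IsGather4=*/false); // 2
    unsigned MemElts = std::min(IRVectorElts, Lanes);
    std::cout << "memVT gets " << MemElts << " elements\n"; // 2
  }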
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
@@ -944,17 +1019,40 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MODereferenceable;
if (Attr.hasFnAttribute(Attribute::ReadOnly)) {
+ unsigned DMaskLanes = 4;
+
+ if (RsrcIntr->IsImage) {
+ const AMDGPU::ImageDimIntrinsicInfo *Intr
+ = AMDGPU::getImageDimIntrinsicInfo(IntrID);
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+ AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
+
+ if (!BaseOpcode->Gather4) {
+ // If this isn't a gather, we may have excess loaded elements in the
+ // IR type. Check the dmask for the real number of elements loaded.
+ unsigned DMask
+ = cast<ConstantInt>(CI.getArgOperand(0))->getZExtValue();
+ DMaskLanes = DMask == 0 ? 1 : countPopulation(DMask);
+ }
+
+ Info.memVT = memVTFromImageReturn(CI.getType(), DMaskLanes);
+ } else
+ Info.memVT = EVT::getEVT(CI.getType());
+
+ // FIXME: What does alignment mean for an image?
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(CI.getType(), true);
- if (Info.memVT == MVT::Other) {
- // Some intrinsics return an aggregate type - special case to work out
- // the correct memVT
- Info.memVT = memVTFromAggregate(CI.getType());
- }
Info.flags |= MachineMemOperand::MOLoad;
} else if (Attr.hasFnAttribute(Attribute::WriteOnly)) {
Info.opc = ISD::INTRINSIC_VOID;
- Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
+
+ Type *DataTy = CI.getArgOperand(0)->getType();
+ if (RsrcIntr->IsImage) {
+ unsigned DMask = cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue();
+ unsigned DMaskLanes = DMask == 0 ? 1 : countPopulation(DMask);
+ Info.memVT = memVTFromImageData(DataTy, DMaskLanes);
+ } else
+ Info.memVT = EVT::getEVT(DataTy);
+
Info.flags |= MachineMemOperand::MOStore;
} else {
// Atomic
@@ -1031,6 +1129,17 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return true;
}
+ case Intrinsic::amdgcn_global_atomic_csub: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(CI.getType());
+ Info.ptrVal = CI.getOperand(0);
+ Info.align.reset();
+ Info.flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOVolatile;
+ return true;
+ }
case Intrinsic::amdgcn_ds_gws_init:
case Intrinsic::amdgcn_ds_gws_barrier:
case Intrinsic::amdgcn_ds_gws_sema_v:
@@ -1226,9 +1335,10 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
// addressing modes, so treat them as having no offset like flat
// instructions.
return isLegalFlatAddressingMode(AM);
- } else {
- llvm_unreachable("unhandled address space");
}
+
+ // Assume a user alias of global for unknown address spaces.
+ return isLegalGlobalAddressingMode(AM);
}
bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
@@ -1279,9 +1389,11 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
// If we have an uniform constant load, it still requires using a slow
// buffer instruction if unaligned.
if (IsFast) {
+ // Accesses can really be issued as 1-byte aligned or 4-byte aligned, so
+ // 2-byte alignment is worse than 1 unless doing a 2-byte access.
*IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ?
- (Align % 4 == 0) : true;
+ Align >= 4 : Align != 2;
}
return true;
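Note: stated as a standalone predicate (illustrative only, not the LLVM code), the IsFast result computed above is:

    // Scalar loads from the constant address spaces need 4-byte alignment to
    // stay on the fast path; other accesses are fine unless they are exactly
    // 2-byte aligned, which is no better than byte alignment for a wider access.
    bool isFastMisaligned(bool isScalarConstantAddrSpace, unsigned alignInBytes) {
      return isScalarConstantAddrSpace ? alignInBytes >= 4 : alignInBytes != 2;
    }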
@@ -1320,18 +1432,17 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
}
EVT SITargetLowering::getOptimalMemOpType(
- uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
- bool ZeroMemset, bool MemcpyStrSrc,
- const AttributeList &FuncAttributes) const {
+ const MemOp &Op, const AttributeList &FuncAttributes) const {
// FIXME: Should account for address space here.
// The default fallback uses the private pointer size as a guess for a type to
// use. Make sure we switch these to 64-bit accesses.
- if (Size >= 16 && DstAlign >= 4) // XXX: Should only do for global
+ if (Op.size() >= 16 &&
+ Op.isDstAligned(Align(4))) // XXX: Should only do for global
return MVT::v4i32;
- if (Size >= 8 && DstAlign >= 4)
+ if (Op.size() >= 8 && Op.isDstAligned(Align(4)))
return MVT::v2i32;
// Use the default.
@@ -1416,9 +1527,10 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
const ArgDescriptor *InputPtrReg;
const TargetRegisterClass *RC;
+ LLT ArgTy;
- std::tie(InputPtrReg, RC)
- = Info->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
+ std::tie(InputPtrReg, RC, ArgTy) =
+ Info->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
@@ -1457,7 +1569,7 @@ SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
}
if (MemVT.isFloatingPoint())
- Val = getFPExtOrFPTrunc(DAG, Val, SL, VT);
+ Val = getFPExtOrFPRound(DAG, Val, SL, VT);
else if (Signed)
Val = DAG.getSExtOrTrunc(Val, SL, VT);
else
@@ -1467,16 +1579,15 @@ SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
}
SDValue SITargetLowering::lowerKernargMemParameter(
- SelectionDAG &DAG, EVT VT, EVT MemVT,
- const SDLoc &SL, SDValue Chain,
- uint64_t Offset, unsigned Align, bool Signed,
- const ISD::InputArg *Arg) const {
+ SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Chain,
+ uint64_t Offset, Align Alignment, bool Signed,
+ const ISD::InputArg *Arg) const {
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
// Try to avoid using an extload by loading earlier than the argument address,
// and extracting the relevant bits. The load should hopefully be merged with
// the previous argument.
- if (MemVT.getStoreSize() < 4 && Align < 4) {
+ if (MemVT.getStoreSize() < 4 && Alignment < 4) {
// TODO: Handle align < 4 and size >= 4 (can happen with packed structs).
int64_t AlignDownOffset = alignDown(Offset, 4);
int64_t OffsetDiff = Offset - AlignDownOffset;
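Note: the extload-avoidance trick started in the hunk above can be pictured with a small host-side sketch; the function and parameter names here are made up for illustration:

    #include <cstdint>
    #include <cstring>

    // A byte or short kernel argument at a sub-dword offset is fetched by
    // loading the enclosing 4-byte-aligned dword and shifting the wanted bytes
    // down (little endian), instead of emitting a narrow extending load.
    // Assumes the value does not straddle the aligned dword, which holds when
    // arguments keep their natural alignment.
    uint32_t loadSmallKernArg(const uint8_t *kernargBase, uint64_t offset,
                              unsigned sizeInBytes /* 1 or 2 */) {
      uint64_t alignedDown = offset & ~uint64_t(3);      // alignDown(offset, 4)
      uint32_t dword;
      std::memcpy(&dword, kernargBase + alignedDown, 4); // aligned dword load
      unsigned shift = unsigned(offset - alignedDown) * 8;
      uint32_t mask = (1u << (sizeInBytes * 8)) - 1u;
      return (dword >> shift) & mask;
    }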
@@ -1502,9 +1613,9 @@ SDValue SITargetLowering::lowerKernargMemParameter(
}
SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, Offset);
- SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Align,
+ SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Alignment,
MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant);
+ MachineMemOperand::MOInvariant);
SDValue Val = convertArgType(DAG, VT, MemVT, SL, Load, Signed, Arg);
return DAG.getMergeValues({ Val, Load.getValue(1) }, SL);
@@ -1565,8 +1676,9 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
AMDGPUFunctionArgInfo::PreloadedValue PVID) const {
const ArgDescriptor *Reg;
const TargetRegisterClass *RC;
+ LLT Ty;
- std::tie(Reg, RC) = MFI.getPreloadedValue(PVID);
+ std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID);
return CreateLiveInRegister(DAG, RC, Reg->getRegister(), VT);
}
@@ -1666,7 +1778,7 @@ static ArgDescriptor allocateVGPR32Input(CCState &CCInfo, unsigned Mask = ~0u,
unsigned RegIdx = CCInfo.getFirstUnallocated(ArgVGPRs);
if (RegIdx == ArgVGPRs.size()) {
// Spill to stack required.
- int64_t Offset = CCInfo.AllocateStack(4, 4);
+ int64_t Offset = CCInfo.AllocateStack(4, Align(4));
return ArgDescriptor::createStack(Offset, Mask);
}
@@ -1706,10 +1818,11 @@ static ArgDescriptor allocateSGPR64Input(CCState &CCInfo) {
return allocateSGPR32InputImpl(CCInfo, &AMDGPU::SGPR_64RegClass, 16);
}
-void SITargetLowering::allocateSpecialInputVGPRs(CCState &CCInfo,
- MachineFunction &MF,
- const SIRegisterInfo &TRI,
- SIMachineFunctionInfo &Info) const {
+/// Allocate implicit function VGPR arguments at the end of allocated user
+/// arguments.
+void SITargetLowering::allocateSpecialInputVGPRs(
+ CCState &CCInfo, MachineFunction &MF,
+ const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const {
const unsigned Mask = 0x3ff;
ArgDescriptor Arg;
@@ -1727,6 +1840,20 @@ void SITargetLowering::allocateSpecialInputVGPRs(CCState &CCInfo,
Info.setWorkItemIDZ(allocateVGPR32Input(CCInfo, Mask << 20, Arg));
}
+/// Allocate implicit function VGPR arguments in fixed registers.
+void SITargetLowering::allocateSpecialInputVGPRsFixed(
+ CCState &CCInfo, MachineFunction &MF,
+ const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const {
+ Register Reg = CCInfo.AllocateReg(AMDGPU::VGPR31);
+ if (!Reg)
+ report_fatal_error("failed to allocated VGPR for implicit arguments");
+
+ const unsigned Mask = 0x3ff;
+ Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg, Mask));
+ Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg, Mask << 10));
+ Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg, Mask << 20));
+}
+
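Note: the masks used above split a single 32-bit VGPR into three 10-bit workitem IDs. A standalone sketch of that packing (illustrative only):

    #include <cstdint>

    // X lives in bits [9:0], Y in [19:10], Z in [29:20] of the packed register.
    uint32_t packWorkItemIDs(uint32_t x, uint32_t y, uint32_t z) {
      return (x & 0x3ff) | ((y & 0x3ff) << 10) | ((z & 0x3ff) << 20);
    }
    uint32_t workItemIDX(uint32_t packed) { return packed & 0x3ff; }
    uint32_t workItemIDY(uint32_t packed) { return (packed >> 10) & 0x3ff; }
    uint32_t workItemIDZ(uint32_t packed) { return (packed >> 20) & 0x3ff; }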
void SITargetLowering::allocateSpecialInputSGPRs(
CCState &CCInfo,
MachineFunction &MF,
@@ -1742,8 +1869,10 @@ void SITargetLowering::allocateSpecialInputSGPRs(
if (Info.hasQueuePtr())
ArgInfo.QueuePtr = allocateSGPR64Input(CCInfo);
- if (Info.hasKernargSegmentPtr())
- ArgInfo.KernargSegmentPtr = allocateSGPR64Input(CCInfo);
+ // Implicit arg ptr takes the place of the kernarg segment pointer. This is a
+ // constant offset from the kernarg segment.
+ if (Info.hasImplicitArgPtr())
+ ArgInfo.ImplicitArgPtr = allocateSGPR64Input(CCInfo);
if (Info.hasDispatchID())
ArgInfo.DispatchID = allocateSGPR64Input(CCInfo);
@@ -1758,9 +1887,6 @@ void SITargetLowering::allocateSpecialInputSGPRs(
if (Info.hasWorkGroupIDZ())
ArgInfo.WorkGroupIDZ = allocateSGPR32Input(CCInfo);
-
- if (Info.hasImplicitArgPtr())
- ArgInfo.ImplicitArgPtr = allocateSGPR64Input(CCInfo);
}
// Allocate special inputs passed in user SGPRs.
@@ -1916,67 +2042,45 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
Info.setScratchRSrcReg(ReservedBufferReg);
}
- // hasFP should be accurate for kernels even before the frame is finalized.
- if (ST.getFrameLowering()->hasFP(MF)) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
- // Try to use s32 as the SP, but move it if it would interfere with input
- // arguments. This won't work with calls though.
- //
- // FIXME: Move SP to avoid any possible inputs, or find a way to spill input
- // registers.
- if (!MRI.isLiveIn(AMDGPU::SGPR32)) {
- Info.setStackPtrOffsetReg(AMDGPU::SGPR32);
- } else {
- assert(AMDGPU::isShader(MF.getFunction().getCallingConv()));
+ // For entry functions we have to set up the stack pointer if we use it,
+ // whereas non-entry functions get this "for free". This means there is no
+ // intrinsic advantage to using S32 over S34 in cases where we do not have
+ // calls but do need a frame pointer (i.e. if we are requested to have one
+ // because frame pointer elimination is disabled). To keep things simple we
+ // only ever use S32 as the call ABI stack pointer, and so using it does not
+ // imply we need a separate frame pointer.
+ //
+ // Try to use s32 as the SP, but move it if it would interfere with input
+ // arguments. This won't work with calls though.
+ //
+ // FIXME: Move SP to avoid any possible inputs, or find a way to spill input
+ // registers.
+ if (!MRI.isLiveIn(AMDGPU::SGPR32)) {
+ Info.setStackPtrOffsetReg(AMDGPU::SGPR32);
+ } else {
+ assert(AMDGPU::isShader(MF.getFunction().getCallingConv()));
- if (MFI.hasCalls())
- report_fatal_error("call in graphics shader with too many input SGPRs");
+ if (MFI.hasCalls())
+ report_fatal_error("call in graphics shader with too many input SGPRs");
- for (unsigned Reg : AMDGPU::SGPR_32RegClass) {
- if (!MRI.isLiveIn(Reg)) {
- Info.setStackPtrOffsetReg(Reg);
- break;
- }
+ for (unsigned Reg : AMDGPU::SGPR_32RegClass) {
+ if (!MRI.isLiveIn(Reg)) {
+ Info.setStackPtrOffsetReg(Reg);
+ break;
}
-
- if (Info.getStackPtrOffsetReg() == AMDGPU::SP_REG)
- report_fatal_error("failed to find register for SP");
}
- if (MFI.hasCalls()) {
- Info.setScratchWaveOffsetReg(AMDGPU::SGPR33);
- Info.setFrameOffsetReg(AMDGPU::SGPR33);
- } else {
- unsigned ReservedOffsetReg =
- TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
- Info.setScratchWaveOffsetReg(ReservedOffsetReg);
- Info.setFrameOffsetReg(ReservedOffsetReg);
- }
- } else if (RequiresStackAccess) {
- assert(!MFI.hasCalls());
- // We know there are accesses and they will be done relative to SP, so just
- // pin it to the input.
- //
- // FIXME: Should not do this if inline asm is reading/writing these
- // registers.
- Register PreloadedSP = Info.getPreloadedReg(
- AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
-
- Info.setStackPtrOffsetReg(PreloadedSP);
- Info.setScratchWaveOffsetReg(PreloadedSP);
- Info.setFrameOffsetReg(PreloadedSP);
- } else {
- assert(!MFI.hasCalls());
+ if (Info.getStackPtrOffsetReg() == AMDGPU::SP_REG)
+ report_fatal_error("failed to find register for SP");
+ }
- // There may not be stack access at all. There may still be spills, or
- // access of a constant pointer (in which cases an extra copy will be
- // emitted in the prolog).
- unsigned ReservedOffsetReg
- = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
- Info.setStackPtrOffsetReg(ReservedOffsetReg);
- Info.setScratchWaveOffsetReg(ReservedOffsetReg);
- Info.setFrameOffsetReg(ReservedOffsetReg);
+ // hasFP should be accurate for entry functions even before the frame is
+ // finalized, because it does not rely on the known stack size, only
+ // properties like whether variable sized objects are present.
+ if (ST.getFrameLowering()->hasFP(MF)) {
+ Info.setFrameOffsetReg(AMDGPU::SGPR33);
}
}
@@ -2110,6 +2214,10 @@ SDValue SITargetLowering::LowerFormalArguments(
if (IsEntryFunc) {
allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
allocateHSAUserSGPRs(CCInfo, MF, *TRI, *Info);
+ } else {
+ // For the fixed ABI, pass workitem IDs in the last argument register.
+ if (AMDGPUTargetMachine::EnableFixedFunctionABI)
+ allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
}
if (IsKernel) {
@@ -2126,9 +2234,9 @@ SDValue SITargetLowering::LowerFormalArguments(
//
// FIXME: Alignment of explicit arguments totally broken with non-0 explicit
// kern arg offset.
- const unsigned KernelArgBaseAlign = 16;
+ const Align KernelArgBaseAlign = Align(16);
- for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
+ for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
const ISD::InputArg &Arg = Ins[i];
if (Arg.isOrigArg() && Skipped[Arg.getOrigArgIndex()]) {
InVals.push_back(DAG.getUNDEF(Arg.VT));
@@ -2143,10 +2251,11 @@ SDValue SITargetLowering::LowerFormalArguments(
EVT MemVT = VA.getLocVT();
const uint64_t Offset = VA.getLocMemOffset();
- unsigned Align = MinAlign(KernelArgBaseAlign, Offset);
+ Align Alignment = commonAlignment(KernelArgBaseAlign, Offset);
- SDValue Arg = lowerKernargMemParameter(
- DAG, VT, MemVT, DL, Chain, Offset, Align, Ins[i].Flags.isSExt(), &Ins[i]);
+ SDValue Arg =
+ lowerKernargMemParameter(DAG, VT, MemVT, DL, Chain, Offset, Alignment,
+ Ins[i].Flags.isSExt(), &Ins[i]);
Chains.push_back(Arg.getValue(1));
auto *ParamTy =
@@ -2221,7 +2330,7 @@ SDValue SITargetLowering::LowerFormalArguments(
InVals.push_back(Val);
}
- if (!IsEntryFunc) {
+ if (!IsEntryFunc && !AMDGPUTargetMachine::EnableFixedFunctionABI) {
// Special inputs come after user arguments.
allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
}
@@ -2231,8 +2340,6 @@ SDValue SITargetLowering::LowerFormalArguments(
allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsShader);
} else {
CCInfo.AllocateReg(Info->getScratchRSrcReg());
- CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
- CCInfo.AllocateReg(Info->getFrameOffsetReg());
allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
}
@@ -2442,50 +2549,51 @@ void SITargetLowering::passSpecialInputs(
SDValue Chain) const {
// If we don't have a call site, this was a call inserted by
// legalization. These can never use special inputs.
- if (!CLI.CS)
+ if (!CLI.CB)
return;
- const Function *CalleeFunc = CLI.CS.getCalledFunction();
- assert(CalleeFunc);
-
SelectionDAG &DAG = CLI.DAG;
const SDLoc &DL = CLI.DL;
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
-
- auto &ArgUsageInfo =
- DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfo>();
- const AMDGPUFunctionArgInfo &CalleeArgInfo
- = ArgUsageInfo.lookupFuncArgInfo(*CalleeFunc);
-
const AMDGPUFunctionArgInfo &CallerArgInfo = Info.getArgInfo();
+ const AMDGPUFunctionArgInfo *CalleeArgInfo
+ = &AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
+ if (const Function *CalleeFunc = CLI.CB->getCalledFunction()) {
+ auto &ArgUsageInfo =
+ DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfo>();
+ CalleeArgInfo = &ArgUsageInfo.lookupFuncArgInfo(*CalleeFunc);
+ }
+
// TODO: Unify with private memory register handling. This is complicated by
// the fact that at least in kernels, the input argument is not necessarily
// in the same location as the input.
AMDGPUFunctionArgInfo::PreloadedValue InputRegs[] = {
AMDGPUFunctionArgInfo::DISPATCH_PTR,
AMDGPUFunctionArgInfo::QUEUE_PTR,
- AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR,
+ AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR,
AMDGPUFunctionArgInfo::DISPATCH_ID,
AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,
- AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Z
};
for (auto InputID : InputRegs) {
const ArgDescriptor *OutgoingArg;
const TargetRegisterClass *ArgRC;
+ LLT ArgTy;
- std::tie(OutgoingArg, ArgRC) = CalleeArgInfo.getPreloadedValue(InputID);
+ std::tie(OutgoingArg, ArgRC, ArgTy) =
+ CalleeArgInfo->getPreloadedValue(InputID);
if (!OutgoingArg)
continue;
const ArgDescriptor *IncomingArg;
const TargetRegisterClass *IncomingArgRC;
- std::tie(IncomingArg, IncomingArgRC)
- = CallerArgInfo.getPreloadedValue(InputID);
+ LLT Ty;
+ std::tie(IncomingArg, IncomingArgRC, Ty) =
+ CallerArgInfo.getPreloadedValue(InputID);
assert(IncomingArgRC == ArgRC);
// All special arguments are ints for now.
@@ -2503,8 +2611,11 @@ void SITargetLowering::passSpecialInputs(
if (OutgoingArg->isRegister()) {
RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
+ if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
+ report_fatal_error("failed to allocate implicit input argument");
} else {
- unsigned SpecialArgOffset = CCInfo.AllocateStack(ArgVT.getStoreSize(), 4);
+ unsigned SpecialArgOffset =
+ CCInfo.AllocateStack(ArgVT.getStoreSize(), Align(4));
SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
SpecialArgOffset);
MemOpChains.push_back(ArgStore);
@@ -2515,33 +2626,34 @@ void SITargetLowering::passSpecialInputs(
// packed.
const ArgDescriptor *OutgoingArg;
const TargetRegisterClass *ArgRC;
+ LLT Ty;
- std::tie(OutgoingArg, ArgRC) =
- CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
+ std::tie(OutgoingArg, ArgRC, Ty) =
+ CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
if (!OutgoingArg)
- std::tie(OutgoingArg, ArgRC) =
- CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
+ std::tie(OutgoingArg, ArgRC, Ty) =
+ CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
if (!OutgoingArg)
- std::tie(OutgoingArg, ArgRC) =
- CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
+ std::tie(OutgoingArg, ArgRC, Ty) =
+ CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
if (!OutgoingArg)
return;
- const ArgDescriptor *IncomingArgX
- = CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X).first;
- const ArgDescriptor *IncomingArgY
- = CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y).first;
- const ArgDescriptor *IncomingArgZ
- = CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z).first;
+ const ArgDescriptor *IncomingArgX = std::get<0>(
+ CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X));
+ const ArgDescriptor *IncomingArgY = std::get<0>(
+ CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y));
+ const ArgDescriptor *IncomingArgZ = std::get<0>(
+ CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z));
SDValue InputReg;
SDLoc SL;
// If incoming ids are not packed we need to pack them.
- if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo.WorkItemIDX)
+ if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX)
InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgX);
- if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo.WorkItemIDY) {
+ if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY) {
SDValue Y = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgY);
Y = DAG.getNode(ISD::SHL, SL, MVT::i32, Y,
DAG.getShiftAmountConstant(10, MVT::i32, SL));
@@ -2549,7 +2661,7 @@ void SITargetLowering::passSpecialInputs(
DAG.getNode(ISD::OR, SL, MVT::i32, InputReg, Y) : Y;
}
- if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo.WorkItemIDZ) {
+ if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ) {
SDValue Z = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgZ);
Z = DAG.getNode(ISD::SHL, SL, MVT::i32, Z,
DAG.getShiftAmountConstant(20, MVT::i32, SL));
@@ -2569,8 +2681,9 @@ void SITargetLowering::passSpecialInputs(
if (OutgoingArg->isRegister()) {
RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
+ CCInfo.AllocateReg(OutgoingArg->getRegister());
} else {
- unsigned SpecialArgOffset = CCInfo.AllocateStack(4, 4);
+ unsigned SpecialArgOffset = CCInfo.AllocateStack(4, Align(4));
SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
SpecialArgOffset);
MemOpChains.push_back(ArgStore);
@@ -2703,10 +2816,11 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
"unsupported call to variadic function ");
}
- if (!CLI.CS.getInstruction())
+ if (!CLI.CB)
report_fatal_error("unsupported libcall legalization");
- if (!CLI.CS.getCalledFunction()) {
+ if (!AMDGPUTargetMachine::EnableFixedFunctionABI &&
+ !CLI.CB->getCalledFunction()) {
return lowerUnhandledCall(CLI, InVals,
"unsupported indirect call to function ");
}
@@ -2726,7 +2840,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
if (IsTailCall) {
IsTailCall = isEligibleForTailCallOptimization(
Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
- if (!IsTailCall && CLI.CS && CLI.CS.isMustTailCall()) {
+ if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) {
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
}
@@ -2743,12 +2857,19 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
}
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, IsVarArg);
+ if (AMDGPUTargetMachine::EnableFixedFunctionABI) {
+ // With a fixed ABI, allocate fixed registers before user arguments.
+ passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);
+ }
+
CCInfo.AnalyzeCallOperands(Outs, AssignFn);
// Get a count of how many bytes are to be pushed on the stack.
@@ -2767,7 +2888,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
// arguments to begin at SP+0. Completely unused for non-tail calls.
int32_t FPDiff = 0;
MachineFrameInfo &MFI = MF.getFrameInfo();
- SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
@@ -2784,7 +2904,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
Chain = DAG.getTokenFactor(DL, CopyFromChains);
}
- SmallVector<SDValue, 8> MemOpChains;
MVT PtrVT = MVT::i32;
// Walk the register/memloc assignments, inserting copies/loads.
@@ -2837,7 +2956,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
// FIXME: We can have better than the minimum byval required alignment.
Alignment =
Flags.isByVal()
- ? MaybeAlign(Flags.getByValAlign())
+ ? Flags.getNonZeroByValAlign()
: commonAlignment(Subtarget->getStackAlignment(), Offset);
Offset = Offset + FPDiff;
@@ -2864,11 +2983,12 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
if (Outs[i].Flags.isByVal()) {
SDValue SizeNode =
DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i32);
- SDValue Cpy = DAG.getMemcpy(
- Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
- /*isVol = */ false, /*AlwaysInline = */ true,
- /*isTailCall = */ false, DstInfo,
- MachinePointerInfo(AMDGPUAS::PRIVATE_ADDRESS));
+ SDValue Cpy =
+ DAG.getMemcpy(Chain, DL, DstAddr, Arg, SizeNode,
+ Outs[i].Flags.getNonZeroByValAlign(),
+ /*isVol = */ false, /*AlwaysInline = */ true,
+ /*isTailCall = */ false, DstInfo,
+ MachinePointerInfo(AMDGPUAS::PRIVATE_ADDRESS));
MemOpChains.push_back(Cpy);
} else {
@@ -2879,8 +2999,10 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
}
}
- // Copy special input registers after user input arguments.
- passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);
+ if (!AMDGPUTargetMachine::EnableFixedFunctionABI) {
+ // Copy special input registers after user input arguments.
+ passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);
+ }
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
@@ -2927,9 +3049,12 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
Ops.push_back(Callee);
// Add a redundant copy of the callee global which will not be legalized, as
// we need direct access to the callee later.
- GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Callee);
- const GlobalValue *GV = GSD->getGlobal();
- Ops.push_back(DAG.getTargetGlobalAddress(GV, DL, MVT::i64));
+ if (GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = GSD->getGlobal();
+ Ops.push_back(DAG.getTargetGlobalAddress(GV, DL, MVT::i64));
+ } else {
+ Ops.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
+ }
if (IsTailCall) {
// Each tail call may have to adjust the stack by a different amount, so
@@ -2985,6 +3110,71 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
IsThisReturn ? OutVals[0] : SDValue());
}
+// This is identical to the default implementation in ExpandDYNAMIC_STACKALLOC,
+// except for applying the wave size scale to the increment amount.
+SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(
+ SDValue Op, SelectionDAG &DAG) const {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ SDValue Tmp1 = Op;
+ SDValue Tmp2 = Op.getValue(1);
+ SDValue Tmp3 = Op.getOperand(2);
+ SDValue Chain = Tmp1.getOperand(0);
+
+ Register SPReg = Info->getStackPtrOffsetReg();
+
+ // Chain the dynamic stack allocation so that it doesn't modify the stack
+ // pointer when other instructions are using the stack.
+ Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
+
+ SDValue Size = Tmp2.getOperand(1);
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+ Chain = SP.getValue(1);
+ MaybeAlign Alignment = cast<ConstantSDNode>(Tmp3)->getMaybeAlignValue();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const TargetFrameLowering *TFL = ST.getFrameLowering();
+ unsigned Opc =
+ TFL->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ?
+ ISD::ADD : ISD::SUB;
+
+ SDValue ScaledSize = DAG.getNode(
+ ISD::SHL, dl, VT, Size,
+ DAG.getConstant(ST.getWavefrontSizeLog2(), dl, MVT::i32));
+
+ Align StackAlign = TFL->getStackAlign();
+ Tmp1 = DAG.getNode(Opc, dl, VT, SP, ScaledSize); // Value
+ if (Alignment && *Alignment > StackAlign) {
+ Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
+ DAG.getConstant(-(uint64_t)Alignment->value()
+ << ST.getWavefrontSizeLog2(),
+ dl, VT));
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
+ Tmp2 = DAG.getCALLSEQ_END(
+ Chain, DAG.getIntPtrConstant(0, dl, true),
+ DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
+
+ return DAG.getMergeValues({Tmp1, Tmp2}, dl);
+}
+
+SDValue SITargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ // We only handle constant sizes here to allow non-entry block, static sized
+ // allocas. A truly dynamic value is more difficult to support because we
+ // don't know if the size value is uniform or not. If the size isn't uniform,
+ // we would need to do a wave reduction to get the maximum size to know how
+ // much to increment the uniform stack pointer.
+ SDValue Size = Op.getOperand(1);
+ if (isa<ConstantSDNode>(Size))
+ return lowerDYNAMIC_STACKALLOCImpl(Op, DAG); // Use "generic" expansion.
+
+ return AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(Op, DAG);
+}
+
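Note: the wave-size scaling in lowerDYNAMIC_STACKALLOCImpl above can be summarized with a host-side arithmetic sketch (illustrative only, not the DAG code):

    #include <cstdint>

    // The scratch stack pointer is a per-wave byte offset, so allocating
    // `size` bytes per lane advances SP by size << log2(wavefront size).
    // An over-alignment request is scaled the same way before masking, which
    // mirrors the AND with -(alignment << wavefrontSizeLog2) above.
    uint32_t bumpWaveScratchSP(uint32_t sp, uint32_t size,
                               unsigned waveSizeLog2, uint32_t alignment) {
      uint32_t newSP = sp + (size << waveSizeLog2);  // scratch grows upward
      if (alignment > 1)
        newSP &= ~((alignment << waveSizeLog2) - 1);
      return newSP;
    }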
Register SITargetLowering::getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const {
Register Reg = StringSwitch<Register>(RegName)
@@ -3310,9 +3500,15 @@ static MachineBasicBlock::iterator loadM0FromVGPR(const SIInstrInfo *TII,
auto InsPt = emitLoadM0FromVGPRLoop(TII, MRI, MBB, *LoopBB, DL, *Idx,
InitResultReg, DstReg, PhiReg, TmpExec,
Offset, UseGPRIdxMode, IsIndirectSrc);
-
- MachineBasicBlock::iterator First = RemainderBB->begin();
- BuildMI(*RemainderBB, First, DL, TII->get(MovExecOpc), Exec)
+ MachineBasicBlock* LandingPad = MF->CreateMachineBasicBlock();
+ MachineFunction::iterator MBBI(LoopBB);
+ ++MBBI;
+ MF->insert(MBBI, LandingPad);
+ LoopBB->removeSuccessor(RemainderBB);
+ LandingPad->addSuccessor(RemainderBB);
+ LoopBB->addSuccessor(LandingPad);
+ MachineBasicBlock::iterator First = LandingPad->begin();
+ BuildMI(*LandingPad, First, DL, TII->get(MovExecOpc), Exec)
.addReg(SaveExec);
return InsPt;
@@ -3331,7 +3527,7 @@ computeIndirectRegAndOffset(const SIRegisterInfo &TRI,
if (Offset >= NumElts || Offset < 0)
return std::make_pair(AMDGPU::sub0, Offset);
- return std::make_pair(AMDGPU::sub0 + Offset, 0);
+ return std::make_pair(SIRegisterInfo::getSubRegFromChannel(Offset), 0);
}
// Return true if the index is an SGPR and was set.
@@ -3465,24 +3661,6 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
return LoopBB;
}
-static unsigned getMOVRELDPseudo(const SIRegisterInfo &TRI,
- const TargetRegisterClass *VecRC) {
- switch (TRI.getRegSizeInBits(*VecRC)) {
- case 32: // 4 bytes
- return AMDGPU::V_MOVRELD_B32_V1;
- case 64: // 8 bytes
- return AMDGPU::V_MOVRELD_B32_V2;
- case 128: // 16 bytes
- return AMDGPU::V_MOVRELD_B32_V4;
- case 256: // 32 bytes
- return AMDGPU::V_MOVRELD_B32_V8;
- case 512: // 64 bytes
- return AMDGPU::V_MOVRELD_B32_V16;
- default:
- llvm_unreachable("unsupported size for MOVRELD pseudos");
- }
-}
-
static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
MachineBasicBlock &MBB,
const GCNSubtarget &ST) {
@@ -3522,28 +3700,18 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
return &MBB;
}
+ const MCInstrDesc &MovRelDesc
+ = TII->getIndirectRegWritePseudo(TRI.getRegSizeInBits(*VecRC), 32, false);
+
if (setM0ToIndexFromSGPR(TII, MRI, MI, Offset, UseGPRIdxMode, false)) {
MachineBasicBlock::iterator I(&MI);
const DebugLoc &DL = MI.getDebugLoc();
-
- if (UseGPRIdxMode) {
- BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_indirect))
- .addReg(SrcVec->getReg(), RegState::Undef, SubReg) // vdst
- .add(*Val)
- .addReg(Dst, RegState::ImplicitDefine)
- .addReg(SrcVec->getReg(), RegState::Implicit)
- .addReg(AMDGPU::M0, RegState::Implicit);
-
+ BuildMI(MBB, I, DL, MovRelDesc, Dst)
+ .addReg(SrcVec->getReg())
+ .add(*Val)
+ .addImm(SubReg);
+ if (UseGPRIdxMode)
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
- } else {
- const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(TRI, VecRC));
-
- BuildMI(MBB, I, DL, MovRelDesc)
- .addReg(Dst, RegState::Define)
- .addReg(SrcVec->getReg())
- .add(*Val)
- .addImm(SubReg - AMDGPU::sub0);
- }
MI.eraseFromParent();
return &MBB;
@@ -3560,26 +3728,14 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
Offset, UseGPRIdxMode, false);
MachineBasicBlock *LoopBB = InsPt->getParent();
- if (UseGPRIdxMode) {
- BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOV_B32_indirect))
- .addReg(PhiReg, RegState::Undef, SubReg) // vdst
- .add(*Val) // src0
- .addReg(Dst, RegState::ImplicitDefine)
- .addReg(PhiReg, RegState::Implicit)
- .addReg(AMDGPU::M0, RegState::Implicit);
+ BuildMI(*LoopBB, InsPt, DL, MovRelDesc, Dst)
+ .addReg(PhiReg)
+ .add(*Val)
+ .addImm(AMDGPU::sub0);
+ if (UseGPRIdxMode)
BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
- } else {
- const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(TRI, VecRC));
-
- BuildMI(*LoopBB, InsPt, DL, MovRelDesc)
- .addReg(Dst, RegState::Define)
- .addReg(PhiReg)
- .add(*Val)
- .addImm(SubReg - AMDGPU::sub0);
- }
MI.eraseFromParent();
-
return LoopBB;
}
@@ -3590,17 +3746,27 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
MachineFunction *MF = BB->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- if (TII->isMIMG(MI)) {
- if (MI.memoperands_empty() && MI.mayLoadOrStore()) {
- report_fatal_error("missing mem operand from MIMG instruction");
- }
- // Add a memoperand for mimg instructions so that they aren't assumed to
- // be ordered memory instuctions.
+ switch (MI.getOpcode()) {
+ case AMDGPU::S_UADDO_PSEUDO:
+ case AMDGPU::S_USUBO_PSEUDO: {
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineOperand &Dest0 = MI.getOperand(0);
+ MachineOperand &Dest1 = MI.getOperand(1);
+ MachineOperand &Src0 = MI.getOperand(2);
+ MachineOperand &Src1 = MI.getOperand(3);
+
+ unsigned Opc = (MI.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
+ ? AMDGPU::S_ADD_I32
+ : AMDGPU::S_SUB_I32;
+ BuildMI(*BB, MI, DL, TII->get(Opc), Dest0.getReg()).add(Src0).add(Src1);
+
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_CSELECT_B64), Dest1.getReg())
+ .addImm(1)
+ .addImm(0);
+ MI.eraseFromParent();
return BB;
}
-
- switch (MI.getOpcode()) {
case AMDGPU::S_ADD_U64_PSEUDO:
case AMDGPU::S_SUB_U64_PSEUDO: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
@@ -3616,35 +3782,150 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
- MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm(MI, MRI,
- Src0, BoolRC, AMDGPU::sub0,
- &AMDGPU::SReg_32RegClass);
- MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm(MI, MRI,
- Src0, BoolRC, AMDGPU::sub1,
- &AMDGPU::SReg_32RegClass);
+ MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm(
+ MI, MRI, Src0, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
+ MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm(
+ MI, MRI, Src0, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
- MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm(MI, MRI,
- Src1, BoolRC, AMDGPU::sub0,
- &AMDGPU::SReg_32RegClass);
- MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm(MI, MRI,
- Src1, BoolRC, AMDGPU::sub1,
- &AMDGPU::SReg_32RegClass);
+ MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm(
+ MI, MRI, Src1, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
+ MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm(
+ MI, MRI, Src1, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
bool IsAdd = (MI.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
unsigned LoOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
unsigned HiOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
- BuildMI(*BB, MI, DL, TII->get(LoOpc), DestSub0)
- .add(Src0Sub0)
- .add(Src1Sub0);
- BuildMI(*BB, MI, DL, TII->get(HiOpc), DestSub1)
- .add(Src0Sub1)
- .add(Src1Sub1);
+ BuildMI(*BB, MI, DL, TII->get(LoOpc), DestSub0).add(Src0Sub0).add(Src1Sub0);
+ BuildMI(*BB, MI, DL, TII->get(HiOpc), DestSub1).add(Src0Sub1).add(Src1Sub1);
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest.getReg())
- .addReg(DestSub0)
- .addImm(AMDGPU::sub0)
- .addReg(DestSub1)
- .addImm(AMDGPU::sub1);
+ .addReg(DestSub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DestSub1)
+ .addImm(AMDGPU::sub1);
+ MI.eraseFromParent();
+ return BB;
+ }
+ case AMDGPU::V_ADD_U64_PSEUDO:
+ case AMDGPU::V_SUB_U64_PSEUDO: {
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ bool IsAdd = (MI.getOpcode() == AMDGPU::V_ADD_U64_PSEUDO);
+
+ const auto *CarryRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
+
+ Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ Register CarryReg = MRI.createVirtualRegister(CarryRC);
+ Register DeadCarryReg = MRI.createVirtualRegister(CarryRC);
+
+ MachineOperand &Dest = MI.getOperand(0);
+ MachineOperand &Src0 = MI.getOperand(1);
+ MachineOperand &Src1 = MI.getOperand(2);
+
+ const TargetRegisterClass *Src0RC = Src0.isReg()
+ ? MRI.getRegClass(Src0.getReg())
+ : &AMDGPU::VReg_64RegClass;
+ const TargetRegisterClass *Src1RC = Src1.isReg()
+ ? MRI.getRegClass(Src1.getReg())
+ : &AMDGPU::VReg_64RegClass;
+
+ const TargetRegisterClass *Src0SubRC =
+ TRI->getSubRegClass(Src0RC, AMDGPU::sub0);
+ const TargetRegisterClass *Src1SubRC =
+ TRI->getSubRegClass(Src1RC, AMDGPU::sub1);
+
+ MachineOperand SrcReg0Sub0 = TII->buildExtractSubRegOrImm(
+ MI, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
+ MachineOperand SrcReg1Sub0 = TII->buildExtractSubRegOrImm(
+ MI, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC);
+
+ MachineOperand SrcReg0Sub1 = TII->buildExtractSubRegOrImm(
+ MI, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
+ MachineOperand SrcReg1Sub1 = TII->buildExtractSubRegOrImm(
+ MI, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC);
+
+ unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
+ MachineInstr *LoHalf = BuildMI(*BB, MI, DL, TII->get(LoOpc), DestSub0)
+ .addReg(CarryReg, RegState::Define)
+ .add(SrcReg0Sub0)
+ .add(SrcReg1Sub0)
+ .addImm(0); // clamp bit
+
+ unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
+ MachineInstr *HiHalf =
+ BuildMI(*BB, MI, DL, TII->get(HiOpc), DestSub1)
+ .addReg(DeadCarryReg, RegState::Define | RegState::Dead)
+ .add(SrcReg0Sub1)
+ .add(SrcReg1Sub1)
+ .addReg(CarryReg, RegState::Kill)
+ .addImm(0); // clamp bit
+
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest.getReg())
+ .addReg(DestSub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DestSub1)
+ .addImm(AMDGPU::sub1);
+ TII->legalizeOperands(*LoHalf);
+ TII->legalizeOperands(*HiHalf);
+ MI.eraseFromParent();
+ return BB;
+ }
+ case AMDGPU::S_ADD_CO_PSEUDO:
+ case AMDGPU::S_SUB_CO_PSEUDO: {
+ // This pseudo can only be selected from a uniform add/subcarry node, so
+ // all of its VGPR operands are assumed to be splat vectors.
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ MachineBasicBlock::iterator MII = MI;
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineOperand &Dest = MI.getOperand(0);
+ MachineOperand &CarryDest = MI.getOperand(1);
+ MachineOperand &Src0 = MI.getOperand(2);
+ MachineOperand &Src1 = MI.getOperand(3);
+ MachineOperand &Src2 = MI.getOperand(4);
+ unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
+ ? AMDGPU::S_ADDC_U32
+ : AMDGPU::S_SUBB_U32;
+ if (Src0.isReg() && TRI->isVectorRegister(MRI, Src0.getReg())) {
+ Register RegOp0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp0)
+ .addReg(Src0.getReg());
+ Src0.setReg(RegOp0);
+ }
+ if (Src1.isReg() && TRI->isVectorRegister(MRI, Src1.getReg())) {
+ Register RegOp1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp1)
+ .addReg(Src1.getReg());
+ Src1.setReg(RegOp1);
+ }
+ Register RegOp2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ if (TRI->isVectorRegister(MRI, Src2.getReg())) {
+ BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp2)
+ .addReg(Src2.getReg());
+ Src2.setReg(RegOp2);
+ }
+
+ if (TRI->getRegSizeInBits(*MRI.getRegClass(Src2.getReg())) == 64) {
+ BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U64))
+ .addReg(Src2.getReg())
+ .addImm(0);
+ } else {
+ BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMPK_LG_U32))
+ .addReg(Src2.getReg())
+ .addImm(0);
+ }
+
+ BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg()).add(Src0).add(Src1);
+
+ BuildMI(*BB, MII, DL, TII->get(AMDGPU::COPY), CarryDest.getReg())
+ .addReg(AMDGPU::SCC);
MI.eraseFromParent();
return BB;
}
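Note: the V_ADD_U64_PSEUDO expansion earlier in this hunk splits the operation into a carry-producing low add and an add-with-carry on the high halves; a plain C++ restatement of the same decomposition (illustrative only):

    #include <cstdint>

    uint64_t add64ViaCarry(uint64_t a, uint64_t b) {
      uint32_t lo = uint32_t(a) + uint32_t(b);           // V_ADD_I32_e64
      uint32_t carry = lo < uint32_t(a) ? 1u : 0u;       // its carry-out
      uint32_t hi = uint32_t(a >> 32) + uint32_t(b >> 32) + carry; // V_ADDC_U32_e64
      return (uint64_t(hi) << 32) | lo;                  // REG_SEQUENCE
    }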
@@ -3741,12 +4022,14 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
case AMDGPU::SI_INDIRECT_SRC_V4:
case AMDGPU::SI_INDIRECT_SRC_V8:
case AMDGPU::SI_INDIRECT_SRC_V16:
+ case AMDGPU::SI_INDIRECT_SRC_V32:
return emitIndirectSrc(MI, *BB, *getSubtarget());
case AMDGPU::SI_INDIRECT_DST_V1:
case AMDGPU::SI_INDIRECT_DST_V2:
case AMDGPU::SI_INDIRECT_DST_V4:
case AMDGPU::SI_INDIRECT_DST_V8:
case AMDGPU::SI_INDIRECT_DST_V16:
+ case AMDGPU::SI_INDIRECT_DST_V32:
return emitIndirectDst(MI, *BB, *getSubtarget());
case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
case AMDGPU::SI_KILL_I1_PSEUDO:
@@ -3870,6 +4153,75 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
}
return emitGWSMemViolTestLoop(MI, BB);
+ case AMDGPU::S_SETREG_B32: {
+ if (!getSubtarget()->hasDenormModeInst())
+ return BB;
+
+ // Try to optimize cases that only set the denormal mode or rounding mode.
+ //
+ // If the s_setreg_b32 fully sets all of the bits in the rounding mode or
+ // denormal mode to a constant, we can use s_round_mode or s_denorm_mode
+ // instead.
+ //
+ // FIXME: This could be predicates on the immediate, but tablegen doesn't
+ // allow you to have a no side effect instruction in the output of a
+ // sideeffecting pattern.
+
+ // TODO: Should also emit a no side effects pseudo if only FP bits are
+ // touched, even if not all of them or to a variable.
+ unsigned ID, Offset, Width;
+ AMDGPU::Hwreg::decodeHwreg(MI.getOperand(1).getImm(), ID, Offset, Width);
+ if (ID != AMDGPU::Hwreg::ID_MODE)
+ return BB;
+
+ const unsigned WidthMask = maskTrailingOnes<unsigned>(Width);
+ const unsigned SetMask = WidthMask << Offset;
+ unsigned SetDenormOp = 0;
+ unsigned SetRoundOp = 0;
+
+ // The dedicated instructions can only set the whole denorm or round mode at
+ // once, not a subset of bits in either.
+ if (Width == 8 && (SetMask & (AMDGPU::Hwreg::FP_ROUND_MASK |
+ AMDGPU::Hwreg::FP_DENORM_MASK)) == SetMask) {
+ // If this fully sets both the round and denorm mode, emit the two
+ // dedicated instructions for these.
+ assert(Offset == 0);
+ SetRoundOp = AMDGPU::S_ROUND_MODE;
+ SetDenormOp = AMDGPU::S_DENORM_MODE;
+ } else if (Width == 4) {
+ if ((SetMask & AMDGPU::Hwreg::FP_ROUND_MASK) == SetMask) {
+ SetRoundOp = AMDGPU::S_ROUND_MODE;
+ assert(Offset == 0);
+ } else if ((SetMask & AMDGPU::Hwreg::FP_DENORM_MASK) == SetMask) {
+ SetDenormOp = AMDGPU::S_DENORM_MODE;
+ assert(Offset == 4);
+ }
+ }
+
+ if (SetRoundOp || SetDenormOp) {
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ MachineInstr *Def = MRI.getVRegDef(MI.getOperand(0).getReg());
+ if (Def && Def->isMoveImmediate() && Def->getOperand(1).isImm()) {
+ unsigned ImmVal = Def->getOperand(1).getImm();
+ if (SetRoundOp) {
+ BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SetRoundOp))
+ .addImm(ImmVal & 0xf);
+
+ // If we also have the denorm mode, get just the denorm mode bits.
+ ImmVal >>= 4;
+ }
+
+ if (SetDenormOp) {
+ BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SetDenormOp))
+ .addImm(ImmVal & 0xf);
+ }
+
+ MI.eraseFromParent();
+ }
+ }
+
+ return BB;
+ }
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
}
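Note: the mask checks in the S_SETREG_B32 case above decide whether a dedicated instruction can be used. A standalone restatement, assuming the MODE layout the asserts imply (round mode in bits [3:0], denorm mode in bits [7:4]):

    #include <cstdint>

    enum class ModeSetOp { None, RoundOnly, DenormOnly, Both };

    // s_round_mode / s_denorm_mode can only write a whole 4-bit field, or both
    // fields together when the write covers MODE[7:0].
    ModeSetOp classifyModeWrite(unsigned offset, unsigned width) {
      const uint32_t RoundMask  = 0x0f;   // MODE[3:0]
      const uint32_t DenormMask = 0xf0;   // MODE[7:4]
      const uint32_t WidthMask  = width >= 32 ? ~0u : (1u << width) - 1u;
      const uint32_t SetMask    = WidthMask << offset;
      if (width == 8 && SetMask == (RoundMask | DenormMask))
        return ModeSetOp::Both;
      if (width == 4 && SetMask == RoundMask)
        return ModeSetOp::RoundOnly;
      if (width == 4 && SetMask == DenormMask)
        return ModeSetOp::DenormOnly;
      return ModeSetOp::None;
    }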
@@ -3925,10 +4277,13 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f32: {
- // This is as fast on some subtargets. However, we always have full rate f32
- // mad available which returns the same result as the separate operations
- // which we should prefer over fma. We can't use this if we want to support
- // denormals, so only report this in these cases.
+ // If mad is not available, this depends only on whether f32 fma is full rate.
+ if (!Subtarget->hasMadMacF32Insts())
+ return Subtarget->hasFastFMAF32();
+
+ // Otherwise f32 mad is always full rate and returns the same result as
+ // the separate operations, so it should be preferred over fma.
+ // However, it does not support denormals.
if (hasFP32Denormals(MF))
return Subtarget->hasFastFMAF32() || Subtarget->hasDLInsts();
@@ -3946,13 +4301,14 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
return false;
}
-bool SITargetLowering::isFMADLegalForFAddFSub(const SelectionDAG &DAG,
- const SDNode *N) const {
+bool SITargetLowering::isFMADLegal(const SelectionDAG &DAG,
+ const SDNode *N) const {
// TODO: Check future ftz flag
// v_mad_f32/v_mac_f32 do not support denormals.
EVT VT = N->getValueType(0);
if (VT == MVT::f32)
- return !hasFP32Denormals(DAG.getMachineFunction());
+ return Subtarget->hasMadMacF32Insts() &&
+ !hasFP32Denormals(DAG.getMachineFunction());
if (VT == MVT::f16) {
return Subtarget->hasMadF16() &&
!hasFP64FP16Denormals(DAG.getMachineFunction());
@@ -3971,7 +4327,7 @@ SDValue SITargetLowering::splitUnaryVectorOp(SDValue Op,
SelectionDAG &DAG) const {
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
- assert(VT == MVT::v4f16);
+ assert(VT == MVT::v4f16 || VT == MVT::v4i16);
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
@@ -4080,6 +4436,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FABS:
case ISD::FNEG:
case ISD::FCANONICALIZE:
+ case ISD::BSWAP:
return splitUnaryVectorOp(Op, DAG);
case ISD::FMINNUM:
case ISD::FMAXNUM:
@@ -4101,6 +4458,11 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FMINNUM_IEEE:
case ISD::FMAXNUM_IEEE:
return splitBinaryVectorOp(Op, DAG);
+ case ISD::SMULO:
+ case ISD::UMULO:
+ return lowerXMULO(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC:
+ return LowerDYNAMIC_STACKALLOC(Op, DAG);
}
return SDValue();
}
@@ -4204,9 +4566,8 @@ static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
const auto *CD = cast<ConstantSDNode>(N->getOperand(3));
- int CondCode = CD->getSExtValue();
- if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE ||
- CondCode > ICmpInst::Predicate::LAST_ICMP_PREDICATE)
+ unsigned CondCode = CD->getZExtValue();
+ if (!ICmpInst::isIntPredicate(static_cast<ICmpInst::Predicate>(CondCode)))
return DAG.getUNDEF(VT);
ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode);
@@ -4241,11 +4602,9 @@ static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI,
EVT VT = N->getValueType(0);
const auto *CD = cast<ConstantSDNode>(N->getOperand(3));
- int CondCode = CD->getSExtValue();
- if (CondCode < FCmpInst::Predicate::FIRST_FCMP_PREDICATE ||
- CondCode > FCmpInst::Predicate::LAST_FCMP_PREDICATE) {
+ unsigned CondCode = CD->getZExtValue();
+ if (!FCmpInst::isFPPredicate(static_cast<FCmpInst::Predicate>(CondCode)))
return DAG.getUNDEF(VT);
- }
SDValue Src0 = N->getOperand(1);
SDValue Src1 = N->getOperand(2);
@@ -4268,6 +4627,43 @@ static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI,
return DAG.getZExtOrTrunc(SetCC, SL, VT);
}
+static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N,
+ SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ SDValue Src = N->getOperand(1);
+ SDLoc SL(N);
+
+ if (Src.getOpcode() == ISD::SETCC) {
+ // (ballot (ISD::SETCC ...)) -> (AMDGPUISD::SETCC ...)
+ return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src.getOperand(0),
+ Src.getOperand(1), Src.getOperand(2));
+ }
+ if (const ConstantSDNode *Arg = dyn_cast<ConstantSDNode>(Src)) {
+ // (ballot 0) -> 0
+ if (Arg->isNullValue())
+ return DAG.getConstant(0, SL, VT);
+
+ // (ballot 1) -> EXEC/EXEC_LO
+ if (Arg->isOne()) {
+ Register Exec;
+ if (VT.getScalarSizeInBits() == 32)
+ Exec = AMDGPU::EXEC_LO;
+ else if (VT.getScalarSizeInBits() == 64)
+ Exec = AMDGPU::EXEC;
+ else
+ return SDValue();
+
+ return DAG.getCopyFromReg(DAG.getEntryNode(), SL, Exec, VT);
+ }
+ }
+
+ // (ballot (i1 $src)) -> (AMDGPUISD::SETCC (i32 (zext $src)) (i32 0)
+ // ISD::SETNE)
+ return DAG.getNode(
+ AMDGPUISD::SETCC, SL, VT, DAG.getZExtOrTrunc(Src, SL, MVT::i32),
+ DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
+}
+
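Note: the constant folds above follow from the wave-level meaning of ballot; a host-side model of a 64-lane wave (illustrative only):

    #include <cstdint>

    // Bit i of the result is set iff lane i is active in exec and its
    // predicate is true, so ballot(1) is exec and ballot(0) is 0.
    uint64_t ballot(uint64_t exec, const bool pred[64]) {
      uint64_t result = 0;
      for (unsigned lane = 0; lane < 64; ++lane)
        if (((exec >> lane) & 1) && pred[lane])
          result |= uint64_t(1) << lane;
      return result;
    }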
void SITargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
@@ -4440,9 +4836,7 @@ bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
// FIXME: Either avoid relying on address space here or change the default
// address space for functions to avoid the explicit check.
return (GV->getValueType()->isFunctionTy() ||
- GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
- GV->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
- GV->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
+ !isNonGlobalAddrSpace(GV->getAddressSpace())) &&
!shouldEmitFixup(GV) &&
!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
}
@@ -4451,6 +4845,14 @@ bool SITargetLowering::shouldEmitPCReloc(const GlobalValue *GV) const {
return !shouldEmitFixup(GV) && !shouldEmitGOTReloc(GV);
}
+bool SITargetLowering::shouldUseLDSConstAddress(const GlobalValue *GV) const {
+ if (!GV->hasExternalLinkage())
+ return true;
+
+ const auto OS = getTargetMachine().getTargetTriple().getOS();
+ return OS == Triple::AMDHSA || OS == Triple::AMDPAL;
+}
+
/// This transforms the control flow intrinsics to get the branch destination as
/// last parameter, also switches branch target with BR if the need arise
SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
@@ -4470,16 +4872,10 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
} else {
// Get the target from BR if we don't negate the condition
BR = findUser(BRCOND, ISD::BR);
+ assert(BR && "brcond missing unconditional branch user");
Target = BR->getOperand(1);
}
- // FIXME: This changes the types of the intrinsics instead of introducing new
- // nodes with the correct types.
- // e.g. llvm.amdgcn.loop
-
- // eg: i1,ch = llvm.amdgcn.loop t0, TargetConstant:i32<6271>, t3
- // => t9: ch = llvm.amdgcn.loop t0, TargetConstant:i32<6271>, t3, BasicBlock:ch<bb1 0x7fee5286d088>
-
unsigned CFNode = isCFIntrinsic(Intr);
if (CFNode == 0) {
// This is a uniform branch so we don't need to legalize.
@@ -4524,7 +4920,6 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
};
SDValue NewBR = DAG.getNode(ISD::BR, DL, BR->getVTList(), Ops);
DAG.ReplaceAllUsesWith(BR, NewBR.getNode());
- BR = NewBR.getNode();
}
SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
@@ -4577,13 +4972,14 @@ SDValue SITargetLowering::LowerRETURNADDR(SDValue Op,
return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
}
-SDValue SITargetLowering::getFPExtOrFPTrunc(SelectionDAG &DAG,
+SDValue SITargetLowering::getFPExtOrFPRound(SelectionDAG &DAG,
SDValue Op,
const SDLoc &DL,
EVT VT) const {
return Op.getValueType().bitsLE(VT) ?
DAG.getNode(ISD::FP_EXTEND, DL, VT, Op) :
- DAG.getNode(ISD::FTRUNC, DL, VT, Op);
+ DAG.getNode(ISD::FP_ROUND, DL, VT, Op,
+ DAG.getTargetConstant(0, DL, MVT::i32));
}
SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
@@ -4609,7 +5005,7 @@ SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
bool IsIEEEMode = Info->getMode().IEEE;
- // FIXME: Assert during eslection that this is only selected for
+ // FIXME: Assert during selection that this is only selected for
// ieee_mode. Currently a combine can produce the ieee version for non-ieee
// mode functions, but this happens to be OK since it's only done in cases
// where there is known no sNaN.
@@ -4621,6 +5017,42 @@ SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
return Op;
}
+SDValue SITargetLowering::lowerXMULO(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc SL(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ bool isSigned = Op.getOpcode() == ISD::SMULO;
+
+ if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
+ const APInt &C = RHSC->getAPIntValue();
+ // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
+ if (C.isPowerOf2()) {
+ // smulo(x, signed_min) is same as umulo(x, signed_min).
+ bool UseArithShift = isSigned && !C.isMinSignedValue();
+ SDValue ShiftAmt = DAG.getConstant(C.logBase2(), SL, MVT::i32);
+ SDValue Result = DAG.getNode(ISD::SHL, SL, VT, LHS, ShiftAmt);
+ SDValue Overflow = DAG.getSetCC(SL, MVT::i1,
+ DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
+ SL, VT, Result, ShiftAmt),
+ LHS, ISD::SETNE);
+ return DAG.getMergeValues({ Result, Overflow }, SL);
+ }
+ }
+
+ SDValue Result = DAG.getNode(ISD::MUL, SL, VT, LHS, RHS);
+ SDValue Top = DAG.getNode(isSigned ? ISD::MULHS : ISD::MULHU,
+ SL, VT, LHS, RHS);
+
+ SDValue Sign = isSigned
+ ? DAG.getNode(ISD::SRA, SL, VT, Result,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, SL, MVT::i32))
+ : DAG.getConstant(0, SL, VT);
+ SDValue Overflow = DAG.getSetCC(SL, MVT::i1, Top, Sign, ISD::SETNE);
+
+ return DAG.getMergeValues({ Result, Overflow }, SL);
+}
+
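Note: the generic path of lowerXMULO above flags overflow when the high half of the full product disagrees with the sign extension of the low half (signed) or with zero (unsigned). The same check in plain C++, assuming C++20 semantics for the signed conversions and shift (illustrative only):

    #include <cstdint>

    bool umul32Overflows(uint32_t a, uint32_t b, uint32_t &lo) {
      uint64_t full = uint64_t(a) * uint64_t(b);
      lo = uint32_t(full);
      return uint32_t(full >> 32) != 0;          // top half must be zero
    }

    bool smul32Overflows(int32_t a, int32_t b, int32_t &lo) {
      int64_t full = int64_t(a) * int64_t(b);
      lo = int32_t(full);
      int32_t hi = int32_t(uint64_t(full) >> 32);
      return hi != (lo >> 31);                   // top half must equal the sign bits
    }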
SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
@@ -4694,7 +5126,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- unsigned UserSGPR = Info->getQueuePtrUserSGPR();
+ Register UserSGPR = Info->getQueuePtrUserSGPR();
assert(UserSGPR != AMDGPU::NoRegister);
SDValue QueuePtr = CreateLiveInRegister(
@@ -4765,6 +5197,10 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
}
}
+ if (ASC->getDestAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
+ Src.getValueType() == MVT::i64)
+ return DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Src);
+
// global <-> flat are no-ops and never emitted.
const MachineFunction &MF = DAG.getMachineFunction();
@@ -5036,8 +5472,9 @@ SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
static SDValue
buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
- const SDLoc &DL, unsigned Offset, EVT PtrVT,
+ const SDLoc &DL, int64_t Offset, EVT PtrVT,
unsigned GAFlags = SIInstrInfo::MO_NONE) {
+ assert(isInt<32>(Offset + 4) && "32-bit offset is expected!");
// In order to support pc-relative addressing, the PC_ADD_REL_OFFSET SDNode is
// lowered to the following code sequence:
//
@@ -5086,9 +5523,7 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GSD->getGlobal();
if ((GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
- (!GV->hasExternalLinkage() ||
- getTargetMachine().getTargetTriple().getOS() == Triple::AMDHSA ||
- getTargetMachine().getTargetTriple().getOS() == Triple::AMDPAL)) ||
+ shouldUseLDSConstAddress(GV)) ||
GSD->getAddressSpace() == AMDGPUAS::REGION_ADDRESS ||
GSD->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
@@ -5114,11 +5549,11 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
const DataLayout &DataLayout = DAG.getDataLayout();
- unsigned Align = DataLayout.getABITypeAlignment(PtrTy);
+ Align Alignment = DataLayout.getABITypeAlign(PtrTy);
MachinePointerInfo PtrInfo
= MachinePointerInfo::getGOT(DAG.getMachineFunction());
- return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), GOTAddr, PtrInfo, Align,
+ return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), GOTAddr, PtrInfo, Alignment,
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
}
@@ -5144,8 +5579,8 @@ SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG,
MVT VT,
unsigned Offset) const {
SDLoc SL(Op);
- SDValue Param = lowerKernargMemParameter(DAG, MVT::i32, MVT::i32, SL,
- DAG.getEntryNode(), Offset, 4, false);
+ SDValue Param = lowerKernargMemParameter(
+ DAG, MVT::i32, MVT::i32, SL, DAG.getEntryNode(), Offset, Align(4), false);
// The local size values will have the hi 16-bits as zero.
return DAG.getNode(ISD::AssertZext, SL, MVT::i32, Param,
DAG.getValueType(VT));
@@ -5181,6 +5616,9 @@ static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL,
} else if (Elts.size() == 2) {
Type = MVT::v2f32;
NumElts = 2;
+ } else if (Elts.size() == 3) {
+ Type = MVT::v3f32;
+ NumElts = 3;
} else if (Elts.size() <= 4) {
Type = MVT::v4f32;
NumElts = 4;
@@ -5230,6 +5668,24 @@ static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG,
return Value == 0;
}
+static SDValue padEltsToUndef(SelectionDAG &DAG, const SDLoc &DL, EVT CastVT,
+ SDValue Src, int ExtraElts) {
+ EVT SrcVT = Src.getValueType();
+
+ SmallVector<SDValue, 8> Elts;
+
+ if (SrcVT.isVector())
+ DAG.ExtractVectorElements(Src, Elts);
+ else
+ Elts.push_back(Src);
+
+ SDValue Undef = DAG.getUNDEF(SrcVT.getScalarType());
+ while (ExtraElts--)
+ Elts.push_back(Undef);
+
+ return DAG.getBuildVector(CastVT, DL, Elts);
+}
+
// Re-construct the required return value for a image load intrinsic.
// This is more complicated due to the optional use TexFailCtrl which means the required
// return type is an aggregate
@@ -5241,76 +5697,56 @@ static SDValue constructRetValue(SelectionDAG &DAG,
const SDLoc &DL, LLVMContext &Context) {
// Determine the required return type. This is the same regardless of IsTexFail flag
EVT ReqRetVT = ResultTypes[0];
- EVT ReqRetEltVT = ReqRetVT.isVector() ? ReqRetVT.getVectorElementType() : ReqRetVT;
int ReqRetNumElts = ReqRetVT.isVector() ? ReqRetVT.getVectorNumElements() : 1;
- EVT AdjEltVT = Unpacked && IsD16 ? MVT::i32 : ReqRetEltVT;
- EVT AdjVT = Unpacked ? ReqRetNumElts > 1 ? EVT::getVectorVT(Context, AdjEltVT, ReqRetNumElts)
- : AdjEltVT
- : ReqRetVT;
-
- // Extract data part of the result
- // Bitcast the result to the same type as the required return type
- int NumElts;
- if (IsD16 && !Unpacked)
- NumElts = NumVDataDwords << 1;
- else
- NumElts = NumVDataDwords;
+ int NumDataDwords = (!IsD16 || (IsD16 && Unpacked)) ?
+ ReqRetNumElts : (ReqRetNumElts + 1) / 2;
- EVT CastVT = NumElts > 1 ? EVT::getVectorVT(Context, AdjEltVT, NumElts)
- : AdjEltVT;
+ int MaskPopDwords = (!IsD16 || (IsD16 && Unpacked)) ?
+ DMaskPop : (DMaskPop + 1) / 2;
- // Special case for v6f16. Rather than add support for this, use v3i32 to
- // extract the data elements
- bool V6F16Special = false;
- if (NumElts == 6) {
- CastVT = EVT::getVectorVT(Context, MVT::i32, NumElts / 2);
- DMaskPop >>= 1;
- ReqRetNumElts >>= 1;
- V6F16Special = true;
- AdjVT = MVT::v2i32;
- }
+ MVT DataDwordVT = NumDataDwords == 1 ?
+ MVT::i32 : MVT::getVectorVT(MVT::i32, NumDataDwords);
- SDValue N = SDValue(Result, 0);
- SDValue CastRes = DAG.getNode(ISD::BITCAST, DL, CastVT, N);
+ MVT MaskPopVT = MaskPopDwords == 1 ?
+ MVT::i32 : MVT::getVectorVT(MVT::i32, MaskPopDwords);
- // Iterate over the result
- SmallVector<SDValue, 4> BVElts;
+ SDValue Data(Result, 0);
+ SDValue TexFail;
- if (CastVT.isVector()) {
- DAG.ExtractVectorElements(CastRes, BVElts, 0, DMaskPop);
- } else {
- BVElts.push_back(CastRes);
- }
- int ExtraElts = ReqRetNumElts - DMaskPop;
- while(ExtraElts--)
- BVElts.push_back(DAG.getUNDEF(AdjEltVT));
+ if (IsTexFail) {
+ SDValue ZeroIdx = DAG.getConstant(0, DL, MVT::i32);
+ if (MaskPopVT.isVector()) {
+ Data = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskPopVT,
+ SDValue(Result, 0), ZeroIdx);
+ } else {
+ Data = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MaskPopVT,
+ SDValue(Result, 0), ZeroIdx);
+ }
- SDValue PreTFCRes;
- if (ReqRetNumElts > 1) {
- SDValue NewVec = DAG.getBuildVector(AdjVT, DL, BVElts);
- if (IsD16 && Unpacked)
- PreTFCRes = adjustLoadValueTypeImpl(NewVec, ReqRetVT, DL, DAG, Unpacked);
- else
- PreTFCRes = NewVec;
- } else {
- PreTFCRes = BVElts[0];
+ TexFail = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+ SDValue(Result, 0),
+ DAG.getConstant(MaskPopDwords, DL, MVT::i32));
}
- if (V6F16Special)
- PreTFCRes = DAG.getNode(ISD::BITCAST, DL, MVT::v4f16, PreTFCRes);
+ if (DataDwordVT.isVector())
+ Data = padEltsToUndef(DAG, DL, DataDwordVT, Data,
+ NumDataDwords - MaskPopDwords);
- if (!IsTexFail) {
- if (Result->getNumValues() > 1)
- return DAG.getMergeValues({PreTFCRes, SDValue(Result, 1)}, DL);
- else
- return PreTFCRes;
- }
+ if (IsD16)
+ Data = adjustLoadValueTypeImpl(Data, ReqRetVT, DL, DAG, Unpacked);
+
+ if (!ReqRetVT.isVector())
+ Data = DAG.getNode(ISD::TRUNCATE, DL, ReqRetVT.changeTypeToInteger(), Data);
+
+ Data = DAG.getNode(ISD::BITCAST, DL, ReqRetVT, Data);
- // Extract the TexFail result and insert into aggregate return
- SmallVector<SDValue, 1> TFCElt;
- DAG.ExtractVectorElements(N, TFCElt, DMaskPop, 1);
- SDValue TFCRes = DAG.getNode(ISD::BITCAST, DL, ResultTypes[1], TFCElt[0]);
- return DAG.getMergeValues({PreTFCRes, TFCRes, SDValue(Result, 1)}, DL);
+ if (TexFail)
+ return DAG.getMergeValues({Data, TexFail, SDValue(Result, 1)}, DL);
+
+ if (Result->getNumValues() == 1)
+ return Data;
+
+ return DAG.getMergeValues({Data, SDValue(Result, 1)}, DL);
}
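// Sketch (not from the patch): a minimal standalone model of the dword math
// used by the rewritten constructRetValue above. Packed D16 results keep two
// 16-bit elements per dword, so both counts round up to half the element
// count; unpacked D16 and 32-bit results use one dword per element. The
// struct and function names here are invented for illustration.
struct RetDwords {
  int DataDwords;    // dwords holding the requested return elements
  int MaskPopDwords; // dwords actually populated according to the dmask
};

static RetDwords computeRetDwords(int ReqRetNumElts, int DMaskPop, bool IsD16,
                                  bool Unpacked) {
  const bool Packed16 = IsD16 && !Unpacked;
  return {Packed16 ? (ReqRetNumElts + 1) / 2 : ReqRetNumElts,
          Packed16 ? (DMaskPop + 1) / 2 : DMaskPop};
}
// Example: a packed-D16 v4f16 result with a 3-channel dmask occupies
// computeRetDwords(4, 3, true, false) == {2, 2} dwords.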
static bool parseTexFail(SDValue TexFailCtrl, SelectionDAG &DAG, SDValue *TFE,
@@ -5331,6 +5767,35 @@ static bool parseTexFail(SDValue TexFailCtrl, SelectionDAG &DAG, SDValue *TFE,
return Value == 0;
}
+static void packImageA16AddressToDwords(SelectionDAG &DAG, SDValue Op,
+ MVT PackVectorVT,
+ SmallVectorImpl<SDValue> &PackedAddrs,
+ unsigned DimIdx, unsigned EndIdx,
+ unsigned NumGradients) {
+ SDLoc DL(Op);
+ for (unsigned I = DimIdx; I < EndIdx; I++) {
+ SDValue Addr = Op.getOperand(I);
+
+ // Gradients are packed with undef for each coordinate.
+ // In <hi 16 bit>,<lo 16 bit> notation, the registers look like this:
+ // 1D: undef,dx/dh; undef,dx/dv
+ // 2D: dy/dh,dx/dh; dy/dv,dx/dv
+ // 3D: dy/dh,dx/dh; undef,dz/dh; dy/dv,dx/dv; undef,dz/dv
+ if (((I + 1) >= EndIdx) ||
+ ((NumGradients / 2) % 2 == 1 && (I == DimIdx + (NumGradients / 2) - 1 ||
+ I == DimIdx + NumGradients - 1))) {
+ if (Addr.getValueType() != MVT::i16)
+ Addr = DAG.getBitcast(MVT::i16, Addr);
+ Addr = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Addr);
+ } else {
+ Addr = DAG.getBuildVector(PackVectorVT, DL, {Addr, Op.getOperand(I + 1)});
+ I++;
+ }
+ Addr = DAG.getBitcast(MVT::f32, Addr);
+ PackedAddrs.push_back(Addr);
+ }
+}
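// Sketch (not from the patch): a runnable model of the pairing rule in
// packImageA16AddressToDwords. An operand is padded with undef instead of
// being paired with its successor when it is the last operand, or when the
// per-direction gradient count is odd and it is the last dh or dv gradient
// (dz/dh and dz/dv in the 3D case). Output is printed in the same
// <hi 16 bit>,<lo 16 bit> order as the comment above; names are invented.
#include <cstdio>

static void printPacking(unsigned DimIdx, unsigned EndIdx,
                         unsigned NumGradients) {
  for (unsigned I = DimIdx; I < EndIdx; I++) {
    bool PadWithUndef =
        (I + 1 >= EndIdx) ||
        ((NumGradients / 2) % 2 == 1 &&
         (I == DimIdx + (NumGradients / 2) - 1 ||
          I == DimIdx + NumGradients - 1));
    if (PadWithUndef) {
      std::printf("dword: undef,op%u\n", I);
    } else {
      std::printf("dword: op%u,op%u\n", I + 1, I);
      I++;
    }
  }
}

int main() {
  // 3D sample with gradients: 6 gradient operands followed by 3 coordinates.
  // Prints dz/dh (op2) and dz/dv (op5) padded with undef, plus the odd last
  // coordinate, matching the register layout described above.
  printPacking(/*DimIdx=*/0, /*EndIdx=*/9, /*NumGradients=*/6);
  return 0;
}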
+
SDValue SITargetLowering::lowerImage(SDValue Op,
const AMDGPU::ImageDimIntrinsicInfo *Intr,
SelectionDAG &DAG) const {
@@ -5350,6 +5815,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
SmallVector<EVT, 3> ResultTypes(Op->value_begin(), Op->value_end());
SmallVector<EVT, 3> OrigResultTypes(Op->value_begin(), Op->value_end());
bool IsD16 = false;
+ bool IsG16 = false;
bool IsA16 = false;
SDValue VData;
int NumVDataDwords;
@@ -5456,41 +5922,67 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
}
}
- // Check for 16 bit addresses and pack if true.
+ // Push back extra arguments.
+ for (unsigned I = 0; I < BaseOpcode->NumExtraArgs; I++)
+ VAddrs.push_back(Op.getOperand(AddrIdx + I));
+
+ // Check for 16-bit addresses or derivatives and pack them if so.
unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
+ unsigned CoordIdx = DimIdx + NumGradients;
+ unsigned CoordsEnd = AddrIdx + NumMIVAddrs;
+
MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
- const MVT VAddrScalarVT = VAddrVT.getScalarType();
- if (((VAddrScalarVT == MVT::f16) || (VAddrScalarVT == MVT::i16)) &&
- ST->hasFeature(AMDGPU::FeatureR128A16)) {
- IsA16 = true;
- const MVT VectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
- for (unsigned i = AddrIdx; i < (AddrIdx + NumMIVAddrs); ++i) {
- SDValue AddrLo, AddrHi;
- // Push back extra arguments.
- if (i < DimIdx) {
- AddrLo = Op.getOperand(i);
- } else {
- AddrLo = Op.getOperand(i);
- // Dz/dh, dz/dv and the last odd coord are packed with undef. Also,
- // in 1D, derivatives dx/dh and dx/dv are packed with undef.
- if (((i + 1) >= (AddrIdx + NumMIVAddrs)) ||
- ((NumGradients / 2) % 2 == 1 &&
- (i == DimIdx + (NumGradients / 2) - 1 ||
- i == DimIdx + NumGradients - 1))) {
- AddrHi = DAG.getUNDEF(MVT::f16);
- } else {
- AddrHi = Op.getOperand(i + 1);
- i++;
- }
- AddrLo = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VectorVT,
- {AddrLo, AddrHi});
- AddrLo = DAG.getBitcast(MVT::i32, AddrLo);
+ MVT VAddrScalarVT = VAddrVT.getScalarType();
+ MVT PackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
+ IsG16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;
+
+ VAddrVT = Op.getOperand(CoordIdx).getSimpleValueType();
+ VAddrScalarVT = VAddrVT.getScalarType();
+ IsA16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;
+ if (IsA16 || IsG16) {
+ if (IsA16) {
+ if (!ST->hasA16()) {
+ LLVM_DEBUG(dbgs() << "Failed to lower image intrinsic: Target does not "
+ "support 16 bit addresses\n");
+ return Op;
+ }
+ if (!IsG16) {
+ LLVM_DEBUG(
+ dbgs() << "Failed to lower image intrinsic: 16 bit addresses "
+ "need 16 bit derivatives but got 32 bit derivatives\n");
+ return Op;
}
- VAddrs.push_back(AddrLo);
+ } else if (!ST->hasG16()) {
+ LLVM_DEBUG(dbgs() << "Failed to lower image intrinsic: Target does not "
+ "support 16 bit derivatives\n");
+ return Op;
+ }
+
+ if (BaseOpcode->Gradients && !IsA16) {
+ if (!ST->hasG16()) {
+ LLVM_DEBUG(dbgs() << "Failed to lower image intrinsic: Target does not "
+ "support 16 bit derivatives\n");
+ return Op;
+ }
+ // Activate g16
+ const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
+ AMDGPU::getMIMGG16MappingInfo(Intr->BaseOpcode);
+ IntrOpcode = G16MappingInfo->G16; // set new opcode to variant with _g16
+ }
+
+ // Don't compress addresses for G16
+ const int PackEndIdx = IsA16 ? CoordsEnd : CoordIdx;
+ packImageA16AddressToDwords(DAG, Op, PackVectorVT, VAddrs, DimIdx,
+ PackEndIdx, NumGradients);
+
+ if (!IsA16) {
+ // Add the uncompressed addresses
+ for (unsigned I = CoordIdx; I < CoordsEnd; I++)
+ VAddrs.push_back(Op.getOperand(I));
}
} else {
- for (unsigned i = 0; i < NumMIVAddrs; ++i)
- VAddrs.push_back(Op.getOperand(AddrIdx + i));
+ for (unsigned I = DimIdx; I < CoordsEnd; I++)
+ VAddrs.push_back(Op.getOperand(I));
}
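// Sketch (not from the patch): the A16/G16 legality decision above, reduced
// to a plain function. The Has* flags stand in for the subtarget feature
// queries (ST->hasA16(), ST->hasG16()); all names are invented.
enum class ImageAddrMode { Full32, A16, G16Only, Unsupported };

static ImageAddrMode classifyImageAddrMode(bool IsA16, bool IsG16, bool HasA16,
                                           bool HasG16) {
  if (IsA16) {
    // 16-bit coordinates need hardware a16 support and 16-bit derivatives.
    return (HasA16 && IsG16) ? ImageAddrMode::A16 : ImageAddrMode::Unsupported;
  }
  if (IsG16) {
    // 16-bit derivatives with 32-bit coordinates: only the gradients are
    // packed and the opcode is switched to its _g16 variant.
    return HasG16 ? ImageAddrMode::G16Only : ImageAddrMode::Unsupported;
  }
  return ImageAddrMode::Full32; // nothing to pack
}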
// If the register allocator cannot place the address registers contiguously
@@ -5557,8 +6049,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
}
EVT NewVT = NumVDataDwords > 1 ?
- EVT::getVectorVT(*DAG.getContext(), MVT::f32, NumVDataDwords)
- : MVT::f32;
+ EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumVDataDwords)
+ : MVT::i32;
ResultTypes[0] = NewVT;
if (ResultTypes.size() == 3) {
@@ -5603,10 +6095,12 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
Ops.push_back(DLC);
Ops.push_back(GLC);
Ops.push_back(SLC);
- Ops.push_back(IsA16 && // a16 or r128
+ Ops.push_back(IsA16 && // r128, a16 for gfx9
ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
- Ops.push_back(TFE); // tfe
- Ops.push_back(LWE); // lwe
+ if (IsGFX10)
+ Ops.push_back(IsA16 ? True : False);
+ Ops.push_back(TFE);
+ Ops.push_back(LWE);
if (!IsGFX10)
Ops.push_back(DimInfo->DA ? True : False);
if (BaseOpcode->HasD16)
@@ -5655,26 +6149,25 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
}
SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
- SDValue Offset, SDValue GLC, SDValue DLC,
+ SDValue Offset, SDValue CachePolicy,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const DataLayout &DataLayout = DAG.getDataLayout();
- unsigned Align =
- DataLayout.getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+ Align Alignment =
+ DataLayout.getABITypeAlign(VT.getTypeForEVT(*DAG.getContext()));
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(),
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant,
- VT.getStoreSize(), Align);
+ VT.getStoreSize(), Alignment);
if (!Offset->isDivergent()) {
SDValue Ops[] = {
Rsrc,
Offset, // Offset
- GLC,
- DLC,
+ CachePolicy
};
// Widen vec3 load to vec4.
@@ -5684,9 +6177,8 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
auto WidenedOp = DAG.getMemIntrinsicNode(
AMDGPUISD::SBUFFER_LOAD, DL, DAG.getVTList(WidenedVT), Ops, WidenedVT,
MF.getMachineMemOperand(MMO, 0, WidenedVT.getStoreSize()));
- auto Subvector = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, DL, VT, WidenedOp,
- DAG.getConstant(0, DL, getVectorIdxTy(DAG.getDataLayout())));
+ auto Subvector = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, WidenedOp,
+ DAG.getVectorIdxConstant(0, DL));
return Subvector;
}
@@ -5705,11 +6197,10 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
if (NumElts == 8 || NumElts == 16) {
NumLoads = NumElts / 4;
- LoadVT = MVT::v4i32;
+ LoadVT = MVT::getVectorVT(LoadVT.getScalarType(), 4);
}
SDVTList VTList = DAG.getVTList({LoadVT, MVT::Glue});
- unsigned CachePolicy = cast<ConstantSDNode>(GLC)->getZExtValue();
SDValue Ops[] = {
DAG.getEntryNode(), // Chain
Rsrc, // rsrc
@@ -5717,13 +6208,14 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
{}, // voffset
{}, // soffset
{}, // offset
- DAG.getTargetConstant(CachePolicy, DL, MVT::i32), // cachepolicy
+ CachePolicy, // cachepolicy
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
// Use the alignment to ensure that the required offsets will fit into the
// immediate offsets.
- setBufferOffsets(Offset, DAG, &Ops[3], NumLoads > 1 ? 16 * NumLoads : 4);
+ setBufferOffsets(Offset, DAG, &Ops[3],
+ NumLoads > 1 ? Align(16 * NumLoads) : Align(4));
uint64_t InstOffset = cast<ConstantSDNode>(Ops[5])->getZExtValue();
for (unsigned i = 0; i < NumLoads; ++i) {
@@ -5732,7 +6224,7 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
LoadVT, MMO, DAG));
}
- if (VT == MVT::v8i32 || VT == MVT::v16i32)
+ if (NumElts == 8 || NumElts == 16)
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Loads);
return Loads[0];
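// Sketch (not from the patch): how a divergent-offset s-buffer load of 8 or
// 16 elements is split into dword4 pieces, and the alignment requested from
// setBufferOffsets so that every piece's immediate offset stays encodable.
// The struct and function names are invented for illustration.
struct SBufferSplit {
  unsigned NumLoads;      // number of 4-element loads emitted
  unsigned RequiredAlign; // alignment passed to setBufferOffsets, in bytes
  unsigned StrideBytes;   // byte distance between consecutive pieces
};

static SBufferSplit splitSBufferLoad(unsigned NumElts) {
  const bool Split = NumElts == 8 || NumElts == 16;
  SBufferSplit S;
  S.NumLoads = Split ? NumElts / 4 : 1;
  S.RequiredAlign = S.NumLoads > 1 ? 16 * S.NumLoads : 4;
  S.StrideBytes = 16; // each piece loads four dwords
  return S;
}
// Example: a <16 x i32> load becomes four <4 x i32> loads 16 bytes apart,
// re-assembled with CONCAT_VECTORS.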
@@ -5777,6 +6269,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
}
case Intrinsic::amdgcn_kernarg_segment_ptr: {
+ if (!AMDGPU::isKernel(MF.getFunction().getCallingConv())) {
+ // This only makes sense to call in a kernel, so just lower to null.
+ return DAG.getConstant(0, DL, VT);
+ }
+
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
}
@@ -5790,8 +6287,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::amdgcn_rsq_legacy:
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return emitRemovedIntrinsicError(DAG, DL, VT);
-
- return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
+ return SDValue();
case Intrinsic::amdgcn_rcp_legacy:
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return emitRemovedIntrinsicError(DAG, DL, VT);
@@ -5815,37 +6311,43 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_X, 4, false);
+ SI::KernelInputOffsets::NGROUPS_X, Align(4),
+ false);
case Intrinsic::r600_read_ngroups_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_Y, 4, false);
+ SI::KernelInputOffsets::NGROUPS_Y, Align(4),
+ false);
case Intrinsic::r600_read_ngroups_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_Z, 4, false);
+ SI::KernelInputOffsets::NGROUPS_Z, Align(4),
+ false);
case Intrinsic::r600_read_global_size_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_X, 4, false);
+ SI::KernelInputOffsets::GLOBAL_SIZE_X,
+ Align(4), false);
case Intrinsic::r600_read_global_size_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_Y, 4, false);
+ SI::KernelInputOffsets::GLOBAL_SIZE_Y,
+ Align(4), false);
case Intrinsic::r600_read_global_size_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_Z, 4, false);
+ SI::KernelInputOffsets::GLOBAL_SIZE_Z,
+ Align(4), false);
case Intrinsic::r600_read_local_size_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
@@ -5865,29 +6367,23 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return lowerImplicitZextParam(DAG, Op, MVT::i16,
SI::KernelInputOffsets::LOCAL_SIZE_Z);
case Intrinsic::amdgcn_workgroup_id_x:
- case Intrinsic::r600_read_tgid_x:
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
case Intrinsic::amdgcn_workgroup_id_y:
- case Intrinsic::r600_read_tgid_y:
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
case Intrinsic::amdgcn_workgroup_id_z:
- case Intrinsic::r600_read_tgid_z:
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
case Intrinsic::amdgcn_workitem_id_x:
- case Intrinsic::r600_read_tidig_x:
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDX);
case Intrinsic::amdgcn_workitem_id_y:
- case Intrinsic::r600_read_tidig_y:
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDY);
case Intrinsic::amdgcn_workitem_id_z:
- case Intrinsic::r600_read_tidig_z:
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDZ);
@@ -5901,53 +6397,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (!parseCachePolicy(Op.getOperand(3), DAG, &GLC, nullptr,
IsGFX10 ? &DLC : nullptr))
return Op;
- return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2), GLC, DLC,
+ return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
DAG);
}
case Intrinsic::amdgcn_fdiv_fast:
return lowerFDIV_FAST(Op, DAG);
- case Intrinsic::amdgcn_interp_p1_f16: {
- SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
- Op.getOperand(5), SDValue());
- if (getSubtarget()->getLDSBankCount() == 16) {
- // 16 bank LDS
-
- // FIXME: This implicitly will insert a second CopyToReg to M0.
- SDValue S = DAG.getNode(
- ISD::INTRINSIC_WO_CHAIN, DL, MVT::f32,
- DAG.getTargetConstant(Intrinsic::amdgcn_interp_mov, DL, MVT::i32),
- DAG.getConstant(2, DL, MVT::i32), // P0
- Op.getOperand(2), // Attrchan
- Op.getOperand(3), // Attr
- Op.getOperand(5)); // m0
-
- SDValue Ops[] = {
- Op.getOperand(1), // Src0
- Op.getOperand(2), // Attrchan
- Op.getOperand(3), // Attr
- DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
- S, // Src2 - holds two f16 values selected by high
- DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
- Op.getOperand(4), // high
- DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
- DAG.getTargetConstant(0, DL, MVT::i32) // $omod
- };
- return DAG.getNode(AMDGPUISD::INTERP_P1LV_F16, DL, MVT::f32, Ops);
- } else {
- // 32 bank LDS
- SDValue Ops[] = {
- Op.getOperand(1), // Src0
- Op.getOperand(2), // Attrchan
- Op.getOperand(3), // Attr
- DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
- Op.getOperand(4), // high
- DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
- DAG.getTargetConstant(0, DL, MVT::i32), // $omod
- ToM0.getValue(1)
- };
- return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops);
- }
- }
case Intrinsic::amdgcn_sin:
return DAG.getNode(AMDGPUISD::SIN_HW, DL, VT, Op.getOperand(1));
@@ -5988,9 +6442,6 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
- case Intrinsic::amdgcn_trig_preop:
- return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT,
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::amdgcn_div_scale: {
const ConstantSDNode *Param = cast<ConstantSDNode>(Op.getOperand(3));
@@ -6020,6 +6471,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::amdgcn_fcmp: {
return lowerFCMPIntrinsic(*this, Op.getNode(), DAG);
}
+ case Intrinsic::amdgcn_ballot:
+ return lowerBALLOTIntrinsic(*this, Op.getNode(), DAG);
case Intrinsic::amdgcn_fmed3:
return DAG.getNode(AMDGPUISD::FMED3, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
@@ -6098,6 +6551,19 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getConstant(1, SL, MVT::i32));
return DAG.getSetCC(SL, MVT::i1, SrcHi, Aperture, ISD::SETEQ);
}
+ case Intrinsic::amdgcn_alignbit:
+ return DAG.getNode(ISD::FSHR, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case Intrinsic::amdgcn_reloc_constant: {
+ Module *M = const_cast<Module *>(MF.getFunction().getParent());
+ const MDNode *Metadata = cast<MDNodeSDNode>(Op.getOperand(1))->getMD();
+ auto SymbolName = cast<MDString>(Metadata->getOperand(0))->getString();
+ auto RelocSymbol = cast<GlobalVariable>(
+ M->getOrInsertGlobal(SymbolName, Type::getInt32Ty(M->getContext())));
+ SDValue GA = DAG.getTargetGlobalAddress(RelocSymbol, DL, MVT::i32, 0,
+ SIInstrInfo::MO_ABS32_LO);
+ return {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, GA), 0};
+ }
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
@@ -6131,6 +6597,28 @@ static unsigned getBufferOffsetForMMO(SDValue VOffset,
cast<ConstantSDNode>(Offset)->getSExtValue();
}
+static unsigned getDSShaderTypeValue(const MachineFunction &MF) {
+ switch (MF.getFunction().getCallingConv()) {
+ case CallingConv::AMDGPU_PS:
+ return 1;
+ case CallingConv::AMDGPU_VS:
+ return 2;
+ case CallingConv::AMDGPU_GS:
+ return 3;
+ case CallingConv::AMDGPU_HS:
+ case CallingConv::AMDGPU_LS:
+ case CallingConv::AMDGPU_ES:
+ report_fatal_error("ds_ordered_count unsupported for this calling conv");
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::C:
+ case CallingConv::Fast:
+ default:
+ // Assume other calling conventions are various compute callable functions
+ return 0;
+ }
+}
+
SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
@@ -6146,8 +6634,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
unsigned IndexOperand = M->getConstantOperandVal(7);
unsigned WaveRelease = M->getConstantOperandVal(8);
unsigned WaveDone = M->getConstantOperandVal(9);
- unsigned ShaderType;
- unsigned Instruction;
unsigned OrderedCountIndex = IndexOperand & 0x3f;
IndexOperand &= ~0x3f;
@@ -6166,36 +6652,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
if (IndexOperand)
report_fatal_error("ds_ordered_count: bad index operand");
- switch (IntrID) {
- case Intrinsic::amdgcn_ds_ordered_add:
- Instruction = 0;
- break;
- case Intrinsic::amdgcn_ds_ordered_swap:
- Instruction = 1;
- break;
- }
-
if (WaveDone && !WaveRelease)
report_fatal_error("ds_ordered_count: wave_done requires wave_release");
- switch (DAG.getMachineFunction().getFunction().getCallingConv()) {
- case CallingConv::AMDGPU_CS:
- case CallingConv::AMDGPU_KERNEL:
- ShaderType = 0;
- break;
- case CallingConv::AMDGPU_PS:
- ShaderType = 1;
- break;
- case CallingConv::AMDGPU_VS:
- ShaderType = 2;
- break;
- case CallingConv::AMDGPU_GS:
- ShaderType = 3;
- break;
- default:
- report_fatal_error("ds_ordered_count unsupported for this calling conv");
- }
-
+ unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
+ unsigned ShaderType = getDSShaderTypeValue(DAG.getMachineFunction());
unsigned Offset0 = OrderedCountIndex << 2;
unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
(Instruction << 4);
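// Sketch (not from the patch): the ds_ordered_count offset encoding built
// above, as a standalone helper. ShaderType comes from getDSShaderTypeValue
// (compute = 0, PS = 1, VS = 2, GS = 3) and Instruction is 0 for ordered_add
// and 1 for ordered_swap; the names here are invented.
struct DSOrderedOffsets {
  unsigned Offset0; // dword index of the ordered counter
  unsigned Offset1; // wave_release | wave_done | shader type | instruction
};

static DSOrderedOffsets encodeDSOrderedOffsets(unsigned OrderedCountIndex,
                                               bool WaveRelease, bool WaveDone,
                                               unsigned ShaderType,
                                               unsigned Instruction) {
  DSOrderedOffsets R;
  R.Offset0 = OrderedCountIndex << 2;
  R.Offset1 = (WaveRelease ? 1u : 0u) | ((WaveDone ? 1u : 0u) << 1) |
              (ShaderType << 2) | (Instruction << 4);
  return R;
}
// Example: index 1 with wave_release and wave_done in a compute shader doing
// ordered_add encodes as Offset0 = 4, Offset1 = 0b00011.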
@@ -6425,6 +6886,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::amdgcn_buffer_atomic_swap:
case Intrinsic::amdgcn_buffer_atomic_add:
case Intrinsic::amdgcn_buffer_atomic_sub:
+ case Intrinsic::amdgcn_buffer_atomic_csub:
case Intrinsic::amdgcn_buffer_atomic_smin:
case Intrinsic::amdgcn_buffer_atomic_umin:
case Intrinsic::amdgcn_buffer_atomic_smax:
@@ -6467,6 +6929,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::amdgcn_buffer_atomic_sub:
Opcode = AMDGPUISD::BUFFER_ATOMIC_SUB;
break;
+ case Intrinsic::amdgcn_buffer_atomic_csub:
+ Opcode = AMDGPUISD::BUFFER_ATOMIC_CSUB;
+ break;
case Intrinsic::amdgcn_buffer_atomic_smin:
Opcode = AMDGPUISD::BUFFER_ATOMIC_SMIN;
break;
@@ -6715,6 +7180,18 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, M->getMemOperand());
}
+ case Intrinsic::amdgcn_global_atomic_csub: {
+ MemSDNode *M = cast<MemSDNode>(Op);
+ SDValue Ops[] = {
+ M->getOperand(0), // Chain
+ M->getOperand(2), // Ptr
+ M->getOperand(3) // Value
+ };
+
+ return DAG.getMemIntrinsicNode(AMDGPUISD::ATOMIC_LOAD_CSUB, SDLoc(Op),
+ M->getVTList(), Ops, M->getMemoryVT(),
+ M->getMemOperand());
+ }
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
@@ -6750,9 +7227,8 @@ SDValue SITargetLowering::getMemIntrinsicNode(unsigned Opcode, const SDLoc &DL,
auto NewOp = DAG.getMemIntrinsicNode(Opcode, DL, WidenedVTList, Ops,
WidenedMemVT, MMO);
if (WidenedVT != VT) {
- auto Extract = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, DL, VT, NewOp,
- DAG.getConstant(0, DL, getVectorIdxTy(DAG.getDataLayout())));
+ auto Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, NewOp,
+ DAG.getVectorIdxConstant(0, DL));
NewOp = DAG.getMergeValues({ Extract, SDValue(NewOp.getNode(), 1) }, DL);
}
return NewOp;
@@ -6792,52 +7268,29 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
switch (IntrinsicID) {
- case Intrinsic::amdgcn_exp: {
- const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(2));
- const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(3));
- const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(8));
- const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(9));
-
- const SDValue Ops[] = {
- Chain,
- DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), // tgt
- DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), // en
- Op.getOperand(4), // src0
- Op.getOperand(5), // src1
- Op.getOperand(6), // src2
- Op.getOperand(7), // src3
- DAG.getTargetConstant(0, DL, MVT::i1), // compr
- DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1)
- };
-
- unsigned Opc = Done->isNullValue() ?
- AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
- return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
- }
case Intrinsic::amdgcn_exp_compr: {
- const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(2));
- const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(3));
SDValue Src0 = Op.getOperand(4);
SDValue Src1 = Op.getOperand(5);
- const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(6));
- const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(7));
+ // Hack around illegal type on SI by directly selecting it.
+ if (isTypeLegal(Src0.getValueType()))
+ return SDValue();
+ const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(6));
SDValue Undef = DAG.getUNDEF(MVT::f32);
const SDValue Ops[] = {
- Chain,
- DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), // tgt
- DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), // en
- DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src0),
- DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src1),
+ Op.getOperand(2), // tgt
+ DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src0), // src0
+ DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src1), // src1
Undef, // src2
Undef, // src3
+ Op.getOperand(7), // vm
DAG.getTargetConstant(1, DL, MVT::i1), // compr
- DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1)
+ Op.getOperand(3), // en
+ Op.getOperand(0) // Chain
};
- unsigned Opc = Done->isNullValue() ?
- AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
- return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
+ unsigned Opc = Done->isNullValue() ? AMDGPU::EXP : AMDGPU::EXP_DONE;
+ return SDValue(DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops), 0);
}
case Intrinsic::amdgcn_s_barrier: {
if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
@@ -7183,13 +7636,14 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
// three offsets (voffset, soffset and instoffset) into the SDValue[3] array
// pointed to by Offsets.
unsigned SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
- SelectionDAG &DAG, SDValue *Offsets,
- unsigned Align) const {
+ SelectionDAG &DAG, SDValue *Offsets,
+ Align Alignment) const {
SDLoc DL(CombinedOffset);
if (auto C = dyn_cast<ConstantSDNode>(CombinedOffset)) {
uint32_t Imm = C->getZExtValue();
uint32_t SOffset, ImmOffset;
- if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget, Align)) {
+ if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget,
+ Alignment)) {
Offsets[0] = DAG.getConstant(0, DL, MVT::i32);
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
@@ -7202,7 +7656,7 @@ unsigned SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
uint32_t SOffset, ImmOffset;
int Offset = cast<ConstantSDNode>(N1)->getSExtValue();
if (Offset >= 0 && AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset,
- Subtarget, Align)) {
+ Subtarget, Alignment)) {
Offsets[0] = N0;
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
@@ -7413,7 +7867,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// If there is a possibility that flat instructions access scratch memory,
// then we need to use the same legalization rules we use for private.
- if (AS == AMDGPUAS::FLAT_ADDRESS)
+ if (AS == AMDGPUAS::FLAT_ADDRESS &&
+ !Subtarget->hasMultiDwordFlatScratchAddressing())
AS = MFI->hasFlatScratchInit() ?
AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
@@ -7438,7 +7893,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
AS == AMDGPUAS::GLOBAL_ADDRESS) {
if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() &&
- !Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load) &&
+ Load->isSimple() && isMemOpHasNoClobberedMemOperand(Load) &&
Alignment >= 4 && NumElements < 32) {
if (MemVT.isPow2VectorType())
return SDValue();
@@ -7547,55 +8002,54 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
SDValue RHS = Op.getOperand(1);
EVT VT = Op.getValueType();
const SDNodeFlags Flags = Op->getFlags();
- bool Unsafe = DAG.getTarget().Options.UnsafeFPMath || Flags.hasAllowReciprocal();
- if (!Unsafe && VT == MVT::f32 && hasFP32Denormals(DAG.getMachineFunction()))
+ bool AllowInaccurateRcp = DAG.getTarget().Options.UnsafeFPMath ||
+ Flags.hasApproximateFuncs();
+
+ // Without !fpmath accuracy information, we can't do more because we don't
+ // know exactly whether rcp is accurate enough to meet the !fpmath requirement.
+ if (!AllowInaccurateRcp)
return SDValue();
if (const ConstantFPSDNode *CLHS = dyn_cast<ConstantFPSDNode>(LHS)) {
- if (Unsafe || VT == MVT::f32 || VT == MVT::f16) {
- if (CLHS->isExactlyValue(1.0)) {
- // v_rcp_f32 and v_rsq_f32 do not support denormals, and according to
- // the CI documentation has a worst case error of 1 ulp.
- // OpenCL requires <= 2.5 ulp for 1.0 / x, so it should always be OK to
- // use it as long as we aren't trying to use denormals.
- //
- // v_rcp_f16 and v_rsq_f16 DO support denormals.
-
- // 1.0 / sqrt(x) -> rsq(x)
-
- // XXX - Is UnsafeFPMath sufficient to do this for f64? The maximum ULP
- // error seems really high at 2^29 ULP.
- if (RHS.getOpcode() == ISD::FSQRT)
- return DAG.getNode(AMDGPUISD::RSQ, SL, VT, RHS.getOperand(0));
-
- // 1.0 / x -> rcp(x)
- return DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
- }
+ if (CLHS->isExactlyValue(1.0)) {
+ // v_rcp_f32 and v_rsq_f32 do not support denormals, and according to
+ // the CI documentation have a worst case error of 1 ulp.
+ // OpenCL requires <= 2.5 ulp for 1.0 / x, so it should always be OK to
+ // use it as long as we aren't trying to use denormals.
+ //
+ // v_rcp_f16 and v_rsq_f16 DO support denormals.
- // Same as for 1.0, but expand the sign out of the constant.
- if (CLHS->isExactlyValue(-1.0)) {
- // -1.0 / x -> rcp (fneg x)
- SDValue FNegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
- return DAG.getNode(AMDGPUISD::RCP, SL, VT, FNegRHS);
- }
+ // 1.0 / sqrt(x) -> rsq(x)
+
+ // XXX - Is UnsafeFPMath sufficient to do this for f64? The maximum ULP
+ // error seems really high at 2^29 ULP.
+ if (RHS.getOpcode() == ISD::FSQRT)
+ return DAG.getNode(AMDGPUISD::RSQ, SL, VT, RHS.getOperand(0));
+
+ // 1.0 / x -> rcp(x)
+ return DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
}
- }
- if (Unsafe) {
- // Turn into multiply by the reciprocal.
- // x / y -> x * (1.0 / y)
- SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
- return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, Flags);
+ // Same as for 1.0, but expand the sign out of the constant.
+ if (CLHS->isExactlyValue(-1.0)) {
+ // -1.0 / x -> rcp (fneg x)
+ SDValue FNegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
+ return DAG.getNode(AMDGPUISD::RCP, SL, VT, FNegRHS);
+ }
}
- return SDValue();
+ // Turn into multiply by the reciprocal.
+ // x / y -> x * (1.0 / y)
+ SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
+ return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, Flags);
}
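// Sketch (not from the patch): the rewrite rules implemented by
// lowerFastUnsafeFDIV, written with plain floats. rcp() and rsq() stand in
// for the hardware V_RCP/V_RSQ approximations, and the real lowering only
// fires when unsafe-fp-math or the afn fast-math flag permits it.
#include <cmath>

static float rcp(float X) { return 1.0f / X; }            // ~1 ulp on hardware
static float rsq(float X) { return 1.0f / std::sqrt(X); }

static float fastUnsafeFDiv(float Lhs, float Rhs, bool RhsIsSqrt) {
  if (Lhs == 1.0f)
    return RhsIsSqrt ? rsq(Rhs) : rcp(Rhs); // 1.0 / sqrt(x) -> rsq(x)
  if (Lhs == -1.0f)
    return rcp(-Rhs);                       // -1.0 / x -> rcp(fneg x)
  return Lhs * rcp(Rhs);                    // x / y -> x * rcp(y)
}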
static SDValue getFPBinOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
- EVT VT, SDValue A, SDValue B, SDValue GlueChain) {
+ EVT VT, SDValue A, SDValue B, SDValue GlueChain,
+ SDNodeFlags Flags) {
if (GlueChain->getNumValues() <= 1) {
- return DAG.getNode(Opcode, SL, VT, A, B);
+ return DAG.getNode(Opcode, SL, VT, A, B, Flags);
}
assert(GlueChain->getNumValues() == 3);
@@ -7608,15 +8062,16 @@ static SDValue getFPBinOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
break;
}
- return DAG.getNode(Opcode, SL, VTList, GlueChain.getValue(1), A, B,
- GlueChain.getValue(2));
+ return DAG.getNode(Opcode, SL, VTList,
+ {GlueChain.getValue(1), A, B, GlueChain.getValue(2)},
+ Flags);
}
static SDValue getFPTernOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
EVT VT, SDValue A, SDValue B, SDValue C,
- SDValue GlueChain) {
+ SDValue GlueChain, SDNodeFlags Flags) {
if (GlueChain->getNumValues() <= 1) {
- return DAG.getNode(Opcode, SL, VT, A, B, C);
+ return DAG.getNode(Opcode, SL, VT, {A, B, C}, Flags);
}
assert(GlueChain->getNumValues() == 3);
@@ -7629,8 +8084,9 @@ static SDValue getFPTernOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
break;
}
- return DAG.getNode(Opcode, SL, VTList, GlueChain.getValue(1), A, B, C,
- GlueChain.getValue(2));
+ return DAG.getNode(Opcode, SL, VTList,
+ {GlueChain.getValue(1), A, B, C, GlueChain.getValue(2)},
+ Flags);
}
SDValue SITargetLowering::LowerFDIV16(SDValue Op, SelectionDAG &DAG) const {
@@ -7704,6 +8160,13 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
return FastLowered;
+ // The selection matcher assumes anything with a chain selects to a
+ // mayRaiseFPException machine instruction. Since we're introducing a chain
+ // here, we need to explicitly report nofpexcept for the regular fdiv
+ // lowering.
+ SDNodeFlags Flags = Op->getFlags();
+ Flags.setNoFPExcept(true);
+
SDLoc SL(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
@@ -7713,95 +8176,100 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
SDVTList ScaleVT = DAG.getVTList(MVT::f32, MVT::i1);
SDValue DenominatorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT,
- RHS, RHS, LHS);
+ {RHS, RHS, LHS}, Flags);
SDValue NumeratorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT,
- LHS, RHS, LHS);
+ {LHS, RHS, LHS}, Flags);
// Denominator is scaled to not be denormal, so using rcp is ok.
SDValue ApproxRcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32,
- DenominatorScaled);
+ DenominatorScaled, Flags);
SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f32,
- DenominatorScaled);
+ DenominatorScaled, Flags);
const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE |
(4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
(1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
- const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i16);
+ const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i32);
const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction());
if (!HasFP32Denormals) {
+ // Note we can't use the STRICT_FMA/STRICT_FMUL for the non-strict FDIV
+ // lowering. The chain dependence is insufficient, and we need glue. We do
+ // not need the glue variants in a strictfp function.
+
SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue EnableDenorm;
+ SDNode *EnableDenorm;
if (Subtarget->hasDenormModeInst()) {
const SDValue EnableDenormValue =
getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, SL, Subtarget);
EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs,
- DAG.getEntryNode(), EnableDenormValue);
+ DAG.getEntryNode(), EnableDenormValue).getNode();
} else {
const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE,
SL, MVT::i32);
- EnableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, BindParamVTs,
- DAG.getEntryNode(), EnableDenormValue,
- BitField);
+ EnableDenorm =
+ DAG.getMachineNode(AMDGPU::S_SETREG_B32, SL, BindParamVTs,
+ {EnableDenormValue, BitField, DAG.getEntryNode()});
}
SDValue Ops[3] = {
NegDivScale0,
- EnableDenorm.getValue(0),
- EnableDenorm.getValue(1)
+ SDValue(EnableDenorm, 0),
+ SDValue(EnableDenorm, 1)
};
NegDivScale0 = DAG.getMergeValues(Ops, SL);
}
SDValue Fma0 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0,
- ApproxRcp, One, NegDivScale0);
+ ApproxRcp, One, NegDivScale0, Flags);
SDValue Fma1 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, Fma0, ApproxRcp,
- ApproxRcp, Fma0);
+ ApproxRcp, Fma0, Flags);
SDValue Mul = getFPBinOp(DAG, ISD::FMUL, SL, MVT::f32, NumeratorScaled,
- Fma1, Fma1);
+ Fma1, Fma1, Flags);
SDValue Fma2 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Mul,
- NumeratorScaled, Mul);
+ NumeratorScaled, Mul, Flags);
- SDValue Fma3 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, Fma2, Fma1, Mul, Fma2);
+ SDValue Fma3 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32,
+ Fma2, Fma1, Mul, Fma2, Flags);
SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3,
- NumeratorScaled, Fma3);
+ NumeratorScaled, Fma3, Flags);
if (!HasFP32Denormals) {
- SDValue DisableDenorm;
+ SDNode *DisableDenorm;
if (Subtarget->hasDenormModeInst()) {
const SDValue DisableDenormValue =
getSPDenormModeValue(FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, SL, Subtarget);
DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other,
Fma4.getValue(1), DisableDenormValue,
- Fma4.getValue(2));
+ Fma4.getValue(2)).getNode();
} else {
const SDValue DisableDenormValue =
DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
- DisableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other,
- Fma4.getValue(1), DisableDenormValue,
- BitField, Fma4.getValue(2));
+ DisableDenorm = DAG.getMachineNode(
+ AMDGPU::S_SETREG_B32, SL, MVT::Other,
+ {DisableDenormValue, BitField, Fma4.getValue(1), Fma4.getValue(2)});
}
SDValue OutputChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
- DisableDenorm, DAG.getRoot());
+ SDValue(DisableDenorm, 0), DAG.getRoot());
DAG.setRoot(OutputChain);
}
SDValue Scale = NumeratorScaled.getValue(1);
SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f32,
- Fma4, Fma1, Fma3, Scale);
+ {Fma4, Fma1, Fma3, Scale}, Flags);
- return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f32, Fmas, RHS, LHS);
+ return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f32, Fmas, RHS, LHS, Flags);
}
SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
@@ -7916,7 +8384,8 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// If there is a possibility that flat instructions access scratch memory,
// then we need to use the same legalization rules we use for private.
- if (AS == AMDGPUAS::FLAT_ADDRESS)
+ if (AS == AMDGPUAS::FLAT_ADDRESS &&
+ !Subtarget->hasMultiDwordFlatScratchAddressing())
AS = MFI->hasFlatScratchInit() ?
AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
@@ -7976,22 +8445,24 @@ SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
SDValue Arg = Op.getOperand(0);
SDValue TrigVal;
- // TODO: Should this propagate fast-math-flags?
+ // Propagate fast-math flags so that the multiply we introduce can be folded
+ // if Arg is already the result of a multiply by constant.
+ auto Flags = Op->getFlags();
- SDValue OneOver2Pi = DAG.getConstantFP(0.5 / M_PI, DL, VT);
+ SDValue OneOver2Pi = DAG.getConstantFP(0.5 * numbers::inv_pi, DL, VT);
if (Subtarget->hasTrigReducedRange()) {
- SDValue MulVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi);
- TrigVal = DAG.getNode(AMDGPUISD::FRACT, DL, VT, MulVal);
+ SDValue MulVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi, Flags);
+ TrigVal = DAG.getNode(AMDGPUISD::FRACT, DL, VT, MulVal, Flags);
} else {
- TrigVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi);
+ TrigVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi, Flags);
}
switch (Op.getOpcode()) {
case ISD::FCOS:
- return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, TrigVal);
+ return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, TrigVal, Flags);
case ISD::FSIN:
- return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, TrigVal);
+ return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, TrigVal, Flags);
default:
llvm_unreachable("Wrong trig opcode");
}
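// Sketch (not from the patch): the range reduction LowerTrig performs. The
// hardware SIN/COS take their operand in fractions of a full turn, so the
// argument is scaled by 1/(2*pi); subtargets with a reduced input range also
// take the fractional part (FRACT) first. std::sin stands in for the
// hardware instruction, and the constants below are editor-chosen.
#include <cmath>

static float lowerSinModel(float Arg, bool HasTrigReducedRange) {
  const float OneOver2Pi = 0.15915494f; // 0.5 / pi
  float TrigVal = Arg * OneOver2Pi;
  if (HasTrigReducedRange)
    TrigVal -= std::floor(TrigVal); // FRACT: keep the value in [0, 1)
  // The hardware interprets TrigVal as a fraction of 2*pi radians.
  return std::sin(TrigVal * 6.2831853f);
}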
@@ -8032,7 +8503,7 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
EVT ScalarVT = VT.getScalarType();
- if (ScalarVT != MVT::f32)
+ if (ScalarVT != MVT::f32 && ScalarVT != MVT::f16)
return SDValue();
SelectionDAG &DAG = DCI.DAG;
@@ -8047,8 +8518,14 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
// about in practice.
if (DCI.isAfterLegalizeDAG() && SrcVT == MVT::i32) {
if (DAG.MaskedValueIsZero(Src, APInt::getHighBitsSet(32, 24))) {
- SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, VT, Src);
+ SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, MVT::f32, Src);
DCI.AddToWorklist(Cvt.getNode());
+
+ // For the f16 case, fold to a cast to f32 and then cast back to f16.
+ if (ScalarVT != MVT::f32) {
+ Cvt = DAG.getNode(ISD::FP_ROUND, DL, VT, Cvt,
+ DAG.getTargetConstant(0, DL, MVT::i32));
+ }
return Cvt;
}
}
@@ -8525,7 +9002,7 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
}
}
- if (VT != MVT::i64)
+ if (VT != MVT::i64 || DCI.isBeforeLegalizeOps())
return SDValue();
// TODO: This could be a generic combine with a predicate for extracting the
@@ -8735,6 +9212,11 @@ SDValue SITargetLowering::performRcpCombine(SDNode *N,
N->getFlags());
}
+ if ((VT == MVT::f32 || VT == MVT::f16) && N0.getOpcode() == ISD::FSQRT) {
+ return DCI.DAG.getNode(AMDGPUISD::RSQ, SDLoc(N), VT,
+ N0.getOperand(0), N->getFlags());
+ }
+
return AMDGPUTargetLowering::performRcpCombine(N, DCI);
}
@@ -8776,9 +9258,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
case AMDGPUISD::RSQ:
case AMDGPUISD::RSQ_CLAMP:
case AMDGPUISD::RCP_LEGACY:
- case AMDGPUISD::RSQ_LEGACY:
case AMDGPUISD::RCP_IFLAG:
- case AMDGPUISD::TRIG_PREOP:
case AMDGPUISD::DIV_SCALE:
case AMDGPUISD::DIV_FMAS:
case AMDGPUISD::DIV_FIXUP:
@@ -8881,6 +9361,12 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
case Intrinsic::amdgcn_cubeid:
case Intrinsic::amdgcn_frexp_mant:
case Intrinsic::amdgcn_fdot2:
+ case Intrinsic::amdgcn_rcp:
+ case Intrinsic::amdgcn_rsq:
+ case Intrinsic::amdgcn_rsq_clamp:
+ case Intrinsic::amdgcn_rcp_legacy:
+ case Intrinsic::amdgcn_rsq_legacy:
+ case Intrinsic::amdgcn_trig_preop:
return true;
default:
break;
@@ -9099,8 +9585,7 @@ SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
return SDValue();
// Ordered >= (although NaN inputs should have folded away by now).
- APFloat::cmpResult Cmp = K0->getValueAPF().compare(K1->getValueAPF());
- if (Cmp == APFloat::cmpGreaterThan)
+ if (K0->getValueAPF() > K1->getValueAPF())
return SDValue();
const MachineFunction &MF = DAG.getMachineFunction();
@@ -9275,6 +9760,50 @@ SDValue SITargetLowering::performCvtPkRTZCombine(SDNode *N,
return SDValue();
}
+// Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (<n x e>, var-idx) should be
+// expanded into a set of cmp/select instructions.
+bool SITargetLowering::shouldExpandVectorDynExt(unsigned EltSize,
+ unsigned NumElem,
+ bool IsDivergentIdx) {
+ if (UseDivergentRegisterIndexing)
+ return false;
+
+ unsigned VecSize = EltSize * NumElem;
+
+ // Sub-dword vectors of size 2 dwords or less have a better implementation.
+ if (VecSize <= 64 && EltSize < 32)
+ return false;
+
+ // Always expand the remaining sub-dword cases; otherwise they will be
+ // lowered via memory.
+ if (EltSize < 32)
+ return true;
+
+ // Always do this if var-idx is divergent, otherwise it will become a loop.
+ if (IsDivergentIdx)
+ return true;
+
+ // Large vectors would yield too many compares and v_cndmask_b32 instructions.
+ unsigned NumInsts = NumElem /* Number of compares */ +
+ ((EltSize + 31) / 32) * NumElem /* Number of cndmasks */;
+ return NumInsts <= 16;
+}
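// Sketch (not from the patch): the instruction-count heuristic above applied
// to a few common vector types. One compare per element plus one
// v_cndmask_b32 per 32-bit lane of each element must stay at or below 16 for
// the cmp/select expansion to be worthwhile.
static bool worthExpanding(unsigned EltSize, unsigned NumElem) {
  unsigned NumInsts = NumElem + ((EltSize + 31) / 32) * NumElem;
  return NumInsts <= 16;
}
// v8i32  :  8 +  8 = 16 -> expand
// v4i64  :  4 +  8 = 12 -> expand
// v16i32 : 16 + 16 = 32 -> keep the indexed (movrel/scratch) lowering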
+
+static bool shouldExpandVectorDynExt(SDNode *N) {
+ SDValue Idx = N->getOperand(N->getNumOperands() - 1);
+ if (isa<ConstantSDNode>(Idx))
+ return false;
+
+ SDValue Vec = N->getOperand(0);
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned EltSize = EltVT.getSizeInBits();
+ unsigned NumElem = VecVT.getVectorNumElements();
+
+ return SITargetLowering::shouldExpandVectorDynExt(EltSize, NumElem,
+ Idx->isDivergent());
+}
+
SDValue SITargetLowering::performExtractVectorEltCombine(
SDNode *N, DAGCombinerInfo &DCI) const {
SDValue Vec = N->getOperand(0);
@@ -9336,18 +9865,12 @@ SDValue SITargetLowering::performExtractVectorEltCombine(
unsigned EltSize = EltVT.getSizeInBits();
// EXTRACT_VECTOR_ELT (<n x e>, var-idx) => n x select (e, const-idx)
- // This elminates non-constant index and subsequent movrel or scratch access.
- // Sub-dword vectors of size 2 dword or less have better implementation.
- // Vectors of size bigger than 8 dwords would yield too many v_cndmask_b32
- // instructions.
- if (VecSize <= 256 && (VecSize > 64 || EltSize >= 32) &&
- !isa<ConstantSDNode>(N->getOperand(1))) {
+ if (::shouldExpandVectorDynExt(N)) {
SDLoc SL(N);
SDValue Idx = N->getOperand(1);
- EVT IdxVT = Idx.getValueType();
SDValue V;
for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) {
- SDValue IC = DAG.getConstant(I, SL, IdxVT);
+ SDValue IC = DAG.getVectorIdxConstant(I, SL);
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Vec, IC);
if (I == 0)
V = Elt;
@@ -9402,17 +9925,10 @@ SITargetLowering::performInsertVectorEltCombine(SDNode *N,
SDValue Idx = N->getOperand(2);
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
- unsigned VecSize = VecVT.getSizeInBits();
- unsigned EltSize = EltVT.getSizeInBits();
// INSERT_VECTOR_ELT (<n x e>, var-idx)
// => BUILD_VECTOR n x select (e, const-idx)
- // This elminates non-constant index and subsequent movrel or scratch access.
- // Sub-dword vectors of size 2 dword or less have better implementation.
- // Vectors of size bigger than 8 dwords would yield too many v_cndmask_b32
- // instructions.
- if (isa<ConstantSDNode>(Idx) ||
- VecSize > 256 || (VecSize <= 64 && EltSize < 32))
+ if (!::shouldExpandVectorDynExt(N))
return SDValue();
SelectionDAG &DAG = DCI.DAG;
@@ -9919,39 +10435,50 @@ SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N,
unsigned Offset = N->getOpcode() - AMDGPUISD::CVT_F32_UBYTE0;
SDValue Src = N->getOperand(0);
- SDValue Srl = N->getOperand(0);
- if (Srl.getOpcode() == ISD::ZERO_EXTEND)
- Srl = Srl.getOperand(0);
+ SDValue Shift = N->getOperand(0);
- // TODO: Handle (or x, (srl y, 8)) pattern when known bits are zero.
- if (Srl.getOpcode() == ISD::SRL) {
+ // TODO: Extend type shouldn't matter (assuming legal types).
+ if (Shift.getOpcode() == ISD::ZERO_EXTEND)
+ Shift = Shift.getOperand(0);
+
+ if (Shift.getOpcode() == ISD::SRL || Shift.getOpcode() == ISD::SHL) {
+ // cvt_f32_ubyte1 (shl x, 8) -> cvt_f32_ubyte0 x
+ // cvt_f32_ubyte3 (shl x, 16) -> cvt_f32_ubyte1 x
// cvt_f32_ubyte0 (srl x, 16) -> cvt_f32_ubyte2 x
// cvt_f32_ubyte1 (srl x, 16) -> cvt_f32_ubyte3 x
- // cvt_f32_ubyte0 (srl x, 8) -> cvt_f32_ubyte1 x
-
- if (const ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
- Srl = DAG.getZExtOrTrunc(Srl.getOperand(0), SDLoc(Srl.getOperand(0)),
- EVT(MVT::i32));
+ // cvt_f32_ubyte0 (srl x, 8) -> cvt_f32_ubyte1 x
+ if (auto *C = dyn_cast<ConstantSDNode>(Shift.getOperand(1))) {
+ Shift = DAG.getZExtOrTrunc(Shift.getOperand(0),
+ SDLoc(Shift.getOperand(0)), MVT::i32);
+
+ unsigned ShiftOffset = 8 * Offset;
+ if (Shift.getOpcode() == ISD::SHL)
+ ShiftOffset -= C->getZExtValue();
+ else
+ ShiftOffset += C->getZExtValue();
- unsigned SrcOffset = C->getZExtValue() + 8 * Offset;
- if (SrcOffset < 32 && SrcOffset % 8 == 0) {
- return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0 + SrcOffset / 8, SL,
- MVT::f32, Srl);
+ if (ShiftOffset < 32 && (ShiftOffset % 8) == 0) {
+ return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0 + ShiftOffset / 8, SL,
+ MVT::f32, Shift);
}
}
}
- APInt Demanded = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8);
-
- KnownBits Known;
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.SimplifyDemandedBits(Src, Demanded, Known, TLO)) {
- DCI.CommitTargetLoweringOpt(TLO);
+ APInt DemandedBits = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8);
+ if (TLI.SimplifyDemandedBits(Src, DemandedBits, DCI)) {
+ // We simplified Src. If this node is not dead, visit it again so it is
+ // folded properly.
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ DCI.AddToWorklist(N);
+ return SDValue(N, 0);
}
+ // Handle (or x, (srl y, 8)) pattern when known bits are zero.
+ if (SDValue DemandedSrc =
+ TLI.SimplifyMultipleUseDemandedBits(Src, DemandedBits, DAG))
+ return DAG.getNode(N->getOpcode(), SL, MVT::f32, DemandedSrc);
+
return SDValue();
}
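// Sketch (not from the patch): the byte-index folding performed above.
// CVT_F32_UBYTE<n> reads byte n of its source, so a constant right shift
// moves the selected byte up and a left shift moves it down; the fold is
// only valid while the resulting byte index stays within 0..3. The function
// name is invented; a return of -1 means no fold applies.
static int foldCvtUByteThroughShift(int ByteIdx, bool IsShl, int ShiftAmt) {
  int NewBits = 8 * ByteIdx + (IsShl ? -ShiftAmt : ShiftAmt);
  if (NewBits < 0 || NewBits >= 32 || NewBits % 8 != 0)
    return -1;          // out of range or not a whole byte: no fold
  return NewBits / 8;   // index for the replacement CVT_F32_UBYTE
}
// cvt_f32_ubyte0 (srl x, 16) -> cvt_f32_ubyte2 x
// cvt_f32_ubyte1 (shl x, 8)  -> cvt_f32_ubyte0 x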
@@ -9964,16 +10491,13 @@ SDValue SITargetLowering::performClampCombine(SDNode *N,
const MachineFunction &MF = DCI.DAG.getMachineFunction();
const APFloat &F = CSrc->getValueAPF();
APFloat Zero = APFloat::getZero(F.getSemantics());
- APFloat::cmpResult Cmp0 = F.compare(Zero);
- if (Cmp0 == APFloat::cmpLessThan ||
- (Cmp0 == APFloat::cmpUnordered &&
- MF.getInfo<SIMachineFunctionInfo>()->getMode().DX10Clamp)) {
+ if (F < Zero ||
+ (F.isNaN() && MF.getInfo<SIMachineFunctionInfo>()->getMode().DX10Clamp)) {
return DCI.DAG.getConstantFP(Zero, SDLoc(N), N->getValueType(0));
}
APFloat One(F.getSemantics(), "1.0");
- APFloat::cmpResult Cmp1 = F.compare(One);
- if (Cmp1 == APFloat::cmpGreaterThan)
+ if (F > One)
return DCI.DAG.getConstantFP(One, SDLoc(N), N->getValueType(0));
return SDValue(CSrc, 0);
@@ -10061,10 +10585,10 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case AMDGPUISD::FRACT:
case AMDGPUISD::RSQ:
case AMDGPUISD::RCP_LEGACY:
- case AMDGPUISD::RSQ_LEGACY:
case AMDGPUISD::RCP_IFLAG:
case AMDGPUISD::RSQ_CLAMP:
case AMDGPUISD::LDEXP: {
+ // FIXME: This is probably wrong. If src is an sNaN, it won't be quieted
SDValue Src = N->getOperand(0);
if (Src.isUndef())
return Src;
@@ -10406,24 +10930,6 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
Ops.push_back(ImpDef.getValue(1));
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
}
- case AMDGPU::V_PERMLANE16_B32:
- case AMDGPU::V_PERMLANEX16_B32: {
- ConstantSDNode *FI = cast<ConstantSDNode>(Node->getOperand(0));
- ConstantSDNode *BC = cast<ConstantSDNode>(Node->getOperand(2));
- if (!FI->getZExtValue() && !BC->getZExtValue())
- break;
- SDValue VDstIn = Node->getOperand(6);
- if (VDstIn.isMachineOpcode()
- && VDstIn.getMachineOpcode() == AMDGPU::IMPLICIT_DEF)
- break;
- MachineSDNode *ImpDef = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF,
- SDLoc(Node), MVT::i32);
- SmallVector<SDValue, 8> Ops = { SDValue(FI, 0), Node->getOperand(1),
- SDValue(BC, 0), Node->getOperand(3),
- Node->getOperand(4), Node->getOperand(5),
- SDValue(ImpDef, 0), Node->getOperand(7) };
- return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
- }
default:
break;
}
@@ -10592,89 +11098,50 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
MVT VT) const {
const TargetRegisterClass *RC = nullptr;
if (Constraint.size() == 1) {
+ const unsigned BitWidth = VT.getSizeInBits();
switch (Constraint[0]) {
default:
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
case 's':
case 'r':
- switch (VT.getSizeInBits()) {
- default:
- return std::make_pair(0U, nullptr);
- case 32:
+ switch (BitWidth) {
case 16:
RC = &AMDGPU::SReg_32RegClass;
break;
case 64:
RC = &AMDGPU::SGPR_64RegClass;
break;
- case 96:
- RC = &AMDGPU::SReg_96RegClass;
- break;
- case 128:
- RC = &AMDGPU::SGPR_128RegClass;
- break;
- case 160:
- RC = &AMDGPU::SReg_160RegClass;
- break;
- case 256:
- RC = &AMDGPU::SReg_256RegClass;
- break;
- case 512:
- RC = &AMDGPU::SReg_512RegClass;
+ default:
+ RC = SIRegisterInfo::getSGPRClassForBitWidth(BitWidth);
+ if (!RC)
+ return std::make_pair(0U, nullptr);
break;
}
break;
case 'v':
- switch (VT.getSizeInBits()) {
- default:
- return std::make_pair(0U, nullptr);
- case 32:
+ switch (BitWidth) {
case 16:
RC = &AMDGPU::VGPR_32RegClass;
break;
- case 64:
- RC = &AMDGPU::VReg_64RegClass;
- break;
- case 96:
- RC = &AMDGPU::VReg_96RegClass;
- break;
- case 128:
- RC = &AMDGPU::VReg_128RegClass;
- break;
- case 160:
- RC = &AMDGPU::VReg_160RegClass;
- break;
- case 256:
- RC = &AMDGPU::VReg_256RegClass;
- break;
- case 512:
- RC = &AMDGPU::VReg_512RegClass;
+ default:
+ RC = SIRegisterInfo::getVGPRClassForBitWidth(BitWidth);
+ if (!RC)
+ return std::make_pair(0U, nullptr);
break;
}
break;
case 'a':
if (!Subtarget->hasMAIInsts())
break;
- switch (VT.getSizeInBits()) {
- default:
- return std::make_pair(0U, nullptr);
- case 32:
+ switch (BitWidth) {
case 16:
RC = &AMDGPU::AGPR_32RegClass;
break;
- case 64:
- RC = &AMDGPU::AReg_64RegClass;
- break;
- case 128:
- RC = &AMDGPU::AReg_128RegClass;
- break;
- case 512:
- RC = &AMDGPU::AReg_512RegClass;
+ default:
+ RC = SIRegisterInfo::getAGPRClassForBitWidth(BitWidth);
+ if (!RC)
+ return std::make_pair(0U, nullptr);
break;
- case 1024:
- RC = &AMDGPU::AReg_1024RegClass;
- // v32 types are not legal but we support them here.
- return std::make_pair(0U, RC);
}
break;
}
@@ -10701,9 +11168,29 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(RC->getRegister(Idx), RC);
}
}
+
+ // FIXME: Returns VS_32 for physical SGPR constraints
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
+static bool isImmConstraint(StringRef Constraint) {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'I':
+ case 'J':
+ case 'A':
+ case 'B':
+ case 'C':
+ return true;
+ }
+ } else if (Constraint == "DA" ||
+ Constraint == "DB") {
+ return true;
+ }
+ return false;
+}
+
SITargetLowering::ConstraintType
SITargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
@@ -10715,9 +11202,115 @@ SITargetLowering::getConstraintType(StringRef Constraint) const {
return C_RegisterClass;
}
}
+ if (isImmConstraint(Constraint)) {
+ return C_Other;
+ }
return TargetLowering::getConstraintType(Constraint);
}
+static uint64_t clearUnusedBits(uint64_t Val, unsigned Size) {
+ if (!AMDGPU::isInlinableIntLiteral(Val)) {
+ Val = Val & maskTrailingOnes<uint64_t>(Size);
+ }
+ return Val;
+}
+
+void SITargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ if (isImmConstraint(Constraint)) {
+ uint64_t Val;
+ if (getAsmOperandConstVal(Op, Val) &&
+ checkAsmConstraintVal(Op, Constraint, Val)) {
+ Val = clearUnusedBits(Val, Op.getScalarValueSizeInBits());
+ Ops.push_back(DAG.getTargetConstant(Val, SDLoc(Op), MVT::i64));
+ }
+ } else {
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+ }
+}
+
+bool SITargetLowering::getAsmOperandConstVal(SDValue Op, uint64_t &Val) const {
+ unsigned Size = Op.getScalarValueSizeInBits();
+ if (Size > 64)
+ return false;
+
+ if (Size == 16 && !Subtarget->has16BitInsts())
+ return false;
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ Val = C->getSExtValue();
+ return true;
+ }
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) {
+ Val = C->getValueAPF().bitcastToAPInt().getSExtValue();
+ return true;
+ }
+ if (BuildVectorSDNode *V = dyn_cast<BuildVectorSDNode>(Op)) {
+ if (Size != 16 || Op.getNumOperands() != 2)
+ return false;
+ if (Op.getOperand(0).isUndef() || Op.getOperand(1).isUndef())
+ return false;
+ if (ConstantSDNode *C = V->getConstantSplatNode()) {
+ Val = C->getSExtValue();
+ return true;
+ }
+ if (ConstantFPSDNode *C = V->getConstantFPSplatNode()) {
+ Val = C->getValueAPF().bitcastToAPInt().getSExtValue();
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool SITargetLowering::checkAsmConstraintVal(SDValue Op,
+ const std::string &Constraint,
+ uint64_t Val) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'I':
+ return AMDGPU::isInlinableIntLiteral(Val);
+ case 'J':
+ return isInt<16>(Val);
+ case 'A':
+ return checkAsmConstraintValA(Op, Val);
+ case 'B':
+ return isInt<32>(Val);
+ case 'C':
+ return isUInt<32>(clearUnusedBits(Val, Op.getScalarValueSizeInBits())) ||
+ AMDGPU::isInlinableIntLiteral(Val);
+ default:
+ break;
+ }
+ } else if (Constraint.size() == 2) {
+ if (Constraint == "DA") {
+ int64_t HiBits = static_cast<int32_t>(Val >> 32);
+ int64_t LoBits = static_cast<int32_t>(Val);
+ return checkAsmConstraintValA(Op, HiBits, 32) &&
+ checkAsmConstraintValA(Op, LoBits, 32);
+ }
+ if (Constraint == "DB") {
+ return true;
+ }
+ }
+ llvm_unreachable("Invalid asm constraint");
+}
+
+bool SITargetLowering::checkAsmConstraintValA(SDValue Op,
+ uint64_t Val,
+ unsigned MaxSize) const {
+ unsigned Size = std::min<unsigned>(Op.getScalarValueSizeInBits(), MaxSize);
+ bool HasInv2Pi = Subtarget->hasInv2PiInlineImm();
+ if ((Size == 16 && AMDGPU::isInlinableLiteral16(Val, HasInv2Pi)) ||
+ (Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
+ (Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi))) {
+ return true;
+ }
+ return false;
+}
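// Sketch (not from the patch): the immediate ranges accepted by the
// single-letter constraints checked above, using the AMDGPU inline integer
// constant range [-16, 64] for 'I'. 'A' and the "DA"/"DB" pairs additionally
// accept the inlinable floating-point encodings, which this simplified model
// leaves out; all names here are invented.
#include <cstdint>

static bool isInlinableIntImm(std::int64_t V) { return V >= -16 && V <= 64; }

static bool checkImmConstraintModel(char C, std::int64_t Val) {
  switch (C) {
  case 'I': return isInlinableIntImm(Val);                    // inline integer
  case 'J': return Val >= INT16_MIN && Val <= INT16_MAX;      // signed 16 bit
  case 'B': return Val >= INT32_MIN && Val <= INT32_MAX;      // signed 32 bit
  case 'C': return (Val >= 0 && Val <= UINT32_MAX) ||         // unsigned 32 bit
                   isInlinableIntImm(Val);
  default:  return false; // 'A', "DA", "DB" need the FP inline-value checks
  }
}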
+
// Figure out which registers should be reserved for stack access. Only after
// the function is legalized do we know all of the non-spill stack objects or if
// calls are present.
@@ -10745,11 +11338,6 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
if (Info->getFrameOffsetReg() != AMDGPU::FP_REG)
MRI.replaceRegWith(AMDGPU::FP_REG, Info->getFrameOffsetReg());
- if (Info->getScratchWaveOffsetReg() != AMDGPU::SCRATCH_WAVE_OFFSET_REG) {
- MRI.replaceRegWith(AMDGPU::SCRATCH_WAVE_OFFSET_REG,
- Info->getScratchWaveOffsetReg());
- }
-
Info->limitOccupancy(MF);
if (ST.isWave32() && !MF.empty()) {
@@ -10772,15 +11360,18 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
}
TargetLoweringBase::finalizeLowering(MF);
+
+ // Allocate a VGPR for future SGPR spills if the
+ // "amdgpu-reserve-vgpr-for-sgpr-spill" option is used.
+ // FIXME: We won't need this hack if we split SGPR allocation from VGPR
+ if (VGPRReserveforSGPRSpill && !Info->VGPRReservedForSGPRSpill &&
+ !Info->isEntryFunction() && MF.getFrameInfo().hasStackObjects())
+ Info->reserveVGPRforSGPRSpills(MF);
}
-void SITargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
- KnownBits &Known,
- const APInt &DemandedElts,
- const SelectionDAG &DAG,
- unsigned Depth) const {
- TargetLowering::computeKnownBitsForFrameIndex(Op, Known, DemandedElts,
- DAG, Depth);
+void SITargetLowering::computeKnownBitsForFrameIndex(
+ const int FI, KnownBits &Known, const MachineFunction &MF) const {
+ TargetLowering::computeKnownBitsForFrameIndex(FI, Known, MF);
// Set the high bits to zero based on the maximum allowed scratch size per
// wave. We can't use vaddr in MUBUF instructions if we don't know the address
@@ -10788,6 +11379,27 @@ void SITargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
Known.Zero.setHighBits(getSubtarget()->getKnownHighZeroBitsForFrameIndex());
}
+Align SITargetLowering::computeKnownAlignForTargetInstr(
+ GISelKnownBits &KB, Register R, const MachineRegisterInfo &MRI,
+ unsigned Depth) const {
+ const MachineInstr *MI = MRI.getVRegDef(R);
+ switch (MI->getOpcode()) {
+ case AMDGPU::G_INTRINSIC:
+ case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
+ // FIXME: Can this move to generic code? What about the case where the call
+ // site specifies a lower alignment?
+ Intrinsic::ID IID = MI->getIntrinsicID();
+ LLVMContext &Ctx = KB.getMachineFunction().getFunction().getContext();
+ AttributeList Attrs = Intrinsic::getAttributes(Ctx, IID);
+ if (MaybeAlign RetAlign = Attrs.getRetAlignment())
+ return *RetAlign;
+ return Align(1);
+ }
+ default:
+ return Align(1);
+ }
+}
+
Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
const Align PrefAlign = TargetLowering::getPrefLoopAlignment(ML);
const Align CacheLineAlign = Align(64);
@@ -10879,30 +11491,19 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N,
case ISD::CopyFromReg:
{
const RegisterSDNode *R = cast<RegisterSDNode>(N->getOperand(1));
- const MachineFunction * MF = FLI->MF;
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
- const MachineRegisterInfo &MRI = MF->getRegInfo();
- const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo();
- unsigned Reg = R->getReg();
- if (Register::isPhysicalRegister(Reg))
- return !TRI.isSGPRReg(MRI, Reg);
-
- if (MRI.isLiveIn(Reg)) {
- // workitem.id.x workitem.id.y workitem.id.z
- // Any VGPR formal argument is also considered divergent
- if (!TRI.isSGPRReg(MRI, Reg))
- return true;
- // Formal arguments of non-entry functions
- // are conservatively considered divergent
- else if (!AMDGPU::isEntryFunctionCC(FLI->Fn->getCallingConv()))
- return true;
- return false;
- }
- const Value *V = FLI->getValueFromVirtualReg(Reg);
- if (V)
+ const MachineRegisterInfo &MRI = FLI->MF->getRegInfo();
+ const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ Register Reg = R->getReg();
+
+ // FIXME: Why does this need to consider isLiveIn?
+ if (Reg.isPhysical() || MRI.isLiveIn(Reg))
+ return !TRI->isSGPRReg(MRI, Reg);
+
+ if (const Value *V = FLI->getValueFromVirtualReg(R->getReg()))
return KDA->isDivergent(V);
+
assert(Reg == FLI->DemoteRegister || isCopyFromRegOfInlineAsm(N));
- return !TRI.isSGPRReg(MRI, Reg);
+ return !TRI->isSGPRReg(MRI, Reg);
}
break;
case ISD::LOAD: {
@@ -11004,7 +11605,19 @@ SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
return RC;
}
-static bool hasCFUser(const Value *V, SmallPtrSet<const Value *, 16> &Visited) {
+// FIXME: This is a workaround for DivergenceAnalysis not understanding always
+// uniform values (as produced by the mask results of control flow intrinsics)
+// used outside of divergent blocks. The phi users need to also be treated as
+// always uniform.
+static bool hasCFUser(const Value *V, SmallPtrSet<const Value *, 16> &Visited,
+ unsigned WaveSize) {
+  // FIXME: We assume we never cast the mask results of a control flow
+ // intrinsic.
+ // Early exit if the type won't be consistent as a compile time hack.
+ IntegerType *IT = dyn_cast<IntegerType>(V->getType());
+ if (!IT || IT->getBitWidth() != WaveSize)
+ return false;
+
if (!isa<Instruction>(V))
return false;
if (!Visited.insert(V).second)
@@ -11036,7 +11649,7 @@ static bool hasCFUser(const Value *V, SmallPtrSet<const Value *, 16> &Visited) {
}
}
} else {
- Result = hasCFUser(U, Visited);
+ Result = hasCFUser(U, Visited, WaveSize);
}
if (Result)
break;
@@ -11046,36 +11659,16 @@ static bool hasCFUser(const Value *V, SmallPtrSet<const Value *, 16> &Visited) {
bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
const Value *V) const {
- if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
- switch (Intrinsic->getIntrinsicID()) {
- default:
- return false;
- case Intrinsic::amdgcn_if_break:
- return true;
- }
- }
- if (const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V)) {
- if (const IntrinsicInst *Intrinsic =
- dyn_cast<IntrinsicInst>(ExtValue->getOperand(0))) {
- switch (Intrinsic->getIntrinsicID()) {
- default:
- return false;
- case Intrinsic::amdgcn_if:
- case Intrinsic::amdgcn_else: {
- ArrayRef<unsigned> Indices = ExtValue->getIndices();
- if (Indices.size() == 1 && Indices[0] == 1) {
- return true;
- }
- }
- }
- }
- }
if (const CallInst *CI = dyn_cast<CallInst>(V)) {
- if (isa<InlineAsm>(CI->getCalledValue())) {
+ if (CI->isInlineAsm()) {
+ // FIXME: This cannot give a correct answer. This should only trigger in
+ // the case where inline asm returns mixed SGPR and VGPR results, used
+ // outside the defining block. We don't have a specific result to
+ // consider, so this assumes if any value is SGPR, the overall register
+ // also needs to be SGPR.
const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
- ImmutableCallSite CS(CI);
TargetLowering::AsmOperandInfoVector TargetConstraints = ParseConstraints(
- MF.getDataLayout(), Subtarget->getRegisterInfo(), CS);
+ MF.getDataLayout(), Subtarget->getRegisterInfo(), *CI);
for (auto &TC : TargetConstraints) {
if (TC.Type == InlineAsm::isOutput) {
ComputeConstraintToUse(TC, SDValue());
@@ -11095,5 +11688,20 @@ bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
}
}
SmallPtrSet<const Value *, 16> Visited;
- return hasCFUser(V, Visited);
+ return hasCFUser(V, Visited, Subtarget->getWavefrontSize());
+}
+
+std::pair<int, MVT>
+SITargetLowering::getTypeLegalizationCost(const DataLayout &DL,
+ Type *Ty) const {
+ auto Cost = TargetLoweringBase::getTypeLegalizationCost(DL, Ty);
+ auto Size = DL.getTypeSizeInBits(Ty);
+ // Maximum load or store can handle 8 dwords for scalar and 4 for
+ // vector ALU. Let's assume anything above 8 dwords is expensive
+ // even if legal.
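+  // For example, a 256-bit type keeps its base legalization cost, while a
+  // 512-bit type gets Cost.first = (512 + 255) / 256 = 2.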
+ if (Size <= 256)
+ return Cost;
+
+ Cost.first = (Size + 255) / 256;
+ return Cost;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h
index d59495b052a4..f4c076464057 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -42,7 +42,8 @@ private:
SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Chain,
- uint64_t Offset, unsigned Align, bool Signed,
+ uint64_t Offset, Align Alignment,
+ bool Signed,
const ISD::InputArg *Arg = nullptr) const;
SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA,
@@ -60,7 +61,7 @@ private:
SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
SelectionDAG &DAG) const;
SDValue lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Offset,
- SDValue GLC, SDValue DLC, SelectionDAG &DAG) const;
+ SDValue CachePolicy, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
@@ -107,7 +108,7 @@ private:
/// Converts \p Op, which must be of floating point type, to the
/// floating point type \p VT, by either extending or truncating it.
- SDValue getFPExtOrFPTrunc(SelectionDAG &DAG,
+ SDValue getFPExtOrFPRound(SelectionDAG &DAG,
SDValue Op,
const SDLoc &DL,
EVT VT) const;
@@ -119,6 +120,7 @@ private:
/// Custom lowering for ISD::FP_ROUND for MVT::f16.
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const;
SDValue getSegmentAperture(unsigned AS, const SDLoc &DL,
SelectionDAG &DAG) const;
@@ -199,6 +201,15 @@ public:
/// global value \p GV, false otherwise.
bool shouldEmitPCReloc(const GlobalValue *GV) const;
+ /// \returns true if this should use a literal constant for an LDS address,
+ /// and not emit a relocation for an LDS global.
+ bool shouldUseLDSConstAddress(const GlobalValue *GV) const;
+
+ /// Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (<n x e>, var-idx) should be
+ /// expanded into a set of cmp/select instructions.
+ static bool shouldExpandVectorDynExt(unsigned EltSize, unsigned NumElem,
+ bool IsDivergentIdx);
+
private:
// Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
// three offsets (voffset, soffset and instoffset) into the SDValue[3] array
@@ -206,7 +217,7 @@ private:
/// \returns 0 If there is a non-constant offset or if the offset is 0.
/// Otherwise returns the constant offset.
unsigned setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG,
- SDValue *Offsets, unsigned Align = 4) const;
+ SDValue *Offsets, Align Alignment = Align(4)) const;
// Handle 8 bit and 16 bit buffer loads
SDValue handleByteShortBufferLoads(SelectionDAG &DAG, EVT LoadVT, SDLoc DL,
@@ -253,15 +264,18 @@ public:
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *IsFast = nullptr) const override;
- EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
- unsigned SrcAlign, bool IsMemset,
- bool ZeroMemset,
- bool MemcpyStrSrc,
+ EVT getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const override;
bool isMemOpUniform(const SDNode *N) const;
bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const;
+ static bool isNonGlobalAddrSpace(unsigned AS) {
+ return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS ||
+ AS == AMDGPUAS::PRIVATE_ADDRESS;
+ }
+
+ // FIXME: Missing constant_32bit
static bool isFlatGlobalAddrSpace(unsigned AS) {
return AS == AMDGPUAS::GLOBAL_ADDRESS ||
AS == AMDGPUAS::FLAT_ADDRESS ||
@@ -330,6 +344,9 @@ public:
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue lowerDYNAMIC_STACKALLOCImpl(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+
Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
@@ -351,8 +368,7 @@ public:
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const override;
- bool isFMADLegalForFAddFSub(const SelectionDAG &DAG,
- const SDNode *N) const override;
+ bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const override;
SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
@@ -377,17 +393,29 @@ public:
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;
ConstraintType getConstraintType(StringRef Constraint) const override;
+ void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const override;
+ bool getAsmOperandConstVal(SDValue Op, uint64_t &Val) const;
+ bool checkAsmConstraintVal(SDValue Op,
+ const std::string &Constraint,
+ uint64_t Val) const;
+ bool checkAsmConstraintValA(SDValue Op,
+ uint64_t Val,
+ unsigned MaxSize = 64) const;
SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL,
SDValue V) const;
void finalizeLowering(MachineFunction &MF) const override;
- void computeKnownBitsForFrameIndex(const SDValue Op,
+ void computeKnownBitsForFrameIndex(int FrameIdx,
KnownBits &Known,
- const APInt &DemandedElts,
- const SelectionDAG &DAG,
- unsigned Depth = 0) const override;
+ const MachineFunction &MF) const override;
+ Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R,
+ const MachineRegisterInfo &MRI,
+ unsigned Depth = 0) const override;
bool isSDNodeSourceOfDivergence(const SDNode *N,
FunctionLoweringInfo *FLI, LegacyDivergenceAnalysis *DA) const override;
@@ -432,6 +460,13 @@ public:
MachineFunction &MF,
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) const;
+ void allocateSpecialInputVGPRsFixed(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) const;
+
+ std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL,
+ Type *Ty) const;
};
} // End namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
new file mode 100644
index 000000000000..35c49ae8c0dd
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
@@ -0,0 +1,203 @@
+//===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Insert s_clause instructions to form hard clauses.
+///
+/// Clausing load instructions can give cache coherency benefits. Before gfx10,
+/// the hardware automatically detected "soft clauses", which were sequences of
+/// memory instructions of the same type. In gfx10 this detection was removed,
+/// and the s_clause instruction was introduced to explicitly mark "hard
+/// clauses".
+///
+/// It's the scheduler's job to form the clauses by putting similar memory
+/// instructions next to each other. Our job is just to insert an s_clause
+/// instruction to mark the start of each clause.
+///
+/// Note that hard clauses are very similar to, but logically distinct from, the
+/// groups of instructions that have to be restartable when XNACK is enabled.
+/// The rules are slightly different in each case. For example an s_nop
+/// instruction breaks a restartable group, but can appear in the middle of a
+/// hard clause. (Before gfx10 there wasn't a distinction, and both were called
+/// "soft clauses" or just "clauses".)
+///
+/// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
+/// groups, not hard clauses.
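+///
+/// For example (an illustrative sketch only, with arbitrary register choices),
+/// a clause of two global loads would be marked like this:
+///
+///   s_clause 0x1
+///   global_load_dword v1, v[2:3], off
+///   global_load_dword v4, v[5:6], off
+///
+/// where the s_clause immediate is the clause length minus one.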
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-insert-hard-clauses"
+
+namespace {
+
+enum HardClauseType {
+ // Texture, buffer, global or scratch memory instructions.
+ HARDCLAUSE_VMEM,
+ // Flat (not global or scratch) memory instructions.
+ HARDCLAUSE_FLAT,
+ // Instructions that access LDS.
+ HARDCLAUSE_LDS,
+ // Scalar memory instructions.
+ HARDCLAUSE_SMEM,
+ // VALU instructions.
+ HARDCLAUSE_VALU,
+ LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,
+
+ // Internal instructions, which are allowed in the middle of a hard clause,
+ // except for s_waitcnt.
+ HARDCLAUSE_INTERNAL,
+ // Instructions that are not allowed in a hard clause: SALU, export, branch,
+ // message, GDS, s_waitcnt and anything else not mentioned above.
+ HARDCLAUSE_ILLEGAL,
+};
+
+HardClauseType getHardClauseType(const MachineInstr &MI) {
+ // On current architectures we only get a benefit from clausing loads.
+ if (MI.mayLoad()) {
+ if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI))
+ return HARDCLAUSE_VMEM;
+ if (SIInstrInfo::isFLAT(MI))
+ return HARDCLAUSE_FLAT;
+ // TODO: LDS
+ if (SIInstrInfo::isSMRD(MI))
+ return HARDCLAUSE_SMEM;
+ }
+
+ // Don't form VALU clauses. It's not clear what benefit they give, if any.
+
+ // In practice s_nop is the only internal instruction we're likely to see.
+ // It's safe to treat the rest as illegal.
+ if (MI.getOpcode() == AMDGPU::S_NOP)
+ return HARDCLAUSE_INTERNAL;
+ return HARDCLAUSE_ILLEGAL;
+}
+
+class SIInsertHardClauses : public MachineFunctionPass {
+public:
+ static char ID;
+
+ SIInsertHardClauses() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ // Track information about a clause as we discover it.
+ struct ClauseInfo {
+ // The type of all (non-internal) instructions in the clause.
+ HardClauseType Type = HARDCLAUSE_ILLEGAL;
+ // The first (necessarily non-internal) instruction in the clause.
+ MachineInstr *First = nullptr;
+ // The last non-internal instruction in the clause.
+ MachineInstr *Last = nullptr;
+ // The length of the clause including any internal instructions in the
+ // middle or after the end of the clause.
+ unsigned Length = 0;
+ // The base operands of *Last.
+ SmallVector<const MachineOperand *, 4> BaseOps;
+ };
+
+ bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
+ // Get the size of the clause excluding any internal instructions at the
+ // end.
+ unsigned Size =
+ std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1;
+ if (Size < 2)
+ return false;
+ assert(Size <= 64 && "Hard clause is too long!");
+
+ auto &MBB = *CI.First->getParent();
+ auto ClauseMI =
+ BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
+ .addImm(Size - 1);
+ finalizeBundle(MBB, ClauseMI->getIterator(),
+ std::next(CI.Last->getIterator()));
+ return true;
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ if (!ST.hasHardClauses())
+ return false;
+
+ const SIInstrInfo *SII = ST.getInstrInfo();
+ const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+
+ bool Changed = false;
+ for (auto &MBB : MF) {
+ ClauseInfo CI;
+ for (auto &MI : MBB) {
+ HardClauseType Type = getHardClauseType(MI);
+
+ int64_t Dummy1;
+ bool Dummy2;
+ unsigned Dummy3;
+ SmallVector<const MachineOperand *, 4> BaseOps;
+ if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
+ if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
+ Dummy3, TRI)) {
+ // We failed to get the base operands, so we'll never clause this
+ // instruction with any other, so pretend it's illegal.
+ Type = HARDCLAUSE_ILLEGAL;
+ }
+ }
+
+ if (CI.Length == 64 ||
+ (CI.Length && Type != HARDCLAUSE_INTERNAL &&
+ (Type != CI.Type ||
+ // Note that we lie to shouldClusterMemOps about the size of the
+ // cluster. When shouldClusterMemOps is called from the machine
+ // scheduler it limits the size of the cluster to avoid increasing
+ // register pressure too much, but this pass runs after register
+ // allocation so there is no need for that kind of limit.
+ !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) {
+ // Finish the current clause.
+ Changed |= emitClause(CI, SII);
+ CI = ClauseInfo();
+ }
+
+ if (CI.Length) {
+ // Extend the current clause.
+ ++CI.Length;
+ if (Type != HARDCLAUSE_INTERNAL) {
+ CI.Last = &MI;
+ CI.BaseOps = std::move(BaseOps);
+ }
+ } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
+ // Start a new clause.
+ CI = ClauseInfo{Type, &MI, &MI, 1, std::move(BaseOps)};
+ }
+ }
+
+ // Finish the last clause in the basic block if any.
+ if (CI.Length)
+ Changed |= emitClause(CI, SII);
+ }
+
+ return Changed;
+ }
+};
+
+} // namespace
+
+char SIInsertHardClauses::ID = 0;
+
+char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID;
+
+INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses",
+ false, false)
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index 87e63fcc4a04..052db5f6ea71 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -18,9 +18,11 @@
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -28,6 +30,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -41,7 +44,7 @@ using namespace llvm;
#define DEBUG_TYPE "si-insert-skips"
static cl::opt<unsigned> SkipThresholdFlag(
- "amdgpu-skip-threshold",
+ "amdgpu-skip-threshold-legacy",
cl::desc("Number of instructions before jumping over divergent control flow"),
cl::init(12), cl::Hidden);
@@ -52,21 +55,22 @@ private:
const SIRegisterInfo *TRI = nullptr;
const SIInstrInfo *TII = nullptr;
unsigned SkipThreshold = 0;
+ MachineDominatorTree *MDT = nullptr;
+
+ MachineBasicBlock *EarlyExitBlock = nullptr;
bool shouldSkip(const MachineBasicBlock &From,
const MachineBasicBlock &To) const;
- bool skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB);
-
- void kill(MachineInstr &MI);
+ bool dominatesAllReachable(MachineBasicBlock &MBB);
+ void createEarlyExitBlock(MachineBasicBlock &MBB);
+ void skipIfDead(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ DebugLoc DL);
- MachineBasicBlock *insertSkipBlock(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ bool kill(MachineInstr &MI);
bool skipMaskBranch(MachineInstr &MI, MachineBasicBlock &MBB);
- bool optimizeVccBranch(MachineInstr &MI) const;
-
public:
static char ID;
@@ -79,6 +83,8 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@@ -87,8 +93,11 @@ public:
char SIInsertSkips::ID = 0;
-INITIALIZE_PASS(SIInsertSkips, DEBUG_TYPE,
- "SI insert s_cbranch_execz instructions", false, false)
+INITIALIZE_PASS_BEGIN(SIInsertSkips, DEBUG_TYPE,
+ "SI insert s_cbranch_execz instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(SIInsertSkips, DEBUG_TYPE,
+ "SI insert s_cbranch_execz instructions", false, false)
char &llvm::SIInsertSkipsPassID = SIInsertSkips::ID;
@@ -146,42 +155,110 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
return false;
}
-bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
- MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction *MF = MBB.getParent();
-
- if (MF->getFunction().getCallingConv() != CallingConv::AMDGPU_PS ||
- !shouldSkip(MBB, MBB.getParent()->back()))
- return false;
+/// Check whether \p MBB dominates all blocks that are reachable from it.
+bool SIInsertSkips::dominatesAllReachable(MachineBasicBlock &MBB) {
+ for (MachineBasicBlock *Other : depth_first(&MBB)) {
+ if (!MDT->dominates(&MBB, Other))
+ return false;
+ }
+ return true;
+}
- MachineBasicBlock *SkipBB = insertSkipBlock(MBB, MI.getIterator());
+static void generatePsEndPgm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ const SIInstrInfo *TII) {
+ // Generate "null export; s_endpgm".
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE))
+ .addImm(0x09) // V_008DFC_SQ_EXP_NULL
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addImm(1) // vm
+ .addImm(0) // compr
+ .addImm(0); // en
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
+}
- const DebugLoc &DL = MI.getDebugLoc();
+void SIInsertSkips::createEarlyExitBlock(MachineBasicBlock &MBB) {
+ MachineFunction *MF = MBB.getParent();
+ DebugLoc DL;
- // If the exec mask is non-zero, skip the next two instructions
- BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addMBB(&NextBB);
+ assert(!EarlyExitBlock);
+ EarlyExitBlock = MF->CreateMachineBasicBlock();
+ MF->insert(MF->end(), EarlyExitBlock);
- MachineBasicBlock::iterator Insert = SkipBB->begin();
+ generatePsEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII);
+}
- // Exec mask is zero: Export to NULL target...
- BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP_DONE))
- .addImm(0x09) // V_008DFC_SQ_EXP_NULL
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addImm(1) // vm
- .addImm(0) // compr
- .addImm(0); // en
+/// Insert an "if exec=0 { null export; s_endpgm }" sequence before the given
+/// iterator. Only applies to pixel shaders.
+void SIInsertSkips::skipIfDead(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL) {
+ MachineFunction *MF = MBB.getParent();
+ assert(MF->getFunction().getCallingConv() == CallingConv::AMDGPU_PS);
+
+ // It is possible for an SI_KILL_*_TERMINATOR to sit at the bottom of a
+ // basic block that has no further successors (e.g., there was an
+  // `unreachable` there in IR). This can happen when the original source
+  // has the form:
+ //
+ // if (uniform_condition) {
+ // write_to_memory();
+ // discard;
+ // }
+ //
+ // In this case, we write the "null_export; s_endpgm" skip code in the
+ // already-existing basic block.
+ auto NextBBI = std::next(MBB.getIterator());
+ bool NoSuccessor = I == MBB.end() &&
+ llvm::find(MBB.successors(), &*NextBBI) == MBB.succ_end();
+
+ if (NoSuccessor) {
+ generatePsEndPgm(MBB, I, DL, TII);
+ } else {
+ if (!EarlyExitBlock) {
+ createEarlyExitBlock(MBB);
+ // Update next block pointer to reflect any new blocks
+ NextBBI = std::next(MBB.getIterator());
+ }
- // ... and terminate wavefront.
- BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
+ auto BranchMI = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
+ .addMBB(EarlyExitBlock);
+
+ // Split the block if the branch will not come at the end.
+ auto Next = std::next(BranchMI->getIterator());
+ if (Next != MBB.end() && !Next->isTerminator()) {
+ MachineBasicBlock *SplitBB =
+ MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF->insert(NextBBI, SplitBB);
+ SplitBB->splice(SplitBB->begin(), &MBB, I, MBB.end());
+ SplitBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ // FIXME: the expectation is that this will be used near the beginning
+ // of a block so just assume all registers are still live.
+ for (auto LiveIn : MBB.liveins())
+ SplitBB->addLiveIn(LiveIn);
+ MBB.addSuccessor(SplitBB);
+
+ // Update dominator tree
+ using DomTreeT = DomTreeBase<MachineBasicBlock>;
+ SmallVector<DomTreeT::UpdateType, 16> DTUpdates;
+ for (MachineBasicBlock *Succ : SplitBB->successors()) {
+ DTUpdates.push_back({DomTreeT::Insert, SplitBB, Succ});
+ DTUpdates.push_back({DomTreeT::Delete, &MBB, Succ});
+ }
+ DTUpdates.push_back({DomTreeT::Insert, &MBB, SplitBB});
+ MDT->getBase().applyUpdates(DTUpdates);
+ }
- return true;
+ MBB.addSuccessor(EarlyExitBlock);
+ MDT->getBase().insertEdge(&MBB, EarlyExitBlock);
+ }
}
-void SIInsertSkips::kill(MachineInstr &MI) {
+/// Translate a SI_KILL_*_TERMINATOR into exec-manipulating instructions.
+/// Return true unless the terminator is a no-op.
+bool SIInsertSkips::kill(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
@@ -268,7 +345,7 @@ void SIInsertSkips::kill(MachineInstr &MI) {
I.addImm(0); // omod
}
- break;
+ return true;
}
case AMDGPU::SI_KILL_I1_TERMINATOR: {
const MachineFunction *MF = MI.getParent()->getParent();
@@ -283,11 +360,13 @@ void SIInsertSkips::kill(MachineInstr &MI) {
int64_t Imm = Op.getImm();
assert(Imm == 0 || Imm == -1);
- if (Imm == KillVal)
+ if (Imm == KillVal) {
BuildMI(MBB, &MI, DL, TII->get(ST.isWave32() ? AMDGPU::S_MOV_B32
: AMDGPU::S_MOV_B64), Exec)
.addImm(0);
- break;
+ return true;
+ }
+ return false;
}
unsigned Opcode = KillVal ? AMDGPU::S_ANDN2_B64 : AMDGPU::S_AND_B64;
@@ -296,27 +375,13 @@ void SIInsertSkips::kill(MachineInstr &MI) {
BuildMI(MBB, &MI, DL, TII->get(Opcode), Exec)
.addReg(Exec)
.add(Op);
- break;
+ return true;
}
default:
llvm_unreachable("invalid opcode, expected SI_KILL_*_TERMINATOR");
}
}
-MachineBasicBlock *SIInsertSkips::insertSkipBlock(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
- MachineFunction *MF = MBB.getParent();
-
- MachineBasicBlock *SkipBB = MF->CreateMachineBasicBlock();
- MachineFunction::iterator MBBI(MBB);
- ++MBBI;
-
- MF->insert(MBBI, SkipBB);
- MBB.addSuccessor(SkipBB);
-
- return SkipBB;
-}
-
// Returns true if a branch over the block was inserted.
bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
MachineBasicBlock &SrcMBB) {
@@ -334,140 +399,24 @@ bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
return true;
}
-bool SIInsertSkips::optimizeVccBranch(MachineInstr &MI) const {
- // Match:
- // sreg = -1
- // vcc = S_AND_B64 exec, sreg
- // S_CBRANCH_VCC[N]Z
- // =>
- // S_CBRANCH_EXEC[N]Z
- bool Changed = false;
- MachineBasicBlock &MBB = *MI.getParent();
- const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
- const bool IsWave32 = ST.isWave32();
- const unsigned CondReg = TRI->getVCC();
- const unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- const unsigned And = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
-
- MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(),
- E = MBB.rend();
- bool ReadsCond = false;
- unsigned Threshold = 5;
- for (++A ; A != E ; ++A) {
- if (!--Threshold)
- return false;
- if (A->modifiesRegister(ExecReg, TRI))
- return false;
- if (A->modifiesRegister(CondReg, TRI)) {
- if (!A->definesRegister(CondReg, TRI) || A->getOpcode() != And)
- return false;
- break;
- }
- ReadsCond |= A->readsRegister(CondReg, TRI);
- }
- if (A == E)
- return false;
-
- MachineOperand &Op1 = A->getOperand(1);
- MachineOperand &Op2 = A->getOperand(2);
- if (Op1.getReg() != ExecReg && Op2.isReg() && Op2.getReg() == ExecReg) {
- TII->commuteInstruction(*A);
- Changed = true;
- }
- if (Op1.getReg() != ExecReg)
- return Changed;
- if (Op2.isImm() && Op2.getImm() != -1)
- return Changed;
-
- unsigned SReg = AMDGPU::NoRegister;
- if (Op2.isReg()) {
- SReg = Op2.getReg();
- auto M = std::next(A);
- bool ReadsSreg = false;
- for ( ; M != E ; ++M) {
- if (M->definesRegister(SReg, TRI))
- break;
- if (M->modifiesRegister(SReg, TRI))
- return Changed;
- ReadsSreg |= M->readsRegister(SReg, TRI);
- }
- if (M == E ||
- !M->isMoveImmediate() ||
- !M->getOperand(1).isImm() ||
- M->getOperand(1).getImm() != -1)
- return Changed;
- // First if sreg is only used in and instruction fold the immediate
- // into that and.
- if (!ReadsSreg && Op2.isKill()) {
- A->getOperand(2).ChangeToImmediate(-1);
- M->eraseFromParent();
- }
- }
-
- if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC) &&
- MI.killsRegister(CondReg, TRI))
- A->eraseFromParent();
-
- bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
- if (SReg == ExecReg) {
- if (IsVCCZ) {
- MI.eraseFromParent();
- return true;
- }
- MI.setDesc(TII->get(AMDGPU::S_BRANCH));
- } else {
- MI.setDesc(TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ
- : AMDGPU::S_CBRANCH_EXECNZ));
- }
-
- MI.RemoveOperand(MI.findRegisterUseOperandIdx(CondReg, false /*Kill*/, TRI));
- MI.addImplicitDefUseOperands(*MBB.getParent());
-
- return true;
-}
-
bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
+ MDT = &getAnalysis<MachineDominatorTree>();
SkipThreshold = SkipThresholdFlag;
- bool HaveKill = false;
+ SmallVector<MachineInstr *, 4> KillInstrs;
bool MadeChange = false;
- // Track depth of exec mask, divergent branches.
- SmallVector<MachineBasicBlock *, 16> ExecBranchStack;
-
- MachineFunction::iterator NextBB;
-
- MachineBasicBlock *EmptyMBBAtEnd = nullptr;
-
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; BI = NextBB) {
- NextBB = std::next(BI);
- MachineBasicBlock &MBB = *BI;
- bool HaveSkipBlock = false;
-
- if (!ExecBranchStack.empty() && ExecBranchStack.back() == &MBB) {
- // Reached convergence point for last divergent branch.
- ExecBranchStack.pop_back();
- }
-
- if (HaveKill && ExecBranchStack.empty()) {
- HaveKill = false;
-
- // TODO: Insert skip if exec is 0?
- }
-
+ for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::iterator I, Next;
for (I = MBB.begin(); I != MBB.end(); I = Next) {
Next = std::next(I);
-
MachineInstr &MI = *I;
switch (MI.getOpcode()) {
case AMDGPU::SI_MASK_BRANCH:
- ExecBranchStack.push_back(MI.getOperand(0).getMBB());
MadeChange |= skipMaskBranch(MI, MBB);
break;
@@ -475,64 +424,60 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
// Optimize out branches to the next block.
// FIXME: Shouldn't this be handled by BranchFolding?
if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) {
+ assert(&MI == &MBB.back());
MI.eraseFromParent();
- } else if (HaveSkipBlock) {
- // Remove the given unconditional branch when a skip block has been
- // inserted after the current one and let skip the two instructions
- // performing the kill if the exec mask is non-zero.
- MI.eraseFromParent();
+ MadeChange = true;
}
break;
case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
- case AMDGPU::SI_KILL_I1_TERMINATOR:
+ case AMDGPU::SI_KILL_I1_TERMINATOR: {
MadeChange = true;
- kill(MI);
-
- if (ExecBranchStack.empty()) {
- if (NextBB != BE && skipIfDead(MI, *NextBB)) {
- HaveSkipBlock = true;
- NextBB = std::next(BI);
- BE = MF.end();
- }
+ bool CanKill = kill(MI);
+
+ // Check if we can add an early "if exec=0 { end shader }".
+ //
+ // Note that we _always_ do this if it is correct, even if the kill
+ // happens fairly late in the shader, because the null export should
+ // generally still be cheaper than normal export(s).
+ //
+ // TODO: The dominatesAllReachable check is conservative: if the
+ // dominance is only missing due to _uniform_ branches, we could
+ // in fact insert the early-exit as well.
+ if (CanKill &&
+ MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS &&
+ dominatesAllReachable(MBB)) {
+ // Mark the instruction for kill-if-dead insertion. We delay this
+ // change because it modifies the CFG.
+ KillInstrs.push_back(&MI);
} else {
- HaveKill = true;
+ MI.eraseFromParent();
}
-
- MI.eraseFromParent();
break;
+ }
- case AMDGPU::SI_RETURN_TO_EPILOG:
- // FIXME: Should move somewhere else
- assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
-
- // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
- // because external bytecode will be appended at the end.
- if (BI != --MF.end() || I != MBB.getFirstTerminator()) {
- // SI_RETURN_TO_EPILOG is not the last instruction. Add an empty block at
- // the end and jump there.
- if (!EmptyMBBAtEnd) {
- EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
- MF.insert(MF.end(), EmptyMBBAtEnd);
- }
-
- MBB.addSuccessor(EmptyMBBAtEnd);
- BuildMI(*BI, I, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
- .addMBB(EmptyMBBAtEnd);
- I->eraseFromParent();
+ case AMDGPU::SI_KILL_CLEANUP:
+ if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS &&
+ dominatesAllReachable(MBB)) {
+ KillInstrs.push_back(&MI);
+ } else {
+ MI.eraseFromParent();
}
break;
- case AMDGPU::S_CBRANCH_VCCZ:
- case AMDGPU::S_CBRANCH_VCCNZ:
- MadeChange |= optimizeVccBranch(MI);
- break;
-
default:
break;
}
}
}
+ for (MachineInstr *Kill : KillInstrs) {
+ skipIfDead(*Kill->getParent(), std::next(Kill->getIterator()),
+ Kill->getDebugLoc());
+ Kill->eraseFromParent();
+ }
+ KillInstrs.clear();
+ EarlyExitBlock = nullptr;
+
return MadeChange;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index ef662d55cb0a..2a157eb20ab4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -32,6 +32,7 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -57,7 +58,6 @@
#include <cstring>
#include <memory>
#include <utility>
-#include <vector>
using namespace llvm;
@@ -109,15 +109,13 @@ iterator_range<enum_iterator<InstCounterType>> inst_counter_types() {
enum_iterator<InstCounterType>(NUM_INST_CNTS));
}
-using RegInterval = std::pair<signed, signed>;
+using RegInterval = std::pair<int, int>;
struct {
- uint32_t VmcntMax;
- uint32_t ExpcntMax;
- uint32_t LgkmcntMax;
- uint32_t VscntMax;
- int32_t NumVGPRsMax;
- int32_t NumSGPRsMax;
+ unsigned VmcntMax;
+ unsigned ExpcntMax;
+ unsigned LgkmcntMax;
+ unsigned VscntMax;
} HardwareLimits;
struct {
@@ -143,7 +141,7 @@ enum WaitEventType {
NUM_WAIT_EVENTS,
};
-static const uint32_t WaitEventMaskForInst[NUM_INST_CNTS] = {
+static const unsigned WaitEventMaskForInst[NUM_INST_CNTS] = {
(1 << VMEM_ACCESS) | (1 << VMEM_READ_ACCESS),
(1 << SMEM_ACCESS) | (1 << LDS_ACCESS) | (1 << GDS_ACCESS) |
(1 << SQ_MESSAGE),
@@ -166,6 +164,28 @@ enum RegisterMapping {
NUM_ALL_VGPRS = SQ_MAX_PGM_VGPRS + NUM_EXTRA_VGPRS, // Where SGPR starts.
};
+// Enumerate different types of result-returning VMEM operations. Although
+// s_waitcnt orders them all with a single vmcnt counter, in the absence of
+// s_waitcnt only instructions of the same VmemType are guaranteed to write
+// their results in order -- so there is no need to insert an s_waitcnt between
+// two instructions of the same type that write the same vgpr.
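+// For example, two image sample instructions (both VMEM_SAMPLER) that write
+// the same vgpr need no s_waitcnt between them, but a buffer load followed by
+// an image sample writing the same vgpr does, because results of different
+// VmemTypes may return out of order.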
+enum VmemType {
+ // BUF instructions and MIMG instructions without a sampler.
+ VMEM_NOSAMPLER,
+ // MIMG instructions with a sampler.
+ VMEM_SAMPLER,
+};
+
+VmemType getVmemType(const MachineInstr &Inst) {
+ assert(SIInstrInfo::isVMEM(Inst));
+ if (!SIInstrInfo::isMIMG(Inst))
+ return VMEM_NOSAMPLER;
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Inst.getOpcode());
+ return AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode)->Sampler
+ ? VMEM_SAMPLER
+ : VMEM_NOSAMPLER;
+}
+
void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) {
switch (T) {
case VM_CNT:
@@ -195,12 +215,9 @@ void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) {
// "s_waitcnt 0" before use.
class WaitcntBrackets {
public:
- WaitcntBrackets(const GCNSubtarget *SubTarget) : ST(SubTarget) {
- for (auto T : inst_counter_types())
- memset(VgprScores[T], 0, sizeof(VgprScores[T]));
- }
+ WaitcntBrackets(const GCNSubtarget *SubTarget) : ST(SubTarget) {}
- static uint32_t getWaitCountMax(InstCounterType T) {
+ static unsigned getWaitCountMax(InstCounterType T) {
switch (T) {
case VM_CNT:
return HardwareLimits.VmcntMax;
@@ -216,17 +233,13 @@ public:
return 0;
}
- uint32_t getScoreLB(InstCounterType T) const {
+ unsigned getScoreLB(InstCounterType T) const {
assert(T < NUM_INST_CNTS);
- if (T >= NUM_INST_CNTS)
- return 0;
return ScoreLBs[T];
}
- uint32_t getScoreUB(InstCounterType T) const {
+ unsigned getScoreUB(InstCounterType T) const {
assert(T < NUM_INST_CNTS);
- if (T >= NUM_INST_CNTS)
- return 0;
return ScoreUBs[T];
}
@@ -242,7 +255,7 @@ public:
return EXP_CNT;
}
- uint32_t getRegScore(int GprNo, InstCounterType T) {
+ unsigned getRegScore(int GprNo, InstCounterType T) {
if (GprNo < NUM_ALL_VGPRS) {
return VgprScores[T][GprNo];
}
@@ -250,30 +263,16 @@ public:
return SgprScores[GprNo - NUM_ALL_VGPRS];
}
- void clear() {
- memset(ScoreLBs, 0, sizeof(ScoreLBs));
- memset(ScoreUBs, 0, sizeof(ScoreUBs));
- PendingEvents = 0;
- memset(MixedPendingEvents, 0, sizeof(MixedPendingEvents));
- for (auto T : inst_counter_types())
- memset(VgprScores[T], 0, sizeof(VgprScores[T]));
- memset(SgprScores, 0, sizeof(SgprScores));
- }
-
bool merge(const WaitcntBrackets &Other);
RegInterval getRegInterval(const MachineInstr *MI, const SIInstrInfo *TII,
const MachineRegisterInfo *MRI,
- const SIRegisterInfo *TRI, unsigned OpNo,
- bool Def) const;
-
- int32_t getMaxVGPR() const { return VgprUB; }
- int32_t getMaxSGPR() const { return SgprUB; }
+ const SIRegisterInfo *TRI, unsigned OpNo) const;
bool counterOutOfOrder(InstCounterType T) const;
bool simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const;
bool simplifyWaitcnt(InstCounterType T, unsigned &Count) const;
- void determineWait(InstCounterType T, uint32_t ScoreToWait,
+ void determineWait(InstCounterType T, unsigned ScoreToWait,
AMDGPU::Waitcnt &Wait) const;
void applyWaitcnt(const AMDGPU::Waitcnt &Wait);
void applyWaitcnt(InstCounterType T, unsigned Count);
@@ -286,6 +285,12 @@ public:
return PendingEvents & (1 << E);
}
+ bool hasMixedPendingEvents(InstCounterType T) const {
+ unsigned Events = PendingEvents & WaitEventMaskForInst[T];
+ // Return true if more than one bit is set in Events.
+ return Events & (Events - 1);
+ }
+
bool hasPendingFlat() const {
return ((LastFlat[LGKM_CNT] > ScoreLBs[LGKM_CNT] &&
LastFlat[LGKM_CNT] <= ScoreUBs[LGKM_CNT]) ||
@@ -298,71 +303,77 @@ public:
LastFlat[LGKM_CNT] = ScoreUBs[LGKM_CNT];
}
+ // Return true if there might be pending writes to the specified vgpr by VMEM
+ // instructions with types different from V.
+ bool hasOtherPendingVmemTypes(int GprNo, VmemType V) const {
+ assert(GprNo < NUM_ALL_VGPRS);
+ return VgprVmemTypes[GprNo] & ~(1 << V);
+ }
+
+ void clearVgprVmemTypes(int GprNo) {
+ assert(GprNo < NUM_ALL_VGPRS);
+ VgprVmemTypes[GprNo] = 0;
+ }
+
void print(raw_ostream &);
void dump() { print(dbgs()); }
private:
struct MergeInfo {
- uint32_t OldLB;
- uint32_t OtherLB;
- uint32_t MyShift;
- uint32_t OtherShift;
+ unsigned OldLB;
+ unsigned OtherLB;
+ unsigned MyShift;
+ unsigned OtherShift;
};
- static bool mergeScore(const MergeInfo &M, uint32_t &Score,
- uint32_t OtherScore);
+ static bool mergeScore(const MergeInfo &M, unsigned &Score,
+ unsigned OtherScore);
- void setScoreLB(InstCounterType T, uint32_t Val) {
+ void setScoreLB(InstCounterType T, unsigned Val) {
assert(T < NUM_INST_CNTS);
- if (T >= NUM_INST_CNTS)
- return;
ScoreLBs[T] = Val;
}
- void setScoreUB(InstCounterType T, uint32_t Val) {
+ void setScoreUB(InstCounterType T, unsigned Val) {
assert(T < NUM_INST_CNTS);
- if (T >= NUM_INST_CNTS)
- return;
ScoreUBs[T] = Val;
if (T == EXP_CNT) {
- uint32_t UB = ScoreUBs[T] - getWaitCountMax(EXP_CNT);
+ unsigned UB = ScoreUBs[T] - getWaitCountMax(EXP_CNT);
if (ScoreLBs[T] < UB && UB < ScoreUBs[T])
ScoreLBs[T] = UB;
}
}
- void setRegScore(int GprNo, InstCounterType T, uint32_t Val) {
+ void setRegScore(int GprNo, InstCounterType T, unsigned Val) {
if (GprNo < NUM_ALL_VGPRS) {
- if (GprNo > VgprUB) {
- VgprUB = GprNo;
- }
+ VgprUB = std::max(VgprUB, GprNo);
VgprScores[T][GprNo] = Val;
} else {
assert(T == LGKM_CNT);
- if (GprNo - NUM_ALL_VGPRS > SgprUB) {
- SgprUB = GprNo - NUM_ALL_VGPRS;
- }
+ SgprUB = std::max(SgprUB, GprNo - NUM_ALL_VGPRS);
SgprScores[GprNo - NUM_ALL_VGPRS] = Val;
}
}
void setExpScore(const MachineInstr *MI, const SIInstrInfo *TII,
const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI,
- unsigned OpNo, uint32_t Val);
+ unsigned OpNo, unsigned Val);
const GCNSubtarget *ST = nullptr;
- uint32_t ScoreLBs[NUM_INST_CNTS] = {0};
- uint32_t ScoreUBs[NUM_INST_CNTS] = {0};
- uint32_t PendingEvents = 0;
- bool MixedPendingEvents[NUM_INST_CNTS] = {false};
+ unsigned ScoreLBs[NUM_INST_CNTS] = {0};
+ unsigned ScoreUBs[NUM_INST_CNTS] = {0};
+ unsigned PendingEvents = 0;
// Remember the last flat memory operation.
- uint32_t LastFlat[NUM_INST_CNTS] = {0};
+ unsigned LastFlat[NUM_INST_CNTS] = {0};
// wait_cnt scores for every vgpr.
// Keep track of the VgprUB and SgprUB to make merge at join efficient.
- int32_t VgprUB = 0;
- int32_t SgprUB = 0;
- uint32_t VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS];
+ int VgprUB = -1;
+ int SgprUB = -1;
+ unsigned VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS] = {{0}};
// Wait cnt scores for every sgpr, only lgkmcnt is relevant.
- uint32_t SgprScores[SQ_MAX_PGM_SGPRS] = {0};
+ unsigned SgprScores[SQ_MAX_PGM_SGPRS] = {0};
+ // Bitmask of the VmemTypes of VMEM instructions that might have a pending
+ // write to each vgpr.
+ unsigned char VgprVmemTypes[NUM_ALL_VGPRS] = {0};
};
class SIInsertWaitcnts : public MachineFunctionPass {
@@ -385,8 +396,7 @@ private:
explicit BlockInfo(MachineBasicBlock *MBB) : MBB(MBB) {}
};
- std::vector<BlockInfo> BlockInfos; // by reverse post-order traversal index
- DenseMap<MachineBasicBlock *, unsigned> RpotIdxMap;
+ MapVector<MachineBasicBlock *, BlockInfo> BlockInfos;
// ForceEmitZeroWaitcnts: force all waitcnts insts to be s_waitcnt 0
// because of amdgpu-waitcnt-forcezero flag
@@ -464,10 +474,10 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
const SIInstrInfo *TII,
const MachineRegisterInfo *MRI,
const SIRegisterInfo *TRI,
- unsigned OpNo, bool Def) const {
+ unsigned OpNo) const {
const MachineOperand &Op = MI->getOperand(OpNo);
- if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()) ||
- (Def && !Op.isDef()) || TRI->isAGPR(*MRI, Op.getReg()))
+ assert(Op.isReg());
+ if (!TRI->isInAllocatableClass(Op.getReg()) || TRI->isAGPR(*MRI, Op.getReg()))
return {-1, -1};
// A use via a PW operand does not need a waitcnt.
@@ -475,29 +485,27 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
assert(!Op.getSubReg() || !Op.isUndef());
RegInterval Result;
- const MachineRegisterInfo &MRIA = *MRI;
unsigned Reg = TRI->getEncodingValue(Op.getReg());
- if (TRI->isVGPR(MRIA, Op.getReg())) {
+ if (TRI->isVGPR(*MRI, Op.getReg())) {
assert(Reg >= RegisterEncoding.VGPR0 && Reg <= RegisterEncoding.VGPRL);
Result.first = Reg - RegisterEncoding.VGPR0;
assert(Result.first >= 0 && Result.first < SQ_MAX_PGM_VGPRS);
- } else if (TRI->isSGPRReg(MRIA, Op.getReg())) {
+ } else if (TRI->isSGPRReg(*MRI, Op.getReg())) {
assert(Reg >= RegisterEncoding.SGPR0 && Reg < SQ_MAX_PGM_SGPRS);
Result.first = Reg - RegisterEncoding.SGPR0 + NUM_ALL_VGPRS;
assert(Result.first >= NUM_ALL_VGPRS &&
Result.first < SQ_MAX_PGM_SGPRS + NUM_ALL_VGPRS);
}
// TODO: Handle TTMP
- // else if (TRI->isTTMP(MRIA, Reg.getReg())) ...
+ // else if (TRI->isTTMP(*MRI, Reg.getReg())) ...
else
return {-1, -1};
- const MachineInstr &MIA = *MI;
- const TargetRegisterClass *RC = TII->getOpRegClass(MIA, OpNo);
+ const TargetRegisterClass *RC = TII->getOpRegClass(*MI, OpNo);
unsigned Size = TRI->getRegSizeInBits(*RC);
- Result.second = Result.first + (Size / 32);
+ Result.second = Result.first + ((Size + 16) / 32);
return Result;
}
@@ -506,13 +514,10 @@ void WaitcntBrackets::setExpScore(const MachineInstr *MI,
const SIInstrInfo *TII,
const SIRegisterInfo *TRI,
const MachineRegisterInfo *MRI, unsigned OpNo,
- uint32_t Val) {
- RegInterval Interval = getRegInterval(MI, TII, MRI, TRI, OpNo, false);
- LLVM_DEBUG({
- const MachineOperand &Opnd = MI->getOperand(OpNo);
- assert(TRI->isVGPR(*MRI, Opnd.getReg()));
- });
- for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+ unsigned Val) {
+ RegInterval Interval = getRegInterval(MI, TII, MRI, TRI, OpNo);
+ assert(TRI->isVGPR(*MRI, MI->getOperand(OpNo).getReg()));
+ for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
setRegScore(RegNo, EXP_CNT, Val);
}
}
@@ -521,19 +526,14 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
const SIRegisterInfo *TRI,
const MachineRegisterInfo *MRI,
WaitEventType E, MachineInstr &Inst) {
- const MachineRegisterInfo &MRIA = *MRI;
InstCounterType T = eventCounter(E);
- uint32_t CurrScore = getScoreUB(T) + 1;
+ unsigned CurrScore = getScoreUB(T) + 1;
if (CurrScore == 0)
report_fatal_error("InsertWaitcnt score wraparound");
// PendingEvents and ScoreUB need to be updated regardless of whether this
// event changes the score of a register or not.
// Examples include vm_cnt for a buffer store or lgkm_cnt for a send message.
- if (!hasPendingEvent(E)) {
- if (PendingEvents & WaitEventMaskForInst[T])
- MixedPendingEvents[T] = true;
- PendingEvents |= 1 << E;
- }
+ PendingEvents |= 1 << E;
setScoreUB(T, CurrScore);
if (T == EXP_CNT) {
@@ -574,7 +574,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
Inst.getOpcode() != AMDGPU::DS_ORDERED_COUNT) {
for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
const MachineOperand &Op = Inst.getOperand(I);
- if (Op.isReg() && !Op.isDef() && TRI->isVGPR(MRIA, Op.getReg())) {
+ if (Op.isReg() && !Op.isDef() && TRI->isVGPR(*MRI, Op.getReg())) {
setExpScore(&Inst, TII, TRI, MRI, I, CurrScore);
}
}
@@ -622,7 +622,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
MachineOperand &DefMO = Inst.getOperand(I);
if (DefMO.isReg() && DefMO.isDef() &&
- TRI->isVGPR(MRIA, DefMO.getReg())) {
+ TRI->isVGPR(*MRI, DefMO.getReg())) {
setRegScore(TRI->getEncodingValue(DefMO.getReg()), EXP_CNT,
CurrScore);
}
@@ -630,7 +630,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
}
for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
MachineOperand &MO = Inst.getOperand(I);
- if (MO.isReg() && !MO.isDef() && TRI->isVGPR(MRIA, MO.getReg())) {
+ if (MO.isReg() && !MO.isDef() && TRI->isVGPR(*MRI, MO.getReg())) {
setExpScore(&Inst, TII, TRI, MRI, I, CurrScore);
}
}
@@ -641,8 +641,8 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
Inst.getOpcode() == AMDGPU::BUFFER_STORE_DWORDX4) {
MachineOperand *MO = TII->getNamedOperand(Inst, AMDGPU::OpName::data);
unsigned OpNo;//TODO: find the OpNo for this operand;
- RegInterval Interval = getRegInterval(&Inst, TII, MRI, TRI, OpNo, false);
- for (signed RegNo = Interval.first; RegNo < Interval.second;
+ RegInterval Interval = getRegInterval(&Inst, TII, MRI, TRI, OpNo);
+ for (int RegNo = Interval.first; RegNo < Interval.second;
++RegNo) {
setRegScore(RegNo + NUM_ALL_VGPRS, t, CurrScore);
}
@@ -650,10 +650,20 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
} else {
// Match the score to the destination registers.
for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
- RegInterval Interval = getRegInterval(&Inst, TII, MRI, TRI, I, true);
- if (T == VM_CNT && Interval.first >= NUM_ALL_VGPRS)
+ auto &Op = Inst.getOperand(I);
+ if (!Op.isReg() || !Op.isDef())
continue;
- for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+ RegInterval Interval = getRegInterval(&Inst, TII, MRI, TRI, I);
+ if (T == VM_CNT) {
+ if (Interval.first >= NUM_ALL_VGPRS)
+ continue;
+ if (SIInstrInfo::isVMEM(Inst)) {
+ VmemType V = getVmemType(Inst);
+ for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo)
+ VgprVmemTypes[RegNo] |= 1 << V;
+ }
+ }
+ for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
setRegScore(RegNo, T, CurrScore);
}
}
@@ -666,8 +676,8 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
void WaitcntBrackets::print(raw_ostream &OS) {
OS << '\n';
for (auto T : inst_counter_types()) {
- uint32_t LB = getScoreLB(T);
- uint32_t UB = getScoreUB(T);
+ unsigned LB = getScoreLB(T);
+ unsigned UB = getScoreUB(T);
switch (T) {
case VM_CNT:
@@ -689,11 +699,11 @@ void WaitcntBrackets::print(raw_ostream &OS) {
if (LB < UB) {
// Print vgpr scores.
- for (int J = 0; J <= getMaxVGPR(); J++) {
- uint32_t RegScore = getRegScore(J, T);
+ for (int J = 0; J <= VgprUB; J++) {
+ unsigned RegScore = getRegScore(J, T);
if (RegScore <= LB)
continue;
- uint32_t RelScore = RegScore - LB - 1;
+ unsigned RelScore = RegScore - LB - 1;
if (J < SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS) {
OS << RelScore << ":v" << J << " ";
} else {
@@ -702,11 +712,11 @@ void WaitcntBrackets::print(raw_ostream &OS) {
}
// Also need to print sgpr scores for lgkm_cnt.
if (T == LGKM_CNT) {
- for (int J = 0; J <= getMaxSGPR(); J++) {
- uint32_t RegScore = getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
+ for (int J = 0; J <= SgprUB; J++) {
+ unsigned RegScore = getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
if (RegScore <= LB)
continue;
- uint32_t RelScore = RegScore - LB - 1;
+ unsigned RelScore = RegScore - LB - 1;
OS << RelScore << ":s" << J << " ";
}
}
@@ -727,8 +737,8 @@ bool WaitcntBrackets::simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const {
bool WaitcntBrackets::simplifyWaitcnt(InstCounterType T,
unsigned &Count) const {
- const uint32_t LB = getScoreLB(T);
- const uint32_t UB = getScoreUB(T);
+ const unsigned LB = getScoreLB(T);
+ const unsigned UB = getScoreUB(T);
if (Count < UB && UB - Count > LB)
return true;
@@ -736,12 +746,12 @@ bool WaitcntBrackets::simplifyWaitcnt(InstCounterType T,
return false;
}
-void WaitcntBrackets::determineWait(InstCounterType T, uint32_t ScoreToWait,
+void WaitcntBrackets::determineWait(InstCounterType T, unsigned ScoreToWait,
AMDGPU::Waitcnt &Wait) const {
// If the score of src_operand falls within the bracket, we need an
// s_waitcnt instruction.
- const uint32_t LB = getScoreLB(T);
- const uint32_t UB = getScoreUB(T);
+ const unsigned LB = getScoreLB(T);
+ const unsigned UB = getScoreUB(T);
if ((UB >= ScoreToWait) && (ScoreToWait > LB)) {
if ((T == VM_CNT || T == LGKM_CNT) &&
hasPendingFlat() &&
@@ -758,7 +768,7 @@ void WaitcntBrackets::determineWait(InstCounterType T, uint32_t ScoreToWait,
} else {
// If a counter has been maxed out avoid overflow by waiting for
// MAX(CounterType) - 1 instead.
- uint32_t NeededWait = std::min(UB - ScoreToWait, getWaitCountMax(T) - 1);
+ unsigned NeededWait = std::min(UB - ScoreToWait, getWaitCountMax(T) - 1);
addWait(Wait, T, NeededWait);
}
}
@@ -772,7 +782,7 @@ void WaitcntBrackets::applyWaitcnt(const AMDGPU::Waitcnt &Wait) {
}
void WaitcntBrackets::applyWaitcnt(InstCounterType T, unsigned Count) {
- const uint32_t UB = getScoreUB(T);
+ const unsigned UB = getScoreUB(T);
if (Count >= UB)
return;
if (Count != 0) {
@@ -781,7 +791,6 @@ void WaitcntBrackets::applyWaitcnt(InstCounterType T, unsigned Count) {
setScoreLB(T, std::max(getScoreLB(T), UB - Count));
} else {
setScoreLB(T, UB);
- MixedPendingEvents[T] = false;
PendingEvents &= ~WaitEventMaskForInst[T];
}
}
@@ -792,7 +801,7 @@ bool WaitcntBrackets::counterOutOfOrder(InstCounterType T) const {
// Scalar memory read always can go out of order.
if (T == LGKM_CNT && hasPendingEvent(SMEM_ACCESS))
return true;
- return MixedPendingEvents[T];
+ return hasMixedPendingEvents(T);
}
INITIALIZE_PASS_BEGIN(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false,
@@ -954,10 +963,10 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
int CallAddrOpIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
- RegInterval CallAddrOpInterval = ScoreBrackets.getRegInterval(
- &MI, TII, MRI, TRI, CallAddrOpIdx, false);
+ RegInterval CallAddrOpInterval =
+ ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, CallAddrOpIdx);
- for (signed RegNo = CallAddrOpInterval.first;
+ for (int RegNo = CallAddrOpInterval.first;
RegNo < CallAddrOpInterval.second; ++RegNo)
ScoreBrackets.determineWait(
LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
@@ -965,10 +974,10 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
int RtnAddrOpIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
if (RtnAddrOpIdx != -1) {
- RegInterval RtnAddrOpInterval = ScoreBrackets.getRegInterval(
- &MI, TII, MRI, TRI, RtnAddrOpIdx, false);
+ RegInterval RtnAddrOpInterval =
+ ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, RtnAddrOpIdx);
- for (signed RegNo = RtnAddrOpInterval.first;
+ for (int RegNo = RtnAddrOpInterval.first;
RegNo < RtnAddrOpInterval.second; ++RegNo)
ScoreBrackets.determineWait(
LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
@@ -982,7 +991,19 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
// emitted.
// If the source operand was defined by a load, add the s_waitcnt
// instruction.
+ //
+ // Two cases are handled for destination operands:
+ // 1) If the destination operand was defined by a load, add the s_waitcnt
+ // instruction to guarantee the right WAW order.
+      // 2) If the destination operand was used by a recent export/store
+      // instruction, add an s_waitcnt on exp_cnt to guarantee the WAR order.
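+      // For example (illustrative): a VMEM load defining v0 followed by a VALU
+      // use of v0 needs a vmcnt wait (RAW), and an export reading v0 followed
+      // by a VMEM load overwriting v0 needs an expcnt wait (WAR).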
for (const MachineMemOperand *Memop : MI.memoperands()) {
+ const Value *Ptr = Memop->getValue();
+ if (Memop->isStore() && SLoadAddresses.count(Ptr)) {
+ addWait(Wait, LGKM_CNT, 0);
+ if (PDT->dominates(MI.getParent(), SLoadAddresses.find(Ptr)->second))
+ SLoadAddresses.erase(Ptr);
+ }
unsigned AS = Memop->getAddrSpace();
if (AS != AMDGPUAS::LOCAL_ADDRESS)
continue;
@@ -990,67 +1011,41 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
// VM_CNT is only relevant to vgpr or LDS.
ScoreBrackets.determineWait(
VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
- }
-
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- const MachineOperand &Op = MI.getOperand(I);
- const MachineRegisterInfo &MRIA = *MRI;
- RegInterval Interval =
- ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I, false);
- for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
- if (TRI->isVGPR(MRIA, Op.getReg())) {
- // VM_CNT is only relevant to vgpr or LDS.
- ScoreBrackets.determineWait(
- VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
- }
- ScoreBrackets.determineWait(
- LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
- }
- }
- // End of for loop that looks at all source operands to decide vm_wait_cnt
- // and lgk_wait_cnt.
-
- // Two cases are handled for destination operands:
- // 1) If the destination operand was defined by a load, add the s_waitcnt
- // instruction to guarantee the right WAW order.
- // 2) If a destination operand that was used by a recent export/store ins,
- // add s_waitcnt on exp_cnt to guarantee the WAR order.
- if (MI.mayStore()) {
- // FIXME: Should not be relying on memoperands.
- for (const MachineMemOperand *Memop : MI.memoperands()) {
- const Value *Ptr = Memop->getValue();
- if (SLoadAddresses.count(Ptr)) {
- addWait(Wait, LGKM_CNT, 0);
- if (PDT->dominates(MI.getParent(),
- SLoadAddresses.find(Ptr)->second))
- SLoadAddresses.erase(Ptr);
- }
- unsigned AS = Memop->getAddrSpace();
- if (AS != AMDGPUAS::LOCAL_ADDRESS)
- continue;
- unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
- ScoreBrackets.determineWait(
- VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
+ if (Memop->isStore()) {
ScoreBrackets.determineWait(
EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait);
}
}
+
+ // Loop over use and def operands.
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- MachineOperand &Def = MI.getOperand(I);
- const MachineRegisterInfo &MRIA = *MRI;
+ MachineOperand &Op = MI.getOperand(I);
+ if (!Op.isReg())
+ continue;
RegInterval Interval =
- ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I, true);
- for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
- if (TRI->isVGPR(MRIA, Def.getReg())) {
- ScoreBrackets.determineWait(
- VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
- ScoreBrackets.determineWait(
- EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait);
+ ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I);
+ for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+ if (TRI->isVGPR(*MRI, Op.getReg())) {
+ // RAW always needs an s_waitcnt. WAW needs an s_waitcnt unless the
+ // previous write and this write are the same type of VMEM
+ // instruction, in which case they're guaranteed to write their
+ // results in order anyway.
+ if (Op.isUse() || !SIInstrInfo::isVMEM(MI) ||
+ ScoreBrackets.hasOtherPendingVmemTypes(RegNo,
+ getVmemType(MI))) {
+ ScoreBrackets.determineWait(
+ VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
+ ScoreBrackets.clearVgprVmemTypes(RegNo);
+ }
+ if (Op.isDef()) {
+ ScoreBrackets.determineWait(
+ EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait);
+ }
}
ScoreBrackets.determineWait(
LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
}
- } // End of for loop that looks at all dest operands.
+ }
}
}
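The rewritten operand loop above encodes which counters are considered for each VGPR operand. A minimal standalone sketch of that per-register decision, with boolean inputs standing in for the ScoreBrackets queries (this is a simplification, not LLVM code; determineWait still only emits a wait when something is actually pending):

struct CounterWaits { bool VmCnt, ExpCnt, LgkmCnt; };

// IsUse: the operand is read. IsVMEMWrite: the current instruction is itself a
// VMEM write. HasOtherPendingVmemTypes: writes of a *different* VMEM kind are
// still in flight for this register (hasOtherPendingVmemTypes in the pass).
CounterWaits vgprOperandWaits(bool IsUse, bool IsVMEMWrite,
                              bool HasOtherPendingVmemTypes) {
  CounterWaits W{};
  // RAW always considers vmcnt. WAW may skip it only when the in-flight writes
  // are the same kind of VMEM instruction, which retires its results in order.
  W.VmCnt = IsUse || !IsVMEMWrite || HasOtherPendingVmemTypes;
  // WAR against a pending export/store: only definitions consider expcnt.
  W.ExpCnt = !IsUse;
  // lgkmcnt is checked for every register operand, use or def.
  W.LgkmCnt = true;
  return W;
}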
@@ -1154,7 +1149,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
}
LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n"
- << "Old Instr: " << MI << '\n'
+ << "Old Instr: " << MI
<< "New Instr: " << *II << '\n');
if (!Wait.hasWait())
@@ -1171,7 +1166,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
Modified = true;
LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n"
- << "Old Instr: " << MI << '\n'
+ << "Old Instr: " << MI
<< "New Instr: " << *SWaitInst << '\n');
}
@@ -1187,7 +1182,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
Modified = true;
LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n"
- << "Old Instr: " << MI << '\n'
+ << "Old Instr: " << MI
<< "New Instr: " << *SWaitInst << '\n');
}
@@ -1303,10 +1298,10 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
}
}
-bool WaitcntBrackets::mergeScore(const MergeInfo &M, uint32_t &Score,
- uint32_t OtherScore) {
- uint32_t MyShifted = Score <= M.OldLB ? 0 : Score + M.MyShift;
- uint32_t OtherShifted =
+bool WaitcntBrackets::mergeScore(const MergeInfo &M, unsigned &Score,
+ unsigned OtherScore) {
+ unsigned MyShifted = Score <= M.OldLB ? 0 : Score + M.MyShift;
+ unsigned OtherShifted =
OtherScore <= M.OtherLB ? 0 : OtherScore + M.OtherShift;
Score = std::max(MyShifted, OtherShifted);
return OtherShifted > MyShifted;
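The rebasing arithmetic above maps both blocks' scores onto a common new upper bound: anything at or below its old lower bound has already retired and collapses to 0, everything else is shifted by NewUB - OldUB. A small self-contained example with made-up numbers (the struct mirrors MergeInfo):

#include <algorithm>
#include <cstdio>

struct MergeInfo {
  unsigned OldLB, OtherLB;       // lower bounds before the merge
  unsigned MyShift, OtherShift;  // NewUB - OldUB for each side
};

// Returns true when the merged-in score strictly dominates the existing one.
bool mergeScore(const MergeInfo &M, unsigned &Score, unsigned OtherScore) {
  unsigned MyShifted = Score <= M.OldLB ? 0 : Score + M.MyShift;
  unsigned OtherShifted =
      OtherScore <= M.OtherLB ? 0 : OtherScore + M.OtherShift;
  Score = std::max(MyShifted, OtherShifted);
  return OtherShifted > MyShifted;
}

int main() {
  // Block A: LB=2, UB=5 (3 pending); block B: LB=0, UB=4 (4 pending).
  // NewUB = 2 + max(3, 4) = 6, so MyShift = 6 - 5 = 1 and OtherShift = 6 - 4 = 2.
  MergeInfo M{/*OldLB=*/2, /*OtherLB=*/0, /*MyShift=*/1, /*OtherShift=*/2};
  unsigned Score = 4;                                       // written at score 4 in A
  bool StrictDom = mergeScore(M, Score, /*OtherScore=*/3);  // and at score 3 in B
  std::printf("merged score = %u, strict dom = %d\n", Score, StrictDom); // 5, 0
  return 0;
}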
@@ -1320,44 +1315,50 @@ bool WaitcntBrackets::mergeScore(const MergeInfo &M, uint32_t &Score,
bool WaitcntBrackets::merge(const WaitcntBrackets &Other) {
bool StrictDom = false;
+ VgprUB = std::max(VgprUB, Other.VgprUB);
+ SgprUB = std::max(SgprUB, Other.SgprUB);
+
for (auto T : inst_counter_types()) {
// Merge event flags for this counter
const bool OldOutOfOrder = counterOutOfOrder(T);
- const uint32_t OldEvents = PendingEvents & WaitEventMaskForInst[T];
- const uint32_t OtherEvents = Other.PendingEvents & WaitEventMaskForInst[T];
+ const unsigned OldEvents = PendingEvents & WaitEventMaskForInst[T];
+ const unsigned OtherEvents = Other.PendingEvents & WaitEventMaskForInst[T];
if (OtherEvents & ~OldEvents)
StrictDom = true;
- if (Other.MixedPendingEvents[T] ||
- (OldEvents && OtherEvents && OldEvents != OtherEvents))
- MixedPendingEvents[T] = true;
PendingEvents |= OtherEvents;
// Merge scores for this counter
- const uint32_t MyPending = ScoreUBs[T] - ScoreLBs[T];
- const uint32_t OtherPending = Other.ScoreUBs[T] - Other.ScoreLBs[T];
+ const unsigned MyPending = ScoreUBs[T] - ScoreLBs[T];
+ const unsigned OtherPending = Other.ScoreUBs[T] - Other.ScoreLBs[T];
+ const unsigned NewUB = ScoreLBs[T] + std::max(MyPending, OtherPending);
+ if (NewUB < ScoreLBs[T])
+ report_fatal_error("waitcnt score overflow");
+
MergeInfo M;
M.OldLB = ScoreLBs[T];
M.OtherLB = Other.ScoreLBs[T];
- M.MyShift = OtherPending > MyPending ? OtherPending - MyPending : 0;
- M.OtherShift = ScoreUBs[T] - Other.ScoreUBs[T] + M.MyShift;
+ M.MyShift = NewUB - ScoreUBs[T];
+ M.OtherShift = NewUB - Other.ScoreUBs[T];
- const uint32_t NewUB = ScoreUBs[T] + M.MyShift;
- if (NewUB < ScoreUBs[T])
- report_fatal_error("waitcnt score overflow");
ScoreUBs[T] = NewUB;
- ScoreLBs[T] = std::min(M.OldLB + M.MyShift, M.OtherLB + M.OtherShift);
StrictDom |= mergeScore(M, LastFlat[T], Other.LastFlat[T]);
bool RegStrictDom = false;
- for (int J = 0, E = std::max(getMaxVGPR(), Other.getMaxVGPR()) + 1; J != E;
- J++) {
+ for (int J = 0; J <= VgprUB; J++) {
RegStrictDom |= mergeScore(M, VgprScores[T][J], Other.VgprScores[T][J]);
}
+ if (T == VM_CNT) {
+ for (int J = 0; J <= VgprUB; J++) {
+ unsigned char NewVmemTypes = VgprVmemTypes[J] | Other.VgprVmemTypes[J];
+ RegStrictDom |= NewVmemTypes != VgprVmemTypes[J];
+ VgprVmemTypes[J] = NewVmemTypes;
+ }
+ }
+
if (T == LGKM_CNT) {
- for (int J = 0, E = std::max(getMaxSGPR(), Other.getMaxSGPR()) + 1;
- J != E; J++) {
+ for (int J = 0; J <= SgprUB; J++) {
RegStrictDom |= mergeScore(M, SgprScores[J], Other.SgprScores[J]);
}
}
@@ -1366,9 +1367,6 @@ bool WaitcntBrackets::merge(const WaitcntBrackets &Other) {
StrictDom = true;
}
- VgprUB = std::max(getMaxVGPR(), Other.getMaxVGPR());
- SgprUB = std::max(getMaxSGPR(), Other.getMaxSGPR());
-
return StrictDom;
}
@@ -1383,6 +1381,10 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
ScoreBrackets.dump();
});
+ // Assume VCCZ is correct at basic block boundaries, unless and until we need
+ // to handle cases where that is not true.
+ bool VCCZCorrect = true;
+
// Walk over the instructions.
MachineInstr *OldWaitcntInstr = nullptr;
@@ -1402,13 +1404,26 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
continue;
}
- bool VCCZBugWorkAround = false;
+ // We might need to restore vccz to its correct value for either of two
+ // different reasons; see ST->hasReadVCCZBug() and
+ // ST->partialVCCWritesUpdateVCCZ().
+ bool RestoreVCCZ = false;
if (readsVCCZ(Inst)) {
- if (ScoreBrackets.getScoreLB(LGKM_CNT) <
- ScoreBrackets.getScoreUB(LGKM_CNT) &&
- ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
- if (ST->hasReadVCCZBug())
- VCCZBugWorkAround = true;
+ if (!VCCZCorrect)
+ RestoreVCCZ = true;
+ else if (ST->hasReadVCCZBug()) {
+ // There is a hardware bug on CI/SI where SMRD instruction may corrupt
+ // vccz bit, so when we detect that an instruction may read from a
+ // corrupt vccz bit, we need to:
+ // 1. Insert s_waitcnt lgkm(0) to wait for all outstanding SMRD
+ // operations to complete.
+ // 2. Restore the correct value of vccz by writing the current value
+ // of vcc back to vcc.
+ if (ScoreBrackets.getScoreLB(LGKM_CNT) <
+ ScoreBrackets.getScoreUB(LGKM_CNT) &&
+ ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
+ RestoreVCCZ = true;
+ }
}
}
@@ -1419,6 +1434,16 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
}
}
+ if (!ST->partialVCCWritesUpdateVCCZ()) {
+ // Up to gfx9, writes to vcc_lo and vcc_hi don't update vccz.
+ // Writes to vcc will fix it.
+ if (Inst.definesRegister(AMDGPU::VCC_LO) ||
+ Inst.definesRegister(AMDGPU::VCC_HI))
+ VCCZCorrect = false;
+ else if (Inst.definesRegister(AMDGPU::VCC))
+ VCCZCorrect = true;
+ }
+
// Generate an s_waitcnt instruction to be placed before
// cur_Inst, if needed.
Modified |= generateWaitcntInstBefore(Inst, ScoreBrackets, OldWaitcntInstr);
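The two vccz concerns above can be read as a small state machine: on targets where partial vcc writes do not update vccz, a write to vcc_lo/vcc_hi makes the bit stale and a full vcc write refreshes it, while a vccz reader forces the restore sequence either when the bit is stale or when the old SI/CI read bug may have corrupted it. A standalone sketch (struct and field names invented):

struct VcczTracker {
  bool PartialVccWritesUpdateVccz; // false up to gfx9
  bool HasReadVcczBug;             // SI/CI SMRD corruption bug
  bool VcczCorrect = true;         // assumed correct at block entry

  void noteWrite(bool WritesFullVcc, bool WritesVccHalf) {
    if (PartialVccWritesUpdateVccz)
      return;                      // hardware keeps vccz in sync anyway
    if (WritesVccHalf)
      VcczCorrect = false;         // vcc_lo/vcc_hi writes leave vccz stale
    else if (WritesFullVcc)
      VcczCorrect = true;          // a full vcc write refreshes vccz
  }

  bool needsRestoreBeforeVcczRead(bool SmemStillPending) const {
    if (!VcczCorrect)
      return true;                 // stale because of a partial write
    // SMRD may have corrupted vccz: wait for lgkmcnt(0) and rewrite vcc.
    return HasReadVcczBug && SmemStillPending;
  }
};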
@@ -1444,7 +1469,7 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
// TODO: Remove this work-around after fixing the scheduler and enable the
// assert above.
- if (VCCZBugWorkAround) {
+ if (RestoreVCCZ) {
// Restore the vccz bit. Any time a value is written to vcc, the vcc
// bit is updated, so we can restore the bit by reading the value of
// vcc and then writing it back to the register.
@@ -1452,6 +1477,7 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
TII->get(ST->isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64),
TRI->getVCC())
.addReg(TRI->getVCC());
+ VCCZCorrect = true;
Modified = true;
}
@@ -1479,29 +1505,23 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
HardwareLimits.LgkmcntMax = AMDGPU::getLgkmcntBitMask(IV);
HardwareLimits.VscntMax = ST->hasVscnt() ? 63 : 0;
- HardwareLimits.NumVGPRsMax = ST->getAddressableNumVGPRs();
- HardwareLimits.NumSGPRsMax = ST->getAddressableNumSGPRs();
- assert(HardwareLimits.NumVGPRsMax <= SQ_MAX_PGM_VGPRS);
- assert(HardwareLimits.NumSGPRsMax <= SQ_MAX_PGM_SGPRS);
+ unsigned NumVGPRsMax = ST->getAddressableNumVGPRs();
+ unsigned NumSGPRsMax = ST->getAddressableNumSGPRs();
+ assert(NumVGPRsMax <= SQ_MAX_PGM_VGPRS);
+ assert(NumSGPRsMax <= SQ_MAX_PGM_SGPRS);
RegisterEncoding.VGPR0 = TRI->getEncodingValue(AMDGPU::VGPR0);
- RegisterEncoding.VGPRL =
- RegisterEncoding.VGPR0 + HardwareLimits.NumVGPRsMax - 1;
+ RegisterEncoding.VGPRL = RegisterEncoding.VGPR0 + NumVGPRsMax - 1;
RegisterEncoding.SGPR0 = TRI->getEncodingValue(AMDGPU::SGPR0);
- RegisterEncoding.SGPRL =
- RegisterEncoding.SGPR0 + HardwareLimits.NumSGPRsMax - 1;
+ RegisterEncoding.SGPRL = RegisterEncoding.SGPR0 + NumSGPRsMax - 1;
TrackedWaitcntSet.clear();
- RpotIdxMap.clear();
BlockInfos.clear();
// Keep iterating over the blocks in reverse post order, inserting and
// updating s_waitcnt where needed, until a fix point is reached.
- for (MachineBasicBlock *MBB :
- ReversePostOrderTraversal<MachineFunction *>(&MF)) {
- RpotIdxMap[MBB] = BlockInfos.size();
- BlockInfos.emplace_back(MBB);
- }
+ for (auto *MBB : ReversePostOrderTraversal<MachineFunction *>(&MF))
+ BlockInfos.insert({MBB, BlockInfo(MBB)});
std::unique_ptr<WaitcntBrackets> Brackets;
bool Modified = false;
@@ -1509,12 +1529,12 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
do {
Repeat = false;
- for (BlockInfo &BI : BlockInfos) {
+ for (auto BII = BlockInfos.begin(), BIE = BlockInfos.end(); BII != BIE;
+ ++BII) {
+ BlockInfo &BI = BII->second;
if (!BI.Dirty)
continue;
- unsigned Idx = std::distance(&*BlockInfos.begin(), &BI);
-
if (BI.Incoming) {
if (!Brackets)
Brackets = std::make_unique<WaitcntBrackets>(*BI.Incoming);
@@ -1524,7 +1544,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
if (!Brackets)
Brackets = std::make_unique<WaitcntBrackets>(ST);
else
- Brackets->clear();
+ *Brackets = WaitcntBrackets(ST);
}
Modified |= insertWaitcntInBlock(MF, *BI.MBB, *Brackets);
@@ -1533,11 +1553,11 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
if (Brackets->hasPending()) {
BlockInfo *MoveBracketsToSucc = nullptr;
for (MachineBasicBlock *Succ : BI.MBB->successors()) {
- unsigned SuccIdx = RpotIdxMap[Succ];
- BlockInfo &SuccBI = BlockInfos[SuccIdx];
+ auto SuccBII = BlockInfos.find(Succ);
+ BlockInfo &SuccBI = SuccBII->second;
if (!SuccBI.Incoming) {
SuccBI.Dirty = true;
- if (SuccIdx <= Idx)
+ if (SuccBII <= BII)
Repeat = true;
if (!MoveBracketsToSucc) {
MoveBracketsToSucc = &SuccBI;
@@ -1546,7 +1566,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
}
} else if (SuccBI.Incoming->merge(*Brackets)) {
SuccBI.Dirty = true;
- if (SuccIdx <= Idx)
+ if (SuccBII <= BII)
Repeat = true;
}
}
@@ -1612,13 +1632,15 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
// TODO: Could insert earlier and schedule more liberally with operations
// that only use caller preserved registers.
MachineBasicBlock &EntryBB = MF.front();
+ MachineBasicBlock::iterator I = EntryBB.begin();
+ for (MachineBasicBlock::iterator E = EntryBB.end();
+ I != E && (I->isPHI() || I->isMetaInstruction()); ++I)
+ ;
+ BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
if (ST->hasVscnt())
- BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(),
- TII->get(AMDGPU::S_WAITCNT_VSCNT))
- .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
- .addImm(0);
- BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
- .addImm(0);
+ BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT))
+ .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+ .addImm(0);
Modified = true;
}
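The new insertion point walks past leading PHIs and meta instructions instead of calling getFirstNonPHI() twice, so both waits land at the same spot. A trivial standalone equivalent over a generic instruction list (types invented):

#include <list>

struct Instr { bool IsPHI = false; bool IsMetaInstruction = false; };

// Returns the position of the first "real" instruction; both s_waitcnt 0 and,
// when the target has vscnt, s_waitcnt_vscnt 0 are built at this point.
std::list<Instr>::iterator firstRealInstr(std::list<Instr> &EntryBB) {
  auto I = EntryBB.begin();
  while (I != EntryBB.end() && (I->IsPHI || I->IsMetaInstruction))
    ++I;
  return I;
}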
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index 4dcbe92861f2..428c21c896d5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -114,6 +114,9 @@ class InstSI <dag outs, dag ins, string asm = "",
// FLAT_SCRATCH segment. Must be 0 for non-FLAT instructions.
field bit IsNonFlatSeg = 0;
+ // Reads the mode register, usually for FP environment.
+ field bit ReadsModeReg = 0;
+
// This bit indicates that this uses the floating point double precision
// rounding mode flags
field bit FPDPRounding = 0;
@@ -303,7 +306,7 @@ class MIMGe_gfx10 <bits<8> op> : MIMGe {
bits<3> dim;
bits<2> nsa;
bits<1> dlc;
- bits<1> a16 = 0; // TODO: this should be an operand
+ bits<1> a16;
let Inst{0} = op{7};
let Inst{2-1} = nsa;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d53950ca4465..9af8ffedce0f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -63,6 +63,8 @@
using namespace llvm;
+#define DEBUG_TYPE "si-instr-info"
+
#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"
@@ -83,6 +85,12 @@ static cl::opt<unsigned>
BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
cl::desc("Restrict range of branch instructions (DEBUG)"));
+static cl::opt<bool> Fix16BitCopies(
+ "amdgpu-fix-16-bit-physreg-copies",
+ cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
+ cl::init(true),
+ cl::ReallyHidden);
+
SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
: AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
RI(ST), ST(ST) {
@@ -136,6 +144,8 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
case AMDGPU::V_MOV_B64_PSEUDO:
+ case AMDGPU::V_ACCVGPR_READ_B32:
+ case AMDGPU::V_ACCVGPR_WRITE_B32:
// No implicit operands.
return MI.getNumOperands() == MI.getDesc().getNumOperands();
default:
@@ -258,43 +268,49 @@ static bool isStride64(unsigned Opc) {
}
}
-bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
- const MachineOperand *&BaseOp,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const {
+bool SIInstrInfo::getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
if (!LdSt.mayLoadOrStore())
return false;
unsigned Opc = LdSt.getOpcode();
+ OffsetIsScalable = false;
+ const MachineOperand *BaseOp, *OffsetOp;
+ int DataOpIdx;
if (isDS(LdSt)) {
- const MachineOperand *OffsetImm =
- getNamedOperand(LdSt, AMDGPU::OpName::offset);
- if (OffsetImm) {
+ BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
+ OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset);
+ if (OffsetOp) {
// Normal, single offset LDS instruction.
- BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
- // TODO: ds_consume/ds_append use M0 for the base address. Is it safe to
- // report that here?
- if (!BaseOp || !BaseOp->isReg())
+ if (!BaseOp) {
+ // DS_CONSUME/DS_APPEND use M0 for the base address.
+ // TODO: find the implicit use operand for M0 and use that as BaseOp?
+ return false;
+ }
+ BaseOps.push_back(BaseOp);
+ Offset = OffsetOp->getImm();
+ // Get appropriate operand, and compute width accordingly.
+ DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
+ if (DataOpIdx == -1)
+ DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
+ Width = getOpSize(LdSt, DataOpIdx);
+ } else {
+ // The 2 offset instructions use offset0 and offset1 instead. We can treat
+ // these as a load with a single offset if the 2 offsets are consecutive.
+ // We will use this for some partially aligned loads.
+ const MachineOperand *Offset0Op =
+ getNamedOperand(LdSt, AMDGPU::OpName::offset0);
+ const MachineOperand *Offset1Op =
+ getNamedOperand(LdSt, AMDGPU::OpName::offset1);
+
+ unsigned Offset0 = Offset0Op->getImm();
+ unsigned Offset1 = Offset1Op->getImm();
+ if (Offset0 + 1 != Offset1)
return false;
- Offset = OffsetImm->getImm();
-
- return true;
- }
-
- // The 2 offset instructions use offset0 and offset1 instead. We can treat
- // these as a load with a single offset if the 2 offsets are consecutive. We
- // will use this for some partially aligned loads.
- const MachineOperand *Offset0Imm =
- getNamedOperand(LdSt, AMDGPU::OpName::offset0);
- const MachineOperand *Offset1Imm =
- getNamedOperand(LdSt, AMDGPU::OpName::offset1);
-
- uint8_t Offset0 = Offset0Imm->getImm();
- uint8_t Offset1 = Offset1Imm->getImm();
-
- if (Offset1 > Offset0 && Offset1 - Offset0 == 1) {
// Each of these offsets is in element sized units, so we need to convert
// to bytes of the individual reads.
@@ -310,16 +326,20 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
if (isStride64(Opc))
EltSize *= 64;
- BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
- if (!BaseOp->isReg())
- return false;
-
+ BaseOps.push_back(BaseOp);
Offset = EltSize * Offset0;
-
- return true;
+ // Get appropriate operand(s), and compute width accordingly.
+ DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
+ if (DataOpIdx == -1) {
+ DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
+ Width = getOpSize(LdSt, DataOpIdx);
+ DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
+ Width += getOpSize(LdSt, DataOpIdx);
+ } else {
+ Width = getOpSize(LdSt, DataOpIdx);
+ }
}
-
- return false;
+ return true;
}
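The DS branch above folds a ds_read2/ds_write2 pair into a single access when offset1 immediately follows offset0: the byte offset becomes EltSize * Offset0 and the width is the combined size of the data being moved (the sum of data0 and data1 for a write, the full vdst for a read). A minimal standalone restatement of the fold (types and names invented):

#include <cstdint>
#include <optional>

struct FoldedAccess { int64_t ByteOffset; unsigned WidthInBytes; };

// EltSizeInBytes is 4 or 8, times 64 for the stride-64 forms; Piece0Bytes and
// Piece1Bytes are the two halves being combined into one width.
std::optional<FoldedAccess> foldTwoOffsetDS(unsigned Offset0, unsigned Offset1,
                                            unsigned EltSizeInBytes,
                                            unsigned Piece0Bytes,
                                            unsigned Piece1Bytes) {
  if (Offset0 + 1 != Offset1)
    return std::nullopt;  // not consecutive: not treated as a single access
  return FoldedAccess{int64_t(EltSizeInBytes) * Offset0,
                      Piece0Bytes + Piece1Bytes};
}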
if (isMUBUF(LdSt) || isMTBUF(LdSt)) {
@@ -339,59 +359,78 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
const MachineOperand *OffsetImm =
getNamedOperand(LdSt, AMDGPU::OpName::offset);
- BaseOp = SOffset;
+ BaseOps.push_back(RSrc);
+ BaseOps.push_back(SOffset);
Offset = OffsetImm->getImm();
- return true;
- }
-
- const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
- if (!AddrReg)
- return false;
+ } else {
+ BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::srsrc);
+ if (!BaseOp) // e.g. BUFFER_WBINVL1_VOL
+ return false;
+ BaseOps.push_back(BaseOp);
- const MachineOperand *OffsetImm =
- getNamedOperand(LdSt, AMDGPU::OpName::offset);
- BaseOp = AddrReg;
- Offset = OffsetImm->getImm();
- if (SOffset) // soffset can be an inline immediate.
- Offset += SOffset->getImm();
+ BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
+ if (BaseOp)
+ BaseOps.push_back(BaseOp);
- if (!BaseOp->isReg())
- return false;
+ const MachineOperand *OffsetImm =
+ getNamedOperand(LdSt, AMDGPU::OpName::offset);
+ Offset = OffsetImm->getImm();
+ if (SOffset) // soffset can be an inline immediate.
+ Offset += SOffset->getImm();
+ }
+ // Get appropriate operand, and compute width accordingly.
+ DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
+ if (DataOpIdx == -1)
+ DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
+ Width = getOpSize(LdSt, DataOpIdx);
+ return true;
+ }
+ if (isMIMG(LdSt)) {
+ int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
+ BaseOps.push_back(&LdSt.getOperand(SRsrcIdx));
+ int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
+ if (VAddr0Idx >= 0) {
+ // GFX10 possible NSA encoding.
+ for (int I = VAddr0Idx; I < SRsrcIdx; ++I)
+ BaseOps.push_back(&LdSt.getOperand(I));
+ } else {
+ BaseOps.push_back(getNamedOperand(LdSt, AMDGPU::OpName::vaddr));
+ }
+ Offset = 0;
+ // Get appropriate operand, and compute width accordingly.
+ DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
+ Width = getOpSize(LdSt, DataOpIdx);
return true;
}
if (isSMRD(LdSt)) {
- const MachineOperand *OffsetImm =
- getNamedOperand(LdSt, AMDGPU::OpName::offset);
- if (!OffsetImm)
+ BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::sbase);
+ if (!BaseOp) // e.g. S_MEMTIME
return false;
-
- const MachineOperand *SBaseReg = getNamedOperand(LdSt, AMDGPU::OpName::sbase);
- BaseOp = SBaseReg;
- Offset = OffsetImm->getImm();
- if (!BaseOp->isReg())
- return false;
-
+ BaseOps.push_back(BaseOp);
+ OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset);
+ Offset = OffsetOp ? OffsetOp->getImm() : 0;
+ // Get appropriate operand, and compute width accordingly.
+ DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
+ Width = getOpSize(LdSt, DataOpIdx);
return true;
}
if (isFLAT(LdSt)) {
- const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
- if (VAddr) {
- // Can't analyze 2 offsets.
- if (getNamedOperand(LdSt, AMDGPU::OpName::saddr))
- return false;
-
- BaseOp = VAddr;
- } else {
- // scratch instructions have either vaddr or saddr.
- BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr);
- }
-
+ // Instructions have either vaddr or saddr or both.
+ BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
+ if (BaseOp)
+ BaseOps.push_back(BaseOp);
+ BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr);
+ if (BaseOp)
+ BaseOps.push_back(BaseOp);
Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
- if (!BaseOp->isReg())
- return false;
+ // Get appropriate operand, and compute width accordingly.
+ DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
+ if (DataOpIdx == -1)
+ DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
+ Width = getOpSize(LdSt, DataOpIdx);
return true;
}
@@ -399,15 +438,13 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
}
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
- const MachineOperand &BaseOp1,
+ ArrayRef<const MachineOperand *> BaseOps1,
const MachineInstr &MI2,
- const MachineOperand &BaseOp2) {
- // Support only base operands with base registers.
- // Note: this could be extended to support FI operands.
- if (!BaseOp1.isReg() || !BaseOp2.isReg())
- return false;
-
- if (BaseOp1.isIdenticalTo(BaseOp2))
+ ArrayRef<const MachineOperand *> BaseOps2) {
+ // Only examine the first "base" operand of each instruction, on the
+ // assumption that it represents the real base address of the memory access.
+ // Other operands are typically offsets or indices from this base address.
+ if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))
return true;
if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
@@ -433,62 +470,31 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
return Base1 == Base2;
}
-bool SIInstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
- const MachineOperand &BaseOp2,
- unsigned NumLoads) const {
- const MachineInstr &FirstLdSt = *BaseOp1.getParent();
- const MachineInstr &SecondLdSt = *BaseOp2.getParent();
-
- if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOp1, SecondLdSt, BaseOp2))
- return false;
-
- const MachineOperand *FirstDst = nullptr;
- const MachineOperand *SecondDst = nullptr;
-
- if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) ||
- (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt)) ||
- (isFLAT(FirstLdSt) && isFLAT(SecondLdSt))) {
- const unsigned MaxGlobalLoadCluster = 6;
- if (NumLoads > MaxGlobalLoadCluster)
- return false;
-
- FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata);
- if (!FirstDst)
- FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
- SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata);
- if (!SecondDst)
- SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
- } else if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) {
- FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst);
- SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst);
- } else if (isDS(FirstLdSt) && isDS(SecondLdSt)) {
- FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
- SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
- }
-
- if (!FirstDst || !SecondDst)
+bool SIInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+ ArrayRef<const MachineOperand *> BaseOps2,
+ unsigned NumLoads,
+ unsigned NumBytes) const {
+ // If current mem ops pair do not have same base pointer, then they cannot be
+ // clustered.
+ assert(!BaseOps1.empty() && !BaseOps2.empty());
+ const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
+ const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
+ if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2))
return false;
- // Try to limit clustering based on the total number of bytes loaded
- // rather than the number of instructions. This is done to help reduce
- // register pressure. The method used is somewhat inexact, though,
- // because it assumes that all loads in the cluster will load the
- // same number of bytes as FirstLdSt.
-
- // The unit of this value is bytes.
- // FIXME: This needs finer tuning.
- unsigned LoadClusterThreshold = 16;
-
- const MachineRegisterInfo &MRI =
- FirstLdSt.getParent()->getParent()->getRegInfo();
-
- const Register Reg = FirstDst->getReg();
-
- const TargetRegisterClass *DstRC = Register::isVirtualRegister(Reg)
- ? MRI.getRegClass(Reg)
- : RI.getPhysRegClass(Reg);
-
- return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold;
+ // Compute max cluster size based on average number bytes clustered till now,
+ // and decide based on it, if current mem ops pair can be clustered or not.
+ assert((NumLoads > 0) && (NumBytes > 0) && (NumBytes >= NumLoads) &&
+ "Invalid NumLoads/NumBytes values");
+ unsigned MaxNumLoads;
+ if (NumBytes <= 4 * NumLoads) {
+ // Loads are dword or smaller (on average).
+ MaxNumLoads = 5;
+ } else {
+ // Loads are bigger than a dword (on average).
+ MaxNumLoads = 4;
+ }
+ return NumLoads <= MaxNumLoads;
}
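The new cut-off above replaces the old register-pressure estimate with a check on the average access size of the cluster built so far. Restated as a self-contained helper:

// NumLoads/NumBytes describe the cluster including the candidate pair.
bool shouldCluster(unsigned NumLoads, unsigned NumBytes) {
  unsigned MaxNumLoads = (NumBytes <= 4 * NumLoads)
                             ? 5   // dword or smaller on average
                             : 4;  // wider than a dword on average
  return NumLoads <= MaxNumLoads;
}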
// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
@@ -516,11 +522,10 @@ bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL, MCRegister DestReg,
- MCRegister SrcReg, bool KillSrc) {
+ MCRegister SrcReg, bool KillSrc,
+ const char *Msg = "illegal SGPR to VGPR copy") {
MachineFunction *MF = MBB.getParent();
- DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(),
- "illegal SGPR to VGPR copy",
- DL, DS_Error);
+ DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(), Msg, DL, DS_Error);
LLVMContext &C = MF->getFunction().getContext();
C.diagnose(IllegalCopy);
@@ -534,6 +539,25 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MCRegister SrcReg, bool KillSrc) const {
const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);
+ // FIXME: This is hack to resolve copies between 16 bit and 32 bit
+ // registers until all patterns are fixed.
+ if (Fix16BitCopies &&
+ ((RI.getRegSizeInBits(*RC) == 16) ^
+ (RI.getRegSizeInBits(*RI.getPhysRegClass(SrcReg)) == 16))) {
+ MCRegister &RegToFix = (RI.getRegSizeInBits(*RC) == 16) ? DestReg : SrcReg;
+ MCRegister Super = RI.get32BitRegister(RegToFix);
+ assert(RI.getSubReg(Super, AMDGPU::lo16) == RegToFix);
+ RegToFix = Super;
+
+ if (DestReg == SrcReg) {
+ // Insert empty bundle since ExpandPostRA expects an instruction here.
+ BuildMI(MBB, MI, DL, get(AMDGPU::BUNDLE));
+ return;
+ }
+
+ RC = RI.getPhysRegClass(DestReg);
+ }
+
if (RC == &AMDGPU::VGPR_32RegClass) {
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
AMDGPU::SReg_32RegClass.contains(SrcReg) ||
@@ -580,6 +604,13 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
if (RC == &AMDGPU::SReg_64RegClass) {
+ if (SrcReg == AMDGPU::SCC) {
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B64), DestReg)
+ .addImm(1)
+ .addImm(0);
+ return;
+ }
+
if (DestReg == AMDGPU::VCC) {
if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
@@ -606,10 +637,18 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
if (DestReg == AMDGPU::SCC) {
+ // Copying 64-bit or 32-bit sources to SCC barely makes sense,
+ // but SelectionDAG emits such copies for i1 sources.
+ // TODO: Use S_BITCMP0_B32 instead and only consider the 0th bit.
+ if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
+ SrcReg = RI.getSubReg(SrcReg, AMDGPU::sub0);
+ }
assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
+
BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32))
- .addReg(SrcReg, getKillRegState(KillSrc))
- .addImm(0);
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
+
return;
}
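The two SCC copy directions handled above boil down to simple scalar semantics: reading SCC materializes it as 0/1 (s_cselect_b64 dst, 1, 0), and writing SCC compares the low 32 bits of the source against zero (s_cmp_lg_u32 src, 0). Modeled on plain integers as a sketch:

#include <cstdint>

// s_cselect_b64 dst, 1, 0: dst gets 1 when SCC is set, 0 otherwise.
uint64_t copySccToSreg64(bool Scc) { return Scc ? 1u : 0u; }

// s_cmp_lg_u32 src, 0: only sub0 of a 64-bit source is considered.
bool copySregToScc(uint64_t Src) { return uint32_t(Src) != 0; }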
@@ -660,7 +699,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Registers in the sequence are allocated contiguously so we can just
// use register number to pick one of three round-robin temps.
unsigned RegNo = DestReg % 3;
- unsigned Tmp = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0);
+ Register Tmp = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0);
if (!Tmp)
report_fatal_error("Cannot scavenge VGPR to copy to AGPR");
RS.setRegUsed(Tmp);
@@ -685,6 +724,72 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
+ if (RI.getRegSizeInBits(*RC) == 16) {
+ assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
+ AMDGPU::VGPR_HI16RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
+ AMDGPU::AGPR_LO16RegClass.contains(SrcReg));
+
+ bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
+ bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
+ bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
+ bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
+ bool DstLow = AMDGPU::VGPR_LO16RegClass.contains(DestReg) ||
+ AMDGPU::SReg_LO16RegClass.contains(DestReg) ||
+ AMDGPU::AGPR_LO16RegClass.contains(DestReg);
+ bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
+ AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
+ MCRegister NewDestReg = RI.get32BitRegister(DestReg);
+ MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);
+
+ if (IsSGPRDst) {
+ if (!IsSGPRSrc) {
+ reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
+ return;
+ }
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), NewDestReg)
+ .addReg(NewSrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ if (IsAGPRDst || IsAGPRSrc) {
+ if (!DstLow || !SrcLow) {
+ reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc,
+ "Cannot use hi16 subreg with an AGPR!");
+ }
+
+ copyPhysReg(MBB, MI, DL, NewDestReg, NewSrcReg, KillSrc);
+ return;
+ }
+
+ if (IsSGPRSrc && !ST.hasSDWAScalar()) {
+ if (!DstLow || !SrcLow) {
+ reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc,
+ "Cannot use hi16 subreg on VI!");
+ }
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), NewDestReg)
+ .addReg(NewSrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), NewDestReg)
+ .addImm(0) // src0_modifiers
+ .addReg(NewSrcReg)
+ .addImm(0) // clamp
+ .addImm(DstLow ? AMDGPU::SDWA::SdwaSel::WORD_0
+ : AMDGPU::SDWA::SdwaSel::WORD_1)
+ .addImm(AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE)
+ .addImm(SrcLow ? AMDGPU::SDWA::SdwaSel::WORD_0
+ : AMDGPU::SDWA::SdwaSel::WORD_1)
+ .addReg(NewDestReg, RegState::Implicit | RegState::Undef);
+ // First implicit operand is $exec.
+ MIB->tieOperands(0, MIB->getNumOperands() - 1);
+ return;
+ }
+
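The SDWA fallback above moves one 16-bit half of the 32-bit source register into one half of the 32-bit destination, while dst_unused:UNUSED_PRESERVE keeps the other destination half intact. Modeled on plain integers (a sketch, not LLVM code):

#include <cstdint>

// DstLow/SrcLow correspond to selecting WORD_0 (low half) vs. WORD_1 (high half).
uint32_t sdwaCopy16(uint32_t Dst, uint32_t Src, bool DstLow, bool SrcLow) {
  uint32_t Half = SrcLow ? (Src & 0xffffu) : (Src >> 16);
  return DstLow ? (Dst & 0xffff0000u) | Half          // write low, keep high
                : (Dst & 0x0000ffffu) | (Half << 16); // write high, keep low
}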
unsigned EltSize = 4;
unsigned Opcode = AMDGPU::V_MOV_B32_e32;
if (RI.isSGPRClass(RC)) {
@@ -806,7 +911,7 @@ void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB,
int64_t IdxValue = Idx == 0 ? Value : 0;
MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
- get(Opcode), RI.getSubReg(DestReg, Idx));
+ get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx]));
Builder.addImm(IdxValue);
}
}
@@ -818,10 +923,10 @@ SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const {
void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DstReg,
+ const DebugLoc &DL, Register DstReg,
ArrayRef<MachineOperand> Cond,
- unsigned TrueReg,
- unsigned FalseReg) const {
+ Register TrueReg,
+ Register FalseReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
MachineFunction *MF = MBB.getParent();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
@@ -944,10 +1049,10 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
}
}
-unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB,
+Register SIInstrInfo::insertEQ(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL,
- unsigned SrcReg, int Value) const {
+ Register SrcReg, int Value) const {
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg)
@@ -957,10 +1062,10 @@ unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB,
return Reg;
}
-unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB,
+Register SIInstrInfo::insertNE(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL,
- unsigned SrcReg, int Value) const {
+ Register SrcReg, int Value) const {
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg)
@@ -984,6 +1089,80 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
return AMDGPU::COPY;
}
+static unsigned getIndirectVGPRWritePseudoOpc(unsigned VecSize) {
+ if (VecSize <= 32) // 4 bytes
+ return AMDGPU::V_INDIRECT_REG_WRITE_B32_V1;
+ if (VecSize <= 64) // 8 bytes
+ return AMDGPU::V_INDIRECT_REG_WRITE_B32_V2;
+ if (VecSize <= 96) // 12 bytes
+ return AMDGPU::V_INDIRECT_REG_WRITE_B32_V3;
+ if (VecSize <= 128) // 16 bytes
+ return AMDGPU::V_INDIRECT_REG_WRITE_B32_V4;
+ if (VecSize <= 160) // 20 bytes
+ return AMDGPU::V_INDIRECT_REG_WRITE_B32_V5;
+ if (VecSize <= 256) // 32 bytes
+ return AMDGPU::V_INDIRECT_REG_WRITE_B32_V8;
+ if (VecSize <= 512) // 64 bytes
+ return AMDGPU::V_INDIRECT_REG_WRITE_B32_V16;
+ if (VecSize <= 1024) // 128 bytes
+ return AMDGPU::V_INDIRECT_REG_WRITE_B32_V32;
+
+ llvm_unreachable("unsupported size for IndirectRegWrite pseudos");
+}
+
+static unsigned getIndirectSGPRWritePseudo32(unsigned VecSize) {
+ if (VecSize <= 32) // 4 bytes
+ return AMDGPU::S_INDIRECT_REG_WRITE_B32_V1;
+ if (VecSize <= 64) // 8 bytes
+ return AMDGPU::S_INDIRECT_REG_WRITE_B32_V2;
+ if (VecSize <= 96) // 12 bytes
+ return AMDGPU::S_INDIRECT_REG_WRITE_B32_V3;
+ if (VecSize <= 128) // 16 bytes
+ return AMDGPU::S_INDIRECT_REG_WRITE_B32_V4;
+ if (VecSize <= 160) // 20 bytes
+ return AMDGPU::S_INDIRECT_REG_WRITE_B32_V5;
+ if (VecSize <= 256) // 32 bytes
+ return AMDGPU::S_INDIRECT_REG_WRITE_B32_V8;
+ if (VecSize <= 512) // 64 bytes
+ return AMDGPU::S_INDIRECT_REG_WRITE_B32_V16;
+ if (VecSize <= 1024) // 128 bytes
+ return AMDGPU::S_INDIRECT_REG_WRITE_B32_V32;
+
+ llvm_unreachable("unsupported size for IndirectRegWrite pseudos");
+}
+
+static unsigned getIndirectSGPRWritePseudo64(unsigned VecSize) {
+ if (VecSize <= 64) // 8 bytes
+ return AMDGPU::S_INDIRECT_REG_WRITE_B64_V1;
+ if (VecSize <= 128) // 16 bytes
+ return AMDGPU::S_INDIRECT_REG_WRITE_B64_V2;
+ if (VecSize <= 256) // 32 bytes
+ return AMDGPU::S_INDIRECT_REG_WRITE_B64_V4;
+ if (VecSize <= 512) // 64 bytes
+ return AMDGPU::S_INDIRECT_REG_WRITE_B64_V8;
+ if (VecSize <= 1024) // 128 bytes
+ return AMDGPU::S_INDIRECT_REG_WRITE_B64_V16;
+
+ llvm_unreachable("unsupported size for IndirectRegWrite pseudos");
+}
+
+const MCInstrDesc &SIInstrInfo::getIndirectRegWritePseudo(
+ unsigned VecSize, unsigned EltSize, bool IsSGPR) const {
+ if (IsSGPR) {
+ switch (EltSize) {
+ case 32:
+ return get(getIndirectSGPRWritePseudo32(VecSize));
+ case 64:
+ return get(getIndirectSGPRWritePseudo64(VecSize));
+ default:
+ llvm_unreachable("invalid reg indexing elt size");
+ }
+ }
+
+ assert(EltSize == 32 && "invalid reg indexing elt size");
+ return get(getIndirectVGPRWritePseudoOpc(VecSize));
+}
+
static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
switch (Size) {
case 4:
@@ -996,6 +1175,8 @@ static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_S128_SAVE;
case 20:
return AMDGPU::SI_SPILL_S160_SAVE;
+ case 24:
+ return AMDGPU::SI_SPILL_S192_SAVE;
case 32:
return AMDGPU::SI_SPILL_S256_SAVE;
case 64:
@@ -1019,6 +1200,8 @@ static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_V128_SAVE;
case 20:
return AMDGPU::SI_SPILL_V160_SAVE;
+ case 24:
+ return AMDGPU::SI_SPILL_V192_SAVE;
case 32:
return AMDGPU::SI_SPILL_V256_SAVE;
case 64:
@@ -1049,7 +1232,7 @@ static unsigned getAGPRSpillSaveOpcode(unsigned Size) {
void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill,
+ Register SrcReg, bool isKill,
int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
@@ -1058,18 +1241,18 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
const DebugLoc &DL = MBB.findDebugLoc(MI);
- unsigned Size = FrameInfo.getObjectSize(FrameIndex);
- unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
MachinePointerInfo PtrInfo
= MachinePointerInfo::getFixedStack(*MF, FrameIndex);
- MachineMemOperand *MMO
- = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
- Size, Align);
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FrameIndex),
+ FrameInfo.getObjectAlign(FrameIndex));
unsigned SpillSize = TRI->getSpillSize(*RC);
if (RI.isSGPRClass(RC)) {
MFI->setHasSpilledSGPRs();
assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
+ assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
+ SrcReg != AMDGPU::EXEC && "exec should not be spilled");
// We are only allowed to create one new instruction when spilling
// registers, so we need to use pseudo instruction for spilling SGPRs.
@@ -1079,7 +1262,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
// to make sure we are using the correct register class.
if (Register::isVirtualRegister(SrcReg) && SpillSize == 4) {
MachineRegisterInfo &MRI = MF->getRegInfo();
- MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
+ MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
}
BuildMI(MBB, MI, DL, OpDesc)
@@ -1126,6 +1309,8 @@ static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_S128_RESTORE;
case 20:
return AMDGPU::SI_SPILL_S160_RESTORE;
+ case 24:
+ return AMDGPU::SI_SPILL_S192_RESTORE;
case 32:
return AMDGPU::SI_SPILL_S256_RESTORE;
case 64:
@@ -1149,6 +1334,8 @@ static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_V128_RESTORE;
case 20:
return AMDGPU::SI_SPILL_V160_RESTORE;
+ case 24:
+ return AMDGPU::SI_SPILL_V192_RESTORE;
case 32:
return AMDGPU::SI_SPILL_V256_RESTORE;
case 64:
@@ -1179,33 +1366,34 @@ static unsigned getAGPRSpillRestoreOpcode(unsigned Size) {
void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIndex,
+ Register DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
const DebugLoc &DL = MBB.findDebugLoc(MI);
- unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
- unsigned Size = FrameInfo.getObjectSize(FrameIndex);
unsigned SpillSize = TRI->getSpillSize(*RC);
MachinePointerInfo PtrInfo
= MachinePointerInfo::getFixedStack(*MF, FrameIndex);
MachineMemOperand *MMO = MF->getMachineMemOperand(
- PtrInfo, MachineMemOperand::MOLoad, Size, Align);
+ PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FrameIndex),
+ FrameInfo.getObjectAlign(FrameIndex));
if (RI.isSGPRClass(RC)) {
MFI->setHasSpilledSGPRs();
assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
+ assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
+ DestReg != AMDGPU::EXEC && "exec should not be spilled");
// FIXME: Maybe this should not include a memoperand because it will be
// lowered to non-memory instructions.
const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize));
- if (Register::isVirtualRegister(DestReg) && SpillSize == 4) {
+ if (DestReg.isVirtual() && SpillSize == 4) {
MachineRegisterInfo &MRI = MF->getRegInfo();
- MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
+ MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
}
if (RI.spillSGPRToVGPR())
@@ -1244,7 +1432,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
unsigned WavefrontSize = ST.getWavefrontSize();
- unsigned TIDReg = MFI->getTIDReg();
+ Register TIDReg = MFI->getTIDReg();
if (!MFI->hasCalculatedTID()) {
MachineBasicBlock &Entry = MBB.getParent()->front();
MachineBasicBlock::iterator Insert = Entry.front();
@@ -1272,8 +1460,8 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
RS->enterBasicBlock(Entry);
// FIXME: Can we scavenge an SReg_64 and access the subregs?
- unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
- unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
+ Register STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
+ Register STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
.addReg(InputPtrReg)
.addImm(SI::KernelInputOffsets::NGROUPS_Z);
@@ -1482,30 +1670,55 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.eraseFromParent();
break;
}
- case AMDGPU::V_MOVRELD_B32_V1:
- case AMDGPU::V_MOVRELD_B32_V2:
- case AMDGPU::V_MOVRELD_B32_V4:
- case AMDGPU::V_MOVRELD_B32_V8:
- case AMDGPU::V_MOVRELD_B32_V16: {
- const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
+ case AMDGPU::V_INDIRECT_REG_WRITE_B32_V1:
+ case AMDGPU::V_INDIRECT_REG_WRITE_B32_V2:
+ case AMDGPU::V_INDIRECT_REG_WRITE_B32_V3:
+ case AMDGPU::V_INDIRECT_REG_WRITE_B32_V4:
+ case AMDGPU::V_INDIRECT_REG_WRITE_B32_V5:
+ case AMDGPU::V_INDIRECT_REG_WRITE_B32_V8:
+ case AMDGPU::V_INDIRECT_REG_WRITE_B32_V16:
+ case AMDGPU::V_INDIRECT_REG_WRITE_B32_V32:
+ case AMDGPU::S_INDIRECT_REG_WRITE_B32_V1:
+ case AMDGPU::S_INDIRECT_REG_WRITE_B32_V2:
+ case AMDGPU::S_INDIRECT_REG_WRITE_B32_V3:
+ case AMDGPU::S_INDIRECT_REG_WRITE_B32_V4:
+ case AMDGPU::S_INDIRECT_REG_WRITE_B32_V5:
+ case AMDGPU::S_INDIRECT_REG_WRITE_B32_V8:
+ case AMDGPU::S_INDIRECT_REG_WRITE_B32_V16:
+ case AMDGPU::S_INDIRECT_REG_WRITE_B32_V32:
+ case AMDGPU::S_INDIRECT_REG_WRITE_B64_V1:
+ case AMDGPU::S_INDIRECT_REG_WRITE_B64_V2:
+ case AMDGPU::S_INDIRECT_REG_WRITE_B64_V4:
+ case AMDGPU::S_INDIRECT_REG_WRITE_B64_V8:
+ case AMDGPU::S_INDIRECT_REG_WRITE_B64_V16: {
+ const TargetRegisterClass *EltRC = getOpRegClass(MI, 2);
+
+ unsigned Opc;
+ if (RI.hasVGPRs(EltRC)) {
+ Opc = ST.useVGPRIndexMode() ?
+ AMDGPU::V_MOV_B32_indirect : AMDGPU::V_MOVRELD_B32_e32;
+ } else {
+ Opc = RI.getRegSizeInBits(*EltRC) == 64 ?
+ AMDGPU::S_MOVRELD_B64 : AMDGPU::S_MOVRELD_B32;
+ }
+
+ const MCInstrDesc &OpDesc = get(Opc);
Register VecReg = MI.getOperand(0).getReg();
bool IsUndef = MI.getOperand(1).isUndef();
- unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm();
+ unsigned SubReg = MI.getOperand(3).getImm();
assert(VecReg == MI.getOperand(1).getReg());
- MachineInstr *MovRel =
- BuildMI(MBB, MI, DL, MovRelDesc)
- .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
- .add(MI.getOperand(2))
- .addReg(VecReg, RegState::ImplicitDefine)
- .addReg(VecReg,
- RegState::Implicit | (IsUndef ? RegState::Undef : 0));
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, OpDesc)
+ .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
+ .add(MI.getOperand(2))
+ .addReg(VecReg, RegState::ImplicitDefine)
+ .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0));
const int ImpDefIdx =
- MovRelDesc.getNumOperands() + MovRelDesc.getNumImplicitUses();
+ OpDesc.getNumOperands() + OpDesc.getNumImplicitUses();
const int ImpUseIdx = ImpDefIdx + 1;
- MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
-
+ MIB->tieOperands(ImpDefIdx, ImpUseIdx);
MI.eraseFromParent();
break;
}
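The expansion above picks the real move opcode from the element register class and the subtarget: VGPR elements use V_MOV_B32_indirect under VGPR index mode and V_MOVRELD_B32_e32 otherwise, while SGPR elements use S_MOVRELD_B32 or S_MOVRELD_B64 by element size. A compact restatement (the enum is invented for the sketch):

enum class IndirectWriteOpc {
  V_MOV_B32_indirect,
  V_MOVRELD_B32_e32,
  S_MOVRELD_B32,
  S_MOVRELD_B64
};

IndirectWriteOpc pickIndirectWriteOpc(bool EltIsVGPR, bool UseVGPRIndexMode,
                                      unsigned EltSizeInBits) {
  if (EltIsVGPR)
    return UseVGPRIndexMode ? IndirectWriteOpc::V_MOV_B32_indirect
                            : IndirectWriteOpc::V_MOVRELD_B32_e32;
  return EltSizeInBits == 64 ? IndirectWriteOpc::S_MOVRELD_B64
                             : IndirectWriteOpc::S_MOVRELD_B32;
}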
@@ -1549,22 +1762,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
break;
}
- case TargetOpcode::BUNDLE: {
- if (!MI.mayLoad() || MI.hasUnmodeledSideEffects())
- return false;
-
- // If it is a load it must be a memory clause
- for (MachineBasicBlock::instr_iterator I = MI.getIterator();
- I->isBundledWithSucc(); ++I) {
- I->unbundleFromSucc();
- for (MachineOperand &MO : I->operands())
- if (MO.isReg())
- MO.setIsInternalRead(false);
- }
-
- MI.eraseFromParent();
- break;
- }
}
return true;
}
@@ -1662,9 +1859,15 @@ static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
RegOp.ChangeToImmediate(NonRegOp.getImm());
else if (NonRegOp.isFI())
RegOp.ChangeToFrameIndex(NonRegOp.getIndex());
- else
+ else if (NonRegOp.isGlobal()) {
+ RegOp.ChangeToGA(NonRegOp.getGlobal(), NonRegOp.getOffset(),
+ NonRegOp.getTargetFlags());
+ } else
return nullptr;
+ // Make sure we don't reinterpret a subreg index in the target flags.
+ RegOp.setTargetFlags(NonRegOp.getTargetFlags());
+
NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug);
NonRegOp.setSubReg(SubReg);
@@ -2085,6 +2288,7 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
// Copy the flags onto the implicit condition register operand.
preserveCondRegFlags(CondBr->getOperand(1), Cond[1]);
+ fixImplicitOperands(*CondBr);
if (BytesAdded)
*BytesAdded = 4;
@@ -2125,8 +2329,8 @@ bool SIInstrInfo::reverseBranchCondition(
bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Cond,
- unsigned TrueReg, unsigned FalseReg,
- int &CondCycles,
+ Register DstReg, Register TrueReg,
+ Register FalseReg, int &CondCycles,
int &TrueCycles, int &FalseCycles) const {
switch (Cond[0].getImm()) {
case VCCNZ:
@@ -2165,8 +2369,8 @@ bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
- unsigned DstReg, ArrayRef<MachineOperand> Cond,
- unsigned TrueReg, unsigned FalseReg) const {
+ Register DstReg, ArrayRef<MachineOperand> Cond,
+ Register TrueReg, Register FalseReg) const {
BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
if (Pred == VCCZ || Pred == SCC_FALSE) {
Pred = static_cast<BranchPredicate>(-Pred);
@@ -2178,14 +2382,17 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
unsigned DstSize = RI.getRegSizeInBits(*DstRC);
if (DstSize == 32) {
- unsigned SelOp = Pred == SCC_TRUE ?
- AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32;
-
- // Instruction's operands are backwards from what is expected.
- MachineInstr *Select =
- BuildMI(MBB, I, DL, get(SelOp), DstReg)
- .addReg(FalseReg)
- .addReg(TrueReg);
+ MachineInstr *Select;
+ if (Pred == SCC_TRUE) {
+ Select = BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B32), DstReg)
+ .addReg(TrueReg)
+ .addReg(FalseReg);
+ } else {
+ // Instruction's operands are backwards from what is expected.
+ Select = BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e32), DstReg)
+ .addReg(FalseReg)
+ .addReg(TrueReg);
+ }
preserveCondRegFlags(Select->getOperand(3), Cond[1]);
return;
@@ -2194,8 +2401,8 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
if (DstSize == 64 && Pred == SCC_TRUE) {
MachineInstr *Select =
BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg)
- .addReg(FalseReg)
- .addReg(TrueReg);
+ .addReg(TrueReg)
+ .addReg(FalseReg);
preserveCondRegFlags(Select->getOperand(3), Cond[1]);
return;
@@ -2239,17 +2446,26 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
I = MIB->getIterator();
- SmallVector<unsigned, 8> Regs;
+ SmallVector<Register, 8> Regs;
for (int Idx = 0; Idx != NElts; ++Idx) {
Register DstElt = MRI.createVirtualRegister(EltRC);
Regs.push_back(DstElt);
unsigned SubIdx = SubIndices[Idx];
- MachineInstr *Select =
- BuildMI(MBB, I, DL, get(SelOp), DstElt)
- .addReg(FalseReg, 0, SubIdx)
- .addReg(TrueReg, 0, SubIdx);
+ MachineInstr *Select;
+ if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
+ Select =
+ BuildMI(MBB, I, DL, get(SelOp), DstElt)
+ .addReg(FalseReg, 0, SubIdx)
+ .addReg(TrueReg, 0, SubIdx);
+ } else {
+ Select =
+ BuildMI(MBB, I, DL, get(SelOp), DstElt)
+ .addReg(TrueReg, 0, SubIdx)
+ .addReg(FalseReg, 0, SubIdx);
+ }
+
preserveCondRegFlags(Select->getOperand(3), Cond[1]);
fixImplicitOperands(*Select);
@@ -2313,7 +2529,7 @@ static void removeModOperands(MachineInstr &MI) {
}
bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
- unsigned Reg, MachineRegisterInfo *MRI) const {
+ Register Reg, MachineRegisterInfo *MRI) const {
if (!MRI->hasOneNonDBGUse(Reg))
return false;
@@ -2339,15 +2555,40 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned Opc = UseMI.getOpcode();
if (Opc == AMDGPU::COPY) {
- bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
+ Register DstReg = UseMI.getOperand(0).getReg();
+ bool Is16Bit = getOpSize(UseMI, 0) == 2;
+ bool isVGPRCopy = RI.isVGPR(*MRI, DstReg);
unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
- if (RI.isAGPR(*MRI, UseMI.getOperand(0).getReg())) {
- if (!isInlineConstant(*ImmOp, AMDGPU::OPERAND_REG_INLINE_AC_INT32))
+ APInt Imm(32, ImmOp->getImm());
+
+ if (UseMI.getOperand(1).getSubReg() == AMDGPU::hi16)
+ Imm = Imm.ashr(16);
+
+ if (RI.isAGPR(*MRI, DstReg)) {
+ if (!isInlineConstant(Imm))
return false;
NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32;
}
+
+ if (Is16Bit) {
+ if (isVGPRCopy)
+ return false; // Do not clobber vgpr_hi16
+
+ if (DstReg.isVirtual() &&
+ UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
+ return false;
+
+ UseMI.getOperand(0).setSubReg(0);
+ if (DstReg.isPhysical()) {
+ DstReg = RI.get32BitRegister(DstReg);
+ UseMI.getOperand(0).setReg(DstReg);
+ }
+ assert(UseMI.getOperand(1).getReg().isVirtual());
+ }
+
UseMI.setDesc(get(NewOpc));
- UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
+ UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
+ UseMI.getOperand(1).setTargetFlags(0);
UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
return true;
}
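The immediate adjustment above handles a COPY that read the hi16 half of a 32-bit immediate: the value folded into the MOV is the arithmetically shifted high half, sign-extended for ChangeToImmediate. A standalone sketch of that arithmetic (assuming the usual arithmetic behavior of >> on signed integers, as APInt::ashr guarantees):

#include <cstdint>

int64_t foldedImmFor16BitCopy(uint32_t Imm32, bool ReadsHi16) {
  int32_t V = static_cast<int32_t>(Imm32);  // APInt Imm(32, ...)
  if (ReadsHi16)
    V >>= 16;                               // Imm = Imm.ashr(16)
  return static_cast<int64_t>(V);           // Imm.getSExtValue()
}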
@@ -2517,6 +2758,18 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
return false;
}
+static bool
+memOpsHaveSameBaseOperands(ArrayRef<const MachineOperand *> BaseOps1,
+ ArrayRef<const MachineOperand *> BaseOps2) {
+ if (BaseOps1.size() != BaseOps2.size())
+ return false;
+ for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
+ if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))
+ return false;
+ }
+ return true;
+}
+
static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
int WidthB, int OffsetB) {
int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
@@ -2527,26 +2780,26 @@ static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
const MachineInstr &MIb) const {
- const MachineOperand *BaseOp0, *BaseOp1;
+ SmallVector<const MachineOperand *, 4> BaseOps0, BaseOps1;
int64_t Offset0, Offset1;
+ unsigned Dummy0, Dummy1;
+ bool Offset0IsScalable, Offset1IsScalable;
+ if (!getMemOperandsWithOffsetWidth(MIa, BaseOps0, Offset0, Offset0IsScalable,
+ Dummy0, &RI) ||
+ !getMemOperandsWithOffsetWidth(MIb, BaseOps1, Offset1, Offset1IsScalable,
+ Dummy1, &RI))
+ return false;
- if (getMemOperandWithOffset(MIa, BaseOp0, Offset0, &RI) &&
- getMemOperandWithOffset(MIb, BaseOp1, Offset1, &RI)) {
- if (!BaseOp0->isIdenticalTo(*BaseOp1))
- return false;
+ if (!memOpsHaveSameBaseOperands(BaseOps0, BaseOps1))
+ return false;
- if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) {
- // FIXME: Handle ds_read2 / ds_write2.
- return false;
- }
- unsigned Width0 = (*MIa.memoperands_begin())->getSize();
- unsigned Width1 = (*MIb.memoperands_begin())->getSize();
- if (offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
- return true;
- }
+ if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) {
+ // FIXME: Handle ds_read2 / ds_write2.
+ return false;
}
-
- return false;
+ unsigned Width0 = MIa.memoperands().front()->getSize();
+ unsigned Width1 = MIb.memoperands().front()->getSize();
+ return offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1);
}
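The overlap test this relies on is the standard disjoint-interval check: the access that starts lower must end at or before the other one begins. As a self-contained sketch of that check:

// Two accesses [OffsetA, OffsetA + WidthA) and [OffsetB, OffsetB + WidthB)
// cannot alias when the lower interval ends before the higher one starts.
bool accessesAreDisjoint(int WidthA, int OffsetA, int WidthB, int OffsetB) {
  int LowOffset  = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  int LowWidth   = OffsetA < OffsetB ? WidthA  : WidthB;
  return LowOffset + LowWidth <= HighOffset;
}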
bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
@@ -2586,7 +2839,7 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
if (isSMRD(MIb))
return checkInstOffsetsDoNotOverlap(MIa, MIb);
- return !isFLAT(MIb) && !isMUBUF(MIa) && !isMTBUF(MIa);
+ return !isFLAT(MIb) && !isMUBUF(MIb) && !isMTBUF(MIb);
}
if (isFLAT(MIa)) {
@@ -2732,16 +2985,30 @@ static bool changesVGPRIndexingMode(const MachineInstr &MI) {
bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const {
- // XXX - Do we want the SP check in the base implementation?
+ // Skipping the check for SP writes in the base implementation. The reason it
+ // was added was apparently due to compile time concerns.
+ //
+ // TODO: Do we really want this barrier? It triggers unnecessary hazard nops
+ // but is probably avoidable.
+
+ // Copied from base implementation.
+ // Terminators and labels can't be scheduled around.
+ if (MI.isTerminator() || MI.isPosition())
+ return true;
+
+ // INLINEASM_BR can jump to another block
+ if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
+ return true;
// Target-independent instructions do not have an implicit-use of EXEC, even
// when they operate on VGPRs. Treating EXEC modifications as scheduling
// boundaries prevents incorrect movements of such instructions.
- return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF) ||
- MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
+
+ // TODO: Don't treat setreg with known constant that only changes MODE as
+ // barrier.
+ return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
- MI.getOpcode() == AMDGPU::S_DENORM_MODE ||
changesVGPRIndexingMode(MI);
}
@@ -2755,6 +3022,20 @@ bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
Opcode == AMDGPU::DS_GWS_BARRIER;
}
+bool SIInstrInfo::modifiesModeRegister(const MachineInstr &MI) {
+ // Skip the full operand and register alias search modifiesRegister
+ // does. There's only a handful of instructions that touch this, it's only an
+ // implicit def, and doesn't alias any other registers.
+ if (const MCPhysReg *ImpDef = MI.getDesc().getImplicitDefs()) {
+ for (; ImpDef && *ImpDef; ++ImpDef) {
+ if (*ImpDef == AMDGPU::MODE)
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
@@ -2780,6 +3061,10 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
if (MI.isCall() || MI.isInlineAsm())
return true; // conservative assumption
+ // A mode change is a scalar operation that influences vector instructions.
+ if (modifiesModeRegister(MI))
+ return true;
+
// These are like SALU instructions in terms of effects, so it's questionable
// whether we should return true for those.
//
@@ -2866,10 +3151,26 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
return AMDGPU::isInlinableLiteral64(MO.getImm(),
ST.hasInv2PiInlineImm());
case AMDGPU::OPERAND_REG_IMM_INT16:
- case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+ // We would expect inline immediates not to be concerned with an integer/fp
+ // distinction. However, in the case of 16-bit integer operations, the
+ // "floating point" values appear not to work. The hardware seems to read the
+ // low 16 bits of 32-bit immediates, which happens to always work for the
+ // integer values.
+ //
+ // See llvm bugzilla 46302.
+ //
+ // TODO: Theoretically we could use op-sel to use the high bits of the
+ // 32-bit FP values.
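+ //
+ // For example (illustrative): the integer inline constant -16 keeps its
+ // value in the low 16 bits of its sign-extended 32-bit encoding, so it
+ // works, whereas 1.0 as f32 is 0x3F800000, whose low 16 bits are 0x0000
+ // rather than the f16 encoding 0x3C00.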
+ return AMDGPU::isInlinableIntLiteral(Imm);
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+ // This suffers the same problem as the scalar 16-bit cases.
+ return AMDGPU::isInlinableIntLiteralV216(Imm);
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16: {
if (isInt<16>(Imm) || isUInt<16>(Imm)) {
// A few special case instructions have 16-bit operands on subtargets
@@ -2883,11 +3184,8 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
return false;
}
- case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
- case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
- case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
uint32_t Trunc = static_cast<uint32_t>(Imm);
return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
@@ -3056,7 +3354,8 @@ static void copyFlagsToImplicitVCC(MachineInstr &MI,
const MachineOperand &Orig) {
for (MachineOperand &Use : MI.implicit_operands()) {
- if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
+ if (Use.isUse() &&
+ (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {
Use.setIsUndef(Orig.isUndef());
Use.setIsKill(Orig.isKill());
return;
@@ -3068,7 +3367,8 @@ MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
unsigned Op32) const {
MachineBasicBlock *MBB = MI.getParent();
MachineInstrBuilder Inst32 =
- BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32));
+ BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32))
+ .setMIFlags(MI.getFlags());
// Add the dst operand if the 32-bit encoding also has an explicit $vdst.
// For VOPC instructions, this is replaced by an implicit def of vcc.
@@ -3138,7 +3438,7 @@ bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
}
}
-static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
+static Register findImplicitSGPRRead(const MachineInstr &MI) {
for (const MachineOperand &MO : MI.implicit_operands()) {
// We only care about reads.
if (MO.isDef())
@@ -3239,6 +3539,11 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
return true;
}
+ if (isMIMG(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
+ ErrInfo = "missing memory operand from MIMG instruction.";
+ return false;
+ }
+
// Make sure the register classes are correct.
for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
if (MI.getOperand(i).isFPImm()) {
@@ -3446,8 +3751,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1)
++ConstantBusCount;
- SmallVector<unsigned, 2> SGPRsUsed;
- unsigned SGPRUsed = findImplicitSGPRRead(MI);
+ SmallVector<Register, 2> SGPRsUsed;
+ Register SGPRUsed = findImplicitSGPRRead(MI);
if (SGPRUsed != AMDGPU::NoRegister) {
++ConstantBusCount;
SGPRsUsed.push_back(SGPRUsed);
@@ -3482,7 +3787,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
if (isVOP3(MI) && LiteralCount) {
- if (LiteralCount && !ST.hasVOP3Literal()) {
+ if (!ST.hasVOP3Literal()) {
ErrInfo = "VOP3 instruction uses literal";
return false;
}
@@ -3665,11 +3970,34 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
return false;
}
+ bool IsA16 = false;
+ if (ST.hasR128A16()) {
+ const MachineOperand *R128A16 = getNamedOperand(MI, AMDGPU::OpName::r128);
+ IsA16 = R128A16->getImm() != 0;
+ } else if (ST.hasGFX10A16()) {
+ const MachineOperand *A16 = getNamedOperand(MI, AMDGPU::OpName::a16);
+ IsA16 = A16->getImm() != 0;
+ }
+
+ bool PackDerivatives = IsA16 || BaseOpcode->G16;
bool IsNSA = SRsrcIdx - VAddr0Idx > 1;
- unsigned AddrWords = BaseOpcode->NumExtraArgs +
- (BaseOpcode->Gradients ? Dim->NumGradients : 0) +
- (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
- (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+
+ unsigned AddrWords = BaseOpcode->NumExtraArgs;
+ unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
+ (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+ if (IsA16)
+ AddrWords += (AddrComponents + 1) / 2;
+ else
+ AddrWords += AddrComponents;
+
+ if (BaseOpcode->Gradients) {
+ if (PackDerivatives)
+ // There are two gradients per coordinate; we pack them separately.
+ // For the 3D case we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv).
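+ // E.g. NumGradients = 4 (2D) packs into (4/2 + 1) / 2 * 2 = 2 dwords, and
+ // NumGradients = 6 (3D) packs into (6/2 + 1) / 2 * 2 = 4 dwords.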
+ AddrWords += (Dim->NumGradients / 2 + 1) / 2 * 2;
+ else
+ AddrWords += Dim->NumGradients;
+ }
unsigned VAddrWords;
if (IsNSA) {
@@ -3681,14 +4009,15 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
AddrWords = 16;
else if (AddrWords > 4)
AddrWords = 8;
- else if (AddrWords == 3 && VAddrWords == 4) {
- // CodeGen uses the V4 variant of instructions for three addresses,
- // because the selection DAG does not support non-power-of-two types.
+ else if (AddrWords == 4)
AddrWords = 4;
- }
+ else if (AddrWords == 3)
+ AddrWords = 3;
}
if (VAddrWords != AddrWords) {
+ LLVM_DEBUG(dbgs() << "bad vaddr size, expected " << AddrWords
+ << " but got " << VAddrWords << "\n");
ErrInfo = "bad vaddr size";
return false;
}
@@ -4217,7 +4546,7 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
}
}
-unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
+Register SIInstrInfo::readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
MachineRegisterInfo &MRI) const {
const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
@@ -5002,6 +5331,76 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
Inst.eraseFromParent();
continue;
+
+ // TODO: remove as soon as everything is ready
+ // to replace VGPR to SGPR copy with V_READFIRSTLANEs.
+ // S_ADD/SUB_CO_PSEUDO as well as S_UADDO/USUBO_PSEUDO
+ // can only be selected from the uniform SDNode.
+ case AMDGPU::S_ADD_CO_PSEUDO:
+ case AMDGPU::S_SUB_CO_PSEUDO: {
+ unsigned Opc = (Inst.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
+ ? AMDGPU::V_ADDC_U32_e64
+ : AMDGPU::V_SUBB_U32_e64;
+ const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
+
+ Register CarryInReg = Inst.getOperand(4).getReg();
+ if (!MRI.constrainRegClass(CarryInReg, CarryRC)) {
+ Register NewCarryReg = MRI.createVirtualRegister(CarryRC);
+ BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY), NewCarryReg)
+ .addReg(CarryInReg);
+ }
+
+ Register CarryOutReg = Inst.getOperand(1).getReg();
+
+ Register DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(
+ MRI.getRegClass(Inst.getOperand(0).getReg())));
+ MachineInstr *CarryOp =
+ BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(Opc), DestReg)
+ .addReg(CarryOutReg, RegState::Define)
+ .add(Inst.getOperand(2))
+ .add(Inst.getOperand(3))
+ .addReg(CarryInReg)
+ .addImm(0);
+ legalizeOperands(*CarryOp);
+ MRI.replaceRegWith(Inst.getOperand(0).getReg(), DestReg);
+ addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
+ Inst.eraseFromParent();
+ }
+ continue;
+ case AMDGPU::S_UADDO_PSEUDO:
+ case AMDGPU::S_USUBO_PSEUDO: {
+ const DebugLoc &DL = Inst.getDebugLoc();
+ MachineOperand &Dest0 = Inst.getOperand(0);
+ MachineOperand &Dest1 = Inst.getOperand(1);
+ MachineOperand &Src0 = Inst.getOperand(2);
+ MachineOperand &Src1 = Inst.getOperand(3);
+
+ unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
+ ? AMDGPU::V_ADD_I32_e64
+ : AMDGPU::V_SUB_I32_e64;
+ const TargetRegisterClass *NewRC =
+ RI.getEquivalentVGPRClass(MRI.getRegClass(Dest0.getReg()));
+ Register DestReg = MRI.createVirtualRegister(NewRC);
+ MachineInstr *NewInstr = BuildMI(*MBB, &Inst, DL, get(Opc), DestReg)
+ .addReg(Dest1.getReg(), RegState::Define)
+ .add(Src0)
+ .add(Src1)
+ .addImm(0); // clamp bit
+
+ legalizeOperands(*NewInstr, MDT);
+
+ MRI.replaceRegWith(Dest0.getReg(), DestReg);
+ addUsersToMoveToVALUWorklist(NewInstr->getOperand(0).getReg(), MRI,
+ Worklist);
+ Inst.eraseFromParent();
+ }
+ continue;
+
+ case AMDGPU::S_CSELECT_B32:
+ case AMDGPU::S_CSELECT_B64:
+ lowerSelect(Worklist, Inst, MDT);
+ Inst.eraseFromParent();
+ continue;
}
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
@@ -5142,6 +5541,78 @@ bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
return false;
}
+void SIInstrInfo::lowerSelect(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT) const {
+
+ MachineBasicBlock &MBB = *Inst.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineBasicBlock::iterator MII = Inst;
+ DebugLoc DL = Inst.getDebugLoc();
+
+ MachineOperand &Dest = Inst.getOperand(0);
+ MachineOperand &Src0 = Inst.getOperand(1);
+ MachineOperand &Src1 = Inst.getOperand(2);
+ MachineOperand &Cond = Inst.getOperand(3);
+
+ Register SCCSource = Cond.getReg();
+ // Find SCC def, and if that is a copy (SCC = COPY reg) then use reg instead.
+ if (!Cond.isUndef()) {
+ for (MachineInstr &CandI :
+ make_range(std::next(MachineBasicBlock::reverse_iterator(Inst)),
+ Inst.getParent()->rend())) {
+ if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) !=
+ -1) {
+ if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
+ SCCSource = CandI.getOperand(1).getReg();
+ }
+ break;
+ }
+ }
+ }
+
+ // If this is a trivial select where the condition is effectively not SCC
+ // (SCCSource is a source of a copy to SCC), then the select is semantically
+ // equivalent to copying SCCSource. Hence, there is no need to create a
+ // V_CNDMASK; we can just use SCCSource directly and bail out.
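+ // For example (with %mask as a placeholder name), if SCC was set by
+ // "SCC = COPY %mask", then selecting -1/0 on that SCC merely rematerializes
+ // %mask, so %mask can be used directly.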
+ if ((SCCSource != AMDGPU::SCC) && Src0.isImm() && (Src0.getImm() == -1) &&
+ Src1.isImm() && (Src1.getImm() == 0)) {
+ MRI.replaceRegWith(Dest.getReg(), SCCSource);
+ return;
+ }
+
+ const TargetRegisterClass *TC = ST.getWavefrontSize() == 64
+ ? &AMDGPU::SReg_64_XEXECRegClass
+ : &AMDGPU::SReg_32_XM0_XEXECRegClass;
+ Register CopySCC = MRI.createVirtualRegister(TC);
+
+ if (SCCSource == AMDGPU::SCC) {
+ // Insert a trivial select instead of creating a copy, because a copy from
+ // SCC would semantically mean just copying a single bit, but we may need
+ // the result to be a vector condition mask that needs preserving.
+ unsigned Opcode = (ST.getWavefrontSize() == 64) ? AMDGPU::S_CSELECT_B64
+ : AMDGPU::S_CSELECT_B32;
+ auto NewSelect =
+ BuildMI(MBB, MII, DL, get(Opcode), CopySCC).addImm(-1).addImm(0);
+ NewSelect->getOperand(3).setIsUndef(Cond.isUndef());
+ } else {
+ BuildMI(MBB, MII, DL, get(AMDGPU::COPY), CopySCC).addReg(SCCSource);
+ }
+
+ Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ auto UpdatedInst =
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), ResultReg)
+ .addImm(0)
+ .add(Src1) // False
+ .addImm(0)
+ .add(Src0) // True
+ .addReg(CopySCC);
+
+ MRI.replaceRegWith(Dest.getReg(), ResultReg);
+ legalizeOperands(*UpdatedInst, MDT);
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
+}
+
void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
MachineInstr &Inst) const {
MachineBasicBlock &MBB = *Inst.getParent();
@@ -5623,7 +6094,7 @@ void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
}
void SIInstrInfo::addUsersToMoveToVALUWorklist(
- unsigned DstReg,
+ Register DstReg,
MachineRegisterInfo &MRI,
SetVectorType &Worklist) const {
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
@@ -5723,20 +6194,60 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op,
MachineInstr &SCCDefInst,
SetVectorType &Worklist) const {
+ bool SCCUsedImplicitly = false;
+
// Ensure that def inst defines SCC, which is still live.
assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
!Op.isDead() && Op.getParent() == &SCCDefInst);
+ SmallVector<MachineInstr *, 4> CopyToDelete;
// This assumes that all the users of SCC are in the same block
// as the SCC def.
for (MachineInstr &MI : // Skip the def inst itself.
make_range(std::next(MachineBasicBlock::iterator(SCCDefInst)),
SCCDefInst.getParent()->end())) {
// Check if SCC is used first.
- if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1)
- Worklist.insert(&MI);
+ if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1) {
+ if (MI.isCopy()) {
+ MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ unsigned DestReg = MI.getOperand(0).getReg();
+
+ for (auto &User : MRI.use_nodbg_instructions(DestReg)) {
+ if ((User.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO) ||
+ (User.getOpcode() == AMDGPU::S_SUB_CO_PSEUDO)) {
+ User.getOperand(4).setReg(RI.getVCC());
+ Worklist.insert(&User);
+ } else if (User.getOpcode() == AMDGPU::V_CNDMASK_B32_e64) {
+ User.getOperand(5).setReg(RI.getVCC());
+ // No need to add to Worklist.
+ }
+ }
+ CopyToDelete.push_back(&MI);
+ } else {
+ if (MI.getOpcode() == AMDGPU::S_CSELECT_B32 ||
+ MI.getOpcode() == AMDGPU::S_CSELECT_B64) {
+ // SCC is used implicitly here, and the SCC users are expected to
+ // handle it themselves.
+ // We cannot preserve the edge to the user, so add the explicit
+ // copy: SCC = COPY VCC.
+ // The copy will be cleaned up while the user is processed in
+ // lowerSelect.
+ SCCUsedImplicitly = true;
+ }
+
+ Worklist.insert(&MI);
+ }
+ }
// Exit if we find another SCC def.
if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != -1)
- return;
+ break;
+ }
+ for (auto &Copy : CopyToDelete)
+ Copy->eraseFromParent();
+
+ if (SCCUsedImplicitly) {
+ BuildMI(*SCCDefInst.getParent(), std::next(SCCDefInst.getIterator()),
+ SCCDefInst.getDebugLoc(), get(AMDGPU::COPY), AMDGPU::SCC)
+ .addReg(RI.getVCC());
}
}
@@ -5789,7 +6300,7 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
}
// Find the one SGPR operand we are allowed to use.
-unsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI,
+Register SIInstrInfo::findUsedSGPR(const MachineInstr &MI,
int OpIndices[3]) const {
const MCInstrDesc &Desc = MI.getDesc();
@@ -5802,11 +6313,11 @@ unsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI,
//
// If the operand's class is an SGPR, we can never move it.
- unsigned SGPRReg = findImplicitSGPRRead(MI);
+ Register SGPRReg = findImplicitSGPRRead(MI);
if (SGPRReg != AMDGPU::NoRegister)
return SGPRReg;
- unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
+ Register UsedSGPRs[3] = { AMDGPU::NoRegister };
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
for (unsigned i = 0; i < 3; ++i) {
@@ -5919,10 +6430,9 @@ bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr &MI) const {
return isSMRD(Opc);
}
-bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr &MI) const {
- unsigned Opc = MI.getOpcode();
-
- return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc);
+bool SIInstrInfo::isHighLatencyDef(int Opc) const {
+ return get(Opc).mayLoad() &&
+ (isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc) || isFLAT(Opc));
}
unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
@@ -6198,7 +6708,7 @@ MachineInstrBuilder
SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL,
- unsigned DestReg) const {
+ Register DestReg) const {
if (ST.hasAddNoCarry())
return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg);
@@ -6608,20 +7118,24 @@ MachineInstr *SIInstrInfo::foldMemoryOperandImpl(
// %0 may even spill. We can't spill $m0 normally (it would require copying to
// a numbered SGPR anyway), and since it is in the SReg_32 register class,
// TargetInstrInfo::foldMemoryOperand() is going to try.
+ // A similar issue also exists with spilling and reloading $exec registers.
//
// To prevent that, constrain the %0 register class here.
if (MI.isFullCopy()) {
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
-
- if (DstReg == AMDGPU::M0 && SrcReg.isVirtual()) {
- MF.getRegInfo().constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
- return nullptr;
- }
-
- if (SrcReg == AMDGPU::M0 && DstReg.isVirtual()) {
- MF.getRegInfo().constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass);
- return nullptr;
+ if ((DstReg.isVirtual() || SrcReg.isVirtual()) &&
+ (DstReg.isVirtual() != SrcReg.isVirtual())) {
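+ // Exactly one side of the copy is virtual, i.e. this is a copy between a
+ // physical register and a virtual register.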
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ Register VirtReg = DstReg.isVirtual() ? DstReg : SrcReg;
+ const TargetRegisterClass *RC = MRI.getRegClass(VirtReg);
+ if (RC->hasSuperClassEq(&AMDGPU::SReg_32RegClass)) {
+ MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
+ return nullptr;
+ } else if (RC->hasSuperClassEq(&AMDGPU::SReg_64RegClass)) {
+ MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
+ return nullptr;
+ }
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index b151a94b0d11..53e2ffba0f65 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -84,6 +84,9 @@ private:
bool moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
MachineDominatorTree *MDT = nullptr) const;
+ void lowerSelect(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT = nullptr) const;
+
void lowerScalarAbs(SetVectorType &Worklist,
MachineInstr &Inst) const;
@@ -119,7 +122,7 @@ private:
MachineRegisterInfo &MRI,
MachineInstr &Inst) const;
- void addUsersToMoveToVALUWorklist(unsigned Reg, MachineRegisterInfo &MRI,
+ void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
SetVectorType &Worklist) const;
void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
@@ -132,7 +135,7 @@ private:
bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
const MachineInstr &MIb) const;
- unsigned findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
+ Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
protected:
bool swapSourceModifiers(MachineInstr &MI,
@@ -181,14 +184,15 @@ public:
int64_t &Offset1,
int64_t &Offset2) const override;
- bool getMemOperandWithOffset(const MachineInstr &LdSt,
- const MachineOperand *&BaseOp,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const final;
+ bool getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt,
+ SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+ bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const final;
- bool shouldClusterMemOps(const MachineOperand &BaseOp1,
- const MachineOperand &BaseOp2,
- unsigned NumLoads) const override;
+ bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+ ArrayRef<const MachineOperand *> BaseOps2,
+ unsigned NumLoads, unsigned NumBytes) const override;
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
int64_t Offset1, unsigned NumLoads) const override;
@@ -210,22 +214,22 @@ public:
const TargetRegisterClass *getPreferredSelectRegClass(
unsigned Size) const;
- unsigned insertNE(MachineBasicBlock *MBB,
+ Register insertNE(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
- unsigned SrcReg, int Value) const;
+ Register SrcReg, int Value) const;
- unsigned insertEQ(MachineBasicBlock *MBB,
+ Register insertEQ(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
- unsigned SrcReg, int Value) const;
+ Register SrcReg, int Value) const;
void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, unsigned SrcReg,
+ MachineBasicBlock::iterator MI, Register SrcReg,
bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, unsigned DestReg,
+ MachineBasicBlock::iterator MI, Register DestReg,
int FrameIndex, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
@@ -244,6 +248,9 @@ public:
// DstRC, then AMDGPU::COPY is returned.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
+ const MCInstrDesc &getIndirectRegWritePseudo(
+ unsigned VecSize, unsigned EltSize, bool IsSGPR) const;
+
LLVM_READONLY
int commuteOpcode(unsigned Opc) const;
@@ -293,20 +300,19 @@ public:
SmallVectorImpl<MachineOperand> &Cond) const override;
bool canInsertSelect(const MachineBasicBlock &MBB,
- ArrayRef<MachineOperand> Cond,
- unsigned TrueReg, unsigned FalseReg,
- int &CondCycles,
+ ArrayRef<MachineOperand> Cond, Register DstReg,
+ Register TrueReg, Register FalseReg, int &CondCycles,
int &TrueCycles, int &FalseCycles) const override;
void insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
- unsigned DstReg, ArrayRef<MachineOperand> Cond,
- unsigned TrueReg, unsigned FalseReg) const override;
+ Register DstReg, ArrayRef<MachineOperand> Cond,
+ Register TrueReg, Register FalseReg) const override;
void insertVectorSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
- unsigned DstReg, ArrayRef<MachineOperand> Cond,
- unsigned TrueReg, unsigned FalseReg) const;
+ Register DstReg, ArrayRef<MachineOperand> Cond,
+ Register TrueReg, Register FalseReg) const;
unsigned getAddressSpaceForPseudoSourceKind(
unsigned Kind) const override;
@@ -317,7 +323,7 @@ public:
bool isFoldableCopy(const MachineInstr &MI) const;
- bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
+ bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
MachineRegisterInfo *MRI) const final;
unsigned getMachineCSELookAheadLimit() const override { return 500; }
@@ -685,6 +691,9 @@ public:
return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
}
+ /// Return true if the instruction modifies the mode register.
+ static bool modifiesModeRegister(const MachineInstr &MI);
+
/// Whether we must prevent this instruction from executing with EXEC = 0.
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
@@ -824,11 +833,7 @@ public:
const MachineOperand &MO = MI.getOperand(OpNo);
if (MO.isReg()) {
if (unsigned SubReg = MO.getSubReg()) {
- assert(RI.getRegSizeInBits(*RI.getSubClassWithSubReg(
- MI.getParent()->getParent()->getRegInfo().
- getRegClass(MO.getReg()), SubReg)) >= 32 &&
- "Sub-dword subregs are not supported");
- return RI.getSubRegIndexLaneMask(SubReg).getNumLanes() * 4;
+ return RI.getSubRegIdxSize(SubReg) / 8;
}
}
return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
@@ -874,7 +879,7 @@ public:
/// be used when it is known that the value in SrcReg is the same across all
/// threads in the wave.
/// \returns The SGPR register that \p SrcReg was copied to.
- unsigned readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
+ Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
MachineRegisterInfo &MRI) const;
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
@@ -928,7 +933,7 @@ public:
uint64_t getScratchRsrcWords23() const;
bool isLowLatencyInstruction(const MachineInstr &MI) const;
- bool isHighLatencyInstruction(const MachineInstr &MI) const;
+ bool isHighLatencyDef(int Opc) const override;
/// Return the descriptor of the target-specific machine instruction
/// that corresponds to the specified pseudo or native opcode.
@@ -995,7 +1000,7 @@ public:
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL,
- unsigned DestReg) const;
+ Register DestReg) const;
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 85e8d0582dcd..7aee52f91360 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -7,11 +7,9 @@
//===----------------------------------------------------------------------===//
def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
- AssemblerPredicate <"FeatureWavefrontSize32">;
+ AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
- AssemblerPredicate <"FeatureWavefrontSize64">;
-
-def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
+ AssemblerPredicate <(all_of FeatureWavefrontSize64)>;
class GCNPredicateControl : PredicateControl {
Predicate SIAssemblerPredicate = isGFX6GFX7;
@@ -30,6 +28,7 @@ def SIEncodingFamily {
int GFX9 = 5;
int GFX10 = 6;
int SDWA10 = 7;
+ int GFX10_B = 8;
}
//===----------------------------------------------------------------------===//
@@ -39,8 +38,7 @@ def SIEncodingFamily {
def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
- SDTypeProfile<1, 4, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i1>,
- SDTCisVT<4, i1>]>,
+ SDTypeProfile<1, 3, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
[SDNPMayLoad, SDNPMemOperand]
>;
@@ -57,6 +55,10 @@ def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2,
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;
+def SIatomic_csub : SDNode<"AMDGPUISD::ATOMIC_LOAD_CSUB", SDTAtomic2,
+ [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
+>;
+
def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
]>;
@@ -200,6 +202,7 @@ def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
def SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
def SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
+def SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">;
def SIbuffer_atomic_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_FADD", f32>;
def SIbuffer_atomic_pk_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_PK_FADD", v2f16>;
@@ -267,7 +270,7 @@ def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8",
def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
SDTypeProfile<0 ,1, [SDTCisInt<0>]>,
- [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;
//===----------------------------------------------------------------------===//
@@ -308,6 +311,10 @@ class isPackedType<ValueType SrcVT> {
// PatFrags for global memory operations
//===----------------------------------------------------------------------===//
+let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_global").AddrSpaces in {
+defm atomic_csub_global : binary_atomic_op<SIatomic_csub>;
+}
+
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
@@ -631,6 +638,16 @@ def add_ctpop : PatFrag <
(add (ctpop $src0), $src1)
>;
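+
+// shl1_add .. shl4_add match (add (shl $src0, N), $src1) for N = 1..4, with a
+// single-use shl. The PredicateCode / GISelPredicateCode overrides below
+// disable these frags for SelectionDAG and enable them only for GlobalISel.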
+foreach I = 1-4 in {
+def shl#I#_add : PatFrag <
+ (ops node:$src0, node:$src1),
+ (add (shl_oneuse $src0, (i32 I)), $src1)> {
+ // FIXME: Poor substitute for disabling pattern in SelectionDAG
+ let PredicateCode = [{return false;}];
+ let GISelPredicateCode = [{return true;}];
+}
+}
+
multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
SDTypeProfile tc = SDTAtomic2,
bit IsInt = 1> {
@@ -651,6 +668,7 @@ multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
+defm atomic_load_csub : SIAtomicM0Glue2 <"LOAD_CSUB", 1>;
defm atomic_inc : SIAtomicM0Glue2 <"INC", 1>;
defm atomic_dec : SIAtomicM0Glue2 <"DEC", 1>;
defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
@@ -665,7 +683,7 @@ defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>;
defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>;
defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>;
-def as_i1imm : SDNodeXForm<imm, [{
+def as_i1timm : SDNodeXForm<timm, [{
return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
}]>;
@@ -673,6 +691,10 @@ def as_i8imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
}]>;
+def as_i8timm : SDNodeXForm<timm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
+}]>;
+
def as_i16imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;
@@ -766,7 +788,7 @@ def NegSubInlineConst32 : ImmLeaf<i32, [{
return Imm < -16 && Imm >= -64;
}], NegateImm>;
-def NegSubInlineConst16 : ImmLeaf<i16, [{
+def NegSubInlineIntConst16 : ImmLeaf<i16, [{
return Imm < -16 && Imm >= -64;
}], NegateImm>;
@@ -791,6 +813,26 @@ def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
}], getNegV2I16Imm>;
//===----------------------------------------------------------------------===//
+// MUBUF/SMEM Patterns
+//===----------------------------------------------------------------------===//
+
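+// These transforms extract the individual memory-policy bits from a packed
+// immediate: bit 0 = glc, bit 1 = slc, bit 2 = dlc, bit 3 = swz.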
+def extract_glc : SDNodeXForm<timm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i8);
+}]>;
+
+def extract_slc : SDNodeXForm<timm, [{
+ return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8);
+}]>;
+
+def extract_dlc : SDNodeXForm<timm, [{
+ return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8);
+}]>;
+
+def extract_swz : SDNodeXForm<timm, [{
+ return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8);
+}]>;
+
+//===----------------------------------------------------------------------===//
// Custom Operands
//===----------------------------------------------------------------------===//
@@ -935,7 +977,7 @@ def VOPDstS64orS32 : BoolRC {
}
// SCSrc_i1 is the operand for pseudo instructions only.
-// Boolean immeadiates shall not be exposed to codegen instructions.
+// Boolean immediates shall not be exposed to codegen instructions.
def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
let OperandNamespace = "AMDGPU";
let OperandType = "OPERAND_REG_IMM_INT32";
@@ -1067,6 +1109,7 @@ def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;
def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
+def GFX10A16 : NamedOperandBit<"GFX10A16", NamedMatchClass<"GFX10A16">>;
def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
@@ -1099,9 +1142,9 @@ def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>;
def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>;
def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;
-def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
+def hwreg : NamedOperandU32<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
-def exp_tgt : NamedOperandU8<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
+def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
}
@@ -1274,19 +1317,14 @@ def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
// VOP3Mods, but the input source is known to never be NaN.
def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
-// VOP3Mods, but only allowed for f32 operands.
-def VOP3Mods_f32 : ComplexPattern<fAny, 2, "SelectVOP3Mods_f32">;
def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
-def VOP3PMods0 : ComplexPattern<untyped, 3, "SelectVOP3PMods0">;
def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;
-def VOP3OpSel0 : ComplexPattern<untyped, 3, "SelectVOP3OpSel0">;
def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;
-def VOP3OpSelMods0 : ComplexPattern<untyped, 3, "SelectVOP3OpSelMods0">;
def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;
@@ -1347,6 +1385,7 @@ def HWREG {
int FLAT_SCR_HI = 21;
int XNACK_MASK = 22;
int POPS_PACKER = 25;
+ int SHADER_CYCLES = 29;
}
class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
@@ -1380,24 +1419,21 @@ class SIMCInstr <string pseudo, int subtarget> {
// EXP classes
//===----------------------------------------------------------------------===//
-class EXP_Helper<bit done, SDPatternOperator node = null_frag> : EXPCommon<
+class EXP_Helper<bit done> : EXPCommon<
(outs),
(ins exp_tgt:$tgt,
ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3,
- exp_vm:$vm, exp_compr:$compr, i8imm:$en),
- "exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm",
- [(node (i8 timm:$tgt), (i8 timm:$en),
- f32:$src0, f32:$src1, f32:$src2, f32:$src3,
- (i1 timm:$compr), (i1 timm:$vm))]> {
+ exp_vm:$vm, exp_compr:$compr, i32imm:$en),
+ "exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm", []> {
let AsmMatchConverter = "cvtExp";
}
// Split EXP instruction into EXP and EXP_DONE so we can set
// mayLoad for done=1.
-multiclass EXP_m<bit done, SDPatternOperator node> {
+multiclass EXP_m<bit done> {
let mayLoad = done, DisableWQM = 1 in {
let isPseudo = 1, isCodeGenOnly = 1 in {
- def "" : EXP_Helper<done, node>,
+ def "" : EXP_Helper<done>,
SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.NONE>;
}
@@ -1685,7 +1721,7 @@ class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
!if (HasClamp,
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
- clampmod:$clamp,
+ clampmod0:$clamp,
op_sel:$op_sel, op_sel_hi:$op_sel_hi,
neg_lo:$neg_lo, neg_hi:$neg_hi),
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
@@ -1697,7 +1733,7 @@ class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
Src2Mod:$src2_modifiers, Src2RC:$src2,
- clampmod:$clamp,
+ clampmod0:$clamp,
op_sel:$op_sel, op_sel_hi:$op_sel_hi,
neg_lo:$neg_lo, neg_hi:$neg_hi),
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
@@ -1720,7 +1756,7 @@ class getInsVOP3OpSel <RegisterOperand Src0RC,
!if (HasClamp,
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
- clampmod:$clamp,
+ clampmod0:$clamp,
op_sel:$op_sel),
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
@@ -1730,7 +1766,7 @@ class getInsVOP3OpSel <RegisterOperand Src0RC,
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
Src2Mod:$src2_modifiers, Src2RC:$src2,
- clampmod:$clamp,
+ clampmod0:$clamp,
op_sel:$op_sel),
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
@@ -2242,6 +2278,7 @@ def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
+def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], 0, /*EnableClamp=*/1>;
def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
@@ -2455,7 +2492,8 @@ def getMCOpcodeGen : InstrMapping {
[!cast<string>(SIEncodingFamily.GFX80)],
[!cast<string>(SIEncodingFamily.GFX9)],
[!cast<string>(SIEncodingFamily.GFX10)],
- [!cast<string>(SIEncodingFamily.SDWA10)]];
+ [!cast<string>(SIEncodingFamily.SDWA10)],
+ [!cast<string>(SIEncodingFamily.GFX10_B)]];
}
// Get equivalent SOPK instruction.
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td
index d84720f820ee..0c4c9e0e9df2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1,4 +1,4 @@
-//===-- SIInstructions.td - SI Instruction Defintions ---------------------===//
+//===-- SIInstructions.td - SI Instruction Definitions --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -24,8 +24,38 @@ include "BUFInstructions.td"
// EXP Instructions
//===----------------------------------------------------------------------===//
-defm EXP : EXP_m<0, AMDGPUexport>;
-defm EXP_DONE : EXP_m<1, AMDGPUexport_done>;
+defm EXP : EXP_m<0>;
+defm EXP_DONE : EXP_m<1>;
+
+class ExpPattern<ValueType vt, Instruction Inst, int done_val> : GCNPat<
+ (int_amdgcn_exp timm:$tgt, timm:$en,
+ (vt ExpSrc0:$src0), (vt ExpSrc1:$src1),
+ (vt ExpSrc2:$src2), (vt ExpSrc3:$src3),
+ done_val, timm:$vm),
+ (Inst timm:$tgt, ExpSrc0:$src0, ExpSrc1:$src1,
+ ExpSrc2:$src2, ExpSrc3:$src3, timm:$vm, 0, timm:$en)
+>;
+
+class ExpComprPattern<ValueType vt, Instruction Inst, int done_val> : GCNPat<
+ (int_amdgcn_exp_compr timm:$tgt, timm:$en,
+ (vt ExpSrc0:$src0), (vt ExpSrc1:$src1),
+ done_val, timm:$vm),
+ (Inst timm:$tgt, ExpSrc0:$src0, ExpSrc1:$src1,
+ (IMPLICIT_DEF), (IMPLICIT_DEF), timm:$vm, 1, timm:$en)
+>;
+
+// FIXME: The generated DAG matcher seems to have strange behavior
+// with a 1-bit literal to match, so use a -1 for checking a true
+// 1-bit value.
+def : ExpPattern<i32, EXP, 0>;
+def : ExpPattern<i32, EXP_DONE, -1>;
+def : ExpPattern<f32, EXP, 0>;
+def : ExpPattern<f32, EXP_DONE, -1>;
+
+def : ExpComprPattern<v2i16, EXP, 0>;
+def : ExpComprPattern<v2i16, EXP_DONE, -1>;
+def : ExpComprPattern<v2f16, EXP, 0>;
+def : ExpComprPattern<v2f16, EXP_DONE, -1>;
//===----------------------------------------------------------------------===//
// VINTRP Instructions
@@ -34,9 +64,9 @@ defm EXP_DONE : EXP_m<1, AMDGPUexport_done>;
// Used to inject printing of "_e32" suffix for VI (there are "_e64" variants for VI)
def VINTRPDst : VINTRPDstOperand <VGPR_32>;
-let Uses = [M0, EXEC] in {
+let Uses = [MODE, M0, EXEC] in {
-// FIXME: Specify SchedRW for VINTRP insturctions.
+// FIXME: Specify SchedRW for VINTRP instructions.
multiclass V_INTERP_P1_F32_m : VINTRP_m <
0x00000000,
@@ -76,10 +106,10 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
(outs VINTRPDst:$vdst),
(ins InterpSlot:$vsrc, Attr:$attr, AttrChan:$attrchan),
"v_interp_mov_f32$vdst, $vsrc, $attr$attrchan",
- [(set f32:$vdst, (int_amdgcn_interp_mov (i32 imm:$vsrc),
+ [(set f32:$vdst, (int_amdgcn_interp_mov (i32 timm:$vsrc),
(i32 timm:$attrchan), (i32 timm:$attr), M0))]>;
-} // End Uses = [M0, EXEC]
+} // End Uses = [MODE, M0, EXEC]
//===----------------------------------------------------------------------===//
// Pseudo Instructions
@@ -136,7 +166,8 @@ def WWM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>;
} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
def ENTER_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins i64imm:$src0)> {
- let Defs = [EXEC];
+ let Uses = [EXEC];
+ let Defs = [EXEC, SCC];
let hasSideEffects = 0;
let mayLoad = 0;
let mayStore = 0;
@@ -162,16 +193,27 @@ def V_SET_INACTIVE_B64 : VPseudoInstSI <(outs VReg_64:$vdst),
let Constraints = "$src = $vdst";
}
+let usesCustomInserter = 1, Defs = [VCC, EXEC] in {
+def V_ADD_U64_PSEUDO : VPseudoInstSI <
+ (outs VReg_64:$vdst), (ins VSrc_b64:$src0, VSrc_b64:$src1),
+ [(set VReg_64:$vdst, (getDivergentFrag<add>.ret i64:$src0, i64:$src1))]
+>;
+
+def V_SUB_U64_PSEUDO : VPseudoInstSI <
+ (outs VReg_64:$vdst), (ins VSrc_b64:$src0, VSrc_b64:$src1),
+ [(set VReg_64:$vdst, (getDivergentFrag<sub>.ret i64:$src0, i64:$src1))]
+>;
+} // End usesCustomInserter = 1, Defs = [VCC, EXEC]
let usesCustomInserter = 1, Defs = [SCC] in {
def S_ADD_U64_PSEUDO : SPseudoInstSI <
- (outs SReg_64:$vdst), (ins SSrc_b64:$src0, SSrc_b64:$src1),
- [(set SReg_64:$vdst, (add i64:$src0, i64:$src1))]
+ (outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1),
+ [(set SReg_64:$sdst, (UniformBinFrag<add> i64:$src0, i64:$src1))]
>;
def S_SUB_U64_PSEUDO : SPseudoInstSI <
- (outs SReg_64:$vdst), (ins SSrc_b64:$src0, SSrc_b64:$src1),
- [(set SReg_64:$vdst, (sub i64:$src0, i64:$src1))]
+ (outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1),
+ [(set SReg_64:$sdst, (UniformBinFrag<sub> i64:$src0, i64:$src1))]
>;
def S_ADD_U64_CO_PSEUDO : SPseudoInstSI <
@@ -181,6 +223,23 @@ def S_ADD_U64_CO_PSEUDO : SPseudoInstSI <
def S_SUB_U64_CO_PSEUDO : SPseudoInstSI <
(outs SReg_64:$vdst, VOPDstS64orS32:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
>;
+
+def S_ADD_CO_PSEUDO : SPseudoInstSI <
+ (outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1, SSrc_i1:$scc_in)
+>;
+
+def S_SUB_CO_PSEUDO : SPseudoInstSI <
+ (outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1, SSrc_i1:$scc_in)
+>;
+
+def S_UADDO_PSEUDO : SPseudoInstSI <
+ (outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1)
+>;
+
+def S_USUBO_PSEUDO : SPseudoInstSI <
+ (outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1)
+>;
+
} // End usesCustomInserter = 1, Defs = [SCC]
let usesCustomInserter = 1 in {
@@ -199,6 +258,7 @@ class WrapTerminatorInst<SOP_Pseudo base_inst> : SPseudoInstSI<
let hasSideEffects = base_inst.hasSideEffects;
let UseNamedOperandTable = base_inst.UseNamedOperandTable;
let CodeSize = base_inst.CodeSize;
+ let SchedRW = base_inst.SchedRW;
}
let WaveSizePredicate = isWave64 in {
@@ -214,13 +274,14 @@ def S_OR_B32_term : WrapTerminatorInst<S_OR_B32>;
def S_ANDN2_B32_term : WrapTerminatorInst<S_ANDN2_B32>;
}
+
def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
[(int_amdgcn_wave_barrier)]> {
let SchedRW = [];
let hasNoSchedulingInfo = 1;
let hasSideEffects = 1;
- let mayLoad = 1;
- let mayStore = 1;
+ let mayLoad = 0;
+ let mayStore = 0;
let isConvergent = 1;
let FixedSize = 1;
let Size = 0;
@@ -318,6 +379,9 @@ multiclass PseudoInstKill <dag ins> {
defm SI_KILL_I1 : PseudoInstKill <(ins SCSrc_i1:$src, i1imm:$killvalue)>;
defm SI_KILL_F32_COND_IMM : PseudoInstKill <(ins VSrc_b32:$src0, i32imm:$src1, i32imm:$cond)>;
+let Defs = [EXEC] in
+def SI_KILL_CLEANUP : SPseudoInstSI <(outs), (ins)>;
+
let Defs = [EXEC,VCC] in
def SI_ILLEGAL_COPY : SPseudoInstSI <
(outs unknown:$dst), (ins unknown:$src),
@@ -386,7 +450,7 @@ def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI <
def : GCNPat <
(int_amdgcn_init_exec timm:$src),
- (SI_INIT_EXEC_LO (as_i32imm imm:$src))> {
+ (SI_INIT_EXEC_LO (as_i32timm timm:$src))> {
let WaveSizePredicate = isWave32;
}
@@ -413,8 +477,8 @@ def SI_RETURN : SPseudoInstSI <
// Return for returning function calls without output register.
//
-// This version is only needed so we can fill in the output regiter in
-// the custom inserter.
+// This version is only needed so we can fill in the output register
+// in the custom inserter.
def SI_CALL_ISEL : SPseudoInstSI <
(outs), (ins SSrc_b64:$src0, unknown:$callee),
[(AMDGPUcall i64:$src0, tglobaladdr:$callee)]> {
@@ -426,6 +490,11 @@ def SI_CALL_ISEL : SPseudoInstSI <
let isConvergent = 1;
}
+def : GCNPat<
+ (AMDGPUcall i64:$src0, (i64 0)),
+ (SI_CALL_ISEL $src0, (i64 0))
+>;
+
// Wrapper around s_swappc_b64 with extra $callee parameter to track
// the called function after regalloc.
def SI_CALL : SPseudoInstSI <
@@ -480,6 +549,8 @@ def ADJCALLSTACKDOWN : SPseudoInstSI<
let Defs = [M0, EXEC, SCC],
UseNamedOperandTable = 1 in {
+// SI_INDIRECT_SRC/DST are only used by the legacy SelectionDAG indirect
+// addressing implementation.
class SI_INDIRECT_SRC<RegisterClass rc> : VPseudoInstSI <
(outs VGPR_32:$vdst),
(ins rc:$src, VS_32:$idx, i32imm:$offset)> {
@@ -493,21 +564,81 @@ class SI_INDIRECT_DST<RegisterClass rc> : VPseudoInstSI <
let usesCustomInserter = 1;
}
-// TODO: We can support indirect SGPR access.
def SI_INDIRECT_SRC_V1 : SI_INDIRECT_SRC<VGPR_32>;
def SI_INDIRECT_SRC_V2 : SI_INDIRECT_SRC<VReg_64>;
def SI_INDIRECT_SRC_V4 : SI_INDIRECT_SRC<VReg_128>;
def SI_INDIRECT_SRC_V8 : SI_INDIRECT_SRC<VReg_256>;
def SI_INDIRECT_SRC_V16 : SI_INDIRECT_SRC<VReg_512>;
+def SI_INDIRECT_SRC_V32 : SI_INDIRECT_SRC<VReg_1024>;
def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VGPR_32>;
def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
+def SI_INDIRECT_DST_V32 : SI_INDIRECT_DST<VReg_1024>;
} // End Uses = [EXEC], Defs = [M0, EXEC]
+
+// This is a pseudo variant of the v_movreld_b32 instruction (or of v_mov_b32
+// when it is expected to execute with GPR indexing mode enabled) in which the
+// vector operand appears only twice, once as a def and once as a use. Using
+// this pseudo avoids problems with the two-address instructions pass.
+class INDIRECT_REG_WRITE_pseudo<RegisterClass rc,
+ RegisterOperand val_ty> : PseudoInstSI <
+ (outs rc:$vdst), (ins rc:$vsrc, val_ty:$val, i32imm:$subreg)> {
+ let Constraints = "$vsrc = $vdst";
+ let Uses = [M0];
+}
+
+class V_INDIRECT_REG_WRITE_B32_pseudo<RegisterClass rc> :
+ INDIRECT_REG_WRITE_pseudo<rc, VSrc_b32> {
+ let VALU = 1;
+ let VOP1 = 1;
+ let Uses = [M0, EXEC];
+}
+
+class S_INDIRECT_REG_WRITE_pseudo<RegisterClass rc,
+ RegisterOperand val_ty> :
+ INDIRECT_REG_WRITE_pseudo<rc, val_ty> {
+ let SALU = 1;
+ let SOP1 = 1;
+ let Uses = [M0];
+}
+
+class S_INDIRECT_REG_WRITE_B32_pseudo<RegisterClass rc> :
+ S_INDIRECT_REG_WRITE_pseudo<rc, SSrc_b32>;
+class S_INDIRECT_REG_WRITE_B64_pseudo<RegisterClass rc> :
+ S_INDIRECT_REG_WRITE_pseudo<rc, SSrc_b64>;
+
+
+def V_INDIRECT_REG_WRITE_B32_V1 : V_INDIRECT_REG_WRITE_B32_pseudo<VGPR_32>;
+def V_INDIRECT_REG_WRITE_B32_V2 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_64>;
+def V_INDIRECT_REG_WRITE_B32_V3 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_96>;
+def V_INDIRECT_REG_WRITE_B32_V4 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_128>;
+def V_INDIRECT_REG_WRITE_B32_V5 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_160>;
+def V_INDIRECT_REG_WRITE_B32_V8 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_256>;
+def V_INDIRECT_REG_WRITE_B32_V16 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_512>;
+def V_INDIRECT_REG_WRITE_B32_V32 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_1024>;
+
+def S_INDIRECT_REG_WRITE_B32_V1 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_32>;
+def S_INDIRECT_REG_WRITE_B32_V2 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_64>;
+def S_INDIRECT_REG_WRITE_B32_V3 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_96>;
+def S_INDIRECT_REG_WRITE_B32_V4 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_128>;
+def S_INDIRECT_REG_WRITE_B32_V5 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_160>;
+def S_INDIRECT_REG_WRITE_B32_V8 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_256>;
+def S_INDIRECT_REG_WRITE_B32_V16 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_512>;
+def S_INDIRECT_REG_WRITE_B32_V32 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_1024>;
+
+def S_INDIRECT_REG_WRITE_B64_V1 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_64>;
+def S_INDIRECT_REG_WRITE_B64_V2 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_128>;
+def S_INDIRECT_REG_WRITE_B64_V4 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_256>;
+def S_INDIRECT_REG_WRITE_B64_V8 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_512>;
+def S_INDIRECT_REG_WRITE_B64_V16 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_1024>;
+
+
multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
let UseNamedOperandTable = 1, SGPRSpill = 1, Uses = [EXEC] in {
def _SAVE : PseudoInstSI <
@@ -535,6 +666,7 @@ defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;
defm SI_SPILL_S96 : SI_SPILL_SGPR <SReg_96>;
defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
defm SI_SPILL_S160 : SI_SPILL_SGPR <SReg_160>;
+defm SI_SPILL_S192 : SI_SPILL_SGPR <SReg_192>;
defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>;
@@ -574,6 +706,7 @@ defm SI_SPILL_V64 : SI_SPILL_VGPR <VReg_64>;
defm SI_SPILL_V96 : SI_SPILL_VGPR <VReg_96>;
defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;
defm SI_SPILL_V160 : SI_SPILL_VGPR <VReg_160>;
+defm SI_SPILL_V192 : SI_SPILL_VGPR <VReg_192>;
defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
defm SI_SPILL_V1024 : SI_SPILL_VGPR <VReg_1024>;
@@ -639,12 +772,6 @@ def : GCNPat<
>;
def : Pat <
- // -1.0 as i32 (LowerINTRINSIC_VOID converts all other constants to -1.0)
- (AMDGPUkill (i32 -1082130432)),
- (SI_KILL_I1_PSEUDO (i1 0), 0)
->;
-
-def : Pat <
(int_amdgcn_kill i1:$src),
(SI_KILL_I1_PSEUDO SCSrc_i1:$src, 0)
>;
@@ -655,11 +782,6 @@ def : Pat <
>;
def : Pat <
- (AMDGPUkill i32:$src),
- (SI_KILL_F32_COND_IMM_PSEUDO VSrc_b32:$src, 0, 3) // 3 means SETOGE
->;
-
-def : Pat <
(int_amdgcn_kill (i1 (setcc f32:$src, InlineImmFP32:$imm, cond:$cond))),
(SI_KILL_F32_COND_IMM_PSEUDO VSrc_b32:$src, (bitcast_fpimm_to_i32 $imm), (cond_as_i32imm $cond))
>;
@@ -693,14 +815,14 @@ def : RsqPat<V_RSQ_F64_e32, f64>;
def : GCNPat <
(f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
(f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
- (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
+ (V_FRACT_F32_e64 $mods, $x)
>;
// Convert (x + (-floor(x))) to fract(x)
def : GCNPat <
(f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
(f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
- (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
+ (V_FRACT_F64_e64 $mods, $x)
>;
} // End OtherPredicates = [UnsafeFPMath]
@@ -709,27 +831,27 @@ def : GCNPat <
// f16_to_fp patterns
def : GCNPat <
(f32 (f16_to_fp i32:$src0)),
- (V_CVT_F32_F16_e64 SRCMODS.NONE, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
+ (V_CVT_F32_F16_e64 SRCMODS.NONE, $src0)
>;
def : GCNPat <
(f32 (f16_to_fp (and_oneuse i32:$src0, 0x7fff))),
- (V_CVT_F32_F16_e64 SRCMODS.ABS, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
+ (V_CVT_F32_F16_e64 SRCMODS.ABS, $src0)
>;
def : GCNPat <
(f32 (f16_to_fp (i32 (srl_oneuse (and_oneuse i32:$src0, 0x7fff0000), (i32 16))))),
- (V_CVT_F32_F16_e64 SRCMODS.ABS, (i32 (V_LSHRREV_B32_e64 (i32 16), i32:$src0)), DSTCLAMP.NONE, DSTOMOD.NONE)
+ (V_CVT_F32_F16_e64 SRCMODS.ABS, (i32 (V_LSHRREV_B32_e64 (i32 16), i32:$src0)))
>;
def : GCNPat <
(f32 (f16_to_fp (or_oneuse i32:$src0, 0x8000))),
- (V_CVT_F32_F16_e64 SRCMODS.NEG_ABS, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
+ (V_CVT_F32_F16_e64 SRCMODS.NEG_ABS, $src0)
>;
def : GCNPat <
(f32 (f16_to_fp (xor_oneuse i32:$src0, 0x8000))),
- (V_CVT_F32_F16_e64 SRCMODS.NEG, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
+ (V_CVT_F32_F16_e64 SRCMODS.NEG, $src0)
>;
def : GCNPat <
@@ -740,7 +862,7 @@ def : GCNPat <
// fp_to_fp16 patterns
def : GCNPat <
(i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
- (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, DSTCLAMP.NONE, DSTOMOD.NONE)
+ (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0)
>;
def : GCNPat <
@@ -767,20 +889,29 @@ def : GCNPat <
// VOP2 Patterns
//===----------------------------------------------------------------------===//
-multiclass FMADPat <ValueType vt, Instruction inst> {
- def : GCNPat <
- (vt (fmad (VOP3NoMods vt:$src0),
- (VOP3NoMods vt:$src1),
- (VOP3NoMods vt:$src2))),
+// TODO: Check only no src2 mods?
+class FMADPat <ValueType vt, Instruction inst, SDPatternOperator node>
+ : GCNPat <(vt (node (vt (VOP3NoMods vt:$src0)),
+ (vt (VOP3NoMods vt:$src1)),
+ (vt (VOP3NoMods vt:$src2)))),
(inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
- >;
+>;
+
+
+// Prefer mac form when there are no modifiers.
+let AddedComplexity = 9 in {
+def : FMADPat <f32, V_MAC_F32_e64, fmad>;
+def : FMADPat <f32, V_MAC_F32_e64, AMDGPUfmad_ftz>;
+
+let SubtargetPredicate = Has16BitInsts in {
+def : FMADPat <f16, V_MAC_F16_e64, fmad>;
+def : FMADPat <f16, V_MAC_F16_e64, AMDGPUfmad_ftz>;
}
-defm : FMADPat <f16, V_MAC_F16_e64>;
-defm : FMADPat <f32, V_MAC_F32_e64>;
+}
-class FMADModsPat<Instruction inst, SDPatternOperator mad_opr, ValueType Ty>
+class FMADModsPat<ValueType Ty, Instruction inst, SDPatternOperator mad_opr>
: GCNPat<
(Ty (mad_opr (Ty (VOP3Mods Ty:$src0, i32:$src0_mod)),
(Ty (VOP3Mods Ty:$src1, i32:$src1_mod)),
@@ -789,24 +920,28 @@ class FMADModsPat<Instruction inst, SDPatternOperator mad_opr, ValueType Ty>
$src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
-// FIXME: This should select to V_MAC_F32
-def : FMADModsPat<V_MAD_F32, AMDGPUfmad_ftz, f32>;
-def : FMADModsPat<V_MAD_F16, AMDGPUfmad_ftz, f16> {
+let SubtargetPredicate = HasMadMacF32Insts in
+def : FMADModsPat<f32, V_MAD_F32, AMDGPUfmad_ftz>;
+def : FMADModsPat<f16, V_MAD_F16, AMDGPUfmad_ftz> {
let SubtargetPredicate = Has16BitInsts;
}
-multiclass SelectPat <ValueType vt> {
- def : GCNPat <
- (vt (select i1:$src0, (VOP3Mods_f32 vt:$src1, i32:$src1_mods),
- (VOP3Mods_f32 vt:$src2, i32:$src2_mods))),
- (V_CNDMASK_B32_e64 $src2_mods, $src2, $src1_mods, $src1, $src0)
- >;
-}
+class VOPSelectModsPat <ValueType vt> : GCNPat <
+ (vt (select i1:$src0, (VOP3Mods vt:$src1, i32:$src1_mods),
+ (VOP3Mods vt:$src2, i32:$src2_mods))),
+ (V_CNDMASK_B32_e64 FP32InputMods:$src2_mods, VSrc_b32:$src2,
+ FP32InputMods:$src1_mods, VSrc_b32:$src1, SSrc_i1:$src0)
+>;
+
+class VOPSelectPat <ValueType vt> : GCNPat <
+ (vt (select i1:$src0, vt:$src1, vt:$src2)),
+ (V_CNDMASK_B32_e64 0, VSrc_b32:$src2, 0, VSrc_b32:$src1, SSrc_i1:$src0)
+>;
-defm : SelectPat <i16>;
-defm : SelectPat <i32>;
-defm : SelectPat <f16>;
-defm : SelectPat <f32>;
+def : VOPSelectModsPat <i32>;
+def : VOPSelectModsPat <f32>;
+def : VOPSelectPat <f16>;
+def : VOPSelectPat <i16>;
let AddedComplexity = 1 in {
def : GCNPat <
@@ -1039,6 +1174,8 @@ def : BitConvert <v4f32, v2f64, VReg_128>;
def : BitConvert <v4i32, v2f64, VReg_128>;
def : BitConvert <v2i64, v2f64, VReg_128>;
def : BitConvert <v2f64, v2i64, VReg_128>;
+def : BitConvert <v4f32, v2i64, VReg_128>;
+def : BitConvert <v2i64, v4f32, VReg_128>;
// 160-bit bitcast
def : BitConvert <v5i32, v5f32, SGPR_160>;
@@ -1049,14 +1186,46 @@ def : BitConvert <v8i32, v8f32, SReg_256>;
def : BitConvert <v8f32, v8i32, SReg_256>;
def : BitConvert <v8i32, v8f32, VReg_256>;
def : BitConvert <v8f32, v8i32, VReg_256>;
+def : BitConvert <v4i64, v4f64, VReg_256>;
+def : BitConvert <v4f64, v4i64, VReg_256>;
+def : BitConvert <v4i64, v8i32, VReg_256>;
+def : BitConvert <v4i64, v8f32, VReg_256>;
+def : BitConvert <v4f64, v8i32, VReg_256>;
+def : BitConvert <v4f64, v8f32, VReg_256>;
+def : BitConvert <v8i32, v4i64, VReg_256>;
+def : BitConvert <v8f32, v4i64, VReg_256>;
+def : BitConvert <v8i32, v4f64, VReg_256>;
+def : BitConvert <v8f32, v4f64, VReg_256>;
+
// 512-bit bitcast
def : BitConvert <v16i32, v16f32, VReg_512>;
def : BitConvert <v16f32, v16i32, VReg_512>;
+def : BitConvert <v8i64, v8f64, VReg_512>;
+def : BitConvert <v8f64, v8i64, VReg_512>;
+def : BitConvert <v8i64, v16i32, VReg_512>;
+def : BitConvert <v8f64, v16i32, VReg_512>;
+def : BitConvert <v16i32, v8i64, VReg_512>;
+def : BitConvert <v16i32, v8f64, VReg_512>;
+def : BitConvert <v8i64, v16f32, VReg_512>;
+def : BitConvert <v8f64, v16f32, VReg_512>;
+def : BitConvert <v16f32, v8i64, VReg_512>;
+def : BitConvert <v16f32, v8f64, VReg_512>;
// 1024-bit bitcast
def : BitConvert <v32i32, v32f32, VReg_1024>;
def : BitConvert <v32f32, v32i32, VReg_1024>;
+def : BitConvert <v16i64, v16f64, VReg_1024>;
+def : BitConvert <v16f64, v16i64, VReg_1024>;
+def : BitConvert <v16i64, v32i32, VReg_1024>;
+def : BitConvert <v32i32, v16i64, VReg_1024>;
+def : BitConvert <v16f64, v32f32, VReg_1024>;
+def : BitConvert <v32f32, v16f64, VReg_1024>;
+def : BitConvert <v16i64, v32f32, VReg_1024>;
+def : BitConvert <v32i32, v16f64, VReg_1024>;
+def : BitConvert <v16f64, v32i32, VReg_1024>;
+def : BitConvert <v32f32, v16i64, VReg_1024>;
+
/********** =================== **********/
/********** Src & Dst modifiers **********/
@@ -1155,7 +1324,7 @@ def : GCNPat <
(S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit
>;
-// FIXME: The implicit-def of scc from S_[X]OR_B32 is mishandled
+// FIXME: The implicit-def of scc from S_[X]OR/AND_B32 is mishandled
// def : GCNPat <
// (fneg (f64 SReg_64:$src)),
// (REG_SEQUENCE SReg_64,
@@ -1176,6 +1345,17 @@ def : GCNPat <
// sub1)
// >;
+// FIXME: Use S_BITSET0_B32/B64?
+// def : GCNPat <
+// (fabs (f64 SReg_64:$src)),
+// (REG_SEQUENCE SReg_64,
+// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
+// sub0,
+// (S_AND_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
+// (i32 (S_MOV_B32 (i32 0x7fffffff)))),
+// sub1)
+// >;
+
} // End let AddedComplexity = 1
def : GCNPat <
@@ -1372,11 +1552,12 @@ class Ext32Pat <SDNode ext> : GCNPat <
def : Ext32Pat <zext>;
def : Ext32Pat <anyext>;
-// The multiplication scales from [0,1] to the unsigned integer range
+// The multiplication scales from [0,1) to the unsigned integer range,
+// rounding down a bit to avoid unwanted overflow.
def : GCNPat <
(AMDGPUurecip i32:$src0),
(V_CVT_U32_F32_e32
- (V_MUL_F32_e32 (i32 CONST.FP_UINT_MAX_PLUS_1),
+ (V_MUL_F32_e32 (i32 CONST.FP_4294966784),
(V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
>;
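To make the new constant easier to check (illustrative only, not from the patch): the expansion approximates 2^32 / x with the hardware reciprocal, and 4294966784.0 is a float slightly below 2^32 chosen so the final convert cannot overflow when the reciprocal rounds a little high. A rough scalar model in C++, assuming x != 0:

#include <stdint.h>

uint32_t urecip(uint32_t x) {
  float r = 1.0f / (float)x;             // V_RCP_IFLAG_F32(V_CVT_F32_U32(x))
  return (uint32_t)(4294966784.0f * r);  // V_CVT_U32_F32(V_MUL_F32(...)), stays below 2^32
}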
@@ -1421,11 +1602,13 @@ defm : SI_INDIRECT_Pattern <v2f32, f32, "V2">;
defm : SI_INDIRECT_Pattern <v4f32, f32, "V4">;
defm : SI_INDIRECT_Pattern <v8f32, f32, "V8">;
defm : SI_INDIRECT_Pattern <v16f32, f32, "V16">;
+defm : SI_INDIRECT_Pattern <v32f32, f32, "V32">;
defm : SI_INDIRECT_Pattern <v2i32, i32, "V2">;
defm : SI_INDIRECT_Pattern <v4i32, i32, "V4">;
defm : SI_INDIRECT_Pattern <v8i32, i32, "V8">;
defm : SI_INDIRECT_Pattern <v16i32, i32, "V16">;
+defm : SI_INDIRECT_Pattern <v32i32, i32, "V32">;
//===----------------------------------------------------------------------===//
// SAD Patterns
@@ -1695,102 +1878,187 @@ def : GCNPat <
def : GCNPat <
(i32 (bswap i32:$a)),
(V_BFI_B32 (S_MOV_B32 (i32 0x00ff00ff)),
- (V_ALIGNBIT_B32 $a, $a, (i32 24)),
- (V_ALIGNBIT_B32 $a, $a, (i32 8)))
+ (V_ALIGNBIT_B32 VSrc_b32:$a, VSrc_b32:$a, (i32 24)),
+ (V_ALIGNBIT_B32 VSrc_b32:$a, VSrc_b32:$a, (i32 8)))
>;
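The pattern relies on the identity bswap(a) == bfi(0x00ff00ff, rotr(a, 24), rotr(a, 8)): V_ALIGNBIT_B32 with both sources equal is a rotate right, and V_BFI_B32 merges the two rotations under the byte mask. A small self-checking sketch in C++ (helper names are illustrative):

#include <assert.h>
#include <stdint.h>

static uint32_t rotr32(uint32_t v, unsigned s) {               // V_ALIGNBIT_B32 a, a, s
  return (v >> s) | (v << (32 - s));                           // s must be 1..31 here
}

static uint32_t bfi(uint32_t mask, uint32_t s1, uint32_t s2) { // V_BFI_B32
  return (s1 & mask) | (s2 & ~mask);
}

int main() {
  uint32_t a = 0xAABBCCDDu;
  assert(bfi(0x00ff00ffu, rotr32(a, 24), rotr32(a, 8)) == 0xDDCCBBAAu);
  return 0;
}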
-let OtherPredicates = [NoFP16Denormals] in {
-def : GCNPat<
- (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
- (V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src, 0, 0)
+// FIXME: This should have been narrowed to i32 during legalization.
+// This pattern should also be skipped for GlobalISel
+def : GCNPat <
+ (i64 (bswap i64:$a)),
+ (REG_SEQUENCE VReg_64,
+ (V_BFI_B32 (S_MOV_B32 (i32 0x00ff00ff)),
+ (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
+ (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
+ (i32 24)),
+ (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
+ (i32 (EXTRACT_SUBREG VReg_64:$a, sub1)),
+ (i32 8))),
+ sub0,
+ (V_BFI_B32 (S_MOV_B32 (i32 0x00ff00ff)),
+ (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
+ (i32 24)),
+ (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$a, sub0)),
+ (i32 8))),
+ sub1)
+>;
+
+// FIXME: The AddedComplexity should not be needed, but in GlobalISel
+// the BFI pattern ends up taking precedence without it.
+let SubtargetPredicate = isGFX8Plus, AddedComplexity = 1 in {
+// Magic number: 3 | (2 << 8) | (1 << 16) | (0 << 24)
+//
+// My reading of the manual suggests we should be using src0 for the
+// register value, but this is what seems to work.
+def : GCNPat <
+ (i32 (bswap i32:$a)),
+ (V_PERM_B32 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x00010203)))
>;
-def : GCNPat<
- (fcanonicalize (f16 (fneg (VOP3Mods f16:$src, i32:$src_mods)))),
- (V_MUL_F16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src, 0, 0)
+// FIXME: This should have been narrowed to i32 during legalization.
+// This pattern should also be skipped for GlobalISel
+def : GCNPat <
+ (i64 (bswap i64:$a)),
+ (REG_SEQUENCE VReg_64,
+ (V_PERM_B32 (i32 0), (EXTRACT_SUBREG VReg_64:$a, sub1),
+ (S_MOV_B32 (i32 0x00010203))),
+ sub0,
+ (V_PERM_B32 (i32 0), (EXTRACT_SUBREG VReg_64:$a, sub0),
+ (S_MOV_B32 (i32 0x00010203))),
+ sub1)
>;
-def : GCNPat<
- (fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))),
- (V_PK_MUL_F16 0, (i32 CONST.FP16_ONE), $src_mods, $src, DSTCLAMP.NONE)
+// Magic number: 1 | (0 << 8) | (12 << 16) | (12 << 24)
+// The 12s emit 0s.
+def : GCNPat <
+ (i16 (bswap i16:$a)),
+ (V_PERM_B32 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001)))
>;
-}
-let OtherPredicates = [FP16Denormals] in {
-def : GCNPat<
- (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
- (V_MAX_F16_e64 $src_mods, $src, $src_mods, $src, 0, 0)
+def : GCNPat <
+ (i32 (zext (bswap i16:$a))),
+ (V_PERM_B32 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001)))
>;
-let SubtargetPredicate = HasVOP3PInsts in {
-def : GCNPat<
- (fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))),
- (V_PK_MAX_F16 $src_mods, $src, $src_mods, $src, DSTCLAMP.NONE)
+// Magic number: 1 | (0 << 8) | (3 << 16) | (2 << 24)
+def : GCNPat <
+ (v2i16 (bswap v2i16:$a)),
+ (V_PERM_B32 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x02030001)))
>;
+
}
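To audit the selector constants above, a simplified byte-permute model helps; it covers only the selector values these patterns use (0..7 pick a byte of the concatenation {src0,src1}, anything else is treated like the zero-emitting 12), so other encodings of the real instruction are deliberately not modeled:

#include <stdint.h>

static uint32_t v_perm_b32(uint32_t src0, uint32_t src1, uint32_t sel) {
  uint64_t bytes = ((uint64_t)src0 << 32) | src1;          // bytes 0..3 = src1, 4..7 = src0
  uint32_t result = 0;
  for (int i = 0; i < 4; ++i) {
    uint32_t s = (sel >> (8 * i)) & 0xff;
    uint8_t b = (s < 8) ? (uint8_t)(bytes >> (8 * s)) : 0; // selector 12 emits a zero byte
    result |= (uint32_t)b << (8 * i);
  }
  return result;
}

// v_perm_b32(0, a, 0x00010203) reverses all four bytes (the i32 bswap).
// v_perm_b32(0, a, 0x0c0c0001) swaps the low two bytes and zero-extends (the i16 bswap).
// v_perm_b32(0, a, 0x02030001) swaps the bytes within each 16-bit half (the v2i16 bswap).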
-}
-let OtherPredicates = [NoFP32Denormals] in {
+
+// Prefer selecting to max when legal, but using mul is always valid.
+let AddedComplexity = -5 in {
def : GCNPat<
- (fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))),
- (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), $src_mods, $src, 0, 0)
+ (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
+ (V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src)
>;
def : GCNPat<
- (fcanonicalize (f32 (fneg (VOP3Mods f32:$src, i32:$src_mods)))),
- (V_MUL_F32_e64 0, (i32 CONST.FP32_NEG_ONE), $src_mods, $src, 0, 0)
+ (fcanonicalize (f16 (fneg (VOP3Mods f16:$src, i32:$src_mods)))),
+ (V_MUL_F16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src)
+>;
+
+def : GCNPat<
+ (fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))),
+ (V_PK_MUL_F16 0, (i32 CONST.FP16_ONE), $src_mods, $src, DSTCLAMP.NONE)
>;
-}
-let OtherPredicates = [FP32Denormals] in {
def : GCNPat<
(fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))),
- (V_MAX_F32_e64 $src_mods, $src, $src_mods, $src, 0, 0)
+ (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), $src_mods, $src)
>;
-}
-let OtherPredicates = [NoFP64Denormals] in {
def : GCNPat<
- (fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))),
- (V_MUL_F64 0, CONST.FP64_ONE, $src_mods, $src, 0, 0)
+ (fcanonicalize (f32 (fneg (VOP3Mods f32:$src, i32:$src_mods)))),
+ (V_MUL_F32_e64 0, (i32 CONST.FP32_NEG_ONE), $src_mods, $src)
>;
-}
-let OtherPredicates = [FP64Denormals] in {
+// TODO: Handle fneg like other types.
def : GCNPat<
(fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))),
- (V_MAX_F64 $src_mods, $src, $src_mods, $src, 0, 0)
+ (V_MUL_F64 0, CONST.FP64_ONE, $src_mods, $src)
>;
+} // End AddedComplexity = -5
+
+multiclass SelectCanonicalizeAsMax<
+ list<Predicate> f32_preds = [],
+ list<Predicate> f64_preds = [],
+ list<Predicate> f16_preds = []> {
+ def : GCNPat<
+ (fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))),
+ (V_MAX_F32_e64 $src_mods, $src, $src_mods, $src)> {
+ let OtherPredicates = f32_preds;
+ }
+
+ def : GCNPat<
+ (fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))),
+ (V_MAX_F64 $src_mods, $src, $src_mods, $src)> {
+ let OtherPredicates = f64_preds;
+ }
+
+ def : GCNPat<
+ (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
+ (V_MAX_F16_e64 $src_mods, $src, $src_mods, $src, 0, 0)> {
+ // FIXME: Should have 16-bit inst subtarget predicate
+ let OtherPredicates = f16_preds;
+ }
+
+ def : GCNPat<
+ (fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))),
+ (V_PK_MAX_F16 $src_mods, $src, $src_mods, $src, DSTCLAMP.NONE)> {
+ // FIXME: Should have VOP3P subtarget predicate
+ let OtherPredicates = f16_preds;
+ }
}
+// On pre-gfx9 targets, v_max_*/v_min_* did not respect the denormal
+// mode, and would never flush. For f64, it's faster to implement
+// this with a max. For f16/f32 it's a wash, but prefer max when
+// valid.
+//
+// FIXME: Lowering f32/f16 with max is worse since we can use a
+// smaller encoding if the input is fneg'd. It also adds an extra
+// register use.
+let SubtargetPredicate = HasMinMaxDenormModes in {
+ defm : SelectCanonicalizeAsMax<[], [], []>;
+} // End SubtargetPredicate = HasMinMaxDenormModes
+
+let SubtargetPredicate = NotHasMinMaxDenormModes in {
+ // Use the max lowering if we don't need to flush.
+
+  // FIXME: We don't use this for f32 as a workaround for the
+ // library being compiled with the default ieee mode, but
+ // potentially being called from flushing kernels. Really we should
+ // not be mixing code expecting different default FP modes, but mul
+ // works in any FP environment.
+ defm : SelectCanonicalizeAsMax<[FalsePredicate], [FP64Denormals], [FP16Denormals]>;
+} // End SubtargetPredicate = NotHasMinMaxDenormModes
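A compact way to read the predicate wiring above (an illustrative C++ helper, not the actual selector code): multiplying by 1.0 canonicalizes in every FP mode, while v_max v, v is only safe when the hardware's min/max honor the denormal mode or when no flushing is required; on top of this rule, the change pins f32 to the mul form anyway, per the FIXME about library code.

enum CanonLowering { CanonMulByOne, CanonMaxWithSelf };

CanonLowering pickCanonicalizeLowering(bool hasMinMaxDenormModes,
                                       bool denormalsEnabled) {
  if (hasMinMaxDenormModes)   // gfx9+: max obeys the denormal mode, always usable
    return CanonMaxWithSelf;
  if (denormalsEnabled)       // nothing would be flushed, so max is still correct
    return CanonMaxWithSelf;
  return CanonMulByOne;       // mul by 1.0 quiets/flushes correctly in any mode
}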
+
+
let OtherPredicates = [HasDLInsts] in {
def : GCNPat <
- (fma (f32 (VOP3Mods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+ (fma (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)),
(f32 (VOP3Mods f32:$src1, i32:$src1_modifiers)),
(f32 (VOP3NoMods f32:$src2))),
(V_FMAC_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1,
- SRCMODS.NONE, $src2, $clamp, $omod)
+ SRCMODS.NONE, $src2)
>;
} // End OtherPredicates = [HasDLInsts]
let SubtargetPredicate = isGFX10Plus in
def : GCNPat <
- (fma (f16 (VOP3Mods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+ (fma (f16 (VOP3Mods f32:$src0, i32:$src0_modifiers)),
(f16 (VOP3Mods f32:$src1, i32:$src1_modifiers)),
(f16 (VOP3NoMods f32:$src2))),
(V_FMAC_F16_e64 $src0_modifiers, $src0, $src1_modifiers, $src1,
- SRCMODS.NONE, $src2, $clamp, $omod)
->;
-
-// Allow integer inputs
-class ExpPattern<SDPatternOperator node, ValueType vt, Instruction Inst> : GCNPat<
- (node (i8 timm:$tgt), (i8 timm:$en), vt:$src0, vt:$src1, vt:$src2, vt:$src3, (i1 timm:$compr), (i1 timm:$vm)),
- (Inst i8:$tgt, vt:$src0, vt:$src1, vt:$src2, vt:$src3, i1:$vm, i1:$compr, i8:$en)
+ SRCMODS.NONE, $src2)
>;
-def : ExpPattern<AMDGPUexport, i32, EXP>;
-def : ExpPattern<AMDGPUexport_done, i32, EXP_DONE>;
-
// COPY is a workaround for a tablegen bug with multiple outputs
// arising from S_LSHL_B32's implicit scc def.
def : GCNPat <
@@ -1873,19 +2141,20 @@ def : GCNPat <
>;
def : GCNPat <
- (i64 (int_amdgcn_mov_dpp i64:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask,
- timm:$bound_ctrl)),
- (V_MOV_B64_DPP_PSEUDO $src, $src, (as_i32imm $dpp_ctrl),
- (as_i32imm $row_mask), (as_i32imm $bank_mask),
- (as_i1imm $bound_ctrl))
+ (i64 (int_amdgcn_mov_dpp i64:$src, timm:$dpp_ctrl, timm:$row_mask,
+ timm:$bank_mask, timm:$bound_ctrl)),
+ (V_MOV_B64_DPP_PSEUDO VReg_64:$src, VReg_64:$src,
+ (as_i32timm $dpp_ctrl), (as_i32timm $row_mask),
+ (as_i32timm $bank_mask),
+ (as_i1timm $bound_ctrl))
>;
def : GCNPat <
(i64 (int_amdgcn_update_dpp i64:$old, i64:$src, timm:$dpp_ctrl, timm:$row_mask,
timm:$bank_mask, timm:$bound_ctrl)),
- (V_MOV_B64_DPP_PSEUDO $old, $src, (as_i32imm $dpp_ctrl),
- (as_i32imm $row_mask), (as_i32imm $bank_mask),
- (as_i1imm $bound_ctrl))
+ (V_MOV_B64_DPP_PSEUDO VReg_64:$old, VReg_64:$src, (as_i32timm $dpp_ctrl),
+ (as_i32timm $row_mask), (as_i32timm $bank_mask),
+ (as_i1timm $bound_ctrl))
>;
//===----------------------------------------------------------------------===//
@@ -1901,6 +2170,11 @@ let SubtargetPredicate = isGFX6 in {
// fract(x) = isnan(x) ? x : min(V_FRACT(x), 0.99999999999999999)
// Convert floor(x) to (x - fract(x))
+
+// Don't bother handling this for GlobalISel, it's handled during
+// lowering.
+//
+// FIXME: DAG should also custom lower this.
def : GCNPat <
(f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))),
(V_ADD_F64
@@ -1910,13 +2184,11 @@ def : GCNPat <
(V_CNDMASK_B64_PSEUDO
(V_MIN_F64
SRCMODS.NONE,
- (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
+ (V_FRACT_F64_e64 $mods, $x),
SRCMODS.NONE,
- (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
- DSTCLAMP.NONE, DSTOMOD.NONE),
+ (V_MOV_B64_PSEUDO 0x3fefffffffffffff)),
$x,
- (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, (i32 3 /*NaN*/))),
- DSTCLAMP.NONE, DSTOMOD.NONE)
+ (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, (i32 3 /*NaN*/))))
>;
} // End SubtargetPredicate = isGFX6
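As a sanity check on the expansion (illustrative C++ only; v_fract_f64 is a simplified stand-in here, not the hardware behavior with all its special cases):

#include <math.h>

static double v_fract_f64(double x) { return x - floor(x); }  // simplified V_FRACT_F64

// floor(x) = x - fract(x), with fract clamped below 1.0 and NaN passed through,
// exactly as the two comment lines above describe.
double gfx6_floor(double x) {
  double fr = v_fract_f64(x);
  fr = fmin(fr, nextafter(1.0, 0.0));  // 0x3fefffffffffffff: the largest double < 1.0
  double lowered = x - fr;             // V_ADD_F64 with the second source negated
  return isnan(x) ? x : lowered;       // V_CMP_CLASS_F64 (NaN test) + V_CNDMASK
}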
@@ -2061,13 +2333,164 @@ def G_AMDGPU_FFBH_U32 : AMDGPUGenericInstruction {
let hasSideEffects = 0;
}
+def G_AMDGPU_RCP_IFLAG : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = 0;
+}
+
+class BufferLoadGenericInstruction : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$rsrc, type2:$vindex, type2:$voffset,
+ type2:$soffset, untyped_imm_0:$offset,
+ untyped_imm_0:$cachepolicy, untyped_imm_0:$idxen);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+}
+
+class TBufferLoadGenericInstruction : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$rsrc, type2:$vindex, type2:$voffset,
+ type2:$soffset, untyped_imm_0:$offset, untyped_imm_0:$format,
+ untyped_imm_0:$cachepolicy, untyped_imm_0:$idxen);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+}
+
+def G_AMDGPU_BUFFER_LOAD_UBYTE : BufferLoadGenericInstruction;
+def G_AMDGPU_BUFFER_LOAD_SBYTE : BufferLoadGenericInstruction;
+def G_AMDGPU_BUFFER_LOAD_USHORT : BufferLoadGenericInstruction;
+def G_AMDGPU_BUFFER_LOAD_SSHORT : BufferLoadGenericInstruction;
+def G_AMDGPU_BUFFER_LOAD : BufferLoadGenericInstruction;
+def G_AMDGPU_BUFFER_LOAD_FORMAT : BufferLoadGenericInstruction;
+def G_AMDGPU_BUFFER_LOAD_FORMAT_D16 : BufferLoadGenericInstruction;
+def G_AMDGPU_TBUFFER_LOAD_FORMAT : TBufferLoadGenericInstruction;
+def G_AMDGPU_TBUFFER_LOAD_FORMAT_D16 : TBufferLoadGenericInstruction;
+
+class BufferStoreGenericInstruction : AMDGPUGenericInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins type0:$vdata, type1:$rsrc, type2:$vindex, type2:$voffset,
+ type2:$soffset, untyped_imm_0:$offset,
+ untyped_imm_0:$cachepolicy, untyped_imm_0:$idxen);
+ let hasSideEffects = 0;
+ let mayStore = 1;
+}
+
+class TBufferStoreGenericInstruction : AMDGPUGenericInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins type0:$vdata, type1:$rsrc, type2:$vindex, type2:$voffset,
+ type2:$soffset, untyped_imm_0:$offset,
+ untyped_imm_0:$format,
+ untyped_imm_0:$cachepolicy, untyped_imm_0:$idxen);
+ let hasSideEffects = 0;
+ let mayStore = 1;
+}
+
+def G_AMDGPU_BUFFER_STORE : BufferStoreGenericInstruction;
+def G_AMDGPU_BUFFER_STORE_BYTE : BufferStoreGenericInstruction;
+def G_AMDGPU_BUFFER_STORE_SHORT : BufferStoreGenericInstruction;
+def G_AMDGPU_BUFFER_STORE_FORMAT : BufferStoreGenericInstruction;
+def G_AMDGPU_BUFFER_STORE_FORMAT_D16 : BufferStoreGenericInstruction;
+def G_AMDGPU_TBUFFER_STORE_FORMAT : TBufferStoreGenericInstruction;
+def G_AMDGPU_TBUFFER_STORE_FORMAT_D16 : TBufferStoreGenericInstruction;
+
+def G_AMDGPU_FMIN_LEGACY : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type0:$src1);
+ let hasSideEffects = 0;
+}
+
+def G_AMDGPU_FMAX_LEGACY : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type0:$src1);
+ let hasSideEffects = 0;
+}
+
+foreach N = 0-3 in {
+def G_AMDGPU_CVT_F32_UBYTE#N : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0);
+ let hasSideEffects = 0;
+}
+}
+
// Atomic cmpxchg. $cmpval and $newval are packed in a single vector
// operand. Expects a MachineMemOperand in addition to explicit
// operands.
def G_AMDGPU_ATOMIC_CMPXCHG : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$oldval);
- let InOperandList = (ins ptype1:$addr, type0:$cmpval_nnenwval);
+ let InOperandList = (ins ptype1:$addr, type0:$cmpval_newval);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
+let Namespace = "AMDGPU" in {
+def G_AMDGPU_ATOMIC_INC : G_ATOMICRMW_OP;
+def G_AMDGPU_ATOMIC_DEC : G_ATOMICRMW_OP;
+}
+
+class BufferAtomicGenericInstruction : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$vdata, type1:$rsrc, type2:$vindex, type2:$voffset,
+ type2:$soffset, untyped_imm_0:$offset,
+ untyped_imm_0:$cachepolicy, untyped_imm_0:$idxen);
let hasSideEffects = 0;
let mayLoad = 1;
let mayStore = 1;
}
+
+def G_AMDGPU_BUFFER_ATOMIC_SWAP : BufferAtomicGenericInstruction;
+def G_AMDGPU_BUFFER_ATOMIC_ADD : BufferAtomicGenericInstruction;
+def G_AMDGPU_BUFFER_ATOMIC_SUB : BufferAtomicGenericInstruction;
+def G_AMDGPU_BUFFER_ATOMIC_SMIN : BufferAtomicGenericInstruction;
+def G_AMDGPU_BUFFER_ATOMIC_UMIN : BufferAtomicGenericInstruction;
+def G_AMDGPU_BUFFER_ATOMIC_SMAX : BufferAtomicGenericInstruction;
+def G_AMDGPU_BUFFER_ATOMIC_UMAX : BufferAtomicGenericInstruction;
+def G_AMDGPU_BUFFER_ATOMIC_AND : BufferAtomicGenericInstruction;
+def G_AMDGPU_BUFFER_ATOMIC_OR : BufferAtomicGenericInstruction;
+def G_AMDGPU_BUFFER_ATOMIC_XOR : BufferAtomicGenericInstruction;
+def G_AMDGPU_BUFFER_ATOMIC_INC : BufferAtomicGenericInstruction;
+def G_AMDGPU_BUFFER_ATOMIC_DEC : BufferAtomicGenericInstruction;
+
+def G_AMDGPU_BUFFER_ATOMIC_CMPSWAP : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$vdata, type0:$cmp, type1:$rsrc, type2:$vindex,
+ type2:$voffset, type2:$soffset, untyped_imm_0:$offset,
+ untyped_imm_0:$cachepolicy, untyped_imm_0:$idxen);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
+// Wrapper around llvm.amdgcn.s.buffer.load. This is mostly needed as
+// a workaround for the intrinsic being defined as readnone, but
+// really needs a memory operand.
+def G_AMDGPU_S_BUFFER_LOAD : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$rsrc, type2:$offset, untyped_imm_0:$cachepolicy);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+// This is equivalent to the G_INTRINSIC*, but the operands may have
+// been legalized depending on the subtarget requirements.
+def G_AMDGPU_INTRIN_IMAGE_LOAD : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins unknown:$intrin, variable_ops);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+
+ // FIXME: Use separate opcode for atomics.
+ let mayStore = 1;
+}
+
+// This is equivalent to the G_INTRINSIC*, but the operands may have
+// been legalized depending on the subtarget requirements.
+def G_AMDGPU_INTRIN_IMAGE_STORE : AMDGPUGenericInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins unknown:$intrin, variable_ops);
+ let hasSideEffects = 0;
+ let mayStore = 1;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index d2b1abc8a9fb..2eb1c52f1b59 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -103,15 +103,19 @@ enum InstClassEnum {
TBUFFER_STORE,
};
-enum RegisterEnum {
- SBASE = 0x1,
- SRSRC = 0x2,
- SOFFSET = 0x4,
- VADDR = 0x8,
- ADDR = 0x10,
- SSAMP = 0x20,
+struct AddressRegs {
+ unsigned char NumVAddrs = 0;
+ bool SBase = false;
+ bool SRsrc = false;
+ bool SOffset = false;
+ bool VAddr = false;
+ bool Addr = false;
+ bool SSamp = false;
};
+// GFX10 image_sample instructions can have 12 vaddrs + srsrc + ssamp.
+const unsigned MaxAddressRegs = 12 + 1 + 1;
+
class SILoadStoreOptimizer : public MachineFunctionPass {
struct CombineInfo {
MachineBasicBlock::iterator I;
@@ -126,10 +130,10 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
bool SLC;
bool DLC;
bool UseST64;
- SmallVector<MachineInstr *, 8> InstsToMove;
- int AddrIdx[5];
- const MachineOperand *AddrReg[5];
+ int AddrIdx[MaxAddressRegs];
+ const MachineOperand *AddrReg[MaxAddressRegs];
unsigned NumAddresses;
+ unsigned Order;
bool hasSameBaseAddress(const MachineInstr &MI) {
for (unsigned i = 0; i < NumAddresses; i++) {
@@ -183,8 +187,8 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
};
struct BaseRegisters {
- unsigned LoReg = 0;
- unsigned HiReg = 0;
+ Register LoReg;
+ Register HiReg;
unsigned LoSubReg = 0;
unsigned HiSubReg = 0;
@@ -201,7 +205,6 @@ private:
const GCNSubtarget *STM = nullptr;
const SIInstrInfo *TII = nullptr;
const SIRegisterInfo *TRI = nullptr;
- const MCSubtargetInfo *STI = nullptr;
MachineRegisterInfo *MRI = nullptr;
AliasAnalysis *AA = nullptr;
bool OptimizeAgain;
@@ -209,9 +212,9 @@ private:
static bool dmasksCanBeCombined(const CombineInfo &CI,
const SIInstrInfo &TII,
const CombineInfo &Paired);
- static bool offsetsCanBeCombined(CombineInfo &CI, const MCSubtargetInfo &STI,
- CombineInfo &Paired);
- static bool widthsFit(const GCNSubtarget &STM, const CombineInfo &CI,
+ static bool offsetsCanBeCombined(CombineInfo &CI, const GCNSubtarget &STI,
+ CombineInfo &Paired, bool Modify = false);
+ static bool widthsFit(const GCNSubtarget &STI, const CombineInfo &CI,
const CombineInfo &Paired);
static unsigned getNewOpcode(const CombineInfo &CI, const CombineInfo &Paired);
static std::pair<unsigned, unsigned> getSubRegIdxs(const CombineInfo &CI,
@@ -219,25 +222,42 @@ private:
const TargetRegisterClass *getTargetRegisterClass(const CombineInfo &CI,
const CombineInfo &Paired);
- bool findMatchingInst(CombineInfo &CI, CombineInfo &Paired);
+ bool checkAndPrepareMerge(CombineInfo &CI, CombineInfo &Paired,
+ SmallVectorImpl<MachineInstr *> &InstsToMove);
unsigned read2Opcode(unsigned EltSize) const;
unsigned read2ST64Opcode(unsigned EltSize) const;
- MachineBasicBlock::iterator mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired);
+ MachineBasicBlock::iterator mergeRead2Pair(CombineInfo &CI,
+ CombineInfo &Paired,
+ const SmallVectorImpl<MachineInstr *> &InstsToMove);
unsigned write2Opcode(unsigned EltSize) const;
unsigned write2ST64Opcode(unsigned EltSize) const;
- MachineBasicBlock::iterator mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired);
- MachineBasicBlock::iterator mergeImagePair(CombineInfo &CI, CombineInfo &Paired);
- MachineBasicBlock::iterator mergeSBufferLoadImmPair(CombineInfo &CI, CombineInfo &Paired);
- MachineBasicBlock::iterator mergeBufferLoadPair(CombineInfo &CI, CombineInfo &Paired);
- MachineBasicBlock::iterator mergeBufferStorePair(CombineInfo &CI, CombineInfo &Paired);
- MachineBasicBlock::iterator mergeTBufferLoadPair(CombineInfo &CI, CombineInfo &Paired);
- MachineBasicBlock::iterator mergeTBufferStorePair(CombineInfo &CI, CombineInfo &Paired);
-
- void updateBaseAndOffset(MachineInstr &I, unsigned NewBase,
+ MachineBasicBlock::iterator
+ mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired,
+ const SmallVectorImpl<MachineInstr *> &InstsToMove);
+ MachineBasicBlock::iterator
+ mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
+ const SmallVectorImpl<MachineInstr *> &InstsToMove);
+ MachineBasicBlock::iterator
+ mergeSBufferLoadImmPair(CombineInfo &CI, CombineInfo &Paired,
+ const SmallVectorImpl<MachineInstr *> &InstsToMove);
+ MachineBasicBlock::iterator
+ mergeBufferLoadPair(CombineInfo &CI, CombineInfo &Paired,
+ const SmallVectorImpl<MachineInstr *> &InstsToMove);
+ MachineBasicBlock::iterator
+ mergeBufferStorePair(CombineInfo &CI, CombineInfo &Paired,
+ const SmallVectorImpl<MachineInstr *> &InstsToMove);
+ MachineBasicBlock::iterator
+ mergeTBufferLoadPair(CombineInfo &CI, CombineInfo &Paired,
+ const SmallVectorImpl<MachineInstr *> &InstsToMove);
+ MachineBasicBlock::iterator
+ mergeTBufferStorePair(CombineInfo &CI, CombineInfo &Paired,
+ const SmallVectorImpl<MachineInstr *> &InstsToMove);
+
+ void updateBaseAndOffset(MachineInstr &I, Register NewBase,
int32_t NewOffset) const;
- unsigned computeBase(MachineInstr &MI, const MemAddress &Addr) const;
+ Register computeBase(MachineInstr &MI, const MemAddress &Addr) const;
MachineOperand createRegOrImm(int32_t Val, MachineInstr &MI) const;
Optional<int32_t> extractConstOffset(const MachineOperand &Op) const;
void processBaseWithConstOffset(const MachineOperand &Base, MemAddress &Addr) const;
@@ -249,8 +269,11 @@ private:
SmallPtrSet<MachineInstr *, 4> &Promoted) const;
void addInstToMergeableList(const CombineInfo &CI,
std::list<std::list<CombineInfo> > &MergeableInsts) const;
- bool collectMergeableInsts(MachineBasicBlock &MBB,
- std::list<std::list<CombineInfo> > &MergeableInsts) const;
+
+ std::pair<MachineBasicBlock::iterator, bool> collectMergeableInsts(
+ MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End,
+ MemInfoMap &Visited, SmallPtrSet<MachineInstr *, 4> &AnchorList,
+ std::list<std::list<CombineInfo>> &MergeableInsts) const;
public:
static char ID;
@@ -259,8 +282,6 @@ public:
initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
}
- void removeCombinedInst(std::list<CombineInfo> &MergeList,
- const MachineInstr &MI);
bool optimizeInstsWithSameBaseAddr(std::list<CombineInfo> &MergeList,
bool &OptimizeListAgain);
bool optimizeBlock(std::list<std::list<CombineInfo> > &MergeableInsts);
@@ -275,6 +296,11 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties()
+ .set(MachineFunctionProperties::Property::IsSSA);
+ }
};
static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
@@ -327,7 +353,8 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
}
if (TII.isMIMG(Opc)) {
// Ignore instructions encoded without vaddr.
- if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) == -1)
+ if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) == -1 &&
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0) == -1)
return UNKNOWN;
// TODO: Support IMAGE_GET_RESINFO and IMAGE_GET_LOD.
if (TII.get(Opc).mayStore() || !TII.get(Opc).mayLoad() ||
@@ -400,58 +427,54 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
}
}
-static unsigned getRegs(unsigned Opc, const SIInstrInfo &TII) {
- if (TII.isMUBUF(Opc)) {
- unsigned result = 0;
+static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
+ AddressRegs Result;
- if (AMDGPU::getMUBUFHasVAddr(Opc)) {
- result |= VADDR;
- }
-
- if (AMDGPU::getMUBUFHasSrsrc(Opc)) {
- result |= SRSRC;
- }
-
- if (AMDGPU::getMUBUFHasSoffset(Opc)) {
- result |= SOFFSET;
- }
-
- return result;
+ if (TII.isMUBUF(Opc)) {
+ if (AMDGPU::getMUBUFHasVAddr(Opc))
+ Result.VAddr = true;
+ if (AMDGPU::getMUBUFHasSrsrc(Opc))
+ Result.SRsrc = true;
+ if (AMDGPU::getMUBUFHasSoffset(Opc))
+ Result.SOffset = true;
+
+ return Result;
}
if (TII.isMIMG(Opc)) {
- unsigned result = VADDR | SRSRC;
+ int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
+ if (VAddr0Idx >= 0) {
+ int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
+ Result.NumVAddrs = SRsrcIdx - VAddr0Idx;
+ } else {
+ Result.VAddr = true;
+ }
+ Result.SRsrc = true;
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
if (Info && AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode)->Sampler)
- result |= SSAMP;
+ Result.SSamp = true;
- return result;
+ return Result;
}
if (TII.isMTBUF(Opc)) {
- unsigned result = 0;
-
- if (AMDGPU::getMTBUFHasVAddr(Opc)) {
- result |= VADDR;
- }
-
- if (AMDGPU::getMTBUFHasSrsrc(Opc)) {
- result |= SRSRC;
- }
-
- if (AMDGPU::getMTBUFHasSoffset(Opc)) {
- result |= SOFFSET;
- }
-
- return result;
+ if (AMDGPU::getMTBUFHasVAddr(Opc))
+ Result.VAddr = true;
+ if (AMDGPU::getMTBUFHasSrsrc(Opc))
+ Result.SRsrc = true;
+ if (AMDGPU::getMTBUFHasSoffset(Opc))
+ Result.SOffset = true;
+
+ return Result;
}
switch (Opc) {
default:
- return 0;
+ return Result;
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
- return SBASE;
+ Result.SBase = true;
+ return Result;
case AMDGPU::DS_READ_B32:
case AMDGPU::DS_READ_B64:
case AMDGPU::DS_READ_B32_gfx9:
@@ -460,7 +483,8 @@ static unsigned getRegs(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::DS_WRITE_B64:
case AMDGPU::DS_WRITE_B32_gfx9:
case AMDGPU::DS_WRITE_B64_gfx9:
- return ADDR;
+ Result.Addr = true;
+ return Result;
}
}
@@ -486,7 +510,7 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
: 4;
break;
case S_BUFFER_LOAD_IMM:
- EltSize = AMDGPU::getSMRDEncodedOffset(STM, 4);
+ EltSize = AMDGPU::convertSMRDOffsetUnits(STM, 4);
break;
default:
EltSize = 4;
@@ -495,6 +519,8 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
if (InstClass == MIMG) {
DMask = TII.getNamedOperand(*I, AMDGPU::OpName::dmask)->getImm();
+ // Offset is not considered for MIMG instructions.
+ Offset = 0;
} else {
int OffsetIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::offset);
Offset = I->getOperand(OffsetIdx).getImm();
@@ -515,40 +541,34 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
DLC = TII.getNamedOperand(*I, AMDGPU::OpName::dlc)->getImm();
}
- unsigned AddrOpName[5] = {0};
- NumAddresses = 0;
- const unsigned Regs = getRegs(I->getOpcode(), TII);
-
- if (Regs & ADDR) {
- AddrOpName[NumAddresses++] = AMDGPU::OpName::addr;
- }
+ AddressRegs Regs = getRegs(Opc, TII);
- if (Regs & SBASE) {
- AddrOpName[NumAddresses++] = AMDGPU::OpName::sbase;
- }
-
- if (Regs & SRSRC) {
- AddrOpName[NumAddresses++] = AMDGPU::OpName::srsrc;
- }
-
- if (Regs & SOFFSET) {
- AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset;
- }
-
- if (Regs & VADDR) {
- AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr;
- }
-
- if (Regs & SSAMP) {
- AddrOpName[NumAddresses++] = AMDGPU::OpName::ssamp;
- }
-
- for (unsigned i = 0; i < NumAddresses; i++) {
- AddrIdx[i] = AMDGPU::getNamedOperandIdx(I->getOpcode(), AddrOpName[i]);
- AddrReg[i] = &I->getOperand(AddrIdx[i]);
- }
-
- InstsToMove.clear();
+ NumAddresses = 0;
+ for (unsigned J = 0; J < Regs.NumVAddrs; J++)
+ AddrIdx[NumAddresses++] =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0) + J;
+ if (Regs.Addr)
+ AddrIdx[NumAddresses++] =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::addr);
+ if (Regs.SBase)
+ AddrIdx[NumAddresses++] =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sbase);
+ if (Regs.SRsrc)
+ AddrIdx[NumAddresses++] =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
+ if (Regs.SOffset)
+ AddrIdx[NumAddresses++] =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset);
+ if (Regs.VAddr)
+ AddrIdx[NumAddresses++] =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
+ if (Regs.SSamp)
+ AddrIdx[NumAddresses++] =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::ssamp);
+ assert(NumAddresses <= MaxAddressRegs);
+
+ for (unsigned J = 0; J < NumAddresses; J++)
+ AddrReg[J] = &I->getOperand(AddrIdx[J]);
}
} // end anonymous namespace.
@@ -578,8 +598,8 @@ static void moveInstsAfter(MachineBasicBlock::iterator I,
}
static void addDefsUsesToList(const MachineInstr &MI,
- DenseSet<unsigned> &RegDefs,
- DenseSet<unsigned> &PhysRegUses) {
+ DenseSet<Register> &RegDefs,
+ DenseSet<Register> &PhysRegUses) {
for (const MachineOperand &Op : MI.operands()) {
if (Op.isReg()) {
if (Op.isDef())
@@ -601,8 +621,8 @@ static bool memAccessesCanBeReordered(MachineBasicBlock::iterator A,
// Add MI and its defs to the lists if MI reads one of the defs that are
// already in the list. Returns true in that case.
-static bool addToListsIfDependent(MachineInstr &MI, DenseSet<unsigned> &RegDefs,
- DenseSet<unsigned> &PhysRegUses,
+static bool addToListsIfDependent(MachineInstr &MI, DenseSet<Register> &RegDefs,
+ DenseSet<Register> &PhysRegUses,
SmallVectorImpl<MachineInstr *> &Insts) {
for (MachineOperand &Use : MI.operands()) {
// If one of the defs is read, then there is a use of Def between I and the
@@ -671,7 +691,8 @@ bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI,
// Check other optional immediate operands for equality.
unsigned OperandsToMatch[] = {AMDGPU::OpName::glc, AMDGPU::OpName::slc,
AMDGPU::OpName::d16, AMDGPU::OpName::unorm,
- AMDGPU::OpName::da, AMDGPU::OpName::r128};
+ AMDGPU::OpName::da, AMDGPU::OpName::r128,
+ AMDGPU::OpName::a16, AMDGPU::OpName::dlc};
for (auto op : OperandsToMatch) {
int Idx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), op);
@@ -695,7 +716,7 @@ bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI,
static unsigned getBufferFormatWithCompCount(unsigned OldFormat,
unsigned ComponentCount,
- const MCSubtargetInfo &STI) {
+ const GCNSubtarget &STI) {
if (ComponentCount > 4)
return 0;
@@ -719,8 +740,9 @@ static unsigned getBufferFormatWithCompCount(unsigned OldFormat,
}
bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI,
- const MCSubtargetInfo &STI,
- CombineInfo &Paired) {
+ const GCNSubtarget &STI,
+ CombineInfo &Paired,
+ bool Modify) {
assert(CI.InstClass != MIMG);
// XXX - Would the same offset be OK? Is there any reason this would happen or
@@ -761,7 +783,7 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI,
CI.UseST64 = false;
CI.BaseOff = 0;
- // Handle SMEM and VMEM instructions.
+ // Handle DS instructions.
if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) {
return (EltOffset0 + CI.Width == EltOffset1 ||
EltOffset1 + Paired.Width == EltOffset0) &&
@@ -769,20 +791,25 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI,
(CI.InstClass == S_BUFFER_LOAD_IMM || CI.SLC == Paired.SLC);
}
+ // Handle SMEM and VMEM instructions.
// If the offset in elements doesn't fit in 8-bits, we might be able to use
// the stride 64 versions.
if ((EltOffset0 % 64 == 0) && (EltOffset1 % 64) == 0 &&
isUInt<8>(EltOffset0 / 64) && isUInt<8>(EltOffset1 / 64)) {
- CI.Offset = EltOffset0 / 64;
- Paired.Offset = EltOffset1 / 64;
- CI.UseST64 = true;
+ if (Modify) {
+ CI.Offset = EltOffset0 / 64;
+ Paired.Offset = EltOffset1 / 64;
+ CI.UseST64 = true;
+ }
return true;
}
// Check if the new offsets fit in the reduced 8-bit range.
if (isUInt<8>(EltOffset0) && isUInt<8>(EltOffset1)) {
- CI.Offset = EltOffset0;
- Paired.Offset = EltOffset1;
+ if (Modify) {
+ CI.Offset = EltOffset0;
+ Paired.Offset = EltOffset1;
+ }
return true;
}
@@ -791,15 +818,19 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI,
CI.BaseOff = std::min(CI.Offset, Paired.Offset);
if ((OffsetDiff % 64 == 0) && isUInt<8>(OffsetDiff / 64)) {
- CI.Offset = (EltOffset0 - CI.BaseOff / CI.EltSize) / 64;
- Paired.Offset = (EltOffset1 - CI.BaseOff / CI.EltSize) / 64;
- CI.UseST64 = true;
+ if (Modify) {
+ CI.Offset = (EltOffset0 - CI.BaseOff / CI.EltSize) / 64;
+ Paired.Offset = (EltOffset1 - CI.BaseOff / CI.EltSize) / 64;
+ CI.UseST64 = true;
+ }
return true;
}
if (isUInt<8>(OffsetDiff)) {
- CI.Offset = EltOffset0 - CI.BaseOff / CI.EltSize;
- Paired.Offset = EltOffset1 - CI.BaseOff / CI.EltSize;
+ if (Modify) {
+ CI.Offset = EltOffset0 - CI.BaseOff / CI.EltSize;
+ Paired.Offset = EltOffset1 - CI.BaseOff / CI.EltSize;
+ }
return true;
}
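For the DS case handled here, a standalone model of the offset check may help; it covers the plain 8-bit encoding and the *_ST64 stride trick, but leaves out the base-offset rewrite branch below and the new Modify plumbing (function names are illustrative):

#include <stdint.h>

static bool fitsInU8(uint32_t v) { return v <= 0xffu; }

// Element offsets, i.e. byte offset divided by EltSize. Two DS accesses can be
// paired if both offsets fit the 8-bit fields, or via the ST64 variants if both
// are multiples of 64 whose quotients fit.
bool dsOffsetsCanBeCombined(uint32_t elt0, uint32_t elt1) {
  if (elt0 % 64 == 0 && elt1 % 64 == 0 &&
      fitsInU8(elt0 / 64) && fitsInU8(elt1 / 64))
    return true;                       // DS_*2ST64_* encoding, offsets scaled by 64
  if (fitsInU8(elt0) && fitsInU8(elt1))
    return true;                       // plain DS_*2_* encoding
  return false;                        // base-offset subtraction not modeled here
}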
@@ -824,11 +855,19 @@ bool SILoadStoreOptimizer::widthsFit(const GCNSubtarget &STM,
}
}
-bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI,
- CombineInfo &Paired) {
- MachineBasicBlock *MBB = CI.I->getParent();
- MachineBasicBlock::iterator E = MBB->end();
- MachineBasicBlock::iterator MBBI = CI.I;
+/// This function assumes that CI comes before Paired in a basic block.
+bool SILoadStoreOptimizer::checkAndPrepareMerge(
+ CombineInfo &CI, CombineInfo &Paired,
+ SmallVectorImpl<MachineInstr *> &InstsToMove) {
+
+ // Check both offsets (or masks for MIMG) can be combined and fit in the
+ // reduced range.
+ if (CI.InstClass == MIMG && !dmasksCanBeCombined(CI, *TII, Paired))
+ return false;
+
+ if (CI.InstClass != MIMG &&
+ (!widthsFit(*STM, CI, Paired) || !offsetsCanBeCombined(CI, *STM, Paired)))
+ return false;
const unsigned Opc = CI.I->getOpcode();
const InstClassEnum InstClass = getInstClass(Opc, *TII);
@@ -844,14 +883,25 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI,
if (Swizzled != -1 && CI.I->getOperand(Swizzled).getImm())
return false;
- ++MBBI;
-
- DenseSet<unsigned> RegDefsToMove;
- DenseSet<unsigned> PhysRegUsesToMove;
+ DenseSet<Register> RegDefsToMove;
+ DenseSet<Register> PhysRegUsesToMove;
addDefsUsesToList(*CI.I, RegDefsToMove, PhysRegUsesToMove);
+ MachineBasicBlock::iterator E = std::next(Paired.I);
+ MachineBasicBlock::iterator MBBI = std::next(CI.I);
+ MachineBasicBlock::iterator MBBE = CI.I->getParent()->end();
for (; MBBI != E; ++MBBI) {
+ if (MBBI == MBBE) {
+ // CombineInfo::Order is a hint on the instruction ordering within the
+ // basic block. This hint suggests that CI precedes Paired, which is
+ // true most of the time. However, moveInstsAfter() processing a
+ // previous list may have changed this order in a situation when it
+ // moves an instruction which exists in some other merge list.
+ // In this case it must be dependent.
+ return false;
+ }
+
if ((getInstClass(MBBI->getOpcode(), *TII) != InstClass) ||
(getInstSubclass(MBBI->getOpcode(), *TII) != InstSubclass)) {
// This is not a matching instruction, but we can keep looking as
@@ -868,11 +918,11 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI,
if (MBBI->mayLoadOrStore() &&
(!memAccessesCanBeReordered(*CI.I, *MBBI, AA) ||
- !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA))) {
+ !canMoveInstsAcrossMemOp(*MBBI, InstsToMove, AA))) {
// We fail condition #1, but we may still be able to satisfy condition
// #2. Add this instruction to the move list and then we will check
// if condition #2 holds once we have selected the matching instruction.
- CI.InstsToMove.push_back(&*MBBI);
+ InstsToMove.push_back(&*MBBI);
addDefsUsesToList(*MBBI, RegDefsToMove, PhysRegUsesToMove);
continue;
}
@@ -881,7 +931,7 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI,
// to the location of the matched instruction any uses of I will need to
// be moved down as well.
addToListsIfDependent(*MBBI, RegDefsToMove, PhysRegUsesToMove,
- CI.InstsToMove);
+ InstsToMove);
continue;
}
@@ -901,26 +951,24 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI,
// where the DS_READ_B32 ends up in InstsToMove and therefore prevents
// merging of the two writes.
if (addToListsIfDependent(*MBBI, RegDefsToMove, PhysRegUsesToMove,
- CI.InstsToMove))
+ InstsToMove))
continue;
- bool Match = CI.hasSameBaseAddress(*MBBI);
-
- if (Match) {
- Paired.setMI(MBBI, *TII, *STM);
-
- // Check both offsets (or masks for MIMG) can be combined and fit in the
- // reduced range.
- bool canBeCombined =
- CI.InstClass == MIMG
- ? dmasksCanBeCombined(CI, *TII, Paired)
- : widthsFit(*STM, CI, Paired) && offsetsCanBeCombined(CI, *STI, Paired);
-
- // We also need to go through the list of instructions that we plan to
+ if (&*MBBI == &*Paired.I) {
+ // We need to go through the list of instructions that we plan to
// move and make sure they are all safe to move down past the merged
// instruction.
- if (canBeCombined && canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA))
+ if (canMoveInstsAcrossMemOp(*MBBI, InstsToMove, AA)) {
+
+ // Call offsetsCanBeCombined with modify = true so that the offsets are
+ // correct for the new instruction. This should return true, because
+ // this function should only be called on CombineInfo objects that
+ // have already been confirmed to be mergeable.
+ if (CI.InstClass != MIMG)
+ offsetsCanBeCombined(CI, *STM, Paired, true);
return true;
+ }
+ return false;
}
// We've found a load/store that we couldn't merge for some reason.
@@ -929,7 +977,7 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI,
// down past this instruction.
// check if we can move I across MBBI and if we can move all I's users
if (!memAccessesCanBeReordered(*CI.I, *MBBI, AA) ||
- !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA))
+ !canMoveInstsAcrossMemOp(*MBBI, InstsToMove, AA))
break;
}
return false;
@@ -950,7 +998,8 @@ unsigned SILoadStoreOptimizer::read2ST64Opcode(unsigned EltSize) const {
}
MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired) {
+SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
+ const SmallVectorImpl<MachineInstr *> &InstsToMove) {
MachineBasicBlock *MBB = CI.I->getParent();
// Be careful, since the addresses could be subregisters themselves in weird
@@ -1023,7 +1072,7 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired) {
.add(*Dest1)
.addReg(DestReg, RegState::Kill, SubRegIdx1);
- moveInstsAfter(Copy1, CI.InstsToMove);
+ moveInstsAfter(Copy1, InstsToMove);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1049,7 +1098,8 @@ unsigned SILoadStoreOptimizer::write2ST64Opcode(unsigned EltSize) const {
}
MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired) {
+SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired,
+ const SmallVectorImpl<MachineInstr *> &InstsToMove) {
MachineBasicBlock *MBB = CI.I->getParent();
// Be sure to use .addOperand(), and not .addReg() with these. We want to be
@@ -1106,7 +1156,7 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired) {
.addImm(0) // gds
.cloneMergedMemRefs({&*CI.I, &*Paired.I});
- moveInstsAfter(Write2, CI.InstsToMove);
+ moveInstsAfter(Write2, InstsToMove);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1116,7 +1166,8 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired) {
}
MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired) {
+SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
+ const SmallVectorImpl<MachineInstr *> &InstsToMove) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
const unsigned Opcode = getNewOpcode(CI, Paired);
@@ -1161,15 +1212,16 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired) {
.add(*Dest1)
.addReg(DestReg, RegState::Kill, SubRegIdx1);
- moveInstsAfter(Copy1, CI.InstsToMove);
+ moveInstsAfter(Copy1, InstsToMove);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
return New;
}
-MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI, CombineInfo &Paired) {
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
+ CombineInfo &CI, CombineInfo &Paired,
+ const SmallVectorImpl<MachineInstr *> &InstsToMove) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
const unsigned Opcode = getNewOpcode(CI, Paired);
@@ -1211,15 +1263,16 @@ SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI, CombineInfo &Pair
.add(*Dest1)
.addReg(DestReg, RegState::Kill, SubRegIdx1);
- moveInstsAfter(Copy1, CI.InstsToMove);
+ moveInstsAfter(Copy1, InstsToMove);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
return New;
}
-MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI, CombineInfo &Paired) {
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
+ CombineInfo &CI, CombineInfo &Paired,
+ const SmallVectorImpl<MachineInstr *> &InstsToMove) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
@@ -1233,9 +1286,9 @@ SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI, CombineInfo &Paired)
auto MIB = BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg);
- const unsigned Regs = getRegs(Opcode, *TII);
+ AddressRegs Regs = getRegs(Opcode, *TII);
- if (Regs & VADDR)
+ if (Regs.VAddr)
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
// It shouldn't be possible to get this far if the two instructions
@@ -1273,15 +1326,16 @@ SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI, CombineInfo &Paired)
.add(*Dest1)
.addReg(DestReg, RegState::Kill, SubRegIdx1);
- moveInstsAfter(Copy1, CI.InstsToMove);
+ moveInstsAfter(Copy1, InstsToMove);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
return New;
}
-MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeTBufferLoadPair(CombineInfo &CI, CombineInfo &Paired) {
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
+ CombineInfo &CI, CombineInfo &Paired,
+ const SmallVectorImpl<MachineInstr *> &InstsToMove) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
@@ -1295,13 +1349,13 @@ SILoadStoreOptimizer::mergeTBufferLoadPair(CombineInfo &CI, CombineInfo &Paired)
auto MIB = BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg);
- const unsigned Regs = getRegs(Opcode, *TII);
+ AddressRegs Regs = getRegs(Opcode, *TII);
- if (Regs & VADDR)
+ if (Regs.VAddr)
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
unsigned JoinedFormat =
- getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, *STI);
+ getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, *STM);
// It shouldn't be possible to get this far if the two instructions
// don't have a single memoperand, because MachineInstr::mayAlias()
@@ -1340,15 +1394,16 @@ SILoadStoreOptimizer::mergeTBufferLoadPair(CombineInfo &CI, CombineInfo &Paired)
.add(*Dest1)
.addReg(DestReg, RegState::Kill, SubRegIdx1);
- moveInstsAfter(Copy1, CI.InstsToMove);
+ moveInstsAfter(Copy1, InstsToMove);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
return New;
}
-MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeTBufferStorePair(CombineInfo &CI, CombineInfo &Paired) {
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair(
+ CombineInfo &CI, CombineInfo &Paired,
+ const SmallVectorImpl<MachineInstr *> &InstsToMove) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
@@ -1374,13 +1429,13 @@ SILoadStoreOptimizer::mergeTBufferStorePair(CombineInfo &CI, CombineInfo &Paired
auto MIB = BuildMI(*MBB, Paired.I, DL, TII->get(Opcode))
.addReg(SrcReg, RegState::Kill);
- const unsigned Regs = getRegs(Opcode, *TII);
+ AddressRegs Regs = getRegs(Opcode, *TII);
- if (Regs & VADDR)
+ if (Regs.VAddr)
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
unsigned JoinedFormat =
- getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, *STI);
+ getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, *STM);
// It shouldn't be possible to get this far if the two instructions
// don't have a single memoperand, because MachineInstr::mayAlias()
@@ -1403,7 +1458,7 @@ SILoadStoreOptimizer::mergeTBufferStorePair(CombineInfo &CI, CombineInfo &Paired
.addMemOperand(
combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
- moveInstsAfter(MIB, CI.InstsToMove);
+ moveInstsAfter(MIB, InstsToMove);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1491,9 +1546,9 @@ SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI,
case 4:
return &AMDGPU::SGPR_128RegClass;
case 8:
- return &AMDGPU::SReg_256RegClass;
+ return &AMDGPU::SGPR_256RegClass;
case 16:
- return &AMDGPU::SReg_512RegClass;
+ return &AMDGPU::SGPR_512RegClass;
}
} else {
switch (CI.Width + Paired.Width) {
@@ -1509,8 +1564,9 @@ SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI,
}
}
-MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI, CombineInfo &Paired) {
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
+ CombineInfo &CI, CombineInfo &Paired,
+ const SmallVectorImpl<MachineInstr *> &InstsToMove) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
@@ -1536,9 +1592,9 @@ SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI, CombineInfo &Paired)
auto MIB = BuildMI(*MBB, Paired.I, DL, TII->get(Opcode))
.addReg(SrcReg, RegState::Kill);
- const unsigned Regs = getRegs(Opcode, *TII);
+ AddressRegs Regs = getRegs(Opcode, *TII);
- if (Regs & VADDR)
+ if (Regs.VAddr)
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
@@ -1561,7 +1617,7 @@ SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI, CombineInfo &Paired)
.addImm(0) // swz
.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
- moveInstsAfter(MIB, CI.InstsToMove);
+ moveInstsAfter(MIB, InstsToMove);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1585,7 +1641,7 @@ SILoadStoreOptimizer::createRegOrImm(int32_t Val, MachineInstr &MI) const {
}
// Compute base address using Addr and return the final register.
-unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
+Register SILoadStoreOptimizer::computeBase(MachineInstr &MI,
const MemAddress &Addr) const {
MachineBasicBlock *MBB = MI.getParent();
MachineBasicBlock::iterator MBBI = MI.getIterator();
@@ -1644,7 +1700,7 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
// Update base and offset with the NewBase and NewOffset in MI.
void SILoadStoreOptimizer::updateBaseAndOffset(MachineInstr &MI,
- unsigned NewBase,
+ Register NewBase,
int32_t NewOffset) const {
auto Base = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
Base->setReg(NewBase);
@@ -1856,7 +1912,7 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
<< AnchorAddr.Offset << "\n\n");
// Instead of moving up, just re-compute anchor-instruction's base address.
- unsigned Base = computeBase(MI, AnchorAddr);
+ Register Base = computeBase(MI, AnchorAddr);
updateBaseAndOffset(MI, Base, MAddr.Offset - AnchorAddr.Offset);
LLVM_DEBUG(dbgs() << " After promotion: "; MI.dump(););
@@ -1894,39 +1950,80 @@ void SILoadStoreOptimizer::addInstToMergeableList(const CombineInfo &CI,
MergeableInsts.emplace_back(1, CI);
}
-bool SILoadStoreOptimizer::collectMergeableInsts(MachineBasicBlock &MBB,
- std::list<std::list<CombineInfo> > &MergeableInsts) const {
+std::pair<MachineBasicBlock::iterator, bool>
+SILoadStoreOptimizer::collectMergeableInsts(
+ MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End,
+ MemInfoMap &Visited, SmallPtrSet<MachineInstr *, 4> &AnchorList,
+ std::list<std::list<CombineInfo>> &MergeableInsts) const {
bool Modified = false;
- // Contain the list
- MemInfoMap Visited;
- // Contains the list of instructions for which constant offsets are being
- // promoted to the IMM.
- SmallPtrSet<MachineInstr *, 4> AnchorList;
// Sort potential mergeable instructions into lists. One list per base address.
- for (MachineInstr &MI : MBB.instrs()) {
+ unsigned Order = 0;
+ MachineBasicBlock::iterator BlockI = Begin;
+ for (; BlockI != End; ++BlockI) {
+ MachineInstr &MI = *BlockI;
+
// We run this before checking if an address is mergeable, because it can produce
// better code even if the instructions aren't mergeable.
if (promoteConstantOffsetToImm(MI, Visited, AnchorList))
Modified = true;
+ // Don't combine if volatile. We also won't be able to merge across this, so
+    // break the search. Merging can resume after this barrier in a separate list.
+ if (MI.hasOrderedMemoryRef()) {
+ LLVM_DEBUG(dbgs() << "Breaking search on memory fence: " << MI);
+
+ // Search will resume after this instruction in a separate merge list.
+ ++BlockI;
+ break;
+ }
+
const InstClassEnum InstClass = getInstClass(MI.getOpcode(), *TII);
if (InstClass == UNKNOWN)
continue;
- // Don't combine if volatile.
- if (MI.hasOrderedMemoryRef())
- continue;
-
CombineInfo CI;
CI.setMI(MI, *TII, *STM);
+ CI.Order = Order++;
if (!CI.hasMergeableAddress(*MRI))
continue;
+ LLVM_DEBUG(dbgs() << "Mergeable: " << MI);
+
addInstToMergeableList(CI, MergeableInsts);
}
- return Modified;
+
+ // At this point we have lists of Mergeable instructions.
+ //
+ // Part 2: Sort lists by offset and then for each CombineInfo object in the
+ // list try to find an instruction that can be merged with I. If an instruction
+ // is found, it is stored in the Paired field. If no instructions are found, then
+ // the CombineInfo object is deleted from the list.
+
+ for (std::list<std::list<CombineInfo>>::iterator I = MergeableInsts.begin(),
+ E = MergeableInsts.end(); I != E;) {
+
+ std::list<CombineInfo> &MergeList = *I;
+ if (MergeList.size() <= 1) {
+ // This means we have found only one instruction with a given address
+ // that can be merged, and we need at least 2 instructions to do a merge,
+ // so this list can be discarded.
+ I = MergeableInsts.erase(I);
+ continue;
+ }
+
+ // Sort the lists by offsets, this way mergeable instructions will be
+ // adjacent to each other in the list, which will make it easier to find
+ // matches.
+ MergeList.sort(
+ [] (const CombineInfo &A, CombineInfo &B) {
+ return A.Offset < B.Offset;
+ });
+ ++I;
+ }
+
+ return std::make_pair(BlockI, Modified);
}
// Scan through looking for adjacent LDS operations with constant offsets from
@@ -1936,117 +2033,126 @@ bool SILoadStoreOptimizer::optimizeBlock(
std::list<std::list<CombineInfo> > &MergeableInsts) {
bool Modified = false;
- for (std::list<CombineInfo> &MergeList : MergeableInsts) {
- if (MergeList.size() < 2)
- continue;
+ for (std::list<std::list<CombineInfo>>::iterator I = MergeableInsts.begin(),
+ E = MergeableInsts.end(); I != E;) {
+ std::list<CombineInfo> &MergeList = *I;
bool OptimizeListAgain = false;
if (!optimizeInstsWithSameBaseAddr(MergeList, OptimizeListAgain)) {
- // We weren't able to make any changes, so clear the list so we don't
+ // We weren't able to make any changes, so delete the list so we don't
// process the same instructions the next time we try to optimize this
// block.
- MergeList.clear();
+ I = MergeableInsts.erase(I);
continue;
}
- // We made changes, but also determined that there were no more optimization
- // opportunities, so we don't need to reprocess the list
- if (!OptimizeListAgain)
- MergeList.clear();
-
- OptimizeAgain |= OptimizeListAgain;
Modified = true;
- }
- return Modified;
-}
-void
-SILoadStoreOptimizer::removeCombinedInst(std::list<CombineInfo> &MergeList,
- const MachineInstr &MI) {
-
- for (auto CI = MergeList.begin(), E = MergeList.end(); CI != E; ++CI) {
- if (&*CI->I == &MI) {
- MergeList.erase(CI);
- return;
+ // We made changes, but also determined that there were no more optimization
+ // opportunities, so we don't need to reprocess the list
+ if (!OptimizeListAgain) {
+ I = MergeableInsts.erase(I);
+ continue;
}
+ OptimizeAgain = true;
}
+ return Modified;
}
bool
SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
std::list<CombineInfo> &MergeList,
bool &OptimizeListAgain) {
+ if (MergeList.empty())
+ return false;
+
bool Modified = false;
- for (auto I = MergeList.begin(); I != MergeList.end(); ++I) {
- CombineInfo &CI = *I;
- CombineInfo Paired;
- if (CI.InstClass == UNKNOWN)
- continue;
+ for (auto I = MergeList.begin(), Next = std::next(I); Next != MergeList.end();
+ Next = std::next(I)) {
+
+ auto First = I;
+ auto Second = Next;
+
+ if ((*First).Order > (*Second).Order)
+ std::swap(First, Second);
+ CombineInfo &CI = *First;
+ CombineInfo &Paired = *Second;
- if (!findMatchingInst(CI, Paired))
- goto done;
+ SmallVector<MachineInstr *, 8> InstsToMove;
+ if (!checkAndPrepareMerge(CI, Paired, InstsToMove)) {
+ ++I;
+ continue;
+ }
Modified = true;
- removeCombinedInst(MergeList, *Paired.I);
+
+ LLVM_DEBUG(dbgs() << "Merging: " << *CI.I << " with: " << *Paired.I);
switch (CI.InstClass) {
default:
llvm_unreachable("unknown InstClass");
break;
case DS_READ: {
- MachineBasicBlock::iterator NewMI = mergeRead2Pair(CI, Paired);
+ MachineBasicBlock::iterator NewMI =
+ mergeRead2Pair(CI, Paired, InstsToMove);
CI.setMI(NewMI, *TII, *STM);
break;
}
case DS_WRITE: {
- MachineBasicBlock::iterator NewMI = mergeWrite2Pair(CI, Paired);
+ MachineBasicBlock::iterator NewMI =
+ mergeWrite2Pair(CI, Paired, InstsToMove);
CI.setMI(NewMI, *TII, *STM);
break;
}
case S_BUFFER_LOAD_IMM: {
- MachineBasicBlock::iterator NewMI = mergeSBufferLoadImmPair(CI, Paired);
+ MachineBasicBlock::iterator NewMI =
+ mergeSBufferLoadImmPair(CI, Paired, InstsToMove);
CI.setMI(NewMI, *TII, *STM);
OptimizeListAgain |= (CI.Width + Paired.Width) < 16;
break;
}
case BUFFER_LOAD: {
- MachineBasicBlock::iterator NewMI = mergeBufferLoadPair(CI, Paired);
+ MachineBasicBlock::iterator NewMI =
+ mergeBufferLoadPair(CI, Paired, InstsToMove);
CI.setMI(NewMI, *TII, *STM);
OptimizeListAgain |= (CI.Width + Paired.Width) < 4;
break;
}
case BUFFER_STORE: {
- MachineBasicBlock::iterator NewMI = mergeBufferStorePair(CI, Paired);
+ MachineBasicBlock::iterator NewMI =
+ mergeBufferStorePair(CI, Paired, InstsToMove);
CI.setMI(NewMI, *TII, *STM);
OptimizeListAgain |= (CI.Width + Paired.Width) < 4;
break;
}
case MIMG: {
- MachineBasicBlock::iterator NewMI = mergeImagePair(CI, Paired);
+ MachineBasicBlock::iterator NewMI =
+ mergeImagePair(CI, Paired, InstsToMove);
CI.setMI(NewMI, *TII, *STM);
OptimizeListAgain |= (CI.Width + Paired.Width) < 4;
break;
}
case TBUFFER_LOAD: {
- MachineBasicBlock::iterator NewMI = mergeTBufferLoadPair(CI, Paired);
+ MachineBasicBlock::iterator NewMI =
+ mergeTBufferLoadPair(CI, Paired, InstsToMove);
CI.setMI(NewMI, *TII, *STM);
OptimizeListAgain |= (CI.Width + Paired.Width) < 4;
break;
}
case TBUFFER_STORE: {
- MachineBasicBlock::iterator NewMI = mergeTBufferStorePair(CI, Paired);
+ MachineBasicBlock::iterator NewMI =
+ mergeTBufferStorePair(CI, Paired, InstsToMove);
CI.setMI(NewMI, *TII, *STM);
OptimizeListAgain |= (CI.Width + Paired.Width) < 4;
break;
}
}
+ CI.Order = Paired.Order;
+ if (I == Second)
+ I = Next;
-done:
- // Clear the InstsToMove after we have finished searching so we don't have
- // stale values left over if we search for this CI again in another pass
- // over the block.
- CI.InstsToMove.clear();
+ MergeList.erase(Second);
}
return Modified;
@@ -2062,26 +2168,41 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
TII = STM->getInstrInfo();
TRI = &TII->getRegisterInfo();
- STI = &MF.getSubtarget<MCSubtargetInfo>();
MRI = &MF.getRegInfo();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- assert(MRI->isSSA() && "Must be run on SSA");
-
LLVM_DEBUG(dbgs() << "Running SILoadStoreOptimizer\n");
bool Modified = false;
+ // Contains the list of instructions for which constant offsets are being
+ // promoted to the IMM. This is tracked for an entire block at a time.
+ SmallPtrSet<MachineInstr *, 4> AnchorList;
+ MemInfoMap Visited;
for (MachineBasicBlock &MBB : MF) {
- std::list<std::list<CombineInfo> > MergeableInsts;
- // First pass: Collect list of all instructions we know how to merge.
- Modified |= collectMergeableInsts(MBB, MergeableInsts);
- do {
- OptimizeAgain = false;
- Modified |= optimizeBlock(MergeableInsts);
- } while (OptimizeAgain);
+ MachineBasicBlock::iterator SectionEnd;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;
+ I = SectionEnd) {
+ bool CollectModified;
+ std::list<std::list<CombineInfo>> MergeableInsts;
+
+ // First pass: Collect list of all instructions we know how to merge in a
+ // subset of the block.
+ std::tie(SectionEnd, CollectModified) =
+ collectMergeableInsts(I, E, Visited, AnchorList, MergeableInsts);
+
+ Modified |= CollectModified;
+
+ do {
+ OptimizeAgain = false;
+ Modified |= optimizeBlock(MergeableInsts);
+ } while (OptimizeAgain);
+ }
+
+ Visited.clear();
+ AnchorList.clear();
}
return Modified;
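The pass above collects mergeable instructions per block section and then repeatedly merges neighbouring pairs from each list, keeping the earlier program order, until a pass over a list produces no further opportunities. Below is a minimal standalone sketch of that merge-until-fixed-point pattern; all names, the Width field, and the width-4 limit are illustrative only, not the LLVM pass itself.

#include <iostream>
#include <list>
#include <utility>

struct Info {
  int Order; // position in the original block
  int Width; // e.g. number of dwords accessed
};

static bool tryMergeOnce(std::list<Info> &L, bool &OptimizeAgain) {
  if (L.empty())
    return false;
  bool Modified = false;
  for (auto I = L.begin(), Next = std::next(I); Next != L.end();
       Next = std::next(I)) {
    auto First = I, Second = Next;
    if (First->Order > Second->Order)
      std::swap(First, Second);           // keep the earlier instruction
    if (First->Width + Second->Width > 4) { // pretend 4 is the widest merge
      ++I;
      continue;
    }
    First->Width += Second->Width;        // "merge" the pair
    First->Order = Second->Order;
    OptimizeAgain |= First->Width < 4;    // a wider merge may still be possible
    if (I == Second)
      I = Next;
    L.erase(Second);
    Modified = true;
  }
  return Modified;
}

int main() {
  std::list<Info> L = {{0, 1}, {1, 1}, {2, 2}};
  bool Modified = false;
  bool OptimizeAgain;
  do {
    OptimizeAgain = false;
    Modified |= tryMergeOnce(L, OptimizeAgain);
  } while (OptimizeAgain);
  std::cout << L.size() << " entries left, modified=" << Modified << "\n";
  return 0;
}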
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index bf052dc3c930..36d52ac3ee89 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -38,8 +38,8 @@
/// %vgpr0 = V_ADD_F32 %vgpr0, %vgpr0 // Do the IF block of the branch
///
/// label0:
-/// %sgpr0 = S_OR_SAVEEXEC_B64 %exec // Restore the exec mask for the Then block
-/// %exec = S_XOR_B64 %sgpr0, %exec // Clear live bits from saved exec mask
+/// %sgpr0 = S_OR_SAVEEXEC_B64 %sgpr0 // Restore the exec mask for the Then block
+/// %exec = S_XOR_B64 %sgpr0, %exec // Update the exec mask
/// S_BRANCH_EXECZ label1 // Use our branch optimization
/// // instruction again.
/// %vgpr0 = V_SUB_F32 %vgpr0, %vgpr // Do the THEN block
@@ -51,6 +51,8 @@
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/LiveIntervals.h"
@@ -73,6 +75,10 @@ using namespace llvm;
#define DEBUG_TYPE "si-lower-control-flow"
+static cl::opt<bool>
+RemoveRedundantEndcf("amdgpu-remove-redundant-endcf",
+ cl::init(true), cl::ReallyHidden);
+
namespace {
class SILowerControlFlow : public MachineFunctionPass {
@@ -81,8 +87,12 @@ private:
const SIInstrInfo *TII = nullptr;
LiveIntervals *LIS = nullptr;
MachineRegisterInfo *MRI = nullptr;
+ SetVector<MachineInstr*> LoweredEndCf;
+ DenseSet<Register> LoweredIf;
+ SmallSet<MachineInstr *, 16> NeedsKillCleanup;
const TargetRegisterClass *BoolRC = nullptr;
+ bool InsertKillCleanups;
unsigned AndOpc;
unsigned OrOpc;
unsigned XorOpc;
@@ -98,13 +108,23 @@ private:
void emitLoop(MachineInstr &MI);
void emitEndCf(MachineInstr &MI);
- Register getSaveExec(MachineInstr* MI);
-
void findMaskOperands(MachineInstr &MI, unsigned OpNo,
SmallVectorImpl<MachineOperand> &Src) const;
void combineMasks(MachineInstr &MI);
+ void process(MachineInstr &MI);
+
+ // Skip to the next instruction, ignoring debug instructions, and trivial
+ // block boundaries (blocks that have one (typically fallthrough) successor,
+ // and the successor has one predecessor).
+ MachineBasicBlock::iterator
+ skipIgnoreExecInstsTrivialSucc(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator It) const;
+
+ // Remove redundant SI_END_CF instructions.
+ void optimizeEndCf();
+
public:
static char ID;
@@ -144,62 +164,44 @@ static void setImpSCCDefDead(MachineInstr &MI, bool IsDead) {
char &llvm::SILowerControlFlowID = SILowerControlFlow::ID;
-static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI,
- const SIInstrInfo *TII) {
- Register SaveExecReg = MI.getOperand(0).getReg();
- auto U = MRI->use_instr_nodbg_begin(SaveExecReg);
-
- if (U == MRI->use_instr_nodbg_end() ||
- std::next(U) != MRI->use_instr_nodbg_end() ||
- U->getOpcode() != AMDGPU::SI_END_CF)
- return false;
-
- // Check for SI_KILL_*_TERMINATOR on path from if to endif.
- // if there is any such terminator simplififcations are not safe.
- auto SMBB = MI.getParent();
- auto EMBB = U->getParent();
+static bool hasKill(const MachineBasicBlock *Begin,
+ const MachineBasicBlock *End, const SIInstrInfo *TII) {
DenseSet<const MachineBasicBlock*> Visited;
- SmallVector<MachineBasicBlock*, 4> Worklist(SMBB->succ_begin(),
- SMBB->succ_end());
+ SmallVector<MachineBasicBlock *, 4> Worklist(Begin->succ_begin(),
+ Begin->succ_end());
while (!Worklist.empty()) {
MachineBasicBlock *MBB = Worklist.pop_back_val();
- if (MBB == EMBB || !Visited.insert(MBB).second)
+ if (MBB == End || !Visited.insert(MBB).second)
continue;
- for(auto &Term : MBB->terminators())
+ for (auto &Term : MBB->terminators())
if (TII->isKillTerminator(Term.getOpcode()))
- return false;
+ return true;
Worklist.append(MBB->succ_begin(), MBB->succ_end());
}
- return true;
+ return false;
}
-Register SILowerControlFlow::getSaveExec(MachineInstr *MI) {
- MachineBasicBlock *MBB = MI->getParent();
- MachineOperand &SaveExec = MI->getOperand(0);
- assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister);
-
- Register SaveExecReg = SaveExec.getReg();
- unsigned FalseTermOpc =
- TII->isWave32() ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term;
- MachineBasicBlock::iterator I = (MI);
- MachineBasicBlock::iterator J = std::next(I);
- if (J != MBB->end() && J->getOpcode() == FalseTermOpc &&
- J->getOperand(1).isReg() && J->getOperand(1).getReg() == SaveExecReg) {
- SaveExecReg = J->getOperand(0).getReg();
- J->eraseFromParent();
- }
- return SaveExecReg;
+static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI) {
+ Register SaveExecReg = MI.getOperand(0).getReg();
+ auto U = MRI->use_instr_nodbg_begin(SaveExecReg);
+
+ if (U == MRI->use_instr_nodbg_end() ||
+ std::next(U) != MRI->use_instr_nodbg_end() ||
+ U->getOpcode() != AMDGPU::SI_END_CF)
+ return false;
+
+ return true;
}
void SILowerControlFlow::emitIf(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator I(&MI);
- Register SaveExecReg = getSaveExec(&MI);
+ Register SaveExecReg = MI.getOperand(0).getReg();
MachineOperand& Cond = MI.getOperand(1);
assert(Cond.getSubReg() == AMDGPU::NoSubRegister);
@@ -209,7 +211,35 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
// If there is only one use of save exec register and that use is SI_END_CF,
// we can optimize SI_IF by returning the full saved exec mask instead of
// just cleared bits.
- bool SimpleIf = isSimpleIf(MI, MRI, TII);
+ bool SimpleIf = isSimpleIf(MI, MRI);
+
+ if (InsertKillCleanups) {
+ // Check for SI_KILL_*_TERMINATOR on full path of control flow and
+ // flag the associated SI_END_CF for insertion of a kill cleanup.
+ auto UseMI = MRI->use_instr_nodbg_begin(SaveExecReg);
+ while (UseMI->getOpcode() != AMDGPU::SI_END_CF) {
+ assert(std::next(UseMI) == MRI->use_instr_nodbg_end());
+ assert(UseMI->getOpcode() == AMDGPU::SI_ELSE);
+ MachineOperand &NextExec = UseMI->getOperand(0);
+ Register NextExecReg = NextExec.getReg();
+ if (NextExec.isDead()) {
+ assert(!SimpleIf);
+ break;
+ }
+ UseMI = MRI->use_instr_nodbg_begin(NextExecReg);
+ }
+ if (UseMI->getOpcode() == AMDGPU::SI_END_CF) {
+ if (hasKill(MI.getParent(), UseMI->getParent(), TII)) {
+ NeedsKillCleanup.insert(&*UseMI);
+ SimpleIf = false;
+ }
+ }
+ } else if (SimpleIf) {
+ // Check for SI_KILL_*_TERMINATOR on path from if to endif.
+ // If there is any such terminator, simplifications are not safe.
+ auto UseMI = MRI->use_instr_nodbg_begin(SaveExecReg);
+ SimpleIf = !hasKill(MI.getParent(), UseMI->getParent(), TII);
+ }
// Add an implicit def of exec to discourage scheduling VALU after this which
// will interfere with trying to form s_and_saveexec_b64 later.
@@ -219,6 +249,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), CopyReg)
.addReg(Exec)
.addReg(Exec, RegState::ImplicitDefine);
+ LoweredIf.insert(CopyReg);
Register Tmp = MRI->createVirtualRegister(BoolRC);
@@ -244,9 +275,9 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec)
.addReg(Tmp, RegState::Kill);
- // Insert a pseudo terminator to help keep the verifier happy. This will also
- // be used later when inserting skips.
- MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
+ // Insert the S_CBRANCH_EXECZ instruction which will be optimized later
+ // during SIRemoveShortExecBranches.
+ MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
.add(MI.getOperand(2));
if (!LIS) {
@@ -282,7 +313,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
- Register DstReg = getSaveExec(&MI);
+ Register DstReg = MI.getOperand(0).getReg();
bool ExecModified = MI.getOperand(3).getImm() != 0;
MachineBasicBlock::iterator Start = MBB.begin();
@@ -323,8 +354,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
.addReg(DstReg);
MachineInstr *Branch =
- BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
- .addMBB(DestBB);
+ BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
+ .addMBB(DestBB);
if (!LIS) {
MI.eraseFromParent();
@@ -354,7 +385,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
- auto Dst = getSaveExec(&MI);
+ auto Dst = MI.getOperand(0).getReg();
// Skip ANDing with exec if the break condition is already masked by exec
// because it is a V_CMP in the same basic block. (We know the break
@@ -416,6 +447,38 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
MI.eraseFromParent();
}
+MachineBasicBlock::iterator
+SILowerControlFlow::skipIgnoreExecInstsTrivialSucc(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
+
+ SmallSet<const MachineBasicBlock *, 4> Visited;
+ MachineBasicBlock *B = &MBB;
+ do {
+ if (!Visited.insert(B).second)
+ return MBB.end();
+
+ auto E = B->end();
+ for ( ; It != E; ++It) {
+ if (It->getOpcode() == AMDGPU::SI_KILL_CLEANUP)
+ continue;
+ if (TII->mayReadEXEC(*MRI, *It))
+ break;
+ }
+
+ if (It != E)
+ return It;
+
+ if (B->succ_size() != 1)
+ return MBB.end();
+
+ // If there is one trivial successor, advance to the next block.
+ MachineBasicBlock *Succ = *B->succ_begin();
+
+ It = Succ->begin();
+ B = Succ;
+ } while (true);
+}
+
void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -430,8 +493,20 @@ void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
.addReg(Exec)
.add(MI.getOperand(0));
- if (LIS)
+ LoweredEndCf.insert(NewMI);
+
+ // If this ends control flow which contains kills (as flagged in emitIf)
+ // then insert an SI_KILL_CLEANUP immediately following the exec mask
+ // manipulation. This can be lowered to early termination if appropriate.
+ MachineInstr *CleanUpMI = nullptr;
+ if (NeedsKillCleanup.count(&MI))
+ CleanUpMI = BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::SI_KILL_CLEANUP));
+
+ if (LIS) {
LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
+ if (CleanUpMI)
+ LIS->InsertMachineInstrInMaps(*CleanUpMI);
+ }
MI.eraseFromParent();
@@ -494,6 +569,84 @@ void SILowerControlFlow::combineMasks(MachineInstr &MI) {
MRI->getUniqueVRegDef(Reg)->eraseFromParent();
}
+void SILowerControlFlow::optimizeEndCf() {
+ // If the only instruction immediately following this END_CF is another
+ // END_CF in the only successor, we can avoid emitting the exec mask restore here.
+ if (!RemoveRedundantEndcf)
+ return;
+
+ for (MachineInstr *MI : LoweredEndCf) {
+ MachineBasicBlock &MBB = *MI->getParent();
+ auto Next =
+ skipIgnoreExecInstsTrivialSucc(MBB, std::next(MI->getIterator()));
+ if (Next == MBB.end() || !LoweredEndCf.count(&*Next))
+ continue;
+ // Only skip the inner END_CF if the outer END_CF belongs to SI_IF.
+ // If it belongs to SI_ELSE then the saved mask has an inverted value.
+ Register SavedExec
+ = TII->getNamedOperand(*Next, AMDGPU::OpName::src1)->getReg();
+ assert(SavedExec.isVirtual() && "Expected saved exec to be src1!");
+
+ const MachineInstr *Def = MRI->getUniqueVRegDef(SavedExec);
+ if (Def && LoweredIf.count(SavedExec)) {
+ LLVM_DEBUG(dbgs() << "Skip redundant "; MI->dump());
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ }
+ }
+}
+
+void SILowerControlFlow::process(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineBasicBlock::iterator I(MI);
+ MachineInstr *Prev = (I != MBB.begin()) ? &*(std::prev(I)) : nullptr;
+
+ switch (MI.getOpcode()) {
+ case AMDGPU::SI_IF:
+ emitIf(MI);
+ break;
+
+ case AMDGPU::SI_ELSE:
+ emitElse(MI);
+ break;
+
+ case AMDGPU::SI_IF_BREAK:
+ emitIfBreak(MI);
+ break;
+
+ case AMDGPU::SI_LOOP:
+ emitLoop(MI);
+ break;
+
+ case AMDGPU::SI_END_CF:
+ emitEndCf(MI);
+ break;
+
+ default:
+ assert(false && "Attempt to process unsupported instruction");
+ break;
+ }
+
+ MachineBasicBlock::iterator Next;
+ for (I = Prev ? Prev->getIterator() : MBB.begin(); I != MBB.end(); I = Next) {
+ Next = std::next(I);
+ MachineInstr &MaskMI = *I;
+ switch (MaskMI.getOpcode()) {
+ case AMDGPU::S_AND_B64:
+ case AMDGPU::S_OR_B64:
+ case AMDGPU::S_AND_B32:
+ case AMDGPU::S_OR_B32:
+ // Cleanup bit manipulations on exec mask
+ combineMasks(MaskMI);
+ break;
+ default:
+ I = MBB.end();
+ break;
+ }
+ }
+}
+
bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
@@ -503,6 +656,8 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
LIS = getAnalysisIfAvailable<LiveIntervals>();
MRI = &MF.getRegInfo();
BoolRC = TRI->getBoolRC();
+ InsertKillCleanups =
+ MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS;
if (ST.isWave32()) {
AndOpc = AMDGPU::S_AND_B32;
@@ -524,57 +679,49 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
Exec = AMDGPU::EXEC;
}
+ SmallVector<MachineInstr *, 32> Worklist;
+
MachineFunction::iterator NextBB;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; BI = NextBB) {
NextBB = std::next(BI);
MachineBasicBlock &MBB = *BI;
- MachineBasicBlock::iterator I, Next, Last;
-
- for (I = MBB.begin(), Last = MBB.end(); I != MBB.end(); I = Next) {
+ MachineBasicBlock::iterator I, Next;
+ for (I = MBB.begin(); I != MBB.end(); I = Next) {
Next = std::next(I);
MachineInstr &MI = *I;
switch (MI.getOpcode()) {
case AMDGPU::SI_IF:
- emitIf(MI);
+ process(MI);
break;
case AMDGPU::SI_ELSE:
- emitElse(MI);
- break;
-
case AMDGPU::SI_IF_BREAK:
- emitIfBreak(MI);
- break;
-
case AMDGPU::SI_LOOP:
- emitLoop(MI);
- break;
-
case AMDGPU::SI_END_CF:
- emitEndCf(MI);
+ // Only build worklist if SI_IF instructions must be processed first.
+ if (InsertKillCleanups)
+ Worklist.push_back(&MI);
+ else
+ process(MI);
break;
- case AMDGPU::S_AND_B64:
- case AMDGPU::S_OR_B64:
- case AMDGPU::S_AND_B32:
- case AMDGPU::S_OR_B32:
- // Cleanup bit manipulations on exec mask
- combineMasks(MI);
- Last = I;
- continue;
-
default:
- Last = I;
- continue;
+ break;
}
-
- // Replay newly inserted code to combine masks
- Next = (Last == MBB.end()) ? MBB.begin() : Last;
}
}
+ for (MachineInstr *MI : Worklist)
+ process(*MI);
+
+ optimizeEndCf();
+
+ LoweredEndCf.clear();
+ LoweredIf.clear();
+ NeedsKillCleanup.clear();
+
return true;
}
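optimizeEndCf above walks forward from each lowered END_CF, skipping instructions that do not read exec and falling through blocks with a single successor; if the first meaningful instruction reached is another lowered END_CF whose saved mask came from SI_IF, the inner END_CF is redundant and can be erased. A small standalone model of just the forward walk follows; the Block struct and character opcodes are a toy encoding, not LLVM's MachineBasicBlock API.

#include <cstddef>
#include <cstdio>
#include <set>
#include <utility>
#include <vector>

struct Block {
  std::vector<char> Insts; // 'n' = ignorable, 'E' = lowered END_CF, 'r' = reads exec
  std::vector<int> Succs;  // successor block ids
};

// Walk forward from position It in block B, skipping ignorable instructions
// and falling through blocks that have exactly one successor, until an
// instruction that matters is found; returns {-1, -1} if the walk gives up.
static std::pair<int, int> skipTrivialSucc(const std::vector<Block> &CFG,
                                           int B, std::size_t It) {
  std::set<int> Visited;
  while (Visited.insert(B).second) {
    const Block &Blk = CFG[B];
    for (; It < Blk.Insts.size(); ++It)
      if (Blk.Insts[It] != 'n')
        return {B, static_cast<int>(It)}; // found a meaningful instruction
    if (Blk.Succs.size() != 1)
      break;                              // non-trivial control flow: give up
    B = Blk.Succs[0];
    It = 0;
  }
  return {-1, -1};
}

int main() {
  // B0: [END_CF, nop] -> B1: [END_CF, reads-exec]
  std::vector<Block> CFG = {{{'E', 'n'}, {1}}, {{'E', 'r'}, {}}};
  auto [B, I] = skipTrivialSucc(CFG, 0, 1); // start just past B0's END_CF
  if (B >= 0 && CFG[B].Insts[I] == 'E')
    std::printf("the END_CF in B0 is redundant\n");
  return 0;
}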
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 1d45e6241d22..236a24a02ece 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -452,6 +452,11 @@ static unsigned insertUndefLaneMask(MachineBasicBlock &MBB) {
/// all others, because phi lowering looks through copies and can therefore
/// often make copy lowering unnecessary.
bool SILowerI1Copies::runOnMachineFunction(MachineFunction &TheMF) {
+ // Only need to run this in SelectionDAG path.
+ if (TheMF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Selected))
+ return false;
+
MF = &TheMF;
MRI = &MF->getRegInfo();
DT = &getAnalysis<MachineDominatorTree>();
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 57ccf7641666..1349d3b6bf3f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -100,7 +100,8 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
unsigned Reg = CS.getReg();
MachineInstrSpan MIS(I, &SaveBlock);
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, MVT::i32);
TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
TRI);
@@ -118,7 +119,7 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
/// Insert restore code for the callee-saved registers used in the function.
static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
- std::vector<CalleeSavedInfo> &CSI,
+ MutableArrayRef<CalleeSavedInfo> CSI,
LiveIntervals *LIS) {
MachineFunction &MF = *RestoreBlock.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
@@ -133,7 +134,8 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
for (const CalleeSavedInfo &CI : reverse(CSI)) {
unsigned Reg = CI.getReg();
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, MVT::i32);
TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
assert(I != RestoreBlock.begin() &&
@@ -206,10 +208,10 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
for (unsigned I = 0; CSRegs[I]; ++I) {
unsigned Reg = CSRegs[I];
if (SavedRegs.test(Reg)) {
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, MVT::i32);
int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
- TRI->getSpillAlignment(*RC),
- true);
+ TRI->getSpillAlign(*RC), true);
CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
}
@@ -228,6 +230,47 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
return false;
}
+// Find the lowest available VGPR and use it as the VGPR reserved for SGPR spills.
+static bool lowerShiftReservedVGPR(MachineFunction &MF,
+ const GCNSubtarget &ST) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ Register LowestAvailableVGPR, ReservedVGPR;
+ ArrayRef<MCPhysReg> AllVGPR32s = ST.getRegisterInfo()->getAllVGPR32(MF);
+ for (MCPhysReg Reg : AllVGPR32s) {
+ if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg)) {
+ LowestAvailableVGPR = Reg;
+ break;
+ }
+ }
+
+ if (!LowestAvailableVGPR)
+ return false;
+
+ ReservedVGPR = FuncInfo->VGPRReservedForSGPRSpill;
+ const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
+ int i = 0;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) {
+ if (Reg.VGPR == ReservedVGPR) {
+ MBB.removeLiveIn(ReservedVGPR);
+ MBB.addLiveIn(LowestAvailableVGPR);
+ Optional<int> FI;
+ if (FuncInfo->isCalleeSavedReg(CSRegs, LowestAvailableVGPR))
+ FI = FrameInfo.CreateSpillStackObject(4, Align(4));
+
+ FuncInfo->setSGPRSpillVGPRs(LowestAvailableVGPR, FI, i);
+ }
+ ++i;
+ }
+ MBB.sortUniqueLiveIns();
+ }
+
+ return true;
+}
+
bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
@@ -267,6 +310,9 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
//
// This operates under the assumption that only other SGPR spills are users
// of the frame index.
+
+ lowerShiftReservedVGPR(MF, ST);
+
for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::iterator Next;
for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
@@ -315,6 +361,8 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
}
MadeChange = true;
+ } else if (FuncInfo->VGPRReservedForSGPRSpill) {
+ FuncInfo->removeVGPRForSGPRSpill(FuncInfo->VGPRReservedForSGPRSpill, MF);
}
SaveBlocks.clear();
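lowerShiftReservedVGPR above first looks for the lowest VGPR that is still allocatable and unused, and only then re-points the existing SGPR-spill assignments and block live-ins at it. The sketch below models only that first step, the lowest-free-register scan, over a hypothetical 8-register file; lowestFreeReg and the register count are invented for illustration.

#include <array>
#include <cstdio>
#include <optional>

// Pick the lowest register index that is both allocatable and not already
// used; the VGPR reserved for SGPR spills can then be shifted down to it.
static std::optional<int> lowestFreeReg(const std::array<bool, 8> &Allocatable,
                                        const std::array<bool, 8> &Used) {
  for (int R = 0; R < 8; ++R)
    if (Allocatable[R] && !Used[R])
      return R;
  return std::nullopt; // no free register: keep the current reservation
}

int main() {
  std::array<bool, 8> Alloc{}, Used{};
  Alloc.fill(true);
  Used[0] = Used[1] = true; // v0 and v1 are already taken
  if (auto R = lowestFreeReg(Alloc, Used))
    std::printf("shift the reserved spill VGPR down to v%d\n", *R);
  return 0;
}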
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 0c67b1467a5d..788e9873f780 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -8,6 +8,7 @@
#include "SIMachineFunctionInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
+#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -52,9 +53,18 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
WavesPerEU = ST.getWavesPerEU(F);
- Occupancy = ST.computeOccupancy(MF, getLDSSize());
+ Occupancy = ST.computeOccupancy(F, getLDSSize());
CallingConv::ID CC = F.getCallingConv();
+ // FIXME: Should have analysis or something rather than attribute to detect
+ // calls.
+ const bool HasCalls = F.hasFnAttribute("amdgpu-calls");
+
+ // Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't
+ // have any calls.
+ const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
+ (!isEntryFunction() || HasCalls);
+
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
if (!F.arg_empty())
KernargSegmentPtr = true;
@@ -68,16 +78,13 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
// Non-entry functions have no special inputs for now, other registers
// required for scratch access.
ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
- ScratchWaveOffsetReg = AMDGPU::SGPR33;
// TODO: Pick a high register, and shift down, similar to a kernel.
- FrameOffsetReg = AMDGPU::SGPR34;
+ FrameOffsetReg = AMDGPU::SGPR33;
StackPtrOffsetReg = AMDGPU::SGPR32;
ArgInfo.PrivateSegmentBuffer =
ArgDescriptor::createRegister(ScratchRSrcReg);
- ArgInfo.PrivateSegmentWaveByteOffset =
- ArgDescriptor::createRegister(ScratchWaveOffsetReg);
if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
ImplicitArgPtr = true;
@@ -89,27 +96,35 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
}
}
- if (F.hasFnAttribute("amdgpu-work-group-id-x"))
+ if (UseFixedABI) {
WorkGroupIDX = true;
-
- if (F.hasFnAttribute("amdgpu-work-group-id-y"))
WorkGroupIDY = true;
-
- if (F.hasFnAttribute("amdgpu-work-group-id-z"))
WorkGroupIDZ = true;
-
- if (F.hasFnAttribute("amdgpu-work-item-id-x"))
WorkItemIDX = true;
-
- if (F.hasFnAttribute("amdgpu-work-item-id-y"))
WorkItemIDY = true;
-
- if (F.hasFnAttribute("amdgpu-work-item-id-z"))
WorkItemIDZ = true;
+ ImplicitArgPtr = true;
+ } else {
+ if (F.hasFnAttribute("amdgpu-work-group-id-x"))
+ WorkGroupIDX = true;
+
+ if (F.hasFnAttribute("amdgpu-work-group-id-y"))
+ WorkGroupIDY = true;
+
+ if (F.hasFnAttribute("amdgpu-work-group-id-z"))
+ WorkGroupIDZ = true;
+
+ if (F.hasFnAttribute("amdgpu-work-item-id-x"))
+ WorkItemIDX = true;
- const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
- bool HasStackObjects = FrameInfo.hasStackObjects();
+ if (F.hasFnAttribute("amdgpu-work-item-id-y"))
+ WorkItemIDY = true;
+
+ if (F.hasFnAttribute("amdgpu-work-item-id-z"))
+ WorkItemIDZ = true;
+ }
+ bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
if (isEntryFunction()) {
// X, XY, and XYZ are the only supported combinations, so make sure Y is
// enabled if Z is.
@@ -129,36 +144,34 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (isAmdHsaOrMesa) {
PrivateSegmentBuffer = true;
- if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
+ if (UseFixedABI) {
DispatchPtr = true;
-
- if (F.hasFnAttribute("amdgpu-queue-ptr"))
QueuePtr = true;
- if (F.hasFnAttribute("amdgpu-dispatch-id"))
+ // FIXME: We don't need this?
DispatchID = true;
+ } else {
+ if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
+ DispatchPtr = true;
+
+ if (F.hasFnAttribute("amdgpu-queue-ptr"))
+ QueuePtr = true;
+
+ if (F.hasFnAttribute("amdgpu-dispatch-id"))
+ DispatchID = true;
+ }
} else if (ST.isMesaGfxShader(F)) {
ImplicitBufferPtr = true;
}
- if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
+ if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
KernargSegmentPtr = true;
if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
- auto hasNonSpillStackObjects = [&]() {
- // Avoid expensive checking if there's no stack objects.
- if (!HasStackObjects)
- return false;
- for (auto OI = FrameInfo.getObjectIndexBegin(),
- OE = FrameInfo.getObjectIndexEnd(); OI != OE; ++OI)
- if (!FrameInfo.isSpillSlotObjectIndex(OI))
- return true;
- // All stack objects are spill slots.
- return false;
- };
// TODO: This could be refined a lot. The attribute is a poor way of
- // detecting calls that may require it before argument lowering.
- if (hasNonSpillStackObjects() || F.hasFnAttribute("amdgpu-flat-scratch"))
+ // detecting calls or stack objects that may require it before argument
+ // lowering.
+ if (HasCalls || HasStackObjects)
FlatScratchInit = true;
}
@@ -184,7 +197,7 @@ void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
MF.getFunction()));
}
-unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
+Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
const SIRegisterInfo &TRI) {
ArgInfo.PrivateSegmentBuffer =
ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
@@ -193,21 +206,21 @@ unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
return ArgInfo.PrivateSegmentBuffer.getRegister();
}
-unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
+Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
NumUserSGPRs += 2;
return ArgInfo.DispatchPtr.getRegister();
}
-unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
+Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
NumUserSGPRs += 2;
return ArgInfo.QueuePtr.getRegister();
}
-unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
+Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
ArgInfo.KernargSegmentPtr
= ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
@@ -215,28 +228,29 @@ unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI)
return ArgInfo.KernargSegmentPtr.getRegister();
}
-unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
+Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
NumUserSGPRs += 2;
return ArgInfo.DispatchID.getRegister();
}
-unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
+Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
NumUserSGPRs += 2;
return ArgInfo.FlatScratchInit.getRegister();
}
-unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
+Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
NumUserSGPRs += 2;
return ArgInfo.ImplicitBufferPtr.getRegister();
}
-static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
+bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
+ MCPhysReg Reg) {
for (unsigned I = 0; CSRegs[I]; ++I) {
if (CSRegs[I] == Reg)
return true;
@@ -270,22 +284,35 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned WaveSize = ST.getWavefrontSize();
+ SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
unsigned Size = FrameInfo.getObjectSize(FI);
- assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
- assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
+ unsigned NumLanes = Size / 4;
+
+ if (NumLanes > WaveSize)
+ return false;
- int NumLanes = Size / 4;
+ assert(Size >= 4 && "invalid sgpr spill size");
+ assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
// Make sure to handle the case where a wide SGPR spill may span between two
// VGPRs.
- for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
- unsigned LaneVGPR;
+ for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
+ Register LaneVGPR;
unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
- if (VGPRIndex == 0) {
+ // Reserve a VGPR (when NumVGPRSpillLanes = 0, WaveSize, 2*WaveSize, ...)
+ // when one of the two conditions is true:
+ // 1. One reserved VGPR being tracked by VGPRReservedForSGPRSpill is not yet
+ // reserved.
+ // 2. All spill lanes of reserved VGPR(s) are full and another spill lane is
+ // required.
+ if (FuncInfo->VGPRReservedForSGPRSpill && NumVGPRSpillLanes < WaveSize) {
+ assert(FuncInfo->VGPRReservedForSGPRSpill == SpillVGPRs.back().VGPR);
+ LaneVGPR = FuncInfo->VGPRReservedForSGPRSpill;
+ } else if (VGPRIndex == 0) {
LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
if (LaneVGPR == AMDGPU::NoRegister) {
// We have no VGPRs left for spilling SGPRs. Reset because we will not
@@ -298,7 +325,7 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
Optional<int> CSRSpillFI;
if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
isCalleeSavedReg(CSRegs, LaneVGPR)) {
- CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
+ CSRSpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
}
SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));
@@ -317,6 +344,19 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
return true;
}
+/// Reserve a VGPR for spilling of SGPRs
+bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+
+ Register LaneVGPR = TRI->findUnusedRegister(
+ MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
+ SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, None));
+ FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR;
+ return true;
+}
+
/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR to vice versa.
/// Returns true if a \p FI can be eliminated completely.
@@ -386,9 +426,9 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
}
void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
- // The FP spill hasn't been inserted yet, so keep it around.
+ // The FP & BP spills haven't been inserted yet, so keep them around.
for (auto &R : SGPRToVGPRSpills) {
- if (R.first != FramePointerSaveIndex)
+ if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex)
MFI.RemoveStackObject(R.first);
}
@@ -396,7 +436,7 @@ void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
// ID.
for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
++i)
- if (i != FramePointerSaveIndex)
+ if (i != FramePointerSaveIndex && i != BasePointerSaveIndex)
MFI.setStackID(i, TargetStackID::Default);
for (auto &R : VGPRToAGPRSpills) {
@@ -414,7 +454,28 @@ MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}
-static yaml::StringValue regToString(unsigned Reg,
+Register
+SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ if (!ST.isAmdPalOS())
+ return Register();
+ Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
+ if (ST.hasMergedShaders()) {
+ switch (MF.getFunction().getCallingConv()) {
+ case CallingConv::AMDGPU_HS:
+ case CallingConv::AMDGPU_GS:
+ // Low GIT address is passed in s8 rather than s0 for an LS+HS or
+ // ES+GS merged shader on gfx9+.
+ GitPtrLo = AMDGPU::SGPR8;
+ return GitPtrLo;
+ default:
+ return GitPtrLo;
+ }
+ }
+ return GitPtrLo;
+}
+
+static yaml::StringValue regToString(Register Reg,
const TargetRegisterInfo &TRI) {
yaml::StringValue Dest;
{
@@ -487,7 +548,6 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
WaveLimiter(MFI.needsWaveLimiter()),
HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
- ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)),
FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
@@ -509,3 +569,21 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
WaveLimiter = YamlMFI.WaveLimiter;
return false;
}
+
+// Remove the VGPR that was reserved for SGPR spills if there are no spilled SGPRs.
+bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
+ MachineFunction &MF) {
+ for (auto *i = SpillVGPRs.begin(); i < SpillVGPRs.end(); i++) {
+ if (i->VGPR == ReservedVGPR) {
+ SpillVGPRs.erase(i);
+
+ for (MachineBasicBlock &MBB : MF) {
+ MBB.removeLiveIn(ReservedVGPR);
+ MBB.sortUniqueLiveIns();
+ }
+ this->VGPRReservedForSGPRSpill = AMDGPU::NoRegister;
+ return true;
+ }
+ }
+ return false;
+}
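The constructor changes above gate the implicit kernel inputs on one decision: with the fixed function ABI, any non-entry function (or an entry function that makes calls) enables the whole set, otherwise each input is enabled only when its function attribute is present. The following is a hedged sketch of that decision using plain strings in place of LLVM's attribute API; only a subset of the flags is modelled and all names are illustrative.

#include <cstdio>
#include <set>
#include <string>

struct InputFlags {
  bool WorkGroupIDX = false, WorkGroupIDY = false, WorkGroupIDZ = false;
  bool ImplicitArgPtr = false;
};

// Decide which implicit inputs a function needs. Under a fixed ABI every
// non-entry function (or an entry function that makes calls) gets the full
// set; otherwise each input is enabled only if its attribute is present.
static InputFlags computeInputs(bool FixedABI, bool IsEntry, bool HasCalls,
                                const std::set<std::string> &Attrs) {
  InputFlags F;
  bool UseFixedABI = FixedABI && (!IsEntry || HasCalls);
  if (UseFixedABI) {
    F.WorkGroupIDX = F.WorkGroupIDY = F.WorkGroupIDZ = true;
    F.ImplicitArgPtr = true;
  } else {
    F.WorkGroupIDX = Attrs.count("amdgpu-work-group-id-x");
    F.WorkGroupIDY = Attrs.count("amdgpu-work-group-id-y");
    F.WorkGroupIDZ = Attrs.count("amdgpu-work-group-id-z");
  }
  return F;
}

int main() {
  InputFlags F = computeInputs(/*FixedABI=*/true, /*IsEntry=*/false,
                               /*HasCalls=*/false, {});
  std::printf("x=%d y=%d z=%d implicitarg=%d\n", F.WorkGroupIDX,
              F.WorkGroupIDY, F.WorkGroupIDZ, F.ImplicitArgPtr);
  return 0;
}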
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index ef0186f7d57f..cf1629fda0af 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -236,23 +236,29 @@ template <> struct MappingTraits<SIArgumentInfo> {
struct SIMode {
bool IEEE = true;
bool DX10Clamp = true;
- bool FP32Denormals = true;
- bool FP64FP16Denormals = true;
+ bool FP32InputDenormals = true;
+ bool FP32OutputDenormals = true;
+ bool FP64FP16InputDenormals = true;
+ bool FP64FP16OutputDenormals = true;
SIMode() = default;
SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
IEEE = Mode.IEEE;
DX10Clamp = Mode.DX10Clamp;
- FP32Denormals = Mode.FP32Denormals;
- FP64FP16Denormals = Mode.FP64FP16Denormals;
+ FP32InputDenormals = Mode.FP32InputDenormals;
+ FP32OutputDenormals = Mode.FP32OutputDenormals;
+ FP64FP16InputDenormals = Mode.FP64FP16InputDenormals;
+ FP64FP16OutputDenormals = Mode.FP64FP16OutputDenormals;
}
bool operator ==(const SIMode Other) const {
return IEEE == Other.IEEE &&
DX10Clamp == Other.DX10Clamp &&
- FP32Denormals == Other.FP32Denormals &&
- FP64FP16Denormals == Other.FP64FP16Denormals;
+ FP32InputDenormals == Other.FP32InputDenormals &&
+ FP32OutputDenormals == Other.FP32OutputDenormals &&
+ FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
+ FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
}
};
@@ -260,8 +266,10 @@ template <> struct MappingTraits<SIMode> {
static void mapping(IO &YamlIO, SIMode &Mode) {
YamlIO.mapOptional("ieee", Mode.IEEE, true);
YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
- YamlIO.mapOptional("fp32-denormals", Mode.FP32Denormals, true);
- YamlIO.mapOptional("fp64-fp16-denormals", Mode.FP64FP16Denormals, true);
+ YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true);
+ YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true);
+ YamlIO.mapOptional("fp64-fp16-input-denormals", Mode.FP64FP16InputDenormals, true);
+ YamlIO.mapOptional("fp64-fp16-output-denormals", Mode.FP64FP16OutputDenormals, true);
}
};
@@ -276,7 +284,6 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
uint32_t HighBitsOf32BitAddress = 0;
StringValue ScratchRSrcReg = "$private_rsrc_reg";
- StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg";
StringValue FrameOffsetReg = "$fp_reg";
StringValue StackPtrOffsetReg = "$sp_reg";
@@ -303,8 +310,6 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
StringValue("$private_rsrc_reg"));
- YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg,
- StringValue("$scratch_wave_offset_reg"));
YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
StringValue("$fp_reg"));
YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
@@ -323,20 +328,20 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
friend class GCNTargetMachine;
- unsigned TIDReg = AMDGPU::NoRegister;
+ Register TIDReg = AMDGPU::NoRegister;
// Registers that may be reserved for spilling purposes. These may be the same
// as the input registers.
- unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
- unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;
+ Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
- // This is the current function's incremented size from the kernel's scratch
- // wave offset register. For an entry function, this is exactly the same as
- // the ScratchWaveOffsetReg.
- unsigned FrameOffsetReg = AMDGPU::FP_REG;
+ // This is the unswizzled offset from the current dispatch's scratch wave
+ // base to the beginning of the current function's frame.
+ Register FrameOffsetReg = AMDGPU::FP_REG;
- // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
- unsigned StackPtrOffsetReg = AMDGPU::SP_REG;
+ // This is an ABI register used in the non-entry calling convention to
+ // communicate the unswizzled offset from the current dispatch's scratch wave
+ // base to the beginning of the new function's frame.
+ Register StackPtrOffsetReg = AMDGPU::SP_REG;
AMDGPUFunctionArgInfo ArgInfo;
@@ -429,11 +434,11 @@ private:
public:
struct SpilledReg {
- unsigned VGPR = 0;
+ Register VGPR;
int Lane = -1;
SpilledReg() = default;
- SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}
+ SpilledReg(Register R, int L) : VGPR (R), Lane (L) {}
bool hasLane() { return Lane != -1;}
bool hasReg() { return VGPR != 0;}
@@ -441,13 +446,13 @@ public:
struct SGPRSpillVGPRCSR {
// VGPR used for SGPR spills
- unsigned VGPR;
+ Register VGPR;
// If the VGPR is a CSR, the stack slot used to save/restore it in the
// prolog/epilog.
Optional<int> FI;
- SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
+ SGPRSpillVGPRCSR(Register V, Optional<int> F) : VGPR(V), FI(F) {}
};
struct VGPRSpillToAGPR {
@@ -457,12 +462,9 @@ public:
SparseBitVector<> WWMReservedRegs;
- void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); }
+ void ReserveWWMRegister(Register Reg) { WWMReservedRegs.set(Reg); }
private:
- // SGPR->VGPR spilling support.
- using SpillRegMask = std::pair<unsigned, unsigned>;
-
// Track VGPR + wave index for each subregister of the SGPR spilled to
// frameindex key.
DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
@@ -480,9 +482,17 @@ private:
public: // FIXME
/// If this is set, an SGPR used for save/restore of the register used for the
/// frame pointer.
- unsigned SGPRForFPSaveRestoreCopy = 0;
+ Register SGPRForFPSaveRestoreCopy;
Optional<int> FramePointerSaveIndex;
+ /// If this is set, an SGPR used for save/restore of the register used for the
+ /// base pointer.
+ Register SGPRForBPSaveRestoreCopy;
+ Optional<int> BasePointerSaveIndex;
+
+ Register VGPRReservedForSGPRSpill;
+ bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg);
+
public:
SIMachineFunctionInfo(const MachineFunction &MF);
@@ -498,6 +508,14 @@ public:
return SpillVGPRs;
}
+ void setSGPRSpillVGPRs(Register NewVGPR, Optional<int> newFI, int Index) {
+ SpillVGPRs[Index].VGPR = NewVGPR;
+ SpillVGPRs[Index].FI = newFI;
+ VGPRReservedForSGPRSpill = NewVGPR;
+ }
+
+ bool removeVGPRForSGPRSpill(Register ReservedVGPR, MachineFunction &MF);
+
ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
return SpillAGPR;
}
@@ -515,12 +533,13 @@ public:
bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
unsigned NumLane) const;
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
+ bool reserveVGPRforSGPRSpills(MachineFunction &MF);
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
void removeDeadFrameIndices(MachineFrameInfo &MFI);
bool hasCalculatedTID() const { return TIDReg != 0; };
- unsigned getTIDReg() const { return TIDReg; };
- void setTIDReg(unsigned Reg) { TIDReg = Reg; }
+ Register getTIDReg() const { return TIDReg; };
+ void setTIDReg(Register Reg) { TIDReg = Reg; }
unsigned getBytesInStackArgArea() const {
return BytesInStackArgArea;
@@ -531,34 +550,34 @@ public:
}
// Add user SGPRs.
- unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
- unsigned addDispatchPtr(const SIRegisterInfo &TRI);
- unsigned addQueuePtr(const SIRegisterInfo &TRI);
- unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
- unsigned addDispatchID(const SIRegisterInfo &TRI);
- unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
- unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
+ Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
+ Register addDispatchPtr(const SIRegisterInfo &TRI);
+ Register addQueuePtr(const SIRegisterInfo &TRI);
+ Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
+ Register addDispatchID(const SIRegisterInfo &TRI);
+ Register addFlatScratchInit(const SIRegisterInfo &TRI);
+ Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
// Add system SGPRs.
- unsigned addWorkGroupIDX() {
+ Register addWorkGroupIDX() {
ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
return ArgInfo.WorkGroupIDX.getRegister();
}
- unsigned addWorkGroupIDY() {
+ Register addWorkGroupIDY() {
ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
return ArgInfo.WorkGroupIDY.getRegister();
}
- unsigned addWorkGroupIDZ() {
+ Register addWorkGroupIDZ() {
ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
return ArgInfo.WorkGroupIDZ.getRegister();
}
- unsigned addWorkGroupInfo() {
+ Register addWorkGroupInfo() {
ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
return ArgInfo.WorkGroupInfo.getRegister();
@@ -577,14 +596,14 @@ public:
ArgInfo.WorkItemIDZ = Arg;
}
- unsigned addPrivateSegmentWaveByteOffset() {
+ Register addPrivateSegmentWaveByteOffset() {
ArgInfo.PrivateSegmentWaveByteOffset
= ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
}
- void setPrivateSegmentWaveByteOffset(unsigned Reg) {
+ void setPrivateSegmentWaveByteOffset(Register Reg) {
ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
}
@@ -660,13 +679,13 @@ public:
return ArgInfo;
}
- std::pair<const ArgDescriptor *, const TargetRegisterClass *>
+ std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
return ArgInfo.getPreloadedValue(Value);
}
Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
- auto Arg = ArgInfo.getPreloadedValue(Value).first;
+ auto Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));
return Arg ? Arg->getRegister() : Register();
}
@@ -674,6 +693,8 @@ public:
return GITPtrHigh;
}
+ Register getGITPtrLoReg(const MachineFunction &MF) const;
+
uint32_t get32BitAddressHighBits() const {
return HighBitsOf32BitAddress;
}
@@ -690,35 +711,31 @@ public:
return NumUserSGPRs + NumSystemSGPRs;
}
- unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
+ Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {
return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
}
/// Returns the physical register reserved for use as the resource
/// descriptor for scratch accesses.
- unsigned getScratchRSrcReg() const {
+ Register getScratchRSrcReg() const {
return ScratchRSrcReg;
}
- void setScratchRSrcReg(unsigned Reg) {
+ void setScratchRSrcReg(Register Reg) {
assert(Reg != 0 && "Should never be unset");
ScratchRSrcReg = Reg;
}
- unsigned getScratchWaveOffsetReg() const {
- return ScratchWaveOffsetReg;
- }
-
- unsigned getFrameOffsetReg() const {
+ Register getFrameOffsetReg() const {
return FrameOffsetReg;
}
- void setFrameOffsetReg(unsigned Reg) {
+ void setFrameOffsetReg(Register Reg) {
assert(Reg != 0 && "Should never be unset");
FrameOffsetReg = Reg;
}
- void setStackPtrOffsetReg(unsigned Reg) {
+ void setStackPtrOffsetReg(Register Reg) {
assert(Reg != 0 && "Should never be unset");
StackPtrOffsetReg = Reg;
}
@@ -727,20 +744,15 @@ public:
// NoRegister. This is mostly a workaround for MIR tests where state that
// can't be directly computed from the function is not preserved in serialized
// MIR.
- unsigned getStackPtrOffsetReg() const {
+ Register getStackPtrOffsetReg() const {
return StackPtrOffsetReg;
}
- void setScratchWaveOffsetReg(unsigned Reg) {
- assert(Reg != 0 && "Should never be unset");
- ScratchWaveOffsetReg = Reg;
- }
-
- unsigned getQueuePtrUserSGPR() const {
+ Register getQueuePtrUserSGPR() const {
return ArgInfo.QueuePtr.getRegister();
}
- unsigned getImplicitBufferPtrUserSGPR() const {
+ Register getImplicitBufferPtrUserSGPR() const {
return ArgInfo.ImplicitBufferPtr.getRegister();
}
@@ -853,7 +865,7 @@ public:
}
/// \returns SGPR used for \p Dim's work group ID.
- unsigned getWorkGroupIDSGPR(unsigned Dim) const {
+ Register getWorkGroupIDSGPR(unsigned Dim) const {
switch (Dim) {
case 0:
assert(hasWorkGroupIDX());
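Much of the header churn above replaces unsigned register fields with the Register wrapper type, whose practical benefit is that an unset value (0) is directly testable, as in the check if (!LowestAvailableVGPR) earlier in this diff. Below is a toy stand-in for that idiom; RegHandle and pickFrameOffsetReg are invented names, and the value 33 merely stands in for SGPR33.

#include <cstdio>

// Minimal stand-in for a typed register handle: value 0 means "no register",
// and the explicit bool conversion lets callers test it directly.
class RegHandle {
  unsigned Id = 0;

public:
  RegHandle() = default;
  RegHandle(unsigned Id) : Id(Id) {}
  explicit operator bool() const { return Id != 0; }
  unsigned id() const { return Id; }
};

// Hypothetical helper: non-entry functions use a fixed frame-offset register,
// entry functions leave it unset and pick one later.
static RegHandle pickFrameOffsetReg(bool IsEntry) {
  return IsEntry ? RegHandle() : RegHandle(33);
}

int main() {
  RegHandle R = pickFrameOffsetReg(/*IsEntry=*/false);
  if (R)
    std::printf("frame offset register: s%u\n", R.id());
  return 0;
}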
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
index 004a3cb185d6..3ba05aadbbbe 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -269,8 +269,8 @@ SUnit* SIScheduleBlock::pickNode() {
// Predict register usage after this instruction.
TryCand.SU = SU;
TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure);
- TryCand.SGPRUsage = pressure[DAG->getSGPRSetID()];
- TryCand.VGPRUsage = pressure[DAG->getVGPRSetID()];
+ TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32];
+ TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32];
TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum];
TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum];
TryCand.HasLowLatencyNonWaitedParent =
@@ -595,10 +595,12 @@ void SIScheduleBlock::printDebug(bool full) {
}
if (Scheduled) {
- dbgs() << "LiveInPressure " << LiveInPressure[DAG->getSGPRSetID()] << ' '
- << LiveInPressure[DAG->getVGPRSetID()] << '\n';
- dbgs() << "LiveOutPressure " << LiveOutPressure[DAG->getSGPRSetID()] << ' '
- << LiveOutPressure[DAG->getVGPRSetID()] << "\n\n";
+ dbgs() << "LiveInPressure "
+ << LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
+ << LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] << '\n';
+ dbgs() << "LiveOutPressure "
+ << LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
+ << LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n\n";
dbgs() << "LiveIns:\n";
for (unsigned Reg : LiveInRegs)
dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
@@ -1637,7 +1639,7 @@ SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
TryCand.VGPRUsageDiff =
checkRegUsageImpact(TryCand.Block->getInRegs(),
- TryCand.Block->getOutRegs())[DAG->getVGPRSetID()];
+ TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32];
TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
TryCand.NumHighLatencySuccessors =
TryCand.Block->getNumHighLatencySuccessors();
@@ -1796,9 +1798,6 @@ SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) :
ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C)) {
SITII = static_cast<const SIInstrInfo*>(TII);
SITRI = static_cast<const SIRegisterInfo*>(TRI);
-
- VGPRSetID = SITRI->getVGPRPressureSet();
- SGPRSetID = SITRI->getSGPRPressureSet();
}
SIScheduleDAGMI::~SIScheduleDAGMI() = default;
@@ -1909,9 +1908,9 @@ SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End,
continue;
PSetIterator PSetI = MRI.getPressureSets(Reg);
for (; PSetI.isValid(); ++PSetI) {
- if (*PSetI == VGPRSetID)
+ if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32)
VgprUsage += PSetI.getWeight();
- else if (*PSetI == SGPRSetID)
+ else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32)
SgprUsage += PSetI.getWeight();
}
}
@@ -1952,10 +1951,11 @@ void SIScheduleDAGMI::schedule()
int64_t OffLatReg;
if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
IsLowLatencySU[i] = 1;
+ bool OffsetIsScalable;
if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg,
- TRI))
+ OffsetIsScalable, TRI))
LowLatencyOffset[i] = OffLatReg;
- } else if (SITII->isHighLatencyInstruction(*SU->getInstr()))
+ } else if (SITII->isHighLatencyDef(SU->getInstr()->getOpcode()))
IsHighLatencySU[i] = 1;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
index ec450a316467..02e0a3fe1b61 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
@@ -435,9 +435,6 @@ class SIScheduleDAGMI final : public ScheduleDAGMILive {
std::vector<unsigned> ScheduledSUnits;
std::vector<unsigned> ScheduledSUnitsInv;
- unsigned VGPRSetID;
- unsigned SGPRSetID;
-
public:
SIScheduleDAGMI(MachineSchedContext *C);
@@ -484,9 +481,6 @@ public:
return OutRegs;
};
- unsigned getVGPRSetID() const { return VGPRSetID; }
- unsigned getSGPRSetID() const { return SGPRSetID; }
-
private:
void topologicalSort();
// After scheduling is done, improve low latency placements.
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index e914573306ae..4e6c72ca20e2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -254,6 +254,9 @@ protected:
IsaVersion IV;
+ /// Whether to insert cache invalidation instructions.
+ bool InsertCacheInv;
+
SICacheControl(const GCNSubtarget &ST);
public:
@@ -650,6 +653,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
SICacheControl::SICacheControl(const GCNSubtarget &ST) {
TII = ST.getInstrInfo();
IV = getIsaVersion(ST.getCPU());
+ InsertCacheInv = !ST.isAmdPalOS();
}
/* static */
@@ -714,6 +718,9 @@ bool SIGfx6CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
Position Pos) const {
+ if (!InsertCacheInv)
+ return false;
+
bool Changed = false;
MachineBasicBlock &MBB = *MI->getParent();
@@ -852,6 +859,9 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
Position Pos) const {
+ if (!InsertCacheInv)
+ return false;
+
bool Changed = false;
MachineBasicBlock &MBB = *MI->getParent();
@@ -954,6 +964,9 @@ bool SIGfx10CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
Position Pos) const {
+ if (!InsertCacheInv)
+ return false;
+
bool Changed = false;
MachineBasicBlock &MBB = *MI->getParent();
@@ -1289,6 +1302,21 @@ bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
for (auto &MBB : MF) {
for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
+
+ if (MI->getOpcode() == TargetOpcode::BUNDLE && MI->mayLoadOrStore()) {
+ MachineBasicBlock::instr_iterator II(MI->getIterator());
+ for (MachineBasicBlock::instr_iterator I = ++II, E = MBB.instr_end();
+ I != E && I->isBundledWithPred(); ++I) {
+ I->unbundleFromPred();
+ for (MachineOperand &MO : I->operands())
+ if (MO.isReg())
+ MO.setIsInternalRead(false);
+ }
+
+ MI->eraseFromParent();
+ MI = II->getIterator();
+ }
+
if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
continue;
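The new code above splits memory BUNDLEs apart before legalization: each bundled instruction is detached from its predecessor and has its internal-read flags cleared, and the BUNDLE header itself is erased, so every memory operation can be handled individually. A toy flattening of the same shape follows, using a plain list and string names instead of MachineInstr bundles; the encoding is purely illustrative.

#include <cstdio>
#include <list>
#include <string>

struct Inst {
  std::string Name;
  bool IsBundleHeader = false;
  bool BundledWithPred = false;
};

// Drop each bundle header and promote the bundled instructions to top level.
static void unbundle(std::list<Inst> &Block) {
  for (auto It = Block.begin(); It != Block.end();) {
    if (!It->IsBundleHeader) {
      ++It;
      continue;
    }
    auto Inner = std::next(It);
    while (Inner != Block.end() && Inner->BundledWithPred) {
      Inner->BundledWithPred = false; // now a standalone instruction
      ++Inner;
    }
    It = Block.erase(It);             // remove the BUNDLE wrapper itself
  }
}

int main() {
  std::list<Inst> B = {{"BUNDLE", true},
                       {"load", false, true},
                       {"store", false, true},
                       {"add"}};
  unbundle(B);
  for (const Inst &I : B)
    std::printf("%s bundled=%d\n", I.Name.c_str(), (int)I.BundledWithPred);
  return 0;
}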
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index 52989a280e80..0e162ac42c11 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -83,9 +83,7 @@ struct Status {
return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode);
}
- bool isCombinable(Status &S) {
- return !(Mask & S.Mask) || isCompatible(S);
- }
+ bool isCombinable(Status &S) { return !(Mask & S.Mask) || isCompatible(S); }
};
class BlockData {
@@ -110,7 +108,11 @@ public:
// which is used in Phase 3 if we need to insert a mode change.
MachineInstr *FirstInsertionPoint;
- BlockData() : FirstInsertionPoint(nullptr) {};
+ // A flag to indicate whether an Exit value has been set (we can't tell by
+ // examining the Exit value itself as all values may be valid results).
+ bool ExitSet;
+
+ BlockData() : FirstInsertionPoint(nullptr), ExitSet(false){};
};
namespace {
@@ -131,6 +133,8 @@ public:
Status DefaultStatus =
Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode));
+ bool Changed = false;
+
public:
SIModeRegister() : MachineFunctionPass(ID) {}
@@ -201,6 +205,7 @@ void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
(Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) |
(AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_));
++NumSetregInserted;
+ Changed = true;
InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);
}
}
@@ -325,24 +330,53 @@ void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
// exit value is propagated.
void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
const SIInstrInfo *TII) {
-// BlockData *BI = BlockInfo[MBB.getNumber()];
+ bool RevisitRequired = false;
+ bool ExitSet = false;
unsigned ThisBlock = MBB.getNumber();
if (MBB.pred_empty()) {
// There are no predecessors, so use the default starting status.
BlockInfo[ThisBlock]->Pred = DefaultStatus;
+ ExitSet = true;
} else {
// Build a status that is common to all the predecessors by intersecting
// all the predecessor exit status values.
+ // Mask bits (which represent the Mode bits with a known value) can only be
+ // added by explicit SETREG instructions or the initial default value -
+ // the intersection process may remove Mask bits.
+ // If we find a predecessor that has not yet had an exit value determined
+ // (this can happen for example if a block is its own predecessor) we defer
+ // use of that value as the Mask will be all zero, and we will revisit this
+ // block again later (unless the only predecessor without an exit value is
+ // this block).
MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end();
MachineBasicBlock &PB = *(*P);
- BlockInfo[ThisBlock]->Pred = BlockInfo[PB.getNumber()]->Exit;
+ unsigned PredBlock = PB.getNumber();
+ if ((ThisBlock == PredBlock) && (std::next(P) == E)) {
+ BlockInfo[ThisBlock]->Pred = DefaultStatus;
+ ExitSet = true;
+ } else if (BlockInfo[PredBlock]->ExitSet) {
+ BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
+ ExitSet = true;
+ } else if (PredBlock != ThisBlock)
+ RevisitRequired = true;
for (P = std::next(P); P != E; P = std::next(P)) {
MachineBasicBlock *Pred = *P;
- BlockInfo[ThisBlock]->Pred = BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[Pred->getNumber()]->Exit);
+ unsigned PredBlock = Pred->getNumber();
+ if (BlockInfo[PredBlock]->ExitSet) {
+ if (BlockInfo[ThisBlock]->ExitSet) {
+ BlockInfo[ThisBlock]->Pred =
+ BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit);
+ } else {
+ BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
+ }
+ ExitSet = true;
+ } else if (PredBlock != ThisBlock)
+ RevisitRequired = true;
}
}
- Status TmpStatus = BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);
+ Status TmpStatus =
+ BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);
if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
BlockInfo[ThisBlock]->Exit = TmpStatus;
// Add the successors to the work list so we can propagate the changed exit
@@ -354,6 +388,9 @@ void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
Phase2List.push(&B);
}
}
+ BlockInfo[ThisBlock]->ExitSet = ExitSet;
+ if (RevisitRequired)
+ Phase2List.push(&MBB);
}
// In Phase 3 we revisit each block and if it has an insertion point defined we
@@ -361,10 +398,10 @@ void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
// not we insert an appropriate setreg instruction to modify the Mode register.
void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
const SIInstrInfo *TII) {
-// BlockData *BI = BlockInfo[MBB.getNumber()];
unsigned ThisBlock = MBB.getNumber();
if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
- Status Delta = BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);
+ Status Delta =
+ BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);
if (BlockInfo[ThisBlock]->FirstInsertionPoint)
insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta);
else
@@ -401,5 +438,5 @@ bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
BlockInfo.clear();
- return NumSetregInserted > 0;
+ return Changed;
}
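As an aside on the Phase 2 change above: the deferral/revisit logic relies on the fact that intersecting predecessor exit values can only remove Mask bits, never add them. A minimal sketch of that intersection, using a hypothetical StatusSketch type rather than the pass's actual Status struct:

    #include <cstdint>

    struct StatusSketch {
      uint32_t Mask; // which Mode bits currently have a known value
      uint32_t Mode; // the values of those known bits

      StatusSketch intersect(const StatusSketch &S) const {
        // A bit stays known only if both sides know it and agree on its
        // value, so repeated intersections can only shrink Mask.
        uint32_t NewMask = (Mask & S.Mask) & ~(Mode ^ S.Mode);
        return {NewMask, Mode & NewMask};
      }
    };

Blocks whose predecessors do not yet have ExitSet are simply queued for another visit, which is why the new flag is needed at all.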
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 34199d3e425c..8af00fcf62a8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -7,15 +7,8 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// This pass removes redundant S_OR_B64 instructions enabling lanes in
-/// the exec. If two SI_END_CF (lowered as S_OR_B64) come together without any
-/// vector instructions between them we can only keep outer SI_END_CF, given
-/// that CFG is structured and exec bits of the outer end statement are always
-/// not less than exec bit of the inner one.
-///
-/// This needs to be done before the RA to eliminate saved exec bits registers
-/// but after register coalescer to have no vector registers copies in between
-/// of different end cf statements.
+/// This pass performs exec mask handling peephole optimizations which need
+/// to be done before register allocation to reduce register pressure.
///
//===----------------------------------------------------------------------===//
@@ -40,14 +33,6 @@ private:
MachineRegisterInfo *MRI;
public:
- MachineBasicBlock::iterator skipIgnoreExecInsts(
- MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) const;
-
- MachineBasicBlock::iterator skipIgnoreExecInstsTrivialSucc(
- MachineBasicBlock *&MBB,
- MachineBasicBlock::iterator It) const;
-
-public:
static char ID;
SIOptimizeExecMaskingPreRA() : MachineFunctionPass(ID) {
@@ -83,93 +68,15 @@ FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
return new SIOptimizeExecMaskingPreRA();
}
-static bool isEndCF(const MachineInstr &MI, const SIRegisterInfo *TRI,
- const GCNSubtarget &ST) {
- if (ST.isWave32()) {
- return MI.getOpcode() == AMDGPU::S_OR_B32 &&
- MI.modifiesRegister(AMDGPU::EXEC_LO, TRI);
- }
-
- return MI.getOpcode() == AMDGPU::S_OR_B64 &&
- MI.modifiesRegister(AMDGPU::EXEC, TRI);
-}
-
static bool isFullExecCopy(const MachineInstr& MI, const GCNSubtarget& ST) {
unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- if (MI.isCopy() && MI.getOperand(1).getReg() == Exec) {
- assert(MI.isFullCopy());
+ if (MI.isFullCopy() && MI.getOperand(1).getReg() == Exec)
return true;
- }
return false;
}
-static unsigned getOrNonExecReg(const MachineInstr &MI,
- const SIInstrInfo &TII,
- const GCNSubtarget& ST) {
- unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- auto Op = TII.getNamedOperand(MI, AMDGPU::OpName::src1);
- if (Op->isReg() && Op->getReg() != Exec)
- return Op->getReg();
- Op = TII.getNamedOperand(MI, AMDGPU::OpName::src0);
- if (Op->isReg() && Op->getReg() != Exec)
- return Op->getReg();
- return AMDGPU::NoRegister;
-}
-
-static MachineInstr* getOrExecSource(const MachineInstr &MI,
- const SIInstrInfo &TII,
- const MachineRegisterInfo &MRI,
- const GCNSubtarget& ST) {
- auto SavedExec = getOrNonExecReg(MI, TII, ST);
- if (SavedExec == AMDGPU::NoRegister)
- return nullptr;
- auto SaveExecInst = MRI.getUniqueVRegDef(SavedExec);
- if (!SaveExecInst || !isFullExecCopy(*SaveExecInst, ST))
- return nullptr;
- return SaveExecInst;
-}
-
-/// Skip over instructions that don't care about the exec mask.
-MachineBasicBlock::iterator SIOptimizeExecMaskingPreRA::skipIgnoreExecInsts(
- MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) const {
- for ( ; I != E; ++I) {
- if (TII->mayReadEXEC(*MRI, *I))
- break;
- }
-
- return I;
-}
-
-// Skip to the next instruction, ignoring debug instructions, and trivial block
-// boundaries (blocks that have one (typically fallthrough) successor, and the
-// successor has one predecessor.
-MachineBasicBlock::iterator
-SIOptimizeExecMaskingPreRA::skipIgnoreExecInstsTrivialSucc(
- MachineBasicBlock *&MBB,
- MachineBasicBlock::iterator It) const {
-
- do {
- It = skipIgnoreExecInsts(It, MBB->end());
- if (It != MBB->end() || MBB->succ_size() != 1)
- break;
-
- // If there is one trivial successor, advance to the next block.
- MachineBasicBlock *Succ = *MBB->succ_begin();
-
- // TODO: Is this really necessary?
- if (!MBB->isLayoutSuccessor(Succ))
- break;
-
- It = Succ->begin();
- MBB = Succ;
- } while (true);
-
- return It;
-}
-
-
// Optimize sequence
// %sel = V_CNDMASK_B32_e64 0, 1, %cc
// %cmp = V_CMP_NE_U32 1, %1
@@ -261,6 +168,11 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
And->getOperand(0).getReg())
.addReg(ExecReg)
.addReg(CCReg, getUndefRegState(CC->isUndef()), CC->getSubReg());
+ MachineOperand &AndSCC = And->getOperand(3);
+ assert(AndSCC.getReg() == AMDGPU::SCC);
+ MachineOperand &Andn2SCC = Andn2->getOperand(3);
+ assert(Andn2SCC.getReg() == AMDGPU::SCC);
+ Andn2SCC.setIsDead(AndSCC.isDead());
And->eraseFromParent();
LIS->InsertMachineInstrInMaps(*Andn2);
@@ -379,57 +291,30 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- // Try to collapse adjacent endifs.
- auto E = MBB.end();
- auto Lead = skipDebugInstructionsForward(MBB.begin(), E);
- if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI, ST))
- continue;
-
- MachineBasicBlock *TmpMBB = &MBB;
- auto NextLead = skipIgnoreExecInstsTrivialSucc(TmpMBB, std::next(Lead));
- if (NextLead == TmpMBB->end() || !isEndCF(*NextLead, TRI, ST) ||
- !getOrExecSource(*NextLead, *TII, MRI, ST))
- continue;
-
- LLVM_DEBUG(dbgs() << "Redundant EXEC = S_OR_B64 found: " << *Lead << '\n');
-
- auto SaveExec = getOrExecSource(*Lead, *TII, MRI, ST);
- unsigned SaveExecReg = getOrNonExecReg(*Lead, *TII, ST);
- for (auto &Op : Lead->operands()) {
- if (Op.isReg())
- RecalcRegs.insert(Op.getReg());
- }
-
- LIS->RemoveMachineInstrFromMaps(*Lead);
- Lead->eraseFromParent();
- if (SaveExecReg) {
- LIS->removeInterval(SaveExecReg);
- LIS->createAndComputeVirtRegInterval(SaveExecReg);
- }
-
- Changed = true;
-
- // If the only use of saved exec in the removed instruction is S_AND_B64
- // fold the copy now.
- if (!SaveExec || !SaveExec->isFullCopy())
- continue;
+ // If the only user of a logical operation is move to exec, fold it now
+ // to prevent forming of saveexec. I.e:
+ //
+ // %0:sreg_64 = COPY $exec
+ // %1:sreg_64 = S_AND_B64 %0:sreg_64, %2:sreg_64
+ // =>
+ // %1 = S_AND_B64 $exec, %2:sreg_64
+ unsigned ScanThreshold = 10;
+ for (auto I = MBB.rbegin(), E = MBB.rend(); I != E
+ && ScanThreshold--; ++I) {
+ if (!isFullExecCopy(*I, ST))
+ continue;
- Register SavedExec = SaveExec->getOperand(0).getReg();
- bool SafeToReplace = true;
- for (auto& U : MRI.use_nodbg_instructions(SavedExec)) {
- if (U.getParent() != SaveExec->getParent()) {
- SafeToReplace = false;
- break;
+ Register SavedExec = I->getOperand(0).getReg();
+ if (SavedExec.isVirtual() && MRI.hasOneNonDBGUse(SavedExec) &&
+ MRI.use_instr_nodbg_begin(SavedExec)->getParent() == I->getParent()) {
+ LLVM_DEBUG(dbgs() << "Redundant EXEC COPY: " << *I << '\n');
+ LIS->RemoveMachineInstrFromMaps(*I);
+ I->eraseFromParent();
+ MRI.replaceRegWith(SavedExec, Exec);
+ LIS->removeInterval(SavedExec);
+ Changed = true;
}
-
- LLVM_DEBUG(dbgs() << "Redundant EXEC COPY: " << *SaveExec << '\n');
- }
-
- if (SafeToReplace) {
- LIS->RemoveMachineInstrFromMaps(*SaveExec);
- SaveExec->eraseFromParent();
- MRI.replaceRegWith(SavedExec, Exec);
- LIS->removeInterval(SavedExec);
+ break;
}
}
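The replacement fold above boils down to a small legality check before the COPY from exec is erased. A minimal sketch, with a hypothetical canFoldExecCopy helper and assuming the same MachineRegisterInfo the pass already holds:

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"

    static bool canFoldExecCopy(const llvm::MachineInstr &Copy,
                                const llvm::MachineRegisterInfo &MRI) {
      llvm::Register Dst = Copy.getOperand(0).getReg();
      // Fold only a virtual register with a single non-debug use that lives
      // in the same block as the copy, mirroring the conditions checked in
      // the loop above.
      return Dst.isVirtual() && MRI.hasOneNonDBGUse(Dst) &&
             MRI.use_instr_nodbg_begin(Dst)->getParent() == Copy.getParent();
    }

If the check passes, the copied register is replaced with exec directly, which is what prevents a saveexec pair from being formed later.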
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index c4f511abc4ae..9a1855c3458b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -922,18 +922,24 @@ void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
if (I->modifiesRegister(AMDGPU::VCC, TRI))
return;
}
+
// Make the two new e32 instruction variants.
// Replace MI with V_{SUB|ADD}_I32_e32
- auto NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opc));
- NewMI.add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst));
- NewMI.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
- NewMI.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1));
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(Opc))
+ .add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst))
+ .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0))
+ .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1))
+ .setMIFlags(MI.getFlags());
+
MI.eraseFromParent();
+
// Replace MISucc with V_{SUBB|ADDC}_U32_e32
- auto NewInst = BuildMI(MBB, MISucc, MISucc.getDebugLoc(), TII->get(SuccOpc));
- NewInst.add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::vdst));
- NewInst.add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src0));
- NewInst.add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src1));
+ BuildMI(MBB, MISucc, MISucc.getDebugLoc(), TII->get(SuccOpc))
+ .add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::vdst))
+ .add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src0))
+ .add(*TII->getNamedOperand(MISucc, AMDGPU::OpName::src1))
+ .setMIFlags(MISucc.getFlags());
+
MISucc.eraseFromParent();
}
@@ -1010,7 +1016,8 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
// Create SDWA version of instruction MI and initialize its operands
MachineInstrBuilder SDWAInst =
- BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc)
+ .setMIFlags(MI.getFlags());
// Copy dst, if it is present in original then should also be present in SDWA
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
new file mode 100644
index 000000000000..4c72fa235975
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
@@ -0,0 +1,139 @@
+//===-- SIPostRABundler.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass creates bundles of memory instructions to protect adjacent loads
+/// and stores from being rescheduled apart from each other post-RA.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIDefines.h"
+#include "SIInstrInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-post-ra-bundler"
+
+namespace {
+
+class SIPostRABundler : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SIPostRABundler() : MachineFunctionPass(ID) {
+ initializeSIPostRABundlerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return "SI post-RA bundler";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ const SIRegisterInfo *TRI;
+
+ SmallSet<Register, 16> Defs;
+
+ bool isDependentLoad(const MachineInstr &MI) const;
+
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIPostRABundler, DEBUG_TYPE, "SI post-RA bundler", false, false)
+
+char SIPostRABundler::ID = 0;
+
+char &llvm::SIPostRABundlerID = SIPostRABundler::ID;
+
+FunctionPass *llvm::createSIPostRABundlerPass() {
+ return new SIPostRABundler();
+}
+
+bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const {
+ if (!MI.mayLoad())
+ return false;
+
+ for (const MachineOperand &Op : MI.explicit_operands()) {
+ if (!Op.isReg())
+ continue;
+ Register Reg = Op.getReg();
+ for (Register Def : Defs)
+ if (TRI->regsOverlap(Reg, Def))
+ return true;
+ }
+
+ return false;
+}
+
+bool SIPostRABundler::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+ bool Changed = false;
+ const uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF |
+ SIInstrFlags::SMRD | SIInstrFlags::DS |
+ SIInstrFlags::FLAT | SIInstrFlags::MIMG;
+
+ for (MachineBasicBlock &MBB : MF) {
+ MachineBasicBlock::instr_iterator Next;
+ MachineBasicBlock::instr_iterator B = MBB.instr_begin();
+ MachineBasicBlock::instr_iterator E = MBB.instr_end();
+ for (auto I = B; I != E; I = Next) {
+ Next = std::next(I);
+
+ const uint64_t IMemFlags = I->getDesc().TSFlags & MemFlags;
+
+ if (IMemFlags == 0 || I->isBundled() || !I->mayLoadOrStore() ||
+ B->mayLoad() != I->mayLoad() || B->mayStore() != I->mayStore() ||
+ ((B->getDesc().TSFlags & MemFlags) != IMemFlags) ||
+ isDependentLoad(*I)) {
+
+ if (B != I) {
+ if (std::next(B) != I) {
+ finalizeBundle(MBB, B, I);
+ Changed = true;
+ }
+ Next = I;
+ }
+
+ B = Next;
+ Defs.clear();
+ continue;
+ }
+
+ if (I->getNumExplicitDefs() == 0)
+ continue;
+
+ Defs.insert(I->defs().begin()->getReg());
+ }
+
+ if (B != E && std::next(B) != E) {
+ finalizeBundle(MBB, B, E);
+ Changed = true;
+ }
+
+ Defs.clear();
+ }
+
+ return Changed;
+}
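The bundling condition in runOnMachineFunction() above can be read as a single predicate on the bundle head B and the candidate instruction I. A minimal sketch with a hypothetical canExtendBundle helper, reusing the same MemFlags mask (the isDependentLoad() check is omitted):

    #include <cstdint>
    #include "llvm/CodeGen/MachineInstr.h"

    static bool canExtendBundle(const llvm::MachineInstr &Head,
                                const llvm::MachineInstr &MI,
                                uint64_t MemFlags) {
      const uint64_t HeadClass = Head.getDesc().TSFlags & MemFlags;
      const uint64_t MIClass = MI.getDesc().TSFlags & MemFlags;
      // Same memory class (MUBUF, SMRD, DS, FLAT, ...) and the same
      // load/store kind as the bundle head, and not already bundled.
      return MIClass != 0 && !MI.isBundled() && MIClass == HeadClass &&
             Head.mayLoad() == MI.mayLoad() &&
             Head.mayStore() == MI.mayStore();
    }

Note that only runs of two or more such instructions are actually bundled; a lone memory access is left untouched.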
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
new file mode 100644
index 000000000000..f31c722db1b2
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -0,0 +1,326 @@
+//===-- SIPreEmitPeephole.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass performs peephole optimizations just before code emission.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-pre-emit-peephole"
+
+namespace {
+
+class SIPreEmitPeephole : public MachineFunctionPass {
+private:
+ const SIInstrInfo *TII = nullptr;
+ const SIRegisterInfo *TRI = nullptr;
+
+ bool optimizeVccBranch(MachineInstr &MI) const;
+ bool optimizeSetGPR(MachineInstr &First, MachineInstr &MI) const;
+
+public:
+ static char ID;
+
+ SIPreEmitPeephole() : MachineFunctionPass(ID) {
+ initializeSIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIPreEmitPeephole, DEBUG_TYPE,
+ "SI peephole optimizations", false, false)
+
+char SIPreEmitPeephole::ID = 0;
+
+char &llvm::SIPreEmitPeepholeID = SIPreEmitPeephole::ID;
+
+bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
+ // Match:
+ // sreg = -1 or 0
+ // vcc = S_AND_B64 exec, sreg or S_ANDN2_B64 exec, sreg
+ // S_CBRANCH_VCC[N]Z
+ // =>
+ // S_CBRANCH_EXEC[N]Z
+ // We end up with this pattern sometimes after basic block placement.
+ // It happens while combining a block which assigns -1 or 0 to a saved mask
+ // and another block which consumes that saved mask and then a branch.
+ bool Changed = false;
+ MachineBasicBlock &MBB = *MI.getParent();
+ const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
+ const bool IsWave32 = ST.isWave32();
+ const unsigned CondReg = TRI->getVCC();
+ const unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ const unsigned And = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
+ const unsigned AndN2 = IsWave32 ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_ANDN2_B64;
+
+ MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(),
+ E = MBB.rend();
+ bool ReadsCond = false;
+ unsigned Threshold = 5;
+ for (++A; A != E; ++A) {
+ if (!--Threshold)
+ return false;
+ if (A->modifiesRegister(ExecReg, TRI))
+ return false;
+ if (A->modifiesRegister(CondReg, TRI)) {
+ if (!A->definesRegister(CondReg, TRI) ||
+ (A->getOpcode() != And && A->getOpcode() != AndN2))
+ return false;
+ break;
+ }
+ ReadsCond |= A->readsRegister(CondReg, TRI);
+ }
+ if (A == E)
+ return false;
+
+ MachineOperand &Op1 = A->getOperand(1);
+ MachineOperand &Op2 = A->getOperand(2);
+ if (Op1.getReg() != ExecReg && Op2.isReg() && Op2.getReg() == ExecReg) {
+ TII->commuteInstruction(*A);
+ Changed = true;
+ }
+ if (Op1.getReg() != ExecReg)
+ return Changed;
+ if (Op2.isImm() && !(Op2.getImm() == -1 || Op2.getImm() == 0))
+ return Changed;
+
+ int64_t MaskValue = 0;
+ Register SReg;
+ if (Op2.isReg()) {
+ SReg = Op2.getReg();
+ auto M = std::next(A);
+ bool ReadsSreg = false;
+ for (; M != E; ++M) {
+ if (M->definesRegister(SReg, TRI))
+ break;
+ if (M->modifiesRegister(SReg, TRI))
+ return Changed;
+ ReadsSreg |= M->readsRegister(SReg, TRI);
+ }
+ if (M == E || !M->isMoveImmediate() || !M->getOperand(1).isImm() ||
+ (M->getOperand(1).getImm() != -1 && M->getOperand(1).getImm() != 0))
+ return Changed;
+ MaskValue = M->getOperand(1).getImm();
+ // First, if sreg is only used in the AND instruction, fold the immediate
+ // into the AND.
+ if (!ReadsSreg && Op2.isKill()) {
+ A->getOperand(2).ChangeToImmediate(MaskValue);
+ M->eraseFromParent();
+ }
+ } else if (Op2.isImm()) {
+ MaskValue = Op2.getImm();
+ } else {
+ llvm_unreachable("Op2 must be register or immediate");
+ }
+
+ // Invert mask for s_andn2
+ assert(MaskValue == 0 || MaskValue == -1);
+ if (A->getOpcode() == AndN2)
+ MaskValue = ~MaskValue;
+
+ if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC) &&
+ MI.killsRegister(CondReg, TRI))
+ A->eraseFromParent();
+
+ bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
+ if (SReg == ExecReg) {
+ // EXEC is updated directly
+ if (IsVCCZ) {
+ MI.eraseFromParent();
+ return true;
+ }
+ MI.setDesc(TII->get(AMDGPU::S_BRANCH));
+ } else if (IsVCCZ && MaskValue == 0) {
+ // Will always branch
+ // Remove all successors shadowed by the new unconditional branch
+ MachineBasicBlock *Parent = MI.getParent();
+ SmallVector<MachineInstr *, 4> ToRemove;
+ bool Found = false;
+ for (MachineInstr &Term : Parent->terminators()) {
+ if (Found) {
+ if (Term.isBranch())
+ ToRemove.push_back(&Term);
+ } else {
+ Found = Term.isIdenticalTo(MI);
+ }
+ }
+ assert(Found && "conditional branch is not terminator");
+ for (auto BranchMI : ToRemove) {
+ MachineOperand &Dst = BranchMI->getOperand(0);
+ assert(Dst.isMBB() && "destination is not basic block");
+ Parent->removeSuccessor(Dst.getMBB());
+ BranchMI->eraseFromParent();
+ }
+
+ if (MachineBasicBlock *Succ = Parent->getFallThrough()) {
+ Parent->removeSuccessor(Succ);
+ }
+
+ // Rewrite to unconditional branch
+ MI.setDesc(TII->get(AMDGPU::S_BRANCH));
+ } else if (!IsVCCZ && MaskValue == 0) {
+ // Will never branch
+ MachineOperand &Dst = MI.getOperand(0);
+ assert(Dst.isMBB() && "destination is not basic block");
+ MI.getParent()->removeSuccessor(Dst.getMBB());
+ MI.eraseFromParent();
+ return true;
+ } else if (MaskValue == -1) {
+ // Depends only on EXEC
+ MI.setDesc(
+ TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ : AMDGPU::S_CBRANCH_EXECNZ));
+ }
+
+ MI.RemoveOperand(MI.findRegisterUseOperandIdx(CondReg, false /*Kill*/, TRI));
+ MI.addImplicitDefUseOperands(*MBB.getParent());
+
+ return true;
+}
+
+bool SIPreEmitPeephole::optimizeSetGPR(MachineInstr &First,
+ MachineInstr &MI) const {
+ MachineBasicBlock &MBB = *MI.getParent();
+ const MachineFunction &MF = *MBB.getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ Register IdxReg = Idx->isReg() ? Idx->getReg() : Register();
+ SmallVector<MachineInstr *, 4> ToRemove;
+ bool IdxOn = true;
+
+ if (!MI.isIdenticalTo(First))
+ return false;
+
+ // Check the instructions between the two identical S_SET_GPR_IDX_ON to make
+ // sure the second one can be safely removed.
+ for (MachineBasicBlock::iterator I = std::next(First.getIterator()),
+ E = MI.getIterator(); I != E; ++I) {
+ switch (I->getOpcode()) {
+ case AMDGPU::S_SET_GPR_IDX_MODE:
+ return false;
+ case AMDGPU::S_SET_GPR_IDX_OFF:
+ IdxOn = false;
+ ToRemove.push_back(&*I);
+ break;
+ default:
+ if (I->modifiesRegister(AMDGPU::M0, TRI))
+ return false;
+ if (IdxReg && I->modifiesRegister(IdxReg, TRI))
+ return false;
+ if (llvm::any_of(I->operands(),
+ [&MRI, this](const MachineOperand &MO) {
+ return MO.isReg() &&
+ TRI->isVectorRegister(MRI, MO.getReg());
+ })) {
+ // The only exception allowed here is another indirect vector move
+ // with the same mode.
+ if (!IdxOn ||
+ !((I->getOpcode() == AMDGPU::V_MOV_B32_e32 &&
+ I->hasRegisterImplicitUseOperand(AMDGPU::M0)) ||
+ I->getOpcode() == AMDGPU::V_MOV_B32_indirect))
+ return false;
+ }
+ }
+ }
+
+ MI.eraseFromParent();
+ for (MachineInstr *RI : ToRemove)
+ RI->eraseFromParent();
+ return true;
+}
+
+bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ TII = ST.getInstrInfo();
+ TRI = &TII->getRegisterInfo();
+ MachineBasicBlock *EmptyMBBAtEnd = nullptr;
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ MachineBasicBlock::iterator MBBE = MBB.getFirstTerminator();
+ if (MBBE != MBB.end()) {
+ MachineInstr &MI = *MBBE;
+ switch (MI.getOpcode()) {
+ case AMDGPU::S_CBRANCH_VCCZ:
+ case AMDGPU::S_CBRANCH_VCCNZ:
+ Changed |= optimizeVccBranch(MI);
+ continue;
+ case AMDGPU::SI_RETURN_TO_EPILOG:
+ // FIXME: This is not an optimization and should be
+ // moved somewhere else.
+ assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
+
+ // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
+ // because external bytecode will be appended at the end.
+ if (&MBB != &MF.back() || &MI != &MBB.back()) {
+ // SI_RETURN_TO_EPILOG is not the last instruction. Add an empty block
+ // at the end and jump there.
+ if (!EmptyMBBAtEnd) {
+ EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
+ MF.insert(MF.end(), EmptyMBBAtEnd);
+ }
+
+ MBB.addSuccessor(EmptyMBBAtEnd);
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
+ .addMBB(EmptyMBBAtEnd);
+ MI.eraseFromParent();
+ MBBE = MBB.getFirstTerminator();
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!ST.hasVGPRIndexMode())
+ continue;
+
+ MachineInstr *SetGPRMI = nullptr;
+ const unsigned Threshold = 20;
+ unsigned Count = 0;
+ // Scan the block for pairs of S_SET_GPR_IDX_ON instructions to see if the
+ // second one is redundant. The expensive checks are done in optimizeSetGPR(),
+ // and the scan distance is limited to 20 instructions for compile-time
+ // reasons.
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(); MBBI != MBBE; ) {
+ MachineInstr &MI = *MBBI;
+ ++MBBI;
+
+ if (Count == Threshold)
+ SetGPRMI = nullptr;
+ else
+ ++Count;
+
+ if (MI.getOpcode() != AMDGPU::S_SET_GPR_IDX_ON)
+ continue;
+
+ Count = 0;
+ if (!SetGPRMI) {
+ SetGPRMI = &MI;
+ continue;
+ }
+
+ if (optimizeSetGPR(*SetGPRMI, MI))
+ Changed = true;
+ else
+ SetGPRMI = &MI;
+ }
+ }
+
+ return Changed;
+}
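To summarize the branch rewrite at the end of optimizeVccBranch(): once the mask value feeding vcc is known to be 0 or -1, the conditional branch either collapses or degenerates to an EXEC-based branch. A minimal sketch with a hypothetical chooseBranchOpc helper (the SReg == ExecReg special case and the successor bookkeeping are left out):

    #include "MCTargetDesc/AMDGPUMCTargetDesc.h"

    // Returns the opcode the branch is rewritten to, or 0 when the branch is
    // never taken and can simply be erased.
    static unsigned chooseBranchOpc(bool IsVCCZ, int64_t MaskValue) {
      if (MaskValue == 0)                     // vcc = exec & 0 is always zero
        return IsVCCZ ? AMDGPU::S_BRANCH : 0; // always taken / never taken
      // MaskValue == -1: vcc mirrors exec, so branch on EXEC instead.
      return IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ : AMDGPU::S_CBRANCH_EXECNZ;
    }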
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index fbadad3c84ad..5d6009ebf384 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -26,27 +26,12 @@
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
+#include <vector>
using namespace llvm;
-static bool hasPressureSet(const int *PSets, unsigned PSetID) {
- for (unsigned i = 0; PSets[i] != -1; ++i) {
- if (PSets[i] == (int)PSetID)
- return true;
- }
- return false;
-}
-
-void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
- BitVector &PressureSets) const {
- for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
- const int *PSets = getRegUnitPressureSets(*U);
- if (hasPressureSet(PSets, PSetID)) {
- PressureSets.set(PSetID);
- break;
- }
- }
-}
+#define GET_REGINFO_TARGET_DESC
+#include "AMDGPUGenRegisterInfo.inc"
static cl::opt<bool> EnableSpillSGPRToVGPR(
"amdgpu-spill-sgpr-to-vgpr",
@@ -54,90 +39,200 @@ static cl::opt<bool> EnableSpillSGPRToVGPR(
cl::ReallyHidden,
cl::init(true));
-SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
- AMDGPURegisterInfo(),
- ST(ST),
- SGPRPressureSets(getNumRegPressureSets()),
- VGPRPressureSets(getNumRegPressureSets()),
- AGPRPressureSets(getNumRegPressureSets()),
- SpillSGPRToVGPR(EnableSpillSGPRToVGPR),
- isWave32(ST.isWave32()) {
- unsigned NumRegPressureSets = getNumRegPressureSets();
-
- SGPRSetID = NumRegPressureSets;
- VGPRSetID = NumRegPressureSets;
- AGPRSetID = NumRegPressureSets;
-
- for (unsigned i = 0; i < NumRegPressureSets; ++i) {
- classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
- classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
- classifyPressureSet(i, AMDGPU::AGPR0, AGPRPressureSets);
- }
-
- // Determine the number of reg units for each pressure set.
- std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
- for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
- const int *PSets = getRegUnitPressureSets(i);
- for (unsigned j = 0; PSets[j] != -1; ++j) {
- ++PressureSetRegUnits[PSets[j]];
+std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
+
+SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
+ : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST),
+ SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) {
+
+ assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
+ getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
+ (getSubRegIndexLaneMask(AMDGPU::lo16) |
+ getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
+ getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
+ "getNumCoveredRegs() will not work with generated subreg masks!");
+
+ RegPressureIgnoredUnits.resize(getNumRegUnits());
+ RegPressureIgnoredUnits.set(*MCRegUnitIterator(AMDGPU::M0, this));
+ for (auto Reg : AMDGPU::VGPR_HI16RegClass)
+ RegPressureIgnoredUnits.set(*MCRegUnitIterator(Reg, this));
+
+ // HACK: Until this is fully tablegen'd.
+ static llvm::once_flag InitializeRegSplitPartsFlag;
+
+ static auto InitializeRegSplitPartsOnce = [this]() {
+ for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
+ unsigned Size = getSubRegIdxSize(Idx);
+ if (Size & 31)
+ continue;
+ std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
+ unsigned Pos = getSubRegIdxOffset(Idx);
+ if (Pos % Size)
+ continue;
+ Pos /= Size;
+ if (Vec.empty()) {
+ unsigned MaxNumParts = 1024 / Size; // Maximum register is 1024 bits.
+ Vec.resize(MaxNumParts);
+ }
+ Vec[Pos] = Idx;
}
+ };
+
+
+ llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
+}
+
+void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
+ MCRegister Reg) const {
+ MCRegAliasIterator R(Reg, this, true);
+
+ for (; R.isValid(); ++R)
+ Reserved.set(*R);
+}
+
+// Forced to be here by one .inc
+const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
+ const MachineFunction *MF) const {
+ CallingConv::ID CC = MF->getFunction().getCallingConv();
+ switch (CC) {
+ case CallingConv::C:
+ case CallingConv::Fast:
+ case CallingConv::Cold:
+ return CSR_AMDGPU_HighRegs_SaveList;
+ default: {
+ // Dummy to not crash RegisterClassInfo.
+ static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
+ return &NoCalleeSavedReg;
}
+ }
+}
- unsigned VGPRMax = 0, SGPRMax = 0, AGPRMax = 0;
- for (unsigned i = 0; i < NumRegPressureSets; ++i) {
- if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
- VGPRSetID = i;
- VGPRMax = PressureSetRegUnits[i];
- continue;
- }
- if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
- SGPRSetID = i;
- SGPRMax = PressureSetRegUnits[i];
- }
- if (isAGPRPressureSet(i) && PressureSetRegUnits[i] > AGPRMax) {
- AGPRSetID = i;
- AGPRMax = PressureSetRegUnits[i];
- continue;
- }
+const MCPhysReg *
+SIRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
+ return nullptr;
+}
+
+const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ switch (CC) {
+ case CallingConv::C:
+ case CallingConv::Fast:
+ case CallingConv::Cold:
+ return CSR_AMDGPU_HighRegs_RegMask;
+ default:
+ return nullptr;
}
+}
- assert(SGPRSetID < NumRegPressureSets &&
- VGPRSetID < NumRegPressureSets &&
- AGPRSetID < NumRegPressureSets);
+Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const SIFrameLowering *TFI =
+ MF.getSubtarget<GCNSubtarget>().getFrameLowering();
+ const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ // During ISel lowering we always reserve the stack pointer in entry
+ // functions, but never actually want to reference it when accessing our own
+ // frame. If we need a frame pointer we use it, but otherwise we can just use
+ // an immediate "0" which we represent by returning NoRegister.
+ if (FuncInfo->isEntryFunction()) {
+ return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg() : Register();
+ }
+ return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
+ : FuncInfo->getStackPtrOffsetReg();
}
-unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
- const MachineFunction &MF) const {
- unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
- unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
- return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass);
+bool SIRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+ // When we need stack realignment, we can't reference off of the
+ // stack pointer, so we reserve a base pointer.
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ return MFI.getNumFixedObjects() && needsStackRealignment(MF);
}
-static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
- unsigned Reg;
+Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; }
- // Try to place it in a hole after PrivateSegmentBufferReg.
- if (RegCount & 3) {
- // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
- // alignment constraints, so we have a hole where can put the wave offset.
- Reg = RegCount - 1;
- } else {
- // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
- // wave offset before it.
- Reg = RegCount - 5;
- }
+const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const {
+ return CSR_AMDGPU_AllVGPRs_RegMask;
+}
- return Reg;
+const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
+ return CSR_AMDGPU_AllAllocatableSRegs_RegMask;
}
-unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
+// FIXME: TableGen should generate something to make this manageable for all
+// register classes. At a minimum we could use the opposite of
+// composeSubRegIndices and go up from the base 32-bit subreg.
+unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
+ unsigned NumRegs) {
+ // Table of NumRegs sized pieces at every 32-bit offset.
+ static const uint16_t SubRegFromChannelTable[][32] = {
+ {AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+ AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
+ AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
+ AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
+ AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
+ AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
+ AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31},
+ {AMDGPU::sub0_sub1, AMDGPU::sub1_sub2, AMDGPU::sub2_sub3,
+ AMDGPU::sub3_sub4, AMDGPU::sub4_sub5, AMDGPU::sub5_sub6,
+ AMDGPU::sub6_sub7, AMDGPU::sub7_sub8, AMDGPU::sub8_sub9,
+ AMDGPU::sub9_sub10, AMDGPU::sub10_sub11, AMDGPU::sub11_sub12,
+ AMDGPU::sub12_sub13, AMDGPU::sub13_sub14, AMDGPU::sub14_sub15,
+ AMDGPU::sub15_sub16, AMDGPU::sub16_sub17, AMDGPU::sub17_sub18,
+ AMDGPU::sub18_sub19, AMDGPU::sub19_sub20, AMDGPU::sub20_sub21,
+ AMDGPU::sub21_sub22, AMDGPU::sub22_sub23, AMDGPU::sub23_sub24,
+ AMDGPU::sub24_sub25, AMDGPU::sub25_sub26, AMDGPU::sub26_sub27,
+ AMDGPU::sub27_sub28, AMDGPU::sub28_sub29, AMDGPU::sub29_sub30,
+ AMDGPU::sub30_sub31, AMDGPU::NoSubRegister},
+ {AMDGPU::sub0_sub1_sub2, AMDGPU::sub1_sub2_sub3,
+ AMDGPU::sub2_sub3_sub4, AMDGPU::sub3_sub4_sub5,
+ AMDGPU::sub4_sub5_sub6, AMDGPU::sub5_sub6_sub7,
+ AMDGPU::sub6_sub7_sub8, AMDGPU::sub7_sub8_sub9,
+ AMDGPU::sub8_sub9_sub10, AMDGPU::sub9_sub10_sub11,
+ AMDGPU::sub10_sub11_sub12, AMDGPU::sub11_sub12_sub13,
+ AMDGPU::sub12_sub13_sub14, AMDGPU::sub13_sub14_sub15,
+ AMDGPU::sub14_sub15_sub16, AMDGPU::sub15_sub16_sub17,
+ AMDGPU::sub16_sub17_sub18, AMDGPU::sub17_sub18_sub19,
+ AMDGPU::sub18_sub19_sub20, AMDGPU::sub19_sub20_sub21,
+ AMDGPU::sub20_sub21_sub22, AMDGPU::sub21_sub22_sub23,
+ AMDGPU::sub22_sub23_sub24, AMDGPU::sub23_sub24_sub25,
+ AMDGPU::sub24_sub25_sub26, AMDGPU::sub25_sub26_sub27,
+ AMDGPU::sub26_sub27_sub28, AMDGPU::sub27_sub28_sub29,
+ AMDGPU::sub28_sub29_sub30, AMDGPU::sub29_sub30_sub31,
+ AMDGPU::NoSubRegister, AMDGPU::NoSubRegister},
+ {AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3_sub4,
+ AMDGPU::sub2_sub3_sub4_sub5, AMDGPU::sub3_sub4_sub5_sub6,
+ AMDGPU::sub4_sub5_sub6_sub7, AMDGPU::sub5_sub6_sub7_sub8,
+ AMDGPU::sub6_sub7_sub8_sub9, AMDGPU::sub7_sub8_sub9_sub10,
+ AMDGPU::sub8_sub9_sub10_sub11, AMDGPU::sub9_sub10_sub11_sub12,
+ AMDGPU::sub10_sub11_sub12_sub13, AMDGPU::sub11_sub12_sub13_sub14,
+ AMDGPU::sub12_sub13_sub14_sub15, AMDGPU::sub13_sub14_sub15_sub16,
+ AMDGPU::sub14_sub15_sub16_sub17, AMDGPU::sub15_sub16_sub17_sub18,
+ AMDGPU::sub16_sub17_sub18_sub19, AMDGPU::sub17_sub18_sub19_sub20,
+ AMDGPU::sub18_sub19_sub20_sub21, AMDGPU::sub19_sub20_sub21_sub22,
+ AMDGPU::sub20_sub21_sub22_sub23, AMDGPU::sub21_sub22_sub23_sub24,
+ AMDGPU::sub22_sub23_sub24_sub25, AMDGPU::sub23_sub24_sub25_sub26,
+ AMDGPU::sub24_sub25_sub26_sub27, AMDGPU::sub25_sub26_sub27_sub28,
+ AMDGPU::sub26_sub27_sub28_sub29, AMDGPU::sub27_sub28_sub29_sub30,
+ AMDGPU::sub28_sub29_sub30_sub31, AMDGPU::NoSubRegister,
+ AMDGPU::NoSubRegister, AMDGPU::NoSubRegister}};
+
+ const unsigned NumRegIndex = NumRegs - 1;
+
+ assert(NumRegIndex < array_lengthof(SubRegFromChannelTable) &&
+ "Not implemented");
+ assert(Channel < array_lengthof(SubRegFromChannelTable[0]));
+ return SubRegFromChannelTable[NumRegIndex][Channel];
+}
+
+MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
const MachineFunction &MF) const {
- unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
- return AMDGPU::SGPR_32RegClass.getRegister(Reg);
+ unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
+ MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
+ return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass);
}
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
+ Reserved.set(AMDGPU::MODE);
// EXEC_LO and EXEC_HI could be allocated and used as regular register, but
// this seems likely to result in bugs, so I'm marking them as reserved.
@@ -205,6 +300,18 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, Reg);
}
+ for (auto Reg : AMDGPU::SReg_32RegClass) {
+ Reserved.set(getSubReg(Reg, AMDGPU::hi16));
+ Register Low = getSubReg(Reg, AMDGPU::lo16);
+ // This is to prevent BB vcc liveness errors.
+ if (!AMDGPU::SGPR_LO16RegClass.contains(Low))
+ Reserved.set(Low);
+ }
+
+ for (auto Reg : AMDGPU::AGPR_32RegClass) {
+ Reserved.set(getSubReg(Reg, AMDGPU::hi16));
+ }
+
// Reserve all the rest AGPRs if there are no instructions to use it.
if (!ST.hasMAIInsts()) {
for (unsigned i = 0; i < MaxNumVGPRs; ++i) {
@@ -215,38 +322,37 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
- if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
- // Reserve 1 SGPR for scratch wave offset in case we need to spill.
- reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
- }
-
unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
if (ScratchRSrcReg != AMDGPU::NoRegister) {
// Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
// to spill.
// TODO: May need to reserve a VGPR if doing LDS spilling.
reserveRegisterTuples(Reserved, ScratchRSrcReg);
- assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
}
// We have to assume the SP is needed in case there are calls in the function,
// which is detected after the function is lowered. If we aren't really going
// to need SP, don't bother reserving it.
- unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
+ MCRegister StackPtrReg = MFI->getStackPtrOffsetReg();
- if (StackPtrReg != AMDGPU::NoRegister) {
+ if (StackPtrReg) {
reserveRegisterTuples(Reserved, StackPtrReg);
assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
}
- unsigned FrameReg = MFI->getFrameOffsetReg();
- if (FrameReg != AMDGPU::NoRegister) {
+ MCRegister FrameReg = MFI->getFrameOffsetReg();
+ if (FrameReg) {
reserveRegisterTuples(Reserved, FrameReg);
assert(!isSubRegister(ScratchRSrcReg, FrameReg));
}
- for (unsigned Reg : MFI->WWMReservedRegs) {
+ if (hasBasePointer(MF)) {
+ MCRegister BasePtrReg = getBaseRegister();
+ reserveRegisterTuples(Reserved, BasePtrReg);
+ assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
+ }
+
+ for (MCRegister Reg : MFI->WWMReservedRegs) {
reserveRegisterTuples(Reserved, Reg);
}
@@ -257,6 +363,10 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
reserveRegisterTuples(Reserved, Reg);
+ if (MFI->VGPRReservedForSGPRSpill)
+ for (auto SSpill : MFI->getSGPRSpillVGPRs())
+ reserveRegisterTuples(Reserved, SSpill.VGPR);
+
return Reserved;
}
@@ -305,11 +415,6 @@ bool SIRegisterInfo::requiresVirtualBaseRegisters(
return true;
}
-bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
- // This helps catch bugs as verifier errors.
- return true;
-}
-
int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
assert(SIInstrInfo::isMUBUF(*MI));
@@ -340,7 +445,7 @@ bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
}
void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
- unsigned BaseReg,
+ Register BaseReg,
int FrameIdx,
int64_t Offset) const {
MachineBasicBlock::iterator Ins = MBB->begin();
@@ -374,7 +479,7 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
.addImm(0); // clamp bit
}
-void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const {
const SIInstrInfo *TII = ST.getInstrInfo();
@@ -411,7 +516,7 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
}
bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
- unsigned BaseReg,
+ Register BaseReg,
int64_t Offset) const {
if (!SIInstrInfo::isMUBUF(*MI))
return false;
@@ -451,6 +556,11 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_V256_SAVE:
case AMDGPU::SI_SPILL_V256_RESTORE:
return 8;
+ case AMDGPU::SI_SPILL_S192_SAVE:
+ case AMDGPU::SI_SPILL_S192_RESTORE:
+ case AMDGPU::SI_SPILL_V192_SAVE:
+ case AMDGPU::SI_SPILL_V192_RESTORE:
+ return 6;
case AMDGPU::SI_SPILL_S160_SAVE:
case AMDGPU::SI_SPILL_S160_RESTORE:
case AMDGPU::SI_SPILL_V160_SAVE:
@@ -614,10 +724,10 @@ static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp,
int Index,
- unsigned ValueReg,
+ Register ValueReg,
bool IsKill,
- unsigned ScratchRsrcReg,
- unsigned ScratchOffsetReg,
+ MCRegister ScratchRsrcReg,
+ MCRegister ScratchOffsetReg,
int64_t InstOffset,
MachineMemOperand *MMO,
RegScavenger *RS) const {
@@ -625,13 +735,14 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
MachineFunction *MF = MI->getParent()->getParent();
const SIInstrInfo *TII = ST.getInstrInfo();
const MachineFrameInfo &MFI = MF->getFrameInfo();
+ const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
const MCInstrDesc &Desc = TII->get(LoadStoreOp);
const DebugLoc &DL = MI->getDebugLoc();
bool IsStore = Desc.mayStore();
bool Scavenged = false;
- unsigned SOffset = ScratchOffsetReg;
+ MCRegister SOffset = ScratchOffsetReg;
const unsigned EltSize = 4;
const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
@@ -640,7 +751,7 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
int64_t ScratchOffsetRegDelta = 0;
- unsigned Align = MFI.getObjectAlignment(Index);
+ Align Alignment = MFI.getObjectAlign(Index);
const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
Register TmpReg =
@@ -650,7 +761,7 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
if (!isUInt<12>(Offset + Size - EltSize)) {
- SOffset = AMDGPU::NoRegister;
+ SOffset = MCRegister();
// We currently only support spilling VGPRs to EltSize boundaries, meaning
// we can simplify the adjustment of Offset here to just scale with
@@ -662,23 +773,33 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
if (RS)
SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
- if (SOffset == AMDGPU::NoRegister) {
+ if (!SOffset) {
// There are no free SGPRs, and we are in the process of spilling
// VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
// on SI/CI and on VI it is true until we implement spilling using scalar
// stores), we have no way to free up an SGPR. Our solution here is to
- // add the offset directly to the ScratchOffset register, and then
- // subtract the offset after the spill to return ScratchOffset to it's
- // original value.
+ // add the offset directly to the ScratchOffset or StackPtrOffset
+ // register, and then subtract the offset after the spill to return the
+ // register to its original value.
+ if (!ScratchOffsetReg)
+ ScratchOffsetReg = FuncInfo->getStackPtrOffsetReg();
SOffset = ScratchOffsetReg;
ScratchOffsetRegDelta = Offset;
} else {
Scavenged = true;
}
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
- .addReg(ScratchOffsetReg)
- .addImm(Offset);
+ if (!SOffset)
+ report_fatal_error("could not scavenge SGPR to spill in entry function");
+
+ if (ScratchOffsetReg == AMDGPU::NoRegister) {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset)
+ .addImm(Offset);
+ } else {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
+ .addReg(ScratchOffsetReg)
+ .addImm(Offset);
+ }
Offset = 0;
}
@@ -708,21 +829,26 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
}
MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
- MachineMemOperand *NewMMO
- = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
- EltSize, MinAlign(Align, EltSize * i));
+ MachineMemOperand *NewMMO =
+ MF->getMachineMemOperand(PInfo, MMO->getFlags(), EltSize,
+ commonAlignment(Alignment, EltSize * i));
MIB = BuildMI(*MBB, MI, DL, Desc)
- .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
- .addReg(ScratchRsrcReg)
- .addReg(SOffset, SOffsetRegState)
- .addImm(Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0) // tfe
- .addImm(0) // dlc
- .addImm(0) // swz
- .addMemOperand(NewMMO);
+ .addReg(SubReg,
+ getDefRegState(!IsStore) | getKillRegState(IsKill))
+ .addReg(ScratchRsrcReg);
+ if (SOffset == AMDGPU::NoRegister) {
+ MIB.addImm(0);
+ } else {
+ MIB.addReg(SOffset, SOffsetRegState);
+ }
+ MIB.addImm(Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .addImm(0) // dlc
+ .addImm(0) // swz
+ .addMemOperand(NewMMO);
if (!IsStore && TmpReg != AMDGPU::NoRegister)
MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32),
@@ -736,12 +862,124 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
if (ScratchOffsetRegDelta != 0) {
// Subtract the offset we added to the ScratchOffset register.
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
- .addReg(ScratchOffsetReg)
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), SOffset)
+ .addReg(SOffset)
.addImm(ScratchOffsetRegDelta);
}
}
+// Generate a VMEM access which loads or stores the VGPR containing an SGPR
+// spill such that all the lanes set in VGPRLanes are loaded or stored.
+// This generates exec mask manipulation and will use SGPRs available in MI
+// or VGPR lanes in the VGPR to save and restore the exec mask.
+void SIRegisterInfo::buildSGPRSpillLoadStore(MachineBasicBlock::iterator MI,
+ int Index, int Offset,
+ unsigned EltSize, Register VGPR,
+ int64_t VGPRLanes,
+ RegScavenger *RS,
+ bool IsLoad) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction *MF = MBB->getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
+ Register SuperReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
+ ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
+ unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
+ unsigned FirstPart = Offset * 32;
+ unsigned ExecLane = 0;
+
+ bool IsKill = MI->getOperand(0).isKill();
+ const DebugLoc &DL = MI->getDebugLoc();
+
+ // Cannot handle load/store to EXEC
+ assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
+ SuperReg != AMDGPU::EXEC && "exec should never spill");
+
+ // On Wave32 only handle EXEC_LO.
+ // On Wave64 only update EXEC_HI if there is sufficient space for a copy.
+ bool OnlyExecLo = isWave32 || NumSubRegs == 1 || SuperReg == AMDGPU::EXEC_HI;
+
+ unsigned ExecMovOpc = OnlyExecLo ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
+ Register ExecReg = OnlyExecLo ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ Register SavedExecReg;
+
+ // Backup EXEC
+ if (OnlyExecLo) {
+ SavedExecReg = NumSubRegs == 1
+ ? SuperReg
+ : getSubReg(SuperReg, SplitParts[FirstPart + ExecLane]);
+ } else {
+ // If src/dst is an odd size it is possible subreg0 is not aligned.
+ for (; ExecLane < (NumSubRegs - 1); ++ExecLane) {
+ SavedExecReg = getMatchingSuperReg(
+ getSubReg(SuperReg, SplitParts[FirstPart + ExecLane]), AMDGPU::sub0,
+ &AMDGPU::SReg_64_XEXECRegClass);
+ if (SavedExecReg)
+ break;
+ }
+ }
+ assert(SavedExecReg);
+ BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), SavedExecReg).addReg(ExecReg);
+
+ // Setup EXEC
+ BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), ExecReg).addImm(VGPRLanes);
+
+ // Load/store VGPR
+ MachineFrameInfo &FrameInfo = MF->getFrameInfo();
+ assert(FrameInfo.getStackID(Index) != TargetStackID::SGPRSpill);
+
+ Register FrameReg = FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*MF)
+ ? getBaseRegister()
+ : getFrameRegister(*MF);
+
+ Align Alignment = FrameInfo.getObjectAlign(Index);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(*MF, Index);
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ PtrInfo, IsLoad ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore,
+ EltSize, Alignment);
+
+ if (IsLoad) {
+ buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
+ Index,
+ VGPR, false,
+ MFI->getScratchRSrcReg(), FrameReg,
+ Offset * EltSize, MMO,
+ RS);
+ } else {
+ buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET, Index, VGPR,
+ IsKill, MFI->getScratchRSrcReg(), FrameReg,
+ Offset * EltSize, MMO, RS);
+ // This only ever adds one VGPR spill
+ MFI->addToSpilledVGPRs(1);
+ }
+
+ // Restore EXEC
+ BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), ExecReg)
+ .addReg(SavedExecReg, getKillRegState(IsLoad || IsKill));
+
+ // Restore clobbered SGPRs
+ if (IsLoad) {
+ // Nothing to do; register will be overwritten
+ } else if (!IsKill) {
+ // Restore SGPRs from appropriate VGPR lanes
+ if (!OnlyExecLo) {
+ BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+ getSubReg(SuperReg, SplitParts[FirstPart + ExecLane + 1]))
+ .addReg(VGPR)
+ .addImm(ExecLane + 1);
+ }
+ BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+ NumSubRegs == 1
+ ? SavedExecReg
+ : getSubReg(SuperReg, SplitParts[FirstPart + ExecLane]))
+ .addReg(VGPR, RegState::Kill)
+ .addImm(ExecLane);
+ }
+}
+
bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
int Index,
RegScavenger *RS,
@@ -749,7 +987,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB = MI->getParent();
MachineFunction *MF = MBB->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
+ DenseSet<Register> SGPRSpillVGPRDefinedSet;
ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
= MFI->getSGPRToVGPRSpills(Index);
@@ -763,13 +1001,12 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
bool IsKill = MI->getOperand(0).isKill();
const DebugLoc &DL = MI->getDebugLoc();
- MachineFrameInfo &FrameInfo = MF->getFrameInfo();
-
assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
- SuperReg != MFI->getFrameOffsetReg() &&
- SuperReg != MFI->getScratchWaveOffsetReg()));
+ SuperReg != MFI->getFrameOffsetReg()));
assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
+ assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
+ SuperReg != AMDGPU::EXEC && "exec should never spill");
unsigned EltSize = 4;
const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
@@ -777,17 +1014,10 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
- // Scavenged temporary VGPR to use. It must be scavenged once for any number
- // of spilled subregs.
- Register TmpVGPR;
-
- // SubReg carries the "Kill" flag when SubReg == SuperReg.
- unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
- for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
- Register SubReg =
- NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
-
- if (SpillToVGPR) {
+ if (SpillToVGPR) {
+ for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
+ Register SubReg =
+ NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
// During SGPR spilling to VGPR, determine if the VGPR is defined. The
@@ -809,42 +1039,53 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
// FIXME: Since this spills to another register instead of an actual
// frame index, we should delete the frame index when all references to
// it are fixed.
- } else {
- // XXX - Can to VGPR spill fail for some subregisters but not others?
- if (OnlyToVGPR)
- return false;
-
- // Spill SGPR to a frame index.
- if (!TmpVGPR.isValid())
- TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
-
- MachineInstrBuilder Mov
- = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
- .addReg(SubReg, SubKillState);
-
- // There could be undef components of a spilled super register.
- // TODO: Can we detect this and skip the spill?
- if (NumSubRegs > 1) {
- // The last implicit use of the SuperReg carries the "Kill" flag.
- unsigned SuperKillState = 0;
- if (i + 1 == e)
- SuperKillState |= getKillRegState(IsKill);
- Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
+ }
+ } else {
+ // Scavenged temporary VGPR to use. It must be scavenged once for any number
+ // of spilled subregs.
+ Register TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
+ RS->setRegUsed(TmpVGPR);
+
+ // SubReg carries the "Kill" flag when SubReg == SuperReg.
+ unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
+
+ unsigned PerVGPR = 32;
+ unsigned NumVGPRs = (NumSubRegs + (PerVGPR - 1)) / PerVGPR;
+ int64_t VGPRLanes = (1LL << std::min(PerVGPR, NumSubRegs)) - 1LL;
+
+ for (unsigned Offset = 0; Offset < NumVGPRs; ++Offset) {
+ unsigned TmpVGPRFlags = RegState::Undef;
+
+ // Write sub registers into the VGPR
+ for (unsigned i = Offset * PerVGPR,
+ e = std::min((Offset + 1) * PerVGPR, NumSubRegs);
+ i < e; ++i) {
+ Register SubReg =
+ NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
+
+ MachineInstrBuilder WriteLane =
+ BuildMI(*MBB, MI, DL,
+ TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
+ TmpVGPR)
+ .addReg(SubReg, SubKillState)
+ .addImm(i % PerVGPR)
+ .addReg(TmpVGPR, TmpVGPRFlags);
+ TmpVGPRFlags = 0;
+
+ // There could be undef components of a spilled super register.
+ // TODO: Can we detect this and skip the spill?
+ if (NumSubRegs > 1) {
+ // The last implicit use of the SuperReg carries the "Kill" flag.
+ unsigned SuperKillState = 0;
+ if (i + 1 == NumSubRegs)
+ SuperKillState |= getKillRegState(IsKill);
+ WriteLane.addReg(SuperReg, RegState::Implicit | SuperKillState);
+ }
}
- unsigned Align = FrameInfo.getObjectAlignment(Index);
- MachinePointerInfo PtrInfo
- = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
- MachineMemOperand *MMO
- = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
- EltSize, MinAlign(Align, EltSize * i));
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
- .addReg(TmpVGPR, RegState::Kill) // src
- .addFrameIndex(Index) // vaddr
- .addReg(MFI->getScratchRSrcReg()) // srrsrc
- .addReg(MFI->getStackPtrOffsetReg()) // soffset
- .addImm(i * 4) // offset
- .addMemOperand(MMO);
+ // Write out VGPR
+ buildSGPRSpillLoadStore(MI, Index, Offset, EltSize, TmpVGPR, VGPRLanes,
+ RS, false);
}
}
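The lane bookkeeping above is easier to see with concrete numbers. A minimal sketch with a hypothetical spillLaneInfo helper mirroring the PerVGPR/NumVGPRs/VGPRLanes math: an SGPR-192 tuple (6 subregs) needs one temporary VGPR with lane mask 0x3f, while a 1024-bit tuple (32 subregs) fills all 32 lanes of one VGPR.

    #include <algorithm>
    #include <cstdint>
    #include <utility>

    // Number of temporary VGPRs and the lane mask written into each when
    // spilling NumSubRegs 32-bit SGPR parts, 32 lanes per VGPR.
    static std::pair<unsigned, int64_t> spillLaneInfo(unsigned NumSubRegs) {
      const unsigned PerVGPR = 32;
      unsigned NumVGPRs = (NumSubRegs + (PerVGPR - 1)) / PerVGPR;
      int64_t VGPRLanes = (1LL << std::min(PerVGPR, NumSubRegs)) - 1LL;
      return {NumVGPRs, VGPRLanes}; // e.g. {1, 0x3f} for an SGPR-192 tuple
    }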
@@ -867,13 +1108,14 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
if (OnlyToVGPR && !SpillToVGPR)
return false;
- MachineFrameInfo &FrameInfo = MF->getFrameInfo();
const SIInstrInfo *TII = ST.getInstrInfo();
const DebugLoc &DL = MI->getDebugLoc();
Register SuperReg = MI->getOperand(0).getReg();
assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
+ assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
+ SuperReg != AMDGPU::EXEC && "exec should never spill");
unsigned EltSize = 4;
@@ -882,52 +1124,49 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
- Register TmpVGPR;
-
- for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
- Register SubReg =
- NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
+ if (SpillToVGPR) {
+ for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
+ Register SubReg =
+ NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
- if (SpillToVGPR) {
SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
auto MIB =
BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
SubReg)
.addReg(Spill.VGPR)
.addImm(Spill.Lane);
-
if (NumSubRegs > 1 && i == 0)
MIB.addReg(SuperReg, RegState::ImplicitDefine);
- } else {
- if (OnlyToVGPR)
- return false;
-
- // Restore SGPR from a stack slot.
- // FIXME: We should use S_LOAD_DWORD here for VI.
- if (!TmpVGPR.isValid())
- TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
- unsigned Align = FrameInfo.getObjectAlignment(Index);
-
- MachinePointerInfo PtrInfo
- = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
-
- MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
- MachineMemOperand::MOLoad, EltSize,
- MinAlign(Align, EltSize * i));
-
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpVGPR)
- .addFrameIndex(Index) // vaddr
- .addReg(MFI->getScratchRSrcReg()) // srsrc
- .addReg(MFI->getStackPtrOffsetReg()) // soffset
- .addImm(i * 4) // offset
- .addMemOperand(MMO);
-
- auto MIB =
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
- .addReg(TmpVGPR, RegState::Kill);
-
- if (NumSubRegs > 1)
- MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
+ }
+ } else {
+ Register TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
+ RS->setRegUsed(TmpVGPR);
+
+ unsigned PerVGPR = 32;
+ unsigned NumVGPRs = (NumSubRegs + (PerVGPR - 1)) / PerVGPR;
+ int64_t VGPRLanes = (1LL << std::min(PerVGPR, NumSubRegs)) - 1LL;
+
+ for (unsigned Offset = 0; Offset < NumVGPRs; ++Offset) {
+ // Load in VGPR data
+ buildSGPRSpillLoadStore(MI, Index, Offset, EltSize, TmpVGPR, VGPRLanes,
+ RS, true);
+
+ // Unpack lanes
+ for (unsigned i = Offset * PerVGPR,
+ e = std::min((Offset + 1) * PerVGPR, NumSubRegs);
+ i < e; ++i) {
+ Register SubReg =
+ NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
+
+ bool LastSubReg = (i + 1 == e);
+ auto MIB =
+ BuildMI(*MBB, MI, DL,
+ TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), SubReg)
+ .addReg(TmpVGPR, getKillRegState(LastSubReg))
+ .addImm(i);
+ if (NumSubRegs > 1 && i == 0)
+ MIB.addReg(SuperReg, RegState::ImplicitDefine);
+ }
}
}
@@ -946,6 +1185,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
case AMDGPU::SI_SPILL_S1024_SAVE:
case AMDGPU::SI_SPILL_S512_SAVE:
case AMDGPU::SI_SPILL_S256_SAVE:
+ case AMDGPU::SI_SPILL_S192_SAVE:
case AMDGPU::SI_SPILL_S160_SAVE:
case AMDGPU::SI_SPILL_S128_SAVE:
case AMDGPU::SI_SPILL_S96_SAVE:
@@ -955,6 +1195,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
case AMDGPU::SI_SPILL_S1024_RESTORE:
case AMDGPU::SI_SPILL_S512_RESTORE:
case AMDGPU::SI_SPILL_S256_RESTORE:
+ case AMDGPU::SI_SPILL_S192_RESTORE:
case AMDGPU::SI_SPILL_S160_RESTORE:
case AMDGPU::SI_SPILL_S128_RESTORE:
case AMDGPU::SI_SPILL_S96_RESTORE:
@@ -981,13 +1222,16 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MachineOperand &FIOp = MI->getOperand(FIOperandNum);
int Index = MI->getOperand(FIOperandNum).getIndex();
- Register FrameReg = getFrameRegister(*MF);
+ Register FrameReg = FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*MF)
+ ? getBaseRegister()
+ : getFrameRegister(*MF);
switch (MI->getOpcode()) {
// SGPR register spill
case AMDGPU::SI_SPILL_S1024_SAVE:
case AMDGPU::SI_SPILL_S512_SAVE:
case AMDGPU::SI_SPILL_S256_SAVE:
+ case AMDGPU::SI_SPILL_S192_SAVE:
case AMDGPU::SI_SPILL_S160_SAVE:
case AMDGPU::SI_SPILL_S128_SAVE:
case AMDGPU::SI_SPILL_S96_SAVE:
@@ -1001,6 +1245,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_S1024_RESTORE:
case AMDGPU::SI_SPILL_S512_RESTORE:
case AMDGPU::SI_SPILL_S256_RESTORE:
+ case AMDGPU::SI_SPILL_S192_RESTORE:
case AMDGPU::SI_SPILL_S160_RESTORE:
case AMDGPU::SI_SPILL_S128_RESTORE:
case AMDGPU::SI_SPILL_S96_RESTORE:
@@ -1076,42 +1321,30 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
bool IsMUBUF = TII->isMUBUF(*MI);
if (!IsMUBUF && !MFI->isEntryFunction()) {
- // Convert to an absolute stack address by finding the offset from the
- // scratch wave base and scaling by the wave size.
+ // Convert to a swizzled stack address by scaling by the wave size.
//
- // In an entry function/kernel the offset is already the absolute
- // address relative to the frame register.
-
- Register TmpDiffReg =
- RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
-
- // If there's no free SGPR, in-place modify the FP
- Register DiffReg = TmpDiffReg.isValid() ? TmpDiffReg : FrameReg;
+ // In an entry function/kernel the offset is already swizzled.
bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
- Register ResultReg = IsCopy ?
- MI->getOperand(0).getReg() :
- RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
-
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
- .addReg(FrameReg)
- .addReg(MFI->getScratchWaveOffsetReg());
+ Register ResultReg =
+ IsCopy ? MI->getOperand(0).getReg()
+ : RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
int64_t Offset = FrameInfo.getObjectOffset(Index);
if (Offset == 0) {
// XXX - This never happens because of emergency scavenging slot at 0?
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
.addImm(ST.getWavefrontSizeLog2())
- .addReg(DiffReg);
+ .addReg(FrameReg);
} else {
if (auto MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) {
- Register ScaledReg =
- RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MIB, 0);
+ // Reuse ResultReg in intermediate step.
+ Register ScaledReg = ResultReg;
BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
ScaledReg)
.addImm(ST.getWavefrontSizeLog2())
- .addReg(DiffReg, RegState::Kill);
+ .addReg(FrameReg);
const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
@@ -1148,10 +1381,10 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// unavailable. Only one additional mov is needed.
Register TmpScaledReg =
RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
- Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : DiffReg;
+ Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
- .addReg(DiffReg, RegState::Kill)
+ .addReg(FrameReg)
.addImm(ST.getWavefrontSizeLog2());
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), ScaledReg)
.addReg(ScaledReg, RegState::Kill)
@@ -1165,19 +1398,12 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
.addReg(ScaledReg, RegState::Kill)
.addImm(Offset);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
- .addReg(DiffReg, RegState::Kill)
+ .addReg(FrameReg)
.addImm(ST.getWavefrontSizeLog2());
}
}
}
- if (!TmpDiffReg.isValid()) {
- // Restore the FP.
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), FrameReg)
- .addReg(FrameReg)
- .addReg(MFI->getScratchWaveOffsetReg());
- }
-
// Don't introduce an extra copy if we're just materializing in a mov.
if (IsCopy)
MI->eraseFromParent();
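With the scratch wave offset register gone, the non-entry-function path above now forms the swizzled address directly from the frame (or base) register: shift right by log2 of the wavefront size, then add the frame object's offset. A rough standalone model of that arithmetic, under the assumption that FrameReg holds the unswizzled wave-relative byte offset (swizzledAddress is a hypothetical helper, not part of the patch):

#include <cassert>
#include <cstdint>

// V_LSHRREV_B32 then the no-carry add: divide the frame register by the wave
// size to get the per-lane (swizzled) base, then fold in the object's offset.
static uint32_t swizzledAddress(uint32_t FrameRegValue, int64_t ObjectOffset,
                                unsigned WavefrontSizeLog2) {
  uint32_t Scaled = FrameRegValue >> WavefrontSizeLog2;
  return Scaled + static_cast<uint32_t>(ObjectOffset);
}

int main() {
  // Wave64 (log2 = 6): frame register 0x1000, object at offset 16.
  assert(swizzledAddress(0x1000, 16, 6) == 0x40 + 16);
  return 0;
}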
@@ -1192,10 +1418,17 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::vaddr));
- assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
- MFI->getStackPtrOffsetReg());
-
- TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->setReg(FrameReg);
+ auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
+ assert((SOffset.isReg() &&
+ SOffset.getReg() == MFI->getStackPtrOffsetReg()) ||
+ (SOffset.isImm() && SOffset.getImm() == 0));
+ if (SOffset.isReg()) {
+ if (FrameReg == AMDGPU::NoRegister) {
+ SOffset.ChangeToImmediate(0);
+ } else {
+ SOffset.setReg(FrameReg);
+ }
+ }
int64_t Offset = FrameInfo.getObjectOffset(Index);
int64_t OldImm
@@ -1224,16 +1457,99 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
}
}
-StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
+StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {
return AMDGPUInstPrinter::getRegisterName(Reg);
}
+const TargetRegisterClass *
+SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) {
+ if (BitWidth == 1)
+ return &AMDGPU::VReg_1RegClass;
+ if (BitWidth <= 16)
+ return &AMDGPU::VGPR_LO16RegClass;
+ if (BitWidth <= 32)
+ return &AMDGPU::VGPR_32RegClass;
+ if (BitWidth <= 64)
+ return &AMDGPU::VReg_64RegClass;
+ if (BitWidth <= 96)
+ return &AMDGPU::VReg_96RegClass;
+ if (BitWidth <= 128)
+ return &AMDGPU::VReg_128RegClass;
+ if (BitWidth <= 160)
+ return &AMDGPU::VReg_160RegClass;
+ if (BitWidth <= 192)
+ return &AMDGPU::VReg_192RegClass;
+ if (BitWidth <= 256)
+ return &AMDGPU::VReg_256RegClass;
+ if (BitWidth <= 512)
+ return &AMDGPU::VReg_512RegClass;
+ if (BitWidth <= 1024)
+ return &AMDGPU::VReg_1024RegClass;
+
+ return nullptr;
+}
+
+const TargetRegisterClass *
+SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) {
+ if (BitWidth <= 16)
+ return &AMDGPU::AGPR_LO16RegClass;
+ if (BitWidth <= 32)
+ return &AMDGPU::AGPR_32RegClass;
+ if (BitWidth <= 64)
+ return &AMDGPU::AReg_64RegClass;
+ if (BitWidth <= 96)
+ return &AMDGPU::AReg_96RegClass;
+ if (BitWidth <= 128)
+ return &AMDGPU::AReg_128RegClass;
+ if (BitWidth <= 160)
+ return &AMDGPU::AReg_160RegClass;
+ if (BitWidth <= 192)
+ return &AMDGPU::AReg_192RegClass;
+ if (BitWidth <= 256)
+ return &AMDGPU::AReg_256RegClass;
+ if (BitWidth <= 512)
+ return &AMDGPU::AReg_512RegClass;
+ if (BitWidth <= 1024)
+ return &AMDGPU::AReg_1024RegClass;
+
+ return nullptr;
+}
+
+const TargetRegisterClass *
+SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
+ if (BitWidth <= 16)
+ return &AMDGPU::SGPR_LO16RegClass;
+ if (BitWidth <= 32)
+ return &AMDGPU::SReg_32RegClass;
+ if (BitWidth <= 64)
+ return &AMDGPU::SReg_64RegClass;
+ if (BitWidth <= 96)
+ return &AMDGPU::SGPR_96RegClass;
+ if (BitWidth <= 128)
+ return &AMDGPU::SGPR_128RegClass;
+ if (BitWidth <= 160)
+ return &AMDGPU::SGPR_160RegClass;
+ if (BitWidth <= 192)
+ return &AMDGPU::SGPR_192RegClass;
+ if (BitWidth <= 256)
+ return &AMDGPU::SGPR_256RegClass;
+ if (BitWidth <= 512)
+ return &AMDGPU::SGPR_512RegClass;
+ if (BitWidth <= 1024)
+ return &AMDGPU::SGPR_1024RegClass;
+
+ return nullptr;
+}
+
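These three ladders centralize the size-to-class mapping that the per-size switch statements below used to duplicate: callers round an arbitrary bit width up to the next class that can hold it, or get nullptr when no class fits. A trimmed-down mock (not AMDGPU code; only the 32-bit-and-wider VGPR widths from the diff) of that lookup behavior:

#include <cassert>

// Nearest 32-bit-or-wider VGPR class width that can hold BitWidth; 0 mirrors
// the nullptr return for unsupported sizes.
static unsigned vgprClassWidth(unsigned BitWidth) {
  for (unsigned W : {32u, 64u, 96u, 128u, 160u, 192u, 256u, 512u, 1024u})
    if (BitWidth <= W)
      return W;
  return 0;
}

int main() {
  assert(vgprClassWidth(48) == 64);    // odd widths round up to the next class
  assert(vgprClassWidth(192) == 192);  // the new 192-bit tuples are reachable
  assert(vgprClassWidth(2048) == 0);   // too wide: callers assert or bail out
  return 0;
}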
// FIXME: This is very slow. It might be worth creating a map from physreg to
// register class.
-const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
- assert(!Register::isVirtualRegister(Reg));
-
+const TargetRegisterClass *
+SIRegisterInfo::getPhysRegClass(MCRegister Reg) const {
static const TargetRegisterClass *const BaseClasses[] = {
+ &AMDGPU::VGPR_LO16RegClass,
+ &AMDGPU::VGPR_HI16RegClass,
+ &AMDGPU::SReg_LO16RegClass,
+ &AMDGPU::AGPR_LO16RegClass,
&AMDGPU::VGPR_32RegClass,
&AMDGPU::SReg_32RegClass,
&AMDGPU::AGPR_32RegClass,
@@ -1242,13 +1558,19 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
&AMDGPU::AReg_64RegClass,
&AMDGPU::VReg_96RegClass,
&AMDGPU::SReg_96RegClass,
+ &AMDGPU::AReg_96RegClass,
&AMDGPU::VReg_128RegClass,
&AMDGPU::SReg_128RegClass,
&AMDGPU::AReg_128RegClass,
&AMDGPU::VReg_160RegClass,
&AMDGPU::SReg_160RegClass,
+ &AMDGPU::AReg_160RegClass,
+ &AMDGPU::VReg_192RegClass,
+ &AMDGPU::SReg_192RegClass,
+ &AMDGPU::AReg_192RegClass,
&AMDGPU::VReg_256RegClass,
&AMDGPU::SReg_256RegClass,
+ &AMDGPU::AReg_256RegClass,
&AMDGPU::VReg_512RegClass,
&AMDGPU::SReg_512RegClass,
&AMDGPU::AReg_512RegClass,
@@ -1272,122 +1594,54 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
unsigned Size = getRegSizeInBits(*RC);
- switch (Size) {
- case 32:
- return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
- case 64:
- return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
- case 96:
- return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
- case 128:
- return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
- case 160:
- return getCommonSubClass(&AMDGPU::VReg_160RegClass, RC) != nullptr;
- case 256:
- return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
- case 512:
- return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
- case 1024:
- return getCommonSubClass(&AMDGPU::VReg_1024RegClass, RC) != nullptr;
- case 1:
- return getCommonSubClass(&AMDGPU::VReg_1RegClass, RC) != nullptr;
- default:
+ if (Size == 16) {
+ return getCommonSubClass(&AMDGPU::VGPR_LO16RegClass, RC) != nullptr ||
+ getCommonSubClass(&AMDGPU::VGPR_HI16RegClass, RC) != nullptr;
+ }
+ const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
+ if (!VRC) {
assert(Size < 32 && "Invalid register class size");
return false;
}
+ return getCommonSubClass(VRC, RC) != nullptr;
}
bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const {
unsigned Size = getRegSizeInBits(*RC);
- if (Size < 32)
+ if (Size < 16)
return false;
- switch (Size) {
- case 32:
- return getCommonSubClass(&AMDGPU::AGPR_32RegClass, RC) != nullptr;
- case 64:
- return getCommonSubClass(&AMDGPU::AReg_64RegClass, RC) != nullptr;
- case 96:
+ const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
+ if (!ARC) {
+ assert(getVGPRClassForBitWidth(Size) && "Invalid register class size");
return false;
- case 128:
- return getCommonSubClass(&AMDGPU::AReg_128RegClass, RC) != nullptr;
- case 160:
- case 256:
- return false;
- case 512:
- return getCommonSubClass(&AMDGPU::AReg_512RegClass, RC) != nullptr;
- case 1024:
- return getCommonSubClass(&AMDGPU::AReg_1024RegClass, RC) != nullptr;
- default:
- llvm_unreachable("Invalid register class size");
}
+ return getCommonSubClass(ARC, RC) != nullptr;
}
-const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
- const TargetRegisterClass *SRC) const {
- switch (getRegSizeInBits(*SRC)) {
- case 32:
- return &AMDGPU::VGPR_32RegClass;
- case 64:
- return &AMDGPU::VReg_64RegClass;
- case 96:
- return &AMDGPU::VReg_96RegClass;
- case 128:
- return &AMDGPU::VReg_128RegClass;
- case 160:
- return &AMDGPU::VReg_160RegClass;
- case 256:
- return &AMDGPU::VReg_256RegClass;
- case 512:
- return &AMDGPU::VReg_512RegClass;
- case 1024:
- return &AMDGPU::VReg_1024RegClass;
- case 1:
- return &AMDGPU::VReg_1RegClass;
- default:
- llvm_unreachable("Invalid register class size");
- }
+const TargetRegisterClass *
+SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
+ unsigned Size = getRegSizeInBits(*SRC);
+ const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
+ assert(VRC && "Invalid register class size");
+ return VRC;
}
-const TargetRegisterClass *SIRegisterInfo::getEquivalentAGPRClass(
- const TargetRegisterClass *SRC) const {
- switch (getRegSizeInBits(*SRC)) {
- case 32:
- return &AMDGPU::AGPR_32RegClass;
- case 64:
- return &AMDGPU::AReg_64RegClass;
- case 128:
- return &AMDGPU::AReg_128RegClass;
- case 512:
- return &AMDGPU::AReg_512RegClass;
- case 1024:
- return &AMDGPU::AReg_1024RegClass;
- default:
- llvm_unreachable("Invalid register class size");
- }
+const TargetRegisterClass *
+SIRegisterInfo::getEquivalentAGPRClass(const TargetRegisterClass *SRC) const {
+ unsigned Size = getRegSizeInBits(*SRC);
+ const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
+ assert(ARC && "Invalid register class size");
+ return ARC;
}
-const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
- const TargetRegisterClass *VRC) const {
- switch (getRegSizeInBits(*VRC)) {
- case 32:
+const TargetRegisterClass *
+SIRegisterInfo::getEquivalentSGPRClass(const TargetRegisterClass *VRC) const {
+ unsigned Size = getRegSizeInBits(*VRC);
+ if (Size == 32)
return &AMDGPU::SGPR_32RegClass;
- case 64:
- return &AMDGPU::SReg_64RegClass;
- case 96:
- return &AMDGPU::SReg_96RegClass;
- case 128:
- return &AMDGPU::SGPR_128RegClass;
- case 160:
- return &AMDGPU::SReg_160RegClass;
- case 256:
- return &AMDGPU::SReg_256RegClass;
- case 512:
- return &AMDGPU::SReg_512RegClass;
- case 1024:
- return &AMDGPU::SReg_1024RegClass;
- default:
- llvm_unreachable("Invalid register class size");
- }
+ const TargetRegisterClass *SRC = getSGPRClassForBitWidth(Size);
+ assert(SRC && "Invalid register class size");
+ return SRC;
}
const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
@@ -1396,62 +1650,19 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
return RC;
// We can assume that each lane corresponds to one 32-bit register.
- unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
+ unsigned Size = getNumChannelsFromSubReg(SubIdx) * 32;
if (isSGPRClass(RC)) {
- switch (Count) {
- case 1:
- return &AMDGPU::SGPR_32RegClass;
- case 2:
- return &AMDGPU::SReg_64RegClass;
- case 3:
- return &AMDGPU::SReg_96RegClass;
- case 4:
- return &AMDGPU::SGPR_128RegClass;
- case 5:
- return &AMDGPU::SReg_160RegClass;
- case 8:
- return &AMDGPU::SReg_256RegClass;
- case 16:
- return &AMDGPU::SReg_512RegClass;
- case 32: /* fall-through */
- default:
- llvm_unreachable("Invalid sub-register class size");
- }
+ if (Size == 32)
+ RC = &AMDGPU::SGPR_32RegClass;
+ else
+ RC = getSGPRClassForBitWidth(Size);
} else if (hasAGPRs(RC)) {
- switch (Count) {
- case 1:
- return &AMDGPU::AGPR_32RegClass;
- case 2:
- return &AMDGPU::AReg_64RegClass;
- case 4:
- return &AMDGPU::AReg_128RegClass;
- case 16:
- return &AMDGPU::AReg_512RegClass;
- case 32: /* fall-through */
- default:
- llvm_unreachable("Invalid sub-register class size");
- }
+ RC = getAGPRClassForBitWidth(Size);
} else {
- switch (Count) {
- case 1:
- return &AMDGPU::VGPR_32RegClass;
- case 2:
- return &AMDGPU::VReg_64RegClass;
- case 3:
- return &AMDGPU::VReg_96RegClass;
- case 4:
- return &AMDGPU::VReg_128RegClass;
- case 5:
- return &AMDGPU::VReg_160RegClass;
- case 8:
- return &AMDGPU::VReg_256RegClass;
- case 16:
- return &AMDGPU::VReg_512RegClass;
- case 32: /* fall-through */
- default:
- llvm_unreachable("Invalid sub-register class size");
- }
+ RC = getVGPRClassForBitWidth(Size);
}
+ assert(RC && "Invalid sub-register class size");
+ return RC;
}
bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
@@ -1487,215 +1698,60 @@ bool SIRegisterInfo::shouldRewriteCopySrc(
return getCommonSubClass(DefRC, SrcRC) != nullptr;
}
-/// Returns a register that is not used at any point in the function.
+/// Returns the lowest register that is not used at any point in the function.
/// If all registers are used, then this function will return
-// AMDGPU::NoRegister.
-unsigned
-SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
- const TargetRegisterClass *RC,
- const MachineFunction &MF) const {
-
- for (unsigned Reg : *RC)
- if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
- return Reg;
- return AMDGPU::NoRegister;
+/// AMDGPU::NoRegister. If \p ReserveHighestVGPR = true, then return the
+/// highest unused register.
+MCRegister SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
+ const TargetRegisterClass *RC,
+ const MachineFunction &MF,
+ bool ReserveHighestVGPR) const {
+ if (ReserveHighestVGPR) {
+ for (MCRegister Reg : reverse(*RC))
+ if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
+ return Reg;
+ } else {
+ for (MCRegister Reg : *RC)
+ if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
+ return Reg;
+ }
+ return MCRegister();
}
ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
unsigned EltSize) const {
- if (EltSize == 4) {
- static const int16_t Sub0_31[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
- AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
- AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
- AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
- AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
- AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
- AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
- AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31,
- };
-
- static const int16_t Sub0_15[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
- AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
- AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
- AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
- };
-
- static const int16_t Sub0_7[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
- AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
- };
-
- static const int16_t Sub0_4[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
- };
-
- static const int16_t Sub0_3[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
- };
-
- static const int16_t Sub0_2[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
- };
-
- static const int16_t Sub0_1[] = {
- AMDGPU::sub0, AMDGPU::sub1,
- };
-
- switch (AMDGPU::getRegBitWidth(*RC->MC)) {
- case 32:
- return {};
- case 64:
- return makeArrayRef(Sub0_1);
- case 96:
- return makeArrayRef(Sub0_2);
- case 128:
- return makeArrayRef(Sub0_3);
- case 160:
- return makeArrayRef(Sub0_4);
- case 256:
- return makeArrayRef(Sub0_7);
- case 512:
- return makeArrayRef(Sub0_15);
- case 1024:
- return makeArrayRef(Sub0_31);
- default:
- llvm_unreachable("unhandled register size");
- }
- }
-
- if (EltSize == 8) {
- static const int16_t Sub0_31_64[] = {
- AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
- AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
- AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
- AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
- AMDGPU::sub16_sub17, AMDGPU::sub18_sub19,
- AMDGPU::sub20_sub21, AMDGPU::sub22_sub23,
- AMDGPU::sub24_sub25, AMDGPU::sub26_sub27,
- AMDGPU::sub28_sub29, AMDGPU::sub30_sub31
- };
-
- static const int16_t Sub0_15_64[] = {
- AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
- AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
- AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
- AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
- };
-
- static const int16_t Sub0_7_64[] = {
- AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
- AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
- };
-
-
- static const int16_t Sub0_3_64[] = {
- AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
- };
-
- switch (AMDGPU::getRegBitWidth(*RC->MC)) {
- case 64:
- return {};
- case 128:
- return makeArrayRef(Sub0_3_64);
- case 256:
- return makeArrayRef(Sub0_7_64);
- case 512:
- return makeArrayRef(Sub0_15_64);
- case 1024:
- return makeArrayRef(Sub0_31_64);
- default:
- llvm_unreachable("unhandled register size");
- }
- }
+ const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC->MC);
+ assert(RegBitWidth >= 32 && RegBitWidth <= 1024);
- if (EltSize == 16) {
-
- static const int16_t Sub0_31_128[] = {
- AMDGPU::sub0_sub1_sub2_sub3,
- AMDGPU::sub4_sub5_sub6_sub7,
- AMDGPU::sub8_sub9_sub10_sub11,
- AMDGPU::sub12_sub13_sub14_sub15,
- AMDGPU::sub16_sub17_sub18_sub19,
- AMDGPU::sub20_sub21_sub22_sub23,
- AMDGPU::sub24_sub25_sub26_sub27,
- AMDGPU::sub28_sub29_sub30_sub31
- };
-
- static const int16_t Sub0_15_128[] = {
- AMDGPU::sub0_sub1_sub2_sub3,
- AMDGPU::sub4_sub5_sub6_sub7,
- AMDGPU::sub8_sub9_sub10_sub11,
- AMDGPU::sub12_sub13_sub14_sub15
- };
-
- static const int16_t Sub0_7_128[] = {
- AMDGPU::sub0_sub1_sub2_sub3,
- AMDGPU::sub4_sub5_sub6_sub7
- };
-
- switch (AMDGPU::getRegBitWidth(*RC->MC)) {
- case 128:
- return {};
- case 256:
- return makeArrayRef(Sub0_7_128);
- case 512:
- return makeArrayRef(Sub0_15_128);
- case 1024:
- return makeArrayRef(Sub0_31_128);
- default:
- llvm_unreachable("unhandled register size");
- }
- }
-
- assert(EltSize == 32 && "unhandled elt size");
-
- static const int16_t Sub0_31_256[] = {
- AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
- AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15,
- AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23,
- AMDGPU::sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
- };
+ const unsigned RegDWORDs = RegBitWidth / 32;
+ const unsigned EltDWORDs = EltSize / 4;
+ assert(RegSplitParts.size() + 1 >= EltDWORDs);
- static const int16_t Sub0_15_256[] = {
- AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
- AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
- };
+ const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
+ const unsigned NumParts = RegDWORDs / EltDWORDs;
- switch (AMDGPU::getRegBitWidth(*RC->MC)) {
- case 256:
- return {};
- case 512:
- return makeArrayRef(Sub0_15_256);
- case 1024:
- return makeArrayRef(Sub0_31_256);
- default:
- llvm_unreachable("unhandled register size");
- }
+ return makeArrayRef(Parts.data(), NumParts);
}
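The new getRegSplitParts replaces all of the hand-written sub-register arrays with the single RegSplitParts table declared in the header diff further down: row EltDWORDs-1 holds the sub-register indices for elements of that dword size, and a register of RegDWORDs dwords simply takes the first RegDWORDs/EltDWORDs entries of that row. A small mock of that lookup, with made-up table contents standing in for the real AMDGPU sub-register enums:

#include <array>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

static std::array<std::vector<int16_t>, 16> RegSplitParts; // built at startup

static std::pair<const int16_t *, unsigned> splitParts(unsigned RegBitWidth,
                                                       unsigned EltSize) {
  const unsigned RegDWORDs = RegBitWidth / 32;
  const unsigned EltDWORDs = EltSize / 4;               // element size in dwords
  const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
  return {Parts.data(), RegDWORDs / EltDWORDs};         // a prefix of the row
}

int main() {
  // Pretend row 1 (2-dword elements) holds sub0_sub1, sub2_sub3, ... as 0..15.
  RegSplitParts[1] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
  auto [Data, Num] = splitParts(512, /*EltSize=*/8);    // 512-bit reg, 64-bit elts
  assert(Num == 8 && Data == RegSplitParts[1].data());
  return 0;
}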
const TargetRegisterClass*
SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
- unsigned Reg) const {
- if (Register::isVirtualRegister(Reg))
- return MRI.getRegClass(Reg);
-
- return getPhysRegClass(Reg);
+ Register Reg) const {
+ return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegClass(Reg);
}
bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
- unsigned Reg) const {
- const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
- assert(RC && "Register class for the reg not found");
- return hasVGPRs(RC);
+ Register Reg) const {
+ const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
+ // Registers without classes are unaddressable, SGPR-like registers.
+ return RC && hasVGPRs(RC);
}
bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
- unsigned Reg) const {
- const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
- assert(RC && "Register class for the reg not found");
- return hasAGPRs(RC);
+ Register Reg) const {
+ const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
+
+ // Registers without classes are unaddressable, SGPR-like registers.
+ return RC && hasAGPRs(RC);
}
bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
@@ -1727,36 +1783,41 @@ unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MF.getFunction());
switch (RC->getID()) {
default:
- return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
+ return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
case AMDGPU::VGPR_32RegClassID:
+ case AMDGPU::VGPR_LO16RegClassID:
+ case AMDGPU::VGPR_HI16RegClassID:
return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
case AMDGPU::SGPR_32RegClassID:
+ case AMDGPU::SGPR_LO16RegClassID:
return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
}
}
unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
unsigned Idx) const {
- if (Idx == getVGPRPressureSet() || Idx == getAGPRPressureSet())
+ if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
+ Idx == AMDGPU::RegisterPressureSets::AGPR_32)
return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
const_cast<MachineFunction &>(MF));
- if (Idx == getSGPRPressureSet())
+ if (Idx == AMDGPU::RegisterPressureSets::SReg_32)
return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
const_cast<MachineFunction &>(MF));
- return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
+ llvm_unreachable("Unexpected register pressure set!");
}
const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
static const int Empty[] = { -1 };
- if (hasRegUnit(AMDGPU::M0, RegUnit))
+ if (RegPressureIgnoredUnits[RegUnit])
return Empty;
- return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
+
+ return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
}
-unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
+MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
// Not a callee saved register.
return AMDGPU::SGPR30_SGPR31;
}
@@ -1765,49 +1826,19 @@ const TargetRegisterClass *
SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
const RegisterBank &RB,
const MachineRegisterInfo &MRI) const {
- switch (Size) {
- case 1: {
- switch (RB.getID()) {
- case AMDGPU::VGPRRegBankID:
- return &AMDGPU::VGPR_32RegClass;
- case AMDGPU::VCCRegBankID:
- return isWave32 ?
- &AMDGPU::SReg_32_XM0_XEXECRegClass : &AMDGPU::SReg_64_XEXECRegClass;
- case AMDGPU::SGPRRegBankID:
- return &AMDGPU::SReg_32RegClass;
- default:
- llvm_unreachable("unknown register bank");
- }
- }
- case 32:
- return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
- &AMDGPU::SReg_32RegClass;
- case 64:
- return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
- &AMDGPU::SReg_64RegClass;
- case 96:
- return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
- &AMDGPU::SReg_96RegClass;
- case 128:
- return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
- &AMDGPU::SGPR_128RegClass;
- case 160:
- return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass :
- &AMDGPU::SReg_160RegClass;
- case 256:
- return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_256RegClass :
- &AMDGPU::SReg_256RegClass;
- case 512:
- return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
- &AMDGPU::SReg_512RegClass;
- case 1024:
- return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_1024RegClass :
- &AMDGPU::SReg_1024RegClass;
+ switch (RB.getID()) {
+ case AMDGPU::VGPRRegBankID:
+ return getVGPRClassForBitWidth(std::max(32u, Size));
+ case AMDGPU::VCCRegBankID:
+ assert(Size == 1);
+ return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
+ : &AMDGPU::SReg_64_XEXECRegClass;
+ case AMDGPU::SGPRRegBankID:
+ return getSGPRClassForBitWidth(std::max(32u, Size));
+ case AMDGPU::AGPRRegBankID:
+ return getAGPRClassForBitWidth(std::max(32u, Size));
default:
- if (Size < 32)
- return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
- &AMDGPU::SReg_32RegClass;
- return nullptr;
+ llvm_unreachable("unknown register bank");
}
}
@@ -1822,7 +1853,7 @@ SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
return getAllocatableClass(RC);
}
-unsigned SIRegisterInfo::getVCC() const {
+MCRegister SIRegisterInfo::getVCC() const {
return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
}
@@ -1837,12 +1868,12 @@ SIRegisterInfo::getRegClass(unsigned RCID) const {
case -1:
return nullptr;
default:
- return AMDGPURegisterInfo::getRegClass(RCID);
+ return AMDGPUGenRegisterInfo::getRegClass(RCID);
}
}
// Find reaching register definition
-MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg,
+MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
MachineInstr &Use,
MachineRegisterInfo &MRI,
LiveIntervals *LIS) const {
@@ -1850,7 +1881,7 @@ MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg,
SlotIndex UseIdx = LIS->getInstructionIndex(Use);
SlotIndex DefIdx;
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
if (!LIS->hasInterval(Reg))
return nullptr;
LiveInterval &LI = LIS->getInterval(Reg);
@@ -1894,3 +1925,49 @@ MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg,
return Def;
}
+
+MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
+ assert(getRegSizeInBits(*getPhysRegClass(Reg)) <= 32);
+
+ for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass,
+ AMDGPU::SReg_32RegClass,
+ AMDGPU::AGPR_32RegClass } ) {
+ if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
+ return Super;
+ }
+ if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
+ &AMDGPU::VGPR_32RegClass)) {
+ return Super;
+ }
+
+ return AMDGPU::NoRegister;
+}
+
+bool SIRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
+ switch (PhysReg) {
+ case AMDGPU::SGPR_NULL:
+ case AMDGPU::SRC_SHARED_BASE:
+ case AMDGPU::SRC_PRIVATE_BASE:
+ case AMDGPU::SRC_SHARED_LIMIT:
+ case AMDGPU::SRC_PRIVATE_LIMIT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+ArrayRef<MCPhysReg>
+SIRegisterInfo::getAllSGPR128(const MachineFunction &MF) const {
+ return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
+ ST.getMaxNumSGPRs(MF) / 4);
+}
+
+ArrayRef<MCPhysReg>
+SIRegisterInfo::getAllSGPR32(const MachineFunction &MF) const {
+ return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
+}
+
+ArrayRef<MCPhysReg>
+SIRegisterInfo::getAllVGPR32(const MachineFunction &MF) const {
+ return makeArrayRef(AMDGPU::VGPR_32RegClass.begin(), ST.getMaxNumVGPRs(MF));
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index ac8c56fa3a03..62d9f1174337 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -14,7 +14,9 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
-#include "AMDGPURegisterInfo.h"
+#define GET_REGINFO_HEADER
+#include "AMDGPUGenRegisterInfo.inc"
+
#include "SIDefines.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -22,38 +24,38 @@ namespace llvm {
class GCNSubtarget;
class LiveIntervals;
-class MachineRegisterInfo;
class SIMachineFunctionInfo;
-class SIRegisterInfo final : public AMDGPURegisterInfo {
+class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
private:
const GCNSubtarget &ST;
- unsigned SGPRSetID;
- unsigned VGPRSetID;
- unsigned AGPRSetID;
- BitVector SGPRPressureSets;
- BitVector VGPRPressureSets;
- BitVector AGPRPressureSets;
bool SpillSGPRToVGPR;
bool isWave32;
+ BitVector RegPressureIgnoredUnits;
+
+ /// Sub reg indexes for getRegSplitParts.
+ /// First index represents subreg size from 1 to 16 DWORDs.
+ /// The inner vector is sorted by bit offset.
+ /// Provided a register can be fully split with given subregs,
+ /// all elements of the inner vector combined give a full lane mask.
+ static std::array<std::vector<int16_t>, 16> RegSplitParts;
+
+ void reserveRegisterTuples(BitVector &, MCRegister Reg) const;
- void classifyPressureSet(unsigned PSetID, unsigned Reg,
- BitVector &PressureSets) const;
public:
SIRegisterInfo(const GCNSubtarget &ST);
+ /// \returns the sub reg enum value for the given \p Channel
+ /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
+ static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);
+
bool spillSGPRToVGPR() const {
return SpillSGPRToVGPR;
}
/// Return the end register initially reserved for the scratch buffer in case
/// spilling is needed.
- unsigned reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
-
- /// Return the end register initially reserved for the scratch wave offset in
- /// case spilling is needed.
- unsigned reservedPrivateSegmentWaveByteOffsetReg(
- const MachineFunction &MF) const;
+ MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
BitVector getReservedRegs(const MachineFunction &MF) const override;
@@ -70,6 +72,9 @@ public:
Register getFrameRegister(const MachineFunction &MF) const override;
+ bool hasBasePointer(const MachineFunction &MF) const;
+ Register getBaseRegister() const;
+
bool canRealignStack(const MachineFunction &MF) const override;
bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
@@ -77,7 +82,6 @@ public:
bool requiresFrameIndexReplacementScavenging(
const MachineFunction &MF) const override;
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override;
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
int64_t getMUBUFInstrOffset(const MachineInstr *MI) const;
@@ -86,19 +90,24 @@ public:
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
- void materializeFrameBaseRegister(MachineBasicBlock *MBB,
- unsigned BaseReg, int FrameIdx,
+ void materializeFrameBaseRegister(MachineBasicBlock *MBB, Register BaseReg,
+ int FrameIdx,
int64_t Offset) const override;
- void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const override;
- bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
+ bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
int64_t Offset) const override;
const TargetRegisterClass *getPointerRegClass(
const MachineFunction &MF, unsigned Kind = 0) const override;
+ void buildSGPRSpillLoadStore(MachineBasicBlock::iterator MI, int Index,
+ int Offset, unsigned EltSize, Register VGPR,
+ int64_t VGPRLanes, RegScavenger *RS,
+ bool IsLoad) const;
+
/// If \p OnlyToVGPR is true, this will only succeed if this
bool spillSGPR(MachineBasicBlock::iterator MI,
int FI, RegScavenger *RS,
@@ -115,15 +124,19 @@ public:
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI,
int FI, RegScavenger *RS) const;
- StringRef getRegAsmName(unsigned Reg) const override;
+ StringRef getRegAsmName(MCRegister Reg) const override;
- unsigned getHWRegIndex(unsigned Reg) const {
+ unsigned getHWRegIndex(MCRegister Reg) const {
return getEncodingValue(Reg) & 0xff;
}
+ static const TargetRegisterClass *getVGPRClassForBitWidth(unsigned BitWidth);
+ static const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth);
+ static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth);
+
/// Return the 'base' register class for this register.
/// e.g. SGPR0 => SReg_32, VGPR => VGPR_32 SGPR0_SGPR1 -> SReg_32, etc.
- const TargetRegisterClass *getPhysRegClass(unsigned Reg) const;
+ const TargetRegisterClass *getPhysRegClass(MCRegister Reg) const;
/// \returns true if this class contains only SGPR registers
bool isSGPRClass(const TargetRegisterClass *RC) const {
@@ -135,9 +148,9 @@ public:
return isSGPRClass(getRegClass(RCID));
}
- bool isSGPRReg(const MachineRegisterInfo &MRI, unsigned Reg) const {
+ bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const {
const TargetRegisterClass *RC;
- if (Register::isVirtualRegister(Reg))
+ if (Reg.isVirtual())
RC = MRI.getRegClass(Reg);
else
RC = getPhysRegClass(Reg);
@@ -161,16 +174,16 @@ public:
}
/// \returns A VGPR reg class with the same width as \p SRC
- const TargetRegisterClass *getEquivalentVGPRClass(
- const TargetRegisterClass *SRC) const;
+ const TargetRegisterClass *
+ getEquivalentVGPRClass(const TargetRegisterClass *SRC) const;
/// \returns An AGPR reg class with the same width as \p SRC
- const TargetRegisterClass *getEquivalentAGPRClass(
- const TargetRegisterClass *SRC) const;
+ const TargetRegisterClass *
+ getEquivalentAGPRClass(const TargetRegisterClass *SRC) const;
/// \returns A SGPR reg class with the same width as \p SRC
- const TargetRegisterClass *getEquivalentSGPRClass(
- const TargetRegisterClass *VRC) const;
+ const TargetRegisterClass *
+ getEquivalentSGPRClass(const TargetRegisterClass *VRC) const;
/// \returns The register class that is used for a sub-register of \p RC for
/// the given \p SubIdx. If \p SubIdx equals NoSubRegister, \p RC will
@@ -196,38 +209,23 @@ public:
/// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
bool opCanUseInlineConstant(unsigned OpType) const;
- unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
- const TargetRegisterClass *RC,
- const MachineFunction &MF) const;
-
- unsigned getSGPRPressureSet() const { return SGPRSetID; };
- unsigned getVGPRPressureSet() const { return VGPRSetID; };
- unsigned getAGPRPressureSet() const { return AGPRSetID; };
+ MCRegister findUnusedRegister(const MachineRegisterInfo &MRI,
+ const TargetRegisterClass *RC,
+ const MachineFunction &MF,
+ bool ReserveHighestVGPR = false) const;
const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI,
- unsigned Reg) const;
- bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
- bool isAGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
- bool isVectorRegister(const MachineRegisterInfo &MRI, unsigned Reg) const {
+ Register Reg) const;
+ bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const;
+ bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const;
+ bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const {
return isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
}
- virtual bool
- isDivergentRegClass(const TargetRegisterClass *RC) const override {
- return !isSGPRClass(RC);
- }
+ bool isConstantPhysReg(MCRegister PhysReg) const override;
- bool isSGPRPressureSet(unsigned SetID) const {
- return SGPRPressureSets.test(SetID) && !VGPRPressureSets.test(SetID) &&
- !AGPRPressureSets.test(SetID);
- }
- bool isVGPRPressureSet(unsigned SetID) const {
- return VGPRPressureSets.test(SetID) && !SGPRPressureSets.test(SetID) &&
- !AGPRPressureSets.test(SetID);
- }
- bool isAGPRPressureSet(unsigned SetID) const {
- return AGPRPressureSets.test(SetID) && !SGPRPressureSets.test(SetID) &&
- !VGPRPressureSets.test(SetID);
+ bool isDivergentRegClass(const TargetRegisterClass *RC) const override {
+ return !isSGPRClass(RC);
}
ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
@@ -249,7 +247,7 @@ public:
const int *getRegUnitPressureSets(unsigned RegUnit) const override;
- unsigned getReturnAddressReg(const MachineFunction &MF) const;
+ MCRegister getReturnAddressReg(const MachineFunction &MF) const;
const TargetRegisterClass *
getRegClassForSizeOnBank(unsigned Size,
@@ -277,12 +275,12 @@ public:
: &AMDGPU::SReg_64_XEXECRegClass;
}
- unsigned getVCC() const;
+ MCRegister getVCC() const;
const TargetRegisterClass *getRegClass(unsigned RCID) const;
// Find reaching register definition
- MachineInstr *findReachingDef(unsigned Reg, unsigned SubReg,
+ MachineInstr *findReachingDef(Register Reg, unsigned SubReg,
MachineInstr &Use,
MachineRegisterInfo &MRI,
LiveIntervals *LIS) const;
@@ -290,14 +288,51 @@ public:
const uint32_t *getAllVGPRRegMask() const;
const uint32_t *getAllAllocatableSRegMask() const;
+ // \returns number of 32 bit registers covered by a \p LM
+ static unsigned getNumCoveredRegs(LaneBitmask LM) {
+ // The assumption is that every lo16 subreg is an even bit and every hi16
+ // is an adjacent odd bit or vice versa.
+ uint64_t Mask = LM.getAsInteger();
+ uint64_t Even = Mask & 0xAAAAAAAAAAAAAAAAULL;
+ Mask = (Even >> 1) | Mask;
+ uint64_t Odd = Mask & 0x5555555555555555ULL;
+ return countPopulation(Odd);
+ }
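getNumCoveredRegs above counts 32-bit registers rather than 16-bit lanes: each lo16/hi16 pair occupies two adjacent mask bits, so it ORs the two bits of every pair together and popcounts one bit per pair. A standalone check of the same arithmetic in plain C++ (std::popcount standing in for countPopulation):

#include <bit>       // std::popcount (C++20)
#include <cassert>
#include <cstdint>

static unsigned coveredRegs(uint64_t Mask) {
  uint64_t OddBits = Mask & 0xAAAAAAAAAAAAAAAAULL;     // one half of each pair
  Mask |= OddBits >> 1;                                // fold onto its partner bit
  return std::popcount(Mask & 0x5555555555555555ULL);  // one bit per 32-bit reg
}

int main() {
  assert(coveredRegs(0b0011) == 1); // lo16 + hi16 of the same register
  assert(coveredRegs(0b1000) == 1); // just one 16-bit half still covers the reg
  assert(coveredRegs(0b0101) == 2); // halves of two different registers
  return 0;
}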
+
+ // \returns a DWORD offset of a \p SubReg
+ unsigned getChannelFromSubReg(unsigned SubReg) const {
+ return SubReg ? (getSubRegIdxOffset(SubReg) + 31) / 32 : 0;
+ }
+
+ // \returns a DWORD size of a \p SubReg
+ unsigned getNumChannelsFromSubReg(unsigned SubReg) const {
+ return getNumCoveredRegs(getSubRegIndexLaneMask(SubReg));
+ }
+
+ // For a given 16 bit \p Reg \returns a 32 bit register holding it.
+ // \returns \p Reg otherwise.
+ MCPhysReg get32BitRegister(MCPhysReg Reg) const;
+
+ /// Return all SGPR128 which satisfy the waves per execution unit requirement
+ /// of the subtarget.
+ ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF) const;
+
+ /// Return all SGPR32 which satisfy the waves per execution unit requirement
+ /// of the subtarget.
+ ArrayRef<MCPhysReg> getAllSGPR32(const MachineFunction &MF) const;
+
+ /// Return all VGPR32 which satisfy the waves per execution unit requirement
+ /// of the subtarget.
+ ArrayRef<MCPhysReg> getAllVGPR32(const MachineFunction &MF) const;
+
private:
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp,
int Index,
- unsigned ValueReg,
+ Register ValueReg,
bool ValueIsKill,
- unsigned ScratchRsrcReg,
- unsigned ScratchOffsetReg,
+ MCRegister ScratchRsrcReg,
+ MCRegister ScratchOffsetReg,
int64_t InstrOffset,
MachineMemOperand *MMO,
RegScavenger *RS) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 6ea6ec00e742..ff1f5c4bc49b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -7,6 +7,50 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
+// Subregister declarations
+//===----------------------------------------------------------------------===//
+
+class Indexes<int N> {
+ list<int> all = [0, 1, 2, 3, 4, 5, 6 , 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31];
+
+ // Returns list of indexes [0..N)
+ list<int> slice =
+ !foldl([]<int>, all, acc, cur,
+ !listconcat(acc, !if(!lt(cur, N), [cur], [])));
+}
+
+let Namespace = "AMDGPU" in {
+
+def lo16 : SubRegIndex<16, 0>;
+def hi16 : SubRegIndex<16, 16>;
+
+foreach Index = 0-31 in {
+ def sub#Index : SubRegIndex<32, !shl(Index, 5)>;
+}
+
+foreach Index = 1-31 in {
+ def sub#Index#_lo16 : ComposedSubRegIndex<!cast<SubRegIndex>(sub#Index), lo16>;
+ def sub#Index#_hi16 : ComposedSubRegIndex<!cast<SubRegIndex>(sub#Index), hi16>;
+}
+
+foreach Size = {2-6,8,16} in {
+ foreach Index = Indexes<!add(33, !mul(Size, -1))>.slice in {
+ def !foldl("", Indexes<Size>.slice, acc, cur,
+ !strconcat(acc#!if(!eq(acc,""),"","_"), "sub"#!add(cur, Index))) :
+ SubRegIndex<!mul(Size, 32), !shl(Index, 5)> {
+ let CoveringSubRegIndices =
+ !foldl([]<SubRegIndex>, Indexes<Size>.slice, acc, cur,
+ !listconcat(acc, [!cast<SubRegIndex>(sub#!add(cur, Index))]));
+ }
+ }
+}
+
+}
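The Indexes<N>.slice fold above is just a way to spell [0..N) in TableGen, and the nested foreach then defines one composed sub-register index per (Size, Index) pair. A hypothetical C++ rendering of what that expansion produces (names and offsets only, purely illustrative):

#include <cstdio>
#include <string>

int main() {
  for (unsigned Size : {2u, 3u, 4u, 5u, 6u, 8u, 16u}) {
    for (unsigned Index = 0; Index + Size <= 32; ++Index) {
      std::string Name;
      for (unsigned J = 0; J < Size; ++J)
        Name += (J ? "_sub" : "sub") + std::to_string(Index + J);
      // Each definition is a SubRegIndex of Size*32 bits at bit offset Index*32.
      std::printf("%-48s // size %u, offset %u\n", Name.c_str(), Size * 32,
                  Index * 32);
    }
  }
  return 0;
}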
+
+//===----------------------------------------------------------------------===//
// Helpers
//===----------------------------------------------------------------------===//
@@ -15,6 +59,7 @@ class getSubRegs<int size> {
list<SubRegIndex> ret3 = [sub0, sub1, sub2];
list<SubRegIndex> ret4 = [sub0, sub1, sub2, sub3];
list<SubRegIndex> ret5 = [sub0, sub1, sub2, sub3, sub4];
+ list<SubRegIndex> ret6 = [sub0, sub1, sub2, sub3, sub4, sub5];
list<SubRegIndex> ret8 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
list<SubRegIndex> ret16 = [sub0, sub1, sub2, sub3,
sub4, sub5, sub6, sub7,
@@ -33,8 +78,10 @@ class getSubRegs<int size> {
!if(!eq(size, 3), ret3,
!if(!eq(size, 4), ret4,
!if(!eq(size, 5), ret5,
- !if(!eq(size, 8), ret8,
- !if(!eq(size, 16), ret16, ret32))))));
+ !if(!eq(size, 6), ret6,
+ !if(!eq(size, 8), ret8,
+ !if(!eq(size, 16), ret16,
+ ret32)))))));
}
// Generates list of sequential register tuple names.
@@ -74,39 +121,69 @@ class SIRegisterTuples<list<SubRegIndex> Indices, RegisterClass RC,
// Declarations that describe the SI registers
//===----------------------------------------------------------------------===//
class SIReg <string n, bits<16> regIdx = 0> :
- Register<n>,
- DwarfRegNum<[!cast<int>(HWEncoding)]> {
+ Register<n> {
let Namespace = "AMDGPU";
-
- // This is the not yet the complete register encoding. An additional
- // bit is set for VGPRs.
let HWEncoding = regIdx;
}
+class SIRegWithSubRegs <string n, list<Register> subregs, bits<16> regIdx> :
+ RegisterWithSubRegs<n, subregs> {
+}
+
+multiclass SIRegLoHi16 <string n, bits<16> regIdx, bit ArtificialHigh = 1,
+ bit HWEncodingHigh = 0> {
+ // There is no special encoding for 16 bit subregs, these are not real
+ // registers but rather operands for instructions preserving other 16 bits
+ // of the result or reading just 16 bits of a 32 bit VGPR.
+ // It is encoded as a corresponding 32 bit register.
+ // Non-VGPR register classes use it as we need to have matching subregisters
+ // to move instructions and data between ALUs.
+ def _LO16 : SIReg<n#".l", regIdx> {
+ let HWEncoding{8} = HWEncodingHigh;
+ }
+ def _HI16 : SIReg<!if(ArtificialHigh, "", n#".h"), regIdx> {
+ let isArtificial = ArtificialHigh;
+ let HWEncoding{8} = HWEncodingHigh;
+ }
+ def "" : RegisterWithSubRegs<n, [!cast<Register>(NAME#"_LO16"),
+ !cast<Register>(NAME#"_HI16")]> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [lo16, hi16];
+ let CoveredBySubRegs = !if(ArtificialHigh,0,1);
+ let HWEncoding = regIdx;
+ let HWEncoding{8} = HWEncodingHigh;
+ }
+}
+
// Special Registers
-def VCC_LO : SIReg<"vcc_lo", 106>;
-def VCC_HI : SIReg<"vcc_hi", 107>;
+defm VCC_LO : SIRegLoHi16<"vcc_lo", 106>;
+defm VCC_HI : SIRegLoHi16<"vcc_hi", 107>;
// Pseudo-registers: Used as placeholders during isel and immediately
// replaced, never seeing the verifier.
def PRIVATE_RSRC_REG : SIReg<"private_rsrc", 0>;
def FP_REG : SIReg<"fp", 0>;
def SP_REG : SIReg<"sp", 0>;
-def SCRATCH_WAVE_OFFSET_REG : SIReg<"scratch_wave_offset", 0>;
+
+// Pseudo-register to represent the program-counter DWARF register.
+def PC_REG : SIReg<"pc", 0>, DwarfRegNum<[16, 16]> {
+ // There is no physical register corresponding to a "program counter", but
+ // we need to encode the concept in debug information in order to represent
+ // things like the return value in unwind information.
+ let isArtificial = 1;
+}
// VCC for 64-bit instructions
-def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
- DwarfRegAlias<VCC_LO> {
+def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = 106;
}
-def EXEC_LO : SIReg<"exec_lo", 126>;
-def EXEC_HI : SIReg<"exec_hi", 127>;
+defm EXEC_LO : SIRegLoHi16<"exec_lo", 126>, DwarfRegNum<[1, 1]>;
+defm EXEC_HI : SIRegLoHi16<"exec_hi", 127>;
-def EXEC : RegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>,
- DwarfRegAlias<EXEC_LO> {
+def EXEC : RegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>, DwarfRegNum<[17, 1]> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = 126;
@@ -114,71 +191,76 @@ def EXEC : RegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>,
// 32-bit real registers, for MC only.
// May be used with both 32-bit and 64-bit operands.
-def SRC_VCCZ : SIReg<"src_vccz", 251>;
-def SRC_EXECZ : SIReg<"src_execz", 252>;
-def SRC_SCC : SIReg<"src_scc", 253>;
+defm SRC_VCCZ : SIRegLoHi16<"src_vccz", 251>;
+defm SRC_EXECZ : SIRegLoHi16<"src_execz", 252>;
+defm SRC_SCC : SIRegLoHi16<"src_scc", 253>;
// 1-bit pseudo register, for codegen only.
// Should never be emitted.
def SCC : SIReg<"scc">;
-def M0 : SIReg <"m0", 124>;
-def SGPR_NULL : SIReg<"null", 125>;
+defm M0 : SIRegLoHi16 <"m0", 124>;
+defm SGPR_NULL : SIRegLoHi16 <"null", 125>;
-def SRC_SHARED_BASE : SIReg<"src_shared_base", 235>;
-def SRC_SHARED_LIMIT : SIReg<"src_shared_limit", 236>;
-def SRC_PRIVATE_BASE : SIReg<"src_private_base", 237>;
-def SRC_PRIVATE_LIMIT : SIReg<"src_private_limit", 238>;
-def SRC_POPS_EXITING_WAVE_ID : SIReg<"src_pops_exiting_wave_id", 239>;
+defm SRC_SHARED_BASE : SIRegLoHi16<"src_shared_base", 235>;
+defm SRC_SHARED_LIMIT : SIRegLoHi16<"src_shared_limit", 236>;
+defm SRC_PRIVATE_BASE : SIRegLoHi16<"src_private_base", 237>;
+defm SRC_PRIVATE_LIMIT : SIRegLoHi16<"src_private_limit", 238>;
+defm SRC_POPS_EXITING_WAVE_ID : SIRegLoHi16<"src_pops_exiting_wave_id", 239>;
-def LDS_DIRECT : SIReg <"src_lds_direct", 254>;
+// Not addressable
+def MODE : SIReg <"mode", 0>;
-def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>;
-def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>;
+def LDS_DIRECT : SIReg <"src_lds_direct", 254> {
+ // There is no physical register corresponding to this. This is an
+ // encoding value in a source field, which will ultimately trigger a
+ // read from m0.
+ let isArtificial = 1;
+}
-def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>,
- DwarfRegAlias<XNACK_MASK_LO> {
+defm XNACK_MASK_LO : SIRegLoHi16<"xnack_mask_lo", 104>;
+defm XNACK_MASK_HI : SIRegLoHi16<"xnack_mask_hi", 105>;
+
+def XNACK_MASK :
+ RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = 104;
}
// Trap handler registers
-def TBA_LO : SIReg<"tba_lo", 108>;
-def TBA_HI : SIReg<"tba_hi", 109>;
+defm TBA_LO : SIRegLoHi16<"tba_lo", 108>;
+defm TBA_HI : SIRegLoHi16<"tba_hi", 109>;
-def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
- DwarfRegAlias<TBA_LO> {
+def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = 108;
}
-def TMA_LO : SIReg<"tma_lo", 110>;
-def TMA_HI : SIReg<"tma_hi", 111>;
+defm TMA_LO : SIRegLoHi16<"tma_lo", 110>;
+defm TMA_HI : SIRegLoHi16<"tma_hi", 111>;
-def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
- DwarfRegAlias<TMA_LO> {
+def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = 110;
}
foreach Index = 0-15 in {
- def TTMP#Index#_vi : SIReg<"ttmp"#Index, !add(112, Index)>;
- def TTMP#Index#_gfx9_gfx10 : SIReg<"ttmp"#Index, !add(108, Index)>;
- def TTMP#Index : SIReg<"ttmp"#Index, 0>;
+ defm TTMP#Index#_vi : SIRegLoHi16<"ttmp"#Index, !add(112, Index)>;
+ defm TTMP#Index#_gfx9_gfx10 : SIRegLoHi16<"ttmp"#Index, !add(108, Index)>;
+ defm TTMP#Index : SIRegLoHi16<"ttmp"#Index, 0>;
}
multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
- def _ci : SIReg<n, ci_e>;
- def _vi : SIReg<n, vi_e>;
- def "" : SIReg<n, 0>;
+ defm _ci : SIRegLoHi16<n, ci_e>;
+ defm _vi : SIRegLoHi16<n, vi_e>;
+ defm "" : SIRegLoHi16<n, 0>;
}
class FlatReg <Register lo, Register hi, bits<16> encoding> :
- RegisterWithSubRegs<"flat_scratch", [lo, hi]>,
- DwarfRegAlias<lo> {
+ RegisterWithSubRegs<"flat_scratch", [lo, hi]> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = encoding;
@@ -193,21 +275,24 @@ def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;
// SGPR registers
foreach Index = 0-105 in {
- def SGPR#Index : SIReg <"s"#Index, Index>;
+ defm SGPR#Index :
+ SIRegLoHi16 <"s"#Index, Index>,
+ DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)),
+ !if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]>;
}
// VGPR registers
foreach Index = 0-255 in {
- def VGPR#Index : SIReg <"v"#Index, Index> {
- let HWEncoding{8} = 1;
- }
+ defm VGPR#Index :
+ SIRegLoHi16 <"v"#Index, Index, 0, 1>,
+ DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]>;
}
// AccVGPR registers
foreach Index = 0-255 in {
- def AGPR#Index : SIReg <"a"#Index, Index> {
- let HWEncoding{8} = 1;
- }
+ defm AGPR#Index :
+ SIRegLoHi16 <"a"#Index, Index, 1, 1>,
+ DwarfRegNum<[!add(Index, 3072), !add(Index, 2048)]>;
}
//===----------------------------------------------------------------------===//
@@ -224,14 +309,35 @@ def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> {
let isAllocatable = 0;
}
+def M0_CLASS_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> {
+ let CopyCost = 1;
+ let Size = 16;
+ let isAllocatable = 0;
+}
+
// TODO: Do we need to set DwarfRegAlias on register tuples?
+def SGPR_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16,
+ (add (sequence "SGPR%u_LO16", 0, 105))> {
+ let AllocationPriority = 9;
+ let Size = 16;
+ let GeneratePressureSet = 0;
+}
+
+def SGPR_HI16 : RegisterClass<"AMDGPU", [i16, f16], 16,
+ (add (sequence "SGPR%u_HI16", 0, 105))> {
+ let isAllocatable = 0;
+ let Size = 16;
+ let GeneratePressureSet = 0;
+}
+
// SGPR 32-bit registers
def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add (sequence "SGPR%u", 0, 105))> {
// Give all SGPR classes higher priority than VGPR classes, because
// we want to spill SGPRs to VGPRs.
let AllocationPriority = 9;
+ let GeneratePressureSet = 0;
}
// SGPR 64-bit registers
@@ -246,6 +352,9 @@ def SGPR_128Regs : SIRegisterTuples<getSubRegs<4>.ret, SGPR_32, 105, 4, 4, "s">;
// SGPR 160-bit registers. No operations use these, but for symmetry with 160-bit VGPRs.
def SGPR_160Regs : SIRegisterTuples<getSubRegs<5>.ret, SGPR_32, 105, 4, 5, "s">;
+// SGPR 192-bit registers
+def SGPR_192Regs : SIRegisterTuples<getSubRegs<6>.ret, SGPR_32, 105, 4, 6, "s">;
+
// SGPR 256-bit registers
def SGPR_256Regs : SIRegisterTuples<getSubRegs<8>.ret, SGPR_32, 105, 4, 8, "s">;
@@ -261,6 +370,12 @@ def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
let isAllocatable = 0;
}
+def TTMP_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16,
+ (add (sequence "TTMP%u_LO16", 0, 15))> {
+ let Size = 16;
+ let isAllocatable = 0;
+}
+
// Trap handler TMP 64-bit registers
def TTMP_64Regs : SIRegisterTuples<getSubRegs<2>.ret, TTMP_32, 15, 2, 2, "ttmp">;
@@ -357,6 +472,19 @@ class RegisterTypes<list<ValueType> reg_types> {
def Reg16Types : RegisterTypes<[i16, f16]>;
def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, p2, p3, p5, p6]>;
+def VGPR_LO16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
+ (add (sequence "VGPR%u_LO16", 0, 255))> {
+ let AllocationPriority = 1;
+ let Size = 16;
+ let GeneratePressureSet = 0;
+}
+
+def VGPR_HI16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
+ (add (sequence "VGPR%u_HI16", 0, 255))> {
+ let AllocationPriority = 1;
+ let Size = 16;
+ let GeneratePressureSet = 0;
+}
// VGPR 32-bit registers
// i16/f16 only on VI+
@@ -364,6 +492,7 @@ def VGPR_32 : RegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.t
(add (sequence "VGPR%u", 0, 255))> {
let AllocationPriority = 1;
let Size = 32;
+ let Weight = 1;
}
// VGPR 64-bit registers
@@ -378,6 +507,9 @@ def VGPR_128 : SIRegisterTuples<getSubRegs<4>.ret, VGPR_32, 255, 1, 4, "v">;
// VGPR 160-bit registers
def VGPR_160 : SIRegisterTuples<getSubRegs<5>.ret, VGPR_32, 255, 1, 5, "v">;
+// VGPR 192-bit registers
+def VGPR_192 : SIRegisterTuples<getSubRegs<6>.ret, VGPR_32, 255, 1, 6, "v">;
+
// VGPR 256-bit registers
def VGPR_256 : SIRegisterTuples<getSubRegs<8>.ret, VGPR_32, 255, 1, 8, "v">;
@@ -387,19 +519,39 @@ def VGPR_512 : SIRegisterTuples<getSubRegs<16>.ret, VGPR_32, 255, 1, 16, "v">;
// VGPR 1024-bit registers
def VGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, VGPR_32, 255, 1, 32, "v">;
+def AGPR_LO16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
+ (add (sequence "AGPR%u_LO16", 0, 255))> {
+ let isAllocatable = 0;
+ let Size = 16;
+ let GeneratePressureSet = 0;
+}
+
// AccVGPR 32-bit registers
def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add (sequence "AGPR%u", 0, 255))> {
let AllocationPriority = 1;
let Size = 32;
+ let Weight = 1;
}
// AGPR 64-bit registers
def AGPR_64 : SIRegisterTuples<getSubRegs<2>.ret, AGPR_32, 255, 1, 2, "a">;
+// AGPR 96-bit registers
+def AGPR_96 : SIRegisterTuples<getSubRegs<3>.ret, AGPR_32, 255, 1, 3, "a">;
+
// AGPR 128-bit registers
def AGPR_128 : SIRegisterTuples<getSubRegs<4>.ret, AGPR_32, 255, 1, 4, "a">;
+// AGPR 160-bit registers
+def AGPR_160 : SIRegisterTuples<getSubRegs<5>.ret, AGPR_32, 255, 1, 5, "a">;
+
+// AGPR 192-bit registers
+def AGPR_192 : SIRegisterTuples<getSubRegs<6>.ret, AGPR_32, 255, 1, 6, "a">;
+
+// AGPR 256-bit registers
+def AGPR_256 : SIRegisterTuples<getSubRegs<8>.ret, AGPR_32, 255, 1, 8, "a">;
+
// AGPR 512-bit registers
def AGPR_512 : SIRegisterTuples<getSubRegs<16>.ret, AGPR_32, 255, 1, 16, "a">;
@@ -411,7 +563,7 @@ def AGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, AGPR_32, 255, 1, 32, "a">;
//===----------------------------------------------------------------------===//
def Pseudo_SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
- (add FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG)> {
+ (add FP_REG, SP_REG)> {
let isAllocatable = 0;
let CopyCost = -1;
}
@@ -422,12 +574,13 @@ def Pseudo_SReg_128 : RegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32,
let CopyCost = -1;
}
-def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32], 32,
(add LDS_DIRECT)> {
let isAllocatable = 0;
let CopyCost = -1;
}
+let GeneratePressureSet = 0 in {
// Subset of SReg_32 without M0 for SMRD instructions and alike.
// See comments in SIInstructions.td for more info.
def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
@@ -438,24 +591,54 @@ def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f1
let AllocationPriority = 10;
}
+def SReg_LO16_XM0_XEXEC : RegisterClass<"AMDGPU", [i16, f16], 16,
+ (add SGPR_LO16, VCC_LO_LO16, VCC_HI_LO16, FLAT_SCR_LO_LO16, FLAT_SCR_HI_LO16,
+ XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, TTMP_LO16, TMA_LO_LO16,
+ TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO16,
+ SRC_SHARED_LIMIT_LO16, SRC_PRIVATE_BASE_LO16, SRC_PRIVATE_LIMIT_LO16,
+ SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, SRC_SCC_LO16)> {
+ let Size = 16;
+ let AllocationPriority = 10;
+}
+
def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
(add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> {
let AllocationPriority = 10;
}
+def SReg_LO16_XEXEC_HI : RegisterClass<"AMDGPU", [i16, f16], 16,
+ (add SReg_LO16_XM0_XEXEC, EXEC_LO_LO16, M0_CLASS_LO16)> {
+ let Size = 16;
+ let AllocationPriority = 10;
+}
+
def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
(add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
let AllocationPriority = 10;
}
+def SReg_LO16_XM0 : RegisterClass<"AMDGPU", [i16, f16], 16,
+ (add SReg_LO16_XM0_XEXEC, EXEC_LO_LO16, EXEC_HI_LO16)> {
+ let Size = 16;
+ let AllocationPriority = 10;
+}
+
+def SReg_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16,
+ (add SGPR_LO16, SReg_LO16_XM0, M0_CLASS_LO16, EXEC_LO_LO16, EXEC_HI_LO16, SReg_LO16_XEXEC_HI)> {
+ let Size = 16;
+ let AllocationPriority = 10;
+}
+} // End GeneratePressureSet = 0
+
// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
(add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI)> {
let AllocationPriority = 10;
}
-def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
- (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI, LDS_DIRECT_CLASS)> {
+let GeneratePressureSet = 0 in {
+def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+ (add SReg_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
}
@@ -528,7 +711,6 @@ def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32,
def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
(add SGPR_128, TTMP_128)> {
- let AllocationPriority = 15;
let isAllocatable = 0;
}
@@ -543,39 +725,50 @@ def SGPR_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
def SReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
(add SGPR_160)> {
- let AllocationPriority = 16;
+ // FIXME: Should be isAllocatable = 0, but that causes all TableGen-generated
+ // subclasses of SGPR_160 to be marked unallocatable too.
}
-def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> {
+def SGPR_192 : RegisterClass<"AMDGPU", [untyped], 32, (add SGPR_192Regs)> {
+ let Size = 192;
let AllocationPriority = 17;
}
-def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add TTMP_256Regs)> {
+def SReg_192 : RegisterClass<"AMDGPU", [untyped], 32, (add SGPR_192)> {
+ let Size = 192;
+ let isAllocatable = 0;
+}
+
+def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32, v4i64, v4f64], 32, (add SGPR_256Regs)> {
+ let AllocationPriority = 18;
+}
+
+def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32, v4i64, v4f64], 32, (add TTMP_256Regs)> {
let isAllocatable = 0;
}
-def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32,
+def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32, v4i64, v4f64], 32,
(add SGPR_256, TTMP_256)> {
// Requires 4 s_mov_b64 to copy
let CopyCost = 4;
- let AllocationPriority = 17;
+ let isAllocatable = 0;
}
-def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
+def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32, v8i64, v8f64], 32,
(add SGPR_512Regs)> {
- let AllocationPriority = 18;
+ let AllocationPriority = 19;
}
-def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
+def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32, v8i64, v8f64], 32,
(add TTMP_512Regs)> {
let isAllocatable = 0;
}
-def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
+def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32, v8i64, v8f64], 32,
(add SGPR_512, TTMP_512)> {
// Requires 8 s_mov_b64 to copy
let CopyCost = 8;
- let AllocationPriority = 18;
+ let isAllocatable = 0;
}
def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
@@ -583,105 +776,55 @@ def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 3
let isAllocatable = 0;
}
-def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
+def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32, v16i64, v16f64], 32,
(add SGPR_1024Regs)> {
- let AllocationPriority = 19;
+ let AllocationPriority = 20;
}
-def SReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
+def SReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32, v16i64, v16f64], 32,
(add SGPR_1024)> {
let CopyCost = 16;
- let AllocationPriority = 19;
-}
-
-// Register class for all vector registers (VGPRs + Interploation Registers)
-def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16, p0, p1, p4], 32,
- (add VGPR_64)> {
- let Size = 64;
-
- // Requires 2 v_mov_b32 to copy
- let CopyCost = 2;
- let AllocationPriority = 2;
-}
-
-def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96)> {
- let Size = 96;
-
- // Requires 3 v_mov_b32 to copy
- let CopyCost = 3;
- let AllocationPriority = 3;
-}
-
-def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
- (add VGPR_128)> {
- let Size = 128;
-
- // Requires 4 v_mov_b32 to copy
- let CopyCost = 4;
- let AllocationPriority = 4;
-}
-
-def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
- (add VGPR_160)> {
- let Size = 160;
-
- // Requires 5 v_mov_b32 to copy
- let CopyCost = 5;
- let AllocationPriority = 5;
-}
-
-def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32,
- (add VGPR_256)> {
- let Size = 256;
- let CopyCost = 8;
- let AllocationPriority = 6;
-}
-
-def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
- (add VGPR_512)> {
- let Size = 512;
- let CopyCost = 16;
- let AllocationPriority = 7;
-}
-
-def VReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
- (add VGPR_1024)> {
- let Size = 1024;
- let CopyCost = 32;
- let AllocationPriority = 8;
+ let isAllocatable = 0;
}
-def AReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32,
- (add AGPR_64)> {
- let Size = 64;
+// Register class for all vector registers (VGPRs + Interpolation Registers)
+class VRegClass<int numRegs, list<ValueType> regTypes, dag regList> :
+ RegisterClass<"AMDGPU", regTypes, 32, regList> {
+ let Size = !mul(numRegs, 32);
- let CopyCost = 5;
- let AllocationPriority = 2;
+ // Requires n v_mov_b32 to copy
+ let CopyCost = numRegs;
+ let AllocationPriority = numRegs;
+ let Weight = numRegs;
}
-def AReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
- (add AGPR_128)> {
- let Size = 128;
+def VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16, p0, p1, p4],
+ (add VGPR_64)>;
+def VReg_96 : VRegClass<3, [v3i32, v3f32], (add VGPR_96)>;
+def VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64, i128], (add VGPR_128)>;
+def VReg_160 : VRegClass<5, [v5i32, v5f32], (add VGPR_160)>;
+def VReg_192 : VRegClass<6, [untyped], (add VGPR_192)>;
+def VReg_256 : VRegClass<8, [v8i32, v8f32, v4i64, v4f64], (add VGPR_256)>;
+def VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64], (add VGPR_512)>;
+def VReg_1024 : VRegClass<32, [v32i32, v32f32, v16i64, v16f64], (add VGPR_1024)>;
- // Requires 4 v_accvgpr_write and 4 v_accvgpr_read to copy + burn 1 vgpr
- let CopyCost = 9;
- let AllocationPriority = 4;
+class ARegClass<int numRegs, list<ValueType> regTypes, dag regList> :
+ VRegClass<numRegs, regTypes, regList> {
+ // Requires n v_accvgpr_write and n v_accvgpr_read to copy + burn 1 vgpr
+ let CopyCost = !add(numRegs, numRegs, 1);
}
-def AReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
- (add AGPR_512)> {
- let Size = 512;
- let CopyCost = 33;
- let AllocationPriority = 7;
-}
-
-def AReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
- (add AGPR_1024)> {
- let Size = 1024;
- let CopyCost = 65;
- let AllocationPriority = 8;
-}
+def AReg_64 : ARegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16],
+ (add AGPR_64)>;
+def AReg_96 : ARegClass<3, [v3i32, v3f32], (add AGPR_96)>;
+def AReg_128 : ARegClass<4, [v4i32, v4f32, v2i64, v2f64], (add AGPR_128)>;
+def AReg_160 : ARegClass<5, [v5i32, v5f32], (add AGPR_160)>;
+def AReg_192 : ARegClass<6, [untyped], (add AGPR_192)>;
+def AReg_256 : ARegClass<8, [v8i32, v8f32, v4i64, v4f64], (add AGPR_256)>;
+def AReg_512 : ARegClass<16, [v16i32, v16f32, v8i64, v8f64], (add AGPR_512)>;
+def AReg_1024 : ARegClass<32, [v32i32, v32f32, v16i64, v16f64], (add AGPR_1024)>;
+} // End GeneratePressureSet = 0
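The VRegClass/ARegClass templates above fold the old per-class constants into arithmetic on the register count. A minimal standalone restatement of those formulas (illustrative only, not part of the patch):

#include <cstdio>

// VRegClass: one v_mov_b32 per 32-bit sub-register.
constexpr unsigned vregCopyCost(unsigned NumRegs) { return NumRegs; }
// ARegClass: n v_accvgpr_write + n v_accvgpr_read, plus one burned VGPR.
constexpr unsigned aregCopyCost(unsigned NumRegs) { return 2 * NumRegs + 1; }

int main() {
  // Matches the hand-written values the patch removes:
  // AReg_128 was CopyCost = 9, AReg_512 was 33, AReg_1024 was 65.
  std::printf("%u %u %u\n", aregCopyCost(4), aregCopyCost(16), aregCopyCost(32));
  std::printf("%u\n", vregCopyCost(8)); // VReg_256 was CopyCost = 8
  return 0;
}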
// This is not a real register. This is just to have a register to add
// to VReg_1 that does not alias any real register that would
@@ -690,6 +833,7 @@ def ARTIFICIAL_VGPR : SIReg <"invalid vgpr", 0> {
let isArtificial = 1;
}
+let GeneratePressureSet = 0 in {
// FIXME: Should specify an empty set for this. No register should
// ever be allocated using VReg_1. This is a hack for SelectionDAG
// that should always be lowered by SILowerI1Copies. TableGen crashes
@@ -718,6 +862,7 @@ def AV_64 : RegisterClass<"AMDGPU", [i64, f64, v4f16], 32,
(add AReg_64, VReg_64)> {
let isAllocatable = 0;
}
+} // End GeneratePressureSet = 0
//===----------------------------------------------------------------------===//
// Register operands
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
new file mode 100644
index 000000000000..64fca0b46797
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
@@ -0,0 +1,160 @@
+//===-- SIRemoveShortExecBranches.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass optimizes the s_cbranch_execz instructions.
+/// The pass removes this skip instruction for short branches,
+/// if there is no unwanted side effect in the fallthrough code sequence.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-remove-short-exec-branches"
+
+static unsigned SkipThreshold;
+
+static cl::opt<unsigned, true> SkipThresholdFlag(
+ "amdgpu-skip-threshold", cl::Hidden,
+ cl::desc(
+ "Number of instructions before jumping over divergent control flow"),
+ cl::location(SkipThreshold), cl::init(12));
+
+namespace {
+
+class SIRemoveShortExecBranches : public MachineFunctionPass {
+private:
+ const SIInstrInfo *TII = nullptr;
+ bool getBlockDestinations(MachineBasicBlock &SrcMBB,
+ MachineBasicBlock *&TrueMBB,
+ MachineBasicBlock *&FalseMBB,
+ SmallVectorImpl<MachineOperand> &Cond);
+ bool mustRetainExeczBranch(const MachineBasicBlock &From,
+ const MachineBasicBlock &To) const;
+ bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
+
+public:
+ static char ID;
+
+ SIRemoveShortExecBranches() : MachineFunctionPass(ID) {
+ initializeSIRemoveShortExecBranchesPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIRemoveShortExecBranches, DEBUG_TYPE,
+ "SI remove short exec branches", false, false)
+
+char SIRemoveShortExecBranches::ID = 0;
+
+char &llvm::SIRemoveShortExecBranchesID = SIRemoveShortExecBranches::ID;
+
+bool SIRemoveShortExecBranches::getBlockDestinations(
+ MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB,
+ MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) {
+ if (TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond))
+ return false;
+
+ if (!FalseMBB)
+ FalseMBB = SrcMBB.getNextNode();
+
+ return true;
+}
+
+bool SIRemoveShortExecBranches::mustRetainExeczBranch(
+ const MachineBasicBlock &From, const MachineBasicBlock &To) const {
+ unsigned NumInstr = 0;
+ const MachineFunction *MF = From.getParent();
+
+ for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
+ MBBI != End && MBBI != ToI; ++MBBI) {
+ const MachineBasicBlock &MBB = *MBBI;
+
+ for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ // When a uniform loop is inside non-uniform control flow, the branch
+ // leaving the loop might never be taken when EXEC = 0.
+ // Hence we should retain the cbranch out of the loop lest it become infinite.
+ if (I->isConditionalBranch())
+ return true;
+
+ if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
+ return true;
+
+ if (TII->isKillTerminator(I->getOpcode()))
+ return true;
+
+ // These instructions are potentially expensive even if EXEC = 0.
+ if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
+ I->getOpcode() == AMDGPU::S_WAITCNT)
+ return true;
+
+ ++NumInstr;
+ if (NumInstr >= SkipThreshold)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// Returns true if the skip branch instruction is removed.
+bool SIRemoveShortExecBranches::removeExeczBranch(MachineInstr &MI,
+ MachineBasicBlock &SrcMBB) {
+ MachineBasicBlock *TrueMBB = nullptr;
+ MachineBasicBlock *FalseMBB = nullptr;
+ SmallVector<MachineOperand, 1> Cond;
+
+ if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond))
+ return false;
+
+ // Consider only the forward branches.
+ if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) ||
+ mustRetainExeczBranch(*FalseMBB, *TrueMBB))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);
+ MI.eraseFromParent();
+ SrcMBB.removeSuccessor(TrueMBB);
+
+ return true;
+}
+
+bool SIRemoveShortExecBranches::runOnMachineFunction(MachineFunction &MF) {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ TII = ST.getInstrInfo();
+ MF.RenumberBlocks();
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ if (MBBI == MBB.end())
+ continue;
+
+ MachineInstr &MI = *MBBI;
+ switch (MI.getOpcode()) {
+ case AMDGPU::S_CBRANCH_EXECZ:
+ Changed = removeExeczBranch(MI, MBB);
+ break;
+ default:
+ break;
+ }
+ }
+
+ return Changed;
+}
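For readers unfamiliar with the cl::opt<unsigned, true>/cl::location pairing used for SkipThreshold above, here is a self-contained sketch of the same idiom. The flag name and program are hypothetical, shown only to illustrate the external-storage form of cl::opt.

#include "llvm/Support/CommandLine.h"

static unsigned Threshold; // plain global backing the option below

static llvm::cl::opt<unsigned, /*ExternalStorage=*/true> ThresholdFlag(
    "example-threshold", llvm::cl::Hidden,
    llvm::cl::desc("Illustrative option backed by external storage"),
    llvm::cl::location(Threshold), llvm::cl::init(12));

int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv);
  // Threshold now holds either the default (12) or the value passed
  // as -example-threshold=<n> on the command line.
  return Threshold >= 12 ? 0 : 1;
}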
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td
index 824d1aeb0df9..932381c99e0b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -1,4 +1,4 @@
-//===-- SISchedule.td - SI Scheduling definitons -------------------------===//
+//===-- SISchedule.td - SI Scheduling definitions -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -27,10 +27,14 @@ def WriteBarrier : SchedWrite;
def MIVGPRRead : SchedRead;
def MIMFMARead : SchedRead;
-// Vector ALU instructions
+// Normal 16- or 32-bit VALU instructions
def Write32Bit : SchedWrite;
+// Conversion to or from F32 (but not converting F64 to or from F32)
+def WriteFloatCvt : SchedWrite;
+// F16 or F32 transcendental instructions (these are quarter rate)
+def WriteTrans32 : SchedWrite;
+// Other quarter rate VALU instructions
def WriteQuarterRate32 : SchedWrite;
-def WriteFullOrQuarterRate32 : SchedWrite;
def WriteFloatFMA : SchedWrite;
@@ -43,6 +47,10 @@ def WriteDoubleAdd : SchedWrite;
// Conversion to or from f64 instruction
def WriteDoubleCvt : SchedWrite;
+// F64 "transcendental" (actually only reciprocal and/or square root)
+// instructions
+def WriteTrans64 : SchedWrite;
+
// Half rate 64-bit instructions.
def Write64Bit : SchedWrite;
@@ -56,7 +64,7 @@ def Write16PassMAI : SchedWrite;
// instructions)
class SISchedMachineModel : SchedMachineModel {
- let CompleteModel = 0;
+ let CompleteModel = 1;
// MicroOpBufferSize = 1 means that instructions will always be added
// the ready queue when they become available. This exposes them
// to the register pressure analysis.
@@ -127,6 +135,8 @@ multiclass SICommonWriteRes {
def : HWVALUWriteRes<Write32Bit, 1>;
def : HWVALUWriteRes<Write64Bit, 2>;
+ def : HWVALUWriteRes<WriteFloatCvt, 4>;
+ def : HWVALUWriteRes<WriteTrans32, 4>;
def : HWVALUWriteRes<WriteQuarterRate32, 4>;
def : HWVALUWriteRes<Write2PassMAI, 2>;
def : HWVALUWriteRes<Write8PassMAI, 8>;
@@ -135,9 +145,9 @@ multiclass SICommonWriteRes {
def : ReadAdvance<MIVGPRRead, -2>;
def : InstRW<[Write64Bit, MIReadVGPR], (instregex "^V_ACCVGPR_WRITE_B32$")>;
- // Technicaly mfma reads can be from 0 to 4 cycles but that does not make
+ // Technically mfma reads can be from 0 to 4 cycles but that does not make
// sense to model because its register setup is huge. In particular if we
- // properly model read advanice as -2 for a vgpr read it will result in a
+ // properly model read advance as -2 for a vgpr read it will result in a
// bad scheduling of acc writes before that mfma. To avoid it we would
// need to consume 2 or 4 more vgprs to be initialized before the acc
// write sequence. Just assume worst case here.
@@ -163,6 +173,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 1>;
def : HWVALUWriteRes<WriteDouble, 4>;
def : HWVALUWriteRes<WriteDoubleAdd, 2>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
+def : HWVALUWriteRes<WriteTrans64, 4>;
def : InstRW<[WriteCopy], (instrs COPY)>;
@@ -176,6 +187,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 16>;
def : HWVALUWriteRes<WriteDouble, 16>;
def : HWVALUWriteRes<WriteDoubleAdd, 8>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
+def : HWVALUWriteRes<WriteTrans64, 16>;
def : InstRW<[WriteCopy], (instrs COPY)>;
@@ -186,17 +198,20 @@ let SchedModel = GFX10SpeedModel in {
// The latency values are 1 / (operations / cycle).
// Add 1 stall cycle for VGPR read.
def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
-def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 9>;
-def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 17>;
+def : HWWriteRes<WriteFloatCvt, [HWVALU, HWRC], 5>;
+def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 6>;
+def : HWWriteRes<WriteTrans32, [HWVALU, HWRC], 10>;
+def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 8>;
def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;
-def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 17>;
-def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 17>;
-def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 17>;
+def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 22>;
+def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 22>;
+def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 22>;
+def : HWWriteRes<WriteTrans64, [HWVALU, HWRC], 24>;
def : HWWriteRes<WriteBranch, [HWBranch], 32>;
def : HWWriteRes<WriteExport, [HWExport, HWRC], 16>;
def : HWWriteRes<WriteLDS, [HWLGKM, HWRC], 20>;
-def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 5>;
+def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 2>;
def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
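The GFX10 block above states the modeling rule: latency is 1 / (operations per cycle), plus one stall cycle for the VGPR read. Purely as arithmetic, with the rate below an assumed input rather than a value taken from the model, and noting that the concrete cycle counts above fold in additional effects:

// Illustration of the stated formula only, not of any specific entry above.
constexpr unsigned modeledLatency(double OpsPerCycle, unsigned StallCycles = 1) {
  return static_cast<unsigned>(1.0 / OpsPerCycle) + StallCycles;
}
static_assert(modeledLatency(0.25) == 5, "quarter-rate: 4 cycles + 1 stall");

int main() { return 0; }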
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 3986ca6dfa81..9c6833a7dab6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -185,6 +185,11 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
if (!MI.getOperand(0).isReg())
TII->commuteInstruction(MI, false, 0, 1);
+ // cmpk requires src0 to be a register
+ const MachineOperand &Src0 = MI.getOperand(0);
+ if (!Src0.isReg())
+ return;
+
const MachineOperand &Src1 = MI.getOperand(1);
if (!Src1.isImm())
return;
@@ -220,7 +225,7 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) {
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
- if (Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
+ if (!Info || Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
return;
MachineFunction *MF = MI.getParent()->getParent();
@@ -323,60 +328,61 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
MachineOperand *SrcReg = Src0;
MachineOperand *SrcImm = Src1;
- if (SrcImm->isImm() &&
- !AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST.hasInv2PiInlineImm())) {
- uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
- uint32_t NewImm = 0;
-
- if (Opc == AMDGPU::S_AND_B32) {
- if (isPowerOf2_32(~Imm)) {
- NewImm = countTrailingOnes(Imm);
- Opc = AMDGPU::S_BITSET0_B32;
- } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
- NewImm = ~Imm;
- Opc = AMDGPU::S_ANDN2_B32;
- }
- } else if (Opc == AMDGPU::S_OR_B32) {
- if (isPowerOf2_32(Imm)) {
- NewImm = countTrailingZeros(Imm);
- Opc = AMDGPU::S_BITSET1_B32;
- } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
- NewImm = ~Imm;
- Opc = AMDGPU::S_ORN2_B32;
- }
- } else if (Opc == AMDGPU::S_XOR_B32) {
- if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
- NewImm = ~Imm;
- Opc = AMDGPU::S_XNOR_B32;
- }
- } else {
- llvm_unreachable("unexpected opcode");
- }
+ if (!SrcImm->isImm() ||
+ AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST.hasInv2PiInlineImm()))
+ return false;
+
+ uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
+ uint32_t NewImm = 0;
- if ((Opc == AMDGPU::S_ANDN2_B32 || Opc == AMDGPU::S_ORN2_B32) &&
- SrcImm == Src0) {
- if (!TII->commuteInstruction(MI, false, 1, 2))
- NewImm = 0;
+ if (Opc == AMDGPU::S_AND_B32) {
+ if (isPowerOf2_32(~Imm)) {
+ NewImm = countTrailingOnes(Imm);
+ Opc = AMDGPU::S_BITSET0_B32;
+ } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+ NewImm = ~Imm;
+ Opc = AMDGPU::S_ANDN2_B32;
+ }
+ } else if (Opc == AMDGPU::S_OR_B32) {
+ if (isPowerOf2_32(Imm)) {
+ NewImm = countTrailingZeros(Imm);
+ Opc = AMDGPU::S_BITSET1_B32;
+ } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+ NewImm = ~Imm;
+ Opc = AMDGPU::S_ORN2_B32;
+ }
+ } else if (Opc == AMDGPU::S_XOR_B32) {
+ if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+ NewImm = ~Imm;
+ Opc = AMDGPU::S_XNOR_B32;
}
+ } else {
+ llvm_unreachable("unexpected opcode");
+ }
- if (NewImm != 0) {
- if (Register::isVirtualRegister(Dest->getReg()) && SrcReg->isReg()) {
- MRI.setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
- MRI.setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
- return true;
- }
+ if ((Opc == AMDGPU::S_ANDN2_B32 || Opc == AMDGPU::S_ORN2_B32) &&
+ SrcImm == Src0) {
+ if (!TII->commuteInstruction(MI, false, 1, 2))
+ NewImm = 0;
+ }
- if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
- MI.setDesc(TII->get(Opc));
- if (Opc == AMDGPU::S_BITSET0_B32 ||
- Opc == AMDGPU::S_BITSET1_B32) {
- Src0->ChangeToImmediate(NewImm);
- // Remove the immediate and add the tied input.
- MI.getOperand(2).ChangeToRegister(Dest->getReg(), false);
- MI.tieOperands(0, 2);
- } else {
- SrcImm->setImm(NewImm);
- }
+ if (NewImm != 0) {
+ if (Register::isVirtualRegister(Dest->getReg()) && SrcReg->isReg()) {
+ MRI.setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
+ MRI.setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
+ return true;
+ }
+
+ if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
+ MI.setDesc(TII->get(Opc));
+ if (Opc == AMDGPU::S_BITSET0_B32 ||
+ Opc == AMDGPU::S_BITSET1_B32) {
+ Src0->ChangeToImmediate(NewImm);
+ // Remove the immediate and add the tied input.
+ MI.getOperand(2).ChangeToRegister(Dest->getReg(), false);
+ MI.tieOperands(0, 2);
+ } else {
+ SrcImm->setImm(NewImm);
}
}
}
@@ -426,8 +432,7 @@ getSubRegForIndex(unsigned Reg, unsigned Sub, unsigned I,
if (Register::isPhysicalRegister(Reg)) {
Reg = TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I));
} else {
- LaneBitmask LM = TRI.getSubRegIndexLaneMask(Sub);
- Sub = TRI.getSubRegFromChannel(I + countTrailingZeros(LM.getAsInteger()));
+ Sub = TRI.getSubRegFromChannel(I + TRI.getChannelFromSubReg(Sub));
}
}
return TargetInstrInfo::RegSubRegPair(Reg, Sub);
@@ -472,26 +477,30 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
if (!TRI.isVGPR(MRI, X))
return nullptr;
- for (MachineOperand &YTop : MRI.use_nodbg_operands(T)) {
- if (YTop.getSubReg() != Tsub)
- continue;
-
- MachineInstr &MovY = *YTop.getParent();
- if ((MovY.getOpcode() != AMDGPU::V_MOV_B32_e32 &&
- MovY.getOpcode() != AMDGPU::COPY) ||
- MovY.getOperand(1).getSubReg() != Tsub)
+ const unsigned SearchLimit = 16;
+ unsigned Count = 0;
+ for (auto Iter = std::next(MovT.getIterator()),
+ E = MovT.getParent()->instr_end();
+ Iter != E && Count < SearchLimit; ++Iter, ++Count) {
+
+ MachineInstr *MovY = &*Iter;
+ if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
+ MovY->getOpcode() != AMDGPU::COPY) ||
+ !MovY->getOperand(1).isReg() ||
+ MovY->getOperand(1).getReg() != T ||
+ MovY->getOperand(1).getSubReg() != Tsub)
continue;
- Register Y = MovY.getOperand(0).getReg();
- unsigned Ysub = MovY.getOperand(0).getSubReg();
+ Register Y = MovY->getOperand(0).getReg();
+ unsigned Ysub = MovY->getOperand(0).getSubReg();
- if (!TRI.isVGPR(MRI, Y) || MovT.getParent() != MovY.getParent())
+ if (!TRI.isVGPR(MRI, Y))
continue;
MachineInstr *MovX = nullptr;
- auto I = std::next(MovT.getIterator()), E = MovT.getParent()->instr_end();
- for (auto IY = MovY.getIterator(); I != E && I != IY; ++I) {
- if (instReadsReg(&*I, X, Xsub, TRI) ||
+ for (auto IY = MovY->getIterator(), I = std::next(MovT.getIterator());
+ I != IY; ++I) {
+ if (instReadsReg(&*I, X, Xsub, TRI) ||
instModifiesReg(&*I, Y, Ysub, TRI) ||
instModifiesReg(&*I, T, Tsub, TRI) ||
(MovX && instModifiesReg(&*I, X, Xsub, TRI))) {
@@ -516,7 +525,7 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
MovX = &*I;
}
- if (!MovX || I == E)
+ if (!MovX)
continue;
LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY);
@@ -533,7 +542,7 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
.addReg(X1.Reg, 0, X1.SubReg).getInstr();
}
MovX->eraseFromParent();
- MovY.eraseFromParent();
+ MovY->eraseFromParent();
MachineInstr *Next = &*std::next(MovT.getIterator());
if (MRI.use_nodbg_empty(T))
MovT.eraseFromParent();
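The restructured shrinkScalarLogicOp earlier in this file rewrites scalar AND/OR with non-inline literals into bit-set forms. Below is a standalone check of the two bit identities it relies on, using plain C++ stand-ins for the LLVM MathExtras helpers; the s_bitset0/s_bitset1 semantics (clear/set bit N of the destination) are assumed from the ISA.

#include <cassert>
#include <cstdint>

static bool isPowerOf2(uint32_t V) { return V && !(V & (V - 1)); }
// GCC/Clang builtins in place of llvm::countTrailingOnes/Zeros.
static unsigned countTrailingOnes(uint32_t V) { return __builtin_ctz(~V); }
static unsigned countTrailingZeros(uint32_t V) { return __builtin_ctz(V); }

int main() {
  // s_and_b32 dst, src, 0xFFFFFFFB clears only bit 2, i.e. s_bitset0_b32 dst, 2.
  uint32_t AndImm = ~(1u << 2);
  assert(isPowerOf2(~AndImm) && countTrailingOnes(AndImm) == 2);

  // s_or_b32 dst, src, 0x00000010 sets only bit 4, i.e. s_bitset1_b32 dst, 4.
  uint32_t OrImm = 1u << 4;
  assert(isPowerOf2(OrImm) && countTrailingZeros(OrImm) == 4);
  return 0;
}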
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 39f5df767977..b1c73df269fb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -61,6 +61,7 @@
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
@@ -154,7 +155,7 @@ private:
LiveIntervals *LIS;
DenseMap<const MachineInstr *, InstrInfo> Instructions;
- DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
+ MapVector<MachineBasicBlock *, BlockInfo> Blocks;
SmallVector<MachineInstr *, 1> LiveMaskQueries;
SmallVector<MachineInstr *, 4> LowerToMovInstrs;
SmallVector<MachineInstr *, 4> LowerToCopyInstrs;
@@ -170,8 +171,6 @@ private:
void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);
char analyzeFunction(MachineFunction &MF);
- bool requiresCorrectState(const MachineInstr &MI) const;
-
MachineBasicBlock::iterator saveSCC(MachineBasicBlock &MBB,
MachineBasicBlock::iterator Before);
MachineBasicBlock::iterator
@@ -525,36 +524,6 @@ char SIWholeQuadMode::analyzeFunction(MachineFunction &MF) {
return GlobalFlags;
}
-/// Whether \p MI really requires the exec state computed during analysis.
-///
-/// Scalar instructions must occasionally be marked WQM for correct propagation
-/// (e.g. thread masks leading up to branches), but when it comes to actual
-/// execution, they don't care about EXEC.
-bool SIWholeQuadMode::requiresCorrectState(const MachineInstr &MI) const {
- if (MI.isTerminator())
- return true;
-
- // Skip instructions that are not affected by EXEC
- if (TII->isScalarUnit(MI))
- return false;
-
- // Generic instructions such as COPY will either disappear by register
- // coalescing or be lowered to SALU or VALU instructions.
- if (MI.isTransient()) {
- if (MI.getNumExplicitOperands() >= 1) {
- const MachineOperand &Op = MI.getOperand(0);
- if (Op.isReg()) {
- if (TRI->isSGPRReg(*MRI, Op.getReg())) {
- // SGPR instructions are not affected by EXEC
- return false;
- }
- }
- }
- }
-
- return true;
-}
-
MachineBasicBlock::iterator
SIWholeQuadMode::saveSCC(MachineBasicBlock &MBB,
MachineBasicBlock::iterator Before) {
@@ -741,7 +710,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
if (II != IE) {
MachineInstr &MI = *II;
- if (requiresCorrectState(MI)) {
+ if (MI.isTerminator() || TII->mayReadEXEC(*MRI, MI)) {
auto III = Instructions.find(&MI);
if (III != Instructions.end()) {
if (III->second.Needs & StateWWM)
@@ -793,18 +762,23 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
if (State == StateWWM) {
assert(SavedNonWWMReg);
fromWWM(MBB, Before, SavedNonWWMReg);
+ LIS->createAndComputeVirtRegInterval(SavedNonWWMReg);
+ SavedNonWWMReg = 0;
State = NonWWMState;
}
if (Needs == StateWWM) {
NonWWMState = State;
+ assert(!SavedNonWWMReg);
SavedNonWWMReg = MRI->createVirtualRegister(BoolRC);
toWWM(MBB, Before, SavedNonWWMReg);
State = StateWWM;
} else {
if (State == StateWQM && (Needs & StateExact) && !(Needs & StateWQM)) {
- if (!WQMFromExec && (OutNeeds & StateWQM))
+ if (!WQMFromExec && (OutNeeds & StateWQM)) {
+ assert(!SavedWQMReg);
SavedWQMReg = MRI->createVirtualRegister(BoolRC);
+ }
toExact(MBB, Before, SavedWQMReg, LiveMaskReg);
State = StateExact;
@@ -837,6 +811,8 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
break;
II = Next;
}
+ assert(!SavedWQMReg);
+ assert(!SavedNonWWMReg);
}
void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
@@ -929,10 +905,12 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
if (GlobalFlags == StateWQM) {
// For a shader that needs only WQM, we can just set it once.
- BuildMI(Entry, EntryMI, DebugLoc(), TII->get(ST->isWave32() ?
- AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64),
- Exec)
- .addReg(Exec);
+ auto MI = BuildMI(Entry, EntryMI, DebugLoc(),
+ TII->get(ST->isWave32() ? AMDGPU::S_WQM_B32
+ : AMDGPU::S_WQM_B64),
+ Exec)
+ .addReg(Exec);
+ LIS->InsertMachineInstrInMaps(*MI);
lowerCopyInstrs();
// EntryMI may become invalid here
@@ -948,6 +926,9 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
for (auto BII : Blocks)
processBlock(*BII.first, LiveMaskReg, BII.first == &*MF.begin());
+ if (LiveMaskReg)
+ LIS->createAndComputeVirtRegInterval(LiveMaskReg);
+
// Physical registers like SCC aren't tracked by default anyway, so just
// removing the ranges we computed is the simplest option for maintaining
// the analysis results.
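The switch from DenseMap to MapVector for Blocks presumably matters because MapVector preserves insertion order, which makes the later per-block walk deterministic. A minimal illustration follows; the integer keys stand in for MachineBasicBlock pointers and are not from the patch.

#include "llvm/ADT/MapVector.h"
#include <cstdio>

int main() {
  llvm::MapVector<int, const char *> Blocks;
  Blocks.insert({42, "entry"});
  Blocks.insert({7, "loop"});
  Blocks.insert({19, "exit"});
  // Iteration follows insertion order (42, 7, 19), unlike DenseMap,
  // whose order depends on hashing and container growth.
  for (const auto &KV : Blocks)
    std::printf("%d -> %s\n", KV.first, KV.second);
  return 0;
}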
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td
index 79982d96c2c8..70bf215c03f3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -1,4 +1,4 @@
-//===---- SMInstructions.td - Scalar Memory Instruction Defintions --------===//
+//===---- SMInstructions.td - Scalar Memory Instruction Definitions -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -11,9 +11,11 @@ def smrd_offset_8 : NamedOperandU32<"SMRDOffset8",
let OperandType = "OPERAND_IMMEDIATE";
}
-def smrd_offset_20 : NamedOperandU32<"SMRDOffset20",
- NamedMatchClass<"SMRDOffset20">> {
+def smem_offset : NamedOperandU32<"SMEMOffset",
+ NamedMatchClass<"SMEMOffset">> {
let OperandType = "OPERAND_IMMEDIATE";
+ let EncoderMethod = "getSMEMOffsetEncoding";
+ let DecoderMethod = "decodeSMEMOffset";
}
//===----------------------------------------------------------------------===//
@@ -43,6 +45,7 @@ class SM_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt
bit has_dlc = 0;
bits<1> has_offset = 1;
bits<1> offset_is_imm = 0;
+ bit is_buffer = 0;
}
class SM_Real <SM_Pseudo ps>
@@ -51,9 +54,15 @@ class SM_Real <SM_Pseudo ps>
let isPseudo = 0;
let isCodeGenOnly = 0;
+ Instruction Opcode = !cast<Instruction>(NAME);
+
// copy relevant pseudo op flags
let SubtargetPredicate = ps.SubtargetPredicate;
let AsmMatchConverter = ps.AsmMatchConverter;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let SMRD = ps.SMRD;
+
+ bit is_buffer = ps.is_buffer;
// encoding
bits<7> sbase;
@@ -153,7 +162,7 @@ multiclass SM_Pseudo_Stores<string opName,
}
multiclass SM_Pseudo_Discards<string opName> {
- def _IMM : SM_Discard_Pseudo <opName, (ins SReg_64:$sbase, smrd_offset_20:$offset), 1>;
+ def _IMM : SM_Discard_Pseudo <opName, (ins SReg_64:$sbase, smem_offset:$offset), 1>;
def _SGPR : SM_Discard_Pseudo <opName, (ins SReg_64:$sbase, SReg_32:$offset), 0>;
}
@@ -178,14 +187,14 @@ class SM_Time_Pseudo<string opName, SDPatternOperator node = null_frag> : SM_Pse
class SM_Inval_Pseudo <string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
opName, (outs), (ins), "", [(node)]> {
let hasSideEffects = 1;
- let mayStore = 1;
+ let mayStore = 0;
let has_sdst = 0;
let has_sbase = 0;
let has_offset = 0;
}
multiclass SM_Pseudo_Probe<string opName, RegisterClass baseClass> {
- def _IMM : SM_Probe_Pseudo <opName, (ins i8imm:$sdata, baseClass:$sbase, smrd_offset_20:$offset), 1>;
+ def _IMM : SM_Probe_Pseudo <opName, (ins i8imm:$sdata, baseClass:$sbase, smem_offset:$offset), 1>;
def _SGPR : SM_Probe_Pseudo <opName, (ins i8imm:$sdata, baseClass:$sbase, SReg_32:$offset), 0>;
}
@@ -228,7 +237,7 @@ class SM_Pseudo_Atomic<string opName,
SM_Atomic_Pseudo<opName,
!if(isRet, (outs dataClass:$sdst), (outs)),
!if(isImm,
- (ins dataClass:$sdata, baseClass:$sbase, smrd_offset_20:$offset, DLC:$dlc),
+ (ins dataClass:$sdata, baseClass:$sbase, smem_offset:$offset, DLC:$dlc),
(ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset, DLC:$dlc)),
!if(isRet, " $sdst", " $sdata") # ", $sbase, $offset" # !if(isRet, " glc", "") # "$dlc",
isRet> {
@@ -266,6 +275,7 @@ defm S_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_load_dwordx4", SReg_64, SReg_128>;
defm S_LOAD_DWORDX8 : SM_Pseudo_Loads <"s_load_dwordx8", SReg_64, SReg_256>;
defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <"s_load_dwordx16", SReg_64, SReg_512>;
+let is_buffer = 1 in {
defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <
"s_buffer_load_dword", SReg_128, SReg_32_XM0_XEXEC
>;
@@ -287,12 +297,14 @@ defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <
defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <
"s_buffer_load_dwordx16", SReg_128, SReg_512
>;
+}
let SubtargetPredicate = HasScalarStores in {
defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64_XEXEC>;
defm S_STORE_DWORDX4 : SM_Pseudo_Stores <"s_store_dwordx4", SReg_64, SReg_128>;
+let is_buffer = 1 in {
defm S_BUFFER_STORE_DWORD : SM_Pseudo_Stores <
"s_buffer_store_dword", SReg_128, SReg_32_XM0_XEXEC
>;
@@ -304,8 +316,10 @@ defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <
defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <
"s_buffer_store_dwordx4", SReg_128, SReg_128
>;
+}
} // End SubtargetPredicate = HasScalarStores
+let SubtargetPredicate = HasSMemTimeInst in
def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>;
def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>;
@@ -321,13 +335,16 @@ def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb
def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>;
defm S_ATC_PROBE : SM_Pseudo_Probe <"s_atc_probe", SReg_64>;
+let is_buffer = 1 in {
defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <"s_atc_probe_buffer", SReg_128>;
+}
} // SubtargetPredicate = isGFX8Plus
-let SubtargetPredicate = isGFX10Plus in {
+let SubtargetPredicate = isGFX10Plus in
def S_GL1_INV : SM_Inval_Pseudo<"s_gl1_inv">;
+let SubtargetPredicate = HasGetWaveIdInst in
def S_GET_WAVEID_IN_WORKGROUP : SM_WaveId_Pseudo <"s_get_waveid_in_workgroup", int_amdgcn_s_get_waveid_in_workgroup>;
-} // End SubtargetPredicate = isGFX10Plus
+
let SubtargetPredicate = HasScalarFlatScratchInsts, Uses = [FLAT_SCR] in {
defm S_SCRATCH_LOAD_DWORD : SM_Pseudo_Loads <"s_scratch_load_dword", SReg_64, SReg_32_XM0_XEXEC>;
@@ -341,6 +358,7 @@ defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <"s_scratch_store_dwordx4", SReg
let SubtargetPredicate = HasScalarAtomics in {
+let is_buffer = 1 in {
defm S_BUFFER_ATOMIC_SWAP : SM_Pseudo_Atomics <"s_buffer_atomic_swap", SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <"s_buffer_atomic_cmpswap", SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_ADD : SM_Pseudo_Atomics <"s_buffer_atomic_add", SReg_128, SReg_32_XM0_XEXEC>;
@@ -368,6 +386,7 @@ defm S_BUFFER_ATOMIC_OR_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_or_x2",
defm S_BUFFER_ATOMIC_XOR_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_xor_x2", SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_INC_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_inc_x2", SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_dec_x2", SReg_128, SReg_64_XEXEC>;
+}
defm S_ATOMIC_SWAP : SM_Pseudo_Atomics <"s_atomic_swap", SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <"s_atomic_cmpswap", SReg_64, SReg_64_XEXEC>;
@@ -481,14 +500,17 @@ class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
let Inst{17} = imm;
let Inst{25-18} = op;
let Inst{31-26} = 0x30; //encoding
- let Inst{51-32} = !if(ps.has_offset, offset{19-0}, ?);
+
+ // VI supports 20-bit unsigned offsets while GFX9+ supports 21-bit signed.
+ // Offset value is corrected accordingly when offset is encoded/decoded.
+ let Inst{52-32} = !if(ps.has_offset, offset{20-0}, ?);
}
multiclass SM_Real_Loads_vi<bits<8> op, string ps,
SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
def _IMM_vi : SMEM_Real_vi <op, immPs> {
- let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
+ let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc);
}
def _SGPR_vi : SMEM_Real_vi <op, sgprPs> {
let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
@@ -509,7 +531,7 @@ multiclass SM_Real_Stores_vi<bits<8> op, string ps,
// FIXME: The operand name $offset is inconsistent with $soff used
// in the pseudo
def _IMM_vi : SMEM_Real_Store_vi <op, immPs> {
- let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
+ let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc);
}
def _SGPR_vi : SMEM_Real_Store_vi <op, sgprPs> {
@@ -665,12 +687,10 @@ class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> :
let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc, DLC:$dlc);
let LGKM_CNT = ps.LGKM_CNT;
- let SMRD = ps.SMRD;
let mayLoad = ps.mayLoad;
let mayStore = ps.mayStore;
let hasSideEffects = ps.hasSideEffects;
let SchedRW = ps.SchedRW;
- let UseNamedOperandTable = ps.UseNamedOperandTable;
let Inst{7-0} = 0xff;
let Inst{8} = 0;
@@ -768,23 +788,26 @@ multiclass SMRD_Pattern <string Instr, ValueType vt> {
multiclass SMLoad_Pattern <string Instr, ValueType vt> {
// 1. Offset as an immediate
def : GCNPat <
- (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc, i1:$dlc),
- (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc),
- (as_i1imm $dlc)))
- >;
+ (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_glc $cachepolicy),
+ (extract_dlc $cachepolicy)))> {
+ let AddedComplexity = 2;
+ }
// 2. 32-bit IMM offset on CI
def : GCNPat <
- (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc, i1:$dlc)),
- (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc), (as_i1imm $dlc))> {
+ (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)),
+ (!cast<InstSI>(Instr#"_IMM_ci") SReg_128:$sbase, smrd_literal_offset:$offset,
+ (extract_glc $cachepolicy), (extract_dlc $cachepolicy))> {
let OtherPredicates = [isGFX7Only];
+ let AddedComplexity = 1;
}
// 3. Offset loaded in a 32-bit SGPR
def : GCNPat <
- (SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc, i1:$dlc),
- (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc),
- (as_i1imm $dlc)))
+ (SIsbuffer_load v4i32:$sbase, i32:$offset, timm:$cachepolicy),
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$offset, (extract_glc $cachepolicy),
+ (extract_dlc $cachepolicy)))
>;
}
@@ -805,8 +828,13 @@ foreach vt = SReg_128.RegTypes in {
defm : SMRD_Pattern <"S_LOAD_DWORDX4", vt>;
}
-defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>;
+foreach vt = SReg_256.RegTypes in {
+defm : SMRD_Pattern <"S_LOAD_DWORDX8", vt>;
+}
+
+foreach vt = SReg_512.RegTypes in {
+defm : SMRD_Pattern <"S_LOAD_DWORDX16", vt>;
+}
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2i32>;
@@ -821,10 +849,21 @@ defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16f32>;
} // End let AddedComplexity = 100
+let OtherPredicates = [HasSMemTimeInst] in {
def : GCNPat <
(i64 (readcyclecounter)),
(S_MEMTIME)
>;
+} // let OtherPredicates = [HasSMemTimeInst]
+
+let OtherPredicates = [HasNoSMemTimeInst] in {
+def : GCNPat <
+ (i64 (readcyclecounter)),
+ (REG_SEQUENCE SReg_64,
+ (S_GETREG_B32 getHwRegImm<HWREG.SHADER_CYCLES, 0, -12>.ret), sub0,
+ (S_MOV_B32 (i32 0)), sub1)
+>;
+} // let OtherPredicates = [HasNoSMemTimeInst]
//===----------------------------------------------------------------------===//
// GFX10.
@@ -844,7 +883,7 @@ class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps> :
let Inst{16} = !if(ps.has_glc, glc, ?);
let Inst{25-18} = op;
let Inst{31-26} = 0x3d;
- let Inst{51-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{19-0}, ?), ?);
+ let Inst{52-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{20-0}, ?), ?);
let Inst{63-57} = !if(ps.offset_is_imm, !cast<int>(SGPR_NULL.HWEncoding),
!if(ps.has_offset, offset{6-0}, ?));
}
@@ -853,7 +892,7 @@ multiclass SM_Real_Loads_gfx10<bits<8> op, string ps,
SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
def _IMM_gfx10 : SMEM_Real_gfx10<op, immPs> {
- let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
+ let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc);
}
def _SGPR_gfx10 : SMEM_Real_gfx10<op, sgprPs> {
let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
@@ -873,7 +912,7 @@ multiclass SM_Real_Stores_gfx10<bits<8> op, string ps,
// FIXME: The operand name $offset is inconsistent with $soff used
// in the pseudo
def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs> {
- let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
+ let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc);
}
def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> {
@@ -1020,3 +1059,12 @@ defm S_DCACHE_DISCARD : SM_Real_Discard_gfx10 <0x28, "S_DCACHE_DISCARD">;
defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10 <0x29, "S_DCACHE_DISCARD_X2">;
} // End SubtargetPredicate = HasScalarAtomics
+
+def SMInfoTable : GenericTable {
+ let FilterClass = "SM_Real";
+ let CppTypeName = "SMInfo";
+ let Fields = ["Opcode", "is_buffer"];
+
+ let PrimaryKey = ["Opcode"];
+ let PrimaryKeyName = "getSMEMOpcodeHelper";
+}
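As the encoding comment earlier in this file notes, the immediate SMEM offset field is 20-bit unsigned on VI and 21-bit signed on GFX9+; the real correction lives in getSMEMOffsetEncoding/decodeSMEMOffset, which are not shown in this diff. A small sketch of what that range difference means, as an illustration only:

#include <cstdint>

// VI: 20-bit unsigned immediate offset.
constexpr bool isLegalSMEMOffsetVI(int64_t Off) {
  return Off >= 0 && Off < (int64_t(1) << 20);
}
// GFX9+: 21-bit signed immediate offset.
constexpr bool isLegalSMEMOffsetGFX9(int64_t Off) {
  return Off >= -(int64_t(1) << 20) && Off < (int64_t(1) << 20);
}

static_assert(isLegalSMEMOffsetVI(0xFFFFF) && !isLegalSMEMOffsetVI(-4), "VI range");
static_assert(isLegalSMEMOffsetGFX9(-4) && !isLegalSMEMOffsetGFX9(int64_t(1) << 20),
              "GFX9 range");

int main() { return 0; }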
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 73ba2ae367f7..9d7b25d55217 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1,4 +1,4 @@
-//===-- SOPInstructions.td - SOP Instruction Defintions -------------------===//
+//===-- SOPInstructions.td - SOP Instruction Definitions ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -97,6 +97,17 @@ class SOP1_0_32 <string opName, list<dag> pattern = []> : SOP1_Pseudo <
let has_sdst = 0;
}
+// Special case for movreld where sdst is treated as a use operand.
+class SOP1_32_movreld <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
+ opName, (outs), (ins SReg_32:$sdst, SSrc_b32:$src0),
+ "$sdst, $src0", pattern>;
+
+// Special case for movreld where sdst is treated as a use operand.
+class SOP1_64_movreld <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
+ opName, (outs), (ins SReg_64:$sdst, SSrc_b64:$src0),
+ "$sdst, $src0", pattern
+>;
+
class SOP1_0_32R <string opName, list<dag> pattern = []> : SOP1_Pseudo <
opName, (outs), (ins SReg_32:$src0),
"$src0", pattern> {
@@ -199,7 +210,9 @@ def S_BCNT1_I32_B64 : SOP1_32_64 <"s_bcnt1_i32_b64",
def S_FF0_I32_B32 : SOP1_32 <"s_ff0_i32_b32">;
def S_FF0_I32_B64 : SOP1_32_64 <"s_ff0_i32_b64">;
-def S_FF1_I32_B64 : SOP1_32_64 <"s_ff1_i32_b64">;
+def S_FF1_I32_B64 : SOP1_32_64 <"s_ff1_i32_b64",
+ [(set i32:$sdst, (AMDGPUffbl_b32 i64:$src0))]
+>;
def S_FF1_I32_B32 : SOP1_32 <"s_ff1_i32_b32",
[(set i32:$sdst, (AMDGPUffbl_b32 i32:$src0))]
@@ -209,7 +222,9 @@ def S_FLBIT_I32_B32 : SOP1_32 <"s_flbit_i32_b32",
[(set i32:$sdst, (AMDGPUffbh_u32 i32:$src0))]
>;
-def S_FLBIT_I32_B64 : SOP1_32_64 <"s_flbit_i32_b64">;
+def S_FLBIT_I32_B64 : SOP1_32_64 <"s_flbit_i32_b64",
+ [(set i32:$sdst, (AMDGPUffbh_u32 i64:$src0))]
+>;
def S_FLBIT_I32 : SOP1_32 <"s_flbit_i32",
[(set i32:$sdst, (AMDGPUffbh_i32 i32:$src0))]
>;
@@ -267,8 +282,8 @@ def S_QUADMASK_B64 : SOP1_64 <"s_quadmask_b64">;
let Uses = [M0] in {
def S_MOVRELS_B32 : SOP1_32R <"s_movrels_b32">;
def S_MOVRELS_B64 : SOP1_64R <"s_movrels_b64">;
-def S_MOVRELD_B32 : SOP1_32 <"s_movreld_b32">;
-def S_MOVRELD_B64 : SOP1_64 <"s_movreld_b64">;
+def S_MOVRELD_B32 : SOP1_32_movreld <"s_movreld_b32">;
+def S_MOVRELD_B64 : SOP1_64_movreld <"s_movreld_b64">;
} // End Uses = [M0]
let SubtargetPredicate = isGFX6GFX7GFX8GFX9 in {
@@ -283,8 +298,8 @@ def S_MOV_FED_B32 : SOP1_32 <"s_mov_fed_b32">;
let SubtargetPredicate = HasVGPRIndexMode in {
def S_SET_GPR_IDX_IDX : SOP1_0_32<"s_set_gpr_idx_idx"> {
- let Uses = [M0];
- let Defs = [M0];
+ let Uses = [M0, MODE];
+ let Defs = [M0, MODE];
}
}
@@ -401,8 +416,14 @@ class UniformUnaryFrag<SDPatternOperator Op> : PatFrag <
class UniformBinFrag<SDPatternOperator Op> : PatFrag <
(ops node:$src0, node:$src1),
(Op $src0, $src1),
- [{ return !N->isDivergent(); }]
->;
+ [{ return !N->isDivergent(); }]> {
+ // This check is unnecessary as it's captured by the result register
+ // bank constraint.
+ //
+ // FIXME: Should add a way for the emitter to recognize this is a
+ // trivially true predicate to eliminate the check.
+ let GISelPredicateCode = [{return true;}];
+}
let Defs = [SCC] in { // Carry out goes to SCC
let isCommutable = 1 in {
@@ -444,9 +465,19 @@ def S_MAX_U32 : SOP2_32 <"s_max_u32",
} // End isCommutable = 1
} // End Defs = [SCC]
+class SelectPat<SDPatternOperator select> : PatFrag <
+ (ops node:$src1, node:$src2),
+ (select SCC, $src1, $src2),
+ [{ return N->getOperand(0)->hasOneUse() && !N->isDivergent(); }]
+>;
let Uses = [SCC] in {
- def S_CSELECT_B32 : SOP2_32 <"s_cselect_b32">;
+ let AddedComplexity = 20 in {
+ def S_CSELECT_B32 : SOP2_32 <"s_cselect_b32",
+ [(set i32:$sdst, (SelectPat<select> i32:$src0, i32:$src1))]
+ >;
+ }
+
def S_CSELECT_B64 : SOP2_64 <"s_cselect_b64">;
} // End Uses = [SCC]
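The SelectPat fragment above routes uniform, single-use selects on SCC to s_cselect_b32. Functionally the instruction is a scalar conditional move; a one-line model, assuming the usual ISA semantics and not taken from the patch:

#include <cstdint>

// s_cselect_b32 dst, src0, src1: dst = SCC ? src0 : src1.
constexpr uint32_t s_cselect_b32(bool SCC, uint32_t Src0, uint32_t Src1) {
  return SCC ? Src0 : Src1;
}
static_assert(s_cselect_b32(true, 7, 9) == 7, "SCC set selects src0");
static_assert(s_cselect_b32(false, 7, 9) == 9, "SCC clear selects src1");

int main() { return 0; }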
@@ -524,22 +555,22 @@ let AddedComplexity = 1 in {
let Defs = [SCC] in {
// TODO: b64 versions require VOP3 change since v_lshlrev_b64 is VOP3
def S_LSHL_B32 : SOP2_32 <"s_lshl_b32",
- [(set SReg_32:$sdst, (shl (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_32:$sdst, (UniformBinFrag<shl> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64",
- [(set SReg_64:$sdst, (shl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_64:$sdst, (UniformBinFrag<shl> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_LSHR_B32 : SOP2_32 <"s_lshr_b32",
- [(set SReg_32:$sdst, (srl (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_32:$sdst, (UniformBinFrag<srl> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_LSHR_B64 : SOP2_64_32 <"s_lshr_b64",
- [(set SReg_64:$sdst, (srl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_64:$sdst, (UniformBinFrag<srl> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_ASHR_I32 : SOP2_32 <"s_ashr_i32",
- [(set SReg_32:$sdst, (sra (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_32:$sdst, (UniformBinFrag<sra> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_ASHR_I64 : SOP2_64_32 <"s_ashr_i64",
- [(set SReg_64:$sdst, (sra (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_64:$sdst, (UniformBinFrag<sra> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
>;
} // End Defs = [SCC]
@@ -592,14 +623,26 @@ let SubtargetPredicate = isGFX9Plus in {
def S_PACK_HH_B32_B16 : SOP2_32<"s_pack_hh_b32_b16">;
let Defs = [SCC] in {
- def S_LSHL1_ADD_U32 : SOP2_32<"s_lshl1_add_u32">;
- def S_LSHL2_ADD_U32 : SOP2_32<"s_lshl2_add_u32">;
- def S_LSHL3_ADD_U32 : SOP2_32<"s_lshl3_add_u32">;
- def S_LSHL4_ADD_U32 : SOP2_32<"s_lshl4_add_u32">;
+ def S_LSHL1_ADD_U32 : SOP2_32<"s_lshl1_add_u32",
+ [(set i32:$sdst, (shl1_add SSrc_b32:$src0, SSrc_b32:$src1))]
+ >;
+ def S_LSHL2_ADD_U32 : SOP2_32<"s_lshl2_add_u32",
+ [(set i32:$sdst, (shl2_add SSrc_b32:$src0, SSrc_b32:$src1))]
+ >;
+ def S_LSHL3_ADD_U32 : SOP2_32<"s_lshl3_add_u32",
+ [(set i32:$sdst, (shl3_add SSrc_b32:$src0, SSrc_b32:$src1))]
+ >;
+ def S_LSHL4_ADD_U32 : SOP2_32<"s_lshl4_add_u32",
+ [(set i32:$sdst, (shl4_add SSrc_b32:$src0, SSrc_b32:$src1))]
+ >;
} // End Defs = [SCC]
- def S_MUL_HI_U32 : SOP2_32<"s_mul_hi_u32">;
- def S_MUL_HI_I32 : SOP2_32<"s_mul_hi_i32">;
+ let isCommutable = 1 in {
+ def S_MUL_HI_U32 : SOP2_32<"s_mul_hi_u32",
+ [(set i32:$sdst, (UniformBinFrag<mulhu> SSrc_b32:$src0, SSrc_b32:$src1))]>;
+ def S_MUL_HI_I32 : SOP2_32<"s_mul_hi_i32",
+ [(set i32:$sdst, (UniformBinFrag<mulhs> SSrc_b32:$src0, SSrc_b32:$src1))]>;
+ }
} // End SubtargetPredicate = isGFX9Plus
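The new GFX9+ patterns above wire s_lshlN_add_u32 and s_mul_hi_{u,i}32 to generic DAG nodes; the shlN_add fragments themselves are defined elsewhere in the backend. Below is a plain restatement of the arithmetic being matched, assuming the usual ISA semantics, as an illustration only:

#include <cstdint>

// s_lshlN_add_u32: (src0 << N) + src1, with 32-bit wraparound.
constexpr uint32_t shlN_add(uint32_t Src0, uint32_t Src1, unsigned N) {
  return (Src0 << N) + Src1;
}
// s_mul_hi_u32: high 32 bits of the 64-bit product.
constexpr uint32_t mul_hi_u32(uint32_t A, uint32_t B) {
  return uint32_t((uint64_t(A) * B) >> 32);
}

static_assert(shlN_add(3, 5, 1) == 11, "s_lshl1_add_u32 3, 5");
static_assert(shlN_add(3, 5, 4) == 53, "s_lshl4_add_u32 3, 5");
static_assert(mul_hi_u32(0x80000000u, 4) == 2, "high half of 0x200000000");

int main() { return 0; }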
//===----------------------------------------------------------------------===//
@@ -760,7 +803,11 @@ def S_CBRANCH_I_FORK : SOPK_Pseudo <
"$sdst, $simm16"
>;
+let hasSideEffects = 1 in {
+
let mayLoad = 1 in {
+// s_getreg_b32 should use hasSideEffects = 1 for tablegen to allow
+// its use in the readcyclecounter selection.
def S_GETREG_B32 : SOPK_Pseudo <
"s_getreg_b32",
(outs SReg_32:$sdst), (ins hwreg:$simm16),
@@ -768,14 +815,20 @@ def S_GETREG_B32 : SOPK_Pseudo <
>;
}
-let hasSideEffects = 1 in {
+let mayLoad = 0, mayStore = 0 in {
def S_SETREG_B32 : SOPK_Pseudo <
"s_setreg_b32",
(outs), (ins SReg_32:$sdst, hwreg:$simm16),
"$simm16, $sdst",
- [(AMDGPUsetreg i32:$sdst, (i16 timm:$simm16))]
->;
+ [(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst)]> {
+
+ // Use custom inserter to optimize some cases to
+ // S_DENORM_MODE/S_ROUND_MODE.
+ let usesCustomInserter = 1;
+ let Defs = [MODE];
+ let Uses = [MODE];
+}
// FIXME: Not on SI?
//def S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32">;
@@ -786,8 +839,11 @@ def S_SETREG_IMM32_B32 : SOPK_Pseudo <
"$simm16, $imm"> {
let Size = 8; // Unlike every other SOPK instruction.
let has_sdst = 0;
+ let Defs = [MODE];
+ let Uses = [MODE];
}
+}
} // End hasSideEffects = 1
class SOPK_WAITCNT<string opName, list<dag> pat=[]> :
@@ -920,12 +976,16 @@ def S_CMP_LG_U64 : SOPC_CMP_64 <0x13, "s_cmp_lg_u64", COND_NE>;
} // End SubtargetPredicate = isGFX8Plus
let SubtargetPredicate = HasVGPRIndexMode in {
+// Setting the GPR index mode is really writing the fields in the mode
+// register. We don't want to add mode register uses to every
+// instruction, and it's too complicated to deal with anyway. This is
+// modeled just as a side effect.
def S_SET_GPR_IDX_ON : SOPC <0x11,
(outs),
(ins SSrc_b32:$src0, GPRIdxMode:$src1),
"s_set_gpr_idx_on $src0,$src1"> {
- let Defs = [M0]; // No scc def
- let Uses = [M0]; // Other bits of m0 unmodified.
+ let Defs = [M0, MODE]; // No scc def
+ let Uses = [M0, MODE]; // Other bits of mode, m0 unmodified.
let hasSideEffects = 1; // Sets mode.gpr_idx_en
let FixedSize = 1;
}
@@ -1099,7 +1159,7 @@ def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> {
let mayStore = 1;
}
-let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16",
[(int_amdgcn_s_waitcnt timm:$simm16)]>;
def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
@@ -1112,8 +1172,8 @@ def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
def S_SLEEP : SOPP <0x0000000e, (ins i32imm:$simm16),
"s_sleep $simm16", [(int_amdgcn_s_sleep timm:$simm16)]> {
let hasSideEffects = 1;
- let mayLoad = 1;
- let mayStore = 1;
+ let mayLoad = 0;
+ let mayStore = 0;
}
def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
@@ -1138,14 +1198,14 @@ def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
def S_INCPERFLEVEL : SOPP <0x00000014, (ins i32imm:$simm16), "s_incperflevel $simm16",
[(int_amdgcn_s_incperflevel timm:$simm16)]> {
let hasSideEffects = 1;
- let mayLoad = 1;
- let mayStore = 1;
+ let mayLoad = 0;
+ let mayStore = 0;
}
def S_DECPERFLEVEL : SOPP <0x00000015, (ins i32imm:$simm16), "s_decperflevel $simm16",
[(int_amdgcn_s_decperflevel timm:$simm16)]> {
let hasSideEffects = 1;
- let mayLoad = 1;
- let mayStore = 1;
+ let mayLoad = 0;
+ let mayStore = 0;
}
def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> {
let simm16 = 0;
@@ -1154,6 +1214,8 @@ def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> {
let SubtargetPredicate = HasVGPRIndexMode in {
def S_SET_GPR_IDX_OFF : SOPP<0x1c, (ins), "s_set_gpr_idx_off"> {
let simm16 = 0;
+ let Defs = [MODE];
+ let Uses = [MODE];
}
}
} // End hasSideEffects
@@ -1161,7 +1223,8 @@ def S_SET_GPR_IDX_OFF : SOPP<0x1c, (ins), "s_set_gpr_idx_off"> {
let SubtargetPredicate = HasVGPRIndexMode in {
def S_SET_GPR_IDX_MODE : SOPP<0x1d, (ins GPRIdxMode:$simm16),
"s_set_gpr_idx_mode$simm16"> {
- let Defs = [M0];
+ let Defs = [M0, MODE];
+ let Uses = [MODE];
}
}
@@ -1176,13 +1239,15 @@ let SubtargetPredicate = isGFX10Plus in {
}
def S_WAITCNT_DEPCTR :
SOPP <0x023, (ins s16imm:$simm16), "s_waitcnt_depctr $simm16">;
- def S_ROUND_MODE :
- SOPP<0x024, (ins s16imm:$simm16), "s_round_mode $simm16">;
- def S_DENORM_MODE :
- SOPP<0x025, (ins i32imm:$simm16), "s_denorm_mode $simm16",
- [(SIdenorm_mode (i32 timm:$simm16))]> {
- let hasSideEffects = 1;
- }
+
+ let hasSideEffects = 0, Uses = [MODE], Defs = [MODE] in {
+ def S_ROUND_MODE :
+ SOPP<0x024, (ins s16imm:$simm16), "s_round_mode $simm16">;
+ def S_DENORM_MODE :
+ SOPP<0x025, (ins i32imm:$simm16), "s_denorm_mode $simm16",
+ [(SIdenorm_mode (i32 timm:$simm16))]>;
+ }
+
def S_TTRACEDATA_IMM :
SOPP<0x028, (ins s16imm:$simm16), "s_ttracedata_imm $simm16">;
} // End SubtargetPredicate = isGFX10Plus
@@ -1223,7 +1288,7 @@ def : GCNPat <
// Same as a 32-bit inreg
def : GCNPat<
- (i32 (sext i16:$src)),
+ (i32 (UniformUnaryFrag<sext> i16:$src)),
(S_SEXT_I32_I16 $src)
>;
@@ -1250,7 +1315,7 @@ def : GCNPat<
>;
def : GCNPat <
- (i64 (sext i16:$src)),
+ (i64 (UniformUnaryFrag<sext> i16:$src)),
(REG_SEQUENCE SReg_64, (i32 (S_SEXT_I32_I16 $src)), sub0,
(i32 (COPY_TO_REGCLASS (S_ASHR_I32 (i32 (S_SEXT_I32_I16 $src)), (S_MOV_B32 (i32 31))), SGPR_32)), sub1)
>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index 075e08986c0c..5819a621f55d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -78,7 +78,11 @@ const char* const IdSymbolic[] = {
"HW_REG_XNACK_MASK",
nullptr, // HW_ID1, no predictable values
nullptr, // HW_ID2, no predictable values
- "HW_REG_POPS_PACKER"
+ "HW_REG_POPS_PACKER",
+ nullptr,
+ nullptr,
+ nullptr,
+ "HW_REG_SHADER_CYCLES"
};
} // namespace Hwreg
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 8b21b9346987..00e6d517bde5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -108,6 +108,7 @@ namespace AMDGPU {
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
+#define GET_MIMGG16MappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
@@ -148,10 +149,17 @@ struct MTBUFInfo {
bool has_soffset;
};
+struct SMInfo {
+ uint16_t Opcode;
+ bool IsBuffer;
+};
+
#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
+#define GET_SMInfoTable_DECL
+#define GET_SMInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
int getMTBUFBaseOpcode(unsigned Opc) {
@@ -214,6 +222,11 @@ bool getMUBUFHasSoffset(unsigned Opc) {
return Info ? Info->has_soffset : false;
}
+bool getSMEMIsBuffer(unsigned Opc) {
+ const SMInfo *Info = getSMEMOpcodeHelper(Opc);
+ return Info ? Info->IsBuffer : false;
+}
+
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
@@ -268,6 +281,13 @@ unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
}
unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
+ // "Per CU" really means "per whatever functional block the waves of a
+ // workgroup must share". For gfx10 in CU mode this is the CU, which contains
+ // two SIMDs.
+ if (isGFX10(*STI) && STI->getFeatureBits().test(FeatureCuMode))
+ return 2;
+ // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
+ // two CUs, so a total of four SIMDs.
return 4;
}
@@ -283,15 +303,6 @@ unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
return std::min(N, 16u);
}
-unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
- return getMaxWavesPerEU(STI) * getEUsPerCU(STI);
-}
-
-unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
- unsigned FlatWorkGroupSize) {
- return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
-}
-
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
return 1;
}
@@ -300,13 +311,13 @@ unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
// FIXME: Need to take scratch memory into account.
if (!isGFX10(*STI))
return 10;
- return 20;
+ return hasGFX10_3Insts(*STI) ? 16 : 20;
}
-unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
- unsigned FlatWorkGroupSize) {
- return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
- getEUsPerCU(STI)) / getEUsPerCU(STI);
+unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
+ unsigned FlatWorkGroupSize) {
+ return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
+ getEUsPerCU(STI));
}
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
@@ -320,8 +331,7 @@ unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
- getWavefrontSize(STI);
+ return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
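The occupancy helpers above now round up with divideCeil instead of alignTo followed by a division. A small standalone sketch of the resulting arithmetic, with the subtarget queries replaced by plain parameters (wavefront size and EUs per CU are passed in directly):

#include <cstdio>

static unsigned divideCeil(unsigned Numerator, unsigned Denominator) {
  return (Numerator + Denominator - 1) / Denominator;
}

static unsigned wavesPerWorkGroup(unsigned FlatWorkGroupSize,
                                  unsigned WavefrontSize) {
  return divideCeil(FlatWorkGroupSize, WavefrontSize);
}

static unsigned wavesPerEUForWorkGroup(unsigned FlatWorkGroupSize,
                                       unsigned WavefrontSize,
                                       unsigned EUsPerCU) {
  return divideCeil(wavesPerWorkGroup(FlatWorkGroupSize, WavefrontSize),
                    EUsPerCU);
}

int main() {
  // gfx10 in CU mode: the 2 SIMDs of a CU share a workgroup's waves, so a
  // 256-item wave32 workgroup (8 waves) needs 4 waves per EU.
  printf("%u\n", wavesPerEUForWorkGroup(256, 32, 2)); // 4
  // Pre-gfx10 (or gfx10 WGP mode): 4 SIMDs; the same workgroup in wave64
  // (4 waves) needs only 1 wave per EU.
  printf("%u\n", wavesPerEUForWorkGroup(256, 64, 4)); // 1
  return 0;
}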
@@ -431,12 +441,21 @@ unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
bool IsWave32 = EnableWavefrontSize32 ?
*EnableWavefrontSize32 :
STI->getFeatureBits().test(FeatureWavefrontSize32);
+
+ if (hasGFX10_3Insts(*STI))
+ return IsWave32 ? 16 : 8;
+
return IsWave32 ? 8 : 4;
}
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
Optional<bool> EnableWavefrontSize32) {
- return getVGPRAllocGranule(STI, EnableWavefrontSize32);
+
+ bool IsWave32 = EnableWavefrontSize32 ?
+ *EnableWavefrontSize32 :
+ STI->getFeatureBits().test(FeatureWavefrontSize32);
+
+ return IsWave32 ? 8 : 4;
}
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
@@ -722,13 +741,16 @@ static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
return ID_SYMBOLIC_FIRST_GFX9_;
else if (isGFX9(STI))
return ID_SYMBOLIC_FIRST_GFX10_;
+ else if (isGFX10(STI) && !isGFX10_BEncoding(STI))
+ return ID_SYMBOLIC_FIRST_GFX1030_;
else
return ID_SYMBOLIC_LAST_;
}
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
- return ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
- IdSymbolic[Id];
+ return
+ ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
+ IdSymbolic[Id] && (Id != ID_XNACK_MASK || !AMDGPU::isGFX10_BEncoding(STI));
}
bool isValidHwreg(int64_t Id) {
@@ -927,7 +949,15 @@ bool hasSRAMECC(const MCSubtargetInfo &STI) {
}
bool hasMIMG_R128(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
+ return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128] && !STI.getFeatureBits()[AMDGPU::FeatureR128A16];
+}
+
+bool hasGFX10A16(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureGFX10A16];
+}
+
+bool hasG16(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureG16];
}
bool hasPackedD16(const MCSubtargetInfo &STI) {
@@ -958,9 +988,17 @@ bool isGCN3Encoding(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}
+bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding];
+}
+
+bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureGFX10_3Insts];
+}
+
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
- const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
+ const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
Reg == AMDGPU::SCC;
}
@@ -1082,6 +1120,11 @@ bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
switch (RCID) {
+ case AMDGPU::VGPR_LO16RegClassID:
+ case AMDGPU::VGPR_HI16RegClassID:
+ case AMDGPU::SGPR_LO16RegClassID:
+ case AMDGPU::AGPR_LO16RegClassID:
+ return 16;
case AMDGPU::SGPR_32RegClassID:
case AMDGPU::VGPR_32RegClassID:
case AMDGPU::VRegOrLds_32RegClassID:
@@ -1103,6 +1146,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::SGPR_96RegClassID:
case AMDGPU::SReg_96RegClassID:
case AMDGPU::VReg_96RegClassID:
+ case AMDGPU::AReg_96RegClassID:
return 96;
case AMDGPU::SGPR_128RegClassID:
case AMDGPU::SReg_128RegClassID:
@@ -1112,14 +1156,24 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::SGPR_160RegClassID:
case AMDGPU::SReg_160RegClassID:
case AMDGPU::VReg_160RegClassID:
+ case AMDGPU::AReg_160RegClassID:
return 160;
+ case AMDGPU::SGPR_192RegClassID:
+ case AMDGPU::SReg_192RegClassID:
+ case AMDGPU::VReg_192RegClassID:
+ case AMDGPU::AReg_192RegClassID:
+ return 192;
+ case AMDGPU::SGPR_256RegClassID:
case AMDGPU::SReg_256RegClassID:
case AMDGPU::VReg_256RegClassID:
+ case AMDGPU::AReg_256RegClassID:
return 256;
+ case AMDGPU::SGPR_512RegClassID:
case AMDGPU::SReg_512RegClassID:
case AMDGPU::VReg_512RegClassID:
case AMDGPU::AReg_512RegClassID:
return 512;
+ case AMDGPU::SGPR_1024RegClassID:
case AMDGPU::SReg_1024RegClassID:
case AMDGPU::VReg_1024RegClassID:
case AMDGPU::AReg_1024RegClassID:
@@ -1141,7 +1195,7 @@ unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
}
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
- if (Literal >= -16 && Literal <= 64)
+ if (isInlinableIntLiteral(Literal))
return true;
uint64_t Val = static_cast<uint64_t>(Literal);
@@ -1158,7 +1212,7 @@ bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
}
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
- if (Literal >= -16 && Literal <= 64)
+ if (isInlinableIntLiteral(Literal))
return true;
// The actual type of the operand does not seem to matter as long
@@ -1187,7 +1241,7 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
if (!HasInv2Pi)
return false;
- if (Literal >= -16 && Literal <= 64)
+ if (isInlinableIntLiteral(Literal))
return true;
uint16_t Val = static_cast<uint16_t>(Literal);
@@ -1217,6 +1271,17 @@ bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}
+bool isInlinableIntLiteralV216(int32_t Literal) {
+ int16_t Lo16 = static_cast<int16_t>(Literal);
+ if (isInt<16>(Literal) || isUInt<16>(Literal))
+ return isInlinableIntLiteral(Lo16);
+
+ int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
+ if (!(Literal & 0xffff))
+ return isInlinableIntLiteral(Hi16);
+ return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
+}
+
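As a reference for the packed-literal rule added above, here is a standalone restatement with a few concrete values; the helpers are local reimplementations of the new functions, using plain integer range checks in place of isInt<16>/isUInt<16>:

#include <cstdint>

static bool isInlinableIntLiteral(int64_t L) { return L >= -16 && L <= 64; }

static bool isInlinableIntLiteralV216(int32_t Literal) {
  int16_t Lo16 = static_cast<int16_t>(Literal);
  // Value fits in 16 bits (signed or unsigned): only the low half is encoded.
  if ((Literal >= INT16_MIN && Literal <= INT16_MAX) ||
      (Literal >= 0 && Literal <= UINT16_MAX))
    return isInlinableIntLiteral(Lo16);

  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  if (!(Literal & 0xffff))
    return isInlinableIntLiteral(Hi16);
  return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
}

// Examples:
//   0x00000040 (lo = 64)          -> true
//   0x00400040 (both halves 64)   -> true
//   0x00010000 (lo = 0, hi = 1)   -> true
//   0x00410041 (both halves 65)   -> false (65 is outside [-16, 64])
//   0x12345678 (halves differ)    -> false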
bool isArgPassedInSGPR(const Argument *A) {
const Function *F = A->getParent();
@@ -1247,16 +1312,61 @@ static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
return isGCN3Encoding(ST) || isGFX10(ST);
}
-int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
+static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
+ return isGFX9(ST) || isGFX10(ST);
+}
+
+bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
+ int64_t EncodedOffset) {
+ return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
+ : isUInt<8>(EncodedOffset);
+}
+
+bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
+ int64_t EncodedOffset,
+ bool IsBuffer) {
+ return !IsBuffer &&
+ hasSMRDSignedImmOffset(ST) &&
+ isInt<21>(EncodedOffset);
+}
+
+static bool isDwordAligned(uint64_t ByteOffset) {
+ return (ByteOffset & 3) == 0;
+}
+
+uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
+ uint64_t ByteOffset) {
if (hasSMEMByteOffset(ST))
return ByteOffset;
+
+ assert(isDwordAligned(ByteOffset));
return ByteOffset >> 2;
}
-bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
- int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
- return (hasSMEMByteOffset(ST)) ?
- isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
+Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
+ int64_t ByteOffset, bool IsBuffer) {
+ // The signed version is always a byte offset.
+ if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
+ assert(hasSMEMByteOffset(ST));
+ return isInt<20>(ByteOffset) ? Optional<int64_t>(ByteOffset) : None;
+ }
+
+ if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
+ return None;
+
+ int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
+ return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
+ ? Optional<int64_t>(EncodedOffset)
+ : None;
+}
+
+Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
+ int64_t ByteOffset) {
+ if (!isCI(ST) || !isDwordAligned(ByteOffset))
+ return None;
+
+ int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
+ return isUInt<32>(EncodedOffset) ? Optional<int64_t>(EncodedOffset) : None;
}
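A condensed sketch of the encoding rules implemented above. The subtarget checks are folded into two booleans (ByteUnits standing in for hasSMEMByteOffset, SignedOffset for the GFX9/GFX10 non-buffer case), and the CI-only 32-bit literal form is left out; the names are local to the example:

#include <cstdint>
#include <optional>

static std::optional<int64_t> encodeSMRDOffset(int64_t ByteOffset,
                                               bool ByteUnits,
                                               bool SignedOffset) {
  if (SignedOffset) {
    // Signed offsets are always byte offsets with a 20-bit signed range here.
    if (ByteOffset >= -(int64_t(1) << 19) && ByteOffset < (int64_t(1) << 19))
      return ByteOffset;
    return std::nullopt;
  }
  if (!ByteUnits) {
    if (ByteOffset & 3) // dword-unit subtargets require dword alignment
      return std::nullopt;
    int64_t Encoded = ByteOffset >> 2; // convert bytes to dwords
    if (Encoded >= 0 && Encoded < (1 << 8))
      return Encoded;
    return std::nullopt;
  }
  if (ByteOffset >= 0 && ByteOffset < (1 << 20))
    return ByteOffset;
  return std::nullopt;
}

// encodeSMRDOffset(1020, /*ByteUnits=*/false, false) -> 255 (fits in 8 bits)
// encodeSMRDOffset(1024, false, false)               -> nullopt (needs 9 bits)
// encodeSMRDOffset(1024, true, false)                -> 1024 (byte-unit field)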
// Given Imm, split it into the values to put into the SOffset and ImmOffset
@@ -1267,8 +1377,8 @@ bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
// aligned if they are aligned to begin with. It also ensures that additional
// offsets within the given alignment can be added to the resulting ImmOffset.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
- const GCNSubtarget *Subtarget, uint32_t Align) {
- const uint32_t MaxImm = alignDown(4095, Align);
+ const GCNSubtarget *Subtarget, Align Alignment) {
+ const uint32_t MaxImm = alignDown(4095, Alignment.value());
uint32_t Overflow = 0;
if (Imm > MaxImm) {
@@ -1286,10 +1396,10 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
//
// Atomic operations fail to work correctly when individual address
// components are unaligned, even if their sum is aligned.
- uint32_t High = (Imm + Align) & ~4095;
- uint32_t Low = (Imm + Align) & 4095;
+ uint32_t High = (Imm + Alignment.value()) & ~4095;
+ uint32_t Low = (Imm + Alignment.value()) & 4095;
Imm = Low;
- Overflow = High - Align;
+ Overflow = High - Alignment.value();
}
}
@@ -1305,8 +1415,7 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
return true;
}
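As a worked example of the large-offset branch visible above (only that arithmetic is sketched here; the small-overflow branch sits outside this hunk, and the final assignment into SOffset/ImmOffset is assumed to follow the surrounding comment):

#include <cassert>
#include <cstdint>

// Split a large MUBUF offset: keep the low 12 bits (minus the alignment slack)
// as the immediate and move the rest into the SOffset register value.
static void splitLargeMUBUFOffset(uint32_t Imm, uint32_t Alignment,
                                  uint32_t &SOffset, uint32_t &ImmOffset) {
  uint32_t High = (Imm + Alignment) & ~4095u;
  uint32_t Low = (Imm + Alignment) & 4095u;
  ImmOffset = Low;
  SOffset = High - Alignment;
}

int main() {
  uint32_t SOffset = 0, ImmOffset = 0;
  splitLargeMUBUFOffset(8000, 4, SOffset, ImmOffset);
  assert(SOffset == 4092 && ImmOffset == 3908);   // 4092 + 3908 == 8000
  assert(SOffset % 4 == 0 && ImmOffset % 4 == 0); // both components stay aligned
  return 0;
}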
-SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
- const GCNSubtarget &ST) {
+SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
*this = getDefaultForCallingConv(F.getCallingConv());
StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
@@ -1318,8 +1427,25 @@ SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
if (!DX10ClampAttr.empty())
DX10Clamp = DX10ClampAttr == "true";
- FP32Denormals = ST.hasFP32Denormals(F);
- FP64FP16Denormals = ST.hasFP64FP16Denormals(F);
+ StringRef DenormF32Attr = F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
+ if (!DenormF32Attr.empty()) {
+ DenormalMode DenormMode = parseDenormalFPAttribute(DenormF32Attr);
+ FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+ FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+ }
+
+ StringRef DenormAttr = F.getFnAttribute("denormal-fp-math").getValueAsString();
+ if (!DenormAttr.empty()) {
+ DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);
+
+ if (DenormF32Attr.empty()) {
+ FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+ FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+ }
+
+ FP64FP16InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+ FP64FP16OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+ }
}
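The constructor now takes the denormal behaviour from the IR-level denormal-fp-math attributes rather than from the subtarget. A rough sketch of the mapping from a parsed mode to the new flag pairs (DenormalModeKind, ModeFlags and flagsFromAttribute are local names; the "output,input" attribute spelling follows the generic LLVM attribute format and is only illustrative here):

#include <cstdio>

enum class DenormalModeKind { IEEE, PreserveSign, PositiveZero };

struct DenormalMode {
  DenormalModeKind Output; // first component of the attribute string
  DenormalModeKind Input;  // second component
};

struct ModeFlags {
  bool InputDenormals;  // true: denormal inputs are not flushed
  bool OutputDenormals; // true: denormal results are not flushed
};

static ModeFlags flagsFromAttribute(DenormalMode Mode) {
  return {Mode.Input == DenormalModeKind::IEEE,
          Mode.Output == DenormalModeKind::IEEE};
}

int main() {
  // e.g. "denormal-fp-math-f32"="preserve-sign,preserve-sign"
  ModeFlags F32 = flagsFromAttribute(
      {DenormalModeKind::PreserveSign, DenormalModeKind::PreserveSign});
  // e.g. "denormal-fp-math"="ieee,ieee"
  ModeFlags F64F16 = flagsFromAttribute(
      {DenormalModeKind::IEEE, DenormalModeKind::IEEE});
  printf("f32 in=%d out=%d, f64/f16 in=%d out=%d\n", F32.InputDenormals,
         F32.OutputDenormals, F64F16.InputDenormals, F64F16.OutputDenormals);
  // prints: f32 in=0 out=0, f64/f16 in=1 out=1
  return 0;
}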
namespace {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index a5bada2890d2..e71554575f6a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -12,10 +12,10 @@
#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "SIDefines.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
@@ -26,17 +26,13 @@
namespace llvm {
class Argument;
-class AMDGPUSubtarget;
-class FeatureBitset;
class Function;
class GCNSubtarget;
class GlobalValue;
-class MCContext;
class MCRegisterClass;
class MCRegisterInfo;
-class MCSection;
class MCSubtargetInfo;
-class MachineMemOperand;
+class StringRef;
class Triple;
namespace AMDGPU {
@@ -87,15 +83,6 @@ unsigned getEUsPerCU(const MCSubtargetInfo *STI);
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize);
-/// \returns Maximum number of waves per compute unit for given subtarget \p
-/// STI without any kind of limitation.
-unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);
-
-/// \returns Maximum number of waves per compute unit for given subtarget \p
-/// STI and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
- unsigned FlatWorkGroupSize);
-
/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
@@ -104,10 +91,10 @@ unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
-/// \returns Maximum number of waves per execution unit for given subtarget \p
-/// STI and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
- unsigned FlatWorkGroupSize);
+/// \returns Number of waves per execution unit required to support the given \p
+/// FlatWorkGroupSize.
+unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
+ unsigned FlatWorkGroupSize);
/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
@@ -116,7 +103,7 @@ unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
/// \returns Number of waves per work group for given subtarget \p STI and
-/// limited by given \p FlatWorkGroupSize.
+/// \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize);
@@ -211,6 +198,7 @@ struct MIMGBaseOpcodeInfo {
uint8_t NumExtraArgs;
bool Gradients;
+ bool G16;
bool Coordinates;
bool LodOrClampOrMip;
bool HasD16;
@@ -247,11 +235,19 @@ struct MIMGMIPMappingInfo {
MIMGBaseOpcode NONMIP;
};
+struct MIMGG16MappingInfo {
+ MIMGBaseOpcode G;
+ MIMGBaseOpcode G16;
+};
+
LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
LLVM_READONLY
-const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned L);
+const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
+
+LLVM_READONLY
+const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
@@ -308,6 +304,9 @@ LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);
LLVM_READONLY
+bool getSMEMIsBuffer(unsigned Opc);
+
+LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
uint8_t NumComponents,
uint8_t NumFormat,
@@ -551,6 +550,8 @@ inline bool isKernel(CallingConv::ID CC) {
bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
+bool hasGFX10A16(const MCSubtargetInfo &STI);
+bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);
bool isSI(const MCSubtargetInfo &STI);
@@ -558,6 +559,9 @@ bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
+bool isGCN3Encoding(const MCSubtargetInfo &STI);
+bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
+bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
/// Is Reg - scalar register
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
@@ -633,6 +637,13 @@ inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
return getOperandSize(Desc.OpInfo[OpNo]);
}
+/// Is this literal inlinable, and not one of the inline immediate values
+/// reserved for floating point types.
+LLVM_READNONE
+inline bool isInlinableIntLiteral(int64_t Literal) {
+ return Literal >= -16 && Literal <= 64;
+}
+
/// Is this literal inlinable
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
@@ -646,11 +657,35 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
+LLVM_READNONE
+bool isInlinableIntLiteralV216(int32_t Literal);
+
bool isArgPassedInSGPR(const Argument *Arg);
-/// \returns The encoding that will be used for \p ByteOffset in the SMRD
-/// offset field.
-int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
+LLVM_READONLY
+bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
+ int64_t EncodedOffset);
+
+LLVM_READONLY
+bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
+ int64_t EncodedOffset,
+ bool IsBuffer);
+
+/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
+/// offsets.
+uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
+
+/// \returns The encoding that will be used for \p ByteOffset in the
+/// SMRD offset field, or None if it won't fit. On GFX9 and GFX10
+/// S_LOAD instructions have a signed offset; on other subtargets it is
+/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
+Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
+ int64_t ByteOffset, bool IsBuffer);
+
+/// \returns The encoding that can be used for a 32-bit literal offset in an SMRD
+/// instruction. This is only useful on CI.
+Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
+ int64_t ByteOffset);
/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
@@ -658,7 +693,8 @@ int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
- const GCNSubtarget *Subtarget, uint32_t Align = 4);
+ const GCNSubtarget *Subtarget,
+ Align Alignment = Align(4));
/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
@@ -677,45 +713,76 @@ struct SIModeRegisterDefaults {
/// If this is set, neither input nor output denormals are flushed for most f32
/// instructions.
- ///
- /// TODO: Split into separate input and output fields if necessary like the
- /// control bits really provide?
- bool FP32Denormals : 1;
+ bool FP32InputDenormals : 1;
+ bool FP32OutputDenormals : 1;
/// If this is set, neither input nor output denormals are flushed for both f64
/// and f16/v2f16 instructions.
- bool FP64FP16Denormals : 1;
+ bool FP64FP16InputDenormals : 1;
+ bool FP64FP16OutputDenormals : 1;
SIModeRegisterDefaults() :
IEEE(true),
DX10Clamp(true),
- FP32Denormals(true),
- FP64FP16Denormals(true) {}
+ FP32InputDenormals(true),
+ FP32OutputDenormals(true),
+ FP64FP16InputDenormals(true),
+ FP64FP16OutputDenormals(true) {}
- // FIXME: Should not depend on the subtarget
- SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);
+ SIModeRegisterDefaults(const Function &F);
static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
const bool IsCompute = AMDGPU::isCompute(CC);
SIModeRegisterDefaults Mode;
- Mode.DX10Clamp = true;
Mode.IEEE = IsCompute;
- Mode.FP32Denormals = false; // FIXME: Should be on by default.
- Mode.FP64FP16Denormals = true;
return Mode;
}
bool operator ==(const SIModeRegisterDefaults Other) const {
return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
- FP32Denormals == Other.FP32Denormals &&
- FP64FP16Denormals == Other.FP64FP16Denormals;
+ FP32InputDenormals == Other.FP32InputDenormals &&
+ FP32OutputDenormals == Other.FP32OutputDenormals &&
+ FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
+ FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
+ }
+
+ bool allFP32Denormals() const {
+ return FP32InputDenormals && FP32OutputDenormals;
+ }
+
+ bool allFP64FP16Denormals() const {
+ return FP64FP16InputDenormals && FP64FP16OutputDenormals;
+ }
+
+ /// Get the encoding value for the FP_DENORM bits of the mode register for the
+ /// FP32 denormal mode.
+ uint32_t fpDenormModeSPValue() const {
+ if (FP32InputDenormals && FP32OutputDenormals)
+ return FP_DENORM_FLUSH_NONE;
+ if (FP32InputDenormals)
+ return FP_DENORM_FLUSH_OUT;
+ if (FP32OutputDenormals)
+ return FP_DENORM_FLUSH_IN;
+ return FP_DENORM_FLUSH_IN_FLUSH_OUT;
+ }
+
+ /// Get the encoding value for the FP_DENORM bits of the mode register for the
+ /// FP64/FP16 denormal mode.
+ uint32_t fpDenormModeDPValue() const {
+ if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
+ return FP_DENORM_FLUSH_NONE;
+ if (FP64FP16InputDenormals)
+ return FP_DENORM_FLUSH_OUT;
+ if (FP64FP16OutputDenormals)
+ return FP_DENORM_FLUSH_IN;
+ return FP_DENORM_FLUSH_IN_FLUSH_OUT;
}
/// Returns true if a flag is compatible when it is enabled in the callee but
/// disabled in the caller.
static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
- return CallerMode == CalleeMode || (CallerMode && !CalleeMode);
+ return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
}
// FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
@@ -727,8 +794,10 @@ struct SIModeRegisterDefaults {
return false;
// Allow inlining denormals enabled into denormals flushed functions.
- return oneWayCompatible(FP64FP16Denormals, CalleeMode.FP64FP16Denormals) &&
- oneWayCompatible(FP32Denormals, CalleeMode.FP32Denormals);
+ return oneWayCompatible(FP64FP16InputDenormals, CalleeMode.FP64FP16InputDenormals) &&
+ oneWayCompatible(FP64FP16OutputDenormals, CalleeMode.FP64FP16OutputDenormals) &&
+ oneWayCompatible(FP32InputDenormals, CalleeMode.FP32InputDenormals) &&
+ oneWayCompatible(FP32OutputDenormals, CalleeMode.FP32OutputDenormals);
}
};
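The inlining check above reduces to a small boolean rule; note the direction: a flag here means "denormals are preserved", so a flushing caller may inline a denormal-preserving callee, but not the reverse. A compile-time restatement (the function name is local to the example):

constexpr bool oneWayCompatibleFlag(bool CallerMode, bool CalleeMode) {
  return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
}

static_assert(oneWayCompatibleFlag(true, true), "matching modes are compatible");
static_assert(oneWayCompatibleFlag(false, false), "matching modes are compatible");
static_assert(oneWayCompatibleFlag(false, true),
              "caller flushes, callee preserves: inlining allowed");
static_assert(!oneWayCompatibleFlag(true, false),
              "caller preserves, callee flushes: inlining blocked");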
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
index 207e4232e829..ef010a7ac157 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -397,6 +397,39 @@ static const char *getRegisterName(unsigned RegNum) {
{0x2c6a, "SPI_SHADER_USER_DATA_VS_30"},
{0x2c6b, "SPI_SHADER_USER_DATA_VS_31"},
+ {0x2c8c, "SPI_SHADER_USER_DATA_GS_0"},
+ {0x2c8d, "SPI_SHADER_USER_DATA_GS_1"},
+ {0x2c8e, "SPI_SHADER_USER_DATA_GS_2"},
+ {0x2c8f, "SPI_SHADER_USER_DATA_GS_3"},
+ {0x2c90, "SPI_SHADER_USER_DATA_GS_4"},
+ {0x2c91, "SPI_SHADER_USER_DATA_GS_5"},
+ {0x2c92, "SPI_SHADER_USER_DATA_GS_6"},
+ {0x2c93, "SPI_SHADER_USER_DATA_GS_7"},
+ {0x2c94, "SPI_SHADER_USER_DATA_GS_8"},
+ {0x2c95, "SPI_SHADER_USER_DATA_GS_9"},
+ {0x2c96, "SPI_SHADER_USER_DATA_GS_10"},
+ {0x2c97, "SPI_SHADER_USER_DATA_GS_11"},
+ {0x2c98, "SPI_SHADER_USER_DATA_GS_12"},
+ {0x2c99, "SPI_SHADER_USER_DATA_GS_13"},
+ {0x2c9a, "SPI_SHADER_USER_DATA_GS_14"},
+ {0x2c9b, "SPI_SHADER_USER_DATA_GS_15"},
+ {0x2c9c, "SPI_SHADER_USER_DATA_GS_16"},
+ {0x2c9d, "SPI_SHADER_USER_DATA_GS_17"},
+ {0x2c9e, "SPI_SHADER_USER_DATA_GS_18"},
+ {0x2c9f, "SPI_SHADER_USER_DATA_GS_19"},
+ {0x2ca0, "SPI_SHADER_USER_DATA_GS_20"},
+ {0x2ca1, "SPI_SHADER_USER_DATA_GS_21"},
+ {0x2ca2, "SPI_SHADER_USER_DATA_GS_22"},
+ {0x2ca3, "SPI_SHADER_USER_DATA_GS_23"},
+ {0x2ca4, "SPI_SHADER_USER_DATA_GS_24"},
+ {0x2ca5, "SPI_SHADER_USER_DATA_GS_25"},
+ {0x2ca6, "SPI_SHADER_USER_DATA_GS_26"},
+ {0x2ca7, "SPI_SHADER_USER_DATA_GS_27"},
+ {0x2ca8, "SPI_SHADER_USER_DATA_GS_28"},
+ {0x2ca9, "SPI_SHADER_USER_DATA_GS_29"},
+ {0x2caa, "SPI_SHADER_USER_DATA_GS_30"},
+ {0x2cab, "SPI_SHADER_USER_DATA_GS_31"},
+
{0x2ccc, "SPI_SHADER_USER_DATA_ES_0"},
{0x2ccd, "SPI_SHADER_USER_DATA_ES_1"},
{0x2cce, "SPI_SHADER_USER_DATA_ES_2"},
@@ -491,38 +524,55 @@ static const char *getRegisterName(unsigned RegNum) {
{0xa310, "PA_SC_SHADER_CONTROL"},
{0xa313, "PA_SC_CONSERVATIVE_RASTERIZATION_CNTL"},
- {0x2d0c, "SPI_SHADER_USER_DATA_LS_0"},
- {0x2d0d, "SPI_SHADER_USER_DATA_LS_1"},
- {0x2d0e, "SPI_SHADER_USER_DATA_LS_2"},
- {0x2d0f, "SPI_SHADER_USER_DATA_LS_3"},
- {0x2d10, "SPI_SHADER_USER_DATA_LS_4"},
- {0x2d11, "SPI_SHADER_USER_DATA_LS_5"},
- {0x2d12, "SPI_SHADER_USER_DATA_LS_6"},
- {0x2d13, "SPI_SHADER_USER_DATA_LS_7"},
- {0x2d14, "SPI_SHADER_USER_DATA_LS_8"},
- {0x2d15, "SPI_SHADER_USER_DATA_LS_9"},
- {0x2d16, "SPI_SHADER_USER_DATA_LS_10"},
- {0x2d17, "SPI_SHADER_USER_DATA_LS_11"},
- {0x2d18, "SPI_SHADER_USER_DATA_LS_12"},
- {0x2d19, "SPI_SHADER_USER_DATA_LS_13"},
- {0x2d1a, "SPI_SHADER_USER_DATA_LS_14"},
- {0x2d1b, "SPI_SHADER_USER_DATA_LS_15"},
- {0x2d1c, "SPI_SHADER_USER_DATA_LS_16"},
- {0x2d1d, "SPI_SHADER_USER_DATA_LS_17"},
- {0x2d1e, "SPI_SHADER_USER_DATA_LS_18"},
- {0x2d1f, "SPI_SHADER_USER_DATA_LS_19"},
- {0x2d20, "SPI_SHADER_USER_DATA_LS_20"},
- {0x2d21, "SPI_SHADER_USER_DATA_LS_21"},
- {0x2d22, "SPI_SHADER_USER_DATA_LS_22"},
- {0x2d23, "SPI_SHADER_USER_DATA_LS_23"},
- {0x2d24, "SPI_SHADER_USER_DATA_LS_24"},
- {0x2d25, "SPI_SHADER_USER_DATA_LS_25"},
- {0x2d26, "SPI_SHADER_USER_DATA_LS_26"},
- {0x2d27, "SPI_SHADER_USER_DATA_LS_27"},
- {0x2d28, "SPI_SHADER_USER_DATA_LS_28"},
- {0x2d29, "SPI_SHADER_USER_DATA_LS_29"},
- {0x2d2a, "SPI_SHADER_USER_DATA_LS_30"},
- {0x2d2b, "SPI_SHADER_USER_DATA_LS_31"},
+ {0x2d0c, "SPI_SHADER_USER_DATA_HS_0"},
+ {0x2d0d, "SPI_SHADER_USER_DATA_HS_1"},
+ {0x2d0e, "SPI_SHADER_USER_DATA_HS_2"},
+ {0x2d0f, "SPI_SHADER_USER_DATA_HS_3"},
+ {0x2d10, "SPI_SHADER_USER_DATA_HS_4"},
+ {0x2d11, "SPI_SHADER_USER_DATA_HS_5"},
+ {0x2d12, "SPI_SHADER_USER_DATA_HS_6"},
+ {0x2d13, "SPI_SHADER_USER_DATA_HS_7"},
+ {0x2d14, "SPI_SHADER_USER_DATA_HS_8"},
+ {0x2d15, "SPI_SHADER_USER_DATA_HS_9"},
+ {0x2d16, "SPI_SHADER_USER_DATA_HS_10"},
+ {0x2d17, "SPI_SHADER_USER_DATA_HS_11"},
+ {0x2d18, "SPI_SHADER_USER_DATA_HS_12"},
+ {0x2d19, "SPI_SHADER_USER_DATA_HS_13"},
+ {0x2d1a, "SPI_SHADER_USER_DATA_HS_14"},
+ {0x2d1b, "SPI_SHADER_USER_DATA_HS_15"},
+ {0x2d1c, "SPI_SHADER_USER_DATA_HS_16"},
+ {0x2d1d, "SPI_SHADER_USER_DATA_HS_17"},
+ {0x2d1e, "SPI_SHADER_USER_DATA_HS_18"},
+ {0x2d1f, "SPI_SHADER_USER_DATA_HS_19"},
+ {0x2d20, "SPI_SHADER_USER_DATA_HS_20"},
+ {0x2d21, "SPI_SHADER_USER_DATA_HS_21"},
+ {0x2d22, "SPI_SHADER_USER_DATA_HS_22"},
+ {0x2d23, "SPI_SHADER_USER_DATA_HS_23"},
+ {0x2d24, "SPI_SHADER_USER_DATA_HS_24"},
+ {0x2d25, "SPI_SHADER_USER_DATA_HS_25"},
+ {0x2d26, "SPI_SHADER_USER_DATA_HS_26"},
+ {0x2d27, "SPI_SHADER_USER_DATA_HS_27"},
+ {0x2d28, "SPI_SHADER_USER_DATA_HS_28"},
+ {0x2d29, "SPI_SHADER_USER_DATA_HS_29"},
+ {0x2d2a, "SPI_SHADER_USER_DATA_HS_30"},
+ {0x2d2b, "SPI_SHADER_USER_DATA_HS_31"},
+
+ {0x2d4c, "SPI_SHADER_USER_DATA_LS_0"},
+ {0x2d4d, "SPI_SHADER_USER_DATA_LS_1"},
+ {0x2d4e, "SPI_SHADER_USER_DATA_LS_2"},
+ {0x2d4f, "SPI_SHADER_USER_DATA_LS_3"},
+ {0x2d50, "SPI_SHADER_USER_DATA_LS_4"},
+ {0x2d51, "SPI_SHADER_USER_DATA_LS_5"},
+ {0x2d52, "SPI_SHADER_USER_DATA_LS_6"},
+ {0x2d53, "SPI_SHADER_USER_DATA_LS_7"},
+ {0x2d54, "SPI_SHADER_USER_DATA_LS_8"},
+ {0x2d55, "SPI_SHADER_USER_DATA_LS_9"},
+ {0x2d56, "SPI_SHADER_USER_DATA_LS_10"},
+ {0x2d57, "SPI_SHADER_USER_DATA_LS_11"},
+ {0x2d58, "SPI_SHADER_USER_DATA_LS_12"},
+ {0x2d59, "SPI_SHADER_USER_DATA_LS_13"},
+ {0x2d5a, "SPI_SHADER_USER_DATA_LS_14"},
+ {0x2d5b, "SPI_SHADER_USER_DATA_LS_15"},
{0xa2aa, "IA_MULTI_VGT_PARAM"},
{0xa2a5, "VGT_GS_MAX_PRIMS_PER_SUBGROUP"},
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
index 0f17c157b206..544ab669d9ae 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
@@ -14,16 +14,12 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MsgPackDocument.h"
-#include <map>
namespace llvm {
-class AMDGPUTargetStreamer;
-class formatted_raw_ostream;
-class MCStreamer;
class Module;
+class StringRef;
class AMDGPUPALMetadata {
unsigned BlobType = 0;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VIInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VIInstructions.td
deleted file mode 100644
index ec7d8875a746..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VIInstructions.td
+++ /dev/null
@@ -1,13 +0,0 @@
-//===-- VIInstructions.td - VI Instruction Defintions ---------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// Instruction definitions for VI and newer.
-//===----------------------------------------------------------------------===//
-
-FIXME: Deleting this file broke buildbots that don't do full rebuilds. This
-file is no longer used by the backend, so it can be deleted once all
-the buildbots update there dependencies.
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index c7aed0985540..17f334f62a30 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1,4 +1,4 @@
-//===-- VOP1Instructions.td - Vector Instruction Defintions ---------------===//
+//===-- VOP1Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -48,9 +48,13 @@ class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1On
let mayStore = 0;
let hasSideEffects = 0;
+ let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret);
+
+ let mayRaiseFPException = ReadsModeReg;
+
let VOP1 = 1;
let VALU = 1;
- let Uses = [EXEC];
+ let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);
let AsmVariantName = AMDGPUAsmVariants.Default;
}
@@ -89,9 +93,7 @@ class VOP1_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
list<dag> ret =
!if(P.HasModifiers,
- [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
- i32:$src0_modifiers,
- i1:$clamp, i32:$omod))))],
+ [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods P.Src0VT:$src0, i32:$src0_modifiers))))],
!if(P.HasOMod,
[(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
i1:$clamp, i32:$omod))))],
@@ -102,8 +104,13 @@ class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
multiclass VOP1Inst <string opName, VOPProfile P,
SDPatternOperator node = null_frag> {
- def _e32 : VOP1_Pseudo <opName, P>;
- def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
+ // We only want to set this on the basic, non-SDWA or DPP forms.
+ defvar should_mov_imm = !eq(opName, "v_mov_b32");
+
+ let isMoveImm = should_mov_imm in {
+ def _e32 : VOP1_Pseudo <opName, P>;
+ def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
+ }
foreach _ = BoolToList<P.HasExtSDWA>.ret in
def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
@@ -146,7 +153,7 @@ let VOPAsmPrefer32Bit = 1 in {
defm V_NOP : VOP1Inst <"v_nop", VOP_NONE>;
}
-let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>;
} // End isReMaterializable = 1, isAsCheapAsAMove = 1
@@ -183,31 +190,51 @@ def V_READFIRSTLANE_B32 :
let SchedRW = [WriteDoubleCvt] in {
defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;
+
+let mayRaiseFPException = 0 in {
defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
+}
+
defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
+
+let mayRaiseFPException = 0 in {
defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
+}
+
} // End SchedRW = [WriteDoubleCvt]
-let SchedRW = [WriteQuarterRate32] in {
+let SchedRW = [WriteFloatCvt] in {
+
+// XXX: Does this really not raise exceptions? The manual claims the
+// 16-bit ones can.
+let mayRaiseFPException = 0 in {
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
+}
+
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
let FPDPRounding = 1 in {
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
} // End FPDPRounding = 1
+
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
+
+let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
-} // End SchedRW = [WriteQuarterRate32]
+} // End ReadsModeReg = 0, mayRaiseFPException = 0
+} // End SchedRW = [WriteFloatCvt]
+let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
+} // ReadsModeReg = 0, mayRaiseFPException = 0
defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
@@ -215,33 +242,30 @@ defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>;
defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;
-let SchedRW = [WriteQuarterRate32] in {
+let SchedRW = [WriteTrans32] in {
defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>;
defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>;
defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>;
defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
-defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, fsqrt>;
-} // End SchedRW = [WriteQuarterRate32]
+defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, any_amdgcn_sqrt>;
+} // End SchedRW = [WriteTrans32]
-let SchedRW = [WriteDouble] in {
+let SchedRW = [WriteTrans64] in {
defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
-} // End SchedRW = [WriteDouble];
-
-let SchedRW = [WriteDouble] in {
-defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, fsqrt>;
-} // End SchedRW = [WriteDouble]
+defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, any_amdgcn_sqrt>;
+} // End SchedRW = [WriteTrans64]
-let SchedRW = [WriteQuarterRate32] in {
+let SchedRW = [WriteTrans32] in {
defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>;
defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
-} // End SchedRW = [WriteQuarterRate32]
+} // End SchedRW = [WriteTrans32]
defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, bitreverse>;
defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>;
-defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32>;
+defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32, AMDGPUffbl_b32>;
defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>;
let SchedRW = [WriteDoubleAdd] in {
@@ -317,7 +341,7 @@ defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_MOVRELSD>;
defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;
let SubtargetPredicate = isGFX6GFX7 in {
- let SchedRW = [WriteQuarterRate32] in {
+ let SchedRW = [WriteTrans32] in {
defm V_LOG_CLAMP_F32 :
VOP1Inst<"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
defm V_RCP_CLAMP_F32 :
@@ -327,8 +351,8 @@ let SubtargetPredicate = isGFX6GFX7 in {
defm V_RSQ_CLAMP_F32 :
VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
defm V_RSQ_LEGACY_F32 :
- VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>;
- } // End SchedRW = [WriteQuarterRate32]
+ VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, int_amdgcn_rsq_legacy>;
+ } // End SchedRW = [WriteTrans32]
let SchedRW = [WriteDouble] in {
defm V_RCP_CLAMP_F64 :
@@ -339,10 +363,10 @@ let SubtargetPredicate = isGFX6GFX7 in {
} // End SubtargetPredicate = isGFX6GFX7
let SubtargetPredicate = isGFX7GFX8GFX9 in {
- let SchedRW = [WriteQuarterRate32] in {
+ let SchedRW = [WriteTrans32] in {
defm V_LOG_LEGACY_F32 : VOP1Inst<"v_log_legacy_f32", VOP_F32_F32>;
defm V_EXP_LEGACY_F32 : VOP1Inst<"v_exp_legacy_f32", VOP_F32_F32>;
- } // End SchedRW = [WriteQuarterRate32]
+ } // End SchedRW = [WriteTrans32]
} // End SubtargetPredicate = isGFX7GFX8GFX9
let SubtargetPredicate = isGFX7Plus in {
@@ -362,15 +386,15 @@ defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
} // End FPDPRounding = 1
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
-let SchedRW = [WriteQuarterRate32] in {
+let SchedRW = [WriteTrans32] in {
defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
-defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, fsqrt>;
+defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16, flog2>;
defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16, fexp2>;
defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
-} // End SchedRW = [WriteQuarterRate32]
+} // End SchedRW = [WriteTrans32]
defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16, int_amdgcn_frexp_exp>;
defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>;
@@ -414,8 +438,11 @@ let SubtargetPredicate = isGFX9Plus in {
}
defm V_SAT_PK_U8_I16 : VOP1Inst<"v_sat_pk_u8_i16", VOP_I32_I32>;
- defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16>;
- defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16>;
+
+ let mayRaiseFPException = 0 in {
+ defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16>;
+ defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16>;
+ } // End mayRaiseFPException = 0
} // End SubtargetPredicate = isGFX9Plus
let SubtargetPredicate = isGFX9Only in {
@@ -458,7 +485,7 @@ class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP1
class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl> :
VOP1_DPP<op, ps, p, 1>,
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX10> {
- let AssemblerPredicate = !if(p.HasExt, HasDPP16, DisableInst);
+ let AssemblerPredicate = HasDPP16;
let SubtargetPredicate = HasDPP16;
}
@@ -475,7 +502,7 @@ class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
let Inst{31-25} = 0x3f;
- let AssemblerPredicate = !if(p.HasExt, HasDPP8, DisableInst);
+ let AssemblerPredicate = HasDPP8;
let SubtargetPredicate = HasDPP8;
}
@@ -812,42 +839,23 @@ def V_MOV_B32_indirect : VPseudoInstSI<(outs),
let SubtargetPredicate = isGFX8GFX9;
}
-// This is a pseudo variant of the v_movreld_b32 instruction in which the
-// vector operand appears only twice, once as def and once as use. Using this
-// pseudo avoids problems with the Two Address instructions pass.
-class V_MOVRELD_B32_pseudo<RegisterClass rc> : VPseudoInstSI <
- (outs rc:$vdst),
- (ins rc:$vsrc, VSrc_b32:$val, i32imm:$offset)> {
- let VOP1 = 1;
-
- let Constraints = "$vsrc = $vdst";
- let Uses = [M0, EXEC];
-
- let SubtargetPredicate = HasMovrel;
-}
-
-def V_MOVRELD_B32_V1 : V_MOVRELD_B32_pseudo<VGPR_32>;
-def V_MOVRELD_B32_V2 : V_MOVRELD_B32_pseudo<VReg_64>;
-def V_MOVRELD_B32_V4 : V_MOVRELD_B32_pseudo<VReg_128>;
-def V_MOVRELD_B32_V8 : V_MOVRELD_B32_pseudo<VReg_256>;
-def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;
-
let OtherPredicates = [isGFX8Plus] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask,
- timm:$bound_ctrl)),
- (V_MOV_B32_dpp $src, $src, (as_i32imm $dpp_ctrl),
- (as_i32imm $row_mask), (as_i32imm $bank_mask),
- (as_i1imm $bound_ctrl))
+ (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask,
+ timm:$bank_mask, timm:$bound_ctrl)),
+ (V_MOV_B32_dpp VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp_ctrl),
+ (as_i32timm $row_mask), (as_i32timm $bank_mask),
+ (as_i1timm $bound_ctrl))
>;
def : GCNPat <
- (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask,
- timm:$bank_mask, timm:$bound_ctrl)),
- (V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl),
- (as_i32imm $row_mask), (as_i32imm $bank_mask),
- (as_i1imm $bound_ctrl))
+ (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl,
+ timm:$row_mask, timm:$bank_mask,
+ timm:$bound_ctrl)),
+ (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl),
+ (as_i32timm $row_mask), (as_i32timm $bank_mask),
+ (as_i1timm $bound_ctrl))
>;
} // End OtherPredicates = [isGFX8Plus]
@@ -907,6 +915,7 @@ defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
let OtherPredicates = [isGFX10Plus] in {
def : GCNPat <
(i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
- (V_MOV_B32_dpp8_gfx10 $src, $src, (as_i32imm $dpp8), (i32 DPP8Mode.FI_0))
+ (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, VGPR_32:$src,
+ (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX10Plus]
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index aaadc3dbc721..aa37dbf1418f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1,4 +1,4 @@
-//===-- VOP2Instructions.td - Vector Instruction Defintions ---------------===//
+//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -69,9 +69,13 @@ class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suf
let mayStore = 0;
let hasSideEffects = 0;
+ let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret);
+
+ let mayRaiseFPException = ReadsModeReg;
+
let VOP2 = 1;
let VALU = 1;
- let Uses = [EXEC];
+ let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);
let AsmVariantName = AMDGPUAsmVariants.Default;
}
@@ -459,17 +463,18 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
//===----------------------------------------------------------------------===//
defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
+let SubtargetPredicate = HasMadMacF32Insts in
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;
let isCommutable = 1 in {
-defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, fadd>;
+defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, any_fadd>;
defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, fsub>;
defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
-defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, fmul>;
-defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32, AMDGPUmul_i24>;
+defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, any_fmul>;
+defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>;
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_i24>;
-defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32, AMDGPUmul_u24>;
+defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>;
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_u24>;
defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;
@@ -484,12 +489,16 @@ defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
+let mayRaiseFPException = 0 in {
+let SubtargetPredicate = HasMadMacF32Insts in {
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
isConvertibleToThreeAddress = 1 in {
defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;
}
def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;
+} // End SubtargetPredicate = HasMadMacF32Insts
+}
// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
@@ -529,8 +538,12 @@ defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT<VOP_I32_I32
defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_hi>;
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT<VOP_F32_F32_I32>, AMDGPUldexp>;
defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"
+
+let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_i16_f32>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_u16_f32>;
+}
+
defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_V2F16_F32_F32>, AMDGPUpkrtz_f16_f32>;
defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_u16_u32>;
defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_i16_i32>;
@@ -541,14 +554,18 @@ defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmi
defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
} // End SubtargetPredicate = isGFX6GFX7
-let SubtargetPredicate = isGFX6GFX7GFX10 in {
let isCommutable = 1 in {
+let SubtargetPredicate = isGFX6GFX7GFX10 in {
+let OtherPredicates = [HasMadMacF32Insts] in
defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>;
-defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32, srl>;
-defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32, sra>;
-defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32, shl>;
-} // End isCommutable = 1
} // End SubtargetPredicate = isGFX6GFX7GFX10
+let SubtargetPredicate = isGFX6GFX7 in {
+defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, srl>;
+defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, sra>;
+defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, shl>;
+} // End SubtargetPredicate = isGFX6GFX7
+} // End isCommutable = 1
+
class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
GCNPat<
@@ -617,15 +634,19 @@ defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16, ashr_rev>;
let isCommutable = 1 in {
let FPDPRounding = 1 in {
-defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, fadd>;
+defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, any_fadd>;
defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, fsub>;
defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
-defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, fmul>;
+defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, any_fmul>;
+
+let mayRaiseFPException = 0 in {
def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">;
+}
+
} // End FPDPRounding = 1
-defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16, add>;
-defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16, sub>;
-defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16, null_frag, "v_sub_u16">;
+defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>;
+defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>;
+defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">;
defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16, mul>;
defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>;
defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>;
@@ -770,16 +791,16 @@ let Predicates = [Has16BitInsts] in {
// an inline immediate than -c.
// TODO: Also do for 64-bit.
def : GCNPat<
- (add i16:$src0, (i16 NegSubInlineConst16:$src1)),
- (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineConst16:$src1)
+ (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
+ (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
>;
let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in {
def : GCNPat<
- (i32 (zext (add i16:$src0, (i16 NegSubInlineConst16:$src1)))),
- (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineConst16:$src1)
+ (i32 (zext (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)))),
+ (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
>;
defm : Arithmetic_i16_0Hi_Pats<add, V_ADD_U16_e64>;
@@ -831,7 +852,7 @@ class VOP2_DPP<bits<6> op, VOP2_DPP_Pseudo ps,
class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
string opName = ps.OpName, VOPProfile p = ps.Pfl> :
VOP2_DPP<op, ps, opName, p, 1> {
- let AssemblerPredicate = !if(p.HasExt, HasDPP16, DisableInst);
+ let AssemblerPredicate = HasDPP16;
let SubtargetPredicate = HasDPP16;
}
@@ -857,7 +878,7 @@ class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
let Inst{30-25} = op;
let Inst{31} = 0x0;
- let AssemblerPredicate = !if(p.HasExt, HasDPP8, DisableInst);
+ let AssemblerPredicate = HasDPP8;
let SubtargetPredicate = HasDPP8;
}
@@ -1250,9 +1271,9 @@ defm V_SUBBREV_U32 : VOP2be_Real_gfx6_gfx7<0x02a>;
defm V_READLANE_B32 : VOP2Only_Real_gfx6_gfx7<0x001>;
-let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in {
+let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
defm V_WRITELANE_B32 : VOP2Only_Real_gfx6_gfx7<0x002>;
-} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in)
+} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)
let SubtargetPredicate = isGFX6GFX7 in {
defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx6_gfx7>;
@@ -1261,6 +1282,7 @@ let SubtargetPredicate = isGFX6GFX7 in {
defm V_ADD_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x003>;
defm V_SUB_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x004>;
defm V_SUBREV_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x005>;
+let OtherPredicates = [HasMadMacF32Insts] in
defm V_MAC_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
defm V_MUL_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
defm V_MUL_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x008>;
@@ -1593,3 +1615,9 @@ let SubtargetPredicate = HasDot3Insts in {
let SubtargetPredicate = HasPkFmacF16Inst in {
defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>;
} // End SubtargetPredicate = HasPkFmacF16Inst
+
+let SubtargetPredicate = HasDot3Insts in {
+ // NB: Opcode conflicts with V_DOT2C_F32_F16
+ let DecoderNamespace = "GFX10_B" in
+ defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx10<0x02>;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 67c8b926302d..169949f2171a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -1,4 +1,4 @@
-//===-- VOP3Instructions.td - Vector Instruction Defintions ---------------===//
+//===-- VOP3Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -32,20 +32,26 @@ class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
ret1));
}
-class getVOP3PModPat<VOPProfile P, SDPatternOperator node> {
+class getVOP3PModPat<VOPProfile P, SDPatternOperator node, bit HasExplicitClamp> {
+ dag src0_dag = (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers));
+ dag src1_dag = (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers));
+ dag src2_dag = (P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers));
+ dag clamp_dag = (i1 timm:$clamp);
+
list<dag> ret3 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
- (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
- (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)),
- (P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers))))];
+ !if(HasExplicitClamp,
+ (DivergentFragOrOp<node, P>.ret src0_dag, src1_dag, src2_dag, clamp_dag),
+ (DivergentFragOrOp<node, P>.ret src0_dag, src1_dag, src2_dag)))];
list<dag> ret2 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
- (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
- (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers))))];
+ !if(HasExplicitClamp,
+ (DivergentFragOrOp<node, P>.ret src0_dag, src1_dag, clamp_dag),
+ (DivergentFragOrOp<node, P>.ret src0_dag, src1_dag)))];
list<dag> ret1 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+ !if(HasExplicitClamp,
+ (DivergentFragOrOp<node, P>.ret src0_dag, clamp_dag),
+ (DivergentFragOrOp<node, P>.ret src0_dag)))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
@@ -54,18 +60,16 @@ class getVOP3PModPat<VOPProfile P, SDPatternOperator node> {
class getVOP3OpSelPat<VOPProfile P, SDPatternOperator node> {
list<dag> ret3 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
- (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))),
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers)),
(P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers)),
(P.Src2VT (VOP3OpSel P.Src2VT:$src2, i32:$src2_modifiers))))];
list<dag> ret2 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
- (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))),
- (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers))))];
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers)),
+ (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers))))];
list<dag> ret1 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
@@ -74,18 +78,18 @@ class getVOP3OpSelPat<VOPProfile P, SDPatternOperator node> {
class getVOP3OpSelModPat<VOPProfile P, SDPatternOperator node> {
list<dag> ret3 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers),
(VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers)),
(P.Src2VT (VOP3OpSelMods P.Src2VT:$src2, i32:$src2_modifiers))))];
list<dag> ret2 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+ (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers)),
(P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
(P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers))))];
list<dag> ret1 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))))];
list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
!if(!eq(P.NumSrcArgs, 2), ret2,
@@ -224,12 +228,13 @@ def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> {
class VOP3Interp<string OpName, VOPProfile P, list<dag> pattern = []> :
VOP3_Pseudo<OpName, P, pattern> {
let AsmMatchConverter = "cvtVOP3Interp";
+ let mayRaiseFPException = 0;
}
def VOP3_INTERP : VOPProfile<[f32, f32, i32, untyped]> {
let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
Attr:$attr, AttrChan:$attrchan,
- clampmod:$clamp, omod:$omod);
+ clampmod0:$clamp, omod0:$omod);
let Asm64 = "$vdst, $src0_modifiers, $attr$attrchan$clamp$omod";
}
@@ -237,7 +242,7 @@ def VOP3_INTERP : VOPProfile<[f32, f32, i32, untyped]> {
def VOP3_INTERP_MOV : VOPProfile<[f32, i32, i32, untyped]> {
let Ins64 = (ins InterpSlot:$src0,
Attr:$attr, AttrChan:$attrchan,
- clampmod:$clamp, omod:$omod);
+ clampmod0:$clamp, omod0:$omod);
let Asm64 = "$vdst, $src0, $attr$attrchan$clamp$omod";
@@ -286,17 +291,25 @@ class VOP3_INTERP16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> {
let isCommutable = 1 in {
+let mayRaiseFPException = 0 in {
+let SubtargetPredicate = HasMadMacF32Insts in {
def V_MAD_LEGACY_F32 : VOP3Inst <"v_mad_legacy_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
def V_MAD_F32 : VOP3Inst <"v_mad_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, fmad>;
+} // End SubtargetPredicate = HasMadMacF32Insts
+
+let SubtargetPredicate = HasNoMadMacF32Insts in
+def V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
+}
+
def V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
def V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
-def V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, fma>;
+def V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fma>;
def V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
let SchedRW = [WriteDoubleAdd] in {
let FPDPRounding = 1 in {
-def V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, fma>;
-def V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile<VOP_F64_F64_F64>, fadd, 1>;
+def V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, any_fma>;
+def V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile<VOP_F64_F64_F64>, any_fadd, 1>;
def V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile<VOP_F64_F64_F64>, fmul, 1>;
} // End FPDPRounding = 1
def V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile<VOP_F64_F64_F64>, fminnum_like, 1>;
@@ -310,7 +323,7 @@ def V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", VOP3_Profile<VOP_I32_I32_I32>>;
def V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", VOP3_Profile<VOP_I32_I32_I32>, mulhs>;
} // End SchedRW = [WriteQuarterRate32]
-let Uses = [VCC, EXEC] in {
+let Uses = [MODE, VCC, EXEC] in {
// v_div_fmas_f32:
// result = src0 * src1 + src2
// if (vcc)
@@ -332,15 +345,20 @@ def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC, []>
} // End isCommutable = 1
+let mayRaiseFPException = 0 in {
def V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubeid>;
def V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubesc>;
def V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubetc>;
def V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubema>;
+} // End mayRaiseFPException = 0
+
def V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
def V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
def V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfi>;
-def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbit>;
+def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, fshr>;
def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>;
+
+let mayRaiseFPException = 0 in { // XXX - Seems suspect but manual doesn't say it does
def V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmin3>;
def V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmin3>;
def V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumin3>;
@@ -350,6 +368,8 @@ def V_MAX3_U32 : VOP3Inst <"v_max3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDG
def V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmed3>;
def V_MED3_I32 : VOP3Inst <"v_med3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmed3>;
def V_MED3_U32 : VOP3Inst <"v_med3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumed3>;
+} // End mayRaiseFPException = 0
+
def V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
def V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
def V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
@@ -362,6 +382,8 @@ def V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_
def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUldexp, 1>;
} // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1
+
+let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it does.
def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
let SchedRW = [WriteFloatFMA, WriteSALU];
let AsmMatchConverter = "";
@@ -373,6 +395,7 @@ def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64,
let AsmMatchConverter = "";
let FPDPRounding = 1;
}
+} // End mayRaiseFPException = 0
def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
@@ -380,17 +403,16 @@ let Constraints = "@earlyclobber $vdst" in {
def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
} // End Constraints = "@earlyclobber $vdst"
-def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUtrig_preop> {
+def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, int_amdgcn_trig_preop> {
let SchedRW = [WriteDouble];
}
let SchedRW = [Write64Bit] in {
-let SubtargetPredicate = isGFX6GFX7GFX10 in {
+let SubtargetPredicate = isGFX6GFX7 in {
def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_I64_I64_I32>, shl>;
def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_I64_I64_I32>, srl>;
def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_I64_I64_I32>, sra>;
-def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
-} // End SubtargetPredicate = isGFX6GFX7GFX10
+} // End SubtargetPredicate = isGFX6GFX7
let SubtargetPredicate = isGFX8Plus in {
def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshl_rev>;
@@ -399,6 +421,23 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>, as
} // End SubtargetPredicate = isGFX8Plus
} // End SchedRW = [Write64Bit]
+def : GCNPat<
+ (i64 (getDivergentFrag<sext>.ret i16:$src)),
+ (REG_SEQUENCE VReg_64,
+ (i32 (V_BFE_I32 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10)))), sub0,
+ (i32 (COPY_TO_REGCLASS
+ (V_ASHRREV_I32_e32 (S_MOV_B32 (i32 0x1f)), (i32 (V_BFE_I32 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10))))
+ ), VGPR_32)), sub1)
+>;
+
+def : GCNPat<
+ (i32 (getDivergentFrag<sext>.ret i16:$src)),
+ (i32 (V_BFE_I32 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10))))
+>;
+
+let SubtargetPredicate = isGFX6GFX7GFX10 in {
+def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
+} // End SubtargetPredicate = isGFX6GFX7GFX10
let SchedRW = [Write32Bit] in {
let SubtargetPredicate = isGFX8Plus in {
@@ -417,7 +456,7 @@ let isCommutable = 1 in {
let SchedRW = [WriteQuarterRate32, WriteSALU] in {
def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
-} // End SchedRW = [WriteDouble, WriteSALU]
+} // End SchedRW = [WriteQuarterRate32, WriteSALU]
} // End isCommutable = 1
} // End SubtargetPredicate = isGFX7Plus
@@ -434,11 +473,11 @@ def V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9",
let FPDPRounding = 1;
}
-def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma> {
+def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, any_fma> {
let Predicates = [Has16BitInsts, isGFX8Only];
let FPDPRounding = 1;
}
-def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, fma> {
+def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, any_fma> {
let renamedInGFX9 = 1;
let Predicates = [Has16BitInsts, isGFX9Plus];
let FPDPRounding = 1;
@@ -451,7 +490,7 @@ def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CL
def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
let FPDPRounding = 1 in {
def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
-let Uses = [M0, EXEC] in {
+let Uses = [MODE, M0, EXEC] in {
// For some reason the intrinsic operands are in a different order
// from the instruction operands.
def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>,
@@ -462,7 +501,7 @@ def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i3
(i32 timm:$attr),
(i1 timm:$high),
M0))]>;
-} // End Uses = [M0, EXEC]
+} // End Uses = [MODE, M0, EXEC]
} // End FPDPRounding = 1
} // End renamedInGFX9 = 1
@@ -478,32 +517,29 @@ def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile<VOP_I16_I16_I16_
def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
} // End SubtargetPredicate = isGFX9Plus
-let Uses = [M0, EXEC], FPDPRounding = 1 in {
+let Uses = [MODE, M0, EXEC], FPDPRounding = 1 in {
def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>,
- [(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 timm:$attrchan),
- (i32 timm:$attr),
- (i32 timm:$src0_modifiers),
- (i1 timm:$high),
- (i1 timm:$clamp),
- (i32 timm:$omod)))]>;
-def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>,
- [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 timm:$attrchan),
- (i32 timm:$attr),
- (i32 timm:$src0_modifiers),
- (f32 VRegSrc_32:$src2),
- (i32 timm:$src2_modifiers),
- (i1 timm:$high),
- (i1 timm:$clamp),
- (i32 timm:$omod)))]>;
-} // End Uses = [M0, EXEC], FPDPRounding = 1
+ [(set f32:$vdst, (int_amdgcn_interp_p1_f16 (VOP3Mods f32:$src0, i32:$src0_modifiers),
+ (i32 timm:$attrchan),
+ (i32 timm:$attr),
+ (i1 timm:$high), M0))]> {
+ // This predicate should only apply to the selection pattern. The
+ // instruction still exists and should decode on subtargets with
+ // other bank counts.
+ let OtherPredicates = [has32BankLDS];
+}
+
+
+def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>>;
+} // End Uses = [MODE, M0, EXEC], FPDPRounding = 1
} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
-let SubtargetPredicate = isGFX8Plus, Uses = [M0, EXEC] in {
+let SubtargetPredicate = isGFX8Plus, Uses = [MODE, M0, EXEC] in {
def V_INTERP_P1_F32_e64 : VOP3Interp <"v_interp_p1_f32", VOP3_INTERP>;
def V_INTERP_P2_F32_e64 : VOP3Interp <"v_interp_p2_f32", VOP3_INTERP>;
def V_INTERP_MOV_F32_e64 : VOP3Interp <"v_interp_mov_f32", VOP3_INTERP_MOV>;
-} // End SubtargetPredicate = isGFX8Plus, Uses = [M0, EXEC]
+} // End SubtargetPredicate = isGFX8Plus, Uses = [MODE, M0, EXEC]
let Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9] in {
@@ -565,9 +601,20 @@ class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
}
return true;
- }]
-> {
+ }]> {
let PredicateCodeUsesOperands = 1;
+
+ // The divergence predicate is irrelevant in GlobalISel, as we have
+ // proper register bank checks. We also force all VOP instruction
+ // operands to VGPR, so we should not need to check the constant bus
+ // restriction.
+ //
+ // FIXME: With unlucky SGPR operands, we could penalize code by
+ // blocking folding SGPR->VGPR copies later.
+ // FIXME: There's no register bank verifier
+ // FIXME: Should add a way for the emitter to recognize this is a
+ // trivially true predicate to eliminate the check.
+ let GISelPredicateCode = [{return true;}];
}
let SubtargetPredicate = isGFX9Plus in {
@@ -602,14 +649,14 @@ def V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32,
def V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
def V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
-def V_ADD_I32_gfx9 : VOP3Inst <"v_add_i32_gfx9", VOP3_Profile<VOP_I32_I32_I32>>;
-def V_SUB_I32_gfx9 : VOP3Inst <"v_sub_i32_gfx9", VOP3_Profile<VOP_I32_I32_I32>>;
+def V_ADD_I32_gfx9 : VOP3Inst <"v_add_i32_gfx9", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
+def V_SUB_I32_gfx9 : VOP3Inst <"v_sub_i32_gfx9", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
// This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions.
(ThreeOpFrag<op1, op2> i32:$src0, i32:$src1, i32:$src2),
- (inst i32:$src0, i32:$src1, i32:$src2)
+ (inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2)
>;
def : ThreeOp_i32_Pats<shl, add, V_LSHL_ADD_U32>;
@@ -634,6 +681,40 @@ def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3
let HasOMod = 0;
}
+class PermlanePat<SDPatternOperator permlane,
+ Instruction inst> : GCNPat<
+ (permlane i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2,
+ timm:$fi, timm:$bc),
+ (inst (as_i1timm $fi), VGPR_32:$src0, (as_i1timm $bc),
+ SCSrc_b32:$src1, 0, SCSrc_b32:$src2, VGPR_32:$vdst_in)
+>;
+
+// Permlane intrinsic that has either fetch invalid or bound control
+// fields enabled.
+class BoundControlOrFetchInvalidPermlane<SDPatternOperator permlane> :
+ PatFrag<(ops node:$vdst_in, node:$src0, node:$src1, node:$src2,
+ node:$fi, node:$bc),
+ (permlane node:$vdst_in, node:$src0, node:
+ $src1, node:$src2, node:$fi, node:$bc)> {
+ let PredicateCode = [{ return N->getConstantOperandVal(5) != 0 ||
+ N->getConstantOperandVal(6) != 0; }];
+ let GISelPredicateCode = [{
+ return MI.getOperand(6).getImm() != 0 ||
+ MI.getOperand(7).getImm() != 0;
+ }];
+}
+
+// Drop the input value if it won't be read.
+class PermlaneDiscardVDstIn<SDPatternOperator permlane,
+ Instruction inst> : GCNPat<
+ (permlane srcvalue, i32:$src0, i32:$src1, i32:$src2,
+ timm:$fi, timm:$bc),
+ (inst (as_i1timm $fi), VGPR_32:$src0, (as_i1timm $bc),
+ SCSrc_b32:$src1, 0, SCSrc_b32:$src2,
+ (IMPLICIT_DEF))
+>;
+
+
let SubtargetPredicate = isGFX10Plus in {
def V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
def : ThreeOp_i32_Pats<xor, xor, V_XOR3_B32>;
@@ -643,16 +724,35 @@ let SubtargetPredicate = isGFX10Plus in {
def V_PERMLANEX16_B32 : VOP3Inst <"v_permlanex16_b32", VOP3_PERMLANE_Profile>;
} // End $vdst = $vdst_in, DisableEncoding $vdst_in
- def : GCNPat<
- (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
- (V_PERMLANE16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
- >;
- def : GCNPat<
- (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
- (V_PERMLANEX16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
- >;
+ def : PermlanePat<int_amdgcn_permlane16, V_PERMLANE16_B32>;
+ def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32>;
+
+ def : PermlaneDiscardVDstIn<
+ BoundControlOrFetchInvalidPermlane<int_amdgcn_permlane16>,
+ V_PERMLANE16_B32>;
+ def : PermlaneDiscardVDstIn<
+ BoundControlOrFetchInvalidPermlane<int_amdgcn_permlanex16>,
+ V_PERMLANEX16_B32>;
} // End SubtargetPredicate = isGFX10Plus
+class DivFmasPat<ValueType vt, Instruction inst, Register CondReg> : GCNPat<
+ (AMDGPUdiv_fmas (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
+ (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)),
+ (vt (VOP3Mods vt:$src2, i32:$src2_modifiers)),
+ (i1 CondReg)),
+ (inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2)
+>;
+
+let WaveSizePredicate = isWave64 in {
+def : DivFmasPat<f32, V_DIV_FMAS_F32, VCC>;
+def : DivFmasPat<f64, V_DIV_FMAS_F64, VCC>;
+}
+
+let WaveSizePredicate = isWave32 in {
+def : DivFmasPat<f32, V_DIV_FMAS_F32, VCC_LO>;
+def : DivFmasPat<f64, V_DIV_FMAS_F64, VCC_LO>;
+}
+
//===----------------------------------------------------------------------===//
// Integer Clamp Patterns
//===----------------------------------------------------------------------===//
@@ -745,9 +845,9 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
defm V_READLANE_B32 : VOP3_Real_gfx10<0x360>;
-let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in {
+let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
defm V_WRITELANE_B32 : VOP3_Real_gfx10<0x361>;
-} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in)
+} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)
defm V_XOR3_B32 : VOP3_Real_gfx10<0x178>;
defm V_LSHLREV_B64 : VOP3_Real_gfx10<0x2ff>;
@@ -925,6 +1025,10 @@ defm V_TRIG_PREOP_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x174>;
defm V_DIV_SCALE_F32 : VOP3be_Real_gfx6_gfx7_gfx10<0x16d>;
defm V_DIV_SCALE_F64 : VOP3be_Real_gfx6_gfx7_gfx10<0x16e>;
+// NB: Same opcode as v_mad_legacy_f32
+let DecoderNamespace = "GFX10_B" in
+defm V_FMA_LEGACY_F32 : VOP3_Real_gfx10<0x140>;
+
//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 933acc2278fd..fc457ad212d4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -1,4 +1,4 @@
-//===-- VOP3PInstructions.td - Vector Instruction Defintions --------------===//
+//===-- VOP3PInstructions.td - Vector Instruction Definitions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -10,9 +10,11 @@
// VOP3P Classes
//===----------------------------------------------------------------------===//
-class VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
+class VOP3PInst<string OpName, VOPProfile P,
+ SDPatternOperator node = null_frag,
+ bit HasExplicitClamp = 0> :
VOP3P_Pseudo<OpName, P,
- !if(P.HasModifiers, getVOP3PModPat<P, node>.ret, getVOP3Pat<P, node>.ret)
+ !if(P.HasModifiers, getVOP3PModPat<P, node, HasExplicitClamp>.ret, getVOP3Pat<P, node>.ret)
>;
// Non-packed instructions that use the VOP3P encoding.
@@ -29,9 +31,14 @@ class VOP3_VOP3PInst<string OpName, VOPProfile P, bit UseTiedOutput = 0,
!con(
(ins FP16InputMods:$src0_modifiers, VCSrc_f16:$src0,
FP16InputMods:$src1_modifiers, VCSrc_f16:$src1,
- FP16InputMods:$src2_modifiers, VCSrc_f16:$src2,
- clampmod:$clamp),
- !if(UseTiedOutput, (ins VGPR_32:$vdst_in), (ins))),
+ FP16InputMods:$src2_modifiers, VCSrc_f16:$src2),
+ // FIXME: clampmod0 misbehaves with the non-default vdst_in
+ // following it. For now workaround this by requiring clamp
+ // in tied patterns. This should use undef_tied_input, but it
+ // seems underdeveloped and doesn't apply the right register
+ // class constraints.
+ !if(UseTiedOutput, (ins clampmod:$clamp, VGPR_32:$vdst_in),
+ (ins clampmod0:$clamp))),
(ins op_sel:$op_sel, op_sel_hi:$op_sel_hi));
let Constraints = !if(UseTiedOutput, "$vdst = $vdst_in", "");
@@ -45,9 +52,9 @@ def V_PK_MAD_I16 : VOP3PInst<"v_pk_mad_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16_
def V_PK_MAD_U16 : VOP3PInst<"v_pk_mad_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;
let FPDPRounding = 1 in {
-def V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, fma>;
-def V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fadd>;
-def V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmul>;
+def V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, any_fma>;
+def V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, any_fadd>;
+def V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, any_fmul>;
} // End FPDPRounding = 1
def V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmaxnum_like>;
def V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fminnum_like>;
@@ -75,8 +82,8 @@ def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I1
// The constant will be emitted as a mov, and folded later.
// TODO: We could directly encode the immediate now
def : GCNPat<
- (add (v2i16 (VOP3PMods0 v2i16:$src0, i32:$src0_modifiers, i1:$clamp)), NegSubInlineConstV216:$src1),
- (V_PK_SUB_U16 $src0_modifiers, $src0, SRCMODS.OP_SEL_1, NegSubInlineConstV216:$src1, $clamp)
+ (add (v2i16 (VOP3PMods v2i16:$src0, i32:$src0_modifiers)), NegSubInlineConstV216:$src1),
+ (V_PK_SUB_U16 $src0_modifiers, $src0, SRCMODS.OP_SEL_1, NegSubInlineConstV216:$src1)
>;
multiclass MadFmaMixPats<SDPatternOperator fma_like,
@@ -142,10 +149,11 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
}
let SubtargetPredicate = HasMadMixInsts in {
+
// These are VOP3a-like opcodes which accept no omod.
// Size of src arguments (16/32) is controlled by op_sel.
// For 16-bit src arguments their location (hi/lo) are controlled by op_sel_hi.
-let isCommutable = 1 in {
+let isCommutable = 1, mayRaiseFPException = 0 in {
def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;
let FPDPRounding = 1 in {
@@ -203,7 +211,7 @@ foreach Type = ["I", "U"] in
foreach Index = 0-3 in {
// Defines patterns that extract each Index'ed 8bit from an unsigned
// 32bit scalar value;
- def #Type#Index#"_8bit" : Extract<!shl(Index, 3), 255, !if (!eq (Type, "U"), 1, 0)>;
+ def Type#Index#"_8bit" : Extract<!shl(Index, 3), 255, !if (!eq (Type, "U"), 1, 0)>;
// Defines multiplication patterns where the multiplication is happening on each
// Index'ed 8bit of a 32bit scalar value.
@@ -211,8 +219,8 @@ foreach Type = ["I", "U"] in
def Mul#Type#_Elt#Index : PatFrag<
(ops node:$src0, node:$src1),
(!cast<HasOneUseBinOp>(!if (!eq (Type, "I"), AMDGPUmul_i24_oneuse, AMDGPUmul_u24_oneuse))
- (!cast<Extract>(#Type#Index#"_8bit") node:$src0),
- (!cast<Extract>(#Type#Index#"_8bit") node:$src1))>;
+ (!cast<Extract>(Type#Index#"_8bit") node:$src0),
+ (!cast<Extract>(Type#Index#"_8bit") node:$src1))>;
}
// Different variants of dot8 patterns cause a huge increase in the compile time.
@@ -231,15 +239,15 @@ foreach Type = ["I", "U"] in
foreach Index = 0-7 in {
// Defines patterns that extract each Index'ed 4bit from an unsigned
// 32bit scalar value;
- def #Type#Index#"_4bit" : Extract<!shl(Index, 2), 15, !if (!eq (Type, "U"), 1, 0)>;
+ def Type#Index#"_4bit" : Extract<!shl(Index, 2), 15, !if (!eq (Type, "U"), 1, 0)>;
// Defines multiplication patterns where the multiplication is happening on each
// Index'ed 8bit of a 32bit scalar value.
def Mul#Type#Index#"_4bit" : PatFrag<
(ops node:$src0, node:$src1),
(!cast<HasOneUseBinOp>(!if (!eq (Type, "I"), NonACAMDGPUmul_i24_oneuse, NonACAMDGPUmul_u24_oneuse))
- (!cast<Extract>(#Type#Index#"_4bit") node:$src0),
- (!cast<Extract>(#Type#Index#"_4bit") node:$src1))>;
+ (!cast<Extract>(Type#Index#"_4bit") node:$src0),
+ (!cast<Extract>(Type#Index#"_4bit") node:$src1))>;
}
class UDot2Pat<Instruction Inst> : GCNPat <
@@ -264,40 +272,30 @@ class SDot2Pat<Instruction Inst> : GCNPat <
let IsDOT = 1 in {
let SubtargetPredicate = HasDot2Insts in {
-def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>>;
-def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
-def V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
-def V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
-def V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
+ VOP3_Profile<VOP_F32_V2F16_V2F16_F32>,
+ AMDGPUfdot2, 1/*ExplicitClamp*/>;
+def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16",
+ VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_sdot2, 1>;
+def V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16",
+ VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_udot2, 1>;
+def V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
+ VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
+def V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
+ VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
} // End SubtargetPredicate = HasDot2Insts
let SubtargetPredicate = HasDot1Insts in {
-def V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
-def V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+def V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8",
+ VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>;
+def V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4",
+ VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8, 1>;
} // End SubtargetPredicate = HasDot1Insts
} // End let IsDOT = 1
-multiclass DotPats<SDPatternOperator dot_op,
- VOP3PInst dot_inst> {
- let SubtargetPredicate = dot_inst.SubtargetPredicate in
- def : GCNPat <
- (dot_op (dot_inst.Pfl.Src0VT (VOP3PMods0 dot_inst.Pfl.Src0VT:$src0, i32:$src0_modifiers)),
- (dot_inst.Pfl.Src1VT (VOP3PMods dot_inst.Pfl.Src1VT:$src1, i32:$src1_modifiers)),
- (dot_inst.Pfl.Src2VT (VOP3PMods dot_inst.Pfl.Src2VT:$src2, i32:$src2_modifiers)), i1:$clamp),
- (dot_inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2, (as_i1imm $clamp))>;
-}
-
-defm : DotPats<AMDGPUfdot2, V_DOT2_F32_F16>;
-defm : DotPats<int_amdgcn_sdot2, V_DOT2_I32_I16>;
-defm : DotPats<int_amdgcn_udot2, V_DOT2_U32_U16>;
-defm : DotPats<int_amdgcn_sdot4, V_DOT4_I32_I8>;
-defm : DotPats<int_amdgcn_udot4, V_DOT4_U32_U8>;
-defm : DotPats<int_amdgcn_sdot8, V_DOT8_I32_I4>;
-defm : DotPats<int_amdgcn_udot8, V_DOT8_U32_U4>;
-
def : UDot2Pat<V_DOT2_U32_U16>;
def : SDot2Pat<V_DOT2_I32_I16>;
@@ -368,12 +366,16 @@ def VOPProfileMAI_F32_V4F16_X16 : VOPProfileMAI<VOP_V16F32_V4F16_V4F16_V16F32, A
def VOPProfileMAI_F32_V4F16_X32 : VOPProfileMAI<VOP_V32F32_V4F16_V4F16_V32F32, AISrc_1024_b32, ADst_1024, AVSrc_64>;
let Predicates = [HasMAIInsts] in {
+
+let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
def V_ACCVGPR_READ_B32 : VOP3Inst<"v_accvgpr_read_b32", VOPProfileAccRead>;
def V_ACCVGPR_WRITE_B32 : VOP3Inst<"v_accvgpr_write_b32", VOPProfileAccWrite> {
let isMoveImm = 1;
}
+}
-let isConvergent = 1 in {
+// FP32 denorm mode is respected, rounding mode is not. Exceptions are not supported.
+let isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in {
def V_MFMA_F32_4X4X1F32 : VOP3Inst<"v_mfma_f32_4x4x1f32", VOPProfileMAI_F32_F32_X4, int_amdgcn_mfma_f32_4x4x1f32>;
def V_MFMA_F32_4X4X4F16 : VOP3Inst<"v_mfma_f32_4x4x4f16", VOPProfileMAI_F32_V4F16_X4, int_amdgcn_mfma_f32_4x4x4f16>;
def V_MFMA_I32_4X4X4I8 : VOP3Inst<"v_mfma_i32_4x4x4i8", VOPProfileMAI_I32_I32_X4, int_amdgcn_mfma_i32_4x4x4i8>;
@@ -394,7 +396,7 @@ def V_MFMA_I32_32X32X4I8 : VOP3Inst<"v_mfma_i32_32x32x4i8", VOPProfileMAI_I3
def V_MFMA_I32_32X32X8I8 : VOP3Inst<"v_mfma_i32_32x32x8i8", VOPProfileMAI_I32_I32_X16, int_amdgcn_mfma_i32_32x32x8i8>;
def V_MFMA_F32_32X32X2BF16 : VOP3Inst<"v_mfma_f32_32x32x2bf16", VOPProfileMAI_F32_V2I16_X32, int_amdgcn_mfma_f32_32x32x2bf16>;
def V_MFMA_F32_32X32X4BF16 : VOP3Inst<"v_mfma_f32_32x32x4bf16", VOPProfileMAI_F32_V2I16_X16, int_amdgcn_mfma_f32_32x32x4bf16>;
-} // End isConvergent = 1
+} // End isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1
} // End SubtargetPredicate = HasMAIInsts
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 39d18794f947..aa2fa260e7b5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -1,4 +1,4 @@
-//===-- VOPCInstructions.td - Vector Instruction Defintions ---------------===//
+//===-- VOPCInstructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -92,9 +92,11 @@ class VOPC_Pseudo <string opName, VOPC_Profile P, list<dag> pattern=[],
let mayStore = 0;
let hasSideEffects = 0;
+ let ReadsModeReg = isFloatType<P.Src0VT>.ret;
+
let VALU = 1;
let VOPC = 1;
- let Uses = [EXEC];
+ let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);
let Defs = !if(DefVcc, [VCC], []);
VOPProfile Pfl = P;
@@ -738,6 +740,9 @@ multiclass VOPC_CLASS_F64 <string opName> :
multiclass VOPCX_CLASS_F64 <string opName> :
VOPCX_Class_Pseudos <opName, VOPC_I1_F64_I32, VOPC_F64_I32>;
+// cmp_class ignores the FP mode and faithfully reports the unmodified
+// source value.
+let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <"v_cmp_class_f32">;
defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <"v_cmpx_class_f32">;
defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <"v_cmp_class_f64">;
@@ -747,6 +752,7 @@ let SubtargetPredicate = Has16BitInsts in {
defm V_CMP_CLASS_F16 : VOPC_CLASS_F16 <"v_cmp_class_f16">;
defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;
}
+} // End ReadsModeReg = 0, mayRaiseFPException = 0
//===----------------------------------------------------------------------===//
// V_ICMPIntrinsic Pattern.
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td
index f208a1134a5a..f8a83e5f74c0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1,4 +1,4 @@
-//===-- VOPInstructions.td - Vector Instruction Defintions ----------------===//
+//===-- VOPInstructions.td - Vector Instruction Definitions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -8,6 +8,8 @@
// dummies for outer let
class LetDummies {
+ bit ReadsModeReg;
+ bit mayRaiseFPException;
bit isCommutable;
bit isConvertibleToThreeAddress;
bit isMoveImm;
@@ -35,7 +37,7 @@ class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
let hasSideEffects = 0;
let UseNamedOperandTable = 1;
let VALU = 1;
- let Uses = [EXEC];
+ let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);
}
class VOP_Pseudo <string opName, string suffix, VOPProfile P, dag outs, dag ins,
@@ -118,7 +120,10 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
let ClampLo = P.HasClampLo;
let ClampHi = P.HasClampHi;
- let Uses = [EXEC];
+ let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret);
+
+ let mayRaiseFPException = ReadsModeReg;
+ let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);
let AsmVariantName = AMDGPUAsmVariants.VOP3;
let AsmMatchConverter =
@@ -160,7 +165,7 @@ class VOP3_Real <VOP_Pseudo ps, int EncodingFamily> :
VOPProfile Pfl = ps.Pfl;
}
-// XXX - Is there any reason to distingusih this from regular VOP3
+// XXX - Is there any reason to distinguish this from regular VOP3
// here?
class VOP3P_Real<VOP_Pseudo ps, int EncodingFamily> :
VOP3_Real<ps, EncodingFamily>;
@@ -490,10 +495,14 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
let VALU = 1;
let SDWA = 1;
- let Uses = [EXEC];
- let SubtargetPredicate = !if(P.HasExtSDWA, HasSDWA, DisableInst);
- let AssemblerPredicate = !if(P.HasExtSDWA, HasSDWA, DisableInst);
+ let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret);
+
+ let mayRaiseFPException = ReadsModeReg;
+ let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);
+
+ let SubtargetPredicate = HasSDWA;
+ let AssemblerPredicate = HasSDWA;
let AsmVariantName = !if(P.HasExtSDWA, AMDGPUAsmVariants.SDWA,
AMDGPUAsmVariants.Disable);
let DecoderNamespace = "SDWA";
@@ -542,8 +551,8 @@ class Base_VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> :
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
- let SubtargetPredicate = !if(ps.Pfl.HasExtSDWA9, HasSDWA9, DisableInst);
- let AssemblerPredicate = !if(ps.Pfl.HasExtSDWA9, HasSDWA9, DisableInst);
+ let SubtargetPredicate = HasSDWA9;
+ let AssemblerPredicate = HasSDWA9;
let AsmVariantName = !if(ps.Pfl.HasExtSDWA9, AMDGPUAsmVariants.SDWA9,
AMDGPUAsmVariants.Disable);
let DecoderNamespace = "SDWA9";
@@ -561,8 +570,8 @@ class VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> :
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SDWA9>;
class Base_VOP_SDWA10_Real<VOP_SDWA_Pseudo ps> : Base_VOP_SDWA9_Real<ps> {
- let SubtargetPredicate = !if(ps.Pfl.HasExtSDWA9, HasSDWA10, DisableInst);
- let AssemblerPredicate = !if(ps.Pfl.HasExtSDWA9, HasSDWA10, DisableInst);
+ let SubtargetPredicate = HasSDWA10;
+ let AssemblerPredicate = HasSDWA10;
let DecoderNamespace = "SDWA10";
}
@@ -607,7 +616,11 @@ class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
let VALU = 1;
let DPP = 1;
let Size = 8;
- let Uses = [EXEC];
+
+ let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret);
+
+ let mayRaiseFPException = ReadsModeReg;
+ let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);
let isConvergent = 1;
string Mnemonic = OpName;
@@ -615,7 +628,7 @@ class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", "");
let SubtargetPredicate = HasDPP;
- let AssemblerPredicate = !if(P.HasExtDPP, HasDPP, DisableInst);
+ let AssemblerPredicate = HasDPP;
let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP,
AMDGPUAsmVariants.Disable);
let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", "");
@@ -670,7 +683,7 @@ class VOP_DPP <string OpName, VOPProfile P, bit IsDPP16,
let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", "");
let SubtargetPredicate = HasDPP;
- let AssemblerPredicate = !if(P.HasExtDPP, HasDPP, DisableInst);
+ let AssemblerPredicate = HasDPP;
let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP,
AMDGPUAsmVariants.Disable);
let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", "");
@@ -702,7 +715,7 @@ class VOP_DPP8<string OpName, VOPProfile P> :
let AsmMatchConverter = "cvtDPP8";
let SubtargetPredicate = HasDPP8;
- let AssemblerPredicate = !if(P.HasExt, HasDPP8, DisableInst);
+ let AssemblerPredicate = HasDPP8;
let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP,
AMDGPUAsmVariants.Disable);
let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", "");
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCAsmPrinter.cpp
index 7915ca003316..025b920ff7b4 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCAsmPrinter.cpp
@@ -41,12 +41,14 @@ public:
MCInstLowering(&OutContext, *this) {}
StringRef getPassName() const override { return "ARC Assembly Printer"; }
- void EmitInstruction(const MachineInstr *MI) override;
+ void emitInstruction(const MachineInstr *MI) override;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
};
} // end anonymous namespace
-void ARCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void ARCAsmPrinter::emitInstruction(const MachineInstr *MI) {
SmallString<128> Str;
raw_svector_ostream O(Str);
@@ -61,6 +63,12 @@ void ARCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInst);
}
+bool ARCAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ // Functions are 4-byte aligned.
+ MF.ensureAlignment(Align(4));
+ return AsmPrinter::runOnMachineFunction(MF);
+}
+
// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARCAsmPrinter() {
RegisterAsmPrinter<ARCAsmPrinter> X(getTheARCTarget());
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCFrameLowering.cpp
index d8946d97deff..ead593106262 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCFrameLowering.cpp
@@ -74,8 +74,7 @@ static void generateStackAdjustment(MachineBasicBlock &MBB,
.addImm(AbsAmount);
}
-static unsigned
-determineLastCalleeSave(const std::vector<CalleeSavedInfo> &CSI) {
+static unsigned determineLastCalleeSave(ArrayRef<CalleeSavedInfo> CSI) {
unsigned Last = 0;
for (auto Reg : CSI) {
assert(Reg.getReg() >= ARC::R13 && Reg.getReg() <= ARC::R25 &&
@@ -197,7 +196,7 @@ void ARCFrameLowering::emitPrologue(MachineFunction &MF,
// .cfi_offset fp, -StackSize
// .cfi_offset blink, -StackSize+4
unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize()));
BuildMI(MBB, MBBI, dl, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -401,8 +400,7 @@ bool ARCFrameLowering::assignCalleeSavedSpillSlots(
bool ARCFrameLowering::spillCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
LLVM_DEBUG(dbgs() << "Spill callee saved registers: "
<< MBB.getParent()->getName() << "\n");
// There are routines for saving at least 3 registers (r13 to r15, etc.)
@@ -419,7 +417,7 @@ bool ARCFrameLowering::spillCalleeSavedRegisters(
bool ARCFrameLowering::restoreCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const {
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
LLVM_DEBUG(dbgs() << "Restore callee saved registers: "
<< MBB.getParent()->getName() << "\n");
// There are routines for saving at least 3 registers (r13 to r15, etc.)
@@ -441,8 +439,8 @@ void ARCFrameLowering::processFunctionBeforeFrameFinalized(
LLVM_DEBUG(dbgs() << "Current stack size: " << MFI.getStackSize() << "\n");
const TargetRegisterClass *RC = &ARC::GPR32RegClass;
if (MFI.hasStackObjects()) {
- int RegScavFI = MFI.CreateStackObject(
- RegInfo->getSpillSize(*RC), RegInfo->getSpillAlignment(*RC), false);
+ int RegScavFI = MFI.CreateStackObject(RegInfo->getSpillSize(*RC),
+ RegInfo->getSpillAlign(*RC), false);
RS->addScavengingFrameIndex(RegScavFI);
LLVM_DEBUG(dbgs() << "Created scavenging index RegScavFI=" << RegScavFI
<< "\n");
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/ARC/ARCFrameLowering.h
index 9242400fb28d..9951a09842c5 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCFrameLowering.h
@@ -42,13 +42,13 @@ public:
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
+ ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) const override;
bool
restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
+ MutableArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) const override;
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp
index 8df2b5d2b6a7..4a6510f10eeb 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCISelLowering.cpp
@@ -245,7 +245,7 @@ SDValue ARCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Analyze return values to determine the number of bytes of stack required.
CCState RetCCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
- RetCCInfo.AllocateStack(CCInfo.getNextStackOffset(), 4);
+ RetCCInfo.AllocateStack(CCInfo.getNextStackOffset(), Align(4));
RetCCInfo.AnalyzeCallResult(Ins, RetCC_ARC);
// Get a count of how many bytes are to be pushed on the stack.
@@ -563,14 +563,16 @@ SDValue ARCTargetLowering::LowerCallArguments(
for (const auto &ArgDI : ArgData) {
if (ArgDI.Flags.isByVal() && ArgDI.Flags.getByValSize()) {
unsigned Size = ArgDI.Flags.getByValSize();
- unsigned Align = std::max(StackSlotSize, ArgDI.Flags.getByValAlign());
+ Align Alignment =
+ std::max(Align(StackSlotSize), ArgDI.Flags.getNonZeroByValAlign());
// Create a new object on the stack and copy the pointee into it.
- int FI = MFI.CreateStackObject(Size, Align, false);
+ int FI = MFI.CreateStackObject(Size, Alignment, false);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
InVals.push_back(FIN);
MemOps.push_back(DAG.getMemcpy(
- Chain, dl, FIN, ArgDI.SDV, DAG.getConstant(Size, dl, MVT::i32), Align,
- false, false, false, MachinePointerInfo(), MachinePointerInfo()));
+ Chain, dl, FIN, ArgDI.SDV, DAG.getConstant(Size, dl, MVT::i32),
+ Alignment, false, false, false, MachinePointerInfo(),
+ MachinePointerInfo()));
} else {
InVals.push_back(ArgDI.SDV);
}
@@ -620,7 +622,7 @@ ARCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Analyze return values.
if (!IsVarArg)
- CCInfo.AllocateStack(AFI->getReturnStackOffset(), 4);
+ CCInfo.AllocateStack(AFI->getReturnStackOffset(), Align(4));
CCInfo.AnalyzeReturn(Outs, RetCC_ARC);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrFormats.td
index e4902a73ed49..584844d49553 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrFormats.td
@@ -127,16 +127,16 @@ class PseudoInstARC<dag outs, dag ins, string asmstr, list<dag> pattern>
//===----------------------------------------------------------------------===//
// All 32-bit ARC instructions have a 5-bit "major" opcode class designator
-// in bits 27-31.
-//
+// in bits 27-31.
+//
// Some general naming conventions:
// N - Delay Slot bit. ARC v2 branch instructions have an optional delay slot
// which is encoded with this bit. When set, a delay slot exists.
// cc - Condition code.
// SX - Signed X-bit immediate.
// UX - Unsigned X-bit immediate.
-//
-// [ABC] - 32-bit register operand. These are 6-bit fields. This encodes the
+//
+// [ABC] - 32-bit register operand. These are 6-bit fields. This encodes the
// standard 32 general purpose registers, and allows use of additional
// (extension) registers. This also encodes an instruction that uses
// a 32-bit Long Immediate (LImm), using 0x3e==62 as the field value.
@@ -166,7 +166,7 @@ class F32_BR_COND<bits<5> major, dag outs, dag ins, bit b16, string asmstr,
list<dag> pattern> :
F32_BR<major, outs, ins, b16, asmstr, pattern> {
bits<21> S21; // 2-byte aligned 21-bit byte-offset.
- bits<5> cc;
+ bits<5> cc;
let Inst{26-18} = S21{10-2};
let Inst{15-6} = S21{20-11};
let Inst{4-0} = cc;
@@ -328,7 +328,7 @@ class F32_DOP_RU6<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
}
// 2-register, signed 12-bit immediate Dual Operand instruction.
-// This instruction uses B as the first 2 operands (i.e., add B, B, -128).
+// This instruction uses B as the first 2 operands (i.e., add B, B, -128).
// |26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|11|10|9|8|7|6|5|4|3|2|1|0|
// |B[2-0] | 1| 0| subop| F|B[5-3] |S12[5-0] |S12[11-6] |
class F32_DOP_RS12<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
@@ -336,7 +336,7 @@ class F32_DOP_RS12<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
InstARC<4, outs, ins, asmstr, pattern> {
bits<6> B;
bits<12> S12;
-
+
let Inst{31-27} = major;
let Inst{26-24} = B{2-0};
let Inst{23-22} = 0b10;
@@ -547,14 +547,14 @@ class F16_COMPACT<bits<1> i, dag outs, dag ins,
let Inst{15-11} = 0b01000;
let Inst{7-5} = h{2-0};
let Inst{2} = i;
- let Inst{1-0} = h{4-3};
+ let Inst{1-0} = h{4-3};
}
// Compact Load/Add/Sub.
class F16_LD_ADD_SUB<dag outs, dag ins, string asmstr> :
InstARC<2, outs, ins, asmstr, []> {
- bits<3> b;
+ bits<3> b;
let Inst{15-11} = 0b01001;
let Inst{10-8} = b;
}
@@ -575,10 +575,10 @@ class F16_LD_SUB<bit i, string asmstr> :
class F16_ADD :
F16_LD_ADD_SUB<(outs GPR32:$r), (ins GPR32:$b, immU<6>:$u6),
"add_s\t$r, $b, $u6"> {
-
+
bit r;
bits<6> u6;
-
+
let Inst{7} = r;
let Inst{6-4} = u6{5-3};
let Inst{3} = 1;
@@ -610,7 +610,7 @@ class F16_LDI_u7 :
bits<3> b;
bits<7> u7;
-
+
let Inst{10-8} = b;
let Inst{7-4} = u7{6-3};
let Inst{3} = 1;
@@ -623,7 +623,7 @@ class F16_JLI_EI<bit i, string asmstr> :
!strconcat(asmstr, "\t$u10"), []> {
bits<10> u10;
-
+
let Inst{15-11} = 0b01011;
let Inst{10} = i;
let Inst{9-0} = u10;
@@ -635,9 +635,9 @@ class F16_LD_ADD_RR<bits<2> i, string asmstr> :
asmstr, []> {
bits<3> a;
- bits<3> b;
+ bits<3> b;
bits<3> c;
-
+
let Inst{15-11} = 0b01100;
let Inst{10-8} = b;
let Inst{7-5} = c;
@@ -648,7 +648,7 @@ class F16_LD_ADD_RR<bits<2> i, string asmstr> :
// Load/Add GP-Relative.
class F16_GP_LD_ADD<bits<2> i, dag ins, string asmstr> :
InstARC<2, (outs), ins, asmstr, []> {
-
+
let Inst{15-11} = 0b11001;
let Inst{10-9} = i;
}
@@ -663,7 +663,7 @@ class F16_ADD_IMM<bits<2> i, string asmstr> :
bits<3> b;
bits<3> c;
bits<3> u3;
-
+
let Inst{15-11} = 0b01101;
let Inst{10-8} = b;
let Inst{7-5} = c;
@@ -689,8 +689,8 @@ class F16_OP_HREG<bits<3> i, dag outs, dag ins, string asmstr> :
class F16_OP_HREG30<bits<3> i, dag outs, dag ins, string asmstr> :
F16_OP_HREG<i, outs, ins, asmstr> {
-
- bits<5> LImmReg = 0b11110;
+
+ bits<5> LImmReg = 0b11110;
let Inst{7-5} = LImmReg{2-0};
let Inst{1-0} = LImmReg{4-3};
}
@@ -784,7 +784,7 @@ class F16_SH_SUB_BIT<bits<3> i, string asmstr> :
bits<3> b;
bits<5> u5;
-
+
let Inst{15-11} = 0b10111;
let Inst{10-8} = b;
let Inst{7-5} = i;
@@ -816,7 +816,7 @@ class F16_SP_OPS_u7_aligned<bits<3> i,
bits<3> b3;
bits<7> u7;
-
+
let fieldB = b3;
let fieldU = u7{6-2};
let u7{1-0} = 0b00;
@@ -826,7 +826,7 @@ class F16_SP_OPS_bconst<bits<3> b, string asmop> :
F16_SP_OPS_u7_aligned<0b101,
(outs), (ins immU<7>:$u7),
!strconcat(asmop, "\t%sp, %sp, $u7")> {
-
+
let fieldB = b;
}
@@ -834,14 +834,14 @@ class F16_SP_OPS_uconst<bits<3> i,
dag outs, dag ins, string asmop> :
F16_SP_OPS_u7_aligned<i, outs, ins,
!strconcat(asmop, "\t$b3")> {
-
+
let fieldU = 0b00001;
}
class F16_SP_OPS_buconst<bits<3> i, string asmop> :
F16_SP_OPS_u7_aligned<i, (outs), (ins),
!strconcat(asmop, "\t%blink")> {
-
+
let fieldB = 0x000;
let fieldU = 0b10001;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.cpp
index 2bf2c1f6bbc5..527f239c2643 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.cpp
@@ -161,7 +161,7 @@ static bool isJumpOpcode(int Opc) { return Opc == ARC::J; }
/// condition. These operands can be passed to other TargetInstrInfo
/// methods to create new branches.
///
-/// Note that RemoveBranch and InsertBranch must be implemented to support
+/// Note that RemoveBranch and insertBranch must be implemented to support
/// cases where this method returns success.
///
/// If AllowModify is true, then this routine is allowed to modify the basic
@@ -292,18 +292,18 @@ void ARCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
void ARCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill,
+ Register SrcReg, bool isKill,
int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc dl = MBB.findDebugLoc(I);
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned Align = MFI.getObjectAlignment(FrameIndex);
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIndex),
- MachineMemOperand::MOStore, MFI.getObjectSize(FrameIndex), Align);
+ MachineMemOperand::MOStore, MFI.getObjectSize(FrameIndex),
+ MFI.getObjectAlign(FrameIndex));
assert(MMO && "Couldn't get MachineMemOperand for store to stack.");
assert(TRI->getSpillSize(*RC) == 4 &&
@@ -321,16 +321,16 @@ void ARCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
void ARCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- unsigned DestReg, int FrameIndex,
+ Register DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc dl = MBB.findDebugLoc(I);
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned Align = MFI.getObjectAlignment(FrameIndex);
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIndex),
- MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex), Align);
+ MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
+ MFI.getObjectAlign(FrameIndex));
assert(MMO && "Couldn't get MachineMemOperand for store to stack.");
assert(TRI->getSpillSize(*RC) == 4 &&
@@ -375,7 +375,7 @@ unsigned ARCInstrInfo::insertBranch(MachineBasicBlock &MBB,
assert(!BytesAdded && "Code size not handled.");
// Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 3 || Cond.size() == 0) &&
"ARC branch conditions have two components!");
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.h
index 6f894478d3d3..4f6122daf91f 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.h
@@ -68,13 +68,13 @@ public:
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, unsigned SrcReg,
+ MachineBasicBlock::iterator MI, Register SrcReg,
bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, unsigned DestReg,
+ MachineBasicBlock::iterator MI, Register DestReg,
int FrameIndex, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.td
index 311d998f3d86..8fe393dfaf5b 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCInstrInfo.td
@@ -34,7 +34,7 @@ def ARCGAWrapper : SDNode<"ARCISD::GAWRAPPER", SDT_ARCmov, []>;
// Comparison
def ARCcmp : SDNode<"ARCISD::CMP", SDT_ARCcmptst, [SDNPOutGlue]>;
-// Conditionanal mov
+// Conditional mov
def ARCcmov : SDNode<"ARCISD::CMOV", SDT_ARCcmov, [SDNPInGlue]>;
// Conditional Branch
@@ -206,7 +206,7 @@ multiclass ArcBinaryEXT5Inst<bits<6> mincode, string opasm> :
multiclass ArcUnaryGEN4Inst<bits<6> mincode, string opasm> :
ArcUnaryInst<0b00100, mincode, opasm>;
-// Pattern generation for differnt instruction variants.
+// Pattern generation for different instruction variants.
multiclass MultiPat<SDPatternOperator InFrag,
Instruction RRR, Instruction RRU6, Instruction RRLImm> {
def _rrr : Pat<(InFrag i32:$B, i32:$C), (RRR i32:$B, i32:$C)>;
@@ -215,7 +215,7 @@ multiclass MultiPat<SDPatternOperator InFrag,
}
// ---------------------------------------------------------------------------
-// Instruction defintions and patterns for 3 operand binary instructions.
+// Instruction definitions and patterns for 3 operand binary instructions.
// ---------------------------------------------------------------------------
// Definitions for 3 operand binary instructions.
@@ -344,7 +344,7 @@ let isBranch = 1, isTerminator = 1 in {
// At worst, this expands into 2 4-byte instructions.
def BRcc_rr_p : PseudoInstARC<(outs),
(ins btarget:$T, GPR32:$B, GPR32:$C, ccond:$cc),
- "pbr$cc\t$B, $C, $T",
+ "pbr$cc\t$B, $C, $T",
[(ARCbrcc bb:$T, i32:$B, i32:$C, imm32:$cc)]>
{ let Size = 8; }
@@ -430,7 +430,7 @@ def LEAVE_S : F16_SP_OPS<0b110,
(outs), (ins immU<7>:$u7), "leave_s\t$u7"> {
bits<7> u7;
-
+
let fieldB = u7{6-4};
let fieldU{4-1} = u7{3-0};
let fieldU{0} = 0b0;
@@ -440,7 +440,7 @@ def ENTER_S : F16_SP_OPS<0b111,
(outs), (ins immU<6>:$u6), "enter_s\t$u6"> {
bits<6> u6;
-
+
let fieldB{2} = 0;
let fieldB{1-0} = u6{5-4};
let fieldU{4-1} = u6{3-0};
@@ -452,19 +452,19 @@ def ENTER_S : F16_SP_OPS<0b111,
//----------------------------------------------------------------------------
class COMPACT_MOV_S :
F16_COMPACT<0b0, (outs GPR32:$g), (ins GPR32:$h),
- "mov_s\t$g, $h"> {
+ "mov_s\t$g, $h"> {
let DecoderMethod = "DecodeMoveHRegInstruction";
}
def COMPACT_MOV_S_limm : COMPACT_MOV_S {
- bits<32> LImm;
+ bits<32> LImm;
let Inst{47-16} = LImm;
- bits<5> LImmReg = 0b11110;
+ bits<5> LImmReg = 0b11110;
let Inst{7-5} = LImmReg{2-0};
let Inst{1-0} = LImmReg{4-3};
- let Size = 6;
+ let Size = 6;
}
def COMPACT_MOV_S_hreg : COMPACT_MOV_S;
@@ -548,9 +548,9 @@ def GP_ADD_S : F16_GP_LD_ADD<0b11, (ins immS<11>:$s),
//----------------------------------------------------------------------------
def PCL_LD : InstARC<2, (outs GPR32:$b), (ins immU<10>:$u10),
"ld_s\t$b, [%pcl, $u10]", []> {
-
- bits<3> b;
- bits<10> u10;
+
+ bits<3> b;
+ bits<10> u10;
let Inst{15-11} = 0b11010;
let Inst{10-8} = b;
@@ -587,11 +587,11 @@ def BL_S :
InstARC<2, (outs), (ins btargetS13:$s13), "bl_s\t$s13", []> {
let Inst{15-11} = 0b11111;
-
+
bits<13> s13;
let Inst{10-0} = s13{12-2};
let s13{1-0} = 0b00;
-
+
let isCall = 1;
let isBarrier = 1;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/ARC/ARCMachineFunctionInfo.h
index d4dcf9bf285c..968c6b63f423 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCMachineFunctionInfo.h
@@ -33,10 +33,7 @@ public:
explicit ARCFunctionInfo(MachineFunction &MF)
: ReturnStackOffsetSet(false), VarArgsFrameIndex(0),
- ReturnStackOffset(-1U), MaxCallStackReq(0) {
- // Functions are 4-byte aligned.
- MF.setAlignment(Align(4));
- }
+ ReturnStackOffset(-1U), MaxCallStackReq(0) {}
~ARCFunctionInfo() {}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.cpp
index 490f08930091..c49af8a45236 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.cpp
@@ -153,11 +153,6 @@ bool ARCRegisterInfo::requiresRegisterScavenging(
return true;
}
-bool ARCRegisterInfo::trackLivenessAfterRegAlloc(
- const MachineFunction &MF) const {
- return true;
-}
-
bool ARCRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.h
index af41234e9dda..f8bca11fdbc8 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.h
@@ -34,8 +34,6 @@ public:
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
-
bool useFPForScavengingIndex(const MachineFunction &MF) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.td
index 4b6744ad73da..82fdccc51466 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCRegisterInfo.td
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Declarations that describe the ARC register file
+// Declarations that describe the ARC register file
//===----------------------------------------------------------------------===//
class ARCReg<string n, list<string> altNames> : Register<n, altNames> {
@@ -27,35 +27,35 @@ class Status<string n> : ARCReg<n, []> {
// Integer registers
def R0 : Core< 0, "%r0">, DwarfRegNum<[0]>;
def R1 : Core< 1, "%r1">, DwarfRegNum<[1]>;
-def R2 : Core< 2, "%r2">, DwarfRegNum<[2]>;
+def R2 : Core< 2, "%r2">, DwarfRegNum<[2]>;
def R3 : Core< 3, "%r3">, DwarfRegNum<[3]>;
let CostPerUse=1 in {
def R4 : Core< 4, "%r4">, DwarfRegNum<[4]>;
-def R5 : Core< 5, "%r5">, DwarfRegNum<[5]>;
+def R5 : Core< 5, "%r5">, DwarfRegNum<[5]>;
def R6 : Core< 6, "%r6">, DwarfRegNum<[6]>;
def R7 : Core< 7, "%r7">, DwarfRegNum<[7]>;
def R8 : Core< 8, "%r8">, DwarfRegNum<[8]>;
def R9 : Core< 9, "%r9">, DwarfRegNum<[9]>;
-def R10 : Core<10, "%r10">, DwarfRegNum<[10]>;
+def R10 : Core<10, "%r10">, DwarfRegNum<[10]>;
def R11 : Core<11, "%r11">, DwarfRegNum<[11]>;
}
def R12 : Core<12, "%r12">, DwarfRegNum<[12]>;
-def R13 : Core<13, "%r13">, DwarfRegNum<[13]>;
+def R13 : Core<13, "%r13">, DwarfRegNum<[13]>;
def R14 : Core<14, "%r14">, DwarfRegNum<[14]>;
def R15 : Core<15, "%r15">, DwarfRegNum<[15]>;
let CostPerUse=1 in {
def R16 : Core<16, "%r16">, DwarfRegNum<[16]>;
def R17 : Core<17, "%r17">, DwarfRegNum<[17]>;
-def R18 : Core<18, "%r18">, DwarfRegNum<[18]>;
+def R18 : Core<18, "%r18">, DwarfRegNum<[18]>;
def R19 : Core<19, "%r19">, DwarfRegNum<[19]>;
def R20 : Core<20, "%r20">, DwarfRegNum<[20]>;
-def R21 : Core<21, "%r21">, DwarfRegNum<[21]>;
+def R21 : Core<21, "%r21">, DwarfRegNum<[21]>;
def R22 : Core<22, "%r22">, DwarfRegNum<[22]>;
def R23 : Core<23, "%r23">, DwarfRegNum<[23]>;
def R24 : Core<24, "%r24">, DwarfRegNum<[24]>;
def R25 : Core<25, "%r25">, DwarfRegNum<[25]>;
-def GP : Core<26, "%gp",["%r26"]>, DwarfRegNum<[26]>;
+def GP : Core<26, "%gp",["%r26"]>, DwarfRegNum<[26]>;
def FP : Core<27, "%fp", ["%r27"]>, DwarfRegNum<[27]>;
def SP : Core<28, "%sp", ["%r28"]>, DwarfRegNum<[28]>;
def ILINK : Core<29, "%ilink">, DwarfRegNum<[29]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.cpp
index ab74fecb7804..4a5b6fd4d5bf 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCTargetMachine.cpp
@@ -39,7 +39,7 @@ ARCTargetMachine::ARCTargetMachine(const Target &T, const Triple &TT,
TT, CPU, FS, Options, getRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
TLOF(std::make_unique<TargetLoweringObjectFileELF>()),
- Subtarget(TT, CPU, FS, *this) {
+ Subtarget(TT, std::string(CPU), std::string(FS), *this) {
initAsmInfo();
}
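
Another mechanical change that shows up repeatedly in this merge (also in ARMAsmPrinter.cpp further below) is wrapping StringRef arguments in std::string(...) where a std::string parameter is expected; as far as I can tell the StringRef-to-std::string conversion is no longer implicit upstream, so call sites now spell out the conversion. A tiny illustration (the helper is hypothetical, not from the patch):

    #include "llvm/ADT/StringRef.h"
    #include <string>

    // Hypothetical helper: forwarding a StringRef into a std::string parameter
    // now requires an explicit conversion.
    static std::string copyFeatureString(llvm::StringRef FS) {
      return std::string(FS); // equivalently: FS.str()
    }
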
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h
index 53ca4066c02d..266f2de08772 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h
@@ -36,6 +36,10 @@ public:
private:
void printMemOperandRI(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum,
+ raw_ostream &O) {
+ printOperand(MI, OpNum, O);
+ }
void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printBRCCPredicateOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
index 997e95e1a35f..3e3613ccb90f 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
@@ -57,7 +57,7 @@ static MCAsmInfo *createARCMCAsmInfo(const MCRegisterInfo &MRI,
MCAsmInfo *MAI = new ARCMCAsmInfo(TT);
// Initial state of the frame pointer is SP.
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, ARC::SP, 0);
+ MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, ARC::SP, 0);
MAI->addInitialFrameState(Inst);
return MAI;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.h
index 3412813a3ef2..7398968bb24a 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.h
@@ -47,6 +47,7 @@ FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
FunctionPass *createMVEVPTBlockPass();
+FunctionPass *createMVEVPTOptimisationsPass();
FunctionPass *createARMOptimizeBarriersPass();
FunctionPass *createThumb2SizeReductionPass(
std::function<bool(const Function &)> Ftor = nullptr);
@@ -66,6 +67,7 @@ void initializeARMExpandPseudoPass(PassRegistry &);
void initializeThumb2SizeReducePass(PassRegistry &);
void initializeThumb2ITBlockPass(PassRegistry &);
void initializeMVEVPTBlockPass(PassRegistry &);
+void initializeMVEVPTOptimisationsPass(PassRegistry &);
void initializeARMLowOverheadLoopsPass(PassRegistry &);
void initializeMVETailPredicationPass(PassRegistry &);
void initializeMVEGatherScatterLoweringPass(PassRegistry &);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td
index 380eaa863689..0468f7f1cf8e 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td
@@ -424,6 +424,13 @@ def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler",
def FeatureSB : SubtargetFeature<"sb", "HasSB", "true",
"Enable v8.5a Speculation Barrier" >;
+// Armv8.6-A extensions
+def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", "true",
+ "Enable support for BFloat16 instructions", [FeatureNEON]>;
+
+def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8",
+ "true", "Enable Matrix Multiply Int8 Extension", [FeatureNEON]>;
+
// Armv8.1-M extensions
def FeatureLOB : SubtargetFeature<"lob", "HasLOB", "true",
@@ -523,6 +530,11 @@ def HasV8_5aOps : SubtargetFeature<"v8.5a", "HasV8_5aOps", "true",
"Support ARM v8.5a instructions",
[HasV8_4aOps, FeatureSB]>;
+def HasV8_6aOps : SubtargetFeature<"v8.6a", "HasV8_6aOps", "true",
+ "Support ARM v8.6a instructions",
+ [HasV8_5aOps, FeatureBF16,
+ FeatureMatMulInt8]>;
+
def HasV8_1MMainlineOps : SubtargetFeature<
"v8.1m.main", "HasV8_1MMainlineOps", "true",
"Support ARM v8-1M Mainline instructions",
@@ -536,6 +548,16 @@ def HasMVEFloatOps : SubtargetFeature<
"Support M-Class Vector Extension with integer and floating ops",
[HasMVEIntegerOps, FeatureFPARMv8_D16_SP, FeatureFullFP16]>;
+def HasCDEOps : SubtargetFeature<"cde", "HasCDEOps", "true",
+ "Support CDE instructions",
+ [HasV8MMainlineOps]>;
+
+foreach i = {0-7} in
+ def FeatureCoprocCDE#i : SubtargetFeature<"cdecp"#i,
+ "CoprocCDE["#i#"]", "true",
+ "Coprocessor "#i#" ISA is CDEv1",
+ [HasCDEOps]>;
+
//===----------------------------------------------------------------------===//
// ARM Processor subtarget features.
//
@@ -572,6 +594,12 @@ def ProcA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75",
"Cortex-A75 ARM processors", []>;
def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
"Cortex-A76 ARM processors", []>;
+def ProcA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",
+ "Cortex-A77 ARM processors", []>;
+def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily", "CortexA78",
+ "Cortex-A78 ARM processors", []>;
+def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
+ "Cortex-X1 ARM processors", []>;
def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
"Qualcomm Krait processors", []>;
@@ -787,6 +815,19 @@ def ARMv85a : Architecture<"armv8.5-a", "ARMv85a", [HasV8_5aOps,
FeatureCRC,
FeatureRAS,
FeatureDotProd]>;
+def ARMv86a : Architecture<"armv8.6-a", "ARMv86a", [HasV8_6aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureRAS,
+ FeatureDotProd]>;
def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops,
FeatureRClass,
@@ -1114,6 +1155,14 @@ def : ProcessorModel<"cortex-m35p", CortexM4Model, [ARMv8mMainline,
FeatureUseMISched,
FeatureHasNoBranchPredictor]>;
+def : ProcessorModel<"cortex-m55", CortexM4Model, [ARMv81mMainline,
+ FeatureDSP,
+ FeatureFPARMv8_D16,
+ FeatureUseMISched,
+ FeatureHasNoBranchPredictor,
+ FeaturePrefLoopAlign32,
+ FeatureHasSlowFPVMLx,
+ HasMVEFloatOps]>;
def : ProcNoItin<"cortex-a32", [ARMv8a,
FeatureHWDivThumb,
@@ -1181,6 +1230,30 @@ def : ProcNoItin<"cortex-a76ae", [ARMv82a, ProcA76,
FeatureFullFP16,
FeatureDotProd]>;
+def : ProcNoItin<"cortex-a77", [ARMv82a, ProcA77,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureFullFP16,
+ FeatureDotProd]>;
+
+def : ProcNoItin<"cortex-a78", [ARMv82a, ProcA78,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureFullFP16,
+ FeatureDotProd]>;
+
+def : ProcNoItin<"cortex-x1", [ARMv82a, ProcX1,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureFullFP16,
+ FeatureDotProd]>;
+
def : ProcNoItin<"neoverse-n1", [ARMv82a,
FeatureHWDivThumb,
FeatureHWDivARM,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 6f26ca127f94..d6c1efa6327c 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -57,26 +57,36 @@ ARMAsmPrinter::ARMAsmPrinter(TargetMachine &TM,
: AsmPrinter(TM, std::move(Streamer)), Subtarget(nullptr), AFI(nullptr),
MCP(nullptr), InConstantPool(false), OptimizationGoals(-1) {}
-void ARMAsmPrinter::EmitFunctionBodyEnd() {
+void ARMAsmPrinter::emitFunctionBodyEnd() {
// Make sure to terminate any constant pools that were at the end
// of the function.
if (!InConstantPool)
return;
InConstantPool = false;
- OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
+ OutStreamer->emitDataRegion(MCDR_DataRegionEnd);
}
-void ARMAsmPrinter::EmitFunctionEntryLabel() {
+void ARMAsmPrinter::emitFunctionEntryLabel() {
if (AFI->isThumbFunction()) {
- OutStreamer->EmitAssemblerFlag(MCAF_Code16);
- OutStreamer->EmitThumbFunc(CurrentFnSym);
+ OutStreamer->emitAssemblerFlag(MCAF_Code16);
+ OutStreamer->emitThumbFunc(CurrentFnSym);
} else {
- OutStreamer->EmitAssemblerFlag(MCAF_Code32);
+ OutStreamer->emitAssemblerFlag(MCAF_Code32);
}
- OutStreamer->EmitLabel(CurrentFnSym);
+
+ // Emit symbol for CMSE non-secure entry point
+ if (AFI->isCmseNSEntryFunction()) {
+ MCSymbol *S =
+ OutContext.getOrCreateSymbol("__acle_se_" + CurrentFnSym->getName());
+ emitLinkage(&MF->getFunction(), S);
+ OutStreamer->emitSymbolAttribute(S, MCSA_ELF_TypeFunction);
+ OutStreamer->emitLabel(S);
+ }
+
+ OutStreamer->emitLabel(CurrentFnSym);
}
-void ARMAsmPrinter::EmitXXStructor(const DataLayout &DL, const Constant *CV) {
+void ARMAsmPrinter::emitXXStructor(const DataLayout &DL, const Constant *CV) {
uint64_t Size = getDataLayout().getTypeAllocSize(CV->getType());
assert(Size && "C++ constructor pointer had zero size!");
@@ -90,17 +100,17 @@ void ARMAsmPrinter::EmitXXStructor(const DataLayout &DL, const Constant *CV) {
: MCSymbolRefExpr::VK_None),
OutContext);
- OutStreamer->EmitValue(E, Size);
+ OutStreamer->emitValue(E, Size);
}
-void ARMAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+void ARMAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
if (PromotedGlobals.count(GV))
// The global was promoted into a constant pool. It should not be emitted.
return;
- AsmPrinter::EmitGlobalVariable(GV);
+ AsmPrinter::emitGlobalVariable(GV);
}
-/// runOnMachineFunction - This uses the EmitInstruction()
+/// runOnMachineFunction - This uses the emitInstruction()
/// method to print assembly for each instruction.
///
bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
@@ -158,7 +168,7 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
}
// Emit the rest of the function body.
- EmitFunctionBody();
+ emitFunctionBody();
// Emit the XRay table for this function.
emitXRayTable();
@@ -167,10 +177,10 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// These are created per function, rather than per TU, since it's
// relatively easy to exceed the thumb branch range within a TU.
if (! ThumbIndirectPads.empty()) {
- OutStreamer->EmitAssemblerFlag(MCAF_Code16);
- EmitAlignment(Align(2));
+ OutStreamer->emitAssemblerFlag(MCAF_Code16);
+ emitAlignment(Align(2));
for (std::pair<unsigned, MCSymbol *> &TIP : ThumbIndirectPads) {
- OutStreamer->EmitLabel(TIP.second);
+ OutStreamer->emitLabel(TIP.second);
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBX)
.addReg(TIP.first)
// Add predicate operands.
@@ -467,14 +477,14 @@ void ARMAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
// the start mode, then restore the start mode.
const bool WasThumb = isThumb(StartInfo);
if (!EndInfo || WasThumb != isThumb(*EndInfo)) {
- OutStreamer->EmitAssemblerFlag(WasThumb ? MCAF_Code16 : MCAF_Code32);
+ OutStreamer->emitAssemblerFlag(WasThumb ? MCAF_Code16 : MCAF_Code32);
}
}
-void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
+void ARMAsmPrinter::emitStartOfAsmFile(Module &M) {
const Triple &TT = TM.getTargetTriple();
// Use unified assembler syntax.
- OutStreamer->EmitAssemblerFlag(MCAF_SyntaxUnified);
+ OutStreamer->emitAssemblerFlag(MCAF_SyntaxUnified);
// Emit ARM Build Attributes
if (TT.isOSBinFormatELF())
@@ -484,20 +494,20 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
// if we're thumb for the purposes of the top level code16 assembler
// flag.
if (!M.getModuleInlineAsm().empty() && TT.isThumb())
- OutStreamer->EmitAssemblerFlag(MCAF_Code16);
+ OutStreamer->emitAssemblerFlag(MCAF_Code16);
}
static void
emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel,
MachineModuleInfoImpl::StubValueTy &MCSym) {
// L_foo$stub:
- OutStreamer.EmitLabel(StubLabel);
+ OutStreamer.emitLabel(StubLabel);
// .indirect_symbol _foo
- OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
+ OutStreamer.emitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
if (MCSym.getInt())
// External to current translation unit.
- OutStreamer.EmitIntValue(0, 4/*size*/);
+ OutStreamer.emitIntValue(0, 4/*size*/);
else
// Internal to current translation unit.
//
@@ -505,13 +515,13 @@ emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel,
// pointers need to be indirect and pc-rel. We accomplish this by
// using NLPs; however, sometimes the types are local to the file.
// We need to fill in the value for the NLP in those cases.
- OutStreamer.EmitValue(
+ OutStreamer.emitValue(
MCSymbolRefExpr::create(MCSym.getPointer(), OutStreamer.getContext()),
4 /*size*/);
}
-void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
+void ARMAsmPrinter::emitEndOfAsmFile(Module &M) {
const Triple &TT = TM.getTargetTriple();
if (TT.isOSBinFormatMachO()) {
// All darwin targets use mach-o.
@@ -526,7 +536,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
if (!Stubs.empty()) {
// Switch with ".non_lazy_symbol_pointer" directive.
OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
- EmitAlignment(Align(4));
+ emitAlignment(Align(4));
for (auto &Stub : Stubs)
emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second);
@@ -539,7 +549,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
if (!Stubs.empty()) {
// Switch with ".non_lazy_symbol_pointer" directive.
OutStreamer->SwitchSection(TLOFMacho.getThreadLocalPointerSection());
- EmitAlignment(Align(4));
+ emitAlignment(Align(4));
for (auto &Stub : Stubs)
emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second);
@@ -553,7 +563,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// implementation of multiple entry points). If this doesn't occur, the
// linker can safely perform dead code stripping. Since LLVM never
// generates code that does this, it is always safe to set.
- OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+ OutStreamer->emitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
// The last attribute to be emitted is ABI_optimization_goals
@@ -570,18 +580,28 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
}
//===----------------------------------------------------------------------===//
-// Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile()
+// Helper routines for emitStartOfAsmFile() and emitEndOfAsmFile()
// FIXME:
// The following seem like one-off assembler flags, but they actually need
// to appear in the .ARM.attributes section in ELF.
// Instead of subclassing the MCELFStreamer, we do the work here.
-// Returns true if all functions have the same function attribute value.
-// It also returns true when the module has no functions.
+ // Returns true if all functions have the same function attribute value.
+ // It also returns true when the module has no functions.
static bool checkFunctionsAttributeConsistency(const Module &M, StringRef Attr,
StringRef Value) {
+ return !any_of(M, [&](const Function &F) {
+ return F.getFnAttribute(Attr).getValueAsString() != Value;
+ });
+}
+// Returns true if all functions have the same denormal mode.
+// It also returns true when the module has no functions.
+static bool checkDenormalAttributeConsistency(const Module &M,
+ StringRef Attr,
+ DenormalMode Value) {
return !any_of(M, [&](const Function &F) {
- return F.getFnAttribute(Attr).getValueAsString() != Value;
+ StringRef AttrVal = F.getFnAttribute(Attr).getValueAsString();
+ return parseDenormalFPAttribute(AttrVal) != Value;
});
}
@@ -606,11 +626,12 @@ void ARMAsmPrinter::emitAttributes() {
if (!ArchFS.empty())
ArchFS = (Twine(ArchFS) + "," + FS).str();
else
- ArchFS = FS;
+ ArchFS = std::string(FS);
}
const ARMBaseTargetMachine &ATM =
static_cast<const ARMBaseTargetMachine &>(TM);
- const ARMSubtarget STI(TT, CPU, ArchFS, ATM, ATM.isLittleEndian());
+ const ARMSubtarget STI(TT, std::string(CPU), ArchFS, ATM,
+ ATM.isLittleEndian());
// Emit build attributes for the available hardware.
ATS.emitTargetAttributes(STI);
@@ -641,16 +662,13 @@ void ARMAsmPrinter::emitAttributes() {
}
// Set FP Denormals.
- if (checkFunctionsAttributeConsistency(*MMI->getModule(),
- "denormal-fp-math",
- "preserve-sign") ||
- TM.Options.FPDenormalMode == FPDenormal::PreserveSign)
+ if (checkDenormalAttributeConsistency(*MMI->getModule(), "denormal-fp-math",
+ DenormalMode::getPreserveSign()))
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
ARMBuildAttrs::PreserveFPSign);
- else if (checkFunctionsAttributeConsistency(*MMI->getModule(),
- "denormal-fp-math",
- "positive-zero") ||
- TM.Options.FPDenormalMode == FPDenormal::PositiveZero)
+ else if (checkDenormalAttributeConsistency(*MMI->getModule(),
+ "denormal-fp-math",
+ DenormalMode::getPositiveZero()))
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
ARMBuildAttrs::PositiveZero);
else if (!TM.Options.UnsafeFPMath)
@@ -855,8 +873,8 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
llvm_unreachable("unexpected target");
}
-void ARMAsmPrinter::
-EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+void ARMAsmPrinter::emitMachineConstantPoolValue(
+ MachineConstantPoolValue *MCPV) {
const DataLayout &DL = getDataLayout();
int Size = DL.getTypeAllocSize(MCPV->getType());
@@ -876,11 +894,11 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
for (const auto *GV : ACPC->promotedGlobals()) {
if (!EmittedPromotedGlobalLabels.count(GV)) {
MCSymbol *GVSym = getSymbol(GV);
- OutStreamer->EmitLabel(GVSym);
+ OutStreamer->emitLabel(GVSym);
EmittedPromotedGlobalLabels.insert(GV);
}
}
- return EmitGlobalConstant(DL, ACPC->getPromotedGlobalInit());
+ return emitGlobalConstant(DL, ACPC->getPromotedGlobalInit());
}
MCSymbol *MCSym;
@@ -925,29 +943,29 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
// We want "(<expr> - .)", but MC doesn't have a concept of the '.'
// label, so just emit a local label end reference that instead.
MCSymbol *DotSym = OutContext.createTempSymbol();
- OutStreamer->EmitLabel(DotSym);
+ OutStreamer->emitLabel(DotSym);
const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
PCRelExpr = MCBinaryExpr::createSub(PCRelExpr, DotExpr, OutContext);
}
Expr = MCBinaryExpr::createSub(Expr, PCRelExpr, OutContext);
}
- OutStreamer->EmitValue(Expr, Size);
+ OutStreamer->emitValue(Expr, Size);
}
-void ARMAsmPrinter::EmitJumpTableAddrs(const MachineInstr *MI) {
+void ARMAsmPrinter::emitJumpTableAddrs(const MachineInstr *MI) {
const MachineOperand &MO1 = MI->getOperand(1);
unsigned JTI = MO1.getIndex();
// Make sure the Thumb jump table is 4-byte aligned. This will be a nop for
// ARM mode tables.
- EmitAlignment(Align(4));
+ emitAlignment(Align(4));
// Emit a label for the jump table.
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
- OutStreamer->EmitLabel(JTISymbol);
+ OutStreamer->emitLabel(JTISymbol);
// Mark the jump table as data-in-code.
- OutStreamer->EmitDataRegion(MCDR_DataRegionJT32);
+ OutStreamer->emitDataRegion(MCDR_DataRegionJT32);
// Emit each entry of the table.
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
@@ -974,23 +992,23 @@ void ARMAsmPrinter::EmitJumpTableAddrs(const MachineInstr *MI) {
else if (AFI->isThumbFunction())
Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(1,OutContext),
OutContext);
- OutStreamer->EmitValue(Expr, 4);
+ OutStreamer->emitValue(Expr, 4);
}
// Mark the end of jump table data-in-code region.
- OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
+ OutStreamer->emitDataRegion(MCDR_DataRegionEnd);
}
-void ARMAsmPrinter::EmitJumpTableInsts(const MachineInstr *MI) {
+void ARMAsmPrinter::emitJumpTableInsts(const MachineInstr *MI) {
const MachineOperand &MO1 = MI->getOperand(1);
unsigned JTI = MO1.getIndex();
// Make sure the Thumb jump table is 4-byte aligned. This will be a nop for
// ARM mode tables.
- EmitAlignment(Align(4));
+ emitAlignment(Align(4));
// Emit a label for the jump table.
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
- OutStreamer->EmitLabel(JTISymbol);
+ OutStreamer->emitLabel(JTISymbol);
// Emit each entry of the table.
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
@@ -1008,17 +1026,17 @@ void ARMAsmPrinter::EmitJumpTableInsts(const MachineInstr *MI) {
}
}
-void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI,
+void ARMAsmPrinter::emitJumpTableTBInst(const MachineInstr *MI,
unsigned OffsetWidth) {
assert((OffsetWidth == 1 || OffsetWidth == 2) && "invalid tbb/tbh width");
const MachineOperand &MO1 = MI->getOperand(1);
unsigned JTI = MO1.getIndex();
if (Subtarget->isThumb1Only())
- EmitAlignment(Align(4));
+ emitAlignment(Align(4));
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
- OutStreamer->EmitLabel(JTISymbol);
+ OutStreamer->emitLabel(JTISymbol);
// Emit each entry of the table.
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
@@ -1026,7 +1044,7 @@ void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI,
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
// Mark the jump table as data-in-code.
- OutStreamer->EmitDataRegion(OffsetWidth == 1 ? MCDR_DataRegionJT8
+ OutStreamer->emitDataRegion(OffsetWidth == 1 ? MCDR_DataRegionJT8
: MCDR_DataRegionJT16);
for (auto MBB : JTBBs) {
@@ -1050,15 +1068,15 @@ void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI,
Expr = MCBinaryExpr::createSub(MBBSymbolExpr, Expr, OutContext);
Expr = MCBinaryExpr::createDiv(Expr, MCConstantExpr::create(2, OutContext),
OutContext);
- OutStreamer->EmitValue(Expr, OffsetWidth);
+ OutStreamer->emitValue(Expr, OffsetWidth);
}
// Mark the end of jump table data-in-code region. 32-bit offsets use
// actual branch instructions here, so we don't mark those as a data-region
// at all.
- OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
+ OutStreamer->emitDataRegion(MCDR_DataRegionEnd);
// Make sure the next instruction is 2-byte aligned.
- EmitAlignment(Align(2));
+ emitAlignment(Align(2));
}
void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
@@ -1076,16 +1094,26 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
unsigned Opc = MI->getOpcode();
unsigned SrcReg, DstReg;
- if (Opc == ARM::tPUSH || Opc == ARM::tLDRpci) {
- // Two special cases:
- // 1) tPUSH does not have src/dst regs.
- // 2) for Thumb1 code we sometimes materialize the constant via constpool
- // load. Yes, this is pretty fragile, but for now I don't see better
- // way... :(
+ switch (Opc) {
+ case ARM::tPUSH:
+ // special case: tPUSH does not have src/dst regs.
SrcReg = DstReg = ARM::SP;
- } else {
+ break;
+ case ARM::tLDRpci:
+ case ARM::t2MOVi16:
+ case ARM::t2MOVTi16:
+ // special cases:
+ // 1) for Thumb1 code we sometimes materialize the constant via constpool
+ // load.
+ // 2) for Thumb2 execute only code we materialize the constant via
+ // immediate constants in 2 separate instructions (MOVW/MOVT).
+ SrcReg = ~0U;
+ DstReg = MI->getOperand(0).getReg();
+ break;
+ default:
SrcReg = MI->getOperand(1).getReg();
DstReg = MI->getOperand(0).getReg();
+ break;
}
// Try to figure out the unwinding opcode out of src / dst regs.
@@ -1189,23 +1217,11 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
case ARM::tADDrSPi:
Offset = -MI->getOperand(2).getImm()*4;
break;
- case ARM::tLDRpci: {
- // Grab the constpool index and check, whether it corresponds to
- // original or cloned constpool entry.
- unsigned CPI = MI->getOperand(1).getIndex();
- const MachineConstantPool *MCP = MF.getConstantPool();
- if (CPI >= MCP->getConstants().size())
- CPI = AFI->getOriginalCPIdx(CPI);
- assert(CPI != -1U && "Invalid constpool index");
-
- // Derive the actual offset.
- const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
- assert(!CPE.isMachineConstantPoolEntry() && "Invalid constpool entry");
- // FIXME: Check for user, it should be "add" instruction!
- Offset = -cast<ConstantInt>(CPE.Val.ConstVal)->getSExtValue();
+ case ARM::tADDhirr:
+ Offset =
+ -AFI->EHPrologueOffsetInRegs.lookup(MI->getOperand(2).getReg());
break;
}
- }
if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM) {
if (DstReg == FramePtr && FramePtr != ARM::SP)
@@ -1225,14 +1241,43 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
} else if (DstReg == ARM::SP) {
MI->print(errs());
llvm_unreachable("Unsupported opcode for unwinding information");
- } else if (Opc == ARM::tMOVr) {
- // If a Thumb1 function spills r8-r11, we copy the values to low
- // registers before pushing them. Record the copy so we can emit the
- // correct ".save" later.
- AFI->EHPrologueRemappedRegs[DstReg] = SrcReg;
} else {
- MI->print(errs());
- llvm_unreachable("Unsupported opcode for unwinding information");
+ int64_t Offset = 0;
+ switch (Opc) {
+ case ARM::tMOVr:
+ // If a Thumb1 function spills r8-r11, we copy the values to low
+ // registers before pushing them. Record the copy so we can emit the
+ // correct ".save" later.
+ AFI->EHPrologueRemappedRegs[DstReg] = SrcReg;
+ break;
+ case ARM::tLDRpci: {
+ // Grab the constpool index and check, whether it corresponds to
+ // original or cloned constpool entry.
+ unsigned CPI = MI->getOperand(1).getIndex();
+ const MachineConstantPool *MCP = MF.getConstantPool();
+ if (CPI >= MCP->getConstants().size())
+ CPI = AFI->getOriginalCPIdx(CPI);
+ assert(CPI != -1U && "Invalid constpool index");
+
+ // Derive the actual offset.
+ const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
+ assert(!CPE.isMachineConstantPoolEntry() && "Invalid constpool entry");
+ Offset = cast<ConstantInt>(CPE.Val.ConstVal)->getSExtValue();
+ AFI->EHPrologueOffsetInRegs[DstReg] = Offset;
+ break;
+ }
+ case ARM::t2MOVi16:
+ Offset = MI->getOperand(1).getImm();
+ AFI->EHPrologueOffsetInRegs[DstReg] = Offset;
+ break;
+ case ARM::t2MOVTi16:
+ Offset = MI->getOperand(2).getImm();
+ AFI->EHPrologueOffsetInRegs[DstReg] |= (Offset << 16);
+ break;
+ default:
+ MI->print(errs());
+ llvm_unreachable("Unsupported opcode for unwinding information");
+ }
}
}
}
@@ -1241,7 +1286,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
// instructions) auto-generated.
#include "ARMGenMCPseudoLowering.inc"
-void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
const DataLayout &DL = getDataLayout();
MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
@@ -1252,7 +1297,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// If we just ended a constant pool, mark it as such.
if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) {
- OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
+ OutStreamer->emitDataRegion(MCDR_DataRegionEnd);
InConstantPool = false;
}
@@ -1513,7 +1558,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// This is a pseudo op for a label used by a branch future instruction
// Emit the label.
- OutStreamer->EmitLabel(getBFLabel(DL.getPrivateGlobalPrefix(),
+ OutStreamer->emitLabel(getBFLabel(DL.getPrivateGlobalPrefix(),
getFunctionNumber(),
MI->getOperand(0).getIndex(), OutContext));
return;
@@ -1525,7 +1570,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// This adds the address of LPC0 to r0.
// Emit the label.
- OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(),
+ OutStreamer->emitLabel(getPICLabel(DL.getPrivateGlobalPrefix(),
getFunctionNumber(),
MI->getOperand(2).getImm(), OutContext));
@@ -1546,7 +1591,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// This adds the address of LPC0 to r0.
// Emit the label.
- OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(),
+ OutStreamer->emitLabel(getPICLabel(DL.getPrivateGlobalPrefix(),
getFunctionNumber(),
MI->getOperand(2).getImm(), OutContext));
@@ -1577,7 +1622,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// a PC-relative address at the ldr instruction.
// Emit the label.
- OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(),
+ OutStreamer->emitLabel(getPICLabel(DL.getPrivateGlobalPrefix(),
getFunctionNumber(),
MI->getOperand(2).getImm(), OutContext));
@@ -1620,28 +1665,28 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// If this is the first entry of the pool, mark it.
if (!InConstantPool) {
- OutStreamer->EmitDataRegion(MCDR_DataRegion);
+ OutStreamer->emitDataRegion(MCDR_DataRegion);
InConstantPool = true;
}
- OutStreamer->EmitLabel(GetCPISymbol(LabelId));
+ OutStreamer->emitLabel(GetCPISymbol(LabelId));
const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx];
if (MCPE.isMachineConstantPoolEntry())
- EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
+ emitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
else
- EmitGlobalConstant(DL, MCPE.Val.ConstVal);
+ emitGlobalConstant(DL, MCPE.Val.ConstVal);
return;
}
case ARM::JUMPTABLE_ADDRS:
- EmitJumpTableAddrs(MI);
+ emitJumpTableAddrs(MI);
return;
case ARM::JUMPTABLE_INSTS:
- EmitJumpTableInsts(MI);
+ emitJumpTableInsts(MI);
return;
case ARM::JUMPTABLE_TBB:
case ARM::JUMPTABLE_TBH:
- EmitJumpTableTBInst(MI, MI->getOpcode() == ARM::JUMPTABLE_TBB ? 1 : 2);
+ emitJumpTableTBInst(MI, MI->getOpcode() == ARM::JUMPTABLE_TBB ? 1 : 2);
return;
case ARM::t2BR_JT: {
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr)
@@ -1656,7 +1701,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::t2TBH_JT: {
unsigned Opc = MI->getOpcode() == ARM::t2TBB_JT ? ARM::t2TBB : ARM::t2TBH;
// Lower and emit the PC label, then the instruction itself.
- OutStreamer->EmitLabel(GetCPISymbol(MI->getOperand(3).getImm()));
+ OutStreamer->emitLabel(GetCPISymbol(MI->getOperand(3).getImm()));
EmitToStreamer(*OutStreamer, MCInstBuilder(Opc)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
@@ -1698,7 +1743,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// FIXME: Ideally we could vary the LDRB index based on the padding
// between the sequence and jump table, however that relies on MCExprs
// for load indexes which are currently not supported.
- OutStreamer->EmitCodeAlignment(4);
+ OutStreamer->emitCodeAlignment(4);
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr)
.addReg(Idx)
.addReg(Idx)
@@ -1740,7 +1785,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addImm(ARMCC::AL)
.addReg(0));
- OutStreamer->EmitLabel(GetCPISymbol(MI->getOperand(3).getImm()));
+ OutStreamer->emitLabel(GetCPISymbol(MI->getOperand(3).getImm()));
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr)
.addReg(ARM::PC)
.addReg(ARM::PC)
@@ -1809,7 +1854,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
case ARM::SPACE:
- OutStreamer->EmitZeros(MI->getOperand(1).getImm());
+ OutStreamer->emitZeros(MI->getOperand(1).getImm());
return;
case ARM::TRAP: {
// Non-Darwin binutils don't yet support the "trap" mnemonic.
@@ -1904,7 +1949,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addImm(ARMCC::AL)
.addReg(0));
- OutStreamer->EmitLabel(Label);
+ OutStreamer->emitLabel(Label);
return;
}
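
In the EmitUnwindingInstruction changes above, a Thumb2 execute-only prologue may materialize the stack adjustment with a t2MOVi16/t2MOVTi16 pair, and the two halves are merged in EHPrologueOffsetInRegs by OR-ing the high half shifted left by 16. A small self-contained sketch of that arithmetic (names are illustrative, not from the patch):

    #include <cassert>
    #include <cstdint>

    // Reassemble a 32-bit value from a MOVW (low half) / MOVT (high half) pair,
    // mirroring the bookkeeping done via EHPrologueOffsetInRegs above.
    static uint32_t combineMovwMovt(uint16_t MovwImm, uint16_t MovtImm) {
      uint32_t Value = MovwImm;          // t2MOVi16 sets the low 16 bits
      Value |= uint32_t(MovtImm) << 16;  // t2MOVTi16 ORs in the high 16 bits
      return Value;
    }

    int main() {
      assert(combineMovwMovt(0x5678, 0x1234) == 0x12345678u);
      return 0;
    }
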
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.h
index a4b37fa2331f..f8ff047a1d06 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -84,21 +84,21 @@ public:
void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
const MCSubtargetInfo *EndInfo) const override;
- void EmitJumpTableAddrs(const MachineInstr *MI);
- void EmitJumpTableInsts(const MachineInstr *MI);
- void EmitJumpTableTBInst(const MachineInstr *MI, unsigned OffsetWidth);
- void EmitInstruction(const MachineInstr *MI) override;
+ void emitJumpTableAddrs(const MachineInstr *MI);
+ void emitJumpTableInsts(const MachineInstr *MI);
+ void emitJumpTableTBInst(const MachineInstr *MI, unsigned OffsetWidth);
+ void emitInstruction(const MachineInstr *MI) override;
bool runOnMachineFunction(MachineFunction &F) override;
- void EmitConstantPool() override {
+ void emitConstantPool() override {
// we emit constant pools customly!
}
- void EmitFunctionBodyEnd() override;
- void EmitFunctionEntryLabel() override;
- void EmitStartOfAsmFile(Module &M) override;
- void EmitEndOfAsmFile(Module &M) override;
- void EmitXXStructor(const DataLayout &DL, const Constant *CV) override;
- void EmitGlobalVariable(const GlobalVariable *GV) override;
+ void emitFunctionBodyEnd() override;
+ void emitFunctionEntryLabel() override;
+ void emitStartOfAsmFile(Module &M) override;
+ void emitEndOfAsmFile(Module &M) override;
+ void emitXXStructor(const DataLayout &DL, const Constant *CV) override;
+ void emitGlobalVariable(const GlobalVariable *GV) override;
MCSymbol *GetCPISymbol(unsigned CPID) const override;
@@ -117,7 +117,7 @@ public:
private:
void EmitSled(const MachineInstr &MI, SledKind Kind);
- // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
+ // Helpers for emitStartOfAsmFile() and emitEndOfAsmFile()
void emitAttributes();
// Generic helper used to emit e.g. ARMv5 mul pseudos
@@ -150,7 +150,7 @@ private:
public:
/// EmitMachineConstantPoolValue - Print a machine constantpool value to
/// the .s file.
- void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override;
+ void emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 48f781510254..4cc2b6bf7e7e 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
@@ -495,6 +496,31 @@ bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
}
+std::string ARMBaseInstrInfo::createMIROperandComment(
+ const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
+ const TargetRegisterInfo *TRI) const {
+
+ // First, let's see if there is a generic comment for this operand
+ std::string GenericComment =
+ TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
+ if (!GenericComment.empty())
+ return GenericComment;
+
+ // If not, check if we have an immediate operand.
+ if (Op.getType() != MachineOperand::MO_Immediate)
+ return std::string();
+
+ // And print its corresponding condition code if the immediate is a
+ // predicate.
+ int FirstPredOp = MI.findFirstPredOperandIdx();
+ if (FirstPredOp != (int) OpIdx)
+ return std::string();
+
+ std::string CC = "CC::";
+ CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
+ return CC;
+}
+
bool ARMBaseInstrInfo::PredicateInstruction(
MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
unsigned Opc = MI.getOpcode();
@@ -811,7 +837,7 @@ void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
}
void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
- unsigned DestReg) {
+ Register DestReg) {
addUnpredicatedMveVpredNOp(MIB);
MIB.addReg(DestReg, RegState::Undef);
}
@@ -1009,6 +1035,36 @@ ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
}
+Optional<ParamLoadedValue>
+ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI,
+ Register Reg) const {
+ if (auto DstSrcPair = isCopyInstrImpl(MI)) {
+ Register DstReg = DstSrcPair->Destination->getReg();
+
+ // TODO: We don't handle cases where the forwarding reg is narrower/wider
+ // than the copy registers. Consider for example:
+ //
+ // s16 = VMOVS s0
+ // s17 = VMOVS s1
+ // call @callee(d0)
+ //
+ // We'd like to describe the call site value of d0 as d8, but this requires
+ // gathering and merging the descriptions for the two VMOVS instructions.
+ //
+ // We also don't handle the reverse situation, where the forwarding reg is
+ // narrower than the copy destination:
+ //
+ // d8 = VMOVD d0
+ // call @callee(s1)
+ //
+ // We need to produce a fragment description (the call site value of s1 is
+ // /not/ just d8).
+ if (DstReg != Reg)
+ return None;
+ }
+ return TargetInstrInfo::describeLoadedValue(MI, Reg);
+}
+
const MachineInstrBuilder &
ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
unsigned SubIdx, unsigned State,
@@ -1023,16 +1079,16 @@ ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
void ARMBaseInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
+ Register SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned Align = MFI.getObjectAlignment(FI);
+ Align Alignment = MFI.getObjectAlign(FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
- MFI.getObjectSize(FI), Align);
+ MFI.getObjectSize(FI), Alignment);
switch (TRI->getSpillSize(*RC)) {
case 2:
@@ -1102,7 +1158,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
case 16:
if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
// Use aligned spills if the stack can be realigned.
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
.addFrameIndex(FI)
.addImm(16)
@@ -1130,7 +1186,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
case 24:
if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
// Use aligned spills if the stack can be realigned.
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF) &&
+ if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
Subtarget.hasNEON()) {
BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
.addFrameIndex(FI)
@@ -1153,7 +1209,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 32:
if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF) &&
+ if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
Subtarget.hasNEON()) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
@@ -1264,17 +1320,17 @@ unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, int FI,
+ Register DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned Align = MFI.getObjectAlignment(FI);
+ const Align Alignment = MFI.getObjectAlign(FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI), Align);
+ MFI.getObjectSize(FI), Alignment);
switch (TRI->getSpillSize(*RC)) {
case 2:
@@ -1343,7 +1399,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 16:
if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
.addFrameIndex(FI)
.addImm(16)
@@ -1367,7 +1423,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 24:
if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF) &&
+ if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
Subtarget.hasNEON()) {
BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
.addFrameIndex(FI)
@@ -1390,7 +1446,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 32:
if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF) &&
+ if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
Subtarget.hasNEON()) {
BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
.addFrameIndex(FI)
@@ -1682,13 +1738,13 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
else
llvm_unreachable("Unexpected ARM constantpool value type!!");
- CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
+ CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign());
return PCLabelId;
}
void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SubIdx,
+ Register DestReg, unsigned SubIdx,
const MachineInstr &Orig,
const TargetRegisterInfo &TRI) const {
unsigned Opcode = Orig.getOpcode();
@@ -1959,6 +2015,10 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
if (MI.isTerminator() || MI.isPosition())
return true;
+ // INLINEASM_BR can jump to another block
+ if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
+ return true;
+
// Treat the start of the IT block as a scheduling boundary, but schedule
// t2IT along with all instructions following it.
// FIXME: This is a big hammer. But the alternative is to add all potential
@@ -2120,7 +2180,7 @@ ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
- unsigned &PredReg) {
+ Register &PredReg) {
int PIdx = MI.findFirstPredOperandIdx();
if (PIdx == -1) {
PredReg = 0;
@@ -2150,7 +2210,7 @@ MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
case ARM::MOVCCr:
case ARM::t2MOVCCr: {
// MOVCC can be commuted by inverting the condition.
- unsigned PredReg = 0;
+ Register PredReg;
ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
// MOVCC AL can't be inverted. Shouldn't happen.
if (CC == ARMCC::AL || PredReg != ARM::CPSR)
@@ -2171,9 +2231,9 @@ MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
/// Identify instructions that can be folded into a MOVCC instruction, and
/// return the defining instruction.
MachineInstr *
-ARMBaseInstrInfo::canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI,
+ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII) const {
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
return nullptr;
if (!MRI.hasOneNonDBGUse(Reg))
return nullptr;
@@ -2353,9 +2413,9 @@ unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- const DebugLoc &dl, unsigned DestReg,
- unsigned BaseReg, int NumBytes,
- ARMCC::CondCodes Pred, unsigned PredReg,
+ const DebugLoc &dl, Register DestReg,
+ Register BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, Register PredReg,
const ARMBaseInstrInfo &TII,
unsigned MIFlags) {
if (NumBytes == 0 && DestReg != BaseReg) {
@@ -2515,7 +2575,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
}
bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int &Offset,
+ Register FrameReg, int &Offset,
const ARMBaseInstrInfo &TII) {
unsigned Opcode = MI.getOpcode();
const MCInstrDesc &Desc = MI.getDesc();
@@ -2671,8 +2731,8 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
/// in SrcReg and SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
-bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &CmpMask,
+bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &CmpMask,
int &CmpValue) const {
switch (MI.getOpcode()) {
default: break;
@@ -2708,7 +2768,7 @@ bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
/// operates on the given source register and applies the same mask
/// as a 'tst' instruction. Provide a limited look-through for copies.
/// When successful, MI will hold the found instruction.
-static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
+static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg,
int CmpMask, bool CommonUse) {
switch (MI->getOpcode()) {
case ARM::ANDri:
@@ -2743,7 +2803,7 @@ inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
/// This function can be extended later on.
inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
- unsigned SrcReg, unsigned SrcReg2,
+ Register SrcReg, Register SrcReg2,
int ImmValue, const MachineInstr *OI,
bool &IsThumb1) {
if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
@@ -2879,7 +2939,7 @@ static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
/// condition code of instructions which use the flags.
bool ARMBaseInstrInfo::optimizeCompareInstr(
- MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
+ MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask,
int CmpValue, const MachineRegisterInfo *MRI) const {
// Get the unique definition of SrcReg.
MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
@@ -3166,7 +3226,7 @@ bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
return true;
MachineBasicBlock::const_iterator Next = &MI;
++Next;
- unsigned SrcReg, SrcReg2;
+ Register SrcReg, SrcReg2;
int CmpMask, CmpValue;
bool IsThumb1;
if (Next != MI.getParent()->end() &&
@@ -3177,7 +3237,7 @@ bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
}
bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
- unsigned Reg,
+ Register Reg,
MachineRegisterInfo *MRI) const {
// Fold large immediates into add, sub, or, xor.
unsigned DefOpc = DefMI.getOpcode();
@@ -3729,7 +3789,7 @@ unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
// If there are odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
if ((NumRegs % 2) || !MI.hasOneMemOperand() ||
- (*MI.memoperands_begin())->getAlignment() < 8)
+ (*MI.memoperands_begin())->getAlign() < Align(8))
++UOps;
return UOps;
}
@@ -4316,10 +4376,10 @@ int ARMBaseInstrInfo::getOperandLatencyImpl(
return -1;
unsigned DefAlign = DefMI.hasOneMemOperand()
- ? (*DefMI.memoperands_begin())->getAlignment()
+ ? (*DefMI.memoperands_begin())->getAlign().value()
: 0;
unsigned UseAlign = UseMI.hasOneMemOperand()
- ? (*UseMI.memoperands_begin())->getAlignment()
+ ? (*UseMI.memoperands_begin())->getAlign().value()
: 0;
// Get the itinerary's latency if possible, and handle variable_ops.
@@ -4366,10 +4426,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
auto *DefMN = cast<MachineSDNode>(DefNode);
unsigned DefAlign = !DefMN->memoperands_empty()
- ? (*DefMN->memoperands_begin())->getAlignment() : 0;
+ ? (*DefMN->memoperands_begin())->getAlign().value()
+ : 0;
auto *UseMN = cast<MachineSDNode>(UseNode);
unsigned UseAlign = !UseMN->memoperands_empty()
- ? (*UseMN->memoperands_begin())->getAlignment() : 0;
+ ? (*UseMN->memoperands_begin())->getAlign().value()
+ : 0;
int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
UseMCID, UseIdx, UseAlign);
@@ -4660,7 +4722,7 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
// Adjust for dynamic def-side opcode variants not captured by the itinerary.
unsigned DefAlign =
- MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlignment() : 0;
+ MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0;
int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
if (Adj >= 0 || (int)Latency > -Adj) {
return Latency + Adj;
@@ -4782,7 +4844,7 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant;
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
- MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
+ MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
}
@@ -5353,7 +5415,8 @@ Optional<RegImmPair> ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI,
// TODO: Handle cases where Reg is a super- or sub-register of the
// destination register.
- if (Reg != MI.getOperand(0).getReg())
+ const MachineOperand &Op0 = MI.getOperand(0);
+ if (!Op0.isReg() || Reg != Op0.getReg())
return None;
// We describe SUBri or ADDri instructions.
@@ -5365,8 +5428,7 @@ Optional<RegImmPair> ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI,
// TODO: Third operand can be global address (usually some string). Since
// strings can be relocated we cannot calculate their offsets for
// now.
- if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
- !MI.getOperand(2).isImm())
+ if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
return None;
Offset = MI.getOperand(2).getImm() * Sign;
@@ -5402,7 +5464,7 @@ MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br,
if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
return nullptr;
Register Reg = CmpMI->getOperand(0).getReg();
- unsigned PredReg = 0;
+ Register PredReg;
ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg);
if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
return nullptr;
@@ -5460,3 +5522,521 @@ bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
}
+
+/// Constants defining how certain sequences should be outlined.
+/// This encompasses how an outlined function should be called, and what kind of
+/// frame should be emitted for that outlined function.
+///
+/// \p MachineOutlinerTailCall implies that the function is being created from
+/// a sequence of instructions ending in a return.
+///
+/// That is,
+///
+/// I1 OUTLINED_FUNCTION:
+/// I2 --> B OUTLINED_FUNCTION I1
+/// BX LR I2
+/// BX LR
+///
+/// +-------------------------+--------+-----+
+/// | | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes | 4 | 4 |
+/// | Frame overhead in Bytes | 0 | 0 |
+/// | Stack fixup required | No | No |
+/// +-------------------------+--------+-----+
+///
+/// \p MachineOutlinerThunk implies that the function is being created from
+/// a sequence of instructions ending in a call. The outlined function is
+/// called with a BL instruction, and the outlined function tail-calls the
+/// original call destination.
+///
+/// That is,
+///
+/// I1 OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// BL f I2
+/// B f
+///
+/// +-------------------------+--------+-----+
+/// | | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes | 4 | 4 |
+/// | Frame overhead in Bytes | 0 | 0 |
+/// | Stack fixup required | No | No |
+/// +-------------------------+--------+-----+
+///
+/// \p MachineOutlinerNoLRSave implies that the function should be called using
+/// a BL instruction, but doesn't require LR to be saved and restored. This
+/// happens when LR is known to be dead.
+///
+/// That is,
+///
+/// I1 OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// I3 I2
+/// I3
+/// BX LR
+///
+/// +-------------------------+--------+-----+
+/// | | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes | 4 | 4 |
+/// | Frame overhead in Bytes | 4 | 4 |
+/// | Stack fixup required | No | No |
+/// +-------------------------+--------+-----+
+///
+/// \p MachineOutlinerRegSave implies that the function should be called with a
+/// save and restore of LR to an available register. This allows us to avoid
+/// stack fixups. Note that this outlining variant is compatible with the
+/// NoLRSave case.
+///
+/// That is,
+///
+/// I1 Save LR OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// I3 Restore LR I2
+/// I3
+/// BX LR
+///
+/// +-------------------------+--------+-----+
+/// | | Thumb2 | ARM |
+/// +-------------------------+--------+-----+
+/// | Call overhead in Bytes | 8 | 12 |
+/// | Frame overhead in Bytes | 2 | 4 |
+/// | Stack fixup required | No | No |
+/// +-------------------------+--------+-----+
+
+enum MachineOutlinerClass {
+ MachineOutlinerTailCall,
+ MachineOutlinerThunk,
+ MachineOutlinerNoLRSave,
+ MachineOutlinerRegSave
+};
+
+enum MachineOutlinerMBBFlags {
+ LRUnavailableSomewhere = 0x2,
+ HasCalls = 0x4,
+ UnsafeRegsDead = 0x8
+};
+
+struct OutlinerCosts {
+ const int CallTailCall;
+ const int FrameTailCall;
+ const int CallThunk;
+ const int FrameThunk;
+ const int CallNoLRSave;
+ const int FrameNoLRSave;
+ const int CallRegSave;
+ const int FrameRegSave;
+
+ OutlinerCosts(const ARMSubtarget &target)
+ : CallTailCall(target.isThumb() ? 4 : 4),
+ FrameTailCall(target.isThumb() ? 0 : 0),
+ CallThunk(target.isThumb() ? 4 : 4),
+ FrameThunk(target.isThumb() ? 0 : 0),
+ CallNoLRSave(target.isThumb() ? 4 : 4),
+ FrameNoLRSave(target.isThumb() ? 4 : 4),
+ CallRegSave(target.isThumb() ? 8 : 12),
+ FrameRegSave(target.isThumb() ? 2 : 4) {}
+};
+
+unsigned
+ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
+ assert(C.LRUWasSet && "LRU wasn't set?");
+ MachineFunction *MF = C.getMF();
+ const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo *>(
+ MF->getSubtarget().getRegisterInfo());
+
+ BitVector regsReserved = ARI->getReservedRegs(*MF);
+ // Check if there is an available register across the sequence that we can
+ // use.
+ for (unsigned Reg : ARM::rGPRRegClass) {
+ if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) &&
+ Reg != ARM::LR && // LR is not reserved, but don't use it.
+ Reg != ARM::R12 && // R12 is not guaranteed to be preserved.
+ C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
+ return Reg;
+ }
+
+ // No suitable register. Return 0.
+ return 0u;
+}
+
+outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
+ std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
+ outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
+ unsigned SequenceSize =
+ std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
+ [this](unsigned Sum, const MachineInstr &MI) {
+ return Sum + getInstSizeInBytes(MI);
+ });
+
+ // Properties about candidate MBBs that hold for all of them.
+ unsigned FlagsSetInAll = 0xF;
+
+  // Intersect the flags of every candidate to compute FlagsSetInAll.
+ const TargetRegisterInfo &TRI = getRegisterInfo();
+ std::for_each(
+ RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
+ [&FlagsSetInAll](outliner::Candidate &C) { FlagsSetInAll &= C.Flags; });
+
+ // According to the ARM Procedure Call Standard, the following are
+ // undefined on entry/exit from a function call:
+ //
+ // * Register R12(IP),
+ // * Condition codes (and thus the CPSR register)
+ //
+ // Since we control the instructions which are part of the outlined regions
+ // we don't need to be fully compliant with the AAPCS, but we have to
+ // guarantee that if a veneer is inserted at link time the code is still
+ // correct. Because of this, we can't outline any sequence of instructions
+ // where one of these registers is live into/across it. Thus, we need to
+ // delete those candidates.
+ auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
+ // If the unsafe registers in this block are all dead, then we don't need
+ // to compute liveness here.
+ if (C.Flags & UnsafeRegsDead)
+ return false;
+ C.initLRU(TRI);
+ LiveRegUnits LRU = C.LRU;
+ return (!LRU.available(ARM::R12) || !LRU.available(ARM::CPSR));
+ };
+
+ // Are there any candidates where those registers are live?
+ if (!(FlagsSetInAll & UnsafeRegsDead)) {
+ // Erase every candidate that violates the restrictions above. (It could be
+ // true that we have viable candidates, so it's not worth bailing out in
+    // the case that, say, 1 out of 20 candidates violate the restrictions.)
+ RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
+ RepeatedSequenceLocs.end(),
+ CantGuaranteeValueAcrossCall),
+ RepeatedSequenceLocs.end());
+
+ // If the sequence doesn't have enough candidates left, then we're done.
+ if (RepeatedSequenceLocs.size() < 2)
+ return outliner::OutlinedFunction();
+ }
+
+ // At this point, we have only "safe" candidates to outline. Figure out
+ // frame + call instruction information.
+
+ unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
+
+ // Helper lambda which sets call information for every candidate.
+ auto SetCandidateCallInfo =
+ [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
+ for (outliner::Candidate &C : RepeatedSequenceLocs)
+ C.setCallInfo(CallID, NumBytesForCall);
+ };
+
+ OutlinerCosts Costs(Subtarget);
+ unsigned FrameID = 0;
+ unsigned NumBytesToCreateFrame = 0;
+
+ // If the last instruction in any candidate is a terminator, then we should
+ // tail call all of the candidates.
+ if (RepeatedSequenceLocs[0].back()->isTerminator()) {
+ FrameID = MachineOutlinerTailCall;
+ NumBytesToCreateFrame = Costs.FrameTailCall;
+ SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
+ } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
+ LastInstrOpcode == ARM::tBL || LastInstrOpcode == ARM::tBLXr ||
+ LastInstrOpcode == ARM::tBLXi) {
+ FrameID = MachineOutlinerThunk;
+ NumBytesToCreateFrame = Costs.FrameThunk;
+ SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
+ } else {
+ // We need to decide how to emit calls + frames. We can always emit the same
+ // frame if we don't need to save to the stack.
+ unsigned NumBytesNoStackCalls = 0;
+ std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
+
+ for (outliner::Candidate &C : RepeatedSequenceLocs) {
+ C.initLRU(TRI);
+
+ // Is LR available? If so, we don't need a save.
+ if (C.LRU.available(ARM::LR)) {
+ FrameID = MachineOutlinerNoLRSave;
+ NumBytesNoStackCalls += Costs.CallNoLRSave;
+ C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
+ CandidatesWithoutStackFixups.push_back(C);
+ }
+
+ // Is an unused register available? If so, we won't modify the stack, so
+ // we can outline with the same frame type as those that don't save LR.
+ else if (findRegisterToSaveLRTo(C)) {
+ FrameID = MachineOutlinerRegSave;
+ NumBytesNoStackCalls += Costs.CallRegSave;
+ C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave);
+ CandidatesWithoutStackFixups.push_back(C);
+ }
+ }
+
+ if (!CandidatesWithoutStackFixups.empty()) {
+ RepeatedSequenceLocs = CandidatesWithoutStackFixups;
+ } else
+ return outliner::OutlinedFunction();
+ }
+
+ return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
+ NumBytesToCreateFrame, FrameID);
+}
+
+bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
+ MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
+ const Function &F = MF.getFunction();
+
+ // Can F be deduplicated by the linker? If it can, don't outline from it.
+ if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
+ return false;
+
+ // Don't outline from functions with section markings; the program could
+ // expect that all the code is in the named section.
+ // FIXME: Allow outlining from multiple functions with the same section
+ // marking.
+ if (F.hasSection())
+ return false;
+
+ // FIXME: Thumb1 outlining is not handled
+ if (MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction())
+ return false;
+
+ // It's safe to outline from MF.
+ return true;
+}
+
+bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+ unsigned &Flags) const {
+ // Check if LR is available through all of the MBB. If it's not, then set
+ // a flag.
+ assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
+ "Suitable Machine Function for outlining must track liveness");
+
+ LiveRegUnits LRU(getRegisterInfo());
+
+ std::for_each(MBB.rbegin(), MBB.rend(),
+ [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
+
+ // Check if each of the unsafe registers are available...
+ bool R12AvailableInBlock = LRU.available(ARM::R12);
+ bool CPSRAvailableInBlock = LRU.available(ARM::CPSR);
+
+ // If all of these are dead (and not live out), we know we don't have to check
+ // them later.
+ if (R12AvailableInBlock && CPSRAvailableInBlock)
+ Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
+
+ // Now, add the live outs to the set.
+ LRU.addLiveOuts(MBB);
+
+ // If any of these registers is available in the MBB, but also a live out of
+ // the block, then we know outlining is unsafe.
+ if (R12AvailableInBlock && !LRU.available(ARM::R12))
+ return false;
+ if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR))
+ return false;
+
+ // Check if there's a call inside this MachineBasicBlock. If there is, then
+ // set a flag.
+ if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
+ Flags |= MachineOutlinerMBBFlags::HasCalls;
+
+ if (!LRU.available(ARM::LR))
+ Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
+
+ return true;
+}
+
+outliner::InstrType
+ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
+ unsigned Flags) const {
+ MachineInstr &MI = *MIT;
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+ // Be conservative with inline ASM
+ if (MI.isInlineAsm())
+ return outliner::InstrType::Illegal;
+
+ // Don't allow debug values to impact outlining type.
+ if (MI.isDebugInstr() || MI.isIndirectDebugValue())
+ return outliner::InstrType::Invisible;
+
+ // At this point, KILL or IMPLICIT_DEF instructions don't really tell us much
+ // so we can go ahead and skip over them.
+ if (MI.isKill() || MI.isImplicitDef())
+ return outliner::InstrType::Invisible;
+
+  // PIC instructions contain labels; outlining them would break offset
+  // computation.
+ unsigned Opc = MI.getOpcode();
+ if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR ||
+ Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR ||
+ Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB ||
+ Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic ||
+ Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel ||
+ Opc == ARM::t2MOV_ga_pcrel)
+ return outliner::InstrType::Illegal;
+
+ // Be conservative with ARMv8.1 MVE instructions.
+ if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart ||
+ Opc == ARM::t2WhileLoopStart || Opc == ARM::t2LoopDec ||
+ Opc == ARM::t2LoopEnd)
+ return outliner::InstrType::Illegal;
+
+ const MCInstrDesc &MCID = MI.getDesc();
+ uint64_t MIFlags = MCID.TSFlags;
+ if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE)
+ return outliner::InstrType::Illegal;
+
+ // Is this a terminator for a basic block?
+ if (MI.isTerminator()) {
+ // Don't outline if the branch is not unconditional.
+ if (isPredicated(MI))
+ return outliner::InstrType::Illegal;
+
+ // Is this the end of a function?
+ if (MI.getParent()->succ_empty())
+ return outliner::InstrType::Legal;
+
+ // It's not, so don't outline it.
+ return outliner::InstrType::Illegal;
+ }
+
+ // Make sure none of the operands are un-outlinable.
+ for (const MachineOperand &MOP : MI.operands()) {
+ if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
+ MOP.isTargetIndex())
+ return outliner::InstrType::Illegal;
+ }
+
+  // Don't outline if the link register or program counter value is used.
+ if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI))
+ return outliner::InstrType::Illegal;
+
+ if (MI.isCall()) {
+ // If we don't know anything about the callee, assume it depends on the
+ // stack layout of the caller. In that case, it's only legal to outline
+ // as a tail-call. Explicitly list the call instructions we know about so
+ // we don't get unexpected results with call pseudo-instructions.
+ auto UnknownCallOutlineType = outliner::InstrType::Illegal;
+ if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX ||
+ Opc == ARM::tBLXr || Opc == ARM::tBLXi)
+ UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
+
+ return UnknownCallOutlineType;
+ }
+
+ // Since calls are handled, don't touch LR or PC
+ if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI))
+ return outliner::InstrType::Illegal;
+
+ // Does this use the stack?
+ if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) {
+ // True if there is no chance that any outlined candidate from this range
+ // could require stack fixups. That is, both
+ // * LR is available in the range (No save/restore around call)
+ // * The range doesn't include calls (No save/restore in outlined frame)
+ // are true.
+ // FIXME: This is very restrictive; the flags check the whole block,
+ // not just the bit we will try to outline.
+ bool MightNeedStackFixUp =
+ (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
+ MachineOutlinerMBBFlags::HasCalls));
+
+ if (!MightNeedStackFixUp)
+ return outliner::InstrType::Legal;
+
+ return outliner::InstrType::Illegal;
+ }
+
+ // Be conservative with IT blocks.
+ if (MI.readsRegister(ARM::ITSTATE, TRI) ||
+ MI.modifiesRegister(ARM::ITSTATE, TRI))
+ return outliner::InstrType::Illegal;
+
+ // Don't outline positions.
+ if (MI.isPosition())
+ return outliner::InstrType::Illegal;
+
+ return outliner::InstrType::Legal;
+}
+
+void ARMBaseInstrInfo::buildOutlinedFrame(
+ MachineBasicBlock &MBB, MachineFunction &MF,
+ const outliner::OutlinedFunction &OF) const {
+ // Nothing is needed for tail-calls.
+ if (OF.FrameConstructionID == MachineOutlinerTailCall)
+ return;
+
+ // For thunk outlining, rewrite the last instruction from a call to a
+ // tail-call.
+ if (OF.FrameConstructionID == MachineOutlinerThunk) {
+ MachineInstr *Call = &*--MBB.instr_end();
+ bool isThumb = Subtarget.isThumb();
+ unsigned FuncOp = isThumb ? 2 : 0;
+ unsigned Opc = Call->getOperand(FuncOp).isReg()
+ ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr
+ : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd
+ : ARM::tTAILJMPdND
+ : ARM::TAILJMPd;
+ MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DebugLoc(), get(Opc))
+ .add(Call->getOperand(FuncOp));
+ if (isThumb && !Call->getOperand(FuncOp).isReg())
+ MIB.add(predOps(ARMCC::AL));
+ Call->eraseFromParent();
+ return;
+ }
+
+  // Here we have to insert the return ourselves. Get the correct opcode from
+  // the current feature set.
+ BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode()))
+ .add(predOps(ARMCC::AL));
+}
+
+MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
+ Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
+ MachineFunction &MF, const outliner::Candidate &C) const {
+ MachineInstrBuilder MIB;
+ MachineBasicBlock::iterator CallPt;
+ unsigned Opc;
+ bool isThumb = Subtarget.isThumb();
+
+ // Are we tail calling?
+ if (C.CallConstructionID == MachineOutlinerTailCall) {
+ // If yes, then we can just branch to the label.
+ Opc = isThumb
+ ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND
+ : ARM::TAILJMPd;
+ MIB = BuildMI(MF, DebugLoc(), get(Opc))
+ .addGlobalAddress(M.getNamedValue(MF.getName()));
+ if (isThumb)
+ MIB.add(predOps(ARMCC::AL));
+ It = MBB.insert(It, MIB);
+ return It;
+ }
+
+ // Create the call instruction.
+ Opc = isThumb ? ARM::tBL : ARM::BL;
+ MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc));
+ if (isThumb)
+ CallMIB.add(predOps(ARMCC::AL));
+ CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));
+
+ // Can we save to a register?
+ if (C.CallConstructionID == MachineOutlinerRegSave) {
+ unsigned Reg = findRegisterToSaveLRTo(C);
+ assert(Reg != 0 && "No callee-saved register available?");
+
+ // Save and restore LR from that register.
+ if (!MBB.isLiveIn(ARM::LR))
+ MBB.addLiveIn(ARM::LR);
+ copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true);
+ CallPt = MBB.insert(It, CallMIB);
+ copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true);
+ It--;
+ return CallPt;
+ }
+ // Insert the call.
+ It = MBB.insert(It, CallMIB);
+ return It;
+}
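
The candidate-pruning and call/frame classification above ultimately feeds outliner::OutlinedFunction, which weighs the bytes saved by deduplicating the sequence against the bytes spent on calls plus one shared outlined body and frame. A rough, self-contained sketch of that trade-off follows; the struct and function names are illustrative only (not part of the LLVM API), and the numbers mirror the Thumb2 figures from the cost table in the comment block above.

    // Rough sketch of the outliner's benefit computation; names and the
    // driver in main() are illustrative and not part of the LLVM API.
    #include <cstdio>

    struct CandidateCosts {
      unsigned SequenceSize;  // bytes of the repeated sequence
      unsigned NumCandidates; // how many times it occurs
      unsigned CallOverhead;  // bytes added at each call site (e.g. 4 for BL)
      unsigned FrameOverhead; // bytes added once in the outlined function
    };

    // Outlining pays off when the duplicated bytes removed exceed the bytes
    // spent on the calls plus the single outlined body and frame.
    static bool isOutliningProfitable(const CandidateCosts &C) {
      unsigned NotOutlinedCost = C.SequenceSize * C.NumCandidates;
      unsigned OutlinedCost =
          C.CallOverhead * C.NumCandidates + C.SequenceSize + C.FrameOverhead;
      return OutlinedCost < NotOutlinedCost;
    }

    int main() {
      // Three occurrences of a 12-byte Thumb2 sequence, NoLRSave-style call.
      CandidateCosts C{12, 3, 4, 4};
      std::printf("profitable: %d\n", isOutliningProfitable(C));
      return 0;
    }

With three 12-byte occurrences, the 36 duplicated bytes outweigh the 12 + 4 + 3*4 = 28 bytes of the outlined version, so outlining wins; with only two occurrences it would not.
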
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index f6d4ebe3a090..1a75b011ca59 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -21,6 +21,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsARM.h"
#include <array>
#include <cstdint>
@@ -105,6 +107,11 @@ protected:
Optional<DestSourcePair>
isCopyInstrImpl(const MachineInstr &MI) const override;
+ /// Specialization of \ref TargetInstrInfo::describeLoadedValue, used to
+ /// enhance debug entry value descriptions for ARM targets.
+ Optional<ParamLoadedValue> describeLoadedValue(const MachineInstr &MI,
+ Register Reg) const override;
+
public:
// Return whether the target has an explicit NOP encoding.
bool hasNOP() const;
@@ -146,6 +153,12 @@ public:
// Predication support.
bool isPredicated(const MachineInstr &MI) const override;
+ // MIR printer helper function to annotate Operands with a comment.
+ std::string
+ createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op,
+ unsigned OpIdx,
+ const TargetRegisterInfo *TRI) const override;
+
ARMCC::CondCodes getPredicate(const MachineInstr &MI) const {
int PIdx = MI.findFirstPredOperandIdx();
return PIdx != -1 ? (ARMCC::CondCodes)MI.getOperand(PIdx).getImm()
@@ -207,13 +220,13 @@ public:
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
+ Register SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
+ Register DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
@@ -222,7 +235,7 @@ public:
bool shouldSink(const MachineInstr &MI) const override;
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SubIdx,
+ Register DestReg, unsigned SubIdx,
const MachineInstr &Orig,
const TargetRegisterInfo &TRI) const override;
@@ -286,16 +299,16 @@ public:
/// in SrcReg and SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
- bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &CmpMask,
+ bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &CmpMask,
int &CmpValue) const override;
/// optimizeCompareInstr - Convert the instruction to set the zero flag so
/// that we can remove a "comparison with zero"; Remove a redundant CMP
/// instruction if the flags can be updated in the same way by an earlier
/// instruction such as SUB.
- bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
- unsigned SrcReg2, int CmpMask, int CmpValue,
+ bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
+ Register SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const override;
bool analyzeSelect(const MachineInstr &MI,
@@ -308,7 +321,7 @@ public:
/// FoldImmediate - 'Reg' is known to be defined by a move immediate
/// instruction, try to fold the immediate into the use instruction.
- bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
+ bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
MachineRegisterInfo *MRI) const override;
unsigned getNumMicroOps(const InstrItineraryData *ItinData,
@@ -343,7 +356,27 @@ public:
ArrayRef<std::pair<unsigned, const char *>>
getSerializableBitmaskMachineOperandTargetFlags() const override;
+ /// ARM supports the MachineOutliner.
+ bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
+ bool OutlineFromLinkOnceODRs) const override;
+ outliner::OutlinedFunction getOutliningCandidateInfo(
+ std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
+ outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT,
+ unsigned Flags) const override;
+ bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+ unsigned &Flags) const override;
+ void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
+ const outliner::OutlinedFunction &OF) const override;
+ MachineBasicBlock::iterator
+ insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &It, MachineFunction &MF,
+ const outliner::Candidate &C) const override;
+
private:
+ /// Returns an unused general-purpose register which can be used for
+ /// constructing an outlined call if one exists. Returns 0 otherwise.
+ unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
+
unsigned getInstBundleLength(const MachineInstr &MI) const;
int getVLDMDefCycle(const InstrItineraryData *ItinData,
@@ -403,7 +436,7 @@ private:
/// Identify instructions that can be folded into a MOVCC instruction, and
/// return the defining instruction.
- MachineInstr *canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI,
+ MachineInstr *canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII) const;
private:
@@ -491,24 +524,6 @@ bool isUncondBranchOpcode(int Opc) {
// This table shows the VPT instruction variants, i.e. the different
// mask field encodings, see also B5.6. Predication/conditional execution in
// the ArmARM.
-enum VPTMaskValue {
- T = 8, // 0b1000
- TT = 4, // 0b0100
- TE = 12, // 0b1100
- TTT = 2, // 0b0010
- TTE = 6, // 0b0110
- TEE = 10, // 0b1010
- TET = 14, // 0b1110
- TTTT = 1, // 0b0001
- TTTE = 3, // 0b0011
- TTEE = 5, // 0b0101
- TTET = 7, // 0b0111
- TEEE = 9, // 0b1001
- TEET = 11, // 0b1011
- TETT = 13, // 0b1101
- TETE = 15 // 0b1111
-};
-
static inline bool isVPTOpcode(int Opc) {
return Opc == ARM::MVE_VPTv16i8 || Opc == ARM::MVE_VPTv16u8 ||
Opc == ARM::MVE_VPTv16s8 || Opc == ARM::MVE_VPTv8i16 ||
@@ -595,6 +610,18 @@ unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop) {
return 0;
}
+static inline unsigned getTailPredVectorWidth(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("unhandled vctp opcode");
+ case ARM::MVE_VCTP8: return 16;
+ case ARM::MVE_VCTP16: return 8;
+ case ARM::MVE_VCTP32: return 4;
+ case ARM::MVE_VCTP64: return 2;
+ }
+ return 0;
+}
+
static inline
bool isVCTP(MachineInstr *MI) {
switch (MI->getOpcode()) {
@@ -642,20 +669,31 @@ static inline bool isPushOpcode(int Opc) {
Opc == ARM::STMDB_UPD || Opc == ARM::VSTMDDB_UPD;
}
+static inline bool isSubImmOpcode(int Opc) {
+ return Opc == ARM::SUBri ||
+ Opc == ARM::tSUBi3 || Opc == ARM::tSUBi8 ||
+ Opc == ARM::tSUBSi3 || Opc == ARM::tSUBSi8 ||
+ Opc == ARM::t2SUBri || Opc == ARM::t2SUBri12 || Opc == ARM::t2SUBSri;
+}
+
+static inline bool isMovRegOpcode(int Opc) {
+ return Opc == ARM::MOVr || Opc == ARM::tMOVr || Opc == ARM::t2MOVr;
+}
/// isValidCoprocessorNumber - decide whether an explicit coprocessor
/// number is legal in generic instructions like CDP. The answer can
/// vary with the subtarget.
static inline bool isValidCoprocessorNumber(unsigned Num,
const FeatureBitset& featureBits) {
+ // In Armv7 and Armv8-M CP10 and CP11 clash with VFP/NEON, however, the
+ // coprocessor is still valid for CDP/MCR/MRC and friends. Allowing it is
+ // useful for code which is shared with older architectures which do not know
+ // the new VFP/NEON mnemonics.
+
// Armv8-A disallows everything *other* than 111x (CP14 and CP15).
if (featureBits[ARM::HasV8Ops] && (Num & 0xE) != 0xE)
return false;
- // Armv7 disallows 101x (CP10 and CP11), which clash with VFP/NEON.
- if (featureBits[ARM::HasV7Ops] && (Num & 0xE) == 0xA)
- return false;
-
- // Armv8.1-M also disallows 100x (CP8,CP9) and 111x (CP14,CP15)
+ // Armv8.1-M disallows 100x (CP8,CP9) and 111x (CP14,CP15)
// which clash with MVE.
if (featureBits[ARM::HasV8_1MMainlineOps] &&
((Num & 0xE) == 0x8 || (Num & 0xE) == 0xE))
@@ -667,7 +705,7 @@ static inline bool isValidCoprocessorNumber(unsigned Num,
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
-ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, unsigned &PredReg);
+ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg);
unsigned getMatchingCondBranchOpcode(unsigned Opc);
@@ -681,21 +719,21 @@ unsigned convertAddSubFlagsOpcode(unsigned OldOpc);
/// code.
void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- const DebugLoc &dl, unsigned DestReg,
- unsigned BaseReg, int NumBytes,
- ARMCC::CondCodes Pred, unsigned PredReg,
+ const DebugLoc &dl, Register DestReg,
+ Register BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, Register PredReg,
const ARMBaseInstrInfo &TII, unsigned MIFlags = 0);
void emitT2RegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- const DebugLoc &dl, unsigned DestReg,
- unsigned BaseReg, int NumBytes,
- ARMCC::CondCodes Pred, unsigned PredReg,
+ const DebugLoc &dl, Register DestReg,
+ Register BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, Register PredReg,
const ARMBaseInstrInfo &TII, unsigned MIFlags = 0);
void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- const DebugLoc &dl, unsigned DestReg,
- unsigned BaseReg, int NumBytes,
+ const DebugLoc &dl, Register DestReg,
+ Register BaseReg, int NumBytes,
const TargetInstrInfo &TII,
const ARMBaseRegisterInfo &MRI,
unsigned MIFlags = 0);
@@ -714,11 +752,11 @@ bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
/// offset could not be handled directly in MI, and return the left-over
/// portion by reference.
bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int &Offset,
+ Register FrameReg, int &Offset,
const ARMBaseInstrInfo &TII);
bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int &Offset,
+ Register FrameReg, int &Offset,
const ARMBaseInstrInfo &TII,
const TargetRegisterInfo *TRI);
@@ -733,7 +771,7 @@ MachineInstr *findCMPToFoldIntoCBZ(MachineInstr *Br,
const TargetRegisterInfo *TRI);
void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB);
-void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned DestReg);
+void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, Register DestReg);
void addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond);
void addPredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned Cond,
@@ -753,6 +791,70 @@ bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
const ARMSubtarget *Subtarget,
bool ForCodesize = false);
+// Return the immediate if this is ADDri or SUBri, scaled as appropriate.
+// Returns 0 for unknown instructions.
+inline int getAddSubImmediate(MachineInstr &MI) {
+ int Scale = 1;
+ unsigned ImmOp;
+ switch (MI.getOpcode()) {
+ case ARM::t2ADDri:
+ ImmOp = 2;
+ break;
+ case ARM::t2SUBri:
+ case ARM::t2SUBri12:
+ ImmOp = 2;
+ Scale = -1;
+ break;
+ case ARM::tSUBi3:
+ case ARM::tSUBi8:
+ ImmOp = 3;
+ Scale = -1;
+ break;
+ default:
+ return 0;
+ }
+ return Scale * MI.getOperand(ImmOp).getImm();
+}
+
+// Given a memory access Opcode, check whether the given Imm is a valid Offset
+// for this instruction using its addressing mode.
+inline bool isLegalAddressImm(unsigned Opcode, int Imm,
+ const TargetInstrInfo *TII) {
+ const MCInstrDesc &Desc = TII->get(Opcode);
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i7:
+ return std::abs(Imm) < (((1 << 7) * 1) - 1);
+ case ARMII::AddrModeT2_i7s2:
+ return std::abs(Imm) < (((1 << 7) * 2) - 1) && Imm % 2 == 0;
+ case ARMII::AddrModeT2_i7s4:
+ return std::abs(Imm) < (((1 << 7) * 4) - 1) && Imm % 4 == 0;
+ default:
+ llvm_unreachable("Unhandled Addressing mode");
+ }
+}
+
+// Return true if the given intrinsic is a gather or scatter
+inline bool isGatherScatter(IntrinsicInst *IntInst) {
+ if (IntInst == nullptr)
+ return false;
+ unsigned IntrinsicID = IntInst->getIntrinsicID();
+ return (IntrinsicID == Intrinsic::masked_gather ||
+ IntrinsicID == Intrinsic::arm_mve_vldr_gather_base ||
+ IntrinsicID == Intrinsic::arm_mve_vldr_gather_base_predicated ||
+ IntrinsicID == Intrinsic::arm_mve_vldr_gather_base_wb ||
+ IntrinsicID == Intrinsic::arm_mve_vldr_gather_base_wb_predicated ||
+ IntrinsicID == Intrinsic::arm_mve_vldr_gather_offset ||
+ IntrinsicID == Intrinsic::arm_mve_vldr_gather_offset_predicated ||
+ IntrinsicID == Intrinsic::masked_scatter ||
+ IntrinsicID == Intrinsic::arm_mve_vstr_scatter_base ||
+ IntrinsicID == Intrinsic::arm_mve_vstr_scatter_base_predicated ||
+ IntrinsicID == Intrinsic::arm_mve_vstr_scatter_base_wb ||
+ IntrinsicID == Intrinsic::arm_mve_vstr_scatter_base_wb_predicated ||
+ IntrinsicID == Intrinsic::arm_mve_vstr_scatter_offset ||
+ IntrinsicID == Intrinsic::arm_mve_vstr_scatter_offset_predicated);
+}
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
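
The isLegalAddressImm helper added above encodes the three Thumb-2 7-bit scaled offset forms (AddrModeT2_i7, _i7s2, _i7s4). A minimal stand-alone approximation of that check is sketched below, assuming the addressing mode is passed in directly instead of being decoded from MCInstrDesc::TSFlags; the enum is illustrative, not the real ARMII one.

    #include <cstdlib>

    // Illustrative stand-in for the ARMII addressing-mode values.
    enum class T2AddrMode { i7 = 1, i7s2 = 2, i7s4 = 4 };

    // Mirrors the checks in isLegalAddressImm: the offset must be a multiple
    // of the scale and its magnitude must fit the scaled 7-bit range.
    static bool isLegalT2Offset(T2AddrMode Mode, int Imm) {
      int Scale = static_cast<int>(Mode);
      return std::abs(Imm) < ((1 << 7) * Scale) - 1 && Imm % Scale == 0;
    }

For the s4 form this accepts an offset of 508 but rejects 510 (not a multiple of 4) and 512 (out of range), matching the bounds used above.
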
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 52e6d05c3155..3579635f83b5 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -220,10 +220,25 @@ getReservedRegs(const MachineFunction &MF) const {
}
bool ARMBaseRegisterInfo::
-isAsmClobberable(const MachineFunction &MF, unsigned PhysReg) const {
+isAsmClobberable(const MachineFunction &MF, MCRegister PhysReg) const {
return !getReservedRegs(MF).test(PhysReg);
}
+bool ARMBaseRegisterInfo::isInlineAsmReadOnlyReg(const MachineFunction &MF,
+ unsigned PhysReg) const {
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ const ARMFrameLowering *TFI = getFrameLowering(MF);
+
+ BitVector Reserved(getNumRegs());
+ markSuperRegs(Reserved, ARM::PC);
+ if (TFI->hasFP(MF))
+ markSuperRegs(Reserved, getFramePointerReg(STI));
+ if (hasBasePointer(MF))
+ markSuperRegs(Reserved, BasePtr);
+ assert(checkAllSuperRegsMarked(Reserved));
+ return Reserved.test(PhysReg);
+}
+
const TargetRegisterClass *
ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
const MachineFunction &MF) const {
@@ -289,7 +304,8 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
// Get the other register in a GPRPair.
-static unsigned getPairedGPR(unsigned Reg, bool Odd, const MCRegisterInfo *RI) {
+static MCPhysReg getPairedGPR(MCPhysReg Reg, bool Odd,
+ const MCRegisterInfo *RI) {
for (MCSuperRegIterator Supers(Reg, RI); Supers.isValid(); ++Supers)
if (ARM::GPRPairRegClass.contains(*Supers))
return RI->getSubReg(*Supers, Odd ? ARM::gsub_1 : ARM::gsub_0);
@@ -297,15 +313,12 @@ static unsigned getPairedGPR(unsigned Reg, bool Odd, const MCRegisterInfo *RI) {
}
// Resolve the RegPairEven / RegPairOdd register allocator hints.
-bool
-ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
- ArrayRef<MCPhysReg> Order,
- SmallVectorImpl<MCPhysReg> &Hints,
- const MachineFunction &MF,
- const VirtRegMap *VRM,
- const LiveRegMatrix *Matrix) const {
+bool ARMBaseRegisterInfo::getRegAllocationHints(
+ Register VirtReg, ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF,
+ const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
const MachineRegisterInfo &MRI = MF.getRegInfo();
- std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
+ std::pair<Register, Register> Hint = MRI.getRegAllocationHint(VirtReg);
unsigned Odd;
switch (Hint.first) {
@@ -323,12 +336,12 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
// This register should preferably be even (Odd == 0) or odd (Odd == 1).
// Check if the other part of the pair has already been assigned, and provide
// the paired register as the first hint.
- unsigned Paired = Hint.second;
- if (Paired == 0)
+ Register Paired = Hint.second;
+ if (!Paired)
return false;
- unsigned PairedPhys = 0;
- if (Register::isPhysicalRegister(Paired)) {
+ Register PairedPhys;
+ if (Paired.isPhysical()) {
PairedPhys = Paired;
} else if (VRM && VRM->hasPhys(Paired)) {
PairedPhys = getPairedGPR(VRM->getPhys(Paired), Odd, this);
@@ -339,11 +352,11 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
Hints.push_back(PairedPhys);
// Then prefer even or odd registers.
- for (unsigned Reg : Order) {
+ for (MCPhysReg Reg : Order) {
if (Reg == PairedPhys || (getEncodingValue(Reg) & 1) != Odd)
continue;
// Don't provide hints that are paired to a reserved register.
- unsigned Paired = getPairedGPR(Reg, !Odd, this);
+ MCPhysReg Paired = getPairedGPR(Reg, !Odd, this);
if (!Paired || MRI.isReserved(Paired))
continue;
Hints.push_back(Reg);
@@ -351,27 +364,27 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
return false;
}
-void
-ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg,
- MachineFunction &MF) const {
+void ARMBaseRegisterInfo::updateRegAllocHint(Register Reg, Register NewReg,
+ MachineFunction &MF) const {
MachineRegisterInfo *MRI = &MF.getRegInfo();
- std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
- if ((Hint.first == (unsigned)ARMRI::RegPairOdd ||
- Hint.first == (unsigned)ARMRI::RegPairEven) &&
- Register::isVirtualRegister(Hint.second)) {
+ std::pair<Register, Register> Hint = MRI->getRegAllocationHint(Reg);
+ if ((Hint.first == ARMRI::RegPairOdd || Hint.first == ARMRI::RegPairEven) &&
+ Hint.second.isVirtual()) {
// If 'Reg' is one of the even / odd register pair and it's now changed
// (e.g. coalesced) into a different register. The other register of the
// pair allocation hint must be updated to reflect the relationship
// change.
- unsigned OtherReg = Hint.second;
+ Register OtherReg = Hint.second;
Hint = MRI->getRegAllocationHint(OtherReg);
// Make sure the pair has not already divorced.
if (Hint.second == Reg) {
MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg);
if (Register::isVirtualRegister(NewReg))
MRI->setRegAllocationHint(NewReg,
- Hint.first == (unsigned)ARMRI::RegPairOdd ? ARMRI::RegPairEven
- : ARMRI::RegPairOdd, OtherReg);
+ Hint.first == ARMRI::RegPairOdd
+ ? ARMRI::RegPairEven
+ : ARMRI::RegPairOdd,
+ OtherReg);
}
}
}
@@ -457,14 +470,14 @@ ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
/// specified immediate.
void ARMBaseRegisterInfo::emitLoadConstPool(
MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- const DebugLoc &dl, unsigned DestReg, unsigned SubIdx, int Val,
- ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const {
+ const DebugLoc &dl, Register DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred, Register PredReg, unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C =
ConstantInt::get(Type::getInt32Ty(MF.getFunction().getContext()), Val);
- unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align(4));
BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp))
.addReg(DestReg, getDefRegState(true), SubIdx)
@@ -480,11 +493,6 @@ requiresRegisterScavenging(const MachineFunction &MF) const {
}
bool ARMBaseRegisterInfo::
-trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
- return true;
-}
-
-bool ARMBaseRegisterInfo::
requiresFrameIndexScavenging(const MachineFunction &MF) const {
return true;
}
@@ -606,9 +614,9 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// The FP is only available if there is no dynamic realignment. We
// don't know for sure yet whether we'll need that, so we guess based
// on whether there are any local variables that would trigger it.
- unsigned StackAlign = TFI->getStackAlignment();
if (TFI->hasFP(MF) &&
- !((MFI.getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
+ !((MFI.getLocalFrameMaxAlign() > TFI->getStackAlign()) &&
+ canRealignStack(MF))) {
if (isFrameOffsetLegal(MI, getFrameRegister(MF), FPOffset))
return false;
}
@@ -626,10 +634,10 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
/// materializeFrameBaseRegister - Insert defining instruction(s) for BaseReg to
/// be a pointer to FrameIdx at the beginning of the basic block.
-void ARMBaseRegisterInfo::
-materializeFrameBaseRegister(MachineBasicBlock *MBB,
- unsigned BaseReg, int FrameIdx,
- int64_t Offset) const {
+void ARMBaseRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ Register BaseReg,
+ int FrameIdx,
+ int64_t Offset) const {
ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri :
(AFI->isThumb1OnlyFunction() ? ARM::tADDframe : ARM::t2ADDri);
@@ -652,7 +660,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
}
-void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const {
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
@@ -680,7 +688,8 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
(void)Done;
}
-bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
+bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ Register BaseReg,
int64_t Offset) const {
const MCInstrDesc &Desc = MI->getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
@@ -759,7 +768,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(!AFI->isThumb1OnlyFunction() &&
"This eliminateFrameIndex does not support Thumb1!");
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
- unsigned FrameReg;
+ Register FrameReg;
int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
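
The getRegAllocationHints change above keeps the same policy as before, only with Register/MCPhysReg types: hint the already-assigned partner of the GPRPair first, then every allocatable register of the required parity. A trimmed-down sketch of that ordering, assuming plain integer encodings rather than real MCPhysReg values:

    #include <vector>

    // Sketch of the parity-based hint ordering in getRegAllocationHints.
    // PairedPhys is the already-assigned partner (0 if unknown) and Odd
    // selects which half of a GPRPair this virtual register should become.
    static std::vector<unsigned>
    buildPairHints(const std::vector<unsigned> &Order, unsigned PairedPhys,
                   unsigned Odd) {
      std::vector<unsigned> Hints;
      if (PairedPhys)
        Hints.push_back(PairedPhys); // Strongest hint: sit next to the partner.
      for (unsigned Reg : Order) {
        if (Reg == PairedPhys || (Reg & 1) != Odd)
          continue; // Skip the partner itself and wrong-parity registers.
        Hints.push_back(Reg);
      }
      return Hints;
    }
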
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 477f3ad0a9a7..0a0907af2141 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -134,7 +134,9 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const override;
bool isAsmClobberable(const MachineFunction &MF,
- unsigned PhysReg) const override;
+ MCRegister PhysReg) const override;
+ bool isInlineAsmReadOnlyReg(const MachineFunction &MF,
+ unsigned PhysReg) const override;
const TargetRegisterClass *
getPointerRegClass(const MachineFunction &MF,
@@ -149,14 +151,12 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
- bool getRegAllocationHints(unsigned VirtReg,
- ArrayRef<MCPhysReg> Order,
+ bool getRegAllocationHints(Register VirtReg, ArrayRef<MCPhysReg> Order,
SmallVectorImpl<MCPhysReg> &Hints,
- const MachineFunction &MF,
- const VirtRegMap *VRM,
+ const MachineFunction &MF, const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const override;
- void updateRegAllocHint(unsigned Reg, unsigned NewReg,
+ void updateRegAllocHint(Register Reg, Register NewReg,
MachineFunction &MF) const override;
bool hasBasePointer(const MachineFunction &MF) const;
@@ -165,35 +165,32 @@ public:
int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
int Idx) const override;
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
- void materializeFrameBaseRegister(MachineBasicBlock *MBB,
- unsigned BaseReg, int FrameIdx,
+ void materializeFrameBaseRegister(MachineBasicBlock *MBB, Register BaseReg,
+ int FrameIdx,
int64_t Offset) const override;
- void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const override;
- bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
+ bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
int64_t Offset) const override;
bool cannotEliminateFrame(const MachineFunction &MF) const;
// Debug information queries.
Register getFrameRegister(const MachineFunction &MF) const override;
- unsigned getBaseRegister() const { return BasePtr; }
-
+ Register getBaseRegister() const { return BasePtr; }
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.
virtual void
emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- const DebugLoc &dl, unsigned DestReg, unsigned SubIdx,
+ const DebugLoc &dl, Register DestReg, unsigned SubIdx,
int Val, ARMCC::CondCodes Pred = ARMCC::AL,
- unsigned PredReg = 0,
+ Register PredReg = Register(),
unsigned MIFlags = MachineInstr::NoFlags) const;
/// Code Generation virtual methods...
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
-
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBasicBlockInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBasicBlockInfo.cpp
index 00a2231f59e3..6d389cc82730 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBasicBlockInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBasicBlockInfo.cpp
@@ -49,7 +49,7 @@ void ARMBasicBlockUtils::computeBlockSize(MachineBasicBlock *MBB) {
BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
BBI.Size = 0;
BBI.Unalign = 0;
- BBI.PostAlign = Align::None();
+ BBI.PostAlign = Align(1);
for (MachineInstr &I : *MBB) {
BBI.Size += TII->getInstSizeInBytes(I);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBasicBlockInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBasicBlockInfo.h
index 13df399ed995..47d9a4049fa0 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBasicBlockInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBasicBlockInfo.h
@@ -87,10 +87,10 @@ struct BasicBlockInfo {
/// Compute the offset immediately following this block. If Align is
/// specified, return the offset the successor block will get if it has
/// this alignment.
- unsigned postOffset(Align Alignment = Align::None()) const {
+ unsigned postOffset(Align Alignment = Align(1)) const {
unsigned PO = Offset + Size;
const Align PA = std::max(PostAlign, Alignment);
- if (PA == Align::None())
+ if (PA == Align(1))
return PO;
// Add alignment padding from the terminator.
return PO + UnknownPadding(PA, internalKnownBits());
@@ -101,7 +101,7 @@ struct BasicBlockInfo {
/// instruction alignment. An aligned terminator may increase the number
/// of know bits.
/// If LogAlign is given, also consider the alignment of the next block.
- unsigned postKnownBits(Align Align = Align::None()) const {
+ unsigned postKnownBits(Align Align = llvm::Align(1)) const {
return std::max(Log2(std::max(PostAlign, Align)), internalKnownBits());
}
};
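
The Align::None() to Align(1) renames in ARMBasicBlockInfo are spelling-only: a one-byte alignment can never add padding, so postOffset collapses to Offset + Size. A small sketch of the arithmetic, with a simplified worst-case padding standing in for llvm::UnknownPadding:

    #include <algorithm>
    #include <cstdint>

    // Simplified stand-in for llvm::UnknownPadding: worst-case bytes needed
    // to reach Alignment when only the low KnownBits bits of the offset are
    // known to be zero.
    static uint64_t worstCasePadding(uint64_t Alignment, unsigned KnownBits) {
      uint64_t Known = uint64_t(1) << KnownBits;
      return Alignment > Known ? Alignment - Known : 0;
    }

    // Mirrors BasicBlockInfo::postOffset: with a one-byte alignment the
    // padding term disappears, which is why Align(1) is the new default.
    static uint64_t postOffset(uint64_t Offset, uint64_t Size,
                               uint64_t PostAlign, uint64_t SuccAlign,
                               unsigned KnownBits) {
      uint64_t PO = Offset + Size;
      uint64_t PA = std::max(PostAlign, SuccAlign);
      if (PA == 1)
        return PO;
      return PO + worstCasePadding(PA, KnownBits);
    }
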
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp
index ce260a9ba145..d860473011e7 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -99,17 +99,14 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
LLT p0 = LLT::pointer(0, 32);
LLT s32 = LLT::scalar(32);
- Register SPReg = MRI.createGenericVirtualRegister(p0);
- MIRBuilder.buildCopy(SPReg, Register(ARM::SP));
+ auto SPReg = MIRBuilder.buildCopy(p0, Register(ARM::SP));
- Register OffsetReg = MRI.createGenericVirtualRegister(s32);
- MIRBuilder.buildConstant(OffsetReg, Offset);
+ auto OffsetReg = MIRBuilder.buildConstant(s32, Offset);
- Register AddrReg = MRI.createGenericVirtualRegister(p0);
- MIRBuilder.buildPtrAdd(AddrReg, SPReg, OffsetReg);
+ auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);
MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
- return AddrReg;
+ return AddrReg.getReg(0);
}
void assignValueToReg(Register ValVReg, Register PhysReg,
@@ -133,7 +130,7 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
Register ExtReg = extendRegister(ValVReg, VA);
auto MMO = MIRBuilder.getMF().getMachineMemOperand(
MPO, MachineMemOperand::MOStore, VA.getLocVT().getStoreSize(),
- /* Alignment */ 1);
+ Align(1));
MIRBuilder.buildStore(ExtReg, Addr, *MMO);
}
@@ -143,7 +140,10 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
CCValAssign VA = VAs[0];
assert(VA.needsCustom() && "Value doesn't need custom handling");
- assert(VA.getValVT() == MVT::f64 && "Unsupported type");
+
+ // Custom lowering for other types, such as f16, is currently not supported
+ if (VA.getValVT() != MVT::f64)
+ return 0;
CCValAssign NextVA = VAs[1];
assert(NextVA.needsCustom() && "Value doesn't need custom handling");
@@ -203,7 +203,7 @@ void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg,
// Even if there is no splitting to do, we still want to replace the
// original type (e.g. pointer type -> integer).
auto Flags = OrigArg.Flags[0];
- Flags.setOrigAlign(Align(DL.getABITypeAlignment(OrigArg.Ty)));
+ Flags.setOrigAlign(DL.getABITypeAlign(OrigArg.Ty));
SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
Flags, OrigArg.IsFixed);
return;
@@ -215,7 +215,7 @@ void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg,
Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
auto Flags = OrigArg.Flags[0];
- Flags.setOrigAlign(Align(DL.getABITypeAlignment(SplitTy)));
+ Flags.setOrigAlign(DL.getABITypeAlign(SplitTy));
bool NeedsConsecutiveRegisters =
TLI.functionArgumentNeedsConsecutiveRegisters(
@@ -299,11 +299,8 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
int FI = MFI.CreateFixedObject(Size, Offset, true);
MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
- Register AddrReg =
- MRI.createGenericVirtualRegister(LLT::pointer(MPO.getAddrSpace(), 32));
- MIRBuilder.buildFrameIndex(AddrReg, FI);
-
- return AddrReg;
+ return MIRBuilder.buildFrameIndex(LLT::pointer(MPO.getAddrSpace(), 32), FI)
+ .getReg(0);
}
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
@@ -318,20 +315,21 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
Size = 4;
assert(MRI.getType(ValVReg).isScalar() && "Only scalars supported atm");
- auto LoadVReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
- buildLoad(LoadVReg, Addr, Size, /* Alignment */ 1, MPO);
+ auto LoadVReg = buildLoad(LLT::scalar(32), Addr, Size, MPO);
MIRBuilder.buildTrunc(ValVReg, LoadVReg);
} else {
// If the value is not extended, a simple load will suffice.
- buildLoad(ValVReg, Addr, Size, /* Alignment */ 1, MPO);
+ buildLoad(ValVReg, Addr, Size, MPO);
}
}
- void buildLoad(Register Val, Register Addr, uint64_t Size, unsigned Alignment,
- MachinePointerInfo &MPO) {
- auto MMO = MIRBuilder.getMF().getMachineMemOperand(
- MPO, MachineMemOperand::MOLoad, Size, Alignment);
- MIRBuilder.buildLoad(Val, Addr, *MMO);
+ MachineInstrBuilder buildLoad(const DstOp &Res, Register Addr, uint64_t Size,
+ MachinePointerInfo &MPO) {
+ MachineFunction &MF = MIRBuilder.getMF();
+
+ auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, Size,
+ inferAlignFromPtrInfo(MF, MPO));
+ return MIRBuilder.buildLoad(Res, Addr, *MMO);
}
void assignValueToReg(Register ValVReg, Register PhysReg,
@@ -354,9 +352,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
// We cannot create a truncating copy, nor a trunc of a physical register.
// Therefore, we need to copy the content of the physical register into a
// virtual one and then truncate that.
- auto PhysRegToVReg =
- MRI.createGenericVirtualRegister(LLT::scalar(LocSize));
- MIRBuilder.buildCopy(PhysRegToVReg, PhysReg);
+ auto PhysRegToVReg = MIRBuilder.buildCopy(LLT::scalar(LocSize), PhysReg);
MIRBuilder.buildTrunc(ValVReg, PhysRegToVReg);
}
}
@@ -367,7 +363,10 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
CCValAssign VA = VAs[0];
assert(VA.needsCustom() && "Value doesn't need custom handling");
- assert(VA.getValVT() == MVT::f64 && "Unsupported type");
+
+ // Custom lowering for other types, such as f16, is currently not supported
+ if (VA.getValVT() != MVT::f64)
+ return 0;
CCValAssign NextVA = VAs[1];
assert(NextVA.needsCustom() && "Value doesn't need custom handling");
@@ -436,7 +435,7 @@ bool ARMCallLowering::lowerFormalArguments(
for (auto &Arg : F.args()) {
if (!isSupportedType(DL, TLI, Arg.getType()))
return false;
- if (Arg.hasByValOrInAllocaAttr())
+ if (Arg.hasPassPointeeByValueAttr())
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.cpp
index a47c59512592..67c822a5b6ef 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.cpp
@@ -32,9 +32,8 @@ static bool f64AssignAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
return false;
// Put the whole thing on the stack.
- State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
- State.AllocateStack(8, 4),
- LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomMem(
+ ValNo, ValVT, State.AllocateStack(8, Align(4)), LocVT, LocInfo));
return true;
}
@@ -42,9 +41,8 @@ static bool f64AssignAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
if (unsigned Reg = State.AllocateReg(RegList))
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
else
- State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
- State.AllocateStack(4, 4),
- LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomMem(
+ ValNo, ValVT, State.AllocateStack(4, Align(4)), LocVT, LocInfo));
return true;
}
@@ -81,9 +79,8 @@ static bool f64AssignAAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
return false;
// Put the whole thing on the stack.
- State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
- State.AllocateStack(8, 8),
- LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomMem(
+ ValNo, ValVT, State.AllocateStack(8, Align(8)), LocVT, LocInfo));
return true;
}
@@ -184,8 +181,8 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned ValNo, MVT ValVT,
// aggregate. Store the type's required alignment as extra info for later: in
// the [N x i64] case all trace has been removed by the time we actually get
// to do allocation.
- PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo,
- ArgFlags.getOrigAlign()));
+ PendingMembers.push_back(CCValAssign::getPending(
+ ValNo, ValVT, LocVT, LocInfo, ArgFlags.getNonZeroOrigAlign().value()));
if (!ArgFlags.isInConsecutiveRegsLast())
return true;
@@ -193,8 +190,9 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned ValNo, MVT ValVT,
// Try to allocate a contiguous block of registers, each of the correct
// size to hold one member.
auto &DL = State.getMachineFunction().getDataLayout();
- unsigned StackAlign = DL.getStackAlignment().value();
- unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign);
+ const Align StackAlign = DL.getStackAlignment();
+ const Align FirstMemberAlign(PendingMembers[0].getExtraInfo());
+ Align Alignment = std::min(FirstMemberAlign, StackAlign);
ArrayRef<MCPhysReg> RegList;
switch (LocVT.SimpleTy) {
@@ -204,21 +202,24 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned ValNo, MVT ValVT,
// First consume all registers that would give an unaligned object. Whether
// we go on stack or in regs, no-one will be using them in future.
- unsigned RegAlign = alignTo(Align, 4) / 4;
+ unsigned RegAlign = alignTo(Alignment.value(), 4) / 4;
while (RegIdx % RegAlign != 0 && RegIdx < RegList.size())
State.AllocateReg(RegList[RegIdx++]);
break;
}
case MVT::f16:
+ case MVT::bf16:
case MVT::f32:
RegList = SRegList;
break;
case MVT::v4f16:
+ case MVT::v4bf16:
case MVT::f64:
RegList = DRegList;
break;
case MVT::v8f16:
+ case MVT::v8bf16:
case MVT::v2f64:
RegList = QRegList;
break;
@@ -247,7 +248,7 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned ValNo, MVT ValVT,
unsigned RegIdx = State.getFirstUnallocated(RegList);
for (auto &It : PendingMembers) {
if (RegIdx >= RegList.size())
- It.convertToMem(State.AllocateStack(Size, Size));
+ It.convertToMem(State.AllocateStack(Size, Align(Size)));
else
It.convertToReg(State.AllocateReg(RegList[RegIdx++]));
@@ -265,12 +266,12 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned ValNo, MVT ValVT,
// After the first item has been allocated, the rest are packed as tightly as
// possible. (E.g. an incoming i64 would have starting Align of 8, but we'll
// be allocating a bunch of i32 slots).
- unsigned RestAlign = std::min(Align, Size);
+ const Align RestAlign = std::min(Alignment, Align(Size));
for (auto &It : PendingMembers) {
- It.convertToMem(State.AllocateStack(Size, Align));
+ It.convertToMem(State.AllocateStack(Size, Alignment));
State.addLoc(It);
- Align = RestAlign;
+ Alignment = RestAlign;
}
// All pending members have now been allocated
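
The hunk above keeps the AAPCS packing rule while switching to the Align type: the first pending member is placed at min(member alignment, stack alignment), and every later member is packed at min(alignment, size). A minimal standalone sketch of that loop, with plain unsigned values standing in for llvm::Align and CCState::AllocateStack (offsets and sizes are made up for illustration):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    static unsigned alignTo(unsigned Offset, unsigned Alignment) {
      return (Offset + Alignment - 1) / Alignment * Alignment;
    }

    int main() {
      const unsigned StackAlign = 8;   // assumed stack alignment
      const unsigned MemberAlign = 8;  // alignment recorded for the aggregate
      const unsigned Size = 4;         // size of each pending member
      unsigned Alignment = std::min(MemberAlign, StackAlign);
      unsigned Offset = 4;             // pretend some stack is already in use
      std::vector<unsigned> Slots;
      for (int I = 0; I < 3; ++I) {    // three pending members
        Offset = alignTo(Offset, Alignment);
        Slots.push_back(Offset);
        Offset += Size;
        Alignment = std::min(Alignment, Size); // pack later members tightly
      }
      for (unsigned S : Slots)
        std::printf("slot at offset %u\n", S); // 8, 12, 16
      return 0;
    }
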
@@ -280,5 +281,33 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned ValNo, MVT ValVT,
return true;
}
+static bool CustomAssignInRegList(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, CCState &State,
+ ArrayRef<MCPhysReg> RegList) {
+ unsigned Reg = State.AllocateReg(RegList);
+ if (Reg) {
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return true;
+ }
+ return false;
+}
+
+static bool CC_ARM_AAPCS_Custom_f16(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ // f16 arguments are extended to i32 and assigned to a register in [r0, r3]
+ return CustomAssignInRegList(ValNo, ValVT, MVT::i32, LocInfo, State,
+ RRegList);
+}
+
+static bool CC_ARM_AAPCS_VFP_Custom_f16(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
+ CCState &State) {
+ // f16 arguments are extended to f32 and assigned to a register in [s0, s15]
+ return CustomAssignInRegList(ValNo, ValVT, MVT::f32, LocInfo, State,
+ SRegList);
+}
+
// Include the table generated calling convention implementations.
#include "ARMGenCallingConv.inc"
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.td
index 5df5b56f5afa..3517274e4c5c 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -10,7 +10,7 @@
/// CCIfAlign - Match of the original alignment of the arg
class CCIfAlign<string Align, CCAction A>:
- CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
+ CCIf<!strconcat("ArgFlags.getNonZeroOrigAlign() == ", Align), A>;
//===----------------------------------------------------------------------===//
// ARM APCS Calling Convention
@@ -30,8 +30,8 @@ def CC_ARM_APCS : CallingConv<[
CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v4bf16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v8bf16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
// f64 and v2f64 are passed in adjacent GPRs, possibly split onto the stack
CCIfType<[f64, v2f64], CCCustom<"CC_ARM_APCS_Custom_f64">>,
@@ -56,8 +56,8 @@ def RetCC_ARM_APCS : CallingConv<[
CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v4bf16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v8bf16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>,
@@ -71,8 +71,8 @@ def RetCC_ARM_APCS : CallingConv<[
let Entry = 1 in
def FastCC_ARM_APCS : CallingConv<[
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v4bf16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v8bf16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
@@ -91,8 +91,8 @@ def FastCC_ARM_APCS : CallingConv<[
let Entry = 1 in
def RetFastCC_ARM_APCS : CallingConv<[
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v4bf16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v8bf16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
@@ -108,8 +108,8 @@ def RetFastCC_ARM_APCS : CallingConv<[
let Entry = 1 in
def CC_ARM_APCS_GHC : CallingConv<[
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v4bf16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v8bf16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>,
@@ -134,12 +134,12 @@ def CC_ARM_AAPCS_Common : CallingConv<[
// i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
// (and the same is true for f64 if VFP is not enabled)
CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>,
- CCIfType<[i32], CCIf<"ArgFlags.getOrigAlign() != 8",
+ CCIfType<[i32], CCIf<"ArgFlags.getNonZeroOrigAlign() != Align(8)",
CCAssignToReg<[R0, R1, R2, R3]>>>,
CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, [R0, R1, R2, R3]>>>,
CCIfType<[i32], CCAssignToStackWithShadow<4, 4, [R0, R1, R2, R3]>>,
- CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f16, bf16, f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToStackWithShadow<8, 8, [Q0, Q1, Q2, Q3]>>,
CCIfType<[v2f64], CCIfAlign<"16",
CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>,
@@ -165,8 +165,8 @@ def CC_ARM_AAPCS : CallingConv<[
CCIfNest<CCAssignToReg<[R12]>>,
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v4bf16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v8bf16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -176,14 +176,15 @@ def CC_ARM_AAPCS : CallingConv<[
CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[f16, bf16], CCCustom<"CC_ARM_AAPCS_Custom_f16">>,
CCDelegateTo<CC_ARM_AAPCS_Common>
]>;
let Entry = 1 in
def RetCC_ARM_AAPCS : CallingConv<[
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v4bf16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v8bf16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -193,6 +194,7 @@ def RetCC_ARM_AAPCS : CallingConv<[
CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[f16, bf16], CCCustom<"CC_ARM_AAPCS_Custom_f16">>,
CCDelegateTo<RetCC_ARM_AAPCS_Common>
]>;
@@ -208,8 +210,8 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
CCIfByVal<CCPassByVal<4, 4>>,
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v4bf16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v8bf16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -224,14 +226,15 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
S9, S10, S11, S12, S13, S14, S15]>>,
+ CCIfType<[f16, bf16], CCCustom<"CC_ARM_AAPCS_VFP_Custom_f16">>,
CCDelegateTo<CC_ARM_AAPCS_Common>
]>;
let Entry = 1 in
def RetCC_ARM_AAPCS_VFP : CallingConv<[
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v4bf16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v8bf16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -242,7 +245,8 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
- S9, S10, S11, S12, S13, S14, S15]>>,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCIfType<[f16, bf16], CCCustom<"CC_ARM_AAPCS_VFP_Custom_f16">>,
CCDelegateTo<RetCC_ARM_AAPCS_Common>
]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index 66ad120a111f..195d0a89291b 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -206,10 +206,6 @@ namespace {
/// T2JumpTables - Keep track of all the Thumb2 jumptable instructions.
SmallVector<MachineInstr*, 4> T2JumpTables;
- /// HasFarJump - True if any far jump instruction has been emitted during
- /// the branch fix up pass.
- bool HasFarJump;
-
MachineFunction *MF;
MachineConstantPool *MCP;
const ARMBaseInstrInfo *TII;
@@ -270,7 +266,6 @@ namespace {
bool fixupImmediateBr(ImmBranch &Br);
bool fixupConditionalBr(ImmBranch &Br);
bool fixupUnconditionalBr(ImmBranch &Br);
- bool undoLRSpillRestore();
bool optimizeThumb2Instructions();
bool optimizeThumb2Branches();
bool reorderThumb2JumpTables();
@@ -350,7 +345,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
LLVM_DEBUG(dbgs() << "***** ARMConstantIslands: "
<< MCP->getConstants().size() << " CP entries, aligned to "
- << MCP->getConstantPoolAlignment() << " bytes *****\n");
+ << MCP->getConstantPoolAlign().value() << " bytes *****\n");
STI = &static_cast<const ARMSubtarget &>(MF->getSubtarget());
TII = STI->getInstrInfo();
@@ -363,7 +358,6 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
isThumb1 = AFI->isThumb1OnlyFunction();
isThumb2 = AFI->isThumb2Function();
- HasFarJump = false;
bool GenerateTBB = isThumb2 || (isThumb1 && SynthesizeThumb1TBB);
// Renumber all of the machine basic blocks in the function, guaranteeing that
@@ -456,11 +450,6 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
// After a while, this might be made debug-only, but it is not expensive.
verify();
- // If LR has been forced spilled and no far jump (i.e. BL) has been issued,
- // undo the spill / restore of LR if possible.
- if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
- MadeChange |= undoLRSpillRestore();
-
// Save the mapping between original and cloned constpool entries.
for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) {
@@ -494,7 +483,7 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs)
MF->push_back(BB);
// MachineConstantPool measures alignment in bytes.
- const Align MaxAlign(MCP->getConstantPoolAlignment());
+ const Align MaxAlign = MCP->getConstantPoolAlign();
const unsigned MaxLogAlign = Log2(MaxAlign);
// Mark the basic block as required by the const-pool.
@@ -518,14 +507,13 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs)
const DataLayout &TD = MF->getDataLayout();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
- unsigned Align = CPs[i].getAlignment();
- assert(isPowerOf2_32(Align) && "Invalid alignment");
+ Align Alignment = CPs[i].getAlign();
// Verify that all constant pool entries are a multiple of their alignment.
// If not, we would have to pad them out so that instructions stay aligned.
- assert((Size % Align) == 0 && "CP Entry not multiple of 4 bytes!");
+ assert(isAligned(Alignment, Size) && "CP Entry not multiple of 4 bytes!");
// Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
- unsigned LogAlign = Log2_32(Align);
+ unsigned LogAlign = Log2(Alignment);
MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
MachineInstr *CPEMI =
BuildMI(*BB, InsAt, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
@@ -542,7 +530,7 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs)
CPEntries.emplace_back(1, CPEntry(CPEMI, i));
++NumCPEs;
LLVM_DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
- << Size << ", align = " << Align << '\n');
+ << Size << ", align = " << Alignment.value() << '\n');
}
LLVM_DEBUG(BB->dump());
}
@@ -668,7 +656,7 @@ Align ARMConstantIslands::getCPEAlign(const MachineInstr *CPEMI) {
unsigned CPI = getCombinedIndex(CPEMI);
assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
- return Align(MCP->getConstants()[CPI].getAlignment());
+ return MCP->getConstants()[CPI].getAlign();
}
/// scanFunctionJumpTables - Do a scan of the function, building up
@@ -1364,8 +1352,8 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// displacement.
MachineBasicBlock::iterator I = UserMI;
++I;
- for (unsigned Offset = UserOffset + TII->getInstSizeInBytes(*UserMI),
- PredReg = 0;
+ Register PredReg;
+ for (unsigned Offset = UserOffset + TII->getInstSizeInBytes(*UserMI);
I->getOpcode() != ARM::t2IT &&
getITInstrPredicate(*I, PredReg) != ARMCC::AL;
Offset += TII->getInstSizeInBytes(*I), I = std::next(I)) {
@@ -1410,7 +1398,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// Avoid splitting an IT block.
if (LastIT) {
- unsigned PredReg = 0;
+ Register PredReg;
ARMCC::CondCodes CC = getITInstrPredicate(*MI, PredReg);
if (CC != ARMCC::AL)
MI = LastIT;
@@ -1434,7 +1422,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// We really must not split an IT block.
#ifndef NDEBUG
- unsigned PredReg;
+ Register PredReg;
assert(!isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL);
#endif
NewMBB = splitBlockBeforeInstr(&*MI);
@@ -1566,7 +1554,7 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
BBInfo[CPEBB->getNumber()].Size = 0;
// This block no longer needs to be aligned.
- CPEBB->setAlignment(Align::None());
+ CPEBB->setAlignment(Align(1));
} else {
// Entries are sorted by descending alignment, so realign from the front.
CPEBB->setAlignment(getCPEAlign(&*CPEBB->begin()));
@@ -1633,7 +1621,6 @@ ARMConstantIslands::fixupUnconditionalBr(ImmBranch &Br) {
BBInfoVector &BBInfo = BBUtils->getBBInfo();
BBInfo[MBB->getNumber()].Size += 2;
BBUtils->adjustBBOffsetsAfter(MBB);
- HasFarJump = true;
++NumUBrFixed;
LLVM_DEBUG(dbgs() << " Changed B to long jump " << *MI);
@@ -1735,34 +1722,6 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
return true;
}
-/// undoLRSpillRestore - Remove Thumb push / pop instructions that only spills
-/// LR / restores LR to pc. FIXME: This is done here because it's only possible
-/// to do this if tBfar is not used.
-bool ARMConstantIslands::undoLRSpillRestore() {
- bool MadeChange = false;
- for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {
- MachineInstr *MI = PushPopMIs[i];
- // First two operands are predicates.
- if (MI->getOpcode() == ARM::tPOP_RET &&
- MI->getOperand(2).getReg() == ARM::PC &&
- MI->getNumExplicitOperands() == 3) {
- // Create the new insn and copy the predicate from the old.
- BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET))
- .add(MI->getOperand(0))
- .add(MI->getOperand(1));
- MI->eraseFromParent();
- MadeChange = true;
- } else if (MI->getOpcode() == ARM::tPUSH &&
- MI->getOperand(2).getReg() == ARM::LR &&
- MI->getNumExplicitOperands() == 3) {
- // Just remove the push.
- MI->eraseFromParent();
- MadeChange = true;
- }
- }
- return MadeChange;
-}
-
bool ARMConstantIslands::optimizeThumb2Instructions() {
bool MadeChange = false;
@@ -1868,7 +1827,7 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
if (!Br.MI->killsRegister(ARM::CPSR))
return false;
- unsigned PredReg = 0;
+ Register PredReg;
unsigned NewOpc = 0;
ARMCC::CondCodes Pred = getInstrPredicate(*Br.MI, PredReg);
if (Pred == ARMCC::EQ)
@@ -2402,6 +2361,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
SmallVector<MachineOperand, 4> CondPrior;
MachineFunction::iterator BBi = BB->getIterator();
MachineFunction::iterator OldPrior = std::prev(BBi);
+ MachineFunction::iterator OldNext = std::next(BBi);
// If the block terminator isn't analyzable, don't try to move the block
bool B = TII->analyzeBranch(*BB, TBB, FBB, Cond);
@@ -2412,8 +2372,8 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
if (!B && Cond.empty() && BB != &MF->front() &&
!TII->analyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
BB->moveAfter(JTBB);
- OldPrior->updateTerminator();
- BB->updateTerminator();
+ OldPrior->updateTerminator(BB);
+ BB->updateTerminator(OldNext != MF->end() ? &*OldNext : nullptr);
// Update numbering to account for the block being moved.
MF->RenumberBlocks();
++NumJTMoved;
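
Much of the ARMConstantIslandPass churn above is the move from raw unsigned alignments to the Align type. A small standalone sketch of the two helpers it leans on, isAligned and Log2, under the assumption that alignments are non-zero powers of two (a toy, not the llvm::Align implementation):

    #include <cassert>
    #include <cstdio>

    // For a power-of-two alignment A, isAligned(A, Size) is just Size % A == 0,
    // and Log2(A) is the bit position the constant-island code uses to index
    // its per-alignment insertion points.
    static bool isAlignedTo(unsigned A, unsigned Size) { return Size % A == 0; }

    static unsigned log2PowerOfTwo(unsigned A) {
      assert(A != 0 && (A & (A - 1)) == 0 && "alignment must be a power of two");
      unsigned L = 0;
      while (A >>= 1)
        ++L;
      return L;
    }

    int main() {
      std::printf("isAlignedTo(4, 16) = %d\n", isAlignedTo(4, 16)); // 1
      std::printf("isAlignedTo(8, 12) = %d\n", isAlignedTo(8, 12)); // 0
      std::printf("Log2(16) = %u\n", log2PowerOfTwo(16));           // 4
      return 0;
    }
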
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
index 72c95f441265..c1df7ef43cad 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -73,7 +73,7 @@ StringRef ARMConstantPoolValue::getModifierText() const {
}
int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
- unsigned Alignment) {
+ Align Alignment) {
llvm_unreachable("Shouldn't be calling this directly!");
}
@@ -189,7 +189,7 @@ const BlockAddress *ARMConstantPoolConstant::getBlockAddress() const {
}
int ARMConstantPoolConstant::getExistingMachineCPValue(MachineConstantPool *CP,
- unsigned Alignment) {
+ Align Alignment) {
int index =
getExistingMachineCPValueImpl<ARMConstantPoolConstant>(CP, Alignment);
if (index != -1) {
@@ -228,7 +228,7 @@ ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, StringRef s,
bool AddCurrentAddress)
: ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier,
AddCurrentAddress),
- S(s) {}
+ S(std::string(s)) {}
ARMConstantPoolSymbol *ARMConstantPoolSymbol::Create(LLVMContext &C,
StringRef s, unsigned ID,
@@ -237,7 +237,7 @@ ARMConstantPoolSymbol *ARMConstantPoolSymbol::Create(LLVMContext &C,
}
int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
- unsigned Alignment) {
+ Align Alignment) {
return getExistingMachineCPValueImpl<ARMConstantPoolSymbol>(CP, Alignment);
}
@@ -277,7 +277,7 @@ ARMConstantPoolMBB *ARMConstantPoolMBB::Create(LLVMContext &C,
}
int ARMConstantPoolMBB::getExistingMachineCPValue(MachineConstantPool *CP,
- unsigned Alignment) {
+ Align Alignment) {
return getExistingMachineCPValueImpl<ARMConstantPoolMBB>(CP, Alignment);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantPoolValue.h
index 660b7fc88d82..261070a74ba3 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantPoolValue.h
@@ -76,13 +76,11 @@ protected:
bool AddCurrentAddress);
template <typename Derived>
- int getExistingMachineCPValueImpl(MachineConstantPool *CP,
- unsigned Alignment) {
- unsigned AlignMask = Alignment - 1;
+ int getExistingMachineCPValueImpl(MachineConstantPool *CP, Align Alignment) {
const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
if (Constants[i].isMachineConstantPoolEntry() &&
- (Constants[i].getAlignment() & AlignMask) == 0) {
+ Constants[i].getAlign() >= Alignment) {
auto *CPV =
static_cast<ARMConstantPoolValue*>(Constants[i].Val.MachineCPVal);
if (Derived *APC = dyn_cast<Derived>(CPV))
@@ -114,7 +112,7 @@ public:
bool isPromotedGlobal() const{ return Kind == ARMCP::CPPromotedGlobal; }
int getExistingMachineCPValue(MachineConstantPool *CP,
- unsigned Alignment) override;
+ Align Alignment) override;
void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
@@ -187,7 +185,7 @@ public:
}
int getExistingMachineCPValue(MachineConstantPool *CP,
- unsigned Alignment) override;
+ Align Alignment) override;
/// hasSameValue - Return true if this ARM constpool value can share the same
/// constantpool entry as another ARM constpool value.
@@ -223,7 +221,7 @@ public:
StringRef getSymbol() const { return S; }
int getExistingMachineCPValue(MachineConstantPool *CP,
- unsigned Alignment) override;
+ Align Alignment) override;
void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
@@ -259,7 +257,7 @@ public:
const MachineBasicBlock *getMBB() const { return MBB; }
int getExistingMachineCPValue(MachineConstantPool *CP,
- unsigned Alignment) override;
+ Align Alignment) override;
void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
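
The header change above swaps the old alignment-mask test for a direct comparison on Align. A quick standalone check that, for power-of-two alignments, the two forms accept exactly the same entries (plain integers stand in for llvm::Align):

    #include <cassert>
    #include <cstdio>

    int main() {
      for (unsigned Existing = 1; Existing <= 64; Existing <<= 1)
        for (unsigned Required = 1; Required <= 64; Required <<= 1) {
          bool OldTest = (Existing & (Required - 1)) == 0; // old mask form
          bool NewTest = Existing >= Required;             // new Align form
          assert(OldTest == NewTest);
        }
      std::printf("mask test and >= comparison agree for all power-of-two pairs\n");
      return 0;
    }
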
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index de4377ec5a47..48622aae3cb4 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -71,6 +71,38 @@ namespace {
unsigned Opc, bool IsExt);
void ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
+ void CMSEClearGPRegs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+ const SmallVectorImpl<unsigned> &ClearRegs,
+ unsigned ClobberReg);
+ MachineBasicBlock &CMSEClearFPRegs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+ MachineBasicBlock &CMSEClearFPRegsV8(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const BitVector &ClearRegs);
+ MachineBasicBlock &CMSEClearFPRegsV81(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const BitVector &ClearRegs);
+ void CMSESaveClearFPRegs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ const LivePhysRegs &LiveRegs,
+ SmallVectorImpl<unsigned> &AvailableRegs);
+ void CMSESaveClearFPRegsV8(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ const LivePhysRegs &LiveRegs,
+ SmallVectorImpl<unsigned> &ScratchRegs);
+ void CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ const LivePhysRegs &LiveRegs);
+ void CMSERestoreFPRegs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &AvailableRegs);
+ void CMSERestoreFPRegsV8(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &AvailableRegs);
+ void CMSERestoreFPRegsV81(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &AvailableRegs);
bool ExpandCMP_SWAP(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, unsigned LdrexOp,
unsigned StrexOp, unsigned UxtOp,
@@ -417,8 +449,7 @@ static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
// Make sure the table is sorted.
static std::atomic<bool> TableChecked(false);
if (!TableChecked.load(std::memory_order_relaxed)) {
- assert(std::is_sorted(std::begin(NEONLdStTable), std::end(NEONLdStTable)) &&
- "NEONLdStTable is not sorted!");
+ assert(llvm::is_sorted(NEONLdStTable) && "NEONLdStTable is not sorted!");
TableChecked.store(true, std::memory_order_relaxed);
}
#endif
@@ -827,7 +858,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
- unsigned PredReg = 0;
+ Register PredReg;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
@@ -852,10 +883,13 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
unsigned ImmVal = (unsigned)MO.getImm();
unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ unsigned MIFlags = MI.getFlags();
LO16 = LO16.addImm(SOImmValV1);
HI16 = HI16.addImm(SOImmValV2);
LO16.cloneMemRefs(MI);
HI16.cloneMemRefs(MI);
+ LO16.setMIFlags(MIFlags);
+ HI16.setMIFlags(MIFlags);
LO16.addImm(Pred).addReg(PredReg).add(condCodeOp());
HI16.addImm(Pred).addReg(PredReg).add(condCodeOp());
if (isCC)
@@ -867,6 +901,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
unsigned LO16Opc = 0;
unsigned HI16Opc = 0;
+ unsigned MIFlags = MI.getFlags();
if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
LO16Opc = ARM::t2MOVi16;
HI16Opc = ARM::t2MOVTi16;
@@ -880,6 +915,9 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg);
+ LO16.setMIFlags(MIFlags);
+ HI16.setMIFlags(MIFlags);
+
switch (MO.getType()) {
case MachineOperand::MO_Immediate: {
unsigned Imm = MO.getImm();
@@ -921,6 +959,582 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
LLVM_DEBUG(dbgs() << "And: "; HI16.getInstr()->dump(););
}
+// The size of the area accessed by VLSTM/VLLDM:
+// S0-S31 + FPSCR + 8 more bytes (VPR + pad, or just pad)
+static const int CMSE_FP_SAVE_SIZE = 136;
+
+static void determineGPRegsToClear(const MachineInstr &MI,
+ const std::initializer_list<unsigned> &Regs,
+ SmallVectorImpl<unsigned> &ClearRegs) {
+ SmallVector<unsigned, 4> OpRegs;
+ for (const MachineOperand &Op : MI.operands()) {
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ OpRegs.push_back(Op.getReg());
+ }
+ llvm::sort(OpRegs);
+
+ std::set_difference(Regs.begin(), Regs.end(), OpRegs.begin(), OpRegs.end(),
+ std::back_inserter(ClearRegs));
+}
+
+void ARMExpandPseudo::CMSEClearGPRegs(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, const SmallVectorImpl<unsigned> &ClearRegs,
+ unsigned ClobberReg) {
+
+ if (STI->hasV8_1MMainlineOps()) {
+ // Clear the registers using the CLRM instruction.
+ MachineInstrBuilder CLRM =
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::t2CLRM)).add(predOps(ARMCC::AL));
+ for (unsigned R : ClearRegs)
+ CLRM.addReg(R, RegState::Define);
+ CLRM.addReg(ARM::APSR, RegState::Define);
+ CLRM.addReg(ARM::CPSR, RegState::Define | RegState::Implicit);
+ } else {
+ // Clear the registers and flags by copying ClobberReg into them.
+ // (Baseline can't do a high register clear in one instruction).
+ for (unsigned Reg : ClearRegs) {
+ if (Reg == ClobberReg)
+ continue;
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVr), Reg)
+ .addReg(ClobberReg)
+ .add(predOps(ARMCC::AL));
+ }
+
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::t2MSR_M))
+ .addImm(STI->hasDSP() ? 0xc00 : 0x800)
+ .addReg(ClobberReg)
+ .add(predOps(ARMCC::AL));
+ }
+}
+
+// Find which FP registers need to be cleared. The parameter `ClearRegs` is
+// initialised with all elements set to true, and this function resets the
+// bits that correspond to register uses. Returns true if any floating point
+// register is defined, false otherwise.
+static bool determineFPRegsToClear(const MachineInstr &MI,
+ BitVector &ClearRegs) {
+ bool DefFP = false;
+ for (const MachineOperand &Op : MI.operands()) {
+ if (!Op.isReg())
+ continue;
+
+ unsigned Reg = Op.getReg();
+ if (Op.isDef()) {
+ if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
+ (Reg >= ARM::D0 && Reg <= ARM::D15) ||
+ (Reg >= ARM::S0 && Reg <= ARM::S31))
+ DefFP = true;
+ continue;
+ }
+
+ if (Reg >= ARM::Q0 && Reg <= ARM::Q7) {
+ int R = Reg - ARM::Q0;
+ ClearRegs.reset(R * 4, (R + 1) * 4);
+ } else if (Reg >= ARM::D0 && Reg <= ARM::D15) {
+ int R = Reg - ARM::D0;
+ ClearRegs.reset(R * 2, (R + 1) * 2);
+ } else if (Reg >= ARM::S0 && Reg <= ARM::S31) {
+ ClearRegs[Reg - ARM::S0] = false;
+ }
+ }
+ return DefFP;
+}
+
+MachineBasicBlock &
+ARMExpandPseudo::CMSEClearFPRegs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ BitVector ClearRegs(16, true);
+ (void)determineFPRegsToClear(*MBBI, ClearRegs);
+
+ if (STI->hasV8_1MMainlineOps())
+ return CMSEClearFPRegsV81(MBB, MBBI, ClearRegs);
+ else
+ return CMSEClearFPRegsV8(MBB, MBBI, ClearRegs);
+}
+
+// Clear the FP registers for v8.0-M, by copying over the content
+// of LR. Uses R12 as a scratch register.
+MachineBasicBlock &
+ARMExpandPseudo::CMSEClearFPRegsV8(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const BitVector &ClearRegs) {
+ if (!STI->hasFPRegs())
+ return MBB;
+
+ auto &RetI = *MBBI;
+ const DebugLoc &DL = RetI.getDebugLoc();
+
+ // If optimising for minimum size, clear FP registers unconditionally.
+ // Otherwise, check the CONTROL.SFPA (Secure Floating-Point Active) bit and
+ // don't clear them if they belong to the non-secure state.
+ MachineBasicBlock *ClearBB, *DoneBB;
+ if (STI->hasMinSize()) {
+ ClearBB = DoneBB = &MBB;
+ } else {
+ MachineFunction *MF = MBB.getParent();
+ ClearBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ MF->insert(++MBB.getIterator(), ClearBB);
+ MF->insert(++ClearBB->getIterator(), DoneBB);
+
+ DoneBB->splice(DoneBB->end(), &MBB, MBBI, MBB.end());
+ DoneBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(ClearBB);
+ MBB.addSuccessor(DoneBB);
+ ClearBB->addSuccessor(DoneBB);
+
+    // The new basic blocks need to have as live-ins the registers used for
+    // the return value, as well as LR, which is used to clear registers.
+ for (const MachineOperand &Op : RetI.operands()) {
+ if (!Op.isReg())
+ continue;
+ Register Reg = Op.getReg();
+ if (Reg == ARM::NoRegister || Reg == ARM::LR)
+ continue;
+ assert(Register::isPhysicalRegister(Reg) && "Unallocated register");
+ ClearBB->addLiveIn(Reg);
+ DoneBB->addLiveIn(Reg);
+ }
+ ClearBB->addLiveIn(ARM::LR);
+ DoneBB->addLiveIn(ARM::LR);
+
+ // Read the CONTROL register.
+ BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2MRS_M), ARM::R12)
+ .addImm(20)
+ .add(predOps(ARMCC::AL));
+ // Check bit 3 (SFPA).
+ BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2TSTri))
+ .addReg(ARM::R12)
+ .addImm(8)
+ .add(predOps(ARMCC::AL));
+ // If SFPA is clear, jump over ClearBB to DoneBB.
+ BuildMI(MBB, MBB.end(), DL, TII->get(ARM::tBcc))
+ .addMBB(DoneBB)
+ .addImm(ARMCC::EQ)
+ .addReg(ARM::CPSR, RegState::Kill);
+ }
+
+ // Emit the clearing sequence
+ for (unsigned D = 0; D < 8; D++) {
+ // Attempt to clear as double
+ if (ClearRegs[D * 2 + 0] && ClearRegs[D * 2 + 1]) {
+ unsigned Reg = ARM::D0 + D;
+ BuildMI(ClearBB, DL, TII->get(ARM::VMOVDRR), Reg)
+ .addReg(ARM::LR)
+ .addReg(ARM::LR)
+ .add(predOps(ARMCC::AL));
+ } else {
+ // Clear first part as single
+ if (ClearRegs[D * 2 + 0]) {
+ unsigned Reg = ARM::S0 + D * 2;
+ BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg)
+ .addReg(ARM::LR)
+ .add(predOps(ARMCC::AL));
+ }
+ // Clear second part as single
+ if (ClearRegs[D * 2 + 1]) {
+ unsigned Reg = ARM::S0 + D * 2 + 1;
+ BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg)
+ .addReg(ARM::LR)
+ .add(predOps(ARMCC::AL));
+ }
+ }
+ }
+
+ // Clear FPSCR bits 0-4, 7, 28-31
+ // The other bits are program global according to the AAPCS
+ BuildMI(ClearBB, DL, TII->get(ARM::VMRS), ARM::R12)
+ .add(predOps(ARMCC::AL));
+ BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12)
+ .addReg(ARM::R12)
+ .addImm(0x0000009F)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12)
+ .addReg(ARM::R12)
+ .addImm(0xF0000000)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ BuildMI(ClearBB, DL, TII->get(ARM::VMSR))
+ .addReg(ARM::R12)
+ .add(predOps(ARMCC::AL));
+
+ return *DoneBB;
+}
+
+MachineBasicBlock &
+ARMExpandPseudo::CMSEClearFPRegsV81(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const BitVector &ClearRegs) {
+ auto &RetI = *MBBI;
+
+ // Emit a sequence of VSCCLRM <sreglist> instructions, one instruction for
+ // each contiguous sequence of S-registers.
+ int Start = -1, End = -1;
+ for (int S = 0, E = ClearRegs.size(); S != E; ++S) {
+ if (ClearRegs[S] && S == End + 1) {
+ End = S; // extend range
+ continue;
+ }
+ // Emit current range.
+ if (Start < End) {
+ MachineInstrBuilder VSCCLRM =
+ BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS))
+ .add(predOps(ARMCC::AL));
+ while (++Start <= End)
+ VSCCLRM.addReg(ARM::S0 + Start, RegState::Define);
+ VSCCLRM.addReg(ARM::VPR, RegState::Define);
+ }
+ Start = End = S;
+ }
+ // Emit last range.
+ if (Start < End) {
+ MachineInstrBuilder VSCCLRM =
+ BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS))
+ .add(predOps(ARMCC::AL));
+ while (++Start <= End)
+ VSCCLRM.addReg(ARM::S0 + Start, RegState::Define);
+ VSCCLRM.addReg(ARM::VPR, RegState::Define);
+ }
+
+ return MBB;
+}
+
+void ARMExpandPseudo::CMSESaveClearFPRegs(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
+ if (STI->hasV8_1MMainlineOps())
+ CMSESaveClearFPRegsV81(MBB, MBBI, DL, LiveRegs);
+ else
+ CMSESaveClearFPRegsV8(MBB, MBBI, DL, LiveRegs, ScratchRegs);
+}
+
+// Save and clear FP registers if present
+void ARMExpandPseudo::CMSESaveClearFPRegsV8(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
+ if (!STI->hasFPRegs())
+ return;
+
+ // Store an available register for FPSCR clearing
+ assert(!ScratchRegs.empty());
+ unsigned SpareReg = ScratchRegs.front();
+
+ // save space on stack for VLSTM
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(CMSE_FP_SAVE_SIZE >> 2)
+ .add(predOps(ARMCC::AL));
+
+ // Use ScratchRegs to store the fp regs
+ std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs;
+ std::vector<unsigned> NonclearedFPRegs;
+ for (const MachineOperand &Op : MBBI->operands()) {
+ if (Op.isReg() && Op.isUse()) {
+ unsigned Reg = Op.getReg();
+ assert(!ARM::DPRRegClass.contains(Reg) ||
+ ARM::DPR_VFP2RegClass.contains(Reg));
+ assert(!ARM::QPRRegClass.contains(Reg));
+ if (ARM::DPR_VFP2RegClass.contains(Reg)) {
+ if (ScratchRegs.size() >= 2) {
+ unsigned SaveReg2 = ScratchRegs.pop_back_val();
+ unsigned SaveReg1 = ScratchRegs.pop_back_val();
+ ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2);
+
+ // Save the fp register to the normal registers
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD))
+ .addReg(SaveReg1, RegState::Define)
+ .addReg(SaveReg2, RegState::Define)
+ .addReg(Reg)
+ .add(predOps(ARMCC::AL));
+ } else {
+ NonclearedFPRegs.push_back(Reg);
+ }
+ } else if (ARM::SPRRegClass.contains(Reg)) {
+ if (ScratchRegs.size() >= 1) {
+ unsigned SaveReg = ScratchRegs.pop_back_val();
+ ClearedFPRegs.emplace_back(Reg, SaveReg, 0);
+
+ // Save the fp register to the normal registers
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg)
+ .addReg(Reg)
+ .add(predOps(ARMCC::AL));
+ } else {
+ NonclearedFPRegs.push_back(Reg);
+ }
+ }
+ }
+ }
+
+ bool passesFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty());
+
+ // Lazy store all fp registers to the stack
+ MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+ for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
+ ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
+ VLSTM.addReg(R, RegState::Implicit |
+ (LiveRegs.contains(R) ? 0 : RegState::Undef));
+
+ // Restore all arguments
+ for (const auto &Regs : ClearedFPRegs) {
+ unsigned Reg, SaveReg1, SaveReg2;
+ std::tie(Reg, SaveReg1, SaveReg2) = Regs;
+ if (ARM::DPR_VFP2RegClass.contains(Reg))
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg)
+ .addReg(SaveReg1)
+ .addReg(SaveReg2)
+ .add(predOps(ARMCC::AL));
+ else if (ARM::SPRRegClass.contains(Reg))
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg)
+ .addReg(SaveReg1)
+ .add(predOps(ARMCC::AL));
+ }
+
+ for (unsigned Reg : NonclearedFPRegs) {
+ if (ARM::DPR_VFP2RegClass.contains(Reg)) {
+ if (STI->isLittle()) {
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRD), Reg)
+ .addReg(ARM::SP)
+ .addImm((Reg - ARM::D0) * 2)
+ .add(predOps(ARMCC::AL));
+ } else {
+ // For big-endian targets we need to load the two subregisters of Reg
+        // manually because VLDRD would load them in the wrong order
+ unsigned SReg0 = TRI->getSubReg(Reg, ARM::ssub_0);
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0)
+ .addReg(ARM::SP)
+ .addImm((Reg - ARM::D0) * 2)
+ .add(predOps(ARMCC::AL));
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0 + 1)
+ .addReg(ARM::SP)
+ .addImm((Reg - ARM::D0) * 2 + 1)
+ .add(predOps(ARMCC::AL));
+ }
+ } else if (ARM::SPRRegClass.contains(Reg)) {
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), Reg)
+ .addReg(ARM::SP)
+ .addImm(Reg - ARM::S0)
+ .add(predOps(ARMCC::AL));
+ }
+ }
+ // restore FPSCR from stack and clear bits 0-4, 7, 28-31
+ // The other bits are program global according to the AAPCS
+ if (passesFPReg) {
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::t2LDRi8), SpareReg)
+ .addReg(ARM::SP)
+ .addImm(0x40)
+ .add(predOps(ARMCC::AL));
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg)
+ .addReg(SpareReg)
+ .addImm(0x0000009F)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg)
+ .addReg(SpareReg)
+ .addImm(0xF0000000)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMSR))
+ .addReg(SpareReg)
+ .add(predOps(ARMCC::AL));
+    // The ldr must happen after a floating point instruction. To keep the
+    // post-ra scheduler from reordering them, we create a bundle.
+ finalizeBundle(MBB, VLSTM->getIterator(), MBBI->getIterator());
+ }
+}
+
+void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc &DL,
+ const LivePhysRegs &LiveRegs) {
+ BitVector ClearRegs(32, true);
+ bool DefFP = determineFPRegsToClear(*MBBI, ClearRegs);
+
+ // If the instruction does not write to a FP register and no elements were
+ // removed from the set, then no FP registers were used to pass
+ // arguments/returns.
+ if (!DefFP && ClearRegs.count() == ClearRegs.size()) {
+ // save space on stack for VLSTM
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(CMSE_FP_SAVE_SIZE >> 2)
+ .add(predOps(ARMCC::AL));
+
+ // Lazy store all FP registers to the stack
+ MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+ for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
+ ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
+ VLSTM.addReg(R, RegState::Implicit |
+ (LiveRegs.contains(R) ? 0 : RegState::Undef));
+ } else {
+ // Push all the callee-saved registers (s16-s31).
+ MachineInstrBuilder VPUSH =
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTMSDB_UPD), ARM::SP)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+ for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
+ VPUSH.addReg(Reg);
+
+ // Clear FP registers with a VSCCLRM.
+ (void)CMSEClearFPRegsV81(MBB, MBBI, ClearRegs);
+
+ // Save floating-point context.
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTR_FPCXTS_pre), ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(-8)
+ .add(predOps(ARMCC::AL));
+ }
+}
+
+// Restore FP registers if present
+void ARMExpandPseudo::CMSERestoreFPRegs(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &AvailableRegs) {
+ if (STI->hasV8_1MMainlineOps())
+ CMSERestoreFPRegsV81(MBB, MBBI, DL, AvailableRegs);
+ else
+ CMSERestoreFPRegsV8(MBB, MBBI, DL, AvailableRegs);
+}
+
+void ARMExpandPseudo::CMSERestoreFPRegsV8(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &AvailableRegs) {
+ if (!STI->hasFPRegs())
+ return;
+
+ // Use AvailableRegs to store the fp regs
+ std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs;
+ std::vector<unsigned> NonclearedFPRegs;
+ for (const MachineOperand &Op : MBBI->operands()) {
+ if (Op.isReg() && Op.isDef()) {
+ unsigned Reg = Op.getReg();
+ assert(!ARM::DPRRegClass.contains(Reg) ||
+ ARM::DPR_VFP2RegClass.contains(Reg));
+ assert(!ARM::QPRRegClass.contains(Reg));
+ if (ARM::DPR_VFP2RegClass.contains(Reg)) {
+ if (AvailableRegs.size() >= 2) {
+ unsigned SaveReg2 = AvailableRegs.pop_back_val();
+ unsigned SaveReg1 = AvailableRegs.pop_back_val();
+ ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2);
+
+ // Save the fp register to the normal registers
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD))
+ .addReg(SaveReg1, RegState::Define)
+ .addReg(SaveReg2, RegState::Define)
+ .addReg(Reg)
+ .add(predOps(ARMCC::AL));
+ } else {
+ NonclearedFPRegs.push_back(Reg);
+ }
+ } else if (ARM::SPRRegClass.contains(Reg)) {
+ if (AvailableRegs.size() >= 1) {
+ unsigned SaveReg = AvailableRegs.pop_back_val();
+ ClearedFPRegs.emplace_back(Reg, SaveReg, 0);
+
+ // Save the fp register to the normal registers
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg)
+ .addReg(Reg)
+ .add(predOps(ARMCC::AL));
+ } else {
+ NonclearedFPRegs.push_back(Reg);
+ }
+ }
+ }
+ }
+
+ // Push FP regs that cannot be restored via normal registers on the stack
+ for (unsigned Reg : NonclearedFPRegs) {
+ if (ARM::DPR_VFP2RegClass.contains(Reg))
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRD), Reg)
+ .addReg(ARM::SP)
+ .addImm((Reg - ARM::D0) * 2)
+ .add(predOps(ARMCC::AL));
+ else if (ARM::SPRRegClass.contains(Reg))
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRS), Reg)
+ .addReg(ARM::SP)
+ .addImm(Reg - ARM::S0)
+ .add(predOps(ARMCC::AL));
+ }
+
+ // Lazy load fp regs from stack
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+
+ // Restore all FP registers via normal registers
+ for (const auto &Regs : ClearedFPRegs) {
+ unsigned Reg, SaveReg1, SaveReg2;
+ std::tie(Reg, SaveReg1, SaveReg2) = Regs;
+ if (ARM::DPR_VFP2RegClass.contains(Reg))
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg)
+ .addReg(SaveReg1)
+ .addReg(SaveReg2)
+ .add(predOps(ARMCC::AL));
+ else if (ARM::SPRRegClass.contains(Reg))
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg)
+ .addReg(SaveReg1)
+ .add(predOps(ARMCC::AL));
+ }
+
+ // Pop the stack space
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(CMSE_FP_SAVE_SIZE >> 2)
+ .add(predOps(ARMCC::AL));
+}
+
+static bool definesOrUsesFPReg(const MachineInstr &MI) {
+ for (const MachineOperand &Op : MI.operands()) {
+ if (!Op.isReg())
+ continue;
+ unsigned Reg = Op.getReg();
+ if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
+ (Reg >= ARM::D0 && Reg <= ARM::D15) ||
+ (Reg >= ARM::S0 && Reg <= ARM::S31))
+ return true;
+ }
+ return false;
+}
+
+void ARMExpandPseudo::CMSERestoreFPRegsV81(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &AvailableRegs) {
+ if (!definesOrUsesFPReg(*MBBI)) {
+ // Load FP registers from stack.
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+
+ // Pop the stack space
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(CMSE_FP_SAVE_SIZE >> 2)
+ .add(predOps(ARMCC::AL));
+ } else {
+ // Restore the floating point context.
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::VLDR_FPCXTS_post),
+ ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(8)
+ .add(predOps(ARMCC::AL));
+
+ // Pop all the callee-saved registers (s16-s31).
+ MachineInstrBuilder VPOP =
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDMSIA_UPD), ARM::SP)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+ for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
+ VPOP.addReg(Reg, RegState::Define);
+ }
+}
+
/// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
/// possible. This only gets used at -O0 so we don't care about efficiency of
/// the generated code.
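
determineGPRegsToClear in the hunk above computes "candidate registers minus registers the instruction uses" with std::set_difference, which is why the operand registers are sorted first. A minimal standalone model of that computation (register numbers are illustrative):

    #include <algorithm>
    #include <cstdio>
    #include <iterator>
    #include <vector>

    int main() {
      // Candidate registers that must not leak values across the security
      // boundary (r0-r3 and r12 here, as in the tBXNS_RET expansion).
      std::vector<int> Candidates = {0, 1, 2, 3, 12};
      // Registers the return instruction actually uses, in arbitrary order.
      std::vector<int> UsedByReturn = {1, 0};
      // set_difference needs both ranges sorted, hence the llvm::sort in the
      // real code.
      std::sort(UsedByReturn.begin(), UsedByReturn.end());
      std::vector<int> ClearRegs;
      std::set_difference(Candidates.begin(), Candidates.end(),
                          UsedByReturn.begin(), UsedByReturn.end(),
                          std::back_inserter(ClearRegs));
      for (int R : ClearRegs)
        std::printf("clear r%d\n", R); // r2, r3, r12
      return 0;
    }
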
@@ -1149,6 +1763,93 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
return true;
}
+static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, int JumpReg,
+ const LivePhysRegs &LiveRegs, bool Thumb1Only) {
+ const DebugLoc &DL = MBBI->getDebugLoc();
+ if (Thumb1Only) { // push Lo and Hi regs separately
+ MachineInstrBuilder PushMIB =
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
+ for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
+ PushMIB.addReg(
+ Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
+ }
+
+ // Thumb1 can only tPUSH low regs, so we copy the high regs to the low
+ // regs that we just saved and push the low regs again, taking care to
+ // not clobber JumpReg. If JumpReg is one of the low registers, push first
+ // the values of r9-r11, and then r8. That would leave them ordered in
+    // memory, and allow us to later pop them with a single instruction.
+ // FIXME: Could also use any of r0-r3 that are free (including in the
+ // first PUSH above).
+ for (int LoReg = ARM::R7, HiReg = ARM::R11; LoReg >= ARM::R4; --LoReg) {
+ if (JumpReg == LoReg)
+ continue;
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
+ .addReg(HiReg, LiveRegs.contains(HiReg) ? 0 : RegState::Undef)
+ .add(predOps(ARMCC::AL));
+ --HiReg;
+ }
+ MachineInstrBuilder PushMIB2 =
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
+ for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
+ if (Reg == JumpReg)
+ continue;
+ PushMIB2.addReg(Reg, RegState::Kill);
+ }
+
+ // If we couldn't use a low register for temporary storage (because it was
+ // the JumpReg), use r4 or r5, whichever is not JumpReg. It has already been
+ // saved.
+ if (JumpReg >= ARM::R4 && JumpReg <= ARM::R7) {
+ int LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4;
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
+ .addReg(ARM::R8, LiveRegs.contains(ARM::R8) ? 0 : RegState::Undef)
+ .add(predOps(ARMCC::AL));
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH))
+ .add(predOps(ARMCC::AL))
+ .addReg(LoReg, RegState::Kill);
+ }
+ } else { // push Lo and Hi registers with a single instruction
+ MachineInstrBuilder PushMIB =
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::t2STMDB_UPD), ARM::SP)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+ for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg) {
+ PushMIB.addReg(
+ Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
+ }
+ }
+}
+
+static void CMSEPopCalleeSaves(const TargetInstrInfo &TII,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, int JumpReg,
+ bool Thumb1Only) {
+ const DebugLoc &DL = MBBI->getDebugLoc();
+ if (Thumb1Only) {
+ MachineInstrBuilder PopMIB =
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
+ for (int R = 0; R < 4; ++R) {
+ PopMIB.addReg(ARM::R4 + R, RegState::Define);
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), ARM::R8 + R)
+ .addReg(ARM::R4 + R, RegState::Kill)
+ .add(predOps(ARMCC::AL));
+ }
+ MachineInstrBuilder PopMIB2 =
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
+ for (int R = 0; R < 4; ++R)
+ PopMIB2.addReg(ARM::R4 + R, RegState::Define);
+ } else { // pop Lo and Hi registers with a single instruction
+ MachineInstrBuilder PopMIB =
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::t2LDMIA_UPD), ARM::SP)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+ for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg)
+ PopMIB.addReg(Reg, RegState::Define);
+ }
+}
bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -1207,12 +1908,117 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Update call site info and delete the pseudo instruction TCRETURN.
- MBB.getParent()->moveCallSiteInfo(&MI, &*NewMI);
+ if (MI.isCandidateForCallSiteEntry())
+ MI.getMF()->moveCallSiteInfo(&MI, &*NewMI);
MBB.erase(MBBI);
MBBI = NewMI;
return true;
}
+ case ARM::tBXNS_RET: {
+ MachineBasicBlock &AfterBB = CMSEClearFPRegs(MBB, MBBI);
+
+ if (STI->hasV8_1MMainlineOps()) {
+ // Restore the non-secure floating point context.
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII->get(ARM::VLDR_FPCXTNS_post), ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(4)
+ .add(predOps(ARMCC::AL));
+ }
+
+ // Clear all GPR that are not a use of the return instruction.
+ assert(llvm::all_of(MBBI->operands(), [](const MachineOperand &Op) {
+ return !Op.isReg() || Op.getReg() != ARM::R12;
+ }));
+ SmallVector<unsigned, 5> ClearRegs;
+ determineGPRegsToClear(
+ *MBBI, {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R12}, ClearRegs);
+ CMSEClearGPRegs(AfterBB, AfterBB.end(), MBBI->getDebugLoc(), ClearRegs,
+ ARM::LR);
+
+ MachineInstrBuilder NewMI =
+ BuildMI(AfterBB, AfterBB.end(), MBBI->getDebugLoc(),
+ TII->get(ARM::tBXNS))
+ .addReg(ARM::LR)
+ .add(predOps(ARMCC::AL));
+ for (const MachineOperand &Op : MI.operands())
+ NewMI->addOperand(Op);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::tBLXNS_CALL: {
+ DebugLoc DL = MBBI->getDebugLoc();
+ unsigned JumpReg = MBBI->getOperand(0).getReg();
+
+ // Figure out which registers are live at the point immediately before the
+ // call. When we indiscriminately push a set of registers, the live
+ // registers are added as ordinary use operands, whereas dead registers
+ // are "undef".
+ LivePhysRegs LiveRegs(*TRI);
+ LiveRegs.addLiveOuts(MBB);
+ for (const MachineInstr &MI : make_range(MBB.rbegin(), MBBI.getReverse()))
+ LiveRegs.stepBackward(MI);
+ LiveRegs.stepBackward(*MBBI);
+
+ CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg, LiveRegs,
+ AFI->isThumb1OnlyFunction());
+
+ SmallVector<unsigned, 16> ClearRegs;
+ determineGPRegsToClear(*MBBI,
+ {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4,
+ ARM::R5, ARM::R6, ARM::R7, ARM::R8, ARM::R9,
+ ARM::R10, ARM::R11, ARM::R12},
+ ClearRegs);
+ auto OriginalClearRegs = ClearRegs;
+
+ // Get the first cleared register as a scratch (to use later with tBIC).
+ // We need to use the first so we can ensure it is a low register.
+ unsigned ScratchReg = ClearRegs.front();
+
+ // Clear LSB of JumpReg
+ if (AFI->isThumb2Function()) {
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), JumpReg)
+ .addReg(JumpReg)
+ .addImm(1)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ } else {
+      // We need to use an extra register to cope with 8M Baseline; since we
+      // have saved all of the registers, we are OK to trash a non-argument
+      // register here.
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVi8), ScratchReg)
+ .add(condCodeOp())
+ .addImm(1)
+ .add(predOps(ARMCC::AL));
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tBIC), JumpReg)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(JumpReg)
+ .addReg(ScratchReg)
+ .add(predOps(ARMCC::AL));
+ }
+
+ CMSESaveClearFPRegs(MBB, MBBI, DL, LiveRegs,
+ ClearRegs); // save+clear FP regs with ClearRegs
+ CMSEClearGPRegs(MBB, MBBI, DL, ClearRegs, JumpReg);
+
+ const MachineInstrBuilder NewCall =
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tBLXNSr))
+ .add(predOps(ARMCC::AL))
+ .addReg(JumpReg, RegState::Kill);
+
+ for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
+ NewCall->addOperand(MI.getOperand(I));
+ if (MI.isCandidateForCallSiteEntry())
+ MI.getMF()->moveCallSiteInfo(&MI, NewCall.getInstr());
+
+ CMSERestoreFPRegs(MBB, MBBI, DL, OriginalClearRegs); // restore FP registers
+
+ CMSEPopCalleeSaves(*TII, MBB, MBBI, JumpReg, AFI->isThumb1OnlyFunction());
+
+ MI.eraseFromParent();
+ return true;
+ }
case ARM::VMOVHcc:
case ARM::VMOVScc:
case ARM::VMOVDcc: {
@@ -1359,17 +2165,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// If there's dynamic realignment, adjust for it.
if (RI.needsStackRealignment(MF)) {
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned MaxAlign = MFI.getMaxAlignment();
+ Align MaxAlign = MFI.getMaxAlign();
assert (!AFI->isThumb1OnlyFunction());
// Emit bic r6, r6, MaxAlign
- assert(MaxAlign <= 256 && "The BIC instruction cannot encode "
- "immediates larger than 256 with all lower "
- "bits set.");
+ assert(MaxAlign <= Align(256) &&
+ "The BIC instruction cannot encode "
+ "immediates larger than 256 with all lower "
+ "bits set.");
unsigned bicOpc = AFI->isThumbFunction() ?
ARM::t2BICri : ARM::BICri;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(bicOpc), ARM::R6)
.addReg(ARM::R6, RegState::Kill)
- .addImm(MaxAlign - 1)
+ .addImm(MaxAlign.value() - 1)
.add(predOps(ARMCC::AL))
.add(condCodeOp());
}
@@ -1410,17 +2217,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
const bool Thumb = Opcode == ARM::tTPsoft;
MachineInstrBuilder MIB;
+ MachineFunction *MF = MBB.getParent();
if (STI->genLongCalls()) {
- MachineFunction *MF = MBB.getParent();
MachineConstantPool *MCP = MF->getConstantPool();
unsigned PCLabelID = AFI->createPICLabelUId();
MachineConstantPoolValue *CPV =
ARMConstantPoolSymbol::Create(MF->getFunction().getContext(),
"__aeabi_read_tp", PCLabelID, 0);
Register Reg = MI.getOperand(0).getReg();
- MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg)
- .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4));
+ MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg)
+ .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4)));
if (!Thumb)
MIB.addImm(0);
MIB.add(predOps(ARMCC::AL));
@@ -1440,7 +2248,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB.cloneMemRefs(MI);
TransferImpOps(MI, MIB, MIB);
- MI.getMF()->moveCallSiteInfo(&MI, &*MIB);
+ // Update the call site info.
+ if (MI.isCandidateForCallSiteEntry())
+ MF->moveCallSiteInfo(&MI, &*MIB);
MI.eraseFromParent();
return true;
}
@@ -1504,7 +2314,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LDRLITOpc), DstReg)
- .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4));
+ .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4)));
if (IsARM)
MIB.addImm(0);
MIB.add(predOps(ARMCC::AL));
@@ -1952,6 +2762,24 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.eraseFromParent();
return true;
}
+ case ARM::LOADDUAL:
+ case ARM::STOREDUAL: {
+ Register PairReg = MI.getOperand(0).getReg();
+
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == ARM::LOADDUAL ? ARM::LDRD : ARM::STRD))
+ .addReg(TRI->getSubReg(PairReg, ARM::gsub_0),
+ Opcode == ARM::LOADDUAL ? RegState::Define : 0)
+ .addReg(TRI->getSubReg(PairReg, ARM::gsub_1),
+ Opcode == ARM::LOADDUAL ? RegState::Define : 0);
+ for (unsigned i = 1; i < MI.getNumOperands(); i++)
+ MIB.add(MI.getOperand(i));
+ MIB.add(predOps(ARMCC::AL));
+ MIB.cloneMemRefs(MI);
+ MI.eraseFromParent();
+ return true;
+ }
}
}
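
The v8.0-M clearing loop in CMSEClearFPRegsV8 above prefers a single D-register clear when both S halves of a pair need clearing, and falls back to individual S-register clears otherwise. A small standalone model of just that selection, with a bitset standing in for the BitVector and no instructions actually built:

    #include <bitset>
    #include <cstdio>

    int main() {
      std::bitset<16> ClearRegs;  // s0-s15, true = needs clearing
      ClearRegs.set();            // start from "clear everything"
      ClearRegs.reset(1);         // pretend s1 carries part of the return value
      for (int D = 0; D < 8; ++D) {
        if (ClearRegs[D * 2] && ClearRegs[D * 2 + 1]) {
          std::printf("clear d%d as a pair\n", D);
        } else {
          if (ClearRegs[D * 2])
            std::printf("clear s%d on its own\n", D * 2);
          if (ClearRegs[D * 2 + 1])
            std::printf("clear s%d on its own\n", D * 2 + 1);
        }
      }
      return 0;
    }
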
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp
index 6e19db3c7e22..4bfca8a803ca 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -48,7 +48,6 @@
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -209,7 +208,7 @@ class ARMFastISel final : public FastISel {
unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
unsigned ARMSelectCallOp(bool UseReg);
- unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
+ unsigned ARMLowerPICELF(const GlobalValue *GV, MVT VT);
const TargetLowering *getTargetLowering() { return &TLI; }
@@ -444,12 +443,8 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
if (!Subtarget->hasVFP2Base()) return false;
// MachineConstantPool wants an explicit alignment.
- unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
- if (Align == 0) {
- // TODO: Figure out if this is correct.
- Align = DL.getTypeAllocSize(CFP->getType());
- }
- unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+ Align Alignment = DL.getPrefTypeAlign(CFP->getType());
+ unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
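For context, a small sketch (not from the patch) of why the old zero-alignment fallback disappears: llvm::Align always holds a power of two of at least one, so DL.getPrefTypeAlign never yields an "unknown" value. Assuming the helpers from llvm/Support/Alignment.h:

#include "llvm/Support/Alignment.h"

// Padding computations stay well defined because an Align is never zero.
static uint64_t padToPrefAlignSketch(uint64_t Bytes, llvm::Align A) {
  return llvm::alignTo(Bytes, A); // e.g. alignTo(10, llvm::Align(4)) == 12
}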
@@ -508,12 +503,8 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
return 0;
// MachineConstantPool wants an explicit alignment.
- unsigned Align = DL.getPrefTypeAlignment(C->getType());
- if (Align == 0) {
- // TODO: Figure out if this is correct.
- Align = DL.getTypeAllocSize(C->getType());
- }
- unsigned Idx = MCP.getConstantPoolIndex(C, Align);
+ Align Alignment = DL.getPrefTypeAlign(C->getType());
+ unsigned Idx = MCP.getConstantPoolIndex(C, Alignment);
ResultReg = createResultReg(TLI.getRegClassFor(VT));
if (isThumb2)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -570,14 +561,10 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
TII.get(Opc), DestReg).addGlobalAddress(GV, 0, TF));
} else {
// MachineConstantPool wants an explicit alignment.
- unsigned Align = DL.getPrefTypeAlignment(GV->getType());
- if (Align == 0) {
- // TODO: Figure out if this is correct.
- Align = DL.getTypeAllocSize(GV->getType());
- }
+ Align Alignment = DL.getPrefTypeAlign(GV->getType());
if (Subtarget->isTargetELF() && IsPositionIndependent)
- return ARMLowerPICELF(GV, Align, VT);
+ return ARMLowerPICELF(GV, VT);
// Grab index.
unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
@@ -585,7 +572,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
ARMCP::CPValue,
PCAdj);
- unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+ unsigned Idx = MCP.getConstantPoolIndex(CPV, Alignment);
// Load value.
MachineInstrBuilder MIB;
@@ -882,7 +869,7 @@ void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
int Offset = Addr.Offset;
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
- MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
// Now add the rest of the operands.
MIB.addFrameIndex(FI);
@@ -2090,6 +2077,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
bool ARMFastISel::SelectRet(const Instruction *I) {
const ReturnInst *Ret = cast<ReturnInst>(I);
const Function &F = *I->getParent()->getParent();
+ const bool IsCmseNSEntry = F.hasFnAttribute("cmse_nonsecure_entry");
if (!FuncInfo.CanLowerReturn)
return false;
@@ -2166,8 +2154,17 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
RetRegs.push_back(VA.getLocReg());
}
+ unsigned RetOpc;
+ if (IsCmseNSEntry)
+ if (isThumb2)
+ RetOpc = ARM::tBXNS_RET;
+ else
+ llvm_unreachable("CMSE not valid for non-Thumb targets");
+ else
+ RetOpc = Subtarget->getReturnOpcode();
+
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(Subtarget->getReturnOpcode()));
+ TII.get(RetOpc));
AddOptionalDefs(MIB);
for (unsigned R : RetRegs)
MIB.addReg(R, RegState::Implicit);
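A compact restatement (not from the patch) of the return-opcode choice above, with a hypothetical RetOpcodesSketch struct standing in for the real ARM opcode enumerators:

#include <cassert>

struct RetOpcodesSketch { unsigned BXNSRet; unsigned SubtargetDefault; };

static unsigned pickRetOpcodeSketch(bool IsCmseNSEntry, bool IsThumb2,
                                    RetOpcodesSketch O) {
  if (!IsCmseNSEntry)
    return O.SubtargetDefault;  // Subtarget->getReturnOpcode()
  assert(IsThumb2 && "CMSE not valid for non-Thumb targets");
  return O.BXNSRet;             // ARM::tBXNS_RET
}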
@@ -2239,7 +2236,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
if (!isTypeLegal(ArgTy, ArgVT)) return false;
ISD::ArgFlagsTy Flags;
- Flags.setOrigAlign(Align(DL.getABITypeAlignment(ArgTy)));
+ Flags.setOrigAlign(DL.getABITypeAlign(ArgTy));
Args.push_back(Op);
ArgRegs.push_back(Arg);
@@ -2293,7 +2290,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
bool ARMFastISel::SelectCall(const Instruction *I,
const char *IntrMemName = nullptr) {
const CallInst *CI = cast<CallInst>(I);
- const Value *Callee = CI->getCalledValue();
+ const Value *Callee = CI->getCalledOperand();
// Can't handle inline asm.
if (isa<InlineAsm>(Callee)) return false;
@@ -2302,12 +2299,11 @@ bool ARMFastISel::SelectCall(const Instruction *I,
if (CI->isTailCall()) return false;
// Check the calling convention.
- ImmutableCallSite CS(CI);
- CallingConv::ID CC = CS.getCallingConv();
+ CallingConv::ID CC = CI->getCallingConv();
// TODO: Avoid some calling conventions?
- FunctionType *FTy = CS.getFunctionType();
+ FunctionType *FTy = CI->getFunctionType();
bool isVarArg = FTy->isVarArg();
// Handle *simple* calls for now.
@@ -2334,47 +2330,46 @@ bool ARMFastISel::SelectCall(const Instruction *I,
SmallVector<Register, 8> ArgRegs;
SmallVector<MVT, 8> ArgVTs;
SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
- unsigned arg_size = CS.arg_size();
+ unsigned arg_size = CI->arg_size();
Args.reserve(arg_size);
ArgRegs.reserve(arg_size);
ArgVTs.reserve(arg_size);
ArgFlags.reserve(arg_size);
- for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
- i != e; ++i) {
+ for (auto ArgI = CI->arg_begin(), ArgE = CI->arg_end(); ArgI != ArgE; ++ArgI) {
// If we're lowering a memory intrinsic instead of a regular call, skip the
// last argument, which shouldn't be passed to the underlying function.
- if (IntrMemName && e - i <= 1)
+ if (IntrMemName && ArgE - ArgI <= 1)
break;
ISD::ArgFlagsTy Flags;
- unsigned ArgIdx = i - CS.arg_begin();
- if (CS.paramHasAttr(ArgIdx, Attribute::SExt))
+ unsigned ArgIdx = ArgI - CI->arg_begin();
+ if (CI->paramHasAttr(ArgIdx, Attribute::SExt))
Flags.setSExt();
- if (CS.paramHasAttr(ArgIdx, Attribute::ZExt))
+ if (CI->paramHasAttr(ArgIdx, Attribute::ZExt))
Flags.setZExt();
// FIXME: Only handle *easy* calls for now.
- if (CS.paramHasAttr(ArgIdx, Attribute::InReg) ||
- CS.paramHasAttr(ArgIdx, Attribute::StructRet) ||
- CS.paramHasAttr(ArgIdx, Attribute::SwiftSelf) ||
- CS.paramHasAttr(ArgIdx, Attribute::SwiftError) ||
- CS.paramHasAttr(ArgIdx, Attribute::Nest) ||
- CS.paramHasAttr(ArgIdx, Attribute::ByVal))
+ if (CI->paramHasAttr(ArgIdx, Attribute::InReg) ||
+ CI->paramHasAttr(ArgIdx, Attribute::StructRet) ||
+ CI->paramHasAttr(ArgIdx, Attribute::SwiftSelf) ||
+ CI->paramHasAttr(ArgIdx, Attribute::SwiftError) ||
+ CI->paramHasAttr(ArgIdx, Attribute::Nest) ||
+ CI->paramHasAttr(ArgIdx, Attribute::ByVal))
return false;
- Type *ArgTy = (*i)->getType();
+ Type *ArgTy = (*ArgI)->getType();
MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
ArgVT != MVT::i1)
return false;
- Register Arg = getRegForValue(*i);
+ Register Arg = getRegForValue(*ArgI);
if (!Arg.isValid())
return false;
- Flags.setOrigAlign(Align(DL.getABITypeAlignment(ArgTy)));
+ Flags.setOrigAlign(DL.getABITypeAlign(ArgTy));
- Args.push_back(*i);
+ Args.push_back(*ArgI);
ArgRegs.push_back(Arg);
ArgVTs.push_back(ArgVT);
ArgFlags.push_back(Flags);
@@ -2949,8 +2944,7 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
return true;
}
-unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
- unsigned Align, MVT VT) {
+unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, MVT VT) {
bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
LLVMContext *Context = &MF->getFunction().getContext();
@@ -2961,12 +2955,12 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
/*AddCurrentAddress=*/UseGOT_PREL);
- unsigned ConstAlign =
- MF->getDataLayout().getPrefTypeAlignment(Type::getInt32PtrTy(*Context));
+ Align ConstAlign =
+ MF->getDataLayout().getPrefTypeAlign(Type::getInt32PtrTy(*Context));
unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(CPV, ConstAlign);
MachineMemOperand *CPMMO =
MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
- MachineMemOperand::MOLoad, 4, 4);
+ MachineMemOperand::MOLoad, 4, Align(4));
Register TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index cb98b2b34efd..8a8f3237bb6f 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -142,27 +142,6 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
}
-static bool isCSRestore(MachineInstr &MI, const ARMBaseInstrInfo &TII,
- const MCPhysReg *CSRegs) {
- // Integer spill area is handled with "pop".
- if (isPopOpcode(MI.getOpcode())) {
- // The first two operands are predicates. The last two are
- // imp-def and imp-use of SP. Check everything in between.
- for (int i = 5, e = MI.getNumOperands(); i != e; ++i)
- if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs))
- return false;
- return true;
- }
- if ((MI.getOpcode() == ARM::LDR_POST_IMM ||
- MI.getOpcode() == ARM::LDR_POST_REG ||
- MI.getOpcode() == ARM::t2LDR_POST) &&
- isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs) &&
- MI.getOperand(1).getReg() == ARM::SP)
- return true;
-
- return false;
-}
-
static void emitRegPlusImmediate(
bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
@@ -256,9 +235,9 @@ struct StackAdjustingInsts {
if (HasFP && !Info.BeforeFPSet)
return;
- CFAOffset -= Info.SPAdjust;
+ CFAOffset += Info.SPAdjust;
unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, std::next(Info.I), dl,
TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
@@ -281,13 +260,13 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, const unsigned Reg,
- const unsigned Alignment,
+ const Align Alignment,
const bool MustBeSingleInstruction) {
const ARMSubtarget &AST =
static_cast<const ARMSubtarget &>(MF.getSubtarget());
const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
- const unsigned AlignMask = Alignment - 1;
- const unsigned NrBitsToZero = countTrailingZeros(Alignment);
+ const unsigned AlignMask = Alignment.value() - 1U;
+ const unsigned NrBitsToZero = Log2(Alignment);
assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
if (!AFI->isThumbFunction()) {
// if the BFC instruction is available, use that to zero the lower
@@ -343,14 +322,15 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
/// this to produce a conservative estimate that we check in an assert() later.
-static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) {
+static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI) {
// For Thumb1, push.w isn't available, so the first push will always push
// r7 and lr onto the stack first.
if (AFI.isThumb1OnlyFunction())
return -AFI.getArgRegsSaveSize() - (2 * 4);
// This is a conservative estimation: Assume the frame pointer being r7 and
// pc("r15") up to r8 getting spilled before (= 8 registers).
- return -AFI.getArgRegsSaveSize() - (8 * 4);
+  int FPCXTSaveSize =
+      (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
+  return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - (8 * 4);
}
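A worked version (not from the patch, made-up numbers) of the conservative bound getMaxFPOffset computes above:

// Thumb1 pushes r7 and lr first; otherwise assume the new 4-byte FPCXT_NS slot
// (v8.1-M.Main CMSE entry only) plus up to 8 spilled GPRs.
static int maxFPOffsetSketch(bool IsThumb1Only, bool IsCmseEntryOn8_1M,
                             int ArgRegsSaveSize) {
  if (IsThumb1Only)
    return -ArgRegsSaveSize - 2 * 4;
  int FPCXTSaveSize = IsCmseEntryOn8_1M ? 4 : 0;
  return -FPCXTSaveSize - ArgRegsSaveSize - 8 * 4;
}
// e.g. maxFPOffsetSketch(false, true, 0) == -36, versus -32 without CMSE.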
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
@@ -367,10 +347,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
assert(!AFI->isThumb1OnlyFunction() &&
"This emitPrologue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned Align = STI.getFrameLowering()->getStackAlignment();
+ Align Alignment = STI.getFrameLowering()->getStackAlign();
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
unsigned NumBytes = MFI.getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ int FPCXTSaveSize = 0;
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@@ -439,6 +420,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
FramePtrSpillFI = FI;
GPRCS1Size += 4;
break;
+ case ARM::FPCXTNS:
+ FPCXTSaveSize = 4;
+ break;
default:
// This is a DPR. Exclude the aligned DPRCS2 spills.
if (Reg == ARM::D8)
@@ -448,25 +432,35 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
}
}
- // Move past area 1.
+ // Move past FPCXT area.
MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
+ if (FPCXTSaveSize > 0) {
+ LastPush = MBBI++;
+ DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
+ }
+
+ // Move past area 1.
if (GPRCS1Size > 0) {
GPRCS1Push = LastPush = MBBI++;
DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
}
// Determine starting offsets of spill areas.
- unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
+ unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
+ unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
- unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
- unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign;
+ Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
+ unsigned DPRGapSize =
+ (GPRCS1Size + GPRCS2Size + FPCXTSaveSize + ArgRegsSaveSize) %
+ DPRAlign.value();
+
unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
int FramePtrOffsetInPush = 0;
if (HasFP) {
int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
- assert(getMaxFPOffset(MF.getFunction(), *AFI) <= FPOffset &&
+ assert(getMaxFPOffset(STI, *AFI) <= FPOffset &&
"Max FP estimation is wrong");
- FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize;
+ FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
NumBytes);
}
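A numeric sketch (not from the patch, made-up sizes) of the DPR gap computed above: the bytes already pushed are reduced modulo the DPR alignment:

static unsigned dprGapSketch(unsigned GPRCS1, unsigned GPRCS2, unsigned FPCXT,
                             unsigned ArgRegsSave, unsigned DPRAlignVal) {
  return (GPRCS1 + GPRCS2 + FPCXT + ArgRegsSave) % DPRAlignVal;
}
// e.g. dprGapSketch(8, 8, 4, 0, 8) == 4, the 4-byte gap the epilogue asserts on.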
@@ -599,9 +593,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
PushSize + FramePtrOffsetInPush,
MachineInstr::FrameSetup);
if (FramePtrOffsetInPush + PushSize != 0) {
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
nullptr, MRI->getDwarfRegNum(FramePtr, true),
- -(ArgRegsSaveSize - FramePtrOffsetInPush)));
+ FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -707,6 +701,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
+ AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
AFI->setDPRCalleeSavedGapSize(DPRGapSize);
@@ -717,7 +712,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// If aligned NEON registers were spilled, the stack has already been
// realigned.
if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
- unsigned MaxAlign = MFI.getMaxAlignment();
+ Align MaxAlign = MFI.getMaxAlign();
assert(!AFI->isThumb1OnlyFunction());
if (!AFI->isThumbFunction()) {
emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
@@ -793,20 +788,22 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
if (!AFI->hasStackFrame()) {
if (NumBytes - ArgRegsSaveSize != 0)
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize);
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize,
+ MachineInstr::FrameDestroy);
} else {
// Unwind MBBI to point to first LDR / VLDRD.
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
if (MBBI != MBB.begin()) {
do {
--MBBI;
- } while (MBBI != MBB.begin() && isCSRestore(*MBBI, TII, CSRegs));
- if (!isCSRestore(*MBBI, TII, CSRegs))
+ } while (MBBI != MBB.begin() &&
+ MBBI->getFlag(MachineInstr::FrameDestroy));
+ if (!MBBI->getFlag(MachineInstr::FrameDestroy))
++MBBI;
}
// Move SP to start of FP callee save spill area.
NumBytes -= (ArgRegsSaveSize +
+ AFI->getFPCXTSaveAreaSize() +
AFI->getGPRCalleeSavedArea1Size() +
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedGapSize() +
@@ -819,7 +816,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
if (NumBytes) {
if (isARM)
emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
- ARMCC::AL, 0, TII);
+ ARMCC::AL, 0, TII,
+ MachineInstr::FrameDestroy);
else {
// It's not possible to restore SP from FP in a single instruction.
// For iOS, this looks like:
@@ -831,10 +829,11 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
"No scratch register to restore SP from FP!");
emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
- ARMCC::AL, 0, TII);
+ ARMCC::AL, 0, TII, MachineInstr::FrameDestroy);
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
.addReg(ARM::R4)
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
}
} else {
// Thumb2 or ARM.
@@ -842,15 +841,18 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
.addReg(FramePtr)
.add(predOps(ARMCC::AL))
- .add(condCodeOp());
+ .add(condCodeOp())
+ .setMIFlag(MachineInstr::FrameDestroy);
else
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
.addReg(FramePtr)
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
}
} else if (NumBytes &&
!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
+ MachineInstr::FrameDestroy);
// Increment past our save areas.
if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
@@ -863,31 +865,32 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
if (AFI->getDPRCalleeSavedGapSize()) {
assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
"unexpected DPR alignment gap");
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize());
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize(),
+ MachineInstr::FrameDestroy);
}
if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
+ if (AFI->getFPCXTSaveAreaSize()) MBBI++;
}
if (ArgRegsSaveSize)
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize,
+ MachineInstr::FrameDestroy);
}
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
-int
-ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const {
+int ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const {
return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
}
-int
-ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
- int FI, unsigned &FrameReg,
- int SPAdj) const {
+int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
+ int FI, Register &FrameReg,
+ int SPAdj) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
@@ -969,10 +972,9 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
+ ArrayRef<CalleeSavedInfo> CSI,
unsigned StmOpc, unsigned StrOpc,
- bool NoGap,
- bool(*Func)(unsigned, bool),
+ bool NoGap, bool (*Func)(unsigned, bool),
unsigned NumAlignedDPRCS2Regs,
unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
@@ -1047,10 +1049,10 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
+ MutableArrayRef<CalleeSavedInfo> CSI,
unsigned LdmOpc, unsigned LdrOpc,
bool isVarArg, bool NoGap,
- bool(*Func)(unsigned, bool),
+ bool (*Func)(unsigned, bool),
unsigned NumAlignedDPRCS2Regs) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
@@ -1060,6 +1062,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
bool isTailCall = false;
bool isInterrupt = false;
bool isTrap = false;
+ bool isCmseEntry = false;
if (MBB.end() != MI) {
DL = MI->getDebugLoc();
unsigned RetOpcode = MI->getOpcode();
@@ -1069,6 +1072,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
isTrap =
RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
RetOpcode == ARM::tTRAP;
+ isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
}
SmallVector<unsigned, 4> Regs;
@@ -1086,7 +1090,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
continue;
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
- !isTrap && STI.hasV5TOps()) {
+ !isCmseEntry && !isTrap && STI.hasV5TOps()) {
if (MBB.succ_empty()) {
Reg = ARM::PC;
// Fold the return instruction into the LDM.
@@ -1119,7 +1123,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
if (Regs.size() > 1 || LdrOpc == 0) {
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
.addReg(ARM::SP)
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MachineInstr::FrameDestroy);
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i], getDefRegState(true));
if (DeleteRet) {
@@ -1137,7 +1142,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
MachineInstrBuilder MIB =
BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
.addReg(ARM::SP, RegState::Define)
- .addReg(ARM::SP);
+ .addReg(ARM::SP)
+ .setMIFlags(MachineInstr::FrameDestroy);
// ARM mode needs an extra reg0 here due to addrmode2. Will go away once
// that refactoring is complete (eventually).
if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
@@ -1162,7 +1168,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned NumAlignedDPRCS2Regs,
- const std::vector<CalleeSavedInfo> &CSI,
+ ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) {
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -1180,7 +1186,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
int FI = CSI[i].getFrameIdx();
// The even-numbered registers will be 16-byte aligned, the odd-numbered
// registers will be 8-byte aligned.
- MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);
+ MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));
// The stack slot for D8 needs to be maximally aligned because this is
// actually the point where we align the stack pointer. MachineFrameInfo
@@ -1189,7 +1195,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
// over-alignment is not realized because the code inserted below adjusts
// the stack pointer by numregs * 8 before aligning the stack pointer.
if (DNum == 0)
- MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
+ MFI.setObjectAlignment(FI, MFI.getMaxAlign());
}
// Move the stack pointer to the d8 spill slot, and align it at the same
@@ -1212,7 +1218,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
.add(predOps(ARMCC::AL))
.add(condCodeOp());
- unsigned MaxAlign = MF.getFrameInfo().getMaxAlignment();
+ Align MaxAlign = MF.getFrameInfo().getMaxAlign();
// We must set parameter MustBeSingleInstruction to true, since
// skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
// stack alignment. Luckily, this can always be done since all ARM
@@ -1335,7 +1341,7 @@ skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned NumAlignedDPRCS2Regs,
- const std::vector<CalleeSavedInfo> &CSI,
+ ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) {
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -1422,10 +1428,9 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}
-bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+bool ARMFrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -1437,6 +1442,16 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
ARM::t2STR_PRE : ARM::STR_PRE_IMM;
unsigned FltOpc = ARM::VSTMDDB_UPD;
unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
+  // Save the non-secure floating-point context.
+ if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
+ return C.getReg() == ARM::FPCXTNS;
+ })) {
+ BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
+ ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(-4)
+ .add(predOps(ARMCC::AL));
+ }
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
MachineInstr::FrameSetup);
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
@@ -1453,10 +1468,9 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
-bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+bool ARMFrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -1601,7 +1615,7 @@ checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
return;
// Don't bother if the default stack alignment is sufficiently high.
- if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8)
+ if (MF.getSubtarget().getFrameLowering()->getStackAlign() >= Align(8))
return;
// Aligned spills require stack realignment.
@@ -1630,6 +1644,16 @@ checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
SavedRegs.set(ARM::R4);
}
+bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
+ // For CMSE entry functions, we want to save the FPCXT_NS immediately
+  // upon function entry (resp. restore it immediately before return)
+ if (STI.hasV8_1MMainlineOps() &&
+ MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction())
+ return false;
+
+ return true;
+}
+
void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
BitVector &SavedRegs,
RegScavenger *RS) const {
@@ -1699,6 +1723,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (RegInfo->hasBasePointer(MF))
SavedRegs.set(RegInfo->getBaseRegister());
+  // On v8.1-M.Main, CMSE entry functions save/restore FPCXT.
+ if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
+ CanEliminateFrame = false;
+
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is modified.
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
@@ -1771,8 +1799,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
// Force LR to be spilled if the Thumb function size is > 2048. This enables
- // use of BL to implement far jump. If it turns out that it's not needed
- // then the branch fix up path will undo it.
+ // use of BL to implement far jump.
if (FnSize >= (1 << 11)) {
CanEliminateFrame = false;
ForceLRSpill = true;
@@ -1858,7 +1885,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
//
// We could do slightly better on Thumb1; in some cases, an sp-relative
// offset would be legal even though an fp-relative offset is not.
- int MaxFPOffset = getMaxFPOffset(MF.getFunction(), *AFI);
+ int MaxFPOffset = getMaxFPOffset(STI, *AFI);
bool HasLargeArgumentList =
HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
@@ -2045,8 +2072,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// of GPRs, spill one extra callee save GPR so we won't have to pad between
// the integer and double callee save areas.
LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
- unsigned TargetAlign = getStackAlignment();
- if (TargetAlign >= 8 && (NumGPRSpills & 1)) {
+ const Align TargetAlign = getStackAlign();
+ if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
unsigned Reg = UnspilledCS1GPRs[i];
@@ -2083,7 +2110,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (BigFrameOffsets && !ExtraCSSpill) {
// If any non-reserved CS register isn't spilled, just spill one or two
// extra. That should take care of it!
- unsigned NumExtras = TargetAlign / 4;
+ unsigned NumExtras = TargetAlign.value() / 4;
SmallVector<unsigned, 2> Extras;
while (NumExtras && !UnspilledCS1GPRs.empty()) {
unsigned Reg = UnspilledCS1GPRs.back();
@@ -2117,16 +2144,15 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
const TargetRegisterClass &RC = ARM::GPRRegClass;
unsigned Size = TRI->getSpillSize(RC);
- unsigned Align = TRI->getSpillAlignment(RC);
- RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
+ Align Alignment = TRI->getSpillAlign(RC);
+ RS->addScavengingFrameIndex(
+ MFI.CreateStackObject(Size, Alignment, false));
}
}
}
- if (ForceLRSpill) {
+ if (ForceLRSpill)
SavedRegs.set(ARM::LR);
- AFI->setLRIsSpilledForFarJump(true);
- }
AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
}
@@ -2142,6 +2168,27 @@ void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
SavedRegs.set(ARM::R0);
}
+bool ARMFrameLowering::assignCalleeSavedSpillSlots(
+ MachineFunction &MF, const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const {
+ // For CMSE entry functions, handle floating-point context as if it was a
+ // callee-saved register.
+ if (STI.hasV8_1MMainlineOps() &&
+ MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction()) {
+ CSI.emplace_back(ARM::FPCXTNS);
+ CSI.back().setRestored(false);
+ }
+
+ return false;
+}
+
+const TargetFrameLowering::SpillSlot *
+ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+ static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
+ NumEntries = array_lengthof(FixedSpillOffsets);
+ return FixedSpillOffsets;
+}
+
MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
@@ -2364,8 +2411,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Emit the relevant DWARF information about the change in stack pointer as
// well as where to find both r4 and r5 (the callee-save registers)
- CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8));
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
@@ -2409,7 +2455,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
MachineConstantPool *MCP = MF.getConstantPool();
- unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);
+ unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));
// ldr SR0, [pc, offset(STACK_LIMIT)]
BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
@@ -2507,8 +2553,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Emit the DWARF info about the change in stack as well as where to find the
// previous link register
- CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12));
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
@@ -2570,7 +2615,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
}
// Update the CFA offset now that we've popped
- CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -2594,7 +2639,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
}
// Update the CFA offset now that we've popped
- CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h
index 0462b01af707..4c2c07d64f57 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -9,9 +9,7 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H
#define LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
-#include <vector>
namespace llvm {
@@ -33,13 +31,14 @@ public:
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
+ ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) const override;
- bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const override;
+ bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
bool keepFramePointer(const MachineFunction &MF) const override;
@@ -49,9 +48,9 @@ public:
bool hasReservedCallFrame(const MachineFunction &MF) const override;
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
int getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const override;
+ Register &FrameReg) const override;
int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg, int SPAdj) const;
+ Register &FrameReg, int SPAdj) const;
void getCalleeSaves(const MachineFunction &MF,
BitVector &SavedRegs) const override;
@@ -62,25 +61,31 @@ public:
MachineBasicBlock &MBB) const override;
/// Returns true if the target will correctly handle shrink wrapping.
- bool enableShrinkWrapping(const MachineFunction &MF) const override {
- return true;
- }
+ bool enableShrinkWrapping(const MachineFunction &MF) const override;
+
bool isProfitableForNoCSROpt(const Function &F) const override {
// The no-CSR optimisation is bad for code size on ARM, because we can save
// many registers with a single PUSH/POP pair.
return false;
}
+ bool
+ assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const override;
+
+ const SpillSlot *
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
+
private:
void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
- unsigned StrOpc, bool NoGap,
- bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs,
- unsigned MIFlags = 0) const;
+ ArrayRef<CalleeSavedInfo> CSI, unsigned StmOpc,
+ unsigned StrOpc, bool NoGap, bool (*Func)(unsigned, bool),
+ unsigned NumAlignedDPRCS2Regs, unsigned MIFlags = 0) const;
void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc,
+ MutableArrayRef<CalleeSavedInfo> CSI, unsigned LdmOpc,
unsigned LdrOpc, bool isVarArg, bool NoGap,
- bool(*Func)(unsigned, bool),
+ bool (*Func)(unsigned, bool),
unsigned NumAlignedDPRCS2Regs) const;
MachineBasicBlock::iterator
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9b06987178d8..2a9a31dab74f 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -145,6 +145,8 @@ public:
// Thumb 2 Addressing Modes:
bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+ template <unsigned Shift>
+ bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
SDValue &OffImm);
bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
@@ -237,6 +239,10 @@ private:
void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);
+ /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
+ /// vector lanes.
+ void SelectMVE_VSHLC(SDNode *N, bool Predicated);
+
/// Select long MVE vector reductions with two vector operands
/// Stride is the number of vector element widths the instruction can operate
/// on:
@@ -264,7 +270,21 @@ private:
/// pointer points to a set of NumVecs sub-opcodes used for the
/// different stages (e.g. VLD20 versus VLD21) of each load family.
void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
- const uint16_t *const *Opcodes);
+ const uint16_t *const *Opcodes, bool HasWriteback);
+
+ /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
+ /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
+ void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
+ bool Wrapping, bool Predicated);
+
+  /// SelectCDE_CXxD - Select a CDE dual-GPR instruction (one of CX1D,
+  /// CX1DA, CX2D, CX2DA, CX3D, CX3DA).
+  /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
+ /// the accumulator and the immediate operand, i.e. 0
+ /// for CX1*, 1 for CX2*, 2 for CX3*
+ /// \arg \c HasAccum whether the instruction has an accumulator operand
+ void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
+ bool HasAccum);
/// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
/// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
@@ -1171,8 +1191,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
// Only multiples of 4 are allowed for the offset, so the frame object
// alignment must be at least 4.
MachineFrameInfo &MFI = MF->getFrameInfo();
- if (MFI.getObjectAlignment(FI) < 4)
- MFI.setObjectAlignment(FI, 4);
+ if (MFI.getObjectAlign(FI) < Align(4))
+ MFI.setObjectAlignment(FI, Align(4));
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
@@ -1195,9 +1215,9 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
if (RHSC * 4 < MFI.getObjectSize(FI)) {
// For LHS+RHS to result in an offset that's a multiple of 4 the object
// indexed by the LHS must be 4-byte aligned.
- if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
- MFI.setObjectAlignment(FI, 4);
- if (MFI.getObjectAlignment(FI) >= 4) {
+ if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
+ MFI.setObjectAlignment(FI, Align(4));
+ if (MFI.getObjectAlign(FI) >= Align(4)) {
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
@@ -1294,6 +1314,33 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
return true;
}
+template <unsigned Shift>
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ }
+
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+ OffImm =
+ CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
+ return true;
+ }
+ }
+
+ // Base only.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+ return true;
+}
+
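A standalone restatement (not from the patch) of the range check the templated matcher applies, assuming the usual half-open [min, max) convention of the isScaledConstantInRange call seen above:

// The byte offset must be a multiple of 1 << Shift and scale down into [-255, 255].
static bool fitsT2AddrModeImm8ScaledSketch(int ByteOffset, unsigned Shift) {
  int Scale = 1 << Shift;
  if (ByteOffset % Scale != 0)
    return false;
  int Scaled = ByteOffset / Scale;
  return Scaled >= -255 && Scaled <= 255;
}
// e.g. Shift == 1 accepts even offsets from -510 to 510 and rejects everything else.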
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
SDValue &Base, SDValue &OffImm) {
// Match simple R - imm8 operands.
@@ -1679,7 +1726,7 @@ bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
EVT LoadedVT;
unsigned Opcode = 0;
bool isSExtLd, isPre;
- unsigned Align;
+ Align Alignment;
ARMVCC::VPTCodes Pred;
SDValue PredReg;
SDValue Chain, Base, Offset;
@@ -1695,7 +1742,7 @@ bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
Chain = LD->getChain();
Base = LD->getBasePtr();
Offset = LD->getOffset();
- Align = LD->getAlignment();
+ Alignment = LD->getAlign();
isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
Pred = ARMVCC::None;
@@ -1711,7 +1758,7 @@ bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
Chain = LD->getChain();
Base = LD->getBasePtr();
Offset = LD->getOffset();
- Align = LD->getAlignment();
+ Alignment = LD->getAlign();
isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
Pred = ARMVCC::Then;
@@ -1725,7 +1772,7 @@ bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);
SDValue NewOffset;
- if (Align >= 2 && LoadedVT == MVT::v4i16 &&
+ if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
if (isSExtLd)
Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
@@ -1743,12 +1790,12 @@ bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
else
Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
- } else if (Align >= 4 &&
+ } else if (Alignment >= Align(4) &&
(CanChangeType || LoadedVT == MVT::v4i32 ||
LoadedVT == MVT::v4f32) &&
SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
- else if (Align >= 2 &&
+ else if (Alignment >= Align(2) &&
(CanChangeType || LoadedVT == MVT::v8i16 ||
LoadedVT == MVT::v8f16) &&
SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
@@ -1762,8 +1809,8 @@ bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
SDValue Ops[] = {Base, NewOffset,
CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
Chain};
- SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), N->getValueType(0),
- MVT::i32, MVT::Other, Ops);
+ SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
+ N->getValueType(0), MVT::Other, Ops);
transferMemOperands(N, New);
ReplaceUses(SDValue(N, 0), SDValue(New, 1));
ReplaceUses(SDValue(N, 1), SDValue(New, 0));
@@ -2009,6 +2056,7 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
const uint16_t *DOpcodes,
const uint16_t *QOpcodes0,
const uint16_t *QOpcodes1) {
+ assert(Subtarget->hasNEON());
assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
SDLoc dl(N);
@@ -2030,6 +2078,7 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
// Double-register operations:
case MVT::v8i8: OpcodeIndex = 0; break;
case MVT::v4f16:
+ case MVT::v4bf16:
case MVT::v4i16: OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32: OpcodeIndex = 2; break;
@@ -2037,6 +2086,7 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
// Quad-register operations:
case MVT::v16i8: OpcodeIndex = 0; break;
case MVT::v8f16:
+ case MVT::v8bf16:
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
@@ -2148,6 +2198,7 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
const uint16_t *DOpcodes,
const uint16_t *QOpcodes0,
const uint16_t *QOpcodes1) {
+ assert(Subtarget->hasNEON());
assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
SDLoc dl(N);
@@ -2172,6 +2223,7 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
// Double-register operations:
case MVT::v8i8: OpcodeIndex = 0; break;
case MVT::v4f16:
+ case MVT::v4bf16:
case MVT::v4i16: OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32: OpcodeIndex = 2; break;
@@ -2179,6 +2231,7 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
// Quad-register operations:
case MVT::v16i8: OpcodeIndex = 0; break;
case MVT::v8f16:
+ case MVT::v8bf16:
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
@@ -2299,6 +2352,7 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
unsigned NumVecs,
const uint16_t *DOpcodes,
const uint16_t *QOpcodes) {
+ assert(Subtarget->hasNEON());
assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
SDLoc dl(N);
@@ -2339,11 +2393,13 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
// Double-register operations:
case MVT::v8i8: OpcodeIndex = 0; break;
case MVT::v4f16:
+ case MVT::v4bf16:
case MVT::v4i16: OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32: OpcodeIndex = 2; break;
// Quad-register operations:
case MVT::v8f16:
+ case MVT::v8bf16:
case MVT::v8i16: OpcodeIndex = 0; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 1; break;
@@ -2482,7 +2538,16 @@ void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
Ops.push_back(N->getOperand(0)); // chain
- CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
+ SmallVector<EVT, 8> VTs;
+ VTs.push_back(N->getValueType(1));
+ VTs.push_back(N->getValueType(0));
+ VTs.push_back(N->getValueType(2));
+
+ SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
+ ReplaceUses(SDValue(N, 0), SDValue(New, 1));
+ ReplaceUses(SDValue(N, 1), SDValue(New, 0));
+ ReplaceUses(SDValue(N, 2), SDValue(New, 2));
+ CurDAG->RemoveDeadNode(N);
}
void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
@@ -2552,6 +2617,25 @@ void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}
+void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
+ SDLoc Loc(N);
+ SmallVector<SDValue, 8> Ops;
+
+ // One vector input, followed by a 32-bit word of bits to shift in
+ // and then an immediate shift count
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(N->getOperand(2));
+ int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+ Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
+
+ if (Predicated)
+ AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
+ else
+ AddEmptyMVEPredicateToOps(Ops, Loc);
+
+ CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), makeArrayRef(Ops));
+}
+
static bool SDValueToConstBool(SDValue SDVal) {
assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
@@ -2644,7 +2728,8 @@ void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
}
void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
- const uint16_t *const *Opcodes) {
+ const uint16_t *const *Opcodes,
+ bool HasWriteback) {
EVT VT = N->getValueType(0);
SDLoc Loc(N);
@@ -2664,23 +2749,141 @@ void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
}
EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
- EVT ResultTys[] = {DataTy, MVT::Other};
+ SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
+ unsigned PtrOperand = HasWriteback ? 1 : 2;
auto Data = SDValue(
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
SDValue Chain = N->getOperand(0);
- for (unsigned Stage = 0; Stage < NumVecs; ++Stage) {
- SDValue Ops[] = {Data, N->getOperand(2), Chain};
+  // Add an MVE_VLDn instruction for each vector, except the last.
+ for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
+ SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
auto LoadInst =
CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
Data = SDValue(LoadInst, 0);
Chain = SDValue(LoadInst, 1);
}
+  // The last load may need a writeback on it
+ if (HasWriteback)
+ ResultTys = {DataTy, MVT::i32, MVT::Other};
+ SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
+ auto LoadInst =
+ CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
- for (unsigned i = 0; i < NumVecs; i++)
+ unsigned i;
+ for (i = 0; i < NumVecs; i++)
ReplaceUses(SDValue(N, i),
- CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT, Data));
- ReplaceUses(SDValue(N, NumVecs), Chain);
+ CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
+ SDValue(LoadInst, 0)));
+ if (HasWriteback)
+ ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
+ ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
+ CurDAG->RemoveDeadNode(N);
+}
+
+void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
+ bool Wrapping, bool Predicated) {
+ EVT VT = N->getValueType(0);
+ SDLoc Loc(N);
+
+ uint16_t Opcode;
+ switch (VT.getScalarSizeInBits()) {
+ case 8:
+ Opcode = Opcodes[0];
+ break;
+ case 16:
+ Opcode = Opcodes[1];
+ break;
+ case 32:
+ Opcode = Opcodes[2];
+ break;
+ default:
+ llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
+ }
+
+ SmallVector<SDValue, 8> Ops;
+ unsigned OpIdx = 1;
+
+ SDValue Inactive;
+ if (Predicated)
+ Inactive = N->getOperand(OpIdx++);
+
+ Ops.push_back(N->getOperand(OpIdx++)); // base
+ if (Wrapping)
+ Ops.push_back(N->getOperand(OpIdx++)); // limit
+
+ SDValue ImmOp = N->getOperand(OpIdx++); // step
+ int ImmValue = cast<ConstantSDNode>(ImmOp)->getZExtValue();
+ Ops.push_back(getI32Imm(ImmValue, Loc));
+
+ if (Predicated)
+ AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
+ else
+ AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
+
+ CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
+}
+
+void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
+ size_t NumExtraOps, bool HasAccum) {
+ bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
+ SDLoc Loc(N);
+ SmallVector<SDValue, 8> Ops;
+
+ unsigned OpIdx = 1;
+
+ // Convert and append the immediate operand designating the coprocessor.
+  SDValue ImmCoproc = N->getOperand(OpIdx++);
+  uint32_t ImmCoprocVal = cast<ConstantSDNode>(ImmCoproc)->getZExtValue();
+ Ops.push_back(getI32Imm(ImmCoprocVal, Loc));
+
+ // For accumulating variants copy the low and high order parts of the
+ // accumulator into a register pair and add it to the operand vector.
+ if (HasAccum) {
+ SDValue AccLo = N->getOperand(OpIdx++);
+ SDValue AccHi = N->getOperand(OpIdx++);
+ if (IsBigEndian)
+ std::swap(AccLo, AccHi);
+ Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
+ }
+
+ // Copy extra operands as-is.
+ for (size_t I = 0; I < NumExtraOps; I++)
+ Ops.push_back(N->getOperand(OpIdx++));
+
+ // Convert and append the immediate operand
+ SDValue Imm = N->getOperand(OpIdx);
+ uint32_t ImmVal = cast<ConstantSDNode>(Imm)->getZExtValue();
+ Ops.push_back(getI32Imm(ImmVal, Loc));
+
+ // Accumulating variants are IT-predicable, add predicate operands.
+ if (HasAccum) {
+ SDValue Pred = getAL(CurDAG, Loc);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ Ops.push_back(Pred);
+ Ops.push_back(PredReg);
+ }
+
+  // Create the CDE instruction
+ SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
+ SDValue ResultPair = SDValue(InstrNode, 0);
+
+ // The original intrinsic had two outputs, and the output of the dual-register
+ // CDE instruction is a register pair. We need to extract the two subregisters
+ // and replace all uses of the original outputs with the extracted
+ // subregisters.
+ uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
+ if (IsBigEndian)
+ std::swap(SubRegs[0], SubRegs[1]);
+
+ for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
+ if (SDValue(N, ResIdx).use_empty())
+ continue;
+ SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
+ MVT::i32, ResultPair);
+ ReplaceUses(SDValue(N, ResIdx), SubReg);
+ }
+
CurDAG->RemoveDeadNode(N);
}
@@ -2689,6 +2892,7 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
const uint16_t *DOpcodes,
const uint16_t *QOpcodes0,
const uint16_t *QOpcodes1) {
+ assert(Subtarget->hasNEON());
assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
SDLoc dl(N);
@@ -2725,6 +2929,8 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
case MVT::v8i16:
case MVT::v4f16:
case MVT::v8f16:
+ case MVT::v4bf16:
+ case MVT::v8bf16:
OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32:
@@ -3202,7 +3408,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
MachineFunction& MF = CurDAG->getMachineFunction();
MachineMemOperand *MemOp =
MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
- MachineMemOperand::MOLoad, 4, 4);
+ MachineMemOperand::MOLoad, 4, Align(4));
CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
@@ -3222,8 +3428,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
// Set the alignment of the frame object to 4, to avoid having to generate
// more than one ADD
MachineFrameInfo &MFI = MF->getFrameInfo();
- if (MFI.getObjectAlignment(FI) < 4)
- MFI.setObjectAlignment(FI, 4);
+ if (MFI.getObjectAlign(FI) < Align(4))
+ MFI.setObjectAlignment(FI, Align(4));
CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
CurDAG->getTargetConstant(0, dl, MVT::i32));
return;
@@ -3486,6 +3692,59 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->RemoveDeadNode(N);
return;
}
+ case ARMISD::LDRD: {
+ if (Subtarget->isThumb2())
+ break; // TableGen handles isel in this case.
+ SDValue Base, RegOffset, ImmOffset;
+ const SDValue &Chain = N->getOperand(0);
+ const SDValue &Addr = N->getOperand(1);
+ SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
+ if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
+ // The register-offset variant of LDRD mandates that the register
+ // allocated to RegOffset is not reused in any of the remaining operands.
+ // This restriction is currently not enforced. Therefore emitting this
+ // variant is explicitly avoided.
+ Base = Addr;
+ RegOffset = CurDAG->getRegister(0, MVT::i32);
+ }
+ SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
+ SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
+ {MVT::Untyped, MVT::Other}, Ops);
+ SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
+ SDValue(New, 0));
+ SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
+ SDValue(New, 0));
+ transferMemOperands(N, New);
+ ReplaceUses(SDValue(N, 0), Lo);
+ ReplaceUses(SDValue(N, 1), Hi);
+ ReplaceUses(SDValue(N, 2), SDValue(New, 1));
+ CurDAG->RemoveDeadNode(N);
+ return;
+ }
+ case ARMISD::STRD: {
+ if (Subtarget->isThumb2())
+ break; // TableGen handles isel in this case.
+ SDValue Base, RegOffset, ImmOffset;
+ const SDValue &Chain = N->getOperand(0);
+ const SDValue &Addr = N->getOperand(3);
+ SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
+ if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
+ // The register-offset variant of STRD mandates that the register
+ // allocated to RegOffset is not reused in any of the remaining operands.
+ // This restriction is currently not enforced. Therefore emitting this
+ // variant is explicitly avoided.
+ Base = Addr;
+ RegOffset = CurDAG->getRegister(0, MVT::i32);
+ }
+ SDNode *RegPair =
+ createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
+ SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
+ SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
+ transferMemOperands(N, New);
+ ReplaceUses(SDValue(N, 0), SDValue(New, 0));
+ CurDAG->RemoveDeadNode(N);
+ return;
+ }
case ARMISD::LOOP_DEC: {
SDValue Ops[] = { N->getOperand(1),
N->getOperand(2),
@@ -3828,14 +4087,24 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD2_UPD: {
- static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
- ARM::VLD2d16wb_fixed,
- ARM::VLD2d32wb_fixed,
- ARM::VLD1q64wb_fixed};
- static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
- ARM::VLD2q16PseudoWB_fixed,
- ARM::VLD2q32PseudoWB_fixed };
- SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
+ if (Subtarget->hasNEON()) {
+ static const uint16_t DOpcodes[] = {
+ ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
+ ARM::VLD1q64wb_fixed};
+ static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
+ ARM::VLD2q16PseudoWB_fixed,
+ ARM::VLD2q32PseudoWB_fixed};
+ SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
+ } else {
+ static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
+ ARM::MVE_VLD21_8_wb};
+ static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
+ ARM::MVE_VLD21_16_wb};
+ static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
+ ARM::MVE_VLD21_32_wb};
+ static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
+ SelectMVE_VLD(N, 2, Opcodes, true);
+ }
return;
}
@@ -3855,17 +4124,30 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD4_UPD: {
- static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
- ARM::VLD4d16Pseudo_UPD,
- ARM::VLD4d32Pseudo_UPD,
- ARM::VLD1d64QPseudoWB_fixed};
- static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
- ARM::VLD4q16Pseudo_UPD,
- ARM::VLD4q32Pseudo_UPD };
- static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
- ARM::VLD4q16oddPseudo_UPD,
- ARM::VLD4q32oddPseudo_UPD };
- SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ if (Subtarget->hasNEON()) {
+ static const uint16_t DOpcodes[] = {
+ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
+ ARM::VLD1d64QPseudoWB_fixed};
+ static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD};
+ static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
+ ARM::VLD4q16oddPseudo_UPD,
+ ARM::VLD4q32oddPseudo_UPD};
+ SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ } else {
+ static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
+ ARM::MVE_VLD42_8,
+ ARM::MVE_VLD43_8_wb};
+ static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
+ ARM::MVE_VLD42_16,
+ ARM::MVE_VLD43_16_wb};
+ static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
+ ARM::MVE_VLD42_32,
+ ARM::MVE_VLD43_32_wb};
+ static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
+ SelectMVE_VLD(N, 4, Opcodes, true);
+ }
return;
}
@@ -3913,15 +4195,17 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VST2_UPD: {
- static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
- ARM::VST2d16wb_fixed,
- ARM::VST2d32wb_fixed,
- ARM::VST1q64wb_fixed};
- static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
- ARM::VST2q16PseudoWB_fixed,
- ARM::VST2q32PseudoWB_fixed };
- SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
- return;
+ if (Subtarget->hasNEON()) {
+ static const uint16_t DOpcodes[] = {
+ ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
+ ARM::VST1q64wb_fixed};
+ static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
+ ARM::VST2q16PseudoWB_fixed,
+ ARM::VST2q32PseudoWB_fixed};
+ SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
+ return;
+ }
+ break;
}
case ARMISD::VST3_UPD: {
@@ -3940,18 +4224,20 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VST4_UPD: {
- static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
- ARM::VST4d16Pseudo_UPD,
- ARM::VST4d32Pseudo_UPD,
- ARM::VST1d64QPseudoWB_fixed};
- static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
- ARM::VST4q16Pseudo_UPD,
- ARM::VST4q32Pseudo_UPD };
- static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
- ARM::VST4q16oddPseudo_UPD,
- ARM::VST4q32oddPseudo_UPD };
- SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
- return;
+ if (Subtarget->hasNEON()) {
+ static const uint16_t DOpcodes[] = {
+ ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
+ ARM::VST1d64QPseudoWB_fixed};
+ static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD};
+ static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
+ ARM::VST4q16oddPseudo_UPD,
+ ARM::VST4q32oddPseudo_UPD};
+ SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ return;
+ }
+ break;
}
case ARMISD::VST2LN_UPD: {
@@ -4430,7 +4716,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
ARM::MVE_VLD21_32};
static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
- SelectMVE_VLD(N, 2, Opcodes);
+ SelectMVE_VLD(N, 2, Opcodes, false);
return;
}
@@ -4444,7 +4730,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
ARM::MVE_VLD42_32,
ARM::MVE_VLD43_32};
static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
- SelectMVE_VLD(N, 4, Opcodes);
+ SelectMVE_VLD(N, 4, Opcodes, false);
return;
}
}
@@ -4457,6 +4743,29 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
default:
break;
+ // Scalar f32 -> bf16
+ case Intrinsic::arm_neon_vcvtbfp2bf: {
+ SDLoc dl(N);
+ const SDValue &Src = N->getOperand(1);
+ llvm::EVT DestTy = N->getValueType(0);
+ SDValue Pred = getAL(CurDAG, dl);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { Src, Src, Pred, Reg0 };
+ CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
+ return;
+ }
+
+ // Vector v4f32 -> v4bf16
+ case Intrinsic::arm_neon_vcvtfp2bf: {
+ SDLoc dl(N);
+ const SDValue &Src = N->getOperand(1);
+ SDValue Pred = getAL(CurDAG, dl);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { Src, Pred, Reg0 };
+ CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
+ return;
+ }
+
case Intrinsic::arm_mve_urshrl:
SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
return;
@@ -4475,18 +4784,21 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_mve_sqrshrl:
SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
return;
- case Intrinsic::arm_mve_lsll:
- SelectMVE_LongShift(N, ARM::MVE_LSLLr, false, false);
- return;
- case Intrinsic::arm_mve_asrl:
- SelectMVE_LongShift(N, ARM::MVE_ASRLr, false, false);
- return;
case Intrinsic::arm_mve_vadc:
case Intrinsic::arm_mve_vadc_predicated:
SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
IntNo == Intrinsic::arm_mve_vadc_predicated);
return;
+ case Intrinsic::arm_mve_vsbc:
+ case Intrinsic::arm_mve_vsbc_predicated:
+ SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
+ IntNo == Intrinsic::arm_mve_vsbc_predicated);
+ return;
+ case Intrinsic::arm_mve_vshlc:
+ case Intrinsic::arm_mve_vshlc_predicated:
+ SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
+ return;
case Intrinsic::arm_mve_vmlldava:
case Intrinsic::arm_mve_vmlldava_predicated: {
@@ -4524,6 +4836,80 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
OpcodesS, OpcodesU);
return;
}
+
+ case Intrinsic::arm_mve_vidup:
+ case Intrinsic::arm_mve_vidup_predicated: {
+ static const uint16_t Opcodes[] = {
+ ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
+ };
+ SelectMVE_VxDUP(N, Opcodes, false,
+ IntNo == Intrinsic::arm_mve_vidup_predicated);
+ return;
+ }
+
+ case Intrinsic::arm_mve_vddup:
+ case Intrinsic::arm_mve_vddup_predicated: {
+ static const uint16_t Opcodes[] = {
+ ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
+ };
+ SelectMVE_VxDUP(N, Opcodes, false,
+ IntNo == Intrinsic::arm_mve_vddup_predicated);
+ return;
+ }
+
+ case Intrinsic::arm_mve_viwdup:
+ case Intrinsic::arm_mve_viwdup_predicated: {
+ static const uint16_t Opcodes[] = {
+ ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
+ };
+ SelectMVE_VxDUP(N, Opcodes, true,
+ IntNo == Intrinsic::arm_mve_viwdup_predicated);
+ return;
+ }
+
+ case Intrinsic::arm_mve_vdwdup:
+ case Intrinsic::arm_mve_vdwdup_predicated: {
+ static const uint16_t Opcodes[] = {
+ ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
+ };
+ SelectMVE_VxDUP(N, Opcodes, true,
+ IntNo == Intrinsic::arm_mve_vdwdup_predicated);
+ return;
+ }
+
+ case Intrinsic::arm_cde_cx1d:
+ case Intrinsic::arm_cde_cx1da:
+ case Intrinsic::arm_cde_cx2d:
+ case Intrinsic::arm_cde_cx2da:
+ case Intrinsic::arm_cde_cx3d:
+ case Intrinsic::arm_cde_cx3da: {
+ bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
+ IntNo == Intrinsic::arm_cde_cx2da ||
+ IntNo == Intrinsic::arm_cde_cx3da;
+ size_t NumExtraOps;
+ uint16_t Opcode;
+ switch (IntNo) {
+ case Intrinsic::arm_cde_cx1d:
+ case Intrinsic::arm_cde_cx1da:
+ NumExtraOps = 0;
+ Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
+ break;
+ case Intrinsic::arm_cde_cx2d:
+ case Intrinsic::arm_cde_cx2da:
+ NumExtraOps = 1;
+ Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
+ break;
+ case Intrinsic::arm_cde_cx3d:
+ case Intrinsic::arm_cde_cx3da:
+ NumExtraOps = 2;
+ Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
+ break;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+ SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
+ return;
+ }
}
break;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 9f504b1eaa42..287e2e60e572 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -210,6 +210,8 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
if (!VT.isFloatingPoint() &&
VT != MVT::v2i64 && VT != MVT::v1i64)
@@ -284,6 +286,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
// Vector reductions
@@ -292,6 +296,10 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
+ setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
if (!HasMVEFP) {
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
@@ -341,6 +349,10 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::FMINNUM, VT, Legal);
setOperationAction(ISD::FMAXNUM, VT, Legal);
setOperationAction(ISD::FROUND, VT, Legal);
+ setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
// No native support for these.
setOperationAction(ISD::FDIV, VT, Expand);
@@ -358,6 +370,17 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
}
}
+  // Custom-expand vector reductions that are smaller than legal to prevent
+  // spurious zero elements from being added.
+ setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom);
+ setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom);
+ setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom);
+ setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom);
+ setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
+ setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom);
+ setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom);
+ setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom);
+
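
The Custom markings above exist because widening these sub-legal reductions to a legal width can introduce extra zero lanes. A standalone sketch (editor's illustration, not part of the change; a scalar loop models the effect of zero lanes on a multiplicative reduction):

    #include <cassert>

    // Model of a multiplicative vector reduction over N lanes.
    static float reduceFMul(const float *Lanes, unsigned N) {
      float Acc = 1.0f;
      for (unsigned I = 0; I < N; ++I)
        Acc *= Lanes[I];
      return Acc;
    }

    int main() {
      float V4[4] = {1.0f, 2.0f, 3.0f, 4.0f};
      // Padding a v4f16 reduction out to the legal v8f16 with zero lanes
      // changes the answer, which is the spurious-zero problem the Custom
      // lowering avoids by reducing only the lanes that really exist.
      float V8[8] = {1.0f, 2.0f, 3.0f, 4.0f, 0.0f, 0.0f, 0.0f, 0.0f};
      assert(reduceFMul(V4, 4) == 24.0f);
      assert(reduceFMul(V8, 8) == 0.0f);
      return 0;
    }
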
// We 'support' these types up to bitcast/load/store level, regardless of
// MVE integer-only / float support. Only doing FP data processing on the FP
// vector types is inhibited at integer-only level.
@@ -717,13 +740,19 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (Subtarget->hasFullFP16()) {
addRegisterClass(MVT::f16, &ARM::HPRRegClass);
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
- setOperationAction(ISD::BITCAST, MVT::i32, Custom);
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
}
+ if (Subtarget->hasBF16()) {
+ addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
+ setAllExpand(MVT::bf16);
+ if (!Subtarget->hasFullFP16())
+ setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
+ }
+
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
@@ -771,6 +800,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
addQRTypeForNEON(MVT::v8f16);
addDRTypeForNEON(MVT::v4f16);
}
+
+ if (Subtarget->hasBF16()) {
+ addQRTypeForNEON(MVT::v8bf16);
+ addDRTypeForNEON(MVT::v4bf16);
+ }
}
if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
@@ -912,9 +946,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMA, MVT::v4f32, Expand);
}
- setTargetDAGCombine(ISD::INTRINSIC_VOID);
- setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
- setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::SRA);
@@ -938,10 +969,24 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
+ setTargetDAGCombine(ISD::INTRINSIC_VOID);
+ setTargetDAGCombine(ISD::VECREDUCE_ADD);
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::BITCAST);
+ }
+ if (Subtarget->hasMVEIntegerOps()) {
+ setTargetDAGCombine(ISD::SMIN);
+ setTargetDAGCombine(ISD::UMIN);
+ setTargetDAGCombine(ISD::SMAX);
+ setTargetDAGCombine(ISD::UMAX);
+ setTargetDAGCombine(ISD::FP_EXTEND);
}
if (!Subtarget->hasFP64()) {
@@ -1073,6 +1118,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRA, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
+ setOperationAction(ISD::LOAD, MVT::i64, Custom);
+ setOperationAction(ISD::STORE, MVT::i64, Custom);
// MVE lowers 64 bit shifts to lsll and lsrl
// assuming that ISD::SRL and SRA of i64 are already marked custom
@@ -1419,12 +1466,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
if (Subtarget->hasNEON()) {
- // vmin and vmax aren't available in a scalar form, so we use
- // a NEON instruction with an undef lane instead.
- setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
- setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
- setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
+ // vmin and vmax aren't available in a scalar form, so we can use
+ // a NEON instruction with an undef lane instead. This has a performance
+ // penalty on some cores, so we don't do this unless we have been
+ // asked to by the core tuning model.
+ if (Subtarget->useNEONForSinglePrecisionFP()) {
+ setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
+ }
setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
@@ -1452,6 +1503,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::XOR);
+ if (Subtarget->hasMVEIntegerOps())
+ setTargetDAGCombine(ISD::VSELECT);
+
if (Subtarget->hasV6Ops())
setTargetDAGCombine(ISD::SRL);
if (Subtarget->isThumb1Only())
@@ -1550,10 +1604,12 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::CALL: return "ARMISD::CALL";
case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
+ case ARMISD::tSECALL: return "ARMISD::tSECALL";
case ARMISD::BRCOND: return "ARMISD::BRCOND";
case ARMISD::BR_JT: return "ARMISD::BR_JT";
case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
+ case ARMISD::SERET_FLAG: return "ARMISD::SERET_FLAG";
case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
case ARMISD::CMP: return "ARMISD::CMP";
@@ -1606,10 +1662,14 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
+ case ARMISD::LDRD: return "ARMISD::LDRD";
+ case ARMISD::STRD: return "ARMISD::STRD";
+
case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
case ARMISD::PREDICATE_CAST: return "ARMISD::PREDICATE_CAST";
+ case ARMISD::VECTOR_REG_CAST: return "ARMISD::VECTOR_REG_CAST";
case ARMISD::VCMP: return "ARMISD::VCMP";
case ARMISD::VCMPZ: return "ARMISD::VCMPZ";
case ARMISD::VTST: return "ARMISD::VTST";
@@ -1650,8 +1710,28 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VTBL1: return "ARMISD::VTBL1";
case ARMISD::VTBL2: return "ARMISD::VTBL2";
case ARMISD::VMOVN: return "ARMISD::VMOVN";
+ case ARMISD::VQMOVNs: return "ARMISD::VQMOVNs";
+ case ARMISD::VQMOVNu: return "ARMISD::VQMOVNu";
+ case ARMISD::VCVTN: return "ARMISD::VCVTN";
+ case ARMISD::VCVTL: return "ARMISD::VCVTL";
case ARMISD::VMULLs: return "ARMISD::VMULLs";
case ARMISD::VMULLu: return "ARMISD::VMULLu";
+ case ARMISD::VADDVs: return "ARMISD::VADDVs";
+ case ARMISD::VADDVu: return "ARMISD::VADDVu";
+ case ARMISD::VADDLVs: return "ARMISD::VADDLVs";
+ case ARMISD::VADDLVu: return "ARMISD::VADDLVu";
+ case ARMISD::VADDLVAs: return "ARMISD::VADDLVAs";
+ case ARMISD::VADDLVAu: return "ARMISD::VADDLVAu";
+ case ARMISD::VADDLVps: return "ARMISD::VADDLVps";
+ case ARMISD::VADDLVpu: return "ARMISD::VADDLVpu";
+ case ARMISD::VADDLVAps: return "ARMISD::VADDLVAps";
+ case ARMISD::VADDLVApu: return "ARMISD::VADDLVApu";
+ case ARMISD::VMLAVs: return "ARMISD::VMLAVs";
+ case ARMISD::VMLAVu: return "ARMISD::VMLAVu";
+ case ARMISD::VMLALVs: return "ARMISD::VMLALVs";
+ case ARMISD::VMLALVu: return "ARMISD::VMLALVu";
+ case ARMISD::VMLALVAs: return "ARMISD::VMLALVAs";
+ case ARMISD::VMLALVAu: return "ARMISD::VMLALVAu";
case ARMISD::UMAAL: return "ARMISD::UMAAL";
case ARMISD::UMLAL: return "ARMISD::UMLAL";
case ARMISD::SMLAL: return "ARMISD::SMLAL";
@@ -1955,6 +2035,35 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
}
}
+SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
+ MVT LocVT, MVT ValVT, SDValue Val) const {
+ Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
+ Val);
+ if (Subtarget->hasFullFP16()) {
+ Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
+ } else {
+ Val = DAG.getNode(ISD::TRUNCATE, dl,
+ MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
+ Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
+ }
+ return Val;
+}
+
+SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
+ MVT LocVT, MVT ValVT,
+ SDValue Val) const {
+ if (Subtarget->hasFullFP16()) {
+ Val = DAG.getNode(ARMISD::VMOVrh, dl,
+ MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
+ } else {
+ Val = DAG.getNode(ISD::BITCAST, dl,
+ MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
+ Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
+ MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
+}
+
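
At the bit level these two helpers move a half-precision payload between its own type and the 32-bit location it is passed in. A minimal standalone sketch (editor's illustration, assuming the soft-float path, where the payload is zero-extended into the low 16 bits of the container):

    #include <cassert>
    #include <cstdint>

    // Soft-float model: zero-extend the f16 bits into the i32 location on the
    // way out, truncate them back on the way in.
    static uint32_t moveFromHPRBits(uint16_t HalfBits) {
      return static_cast<uint32_t>(HalfBits);
    }

    static uint16_t moveToHPRBits(uint32_t LocBits) {
      return static_cast<uint16_t>(LocBits);
    }

    int main() {
      const uint16_t OnePointFive = 0x3E00; // IEEE-754 binary16 encoding of 1.5
      uint32_t Passed = moveFromHPRBits(OnePointFive);
      assert(Passed == 0x00003E00u);                 // payload sits in the LSBs
      assert(moveToHPRBits(Passed) == OnePointFive); // and round-trips unchanged
      return 0;
    }
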
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue ARMTargetLowering::LowerCallResult(
@@ -1982,7 +2091,8 @@ SDValue ARMTargetLowering::LowerCallResult(
}
SDValue Val;
- if (VA.needsCustom()) {
+ if (VA.needsCustom() &&
+ (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
// Handle f64 or half of a v2f64.
SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
InFlag);
@@ -2031,6 +2141,13 @@ SDValue ARMTargetLowering::LowerCallResult(
break;
}
+ // f16 arguments have their size extended to 4 bytes and passed as if they
+ // had been copied to the LSBs of a 32-bit register.
+ // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
+ if (VA.needsCustom() &&
+ (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
+ Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
+
InVals.push_back(Val);
}
@@ -2097,22 +2214,34 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool isVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineFunction::CallSiteInfo CSInfo;
bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
bool isThisReturn = false;
+ bool isCmseNSCall = false;
bool PreferIndirect = false;
+ // Determine whether this is a non-secure function call.
+ if (CLI.CB && CLI.CB->getAttributes().hasFnAttribute("cmse_nonsecure_call"))
+ isCmseNSCall = true;
+
// Disable tail calls if they're not supported.
if (!Subtarget->supportsTailCall())
isTailCall = false;
+ // For both the non-secure calls and the returns from a CMSE entry function,
+ // the function needs to do some extra work after the call, or before the
+ // return, respectively, thus it cannot end with a tail call.
+ if (isCmseNSCall || AFI->isCmseNSEntryFunction())
+ isTailCall = false;
+
if (isa<GlobalAddressSDNode>(Callee)) {
// If we're optimizing for minimum size and the function is called three or
// more times in this block, we can improve codesize by calling indirectly
// as BLXr has a 16-bit encoding.
auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
- if (CLI.CS) {
- auto *BB = CLI.CS.getParent();
+ if (CLI.CB) {
+ auto *BB = CLI.CB->getParent();
PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
count_if(GV->users(), [&BB](const User *U) {
return isa<Instruction>(U) &&
@@ -2126,7 +2255,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Callee, CallConv, isVarArg, isStructRet,
MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
PreferIndirect);
- if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
+ if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
// We don't support GuaranteedTailCallOpt for ARM, only automatically
@@ -2187,31 +2316,50 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
break;
}
- // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
- if (VA.needsCustom()) {
- if (VA.getLocVT() == MVT::v2f64) {
- SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
- DAG.getConstant(0, dl, MVT::i32));
- SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
- DAG.getConstant(1, dl, MVT::i32));
-
- PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
- VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
-
- VA = ArgLocs[++i]; // skip ahead to next loc
- if (VA.isRegLoc()) {
- PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
- VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
- } else {
- assert(VA.isMemLoc());
+ // f16 arguments have their size extended to 4 bytes and passed as if they
+ // had been copied to the LSBs of a 32-bit register.
+ // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
+ if (VA.needsCustom() &&
+ (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
+ Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
+ } else {
+ // f16 arguments could have been extended prior to argument lowering.
+        // Mask these arguments if this is a CMSE nonsecure call.
+ auto ArgVT = Outs[realArgIdx].ArgVT;
+ if (isCmseNSCall && (ArgVT == MVT::f16)) {
+ auto LocBits = VA.getLocVT().getSizeInBits();
+ auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
+ SDValue Mask =
+ DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
+ Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ }
+ }
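
The masking step above can be pictured with a small standalone sketch (editor's illustration, assuming a 32-bit location carrying a 16-bit f16 payload): only the low 16 bits are argument bits, so the AND clears whatever the upper bits held before the value crosses to non-secure code.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Loc = 0xDEAD3C00u;     // upper half still holds unrelated data
      uint32_t Mask = (1u << 16) - 1; // APInt::getLowBitsSet(32, 16) equivalent
      uint32_t Sanitized = Loc & Mask;
      assert(Sanitized == 0x00003C00u); // only the f16 bits survive
      return 0;
    }
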
- MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
- dl, DAG, VA, Flags));
- }
- } else {
- PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
+ // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
+ if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
+ SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(0, dl, MVT::i32));
+ SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
+ DAG.getConstant(1, dl, MVT::i32));
+
+ PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
+ StackPtr, MemOpChains, Flags);
+
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ if (VA.isRegLoc()) {
+ PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
StackPtr, MemOpChains, Flags);
+ } else {
+ assert(VA.isMemLoc());
+
+ MemOpChains.push_back(
+ LowerMemOpCallTo(Chain, StackPtr, Op1, dl, DAG, VA, Flags));
}
+ } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
+ PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
+ StackPtr, MemOpChains, Flags);
} else if (VA.isRegLoc()) {
if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
Outs[0].VT == MVT::i32) {
@@ -2222,7 +2370,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
isThisReturn = true;
}
const TargetOptions &Options = DAG.getTarget().Options;
- if (Options.EnableDebugEntryValues)
+ if (Options.EmitCallSiteInfo)
CSInfo.emplace_back(VA.getLocReg(), i);
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else if (isByVal) {
@@ -2245,9 +2393,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
- SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
- MachinePointerInfo(),
- DAG.InferPtrAlignment(AddArg));
+ SDValue Load =
+ DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
+ DAG.InferPtrAlign(AddArg));
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(j, Load));
}
@@ -2268,8 +2416,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
MVT::i32);
- SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
- MVT::i32);
+ SDValue AlignNode =
+ DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
@@ -2311,7 +2459,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
bool isLocalARMFunc = false;
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
auto PtrVt = getPointerTy(DAG.getDataLayout());
if (Subtarget->genLongCalls()) {
@@ -2327,7 +2474,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
// Get the address of the callee into a register
- SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(
PtrVt, dl, DAG.getEntryNode(), CPAddr,
@@ -2341,7 +2488,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
ARMPCLabelIndex, 0);
// Get the address of the callee into a register
- SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(
PtrVt, dl, DAG.getEntryNode(), CPAddr,
@@ -2393,7 +2540,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ARMConstantPoolValue *CPV =
ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
ARMPCLabelIndex, 4);
- SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(
PtrVt, dl, DAG.getEntryNode(), CPAddr,
@@ -2405,10 +2552,31 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
+ if (isCmseNSCall) {
+ assert(!isARMFunc && !isDirect &&
+ "Cannot handle call to ARM function or direct call");
+ if (NumBytes > 0) {
+ DiagnosticInfoUnsupported Diag(DAG.getMachineFunction().getFunction(),
+ "call to non-secure function would "
+ "require passing arguments on stack",
+ dl.getDebugLoc());
+ DAG.getContext()->diagnose(Diag);
+ }
+ if (isStructRet) {
+ DiagnosticInfoUnsupported Diag(
+ DAG.getMachineFunction().getFunction(),
+ "call to non-secure function would return value through pointer",
+ dl.getDebugLoc());
+ DAG.getContext()->diagnose(Diag);
+ }
+ }
+
// FIXME: handle tail calls differently.
unsigned CallOpc;
if (Subtarget->isThumb()) {
- if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
+ if (isCmseNSCall)
+ CallOpc = ARMISD::tSECALL;
+ else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
else
CallOpc = ARMISD::CALL;
@@ -2468,6 +2636,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Returns a chain and a flag for retval copy to use.
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
+ DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
InFlag = Chain.getValue(1);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
@@ -2488,15 +2657,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
/// and then confiscate the rest of the parameter registers to insure
/// this.
void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
- unsigned Align) const {
+ Align Alignment) const {
// Byval (as with any stack) slots are always at least 4 byte aligned.
- Align = std::max(Align, 4U);
+ Alignment = std::max(Alignment, Align(4));
unsigned Reg = State->AllocateReg(GPRArgRegs);
if (!Reg)
return;
- unsigned AlignInRegs = Align / 4;
+ unsigned AlignInRegs = Alignment.value() / 4;
unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
for (unsigned i = 0; i < Waste; ++i)
Reg = State->AllocateReg(GPRArgRegs);
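
A standalone model of the waste computation above (editor's sketch; register numbers 0-4 stand in for r0-r4, matching the assumption of consecutive enumerators that the (ARM::R4 - Reg) expression already makes):

    #include <cassert>

    // Registers skipped so that a byval argument with the given alignment,
    // expressed in registers, starts on a suitably aligned register.
    static unsigned wastedRegs(unsigned FirstFreeReg, unsigned AlignInRegs) {
      const unsigned R4 = 4; // first register past the r0-r3 argument registers
      return (R4 - FirstFreeReg) % AlignInRegs;
    }

    int main() {
      // An 8-byte-aligned byval (AlignInRegs == 2) whose first free register
      // is r1 wastes r1 and starts in r2.
      assert(wastedRegs(/*r1=*/1, /*AlignInRegs=*/2) == 1);
      // Starting at r0 or r2 needs no padding register.
      assert(wastedRegs(/*r0=*/0, /*AlignInRegs=*/2) == 0);
      assert(wastedRegs(/*r2=*/2, /*AlignInRegs=*/2) == 0);
      return 0;
    }
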
@@ -2635,9 +2804,11 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
// Check that the call results are passed in the same way.
LLVMContext &C = *DAG.getContext();
- if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
- CCAssignFnForReturn(CalleeCC, isVarArg),
- CCAssignFnForReturn(CallerCC, isVarArg)))
+ if (!CCState::resultsCompatible(
+ getEffectiveCallingConv(CalleeCC, isVarArg),
+ getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
+ CCAssignFnForReturn(CalleeCC, isVarArg),
+ CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
return false;
// The callee has to preserve all registers the caller needs to preserve.
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
@@ -2678,7 +2849,7 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
if (VA.getLocInfo() == CCValAssign::Indirect)
return false;
- if (VA.needsCustom()) {
+ if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
// f64 and vector types are split into multiple registers or
// register/stack-slot combinations. The types will not match
// the registers; give up on memory f64 refs until we figure
@@ -2777,6 +2948,17 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
AFI->setReturnRegsCount(RVLocs.size());
+ // Report error if cmse entry function returns structure through first ptr arg.
+ if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
+ // Note: using an empty SDLoc(), as the first line of the function is a
+ // better place to report than the last line.
+ DiagnosticInfoUnsupported Diag(
+ DAG.getMachineFunction().getFunction(),
+ "secure entry function would return value through pointer",
+ SDLoc().getDebugLoc());
+ DAG.getContext()->diagnose(Diag);
+ }
+
// Copy the result values into the output registers.
for (unsigned i = 0, realRVLocIdx = 0;
i != RVLocs.size();
@@ -2819,7 +3001,24 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
break;
}
- if (VA.needsCustom()) {
+ // Mask f16 arguments if this is a CMSE nonsecure entry.
+ auto RetVT = Outs[realRVLocIdx].ArgVT;
+ if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
+ if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
+ Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
+ } else {
+ auto LocBits = VA.getLocVT().getSizeInBits();
+ auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
+ SDValue Mask =
+ DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
+ Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ }
+ }
+
+ if (VA.needsCustom() &&
+ (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
if (VA.getLocVT() == MVT::v2f64) {
// Extract the first half and return it in two registers.
SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
@@ -2827,15 +3026,15 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Half);
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- HalfGPRs.getValue(isLittleEndian ? 0 : 1),
- Flag);
+ Chain =
+ DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- HalfGPRs.getValue(isLittleEndian ? 1 : 0),
- Flag);
+ Chain =
+ DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
@@ -2849,22 +3048,20 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Arg);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- fmrrd.getValue(isLittleEndian ? 0 : 1),
- Flag);
+ fmrrd.getValue(isLittleEndian ? 0 : 1), Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- fmrrd.getValue(isLittleEndian ? 1 : 0),
- Flag);
+ fmrrd.getValue(isLittleEndian ? 1 : 0), Flag);
} else
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
// Guarantee that all emitted copies are
     // stuck together, avoiding anything being scheduled in between that
     // could clobber the return registers.
Flag = Chain.getValue(1);
- RetOps.push_back(DAG.getRegister(VA.getLocReg(),
- ReturnF16 ? MVT::f16 : VA.getLocVT()));
+ RetOps.push_back(DAG.getRegister(
+ VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
}
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *I =
@@ -2898,7 +3095,9 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
return LowerInterruptReturn(RetOps, dl, DAG);
}
- return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
+ ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_FLAG :
+ ARMISD::RET_FLAG;
+ return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
}
bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
@@ -3040,11 +3239,10 @@ SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
}
if (CP->isMachineConstantPoolEntry())
- Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
- CP->getAlignment());
+ Res =
+ DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
else
- Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
- CP->getAlignment());
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign());
return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}
@@ -3063,14 +3261,14 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
SDValue CPAddr;
bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
if (!IsPositionIndependent) {
- CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
+ CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
} else {
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
ARMCP::CPBlockAddress, PCAdj);
- CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
}
CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
SDValue Result = DAG.getLoad(
@@ -3199,8 +3397,9 @@ ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
const auto *GA = cast<GlobalAddressSDNode>(Op);
auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
SDValue Offset = DAG.getLoad(
- PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
- DAG.getTargetConstantPool(CPV, PtrVT, 4)),
+ PtrVT, DL, Chain,
+ DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
+ DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
@@ -3219,7 +3418,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
- SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
Argument = DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), Argument,
@@ -3270,7 +3469,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
true);
- Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(
PtrVT, dl, Chain, Offset,
@@ -3288,7 +3487,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
assert(model == TLSModel::LocalExec);
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
- Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(
PtrVT, dl, Chain, Offset,
@@ -3391,11 +3590,11 @@ static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
// that are strings for simplicity.
auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
- unsigned Align = DAG.getDataLayout().getPreferredAlignment(GVar);
+ Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
unsigned RequiredPadding = 4 - (Size % 4);
bool PaddingPossible =
RequiredPadding == 4 || (CDAInit && CDAInit->isString());
- if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
+ if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
Size == 0)
return SDValue();
@@ -3434,8 +3633,7 @@ static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
}
auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
- SDValue CPAddr =
- DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));
if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
AFI->markGlobalAsPromotedToConstantPool(GVar);
AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
@@ -3505,7 +3703,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
} else { // use literal pool for address constant
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
- SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
RelAddr = DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), CPAddr,
@@ -3525,7 +3723,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
DAG.getTargetGlobalAddress(GV, dl, PtrVT));
} else {
- SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
return DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), CPAddr,
@@ -3636,7 +3834,7 @@ SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
SDValue ReturnAddress =
DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
- std::vector<EVT> ResultTys = {MVT::Other, MVT::Glue};
+ constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
SDValue Callee =
DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
SDValue RegisterMask = DAG.getRegisterMask(Mask);
@@ -3720,7 +3918,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
ARMCP::CPLSDA, PCAdj);
- CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), CPAddr,
@@ -3782,6 +3980,15 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
case Intrinsic::arm_mve_pred_v2i:
return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
Op.getOperand(1));
+ case Intrinsic::arm_mve_vreinterpretq:
+ return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1));
+ case Intrinsic::arm_mve_lsll:
+ return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case Intrinsic::arm_mve_asrl:
+ return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
}
}
@@ -3982,6 +4189,42 @@ void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
AFI->setVarArgsFrameIndex(FrameIndex);
}
+bool ARMTargetLowering::splitValueIntoRegisterParts(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
+ unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
+ bool IsABIRegCopy = CC.hasValue();
+ EVT ValueVT = Val.getValueType();
+ if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
+ PartVT == MVT::f32) {
+ unsigned ValueBits = ValueVT.getSizeInBits();
+ unsigned PartBits = PartVT.getSizeInBits();
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
+ Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ Parts[0] = Val;
+ return true;
+ }
+ return false;
+}
+
+SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
+ SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
+ bool IsABIRegCopy = CC.hasValue();
+ if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
+ PartVT == MVT::f32) {
+ unsigned ValueBits = ValueVT.getSizeInBits();
+ unsigned PartBits = PartVT.getSizeInBits();
+ SDValue Val = Parts[0];
+
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+ return Val;
+ }
+ return SDValue();
+}
+
SDValue ARMTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@@ -4054,44 +4297,41 @@ SDValue ARMTargetLowering::LowerFormalArguments(
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
- if (VA.needsCustom()) {
+ if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
// f64 and vector types are split up into multiple registers or
// combinations of registers and stack slots.
- if (VA.getLocVT() == MVT::v2f64) {
- SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
- Chain, DAG, dl);
- VA = ArgLocs[++i]; // skip ahead to next loc
- SDValue ArgValue2;
- if (VA.isMemLoc()) {
- int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(
- DAG.getMachineFunction(), FI));
- } else {
- ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
- Chain, DAG, dl);
- }
- ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
- ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
- ArgValue, ArgValue1,
- DAG.getIntPtrConstant(0, dl));
- ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
- ArgValue, ArgValue2,
- DAG.getIntPtrConstant(1, dl));
- } else
- ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
+ SDValue ArgValue1 =
+ GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ SDValue ArgValue2;
+ if (VA.isMemLoc()) {
+ int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgValue2 = DAG.getLoad(
+ MVT::f64, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
+ } else {
+ ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
+ }
+ ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
+ ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
+ ArgValue1, DAG.getIntPtrConstant(0, dl));
+ ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
+ ArgValue2, DAG.getIntPtrConstant(1, dl));
+ } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
+ ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
} else {
const TargetRegisterClass *RC;
-
- if (RegVT == MVT::f16)
+ if (RegVT == MVT::f16 || RegVT == MVT::bf16)
RC = &ARM::HPRRegClass;
else if (RegVT == MVT::f32)
RC = &ARM::SPRRegClass;
- else if (RegVT == MVT::f64 || RegVT == MVT::v4f16)
+ else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
+ RegVT == MVT::v4bf16)
RC = &ARM::DPRRegClass;
- else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16)
+ else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
+ RegVT == MVT::v8bf16)
RC = &ARM::QPRRegClass;
else if (RegVT == MVT::i32)
RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
@@ -4131,6 +4371,13 @@ SDValue ARMTargetLowering::LowerFormalArguments(
break;
}
+ // f16 arguments have their size extended to 4 bytes and passed as if they
+ // had been copied to the LSBs of a 32-bit register.
+ // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
+ if (VA.needsCustom() &&
+ (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
+ ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue);
+
InVals.push_back(ArgValue);
} else { // VA.isRegLoc()
// sanity check
@@ -5709,85 +5956,27 @@ static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
/// operand type is illegal (e.g., v2f32 for a target that doesn't support
/// vectors), since the legalizer won't know what to do with that.
-static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
- const ARMSubtarget *Subtarget) {
+SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc dl(N);
SDValue Op = N->getOperand(0);
- // This function is only supposed to be called for i64 types, either as the
- // source or destination of the bit convert.
+ // This function is only supposed to be called for i16 and i64 types, either
+ // as the source or destination of the bit convert.
EVT SrcVT = Op.getValueType();
EVT DstVT = N->getValueType(0);
- const bool HasFullFP16 = Subtarget->hasFullFP16();
-
- if (SrcVT == MVT::f32 && DstVT == MVT::i32) {
- // FullFP16: half values are passed in S-registers, and we don't
- // need any of the bitcast and moves:
- //
- // t2: f32,ch = CopyFromReg t0, Register:f32 %0
- // t5: i32 = bitcast t2
- // t18: f16 = ARMISD::VMOVhr t5
- if (Op.getOpcode() != ISD::CopyFromReg ||
- Op.getValueType() != MVT::f32)
- return SDValue();
-
- auto Move = N->use_begin();
- if (Move->getOpcode() != ARMISD::VMOVhr)
- return SDValue();
-
- SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
- SDValue Copy = DAG.getNode(ISD::CopyFromReg, SDLoc(Op), MVT::f16, Ops);
- DAG.ReplaceAllUsesWith(*Move, &Copy);
- return Copy;
- }
-
- if (SrcVT == MVT::i16 && DstVT == MVT::f16) {
- if (!HasFullFP16)
- return SDValue();
- // SoftFP: read half-precision arguments:
- //
- // t2: i32,ch = ...
- // t7: i16 = truncate t2 <~~~~ Op
- // t8: f16 = bitcast t7 <~~~~ N
- //
- if (Op.getOperand(0).getValueType() == MVT::i32)
- return DAG.getNode(ARMISD::VMOVhr, SDLoc(Op),
- MVT::f16, Op.getOperand(0));
-
- return SDValue();
- }
- // Half-precision return values
- if (SrcVT == MVT::f16 && DstVT == MVT::i16) {
- if (!HasFullFP16)
- return SDValue();
- //
- // t11: f16 = fadd t8, t10
- // t12: i16 = bitcast t11 <~~~ SDNode N
- // t13: i32 = zero_extend t12
- // t16: ch,glue = CopyToReg t0, Register:i32 %r0, t13
- // t17: ch = ARMISD::RET_FLAG t16, Register:i32 %r0, t16:1
- //
- // transform this into:
- //
- // t20: i32 = ARMISD::VMOVrh t11
- // t16: ch,glue = CopyToReg t0, Register:i32 %r0, t20
- //
- auto ZeroExtend = N->use_begin();
- if (N->use_size() != 1 || ZeroExtend->getOpcode() != ISD::ZERO_EXTEND ||
- ZeroExtend->getValueType(0) != MVT::i32)
- return SDValue();
+ if ((SrcVT == MVT::i16 || SrcVT == MVT::i32) &&
+ (DstVT == MVT::f16 || DstVT == MVT::bf16))
+ return MoveToHPR(SDLoc(N), DAG, MVT::i32, DstVT.getSimpleVT(),
+ DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op));
- auto Copy = ZeroExtend->use_begin();
- if (Copy->getOpcode() == ISD::CopyToReg &&
- Copy->use_begin()->getOpcode() == ARMISD::RET_FLAG) {
- SDValue Cvt = DAG.getNode(ARMISD::VMOVrh, SDLoc(Op), MVT::i32, Op);
- DAG.ReplaceAllUsesWith(*ZeroExtend, &Cvt);
- return Cvt;
- }
- return SDValue();
- }
+ if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
+ (SrcVT == MVT::f16 || SrcVT == MVT::bf16))
+ return DAG.getNode(
+ ISD::TRUNCATE, SDLoc(N), DstVT,
+ MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op));
if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
return SDValue();
@@ -5930,16 +6119,20 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
  // The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3)
// so that the shift + and get folded into a bitfield extract.
SDLoc dl(Op);
- SDValue Ops[] = { DAG.getEntryNode(),
- DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) };
+ SDValue Chain = Op.getOperand(0);
+ SDValue Ops[] = {Chain,
+ DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
- SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops);
+ SDValue FPSCR =
+ DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, {MVT::i32, MVT::Other}, Ops);
+ Chain = FPSCR.getValue(1);
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
DAG.getConstant(1U << 22, dl, MVT::i32));
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
DAG.getConstant(22, dl, MVT::i32));
- return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
- DAG.getConstant(3, dl, MVT::i32));
+ SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
+ DAG.getConstant(3, dl, MVT::i32));
+ return DAG.getMergeValues({And, Chain}, dl);
}
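
The bit trick can be checked in isolation (editor's sketch; the FPSCR rounding-mode field lives in bits [23:22] with 0=nearest, 1=+inf, 2=-inf, 3=zero, while FLT_ROUNDS encodes the same modes as 1, 2, 3, 0, i.e. the field incremented modulo 4):

    #include <cassert>

    static unsigned fltRounds(unsigned FPSCR) {
      return ((FPSCR + (1u << 22)) >> 22) & 3;
    }

    int main() {
      assert(fltRounds(0u << 22) == 1); // round to nearest   -> 1
      assert(fltRounds(1u << 22) == 2); // round towards +inf -> 2
      assert(fltRounds(2u << 22) == 3); // round towards -inf -> 3
      assert(fltRounds(3u << 22) == 0); // round towards zero -> 0
      return 0;
    }
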
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
@@ -6424,9 +6617,10 @@ static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
/// immediate" operand (e.g., VMOV). If so, return the encoded value.
static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
unsigned SplatBitSize, SelectionDAG &DAG,
- const SDLoc &dl, EVT &VT, bool is128Bits,
+ const SDLoc &dl, EVT &VT, EVT VectorVT,
VMOVModImmType type) {
unsigned OpCmode, Imm;
+ bool is128Bits = VectorVT.is128BitVector();
// SplatBitSize is set to the smallest size that splats the vector, so a
// zero vector will always have SplatBitSize == 8. However, NEON modified
@@ -6544,9 +6738,18 @@ static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
ImmMask <<= 1;
}
- if (DAG.getDataLayout().isBigEndian())
- // swap higher and lower 32 bit word
- Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
+ if (DAG.getDataLayout().isBigEndian()) {
+ // Reverse the order of elements within the vector.
+ unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8;
+ unsigned Mask = (1 << BytesPerElem) - 1;
+ unsigned NumElems = 8 / BytesPerElem;
+ unsigned NewImm = 0;
+ for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
+ unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask);
+ NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
+ }
+ Imm = NewImm;
+ }
// Op=1, Cmode=1110.
OpCmode = 0x1e;
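
A standalone model of the element-group reversal above (editor's sketch; Imm is the 8-bit VMOV.i64 byte-mask immediate in which each bit selects one byte of the 64-bit value):

    #include <cassert>

    static unsigned reverseElemGroups(unsigned Imm, unsigned ScalarSizeInBits) {
      unsigned BytesPerElem = ScalarSizeInBits / 8;
      unsigned Mask = (1u << BytesPerElem) - 1;
      unsigned NumElems = 8 / BytesPerElem;
      unsigned NewImm = 0;
      for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
        unsigned Elem = (Imm >> (ElemNum * BytesPerElem)) & Mask;
        NewImm |= Elem << ((NumElems - ElemNum - 1) * BytesPerElem);
      }
      return NewImm;
    }

    int main() {
      // v8i16: two bytes per element, four elements per 64-bit half.
      assert(reverseElemGroups(0x03, 16) == 0xC0);
      // v16i8: one byte per element, so the 8-bit mask is simply bit-reversed.
      assert(reverseElemGroups(0x01, 8) == 0x80);
      return 0;
    }
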
@@ -6585,8 +6788,6 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
case MVT::f64: {
SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
- if (!ST->isLittle())
- std::swap(Lo, Hi);
return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
}
case MVT::f32:
@@ -6639,7 +6840,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
// Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
- VMovVT, false, VMOVModImm);
+ VMovVT, VT, VMOVModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
@@ -6656,7 +6857,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
// Finally, try a VMVN.i32
NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
- false, VMVNModImm);
+ VT, VMVNModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
@@ -7064,6 +7265,104 @@ static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top) {
return true;
}
+// Reconstruct an MVE VCVT from a BuildVector of scalar fptrunc, all extracted
+// from a pair of inputs. For example:
+// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
+// FP_ROUND(EXTRACT_ELT(Y, 0),
+// FP_ROUND(EXTRACT_ELT(X, 1),
+// FP_ROUND(EXTRACT_ELT(Y, 1), ...)
+static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
+ if (!ST->hasMVEFloatOps())
+ return SDValue();
+
+ SDLoc dl(BV);
+ EVT VT = BV.getValueType();
+ if (VT != MVT::v8f16)
+ return SDValue();
+
+ // We are looking for a buildvector of fptrunc elements, where all the
+ // elements are extracted, alternating, from two sources. Check that the first two
+ // items are valid enough and extract some info from them (they are checked
+ // properly in the loop below).
+ if (BV.getOperand(0).getOpcode() != ISD::FP_ROUND ||
+ BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ BV.getOperand(0).getOperand(0).getConstantOperandVal(1) != 0)
+ return SDValue();
+ if (BV.getOperand(1).getOpcode() != ISD::FP_ROUND ||
+ BV.getOperand(1).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ BV.getOperand(1).getOperand(0).getConstantOperandVal(1) != 0)
+ return SDValue();
+ SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
+ SDValue Op1 = BV.getOperand(1).getOperand(0).getOperand(0);
+ if (Op0.getValueType() != MVT::v4f32 || Op1.getValueType() != MVT::v4f32)
+ return SDValue();
+
+ // Check all the values in the BuildVector line up with our expectations.
+ for (unsigned i = 1; i < 4; i++) {
+ auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
+ return Trunc.getOpcode() == ISD::FP_ROUND &&
+ Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Trunc.getOperand(0).getOperand(0) == Op &&
+ Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
+ };
+ if (!Check(BV.getOperand(i * 2 + 0), Op0, i))
+ return SDValue();
+ if (!Check(BV.getOperand(i * 2 + 1), Op1, i))
+ return SDValue();
+ }
+
+ SDValue N1 = DAG.getNode(ARMISD::VCVTN, dl, VT, DAG.getUNDEF(VT), Op0,
+ DAG.getConstant(0, dl, MVT::i32));
+ return DAG.getNode(ARMISD::VCVTN, dl, VT, N1, Op1,
+ DAG.getConstant(1, dl, MVT::i32));
+}
+
+// Reconstruct an MVE VCVT from a BuildVector of scalar fpext, all extracted
+// from a single input on alternating lanes. For example:
+// BUILDVECTOR(FP_EXTEND(EXTRACT_ELT(X, 0),
+// FP_EXTEND(EXTRACT_ELT(X, 2),
+// FP_EXTEND(EXTRACT_ELT(X, 4), ...)
+static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
+ if (!ST->hasMVEFloatOps())
+ return SDValue();
+
+ SDLoc dl(BV);
+ EVT VT = BV.getValueType();
+ if (VT != MVT::v4f32)
+ return SDValue();
+
+ // We are looking for a buildvector of fpext elements, where all the
+ // elements are alternating lanes from a single source. For example <0,2,4,6>
+ // or <1,3,5,7>. Check the first two items are valid enough and extract some
+ // info from them (they are checked properly in the loop below).
+ if (BV.getOperand(0).getOpcode() != ISD::FP_EXTEND ||
+ BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+ SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
+ int Offset = BV.getOperand(0).getOperand(0).getConstantOperandVal(1);
+ if (Op0.getValueType() != MVT::v8f16 || (Offset != 0 && Offset != 1))
+ return SDValue();
+
+ // Check all the values in the BuildVector line up with our expectations.
+ for (unsigned i = 1; i < 4; i++) {
+ auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
+ return Trunc.getOpcode() == ISD::FP_EXTEND &&
+ Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Trunc.getOperand(0).getOperand(0) == Op &&
+ Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
+ };
+ if (!Check(BV.getOperand(i), Op0, 2 * i + Offset))
+ return SDValue();
+ }
+
+ return DAG.getNode(ARMISD::VCVTL, dl, VT, Op0,
+ DAG.getConstant(Offset, dl, MVT::i32));
+}
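// Editor's sketch (not part of the upstream patch): the lane mappings the two
// matchers above expect, written out as plain index arithmetic. In the fptrunc
// case, output lane i of the v8f16 buildvector must read element i/2 of X
// (even i) or Y (odd i); in the fpext case, output lane i of the v4f32
// buildvector must read element 2*i + Offset of X. Helper names are
// illustrative only.
constexpr unsigned vcvtnSourceLane(unsigned i) { return i / 2; }
constexpr unsigned vcvtlSourceLane(unsigned i, unsigned Offset) {
  return 2 * i + Offset;
}
static_assert(vcvtnSourceLane(5) == 2, "lane 5 reads element 2 of source Y");
static_assert(vcvtlSourceLane(3, 1) == 7, "lane 3 reads element 7 when Offset is 1");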
+
// If N is an integer constant that can be moved into a register in one
// instruction, return an SDValue of such a constant (will become a MOV
// instruction). Otherwise return null.
@@ -7163,13 +7462,12 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
return DAG.getUNDEF(VT);
if ((ST->hasNEON() && SplatBitSize <= 64) ||
- (ST->hasMVEIntegerOps() && SplatBitSize <= 32)) {
+ (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
// Check if an immediate VMOV works.
EVT VmovVT;
- SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(),
- SplatUndef.getZExtValue(), SplatBitSize,
- DAG, dl, VmovVT, VT.is128BitVector(),
- VMOVModImm);
+ SDValue Val =
+ isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
+ SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
@@ -7179,9 +7477,8 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// Try an immediate VMVN.
uint64_t NegatedImm = (~SplatBits).getZExtValue();
Val = isVMOVModifiedImm(
- NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
- DAG, dl, VmovVT, VT.is128BitVector(),
- ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
+ NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
+ VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
@@ -7321,12 +7618,19 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (isConstant)
return SDValue();
- // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
- if (NumElts >= 4) {
- SDValue shuffle = ReconstructShuffle(Op, DAG);
- if (shuffle != SDValue())
+ // Reconstruct the BUILDVECTOR to one of the legal shuffles (such as vext and
+ // vmovn). Empirical tests suggest this is rarely worth it for vectors of
+ // length <= 2.
+ if (NumElts >= 4)
+ if (SDValue shuffle = ReconstructShuffle(Op, DAG))
return shuffle;
- }
+
+ // Attempt to turn a buildvector of scalar fptruncs or fpexts back into
+ // VCVTs
+ if (SDValue VCVT = LowerBuildVectorOfFPTrunc(Op, DAG, Subtarget))
+ return VCVT;
+ if (SDValue VCVT = LowerBuildVectorOfFPExt(Op, DAG, Subtarget))
+ return VCVT;
if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
// If we haven't found an efficient lowering, try splitting a 128-bit vector
@@ -7527,7 +7831,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
if (SrcEltTy == SmallestEltTy)
continue;
assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
- Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
+ Src.ShuffleVec = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, ShuffleVT, Src.ShuffleVec);
Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
Src.WindowBase *= Src.WindowScale;
}
@@ -7579,7 +7883,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
ShuffleOps[1], Mask, DAG);
if (!Shuffle)
return SDValue();
- return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
+ return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Shuffle);
}
enum ShuffleOpCodes {
@@ -8892,7 +9196,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
if (ShouldUseSRet) {
// Create stack object for sret.
const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
- const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
+ const Align StackAlign = DL.getPrefTypeAlign(RetTy);
int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
@@ -9067,8 +9371,7 @@ void ARMTargetLowering::ExpandDIV_Windows(
DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
- Results.push_back(Lower);
- Results.push_back(Upper);
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lower, Upper));
}
static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
@@ -9101,6 +9404,25 @@ static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
}
+void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT MemVT = LD->getMemoryVT();
+ assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
+
+ if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
+ !Subtarget->isThumb1Only() && LD->isVolatile()) {
+ SDLoc dl(N);
+ SDValue Result = DAG.getMemIntrinsicNode(
+ ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
+ {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
+ SDValue Lo = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 0 : 1);
+ SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0);
+ SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+ Results.append({Pair, Result.getValue(2)});
+ }
+}
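// Editor's sketch (not part of the upstream patch): the lowering above yields
// the two 32-bit halves an LDRD would produce and reassembles the i64 with
// BUILD_PAIR(Lo, Hi). A minimal host-side model of that recombination; the
// helper name is illustrative only.
#include <cstdint>
constexpr uint64_t buildPair(uint32_t Lo, uint32_t Hi) {
  return (uint64_t(Hi) << 32) | Lo;
}
static_assert(buildPair(0x89ABCDEFu, 0x01234567u) == 0x0123456789ABCDEFull,
              "BUILD_PAIR places Lo in bits [31:0] and Hi in bits [63:32]");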
+
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
EVT MemVT = ST->getMemoryVT();
@@ -9130,6 +9452,38 @@ static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
ST->getMemOperand());
}
+static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ EVT MemVT = ST->getMemoryVT();
+ assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
+
+ if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
+ !Subtarget->isThumb1Only() && ST->isVolatile()) {
+ SDNode *N = Op.getNode();
+ SDLoc dl(N);
+
+ SDValue Lo = DAG.getNode(
+ ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
+ DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 0 : 1, dl,
+ MVT::i32));
+ SDValue Hi = DAG.getNode(
+ ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
+ DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 1 : 0, dl,
+ MVT::i32));
+
+ return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
+ {ST->getChain(), Lo, Hi, ST->getBasePtr()},
+ MemVT, ST->getMemOperand());
+ } else if (Subtarget->hasMVEIntegerOps() &&
+ ((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
+ MemVT == MVT::v16i1))) {
+ return LowerPredicateStore(Op, DAG);
+ }
+
+ return SDValue();
+}
+
static bool isZeroVector(SDValue N) {
return (ISD::isBuildVectorAllZeros(N.getNode()) ||
(N->getOpcode() == ARMISD::VMOVIMM &&
@@ -9155,13 +9509,87 @@ static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
N->getExtensionType(), N->isExpandingLoad());
SDValue Combo = NewLoad;
- if (!PassThru.isUndef() &&
- (PassThru.getOpcode() != ISD::BITCAST ||
- !isZeroVector(PassThru->getOperand(0))))
+ bool PassThruIsCastZero = (PassThru.getOpcode() == ISD::BITCAST ||
+ PassThru.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
+ isZeroVector(PassThru->getOperand(0));
+ if (!PassThru.isUndef() && !PassThruIsCastZero)
Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
}
+static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ if (!ST->hasMVEIntegerOps())
+ return SDValue();
+
+ SDLoc dl(Op);
+ unsigned BaseOpcode = 0;
+ switch (Op->getOpcode()) {
+ default: llvm_unreachable("Expected VECREDUCE opcode");
+ case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
+ case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
+ case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
+ case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
+ case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
+ case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
+ case ISD::VECREDUCE_FMAX: BaseOpcode = ISD::FMAXNUM; break;
+ case ISD::VECREDUCE_FMIN: BaseOpcode = ISD::FMINNUM; break;
+ }
+
+ SDValue Op0 = Op->getOperand(0);
+ EVT VT = Op0.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumActiveLanes = NumElts;
+
+ assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
+ NumActiveLanes == 2) &&
+ "Only expected a power 2 vector size");
+
+ // Use Mul(X, Rev(X)) until 4 items remain. Going down to 4 vector elements
+ // allows us to easily extract vector elements from the lanes.
+ while (NumActiveLanes > 4) {
+ unsigned RevOpcode = NumActiveLanes == 16 ? ARMISD::VREV16 : ARMISD::VREV32;
+ SDValue Rev = DAG.getNode(RevOpcode, dl, VT, Op0);
+ Op0 = DAG.getNode(BaseOpcode, dl, VT, Op0, Rev);
+ NumActiveLanes /= 2;
+ }
+
+ SDValue Res;
+ if (NumActiveLanes == 4) {
+ // The remaining 4 elements are reduced sequentially
+ SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
+ DAG.getConstant(0 * NumElts / 4, dl, MVT::i32));
+ SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
+ DAG.getConstant(1 * NumElts / 4, dl, MVT::i32));
+ SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
+ DAG.getConstant(2 * NumElts / 4, dl, MVT::i32));
+ SDValue Ext3 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
+ DAG.getConstant(3 * NumElts / 4, dl, MVT::i32));
+ SDValue Res0 = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
+ SDValue Res1 = DAG.getNode(BaseOpcode, dl, EltVT, Ext2, Ext3, Op->getFlags());
+ Res = DAG.getNode(BaseOpcode, dl, EltVT, Res0, Res1, Op->getFlags());
+ } else {
+ SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
+ DAG.getConstant(0, dl, MVT::i32));
+ SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
+ DAG.getConstant(1, dl, MVT::i32));
+ Res = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
+ }
+
+ // Result type may be wider than element type.
+ if (EltVT != Op->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, dl, Op->getValueType(0), Res);
+ return Res;
+}
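// Editor's sketch (not part of the upstream patch): the loop above combines
// the vector with a lane-reversed copy of itself until four active lanes
// remain, then reduces those scalars. A scalar C++ model for eight lanes and
// an associative, commutative operation (integer multiply here); the helper
// name is illustrative only.
static int reduceMul8(const int (&V)[8]) {
  int Tmp[8];
  // One VREV32-style step: lane i pairs with lane i^1, halving the number of
  // distinct ("active") results.
  for (unsigned i = 0; i < 8; ++i)
    Tmp[i] = V[i] * V[i ^ 1];
  // Four active results now live in lanes 0, 2, 4 and 6 (NumElts / 4 apart),
  // matching the extracts performed above.
  return Tmp[0] * Tmp[2] * Tmp[4] * Tmp[6];
}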
+
+static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ if (!ST->hasMVEFloatOps())
+ return SDValue();
+ return LowerVecReduce(Op, DAG, ST);
+}
+
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
// Acquire/Release load/store is not legal for targets without a dmb or
@@ -9231,12 +9659,13 @@ static void ReplaceCMP_SWAP_64Results(SDNode *N,
bool isBigEndian = DAG.getDataLayout().isBigEndian();
- Results.push_back(
+ SDValue Lo =
DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
- SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
- Results.push_back(
+ SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
+ SDValue Hi =
DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
- SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
+ SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i64, Lo, Hi));
Results.push_back(SDValue(CmpSwap, 2));
}
@@ -9362,9 +9791,19 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::LOAD:
return LowerPredicateLoad(Op, DAG);
case ISD::STORE:
- return LowerPredicateStore(Op, DAG);
+ return LowerSTORE(Op, DAG, Subtarget);
case ISD::MLOAD:
return LowerMLOAD(Op, DAG);
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ return LowerVecReduce(Op, DAG, Subtarget);
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAX:
+ return LowerVecReduceF(Op, DAG, Subtarget);
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
@@ -9411,8 +9850,8 @@ static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
DAG.getVTList(MVT::i32, MVT::i32),
N->getOperand(1), N->getOperand(2),
Lo, Hi);
- Results.push_back(LongMul.getValue(0));
- Results.push_back(LongMul.getValue(1));
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
+ LongMul.getValue(0), LongMul.getValue(1)));
}
/// ReplaceNodeResults - Replace the results of node with an illegal result
@@ -9466,7 +9905,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ABS:
lowerABS(N, Results, DAG);
return ;
-
+ case ISD::LOAD:
+ LowerLOAD(N, Results, DAG);
+ break;
}
if (Res.getNode())
Results.push_back(Res);
@@ -9499,7 +9940,7 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
ARMConstantPoolValue *CPV =
ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
- unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
+ unsigned CPI = MCP->getConstantPoolIndex(CPV, Align(4));
const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
: &ARM::GPRRegClass;
@@ -9507,11 +9948,11 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
// Grab constant pool and fixed stack memory operands.
MachineMemOperand *CPMMO =
MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
- MachineMemOperand::MOLoad, 4, 4);
+ MachineMemOperand::MOLoad, 4, Align(4));
MachineMemOperand *FIMMOSt =
MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
- MachineMemOperand::MOStore, 4, 4);
+ MachineMemOperand::MOStore, 4, Align(4));
// Load the address of the dispatch MBB into the jump buffer.
if (isThumb2) {
@@ -9697,7 +10138,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI),
- MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4);
+ MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, Align(4));
MachineInstrBuilder MIB;
MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
@@ -9788,10 +10229,8 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
// MachineConstantPool wants an explicit alignment.
- unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
- if (Align == 0)
- Align = MF->getDataLayout().getTypeAllocSize(C->getType());
- unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+ Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
Register VReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
@@ -9828,8 +10267,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addReg(NewVReg3)
.add(predOps(ARMCC::AL));
- MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
- MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
+ MachineMemOperand *JTMMOLd =
+ MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
+ MachineMemOperand::MOLoad, 4, Align(4));
Register NewVReg5 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
@@ -9889,10 +10329,8 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
// MachineConstantPool wants an explicit alignment.
- unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
- if (Align == 0)
- Align = MF->getDataLayout().getTypeAllocSize(C->getType());
- unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+ Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
Register VReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
@@ -9922,8 +10360,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addJumpTableIndex(MJTI)
.add(predOps(ARMCC::AL));
- MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
- MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
+ MachineMemOperand *JTMMOLd =
+ MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
+ MachineMemOperand::MOLoad, 4, Align(4));
Register NewVReg5 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
.addReg(NewVReg3, RegState::Kill)
@@ -10162,7 +10601,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
Register dest = MI.getOperand(0).getReg();
Register src = MI.getOperand(1).getReg();
unsigned SizeVal = MI.getOperand(2).getImm();
- unsigned Align = MI.getOperand(3).getImm();
+ unsigned Alignment = MI.getOperand(3).getImm();
DebugLoc dl = MI.getDebugLoc();
MachineFunction *MF = BB->getParent();
@@ -10175,17 +10614,17 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
bool IsThumb2 = Subtarget->isThumb2();
bool IsThumb = Subtarget->isThumb();
- if (Align & 1) {
+ if (Alignment & 1) {
UnitSize = 1;
- } else if (Align & 2) {
+ } else if (Alignment & 2) {
UnitSize = 2;
} else {
// Check whether we can use NEON instructions.
if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
Subtarget->hasNEON()) {
- if ((Align % 16 == 0) && SizeVal >= 16)
+ if ((Alignment % 16 == 0) && SizeVal >= 16)
UnitSize = 16;
- else if ((Align % 8 == 0) && SizeVal >= 8)
+ else if ((Alignment % 8 == 0) && SizeVal >= 8)
UnitSize = 8;
}
// Can't use NEON instructions.
@@ -10291,13 +10730,11 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
// MachineConstantPool wants an explicit alignment.
- unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
- if (Align == 0)
- Align = MF->getDataLayout().getTypeAllocSize(C->getType());
- unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+ Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
MachineMemOperand *CPMMO =
MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
- MachineMemOperand::MOLoad, 4, 4);
+ MachineMemOperand::MOLoad, 4, Align(4));
if (IsThumb)
BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
@@ -11667,6 +12104,42 @@ static SDValue PerformAddeSubeCombine(SDNode *N,
return SDValue();
}
+static SDValue PerformVSELECTCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
+ //
+ // We need to re-implement this optimization here as the implementation in the
+ // Target-Independent DAGCombiner does not handle the kind of constant we make
+ // (it calls isConstOrConstSplat with AllowTruncation set to false - and for
+ // good reason, allowing truncation there would break other targets).
+ //
+ // Currently, this is only done for MVE, as it's the only target that benefits
+ // from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).
+ if (!Subtarget->hasMVEIntegerOps())
+ return SDValue();
+
+ if (N->getOperand(0).getOpcode() != ISD::XOR)
+ return SDValue();
+ SDValue XOR = N->getOperand(0);
+
+ // Check if the XOR's RHS is either a 1, or a BUILD_VECTOR of 1s.
+ // It is important to check with truncation allowed as the BUILD_VECTORs we
+ // generate in those situations will truncate their operands.
+ ConstantSDNode *Const =
+ isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,
+ /*AllowTruncation*/ true);
+ if (!Const || !Const->isOne())
+ return SDValue();
+
+ // Rewrite into vselect(cond, rhs, lhs).
+ SDValue Cond = XOR->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ EVT Type = N->getValueType(0);
+ return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);
+}
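// Editor's sketch (not part of the upstream patch): the rewrite above rests
// on the identity select(!c, a, b) == select(c, b, a), applied lane-wise to
// the vector predicate. A scalar model; the helper name is illustrative only.
constexpr int selectScalar(bool c, int a, int b) { return c ? a : b; }
static_assert(selectScalar(!true, 1, 2) == selectScalar(true, 2, 1),
              "inverted condition with swapped operands is equivalent");
static_assert(selectScalar(!false, 1, 2) == selectScalar(false, 2, 1),
              "holds for both predicate values");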
+
static SDValue PerformABSCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
@@ -11724,6 +12197,71 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
return SDValue();
}
+static SDValue PerformADDVecReduce(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasMVEIntegerOps() || N->getValueType(0) != MVT::i64)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // We are looking for an i64 add of a VADDLVx. Due to these being i64s, this
+ // will look like:
+ // t1: i32,i32 = ARMISD::VADDLVs x
+ // t2: i64 = build_pair t1, t1:1
+ // t3: i64 = add t2, y
+ // We also need to check for sext / zext and commutative adds.
+ auto MakeVecReduce = [&](unsigned Opcode, unsigned OpcodeA, SDValue NA,
+ SDValue NB) {
+ if (NB->getOpcode() != ISD::BUILD_PAIR)
+ return SDValue();
+ SDValue VecRed = NB->getOperand(0);
+ if (VecRed->getOpcode() != Opcode || VecRed.getResNo() != 0 ||
+ NB->getOperand(1) != SDValue(VecRed.getNode(), 1))
+ return SDValue();
+
+ SDLoc dl(N);
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(DCI.DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, NA,
+ DCI.DAG.getConstant(0, dl, MVT::i32)));
+ Ops.push_back(DCI.DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, NA,
+ DCI.DAG.getConstant(1, dl, MVT::i32)));
+ for (unsigned i = 0, e = VecRed.getNumOperands(); i < e; i++)
+ Ops.push_back(VecRed->getOperand(i));
+ SDValue Red = DCI.DAG.getNode(OpcodeA, dl,
+ DCI.DAG.getVTList({MVT::i32, MVT::i32}), Ops);
+ return DCI.DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Red,
+ SDValue(Red.getNode(), 1));
+ };
+
+ if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N0, N1))
+ return M;
+ if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N0, N1))
+ return M;
+ if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N1, N0))
+ return M;
+ if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N1, N0))
+ return M;
+ if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N0, N1))
+ return M;
+ if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N0, N1))
+ return M;
+ if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N1, N0))
+ return M;
+ if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N1, N0))
+ return M;
+ if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N0, N1))
+ return M;
+ if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N0, N1))
+ return M;
+ if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N1, N0))
+ return M;
+ if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))
+ return M;
+ return SDValue();
+}
+
bool
ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const {
@@ -11895,6 +12433,9 @@ static SDValue PerformADDCombine(SDNode *N,
if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
return Result;
+ if (SDValue Result = PerformADDVecReduce(N, DCI, Subtarget))
+ return Result;
+
// First try with the default operand order.
if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
return Result;
@@ -11986,18 +12527,86 @@ static SDValue PerformVMULCombine(SDNode *N,
DAG.getNode(ISD::MUL, DL, VT, N01, N1));
}
+static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v2i64)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ auto IsSignExt = [&](SDValue Op) {
+ if (Op->getOpcode() != ISD::SIGN_EXTEND_INREG)
+ return SDValue();
+ EVT VT = cast<VTSDNode>(Op->getOperand(1))->getVT();
+ if (VT.getScalarSizeInBits() == 32)
+ return Op->getOperand(0);
+ return SDValue();
+ };
+ auto IsZeroExt = [&](SDValue Op) {
+ // Zero extends are a little more awkward. At the point we are matching
+ // this, we are looking for an AND with a (-1, 0, -1, 0) buildvector mask.
+ // That might be before or after a bitcast, depending on how the AND is
+ // placed. Because this has to look through bitcasts, it is currently only
+ // supported on LE.
+ if (!Subtarget->isLittle())
+ return SDValue();
+
+ SDValue And = Op;
+ if (And->getOpcode() == ISD::BITCAST)
+ And = And->getOperand(0);
+ if (And->getOpcode() != ISD::AND)
+ return SDValue();
+ SDValue Mask = And->getOperand(1);
+ if (Mask->getOpcode() == ISD::BITCAST)
+ Mask = Mask->getOperand(0);
+
+ if (Mask->getOpcode() != ISD::BUILD_VECTOR ||
+ Mask.getValueType() != MVT::v4i32)
+ return SDValue();
+ if (isAllOnesConstant(Mask->getOperand(0)) &&
+ isNullConstant(Mask->getOperand(1)) &&
+ isAllOnesConstant(Mask->getOperand(2)) &&
+ isNullConstant(Mask->getOperand(3)))
+ return And->getOperand(0);
+ return SDValue();
+ };
+
+ SDLoc dl(N);
+ if (SDValue Op0 = IsSignExt(N0)) {
+ if (SDValue Op1 = IsSignExt(N1)) {
+ SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
+ SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
+ return DAG.getNode(ARMISD::VMULLs, dl, VT, New0a, New1a);
+ }
+ }
+ if (SDValue Op0 = IsZeroExt(N0)) {
+ if (SDValue Op1 = IsZeroExt(N1)) {
+ SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
+ SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
+ return DAG.getNode(ARMISD::VMULLu, dl, VT, New0a, New1a);
+ }
+ }
+
+ return SDValue();
+}
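// Editor's sketch (not part of the upstream patch): on little-endian, the
// v4i32 mask (-1, 0, -1, 0) that IsZeroExt looks for is, viewed as v2i64,
// the constant 0x00000000FFFFFFFF in each lane, so the AND zero-extends the
// low 32 bits of every 64-bit lane, which is exactly the operand shape an
// unsigned widening multiply (ARMISD::VMULLu) consumes.
static_assert((0xDEADBEEFCAFEF00Dull & 0x00000000FFFFFFFFull) == 0xCAFEF00Dull,
              "the mask keeps only the low 32 bits of each 64-bit lane");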
+
static SDValue PerformMULCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)
+ return PerformMVEVMULLCombine(N, DAG, Subtarget);
+
if (Subtarget->isThumb1Only())
return SDValue();
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
- EVT VT = N->getValueType(0);
if (VT.is64BitVector() || VT.is128BitVector())
return PerformVMULCombine(N, DCI, Subtarget);
if (VT != MVT::i32)
@@ -12182,20 +12791,21 @@ static SDValue PerformANDCombine(SDNode *N,
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
- if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v4i1 ||
+ VT == MVT::v8i1 || VT == MVT::v16i1)
return SDValue();
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
- if (BVN && Subtarget->hasNEON() &&
+ if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
- if (SplatBitSize <= 64) {
+ if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
+ SplatBitSize == 64) {
EVT VbicVT;
SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
- DAG, dl, VbicVT, VT.is128BitVector(),
- OtherModImm);
+ DAG, dl, VbicVT, VT, OtherModImm);
if (Val.getNode()) {
SDValue Input =
DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
@@ -12425,58 +13035,44 @@ static bool isValidMVECond(unsigned CC, bool IsFloat) {
};
}
+static ARMCC::CondCodes getVCMPCondCode(SDValue N) {
+ if (N->getOpcode() == ARMISD::VCMP)
+ return (ARMCC::CondCodes)N->getConstantOperandVal(2);
+ else if (N->getOpcode() == ARMISD::VCMPZ)
+ return (ARMCC::CondCodes)N->getConstantOperandVal(1);
+ else
+ llvm_unreachable("Not a VCMP/VCMPZ!");
+}
+
+static bool CanInvertMVEVCMP(SDValue N) {
+ ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N));
+ return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());
+}
+
static SDValue PerformORCombine_i1(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
// together with predicates
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ARMCC::CondCodes CondCode0 = ARMCC::AL;
- ARMCC::CondCodes CondCode1 = ARMCC::AL;
- if (N0->getOpcode() == ARMISD::VCMP)
- CondCode0 = (ARMCC::CondCodes)cast<const ConstantSDNode>(N0->getOperand(2))
- ->getZExtValue();
- else if (N0->getOpcode() == ARMISD::VCMPZ)
- CondCode0 = (ARMCC::CondCodes)cast<const ConstantSDNode>(N0->getOperand(1))
- ->getZExtValue();
- if (N1->getOpcode() == ARMISD::VCMP)
- CondCode1 = (ARMCC::CondCodes)cast<const ConstantSDNode>(N1->getOperand(2))
- ->getZExtValue();
- else if (N1->getOpcode() == ARMISD::VCMPZ)
- CondCode1 = (ARMCC::CondCodes)cast<const ConstantSDNode>(N1->getOperand(1))
- ->getZExtValue();
-
- if (CondCode0 == ARMCC::AL || CondCode1 == ARMCC::AL)
- return SDValue();
-
- unsigned Opposite0 = ARMCC::getOppositeCondition(CondCode0);
- unsigned Opposite1 = ARMCC::getOppositeCondition(CondCode1);
+ auto IsFreelyInvertable = [&](SDValue V) {
+ if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
+ return CanInvertMVEVCMP(V);
+ return false;
+ };
- if (!isValidMVECond(Opposite0,
- N0->getOperand(0)->getValueType(0).isFloatingPoint()) ||
- !isValidMVECond(Opposite1,
- N1->getOperand(0)->getValueType(0).isFloatingPoint()))
+ // At least one operand must be freely invertable.
+ if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))
return SDValue();
- SmallVector<SDValue, 4> Ops0;
- Ops0.push_back(N0->getOperand(0));
- if (N0->getOpcode() == ARMISD::VCMP)
- Ops0.push_back(N0->getOperand(1));
- Ops0.push_back(DCI.DAG.getConstant(Opposite0, SDLoc(N0), MVT::i32));
- SmallVector<SDValue, 4> Ops1;
- Ops1.push_back(N1->getOperand(0));
- if (N1->getOpcode() == ARMISD::VCMP)
- Ops1.push_back(N1->getOperand(1));
- Ops1.push_back(DCI.DAG.getConstant(Opposite1, SDLoc(N1), MVT::i32));
-
- SDValue NewN0 = DCI.DAG.getNode(N0->getOpcode(), SDLoc(N0), VT, Ops0);
- SDValue NewN1 = DCI.DAG.getNode(N1->getOpcode(), SDLoc(N1), VT, Ops1);
- SDValue And = DCI.DAG.getNode(ISD::AND, SDLoc(N), VT, NewN0, NewN1);
- return DCI.DAG.getNode(ISD::XOR, SDLoc(N), VT, And,
- DCI.DAG.getAllOnesConstant(SDLoc(N), VT));
+ SDValue NewN0 = DCI.DAG.getLogicalNOT(DL, N0, VT);
+ SDValue NewN1 = DCI.DAG.getLogicalNOT(DL, N1, VT);
+ SDValue And = DCI.DAG.getNode(ISD::AND, DL, VT, NewN0, NewN1);
+ return DCI.DAG.getLogicalNOT(DL, And, VT);
}
/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
@@ -12492,17 +13088,21 @@ static SDValue PerformORCombine(SDNode *N,
if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
+ if (Subtarget->hasMVEIntegerOps() &&
+ (VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1))
+ return PerformORCombine_i1(N, DCI, Subtarget);
+
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
- if (BVN && Subtarget->hasNEON() &&
+ if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
- if (SplatBitSize <= 64) {
+ if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
+ SplatBitSize == 64) {
EVT VorrVT;
- SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(),
- SplatUndef.getZExtValue(), SplatBitSize,
- DAG, dl, VorrVT, VT.is128BitVector(),
- OtherModImm);
+ SDValue Val =
+ isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
+ SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
if (Val.getNode()) {
SDValue Input =
DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
@@ -12563,10 +13163,6 @@ static SDValue PerformORCombine(SDNode *N,
}
}
- if (Subtarget->hasMVEIntegerOps() &&
- (VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1))
- return PerformORCombine_i1(N, DCI, Subtarget);
-
// Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
// reasonable.
if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
@@ -12598,6 +13194,27 @@ static SDValue PerformXORCombine(SDNode *N,
return Result;
}
+ if (Subtarget->hasMVEIntegerOps()) {
+ // fold (xor(vcmp/z, 1)) into a vcmp with the opposite condition.
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ const TargetLowering *TLI = Subtarget->getTargetLowering();
+ if (TLI->isConstTrueVal(N1.getNode()) &&
+ (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
+ if (CanInvertMVEVCMP(N0)) {
+ SDLoc DL(N0);
+ ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N0));
+
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(N0->getOperand(0));
+ if (N0->getOpcode() == ARMISD::VCMP)
+ Ops.push_back(N0->getOperand(1));
+ Ops.push_back(DCI.DAG.getConstant(CC, DL, MVT::i32));
+ return DCI.DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops);
+ }
+ }
+ }
+
return SDValue();
}
@@ -12796,6 +13413,78 @@ static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue PerformVMOVhrCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue Op0 = N->getOperand(0);
+
+ // VMOVhr (VMOVrh (X)) -> X
+ if (Op0->getOpcode() == ARMISD::VMOVrh)
+ return Op0->getOperand(0);
+
+ // FullFP16: half values are passed in S-registers, and we don't
+ // need any of the bitcast and moves:
+ //
+ // t2: f32,ch = CopyFromReg t0, Register:f32 %0
+ // t5: i32 = bitcast t2
+ // t18: f16 = ARMISD::VMOVhr t5
+ if (Op0->getOpcode() == ISD::BITCAST) {
+ SDValue Copy = Op0->getOperand(0);
+ if (Copy.getValueType() == MVT::f32 &&
+ Copy->getOpcode() == ISD::CopyFromReg) {
+ SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1)};
+ SDValue NewCopy =
+ DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(N), N->getValueType(0), Ops);
+ return NewCopy;
+ }
+ }
+
+ // fold (VMOVhr (load x)) -> (load (f16*)x)
+ if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
+ if (LN0->hasOneUse() && LN0->isUnindexed() &&
+ LN0->getMemoryVT() == MVT::i16) {
+ SDValue Load =
+ DCI.DAG.getLoad(N->getValueType(0), SDLoc(N), LN0->getChain(),
+ LN0->getBasePtr(), LN0->getMemOperand());
+ DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
+ DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
+ return Load;
+ }
+ }
+
+ // Only the bottom 16 bits of the source register are used.
+ APInt DemandedMask = APInt::getLowBitsSet(32, 16);
+ const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedBits(Op0, DemandedMask, DCI))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+static SDValue PerformVMOVrhCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (VMOVrh (load x)) -> (zextload (i16*)x)
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+
+ SDValue Load =
+ DCI.DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(),
+ LN0->getBasePtr(), MVT::i16, LN0->getMemOperand());
+ DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
+ DCI.DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
+ return Load;
+ }
+
+ // Fold VMOVrh(extract(x, n)) -> vgetlaneu(x, n)
+ if (N0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(N0->getOperand(1)))
+ return DCI.DAG.getNode(ARMISD::VGETLANEu, SDLoc(N), VT, N0->getOperand(0),
+ N0->getOperand(1));
+
+ return SDValue();
+}
+
/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
/// are normal, non-volatile loads. If so, it is profitable to bitcast an
/// i64 vector to have f64 elements, since the value can then be loaded
@@ -12946,8 +13635,29 @@ PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
// If the valuetypes are the same, we can remove the cast entirely.
if (Op->getOperand(0).getValueType() == VT)
return Op->getOperand(0);
- return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl,
- Op->getOperand(0).getValueType(), Op->getOperand(0));
+ return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
+ }
+
+ return SDValue();
+}
+
+static SDValue
+PerformVECTOR_REG_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ SDLoc dl(N);
+
+ // Under Little endian, a VECTOR_REG_CAST is equivalent to a BITCAST
+ if (ST->isLittle())
+ return DCI.DAG.getNode(ISD::BITCAST, dl, VT, Op);
+
+ // VECTOR_REG_CAST(VECTOR_REG_CAST(x)) == VECTOR_REG_CAST(x)
+ if (Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {
+ // If the valuetypes are the same, we can remove the cast entirely.
+ if (Op->getOperand(0).getValueType() == VT)
+ return Op->getOperand(0);
+ return DCI.DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Op->getOperand(0));
}
return SDValue();
@@ -13012,6 +13722,29 @@ static SDValue PerformInsertEltCombine(SDNode *N,
return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
}
+static SDValue PerformExtractEltCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue Op0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ // extract (vdup x) -> x
+ if (Op0->getOpcode() == ARMISD::VDUP) {
+ SDValue X = Op0->getOperand(0);
+ if (VT == MVT::f16 && X.getValueType() == MVT::i32)
+ return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X);
+ if (VT == MVT::i32 && X.getValueType() == MVT::f16)
+ return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X);
+
+ while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
+ X = X->getOperand(0);
+ if (X.getValueType() == VT)
+ return X;
+ }
+
+ return SDValue();
+}
+
/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
/// ISD::VECTOR_SHUFFLE.
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
@@ -13293,6 +14026,128 @@ static SDValue PerformVLDCombine(SDNode *N,
return CombineBaseUpdate(N, DCI);
}
+static SDValue PerformMVEVLDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Addr = N->getOperand(2);
+ MemSDNode *MemN = cast<MemSDNode>(N);
+ SDLoc dl(N);
+
+ // For the stores, where there are multiple intrinsics, we only actually want
+ // to post-inc the last of them.
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ if (IntNo == Intrinsic::arm_mve_vst2q &&
+ cast<ConstantSDNode>(N->getOperand(5))->getZExtValue() != 1)
+ return SDValue();
+ if (IntNo == Intrinsic::arm_mve_vst4q &&
+ cast<ConstantSDNode>(N->getOperand(7))->getZExtValue() != 3)
+ return SDValue();
+
+ // Search for a use of the address operand that is an increment.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() != ISD::ADD ||
+ UI.getUse().getResNo() != Addr.getResNo())
+ continue;
+
+ // Check that the add is independent of the load/store. Otherwise, folding
+ // it would create a cycle. We can avoid searching through Addr as it's a
+ // predecessor to both.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Visited.insert(Addr.getNode());
+ Worklist.push_back(N);
+ Worklist.push_back(User);
+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
+ SDNode::hasPredecessorHelper(User, Visited, Worklist))
+ continue;
+
+ // Find the new opcode for the updating load/store.
+ bool isLoadOp = true;
+ unsigned NewOpc = 0;
+ unsigned NumVecs = 0;
+ switch (IntNo) {
+ default:
+ llvm_unreachable("unexpected intrinsic for MVE VLDn combine");
+ case Intrinsic::arm_mve_vld2q:
+ NewOpc = ARMISD::VLD2_UPD;
+ NumVecs = 2;
+ break;
+ case Intrinsic::arm_mve_vld4q:
+ NewOpc = ARMISD::VLD4_UPD;
+ NumVecs = 4;
+ break;
+ case Intrinsic::arm_mve_vst2q:
+ NewOpc = ARMISD::VST2_UPD;
+ NumVecs = 2;
+ isLoadOp = false;
+ break;
+ case Intrinsic::arm_mve_vst4q:
+ NewOpc = ARMISD::VST4_UPD;
+ NumVecs = 4;
+ isLoadOp = false;
+ break;
+ }
+
+ // Find the size of memory referenced by the load/store.
+ EVT VecTy;
+ if (isLoadOp) {
+ VecTy = N->getValueType(0);
+ } else {
+ VecTy = N->getOperand(3).getValueType();
+ }
+
+ unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
+
+ // If the increment is a constant, it must match the memory ref size.
+ SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
+ ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
+ if (!CInc || CInc->getZExtValue() != NumBytes)
+ continue;
+
+ // Create the new updating load/store node.
+ // First, create an SDVTList for the new updating node's results.
+ EVT Tys[6];
+ unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
+ unsigned n;
+ for (n = 0; n < NumResultVecs; ++n)
+ Tys[n] = VecTy;
+ Tys[n++] = MVT::i32;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
+
+ // Then, gather the new node's operands.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(N->getOperand(0)); // incoming chain
+ Ops.push_back(N->getOperand(2)); // ptr
+ Ops.push_back(Inc);
+
+ for (unsigned i = 3; i < N->getNumOperands(); ++i)
+ Ops.push_back(N->getOperand(i));
+
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, VecTy,
+ MemN->getMemOperand());
+
+ // Update the uses.
+ SmallVector<SDValue, 5> NewResults;
+ for (unsigned i = 0; i < NumResultVecs; ++i)
+ NewResults.push_back(SDValue(UpdN.getNode(), i));
+
+ NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
+ DCI.CombineTo(N, NewResults);
+ DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
+
+ break;
+ }
+
+ return SDValue();
+}
+
/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
@@ -13377,8 +14232,21 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
/// PerformVDUPLANECombine - Target-specific dag combine xforms for
/// ARMISD::VDUPLANE.
static SDValue PerformVDUPLANECombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
SDValue Op = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // On MVE, we just convert the VDUPLANE to a VDUP with an extract.
+ if (Subtarget->hasMVEIntegerOps()) {
+ EVT ExtractVT = VT.getVectorElementType();
+ // We need to ensure we are creating a legal type.
+ if (!DCI.DAG.getTargetLoweringInfo().isTypeLegal(ExtractVT))
+ ExtractVT = MVT::i32;
+ SDValue Extract = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ExtractVT,
+ N->getOperand(0), N->getOperand(1));
+ return DCI.DAG.getNode(ARMISD::VDUP, SDLoc(N), VT, Extract);
+ }
// If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
// of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
@@ -13399,7 +14267,6 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
unsigned EltBits;
if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)
EltSize = 8;
- EVT VT = N->getValueType(0);
if (EltSize > VT.getScalarSizeInBits())
return SDValue();
@@ -13412,6 +14279,18 @@ static SDValue PerformVDUPCombine(SDNode *N,
const ARMSubtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;
SDValue Op = N->getOperand(0);
+ SDLoc dl(N);
+
+ if (Subtarget->hasMVEIntegerOps()) {
+ // Convert VDUP f32 -> VDUP BITCAST i32 under MVE, as we know the value will
+ // need to come from a GPR.
+ if (Op.getValueType() == MVT::f32)
+ return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
+ DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op));
+ else if (Op.getValueType() == MVT::f16)
+ return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
+ DAG.getNode(ARMISD::VMOVrh, dl, MVT::i32, Op));
+ }
if (!Subtarget->hasNEON())
return SDValue();
@@ -13540,7 +14419,7 @@ static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
return SDValue();
SDValue Trunc = St->getValue();
- if (Trunc->getOpcode() != ISD::TRUNCATE)
+ if (Trunc->getOpcode() != ISD::TRUNCATE && Trunc->getOpcode() != ISD::FP_ROUND)
return SDValue();
EVT FromVT = Trunc->getOperand(0).getValueType();
EVT ToVT = Trunc.getValueType();
@@ -13555,20 +14434,54 @@ static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
NumElements = 4;
if (FromEltVT == MVT::i16 && ToEltVT == MVT::i8)
NumElements = 8;
- if (NumElements == 0 || FromVT.getVectorNumElements() == NumElements ||
+ if (FromEltVT == MVT::f32 && ToEltVT == MVT::f16)
+ NumElements = 4;
+ if (NumElements == 0 ||
+ (FromEltVT != MVT::f32 && FromVT.getVectorNumElements() == NumElements) ||
FromVT.getVectorNumElements() % NumElements != 0)
return SDValue();
+ // Test if the Trunc will be convertible to a VMOVN with a shuffle, and if so
+ // use the VMOVN over splitting the store. We are looking for patterns of:
+ // !rev: 0 N 1 N+1 2 N+2 ...
+ // rev: N 0 N+1 1 N+2 2 ...
+ auto isVMOVNOriginalMask = [&](ArrayRef<int> M, bool rev) {
+ unsigned NumElts = ToVT.getVectorNumElements();
+ if (NumElts != M.size())
+ return false;
+
+ unsigned Off0 = rev ? NumElts : 0;
+ unsigned Off1 = rev ? 0 : NumElts;
+
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ if (M[i] >= 0 && M[i] != (int)(Off0 + i / 2))
+ return false;
+ if (M[i + 1] >= 0 && M[i + 1] != (int)(Off1 + i / 2))
+ return false;
+ }
+
+ return true;
+ };
+
+ if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc->getOperand(0)))
+ if (isVMOVNOriginalMask(Shuffle->getMask(), false) ||
+ isVMOVNOriginalMask(Shuffle->getMask(), true))
+ return SDValue();
+
+ LLVMContext &C = *DAG.getContext();
SDLoc DL(St);
// Details about the old store
SDValue Ch = St->getChain();
SDValue BasePtr = St->getBasePtr();
- unsigned Alignment = St->getOriginalAlignment();
+ Align Alignment = St->getOriginalAlign();
MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
AAMDNodes AAInfo = St->getAAInfo();
- EVT NewFromVT = EVT::getVectorVT(*DAG.getContext(), FromEltVT, NumElements);
- EVT NewToVT = EVT::getVectorVT(*DAG.getContext(), ToEltVT, NumElements);
+ // We split the store into slices of NumElements. fp16 trunc stores are first
+ // narrowed with a VCVT and then stored as truncating integer stores.
+ EVT NewFromVT = EVT::getVectorVT(C, FromEltVT, NumElements);
+ EVT NewToVT = EVT::getVectorVT(
+ C, EVT::getIntegerVT(C, ToEltVT.getSizeInBits()), NumElements);
SmallVector<SDValue, 4> Stores;
for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
@@ -13578,9 +14491,17 @@ static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
SDValue Extract =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
DAG.getConstant(i * NumElements, DL, MVT::i32));
+
+ if (ToEltVT == MVT::f16) {
+ SDValue FPTrunc =
+ DAG.getNode(ARMISD::VCVTN, DL, MVT::v8f16, DAG.getUNDEF(MVT::v8f16),
+ Extract, DAG.getConstant(0, DL, MVT::i32));
+ Extract = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v4i32, FPTrunc);
+ }
+
SDValue Store = DAG.getTruncStore(
Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
- NewToVT, Alignment, MMOFlags, AAInfo);
+ NewToVT, Alignment.value(), MMOFlags, AAInfo);
Stores.push_back(Store);
}
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
@@ -13778,8 +14699,163 @@ static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
ConvInput, DAG.getConstant(C, dl, MVT::i32));
}
+static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ if (!ST->hasMVEIntegerOps())
+ return SDValue();
+
+ assert(N->getOpcode() == ISD::VECREDUCE_ADD);
+ EVT ResVT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDLoc dl(N);
+
+ // We are looking for something that will have illegal types if left alone,
+ // but that we can convert to a single instruction under MVE. For example
+ // vecreduce_add(sext(A, v8i32)) => VADDV.s16 A
+ // or
+ // vecreduce_add(mul(zext(A, v16i32), zext(B, v16i32))) => VMLADAV.u8 A, B
+
+ // Cases:
+ // VADDV u/s 8/16/32
+ // VMLAV u/s 8/16/32
+ // VADDLV u/s 32
+ // VMLALV u/s 16/32
+
+ auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) {
+ if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
+ return SDValue();
+ SDValue A = N0->getOperand(0);
+ if (llvm::any_of(ExtTypes, [&A](MVT Ty) { return A.getValueType() == Ty; }))
+ return A;
+ return SDValue();
+ };
+ auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
+ SDValue &A, SDValue &B) {
+ if (ResVT != RetTy || N0->getOpcode() != ISD::MUL)
+ return false;
+ SDValue ExtA = N0->getOperand(0);
+ SDValue ExtB = N0->getOperand(1);
+ if (ExtA->getOpcode() != ExtendCode && ExtB->getOpcode() != ExtendCode)
+ return false;
+ A = ExtA->getOperand(0);
+ B = ExtB->getOperand(0);
+ if (A.getValueType() == B.getValueType() &&
+ llvm::any_of(ExtTypes, [&A](MVT Ty) { return A.getValueType() == Ty; }))
+ return true;
+ return false;
+ };
+ auto Create64bitNode = [&](unsigned Opcode, ArrayRef<SDValue> Ops) {
+ SDValue Node = DAG.getNode(Opcode, dl, {MVT::i32, MVT::i32}, Ops);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Node,
+ SDValue(Node.getNode(), 1));
+ };
+
+ if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
+ return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
+ if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
+ return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
+ if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
+ return Create64bitNode(ARMISD::VADDLVs, {A});
+ if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
+ return Create64bitNode(ARMISD::VADDLVu, {A});
+
+ SDValue A, B;
+ if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
+ return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
+ if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
+ return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B);
+ if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B))
+ return Create64bitNode(ARMISD::VMLALVs, {A, B});
+ if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B))
+ return Create64bitNode(ARMISD::VMLALVu, {A, B});
+ return SDValue();
+}
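// Editor's sketch (not part of the upstream patch): the VMLAV patterns
// matched above are widened dot products, vecreduce_add(mul(ext(A), ext(B))).
// A scalar model of the unsigned v16i8 case; the helper name is illustrative
// only.
#include <cstdint>
static uint32_t vmlavModel(const uint8_t A[16], const uint8_t B[16]) {
  uint32_t Acc = 0;
  for (int i = 0; i < 16; ++i)
    Acc += uint32_t(A[i]) * uint32_t(B[i]); // zext each lane, multiply, accumulate
  return Acc;
}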
+
+static SDValue PerformVMOVNCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ unsigned IsTop = N->getConstantOperandVal(2);
+
+ // VMOVNt(c, VQMOVNb(a, b)) => VQMOVNt(c, b)
+ // VMOVNb(c, VQMOVNb(a, b)) => VQMOVNb(c, b)
+ if ((Op1->getOpcode() == ARMISD::VQMOVNs ||
+ Op1->getOpcode() == ARMISD::VQMOVNu) &&
+ Op1->getConstantOperandVal(2) == 0)
+ return DCI.DAG.getNode(Op1->getOpcode(), SDLoc(Op1), N->getValueType(0),
+ Op0, Op1->getOperand(1), N->getOperand(2));
+
+ // Only the bottom lanes from Qm (Op1) and either the top or bottom lanes from
+ // Qd (Op0) are demanded from a VMOVN, depending on whether we are inserting
+ // into the top or bottom lanes.
+ unsigned NumElts = N->getValueType(0).getVectorNumElements();
+ APInt Op1DemandedElts = APInt::getSplat(NumElts, APInt::getLowBitsSet(2, 1));
+ APInt Op0DemandedElts =
+ IsTop ? Op1DemandedElts
+ : APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1));
+
+ APInt KnownUndef, KnownZero;
+ const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, KnownUndef,
+ KnownZero, DCI))
+ return SDValue(N, 0);
+ if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, KnownUndef,
+ KnownZero, DCI))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+static SDValue PerformVQMOVNCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue Op0 = N->getOperand(0);
+ unsigned IsTop = N->getConstantOperandVal(2);
+
+ unsigned NumElts = N->getValueType(0).getVectorNumElements();
+ APInt Op0DemandedElts =
+ APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
+ : APInt::getHighBitsSet(2, 1));
+
+ APInt KnownUndef, KnownZero;
+ const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, KnownUndef,
+ KnownZero, DCI))
+ return SDValue(N, 0);
+ return SDValue();
+}
+
+static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // Turn X << -C -> X >> C and vice versa. The negative shifts can come up from
+ // uses of the intrinsics.
+ if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
+ int ShiftAmt = C->getSExtValue();
+ if (ShiftAmt == 0) {
+ SDValue Merge = DAG.getMergeValues({Op0, Op1}, DL);
+ DAG.ReplaceAllUsesWith(N, Merge.getNode());
+ return SDValue();
+ }
+
+ if (ShiftAmt >= -32 && ShiftAmt < 0) {
+ unsigned NewOpcode =
+ N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
+ SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1,
+ DAG.getConstant(-ShiftAmt, DL, MVT::i32));
+ DAG.ReplaceAllUsesWith(N, NewShift.getNode());
+ return NewShift;
+ }
+ }
+
+ return SDValue();
+}
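// Editor's sketch (not part of the upstream patch): the combine above
// canonicalises the negative shift amounts the long-shift intrinsics permit,
// using x << -c == x >> c for 0 < c <= 32 (and the reverse for LSRL). A
// 64-bit scalar model of that convention; the helper name is illustrative
// only.
#include <cstdint>
constexpr uint64_t longShiftLeft(uint64_t X, int Amt) {
  return Amt >= 0 ? X << Amt : X >> -Amt;
}
static_assert(longShiftLeft(0x100, -4) == 0x10,
              "a negative left-shift amount behaves as a right shift");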
+
/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
-static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
switch (IntNo) {
default:
@@ -13928,6 +15004,72 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
case Intrinsic::arm_neon_vqrshiftu:
// No immediate versions of these to check for.
break;
+
+ case Intrinsic::arm_mve_vqdmlah:
+ case Intrinsic::arm_mve_vqdmlash:
+ case Intrinsic::arm_mve_vqrdmlah:
+ case Intrinsic::arm_mve_vqrdmlash:
+ case Intrinsic::arm_mve_vmla_n_predicated:
+ case Intrinsic::arm_mve_vmlas_n_predicated:
+ case Intrinsic::arm_mve_vqdmlah_predicated:
+ case Intrinsic::arm_mve_vqdmlash_predicated:
+ case Intrinsic::arm_mve_vqrdmlah_predicated:
+ case Intrinsic::arm_mve_vqrdmlash_predicated: {
+ // These intrinsics all take an i32 scalar operand which is narrowed to the
+ // size of a single lane of the vector type they return. So we don't need
+ // any bits of that operand above that point, which allows us to eliminate
+ // uxth/sxth.
+ unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
+ APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
+ if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))
+ return SDValue();
+ break;
+ }
+
+ case Intrinsic::arm_mve_minv:
+ case Intrinsic::arm_mve_maxv:
+ case Intrinsic::arm_mve_minav:
+ case Intrinsic::arm_mve_maxav:
+ case Intrinsic::arm_mve_minv_predicated:
+ case Intrinsic::arm_mve_maxv_predicated:
+ case Intrinsic::arm_mve_minav_predicated:
+ case Intrinsic::arm_mve_maxav_predicated: {
+ // These intrinsics all take an i32 scalar operand which is narrowed to the
+ // size of a single lane of the vector type they take as the other input.
+ unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();
+ APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
+ if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
+ return SDValue();
+ break;
+ }
+
+ case Intrinsic::arm_mve_addv: {
+    // Turn this intrinsic straight into the appropriate ARMISD::VADDV node,
+    // which allows PerformADDVecReduce to turn it into VADDLV when possible.
+ bool Unsigned = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;
+ return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));
+ }
+
+ case Intrinsic::arm_mve_addlv:
+ case Intrinsic::arm_mve_addlv_predicated: {
+    // Same for these, but ARMISD::VADDLV has to be followed by a BUILD_PAIR
+    // which recombines the two i32 outputs into an i64.
+ bool Unsigned = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
+ (Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) :
+ (Unsigned ? ARMISD::VADDLVpu : ARMISD::VADDLVps);
+
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 1, e = N->getNumOperands(); i < e; i++)
+ if (i != 2) // skip the unsigned flag
+ Ops.push_back(N->getOperand(i));
+
+ SDLoc dl(N);
+ SDValue val = DAG.getNode(Opc, dl, {MVT::i32, MVT::i32}, Ops);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, val.getValue(0),
+ val.getValue(1));
+ }
}
return SDValue();
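For the arm_mve_addlv case above, the trailing BUILD_PAIR is just the standard recombination of two 32-bit halves into one i64 (an editor's sketch, not from the patch; per the VADDLV node comments in the header, result 0 carries the low half and result 1 the high half):

    #include <cstdint>

    // Sketch of what ISD::BUILD_PAIR computes here: glue the low and high
    // 32-bit halves of the long reduction back into a single 64-bit value.
    uint64_t build_pair_i64(uint32_t Lo, uint32_t Hi) {
      return (uint64_t(Hi) << 32) | Lo;
    }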
@@ -14023,9 +15165,10 @@ static SDValue PerformShiftCombine(SDNode *N,
return SDValue();
}
-// Look for a sign/zero extend of a larger than legal load. This can be split
-// into two extending loads, which are simpler to deal with than an arbitrary
-// sign extend.
+// Look for a sign, zero or fp extend of a larger-than-legal load. This can be
+// split into multiple extending loads, which are simpler to deal with than an
+// arbitrary extend. For fp extends we use an integer extending load and a VCVTL
+// to convert the type to an f32.
static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::LOAD)
@@ -14047,45 +15190,63 @@ static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {
NumElements = 4;
if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)
NumElements = 8;
+ if (ToEltVT == MVT::f32 && FromEltVT == MVT::f16)
+ NumElements = 4;
if (NumElements == 0 ||
- FromVT.getVectorNumElements() == NumElements ||
+ (FromEltVT != MVT::f16 && FromVT.getVectorNumElements() == NumElements) ||
FromVT.getVectorNumElements() % NumElements != 0 ||
!isPowerOf2_32(NumElements))
return SDValue();
+ LLVMContext &C = *DAG.getContext();
SDLoc DL(LD);
// Details about the old load
SDValue Ch = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
- unsigned Alignment = LD->getOriginalAlignment();
+ Align Alignment = LD->getOriginalAlign();
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
ISD::LoadExtType NewExtType =
N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
- EVT NewFromVT = FromVT.getHalfNumVectorElementsVT(*DAG.getContext());
- EVT NewToVT = ToVT.getHalfNumVectorElementsVT(*DAG.getContext());
- unsigned NewOffset = NewFromVT.getSizeInBits() / 8;
- SDValue NewPtr = DAG.getObjectPtrOffset(DL, BasePtr, NewOffset);
-
- // Split the load in half, each side of which is extended separately. This
- // is good enough, as legalisation will take it from there. They are either
- // already legal or they will be split further into something that is
- // legal.
- SDValue NewLoad1 =
- DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, BasePtr, Offset,
- LD->getPointerInfo(), NewFromVT, Alignment, MMOFlags, AAInfo);
- SDValue NewLoad2 =
- DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
- LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
- Alignment, MMOFlags, AAInfo);
-
- SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
- SDValue(NewLoad1.getNode(), 1),
- SDValue(NewLoad2.getNode(), 1));
+ EVT NewFromVT = EVT::getVectorVT(
+ C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
+ EVT NewToVT = EVT::getVectorVT(
+ C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
+
+ SmallVector<SDValue, 4> Loads;
+ SmallVector<SDValue, 4> Chains;
+ for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
+ unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
+ SDValue NewPtr = DAG.getObjectPtrOffset(DL, BasePtr, NewOffset);
+
+ SDValue NewLoad =
+ DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
+ LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
+ Alignment.value(), MMOFlags, AAInfo);
+ Loads.push_back(NewLoad);
+ Chains.push_back(SDValue(NewLoad.getNode(), 1));
+ }
+
+  // Float truncs need to be extended into their floating point types with
+  // VCVTL nodes.
+ if (FromEltVT == MVT::f16) {
+ SmallVector<SDValue, 4> Extends;
+
+ for (unsigned i = 0; i < Loads.size(); i++) {
+ SDValue LoadBC =
+ DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v8f16, Loads[i]);
+ SDValue FPExt = DAG.getNode(ARMISD::VCVTL, DL, MVT::v4f32, LoadBC,
+ DAG.getConstant(0, DL, MVT::i32));
+ Extends.push_back(FPExt);
+ }
+
+ Loads = Extends;
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, NewLoad1, NewLoad2);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Loads);
}
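A scalar picture of the splitting done above (an editor's sketch, assuming a v16i8 -> v16i16 sign-extending load as the illegal input): each narrower load reads at an increasing offset, extends its own group of lanes, and the results are concatenated in order, mirroring the loop over NumElements-sized chunks.

    #include <cstdint>

    // Sketch: one wide sign-extending load rewritten as two legal halves,
    // reading at Base and Base + 8 and widening each i8 lane to i16.
    void split_sext_load_v16i8(const int8_t *Base, int16_t Out[16]) {
      for (int Chunk = 0; Chunk < 2; ++Chunk)
        for (int Lane = 0; Lane < 8; ++Lane)
          Out[Chunk * 8 + Lane] = int16_t(Base[Chunk * 8 + Lane]);
    }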
/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
@@ -14133,6 +15294,116 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ if (ST->hasMVEFloatOps())
+ if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
+ return NewLoad;
+
+ return SDValue();
+}
+
+/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
+/// saturates.
+static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ if (!ST->hasMVEIntegerOps())
+ return SDValue();
+
+ if (VT != MVT::v4i32 && VT != MVT::v8i16)
+ return SDValue();
+
+ auto IsSignedSaturate = [&](SDNode *Min, SDNode *Max) {
+ // Check one is a smin and the other is a smax
+ if (Min->getOpcode() != ISD::SMIN)
+ std::swap(Min, Max);
+ if (Min->getOpcode() != ISD::SMIN || Max->getOpcode() != ISD::SMAX)
+ return false;
+
+ APInt SaturateC;
+ if (VT == MVT::v4i32)
+ SaturateC = APInt(32, (1 << 15) - 1, true);
+ else //if (VT == MVT::v8i16)
+ SaturateC = APInt(16, (1 << 7) - 1, true);
+
+ APInt MinC, MaxC;
+ if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
+ MinC != SaturateC)
+ return false;
+ if (!ISD::isConstantSplatVector(Max->getOperand(1).getNode(), MaxC) ||
+ MaxC != ~SaturateC)
+ return false;
+ return true;
+ };
+
+ if (IsSignedSaturate(N, N0.getNode())) {
+ SDLoc DL(N);
+ MVT ExtVT, HalfVT;
+ if (VT == MVT::v4i32) {
+ HalfVT = MVT::v8i16;
+ ExtVT = MVT::v4i16;
+ } else { // if (VT == MVT::v8i16)
+ HalfVT = MVT::v16i8;
+ ExtVT = MVT::v8i8;
+ }
+
+    // Create a VQMOVNB with undef top lanes, then sign extended into the top
+    // half. That extend will hopefully be removed if only the bottom bits are
+    // demanded (through a truncating store, for example).
+ SDValue VQMOVN =
+ DAG.getNode(ARMISD::VQMOVNs, DL, HalfVT, DAG.getUNDEF(HalfVT),
+ N0->getOperand(0), DAG.getConstant(0, DL, MVT::i32));
+ SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Bitcast,
+ DAG.getValueType(ExtVT));
+ }
+
+ auto IsUnsignedSaturate = [&](SDNode *Min) {
+ // For unsigned, we just need to check for <= 0xffff
+ if (Min->getOpcode() != ISD::UMIN)
+ return false;
+
+ APInt SaturateC;
+ if (VT == MVT::v4i32)
+ SaturateC = APInt(32, (1 << 16) - 1, true);
+ else //if (VT == MVT::v8i16)
+ SaturateC = APInt(16, (1 << 8) - 1, true);
+
+ APInt MinC;
+ if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
+ MinC != SaturateC)
+ return false;
+ return true;
+ };
+
+ if (IsUnsignedSaturate(N)) {
+ SDLoc DL(N);
+ MVT HalfVT;
+ unsigned ExtConst;
+ if (VT == MVT::v4i32) {
+ HalfVT = MVT::v8i16;
+ ExtConst = 0x0000FFFF;
+ } else { //if (VT == MVT::v8i16)
+ HalfVT = MVT::v16i8;
+ ExtConst = 0x00FF;
+ }
+
+ // Create a VQMOVNB with undef top lanes, then ZExt into the top half with
+ // an AND. That extend will hopefully be removed if only the bottom bits are
+    // demanded (through a truncating store, for example).
+ SDValue VQMOVN =
+ DAG.getNode(ARMISD::VQMOVNu, DL, HalfVT, DAG.getUNDEF(HalfVT), N0,
+ DAG.getConstant(0, DL, MVT::i32));
+ SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
+ return DAG.getNode(ISD::AND, DL, VT, Bitcast,
+ DAG.getConstant(ExtConst, DL, VT));
+ }
+
+ return SDValue();
+}
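The min/max shapes matched above have simple scalar equivalents (an editor's sketch for the v4i32 cases): clamping a 32-bit lane to the signed 16-bit range before narrowing is what a signed VQMOVN does per lane, and clamping to 0xffff is the unsigned counterpart.

    #include <algorithm>
    #include <cstdint>

    // Sketch: scalar models of the saturate patterns recognised above.
    int16_t saturate_s32_to_s16(int32_t X) {   // smin(smax(X, -32768), 32767)
      return int16_t(std::min<int32_t>(std::max<int32_t>(X, -32768), 32767));
    }
    uint16_t saturate_u32_to_u16(uint32_t X) { // umin(X, 0xffff)
      return uint16_t(std::min<uint32_t>(X, 0xffffu));
    }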
+
static const APInt *isPowerOf2Constant(SDValue V) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
if (!C)
@@ -14614,10 +15885,41 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
return Res;
}
+static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ SDValue Src = N->getOperand(0);
+ EVT DstVT = N->getValueType(0);
+
+ // Convert v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32) under MVE.
+ if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
+ EVT SrcVT = Src.getValueType();
+ if (SrcVT.getScalarSizeInBits() == DstVT.getScalarSizeInBits())
+ return DAG.getNode(ARMISD::VDUP, SDLoc(N), DstVT, Src.getOperand(0));
+ }
+
+ // We may have a bitcast of something that has already had this bitcast
+ // combine performed on it, so skip past any VECTOR_REG_CASTs.
+ while (Src.getOpcode() == ARMISD::VECTOR_REG_CAST)
+ Src = Src.getOperand(0);
+
+ // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
+ // would be generated is at least the width of the element type.
+ EVT SrcVT = Src.getValueType();
+ if ((Src.getOpcode() == ARMISD::VMOVIMM ||
+ Src.getOpcode() == ARMISD::VMVNIMM ||
+ Src.getOpcode() == ARMISD::VMOVFPIMM) &&
+ SrcVT.getScalarSizeInBits() <= DstVT.getScalarSizeInBits() &&
+ DAG.getDataLayout().isBigEndian())
+ return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src);
+
+ return SDValue();
+}
+
SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
+ case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
@@ -14635,25 +15937,37 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ARMISD::BFI: return PerformBFICombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
+ case ARMISD::VMOVhr: return PerformVMOVhrCombine(N, DCI);
+ case ARMISD::VMOVrh: return PerformVMOVrhCombine(N, DCI);
case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
+ case ISD::EXTRACT_VECTOR_ELT: return PerformExtractEltCombine(N, DCI);
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
- case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
+ case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
case ARMISD::VDUP: return PerformVDUPCombine(N, DCI, Subtarget);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
return PerformVCVTCombine(N, DCI.DAG, Subtarget);
case ISD::FDIV:
return PerformVDIVCombine(N, DCI.DAG, Subtarget);
- case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
+ case ISD::INTRINSIC_WO_CHAIN:
+ return PerformIntrinsicCombine(N, DCI);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
return PerformShiftCombine(N, DCI, Subtarget);
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
+ case ISD::ANY_EXTEND:
+ return PerformExtendCombine(N, DCI.DAG, Subtarget);
+ case ISD::FP_EXTEND:
+ return PerformFPExtendCombine(N, DCI.DAG, Subtarget);
+ case ISD::SMIN:
+ case ISD::UMIN:
+ case ISD::SMAX:
+ case ISD::UMAX:
+ return PerformMinMaxCombine(N, DCI.DAG, Subtarget);
case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG);
case ISD::LOAD: return PerformLOADCombine(N, DCI);
@@ -14664,10 +15978,25 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
return PerformVLDCombine(N, DCI);
case ARMISD::BUILD_VECTOR:
return PerformARMBUILD_VECTORCombine(N, DCI);
+ case ISD::BITCAST:
+ return PerformBITCASTCombine(N, DCI.DAG, Subtarget);
case ARMISD::PREDICATE_CAST:
return PerformPREDICATE_CASTCombine(N, DCI);
+ case ARMISD::VECTOR_REG_CAST:
+ return PerformVECTOR_REG_CASTCombine(N, DCI, Subtarget);
case ARMISD::VCMP:
return PerformVCMPCombine(N, DCI, Subtarget);
+ case ISD::VECREDUCE_ADD:
+ return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
+ case ARMISD::VMOVN:
+ return PerformVMOVNCombine(N, DCI);
+ case ARMISD::VQMOVNs:
+ case ARMISD::VQMOVNu:
+ return PerformVQMOVNCombine(N, DCI);
+ case ARMISD::ASRL:
+ case ARMISD::LSRL:
+ case ARMISD::LSLL:
+ return PerformLongShiftCombine(N, DCI.DAG);
case ARMISD::SMULWB: {
unsigned BitWidth = N->getValueType(0).getSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
@@ -14756,6 +16085,11 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane:
return PerformVLDCombine(N, DCI);
+ case Intrinsic::arm_mve_vld2q:
+ case Intrinsic::arm_mve_vld4q:
+ case Intrinsic::arm_mve_vst2q:
+ case Intrinsic::arm_mve_vst4q:
+ return PerformMVEVLDCombine(N, DCI);
default: break;
}
break;
@@ -14839,28 +16173,21 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
return false;
}
-static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
- unsigned AlignCheck) {
- return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
- (DstAlign == 0 || DstAlign % AlignCheck == 0));
-}
EVT ARMTargetLowering::getOptimalMemOpType(
- uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
- bool ZeroMemset, bool MemcpyStrSrc,
- const AttributeList &FuncAttributes) const {
+ const MemOp &Op, const AttributeList &FuncAttributes) const {
// See if we can use NEON instructions for this...
- if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() &&
+ if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
bool Fast;
- if (Size >= 16 &&
- (memOpAlign(SrcAlign, DstAlign, 16) ||
+ if (Op.size() >= 16 &&
+ (Op.isAligned(Align(16)) ||
(allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1,
MachineMemOperand::MONone, &Fast) &&
Fast))) {
return MVT::v2f64;
- } else if (Size >= 8 &&
- (memOpAlign(SrcAlign, DstAlign, 8) ||
+ } else if (Op.size() >= 8 &&
+ (Op.isAligned(Align(8)) ||
(allowsMisalignedMemoryAccesses(
MVT::f64, 0, 1, MachineMemOperand::MONone, &Fast) &&
Fast))) {
@@ -14974,45 +16301,97 @@ bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
if (!Subtarget->hasMVEIntegerOps())
return false;
- auto IsSinker = [](Instruction *I, int Operand) {
+ auto IsFMSMul = [&](Instruction *I) {
+ if (!I->hasOneUse())
+ return false;
+ auto *Sub = cast<Instruction>(*I->users().begin());
+ return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;
+ };
+ auto IsFMS = [&](Instruction *I) {
+ if (match(I->getOperand(0), m_FNeg(m_Value())) ||
+ match(I->getOperand(1), m_FNeg(m_Value())))
+ return true;
+ return false;
+ };
+
+ auto IsSinker = [&](Instruction *I, int Operand) {
switch (I->getOpcode()) {
case Instruction::Add:
case Instruction::Mul:
+ case Instruction::FAdd:
case Instruction::ICmp:
+ case Instruction::FCmp:
return true;
+ case Instruction::FMul:
+ return !IsFMSMul(I);
case Instruction::Sub:
+ case Instruction::FSub:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
return Operand == 1;
+ case Instruction::Call:
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::fma:
+ return !IsFMS(I);
+ default:
+ return false;
+ }
+ }
+ return false;
default:
return false;
}
};
- int Op = 0;
- if (!isa<ShuffleVectorInst>(I->getOperand(Op)))
- Op = 1;
- if (!IsSinker(I, Op))
- return false;
- if (!match(I->getOperand(Op),
- m_ShuffleVector(m_InsertElement(m_Undef(), m_Value(), m_ZeroInt()),
- m_Undef(), m_Zero()))) {
- return false;
- }
- Instruction *Shuffle = cast<Instruction>(I->getOperand(Op));
- // All uses of the shuffle should be sunk to avoid duplicating it across gpr
- // and vector registers
- for (Use &U : Shuffle->uses()) {
- Instruction *Insn = cast<Instruction>(U.getUser());
- if (!IsSinker(Insn, U.getOperandNo()))
- return false;
+ for (auto OpIdx : enumerate(I->operands())) {
+ Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
+ // Make sure we are not already sinking this operand
+ if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
+ continue;
+
+ Instruction *Shuffle = Op;
+ if (Shuffle->getOpcode() == Instruction::BitCast)
+ Shuffle = dyn_cast<Instruction>(Shuffle->getOperand(0));
+ // We are looking for a splat that can be sunk.
+ if (!Shuffle ||
+ !match(Shuffle, m_Shuffle(
+ m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
+ m_Undef(), m_ZeroMask())))
+ continue;
+ if (!IsSinker(I, OpIdx.index()))
+ continue;
+
+    // All uses of the shuffle should be sunk to avoid duplicating it across GPR
+    // and vector registers.
+ for (Use &U : Op->uses()) {
+ Instruction *Insn = cast<Instruction>(U.getUser());
+ if (!IsSinker(Insn, U.getOperandNo()))
+ return false;
+ }
+
+ Ops.push_back(&Shuffle->getOperandUse(0));
+ if (Shuffle != Op)
+ Ops.push_back(&Op->getOperandUse(0));
+ Ops.push_back(&OpIdx.value());
}
- Ops.push_back(&Shuffle->getOperandUse(0));
- Ops.push_back(&I->getOperandUse(Op));
return true;
}
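For reference, the splat shape being sunk above can be written as a standalone predicate using the same PatternMatch helpers the function already relies on (an editor's sketch; the helper and its name are illustrative, not from the patch). Keeping such a splat next to its vector user lets instruction selection pick the MVE vector-plus-scalar instruction forms instead of materialising the splat in a vector register and keeping it live across the block.

    #include "llvm/IR/PatternMatch.h"

    // Sketch: matches a vector built by inserting a scalar into lane 0 and
    // then shuffling it with an all-zero mask, i.e. a broadcast of one scalar.
    static bool isSinkableSplat(llvm::Value *V) {
      using namespace llvm::PatternMatch;
      return match(V, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
                                m_Undef(), m_ZeroMask()));
    }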
+Type *ARMTargetLowering::shouldConvertSplatType(ShuffleVectorInst *SVI) const {
+ if (!Subtarget->hasMVEIntegerOps())
+ return nullptr;
+ Type *SVIType = SVI->getType();
+ Type *ScalarType = SVIType->getScalarType();
+
+ if (ScalarType->isFloatTy())
+ return Type::getInt32Ty(SVIType->getContext());
+ if (ScalarType->isHalfTy())
+ return Type::getInt16Ty(SVIType->getContext());
+ return nullptr;
+}
+
bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
EVT VT = ExtVal.getValueType();
@@ -15024,6 +16403,9 @@ bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
return false;
}
+ if (Subtarget->hasMVEIntegerOps())
+ return true;
+
// Don't create a loadext if we can fold the extension into a wide/long
// instruction.
// If there's more than one user instruction, the loadext is desirable no
@@ -15445,7 +16827,7 @@ static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
return false;
}
-static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align,
+static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment,
bool isSEXTLoad, bool IsMasked, bool isLE,
SDValue &Base, SDValue &Offset,
bool &isInc, SelectionDAG &DAG) {
@@ -15480,16 +16862,16 @@ static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align,
// (in BE/masked) type.
Base = Ptr->getOperand(0);
if (VT == MVT::v4i16) {
- if (Align >= 2 && IsInRange(RHSC, 0x80, 2))
+ if (Alignment >= 2 && IsInRange(RHSC, 0x80, 2))
return true;
} else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
if (IsInRange(RHSC, 0x80, 1))
return true;
- } else if (Align >= 4 &&
+ } else if (Alignment >= 4 &&
(CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
IsInRange(RHSC, 0x80, 4))
return true;
- else if (Align >= 2 &&
+ else if (Alignment >= 2 &&
(CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
IsInRange(RHSC, 0x80, 2))
return true;
@@ -15511,28 +16893,28 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
EVT VT;
SDValue Ptr;
- unsigned Align;
+ Align Alignment;
bool isSEXTLoad = false;
bool IsMasked = false;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
- Align = LD->getAlignment();
+ Alignment = LD->getAlign();
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
- Align = ST->getAlignment();
+ Alignment = ST->getAlign();
} else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
- Align = LD->getAlignment();
+ Alignment = LD->getAlign();
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
IsMasked = true;
} else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
- Align = ST->getAlignment();
+ Alignment = ST->getAlign();
IsMasked = true;
} else
return false;
@@ -15541,9 +16923,9 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
bool isLegal = false;
if (VT.isVector())
isLegal = Subtarget->hasMVEIntegerOps() &&
- getMVEIndexedAddressParts(Ptr.getNode(), VT, Align, isSEXTLoad,
- IsMasked, Subtarget->isLittle(), Base,
- Offset, isInc, DAG);
+ getMVEIndexedAddressParts(
+ Ptr.getNode(), VT, Alignment, isSEXTLoad, IsMasked,
+ Subtarget->isLittle(), Base, Offset, isInc, DAG);
else {
if (Subtarget->isThumb2())
isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
@@ -15569,31 +16951,31 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
SelectionDAG &DAG) const {
EVT VT;
SDValue Ptr;
- unsigned Align;
+ Align Alignment;
bool isSEXTLoad = false, isNonExt;
bool IsMasked = false;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
Ptr = LD->getBasePtr();
- Align = LD->getAlignment();
+ Alignment = LD->getAlign();
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
Ptr = ST->getBasePtr();
- Align = ST->getAlignment();
+ Alignment = ST->getAlign();
isNonExt = !ST->isTruncatingStore();
} else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
VT = LD->getMemoryVT();
Ptr = LD->getBasePtr();
- Align = LD->getAlignment();
+ Alignment = LD->getAlign();
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
IsMasked = true;
} else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
VT = ST->getMemoryVT();
Ptr = ST->getBasePtr();
- Align = ST->getAlignment();
+ Alignment = ST->getAlign();
isNonExt = !ST->isTruncatingStore();
IsMasked = true;
} else
@@ -15619,7 +17001,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
bool isLegal = false;
if (VT.isVector())
isLegal = Subtarget->hasMVEIntegerOps() &&
- getMVEIndexedAddressParts(Op, VT, Align, isSEXTLoad, IsMasked,
+ getMVEIndexedAddressParts(Op, VT, Alignment, isSEXTLoad, IsMasked,
Subtarget->isLittle(), Base, Offset,
isInc, DAG);
else {
@@ -15734,18 +17116,23 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
if (Op.getOpcode() == ARMISD::VGETLANEs)
Known = Known.sext(DstSz);
else {
- Known = Known.zext(DstSz, true /* extended bits are known zero */);
+ Known = Known.zext(DstSz);
}
assert(DstSz == Known.getBitWidth());
break;
}
+ case ARMISD::VMOVrh: {
+ KnownBits KnownOp = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
+ assert(KnownOp.getBitWidth() == 16);
+ Known = KnownOp.zext(32);
+ break;
+ }
}
}
-bool
-ARMTargetLowering::targetShrinkDemandedConstant(SDValue Op,
- const APInt &DemandedAPInt,
- TargetLoweringOpt &TLO) const {
+bool ARMTargetLowering::targetShrinkDemandedConstant(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ TargetLoweringOpt &TLO) const {
// Delay optimization, so we don't have to deal with illegal types, or block
// optimizations.
if (!TLO.LegalOps)
@@ -15770,7 +17157,7 @@ ARMTargetLowering::targetShrinkDemandedConstant(SDValue Op,
unsigned Mask = C->getZExtValue();
- unsigned Demanded = DemandedAPInt.getZExtValue();
+ unsigned Demanded = DemandedBits.getZExtValue();
unsigned ShrunkMask = Mask & Demanded;
unsigned ExpandedMask = Mask | ~Demanded;
@@ -15825,6 +17212,35 @@ ARMTargetLowering::targetShrinkDemandedConstant(SDValue Op,
return false;
}
+bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
+ SDValue Op, const APInt &OriginalDemandedBits,
+ const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
+ unsigned Depth) const {
+ unsigned Opc = Op.getOpcode();
+
+ switch (Opc) {
+ case ARMISD::ASRL:
+ case ARMISD::LSRL: {
+    // If this is result 0 and the other result is unused, see if the demanded
+ // bits allow us to shrink this long shift into a standard small shift in
+ // the opposite direction.
+ if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
+ isa<ConstantSDNode>(Op->getOperand(2))) {
+ unsigned ShAmt = Op->getConstantOperandVal(2);
+ if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(
+ APInt::getAllOnesValue(32) << (32 - ShAmt)))
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(
+ ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1),
+ TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32)));
+ }
+ break;
+ }
+ }
+
+ return TargetLowering::SimplifyDemandedBitsForTargetNode(
+ Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
+}
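The LSRL case above follows from how the low word of a 64-bit logical right shift is composed (an editor's sketch, for a constant 0 < C < 32): its top C bits come only from the high input word shifted left by 32 - C, so when nothing below bit 32 - C is demanded the whole long shift collapses to that single left shift.

    #include <cstdint>

    // Sketch: low 32-bit result of a 64-bit logical shift right by C.
    // The (Lo >> C) term only populates bits below 32 - C, so it drops out
    // when those bits are not demanded, leaving Hi << (32 - C).
    uint32_t long_lsr_low32(uint32_t Lo, uint32_t Hi, unsigned C) {
      return (Lo >> C) | (Hi << (32 - C)); // valid for 0 < C < 32
    }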
//===----------------------------------------------------------------------===//
// ARM Inline Assembly Support
@@ -15835,7 +17251,7 @@ bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
if (!Subtarget->hasV6Ops())
return false;
- InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
std::string AsmStr = IA->getAsmString();
SmallVector<StringRef, 4> AsmPieces;
SplitString(AsmStr, AsmPieces, ";\n");
@@ -15843,7 +17259,7 @@ bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
switch (AsmPieces.size()) {
default: return false;
case 1:
- AsmStr = AsmPieces[0];
+ AsmStr = std::string(AsmPieces[0]);
AsmPieces.clear();
SplitString(AsmStr, AsmPieces, " \t,");
@@ -16342,13 +17758,15 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
"no-stack-arg-probe")) {
- unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ MaybeAlign Align =
+ cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
Chain = SP.getValue(1);
SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
if (Align)
- SP = DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
- DAG.getConstant(-(uint64_t)Align, DL, MVT::i32));
+ SP =
+ DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
+ DAG.getConstant(-(uint64_t)Align->value(), DL, MVT::i32));
Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
SDValue Ops[2] = { SP, Chain };
return DAG.getMergeValues(Ops, DL);
@@ -16552,7 +17970,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
- Info.align = MaybeAlign(cast<ConstantInt>(AlignArg)->getZExtValue());
+ Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
// volatile loads with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOLoad;
return true;
@@ -16593,7 +18011,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
- Info.align = MaybeAlign(cast<ConstantInt>(AlignArg)->getZExtValue());
+ Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
// volatile stores with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOStore;
return true;
@@ -16619,6 +18037,34 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MOStore;
return true;
}
+ case Intrinsic::arm_mve_vld2q:
+ case Intrinsic::arm_mve_vld4q: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ // Conservatively set memVT to the entire set of vectors loaded.
+ Type *VecTy = cast<StructType>(I.getType())->getElementType(1);
+ unsigned Factor = Intrinsic == Intrinsic::arm_mve_vld2q ? 2 : 4;
+ Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = Align(VecTy->getScalarSizeInBits() / 8);
+ // volatile loads with MVE intrinsics not supported
+ Info.flags = MachineMemOperand::MOLoad;
+ return true;
+ }
+ case Intrinsic::arm_mve_vst2q:
+ case Intrinsic::arm_mve_vst4q: {
+ Info.opc = ISD::INTRINSIC_VOID;
+ // Conservatively set memVT to the entire set of vectors stored.
+ Type *VecTy = I.getArgOperand(1)->getType();
+ unsigned Factor = Intrinsic == Intrinsic::arm_mve_vst2q ? 2 : 4;
+ Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = Align(VecTy->getScalarSizeInBits() / 8);
+ // volatile stores with MVE intrinsics not supported
+ Info.flags = MachineMemOperand::MOStore;
+ return true;
+ }
case Intrinsic::arm_ldaex:
case Intrinsic::arm_ldrex: {
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
@@ -16627,7 +18073,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
+ Info.align = DL.getABITypeAlign(PtrTy->getElementType());
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
}
@@ -16639,7 +18085,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
- Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
+ Info.align = DL.getABITypeAlign(PtrTy->getElementType());
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
}
@@ -16873,7 +18319,7 @@ bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
return false;
assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
- unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+ unsigned BitWidth = VectorTy->getPrimitiveSizeInBits().getFixedSize();
// We can do a store + vector extract on any vector that fits perfectly in a D
// or Q register.
if (BitWidth == 64 || BitWidth == 128) {
@@ -16986,7 +18432,7 @@ ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
}
bool ARMTargetLowering::isLegalInterleavedAccessType(
- unsigned Factor, VectorType *VecTy, const DataLayout &DL) const {
+ unsigned Factor, FixedVectorType *VecTy, const DataLayout &DL) const {
unsigned VecSize = DL.getTypeSizeInBits(VecTy);
unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
@@ -17045,8 +18491,8 @@ bool ARMTargetLowering::lowerInterleavedLoad(
assert(Shuffles.size() == Indices.size() &&
"Unmatched number of shufflevectors and indices");
- VectorType *VecTy = Shuffles[0]->getType();
- Type *EltTy = VecTy->getVectorElementType();
+ auto *VecTy = cast<FixedVectorType>(Shuffles[0]->getType());
+ Type *EltTy = VecTy->getElementType();
const DataLayout &DL = LI->getModule()->getDataLayout();
@@ -17061,8 +18507,7 @@ bool ARMTargetLowering::lowerInterleavedLoad(
// A pointer vector can not be the return type of the ldN intrinsics. Need to
// load integer vectors first and then convert to pointer vectors.
if (EltTy->isPointerTy())
- VecTy =
- VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
+ VecTy = FixedVectorType::get(DL.getIntPtrType(EltTy), VecTy);
IRBuilder<> Builder(LI);
@@ -17072,15 +18517,15 @@ bool ARMTargetLowering::lowerInterleavedLoad(
if (NumLoads > 1) {
// If we're going to generate more than one load, reset the sub-vector type
// to something legal.
- VecTy = VectorType::get(VecTy->getVectorElementType(),
- VecTy->getVectorNumElements() / NumLoads);
+ VecTy = FixedVectorType::get(VecTy->getElementType(),
+ VecTy->getNumElements() / NumLoads);
// We will compute the pointer operand of each load from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
- BaseAddr, VecTy->getVectorElementType()->getPointerTo(
- LI->getPointerAddressSpace()));
+ BaseAddr,
+ VecTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
}
assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
@@ -17105,8 +18550,8 @@ bool ARMTargetLowering::lowerInterleavedLoad(
"expected interleave factor of 2 or 4 for MVE");
Intrinsic::ID LoadInts =
Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
- Type *VecEltTy = VecTy->getVectorElementType()->getPointerTo(
- LI->getPointerAddressSpace());
+ Type *VecEltTy =
+ VecTy->getElementType()->getPointerTo(LI->getPointerAddressSpace());
Type *Tys[] = {VecTy, VecEltTy};
Function *VldnFunc =
Intrinsic::getDeclaration(LI->getModule(), LoadInts, Tys);
@@ -17126,9 +18571,8 @@ bool ARMTargetLowering::lowerInterleavedLoad(
// If we're generating more than one load, compute the base address of
// subsequent loads as an offset from the previous.
if (LoadCount > 0)
- BaseAddr =
- Builder.CreateConstGEP1_32(VecTy->getVectorElementType(), BaseAddr,
- VecTy->getVectorNumElements() * Factor);
+ BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
+ VecTy->getNumElements() * Factor);
CallInst *VldN = createLoadIntrinsic(BaseAddr);
@@ -17143,8 +18587,8 @@ bool ARMTargetLowering::lowerInterleavedLoad(
// Convert the integer vector to pointer vector if the element is pointer.
if (EltTy->isPointerTy())
SubVec = Builder.CreateIntToPtr(
- SubVec, VectorType::get(SV->getType()->getVectorElementType(),
- VecTy->getVectorNumElements()));
+ SubVec,
+ FixedVectorType::get(SV->getType()->getElementType(), VecTy));
SubVecs[SV].push_back(SubVec);
}
@@ -17196,13 +18640,12 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
- VectorType *VecTy = SVI->getType();
- assert(VecTy->getVectorNumElements() % Factor == 0 &&
- "Invalid interleaved store");
+ auto *VecTy = cast<FixedVectorType>(SVI->getType());
+ assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
- unsigned LaneLen = VecTy->getVectorNumElements() / Factor;
- Type *EltTy = VecTy->getVectorElementType();
- VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
+ unsigned LaneLen = VecTy->getNumElements() / Factor;
+ Type *EltTy = VecTy->getElementType();
+ auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
@@ -17224,12 +18667,12 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
Type *IntTy = DL.getIntPtrType(EltTy);
// Convert to the corresponding integer vector.
- Type *IntVecTy =
- VectorType::get(IntTy, Op0->getType()->getVectorNumElements());
+ auto *IntVecTy =
+ FixedVectorType::get(IntTy, cast<FixedVectorType>(Op0->getType()));
Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
- SubVecTy = VectorType::get(IntTy, LaneLen);
+ SubVecTy = FixedVectorType::get(IntTy, LaneLen);
}
// The base address of the store.
@@ -17239,14 +18682,14 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
// If we're going to generate more than one store, reset the lane length
// and sub-vector type to something legal.
LaneLen /= NumStores;
- SubVecTy = VectorType::get(SubVecTy->getVectorElementType(), LaneLen);
+ SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
// We will compute the pointer operand of each store from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
- BaseAddr, SubVecTy->getVectorElementType()->getPointerTo(
- SI->getPointerAddressSpace()));
+ BaseAddr,
+ SubVecTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()));
}
assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
@@ -17276,7 +18719,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
"expected interleave factor of 2 or 4 for MVE");
Intrinsic::ID StoreInts =
Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
- Type *EltPtrTy = SubVecTy->getVectorElementType()->getPointerTo(
+ Type *EltPtrTy = SubVecTy->getElementType()->getPointerTo(
SI->getPointerAddressSpace());
Type *Tys[] = {EltPtrTy, SubVecTy};
Function *VstNFunc =
@@ -17298,7 +18741,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
     // If we're generating more than one store, we compute the base address of
// subsequent stores as an offset from the previous.
if (StoreCount > 0)
- BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getVectorElementType(),
+ BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
BaseAddr, LaneLen * Factor);
SmallVector<Value *, 4> Shuffles;
@@ -17308,7 +18751,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
unsigned IdxI = StoreCount * LaneLen * Factor + i;
if (Mask[IdxI] >= 0) {
Shuffles.push_back(Builder.CreateShuffleVector(
- Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0)));
+ Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
} else {
unsigned StartMask = 0;
for (unsigned j = 1; j < LaneLen; j++) {
@@ -17325,7 +18768,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
// Note: StartMask cannot be negative, it's checked in
// isReInterleaveMask
Shuffles.push_back(Builder.CreateShuffleVector(
- Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0)));
+ Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
}
}
@@ -17373,11 +18816,11 @@ static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
case HA_DOUBLE:
return false;
case HA_VECT64:
- return VT->getBitWidth() == 64;
+ return VT->getPrimitiveSizeInBits().getFixedSize() == 64;
case HA_VECT128:
- return VT->getBitWidth() == 128;
+ return VT->getPrimitiveSizeInBits().getFixedSize() == 128;
case HA_UNKNOWN:
- switch (VT->getBitWidth()) {
+ switch (VT->getPrimitiveSizeInBits().getFixedSize()) {
case 64:
Base = HA_VECT64;
return true;
@@ -17396,7 +18839,7 @@ static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
/// Return the correct alignment for the current calling convention.
Align ARMTargetLowering::getABIAlignmentForCallingConv(Type *ArgTy,
DataLayout DL) const {
- const Align ABITypeAlign(DL.getABITypeAlignment(ArgTy));
+ const Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
if (!ArgTy->isVectorTy())
return ABITypeAlign;
@@ -17423,18 +18866,18 @@ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
return IsHA || IsIntArray;
}
-unsigned ARMTargetLowering::getExceptionPointerRegister(
+Register ARMTargetLowering::getExceptionPointerRegister(
const Constant *PersonalityFn) const {
// Platforms which do not use SjLj EH may return values in these registers
// via the personality function.
- return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R0;
+ return Subtarget->useSjLjEH() ? Register() : ARM::R0;
}
-unsigned ARMTargetLowering::getExceptionSelectorRegister(
+Register ARMTargetLowering::getExceptionSelectorRegister(
const Constant *PersonalityFn) const {
// Platforms which do not use SjLj EH may return values in these registers
// via the personality function.
- return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1;
+ return Subtarget->useSjLjEH() ? Register() : ARM::R1;
}
void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h
index 6061a65d3b89..8b1f4183032e 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -68,10 +68,12 @@ class VectorType;
CALL, // Function call.
CALL_PRED, // Function call that's predicable.
CALL_NOLINK, // Function call with branch not branch-and-link.
+ tSECALL, // CMSE non-secure function call.
BRCOND, // Conditional branch.
BR_JT, // Jumptable branch.
BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump).
RET_FLAG, // Return with a flag operand.
+ SERET_FLAG, // CMSE Entry function return with a flag operand.
INTRET_FLAG, // Interrupt return with an LR-offset and a flag operand.
PIC_ADD, // Add with a PC operand and a PIC label.
@@ -133,6 +135,7 @@ class VectorType;
LE, // Low-overhead loops, Loop End
PREDICATE_CAST, // Predicate cast for MVE i1 types
+ VECTOR_REG_CAST, // Reinterpret the current contents of a vector register
VCMP, // Vector compare.
VCMPZ, // Vector compare to zero.
@@ -201,10 +204,36 @@ class VectorType;
VTBL2, // 2-register shuffle with mask
VMOVN, // MVE vmovn
+ // MVE Saturating truncates
+ VQMOVNs, // Vector (V) Saturating (Q) Move and Narrow (N), signed (s)
+ VQMOVNu, // Vector (V) Saturating (Q) Move and Narrow (N), unsigned (u)
+
+ // MVE float <> half converts
+ VCVTN, // MVE vcvt f32 -> f16, truncating into either the bottom or top lanes
+ VCVTL, // MVE vcvt f16 -> f32, extending from either the bottom or top lanes
+
// Vector multiply long:
VMULLs, // ...signed
VMULLu, // ...unsigned
+ // MVE reductions
+ VADDVs, // sign- or zero-extend the elements of a vector to i32,
+ VADDVu, // add them all together, and return an i32 of their sum
+ VADDLVs, // sign- or zero-extend elements to i64 and sum, returning
+ VADDLVu, // the low and high 32-bit halves of the sum
+ VADDLVAs, // same as VADDLV[su] but also add an input accumulator
+ VADDLVAu, // provided as low and high halves
+ VADDLVps, // same as VADDLVs but with a v4i1 predicate mask
+ VADDLVpu, // same as VADDLVu but with a v4i1 predicate mask
+ VADDLVAps, // same as VADDLVps but with a v4i1 predicate mask
+ VADDLVApu, // same as VADDLVpu but with a v4i1 predicate mask
+ VMLAVs,
+ VMLAVu,
+ VMLALVs,
+ VMLALVu,
+ VMLALVAs,
+ VMLALVAu,
+
SMULWB, // Signed multiply word by half word, bottom
SMULWT, // Signed multiply word by half word, top
UMLAL, // 64bit Unsigned Accumulate Multiply
@@ -280,7 +309,11 @@ class VectorType;
VST4_UPD,
VST2LN_UPD,
VST3LN_UPD,
- VST4LN_UPD
+ VST4LN_UPD,
+
+ // Load/Store of dual registers
+ LDRD,
+ STRD
};
} // end namespace ARMISD
@@ -333,8 +366,16 @@ class VectorType;
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const;
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const;
+ SDValue PerformIntrinsicCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+ bool SimplifyDemandedBitsForTargetNode(SDValue Op,
+ const APInt &OriginalDemandedBits,
+ const APInt &OriginalDemandedElts,
+ KnownBits &Known,
+ TargetLoweringOpt &TLO,
+ unsigned Depth) const override;
+
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override;
/// allowsMisalignedMemoryAccesses - Returns true if the target allows
@@ -345,10 +386,7 @@ class VectorType;
MachineMemOperand::Flags Flags,
bool *Fast) const override;
- EVT getOptimalMemOpType(uint64_t Size,
- unsigned DstAlign, unsigned SrcAlign,
- bool IsMemset, bool ZeroMemset,
- bool MemcpyStrSrc,
+ EVT getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const override;
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
@@ -356,6 +394,7 @@ class VectorType;
bool isZExtFree(SDValue Val, EVT VT2) const override;
bool shouldSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const override;
+ Type* shouldConvertSplatType(ShuffleVectorInst* SVI) const override;
bool isFNegFree(EVT VT) const override;
@@ -414,10 +453,10 @@ class VectorType;
const SelectionDAG &DAG,
unsigned Depth) const override;
- bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+ bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts,
TargetLoweringOpt &TLO) const override;
-
bool ExpandInlineAsm(CallInst *CI) const override;
ConstraintType getConstraintType(StringRef Constraint) const override;
@@ -522,6 +561,12 @@ class VectorType;
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const override;
+ bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
+ bool MathUsed) const override {
+      // Using overflow ops for overflow checks only should be beneficial on ARM.
+ return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
+ }
+
/// Returns true if an argument of type Ty needs to be passed in a
/// contiguous block of registers in calling convention CallConv.
bool functionArgumentNeedsConsecutiveRegisters(
@@ -529,12 +574,12 @@ class VectorType;
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
- unsigned
+ Register
getExceptionPointerRegister(const Constant *PersonalityFn) const override;
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
- unsigned
+ Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
Instruction *makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const;
@@ -606,7 +651,7 @@ class VectorType;
/// Returns true if \p VecTy is a legal interleaved access type. This
/// function checks the vector element type and the overall width of the
/// vector.
- bool isLegalInterleavedAccessType(unsigned Factor, VectorType *VecTy,
+ bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy,
const DataLayout &DL) const;
bool alignLoopsWithOptSize() const override;
@@ -723,6 +768,8 @@ class VectorType;
SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const;
void ExpandDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed,
SmallVectorImpl<SDValue> &Results) const;
+ SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) const;
SDValue LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG, bool Signed,
SDValue &Chain) const;
SDValue LowerREM(SDNode *N, SelectionDAG &DAG) const;
@@ -734,6 +781,8 @@ class VectorType;
SDValue LowerFSETCC(SDValue Op, SelectionDAG &DAG) const;
void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
+ void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
@@ -744,6 +793,11 @@ class VectorType;
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const override;
+ SDValue MoveToHPR(const SDLoc &dl, SelectionDAG &DAG, MVT LocVT, MVT ValVT,
+ SDValue Val) const;
+ SDValue MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG, MVT LocVT,
+ MVT ValVT, SDValue Val) const;
+
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
@@ -763,6 +817,17 @@ class VectorType;
MachineBasicBlock *Entry,
const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
+ bool
+ splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
+ SDValue *Parts, unsigned NumParts, MVT PartVT,
+ Optional<CallingConv::ID> CC) const override;
+
+ SDValue
+ joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
+ const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT,
+ Optional<CallingConv::ID> CC) const override;
+
SDValue
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -783,7 +848,7 @@ class VectorType;
SmallVectorImpl<SDValue> &InVals) const override;
/// HandleByVal - Target-specific cleanup for ByVal support.
- void HandleByVal(CCState *, unsigned &, unsigned) const override;
+ void HandleByVal(CCState *, unsigned &, Align) const override;
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrCDE.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrCDE.td
new file mode 100644
index 000000000000..0e97668e2e01
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrCDE.td
@@ -0,0 +1,666 @@
+//===-- ARMInstrCDE.td - CDE support for ARM ---------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Arm CDE (Custom Datapath Extension) instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+// Immediate operand of arbitrary bit width
+class BitWidthImmOperand<int width>
+ : ImmAsmOperand<0, !add(!shl(1, width), -1)> {
+ let Name = "Imm"#width#"b";
+}
+
+class BitWidthImm<int width>
+ : Operand<i32>,
+ ImmLeaf<i32, "{ return Imm >= 0 && Imm < (1 << "#width#"); }"> {
+ let ParserMatchClass = BitWidthImmOperand<width>;
+}
+
+def CDEDualRegOp : RegisterOperand<GPRPairnosp, "printGPRPairOperand">;
+
+// Used by VCX3 FP
+def imm_3b : BitWidthImm<3>;
+
+// Used by VCX3 vector
+def imm_4b : BitWidthImm<4>;
+
+// Used by VCX2 FP and CX3
+def imm_6b : BitWidthImm<6>;
+
+// Used by VCX2 vector
+def imm_7b : BitWidthImm<7>;
+
+// Used by CX2
+def imm_9b : BitWidthImm<9>;
+
+// Used by VCX1 FP
+def imm_11b : BitWidthImm<11>;
+
+// Used by VCX1 vector
+def imm_12b : BitWidthImm<12>;
+
+// Used by CX1
+def imm_13b : BitWidthImm<13>;
+
+// Base class for all CDE instructions
+class CDE_Instr<bit acc, dag oops, dag iops, string asm, string cstr>
+ : Thumb2XI<oops, !con((ins p_imm:$coproc), iops),
+ AddrModeNone, /*sz=*/4, NoItinerary,
+ asm, cstr, /*pattern=*/[]>,
+ Sched<[]> {
+ bits<3> coproc;
+
+ let Inst{31-29} = 0b111; // 15:13
+ let Inst{28} = acc;
+ let Inst{27-26} = 0b11;
+ let Inst{11} = 0b0;
+ let Inst{10-8} = coproc{2-0};
+
+ let isPredicable = 0;
+ let DecoderNamespace = "Thumb2CDE";
+}
+
+// Base class for CX* CDE instructions
+class CDE_GPR_Instr<bit dual, bit acc, dag oops, dag iops,
+ string asm, string cstr>
+ : CDE_Instr<acc, oops, iops, asm, cstr>,
+ Requires<[HasCDE]> {
+
+ let Inst{25-24} = 0b10;
+ let Inst{6} = dual;
+ let isPredicable = acc;
+}
+
+// Set of registers used by the CDE instructions.
+class CDE_RegisterOperands {
+ dag Rd;
+ dag Rd_src;
+ dag Rn;
+ dag Rm;
+}
+
+// CX* CDE instruction parameter set
+class CX_Params {
+ dag Oops; // Output operands for CX* instructions
+ dag Iops1; // Input operands for CX1* instructions
+ dag Iops2; // Input operands for CX2* instructions
+ dag Iops3; // Input operands for CX3* instructions
+ dag PredOp; // Input predicate operand
+ string PAsm; // Predicate assembly string
+ string Cstr; // asm constraint string
+ bit Dual; // "dual" field for encoding
+ bit Acc; // "acc" field for encoding
+}
+
+// VCX* CDE instruction parameter set
+class VCX_Params {
+ dag Oops; // Output operands for VCX* instructions
+ dag Iops1; // Input operands for VCX1* instructions
+ dag Iops2; // Input operands for VCX2* instructions
+ dag Iops3; // Input operands for VCX3* instructions
+ string Cstr; // asm constraint string
+ bit Acc; // "acc" field for encoding
+ vpred_ops Vpred; // Predication type for VCX* vector instructions
+}
+
+// CX1, CX1A, CX1D, CX1DA
+class CDE_CX1_Instr<string iname, CX_Params params>
+ : CDE_GPR_Instr<params.Dual, params.Acc, params.Oops,
+ !con(params.Iops1, (ins imm_13b:$imm), params.PredOp),
+ !strconcat(iname, params.PAsm, "\t$coproc, $Rd, $imm"),
+ params.Cstr> {
+ bits<13> imm;
+ bits<4> Rd;
+
+ let Inst{23-22} = 0b00;
+ let Inst{21-16} = imm{12-7};
+ let Inst{15-12} = Rd{3-0};
+ let Inst{7} = imm{6};
+ let Inst{5-0} = imm{5-0};
+}
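As a worked example of the field scattering in CDE_CX1_Instr (an editor's sketch derived only from the `let Inst{...}` assignments above; the helper is illustrative): the 13-bit immediate is split into a 6-bit high part, a single middle bit and a 6-bit low part.

    #include <cstdint>

    // Sketch: place imm{12-7} into Inst{21-16}, imm{6} into Inst{7} and
    // imm{5-0} into Inst{5-0}, mirroring the encoding class above.
    uint32_t scatterCX1Imm(uint32_t Imm13) {
      return ((Imm13 >> 7) & 0x3f) << 16 |
             ((Imm13 >> 6) & 0x1) << 7 |
             (Imm13 & 0x3f);
    }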
+
+// CX2, CX2A, CX2D, CX2DA
+class CDE_CX2_Instr<string iname, CX_Params params>
+ : CDE_GPR_Instr<params.Dual, params.Acc, params.Oops,
+ !con(params.Iops2, (ins imm_9b:$imm), params.PredOp),
+ !strconcat(iname, params.PAsm, "\t$coproc, $Rd, $Rn, $imm"),
+ params.Cstr> {
+ bits<9> imm;
+ bits<4> Rd;
+ bits<4> Rn;
+
+ let Inst{23-22} = 0b01;
+ let Inst{21-20} = imm{8-7};
+ let Inst{19-16} = Rn{3-0};
+ let Inst{15-12} = Rd{3-0};
+ let Inst{7} = imm{6};
+ let Inst{5-0} = imm{5-0};
+}
+
+// CX3, CX3A, CX3D, CX3DA
+class CDE_CX3_Instr<string iname, CX_Params params>
+ : CDE_GPR_Instr<params.Dual, params.Acc, params.Oops,
+ !con(params.Iops3, (ins imm_6b:$imm), params.PredOp),
+ !strconcat(iname, params.PAsm, "\t$coproc, $Rd, $Rn, $Rm, $imm"),
+ params.Cstr> {
+ bits<6> imm;
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{23} = 0b1;
+ let Inst{22-20} = imm{5-3};
+ let Inst{19-16} = Rn{3-0};
+ let Inst{15-12} = Rm{3-0};
+ let Inst{7} = imm{2};
+ let Inst{5-4} = imm{1-0};
+ let Inst{3-0} = Rd{3-0};
+}
+
+// Registers for single-register variants of CX* instructions
+def cde_cx_single_regs : CDE_RegisterOperands {
+ let Rd = (outs GPRwithAPSR_NZCVnosp:$Rd);
+ let Rd_src = (ins GPRwithAPSR_NZCVnosp:$Rd_src);
+ let Rn = (ins GPRwithAPSR_NZCVnosp:$Rn);
+ let Rm = (ins GPRwithAPSR_NZCVnosp:$Rm);
+}
+
+// Registers for dual-register variants of CX* instructions
+def cde_cx_dual_regs : CDE_RegisterOperands {
+ let Rd = (outs CDEDualRegOp:$Rd);
+ let Rd_src = (ins CDEDualRegOp:$Rd_src);
+ let Rn = (ins GPRwithAPSR_NZCVnosp:$Rn);
+ let Rm = (ins GPRwithAPSR_NZCVnosp:$Rm);
+}
+
+class CDE_CX_ParamsTemplate<bit dual, bit acc, CDE_RegisterOperands ops>
+ : CX_Params {
+
+ dag IOpsPrefix = !if(acc, ops.Rd_src, (ins));
+
+ let Oops = ops.Rd;
+ let Iops1 = IOpsPrefix;
+ let Iops2 = !con(IOpsPrefix, ops.Rn);
+ let Iops3 = !con(IOpsPrefix, ops.Rn, ops.Rm);
+ let PredOp = !if(acc, (ins pred:$p), (ins));
+ let PAsm = !if(acc, "${p}", "");
+ let Cstr = !if(acc, "$Rd = $Rd_src", "");
+ let Dual = dual;
+ let Acc = acc;
+}
+
+def cde_cx_params_single_noacc : CDE_CX_ParamsTemplate<0b0, 0b0, cde_cx_single_regs>;
+def cde_cx_params_single_acc : CDE_CX_ParamsTemplate<0b0, 0b1, cde_cx_single_regs>;
+def cde_cx_params_dual_noacc : CDE_CX_ParamsTemplate<0b1, 0b0, cde_cx_dual_regs>;
+def cde_cx_params_dual_acc : CDE_CX_ParamsTemplate<0b1, 0b1, cde_cx_dual_regs>;
+
+def CDE_CX1 : CDE_CX1_Instr<"cx1", cde_cx_params_single_noacc>;
+def CDE_CX1A : CDE_CX1_Instr<"cx1a", cde_cx_params_single_acc>;
+def CDE_CX1D : CDE_CX1_Instr<"cx1d", cde_cx_params_dual_noacc>;
+def CDE_CX1DA : CDE_CX1_Instr<"cx1da", cde_cx_params_dual_acc>;
+
+def CDE_CX2 : CDE_CX2_Instr<"cx2", cde_cx_params_single_noacc>;
+def CDE_CX2A : CDE_CX2_Instr<"cx2a", cde_cx_params_single_acc>;
+def CDE_CX2D : CDE_CX2_Instr<"cx2d", cde_cx_params_dual_noacc>;
+def CDE_CX2DA : CDE_CX2_Instr<"cx2da", cde_cx_params_dual_acc>;
+
+def CDE_CX3 : CDE_CX3_Instr<"cx3", cde_cx_params_single_noacc>;
+def CDE_CX3A : CDE_CX3_Instr<"cx3a", cde_cx_params_single_acc>;
+def CDE_CX3D : CDE_CX3_Instr<"cx3d", cde_cx_params_dual_noacc>;
+def CDE_CX3DA : CDE_CX3_Instr<"cx3da", cde_cx_params_dual_acc>;
+
+let Predicates = [HasCDE] in {
+ def : Pat<(i32 (int_arm_cde_cx1 timm:$coproc, timm:$imm)),
+ (i32 (CDE_CX1 p_imm:$coproc, imm_13b:$imm))>;
+ def : Pat<(i32 (int_arm_cde_cx1a timm:$coproc, GPRwithAPSR_NZCVnosp:$acc,
+ timm:$imm)),
+ (i32 (CDE_CX1A p_imm:$coproc, GPRwithAPSR_NZCVnosp:$acc,
+ imm_13b:$imm))>;
+ def : Pat<(i32 (int_arm_cde_cx2 timm:$coproc, GPRwithAPSR_NZCVnosp:$n,
+ timm:$imm)),
+ (i32 (CDE_CX2 p_imm:$coproc, GPRwithAPSR_NZCVnosp:$n,
+ imm_9b:$imm))>;
+ def : Pat<(i32 (int_arm_cde_cx2a timm:$coproc, GPRwithAPSR_NZCVnosp:$acc,
+ GPRwithAPSR_NZCVnosp:$n, timm:$imm)),
+ (i32 (CDE_CX2A p_imm:$coproc, GPRwithAPSR_NZCVnosp:$acc,
+ GPRwithAPSR_NZCVnosp:$n, imm_9b:$imm))>;
+ def : Pat<(i32 (int_arm_cde_cx3 timm:$coproc, GPRwithAPSR_NZCVnosp:$n,
+ GPRwithAPSR_NZCVnosp:$m, timm:$imm)),
+ (i32 (CDE_CX3 p_imm:$coproc, GPRwithAPSR_NZCVnosp:$n,
+ GPRwithAPSR_NZCVnosp:$m, imm_6b:$imm))>;
+ def : Pat<(i32 (int_arm_cde_cx3a timm:$coproc,
+ GPRwithAPSR_NZCVnosp:$acc,
+ GPRwithAPSR_NZCVnosp:$n,
+ GPRwithAPSR_NZCVnosp:$m, timm:$imm)),
+ (i32 (CDE_CX3A p_imm:$coproc,
+ GPRwithAPSR_NZCVnosp:$acc,
+ GPRwithAPSR_NZCVnosp:$n,
+ GPRwithAPSR_NZCVnosp:$m, imm_6b:$imm))>;
+}
+
+class CDE_RequiresSReg : Requires<[HasCDE, HasFPRegs]>;
+class CDE_RequiresDReg : Requires<[HasCDE, HasFPRegs]>;
+class CDE_RequiresQReg : Requires<[HasCDE, HasMVEInt]>;
+
+// Base class for CDE VCX* instructions
+class CDE_FP_Vec_Instr<bit vec, bit acc, dag oops, dag iops, string asm, string cstr>
+ : CDE_Instr<acc, oops, iops, asm, cstr> {
+ let Inst{25} = 0b0;
+ let Inst{6} = vec;
+}
+
+// Base class for floating-point variants of CDE VCX* instructions
+class CDE_FP_Instr<bit acc, bit sz, dag oops, dag iops, string asm, string cstr>
+ : CDE_FP_Vec_Instr<0b0, acc, oops, iops, asm, cstr> {
+ let Inst{24} = sz;
+}
+
+// Base class for vector variants of CDE VCX* instructions
+class CDE_Vec_Instr<bit acc, dag oops, dag iops, string asm, string cstr,
+ vpred_ops vpred>
+ : CDE_FP_Vec_Instr<0b1, acc, oops,
+ !con(iops, (ins vpred:$vp)), asm,
+ !strconcat(cstr, vpred.vpred_constraint)>,
+ CDE_RequiresQReg {
+}
+
+
+// VCX1/VCX1A, vector variant
+class CDE_VCX1_Vec_Instr<string iname, VCX_Params params>
+ : CDE_Vec_Instr<params.Acc, params.Oops,
+ !con(params.Iops1, (ins imm_12b:$imm)),
+ iname#"${vp}\t$coproc, $Qd, $imm", params.Cstr, params.Vpred> {
+ bits<12> imm;
+ bits<3> Qd;
+
+ let Inst{24} = imm{11};
+ let Inst{23} = 0b0;
+ let Inst{22} = 0b0;
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = imm{10-7};
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12} = 0b0;
+ let Inst{7} = imm{6};
+ let Inst{5-0} = imm{5-0};
+
+ let Unpredictable{22} = 0b1;
+}
+
+// VCX1/VCX1A, base class for FP variants
+class CDE_VCX1_FP_Instr<bit sz, string iname, VCX_Params params>
+ : CDE_FP_Instr<params.Acc, sz, params.Oops,
+ !con(params.Iops1, (ins imm_11b:$imm)),
+ iname#"\t$coproc, $Vd, $imm", params.Cstr> {
+ bits<11> imm;
+
+ let Inst{23} = 0b0;
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = imm{10-7};
+ let Inst{7} = imm{6};
+ let Inst{5-0} = imm{5-0};
+}
+
+// VCX1/VCX1A, S registers
+class CDE_VCX1_FP_Instr_S<string iname, VCX_Params params>
+ : CDE_VCX1_FP_Instr<0b0, iname, params>,
+ CDE_RequiresSReg {
+ bits<5> Vd;
+
+ let Inst{22} = Vd{0};
+ let Inst{15-12} = Vd{4-1};
+}
+
+// VCX1/VCX1A, D registers
+class CDE_VCX1_FP_Instr_D<string iname, VCX_Params params>
+ : CDE_VCX1_FP_Instr<0b1, iname, params>,
+ CDE_RequiresDReg {
+ bits<5> Vd;
+
+ let Inst{22} = Vd{4};
+ let Inst{15-12} = Vd{3-0};
+}
+
+// VCX2/VCX2A, vector variant
+class CDE_VCX2_Vec_Instr<string iname, VCX_Params params>
+ : CDE_Vec_Instr<params.Acc, params.Oops,
+ !con(params.Iops2, (ins imm_7b:$imm)),
+ iname#"${vp}\t$coproc, $Qd, $Qm, $imm", params.Cstr,
+ params.Vpred> {
+ bits<7> imm;
+ bits<3> Qd;
+ bits<3> Qm;
+
+ let Inst{24} = imm{6};
+ let Inst{23} = 0b0;
+ let Inst{22} = 0b0;
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = imm{5-2};
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12} = 0b0;
+ let Inst{7} = imm{1};
+ let Inst{5} = 0b0;
+ let Inst{4} = imm{0};
+ let Inst{3-1} = Qm{2-0};
+ let Inst{0} = 0b0;
+
+ let Unpredictable{22} = 0b1;
+ let Unpredictable{5} = 0b1;
+}
+
+// VCX2/VCX2A, base class for FP variants
+class CDE_VCX2_FP_Instr<bit sz, string iname, VCX_Params params>
+ : CDE_FP_Instr<params.Acc, sz, params.Oops,
+ !con(params.Iops2, (ins imm_6b:$imm)),
+ iname#"\t$coproc, $Vd, $Vm, $imm", params.Cstr> {
+ bits<6> imm;
+
+ let Inst{23} = 0b0;
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = imm{5-2};
+ let Inst{7} = imm{1};
+ let Inst{4} = imm{0};
+}
+
+// VCX2/VCX2A, S registers
+class CDE_VCX2_FP_Instr_S<string iname, VCX_Params params>
+ : CDE_VCX2_FP_Instr<0b0, iname, params>,
+ CDE_RequiresSReg {
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{4-1};
+ let Inst{22} = Vd{0};
+ let Inst{3-0} = Vm{4-1};
+ let Inst{5} = Vm{0};
+}
+
+// VCX2/VCX2A, D registers
+class CDE_VCX2_FP_Instr_D<string iname, VCX_Params params>
+ : CDE_VCX2_FP_Instr<0b1, iname, params>,
+ CDE_RequiresDReg {
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+}
+
+// VCX3/VCX3A, vector variant
+class CDE_VCX3_Vec_Instr<string iname, VCX_Params params>
+ : CDE_Vec_Instr<params.Acc, params.Oops,
+ !con(params.Iops3, (ins imm_4b:$imm)),
+ iname#"${vp}\t$coproc, $Qd, $Qn, $Qm, $imm", params.Cstr,
+ params.Vpred> {
+ bits<4> imm;
+ bits<3> Qd;
+ bits<3> Qm;
+ bits<3> Qn;
+
+ let Inst{24} = imm{3};
+ let Inst{23} = 0b1;
+ let Inst{22} = 0b0;
+ let Inst{21-20} = imm{2-1};
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16} = 0b0;
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12} = 0b0;
+ let Inst{7} = 0b0;
+ let Inst{5} = 0b0;
+ let Inst{4} = imm{0};
+ let Inst{3-1} = Qm{2-0};
+ let Inst{0} = 0b0;
+
+ let Unpredictable{22} = 0b1;
+ let Unpredictable{7} = 0b1;
+ let Unpredictable{5} = 0b1;
+}
+
+// VCX3/VCX3A, base class for FP variants
+class CDE_VCX3_FP_Instr<bit sz, string iname, VCX_Params params>
+ : CDE_FP_Instr<params.Acc, sz, params.Oops,
+ !con(params.Iops3, (ins imm_3b:$imm)),
+ iname#"\t$coproc, $Vd, $Vn, $Vm, $imm", params.Cstr> {
+ bits<3> imm;
+
+ let Inst{23} = 0b1;
+ let Inst{21-20} = imm{2-1};
+ let Inst{4} = imm{0};
+}
+
+// VCX3/VCX3A, S registers
+class CDE_VCX3_FP_Instr_S<string iname, VCX_Params params>
+ : CDE_VCX3_FP_Instr<0b0, iname, params>,
+ CDE_RequiresSReg {
+ bits<5> Vd;
+ bits<5> Vm;
+ bits<5> Vn;
+
+ let Inst{22} = Vd{0};
+ let Inst{19-16} = Vn{4-1};
+ let Inst{15-12} = Vd{4-1};
+ let Inst{7} = Vn{0};
+ let Inst{5} = Vm{0};
+ let Inst{3-0} = Vm{4-1};
+}
+
+// VCX3/VCX3A, D registers
+class CDE_VCX3_FP_Instr_D<string iname, VCX_Params params>
+ : CDE_VCX3_FP_Instr<0b1, iname, params>,
+ CDE_RequiresDReg {
+ bits<5> Vd;
+ bits<5> Vm;
+ bits<5> Vn;
+
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{5} = Vm{4};
+ let Inst{3-0} = Vm{3-0};
+}
+
+// Register operands for VCX* instructions
+class CDE_VCX_RegisterOperandsTemplate<RegisterClass regclass>
+ : CDE_RegisterOperands {
+ let Rd = (outs regclass:$Vd);
+ let Rd_src = (ins regclass:$Vd_src);
+ let Rn = (ins regclass:$Vn);
+ let Rm = (ins regclass:$Vm);
+}
+
+class CDE_VCXQ_RegisterOperandsTemplate<RegisterClass regclass>
+ : CDE_RegisterOperands {
+ let Rd = (outs regclass:$Qd);
+ let Rd_src = (ins regclass:$Qd_src);
+ let Rn = (ins regclass:$Qn);
+ let Rm = (ins regclass:$Qm);
+}
+
+def cde_vcx_s_regs : CDE_VCX_RegisterOperandsTemplate<SPR>;
+def cde_vcx_d_regs : CDE_VCX_RegisterOperandsTemplate<DPR_VFP2>;
+def cde_vcx_q_regs : CDE_VCXQ_RegisterOperandsTemplate<MQPR>;
+
+class CDE_VCX_ParamsTemplate<bit acc, CDE_RegisterOperands ops>
+ : VCX_Params {
+
+ dag IOpsPrefix = !if(acc, ops.Rd_src, (ins));
+
+ let Oops = ops.Rd;
+ let Iops1 = IOpsPrefix;
+ let Iops2 = !con(IOpsPrefix, ops.Rm);
+ let Iops3 = !con(IOpsPrefix, ops.Rn, ops.Rm);
+ let Cstr = !if(acc, "$Vd = $Vd_src", "");
+ let Acc = acc;
+}
+
+class CDE_VCXQ_ParamsTemplate<bit acc, CDE_RegisterOperands ops>
+ : VCX_Params {
+
+ dag IOpsPrefix = !if(acc, ops.Rd_src, (ins));
+
+ let Oops = ops.Rd;
+ let Iops1 = IOpsPrefix;
+ let Iops2 = !con(IOpsPrefix, ops.Rm);
+ let Iops3 = !con(IOpsPrefix, ops.Rn, ops.Rm);
+ let Cstr = !if(acc, "$Qd = $Qd_src", "");
+ let Acc = acc;
+ let Vpred = !if(acc, vpred_n, vpred_r);
+}
+
+def cde_vcx_params_s_noacc : CDE_VCX_ParamsTemplate<0b0, cde_vcx_s_regs>;
+def cde_vcx_params_s_acc : CDE_VCX_ParamsTemplate<0b1, cde_vcx_s_regs>;
+def cde_vcx_params_d_noacc : CDE_VCX_ParamsTemplate<0b0, cde_vcx_d_regs>;
+def cde_vcx_params_d_acc : CDE_VCX_ParamsTemplate<0b1, cde_vcx_d_regs>;
+def cde_vcx_params_q_noacc : CDE_VCXQ_ParamsTemplate<0b0, cde_vcx_q_regs>;
+def cde_vcx_params_q_acc : CDE_VCXQ_ParamsTemplate<0b1, cde_vcx_q_regs>;
+
+def CDE_VCX1_fpsp : CDE_VCX1_FP_Instr_S<"vcx1", cde_vcx_params_s_noacc>;
+def CDE_VCX1A_fpsp : CDE_VCX1_FP_Instr_S<"vcx1a", cde_vcx_params_s_acc>;
+def CDE_VCX1_fpdp : CDE_VCX1_FP_Instr_D<"vcx1", cde_vcx_params_d_noacc>;
+def CDE_VCX1A_fpdp : CDE_VCX1_FP_Instr_D<"vcx1a", cde_vcx_params_d_acc>;
+def CDE_VCX1_vec : CDE_VCX1_Vec_Instr<"vcx1", cde_vcx_params_q_noacc>;
+def CDE_VCX1A_vec : CDE_VCX1_Vec_Instr<"vcx1a", cde_vcx_params_q_acc>;
+
+def CDE_VCX2_fpsp : CDE_VCX2_FP_Instr_S<"vcx2", cde_vcx_params_s_noacc>;
+def CDE_VCX2A_fpsp : CDE_VCX2_FP_Instr_S<"vcx2a", cde_vcx_params_s_acc>;
+def CDE_VCX2_fpdp : CDE_VCX2_FP_Instr_D<"vcx2", cde_vcx_params_d_noacc>;
+def CDE_VCX2A_fpdp : CDE_VCX2_FP_Instr_D<"vcx2a", cde_vcx_params_d_acc>;
+def CDE_VCX2_vec : CDE_VCX2_Vec_Instr<"vcx2", cde_vcx_params_q_noacc>;
+def CDE_VCX2A_vec : CDE_VCX2_Vec_Instr<"vcx2a", cde_vcx_params_q_acc>;
+
+def CDE_VCX3_fpsp : CDE_VCX3_FP_Instr_S<"vcx3", cde_vcx_params_s_noacc>;
+def CDE_VCX3A_fpsp : CDE_VCX3_FP_Instr_S<"vcx3a", cde_vcx_params_s_acc>;
+def CDE_VCX3_fpdp : CDE_VCX3_FP_Instr_D<"vcx3", cde_vcx_params_d_noacc>;
+def CDE_VCX3A_fpdp : CDE_VCX3_FP_Instr_D<"vcx3a", cde_vcx_params_d_acc>;
+def CDE_VCX3_vec : CDE_VCX3_Vec_Instr<"vcx3", cde_vcx_params_q_noacc>;
+def CDE_VCX3A_vec : CDE_VCX3_Vec_Instr<"vcx3a", cde_vcx_params_q_acc>;
+
+
+let Predicates = [HasCDE, HasFPRegs] in {
+ def : Pat<(f32 (int_arm_cde_vcx1 timm:$coproc, timm:$imm)),
+ (f32 (CDE_VCX1_fpsp p_imm:$coproc, imm_11b:$imm))>;
+ def : Pat<(f32 (int_arm_cde_vcx1a timm:$coproc, (f32 SPR:$acc), timm:$imm)),
+ (f32 (CDE_VCX1A_fpsp p_imm:$coproc, SPR:$acc, imm_11b:$imm))>;
+ def : Pat<(f64 (int_arm_cde_vcx1 timm:$coproc, timm:$imm)),
+ (f64 (CDE_VCX1_fpdp p_imm:$coproc, imm_11b:$imm))>;
+ def : Pat<(f64 (int_arm_cde_vcx1a timm:$coproc, (f64 DPR:$acc), timm:$imm)),
+ (f64 (CDE_VCX1A_fpdp p_imm:$coproc, DPR:$acc, imm_11b:$imm))>;
+
+ def : Pat<(f32 (int_arm_cde_vcx2 timm:$coproc, (f32 SPR:$n), timm:$imm)),
+ (f32 (CDE_VCX2_fpsp p_imm:$coproc, SPR:$n, imm_6b:$imm))>;
+ def : Pat<(f32 (int_arm_cde_vcx2a timm:$coproc, (f32 SPR:$acc), (f32 SPR:$n),
+ timm:$imm)),
+ (f32 (CDE_VCX2A_fpsp p_imm:$coproc, SPR:$acc, SPR:$n, imm_6b:$imm))>;
+ def : Pat<(f64 (int_arm_cde_vcx2 timm:$coproc, (f64 DPR:$n), timm:$imm)),
+ (f64 (CDE_VCX2_fpdp p_imm:$coproc, DPR:$n, imm_6b:$imm))>;
+ def : Pat<(f64 (int_arm_cde_vcx2a timm:$coproc, (f64 DPR:$acc), (f64 DPR:$n),
+ timm:$imm)),
+ (f64 (CDE_VCX2A_fpdp p_imm:$coproc, DPR:$acc, DPR:$n, imm_6b:$imm))>;
+
+ def : Pat<(f32 (int_arm_cde_vcx3 timm:$coproc, (f32 SPR:$n), (f32 SPR:$m),
+ timm:$imm)),
+ (f32 (CDE_VCX3_fpsp p_imm:$coproc, (f32 SPR:$n), (f32 SPR:$m),
+ imm_3b:$imm))>;
+ def : Pat<(f32 (int_arm_cde_vcx3a timm:$coproc, (f32 SPR:$acc), (f32 SPR:$n),
+ (f32 SPR:$m), timm:$imm)),
+ (f32 (CDE_VCX3A_fpsp p_imm:$coproc, SPR:$acc, SPR:$n, SPR:$m,
+ imm_3b:$imm))>;
+ def : Pat<(f64 (int_arm_cde_vcx3 timm:$coproc, (f64 DPR:$n), (f64 DPR:$m),
+ timm:$imm)),
+ (f64 (CDE_VCX3_fpdp p_imm:$coproc, DPR:$n, DPR:$m, imm_3b:$imm))>;
+ def : Pat<(f64 (int_arm_cde_vcx3a timm:$coproc, (f64 DPR:$acc), (f64 DPR:$n),
+ (f64 DPR:$m), timm:$imm)),
+ (f64 (CDE_VCX3A_fpdp p_imm:$coproc, DPR:$acc, DPR:$n, DPR:$m,
+ imm_3b:$imm))>;
+}
+
+let Predicates = [HasCDE, HasMVEInt] in {
+ def : Pat<(v16i8 (int_arm_cde_vcx1q timm:$coproc, timm:$imm)),
+ (v16i8 (CDE_VCX1_vec p_imm:$coproc, imm_12b:$imm))>;
+ def : Pat<(v16i8 (int_arm_cde_vcx1qa timm:$coproc, (v16i8 MQPR:$acc),
+ timm:$imm)),
+ (v16i8 (CDE_VCX1A_vec p_imm:$coproc, MQPR:$acc, imm_12b:$imm))>;
+
+ def : Pat<(v16i8 (int_arm_cde_vcx2q timm:$coproc, (v16i8 MQPR:$n), timm:$imm)),
+ (v16i8 (CDE_VCX2_vec p_imm:$coproc, MQPR:$n, imm_7b:$imm))>;
+ def : Pat<(v16i8 (int_arm_cde_vcx2qa timm:$coproc, (v16i8 MQPR:$acc),
+ (v16i8 MQPR:$n), timm:$imm)),
+ (v16i8 (CDE_VCX2A_vec p_imm:$coproc, MQPR:$acc, MQPR:$n,
+ imm_7b:$imm))>;
+
+ def : Pat<(v16i8 (int_arm_cde_vcx3q timm:$coproc, (v16i8 MQPR:$n),
+ (v16i8 MQPR:$m), timm:$imm)),
+ (v16i8 (CDE_VCX3_vec p_imm:$coproc, MQPR:$n, MQPR:$m,
+ imm_4b:$imm))>;
+ def : Pat<(v16i8 (int_arm_cde_vcx3qa timm:$coproc, (v16i8 MQPR:$acc),
+ (v16i8 MQPR:$n), (v16i8 MQPR:$m),
+ timm:$imm)),
+ (v16i8 (CDE_VCX3A_vec p_imm:$coproc, MQPR:$acc, MQPR:$n, MQPR:$m,
+ imm_4b:$imm))>;
+}
+
+multiclass VCXPredicatedPat_m<MVEVectorVTInfo VTI> {
+ def : Pat<(VTI.Vec (int_arm_cde_vcx1q_predicated timm:$coproc,
+ (VTI.Vec MQPR:$inactive), timm:$imm,
+ (VTI.Pred VCCR:$pred))),
+ (VTI.Vec (CDE_VCX1_vec p_imm:$coproc, imm_12b:$imm, ARMVCCThen,
+ (VTI.Pred VCCR:$pred),
+ (VTI.Vec MQPR:$inactive)))>;
+ def : Pat<(VTI.Vec (int_arm_cde_vcx1qa_predicated timm:$coproc,
+ (VTI.Vec MQPR:$acc), timm:$imm,
+ (VTI.Pred VCCR:$pred))),
+ (VTI.Vec (CDE_VCX1A_vec p_imm:$coproc, (VTI.Vec MQPR:$acc),
+ imm_12b:$imm, ARMVCCThen,
+ (VTI.Pred VCCR:$pred)))>;
+
+ def : Pat<(VTI.Vec (int_arm_cde_vcx2q_predicated timm:$coproc,
+ (VTI.Vec MQPR:$inactive),
+ (v16i8 MQPR:$n), timm:$imm,
+ (VTI.Pred VCCR:$pred))),
+ (VTI.Vec (CDE_VCX2_vec p_imm:$coproc, (v16i8 MQPR:$n),
+ imm_7b:$imm, ARMVCCThen,
+ (VTI.Pred VCCR:$pred),
+ (VTI.Vec MQPR:$inactive)))>;
+ def : Pat<(VTI.Vec (int_arm_cde_vcx2qa_predicated timm:$coproc,
+ (VTI.Vec MQPR:$acc),
+ (v16i8 MQPR:$n), timm:$imm,
+ (VTI.Pred VCCR:$pred))),
+ (VTI.Vec (CDE_VCX2A_vec p_imm:$coproc, (VTI.Vec MQPR:$acc),
+ (v16i8 MQPR:$n), timm:$imm, ARMVCCThen,
+ (VTI.Pred VCCR:$pred)))>;
+
+ def : Pat<(VTI.Vec (int_arm_cde_vcx3q_predicated timm:$coproc,
+ (VTI.Vec MQPR:$inactive),
+ (v16i8 MQPR:$n), (v16i8 MQPR:$m),
+ timm:$imm,
+ (VTI.Pred VCCR:$pred))),
+ (VTI.Vec (CDE_VCX3_vec p_imm:$coproc, (v16i8 MQPR:$n),
+ (v16i8 MQPR:$m),
+ imm_4b:$imm, ARMVCCThen,
+ (VTI.Pred VCCR:$pred),
+ (VTI.Vec MQPR:$inactive)))>;
+ def : Pat<(VTI.Vec (int_arm_cde_vcx3qa_predicated timm:$coproc,
+ (VTI.Vec MQPR:$acc),
+ (v16i8 MQPR:$n), (v16i8 MQPR:$m), timm:$imm,
+ (VTI.Pred VCCR:$pred))),
+ (VTI.Vec (CDE_VCX3A_vec p_imm:$coproc, (VTI.Vec MQPR:$acc),
+ (v16i8 MQPR:$n), (v16i8 MQPR:$m),
+ imm_4b:$imm, ARMVCCThen,
+ (VTI.Pred VCCR:$pred)))>;
+}
+
+let Predicates = [HasCDE, HasMVEInt] in
+ foreach VTI = [ MVE_v16i8, MVE_v8i16, MVE_v4i32, MVE_v2i64 ] in
+ defm : VCXPredicatedPat_m<VTI>;
+
+let Predicates = [HasCDE, HasMVEFloat] in
+ foreach VTI = [ MVE_v8f16, MVE_v4f32 ] in
+ defm : VCXPredicatedPat_m<VTI>;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrFormats.td
index 1da32ad2af6c..e13f3437cc7b 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -205,7 +205,6 @@ def VPTPredROperand : AsmOperandClass {
let Name = "VPTPredR";
let PredicateMethod = "isVPTPred";
}
-def undef_tied_input;
// Operand classes for the cluster of MC operands describing a
// VPT-predicated MVE instruction.
@@ -409,6 +408,9 @@ class InstTemplate<AddrMode am, int sz, IndexMode im,
bit thumbArithFlagSetting = 0;
bit validForTailPredication = 0;
+ bit retainsPreviousHalfElement = 0;
+ bit horizontalReduction = 0;
+ bit doubleWidthResult = 0;
// If this is a pseudo instruction, mark it isCodeGenOnly.
let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
@@ -422,6 +424,9 @@ class InstTemplate<AddrMode am, int sz, IndexMode im,
let TSFlags{18-15} = D.Value;
let TSFlags{19} = thumbArithFlagSetting;
let TSFlags{20} = validForTailPredication;
+ let TSFlags{21} = retainsPreviousHalfElement;
+ let TSFlags{22} = horizontalReduction;
+ let TSFlags{23} = doubleWidthResult;
let Constraints = cstr;
let Itinerary = itin;
@@ -1123,6 +1128,9 @@ class Thumb2DSPPat<dag pattern, dag result> : Pat<pattern, result> {
class Thumb2DSPMulPat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsThumb2, UseMulOps, HasDSP];
}
+class FPRegs16Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [HasFPRegs16];
+}
class FP16Pat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [HasFP16];
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index a802d5a06f07..2790ac215f86 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -126,7 +126,7 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant;
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
- MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
+ MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
MIB.addMemOperand(MMO);
BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg)
.addReg(Reg, RegState::Kill)
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td
index 3efe85a7d45c..da0a836c8f95 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -159,6 +159,8 @@ def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def ARMseretflag : SDNode<"ARMISD::SERET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMintretflag : SDNode<"ARMISD::INTRET_FLAG", SDT_ARMcall,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
@@ -243,6 +245,12 @@ def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>;
def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>;
def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>;
+def SDT_ARMldrd : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+def SDT_ARMstrd : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def ARMstrd : SDNode<"ARMISD::STRD", SDT_ARMstrd, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
// Vector operations shared between NEON and MVE
def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
@@ -258,7 +266,7 @@ def ARMvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def ARMvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def ARMvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
-def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
+def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVec<1>,
SDTCisVT<2, i32>]>;
def ARMvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def ARMvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
@@ -268,6 +276,10 @@ def ARMvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def ARMvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def ARMvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
+def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def ARMvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
+def ARMvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>]>;
@@ -279,6 +291,11 @@ def ARMvshruImm : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>;
def ARMvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>;
def ARMvshlu : SDNode<"ARMISD::VSHLu", SDTARMVSH>;
+def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def ARMvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
+def ARMvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
+
def SDTARMVCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
SDTCisInt<3>]>;
def SDTARMVCMPZ : SDTypeProfile<1, 2, [SDTCisInt<2>]>;
@@ -290,6 +307,36 @@ def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMLoLoop, [SDNPHasChain]>;
def ARMLE : SDNode<"ARMISD::LE", SDT_ARMLoLoop, [SDNPHasChain]>;
def ARMLoopDec : SDNode<"ARMISD::LOOP_DEC", SDTIntBinOp, [SDNPHasChain]>;
+// 'VECTOR_REG_CAST' is an operation that reinterprets the contents of a
+// vector register as a different vector type, without changing the contents of
+// the register. It differs from 'bitconvert' in that bitconvert reinterprets
+// the _memory_ storage format of the vector, whereas VECTOR_REG_CAST
+// reinterprets the _register_ format - and in big-endian, the memory and
+// register formats are different, so they are different operations.
+//
+// For example, 'VECTOR_REG_CAST' between v8i16 and v16i8 will map the LSB of
+// the zeroth i16 lane to the zeroth i8 lane, regardless of system endianness,
+// whereas 'bitconvert' will map it to the high byte in big-endian mode,
+// because that's what (MVE) VSTRH.16 followed by VLDRB.8 would do. So the
+// bitconvert would have to emit a VREV16.8 instruction, whereas the
+// VECTOR_REG_CAST emits no code at all if the vector is already in a register.
+def ARMVectorRegCastImpl : SDNode<"ARMISD::VECTOR_REG_CAST", SDTUnaryOp>;
+
+// In little-endian, VECTOR_REG_CAST is often turned into bitconvert during
+// lowering (because in that situation they're identical). So an isel pattern
+// that needs to match something that's _logically_ a VECTOR_REG_CAST must
+// _physically_ match a different node type depending on endianness.
+//
+// This 'PatFrags' instance is a centralized facility to make that easy. It
+// matches VECTOR_REG_CAST in either endianness, and also bitconvert in the
+// endianness where it's equivalent.
+def ARMVectorRegCast: PatFrags<
+ (ops node:$x), [(ARMVectorRegCastImpl node:$x), (bitconvert node:$x)], [{
+ // Reject a match against bitconvert (aka ISD::BITCAST) if big-endian
+ return !(CurDAG->getDataLayout().isBigEndian() &&
+ N->getOpcode() == ISD::BITCAST);
+ }]>;
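+
+// Illustrative sketch only (not part of this change): an isel pattern written
+// against the ARMVectorRegCast PatFrags would look something like
+//   def : Pat<(v4i32 (ARMVectorRegCast (v8i16 MQPR:$src))),
+//             (v4i32 MQPR:$src)>;
+// i.e. it matches either VECTOR_REG_CAST or, in little-endian, the equivalent
+// bitconvert, and selects to a plain register copy with no code emitted.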
+
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -396,6 +443,62 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
return hasNoVMLxHazardUse(N);
}]>;
+def imm_even : ImmLeaf<i32, [{ return (Imm & 1) == 0; }]>;
+def imm_odd : ImmLeaf<i32, [{ return (Imm & 1) == 1; }]>;
+
+//===----------------------------------------------------------------------===//
+// NEON/MVE pattern fragments
+//
+
+// Extract D sub-registers of Q registers.
+def DSubReg_i8_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, SDLoc(N),
+ MVT::i32);
+}]>;
+def DSubReg_i16_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, SDLoc(N),
+ MVT::i32);
+}]>;
+def DSubReg_i32_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, SDLoc(N),
+ MVT::i32);
+}]>;
+def DSubReg_f64_reg : SDNodeXForm<imm, [{
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), SDLoc(N),
+ MVT::i32);
+}]>;
+
+// Extract S sub-registers of Q/D registers.
+def SSubReg_f32_reg : SDNodeXForm<imm, [{
+ assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), SDLoc(N),
+ MVT::i32);
+}]>;
+
+// Extract S sub-registers of Q/D registers containing a given f16/bf16 lane.
+def SSubReg_f16_reg : SDNodeXForm<imm, [{
+ assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue()/2, SDLoc(N),
+ MVT::i32);
+}]>;
+
+// Translate lane numbers from Q registers to D subregs.
+def SubReg_i8_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 7, SDLoc(N), MVT::i32);
+}]>;
+def SubReg_i16_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 3, SDLoc(N), MVT::i32);
+}]>;
+def SubReg_i32_lane : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i32);
+}]>;
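+
+// Illustrative sketch only (not part of this change): these XForms are meant
+// to be paired in lane-access patterns, splitting a Q-register lane index
+// into a D subregister plus a lane within it, along the lines of
+//   def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
+//             (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
+//                                (DSubReg_i32_reg imm:$lane))),
+//                        (SubReg_i32_lane imm:$lane))>;
+// where VGETLNi32 stands in for whichever lane-move instruction the real
+// NEON patterns use.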
+
+
+
//===----------------------------------------------------------------------===//
// Operand Definitions.
//
@@ -2695,6 +2798,14 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
Requires<[IsARM, HasV5TE]>;
}
+let mayLoad = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in {
+def LOADDUAL : ARMPseudoInst<(outs GPRPairOp:$Rt), (ins addrmode3:$addr),
+ 64, IIC_iLoad_d_r, []>,
+ Requires<[IsARM, HasV5TE]> {
+ let AM = AddrMode3;
+}
+}
+
def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
NoItinerary, "lda", "\t$Rt, $addr", []>;
def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
@@ -2766,7 +2877,7 @@ multiclass AI2_ldridx<bit isByte, string opc,
}
let mayLoad = 1, hasSideEffects = 0 in {
-// FIXME: for LDR_PRE_REG etc. the itineray should be either IIC_iLoad_ru or
+// FIXME: for LDR_PRE_REG etc. the itinerary should be either IIC_iLoad_ru or
// IIC_iLoad_siu depending on whether the offset register is shifted.
defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_iu, IIC_iLoad_ru>;
defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_iu, IIC_iLoad_bh_ru>;
@@ -2933,6 +3044,9 @@ multiclass AI3ldrT<bits<4> op, string opc> {
let Inst{3-0} = Rm{3-0};
let DecoderMethod = "DecodeLDR";
}
+
+ def ii : ARMAsmPseudo<!strconcat(opc, "${p} $Rt, $addr"),
+ (ins addr_offset_none:$addr, pred:$p), (outs GPR:$Rt)>;
}
defm LDRSBT : AI3ldrT<0b1101, "ldrsbt">;
@@ -2970,6 +3084,14 @@ let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
}
}
+let mayStore = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in {
+def STOREDUAL : ARMPseudoInst<(outs), (ins GPRPairOp:$Rt, addrmode3:$addr),
+ 64, IIC_iStore_d_r, []>,
+ Requires<[IsARM, HasV5TE]> {
+ let AM = AddrMode3;
+}
+}
+
// Indexed stores
multiclass AI2_stridx<bit isByte, string opc,
InstrItinClass iii, InstrItinClass iir> {
@@ -3036,7 +3158,7 @@ multiclass AI2_stridx<bit isByte, string opc,
}
let mayStore = 1, hasSideEffects = 0 in {
-// FIXME: for STR_PRE_REG etc. the itineray should be either IIC_iStore_ru or
+// FIXME: for STR_PRE_REG etc. the itinerary should be either IIC_iStore_ru or
// IIC_iStore_siu depending on whether the offset register is shifted.
defm STR : AI2_stridx<0, "str", IIC_iStore_iu, IIC_iStore_ru>;
defm STRB : AI2_stridx<1, "strb", IIC_iStore_bh_iu, IIC_iStore_bh_ru>;
@@ -3770,9 +3892,8 @@ def QSUB16 : AAIIntrinsic<0b01100010, 0b11110111, "qsub16", int_arm_qsub16>;
def QSUB8 : AAIIntrinsic<0b01100010, 0b11111111, "qsub8", int_arm_qsub8>;
def QDADD : AAIRevOpr<0b00010100, 0b00000101, "qdadd",
- [(set GPRnopc:$Rd, (int_arm_qadd (int_arm_qadd GPRnopc:$Rm,
- GPRnopc:$Rm),
- GPRnopc:$Rn))]>;
+ [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm,
+ (int_arm_qadd GPRnopc:$Rn, GPRnopc:$Rn)))]>;
def QDSUB : AAIRevOpr<0b00010110, 0b00000101, "qdsub",
[(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm,
(int_arm_qadd GPRnopc:$Rn, GPRnopc:$Rn)))]>;
@@ -3787,7 +3908,7 @@ def : ARMV5TEPat<(saddsat GPR:$a, GPR:$b),
(QADD GPR:$a, GPR:$b)>;
def : ARMV5TEPat<(ssubsat GPR:$a, GPR:$b),
(QSUB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(saddsat(saddsat rGPR:$Rm, rGPR:$Rm), rGPR:$Rn),
+def : ARMV5TEPat<(saddsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)),
(QDADD rGPR:$Rm, rGPR:$Rn)>;
def : ARMV5TEPat<(ssubsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)),
(QDSUB rGPR:$Rm, rGPR:$Rn)>;
@@ -5414,7 +5535,8 @@ def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
(outs GPRwithAPSR:$Rt),
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
- imm0_7:$opc2), []>;
+ imm0_7:$opc2), []>,
+ ComplexDeprecationPredicate<"MRC">;
def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
(MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0, pred:$p)>;
@@ -5691,7 +5813,7 @@ def : ARMPat<(ARMthread_pointer), (MRC 15, 0, 13, 0, 3)>,
// when we get here from a longjmp(). We force everything out of registers
// except for our own input by listing the relevant registers in Defs. By
// doing so, we also cause the prologue/epilogue code to actively preserve
-// all of the callee-saved resgisters, which is exactly what we want.
+// all of the callee-saved registers, which is exactly what we want.
// A constant value is passed in $val, and we use the location as a scratch.
//
// These are pseudo-instructions and are lowered to individual MC-insts, so
@@ -5976,6 +6098,12 @@ include "ARMInstrNEON.td"
include "ARMInstrMVE.td"
//===----------------------------------------------------------------------===//
+// CDE (Custom Datapath Extension)
+//
+
+include "ARMInstrCDE.td"
+
+//===----------------------------------------------------------------------===//
// Assembler aliases
//
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td
index 604291be822c..2a1f50d97e3b 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -10,44 +10,6 @@
//
//===----------------------------------------------------------------------===//
-class ExpandImmAsmOp<string shift> : AsmOperandClass {
- let Name = !strconcat("ExpandImm", shift);
- let PredicateMethod = !strconcat("isExpImm<", shift, ">");
- let RenderMethod = "addImmOperands";
-}
-class InvertedExpandImmAsmOp<string shift, string size> : AsmOperandClass {
- let Name = !strconcat("InvertedExpandImm", shift, "_", size);
- let PredicateMethod = !strconcat("isInvertedExpImm<", shift, ",", size, ">");
- let RenderMethod = "addImmOperands";
-}
-
-class ExpandImm<string shift> : Operand<i32> {
- let ParserMatchClass = ExpandImmAsmOp<shift>;
- let EncoderMethod = !strconcat("getExpandedImmOpValue<",shift,",false>");
- let DecoderMethod = !strconcat("DecodeExpandedImmOperand<",shift,">");
- let PrintMethod = "printExpandedImmOperand";
-}
-class InvertedExpandImm<string shift, string size> : Operand<i32> {
- let ParserMatchClass = InvertedExpandImmAsmOp<shift, size>;
- let EncoderMethod = !strconcat("getExpandedImmOpValue<",shift,",true>");
- let PrintMethod = "printExpandedImmOperand";
- // No decoder method needed, because this operand type is only used
- // by aliases (VAND and VORN)
-}
-
-def expzero00 : ExpandImm<"0">;
-def expzero08 : ExpandImm<"8">;
-def expzero16 : ExpandImm<"16">;
-def expzero24 : ExpandImm<"24">;
-
-def expzero00inv16 : InvertedExpandImm<"0", "16">;
-def expzero08inv16 : InvertedExpandImm<"8", "16">;
-
-def expzero00inv32 : InvertedExpandImm<"0", "32">;
-def expzero08inv32 : InvertedExpandImm<"8", "32">;
-def expzero16inv32 : InvertedExpandImm<"16", "32">;
-def expzero24inv32 : InvertedExpandImm<"24", "32">;
-
// VPT condition mask
def vpt_mask : Operand<i32> {
let PrintMethod = "printVPTMask";
@@ -277,7 +239,8 @@ class mve_addr_q_shift<int shift> : MemOperand {
// A family of classes wrapping up information about the vector types
// used by MVE.
-class MVEVectorVTInfo<ValueType vec, ValueType dblvec, ValueType pred,
+class MVEVectorVTInfo<ValueType vec, ValueType dblvec,
+ ValueType pred, ValueType dblpred,
bits<2> size, string suffixletter, bit unsigned> {
// The LLVM ValueType representing the vector, so we can use it in
// ISel patterns.
@@ -300,6 +263,9 @@ class MVEVectorVTInfo<ValueType vec, ValueType dblvec, ValueType pred,
// directly.
ValueType Pred = pred;
+ // Same as Pred but for DblVec rather than Vec.
+ ValueType DblPred = dblpred;
+
// The most common representation of the vector element size in MVE
// instruction encodings: a 2-bit value V representing an (8<<V)-bit
// vector element.
@@ -319,38 +285,38 @@ class MVEVectorVTInfo<ValueType vec, ValueType dblvec, ValueType pred,
!cast<string>(LaneBits));
// The suffix used on an instruction that mentions the whole type.
- string Suffix = suffixletter ## BitsSuffix;
+ string Suffix = suffixletter # BitsSuffix;
// The letter part of the suffix only.
string SuffixLetter = suffixletter;
}
// Integer vector types that don't treat signed and unsigned differently.
-def MVE_v16i8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "i", ?>;
-def MVE_v8i16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b01, "i", ?>;
-def MVE_v4i32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, 0b10, "i", ?>;
-def MVE_v2i64 : MVEVectorVTInfo<v2i64, ?, v4i1, 0b11, "i", ?>;
+def MVE_v16i8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b00, "i", ?>;
+def MVE_v8i16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b01, "i", ?>;
+def MVE_v4i32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, v4i1, 0b10, "i", ?>;
+def MVE_v2i64 : MVEVectorVTInfo<v2i64, ?, v4i1, ?, 0b11, "i", ?>;
// Explicitly signed and unsigned integer vectors. They map to the
// same set of LLVM ValueTypes as above, but are represented
// differently in assembly and instruction encodings.
-def MVE_v16s8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "s", 0b0>;
-def MVE_v8s16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b01, "s", 0b0>;
-def MVE_v4s32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, 0b10, "s", 0b0>;
-def MVE_v2s64 : MVEVectorVTInfo<v2i64, ?, v4i1, 0b11, "s", 0b0>;
-def MVE_v16u8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "u", 0b1>;
-def MVE_v8u16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b01, "u", 0b1>;
-def MVE_v4u32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, 0b10, "u", 0b1>;
-def MVE_v2u64 : MVEVectorVTInfo<v2i64, ?, v4i1, 0b11, "u", 0b1>;
+def MVE_v16s8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b00, "s", 0b0>;
+def MVE_v8s16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b01, "s", 0b0>;
+def MVE_v4s32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, v4i1, 0b10, "s", 0b0>;
+def MVE_v2s64 : MVEVectorVTInfo<v2i64, ?, v4i1, ?, 0b11, "s", 0b0>;
+def MVE_v16u8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b00, "u", 0b1>;
+def MVE_v8u16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b01, "u", 0b1>;
+def MVE_v4u32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, v4i1, 0b10, "u", 0b1>;
+def MVE_v2u64 : MVEVectorVTInfo<v2i64, ?, v4i1, ?, 0b11, "u", 0b1>;
// FP vector types.
-def MVE_v8f16 : MVEVectorVTInfo<v8f16, v4f32, v8i1, 0b01, "f", ?>;
-def MVE_v4f32 : MVEVectorVTInfo<v4f32, v2f64, v4i1, 0b10, "f", ?>;
-def MVE_v2f64 : MVEVectorVTInfo<v2f64, ?, v4i1, 0b11, "f", ?>;
+def MVE_v8f16 : MVEVectorVTInfo<v8f16, v4f32, v8i1, v4i1, 0b01, "f", ?>;
+def MVE_v4f32 : MVEVectorVTInfo<v4f32, v2f64, v4i1, v4i1, 0b10, "f", ?>;
+def MVE_v2f64 : MVEVectorVTInfo<v2f64, ?, v4i1, ?, 0b11, "f", ?>;
// Polynomial vector types.
-def MVE_v16p8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b11, "p", 0b0>;
-def MVE_v8p16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b11, "p", 0b1>;
+def MVE_v16p8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b11, "p", 0b0>;
+def MVE_v8p16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b11, "p", 0b1>;
// --------- Start of base classes for the instructions themselves
@@ -473,6 +439,8 @@ class MVE_ScalarShiftDoubleReg<string iname, dag iops, string asm,
let Inst{19-17} = RdaLo{3-1};
let Inst{11-9} = RdaHi{3-1};
+
+ let hasSideEffects = 0;
}
class MVE_ScalarShiftDRegImm<string iname, bits<2> op5_4, bit op16,
@@ -590,6 +558,7 @@ class MVE_VABAV<string suffix, bit U, bits<2> size>
let Inst{5} = Qm{3};
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b1;
+ let horizontalReduction = 1;
}
multiclass MVE_VABAV_m<MVEVectorVTInfo VTI> {
@@ -639,38 +608,63 @@ class MVE_VADDV<string iname, string suffix, dag iops, string cstr,
let Inst{5} = A;
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b0;
+ let horizontalReduction = 1;
+ let validForTailPredication = 1;
}
-multiclass MVE_VADDV_A<string suffix, bit U, bits<2> size,
- list<dag> pattern=[]> {
- def acc : MVE_VADDV<"vaddva", suffix,
+def ARMVADDVs : SDNode<"ARMISD::VADDVs", SDTVecReduce>;
+def ARMVADDVu : SDNode<"ARMISD::VADDVu", SDTVecReduce>;
+
+multiclass MVE_VADDV_A<MVEVectorVTInfo VTI> {
+ def acc : MVE_VADDV<"vaddva", VTI.Suffix,
(ins tGPREven:$Rda_src, MQPR:$Qm), "$Rda = $Rda_src",
- 0b1, U, size, pattern>;
- def no_acc : MVE_VADDV<"vaddv", suffix,
+ 0b1, VTI.Unsigned, VTI.Size>;
+ def no_acc : MVE_VADDV<"vaddv", VTI.Suffix,
(ins MQPR:$Qm), "",
- 0b0, U, size, pattern>;
-}
+ 0b0, VTI.Unsigned, VTI.Size>;
-defm MVE_VADDVs8 : MVE_VADDV_A<"s8", 0b0, 0b00>;
-defm MVE_VADDVs16 : MVE_VADDV_A<"s16", 0b0, 0b01>;
-defm MVE_VADDVs32 : MVE_VADDV_A<"s32", 0b0, 0b10>;
-defm MVE_VADDVu8 : MVE_VADDV_A<"u8", 0b1, 0b00>;
-defm MVE_VADDVu16 : MVE_VADDV_A<"u16", 0b1, 0b01>;
-defm MVE_VADDVu32 : MVE_VADDV_A<"u32", 0b1, 0b10>;
+ defvar InstA = !cast<Instruction>(NAME # "acc");
+ defvar InstN = !cast<Instruction>(NAME # "no_acc");
-let Predicates = [HasMVEInt] in {
- def : Pat<(i32 (vecreduce_add (v4i32 MQPR:$src))), (i32 (MVE_VADDVu32no_acc $src))>;
- def : Pat<(i32 (vecreduce_add (v8i16 MQPR:$src))), (i32 (MVE_VADDVu16no_acc $src))>;
- def : Pat<(i32 (vecreduce_add (v16i8 MQPR:$src))), (i32 (MVE_VADDVu8no_acc $src))>;
- def : Pat<(i32 (add (i32 (vecreduce_add (v4i32 MQPR:$src1))), (i32 tGPR:$src2))),
- (i32 (MVE_VADDVu32acc $src2, $src1))>;
- def : Pat<(i32 (add (i32 (vecreduce_add (v8i16 MQPR:$src1))), (i32 tGPR:$src2))),
- (i32 (MVE_VADDVu16acc $src2, $src1))>;
- def : Pat<(i32 (add (i32 (vecreduce_add (v16i8 MQPR:$src1))), (i32 tGPR:$src2))),
- (i32 (MVE_VADDVu8acc $src2, $src1))>;
+ let Predicates = [HasMVEInt] in {
+ if VTI.Unsigned then {
+ def : Pat<(i32 (vecreduce_add (VTI.Vec MQPR:$vec))),
+ (i32 (InstN $vec))>;
+ def : Pat<(i32 (ARMVADDVu (VTI.Vec MQPR:$vec))),
+ (i32 (InstN $vec))>;
+ def : Pat<(i32 (add (i32 (vecreduce_add (VTI.Vec MQPR:$vec))),
+ (i32 tGPREven:$acc))),
+ (i32 (InstA $acc, $vec))>;
+ def : Pat<(i32 (add (i32 (ARMVADDVu (VTI.Vec MQPR:$vec))),
+ (i32 tGPREven:$acc))),
+ (i32 (InstA $acc, $vec))>;
+ } else {
+ def : Pat<(i32 (ARMVADDVs (VTI.Vec MQPR:$vec))),
+ (i32 (InstN $vec))>;
+ def : Pat<(i32 (add (i32 (ARMVADDVs (VTI.Vec MQPR:$vec))),
+ (i32 tGPREven:$acc))),
+ (i32 (InstA $acc, $vec))>;
+ }
+ def : Pat<(i32 (int_arm_mve_addv_predicated (VTI.Vec MQPR:$vec),
+ (i32 VTI.Unsigned),
+ (VTI.Pred VCCR:$pred))),
+ (i32 (InstN $vec, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (add (int_arm_mve_addv_predicated (VTI.Vec MQPR:$vec),
+ (i32 VTI.Unsigned),
+ (VTI.Pred VCCR:$pred)),
+ (i32 tGPREven:$acc))),
+ (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
+ }
}
+defm MVE_VADDVs8 : MVE_VADDV_A<MVE_v16s8>;
+defm MVE_VADDVs16 : MVE_VADDV_A<MVE_v8s16>;
+defm MVE_VADDVs32 : MVE_VADDV_A<MVE_v4s32>;
+defm MVE_VADDVu8 : MVE_VADDV_A<MVE_v16u8>;
+defm MVE_VADDVu16 : MVE_VADDV_A<MVE_v8u16>;
+defm MVE_VADDVu32 : MVE_VADDV_A<MVE_v4u32>;
+
class MVE_VADDLV<string iname, string suffix, dag iops, string cstr,
bit A, bit U, list<dag> pattern=[]>
: MVE_rDest<(outs tGPREven:$RdaLo, tGPROdd:$RdaHi), iops, NoItinerary, iname,
@@ -689,21 +683,58 @@ class MVE_VADDLV<string iname, string suffix, dag iops, string cstr,
let Inst{5} = A;
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b0;
-}
-
-multiclass MVE_VADDLV_A<string suffix, bit U, list<dag> pattern=[]> {
- def acc : MVE_VADDLV<"vaddlva", suffix,
+ let horizontalReduction = 1;
+}
+
+def SDTVecReduceL : SDTypeProfile<2, 1, [ // VADDLV
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>
+]>;
+def SDTVecReduceLA : SDTypeProfile<2, 3, [ // VADDLVA
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>,
+ SDTCisVec<4>
+]>;
+def SDTVecReduceLP : SDTypeProfile<2, 2, [ // VADDLVp
+  SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>, SDTCisVec<3>
+]>;
+def SDTVecReduceLPA : SDTypeProfile<2, 4, [ // VADDLVAp
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>,
+ SDTCisVec<4>, SDTCisVec<5>
+]>;
+
+multiclass MVE_VADDLV_A<MVEVectorVTInfo VTI> {
+ def acc : MVE_VADDLV<"vaddlva", VTI.Suffix,
(ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, MQPR:$Qm),
"$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src",
- 0b1, U, pattern>;
- def no_acc : MVE_VADDLV<"vaddlv", suffix,
+ 0b1, VTI.Unsigned>;
+ def no_acc : MVE_VADDLV<"vaddlv", VTI.Suffix,
(ins MQPR:$Qm), "",
- 0b0, U, pattern>;
-}
+ 0b0, VTI.Unsigned>;
+
+ defvar InstA = !cast<Instruction>(NAME # "acc");
+ defvar InstN = !cast<Instruction>(NAME # "no_acc");
+ defvar letter = VTI.SuffixLetter;
+ defvar ARMVADDLV = SDNode<"ARMISD::VADDLV" # letter, SDTVecReduceL>;
+ defvar ARMVADDLVA = SDNode<"ARMISD::VADDLVA" # letter, SDTVecReduceLA>;
+ defvar ARMVADDLVp = SDNode<"ARMISD::VADDLVp" # letter, SDTVecReduceLP>;
+ defvar ARMVADDLVAp = SDNode<"ARMISD::VADDLVAp" # letter, SDTVecReduceLPA>;
-defm MVE_VADDLVs32 : MVE_VADDLV_A<"s32", 0b0>;
-defm MVE_VADDLVu32 : MVE_VADDLV_A<"u32", 0b1>;
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(ARMVADDLV (v4i32 MQPR:$vec)),
+ (InstN (v4i32 MQPR:$vec))>;
+ def : Pat<(ARMVADDLVA tGPREven:$acclo, tGPROdd:$acchi, (v4i32 MQPR:$vec)),
+ (InstA tGPREven:$acclo, tGPROdd:$acchi, (v4i32 MQPR:$vec))>;
+ def : Pat<(ARMVADDLVp (v4i32 MQPR:$vec), (VTI.Pred VCCR:$pred)),
+ (InstN (v4i32 MQPR:$vec), ARMVCCThen, (VTI.Pred VCCR:$pred))>;
+ def : Pat<(ARMVADDLVAp tGPREven:$acclo, tGPROdd:$acchi, (v4i32 MQPR:$vec),
+ (VTI.Pred VCCR:$pred)),
+ (InstA tGPREven:$acclo, tGPROdd:$acchi, (v4i32 MQPR:$vec),
+ ARMVCCThen, (VTI.Pred VCCR:$pred))>;
+ }
+}
+
+defm MVE_VADDLVs32 : MVE_VADDLV_A<MVE_v4s32>;
+defm MVE_VADDLVu32 : MVE_VADDLV_A<MVE_v4u32>;
class MVE_VMINMAXNMV<string iname, string suffix, bit sz,
bit bit_17, bit bit_7, list<dag> pattern=[]>
@@ -724,25 +755,48 @@ class MVE_VMINMAXNMV<string iname, string suffix, bit sz,
let Inst{6-5} = 0b00;
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b0;
+ let horizontalReduction = 1;
let Predicates = [HasMVEFloat];
+ let hasSideEffects = 0;
}
-multiclass MVE_VMINMAXNMV_fty<string iname, bit bit_7, list<dag> pattern=[]> {
- def f32 : MVE_VMINMAXNMV<iname, "f32", 0b0, 0b1, bit_7, pattern>;
- def f16 : MVE_VMINMAXNMV<iname, "f16", 0b1, 0b1, bit_7, pattern>;
-}
+multiclass MVE_VMINMAXNMV_p<string iname, bit notAbs, bit isMin,
+ MVEVectorVTInfo VTI, string intrBaseName,
+ ValueType Scalar, RegisterClass ScalarReg> {
+ def "": MVE_VMINMAXNMV<iname, VTI.Suffix, VTI.Size{0}, notAbs, isMin>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar unpred_intr = !cast<Intrinsic>(intrBaseName);
+ defvar pred_intr = !cast<Intrinsic>(intrBaseName#"_predicated");
-defm MVE_VMINNMV : MVE_VMINMAXNMV_fty<"vminnmv", 0b1>;
-defm MVE_VMAXNMV : MVE_VMINMAXNMV_fty<"vmaxnmv", 0b0>;
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(Scalar (unpred_intr (Scalar ScalarReg:$prev),
+ (VTI.Vec MQPR:$vec))),
+ (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS ScalarReg:$prev, rGPR),
+ (VTI.Vec MQPR:$vec)),
+ ScalarReg)>;
+ def : Pat<(Scalar (pred_intr (Scalar ScalarReg:$prev),
+ (VTI.Vec MQPR:$vec),
+ (VTI.Pred VCCR:$pred))),
+ (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS ScalarReg:$prev, rGPR),
+ (VTI.Vec MQPR:$vec),
+ ARMVCCThen, (VTI.Pred VCCR:$pred)),
+ ScalarReg)>;
+ }
+}
-multiclass MVE_VMINMAXNMAV_fty<string iname, bit bit_7, list<dag> pattern=[]> {
- def f32 : MVE_VMINMAXNMV<iname, "f32", 0b0, 0b0, bit_7, pattern>;
- def f16 : MVE_VMINMAXNMV<iname, "f16", 0b1, 0b0, bit_7, pattern>;
+multiclass MVE_VMINMAXNMV_fty<string iname, bit notAbs, bit isMin,
+ string intrBase> {
+ defm f32 : MVE_VMINMAXNMV_p<iname, notAbs, isMin, MVE_v4f32, intrBase,
+ f32, SPR>;
+ defm f16 : MVE_VMINMAXNMV_p<iname, notAbs, isMin, MVE_v8f16, intrBase,
+ f16, HPR>;
}
-defm MVE_VMINNMAV : MVE_VMINMAXNMAV_fty<"vminnmav", 0b1>;
-defm MVE_VMAXNMAV : MVE_VMINMAXNMAV_fty<"vmaxnmav", 0b0>;
+defm MVE_VMINNMV : MVE_VMINMAXNMV_fty<"vminnmv", 1, 1, "int_arm_mve_minnmv">;
+defm MVE_VMAXNMV : MVE_VMINMAXNMV_fty<"vmaxnmv", 1, 0, "int_arm_mve_maxnmv">;
+defm MVE_VMINNMAV: MVE_VMINMAXNMV_fty<"vminnmav", 0, 1, "int_arm_mve_minnmav">;
+defm MVE_VMAXNMAV: MVE_VMINMAXNMV_fty<"vmaxnmav", 0, 0, "int_arm_mve_maxnmav">;
class MVE_VMINMAXV<string iname, string suffix, bit U, bits<2> size,
bit bit_17, bit bit_7, list<dag> pattern=[]>
@@ -762,33 +816,40 @@ class MVE_VMINMAXV<string iname, string suffix, bit U, bits<2> size,
let Inst{6-5} = 0b00;
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b0;
+ let horizontalReduction = 1;
}
-multiclass MVE_VMINMAXV_p<string iname, bit bit_17, bit bit_7,
- MVEVectorVTInfo VTI, Intrinsic intr> {
+multiclass MVE_VMINMAXV_p<string iname, bit notAbs, bit isMin,
+ MVEVectorVTInfo VTI, string intrBaseName> {
def "": MVE_VMINMAXV<iname, VTI.Suffix, VTI.Unsigned, VTI.Size,
- bit_17, bit_7>;
- defvar Inst = !cast<Instruction>(NAME);
+ notAbs, isMin>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar unpred_intr = !cast<Intrinsic>(intrBaseName);
+ defvar pred_intr = !cast<Intrinsic>(intrBaseName#"_predicated");
+ defvar base_args = (? (i32 rGPR:$prev), (VTI.Vec MQPR:$vec));
+ defvar args = !if(notAbs, !con(base_args, (? (i32 VTI.Unsigned))),
+ base_args);
- let Predicates = [HasMVEInt] in
- def _pat : Pat<(i32 (intr (i32 rGPR:$prev), (VTI.Vec MQPR:$vec))),
- (i32 (Inst (i32 rGPR:$prev), (VTI.Vec MQPR:$vec)))>;
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(i32 !con(args, (unpred_intr))),
+ (i32 (Inst (i32 rGPR:$prev), (VTI.Vec MQPR:$vec)))>;
+ def : Pat<(i32 !con(args, (pred_intr (VTI.Pred VCCR:$pred)))),
+ (i32 (Inst (i32 rGPR:$prev), (VTI.Vec MQPR:$vec),
+ ARMVCCThen, (VTI.Pred VCCR:$pred)))>;
+ }
}
-multiclass MVE_VMINMAXV_ty<string iname, bit bit_7,
- Intrinsic intr_s, Intrinsic intr_u> {
- defm s8 : MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v16s8, intr_s>;
- defm s16: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v8s16, intr_s>;
- defm s32: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v4s32, intr_s>;
- defm u8 : MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v16u8, intr_u>;
- defm u16: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v8u16, intr_u>;
- defm u32: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v4u32, intr_u>;
+multiclass MVE_VMINMAXV_ty<string iname, bit isMin, string intrBaseName> {
+ defm s8 : MVE_VMINMAXV_p<iname, 1, isMin, MVE_v16s8, intrBaseName>;
+ defm s16: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v8s16, intrBaseName>;
+ defm s32: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v4s32, intrBaseName>;
+ defm u8 : MVE_VMINMAXV_p<iname, 1, isMin, MVE_v16u8, intrBaseName>;
+ defm u16: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v8u16, intrBaseName>;
+ defm u32: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v4u32, intrBaseName>;
}
-defm MVE_VMINV : MVE_VMINMAXV_ty<
- "vminv", 0b1, int_arm_mve_minv_s, int_arm_mve_minv_u>;
-defm MVE_VMAXV : MVE_VMINMAXV_ty<
- "vmaxv", 0b0, int_arm_mve_maxv_s, int_arm_mve_maxv_u>;
+defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 1, "int_arm_mve_minv">;
+defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0, "int_arm_mve_maxv">;
let Predicates = [HasMVEInt] in {
def : Pat<(i32 (vecreduce_smax (v16i8 MQPR:$src))),
@@ -819,14 +880,14 @@ let Predicates = [HasMVEInt] in {
}
-multiclass MVE_VMINMAXAV_ty<string iname, bit bit_7, list<dag> pattern=[]> {
- def s8 : MVE_VMINMAXV<iname, "s8", 0b0, 0b00, 0b0, bit_7>;
- def s16 : MVE_VMINMAXV<iname, "s16", 0b0, 0b01, 0b0, bit_7>;
- def s32 : MVE_VMINMAXV<iname, "s32", 0b0, 0b10, 0b0, bit_7>;
+multiclass MVE_VMINMAXAV_ty<string iname, bit isMin, string intrBaseName> {
+ defm s8 : MVE_VMINMAXV_p<iname, 0, isMin, MVE_v16s8, intrBaseName>;
+ defm s16: MVE_VMINMAXV_p<iname, 0, isMin, MVE_v8s16, intrBaseName>;
+ defm s32: MVE_VMINMAXV_p<iname, 0, isMin, MVE_v4s32, intrBaseName>;
}
-defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 0b1>;
-defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0b0>;
+defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 1, "int_arm_mve_minav">;
+defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0, "int_arm_mve_maxav">;
class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0>
@@ -847,6 +908,12 @@ class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
let Inst{5} = A;
let Inst{3-1} = Qm{2-0};
let Inst{0} = bit_0;
+ let horizontalReduction = 1;
+ // Allow tail predication for non-exchanging versions. As this is also a
+ // horizontalReduction, ARMLowOverheadLoops will also have to check that
+ // the vector operands contain zeros in their false lanes for the instruction
+ // to be properly valid.
+ let validForTailPredication = !eq(X, 0);
}
multiclass MVE_VMLAMLSDAV_A<string iname, string x, MVEVectorVTInfo VTI,
@@ -932,6 +999,58 @@ defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v16s8, 0b0, 0b1>;
defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v8s16, 0b0, 0b0>;
defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v4s32, 0b1, 0b0>;
+def SDTVecReduce2 : SDTypeProfile<1, 2, [ // VMLAV
+ SDTCisInt<0>, SDTCisVec<1>, SDTCisVec<2>
+]>;
+def SDTVecReduce2L : SDTypeProfile<2, 2, [ // VMLALV
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>, SDTCisVec<3>
+]>;
+def SDTVecReduce2LA : SDTypeProfile<2, 4, [ // VMLALVA
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>,
+ SDTCisVec<4>, SDTCisVec<5>
+]>;
+def ARMVMLAVs : SDNode<"ARMISD::VMLAVs", SDTVecReduce2>;
+def ARMVMLAVu : SDNode<"ARMISD::VMLAVu", SDTVecReduce2>;
+def ARMVMLALVs : SDNode<"ARMISD::VMLALVs", SDTVecReduce2L>;
+def ARMVMLALVu : SDNode<"ARMISD::VMLALVu", SDTVecReduce2L>;
+def ARMVMLALVAs : SDNode<"ARMISD::VMLALVAs", SDTVecReduce2LA>;
+def ARMVMLALVAu : SDNode<"ARMISD::VMLALVAu", SDTVecReduce2LA>;
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(i32 (vecreduce_add (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)))),
+ (i32 (MVE_VMLADAVu32 $src1, $src2))>;
+ def : Pat<(i32 (vecreduce_add (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)))),
+ (i32 (MVE_VMLADAVu16 $src1, $src2))>;
+ def : Pat<(i32 (ARMVMLAVs (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+ (i32 (MVE_VMLADAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(i32 (ARMVMLAVu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+ (i32 (MVE_VMLADAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(i32 (vecreduce_add (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)))),
+ (i32 (MVE_VMLADAVu8 $src1, $src2))>;
+ def : Pat<(i32 (ARMVMLAVs (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+ (i32 (MVE_VMLADAVs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+ def : Pat<(i32 (ARMVMLAVu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+ (i32 (MVE_VMLADAVu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+
+ def : Pat<(i32 (add (i32 (vecreduce_add (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)))),
+ (i32 tGPREven:$src3))),
+ (i32 (MVE_VMLADAVau32 $src3, $src1, $src2))>;
+ def : Pat<(i32 (add (i32 (vecreduce_add (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)))),
+ (i32 tGPREven:$src3))),
+ (i32 (MVE_VMLADAVau16 $src3, $src1, $src2))>;
+ def : Pat<(i32 (add (ARMVMLAVs (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)), tGPREven:$Rd)),
+ (i32 (MVE_VMLADAVas16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(i32 (add (ARMVMLAVu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)), tGPREven:$Rd)),
+ (i32 (MVE_VMLADAVau16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(i32 (add (i32 (vecreduce_add (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)))),
+ (i32 tGPREven:$src3))),
+ (i32 (MVE_VMLADAVau8 $src3, $src1, $src2))>;
+ def : Pat<(i32 (add (ARMVMLAVs (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)), tGPREven:$Rd)),
+ (i32 (MVE_VMLADAVas8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+ def : Pat<(i32 (add (ARMVMLAVu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)), tGPREven:$Rd)),
+ (i32 (MVE_VMLADAVau8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+}
+
// vmlav aliases vmladav
foreach acc = ["", "a"] in {
foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32"] in {
@@ -963,6 +1082,14 @@ class MVE_VMLALDAVBase<string iname, string suffix, dag iops, string cstr,
let Inst{5} = A;
let Inst{3-1} = Qm{2-0};
let Inst{0} = bit_0;
+ let horizontalReduction = 1;
+ // Allow tail predication for non-exchanging versions. As this is also a
+ // horizontalReduction, ARMLowOverheadLoops will also have to check that
+ // the vector operands contain zeros in their false lanes for the instruction
+ // to be properly valid.
+ let validForTailPredication = !eq(X, 0);
+
+ let hasSideEffects = 0;
}
multiclass MVE_VMLALDAVBase_A<string iname, string x, string suffix,
@@ -1023,6 +1150,26 @@ multiclass MVE_VMLALDAV_multi<string suffix, bit sz, list<dag> pattern=[]> {
defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"16", 0b0>;
defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"32", 0b1>;
+let Predicates = [HasMVEInt] in {
+ def : Pat<(ARMVMLALVs (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)),
+ (MVE_VMLALDAVs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))>;
+ def : Pat<(ARMVMLALVu (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)),
+ (MVE_VMLALDAVu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))>;
+ def : Pat<(ARMVMLALVs (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)),
+ (MVE_VMLALDAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))>;
+ def : Pat<(ARMVMLALVu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)),
+ (MVE_VMLALDAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))>;
+
+ def : Pat<(ARMVMLALVAs tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)),
+ (MVE_VMLALDAVas32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))>;
+ def : Pat<(ARMVMLALVAu tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)),
+ (MVE_VMLALDAVau32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))>;
+ def : Pat<(ARMVMLALVAs tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)),
+ (MVE_VMLALDAVas16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))>;
+ def : Pat<(ARMVMLALVAu tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)),
+ (MVE_VMLALDAVau16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))>;
+}
+
// vmlalv aliases vmlaldav
foreach acc = ["", "a"] in {
foreach suffix = ["s16", "s32", "u16", "u32"] in {
@@ -1244,28 +1391,29 @@ let Predicates = [HasMVEInt] in {
(v4i32 (MVE_VREV32_8 (v4i32 MQPR:$src)))>;
}
-let Predicates = [HasMVEInt] in {
- def : Pat<(v4i32 (ARMvrev64 (v4i32 MQPR:$src))),
- (v4i32 (MVE_VREV64_32 (v4i32 MQPR:$src)))>;
- def : Pat<(v8i16 (ARMvrev64 (v8i16 MQPR:$src))),
- (v8i16 (MVE_VREV64_16 (v8i16 MQPR:$src)))>;
- def : Pat<(v16i8 (ARMvrev64 (v16i8 MQPR:$src))),
- (v16i8 (MVE_VREV64_8 (v16i8 MQPR:$src)))>;
+multiclass MVE_VREV_basic_patterns<int revbits, list<MVEVectorVTInfo> VTIs,
+ Instruction Inst> {
+ defvar unpred_op = !cast<SDNode>("ARMvrev" # revbits);
- def : Pat<(v8i16 (ARMvrev32 (v8i16 MQPR:$src))),
- (v8i16 (MVE_VREV32_16 (v8i16 MQPR:$src)))>;
- def : Pat<(v16i8 (ARMvrev32 (v16i8 MQPR:$src))),
- (v16i8 (MVE_VREV32_8 (v16i8 MQPR:$src)))>;
+ foreach VTI = VTIs in {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$src))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$src)))>;
+ def : Pat<(VTI.Vec (int_arm_mve_vrev_predicated (VTI.Vec MQPR:$src),
+ revbits, (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$src), ARMVCCThen,
+ (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ }
+}
+
+let Predicates = [HasMVEInt] in {
+ defm: MVE_VREV_basic_patterns<64, [MVE_v4i32, MVE_v4f32], MVE_VREV64_32>;
+ defm: MVE_VREV_basic_patterns<64, [MVE_v8i16, MVE_v8f16], MVE_VREV64_16>;
+ defm: MVE_VREV_basic_patterns<64, [MVE_v16i8 ], MVE_VREV64_8>;
- def : Pat<(v16i8 (ARMvrev16 (v16i8 MQPR:$src))),
- (v16i8 (MVE_VREV16_8 (v16i8 MQPR:$src)))>;
+ defm: MVE_VREV_basic_patterns<32, [MVE_v8i16, MVE_v8f16], MVE_VREV32_16>;
+ defm: MVE_VREV_basic_patterns<32, [MVE_v16i8 ], MVE_VREV32_8>;
- def : Pat<(v4f32 (ARMvrev64 (v4f32 MQPR:$src))),
- (v4f32 (MVE_VREV64_32 (v4f32 MQPR:$src)))>;
- def : Pat<(v8f16 (ARMvrev64 (v8f16 MQPR:$src))),
- (v8f16 (MVE_VREV64_16 (v8f16 MQPR:$src)))>;
- def : Pat<(v8f16 (ARMvrev32 (v8f16 MQPR:$src))),
- (v8f16 (MVE_VREV32_16 (v8f16 MQPR:$src)))>;
+ defm: MVE_VREV_basic_patterns<16, [MVE_v16i8 ], MVE_VREV16_8>;
}
def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm),
@@ -1280,14 +1428,14 @@ def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm),
}
let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (vnotq (v16i8 MQPR:$val1))),
- (v16i8 (MVE_VMVN (v16i8 MQPR:$val1)))>;
- def : Pat<(v8i16 (vnotq (v8i16 MQPR:$val1))),
- (v8i16 (MVE_VMVN (v8i16 MQPR:$val1)))>;
- def : Pat<(v4i32 (vnotq (v4i32 MQPR:$val1))),
- (v4i32 (MVE_VMVN (v4i32 MQPR:$val1)))>;
- def : Pat<(v2i64 (vnotq (v2i64 MQPR:$val1))),
- (v2i64 (MVE_VMVN (v2i64 MQPR:$val1)))>;
+ foreach VTI = [ MVE_v16i8, MVE_v8i16, MVE_v4i32, MVE_v2i64 ] in {
+ def : Pat<(VTI.Vec (vnotq (VTI.Vec MQPR:$val1))),
+ (VTI.Vec (MVE_VMVN (VTI.Vec MQPR:$val1)))>;
+ def : Pat<(VTI.Vec (int_arm_mve_mvn_predicated (VTI.Vec MQPR:$val1),
+ (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (MVE_VMVN (VTI.Vec MQPR:$val1), ARMVCCThen,
+ (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ }
}
class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
@@ -1383,10 +1531,10 @@ defm : MVE_bit_op_with_inv<MVE_v8i16, or, int_arm_mve_orn_predicated, MVE_VORN>;
defm : MVE_bit_op_with_inv<MVE_v4i32, or, int_arm_mve_orn_predicated, MVE_VORN>;
defm : MVE_bit_op_with_inv<MVE_v2i64, or, int_arm_mve_orn_predicated, MVE_VORN>;
-class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
+class MVE_bit_cmode<string iname, string suffix, bit halfword, dag inOps>
: MVE_p<(outs MQPR:$Qd), inOps, NoItinerary,
iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> {
- bits<8> imm;
+ bits<12> imm;
bits<4> Qd;
let Inst{28} = imm{7};
@@ -1396,66 +1544,59 @@ class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
let Inst{18-16} = imm{6-4};
let Inst{15-13} = Qd{2-0};
let Inst{12} = 0b0;
- let Inst{11-8} = cmode;
+ let Inst{11} = halfword;
+ let Inst{10} = !if(halfword, 0, imm{10});
+ let Inst{9} = imm{9};
+ let Inst{8} = 0b1;
let Inst{7-6} = 0b01;
let Inst{4} = 0b1;
let Inst{3-0} = imm{3-0};
}
-class MVE_VORR<string suffix, bits<4> cmode, ExpandImm imm_type>
- : MVE_bit_cmode<"vorr", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> {
- let Inst{5} = 0b0;
- let validForTailPredication = 1;
-}
+multiclass MVE_bit_cmode_p<string iname, bit opcode,
+ MVEVectorVTInfo VTI, Operand imm_type, SDNode op> {
+ def "" : MVE_bit_cmode<iname, VTI.Suffix, VTI.Size{0},
+ (ins MQPR:$Qd_src, imm_type:$imm)> {
+ let Inst{5} = opcode;
+ let validForTailPredication = 1;
+ }
-def MVE_VORRIZ0v4i32 : MVE_VORR<"i32", 0b0001, expzero00>;
-def MVE_VORRIZ0v8i16 : MVE_VORR<"i16", 0b1001, expzero00>;
-def MVE_VORRIZ8v4i32 : MVE_VORR<"i32", 0b0011, expzero08>;
-def MVE_VORRIZ8v8i16 : MVE_VORR<"i16", 0b1011, expzero08>;
-def MVE_VORRIZ16v4i32 : MVE_VORR<"i32", 0b0101, expzero16>;
-def MVE_VORRIZ24v4i32 : MVE_VORR<"i32", 0b0111, expzero24>;
-
-def MVE_VORNIZ0v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
- (ins MQPR:$Qd_src, expzero00inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
-def MVE_VORNIZ0v8i16 : MVEAsmPseudo<"vorn${vp}.i16\t$Qd, $imm",
- (ins MQPR:$Qd_src, expzero00inv16:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
-def MVE_VORNIZ8v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
- (ins MQPR:$Qd_src, expzero08inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
-def MVE_VORNIZ8v8i16 : MVEAsmPseudo<"vorn${vp}.i16\t$Qd, $imm",
- (ins MQPR:$Qd_src, expzero08inv16:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
-def MVE_VORNIZ16v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
- (ins MQPR:$Qd_src, expzero16inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
-def MVE_VORNIZ24v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
- (ins MQPR:$Qd_src, expzero24inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar UnpredPat = (VTI.Vec (op (VTI.Vec MQPR:$src), timm:$simm));
-def MVE_VMOV : MVEInstAlias<"vmov${vp}\t$Qd, $Qm",
- (MVE_VORR MQPR:$Qd, MQPR:$Qm, MQPR:$Qm, vpred_r:$vp)>;
+ let Predicates = [HasMVEInt] in {
+ def : Pat<UnpredPat, (VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm))>;
+ def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+ UnpredPat, (VTI.Vec MQPR:$src))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm,
+ ARMVCCThen, (VTI.Pred VCCR:$pred)))>;
+ }
+}
-class MVE_VBIC<string suffix, bits<4> cmode, ExpandImm imm_type>
- : MVE_bit_cmode<"vbic", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> {
- let Inst{5} = 0b1;
- let validForTailPredication = 1;
+multiclass MVE_VORRimm<MVEVectorVTInfo VTI, Operand imm_type> {
+ defm "": MVE_bit_cmode_p<"vorr", 0, VTI, imm_type, ARMvorrImm>;
+}
+multiclass MVE_VBICimm<MVEVectorVTInfo VTI, Operand imm_type> {
+ defm "": MVE_bit_cmode_p<"vbic", 1, VTI, imm_type, ARMvbicImm>;
}
-def MVE_VBICIZ0v4i32 : MVE_VBIC<"i32", 0b0001, expzero00>;
-def MVE_VBICIZ0v8i16 : MVE_VBIC<"i16", 0b1001, expzero00>;
-def MVE_VBICIZ8v4i32 : MVE_VBIC<"i32", 0b0011, expzero08>;
-def MVE_VBICIZ8v8i16 : MVE_VBIC<"i16", 0b1011, expzero08>;
-def MVE_VBICIZ16v4i32 : MVE_VBIC<"i32", 0b0101, expzero16>;
-def MVE_VBICIZ24v4i32 : MVE_VBIC<"i32", 0b0111, expzero24>;
-
-def MVE_VANDIZ0v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
- (ins MQPR:$Qda_src, expzero00inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
-def MVE_VANDIZ0v8i16 : MVEAsmPseudo<"vand${vp}.i16\t$Qda, $imm",
- (ins MQPR:$Qda_src, expzero00inv16:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
-def MVE_VANDIZ8v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
- (ins MQPR:$Qda_src, expzero08inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
-def MVE_VANDIZ8v8i16 : MVEAsmPseudo<"vand${vp}.i16\t$Qda, $imm",
- (ins MQPR:$Qda_src, expzero08inv16:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
-def MVE_VANDIZ16v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
- (ins MQPR:$Qda_src, expzero16inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
-def MVE_VANDIZ24v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
- (ins MQPR:$Qda_src, expzero24inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+defm MVE_VORRimmi16 : MVE_VORRimm<MVE_v8i16, nImmSplatI16>;
+defm MVE_VORRimmi32 : MVE_VORRimm<MVE_v4i32, nImmSplatI32>;
+defm MVE_VBICimmi16 : MVE_VBICimm<MVE_v8i16, nImmSplatI16>;
+defm MVE_VBICimmi32 : MVE_VBICimm<MVE_v4i32, nImmSplatI32>;
+
+def MVE_VORNimmi16 : MVEInstAlias<"vorn${vp}.i16\t$Qd, $imm",
+ (MVE_VORRimmi16 MQPR:$Qd, nImmSplatNotI16:$imm, vpred_n:$vp), 0>;
+def MVE_VORNimmi32 : MVEInstAlias<"vorn${vp}.i32\t$Qd, $imm",
+ (MVE_VORRimmi32 MQPR:$Qd, nImmSplatNotI32:$imm, vpred_n:$vp), 0>;
+
+def MVE_VANDimmi16 : MVEInstAlias<"vand${vp}.i16\t$Qd, $imm",
+ (MVE_VBICimmi16 MQPR:$Qd, nImmSplatNotI16:$imm, vpred_n:$vp), 0>;
+def MVE_VANDimmi32 : MVEInstAlias<"vand${vp}.i32\t$Qd, $imm",
+ (MVE_VBICimmi32 MQPR:$Qd, nImmSplatNotI32:$imm, vpred_n:$vp), 0>;
+
+def MVE_VMOV : MVEInstAlias<"vmov${vp}\t$Qd, $Qm",
+ (MVE_VORR MQPR:$Qd, MQPR:$Qm, MQPR:$Qm, vpred_r:$vp)>;
class MVE_VMOV_lane_direction {
bit bit_20;
@@ -1494,6 +1635,8 @@ class MVE_VMOV_lane<string suffix, bit U, dag indexop,
let Inst{11-8} = 0b1011;
let Inst{7} = Qd{3};
let Inst{4-0} = 0b10000;
+
+ let hasSideEffects = 0;
}
class MVE_VMOV_lane_32<MVE_VMOV_lane_direction dir>
@@ -1557,10 +1700,14 @@ let Predicates = [HasMVEInt] in {
(MVE_VMOV_from_lane_s8 MQPR:$src, imm:$lane)>;
def : Pat<(ARMvgetlanes (v8i16 MQPR:$src), imm:$lane),
(MVE_VMOV_from_lane_s16 MQPR:$src, imm:$lane)>;
+ def : Pat<(ARMvgetlanes (v8f16 MQPR:$src), imm:$lane),
+ (MVE_VMOV_from_lane_s16 MQPR:$src, imm:$lane)>;
def : Pat<(ARMvgetlaneu (v16i8 MQPR:$src), imm:$lane),
(MVE_VMOV_from_lane_u8 MQPR:$src, imm:$lane)>;
def : Pat<(ARMvgetlaneu (v8i16 MQPR:$src), imm:$lane),
(MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane)>;
+ def : Pat<(ARMvgetlaneu (v8f16 MQPR:$src), imm:$lane),
+ (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane)>;
def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
(MVE_VMOV_to_lane_8 (v16i8 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
@@ -1575,8 +1722,8 @@ let Predicates = [HasMVEInt] in {
def : Pat<(insertelt (v4f32 MQPR:$src1), (f32 SPR:$src2), imm:$lane),
(INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS MQPR:$src1, MQPR)), SPR:$src2, (SSubReg_f32_reg imm:$lane))>;
- def : Pat<(insertelt (v8f16 MQPR:$src1), HPR:$src2, imm:$lane),
- (MVE_VMOV_to_lane_16 MQPR:$src1, (COPY_TO_REGCLASS HPR:$src2, rGPR), imm:$lane)>;
+ def : Pat<(insertelt (v8f16 MQPR:$src1), (f16 HPR:$src2), imm:$lane),
+ (MVE_VMOV_to_lane_16 MQPR:$src1, (COPY_TO_REGCLASS (f16 HPR:$src2), rGPR), imm:$lane)>;
def : Pat<(extractelt (v8f16 MQPR:$src), imm_even:$lane),
(EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_even:$lane))>;
def : Pat<(extractelt (v8f16 MQPR:$src), imm_odd:$lane),
@@ -1588,8 +1735,8 @@ let Predicates = [HasMVEInt] in {
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v4f32 (scalar_to_vector GPR:$src)),
(MVE_VMOV_to_lane_32 (v4f32 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
- def : Pat<(v8f16 (scalar_to_vector HPR:$src)),
- (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;
+ def : Pat<(v8f16 (scalar_to_vector (f16 HPR:$src))),
+ (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), (f16 HPR:$src), ssub_0)>;
def : Pat<(v8f16 (scalar_to_vector GPR:$src)),
(MVE_VMOV_to_lane_16 (v8f16 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
}
@@ -1882,6 +2029,26 @@ class MVE_VRHADD_Base<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
let validForTailPredication = 1;
}
+def addnuw : PatFrag<(ops node:$lhs, node:$rhs),
+ (add node:$lhs, node:$rhs), [{
+ return N->getFlags().hasNoUnsignedWrap();
+}]>;
+
+def addnsw : PatFrag<(ops node:$lhs, node:$rhs),
+ (add node:$lhs, node:$rhs), [{
+ return N->getFlags().hasNoSignedWrap();
+}]>;
+
+def subnuw : PatFrag<(ops node:$lhs, node:$rhs),
+ (sub node:$lhs, node:$rhs), [{
+ return N->getFlags().hasNoUnsignedWrap();
+}]>;
+
+def subnsw : PatFrag<(ops node:$lhs, node:$rhs),
+ (sub node:$lhs, node:$rhs), [{
+ return N->getFlags().hasNoSignedWrap();
+}]>;
+
multiclass MVE_VRHADD_m<MVEVectorVTInfo VTI,
SDNode unpred_op, Intrinsic pred_int> {
def "" : MVE_VRHADD_Base<VTI.Suffix, VTI.Unsigned, VTI.Size>;
@@ -1913,6 +2080,37 @@ defm MVE_VRHADDu8 : MVE_VRHADD<MVE_v16u8>;
defm MVE_VRHADDu16 : MVE_VRHADD<MVE_v8u16>;
defm MVE_VRHADDu32 : MVE_VRHADD<MVE_v4u32>;
+// Rounding Halving Add performs the arithmetic operation with an extra bit of
+// precision, before performing the shift, to avoid clipping errors. We're not
+// modelling that here with these patterns, but we're using no-wrap forms of
+// add to ensure that the extra bit of information is not needed for the
+// arithmetic or the rounding.
+def : Pat<(v16i8 (ARMvshrsImm (addnsw (addnsw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
+ (v16i8 (ARMvmovImm (i32 3585)))),
+ (i32 1))),
+ (MVE_VRHADDs8 MQPR:$Qm, MQPR:$Qn)>;
+def : Pat<(v8i16 (ARMvshrsImm (addnsw (addnsw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
+ (v8i16 (ARMvmovImm (i32 2049)))),
+ (i32 1))),
+ (MVE_VRHADDs16 MQPR:$Qm, MQPR:$Qn)>;
+def : Pat<(v4i32 (ARMvshrsImm (addnsw (addnsw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
+ (v4i32 (ARMvmovImm (i32 1)))),
+ (i32 1))),
+ (MVE_VRHADDs32 MQPR:$Qm, MQPR:$Qn)>;
+def : Pat<(v16i8 (ARMvshruImm (addnuw (addnuw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
+ (v16i8 (ARMvmovImm (i32 3585)))),
+ (i32 1))),
+ (MVE_VRHADDu8 MQPR:$Qm, MQPR:$Qn)>;
+def : Pat<(v8i16 (ARMvshruImm (addnuw (addnuw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
+ (v8i16 (ARMvmovImm (i32 2049)))),
+ (i32 1))),
+ (MVE_VRHADDu16 MQPR:$Qm, MQPR:$Qn)>;
+def : Pat<(v4i32 (ARMvshruImm (addnuw (addnuw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
+ (v4i32 (ARMvmovImm (i32 1)))),
+ (i32 1))),
+ (MVE_VRHADDu32 MQPR:$Qm, MQPR:$Qn)>;
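A minimal sketch of the scalar semantics these VRHADD patterns correspond to (illustrative only, not part of the patch; the function name is invented), written in C++ for a signed 8-bit lane:

    #include <cstdint>

    // Reference rounding halving add: widen, add, apply the +1 rounding term,
    // then arithmetic-shift right by one (well defined on negatives since C++20).
    int8_t vrhadds8_reference(int8_t a, int8_t b) {
      return static_cast<int8_t>((int16_t(a) + int16_t(b) + 1) >> 1);
    }

The patterns above match the same computation done at lane width, (a + b + 1) >> 1, and the addnsw/addnuw PatFrags restrict them to adds known not to wrap, so dropping the extra bit cannot change the result.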
+
+
class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract,
bits<2> size, list<dag> pattern=[]>
: MVE_int<iname, suffix, size, pattern> {
@@ -1936,7 +2134,8 @@ class MVE_VHSUB_<string suffix, bit U, bits<2> size,
: MVE_VHADDSUB<"vhsub", suffix, U, 0b1, size, pattern>;
multiclass MVE_VHADD_m<MVEVectorVTInfo VTI,
- SDNode unpred_op, Intrinsic pred_int> {
+ SDNode unpred_op, Intrinsic pred_int, PatFrag add_op,
+ SDNode shift_op> {
def "" : MVE_VHADD_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
@@ -1945,6 +2144,9 @@ multiclass MVE_VHADD_m<MVEVectorVTInfo VTI,
def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+ def : Pat<(VTI.Vec (shift_op (add_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)), (i32 1))),
+ (Inst MQPR:$Qm, MQPR:$Qn)>;
+
// Predicated add-and-divide-by-two
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned),
(VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
@@ -1954,18 +2156,24 @@ multiclass MVE_VHADD_m<MVEVectorVTInfo VTI,
}
}
-multiclass MVE_VHADD<MVEVectorVTInfo VTI>
- : MVE_VHADD_m<VTI, int_arm_mve_vhadd, int_arm_mve_hadd_predicated>;
+multiclass MVE_VHADD<MVEVectorVTInfo VTI, PatFrag add_op, SDNode shift_op>
+ : MVE_VHADD_m<VTI, int_arm_mve_vhadd, int_arm_mve_hadd_predicated, add_op,
+ shift_op>;
-defm MVE_VHADDs8 : MVE_VHADD<MVE_v16s8>;
-defm MVE_VHADDs16 : MVE_VHADD<MVE_v8s16>;
-defm MVE_VHADDs32 : MVE_VHADD<MVE_v4s32>;
-defm MVE_VHADDu8 : MVE_VHADD<MVE_v16u8>;
-defm MVE_VHADDu16 : MVE_VHADD<MVE_v8u16>;
-defm MVE_VHADDu32 : MVE_VHADD<MVE_v4u32>;
+// Halving add/sub perform the arithmetic operation with an extra bit of
+// precision, before performing the shift, to avoid clipping errors. We're not
+// modelling that here with these patterns, but we're using no-wrap forms of
+// add/sub to ensure that the extra bit of information is not needed.
+defm MVE_VHADDs8 : MVE_VHADD<MVE_v16s8, addnsw, ARMvshrsImm>;
+defm MVE_VHADDs16 : MVE_VHADD<MVE_v8s16, addnsw, ARMvshrsImm>;
+defm MVE_VHADDs32 : MVE_VHADD<MVE_v4s32, addnsw, ARMvshrsImm>;
+defm MVE_VHADDu8 : MVE_VHADD<MVE_v16u8, addnuw, ARMvshruImm>;
+defm MVE_VHADDu16 : MVE_VHADD<MVE_v8u16, addnuw, ARMvshruImm>;
+defm MVE_VHADDu32 : MVE_VHADD<MVE_v4u32, addnuw, ARMvshruImm>;
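Similarly, a minimal sketch of the non-rounding halving forms (illustrative only, not part of the patch; the names are invented): vhadd and vhsub omit the +1 rounding term, so the widened reference and the no-wrap lane-width pattern again agree.

    #include <cstdint>

    // Reference halving add / halving subtract for signed 8-bit lanes:
    // widen, combine, then arithmetic-shift right by one, with no rounding term.
    int8_t vhadds8_reference(int8_t a, int8_t b) {
      return static_cast<int8_t>((int16_t(a) + int16_t(b)) >> 1);
    }
    int8_t vhsubs8_reference(int8_t a, int8_t b) {
      return static_cast<int8_t>((int16_t(a) - int16_t(b)) >> 1);
    }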
multiclass MVE_VHSUB_m<MVEVectorVTInfo VTI,
- SDNode unpred_op, Intrinsic pred_int> {
+ SDNode unpred_op, Intrinsic pred_int, PatFrag sub_op,
+ SDNode shift_op> {
def "" : MVE_VHSUB_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
@@ -1975,6 +2183,10 @@ multiclass MVE_VHSUB_m<MVEVectorVTInfo VTI,
(i32 VTI.Unsigned))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+ def : Pat<(VTI.Vec (shift_op (sub_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)), (i32 1))),
+ (Inst MQPR:$Qm, MQPR:$Qn)>;
+
+
// Predicated subtract-and-divide-by-two
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
(i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
@@ -1985,15 +2197,16 @@ multiclass MVE_VHSUB_m<MVEVectorVTInfo VTI,
}
}
-multiclass MVE_VHSUB<MVEVectorVTInfo VTI>
- : MVE_VHSUB_m<VTI, int_arm_mve_vhsub, int_arm_mve_hsub_predicated>;
+multiclass MVE_VHSUB<MVEVectorVTInfo VTI, PatFrag sub_op, SDNode shift_op>
+ : MVE_VHSUB_m<VTI, int_arm_mve_vhsub, int_arm_mve_hsub_predicated, sub_op,
+ shift_op>;
-defm MVE_VHSUBs8 : MVE_VHSUB<MVE_v16s8>;
-defm MVE_VHSUBs16 : MVE_VHSUB<MVE_v8s16>;
-defm MVE_VHSUBs32 : MVE_VHSUB<MVE_v4s32>;
-defm MVE_VHSUBu8 : MVE_VHSUB<MVE_v16u8>;
-defm MVE_VHSUBu16 : MVE_VHSUB<MVE_v8u16>;
-defm MVE_VHSUBu32 : MVE_VHSUB<MVE_v4u32>;
+defm MVE_VHSUBs8 : MVE_VHSUB<MVE_v16s8, subnsw, ARMvshrsImm>;
+defm MVE_VHSUBs16 : MVE_VHSUB<MVE_v8s16, subnsw, ARMvshrsImm>;
+defm MVE_VHSUBs32 : MVE_VHSUB<MVE_v4s32, subnsw, ARMvshrsImm>;
+defm MVE_VHSUBu8 : MVE_VHSUB<MVE_v16u8, subnuw, ARMvshruImm>;
+defm MVE_VHSUBu16 : MVE_VHSUB<MVE_v8u16, subnuw, ARMvshruImm>;
+defm MVE_VHSUBu32 : MVE_VHSUB<MVE_v4u32, subnuw, ARMvshruImm>;
class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary,
@@ -2028,24 +2241,37 @@ let Predicates = [HasMVEInt] in {
def : Pat<(v4i32 (ARMvdup (i32 rGPR:$elem))),
(MVE_VDUP32 rGPR:$elem)>;
- def : Pat<(v4i32 (ARMvduplane (v4i32 MQPR:$src), imm:$lane)),
- (MVE_VDUP32 (MVE_VMOV_from_lane_32 MQPR:$src, imm:$lane))>;
- // For the 16-bit and 8-bit vduplanes we don't care about the signedness
- // of the lane move operation as we only want the lowest 8/16 bits anyway.
- def : Pat<(v8i16 (ARMvduplane (v8i16 MQPR:$src), imm:$lane)),
- (MVE_VDUP16 (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane))>;
- def : Pat<(v16i8 (ARMvduplane (v16i8 MQPR:$src), imm:$lane)),
- (MVE_VDUP8 (MVE_VMOV_from_lane_u8 MQPR:$src, imm:$lane))>;
-
- def : Pat<(v4f32 (ARMvdup (f32 SPR:$elem))),
- (v4f32 (MVE_VDUP32 (i32 (COPY_TO_REGCLASS (f32 SPR:$elem), rGPR))))>;
- def : Pat<(v8f16 (ARMvdup (f16 HPR:$elem))),
- (v8f16 (MVE_VDUP16 (i32 (COPY_TO_REGCLASS (f16 HPR:$elem), rGPR))))>;
+ def : Pat<(v8f16 (ARMvdup (i32 rGPR:$elem))),
+ (MVE_VDUP16 rGPR:$elem)>;
+ def : Pat<(v4f32 (ARMvdup (i32 rGPR:$elem))),
+ (MVE_VDUP32 rGPR:$elem)>;
- def : Pat<(v4f32 (ARMvduplane (v4f32 MQPR:$src), imm:$lane)),
- (MVE_VDUP32 (MVE_VMOV_from_lane_32 MQPR:$src, imm:$lane))>;
- def : Pat<(v8f16 (ARMvduplane (v8f16 MQPR:$src), imm:$lane)),
- (MVE_VDUP16 (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane))>;
+ // Match a vselect with an ARMvdup as a predicated MVE_VDUP
+ def : Pat<(v16i8 (vselect (v16i1 VCCR:$pred),
+ (v16i8 (ARMvdup (i32 rGPR:$elem))),
+ (v16i8 MQPR:$inactive))),
+ (MVE_VDUP8 rGPR:$elem, ARMVCCThen, (v16i1 VCCR:$pred),
+ (v16i8 MQPR:$inactive))>;
+ def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred),
+ (v8i16 (ARMvdup (i32 rGPR:$elem))),
+ (v8i16 MQPR:$inactive))),
+ (MVE_VDUP16 rGPR:$elem, ARMVCCThen, (v8i1 VCCR:$pred),
+ (v8i16 MQPR:$inactive))>;
+ def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred),
+ (v4i32 (ARMvdup (i32 rGPR:$elem))),
+ (v4i32 MQPR:$inactive))),
+ (MVE_VDUP32 rGPR:$elem, ARMVCCThen, (v4i1 VCCR:$pred),
+ (v4i32 MQPR:$inactive))>;
+ def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred),
+ (v4f32 (ARMvdup (i32 rGPR:$elem))),
+ (v4f32 MQPR:$inactive))),
+ (MVE_VDUP32 rGPR:$elem, ARMVCCThen, (v4i1 VCCR:$pred),
+ (v4f32 MQPR:$inactive))>;
+ def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred),
+ (v8f16 (ARMvdup (i32 rGPR:$elem))),
+ (v8f16 MQPR:$inactive))),
+ (MVE_VDUP16 rGPR:$elem, ARMVCCThen, (v8i1 VCCR:$pred),
+ (v8f16 MQPR:$inactive))>;
}
@@ -2079,32 +2305,43 @@ class MVE_VCLSCLZ<string iname, string suffix, bits<2> size,
let validForTailPredication = 1;
}
-def MVE_VCLSs8 : MVE_VCLSCLZ<"vcls", "s8", 0b00, 0b0>;
-def MVE_VCLSs16 : MVE_VCLSCLZ<"vcls", "s16", 0b01, 0b0>;
-def MVE_VCLSs32 : MVE_VCLSCLZ<"vcls", "s32", 0b10, 0b0>;
+multiclass MVE_VCLSCLZ_p<string opname, bit opcode, MVEVectorVTInfo VTI,
+ SDNode unpred_op> {
+ def "": MVE_VCLSCLZ<"v"#opname, VTI.Suffix, VTI.Size, opcode>;
-def MVE_VCLZs8 : MVE_VCLSCLZ<"vclz", "i8", 0b00, 0b1>;
-def MVE_VCLZs16 : MVE_VCLSCLZ<"vclz", "i16", 0b01, 0b1>;
-def MVE_VCLZs32 : MVE_VCLSCLZ<"vclz", "i32", 0b10, 0b1>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar pred_int = !cast<Intrinsic>("int_arm_mve_"#opname#"_predicated");
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 ( ctlz (v16i8 MQPR:$val1))),
- (v16i8 ( MVE_VCLZs8 (v16i8 MQPR:$val1)))>;
- def : Pat<(v4i32 ( ctlz (v4i32 MQPR:$val1))),
- (v4i32 ( MVE_VCLZs32 (v4i32 MQPR:$val1)))>;
- def : Pat<(v8i16 ( ctlz (v8i16 MQPR:$val1))),
- (v8i16 ( MVE_VCLZs16 (v8i16 MQPR:$val1)))>;
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$val))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$val)))>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen,
+ (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ }
}
+defm MVE_VCLSs8 : MVE_VCLSCLZ_p<"cls", 0, MVE_v16s8, int_arm_mve_vcls>;
+defm MVE_VCLSs16 : MVE_VCLSCLZ_p<"cls", 0, MVE_v8s16, int_arm_mve_vcls>;
+defm MVE_VCLSs32 : MVE_VCLSCLZ_p<"cls", 0, MVE_v4s32, int_arm_mve_vcls>;
+
+defm MVE_VCLZs8 : MVE_VCLSCLZ_p<"clz", 1, MVE_v16i8, ctlz>;
+defm MVE_VCLZs16 : MVE_VCLSCLZ_p<"clz", 1, MVE_v8i16, ctlz>;
+defm MVE_VCLZs32 : MVE_VCLSCLZ_p<"clz", 1, MVE_v4i32, ctlz>;
+
class MVE_VABSNEG_int<string iname, string suffix, bits<2> size, bit negate,
- list<dag> pattern=[]>
+ bit saturate, list<dag> pattern=[]>
: MVEIntSingleSrc<iname, suffix, size, pattern> {
let Inst{28} = 0b1;
let Inst{25-23} = 0b111;
let Inst{21-20} = 0b11;
- let Inst{17-16} = 0b01;
- let Inst{12-8} = 0b00011;
+ let Inst{17} = 0b0;
+ let Inst{16} = !eq(saturate, 0);
+ let Inst{12-11} = 0b00;
+ let Inst{10} = saturate;
+ let Inst{9-8} = 0b11;
let Inst{7} = negate;
let Inst{6} = 0b1;
let Inst{4} = 0b0;
@@ -2112,61 +2349,40 @@ class MVE_VABSNEG_int<string iname, string suffix, bits<2> size, bit negate,
let validForTailPredication = 1;
}
-def MVE_VABSs8 : MVE_VABSNEG_int<"vabs", "s8", 0b00, 0b0>;
-def MVE_VABSs16 : MVE_VABSNEG_int<"vabs", "s16", 0b01, 0b0>;
-def MVE_VABSs32 : MVE_VABSNEG_int<"vabs", "s32", 0b10, 0b0>;
-
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (abs (v16i8 MQPR:$v))),
- (v16i8 (MVE_VABSs8 $v))>;
- def : Pat<(v8i16 (abs (v8i16 MQPR:$v))),
- (v8i16 (MVE_VABSs16 $v))>;
- def : Pat<(v4i32 (abs (v4i32 MQPR:$v))),
- (v4i32 (MVE_VABSs32 $v))>;
-}
+multiclass MVE_VABSNEG_int_m<string iname, bit negate, bit saturate,
+ SDNode unpred_op, Intrinsic pred_int,
+ MVEVectorVTInfo VTI> {
+ def "" : MVE_VABSNEG_int<iname, VTI.Suffix, VTI.Size, negate, saturate>;
+ defvar Inst = !cast<Instruction>(NAME);
-def MVE_VNEGs8 : MVE_VABSNEG_int<"vneg", "s8", 0b00, 0b1>;
-def MVE_VNEGs16 : MVE_VABSNEG_int<"vneg", "s16", 0b01, 0b1>;
-def MVE_VNEGs32 : MVE_VABSNEG_int<"vneg", "s32", 0b10, 0b1>;
+ let Predicates = [HasMVEInt] in {
+ // VQABS and VQNEG have more difficult isel patterns defined elsewhere
+ if !eq(saturate, 0) then {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))), (VTI.Vec (Inst $v))>;
+ }
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (vnegq (v16i8 MQPR:$v))),
- (v16i8 (MVE_VNEGs8 $v))>;
- def : Pat<(v8i16 (vnegq (v8i16 MQPR:$v))),
- (v8i16 (MVE_VNEGs16 $v))>;
- def : Pat<(v4i32 (vnegq (v4i32 MQPR:$v))),
- (v4i32 (MVE_VNEGs32 $v))>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>;
+ }
}
-class MVE_VQABSNEG<string iname, string suffix, bits<2> size,
- bit negate, list<dag> pattern=[]>
- : MVEIntSingleSrc<iname, suffix, size, pattern> {
-
- let Inst{28} = 0b1;
- let Inst{25-23} = 0b111;
- let Inst{21-20} = 0b11;
- let Inst{17-16} = 0b00;
- let Inst{12-8} = 0b00111;
- let Inst{7} = negate;
- let Inst{6} = 0b1;
- let Inst{4} = 0b0;
- let Inst{0} = 0b0;
- let validForTailPredication = 1;
+foreach VTI = [ MVE_v16s8, MVE_v8s16, MVE_v4s32 ] in {
+ defm "MVE_VABS" # VTI.Suffix : MVE_VABSNEG_int_m<
+ "vabs", 0, 0, abs, int_arm_mve_abs_predicated, VTI>;
+ defm "MVE_VQABS" # VTI.Suffix : MVE_VABSNEG_int_m<
+ "vqabs", 0, 1, ?, int_arm_mve_qabs_predicated, VTI>;
+ defm "MVE_VNEG" # VTI.Suffix : MVE_VABSNEG_int_m<
+ "vneg", 1, 0, vnegq, int_arm_mve_neg_predicated, VTI>;
+ defm "MVE_VQNEG" # VTI.Suffix : MVE_VABSNEG_int_m<
+ "vqneg", 1, 1, ?, int_arm_mve_qneg_predicated, VTI>;
}
-def MVE_VQABSs8 : MVE_VQABSNEG<"vqabs", "s8", 0b00, 0b0>;
-def MVE_VQABSs16 : MVE_VQABSNEG<"vqabs", "s16", 0b01, 0b0>;
-def MVE_VQABSs32 : MVE_VQABSNEG<"vqabs", "s32", 0b10, 0b0>;
-
-def MVE_VQNEGs8 : MVE_VQABSNEG<"vqneg", "s8", 0b00, 0b1>;
-def MVE_VQNEGs16 : MVE_VQABSNEG<"vqneg", "s16", 0b01, 0b1>;
-def MVE_VQNEGs32 : MVE_VQABSNEG<"vqneg", "s32", 0b10, 0b1>;
-
// int_min/int_max: vector containing INT_MIN/INT_MAX VTI.Size times
// zero_vec: v4i32-initialized zero vector, potentially wrapped in a bitconvert
multiclass vqabsneg_pattern<MVEVectorVTInfo VTI, dag int_min, dag int_max,
- dag zero_vec, MVE_VQABSNEG vqabs_instruction,
- MVE_VQABSNEG vqneg_instruction> {
+ dag zero_vec, MVE_VABSNEG_int vqabs_instruction,
+ MVE_VABSNEG_int vqneg_instruction> {
let Predicates = [HasMVEInt] in {
// The below tree can be replaced by a vqabs instruction, as it represents
// the following vectorized expression (r being the value in $reg):
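A minimal sketch of the saturating absolute value that vqabs computes (illustrative only, not part of the patch; the function name is invented), for a single s32 lane value r:

    #include <cstdint>
    #include <limits>

    // Saturating abs: negating INT32_MIN is not representable, so that input
    // saturates to INT32_MAX instead of wrapping.
    int32_t vqabss32_reference(int32_t r) {
      if (r >= 0)
        return r;
      if (r == std::numeric_limits<int32_t>::min())
        return std::numeric_limits<int32_t>::max();
      return -r;
    }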
@@ -2257,6 +2473,8 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VMOVimmi16 nImmSplatI16:$simm))>;
def : Pat<(v4i32 (ARMvmovImm timm:$simm)),
(v4i32 (MVE_VMOVimmi32 nImmVMOVI32:$simm))>;
+ def : Pat<(v2i64 (ARMvmovImm timm:$simm)),
+ (v2i64 (MVE_VMOVimmi64 nImmSplatI64:$simm))>;
def : Pat<(v8i16 (ARMvmvnImm timm:$simm)),
(v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm))>;
@@ -2265,6 +2483,15 @@ let Predicates = [HasMVEInt] in {
def : Pat<(v4f32 (ARMvmovFPImm timm:$simm)),
(v4f32 (MVE_VMOVimmf32 nImmVMOVF32:$simm))>;
+
+ def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (ARMvmvnImm timm:$simm),
+ MQPR:$inactive)),
+ (v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm,
+ ARMVCCThen, VCCR:$pred, MQPR:$inactive))>;
+ def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (ARMvmvnImm timm:$simm),
+ MQPR:$inactive)),
+ (v4i32 (MVE_VMVNimmi32 nImmSplatI32:$simm,
+ ARMVCCThen, VCCR:$pred, MQPR:$inactive))>;
}
class MVE_VMINMAXA<string iname, string suffix, bits<2> size,
@@ -2291,13 +2518,37 @@ class MVE_VMINMAXA<string iname, string suffix, bits<2> size,
let validForTailPredication = 1;
}
-def MVE_VMAXAs8 : MVE_VMINMAXA<"vmaxa", "s8", 0b00, 0b0>;
-def MVE_VMAXAs16 : MVE_VMINMAXA<"vmaxa", "s16", 0b01, 0b0>;
-def MVE_VMAXAs32 : MVE_VMINMAXA<"vmaxa", "s32", 0b10, 0b0>;
+multiclass MVE_VMINMAXA_m<string iname, MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int, bit bit_12> {
+ def "" : MVE_VMINMAXA<iname, VTI.Suffix, VTI.Size, bit_12>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated v(min|max)a
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qd), (abs (VTI.Vec MQPR:$Qm)))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm)))>;
+
+ // Predicated v(min|max)a
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm),
+ ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ }
+}
+
+multiclass MVE_VMINA<MVEVectorVTInfo VTI>
+ : MVE_VMINMAXA_m<"vmina", VTI, umin, int_arm_mve_vmina_predicated, 0b1>;
+
+defm MVE_VMINAs8 : MVE_VMINA<MVE_v16s8>;
+defm MVE_VMINAs16 : MVE_VMINA<MVE_v8s16>;
+defm MVE_VMINAs32 : MVE_VMINA<MVE_v4s32>;
-def MVE_VMINAs8 : MVE_VMINMAXA<"vmina", "s8", 0b00, 0b1>;
-def MVE_VMINAs16 : MVE_VMINMAXA<"vmina", "s16", 0b01, 0b1>;
-def MVE_VMINAs32 : MVE_VMINMAXA<"vmina", "s32", 0b10, 0b1>;
+multiclass MVE_VMAXA<MVEVectorVTInfo VTI>
+ : MVE_VMINMAXA_m<"vmaxa", VTI, umax, int_arm_mve_vmaxa_predicated, 0b0>;
+
+defm MVE_VMAXAs8 : MVE_VMAXA<MVE_v16s8>;
+defm MVE_VMAXAs16 : MVE_VMAXA<MVE_v8s16>;
+defm MVE_VMAXAs32 : MVE_VMAXA<MVE_v4s32>;
// end of MVE Integer instructions
@@ -2334,7 +2585,7 @@ class MVE_shift_imm<dag oops, dag iops, string iname, string suffix,
let Inst{3-1} = Qm{2-0};
}
-class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U,
+class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U, bit top,
list<dag> pattern=[]>
: MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
iname, suffix, "$Qd, $Qm", vpred_r, "",
@@ -2344,25 +2595,36 @@ class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U,
let Inst{21} = 0b1;
let Inst{20-19} = sz{1-0};
let Inst{18-16} = 0b000;
+ let Inst{12} = top;
let Inst{11-6} = 0b111101;
let Inst{4} = 0b0;
let Inst{0} = 0b0;
+ let doubleWidthResult = 1;
}
-multiclass MVE_VMOVL_shift_half<string iname, string suffix, bits<2> sz, bit U,
- list<dag> pattern=[]> {
- def bh : MVE_VMOVL<!strconcat(iname, "b"), suffix, sz, U, pattern> {
- let Inst{12} = 0b0;
- }
- def th : MVE_VMOVL<!strconcat(iname, "t"), suffix, sz, U, pattern> {
- let Inst{12} = 0b1;
- }
+multiclass MVE_VMOVL_m<bit top, string chr, MVEVectorVTInfo OutVTI,
+ MVEVectorVTInfo InVTI> {
+ def "": MVE_VMOVL<"vmovl" # chr, InVTI.Suffix, OutVTI.Size,
+ InVTI.Unsigned, top>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ def : Pat<(OutVTI.Vec (int_arm_mve_vmovl_predicated (InVTI.Vec MQPR:$src),
+ (i32 InVTI.Unsigned), (i32 top),
+ (OutVTI.Pred VCCR:$pred),
+ (OutVTI.Vec MQPR:$inactive))),
+ (OutVTI.Vec (Inst (InVTI.Vec MQPR:$src), ARMVCCThen,
+ (OutVTI.Pred VCCR:$pred),
+ (OutVTI.Vec MQPR:$inactive)))>;
}
-defm MVE_VMOVLs8 : MVE_VMOVL_shift_half<"vmovl", "s8", 0b01, 0b0>;
-defm MVE_VMOVLu8 : MVE_VMOVL_shift_half<"vmovl", "u8", 0b01, 0b1>;
-defm MVE_VMOVLs16 : MVE_VMOVL_shift_half<"vmovl", "s16", 0b10, 0b0>;
-defm MVE_VMOVLu16 : MVE_VMOVL_shift_half<"vmovl", "u16", 0b10, 0b1>;
+defm MVE_VMOVLs8bh : MVE_VMOVL_m<0, "b", MVE_v8s16, MVE_v16s8>;
+defm MVE_VMOVLs8th : MVE_VMOVL_m<1, "t", MVE_v8s16, MVE_v16s8>;
+defm MVE_VMOVLu8bh : MVE_VMOVL_m<0, "b", MVE_v8u16, MVE_v16u8>;
+defm MVE_VMOVLu8th : MVE_VMOVL_m<1, "t", MVE_v8u16, MVE_v16u8>;
+defm MVE_VMOVLs16bh : MVE_VMOVL_m<0, "b", MVE_v4s32, MVE_v8s16>;
+defm MVE_VMOVLs16th : MVE_VMOVL_m<1, "t", MVE_v4s32, MVE_v8s16>;
+defm MVE_VMOVLu16bh : MVE_VMOVL_m<0, "b", MVE_v4s32, MVE_v8u16>;
+defm MVE_VMOVLu16th : MVE_VMOVL_m<1, "t", MVE_v4s32, MVE_v8u16>;
let Predicates = [HasMVEInt] in {
def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i16),
@@ -2372,12 +2634,23 @@ let Predicates = [HasMVEInt] in {
def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i8),
(MVE_VMOVLs16bh (MVE_VMOVLs8bh MQPR:$src))>;
+ def : Pat<(sext_inreg (v8i16 (ARMVectorRegCast (ARMvrev16 (v16i8 MQPR:$src)))), v8i8),
+ (MVE_VMOVLs8th MQPR:$src)>;
+ def : Pat<(sext_inreg (v4i32 (ARMVectorRegCast (ARMvrev32 (v8i16 MQPR:$src)))), v4i16),
+ (MVE_VMOVLs16th MQPR:$src)>;
+
+ // zext_inreg 8 -> 16
+ def : Pat<(ARMvbicImm (v8i16 MQPR:$src), (i32 0xAFF)),
+ (MVE_VMOVLu8bh MQPR:$src)>;
// zext_inreg 16 -> 32
def : Pat<(and (v4i32 MQPR:$src), (v4i32 (ARMvmovImm (i32 0xCFF)))),
(MVE_VMOVLu16bh MQPR:$src)>;
- // zext_inreg 8 -> 16
- def : Pat<(and (v8i16 MQPR:$src), (v8i16 (ARMvmovImm (i32 0x8FF)))),
- (MVE_VMOVLu8bh MQPR:$src)>;
+ // Same zext_inreg with vrevs, picking the top half
+ def : Pat<(ARMvbicImm (v8i16 (ARMVectorRegCast (ARMvrev16 (v16i8 MQPR:$src)))), (i32 0xAFF)),
+ (MVE_VMOVLu8th MQPR:$src)>;
+ def : Pat<(and (v4i32 (ARMVectorRegCast (ARMvrev32 (v8i16 MQPR:$src)))),
+ (v4i32 (ARMvmovImm (i32 0xCFF)))),
+ (MVE_VMOVLu16th MQPR:$src)>;
}
@@ -2395,6 +2668,8 @@ class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
// For the MVE_VSHLL_patterns multiclass to refer to
Operand immediateType = immtype;
+
+ let doubleWidthResult = 1;
}
// The immediate VSHLL instructions accept shift counts from 1 up to
@@ -2438,6 +2713,7 @@ class MVE_VSHLL_by_lane_width<string iname, string suffix, bits<2> size,
let Inst{11-6} = 0b111000;
let Inst{4} = 0b0;
let Inst{0} = 0b1;
+ let doubleWidthResult = 1;
}
multiclass MVE_VSHLL_lw<string iname, string suffix, bits<2> sz, bit U,
@@ -2472,17 +2748,17 @@ multiclass MVE_VSHLL_patterns<MVEVectorVTInfo VTI, int top> {
def : Pat<(VTI.DblVec (pred_int (VTI.Vec MQPR:$src), imm:$imm,
(i32 VTI.Unsigned), (i32 top),
- (VTI.Pred VCCR:$mask),
+ (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive))),
(VTI.DblVec (inst_imm (VTI.Vec MQPR:$src), imm:$imm,
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive)))>;
def : Pat<(VTI.DblVec (pred_int (VTI.Vec MQPR:$src), (i32 VTI.LaneBits),
(i32 VTI.Unsigned), (i32 top),
- (VTI.Pred VCCR:$mask),
+ (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive))),
(VTI.DblVec (inst_lw (VTI.Vec MQPR:$src), ARMVCCThen,
- (VTI.Pred VCCR:$mask),
+ (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive)))>;
}
@@ -2509,6 +2785,8 @@ class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
let Inst{11-6} = 0b111111;
let Inst{4} = 0b0;
let Inst{0} = 0b1;
+ let validForTailPredication = 1;
+ let retainsPreviousHalfElement = 1;
}
def MVE_VRSHRNi16bh : MVE_VxSHRN<"vrshrnb", "i16", 0b0, 0b1, shr_imm8> {
@@ -2550,6 +2828,8 @@ class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12,
let Inst{11-6} = 0b111111;
let Inst{4} = 0b0;
let Inst{0} = 0b0;
+ let validForTailPredication = 1;
+ let retainsPreviousHalfElement = 1;
}
def MVE_VQRSHRUNs16bh : MVE_VxQRSHRUN<
@@ -2598,6 +2878,8 @@ class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
let Inst{11-6} = 0b111101;
let Inst{4} = 0b0;
let Inst{0} = bit_0;
+ let validForTailPredication = 1;
+ let retainsPreviousHalfElement = 1;
}
multiclass MVE_VxQRSHRN_types<string iname, bit bit_0, bit bit_12> {
@@ -3131,41 +3413,34 @@ class MVE_VRINT<string rmode, bits<3> op, string suffix, bits<2> size,
}
-multiclass MVE_VRINT_ops<string suffix, bits<2> size, list<dag> pattern=[]> {
- def N : MVE_VRINT<"n", 0b000, suffix, size, pattern>;
- def X : MVE_VRINT<"x", 0b001, suffix, size, pattern>;
- def A : MVE_VRINT<"a", 0b010, suffix, size, pattern>;
- def Z : MVE_VRINT<"z", 0b011, suffix, size, pattern>;
- def M : MVE_VRINT<"m", 0b101, suffix, size, pattern>;
- def P : MVE_VRINT<"p", 0b111, suffix, size, pattern>;
-}
+multiclass MVE_VRINT_m<MVEVectorVTInfo VTI, string suffix, bits<3> opcode,
+ SDNode unpred_op> {
+ def "": MVE_VRINT<suffix, opcode, VTI.Suffix, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar pred_int = !cast<Intrinsic>("int_arm_mve_vrint"#suffix#"_predicated");
-defm MVE_VRINTf16 : MVE_VRINT_ops<"f16", 0b01>;
-defm MVE_VRINTf32 : MVE_VRINT_ops<"f32", 0b10>;
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$val))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$val)))>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen,
+ (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ }
+}
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v4f32 (frint (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32X (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (frint (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16X (v8f16 MQPR:$val1)))>;
- def : Pat<(v4f32 (fround (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32A (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (fround (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16A (v8f16 MQPR:$val1)))>;
- def : Pat<(v4f32 (ftrunc (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32Z (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (ftrunc (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16Z (v8f16 MQPR:$val1)))>;
- def : Pat<(v4f32 (ffloor (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32M (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (ffloor (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16M (v8f16 MQPR:$val1)))>;
- def : Pat<(v4f32 (fceil (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32P (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (fceil (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16P (v8f16 MQPR:$val1)))>;
+multiclass MVE_VRINT_ops<MVEVectorVTInfo VTI> {
+ defm N : MVE_VRINT_m<VTI, "n", 0b000, int_arm_mve_vrintn>;
+ defm X : MVE_VRINT_m<VTI, "x", 0b001, frint>;
+ defm A : MVE_VRINT_m<VTI, "a", 0b010, fround>;
+ defm Z : MVE_VRINT_m<VTI, "z", 0b011, ftrunc>;
+ defm M : MVE_VRINT_m<VTI, "m", 0b101, ffloor>;
+ defm P : MVE_VRINT_m<VTI, "p", 0b111, fceil>;
}
+defm MVE_VRINTf16 : MVE_VRINT_ops<MVE_v8f16>;
+defm MVE_VRINTf32 : MVE_VRINT_ops<MVE_v4f32>;
+
class MVEFloatArithNeon<string iname, string suffix, bit size,
dag oops, dag iops, string ops,
vpred_ops vpred, string cstr, list<dag> pattern=[]>
@@ -3281,29 +3556,40 @@ class MVE_VADDSUBFMA_fp<string iname, string suffix, bit size, bit bit_4,
let Inst{8} = bit_8;
let Inst{7} = Qn{3};
let Inst{4} = bit_4;
+ let validForTailPredication = 1;
}
-def MVE_VFMAf32 : MVE_VADDSUBFMA_fp<"vfma", "f32", 0b0, 0b1, 0b0, 0b0,
- (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
-def MVE_VFMAf16 : MVE_VADDSUBFMA_fp<"vfma", "f16", 0b1, 0b1, 0b0, 0b0,
- (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
-
-def MVE_VFMSf32 : MVE_VADDSUBFMA_fp<"vfms", "f32", 0b0, 0b1, 0b0, 0b1,
- (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
-def MVE_VFMSf16 : MVE_VADDSUBFMA_fp<"vfms", "f16", 0b1, 0b1, 0b0, 0b1,
- (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
+multiclass MVE_VFMA_fp_multi<string iname, bit fms, MVEVectorVTInfo VTI> {
+ def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size{0}, 0b1, 0b0, fms,
+ (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar pred_int = int_arm_mve_fma_predicated;
+ defvar m1 = (VTI.Vec MQPR:$m1);
+ defvar m2 = (VTI.Vec MQPR:$m2);
+ defvar add = (VTI.Vec MQPR:$add);
+ defvar pred = (VTI.Pred VCCR:$pred);
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v8f16 (fma (v8f16 MQPR:$src1), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
- (v8f16 (MVE_VFMAf16 $src3, $src1, $src2))>;
- def : Pat<(v4f32 (fma (v4f32 MQPR:$src1), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
- (v4f32 (MVE_VFMAf32 $src3, $src1, $src2))>;
- def : Pat<(v8f16 (fma (fneg (v8f16 MQPR:$src1)), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
- (v8f16 (MVE_VFMSf16 $src3, $src1, $src2))>;
- def : Pat<(v4f32 (fma (fneg (v4f32 MQPR:$src1)), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
- (v4f32 (MVE_VFMSf32 $src3, $src1, $src2))>;
+ let Predicates = [HasMVEFloat] in {
+ if fms then {
+ def : Pat<(VTI.Vec (fma (fneg m1), m2, add)), (Inst $add, $m1, $m2)>;
+ def : Pat<(VTI.Vec (fma m1, (fneg m2), add)), (Inst $add, $m1, $m2)>;
+ def : Pat<(VTI.Vec (pred_int (fneg m1), m2, add, pred)),
+ (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
+ def : Pat<(VTI.Vec (pred_int m1, (fneg m2), add, pred)),
+ (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
+ } else {
+ def : Pat<(VTI.Vec (fma m1, m2, add)), (Inst $add, $m1, $m2)>;
+ def : Pat<(VTI.Vec (pred_int m1, m2, add, pred)),
+ (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
+ }
+ }
}
+defm MVE_VFMAf32 : MVE_VFMA_fp_multi<"vfma", 0, MVE_v4f32>;
+defm MVE_VFMAf16 : MVE_VFMA_fp_multi<"vfma", 0, MVE_v8f16>;
+defm MVE_VFMSf32 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v4f32>;
+defm MVE_VFMSf16 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v8f16>;
+
multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
SDNode unpred_op, Intrinsic pred_int> {
def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size{0}, 0, 1, bit_21> {
@@ -3423,10 +3709,10 @@ defm MVE_VABDf32 : MVE_VABD_fp_m<MVE_v4f32>;
defm MVE_VABDf16 : MVE_VABD_fp_m<MVE_v8f16>;
class MVE_VCVT_fix<string suffix, bit fsi, bit U, bit op,
- Operand imm_operand_type, list<dag> pattern=[]>
+ Operand imm_operand_type>
: MVE_float<"vcvt", suffix,
(outs MQPR:$Qd), (ins MQPR:$Qm, imm_operand_type:$imm6),
- "$Qd, $Qm, $imm6", vpred_r, "", pattern> {
+ "$Qd, $Qm, $imm6", vpred_r, "", []> {
bits<4> Qd;
bits<6> imm6;
@@ -3468,14 +3754,43 @@ class MVE_VCVT_fix_f16<string suffix, bit U, bit op>
let Inst{20} = 0b1;
}
-def MVE_VCVTf16s16_fix : MVE_VCVT_fix_f16<"f16.s16", 0b0, 0b0>;
-def MVE_VCVTs16f16_fix : MVE_VCVT_fix_f16<"s16.f16", 0b0, 0b1>;
-def MVE_VCVTf16u16_fix : MVE_VCVT_fix_f16<"f16.u16", 0b1, 0b0>;
-def MVE_VCVTu16f16_fix : MVE_VCVT_fix_f16<"u16.f16", 0b1, 0b1>;
-def MVE_VCVTf32s32_fix : MVE_VCVT_fix_f32<"f32.s32", 0b0, 0b0>;
-def MVE_VCVTs32f32_fix : MVE_VCVT_fix_f32<"s32.f32", 0b0, 0b1>;
-def MVE_VCVTf32u32_fix : MVE_VCVT_fix_f32<"f32.u32", 0b1, 0b0>;
-def MVE_VCVTu32f32_fix : MVE_VCVT_fix_f32<"u32.f32", 0b1, 0b1>;
+multiclass MVE_VCVT_fix_patterns<Instruction Inst, bit U, MVEVectorVTInfo DestVTI,
+ MVEVectorVTInfo SrcVTI> {
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(DestVTI.Vec (int_arm_mve_vcvt_fix
+ (i32 U), (SrcVTI.Vec MQPR:$Qm), imm:$scale)),
+ (DestVTI.Vec (Inst (SrcVTI.Vec MQPR:$Qm), imm:$scale))>;
+ def : Pat<(DestVTI.Vec (int_arm_mve_vcvt_fix_predicated (i32 U),
+ (DestVTI.Vec MQPR:$inactive),
+ (SrcVTI.Vec MQPR:$Qm),
+ imm:$scale,
+ (DestVTI.Pred VCCR:$mask))),
+ (DestVTI.Vec (Inst (SrcVTI.Vec MQPR:$Qm), imm:$scale,
+ ARMVCCThen, (DestVTI.Pred VCCR:$mask),
+ (DestVTI.Vec MQPR:$inactive)))>;
+ }
+}
+
+multiclass MVE_VCVT_fix_f32_m<bit U, bit op,
+ MVEVectorVTInfo DestVTI, MVEVectorVTInfo SrcVTI> {
+ def "" : MVE_VCVT_fix_f32<DestVTI.Suffix#"."#SrcVTI.Suffix, U, op>;
+ defm : MVE_VCVT_fix_patterns<!cast<Instruction>(NAME), U, DestVTI, SrcVTI>;
+}
+
+multiclass MVE_VCVT_fix_f16_m<bit U, bit op,
+ MVEVectorVTInfo DestVTI, MVEVectorVTInfo SrcVTI> {
+ def "" : MVE_VCVT_fix_f16<DestVTI.Suffix#"."#SrcVTI.Suffix, U, op>;
+ defm : MVE_VCVT_fix_patterns<!cast<Instruction>(NAME), U, DestVTI, SrcVTI>;
+}
+
+defm MVE_VCVTf16s16_fix : MVE_VCVT_fix_f16_m<0b0, 0b0, MVE_v8f16, MVE_v8s16>;
+defm MVE_VCVTs16f16_fix : MVE_VCVT_fix_f16_m<0b0, 0b1, MVE_v8s16, MVE_v8f16>;
+defm MVE_VCVTf16u16_fix : MVE_VCVT_fix_f16_m<0b1, 0b0, MVE_v8f16, MVE_v8u16>;
+defm MVE_VCVTu16f16_fix : MVE_VCVT_fix_f16_m<0b1, 0b1, MVE_v8u16, MVE_v8f16>;
+defm MVE_VCVTf32s32_fix : MVE_VCVT_fix_f32_m<0b0, 0b0, MVE_v4f32, MVE_v4s32>;
+defm MVE_VCVTs32f32_fix : MVE_VCVT_fix_f32_m<0b0, 0b1, MVE_v4s32, MVE_v4f32>;
+defm MVE_VCVTf32u32_fix : MVE_VCVT_fix_f32_m<0b1, 0b0, MVE_v4f32, MVE_v4u32>;
+defm MVE_VCVTu32f32_fix : MVE_VCVT_fix_f32_m<0b1, 0b1, MVE_v4u32, MVE_v4f32>;
class MVE_VCVT_fp_int_anpm<string suffix, bits<2> size, bit op, string anpm,
bits<2> rm, list<dag> pattern=[]>
@@ -3497,23 +3812,44 @@ class MVE_VCVT_fp_int_anpm<string suffix, bits<2> size, bit op, string anpm,
let validForTailPredication = 1;
}
-multiclass MVE_VCVT_fp_int_anpm_multi<string suffix, bits<2> size, bit op,
- list<dag> pattern=[]> {
- def a : MVE_VCVT_fp_int_anpm<suffix, size, op, "a", 0b00>;
- def n : MVE_VCVT_fp_int_anpm<suffix, size, op, "n", 0b01>;
- def p : MVE_VCVT_fp_int_anpm<suffix, size, op, "p", 0b10>;
- def m : MVE_VCVT_fp_int_anpm<suffix, size, op, "m", 0b11>;
+multiclass MVE_VCVT_fp_int_anpm_inner<MVEVectorVTInfo Int, MVEVectorVTInfo Flt,
+ string anpm, bits<2> rm> {
+ def "": MVE_VCVT_fp_int_anpm<Int.Suffix # "." # Flt.Suffix, Int.Size,
+ Int.Unsigned, anpm, rm>;
+
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar IntrBaseName = "int_arm_mve_vcvt" # anpm;
+ defvar UnpredIntr = !cast<Intrinsic>(IntrBaseName);
+ defvar PredIntr = !cast<Intrinsic>(IntrBaseName # "_predicated");
+
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(Int.Vec (UnpredIntr (i32 Int.Unsigned), (Flt.Vec MQPR:$in))),
+ (Int.Vec (Inst (Flt.Vec MQPR:$in)))>;
+
+ def : Pat<(Int.Vec (PredIntr (i32 Int.Unsigned), (Int.Vec MQPR:$inactive),
+ (Flt.Vec MQPR:$in), (Flt.Pred VCCR:$pred))),
+ (Int.Vec (Inst (Flt.Vec MQPR:$in), ARMVCCThen,
+ (Flt.Pred VCCR:$pred), (Int.Vec MQPR:$inactive)))>;
+ }
+}
+
+multiclass MVE_VCVT_fp_int_anpm_outer<MVEVectorVTInfo Int,
+ MVEVectorVTInfo Flt> {
+ defm a : MVE_VCVT_fp_int_anpm_inner<Int, Flt, "a", 0b00>;
+ defm n : MVE_VCVT_fp_int_anpm_inner<Int, Flt, "n", 0b01>;
+ defm p : MVE_VCVT_fp_int_anpm_inner<Int, Flt, "p", 0b10>;
+ defm m : MVE_VCVT_fp_int_anpm_inner<Int, Flt, "m", 0b11>;
}
// This defines instructions such as MVE_VCVTu16f16a, with an explicit
// rounding-mode suffix on the mnemonic. The class below will define
// the bare MVE_VCVTu16f16 (with implied rounding toward zero).
-defm MVE_VCVTs16f16 : MVE_VCVT_fp_int_anpm_multi<"s16.f16", 0b01, 0b0>;
-defm MVE_VCVTu16f16 : MVE_VCVT_fp_int_anpm_multi<"u16.f16", 0b01, 0b1>;
-defm MVE_VCVTs32f32 : MVE_VCVT_fp_int_anpm_multi<"s32.f32", 0b10, 0b0>;
-defm MVE_VCVTu32f32 : MVE_VCVT_fp_int_anpm_multi<"u32.f32", 0b10, 0b1>;
+defm MVE_VCVTs16f16 : MVE_VCVT_fp_int_anpm_outer<MVE_v8s16, MVE_v8f16>;
+defm MVE_VCVTu16f16 : MVE_VCVT_fp_int_anpm_outer<MVE_v8u16, MVE_v8f16>;
+defm MVE_VCVTs32f32 : MVE_VCVT_fp_int_anpm_outer<MVE_v4s32, MVE_v4f32>;
+defm MVE_VCVTu32f32 : MVE_VCVT_fp_int_anpm_outer<MVE_v4u32, MVE_v4f32>;
-class MVE_VCVT_fp_int<string suffix, bits<2> size, bits<2> op,
+class MVE_VCVT_fp_int<string suffix, bits<2> size, bit toint, bit unsigned,
list<dag> pattern=[]>
: MVE_float<"vcvt", suffix, (outs MQPR:$Qd),
(ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
@@ -3527,41 +3863,43 @@ class MVE_VCVT_fp_int<string suffix, bits<2> size, bits<2> op,
let Inst{17-16} = 0b11;
let Inst{15-13} = Qd{2-0};
let Inst{12-9} = 0b0011;
- let Inst{8-7} = op;
+ let Inst{8} = toint;
+ let Inst{7} = unsigned;
let Inst{4} = 0b0;
let validForTailPredication = 1;
}
+multiclass MVE_VCVT_fp_int_m<MVEVectorVTInfo Dest, MVEVectorVTInfo Src,
+ SDNode unpred_op> {
+ defvar Unsigned = !or(!eq(Dest.SuffixLetter,"u"), !eq(Src.SuffixLetter,"u"));
+ defvar ToInt = !eq(Src.SuffixLetter,"f");
+
+ def "" : MVE_VCVT_fp_int<Dest.Suffix # "." # Src.Suffix, Dest.Size,
+ ToInt, Unsigned>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(Dest.Vec (unpred_op (Src.Vec MQPR:$src))),
+ (Dest.Vec (Inst (Src.Vec MQPR:$src)))>;
+ def : Pat<(Dest.Vec (int_arm_mve_vcvt_fp_int_predicated
+ (Src.Vec MQPR:$src), (i32 Unsigned),
+ (Src.Pred VCCR:$mask), (Dest.Vec MQPR:$inactive))),
+ (Dest.Vec (Inst (Src.Vec MQPR:$src), ARMVCCThen,
+ (Src.Pred VCCR:$mask),
+ (Dest.Vec MQPR:$inactive)))>;
+ }
+}
// The unsuffixed VCVT for float->int implicitly rounds toward zero,
// which I reflect here in the llvm instruction names
-def MVE_VCVTs16f16z : MVE_VCVT_fp_int<"s16.f16", 0b01, 0b10>;
-def MVE_VCVTu16f16z : MVE_VCVT_fp_int<"u16.f16", 0b01, 0b11>;
-def MVE_VCVTs32f32z : MVE_VCVT_fp_int<"s32.f32", 0b10, 0b10>;
-def MVE_VCVTu32f32z : MVE_VCVT_fp_int<"u32.f32", 0b10, 0b11>;
+defm MVE_VCVTs16f16z : MVE_VCVT_fp_int_m<MVE_v8s16, MVE_v8f16, fp_to_sint>;
+defm MVE_VCVTu16f16z : MVE_VCVT_fp_int_m<MVE_v8u16, MVE_v8f16, fp_to_uint>;
+defm MVE_VCVTs32f32z : MVE_VCVT_fp_int_m<MVE_v4s32, MVE_v4f32, fp_to_sint>;
+defm MVE_VCVTu32f32z : MVE_VCVT_fp_int_m<MVE_v4u32, MVE_v4f32, fp_to_uint>;
// Whereas VCVT for int->float rounds to nearest
-def MVE_VCVTf16s16n : MVE_VCVT_fp_int<"f16.s16", 0b01, 0b00>;
-def MVE_VCVTf16u16n : MVE_VCVT_fp_int<"f16.u16", 0b01, 0b01>;
-def MVE_VCVTf32s32n : MVE_VCVT_fp_int<"f32.s32", 0b10, 0b00>;
-def MVE_VCVTf32u32n : MVE_VCVT_fp_int<"f32.u32", 0b10, 0b01>;
-
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v4i32 (fp_to_sint (v4f32 MQPR:$src))),
- (v4i32 (MVE_VCVTs32f32z (v4f32 MQPR:$src)))>;
- def : Pat<(v4i32 (fp_to_uint (v4f32 MQPR:$src))),
- (v4i32 (MVE_VCVTu32f32z (v4f32 MQPR:$src)))>;
- def : Pat<(v8i16 (fp_to_sint (v8f16 MQPR:$src))),
- (v8i16 (MVE_VCVTs16f16z (v8f16 MQPR:$src)))>;
- def : Pat<(v8i16 (fp_to_uint (v8f16 MQPR:$src))),
- (v8i16 (MVE_VCVTu16f16z (v8f16 MQPR:$src)))>;
- def : Pat<(v4f32 (sint_to_fp (v4i32 MQPR:$src))),
- (v4f32 (MVE_VCVTf32s32n (v4i32 MQPR:$src)))>;
- def : Pat<(v4f32 (uint_to_fp (v4i32 MQPR:$src))),
- (v4f32 (MVE_VCVTf32u32n (v4i32 MQPR:$src)))>;
- def : Pat<(v8f16 (sint_to_fp (v8i16 MQPR:$src))),
- (v8f16 (MVE_VCVTf16s16n (v8i16 MQPR:$src)))>;
- def : Pat<(v8f16 (uint_to_fp (v8i16 MQPR:$src))),
- (v8f16 (MVE_VCVTf16u16n (v8i16 MQPR:$src)))>;
-}
+defm MVE_VCVTf16s16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8s16, sint_to_fp>;
+defm MVE_VCVTf16u16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8u16, uint_to_fp>;
+defm MVE_VCVTf32s32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4s32, sint_to_fp>;
+defm MVE_VCVTf32u32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4u32, uint_to_fp>;
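A hedged sketch of the two rounding behaviours described in the comments above (illustrative only, not part of the patch; the names are invented, and out-of-range or NaN inputs are not considered), for single f32/s32 lanes:

    #include <cstdint>

    // The unsuffixed float->int VCVT truncates, i.e. rounds toward zero, which
    // is the behaviour of fp_to_sint and of a plain C++ cast for in-range values.
    int32_t vcvt_s32_f32_reference(float x) {
      return static_cast<int32_t>(x);
    }

    // int->float VCVT rounds to nearest when the value is not exactly
    // representable, matching sint_to_fp under the default IEEE-754 rounding mode.
    float vcvt_f32_s32_reference(int32_t x) {
      return static_cast<float>(x);
    }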
class MVE_VABSNEG_fp<string iname, string suffix, bits<2> size, bit negate,
list<dag> pattern=[]>
@@ -3582,26 +3920,29 @@ class MVE_VABSNEG_fp<string iname, string suffix, bits<2> size, bit negate,
let validForTailPredication = 1;
}
-def MVE_VABSf16 : MVE_VABSNEG_fp<"vabs", "f16", 0b01, 0b0>;
-def MVE_VABSf32 : MVE_VABSNEG_fp<"vabs", "f32", 0b10, 0b0>;
-
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v8f16 (fabs MQPR:$src)),
- (MVE_VABSf16 MQPR:$src)>;
- def : Pat<(v4f32 (fabs MQPR:$src)),
- (MVE_VABSf32 MQPR:$src)>;
-}
+multiclass MVE_VABSNEG_fp_m<string iname, SDNode unpred_op, Intrinsic pred_int,
+ MVEVectorVTInfo VTI, bit opcode> {
+ def "" : MVE_VABSNEG_fp<iname, VTI.Suffix, VTI.Size, opcode>;
+ defvar Inst = !cast<Instruction>(NAME);
-def MVE_VNEGf16 : MVE_VABSNEG_fp<"vneg", "f16", 0b01, 0b1>;
-def MVE_VNEGf32 : MVE_VABSNEG_fp<"vneg", "f32", 0b10, 0b1>;
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))), (VTI.Vec (Inst $v))>;
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v8f16 (fneg MQPR:$src)),
- (MVE_VNEGf16 MQPR:$src)>;
- def : Pat<(v4f32 (fneg MQPR:$src)),
- (MVE_VNEGf32 MQPR:$src)>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>;
+ }
}
+defm MVE_VABSf16 : MVE_VABSNEG_fp_m<"vabs", fabs, int_arm_mve_abs_predicated,
+ MVE_v8f16, 0>;
+defm MVE_VABSf32 : MVE_VABSNEG_fp_m<"vabs", fabs, int_arm_mve_abs_predicated,
+ MVE_v4f32, 0>;
+defm MVE_VNEGf16 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated,
+ MVE_v8f16, 1>;
+defm MVE_VNEGf32 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated,
+ MVE_v4f32, 1>;
+
class MVE_VMAXMINNMA<string iname, string suffix, bit size, bit bit_12,
list<dag> pattern=[]>
: MVE_f<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
@@ -3623,11 +3964,37 @@ class MVE_VMAXMINNMA<string iname, string suffix, bit size, bit bit_12,
let Inst{0} = 0b1;
}
-def MVE_VMAXNMAf32 : MVE_VMAXMINNMA<"vmaxnma", "f32", 0b0, 0b0>;
-def MVE_VMAXNMAf16 : MVE_VMAXMINNMA<"vmaxnma", "f16", 0b1, 0b0>;
+multiclass MVE_VMAXMINNMA_m<string iname, MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int,
+ bit bit_12> {
+ def "" : MVE_VMAXMINNMA<iname, VTI.Suffix, VTI.Size{0}, bit_12>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated v(max|min)nma
+ def : Pat<(VTI.Vec (unpred_op (fabs (VTI.Vec MQPR:$Qd)),
+ (fabs (VTI.Vec MQPR:$Qm)))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm)))>;
+
+ // Predicated v(max|min)nma
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm),
+ ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ }
+}
+
+multiclass MVE_VMAXNMA<MVEVectorVTInfo VTI, bit bit_12>
+ : MVE_VMAXMINNMA_m<"vmaxnma", VTI, fmaxnum, int_arm_mve_vmaxnma_predicated, bit_12>;
+
+defm MVE_VMAXNMAf32 : MVE_VMAXNMA<MVE_v4f32, 0b0>;
+defm MVE_VMAXNMAf16 : MVE_VMAXNMA<MVE_v8f16, 0b0>;
-def MVE_VMINNMAf32 : MVE_VMAXMINNMA<"vminnma", "f32", 0b0, 0b1>;
-def MVE_VMINNMAf16 : MVE_VMAXMINNMA<"vminnma", "f16", 0b1, 0b1>;
+multiclass MVE_VMINNMA<MVEVectorVTInfo VTI, bit bit_12>
+ : MVE_VMAXMINNMA_m<"vminnma", VTI, fminnum, int_arm_mve_vminnma_predicated, bit_12>;
+
+defm MVE_VMINNMAf32 : MVE_VMINNMA<MVE_v4f32, 0b1>;
+defm MVE_VMINNMAf16 : MVE_VMINNMA<MVE_v8f16, 0b1>;
// end of MVE Floating Point instructions
@@ -3796,12 +4163,12 @@ multiclass unpred_vcmp_r<string suffix, PatLeaf fc> {
def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc)),
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>;
- def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), fc)),
- (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc))>;
- def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), fc)),
- (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc))>;
- def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), fc)),
- (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc))>;
+ def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup rGPR:$v2)), fc)),
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 rGPR:$v2), fc))>;
+ def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup rGPR:$v2)), fc)),
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 rGPR:$v2), fc))>;
+ def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup rGPR:$v2)), fc)),
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 rGPR:$v2), fc))>;
def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc)))),
(v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
@@ -3810,12 +4177,12 @@ multiclass unpred_vcmp_r<string suffix, PatLeaf fc> {
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc)))),
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
- def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), fc)))),
- (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
- def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), fc)))),
- (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
- def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), fc)))),
- (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup rGPR:$v2)), fc)))),
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup rGPR:$v2)), fc)))),
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup rGPR:$v2)), fc)))),
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
}
multiclass unpred_vcmpf_z<PatLeaf fc> {
@@ -3825,31 +4192,31 @@ multiclass unpred_vcmpf_z<PatLeaf fc> {
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), fc)))),
- (v8i1 (MVE_VCMPf32r (v8f16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), fc)))),
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
}
multiclass unpred_vcmpf_r<int fc> {
- def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)),
- (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>;
- def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)),
- (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>;
+ def : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)),
+ (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>;
+ def : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)),
+ (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>;
- def f16r : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), fc)),
- (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc))>;
- def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), fc)),
- (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc))>;
+ def : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup rGPR:$v2)), fc)),
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 rGPR:$v2), fc))>;
+ def : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup rGPR:$v2)), fc)),
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 rGPR:$v2), fc))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)))),
(v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)))),
(v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
- def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), fc)))),
- (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, ARMVCCThen, VCCR:$p1))>;
- def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), fc)))),
- (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup rGPR:$v2)), fc)))),
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup rGPR:$v2)), fc)))),
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
}
let Predicates = [HasMVEInt] in {
@@ -3889,7 +4256,7 @@ let Predicates = [HasMVEFloat] in {
}
-// Extra "worst case" and/or/xor partterns, going into and out of GRP
+// Extra "worst case" and/or/xor patterns, going into and out of GPR
multiclass two_predops<SDPatternOperator opnode, Instruction insn> {
def v16i1 : Pat<(v16i1 (opnode (v16i1 VCCR:$p1), (v16i1 VCCR:$p2))),
(v16i1 (COPY_TO_REGCLASS
@@ -3918,7 +4285,6 @@ let Predicates = [HasMVEInt] in {
// example when moving between rGPR and VPR.P0 as part of predicate vector
// shuffles. We also sometimes need to cast between different predicate
// vector types (v4i1<>v8i1, etc.) also as part of lowering vector shuffles.
-
def predicate_cast : SDNode<"ARMISD::PREDICATE_CAST", SDTUnaryOp>;
let Predicates = [HasMVEInt] in {
@@ -3932,6 +4298,16 @@ let Predicates = [HasMVEInt] in {
def : Pat<(VT (predicate_cast (VT2 VCCR:$src))),
(VT (COPY_TO_REGCLASS (VT2 VCCR:$src), VCCR))>;
}
+
+ // Here we match the specific SDNode type 'ARMVectorRegCastImpl'
+ // rather than the more general 'ARMVectorRegCast' which would also
+ // match some bitconverts. If we use the latter in cases where the
+ // input and output types are the same, the bitconvert gets elided
+ // and we end up generating a nonsense match of nothing.
+
+ foreach VT = [ v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64 ] in
+ foreach VT2 = [ v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64 ] in
+ def : Pat<(VT (ARMVectorRegCastImpl (VT2 MQPR:$src))), (VT MQPR:$src)>;
}
// end of MVE compares
@@ -3973,11 +4349,32 @@ class MVE_VQxDMLxDH<string iname, bit exch, bit round, bit subtract,
let Inst{0} = round;
}
+multiclass MVE_VQxDMLxDH_p<string iname, bit exch, bit round, bit subtract,
+ MVEVectorVTInfo VTI> {
+ def "": MVE_VQxDMLxDH<iname, exch, round, subtract, VTI.Suffix, VTI.Size,
+ !if(!eq(VTI.LaneBits, 32), ",@earlyclobber $Qd", "")>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar ConstParams = (? (i32 exch), (i32 round), (i32 subtract));
+ defvar unpred_intr = int_arm_mve_vqdmlad;
+ defvar pred_intr = int_arm_mve_vqdmlad_predicated;
+
+ def : Pat<(VTI.Vec !con((unpred_intr (VTI.Vec MQPR:$a), (VTI.Vec MQPR:$b),
+ (VTI.Vec MQPR:$c)), ConstParams)),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$a), (VTI.Vec MQPR:$b),
+ (VTI.Vec MQPR:$c)))>;
+ def : Pat<(VTI.Vec !con((pred_intr (VTI.Vec MQPR:$a), (VTI.Vec MQPR:$b),
+ (VTI.Vec MQPR:$c)), ConstParams,
+ (? (VTI.Pred VCCR:$pred)))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$a), (VTI.Vec MQPR:$b),
+ (VTI.Vec MQPR:$c),
+ ARMVCCThen, (VTI.Pred VCCR:$pred)))>;
+}
+
multiclass MVE_VQxDMLxDH_multi<string iname, bit exch,
bit round, bit subtract> {
- def s8 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s8", 0b00>;
- def s16 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s16", 0b01>;
- def s32 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s32", 0b10, ",@earlyclobber $Qd">;
+ defm s8 : MVE_VQxDMLxDH_p<iname, exch, round, subtract, MVE_v16s8>;
+ defm s16 : MVE_VQxDMLxDH_p<iname, exch, round, subtract, MVE_v8s16>;
+ defm s32 : MVE_VQxDMLxDH_p<iname, exch, round, subtract, MVE_v4s32>;
}
defm MVE_VQDMLADH : MVE_VQxDMLxDH_multi<"vqdmladh", 0b0, 0b0, 0b0>;
@@ -4051,6 +4448,7 @@ class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20,
let Inst{7} = Qn{3};
let Inst{0} = 0b0;
let validForTailPredication = 1;
+ let doubleWidthResult = 1;
}
multiclass MVE_VMULL_m<MVEVectorVTInfo VTI,
@@ -4072,10 +4470,10 @@ multiclass MVE_VMULL_m<MVEVectorVTInfo VTI,
// Predicated multiply
def : Pat<(VTI.DblVec !con((pred_int (VTI.Vec MQPR:$Qm),
(VTI.Vec MQPR:$Qn)),
- uflag, (? (i32 Top), (VTI.Pred VCCR:$mask),
+ uflag, (? (i32 Top), (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive)))),
(VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive)))>;
}
}
@@ -4122,6 +4520,50 @@ defm MVE_VMULLBp16 : MVE_VMULL_m<MVE_v8p16, int_arm_mve_vmull_poly,
defm MVE_VMULLTp16 : MVE_VMULL_m<MVE_v8p16, int_arm_mve_vmull_poly,
int_arm_mve_mull_poly_predicated, 0b1>;
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v2i64 (ARMvmulls (v4i32 MQPR:$src1), (v4i32 MQPR:$src2))),
+ (MVE_VMULLBs32 MQPR:$src1, MQPR:$src2)>;
+ def : Pat<(v2i64 (ARMvmulls (v4i32 (ARMvrev64 (v4i32 MQPR:$src1))),
+ (v4i32 (ARMvrev64 (v4i32 MQPR:$src2))))),
+ (MVE_VMULLTs32 MQPR:$src1, MQPR:$src2)>;
+
+ def : Pat<(mul (sext_inreg (v4i32 MQPR:$src1), v4i16),
+ (sext_inreg (v4i32 MQPR:$src2), v4i16)),
+ (MVE_VMULLBs16 MQPR:$src1, MQPR:$src2)>;
+ def : Pat<(mul (sext_inreg (v4i32 (ARMVectorRegCast (ARMvrev32 (v8i16 MQPR:$src1)))), v4i16),
+ (sext_inreg (v4i32 (ARMVectorRegCast (ARMvrev32 (v8i16 MQPR:$src2)))), v4i16)),
+ (MVE_VMULLTs16 MQPR:$src1, MQPR:$src2)>;
+
+ def : Pat<(mul (sext_inreg (v8i16 MQPR:$src1), v8i8),
+ (sext_inreg (v8i16 MQPR:$src2), v8i8)),
+ (MVE_VMULLBs8 MQPR:$src1, MQPR:$src2)>;
+ def : Pat<(mul (sext_inreg (v8i16 (ARMVectorRegCast (ARMvrev16 (v16i8 MQPR:$src1)))), v8i8),
+ (sext_inreg (v8i16 (ARMVectorRegCast (ARMvrev16 (v16i8 MQPR:$src2)))), v8i8)),
+ (MVE_VMULLTs8 MQPR:$src1, MQPR:$src2)>;
+
+ def : Pat<(v2i64 (ARMvmullu (v4i32 MQPR:$src1), (v4i32 MQPR:$src2))),
+ (MVE_VMULLBu32 MQPR:$src1, MQPR:$src2)>;
+ def : Pat<(v2i64 (ARMvmullu (v4i32 (ARMvrev64 (v4i32 MQPR:$src1))),
+ (v4i32 (ARMvrev64 (v4i32 MQPR:$src2))))),
+ (MVE_VMULLTu32 MQPR:$src1, MQPR:$src2)>;
+
+ def : Pat<(mul (and (v4i32 MQPR:$src1), (v4i32 (ARMvmovImm (i32 0xCFF)))),
+ (and (v4i32 MQPR:$src2), (v4i32 (ARMvmovImm (i32 0xCFF))))),
+ (MVE_VMULLBu16 MQPR:$src1, MQPR:$src2)>;
+ def : Pat<(mul (and (v4i32 (ARMVectorRegCast (ARMvrev32 (v8i16 MQPR:$src1)))),
+ (v4i32 (ARMvmovImm (i32 0xCFF)))),
+ (and (v4i32 (ARMVectorRegCast (ARMvrev32 (v8i16 MQPR:$src2)))),
+ (v4i32 (ARMvmovImm (i32 0xCFF))))),
+ (MVE_VMULLTu16 MQPR:$src1, MQPR:$src2)>;
+
+ def : Pat<(mul (ARMvbicImm (v8i16 MQPR:$src1), (i32 0xAFF)),
+ (ARMvbicImm (v8i16 MQPR:$src2), (i32 0xAFF))),
+ (MVE_VMULLBu8 MQPR:$src1, MQPR:$src2)>;
+ def : Pat<(mul (ARMvbicImm (v8i16 (ARMVectorRegCast (ARMvrev16 (v16i8 MQPR:$src1)))), (i32 0xAFF)),
+ (ARMvbicImm (v8i16 (ARMVectorRegCast (ARMvrev16 (v16i8 MQPR:$src2)))), (i32 0xAFF))),
+ (MVE_VMULLTu8 MQPR:$src1, MQPR:$src2)>;
+}
+
class MVE_VxMULH<string iname, string suffix, bit U, bits<2> size, bit round,
list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
@@ -4195,6 +4637,8 @@ class MVE_VxMOVxN<string iname, string suffix, bit bit_28, bit bit_17,
let Inst{8} = 0b0;
let Inst{7} = !if(!eq(bit_17, 0), 1, 0);
let Inst{0} = 0b1;
+ let validForTailPredication = 1;
+ let retainsPreviousHalfElement = 1;
}
multiclass MVE_VxMOVxN_halves<string iname, string suffix,
@@ -4213,21 +4657,121 @@ defm MVE_VQMOVUNs16 : MVE_VxMOVxN_halves<"vqmovun", "s16", 0b0, 0b0, 0b00>;
defm MVE_VQMOVUNs32 : MVE_VxMOVxN_halves<"vqmovun", "s32", 0b0, 0b0, 0b01>;
def MVEvmovn : SDNode<"ARMISD::VMOVN", SDTARMVEXT>;
+
+multiclass MVE_VMOVN_p<Instruction Inst, bit top,
+ MVEVectorVTInfo VTI, MVEVectorVTInfo InVTI> {
+ // Match the most obvious MVEvmovn(a,b,t), which overwrites the odd or even
+ // lanes of a (depending on t) with the even lanes of b.
+ def : Pat<(VTI.Vec (MVEvmovn (VTI.Vec MQPR:$Qd_src),
+ (VTI.Vec MQPR:$Qm), (i32 top))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qm)))>;
+
+ if !eq(top, 0) then {
+ // If we see MVEvmovn(a,ARMvrev(b),1), that wants to overwrite the odd
+ // lanes of a with the odd lanes of b. In other words, the lanes we're
+ // _keeping_ from a are the even ones. So we can flip it round and say that
+ // this is the same as overwriting the even lanes of b with the even lanes
+ // of a, i.e. it's a VMOVNB with the operands reversed.
+ defvar vrev = !cast<SDNode>("ARMvrev" # InVTI.LaneBits);
+ def : Pat<(VTI.Vec (MVEvmovn (VTI.Vec MQPR:$Qm),
+ (VTI.Vec (vrev MQPR:$Qd_src)), (i32 1))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qm)))>;
+ }
+
+ // Match the IR intrinsic for a predicated VMOVN. This regards the Qm input
+ // as having wider lanes that we're narrowing, instead of already-narrow
+ // lanes that we're taking every other one of.
+ def : Pat<(VTI.Vec (int_arm_mve_vmovn_predicated (VTI.Vec MQPR:$Qd_src),
+ (InVTI.Vec MQPR:$Qm), (i32 top),
+ (InVTI.Pred VCCR:$pred))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
+ (InVTI.Vec MQPR:$Qm),
+ ARMVCCThen, (InVTI.Pred VCCR:$pred)))>;
+}
+
+defm : MVE_VMOVN_p<MVE_VMOVNi32bh, 0, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VMOVN_p<MVE_VMOVNi32th, 1, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VMOVN_p<MVE_VMOVNi16bh, 0, MVE_v16i8, MVE_v8i16>;
+defm : MVE_VMOVN_p<MVE_VMOVNi16th, 1, MVE_v16i8, MVE_v8i16>;
+
+multiclass MVE_VQMOVN_p<Instruction Inst, bit outU, bit inU, bit top,
+ MVEVectorVTInfo VTI, MVEVectorVTInfo InVTI> {
+ def : Pat<(VTI.Vec (int_arm_mve_vqmovn (VTI.Vec MQPR:$Qd_src),
+ (InVTI.Vec MQPR:$Qm),
+ (i32 outU), (i32 inU), (i32 top))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
+ (InVTI.Vec MQPR:$Qm)))>;
+
+ def : Pat<(VTI.Vec (int_arm_mve_vqmovn_predicated (VTI.Vec MQPR:$Qd_src),
+ (InVTI.Vec MQPR:$Qm),
+ (i32 outU), (i32 inU), (i32 top),
+ (InVTI.Pred VCCR:$pred))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
+ (InVTI.Vec MQPR:$Qm),
+ ARMVCCThen, (InVTI.Pred VCCR:$pred)))>;
+}
+
+defm : MVE_VQMOVN_p<MVE_VQMOVNs32bh, 0, 0, 0, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VQMOVN_p<MVE_VQMOVNs32th, 0, 0, 1, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VQMOVN_p<MVE_VQMOVNs16bh, 0, 0, 0, MVE_v16i8, MVE_v8i16>;
+defm : MVE_VQMOVN_p<MVE_VQMOVNs16th, 0, 0, 1, MVE_v16i8, MVE_v8i16>;
+defm : MVE_VQMOVN_p<MVE_VQMOVNu32bh, 1, 1, 0, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VQMOVN_p<MVE_VQMOVNu32th, 1, 1, 1, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VQMOVN_p<MVE_VQMOVNu16bh, 1, 1, 0, MVE_v16i8, MVE_v8i16>;
+defm : MVE_VQMOVN_p<MVE_VQMOVNu16th, 1, 1, 1, MVE_v16i8, MVE_v8i16>;
+defm : MVE_VQMOVN_p<MVE_VQMOVUNs32bh, 1, 0, 0, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VQMOVN_p<MVE_VQMOVUNs32th, 1, 0, 1, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VQMOVN_p<MVE_VQMOVUNs16bh, 1, 0, 0, MVE_v16i8, MVE_v8i16>;
+defm : MVE_VQMOVN_p<MVE_VQMOVUNs16th, 1, 0, 1, MVE_v16i8, MVE_v8i16>;
+
+def SDTARMVMOVNQ : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisVec<2>, SDTCisVT<3, i32>]>;
+def MVEvqmovns : SDNode<"ARMISD::VQMOVNs", SDTARMVMOVNQ>;
+def MVEvqmovnu : SDNode<"ARMISD::VQMOVNu", SDTARMVMOVNQ>;
+
let Predicates = [HasMVEInt] in {
- def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))),
- (v8i16 (MVE_VMOVNi32bh (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
- def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))),
- (v8i16 (MVE_VMOVNi32th (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
- def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 0))),
- (v16i8 (MVE_VMOVNi16bh (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>;
- def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 1))),
- (v16i8 (MVE_VMOVNi16th (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>;
+ def : Pat<(v8i16 (MVEvqmovns (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), (i32 0))),
+ (v8i16 (MVE_VQMOVNs32bh (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm)))>;
+ def : Pat<(v8i16 (MVEvqmovns (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), (i32 1))),
+ (v8i16 (MVE_VQMOVNs32th (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm)))>;
+ def : Pat<(v16i8 (MVEvqmovns (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))),
+ (v16i8 (MVE_VQMOVNs16bh (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
+ def : Pat<(v16i8 (MVEvqmovns (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))),
+ (v16i8 (MVE_VQMOVNs16th (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
+
+ def : Pat<(v8i16 (MVEvqmovnu (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), (i32 0))),
+ (v8i16 (MVE_VQMOVNu32bh (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm)))>;
+ def : Pat<(v8i16 (MVEvqmovnu (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), (i32 1))),
+ (v8i16 (MVE_VQMOVNu32th (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm)))>;
+ def : Pat<(v16i8 (MVEvqmovnu (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))),
+ (v16i8 (MVE_VQMOVNu16bh (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
+ def : Pat<(v16i8 (MVEvqmovnu (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))),
+ (v16i8 (MVE_VQMOVNu16th (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
+
+ def : Pat<(v8i16 (MVEvqmovns (v8i16 MQPR:$Qd_src), (v4i32 (ARMvshrsImm (v4i32 MQPR:$Qm), imm0_31:$imm)), (i32 0))),
+ (v8i16 (MVE_VQSHRNbhs32 (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), imm0_31:$imm))>;
+ def : Pat<(v16i8 (MVEvqmovns (v16i8 MQPR:$Qd_src), (v8i16 (ARMvshrsImm (v8i16 MQPR:$Qm), imm0_15:$imm)), (i32 0))),
+ (v16i8 (MVE_VQSHRNbhs16 (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), imm0_15:$imm))>;
+ def : Pat<(v8i16 (MVEvqmovns (v8i16 MQPR:$Qd_src), (v4i32 (ARMvshrsImm (v4i32 MQPR:$Qm), imm0_31:$imm)), (i32 1))),
+ (v8i16 (MVE_VQSHRNths32 (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), imm0_31:$imm))>;
+ def : Pat<(v16i8 (MVEvqmovns (v16i8 MQPR:$Qd_src), (v8i16 (ARMvshrsImm (v8i16 MQPR:$Qm), imm0_15:$imm)), (i32 1))),
+ (v16i8 (MVE_VQSHRNths16 (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), imm0_15:$imm))>;
+
+ def : Pat<(v8i16 (MVEvqmovnu (v8i16 MQPR:$Qd_src), (v4i32 (ARMvshruImm (v4i32 MQPR:$Qm), imm0_31:$imm)), (i32 0))),
+ (v8i16 (MVE_VQSHRNbhu32 (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), imm0_31:$imm))>;
+ def : Pat<(v16i8 (MVEvqmovnu (v16i8 MQPR:$Qd_src), (v8i16 (ARMvshruImm (v8i16 MQPR:$Qm), imm0_15:$imm)), (i32 0))),
+ (v16i8 (MVE_VQSHRNbhu16 (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), imm0_15:$imm))>;
+ def : Pat<(v8i16 (MVEvqmovnu (v8i16 MQPR:$Qd_src), (v4i32 (ARMvshruImm (v4i32 MQPR:$Qm), imm0_31:$imm)), (i32 1))),
+ (v8i16 (MVE_VQSHRNthu32 (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), imm0_31:$imm))>;
+ def : Pat<(v16i8 (MVEvqmovnu (v16i8 MQPR:$Qd_src), (v8i16 (ARMvshruImm (v8i16 MQPR:$Qm), imm0_15:$imm)), (i32 1))),
+ (v16i8 (MVE_VQSHRNthu16 (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), imm0_15:$imm))>;
}
class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
- list<dag> pattern=[]>
- : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
- "$Qd, $Qm", vpred_n, "$Qd = $Qd_src", pattern> {
+ dag iops_extra, vpred_ops vpred, string cstr>
+ : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
+ !con(iops_extra, (ins MQPR:$Qm)), "$Qd, $Qm",
+ vpred, cstr, []> {
let Inst{28} = op;
let Inst{21-16} = 0b111111;
let Inst{12} = T;
@@ -4235,10 +4779,17 @@ class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
let Inst{0} = 0b1;
let Predicates = [HasMVEFloat];
+ let retainsPreviousHalfElement = 1;
}
+def SDTARMVCVTL : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisVT<2, i32>]>;
+def MVEvcvtn : SDNode<"ARMISD::VCVTN", SDTARMVMOVNQ>;
+def MVEvcvtl : SDNode<"ARMISD::VCVTL", SDTARMVCVTL>;
+
multiclass MVE_VCVT_f2h_m<string iname, int half> {
- def "": MVE_VCVT_ff<iname, "f16.f32", 0b0, half>;
+ def "": MVE_VCVT_ff<iname, "f16.f32", 0b0, half,
+ (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
@@ -4250,11 +4801,28 @@ multiclass MVE_VCVT_f2h_m<string iname, int half> {
(v4i1 VCCR:$mask))),
(v8f16 (Inst (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm),
ARMVCCThen, (v4i1 VCCR:$mask)))>;
+
+ def : Pat<(v8f16 (MVEvcvtn (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 half))),
+ (v8f16 (Inst (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm)))>;
}
}
multiclass MVE_VCVT_h2f_m<string iname, int half> {
- def "": MVE_VCVT_ff<iname, "f32.f16", 0b1, half>;
+ def "": MVE_VCVT_ff<iname, "f32.f16", 0b1, half, (ins), vpred_r, "">;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(v4f32 (int_arm_mve_vcvt_widen (v8f16 MQPR:$Qm), (i32 half))),
+ (v4f32 (Inst (v8f16 MQPR:$Qm)))>;
+ def : Pat<(v4f32 (int_arm_mve_vcvt_widen_predicated
+ (v4f32 MQPR:$inactive), (v8f16 MQPR:$Qm), (i32 half),
+ (v4i1 VCCR:$mask))),
+ (v4f32 (Inst (v8f16 MQPR:$Qm), ARMVCCThen,
+ (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive)))>;
+
+ def : Pat<(v4f32 (MVEvcvtl (v8f16 MQPR:$Qm), (i32 half))),
+ (v4f32 (Inst (v8f16 MQPR:$Qm)))>;
+ }
}
defm MVE_VCVTf16f32bh : MVE_VCVT_f2h_m<"vcvtb", 0b0>;
@@ -4353,15 +4921,37 @@ class MVE_VQDMULL<string iname, string suffix, bit size, bit T,
let Inst{7} = Qn{3};
let Inst{0} = 0b1;
let validForTailPredication = 1;
+ let doubleWidthResult = 1;
+}
+
+multiclass MVE_VQDMULL_m<string iname, MVEVectorVTInfo VTI, bit size, bit T,
+ string cstr> {
+ def "" : MVE_VQDMULL<iname, VTI.Suffix, size, T, cstr>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated saturating multiply
+ def : Pat<(VTI.DblVec (int_arm_mve_vqdmull (VTI.Vec MQPR:$Qm),
+ (VTI.Vec MQPR:$Qn), (i32 T))),
+ (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+ // Predicated saturating multiply
+ def : Pat<(VTI.DblVec (int_arm_mve_vqdmull_predicated
+ (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 T), (VTI.DblPred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive))),
+ (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.DblPred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive)))>;
+ }
}
-multiclass MVE_VQDMULL_halves<string suffix, bit size, string cstr=""> {
- def bh : MVE_VQDMULL<"vqdmullb", suffix, size, 0b0, cstr>;
- def th : MVE_VQDMULL<"vqdmullt", suffix, size, 0b1, cstr>;
+multiclass MVE_VQDMULL_halves<MVEVectorVTInfo VTI, bit size, string cstr=""> {
+ defm bh : MVE_VQDMULL_m<"vqdmullb", VTI, size, 0b0, cstr>;
+ defm th : MVE_VQDMULL_m<"vqdmullt", VTI, size, 0b1, cstr>;
}
-defm MVE_VQDMULLs16 : MVE_VQDMULL_halves<"s16", 0b0>;
-defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<"s32", 0b1, "@earlyclobber $Qd">;
+defm MVE_VQDMULLs16 : MVE_VQDMULL_halves<MVE_v8s16, 0b0>;
+defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<MVE_v4s32, 0b1, "@earlyclobber $Qd">;
// end of mve_qDest_qSrc
@@ -4407,10 +4997,61 @@ class MVE_qDest_single_rSrc<string iname, string suffix, list<dag> pattern=[]>
let Inst{3-0} = Rm{3-0};
}
+// Patterns for vector-scalar instructions with integer operands
+multiclass MVE_vec_scalar_int_pat_m<Instruction inst, MVEVectorVTInfo VTI,
+ SDNode unpred_op, SDNode pred_op,
+ bit unpred_has_sign = 0,
+ bit pred_has_sign = 0> {
+ defvar UnpredSign = !if(unpred_has_sign, (? (i32 VTI.Unsigned)), (?));
+ defvar PredSign = !if(pred_has_sign, (? (i32 VTI.Unsigned)), (?));
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated version
+ def : Pat<(VTI.Vec !con((unpred_op (VTI.Vec MQPR:$Qm),
+ (VTI.Vec (ARMvdup rGPR:$val))),
+ UnpredSign)),
+ (VTI.Vec (inst (VTI.Vec MQPR:$Qm), (i32 rGPR:$val)))>;
+ // Predicated version
+ def : Pat<(VTI.Vec !con((pred_op (VTI.Vec MQPR:$Qm),
+ (VTI.Vec (ARMvdup rGPR:$val))),
+ PredSign,
+ (pred_op (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))),
+ (VTI.Vec (inst (VTI.Vec MQPR:$Qm), (i32 rGPR:$val),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
+}
+
+// Patterns for vector-scalar instructions with FP operands
+multiclass MVE_vec_scalar_fp_pat_m<SDNode unpred_op, Intrinsic pred_int,
+ Instruction instr_f16,
+ Instruction instr_f32> {
+ let Predicates = [HasMVEFloat] in {
+ // Unpredicated F16
+ def : Pat<(v8f16 (unpred_op (v8f16 MQPR:$Qm), (v8f16 (ARMvdup rGPR:$val)))),
+ (v8f16 (instr_f16 (v8f16 MQPR:$Qm), (i32 rGPR:$val)))>;
+ // Unpredicated F32
+ def : Pat<(v4f32 (unpred_op (v4f32 MQPR:$Qm), (v4f32 (ARMvdup rGPR:$val)))),
+ (v4f32 (instr_f32 (v4f32 MQPR:$Qm), (i32 rGPR:$val)))>;
+ // Predicated F16
+ def : Pat<(v8f16 (pred_int (v8f16 MQPR:$Qm), (v8f16 (ARMvdup rGPR:$val)),
+ (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))),
+ (v8f16 (instr_f16 (v8f16 MQPR:$Qm), (i32 rGPR:$val),
+ ARMVCCThen, (v8i1 VCCR:$mask),
+ (v8f16 MQPR:$inactive)))>;
+ // Predicated F32
+ def : Pat<(v4f32 (pred_int (v4f32 MQPR:$Qm), (v4f32 (ARMvdup rGPR:$val)),
+ (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))),
+ (v4f32 (instr_f32 (v4f32 MQPR:$Qm), (i32 rGPR:$val),
+ ARMVCCThen, (v4i1 VCCR:$mask),
+ (v4f32 MQPR:$inactive)))>;
+ }
+}
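Both helper multiclasses above match the DAG form op(Qm, vdup(scalar)) onto the _qr instructions, i.e. the scalar operand is broadcast and then combined lane-wise with the vector. A minimal C sketch of that shape (illustrative only, using a 32-bit vadd as the example op):

#include <stdint.h>

/* Illustrative lane-wise model of a vector-scalar ("qr") operation such
 * as VADD_qr_i32: the scalar rm is conceptually duplicated into every
 * lane and then combined with qm element by element. */
static void vadd_qr_i32(uint32_t qd[4], const uint32_t qm[4], uint32_t rm)
{
    for (int i = 0; i < 4; i++)
        qd[i] = qm[i] + rm;
}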
+
class MVE_VADDSUB_qr<string iname, string suffix, bits<2> size,
- bit bit_5, bit bit_12, bit bit_16,
- bit bit_28, list<dag> pattern=[]>
- : MVE_qDest_rSrc<iname, suffix, "", pattern> {
+ bit bit_5, bit bit_12, bit bit_16, bit bit_28>
+ : MVE_qDest_rSrc<iname, suffix, ""> {
let Inst{28} = bit_28;
let Inst{21-20} = size;
@@ -4421,42 +5062,60 @@ class MVE_VADDSUB_qr<string iname, string suffix, bits<2> size,
let validForTailPredication = 1;
}
-multiclass MVE_VADDSUB_qr_sizes<string iname, string suffix,
- bit bit_5, bit bit_12, bit bit_16,
- bit bit_28, list<dag> pattern=[]> {
- def "8" : MVE_VADDSUB_qr<iname, suffix#"8", 0b00,
- bit_5, bit_12, bit_16, bit_28>;
- def "16" : MVE_VADDSUB_qr<iname, suffix#"16", 0b01,
- bit_5, bit_12, bit_16, bit_28>;
- def "32" : MVE_VADDSUB_qr<iname, suffix#"32", 0b10,
- bit_5, bit_12, bit_16, bit_28>;
-}
-
-defm MVE_VADD_qr_i : MVE_VADDSUB_qr_sizes<"vadd", "i", 0b0, 0b0, 0b1, 0b0>;
-defm MVE_VQADD_qr_s : MVE_VADDSUB_qr_sizes<"vqadd", "s", 0b1, 0b0, 0b0, 0b0>;
-defm MVE_VQADD_qr_u : MVE_VADDSUB_qr_sizes<"vqadd", "u", 0b1, 0b0, 0b0, 0b1>;
-
-defm MVE_VSUB_qr_i : MVE_VADDSUB_qr_sizes<"vsub", "i", 0b0, 0b1, 0b1, 0b0>;
-defm MVE_VQSUB_qr_s : MVE_VADDSUB_qr_sizes<"vqsub", "s", 0b1, 0b1, 0b0, 0b0>;
-defm MVE_VQSUB_qr_u : MVE_VADDSUB_qr_sizes<"vqsub", "u", 0b1, 0b1, 0b0, 0b1>;
-
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (add (v16i8 MQPR:$val1), (v16i8 (ARMvdup GPR:$val2)))),
- (v16i8 (MVE_VADD_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>;
- def : Pat<(v8i16 (add (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))),
- (v8i16 (MVE_VADD_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>;
- def : Pat<(v4i32 (add (v4i32 MQPR:$val1), (v4i32 (ARMvdup GPR:$val2)))),
- (v4i32 (MVE_VADD_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>;
-}
-
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (sub (v16i8 MQPR:$val1), (v16i8 (ARMvdup GPR:$val2)))),
- (v16i8 (MVE_VSUB_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>;
- def : Pat<(v8i16 (sub (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))),
- (v8i16 (MVE_VSUB_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>;
- def : Pat<(v4i32 (sub (v4i32 MQPR:$val1), (v4i32 (ARMvdup GPR:$val2)))),
- (v4i32 (MVE_VSUB_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>;
-}
+// Vector-scalar add/sub
+multiclass MVE_VADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VADDSUB_qr<iname, VTI.Suffix, VTI.Size, 0b0, subtract, 0b1, 0b0>;
+ defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME), VTI,
+ unpred_op, pred_int>;
+}
+
+multiclass MVE_VADD_qr_m<MVEVectorVTInfo VTI>
+ : MVE_VADDSUB_qr_m<"vadd", VTI, 0b0, add, int_arm_mve_add_predicated>;
+
+multiclass MVE_VSUB_qr_m<MVEVectorVTInfo VTI>
+ : MVE_VADDSUB_qr_m<"vsub", VTI, 0b1, sub, int_arm_mve_sub_predicated>;
+
+defm MVE_VADD_qr_i8 : MVE_VADD_qr_m<MVE_v16i8>;
+defm MVE_VADD_qr_i16 : MVE_VADD_qr_m<MVE_v8i16>;
+defm MVE_VADD_qr_i32 : MVE_VADD_qr_m<MVE_v4i32>;
+
+defm MVE_VSUB_qr_i8 : MVE_VSUB_qr_m<MVE_v16i8>;
+defm MVE_VSUB_qr_i16 : MVE_VSUB_qr_m<MVE_v8i16>;
+defm MVE_VSUB_qr_i32 : MVE_VSUB_qr_m<MVE_v4i32>;
+
+// Vector-scalar saturating add/sub
+multiclass MVE_VQADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
+ SDNode unpred_op_s, SDNode unpred_op_u,
+ Intrinsic pred_int> {
+ def "" : MVE_VADDSUB_qr<iname, VTI.Suffix, VTI.Size, 0b1, subtract,
+ 0b0, VTI.Unsigned>;
+ defvar unpred_op = !if(VTI.Unsigned, unpred_op_u, unpred_op_s);
+ defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME), VTI,
+ unpred_op, pred_int, 0, 1>;
+}
+
+multiclass MVE_VQADD_qr_m<MVEVectorVTInfo VTI>
+ : MVE_VQADDSUB_qr_m<"vqadd", VTI, 0b0, saddsat, uaddsat,
+ int_arm_mve_qadd_predicated>;
+
+multiclass MVE_VQSUB_qr_m<MVEVectorVTInfo VTI>
+ : MVE_VQADDSUB_qr_m<"vqsub", VTI, 0b1, ssubsat, usubsat,
+ int_arm_mve_qsub_predicated>;
+
+defm MVE_VQADD_qr_s8 : MVE_VQADD_qr_m<MVE_v16s8>;
+defm MVE_VQADD_qr_s16 : MVE_VQADD_qr_m<MVE_v8s16>;
+defm MVE_VQADD_qr_s32 : MVE_VQADD_qr_m<MVE_v4s32>;
+defm MVE_VQADD_qr_u8 : MVE_VQADD_qr_m<MVE_v16u8>;
+defm MVE_VQADD_qr_u16 : MVE_VQADD_qr_m<MVE_v8u16>;
+defm MVE_VQADD_qr_u32 : MVE_VQADD_qr_m<MVE_v4u32>;
+
+defm MVE_VQSUB_qr_s8 : MVE_VQSUB_qr_m<MVE_v16s8>;
+defm MVE_VQSUB_qr_s16 : MVE_VQSUB_qr_m<MVE_v8s16>;
+defm MVE_VQSUB_qr_s32 : MVE_VQSUB_qr_m<MVE_v4s32>;
+defm MVE_VQSUB_qr_u8 : MVE_VQSUB_qr_m<MVE_v16u8>;
+defm MVE_VQSUB_qr_u16 : MVE_VQSUB_qr_m<MVE_v8u16>;
+defm MVE_VQSUB_qr_u32 : MVE_VQSUB_qr_m<MVE_v4u32>;
class MVE_VQDMULL_qr<string iname, string suffix, bit size,
bit T, string cstr="", list<dag> pattern=[]>
@@ -4469,15 +5128,40 @@ class MVE_VQDMULL_qr<string iname, string suffix, bit size,
let Inst{8} = 0b1;
let Inst{5} = 0b1;
let validForTailPredication = 1;
+ let doubleWidthResult = 1;
}
-multiclass MVE_VQDMULL_qr_halves<string suffix, bit size, string cstr=""> {
- def bh : MVE_VQDMULL_qr<"vqdmullb", suffix, size, 0b0, cstr>;
- def th : MVE_VQDMULL_qr<"vqdmullt", suffix, size, 0b1, cstr>;
+multiclass MVE_VQDMULL_qr_m<string iname, MVEVectorVTInfo VTI, bit size,
+ bit T, string cstr> {
+ def "" : MVE_VQDMULL_qr<iname, VTI.Suffix, size, T, cstr>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated saturating multiply
+ def : Pat<(VTI.DblVec (int_arm_mve_vqdmull (VTI.Vec MQPR:$Qm),
+ (VTI.Vec (ARMvdup rGPR:$val)),
+ (i32 T))),
+ (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (i32 rGPR:$val)))>;
+ // Predicated saturating multiply
+ def : Pat<(VTI.DblVec (int_arm_mve_vqdmull_predicated
+ (VTI.Vec MQPR:$Qm),
+ (VTI.Vec (ARMvdup rGPR:$val)),
+ (i32 T),
+ (VTI.DblPred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive))),
+ (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (i32 rGPR:$val),
+ ARMVCCThen, (VTI.DblPred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive)))>;
+ }
}
-defm MVE_VQDMULL_qr_s16 : MVE_VQDMULL_qr_halves<"s16", 0b0>;
-defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<"s32", 0b1, "@earlyclobber $Qd">;
+multiclass MVE_VQDMULL_qr_halves<MVEVectorVTInfo VTI, bit size, string cstr=""> {
+ defm bh : MVE_VQDMULL_qr_m<"vqdmullb", VTI, size, 0b0, cstr>;
+ defm th : MVE_VQDMULL_qr_m<"vqdmullt", VTI, size, 0b1, cstr>;
+}
+
+defm MVE_VQDMULL_qr_s16 : MVE_VQDMULL_qr_halves<MVE_v8s16, 0b0>;
+defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<MVE_v4s32, 0b1, "@earlyclobber $Qd">;
class MVE_VxADDSUB_qr<string iname, string suffix,
bit bit_28, bits<2> bits_21_20, bit subtract,
@@ -4493,19 +5177,34 @@ class MVE_VxADDSUB_qr<string iname, string suffix,
let validForTailPredication = 1;
}
-def MVE_VHADD_qr_s8 : MVE_VxADDSUB_qr<"vhadd", "s8", 0b0, 0b00, 0b0>;
-def MVE_VHADD_qr_s16 : MVE_VxADDSUB_qr<"vhadd", "s16", 0b0, 0b01, 0b0>;
-def MVE_VHADD_qr_s32 : MVE_VxADDSUB_qr<"vhadd", "s32", 0b0, 0b10, 0b0>;
-def MVE_VHADD_qr_u8 : MVE_VxADDSUB_qr<"vhadd", "u8", 0b1, 0b00, 0b0>;
-def MVE_VHADD_qr_u16 : MVE_VxADDSUB_qr<"vhadd", "u16", 0b1, 0b01, 0b0>;
-def MVE_VHADD_qr_u32 : MVE_VxADDSUB_qr<"vhadd", "u32", 0b1, 0b10, 0b0>;
+multiclass MVE_VHADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
+ Intrinsic unpred_int, Intrinsic pred_int> {
+ def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, subtract>;
+ defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME),
+ VTI, unpred_int, pred_int, 1, 1>;
+}
+
+multiclass MVE_VHADD_qr_m<MVEVectorVTInfo VTI> :
+ MVE_VHADDSUB_qr_m<"vhadd", VTI, 0b0, int_arm_mve_vhadd,
+ int_arm_mve_hadd_predicated>;
+
+multiclass MVE_VHSUB_qr_m<MVEVectorVTInfo VTI> :
+ MVE_VHADDSUB_qr_m<"vhsub", VTI, 0b1, int_arm_mve_vhsub,
+ int_arm_mve_hsub_predicated>;
-def MVE_VHSUB_qr_s8 : MVE_VxADDSUB_qr<"vhsub", "s8", 0b0, 0b00, 0b1>;
-def MVE_VHSUB_qr_s16 : MVE_VxADDSUB_qr<"vhsub", "s16", 0b0, 0b01, 0b1>;
-def MVE_VHSUB_qr_s32 : MVE_VxADDSUB_qr<"vhsub", "s32", 0b0, 0b10, 0b1>;
-def MVE_VHSUB_qr_u8 : MVE_VxADDSUB_qr<"vhsub", "u8", 0b1, 0b00, 0b1>;
-def MVE_VHSUB_qr_u16 : MVE_VxADDSUB_qr<"vhsub", "u16", 0b1, 0b01, 0b1>;
-def MVE_VHSUB_qr_u32 : MVE_VxADDSUB_qr<"vhsub", "u32", 0b1, 0b10, 0b1>;
+defm MVE_VHADD_qr_s8 : MVE_VHADD_qr_m<MVE_v16s8>;
+defm MVE_VHADD_qr_s16 : MVE_VHADD_qr_m<MVE_v8s16>;
+defm MVE_VHADD_qr_s32 : MVE_VHADD_qr_m<MVE_v4s32>;
+defm MVE_VHADD_qr_u8 : MVE_VHADD_qr_m<MVE_v16u8>;
+defm MVE_VHADD_qr_u16 : MVE_VHADD_qr_m<MVE_v8u16>;
+defm MVE_VHADD_qr_u32 : MVE_VHADD_qr_m<MVE_v4u32>;
+
+defm MVE_VHSUB_qr_s8 : MVE_VHSUB_qr_m<MVE_v16s8>;
+defm MVE_VHSUB_qr_s16 : MVE_VHSUB_qr_m<MVE_v8s16>;
+defm MVE_VHSUB_qr_s32 : MVE_VHSUB_qr_m<MVE_v4s32>;
+defm MVE_VHSUB_qr_u8 : MVE_VHSUB_qr_m<MVE_v16u8>;
+defm MVE_VHSUB_qr_u16 : MVE_VHSUB_qr_m<MVE_v8u16>;
+defm MVE_VHSUB_qr_u32 : MVE_VHSUB_qr_m<MVE_v4u32>;
let Predicates = [HasMVEFloat] in {
def MVE_VADD_qr_f32 : MVE_VxADDSUB_qr<"vadd", "f32", 0b0, 0b11, 0b0>;
@@ -4515,6 +5214,11 @@ let Predicates = [HasMVEFloat] in {
def MVE_VSUB_qr_f16 : MVE_VxADDSUB_qr<"vsub", "f16", 0b1, 0b11, 0b1>;
}
+defm : MVE_vec_scalar_fp_pat_m<fadd, int_arm_mve_add_predicated,
+ MVE_VADD_qr_f16, MVE_VADD_qr_f32>;
+defm : MVE_vec_scalar_fp_pat_m<fsub, int_arm_mve_sub_predicated,
+ MVE_VSUB_qr_f16, MVE_VSUB_qr_f32>;
+
class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size,
bit bit_7, bit bit_17, list<dag> pattern=[]>
: MVE_qDest_single_rSrc<iname, suffix, pattern> {
@@ -4563,19 +5267,19 @@ defm MVE_VQSHL_qr : MVE_VxSHL_qr_types<"vqshl", 0b1, 0b0>;
defm MVE_VQRSHL_qr : MVE_VxSHL_qr_types<"vqrshl", 0b1, 0b1>;
let Predicates = [HasMVEInt] in {
- def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 (ARMvdup GPR:$Rm)))),
- (v4i32 (MVE_VSHL_qru32 (v4i32 MQPR:$Qm), GPR:$Rm))>;
- def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 (ARMvdup GPR:$Rm)))),
- (v8i16 (MVE_VSHL_qru16 (v8i16 MQPR:$Qm), GPR:$Rm))>;
- def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 (ARMvdup GPR:$Rm)))),
- (v16i8 (MVE_VSHL_qru8 (v16i8 MQPR:$Qm), GPR:$Rm))>;
+ def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 (ARMvdup rGPR:$Rm)))),
+ (v4i32 (MVE_VSHL_qru32 (v4i32 MQPR:$Qm), rGPR:$Rm))>;
+ def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 (ARMvdup rGPR:$Rm)))),
+ (v8i16 (MVE_VSHL_qru16 (v8i16 MQPR:$Qm), rGPR:$Rm))>;
+ def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 (ARMvdup rGPR:$Rm)))),
+ (v16i8 (MVE_VSHL_qru8 (v16i8 MQPR:$Qm), rGPR:$Rm))>;
- def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 (ARMvdup GPR:$Rm)))),
- (v4i32 (MVE_VSHL_qrs32 (v4i32 MQPR:$Qm), GPR:$Rm))>;
- def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 (ARMvdup GPR:$Rm)))),
- (v8i16 (MVE_VSHL_qrs16 (v8i16 MQPR:$Qm), GPR:$Rm))>;
- def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 (ARMvdup GPR:$Rm)))),
- (v16i8 (MVE_VSHL_qrs8 (v16i8 MQPR:$Qm), GPR:$Rm))>;
+ def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 (ARMvdup rGPR:$Rm)))),
+ (v4i32 (MVE_VSHL_qrs32 (v4i32 MQPR:$Qm), rGPR:$Rm))>;
+ def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 (ARMvdup rGPR:$Rm)))),
+ (v8i16 (MVE_VSHL_qrs16 (v8i16 MQPR:$Qm), rGPR:$Rm))>;
+ def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 (ARMvdup rGPR:$Rm)))),
+ (v16i8 (MVE_VSHL_qrs8 (v16i8 MQPR:$Qm), rGPR:$Rm))>;
}
class MVE_VBRSR<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
@@ -4594,6 +5298,20 @@ def MVE_VBRSR8 : MVE_VBRSR<"vbrsr", "8", 0b00>;
def MVE_VBRSR16 : MVE_VBRSR<"vbrsr", "16", 0b01>;
def MVE_VBRSR32 : MVE_VBRSR<"vbrsr", "32", 0b10>;
+multiclass MVE_VBRSR_pat_m<MVEVectorVTInfo VTI, Instruction Inst> {
+ // Unpredicated
+ def : Pat<(VTI.Vec (int_arm_mve_vbrsr (VTI.Vec MQPR:$Qn), (i32 rGPR:$Rm))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (i32 rGPR:$Rm)))>;
+ // Predicated
+ def : Pat<(VTI.Vec (int_arm_mve_vbrsr_predicated
+ (VTI.Vec MQPR:$inactive),
+ (VTI.Vec MQPR:$Qn), (i32 rGPR:$Rm),
+ (VTI.Pred VCCR:$mask))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (i32 rGPR:$Rm),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+}
+
let Predicates = [HasMVEInt] in {
def : Pat<(v16i8 ( bitreverse (v16i8 MQPR:$val1))),
(v16i8 ( MVE_VBRSR8 (v16i8 MQPR:$val1), (t2MOVi (i32 8)) ))>;
@@ -4603,11 +5321,19 @@ let Predicates = [HasMVEInt] in {
def : Pat<(v8i16 ( bitreverse (v8i16 MQPR:$val1))),
(v8i16 ( MVE_VBRSR16 (v8i16 MQPR:$val1), (t2MOVi (i32 16)) ))>;
+
+ defm : MVE_VBRSR_pat_m<MVE_v16i8, MVE_VBRSR8>;
+ defm : MVE_VBRSR_pat_m<MVE_v8i16, MVE_VBRSR16>;
+ defm : MVE_VBRSR_pat_m<MVE_v4i32, MVE_VBRSR32>;
}
-class MVE_VMUL_qr_int<string iname, string suffix,
- bits<2> size, list<dag> pattern=[]>
- : MVE_qDest_rSrc<iname, suffix, "", pattern> {
+let Predicates = [HasMVEFloat] in {
+ defm : MVE_VBRSR_pat_m<MVE_v8f16, MVE_VBRSR16>;
+ defm : MVE_VBRSR_pat_m<MVE_v4f32, MVE_VBRSR32>;
+}
+
+class MVE_VMUL_qr_int<string iname, string suffix, bits<2> size>
+ : MVE_qDest_rSrc<iname, suffix, ""> {
let Inst{28} = 0b0;
let Inst{21-20} = size;
@@ -4618,19 +5344,16 @@ class MVE_VMUL_qr_int<string iname, string suffix,
let validForTailPredication = 1;
}
-def MVE_VMUL_qr_i8 : MVE_VMUL_qr_int<"vmul", "i8", 0b00>;
-def MVE_VMUL_qr_i16 : MVE_VMUL_qr_int<"vmul", "i16", 0b01>;
-def MVE_VMUL_qr_i32 : MVE_VMUL_qr_int<"vmul", "i32", 0b10>;
-
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (mul (v16i8 MQPR:$val1), (v16i8 (ARMvdup GPR:$val2)))),
- (v16i8 (MVE_VMUL_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>;
- def : Pat<(v8i16 (mul (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))),
- (v8i16 (MVE_VMUL_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>;
- def : Pat<(v4i32 (mul (v4i32 MQPR:$val1), (v4i32 (ARMvdup GPR:$val2)))),
- (v4i32 (MVE_VMUL_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>;
+multiclass MVE_VMUL_qr_int_m<MVEVectorVTInfo VTI> {
+ def "" : MVE_VMUL_qr_int<"vmul", VTI.Suffix, VTI.Size>;
+ defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME), VTI,
+ mul, int_arm_mve_mul_predicated>;
}
+defm MVE_VMUL_qr_i8 : MVE_VMUL_qr_int_m<MVE_v16i8>;
+defm MVE_VMUL_qr_i16 : MVE_VMUL_qr_int_m<MVE_v8i16>;
+defm MVE_VMUL_qr_i32 : MVE_VMUL_qr_int_m<MVE_v4i32>;
+
class MVE_VxxMUL_qr<string iname, string suffix,
bit bit_28, bits<2> bits_21_20, list<dag> pattern=[]>
: MVE_qDest_rSrc<iname, suffix, "", pattern> {
@@ -4643,19 +5366,37 @@ class MVE_VxxMUL_qr<string iname, string suffix,
let Inst{5} = 0b1;
}
-def MVE_VQDMULH_qr_s8 : MVE_VxxMUL_qr<"vqdmulh", "s8", 0b0, 0b00>;
-def MVE_VQDMULH_qr_s16 : MVE_VxxMUL_qr<"vqdmulh", "s16", 0b0, 0b01>;
-def MVE_VQDMULH_qr_s32 : MVE_VxxMUL_qr<"vqdmulh", "s32", 0b0, 0b10>;
+multiclass MVE_VxxMUL_qr_m<string iname, MVEVectorVTInfo VTI, bit bit_28,
+ Intrinsic int_unpred, Intrinsic int_pred> {
+ def "" : MVE_VxxMUL_qr<iname, VTI.Suffix, bit_28, VTI.Size>;
+ defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME), VTI,
+ int_unpred, int_pred>;
+}
+
+multiclass MVE_VQDMULH_qr_m<MVEVectorVTInfo VTI> :
+ MVE_VxxMUL_qr_m<"vqdmulh", VTI, 0b0,
+ int_arm_mve_vqdmulh, int_arm_mve_qdmulh_predicated>;
+
+multiclass MVE_VQRDMULH_qr_m<MVEVectorVTInfo VTI> :
+ MVE_VxxMUL_qr_m<"vqrdmulh", VTI, 0b1,
+ int_arm_mve_vqrdmulh, int_arm_mve_qrdmulh_predicated>;
-def MVE_VQRDMULH_qr_s8 : MVE_VxxMUL_qr<"vqrdmulh", "s8", 0b1, 0b00>;
-def MVE_VQRDMULH_qr_s16 : MVE_VxxMUL_qr<"vqrdmulh", "s16", 0b1, 0b01>;
-def MVE_VQRDMULH_qr_s32 : MVE_VxxMUL_qr<"vqrdmulh", "s32", 0b1, 0b10>;
+defm MVE_VQDMULH_qr_s8 : MVE_VQDMULH_qr_m<MVE_v16s8>;
+defm MVE_VQDMULH_qr_s16 : MVE_VQDMULH_qr_m<MVE_v8s16>;
+defm MVE_VQDMULH_qr_s32 : MVE_VQDMULH_qr_m<MVE_v4s32>;
+
+defm MVE_VQRDMULH_qr_s8 : MVE_VQRDMULH_qr_m<MVE_v16s8>;
+defm MVE_VQRDMULH_qr_s16 : MVE_VQRDMULH_qr_m<MVE_v8s16>;
+defm MVE_VQRDMULH_qr_s32 : MVE_VQRDMULH_qr_m<MVE_v4s32>;
let Predicates = [HasMVEFloat], validForTailPredication = 1 in {
def MVE_VMUL_qr_f16 : MVE_VxxMUL_qr<"vmul", "f16", 0b1, 0b11>;
def MVE_VMUL_qr_f32 : MVE_VxxMUL_qr<"vmul", "f32", 0b0, 0b11>;
}
+defm : MVE_vec_scalar_fp_pat_m<fmul, int_arm_mve_mul_predicated,
+ MVE_VMUL_qr_f16, MVE_VMUL_qr_f32>;
+
class MVE_VFMAMLA_qr<string iname, string suffix,
bit bit_28, bits<2> bits_21_20, bit S,
list<dag> pattern=[]>
@@ -4668,42 +5409,87 @@ class MVE_VFMAMLA_qr<string iname, string suffix,
let Inst{8} = 0b0;
let Inst{5} = 0b0;
let validForTailPredication = 1;
+ let hasSideEffects = 0;
}
-def MVE_VMLA_qr_s8 : MVE_VFMAMLA_qr<"vmla", "s8", 0b0, 0b00, 0b0>;
-def MVE_VMLA_qr_s16 : MVE_VFMAMLA_qr<"vmla", "s16", 0b0, 0b01, 0b0>;
-def MVE_VMLA_qr_s32 : MVE_VFMAMLA_qr<"vmla", "s32", 0b0, 0b10, 0b0>;
-def MVE_VMLA_qr_u8 : MVE_VFMAMLA_qr<"vmla", "u8", 0b1, 0b00, 0b0>;
-def MVE_VMLA_qr_u16 : MVE_VFMAMLA_qr<"vmla", "u16", 0b1, 0b01, 0b0>;
-def MVE_VMLA_qr_u32 : MVE_VFMAMLA_qr<"vmla", "u32", 0b1, 0b10, 0b0>;
+multiclass MVE_VMLA_qr_multi<string iname, MVEVectorVTInfo VTI,
+ bit scalar_addend> {
+ def "": MVE_VFMAMLA_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size,
+ scalar_addend>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar pred_int = !cast<Intrinsic>("int_arm_mve_" # iname # "_n_predicated");
+ defvar v1 = (VTI.Vec MQPR:$v1);
+ defvar v2 = (VTI.Vec MQPR:$v2);
+ defvar vs = (VTI.Vec (ARMvdup rGPR:$s));
+ defvar s = (i32 rGPR:$s);
+ defvar pred = (VTI.Pred VCCR:$pred);
+
+ // The signed and unsigned variants of this instruction have different
+ // encodings, but they're functionally identical. For the sake of
+ // determinism, we generate only the unsigned variant.
+ if VTI.Unsigned then let Predicates = [HasMVEInt] in {
+ if scalar_addend then {
+ def : Pat<(VTI.Vec (add (mul v1, v2), vs)),
+ (VTI.Vec (Inst v1, v2, s))>;
+ } else {
+ def : Pat<(VTI.Vec (add (mul v2, vs), v1)),
+ (VTI.Vec (Inst v1, v2, s))>;
+ }
-def MVE_VMLAS_qr_s8 : MVE_VFMAMLA_qr<"vmlas", "s8", 0b0, 0b00, 0b1>;
-def MVE_VMLAS_qr_s16 : MVE_VFMAMLA_qr<"vmlas", "s16", 0b0, 0b01, 0b1>;
-def MVE_VMLAS_qr_s32 : MVE_VFMAMLA_qr<"vmlas", "s32", 0b0, 0b10, 0b1>;
-def MVE_VMLAS_qr_u8 : MVE_VFMAMLA_qr<"vmlas", "u8", 0b1, 0b00, 0b1>;
-def MVE_VMLAS_qr_u16 : MVE_VFMAMLA_qr<"vmlas", "u16", 0b1, 0b01, 0b1>;
-def MVE_VMLAS_qr_u32 : MVE_VFMAMLA_qr<"vmlas", "u32", 0b1, 0b10, 0b1>;
+ def : Pat<(VTI.Vec (pred_int v1, v2, s, pred)),
+ (VTI.Vec (Inst v1, v2, s, ARMVCCThen, pred))>;
+ }
+}
-let Predicates = [HasMVEInt] in {
- def : Pat<(v4i32 (add (v4i32 MQPR:$src1),
- (v4i32 (mul (v4i32 MQPR:$src2),
- (v4i32 (ARMvdup (i32 rGPR:$x))))))),
- (v4i32 (MVE_VMLA_qr_u32 $src1, $src2, $x))>;
- def : Pat<(v8i16 (add (v8i16 MQPR:$src1),
- (v8i16 (mul (v8i16 MQPR:$src2),
- (v8i16 (ARMvdup (i32 rGPR:$x))))))),
- (v8i16 (MVE_VMLA_qr_u16 $src1, $src2, $x))>;
- def : Pat<(v16i8 (add (v16i8 MQPR:$src1),
- (v16i8 (mul (v16i8 MQPR:$src2),
- (v16i8 (ARMvdup (i32 rGPR:$x))))))),
- (v16i8 (MVE_VMLA_qr_u8 $src1, $src2, $x))>;
+defm MVE_VMLA_qr_s8 : MVE_VMLA_qr_multi<"vmla", MVE_v16s8, 0b0>;
+defm MVE_VMLA_qr_s16 : MVE_VMLA_qr_multi<"vmla", MVE_v8s16, 0b0>;
+defm MVE_VMLA_qr_s32 : MVE_VMLA_qr_multi<"vmla", MVE_v4s32, 0b0>;
+defm MVE_VMLA_qr_u8 : MVE_VMLA_qr_multi<"vmla", MVE_v16u8, 0b0>;
+defm MVE_VMLA_qr_u16 : MVE_VMLA_qr_multi<"vmla", MVE_v8u16, 0b0>;
+defm MVE_VMLA_qr_u32 : MVE_VMLA_qr_multi<"vmla", MVE_v4u32, 0b0>;
+
+defm MVE_VMLAS_qr_s8 : MVE_VMLA_qr_multi<"vmlas", MVE_v16s8, 0b1>;
+defm MVE_VMLAS_qr_s16 : MVE_VMLA_qr_multi<"vmlas", MVE_v8s16, 0b1>;
+defm MVE_VMLAS_qr_s32 : MVE_VMLA_qr_multi<"vmlas", MVE_v4s32, 0b1>;
+defm MVE_VMLAS_qr_u8 : MVE_VMLA_qr_multi<"vmlas", MVE_v16u8, 0b1>;
+defm MVE_VMLAS_qr_u16 : MVE_VMLA_qr_multi<"vmlas", MVE_v8u16, 0b1>;
+defm MVE_VMLAS_qr_u32 : MVE_VMLA_qr_multi<"vmlas", MVE_v4u32, 0b1>;
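The comment in MVE_VMLA_qr_multi notes that the signed and unsigned encodings are functionally identical, which follows from the operation being a wrap-around multiply-accumulate: the bit pattern is the same modular arithmetic under either interpretation. A rough scalar model of the two forms the patterns distinguish (scalar_addend = 0 gives VMLA, scalar_addend = 1 gives VMLAS); illustrative only, not code from the patch:

#include <stdint.h>

/* vmla : qda[i] = qda[i] + qn[i] * rm   (accumulator lives in the vector)
 * vmlas: qda[i] = qda[i] * qn[i] + rm   (addend is the broadcast scalar)
 * Both wrap modulo 2^32, so signed and unsigned lanes produce identical
 * bits, matching the "generate only the unsigned variant" note above. */
static void vmla_qr_i32(uint32_t qda[4], const uint32_t qn[4], uint32_t rm)
{
    for (int i = 0; i < 4; i++)
        qda[i] = qda[i] + qn[i] * rm;
}

static void vmlas_qr_i32(uint32_t qda[4], const uint32_t qn[4], uint32_t rm)
{
    for (int i = 0; i < 4; i++)
        qda[i] = qda[i] * qn[i] + rm;
}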
+
+multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
+ bit scalar_addend> {
+ def "": MVE_VFMAMLA_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, scalar_addend>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar pred_int = int_arm_mve_fma_predicated;
+ defvar v1 = (VTI.Vec MQPR:$v1);
+ defvar v2 = (VTI.Vec MQPR:$v2);
+ defvar vs = (VTI.Vec (ARMvdup (i32 rGPR:$s)));
+ defvar is = (i32 rGPR:$s);
+ defvar pred = (VTI.Pred VCCR:$pred);
+
+ let Predicates = [HasMVEFloat] in {
+ if scalar_addend then {
+ def : Pat<(VTI.Vec (fma v1, v2, vs)),
+ (VTI.Vec (Inst v1, v2, is))>;
+ def : Pat<(VTI.Vec (pred_int v1, v2, vs, pred)),
+ (VTI.Vec (Inst v1, v2, is, ARMVCCThen, pred))>;
+ } else {
+ def : Pat<(VTI.Vec (fma v1, vs, v2)),
+ (VTI.Vec (Inst v2, v1, is))>;
+ def : Pat<(VTI.Vec (fma vs, v1, v2)),
+ (VTI.Vec (Inst v2, v1, is))>;
+ def : Pat<(VTI.Vec (pred_int v1, vs, v2, pred)),
+ (VTI.Vec (Inst v2, v1, is, ARMVCCThen, pred))>;
+ def : Pat<(VTI.Vec (pred_int vs, v1, v2, pred)),
+ (VTI.Vec (Inst v2, v1, is, ARMVCCThen, pred))>;
+ }
+ }
}
let Predicates = [HasMVEFloat] in {
- def MVE_VFMA_qr_f16 : MVE_VFMAMLA_qr<"vfma", "f16", 0b1, 0b11, 0b0>;
- def MVE_VFMA_qr_f32 : MVE_VFMAMLA_qr<"vfma", "f32", 0b0, 0b11, 0b0>;
- def MVE_VFMA_qr_Sf16 : MVE_VFMAMLA_qr<"vfmas", "f16", 0b1, 0b11, 0b1>;
- def MVE_VFMA_qr_Sf32 : MVE_VFMAMLA_qr<"vfmas", "f32", 0b0, 0b11, 0b1>;
+ defm MVE_VFMA_qr_f16 : MVE_VFMA_qr_multi<"vfma", MVE_v8f16, 0>;
+ defm MVE_VFMA_qr_f32 : MVE_VFMA_qr_multi<"vfma", MVE_v4f32, 0>;
+ defm MVE_VFMA_qr_Sf16 : MVE_VFMA_qr_multi<"vfmas", MVE_v8f16, 1>;
+ defm MVE_VFMA_qr_Sf32 : MVE_VFMA_qr_multi<"vfmas", MVE_v4f32, 1>;
}
class MVE_VQDMLAH_qr<string iname, string suffix, bit U, bits<2> size,
@@ -4718,10 +5504,30 @@ class MVE_VQDMLAH_qr<string iname, string suffix, bit U, bits<2> size,
let Inst{5} = bit_5;
}
+multiclass MVE_VQDMLAH_qr_multi<string iname, MVEVectorVTInfo VTI,
+ bit bit_5, bit bit_12> {
+ def "": MVE_VQDMLAH_qr<iname, VTI.Suffix, 0b0, VTI.Size, bit_5, bit_12>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar unpred_int = !cast<Intrinsic>("int_arm_mve_" # iname);
+ defvar pred_int = !cast<Intrinsic>("int_arm_mve_" # iname # "_predicated");
+
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$v1), (VTI.Vec MQPR:$v2),
+ (i32 rGPR:$s))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$v1), (VTI.Vec MQPR:$v2),
+ (i32 rGPR:$s)))>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v1), (VTI.Vec MQPR:$v2),
+ (i32 rGPR:$s), (VTI.Pred VCCR:$pred))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$v1), (VTI.Vec MQPR:$v2),
+ (i32 rGPR:$s), ARMVCCThen,
+ (VTI.Pred VCCR:$pred)))>;
+ }
+}
+
multiclass MVE_VQDMLAH_qr_types<string iname, bit bit_5, bit bit_12> {
- def s8 : MVE_VQDMLAH_qr<iname, "s8", 0b0, 0b00, bit_5, bit_12>;
- def s16 : MVE_VQDMLAH_qr<iname, "s16", 0b0, 0b01, bit_5, bit_12>;
- def s32 : MVE_VQDMLAH_qr<iname, "s32", 0b0, 0b10, bit_5, bit_12>;
+ defm s8 : MVE_VQDMLAH_qr_multi<iname, MVE_v16s8, bit_5, bit_12>;
+ defm s16 : MVE_VQDMLAH_qr_multi<iname, MVE_v8s16, bit_5, bit_12>;
+ defm s32 : MVE_VQDMLAH_qr_multi<iname, MVE_v4s32, bit_5, bit_12>;
}
defm MVE_VQDMLAH_qr : MVE_VQDMLAH_qr_types<"vqdmlah", 0b1, 0b0>;
@@ -4752,6 +5558,7 @@ class MVE_VxDUP<string iname, string suffix, bits<2> size, bit bit_12,
let Inst{6-1} = 0b110111;
let Inst{0} = imm{0};
let validForTailPredication = 1;
+ let hasSideEffects = 0;
}
def MVE_VIDUPu8 : MVE_VxDUP<"vidup", "u8", 0b00, 0b0>;
@@ -4787,6 +5594,7 @@ class MVE_VxWDUP<string iname, string suffix, bits<2> size, bit bit_12,
let Inst{3-1} = Rm{3-1};
let Inst{0} = imm{0};
let validForTailPredication = 1;
+ let hasSideEffects = 0;
}
def MVE_VIWDUPu8 : MVE_VxWDUP<"viwdup", "u8", 0b00, 0b0>;
@@ -4855,6 +5663,8 @@ class MVE_VMOV_64bit<dag oops, dag iops, bit to_qreg, string ops, string cstr>
let Inst{12-5} = 0b01111000;
let Inst{4} = idx2;
let Inst{3-0} = Rt{3-0};
+
+ let hasSideEffects = 0;
}
// The assembly syntax for these instructions mentions the vector
@@ -4924,6 +5734,7 @@ class MVE_vldst24_base<bit writeback, bit fourregs, bits<2> stage, bits<2> size,
let mayLoad = load;
let mayStore = !eq(load,0);
+ let hasSideEffects = 0;
}
// A parameter class used to encapsulate all the ways the writeback
@@ -5004,22 +5815,44 @@ foreach wb = [MVE_vldst24_writeback<
"vst" # n.nvecs # stage # "." # s.lanesize>;
}
+def SDTARMVST2 : SDTypeProfile<1, 5, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisVec<3>,
+ SDTCisSameAs<3, 4>, SDTCisVT<5, i32>]>;
+def SDTARMVST4 : SDTypeProfile<1, 7, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisVec<3>,
+ SDTCisSameAs<3, 4>, SDTCisSameAs<3, 5>,
+ SDTCisSameAs<3, 6>, SDTCisVT<7, i32>]>;
+def MVEVST2UPD : SDNode<"ARMISD::VST2_UPD", SDTARMVST2, [SDNPHasChain]>;
+def MVEVST4UPD : SDNode<"ARMISD::VST4_UPD", SDTARMVST4, [SDNPHasChain]>;
+
multiclass MVE_vst24_patterns<int lanesize, ValueType VT> {
foreach stage = [0,1] in
def : Pat<(int_arm_mve_vst2q i32:$addr,
- (VT MQPR:$v0), (VT MQPR:$v1), (i32 stage)),
+ (VT MQPR:$v0), (VT MQPR:$v1), (i32 stage)),
(!cast<Instruction>("MVE_VST2"#stage#"_"#lanesize)
- (REG_SEQUENCE QQPR, VT:$v0, qsub_0, VT:$v1, qsub_1),
- t2_addr_offset_none:$addr)>;
+ (REG_SEQUENCE QQPR, VT:$v0, qsub_0, VT:$v1, qsub_1),
+ t2_addr_offset_none:$addr)>;
+ foreach stage = [0,1] in
+ def : Pat<(i32 (MVEVST2UPD i32:$addr, (i32 32),
+ (VT MQPR:$v0), (VT MQPR:$v1), (i32 stage))),
+ (i32 (!cast<Instruction>("MVE_VST2"#stage#"_"#lanesize#_wb)
+ (REG_SEQUENCE QQPR, VT:$v0, qsub_0, VT:$v1, qsub_1),
+ t2_addr_offset_none:$addr))>;
foreach stage = [0,1,2,3] in
def : Pat<(int_arm_mve_vst4q i32:$addr,
- (VT MQPR:$v0), (VT MQPR:$v1),
- (VT MQPR:$v2), (VT MQPR:$v3), (i32 stage)),
+ (VT MQPR:$v0), (VT MQPR:$v1),
+ (VT MQPR:$v2), (VT MQPR:$v3), (i32 stage)),
(!cast<Instruction>("MVE_VST4"#stage#"_"#lanesize)
- (REG_SEQUENCE QQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1,
- VT:$v2, qsub_2, VT:$v3, qsub_3),
- t2_addr_offset_none:$addr)>;
+ (REG_SEQUENCE QQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1,
+ VT:$v2, qsub_2, VT:$v3, qsub_3),
+ t2_addr_offset_none:$addr)>;
+ foreach stage = [0,1,2,3] in
+ def : Pat<(i32 (MVEVST4UPD i32:$addr, (i32 64),
+ (VT MQPR:$v0), (VT MQPR:$v1),
+ (VT MQPR:$v2), (VT MQPR:$v3), (i32 stage))),
+ (i32 (!cast<Instruction>("MVE_VST4"#stage#"_"#lanesize#_wb)
+ (REG_SEQUENCE QQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1,
+ VT:$v2, qsub_2, VT:$v3, qsub_3),
+ t2_addr_offset_none:$addr))>;
}
defm : MVE_vst24_patterns<8, v16i8>;
defm : MVE_vst24_patterns<16, v8i16>;
@@ -5097,6 +5930,7 @@ class MVE_VLDRSTR_base<MVE_ldst_direction dir, bit U, bit P, bit W, bit opc,
let mayLoad = dir.load;
let mayStore = !eq(dir.load,0);
+ let hasSideEffects = 0;
let validForTailPredication = 1;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td
index 6244d8d9e27e..1b3f6075c0e9 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -509,11 +509,6 @@ def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;
def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;
-def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
- SDTCisVT<2, i32>]>;
-def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
-def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
-
def NEONvbsl : SDNode<"ARMISD::VBSL",
SDTypeProfile<1, 3, [SDTCisVec<0>,
SDTCisSameAs<0, 1>,
@@ -531,11 +526,6 @@ def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
-def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
- SDTCisSameAs<1, 2>]>;
-def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
-def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
-
def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
SDTCisVT<2, v8i8>]>;
def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
@@ -1084,6 +1074,12 @@ def : Pat<(vector_insert (v4f16 DPR:$src),
def : Pat<(vector_insert (v8f16 QPR:$src),
(f16 (load addrmode6:$addr)), imm:$lane),
(VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+def : Pat<(vector_insert (v4bf16 DPR:$src),
+ (bf16 (load addrmode6:$addr)), imm:$lane),
+ (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(vector_insert (v8bf16 QPR:$src),
+ (bf16 (load addrmode6:$addr)), imm:$lane),
+ (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v2f32 DPR:$src),
(f32 (load addrmode6:$addr)), imm:$lane),
(VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
@@ -2459,57 +2455,6 @@ def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
}
//===----------------------------------------------------------------------===//
-// NEON pattern fragments
-//===----------------------------------------------------------------------===//
-
-// Extract D sub-registers of Q registers.
-def DSubReg_i8_reg : SDNodeXForm<imm, [{
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, SDLoc(N),
- MVT::i32);
-}]>;
-def DSubReg_i16_reg : SDNodeXForm<imm, [{
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, SDLoc(N),
- MVT::i32);
-}]>;
-def DSubReg_i32_reg : SDNodeXForm<imm, [{
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, SDLoc(N),
- MVT::i32);
-}]>;
-def DSubReg_f64_reg : SDNodeXForm<imm, [{
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), SDLoc(N),
- MVT::i32);
-}]>;
-
-// Extract S sub-registers of Q/D registers.
-def SSubReg_f32_reg : SDNodeXForm<imm, [{
- assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
- return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), SDLoc(N),
- MVT::i32);
-}]>;
-
-// Extract S sub-registers of Q/D registers containing a given f16 lane.
-def SSubReg_f16_reg : SDNodeXForm<imm, [{
- assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
- return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue()/2, SDLoc(N),
- MVT::i32);
-}]>;
-
-// Translate lane numbers from Q registers to D subregs.
-def SubReg_i8_lane : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() & 7, SDLoc(N), MVT::i32);
-}]>;
-def SubReg_i16_lane : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() & 3, SDLoc(N), MVT::i32);
-}]>;
-def SubReg_i32_lane : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i32);
-}]>;
-
-//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//
@@ -4367,7 +4312,7 @@ def : Pat<(v2f32 (fmul DPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
(i32 0))>;
def : Pat<(v4f16 (fmul DPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
(VMULslhd DPR:$Rn,
- (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
+ (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
(i32 0))>;
def : Pat<(v4f32 (fmul QPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
(VMULslfq QPR:$Rn,
@@ -4375,7 +4320,7 @@ def : Pat<(v4f32 (fmul QPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
(i32 0))>;
def : Pat<(v8f16 (fmul QPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
(VMULslhq QPR:$Rn,
- (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
+ (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
(i32 0))>;
}
@@ -4433,17 +4378,17 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
DecoderNamespace = "NEONData" in {
defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
- "vmull", "s", NEONvmulls, 1>;
+ "vmull", "s", ARMvmulls, 1>;
defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
- "vmull", "u", NEONvmullu, 1>;
+ "vmull", "u", ARMvmullu, 1>;
def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
v8i16, v8i8, int_arm_neon_vmullp, 1>;
def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
"vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
Requires<[HasV8, HasCrypto]>;
}
-defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
-defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
+defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", ARMvmulls>;
+defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", ARMvmullu>;
// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
@@ -4513,12 +4458,12 @@ def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlal", "s", NEONvmulls, add>;
+ "vmlal", "s", ARMvmulls, add>;
defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlal", "u", NEONvmullu, add>;
+ "vmlal", "u", ARMvmullu, add>;
-defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
-defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
+defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", ARMvmulls, add>;
+defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", ARMvmullu, add>;
let Predicates = [HasNEON, HasV8_1a] in {
// v8.1a Neon Rounding Double Multiply-Op vector operations,
@@ -4746,12 +4691,12 @@ def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlsl", "s", NEONvmulls, sub>;
+ "vmlsl", "s", ARMvmulls, sub>;
defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlsl", "u", NEONvmullu, sub>;
+ "vmlsl", "u", ARMvmullu, sub>;
-defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
-defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
+defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", ARMvmulls, sub>;
+defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", ARMvmullu, sub>;
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
@@ -4833,10 +4778,10 @@ def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
// We put them in the VFPV8 decoder namespace because the ARM and Thumb
// encodings are the same and thus no further bit twiddling is necessary
// in the disassembler.
-class VDOT<bit op6, bit op4, RegisterClass RegTy, string Asm, string AsmTy,
- ValueType AccumTy, ValueType InputTy,
+class VDOT<bit op6, bit op4, bit op23, RegisterClass RegTy, string Asm,
+ string AsmTy, ValueType AccumTy, ValueType InputTy,
SDPatternOperator OpNode> :
- N3Vnp<0b11000, 0b10, 0b1101, op6, op4, (outs RegTy:$dst),
+ N3Vnp<{0b1100, op23}, 0b10, 0b1101, op6, op4, (outs RegTy:$dst),
(ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD,
Asm, AsmTy,
[(set (AccumTy RegTy:$dst),
@@ -4848,10 +4793,10 @@ class VDOT<bit op6, bit op4, RegisterClass RegTy, string Asm, string AsmTy,
let Constraints = "$dst = $Vd";
}
-def VUDOTD : VDOT<0, 1, DPR, "vudot", "u8", v2i32, v8i8, int_arm_neon_udot>;
-def VSDOTD : VDOT<0, 0, DPR, "vsdot", "s8", v2i32, v8i8, int_arm_neon_sdot>;
-def VUDOTQ : VDOT<1, 1, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>;
-def VSDOTQ : VDOT<1, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>;
+def VUDOTD : VDOT<0, 1, 0, DPR, "vudot", "u8", v2i32, v8i8, int_arm_neon_udot>;
+def VSDOTD : VDOT<0, 0, 0, DPR, "vsdot", "s8", v2i32, v8i8, int_arm_neon_sdot>;
+def VUDOTQ : VDOT<1, 1, 0, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>;
+def VSDOTQ : VDOT<1, 0, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>;
// Indexed dot product instructions:
multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty,
@@ -4886,6 +4831,68 @@ defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8,
defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8,
int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
+// v8.6A matrix multiplication extension
+let Predicates = [HasMatMulInt8] in {
+ class N3VMatMul<bit B, bit U, string Asm, string AsmTy,
+ SDPatternOperator OpNode>
+ : N3Vnp<{0b1100, B}, 0b10, 0b1100, 1, U, (outs QPR:$dst),
+ (ins QPR:$Vd, QPR:$Vn, QPR:$Vm), N3RegFrm, NoItinerary,
+ Asm, AsmTy,
+ [(set (v4i32 QPR:$dst), (OpNode (v4i32 QPR:$Vd),
+ (v16i8 QPR:$Vn),
+ (v16i8 QPR:$Vm)))]> {
+ let DecoderNamespace = "VFPV8";
+ let Constraints = "$dst = $Vd";
+ }
+
+ multiclass N3VMixedDotLane<bit Q, bit U, string Asm, string AsmTy, RegisterClass RegTy,
+ ValueType AccumTy, ValueType InputTy, SDPatternOperator OpNode,
+ dag RHS> {
+
+ def "" : N3Vnp<0b11101, 0b00, 0b1101, Q, U, (outs RegTy:$dst),
+ (ins RegTy:$Vd, RegTy:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), N3RegFrm,
+ NoItinerary, Asm, AsmTy, []> {
+ bit lane;
+ let Inst{5} = lane;
+ let AsmString = !strconcat(Asm, ".", AsmTy, "\t$Vd, $Vn, $Vm$lane");
+ let DecoderNamespace = "VFPV8";
+ let Constraints = "$dst = $Vd";
+ }
+
+ def : Pat<
+ (AccumTy (OpNode (AccumTy RegTy:$Vd),
+ (InputTy RegTy:$Vn),
+ (InputTy (bitconvert (AccumTy
+ (ARMvduplane (AccumTy RegTy:$Vm),
+ VectorIndex32:$lane)))))),
+ (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
+
+ }
+
+ multiclass SUDOTLane<bit Q, RegisterClass RegTy, ValueType AccumTy, ValueType InputTy, dag RHS>
+ : N3VMixedDotLane<Q, 1, "vsudot", "u8", RegTy, AccumTy, InputTy, null_frag, null_frag> {
+ def : Pat<
+ (AccumTy (int_arm_neon_usdot (AccumTy RegTy:$Vd),
+ (InputTy (bitconvert (AccumTy
+ (ARMvduplane (AccumTy RegTy:$Vm),
+ VectorIndex32:$lane)))),
+ (InputTy RegTy:$Vn))),
+ (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
+ }
+
+ def VSMMLA : N3VMatMul<0, 0, "vsmmla", "s8", int_arm_neon_smmla>;
+ def VUMMLA : N3VMatMul<0, 1, "vummla", "u8", int_arm_neon_ummla>;
+ def VUSMMLA : N3VMatMul<1, 0, "vusmmla", "s8", int_arm_neon_usmmla>;
+ def VUSDOTD : VDOT<0, 0, 1, DPR, "vusdot", "s8", v2i32, v8i8, int_arm_neon_usdot>;
+ def VUSDOTQ : VDOT<1, 0, 1, QPR, "vusdot", "s8", v4i32, v16i8, int_arm_neon_usdot>;
+
+ defm VUSDOTDI : N3VMixedDotLane<0, 0, "vusdot", "s8", DPR, v2i32, v8i8,
+ int_arm_neon_usdot, (v2i32 DPR_VFP2:$Vm)>;
+ defm VUSDOTQI : N3VMixedDotLane<1, 0, "vusdot", "s8", QPR, v4i32, v16i8,
+ int_arm_neon_usdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
+ defm VSUDOTDI : SUDOTLane<0, DPR, v2i32, v8i8, (v2i32 DPR_VFP2:$Vm)>;
+ defm VSUDOTQI : SUDOTLane<1, QPR, v4i32, v16i8, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
+}
// ARMv8.3 complex operations
class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q,
@@ -5232,7 +5239,6 @@ class VFMQ<string opc, string type, bits<2> S>
let Inst{3} = idx{0};
}
-let hasNoSchedulingInfo = 1 in {
// op1 op2 op3
def VFMALD : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
def VFMSLD : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
@@ -5242,7 +5248,6 @@ def VFMALDI : VFMD<"vfmal", "f16", 0b00>;
def VFMSLDI : VFMD<"vfmsl", "f16", 0b01>;
def VFMALQI : VFMQ<"vfmal", "f16", 0b00>;
def VFMSLQI : VFMQ<"vfmsl", "f16", 0b01>;
-}
} // HasNEON, HasFP16FML
@@ -5296,7 +5301,7 @@ def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
IIC_VMOVImm,
"vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
- (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+ (v4i16 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
@@ -5305,7 +5310,7 @@ def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
IIC_VMOVImm,
"vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
- (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+ (v2i32 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
let Inst{10-9} = SIMM{10-9};
}
@@ -5314,7 +5319,7 @@ def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
IIC_VMOVImm,
"vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
- (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+ (v8i16 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
@@ -5323,7 +5328,7 @@ def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
IIC_VMOVImm,
"vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
- (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+ (v4i32 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
let Inst{10-9} = SIMM{10-9};
}
@@ -5347,7 +5352,7 @@ def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
IIC_VMOVImm,
"vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
- (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+ (v4i16 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
@@ -5356,7 +5361,7 @@ def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
IIC_VMOVImm,
"vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
- (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+ (v2i32 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
let Inst{10-9} = SIMM{10-9};
}
@@ -5365,7 +5370,7 @@ def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
IIC_VMOVImm,
"vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
- (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+ (v8i16 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
@@ -5374,7 +5379,7 @@ def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
IIC_VMOVImm,
"vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
- (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+ (v4i32 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
let Inst{10-9} = SIMM{10-9};
}
@@ -6354,32 +6359,57 @@ def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
(EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
}
-def imm_even : ImmLeaf<i32, [{ return (Imm & 1) == 0; }]>;
-def imm_odd : ImmLeaf<i32, [{ return (Imm & 1) == 1; }]>;
-
-let Predicates = [HasNEON] in {
-def : Pat<(extractelt (v4f16 DPR:$src), imm_even:$lane),
- (EXTRACT_SUBREG
- (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)),
- (SSubReg_f16_reg imm_even:$lane))>;
+multiclass ExtractEltEvenF16<ValueType VT4, ValueType VT8> {
+ def : Pat<(extractelt (VT4 DPR:$src), imm_even:$lane),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
+ (SSubReg_f16_reg imm_even:$lane))>;
+ def : Pat<(extractelt (VT8 QPR:$src), imm_even:$lane),
+ (EXTRACT_SUBREG
+ (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
+ (SSubReg_f16_reg imm_even:$lane))>;
+}
-def : Pat<(extractelt (v4f16 DPR:$src), imm_odd:$lane),
+multiclass ExtractEltOddF16VMOVH<ValueType VT4, ValueType VT8> {
+ def : Pat<(extractelt (VT4 DPR:$src), imm_odd:$lane),
(COPY_TO_REGCLASS
(VMOVH (EXTRACT_SUBREG
- (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)),
- (SSubReg_f16_reg imm_odd:$lane))),
+ (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
+ (SSubReg_f16_reg imm_odd:$lane))),
HPR)>;
+ def : Pat<(extractelt (VT8 QPR:$src), imm_odd:$lane),
+ (COPY_TO_REGCLASS
+ (VMOVH (EXTRACT_SUBREG
+ (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
+ (SSubReg_f16_reg imm_odd:$lane))),
+ HPR)>;
+}
+
+let Predicates = [HasNEON] in {
+ defm : ExtractEltEvenF16<v4f16, v8f16>;
+ defm : ExtractEltOddF16VMOVH<v4f16, v8f16>;
+}
+
+let AddedComplexity = 1, Predicates = [HasNEON, HasBF16, HasFullFP16] in {
+  // If VMOVH (vmovx.f16) is available, use it to extract BF16 from the odd lanes
+ defm : ExtractEltOddF16VMOVH<v4bf16, v8bf16>;
+}
-def : Pat<(extractelt (v8f16 QPR:$src), imm_even:$lane),
- (EXTRACT_SUBREG
- (v4f32 (COPY_TO_REGCLASS (v8f16 QPR:$src), QPR_VFP2)),
- (SSubReg_f16_reg imm_even:$lane))>;
+let Predicates = [HasBF16, HasNEON] in {
+ defm : ExtractEltEvenF16<v4bf16, v8bf16>;
-def : Pat<(extractelt (v8f16 QPR:$src), imm_odd:$lane),
+  // Otherwise, if VMOVH is not available, resort to extracting the odd lane
+  // into a GPR and then moving it to HPR
+ def : Pat<(extractelt (v4bf16 DPR:$src), imm_odd:$lane),
(COPY_TO_REGCLASS
- (VMOVH (EXTRACT_SUBREG
- (v4f32 (COPY_TO_REGCLASS (v8f16 QPR:$src), QPR_VFP2)),
- (SSubReg_f16_reg imm_odd:$lane))),
+ (VGETLNu16 (v4bf16 DPR:$src), imm:$lane),
+ HPR)>;
+
+ def : Pat<(extractelt (v8bf16 QPR:$src), imm_odd:$lane),
+ (COPY_TO_REGCLASS
+ (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)),
HPR)>;
}
@@ -6415,6 +6445,21 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
}
}
+// TODO: for odd lanes we could optimize this a bit by using the VINS
+// FullFP16 instruction when it is available
+multiclass InsertEltF16<ValueType VTScalar, ValueType VT4, ValueType VT8> {
+ def : Pat<(insertelt (VT4 DPR:$src1), (VTScalar HPR:$src2), imm:$lane),
+ (VT4 (VSETLNi16 DPR:$src1,
+ (COPY_TO_REGCLASS HPR:$src2, GPR), imm:$lane))>;
+ def : Pat<(insertelt (VT8 QPR:$src1), (VTScalar HPR:$src2), imm:$lane),
+ (VT8 (INSERT_SUBREG QPR:$src1,
+ (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i16_reg imm:$lane))),
+ (COPY_TO_REGCLASS HPR:$src2, GPR),
+ (SubReg_i16_lane imm:$lane))),
+ (DSubReg_i16_reg imm:$lane)))>;
+}
+
let Predicates = [HasNEON] in {
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
(v16i8 (INSERT_SUBREG QPR:$src1,
@@ -6442,14 +6487,7 @@ def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
(INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
-def : Pat<(insertelt (v4f16 DPR:$src1), HPR:$src2, imm:$lane),
- (v4f16 (VSETLNi16 DPR:$src1, (VMOVRH $src2), imm:$lane))>;
-def : Pat<(insertelt (v8f16 QPR:$src1), HPR:$src2, imm:$lane),
- (v8f16 (INSERT_SUBREG QPR:$src1,
- (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
- (DSubReg_i16_reg imm:$lane))),
- (VMOVRH $src2), (SubReg_i16_lane imm:$lane))),
- (DSubReg_i16_reg imm:$lane)))>;
+defm : InsertEltF16<f16, v4f16, v8f16>;
//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
// (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
@@ -6484,6 +6522,9 @@ def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
dsub_0)>;
}
+let Predicates = [HasNEON, HasBF16] in
+defm : InsertEltF16<bf16, v4bf16, v8bf16>;
+
// VDUP : Vector Duplicate (from ARM core register to all elements)
class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
@@ -6588,18 +6629,35 @@ def : Pat<(v4f32 (ARMvduplane (v4f32 QPR:$src), imm:$lane)),
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
-def : Pat<(v4f16 (ARMvdup HPR:$src)),
+def : Pat<(v4f16 (ARMvdup (f16 HPR:$src))),
(v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
- HPR:$src, ssub_0), (i32 0)))>;
+ (f16 HPR:$src), ssub_0), (i32 0)))>;
def : Pat<(v2f32 (ARMvdup (f32 SPR:$src))),
(v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
SPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v4f32 (ARMvdup (f32 SPR:$src))),
(v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
SPR:$src, ssub_0), (i32 0)))>;
-def : Pat<(v8f16 (ARMvdup HPR:$src)),
+def : Pat<(v8f16 (ARMvdup (f16 HPR:$src))),
(v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
- HPR:$src, ssub_0), (i32 0)))>;
+ (f16 HPR:$src), ssub_0), (i32 0)))>;
+}
+
+let Predicates = [HasNEON, HasBF16] in {
+def : Pat<(v4bf16 (ARMvduplane (v4bf16 DPR:$Vm), imm:$lane)),
+ (VDUPLN16d DPR:$Vm, imm:$lane)>;
+
+def : Pat<(v8bf16 (ARMvduplane (v8bf16 QPR:$src), imm:$lane)),
+ (v8bf16 (VDUPLN16q (v4bf16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+
+def : Pat<(v4bf16 (ARMvdup (bf16 HPR:$src))),
+ (v4bf16 (VDUPLN16d (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)),
+ (bf16 HPR:$src), ssub_0), (i32 0)))>;
+def : Pat<(v8bf16 (ARMvdup (bf16 HPR:$src))),
+ (v8bf16 (VDUPLN16q (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)),
+ (bf16 HPR:$src), ssub_0), (i32 0)))>;
}
// VMOVN : Vector Narrowing Move
@@ -7330,7 +7388,7 @@ def : Pat<(arm_vmovsr GPR:$a),
Requires<[HasNEON, DontUseVMOVSR]>;
//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns or Endiness - Revert Patterns
+// Non-Instruction Patterns or Endianness - Revert Patterns
//===----------------------------------------------------------------------===//
// bit_convert
@@ -7345,6 +7403,9 @@ def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v4f16 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4f16 (bitconvert (v4i16 DPR:$src))), (v4f16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v4bf16 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4bf16 (bitconvert (v4i16 DPR:$src))), (v4bf16 DPR:$src)>;
+
// 128 bit conversions
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
@@ -7354,6 +7415,9 @@ def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16 QPR:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v8bf16 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8bf16 (bitconvert (v8i16 QPR:$src))), (v8bf16 QPR:$src)>;
}
let Predicates = [IsLE,HasNEON] in {
@@ -7361,24 +7425,28 @@ let Predicates = [IsLE,HasNEON] in {
def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v4bf16 DPR:$src))), (f64 DPR:$src)>;
def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (v1i64 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v4bf16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (v2f32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v4bf16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (v2i32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v4bf16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
@@ -7388,6 +7456,12 @@ let Predicates = [IsLE,HasNEON] in {
def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (v4f16 DPR:$src)>;
def : Pat<(v4f16 (bitconvert (v8i8 DPR:$src))), (v4f16 DPR:$src)>;
+ def : Pat<(v4bf16 (bitconvert (f64 DPR:$src))), (v4bf16 DPR:$src)>;
+ def : Pat<(v4bf16 (bitconvert (v1i64 DPR:$src))), (v4bf16 DPR:$src)>;
+ def : Pat<(v4bf16 (bitconvert (v2f32 DPR:$src))), (v4bf16 DPR:$src)>;
+ def : Pat<(v4bf16 (bitconvert (v2i32 DPR:$src))), (v4bf16 DPR:$src)>;
+ def : Pat<(v4bf16 (bitconvert (v8i8 DPR:$src))), (v4bf16 DPR:$src)>;
+
def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
@@ -7399,30 +7473,35 @@ let Predicates = [IsLE,HasNEON] in {
def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v4f16 DPR:$src))), (v8i8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v4bf16 DPR:$src))), (v8i8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
// 128 bit conversions
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8bf16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8bf16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8bf16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8bf16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
@@ -7432,6 +7511,12 @@ let Predicates = [IsLE,HasNEON] in {
def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>;
def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>;
+ def : Pat<(v8bf16 (bitconvert (v2f64 QPR:$src))), (v8bf16 QPR:$src)>;
+ def : Pat<(v8bf16 (bitconvert (v2i64 QPR:$src))), (v8bf16 QPR:$src)>;
+ def : Pat<(v8bf16 (bitconvert (v4f32 QPR:$src))), (v8bf16 QPR:$src)>;
+ def : Pat<(v8bf16 (bitconvert (v4i32 QPR:$src))), (v8bf16 QPR:$src)>;
+ def : Pat<(v8bf16 (bitconvert (v16i8 QPR:$src))), (v8bf16 QPR:$src)>;
+
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
@@ -7443,6 +7528,7 @@ let Predicates = [IsLE,HasNEON] in {
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8bf16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
}
@@ -7451,24 +7537,28 @@ let Predicates = [IsBE,HasNEON] in {
def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v4bf16 DPR:$src))), (VREV64d16 DPR:$src)>;
def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v4bf16 DPR:$src))), (VREV64d16 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v4bf16 DPR:$src))), (VREV32d16 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v4bf16 DPR:$src))), (VREV32d16 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
@@ -7478,6 +7568,12 @@ let Predicates = [IsBE,HasNEON] in {
def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
def : Pat<(v4f16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
+ def : Pat<(v4bf16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(v4bf16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(v4bf16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v4bf16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v4bf16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
+
def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
@@ -7489,30 +7585,35 @@ let Predicates = [IsBE,HasNEON] in {
def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v4f16 DPR:$src))), (VREV16d8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v4bf16 DPR:$src))), (VREV16d8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>;
// 128 bit conversions
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8bf16 QPR:$src))), (VREV64q16 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8bf16 QPR:$src))), (VREV64q16 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8bf16 QPR:$src))), (VREV32q16 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8bf16 QPR:$src))), (VREV32q16 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
@@ -7522,6 +7623,12 @@ let Predicates = [IsBE,HasNEON] in {
def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
+ def : Pat<(v8bf16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v8bf16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v8bf16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v8bf16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v8bf16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
+
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
@@ -7533,9 +7640,26 @@ let Predicates = [IsBE,HasNEON] in {
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (VREV16q8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8bf16 QPR:$src))), (VREV16q8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>;
}
+let Predicates = [HasNEON] in {
+ // Here we match the specific SDNode type 'ARMVectorRegCastImpl'
+ // rather than the more general 'ARMVectorRegCast' which would also
+ // match some bitconverts. If we use the latter in cases where the
+ // input and output types are the same, the bitconvert gets elided
+ // and we end up generating a nonsense match of nothing.
+
+ foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
+ foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
+ def : Pat<(VT (ARMVectorRegCastImpl (VT2 QPR:$src))), (VT QPR:$src)>;
+
+ foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in
+ foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in
+ def : Pat<(VT (ARMVectorRegCastImpl (VT2 DPR:$src))), (VT DPR:$src)>;
+}
+
// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
let Predicates = [IsBE,HasNEON] in {
def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
@@ -7863,6 +7987,8 @@ def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
(REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v8f16 (concat_vectors DPR:$Dn, DPR:$Dm)),
(REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v8bf16 (concat_vectors DPR:$Dn, DPR:$Dm)),
+ (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
}
//===----------------------------------------------------------------------===//
@@ -8915,3 +9041,115 @@ def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
(VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
(VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
+
+// ARMv8.6a BFloat16 instructions.
+let Predicates = [HasBF16, HasNEON] in {
+class BF16VDOT<bits<5> op27_23, bits<2> op21_20, bit op6,
+ dag oops, dag iops, list<dag> pattern>
+ : N3Vnp<op27_23, op21_20, 0b1101, op6, 0, oops, iops,
+ N3RegFrm, IIC_VDOTPROD, "", "", pattern>
+{
+ let DecoderNamespace = "VFPV8";
+}
+
+class BF16VDOTS<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy, ValueType InputTy>
+ : BF16VDOT<0b11000, 0b00, Q, (outs RegTy:$dst),
+ (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
+ [(set (AccumTy RegTy:$dst),
+ (int_arm_neon_bfdot (AccumTy RegTy:$Vd),
+ (InputTy RegTy:$Vn),
+ (InputTy RegTy:$Vm)))]> {
+ let Constraints = "$dst = $Vd";
+ let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
+ let DecoderNamespace = "VFPV8";
+}
+
+multiclass BF16VDOTI<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy,
+ ValueType InputTy, dag RHS> {
+
+ def "" : BF16VDOT<0b11100, 0b00, Q, (outs RegTy:$dst),
+ (ins RegTy:$Vd, RegTy:$Vn,
+ DPR_VFP2:$Vm, VectorIndex32:$lane), []> {
+ bit lane;
+ let Inst{5} = lane;
+ let Constraints = "$dst = $Vd";
+ let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm$lane");
+ let DecoderNamespace = "VFPV8";
+ }
+
+ def : Pat<
+ (AccumTy (int_arm_neon_bfdot (AccumTy RegTy:$Vd),
+ (InputTy RegTy:$Vn),
+ (InputTy (bitconvert (AccumTy
+ (ARMvduplane (AccumTy RegTy:$Vm),
+ VectorIndex32:$lane)))))),
+ (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
+}
+
+def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v8i8>;
+def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v16i8>;
+
+defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v8i8, (v2f32 DPR_VFP2:$Vm)>;
+defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v16i8, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
+
+class BF16MM<bit Q, RegisterClass RegTy,
+ string opc>
+ : N3Vnp<0b11000, 0b00, 0b1100, Q, 0,
+ (outs RegTy:$dst), (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
+ N3RegFrm, IIC_VDOTPROD, "", "",
+ [(set (v4f32 QPR:$dst), (int_arm_neon_bfmmla (v4f32 QPR:$Vd),
+ (v16i8 QPR:$Vn),
+ (v16i8 QPR:$Vm)))]> {
+ let Constraints = "$dst = $Vd";
+ let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
+ let DecoderNamespace = "VFPV8";
+}
+
+def VMMLA : BF16MM<1, QPR, "vmmla">;
+
+class VBF16MALQ<bit T, string suffix, SDPatternOperator OpNode>
+ : N3VCP8<0b00, 0b11, T, 1,
+ (outs QPR:$dst), (ins QPR:$Vd, QPR:$Vn, QPR:$Vm),
+ NoItinerary, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm", "",
+ [(set (v4f32 QPR:$dst),
+ (OpNode (v4f32 QPR:$Vd),
+ (v16i8 QPR:$Vn),
+ (v16i8 QPR:$Vm)))]> {
+ let Constraints = "$dst = $Vd";
+ let DecoderNamespace = "VFPV8";
+}
+
+def VBF16MALTQ: VBF16MALQ<1, "t", int_arm_neon_bfmlalt>;
+def VBF16MALBQ: VBF16MALQ<0, "b", int_arm_neon_bfmlalb>;
+
+multiclass VBF16MALQI<bit T, string suffix, SDPatternOperator OpNode> {
+ def "" : N3VLaneCP8<0, 0b11, T, 1, (outs QPR:$dst),
+ (ins QPR:$Vd, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
+ IIC_VMACD, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm$idx", "", []> {
+ bits<2> idx;
+ let Inst{5} = idx{1};
+ let Inst{3} = idx{0};
+ let Constraints = "$dst = $Vd";
+ let DecoderNamespace = "VFPV8";
+ }
+
+ def : Pat<
+ (v4f32 (OpNode (v4f32 QPR:$Vd),
+ (v16i8 QPR:$Vn),
+ (v16i8 (bitconvert (v8bf16 (ARMvduplane (v8bf16 QPR:$Vm),
+ VectorIndex16:$lane)))))),
+ (!cast<Instruction>(NAME) QPR:$Vd,
+ QPR:$Vn,
+ (EXTRACT_SUBREG QPR:$Vm,
+ (DSubReg_i16_reg VectorIndex16:$lane)),
+ (SubReg_i16_lane VectorIndex16:$lane))>;
+}
+
+defm VBF16MALTQI: VBF16MALQI<1, "t", int_arm_neon_bfmlalt>;
+defm VBF16MALBQI: VBF16MALQI<0, "b", int_arm_neon_bfmlalb>;
+
+def BF16_VCVT : N2V<0b11, 0b11, 0b01, 0b10, 0b01100, 1, 0,
+ (outs DPR:$Vd), (ins QPR:$Vm),
+ NoItinerary, "vcvt", "bf16.f32", "$Vd, $Vm", "", []>;
+}
+// End of BFloat16 instructions
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td
index 18bcbda44580..7fae32117243 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -14,6 +14,10 @@
// Thumb specific DAG Nodes.
//
+def ARMtsecall : SDNode<"ARMISD::tSECALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
def imm_sr_XFORM: SDNodeXForm<imm, [{
unsigned Imm = N->getZExtValue();
return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), SDLoc(N), MVT::i32);
@@ -499,6 +503,10 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def tBX_RET : tPseudoExpand<(outs), (ins pred:$p), 2, IIC_Br,
[(ARMretflag)], (tBX LR, pred:$p)>, Sched<[WriteBr]>;
+  // Alternative return for CMSE entry functions
+ def tBXNS_RET : tPseudoInst<(outs), (ins), 2, IIC_Br,
+ [(ARMseretflag)]>, Sched<[WriteBr]>;
+
// Alternative return instruction used by vararg functions.
def tBX_RET_vararg : tPseudoExpand<(outs), (ins tGPR:$Rm, pred:$p),
2, IIC_Br, [],
@@ -560,6 +568,10 @@ let isCall = 1,
let Unpredictable{1-0} = 0b11;
}
+ def tBLXNS_CALL : PseudoInst<(outs), (ins GPRnopc:$func), IIC_Br,
+ [(ARMtsecall GPRnopc:$func)]>,
+ Requires<[IsThumb, Has8MSecExt]>, Sched<[WriteBr]>;
+
// ARMv4T
def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func),
4, IIC_Br,
@@ -1513,7 +1525,7 @@ def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br,
// tromped upon when we get here from a longjmp(). We force everything out of
// registers except for our own input by listing the relevant registers in
// Defs. By doing so, we also cause the prologue/epilogue code to actively
-// preserve all of the callee-saved resgisters, which is exactly what we want.
+// preserve all of the callee-saved registers, which is exactly what we want.
// $val is a scratch register for our use.
let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ],
hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td
index c5aae235f25d..7137e8ee66b8 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -270,7 +270,8 @@ def t2am_imm8_offset : MemOperand,
// t2addrmode_imm8s4 := reg +/- (imm8 << 2)
def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";}
-class T2AddrMode_Imm8s4 : MemOperand {
+class T2AddrMode_Imm8s4 : MemOperand,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm8<2>", []> {
let EncoderMethod = "getT2AddrModeImm8s4OpValue";
let DecoderMethod = "DecodeT2AddrModeImm8s4";
let ParserMatchClass = MemImm8s4OffsetAsmOperand;
@@ -1448,7 +1449,8 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
(ins t2addrmode_imm8s4:$addr),
- IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>,
+ IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "",
+ [(set rGPR:$Rt, rGPR:$Rt2, (ARMldrd t2addrmode_imm8s4:$addr))]>,
Sched<[WriteLd]>;
} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
@@ -1629,7 +1631,8 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
(ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
- IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>,
+ IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "",
+ [(ARMstrd rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr)]>,
Sched<[WriteST]>;
// Indexed stores
@@ -1745,7 +1748,7 @@ def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
// ldrd / strd pre / post variants
-let mayLoad = 1 in
+let mayLoad = 1, hasSideEffects = 0 in
def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
(ins t2addrmode_imm8s4_pre:$addr), IIC_iLoad_d_ru,
"ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []>,
@@ -1753,13 +1756,13 @@ def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
let DecoderMethod = "DecodeT2LDRDPreInstruction";
}
-let mayLoad = 1 in
+let mayLoad = 1, hasSideEffects = 0 in
def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
(ins addr_offset_none:$addr, t2am_imm8s4_offset:$imm),
IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr$imm",
"$addr.base = $wb", []>, Sched<[WriteLd]>;
-let mayStore = 1 in
+let mayStore = 1, hasSideEffects = 0 in
def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb),
(ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4_pre:$addr),
IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!",
@@ -1767,7 +1770,7 @@ def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb),
let DecoderMethod = "DecodeT2STRDPreInstruction";
}
-let mayStore = 1 in
+let mayStore = 1, hasSideEffects = 0 in
def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb),
(ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr,
t2am_imm8s4_offset:$imm),
@@ -1871,6 +1874,34 @@ defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>;
defm t2PLDW : T2Ipl<1, 0, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>;
defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>;
+// PLD/PLDW/PLI aliases w/ the optional .w suffix
+def : t2InstAlias<"pld${p}.w\t$addr",
+ (t2PLDi12 t2addrmode_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"pld${p}.w\t$addr",
+ (t2PLDi8 t2addrmode_negimm8:$addr, pred:$p)>;
+def : t2InstAlias<"pld${p}.w\t$addr",
+ (t2PLDs t2addrmode_so_reg:$addr, pred:$p)>;
+
+def : InstAlias<"pldw${p}.w\t$addr",
+ (t2PLDWi12 t2addrmode_imm12:$addr, pred:$p), 0>,
+ Requires<[IsThumb2,HasV7,HasMP]>;
+def : InstAlias<"pldw${p}.w\t$addr",
+ (t2PLDWi8 t2addrmode_negimm8:$addr, pred:$p), 0>,
+ Requires<[IsThumb2,HasV7,HasMP]>;
+def : InstAlias<"pldw${p}.w\t$addr",
+ (t2PLDWs t2addrmode_so_reg:$addr, pred:$p), 0>,
+ Requires<[IsThumb2,HasV7,HasMP]>;
+
+def : InstAlias<"pli${p}.w\t$addr",
+ (t2PLIi12 t2addrmode_imm12:$addr, pred:$p), 0>,
+ Requires<[IsThumb2,HasV7]>;
+def : InstAlias<"pli${p}.w\t$addr",
+ (t2PLIi8 t2addrmode_negimm8:$addr, pred:$p), 0>,
+ Requires<[IsThumb2,HasV7]>;
+def : InstAlias<"pli${p}.w\t$addr",
+ (t2PLIs t2addrmode_so_reg:$addr, pred:$p), 0>,
+ Requires<[IsThumb2,HasV7]>;
+
// pci variant is very similar to i12, but supports negative offsets
// from the PC. Only PLD and PLI have pci variants (not PLDW)
class T2Iplpci<bits<1> inst, string opc> : T2Iso<(outs), (ins t2ldrlabel:$addr),
@@ -1893,6 +1924,24 @@ class T2Iplpci<bits<1> inst, string opc> : T2Iso<(outs), (ins t2ldrlabel:$addr),
def t2PLDpci : T2Iplpci<0, "pld">, Requires<[IsThumb2]>;
def t2PLIpci : T2Iplpci<1, "pli">, Requires<[IsThumb2,HasV7]>;
+def : t2InstAlias<"pld${p}.w $addr",
+ (t2PLDpci t2ldrlabel:$addr, pred:$p)>;
+def : InstAlias<"pli${p}.w $addr",
+ (t2PLIpci t2ldrlabel:$addr, pred:$p), 0>,
+ Requires<[IsThumb2,HasV7]>;
+
+// PLD/PLI with alternate literal form.
+def : t2InstAlias<"pld${p} $addr",
+ (t2PLDpci t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : InstAlias<"pli${p} $addr",
+ (t2PLIpci t2ldr_pcrel_imm12:$addr, pred:$p), 0>,
+ Requires<[IsThumb2,HasV7]>;
+def : t2InstAlias<"pld${p}.w $addr",
+ (t2PLDpci t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : InstAlias<"pli${p}.w $addr",
+ (t2PLIpci t2ldr_pcrel_imm12:$addr, pred:$p), 0>,
+ Requires<[IsThumb2,HasV7]>;
+
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
//
@@ -2436,7 +2485,7 @@ def : Thumb2DSPPat<(int_arm_qadd rGPR:$Rm, rGPR:$Rn),
(t2QADD rGPR:$Rm, rGPR:$Rn)>;
def : Thumb2DSPPat<(int_arm_qsub rGPR:$Rm, rGPR:$Rn),
(t2QSUB rGPR:$Rm, rGPR:$Rn)>;
-def : Thumb2DSPPat<(int_arm_qadd(int_arm_qadd rGPR:$Rm, rGPR:$Rm), rGPR:$Rn),
+def : Thumb2DSPPat<(int_arm_qadd rGPR:$Rm, (int_arm_qadd rGPR:$Rn, rGPR:$Rn)),
(t2QDADD rGPR:$Rm, rGPR:$Rn)>;
def : Thumb2DSPPat<(int_arm_qsub rGPR:$Rm, (int_arm_qadd rGPR:$Rn, rGPR:$Rn)),
(t2QDSUB rGPR:$Rm, rGPR:$Rn)>;
@@ -2445,7 +2494,7 @@ def : Thumb2DSPPat<(saddsat rGPR:$Rm, rGPR:$Rn),
(t2QADD rGPR:$Rm, rGPR:$Rn)>;
def : Thumb2DSPPat<(ssubsat rGPR:$Rm, rGPR:$Rn),
(t2QSUB rGPR:$Rm, rGPR:$Rn)>;
-def : Thumb2DSPPat<(saddsat(saddsat rGPR:$Rm, rGPR:$Rm), rGPR:$Rn),
+def : Thumb2DSPPat<(saddsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)),
(t2QDADD rGPR:$Rm, rGPR:$Rn)>;
def : Thumb2DSPPat<(ssubsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)),
(t2QDSUB rGPR:$Rm, rGPR:$Rn)>;
@@ -2716,6 +2765,8 @@ def t2SBFX: T2TwoRegBitFI<
let Inst{25} = 1;
let Inst{24-20} = 0b10100;
let Inst{15} = 0;
+
+ let hasSideEffects = 0;
}
def t2UBFX: T2TwoRegBitFI<
@@ -2725,6 +2776,8 @@ def t2UBFX: T2TwoRegBitFI<
let Inst{25} = 1;
let Inst{24-20} = 0b11100;
let Inst{15} = 0;
+
+ let hasSideEffects = 0;
}
// A8.8.247 UDF - Undefined (Encoding T2)
@@ -3708,7 +3761,7 @@ def : T2Pat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr),
// when we get here from a longjmp(). We force everything out of registers
// except for our own input by listing the relevant registers in Defs. By
// doing so, we also cause the prologue/epilogue code to actively preserve
-// all of the callee-saved resgisters, which is exactly what we want.
+// all of the callee-saved registers, which is exactly what we want.
// $val is a scratch register for our use.
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
@@ -4147,7 +4200,7 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
imm:$cp))]>,
Requires<[IsThumb2]>;
-// Pseudo isntruction that combines movs + predicated rsbmi
+// Pseudo instruction that combines movs + predicated rsbmi
// to implement integer ABS
let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in {
def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
@@ -4848,9 +4901,15 @@ def : t2InstAlias<"tst${p} $Rn, $Rm",
(t2TSTrr rGPR:$Rn, rGPR:$Rm, pred:$p)>;
// Memory barriers
+def : InstAlias<"dmb${p}.w\t$opt", (t2DMB memb_opt:$opt, pred:$p), 0>, Requires<[HasDB]>;
def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p), 0>, Requires<[HasDB]>;
+def : InstAlias<"dmb${p}.w", (t2DMB 0xf, pred:$p), 0>, Requires<[HasDB]>;
+def : InstAlias<"dsb${p}.w\t$opt", (t2DSB memb_opt:$opt, pred:$p), 0>, Requires<[HasDB]>;
def : InstAlias<"dsb${p}", (t2DSB 0xf, pred:$p), 0>, Requires<[HasDB]>;
+def : InstAlias<"dsb${p}.w", (t2DSB 0xf, pred:$p), 0>, Requires<[HasDB]>;
+def : InstAlias<"isb${p}.w\t$opt", (t2ISB memb_opt:$opt, pred:$p), 0>, Requires<[HasDB]>;
def : InstAlias<"isb${p}", (t2ISB 0xf, pred:$p), 0>, Requires<[HasDB]>;
+def : InstAlias<"isb${p}.w", (t2ISB 0xf, pred:$p), 0>, Requires<[HasDB]>;
// Non-predicable aliases of a predicable DSB: the predicate is (14, 0) where
// 14 = AL (always execute) and 0 = "instruction doesn't read the CPSR".
@@ -5184,14 +5243,6 @@ def : t2InstAlias<"ldr${p}.w $Rt, $immediate",
(t2LDRConstPool GPRnopc:$Rt,
const_pool_asm_imm:$immediate, pred:$p)>;
-// PLD/PLDW/PLI with alternate literal form.
-def : t2InstAlias<"pld${p} $addr",
- (t2PLDpci t2ldr_pcrel_imm12:$addr, pred:$p)>;
-def : InstAlias<"pli${p} $addr",
- (t2PLIpci t2ldr_pcrel_imm12:$addr, pred:$p), 0>,
- Requires<[IsThumb2,HasV7]>;
-
-
//===----------------------------------------------------------------------===//
// ARMv8.1m instructions
//
@@ -5204,7 +5255,7 @@ class V8_1MI<dag oops, dag iops, AddrMode am, InstrItinClass itin, string asm,
def t2CLRM : V8_1MI<(outs),
(ins pred:$p, reglist_with_apsr:$regs, variable_ops),
- AddrModeNone, NoItinerary, "clrm", "${p}\t$regs", "", []> {
+ AddrModeNone, NoItinerary, "clrm${p}", "$regs", "", []> {
bits<16> regs;
let Inst{31-16} = 0b1110100010011111;
@@ -5357,6 +5408,7 @@ def t2DoLoopStart :
t2PseudoInst<(outs), (ins rGPR:$elts), 4, IIC_Br,
[(int_set_loop_iterations rGPR:$elts)]>, Sched<[WriteBr]>;
+let hasSideEffects = 0 in
def t2LoopDec :
t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size),
4, IIC_Br, []>, Sched<[WriteBr]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td
index f1d1d8a89164..8a652c1d90f6 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -158,11 +158,24 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
let isUnpredicable = 1 in
def VLDRH : AHI5<0b1101, 0b01, (outs HPR:$Sd), (ins addrmode5fp16:$addr),
IIC_fpLoad16, "vldr", ".16\t$Sd, $addr",
- [(set HPR:$Sd, (alignedload16 addrmode5fp16:$addr))]>,
+ [(set HPR:$Sd, (f16 (alignedload16 addrmode5fp16:$addr)))]>,
Requires<[HasFPRegs16]>;
} // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
+def : Pat<(bf16 (alignedload16 addrmode5fp16:$addr)),
+ (VLDRH addrmode5fp16:$addr)> {
+ let Predicates = [HasFPRegs16];
+}
+def : Pat<(bf16 (alignedload16 addrmode3:$addr)),
+ (COPY_TO_REGCLASS (LDRH addrmode3:$addr), HPR)> {
+ let Predicates = [HasNoFPRegs16, IsARM];
+}
+def : Pat<(bf16 (alignedload16 t2addrmode_imm12:$addr)),
+ (COPY_TO_REGCLASS (t2LDRHi12 t2addrmode_imm12:$addr), HPR)> {
+ let Predicates = [HasNoFPRegs16, IsThumb];
+}
+
def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
IIC_fpStore64, "vstr", "\t$Dd, $addr",
[(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>,
@@ -180,9 +193,22 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
let isUnpredicable = 1 in
def VSTRH : AHI5<0b1101, 0b00, (outs), (ins HPR:$Sd, addrmode5fp16:$addr),
IIC_fpStore16, "vstr", ".16\t$Sd, $addr",
- [(alignedstore16 HPR:$Sd, addrmode5fp16:$addr)]>,
+ [(alignedstore16 (f16 HPR:$Sd), addrmode5fp16:$addr)]>,
Requires<[HasFPRegs16]>;
+def : Pat<(alignedstore16 (bf16 HPR:$Sd), addrmode5fp16:$addr),
+ (VSTRH (bf16 HPR:$Sd), addrmode5fp16:$addr)> {
+ let Predicates = [HasFPRegs16];
+}
+def : Pat<(alignedstore16 (bf16 HPR:$Sd), addrmode3:$addr),
+ (STRH (COPY_TO_REGCLASS $Sd, GPR), addrmode3:$addr)> {
+ let Predicates = [HasNoFPRegs16, IsARM];
+}
+def : Pat<(alignedstore16 (bf16 HPR:$Sd), t2addrmode_imm12:$addr),
+ (t2STRHi12 (COPY_TO_REGCLASS $Sd, GPR), t2addrmode_imm12:$addr)> {
+ let Predicates = [HasNoFPRegs16, IsThumb];
+}
+
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
//
@@ -277,7 +303,6 @@ def : MnemonicAlias<"vstm", "vstmia">;
//===----------------------------------------------------------------------===//
// Lazy load / store multiple Instructions
//
-let mayLoad = 1 in
def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
NoItinerary, "vlldm${p}\t$Rn", "", []>,
Requires<[HasV8MMainline, Has8MSecExt]> {
@@ -288,9 +313,9 @@ def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
let Inst{15-12} = 0;
let Inst{7-0} = 0;
let mayLoad = 1;
+ let Defs = [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, VPR, FPSCR, FPSCR_NZCV];
}
-let mayStore = 1 in
def VLSTM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
NoItinerary, "vlstm${p}\t$Rn", "", []>,
Requires<[HasV8MMainline, Has8MSecExt]> {
@@ -387,7 +412,7 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VADDH : AHbI<0b11100, 0b11, 0, 0,
(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm",
- [(set HPR:$Sd, (fadd HPR:$Sn, HPR:$Sm))]>,
+ [(set (f16 HPR:$Sd), (fadd (f16 HPR:$Sn), (f16 HPR:$Sm)))]>,
Sched<[WriteFPALU32]>;
let TwoOperandAliasConstraint = "$Dn = $Dd" in
@@ -412,7 +437,7 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VSUBH : AHbI<0b11100, 0b11, 1, 0,
(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm",
- [(set HPR:$Sd, (fsub HPR:$Sn, HPR:$Sm))]>,
+ [(set (f16 HPR:$Sd), (fsub (f16 HPR:$Sn), (f16 HPR:$Sm)))]>,
Sched<[WriteFPALU32]>;
let TwoOperandAliasConstraint = "$Dn = $Dd" in
@@ -433,7 +458,7 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VDIVH : AHbI<0b11101, 0b00, 0, 0,
(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
IIC_fpDIV16, "vdiv", ".f16\t$Sd, $Sn, $Sm",
- [(set HPR:$Sd, (fdiv HPR:$Sn, HPR:$Sm))]>,
+ [(set (f16 HPR:$Sd), (fdiv (f16 HPR:$Sn), (f16 HPR:$Sm)))]>,
Sched<[WriteFPDIV32]>;
let TwoOperandAliasConstraint = "$Dn = $Dd" in
@@ -458,7 +483,7 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VMULH : AHbI<0b11100, 0b10, 0, 0,
(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
IIC_fpMUL16, "vmul", ".f16\t$Sd, $Sn, $Sm",
- [(set HPR:$Sd, (fmul HPR:$Sn, HPR:$Sm))]>,
+ [(set (f16 HPR:$Sd), (fmul (f16 HPR:$Sn), (f16 HPR:$Sm)))]>,
Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>;
def VNMULD : ADbI<0b11100, 0b10, 1, 0,
@@ -480,7 +505,7 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
def VNMULH : AHbI<0b11100, 0b10, 1, 0,
(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
IIC_fpMUL16, "vnmul", ".f16\t$Sd, $Sn, $Sm",
- [(set HPR:$Sd, (fneg (fmul HPR:$Sn, HPR:$Sm)))]>,
+ [(set (f16 HPR:$Sd), (fneg (fmul (f16 HPR:$Sn), (f16 HPR:$Sm))))]>,
Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>;
multiclass vsel_inst<string op, bits<2> opc, int CC> {
@@ -489,7 +514,7 @@ multiclass vsel_inst<string op, bits<2> opc, int CC> {
def H : AHbInp<0b11100, opc, 0,
(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
NoItinerary, !strconcat("vsel", op, ".f16\t$Sd, $Sn, $Sm"),
- [(set HPR:$Sd, (ARMcmov HPR:$Sm, HPR:$Sn, CC))]>,
+ [(set (f16 HPR:$Sd), (ARMcmov (f16 HPR:$Sm), (f16 HPR:$Sn), CC))]>,
Requires<[HasFullFP16]>;
def S : ASbInp<0b11100, opc, 0,
@@ -518,7 +543,7 @@ multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
def H : AHbInp<0b11101, 0b00, opc,
(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
NoItinerary, !strconcat(op, ".f16\t$Sd, $Sn, $Sm"),
- [(set HPR:$Sd, (SD HPR:$Sn, HPR:$Sm))]>,
+ [(set (f16 HPR:$Sd), (SD (f16 HPR:$Sn), (f16 HPR:$Sm)))]>,
Requires<[HasFullFP16]>;
def S : ASbInp<0b11101, 0b00, opc,
@@ -564,7 +589,7 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins HPR:$Sd, HPR:$Sm),
IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm",
- [(arm_cmpfpe HPR:$Sd, HPR:$Sm)]>;
+ [(arm_cmpfpe (f16 HPR:$Sd), (f16 HPR:$Sm))]>;
def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
@@ -583,7 +608,7 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
def VCMPH : AHuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins HPR:$Sd, HPR:$Sm),
IIC_fpCMP16, "vcmp", ".f16\t$Sd, $Sm",
- [(arm_cmpfp HPR:$Sd, HPR:$Sm)]>;
+ [(arm_cmpfp (f16 HPR:$Sd), (f16 HPR:$Sm))]>;
} // Defs = [FPSCR_NZCV]
//===----------------------------------------------------------------------===//
@@ -607,7 +632,7 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
def VABSH : AHuI<0b11101, 0b11, 0b0000, 0b11, 0,
(outs HPR:$Sd), (ins HPR:$Sm),
IIC_fpUNA16, "vabs", ".f16\t$Sd, $Sm",
- [(set HPR:$Sd, (fabs (f16 HPR:$Sm)))]>;
+ [(set (f16 HPR:$Sd), (fabs (f16 HPR:$Sm)))]>;
let Defs = [FPSCR_NZCV] in {
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
@@ -633,7 +658,7 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins HPR:$Sd),
IIC_fpCMP16, "vcmpe", ".f16\t$Sd, #0",
- [(arm_cmpfpe0 HPR:$Sd)]> {
+ [(arm_cmpfpe0 (f16 HPR:$Sd))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
@@ -661,7 +686,7 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins HPR:$Sd),
IIC_fpCMP16, "vcmp", ".f16\t$Sd, #0",
- [(arm_cmpfp0 HPR:$Sd)]> {
+ [(arm_cmpfp0 (f16 HPR:$Sd))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
@@ -683,6 +708,7 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
let Inst{22} = Dd{4};
let Predicates = [HasVFP2, HasDPVFP];
+ let hasSideEffects = 0;
}
// Special case encoding: bits 11-8 is 0b1011.
@@ -707,20 +733,23 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
let Inst{4} = 0;
let Predicates = [HasVFP2, HasDPVFP];
+ let hasSideEffects = 0;
}
// Between half, single and double-precision.
+let hasSideEffects = 0 in
def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
[/* Intentionally left blank, see patterns below */]>,
Requires<[HasFP16]>,
Sched<[WriteFPCVT]>;
-def : FP16Pat<(f32 (fpextend HPR:$Sm)),
- (VCVTBHS (COPY_TO_REGCLASS HPR:$Sm, SPR))>;
+def : FP16Pat<(f32 (fpextend (f16 HPR:$Sm))),
+ (VCVTBHS (COPY_TO_REGCLASS (f16 HPR:$Sm), SPR))>;
def : FP16Pat<(f16_to_fp GPR:$a),
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+let hasSideEffects = 0 in
def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
[/* Intentionally left blank, see patterns below */]>,
@@ -731,19 +760,41 @@ def : FP16Pat<(f16 (fpround SPR:$Sm)),
(COPY_TO_REGCLASS (VCVTBSH SPR:$Sm), HPR)>;
def : FP16Pat<(fp_to_f16 SPR:$a),
(i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
+def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_even:$lane),
+ (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), (VCVTBSH SPR:$src2),
+ (SSubReg_f16_reg imm:$lane)))>;
+def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_even:$lane),
+ (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), (VCVTBSH SPR:$src2),
+ (SSubReg_f16_reg imm:$lane)))>;
+let hasSideEffects = 0 in
def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]>,
+ [/* Intentionally left blank, see patterns below */]>,
Requires<[HasFP16]>,
Sched<[WriteFPCVT]>;
+def : FP16Pat<(f32 (fpextend (extractelt (v8f16 MQPR:$src), imm_odd:$lane))),
+ (VCVTTHS (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_odd:$lane)))>;
+def : FP16Pat<(f32 (fpextend (extractelt (v4f16 DPR:$src), imm_odd:$lane))),
+ (VCVTTHS (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)),
+ (SSubReg_f16_reg imm_odd:$lane)))>;
+
+let hasSideEffects = 0 in
def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]>,
+ [/* Intentionally left blank, see patterns below */]>,
Requires<[HasFP16]>,
Sched<[WriteFPCVT]>;
+def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_odd:$lane),
+ (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), (VCVTTSH SPR:$src2),
+ (SSubReg_f16_reg imm:$lane)))>;
+def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_odd:$lane),
+ (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), (VCVTTSH SPR:$src2),
+ (SSubReg_f16_reg imm:$lane)))>;
+
def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm",
@@ -756,10 +807,12 @@ def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
// Encode instruction operands.
let Inst{3-0} = Sm{4-1};
let Inst{5} = Sm{0};
+
+ let hasSideEffects = 0;
}
-def : FullFP16Pat<(f64 (fpextend HPR:$Sm)),
- (VCVTBHD (COPY_TO_REGCLASS HPR:$Sm, SPR))>,
+def : FullFP16Pat<(f64 (fpextend (f16 HPR:$Sm))),
+ (VCVTBHD (COPY_TO_REGCLASS (f16 HPR:$Sm), SPR))>,
Requires<[HasFPARMv8, HasDPVFP]>;
def : FP16Pat<(f64 (f16_to_fp GPR:$a)),
(VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>,
@@ -779,6 +832,8 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
let Inst{5} = Dm{4};
let Inst{15-12} = Sd{4-1};
let Inst{22} = Sd{0};
+
+ let hasSideEffects = 0;
}
def : FullFP16Pat<(f16 (fpround DPR:$Dm)),
@@ -798,6 +853,8 @@ def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0,
// Encode instruction operands.
let Inst{3-0} = Sm{4-1};
let Inst{5} = Sm{0};
+
+ let hasSideEffects = 0;
}
def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0,
@@ -813,11 +870,13 @@ def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0,
let Inst{22} = Sd{0};
let Inst{3-0} = Dm{3-0};
let Inst{5} = Dm{4};
+
+ let hasSideEffects = 0;
}
multiclass vcvt_inst<string opc, bits<2> rm,
SDPatternOperator node = null_frag> {
- let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
+ let PostEncoderMethod = "", DecoderNamespace = "VFPV8", hasSideEffects = 0 in {
def SH : AHuInp<0b11101, 0b11, 0b1100, 0b11, 0,
(outs SPR:$Sd), (ins HPR:$Sm),
NoItinerary, !strconcat("vcvt", opc, ".s32.f16\t$Sd, $Sm"),
@@ -883,14 +942,14 @@ multiclass vcvt_inst<string opc, bits<2> rm,
let Predicates = [HasFPARMv8] in {
let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (fp_to_sint (node HPR:$a))),
+ def : Pat<(i32 (fp_to_sint (node (f16 HPR:$a)))),
(COPY_TO_REGCLASS
- (!cast<Instruction>(NAME#"SH") HPR:$a),
+ (!cast<Instruction>(NAME#"SH") (f16 HPR:$a)),
GPR)>;
- def : Pat<(i32 (fp_to_uint (node HPR:$a))),
+ def : Pat<(i32 (fp_to_uint (node (f16 HPR:$a)))),
(COPY_TO_REGCLASS
- (!cast<Instruction>(NAME#"UH") HPR:$a),
+ (!cast<Instruction>(NAME#"UH") (f16 HPR:$a)),
GPR)>;
}
def : Pat<(i32 (fp_to_sint (node SPR:$a))),
@@ -936,7 +995,7 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
def VNEGH : AHuI<0b11101, 0b11, 0b0001, 0b01, 0,
(outs HPR:$Sd), (ins HPR:$Sm),
IIC_fpUNA16, "vneg", ".f16\t$Sd, $Sm",
- [(set HPR:$Sd, (fneg HPR:$Sm))]>;
+ [(set (f16 HPR:$Sd), (fneg (f16 HPR:$Sm)))]>;
multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> {
def H : AHuI<0b11101, 0b11, 0b0110, 0b11, 0,
@@ -1035,7 +1094,7 @@ def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs HPR:$Sd), (ins HPR:$Sm),
IIC_fpSQRT16, "vsqrt", ".f16\t$Sd, $Sm",
- [(set HPR:$Sd, (fsqrt (f16 HPR:$Sm)))]>;
+ [(set (f16 HPR:$Sd), (fsqrt (f16 HPR:$Sm)))]>;
let hasSideEffects = 0 in {
let isMoveReg = 1 in {
@@ -1250,7 +1309,7 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010,
def VMOVRH : AVConv2I<0b11100001, 0b1001,
(outs rGPR:$Rt), (ins HPR:$Sn),
IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn",
- [(set rGPR:$Rt, (arm_vmovrh HPR:$Sn))]>,
+ []>,
Requires<[HasFPRegs16]>,
Sched<[WriteFPMOV]> {
// Instruction operands.
@@ -1272,7 +1331,7 @@ def VMOVRH : AVConv2I<0b11100001, 0b1001,
def VMOVHR : AVConv4I<0b11100000, 0b1001,
(outs HPR:$Sn), (ins rGPR:$Rt),
IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt",
- [(set HPR:$Sn, (arm_vmovhr rGPR:$Rt))]>,
+ []>,
Requires<[HasFPRegs16]>,
Sched<[WriteFPMOV]> {
// Instruction operands.
@@ -1290,6 +1349,11 @@ def VMOVHR : AVConv4I<0b11100000, 0b1001,
let isUnpredicable = 1;
}
+def : FPRegs16Pat<(arm_vmovrh (f16 HPR:$Sn)), (VMOVRH (f16 HPR:$Sn))>;
+def : FPRegs16Pat<(arm_vmovrh (bf16 HPR:$Sn)), (VMOVRH (bf16 HPR:$Sn))>;
+def : FPRegs16Pat<(f16 (arm_vmovhr rGPR:$Rt)), (VMOVHR rGPR:$Rt)>;
+def : FPRegs16Pat<(bf16 (arm_vmovhr rGPR:$Rt)), (VMOVHR rGPR:$Rt)>;
+
// FMRDH: SPR -> GPR
// FMRDL: SPR -> GPR
// FMRRS: SPR -> GPR
@@ -1317,6 +1381,7 @@ class AVConv1IDs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
let Inst{22} = Dd{4};
let Predicates = [HasVFP2, HasDPVFP];
+ let hasSideEffects = 0;
}
class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
@@ -1333,6 +1398,8 @@ class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
let Inst{5} = Sm{0};
let Inst{15-12} = Sd{4-1};
let Inst{22} = Sd{0};
+
+ let hasSideEffects = 0;
}
class AVConv1IHs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
@@ -1352,6 +1419,7 @@ class AVConv1IHs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
let Inst{22} = Sd{0};
let Predicates = [HasFullFP16];
+ let hasSideEffects = 0;
}
def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
@@ -1465,6 +1533,7 @@ class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
let Inst{22} = Sd{0};
let Predicates = [HasVFP2, HasDPVFP];
+ let hasSideEffects = 0;
}
class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
@@ -1501,6 +1570,7 @@ class AVConv1IsH_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
let Inst{22} = Sd{0};
let Predicates = [HasFullFP16];
+ let hasSideEffects = 0;
}
// Always set Z bit in the instruction, i.e. "round towards zero" variants.
@@ -1548,8 +1618,8 @@ def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
let isUnpredicable = 1;
}
-def : VFPNoNEONPat<(i32 (fp_to_sint HPR:$a)),
- (COPY_TO_REGCLASS (VTOSIZH HPR:$a), GPR)>;
+def : VFPNoNEONPat<(i32 (fp_to_sint (f16 HPR:$a))),
+ (COPY_TO_REGCLASS (VTOSIZH (f16 HPR:$a)), GPR)>;
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
@@ -1595,8 +1665,8 @@ def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
let isUnpredicable = 1;
}
-def : VFPNoNEONPat<(i32 (fp_to_uint HPR:$a)),
- (COPY_TO_REGCLASS (VTOUIZH HPR:$a), GPR)>;
+def : VFPNoNEONPat<(i32 (fp_to_uint (f16 HPR:$a))),
+ (COPY_TO_REGCLASS (VTOUIZH (f16 HPR:$a)), GPR)>;
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
let Uses = [FPSCR] in {
@@ -1680,6 +1750,8 @@ class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
let Inst{22} = dst{0};
let Inst{15-12} = dst{4-1};
+
+ let hasSideEffects = 0;
}
// Double Precision register
@@ -1692,6 +1764,7 @@ class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
let Inst{22} = dst{4};
let Inst{15-12} = dst{3-0};
+ let hasSideEffects = 0;
let Predicates = [HasVFP2, HasDPVFP];
}
@@ -1867,6 +1940,37 @@ def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
} // End of 'let Constraints = "$a = $dst" in'
+// BFloat16 - Single precision, unary, predicated
+class BF16_VCVT<string opc, bits<2> op7_6>
+ : VFPAI<(outs SPR:$Sd), (ins SPR:$dst, SPR:$Sm),
+ VFPUnaryFrm, NoItinerary,
+ opc, ".bf16.f32\t$Sd, $Sm", []>,
+ RegConstraint<"$dst = $Sd">,
+ Requires<[HasBF16]>,
+ Sched<[]> {
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
+ let Inst{27-23} = 0b11101; // opcode1
+ let Inst{21-20} = 0b11; // opcode2
+ let Inst{19-16} = 0b0011; // opcode3
+ let Inst{11-8} = 0b1001;
+ let Inst{7-6} = op7_6;
+ let Inst{4} = 0;
+
+ let DecoderNamespace = "VFPV8";
+ let hasSideEffects = 0;
+}
+
+def BF16_VCVTB : BF16_VCVT<"vcvtb", 0b01>;
+def BF16_VCVTT : BF16_VCVT<"vcvtt", 0b11>;
+
//===----------------------------------------------------------------------===//
// FP Multiply-Accumulate Operations.
//
@@ -1896,8 +2000,8 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
def VMLAH : AHbI<0b11100, 0b00, 0, 0,
(outs HPR:$Sd), (ins HPR:$Sdin, HPR:$Sn, HPR:$Sm),
IIC_fpMAC16, "vmla", ".f16\t$Sd, $Sn, $Sm",
- [(set HPR:$Sd, (fadd_mlx (fmul_su HPR:$Sn, HPR:$Sm),
- HPR:$Sdin))]>,
+ [(set (f16 HPR:$Sd), (fadd_mlx (fmul_su (f16 HPR:$Sn), (f16 HPR:$Sm)),
+ (f16 HPR:$Sdin)))]>,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasFullFP16,UseFPVMLx]>;
@@ -1907,8 +2011,8 @@ def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>;
-def : Pat<(fadd_mlx HPR:$dstin, (fmul_su HPR:$a, HPR:$b)),
- (VMLAH HPR:$dstin, HPR:$a, HPR:$b)>,
+def : Pat<(fadd_mlx HPR:$dstin, (fmul_su (f16 HPR:$a), HPR:$b)),
+ (VMLAH HPR:$dstin, (f16 HPR:$a), HPR:$b)>,
Requires<[HasFullFP16,DontUseNEONForFP, UseFPVMLx]>;
@@ -1937,8 +2041,8 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
def VMLSH : AHbI<0b11100, 0b00, 1, 0,
(outs HPR:$Sd), (ins HPR:$Sdin, HPR:$Sn, HPR:$Sm),
IIC_fpMAC16, "vmls", ".f16\t$Sd, $Sn, $Sm",
- [(set HPR:$Sd, (fadd_mlx (fneg (fmul_su HPR:$Sn, HPR:$Sm)),
- HPR:$Sdin))]>,
+ [(set (f16 HPR:$Sd), (fadd_mlx (fneg (fmul_su (f16 HPR:$Sn), (f16 HPR:$Sm))),
+ (f16 HPR:$Sdin)))]>,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasFullFP16,UseFPVMLx]>;
@@ -1948,8 +2052,8 @@ def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
-def : Pat<(fsub_mlx HPR:$dstin, (fmul_su HPR:$a, HPR:$b)),
- (VMLSH HPR:$dstin, HPR:$a, HPR:$b)>,
+def : Pat<(fsub_mlx HPR:$dstin, (fmul_su (f16 HPR:$a), HPR:$b)),
+ (VMLSH HPR:$dstin, (f16 HPR:$a), HPR:$b)>,
Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>;
def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
@@ -1977,8 +2081,8 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
def VNMLAH : AHbI<0b11100, 0b01, 1, 0,
(outs HPR:$Sd), (ins HPR:$Sdin, HPR:$Sn, HPR:$Sm),
IIC_fpMAC16, "vnmla", ".f16\t$Sd, $Sn, $Sm",
- [(set HPR:$Sd, (fsub_mlx (fneg (fmul_su HPR:$Sn, HPR:$Sm)),
- HPR:$Sdin))]>,
+ [(set (f16 HPR:$Sd), (fsub_mlx (fneg (fmul_su (f16 HPR:$Sn), (f16 HPR:$Sm))),
+ (f16 HPR:$Sdin)))]>,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasFullFP16,UseFPVMLx]>;
@@ -1989,8 +2093,8 @@ def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
-def : Pat<(fsub_mlx (fneg (fmul_su HPR:$a, HPR:$b)), HPR:$dstin),
- (VNMLAH HPR:$dstin, HPR:$a, HPR:$b)>,
+def : Pat<(fsub_mlx (fneg (fmul_su (f16 HPR:$a), HPR:$b)), HPR:$dstin),
+ (VNMLAH HPR:$dstin, (f16 HPR:$a), HPR:$b)>,
Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>;
// (-dst - (a * b)) -> -(dst + (a * b))
@@ -2000,8 +2104,8 @@ def : Pat<(fsub_mlx (fneg DPR:$dstin), (fmul_su DPR:$a, (f64 DPR:$b))),
def : Pat<(fsub_mlx (fneg SPR:$dstin), (fmul_su SPR:$a, SPR:$b)),
(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
-def : Pat<(fsub_mlx (fneg HPR:$dstin), (fmul_su HPR:$a, HPR:$b)),
- (VNMLAH HPR:$dstin, HPR:$a, HPR:$b)>,
+def : Pat<(fsub_mlx (fneg HPR:$dstin), (fmul_su (f16 HPR:$a), HPR:$b)),
+ (VNMLAH HPR:$dstin, (f16 HPR:$a), HPR:$b)>,
Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>;
def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
@@ -2028,7 +2132,7 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
def VNMLSH : AHbI<0b11100, 0b01, 0, 0,
(outs HPR:$Sd), (ins HPR:$Sdin, HPR:$Sn, HPR:$Sm),
IIC_fpMAC16, "vnmls", ".f16\t$Sd, $Sn, $Sm",
- [(set HPR:$Sd, (fsub_mlx (fmul_su HPR:$Sn, HPR:$Sm), HPR:$Sdin))]>,
+ [(set (f16 HPR:$Sd), (fsub_mlx (fmul_su (f16 HPR:$Sn), (f16 HPR:$Sm)), (f16 HPR:$Sdin)))]>,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasFullFP16,UseFPVMLx]>;
@@ -2038,8 +2142,8 @@ def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
(VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
-def : Pat<(fsub_mlx (fmul_su HPR:$a, HPR:$b), HPR:$dstin),
- (VNMLSH HPR:$dstin, HPR:$a, HPR:$b)>,
+def : Pat<(fsub_mlx (fmul_su (f16 HPR:$a), HPR:$b), HPR:$dstin),
+ (VNMLSH HPR:$dstin, (f16 HPR:$a), HPR:$b)>,
Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>;
//===----------------------------------------------------------------------===//
@@ -2069,8 +2173,8 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
def VFMAH : AHbI<0b11101, 0b10, 0, 0,
(outs HPR:$Sd), (ins HPR:$Sdin, HPR:$Sn, HPR:$Sm),
IIC_fpFMAC16, "vfma", ".f16\t$Sd, $Sn, $Sm",
- [(set HPR:$Sd, (fadd_mlx (fmul_su HPR:$Sn, HPR:$Sm),
- HPR:$Sdin))]>,
+ [(set (f16 HPR:$Sd), (fadd_mlx (fmul_su (f16 HPR:$Sn), (f16 HPR:$Sm)),
+ (f16 HPR:$Sdin)))]>,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasFullFP16,UseFusedMAC]>,
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
@@ -2081,8 +2185,8 @@ def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
-def : Pat<(fadd_mlx HPR:$dstin, (fmul_su HPR:$a, HPR:$b)),
- (VFMAH HPR:$dstin, HPR:$a, HPR:$b)>,
+def : Pat<(fadd_mlx HPR:$dstin, (fmul_su (f16 HPR:$a), HPR:$b)),
+ (VFMAH HPR:$dstin, (f16 HPR:$a), HPR:$b)>,
Requires<[HasFullFP16,DontUseNEONForFP,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
@@ -2093,8 +2197,8 @@ def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)),
def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)),
(VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
-def : Pat<(f16 (fma HPR:$Sn, HPR:$Sm, HPR:$Sdin)),
- (VFMAH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+def : Pat<(f16 (fma HPR:$Sn, HPR:$Sm, (f16 HPR:$Sdin))),
+ (VFMAH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
Requires<[HasFullFP16]>;
def VFMSD : ADbI<0b11101, 0b10, 1, 0,
@@ -2121,8 +2225,8 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
def VFMSH : AHbI<0b11101, 0b10, 1, 0,
(outs HPR:$Sd), (ins HPR:$Sdin, HPR:$Sn, HPR:$Sm),
IIC_fpFMAC16, "vfms", ".f16\t$Sd, $Sn, $Sm",
- [(set HPR:$Sd, (fadd_mlx (fneg (fmul_su HPR:$Sn, HPR:$Sm)),
- HPR:$Sdin))]>,
+ [(set (f16 HPR:$Sd), (fadd_mlx (fneg (fmul_su (f16 HPR:$Sn), (f16 HPR:$Sm))),
+ (f16 HPR:$Sdin)))]>,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasFullFP16,UseFusedMAC]>,
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
@@ -2133,8 +2237,8 @@ def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
-def : Pat<(fsub_mlx HPR:$dstin, (fmul_su HPR:$a, HPR:$b)),
- (VFMSH HPR:$dstin, HPR:$a, HPR:$b)>,
+def : Pat<(fsub_mlx HPR:$dstin, (fmul_su (f16 HPR:$a), HPR:$b)),
+ (VFMSH HPR:$dstin, (f16 HPR:$a), HPR:$b)>,
Requires<[HasFullFP16,DontUseNEONForFP,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
@@ -2145,8 +2249,8 @@ def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)),
def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)),
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
-def : Pat<(f16 (fma (fneg HPR:$Sn), HPR:$Sm, HPR:$Sdin)),
- (VFMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+def : Pat<(f16 (fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin))),
+ (VFMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
Requires<[HasFullFP16]>;
// (fma x, (fneg y), z) -> (vfms z, x, y)
def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
@@ -2155,8 +2259,8 @@ def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)),
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
-def : Pat<(f16 (fma HPR:$Sn, (fneg HPR:$Sm), HPR:$Sdin)),
- (VFMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+def : Pat<(f16 (fma (f16 HPR:$Sn), (fneg (f16 HPR:$Sm)), (f16 HPR:$Sdin))),
+ (VFMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
Requires<[HasFullFP16]>;
def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
@@ -2183,8 +2287,8 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
def VFNMAH : AHbI<0b11101, 0b01, 1, 0,
(outs HPR:$Sd), (ins HPR:$Sdin, HPR:$Sn, HPR:$Sm),
IIC_fpFMAC16, "vfnma", ".f16\t$Sd, $Sn, $Sm",
- [(set HPR:$Sd, (fsub_mlx (fneg (fmul_su HPR:$Sn, HPR:$Sm)),
- HPR:$Sdin))]>,
+ [(set (f16 HPR:$Sd), (fsub_mlx (fneg (fmul_su (f16 HPR:$Sn), (f16 HPR:$Sm))),
+ (f16 HPR:$Sdin)))]>,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasFullFP16,UseFusedMAC]>,
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
@@ -2204,8 +2308,8 @@ def : Pat<(fneg (fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))),
def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
-def : Pat<(fneg (fma (f16 HPR:$Sn), (f16 HPR:$Sm), (f16 HPR:$Sdin))),
- (VFNMAH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+def : Pat<(fneg (fma (f16 HPR:$Sn), (f16 HPR:$Sm), (f16 (f16 HPR:$Sdin)))),
+ (VFNMAH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
Requires<[HasFullFP16]>;
// (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y)
def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))),
@@ -2214,8 +2318,8 @@ def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))),
def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
-def : Pat<(f16 (fma (fneg HPR:$Sn), HPR:$Sm, (fneg HPR:$Sdin))),
- (VFNMAH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+def : Pat<(f16 (fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (fneg (f16 HPR:$Sdin)))),
+ (VFNMAH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
Requires<[HasFullFP16]>;
def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
@@ -2241,7 +2345,7 @@ def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
def VFNMSH : AHbI<0b11101, 0b01, 0, 0,
(outs HPR:$Sd), (ins HPR:$Sdin, HPR:$Sn, HPR:$Sm),
IIC_fpFMAC16, "vfnms", ".f16\t$Sd, $Sn, $Sm",
- [(set HPR:$Sd, (fsub_mlx (fmul_su HPR:$Sn, HPR:$Sm), HPR:$Sdin))]>,
+ [(set (f16 HPR:$Sd), (fsub_mlx (fmul_su (f16 HPR:$Sn), (f16 HPR:$Sm)), (f16 HPR:$Sdin)))]>,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasFullFP16,UseFusedMAC]>,
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
@@ -2262,8 +2366,8 @@ def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))),
def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
-def : Pat<(f16 (fma HPR:$Sn, HPR:$Sm, (fneg HPR:$Sdin))),
- (VFNMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+def : Pat<(f16 (fma (f16 HPR:$Sn), (f16 HPR:$Sm), (fneg (f16 HPR:$Sdin)))),
+ (VFNMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
Requires<[HasFullFP16]>;
// (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y)
def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
@@ -2272,8 +2376,8 @@ def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
-def : Pat<(fneg (f16 (fma (fneg HPR:$Sn), HPR:$Sm, HPR:$Sdin))),
- (VFNMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+def : Pat<(fneg (f16 (fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin)))),
+ (VFNMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
Requires<[HasFullFP16]>;
// (fneg (fma x, (fneg y), z) -> (vfnms z, x, y)
def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
@@ -2282,8 +2386,8 @@ def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
-def : Pat<(fneg (f16 (fma HPR:$Sn, (fneg HPR:$Sm), HPR:$Sdin))),
- (VFNMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+def : Pat<(fneg (f16 (fma (f16 HPR:$Sn), (fneg (f16 HPR:$Sm)), (f16 HPR:$Sdin)))),
+ (VFNMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
Requires<[HasFullFP16]>;
//===----------------------------------------------------------------------===//
@@ -2306,7 +2410,7 @@ def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p),
def VMOVHcc : PseudoInst<(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm, cmovpred:$p),
IIC_fpUNA16,
[(set (f16 HPR:$Sd),
- (ARMcmov HPR:$Sn, HPR:$Sm, cmovpred:$p))]>,
+ (ARMcmov (f16 HPR:$Sn), (f16 HPR:$Sm), cmovpred:$p))]>,
RegConstraint<"$Sd = $Sn">, Requires<[HasFPRegs]>;
} // hasSideEffects
@@ -2512,7 +2616,7 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
def FCONSTH : VFPAI<(outs HPR:$Sd), (ins vfp_f16imm:$imm),
VFPMiscFrm, IIC_fpUNA16,
"vmov", ".f16\t$Sd, $imm",
- [(set HPR:$Sd, vfp_f16imm:$imm)]>,
+ [(set (f16 HPR:$Sd), vfp_f16imm:$imm)]>,
Requires<[HasFullFP16]> {
bits<5> Sd;
bits<8> imm;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
index 67816bc2103f..c8a894fb11a8 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -239,17 +239,17 @@ static bool selectMergeValues(MachineInstrBuilder &MIB,
// We only support G_MERGE_VALUES as a way to stick together two scalar GPRs
// into one DPR.
- Register VReg0 = MIB->getOperand(0).getReg();
+ Register VReg0 = MIB.getReg(0);
(void)VReg0;
assert(MRI.getType(VReg0).getSizeInBits() == 64 &&
RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::FPRRegBankID &&
"Unsupported operand for G_MERGE_VALUES");
- Register VReg1 = MIB->getOperand(1).getReg();
+ Register VReg1 = MIB.getReg(1);
(void)VReg1;
assert(MRI.getType(VReg1).getSizeInBits() == 32 &&
RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID &&
"Unsupported operand for G_MERGE_VALUES");
- Register VReg2 = MIB->getOperand(2).getReg();
+ Register VReg2 = MIB.getReg(2);
(void)VReg2;
assert(MRI.getType(VReg2).getSizeInBits() == 32 &&
RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::GPRRegBankID &&
@@ -271,17 +271,17 @@ static bool selectUnmergeValues(MachineInstrBuilder &MIB,
// We only support G_UNMERGE_VALUES as a way to break up one DPR into two
// GPRs.
- Register VReg0 = MIB->getOperand(0).getReg();
+ Register VReg0 = MIB.getReg(0);
(void)VReg0;
assert(MRI.getType(VReg0).getSizeInBits() == 32 &&
RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::GPRRegBankID &&
"Unsupported operand for G_UNMERGE_VALUES");
- Register VReg1 = MIB->getOperand(1).getReg();
+ Register VReg1 = MIB.getReg(1);
(void)VReg1;
assert(MRI.getType(VReg1).getSizeInBits() == 32 &&
RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID &&
"Unsupported operand for G_UNMERGE_VALUES");
- Register VReg2 = MIB->getOperand(2).getReg();
+ Register VReg2 = MIB.getReg(2);
(void)VReg2;
assert(MRI.getType(VReg2).getSizeInBits() == 64 &&
RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::FPRRegBankID &&
@@ -530,7 +530,7 @@ bool ARMInstructionSelector::selectCmp(CmpConstants Helper,
MachineRegisterInfo &MRI) const {
const InsertInfo I(MIB);
- auto ResReg = MIB->getOperand(0).getReg();
+ auto ResReg = MIB.getReg(0);
if (!validReg(MRI, ResReg, 1, ARM::GPRRegBankID))
return false;
@@ -542,8 +542,8 @@ bool ARMInstructionSelector::selectCmp(CmpConstants Helper,
return true;
}
- auto LHSReg = MIB->getOperand(2).getReg();
- auto RHSReg = MIB->getOperand(3).getReg();
+ auto LHSReg = MIB.getReg(2);
+ auto RHSReg = MIB.getReg(3);
if (!validOpRegPair(MRI, LHSReg, RHSReg, Helper.OperandSize,
Helper.OperandRegBankID))
return false;
@@ -627,7 +627,7 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
bool UseMovt = STI.useMovt();
unsigned Size = TM.getPointerSize(0);
- unsigned Alignment = 4;
+ const Align Alignment(4);
auto addOpsForConstantPoolLoad = [&MF, Alignment,
Size](MachineInstrBuilder &MIB,
@@ -687,7 +687,7 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
if (Indirect) {
if (!UseOpcodeThatLoads) {
- auto ResultReg = MIB->getOperand(0).getReg();
+ auto ResultReg = MIB.getReg(0);
auto AddressReg = MRI.createVirtualRegister(&ARM::GPRRegClass);
MIB->getOperand(0).setReg(AddressReg);
@@ -773,7 +773,7 @@ bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB,
auto &DbgLoc = MIB->getDebugLoc();
// Compare the condition to 1.
- auto CondReg = MIB->getOperand(1).getReg();
+ auto CondReg = MIB.getReg(1);
assert(validReg(MRI, CondReg, 1, ARM::GPRRegBankID) &&
"Unsupported types for select operation");
auto CmpI = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(Opcodes.TSTri))
@@ -785,9 +785,9 @@ bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB,
// Move a value into the result register based on the result of the
// comparison.
- auto ResReg = MIB->getOperand(0).getReg();
- auto TrueReg = MIB->getOperand(2).getReg();
- auto FalseReg = MIB->getOperand(3).getReg();
+ auto ResReg = MIB.getReg(0);
+ auto TrueReg = MIB.getReg(2);
+ auto FalseReg = MIB.getReg(3);
assert(validOpRegPair(MRI, ResReg, TrueReg, 32, ARM::GPRRegBankID) &&
validOpRegPair(MRI, TrueReg, FalseReg, 32, ARM::GPRRegBankID) &&
"Unsupported types for select operation");
@@ -990,7 +990,7 @@ bool ARMInstructionSelector::select(MachineInstr &I) {
case G_FCONSTANT: {
// Load from constant pool
unsigned Size = MRI.getType(I.getOperand(0).getReg()).getSizeInBits() / 8;
- unsigned Alignment = Size;
+ Align Alignment(Size);
assert((Size == 4 || Size == 8) && "Unsupported FP constant type");
auto LoadOpcode = Size == 4 ? ARM::VLDRS : ARM::VLDRD;
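The hunks above in ARMInstructionSelector.cpp (and several more below in ARMLoadStoreOptimizer.cpp) replace raw unsigned alignment values with the llvm::Align type. As a rough stand-alone sketch, not part of the patch and assuming only the llvm/Support/Alignment.h header, this is the comparison idiom those call sites move to:

    // llvm::Align wraps a power-of-two byte count, so alignment comparisons
    // are explicit and cannot be confused with log2 values or plain integers.
    #include "llvm/Support/Alignment.h"
    #include <cassert>

    static bool isAtLeastWordAligned(llvm::Align A) {
      return A >= llvm::Align(4); // same style as the getAlign() < Align(4) checks below
    }

    int main() {
      assert(isAtLeastWordAligned(llvm::Align(8)));
      assert(!isAtLeastWordAligned(llvm::Align(2)));
      return 0;
    }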
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
index e2dff51ea61c..f3657155f47e 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -357,13 +357,12 @@ ARMLegalizerInfo::getFCmpLibcalls(CmpInst::Predicate Predicate,
llvm_unreachable("Unsupported size for FCmp predicate");
}
-bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder,
- GISelChangeObserver &Observer) const {
+bool ARMLegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
using namespace TargetOpcode;
- MIRBuilder.setInstr(MI);
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
switch (MI.getOpcode()) {
@@ -445,8 +444,7 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
} else {
// We need to compare against 0.
assert(CmpInst::isIntPredicate(ResultPred) && "Unsupported predicate");
- auto Zero = MRI.createGenericVirtualRegister(LLT::scalar(32));
- MIRBuilder.buildConstant(Zero, 0);
+ auto Zero = MIRBuilder.buildConstant(LLT::scalar(32), 0);
MIRBuilder.buildICmp(ResultPred, ProcessedResult, LibcallResult, Zero);
}
Results.push_back(ProcessedResult);
@@ -462,7 +460,7 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
// Convert to integer constants, while preserving the binary representation.
auto AsInteger =
MI.getOperand(1).getFPImm()->getValueAPF().bitcastToAPInt();
- MIRBuilder.buildConstant(MI.getOperand(0).getReg(),
+ MIRBuilder.buildConstant(MI.getOperand(0),
*ConstantInt::get(Ctx, AsInteger));
break;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLegalizerInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLegalizerInfo.h
index e95f8cf76103..f1c2e9c94336 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLegalizerInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLegalizerInfo.h
@@ -28,9 +28,7 @@ class ARMLegalizerInfo : public LegalizerInfo {
public:
ARMLegalizerInfo(const ARMSubtarget &ST);
- bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder,
- GISelChangeObserver &Observer) const override;
+ bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override;
private:
void setFCmpLibcallsGNU();
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 12dddd29ca84..a84d23d3bb96 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -32,6 +32,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -50,6 +51,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
@@ -900,7 +902,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
unsigned Offset = getMemoryOpOffset(*First);
Register Base = getLoadStoreBaseOp(*First).getReg();
bool BaseKill = LatestMI->killsRegister(Base);
- unsigned PredReg = 0;
+ Register PredReg;
ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
DebugLoc DL = First->getDebugLoc();
MachineInstr *Merged = nullptr;
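Many hunks in this file also swap "unsigned PredReg = 0" for a default-constructed Register. A minimal sketch, not part of the patch and assuming only llvm/CodeGen/Register.h, of why the two spellings are interchangeable for the "no predicate register" case:

    // llvm::Register value-initializes to 0, the "no register" id, so
    // "Register PredReg;" means the same as the old "unsigned PredReg = 0;".
    #include "llvm/CodeGen/Register.h"
    #include <cassert>

    int main() {
      llvm::Register PredReg;      // defaults to 0, i.e. no register
      assert(!PredReg.isValid());  // 0 is not a valid register id
      assert(PredReg.id() == 0);   // still comparable as a plain id
      return 0;
    }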
@@ -991,7 +993,7 @@ static bool mayCombineMisaligned(const TargetSubtargetInfo &STI,
// Stack pointer alignment is out of the programmers control so we can trust
// SP-relative loads/stores.
if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
- STI.getFrameLowering()->getTransientStackAlignment() >= 4)
+ STI.getFrameLowering()->getTransientStackAlign() >= Align(4))
return true;
return false;
}
@@ -1183,8 +1185,8 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
/// Check if the given instruction increments or decrements a register and
/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
/// generated by the instruction are possibly read as well.
-static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
- ARMCC::CondCodes Pred, unsigned PredReg) {
+static int isIncrementOrDecrement(const MachineInstr &MI, Register Reg,
+ ARMCC::CondCodes Pred, Register PredReg) {
bool CheckCPSRDef;
int Scale;
switch (MI.getOpcode()) {
@@ -1201,7 +1203,7 @@ static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
default: return 0;
}
- unsigned MIPredReg;
+ Register MIPredReg;
if (MI.getOperand(0).getReg() != Reg ||
MI.getOperand(1).getReg() != Reg ||
getInstrPredicate(MI, MIPredReg) != Pred ||
@@ -1215,8 +1217,8 @@ static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
/// Searches for an increment or decrement of \p Reg before \p MBBI.
static MachineBasicBlock::iterator
-findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
- ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
+findIncDecBefore(MachineBasicBlock::iterator MBBI, Register Reg,
+ ARMCC::CondCodes Pred, Register PredReg, int &Offset) {
Offset = 0;
MachineBasicBlock &MBB = *MBBI->getParent();
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
@@ -1235,8 +1237,8 @@ findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
/// Searches for a increment or decrement of \p Reg after \p MBBI.
static MachineBasicBlock::iterator
-findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
- ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
+findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg,
+ ARMCC::CondCodes Pred, Register PredReg, int &Offset) {
Offset = 0;
MachineBasicBlock &MBB = *MBBI->getParent();
MachineBasicBlock::iterator EndMBBI = MBB.end();
@@ -1270,7 +1272,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
const MachineOperand &BaseOP = MI->getOperand(0);
Register Base = BaseOP.getReg();
bool BaseKill = BaseOP.isKill();
- unsigned PredReg = 0;
+ Register PredReg;
ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
unsigned Opcode = MI->getOpcode();
DebugLoc DL = MI->getDebugLoc();
@@ -1383,6 +1385,38 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
case ARM::t2STRi8:
case ARM::t2STRi12:
return ARM::t2STR_POST;
+
+ case ARM::MVE_VLDRBS16:
+ return ARM::MVE_VLDRBS16_post;
+ case ARM::MVE_VLDRBS32:
+ return ARM::MVE_VLDRBS32_post;
+ case ARM::MVE_VLDRBU16:
+ return ARM::MVE_VLDRBU16_post;
+ case ARM::MVE_VLDRBU32:
+ return ARM::MVE_VLDRBU32_post;
+ case ARM::MVE_VLDRHS32:
+ return ARM::MVE_VLDRHS32_post;
+ case ARM::MVE_VLDRHU32:
+ return ARM::MVE_VLDRHU32_post;
+ case ARM::MVE_VLDRBU8:
+ return ARM::MVE_VLDRBU8_post;
+ case ARM::MVE_VLDRHU16:
+ return ARM::MVE_VLDRHU16_post;
+ case ARM::MVE_VLDRWU32:
+ return ARM::MVE_VLDRWU32_post;
+ case ARM::MVE_VSTRB16:
+ return ARM::MVE_VSTRB16_post;
+ case ARM::MVE_VSTRB32:
+ return ARM::MVE_VSTRB32_post;
+ case ARM::MVE_VSTRH32:
+ return ARM::MVE_VSTRH32_post;
+ case ARM::MVE_VSTRBU8:
+ return ARM::MVE_VSTRBU8_post;
+ case ARM::MVE_VSTRHU16:
+ return ARM::MVE_VSTRHU16_post;
+ case ARM::MVE_VSTRWU32:
+ return ARM::MVE_VSTRWU32_post;
+
default: llvm_unreachable("Unhandled opcode!");
}
}
@@ -1412,7 +1446,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
if (MI->getOperand(0).getReg() == Base)
return false;
- unsigned PredReg = 0;
+ Register PredReg;
ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
int Bytes = getLSMultipleTransferSize(MI);
MachineBasicBlock &MBB = *MI->getParent();
@@ -1525,7 +1559,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
return false;
- unsigned PredReg;
+ Register PredReg;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
MachineBasicBlock::iterator MBBI(MI);
MachineBasicBlock &MBB = *MI.getParent();
@@ -1602,13 +1636,13 @@ static bool isMemoryOp(const MachineInstr &MI) {
// Don't touch volatile memory accesses - we may be changing their order.
// TODO: We could allow unordered and monotonic atomics here, but we need to
- // make sure the resulting ldm/stm is correctly marked as atomic.
+ // make sure the resulting ldm/stm is correctly marked as atomic.
if (MMO.isVolatile() || MMO.isAtomic())
return false;
// Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
// not.
- if (MMO.getAlignment() < 4)
+ if (MMO.getAlign() < Align(4))
return false;
// str <undef> could probably be eliminated entirely, but for now we just want
@@ -1692,7 +1726,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&
"register offset not handled below");
int OffImm = getMemoryOpOffset(*MI);
- unsigned PredReg = 0;
+ Register PredReg;
ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
if (OddRegNum > EvenRegNum && OffImm == 0) {
@@ -1792,7 +1826,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
const MachineOperand &MO = MBBI->getOperand(0);
Register Reg = MO.getReg();
Register Base = getLoadStoreBaseOp(*MBBI).getReg();
- unsigned PredReg = 0;
+ Register PredReg;
ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
int Offset = getMemoryOpOffset(*MBBI);
if (CurrBase == 0) {
@@ -2046,6 +2080,7 @@ namespace {
const TargetRegisterInfo *TRI;
const ARMSubtarget *STI;
MachineRegisterInfo *MRI;
+ MachineDominatorTree *DT;
MachineFunction *MF;
ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
@@ -2058,29 +2093,34 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
private:
bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
- unsigned &NewOpc, unsigned &EvenReg,
- unsigned &OddReg, unsigned &BaseReg,
- int &Offset,
- unsigned &PredReg, ARMCC::CondCodes &Pred,
- bool &isT2);
+ unsigned &NewOpc, Register &EvenReg, Register &OddReg,
+ Register &BaseReg, int &Offset, Register &PredReg,
+ ARMCC::CondCodes &Pred, bool &isT2);
bool RescheduleOps(MachineBasicBlock *MBB,
SmallVectorImpl<MachineInstr *> &Ops,
unsigned Base, bool isLd,
DenseMap<MachineInstr*, unsigned> &MI2LocMap);
bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
+ bool DistributeIncrements();
+ bool DistributeIncrements(Register Base);
};
} // end anonymous namespace
char ARMPreAllocLoadStoreOpt::ID = 0;
-INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
- ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
+INITIALIZE_PASS_BEGIN(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
+ ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
+ ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
// Limit the number of instructions to be rescheduled.
// FIXME: tune this limit, and/or come up with some better heuristics.
@@ -2096,10 +2136,11 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
MRI = &Fn.getRegInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
MF = &Fn;
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- bool Modified = false;
+ bool Modified = DistributeIncrements();
for (MachineBasicBlock &MFI : Fn)
Modified |= RescheduleLoadStoreInstrs(&MFI);
@@ -2143,15 +2184,10 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
return AddedRegPressure.size() <= MemRegs.size() * 2;
}
-bool
-ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
- DebugLoc &dl, unsigned &NewOpc,
- unsigned &FirstReg,
- unsigned &SecondReg,
- unsigned &BaseReg, int &Offset,
- unsigned &PredReg,
- ARMCC::CondCodes &Pred,
- bool &isT2) {
+bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(
+ MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, unsigned &NewOpc,
+ Register &FirstReg, Register &SecondReg, Register &BaseReg, int &Offset,
+ Register &PredReg, ARMCC::CondCodes &Pred, bool &isT2) {
// Make sure we're allowed to generate LDRD/STRD.
if (!STI->hasV5TEOps())
return false;
@@ -2183,12 +2219,12 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
(*Op0->memoperands_begin())->isAtomic())
return false;
- unsigned Align = (*Op0->memoperands_begin())->getAlignment();
+ Align Alignment = (*Op0->memoperands_begin())->getAlign();
const Function &Func = MF->getFunction();
- unsigned ReqAlign = STI->hasV6Ops()
- ? TD->getABITypeAlignment(Type::getInt64Ty(Func.getContext()))
- : 8; // Pre-v6 need 8-byte align
- if (Align < ReqAlign)
+ Align ReqAlign =
+ STI->hasV6Ops() ? TD->getABITypeAlign(Type::getInt64Ty(Func.getContext()))
+ : Align(8); // Pre-v6 need 8-byte align
+ if (Alignment < ReqAlign)
return false;
// Then make sure the immediate offset fits.
@@ -2313,8 +2349,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
// to try to allocate a pair of registers that can form register pairs.
MachineInstr *Op0 = Ops.back();
MachineInstr *Op1 = Ops[Ops.size()-2];
- unsigned FirstReg = 0, SecondReg = 0;
- unsigned BaseReg = 0, PredReg = 0;
+ Register FirstReg, SecondReg;
+ Register BaseReg, PredReg;
ARMCC::CondCodes Pred = ARMCC::AL;
bool isT2 = false;
unsigned NewOpc = 0;
@@ -2416,7 +2452,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
if (!isMemoryOp(MI))
continue;
- unsigned PredReg = 0;
+ Register PredReg;
if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
continue;
@@ -2482,6 +2518,199 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
return RetVal;
}
+// Get the Base register operand index from the memory access MachineInstr if
+// we should attempt to distribute postinc on it. Return -1 if not of a valid
+// instruction type. If it returns an index, it is assumed that the instruction
+// uses an r+i indexing mode, and getBaseOperandIndex() + 1 is the Offset index.
+static int getBaseOperandIndex(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case ARM::MVE_VLDRBS16:
+ case ARM::MVE_VLDRBS32:
+ case ARM::MVE_VLDRBU16:
+ case ARM::MVE_VLDRBU32:
+ case ARM::MVE_VLDRHS32:
+ case ARM::MVE_VLDRHU32:
+ case ARM::MVE_VLDRBU8:
+ case ARM::MVE_VLDRHU16:
+ case ARM::MVE_VLDRWU32:
+ case ARM::MVE_VSTRB16:
+ case ARM::MVE_VSTRB32:
+ case ARM::MVE_VSTRH32:
+ case ARM::MVE_VSTRBU8:
+ case ARM::MVE_VSTRHU16:
+ case ARM::MVE_VSTRWU32:
+ return 1;
+ }
+ return -1;
+}
+
+static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
+ Register NewReg,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ MachineFunction *MF = MI->getMF();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ unsigned NewOpcode = getPostIndexedLoadStoreOpcode(
+ MI->getOpcode(), Offset > 0 ? ARM_AM::add : ARM_AM::sub);
+
+ const MCInstrDesc &MCID = TII->get(NewOpcode);
+ // Constrain the def register class
+ const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
+ MRI.constrainRegClass(NewReg, TRC);
+ // And do the same for the base operand
+ TRC = TII->getRegClass(MCID, 2, TRI, *MF);
+ MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
+
+ return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
+ .addReg(NewReg, RegState::Define)
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1))
+ .addImm(Offset)
+ .add(MI->getOperand(3))
+ .add(MI->getOperand(4))
+ .cloneMemRefs(*MI);
+}
+
+// Given a Base Register, optimise the load/store uses to attempt to create more
+// post-inc accesses. We do this by taking zero offset loads/stores with an add,
+// and convert them to a postinc load/store of the same type. Any subsequent
+// accesses will be adjusted to use and account for the post-inc value.
+// For example:
+// LDR #0 LDR_POSTINC #16
+// LDR #4 LDR #-12
+// LDR #8 LDR #-8
+// LDR #12 LDR #-4
+// ADD #16
+bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
+ // We are looking for:
+ // One zero offset load/store that can become postinc
+ MachineInstr *BaseAccess = nullptr;
+ // An increment that can be folded in
+ MachineInstr *Increment = nullptr;
+ // Other accesses after BaseAccess that will need to be updated to use the
+ // postinc value
+ SmallPtrSet<MachineInstr *, 8> OtherAccesses;
+ for (auto &Use : MRI->use_nodbg_instructions(Base)) {
+ if (!Increment && getAddSubImmediate(Use) != 0) {
+ Increment = &Use;
+ continue;
+ }
+
+ int BaseOp = getBaseOperandIndex(Use);
+ if (BaseOp == -1)
+ return false;
+
+ if (!Use.getOperand(BaseOp).isReg() ||
+ Use.getOperand(BaseOp).getReg() != Base)
+ return false;
+ if (Use.getOperand(BaseOp + 1).getImm() == 0)
+ BaseAccess = &Use;
+ else
+ OtherAccesses.insert(&Use);
+ }
+
+ if (!BaseAccess || !Increment ||
+ BaseAccess->getParent() != Increment->getParent())
+ return false;
+ Register PredReg;
+ if (Increment->definesRegister(ARM::CPSR) ||
+ getInstrPredicate(*Increment, PredReg) != ARMCC::AL)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
+ << Base.virtRegIndex() << "\n");
+
+ // Make sure that Increment has no uses before BaseAccess.
+ for (MachineInstr &Use :
+ MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
+ if (!DT->dominates(BaseAccess, &Use) || &Use == BaseAccess) {
+ LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
+ return false;
+ }
+ }
+
+ // Make sure that Increment can be folded into Base
+ int IncrementOffset = getAddSubImmediate(*Increment);
+ unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
+ BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
+ if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
+ LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n");
+ return false;
+ }
+
+ // And make sure that the negative value of increment can be added to all
+ // other offsets after the BaseAccess. We rely on either
+ // dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess)
+ // to keep things simple.
+ SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
+ for (auto *Use : OtherAccesses) {
+ if (DT->dominates(BaseAccess, Use)) {
+ SuccessorAccesses.insert(Use);
+ unsigned BaseOp = getBaseOperandIndex(*Use);
+ if (!isLegalAddressImm(
+ Use->getOpcode(),
+ Use->getOperand(BaseOp + 1).getImm() - IncrementOffset, TII)) {
+ LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n");
+ return false;
+ }
+ } else if (!DT->dominates(Use, BaseAccess)) {
+ LLVM_DEBUG(
+ dbgs() << " Unknown dominance relation between Base and Use\n");
+ return false;
+ }
+ }
+
+ // Replace BaseAccess with a post inc
+ LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
+ LLVM_DEBUG(dbgs() << " And : "; Increment->dump());
+ Register NewBaseReg = Increment->getOperand(0).getReg();
+ MachineInstr *BaseAccessPost =
+ createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
+ BaseAccess->eraseFromParent();
+ Increment->eraseFromParent();
+ (void)BaseAccessPost;
+ LLVM_DEBUG(dbgs() << " To : "; BaseAccessPost->dump());
+
+ for (auto *Use : SuccessorAccesses) {
+ LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
+ unsigned BaseOp = getBaseOperandIndex(*Use);
+ Use->getOperand(BaseOp).setReg(NewBaseReg);
+ int OldOffset = Use->getOperand(BaseOp + 1).getImm();
+ Use->getOperand(BaseOp + 1).setImm(OldOffset - IncrementOffset);
+ LLVM_DEBUG(dbgs() << " To : "; Use->dump());
+ }
+
+ // Remove the kill flag from all uses of NewBaseReg, in case any old uses
+ // remain.
+ for (MachineOperand &Op : MRI->use_nodbg_operands(NewBaseReg))
+ Op.setIsKill(false);
+ return true;
+}
+
+bool ARMPreAllocLoadStoreOpt::DistributeIncrements() {
+ bool Changed = false;
+ SmallSetVector<Register, 4> Visited;
+ for (auto &MBB : *MF) {
+ for (auto &MI : MBB) {
+ int BaseOp = getBaseOperandIndex(MI);
+ if (BaseOp == -1 || !MI.getOperand(BaseOp).isReg())
+ continue;
+
+ Register Base = MI.getOperand(BaseOp).getReg();
+ if (!Base.isVirtual() || Visited.count(Base))
+ continue;
+
+ Visited.insert(Base);
+ }
+ }
+
+ for (auto Base : Visited)
+ Changed |= DistributeIncrements(Base);
+
+ return Changed;
+}
+
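To make the rebasing arithmetic above concrete, here is a small stand-alone program, not LLVM code and with invented offsets, that reproduces the before/after table from the DistributeIncrements comment: the zero-offset access absorbs the whole increment as a post-inc, and each dominated access is rewritten to OldOffset - IncrementOffset.

    #include <cstdio>
    #include <vector>

    int main() {
      const int IncrementOffset = 16;              // the folded "ADD #16"
      const std::vector<int> OldOffsets = {0, 4, 8, 12};

      for (int Old : OldOffsets) {
        if (Old == 0)                              // the BaseAccess becomes a post-inc
          std::printf("LDR #0  -> LDR_POSTINC #%d\n", IncrementOffset);
        else                                       // later accesses rebase onto the new value
          std::printf("LDR #%-2d -> LDR #%d\n", Old, Old - IncrementOffset);
      }
      return 0;                                    // prints the same table as the comment
    }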
/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
if (PreAlloc)
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index 6717d4706aef..be75d6bef08c 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -35,6 +35,20 @@
/// are defined to be as large as this maximum sequence of replacement
/// instructions.
///
+/// A note on VPR.P0 (the lane mask):
+/// VPT, VCMP, VPNOT and VCTP won't overwrite VPR.P0 when they update it in a
+/// "VPT Active" context (which includes low-overhead loops and vpt blocks).
+/// They will simply "and" the result of their calculation with the current
+/// value of VPR.P0. You can think of it like this:
+/// \verbatim
+/// if VPT active: ; Between a DLSTP/LETP, or for predicated instrs
+/// VPR.P0 &= Value
+/// else
+/// VPR.P0 = Value
+/// \endverbatim
+/// When we're inside the low-overhead loop (between DLSTP and LETP), we always
+/// fall in the "VPT active" case, so we can consider that all VPR writes by
+/// one of those instructions are actually an "and".
//===----------------------------------------------------------------------===//
#include "ARM.h"
@@ -45,6 +59,7 @@
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineLoopUtils.h"
@@ -60,34 +75,93 @@ using namespace llvm;
namespace {
+ using InstSet = SmallPtrSetImpl<MachineInstr *>;
+
+ class PostOrderLoopTraversal {
+ MachineLoop &ML;
+ MachineLoopInfo &MLI;
+ SmallPtrSet<MachineBasicBlock*, 4> Visited;
+ SmallVector<MachineBasicBlock*, 4> Order;
+
+ public:
+ PostOrderLoopTraversal(MachineLoop &ML, MachineLoopInfo &MLI)
+ : ML(ML), MLI(MLI) { }
+
+ const SmallVectorImpl<MachineBasicBlock*> &getOrder() const {
+ return Order;
+ }
+
+ // Visit all the blocks within the loop, as well as exit blocks and any
+ // blocks properly dominating the header.
+ void ProcessLoop() {
+ std::function<void(MachineBasicBlock*)> Search = [this, &Search]
+ (MachineBasicBlock *MBB) -> void {
+ if (Visited.count(MBB))
+ return;
+
+ Visited.insert(MBB);
+ for (auto *Succ : MBB->successors()) {
+ if (!ML.contains(Succ))
+ continue;
+ Search(Succ);
+ }
+ Order.push_back(MBB);
+ };
+
+ // Insert exit blocks.
+ SmallVector<MachineBasicBlock*, 2> ExitBlocks;
+ ML.getExitBlocks(ExitBlocks);
+ for (auto *MBB : ExitBlocks)
+ Order.push_back(MBB);
+
+ // Then add the loop body.
+ Search(ML.getHeader());
+
+ // Then try the preheader and its predecessors.
+ std::function<void(MachineBasicBlock*)> GetPredecessor =
+ [this, &GetPredecessor] (MachineBasicBlock *MBB) -> void {
+ Order.push_back(MBB);
+ if (MBB->pred_size() == 1)
+ GetPredecessor(*MBB->pred_begin());
+ };
+
+ if (auto *Preheader = ML.getLoopPreheader())
+ GetPredecessor(Preheader);
+ else if (auto *Preheader = MLI.findLoopPreheader(&ML, true))
+ GetPredecessor(Preheader);
+ }
+ };
+
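PostOrderLoopTraversal above appends a block only after all of its in-loop successors have been visited. A toy stand-alone version of that depth-first walk, using a plain adjacency list instead of MachineBasicBlocks and ignoring the exit-block and preheader handling:

    #include <cstdio>
    #include <functional>
    #include <set>
    #include <vector>

    int main() {
      // Toy CFG: 0 -> {1, 2}, 1 -> {3}, 2 -> {3}, 3 -> {}.
      std::vector<std::vector<int>> Succs = {{1, 2}, {3}, {3}, {}};
      std::set<int> Visited;
      std::vector<int> Order;

      std::function<void(int)> Search = [&](int BB) {
        if (!Visited.insert(BB).second)
          return;
        for (int S : Succs[BB])
          Search(S);
        Order.push_back(BB); // appended only after all successors
      };

      Search(0);
      for (int BB : Order)
        std::printf("%d ", BB); // prints "3 1 2 0"
      std::printf("\n");
      return 0;
    }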
struct PredicatedMI {
MachineInstr *MI = nullptr;
SetVector<MachineInstr*> Predicates;
public:
- PredicatedMI(MachineInstr *I, SetVector<MachineInstr*> &Preds) :
- MI(I) {
+ PredicatedMI(MachineInstr *I, SetVector<MachineInstr *> &Preds) : MI(I) {
+ assert(I && "Instruction must not be null!");
Predicates.insert(Preds.begin(), Preds.end());
}
};
- // Represent a VPT block, a list of instructions that begins with a VPST and
- // has a maximum of four proceeding instructions. All instructions within the
- // block are predicated upon the vpr and we allow instructions to define the
- // vpr within in the block too.
+ // Represent a VPT block, a list of instructions that begins with a VPT/VPST
+ // and has a maximum of four proceeding instructions. All instructions within
+ // the block are predicated upon the vpr and we allow instructions to define
+ // the vpr within in the block too.
class VPTBlock {
- std::unique_ptr<PredicatedMI> VPST;
+ // The predicate then instruction, which is either a VPT, or a VPST
+ // instruction.
+ std::unique_ptr<PredicatedMI> PredicateThen;
PredicatedMI *Divergent = nullptr;
SmallVector<PredicatedMI, 4> Insts;
public:
VPTBlock(MachineInstr *MI, SetVector<MachineInstr*> &Preds) {
- VPST = std::make_unique<PredicatedMI>(MI, Preds);
+ PredicateThen = std::make_unique<PredicatedMI>(MI, Preds);
}
void addInst(MachineInstr *MI, SetVector<MachineInstr*> &Preds) {
LLVM_DEBUG(dbgs() << "ARM Loops: Adding predicated MI: " << *MI);
- if (!Divergent && !set_difference(Preds, VPST->Predicates).empty()) {
+ if (!Divergent && !set_difference(Preds, PredicateThen->Predicates).empty()) {
Divergent = &Insts.back();
LLVM_DEBUG(dbgs() << " - has divergent predicate: " << *Divergent->MI);
}
@@ -104,38 +178,73 @@ namespace {
// Is the given instruction part of the predicate set controlling the entry
// to the block.
bool IsPredicatedOn(MachineInstr *MI) const {
- return VPST->Predicates.count(MI);
+ return PredicateThen->Predicates.count(MI);
+ }
+
+ // Returns true if this is a VPT instruction.
+ bool isVPT() const { return !isVPST(); }
+
+ // Returns true if this is a VPST instruction.
+ bool isVPST() const {
+ return PredicateThen->MI->getOpcode() == ARM::MVE_VPST;
}
// Is the given instruction the only predicate which controls the entry to
// the block.
bool IsOnlyPredicatedOn(MachineInstr *MI) const {
- return IsPredicatedOn(MI) && VPST->Predicates.size() == 1;
+ return IsPredicatedOn(MI) && PredicateThen->Predicates.size() == 1;
}
unsigned size() const { return Insts.size(); }
SmallVectorImpl<PredicatedMI> &getInsts() { return Insts; }
- MachineInstr *getVPST() const { return VPST->MI; }
+ MachineInstr *getPredicateThen() const { return PredicateThen->MI; }
PredicatedMI *getDivergent() const { return Divergent; }
};
+ struct Reduction {
+ MachineInstr *Init;
+ MachineInstr &Copy;
+ MachineInstr &Reduce;
+ MachineInstr &VPSEL;
+
+ Reduction(MachineInstr *Init, MachineInstr *Mov, MachineInstr *Add,
+ MachineInstr *Sel)
+ : Init(Init), Copy(*Mov), Reduce(*Add), VPSEL(*Sel) { }
+ };
+
struct LowOverheadLoop {
- MachineLoop *ML = nullptr;
+ MachineLoop &ML;
+ MachineBasicBlock *Preheader = nullptr;
+ MachineLoopInfo &MLI;
+ ReachingDefAnalysis &RDA;
+ const TargetRegisterInfo &TRI;
+ const ARMBaseInstrInfo &TII;
MachineFunction *MF = nullptr;
MachineInstr *InsertPt = nullptr;
MachineInstr *Start = nullptr;
MachineInstr *Dec = nullptr;
MachineInstr *End = nullptr;
MachineInstr *VCTP = nullptr;
+ SmallPtrSet<MachineInstr*, 4> SecondaryVCTPs;
VPTBlock *CurrentBlock = nullptr;
SetVector<MachineInstr*> CurrentPredicate;
SmallVector<VPTBlock, 4> VPTBlocks;
+ SmallPtrSet<MachineInstr*, 4> ToRemove;
+ SmallVector<std::unique_ptr<Reduction>, 1> Reductions;
+ SmallPtrSet<MachineInstr*, 4> BlockMasksToRecompute;
bool Revert = false;
bool CannotTailPredicate = false;
- LowOverheadLoop(MachineLoop *ML) : ML(ML) {
- MF = ML->getHeader()->getParent();
+ LowOverheadLoop(MachineLoop &ML, MachineLoopInfo &MLI,
+ ReachingDefAnalysis &RDA, const TargetRegisterInfo &TRI,
+ const ARMBaseInstrInfo &TII)
+ : ML(ML), MLI(MLI), RDA(RDA), TRI(TRI), TII(TII) {
+ MF = ML.getHeader()->getParent();
+ if (auto *MBB = ML.getLoopPreheader())
+ Preheader = MBB;
+ else if (auto *MBB = MLI.findLoopPreheader(&ML, true))
+ Preheader = MBB;
}
// If this is an MVE instruction, check that we know how to use tail
@@ -151,22 +260,30 @@ namespace {
// For now, let's keep things really simple and only support a single
// block for tail predication.
return !Revert && FoundAllComponents() && VCTP &&
- !CannotTailPredicate && ML->getNumBlocks() == 1;
+ !CannotTailPredicate && ML.getNumBlocks() == 1;
}
- bool ValidateTailPredicate(MachineInstr *StartInsertPt,
- ReachingDefAnalysis *RDA,
- MachineLoopInfo *MLI);
+ // Check that the predication in the loop will be equivalent once we
+ // perform the conversion. Also ensure that we can provide the number
+ // of elements to the loop start instruction.
+ bool ValidateTailPredicate(MachineInstr *StartInsertPt);
+
+ // See whether the live-out instructions are a reduction that we can fixup
+ // later.
+ bool FindValidReduction(InstSet &LiveMIs, InstSet &LiveOutUsers);
+
+ // Check that any values available outside of the loop will be the same
+ // after tail predication conversion.
+ bool ValidateLiveOuts();
// Is it safe to define LR with DLS/WLS?
// LR can be defined if it is the operand to start, because it's the same
// value, or if it's going to be equivalent to the operand to Start.
- MachineInstr *IsSafeToDefineLR(ReachingDefAnalysis *RDA);
+ MachineInstr *isSafeToDefineLR();
// Check the branch targets are within range and we satisfy our
// restrictions.
- void CheckLegality(ARMBasicBlockUtils *BBUtils, ReachingDefAnalysis *RDA,
- MachineLoopInfo *MLI);
+ void CheckLegality(ARMBasicBlockUtils *BBUtils);
bool FoundAllComponents() const {
return Start && Dec && End;
@@ -241,18 +358,19 @@ namespace {
void RevertWhile(MachineInstr *MI) const;
- bool RevertLoopDec(MachineInstr *MI, bool AllowFlags = false) const;
+ bool RevertLoopDec(MachineInstr *MI) const;
void RevertLoopEnd(MachineInstr *MI, bool SkipCmp = false) const;
- void RemoveLoopUpdate(LowOverheadLoop &LoLoop);
-
void ConvertVPTBlocks(LowOverheadLoop &LoLoop);
+ void FixupReductions(LowOverheadLoop &LoLoop) const;
+
MachineInstr *ExpandLoopStart(LowOverheadLoop &LoLoop);
void Expand(LowOverheadLoop &LoLoop);
+ void IterationCountDCE(LowOverheadLoop &LoLoop);
};
}
@@ -261,7 +379,7 @@ char ARMLowOverheadLoops::ID = 0;
INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
false, false)
-MachineInstr *LowOverheadLoop::IsSafeToDefineLR(ReachingDefAnalysis *RDA) {
+MachineInstr *LowOverheadLoop::isSafeToDefineLR() {
// We can define LR because LR already contains the same value.
if (Start->getOperand(0).getReg() == ARM::LR)
return Start;
@@ -279,52 +397,22 @@ MachineInstr *LowOverheadLoop::IsSafeToDefineLR(ReachingDefAnalysis *RDA) {
// Find an insertion point:
// - Is there a (mov lr, Count) before Start? If so, and nothing else writes
// to Count before Start, we can insert at that mov.
- if (auto *LRDef = RDA->getReachingMIDef(Start, ARM::LR))
- if (IsMoveLR(LRDef) && RDA->hasSameReachingDef(Start, LRDef, CountReg))
+ if (auto *LRDef = RDA.getUniqueReachingMIDef(Start, ARM::LR))
+ if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg))
return LRDef;
// - Is there a (mov lr, Count) after Start? If so, and nothing else writes
// to Count after Start, we can insert at that mov.
- if (auto *LRDef = RDA->getLocalLiveOutMIDef(MBB, ARM::LR))
- if (IsMoveLR(LRDef) && RDA->hasSameReachingDef(Start, LRDef, CountReg))
+ if (auto *LRDef = RDA.getLocalLiveOutMIDef(MBB, ARM::LR))
+ if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg))
return LRDef;
// We've found no suitable LR def and Start doesn't use LR directly. Can we
// just define LR anyway?
- if (!RDA->isRegUsedAfter(Start, ARM::LR))
- return Start;
-
- return nullptr;
-}
-
-// Can we safely move 'From' to just before 'To'? To satisfy this, 'From' must
-// not define a register that is used by any instructions, after and including,
-// 'To'. These instructions also must not redefine any of Froms operands.
-template<typename Iterator>
-static bool IsSafeToMove(MachineInstr *From, MachineInstr *To, ReachingDefAnalysis *RDA) {
- SmallSet<int, 2> Defs;
- // First check that From would compute the same value if moved.
- for (auto &MO : From->operands()) {
- if (!MO.isReg() || MO.isUndef() || !MO.getReg())
- continue;
- if (MO.isDef())
- Defs.insert(MO.getReg());
- else if (!RDA->hasSameReachingDef(From, To, MO.getReg()))
- return false;
- }
-
- // Now walk checking that the rest of the instructions will compute the same
- // value.
- for (auto I = ++Iterator(From), E = Iterator(To); I != E; ++I) {
- for (auto &MO : I->operands())
- if (MO.isReg() && MO.getReg() && MO.isUse() && Defs.count(MO.getReg()))
- return false;
- }
- return true;
+ return RDA.isSafeToDefRegAt(Start, ARM::LR) ? Start : nullptr;
}
-bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt,
- ReachingDefAnalysis *RDA, MachineLoopInfo *MLI) {
+bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
assert(VCTP && "VCTP instruction expected but is not set");
// All predication within the loop should be based on vctp. If the block
// isn't predicated on entry, check whether the vctp is within the block
@@ -332,24 +420,35 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt,
for (auto &Block : VPTBlocks) {
if (Block.IsPredicatedOn(VCTP))
continue;
- if (!Block.HasNonUniformPredicate() || !isVCTP(Block.getDivergent()->MI)) {
+ if (Block.HasNonUniformPredicate() && !isVCTP(Block.getDivergent()->MI)) {
LLVM_DEBUG(dbgs() << "ARM Loops: Found unsupported diverging predicate: "
- << *Block.getDivergent()->MI);
+ << *Block.getDivergent()->MI);
return false;
}
SmallVectorImpl<PredicatedMI> &Insts = Block.getInsts();
for (auto &PredMI : Insts) {
- if (PredMI.Predicates.count(VCTP) || isVCTP(PredMI.MI))
+ // Check the instructions in the block and only allow:
+ // - VCTPs
+ // - Instructions predicated on the main VCTP
+ // - Any VCMP
+ // - VCMPs just "and" their result with VPR.P0. Whether they are
+ // located before/after the VCTP is irrelevant - the end result will
+ // be the same in both cases, so there's no point in requiring them
+ // to be located after the VCTP!
+ if (PredMI.Predicates.count(VCTP) || isVCTP(PredMI.MI) ||
+ VCMPOpcodeToVPT(PredMI.MI->getOpcode()) != 0)
continue;
LLVM_DEBUG(dbgs() << "ARM Loops: Can't convert: " << *PredMI.MI
- << " - which is predicated on:\n";
- for (auto *MI : PredMI.Predicates)
- dbgs() << " - " << *MI;
- );
+ << " - which is predicated on:\n";
+ for (auto *MI : PredMI.Predicates)
+ dbgs() << " - " << *MI);
return false;
}
}
+ if (!ValidateLiveOuts())
+ return false;
+
// For tail predication, we need to provide the number of elements, instead
// of the iteration count, to the loop start instruction. The number of
// elements is provided to the vctp instruction, so we need to check that
@@ -359,7 +458,7 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt,
// If the register is defined within loop, then we can't perform TP.
// TODO: Check whether this is just a mov of a register that would be
// available.
- if (RDA->getReachingDef(VCTP, NumElements) >= 0) {
+ if (RDA.hasLocalDefBefore(VCTP, NumElements)) {
LLVM_DEBUG(dbgs() << "ARM Loops: VCTP operand is defined in the loop.\n");
return false;
}
@@ -367,17 +466,20 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt,
// The element count register maybe defined after InsertPt, in which case we
// need to try to move either InsertPt or the def so that the [w|d]lstp can
// use the value.
- MachineBasicBlock *InsertBB = InsertPt->getParent();
- if (!RDA->isReachingDefLiveOut(InsertPt, NumElements)) {
- if (auto *ElemDef = RDA->getLocalLiveOutMIDef(InsertBB, NumElements)) {
- if (IsSafeToMove<MachineBasicBlock::reverse_iterator>(ElemDef, InsertPt, RDA)) {
+ // TODO: On failing to move an instruction, check if the count is provided by
+ // a mov and whether we can use the mov operand directly.
+ MachineBasicBlock *InsertBB = StartInsertPt->getParent();
+ if (!RDA.isReachingDefLiveOut(StartInsertPt, NumElements)) {
+ if (auto *ElemDef = RDA.getLocalLiveOutMIDef(InsertBB, NumElements)) {
+ if (RDA.isSafeToMoveForwards(ElemDef, StartInsertPt)) {
ElemDef->removeFromParent();
- InsertBB->insert(MachineBasicBlock::iterator(InsertPt), ElemDef);
+ InsertBB->insert(MachineBasicBlock::iterator(StartInsertPt), ElemDef);
LLVM_DEBUG(dbgs() << "ARM Loops: Moved element count def: "
<< *ElemDef);
- } else if (IsSafeToMove<MachineBasicBlock::iterator>(InsertPt, ElemDef, RDA)) {
- InsertPt->removeFromParent();
- InsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef), InsertPt);
+ } else if (RDA.isSafeToMoveBackwards(StartInsertPt, ElemDef)) {
+ StartInsertPt->removeFromParent();
+ InsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef),
+ StartInsertPt);
LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef);
} else {
LLVM_DEBUG(dbgs() << "ARM Loops: Unable to move element count to loop "
@@ -390,10 +492,10 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt,
// Especially in the case of while loops, InsertBB may not be the
// preheader, so we need to check that the register isn't redefined
// before entering the loop.
- auto CannotProvideElements = [&RDA](MachineBasicBlock *MBB,
+ auto CannotProvideElements = [this](MachineBasicBlock *MBB,
Register NumElements) {
// NumElements is redefined in this block.
- if (RDA->getReachingDef(&MBB->back(), NumElements) >= 0)
+ if (RDA.hasLocalDefBefore(&MBB->back(), NumElements))
return true;
// Don't continue searching up through multiple predecessors.
@@ -404,7 +506,7 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt,
};
// First, find the block that looks like the preheader.
- MachineBasicBlock *MBB = MLI->findLoopPreheader(ML, true);
+ MachineBasicBlock *MBB = Preheader;
if (!MBB) {
LLVM_DEBUG(dbgs() << "ARM Loops: Didn't find preheader.\n");
return false;
@@ -419,13 +521,372 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt,
MBB = *MBB->pred_begin();
}
- LLVM_DEBUG(dbgs() << "ARM Loops: Will use tail predication.\n");
+ // Check that the value change of the element count is what we expect and
+ // that the predication will be equivalent. For this we need:
+ // NumElements = NumElements - VectorWidth. The sub will be a sub immediate
+ // and we can also allow register copies within the chain too.
+ auto IsValidSub = [](MachineInstr *MI, int ExpectedVecWidth) {
+ return -getAddSubImmediate(*MI) == ExpectedVecWidth;
+ };
+
+ MBB = VCTP->getParent();
+ if (auto *Def = RDA.getUniqueReachingMIDef(&MBB->back(), NumElements)) {
+ SmallPtrSet<MachineInstr*, 2> ElementChain;
+ SmallPtrSet<MachineInstr*, 2> Ignore = { VCTP };
+ unsigned ExpectedVectorWidth = getTailPredVectorWidth(VCTP->getOpcode());
+
+ Ignore.insert(SecondaryVCTPs.begin(), SecondaryVCTPs.end());
+
+ if (RDA.isSafeToRemove(Def, ElementChain, Ignore)) {
+ bool FoundSub = false;
+
+ for (auto *MI : ElementChain) {
+ if (isMovRegOpcode(MI->getOpcode()))
+ continue;
+
+ if (isSubImmOpcode(MI->getOpcode())) {
+ if (FoundSub || !IsValidSub(MI, ExpectedVectorWidth))
+ return false;
+ FoundSub = true;
+ } else
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "ARM Loops: Will remove element count chain:\n";
+ for (auto *MI : ElementChain)
+ dbgs() << " - " << *MI);
+ ToRemove.insert(ElementChain.begin(), ElementChain.end());
+ }
+ }
+ return true;
+}
+
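ValidateTailPredicate above requires the element count to change only by NumElements -= VectorWidth each iteration. A rough stand-alone model, not the pass's code and with made-up numbers, of how that count and a VCTP-style lane mask evolve:

    #include <cassert>
    #include <cstdint>

    // A VCTP-like mask enables min(remaining, width) lanes.
    static uint16_t vctpMask(int Remaining, int Width) {
      int Active = Remaining < 0 ? 0 : (Remaining < Width ? Remaining : Width);
      return static_cast<uint16_t>((1u << Active) - 1);
    }

    int main() {
      int NumElements = 10;
      const int VectorWidth = 4;                         // e.g. a 32-bit VCTP: 4 lanes
      assert(vctpMask(NumElements, VectorWidth) == 0xF); // iteration 1: all lanes
      NumElements -= VectorWidth;                        // the expected "sub immediate"
      assert(vctpMask(NumElements, VectorWidth) == 0xF); // 6 elements left
      NumElements -= VectorWidth;
      assert(vctpMask(NumElements, VectorWidth) == 0x3); // 2 left: 2 lanes active
      return 0;
    }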
+static bool isVectorPredicated(MachineInstr *MI) {
+ int PIdx = llvm::findFirstVPTPredOperandIdx(*MI);
+ return PIdx != -1 && MI->getOperand(PIdx + 1).getReg() == ARM::VPR;
+}
+
+static bool isRegInClass(const MachineOperand &MO,
+ const TargetRegisterClass *Class) {
+ return MO.isReg() && MO.getReg() && Class->contains(MO.getReg());
+}
+
+// MVE 'narrowing' instructions operate on half a lane, reading from half and
+// writing to half, which are referred to as the top and bottom half. The other
+// half retains its previous value.
+static bool retainsPreviousHalfElement(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ uint64_t Flags = MCID.TSFlags;
+ return (Flags & ARMII::RetainsPreviousHalfElement) != 0;
+}
+
+// Some MVE instructions read from the top/bottom halves of their operand(s)
+// and generate a vector result with result elements that are double the
+// width of the input.
+static bool producesDoubleWidthResult(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ uint64_t Flags = MCID.TSFlags;
+ return (Flags & ARMII::DoubleWidthResult) != 0;
+}
+
+static bool isHorizontalReduction(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ uint64_t Flags = MCID.TSFlags;
+ return (Flags & ARMII::HorizontalReduction) != 0;
+}
+
+// Can this instruction generate a non-zero result when given only zeroed
+// operands? This allows us to know that, given operands with false bytes
+// zeroed by masked loads, the result will also contain zeros in those
+// bytes.
+static bool canGenerateNonZeros(const MachineInstr &MI) {
+
+ // Check for instructions which can write into a larger element size,
+  // possibly writing into a previously zero'd lane.
+ if (producesDoubleWidthResult(MI))
+ return true;
+
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ // FIXME: VNEG FP and -0? I think we'll need to handle this once we allow
+ // fp16 -> fp32 vector conversions.
+ // Instructions that perform a NOT will generate 1s from 0s.
+ case ARM::MVE_VMVN:
+ case ARM::MVE_VORN:
+ // Count leading zeros will do just that!
+ case ARM::MVE_VCLZs8:
+ case ARM::MVE_VCLZs16:
+ case ARM::MVE_VCLZs32:
+ return true;
+ }
+ return false;
+}
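
As a concrete illustration of why this check matters, the toy program below (an editor's example, not pass code) models four lanes whose inactive ("false") lanes were zeroed by a masked load: a lane-wise add keeps those lanes at zero, while a bitwise NOT turns them into all-ones, which is exactly the hazard canGenerateNonZeros() screens for.

#include <array>
#include <cassert>
#include <cstdint>

// Model a 4-lane vector whose inactive lanes were zeroed by a predicated load.
using Vec = std::array<uint32_t, 4>;

static Vec maskedLoad(const Vec &Mem, const std::array<bool, 4> &Active) {
  Vec R{};
  for (int i = 0; i < 4; ++i)
    R[i] = Active[i] ? Mem[i] : 0; // false lanes are written as zero
  return R;
}

int main() {
  std::array<bool, 4> Active = {true, true, false, false};
  Vec A = maskedLoad({1, 2, 3, 4}, Active);
  Vec B = maskedLoad({5, 6, 7, 8}, Active);

  // Lane-wise add: zero inputs give zero outputs, so false lanes stay zero.
  Vec Add{};
  for (int i = 0; i < 4; ++i)
    Add[i] = A[i] + B[i];
  assert(Add[2] == 0 && Add[3] == 0);

  // Bitwise NOT (VMVN-like): zero lanes become all-ones, so the false lanes
  // are no longer zero -- the case canGenerateNonZeros() rejects.
  Vec Not{};
  for (int i = 0; i < 4; ++i)
    Not[i] = ~A[i];
  assert(Not[2] == 0xFFFFFFFFu);
  return 0;
}
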
+
+
+// Look at its register uses to see if it can only receive zeros
+// into its false lanes, which would then produce zeros. Also check that
+// the output register is also defined by a FalseLanesZero instruction
+// so that if tail-predication happens, the lanes that aren't updated will
+// still be zeros.
+static bool producesFalseLanesZero(MachineInstr &MI,
+ const TargetRegisterClass *QPRs,
+ const ReachingDefAnalysis &RDA,
+ InstSet &FalseLanesZero) {
+ if (canGenerateNonZeros(MI))
+ return false;
+
+ bool AllowScalars = isHorizontalReduction(MI);
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (!isRegInClass(MO, QPRs) && AllowScalars)
+ continue;
+ if (auto *OpDef = RDA.getMIOperand(&MI, MO))
+ if (FalseLanesZero.count(OpDef))
+ continue;
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "ARM Loops: Always False Zeros: " << MI);
+ return true;
+}
+
+bool
+LowOverheadLoop::FindValidReduction(InstSet &LiveMIs, InstSet &LiveOutUsers) {
+  // Also check for reductions where the operation needs to merge values
+ // from the last and previous loop iterations. This means an instruction
+ // producing a value and a vmov storing the value calculated in the previous
+ // iteration. So we can have two live-out regs, one produced by a vmov and
+ // both being consumed by a vpsel.
+ LLVM_DEBUG(dbgs() << "ARM Loops: Looking for reduction live-outs:\n";
+ for (auto *MI : LiveMIs)
+ dbgs() << " - " << *MI);
+
+ if (!Preheader)
+ return false;
+
+ // Expect a vmov, a vadd and a single vpsel user.
+ // TODO: This means we can't currently support multiple reductions in the
+ // loop.
+ if (LiveMIs.size() != 2 || LiveOutUsers.size() != 1)
+ return false;
+
+ MachineInstr *VPSEL = *LiveOutUsers.begin();
+ if (VPSEL->getOpcode() != ARM::MVE_VPSEL)
+ return false;
+
+ unsigned VPRIdx = llvm::findFirstVPTPredOperandIdx(*VPSEL) + 1;
+ MachineInstr *Pred = RDA.getMIOperand(VPSEL, VPRIdx);
+ if (!Pred || Pred != VCTP) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Not using equivalent predicate.\n");
+ return false;
+ }
+
+ MachineInstr *Reduce = RDA.getMIOperand(VPSEL, 1);
+ if (!Reduce)
+ return false;
+
+ assert(LiveMIs.count(Reduce) && "Expected MI to be live-out");
+
+ // TODO: Support more operations than VADD.
+ switch (VCTP->getOpcode()) {
+ default:
+ return false;
+ case ARM::MVE_VCTP8:
+ if (Reduce->getOpcode() != ARM::MVE_VADDi8)
+ return false;
+ break;
+ case ARM::MVE_VCTP16:
+ if (Reduce->getOpcode() != ARM::MVE_VADDi16)
+ return false;
+ break;
+ case ARM::MVE_VCTP32:
+ if (Reduce->getOpcode() != ARM::MVE_VADDi32)
+ return false;
+ break;
+ }
+
+  // Test that the reduce op is overwriting one of its operands.
+ if (Reduce->getOperand(0).getReg() != Reduce->getOperand(1).getReg() &&
+ Reduce->getOperand(0).getReg() != Reduce->getOperand(2).getReg()) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Reducing op isn't overwriting itself.\n");
+ return false;
+ }
+
+ // Check that the VORR is actually a VMOV.
+ MachineInstr *Copy = RDA.getMIOperand(VPSEL, 2);
+ if (!Copy || Copy->getOpcode() != ARM::MVE_VORR ||
+ !Copy->getOperand(1).isReg() || !Copy->getOperand(2).isReg() ||
+ Copy->getOperand(1).getReg() != Copy->getOperand(2).getReg())
+ return false;
+
+ assert(LiveMIs.count(Copy) && "Expected MI to be live-out");
+
+ // Check that the vadd and vmov are only used by each other and the vpsel.
+ SmallPtrSet<MachineInstr*, 2> CopyUsers;
+ RDA.getGlobalUses(Copy, Copy->getOperand(0).getReg(), CopyUsers);
+ if (CopyUsers.size() > 2 || !CopyUsers.count(Reduce)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Copy users unsupported.\n");
+ return false;
+ }
+
+ SmallPtrSet<MachineInstr*, 2> ReduceUsers;
+ RDA.getGlobalUses(Reduce, Reduce->getOperand(0).getReg(), ReduceUsers);
+ if (ReduceUsers.size() > 2 || !ReduceUsers.count(Copy)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Reduce users unsupported.\n");
+ return false;
+ }
+
+ // Then find whether there's an instruction initialising the register that
+ // is storing the reduction.
+ SmallPtrSet<MachineInstr*, 2> Incoming;
+ RDA.getLiveOuts(Preheader, Copy->getOperand(1).getReg(), Incoming);
+ if (Incoming.size() > 1)
+ return false;
+
+ MachineInstr *Init = Incoming.empty() ? nullptr : *Incoming.begin();
+ LLVM_DEBUG(dbgs() << "ARM Loops: Found a reduction:\n"
+ << " - " << *Copy
+ << " - " << *Reduce
+ << " - " << *VPSEL);
+ Reductions.push_back(std::make_unique<Reduction>(Init, Copy, Reduce, VPSEL));
return true;
}
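
The live-out shape accepted here is a rolling reduction: a vadd accumulating into one of its own operands, a vmov (an MVE_VORR with identical source operands) carrying the previous iteration's partial result, and a single vpsel in the exit block choosing, per lane, between the final and the previous partial sums. A rough scalar analogue, with editor-invented names and no predication, is sketched below.

#include <cstdint>
#include <vector>

// Scalar analogue of the vadd/vmov/vpsel pattern matched by
// FindValidReduction(): 'Acc' corresponds to the vadd destination and 'Prev'
// to the vmov copy of the previous iteration's value. In the vector form, the
// exit-block vpsel merges Acc (iteration n) and Prev (iteration n-1) lane by
// lane, keeping Prev in the lanes that were inactive in the final, partial
// iteration.
uint32_t rollingReduction(const std::vector<uint32_t> &Data) {
  uint32_t Acc = 0;  // initialised by Reduction::Init when one exists
  uint32_t Prev = 0; // carried copy of last iteration's partial sum
  for (uint32_t V : Data) {
    Prev = Acc;    // vmov
    Acc = Acc + V; // vadd overwriting one of its own operands
  }
  (void)Prev; // the scalar merge is trivial; the vector one is the vpsel
  return Acc;
}

int main() { return rollingReduction({1, 2, 3, 4}) == 10 ? 0 : 1; }
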
-void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils,
- ReachingDefAnalysis *RDA,
- MachineLoopInfo *MLI) {
+bool LowOverheadLoop::ValidateLiveOuts() {
+ // We want to find out if the tail-predicated version of this loop will
+ // produce the same values as the loop in its original form. For this to
+  // be true, the newly inserted implicit predication must not change the
+  // (observable) results.
+  // We're doing this because many instructions in the loop will not be
+  // predicated, so the conversion from VPT predication to tail-predication
+  // can result in different values being produced: the tail-predication
+  // prevents many instructions from updating their falsely predicated
+ // lanes. This analysis assumes that all the instructions perform lane-wise
+ // operations and don't perform any exchanges.
+ // A masked load, whether through VPT or tail predication, will write zeros
+ // to any of the falsely predicated bytes. So, from the loads, we know that
+ // the false lanes are zeroed and here we're trying to track that those false
+ // lanes remain zero, or where they change, the differences are masked away
+ // by their user(s).
+ // All MVE loads and stores have to be predicated, so we know that any load
+ // operands, or stored results are equivalent already. Other explicitly
+ // predicated instructions will perform the same operation in the original
+ // loop and the tail-predicated form too. Because of this, we can insert
+ // loads, stores and other predicated instructions into our Predicated
+ // set and build from there.
+ const TargetRegisterClass *QPRs = TRI.getRegClass(ARM::MQPRRegClassID);
+ SetVector<MachineInstr *> FalseLanesUnknown;
+ SmallPtrSet<MachineInstr *, 4> FalseLanesZero;
+ SmallPtrSet<MachineInstr *, 4> Predicated;
+ MachineBasicBlock *Header = ML.getHeader();
+
+ for (auto &MI : *Header) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ uint64_t Flags = MCID.TSFlags;
+ if ((Flags & ARMII::DomainMask) != ARMII::DomainMVE)
+ continue;
+
+ if (isVCTP(&MI) || isVPTOpcode(MI.getOpcode()))
+ continue;
+
+ // Predicated loads will write zeros to the falsely predicated bytes of the
+ // destination register.
+ if (isVectorPredicated(&MI)) {
+ if (MI.mayLoad())
+ FalseLanesZero.insert(&MI);
+ Predicated.insert(&MI);
+ continue;
+ }
+
+ if (MI.getNumDefs() == 0)
+ continue;
+
+ if (!producesFalseLanesZero(MI, QPRs, RDA, FalseLanesZero)) {
+ // We require retaining and horizontal operations to operate upon zero'd
+ // false lanes to ensure the conversion doesn't change the output.
+ if (retainsPreviousHalfElement(MI) || isHorizontalReduction(MI))
+ return false;
+ // Otherwise we need to evaluate this instruction later to see whether
+ // unknown false lanes will get masked away by their user(s).
+ FalseLanesUnknown.insert(&MI);
+ } else if (!isHorizontalReduction(MI))
+ FalseLanesZero.insert(&MI);
+ }
+
+ auto HasPredicatedUsers = [this](MachineInstr *MI, const MachineOperand &MO,
+ SmallPtrSetImpl<MachineInstr *> &Predicated) {
+ SmallPtrSet<MachineInstr *, 2> Uses;
+ RDA.getGlobalUses(MI, MO.getReg(), Uses);
+ for (auto *Use : Uses) {
+ if (Use != MI && !Predicated.count(Use))
+ return false;
+ }
+ return true;
+ };
+
+ // Visit the unknowns in reverse so that we can start at the values being
+ // stored and then we can work towards the leaves, hopefully adding more
+ // instructions to Predicated. Successfully terminating the loop means that
+  // all the unknown values have been found to be masked by predicated user(s).
+ // For any unpredicated values, we store them in NonPredicated so that we
+ // can later check whether these form a reduction.
+ SmallPtrSet<MachineInstr*, 2> NonPredicated;
+ for (auto *MI : reverse(FalseLanesUnknown)) {
+ for (auto &MO : MI->operands()) {
+ if (!isRegInClass(MO, QPRs) || !MO.isDef())
+ continue;
+ if (!HasPredicatedUsers(MI, MO, Predicated)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Found an unknown def of : "
+ << TRI.getRegAsmName(MO.getReg()) << " at " << *MI);
+ NonPredicated.insert(MI);
+ continue;
+ }
+ }
+ // Any unknown false lanes have been masked away by the user(s).
+ Predicated.insert(MI);
+ }
+
+ SmallPtrSet<MachineInstr *, 2> LiveOutMIs;
+ SmallPtrSet<MachineInstr*, 2> LiveOutUsers;
+ SmallVector<MachineBasicBlock *, 2> ExitBlocks;
+ ML.getExitBlocks(ExitBlocks);
+ assert(ML.getNumBlocks() == 1 && "Expected single block loop!");
+ assert(ExitBlocks.size() == 1 && "Expected a single exit block");
+ MachineBasicBlock *ExitBB = ExitBlocks.front();
+ for (const MachineBasicBlock::RegisterMaskPair &RegMask : ExitBB->liveins()) {
+ // Check Q-regs that are live in the exit blocks. We don't collect scalars
+ // because they won't be affected by lane predication.
+ if (QPRs->contains(RegMask.PhysReg)) {
+ if (auto *MI = RDA.getLocalLiveOutMIDef(Header, RegMask.PhysReg))
+ LiveOutMIs.insert(MI);
+ RDA.getLiveInUses(ExitBB, RegMask.PhysReg, LiveOutUsers);
+ }
+ }
+
+ // If we have any non-predicated live-outs, they need to be part of a
+  // reduction that we can fix up later. The reduction takes the form of an
+  // operation that uses its previous value through a vmov, with a vpsel
+  // residing in the exit block to select the final bytes from the n and n-1
+  // iterations.
+ if (!NonPredicated.empty() &&
+ !FindValidReduction(NonPredicated, LiveOutUsers))
+ return false;
+
+ // We've already validated that any VPT predication within the loop will be
+ // equivalent when we perform the predication transformation; so we know that
+ // any VPT predicated instruction is predicated upon VCTP. Any live-out
+ // instruction needs to be predicated, so check this here. The instructions
+  // in NonPredicated have already been found to form a reduction whose
+  // legality we can ensure.
+ for (auto *MI : LiveOutMIs)
+ if (!isVectorPredicated(MI) && !NonPredicated.count(MI))
+ return false;
+
+ return true;
+}
+
+void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils) {
if (Revert)
return;
@@ -434,7 +895,7 @@ void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils,
// TODO Maybe there's cases where the target doesn't have to be the header,
// but for now be safe and revert.
- if (End->getOperand(1).getMBB() != ML->getHeader()) {
+ if (End->getOperand(1).getMBB() != ML.getHeader()) {
LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targetting header.\n");
Revert = true;
return;
@@ -442,8 +903,8 @@ void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils,
// The WLS and LE instructions have 12-bits for the label offset. WLS
// requires a positive offset, while LE uses negative.
- if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML->getHeader()) ||
- !BBUtils->isBBInRange(End, ML->getHeader(), 4094)) {
+ if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML.getHeader()) ||
+ !BBUtils->isBBInRange(End, ML.getHeader(), 4094)) {
LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
Revert = true;
return;
@@ -458,7 +919,7 @@ void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils,
return;
}
- InsertPt = Revert ? nullptr : IsSafeToDefineLR(RDA);
+ InsertPt = Revert ? nullptr : isSafeToDefineLR();
if (!InsertPt) {
LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n");
Revert = true;
@@ -473,9 +934,9 @@ void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils,
return;
}
- assert(ML->getBlocks().size() == 1 &&
+ assert(ML.getBlocks().size() == 1 &&
"Shouldn't be processing a loop with more than one block");
- CannotTailPredicate = !ValidateTailPredicate(InsertPt, RDA, MLI);
+ CannotTailPredicate = !ValidateTailPredicate(InsertPt);
LLVM_DEBUG(if (CannotTailPredicate)
dbgs() << "ARM Loops: Couldn't validate tail predicate.\n");
}
@@ -484,29 +945,44 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) {
if (CannotTailPredicate)
return false;
- // Only support a single vctp.
- if (isVCTP(MI) && VCTP)
- return false;
+ if (isVCTP(MI)) {
+ // If we find another VCTP, check whether it uses the same value as the main VCTP.
+ // If it does, store it in the SecondaryVCTPs set, else refuse it.
+ if (VCTP) {
+ if (!VCTP->getOperand(1).isIdenticalTo(MI->getOperand(1)) ||
+ !RDA.hasSameReachingDef(VCTP, MI, MI->getOperand(1).getReg())) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Found VCTP with a different reaching "
+ "definition from the main VCTP");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "ARM Loops: Found secondary VCTP: " << *MI);
+ SecondaryVCTPs.insert(MI);
+ } else {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Found 'main' VCTP: " << *MI);
+ VCTP = MI;
+ }
+ } else if (isVPTOpcode(MI->getOpcode())) {
+ if (MI->getOpcode() != ARM::MVE_VPST) {
+ assert(MI->findRegisterDefOperandIdx(ARM::VPR) != -1 &&
+ "VPT does not implicitly define VPR?!");
+ CurrentPredicate.insert(MI);
+ }
- // Start a new vpt block when we discover a vpt.
- if (MI->getOpcode() == ARM::MVE_VPST) {
VPTBlocks.emplace_back(MI, CurrentPredicate);
CurrentBlock = &VPTBlocks.back();
return true;
- } else if (isVCTP(MI))
- VCTP = MI;
- else if (MI->getOpcode() == ARM::MVE_VPSEL ||
- MI->getOpcode() == ARM::MVE_VPNOT)
+ } else if (MI->getOpcode() == ARM::MVE_VPSEL ||
+ MI->getOpcode() == ARM::MVE_VPNOT) {
+ // TODO: Allow VPSEL and VPNOT, we currently cannot because:
+    // 1) It will use the VPR as a predicate operand, but doesn't have to be
+    //    part of a VPT block, which means we can assert while building up
+    //    the VPT block because we don't find another VPT or VPST to begin a new
+    //    one.
+ // 2) VPSEL still requires a VPR operand even after tail predicating,
+ // which means we can't remove it unless there is another
+ // instruction, such as vcmp, that can provide the VPR def.
return false;
-
- // TODO: Allow VPSEL and VPNOT, we currently cannot because:
- // 1) It will use the VPR as a predicate operand, but doesn't have to be
- // instead a VPT block, which means we can assert while building up
- // the VPT block because we don't find another VPST to being a new
- // one.
- // 2) VPSEL still requires a VPR operand even after tail predicating,
- // which means we can't remove it unless there is another
- // instruction, such as vcmp, that can provide the VPR def.
+ }
bool IsUse = false;
bool IsDef = false;
@@ -548,7 +1024,9 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) {
return false;
}
- return true;
+ // If the instruction is already explicitly predicated, then the conversion
+ // will be fine, but ensure that all memory operations are predicated.
+ return !IsUse && MI->mayLoadOrStore() ? false : true;
}
bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
@@ -591,6 +1069,8 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
dbgs() << " - " << Preheader->getName() << "\n";
else if (auto *Preheader = MLI->findLoopPreheader(ML))
dbgs() << " - " << Preheader->getName() << "\n";
+ else if (auto *Preheader = MLI->findLoopPreheader(ML, true))
+ dbgs() << " - " << Preheader->getName() << "\n";
for (auto *MBB : ML->getBlocks())
dbgs() << " - " << MBB->getName() << "\n";
);
@@ -608,14 +1088,12 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
return nullptr;
};
- LowOverheadLoop LoLoop(ML);
+ LowOverheadLoop LoLoop(*ML, *MLI, *RDA, *TRI, *TII);
// Search the preheader for the start intrinsic.
// FIXME: I don't see why we shouldn't be supporting multiple predecessors
// with potentially multiple set.loop.iterations, so we need to enable this.
- if (auto *Preheader = ML->getLoopPreheader())
- LoLoop.Start = SearchForStart(Preheader);
- else if (auto *Preheader = MLI->findLoopPreheader(ML, true))
- LoLoop.Start = SearchForStart(Preheader);
+ if (LoLoop.Preheader)
+ LoLoop.Start = SearchForStart(LoLoop.Preheader);
else
return false;
@@ -624,7 +1102,9 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
// whether we can convert that predicate using tail predication.
for (auto *MBB : reverse(ML->getBlocks())) {
for (auto &MI : *MBB) {
- if (MI.getOpcode() == ARM::t2LoopDec)
+ if (MI.isDebugValue())
+ continue;
+ else if (MI.getOpcode() == ARM::t2LoopDec)
LoLoop.Dec = &MI;
else if (MI.getOpcode() == ARM::t2LoopEnd)
LoLoop.End = &MI;
@@ -641,28 +1121,6 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
// Check we know how to tail predicate any mve instructions.
LoLoop.AnalyseMVEInst(&MI);
}
-
- // We need to ensure that LR is not used or defined inbetween LoopDec and
- // LoopEnd.
- if (!LoLoop.Dec || LoLoop.End || LoLoop.Revert)
- continue;
-
- // If we find that LR has been written or read between LoopDec and
- // LoopEnd, expect that the decremented value is being used else where.
- // Because this value isn't actually going to be produced until the
- // latch, by LE, we would need to generate a real sub. The value is also
- // likely to be copied/reloaded for use of LoopEnd - in which in case
- // we'd need to perform an add because it gets subtracted again by LE!
- // The other option is to then generate the other form of LE which doesn't
- // perform the sub.
- for (auto &MO : MI.operands()) {
- if (MI.getOpcode() != ARM::t2LoopDec && MO.isReg() &&
- MO.getReg() == ARM::LR) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Found LR Use/Def: " << MI);
- LoLoop.Revert = true;
- break;
- }
- }
}
}
@@ -672,7 +1130,15 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
return false;
}
- LoLoop.CheckLegality(BBUtils.get(), RDA, MLI);
+ // Check that the only instruction using LoopDec is LoopEnd.
+ // TODO: Check for copy chains that really have no effect.
+ SmallPtrSet<MachineInstr*, 2> Uses;
+ RDA->getReachingLocalUses(LoLoop.Dec, ARM::LR, Uses);
+ if (Uses.size() > 1 || !Uses.count(LoLoop.End)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Unable to remove LoopDec.\n");
+ LoLoop.Revert = true;
+ }
+ LoLoop.CheckLegality(BBUtils.get());
Expand(LoLoop);
return true;
}
@@ -702,16 +1168,19 @@ void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
MI->eraseFromParent();
}
-bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI,
- bool SetFlags) const {
+bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI);
MachineBasicBlock *MBB = MI->getParent();
+ SmallPtrSet<MachineInstr*, 1> Ignore;
+ for (auto I = MachineBasicBlock::iterator(MI), E = MBB->end(); I != E; ++I) {
+ if (I->getOpcode() == ARM::t2LoopEnd) {
+ Ignore.insert(&*I);
+ break;
+ }
+ }
// If nothing defines CPSR between LoopDec and LoopEnd, use a t2SUBS.
- if (SetFlags &&
- (RDA->isRegUsedAfter(MI, ARM::CPSR) ||
- !RDA->hasSameReachingDef(MI, &MBB->back(), ARM::CPSR)))
- SetFlags = false;
+ bool SetFlags = RDA->isSafeToDefRegAt(MI, ARM::CPSR, Ignore);
MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
TII->get(ARM::t2SUBri));
@@ -759,7 +1228,102 @@ void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const {
MI->eraseFromParent();
}
+// Perform dead code elimination on the loop iteration count setup expression.
+// If we are tail-predicating, the number of elements to be processed is the
+// operand of the VCTP instruction in the vector body, see getCount(), which is
+// register $r3 in this example:
+//
+// $lr = big-itercount-expression
+// ..
+// t2DoLoopStart renamable $lr
+// vector.body:
+// ..
+// $vpr = MVE_VCTP32 renamable $r3
+// renamable $lr = t2LoopDec killed renamable $lr, 1
+// t2LoopEnd renamable $lr, %vector.body
+// tB %end
+//
+// What we would like to achieve here is to replace the do-loop start pseudo
+// instruction t2DoLoopStart with:
+//
+// $lr = MVE_DLSTP_32 killed renamable $r3
+//
+// Thus, $r3, which defines the number of elements, is written to $lr,
+// and then we want to delete the whole chain that used to define $lr;
+// see the comment below for how this chain could look.
+//
+void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
+ if (!LoLoop.IsTailPredicationLegal())
+ return;
+
+ LLVM_DEBUG(dbgs() << "ARM Loops: Trying DCE on loop iteration count.\n");
+
+ MachineInstr *Def = RDA->getMIOperand(LoLoop.Start, 0);
+ if (!Def) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Couldn't find iteration count.\n");
+ return;
+ }
+
+ // Collect and remove the users of iteration count.
+ SmallPtrSet<MachineInstr*, 4> Killed = { LoLoop.Start, LoLoop.Dec,
+ LoLoop.End, LoLoop.InsertPt };
+ SmallPtrSet<MachineInstr*, 2> Remove;
+ if (RDA->isSafeToRemove(Def, Remove, Killed))
+ LoLoop.ToRemove.insert(Remove.begin(), Remove.end());
+ else {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Unsafe to remove loop iteration count.\n");
+ return;
+ }
+
+ // Collect the dead code and the MBBs in which they reside.
+ RDA->collectKilledOperands(Def, Killed);
+ SmallPtrSet<MachineBasicBlock*, 2> BasicBlocks;
+ for (auto *MI : Killed)
+ BasicBlocks.insert(MI->getParent());
+
+ // Collect IT blocks in all affected basic blocks.
+ std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks;
+ for (auto *MBB : BasicBlocks) {
+ for (auto &MI : *MBB) {
+ if (MI.getOpcode() != ARM::t2IT)
+ continue;
+ RDA->getReachingLocalUses(&MI, ARM::ITSTATE, ITBlocks[&MI]);
+ }
+ }
+
+ // If we're removing all of the instructions within an IT block, then
+ // also remove the IT instruction.
+ SmallPtrSet<MachineInstr*, 2> ModifiedITs;
+ for (auto *MI : Killed) {
+ if (MachineOperand *MO = MI->findRegisterUseOperand(ARM::ITSTATE)) {
+ MachineInstr *IT = RDA->getMIOperand(MI, *MO);
+ auto &CurrentBlock = ITBlocks[IT];
+ CurrentBlock.erase(MI);
+ if (CurrentBlock.empty())
+ ModifiedITs.erase(IT);
+ else
+ ModifiedITs.insert(IT);
+ }
+ }
+
+ // Delete the killed instructions only if we don't have any IT blocks that
+ // need to be modified because we need to fixup the mask.
+ // TODO: Handle cases where IT blocks are modified.
+ if (ModifiedITs.empty()) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Will remove iteration count:\n";
+ for (auto *MI : Killed)
+ dbgs() << " - " << *MI);
+ LoLoop.ToRemove.insert(Killed.begin(), Killed.end());
+ } else
+ LLVM_DEBUG(dbgs() << "ARM Loops: Would need to modify IT block(s).\n");
+}
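
To make the IT-block bookkeeping above concrete, here is a toy, self-contained model using editor-chosen names rather than the pass's data structures: each IT instruction owns the set of instructions it predicates; killed instructions are erased from their IT's set, an emptied set means the IT is dead too, and a merely shrunken set would need a new mask, so the pass bails out.

#include <map>
#include <set>
#include <string>

int main() {
  // One IT block predicating two instructions.
  std::map<std::string, std::set<std::string>> ITBlocks = {
      {"it.1", {"addeq", "subeq"}}};
  // Both predicated instructions are part of the dead iteration-count chain.
  std::set<std::string> Killed = {"addeq", "subeq"};

  std::set<std::string> ModifiedITs;
  for (const std::string &MI : Killed) {
    std::set<std::string> &Block = ITBlocks["it.1"];
    Block.erase(MI);
    if (Block.empty())
      ModifiedITs.erase("it.1"); // whole block dead: the IT can go as well
    else
      ModifiedITs.insert("it.1"); // partially dead: mask would need fixing
  }
  // Mirrors the pass: deletion only happens when no IT needs a new mask.
  return ModifiedITs.empty() ? 0 : 1;
}
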
+
MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Expanding LoopStart.\n");
+ // When using tail-predication, try to delete the dead code that was used to
+ // calculate the number of loop iterations.
+ IterationCountDCE(LoLoop);
+
MachineInstr *InsertPt = LoLoop.InsertPt;
MachineInstr *Start = LoLoop.Start;
MachineBasicBlock *MBB = InsertPt->getParent();
@@ -775,109 +1339,67 @@ MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
if (!IsDo)
MIB.add(Start->getOperand(1));
- // When using tail-predication, try to delete the dead code that was used to
- // calculate the number of loop iterations.
- if (LoLoop.IsTailPredicationLegal()) {
- SmallVector<MachineInstr*, 4> Killed;
- SmallVector<MachineInstr*, 4> Dead;
- if (auto *Def = RDA->getReachingMIDef(Start,
- Start->getOperand(0).getReg())) {
- Killed.push_back(Def);
-
- while (!Killed.empty()) {
- MachineInstr *Def = Killed.back();
- Killed.pop_back();
- Dead.push_back(Def);
- for (auto &MO : Def->operands()) {
- if (!MO.isReg() || !MO.isKill())
- continue;
-
- MachineInstr *Kill = RDA->getReachingMIDef(Def, MO.getReg());
- if (Kill && RDA->getNumUses(Kill, MO.getReg()) == 1)
- Killed.push_back(Kill);
- }
- }
- for (auto *MI : Dead)
- MI->eraseFromParent();
- }
- }
-
// If we're inserting at a mov lr, then remove it as it's redundant.
if (InsertPt != Start)
- InsertPt->eraseFromParent();
- Start->eraseFromParent();
+ LoLoop.ToRemove.insert(InsertPt);
+ LoLoop.ToRemove.insert(Start);
LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);
return &*MIB;
}
-// Goal is to optimise and clean-up these loops:
-//
-// vector.body:
-// renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
-// renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3(tied-def 0), 4
-// ..
-// $lr = MVE_DLSTP_32 renamable $r3
-//
-// The SUB is the old update of the loop iteration count expression, which
-// is no longer needed. This sub is removed when the element count, which is in
-// r3 in this example, is defined by an instruction in the loop, and it has
-// no uses.
-//
-void ARMLowOverheadLoops::RemoveLoopUpdate(LowOverheadLoop &LoLoop) {
- Register ElemCount = LoLoop.VCTP->getOperand(1).getReg();
- MachineInstr *LastInstrInBlock = &LoLoop.VCTP->getParent()->back();
-
- LLVM_DEBUG(dbgs() << "ARM Loops: Trying to remove loop update stmt\n");
-
- if (LoLoop.ML->getNumBlocks() != 1) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Single block loop expected\n");
- return;
- }
-
- LLVM_DEBUG(dbgs() << "ARM Loops: Analyzing elemcount in operand: ";
- LoLoop.VCTP->getOperand(1).dump());
-
- // Find the definition we are interested in removing, if there is one.
- MachineInstr *Def = RDA->getReachingMIDef(LastInstrInBlock, ElemCount);
- if (!Def) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Can't find a def, nothing to do.\n");
- return;
- }
-
- // Bail if we define CPSR and it is not dead
- if (!Def->registerDefIsDead(ARM::CPSR, TRI)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: CPSR is not dead\n");
- return;
- }
-
- // Bail if elemcount is used in exit blocks, i.e. if it is live-in.
- if (isRegLiveInExitBlocks(LoLoop.ML, ElemCount)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Elemcount is live-out, can't remove stmt\n");
- return;
- }
+void ARMLowOverheadLoops::FixupReductions(LowOverheadLoop &LoLoop) const {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Fixing up reduction(s).\n");
+ auto BuildMov = [this](MachineInstr &InsertPt, Register To, Register From) {
+ MachineBasicBlock *MBB = InsertPt.getParent();
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, &InsertPt, InsertPt.getDebugLoc(), TII->get(ARM::MVE_VORR));
+ MIB.addDef(To);
+ MIB.addReg(From);
+ MIB.addReg(From);
+ MIB.addImm(0);
+ MIB.addReg(0);
+ MIB.addReg(To);
+ LLVM_DEBUG(dbgs() << "ARM Loops: Inserted VMOV: " << *MIB);
+ };
- // Bail if there are uses after this Def in the block.
- SmallVector<MachineInstr*, 4> Uses;
- RDA->getReachingLocalUses(Def, ElemCount, Uses);
- if (Uses.size()) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Local uses in block, can't remove stmt\n");
- return;
- }
+ for (auto &Reduction : LoLoop.Reductions) {
+ MachineInstr &Copy = Reduction->Copy;
+ MachineInstr &Reduce = Reduction->Reduce;
+ Register DestReg = Copy.getOperand(0).getReg();
- Uses.clear();
- RDA->getAllInstWithUseBefore(Def, ElemCount, Uses);
+ // Change the initialiser if present
+ if (Reduction->Init) {
+ MachineInstr *Init = Reduction->Init;
- // Remove Def if there are no uses, or if the only use is the VCTP
- // instruction.
- if (!Uses.size() || (Uses.size() == 1 && Uses[0] == LoLoop.VCTP)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Removing loop update instruction: ";
- Def->dump());
- Def->eraseFromParent();
- return;
+ for (unsigned i = 0; i < Init->getNumOperands(); ++i) {
+ MachineOperand &MO = Init->getOperand(i);
+ if (MO.isReg() && MO.isUse() && MO.isTied() &&
+ Init->findTiedOperandIdx(i) == 0)
+ Init->getOperand(i).setReg(DestReg);
+ }
+ Init->getOperand(0).setReg(DestReg);
+ LLVM_DEBUG(dbgs() << "ARM Loops: Changed init regs: " << *Init);
+ } else
+ BuildMov(LoLoop.Preheader->instr_back(), DestReg, Copy.getOperand(1).getReg());
+
+ // Change the reducing op to write to the register that is used to copy
+ // its value on the next iteration. Also update the tied-def operand.
+ Reduce.getOperand(0).setReg(DestReg);
+ Reduce.getOperand(5).setReg(DestReg);
+ LLVM_DEBUG(dbgs() << "ARM Loops: Changed reduction regs: " << Reduce);
+
+ // Instead of a vpsel, just copy the register into the necessary one.
+ MachineInstr &VPSEL = Reduction->VPSEL;
+ if (VPSEL.getOperand(0).getReg() != DestReg)
+ BuildMov(VPSEL, VPSEL.getOperand(0).getReg(), DestReg);
+
+ // Remove the unnecessary instructions.
+ LLVM_DEBUG(dbgs() << "ARM Loops: Removing:\n"
+ << " - " << Copy
+ << " - " << VPSEL << "\n");
+ Copy.eraseFromParent();
+ VPSEL.eraseFromParent();
}
-
- LLVM_DEBUG(dbgs() << "ARM Loops: Can't remove loop update, it's used by:\n";
- for (auto U : Uses) U->dump());
}
void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
@@ -893,28 +1415,24 @@ void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
};
// There are a few scenarios which we have to fix up:
- // 1) A VPT block with is only predicated by the vctp and has no internal vpr
- // defs.
- // 2) A VPT block which is only predicated by the vctp but has an internal
- // vpr def.
- // 3) A VPT block which is predicated upon the vctp as well as another vpr
- // def.
- // 4) A VPT block which is not predicated upon a vctp, but contains it and
- // all instructions within the block are predicated upon in.
-
+ // 1. VPT Blocks with non-uniform predicates:
+ // - a. When the divergent instruction is a vctp
+ // - b. When the block uses a vpst, and is only predicated on the vctp
+ // - c. When the block uses a vpt and (optionally) contains one or more
+ // vctp.
+ // 2. VPT Blocks with uniform predicates:
+ // - a. The block uses a vpst, and is only predicated on the vctp
for (auto &Block : LoLoop.getVPTBlocks()) {
SmallVectorImpl<PredicatedMI> &Insts = Block.getInsts();
if (Block.HasNonUniformPredicate()) {
PredicatedMI *Divergent = Block.getDivergent();
if (isVCTP(Divergent->MI)) {
- // The vctp will be removed, so the size of the vpt block needs to be
- // modified.
- uint64_t Size = getARMVPTBlockMask(Block.size() - 1);
- Block.getVPST()->getOperand(0).setImm(Size);
- LLVM_DEBUG(dbgs() << "ARM Loops: Modified VPT block mask.\n");
- } else if (Block.IsOnlyPredicatedOn(LoLoop.VCTP)) {
- // The VPT block has a non-uniform predicate but it's entry is guarded
- // only by a vctp, which means we:
+ // The vctp will be removed, so the block mask of the vp(s)t will need
+ // to be recomputed.
+ LoLoop.BlockMasksToRecompute.insert(Block.getPredicateThen());
+ } else if (Block.isVPST() && Block.IsOnlyPredicatedOn(LoLoop.VCTP)) {
+ // The VPT block has a non-uniform predicate but it uses a vpst and its
+ // entry is guarded only by a vctp, which means we:
// - Need to remove the original vpst.
// - Then need to unpredicate any following instructions, until
// we come across the divergent vpr def.
@@ -922,7 +1440,7 @@ void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
// the divergent vpr def.
// TODO: We could be producing more VPT blocks than necessary and could
// fold the newly created one into a proceeding one.
- for (auto I = ++MachineBasicBlock::iterator(Block.getVPST()),
+ for (auto I = ++MachineBasicBlock::iterator(Block.getPredicateThen()),
E = ++MachineBasicBlock::iterator(Divergent->MI); I != E; ++I)
RemovePredicate(&*I);
@@ -935,28 +1453,58 @@ void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
++Size;
++I;
}
+ // Create a VPST (with a null mask for now, we'll recompute it later).
MachineInstrBuilder MIB = BuildMI(*InsertAt->getParent(), InsertAt,
InsertAt->getDebugLoc(),
TII->get(ARM::MVE_VPST));
- MIB.addImm(getARMVPTBlockMask(Size));
- LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getVPST());
+ MIB.addImm(0);
+ LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getPredicateThen());
LLVM_DEBUG(dbgs() << "ARM Loops: Created VPST: " << *MIB);
- Block.getVPST()->eraseFromParent();
+ LoLoop.ToRemove.insert(Block.getPredicateThen());
+ LoLoop.BlockMasksToRecompute.insert(MIB.getInstr());
+ }
+ // Else, if the block uses a vpt, iterate over the block, removing the
+ // extra VCTPs it may contain.
+ else if (Block.isVPT()) {
+ bool RemovedVCTP = false;
+ for (PredicatedMI &Elt : Block.getInsts()) {
+ MachineInstr *MI = Elt.MI;
+ if (isVCTP(MI)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Removing VCTP: " << *MI);
+ LoLoop.ToRemove.insert(MI);
+ RemovedVCTP = true;
+ continue;
+ }
+ }
+ if (RemovedVCTP)
+ LoLoop.BlockMasksToRecompute.insert(Block.getPredicateThen());
}
- } else if (Block.IsOnlyPredicatedOn(LoLoop.VCTP)) {
- // A vpt block which is only predicated upon vctp and has no internal vpr
- // defs:
+ } else if (Block.IsOnlyPredicatedOn(LoLoop.VCTP) && Block.isVPST()) {
+      // A vpt block starting with a VPST, which is only predicated upon the
+      // vctp and has no internal vpr defs:
// - Remove vpst.
// - Unpredicate the remaining instructions.
- LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getVPST());
- Block.getVPST()->eraseFromParent();
+ LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *Block.getPredicateThen());
+ LoLoop.ToRemove.insert(Block.getPredicateThen());
for (auto &PredMI : Insts)
RemovePredicate(PredMI.MI);
}
}
-
- LLVM_DEBUG(dbgs() << "ARM Loops: Removing VCTP: " << *LoLoop.VCTP);
- LoLoop.VCTP->eraseFromParent();
+ LLVM_DEBUG(dbgs() << "ARM Loops: Removing remaining VCTPs...\n");
+ // Remove the "main" VCTP
+ LoLoop.ToRemove.insert(LoLoop.VCTP);
+ LLVM_DEBUG(dbgs() << " " << *LoLoop.VCTP);
+ // Remove remaining secondary VCTPs
+ for (MachineInstr *VCTP : LoLoop.SecondaryVCTPs) {
+ // All VCTPs that aren't marked for removal yet should be unpredicated ones.
+ // The predicated ones should have already been marked for removal when
+ // visiting the VPT blocks.
+ if (LoLoop.ToRemove.insert(VCTP).second) {
+ assert(getVPTInstrPredicate(*VCTP) == ARMVCC::None &&
+ "Removing Predicated VCTP without updating the block mask!");
+ LLVM_DEBUG(dbgs() << " " << *VCTP);
+ }
+ }
}
void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
@@ -973,9 +1521,8 @@ void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
MIB.add(End->getOperand(0));
MIB.add(End->getOperand(1));
LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
-
- LoLoop.End->eraseFromParent();
- LoLoop.Dec->eraseFromParent();
+ LoLoop.ToRemove.insert(LoLoop.Dec);
+ LoLoop.ToRemove.insert(End);
return &*MIB;
};
@@ -1001,7 +1548,7 @@ void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
RevertWhile(LoLoop.Start);
else
LoLoop.Start->eraseFromParent();
- bool FlagsAlreadySet = RevertLoopDec(LoLoop.Dec, true);
+ bool FlagsAlreadySet = RevertLoopDec(LoLoop.Dec);
RevertLoopEnd(LoLoop.End, FlagsAlreadySet);
} else {
LoLoop.Start = ExpandLoopStart(LoLoop);
@@ -1009,10 +1556,35 @@ void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
LoLoop.End = ExpandLoopEnd(LoLoop);
RemoveDeadBranch(LoLoop.End);
if (LoLoop.IsTailPredicationLegal()) {
- RemoveLoopUpdate(LoLoop);
ConvertVPTBlocks(LoLoop);
+ FixupReductions(LoLoop);
+ }
+ for (auto *I : LoLoop.ToRemove) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Erasing " << *I);
+ I->eraseFromParent();
+ }
+ for (auto *I : LoLoop.BlockMasksToRecompute) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Recomputing VPT/VPST Block Mask: " << *I);
+ recomputeVPTBlockMask(*I);
+ LLVM_DEBUG(dbgs() << " ... done: " << *I);
}
}
+
+ PostOrderLoopTraversal DFS(LoLoop.ML, *MLI);
+ DFS.ProcessLoop();
+ const SmallVectorImpl<MachineBasicBlock*> &PostOrder = DFS.getOrder();
+ for (auto *MBB : PostOrder) {
+ recomputeLiveIns(*MBB);
+ // FIXME: For some reason, the live-in print order is non-deterministic for
+  // our tests and I can't figure out why... So just sort them.
+ MBB->sortUniqueLiveIns();
+ }
+
+ for (auto *MBB : reverse(PostOrder))
+ recomputeLivenessFlags(*MBB);
+
+ // We've moved, removed and inserted new instructions, so update RDA.
+ RDA->reset();
}
bool ARMLowOverheadLoops::RevertNonLoops() {
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMMCInstLower.cpp
index 8e01b998d900..f893faa4cf97 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMMCInstLower.cpp
@@ -194,9 +194,9 @@ void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
// BLX ip
// POP{ r0, lr }
//
- OutStreamer->EmitCodeAlignment(4);
+ OutStreamer->emitCodeAlignment(4);
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
- OutStreamer->EmitLabel(CurSled);
+ OutStreamer->emitLabel(CurSled);
auto Target = OutContext.createTempSymbol();
// Emit "B #20" instruction, which jumps over the next 24 bytes (because
@@ -209,8 +209,8 @@ void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
emitNops(NoopsInSledCount);
- OutStreamer->EmitLabel(Target);
- recordSled(CurSled, MI, Kind);
+ OutStreamer->emitLabel(Target);
+ recordSled(CurSled, MI, Kind, 2);
}
void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI)
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index 3b676ca4c883..507c3e69b3a4 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -15,4 +15,6 @@ void ARMFunctionInfo::anchor() {}
ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
: isThumb(MF.getSubtarget<ARMSubtarget>().isThumb()),
- hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()) {}
+ hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()),
+ IsCmseNSEntry(MF.getFunction().hasFnAttribute("cmse_nonsecure_entry")),
+ IsCmseNSCall(MF.getFunction().hasFnAttribute("cmse_nonsecure_call")) {}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index bb136e92329b..298c8a238987 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -58,10 +58,6 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// emitPrologue.
bool RestoreSPFromFP = false;
- /// LRSpilledForFarJump - True if the LR register has been for spilled to
- /// enable far jump.
- bool LRSpilledForFarJump = false;
-
/// LRSpilled - True if the LR register has been for spilled for
/// any reason, so it's legal to emit an ARM::tBfar (i.e. "bl").
bool LRSpilled = false;
@@ -87,6 +83,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills
/// areas.
+ unsigned FPCXTSaveSize = 0;
unsigned GPRCS1Size = 0;
unsigned GPRCS2Size = 0;
unsigned DPRCSAlignGapSize = 0;
@@ -109,6 +106,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// HasITBlocks - True if IT blocks have been inserted.
bool HasITBlocks = false;
+ // Security Extensions
+ bool IsCmseNSEntry;
+ bool IsCmseNSCall;
+
/// CPEClones - Track constant pool entries clones created by Constant Island
/// pass.
DenseMap<unsigned, unsigned> CPEClones;
@@ -144,6 +145,9 @@ public:
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
bool isThumb2Function() const { return isThumb && hasThumb2; }
+ bool isCmseNSEntryFunction() const { return IsCmseNSEntry; }
+ bool isCmseNSCallFunction() const { return IsCmseNSCall; }
+
unsigned getStoredByValParamsPadding() const { return StByValParamsPadding; }
void setStoredByValParamsPadding(unsigned p) { StByValParamsPadding = p; }
@@ -162,9 +166,6 @@ public:
bool isLRSpilled() const { return LRSpilled; }
void setLRIsSpilled(bool s) { LRSpilled = s; }
- bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
- void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }
-
unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; }
void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; }
@@ -179,11 +180,13 @@ public:
void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; }
void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; }
+ unsigned getFPCXTSaveAreaSize() const { return FPCXTSaveSize; }
unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
unsigned getDPRCalleeSavedGapSize() const { return DPRCSAlignGapSize; }
unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
+ void setFPCXTSaveAreaSize(unsigned s) { FPCXTSaveSize = s; }
void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
void setDPRCalleeSavedGapSize(unsigned s) { DPRCSAlignGapSize = s; }
@@ -252,6 +255,7 @@ public:
}
DenseMap<unsigned, unsigned> EHPrologueRemappedRegs;
+ DenseMap<unsigned, unsigned> EHPrologueOffsetInRegs;
void setPreservesR0() { PreservesR0 = true; }
bool getPreservesR0() const { return PreservesR0; }
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMParallelDSP.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMParallelDSP.cpp
index e2c9335db419..e750649ce86c 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMParallelDSP.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMParallelDSP.cpp
@@ -19,8 +19,9 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
-#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsARM.h"
@@ -28,7 +29,6 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
-#include "llvm/PassSupport.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -352,7 +352,6 @@ bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) {
SmallVector<Instruction*, 8> Writes;
LoadPairs.clear();
WideLoads.clear();
- OrderedBasicBlock OrderedBB(BB);
// Collect loads and instruction that may write to memory. For now we only
// record loads which are simple, sign-extended and have a single user.
@@ -384,7 +383,7 @@ bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) {
if (!isModOrRefSet(intersectModRef(AA->getModRefInfo(Write, ReadLoc),
ModRefInfo::ModRef)))
continue;
- if (OrderedBB.dominates(Write, Read))
+ if (Write->comesBefore(Read))
RAWDeps[Read].insert(Write);
}
}
@@ -392,8 +391,9 @@ bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) {
// Check whether there's not a write between the two loads which would
// prevent them from being safely merged.
auto SafeToPair = [&](LoadInst *Base, LoadInst *Offset) {
- LoadInst *Dominator = OrderedBB.dominates(Base, Offset) ? Base : Offset;
- LoadInst *Dominated = OrderedBB.dominates(Base, Offset) ? Offset : Base;
+ bool BaseFirst = Base->comesBefore(Offset);
+ LoadInst *Dominator = BaseFirst ? Base : Offset;
+ LoadInst *Dominated = BaseFirst ? Offset : Base;
if (RAWDeps.count(Dominated)) {
InstSet &WritesBefore = RAWDeps[Dominated];
@@ -401,7 +401,7 @@ bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) {
for (auto Before : WritesBefore) {
// We can't move the second load backward, past a write, to merge
// with the first load.
- if (OrderedBB.dominates(Dominator, Before))
+ if (Dominator->comesBefore(Before))
return false;
}
}
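
The replacement of OrderedBasicBlock::dominates() with Instruction::comesBefore() in this file relies on the per-block instruction ordering that BasicBlock maintains lazily (available in LLVM 10 and later). Below is a minimal, editor-written sketch of the API on a hand-built function; the IR values and names are illustrative only.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  Type *I32 = Type::getInt32Ty(Ctx);
  FunctionType *FnTy = FunctionType::get(I32, {I32}, false);
  Function *Fn = Function::Create(FnTy, Function::ExternalLinkage, "f", &M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", Fn);
  IRBuilder<> B(BB);

  Value *Arg = &*Fn->arg_begin();
  Value *A = B.CreateAdd(Arg, B.getInt32(1), "a"); // first instruction
  Value *C = B.CreateMul(A, B.getInt32(3), "c");   // second instruction
  B.CreateRet(C);

  // comesBefore() answers the same intra-block ordering question that
  // OrderedBasicBlock::dominates() used to, without a side table.
  bool Before = cast<Instruction>(A)->comesBefore(cast<Instruction>(C));
  return Before ? 0 : 1;
}
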
@@ -571,6 +571,10 @@ bool ARMParallelDSP::CreateParallelPairs(Reduction &R) {
auto Ld2 = static_cast<LoadInst*>(PMul0->RHS);
auto Ld3 = static_cast<LoadInst*>(PMul1->RHS);
+ // Check that each mul is operating on two different loads.
+ if (Ld0 == Ld2 || Ld1 == Ld3)
+ return false;
+
if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd)) {
if (AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
@@ -705,12 +709,11 @@ void ARMParallelDSP::InsertParallelMACs(Reduction &R) {
}
// Roughly sort the mul pairs in their program order.
- OrderedBasicBlock OrderedBB(R.getRoot()->getParent());
- llvm::sort(R.getMulPairs(), [&OrderedBB](auto &PairA, auto &PairB) {
- const Instruction *A = PairA.first->Root;
- const Instruction *B = PairB.first->Root;
- return OrderedBB.dominates(A, B);
- });
+ llvm::sort(R.getMulPairs(), [](auto &PairA, auto &PairB) {
+ const Instruction *A = PairA.first->Root;
+ const Instruction *B = PairB.first->Root;
+ return A->comesBefore(B);
+ });
IntegerType *Ty = IntegerType::get(M->getContext(), 32);
for (auto &Pair : R.getMulPairs()) {
@@ -772,8 +775,7 @@ LoadInst* ARMParallelDSP::CreateWideLoad(MemInstList &Loads,
const unsigned AddrSpace = DomLoad->getPointerAddressSpace();
Value *VecPtr = IRB.CreateBitCast(Base->getPointerOperand(),
LoadTy->getPointerTo(AddrSpace));
- LoadInst *WideLoad = IRB.CreateAlignedLoad(LoadTy, VecPtr,
- Base->getAlignment());
+ LoadInst *WideLoad = IRB.CreateAlignedLoad(LoadTy, VecPtr, Base->getAlign());
// Make sure everything is in the correct order in the basic block.
MoveBefore(Base->getPointerOperand(), VecPtr);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMPredicates.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMPredicates.td
index dea1d767beb4..1ae71be9f760 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMPredicates.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMPredicates.td
@@ -7,148 +7,160 @@
//===----------------------------------------------------------------------===//
def HasV4T : Predicate<"Subtarget->hasV4TOps()">,
- AssemblerPredicate<"HasV4TOps", "armv4t">;
+ AssemblerPredicate<(all_of HasV4TOps), "armv4t">;
def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
def HasV5T : Predicate<"Subtarget->hasV5TOps()">,
- AssemblerPredicate<"HasV5TOps", "armv5t">;
+ AssemblerPredicate<(all_of HasV5TOps), "armv5t">;
def NoV5T : Predicate<"!Subtarget->hasV5TOps()">;
def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">,
- AssemblerPredicate<"HasV5TEOps", "armv5te">;
+ AssemblerPredicate<(all_of HasV5TEOps), "armv5te">;
def HasV6 : Predicate<"Subtarget->hasV6Ops()">,
- AssemblerPredicate<"HasV6Ops", "armv6">;
+ AssemblerPredicate<(all_of HasV6Ops), "armv6">;
def NoV6 : Predicate<"!Subtarget->hasV6Ops()">;
def HasV6M : Predicate<"Subtarget->hasV6MOps()">,
- AssemblerPredicate<"HasV6MOps",
+ AssemblerPredicate<(all_of HasV6MOps),
"armv6m or armv6t2">;
def HasV8MBaseline : Predicate<"Subtarget->hasV8MBaselineOps()">,
- AssemblerPredicate<"HasV8MBaselineOps",
+ AssemblerPredicate<(all_of HasV8MBaselineOps),
"armv8m.base">;
def HasV8MMainline : Predicate<"Subtarget->hasV8MMainlineOps()">,
- AssemblerPredicate<"HasV8MMainlineOps",
+ AssemblerPredicate<(all_of HasV8MMainlineOps),
"armv8m.main">;
def HasV8_1MMainline : Predicate<"Subtarget->hasV8_1MMainlineOps()">,
- AssemblerPredicate<"HasV8_1MMainlineOps",
+ AssemblerPredicate<(all_of HasV8_1MMainlineOps),
"armv8.1m.main">;
def HasMVEInt : Predicate<"Subtarget->hasMVEIntegerOps()">,
- AssemblerPredicate<"HasMVEIntegerOps",
+ AssemblerPredicate<(all_of HasMVEIntegerOps),
"mve">;
def HasMVEFloat : Predicate<"Subtarget->hasMVEFloatOps()">,
- AssemblerPredicate<"HasMVEFloatOps",
+ AssemblerPredicate<(all_of HasMVEFloatOps),
"mve.fp">;
+def HasCDE : Predicate<"Subtarget->hasCDEOps()">,
+ AssemblerPredicate<(all_of HasCDEOps),
+ "cde">;
def HasFPRegs : Predicate<"Subtarget->hasFPRegs()">,
- AssemblerPredicate<"FeatureFPRegs",
+ AssemblerPredicate<(all_of FeatureFPRegs),
"fp registers">;
def HasFPRegs16 : Predicate<"Subtarget->hasFPRegs16()">,
- AssemblerPredicate<"FeatureFPRegs16",
+ AssemblerPredicate<(all_of FeatureFPRegs16),
+ "16-bit fp registers">;
+def HasNoFPRegs16 : Predicate<"!Subtarget->hasFPRegs16()">,
+ AssemblerPredicate<(all_of (not FeatureFPRegs16)),
"16-bit fp registers">;
def HasFPRegs64 : Predicate<"Subtarget->hasFPRegs64()">,
- AssemblerPredicate<"FeatureFPRegs64",
+ AssemblerPredicate<(all_of FeatureFPRegs64),
"64-bit fp registers">;
def HasFPRegsV8_1M : Predicate<"Subtarget->hasFPRegs() && Subtarget->hasV8_1MMainlineOps()">,
- AssemblerPredicate<"FeatureFPRegs,HasV8_1MMainlineOps",
+ AssemblerPredicate<(all_of FeatureFPRegs, HasV8_1MMainlineOps),
"armv8.1m.main with FP or MVE">;
def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">,
- AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
+ AssemblerPredicate<(all_of HasV6T2Ops), "armv6t2">;
def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
def HasV6K : Predicate<"Subtarget->hasV6KOps()">,
- AssemblerPredicate<"HasV6KOps", "armv6k">;
+ AssemblerPredicate<(all_of HasV6KOps), "armv6k">;
def NoV6K : Predicate<"!Subtarget->hasV6KOps()">;
def HasV7 : Predicate<"Subtarget->hasV7Ops()">,
- AssemblerPredicate<"HasV7Ops", "armv7">;
+ AssemblerPredicate<(all_of HasV7Ops), "armv7">;
def HasV8 : Predicate<"Subtarget->hasV8Ops()">,
- AssemblerPredicate<"HasV8Ops", "armv8">;
+ AssemblerPredicate<(all_of HasV8Ops), "armv8">;
def PreV8 : Predicate<"!Subtarget->hasV8Ops()">,
- AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">;
+ AssemblerPredicate<(all_of (not HasV8Ops)), "armv7 or earlier">;
def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
- AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
+ AssemblerPredicate<(all_of HasV8_1aOps), "armv8.1a">;
def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
- AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
+ AssemblerPredicate<(all_of HasV8_2aOps), "armv8.2a">;
def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
- AssemblerPredicate<"HasV8_3aOps", "armv8.3a">;
+ AssemblerPredicate<(all_of HasV8_3aOps), "armv8.3a">;
def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
- AssemblerPredicate<"HasV8_4aOps", "armv8.4a">;
+ AssemblerPredicate<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
- AssemblerPredicate<"HasV8_5aOps", "armv8.5a">;
+ AssemblerPredicate<(all_of HasV8_5aOps), "armv8.5a">;
+def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
+ AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">;
def NoVFP : Predicate<"!Subtarget->hasVFP2Base()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2Base()">,
- AssemblerPredicate<"FeatureVFP2_SP", "VFP2">;
+ AssemblerPredicate<(all_of FeatureVFP2_SP), "VFP2">;
def HasVFP3 : Predicate<"Subtarget->hasVFP3Base()">,
- AssemblerPredicate<"FeatureVFP3_D16_SP", "VFP3">;
+ AssemblerPredicate<(all_of FeatureVFP3_D16_SP), "VFP3">;
def HasVFP4 : Predicate<"Subtarget->hasVFP4Base()">,
- AssemblerPredicate<"FeatureVFP4_D16_SP", "VFP4">;
+ AssemblerPredicate<(all_of FeatureVFP4_D16_SP), "VFP4">;
def HasDPVFP : Predicate<"Subtarget->hasFP64()">,
- AssemblerPredicate<"FeatureFP64",
+ AssemblerPredicate<(all_of FeatureFP64),
"double precision VFP">;
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8Base()">,
- AssemblerPredicate<"FeatureFPARMv8_D16_SP", "FPARMv8">;
+ AssemblerPredicate<(all_of FeatureFPARMv8_D16_SP), "FPARMv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
- AssemblerPredicate<"FeatureNEON", "NEON">;
+ AssemblerPredicate<(all_of FeatureNEON), "NEON">;
def HasSHA2 : Predicate<"Subtarget->hasSHA2()">,
- AssemblerPredicate<"FeatureSHA2", "sha2">;
+ AssemblerPredicate<(all_of FeatureSHA2), "sha2">;
def HasAES : Predicate<"Subtarget->hasAES()">,
- AssemblerPredicate<"FeatureAES", "aes">;
+ AssemblerPredicate<(all_of FeatureAES), "aes">;
def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
- AssemblerPredicate<"FeatureCrypto", "crypto">;
+ AssemblerPredicate<(all_of FeatureCrypto), "crypto">;
def HasDotProd : Predicate<"Subtarget->hasDotProd()">,
- AssemblerPredicate<"FeatureDotProd", "dotprod">;
+ AssemblerPredicate<(all_of FeatureDotProd), "dotprod">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
- AssemblerPredicate<"FeatureCRC", "crc">;
+ AssemblerPredicate<(all_of FeatureCRC), "crc">;
def HasRAS : Predicate<"Subtarget->hasRAS()">,
- AssemblerPredicate<"FeatureRAS", "ras">;
+ AssemblerPredicate<(all_of FeatureRAS), "ras">;
def HasLOB : Predicate<"Subtarget->hasLOB()">,
- AssemblerPredicate<"FeatureLOB", "lob">;
+ AssemblerPredicate<(all_of FeatureLOB), "lob">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
- AssemblerPredicate<"FeatureFP16","half-float conversions">;
+ AssemblerPredicate<(all_of FeatureFP16),"half-float conversions">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
- AssemblerPredicate<"FeatureFullFP16","full half-float">;
+ AssemblerPredicate<(all_of FeatureFullFP16),"full half-float">;
def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
- AssemblerPredicate<"FeatureFP16FML","full half-float fml">;
+ AssemblerPredicate<(all_of FeatureFP16FML),"full half-float fml">;
+def HasBF16 : Predicate<"Subtarget->hasBF16()">,
+ AssemblerPredicate<(all_of FeatureBF16),"BFloat16 floating point extension">;
+def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">,
+ AssemblerPredicate<(all_of FeatureMatMulInt8),"8-bit integer matrix multiply">;
def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">,
- AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">;
+ AssemblerPredicate<(all_of FeatureHWDivThumb), "divide in THUMB">;
def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
- AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">;
+ AssemblerPredicate<(all_of FeatureHWDivARM), "divide in ARM">;
def HasDSP : Predicate<"Subtarget->hasDSP()">,
- AssemblerPredicate<"FeatureDSP", "dsp">;
+ AssemblerPredicate<(all_of FeatureDSP), "dsp">;
def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
- AssemblerPredicate<"FeatureDB",
+ AssemblerPredicate<(all_of FeatureDB),
"data-barriers">;
def HasDFB : Predicate<"Subtarget->hasFullDataBarrier()">,
- AssemblerPredicate<"FeatureDFB",
+ AssemblerPredicate<(all_of FeatureDFB),
"full-data-barrier">;
def HasV7Clrex : Predicate<"Subtarget->hasV7Clrex()">,
- AssemblerPredicate<"FeatureV7Clrex",
+ AssemblerPredicate<(all_of FeatureV7Clrex),
"v7 clrex">;
def HasAcquireRelease : Predicate<"Subtarget->hasAcquireRelease()">,
- AssemblerPredicate<"FeatureAcquireRelease",
+ AssemblerPredicate<(all_of FeatureAcquireRelease),
"acquire/release">;
def HasMP : Predicate<"Subtarget->hasMPExtension()">,
- AssemblerPredicate<"FeatureMP",
+ AssemblerPredicate<(all_of FeatureMP),
"mp-extensions">;
def HasVirtualization: Predicate<"false">,
- AssemblerPredicate<"FeatureVirtualization",
+ AssemblerPredicate<(all_of FeatureVirtualization),
"virtualization-extensions">;
def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">,
- AssemblerPredicate<"FeatureTrustZone",
+ AssemblerPredicate<(all_of FeatureTrustZone),
"TrustZone">;
def Has8MSecExt : Predicate<"Subtarget->has8MSecExt()">,
- AssemblerPredicate<"Feature8MSecExt",
+ AssemblerPredicate<(all_of Feature8MSecExt),
"ARMv8-M Security Extensions">;
def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">;
def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
def IsThumb : Predicate<"Subtarget->isThumb()">,
- AssemblerPredicate<"ModeThumb", "thumb">;
+ AssemblerPredicate<(all_of ModeThumb), "thumb">;
def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
def IsThumb2 : Predicate<"Subtarget->isThumb2()">,
- AssemblerPredicate<"ModeThumb,FeatureThumb2",
+ AssemblerPredicate<(all_of ModeThumb, FeatureThumb2),
"thumb2">;
def IsMClass : Predicate<"Subtarget->isMClass()">,
- AssemblerPredicate<"FeatureMClass", "armv*m">;
+ AssemblerPredicate<(all_of FeatureMClass), "armv*m">;
def IsNotMClass : Predicate<"!Subtarget->isMClass()">,
- AssemblerPredicate<"!FeatureMClass",
+ AssemblerPredicate<(all_of (not FeatureMClass)),
"!armv*m">;
def IsARM : Predicate<"!Subtarget->isThumb()">,
- AssemblerPredicate<"!ModeThumb", "arm-mode">;
+ AssemblerPredicate<(all_of (not ModeThumb)), "arm-mode">;
def IsMachO : Predicate<"Subtarget->isTargetMachO()">;
def IsNotMachO : Predicate<"!Subtarget->isTargetMachO()">;
def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
@@ -157,12 +169,12 @@ def IsNotWindows : Predicate<"!Subtarget->isTargetWindows()">;
def IsReadTPHard : Predicate<"Subtarget->isReadTPHard()">;
def IsReadTPSoft : Predicate<"!Subtarget->isReadTPHard()">;
def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">,
- AssemblerPredicate<"FeatureNaClTrap", "NaCl">;
+ AssemblerPredicate<(all_of FeatureNaClTrap), "NaCl">;
def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">;
def UseNegativeImmediates :
Predicate<"false">,
- AssemblerPredicate<"!FeatureNoNegativeImmediates",
+ AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
"NegativeImmediates">;
// FIXME: Eventually this will be just "hasV6T2Ops".
@@ -206,4 +218,4 @@ def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">;
// Armv8.5-A extensions
def HasSB : Predicate<"Subtarget->hasSB()">,
- AssemblerPredicate<"FeatureSB", "sb">;
+ AssemblerPredicate<(all_of FeatureSB), "sb">;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 43c8cd5a89be..f9dbfef4c113 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -131,45 +131,47 @@ static void checkValueMappings() {
ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI)
: ARMGenRegisterBankInfo() {
- static bool AlreadyInit = false;
// We have only one set of register banks, whatever the subtarget
// is. Therefore, the initialization of the RegBanks table should be
// done only once. Indeed the table of all register banks
// (ARM::RegBanks) is unique in the compiler. At some point, it
// will get tablegen'ed and the whole constructor becomes empty.
- if (AlreadyInit)
- return;
- AlreadyInit = true;
+ static llvm::once_flag InitializeRegisterBankFlag;
- const RegisterBank &RBGPR = getRegBank(ARM::GPRRegBankID);
- (void)RBGPR;
- assert(&ARM::GPRRegBank == &RBGPR && "The order in RegBanks is messed up");
+ static auto InitializeRegisterBankOnce = [&]() {
+ const RegisterBank &RBGPR = getRegBank(ARM::GPRRegBankID);
+ (void)RBGPR;
+ assert(&ARM::GPRRegBank == &RBGPR && "The order in RegBanks is messed up");
- // Initialize the GPR bank.
- assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRRegClassID)) &&
- "Subclass not added?");
- assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRwithAPSRRegClassID)) &&
- "Subclass not added?");
- assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRnopcRegClassID)) &&
- "Subclass not added?");
- assert(RBGPR.covers(*TRI.getRegClass(ARM::rGPRRegClassID)) &&
- "Subclass not added?");
- assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPRRegClassID)) &&
- "Subclass not added?");
- assert(RBGPR.covers(*TRI.getRegClass(ARM::tcGPRRegClassID)) &&
- "Subclass not added?");
- assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPR_and_tcGPRRegClassID)) &&
- "Subclass not added?");
- assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPREven_and_tGPR_and_tcGPRRegClassID)) &&
- "Subclass not added?");
- assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPROdd_and_tcGPRRegClassID)) &&
- "Subclass not added?");
- assert(RBGPR.getSize() == 32 && "GPRs should hold up to 32-bit");
+ // Initialize the GPR bank.
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRwithAPSRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRnopcRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::rGPRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::tcGPRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPR_and_tcGPRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(
+ *TRI.getRegClass(ARM::tGPREven_and_tGPR_and_tcGPRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPROdd_and_tcGPRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.getSize() == 32 && "GPRs should hold up to 32-bit");
#ifndef NDEBUG
- ARM::checkPartialMappings();
- ARM::checkValueMappings();
+ ARM::checkPartialMappings();
+ ARM::checkValueMappings();
#endif
+ };
+
+ llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}
const RegisterBank &
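The ARMRegisterBankInfo.cpp hunk above replaces a non-thread-safe "static bool AlreadyInit" guard with an llvm::once_flag and llvm::call_once, so the one-time register-bank verification cannot race when several subtargets are constructed concurrently. A minimal standalone sketch of the same idiom, written against std::call_once; the class name and output below are made up for illustration and are not part of LLVM:

#include <iostream>
#include <mutex>

class RegisterBankInfoSketch {
public:
  RegisterBankInfoSketch() {
    // Both the flag and the initializer are static, so the body runs exactly
    // once per process, regardless of how many instances are created or from
    // how many threads the constructor is entered.
    static std::once_flag InitFlag;
    static auto InitOnce = [] {
      std::cout << "one-time register bank verification\n";
    };
    std::call_once(InitFlag, InitOnce);
  }
};

int main() {
  RegisterBankInfoSketch A, B, C; // the message is printed only once
  return 0;
}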
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterInfo.td
index 56055a15483a..a384b0dc757c 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -305,6 +305,17 @@ def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
let DiagnosticType = "rGPR";
}
+// GPRs without the PC and SP but with APSR_NZCV. Some instructions allow
+// accessing the APSR_NZCV, while actually encoding PC in the register field.
+// This is useful for assembly and disassembly only.
+// Currently used by the CDE extension.
+def GPRwithAPSR_NZCVnosp
+ : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12), LR, APSR_NZCV)> {
+ let isAllocatable = 0;
+ let DiagnosticString =
+ "operand must be a register in the range [r0, r12], r14 or apsr_nzcv";
+}
+
// Thumb registers are R0-R7 normally. Some instructions can still use
// the general GPR register class above (MOV, e.g.)
def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)> {
@@ -379,7 +390,7 @@ def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)> {
let DiagnosticString = "operand must be a register in range [s0, s31]";
}
-def HPR : RegisterClass<"ARM", [f16], 32, (sequence "S%u", 0, 31)> {
+def HPR : RegisterClass<"ARM", [f16, bf16], 32, (sequence "S%u", 0, 31)> {
let AltOrders = [(add (decimate HPR, 2), SPR),
(add (decimate HPR, 4),
(decimate HPR, 2),
@@ -401,7 +412,7 @@ def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)> {
// class.
// ARM requires only word alignment for double. It's more performant if it
// is double-word alignment though.
-def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
+def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16, v4bf16], 64,
(sequence "D%u", 0, 31)> {
// Allocate non-VFP2 registers D16-D31 first, and prefer even registers on
// Darwin platforms.
@@ -422,20 +433,20 @@ def FPWithVPR : RegisterClass<"ARM", [f32], 32, (add SPR, DPR, VPR)> {
// Subset of DPR that are accessible with VFP2 (and so that also have
// 32-bit SPR subregs).
-def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
+def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16, v4bf16], 64,
(trunc DPR, 16)> {
let DiagnosticString = "operand must be a register in range [d0, d15]";
}
// Subset of DPR which can be used as a source of NEON scalars for 16-bit
// operations
-def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
+def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16, v4bf16], 64,
(trunc DPR, 8)> {
let DiagnosticString = "operand must be a register in range [d0, d7]";
}
// Generic 128-bit vector register class.
-def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128,
+def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16, v8bf16], 128,
(sequence "Q%u", 0, 15)> {
// Allocate non-VFP2 aliases Q8-Q15 first.
let AltOrders = [(rotl QPR, 8), (trunc QPR, 8)];
@@ -577,3 +588,6 @@ def Tuples4DSpc : RegisterTuples<[dsub_0, dsub_2, dsub_4, dsub_6],
// Spaced quads of D registers.
def DQuadSpc : RegisterClass<"ARM", [v4i64], 64, (add Tuples3DSpc)>;
+
+// FP context payload
+def FPCXTRegs : RegisterClass<"ARM", [i32], 32, (add FPCXTNS)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57.td
index a79f3348f338..d9a8d304c41f 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57.td
@@ -96,7 +96,7 @@ def CortexA57Model : SchedMachineModel {
let FullInstRWOverlapCheck = 0;
let UnsupportedFeatures = [HasV8_1MMainline, HasMVEInt, HasMVEFloat,
- HasFPRegsV8_1M];
+ HasFPRegsV8_1M, HasFP16FML, HasMatMulInt8, HasBF16];
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleSwift.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleSwift.td
index 00a44599b1b2..e0e98bfa0e9b 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleSwift.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleSwift.td
@@ -744,7 +744,7 @@ let SchedModel = SwiftModel in {
SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
SwiftWriteLM14CyNo, SwiftWriteLM14CyNo,
SwiftWriteP01OneCycle, SwiftVLDMPerm5]>,
- // Inaccurate: reuse describtion from 9 S registers.
+ // Inaccurate: reuse description from 9 S registers.
SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy,
SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
@@ -760,7 +760,7 @@ let SchedModel = SwiftModel in {
SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
SwiftWriteLM11CyNo, SwiftWriteLM11CyNo,
SwiftWriteP01OneCycle, SwiftVLDMPerm3]>,
- // Inaccurate: reuse describtion from 9 S registers.
+ // Inaccurate: reuse description from 9 S registers.
SchedVar<SwiftLMAddr13Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy,
SwiftWriteLM13Cy, SwiftWriteLM14CyNo,
SwiftWriteLM17CyNo, SwiftWriteLM18CyNo,
@@ -958,7 +958,7 @@ let SchedModel = SwiftModel in {
def : InstRW<[SwiftWriteLM7Cy, SwiftWriteP01OneCycle, SwiftWriteLM8Cy,
SwiftWriteLM8Cy, SwiftExt1xP0, SwiftVLDMPerm3],
(instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
- // Four element struture.
+ // Four element structure.
def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo,
SwiftWriteLM10CyNo, SwiftExt1xP0, SwiftVLDMPerm5],
(instregex "VLD4(LN|DUP)(d|q)(8|16|32)$",
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index cade06e8c109..7e06229b60c3 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -126,24 +126,24 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
+ SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
const ARMSubtarget &Subtarget =
DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
// Do repeated 4-byte loads and stores. To be improved.
// This requires 4-byte alignment.
- if ((Align & 3) != 0)
+ if (Alignment < Align(4))
return SDValue();
// This requires the copy size to be a constant, preferably
// within a subtarget-specific limit.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (!ConstantSize)
- return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
- RTLIB::MEMCPY);
+ return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
+ Alignment.value(), RTLIB::MEMCPY);
uint64_t SizeVal = ConstantSize->getZExtValue();
if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
- return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
- RTLIB::MEMCPY);
+ return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
+ Alignment.value(), RTLIB::MEMCPY);
unsigned BytesLeft = SizeVal & 3;
unsigned NumMemOps = SizeVal >> 2;
@@ -240,16 +240,16 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVolatile,
+ SDValue Size, Align Alignment, bool isVolatile,
MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
- return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
- RTLIB::MEMMOVE);
+ return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
+ Alignment.value(), RTLIB::MEMMOVE);
}
SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVolatile,
+ SDValue Size, Align Alignment, bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
- return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
- RTLIB::MEMSET);
+ return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
+ Alignment.value(), RTLIB::MEMSET);
}
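The ARMSelectionDAGInfo.cpp hunks above migrate the raw unsigned alignment parameter to the typed Align class, turning the bit test (Align & 3) != 0 into the comparison Alignment < Align(4). A self-contained sketch showing that the two checks agree for power-of-two alignments; MiniAlign is a made-up stand-in, not LLVM's Align type:

#include <cassert>

// Made-up, minimal alignment wrapper: it stores the alignment value in bytes
// and compares by that value, like the typed Align parameter in the diff.
struct MiniAlign {
  unsigned Value;
  explicit MiniAlign(unsigned V) : Value(V) {}
  friend bool operator<(MiniAlign A, MiniAlign B) { return A.Value < B.Value; }
};

int main() {
  for (unsigned A : {1, 2, 4, 8, 16}) {
    bool OldCheck = (A & 3) != 0;                // "not 4-byte aligned" as a bit test
    bool NewCheck = MiniAlign(A) < MiniAlign(4); // the same predicate as a comparison
    assert(OldCheck == NewCheck);
    (void)OldCheck;
    (void)NewCheck;
  }
  return 0;
}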
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
index b8a86ae7310f..7aa831c09248 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -39,22 +39,22 @@ class ARMSelectionDAGInfo : public SelectionDAGTargetInfo {
public:
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVolatile,
- bool AlwaysInline,
+ SDValue Size, Align Alignment,
+ bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const override;
SDValue
EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
SDValue Dst, SDValue Src, SDValue Size,
- unsigned Align, bool isVolatile,
+ Align Alignment, bool isVolatile,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const override;
// Adjust parameters for memset, see RTABI section 4.3.4
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Op1, SDValue Op2,
- SDValue Op3, unsigned Align, bool isVolatile,
+ SDValue Op3, Align Alignment, bool isVolatile,
MachinePointerInfo DstPtrInfo) const override;
SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &dl,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.cpp
index eb4d39b01cbb..46802037c2aa 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -183,7 +183,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (!ArchFS.empty())
ArchFS = (Twine(ArchFS) + "," + FS).str();
else
- ArchFS = FS;
+ ArchFS = std::string(FS);
}
ParseSubtargetFeatures(CPUString, ArchFS);
@@ -292,12 +292,15 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
case CortexA73:
case CortexA75:
case CortexA76:
+ case CortexA77:
+ case CortexA78:
case CortexR4:
case CortexR4F:
case CortexR5:
case CortexR7:
case CortexM3:
case CortexR52:
+ case CortexX1:
break;
case Exynos:
LdStMultipleTiming = SingleIssuePlusExtras;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h
index 6bdd021970ef..2703e385dd81 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCSchedule.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <memory>
#include <string>
@@ -60,6 +61,8 @@ protected:
CortexA73,
CortexA75,
CortexA76,
+ CortexA77,
+ CortexA78,
CortexA8,
CortexA9,
CortexM3,
@@ -68,6 +71,7 @@ protected:
CortexR5,
CortexR52,
CortexR7,
+ CortexX1,
Exynos,
Krait,
Kryo,
@@ -108,6 +112,7 @@ protected:
ARMv83a,
ARMv84a,
ARMv85a,
+ ARMv86a,
ARMv8a,
ARMv8mBaseline,
ARMv8mMainline,
@@ -157,11 +162,13 @@ protected:
bool HasV8_3aOps = false;
bool HasV8_4aOps = false;
bool HasV8_5aOps = false;
+ bool HasV8_6aOps = false;
bool HasV8MBaselineOps = false;
bool HasV8MMainlineOps = false;
bool HasV8_1MMainlineOps = false;
bool HasMVEIntegerOps = false;
bool HasMVEFloatOps = false;
+ bool HasCDEOps = false;
/// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what
/// floating point ISAs are supported.
@@ -254,6 +261,12 @@ protected:
/// HasFP16FML - True if subtarget supports half-precision FP fml operations
bool HasFP16FML = false;
+ /// HasBF16 - True if subtarget supports BFloat16 floating point operations
+ bool HasBF16 = false;
+
+ /// HasMatMulInt8 - True if subtarget supports 8-bit integer matrix multiply
+ bool HasMatMulInt8 = false;
+
/// HasD32 - True if subtarget has the full 32 double precision
/// FP registers for VFPv3.
bool HasD32 = false;
@@ -562,6 +575,7 @@ private:
void initSubtargetFeatures(StringRef CPU, StringRef FS);
ARMFrameLowering *initializeFrameLowering(StringRef CPU, StringRef FS);
+ std::bitset<8> CoprocCDE = {};
public:
void computeIssueWidth();
@@ -579,11 +593,13 @@ public:
bool hasV8_3aOps() const { return HasV8_3aOps; }
bool hasV8_4aOps() const { return HasV8_4aOps; }
bool hasV8_5aOps() const { return HasV8_5aOps; }
+ bool hasV8_6aOps() const { return HasV8_6aOps; }
bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; }
bool hasMVEIntegerOps() const { return HasMVEIntegerOps; }
bool hasMVEFloatOps() const { return HasMVEFloatOps; }
+ bool hasCDEOps() const { return HasCDEOps; }
bool hasFPRegs() const { return HasFPRegs; }
bool hasFPRegs16() const { return HasFPRegs16; }
bool hasFPRegs64() const { return HasFPRegs64; }
@@ -689,12 +705,15 @@ public:
bool hasD32() const { return HasD32; }
bool hasFullFP16() const { return HasFullFP16; }
bool hasFP16FML() const { return HasFP16FML; }
+ bool hasBF16() const { return HasBF16; }
bool hasFuseAES() const { return HasFuseAES; }
bool hasFuseLiterals() const { return HasFuseLiterals; }
/// Return true if the CPU supports any kind of instruction fusion.
bool hasFusion() const { return hasFuseAES() || hasFuseLiterals(); }
+ bool hasMatMulInt8() const { return HasMatMulInt8; }
+
const Triple &getTargetTriple() const { return TargetTriple; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 84876eda33a6..9ead5fa4308c 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -96,6 +96,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
initializeARMExpandPseudoPass(Registry);
initializeThumb2SizeReducePass(Registry);
initializeMVEVPTBlockPass(Registry);
+ initializeMVEVPTOptimisationsPass(Registry);
initializeMVETailPredicationPass(Registry);
initializeARMLowOverheadLoopsPass(Registry);
initializeMVEGatherScatterLoweringPass(Registry);
@@ -243,7 +244,14 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
this->Options.NoTrapAfterNoreturn = true;
}
+ // ARM supports the debug entry values.
+ setSupportsDebugEntryValues(true);
+
initAsmInfo();
+
+ // ARM supports the MachineOutliner.
+ setMachineOutliner(true);
+ setSupportsDefaultOutlining(false);
}
ARMBaseTargetMachine::~ARMBaseTargetMachine() = default;
@@ -359,6 +367,7 @@ public:
void addPreRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
+ void addPreEmitPass2() override;
std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};
@@ -483,6 +492,8 @@ bool ARMPassConfig::addGlobalInstructionSelect() {
void ARMPassConfig::addPreRegAlloc() {
if (getOptLevel() != CodeGenOpt::None) {
+ addPass(createMVEVPTOptimisationsPass());
+
addPass(createMLxExpansionPass());
if (EnableARMLoadStoreOpt)
@@ -507,9 +518,12 @@ void ARMPassConfig::addPreSched2() {
addPass(createARMExpandPseudoPass());
if (getOptLevel() != CodeGenOpt::None) {
- // in v8, IfConversion depends on Thumb instruction widths
+ // When optimising for size, always run the Thumb2SizeReduction pass before
+ // IfConversion. Otherwise, check whether IT blocks are restricted
+ // (e.g. in v8, IfConversion depends on Thumb instruction widths)
addPass(createThumb2SizeReductionPass([this](const Function &F) {
- return this->TM->getSubtarget<ARMSubtarget>(F).restrictIT();
+ return this->TM->getSubtarget<ARMSubtarget>(F).hasMinSize() ||
+ this->TM->getSubtarget<ARMSubtarget>(F).restrictIT();
}));
addPass(createIfConverter([](const MachineFunction &MF) {
@@ -538,7 +552,9 @@ void ARMPassConfig::addPreEmitPass() {
// Don't optimize barriers at -O0.
if (getOptLevel() != CodeGenOpt::None)
addPass(createARMOptimizeBarriersPass());
+}
+void ARMPassConfig::addPreEmitPass2() {
addPass(createARMConstantIslandPass());
addPass(createARMLowOverheadLoopsPass());
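The addPreSched2 change above widens the predicate passed to Thumb2SizeReduction: the pass now runs before IfConversion either when the function is optimized for minimum size or when the subtarget restricts IT blocks. A small sketch of that predicate shape; SubtargetSketch and its accessors are placeholders that only mirror the names used in the diff:

#include <cassert>

// Placeholder for the per-function subtarget the real pass config queries.
struct SubtargetSketch {
  bool MinSize;
  bool RestrictIT;
  bool hasMinSize() const { return MinSize; }
  bool restrictIT() const { return RestrictIT; }
};

// Decide, per function, whether size reduction should run before if-conversion.
static bool runThumb2SizeReductionEarly(const SubtargetSketch &ST) {
  return ST.hasMinSize() || ST.restrictIT();
}

int main() {
  assert(runThumb2SizeReductionEarly({/*MinSize=*/true, /*RestrictIT=*/false}));
  assert(!runThumb2SizeReductionEarly({/*MinSize=*/false, /*RestrictIT=*/false}));
  return 0;
}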
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
index 891329d3f297..3f0e3360632d 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -49,7 +49,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
// Since we cannot modify flags for an existing section, we create a new
// section with the right flags, and use 0 as the unique ID for
// execute-only text
- TextSection = Ctx.getELFSection(".text", Type, Flags, 0, "", 0U);
+ TextSection = Ctx.getELFSection(".text", Type, Flags, 0, "", 0U, nullptr);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 7ff05034c1f2..bea4e157a131 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -16,18 +16,19 @@
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -45,7 +46,7 @@ static cl::opt<bool> DisableLowOverheadLoops(
"disable-arm-loloops", cl::Hidden, cl::init(false),
cl::desc("Disable the generation of low-overhead loops"));
-extern cl::opt<bool> DisableTailPredication;
+extern cl::opt<TailPredication::Mode> EnableTailPredication;
extern cl::opt<bool> EnableMaskedGatherScatters;
@@ -57,17 +58,32 @@ bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
const FeatureBitset &CalleeBits =
TM.getSubtargetImpl(*Callee)->getFeatureBits();
- // To inline a callee, all features not in the whitelist must match exactly.
- bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
- (CalleeBits & ~InlineFeatureWhitelist);
- // For features in the whitelist, the callee's features must be a subset of
+ // To inline a callee, all features not in the allowed list must match exactly.
+ bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
+ (CalleeBits & ~InlineFeaturesAllowed);
+ // For features in the allowed list, the callee's features must be a subset of
// the callers'.
- bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) ==
- (CalleeBits & InlineFeatureWhitelist);
+ bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
+ (CalleeBits & InlineFeaturesAllowed);
return MatchExact && MatchSubset;
}
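The areInlineCompatible hunk renames the whitelist to InlineFeaturesAllowed but keeps the same two-part test: features outside the allowed set must match exactly between caller and callee, and within the allowed set the callee may only rely on features the caller also has. A toy restatement on 8-bit feature sets; the bit patterns are made up for illustration:

#include <bitset>
#include <cassert>

int main() {
  std::bitset<8> Allowed("00001111"); // features that tolerate a mismatch
  std::bitset<8> Caller ("10101011");
  std::bitset<8> Callee ("10100011"); // differs from the caller only in Allowed bits

  // Outside the allowed set, the feature bits must be identical.
  bool MatchExact = (Caller & ~Allowed) == (Callee & ~Allowed);
  // Inside the allowed set, the callee's features must be a subset of the caller's.
  bool MatchSubset = ((Caller & Callee) & Allowed) == (Callee & Allowed);

  assert(MatchExact && MatchSubset);
  return 0;
}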
-int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+bool ARMTTIImpl::shouldFavorBackedgeIndex(const Loop *L) const {
+ if (L->getHeader()->getParent()->hasOptSize())
+ return false;
+ if (ST->hasMVEIntegerOps())
+ return false;
+ return ST->isMClass() && ST->isThumb2() && L->getNumBlocks() == 1;
+}
+
+bool ARMTTIImpl::shouldFavorPostInc() const {
+ if (ST->hasMVEIntegerOps())
+ return true;
+ return false;
+}
+
+int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
unsigned Bits = Ty->getPrimitiveSizeInBits();
@@ -110,7 +126,7 @@ int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
}
int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty) {
+ Type *Ty, TTI::TargetCostKind CostKind) {
// Division by a constant can be turned into multiplication, but only if we
// know it's constant. So it's not so much that the immediate is cheap (it's
// not), but that the alternative is worse.
@@ -125,12 +141,14 @@ int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Im
if (Imm == 255 || Imm == 65535)
return 0;
// Conversion to BIC is free, and means we can use ~Imm instead.
- return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));
+ return std::min(getIntImmCost(Imm, Ty, CostKind),
+ getIntImmCost(~Imm, Ty, CostKind));
}
if (Opcode == Instruction::Add)
// Conversion to SUB is free, and means we can use -Imm instead.
- return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty));
+ return std::min(getIntImmCost(Imm, Ty, CostKind),
+ getIntImmCost(-Imm, Ty, CostKind));
if (Opcode == Instruction::ICmp && Imm.isNegative() &&
Ty->getIntegerBitWidth() == 32) {
@@ -147,34 +165,27 @@ int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Im
if (Opcode == Instruction::Xor && Imm.isAllOnesValue())
return 0;
- return getIntImmCost(Imm, Ty);
+ return getIntImmCost(Imm, Ty, CostKind);
}
int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- // Single to/from double precision conversions.
- static const CostTblEntry NEONFltDblTbl[] = {
- // Vector fptrunc/fpext conversions.
- { ISD::FP_ROUND, MVT::v2f64, 2 },
- { ISD::FP_EXTEND, MVT::v2f32, 2 },
- { ISD::FP_EXTEND, MVT::v4f32, 4 }
+ // TODO: Allow non-throughput costs that aren't binary.
+ auto AdjustCost = [&CostKind](int Cost) {
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Cost == 0 ? 0 : 1;
+ return Cost;
};
- if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
- ISD == ISD::FP_EXTEND)) {
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
- if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
- return LT.first * Entry->Cost;
- }
-
EVT SrcTy = TLI->getValueType(DL, Src);
EVT DstTy = TLI->getValueType(DL, Dst);
if (!SrcTy.isSimple() || !DstTy.isSimple())
- return BaseT::getCastInstrCost(Opcode, Dst, Src);
+ return AdjustCost(BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I));
// The extend of a load is free
if (I && isa<LoadInst>(I->getOperand(0))) {
@@ -194,7 +205,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
};
if (const auto *Entry = ConvertCostTableLookup(
LoadConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
- return Entry->Cost;
+ return AdjustCost(Entry->Cost);
static const TypeConversionCostTblEntry MVELoadConversionTbl[] = {
{ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0},
@@ -203,27 +214,129 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 0},
{ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 0},
{ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 0},
+      // The following extend from a legal type to an illegal type, so the
+      // load needs to be split. This introduces an extra load operation, but
+      // the extend is still "free".
+ {ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1},
+ {ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1},
+ {ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 3},
+ {ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 3},
+ {ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1},
+ {ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1},
};
if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
if (const auto *Entry =
ConvertCostTableLookup(MVELoadConversionTbl, ISD,
DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
- return Entry->Cost;
+ return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());
+ }
+
+ static const TypeConversionCostTblEntry MVEFLoadConversionTbl[] = {
+ // FPExtends are similar but also require the VCVT instructions.
+ {ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1},
+ {ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 3},
+ };
+ if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
+ if (const auto *Entry =
+ ConvertCostTableLookup(MVEFLoadConversionTbl, ISD,
+ DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
+ return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());
+ }
+ }
+
+ // The truncate of a store is free. This is the mirror of extends above.
+ if (I && I->hasOneUse() && isa<StoreInst>(*I->user_begin())) {
+ static const TypeConversionCostTblEntry MVELoadConversionTbl[] = {
+ {ISD::TRUNCATE, MVT::v4i32, MVT::v4i16, 0},
+ {ISD::TRUNCATE, MVT::v4i32, MVT::v4i8, 0},
+ {ISD::TRUNCATE, MVT::v8i16, MVT::v8i8, 0},
+ {ISD::TRUNCATE, MVT::v8i32, MVT::v8i16, 1},
+ {ISD::TRUNCATE, MVT::v16i32, MVT::v16i8, 3},
+ {ISD::TRUNCATE, MVT::v16i16, MVT::v16i8, 1},
+ };
+ if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
+ if (const auto *Entry =
+ ConvertCostTableLookup(MVELoadConversionTbl, ISD, SrcTy.getSimpleVT(),
+ DstTy.getSimpleVT()))
+ return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());
+ }
+
+ static const TypeConversionCostTblEntry MVEFLoadConversionTbl[] = {
+ {ISD::FP_ROUND, MVT::v4f32, MVT::v4f16, 1},
+ {ISD::FP_ROUND, MVT::v8f32, MVT::v8f16, 3},
+ };
+ if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
+ if (const auto *Entry =
+ ConvertCostTableLookup(MVEFLoadConversionTbl, ISD, SrcTy.getSimpleVT(),
+ DstTy.getSimpleVT()))
+ return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());
}
}
+ // NEON vector operations that can extend their inputs.
+ if ((ISD == ISD::SIGN_EXTEND || ISD == ISD::ZERO_EXTEND) &&
+ I && I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) {
+ static const TypeConversionCostTblEntry NEONDoubleWidthTbl[] = {
+ // vaddl
+ { ISD::ADD, MVT::v4i32, MVT::v4i16, 0 },
+ { ISD::ADD, MVT::v8i16, MVT::v8i8, 0 },
+ // vsubl
+ { ISD::SUB, MVT::v4i32, MVT::v4i16, 0 },
+ { ISD::SUB, MVT::v8i16, MVT::v8i8, 0 },
+ // vmull
+ { ISD::MUL, MVT::v4i32, MVT::v4i16, 0 },
+ { ISD::MUL, MVT::v8i16, MVT::v8i8, 0 },
+ // vshll
+ { ISD::SHL, MVT::v4i32, MVT::v4i16, 0 },
+ { ISD::SHL, MVT::v8i16, MVT::v8i8, 0 },
+ };
+
+ auto *User = cast<Instruction>(*I->user_begin());
+ int UserISD = TLI->InstructionOpcodeToISD(User->getOpcode());
+ if (auto *Entry = ConvertCostTableLookup(NEONDoubleWidthTbl, UserISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT())) {
+ return AdjustCost(Entry->Cost);
+ }
+ }
+
+ // Single to/from double precision conversions.
+ if (Src->isVectorTy() && ST->hasNEON() &&
+ ((ISD == ISD::FP_ROUND && SrcTy.getScalarType() == MVT::f64 &&
+ DstTy.getScalarType() == MVT::f32) ||
+ (ISD == ISD::FP_EXTEND && SrcTy.getScalarType() == MVT::f32 &&
+ DstTy.getScalarType() == MVT::f64))) {
+ static const CostTblEntry NEONFltDblTbl[] = {
+ // Vector fptrunc/fpext conversions.
+ {ISD::FP_ROUND, MVT::v2f64, 2},
+ {ISD::FP_EXTEND, MVT::v2f32, 2},
+ {ISD::FP_EXTEND, MVT::v4f32, 4}};
+
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+ if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
+ return AdjustCost(LT.first * Entry->Cost);
+ }
+
// Some arithmetic, load and store operations have specific instructions
// to cast up/down their types automatically at no extra cost.
// TODO: Get these tables to know at least what the related operations are.
static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
- { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
- { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
// The number of vmovl instructions for the extension.
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
+ { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i8, 3 },
+ { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i8, 3 },
+ { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 2 },
+ { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
@@ -294,7 +407,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
DstTy.getSimpleVT(),
SrcTy.getSimpleVT()))
- return Entry->Cost;
+ return AdjustCost(Entry->Cost);
}
// Scalar float to integer conversions.
@@ -324,7 +437,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
DstTy.getSimpleVT(),
SrcTy.getSimpleVT()))
- return Entry->Cost;
+ return AdjustCost(Entry->Cost);
}
// Scalar integer to float conversions.
@@ -355,7 +468,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
ISD, DstTy.getSimpleVT(),
SrcTy.getSimpleVT()))
- return Entry->Cost;
+ return AdjustCost(Entry->Cost);
}
// MVE extend costs, taken from codegen tests. i8->i16 or i16->i32 is one
@@ -380,7 +493,28 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
if (const auto *Entry = ConvertCostTableLookup(MVEVectorConversionTbl,
ISD, DstTy.getSimpleVT(),
SrcTy.getSimpleVT()))
- return Entry->Cost * ST->getMVEVectorCostFactor();
+ return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());
+ }
+
+ if (ISD == ISD::FP_ROUND || ISD == ISD::FP_EXTEND) {
+    // As a general rule, fp converts that were not matched above are
+    // scalarized and cost 1 vcvt for each lane, so long as the instruction is
+    // available. If not, it will become a series of function calls.
+ const int CallCost = getCallInstrCost(nullptr, Dst, {Src}, CostKind);
+ int Lanes = 1;
+ if (SrcTy.isFixedLengthVector())
+ Lanes = SrcTy.getVectorNumElements();
+ auto IsLegal = [this](EVT VT) {
+ EVT EltVT = VT.getScalarType();
+ return (EltVT == MVT::f32 && ST->hasVFP2Base()) ||
+ (EltVT == MVT::f64 && ST->hasFP64()) ||
+ (EltVT == MVT::f16 && ST->hasFullFP16());
+ };
+
+ if (IsLegal(SrcTy) && IsLegal(DstTy))
+ return Lanes;
+ else
+ return Lanes * CallCost;
}
// Scalar integer conversion costs.
@@ -399,13 +533,14 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
DstTy.getSimpleVT(),
SrcTy.getSimpleVT()))
- return Entry->Cost;
+ return AdjustCost(Entry->Cost);
}
int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
? ST->getMVEVectorCostFactor()
: 1;
- return BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src);
+ return AdjustCost(
+ BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I));
}
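The getCastInstrCost rewrite above threads a TargetCostKind through every path and wraps each table lookup in an AdjustCost lambda: the tables hold throughput costs, so for any other cost kind the result is clamped to 0 or 1 until per-kind tables exist. A standalone restatement of that clamp; the enum here is a stand-in, not the real TTI type:

#include <cassert>

enum class CostKind { RecipThroughput, CodeSize, Latency };

// Mirrors the AdjustCost lambda: non-throughput queries only distinguish
// "free" conversions from "costs something".
static int adjustCost(CostKind Kind, int ThroughputCost) {
  if (Kind != CostKind::RecipThroughput)
    return ThroughputCost == 0 ? 0 : 1;
  return ThroughputCost;
}

int main() {
  assert(adjustCost(CostKind::RecipThroughput, 3) == 3);
  assert(adjustCost(CostKind::CodeSize, 3) == 1);
  assert(adjustCost(CostKind::CodeSize, 0) == 0);
  return 0;
}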
int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
@@ -420,7 +555,7 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
Opcode == Instruction::ExtractElement)) {
// Cross-class copies are expensive on many microarchitectures,
// so assume they are expensive by default.
- if (ValTy->getVectorElementType()->isIntegerTy())
+ if (cast<VectorType>(ValTy)->getElementType()->isIntegerTy())
return 3;
// Even if it's not a cross class copy, this likely leads to mixing
@@ -438,14 +573,19 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
// result anyway.
return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index),
ST->getMVEVectorCostFactor()) *
- ValTy->getVectorNumElements() / 2;
+ cast<FixedVectorType>(ValTy)->getNumElements() / 2;
}
return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
}
int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
+ // TODO: Handle other cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// On NEON a vector select gets lowered to vbsl.
if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
@@ -472,7 +612,8 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
int BaseCost = ST->hasMVEIntegerOps() && ValTy->isVectorTy()
? ST->getMVEVectorCostFactor()
: 1;
- return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+ return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind,
+ I);
}
int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
@@ -496,11 +637,28 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
return BaseT::getAddressComputationCost(Ty, SE, Ptr);
}
-bool ARMTTIImpl::isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment) {
+bool ARMTTIImpl::isProfitableLSRChainElement(Instruction *I) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ // If a VCTP is part of a chain, it's already profitable and shouldn't be
+ // optimized, else LSR may block tail-predication.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::arm_mve_vctp8:
+ case Intrinsic::arm_mve_vctp16:
+ case Intrinsic::arm_mve_vctp32:
+ case Intrinsic::arm_mve_vctp64:
+ return true;
+ default:
+ break;
+ }
+ }
+ return false;
+}
+
+bool ARMTTIImpl::isLegalMaskedLoad(Type *DataTy, Align Alignment) {
if (!EnableMaskedLoadStores || !ST->hasMVEIntegerOps())
return false;
- if (auto *VecTy = dyn_cast<VectorType>(DataTy)) {
+ if (auto *VecTy = dyn_cast<FixedVectorType>(DataTy)) {
// Don't support v2i1 yet.
if (VecTy->getNumElements() == 2)
return false;
@@ -512,12 +670,11 @@ bool ARMTTIImpl::isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment) {
}
unsigned EltWidth = DataTy->getScalarSizeInBits();
- return (EltWidth == 32 && (!Alignment || Alignment >= 4)) ||
- (EltWidth == 16 && (!Alignment || Alignment >= 2)) ||
- (EltWidth == 8);
+ return (EltWidth == 32 && Alignment >= 4) ||
+ (EltWidth == 16 && Alignment >= 2) || (EltWidth == 8);
}
-bool ARMTTIImpl::isLegalMaskedGather(Type *Ty, MaybeAlign Alignment) {
+bool ARMTTIImpl::isLegalMaskedGather(Type *Ty, Align Alignment) {
if (!EnableMaskedGatherScatters || !ST->hasMVEIntegerOps())
return false;
@@ -534,8 +691,8 @@ bool ARMTTIImpl::isLegalMaskedGather(Type *Ty, MaybeAlign Alignment) {
return false;
unsigned EltWidth = Ty->getScalarSizeInBits();
- return ((EltWidth == 32 && (!Alignment || Alignment >= 4)) ||
- (EltWidth == 16 && (!Alignment || Alignment >= 2)) || EltWidth == 8);
+ return ((EltWidth == 32 && Alignment >= 4) ||
+ (EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
}
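isLegalMaskedLoad and isLegalMaskedGather above now take a plain Align, so the old escape hatch for an unspecified alignment disappears and the rule reduces to an element-width/alignment check. A standalone restatement of that rule; the function name is made up:

#include <cassert>

// Element widths of 32 and 16 bits need natural alignment; bytes always pass.
static bool legalForWidth(unsigned EltWidthBits, unsigned AlignBytes) {
  return (EltWidthBits == 32 && AlignBytes >= 4) ||
         (EltWidthBits == 16 && AlignBytes >= 2) ||
         EltWidthBits == 8;
}

int main() {
  assert(legalForWidth(32, 4));
  assert(!legalForWidth(32, 2)); // under-aligned i32 accesses are rejected
  assert(legalForWidth(8, 1));
  return 0;
}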
int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
@@ -552,8 +709,8 @@ int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
return LibCallCost;
const unsigned Size = C->getValue().getZExtValue();
- const unsigned DstAlign = MI->getDestAlignment();
- const unsigned SrcAlign = MI->getSourceAlignment();
+ const Align DstAlign = *MI->getDestAlign();
+ const Align SrcAlign = *MI->getSourceAlign();
const Function *F = I->getParent()->getParent();
const unsigned Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize());
std::vector<EVT> MemOps;
@@ -562,8 +719,9 @@ int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
// loaded and stored. That's why we multiply the number of elements by 2 to
// get the cost for this memcpy.
if (getTLI()->findOptimalMemOpLowering(
- MemOps, Limit, Size, DstAlign, SrcAlign, false /*IsMemset*/,
- false /*ZeroMemset*/, false /*MemcpyStrSrc*/, false /*AllowOverlap*/,
+ MemOps, Limit,
+ MemOp::Copy(Size, /*DstAlignCanChange*/ false, DstAlign, SrcAlign,
+ /*IsVolatile*/ true),
MI->getDestAddressSpace(), MI->getSourceAddressSpace(),
F->getAttributes()))
return MemOps.size() * 2;
@@ -572,8 +730,8 @@ int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
return LibCallCost;
}
-int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) {
+int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
+ int Index, VectorType *SubTp) {
if (ST->hasNEON()) {
if (Kind == TTI::SK_Broadcast) {
static const CostTblEntry NEONDupTbl[] = {
@@ -667,12 +825,19 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
}
int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind,
TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info,
TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo,
ArrayRef<const Value *> Args,
const Instruction *CxtI) {
+ // TODO: Handle more cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
+ Op2Info, Opd1PropInfo,
+ Opd2PropInfo, Args, CxtI);
+
int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
@@ -723,7 +888,8 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
return LT.first * Entry->Cost;
- int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
+ Op2Info,
Opd1PropInfo, Opd2PropInfo);
// This is somewhat of a hack. The problem that we are facing is that SROA
@@ -779,12 +945,13 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
return LT.first * BaseCost;
// Else this is expand, assume that we need to scalarize this op.
- if (Ty->isVectorTy()) {
- unsigned Num = Ty->getVectorNumElements();
- unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
+ if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
+ unsigned Num = VTy->getNumElements();
+ unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType(),
+ CostKind);
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
- return BaseT::getScalarizationOverhead(Ty, Args) + Num * Cost;
+ return BaseT::getScalarizationOverhead(VTy, Args) + Num * Cost;
}
return BaseCost;
@@ -792,26 +959,53 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+ // TODO: Handle other cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return 1;
+
+ // Type legalization can't handle structs
+ if (TLI->getValueType(DL, Src, true) == MVT::Other)
+ return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+ CostKind);
if (ST->hasNEON() && Src->isVectorTy() &&
(Alignment && *Alignment != Align(16)) &&
- Src->getVectorElementType()->isDoubleTy()) {
+ cast<VectorType>(Src)->getElementType()->isDoubleTy()) {
// Unaligned loads/stores are extremely inefficient.
// We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
return LT.first * 4;
}
+
+ // MVE can optimize a fpext(load(4xhalf)) using an extending integer load.
+ // Same for stores.
+ if (ST->hasMVEFloatOps() && isa<FixedVectorType>(Src) && I &&
+ ((Opcode == Instruction::Load && I->hasOneUse() &&
+ isa<FPExtInst>(*I->user_begin())) ||
+ (Opcode == Instruction::Store && isa<FPTruncInst>(I->getOperand(0))))) {
+ FixedVectorType *SrcVTy = cast<FixedVectorType>(Src);
+ Type *DstTy =
+ Opcode == Instruction::Load
+ ? (*I->user_begin())->getType()
+ : cast<Instruction>(I->getOperand(0))->getOperand(0)->getType();
+ if (SrcVTy->getNumElements() == 4 && SrcVTy->getScalarType()->isHalfTy() &&
+ DstTy->getScalarType()->isFloatTy())
+ return ST->getMVEVectorCostFactor();
+ }
+
int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
? ST->getMVEVectorCostFactor()
: 1;
- return BaseCost * LT.first;
+ return BaseCost * BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+ CostKind, I);
}
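The new special case in getMemoryOpCost above prices a v4f16 load whose only user is an fpext to v4f32 (or the store of a matching fptrunc) at the MVE vector cost factor, because MVE can fold the conversion into an extending load or narrowing store. A condensed sketch of that decision; the boolean parameters stand in for the IR queries in the real code and the numbers are illustrative:

#include <cassert>

static int memoryOpCost(bool HasMVEFloat, bool IsV4HalfVector,
                        bool FoldsIntoFPExtOrFPTrunc, int MVEVectorCostFactor,
                        int GenericCost) {
  // A half-precision vector access that feeds (or is fed by) a float
  // conversion becomes a single widening/narrowing memory operation.
  if (HasMVEFloat && IsV4HalfVector && FoldsIntoFPExtOrFPTrunc)
    return MVEVectorCostFactor;
  return GenericCost;
}

int main() {
  assert(memoryOpCost(true, true, true, 2, 8) == 2);  // folded extending load
  assert(memoryOpCost(false, true, true, 2, 8) == 8); // no MVE float: generic cost
  return 0;
}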
int ARMTTIImpl::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
- bool UseMaskForGaps) {
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond, bool UseMaskForGaps) {
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");
@@ -820,8 +1014,9 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(
if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
!UseMaskForCond && !UseMaskForGaps) {
- unsigned NumElts = VecTy->getVectorNumElements();
- auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
+ unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();
+ auto *SubVecTy =
+ FixedVectorType::get(VecTy->getScalarType(), NumElts / Factor);
// vldN/vstN only support legal vector types of size 64 or 128 in bits.
// Accesses having vector types that are a multiple of 128 bits can be
@@ -842,10 +1037,109 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace,
+ Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);
}
+unsigned ARMTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
+ const Value *Ptr, bool VariableMask,
+ Align Alignment,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
+ using namespace PatternMatch;
+ if (!ST->hasMVEIntegerOps() || !EnableMaskedGatherScatters)
+ return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
+ Alignment, CostKind, I);
+
+ assert(DataTy->isVectorTy() && "Can't do gather/scatters on scalar!");
+ auto *VTy = cast<FixedVectorType>(DataTy);
+
+ // TODO: Splitting, once we do that.
+
+ unsigned NumElems = VTy->getNumElements();
+ unsigned EltSize = VTy->getScalarSizeInBits();
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, DataTy);
+
+ // For now, it is assumed that for the MVE gather instructions the loads are
+ // all effectively serialised. This means the cost is the scalar cost
+ // multiplied by the number of elements being loaded. This is possibly very
+ // conservative, but even so we still end up vectorising loops because the
+ // cost per iteration for many loops is lower than for scalar loops.
+ unsigned VectorCost = NumElems * LT.first;
+ // The scalarization cost should be a lot higher. We use the number of vector
+ // elements plus the scalarization overhead.
+ unsigned ScalarCost =
+ NumElems * LT.first + BaseT::getScalarizationOverhead(VTy, {});
+
+ if (Alignment < EltSize / 8)
+ return ScalarCost;
+
+ unsigned ExtSize = EltSize;
+ // Check whether there's a single user that asks for an extended type
+ if (I != nullptr) {
+    // Depending on the caller of this function, a gather instruction will
+    // either have opcode Instruction::Load or be a call to the masked_gather
+    // intrinsic.
+ if ((I->getOpcode() == Instruction::Load ||
+ match(I, m_Intrinsic<Intrinsic::masked_gather>())) &&
+ I->hasOneUse()) {
+ const User *Us = *I->users().begin();
+ if (isa<ZExtInst>(Us) || isa<SExtInst>(Us)) {
+ // only allow valid type combinations
+ unsigned TypeSize =
+ cast<Instruction>(Us)->getType()->getScalarSizeInBits();
+ if (((TypeSize == 32 && (EltSize == 8 || EltSize == 16)) ||
+ (TypeSize == 16 && EltSize == 8)) &&
+ TypeSize * NumElems == 128) {
+ ExtSize = TypeSize;
+ }
+ }
+ }
+ // Check whether the input data needs to be truncated
+ TruncInst *T;
+ if ((I->getOpcode() == Instruction::Store ||
+ match(I, m_Intrinsic<Intrinsic::masked_scatter>())) &&
+ (T = dyn_cast<TruncInst>(I->getOperand(0)))) {
+ // Only allow valid type combinations
+ unsigned TypeSize = T->getOperand(0)->getType()->getScalarSizeInBits();
+ if (((EltSize == 16 && TypeSize == 32) ||
+ (EltSize == 8 && (TypeSize == 32 || TypeSize == 16))) &&
+ TypeSize * NumElems == 128)
+ ExtSize = TypeSize;
+ }
+ }
+
+ if (ExtSize * NumElems != 128 || NumElems < 4)
+ return ScalarCost;
+
+ // Any (aligned) i32 gather will not need to be scalarised.
+ if (ExtSize == 32)
+ return VectorCost;
+ // For smaller types, we need to ensure that the gep's inputs are correctly
+ // extended from a small enough value. Other sizes (including i64) are
+ // scalarized for now.
+ if (ExtSize != 8 && ExtSize != 16)
+ return ScalarCost;
+
+ if (const auto *BC = dyn_cast<BitCastInst>(Ptr))
+ Ptr = BC->getOperand(0);
+ if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+ if (GEP->getNumOperands() != 2)
+ return ScalarCost;
+ unsigned Scale = DL.getTypeAllocSize(GEP->getResultElementType());
+ // Scale needs to be correct (which is only relevant for i16s).
+ if (Scale != 1 && Scale * 8 != ExtSize)
+ return ScalarCost;
+ // And we need to zext (not sext) the indexes from a small enough type.
+ if (const auto *ZExt = dyn_cast<ZExtInst>(GEP->getOperand(1))) {
+ if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= ExtSize)
+ return VectorCost;
+ }
+ return ScalarCost;
+ }
+ return ScalarCost;
+}
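getGatherScatterOpCost above models MVE gathers as effectively serialised, so the vector cost is the lane count, while the scalarized cost adds the insert/extract overhead; the vector form is only credited when the (possibly sign/zero-extended) element size times the lane count fills exactly one 128-bit vector with at least four lanes and the access is sufficiently aligned. A condensed, simplified restatement; the numbers and the function name are illustrative, and the extra 8/16-bit GEP checks are collapsed into the fallback:

#include <cassert>

static unsigned gatherCost(unsigned NumElems, unsigned EltSizeBits,
                           unsigned ExtSizeBits, unsigned AlignBytes,
                           unsigned ScalarizationOverhead) {
  unsigned VectorCost = NumElems;                         // serialised lanes
  unsigned ScalarCost = NumElems + ScalarizationOverhead; // plus insert/extract
  if (AlignBytes < EltSizeBits / 8)
    return ScalarCost;
  if (ExtSizeBits * NumElems != 128 || NumElems < 4)
    return ScalarCost;
  if (ExtSizeBits == 32)
    return VectorCost;
  // The 8- and 16-bit cases additionally inspect the GEP operands; treat them
  // as scalarized in this sketch.
  return ScalarCost;
}

int main() {
  assert(gatherCost(4, 32, 32, 4, 8) == 4);  // aligned v4i32 gather: vector cost
  assert(gatherCost(4, 32, 32, 1, 8) == 12); // under-aligned: scalarized
  assert(gatherCost(2, 64, 64, 8, 4) == 6);  // i64 gathers stay scalarized
  return 0;
}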
+
bool ARMTTIImpl::isLoweredToCall(const Function *F) {
if (!F->isIntrinsic())
BaseT::isLoweredToCall(F);
@@ -913,23 +1207,31 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
HardwareLoopInfo &HWLoopInfo) {
// Low-overhead branches are only supported in the 'low-overhead branch'
// extension of v8.1-m.
- if (!ST->hasLOB() || DisableLowOverheadLoops)
+ if (!ST->hasLOB() || DisableLowOverheadLoops) {
+ LLVM_DEBUG(dbgs() << "ARMHWLoops: Disabled\n");
return false;
+ }
- if (!SE.hasLoopInvariantBackedgeTakenCount(L))
+ if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
+ LLVM_DEBUG(dbgs() << "ARMHWLoops: No BETC\n");
return false;
+ }
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
+ LLVM_DEBUG(dbgs() << "ARMHWLoops: Uncomputable BETC\n");
return false;
+ }
const SCEV *TripCountSCEV =
SE.getAddExpr(BackedgeTakenCount,
SE.getOne(BackedgeTakenCount->getType()));
// We need to store the trip count in LR, a 32-bit register.
- if (SE.getUnsignedRangeMax(TripCountSCEV).getBitWidth() > 32)
+ if (SE.getUnsignedRangeMax(TripCountSCEV).getBitWidth() > 32) {
+ LLVM_DEBUG(dbgs() << "ARMHWLoops: Trip count does not fit into 32bits\n");
return false;
+ }
// Making a call will trash LR and clear LO_BRANCH_INFO, so there's little
// point in generating a hardware loop if that's going to happen.
@@ -1034,8 +1336,10 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
auto ScanLoop = [&](Loop *L) {
for (auto *BB : L->getBlocks()) {
for (auto &I : *BB) {
- if (MaybeCall(I) || IsHardwareLoopIntrinsic(I))
+ if (MaybeCall(I) || IsHardwareLoopIntrinsic(I)) {
+ LLVM_DEBUG(dbgs() << "ARMHWLoops: Bad instruction: " << I << "\n");
return false;
+ }
}
}
return true;
@@ -1102,12 +1406,47 @@ static bool canTailPredicateInstruction(Instruction &I, int &ICmpCount) {
static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
const DataLayout &DL,
const LoopAccessInfo *LAI) {
+ LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n");
+
+ // If there are live-out values, it is probably a reduction, which needs a
+ // final reduction step after the loop. MVE has a VADDV instruction to reduce
+ // integer vectors, but doesn't have an equivalent one for float vectors. A
+ // live-out value that is not recognised as a reduction will result in the
+  // tail-predicated loop being reverted to a non-predicated loop, and this is
+ // very expensive, i.e. it has a significant performance impact. So, in this
+ // case it's better not to tail-predicate the loop, which is what we check
+ // here. Thus, we allow only 1 live-out value, which has to be an integer
+ // reduction, which matches the loops supported by ARMLowOverheadLoops.
+ // It is important to keep ARMLowOverheadLoops and canTailPredicateLoop in
+ // sync with each other.
+ SmallVector< Instruction *, 8 > LiveOuts;
+ LiveOuts = llvm::findDefsUsedOutsideOfLoop(L);
+ bool IntReductionsDisabled =
+ EnableTailPredication == TailPredication::EnabledNoReductions ||
+ EnableTailPredication == TailPredication::ForceEnabledNoReductions;
+
+ for (auto *I : LiveOuts) {
+ if (!I->getType()->isIntegerTy()) {
+ LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer "
+ "live-out value\n");
+ return false;
+ }
+ if (I->getOpcode() != Instruction::Add) {
+ LLVM_DEBUG(dbgs() << "Only add reductions supported\n");
+ return false;
+ }
+ if (IntReductionsDisabled) {
+ LLVM_DEBUG(dbgs() << "Integer add reductions not enabled\n");
+ return false;
+ }
+ }
+
+ // Next, check that all instructions can be tail-predicated.
PredicatedScalarEvolution PSE = LAI->getPSE();
+ SmallVector<Instruction *, 16> LoadStores;
int ICmpCount = 0;
int Stride = 0;
- LLVM_DEBUG(dbgs() << "tail-predication: checking allowed instructions\n");
- SmallVector<Instruction *, 16> LoadStores;
for (BasicBlock *BB : L->blocks()) {
for (Instruction &I : BB->instructionsWithoutDebug()) {
if (isa<PHINode>(&I))
@@ -1155,8 +1494,10 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
TargetLibraryInfo *TLI,
DominatorTree *DT,
const LoopAccessInfo *LAI) {
- if (DisableTailPredication)
+ if (!EnableTailPredication) {
+ LLVM_DEBUG(dbgs() << "Tail-predication not enabled.\n");
return false;
+ }
// Creating a predicated vector loop is the first step for generating a
// tail-predicated hardware loop, for which we need the MVE masked
@@ -1197,7 +1538,16 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
return canTailPredicateLoop(L, LI, SE, DL, LAI);
}
+bool ARMTTIImpl::emitGetActiveLaneMask() const {
+ if (!ST->hasMVEIntegerOps() || !EnableTailPredication)
+ return false;
+ // Intrinsic @llvm.get.active.lane.mask is supported.
+ // It is used in the MVETailPredication pass, which requires the number of
+  // elements processed by this vector loop to set up the tail-predicated
+ // loop.
+ return true;
+}
void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
// Only currently enable these preferences for M-Class cores.
@@ -1241,8 +1591,7 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
return;
if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
- ImmutableCallSite CS(&I);
- if (const Function *F = CS.getCalledFunction()) {
+ if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
if (!isLoweredToCall(F))
continue;
}
@@ -1251,7 +1600,7 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
SmallVector<const Value*, 4> Operands(I.value_op_begin(),
I.value_op_end());
- Cost += getUserCost(&I, Operands);
+ Cost += getUserCost(&I, Operands, TargetTransformInfo::TCK_CodeSize);
}
}
@@ -1271,27 +1620,12 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
UP.Force = true;
}
+void ARMTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ BaseT::getPeelingPreferences(L, SE, PP);
+}
+
bool ARMTTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const {
- assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
- unsigned ScalarBits = Ty->getScalarSizeInBits();
- if (!ST->hasMVEIntegerOps())
- return false;
-
- switch (Opcode) {
- case Instruction::FAdd:
- case Instruction::FMul:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Mul:
- case Instruction::FCmp:
- return false;
- case Instruction::ICmp:
- case Instruction::Add:
- return ScalarBits < 64 && ScalarBits * Ty->getVectorNumElements() == 128;
- default:
- llvm_unreachable("Unhandled reduction opcode");
- }
- return false;
+ return ST->hasMVEIntegerOps();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index f66083eaf187..7bf6de4bffe0 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -38,6 +38,16 @@ class ScalarEvolution;
class Type;
class Value;
+namespace TailPredication {
+ enum Mode {
+ Disabled = 0,
+ EnabledNoReductions,
+ Enabled,
+ ForceEnabledNoReductions,
+ ForceEnabled
+ };
+}
+
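The earlier hunk replaces the DisableTailPredication check with EnableTailPredication, and this enum suggests that flag is a multi-valued option rather than a plain boolean. A hedged sketch of how such an option could be declared follows; the option name and value spellings are assumptions, only the enumerators come from the header above.

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hypothetical declaration; assumes the TailPredication::Mode enum above.
static cl::opt<TailPredication::Mode> EnableTailPredication(
    "tail-predication", cl::init(TailPredication::Disabled),
    cl::desc("MVE tail-predication options"),
    cl::values(
        clEnumValN(TailPredication::Disabled, "disabled",
                   "Don't tail-predicate loops"),
        clEnumValN(TailPredication::EnabledNoReductions,
                   "enabled-no-reductions",
                   "Enable tail-predication, but not for reduction loops"),
        clEnumValN(TailPredication::Enabled, "enabled",
                   "Enable tail-predication, including reduction loops"),
        clEnumValN(TailPredication::ForceEnabledNoReductions,
                   "force-enabled-no-reductions",
                   "Force tail-predication, even if analyses object, "
                   "but not for reduction loops"),
        clEnumValN(TailPredication::ForceEnabled, "force-enabled",
                   "Force tail-predication, even if analyses object, "
                   "including reduction loops")));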
class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
using BaseT = BasicTTIImplBase<ARMTTIImpl>;
using TTI = TargetTransformInfo;
@@ -47,13 +57,13 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
const ARMSubtarget *ST;
const ARMTargetLowering *TLI;
- // Currently the following features are excluded from InlineFeatureWhitelist.
+ // Currently the following features are excluded from InlineFeaturesAllowed.
// ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
// Depending on whether they are set or unset, different
// instructions/registers are available. For example, inlining a callee with
// -thumb-mode in a caller with +thumb-mode may cause the assembler to
// fail if the callee uses ARM-only instructions, e.g. in inline asm.
- const FeatureBitset InlineFeatureWhitelist = {
+ const FeatureBitset InlineFeaturesAllowed = {
ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
@@ -93,11 +103,8 @@ public:
bool enableInterleavedAccessVectorization() { return true; }
- bool shouldFavorBackedgeIndex(const Loop *L) const {
- if (L->getHeader()->getParent()->hasOptSize())
- return false;
- return ST->isMClass() && ST->isThumb2() && L->getNumBlocks() == 1;
- }
+ bool shouldFavorBackedgeIndex(const Loop *L) const;
+ bool shouldFavorPostInc() const;
/// Floating-point computation using ARMv8 AArch32 Advanced
/// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
@@ -113,9 +120,10 @@ public:
Type *Ty);
using BaseT::getIntImmCost;
- int getIntImmCost(const APInt &Imm, Type *Ty);
+ int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
- int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+ int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty, TTI::TargetCostKind CostKind);
/// @}
@@ -153,19 +161,24 @@ public:
return ST->getMaxInterleaveFactor();
}
- bool isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment);
+ bool isProfitableLSRChainElement(Instruction *I);
- bool isLegalMaskedStore(Type *DataTy, MaybeAlign Alignment) {
+ bool isLegalMaskedLoad(Type *DataTy, Align Alignment);
+
+ bool isLegalMaskedStore(Type *DataTy, Align Alignment) {
return isLegalMaskedLoad(DataTy, Alignment);
}
- bool isLegalMaskedGather(Type *Ty, MaybeAlign Alignment);
+ bool isLegalMaskedGather(Type *Ty, Align Alignment);
- bool isLegalMaskedScatter(Type *Ty, MaybeAlign Alignment) { return false; }
+ bool isLegalMaskedScatter(Type *Ty, Align Alignment) {
+ return isLegalMaskedGather(Ty, Alignment);
+ }
int getMemcpyCost(const Instruction *I);
- int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+ int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+ VectorType *SubTp);
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const;
@@ -194,9 +207,11 @@ public:
}
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
@@ -206,6 +221,7 @@ public:
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -214,13 +230,20 @@ public:
const Instruction *CxtI = nullptr);
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
- unsigned AddressSpace, const Instruction *I = nullptr);
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
- int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
- ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false);
+ int getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false);
+
+ unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
+ const Value *Ptr, bool VariableMask,
+ Align Alignment, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
bool isLoweredToCall(const Function *F);
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
@@ -236,6 +259,10 @@ public:
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ bool emitGetActiveLaneMask() const;
+
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
bool shouldBuildLookupTablesForConstant(Constant *C) const {
// In the ROPI and RWPI relocation models we can't have pointers to global
// variables or functions in constant data, so don't convert switches to
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index f6d76ee09534..05f870b90ecd 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
@@ -180,10 +181,68 @@ public:
}
};
+// Various sets of ARM instruction mnemonics that are used by the asm parser.
+class ARMMnemonicSets {
+ StringSet<> CDE;
+ StringSet<> CDEWithVPTSuffix;
+public:
+ ARMMnemonicSets(const MCSubtargetInfo &STI);
+
+ /// Returns true iff a given mnemonic is a CDE instruction
+ bool isCDEInstr(StringRef Mnemonic) {
+ // Quick check before searching the set
+ if (!Mnemonic.startswith("cx") && !Mnemonic.startswith("vcx"))
+ return false;
+ return CDE.count(Mnemonic);
+ }
+
+ /// Returns true iff a given mnemonic is a VPT-predicable CDE instruction
+ /// (possibly with a predication suffix "e" or "t")
+ bool isVPTPredicableCDEInstr(StringRef Mnemonic) {
+ if (!Mnemonic.startswith("vcx"))
+ return false;
+ return CDEWithVPTSuffix.count(Mnemonic);
+ }
+
+ /// Returns true iff a given mnemonic is an IT-predicable CDE instruction
+ /// (possibly with a condition suffix)
+ bool isITPredicableCDEInstr(StringRef Mnemonic) {
+ if (!Mnemonic.startswith("cx"))
+ return false;
+ return Mnemonic.startswith("cx1a") || Mnemonic.startswith("cx1da") ||
+ Mnemonic.startswith("cx2a") || Mnemonic.startswith("cx2da") ||
+ Mnemonic.startswith("cx3a") || Mnemonic.startswith("cx3da");
+ }
+
+ /// Return true iff a given mnemonic is an integer CDE instruction with
+ /// dual-register destination
+ bool isCDEDualRegInstr(StringRef Mnemonic) {
+ if (!Mnemonic.startswith("cx"))
+ return false;
+ return Mnemonic == "cx1d" || Mnemonic == "cx1da" ||
+ Mnemonic == "cx2d" || Mnemonic == "cx2da" ||
+ Mnemonic == "cx3d" || Mnemonic == "cx3da";
+ }
+};
+
+ARMMnemonicSets::ARMMnemonicSets(const MCSubtargetInfo &STI) {
+ for (StringRef Mnemonic: { "cx1", "cx1a", "cx1d", "cx1da",
+ "cx2", "cx2a", "cx2d", "cx2da",
+ "cx3", "cx3a", "cx3d", "cx3da", })
+ CDE.insert(Mnemonic);
+ for (StringRef Mnemonic :
+ {"vcx1", "vcx1a", "vcx2", "vcx2a", "vcx3", "vcx3a"}) {
+ CDE.insert(Mnemonic);
+ CDEWithVPTSuffix.insert(Mnemonic);
+ CDEWithVPTSuffix.insert(std::string(Mnemonic) + "t");
+ CDEWithVPTSuffix.insert(std::string(Mnemonic) + "e");
+ }
+}
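A short usage sketch of these predicates, using mnemonics the constructor above actually inserts; the wrapper function itself is hypothetical:

void classifyMnemonicExample(ARMMnemonicSets &MS) {
  StringRef Mnemonic = "cx1da"; // accumulating, dual-register CDE instruction
  bool IsCDE = MS.isCDEInstr(Mnemonic);                // true: in the CDE set
  bool IsDualReg = MS.isCDEDualRegInstr(Mnemonic);     // true: "cx1da" listed
  bool ITPred = MS.isITPredicableCDEInstr(Mnemonic);   // true: "cx1da" prefix
  bool VPTPred = MS.isVPTPredicableCDEInstr(Mnemonic); // false: not "vcx*"
  (void)IsCDE; (void)IsDualReg; (void)ITPred; (void)VPTPred;
}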
class ARMAsmParser : public MCTargetAsmParser {
const MCRegisterInfo *MRI;
UnwindContext UC;
+ ARMMnemonicSets MS;
ARMTargetStreamer &getTargetStreamer() {
assert(getParser().getStreamer().getTargetStreamer() &&
@@ -245,12 +304,12 @@ class ARMAsmParser : public MCTargetAsmParser {
ITInst.setOpcode(ARM::t2IT);
ITInst.addOperand(MCOperand::createImm(ITState.Cond));
ITInst.addOperand(MCOperand::createImm(ITState.Mask));
- Out.EmitInstruction(ITInst, getSTI());
+ Out.emitInstruction(ITInst, getSTI());
// Emit the conditional instructions
assert(PendingConditionalInsts.size() <= 4);
for (const MCInst &Inst : PendingConditionalInsts) {
- Out.EmitInstruction(Inst, getSTI());
+ Out.emitInstruction(Inst, getSTI());
}
PendingConditionalInsts.clear();
@@ -444,6 +503,8 @@ class ARMAsmParser : public MCTargetAsmParser {
void tryConvertingToTwoOperandForm(StringRef Mnemonic, bool CarrySetting,
OperandVector &Operands);
+ bool CDEConvertDualRegOperand(StringRef Mnemonic, OperandVector &Operands);
+
bool isThumb() const {
// FIXME: Can tablegen auto-generate this?
return getSTI().getFeatureBits()[ARM::ModeThumb];
@@ -501,6 +562,9 @@ class ARMAsmParser : public MCTargetAsmParser {
bool hasMVEFloat() const {
return getSTI().getFeatureBits()[ARM::HasMVEFloatOps];
}
+ bool hasCDE() const {
+ return getSTI().getFeatureBits()[ARM::HasCDEOps];
+ }
bool has8MSecExt() const {
return getSTI().getFeatureBits()[ARM::Feature8MSecExt];
}
@@ -605,7 +669,7 @@ public:
ARMAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
- : MCTargetAsmParser(Options, STI, MII), UC(Parser) {
+ : MCTargetAsmParser(Options, STI, MII), UC(Parser), MS(STI) {
MCAsmParserExtension::Initialize(Parser);
// Cache the MCRegisterInfo.
@@ -628,6 +692,8 @@ public:
// Implementation of the MCTargetAsmParser interface:
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
@@ -3553,8 +3619,7 @@ public:
if (Kind == k_RegisterList && Regs.back().second == ARM::APSR)
Kind = k_RegisterListWithAPSR;
- assert(std::is_sorted(Regs.begin(), Regs.end()) &&
- "Register list must be sorted by encoding");
+ assert(llvm::is_sorted(Regs) && "Register list must be sorted by encoding");
auto Op = std::make_unique<ARMOperand>(Kind);
for (const auto &P : Regs)
@@ -3885,6 +3950,14 @@ bool ARMAsmParser::ParseRegister(unsigned &RegNo,
return (RegNo == (unsigned)-1);
}
+OperandMatchResultTy ARMAsmParser::tryParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ if (ParseRegister(RegNo, StartLoc, EndLoc))
+ return MatchOperand_NoMatch;
+ return MatchOperand_Success;
+}
+
/// Try to parse a register name. The token must be an Identifier when called,
/// and if it is a register name the token is eaten and the register number is
/// returned. Otherwise return -1.
@@ -6045,20 +6118,35 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
case AsmToken::LCurly:
return parseRegisterList(Operands, !Mnemonic.startswith("clr"));
case AsmToken::Dollar:
- case AsmToken::Hash:
- // #42 -> immediate.
+ case AsmToken::Hash: {
+ // #42 -> immediate
+ // $ 42 -> immediate
+ // $foo -> symbol name
+ // $42 -> symbol name
S = Parser.getTok().getLoc();
- Parser.Lex();
+
+ // Favor the interpretation of $-prefixed operands as symbol names.
+ // Cases where immediates are explicitly expected are handled by their
+ // specific ParseMethod implementations.
+ auto AdjacentToken = getLexer().peekTok(/*ShouldSkipSpace=*/false);
+ bool ExpectIdentifier = Parser.getTok().is(AsmToken::Dollar) &&
+ (AdjacentToken.is(AsmToken::Identifier) ||
+ AdjacentToken.is(AsmToken::Integer));
+ if (!ExpectIdentifier) {
+ // Token is not part of identifier. Drop leading $ or # before parsing
+ // expression.
+ Parser.Lex();
+ }
if (Parser.getTok().isNot(AsmToken::Colon)) {
- bool isNegative = Parser.getTok().is(AsmToken::Minus);
+ bool IsNegative = Parser.getTok().is(AsmToken::Minus);
const MCExpr *ImmVal;
if (getParser().parseExpression(ImmVal))
return true;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
if (CE) {
int32_t Val = CE->getValue();
- if (isNegative && Val == 0)
+ if (IsNegative && Val == 0)
ImmVal = MCConstantExpr::create(std::numeric_limits<int32_t>::min(),
getContext());
}
@@ -6077,7 +6165,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
}
// w/ a ':' after the '#', it's just like a plain ':'.
LLVM_FALLTHROUGH;
-
+ }
case AsmToken::Colon: {
S = Parser.getTok().getLoc();
// ":lower16:" and ":upper16:" expression prefixes
@@ -6233,6 +6321,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" ||
Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" ||
Mnemonic == "bxns" || Mnemonic == "blxns" ||
+ Mnemonic == "vdot" || Mnemonic == "vmmla" ||
Mnemonic == "vudot" || Mnemonic == "vsdot" ||
Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
@@ -6373,14 +6462,20 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic,
Mnemonic == "vudot" || Mnemonic == "vsdot" ||
Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
+ Mnemonic == "vfmat" || Mnemonic == "vfmab" ||
+ Mnemonic == "vdot" || Mnemonic == "vmmla" ||
Mnemonic == "sb" || Mnemonic == "ssbb" ||
- Mnemonic == "pssbb" ||
+ Mnemonic == "pssbb" || Mnemonic == "vsmmla" ||
+ Mnemonic == "vummla" || Mnemonic == "vusmmla" ||
+ Mnemonic == "vusdot" || Mnemonic == "vsudot" ||
Mnemonic == "bfcsel" || Mnemonic == "wls" ||
Mnemonic == "dls" || Mnemonic == "le" || Mnemonic == "csel" ||
Mnemonic == "csinc" || Mnemonic == "csinv" || Mnemonic == "csneg" ||
Mnemonic == "cinc" || Mnemonic == "cinv" || Mnemonic == "cneg" ||
Mnemonic == "cset" || Mnemonic == "csetm" ||
Mnemonic.startswith("vpt") || Mnemonic.startswith("vpst") ||
+ (hasCDE() && MS.isCDEInstr(Mnemonic) &&
+ !MS.isITPredicableCDEInstr(Mnemonic)) ||
(hasMVE() &&
(Mnemonic.startswith("vst2") || Mnemonic.startswith("vld2") ||
Mnemonic.startswith("vst4") || Mnemonic.startswith("vld4") ||
@@ -6770,6 +6865,69 @@ void ARMAsmParser::fixupGNULDRDAlias(StringRef Mnemonic,
ARMOperand::CreateReg(PairedReg, Op2.getStartLoc(), Op2.getEndLoc()));
}
+// Dual-register instructions have the following syntax:
+// <mnemonic> <predicate>? <coproc>, <Rdest>, <Rdest+1>, <Rsrc>, ..., #imm
+// This function tries to remove <Rdest+1> and replace <Rdest> with a pair
+// operand. If the conversion fails, an error is diagnosed and the function
+// returns true.
+bool ARMAsmParser::CDEConvertDualRegOperand(StringRef Mnemonic,
+ OperandVector &Operands) {
+ assert(MS.isCDEDualRegInstr(Mnemonic));
+ bool isPredicable =
+ Mnemonic == "cx1da" || Mnemonic == "cx2da" || Mnemonic == "cx3da";
+ size_t NumPredOps = isPredicable ? 1 : 0;
+
+ if (Operands.size() <= 3 + NumPredOps)
+ return false;
+
+ StringRef Op2Diag(
+ "operand must be an even-numbered register in the range [r0, r10]");
+
+ const MCParsedAsmOperand &Op2 = *Operands[2 + NumPredOps];
+ if (!Op2.isReg())
+ return Error(Op2.getStartLoc(), Op2Diag);
+
+ unsigned RNext;
+ unsigned RPair;
+ switch (Op2.getReg()) {
+ default:
+ return Error(Op2.getStartLoc(), Op2Diag);
+ case ARM::R0:
+ RNext = ARM::R1;
+ RPair = ARM::R0_R1;
+ break;
+ case ARM::R2:
+ RNext = ARM::R3;
+ RPair = ARM::R2_R3;
+ break;
+ case ARM::R4:
+ RNext = ARM::R5;
+ RPair = ARM::R4_R5;
+ break;
+ case ARM::R6:
+ RNext = ARM::R7;
+ RPair = ARM::R6_R7;
+ break;
+ case ARM::R8:
+ RNext = ARM::R9;
+ RPair = ARM::R8_R9;
+ break;
+ case ARM::R10:
+ RNext = ARM::R11;
+ RPair = ARM::R10_R11;
+ break;
+ }
+
+ const MCParsedAsmOperand &Op3 = *Operands[3 + NumPredOps];
+ if (!Op3.isReg() || Op3.getReg() != RNext)
+ return Error(Op3.getStartLoc(), "operand must be a consecutive register");
+
+ Operands.erase(Operands.begin() + 3 + NumPredOps);
+ Operands[2 + NumPredOps] =
+ ARMOperand::CreateReg(RPair, Op2.getStartLoc(), Op2.getEndLoc());
+ return false;
+}
+
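For reference, the even/odd pairing rule the conversion enforces can be written as a small standalone helper (the names here are illustrative, not part of the parser):

#include <optional>

struct RegPairSketch { unsigned Lo, Hi; };

// Rd must be even and in r0..r10; the destination pair is then (Rd, Rd+1).
std::optional<RegPairSketch> pairForCDEDualReg(unsigned Rd) {
  if (Rd % 2 != 0 || Rd > 10)
    return std::nullopt;            // odd register or r11/r12/sp/lr/pc: reject
  return RegPairSketch{Rd, Rd + 1}; // e.g. r2 -> r2_r3 at the MC level
}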
/// Parse an arm instruction mnemonic followed by its operands.
bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
@@ -6786,7 +6944,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// First check for the ARM-specific .req directive.
if (Parser.getTok().is(AsmToken::Identifier) &&
- Parser.getTok().getIdentifier() == ".req") {
+ Parser.getTok().getIdentifier().lower() == ".req") {
parseDirectiveReq(Name, NameLoc);
// We always return 'error' for this, as we're done with this
// statement and don't need to match the 'instruction'.
@@ -6823,6 +6981,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// ITx -> x100 (ITT -> 0100, ITE -> 1100)
// ITxy -> xy10 (e.g. ITET -> 1010)
// ITxyz -> xyz1 (e.g. ITEET -> 1101)
+ // Note: See the ARM::PredBlockMask enum in
+ // /lib/Target/ARM/Utils/ARMBaseInfo.h
if (Mnemonic == "it" || Mnemonic.startswith("vpt") ||
Mnemonic.startswith("vpst")) {
SMLoc Loc = Mnemonic == "it" ? SMLoc::getFromPointer(NameLoc.getPointer() + 2) :
@@ -6969,6 +7129,21 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
tryConvertingToTwoOperandForm(Mnemonic, CarrySetting, Operands);
+ if (hasCDE() && MS.isCDEInstr(Mnemonic)) {
+ // Dual-register instructions use even-odd register pairs as their
+ // destination operand; in assembly such a pair is spelled as two
+ // consecutive registers, without any special syntax. CDEConvertDualRegOperand
+ // tries to convert such an operand into a register pair, e.g. r2, r3 -> r2_r3.
+ // It returns true if an error message has been emitted. If the function
+ // returns false, either the conversion succeeded or the error (e.g. a missing
+ // operand) will be diagnosed elsewhere.
+ if (MS.isCDEDualRegInstr(Mnemonic)) {
+ bool GotError = CDEConvertDualRegOperand(Mnemonic, Operands);
+ if (GotError)
+ return GotError;
+ }
+ }
+
// Some instructions, mostly Thumb, have forms for the same mnemonic that
// do and don't have a cc_out optional-def operand. With some spot-checks
// of the operand list, we can figure out which variant we're trying to
@@ -7947,6 +8122,142 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
return Error (Operands[3]->getStartLoc(), "Q-register indexes must be 2 and 0 or 3 and 1");
break;
}
+ case ARM::UMAAL:
+ case ARM::UMLAL:
+ case ARM::UMULL:
+ case ARM::t2UMAAL:
+ case ARM::t2UMLAL:
+ case ARM::t2UMULL:
+ case ARM::SMLAL:
+ case ARM::SMLALBB:
+ case ARM::SMLALBT:
+ case ARM::SMLALD:
+ case ARM::SMLALDX:
+ case ARM::SMLALTB:
+ case ARM::SMLALTT:
+ case ARM::SMLSLD:
+ case ARM::SMLSLDX:
+ case ARM::SMULL:
+ case ARM::t2SMLAL:
+ case ARM::t2SMLALBB:
+ case ARM::t2SMLALBT:
+ case ARM::t2SMLALD:
+ case ARM::t2SMLALDX:
+ case ARM::t2SMLALTB:
+ case ARM::t2SMLALTT:
+ case ARM::t2SMLSLD:
+ case ARM::t2SMLSLDX:
+ case ARM::t2SMULL: {
+ unsigned RdHi = Inst.getOperand(0).getReg();
+ unsigned RdLo = Inst.getOperand(1).getReg();
+ if(RdHi == RdLo) {
+ return Error(Loc,
+ "unpredictable instruction, RdHi and RdLo must be different");
+ }
+ break;
+ }
+
+ case ARM::CDE_CX1:
+ case ARM::CDE_CX1A:
+ case ARM::CDE_CX1D:
+ case ARM::CDE_CX1DA:
+ case ARM::CDE_CX2:
+ case ARM::CDE_CX2A:
+ case ARM::CDE_CX2D:
+ case ARM::CDE_CX2DA:
+ case ARM::CDE_CX3:
+ case ARM::CDE_CX3A:
+ case ARM::CDE_CX3D:
+ case ARM::CDE_CX3DA:
+ case ARM::CDE_VCX1_vec:
+ case ARM::CDE_VCX1_fpsp:
+ case ARM::CDE_VCX1_fpdp:
+ case ARM::CDE_VCX1A_vec:
+ case ARM::CDE_VCX1A_fpsp:
+ case ARM::CDE_VCX1A_fpdp:
+ case ARM::CDE_VCX2_vec:
+ case ARM::CDE_VCX2_fpsp:
+ case ARM::CDE_VCX2_fpdp:
+ case ARM::CDE_VCX2A_vec:
+ case ARM::CDE_VCX2A_fpsp:
+ case ARM::CDE_VCX2A_fpdp:
+ case ARM::CDE_VCX3_vec:
+ case ARM::CDE_VCX3_fpsp:
+ case ARM::CDE_VCX3_fpdp:
+ case ARM::CDE_VCX3A_vec:
+ case ARM::CDE_VCX3A_fpsp:
+ case ARM::CDE_VCX3A_fpdp: {
+ assert(Inst.getOperand(1).isImm() &&
+ "CDE operand 1 must be a coprocessor ID");
+ int64_t Coproc = Inst.getOperand(1).getImm();
+ if (Coproc < 8 && !ARM::isCDECoproc(Coproc, *STI))
+ return Error(Operands[1]->getStartLoc(),
+ "coprocessor must be configured as CDE");
+ else if (Coproc >= 8)
+ return Error(Operands[1]->getStartLoc(),
+ "coprocessor must be in the range [p0, p7]");
+ break;
+ }
+
+ case ARM::t2CDP:
+ case ARM::t2CDP2:
+ case ARM::t2LDC2L_OFFSET:
+ case ARM::t2LDC2L_OPTION:
+ case ARM::t2LDC2L_POST:
+ case ARM::t2LDC2L_PRE:
+ case ARM::t2LDC2_OFFSET:
+ case ARM::t2LDC2_OPTION:
+ case ARM::t2LDC2_POST:
+ case ARM::t2LDC2_PRE:
+ case ARM::t2LDCL_OFFSET:
+ case ARM::t2LDCL_OPTION:
+ case ARM::t2LDCL_POST:
+ case ARM::t2LDCL_PRE:
+ case ARM::t2LDC_OFFSET:
+ case ARM::t2LDC_OPTION:
+ case ARM::t2LDC_POST:
+ case ARM::t2LDC_PRE:
+ case ARM::t2MCR:
+ case ARM::t2MCR2:
+ case ARM::t2MCRR:
+ case ARM::t2MCRR2:
+ case ARM::t2MRC:
+ case ARM::t2MRC2:
+ case ARM::t2MRRC:
+ case ARM::t2MRRC2:
+ case ARM::t2STC2L_OFFSET:
+ case ARM::t2STC2L_OPTION:
+ case ARM::t2STC2L_POST:
+ case ARM::t2STC2L_PRE:
+ case ARM::t2STC2_OFFSET:
+ case ARM::t2STC2_OPTION:
+ case ARM::t2STC2_POST:
+ case ARM::t2STC2_PRE:
+ case ARM::t2STCL_OFFSET:
+ case ARM::t2STCL_OPTION:
+ case ARM::t2STCL_POST:
+ case ARM::t2STCL_PRE:
+ case ARM::t2STC_OFFSET:
+ case ARM::t2STC_OPTION:
+ case ARM::t2STC_POST:
+ case ARM::t2STC_PRE: {
+ unsigned Opcode = Inst.getOpcode();
+ // Inst.getOperand indexes operands in the (oops ...) and (iops ...) dags;
+ // CopInd is the index of the coprocessor operand.
+ size_t CopInd = 0;
+ if (Opcode == ARM::t2MRRC || Opcode == ARM::t2MRRC2)
+ CopInd = 2;
+ else if (Opcode == ARM::t2MRC || Opcode == ARM::t2MRC2)
+ CopInd = 1;
+ assert(Inst.getOperand(CopInd).isImm() &&
+ "Operand must be a coprocessor ID");
+ int64_t Coproc = Inst.getOperand(CopInd).getImm();
+ // Operands[2] is the coprocessor operand at the syntactic level
+ if (ARM::isCDECoproc(Coproc, *STI))
+ return Error(Operands[2]->getStartLoc(),
+ "coprocessor must be configured as GCP");
+ break;
+ }
}
return false;
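The two new case blocks enforce a partition of the coprocessor space: cx*/vcx* instructions must name a coprocessor configured as CDE, while the generic coprocessor instructions (cdp, ldc, mcr, ...) must not. A minimal standalone sketch of that rule, with the per-coprocessor CDE configuration modelled as a plain bitmask (an assumption of the sketch, not the subtarget API):

#include <bitset>
#include <cstdint>

enum class CoprocUse { CDEInstr, GenericCoprocInstr };

bool isCoprocOperandValid(int64_t Coproc, CoprocUse Use,
                          const std::bitset<8> &CDEMask) {
  if (Coproc < 0 || Coproc > 7)
    return false;                             // only p0..p7 are addressable
  bool IsCDE = CDEMask.test(static_cast<size_t>(Coproc));
  return Use == CoprocUse::CDEInstr ? IsCDE   // cx*/vcx* need a CDE coproc
                                    : !IsCDE; // cdp/ldc/mcr/... need a GCP
}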
@@ -8223,50 +8534,6 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
}
switch (Inst.getOpcode()) {
- case ARM::MVE_VORNIZ0v4i32:
- case ARM::MVE_VORNIZ0v8i16:
- case ARM::MVE_VORNIZ8v4i32:
- case ARM::MVE_VORNIZ8v8i16:
- case ARM::MVE_VORNIZ16v4i32:
- case ARM::MVE_VORNIZ24v4i32:
- case ARM::MVE_VANDIZ0v4i32:
- case ARM::MVE_VANDIZ0v8i16:
- case ARM::MVE_VANDIZ8v4i32:
- case ARM::MVE_VANDIZ8v8i16:
- case ARM::MVE_VANDIZ16v4i32:
- case ARM::MVE_VANDIZ24v4i32: {
- unsigned Opcode;
- bool imm16 = false;
- switch(Inst.getOpcode()) {
- case ARM::MVE_VORNIZ0v4i32: Opcode = ARM::MVE_VORRIZ0v4i32; break;
- case ARM::MVE_VORNIZ0v8i16: Opcode = ARM::MVE_VORRIZ0v8i16; imm16 = true; break;
- case ARM::MVE_VORNIZ8v4i32: Opcode = ARM::MVE_VORRIZ8v4i32; break;
- case ARM::MVE_VORNIZ8v8i16: Opcode = ARM::MVE_VORRIZ8v8i16; imm16 = true; break;
- case ARM::MVE_VORNIZ16v4i32: Opcode = ARM::MVE_VORRIZ16v4i32; break;
- case ARM::MVE_VORNIZ24v4i32: Opcode = ARM::MVE_VORRIZ24v4i32; break;
- case ARM::MVE_VANDIZ0v4i32: Opcode = ARM::MVE_VBICIZ0v4i32; break;
- case ARM::MVE_VANDIZ0v8i16: Opcode = ARM::MVE_VBICIZ0v8i16; imm16 = true; break;
- case ARM::MVE_VANDIZ8v4i32: Opcode = ARM::MVE_VBICIZ8v4i32; break;
- case ARM::MVE_VANDIZ8v8i16: Opcode = ARM::MVE_VBICIZ8v8i16; imm16 = true; break;
- case ARM::MVE_VANDIZ16v4i32: Opcode = ARM::MVE_VBICIZ16v4i32; break;
- case ARM::MVE_VANDIZ24v4i32: Opcode = ARM::MVE_VBICIZ24v4i32; break;
- default: llvm_unreachable("unexpected opcode");
- }
-
- MCInst TmpInst;
- TmpInst.setOpcode(Opcode);
- TmpInst.addOperand(Inst.getOperand(0));
- TmpInst.addOperand(Inst.getOperand(1));
-
- // invert immediate
- unsigned imm = ~Inst.getOperand(2).getImm() & (imm16 ? 0xffff : 0xffffffff);
- TmpInst.addOperand(MCOperand::createImm(imm));
-
- TmpInst.addOperand(Inst.getOperand(3));
- TmpInst.addOperand(Inst.getOperand(4));
- Inst = TmpInst;
- return true;
- }
// Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction.
case ARM::LDRT_POST:
case ARM::LDRBT_POST: {
@@ -8285,6 +8552,26 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
Inst = TmpInst;
return true;
}
+ // Alias for 'ldr{sb,h,sh}t Rt, [Rn] {, #imm}' for an omitted immediate.
+ case ARM::LDRSBTii:
+ case ARM::LDRHTii:
+ case ARM::LDRSHTii: {
+ MCInst TmpInst;
+
+ if (Inst.getOpcode() == ARM::LDRSBTii)
+ TmpInst.setOpcode(ARM::LDRSBTi);
+ else if (Inst.getOpcode() == ARM::LDRHTii)
+ TmpInst.setOpcode(ARM::LDRHTi);
+ else if (Inst.getOpcode() == ARM::LDRSHTii)
+ TmpInst.setOpcode(ARM::LDRSHTi);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::createImm(256));
+ TmpInst.addOperand(Inst.getOperand(2));
+ Inst = TmpInst;
+ return true;
+ }
// Alias for alternate form of 'str{,b}t Rt, [Rn], #imm' instruction.
case ARM::STRT_POST:
case ARM::STRBT_POST: {
@@ -8323,7 +8610,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
// Reading PC provides the start of the current instruction + 8 and
// the transform to adr is biased by that.
MCSymbol *Dot = getContext().createTempSymbol();
- Out.EmitLabel(Dot);
+ Out.emitLabel(Dot);
const MCExpr *OpExpr = Inst.getOperand(2).getExpr();
const MCExpr *InstPC = MCSymbolRefExpr::create(Dot,
MCSymbolRefExpr::VK_None,
@@ -10521,7 +10808,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (isITBlockFull() || isITBlockTerminator(Inst))
flushPendingInstructions(Out);
} else {
- Out.EmitInstruction(Inst, getSTI());
+ Out.emitInstruction(Inst, getSTI());
}
return false;
case Match_NearMisses:
@@ -10546,7 +10833,7 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
bool IsMachO = Format == MCObjectFileInfo::IsMachO;
bool IsCOFF = Format == MCObjectFileInfo::IsCOFF;
- StringRef IDVal = DirectiveID.getIdentifier();
+ std::string IDVal = DirectiveID.getIdentifier().lower();
if (IDVal == ".word")
parseLiteralValues(4, DirectiveID.getLoc());
else if (IDVal == ".short" || IDVal == ".hword")
@@ -10632,7 +10919,7 @@ bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) {
const MCExpr *Value;
if (getParser().parseExpression(Value))
return true;
- getParser().getStreamer().EmitValue(Value, Size, L);
+ getParser().getStreamer().emitValue(Value, Size, L);
return false;
};
return (parseMany(parseOne));
@@ -10648,7 +10935,7 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
if (!isThumb())
SwitchMode();
- getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
return false;
}
@@ -10661,7 +10948,7 @@ bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
if (isThumb())
SwitchMode();
- getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
+ getParser().getStreamer().emitAssemblerFlag(MCAF_Code32);
return false;
}
@@ -10673,7 +10960,7 @@ void ARMAsmParser::doBeforeLabelEmit(MCSymbol *Symbol) {
void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) {
if (NextSymbolIsThumb) {
- getParser().getStreamer().EmitThumbFunc(Symbol);
+ getParser().getStreamer().emitThumbFunc(Symbol);
NextSymbolIsThumb = false;
}
}
@@ -10693,7 +10980,7 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
Parser.getTok().is(AsmToken::String)) {
MCSymbol *Func = getParser().getContext().getOrCreateSymbol(
Parser.getTok().getIdentifier());
- getParser().getStreamer().EmitThumbFunc(Func);
+ getParser().getStreamer().emitThumbFunc(Func);
Parser.Lex();
if (parseToken(AsmToken::EndOfStatement,
"unexpected token in '.thumb_func' directive"))
@@ -10757,14 +11044,14 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
if (!isThumb())
SwitchMode();
- getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
} else {
if (!hasARM())
return Error(L, "target does not support ARM mode");
if (isThumb())
SwitchMode();
- getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
+ getParser().getStreamer().emitAssemblerFlag(MCAF_Code32);
}
return false;
@@ -10817,7 +11104,7 @@ void ARMAsmParser::FixModeAfterArchChange(bool WasThumb, SMLoc Loc) {
SwitchMode();
} else {
// Mode switch forced, because the new arch doesn't support the old mode.
- getParser().getStreamer().EmitAssemblerFlag(isThumb() ? MCAF_Code16
+ getParser().getStreamer().emitAssemblerFlag(isThumb() ? MCAF_Code16
: MCAF_Code32);
// Warn about the implicit mode switch. GAS does not switch modes here,
// but instead stays in the old mode, reporting an error on any following
@@ -10859,11 +11146,13 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
TagLoc = Parser.getTok().getLoc();
if (Parser.getTok().is(AsmToken::Identifier)) {
StringRef Name = Parser.getTok().getIdentifier();
- Tag = ARMBuildAttrs::AttrTypeFromString(Name);
- if (Tag == -1) {
+ Optional<unsigned> Ret =
+ ELFAttrs::attrTypeFromString(Name, ARMBuildAttrs::ARMAttributeTags);
+ if (!Ret.hasValue()) {
Error(TagLoc, "attribute name not recognised: " + Name);
return false;
}
+ Tag = Ret.getValue();
Parser.Lex();
} else {
const MCExpr *AttrExpr;
@@ -11314,9 +11603,9 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) {
assert(Section && "must have section to emit alignment");
if (Section->UseCodeAlign())
- getStreamer().EmitCodeAlignment(2);
+ getStreamer().emitCodeAlignment(2);
else
- getStreamer().EmitValueToAlignment(2);
+ getStreamer().emitValueToAlignment(2);
return false;
}
@@ -11516,9 +11805,9 @@ bool ARMAsmParser::parseDirectiveAlign(SMLoc L) {
const MCSection *Section = getStreamer().getCurrentSectionOnly();
assert(Section && "must have section to emit alignment");
if (Section->UseCodeAlign())
- getStreamer().EmitCodeAlignment(4, 0);
+ getStreamer().emitCodeAlignment(4, 0);
else
- getStreamer().EmitValueToAlignment(4, 0, 1, 0);
+ getStreamer().emitValueToAlignment(4, 0, 1, 0);
return false;
}
return true;
@@ -11770,7 +12059,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
// when we start to table-generate them, and we can use the ARM
// flags below, that were generated by table-gen.
static const struct {
- const unsigned Kind;
+ const uint64_t Kind;
const FeatureBitset ArchCheck;
const FeatureBitset Features;
} Extensions[] = {
@@ -11819,7 +12108,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
EnableFeature = false;
Name = Name.substr(2);
}
- unsigned FeatureKind = ARM::parseArchExt(Name);
+ uint64_t FeatureKind = ARM::parseArchExt(Name);
if (FeatureKind == ARM::AEK_INVALID)
return Error(ExtLoc, "unknown architectural extension: " + Name);
@@ -11969,6 +12258,7 @@ bool ARMAsmParser::isMnemonicVPTPredicable(StringRef Mnemonic,
Mnemonic.startswith("vpnot") || Mnemonic.startswith("vbic") ||
Mnemonic.startswith("vrmlsldavh") || Mnemonic.startswith("vmlsldav") ||
Mnemonic.startswith("vcvt") ||
+ MS.isVPTPredicableCDEInstr(Mnemonic) ||
(Mnemonic.startswith("vmov") &&
!(ExtraToken == ".f16" || ExtraToken == ".32" ||
ExtraToken == ".16" || ExtraToken == ".8"));
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index d26b04556abb..54ff0d9966cb 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -182,6 +182,9 @@ static DecodeStatus DecodetGPROddRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
+static DecodeStatus
+DecodeGPRwithAPSR_NZCVnospRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
@@ -201,6 +204,8 @@ static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeGPRspRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
@@ -538,10 +543,6 @@ template<unsigned MinLog, unsigned MaxLog>
static DecodeStatus DecodePowerTwoOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
const void *Decoder);
-template <int shift>
-static DecodeStatus DecodeExpandedImmOperand(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder);
template<unsigned start>
static DecodeStatus DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
@@ -1087,8 +1088,12 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
}
}
+ uint32_t Coproc = fieldFromInstruction(Insn32, 8, 4);
+ const uint8_t *DecoderTable = ARM::isCDECoproc(Coproc, STI)
+ ? DecoderTableThumb2CDE32
+ : DecoderTableThumb2CoProc32;
Result =
- decodeInstruction(DecoderTableThumb2CoProc32, MI, Insn32, Address, this, STI);
+ decodeInstruction(DecoderTable, MI, Insn32, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
Check(Result, AddThumbPredicate(MI));
@@ -1220,10 +1225,12 @@ static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
+ // According to the Arm ARM, RegNo = 14 is undefined, but we return Fail
+ // rather than SoftFail as there is no GPRPair table entry for index 7.
if (RegNo > 13)
return MCDisassembler::Fail;
- if ((RegNo & 1) || RegNo == 0xe)
+ if (RegNo & 1)
S = MCDisassembler::SoftFail;
unsigned RegisterPair = GPRPairDecoderTable[RegNo/2];
@@ -1231,6 +1238,19 @@ static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
return S;
}
+static DecodeStatus DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 13)
+ return MCDisassembler::Fail;
+
+ unsigned RegisterPair = GPRPairDecoderTable[RegNo/2];
+ Inst.addOperand(MCOperand::createReg(RegisterPair));
+
+ if ((RegNo & 1) || RegNo > 10)
+ return MCDisassembler::SoftFail;
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeGPRspRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder) {
@@ -6068,6 +6088,23 @@ static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
+static DecodeStatus
+DecodeGPRwithAPSR_NZCVnospRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo == 15) {
+ Inst.addOperand(MCOperand::createReg(ARM::APSR_NZCV));
+ return MCDisassembler::Success;
+ }
+
+ unsigned Register = GPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+
+ if (RegNo == 13)
+ return MCDisassembler::SoftFail;
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address,
const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -6395,16 +6432,6 @@ static DecodeStatus DecodePowerTwoOperand(MCInst &Inst, unsigned Val,
return S;
}
-template <int shift>
-static DecodeStatus DecodeExpandedImmOperand(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder) {
- Val <<= shift;
-
- Inst.addOperand(MCOperand::createImm(Val));
- return MCDisassembler::Success;
-}
-
template<unsigned start>
static DecodeStatus DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index be02da18fb7d..9ad595c016c4 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -48,10 +48,17 @@ public:
} // end anonymous namespace
Optional<MCFixupKind> ARMAsmBackend::getFixupKind(StringRef Name) const {
- if (STI.getTargetTriple().isOSBinFormatELF() && Name == "R_ARM_NONE")
- return FK_NONE;
-
- return MCAsmBackend::getFixupKind(Name);
+ if (!STI.getTargetTriple().isOSBinFormatELF())
+ return None;
+
+ unsigned Type = llvm::StringSwitch<unsigned>(Name)
+#define ELF_RELOC(X, Y) .Case(#X, Y)
+#include "llvm/BinaryFormat/ELFRelocs/ARM.def"
+#undef ELF_RELOC
+ .Default(-1u);
+ if (Type == -1u)
+ return None;
+ return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
}
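getFixupKind now accepts any relocation name from ELFRelocs/ARM.def, so a .reloc directive can request an arbitrary ARM relocation; the returned kind is offset by FirstLiteralRelocationKind and passed through untouched (see the applyFixup and shouldForceRelocation hunks below). The same name-to-type pattern, spelled out for a few relocations instead of the .def expansion, might look like this sketch (the helper name is made up):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/ELF.h"

static unsigned lookupARMRelocTypeSketch(llvm::StringRef Name) {
  return llvm::StringSwitch<unsigned>(Name)
      .Case("R_ARM_NONE", llvm::ELF::R_ARM_NONE)
      .Case("R_ARM_ABS32", llvm::ELF::R_ARM_ABS32)
      .Case("R_ARM_REL32", llvm::ELF::R_ARM_REL32)
      .Default(-1u); // unknown name: no target fixup kind
}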
const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
@@ -166,6 +173,11 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_le", 0, 32, MCFixupKindInfo::FKF_IsPCRel}
};
+ // Fixup kinds from the .reloc directive are like R_ARM_NONE. They do not require
+ // any extra processing.
+ if (Kind >= FirstLiteralRelocationKind)
+ return MCAsmBackend::getFixupKindInfo(FK_NONE);
+
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
@@ -310,9 +322,8 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
return reasonForFixupRelaxation(Fixup, Value);
}
-void ARMAsmBackend::relaxInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI,
- MCInst &Res) const {
+void ARMAsmBackend::relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const {
unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode(), STI);
// Sanity check w/ diagnostic if we get here w/ a bogus instruction.
@@ -328,17 +339,18 @@ void ARMAsmBackend::relaxInstruction(const MCInst &Inst,
// have to change the operands too.
if ((Inst.getOpcode() == ARM::tCBZ || Inst.getOpcode() == ARM::tCBNZ) &&
RelaxedOp == ARM::tHINT) {
+ MCInst Res;
Res.setOpcode(RelaxedOp);
Res.addOperand(MCOperand::createImm(0));
Res.addOperand(MCOperand::createImm(14));
Res.addOperand(MCOperand::createReg(0));
+ Inst = std::move(Res);
return;
}
// The rest of instructions we're relaxing have the same operands.
// We just need to update to the proper opcode.
- Res = Inst;
- Res.setOpcode(RelaxedOp);
+ Inst.setOpcode(RelaxedOp);
}
bool ARMAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
@@ -432,7 +444,6 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
default:
Ctx.reportError(Fixup.getLoc(), "bad relocation fixup type");
return 0;
- case FK_NONE:
case FK_Data_1:
case FK_Data_2:
case FK_Data_4:
@@ -865,7 +876,7 @@ bool ARMAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
const MCSymbolRefExpr *A = Target.getSymA();
const MCSymbol *Sym = A ? &A->getSymbol() : nullptr;
const unsigned FixupKind = Fixup.getKind();
- if (FixupKind == FK_NONE)
+ if (FixupKind >= FirstLiteralRelocationKind)
return true;
if (FixupKind == ARM::fixup_arm_thumb_bl) {
assert(Sym && "How did we resolve this?");
@@ -909,9 +920,6 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
- case FK_NONE:
- return 0;
-
case FK_Data_1:
case ARM::fixup_arm_thumb_bcc:
case ARM::fixup_arm_thumb_cp:
@@ -973,9 +981,6 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
- case FK_NONE:
- return 0;
-
case FK_Data_1:
return 1;
case FK_Data_2:
@@ -1031,7 +1036,10 @@ void ARMAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
MutableArrayRef<char> Data, uint64_t Value,
bool IsResolved,
const MCSubtargetInfo* STI) const {
- unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+ unsigned Kind = Fixup.getKind();
+ if (Kind >= FirstLiteralRelocationKind)
+ return;
+ unsigned NumBytes = getFixupKindNumBytes(Kind);
MCContext &Ctx = Asm.getContext();
Value = adjustFixupValue(Asm, Fixup, Target, Value, IsResolved, Ctx, STI);
if (!Value)
@@ -1043,7 +1051,7 @@ void ARMAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
// Used to point to big endian bytes.
unsigned FullSizeBytes;
if (Endian == support::big) {
- FullSizeBytes = getFixupKindContainerSizeBytes(Fixup.getKind());
+ FullSizeBytes = getFixupKindContainerSizeBytes(Kind);
assert((Offset + FullSizeBytes) <= Data.size() && "Invalid fixup size!");
assert(NumBytes <= FullSizeBytes && "Invalid fixup size!");
}
@@ -1110,11 +1118,11 @@ uint32_t ARMAsmBackendDarwin::generateCompactUnwindEncoding(
const MCCFIInstruction &Inst = Instrs[i];
switch (Inst.getOperation()) {
case MCCFIInstruction::OpDefCfa: // DW_CFA_def_cfa
- CFARegisterOffset = -Inst.getOffset();
+ CFARegisterOffset = Inst.getOffset();
CFARegister = *MRI.getLLVMRegNum(Inst.getRegister(), true);
break;
case MCCFIInstruction::OpDefCfaOffset: // DW_CFA_def_cfa_offset
- CFARegisterOffset = -Inst.getOffset();
+ CFARegisterOffset = Inst.getOffset();
break;
case MCCFIInstruction::OpDefCfaRegister: // DW_CFA_def_cfa_register
CFARegister = *MRI.getLLVMRegNum(Inst.getRegister(), true);
@@ -1271,35 +1279,6 @@ uint32_t ARMAsmBackendDarwin::generateCompactUnwindEncoding(
return CompactUnwindEncoding | ((FloatRegCount - 1) << 8);
}
-static MachO::CPUSubTypeARM getMachOSubTypeFromArch(StringRef Arch) {
- ARM::ArchKind AK = ARM::parseArch(Arch);
- switch (AK) {
- default:
- return MachO::CPU_SUBTYPE_ARM_V7;
- case ARM::ArchKind::ARMV4T:
- return MachO::CPU_SUBTYPE_ARM_V4T;
- case ARM::ArchKind::ARMV5T:
- case ARM::ArchKind::ARMV5TE:
- case ARM::ArchKind::ARMV5TEJ:
- return MachO::CPU_SUBTYPE_ARM_V5;
- case ARM::ArchKind::ARMV6:
- case ARM::ArchKind::ARMV6K:
- return MachO::CPU_SUBTYPE_ARM_V6;
- case ARM::ArchKind::ARMV7A:
- return MachO::CPU_SUBTYPE_ARM_V7;
- case ARM::ArchKind::ARMV7S:
- return MachO::CPU_SUBTYPE_ARM_V7S;
- case ARM::ArchKind::ARMV7K:
- return MachO::CPU_SUBTYPE_ARM_V7K;
- case ARM::ArchKind::ARMV6M:
- return MachO::CPU_SUBTYPE_ARM_V6M;
- case ARM::ArchKind::ARMV7M:
- return MachO::CPU_SUBTYPE_ARM_V7M;
- case ARM::ArchKind::ARMV7EM:
- return MachO::CPU_SUBTYPE_ARM_V7EM;
- }
-}
-
static MCAsmBackend *createARMAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
@@ -1309,10 +1288,8 @@ static MCAsmBackend *createARMAsmBackend(const Target &T,
switch (TheTriple.getObjectFormat()) {
default:
llvm_unreachable("unsupported object format");
- case Triple::MachO: {
- MachO::CPUSubTypeARM CS = getMachOSubTypeFromArch(TheTriple.getArchName());
- return new ARMAsmBackendDarwin(T, STI, MRI, CS);
- }
+ case Triple::MachO:
+ return new ARMAsmBackendDarwin(T, STI, MRI);
case Triple::COFF:
assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
return new ARMAsmBackendWinCOFF(T, STI);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 67722a5e5b64..38c7b30769b3 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -66,8 +66,8 @@ public:
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override;
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override;
+ void relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const override;
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index 87e56940f46d..e27bb134670f 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -16,16 +16,20 @@
namespace llvm {
class ARMAsmBackendDarwin : public ARMAsmBackend {
const MCRegisterInfo &MRI;
+ Triple TT;
public:
const MachO::CPUSubTypeARM Subtype;
ARMAsmBackendDarwin(const Target &T, const MCSubtargetInfo &STI,
- const MCRegisterInfo &MRI, MachO::CPUSubTypeARM st)
- : ARMAsmBackend(T, STI, support::little), MRI(MRI), Subtype(st) {}
+ const MCRegisterInfo &MRI)
+ : ARMAsmBackend(T, STI, support::little), MRI(MRI),
+ TT(STI.getTargetTriple()),
+ Subtype((MachO::CPUSubTypeARM)cantFail(
+ MachO::getCPUSubType(STI.getTargetTriple()))) {}
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
- return createARMMachObjectWriter(/*Is64Bit=*/false, MachO::CPU_TYPE_ARM,
- Subtype);
+ return createARMMachObjectWriter(
+ /*Is64Bit=*/false, cantFail(MachO::getCPUType(TT)), Subtype);
}
uint32_t generateCompactUnwindEncoding(
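The Darwin backend now derives the Mach-O CPU type and subtype from the target triple instead of the removed arch-name switch. A sketch of that derivation, assuming the MachO::getCPUType/getCPUSubType helpers used in the hunk above (cantFail is only safe when the triple is already known to be a valid Darwin ARM triple):

#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Error.h"
#include <cstdint>
#include <utility>

static std::pair<uint32_t, uint32_t> machOCPUForSketch(const llvm::Triple &TT) {
  uint32_t CPUType = llvm::cantFail(llvm::MachO::getCPUType(TT));
  uint32_t CPUSubType = llvm::cantFail(llvm::MachO::getCPUSubType(TT));
  return {CPUType, CPUSubType};
}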
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 6293a2462306..74cd2e681ded 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -393,9 +393,21 @@ namespace ARMII {
// in an IT block).
ThumbArithFlagSetting = 1 << 19,
- // Whether an instruction can be included in an MVE tail-predicated loop.
+ // Whether an instruction can be included in an MVE tail-predicated loop,
+ // though extra validity checks may need to be performed too.
ValidForTailPredication = 1 << 20,
+ // Whether an instruction writes to the top/bottom half of a vector element
+ // and leaves the other half untouched.
+ RetainsPreviousHalfElement = 1 << 21,
+
+ // Whether the instruction produces a scalar result from vector operands.
+ HorizontalReduction = 1 << 22,
+
+ // Whether this instruction produces a vector result that is larger than
+ // its input, typically reading from the top/bottom halves of the input(s).
+ DoubleWidthResult = 1 << 23,
+
//===------------------------------------------------------------------===//
// Code domain.
DomainShift = 15,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 2c26dd388c05..37d81e4b0af1 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -53,8 +53,8 @@ ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI)
bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const {
- // FIXME: This is extremely conservative. This really needs to use a
- // whitelist with a clear explanation for why each realocation needs to
+ // FIXME: This is extremely conservative. This really needs to use an
+ // explicit list with a clear explanation for why each relocation needs to
// point to the symbol, not to the section.
switch (Type) {
default:
@@ -79,6 +79,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel,
MCContext &Ctx) const {
+ unsigned Kind = Fixup.getTargetKind();
+ if (Kind >= FirstLiteralRelocationKind)
+ return Kind - FirstLiteralRelocationKind;
MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
if (IsPCRel) {
@@ -89,9 +92,18 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case FK_Data_4:
switch (Modifier) {
default:
- llvm_unreachable("Unsupported Modifier");
- case MCSymbolRefExpr::VK_None:
+ Ctx.reportError(Fixup.getLoc(),
+ "invalid fixup for 4-byte pc-relative data relocation");
+ return ELF::R_ARM_NONE;
+ case MCSymbolRefExpr::VK_None: {
+ if (const MCSymbolRefExpr *SymRef = Target.getSymA()) {
+ // For GNU AS compatibility, expressions such as
+ // _GLOBAL_OFFSET_TABLE_ - label emit an R_ARM_BASE_PREL relocation.
+ if (SymRef->getSymbol().getName() == "_GLOBAL_OFFSET_TABLE_")
+ return ELF::R_ARM_BASE_PREL;
+ }
return ELF::R_ARM_REL32;
+ }
case MCSymbolRefExpr::VK_GOTTPOFF:
return ELF::R_ARM_TLS_IE32;
case MCSymbolRefExpr::VK_ARM_GOT_PREL:
@@ -145,30 +157,34 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
return ELF::R_ARM_THM_BF18;
}
}
- switch (Fixup.getTargetKind()) {
+ switch (Kind) {
default:
Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol");
return ELF::R_ARM_NONE;
- case FK_NONE:
- return ELF::R_ARM_NONE;
case FK_Data_1:
switch (Modifier) {
default:
- llvm_unreachable("unsupported Modifier");
+ Ctx.reportError(Fixup.getLoc(),
+ "invalid fixup for 1-byte data relocation");
+ return ELF::R_ARM_NONE;
case MCSymbolRefExpr::VK_None:
return ELF::R_ARM_ABS8;
}
case FK_Data_2:
switch (Modifier) {
default:
- llvm_unreachable("unsupported modifier");
+ Ctx.reportError(Fixup.getLoc(),
+ "invalid fixup for 2-byte data relocation");
+ return ELF::R_ARM_NONE;
case MCSymbolRefExpr::VK_None:
return ELF::R_ARM_ABS16;
}
case FK_Data_4:
switch (Modifier) {
default:
- llvm_unreachable("Unsupported Modifier");
+ Ctx.reportError(Fixup.getLoc(),
+ "invalid fixup for 4-byte data relocation");
+ return ELF::R_ARM_NONE;
case MCSymbolRefExpr::VK_ARM_NONE:
return ELF::R_ARM_NONE;
case MCSymbolRefExpr::VK_GOT:
@@ -210,7 +226,8 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case ARM::fixup_arm_movt_hi16:
switch (Modifier) {
default:
- llvm_unreachable("Unsupported Modifier");
+ Ctx.reportError(Fixup.getLoc(), "invalid fixup for ARM MOVT instruction");
+ return ELF::R_ARM_NONE;
case MCSymbolRefExpr::VK_None:
return ELF::R_ARM_MOVT_ABS;
case MCSymbolRefExpr::VK_ARM_SBREL:
@@ -219,7 +236,8 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case ARM::fixup_arm_movw_lo16:
switch (Modifier) {
default:
- llvm_unreachable("Unsupported Modifier");
+ Ctx.reportError(Fixup.getLoc(), "invalid fixup for ARM MOVW instruction");
+ return ELF::R_ARM_NONE;
case MCSymbolRefExpr::VK_None:
return ELF::R_ARM_MOVW_ABS_NC;
case MCSymbolRefExpr::VK_ARM_SBREL:
@@ -228,7 +246,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case ARM::fixup_t2_movt_hi16:
switch (Modifier) {
default:
- llvm_unreachable("Unsupported Modifier");
+ Ctx.reportError(Fixup.getLoc(),
+ "invalid fixup for Thumb MOVT instruction");
+ return ELF::R_ARM_NONE;
case MCSymbolRefExpr::VK_None:
return ELF::R_ARM_THM_MOVT_ABS;
case MCSymbolRefExpr::VK_ARM_SBREL:
@@ -237,7 +257,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case ARM::fixup_t2_movw_lo16:
switch (Modifier) {
default:
- llvm_unreachable("Unsupported Modifier");
+ Ctx.reportError(Fixup.getLoc(),
+ "invalid fixup for Thumb MOVW instruction");
+ return ELF::R_ARM_NONE;
case MCSymbolRefExpr::VK_None:
return ELF::R_ARM_THM_MOVW_ABS_NC;
case MCSymbolRefExpr::VK_ARM_SBREL:
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index f558ca8d2d9f..876741d6c343 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -93,7 +93,7 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer {
void emitIntTextAttribute(unsigned Attribute, unsigned IntValue,
StringRef StringValue) override;
void emitArch(ARM::ArchKind Arch) override;
- void emitArchExtension(unsigned ArchExt) override;
+ void emitArchExtension(uint64_t ArchExt) override;
void emitObjectArch(ARM::ArchKind Arch) override;
void emitFPU(unsigned FPU) override;
void emitInst(uint32_t Inst, char Suffix = '\0') override;
@@ -177,7 +177,8 @@ void ARMTargetAsmStreamer::switchVendor(StringRef Vendor) {}
void ARMTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
OS << "\t.eabi_attribute\t" << Attribute << ", " << Twine(Value);
if (IsVerboseAsm) {
- StringRef Name = ARMBuildAttrs::AttrTypeAsString(Attribute);
+ StringRef Name =
+ ELFAttrs::attrTypeAsString(Attribute, ARMBuildAttrs::ARMAttributeTags);
if (!Name.empty())
OS << "\t@ " << Name;
}
@@ -193,7 +194,8 @@ void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute,
default:
OS << "\t.eabi_attribute\t" << Attribute << ", \"" << String << "\"";
if (IsVerboseAsm) {
- StringRef Name = ARMBuildAttrs::AttrTypeAsString(Attribute);
+ StringRef Name = ELFAttrs::attrTypeAsString(
+ Attribute, ARMBuildAttrs::ARMAttributeTags);
if (!Name.empty())
OS << "\t@ " << Name;
}
@@ -212,7 +214,9 @@ void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute,
if (!StringValue.empty())
OS << ", \"" << StringValue << "\"";
if (IsVerboseAsm)
- OS << "\t@ " << ARMBuildAttrs::AttrTypeAsString(Attribute);
+ OS << "\t@ "
+ << ELFAttrs::attrTypeAsString(Attribute,
+ ARMBuildAttrs::ARMAttributeTags);
break;
}
OS << "\n";
@@ -222,7 +226,7 @@ void ARMTargetAsmStreamer::emitArch(ARM::ArchKind Arch) {
OS << "\t.arch\t" << ARM::getArchName(Arch) << "\n";
}
-void ARMTargetAsmStreamer::emitArchExtension(unsigned ArchExt) {
+void ARMTargetAsmStreamer::emitArchExtension(uint64_t ArchExt) {
OS << "\t.arch_extension\t" << ARM::getArchExtName(ArchExt) << "\n";
}
@@ -238,7 +242,7 @@ void ARMTargetAsmStreamer::finishAttributeSection() {}
void
ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) {
- OS << "\t.tlsdescseq\t" << S->getSymbol().getName();
+ OS << "\t.tlsdescseq\t" << S->getSymbol().getName() << "\n";
}
void ARMTargetAsmStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {
@@ -328,12 +332,8 @@ private:
}
// Create new attribute item
- AttributeItem Item = {
- AttributeItem::NumericAttribute,
- Attribute,
- Value,
- StringRef("")
- };
+ AttributeItem Item = {AttributeItem::NumericAttribute, Attribute, Value,
+ std::string(StringRef(""))};
Contents.push_back(Item);
}
@@ -344,17 +344,13 @@ private:
if (!OverwriteExisting)
return;
Item->Type = AttributeItem::TextAttribute;
- Item->StringValue = Value;
+ Item->StringValue = std::string(Value);
return;
}
// Create new attribute item
- AttributeItem Item = {
- AttributeItem::TextAttribute,
- Attribute,
- 0,
- Value
- };
+ AttributeItem Item = {AttributeItem::TextAttribute, Attribute, 0,
+ std::string(Value)};
Contents.push_back(Item);
}
@@ -366,17 +362,13 @@ private:
return;
Item->Type = AttributeItem::NumericAndTextAttributes;
Item->IntValue = IntValue;
- Item->StringValue = StringValue;
+ Item->StringValue = std::string(StringValue);
return;
}
// Create new attribute item
- AttributeItem Item = {
- AttributeItem::NumericAndTextAttributes,
- Attribute,
- IntValue,
- StringValue
- };
+ AttributeItem Item = {AttributeItem::NumericAndTextAttributes, Attribute,
+ IntValue, std::string(StringValue)};
Contents.push_back(Item);
}
@@ -452,7 +444,7 @@ public:
~ARMELFStreamer() override = default;
- void FinishImpl() override;
+ void finishImpl() override;
// ARM exception handling directives
void emitFnStart();
@@ -468,13 +460,13 @@ public:
void emitUnwindRaw(int64_t Offset, const SmallVectorImpl<uint8_t> &Opcodes);
void emitFill(const MCExpr &NumBytes, uint64_t FillValue,
SMLoc Loc) override {
- EmitDataMappingSymbol();
+ emitDataMappingSymbol();
MCObjectStreamer::emitFill(NumBytes, FillValue, Loc);
}
- void ChangeSection(MCSection *Section, const MCExpr *Subsection) override {
+ void changeSection(MCSection *Section, const MCExpr *Subsection) override {
LastMappingSymbols[getCurrentSection().first] = std::move(LastEMSInfo);
- MCELFStreamer::ChangeSection(Section, Subsection);
+ MCELFStreamer::changeSection(Section, Subsection);
auto LastMappingSymbol = LastMappingSymbols.find(Section);
if (LastMappingSymbol != LastMappingSymbols.end()) {
LastEMSInfo = std::move(LastMappingSymbol->second);
@@ -486,14 +478,14 @@ public:
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to add the appropriate mapping symbol if
/// necessary.
- void EmitInstruction(const MCInst &Inst,
+ void emitInstruction(const MCInst &Inst,
const MCSubtargetInfo &STI) override {
if (IsThumb)
EmitThumbMappingSymbol();
else
EmitARMMappingSymbol();
- MCELFStreamer::EmitInstruction(Inst, STI);
+ MCELFStreamer::emitInstruction(Inst, STI);
}
void emitInst(uint32_t Inst, char Suffix) {
@@ -533,15 +525,15 @@ public:
llvm_unreachable("Invalid Suffix");
}
- MCELFStreamer::EmitBytes(StringRef(Buffer, Size));
+ MCELFStreamer::emitBytes(StringRef(Buffer, Size));
}
/// This is one of the functions used to emit data into an ELF section, so the
/// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
/// necessary.
- void EmitBytes(StringRef Data) override {
- EmitDataMappingSymbol();
- MCELFStreamer::EmitBytes(Data);
+ void emitBytes(StringRef Data) override {
+ emitDataMappingSymbol();
+ MCELFStreamer::emitBytes(Data);
}
void FlushPendingMappingSymbol() {
@@ -555,7 +547,7 @@ public:
/// This is one of the functions used to emit data into an ELF section, so the
/// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
/// necessary.
- void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override {
+ void emitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override {
if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value)) {
if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4)) {
getContext().reportError(Loc, "relocated expression must be 32-bit");
@@ -564,12 +556,12 @@ public:
getOrCreateDataFragment();
}
- EmitDataMappingSymbol();
- MCELFStreamer::EmitValueImpl(Value, Size, Loc);
+ emitDataMappingSymbol();
+ MCELFStreamer::emitValueImpl(Value, Size, Loc);
}
- void EmitAssemblerFlag(MCAssemblerFlag Flag) override {
- MCELFStreamer::EmitAssemblerFlag(Flag);
+ void emitAssemblerFlag(MCAssemblerFlag Flag) override {
+ MCELFStreamer::emitAssemblerFlag(Flag);
switch (Flag) {
case MCAF_SyntaxUnified:
@@ -609,7 +601,7 @@ private:
ElfMappingSymbol State;
};
- void EmitDataMappingSymbol() {
+ void emitDataMappingSymbol() {
if (LastEMSInfo->State == EMS_Data)
return;
else if (LastEMSInfo->State == EMS_None) {
@@ -648,7 +640,7 @@ private:
void EmitMappingSymbol(StringRef Name) {
auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
Name + "." + Twine(MappingSymbolCounter++)));
- EmitLabel(Symbol);
+ emitLabel(Symbol);
Symbol->setType(ELF::STT_NOTYPE);
Symbol->setBinding(ELF::STB_LOCAL);
@@ -659,15 +651,15 @@ private:
uint64_t Offset) {
auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
Name + "." + Twine(MappingSymbolCounter++)));
- EmitLabelAtPos(Symbol, Loc, F, Offset);
+ emitLabelAtPos(Symbol, Loc, F, Offset);
Symbol->setType(ELF::STT_NOTYPE);
Symbol->setBinding(ELF::STB_LOCAL);
Symbol->setExternal(false);
}
- void EmitThumbFunc(MCSymbol *Func) override {
+ void emitThumbFunc(MCSymbol *Func) override {
getAssembler().setIsThumbFunc(Func);
- EmitSymbolAttribute(Func, MCSA_ELF_TypeFunction);
+ emitSymbolAttribute(Func, MCSA_ELF_TypeFunction);
}
// Helper functions for ARM exception handling directives
@@ -868,6 +860,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
case ARM::ArchKind::ARMV8_3A:
case ARM::ArchKind::ARMV8_4A:
case ARM::ArchKind::ARMV8_5A:
+ case ARM::ArchKind::ARMV8_6A:
setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
@@ -1091,7 +1084,7 @@ void ARMTargetELFStreamer::finishAttributeSection() {
Streamer.SwitchSection(AttributeSection);
// Format version
- Streamer.EmitIntValue(0x41, 1);
+ Streamer.emitInt8(0x41);
}
// Vendor size + Vendor name + '\0'
@@ -1102,31 +1095,31 @@ void ARMTargetELFStreamer::finishAttributeSection() {
const size_t ContentsSize = calculateContentSize();
- Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4);
- Streamer.EmitBytes(CurrentVendor);
- Streamer.EmitIntValue(0, 1); // '\0'
+ Streamer.emitInt32(VendorHeaderSize + TagHeaderSize + ContentsSize);
+ Streamer.emitBytes(CurrentVendor);
+ Streamer.emitInt8(0); // '\0'
- Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
- Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4);
+ Streamer.emitInt8(ARMBuildAttrs::File);
+ Streamer.emitInt32(TagHeaderSize + ContentsSize);
// Size should have been accounted for already, now
// emit each field as its type (ULEB or String)
for (size_t i = 0; i < Contents.size(); ++i) {
AttributeItem item = Contents[i];
- Streamer.EmitULEB128IntValue(item.Tag);
+ Streamer.emitULEB128IntValue(item.Tag);
switch (item.Type) {
default: llvm_unreachable("Invalid attribute type");
case AttributeItem::NumericAttribute:
- Streamer.EmitULEB128IntValue(item.IntValue);
+ Streamer.emitULEB128IntValue(item.IntValue);
break;
case AttributeItem::TextAttribute:
- Streamer.EmitBytes(item.StringValue);
- Streamer.EmitIntValue(0, 1); // '\0'
+ Streamer.emitBytes(item.StringValue);
+ Streamer.emitInt8(0); // '\0'
break;
case AttributeItem::NumericAndTextAttributes:
- Streamer.EmitULEB128IntValue(item.IntValue);
- Streamer.EmitBytes(item.StringValue);
- Streamer.EmitIntValue(0, 1); // '\0'
+ Streamer.emitULEB128IntValue(item.IntValue);
+ Streamer.emitBytes(item.StringValue);
+ Streamer.emitInt8(0); // '\0'
break;
}
}
@@ -1143,7 +1136,7 @@ void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) {
Streamer.getAssembler().registerSymbol(*Symbol);
unsigned Type = cast<MCSymbolELF>(Symbol)->getType();
if (Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)
- Streamer.EmitThumbFunc(Symbol);
+ Streamer.emitThumbFunc(Symbol);
}
void
@@ -1155,13 +1148,13 @@ void ARMTargetELFStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {
if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Value)) {
const MCSymbol &Sym = SRE->getSymbol();
if (!Sym.isDefined()) {
- getStreamer().EmitAssignment(Symbol, Value);
+ getStreamer().emitAssignment(Symbol, Value);
return;
}
}
- getStreamer().EmitThumbFunc(Symbol);
- getStreamer().EmitAssignment(Symbol, Value);
+ getStreamer().emitThumbFunc(Symbol);
+ getStreamer().emitAssignment(Symbol, Value);
}
void ARMTargetELFStreamer::emitInst(uint32_t Inst, char Suffix) {
@@ -1170,12 +1163,12 @@ void ARMTargetELFStreamer::emitInst(uint32_t Inst, char Suffix) {
void ARMTargetELFStreamer::reset() { AttributeSection = nullptr; }
-void ARMELFStreamer::FinishImpl() {
+void ARMELFStreamer::finishImpl() {
MCTargetStreamer &TS = *getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
ATS.finishAttributeSection();
- MCELFStreamer::FinishImpl();
+ MCELFStreamer::finishImpl();
}
void ARMELFStreamer::reset() {
@@ -1201,7 +1194,7 @@ inline void ARMELFStreamer::SwitchToEHSection(StringRef Prefix,
static_cast<const MCSectionELF &>(Fn.getSection());
// Create the name for new section
- StringRef FnSecName(FnSection.getSectionName());
+ StringRef FnSecName(FnSection.getName());
SmallString<128> EHSecName(Prefix);
if (FnSecName != ".text") {
EHSecName += FnSecName;
@@ -1213,13 +1206,13 @@ inline void ARMELFStreamer::SwitchToEHSection(StringRef Prefix,
Flags |= ELF::SHF_GROUP;
MCSectionELF *EHSection = getContext().getELFSection(
EHSecName, Type, Flags, 0, Group, FnSection.getUniqueID(),
- static_cast<const MCSymbolELF *>(&Fn));
+ static_cast<const MCSymbolELF *>(FnSection.getBeginSymbol()));
assert(EHSection && "Failed to get the required EH section");
// Switch to .ARM.extab or .ARM.exidx section
SwitchSection(EHSection);
- EmitCodeAlignment(4);
+ emitCodeAlignment(4);
}
inline void ARMELFStreamer::SwitchToExTabSection(const MCSymbol &FnStart) {
@@ -1258,7 +1251,7 @@ void ARMELFStreamer::EHReset() {
void ARMELFStreamer::emitFnStart() {
assert(FnStart == nullptr);
FnStart = getContext().createTempSymbol();
- EmitLabel(FnStart);
+ emitLabel(FnStart);
}
void ARMELFStreamer::emitFnEnd() {
@@ -1284,17 +1277,17 @@ void ARMELFStreamer::emitFnEnd() {
MCSymbolRefExpr::VK_ARM_PREL31,
getContext());
- EmitValue(FnStartRef, 4);
+ emitValue(FnStartRef, 4);
if (CantUnwind) {
- EmitIntValue(ARM::EHABI::EXIDX_CANTUNWIND, 4);
+ emitInt32(ARM::EHABI::EXIDX_CANTUNWIND);
} else if (ExTab) {
// Emit a reference to the unwind opcodes in the ".ARM.extab" section.
const MCSymbolRefExpr *ExTabEntryRef =
MCSymbolRefExpr::create(ExTab,
MCSymbolRefExpr::VK_ARM_PREL31,
getContext());
- EmitValue(ExTabEntryRef, 4);
+ emitValue(ExTabEntryRef, 4);
} else {
// For the __aeabi_unwind_cpp_pr0, we have to emit the unwind opcodes in
// the second word of exception index table entry. The size of the unwind
@@ -1307,7 +1300,7 @@ void ARMELFStreamer::emitFnEnd() {
Opcodes[1] << 8 |
Opcodes[2] << 16 |
Opcodes[3] << 24;
- EmitIntValue(Intval, Opcodes.size());
+ emitIntValue(Intval, Opcodes.size());
}
// Switch to the section containing FnStart
@@ -1366,7 +1359,7 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) {
// Create .ARM.extab label for offset in .ARM.exidx
assert(!ExTab);
ExTab = getContext().createTempSymbol();
- EmitLabel(ExTab);
+ emitLabel(ExTab);
// Emit personality
if (Personality) {
@@ -1375,7 +1368,7 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) {
MCSymbolRefExpr::VK_ARM_PREL31,
getContext());
- EmitValue(PersonalityRef, 4);
+ emitValue(PersonalityRef, 4);
}
// Emit unwind opcodes
@@ -1386,7 +1379,7 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) {
Opcodes[I + 1] << 8 |
Opcodes[I + 2] << 16 |
Opcodes[I + 3] << 24;
- EmitIntValue(Intval, 4);
+ emitInt32(Intval);
}
// According to ARM EHABI section 9.2, if the __aeabi_unwind_cpp_pr1() or
@@ -1397,7 +1390,7 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) {
// In case that the .handlerdata directive is not specified by the
// programmer, we should emit zero to terminate the handler data.
if (NoHandlerData && !Personality)
- EmitIntValue(0, 4);
+ emitInt32(0);
}
void ARMELFStreamer::emitHandlerData() { FlushUnwindOpcodes(false); }
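As an aside (not part of the patch itself): the hunks above in emitFnEnd() and FlushUnwindOpcodes() pack four EHABI unwind opcode bytes into one little-endian 32-bit word before emitting it. A minimal standalone sketch of that packing; packUnwindWord is a made-up name:

    #include <cstdint>
    #include <vector>

    // Pack four unwind opcode bytes, least significant byte first, mirroring
    // the Opcodes[I] | Opcodes[I+1] << 8 | ... expressions emitted above.
    static uint32_t packUnwindWord(const std::vector<uint8_t> &Opcodes, size_t I) {
      return static_cast<uint32_t>(Opcodes[I]) |
             static_cast<uint32_t>(Opcodes[I + 1]) << 8 |
             static_cast<uint32_t>(Opcodes[I + 2]) << 16 |
             static_cast<uint32_t>(Opcodes[I + 3]) << 24;
    }
    // For example, the bytes {0xB0, 0xB0, 0xB0, 0x80} pack to 0x80B0B0B0.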
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
index b36106a78b71..744d919f2fd4 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
@@ -288,7 +288,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, uint64_t Address,
case ARM::t2DSB:
switch (MI->getOperand(0).getImm()) {
default:
- if (!printAliasInstr(MI, STI, O))
+ if (!printAliasInstr(MI, Address, STI, O))
printInstruction(MI, Address, STI, O);
break;
case 0:
@@ -302,7 +302,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, uint64_t Address,
return;
}
- if (!printAliasInstr(MI, STI, O))
+ if (!printAliasInstr(MI, Address, STI, O))
printInstruction(MI, Address, STI, O);
printAnnotation(O, Annot);
@@ -1669,15 +1669,6 @@ void ARMInstPrinter::printVPTMask(const MCInst *MI, unsigned OpNum,
}
}
-void ARMInstPrinter::printExpandedImmOperand(const MCInst *MI, unsigned OpNum,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- uint32_t Val = MI->getOperand(OpNum).getImm();
- O << markup("<imm:") << "#0x";
- O.write_hex(Val);
- O << markup(">");
-}
-
void ARMInstPrinter::printMveSaturateOp(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
index 20f901033395..37cb731ff001 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
@@ -32,10 +32,10 @@ public:
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, uint64_t Address,
const MCSubtargetInfo &STI, raw_ostream &O);
- virtual bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
- raw_ostream &O);
- virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx,
+ virtual bool printAliasInstr(const MCInst *MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ virtual void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpIdx, unsigned PrintMethodIdx,
const MCSubtargetInfo &STI,
raw_ostream &O);
static const char *getRegisterName(unsigned RegNo,
@@ -43,6 +43,10 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ printOperand(MI, OpNum, STI, O);
+ }
void printSORegRegOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
@@ -109,6 +113,12 @@ public:
template <unsigned scale>
void printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
+ template <unsigned scale>
+ void printAdrLabelOperand(const MCInst *MI, uint64_t /*Address*/,
+ unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printAdrLabelOperand<scale>(MI, OpNum, STI, O);
+ }
void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
void printThumbSRImm(const MCInst *MI, unsigned OpNum,
@@ -206,6 +216,11 @@ public:
const MCSubtargetInfo &STI, raw_ostream &O);
void printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printThumbLdrLabelOperand(const MCInst *MI, uint64_t /*Address*/,
+ unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printThumbLdrLabelOperand(MI, OpNum, STI, O);
+ }
void printFBits16(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
void printFBits32(const MCInst *MI, unsigned OpNum,
@@ -260,8 +275,6 @@ public:
const MCSubtargetInfo &STI, raw_ostream &O);
void printMveAddrModeQOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
- void printExpandedImmOperand(const MCInst *MI, unsigned OpNum,
- const MCSubtargetInfo &STI, raw_ostream &O);
void printMveSaturateOp(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
private:
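As an aside (not part of the patch itself): the header changes above all follow one pattern — the tablegen-generated printers now pass the instruction address, so each affected print method gains a thin overload that accepts a uint64_t Address and simply forwards to the legacy implementation that ignores it. A minimal sketch of the pattern with invented names (Widget, Printer, printThing):

    #include <cstdint>
    #include <iostream>

    struct Widget { int Value; };

    struct Printer {
      // Legacy printer that never needed the address.
      void printThing(const Widget *W, unsigned OpNum, std::ostream &OS) {
        OS << "operand " << OpNum << " of widget " << W->Value << '\n';
      }
      // New overload: accepts the address the generated code now supplies
      // and forwards, discarding it.
      void printThing(const Widget *W, uint64_t /*Address*/, unsigned OpNum,
                      std::ostream &OS) {
        printThing(W, OpNum, OS);
      }
    };

    int main() {
      Printer P;
      Widget W{42};
      P.printThing(&W, /*Address=*/0x1000, /*OpNum=*/0, std::cout);
    }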
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index d30d15df3d00..765613cf347d 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -37,8 +37,6 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(const Triple &TheTriple) {
ExceptionsType = (TheTriple.isOSDarwin() && !TheTriple.isWatchABI())
? ExceptionHandling::SjLj
: ExceptionHandling::DwarfCFI;
-
- UseIntegratedAssembler = true;
}
void ARMELFMCAsmInfo::anchor() { }
@@ -73,8 +71,6 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(const Triple &TheTriple) {
// foo(plt) instead of foo@plt
UseParensForSymbolVariant = true;
-
- UseIntegratedAssembler = true;
}
void ARMELFMCAsmInfo::setUseIntegratedAssembler(bool Value) {
@@ -116,7 +112,6 @@ ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() {
ExceptionsType = ExceptionHandling::DwarfCFI;
UseParensForSymbolVariant = true;
- UseIntegratedAssembler = true;
DwarfRegNumForCFI = false;
// Conditional Thumb 4-byte instructions can have an implicit IT.
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 268fe7efd9ce..1cb99534f146 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -413,14 +413,6 @@ public:
unsigned getThumbSRImmOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- template <uint8_t shift, bool invert>
- unsigned getExpandedImmOpValue(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- static_assert(shift <= 32, "Shift count must be less than or equal to 32.");
- const MCOperand MO = MI.getOperand(Op);
- return (invert ? (MO.getImm() ^ 0xff) : MO.getImm()) >> shift;
- }
unsigned NEONThumb2DataIPostEncoder(const MCInst &MI,
unsigned EncodedValue,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 9f60e70e0e02..05d73ccf6ff2 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -63,6 +63,25 @@ static bool getMCRDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
return true;
}
}
+ if (STI.getFeatureBits()[llvm::ARM::HasV7Ops] &&
+ ((MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 10) ||
+ (MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 11))) {
+ Info = "since v7, cp10 and cp11 are reserved for advanced SIMD or floating "
+ "point instructions";
+ return true;
+ }
+ return false;
+}
+
+static bool getMRCDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
+ std::string &Info) {
+ if (STI.getFeatureBits()[llvm::ARM::HasV7Ops] &&
+ ((MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 10) ||
+ (MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 11))) {
+ Info = "since v7, cp10 and cp11 are reserved for advanced SIMD or floating "
+ "point instructions";
+ return true;
+ }
return false;
}
@@ -168,7 +187,7 @@ MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(const Triple &TT,
if (!ArchFS.empty())
ArchFS = (Twine(ArchFS) + "," + FS).str();
else
- ArchFS = FS;
+ ArchFS = std::string(FS);
}
return createARMMCSubtargetInfoImpl(TT, CPU, ArchFS);
@@ -200,7 +219,7 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI,
MAI = new ARMELFMCAsmInfo(TheTriple);
unsigned Reg = MRI.getDwarfRegNum(ARM::SP, true);
- MAI->addInitialFrameState(MCCFIInstruction::createDefCfa(nullptr, Reg, 0));
+ MAI->addInitialFrameState(MCCFIInstruction::cfiDefCfa(nullptr, Reg, 0));
return MAI;
}
@@ -266,7 +285,9 @@ public:
bool evaluateBranch(const MCInst &Inst, uint64_t Addr,
uint64_t Size, uint64_t &Target) const override {
// We only handle PCRel branches for now.
- if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL)
+ if (Inst.getNumOperands() == 0 ||
+ Info->get(Inst.getOpcode()).OpInfo[0].OperandType !=
+ MCOI::OPERAND_PCREL)
return false;
int64_t Imm = Inst.getOperand(0).getImm();
@@ -285,8 +306,15 @@ public:
switch (Inst.getOpcode()) {
default:
OpId = 0;
+ if (Inst.getNumOperands() == 0)
+ return false;
break;
+ case ARM::MVE_WLSTP_8:
+ case ARM::MVE_WLSTP_16:
+ case ARM::MVE_WLSTP_32:
+ case ARM::MVE_WLSTP_64:
case ARM::t2WLS:
+ case ARM::MVE_LETP:
case ARM::t2LEUpdate:
OpId = 2;
break;
@@ -316,6 +344,14 @@ static MCInstrAnalysis *createThumbMCInstrAnalysis(const MCInstrInfo *Info) {
return new ThumbMCInstrAnalysis(Info);
}
+bool ARM::isCDECoproc(size_t Coproc, const MCSubtargetInfo &STI) {
+ // Unfortunately we don't have ARMTargetInfo in the disassembler, so we have
+ // to rely on feature bits.
+ if (Coproc >= 8)
+ return false;
+ return STI.getFeatureBits()[ARM::FeatureCoprocCDE0 + Coproc];
+}
+
// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTargetMC() {
for (Target *T : {&getTheARMLETarget(), &getTheARMBETarget(),
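As an aside (not part of the patch itself): both deprecation hooks added above perform the same operand test — once the v7 feature bit is set, coprocessor numbers 10 and 11 are reserved. A standalone sketch of that check; isDeprecatedCoprocAccess, HasV7 and CoprocOp are invented names standing in for the feature-bit query and MI.getOperand(0).getImm():

    #include <cstdint>
    #include <string>

    static bool isDeprecatedCoprocAccess(bool HasV7, int64_t CoprocOp,
                                         std::string &Info) {
      if (HasV7 && (CoprocOp == 10 || CoprocOp == 11)) {
        Info = "since v7, cp10 and cp11 are reserved for advanced SIMD or "
               "floating point instructions";
        return true;
      }
      return false;
    }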
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 9cbbd56225ef..7cfe6881b456 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -107,6 +107,9 @@ inline bool isVpred(OperandType op) {
inline bool isVpred(uint8_t op) {
return isVpred(static_cast<OperandType>(op));
}
+
+bool isCDECoproc(size_t Coproc, const MCSubtargetInfo &STI);
+
} // end namespace ARM
} // End llvm namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 7b30a61e8ccb..1fee354cad93 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -80,7 +80,7 @@ void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {
default:
llvm_unreachable("Invalid Suffix");
}
- getStreamer().EmitBytes(StringRef(Buffer, Size));
+ getStreamer().emitBytes(StringRef(Buffer, Size));
}
// The remaining callbacks should be handled separately by each
@@ -108,7 +108,7 @@ void ARMTargetStreamer::emitIntTextAttribute(unsigned Attribute,
unsigned IntValue,
StringRef StringValue) {}
void ARMTargetStreamer::emitArch(ARM::ArchKind Arch) {}
-void ARMTargetStreamer::emitArchExtension(unsigned ArchExt) {}
+void ARMTargetStreamer::emitArchExtension(uint64_t ArchExt) {}
void ARMTargetStreamer::emitObjectArch(ARM::ArchKind Arch) {}
void ARMTargetStreamer::emitFPU(unsigned FPU) {}
void ARMTargetStreamer::finishAttributeSection() {}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index a9460b70da56..781627c3c425 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -134,7 +134,7 @@ void UnwindOpcodeAssembler::EmitSPOffset(int64_t Offset) {
uint8_t Buff[16];
Buff[0] = ARM::EHABI::UNWIND_OPCODE_INC_VSP_ULEB128;
size_t ULEBSize = encodeULEB128((Offset - 0x204) >> 2, Buff + 1);
- EmitBytes(Buff, ULEBSize + 1);
+ emitBytes(Buff, ULEBSize + 1);
} else if (Offset > 0) {
if (Offset > 0x100) {
EmitInt8(ARM::EHABI::UNWIND_OPCODE_INC_VSP | 0x3fu);
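As an aside (not part of the patch itself): the large-increment branch above uses the EHABI opcode UNWIND_OPCODE_INC_VSP_ULEB128 (0xB2), whose operand is (Offset - 0x204) >> 2 because the opcode itself already implies 0x204 bytes and the stack moves in 4-byte units. A self-contained sketch of that encoding; encodeVspIncrement is an invented name, and the patch itself relies on llvm's encodeULEB128 rather than this hand-rolled loop:

    #include <cstdint>
    #include <vector>

    static std::vector<uint8_t> encodeVspIncrement(int64_t Offset) {
      uint64_t Value = static_cast<uint64_t>(Offset - 0x204) >> 2;
      std::vector<uint8_t> Out{0xB2}; // UNWIND_OPCODE_INC_VSP_ULEB128
      do {                            // ULEB128-encode the scaled operand
        uint8_t Byte = Value & 0x7F;
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80;
        Out.push_back(Byte);
      } while (Value != 0);
      return Out; // e.g. Offset = 0x288 yields {0xB2, 0x21}
    }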
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
index 5fb7307159d1..ec11a78f8a7a 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
@@ -64,7 +64,7 @@ public:
OpBegins.push_back(OpBegins.back() + Opcodes.size());
}
- /// Finalize the unwind opcode sequence for EmitBytes()
+ /// Finalize the unwind opcode sequence for emitBytes()
void Finalize(unsigned &PersonalityIndex,
SmallVectorImpl<uint8_t> &Result);
@@ -80,7 +80,7 @@ private:
OpBegins.push_back(OpBegins.back() + 2);
}
- void EmitBytes(const uint8_t *Opcode, size_t Size) {
+ void emitBytes(const uint8_t *Opcode, size_t Size) {
Ops.insert(Ops.end(), Opcode, Opcode + Size);
OpBegins.push_back(OpBegins.back() + Size);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index b3c8146a9bde..e6f649164a29 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -22,18 +22,18 @@ public:
std::unique_ptr<MCObjectWriter> OW)
: MCWinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)) {}
- void EmitThumbFunc(MCSymbol *Symbol) override;
- void FinishImpl() override;
+ void emitThumbFunc(MCSymbol *Symbol) override;
+ void finishImpl() override;
};
-void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) {
+void ARMWinCOFFStreamer::emitThumbFunc(MCSymbol *Symbol) {
getAssembler().setIsThumbFunc(Symbol);
}
-void ARMWinCOFFStreamer::FinishImpl() {
- EmitFrames(nullptr);
+void ARMWinCOFFStreamer::finishImpl() {
+ emitFrames(nullptr);
- MCWinCOFFStreamer::FinishImpl();
+ MCWinCOFFStreamer::finishImpl();
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
index 9f64af02e698..4d7ad6cd60cb 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
@@ -15,6 +15,7 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -37,6 +38,7 @@
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
@@ -67,27 +69,77 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<TargetPassConfig>();
+ AU.addRequired<LoopInfoWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
}
private:
+ LoopInfo *LI = nullptr;
+
// Check this is a valid gather with correct alignment
bool isLegalTypeAndAlignment(unsigned NumElements, unsigned ElemSize,
- unsigned Alignment);
+ Align Alignment);
// Check whether Ptr is hidden behind a bitcast and look through it
void lookThroughBitcast(Value *&Ptr);
// Check for a getelementptr and deduce base and offsets from it, on success
// returning the base directly and the offsets indirectly using the Offsets
// argument
- Value *checkGEP(Value *&Offsets, Type *Ty, Value *Ptr, IRBuilder<> Builder);
+ Value *checkGEP(Value *&Offsets, Type *Ty, GetElementPtrInst *GEP,
+ IRBuilder<> &Builder);
+ // Compute the scale of this gather/scatter instruction
+ int computeScale(unsigned GEPElemSize, unsigned MemoryElemSize);
+ // If the value is a constant, or derived from constants via additions
+ // and multiplications, return its numeric value
+ Optional<int64_t> getIfConst(const Value *V);
+ // If Inst is an add instruction, check whether one summand is a
+ // constant. If so, scale this constant and return it together with
+ // the other summand.
+ std::pair<Value *, int64_t> getVarAndConst(Value *Inst, int TypeScale);
- bool lowerGather(IntrinsicInst *I);
+ Value *lowerGather(IntrinsicInst *I);
// Create a gather from a base + vector of offsets
Value *tryCreateMaskedGatherOffset(IntrinsicInst *I, Value *Ptr,
- IRBuilder<> Builder);
+ Instruction *&Root, IRBuilder<> &Builder);
// Create a gather from a vector of pointers
Value *tryCreateMaskedGatherBase(IntrinsicInst *I, Value *Ptr,
- IRBuilder<> Builder);
+ IRBuilder<> &Builder, int64_t Increment = 0);
+ // Create an incrementing gather from a vector of pointers
+ Value *tryCreateMaskedGatherBaseWB(IntrinsicInst *I, Value *Ptr,
+ IRBuilder<> &Builder,
+ int64_t Increment = 0);
+
+ Value *lowerScatter(IntrinsicInst *I);
+ // Create a scatter to a base + vector of offsets
+ Value *tryCreateMaskedScatterOffset(IntrinsicInst *I, Value *Offsets,
+ IRBuilder<> &Builder);
+ // Create a scatter to a vector of pointers
+ Value *tryCreateMaskedScatterBase(IntrinsicInst *I, Value *Ptr,
+ IRBuilder<> &Builder,
+ int64_t Increment = 0);
+ // Create an incrementing scatter from a vector of pointers
+ Value *tryCreateMaskedScatterBaseWB(IntrinsicInst *I, Value *Ptr,
+ IRBuilder<> &Builder,
+ int64_t Increment = 0);
+
+ // QI gathers and scatters can increment their offsets on their own if
+ // the increment is a constant value (digit)
+ Value *tryCreateIncrementingGatScat(IntrinsicInst *I, Value *BasePtr,
+ Value *Ptr, GetElementPtrInst *GEP,
+ IRBuilder<> &Builder);
+ // QI gathers/scatters can increment their offsets on their own if the
+ // increment is a constant value (digit) - this creates a writeback QI
+ // gather/scatter
+ Value *tryCreateIncrementingWBGatScat(IntrinsicInst *I, Value *BasePtr,
+ Value *Ptr, unsigned TypeScale,
+ IRBuilder<> &Builder);
+ // Check whether these offsets could be moved out of the loop they're in
+ bool optimiseOffsets(Value *Offsets, BasicBlock *BB, LoopInfo *LI);
+ // Pushes the given add out of the loop
+ void pushOutAdd(PHINode *&Phi, Value *OffsSecondOperand, unsigned StartIndex);
+ // Pushes the given mul out of the loop
+ void pushOutMul(PHINode *&Phi, Value *IncrementPerRound,
+ Value *OffsSecondOperand, unsigned LoopIncrement,
+ IRBuilder<> &Builder);
};
} // end anonymous namespace
@@ -103,102 +155,177 @@ Pass *llvm::createMVEGatherScatterLoweringPass() {
bool MVEGatherScatterLowering::isLegalTypeAndAlignment(unsigned NumElements,
unsigned ElemSize,
- unsigned Alignment) {
- // Do only allow non-extending gathers for now
- if (((NumElements == 4 && ElemSize == 32) ||
- (NumElements == 8 && ElemSize == 16) ||
+ Align Alignment) {
+ if (((NumElements == 4 &&
+ (ElemSize == 32 || ElemSize == 16 || ElemSize == 8)) ||
+ (NumElements == 8 && (ElemSize == 16 || ElemSize == 8)) ||
(NumElements == 16 && ElemSize == 8)) &&
- ElemSize / 8 <= Alignment)
+ Alignment >= ElemSize / 8)
return true;
- LLVM_DEBUG(dbgs() << "masked gathers: instruction does not have valid "
- << "alignment or vector type \n");
+ LLVM_DEBUG(dbgs() << "masked gathers/scatters: instruction does not have "
+ << "valid alignment or vector type \n");
return false;
}
-Value *MVEGatherScatterLowering::checkGEP(Value *&Offsets, Type *Ty, Value *Ptr,
- IRBuilder<> Builder) {
- GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+Value *MVEGatherScatterLowering::checkGEP(Value *&Offsets, Type *Ty,
+ GetElementPtrInst *GEP,
+ IRBuilder<> &Builder) {
if (!GEP) {
- LLVM_DEBUG(dbgs() << "masked gathers: no getelementpointer found\n");
+ LLVM_DEBUG(
+ dbgs() << "masked gathers/scatters: no getelementpointer found\n");
return nullptr;
}
- LLVM_DEBUG(dbgs() << "masked gathers: getelementpointer found. Loading"
- << " from base + vector of offsets\n");
+ LLVM_DEBUG(dbgs() << "masked gathers/scatters: getelementpointer found."
+ << " Looking at intrinsic for base + vector of offsets\n");
Value *GEPPtr = GEP->getPointerOperand();
if (GEPPtr->getType()->isVectorTy()) {
- LLVM_DEBUG(dbgs() << "masked gathers: gather from a vector of pointers"
- << " hidden behind a getelementptr currently not"
- << " supported. Expanding.\n");
return nullptr;
}
if (GEP->getNumOperands() != 2) {
- LLVM_DEBUG(dbgs() << "masked gathers: getelementptr with too many"
+ LLVM_DEBUG(dbgs() << "masked gathers/scatters: getelementptr with too many"
<< " operands. Expanding.\n");
return nullptr;
}
Offsets = GEP->getOperand(1);
- // SExt offsets inside masked gathers are not permitted by the architecture;
- // we therefore can't fold them
+ // Paranoid check whether the number of parallel lanes is the same
+ assert(cast<FixedVectorType>(Ty)->getNumElements() ==
+ cast<FixedVectorType>(Offsets->getType())->getNumElements());
+ // Only <N x i32> offsets can be integrated into an arm gather, any smaller
+ // type would have to be sign extended by the gep - and arm gathers can only
+ // zero extend. Additionally, the offsets do have to originate from a zext of
+ // a vector with element types smaller than or equal to the type of the gather we're
+ // looking at
+ if (Offsets->getType()->getScalarSizeInBits() != 32)
+ return nullptr;
if (ZExtInst *ZextOffs = dyn_cast<ZExtInst>(Offsets))
Offsets = ZextOffs->getOperand(0);
- Type *OffsType = VectorType::getInteger(cast<VectorType>(Ty));
- // If the offset we found does not have the type the intrinsic expects,
- // i.e., the same type as the gather itself, we need to convert it (only i
- // types) or fall back to expanding the gather
- if (OffsType != Offsets->getType()) {
- if (OffsType->getScalarSizeInBits() >
- Offsets->getType()->getScalarSizeInBits()) {
- LLVM_DEBUG(dbgs() << "masked gathers: extending offsets\n");
- Offsets = Builder.CreateZExt(Offsets, OffsType, "");
- } else {
- LLVM_DEBUG(dbgs() << "masked gathers: no correct offset type. Can't"
- << " create masked gather\n");
+ else if (!(cast<FixedVectorType>(Offsets->getType())->getNumElements() == 4 &&
+ Offsets->getType()->getScalarSizeInBits() == 32))
+ return nullptr;
+
+ if (Ty != Offsets->getType()) {
+ if ((Ty->getScalarSizeInBits() <
+ Offsets->getType()->getScalarSizeInBits())) {
+ LLVM_DEBUG(dbgs() << "masked gathers/scatters: no correct offset type."
+ << " Can't create intrinsic.\n");
return nullptr;
+ } else {
+ Offsets = Builder.CreateZExt(
+ Offsets, VectorType::getInteger(cast<VectorType>(Ty)));
}
}
// If none of the checks failed, return the gep's base pointer
+ LLVM_DEBUG(dbgs() << "masked gathers/scatters: found correct offsets\n");
return GEPPtr;
}
void MVEGatherScatterLowering::lookThroughBitcast(Value *&Ptr) {
// Look through bitcast instruction if #elements is the same
if (auto *BitCast = dyn_cast<BitCastInst>(Ptr)) {
- Type *BCTy = BitCast->getType();
- Type *BCSrcTy = BitCast->getOperand(0)->getType();
- if (BCTy->getVectorNumElements() == BCSrcTy->getVectorNumElements()) {
- LLVM_DEBUG(dbgs() << "masked gathers: looking through bitcast\n");
+ auto *BCTy = cast<FixedVectorType>(BitCast->getType());
+ auto *BCSrcTy = cast<FixedVectorType>(BitCast->getOperand(0)->getType());
+ if (BCTy->getNumElements() == BCSrcTy->getNumElements()) {
+ LLVM_DEBUG(
+ dbgs() << "masked gathers/scatters: looking through bitcast\n");
Ptr = BitCast->getOperand(0);
}
}
}
-bool MVEGatherScatterLowering::lowerGather(IntrinsicInst *I) {
+int MVEGatherScatterLowering::computeScale(unsigned GEPElemSize,
+ unsigned MemoryElemSize) {
+ // This can be a 32bit load/store scaled by 4, a 16bit load/store scaled by 2,
+ // or an 8bit, 16bit or 32bit load/store scaled by 1
+ if (GEPElemSize == 32 && MemoryElemSize == 32)
+ return 2;
+ else if (GEPElemSize == 16 && MemoryElemSize == 16)
+ return 1;
+ else if (GEPElemSize == 8)
+ return 0;
+ LLVM_DEBUG(dbgs() << "masked gathers/scatters: incorrect scale. Can't "
+ << "create intrinsic\n");
+ return -1;
+}
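// Aside, illustration only (not part of the patch): the value returned above
// is the per-lane shift the MVE gather/scatter applies to its offsets, so
//   computeScale(32, 32) == 2   // 32-bit elements, offsets counted in words
//   computeScale(16, 16) == 1   // 16-bit elements, offsets in halfwords
//   computeScale(8,  8)  == 0   // byte-sized GEP elements, byte offsets
//   computeScale(32, 16) == -1  // mismatch: caller falls back to expanding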
+
+Optional<int64_t> MVEGatherScatterLowering::getIfConst(const Value *V) {
+ const Constant *C = dyn_cast<Constant>(V);
+ if (C != nullptr)
+ return Optional<int64_t>{C->getUniqueInteger().getSExtValue()};
+ if (!isa<Instruction>(V))
+ return Optional<int64_t>{};
+
+ const Instruction *I = cast<Instruction>(V);
+ if (I->getOpcode() == Instruction::Add ||
+ I->getOpcode() == Instruction::Mul) {
+ Optional<int64_t> Op0 = getIfConst(I->getOperand(0));
+ Optional<int64_t> Op1 = getIfConst(I->getOperand(1));
+ if (!Op0 || !Op1)
+ return Optional<int64_t>{};
+ if (I->getOpcode() == Instruction::Add)
+ return Optional<int64_t>{Op0.getValue() + Op1.getValue()};
+ if (I->getOpcode() == Instruction::Mul)
+ return Optional<int64_t>{Op0.getValue() * Op1.getValue()};
+ }
+ return Optional<int64_t>{};
+}
+
+std::pair<Value *, int64_t>
+MVEGatherScatterLowering::getVarAndConst(Value *Inst, int TypeScale) {
+ std::pair<Value *, int64_t> ReturnFalse =
+ std::pair<Value *, int64_t>(nullptr, 0);
+ // At this point, the instruction we're looking at must be an add or we
+ // bail out
+ Instruction *Add = dyn_cast<Instruction>(Inst);
+ if (Add == nullptr || Add->getOpcode() != Instruction::Add)
+ return ReturnFalse;
+
+ Value *Summand;
+ Optional<int64_t> Const;
+ // Find out which operand the value that is increased is
+ if ((Const = getIfConst(Add->getOperand(0))))
+ Summand = Add->getOperand(1);
+ else if ((Const = getIfConst(Add->getOperand(1))))
+ Summand = Add->getOperand(0);
+ else
+ return ReturnFalse;
+
+ // Check that the constant is small enough for an incrementing gather
+ int64_t Immediate = Const.getValue() << TypeScale;
+ if (Immediate > 512 || Immediate < -512 || Immediate % 4 != 0)
+ return ReturnFalse;
+
+ return std::pair<Value *, int64_t>(Summand, Immediate);
+}
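// Aside, illustration only (not part of the patch): with TypeScale == 2 (a
// v4i32 access), an IR-level constant increment of 16 elements becomes
// 16 << 2 == 64 bytes, which passes the checks above (|64| <= 512 and
// 64 % 4 == 0) and can be folded into a QI gather/scatter immediate, whereas
// an increment of 200 elements gives 800 bytes and is rejected.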
+
+Value *MVEGatherScatterLowering::lowerGather(IntrinsicInst *I) {
using namespace PatternMatch;
LLVM_DEBUG(dbgs() << "masked gathers: checking transform preconditions\n");
// @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
// Attempt to turn the masked gather in I into a MVE intrinsic
// Potentially optimising the addressing modes as we do so.
- Type *Ty = I->getType();
+ auto *Ty = cast<FixedVectorType>(I->getType());
Value *Ptr = I->getArgOperand(0);
- unsigned Alignment = cast<ConstantInt>(I->getArgOperand(1))->getZExtValue();
+ Align Alignment = cast<ConstantInt>(I->getArgOperand(1))->getAlignValue();
Value *Mask = I->getArgOperand(2);
Value *PassThru = I->getArgOperand(3);
- if (!isLegalTypeAndAlignment(Ty->getVectorNumElements(),
- Ty->getScalarSizeInBits(), Alignment))
- return false;
+ if (!isLegalTypeAndAlignment(Ty->getNumElements(), Ty->getScalarSizeInBits(),
+ Alignment))
+ return nullptr;
lookThroughBitcast(Ptr);
assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
IRBuilder<> Builder(I->getContext());
Builder.SetInsertPoint(I);
Builder.SetCurrentDebugLocation(I->getDebugLoc());
- Value *Load = tryCreateMaskedGatherOffset(I, Ptr, Builder);
+
+ Instruction *Root = I;
+ Value *Load = tryCreateMaskedGatherOffset(I, Ptr, Root, Builder);
if (!Load)
Load = tryCreateMaskedGatherBase(I, Ptr, Builder);
if (!Load)
- return false;
+ return nullptr;
if (!isa<UndefValue>(PassThru) && !match(PassThru, m_Zero())) {
LLVM_DEBUG(dbgs() << "masked gathers: found non-trivial passthru - "
@@ -206,72 +333,649 @@ bool MVEGatherScatterLowering::lowerGather(IntrinsicInst *I) {
Load = Builder.CreateSelect(Mask, Load, PassThru);
}
+ Root->replaceAllUsesWith(Load);
+ Root->eraseFromParent();
+ if (Root != I)
+ // If this was an extending gather, we need to get rid of the sext/zext
+ // sext/zext as well as of the gather itself
+ I->eraseFromParent();
+
LLVM_DEBUG(dbgs() << "masked gathers: successfully built masked gather\n");
- I->replaceAllUsesWith(Load);
- I->eraseFromParent();
- return true;
+ return Load;
}
-Value *MVEGatherScatterLowering::tryCreateMaskedGatherBase(
- IntrinsicInst *I, Value *Ptr, IRBuilder<> Builder) {
+Value *MVEGatherScatterLowering::tryCreateMaskedGatherBase(IntrinsicInst *I,
+ Value *Ptr,
+ IRBuilder<> &Builder,
+ int64_t Increment) {
using namespace PatternMatch;
+ auto *Ty = cast<FixedVectorType>(I->getType());
LLVM_DEBUG(dbgs() << "masked gathers: loading from vector of pointers\n");
- Type *Ty = I->getType();
- if (Ty->getVectorNumElements() != 4)
+ if (Ty->getNumElements() != 4 || Ty->getScalarSizeInBits() != 32)
// Can't build an intrinsic for this
return nullptr;
Value *Mask = I->getArgOperand(2);
if (match(Mask, m_One()))
return Builder.CreateIntrinsic(Intrinsic::arm_mve_vldr_gather_base,
{Ty, Ptr->getType()},
- {Ptr, Builder.getInt32(0)});
+ {Ptr, Builder.getInt32(Increment)});
else
return Builder.CreateIntrinsic(
Intrinsic::arm_mve_vldr_gather_base_predicated,
{Ty, Ptr->getType(), Mask->getType()},
- {Ptr, Builder.getInt32(0), Mask});
+ {Ptr, Builder.getInt32(Increment), Mask});
+}
+
+Value *MVEGatherScatterLowering::tryCreateMaskedGatherBaseWB(
+ IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder, int64_t Increment) {
+ using namespace PatternMatch;
+ auto *Ty = cast<FixedVectorType>(I->getType());
+ LLVM_DEBUG(
+ dbgs()
+ << "masked gathers: loading from vector of pointers with writeback\n");
+ if (Ty->getNumElements() != 4 || Ty->getScalarSizeInBits() != 32)
+ // Can't build an intrinsic for this
+ return nullptr;
+ Value *Mask = I->getArgOperand(2);
+ if (match(Mask, m_One()))
+ return Builder.CreateIntrinsic(Intrinsic::arm_mve_vldr_gather_base_wb,
+ {Ty, Ptr->getType()},
+ {Ptr, Builder.getInt32(Increment)});
+ else
+ return Builder.CreateIntrinsic(
+ Intrinsic::arm_mve_vldr_gather_base_wb_predicated,
+ {Ty, Ptr->getType(), Mask->getType()},
+ {Ptr, Builder.getInt32(Increment), Mask});
}
Value *MVEGatherScatterLowering::tryCreateMaskedGatherOffset(
- IntrinsicInst *I, Value *Ptr, IRBuilder<> Builder) {
+ IntrinsicInst *I, Value *Ptr, Instruction *&Root, IRBuilder<> &Builder) {
using namespace PatternMatch;
- Type *Ty = I->getType();
+
+ Type *OriginalTy = I->getType();
+ Type *ResultTy = OriginalTy;
+
+ unsigned Unsigned = 1;
+ // The size of the gather was already checked in isLegalTypeAndAlignment;
+ // if it was not a full vector width an appropriate extend should follow.
+ auto *Extend = Root;
+ if (OriginalTy->getPrimitiveSizeInBits() < 128) {
+ // Only transform gathers with exactly one use
+ if (!I->hasOneUse())
+ return nullptr;
+
+ // The correct root to replace is not the CallInst itself, but the
+ // instruction which extends it
+ Extend = cast<Instruction>(*I->users().begin());
+ if (isa<SExtInst>(Extend)) {
+ Unsigned = 0;
+ } else if (!isa<ZExtInst>(Extend)) {
+ LLVM_DEBUG(dbgs() << "masked gathers: extend needed but not provided. "
+ << "Expanding\n");
+ return nullptr;
+ }
+ LLVM_DEBUG(dbgs() << "masked gathers: found an extending gather\n");
+ ResultTy = Extend->getType();
+ // The final size of the gather must be a full vector width
+ if (ResultTy->getPrimitiveSizeInBits() != 128) {
+ LLVM_DEBUG(dbgs() << "masked gathers: extending from the wrong type. "
+ << "Expanding\n");
+ return nullptr;
+ }
+ }
+
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
Value *Offsets;
- Value *BasePtr = checkGEP(Offsets, Ty, Ptr, Builder);
+ Value *BasePtr = checkGEP(Offsets, ResultTy, GEP, Builder);
if (!BasePtr)
return nullptr;
+ // Check whether the offset is a constant increment that could be merged into
+ // a QI gather
+ Value *Load = tryCreateIncrementingGatScat(I, BasePtr, Offsets, GEP, Builder);
+ if (Load)
+ return Load;
- unsigned Scale;
- int GEPElemSize =
- BasePtr->getType()->getPointerElementType()->getPrimitiveSizeInBits();
- int ResultElemSize = Ty->getScalarSizeInBits();
- // This can be a 32bit load scaled by 4, a 16bit load scaled by 2, or a
- // 8bit, 16bit or 32bit load scaled by 1
- if (GEPElemSize == 32 && ResultElemSize == 32) {
- Scale = 2;
- } else if (GEPElemSize == 16 && ResultElemSize == 16) {
- Scale = 1;
- } else if (GEPElemSize == 8) {
- Scale = 0;
- } else {
- LLVM_DEBUG(dbgs() << "masked gathers: incorrect scale for load. Can't"
- << " create masked gather\n");
+ int Scale = computeScale(
+ BasePtr->getType()->getPointerElementType()->getPrimitiveSizeInBits(),
+ OriginalTy->getScalarSizeInBits());
+ if (Scale == -1)
return nullptr;
- }
+ Root = Extend;
Value *Mask = I->getArgOperand(2);
if (!match(Mask, m_One()))
return Builder.CreateIntrinsic(
Intrinsic::arm_mve_vldr_gather_offset_predicated,
- {Ty, BasePtr->getType(), Offsets->getType(), Mask->getType()},
- {BasePtr, Offsets, Builder.getInt32(Ty->getScalarSizeInBits()),
- Builder.getInt32(Scale), Builder.getInt32(1), Mask});
+ {ResultTy, BasePtr->getType(), Offsets->getType(), Mask->getType()},
+ {BasePtr, Offsets, Builder.getInt32(OriginalTy->getScalarSizeInBits()),
+ Builder.getInt32(Scale), Builder.getInt32(Unsigned), Mask});
else
return Builder.CreateIntrinsic(
Intrinsic::arm_mve_vldr_gather_offset,
- {Ty, BasePtr->getType(), Offsets->getType()},
- {BasePtr, Offsets, Builder.getInt32(Ty->getScalarSizeInBits()),
- Builder.getInt32(Scale), Builder.getInt32(1)});
+ {ResultTy, BasePtr->getType(), Offsets->getType()},
+ {BasePtr, Offsets, Builder.getInt32(OriginalTy->getScalarSizeInBits()),
+ Builder.getInt32(Scale), Builder.getInt32(Unsigned)});
+}
+
+Value *MVEGatherScatterLowering::lowerScatter(IntrinsicInst *I) {
+ using namespace PatternMatch;
+ LLVM_DEBUG(dbgs() << "masked scatters: checking transform preconditions\n");
+
+ // @llvm.masked.scatter.*(data, ptrs, alignment, mask)
+ // Attempt to turn the masked scatter in I into a MVE intrinsic
+ // Potentially optimising the addressing modes as we do so.
+ Value *Input = I->getArgOperand(0);
+ Value *Ptr = I->getArgOperand(1);
+ Align Alignment = cast<ConstantInt>(I->getArgOperand(2))->getAlignValue();
+ auto *Ty = cast<FixedVectorType>(Input->getType());
+
+ if (!isLegalTypeAndAlignment(Ty->getNumElements(), Ty->getScalarSizeInBits(),
+ Alignment))
+ return nullptr;
+
+ lookThroughBitcast(Ptr);
+ assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
+
+ IRBuilder<> Builder(I->getContext());
+ Builder.SetInsertPoint(I);
+ Builder.SetCurrentDebugLocation(I->getDebugLoc());
+
+ Value *Store = tryCreateMaskedScatterOffset(I, Ptr, Builder);
+ if (!Store)
+ Store = tryCreateMaskedScatterBase(I, Ptr, Builder);
+ if (!Store)
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "masked scatters: successfully built masked scatter\n");
+ I->eraseFromParent();
+ return Store;
+}
+
+Value *MVEGatherScatterLowering::tryCreateMaskedScatterBase(
+ IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder, int64_t Increment) {
+ using namespace PatternMatch;
+ Value *Input = I->getArgOperand(0);
+ auto *Ty = cast<FixedVectorType>(Input->getType());
+ // Only QR variants allow truncating
+ if (!(Ty->getNumElements() == 4 && Ty->getScalarSizeInBits() == 32)) {
+ // Can't build an intrinsic for this
+ return nullptr;
+ }
+ Value *Mask = I->getArgOperand(3);
+ // int_arm_mve_vstr_scatter_base(_predicated) addr, offset, data(, mask)
+ LLVM_DEBUG(dbgs() << "masked scatters: storing to a vector of pointers\n");
+ if (match(Mask, m_One()))
+ return Builder.CreateIntrinsic(Intrinsic::arm_mve_vstr_scatter_base,
+ {Ptr->getType(), Input->getType()},
+ {Ptr, Builder.getInt32(Increment), Input});
+ else
+ return Builder.CreateIntrinsic(
+ Intrinsic::arm_mve_vstr_scatter_base_predicated,
+ {Ptr->getType(), Input->getType(), Mask->getType()},
+ {Ptr, Builder.getInt32(Increment), Input, Mask});
+}
+
+Value *MVEGatherScatterLowering::tryCreateMaskedScatterBaseWB(
+ IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder, int64_t Increment) {
+ using namespace PatternMatch;
+ Value *Input = I->getArgOperand(0);
+ auto *Ty = cast<FixedVectorType>(Input->getType());
+ LLVM_DEBUG(
+ dbgs()
+ << "masked scatters: storing to a vector of pointers with writeback\n");
+ if (Ty->getNumElements() != 4 || Ty->getScalarSizeInBits() != 32)
+ // Can't build an intrinsic for this
+ return nullptr;
+ Value *Mask = I->getArgOperand(3);
+ if (match(Mask, m_One()))
+ return Builder.CreateIntrinsic(Intrinsic::arm_mve_vstr_scatter_base_wb,
+ {Ptr->getType(), Input->getType()},
+ {Ptr, Builder.getInt32(Increment), Input});
+ else
+ return Builder.CreateIntrinsic(
+ Intrinsic::arm_mve_vstr_scatter_base_wb_predicated,
+ {Ptr->getType(), Input->getType(), Mask->getType()},
+ {Ptr, Builder.getInt32(Increment), Input, Mask});
+}
+
+Value *MVEGatherScatterLowering::tryCreateMaskedScatterOffset(
+ IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder) {
+ using namespace PatternMatch;
+ Value *Input = I->getArgOperand(0);
+ Value *Mask = I->getArgOperand(3);
+ Type *InputTy = Input->getType();
+ Type *MemoryTy = InputTy;
+ LLVM_DEBUG(dbgs() << "masked scatters: getelementpointer found. Storing"
+ << " to base + vector of offsets\n");
+ // If the input has been truncated, try to integrate that trunc into the
+ // scatter instruction (we don't care about alignment here)
+ if (TruncInst *Trunc = dyn_cast<TruncInst>(Input)) {
+ Value *PreTrunc = Trunc->getOperand(0);
+ Type *PreTruncTy = PreTrunc->getType();
+ if (PreTruncTy->getPrimitiveSizeInBits() == 128) {
+ Input = PreTrunc;
+ InputTy = PreTruncTy;
+ }
+ }
+ if (InputTy->getPrimitiveSizeInBits() != 128) {
+ LLVM_DEBUG(
+ dbgs() << "masked scatters: cannot create scatters for non-standard"
+ << " input types. Expanding.\n");
+ return nullptr;
+ }
+
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ Value *Offsets;
+ Value *BasePtr = checkGEP(Offsets, InputTy, GEP, Builder);
+ if (!BasePtr)
+ return nullptr;
+ // Check whether the offset is a constant increment that could be merged into
+ // a QI gather
+ Value *Store =
+ tryCreateIncrementingGatScat(I, BasePtr, Offsets, GEP, Builder);
+ if (Store)
+ return Store;
+ int Scale = computeScale(
+ BasePtr->getType()->getPointerElementType()->getPrimitiveSizeInBits(),
+ MemoryTy->getScalarSizeInBits());
+ if (Scale == -1)
+ return nullptr;
+
+ if (!match(Mask, m_One()))
+ return Builder.CreateIntrinsic(
+ Intrinsic::arm_mve_vstr_scatter_offset_predicated,
+ {BasePtr->getType(), Offsets->getType(), Input->getType(),
+ Mask->getType()},
+ {BasePtr, Offsets, Input,
+ Builder.getInt32(MemoryTy->getScalarSizeInBits()),
+ Builder.getInt32(Scale), Mask});
+ else
+ return Builder.CreateIntrinsic(
+ Intrinsic::arm_mve_vstr_scatter_offset,
+ {BasePtr->getType(), Offsets->getType(), Input->getType()},
+ {BasePtr, Offsets, Input,
+ Builder.getInt32(MemoryTy->getScalarSizeInBits()),
+ Builder.getInt32(Scale)});
+}
+
+Value *MVEGatherScatterLowering::tryCreateIncrementingGatScat(
+ IntrinsicInst *I, Value *BasePtr, Value *Offsets, GetElementPtrInst *GEP,
+ IRBuilder<> &Builder) {
+ FixedVectorType *Ty;
+ if (I->getIntrinsicID() == Intrinsic::masked_gather)
+ Ty = cast<FixedVectorType>(I->getType());
+ else
+ Ty = cast<FixedVectorType>(I->getArgOperand(0)->getType());
+ // Incrementing gathers only exist for v4i32
+ if (Ty->getNumElements() != 4 ||
+ Ty->getScalarSizeInBits() != 32)
+ return nullptr;
+ Loop *L = LI->getLoopFor(I->getParent());
+ if (L == nullptr)
+ // Incrementing gathers are not beneficial outside of a loop
+ return nullptr;
+ LLVM_DEBUG(dbgs() << "masked gathers/scatters: trying to build incrementing "
+ "wb gather/scatter\n");
+
+ // The gep was in charge of making sure the offsets are scaled correctly
+ // - calculate that factor so it can be applied by hand
+ DataLayout DT = I->getParent()->getParent()->getParent()->getDataLayout();
+ int TypeScale =
+ computeScale(DT.getTypeSizeInBits(GEP->getOperand(0)->getType()),
+ DT.getTypeSizeInBits(GEP->getType()) /
+ cast<FixedVectorType>(GEP->getType())->getNumElements());
+ if (TypeScale == -1)
+ return nullptr;
+
+ if (GEP->hasOneUse()) {
+ // Only in this case do we want to build a wb gather, because the wb will
+ // change the phi which does affect other users of the gep (which will still
+ // be using the phi in the old way)
+ Value *Load =
+ tryCreateIncrementingWBGatScat(I, BasePtr, Offsets, TypeScale, Builder);
+ if (Load != nullptr)
+ return Load;
+ }
+ LLVM_DEBUG(dbgs() << "masked gathers/scatters: trying to build incrementing "
+ "non-wb gather/scatter\n");
+
+ std::pair<Value *, int64_t> Add = getVarAndConst(Offsets, TypeScale);
+ if (Add.first == nullptr)
+ return nullptr;
+ Value *OffsetsIncoming = Add.first;
+ int64_t Immediate = Add.second;
+
+ // Make sure the offsets are scaled correctly
+ Instruction *ScaledOffsets = BinaryOperator::Create(
+ Instruction::Shl, OffsetsIncoming,
+ Builder.CreateVectorSplat(Ty->getNumElements(), Builder.getInt32(TypeScale)),
+ "ScaledIndex", I);
+ // Add the base to the offsets
+ OffsetsIncoming = BinaryOperator::Create(
+ Instruction::Add, ScaledOffsets,
+ Builder.CreateVectorSplat(
+ Ty->getNumElements(),
+ Builder.CreatePtrToInt(
+ BasePtr,
+ cast<VectorType>(ScaledOffsets->getType())->getElementType())),
+ "StartIndex", I);
+
+ if (I->getIntrinsicID() == Intrinsic::masked_gather)
+ return cast<IntrinsicInst>(
+ tryCreateMaskedGatherBase(I, OffsetsIncoming, Builder, Immediate));
+ else
+ return cast<IntrinsicInst>(
+ tryCreateMaskedScatterBase(I, OffsetsIncoming, Builder, Immediate));
+}
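// Aside, illustration only (not part of the patch): the rewrite above turns
//   gather(base, Summand + C)                  // GEP index vector = Summand + C
// into
//   ScaledIndex = Summand << TypeScale         // variable part, in bytes
//   StartIndex  = ScaledIndex + splat(ptrtoint base)
//   vldr_gather_base(StartIndex, #(C << TypeScale))
// so the constant part of the index is carried by the QI instruction's
// immediate field rather than by an extra vector add inside the loop.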
+
+Value *MVEGatherScatterLowering::tryCreateIncrementingWBGatScat(
+ IntrinsicInst *I, Value *BasePtr, Value *Offsets, unsigned TypeScale,
+ IRBuilder<> &Builder) {
+ // Check whether this gather's offset is incremented by a constant - if so,
+ // and the load is of the right type, we can merge this into a QI gather
+ Loop *L = LI->getLoopFor(I->getParent());
+ // Offsets that are worth merging into this instruction will be incremented
+ // by a constant, thus we're looking for an add of a phi and a constant
+ PHINode *Phi = dyn_cast<PHINode>(Offsets);
+ if (Phi == nullptr || Phi->getNumIncomingValues() != 2 ||
+ Phi->getParent() != L->getHeader() || Phi->getNumUses() != 2)
+ // No phi means no IV to write back to; if there is a phi, we expect it
+ // to have exactly two incoming values; the only phis we are interested in
+ // will be loop IVs and have exactly two uses, one in their increment and
+ // one in the gather's gep
+ return nullptr;
+
+ unsigned IncrementIndex =
+ Phi->getIncomingBlock(0) == L->getLoopLatch() ? 0 : 1;
+ // Look through the phi to the phi increment
+ Offsets = Phi->getIncomingValue(IncrementIndex);
+
+ std::pair<Value *, int64_t> Add = getVarAndConst(Offsets, TypeScale);
+ if (Add.first == nullptr)
+ return nullptr;
+ Value *OffsetsIncoming = Add.first;
+ int64_t Immediate = Add.second;
+ if (OffsetsIncoming != Phi)
+ // Then the increment we are looking at is not an increment of the
+ // induction variable, and we don't want to do a writeback
+ return nullptr;
+
+ Builder.SetInsertPoint(&Phi->getIncomingBlock(1 - IncrementIndex)->back());
+ unsigned NumElems =
+ cast<FixedVectorType>(OffsetsIncoming->getType())->getNumElements();
+
+ // Make sure the offsets are scaled correctly
+ Instruction *ScaledOffsets = BinaryOperator::Create(
+ Instruction::Shl, Phi->getIncomingValue(1 - IncrementIndex),
+ Builder.CreateVectorSplat(NumElems, Builder.getInt32(TypeScale)),
+ "ScaledIndex", &Phi->getIncomingBlock(1 - IncrementIndex)->back());
+ // Add the base to the offsets
+ OffsetsIncoming = BinaryOperator::Create(
+ Instruction::Add, ScaledOffsets,
+ Builder.CreateVectorSplat(
+ NumElems,
+ Builder.CreatePtrToInt(
+ BasePtr,
+ cast<VectorType>(ScaledOffsets->getType())->getElementType())),
+ "StartIndex", &Phi->getIncomingBlock(1 - IncrementIndex)->back());
+ // The gather is pre-incrementing
+ OffsetsIncoming = BinaryOperator::Create(
+ Instruction::Sub, OffsetsIncoming,
+ Builder.CreateVectorSplat(NumElems, Builder.getInt32(Immediate)),
+ "PreIncrementStartIndex",
+ &Phi->getIncomingBlock(1 - IncrementIndex)->back());
+ Phi->setIncomingValue(1 - IncrementIndex, OffsetsIncoming);
+
+ Builder.SetInsertPoint(I);
+
+ Value *EndResult;
+ Value *NewInduction;
+ if (I->getIntrinsicID() == Intrinsic::masked_gather) {
+ // Build the incrementing gather
+ Value *Load = tryCreateMaskedGatherBaseWB(I, Phi, Builder, Immediate);
+ // One value to be handed to whoever uses the gather, one is the loop
+ // increment
+ EndResult = Builder.CreateExtractValue(Load, 0, "Gather");
+ NewInduction = Builder.CreateExtractValue(Load, 1, "GatherIncrement");
+ } else {
+ // Build the incrementing scatter
+ NewInduction = tryCreateMaskedScatterBaseWB(I, Phi, Builder, Immediate);
+ EndResult = NewInduction;
+ }
+ Instruction *AddInst = cast<Instruction>(Offsets);
+ AddInst->replaceAllUsesWith(NewInduction);
+ AddInst->eraseFromParent();
+ Phi->setIncomingValue(IncrementIndex, NewInduction);
+
+ return EndResult;
+}
+
+void MVEGatherScatterLowering::pushOutAdd(PHINode *&Phi,
+ Value *OffsSecondOperand,
+ unsigned StartIndex) {
+ LLVM_DEBUG(dbgs() << "masked gathers/scatters: optimising add instruction\n");
+ Instruction *InsertionPoint =
+ &cast<Instruction>(Phi->getIncomingBlock(StartIndex)->back());
+ // Initialize the phi with a vector that contains a sum of the constants
+ Instruction *NewIndex = BinaryOperator::Create(
+ Instruction::Add, Phi->getIncomingValue(StartIndex), OffsSecondOperand,
+ "PushedOutAdd", InsertionPoint);
+ unsigned IncrementIndex = StartIndex == 0 ? 1 : 0;
+
+ // Order such that start index comes first (this reduces mov's)
+ Phi->addIncoming(NewIndex, Phi->getIncomingBlock(StartIndex));
+ Phi->addIncoming(Phi->getIncomingValue(IncrementIndex),
+ Phi->getIncomingBlock(IncrementIndex));
+ Phi->removeIncomingValue(IncrementIndex);
+ Phi->removeIncomingValue(StartIndex);
+}
+
+void MVEGatherScatterLowering::pushOutMul(PHINode *&Phi,
+ Value *IncrementPerRound,
+ Value *OffsSecondOperand,
+ unsigned LoopIncrement,
+ IRBuilder<> &Builder) {
+ LLVM_DEBUG(dbgs() << "masked gathers/scatters: optimising mul instruction\n");
+
+ // Create a new scalar add outside of the loop and transform it to a splat
+ // by which loop variable can be incremented
+ Instruction *InsertionPoint = &cast<Instruction>(
+ Phi->getIncomingBlock(LoopIncrement == 1 ? 0 : 1)->back());
+
+ // Create a new index
+ Value *StartIndex = BinaryOperator::Create(
+ Instruction::Mul, Phi->getIncomingValue(LoopIncrement == 1 ? 0 : 1),
+ OffsSecondOperand, "PushedOutMul", InsertionPoint);
+
+ Instruction *Product =
+ BinaryOperator::Create(Instruction::Mul, IncrementPerRound,
+ OffsSecondOperand, "Product", InsertionPoint);
+ // Increment NewIndex by Product instead of the multiplication
+ Instruction *NewIncrement = BinaryOperator::Create(
+ Instruction::Add, Phi, Product, "IncrementPushedOutMul",
+ cast<Instruction>(Phi->getIncomingBlock(LoopIncrement)->back())
+ .getPrevNode());
+
+ Phi->addIncoming(StartIndex,
+ Phi->getIncomingBlock(LoopIncrement == 1 ? 0 : 1));
+ Phi->addIncoming(NewIncrement, Phi->getIncomingBlock(LoopIncrement));
+ Phi->removeIncomingValue((unsigned)0);
+ Phi->removeIncomingValue((unsigned)0);
+ return;
+}
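// Aside, illustration only (not part of the patch): pushOutMul turns
//   loop:  Phi  = phi [ Start, preheader ], [ Phi + Inc, latch ]
//          Offs = Phi * C
// into
//   preheader: NewStart = Start * C ; Product = Inc * C
//   loop:      Phi  = phi [ NewStart, preheader ], [ Phi + Product, latch ]
// so the multiply runs once outside the loop and each iteration only adds.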
+
+// Check whether all usages of this instruction are as offsets of
+// gathers/scatters or simple arithmetic only used by gathers/scatters
+static bool hasAllGatScatUsers(Instruction *I) {
+ if (I->hasNUses(0)) {
+ return false;
+ }
+ bool Gatscat = true;
+ for (User *U : I->users()) {
+ if (!isa<Instruction>(U))
+ return false;
+ if (isa<GetElementPtrInst>(U) ||
+ isGatherScatter(dyn_cast<IntrinsicInst>(U))) {
+ return Gatscat;
+ } else {
+ unsigned OpCode = cast<Instruction>(U)->getOpcode();
+ if ((OpCode == Instruction::Add || OpCode == Instruction::Mul) &&
+ hasAllGatScatUsers(cast<Instruction>(U))) {
+ continue;
+ }
+ return false;
+ }
+ }
+ return Gatscat;
+}
+
+bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB,
+ LoopInfo *LI) {
+ LLVM_DEBUG(dbgs() << "masked gathers/scatters: trying to optimize\n");
+ // Optimise the addresses of gathers/scatters by moving invariant
+ // calculations out of the loop
+ if (!isa<Instruction>(Offsets))
+ return false;
+ Instruction *Offs = cast<Instruction>(Offsets);
+ if (Offs->getOpcode() != Instruction::Add &&
+ Offs->getOpcode() != Instruction::Mul)
+ return false;
+ Loop *L = LI->getLoopFor(BB);
+ if (L == nullptr)
+ return false;
+ if (!Offs->hasOneUse()) {
+ if (!hasAllGatScatUsers(Offs))
+ return false;
+ }
+
+ // Find out which, if any, operand of the instruction
+ // is a phi node
+ PHINode *Phi;
+ int OffsSecondOp;
+ if (isa<PHINode>(Offs->getOperand(0))) {
+ Phi = cast<PHINode>(Offs->getOperand(0));
+ OffsSecondOp = 1;
+ } else if (isa<PHINode>(Offs->getOperand(1))) {
+ Phi = cast<PHINode>(Offs->getOperand(1));
+ OffsSecondOp = 0;
+ } else {
+ bool Changed = false;
+ if (isa<Instruction>(Offs->getOperand(0)) &&
+ L->contains(cast<Instruction>(Offs->getOperand(0))))
+ Changed |= optimiseOffsets(Offs->getOperand(0), BB, LI);
+ if (isa<Instruction>(Offs->getOperand(1)) &&
+ L->contains(cast<Instruction>(Offs->getOperand(1))))
+ Changed |= optimiseOffsets(Offs->getOperand(1), BB, LI);
+ if (!Changed) {
+ return false;
+ } else {
+ if (isa<PHINode>(Offs->getOperand(0))) {
+ Phi = cast<PHINode>(Offs->getOperand(0));
+ OffsSecondOp = 1;
+ } else if (isa<PHINode>(Offs->getOperand(1))) {
+ Phi = cast<PHINode>(Offs->getOperand(1));
+ OffsSecondOp = 0;
+ } else {
+ return false;
+ }
+ }
+ }
+ // A phi node we want to perform this function on should be from the
+ // loop header, and shouldn't have more than 2 incoming values
+ if (Phi->getParent() != L->getHeader() ||
+ Phi->getNumIncomingValues() != 2)
+ return false;
+
+ // The phi must be an induction variable
+ Instruction *Op;
+ int IncrementingBlock = -1;
+
+ for (int i = 0; i < 2; i++)
+ if ((Op = dyn_cast<Instruction>(Phi->getIncomingValue(i))) != nullptr)
+ if (Op->getOpcode() == Instruction::Add &&
+ (Op->getOperand(0) == Phi || Op->getOperand(1) == Phi))
+ IncrementingBlock = i;
+ if (IncrementingBlock == -1)
+ return false;
+
+ Instruction *IncInstruction =
+ cast<Instruction>(Phi->getIncomingValue(IncrementingBlock));
+
+ // If the phi is not used by anything else, we can just adapt it when
+ // replacing the instruction; if it is, we'll have to duplicate it
+ PHINode *NewPhi;
+ Value *IncrementPerRound = IncInstruction->getOperand(
+ (IncInstruction->getOperand(0) == Phi) ? 1 : 0);
+
+ // Get the value that is added to/multiplied with the phi
+ Value *OffsSecondOperand = Offs->getOperand(OffsSecondOp);
+
+ if (IncrementPerRound->getType() != OffsSecondOperand->getType())
+ // Something has gone wrong, abort
+ return false;
+
+ // Only proceed if the increment per round is a constant or an instruction
+ // which does not originate from within the loop
+ if (!isa<Constant>(IncrementPerRound) &&
+ !(isa<Instruction>(IncrementPerRound) &&
+ !L->contains(cast<Instruction>(IncrementPerRound))))
+ return false;
+
+ if (Phi->getNumUses() == 2) {
+ // No other users -> reuse existing phi (One user is the instruction
+ // we're looking at, the other is the phi increment)
+ if (IncInstruction->getNumUses() != 1) {
+ // If the incrementing instruction does have more users than
+ // our phi, we need to copy it
+ IncInstruction = BinaryOperator::Create(
+ Instruction::BinaryOps(IncInstruction->getOpcode()), Phi,
+ IncrementPerRound, "LoopIncrement", IncInstruction);
+ Phi->setIncomingValue(IncrementingBlock, IncInstruction);
+ }
+ NewPhi = Phi;
+ } else {
+ // There are other users -> create a new phi
+ NewPhi = PHINode::Create(Phi->getType(), 0, "NewPhi", Phi);
+ std::vector<Value *> Increases;
+ // Copy the incoming values of the old phi
+ NewPhi->addIncoming(Phi->getIncomingValue(IncrementingBlock == 1 ? 0 : 1),
+ Phi->getIncomingBlock(IncrementingBlock == 1 ? 0 : 1));
+ IncInstruction = BinaryOperator::Create(
+ Instruction::BinaryOps(IncInstruction->getOpcode()), NewPhi,
+ IncrementPerRound, "LoopIncrement", IncInstruction);
+ NewPhi->addIncoming(IncInstruction,
+ Phi->getIncomingBlock(IncrementingBlock));
+ IncrementingBlock = 1;
+ }
+
+ IRBuilder<> Builder(BB->getContext());
+ Builder.SetInsertPoint(Phi);
+ Builder.SetCurrentDebugLocation(Offs->getDebugLoc());
+
+ switch (Offs->getOpcode()) {
+ case Instruction::Add:
+ pushOutAdd(NewPhi, OffsSecondOperand, IncrementingBlock == 1 ? 0 : 1);
+ break;
+ case Instruction::Mul:
+ pushOutMul(NewPhi, IncrementPerRound, OffsSecondOperand, IncrementingBlock,
+ Builder);
+ break;
+ default:
+ return false;
+ }
+ LLVM_DEBUG(
+ dbgs() << "masked gathers/scatters: simplified loop variable add/mul\n");
+
+ // The instruction has now been "absorbed" into the phi value
+ Offs->replaceAllUsesWith(NewPhi);
+ if (Offs->hasNUses(0))
+ Offs->eraseFromParent();
+ // Clean up the old increment in case it's unused because we built a new
+ // one
+ if (IncInstruction->hasNUses(0))
+ IncInstruction->eraseFromParent();
+
+ return true;
}
bool MVEGatherScatterLowering::runOnFunction(Function &F) {
@@ -282,20 +986,51 @@ bool MVEGatherScatterLowering::runOnFunction(Function &F) {
auto *ST = &TM.getSubtarget<ARMSubtarget>(F);
if (!ST->hasMVEIntegerOps())
return false;
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SmallVector<IntrinsicInst *, 4> Gathers;
+ SmallVector<IntrinsicInst *, 4> Scatters;
+
+ bool Changed = false;
+
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
- if (II && II->getIntrinsicID() == Intrinsic::masked_gather)
+ if (II && II->getIntrinsicID() == Intrinsic::masked_gather) {
Gathers.push_back(II);
+ if (isa<GetElementPtrInst>(II->getArgOperand(0)))
+ Changed |= optimiseOffsets(
+ cast<Instruction>(II->getArgOperand(0))->getOperand(1),
+ II->getParent(), LI);
+ } else if (II && II->getIntrinsicID() == Intrinsic::masked_scatter) {
+ Scatters.push_back(II);
+ if (isa<GetElementPtrInst>(II->getArgOperand(1)))
+ Changed |= optimiseOffsets(
+ cast<Instruction>(II->getArgOperand(1))->getOperand(1),
+ II->getParent(), LI);
+ }
}
}
- if (Gathers.empty())
- return false;
+ for (unsigned i = 0; i < Gathers.size(); i++) {
+ IntrinsicInst *I = Gathers[i];
+ Value *L = lowerGather(I);
+ if (L == nullptr)
+ continue;
- for (IntrinsicInst *I : Gathers)
- lowerGather(I);
+ // Get rid of any now dead instructions
+ SimplifyInstructionsInBlock(cast<Instruction>(L)->getParent());
+ Changed = true;
+ }
- return true;
+ for (unsigned i = 0; i < Scatters.size(); i++) {
+ IntrinsicInst *I = Scatters[i];
+ Value *S = lowerScatter(I);
+ if (S == nullptr)
+ continue;
+
+ // Get rid of any now dead instructions
+ SimplifyInstructionsInBlock(cast<Instruction>(S)->getParent());
+ Changed = true;
+ }
+ return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp
index 038c68739cdf..5bf3522ab2e6 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp
@@ -1,4 +1,4 @@
-//===- MVETailPredication.cpp - MVE Tail Predication ----------------------===//
+//===- MVETailPredication.cpp - MVE Tail Predication ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -8,8 +8,17 @@
//
/// \file
/// Armv8.1m introduced MVE, M-Profile Vector Extension, and low-overhead
-/// branches to help accelerate DSP applications. These two extensions can be
-/// combined to provide implicit vector predication within a low-overhead loop.
+/// branches to help accelerate DSP applications. These two extensions,
+/// combined with a new form of predication called tail-predication, can be used
+/// to provide implicit vector predication within a low-overhead loop.
+/// This is implicit because the predicate of active/inactive lanes is
+/// calculated by hardware, and thus does not need to be explicitly passed
+/// to vector instructions. The instructions responsible for this are the
+/// DLSTP and WLSTP instructions, which set up a tail-predicated loop and
+/// the total number of data elements processed by the loop. The loop-end
+/// LETP instruction is responsible for decrementing and setting the remaining
+/// elements to be processed and generating the mask of active lanes.
+///
/// The HardwareLoops pass inserts intrinsics identifying loops that the
/// backend will attempt to convert into a low-overhead loop. The vectorizer is
/// responsible for generating a vectorized loop in which the lanes are
@@ -21,36 +30,62 @@
/// - A loop containing multiple VCPT instructions, predicating multiple VPT
/// blocks of instructions operating on different vector types.
///
-/// This pass inserts the inserts the VCTP intrinsic to represent the effect of
-/// tail predication. This will be picked up by the ARM Low-overhead loop pass,
-/// which performs the final transformation to a DLSTP or WLSTP tail-predicated
-/// loop.
+/// This pass:
+/// 1) Checks if the predicates of the masked load/store instructions are
+/// generated by intrinsic @llvm.get.active.lane.mask(). This intrinsic consumes
+/// the Backedge Taken Count (BTC) of the scalar loop as its second argument,
+/// which we extract to set up the number of elements processed by the loop.
+/// 2) Intrinsic @llvm.get.active.lane.mask() is then replaced by the MVE target
+/// specific VCTP intrinsic to represent the effect of tail predication.
+/// This will be picked up by the ARM Low-overhead loop pass, which performs
+/// the final transformation to a DLSTP or WLSTP tail-predicated loop.
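+/// As a sketch of step 2 (with invented value names, and the v4i1/i32 variant
+/// chosen purely for illustration), a predicate of the form
+///   %pred = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %iv, i32 %btc)
+/// feeding the masked loads/stores of the vector body is replaced by
+///   %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %remaining)
+/// where %remaining is a new phi counting down the number of elements still to
+/// be processed, decremented by the vector width each iteration.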
#include "ARM.h"
#include "ARMSubtarget.h"
+#include "ARMTargetTransformInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
using namespace llvm;
#define DEBUG_TYPE "mve-tail-predication"
#define DESC "Transform predicated vector loops to use MVE tail predication"
-cl::opt<bool>
-DisableTailPredication("disable-mve-tail-predication", cl::Hidden,
- cl::init(true),
- cl::desc("Disable MVE Tail Predication"));
+cl::opt<TailPredication::Mode> EnableTailPredication(
+ "tail-predication", cl::desc("MVE tail-predication options"),
+ cl::init(TailPredication::Disabled),
+ cl::values(clEnumValN(TailPredication::Disabled, "disabled",
+ "Don't tail-predicate loops"),
+ clEnumValN(TailPredication::EnabledNoReductions,
+ "enabled-no-reductions",
+ "Enable tail-predication, but not for reduction loops"),
+ clEnumValN(TailPredication::Enabled,
+ "enabled",
+ "Enable tail-predication, including reduction loops"),
+ clEnumValN(TailPredication::ForceEnabledNoReductions,
+ "force-enabled-no-reductions",
+ "Enable tail-predication, but not for reduction loops, "
+ "and force this which might be unsafe"),
+ clEnumValN(TailPredication::ForceEnabled,
+ "force-enabled",
+ "Enable tail-predication, including reduction loops, "
+ "and force this which might be unsafe")));
+
+
namespace {
class MVETailPredication : public LoopPass {
@@ -58,6 +93,7 @@ class MVETailPredication : public LoopPass {
Loop *L = nullptr;
ScalarEvolution *SE = nullptr;
TargetTransformInfo *TTI = nullptr;
+ const ARMSubtarget *ST = nullptr;
public:
static char ID;
@@ -76,7 +112,6 @@ public:
bool runOnLoop(Loop *L, LPPassManager&) override;
private:
-
/// Perform the relevant checks on the loop and convert if possible.
bool TryConvert(Value *TripCount);
@@ -84,19 +119,21 @@ private:
/// load/stores.
bool IsPredicatedVectorLoop();
- /// Compute a value for the total number of elements that the predicated
- /// loop will process.
- Value *ComputeElements(Value *TripCount, VectorType *VecTy);
-
- /// Is the icmp that generates an i1 vector, based upon a loop counter
- /// and a limit that is defined outside the loop.
- bool isTailPredicate(Instruction *Predicate, Value *NumElements);
+ /// Perform checks on the arguments of @llvm.get.active.lane.mask
+ /// intrinsic: check if the first is a loop induction variable, and for
+ /// the second check that no overflow can occur in the expression that uses
+ /// this backedge-taken count.
+ bool IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, Value *TripCount,
+ FixedVectorType *VecTy);
/// Insert the intrinsic to represent the effect of tail predication.
- void InsertVCTPIntrinsic(Instruction *Predicate,
- DenseMap<Instruction*, Instruction*> &NewPredicates,
- VectorType *VecTy,
- Value *NumElements);
+ void InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask, Value *TripCount,
+ FixedVectorType *VecTy);
+
+ /// Rematerialize the iteration count in exit blocks, which enables
+ /// ARMLowOverheadLoops to better optimise away loop update statements inside
+ /// hardware-loops.
+ void RematerializeIterCount();
};
} // end namespace
@@ -121,13 +158,14 @@ static bool IsMasked(Instruction *I) {
}
bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
- if (skipLoop(L) || DisableTailPredication)
+ if (skipLoop(L) || !EnableTailPredication)
return false;
+ MaskedInsts.clear();
Function &F = *L->getHeader()->getParent();
auto &TPC = getAnalysis<TargetPassConfig>();
auto &TM = TPC.getTM<TargetMachine>();
- auto *ST = &TM.getSubtarget<ARMSubtarget>(F);
+ ST = &TM.getSubtarget<ARMSubtarget>(F);
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
this->L = L;
@@ -185,125 +223,59 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
LLVM_DEBUG(dbgs() << "ARM TP: Running on Loop: " << *L << *Setup << "\n"
<< *Decrement << "\n");
- return TryConvert(Setup->getArgOperand(0));
-}
-bool MVETailPredication::isTailPredicate(Instruction *I, Value *NumElements) {
- // Look for the following:
-
- // %trip.count.minus.1 = add i32 %N, -1
- // %broadcast.splatinsert10 = insertelement <4 x i32> undef,
- // i32 %trip.count.minus.1, i32 0
- // %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10,
- // <4 x i32> undef,
- // <4 x i32> zeroinitializer
- // ...
- // ...
- // %index = phi i32
- // %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
- // %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert,
- // <4 x i32> undef,
- // <4 x i32> zeroinitializer
- // %induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
- // %pred = icmp ule <4 x i32> %induction, %broadcast.splat11
-
- // And return whether V == %pred.
-
- using namespace PatternMatch;
-
- CmpInst::Predicate Pred;
- Instruction *Shuffle = nullptr;
- Instruction *Induction = nullptr;
-
- // The vector icmp
- if (!match(I, m_ICmp(Pred, m_Instruction(Induction),
- m_Instruction(Shuffle))) ||
- Pred != ICmpInst::ICMP_ULE)
- return false;
-
- // First find the stuff outside the loop which is setting up the limit
- // vector....
- // The invariant shuffle that broadcast the limit into a vector.
- Instruction *Insert = nullptr;
- if (!match(Shuffle, m_ShuffleVector(m_Instruction(Insert), m_Undef(),
- m_Zero())))
- return false;
-
- // Insert the limit into a vector.
- Instruction *BECount = nullptr;
- if (!match(Insert, m_InsertElement(m_Undef(), m_Instruction(BECount),
- m_Zero())))
- return false;
-
- // The limit calculation, backedge count.
- Value *TripCount = nullptr;
- if (!match(BECount, m_Add(m_Value(TripCount), m_AllOnes())))
- return false;
-
- if (TripCount != NumElements || !L->isLoopInvariant(BECount))
- return false;
-
- // Now back to searching inside the loop body...
- // Find the add with takes the index iv and adds a constant vector to it.
- Instruction *BroadcastSplat = nullptr;
- Constant *Const = nullptr;
- if (!match(Induction, m_Add(m_Instruction(BroadcastSplat),
- m_Constant(Const))))
- return false;
-
- // Check that we're adding <0, 1, 2, 3...
- if (auto *CDS = dyn_cast<ConstantDataSequential>(Const)) {
- for (unsigned i = 0; i < CDS->getNumElements(); ++i) {
- if (CDS->getElementAsInteger(i) != i)
- return false;
- }
- } else
- return false;
-
- // The shuffle which broadcasts the index iv into a vector.
- if (!match(BroadcastSplat, m_ShuffleVector(m_Instruction(Insert), m_Undef(),
- m_Zero())))
- return false;
-
- // The insert element which initialises a vector with the index iv.
- Instruction *IV = nullptr;
- if (!match(Insert, m_InsertElement(m_Undef(), m_Instruction(IV), m_Zero())))
- return false;
-
- // The index iv.
- auto *Phi = dyn_cast<PHINode>(IV);
- if (!Phi)
- return false;
-
- // TODO: Don't think we need to check the entry value.
- Value *OnEntry = Phi->getIncomingValueForBlock(L->getLoopPreheader());
- if (!match(OnEntry, m_Zero()))
- return false;
-
- Value *InLoop = Phi->getIncomingValueForBlock(L->getLoopLatch());
- unsigned Lanes = cast<VectorType>(Insert->getType())->getNumElements();
-
- Instruction *LHS = nullptr;
- if (!match(InLoop, m_Add(m_Instruction(LHS), m_SpecificInt(Lanes))))
+ if (!TryConvert(Setup->getArgOperand(0))) {
+ LLVM_DEBUG(dbgs() << "ARM TP: Can't tail-predicate this loop.\n");
return false;
+ }
- return LHS == Phi;
+ return true;
}
-static VectorType* getVectorType(IntrinsicInst *I) {
+static FixedVectorType *getVectorType(IntrinsicInst *I) {
unsigned TypeOp = I->getIntrinsicID() == Intrinsic::masked_load ? 0 : 1;
auto *PtrTy = cast<PointerType>(I->getOperand(TypeOp)->getType());
- return cast<VectorType>(PtrTy->getElementType());
+ auto *VecTy = cast<FixedVectorType>(PtrTy->getElementType());
+ assert(VecTy && "No scalable vectors expected here");
+ return VecTy;
}
bool MVETailPredication::IsPredicatedVectorLoop() {
// Check that the loop contains at least one masked load/store intrinsic.
// We only support 'normal' vector instructions - other than masked
// load/stores.
+ bool ActiveLaneMask = false;
for (auto *BB : L->getBlocks()) {
for (auto &I : *BB) {
+ auto *Int = dyn_cast<IntrinsicInst>(&I);
+ if (!Int)
+ continue;
+
+ switch (Int->getIntrinsicID()) {
+ case Intrinsic::get_active_lane_mask:
+ ActiveLaneMask = true;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::sadd_sat:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::ssub_sat:
+ case Intrinsic::usub_sat:
+ continue;
+ case Intrinsic::fma:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::round:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::fabs:
+ if (ST->hasMVEFloatOps())
+ continue;
+ LLVM_FALLTHROUGH;
+ default:
+ break;
+ }
+
if (IsMasked(&I)) {
- VectorType *VecTy = getVectorType(cast<IntrinsicInst>(&I));
+ auto *VecTy = getVectorType(Int);
unsigned Lanes = VecTy->getNumElements();
unsigned ElementWidth = VecTy->getScalarSizeInBits();
// MVE vectors are 128-bit, but don't support 128 x i1.
@@ -312,94 +284,23 @@ bool MVETailPredication::IsPredicatedVectorLoop() {
if (Lanes * ElementWidth > MaxWidth || Lanes == MaxWidth)
return false;
MaskedInsts.push_back(cast<IntrinsicInst>(&I));
- } else if (auto *Int = dyn_cast<IntrinsicInst>(&I)) {
- for (auto &U : Int->args()) {
- if (isa<VectorType>(U->getType()))
- return false;
- }
+ continue;
+ }
+
+ for (const Use &U : Int->args()) {
+ if (isa<VectorType>(U->getType()))
+ return false;
}
}
}
+ if (!ActiveLaneMask) {
+ LLVM_DEBUG(dbgs() << "ARM TP: No get.active.lane.mask intrinsic found.\n");
+ return false;
+ }
return !MaskedInsts.empty();
}
-Value* MVETailPredication::ComputeElements(Value *TripCount,
- VectorType *VecTy) {
- const SCEV *TripCountSE = SE->getSCEV(TripCount);
- ConstantInt *VF = ConstantInt::get(cast<IntegerType>(TripCount->getType()),
- VecTy->getNumElements());
-
- if (VF->equalsInt(1))
- return nullptr;
-
- // TODO: Support constant trip counts.
- auto VisitAdd = [&](const SCEVAddExpr *S) -> const SCEVMulExpr* {
- if (auto *Const = dyn_cast<SCEVConstant>(S->getOperand(0))) {
- if (Const->getAPInt() != -VF->getValue())
- return nullptr;
- } else
- return nullptr;
- return dyn_cast<SCEVMulExpr>(S->getOperand(1));
- };
-
- auto VisitMul = [&](const SCEVMulExpr *S) -> const SCEVUDivExpr* {
- if (auto *Const = dyn_cast<SCEVConstant>(S->getOperand(0))) {
- if (Const->getValue() != VF)
- return nullptr;
- } else
- return nullptr;
- return dyn_cast<SCEVUDivExpr>(S->getOperand(1));
- };
-
- auto VisitDiv = [&](const SCEVUDivExpr *S) -> const SCEV* {
- if (auto *Const = dyn_cast<SCEVConstant>(S->getRHS())) {
- if (Const->getValue() != VF)
- return nullptr;
- } else
- return nullptr;
-
- if (auto *RoundUp = dyn_cast<SCEVAddExpr>(S->getLHS())) {
- if (auto *Const = dyn_cast<SCEVConstant>(RoundUp->getOperand(0))) {
- if (Const->getAPInt() != (VF->getValue() - 1))
- return nullptr;
- } else
- return nullptr;
-
- return RoundUp->getOperand(1);
- }
- return nullptr;
- };
-
- // TODO: Can we use SCEV helpers, such as findArrayDimensions, and friends to
- // determine the numbers of elements instead? Looks like this is what is used
- // for delinearization, but I'm not sure if it can be applied to the
- // vectorized form - at least not without a bit more work than I feel
- // comfortable with.
-
- // Search for Elems in the following SCEV:
- // (1 + ((-VF + (VF * (((VF - 1) + %Elems) /u VF))<nuw>) /u VF))<nuw><nsw>
- const SCEV *Elems = nullptr;
- if (auto *TC = dyn_cast<SCEVAddExpr>(TripCountSE))
- if (auto *Div = dyn_cast<SCEVUDivExpr>(TC->getOperand(1)))
- if (auto *Add = dyn_cast<SCEVAddExpr>(Div->getLHS()))
- if (auto *Mul = VisitAdd(Add))
- if (auto *Div = VisitMul(Mul))
- if (auto *Res = VisitDiv(Div))
- Elems = Res;
-
- if (!Elems)
- return nullptr;
-
- Instruction *InsertPt = L->getLoopPreheader()->getTerminator();
- if (!isSafeToExpandAt(Elems, InsertPt, *SE))
- return nullptr;
-
- auto DL = L->getHeader()->getModule()->getDataLayout();
- SCEVExpander Expander(*SE, DL, "elements");
- return Expander.expandCodeFor(Elems, Elems->getType(), InsertPt);
-}
-
// Look through the exit block to see whether there's a duplicate predicate
// instruction. This can happen when we need to perform a select on values
// from the last and previous iteration. Instead of doing a straight
@@ -407,31 +308,13 @@ Value* MVETailPredication::ComputeElements(Value *TripCount,
// in the block. This means that the VPR doesn't have to be live into the
// exit block which should make it easier to convert this loop into a proper
// tail predicated loop.
-static void Cleanup(DenseMap<Instruction*, Instruction*> &NewPredicates,
- SetVector<Instruction*> &MaybeDead, Loop *L) {
+static void Cleanup(SetVector<Instruction*> &MaybeDead, Loop *L) {
BasicBlock *Exit = L->getUniqueExitBlock();
if (!Exit) {
LLVM_DEBUG(dbgs() << "ARM TP: can't find loop exit block\n");
return;
}
- for (auto &Pair : NewPredicates) {
- Instruction *OldPred = Pair.first;
- Instruction *NewPred = Pair.second;
-
- for (auto &I : *Exit) {
- if (I.isSameOperationAs(OldPred)) {
- Instruction *PredClone = NewPred->clone();
- PredClone->insertBefore(&I);
- I.replaceAllUsesWith(PredClone);
- MaybeDead.insert(&I);
- LLVM_DEBUG(dbgs() << "ARM TP: replacing: "; I.dump();
- dbgs() << "ARM TP: with: "; PredClone->dump());
- break;
- }
- }
- }
-
// Drop references and add operands to check for dead.
SmallPtrSet<Instruction*, 4> Dead;
while (!MaybeDead.empty()) {
@@ -440,11 +323,10 @@ static void Cleanup(DenseMap<Instruction*, Instruction*> &NewPredicates,
if (I->hasNUsesOrMore(1))
continue;
- for (auto &U : I->operands()) {
+ for (auto &U : I->operands())
if (auto *OpI = dyn_cast<Instruction>(U))
MaybeDead.insert(OpI);
- }
- I->dropAllReferences();
+
Dead.insert(I);
}
@@ -457,24 +339,211 @@ static void Cleanup(DenseMap<Instruction*, Instruction*> &NewPredicates,
DeleteDeadPHIs(I);
}
-void MVETailPredication::InsertVCTPIntrinsic(Instruction *Predicate,
- DenseMap<Instruction*, Instruction*> &NewPredicates,
- VectorType *VecTy, Value *NumElements) {
- IRBuilder<> Builder(L->getHeader()->getFirstNonPHI());
+// The active lane intrinsic has this form:
+//
+// @llvm.get.active.lane.mask(IV, BTC)
+//
+// Here we perform checks that this intrinsic behaves as expected,
+// which means:
+//
+// 1) The element count, which is calculated with BTC + 1, cannot overflow.
+// 2) The element count needs to be sufficiently large that the decrement of
+// the element counter doesn't overflow, which means that we need to prove:
+// ceil(ElementCount / VectorWidth) >= TripCount
+// by rounding ElementCount up:
+// ((ElementCount + (VectorWidth - 1)) / VectorWidth)
+// and evaluating whether the expression below is known to be non-negative:
+// ((ElementCount + (VectorWidth - 1)) / VectorWidth) - TripCount
+// 3) The IV must be an induction phi with an increment equal to the
+// vector width.
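+//
+// As a small worked instance of check 2) (numbers chosen for illustration
+// only): with VectorWidth = 4 and ElementCount = 10, the rounded-up count is
+// (10 + 3) / 4 = 3, which equals the trip count of 3 for such a loop, so the
+// subtraction 3 - 3 = 0 is non-negative and the element counter cannot wrap.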
+bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
+ Value *TripCount, FixedVectorType *VecTy) {
+ bool ForceTailPredication =
+ EnableTailPredication == TailPredication::ForceEnabledNoReductions ||
+ EnableTailPredication == TailPredication::ForceEnabled;
+ // 1) Test whether entry to the loop is protected by a conditional
+ // BTC + 1 < 0. In other words, if the scalar trip count overflows,
+ // and becomes negative, we shouldn't enter the loop, and creating the
+ // tripcount expression BTC + 1 is not safe. So, check that BTC
+ // isn't max. This is evaluated in unsigned, because the semantics
+ // of @get.active.lane.mask is a ULE comparison.
+
+ int VectorWidth = VecTy->getNumElements();
+ auto *BackedgeTakenCount = ActiveLaneMask->getOperand(1);
+ auto *BTC = SE->getSCEV(BackedgeTakenCount);
+
+ if (!llvm::cannotBeMaxInLoop(BTC, L, *SE, false /*Signed*/) &&
+ !ForceTailPredication) {
+ LLVM_DEBUG(dbgs() << "ARM TP: Overflow possible, BTC can be max: ";
+ BTC->dump());
+ return false;
+ }
+
+ // 2) Prove that the sub expression is non-negative, i.e. it doesn't overflow:
+ //
+ // ((ElementCount + (VectorWidth - 1)) / VectorWidth) - TripCount
+ //
+ // 2.1) First prove overflow can't happen in:
+ //
+ // ElementCount + (VectorWidth - 1)
+ //
+ // Because of a lack of context, it is difficult to get a useful bound on
+ // this expression. But since ElementCount uses the same variables as the
+ // TripCount (TC), for which we can find meaningful value ranges, we use that
+ // instead and assert that:
+ //
+ // upperbound(TC) <= UINT_MAX - VectorWidth
+ //
+ auto *TC = SE->getSCEV(TripCount);
+ unsigned SizeInBits = TripCount->getType()->getScalarSizeInBits();
+ auto Diff = APInt(SizeInBits, ~0) - APInt(SizeInBits, VectorWidth);
+ uint64_t MaxMinusVW = Diff.getZExtValue();
+ uint64_t UpperboundTC = SE->getSignedRange(TC).getUpper().getZExtValue();
+
+ if (UpperboundTC > MaxMinusVW && !ForceTailPredication) {
+ LLVM_DEBUG(dbgs() << "ARM TP: Overflow possible in tripcount rounding:\n";
+ dbgs() << "upperbound(TC) <= UINT_MAX - VectorWidth\n";
+ dbgs() << UpperboundTC << " <= " << MaxMinusVW << " == false\n";);
+ return false;
+ }
+
+ // 2.2) Make sure overflow doesn't happen in final expression:
+ // ((ElementCount + (VectorWidth - 1)) / VectorWidth) - TripCount,
+ // To do this, compare the full ranges of these subexpressions:
+ //
+ // Range(Ceil) <= Range(TC)
+ //
+ // where Ceil = (ElementCount + (VW-1)) / VW. If Ceil and TC are runtime
+ // values (and not constants), we have to compensate for the lower bound of
+ // the value range being off by 1. The reason is that BTC lives in the preheader in
+ // this form:
+ //
+ // %trip.count.minus = add nsw nuw i32 %N, -1
+ //
+ // For the loop to be executed, %N has to be >= 1 and as a result the value
+ // range of %trip.count.minus has a lower bound of 0. Value %TC has this form:
+ //
+ // %5 = add nuw nsw i32 %4, 1
+ // call void @llvm.set.loop.iterations.i32(i32 %5)
+ //
+ // where %5 is some expression using %N, which needs to have a lower bound of
+ // 1. Thus, if the ranges of Ceil and TC are not a single constant but a set,
+ // we first add 0 to TC such that we can do the <= comparison on both sets.
+ //
+ auto *One = SE->getOne(TripCount->getType());
+ // ElementCount = BTC + 1
+ auto *ElementCount = SE->getAddExpr(BTC, One);
+ // Tmp = ElementCount + (VW-1)
+ auto *ECPlusVWMinus1 = SE->getAddExpr(ElementCount,
+ SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth - 1)));
+ // Ceil = (ElementCount + (VW-1)) / VW
+ auto *Ceil = SE->getUDivExpr(ECPlusVWMinus1,
+ SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth)));
+
+ ConstantRange RangeCeil = SE->getSignedRange(Ceil);
+ ConstantRange RangeTC = SE->getSignedRange(TC);
+ if (!RangeTC.isSingleElement()) {
+ auto ZeroRange =
+ ConstantRange(APInt(TripCount->getType()->getScalarSizeInBits(), 0));
+ RangeTC = RangeTC.unionWith(ZeroRange);
+ }
+ if (!RangeTC.contains(RangeCeil) && !ForceTailPredication) {
+ LLVM_DEBUG(dbgs() << "ARM TP: Overflow possible in sub\n");
+ return false;
+ }
+
+ // 3) Find out if IV is an induction phi. Note that we can't use Loop
+ // helpers here to get the induction variable, because the hardware loop is
+ // no longer in loopsimplify form, and also the hwloop intrinsic uses a
+ // different counter. Using SCEV, we check that the induction is of the
+ // form i = i + 4, where the increment must be equal to the VectorWidth.
+ auto *IV = ActiveLaneMask->getOperand(0);
+ auto *IVExpr = SE->getSCEV(IV);
+ auto *AddExpr = dyn_cast<SCEVAddRecExpr>(IVExpr);
+ if (!AddExpr) {
+ LLVM_DEBUG(dbgs() << "ARM TP: induction not an add expr: "; IVExpr->dump());
+ return false;
+ }
+ // Check that this AddRec is associated with this loop.
+ if (AddExpr->getLoop() != L) {
+ LLVM_DEBUG(dbgs() << "ARM TP: phi not part of this loop\n");
+ return false;
+ }
+ auto *Step = dyn_cast<SCEVConstant>(AddExpr->getOperand(1));
+ if (!Step) {
+ LLVM_DEBUG(dbgs() << "ARM TP: induction step is not a constant: ";
+ AddExpr->getOperand(1)->dump());
+ return false;
+ }
+ auto StepValue = Step->getValue()->getSExtValue();
+ if (VectorWidth == StepValue)
+ return true;
+
+ LLVM_DEBUG(dbgs() << "ARM TP: Step value " << StepValue << " doesn't match "
+ "vector width " << VectorWidth << "\n");
+
+ return false;
+}
+
+// Materialize NumElements in the preheader block.
+static Value *getNumElements(BasicBlock *Preheader, Value *BTC) {
+ // First, check whether it already exists in the preheader:
+ //
+ // preheader:
+ // %BTC = add i32 %N, -1
+ // ..
+ // vector.body:
+ //
+ // If %BTC already exists in this form, we don't need to emit
+ // %NumElems = %BTC + 1, but can instead just return %N.
+ for (auto &I : *Preheader) {
+ if (I.getOpcode() != Instruction::Add || &I != BTC)
+ continue;
+ ConstantInt *MinusOne = nullptr;
+ if (!(MinusOne = dyn_cast<ConstantInt>(I.getOperand(1))))
+ continue;
+ if (MinusOne->getSExtValue() == -1) {
+ LLVM_DEBUG(dbgs() << "ARM TP: Found num elems: " << I << "\n");
+ return I.getOperand(0);
+ }
+ }
+
+ // But we do need to materialise the element count if BTC is not already
+ // there in that form, e.g. if it is a constant.
+ IRBuilder<> Builder(Preheader->getTerminator());
+ Value *NumElements = Builder.CreateAdd(BTC,
+ ConstantInt::get(BTC->getType(), 1), "num.elements");
+ LLVM_DEBUG(dbgs() << "ARM TP: Created num elems: " << *NumElements << "\n");
+ return NumElements;
+}
+
+void MVETailPredication::InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask,
+ Value *TripCount, FixedVectorType *VecTy) {
+ IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
Module *M = L->getHeader()->getModule();
Type *Ty = IntegerType::get(M->getContext(), 32);
+ unsigned VectorWidth = VecTy->getNumElements();
+
+ // The backedge-taken count in @llvm.get.active.lane.mask, its 2nd operand,
+ // is one less than the trip count. So we need to find or create
+ // %num.elements = %BTC + 1 in the preheader.
+ Value *BTC = ActiveLaneMask->getOperand(1);
+ Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
+ Value *NumElements = getNumElements(L->getLoopPreheader(), BTC);
// Insert a phi to count the number of elements processed by the loop.
+ Builder.SetInsertPoint(L->getHeader()->getFirstNonPHI());
PHINode *Processed = Builder.CreatePHI(Ty, 2);
Processed->addIncoming(NumElements, L->getLoopPreheader());
- // Insert the intrinsic to represent the effect of tail predication.
- Builder.SetInsertPoint(cast<Instruction>(Predicate));
+ // Replace @llvm.get.active.lane.mask() with the ARM-specific VCTP intrinsic,
+ // and thus represent the effect of tail predication.
+ Builder.SetInsertPoint(ActiveLaneMask);
ConstantInt *Factor =
- ConstantInt::get(cast<IntegerType>(Ty), VecTy->getNumElements());
+ ConstantInt::get(cast<IntegerType>(Ty), VectorWidth);
Intrinsic::ID VCTPID;
- switch (VecTy->getNumElements()) {
+ switch (VectorWidth) {
default:
llvm_unreachable("unexpected number of lanes");
case 4: VCTPID = Intrinsic::arm_mve_vctp32; break;
@@ -488,9 +557,8 @@ void MVETailPredication::InsertVCTPIntrinsic(Instruction *Predicate,
// purposes, but takes a v4i1 instead of a v2i1.
}
Function *VCTP = Intrinsic::getDeclaration(M, VCTPID);
- Value *TailPredicate = Builder.CreateCall(VCTP, Processed);
- Predicate->replaceAllUsesWith(TailPredicate);
- NewPredicates[Predicate] = cast<Instruction>(TailPredicate);
+ Value *VCTPCall = Builder.CreateCall(VCTP, Processed);
+ ActiveLaneMask->replaceAllUsesWith(VCTPCall);
// Add the incoming value to the new phi.
// TODO: This add likely already exists in the loop.
@@ -498,47 +566,45 @@ void MVETailPredication::InsertVCTPIntrinsic(Instruction *Predicate,
Processed->addIncoming(Remaining, L->getLoopLatch());
LLVM_DEBUG(dbgs() << "ARM TP: Insert processed elements phi: "
<< *Processed << "\n"
- << "ARM TP: Inserted VCTP: " << *TailPredicate << "\n");
+ << "ARM TP: Inserted VCTP: " << *VCTPCall << "\n");
}
bool MVETailPredication::TryConvert(Value *TripCount) {
if (!IsPredicatedVectorLoop()) {
- LLVM_DEBUG(dbgs() << "ARM TP: no masked instructions in loop");
+ LLVM_DEBUG(dbgs() << "ARM TP: no masked instructions in loop.\n");
return false;
}
LLVM_DEBUG(dbgs() << "ARM TP: Found predicated vector loop.\n");
-
- // Walk through the masked intrinsics and try to find whether the predicate
- // operand is generated from an induction variable.
SetVector<Instruction*> Predicates;
- DenseMap<Instruction*, Instruction*> NewPredicates;
+ // Walk through the masked intrinsics and try to find whether the predicate
+ // operand is generated by intrinsic @llvm.get.active.lane.mask().
for (auto *I : MaskedInsts) {
- Intrinsic::ID ID = I->getIntrinsicID();
- unsigned PredOp = ID == Intrinsic::masked_load ? 2 : 3;
+ unsigned PredOp = I->getIntrinsicID() == Intrinsic::masked_load ? 2 : 3;
auto *Predicate = dyn_cast<Instruction>(I->getArgOperand(PredOp));
if (!Predicate || Predicates.count(Predicate))
continue;
- VectorType *VecTy = getVectorType(I);
- Value *NumElements = ComputeElements(TripCount, VecTy);
- if (!NumElements)
- continue;
-
- if (!isTailPredicate(Predicate, NumElements)) {
- LLVM_DEBUG(dbgs() << "ARM TP: Not tail predicate: " << *Predicate << "\n");
+ auto *ActiveLaneMask = dyn_cast<IntrinsicInst>(Predicate);
+ if (!ActiveLaneMask ||
+ ActiveLaneMask->getIntrinsicID() != Intrinsic::get_active_lane_mask)
continue;
- }
- LLVM_DEBUG(dbgs() << "ARM TP: Found tail predicate: " << *Predicate << "\n");
Predicates.insert(Predicate);
+ LLVM_DEBUG(dbgs() << "ARM TP: Found active lane mask: "
+ << *ActiveLaneMask << "\n");
- InsertVCTPIntrinsic(Predicate, NewPredicates, VecTy, NumElements);
+ auto *VecTy = getVectorType(I);
+ if (!IsSafeActiveMask(ActiveLaneMask, TripCount, VecTy)) {
+ LLVM_DEBUG(dbgs() << "ARM TP: Not safe to insert VCTP.\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "ARM TP: Safe to insert VCTP.\n");
+ InsertVCTPIntrinsic(ActiveLaneMask, TripCount, VecTy);
}
- // Now clean up.
- Cleanup(NewPredicates, Predicates, L);
+ Cleanup(Predicates, L);
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
index a5df46c94f42..dc769ae526bc 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
@@ -22,9 +22,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/ReachingDefAnalysis.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include <cassert>
#include <new>
@@ -34,83 +34,220 @@ using namespace llvm;
#define DEBUG_TYPE "arm-mve-vpt"
namespace {
- class MVEVPTBlock : public MachineFunctionPass {
- public:
- static char ID;
+class MVEVPTBlock : public MachineFunctionPass {
+public:
+ static char ID;
+ const Thumb2InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
- MVEVPTBlock() : MachineFunctionPass(ID) {}
+ MVEVPTBlock() : MachineFunctionPass(ID) {}
- bool runOnMachineFunction(MachineFunction &Fn) override;
+ bool runOnMachineFunction(MachineFunction &Fn) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<ReachingDefAnalysis>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs).set(
- MachineFunctionProperties::Property::TracksLiveness);
- }
-
- StringRef getPassName() const override {
- return "MVE VPT block insertion pass";
- }
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
- private:
- bool InsertVPTBlocks(MachineBasicBlock &MBB);
+ StringRef getPassName() const override {
+ return "MVE VPT block insertion pass";
+ }
- const Thumb2InstrInfo *TII = nullptr;
- ReachingDefAnalysis *RDA = nullptr;
- };
+private:
+ bool InsertVPTBlocks(MachineBasicBlock &MBB);
+};
- char MVEVPTBlock::ID = 0;
+char MVEVPTBlock::ID = 0;
} // end anonymous namespace
INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false)
-static MachineInstr *findVCMPToFoldIntoVPST(MachineInstr *MI,
- ReachingDefAnalysis *RDA,
+static MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI,
+ const TargetRegisterInfo *TRI,
unsigned &NewOpcode) {
- // First, search backwards to the instruction that defines VPR
- auto *Def = RDA->getReachingMIDef(MI, ARM::VPR);
- if (!Def)
- return nullptr;
+ // Search backwards to the instruction that defines VPR. This may or may not
+ // be a VCMP; we check that after this loop. If we find another instruction
+ // that reads or writes VPR first, we return nullptr.
+ MachineBasicBlock::iterator CmpMI = MI;
+ while (CmpMI != MI->getParent()->begin()) {
+ --CmpMI;
+ if (CmpMI->modifiesRegister(ARM::VPR, TRI))
+ break;
+ if (CmpMI->readsRegister(ARM::VPR, TRI))
+ break;
+ }
- // Now check that Def is a VCMP
- if (!(NewOpcode = VCMPOpcodeToVPT(Def->getOpcode())))
+ if (CmpMI == MI)
+ return nullptr;
+ NewOpcode = VCMPOpcodeToVPT(CmpMI->getOpcode());
+ if (NewOpcode == 0)
return nullptr;
- // Check that Def's operands are not defined between the VCMP and MI, i.e.
- // check that they have the same reaching def.
- if (!RDA->hasSameReachingDef(Def, MI, Def->getOperand(1).getReg()) ||
- !RDA->hasSameReachingDef(Def, MI, Def->getOperand(2).getReg()))
+ // Search forward from CmpMI to MI, checking if either register was def'd
+ if (registerDefinedBetween(CmpMI->getOperand(1).getReg(), std::next(CmpMI),
+ MI, TRI))
+ return nullptr;
+ if (registerDefinedBetween(CmpMI->getOperand(2).getReg(), std::next(CmpMI),
+ MI, TRI))
return nullptr;
+ return &*CmpMI;
+}
+
+// Advances Iter past a block of predicated instructions.
+// Returns true if it successfully skipped the whole block of predicated
+// instructions. Returns false when it stopped early (due to MaxSteps), or if
+// Iter didn't point to a predicated instruction.
+static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter,
+ MachineBasicBlock::instr_iterator EndIter,
+ unsigned MaxSteps,
+ unsigned &NumInstrsSteppedOver) {
+ ARMVCC::VPTCodes NextPred = ARMVCC::None;
+ Register PredReg;
+ NumInstrsSteppedOver = 0;
+
+ while (Iter != EndIter) {
+ NextPred = getVPTInstrPredicate(*Iter, PredReg);
+ assert(NextPred != ARMVCC::Else &&
+ "VPT block pass does not expect Else preds");
+ if (NextPred == ARMVCC::None || MaxSteps == 0)
+ break;
+ --MaxSteps;
+ ++Iter;
+ ++NumInstrsSteppedOver;
+ };
+
+ return NumInstrsSteppedOver != 0 &&
+ (NextPred == ARMVCC::None || Iter == EndIter);
+}
+
+// Returns true if at least one instruction in the range [Iter, End) defines
+// or kills VPR.
+static bool IsVPRDefinedOrKilledByBlock(MachineBasicBlock::iterator Iter,
+ MachineBasicBlock::iterator End) {
+ for (; Iter != End; ++Iter)
+ if (Iter->definesRegister(ARM::VPR) || Iter->killsRegister(ARM::VPR))
+ return true;
+ return false;
+}
+
+// Creates a T, TT, TTT or TTTT BlockMask depending on BlockSize.
+static ARM::PredBlockMask GetInitialBlockMask(unsigned BlockSize) {
+ switch (BlockSize) {
+ case 1:
+ return ARM::PredBlockMask::T;
+ case 2:
+ return ARM::PredBlockMask::TT;
+ case 3:
+ return ARM::PredBlockMask::TTT;
+ case 4:
+ return ARM::PredBlockMask::TTTT;
+ default:
+ llvm_unreachable("Invalid BlockSize!");
+ }
+}
+
+// Given an iterator (Iter) that points at an instruction with a "Then"
+// predicate, tries to create the largest block of contiguous predicated
+// instructions possible, and returns the VPT Block Mask of that block.
+//
+// This will try to perform some minor optimization in order to maximize the
+// size of the block.
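+//
+// For example (an illustrative sketch, not lifted from a test): a run of four
+// "Then"-predicated instructions produces a TTTT mask, while a sequence of one
+// "Then"-predicated instruction, a removable VPNOT and one more predicated
+// instruction can be folded into a single block with mask TE, the instruction
+// after the VPNOT being re-predicated as "Else".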
+static ARM::PredBlockMask
+CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter,
+ MachineBasicBlock::instr_iterator EndIter,
+ SmallVectorImpl<MachineInstr *> &DeadInstructions) {
+ MachineBasicBlock::instr_iterator BlockBeg = Iter;
+ (void)BlockBeg;
+ assert(getVPTInstrPredicate(*Iter) == ARMVCC::Then &&
+ "Expected a Predicated Instruction");
+
+ LLVM_DEBUG(dbgs() << "VPT block created for: "; Iter->dump());
+
+ unsigned BlockSize;
+ StepOverPredicatedInstrs(Iter, EndIter, 4, BlockSize);
+
+ LLVM_DEBUG(for (MachineBasicBlock::instr_iterator AddedInstIter =
+ std::next(BlockBeg);
+ AddedInstIter != Iter; ++AddedInstIter) {
+ dbgs() << " adding: ";
+ AddedInstIter->dump();
+ });
+
+ // Generate the initial BlockMask
+ ARM::PredBlockMask BlockMask = GetInitialBlockMask(BlockSize);
+
+ // Remove VPNOTs while there's still room in the block, so we can make the
+ // largest block possible.
+ ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Else;
+ while (BlockSize < 4 && Iter != EndIter &&
+ Iter->getOpcode() == ARM::MVE_VPNOT) {
+
+ // Try to skip all of the predicated instructions after the VPNOT, stopping
+ // after (4 - BlockSize). If we can't skip them all, stop.
+ unsigned ElseInstCnt = 0;
+ MachineBasicBlock::instr_iterator VPNOTBlockEndIter = std::next(Iter);
+ if (!StepOverPredicatedInstrs(VPNOTBlockEndIter, EndIter, (4 - BlockSize),
+ ElseInstCnt))
+ break;
+
+ // Check if this VPNOT can be removed or not: it can only be removed if at
+ // least one of the predicated instructions that follow it kills or sets
+ // VPR.
+ if (!IsVPRDefinedOrKilledByBlock(Iter, VPNOTBlockEndIter))
+ break;
+
+ LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump(););
+
+ // Record the new size of the block
+ BlockSize += ElseInstCnt;
+ assert(BlockSize <= 4 && "Block is too large!");
+
+ // Record the VPNot to remove it later.
+ DeadInstructions.push_back(&*Iter);
+ ++Iter;
+
+ // Replace the predicates of the instructions we're adding.
+ // Note that we are using "Iter" to iterate over the block so we can update
+ // it at the same time.
+ for (; Iter != VPNOTBlockEndIter; ++Iter) {
+ // Find the operand that holds the predicate
+ int OpIdx = findFirstVPTPredOperandIdx(*Iter);
+ assert(OpIdx != -1);
+
+ // Change the predicate and update the mask
+ Iter->getOperand(OpIdx).setImm(CurrentPredicate);
+ BlockMask = expandPredBlockMask(BlockMask, CurrentPredicate);
+
+ LLVM_DEBUG(dbgs() << " adding : "; Iter->dump());
+ }
- return Def;
+ CurrentPredicate =
+ (CurrentPredicate == ARMVCC::Then ? ARMVCC::Else : ARMVCC::Then);
+ }
+ return BlockMask;
}
bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
bool Modified = false;
MachineBasicBlock::instr_iterator MBIter = Block.instr_begin();
MachineBasicBlock::instr_iterator EndIter = Block.instr_end();
- SmallSet<MachineInstr *, 4> RemovedVCMPs;
+
+ SmallVector<MachineInstr *, 4> DeadInstructions;
while (MBIter != EndIter) {
MachineInstr *MI = &*MBIter;
- unsigned PredReg = 0;
- DebugLoc dl = MI->getDebugLoc();
+ Register PredReg;
+ DebugLoc DL = MI->getDebugLoc();
ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg);
// The idea of the predicate is that None, Then and Else are for use when
// handling assembly language: they correspond to the three possible
// suffixes "", "t" and "e" on the mnemonic. So when instructions are read
- // from assembly source or disassembled from object code, you expect to see
- // a mixture whenever there's a long VPT block. But in code generation, we
- // hope we'll never generate an Else as input to this pass.
+ // from assembly source or disassembled from object code, you expect to
+ // see a mixture whenever there's a long VPT block. But in code
+ // generation, we hope we'll never generate an Else as input to this pass.
assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds");
if (Pred == ARMVCC::None) {
@@ -118,46 +255,25 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
continue;
}
- LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump());
- int VPTInstCnt = 1;
- ARMVCC::VPTCodes NextPred;
-
- // Look at subsequent instructions, checking if they can be in the same VPT
- // block.
- ++MBIter;
- while (MBIter != EndIter && VPTInstCnt < 4) {
- NextPred = getVPTInstrPredicate(*MBIter, PredReg);
- assert(NextPred != ARMVCC::Else &&
- "VPT block pass does not expect Else preds");
- if (NextPred != Pred)
- break;
- LLVM_DEBUG(dbgs() << " adding : "; MBIter->dump());
- ++VPTInstCnt;
- ++MBIter;
- };
-
- unsigned BlockMask = getARMVPTBlockMask(VPTInstCnt);
+ ARM::PredBlockMask BlockMask =
+ CreateVPTBlock(MBIter, EndIter, DeadInstructions);
- // Search back for a VCMP that can be folded to create a VPT, or else create
- // a VPST directly
+ // Search back for a VCMP that can be folded to create a VPT, or else
+ // create a VPST directly
MachineInstrBuilder MIBuilder;
unsigned NewOpcode;
- MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, RDA, NewOpcode);
- if (VCMP) {
+ LLVM_DEBUG(dbgs() << " final block mask: " << (unsigned)BlockMask << "\n");
+ if (MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode)) {
LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump());
- MIBuilder = BuildMI(Block, MI, dl, TII->get(NewOpcode));
- MIBuilder.addImm(BlockMask);
+ MIBuilder = BuildMI(Block, MI, DL, TII->get(NewOpcode));
+ MIBuilder.addImm((uint64_t)BlockMask);
MIBuilder.add(VCMP->getOperand(1));
MIBuilder.add(VCMP->getOperand(2));
MIBuilder.add(VCMP->getOperand(3));
- // We delay removing the actual VCMP instruction by saving it to a list
- // and deleting all instructions in this list in one go after we have
- // created the VPT blocks. We do this in order not to invalidate the
- // ReachingDefAnalysis that is queried by 'findVCMPToFoldIntoVPST'.
- RemovedVCMPs.insert(VCMP);
+ VCMP->eraseFromParent();
} else {
- MIBuilder = BuildMI(Block, MI, dl, TII->get(ARM::MVE_VPST));
- MIBuilder.addImm(BlockMask);
+ MIBuilder = BuildMI(Block, MI, DL, TII->get(ARM::MVE_VPST));
+ MIBuilder.addImm((uint64_t)BlockMask);
}
finalizeBundle(
@@ -166,16 +282,18 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
Modified = true;
}
- for (auto *I : RemovedVCMPs)
- I->eraseFromParent();
+ // Erase all dead instructions
+ for (MachineInstr *DeadMI : DeadInstructions) {
+ if (DeadMI->isInsideBundle())
+ DeadMI->eraseFromBundle();
+ else
+ DeadMI->eraseFromParent();
+ }
return Modified;
}
bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
- if (skipFunction(Fn.getFunction()))
- return false;
-
const ARMSubtarget &STI =
static_cast<const ARMSubtarget &>(Fn.getSubtarget());
@@ -183,7 +301,7 @@ bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
return false;
TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
- RDA = &getAnalysis<ReachingDefAnalysis>();
+ TRI = STI.getRegisterInfo();
LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n"
<< "********** Function: " << Fn.getName() << '\n');
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
new file mode 100644
index 000000000000..382ddd4572c7
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
@@ -0,0 +1,464 @@
+//===-- MVEVPTOptimisationsPass.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass does a few optimisations related to MVE VPT blocks before
+/// register allocation is performed. The goal is to maximize the sizes of the
+/// blocks that will be created by the MVE VPT Block Insertion pass (which runs
+/// after register allocation). The first optimisation done by this pass is the
+/// replacement of "opposite" VCMPs with VPNOTs, so the Block Insertion pass
+/// can delete them later to create larger VPT blocks.
+/// The second optimisation replaces re-uses of old VCCR values with VPNOTs when
+/// inside a block of predicated instructions. This is done to avoid
+/// spill/reloads of VPR in the middle of a block, which prevents the Block
+/// Insertion pass from creating large blocks.
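+/// As a sketch of the first optimisation (with made-up virtual registers and
+/// an abbreviated MIR syntax), given the pair of compares
+///   %a:vccr = VCMP eq, %x, %y
+///   %b:vccr = VCMP ne, %x, %y
+/// the second compare computes the complement of the first, so it can be
+/// rewritten as
+///   %b:vccr = VPNOT %a
+/// which the Block Insertion pass can later absorb into a VPT block.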
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/Debug.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-mve-vpt-opts"
+
+namespace {
+class MVEVPTOptimisations : public MachineFunctionPass {
+public:
+ static char ID;
+ const Thumb2InstrInfo *TII;
+ MachineRegisterInfo *MRI;
+
+ MVEVPTOptimisations() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ StringRef getPassName() const override {
+ return "ARM MVE VPT Optimisation Pass";
+ }
+
+private:
+ MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
+ MachineInstr &Instr,
+ MachineOperand &User,
+ Register Target);
+ bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB);
+ bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
+};
+
+char MVEVPTOptimisations::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(MVEVPTOptimisations, DEBUG_TYPE,
+ "ARM MVE VPT Optimisations pass", false, false)
+
+// Returns true if Opcode is any VCMP Opcode.
+static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
+
+// Returns true if a VCMP with this Opcode can have its operands swapped.
+// There are two kinds of VCMPs that can't have their operands swapped: Float VCMPs,
+// and VCMPr instructions (since the r is always on the right).
+static bool CanHaveSwappedOperands(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return true;
+ case ARM::MVE_VCMPf32:
+ case ARM::MVE_VCMPf16:
+ case ARM::MVE_VCMPf32r:
+ case ARM::MVE_VCMPf16r:
+ case ARM::MVE_VCMPi8r:
+ case ARM::MVE_VCMPi16r:
+ case ARM::MVE_VCMPi32r:
+ case ARM::MVE_VCMPu8r:
+ case ARM::MVE_VCMPu16r:
+ case ARM::MVE_VCMPu32r:
+ case ARM::MVE_VCMPs8r:
+ case ARM::MVE_VCMPs16r:
+ case ARM::MVE_VCMPs32r:
+ return false;
+ }
+}
+
+// Returns the CondCode of a VCMP Instruction.
+static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) {
+ assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
+ return ARMCC::CondCodes(Instr.getOperand(3).getImm());
+}
+
+// Returns true if Cond is equivalent to a VPNOT instruction on the result of
+// Prev. Cond and Prev must be VCMPs.
+static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) {
+ assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));
+
+ // Opcodes must match.
+ if (Cond.getOpcode() != Prev.getOpcode())
+ return false;
+
+ MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
+ MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);
+
+ // If the VCMP has the opposite condition with the same operands, we can
+ // replace it with a VPNOT
+ ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
+ ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
+ if (ExpectedCode == GetCondCode(Prev))
+ if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
+ return true;
+ // Check again with operands swapped if possible
+ if (!CanHaveSwappedOperands(Cond.getOpcode()))
+ return false;
+ ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
+ return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
+ CondOP2.isIdenticalTo(PrevOP1);
+}
+
+// Returns true if Instr writes to VCCR.
+static bool IsWritingToVCCR(MachineInstr &Instr) {
+ if (Instr.getNumOperands() == 0)
+ return false;
+ MachineOperand &Dst = Instr.getOperand(0);
+ if (!Dst.isReg())
+ return false;
+ Register DstReg = Dst.getReg();
+ if (!DstReg.isVirtual())
+ return false;
+ MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
+ const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
+ return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
+}
+
+// Transforms
+// <Instr that uses %A ('User' Operand)>
+// Into
+// %K = VPNOT %Target
+// <Instr that uses %K ('User' Operand)>
+// And returns the newly inserted VPNOT.
+// This optimization is done in the hopes of preventing spills/reloads of VPR by
+// reducing the number of VCCR values with overlapping lifetimes.
+MachineInstr &MVEVPTOptimisations::ReplaceRegisterUseWithVPNOT(
+ MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User,
+ Register Target) {
+ Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target));
+
+ MachineInstrBuilder MIBuilder =
+ BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
+ .addDef(NewResult)
+ .addReg(Target);
+ addUnpredicatedMveVpredNOp(MIBuilder);
+
+ // Make the user use NewResult instead, and clear its kill flag.
+ User.setReg(NewResult);
+ User.setIsKill(false);
+
+ LLVM_DEBUG(dbgs() << " Inserting VPNOT (for spill prevention): ";
+ MIBuilder.getInstr()->dump());
+
+ return *MIBuilder.getInstr();
+}
+
+// Moves a VPNOT before its first user if an instruction that uses Reg is found
+// in-between the VPNOT and its user.
+// Returns true if there is at least one user of the VPNOT in the block.
+static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator Iter,
+ Register Reg) {
+ assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!");
+ assert(getVPTInstrPredicate(*Iter) == ARMVCC::None &&
+ "The VPNOT cannot be predicated");
+
+ MachineInstr &VPNOT = *Iter;
+ Register VPNOTResult = VPNOT.getOperand(0).getReg();
+ Register VPNOTOperand = VPNOT.getOperand(1).getReg();
+
+ // Whether the VPNOT will need to be moved, and whether we found a user of the
+ // VPNOT.
+ bool MustMove = false, HasUser = false;
+ MachineOperand *VPNOTOperandKiller = nullptr;
+ for (; Iter != MBB.end(); ++Iter) {
+ if (MachineOperand *MO =
+ Iter->findRegisterUseOperand(VPNOTOperand, /*isKill*/ true)) {
+ // If we find the operand that kills the VPNOTOperand's result, save it.
+ VPNOTOperandKiller = MO;
+ }
+
+ if (Iter->findRegisterUseOperandIdx(Reg) != -1) {
+ MustMove = true;
+ continue;
+ }
+
+ if (Iter->findRegisterUseOperandIdx(VPNOTResult) == -1)
+ continue;
+
+ HasUser = true;
+ if (!MustMove)
+ break;
+
+ // Move the VPNOT right before Iter
+ LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << " Before: ";
+ Iter->dump());
+ MBB.splice(Iter, &MBB, VPNOT.getIterator());
+ // If we move the instr, and its operand was killed earlier, remove the kill
+ // flag.
+ if (VPNOTOperandKiller)
+ VPNOTOperandKiller->setIsKill(false);
+
+ break;
+ }
+ return HasUser;
+}
+
+// This optimisation attempts to reduce the number of overlapping lifetimes of
+// VCCR values by replacing uses of old VCCR values with VPNOTs. For example,
+// this replaces
+// %A:vccr = (something)
+// %B:vccr = VPNOT %A
+// %Foo = (some op that uses %B)
+// %Bar = (some op that uses %A)
+// With
+// %A:vccr = (something)
+// %B:vccr = VPNOT %A
+// %Foo = (some op that uses %B)
+// %TMP2:vccr = VPNOT %B
+// %Bar = (some op that uses %A)
+bool MVEVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
+ MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end();
+ SmallVector<MachineInstr *, 4> DeadInstructions;
+ bool Modified = false;
+
+ while (Iter != End) {
+ Register VCCRValue, OppositeVCCRValue;
+ // The first loop looks for 2 unpredicated instructions:
+ // %A:vccr = (instr) ; A is stored in VCCRValue
+ // %B:vccr = VPNOT %A ; B is stored in OppositeVCCRValue
+ for (; Iter != End; ++Iter) {
+ // We're only interested in unpredicated instructions that write to VCCR.
+ if (!IsWritingToVCCR(*Iter) ||
+ getVPTInstrPredicate(*Iter) != ARMVCC::None)
+ continue;
+ Register Dst = Iter->getOperand(0).getReg();
+
+ // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've
+ // found what we were looking for.
+ if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
+ Iter->findRegisterUseOperandIdx(VCCRValue) != -1) {
+ // Move the VPNOT closer to its first user if needed, and ignore if it
+ // has no users.
+ if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue))
+ continue;
+
+ OppositeVCCRValue = Dst;
+ ++Iter;
+ break;
+ }
+
+ // Else, just set VCCRValue.
+ VCCRValue = Dst;
+ }
+
+ // If the first inner loop didn't find anything, stop here.
+ if (Iter == End)
+ break;
+
+ assert(VCCRValue && OppositeVCCRValue &&
+ "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
+ "stopped before the end of the block!");
+ assert(VCCRValue != OppositeVCCRValue &&
+ "VCCRValue should not be equal to OppositeVCCRValue!");
+
+ // LastVPNOTResult always contains the same value as OppositeVCCRValue.
+ Register LastVPNOTResult = OppositeVCCRValue;
+
+ // This second loop tries to optimize the remaining instructions.
+ for (; Iter != End; ++Iter) {
+ bool IsInteresting = false;
+
+ if (MachineOperand *MO = Iter->findRegisterUseOperand(VCCRValue)) {
+ IsInteresting = true;
+
+ // - If the instruction is a VPNOT, it can be removed, and we can just
+ // replace its uses with LastVPNOTResult.
+ // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue.
+ if (Iter->getOpcode() == ARM::MVE_VPNOT) {
+ Register Result = Iter->getOperand(0).getReg();
+
+ MRI->replaceRegWith(Result, LastVPNOTResult);
+ DeadInstructions.push_back(&*Iter);
+ Modified = true;
+
+ LLVM_DEBUG(dbgs()
+ << "Replacing all uses of '" << printReg(Result)
+ << "' with '" << printReg(LastVPNOTResult) << "'\n");
+ } else {
+ MachineInstr &VPNOT =
+ ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult);
+ Modified = true;
+
+ LastVPNOTResult = VPNOT.getOperand(0).getReg();
+ std::swap(VCCRValue, OppositeVCCRValue);
+
+ LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue)
+ << "' with '" << printReg(LastVPNOTResult)
+ << "' in instr: " << *Iter);
+ }
+ } else {
+ // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult
+ // instead as they contain the same value.
+ if (MachineOperand *MO =
+ Iter->findRegisterUseOperand(OppositeVCCRValue)) {
+ IsInteresting = true;
+
+ // This is pointless if LastVPNOTResult == OppositeVCCRValue.
+ if (LastVPNOTResult != OppositeVCCRValue) {
+ LLVM_DEBUG(dbgs() << "Replacing usage of '"
+ << printReg(OppositeVCCRValue) << "' with '"
+ << printReg(LastVPNOTResult) << "' for instr: ";
+ Iter->dump());
+ MO->setReg(LastVPNOTResult);
+ Modified = true;
+ }
+
+ MO->setIsKill(false);
+ }
+
+ // If this is an unpredicated VPNOT on
+ // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it.
+ if (Iter->getOpcode() == ARM::MVE_VPNOT &&
+ getVPTInstrPredicate(*Iter) == ARMVCC::None) {
+ Register VPNOTOperand = Iter->getOperand(1).getReg();
+ if (VPNOTOperand == LastVPNOTResult ||
+ VPNOTOperand == OppositeVCCRValue) {
+ IsInteresting = true;
+
+ std::swap(VCCRValue, OppositeVCCRValue);
+ LastVPNOTResult = Iter->getOperand(0).getReg();
+ }
+ }
+ }
+
+ // If this instruction was not interesting, and it writes to VCCR, stop.
+ if (!IsInteresting && IsWritingToVCCR(*Iter))
+ break;
+ }
+ }
+
+ for (MachineInstr *DeadInstruction : DeadInstructions)
+ DeadInstruction->removeFromParent();
+
+ return Modified;
+}
+
+// This optimisation replaces VCMPs with VPNOTs when they are equivalent.
+bool MVEVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
+ SmallVector<MachineInstr *, 4> DeadInstructions;
+
+ // The last VCMP that we have seen and that couldn't be replaced.
+ // This is reset when an instruction that writes to VCCR/VPR is found, or when
+ // a VCMP is replaced with a VPNOT.
+ // We'll only replace VCMPs with VPNOTs when this is not null, and when the
+ // current VCMP is the opposite of PrevVCMP.
+ MachineInstr *PrevVCMP = nullptr;
+ // If we find an instruction that kills the result of PrevVCMP, we save the
+ // operand here to remove the kill flag in case we need to use PrevVCMP's
+ // result.
+ MachineOperand *PrevVCMPResultKiller = nullptr;
+
+ for (MachineInstr &Instr : MBB.instrs()) {
+ if (PrevVCMP) {
+ if (MachineOperand *MO = Instr.findRegisterUseOperand(
+ PrevVCMP->getOperand(0).getReg(), /*isKill*/ true)) {
+ // If we come across the instr that kills PrevVCMP's result, record it
+ // so we can remove the kill flag later if we need to.
+ PrevVCMPResultKiller = MO;
+ }
+ }
+
+ // Ignore predicated instructions.
+ if (getVPTInstrPredicate(Instr) != ARMVCC::None)
+ continue;
+
+ // Only look at VCMPs
+ if (!IsVCMP(Instr.getOpcode())) {
+ // If the instruction writes to VCCR, forget the previous VCMP.
+ if (IsWritingToVCCR(Instr))
+ PrevVCMP = nullptr;
+ continue;
+ }
+
+ if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
+ PrevVCMP = &Instr;
+ continue;
+ }
+
+ // The register containing the result of the VCMP that we're going to
+ // replace.
+ Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();
+
+ // Build a VPNOT to replace the VCMP, reusing its operands.
+ MachineInstrBuilder MIBuilder =
+ BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
+ .add(Instr.getOperand(0))
+ .addReg(PrevVCMPResultReg);
+ addUnpredicatedMveVpredNOp(MIBuilder);
+ LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
+ MIBuilder.getInstr()->dump(); dbgs() << " Removed VCMP: ";
+ Instr.dump());
+
+ // If we found an instruction that uses and kills PrevVCMP's result,
+ // remove the kill flag.
+ if (PrevVCMPResultKiller)
+ PrevVCMPResultKiller->setIsKill(false);
+
+ // Finally, mark the old VCMP for removal and reset
+ // PrevVCMP/PrevVCMPResultKiller.
+ DeadInstructions.push_back(&Instr);
+ PrevVCMP = nullptr;
+ PrevVCMPResultKiller = nullptr;
+ }
+
+ for (MachineInstr *DeadInstruction : DeadInstructions)
+ DeadInstruction->removeFromParent();
+
+ return !DeadInstructions.empty();
+}
+
+bool MVEVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
+ const ARMSubtarget &STI =
+ static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+
+ if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
+ return false;
+
+ TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
+ MRI = &Fn.getRegInfo();
+
+ LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
+ << "********** Function: " << Fn.getName() << '\n');
+
+ bool Modified = false;
+ for (MachineBasicBlock &MBB : Fn) {
+ Modified |= ReplaceVCMPsByVPNOTs(MBB);
+ Modified |= ReduceOldVCCRValueUses(MBB);
+ }
+
+ LLVM_DEBUG(dbgs() << "**************************************\n");
+ return Modified;
+}
+
+/// createMVEVPTOptimisationsPass
+FunctionPass *llvm::createMVEVPTOptimisationsPass() {
+ return new MVEVPTOptimisations();
+}
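
The VCMP-to-VPNOT rewrite above relies on the identity that a compare with the inverted condition code on the same operands yields exactly the complemented predicate mask, which the pass checks on MachineInstrs through the IsVPNOTEquivalent helper it calls above. The following standalone C++ sketch is not part of the patch; Cond, inverse and cmp are made-up names, and it only illustrates that lane-by-lane identity for a few integer conditions:

// Standalone illustration (not from the patch): on every lane,
// cmp(cond, a, b) == !cmp(inverse(cond), a, b), which is what lets the pass
// replace the second VCMP of an inverse-condition pair with a VPNOT of the
// first VCMP's result. Cond, inverse and cmp are made-up names.
#include <array>
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <initializer_list>

enum class Cond { EQ, NE, LT, GE };

static Cond inverse(Cond C) {
  switch (C) {
  case Cond::EQ: return Cond::NE;
  case Cond::NE: return Cond::EQ;
  case Cond::LT: return Cond::GE;
  case Cond::GE: return Cond::LT;
  }
  return Cond::EQ;
}

static bool cmp(Cond C, int32_t A, int32_t B) {
  switch (C) {
  case Cond::EQ: return A == B;
  case Cond::NE: return A != B;
  case Cond::LT: return A < B;
  case Cond::GE: return A >= B;
  }
  return false;
}

int main() {
  // Two 4-lane vectors with equal, smaller and larger lanes.
  std::array<int32_t, 4> LaneA = {1, -3, 7, 7};
  std::array<int32_t, 4> LaneB = {1, 4, -2, 7};
  for (Cond C : {Cond::EQ, Cond::NE, Cond::LT, Cond::GE})
    for (unsigned I = 0; I < 4; ++I)
      assert(cmp(C, LaneA[I], LaneB[I]) ==
             !cmp(inverse(C), LaneA[I], LaneB[I]));
  std::puts("inverse-condition compare is the negated predicate on every lane");
  return 0;
}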
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index 956d474f1d79..d568e9afe432 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -88,8 +88,10 @@ emitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB,
0, MIFlags);
}
BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDhirr), ARM::SP)
- .addReg(ARM::SP).addReg(ScratchReg, RegState::Kill)
- .add(predOps(ARMCC::AL));
+ .addReg(ARM::SP)
+ .addReg(ScratchReg, RegState::Kill)
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MIFlags);
return;
}
// FIXME: This is assuming the heuristics in emitThumbRegPlusImmediate
@@ -127,7 +129,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- Amount = alignTo(Amount, getStackAlignment());
+ Amount = alignTo(Amount, getStackAlign());
// Replace the pseudo instruction with a new instruction...
unsigned Opc = Old.getOpcode();
@@ -180,9 +182,9 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
if (ArgRegsSaveSize) {
emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize,
ARM::NoRegister, MachineInstr::FrameSetup);
- CFAOffset -= ArgRegsSaveSize;
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
+ CFAOffset += ArgRegsSaveSize;
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -193,9 +195,9 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
-(NumBytes - ArgRegsSaveSize),
ARM::NoRegister, MachineInstr::FrameSetup);
- CFAOffset -= NumBytes - ArgRegsSaveSize;
+ CFAOffset += NumBytes - ArgRegsSaveSize;
unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -257,9 +259,9 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
}
if (adjustedGPRCS1Size) {
- CFAOffset -= adjustedGPRCS1Size;
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
+ CFAOffset += adjustedGPRCS1Size;
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -305,8 +307,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlags(MachineInstr::FrameSetup)
.add(predOps(ARMCC::AL));
if(FramePtrOffsetInBlock) {
- CFAOffset += FramePtrOffsetInBlock;
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
+ CFAOffset -= FramePtrOffsetInBlock;
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
@@ -384,9 +386,9 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
ScratchRegister, MachineInstr::FrameSetup);
if (!HasFP) {
- CFAOffset -= NumBytes;
+ CFAOffset += NumBytes;
unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -402,7 +404,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
if (RegInfo->needsStackRealignment(MF)) {
- const unsigned NrBitsToZero = countTrailingZeros(MFI.getMaxAlignment());
+ const unsigned NrBitsToZero = Log2(MFI.getMaxAlign());
// Emit the following sequence, using R4 as a temporary, since we cannot use
// SP as a source or destination register for the shifts:
// mov r4, sp
@@ -804,11 +806,9 @@ static const unsigned *findNextOrderedReg(const unsigned *CurrentReg,
return CurrentReg;
}
-bool Thumb1FrameLowering::
-spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+bool Thumb1FrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -927,11 +927,9 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
-bool Thumb1FrameLowering::
-restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -1049,6 +1047,10 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
if (!STI.hasV5TOps())
continue;
+ // CMSE entry functions must return via BXNS, see emitEpilogue.
+ if (AFI->isCmseNSEntryFunction())
+ continue;
+
// Pop LR into PC.
Reg = ARM::PC;
(*MIB).setDesc(TII.get(ARM::tPOP_RET));
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1FrameLowering.h b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1FrameLowering.h
index 61af48712b6c..a4b2a085ea38 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1FrameLowering.h
@@ -27,12 +27,13 @@ public:
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
+ ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) const override;
- bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const override;
+ bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index b08b71a4952d..79afa378cb62 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -76,7 +76,7 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
void Thumb1InstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
+ Register SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
assert((RC == &ARM::tGPRRegClass ||
@@ -92,7 +92,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
- MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
BuildMI(MBB, I, DL, get(ARM::tSTRspi))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
@@ -104,7 +104,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
void Thumb1InstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, int FI,
+ Register DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
assert(
@@ -121,7 +121,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg)
.addFrameIndex(FI)
.addImm(0)
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1InstrInfo.h
index 530289fe8c5d..017b7222337c 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1InstrInfo.h
@@ -42,13 +42,13 @@ public:
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
+ Register SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
+ Register DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 786fc78d0233..5cdaa7f02201 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -183,7 +183,7 @@ Thumb2ITBlock::MoveCopyOutOfITBlock(MachineInstr *MI,
++I;
if (I != E) {
- unsigned NPredReg = 0;
+ Register NPredReg;
ARMCC::CondCodes NCC = getITInstrPredicate(*I, NPredReg);
if (NCC == CC || NCC == OCC)
return true;
@@ -199,7 +199,7 @@ bool Thumb2ITBlock::InsertITInstructions(MachineBasicBlock &MBB) {
while (MBBI != E) {
MachineInstr *MI = &*MBBI;
DebugLoc dl = MI->getDebugLoc();
- unsigned PredReg = 0;
+ Register PredReg;
ARMCC::CondCodes CC = getITInstrPredicate(*MI, PredReg);
if (CC == ARMCC::AL) {
++MBBI;
@@ -239,7 +239,7 @@ bool Thumb2ITBlock::InsertITInstructions(MachineBasicBlock &MBB) {
MachineInstr *NMI = &*MBBI;
MI = NMI;
- unsigned NPredReg = 0;
+ Register NPredReg;
ARMCC::CondCodes NCC = getITInstrPredicate(*NMI, NPredReg);
if (NCC == CC || NCC == OCC) {
Mask |= ((NCC ^ CC) & 1) << Pos;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index e06bb9546c03..48c6b47f2154 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -66,7 +66,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
// If the first instruction of Tail is predicated, we may have to update
// the IT instruction.
- unsigned PredReg = 0;
+ Register PredReg;
ARMCC::CondCodes CC = getInstrPredicate(*Tail, PredReg);
MachineBasicBlock::iterator MBBI = Tail;
if (CC != ARMCC::AL)
@@ -114,7 +114,7 @@ Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
return false;
}
- unsigned PredReg = 0;
+ Register PredReg;
return getITInstrPredicate(*MBBI, PredReg) == ARMCC::AL;
}
@@ -133,7 +133,7 @@ void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
void Thumb2InstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
+ Register SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
@@ -143,7 +143,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
- MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
if (ARM::GPRRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(ARM::t2STRi12))
@@ -176,14 +176,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
void Thumb2InstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, int FI,
+ Register DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -229,9 +229,9 @@ void Thumb2InstrInfo::expandLoadStackGuard(
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- const DebugLoc &dl, unsigned DestReg,
- unsigned BaseReg, int NumBytes,
- ARMCC::CondCodes Pred, unsigned PredReg,
+ const DebugLoc &dl, Register DestReg,
+ Register BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, Register PredReg,
const ARMBaseInstrInfo &TII,
unsigned MIFlags) {
if (NumBytes == 0 && DestReg != BaseReg) {
@@ -471,7 +471,7 @@ immediateOffsetOpcode(unsigned opcode)
}
bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int &Offset,
+ Register FrameReg, int &Offset,
const ARMBaseInstrInfo &TII,
const TargetRegisterInfo *TRI) {
unsigned Opcode = MI.getOpcode();
@@ -491,7 +491,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
if (IsSP || Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) {
Offset += MI.getOperand(FrameRegIdx+1).getImm();
- unsigned PredReg;
+ Register PredReg;
if (Offset == 0 && getInstrPredicate(MI, PredReg) == ARMCC::AL &&
!MI.definesRegister(ARM::CPSR)) {
// Turn it into a move.
@@ -634,7 +634,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
assert((Offset & OffsetMask) == 0 && "Can't encode this offset!");
(void)OffsetMask; // squash unused-variable warning at -NDEBUG
} else if (AddrMode == ARMII::AddrModeT2_i8s4) {
- Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4;
+ Offset += MI.getOperand(FrameRegIdx + 1).getImm();
NumBits = 8 + 2;
// MCInst operand expects already scaled value.
Scale = 1;
@@ -706,7 +706,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
}
ARMCC::CondCodes llvm::getITInstrPredicate(const MachineInstr &MI,
- unsigned &PredReg) {
+ Register &PredReg) {
unsigned Opc = MI.getOpcode();
if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
return ARMCC::AL;
@@ -727,7 +727,7 @@ int llvm::findFirstVPTPredOperandIdx(const MachineInstr &MI) {
}
ARMVCC::VPTCodes llvm::getVPTInstrPredicate(const MachineInstr &MI,
- unsigned &PredReg) {
+ Register &PredReg) {
int PIdx = findFirstVPTPredOperandIdx(MI);
if (PIdx == -1) {
PredReg = 0;
@@ -737,3 +737,33 @@ ARMVCC::VPTCodes llvm::getVPTInstrPredicate(const MachineInstr &MI,
PredReg = MI.getOperand(PIdx+1).getReg();
return (ARMVCC::VPTCodes)MI.getOperand(PIdx).getImm();
}
+
+void llvm::recomputeVPTBlockMask(MachineInstr &Instr) {
+ assert(isVPTOpcode(Instr.getOpcode()) && "Not a VPST or VPT Instruction!");
+
+ MachineOperand &MaskOp = Instr.getOperand(0);
+ assert(MaskOp.isImm() && "Operand 0 is not the block mask of the VPT/VPST?!");
+
+ MachineBasicBlock::iterator Iter = ++Instr.getIterator(),
+ End = Instr.getParent()->end();
+
+ // Verify that the instruction after the VPT/VPST is predicated (it should
+ // be), and skip it.
+ assert(
+ getVPTInstrPredicate(*Iter) == ARMVCC::Then &&
+ "VPT/VPST should be followed by an instruction with a 'then' predicate!");
+ ++Iter;
+
+ // Iterate over the predicated instructions, updating the BlockMask as we go.
+ ARM::PredBlockMask BlockMask = ARM::PredBlockMask::T;
+ while (Iter != End) {
+ ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*Iter);
+ if (Pred == ARMVCC::None)
+ break;
+ BlockMask = expandPredBlockMask(BlockMask, Pred);
+ ++Iter;
+ }
+
+ // Rewrite the BlockMask.
+ MaskOp.setImm((int64_t)(BlockMask));
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index 7d8dff14e1e7..ec3763632239 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -44,13 +44,13 @@ public:
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
+ Register SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
+ Register DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
@@ -67,13 +67,24 @@ private:
/// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
/// to llvm::getInstrPredicate except it returns AL for conditional branch
/// instructions which are "predicated", but are not in IT blocks.
-ARMCC::CondCodes getITInstrPredicate(const MachineInstr &MI, unsigned &PredReg);
+ARMCC::CondCodes getITInstrPredicate(const MachineInstr &MI, Register &PredReg);
// getVPTInstrPredicate: VPT analogue of that, plus a helper function
// corresponding to MachineInstr::findFirstPredOperandIdx.
int findFirstVPTPredOperandIdx(const MachineInstr &MI);
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI,
- unsigned &PredReg);
+ Register &PredReg);
+inline ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI) {
+ Register PredReg;
+ return getVPTInstrPredicate(MI, PredReg);
}
+// Recomputes the Block Mask of Instr, a VPT or VPST instruction.
+// This rebuilds the block mask of the instruction depending on the predicates
+// of the instructions following it. This should only be used after the
+// MVEVPTBlockInsertion pass has run, and should be used whenever a predicated
+// instruction is added to/removed from the block.
+void recomputeVPTBlockMask(MachineInstr &Instr);
+} // namespace llvm
+
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index c5a62aa33990..ae661594bdc9 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -457,7 +457,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
return false;
if (!MI->hasOneMemOperand() ||
- (*MI->memoperands_begin())->getAlignment() < 4)
+ (*MI->memoperands_begin())->getAlign() < Align(4))
return false;
// We're creating a completely different type of load/store - LDM from LDR.
@@ -516,13 +516,23 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
isLdStMul = true;
break;
}
- case ARM::t2STMIA:
- // If the base register is killed, we don't care what its value is after the
- // instruction, so we can use an updating STMIA.
+ case ARM::t2STMIA: {
+ // t2STMIA is reduced to tSTMIA_UPD which has writeback. We can only do this
+ // if the base register is killed, as then it doesn't matter what its value
+ // is after the instruction.
if (!MI->getOperand(0).isKill())
return false;
+ // If the base register is in the register list and isn't the lowest
+ // numbered register (i.e. it's in operand 4 onwards) then with writeback
+ // the stored value is unknown, so we can't convert to tSTMIA_UPD.
+ Register BaseReg = MI->getOperand(0).getReg();
+ for (unsigned i = 4; i < MI->getNumOperands(); ++i)
+ if (MI->getOperand(i).getReg() == BaseReg)
+ return false;
+
break;
+ }
case ARM::t2LDMIA_RET: {
Register BaseReg = MI->getOperand(1).getReg();
if (BaseReg != ARM::SP)
@@ -676,7 +686,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
default: break;
case ARM::t2ADDSri:
case ARM::t2ADDSrr: {
- unsigned PredReg = 0;
+ Register PredReg;
if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) {
switch (Opc) {
default: break;
@@ -718,7 +728,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
}
case ARM::t2TEQrr: {
- unsigned PredReg = 0;
+ Register PredReg;
// Can only convert to eors if we're not in an IT block.
if (getInstrPredicate(*MI, PredReg) != ARMCC::AL)
break;
@@ -789,7 +799,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// Check if it's possible / necessary to transfer the predicate.
const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
- unsigned PredReg = 0;
+ Register PredReg;
ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
bool SkipPred = false;
if (Pred != ARMCC::AL) {
@@ -882,7 +892,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Check if it's possible / necessary to transfer the predicate.
const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
- unsigned PredReg = 0;
+ Register PredReg;
ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
bool SkipPred = false;
if (Pred != ARMCC::AL) {
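
The new t2STMIA guard above refuses the narrowing to tSTMIA_UPD unless the base register is killed and does not reappear in the register list after the lowest-numbered slot, because with writeback the value stored for such a slot is unknown. Below is a standalone sketch of that guard, not taken from the patch; StoreMultiple and canUseWritebackForm are hypothetical names standing in for the real walk over MachineOperands from index 4 onwards.

// Standalone model (not from the patch) of the guard added for
// t2STMIA -> tSTMIA_UPD: the conversion is rejected unless the base register
// is killed and does not reappear after the lowest-numbered list slot, since
// writeback would make the value stored for that slot unknown.
#include <cstddef>
#include <cstdio>
#include <vector>

struct StoreMultiple {
  unsigned BaseReg;
  bool BaseIsKilled;
  std::vector<unsigned> RegList; // registers stored, lowest-numbered first
};

static bool canUseWritebackForm(const StoreMultiple &MI) {
  if (!MI.BaseIsKilled)
    return false; // writeback would clobber a base that is still live
  for (std::size_t I = 1; I < MI.RegList.size(); ++I)
    if (MI.RegList[I] == MI.BaseReg)
      return false; // base stored after the first slot: value is unknown
  return true;
}

int main() {
  StoreMultiple Fine{/*BaseReg=*/4, /*BaseIsKilled=*/true, {0, 1, 2}};
  StoreMultiple Bad{/*BaseReg=*/2, /*BaseIsKilled=*/true, {0, 1, 2}};
  std::printf("fine=%d bad=%d\n", canUseWritebackForm(Fine),
              canUseWritebackForm(Bad));
  return 0;
}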
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
index b0ba58d8dc4a..4da6f6ab6994 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -70,7 +70,7 @@ static void emitThumb1LoadConstPool(MachineBasicBlock &MBB,
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C = ConstantInt::get(
Type::getInt32Ty(MBB.getParent()->getFunction().getContext()), Val);
- unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align(4));
BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRpci))
.addReg(DestReg, getDefRegState(true), SubIdx)
@@ -89,7 +89,7 @@ static void emitThumb2LoadConstPool(MachineBasicBlock &MBB,
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C = ConstantInt::get(
Type::getInt32Ty(MBB.getParent()->getFunction().getContext()), Val);
- unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align(4));
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
.addReg(DestReg, getDefRegState(true), SubIdx)
@@ -102,14 +102,13 @@ static void emitThumb2LoadConstPool(MachineBasicBlock &MBB,
/// specified immediate.
void ThumbRegisterInfo::emitLoadConstPool(
MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- const DebugLoc &dl, unsigned DestReg, unsigned SubIdx, int Val,
- ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const {
+ const DebugLoc &dl, Register DestReg, unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred, Register PredReg, unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
if (STI.isThumb1Only()) {
- assert(
- (isARMLowRegister(DestReg) || Register::isVirtualRegister(DestReg)) &&
- "Thumb1 does not have ldr to high register");
+ assert((isARMLowRegister(DestReg) || DestReg.isVirtual()) &&
+ "Thumb1 does not have ldr to high register");
return emitThumb1LoadConstPool(MBB, MBBI, dl, DestReg, SubIdx, Val, Pred,
PredReg, MIFlags);
}
@@ -123,7 +122,7 @@ void ThumbRegisterInfo::emitLoadConstPool(
/// constpool entry.
static void emitThumbRegPlusImmInReg(
MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- const DebugLoc &dl, unsigned DestReg, unsigned BaseReg, int NumBytes,
+ const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes,
bool CanChangeCC, const TargetInstrInfo &TII,
const ARMBaseRegisterInfo &MRI, unsigned MIFlags = MachineInstr::NoFlags) {
MachineFunction &MF = *MBB.getParent();
@@ -139,7 +138,7 @@ static void emitThumbRegPlusImmInReg(
isSub = true;
NumBytes = -NumBytes;
}
- unsigned LdReg = DestReg;
+ Register LdReg = DestReg;
if (DestReg == ARM::SP)
assert(BaseReg == ARM::SP && "Unexpected!");
if (!isARMLowRegister(DestReg) && !Register::isVirtualRegister(DestReg))
@@ -185,8 +184,8 @@ static void emitThumbRegPlusImmInReg(
/// be too long. This is allowed to modify the condition flags.
void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- const DebugLoc &dl, unsigned DestReg,
- unsigned BaseReg, int NumBytes,
+ const DebugLoc &dl, Register DestReg,
+ Register BaseReg, int NumBytes,
const TargetInstrInfo &TII,
const ARMBaseRegisterInfo &MRI,
unsigned MIFlags) {
@@ -358,7 +357,7 @@ static unsigned convertToNonSPOpcode(unsigned Opcode) {
bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II,
unsigned FrameRegIdx,
- unsigned FrameReg, int &Offset,
+ Register FrameReg, int &Offset,
const ARMBaseInstrInfo &TII) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
@@ -427,8 +426,8 @@ bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II,
return Offset == 0;
}
-void ThumbRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
- int64_t Offset) const {
+void ThumbRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
+ int64_t Offset) const {
const MachineFunction &MF = *MI.getParent()->getParent();
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
if (!STI.isThumb1Only())
@@ -458,12 +457,12 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
return ARMBaseRegisterInfo::eliminateFrameIndex(II, SPAdj, FIOperandNum,
RS);
- unsigned VReg = 0;
+ Register VReg;
const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
MachineInstrBuilder MIB(*MBB.getParent(), &MI);
- unsigned FrameReg;
+ Register FrameReg;
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
const ARMFrameLowering *TFI = getFrameLowering(MF);
int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ThumbRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ThumbRegisterInfo.h
index 08cf67284d4c..e05a24dbaca5 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ThumbRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ThumbRegisterInfo.h
@@ -38,18 +38,18 @@ public:
/// specified immediate.
void
emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- const DebugLoc &dl, unsigned DestReg, unsigned SubIdx,
+ const DebugLoc &dl, Register DestReg, unsigned SubIdx,
int Val, ARMCC::CondCodes Pred = ARMCC::AL,
- unsigned PredReg = 0,
+ Register PredReg = Register(),
unsigned MIFlags = MachineInstr::NoFlags) const override;
// rewrite MI to access 'Offset' bytes from the FP. Update Offset to be
// however much remains to be handled. Return 'true' if no further
// work is required.
bool rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
- unsigned FrameReg, int &Offset,
+ Register FrameReg, int &Offset,
const ARMBaseInstrInfo &TII) const;
- void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Utils/ARMBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/Utils/ARMBaseInfo.cpp
index 4ace61cccd0f..3356d56481e5 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Utils/ARMBaseInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Utils/ARMBaseInfo.cpp
@@ -15,6 +15,37 @@
using namespace llvm;
namespace llvm {
+ARM::PredBlockMask expandPredBlockMask(ARM::PredBlockMask BlockMask,
+ ARMVCC::VPTCodes Kind) {
+ using PredBlockMask = ARM::PredBlockMask;
+ assert(Kind != ARMVCC::None && "Cannot expand a mask with None!");
+ assert(countTrailingZeros((unsigned)BlockMask) != 0 &&
+ "Mask is already full");
+
+ auto ChooseMask = [&](PredBlockMask AddedThen, PredBlockMask AddedElse) {
+ return Kind == ARMVCC::Then ? AddedThen : AddedElse;
+ };
+
+ switch (BlockMask) {
+ case PredBlockMask::T:
+ return ChooseMask(PredBlockMask::TT, PredBlockMask::TE);
+ case PredBlockMask::TT:
+ return ChooseMask(PredBlockMask::TTT, PredBlockMask::TTE);
+ case PredBlockMask::TE:
+ return ChooseMask(PredBlockMask::TET, PredBlockMask::TEE);
+ case PredBlockMask::TTT:
+ return ChooseMask(PredBlockMask::TTTT, PredBlockMask::TTTE);
+ case PredBlockMask::TTE:
+ return ChooseMask(PredBlockMask::TTET, PredBlockMask::TTEE);
+ case PredBlockMask::TET:
+ return ChooseMask(PredBlockMask::TETT, PredBlockMask::TETE);
+ case PredBlockMask::TEE:
+ return ChooseMask(PredBlockMask::TEET, PredBlockMask::TEEE);
+ default:
+ llvm_unreachable("Unknown Mask");
+ }
+}
+
namespace ARMSysReg {
// lookup system register using 12-bit SYSm value.
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h
index 27605422983d..80b7276adb4e 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h
@@ -91,41 +91,41 @@ namespace ARMVCC {
Then,
Else
};
-
- enum VPTMaskValue {
- T = 8, // 0b1000
- TT = 4, // 0b0100
- TE = 12, // 0b1100
- TTT = 2, // 0b0010
- TTE = 6, // 0b0110
- TEE = 10, // 0b1010
- TET = 14, // 0b1110
- TTTT = 1, // 0b0001
- TTTE = 3, // 0b0011
- TTEE = 5, // 0b0101
- TTET = 7, // 0b0111
- TEEE = 9, // 0b1001
- TEET = 11, // 0b1011
- TETT = 13, // 0b1101
- TETE = 15 // 0b1111
+} // namespace ARMVCC
+
+namespace ARM {
+ /// Mask values for IT and VPT Blocks, to be used by MCOperands.
+ /// Note that this is different from the "real" encoding used by the
+ /// instructions. In this encoding, the lowest set bit indicates the end of
+ /// the encoding, and above that, "1" indicates an else, while "0" indicates
+ /// a then.
+ /// Tx = x100
+ /// Txy = xy10
+ /// Txyz = xyz1
+ enum class PredBlockMask {
+ T = 0b1000,
+ TT = 0b0100,
+ TE = 0b1100,
+ TTT = 0b0010,
+ TTE = 0b0110,
+ TEE = 0b1110,
+ TET = 0b1010,
+ TTTT = 0b0001,
+ TTTE = 0b0011,
+ TTEE = 0b0111,
+ TTET = 0b0101,
+ TEEE = 0b1111,
+ TEET = 0b1101,
+ TETT = 0b1001,
+ TETE = 0b1011
};
-}
+} // namespace ARM
-inline static unsigned getARMVPTBlockMask(unsigned NumInsts) {
- switch (NumInsts) {
- case 1:
- return ARMVCC::T;
- case 2:
- return ARMVCC::TT;
- case 3:
- return ARMVCC::TTT;
- case 4:
- return ARMVCC::TTTT;
- default:
- break;
- };
- llvm_unreachable("Unexpected number of instruction in a VPT block");
-}
+// Expands a PredBlockMask by adding an E or a T at the end, depending on Kind.
+// e.g. expandPredBlockMask(T, Then) = TT, expandPredBlockMask(TT, Else) = TTE,
+// and so on.
+ARM::PredBlockMask expandPredBlockMask(ARM::PredBlockMask BlockMask,
+ ARMVCC::VPTCodes Kind);
inline static const char *ARMVPTPredToString(ARMVCC::VPTCodes CC) {
switch (CC) {
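
The PredBlockMask values introduced above can also be derived rather than tabulated: the lowest set bit acts as a terminator, and appending a predicate clears that terminator, sets it for an 'else' (leaves it clear for a 'then'), and places a new terminator one bit lower. The in-tree expandPredBlockMask keeps the explicit switch; the standalone sketch below, with the hypothetical helper name expandMask, merely checks that this bit rule reproduces the same table entries.

// Standalone check (not from the patch) that a simple bit rule reproduces the
// PredBlockMask table: the lowest set bit is a terminator; appending a
// predicate clears it, sets it for an 'else' (leaves it clear for a 'then'),
// and places a new terminator one bit lower.
#include <cassert>
#include <cstdio>

enum Pred { Then, Else };

static unsigned expandMask(unsigned Mask, Pred P) {
  unsigned Terminator = Mask & ~(Mask - 1u); // lowest set bit
  assert(Terminator > 1u && "mask already holds four predicates");
  return (Mask & ~Terminator) | (P == Else ? Terminator : 0u) |
         (Terminator >> 1);
}

int main() {
  unsigned Mask = 0b1000;        // T
  Mask = expandMask(Mask, Then); // TT   -> 0b0100
  assert(Mask == 0b0100);
  Mask = expandMask(Mask, Else); // TTE  -> 0b0110
  assert(Mask == 0b0110);
  Mask = expandMask(Mask, Then); // TTET -> 0b0101
  assert(Mask == 0b0101);
  std::printf("TTET encodes as 0x%X\n", Mask);
  return 0;
}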
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
index 9b09c7456543..722eecdc16a1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
@@ -51,7 +51,7 @@ public:
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
const char *ExtraCode, raw_ostream &O) override;
- void EmitInstruction(const MachineInstr *MI) override;
+ void emitInstruction(const MachineInstr *MI) override;
private:
const MCRegisterInfo &MRI;
@@ -168,7 +168,7 @@ bool AVRAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
-void AVRAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void AVRAsmPrinter::emitInstruction(const MachineInstr *MI) {
AVRMCInstLower MCInstLowering(OutContext, *this);
MCInst I;
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRCallingConv.td b/contrib/llvm-project/llvm/lib/Target/AVR/AVRCallingConv.td
index 213e35fca66d..65545e531a88 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRCallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRCallingConv.td
@@ -6,21 +6,13 @@
//
//===----------------------------------------------------------------------===//
// This describes the calling conventions for AVR architecture.
+// Normal functions use a special calling convention that is handled in C++ code.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// AVR Return Value Calling Convention
//===----------------------------------------------------------------------===//
-def RetCC_AVR : CallingConv
-<[
- // i8 is returned in R24.
- CCIfType<[i8], CCAssignToReg<[R24]>>,
-
- // i16 are returned in R25:R24, R23:R22, R21:R20 and R19:R18.
- CCIfType<[i16], CCAssignToReg<[R25R24, R23R22, R21R20, R19R18]>>
-]>;
-
// Special return value calling convention for runtime functions.
def RetCC_AVR_BUILTIN : CallingConv
<[
@@ -41,14 +33,6 @@ def ArgCC_AVR_Vararg : CallingConv
CCAssignToStack<2, 1>
]>;
-// Special argument calling convention for
-// division runtime functions.
-def ArgCC_AVR_BUILTIN_DIV : CallingConv
-<[
- CCIfType<[i8], CCAssignToReg<[R24,R22]>>,
- CCIfType<[i16], CCAssignToReg<[R25R24, R23R22]>>
-]>;
-
//===----------------------------------------------------------------------===//
// Callee-saved register lists.
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRDevices.td b/contrib/llvm-project/llvm/lib/Target/AVR/AVRDevices.td
index 62def4574437..6730f2e1673e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRDevices.td
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRDevices.td
@@ -121,6 +121,11 @@ def FeatureTinyEncoding : SubtargetFeature<"tinyencoding",
"The device has Tiny core specific "
"instruction encodings">;
+// The device has CPU registers mapped in data address space
+def FeatureMMR : SubtargetFeature<"memmappedregs", "m_hasMemMappedGPR",
+ "true", "The device has CPU registers "
+ "mapped in data address space">;
+
class ELFArch<string name> : SubtargetFeature<"", "ELFArch",
!strconcat("ELF::",name), "">;
@@ -152,7 +157,7 @@ def ELFArchXMEGA7 : ELFArch<"EF_AVR_ARCH_XMEGA7">;
// device should have.
def FamilyAVR0 : Family<"avr0", []>;
-def FamilyAVR1 : Family<"avr1", [FamilyAVR0, FeatureLPM]>;
+def FamilyAVR1 : Family<"avr1", [FamilyAVR0, FeatureLPM, FeatureMMR]>;
def FamilyAVR2 : Family<"avr2",
[FamilyAVR1, FeatureIJMPCALL, FeatureADDSUBIW,
@@ -190,11 +195,14 @@ def FamilyAVR6 : Family<"avr6",
def FamilyTiny : Family<"avrtiny",
[FamilyAVR0, FeatureBREAK, FeatureSRAM,
- FeatureTinyEncoding]>;
+ FeatureTinyEncoding, FeatureMMR]>;
def FamilyXMEGA : Family<"xmega",
- [FamilyAVR51, FeatureEIJMPCALL, FeatureSPMX,
- FeatureDES]>;
+ [FamilyAVR0, FeatureLPM, FeatureIJMPCALL, FeatureADDSUBIW,
+ FeatureSRAM, FeatureJMPCALL, FeatureMultiplication,
+ FeatureMOVW, FeatureLPMX, FeatureSPM,
+ FeatureBREAK, FeatureEIJMPCALL, FeatureSPMX,
+ FeatureDES, FeatureELPM, FeatureELPMX]>;
def FamilyXMEGAU : Family<"xmegau",
[FamilyXMEGA, FeatureRMW]>;
@@ -208,7 +216,7 @@ def FeatureSetSpecial : FeatureSet<"special",
FeatureLPM, FeatureLPMX, FeatureELPM,
FeatureELPMX, FeatureSPM, FeatureSPMX,
FeatureDES, FeatureRMW,
- FeatureMultiplication, FeatureBREAK]>;
+ FeatureMultiplication, FeatureBREAK, FeatureMMR]>;
//===---------------------------------------------------------------------===//
// AVR microcontrollers supported.
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index f466c5c053ad..8ee69201e932 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -51,9 +51,9 @@ private:
const TargetInstrInfo *TII;
/// The register to be used for temporary storage.
- const unsigned SCRATCH_REGISTER = AVR::R0;
+ const Register SCRATCH_REGISTER = AVR::R0;
/// The register that will always contain zero.
- const unsigned ZERO_REGISTER = AVR::R1;
+ const Register ZERO_REGISTER = AVR::R1;
/// The IO address of the status register.
const unsigned SREG_ADDR = 0x3f;
@@ -66,7 +66,7 @@ private:
}
MachineInstrBuilder buildMI(Block &MBB, BlockIt MBBI, unsigned Opcode,
- unsigned DstReg) {
+ Register DstReg) {
return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode), DstReg);
}
@@ -91,7 +91,7 @@ private:
BlockIt MBBI);
/// Scavenges a free GPR8 register for use.
- unsigned scavengeGPR8(MachineInstr &MI);
+ Register scavengeGPR8(MachineInstr &MI);
};
char AVRExpandPseudo::ID = 0;
@@ -141,7 +141,7 @@ bool AVRExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
bool AVRExpandPseudo::
expandArith(unsigned OpLo, unsigned OpHi, Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned SrcLoReg, SrcHiReg, DstLoReg, DstHiReg;
+ Register SrcLoReg, SrcHiReg, DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(2).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
@@ -174,7 +174,7 @@ expandArith(unsigned OpLo, unsigned OpHi, Block &MBB, BlockIt MBBI) {
bool AVRExpandPseudo::
expandLogic(unsigned Op, Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned SrcLoReg, SrcHiReg, DstLoReg, DstHiReg;
+ Register SrcLoReg, SrcHiReg, DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(2).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
@@ -221,7 +221,7 @@ bool AVRExpandPseudo::
bool AVRExpandPseudo::
expandLogicImm(unsigned Op, Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned DstLoReg, DstHiReg;
+ Register DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool SrcIsKill = MI.getOperand(1).isKill();
@@ -273,8 +273,8 @@ bool AVRExpandPseudo::expand<AVR::SUBWRdRr>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::SUBIWRdK>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned DstLoReg, DstHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool SrcIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(3).isDead();
@@ -325,16 +325,16 @@ bool AVRExpandPseudo::expand<AVR::SBCWRdRr>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::SBCIWRdK>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, DstLoReg, DstHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool SrcIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(3).isDead();
unsigned Imm = MI.getOperand(2).getImm();
unsigned Lo8 = Imm & 0xff;
unsigned Hi8 = (Imm >> 8) & 0xff;
- OpLo = AVR::SBCIRdK;
- OpHi = AVR::SBCIRdK;
+ unsigned OpLo = AVR::SBCIRdK;
+ unsigned OpHi = AVR::SBCIRdK;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
auto MIBLO = buildMI(MBB, MBBI, OpLo)
@@ -388,13 +388,13 @@ bool AVRExpandPseudo::expand<AVR::EORWRdRr>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::COMWRd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, DstLoReg, DstHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool DstIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(2).isDead();
- OpLo = AVR::COMRd;
- OpHi = AVR::COMRd;
+ unsigned OpLo = AVR::COMRd;
+ unsigned OpHi = AVR::COMRd;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
auto MIBLO = buildMI(MBB, MBBI, OpLo)
@@ -418,14 +418,14 @@ bool AVRExpandPseudo::expand<AVR::COMWRd>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::CPWRdRr>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, SrcLoReg, SrcHiReg, DstLoReg, DstHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register SrcLoReg, SrcHiReg, DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
bool DstIsKill = MI.getOperand(0).isKill();
bool SrcIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(2).isDead();
- OpLo = AVR::CPRdRr;
- OpHi = AVR::CPCRdRr;
+ unsigned OpLo = AVR::CPRdRr;
+ unsigned OpHi = AVR::CPCRdRr;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
@@ -451,14 +451,14 @@ bool AVRExpandPseudo::expand<AVR::CPWRdRr>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::CPCWRdRr>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, SrcLoReg, SrcHiReg, DstLoReg, DstHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register SrcLoReg, SrcHiReg, DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
bool DstIsKill = MI.getOperand(0).isKill();
bool SrcIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(2).isDead();
- OpLo = AVR::CPCRdRr;
- OpHi = AVR::CPCRdRr;
+ unsigned OpLo = AVR::CPCRdRr;
+ unsigned OpHi = AVR::CPCRdRr;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
@@ -486,11 +486,11 @@ bool AVRExpandPseudo::expand<AVR::CPCWRdRr>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::LDIWRdK>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, DstLoReg, DstHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
- OpLo = AVR::LDIRdK;
- OpHi = AVR::LDIRdK;
+ unsigned OpLo = AVR::LDIRdK;
+ unsigned OpHi = AVR::LDIRdK;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
auto MIBLO = buildMI(MBB, MBBI, OpLo)
@@ -535,11 +535,11 @@ bool AVRExpandPseudo::expand<AVR::LDIWRdK>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::LDSWRdK>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, DstLoReg, DstHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
- OpLo = AVR::LDSRdK;
- OpHi = AVR::LDSRdK;
+ unsigned OpLo = AVR::LDSRdK;
+ unsigned OpHi = AVR::LDSRdK;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
auto MIBLO = buildMI(MBB, MBBI, OpLo)
@@ -579,26 +579,26 @@ bool AVRExpandPseudo::expand<AVR::LDSWRdK>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, DstLoReg, DstHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned TmpReg = 0; // 0 for no temporary register
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register TmpReg = 0; // 0 for no temporary register
+ Register SrcReg = MI.getOperand(1).getReg();
bool SrcIsKill = MI.getOperand(1).isKill();
- OpLo = AVR::LDRdPtr;
- OpHi = AVR::LDDRdPtrQ;
+ unsigned OpLo = AVR::LDRdPtr;
+ unsigned OpHi = AVR::LDDRdPtrQ;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
// Use a temporary register if src and dst registers are the same.
if (DstReg == SrcReg)
TmpReg = scavengeGPR8(MI);
- unsigned CurDstLoReg = (DstReg == SrcReg) ? TmpReg : DstLoReg;
- unsigned CurDstHiReg = (DstReg == SrcReg) ? TmpReg : DstHiReg;
+ Register CurDstLoReg = (DstReg == SrcReg) ? TmpReg : DstLoReg;
+ Register CurDstHiReg = (DstReg == SrcReg) ? TmpReg : DstHiReg;
// Load low byte.
auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(CurDstLoReg, RegState::Define)
- .addReg(SrcReg, RegState::Define);
+ .addReg(CurDstLoReg, RegState::Define)
+ .addReg(SrcReg);
// Push low byte onto stack if necessary.
if (TmpReg)
@@ -628,13 +628,13 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::LDWRdPtrPi>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, DstLoReg, DstHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool SrcIsDead = MI.getOperand(1).isKill();
- OpLo = AVR::LDRdPtrPi;
- OpHi = AVR::LDRdPtrPi;
+ unsigned OpLo = AVR::LDRdPtrPi;
+ unsigned OpHi = AVR::LDRdPtrPi;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same");
@@ -659,13 +659,13 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtrPi>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::LDWRdPtrPd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, DstLoReg, DstHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool SrcIsDead = MI.getOperand(1).isKill();
- OpLo = AVR::LDRdPtrPd;
- OpHi = AVR::LDRdPtrPd;
+ unsigned OpLo = AVR::LDRdPtrPd;
+ unsigned OpHi = AVR::LDRdPtrPd;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same");
@@ -690,14 +690,14 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtrPd>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, DstLoReg, DstHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned TmpReg = 0; // 0 for no temporary register
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register TmpReg = 0; // 0 for no temporary register
+ Register SrcReg = MI.getOperand(1).getReg();
unsigned Imm = MI.getOperand(2).getImm();
bool SrcIsKill = MI.getOperand(1).isKill();
- OpLo = AVR::LDDRdPtrQ;
- OpHi = AVR::LDDRdPtrQ;
+ unsigned OpLo = AVR::LDDRdPtrQ;
+ unsigned OpHi = AVR::LDDRdPtrQ;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
// Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value
@@ -708,8 +708,8 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) {
if (DstReg == SrcReg)
TmpReg = scavengeGPR8(MI);
- unsigned CurDstLoReg = (DstReg == SrcReg) ? TmpReg : DstLoReg;
- unsigned CurDstHiReg = (DstReg == SrcReg) ? TmpReg : DstHiReg;
+ Register CurDstLoReg = (DstReg == SrcReg) ? TmpReg : DstLoReg;
+ Register CurDstHiReg = (DstReg == SrcReg) ? TmpReg : DstHiReg;
// Load low byte.
auto MIBLO = buildMI(MBB, MBBI, OpLo)
@@ -745,21 +745,21 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, DstLoReg, DstHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned TmpReg = 0; // 0 for no temporary register
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register TmpReg = 0; // 0 for no temporary register
+ Register SrcReg = MI.getOperand(1).getReg();
bool SrcIsKill = MI.getOperand(1).isKill();
- OpLo = AVR::LPMRdZPi;
- OpHi = AVR::LPMRdZ;
+ unsigned OpLo = AVR::LPMRdZPi;
+ unsigned OpHi = AVR::LPMRdZ;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
// Use a temporary register if src and dst registers are the same.
if (DstReg == SrcReg)
TmpReg = scavengeGPR8(MI);
- unsigned CurDstLoReg = (DstReg == SrcReg) ? TmpReg : DstLoReg;
- unsigned CurDstHiReg = (DstReg == SrcReg) ? TmpReg : DstHiReg;
+ Register CurDstLoReg = (DstReg == SrcReg) ? TmpReg : DstLoReg;
+ Register CurDstHiReg = (DstReg == SrcReg) ? TmpReg : DstHiReg;
// Load low byte.
auto MIBLO = buildMI(MBB, MBBI, OpLo)
@@ -862,7 +862,7 @@ bool AVRExpandPseudo::expandAtomicArithmeticOp(unsigned Width,
});
}
-unsigned AVRExpandPseudo::scavengeGPR8(MachineInstr &MI) {
+Register AVRExpandPseudo::scavengeGPR8(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
RegScavenger RS;
@@ -968,11 +968,11 @@ bool AVRExpandPseudo::expand<AVR::AtomicFence>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::STSWKRr>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, SrcLoReg, SrcHiReg;
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register SrcLoReg, SrcHiReg;
+ Register SrcReg = MI.getOperand(1).getReg();
bool SrcIsKill = MI.getOperand(1).isKill();
- OpLo = AVR::STSKRr;
- OpHi = AVR::STSKRr;
+ unsigned OpLo = AVR::STSKRr;
+ unsigned OpHi = AVR::STSKRr;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
// Write the high byte first in case this address belongs to a special
@@ -1014,12 +1014,12 @@ bool AVRExpandPseudo::expand<AVR::STSWKRr>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::STWPtrRr>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, SrcLoReg, SrcHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register SrcLoReg, SrcHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
bool SrcIsKill = MI.getOperand(1).isKill();
- OpLo = AVR::STPtrRr;
- OpHi = AVR::STDPtrQRr;
+ unsigned OpLo = AVR::STPtrRr;
+ unsigned OpHi = AVR::STDPtrQRr;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
//:TODO: need to reverse this order like inw and stsw?
@@ -1042,14 +1042,14 @@ bool AVRExpandPseudo::expand<AVR::STWPtrRr>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::STWPtrPiRr>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, SrcLoReg, SrcHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(2).getReg();
+ Register SrcLoReg, SrcHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(2).getReg();
unsigned Imm = MI.getOperand(3).getImm();
bool DstIsDead = MI.getOperand(0).isDead();
bool SrcIsKill = MI.getOperand(2).isKill();
- OpLo = AVR::STPtrPiRr;
- OpHi = AVR::STPtrPiRr;
+ unsigned OpLo = AVR::STPtrPiRr;
+ unsigned OpHi = AVR::STPtrPiRr;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same");
@@ -1076,14 +1076,14 @@ bool AVRExpandPseudo::expand<AVR::STWPtrPiRr>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::STWPtrPdRr>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, SrcLoReg, SrcHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(2).getReg();
+ Register SrcLoReg, SrcHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(2).getReg();
unsigned Imm = MI.getOperand(3).getImm();
bool DstIsDead = MI.getOperand(0).isDead();
bool SrcIsKill = MI.getOperand(2).isKill();
- OpLo = AVR::STPtrPdRr;
- OpHi = AVR::STPtrPdRr;
+ unsigned OpLo = AVR::STPtrPdRr;
+ unsigned OpHi = AVR::STPtrPdRr;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same");
@@ -1110,14 +1110,14 @@ bool AVRExpandPseudo::expand<AVR::STWPtrPdRr>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, SrcLoReg, SrcHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(2).getReg();
+ Register SrcLoReg, SrcHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(2).getReg();
unsigned Imm = MI.getOperand(1).getImm();
bool DstIsKill = MI.getOperand(0).isKill();
bool SrcIsKill = MI.getOperand(2).isKill();
- OpLo = AVR::STDPtrQRr;
- OpHi = AVR::STDPtrQRr;
+ unsigned OpLo = AVR::STDPtrQRr;
+ unsigned OpHi = AVR::STDPtrQRr;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
// Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value
@@ -1144,12 +1144,12 @@ bool AVRExpandPseudo::expand<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::INWRdA>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, DstLoReg, DstHiReg;
+ Register DstLoReg, DstHiReg;
unsigned Imm = MI.getOperand(1).getImm();
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
- OpLo = AVR::INRdA;
- OpHi = AVR::INRdA;
+ unsigned OpLo = AVR::INRdA;
+ unsigned OpHi = AVR::INRdA;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
// Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value
@@ -1174,12 +1174,12 @@ bool AVRExpandPseudo::expand<AVR::INWRdA>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::OUTWARr>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, SrcLoReg, SrcHiReg;
+ Register SrcLoReg, SrcHiReg;
unsigned Imm = MI.getOperand(0).getImm();
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
bool SrcIsKill = MI.getOperand(1).isKill();
- OpLo = AVR::OUTARr;
- OpHi = AVR::OUTARr;
+ unsigned OpLo = AVR::OUTARr;
+ unsigned OpHi = AVR::OUTARr;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
// Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value
@@ -1205,12 +1205,12 @@ bool AVRExpandPseudo::expand<AVR::OUTWARr>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::PUSHWRr>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, SrcLoReg, SrcHiReg;
- unsigned SrcReg = MI.getOperand(0).getReg();
+ Register SrcLoReg, SrcHiReg;
+ Register SrcReg = MI.getOperand(0).getReg();
bool SrcIsKill = MI.getOperand(0).isKill();
unsigned Flags = MI.getFlags();
- OpLo = AVR::PUSHRr;
- OpHi = AVR::PUSHRr;
+ unsigned OpLo = AVR::PUSHRr;
+ unsigned OpHi = AVR::PUSHRr;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
// Low part
@@ -1230,11 +1230,11 @@ bool AVRExpandPseudo::expand<AVR::PUSHWRr>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::POPWRd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, DstLoReg, DstHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
unsigned Flags = MI.getFlags();
- OpLo = AVR::POPRd;
- OpHi = AVR::POPRd;
+ unsigned OpLo = AVR::POPRd;
+ unsigned OpHi = AVR::POPRd;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
buildMI(MBB, MBBI, OpHi, DstHiReg).setMIFlags(Flags); // High
@@ -1254,7 +1254,7 @@ bool AVRExpandPseudo::expand<AVR::ROLBRd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
unsigned OpShift, OpCarry;
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
OpShift = AVR::ADDRdRr;
OpCarry = AVR::ADCRdRr;
@@ -1291,7 +1291,7 @@ bool AVRExpandPseudo::expand<AVR::RORBRd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
unsigned OpShiftOut, OpLoad, OpShiftIn, OpAdd;
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
OpShiftOut = AVR::LSRRd;
OpLoad = AVR::LDIRdK;
@@ -1334,13 +1334,13 @@ bool AVRExpandPseudo::expand<AVR::RORBRd>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::LSLWRd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, DstLoReg, DstHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool DstIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(2).isDead();
- OpLo = AVR::ADDRdRr; // ADD Rd, Rd <==> LSL Rd
- OpHi = AVR::ADCRdRr; // ADC Rd, Rd <==> ROL Rd
+ unsigned OpLo = AVR::ADDRdRr; // ADD Rd, Rd <==> LSL Rd
+ unsigned OpHi = AVR::ADCRdRr; // ADC Rd, Rd <==> ROL Rd
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
// Low part
@@ -1367,13 +1367,13 @@ bool AVRExpandPseudo::expand<AVR::LSLWRd>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::LSRWRd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, DstLoReg, DstHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool DstIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(2).isDead();
- OpLo = AVR::RORRd;
- OpHi = AVR::LSRRd;
+ unsigned OpLo = AVR::RORRd;
+ unsigned OpHi = AVR::LSRRd;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
// High part
@@ -1410,13 +1410,13 @@ bool AVRExpandPseudo::expand<AVR::ROLWRd>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::ASRWRd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, DstLoReg, DstHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool DstIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(2).isDead();
- OpLo = AVR::RORRd;
- OpHi = AVR::ASRRd;
+ unsigned OpLo = AVR::RORRd;
+ unsigned OpHi = AVR::ASRRd;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
// High part
@@ -1440,7 +1440,7 @@ bool AVRExpandPseudo::expand<AVR::ASRWRd>(Block &MBB, BlockIt MBBI) {
template <> bool AVRExpandPseudo::expand<AVR::SEXT>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned DstLoReg, DstHiReg;
+ Register DstLoReg, DstHiReg;
// sext R17:R16, R17
// mov r16, r17
// lsl r17
@@ -1454,8 +1454,8 @@ template <> bool AVRExpandPseudo::expand<AVR::SEXT>(Block &MBB, BlockIt MBBI) {
// mov r17, r16
// lsl r17
// sbc r17, r17
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool SrcIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(2).isDead();
@@ -1499,7 +1499,7 @@ template <> bool AVRExpandPseudo::expand<AVR::SEXT>(Block &MBB, BlockIt MBBI) {
template <> bool AVRExpandPseudo::expand<AVR::ZEXT>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned DstLoReg, DstHiReg;
+ Register DstLoReg, DstHiReg;
// zext R25:R24, R20
// mov R24, R20
// eor R25, R25
@@ -1508,8 +1508,8 @@ template <> bool AVRExpandPseudo::expand<AVR::ZEXT>(Block &MBB, BlockIt MBBI) {
// zext R25:R24, R25
// mov R24, R25
// eor R25, R25
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool SrcIsKill = MI.getOperand(1).isKill();
bool ImpIsDead = MI.getOperand(2).isDead();
@@ -1536,12 +1536,12 @@ template <> bool AVRExpandPseudo::expand<AVR::ZEXT>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::SPREAD>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned OpLo, OpHi, DstLoReg, DstHiReg;
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
unsigned Flags = MI.getFlags();
- OpLo = AVR::INRdA;
- OpHi = AVR::INRdA;
+ unsigned OpLo = AVR::INRdA;
+ unsigned OpHi = AVR::INRdA;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
// Low part
@@ -1563,8 +1563,8 @@ bool AVRExpandPseudo::expand<AVR::SPREAD>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::SPWRITE>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- unsigned SrcLoReg, SrcHiReg;
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register SrcLoReg, SrcHiReg;
+ Register SrcReg = MI.getOperand(1).getReg();
bool SrcIsKill = MI.getOperand(1).isKill();
unsigned Flags = MI.getFlags();
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
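
The expansions above all share one shape: TRI->splitReg breaks a 16-bit register pair into its 8-bit halves, and the pseudo is rewritten as a pair of 8-bit instructions (OpLo for the low half, OpHi for the high half); the mechanical part of the hunk is switching the register-typed locals from unsigned to llvm::Register and giving OpLo/OpHi initializers. The snippet below is a minimal standalone model of the splitting step only, under the assumption that a pair is identified by its even low register number; Reg8, Reg16 and this splitReg are illustrative stand-ins, not the real AVR target types.

  #include <cassert>
  #include <cstdio>

  // Illustrative model: a 16-bit AVR pair Rn+1:Rn is identified here by its
  // even low register number n; splitReg() yields the two 8-bit halves,
  // mirroring what TRI->splitReg does with real register descriptors.
  struct Reg8 { unsigned Num; };
  struct Reg16 { unsigned LowNum; };

  static void splitReg(Reg16 Pair, Reg8 &Lo, Reg8 &Hi) {
    assert(Pair.LowNum % 2 == 0 && "16-bit pairs start at an even register");
    Lo = {Pair.LowNum};     // e.g. R24 of R25:R24 -> expanded with OpLo
    Hi = {Pair.LowNum + 1}; // e.g. R25 of R25:R24 -> expanded with OpHi
  }

  int main() {
    Reg8 Lo, Hi;
    splitReg({24}, Lo, Hi); // the R25:R24 pair
    std::printf("lo=R%u hi=R%u\n", Lo.Num, Hi.Num); // prints lo=R24 hi=R25
  }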
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRFrameLowering.cpp
index e6c48de5a782..c95a553b86ac 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRFrameLowering.cpp
@@ -30,8 +30,7 @@
namespace llvm {
AVRFrameLowering::AVRFrameLowering()
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align::None(),
- -2) {}
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(1), -2) {}
bool AVRFrameLowering::canSimplifyCallFramePseudos(
const MachineFunction &MF) const {
@@ -53,30 +52,22 @@ bool AVRFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
void AVRFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
- CallingConv::ID CallConv = MF.getFunction().getCallingConv();
DebugLoc DL = (MBBI != MBB.end()) ? MBBI->getDebugLoc() : DebugLoc();
const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
const AVRInstrInfo &TII = *STI.getInstrInfo();
+ const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
bool HasFP = hasFP(MF);
// Interrupt handlers re-enable interrupts in function entry.
- if (CallConv == CallingConv::AVR_INTR) {
+ if (AFI->isInterruptHandler()) {
BuildMI(MBB, MBBI, DL, TII.get(AVR::BSETs))
.addImm(0x07)
.setMIFlag(MachineInstr::FrameSetup);
}
- // Save the frame pointer if we have one.
- if (HasFP) {
- BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHWRr))
- .addReg(AVR::R29R28, RegState::Kill)
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
// Emit special prologue code to save R1, R0 and SREG in interrupt/signal
// handlers before saving any other registers.
- if (CallConv == CallingConv::AVR_INTR ||
- CallConv == CallingConv::AVR_SIGNAL) {
+ if (AFI->isInterruptOrSignalHandler()) {
BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHWRr))
.addReg(AVR::R1R0, RegState::Kill)
.setMIFlag(MachineInstr::FrameSetup);
@@ -100,7 +91,6 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF,
}
const MachineFrameInfo &MFI = MF.getFrameInfo();
- const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
unsigned FrameSize = MFI.getStackSize() - AFI->getCalleeSavedFrameSize();
// Skip the callee-saved push instructions.
@@ -143,13 +133,11 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF,
void AVRFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- CallingConv::ID CallConv = MF.getFunction().getCallingConv();
- bool isHandler = (CallConv == CallingConv::AVR_INTR ||
- CallConv == CallingConv::AVR_SIGNAL);
+ const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
// Early exit if the frame pointer is not needed in this function except for
// signal/interrupt handlers where special code generation is required.
- if (!hasFP(MF) && !isHandler) {
+ if (!hasFP(MF) && !AFI->isInterruptOrSignalHandler()) {
return;
}
@@ -159,14 +147,13 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF,
DebugLoc DL = MBBI->getDebugLoc();
const MachineFrameInfo &MFI = MF.getFrameInfo();
- const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
unsigned FrameSize = MFI.getStackSize() - AFI->getCalleeSavedFrameSize();
const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
const AVRInstrInfo &TII = *STI.getInstrInfo();
// Emit special epilogue code to restore R1, R0 and SREG in interrupt/signal
// handlers at the very end of the function, just before reti.
- if (isHandler) {
+ if (AFI->isInterruptOrSignalHandler()) {
BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R0);
BuildMI(MBB, MBBI, DL, TII.get(AVR::OUTARr))
.addImm(0x3f)
@@ -174,9 +161,6 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R1R0);
}
- if (hasFP(MF))
- BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R29R28);
-
// Early exit if there is no need to restore the frame pointer.
if (!FrameSize) {
return;
@@ -234,8 +218,7 @@ bool AVRFrameLowering::hasFP(const MachineFunction &MF) const {
bool AVRFrameLowering::spillCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
if (CSI.empty()) {
return false;
}
@@ -275,8 +258,7 @@ bool AVRFrameLowering::spillCalleeSavedRegisters(
bool AVRFrameLowering::restoreCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
if (CSI.empty()) {
return false;
}
@@ -299,15 +281,10 @@ bool AVRFrameLowering::restoreCalleeSavedRegisters(
}
/// Replace pseudo store instructions that pass arguments through the stack with
-/// real instructions. If insertPushes is true then all instructions are
-/// replaced with push instructions, otherwise regular std instructions are
-/// inserted.
+/// real instructions.
static void fixStackStores(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const TargetInstrInfo &TII, bool insertPushes) {
- const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
- const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
-
+ const TargetInstrInfo &TII, Register FP) {
// Iterate through the BB until we hit a call instruction or we reach the end.
for (auto I = MI, E = MBB.end(); I != E && !I->isCall();) {
MachineBasicBlock::iterator NextMI = std::next(I);
@@ -322,29 +299,6 @@ static void fixStackStores(MachineBasicBlock &MBB,
assert(MI.getOperand(0).getReg() == AVR::SP &&
"Invalid register, should be SP!");
- if (insertPushes) {
- // Replace this instruction with a push.
- Register SrcReg = MI.getOperand(2).getReg();
- bool SrcIsKill = MI.getOperand(2).isKill();
-
- // We can't use PUSHWRr here because when expanded the order of the new
- // instructions are reversed from what we need. Perform the expansion now.
- if (Opcode == AVR::STDWSPQRr) {
- BuildMI(MBB, I, MI.getDebugLoc(), TII.get(AVR::PUSHRr))
- .addReg(TRI.getSubReg(SrcReg, AVR::sub_hi),
- getKillRegState(SrcIsKill));
- BuildMI(MBB, I, MI.getDebugLoc(), TII.get(AVR::PUSHRr))
- .addReg(TRI.getSubReg(SrcReg, AVR::sub_lo),
- getKillRegState(SrcIsKill));
- } else {
- BuildMI(MBB, I, MI.getDebugLoc(), TII.get(AVR::PUSHRr))
- .addReg(SrcReg, getKillRegState(SrcIsKill));
- }
-
- MI.eraseFromParent();
- I = NextMI;
- continue;
- }
// Replace this instruction with a regular store. Use Y as the base
// pointer since it is guaranteed to contain a copy of SP.
@@ -352,7 +306,7 @@ static void fixStackStores(MachineBasicBlock &MBB,
(Opcode == AVR::STDWSPQRr) ? AVR::STDWPtrQRr : AVR::STDPtrQRr;
MI.setDesc(TII.get(STOpc));
- MI.getOperand(0).setReg(AVR::R29R28);
+ MI.getOperand(0).setReg(FP);
I = NextMI;
}
@@ -368,7 +322,7 @@ MachineBasicBlock::iterator AVRFrameLowering::eliminateCallFramePseudoInstr(
// function entry. Delete the call frame pseudo and replace all pseudo stores
// with real store instructions.
if (hasReservedCallFrame(MF)) {
- fixStackStores(MBB, MI, TII, false);
+ fixStackStores(MBB, MI, TII, AVR::R29R28);
return MBB.erase(MI);
}
@@ -376,18 +330,37 @@ MachineBasicBlock::iterator AVRFrameLowering::eliminateCallFramePseudoInstr(
unsigned int Opcode = MI->getOpcode();
int Amount = TII.getFrameSize(*MI);
- // Adjcallstackup does not need to allocate stack space for the call, instead
- // we insert push instructions that will allocate the necessary stack.
- // For adjcallstackdown we convert it into an 'adiw reg, <amt>' handling
- // the read and write of SP in I/O space.
+ // ADJCALLSTACKUP and ADJCALLSTACKDOWN are converted to adiw/subi
+ // instructions to read and write the stack pointer in I/O space.
if (Amount != 0) {
- assert(getStackAlignment() == 1 && "Unsupported stack alignment");
+ assert(getStackAlign() == Align(1) && "Unsupported stack alignment");
if (Opcode == TII.getCallFrameSetupOpcode()) {
- fixStackStores(MBB, MI, TII, true);
+ // Update the stack pointer.
+ // In many cases this can be done far more efficiently by pushing the
+ // relevant values directly to the stack. However, doing that correctly
+ // (in the right order, possibly skipping some empty space for undef
+ // values, etc) is tricky and thus left to be optimized in the future.
+ BuildMI(MBB, MI, DL, TII.get(AVR::SPREAD), AVR::R31R30).addReg(AVR::SP);
+
+ MachineInstr *New = BuildMI(MBB, MI, DL, TII.get(AVR::SUBIWRdK), AVR::R31R30)
+ .addReg(AVR::R31R30, RegState::Kill)
+ .addImm(Amount);
+ New->getOperand(3).setIsDead();
+
+ BuildMI(MBB, MI, DL, TII.get(AVR::SPWRITE), AVR::SP)
+ .addReg(AVR::R31R30, RegState::Kill);
+
+ // Make sure the remaining stack stores are converted to real store
+ // instructions.
+ fixStackStores(MBB, MI, TII, AVR::R31R30);
} else {
assert(Opcode == TII.getCallFrameDestroyOpcode());
+ // Note that small stack changes could be implemented more efficiently
+ // with a few pop instructions instead of the 8-9 instructions now
+ // required.
+
// Select the best opcode to adjust SP based on the offset size.
unsigned addOpcode;
if (isUInt<6>(Amount)) {
@@ -419,8 +392,10 @@ void AVRFrameLowering::determineCalleeSaves(MachineFunction &MF,
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
// If we have a frame pointer, the Y register needs to be saved as well.
- // We don't do that here however - the prologue and epilogue generation
- // code will handle it specially.
+ if (hasFP(MF)) {
+ SavedRegs.set(AVR::R29);
+ SavedRegs.set(AVR::R28);
+ }
}
/// The frame analyzer pass.
///
@@ -430,7 +405,7 @@ struct AVRFrameAnalyzer : public MachineFunctionPass {
static char ID;
AVRFrameAnalyzer() : MachineFunctionPass(ID) {}
- bool runOnMachineFunction(MachineFunction &MF) {
+ bool runOnMachineFunction(MachineFunction &MF) override {
const MachineFrameInfo &MFI = MF.getFrameInfo();
AVRMachineFunctionInfo *FuncInfo = MF.getInfo<AVRMachineFunctionInfo>();
@@ -482,7 +457,7 @@ struct AVRFrameAnalyzer : public MachineFunctionPass {
return false;
}
- StringRef getPassName() const { return "AVR Frame Analyzer"; }
+ StringRef getPassName() const override { return "AVR Frame Analyzer"; }
};
char AVRFrameAnalyzer::ID = 0;
@@ -498,7 +473,7 @@ struct AVRDynAllocaSR : public MachineFunctionPass {
static char ID;
AVRDynAllocaSR() : MachineFunctionPass(ID) {}
- bool runOnMachineFunction(MachineFunction &MF) {
+ bool runOnMachineFunction(MachineFunction &MF) override {
// Early exit when there are no variable sized objects in the function.
if (!MF.getFrameInfo().hasVarSizedObjects()) {
return false;
@@ -510,7 +485,7 @@ struct AVRDynAllocaSR : public MachineFunctionPass {
MachineBasicBlock::iterator MBBI = EntryMBB.begin();
DebugLoc DL = EntryMBB.findDebugLoc(MBBI);
- unsigned SPCopy =
+ Register SPCopy =
MF.getRegInfo().createVirtualRegister(&AVR::DREGSRegClass);
// Create a copy of SP in function entry before any dynallocas are
@@ -531,7 +506,7 @@ struct AVRDynAllocaSR : public MachineFunctionPass {
return true;
}
- StringRef getPassName() const {
+ StringRef getPassName() const override {
return "AVR dynalloca stack pointer save/restore";
}
};
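
Two things change in the frame lowering: saving and restoring the Y frame pointer now goes through determineCalleeSaves (R29/R28 are simply marked as saved) instead of hand-written pushes in the prologue and epilogue, and call-frame setup now adjusts SP explicitly by copying it into R31:R30, subtracting the frame amount, and writing it back, after which fixStackStores rewrites the remaining pseudo stores to use that register (or Y when the call frame is reserved) as the base. On the call-frame-destroy path the existing opcode choice is kept: an adjustment that fits in 6 bits can use a single ADIW, anything larger falls back to subtracting. A standalone sketch of that decision follows; chooseSpAdjustOpcode is a hypothetical helper standing in for the real BuildMI sequence.

  #include <cstdio>
  #include <string>

  // Hypothetical helper mirroring the opcode choice kept in
  // eliminateCallFramePseudoInstr: ADIW only encodes a 6-bit immediate, so a
  // larger call-frame-destroy adjustment subtracts the negated amount instead.
  static std::string chooseSpAdjustOpcode(int Amount) {
    bool FitsIn6Bits = Amount >= 0 && Amount < 64; // models isUInt<6>(Amount)
    return FitsIn6Bits ? "ADIW-style add" : "SUBIW-style subtract of -Amount";
  }

  int main() {
    std::printf("amount 12  -> %s\n", chooseSpAdjustOpcode(12).c_str());
    std::printf("amount 100 -> %s\n", chooseSpAdjustOpcode(100).c_str());
  }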
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/AVR/AVRFrameLowering.h
index a7658438232a..a550c0efbb8e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRFrameLowering.h
@@ -24,12 +24,12 @@ public:
bool hasFP(const MachineFunction &MF) const override;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
+ ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) const override;
bool
restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
+ MutableArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
index 4c4f4faa0508..fe31fa42c403 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -265,7 +265,7 @@ bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
if (RI.getRegClass(Reg) != &AVR::PTRDISPREGSRegClass) {
SDLoc dl(CopyFromRegOp);
- unsigned VReg = RI.createVirtualRegister(&AVR::PTRDISPREGSRegClass);
+ Register VReg = RI.createVirtualRegister(&AVR::PTRDISPREGSRegClass);
SDValue CopyToReg =
CurDAG->getCopyToReg(CopyFromRegOp, dl, VReg, CopyFromRegOp);
@@ -294,7 +294,7 @@ bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
// More generic case.
// Create chain that puts Op into pointer register
// and return that register.
- unsigned VReg = RI.createVirtualRegister(&AVR::PTRDISPREGSRegClass);
+ Register VReg = RI.createVirtualRegister(&AVR::PTRDISPREGSRegClass);
SDValue CopyToReg = CurDAG->getCopyToReg(Op, dl, VReg, Op);
SDValue CopyFromReg =
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp
index 880688807702..bf9b32e1278e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -14,6 +14,7 @@
#include "AVRISelLowering.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -151,10 +152,12 @@ AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM,
setOperationAction(ISD::SREM, MVT::i16, Expand);
// Make division and modulus custom
- for (MVT VT : MVT::integer_valuetypes()) {
- setOperationAction(ISD::UDIVREM, VT, Custom);
- setOperationAction(ISD::SDIVREM, VT, Custom);
- }
+ setOperationAction(ISD::UDIVREM, MVT::i8, Custom);
+ setOperationAction(ISD::UDIVREM, MVT::i16, Custom);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::SDIVREM, MVT::i8, Custom);
+ setOperationAction(ISD::SDIVREM, MVT::i16, Custom);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
// Do not use MUL. The AVR instructions are closer to SMUL_LOHI &co.
setOperationAction(ISD::MUL, MVT::i8, Expand);
@@ -190,41 +193,29 @@ AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM,
// improvements in how we treat 16-bit "registers" to be feasible.
}
- // Division rtlib functions (not supported)
+ // Division rtlib functions (not supported), use divmod functions instead
setLibcallName(RTLIB::SDIV_I8, nullptr);
setLibcallName(RTLIB::SDIV_I16, nullptr);
setLibcallName(RTLIB::SDIV_I32, nullptr);
- setLibcallName(RTLIB::SDIV_I64, nullptr);
- setLibcallName(RTLIB::SDIV_I128, nullptr);
setLibcallName(RTLIB::UDIV_I8, nullptr);
setLibcallName(RTLIB::UDIV_I16, nullptr);
setLibcallName(RTLIB::UDIV_I32, nullptr);
- setLibcallName(RTLIB::UDIV_I64, nullptr);
- setLibcallName(RTLIB::UDIV_I128, nullptr);
- // Modulus rtlib functions (not supported)
+ // Modulus rtlib functions (not supported), use divmod functions instead
setLibcallName(RTLIB::SREM_I8, nullptr);
setLibcallName(RTLIB::SREM_I16, nullptr);
setLibcallName(RTLIB::SREM_I32, nullptr);
- setLibcallName(RTLIB::SREM_I64, nullptr);
- setLibcallName(RTLIB::SREM_I128, nullptr);
setLibcallName(RTLIB::UREM_I8, nullptr);
setLibcallName(RTLIB::UREM_I16, nullptr);
setLibcallName(RTLIB::UREM_I32, nullptr);
- setLibcallName(RTLIB::UREM_I64, nullptr);
- setLibcallName(RTLIB::UREM_I128, nullptr);
// Division and modulus rtlib functions
setLibcallName(RTLIB::SDIVREM_I8, "__divmodqi4");
setLibcallName(RTLIB::SDIVREM_I16, "__divmodhi4");
setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
- setLibcallName(RTLIB::SDIVREM_I64, "__divmoddi4");
- setLibcallName(RTLIB::SDIVREM_I128, "__divmodti4");
setLibcallName(RTLIB::UDIVREM_I8, "__udivmodqi4");
setLibcallName(RTLIB::UDIVREM_I16, "__udivmodhi4");
setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
- setLibcallName(RTLIB::UDIVREM_I64, "__udivmoddi4");
- setLibcallName(RTLIB::UDIVREM_I128, "__udivmodti4");
// Several of the runtime library functions use a special calling conv
setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::AVR_BUILTIN);
@@ -259,6 +250,8 @@ const char *AVRTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE(ASR);
NODE(LSLLOOP);
NODE(LSRLOOP);
+ NODE(ROLLOOP);
+ NODE(RORLOOP);
NODE(ASRLOOP);
NODE(BRCOND);
NODE(CMP);
@@ -282,6 +275,8 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
const SDNode *N = Op.getNode();
EVT VT = Op.getValueType();
SDLoc dl(N);
+ assert(isPowerOf2_32(VT.getSizeInBits()) &&
+ "Expected a power-of-2 type width when lowering shifts");
// Expand non-constant shifts to loops.
if (!isa<ConstantSDNode>(N->getOperand(1))) {
@@ -294,12 +289,20 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL:
return DAG.getNode(AVRISD::LSRLOOP, dl, VT, N->getOperand(0),
N->getOperand(1));
- case ISD::ROTL:
- return DAG.getNode(AVRISD::ROLLOOP, dl, VT, N->getOperand(0),
- N->getOperand(1));
- case ISD::ROTR:
- return DAG.getNode(AVRISD::RORLOOP, dl, VT, N->getOperand(0),
- N->getOperand(1));
+ case ISD::ROTL: {
+ SDValue Amt = N->getOperand(1);
+ EVT AmtVT = Amt.getValueType();
+ Amt = DAG.getNode(ISD::AND, dl, AmtVT, Amt,
+ DAG.getConstant(VT.getSizeInBits() - 1, dl, AmtVT));
+ return DAG.getNode(AVRISD::ROLLOOP, dl, VT, N->getOperand(0), Amt);
+ }
+ case ISD::ROTR: {
+ SDValue Amt = N->getOperand(1);
+ EVT AmtVT = Amt.getValueType();
+ Amt = DAG.getNode(ISD::AND, dl, AmtVT, Amt,
+ DAG.getConstant(VT.getSizeInBits() - 1, dl, AmtVT));
+ return DAG.getNode(AVRISD::RORLOOP, dl, VT, N->getOperand(0), Amt);
+ }
case ISD::SRA:
return DAG.getNode(AVRISD::ASRLOOP, dl, VT, N->getOperand(0),
N->getOperand(1));
@@ -315,9 +318,11 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
break;
case ISD::ROTL:
Opc8 = AVRISD::ROL;
+ ShiftAmount = ShiftAmount % VT.getSizeInBits();
break;
case ISD::ROTR:
Opc8 = AVRISD::ROR;
+ ShiftAmount = ShiftAmount % VT.getSizeInBits();
break;
case ISD::SRL:
Opc8 = AVRISD::LSR;
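
The two rotate changes above work at different stages but toward the same end: non-constant rotate amounts are masked with the bit width minus one before feeding the ROLLOOP/RORLOOP pseudos, and constant amounts are reduced modulo the bit width, so a rotate by the type width or more degenerates to an equivalent smaller rotate. A small standalone check of that equivalence for 8-bit values; rotl8 here is an illustrative helper, not an LLVM API.

  #include <cassert>
  #include <cstdint>

  // Rotate-left of an 8-bit value with the amount reduced modulo the bit
  // width, matching the masking/modulo that LowerShifts now applies.
  static uint8_t rotl8(uint8_t V, unsigned Amt) {
    Amt %= 8;
    if (Amt == 0)
      return V;
    return static_cast<uint8_t>((V << Amt) | (V >> (8 - Amt)));
  }

  int main() {
    // Rotating an 8-bit value by 11 is the same as rotating it by 3.
    assert(rotl8(0x96, 11) == rotl8(0x96, 3));
    return 0;
  }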
@@ -357,12 +362,6 @@ SDValue AVRTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
case MVT::i32:
LC = IsSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
break;
- case MVT::i64:
- LC = IsSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64;
- break;
- case MVT::i128:
- LC = IsSigned ? RTLIB::SDIVREM_I128 : RTLIB::UDIVREM_I128;
- break;
}
SDValue InChain = DAG.getEntryNode();
@@ -883,173 +882,145 @@ bool AVRTargetLowering::isOffsetFoldingLegal(
#include "AVRGenCallingConv.inc"
-/// For each argument in a function store the number of pieces it is composed
-/// of.
-static void parseFunctionArgs(const SmallVectorImpl<ISD::InputArg> &Ins,
- SmallVectorImpl<unsigned> &Out) {
- for (const ISD::InputArg &Arg : Ins) {
- if(Arg.PartOffset > 0) continue;
- unsigned Bytes = ((Arg.ArgVT.getSizeInBits()) + 7) / 8;
-
- Out.push_back((Bytes + 1) / 2);
- }
-}
-
-/// For external symbols there is no function prototype information so we
-/// have to rely directly on argument sizes.
-static void parseExternFuncCallArgs(const SmallVectorImpl<ISD::OutputArg> &In,
- SmallVectorImpl<unsigned> &Out) {
- for (unsigned i = 0, e = In.size(); i != e;) {
- unsigned Size = 0;
- unsigned Offset = 0;
- while ((i != e) && (In[i].PartOffset == Offset)) {
- Offset += In[i].VT.getStoreSize();
- ++i;
- ++Size;
- }
- Out.push_back(Size);
- }
-}
-
-static StringRef getFunctionName(TargetLowering::CallLoweringInfo &CLI) {
- SDValue Callee = CLI.Callee;
-
- if (const ExternalSymbolSDNode *G = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- return G->getSymbol();
- }
-
- if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- return G->getGlobal()->getName();
- }
-
- llvm_unreachable("don't know how to get the name for this callee");
-}
+/// Registers for calling conventions, ordered in reverse as the ABI requires.
+/// Both arrays must be of the same length.
+static const MCPhysReg RegList8[] = {
+ AVR::R25, AVR::R24, AVR::R23, AVR::R22, AVR::R21, AVR::R20,
+ AVR::R19, AVR::R18, AVR::R17, AVR::R16, AVR::R15, AVR::R14,
+ AVR::R13, AVR::R12, AVR::R11, AVR::R10, AVR::R9, AVR::R8};
+static const MCPhysReg RegList16[] = {
+ AVR::R26R25, AVR::R25R24, AVR::R24R23, AVR::R23R22,
+ AVR::R22R21, AVR::R21R20, AVR::R20R19, AVR::R19R18,
+ AVR::R18R17, AVR::R17R16, AVR::R16R15, AVR::R15R14,
+ AVR::R14R13, AVR::R13R12, AVR::R12R11, AVR::R11R10,
+ AVR::R10R9, AVR::R9R8};
+
+static_assert(array_lengthof(RegList8) == array_lengthof(RegList16),
+ "8-bit and 16-bit register arrays must be of equal length");
/// Analyze incoming and outgoing function arguments. We need custom C++ code
-/// to handle special constraints in the ABI like reversing the order of the
-/// pieces of splitted arguments. In addition, all pieces of a certain argument
-/// have to be passed either using registers or the stack but never mixing both.
-static void analyzeStandardArguments(TargetLowering::CallLoweringInfo *CLI,
- const Function *F, const DataLayout *TD,
- const SmallVectorImpl<ISD::OutputArg> *Outs,
- const SmallVectorImpl<ISD::InputArg> *Ins,
- CallingConv::ID CallConv,
- SmallVectorImpl<CCValAssign> &ArgLocs,
- CCState &CCInfo, bool IsCall, bool IsVarArg) {
- static const MCPhysReg RegList8[] = {AVR::R24, AVR::R22, AVR::R20,
- AVR::R18, AVR::R16, AVR::R14,
- AVR::R12, AVR::R10, AVR::R8};
- static const MCPhysReg RegList16[] = {AVR::R25R24, AVR::R23R22, AVR::R21R20,
- AVR::R19R18, AVR::R17R16, AVR::R15R14,
- AVR::R13R12, AVR::R11R10, AVR::R9R8};
- if (IsVarArg) {
- // Variadic functions do not need all the analysis below.
- if (IsCall) {
- CCInfo.AnalyzeCallOperands(*Outs, ArgCC_AVR_Vararg);
- } else {
- CCInfo.AnalyzeFormalArguments(*Ins, ArgCC_AVR_Vararg);
+/// to handle special constraints in the ABI.
+/// In addition, all pieces of a given argument have to be passed either
+/// entirely in registers or entirely on the stack, never a mix of the two.
+template <typename ArgT>
+static void
+analyzeArguments(TargetLowering::CallLoweringInfo *CLI, const Function *F,
+ const DataLayout *TD, const SmallVectorImpl<ArgT> &Args,
+ SmallVectorImpl<CCValAssign> &ArgLocs, CCState &CCInfo) {
+ unsigned NumArgs = Args.size();
+ // This is the index of the last used register, in RegList*.
+ // -1 means R26 (R26 is never actually used in CC).
+ int RegLastIdx = -1;
+ // Once one value has been passed on the stack, all remaining values are
+ // passed on the stack as well.
+ bool UseStack = false;
+ for (unsigned i = 0; i != NumArgs;) {
+ MVT VT = Args[i].VT;
+ // We have to count the number of bytes for each function argument, i.e.
+ // all Args entries that share the same OrigArgIndex. This matters when the
+ // function takes an aggregate type.
+ // Current argument will be between [i..j).
+ unsigned ArgIndex = Args[i].OrigArgIndex;
+ unsigned TotalBytes = VT.getStoreSize();
+ unsigned j = i + 1;
+ for (; j != NumArgs; ++j) {
+ if (Args[j].OrigArgIndex != ArgIndex)
+ break;
+ TotalBytes += Args[j].VT.getStoreSize();
}
- return;
- }
-
- // Fill in the Args array which will contain original argument sizes.
- SmallVector<unsigned, 8> Args;
- if (IsCall) {
- parseExternFuncCallArgs(*Outs, Args);
- } else {
- assert(F != nullptr && "function should not be null");
- parseFunctionArgs(*Ins, Args);
- }
-
- unsigned RegsLeft = array_lengthof(RegList8), ValNo = 0;
- // Variadic functions always use the stack.
- bool UsesStack = false;
- for (unsigned i = 0, pos = 0, e = Args.size(); i != e; ++i) {
- unsigned Size = Args[i];
-
- // If we have a zero-sized argument, don't attempt to lower it.
- // AVR-GCC does not support zero-sized arguments and so we need not
- // worry about ABI compatibility.
- if (Size == 0) continue;
-
- MVT LocVT = (IsCall) ? (*Outs)[pos].VT : (*Ins)[pos].VT;
-
- // If we have plenty of regs to pass the whole argument do it.
- if (!UsesStack && (Size <= RegsLeft)) {
- const MCPhysReg *RegList = (LocVT == MVT::i16) ? RegList16 : RegList8;
+ // Round up to an even number of bytes.
+ TotalBytes = alignTo(TotalBytes, 2);
+ // Skip zero-sized arguments.
+ if (TotalBytes == 0)
+ continue;
+ // The index of the first register to be used
+ unsigned RegIdx = RegLastIdx + TotalBytes;
+ RegLastIdx = RegIdx;
+ // If there are not enough registers, use the stack
+ if (RegIdx >= array_lengthof(RegList8)) {
+ UseStack = true;
+ }
+ for (; i != j; ++i) {
+ MVT VT = Args[i].VT;
- for (unsigned j = 0; j != Size; ++j) {
- unsigned Reg = CCInfo.AllocateReg(
- ArrayRef<MCPhysReg>(RegList, array_lengthof(RegList8)));
+ if (UseStack) {
+ auto evt = EVT(VT).getTypeForEVT(CCInfo.getContext());
+ unsigned Offset = CCInfo.AllocateStack(TD->getTypeAllocSize(evt),
+ TD->getABITypeAlign(evt));
CCInfo.addLoc(
- CCValAssign::getReg(ValNo++, LocVT, Reg, LocVT, CCValAssign::Full));
- --RegsLeft;
- }
-
- // Reverse the order of the pieces to agree with the "big endian" format
- // required in the calling convention ABI.
- std::reverse(ArgLocs.begin() + pos, ArgLocs.begin() + pos + Size);
- } else {
- // Pass the rest of arguments using the stack.
- UsesStack = true;
- for (unsigned j = 0; j != Size; ++j) {
- unsigned Offset = CCInfo.AllocateStack(
- TD->getTypeAllocSize(EVT(LocVT).getTypeForEVT(CCInfo.getContext())),
- TD->getABITypeAlignment(
- EVT(LocVT).getTypeForEVT(CCInfo.getContext())));
- CCInfo.addLoc(CCValAssign::getMem(ValNo++, LocVT, Offset, LocVT,
- CCValAssign::Full));
+ CCValAssign::getMem(i, VT, Offset, VT, CCValAssign::Full));
+ } else {
+ unsigned Reg;
+ if (VT == MVT::i8) {
+ Reg = CCInfo.AllocateReg(RegList8[RegIdx]);
+ } else if (VT == MVT::i16) {
+ Reg = CCInfo.AllocateReg(RegList16[RegIdx]);
+ } else {
+ llvm_unreachable(
+ "calling convention can only manage i8 and i16 types");
+ }
+ assert(Reg && "register not available in calling convention");
+ CCInfo.addLoc(CCValAssign::getReg(i, VT, Reg, VT, CCValAssign::Full));
+ // Registers inside a particular argument are sorted in increasing order
+ // (remember the array is reversed).
+ RegIdx -= VT.getStoreSize();
}
}
- pos += Size;
}
}
-static void analyzeBuiltinArguments(TargetLowering::CallLoweringInfo &CLI,
- const Function *F, const DataLayout *TD,
- const SmallVectorImpl<ISD::OutputArg> *Outs,
- const SmallVectorImpl<ISD::InputArg> *Ins,
- CallingConv::ID CallConv,
- SmallVectorImpl<CCValAssign> &ArgLocs,
- CCState &CCInfo, bool IsCall, bool IsVarArg) {
- StringRef FuncName = getFunctionName(CLI);
-
- if (FuncName.startswith("__udivmod") || FuncName.startswith("__divmod")) {
- CCInfo.AnalyzeCallOperands(*Outs, ArgCC_AVR_BUILTIN_DIV);
- } else {
- analyzeStandardArguments(&CLI, F, TD, Outs, Ins,
- CallConv, ArgLocs, CCInfo,
- IsCall, IsVarArg);
+/// Count the total number of bytes needed to pass or return these arguments.
+template <typename ArgT>
+static unsigned
+getTotalArgumentsSizeInBytes(const SmallVectorImpl<ArgT> &Args) {
+ unsigned TotalBytes = 0;
+
+ for (const ArgT& Arg : Args) {
+ TotalBytes += Arg.VT.getStoreSize();
}
+ return TotalBytes;
}
-static void analyzeArguments(TargetLowering::CallLoweringInfo *CLI,
- const Function *F, const DataLayout *TD,
- const SmallVectorImpl<ISD::OutputArg> *Outs,
- const SmallVectorImpl<ISD::InputArg> *Ins,
- CallingConv::ID CallConv,
- SmallVectorImpl<CCValAssign> &ArgLocs,
- CCState &CCInfo, bool IsCall, bool IsVarArg) {
- switch (CallConv) {
- case CallingConv::AVR_BUILTIN: {
- analyzeBuiltinArguments(*CLI, F, TD, Outs, Ins,
- CallConv, ArgLocs, CCInfo,
- IsCall, IsVarArg);
- return;
- }
- default: {
- analyzeStandardArguments(CLI, F, TD, Outs, Ins,
- CallConv, ArgLocs, CCInfo,
- IsCall, IsVarArg);
- return;
+/// Analyze the incoming and outgoing values when returning from a function.
+/// The algorithm is similar to analyzeArguments, but there can only be
+/// one value, possibly an aggregate, and it is limited to 8 bytes.
+template <typename ArgT>
+static void analyzeReturnValues(const SmallVectorImpl<ArgT> &Args,
+ CCState &CCInfo) {
+ unsigned NumArgs = Args.size();
+ unsigned TotalBytes = getTotalArgumentsSizeInBytes(Args);
+ // CanLowerReturn() guarantees this assertion.
+ assert(TotalBytes <= 8 &&
+        "return values greater than 8 bytes cannot be lowered");
+
+ // The GCC ABI rounds the size up to the next even number, but in practice
+ // anything larger than 4 bytes is always rounded up to 8.
+ if (TotalBytes > 4) {
+ TotalBytes = 8;
+ } else {
+ TotalBytes = alignTo(TotalBytes, 2);
+ }
+
+ // The index of the first register to use.
+ int RegIdx = TotalBytes - 1;
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT VT = Args[i].VT;
+ unsigned Reg;
+ if (VT == MVT::i8) {
+ Reg = CCInfo.AllocateReg(RegList8[RegIdx]);
+ } else if (VT == MVT::i16) {
+ Reg = CCInfo.AllocateReg(RegList16[RegIdx]);
+ } else {
+ llvm_unreachable("calling convention can only manage i8 and i16 types");
}
+ assert(Reg && "register not available in calling convention");
+ CCInfo.addLoc(CCValAssign::getReg(i, VT, Reg, VT, CCValAssign::Full));
+ // Registers are assigned in increasing order.
+ RegIdx -= VT.getStoreSize();
}
}
SDValue AVRTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
auto DL = DAG.getDataLayout();
@@ -1059,8 +1030,12 @@ SDValue AVRTargetLowering::LowerFormalArguments(
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
- analyzeArguments(nullptr, &MF.getFunction(), &DL, 0, &Ins, CallConv, ArgLocs, CCInfo,
- false, isVarArg);
+ // Variadic functions do not need all the analysis below.
+ if (isVarArg) {
+ CCInfo.AnalyzeFormalArguments(Ins, ArgCC_AVR_Vararg);
+ } else {
+ analyzeArguments(nullptr, &MF.getFunction(), &DL, Ins, ArgLocs, CCInfo);
+ }
SDValue ArgValue;
for (CCValAssign &VA : ArgLocs) {
@@ -1181,8 +1156,12 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
getPointerTy(DAG.getDataLayout()));
}
- analyzeArguments(&CLI, F, &DAG.getDataLayout(), &Outs, 0, CallConv, ArgLocs, CCInfo,
- true, isVarArg);
+ // Variadic functions do not need all the analysis below.
+ if (isVarArg) {
+ CCInfo.AnalyzeCallOperands(Outs, ArgCC_AVR_Vararg);
+ } else {
+ analyzeArguments(&CLI, F, &DAG.getDataLayout(), Outs, ArgLocs, CCInfo);
+ }
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -1319,13 +1298,10 @@ SDValue AVRTargetLowering::LowerCallResult(
*DAG.getContext());
// Handle runtime calling convs.
- auto CCFunction = CCAssignFnForReturn(CallConv);
- CCInfo.AnalyzeCallResult(Ins, CCFunction);
-
- if (CallConv != CallingConv::AVR_BUILTIN && RVLocs.size() > 1) {
- // Reverse splitted return values to get the "big endian" format required
- // to agree with the calling convention ABI.
- std::reverse(RVLocs.begin(), RVLocs.end());
+ if (CallConv == CallingConv::AVR_BUILTIN) {
+ CCInfo.AnalyzeCallResult(Ins, RetCC_AVR_BUILTIN);
+ } else {
+ analyzeReturnValues(Ins, CCInfo);
}
// Copy all of the result registers out of their specified physreg.
@@ -1344,26 +1320,17 @@ SDValue AVRTargetLowering::LowerCallResult(
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
-CCAssignFn *AVRTargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
- switch (CC) {
- case CallingConv::AVR_BUILTIN:
- return RetCC_AVR_BUILTIN;
- default:
- return RetCC_AVR;
+bool AVRTargetLowering::CanLowerReturn(
+ CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
+ if (CallConv == CallingConv::AVR_BUILTIN) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_AVR_BUILTIN);
}
-}
-bool
-AVRTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
- MachineFunction &MF, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- LLVMContext &Context) const
-{
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
-
- auto CCFunction = CCAssignFnForReturn(CallConv);
- return CCInfo.CheckReturn(Outs, CCFunction);
+ unsigned TotalBytes = getTotalArgumentsSizeInBytes(Outs);
+ return TotalBytes <= 8;
}
SDValue
@@ -1379,25 +1346,19 @@ AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
- // Analyze return values.
- auto CCFunction = CCAssignFnForReturn(CallConv);
- CCInfo.AnalyzeReturn(Outs, CCFunction);
-
- // If this is the first return lowered for this function, add the regs to
- // the liveout set for the function.
MachineFunction &MF = DAG.getMachineFunction();
- unsigned e = RVLocs.size();
- // Reverse splitted return values to get the "big endian" format required
- // to agree with the calling convention ABI.
- if (e > 1) {
- std::reverse(RVLocs.begin(), RVLocs.end());
+ // Analyze return values.
+ if (CallConv == CallingConv::AVR_BUILTIN) {
+ CCInfo.AnalyzeReturn(Outs, RetCC_AVR_BUILTIN);
+ } else {
+ analyzeReturnValues(Outs, CCInfo);
}
SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
- for (unsigned i = 0; i != e; ++i) {
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
@@ -1415,10 +1376,12 @@ AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
return Chain;
}
+ const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
+
unsigned RetOpc =
- (CallConv == CallingConv::AVR_INTR || CallConv == CallingConv::AVR_SIGNAL)
- ? AVRISD::RETI_FLAG
- : AVRISD::RET_FLAG;
+ AFI->isInterruptOrSignalHandler()
+ ? AVRISD::RETI_FLAG
+ : AVRISD::RET_FLAG;
RetOps[0] = Chain; // Update chain.
@@ -1514,8 +1477,8 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
LoopBB->addSuccessor(RemBB);
LoopBB->addSuccessor(LoopBB);
- unsigned ShiftAmtReg = RI.createVirtualRegister(&AVR::LD8RegClass);
- unsigned ShiftAmtReg2 = RI.createVirtualRegister(&AVR::LD8RegClass);
+ Register ShiftAmtReg = RI.createVirtualRegister(&AVR::LD8RegClass);
+ Register ShiftAmtReg2 = RI.createVirtualRegister(&AVR::LD8RegClass);
Register ShiftReg = RI.createVirtualRegister(RC);
Register ShiftReg2 = RI.createVirtualRegister(RC);
Register ShiftAmtSrcReg = MI.getOperand(2).getReg();
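
The largest change in this file replaces the per-calling-convention analysis with analyzeArguments/analyzeReturnValues: each argument is rounded up to an even number of bytes, its registers are taken from a reversed list running from R25 down to R8, and once the list is exhausted every remaining argument goes on the stack; return values larger than 4 bytes are laid out as if they were 8 bytes wide, so their low byte lands in R18. The snippet below is a standalone model of the register-index computation only, with NumArgRegs and assignArgRegisters as illustrative names; it is not the real CCState machinery.

  #include <cstdio>
  #include <vector>

  // Model of the register assignment in analyzeArguments above: arguments are
  // rounded up to an even size and allocated from a reversed register list
  // (index 0 = R25, 17 = R8); once the list runs out, the rest go on the stack.
  constexpr int NumArgRegs = 18; // R25 down to R8

  // Returns, for each argument, the RegList8 index of its lowest byte, or -1
  // when the argument is passed on the stack.
  static std::vector<int>
  assignArgRegisters(const std::vector<unsigned> &ArgBytes) {
    std::vector<int> FirstRegIdx;
    int RegLastIdx = -1; // -1 is the slot just before R25
    bool UseStack = false;
    for (unsigned Bytes : ArgBytes) {
      unsigned Total = (Bytes + 1) & ~1u; // round up to an even size
      int RegIdx = RegLastIdx + static_cast<int>(Total);
      RegLastIdx = RegIdx;
      if (RegIdx >= NumArgRegs)
        UseStack = true;
      FirstRegIdx.push_back(UseStack ? -1 : RegIdx);
    }
    return FirstRegIdx;
  }

  int main() {
    // i16, i8, i32 -> indices 1, 3, 7: low bytes in R24, R22 and R18.
    for (int Idx : assignArgRegisters({2, 1, 4}))
      std::printf("%d ", Idx);
    std::printf("\n");
  }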
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.h
index aca1ea1d50e5..d1eaf53b15e9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.h
@@ -146,10 +146,8 @@ private:
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
- CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
-
- bool CanLowerReturn(CallingConv::ID CallConv,
- MachineFunction &MF, bool isVarArg,
+ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrFormats.td
index ef596f5cebd5..6eb49076efb0 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrFormats.td
@@ -148,6 +148,8 @@ class FRd<bits<4> opcode, bits<7> f, dag outs, dag ins, string asmstr,
let Inst{11-9} = f{6-4};
let Inst{8-4} = d;
let Inst{3-0} = f{3-0};
+
+ let DecoderMethod = "decodeFRd";
}
//===----------------------------------------------------------------------===//
@@ -235,6 +237,8 @@ class FLPMX<bit e, bit p, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{3-2} = 0b01;
let Inst{1} = e;
let Inst{0} = p;
+
+ let DecoderMethod = "decodeFLPMX";
}
//===----------------------------------------------------------------------===//
@@ -252,6 +256,8 @@ class FMOVWRdRr<dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{15-8} = 0b00000001;
let Inst{7-4} = d{4-1};
let Inst{3-0} = r{4-1};
+
+ let DecoderMethod = "decodeFMOVWRdRr";
}
//===----------------------------------------------------------------------===//
@@ -270,6 +276,8 @@ class FMUL2RdRr<bit f, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{8} = f;
let Inst{7-4} = rd{3-0};
let Inst{3-0} = rr{3-0};
+
+ let DecoderMethod = "decodeFMUL2RdRr";
}
// Special encoding for the FMUL family of instructions.
@@ -293,6 +301,8 @@ class FFMULRdRr<bits<2> f, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{6-4} = rd;
let Inst{3} = f{0};
let Inst{2-0} = rr;
+
+ let DecoderMethod = "decodeFFMULRdRr";
}
@@ -314,6 +324,8 @@ class FWRdK<bit f, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{7-6} = k{5-4};
let Inst{5-4} = dst{2-1};
let Inst{3-0} = k{3-0};
+
+ let DecoderMethod = "decodeFWRdK";
}
//===----------------------------------------------------------------------===//
@@ -332,6 +344,8 @@ class FIORdA<dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{10-9} = A{5-4};
let Inst{8-4} = d;
let Inst{3-0} = A{3-0};
+
+ let DecoderMethod = "decodeFIORdA";
}
//===----------------------------------------------------------------------===//
@@ -350,6 +364,8 @@ class FIOARr<dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{10-9} = A{5-4};
let Inst{8-4} = r;
let Inst{3-0} = A{3-0};
+
+ let DecoderMethod = "decodeFIOARr";
}
//===----------------------------------------------------------------------===//
@@ -374,6 +390,8 @@ class FIOBIT<bits<2> t, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{3} = A{0};
let Inst{2-0} = b{2-0};
+
+ let DecoderMethod = "decodeFIOBIT";
}
//===----------------------------------------------------------------------===//
@@ -485,7 +503,7 @@ class F32BRk<bits<3> f, dag outs, dag ins, string asmstr, list<dag> pattern>
}
//===----------------------------------------------------------------------===//
-// 32 bits direct mem instructions: <|1001|00fd|dddd|0000|kkkk|kkkk|kkkk|kkkk|>
+// 32 bits direct mem instructions: <|1001|00fd|dddd|0000|kkkk|kkkk|kkkk|kkkk|>
// f = secondary opcode = 1 bit
// d = destination = 5 bits
// k = constant address = 16 bits
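
Each instruction format class now names a DecoderMethod, giving the generated disassembler a hook for operands whose bits are split across the encoding; FIORdA, for instance, spreads the 6-bit I/O address across Inst{10-9} and Inst{3-0}. The sketch below only models the field extraction such a hook has to perform for an IN Rd,A word, following the bit layout shown above; decodeInRdA is an illustrative name, not the real AVRDisassembler entry point.

  #include <cstdint>
  #include <cstdio>

  // Extract the operands of an IN Rd,A word (format FIORdA) per the layout in
  // AVRInstrFormats.td: A = Inst{10-9}:Inst{3-0}, d = Inst{8-4}.
  struct InRdA { unsigned Rd; unsigned Addr; };

  static InRdA decodeInRdA(uint16_t Insn) {
    unsigned Rd = (Insn >> 4) & 0x1f;
    unsigned Addr = ((Insn >> 5) & 0x30) | (Insn & 0x0f);
    return {Rd, Addr};
  }

  int main() {
    InRdA D = decodeInRdA(0xB78F); // encodes "in r24, 0x3f" (read SREG)
    std::printf("in r%u, 0x%02x\n", D.Rd, D.Addr);
  }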
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.cpp
index a6832f282b31..06f07696bde3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.cpp
@@ -48,11 +48,11 @@ void AVRInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Not all AVR devices support the 16-bit `MOVW` instruction.
if (AVR::DREGSRegClass.contains(DestReg, SrcReg)) {
- if (STI.hasMOVW()) {
+ if (STI.hasMOVW() && AVR::DREGSMOVWRegClass.contains(DestReg, SrcReg)) {
BuildMI(MBB, MI, DL, get(AVR::MOVWRdRr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
} else {
- unsigned DestLo, DestHi, SrcLo, SrcHi;
+ Register DestLo, DestHi, SrcLo, SrcHi;
TRI.splitReg(DestReg, DestLo, DestHi);
TRI.splitReg(SrcReg, SrcLo, SrcHi);
@@ -119,7 +119,7 @@ unsigned AVRInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
void AVRInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill,
+ Register SrcReg, bool isKill,
int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
@@ -138,7 +138,7 @@ void AVRInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIndex),
MachineMemOperand::MOStore, MFI.getObjectSize(FrameIndex),
- MFI.getObjectAlignment(FrameIndex));
+ MFI.getObjectAlign(FrameIndex));
unsigned Opcode = 0;
if (TRI->isTypeLegalForClass(*RC, MVT::i8)) {
@@ -158,7 +158,7 @@ void AVRInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
void AVRInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIndex,
+ Register DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
@@ -172,7 +172,7 @@ void AVRInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIndex),
MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
- MFI.getObjectAlignment(FrameIndex));
+ MFI.getObjectAlign(FrameIndex));
unsigned Opcode = 0;
if (TRI->isTypeLegalForClass(*RC, MVT::i8)) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.h
index bb00ca8c724a..11f45865de54 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.h
@@ -75,12 +75,12 @@ public:
const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, unsigned SrcReg,
+ MachineBasicBlock::iterator MI, Register SrcReg,
bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, unsigned DestReg,
+ MachineBasicBlock::iterator MI, Register DestReg,
int FrameIndex, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
unsigned isLoadFromStackSlot(const MachineInstr &MI,
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.td
index acf991dcfbb1..f03c254382b4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -107,7 +107,9 @@ def imm_com8 : Operand<i8> {
def ioaddr_XFORM : SDNodeXForm<imm,
[{
- return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()) - 0x20, SDLoc(N), MVT::i8);
+ uint8_t offset = Subtarget->getIORegisterOffset();
+ return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()) - offset,
+ SDLoc(N), MVT::i8);
}]>;
def iobitpos8_XFORM : SDNodeXForm<imm,
@@ -124,20 +126,23 @@ def iobitposn8_XFORM : SDNodeXForm<imm,
def ioaddr8 : PatLeaf<(imm),
[{
- uint64_t val = N->getZExtValue();
- return val >= 0x20 && val < 0x60;
+ uint8_t offset = Subtarget->getIORegisterOffset();
+ uint64_t val = N->getZExtValue() - offset;
+ return val < 0x40;
}], ioaddr_XFORM>;
def lowioaddr8 : PatLeaf<(imm),
[{
- uint64_t val = N->getZExtValue();
- return val >= 0x20 && val < 0x40;
+ uint8_t offset = Subtarget->getIORegisterOffset();
+ uint64_t val = N->getZExtValue() - offset;
+ return val < 0x20;
}], ioaddr_XFORM>;
def ioaddr16 : PatLeaf<(imm),
[{
- uint64_t val = N->getZExtValue();
- return val >= 0x20 && val < 0x5f;
+ uint8_t offset = Subtarget->getIORegisterOffset();
+ uint64_t val = N->getZExtValue() - offset;
+ return val < 0x3f;
}], ioaddr_XFORM>;
def iobitpos8 : PatLeaf<(imm),
@@ -188,6 +193,7 @@ def brtarget_13 : Operand<OtherVT>
def call_target : Operand<iPTR>
{
let EncoderMethod = "encodeCallTarget";
+ let DecoderMethod = "decodeCallTarget";
}
// A 16-bit address (which can lead to an R_AVR_16 relocation).
@@ -260,58 +266,58 @@ def LDDSTDPtrReg : Operand<i16>
//===----------------------------------------------------------------------===//
def HasSRAM : Predicate<"Subtarget->hasSRAM()">,
- AssemblerPredicate<"FeatureSRAM">;
+ AssemblerPredicate<(all_of FeatureSRAM)>;
def HasJMPCALL : Predicate<"Subtarget->hasJMPCALL()">,
- AssemblerPredicate<"FeatureJMPCALL">;
+ AssemblerPredicate<(all_of FeatureJMPCALL)>;
def HasIJMPCALL : Predicate<"Subtarget->hasIJMPCALL()">,
- AssemblerPredicate<"FeatureIJMPCALL">;
+ AssemblerPredicate<(all_of FeatureIJMPCALL)>;
def HasEIJMPCALL : Predicate<"Subtarget->hasEIJMPCALL()">,
- AssemblerPredicate<"FeatureEIJMPCALL">;
+ AssemblerPredicate<(all_of FeatureEIJMPCALL)>;
def HasADDSUBIW : Predicate<"Subtarget->hasADDSUBIW()">,
- AssemblerPredicate<"FeatureADDSUBIW">;
+ AssemblerPredicate<(all_of FeatureADDSUBIW)>;
def HasSmallStack : Predicate<"Subtarget->HasSmallStack()">,
- AssemblerPredicate<"FeatureSmallStack">;
+ AssemblerPredicate<(all_of FeatureSmallStack)>;
def HasMOVW : Predicate<"Subtarget->hasMOVW()">,
- AssemblerPredicate<"FeatureMOVW">;
+ AssemblerPredicate<(all_of FeatureMOVW)>;
def HasLPM : Predicate<"Subtarget->hasLPM()">,
- AssemblerPredicate<"FeatureLPM">;
+ AssemblerPredicate<(all_of FeatureLPM)>;
def HasLPMX : Predicate<"Subtarget->hasLPMX()">,
- AssemblerPredicate<"FeatureLPMX">;
+ AssemblerPredicate<(all_of FeatureLPMX)>;
def HasELPM : Predicate<"Subtarget->hasELPM()">,
- AssemblerPredicate<"FeatureELPM">;
+ AssemblerPredicate<(all_of FeatureELPM)>;
def HasELPMX : Predicate<"Subtarget->hasELPMX()">,
- AssemblerPredicate<"FeatureELPMX">;
+ AssemblerPredicate<(all_of FeatureELPMX)>;
def HasSPM : Predicate<"Subtarget->hasSPM()">,
- AssemblerPredicate<"FeatureSPM">;
+ AssemblerPredicate<(all_of FeatureSPM)>;
def HasSPMX : Predicate<"Subtarget->hasSPMX()">,
- AssemblerPredicate<"FeatureSPMX">;
+ AssemblerPredicate<(all_of FeatureSPMX)>;
def HasDES : Predicate<"Subtarget->hasDES()">,
- AssemblerPredicate<"FeatureDES">;
+ AssemblerPredicate<(all_of FeatureDES)>;
def SupportsRMW : Predicate<"Subtarget->supportsRMW()">,
- AssemblerPredicate<"FeatureRMW">;
+ AssemblerPredicate<(all_of FeatureRMW)>;
def SupportsMultiplication : Predicate<"Subtarget->supportsMultiplication()">,
- AssemblerPredicate<"FeatureMultiplication">;
+ AssemblerPredicate<(all_of FeatureMultiplication)>;
def HasBREAK : Predicate<"Subtarget->hasBREAK()">,
- AssemblerPredicate<"FeatureBREAK">;
+ AssemblerPredicate<(all_of FeatureBREAK)>;
def HasTinyEncoding : Predicate<"Subtarget->hasTinyEncoding()">,
- AssemblerPredicate<"FeatureTinyEncoding">;
+ AssemblerPredicate<(all_of FeatureTinyEncoding)>;
// AVR specific condition code. These correspond to AVR_*_COND in
@@ -555,7 +561,7 @@ Defs = [R1, R0, SREG] in
def MULSRdRr : FMUL2RdRr<0,
(outs),
- (ins GPR8:$lhs, GPR8:$rhs),
+ (ins LD8:$lhs, LD8:$rhs),
"muls\t$lhs, $rhs",
[]>,
Requires<[SupportsMultiplication]>;
@@ -563,28 +569,28 @@ Defs = [R1, R0, SREG] in
def MULSURdRr : FMUL2RdRr<1,
(outs),
- (ins GPR8:$lhs, GPR8:$rhs),
+ (ins LD8lo:$lhs, LD8lo:$rhs),
"mulsu\t$lhs, $rhs",
[]>,
Requires<[SupportsMultiplication]>;
def FMUL : FFMULRdRr<0b01,
(outs),
- (ins GPR8:$lhs, GPR8:$rhs),
+ (ins LD8lo:$lhs, LD8lo:$rhs),
"fmul\t$lhs, $rhs",
[]>,
Requires<[SupportsMultiplication]>;
def FMULS : FFMULRdRr<0b10,
(outs),
- (ins GPR8:$lhs, GPR8:$rhs),
+ (ins LD8lo:$lhs, LD8lo:$rhs),
"fmuls\t$lhs, $rhs",
[]>,
Requires<[SupportsMultiplication]>;
def FMULSU : FFMULRdRr<0b11,
(outs),
- (ins GPR8:$lhs, GPR8:$rhs),
+ (ins LD8lo:$lhs, LD8lo:$rhs),
"fmulsu\t$lhs, $rhs",
[]>,
Requires<[SupportsMultiplication]>;
@@ -840,7 +846,7 @@ let isCall = 1 in
//===----------------------------------------------------------------------===//
let isTerminator = 1,
isReturn = 1,
-isBarrier = 1 in
+isBarrier = 1 in
{
def RET : F16<0b1001010100001000,
(outs),
@@ -2042,8 +2048,6 @@ def : Pat<(add i16:$src1, imm:$src2),
(SUBIWRdK i16:$src1, (imm16_neg_XFORM imm:$src2))>;
def : Pat<(addc i16:$src1, imm:$src2),
(SUBIWRdK i16:$src1, (imm16_neg_XFORM imm:$src2))>;
-def : Pat<(adde i16:$src1, imm:$src2),
- (SBCIWRdK i16:$src1, (imm16_neg_XFORM imm:$src2))>;
def : Pat<(add i8:$src1, imm:$src2),
(SUBIRdK i8:$src1, (imm8_neg_XFORM imm:$src2))>;
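
The ioaddr8/lowioaddr8/ioaddr16 changes above replace the hard-coded data-space window [0x20, 0x60) with an offset taken from Subtarget->getIORegisterOffset(), so the same patterns also match on cores whose general-purpose registers are not memory mapped. A minimal sketch of the equivalent check, assuming a standalone helper (isIOAddress8 is an illustrative name, not part of the patch); the unsigned subtraction lets a single comparison cover both bounds:

    #include <cstdint>

    // hasMemMappedGPR mirrors Subtarget->hasMemMappedGPR(): classic cores map
    // r0-r31 into the first 0x20 bytes of data space, so I/O addresses start
    // at data address 0x20; otherwise the offset is 0.
    static bool isIOAddress8(uint64_t DataSpaceAddr, bool hasMemMappedGPR) {
      uint8_t Offset = hasMemMappedGPR ? 0x20 : 0x00;
      // Equivalent to the old "addr >= 0x20 && addr < 0x60" when the offset is
      // 0x20: if DataSpaceAddr < Offset the subtraction wraps and the
      // comparison fails.
      return DataSpaceAddr - Offset < 0x40; // IN/OUT reach 64 I/O registers.
    }
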
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h
index 5226e30491c3..5432fac122ef 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h
@@ -31,6 +31,12 @@ class AVRMachineFunctionInfo : public MachineFunctionInfo {
/// used inside the function.
bool HasStackArgs;
+ /// Whether or not the function is an interrupt handler.
+ bool IsInterruptHandler;
+
+ /// Whether or not the function is a non-blocking interrupt handler.
+ bool IsSignalHandler;
+
/// Size of the callee-saved register portion of the
/// stack frame in bytes.
unsigned CalleeSavedFrameSize;
@@ -41,11 +47,17 @@ class AVRMachineFunctionInfo : public MachineFunctionInfo {
public:
AVRMachineFunctionInfo()
: HasSpills(false), HasAllocas(false), HasStackArgs(false),
+ IsInterruptHandler(false), IsSignalHandler(false),
CalleeSavedFrameSize(0), VarArgsFrameIndex(0) {}
explicit AVRMachineFunctionInfo(MachineFunction &MF)
: HasSpills(false), HasAllocas(false), HasStackArgs(false),
- CalleeSavedFrameSize(0), VarArgsFrameIndex(0) {}
+ CalleeSavedFrameSize(0), VarArgsFrameIndex(0) {
+ unsigned CallConv = MF.getFunction().getCallingConv();
+
+ this->IsInterruptHandler = CallConv == CallingConv::AVR_INTR || MF.getFunction().hasFnAttribute("interrupt");
+ this->IsSignalHandler = CallConv == CallingConv::AVR_SIGNAL || MF.getFunction().hasFnAttribute("signal");
+ }
bool getHasSpills() const { return HasSpills; }
void setHasSpills(bool B) { HasSpills = B; }
@@ -56,6 +68,12 @@ public:
bool getHasStackArgs() const { return HasStackArgs; }
void setHasStackArgs(bool B) { HasStackArgs = B; }
+ /// Checks if the function is some form of interrupt service routine.
+ bool isInterruptOrSignalHandler() const { return isInterruptHandler() || isSignalHandler(); }
+
+ bool isInterruptHandler() const { return IsInterruptHandler; }
+ bool isSignalHandler() const { return IsSignalHandler; }
+
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
void setCalleeSavedFrameSize(unsigned Bytes) { CalleeSavedFrameSize = Bytes; }
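
The new AVRMachineFunctionInfo fields cache whether the function is an interrupt or signal handler, derived either from the AVR_INTR/AVR_SIGNAL calling conventions or from the "interrupt"/"signal" function attributes. As a hedged illustration only (the vector names and attribute spelling follow the usual avr-gcc convention and are not taken from this patch), handlers that would be classified this way can be declared as:

    extern volatile unsigned char Tick;

    // Detected via the "signal" function attribute or CallingConv::AVR_SIGNAL.
    __attribute__((signal)) void __vector_8(void) { ++Tick; }

    // Detected via the "interrupt" function attribute or CallingConv::AVR_INTR.
    __attribute__((interrupt)) void __vector_9(void) { ++Tick; }
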
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
index 8fce05c933bc..2a4905ce2461 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -22,6 +22,7 @@
#include "AVR.h"
#include "AVRInstrInfo.h"
+#include "AVRMachineFunctionInfo.h"
#include "AVRTargetMachine.h"
#include "MCTargetDesc/AVRMCTargetDesc.h"
@@ -34,19 +35,21 @@ AVRRegisterInfo::AVRRegisterInfo() : AVRGenRegisterInfo(0) {}
const uint16_t *
AVRRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- CallingConv::ID CC = MF->getFunction().getCallingConv();
+ const AVRMachineFunctionInfo *AFI = MF->getInfo<AVRMachineFunctionInfo>();
- return ((CC == CallingConv::AVR_INTR || CC == CallingConv::AVR_SIGNAL)
+ return AFI->isInterruptOrSignalHandler()
? CSR_Interrupts_SaveList
- : CSR_Normal_SaveList);
+ : CSR_Normal_SaveList;
}
const uint32_t *
AVRRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
- return ((CC == CallingConv::AVR_INTR || CC == CallingConv::AVR_SIGNAL)
+ const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
+
+ return AFI->isInterruptOrSignalHandler()
? CSR_Interrupts_RegMask
- : CSR_Normal_RegMask);
+ : CSR_Normal_RegMask;
}
BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
@@ -95,7 +98,8 @@ AVRRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
}
/// Fold a frame offset shared between two add instructions into a single one.
-static void foldFrameOffset(MachineBasicBlock::iterator &II, int &Offset, unsigned DstReg) {
+static void foldFrameOffset(MachineBasicBlock::iterator &II, int &Offset,
+ Register DstReg) {
MachineInstr &MI = *II;
int Opcode = MI.getOpcode();
@@ -264,13 +268,12 @@ AVRRegisterInfo::getPointerRegClass(const MachineFunction &MF,
return &AVR::PTRDISPREGSRegClass;
}
-void AVRRegisterInfo::splitReg(unsigned Reg,
- unsigned &LoReg,
- unsigned &HiReg) const {
- assert(AVR::DREGSRegClass.contains(Reg) && "can only split 16-bit registers");
+void AVRRegisterInfo::splitReg(Register Reg, Register &LoReg,
+ Register &HiReg) const {
+ assert(AVR::DREGSRegClass.contains(Reg) && "can only split 16-bit registers");
- LoReg = getSubReg(Reg, AVR::sub_lo);
- HiReg = getSubReg(Reg, AVR::sub_hi);
+ LoReg = getSubReg(Reg, AVR::sub_lo);
+ HiReg = getSubReg(Reg, AVR::sub_hi);
}
bool AVRRegisterInfo::shouldCoalesce(MachineInstr *MI,
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/AVR/AVRRegisterInfo.h
index 8e6e63af3d57..23439f2fe195 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRRegisterInfo.h
@@ -49,11 +49,7 @@ public:
/// Splits a 16-bit `DREGS` register into the lo/hi register pair.
/// \param Reg A 16-bit register to split.
- void splitReg(unsigned Reg, unsigned &LoReg, unsigned &HiReg) const;
-
- bool trackLivenessAfterRegAlloc(const MachineFunction &) const override {
- return true;
- }
+ void splitReg(Register Reg, Register &LoReg, Register &HiReg) const;
bool shouldCoalesce(MachineInstr *MI,
const TargetRegisterClass *SrcRC,
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/AVR/AVRRegisterInfo.td
index ea38fedd22ce..ab5d02356c9d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRRegisterInfo.td
@@ -103,6 +103,17 @@ CoveredBySubRegs = 1 in
def R5R4 : AVRReg<4, "r5:r4", [R4, R5]>, DwarfRegNum<[4]>;
def R3R2 : AVRReg<2, "r3:r2", [R2, R3]>, DwarfRegNum<[2]>;
def R1R0 : AVRReg<0, "r1:r0", [R0, R1]>, DwarfRegNum<[0]>;
+
+ // Pseudo registers for unaligned i16
+ def R26R25 : AVRReg<25, "r26:r25", [R25, R26]>, DwarfRegNum<[25]>;
+ def R24R23 : AVRReg<23, "r24:r23", [R23, R24]>, DwarfRegNum<[23]>;
+ def R22R21 : AVRReg<21, "r22:r21", [R21, R22]>, DwarfRegNum<[21]>;
+ def R20R19 : AVRReg<19, "r20:r19", [R19, R20]>, DwarfRegNum<[19]>;
+ def R18R17 : AVRReg<17, "r18:r17", [R17, R18]>, DwarfRegNum<[17]>;
+ def R16R15 : AVRReg<15, "r16:r15", [R15, R16]>, DwarfRegNum<[15]>;
+ def R14R13 : AVRReg<13, "r14:r13", [R13, R14]>, DwarfRegNum<[13]>;
+ def R12R11 : AVRReg<11, "r12:r11", [R11, R12]>, DwarfRegNum<[11]>;
+ def R10R9 : AVRReg<9, "r10:r9", [R9, R10]>, DwarfRegNum<[9]>;
}
//===----------------------------------------------------------------------===//
@@ -153,6 +164,22 @@ def DREGS : RegisterClass<"AVR", [i16], 8,
R31R30, R27R26,
// Callee saved registers.
R29R28, R17R16, R15R14, R13R12, R11R10,
+ R9R8, R7R6, R5R4, R3R2, R1R0,
+ // Pseudo regs for unaligned 16-bits
+ R26R25, R24R23, R22R21,
+ R20R19, R18R17, R16R15,
+ R14R13, R12R11, R10R9
+ )>;
+
+// 16-bit pair register class for movw
+def DREGSMOVW : RegisterClass<"AVR", [i16], 8,
+ (
+ // Return value and arguments.
+ add R25R24, R19R18, R21R20, R23R22,
+ // Scratch registers.
+ R31R30, R27R26,
+ // Callee saved registers.
+ R29R28, R17R16, R15R14, R13R12, R11R10,
R9R8, R7R6, R5R4, R3R2, R1R0
)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.cpp
index 6a41036fdd6c..195ca95bc3bd 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.cpp
@@ -29,16 +29,19 @@ namespace llvm {
AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const AVRTargetMachine &TM)
- : AVRGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(),
- TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)), TSInfo(),
+ : AVRGenSubtargetInfo(TT, CPU, FS), ELFArch(0),
// Subtarget features
m_hasSRAM(false), m_hasJMPCALL(false), m_hasIJMPCALL(false),
m_hasEIJMPCALL(false), m_hasADDSUBIW(false), m_hasSmallStack(false),
- m_hasMOVW(false), m_hasLPM(false), m_hasLPMX(false), m_hasELPM(false),
+ m_hasMOVW(false), m_hasLPM(false), m_hasLPMX(false), m_hasELPM(false),
m_hasELPMX(false), m_hasSPM(false), m_hasSPMX(false), m_hasDES(false),
m_supportsRMW(false), m_supportsMultiplication(false), m_hasBREAK(false),
- m_hasTinyEncoding(false), ELFArch(false), m_FeatureSetDummy(false) {
+ m_hasTinyEncoding(false), m_hasMemMappedGPR(false),
+ m_FeatureSetDummy(false),
+
+ InstrInfo(), FrameLowering(),
+ TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)), TSInfo() {
// Parse features string.
ParseSubtargetFeatures(CPU, FS);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.h b/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.h
index da9289af7c8d..81d883eb30d9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.h
@@ -71,6 +71,9 @@ public:
bool supportsMultiplication() const { return m_supportsMultiplication; }
bool hasBREAK() const { return m_hasBREAK; }
bool hasTinyEncoding() const { return m_hasTinyEncoding; }
+ bool hasMemMappedGPR() const { return m_hasMemMappedGPR; }
+
+ uint8_t getIORegisterOffset() const { return hasMemMappedGPR() ? 0x20 : 0x0; }
/// Gets the ELF architecture for the e_flags field
/// of an ELF object file.
@@ -81,10 +84,9 @@ public:
}
private:
- AVRInstrInfo InstrInfo;
- AVRFrameLowering FrameLowering;
- AVRTargetLowering TLInfo;
- AVRSelectionDAGInfo TSInfo;
+
+ /// The ELF e_flags architecture.
+ unsigned ELFArch;
// Subtarget feature settings
// See AVR.td for details.
@@ -106,13 +108,16 @@ private:
bool m_supportsMultiplication;
bool m_hasBREAK;
bool m_hasTinyEncoding;
-
- /// The ELF e_flags architecture.
- unsigned ELFArch;
+ bool m_hasMemMappedGPR;
// Dummy member, used by FeatureSet's. We cannot have a SubtargetFeature with
// no variable, so we instead bind pseudo features to this variable.
bool m_FeatureSetDummy;
+
+ AVRInstrInfo InstrInfo;
+ AVRFrameLowering FrameLowering;
+ AVRTargetLowering TLInfo;
+ AVRSelectionDAGInfo TSInfo;
};
} // end namespace llvm
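
The member reordering above matters because C++ constructs non-static data members in declaration order, regardless of the order in the mem-initializer list: ELFArch and the feature flags are now declared before InstrInfo/FrameLowering/TLInfo/TSInfo, presumably so they are already default-initialized before TLInfo's constructor runs initializeSubtargetDependencies(CPU, FS, TM) and parses the feature string, rather than being reset afterwards. A minimal, self-contained sketch of that ordering rule (Demo and Lowering are illustrative names, not AVR code):

    #include <cassert>

    // Members are constructed in declaration order, not mem-initializer order,
    // so anything a later member's constructor reads must be declared first.
    struct Demo {
      bool FeatureFlag;                    // declared (and built) first
      struct Lowering {
        explicit Lowering(const Demo &D) { assert(D.FeatureFlag); }
      } TLInfo;                            // built second, may read the flag

      Demo() : FeatureFlag(true), TLInfo(*this) {}
    };

    int main() { Demo D; (void)D; }
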
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.cpp
index b33284b73d63..0c7136e6f77e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetMachine.cpp
@@ -49,7 +49,7 @@ AVRTargetMachine::AVRTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T, AVRDataLayout, TT, getCPU(CPU), FS, Options,
getEffectiveRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
- SubTarget(TT, getCPU(CPU), FS, *this) {
+ SubTarget(TT, std::string(getCPU(CPU)), std::string(FS), *this) {
this->TLOF = std::make_unique<AVRTargetObjectFile>();
initAsmInfo();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp
index 980096a09835..14206cdb8276 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp
@@ -30,7 +30,7 @@ AVRTargetObjectFile::SelectSectionForGlobal(const GlobalObject *GO,
const TargetMachine &TM) const {
// Global values in flash memory are placed in the progmem.data section
// unless they already have a user assigned section.
- if (AVR::isProgramMemoryAddress(GO) && !GO->hasSection())
+ if (AVR::isProgramMemoryAddress(GO) && !GO->hasSection() && Kind.isReadOnly())
return ProgmemDataSection;
// Otherwise, we work the same way as ELF.
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index fc34583ae573..230bc7adc07a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -53,6 +53,8 @@ class AVRAsmParser : public MCTargetAsmParser {
bool MatchingInlineAsm) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
@@ -64,7 +66,7 @@ class AVRAsmParser : public MCTargetAsmParser {
bool parseOperand(OperandVector &Operands);
int parseRegisterName(unsigned (*matchFn)(StringRef));
int parseRegisterName();
- int parseRegister();
+ int parseRegister(bool RestoreOnFailure = false);
bool tryParseRegisterOperand(OperandVector &Operands);
bool tryParseExpression(OperandVector &Operands);
bool tryParseRelocExpression(OperandVector &Operands);
@@ -176,10 +178,10 @@ public:
return isUInt<8>(Value);
}
- bool isReg() const { return Kind == k_Register; }
- bool isImm() const { return Kind == k_Immediate; }
- bool isToken() const { return Kind == k_Token; }
- bool isMem() const { return Kind == k_Memri; }
+ bool isReg() const override { return Kind == k_Register; }
+ bool isImm() const override { return Kind == k_Immediate; }
+ bool isToken() const override { return Kind == k_Token; }
+ bool isMem() const override { return Kind == k_Memri; }
bool isMemri() const { return Kind == k_Memri; }
StringRef getToken() const {
@@ -187,7 +189,7 @@ public:
return Tok;
}
- unsigned getReg() const {
+ unsigned getReg() const override {
assert((Kind == k_Register || Kind == k_Memri) && "Invalid access!");
return RegImm.Reg;
@@ -237,10 +239,10 @@ public:
RegImm = {RegNo, Imm};
}
- SMLoc getStartLoc() const { return Start; }
- SMLoc getEndLoc() const { return End; }
+ SMLoc getStartLoc() const override { return Start; }
+ SMLoc getEndLoc() const override { return End; }
- virtual void print(raw_ostream &O) const {
+ void print(raw_ostream &O) const override {
switch (Kind) {
case k_Token:
O << "Token: \"" << getToken() << "\"";
@@ -307,7 +309,7 @@ bool AVRAsmParser::missingFeature(llvm::SMLoc const &Loc,
bool AVRAsmParser::emit(MCInst &Inst, SMLoc const &Loc, MCStreamer &Out) const {
Inst.setLoc(Loc);
- Out.EmitInstruction(Inst, STI);
+ Out.emitInstruction(Inst, STI);
return false;
}
@@ -359,19 +361,25 @@ int AVRAsmParser::parseRegisterName() {
return RegNum;
}
-int AVRAsmParser::parseRegister() {
+int AVRAsmParser::parseRegister(bool RestoreOnFailure) {
int RegNum = AVR::NoRegister;
if (Parser.getTok().is(AsmToken::Identifier)) {
// Check for register pair syntax
if (Parser.getLexer().peekTok().is(AsmToken::Colon)) {
+ AsmToken HighTok = Parser.getTok();
Parser.Lex();
+ AsmToken ColonTok = Parser.getTok();
Parser.Lex(); // Eat high (odd) register and colon
if (Parser.getTok().is(AsmToken::Identifier)) {
// Convert lower (even) register to DREG
RegNum = toDREG(parseRegisterName());
}
+ if (RegNum == AVR::NoRegister && RestoreOnFailure) {
+ getLexer().UnLex(std::move(ColonTok));
+ getLexer().UnLex(std::move(HighTok));
+ }
} else {
RegNum = parseRegisterName();
}
@@ -580,12 +588,24 @@ AVRAsmParser::parseMemriOperand(OperandVector &Operands) {
bool AVRAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
StartLoc = Parser.getTok().getLoc();
- RegNo = parseRegister();
+ RegNo = parseRegister(/*RestoreOnFailure=*/false);
EndLoc = Parser.getTok().getLoc();
return (RegNo == AVR::NoRegister);
}
+OperandMatchResultTy AVRAsmParser::tryParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ StartLoc = Parser.getTok().getLoc();
+ RegNo = parseRegister(/*RestoreOnFailure=*/true);
+ EndLoc = Parser.getTok().getLoc();
+
+ if (RegNo == AVR::NoRegister)
+ return MatchOperand_NoMatch;
+ return MatchOperand_Success;
+}
+
void AVRAsmParser::eatComma() {
if (getLexer().is(AsmToken::Comma)) {
Parser.Lex();
@@ -650,7 +670,7 @@ bool AVRAsmParser::parseLiteralValues(unsigned SizeInBytes, SMLoc L) {
Tokens[0].getKind() == AsmToken::Minus &&
Tokens[1].getKind() == AsmToken::Identifier) {
MCSymbol *Symbol = getContext().getOrCreateSymbol(".text");
- AVRStreamer.EmitValueForModiferKind(Symbol, SizeInBytes, L,
+ AVRStreamer.emitValueForModiferKind(Symbol, SizeInBytes, L,
AVRMCExpr::VK_AVR_None);
return false;
}
@@ -668,7 +688,7 @@ bool AVRAsmParser::parseLiteralValues(unsigned SizeInBytes, SMLoc L) {
}
MCSymbol *Symbol =
getContext().getOrCreateSymbol(Parser.getTok().getString());
- AVRStreamer.EmitValueForModiferKind(Symbol, SizeInBytes, L, ModifierKind);
+ AVRStreamer.emitValueForModiferKind(Symbol, SizeInBytes, L, ModifierKind);
return false;
}
@@ -676,7 +696,7 @@ bool AVRAsmParser::parseLiteralValues(unsigned SizeInBytes, SMLoc L) {
const MCExpr *Value;
if (Parser.parseExpression(Value))
return true;
- Parser.getStreamer().EmitValue(Value, SizeInBytes, L);
+ Parser.getStreamer().emitValue(Value, SizeInBytes, L);
return false;
};
return (parseMany(parseOne));
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
index 694aee818f7c..8e7251a74dfd 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
@@ -57,23 +57,180 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRDisassembler() {
createAVRDisassembler);
}
+static const uint16_t GPRDecoderTable[] = {
+ AVR::R0, AVR::R1, AVR::R2, AVR::R3,
+ AVR::R4, AVR::R5, AVR::R6, AVR::R7,
+ AVR::R8, AVR::R9, AVR::R10, AVR::R11,
+ AVR::R12, AVR::R13, AVR::R14, AVR::R15,
+ AVR::R16, AVR::R17, AVR::R18, AVR::R19,
+ AVR::R20, AVR::R21, AVR::R22, AVR::R23,
+ AVR::R24, AVR::R25, AVR::R26, AVR::R27,
+ AVR::R28, AVR::R29, AVR::R30, AVR::R31,
+};
+
static DecodeStatus DecodeGPR8RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Register = GPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
return MCDisassembler::Success;
}
static DecodeStatus DecodeLD8RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+
+ unsigned Register = GPRDecoderTable[RegNo+16];
+ Inst.addOperand(MCOperand::createReg(Register));
return MCDisassembler::Success;
}
static DecodeStatus DecodePTRREGSRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
+ // Note: this function must be defined but does not seem to be called.
+ assert(false && "unimplemented: PTRREGS register class");
return MCDisassembler::Success;
}
+static DecodeStatus decodeFIOARr(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus decodeFIORdA(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus decodeFIOBIT(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus decodeCallTarget(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus decodeFRd(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus decodeFLPMX(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus decodeFFMULRdRr(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus decodeFMOVWRdRr(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus decodeFWRdK(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus decodeFMUL2RdRr(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
#include "AVRGenDisassemblerTables.inc"
+static DecodeStatus decodeFIOARr(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned addr = 0;
+ addr |= fieldFromInstruction(Insn, 0, 4);
+ addr |= fieldFromInstruction(Insn, 9, 2) << 4;
+ unsigned reg = fieldFromInstruction(Insn, 4, 5);
+ Inst.addOperand(MCOperand::createImm(addr));
+ if (DecodeGPR8RegisterClass(Inst, reg, Address, Decoder) == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeFIORdA(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned addr = 0;
+ addr |= fieldFromInstruction(Insn, 0, 4);
+ addr |= fieldFromInstruction(Insn, 9, 2) << 4;
+ unsigned reg = fieldFromInstruction(Insn, 4, 5);
+ if (DecodeGPR8RegisterClass(Inst, reg, Address, Decoder) == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(addr));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeFIOBIT(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned addr = fieldFromInstruction(Insn, 3, 5);
+ unsigned b = fieldFromInstruction(Insn, 0, 3);
+ Inst.addOperand(MCOperand::createImm(addr));
+ Inst.addOperand(MCOperand::createImm(b));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeCallTarget(MCInst &Inst, unsigned Field,
+ uint64_t Address, const void *Decoder) {
+ // Call targets need to be shifted left by one so this needs a custom
+ // decoder.
+ Inst.addOperand(MCOperand::createImm(Field << 1));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeFRd(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned d = fieldFromInstruction(Insn, 4, 5);
+ if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeFLPMX(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ if (decodeFRd(Inst, Insn, Address, Decoder) == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createReg(AVR::R31R30));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeFFMULRdRr(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned d = fieldFromInstruction(Insn, 4, 3) + 16;
+ unsigned r = fieldFromInstruction(Insn, 0, 3) + 16;
+ if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+ if (DecodeGPR8RegisterClass(Inst, r, Address, Decoder) == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeFMOVWRdRr(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned r = fieldFromInstruction(Insn, 4, 4) * 2;
+ unsigned d = fieldFromInstruction(Insn, 0, 4) * 2;
+ if (DecodeGPR8RegisterClass(Inst, r, Address, Decoder) == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+ if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeFWRdK(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned d = fieldFromInstruction(Insn, 4, 2) * 2 + 24; // starts at r24:r25
+ unsigned k = 0;
+ k |= fieldFromInstruction(Insn, 0, 4);
+ k |= fieldFromInstruction(Insn, 6, 2) << 4;
+ if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+ if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(k));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeFMUL2RdRr(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned rd = fieldFromInstruction(Insn, 4, 4) + 16;
+ unsigned rr = fieldFromInstruction(Insn, 0, 4) + 16;
+ if (DecodeGPR8RegisterClass(Inst, rd, Address, Decoder) == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+ if (DecodeGPR8RegisterClass(Inst, rr, Address, Decoder) == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+ return MCDisassembler::Success;
+}
+
static DecodeStatus readInstruction16(ArrayRef<uint8_t> Bytes, uint64_t Address,
uint64_t &Size, uint32_t &Insn) {
if (Bytes.size() < 2) {
@@ -96,7 +253,7 @@ static DecodeStatus readInstruction32(ArrayRef<uint8_t> Bytes, uint64_t Address,
}
Size = 4;
- Insn = (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) | (Bytes[3] << 24);
+ Insn = (Bytes[0] << 16) | (Bytes[1] << 24) | (Bytes[2] << 0) | (Bytes[3] << 8);
return MCDisassembler::Success;
}
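
The readInstruction32 change above reflects how AVR stores its 4-byte instructions (CALL, JMP, LDS, STS): as two little-endian 16-bit words, with the generated decoder tables expecting the first word in the upper half of the 32-bit value. A small sketch of the equivalent fetch, assuming a standalone helper (readInsn32 is an illustrative name, not part of the patch):

    #include <cstddef>
    #include <cstdint>

    static bool readInsn32(const uint8_t *Bytes, size_t Size, uint32_t &Insn) {
      if (Size < 4)
        return false;
      uint32_t Word0 = Bytes[0] | (Bytes[1] << 8); // first little-endian word
      uint32_t Word1 = Bytes[2] | (Bytes[3] << 8); // second little-endian word
      Insn = (Word0 << 16) | Word1;                // first word in the top half
      return true;
    }
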
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
index e92b16c8ee9d..ac72abe0d9f6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -34,8 +34,9 @@ namespace adjust {
using namespace llvm;
-void signed_width(unsigned Width, uint64_t Value, std::string Description,
- const MCFixup &Fixup, MCContext *Ctx = nullptr) {
+static void signed_width(unsigned Width, uint64_t Value,
+ std::string Description, const MCFixup &Fixup,
+ MCContext *Ctx = nullptr) {
if (!isIntN(Width, Value)) {
std::string Diagnostic = "out of range " + Description;
@@ -53,8 +54,9 @@ void signed_width(unsigned Width, uint64_t Value, std::string Description,
}
}
-void unsigned_width(unsigned Width, uint64_t Value, std::string Description,
- const MCFixup &Fixup, MCContext *Ctx = nullptr) {
+static void unsigned_width(unsigned Width, uint64_t Value,
+ std::string Description, const MCFixup &Fixup,
+ MCContext *Ctx = nullptr) {
if (!isUIntN(Width, Value)) {
std::string Diagnostic = "out of range " + Description;
@@ -72,8 +74,8 @@ void unsigned_width(unsigned Width, uint64_t Value, std::string Description,
}
/// Adjusts the value of a branch target before fixup application.
-void adjustBranch(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+static void adjustBranch(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
// We have one extra bit of precision because the value is rightshifted by
// one.
unsigned_width(Size + 1, Value, std::string("branch target"), Fixup, Ctx);
@@ -83,14 +85,12 @@ void adjustBranch(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
}
/// Adjusts the value of a relative branch target before fixup application.
-void adjustRelativeBranch(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+static void adjustRelativeBranch(unsigned Size, const MCFixup &Fixup,
+ uint64_t &Value, MCContext *Ctx = nullptr) {
// We have one extra bit of precision because the value is rightshifted by
// one.
signed_width(Size + 1, Value, std::string("branch target"), Fixup, Ctx);
- Value -= 2;
-
// Rightshifts the value by one.
AVR::fixups::adjustBranchTarget(Value);
}
@@ -101,8 +101,8 @@ void adjustRelativeBranch(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
/// 1001 kkkk 010k kkkk kkkk kkkk 111k kkkk
///
/// Offset of 0 (so the result is left shifted by 3 bits before application).
-void fixup_call(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+static void fixup_call(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
adjustBranch(Size, Fixup, Value, Ctx);
auto top = Value & (0xf00000 << 6); // the top four bits
@@ -117,8 +117,8 @@ void fixup_call(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
/// Resolves to:
/// 0000 00kk kkkk k000
/// Offset of 0 (so the result is left shifted by 3 bits before application).
-void fixup_7_pcrel(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+static void fixup_7_pcrel(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
adjustRelativeBranch(Size, Fixup, Value, Ctx);
// Because the value may be negative, we must mask out the sign bits
@@ -131,21 +131,33 @@ void fixup_7_pcrel(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
/// Resolves to:
/// 0000 kkkk kkkk kkkk
/// Offset of 0 (so the result isn't left-shifted before application).
-void fixup_13_pcrel(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+static void fixup_13_pcrel(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
adjustRelativeBranch(Size, Fixup, Value, Ctx);
// Because the value may be negative, we must mask out the sign bits
Value &= 0xfff;
}
+/// 6-bit fixup for the immediate operand of the STD/LDD family of
+/// instructions.
+///
+/// Resolves to:
+/// 10q0 qq10 0000 1qqq
+static void fixup_6(const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
+ unsigned_width(6, Value, std::string("immediate"), Fixup, Ctx);
+
+ Value = ((Value & 0x20) << 8) | ((Value & 0x18) << 7) | (Value & 0x07);
+}
+
/// 6-bit fixup for the immediate operand of the ADIW family of
/// instructions.
///
/// Resolves to:
/// 0000 0000 kk00 kkkk
-void fixup_6_adiw(const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+static void fixup_6_adiw(const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
unsigned_width(6, Value, std::string("immediate"), Fixup, Ctx);
Value = ((Value & 0x30) << 2) | (Value & 0x0f);
@@ -155,8 +167,8 @@ void fixup_6_adiw(const MCFixup &Fixup, uint64_t &Value,
///
/// Resolves to:
/// 0000 0000 AAAA A000
-void fixup_port5(const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+static void fixup_port5(const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
unsigned_width(5, Value, std::string("port number"), Fixup, Ctx);
Value &= 0x1f;
@@ -168,8 +180,8 @@ void fixup_port5(const MCFixup &Fixup, uint64_t &Value,
///
/// Resolves to:
/// 1011 0AAd dddd AAAA
-void fixup_port6(const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+static void fixup_port6(const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
unsigned_width(6, Value, std::string("port number"), Fixup, Ctx);
Value = ((Value & 0x30) << 5) | (Value & 0x0f);
@@ -177,9 +189,7 @@ void fixup_port6(const MCFixup &Fixup, uint64_t &Value,
/// Adjusts a program memory address.
/// This is a simple right-shift.
-void pm(uint64_t &Value) {
- Value >>= 1;
-}
+static void pm(uint64_t &Value) { Value >>= 1; }
/// Fixups relating to the LDI instruction.
namespace ldi {
@@ -189,36 +199,36 @@ namespace ldi {
/// Resolves to:
/// 0000 KKKK 0000 KKKK
/// Offset of 0 (so the result isn't left-shifted before application).
-void fixup(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+static void fixup(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
uint64_t upper = Value & 0xf0;
uint64_t lower = Value & 0x0f;
Value = (upper << 4) | lower;
}
-void neg(uint64_t &Value) { Value *= -1; }
+static void neg(uint64_t &Value) { Value *= -1; }
-void lo8(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+static void lo8(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
Value &= 0xff;
ldi::fixup(Size, Fixup, Value, Ctx);
}
-void hi8(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+static void hi8(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
Value = (Value & 0xff00) >> 8;
ldi::fixup(Size, Fixup, Value, Ctx);
}
-void hh8(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+static void hh8(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
Value = (Value & 0xff0000) >> 16;
ldi::fixup(Size, Fixup, Value, Ctx);
}
-void ms8(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
- MCContext *Ctx = nullptr) {
+static void ms8(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
Value = (Value & 0xff000000) >> 24;
ldi::fixup(Size, Fixup, Value, Ctx);
}
@@ -237,17 +247,6 @@ void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup,
uint64_t Size = AVRAsmBackend::getFixupKindInfo(Fixup.getKind()).TargetSize;
unsigned Kind = Fixup.getKind();
-
- // Parsed LLVM-generated temporary labels are already
- // adjusted for instruction size, but normal labels aren't.
- //
- // To handle both cases, we simply un-adjust the temporary label
- // case so it acts like all other labels.
- if (const MCSymbolRefExpr *A = Target.getSymA()) {
- if (A->getSymbol().isTemporary())
- Value += 2;
- }
-
switch (Kind) {
default:
llvm_unreachable("unhandled fixup");
@@ -326,6 +325,9 @@ void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup,
Value &= 0xffff;
break;
+ case AVR::fixup_6:
+ adjust::fixup_6(Fixup, Value, Ctx);
+ break;
case AVR::fixup_6_adiw:
adjust::fixup_6_adiw(Fixup, Value, Ctx);
break;
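
The new AVR::fixup_6 handler scatters the 6-bit LDD/STD displacement q into the 10q0 qq10 0000 1qqq encoding: bit 5 moves to bit 13, bits 4:3 to bits 11:10, and bits 2:0 stay in place. A worked example of that scattering, written as a standalone helper for illustration (scatterQ is an assumed name):

    #include <cassert>
    #include <cstdint>

    // Same bit movement that adjust::fixup_6 applies to the displacement.
    static uint64_t scatterQ(uint64_t Q) {
      return ((Q & 0x20) << 8) | ((Q & 0x18) << 7) | (Q & 0x07);
    }

    int main() {
      // q = 0b101010 (0x2A): bit 5 set, bits 4:3 = 01, bits 2:0 = 010.
      assert(scatterQ(0x2A) == ((1u << 13) | (1u << 10) | 0x2u)); // 0x2402
    }
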
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
index 1e713db38145..9e150f120dd4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
@@ -22,9 +22,6 @@
namespace llvm {
class MCAssembler;
-class MCObjectWriter;
-class Target;
-
struct MCFixupKindInfo;
/// Utilities for manipulating generated AVR machine code.
@@ -62,9 +59,6 @@ public:
return false;
}
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override {}
-
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h
index b3504b89e4d3..a0dd1dc8ac3e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h
@@ -137,7 +137,7 @@ namespace fixups {
/// of the fact that all instructions are aligned to addresses of size
/// 2, so bit 0 of an address is always 0. This gives us another bit
/// of precision.
-/// \param[in,out] The target to adjust.
+/// \param [in,out] val The target to adjust.
template <typename T> inline void adjustBranchTarget(T &val) { val >>= 1; }
} // end of namespace fixups
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
index 832112406155..42fac5e2e000 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
@@ -78,7 +78,7 @@ void AVRInstPrinter::printInst(const MCInst *MI, uint64_t Address,
printOperand(MI, 2, O);
break;
default:
- if (!printAliasInstr(MI, O))
+ if (!printAliasInstr(MI, Address, O))
printInstruction(MI, Address, O);
printAnnotation(O, Annot);
@@ -100,8 +100,25 @@ const char *AVRInstPrinter::getPrettyRegisterName(unsigned RegNum,
void AVRInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
const MCOperandInfo &MOI = this->MII.get(MI->getOpcode()).OpInfo[OpNo];
+ if (MOI.RegClass == AVR::ZREGRegClassID) {
+ // Special case for the Z register, which sometimes doesn't have an operand
+ // in the MCInst.
+ O << "Z";
+ return;
+ }
+
+ if (OpNo >= MI->size()) {
+ // Not all operands are correctly disassembled at the moment. This means
+ // that some machine instructions won't have all the necessary operands
+ // set.
+ // To avoid asserting, print <unknown> instead until the necessary support
+ // has been implemented.
+ O << "<unknown>";
+ return;
+ }
+
+ const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
bool isPtrReg = (MOI.RegClass == AVR::PTRREGSRegClassID) ||
@@ -114,7 +131,7 @@ void AVRInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
O << getPrettyRegisterName(Op.getReg(), MRI);
}
} else if (Op.isImm()) {
- O << Op.getImm();
+ O << formatImm(Op.getImm());
} else {
assert(Op.isExpr() && "Unknown operand kind in printOperand");
O << *Op.getExpr();
@@ -125,6 +142,16 @@ void AVRInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
/// being encoded as a pc-relative value.
void AVRInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
+ if (OpNo >= MI->size()) {
+ // Not all operands are correctly disassembled at the moment. This means
+ // that some machine instructions won't have all the necessary operands
+ // set.
+ // To avoid asserting, print <unknown> instead until the necessary support
+ // has been implemented.
+ O << "<unknown>";
+ return;
+ }
+
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isImm()) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
index 247e9fc83989..910fd3455dee 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
@@ -38,13 +38,18 @@ private:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printPCRelImm(const MCInst *MI, uint64_t /*Address*/, unsigned OpNo,
+ raw_ostream &O) {
+ printPCRelImm(MI, OpNo, O);
+ }
void printMemri(const MCInst *MI, unsigned OpNo, raw_ostream &O);
// Autogenerated by TableGen.
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
- bool printAliasInstr(const MCInst *MI, raw_ostream &O);
- void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx, raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &O);
+ void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpIdx, unsigned PrintMethodIdx,
+ raw_ostream &O);
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
index c25a2b232013..b11ee42bfcd6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
@@ -21,8 +21,8 @@ AVRMCAsmInfo::AVRMCAsmInfo(const Triple &TT, const MCTargetOptions &Options) {
CalleeSaveStackSlotSize = 2;
CommentString = ";";
PrivateGlobalPrefix = ".L";
+ PrivateLabelPrefix = ".L";
UsesELFSectionDirectiveForBSS = true;
- UseIntegratedAssembler = true;
SupportsDebugInformation = true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp
index d9169f90a765..77b49931843b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp
@@ -20,7 +20,7 @@
using namespace llvm;
-void AVRMCELFStreamer::EmitValueForModiferKind(
+void AVRMCELFStreamer::emitValueForModiferKind(
const MCSymbol *Sym, unsigned SizeInBytes, SMLoc Loc,
AVRMCExpr::VariantKind ModifierKind) {
MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_AVR_NONE;
@@ -36,7 +36,7 @@ void AVRMCELFStreamer::EmitValueForModiferKind(
Kind = MCSymbolRefExpr::VK_AVR_HI8;
else if (ModifierKind == AVRMCExpr::VK_AVR_HH8)
Kind = MCSymbolRefExpr::VK_AVR_HLO8;
- MCELFStreamer::EmitValue(MCSymbolRefExpr::create(Sym, Kind, getContext()),
+ MCELFStreamer::emitValue(MCSymbolRefExpr::create(Sym, Kind, getContext()),
SizeInBytes, Loc);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.h b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.h
index 37a610bc4248..1d05b8d56d93 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.h
@@ -41,7 +41,7 @@ public:
std::move(Emitter)),
MCII(createAVRMCInstrInfo()) {}
- void EmitValueForModiferKind(
+ void emitValueForModiferKind(
const MCSymbol *Sym, unsigned SizeInBytes, SMLoc Loc = SMLoc(),
AVRMCExpr::VariantKind ModifierKind = AVRMCExpr::VK_AVR_None);
};
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
index 470db01ff468..ef116793d326 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
@@ -27,10 +27,7 @@ class MCObjectTargetWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class MCTargetOptions;
-class StringRef;
class Target;
-class Triple;
-class raw_pwrite_stream;
MCInstrInfo *createAVRMCInstrInfo();
diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp
index 3487a2bbb864..eccd343d79ab 100644
--- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp
@@ -32,11 +32,11 @@ void AVRTargetStreamer::finish() {
OS.emitRawComment(" Declaring this symbol tells the CRT that it should");
OS.emitRawComment("copy all variables from program memory to RAM on startup");
- OS.EmitSymbolAttribute(DoCopyData, MCSA_Global);
+ OS.emitSymbolAttribute(DoCopyData, MCSA_Global);
OS.emitRawComment(" Declaring this symbol tells the CRT that it should");
OS.emitRawComment("clear the zeroed data section on startup");
- OS.EmitSymbolAttribute(DoClearBss, MCSA_Global);
+ OS.emitSymbolAttribute(DoClearBss, MCSA_Global);
}
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 1f5d5025bc7b..57488bc28f98 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -39,6 +39,8 @@ class BPFAsmParser : public MCTargetAsmParser {
bool MatchingInlineAsm) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
@@ -295,7 +297,7 @@ bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
break;
case Match_Success:
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, getSTI());
+ Out.emitInstruction(Inst, getSTI());
return false;
case Match_MissingFeature:
return Error(IDLoc, "instruction use requires an option to be enabled");
@@ -322,6 +324,14 @@ bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
bool BPFAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
+ if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success)
+ return Error(StartLoc, "invalid register name");
+ return false;
+}
+
+OperandMatchResultTy BPFAsmParser::tryParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) {
const AsmToken &Tok = getParser().getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
@@ -330,10 +340,10 @@ bool BPFAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
if (!MatchRegisterName(Name)) {
getParser().Lex(); // Eat identifier token.
- return false;
+ return MatchOperand_Success;
}
- return Error(StartLoc, "invalid register name");
+ return MatchOperand_NoMatch;
}
OperandMatchResultTy
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPF.h b/contrib/llvm-project/llvm/lib/Target/BPF/BPF.h
index 6e4f35f4c5d7..4a46b11e5e08 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPF.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPF.h
@@ -16,6 +16,7 @@ namespace llvm {
class BPFTargetMachine;
ModulePass *createBPFAbstractMemberAccess(BPFTargetMachine *TM);
+ModulePass *createBPFPreserveDIType();
FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
FunctionPass *createBPFMISimplifyPatchablePass();
@@ -25,6 +26,7 @@ FunctionPass *createBPFMIPreEmitPeepholePass();
FunctionPass *createBPFMIPreEmitCheckingPass();
void initializeBPFAbstractMemberAccessPass(PassRegistry&);
+void initializeBPFPreserveDITypePass(PassRegistry&);
void initializeBPFMISimplifyPatchablePass(PassRegistry&);
void initializeBPFMIPeepholePass(PassRegistry&);
void initializeBPFMIPeepholeTruncElimPass(PassRegistry&);
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
index a28816cc87b7..16708c4d1ce6 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
@@ -92,7 +92,7 @@
#define DEBUG_TYPE "bpf-abstract-member-access"
namespace llvm {
-const std::string BPFCoreSharedInfo::AmaAttr = "btf_ama";
+constexpr StringRef BPFCoreSharedInfo::AmaAttr;
} // namespace llvm
using namespace llvm;
@@ -117,7 +117,7 @@ public:
struct CallInfo {
uint32_t Kind;
uint32_t AccessIndex;
- uint32_t RecordAlignment;
+ Align RecordAlignment;
MDNode *Metadata;
Value *Base;
};
@@ -157,11 +157,11 @@ private:
void replaceWithGEP(std::vector<CallInst *> &CallList,
uint32_t NumOfZerosIndex, uint32_t DIIndex);
bool HasPreserveFieldInfoCall(CallInfoStack &CallStack);
- void GetStorageBitRange(DIDerivedType *MemberTy, uint32_t RecordAlignment,
+ void GetStorageBitRange(DIDerivedType *MemberTy, Align RecordAlignment,
uint32_t &StartBitOffset, uint32_t &EndBitOffset);
uint32_t GetFieldInfo(uint32_t InfoKind, DICompositeType *CTy,
uint32_t AccessIndex, uint32_t PatchImm,
- uint32_t RecordAlignment);
+ Align RecordAlignment);
Value *computeBaseAndAccessKey(CallInst *Call, CallInfo &CInfo,
std::string &AccessKey, MDNode *&BaseMeta);
@@ -189,18 +189,20 @@ bool BPFAbstractMemberAccess::runOnModule(Module &M) {
return doTransformation(M);
}
-static bool SkipDIDerivedTag(unsigned Tag) {
+static bool SkipDIDerivedTag(unsigned Tag, bool skipTypedef) {
if (Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type &&
Tag != dwarf::DW_TAG_volatile_type &&
Tag != dwarf::DW_TAG_restrict_type &&
Tag != dwarf::DW_TAG_member)
- return false;
+ return false;
+ if (Tag == dwarf::DW_TAG_typedef && !skipTypedef)
+ return false;
return true;
}
-static DIType * stripQualifiers(DIType *Ty) {
+static DIType * stripQualifiers(DIType *Ty, bool skipTypedef = true) {
while (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
- if (!SkipDIDerivedTag(DTy->getTag()))
+ if (!SkipDIDerivedTag(DTy->getTag(), skipTypedef))
break;
Ty = DTy->getBaseType();
}
@@ -209,7 +211,7 @@ static DIType * stripQualifiers(DIType *Ty) {
static const DIType * stripQualifiers(const DIType *Ty) {
while (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
- if (!SkipDIDerivedTag(DTy->getTag()))
+ if (!SkipDIDerivedTag(DTy->getTag(), true))
break;
Ty = DTy->getBaseType();
}
@@ -237,7 +239,7 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
if (!Call)
return false;
- const auto *GV = dyn_cast<GlobalValue>(Call->getCalledValue());
+ const auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand());
if (!GV)
return false;
if (GV->getName().startswith("llvm.preserve.array.access.index")) {
@@ -248,7 +250,7 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
CInfo.AccessIndex = getConstant(Call->getArgOperand(2));
CInfo.Base = Call->getArgOperand(0);
CInfo.RecordAlignment =
- DL->getABITypeAlignment(CInfo.Base->getType()->getPointerElementType());
+ DL->getABITypeAlign(CInfo.Base->getType()->getPointerElementType());
return true;
}
if (GV->getName().startswith("llvm.preserve.union.access.index")) {
@@ -259,7 +261,7 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
CInfo.AccessIndex = getConstant(Call->getArgOperand(1));
CInfo.Base = Call->getArgOperand(0);
CInfo.RecordAlignment =
- DL->getABITypeAlignment(CInfo.Base->getType()->getPointerElementType());
+ DL->getABITypeAlign(CInfo.Base->getType()->getPointerElementType());
return true;
}
if (GV->getName().startswith("llvm.preserve.struct.access.index")) {
@@ -270,7 +272,7 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
CInfo.AccessIndex = getConstant(Call->getArgOperand(2));
CInfo.Base = Call->getArgOperand(0);
CInfo.RecordAlignment =
- DL->getABITypeAlignment(CInfo.Base->getType()->getPointerElementType());
+ DL->getABITypeAlign(CInfo.Base->getType()->getPointerElementType());
return true;
}
if (GV->getName().startswith("llvm.bpf.preserve.field.info")) {
@@ -520,12 +522,12 @@ uint64_t BPFAbstractMemberAccess::getConstant(const Value *IndexValue) {
/// Get the start and the end of storage offset for \p MemberTy.
void BPFAbstractMemberAccess::GetStorageBitRange(DIDerivedType *MemberTy,
- uint32_t RecordAlignment,
+ Align RecordAlignment,
uint32_t &StartBitOffset,
uint32_t &EndBitOffset) {
uint32_t MemberBitSize = MemberTy->getSizeInBits();
uint32_t MemberBitOffset = MemberTy->getOffsetInBits();
- uint32_t AlignBits = RecordAlignment * 8;
+ uint32_t AlignBits = RecordAlignment.value() * 8;
if (RecordAlignment > 8 || MemberBitSize > AlignBits)
report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info, "
"requiring too big alignment");
@@ -541,7 +543,7 @@ uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind,
DICompositeType *CTy,
uint32_t AccessIndex,
uint32_t PatchImm,
- uint32_t RecordAlignment) {
+ Align RecordAlignment) {
if (InfoKind == BPFCoreSharedInfo::FIELD_EXISTENCE)
return 1;
@@ -710,7 +712,7 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call,
// calculated here as all debuginfo types are available.
// Get type name and calculate the first index.
- // We only want to get type name from structure or union.
+ // We only want to get type name from typedef, structure or union.
// If user wants a relocation like
// int *p; ... __builtin_preserve_access_index(&p[4]) ...
// or
@@ -727,12 +729,15 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call,
if (!Base)
Base = CInfo.Base;
- DIType *Ty = stripQualifiers(cast<DIType>(CInfo.Metadata));
+ DIType *PossibleTypeDef = stripQualifiers(cast<DIType>(CInfo.Metadata),
+ false);
+ DIType *Ty = stripQualifiers(PossibleTypeDef);
if (CInfo.Kind == BPFPreserveUnionAI ||
CInfo.Kind == BPFPreserveStructAI) {
- // struct or union type
- TypeName = Ty->getName();
- TypeMeta = Ty;
+ // struct or union type. If the typedef is in the metadata, always
+ // use the typedef.
+ TypeName = std::string(PossibleTypeDef->getName());
+ TypeMeta = PossibleTypeDef;
PatchImm += FirstIndex * (Ty->getSizeInBits() >> 3);
break;
}
@@ -782,7 +787,7 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call,
unsigned CTag = CTy->getTag();
if (CTag == dwarf::DW_TAG_structure_type || CTag == dwarf::DW_TAG_union_type) {
- TypeName = CTy->getName();
+ TypeName = std::string(CTy->getName());
} else {
if (HasPreserveFieldInfoCall(CallStack))
report_fatal_error("Invalid field access for llvm.preserve.field.info intrinsic");
@@ -803,8 +808,10 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call,
CInfo = StackElem.second;
CallStack.pop();
- if (CInfo.Kind == BPFPreserveFieldInfoAI)
+ if (CInfo.Kind == BPFPreserveFieldInfoAI) {
+ InfoKind = CInfo.AccessIndex;
break;
+ }
// If the next Call (the top of the stack) is a BPFPreserveFieldInfoAI,
// the action will be extracting field info.
@@ -822,11 +829,10 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call,
AccessKey += ":" + std::to_string(AccessIndex);
MDNode *MDN = CInfo.Metadata;
- uint32_t RecordAlignment = CInfo.RecordAlignment;
// At this stage, it cannot be pointer type.
auto *CTy = cast<DICompositeType>(stripQualifiers(cast<DIType>(MDN)));
PatchImm = GetFieldInfo(InfoKind, CTy, AccessIndex, PatchImm,
- RecordAlignment);
+ CInfo.RecordAlignment);
}
// Access key is the
@@ -873,8 +879,8 @@ bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call,
if (CInfo.Kind == BPFPreserveFieldInfoAI) {
// Load the global variable which represents the returned field info.
- auto *LDInst = new LoadInst(Type::getInt32Ty(BB->getContext()), GV);
- BB->getInstList().insert(Call->getIterator(), LDInst);
+ auto *LDInst = new LoadInst(Type::getInt32Ty(BB->getContext()), GV, "",
+ Call);
Call->replaceAllUsesWith(LDInst);
Call->eraseFromParent();
return true;
@@ -891,8 +897,7 @@ bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call,
// The original Call inst is removed.
// Load the global variable.
- auto *LDInst = new LoadInst(Type::getInt64Ty(BB->getContext()), GV);
- BB->getInstList().insert(Call->getIterator(), LDInst);
+ auto *LDInst = new LoadInst(Type::getInt64Ty(BB->getContext()), GV, "", Call);
// Generate a BitCast
auto *BCInst = new BitCastInst(Base, Type::getInt8PtrTy(BB->getContext()));
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
index b81386f479d3..37950e105bdc 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -48,7 +48,7 @@ public:
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
const char *ExtraCode, raw_ostream &O) override;
- void EmitInstruction(const MachineInstr *MI) override;
+ void emitInstruction(const MachineInstr *MI) override;
private:
BTFDebug *BTF;
@@ -137,7 +137,7 @@ bool BPFAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
-void BPFAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void BPFAsmPrinter::emitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
if (!BTF || !BTF->InstLower(MI, TmpInst)) {
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFCORE.h b/contrib/llvm-project/llvm/lib/Target/BPF/BPFCORE.h
index ed4778353e52..af6425b16fa0 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFCORE.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFCORE.h
@@ -9,22 +9,36 @@
#ifndef LLVM_LIB_TARGET_BPF_BPFCORE_H
#define LLVM_LIB_TARGET_BPF_BPFCORE_H
+#include "llvm/ADT/StringRef.h"
+
namespace llvm {
class BPFCoreSharedInfo {
public:
- enum OffsetRelocKind : uint32_t {
+ enum PatchableRelocKind : uint32_t {
FIELD_BYTE_OFFSET = 0,
FIELD_BYTE_SIZE,
FIELD_EXISTENCE,
FIELD_SIGNEDNESS,
FIELD_LSHIFT_U64,
FIELD_RSHIFT_U64,
+ BTF_TYPE_ID_LOCAL,
+ BTF_TYPE_ID_REMOTE,
MAX_FIELD_RELOC_KIND,
};
+
+ enum BTFTypeIdFlag : uint32_t {
+ BTF_TYPE_ID_LOCAL_RELOC = 0,
+ BTF_TYPE_ID_REMOTE_RELOC,
+
+ MAX_BTF_TYPE_ID_FLAG,
+ };
+
/// The attribute attached to globals representing a field access
- static const std::string AmaAttr;
+ static constexpr StringRef AmaAttr = "btf_ama";
+ /// The attribute attached to globals representing a type id
+ static constexpr StringRef TypeIdAttr = "btf_type_id";
};
} // namespace llvm
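For reference, a minimal standalone sketch (not the LLVM code itself) of how the new BTFTypeIdFlag values map onto the two BTF_TYPE_ID relocation kinds added above; the mapping mirrors what BPFPreserveDIType.cpp does later in this patch, and relocKindForFlag is a hypothetical helper name.

#include <cstdint>
#include <stdexcept>

// Values mirror the BPFCoreSharedInfo enums in the hunk above.
enum PatchableRelocKind : uint32_t {
  FIELD_BYTE_OFFSET = 0, FIELD_BYTE_SIZE, FIELD_EXISTENCE, FIELD_SIGNEDNESS,
  FIELD_LSHIFT_U64, FIELD_RSHIFT_U64, BTF_TYPE_ID_LOCAL, BTF_TYPE_ID_REMOTE,
};
enum BTFTypeIdFlag : uint32_t { BTF_TYPE_ID_LOCAL_RELOC = 0, BTF_TYPE_ID_REMOTE_RELOC };

// Map the flag argument of __builtin_btf_type_id() to the relocation kind
// that ends up encoded in the patchable global's name.
PatchableRelocKind relocKindForFlag(uint32_t Flag) {
  if (Flag == BTF_TYPE_ID_LOCAL_RELOC)
    return BTF_TYPE_ID_LOCAL;
  if (Flag == BTF_TYPE_ID_REMOTE_RELOC)
    return BTF_TYPE_ID_REMOTE;
  throw std::invalid_argument("incorrect flag for llvm.bpf.btf.type.id");
}

int main() { return relocKindForFlag(BTF_TYPE_ID_REMOTE_RELOC) == BTF_TYPE_ID_REMOTE ? 0 : 1; }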
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 56e0288f26c9..a02556a39909 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -171,6 +171,38 @@ bool BPFTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) cons
return false;
}
+bool BPFTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+ unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+ unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+ return NumBits1 > NumBits2;
+}
+
+bool BPFTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isInteger() || !VT2.isInteger())
+ return false;
+ unsigned NumBits1 = VT1.getSizeInBits();
+ unsigned NumBits2 = VT2.getSizeInBits();
+ return NumBits1 > NumBits2;
+}
+
+bool BPFTargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
+ if (!getHasAlu32() || !Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+ unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+ unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+ return NumBits1 == 32 && NumBits2 == 64;
+}
+
+bool BPFTargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
+ if (!getHasAlu32() || !VT1.isInteger() || !VT2.isInteger())
+ return false;
+ unsigned NumBits1 = VT1.getSizeInBits();
+ unsigned NumBits2 = VT2.getSizeInBits();
+ return NumBits1 == 32 && NumBits2 == 64;
+}
+
std::pair<unsigned, const TargetRegisterClass *>
BPFTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
@@ -195,6 +227,8 @@ SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerGlobalAddress(Op, DAG);
case ISD::SELECT_CC:
return LowerSELECT_CC(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC:
+ report_fatal_error("Unsupported dynamic stack allocation");
default:
llvm_unreachable("unimplemented operand");
}
@@ -570,6 +604,12 @@ BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
DebugLoc DL = MI.getDebugLoc();
MachineRegisterInfo &RegInfo = F->getRegInfo();
+
+ if (!isSigned) {
+ Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
+ return PromotedReg0;
+ }
Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
Register PromotedReg1 = RegInfo.createVirtualRegister(RC);
Register PromotedReg2 = RegInfo.createVirtualRegister(RC);
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.h b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.h
index cbae4675cb14..cc752dda87b0 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -99,10 +99,9 @@ private:
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
SelectionDAG &DAG) const override;
- EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
- bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+ EVT getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const override {
- return Size >= 8 ? MVT::i64 : MVT::i32;
+ return Op.size() >= 8 ? MVT::i64 : MVT::i32;
}
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
@@ -123,6 +122,16 @@ private:
return false;
}
+ // isTruncateFree - Return true if it's free to truncate a value of
+ // type Ty1 to type Ty2. For example, on BPF in alu32 mode, it's free to truncate
+ // an i64 value in register R1 to i32 by referencing its sub-register W1.
+ bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
+ bool isTruncateFree(EVT VT1, EVT VT2) const override;
+
+ // For a 32-bit ALU result, zext to 64-bit is free.
+ bool isZExtFree(Type *Ty1, Type *Ty2) const override;
+ bool isZExtFree(EVT VT1, EVT VT2) const override;
+
unsigned EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB, unsigned Reg,
bool isSigned) const;
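To make the freeness rules above concrete: truncation between integer types is free whenever the source is wider than the destination, and zero-extension is free only for the i32-to-i64 case when alu32 is enabled, because a 32-bit ALU result already has its upper 32 bits cleared. A standalone sketch over raw bit widths (a simplification of the Type*/EVT overloads in the patch):

#include <cassert>

// Simplified model of the BPFTargetLowering predicates added in this patch,
// expressed over plain bit widths.
bool isTruncateFree(unsigned SrcBits, unsigned DstBits) {
  return SrcBits > DstBits; // narrowing just reads the sub-register
}

bool isZExtFree(bool HasAlu32, unsigned SrcBits, unsigned DstBits) {
  // Only i32 -> i64 is free, and only with 32-bit ALU instructions:
  // their results already have the upper 32 bits zeroed.
  return HasAlu32 && SrcBits == 32 && DstBits == 64;
}

int main() {
  assert(isTruncateFree(64, 32));
  assert(!isTruncateFree(32, 64));
  assert(isZExtFree(true, 32, 64));
  assert(!isZExtFree(false, 32, 64));
  return 0;
}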
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.cpp
index 626c204e99e6..54360a89782b 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.cpp
@@ -123,7 +123,7 @@ bool BPFInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
void BPFInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- unsigned SrcReg, bool IsKill, int FI,
+ Register SrcReg, bool IsKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
@@ -146,7 +146,7 @@ void BPFInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
void BPFInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- unsigned DestReg, int FI,
+ Register DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.h
index 22413d530e17..e797363ead8f 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.h
@@ -36,13 +36,13 @@ public:
bool expandPostRAPseudo(MachineInstr &MI) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, unsigned SrcReg,
+ MachineBasicBlock::iterator MBBI, Register SrcReg,
bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, unsigned DestReg,
+ MachineBasicBlock::iterator MBBI, Register DestReg,
int FrameIndex, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td
index 0f39294daa2b..4298e2eaec04 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -526,7 +526,7 @@ class NOP_I<string OpcodeStr>
let BPFClass = BPF_ALU64;
}
-let hasSideEffects = 0 in
+let hasSideEffects = 0, isCodeGenOnly = 1 in
def NOP : NOP_I<"nop">;
class RET<string OpcodeStr>
@@ -732,8 +732,7 @@ let isCodeGenOnly = 1 in {
def : Pat<(i64 (sext GPR32:$src)),
(SRA_ri (SLL_ri (MOV_32_64 GPR32:$src), 32), 32)>;
-def : Pat<(i64 (zext GPR32:$src)),
- (SRL_ri (SLL_ri (MOV_32_64 GPR32:$src), 32), 32)>;
+def : Pat<(i64 (zext GPR32:$src)), (MOV_32_64 GPR32:$src)>;
// For i64 -> i32 truncation, use the 32-bit subregister directly.
def : Pat<(i32 (trunc GPR:$src)),
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMCInstLower.h b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMCInstLower.h
index 0622d20814d3..4bd0f1f0bf1c 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMCInstLower.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMCInstLower.h
@@ -18,9 +18,7 @@ class MCInst;
class MCOperand;
class MCSymbol;
class MachineInstr;
-class MachineModuleInfoMachO;
class MachineOperand;
-class Mangler;
// BPFMCInstLower - This class is used to lower an MachineInstr into an MCInst.
class LLVM_LIBRARY_VISIBILITY BPFMCInstLower {
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp
index 022267fbe3c2..df870314fffe 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include <set>
using namespace llvm;
@@ -56,6 +57,7 @@ private:
bool isPhiFrom32Def(MachineInstr *MovMI);
bool isMovFrom32Def(MachineInstr *MovMI);
bool eliminateZExtSeq(void);
+ bool eliminateZExt(void);
std::set<MachineInstr *> PhiInsns;
@@ -68,7 +70,12 @@ public:
initialize(MF);
- return eliminateZExtSeq();
+ // First try to eliminate (zext, lshift, rshift) and then
+ // try to eliminate zext.
+ bool ZExtSeqExist, ZExtExist;
+ ZExtSeqExist = eliminateZExtSeq();
+ ZExtExist = eliminateZExt();
+ return ZExtSeqExist || ZExtExist;
}
};
@@ -233,6 +240,51 @@ bool BPFMIPeephole::eliminateZExtSeq(void) {
return Eliminated;
}
+bool BPFMIPeephole::eliminateZExt(void) {
+ MachineInstr* ToErase = nullptr;
+ bool Eliminated = false;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+ // If the previous instruction was marked for elimination, remove it now.
+ if (ToErase) {
+ ToErase->eraseFromParent();
+ ToErase = nullptr;
+ }
+
+ if (MI.getOpcode() != BPF::MOV_32_64)
+ continue;
+
+ // Eliminate MOV_32_64 if possible.
+ // MOV_32_64 rA, wB
+ //
+ // If wB has been zero extended, replace it with a SUBREG_TO_REG.
+ // This is to work around BPF programs where pkt->{data, data_end}
+ // is encoded as u32, but the verifier actually populates them
+ // as 64-bit pointers. The MOV_32_64 will zero out the top 32 bits.
+ LLVM_DEBUG(dbgs() << "Candidate MOV_32_64 instruction:");
+ LLVM_DEBUG(MI.dump());
+
+ if (!isMovFrom32Def(&MI))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Removing the MOV_32_64 instruction\n");
+
+ Register dst = MI.getOperand(0).getReg();
+ Register src = MI.getOperand(1).getReg();
+
+ // Build a SUBREG_TO_REG instruction.
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), dst)
+ .addImm(0).addReg(src).addImm(BPF::sub_32);
+
+ ToErase = &MI;
+ Eliminated = true;
+ }
+ }
+
+ return Eliminated;
+}
+
} // end anonymous namespace
INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,
@@ -300,19 +352,16 @@ bool BPFMIPreEmitPeephole::eliminateRedundantMov(void) {
//
// MOV rA, rA
//
- // This is particularly possible to happen when sub-register support
- // enabled. The special type cast insn MOV_32_64 involves different
- // register class on src (i32) and dst (i64), RA could generate useless
- // instruction due to this.
+ // Note that we cannot remove
+ // MOV_32_64 rA, wA
+ // MOV_rr_32 wA, wA
+ // as these two instructions have a side effect: they zero out
+ // the top 32 bits of rA.
unsigned Opcode = MI.getOpcode();
- if (Opcode == BPF::MOV_32_64 ||
- Opcode == BPF::MOV_rr || Opcode == BPF::MOV_rr_32) {
+ if (Opcode == BPF::MOV_rr) {
Register dst = MI.getOperand(0).getReg();
Register src = MI.getOperand(1).getReg();
- if (Opcode == BPF::MOV_32_64)
- dst = TRI->getSubReg(dst, BPF::sub_32);
-
if (dst != src)
continue;
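A standalone sketch of the zero-extension side effect noted in the comment above, using uint64_t/uint32_t stand-ins for rA/wA; it is why MOV_32_64 and MOV_rr_32 are no longer treated as removable even when source and destination name the same register:

#include <cassert>
#include <cstdint>

// Model of MOV_32_64 rA, wB: copy the 32-bit source into the 64-bit
// destination and clear the upper 32 bits.
uint64_t mov_32_64(uint32_t w) { return static_cast<uint64_t>(w); }

// Model of MOV_rr_32 wA, wA: writing a 32-bit subregister also zeroes
// the upper half of the containing 64-bit register.
uint64_t mov_rr_32(uint64_t r) { return static_cast<uint64_t>(static_cast<uint32_t>(r)); }

int main() {
  uint64_t r = 0xdeadbeef00000001ULL;
  assert(mov_rr_32(r) == 0x0000000000000001ULL); // not a no-op
  assert(mov_32_64(static_cast<uint32_t>(r)) == 0x1ULL);
  return 0;
}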
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
index b2ecb531db9d..ae1f5ea21c12 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
@@ -22,6 +22,9 @@
// r1 = <calculated field_info>
// add r3, struct_base_reg, r1
//
+// This pass also removes the intermediate load generated by the IR pass for
+// the __builtin_btf_type_id() intrinsic.
+//
//===----------------------------------------------------------------------===//
#include "BPF.h"
@@ -55,10 +58,10 @@ private:
bool removeLD(void);
void processCandidate(MachineRegisterInfo *MRI, MachineBasicBlock &MBB,
MachineInstr &MI, Register &SrcReg, Register &DstReg,
- const GlobalValue *GVal);
+ const GlobalValue *GVal, bool IsAma);
void processDstReg(MachineRegisterInfo *MRI, Register &DstReg,
Register &SrcReg, const GlobalValue *GVal,
- bool doSrcRegProp);
+ bool doSrcRegProp, bool IsAma);
void processInst(MachineRegisterInfo *MRI, MachineInstr *Inst,
MachineOperand *RelocOp, const GlobalValue *GVal);
void checkADDrr(MachineRegisterInfo *MRI, MachineOperand *RelocOp,
@@ -155,25 +158,27 @@ void BPFMISimplifyPatchable::checkShift(MachineRegisterInfo *MRI,
void BPFMISimplifyPatchable::processCandidate(MachineRegisterInfo *MRI,
MachineBasicBlock &MBB, MachineInstr &MI, Register &SrcReg,
- Register &DstReg, const GlobalValue *GVal) {
+ Register &DstReg, const GlobalValue *GVal, bool IsAma) {
if (MRI->getRegClass(DstReg) == &BPF::GPR32RegClass) {
- // We can optimize such a pattern:
- // %1:gpr = LD_imm64 @"llvm.s:0:4$0:2"
- // %2:gpr32 = LDW32 %1:gpr, 0
- // %3:gpr = SUBREG_TO_REG 0, %2:gpr32, %subreg.sub_32
- // %4:gpr = ADD_rr %0:gpr, %3:gpr
- // or similar patterns below for non-alu32 case.
- auto Begin = MRI->use_begin(DstReg), End = MRI->use_end();
- decltype(End) NextI;
- for (auto I = Begin; I != End; I = NextI) {
- NextI = std::next(I);
- if (!MRI->getUniqueVRegDef(I->getReg()))
- continue;
-
- unsigned Opcode = I->getParent()->getOpcode();
- if (Opcode == BPF::SUBREG_TO_REG) {
- Register TmpReg = I->getParent()->getOperand(0).getReg();
- processDstReg(MRI, TmpReg, DstReg, GVal, false);
+ if (IsAma) {
+ // We can optimize such a pattern:
+ // %1:gpr = LD_imm64 @"llvm.s:0:4$0:2"
+ // %2:gpr32 = LDW32 %1:gpr, 0
+ // %3:gpr = SUBREG_TO_REG 0, %2:gpr32, %subreg.sub_32
+ // %4:gpr = ADD_rr %0:gpr, %3:gpr
+ // or similar patterns below for non-alu32 case.
+ auto Begin = MRI->use_begin(DstReg), End = MRI->use_end();
+ decltype(End) NextI;
+ for (auto I = Begin; I != End; I = NextI) {
+ NextI = std::next(I);
+ if (!MRI->getUniqueVRegDef(I->getReg()))
+ continue;
+
+ unsigned Opcode = I->getParent()->getOpcode();
+ if (Opcode == BPF::SUBREG_TO_REG) {
+ Register TmpReg = I->getParent()->getOperand(0).getReg();
+ processDstReg(MRI, TmpReg, DstReg, GVal, false, IsAma);
+ }
}
}
@@ -183,12 +188,12 @@ void BPFMISimplifyPatchable::processCandidate(MachineRegisterInfo *MRI,
}
// All uses of DstReg replaced by SrcReg
- processDstReg(MRI, DstReg, SrcReg, GVal, true);
+ processDstReg(MRI, DstReg, SrcReg, GVal, true, IsAma);
}
void BPFMISimplifyPatchable::processDstReg(MachineRegisterInfo *MRI,
Register &DstReg, Register &SrcReg, const GlobalValue *GVal,
- bool doSrcRegProp) {
+ bool doSrcRegProp, bool IsAma) {
auto Begin = MRI->use_begin(DstReg), End = MRI->use_end();
decltype(End) NextI;
for (auto I = Begin; I != End; I = NextI) {
@@ -197,7 +202,7 @@ void BPFMISimplifyPatchable::processDstReg(MachineRegisterInfo *MRI,
I->setReg(SrcReg);
// The candidate needs to have a unique definition.
- if (MRI->getUniqueVRegDef(I->getReg()))
+ if (IsAma && MRI->getUniqueVRegDef(I->getReg()))
processInst(MRI, I->getParent(), &*I, GVal);
}
}
@@ -269,28 +274,26 @@ bool BPFMISimplifyPatchable::removeLD() {
if (!DefInst)
continue;
- bool IsCandidate = false;
- const GlobalValue *GVal = nullptr;
- if (DefInst->getOpcode() == BPF::LD_imm64) {
- const MachineOperand &MO = DefInst->getOperand(1);
- if (MO.isGlobal()) {
- GVal = MO.getGlobal();
- auto *GVar = dyn_cast<GlobalVariable>(GVal);
- if (GVar) {
- // Global variables representing structure offset or
- // patchable extern globals.
- if (GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) {
- assert(MI.getOperand(2).getImm() == 0);
- IsCandidate = true;
- }
- }
- }
- }
+ if (DefInst->getOpcode() != BPF::LD_imm64)
+ continue;
+
+ const MachineOperand &MO = DefInst->getOperand(1);
+ if (!MO.isGlobal())
+ continue;
+
+ const GlobalValue *GVal = MO.getGlobal();
+ auto *GVar = dyn_cast<GlobalVariable>(GVal);
+ if (!GVar)
+ continue;
- if (!IsCandidate)
+ // Global variables representing structure offset or type id.
+ bool IsAma = false;
+ if (GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr))
+ IsAma = true;
+ else if (!GVar->hasAttribute(BPFCoreSharedInfo::TypeIdAttr))
continue;
- processCandidate(MRI, MBB, MI, SrcReg, DstReg, GVal);
+ processCandidate(MRI, MBB, MI, SrcReg, DstReg, GVal, IsAma);
ToErase = &MI;
Changed = true;
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFPreserveDIType.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
new file mode 100644
index 000000000000..c3cb7647aa79
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
@@ -0,0 +1,131 @@
+//===----------- BPFPreserveDIType.cpp - Preserve DebugInfo Types ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Preserve Debuginfo types encoded in __builtin_btf_type_id() metadata.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "BPFCORE.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+#define DEBUG_TYPE "bpf-preserve-di-type"
+
+namespace llvm {
+constexpr StringRef BPFCoreSharedInfo::TypeIdAttr;
+} // namespace llvm
+
+using namespace llvm;
+
+namespace {
+
+class BPFPreserveDIType final : public ModulePass {
+ StringRef getPassName() const override {
+ return "BPF Preserve DebugInfo Type";
+ }
+
+ bool runOnModule(Module &M) override;
+
+public:
+ static char ID;
+ BPFPreserveDIType() : ModulePass(ID) {}
+
+private:
+ bool doTransformation(Module &M);
+};
+} // End anonymous namespace
+
+char BPFPreserveDIType::ID = 0;
+INITIALIZE_PASS(BPFPreserveDIType, DEBUG_TYPE, "preserve debuginfo type", false,
+ false)
+
+ModulePass *llvm::createBPFPreserveDIType() { return new BPFPreserveDIType(); }
+
+bool BPFPreserveDIType::runOnModule(Module &M) {
+ LLVM_DEBUG(dbgs() << "********** preserve debuginfo type **********\n");
+
+ // Bail out if no debug info.
+ if (M.debug_compile_units().empty())
+ return false;
+
+ return doTransformation(M);
+}
+
+bool BPFPreserveDIType::doTransformation(Module &M) {
+ std::vector<CallInst *> PreserveDITypeCalls;
+
+ for (auto &F : M) {
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ auto *Call = dyn_cast<CallInst>(&I);
+ if (!Call)
+ continue;
+
+ const auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand());
+ if (!GV)
+ continue;
+
+ if (GV->getName().startswith("llvm.bpf.btf.type.id")) {
+ if (!Call->getMetadata(LLVMContext::MD_preserve_access_index))
+ report_fatal_error(
+ "Missing metadata for llvm.bpf.btf.type.id intrinsic");
+ PreserveDITypeCalls.push_back(Call);
+ }
+ }
+ }
+ }
+
+ if (PreserveDITypeCalls.empty())
+ return false;
+
+ std::string BaseName = "llvm.btf_type_id.";
+ int Count = 0;
+ for (auto Call : PreserveDITypeCalls) {
+ const ConstantInt *Flag = dyn_cast<ConstantInt>(Call->getArgOperand(2));
+ assert(Flag);
+ uint64_t FlagValue = Flag->getValue().getZExtValue();
+
+ if (FlagValue >= BPFCoreSharedInfo::MAX_BTF_TYPE_ID_FLAG)
+ report_fatal_error("Incorrect flag for llvm.bpf.btf.type.id intrinsic");
+
+ uint32_t Reloc;
+ if (FlagValue == BPFCoreSharedInfo::BTF_TYPE_ID_LOCAL_RELOC)
+ Reloc = BPFCoreSharedInfo::BTF_TYPE_ID_LOCAL;
+ else
+ Reloc = BPFCoreSharedInfo::BTF_TYPE_ID_REMOTE;
+
+ BasicBlock *BB = Call->getParent();
+ IntegerType *VarType = Type::getInt32Ty(BB->getContext());
+ std::string GVName = BaseName + std::to_string(Count) + "$" +
+ std::to_string(Reloc);
+ GlobalVariable *GV =
+ new GlobalVariable(M, VarType, false, GlobalVariable::ExternalLinkage,
+ NULL, GVName);
+ GV->addAttribute(BPFCoreSharedInfo::TypeIdAttr);
+ MDNode *MD = Call->getMetadata(LLVMContext::MD_preserve_access_index);
+ GV->setMetadata(LLVMContext::MD_preserve_access_index, MD);
+
+ // Load the global variable which represents the type info.
+ auto *LDInst = new LoadInst(Type::getInt32Ty(BB->getContext()), GV, "",
+ Call);
+ Call->replaceAllUsesWith(LDInst);
+ Call->eraseFromParent();
+ Count++;
+ }
+
+ return true;
+}
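The pass above encodes the relocation kind into the global's name as "llvm.btf_type_id.<count>$<reloc>", and BTFDebug::generatePatchImmReloc (further down in this patch) recovers it from the text after the '$'. A minimal standalone sketch of that round trip; the helper names are hypothetical:

#include <cassert>
#include <cstdint>
#include <string>

// Build a name the way BPFPreserveDIType does.
std::string encodeTypeIdGlobalName(int Count, uint32_t Reloc) {
  return "llvm.btf_type_id." + std::to_string(Count) + "$" + std::to_string(Reloc);
}

// Recover the relocation kind the way generatePatchImmReloc does:
// everything after '$' is the kind, as decimal text.
uint32_t decodeRelocKind(const std::string &GVName) {
  size_t Dollar = GVName.find_first_of('$');
  return static_cast<uint32_t>(std::stoull(GVName.substr(Dollar + 1)));
}

int main() {
  const uint32_t BTF_TYPE_ID_REMOTE = 7; // enum value from BPFCORE.h above
  std::string Name = encodeTypeIdGlobalName(0, BTF_TYPE_ID_REMOTE);
  assert(Name == "llvm.btf_type_id.0$7");
  assert(decodeRelocKind(Name) == BTF_TYPE_ID_REMOTE);
  return 0;
}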
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp
index a711294048ba..4c36c0edcef6 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp
@@ -19,7 +19,7 @@ using namespace llvm;
SDValue BPFSelectionDAGInfo::EmitTargetCodeForMemcpy(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
+ SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
// Requires the copy size to be a constant.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -27,7 +27,7 @@ SDValue BPFSelectionDAGInfo::EmitTargetCodeForMemcpy(
return SDValue();
unsigned CopyLen = ConstantSize->getZExtValue();
- unsigned StoresNumEstimate = alignTo(CopyLen, Align) >> Log2_32(Align);
+ unsigned StoresNumEstimate = alignTo(CopyLen, Alignment) >> Log2(Alignment);
// Impose the same copy length limit as MaxStoresPerMemcpy.
if (StoresNumEstimate > getCommonMaxStoresPerMemFunc())
return SDValue();
@@ -36,7 +36,7 @@ SDValue BPFSelectionDAGInfo::EmitTargetCodeForMemcpy(
Dst = DAG.getNode(BPFISD::MEMCPY, dl, VTs, Chain, Dst, Src,
DAG.getConstant(CopyLen, dl, MVT::i64),
- DAG.getConstant(Align, dl, MVT::i64));
+ DAG.getConstant(Alignment.value(), dl, MVT::i64));
return Dst.getValue(0);
}
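The store-count estimate above is just alignTo(CopyLen, Alignment) >> Log2(Alignment). A small standalone arithmetic sketch; the alignTo/log2u helpers below are stand-ins for the LLVM ones:

#include <cassert>
#include <cstdint>

// Stand-in for llvm::alignTo with a power-of-two alignment.
uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

// Stand-in for llvm::Log2(Align).
unsigned log2u(uint64_t Align) {
  unsigned L = 0;
  while ((1ULL << L) < Align) ++L;
  return L;
}

// Number of Align-sized stores needed to cover CopyLen bytes.
uint64_t storesNumEstimate(uint64_t CopyLen, uint64_t Align) {
  return alignTo(CopyLen, Align) >> log2u(Align);
}

int main() {
  assert(storesNumEstimate(16, 8) == 2); // two 8-byte stores
  assert(storesNumEstimate(20, 8) == 3); // rounds up to 24 bytes -> three stores
  return 0;
}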
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h b/contrib/llvm-project/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h
index fb88c32ceb0c..79f05e57bb5c 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h
@@ -21,8 +21,8 @@ class BPFSelectionDAGInfo : public SelectionDAGTargetInfo {
public:
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVolatile,
- bool AlwaysInline,
+ SDValue Size, Align Alignment,
+ bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 40375bc88bff..54204ee197ec 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -35,6 +35,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFTarget() {
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializeBPFAbstractMemberAccessPass(PR);
+ initializeBPFPreserveDITypePass(PR);
initializeBPFMIPeepholePass(PR);
initializeBPFMIPeepholeTruncElimPass(PR);
}
@@ -42,9 +43,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFTarget() {
// DataLayout: little or big endian
static std::string computeDataLayout(const Triple &TT) {
if (TT.getArch() == Triple::bpfeb)
- return "E-m:e-p:64:64-i64:64-n32:64-S128";
+ return "E-m:e-p:64:64-i64:64-i128:128-n32:64-S128";
else
- return "e-m:e-p:64:64-i64:64-n32:64-S128";
+ return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128";
}
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
@@ -63,7 +64,7 @@ BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT,
getEffectiveRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
TLOF(std::make_unique<TargetLoweringObjectFileELF>()),
- Subtarget(TT, CPU, FS, *this) {
+ Subtarget(TT, std::string(CPU), std::string(FS), *this) {
initAsmInfo();
BPFMCAsmInfo *MAI =
@@ -96,6 +97,7 @@ TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) {
void BPFPassConfig::addIRPasses() {
addPass(createBPFAbstractMemberAccess(&getBPFTargetMachine()));
+ addPass(createBPFPreserveDIType());
TargetPassConfig::addIRPasses();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp
index 6daeb3b4b63b..4510e9357489 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -34,10 +34,10 @@ static const char *BTFKindStr[] = {
void BTFTypeBase::emitType(MCStreamer &OS) {
OS.AddComment(std::string(BTFKindStr[Kind]) + "(id = " + std::to_string(Id) +
")");
- OS.EmitIntValue(BTFType.NameOff, 4);
+ OS.emitInt32(BTFType.NameOff);
OS.AddComment("0x" + Twine::utohexstr(BTFType.Info));
- OS.EmitIntValue(BTFType.Info, 4);
- OS.EmitIntValue(BTFType.Size, 4);
+ OS.emitInt32(BTFType.Info);
+ OS.emitInt32(BTFType.Size);
}
BTFTypeDerived::BTFTypeDerived(const DIDerivedType *DTy, unsigned Tag,
@@ -148,7 +148,7 @@ void BTFTypeInt::completeType(BTFDebug &BDebug) {
void BTFTypeInt::emitType(MCStreamer &OS) {
BTFTypeBase::emitType(OS);
OS.AddComment("0x" + Twine::utohexstr(IntVal));
- OS.EmitIntValue(IntVal, 4);
+ OS.emitInt32(IntVal);
}
BTFTypeEnum::BTFTypeEnum(const DICompositeType *ETy, uint32_t VLen) : ETy(ETy) {
@@ -171,7 +171,12 @@ void BTFTypeEnum::completeType(BTFDebug &BDebug) {
struct BTF::BTFEnum BTFEnum;
BTFEnum.NameOff = BDebug.addString(Enum->getName());
// BTF enum value is 32bit, enforce it.
- BTFEnum.Val = static_cast<uint32_t>(Enum->getValue());
+ uint32_t Value;
+ if (Enum->isUnsigned())
+ Value = static_cast<uint32_t>(Enum->getValue().getZExtValue());
+ else
+ Value = static_cast<uint32_t>(Enum->getValue().getSExtValue());
+ BTFEnum.Val = Value;
EnumValues.push_back(BTFEnum);
}
}
@@ -179,8 +184,8 @@ void BTFTypeEnum::completeType(BTFDebug &BDebug) {
void BTFTypeEnum::emitType(MCStreamer &OS) {
BTFTypeBase::emitType(OS);
for (const auto &Enum : EnumValues) {
- OS.EmitIntValue(Enum.NameOff, 4);
- OS.EmitIntValue(Enum.Val, 4);
+ OS.emitInt32(Enum.NameOff);
+ OS.emitInt32(Enum.Val);
}
}
@@ -209,9 +214,9 @@ void BTFTypeArray::completeType(BTFDebug &BDebug) {
void BTFTypeArray::emitType(MCStreamer &OS) {
BTFTypeBase::emitType(OS);
- OS.EmitIntValue(ArrayInfo.ElemType, 4);
- OS.EmitIntValue(ArrayInfo.IndexType, 4);
- OS.EmitIntValue(ArrayInfo.Nelems, 4);
+ OS.emitInt32(ArrayInfo.ElemType);
+ OS.emitInt32(ArrayInfo.IndexType);
+ OS.emitInt32(ArrayInfo.Nelems);
}
/// Represent either a struct or a union.
@@ -252,14 +257,14 @@ void BTFTypeStruct::completeType(BTFDebug &BDebug) {
void BTFTypeStruct::emitType(MCStreamer &OS) {
BTFTypeBase::emitType(OS);
for (const auto &Member : Members) {
- OS.EmitIntValue(Member.NameOff, 4);
- OS.EmitIntValue(Member.Type, 4);
+ OS.emitInt32(Member.NameOff);
+ OS.emitInt32(Member.Type);
OS.AddComment("0x" + Twine::utohexstr(Member.Offset));
- OS.EmitIntValue(Member.Offset, 4);
+ OS.emitInt32(Member.Offset);
}
}
-std::string BTFTypeStruct::getName() { return STy->getName(); }
+std::string BTFTypeStruct::getName() { return std::string(STy->getName()); }
/// The Func kind represents both subprogram and pointee of function
/// pointers. If the FuncName is empty, it represents a pointee of function
@@ -303,8 +308,8 @@ void BTFTypeFuncProto::completeType(BTFDebug &BDebug) {
void BTFTypeFuncProto::emitType(MCStreamer &OS) {
BTFTypeBase::emitType(OS);
for (const auto &Param : Parameters) {
- OS.EmitIntValue(Param.NameOff, 4);
- OS.EmitIntValue(Param.Type, 4);
+ OS.emitInt32(Param.NameOff);
+ OS.emitInt32(Param.Type);
}
}
@@ -340,7 +345,7 @@ void BTFKindVar::completeType(BTFDebug &BDebug) {
void BTFKindVar::emitType(MCStreamer &OS) {
BTFTypeBase::emitType(OS);
- OS.EmitIntValue(Info, 4);
+ OS.emitInt32(Info);
}
BTFKindDataSec::BTFKindDataSec(AsmPrinter *AsmPrt, std::string SecName)
@@ -359,9 +364,9 @@ void BTFKindDataSec::emitType(MCStreamer &OS) {
BTFTypeBase::emitType(OS);
for (const auto &V : Vars) {
- OS.EmitIntValue(std::get<0>(V), 4);
- Asm->EmitLabelReference(std::get<1>(V), 4);
- OS.EmitIntValue(std::get<2>(V), 4);
+ OS.emitInt32(std::get<0>(V));
+ Asm->emitLabelReference(std::get<1>(V), 4);
+ OS.emitInt32(std::get<2>(V));
}
}
@@ -374,7 +379,7 @@ uint32_t BTFStringTable::addString(StringRef S) {
// Not find, add to the string table.
uint32_t Offset = Size;
OffsetToIdMap[Offset] = Table.size();
- Table.push_back(S);
+ Table.push_back(std::string(S));
Size += S.size() + 1;
return Offset;
}
@@ -563,7 +568,7 @@ void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId,
auto CTag = CTy->getTag();
if ((CTag == dwarf::DW_TAG_structure_type ||
CTag == dwarf::DW_TAG_union_type) &&
- !CTy->isForwardDecl()) {
+ !CTy->getName().empty() && !CTy->isForwardDecl()) {
/// Find a candidate, generate a fixup. Later on the struct/union
/// pointee type will be replaced with either a real type or
/// a forward declaration.
@@ -659,7 +664,17 @@ void BTFDebug::visitMapDefType(const DIType *Ty, uint32_t &TypeId) {
return;
}
- // MapDef type is a struct type
+ // MapDef type may be a struct type or a non-pointer derived type
+ const DIType *OrigTy = Ty;
+ while (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
+ auto Tag = DTy->getTag();
+ if (Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type &&
+ Tag != dwarf::DW_TAG_volatile_type &&
+ Tag != dwarf::DW_TAG_restrict_type)
+ break;
+ Ty = DTy->getBaseType();
+ }
+
const auto *CTy = dyn_cast<DICompositeType>(Ty);
if (!CTy)
return;
@@ -668,27 +683,15 @@ void BTFDebug::visitMapDefType(const DIType *Ty, uint32_t &TypeId) {
if (Tag != dwarf::DW_TAG_structure_type || CTy->isForwardDecl())
return;
- // Record this type
+ // Visit all struct members to ensure pointee type is visited
const DINodeArray Elements = CTy->getElements();
- bool HasBitField = false;
- for (const auto *Element : Elements) {
- auto E = cast<DIDerivedType>(Element);
- if (E->isBitField()) {
- HasBitField = true;
- break;
- }
- }
-
- auto TypeEntry =
- std::make_unique<BTFTypeStruct>(CTy, true, HasBitField, Elements.size());
- StructTypes.push_back(TypeEntry.get());
- TypeId = addType(std::move(TypeEntry), CTy);
-
- // Visit all struct members
for (const auto *Element : Elements) {
const auto *MemberType = cast<DIDerivedType>(Element);
visitTypeEntry(MemberType->getBaseType());
}
+
+ // Visit this type, struct or a const/typedef/volatile/restrict type
+ visitTypeEntry(OrigTy, TypeId, false, false);
}
/// Read file contents from the actual file or from the source
@@ -699,7 +702,7 @@ std::string BTFDebug::populateFileContent(const DISubprogram *SP) {
if (!File->getFilename().startswith("/") && File->getDirectory().size())
FileName = File->getDirectory().str() + "/" + File->getFilename().str();
else
- FileName = File->getFilename();
+ FileName = std::string(File->getFilename());
// No need to populate the contents if they have already been populated!
if (FileContent.find(FileName) != FileContent.end())
@@ -718,7 +721,7 @@ std::string BTFDebug::populateFileContent(const DISubprogram *SP) {
Buf = std::move(*BufOrErr);
if (Buf)
for (line_iterator I(*Buf, false), E; I != E; ++I)
- Content.push_back(*I);
+ Content.push_back(std::string(*I));
FileContent[FileName] = Content;
return FileName;
@@ -743,9 +746,9 @@ void BTFDebug::constructLineInfo(const DISubprogram *SP, MCSymbol *Label,
void BTFDebug::emitCommonHeader() {
OS.AddComment("0x" + Twine::utohexstr(BTF::MAGIC));
- OS.EmitIntValue(BTF::MAGIC, 2);
- OS.EmitIntValue(BTF::VERSION, 1);
- OS.EmitIntValue(0, 1);
+ OS.emitIntValue(BTF::MAGIC, 2);
+ OS.emitInt8(BTF::VERSION);
+ OS.emitInt8(0);
}
void BTFDebug::emitBTFSection() {
@@ -758,17 +761,17 @@ void BTFDebug::emitBTFSection() {
// Emit header.
emitCommonHeader();
- OS.EmitIntValue(BTF::HeaderSize, 4);
+ OS.emitInt32(BTF::HeaderSize);
uint32_t TypeLen = 0, StrLen;
for (const auto &TypeEntry : TypeEntries)
TypeLen += TypeEntry->getSize();
StrLen = StringTable.getSize();
- OS.EmitIntValue(0, 4);
- OS.EmitIntValue(TypeLen, 4);
- OS.EmitIntValue(TypeLen, 4);
- OS.EmitIntValue(StrLen, 4);
+ OS.emitInt32(0);
+ OS.emitInt32(TypeLen);
+ OS.emitInt32(TypeLen);
+ OS.emitInt32(StrLen);
// Emit type table.
for (const auto &TypeEntry : TypeEntries)
@@ -778,8 +781,8 @@ void BTFDebug::emitBTFSection() {
uint32_t StringOffset = 0;
for (const auto &S : StringTable.getTable()) {
OS.AddComment("string offset=" + std::to_string(StringOffset));
- OS.EmitBytes(S);
- OS.EmitBytes(StringRef("\0", 1));
+ OS.emitBytes(S);
+ OS.emitBytes(StringRef("\0", 1));
StringOffset += S.size() + 1;
}
}
@@ -796,7 +799,7 @@ void BTFDebug::emitBTFExtSection() {
// Emit header.
emitCommonHeader();
- OS.EmitIntValue(BTF::ExtHeaderSize, 4);
+ OS.emitInt32(BTF::ExtHeaderSize);
// Account for FuncInfo/LineInfo record size as well.
uint32_t FuncLen = 4, LineLen = 4;
@@ -818,59 +821,59 @@ void BTFDebug::emitBTFExtSection() {
if (FieldRelocLen)
FieldRelocLen += 4;
- OS.EmitIntValue(0, 4);
- OS.EmitIntValue(FuncLen, 4);
- OS.EmitIntValue(FuncLen, 4);
- OS.EmitIntValue(LineLen, 4);
- OS.EmitIntValue(FuncLen + LineLen, 4);
- OS.EmitIntValue(FieldRelocLen, 4);
+ OS.emitInt32(0);
+ OS.emitInt32(FuncLen);
+ OS.emitInt32(FuncLen);
+ OS.emitInt32(LineLen);
+ OS.emitInt32(FuncLen + LineLen);
+ OS.emitInt32(FieldRelocLen);
// Emit func_info table.
OS.AddComment("FuncInfo");
- OS.EmitIntValue(BTF::BPFFuncInfoSize, 4);
+ OS.emitInt32(BTF::BPFFuncInfoSize);
for (const auto &FuncSec : FuncInfoTable) {
OS.AddComment("FuncInfo section string offset=" +
std::to_string(FuncSec.first));
- OS.EmitIntValue(FuncSec.first, 4);
- OS.EmitIntValue(FuncSec.second.size(), 4);
+ OS.emitInt32(FuncSec.first);
+ OS.emitInt32(FuncSec.second.size());
for (const auto &FuncInfo : FuncSec.second) {
- Asm->EmitLabelReference(FuncInfo.Label, 4);
- OS.EmitIntValue(FuncInfo.TypeId, 4);
+ Asm->emitLabelReference(FuncInfo.Label, 4);
+ OS.emitInt32(FuncInfo.TypeId);
}
}
// Emit line_info table.
OS.AddComment("LineInfo");
- OS.EmitIntValue(BTF::BPFLineInfoSize, 4);
+ OS.emitInt32(BTF::BPFLineInfoSize);
for (const auto &LineSec : LineInfoTable) {
OS.AddComment("LineInfo section string offset=" +
std::to_string(LineSec.first));
- OS.EmitIntValue(LineSec.first, 4);
- OS.EmitIntValue(LineSec.second.size(), 4);
+ OS.emitInt32(LineSec.first);
+ OS.emitInt32(LineSec.second.size());
for (const auto &LineInfo : LineSec.second) {
- Asm->EmitLabelReference(LineInfo.Label, 4);
- OS.EmitIntValue(LineInfo.FileNameOff, 4);
- OS.EmitIntValue(LineInfo.LineOff, 4);
+ Asm->emitLabelReference(LineInfo.Label, 4);
+ OS.emitInt32(LineInfo.FileNameOff);
+ OS.emitInt32(LineInfo.LineOff);
OS.AddComment("Line " + std::to_string(LineInfo.LineNum) + " Col " +
std::to_string(LineInfo.ColumnNum));
- OS.EmitIntValue(LineInfo.LineNum << 10 | LineInfo.ColumnNum, 4);
+ OS.emitInt32(LineInfo.LineNum << 10 | LineInfo.ColumnNum);
}
}
// Emit field reloc table.
if (FieldRelocLen) {
OS.AddComment("FieldReloc");
- OS.EmitIntValue(BTF::BPFFieldRelocSize, 4);
+ OS.emitInt32(BTF::BPFFieldRelocSize);
for (const auto &FieldRelocSec : FieldRelocTable) {
OS.AddComment("Field reloc section string offset=" +
std::to_string(FieldRelocSec.first));
- OS.EmitIntValue(FieldRelocSec.first, 4);
- OS.EmitIntValue(FieldRelocSec.second.size(), 4);
+ OS.emitInt32(FieldRelocSec.first);
+ OS.emitInt32(FieldRelocSec.second.size());
for (const auto &FieldRelocInfo : FieldRelocSec.second) {
- Asm->EmitLabelReference(FieldRelocInfo.Label, 4);
- OS.EmitIntValue(FieldRelocInfo.TypeID, 4);
- OS.EmitIntValue(FieldRelocInfo.OffsetNameOff, 4);
- OS.EmitIntValue(FieldRelocInfo.RelocKind, 4);
+ Asm->emitLabelReference(FieldRelocInfo.Label, 4);
+ OS.emitInt32(FieldRelocInfo.TypeID);
+ OS.emitInt32(FieldRelocInfo.OffsetNameOff);
+ OS.emitInt32(FieldRelocInfo.RelocKind);
}
}
}
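One encoding detail from the line-info emission above: each record packs line and column into a single 32-bit word as (LineNum << 10 | ColumnNum), so the column gets the low 10 bits. A standalone sketch; the pack/unpack helper names are hypothetical:

#include <cassert>
#include <cstdint>

// Pack line/column the way emitBTFExtSection() does above.
uint32_t packLineCol(uint32_t Line, uint32_t Col) { return Line << 10 | Col; }
uint32_t lineOf(uint32_t Packed) { return Packed >> 10; }
uint32_t colOf(uint32_t Packed) { return Packed & 0x3ffu; }

int main() {
  uint32_t P = packLineCol(42, 7);
  assert(lineOf(P) == 42 && colOf(P) == 7);
  return 0;
}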
@@ -947,7 +950,7 @@ void BTFDebug::beginFunctionImpl(const MachineFunction *MF) {
MCSection &Section = FuncLabel->getSection();
const MCSectionELF *SectionELF = dyn_cast<MCSectionELF>(&Section);
assert(SectionELF && "Null section for Function Label");
- SecNameOff = addString(SectionELF->getSectionName());
+ SecNameOff = addString(SectionELF->getName());
} else {
SecNameOff = addString(".text");
}
@@ -960,9 +963,9 @@ void BTFDebug::endFunctionImpl(const MachineFunction *MF) {
SecNameOff = 0;
}
-/// On-demand populate struct types as requested from abstract member
-/// accessing.
-unsigned BTFDebug::populateStructType(const DIType *Ty) {
+/// On-demand populate types as requested from abstract member
+/// accessing or preserving debuginfo types.
+unsigned BTFDebug::populateType(const DIType *Ty) {
unsigned Id;
visitTypeEntry(Ty, Id, false, false);
for (const auto &TypeEntry : TypeEntries)
@@ -971,24 +974,32 @@ unsigned BTFDebug::populateStructType(const DIType *Ty) {
}
/// Generate a struct member field relocation.
-void BTFDebug::generateFieldReloc(const MCSymbol *ORSym, DIType *RootTy,
- StringRef AccessPattern) {
- unsigned RootId = populateStructType(RootTy);
- size_t FirstDollar = AccessPattern.find_first_of('$');
- size_t FirstColon = AccessPattern.find_first_of(':');
- size_t SecondColon = AccessPattern.find_first_of(':', FirstColon + 1);
- StringRef IndexPattern = AccessPattern.substr(FirstDollar + 1);
- StringRef RelocKindStr = AccessPattern.substr(FirstColon + 1,
- SecondColon - FirstColon);
- StringRef PatchImmStr = AccessPattern.substr(SecondColon + 1,
- FirstDollar - SecondColon);
-
+void BTFDebug::generatePatchImmReloc(const MCSymbol *ORSym, uint32_t RootId,
+ const GlobalVariable *GVar, bool IsAma) {
BTFFieldReloc FieldReloc;
FieldReloc.Label = ORSym;
- FieldReloc.OffsetNameOff = addString(IndexPattern);
FieldReloc.TypeID = RootId;
- FieldReloc.RelocKind = std::stoull(RelocKindStr);
- PatchImms[AccessPattern.str()] = std::stoul(PatchImmStr);
+
+ StringRef AccessPattern = GVar->getName();
+ size_t FirstDollar = AccessPattern.find_first_of('$');
+ if (IsAma) {
+ size_t FirstColon = AccessPattern.find_first_of(':');
+ size_t SecondColon = AccessPattern.find_first_of(':', FirstColon + 1);
+ StringRef IndexPattern = AccessPattern.substr(FirstDollar + 1);
+ StringRef RelocKindStr = AccessPattern.substr(FirstColon + 1,
+ SecondColon - FirstColon);
+ StringRef PatchImmStr = AccessPattern.substr(SecondColon + 1,
+ FirstDollar - SecondColon);
+
+ FieldReloc.OffsetNameOff = addString(IndexPattern);
+ FieldReloc.RelocKind = std::stoull(std::string(RelocKindStr));
+ PatchImms[GVar] = std::stoul(std::string(PatchImmStr));
+ } else {
+ StringRef RelocStr = AccessPattern.substr(FirstDollar + 1);
+ FieldReloc.OffsetNameOff = addString("0");
+ FieldReloc.RelocKind = std::stoull(std::string(RelocStr));
+ PatchImms[GVar] = RootId;
+ }
FieldRelocTable[SecNameOff].push_back(FieldReloc);
}
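For the IsAma branch above, the field-access global's name packs several fields into one string; the example quoted earlier in this patch is "llvm.s:0:4$0:2", i.e. roughly <prefix+type>:<reloc kind>:<patch imm>$<access index string>. A standalone parsing sketch under that assumption (AmaPattern and parseAmaName are hypothetical names):

#include <cassert>
#include <string>

struct AmaPattern {
  std::string IndexPattern;  // "0:2" -> the access index string
  unsigned long RelocKind;   // 0     -> FIELD_BYTE_OFFSET
  unsigned long PatchImm;    // 4     -> immediate to patch into the insn
};

AmaPattern parseAmaName(const std::string &Name) {
  size_t Dollar = Name.find_first_of('$');
  size_t Colon1 = Name.find_first_of(':');
  size_t Colon2 = Name.find_first_of(':', Colon1 + 1);
  AmaPattern P;
  P.IndexPattern = Name.substr(Dollar + 1);
  P.RelocKind = std::stoul(Name.substr(Colon1 + 1, Colon2 - Colon1 - 1));
  P.PatchImm = std::stoul(Name.substr(Colon2 + 1, Dollar - Colon2 - 1));
  return P;
}

int main() {
  AmaPattern P = parseAmaName("llvm.s:0:4$0:2");
  assert(P.RelocKind == 0 && P.PatchImm == 4 && P.IndexPattern == "0:2");
  return 0;
}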
@@ -997,14 +1008,20 @@ void BTFDebug::processReloc(const MachineOperand &MO) {
if (MO.isGlobal()) {
const GlobalValue *GVal = MO.getGlobal();
auto *GVar = dyn_cast<GlobalVariable>(GVal);
- if (GVar && GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) {
- MCSymbol *ORSym = OS.getContext().createTempSymbol();
- OS.EmitLabel(ORSym);
+ if (!GVar)
+ return;
- MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index);
- DIType *Ty = dyn_cast<DIType>(MDN);
- generateFieldReloc(ORSym, Ty, GVar->getName());
- }
+ if (!GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr) &&
+ !GVar->hasAttribute(BPFCoreSharedInfo::TypeIdAttr))
+ return;
+
+ MCSymbol *ORSym = OS.getContext().createTempSymbol();
+ OS.emitLabel(ORSym);
+
+ MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index);
+ uint32_t RootId = populateType(dyn_cast<DIType>(MDN));
+ generatePatchImmReloc(ORSym, RootId, GVar,
+ GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr));
}
}
@@ -1040,6 +1057,9 @@ void BTFDebug::beginInstruction(const MachineInstr *MI) {
// Later, the insn is replaced with "r2 = <offset>"
// where "<offset>" equals to the offset based on current
// type definitions.
+ //
+ // If the insn is "r2 = LD_imm64 @<a TypeIdAttr global>",
+ // the LD_imm64 result will be replaced with a btf type id.
processReloc(MI->getOperand(1));
} else if (MI->getOpcode() == BPF::CORE_MEM ||
MI->getOpcode() == BPF::CORE_ALU32_MEM ||
@@ -1072,7 +1092,7 @@ void BTFDebug::beginInstruction(const MachineInstr *MI) {
// Create a temporary label to remember the insn for lineinfo.
MCSymbol *LineSym = OS.getContext().createTempSymbol();
- OS.EmitLabel(LineSym);
+ OS.emitLabel(LineSym);
// Construct the lineinfo.
auto SP = DL.get()->getScope()->getSubprogram();
@@ -1151,15 +1171,17 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
assert(!SecName.empty());
// Find or create a DataSec
- if (DataSecEntries.find(SecName) == DataSecEntries.end()) {
- DataSecEntries[SecName] = std::make_unique<BTFKindDataSec>(Asm, SecName);
+ if (DataSecEntries.find(std::string(SecName)) == DataSecEntries.end()) {
+ DataSecEntries[std::string(SecName)] =
+ std::make_unique<BTFKindDataSec>(Asm, std::string(SecName));
}
// Calculate symbol size
const DataLayout &DL = Global.getParent()->getDataLayout();
uint32_t Size = DL.getTypeAllocSize(Global.getType()->getElementType());
- DataSecEntries[SecName]->addVar(VarId, Asm->getSymbol(&Global), Size);
+ DataSecEntries[std::string(SecName)]->addVar(VarId, Asm->getSymbol(&Global),
+ Size);
}
}
@@ -1170,9 +1192,15 @@ bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) {
if (MO.isGlobal()) {
const GlobalValue *GVal = MO.getGlobal();
auto *GVar = dyn_cast<GlobalVariable>(GVal);
- if (GVar && GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) {
- // Emit "mov ri, <imm>" for patched immediate.
- uint32_t Imm = PatchImms[GVar->getName().str()];
+ if (GVar) {
+ // Emit "mov ri, <imm>"
+ uint32_t Imm;
+ if (GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr) ||
+ GVar->hasAttribute(BPFCoreSharedInfo::TypeIdAttr))
+ Imm = PatchImms[GVar];
+ else
+ return false;
+
OutMI.setOpcode(BPF::MOV_ri);
OutMI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
OutMI.addOperand(MCOperand::createImm(Imm));
@@ -1187,7 +1215,7 @@ bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) {
const GlobalValue *GVal = MO.getGlobal();
auto *GVar = dyn_cast<GlobalVariable>(GVal);
if (GVar && GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) {
- uint32_t Imm = PatchImms[GVar->getName().str()];
+ uint32_t Imm = PatchImms[GVar];
OutMI.setOpcode(MI->getOperand(1).getImm());
if (MI->getOperand(0).isImm())
OutMI.addOperand(MCOperand::createImm(MI->getOperand(0).getImm()));
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.h b/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.h
index 0812c4f7915d..2f39f665299a 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BTFDebug.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/DebugHandlerBase.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include <set>
#include <unordered_map>
#include "BTF.h"
@@ -25,6 +26,7 @@ namespace llvm {
class AsmPrinter;
class BTFDebug;
class DIType;
+class GlobalVariable;
class MCStreamer;
class MCSymbol;
class MachineFunction;
@@ -61,8 +63,8 @@ class BTFTypeDerived : public BTFTypeBase {
public:
BTFTypeDerived(const DIDerivedType *Ty, unsigned Tag, bool NeedsFixup);
- void completeType(BTFDebug &BDebug);
- void emitType(MCStreamer &OS);
+ void completeType(BTFDebug &BDebug) override;
+ void emitType(MCStreamer &OS) override;
void setPointeeType(uint32_t PointeeType);
};
@@ -72,8 +74,8 @@ class BTFTypeFwd : public BTFTypeBase {
public:
BTFTypeFwd(StringRef Name, bool IsUnion);
- void completeType(BTFDebug &BDebug);
- void emitType(MCStreamer &OS);
+ void completeType(BTFDebug &BDebug) override;
+ void emitType(MCStreamer &OS) override;
};
/// Handle int type.
@@ -84,9 +86,9 @@ class BTFTypeInt : public BTFTypeBase {
public:
BTFTypeInt(uint32_t Encoding, uint32_t SizeInBits, uint32_t OffsetInBits,
StringRef TypeName);
- uint32_t getSize() { return BTFTypeBase::getSize() + sizeof(uint32_t); }
- void completeType(BTFDebug &BDebug);
- void emitType(MCStreamer &OS);
+ uint32_t getSize() override { return BTFTypeBase::getSize() + sizeof(uint32_t); }
+ void completeType(BTFDebug &BDebug) override;
+ void emitType(MCStreamer &OS) override;
};
/// Handle enumerate type.
@@ -96,11 +98,11 @@ class BTFTypeEnum : public BTFTypeBase {
public:
BTFTypeEnum(const DICompositeType *ETy, uint32_t NumValues);
- uint32_t getSize() {
+ uint32_t getSize() override {
return BTFTypeBase::getSize() + EnumValues.size() * BTF::BTFEnumSize;
}
- void completeType(BTFDebug &BDebug);
- void emitType(MCStreamer &OS);
+ void completeType(BTFDebug &BDebug) override;
+ void emitType(MCStreamer &OS) override;
};
/// Handle array type.
@@ -109,9 +111,9 @@ class BTFTypeArray : public BTFTypeBase {
public:
BTFTypeArray(uint32_t ElemTypeId, uint32_t NumElems);
- uint32_t getSize() { return BTFTypeBase::getSize() + BTF::BTFArraySize; }
- void completeType(BTFDebug &BDebug);
- void emitType(MCStreamer &OS);
+ uint32_t getSize() override { return BTFTypeBase::getSize() + BTF::BTFArraySize; }
+ void completeType(BTFDebug &BDebug) override;
+ void emitType(MCStreamer &OS) override;
};
/// Handle struct/union type.
@@ -123,11 +125,11 @@ class BTFTypeStruct : public BTFTypeBase {
public:
BTFTypeStruct(const DICompositeType *STy, bool IsStruct, bool HasBitField,
uint32_t NumMembers);
- uint32_t getSize() {
+ uint32_t getSize() override {
return BTFTypeBase::getSize() + Members.size() * BTF::BTFMemberSize;
}
- void completeType(BTFDebug &BDebug);
- void emitType(MCStreamer &OS);
+ void completeType(BTFDebug &BDebug) override;
+ void emitType(MCStreamer &OS) override;
std::string getName();
};
@@ -140,11 +142,11 @@ class BTFTypeFuncProto : public BTFTypeBase {
public:
BTFTypeFuncProto(const DISubroutineType *STy, uint32_t NumParams,
const std::unordered_map<uint32_t, StringRef> &FuncArgNames);
- uint32_t getSize() {
+ uint32_t getSize() override {
return BTFTypeBase::getSize() + Parameters.size() * BTF::BTFParamSize;
}
- void completeType(BTFDebug &BDebug);
- void emitType(MCStreamer &OS);
+ void completeType(BTFDebug &BDebug) override;
+ void emitType(MCStreamer &OS) override;
};
/// Handle subprogram
@@ -153,9 +155,9 @@ class BTFTypeFunc : public BTFTypeBase {
public:
BTFTypeFunc(StringRef FuncName, uint32_t ProtoTypeId, uint32_t Scope);
- uint32_t getSize() { return BTFTypeBase::getSize(); }
- void completeType(BTFDebug &BDebug);
- void emitType(MCStreamer &OS);
+ uint32_t getSize() override { return BTFTypeBase::getSize(); }
+ void completeType(BTFDebug &BDebug) override;
+ void emitType(MCStreamer &OS) override;
};
/// Handle variable instances
@@ -165,9 +167,9 @@ class BTFKindVar : public BTFTypeBase {
public:
BTFKindVar(StringRef VarName, uint32_t TypeId, uint32_t VarInfo);
- uint32_t getSize() { return BTFTypeBase::getSize() + 4; }
- void completeType(BTFDebug &BDebug);
- void emitType(MCStreamer &OS);
+ uint32_t getSize() override { return BTFTypeBase::getSize() + 4; }
+ void completeType(BTFDebug &BDebug) override;
+ void emitType(MCStreamer &OS) override;
};
/// Handle data sections
@@ -178,15 +180,15 @@ class BTFKindDataSec : public BTFTypeBase {
public:
BTFKindDataSec(AsmPrinter *AsmPrt, std::string SecName);
- uint32_t getSize() {
+ uint32_t getSize() override {
return BTFTypeBase::getSize() + BTF::BTFDataSecVarSize * Vars.size();
}
void addVar(uint32_t Id, const MCSymbol *Sym, uint32_t Size) {
Vars.push_back(std::make_tuple(Id, Sym, Size));
}
std::string getName() { return Name; }
- void completeType(BTFDebug &BDebug);
- void emitType(MCStreamer &OS);
+ void completeType(BTFDebug &BDebug) override;
+ void emitType(MCStreamer &OS) override;
};
/// String table.
@@ -249,7 +251,7 @@ class BTFDebug : public DebugHandlerBase {
StringMap<std::vector<std::string>> FileContent;
std::map<std::string, std::unique_ptr<BTFKindDataSec>> DataSecEntries;
std::vector<BTFTypeStruct *> StructTypes;
- std::map<std::string, uint32_t> PatchImms;
+ std::map<const GlobalVariable *, uint32_t> PatchImms;
std::map<StringRef, std::pair<bool, std::vector<BTFTypeDerived *>>>
FixupDerivedTypes;
std::set<const Function *>ProtoFunctions;
@@ -299,11 +301,11 @@ class BTFDebug : public DebugHandlerBase {
void processFuncPrototypes(const Function *);
/// Generate one field relocation record.
- void generateFieldReloc(const MCSymbol *ORSym, DIType *RootTy,
- StringRef AccessPattern);
+ void generatePatchImmReloc(const MCSymbol *ORSym, uint32_t RootId,
+ const GlobalVariable *, bool IsAma);
- /// Populating unprocessed struct type.
- unsigned populateStructType(const DIType *Ty);
+ /// Populating unprocessed type on demand.
+ unsigned populateType(const DIType *Ty);
/// Process relocation instructions.
void processReloc(const MachineOperand &MO);
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
index 75f963b5448a..4d98dc7341d0 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -126,6 +126,9 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus decodeMemoryOpValue(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
unsigned Register = (Insn >> 16) & 0xf;
+ if (Register > 11)
+ return MCDisassembler::Fail;
+
Inst.addOperand(MCOperand::createReg(GPRDecoderTable[Register]));
unsigned Offset = (Insn & 0xffff);
Inst.addOperand(MCOperand::createImm(SignExtend32<16>(Offset)));
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index ba35a175b9a7..9d829ac45a10 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -48,9 +48,6 @@ public:
return false;
}
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override {}
-
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index 97f0cbd58608..3292c3e5ebb5 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -17,7 +17,6 @@
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
-class Target;
class BPFMCAsmInfo : public MCAsmInfo {
public:
@@ -42,6 +41,8 @@ public:
// section will be parsable, but with odd offsets and
// line numbers, etc.
CodePointerSize = 8;
+
+ UseIntegratedAssembler = false;
}
void setDwarfUsesRelocationsAcrossSections(bool enable) {
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
index 1a391321f60d..a426a132cf47 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
@@ -27,11 +27,7 @@ class MCObjectTargetWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class MCTargetOptions;
-class StringRef;
class Target;
-class Triple;
-class raw_ostream;
-class raw_pwrite_stream;
MCCodeEmitter *createBPFMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index cee1954e369b..1e7862c36ea0 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -116,6 +116,8 @@ class HexagonAsmParser : public MCTargetAsmParser {
bool ParseDirectiveFalign(unsigned Size, SMLoc L);
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseDirectiveSubsection(SMLoc L);
bool ParseDirectiveComm(bool IsLocal, SMLoc L);
bool RegisterMatchesArch(unsigned MatchNum) const;
@@ -312,6 +314,8 @@ public:
bool iss30_2Imm() const { return true; }
bool iss29_3Imm() const { return true; }
bool iss27_2Imm() const { return CheckImmRange(27, 2, true, true, false); }
+ bool iss10_0Imm() const { return CheckImmRange(10, 0, true, false, false); }
+ bool iss10_6Imm() const { return CheckImmRange(10, 6, true, false, false); }
bool iss9_0Imm() const { return CheckImmRange(9, 0, true, false, false); }
bool iss8_0Imm() const { return CheckImmRange(8, 0, true, false, false); }
bool iss8_0Imm64() const { return CheckImmRange(8, 0, true, true, false); }
@@ -467,13 +471,16 @@ bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) {
LLVM_DEBUG(dbgs() << "--\n");
MCB.setLoc(IDLoc);
+
// Check the bundle for errors.
const MCRegisterInfo *RI = getContext().getRegisterInfo();
- HexagonMCChecker Check(getContext(), MII, getSTI(), MCB, *RI);
+ MCSubtargetInfo const &STI = getSTI();
+
+ MCInst OrigBundle = MCB;
+ HexagonMCChecker Check(getContext(), MII, STI, MCB, *RI, true);
- bool CheckOk = HexagonMCInstrInfo::canonicalizePacket(MII, getSTI(),
- getContext(), MCB,
- &Check);
+ bool CheckOk = HexagonMCInstrInfo::canonicalizePacket(
+ MII, STI, getContext(), MCB, &Check, true);
if (CheckOk) {
if (HexagonMCInstrInfo::bundleSize(MCB) == 0) {
@@ -482,15 +489,12 @@ bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) {
// Empty packets are valid yet aren't emitted
return false;
}
- Out.EmitInstruction(MCB, getSTI());
- } else {
- // If compounding and duplexing didn't reduce the size below
- // 4 or less we have a packet that is too big.
- if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE) {
- Error(IDLoc, "invalid instruction packet: out of slots");
- }
+
+ assert(HexagonMCInstrInfo::isBundle(MCB));
+
+ Out.emitInstruction(MCB, STI);
+ } else
return true; // Error
- }
return false; // No error
}
@@ -518,6 +522,8 @@ bool HexagonAsmParser::matchBundleOptions() {
HexagonMCInstrInfo::setMemReorderDisabled(MCB);
else
return getParser().Error(IDLoc, MemNoShuffMsg);
+ } else if (Option.compare_lower("mem_no_order") == 0) {
+ // Nothing.
} else
return getParser().Error(IDLoc, llvm::Twine("'") + Option +
"' is not a valid bundle option");
@@ -578,6 +584,7 @@ bool HexagonAsmParser::matchOneInstruction(MCInst &MCI, SMLoc IDLoc,
case Match_MnemonicFail:
return Error(IDLoc, "unrecognized instruction");
case Match_InvalidOperand:
+ LLVM_FALLTHROUGH;
case Match_InvalidTiedOperand:
SMLoc ErrorLoc = IDLoc;
if (ErrorInfo != ~0U) {
@@ -937,8 +944,8 @@ bool HexagonAsmParser::isLabel(AsmToken &Token) {
assert(Second.is(AsmToken::Colon));
StringRef Raw(String.data(), Third.getString().data() - String.data() +
Third.getString().size());
- std::string Collapsed = Raw;
- Collapsed.erase(llvm::remove_if(Collapsed, isspace), Collapsed.end());
+ std::string Collapsed = std::string(Raw);
+ Collapsed.erase(llvm::remove_if(Collapsed, isSpace), Collapsed.end());
StringRef Whole = Collapsed;
std::pair<StringRef, StringRef> DotSplit = Whole.split('.');
if (!matchRegister(DotSplit.first.lower()))
@@ -959,6 +966,12 @@ bool HexagonAsmParser::handleNoncontigiousRegister(bool Contigious,
bool HexagonAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
+ return tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success;
+}
+
+OperandMatchResultTy HexagonAsmParser::tryParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) {
MCAsmLexer &Lexer = getLexer();
StartLoc = getLexer().getLoc();
SmallVector<AsmToken, 5> Lookahead;
@@ -983,8 +996,8 @@ bool HexagonAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
Again = (Contigious && Type) || (Workaround && Type);
NeededWorkaround = NeededWorkaround || (Again && !(Contigious && Type));
}
- std::string Collapsed = RawString;
- Collapsed.erase(llvm::remove_if(Collapsed, isspace), Collapsed.end());
+ std::string Collapsed = std::string(RawString);
+ Collapsed.erase(llvm::remove_if(Collapsed, isSpace), Collapsed.end());
StringRef FullString = Collapsed;
std::pair<StringRef, StringRef> DotSplit = FullString.split('.');
unsigned DotReg = matchRegister(DotSplit.first.lower());
@@ -993,8 +1006,8 @@ bool HexagonAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
RegNo = DotReg;
EndLoc = Lexer.getLoc();
if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
- return true;
- return false;
+ return MatchOperand_NoMatch;
+ return MatchOperand_Success;
} else {
RegNo = DotReg;
size_t First = RawString.find('.');
@@ -1002,28 +1015,26 @@ bool HexagonAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
Lexer.UnLex(AsmToken(AsmToken::Identifier, DotString));
EndLoc = Lexer.getLoc();
if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
- return true;
- return false;
+ return MatchOperand_NoMatch;
+ return MatchOperand_Success;
}
}
std::pair<StringRef, StringRef> ColonSplit = StringRef(FullString).split(':');
unsigned ColonReg = matchRegister(ColonSplit.first.lower());
if (ColonReg != Hexagon::NoRegister && RegisterMatchesArch(DotReg)) {
do {
- Lexer.UnLex(Lookahead.back());
- Lookahead.pop_back();
- } while (!Lookahead.empty () && !Lexer.is(AsmToken::Colon));
+ Lexer.UnLex(Lookahead.pop_back_val());
+ } while (!Lookahead.empty() && !Lexer.is(AsmToken::Colon));
RegNo = ColonReg;
EndLoc = Lexer.getLoc();
if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
- return true;
- return false;
+ return MatchOperand_NoMatch;
+ return MatchOperand_Success;
}
while (!Lookahead.empty()) {
- Lexer.UnLex(Lookahead.back());
- Lookahead.pop_back();
+ Lexer.UnLex(Lookahead.pop_back_val());
}
- return true;
+ return MatchOperand_NoMatch;
}
bool HexagonAsmParser::implicitExpressionLocation(OperandVector &Operands) {
@@ -1283,9 +1294,28 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
SMLoc IDLoc) {
MCContext &Context = getParser().getContext();
const MCRegisterInfo *RI = getContext().getRegisterInfo();
- std::string r = "r";
- std::string v = "v";
- std::string Colon = ":";
+ const std::string r = "r";
+ const std::string v = "v";
+ const std::string Colon = ":";
+ using RegPairVals = std::pair<unsigned, unsigned>;
+ auto GetRegPair = [this, r](RegPairVals RegPair) {
+ const std::string R1 = r + utostr(RegPair.first);
+ const std::string R2 = r + utostr(RegPair.second);
+
+ return std::make_pair(matchRegister(R1), matchRegister(R2));
+ };
+ auto GetScalarRegs = [RI, GetRegPair](unsigned RegPair) {
+ const unsigned Lower = RI->getEncodingValue(RegPair);
+ const RegPairVals RegPair_ = std::make_pair(Lower + 1, Lower);
+
+ return GetRegPair(RegPair_);
+ };
+ auto GetVecRegs = [GetRegPair](unsigned VecRegPair) {
+ const RegPairVals RegPair =
+ HexagonMCInstrInfo::GetVecRegPairIndices(VecRegPair);
+
+ return GetRegPair(RegPair);
+ };
bool is32bit = false; // used to distinguish between CONST32 and CONST64
switch (Inst.getOpcode()) {
@@ -1377,14 +1407,9 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
// Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)"
case Hexagon::A2_tfrp: {
MCOperand &MO = Inst.getOperand(1);
- unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
- std::string R1 = r + utostr(RegPairNum + 1);
- StringRef Reg1(R1);
- MO.setReg(matchRegister(Reg1));
- // Add a new operand for the second register in the pair.
- std::string R2 = r + utostr(RegPairNum);
- StringRef Reg2(R2);
- Inst.addOperand(MCOperand::createReg(matchRegister(Reg2)));
+ const std::pair<unsigned, unsigned> RegPair = GetScalarRegs(MO.getReg());
+ MO.setReg(RegPair.first);
+ Inst.addOperand(MCOperand::createReg(RegPair.second));
Inst.setOpcode(Hexagon::A2_combinew);
break;
}
@@ -1392,14 +1417,9 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
case Hexagon::A2_tfrpt:
case Hexagon::A2_tfrpf: {
MCOperand &MO = Inst.getOperand(2);
- unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
- std::string R1 = r + utostr(RegPairNum + 1);
- StringRef Reg1(R1);
- MO.setReg(matchRegister(Reg1));
- // Add a new operand for the second register in the pair.
- std::string R2 = r + utostr(RegPairNum);
- StringRef Reg2(R2);
- Inst.addOperand(MCOperand::createReg(matchRegister(Reg2)));
+ const std::pair<unsigned, unsigned> RegPair = GetScalarRegs(MO.getReg());
+ MO.setReg(RegPair.first);
+ Inst.addOperand(MCOperand::createReg(RegPair.second));
Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt)
? Hexagon::C2_ccombinewt
: Hexagon::C2_ccombinewf);
@@ -1408,14 +1428,9 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
case Hexagon::A2_tfrptnew:
case Hexagon::A2_tfrpfnew: {
MCOperand &MO = Inst.getOperand(2);
- unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
- std::string R1 = r + utostr(RegPairNum + 1);
- StringRef Reg1(R1);
- MO.setReg(matchRegister(Reg1));
- // Add a new operand for the second register in the pair.
- std::string R2 = r + utostr(RegPairNum);
- StringRef Reg2(R2);
- Inst.addOperand(MCOperand::createReg(matchRegister(Reg2)));
+ const std::pair<unsigned, unsigned> RegPair = GetScalarRegs(MO.getReg());
+ MO.setReg(RegPair.first);
+ Inst.addOperand(MCOperand::createReg(RegPair.second));
Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew)
? Hexagon::C2_ccombinewnewt
: Hexagon::C2_ccombinewnewf);
@@ -1425,12 +1440,9 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
// Translate a "$Vdd = $Vss" to "$Vdd = vcombine($Vs, $Vt)"
case Hexagon::V6_vassignp: {
MCOperand &MO = Inst.getOperand(1);
- unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
- std::string R1 = v + utostr(RegPairNum + 1);
- MO.setReg(MatchRegisterName(R1));
- // Add a new operand for the second register in the pair.
- std::string R2 = v + utostr(RegPairNum);
- Inst.addOperand(MCOperand::createReg(MatchRegisterName(R2)));
+ const std::pair<unsigned, unsigned> RegPair = GetVecRegs(MO.getReg());
+ MO.setReg(RegPair.first);
+ Inst.addOperand(MCOperand::createReg(RegPair.second));
Inst.setOpcode(Hexagon::V6_vcombine);
break;
}
@@ -1485,7 +1497,7 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
MES->SwitchSection(mySection);
unsigned byteSize = is32bit ? 4 : 8;
- getStreamer().EmitCodeAlignment(byteSize, byteSize);
+ getStreamer().emitCodeAlignment(byteSize, byteSize);
MCSymbol *Sym;
@@ -1495,9 +1507,9 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
if (Absolute) {
Sym = getContext().getOrCreateSymbol(StringRef(myCharStr.c_str() + 16));
if (Sym->isUndefined()) {
- getStreamer().EmitLabel(Sym);
- getStreamer().EmitSymbolAttribute(Sym, MCSA_Global);
- getStreamer().EmitIntValue(Value, byteSize);
+ getStreamer().emitLabel(Sym);
+ getStreamer().emitSymbolAttribute(Sym, MCSA_Global);
+ getStreamer().emitIntValue(Value, byteSize);
}
} else if (MO_1.isExpr()) {
const char *StringStart = nullptr;
@@ -1517,9 +1529,9 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
if (Sym->isUndefined()) {
// case where symbol is not yet defined: emit symbol
- getStreamer().EmitLabel(Sym);
- getStreamer().EmitSymbolAttribute(Sym, MCSA_Local);
- getStreamer().EmitValue(MO_1.getExpr(), 4);
+ getStreamer().emitLabel(Sym);
+ getStreamer().emitSymbolAttribute(Sym, MCSA_Local);
+ getStreamer().emitValue(MO_1.getExpr(), 4);
}
} else
llvm_unreachable("unexpected type of machine operand!");
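
The A2_tfrp, A2_tfrpt/tfrpf and V6_vassignp rewrites above all share one step: take the encoding value of a 64-bit register pair and name its two 32-bit halves (encoding + 1 for the high half, encoding for the low half), which the new GetScalarRegs/GetVecRegs lambdas centralize. A minimal standalone sketch of that index arithmetic, assuming a double register's encoding value equals the number of its even low scalar register (as the surrounding code implies); matchRegister, MCOperand and the real register tables are not reproduced:

// Standalone model of the GetScalarRegs index arithmetic from the hunks above.
// Assumption: the encoding value of a Hexagon double register is the number of
// its even low scalar register (e.g. r3:2 -> encoding 2).
#include <cstdio>
#include <string>
#include <utility>

// Given the encoding value of a register pair, return the names of the
// high and low scalar registers, mirroring GetRegPair(Lower + 1, Lower).
static std::pair<std::string, std::string> scalarRegsOf(unsigned PairEncoding) {
  return {"r" + std::to_string(PairEncoding + 1),   // high half
          "r" + std::to_string(PairEncoding)};      // low half
}

int main() {
  // "$Rdd = $Rss" becomes "$Rdd = combine(high, low)" on the source pair.
  const unsigned SrcPairEncoding = 2;               // e.g. r3:2
  const auto [Hi, Lo] = scalarRegsOf(SrcPairEncoding);
  std::printf("A2_tfrp -> A2_combinew(%s, %s)\n", Hi.c_str(), Lo.c_str());
  return 0;
}
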
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.cpp
index 8a07b991ff5a..7ef23ef35a74 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.cpp
@@ -954,6 +954,9 @@ void BT::visitBranchesFrom(const MachineInstr &BI) {
++It;
} while (FallsThrough && It != End);
+ if (B.mayHaveInlineAsmBr())
+ DefaultToAll = true;
+
if (!DefaultToAll) {
// Need to add all CFG successors that lead to EH landing pads.
// There won't be explicit branches to these blocks, but they must
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 7a90d585eb9a..f3a87ef20a60 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -185,7 +185,10 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
if (Size > HEXAGON_MAX_PACKET_SIZE)
return MCDisassembler::Fail;
- HexagonMCChecker Checker(getContext(), *MCII, STI, MI,
+
+ const auto ArchSTI = Hexagon_MC::getArchSubtarget(&STI);
+ const auto STI_ = (ArchSTI != nullptr) ? *ArchSTI : STI;
+ HexagonMCChecker Checker(getContext(), *MCII, STI_, MI,
*getContext().getRegisterInfo(), false);
if (!Checker.check())
return MCDisassembler::Fail;
@@ -495,9 +498,13 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
} else if (HexagonMCInstrInfo::hasNewValue(*MCII, Inst)) {
unsigned Producer =
HexagonMCInstrInfo::getNewValueOperand(*MCII, Inst).getReg();
- if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15)
- Producer = ((Producer - Hexagon::W0) << 1) + SubregBit + Hexagon::V0;
- else if (SubregBit)
+
+ if (HexagonMCInstrInfo::IsVecRegPair(Producer)) {
+ const bool Rev = HexagonMCInstrInfo::IsReverseVecRegPair(Producer);
+ const unsigned ProdPairIndex =
+ Rev ? Producer - Hexagon::WR0 : Producer - Hexagon::W0;
+ Producer = (ProdPairIndex << 1) + SubregBit + Hexagon::V0;
+ } else if (SubregBit)
// Hexagon PRM 10.11 New-value operands
// Nt[0] is reserved and should always be encoded as zero.
return MCDisassembler::Fail;
@@ -603,12 +610,16 @@ static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
const void *Decoder) {
static const MCPhysReg HvxWRDecoderTable[] = {
- Hexagon::W0, Hexagon::W1, Hexagon::W2, Hexagon::W3,
- Hexagon::W4, Hexagon::W5, Hexagon::W6, Hexagon::W7,
- Hexagon::W8, Hexagon::W9, Hexagon::W10, Hexagon::W11,
- Hexagon::W12, Hexagon::W13, Hexagon::W14, Hexagon::W15};
+ Hexagon::W0, Hexagon::WR0, Hexagon::W1, Hexagon::WR1, Hexagon::W2,
+ Hexagon::WR2, Hexagon::W3, Hexagon::WR3, Hexagon::W4, Hexagon::WR4,
+ Hexagon::W5, Hexagon::WR5, Hexagon::W6, Hexagon::WR6, Hexagon::W7,
+ Hexagon::WR7, Hexagon::W8, Hexagon::WR8, Hexagon::W9, Hexagon::WR9,
+ Hexagon::W10, Hexagon::WR10, Hexagon::W11, Hexagon::WR11, Hexagon::W12,
+ Hexagon::WR12, Hexagon::W13, Hexagon::WR13, Hexagon::W14, Hexagon::WR14,
+ Hexagon::W15, Hexagon::WR15,
+ };
- return (DecodeRegisterClass(Inst, RegNo >> 1, HvxWRDecoderTable));
+ return DecodeRegisterClass(Inst, RegNo, HvxWRDecoderTable);
}
LLVM_ATTRIBUTE_UNUSED // Suppress warning temporarily.
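
Two related changes above: DecodeHvxWRRegisterClass now indexes an interleaved W/WR table directly rather than halving RegNo, and a new-value producer naming a vector register pair is folded to a single V register as (pair index << 1) + subregister bit. A minimal sketch of that producer arithmetic, with placeholder integer bases standing in for Hexagon::V0/W0/WR0 and the IsVecRegPair/IsReverseVecRegPair checks assumed to have already been made:

// Standalone model of the producer-register arithmetic used above.
// The *_BASE constants are placeholder stand-ins for Hexagon::V0 / W0 / WR0.
#include <cassert>
#include <cstdio>

constexpr unsigned V_BASE = 100;   // Hexagon::V0  (placeholder value)
constexpr unsigned W_BASE = 200;   // Hexagon::W0  (placeholder value)
constexpr unsigned WR_BASE = 300;  // Hexagon::WR0 (placeholder value)

// Resolve a new-value producer that names a vector register pair to the
// single V register selected by SubregBit (0 = low half, 1 = high half).
unsigned resolveProducer(unsigned Producer, bool Reverse, unsigned SubregBit) {
  assert(SubregBit <= 1 && "subregister selector is a single bit");
  const unsigned PairIndex = Reverse ? Producer - WR_BASE : Producer - W_BASE;
  return (PairIndex << 1) + SubregBit + V_BASE;
}

int main() {
  // The pair W1 (= V3:2) with the subregister bit set selects V3.
  std::printf("producer -> V%u\n", resolveProducer(W_BASE + 1, false, 1) - V_BASE);
  return 0;
}
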
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/Hexagon.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/Hexagon.td
index 26869391c7a3..2fadb0b5ddc4 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/Hexagon.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/Hexagon.td
@@ -23,6 +23,9 @@ include "llvm/Target/Target.td"
// Hexagon Architectures
include "HexagonDepArch.td"
+def ProcTinyCore: SubtargetFeature<"tinycore", "HexagonProcFamily",
+ "TinyCore", "Hexagon Tiny Core">;
+
// Hexagon ISA Extensions
def ExtensionZReg: SubtargetFeature<"zreg", "UseZRegOps", "true",
"Hexagon ZReg extension instructions">;
@@ -42,14 +45,25 @@ def ExtensionHVXV66: SubtargetFeature<"hvxv66", "HexagonHVXVersion",
"Hexagon::ArchEnum::V66", "Hexagon HVX instructions",
[ExtensionHVX, ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65,
ExtensionZReg]>;
+def ExtensionHVXV67: SubtargetFeature<"hvxv67", "HexagonHVXVersion",
+ "Hexagon::ArchEnum::V67", "Hexagon HVX instructions",
+ [ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65, ExtensionHVXV66]>;
+
def ExtensionHVX64B: SubtargetFeature<"hvx-length64b", "UseHVX64BOps",
"true", "Hexagon HVX 64B instructions", [ExtensionHVX]>;
def ExtensionHVX128B: SubtargetFeature<"hvx-length128b", "UseHVX128BOps",
"true", "Hexagon HVX 128B instructions", [ExtensionHVX]>;
+def ExtensionAudio: SubtargetFeature<"audio", "UseAudioOps", "true",
+ "Hexagon Audio extension instructions">;
+
+def FeatureCompound: SubtargetFeature<"compound", "UseCompound", "true",
+ "Use compound instructions">;
def FeaturePackets: SubtargetFeature<"packets", "UsePackets", "true",
"Support for instruction packets">;
+def FeaturePreV65: SubtargetFeature<"prev65", "HasPreV65", "true",
+ "Support features deprecated in v65">;
def FeatureLongCalls: SubtargetFeature<"long-calls", "UseLongCalls", "true",
"Use constant-extended calls">;
def FeatureMemNoShuf: SubtargetFeature<"mem_noshuf", "HasMemNoShuf", "false",
@@ -64,6 +78,8 @@ def FeatureSmallData: SubtargetFeature<"small-data", "UseSmallData", "true",
"Allow GP-relative addressing of global variables">;
def FeatureDuplex: SubtargetFeature<"duplex", "EnableDuplex", "true",
"Enable generation of duplex instruction">;
+def FeatureUnsafeFP: SubtargetFeature<"unsafe-fp", "UseUnsafeMath", "true",
+ "Use unsafe FP math">;
def FeatureReservedR19: SubtargetFeature<"reserved-r19", "ReservedR19",
"true", "Reserve register R19">;
def FeatureNoreturnStackElim: SubtargetFeature<"noreturn-stack-elim",
@@ -76,21 +92,36 @@ def FeatureNoreturnStackElim: SubtargetFeature<"noreturn-stack-elim",
def UseMEMOPS : Predicate<"HST->useMemops()">;
def UseHVX64B : Predicate<"HST->useHVX64BOps()">,
- AssemblerPredicate<"ExtensionHVX64B">;
+ AssemblerPredicate<(all_of ExtensionHVX64B)>;
def UseHVX128B : Predicate<"HST->useHVX128BOps()">,
- AssemblerPredicate<"ExtensionHVX128B">;
+ AssemblerPredicate<(all_of ExtensionHVX128B)>;
def UseHVX : Predicate<"HST->useHVXOps()">,
- AssemblerPredicate<"ExtensionHVXV60">;
-def UseHVXV60 : Predicate<"HST->useHVXOps()">,
- AssemblerPredicate<"ExtensionHVXV60">;
-def UseHVXV62 : Predicate<"HST->useHVXOps()">,
- AssemblerPredicate<"ExtensionHVXV62">;
-def UseHVXV65 : Predicate<"HST->useHVXOps()">,
- AssemblerPredicate<"ExtensionHVXV65">;
-def UseHVXV66 : Predicate<"HST->useHVXOps()">,
- AssemblerPredicate<"ExtensionHVXV66">;
+ AssemblerPredicate<(all_of ExtensionHVXV60)>;
+def UseHVXV60 : Predicate<"HST->useHVXV60Ops()">,
+ AssemblerPredicate<(all_of ExtensionHVXV60)>;
+def UseHVXV62 : Predicate<"HST->useHVXV62Ops()">,
+ AssemblerPredicate<(all_of ExtensionHVXV62)>;
+def UseHVXV65 : Predicate<"HST->useHVXV65Ops()">,
+ AssemblerPredicate<(all_of ExtensionHVXV65)>;
+def UseHVXV66 : Predicate<"HST->useHVXV66Ops()">,
+ AssemblerPredicate<(all_of ExtensionHVXV66)>;
+def UseHVXV67 : Predicate<"HST->useHVXV67Ops()">,
+ AssemblerPredicate<(all_of ExtensionHVXV67)>;
+def UseAudio : Predicate<"HST->useAudioOps()">,
+ AssemblerPredicate<(all_of ExtensionAudio)>;
def UseZReg : Predicate<"HST->useZRegOps()">,
- AssemblerPredicate<"ExtensionZReg">;
+ AssemblerPredicate<(all_of ExtensionZReg)>;
+def UseCompound : Predicate<"HST->useCompound()">;
+def HasPreV65 : Predicate<"HST->hasPreV65()">,
+ AssemblerPredicate<(all_of FeaturePreV65)>;
+def HasMemNoShuf : Predicate<"HST->hasMemNoShuf()">,
+ AssemblerPredicate<(all_of FeatureMemNoShuf)>;
+def UseUnsafeMath : Predicate<"HST->useUnsafeMath()">;
+def NotOptTinyCore : Predicate<"!HST->isTinyCore() ||"
+ "MF->getFunction().hasOptSize()"> {
+ let RecomputePerFunction = 1;
+}
+def UseSmallData : Predicate<"HST->useSmallData()">;
def Hvx64: HwMode<"+hvx-length64b">;
def Hvx128: HwMode<"+hvx-length128b">;
@@ -99,6 +130,7 @@ def Hvx128: HwMode<"+hvx-length128b">;
// Classes used for relation maps.
//===----------------------------------------------------------------------===//
+// The classes below should remain in hierarchical order...
class ImmRegShl;
// ImmRegRel - Filter class used to relate instructions having reg-reg form
// with their reg-imm counterparts.
@@ -106,17 +138,14 @@ class ImmRegRel;
// PredRel - Filter class used to relate non-predicated instructions with their
// predicated forms.
class PredRel;
-// PredNewRel - Filter class used to relate predicated instructions with their
-// predicate-new forms.
class PredNewRel: PredRel;
// NewValueRel - Filter class used to relate regular store instructions with
// their new-value store form.
class NewValueRel: PredNewRel;
-// NewValueRel - Filter class used to relate load/store instructions having
-// different addressing modes with each other.
class AddrModeRel: NewValueRel;
class PostInc_BaseImm;
class IntrinsicsRel;
+// ... through here.
//===----------------------------------------------------------------------===//
// Generate mapping table to relate non-predicate instructions with their
@@ -335,31 +364,43 @@ class Proc<string Name, SchedMachineModel Model,
def : Proc<"generic", HexagonModelV60,
[ArchV5, ArchV55, ArchV60,
- FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
- FeaturePackets, FeatureSmallData]>;
+ FeatureCompound, FeatureDuplex, FeaturePreV65, FeatureMemops,
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv5", HexagonModelV5,
[ArchV5,
- FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
- FeaturePackets, FeatureSmallData]>;
+ FeatureCompound, FeatureDuplex, FeaturePreV65, FeatureMemops,
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv55", HexagonModelV55,
[ArchV5, ArchV55,
- FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
- FeaturePackets, FeatureSmallData]>;
+ FeatureCompound, FeatureDuplex, FeaturePreV65, FeatureMemops,
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv60", HexagonModelV60,
[ArchV5, ArchV55, ArchV60,
- FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
- FeaturePackets, FeatureSmallData]>;
+ FeatureCompound, FeatureDuplex, FeaturePreV65, FeatureMemops,
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv62", HexagonModelV62,
[ArchV5, ArchV55, ArchV60, ArchV62,
- FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
- FeaturePackets, FeatureSmallData]>;
+ FeatureCompound, FeatureDuplex, FeaturePreV65, FeatureMemops,
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv65", HexagonModelV65,
[ArchV5, ArchV55, ArchV60, ArchV62, ArchV65,
- FeatureDuplex, FeatureMemNoShuf, FeatureMemops, FeatureNVJ,
- FeatureNVS, FeaturePackets, FeatureSmallData]>;
+ FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops,
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv66", HexagonModelV66,
[ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66,
- FeatureDuplex, FeatureMemNoShuf, FeatureMemops, FeatureNVJ,
+ FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops,
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
+def : Proc<"hexagonv67", HexagonModelV67,
+ [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, ArchV67,
+ FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops,
+ FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
+// The feature list for tiny core still needs to be updated with the correct features.
+// Disable NewValueJumps since the packetizer is unable to handle a packet with
+// a new value jump and another SLOT0 instruction.
+def : Proc<"hexagonv67t", HexagonModelV67T,
+ [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, ArchV67,
+ ProcTinyCore, ExtensionAudio,
+ FeatureCompound, FeatureMemNoShuf, FeatureMemops,
FeatureNVS, FeaturePackets, FeatureSmallData]>;
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonArch.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonArch.h
new file mode 100644
index 000000000000..e5d528390c51
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonArch.h
@@ -0,0 +1,37 @@
+//===- HexagonArch.h ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONARCH_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONARCH_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "HexagonDepArch.h"
+#include <algorithm>
+
+namespace llvm {
+namespace Hexagon {
+
+template <class ArchCont, typename Val>
+bool ValidArch(ArchCont const &ArchList, Val HexArch) {
+ return std::any_of(std::begin(ArchList), std::end(ArchList),
+ [HexArch](Val V) { return V == HexArch; });
+}
+
+template <class ArchCont, typename Val>
+llvm::Optional<ArchEnum> GetCpu(ArchCont const &ArchList, Val CPUString) {
+ llvm::Optional<ArchEnum> Res;
+ auto Entry = ArchList.find(CPUString);
+ if (Entry != ArchList.end())
+ Res = Entry->second;
+ return Res;
+}
+} // namespace Hexagon
+} // namespace llvm
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONARCH_H
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 30fdde70d01a..f3017d02995e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -206,10 +206,10 @@ static MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
Sym = AP.OutContext.getOrCreateSymbol(Twine(symbolName));
if (Sym->isUndefined()) {
- OutStreamer.EmitLabel(Sym);
- OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
- OutStreamer.EmitIntValue(Value, AlignSize);
- OutStreamer.EmitCodeAlignment(AlignSize);
+ OutStreamer.emitLabel(Sym);
+ OutStreamer.emitSymbolAttribute(Sym, MCSA_Global);
+ OutStreamer.emitIntValue(Value, AlignSize);
+ OutStreamer.emitCodeAlignment(AlignSize);
}
} else {
assert(Imm.isExpr() && "Expected expression and found none");
@@ -234,10 +234,10 @@ static MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
OutStreamer.SwitchSection(Section);
Sym = AP.OutContext.getOrCreateSymbol(Twine(LitaName));
if (Sym->isUndefined()) {
- OutStreamer.EmitLabel(Sym);
- OutStreamer.EmitSymbolAttribute(Sym, MCSA_Local);
- OutStreamer.EmitValue(Imm.getExpr(), AlignSize);
- OutStreamer.EmitCodeAlignment(AlignSize);
+ OutStreamer.emitLabel(Sym);
+ OutStreamer.emitSymbolAttribute(Sym, MCSA_Local);
+ OutStreamer.emitValue(Imm.getExpr(), AlignSize);
+ OutStreamer.emitCodeAlignment(AlignSize);
}
}
return Sym;
@@ -740,7 +740,7 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
}
/// Print out a single Hexagon MI to the current output stream.
-void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void HexagonAsmPrinter::emitInstruction(const MachineInstr *MI) {
MCInst MCB;
MCB.setOpcode(Hexagon::BUNDLE);
MCB.addOperand(MCOperand::createImm(0));
@@ -768,7 +768,7 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Ok); (void)Ok;
if (HexagonMCInstrInfo::bundleSize(MCB) == 0)
return;
- OutStreamer->EmitInstruction(MCB, getSubtargetInfo());
+ OutStreamer->emitInstruction(MCB, getSubtargetInfo());
}
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonAsmPrinter() {
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
index 6c4b664e83f5..3932def87854 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -46,7 +46,7 @@ class TargetMachine;
bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
const override;
- void EmitInstruction(const MachineInstr *MI) override;
+ void emitInstruction(const MachineInstr *MI) override;
void HexagonProcessInstruction(MCInst &Inst, const MachineInstr &MBB);
void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 799b85ed48b4..49edb0d99492 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -1433,10 +1433,16 @@ unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,
.addImm(int32_t(Lo));
return Reg;
}
+ MachineFunction *MF = B.getParent();
+ auto &HST = MF->getSubtarget<HexagonSubtarget>();
- BuildMI(B, At, DL, HII.get(Hexagon::CONST64), Reg)
- .addImm(C);
- return Reg;
+ // Disable CONST64 for tiny core since it takes a LD resource.
+ if (!HST.isTinyCore() ||
+ MF->getFunction().hasOptSize()) {
+ BuildMI(B, At, DL, HII.get(Hexagon::CONST64), Reg)
+ .addImm(C);
+ return Reg;
+ }
}
if (RC == &Hexagon::PredRegsRegClass) {
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
index ebd060ce503e..1e4030b84bc1 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -330,7 +330,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
case PS_fi: {
int FI = op(1).getIndex();
int Off = op(2).getImm();
- unsigned A = MFI.getObjectAlignment(FI) + std::abs(Off);
+ unsigned A = MFI.getObjectAlign(FI).value() + std::abs(Off);
unsigned L = countTrailingZeros(A);
RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0);
RC.fill(0, L, BT::BitValue::Zero);
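
The PS_fi case above derives how many low bits of a frame-index address are known to be zero: it adds the object's alignment value to the absolute offset and counts trailing zeros. A minimal sketch that reproduces only that arithmetic, without arguing how tight the bound is:

// Standalone model of the PS_fi known-bits arithmetic from the hunk above:
// alignment value plus |offset|, then count trailing zeros; that many low
// address bits are treated as known zero.
#include <cstdint>
#include <cstdio>
#include <cstdlib>

unsigned knownZeroLowBits(uint64_t AlignValue, int64_t Off) {
  const uint64_t A = AlignValue + static_cast<uint64_t>(std::llabs(Off));
  unsigned L = 0;
  for (uint64_t V = A; V != 0 && (V & 1) == 0; V >>= 1)  // countTrailingZeros
    ++L;
  return L;
}

int main() {
  // An 8-byte aligned object accessed at offset 4: the low 2 bits are zero.
  std::printf("known zero low bits = %u\n", knownZeroLowBits(8, 4));
  return 0;
}
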
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
index 08f740806879..6891455631a8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
@@ -105,7 +105,7 @@ void HexagonBranchRelaxation::computeOffset(MachineFunction &MF,
// offset of the current instruction from the start.
unsigned InstOffset = 0;
for (auto &B : MF) {
- if (B.getAlignment() != Align::None()) {
+ if (B.getAlignment() != Align(1)) {
// Although we don't know the exact layout of the final code, we need
// to account for alignment padding somehow. This heuristic pads each
// aligned basic block according to the alignment value.
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCallingConv.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCallingConv.td
index 5c31a81a1e87..93e17e608dd1 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCallingConv.td
@@ -18,7 +18,7 @@ def CC_HexagonStack: CallingConv<[
CCAssignToStack<8,8>>
]>;
-def CC_Hexagon: CallingConv<[
+def CC_Hexagon_Legacy: CallingConv<[
CCIfType<[i1,i8,i16],
CCPromoteToType<i32>>,
CCIfType<[f32],
@@ -48,6 +48,36 @@ def CC_Hexagon: CallingConv<[
CCDelegateTo<CC_HexagonStack>
]>;
+def CC_Hexagon: CallingConv<[
+ CCIfType<[i1,i8,i16],
+ CCPromoteToType<i32>>,
+ CCIfType<[f32],
+ CCBitConvertToType<i32>>,
+ CCIfType<[f64],
+ CCBitConvertToType<i64>>,
+
+ CCIfByVal<
+ CCPassByVal<8,1>>,
+ CCIfArgIsVarArg<
+ CCDelegateTo<CC_HexagonStack>>,
+
+ // Pass split values in pairs, allocate odd register if necessary.
+ CCIfType<[i32],
+ CCIfSplit<
+ CCCustom<"CC_SkipOdd">>>,
+
+ CCIfType<[i32,v2i16,v4i8],
+ CCAssignToReg<[R0,R1,R2,R3,R4,R5]>>,
+ // Make sure to allocate any skipped 32-bit register, so it does not get
+ // allocated to a subsequent 32-bit value.
+ CCIfType<[i64,v2i32,v4i16,v8i8],
+ CCCustom<"CC_SkipOdd">>,
+ CCIfType<[i64,v2i32,v4i16,v8i8],
+ CCAssignToReg<[D0,D1,D2]>>,
+
+ CCDelegateTo<CC_HexagonStack>
+]>;
+
def RetCC_Hexagon: CallingConv<[
CCIfType<[i1,i8,i16],
CCPromoteToType<i32>>,
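
The rewritten CC_Hexagon above keeps 64-bit and split 32-bit arguments on even/odd register pairs: a custom CC_SkipOdd step runs before the D0-D2 assignment, and any skipped odd R register is consumed so a later 32-bit argument cannot land in it. A toy allocator modelling only that discipline over abstract register numbers R0-R5 and D0-D2 (CC_SkipOdd itself lives in the Hexagon lowering code and is not reproduced here):

// Toy model of the even/odd pairing discipline encoded by CC_Hexagon above.
// R0..R5 are 32-bit argument registers; Dk is the pair R(2k+1):R(2k).
#include <cstdio>
#include <optional>

struct ArgRegState {
  unsigned NextR = 0;                 // next free 32-bit register (0..5)

  // 32-bit argument: take the next free R register, if any.
  std::optional<unsigned> alloc32() {
    if (NextR > 5) return std::nullopt;
    return NextR++;
  }

  // 64-bit argument: skip an odd R register first (the CC_SkipOdd effect),
  // then take the pair R(NextR+1):R(NextR) if it still fits in R0..R5.
  std::optional<unsigned> alloc64() {
    if (NextR & 1) ++NextR;           // the skipped odd register is consumed
    if (NextR + 1 > 5) return std::nullopt;
    const unsigned Pair = NextR / 2;  // D0, D1 or D2
    NextR += 2;
    return Pair;
  }
};

int main() {
  ArgRegState S;
  std::printf("i32 -> R%u\n", *S.alloc32());   // R0
  std::printf("i64 -> D%u\n", *S.alloc64());   // skips R1, takes D1 = R3:2
  std::printf("i32 -> R%u\n", *S.alloc32());   // R4 (R1 was burned)
  return 0;
}
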
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
index 6d2aadb066cf..6a5192c866cc 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -204,17 +204,7 @@ namespace {
Type *next_type(Type *Ty, Value *Idx) {
if (auto *PTy = dyn_cast<PointerType>(Ty))
return PTy->getElementType();
- // Advance the type.
- if (!Ty->isStructTy()) {
- Type *NexTy = cast<SequentialType>(Ty)->getElementType();
- return NexTy;
- }
- // Otherwise it is a struct type.
- ConstantInt *CI = dyn_cast<ConstantInt>(Idx);
- assert(CI && "Struct type with non-constant index");
- int64_t i = CI->getValue().getSExtValue();
- Type *NextTy = cast<StructType>(Ty)->getElementType(i);
- return NextTy;
+ return GetElementPtrInst::getTypeAtIndex(Ty, Idx);
}
raw_ostream &operator<< (raw_ostream &OS, const GepNode &GN) {
@@ -1302,7 +1292,8 @@ bool HexagonCommonGEP::runOnFunction(Function &F) {
#ifdef EXPENSIVE_CHECKS
// Run this only when expensive checks are enabled.
- verifyFunction(F);
+ if (verifyFunction(F, &dbgs()))
+ report_fatal_error("Broken function");
#endif
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
index aa9a715718bf..05b95d8b7314 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -379,6 +379,7 @@ namespace {
using AssignmentMap = std::map<ExtenderInit, IndexList>;
using LocDefList = std::vector<std::pair<Loc, IndexList>>;
+ const HexagonSubtarget *HST = nullptr;
const HexagonInstrInfo *HII = nullptr;
const HexagonRegisterInfo *HRI = nullptr;
MachineDominatorTree *MDT = nullptr;
@@ -1562,13 +1563,31 @@ HCE::Register HCE::insertInitializer(Loc DefL, const ExtenderInit &ExtI) {
.add(ExtOp);
}
} else {
- unsigned NewOpc = Ex.Neg ? Hexagon::S4_subi_asl_ri
- : Hexagon::S4_addi_asl_ri;
- // DefR = add(##EV,asl(Rb,S))
- InitI = BuildMI(MBB, At, dl, HII->get(NewOpc), DefR)
- .add(ExtOp)
- .add(MachineOperand(Ex.Rs))
- .addImm(Ex.S);
+ if (HST->useCompound()) {
+ unsigned NewOpc = Ex.Neg ? Hexagon::S4_subi_asl_ri
+ : Hexagon::S4_addi_asl_ri;
+ // DefR = add(##EV,asl(Rb,S))
+ InitI = BuildMI(MBB, At, dl, HII->get(NewOpc), DefR)
+ .add(ExtOp)
+ .add(MachineOperand(Ex.Rs))
+ .addImm(Ex.S);
+ } else {
+ // No compounds are available. It is not clear whether we should
+ // even process such extenders where the initializer cannot be
+ // a single instruction, but do it for now.
+ unsigned TmpR = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(MBB, At, dl, HII->get(Hexagon::S2_asl_i_r), TmpR)
+ .add(MachineOperand(Ex.Rs))
+ .addImm(Ex.S);
+ if (Ex.Neg)
+ InitI = BuildMI(MBB, At, dl, HII->get(Hexagon::A2_subri), DefR)
+ .add(ExtOp)
+ .add(MachineOperand(Register(TmpR, 0)));
+ else
+ InitI = BuildMI(MBB, At, dl, HII->get(Hexagon::A2_addi), DefR)
+ .add(MachineOperand(Register(TmpR, 0)))
+ .add(ExtOp);
+ }
}
}
@@ -1952,8 +1971,9 @@ bool HCE::runOnMachineFunction(MachineFunction &MF) {
}
LLVM_DEBUG(MF.print(dbgs() << "Before " << getPassName() << '\n', nullptr));
- HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
- HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+ HST = &MF.getSubtarget<HexagonSubtarget>();
+ HII = HST->getInstrInfo();
+ HRI = HST->getRegisterInfo();
MDT = &getAnalysis<MachineDominatorTree>();
MRI = &MF.getRegInfo();
AssignmentMap IMap;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
index 5b61d1084e08..77578378b058 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
@@ -754,6 +754,9 @@ void MachineConstPropagator::visitBranchesFrom(const MachineInstr &BrI) {
++It;
}
+ if (B.mayHaveInlineAsmBr())
+ EvalOk = false;
+
if (EvalOk) {
// Need to add all CFG successors that lead to EH landing pads.
// There won't be explicit branches to these blocks, but they must
@@ -810,8 +813,12 @@ void MachineConstPropagator::visitUsesOf(unsigned Reg) {
bool MachineConstPropagator::computeBlockSuccessors(const MachineBasicBlock *MB,
SetVector<const MachineBasicBlock*> &Targets) {
+ Targets.clear();
+
MachineBasicBlock::const_iterator FirstBr = MB->end();
for (const MachineInstr &MI : *MB) {
+ if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
+ return false;
if (MI.isDebugInstr())
continue;
if (MI.isBranch()) {
@@ -820,7 +827,6 @@ bool MachineConstPropagator::computeBlockSuccessors(const MachineBasicBlock *MB,
}
}
- Targets.clear();
MachineBasicBlock::const_iterator End = MB->end();
bool DoNext = true;
@@ -2836,6 +2842,9 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI,
if (MI.isCopy())
return false;
+ MachineFunction *MF = MI.getParent()->getParent();
+ auto &HST = MF->getSubtarget<HexagonSubtarget>();
+
// Collect all virtual register-def operands.
SmallVector<unsigned,2> DefRegs;
for (const MachineOperand &MO : MI.operands()) {
@@ -2923,11 +2932,13 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI,
NewMI = BuildMI(B, At, DL, *NewD, NewR)
.addImm(Hi)
.addImm(Lo);
- } else {
+ } else if (MF->getFunction().hasOptSize() || !HST.isTinyCore()) {
+ // Disable CONST64 for tiny core since it takes a LD resource.
NewD = &HII.get(Hexagon::CONST64);
NewMI = BuildMI(B, At, DL, *NewD, NewR)
.addImm(V);
- }
+ } else
+ return false;
}
}
(void)NewMI;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index 394a329ac447..587527d8c32c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -21,7 +21,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/PassSupport.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -212,7 +212,7 @@ static bool areCombinableOperations(const TargetRegisterInfo *TRI,
// There is a combine of two constant extended values into CONST64,
// provided both constants are true immediates.
if (isGreaterThanNBitTFRI<16>(HighRegInst) &&
- isGreaterThanNBitTFRI<16>(LowRegInst))
+ isGreaterThanNBitTFRI<16>(LowRegInst) && !IsConst64Disabled)
return (HighRegInst.getOperand(1).isImm() &&
LowRegInst.getOperand(1).isImm());
@@ -279,11 +279,11 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr &I1,
// A reverse_iterator instantiated like below starts before I2, and I1
// respectively.
// Look at instructions I in between I2 and (excluding) I1.
- MachineBasicBlock::reverse_iterator I(I2),
- End = --(MachineBasicBlock::reverse_iterator(I1));
+ MachineBasicBlock::reverse_iterator I = ++I2.getIterator().getReverse();
+ MachineBasicBlock::reverse_iterator End = I1.getIterator().getReverse();
// At O3 we got better results (dhrystone!) by being more conservative.
if (!ShouldCombineAggressively)
- End = MachineBasicBlock::reverse_iterator(I1);
+ End = ++I1.getIterator().getReverse();
// If I2 kills its operand and we move I2 over an instruction that also
// uses I2's use reg we need to modify that (first) instruction to now kill
// this reg.
@@ -477,6 +477,10 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) {
ShouldCombineAggressively =
MF.getTarget().getOptLevel() <= CodeGenOpt::Default;
+ // Disable CONST64 for tiny core since it takes a LD resource.
+ if (!OptForSize && ST->isTinyCore())
+ IsConst64Disabled = true;
+
// Traverse basic blocks.
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
++BI) {
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepArch.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepArch.h
index 529be7ef0ac7..45b4cf042443 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepArch.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepArch.h
@@ -5,15 +5,44 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// Automatically generated file, please consult code owner before editing.
+// Automatically generated file, do not edit!
//===----------------------------------------------------------------------===//
-#ifndef HEXAGON_DEP_ARCH_H
-#define HEXAGON_DEP_ARCH_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPARCH_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPARCH_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include <map>
+
namespace llvm {
namespace Hexagon {
-enum class ArchEnum { NoArch, Generic, V5, V55, V60, V62, V65, V66 };
+enum class ArchEnum { NoArch, Generic, V5, V55, V60, V62, V65, V66, V67 };
+
+static constexpr unsigned ArchValsNumArray[] = {5, 55, 60, 62, 65, 66, 67};
+static constexpr ArrayRef<unsigned> ArchValsNum(ArchValsNumArray);
+
+static constexpr StringLiteral ArchValsTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67" };
+static constexpr ArrayRef<StringLiteral> ArchValsText(ArchValsTextArray);
+
+static constexpr StringLiteral CpuValsTextArray[] = { "hexagonv5", "hexagonv55", "hexagonv60", "hexagonv62", "hexagonv65", "hexagonv66", "hexagonv67", "hexagonv67t" };
+static constexpr ArrayRef<StringLiteral> CpuValsText(CpuValsTextArray);
+
+static constexpr StringLiteral CpuNickTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67", "v67t" };
+static constexpr ArrayRef<StringLiteral> CpuNickText(CpuNickTextArray);
+
+static const std::map<std::string, ArchEnum> CpuTable{
+ {"generic", Hexagon::ArchEnum::V60},
+ {"hexagonv5", Hexagon::ArchEnum::V5},
+ {"hexagonv55", Hexagon::ArchEnum::V55},
+ {"hexagonv60", Hexagon::ArchEnum::V60},
+ {"hexagonv62", Hexagon::ArchEnum::V62},
+ {"hexagonv65", Hexagon::ArchEnum::V65},
+ {"hexagonv66", Hexagon::ArchEnum::V66},
+ {"hexagonv67", Hexagon::ArchEnum::V67},
+ {"hexagonv67t", Hexagon::ArchEnum::V67},
+};
} // namespace Hexagon
} // namespace llvm;
-#endif // HEXAGON_DEP_ARCH_H
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPARCH_H
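
HexagonDepArch.h now carries a CpuTable mapping CPU names to ArchEnum values, and the HexagonArch.h header added earlier in this diff provides a GetCpu helper that returns an empty Optional for unknown names. A minimal model of that lookup using std::optional and a subset of the table (the real code uses llvm::Optional and StringRef):

// Standalone model of the CpuTable + GetCpu lookup added in this diff,
// using std::optional in place of llvm::Optional.
#include <cstdio>
#include <map>
#include <optional>
#include <string>

enum class ArchEnum { NoArch, Generic, V5, V55, V60, V62, V65, V66, V67 };

static const std::map<std::string, ArchEnum> CpuTable{
    {"generic", ArchEnum::V60},    {"hexagonv60", ArchEnum::V60},
    {"hexagonv66", ArchEnum::V66}, {"hexagonv67", ArchEnum::V67},
    {"hexagonv67t", ArchEnum::V67},  // tiny core shares the V67 arch
};

std::optional<ArchEnum> getCpu(const std::string &CPU) {
  auto It = CpuTable.find(CPU);
  if (It == CpuTable.end())
    return std::nullopt;
  return It->second;
}

int main() {
  std::printf("hexagonv67t known: %s\n", getCpu("hexagonv67t") ? "yes" : "no");
  std::printf("hexagonv99 known:  %s\n", getCpu("hexagonv99") ? "yes" : "no");
  return 0;
}
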
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepArch.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepArch.td
index 115cf2383a7a..9374055eae7d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepArch.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepArch.td
@@ -5,18 +5,20 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// Automatically generated file, please consult code owner before editing.
+// Automatically generated file, do not edit!
//===----------------------------------------------------------------------===//
-def ArchV66: SubtargetFeature<"v66", "HexagonArchVersion", "Hexagon::ArchEnum::V66", "Enable Hexagon V66 architecture">;
-def HasV66 : Predicate<"HST->hasV66Ops()">, AssemblerPredicate<"ArchV66">;
-def ArchV65: SubtargetFeature<"v65", "HexagonArchVersion", "Hexagon::ArchEnum::V65", "Enable Hexagon V65 architecture">;
-def HasV65 : Predicate<"HST->hasV65Ops()">, AssemblerPredicate<"ArchV65">;
-def ArchV62: SubtargetFeature<"v62", "HexagonArchVersion", "Hexagon::ArchEnum::V62", "Enable Hexagon V62 architecture">;
-def HasV62 : Predicate<"HST->hasV62Ops()">, AssemblerPredicate<"ArchV62">;
-def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "Hexagon::ArchEnum::V60", "Enable Hexagon V60 architecture">;
-def HasV60 : Predicate<"HST->hasV60Ops()">, AssemblerPredicate<"ArchV60">;
-def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "Hexagon::ArchEnum::V55", "Enable Hexagon V55 architecture">;
-def HasV55 : Predicate<"HST->hasV55Ops()">, AssemblerPredicate<"ArchV55">;
def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "Hexagon::ArchEnum::V5", "Enable Hexagon V5 architecture">;
-def HasV5 : Predicate<"HST->hasV5Ops()">, AssemblerPredicate<"ArchV5">;
+def HasV5 : Predicate<"HST->hasV5Ops()">, AssemblerPredicate<(all_of ArchV5)>;
+def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "Hexagon::ArchEnum::V55", "Enable Hexagon V55 architecture">;
+def HasV55 : Predicate<"HST->hasV55Ops()">, AssemblerPredicate<(all_of ArchV55)>;
+def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "Hexagon::ArchEnum::V60", "Enable Hexagon V60 architecture">;
+def HasV60 : Predicate<"HST->hasV60Ops()">, AssemblerPredicate<(all_of ArchV60)>;
+def ArchV62: SubtargetFeature<"v62", "HexagonArchVersion", "Hexagon::ArchEnum::V62", "Enable Hexagon V62 architecture">;
+def HasV62 : Predicate<"HST->hasV62Ops()">, AssemblerPredicate<(all_of ArchV62)>;
+def ArchV65: SubtargetFeature<"v65", "HexagonArchVersion", "Hexagon::ArchEnum::V65", "Enable Hexagon V65 architecture">;
+def HasV65 : Predicate<"HST->hasV65Ops()">, AssemblerPredicate<(all_of ArchV65)>;
+def ArchV66: SubtargetFeature<"v66", "HexagonArchVersion", "Hexagon::ArchEnum::V66", "Enable Hexagon V66 architecture">;
+def HasV66 : Predicate<"HST->hasV66Ops()">, AssemblerPredicate<(all_of ArchV66)>;
+def ArchV67: SubtargetFeature<"v67", "HexagonArchVersion", "Hexagon::ArchEnum::V67", "Enable Hexagon V67 architecture">;
+def HasV67 : Predicate<"HST->hasV67Ops()">, AssemblerPredicate<(all_of ArchV67)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc
index 10068abce7ec..ce7aa02e3e06 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc
@@ -5,7 +5,7 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// Automatically generated file, please consult code owner before editing.
+// Automatically generated file, do not edit!
//===----------------------------------------------------------------------===//
// clang-format off
@@ -15,39 +15,44 @@
#pragma clang diagnostic ignored "-Wunused-function"
#endif
+static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<8>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t, const void *Decoder) {
signedDecoder<4>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
-static DecodeStatus s29_3ImmDecoder(MCInst &MI, unsigned tmp,
+static DecodeStatus s31_1ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t, const void *Decoder) {
- signedDecoder<14>(MI, tmp, Decoder);
+ signedDecoder<12>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
-static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp,
+static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t, const void *Decoder) {
- signedDecoder<8>(MI, tmp, Decoder);
+ signedDecoder<5>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
-static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp,
+static DecodeStatus s30_2ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t, const void *Decoder) {
- signedDecoder<7>(MI, tmp, Decoder);
+ signedDecoder<13>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
-static DecodeStatus s31_1ImmDecoder(MCInst &MI, unsigned tmp,
+static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t, const void *Decoder) {
- signedDecoder<12>(MI, tmp, Decoder);
+ signedDecoder<6>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
-static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp,
+static DecodeStatus s29_3ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t, const void *Decoder) {
- signedDecoder<3>(MI, tmp, Decoder);
+ signedDecoder<14>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
-static DecodeStatus s30_2ImmDecoder(MCInst &MI, unsigned tmp,
+static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t, const void *Decoder) {
- signedDecoder<13>(MI, tmp, Decoder);
+ signedDecoder<7>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp,
@@ -60,14 +65,9 @@ static DecodeStatus s6_3ImmDecoder(MCInst &MI, unsigned tmp,
signedDecoder<9>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
-static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t, const void *Decoder) {
- signedDecoder<5>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp,
+static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t, const void *Decoder) {
- signedDecoder<6>(MI, tmp, Decoder);
+ signedDecoder<3>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
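
The decoder stubs above all delegate to signedDecoder<N>, which, as used here, turns an N-bit immediate field into a signed operand. A minimal sketch of the N-bit sign extension such a helper presumably performs; the real signedDecoder also attaches the operand to the MCInst, which is not shown:

// Standalone sketch of N-bit sign extension, as presumably performed by
// signedDecoder<N> for the immediate decoders listed above.
#include <cstdint>
#include <cstdio>

template <unsigned N> int64_t signExtend(uint64_t Field) {
  static_assert(N >= 1 && N < 64, "field width out of range for this sketch");
  const int64_t SignBit = int64_t(1) << (N - 1);
  const int64_t Value = int64_t(Field & ((uint64_t(1) << N) - 1));
  return (Value ^ SignBit) - SignBit;  // flip and subtract the sign bit
}

int main() {
  // A 4-bit field holding 0b1110 decodes to -2; 0b0110 stays +6.
  std::printf("s4 0b1110 -> %lld\n", (long long)signExtend<4>(0b1110));
  std::printf("s4 0b0110 -> %lld\n", (long long)signExtend<4>(0b0110));
  return 0;
}
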
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
index fefbbfd3f1ac..1547e8f769b1 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
@@ -5,7 +5,7 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// Automatically generated file, please consult code owner before editing.
+// Automatically generated file, do not edit!
//===----------------------------------------------------------------------===//
def tc_04da405a : InstrItinClass;
@@ -2554,3 +2554,494 @@ class DepHVXItinV66 {
[HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>
];
}
+
+class DepHVXItinV67 {
+ list<InstrItinData> DepHVXItinV67_list = [
+ InstrItinData <tc_04da405a, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_05058f6f, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_05ac6f98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_05ca8cfd, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_08a4f1b6, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_0b04c6c7, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ec46cf9, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_131f1c81, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_1381a97c, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [],
+ []>,
+
+ InstrItinData <tc_15fdf750, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_16ff9ef8, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_191381c1, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7],
+ [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_1ad8a370, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1ba8a0cd, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_20a4bbec, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_257f6f7c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_26a377fe, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c745bb8, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_2d4051cd, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2e8f5f6e, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_309dbb4f, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3904b926, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3aacf4a8, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_3ad719fb, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3c56e5ce, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3ce09744, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3e2aaafc, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_447d9895, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_453fe68d, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_46d6c3e0, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_51d0ecc3, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_52447ecc, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_540c3da3, /*SLOT0,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1],
+ [Hex_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_54a0dc47, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_561aaa58, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56c4f9fe, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_56e64202, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_58d21193, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_5bf8afbb, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_61bf7c03, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_649072c2, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_660769f1, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_663c80a7, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6942b6e0, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_6e7fa133, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_71646d06, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_7177e272, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_718b5c53, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9],
+ [HVX_FWD]>,
+
+ InstrItinData <tc_7273323b, /*SLOT0,STORE,VA_DV*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_7417e785, /*SLOT0123,VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_767c4e9d, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7e6a3e89, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_8772086c, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_87adc037, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8e420e4d, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_90bcc1db, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_933f2b39, /*SLOT23,4SLOT_MPY*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_946013d8, /*SLOT0123,VP*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 5],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_9d1dc972, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f363d21, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a02a10a8, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_a0dbea28, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7e6707d, /*SLOT0,NOSLOT1,LOAD,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ab23f776, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_abe8c3b2, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ac4046bc, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_af25efd9, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7],
+ [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_b091f1c6, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b28e51aa, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b4416217, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7],
+ [HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_b9db8205, /*SLOT01,LOAD*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c0749f3c, /*SLOT01,LOAD,VA*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2],
+ [HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c127de3a, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c4edf264, /*SLOT23,VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2],
+ [HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c5dba46e, /*SLOT0,STORE,VA*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_c7039829, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_cd94bfe0, /*SLOT23,VS_VX*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>,
+ InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d8287c14, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_db5555f3, /*SLOT0123,VA_DV*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_dd5b0695, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_df80eeb0, /*SLOT0123,VP_VS*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e2d2e9e5, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e35c1e93, /*SLOT0123,VA*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7],
+ [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e3f68a46, /*SLOT0123,4SLOT*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [3],
+ [HVX_FWD]>,
+
+ InstrItinData <tc_e675c45a, /*SLOT23,VX_DV*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2],
+ [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e699ae41, /*SLOT01,ZW*/
+ [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_ZW]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e8797b98, /*SLOT1,LOAD,VA*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_e99d4c2e, /*SLOT0,STORE*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_f1de44ef, /*SLOT2,VX_DV*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [9, 5, 2],
+ [HVX_FWD, HVX_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f21e8abb, /*SLOT0,NOSLOT1,STORE,VP*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [1, 2, 5],
+ [Hex_FWD, Hex_FWD, HVX_FWD]>,
+
+ InstrItinData <tc_fd7610da, /*SLOT1,LOAD,VA_DV*/
+ [InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7],
+ [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>
+ ];
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
index 34da0be02d19..fecccb250198 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td
@@ -5,3792 +5,7079 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// Automatically generated file, do not edit!
+// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
-def tc_002cb246 : InstrItinClass;
-def tc_0371abea : InstrItinClass;
-def tc_05c070ec : InstrItinClass;
-def tc_05d3a09b : InstrItinClass;
-def tc_0663f615 : InstrItinClass;
-def tc_096199d3 : InstrItinClass;
-def tc_0a705168 : InstrItinClass;
-def tc_0ae0825c : InstrItinClass;
-def tc_0b2be201 : InstrItinClass;
-def tc_0d8f5752 : InstrItinClass;
-def tc_13bfbcf9 : InstrItinClass;
-def tc_14b272fa : InstrItinClass;
-def tc_14b5c689 : InstrItinClass;
-def tc_15aa71c5 : InstrItinClass;
-def tc_174516e8 : InstrItinClass;
-def tc_17e0d2cd : InstrItinClass;
-def tc_1a2fd869 : InstrItinClass;
-def tc_1ad90acd : InstrItinClass;
-def tc_1ae57e39 : InstrItinClass;
-def tc_1b6f7cec : InstrItinClass;
-def tc_1c4528a2 : InstrItinClass;
-def tc_1c80410a : InstrItinClass;
-def tc_1d81e60e : InstrItinClass;
-def tc_1fc97744 : InstrItinClass;
-def tc_20cdee80 : InstrItinClass;
-def tc_2332b92e : InstrItinClass;
-def tc_24b66c99 : InstrItinClass;
-def tc_25a78932 : InstrItinClass;
-def tc_2b8da4c2 : InstrItinClass;
-def tc_2eabeebe : InstrItinClass;
-def tc_2f7c551d : InstrItinClass;
-def tc_2ff964b4 : InstrItinClass;
-def tc_30b9bb4a : InstrItinClass;
-def tc_32779c6f : InstrItinClass;
-def tc_36153880 : InstrItinClass;
-def tc_362c6592 : InstrItinClass;
-def tc_3962fa26 : InstrItinClass;
-def tc_39dfefe8 : InstrItinClass;
-def tc_3a867367 : InstrItinClass;
-def tc_3b470976 : InstrItinClass;
-def tc_3b5b7ef9 : InstrItinClass;
-def tc_3bd75825 : InstrItinClass;
-def tc_3c76b0ff : InstrItinClass;
-def tc_3d495a39 : InstrItinClass;
-def tc_40116ca8 : InstrItinClass;
-def tc_434c8e1e : InstrItinClass;
-def tc_4414d8b1 : InstrItinClass;
-def tc_44d3da28 : InstrItinClass;
-def tc_4560740b : InstrItinClass;
-def tc_4837eefb : InstrItinClass;
-def tc_49a8207d : InstrItinClass;
-def tc_4ae7b58b : InstrItinClass;
-def tc_4b68bce4 : InstrItinClass;
-def tc_4c5ba658 : InstrItinClass;
-def tc_4d5fa3a1 : InstrItinClass;
-def tc_53559e35 : InstrItinClass;
-def tc_56336eb0 : InstrItinClass;
-def tc_56f114f4 : InstrItinClass;
-def tc_57890846 : InstrItinClass;
-def tc_5a2711e5 : InstrItinClass;
-def tc_5abb5e3f : InstrItinClass;
-def tc_5aee39f7 : InstrItinClass;
-def tc_5b54b33f : InstrItinClass;
-def tc_5b7c0967 : InstrItinClass;
-def tc_5bf126a6 : InstrItinClass;
-def tc_5d7f5414 : InstrItinClass;
-def tc_5ef37dc4 : InstrItinClass;
-def tc_6132ba3d : InstrItinClass;
-def tc_61830035 : InstrItinClass;
-def tc_640086b5 : InstrItinClass;
-def tc_643b4717 : InstrItinClass;
-def tc_67435e81 : InstrItinClass;
-def tc_675e4897 : InstrItinClass;
-def tc_679309b8 : InstrItinClass;
-def tc_6b25e783 : InstrItinClass;
-def tc_703e822c : InstrItinClass;
-def tc_7186d325 : InstrItinClass;
-def tc_7646c131 : InstrItinClass;
-def tc_76851da1 : InstrItinClass;
-def tc_779080bf : InstrItinClass;
-def tc_784490da : InstrItinClass;
-def tc_785f65a7 : InstrItinClass;
-def tc_7a91e76a : InstrItinClass;
-def tc_838b34ea : InstrItinClass;
-def tc_85c9c08f : InstrItinClass;
-def tc_85d5d03f : InstrItinClass;
-def tc_862b3e70 : InstrItinClass;
-def tc_88b4f13d : InstrItinClass;
-def tc_89e94ad3 : InstrItinClass;
-def tc_8b121f4a : InstrItinClass;
-def tc_8b3e402a : InstrItinClass;
-def tc_8c945be0 : InstrItinClass;
-def tc_8c99de45 : InstrItinClass;
-def tc_8d9d0154 : InstrItinClass;
-def tc_8fb7ab1b : InstrItinClass;
-def tc_9461ff31 : InstrItinClass;
-def tc_946df596 : InstrItinClass;
-def tc_9ad9998f : InstrItinClass;
-def tc_9bfd761f : InstrItinClass;
-def tc_9c3ecd83 : InstrItinClass;
-def tc_9ca930f7 : InstrItinClass;
-def tc_9da59d12 : InstrItinClass;
-def tc_9debc299 : InstrItinClass;
-def tc_9e313203 : InstrItinClass;
-def tc_9fc3dae0 : InstrItinClass;
-def tc_a1123dda : InstrItinClass;
-def tc_a1c00888 : InstrItinClass;
-def tc_a58fd5cc : InstrItinClass;
-def tc_a5d4aeec : InstrItinClass;
-def tc_a6b1eca9 : InstrItinClass;
-def tc_a813cf9a : InstrItinClass;
-def tc_a9d88b22 : InstrItinClass;
-def tc_ae53734a : InstrItinClass;
-def tc_b31c2e97 : InstrItinClass;
-def tc_b343892a : InstrItinClass;
-def tc_b43e7930 : InstrItinClass;
-def tc_b4407292 : InstrItinClass;
-def tc_b44ecf75 : InstrItinClass;
-def tc_b4b5c03a : InstrItinClass;
-def tc_b51dc29a : InstrItinClass;
-def tc_b83e6d73 : InstrItinClass;
-def tc_b857bf4e : InstrItinClass;
-def tc_b8bffe55 : InstrItinClass;
-def tc_b90a29b1 : InstrItinClass;
-def tc_b9272d6c : InstrItinClass;
-def tc_b9e09e03 : InstrItinClass;
-def tc_bab0eed9 : InstrItinClass;
-def tc_bafaade3 : InstrItinClass;
-def tc_bcf98408 : InstrItinClass;
-def tc_bd8382d1 : InstrItinClass;
-def tc_bdceeac1 : InstrItinClass;
-def tc_be9602ff : InstrItinClass;
-def tc_bf061958 : InstrItinClass;
-def tc_bfec0f01 : InstrItinClass;
-def tc_c4db48cb : InstrItinClass;
-def tc_c4f596e3 : InstrItinClass;
-def tc_c79a189f : InstrItinClass;
-def tc_c8ce0b5c : InstrItinClass;
-def tc_cd374165 : InstrItinClass;
-def tc_cf8126ae : InstrItinClass;
-def tc_cfd8378a : InstrItinClass;
-def tc_d08ee0f4 : InstrItinClass;
-def tc_d1aa9eaa : InstrItinClass;
-def tc_d2e63d61 : InstrItinClass;
-def tc_d5b7b0c1 : InstrItinClass;
-def tc_d5c0729a : InstrItinClass;
-def tc_d63f638c : InstrItinClass;
-def tc_d65dbf51 : InstrItinClass;
-def tc_d773585a : InstrItinClass;
-def tc_d9d43ecb : InstrItinClass;
-def tc_da4a37ed : InstrItinClass;
-def tc_da97ee82 : InstrItinClass;
-def tc_db2bce9c : InstrItinClass;
-def tc_de4df740 : InstrItinClass;
-def tc_de554571 : InstrItinClass;
-def tc_df3319ed : InstrItinClass;
-def tc_e06f432a : InstrItinClass;
-def tc_e4a7f9f0 : InstrItinClass;
-def tc_e4b3cb20 : InstrItinClass;
-def tc_e78647bd : InstrItinClass;
-def tc_e86aa961 : InstrItinClass;
-def tc_e93a3d71 : InstrItinClass;
-def tc_e95795ec : InstrItinClass;
-def tc_e9f3243f : InstrItinClass;
-def tc_f429765c : InstrItinClass;
-def tc_f675fee8 : InstrItinClass;
-def tc_f8e23f0b : InstrItinClass;
-def tc_f9058dd7 : InstrItinClass;
-def tc_fc3999b4 : InstrItinClass;
-def tc_fcc3ddf9 : InstrItinClass;
-def tc_fe211424 : InstrItinClass;
+def tc_011e0e9d : InstrItinClass;
+def tc_01d44cb2 : InstrItinClass;
+def tc_01e1be3b : InstrItinClass;
+def tc_02fe1c65 : InstrItinClass;
+def tc_0655b949 : InstrItinClass;
+def tc_075c8dd8 : InstrItinClass;
+def tc_0a195f2c : InstrItinClass;
+def tc_0a6c20ae : InstrItinClass;
+def tc_0ba0d5da : InstrItinClass;
+def tc_0dfac0a7 : InstrItinClass;
+def tc_0fac1eb8 : InstrItinClass;
+def tc_1044324a : InstrItinClass;
+def tc_10b884b7 : InstrItinClass;
+def tc_112d30d6 : InstrItinClass;
+def tc_1242dc2a : InstrItinClass;
+def tc_1248597c : InstrItinClass;
+def tc_14ab4f41 : InstrItinClass;
+def tc_151bf368 : InstrItinClass;
+def tc_158aa3f7 : InstrItinClass;
+def tc_197dce51 : InstrItinClass;
+def tc_1981450d : InstrItinClass;
+def tc_1b8138fc : InstrItinClass;
+def tc_1c2c7a4a : InstrItinClass;
+def tc_1c7522a8 : InstrItinClass;
+def tc_1d41f8b7 : InstrItinClass;
+def tc_1e7875f0 : InstrItinClass;
+def tc_1fcb8495 : InstrItinClass;
+def tc_1fe4ab69 : InstrItinClass;
+def tc_20131976 : InstrItinClass;
+def tc_2237d952 : InstrItinClass;
+def tc_234f8560 : InstrItinClass;
+def tc_23708a21 : InstrItinClass;
+def tc_24e109c7 : InstrItinClass;
+def tc_24f426ab : InstrItinClass;
+def tc_27106296 : InstrItinClass;
+def tc_280f7fe1 : InstrItinClass;
+def tc_28e55c6f : InstrItinClass;
+def tc_2c13e7f5 : InstrItinClass;
+def tc_2c3e17fc : InstrItinClass;
+def tc_2f573607 : InstrItinClass;
+def tc_2f669c77 : InstrItinClass;
+def tc_362b0be2 : InstrItinClass;
+def tc_38382228 : InstrItinClass;
+def tc_388f9897 : InstrItinClass;
+def tc_38e0bae9 : InstrItinClass;
+def tc_3d14a17b : InstrItinClass;
+def tc_3edca78f : InstrItinClass;
+def tc_3fbf1042 : InstrItinClass;
+def tc_407e96f9 : InstrItinClass;
+def tc_40d64c94 : InstrItinClass;
+def tc_4222e6bf : InstrItinClass;
+def tc_42ff66ba : InstrItinClass;
+def tc_442395f3 : InstrItinClass;
+def tc_449acf79 : InstrItinClass;
+def tc_44d5a428 : InstrItinClass;
+def tc_44fffc58 : InstrItinClass;
+def tc_45791fb8 : InstrItinClass;
+def tc_45f9d1be : InstrItinClass;
+def tc_49fdfd4b : InstrItinClass;
+def tc_4a55d03c : InstrItinClass;
+def tc_4abdbdc6 : InstrItinClass;
+def tc_4ac61d92 : InstrItinClass;
+def tc_4c1520ae : InstrItinClass;
+def tc_503ce0f3 : InstrItinClass;
+def tc_53c851ab : InstrItinClass;
+def tc_5502c366 : InstrItinClass;
+def tc_55255f2b : InstrItinClass;
+def tc_556f6577 : InstrItinClass;
+def tc_55a9a350 : InstrItinClass;
+def tc_55b33fda : InstrItinClass;
+def tc_56a124a7 : InstrItinClass;
+def tc_57a55b54 : InstrItinClass;
+def tc_5944960d : InstrItinClass;
+def tc_59a7822c : InstrItinClass;
+def tc_5a4b5e58 : InstrItinClass;
+def tc_5b347363 : InstrItinClass;
+def tc_5ceb2f9e : InstrItinClass;
+def tc_5d636bc7 : InstrItinClass;
+def tc_5da50c4b : InstrItinClass;
+def tc_5deb5e47 : InstrItinClass;
+def tc_5e4cf0e8 : InstrItinClass;
+def tc_5f2afaf7 : InstrItinClass;
+def tc_60e324ff : InstrItinClass;
+def tc_63567288 : InstrItinClass;
+def tc_64b00d8a : InstrItinClass;
+def tc_651cbe02 : InstrItinClass;
+def tc_65279839 : InstrItinClass;
+def tc_65cbd974 : InstrItinClass;
+def tc_69bfb303 : InstrItinClass;
+def tc_6ae3426b : InstrItinClass;
+def tc_6d861a95 : InstrItinClass;
+def tc_6e20402a : InstrItinClass;
+def tc_6f42bc60 : InstrItinClass;
+def tc_6fb32599 : InstrItinClass;
+def tc_6fc5dbea : InstrItinClass;
+def tc_711c805f : InstrItinClass;
+def tc_713b66bf : InstrItinClass;
+def tc_7401744f : InstrItinClass;
+def tc_7476d766 : InstrItinClass;
+def tc_74a42bda : InstrItinClass;
+def tc_76bb5435 : InstrItinClass;
+def tc_77f94a5e : InstrItinClass;
+def tc_788b1d09 : InstrItinClass;
+def tc_7b9187d3 : InstrItinClass;
+def tc_7c31e19a : InstrItinClass;
+def tc_7c6d32e4 : InstrItinClass;
+def tc_7dc63b5c : InstrItinClass;
+def tc_7dcd9d89 : InstrItinClass;
+def tc_7f7f45f5 : InstrItinClass;
+def tc_7f8ae742 : InstrItinClass;
+def tc_8035e91f : InstrItinClass;
+def tc_822c3c68 : InstrItinClass;
+def tc_829d8a86 : InstrItinClass;
+def tc_838c4d7a : InstrItinClass;
+def tc_84a7500d : InstrItinClass;
+def tc_86173609 : InstrItinClass;
+def tc_887d1bb7 : InstrItinClass;
+def tc_8a6d0d94 : InstrItinClass;
+def tc_8a825db2 : InstrItinClass;
+def tc_8b5bd4f5 : InstrItinClass;
+def tc_8e82e8ca : InstrItinClass;
+def tc_9124c04f : InstrItinClass;
+def tc_9165014d : InstrItinClass;
+def tc_92240447 : InstrItinClass;
+def tc_934753bb : InstrItinClass;
+def tc_937dd41c : InstrItinClass;
+def tc_9406230a : InstrItinClass;
+def tc_95a33176 : InstrItinClass;
+def tc_96ef76ef : InstrItinClass;
+def tc_975a4e54 : InstrItinClass;
+def tc_9783714b : InstrItinClass;
+def tc_988416e3 : InstrItinClass;
+def tc_9b34f5e0 : InstrItinClass;
+def tc_9b3c0462 : InstrItinClass;
+def tc_9bcfb2ee : InstrItinClass;
+def tc_9c52f549 : InstrItinClass;
+def tc_9e27f2f9 : InstrItinClass;
+def tc_9e72dc89 : InstrItinClass;
+def tc_9edb7c77 : InstrItinClass;
+def tc_9edefe01 : InstrItinClass;
+def tc_9f6cd987 : InstrItinClass;
+def tc_a08b630b : InstrItinClass;
+def tc_a1297125 : InstrItinClass;
+def tc_a154b476 : InstrItinClass;
+def tc_a2b365d2 : InstrItinClass;
+def tc_a3070909 : InstrItinClass;
+def tc_a32e03e7 : InstrItinClass;
+def tc_a38c45dc : InstrItinClass;
+def tc_a4e22bbd : InstrItinClass;
+def tc_a4ee89db : InstrItinClass;
+def tc_a7a13fac : InstrItinClass;
+def tc_a7bdb22c : InstrItinClass;
+def tc_a9edeffa : InstrItinClass;
+def tc_abfd9a6d : InstrItinClass;
+def tc_ac65613f : InstrItinClass;
+def tc_addc37a8 : InstrItinClass;
+def tc_ae5babd7 : InstrItinClass;
+def tc_aee6250c : InstrItinClass;
+def tc_b1ae5f67 : InstrItinClass;
+def tc_b34eb232 : InstrItinClass;
+def tc_b4dc7630 : InstrItinClass;
+def tc_b570493d : InstrItinClass;
+def tc_b7c4062a : InstrItinClass;
+def tc_b837298f : InstrItinClass;
+def tc_ba9255a6 : InstrItinClass;
+def tc_bb07f2c5 : InstrItinClass;
+def tc_bb831a7c : InstrItinClass;
+def tc_bf2ffc0f : InstrItinClass;
+def tc_c20701f0 : InstrItinClass;
+def tc_c21d7447 : InstrItinClass;
+def tc_c57d9f39 : InstrItinClass;
+def tc_c818ff7f : InstrItinClass;
+def tc_ce59038e : InstrItinClass;
+def tc_cfa0e29b : InstrItinClass;
+def tc_d03278fd : InstrItinClass;
+def tc_d33e5eee : InstrItinClass;
+def tc_d3632d88 : InstrItinClass;
+def tc_d45ba9cd : InstrItinClass;
+def tc_d47648a2 : InstrItinClass;
+def tc_d57d649c : InstrItinClass;
+def tc_d61dfdc3 : InstrItinClass;
+def tc_d68dca5c : InstrItinClass;
+def tc_d7718fbe : InstrItinClass;
+def tc_db596beb : InstrItinClass;
+def tc_db96aa6b : InstrItinClass;
+def tc_dc51281d : InstrItinClass;
+def tc_decdde8a : InstrItinClass;
+def tc_df4536ae : InstrItinClass;
+def tc_df5d53f9 : InstrItinClass;
+def tc_e3d699e3 : InstrItinClass;
+def tc_e9170fb7 : InstrItinClass;
+def tc_ed03645c : InstrItinClass;
+def tc_eed07714 : InstrItinClass;
+def tc_eeda4109 : InstrItinClass;
+def tc_ef921005 : InstrItinClass;
+def tc_f098b237 : InstrItinClass;
+def tc_f0cdeccf : InstrItinClass;
+def tc_f0e8e832 : InstrItinClass;
+def tc_f34c1c21 : InstrItinClass;
+def tc_f38f92e1 : InstrItinClass;
+def tc_f529831b : InstrItinClass;
+def tc_f6e2aff9 : InstrItinClass;
+def tc_f7569068 : InstrItinClass;
+def tc_f999c66e : InstrItinClass;
+def tc_fae9dfa5 : InstrItinClass;
+def tc_fedb7e19 : InstrItinClass;
class DepScalarItinV5 {
list<InstrItinData> DepScalarItinV5_list = [
- InstrItinData <tc_002cb246, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_0371abea, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_05c070ec, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_05d3a09b, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_0663f615, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_096199d3, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_0a705168, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0ae0825c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_0b2be201, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0d8f5752, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_13bfbcf9, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_14b272fa, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_14b5c689, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_15aa71c5, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_174516e8, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_17e0d2cd, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_1a2fd869, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_1ad90acd, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_1ae57e39, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_1b6f7cec, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_1c4528a2, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_1c80410a, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_1d81e60e, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_1fc97744, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_20cdee80, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_2332b92e, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_24b66c99, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_25a78932, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_2b8da4c2, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_2eabeebe, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_2f7c551d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_2ff964b4, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_30b9bb4a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_32779c6f, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_36153880, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_362c6592, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_3962fa26, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_39dfefe8, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_3a867367, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_3b470976, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_3b5b7ef9, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_3bd75825, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_3c76b0ff, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_3d495a39, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_40116ca8, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_434c8e1e, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_4414d8b1, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_44d3da28, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_4560740b, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_4837eefb, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_49a8207d, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_4ae7b58b, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_4b68bce4, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_4c5ba658, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_4d5fa3a1, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_53559e35, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_56336eb0, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_56f114f4, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_57890846, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_5a2711e5, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_5abb5e3f, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_5aee39f7, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_5b54b33f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_5b7c0967, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_5bf126a6, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_5d7f5414, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_5ef37dc4, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_6132ba3d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_61830035, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_640086b5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_643b4717, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_67435e81, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_675e4897, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_679309b8, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_6b25e783, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_703e822c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_7186d325, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_7646c131, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_76851da1, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_779080bf, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_784490da, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_785f65a7, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_7a91e76a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_838b34ea, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_85c9c08f, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_85d5d03f, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_862b3e70, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_88b4f13d, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_89e94ad3, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_8b121f4a, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_8b3e402a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_8c945be0, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_8c99de45, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_8d9d0154, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_8fb7ab1b, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_9461ff31, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_946df596, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_9ad9998f, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_9bfd761f, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_9c3ecd83, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_9ca930f7, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_9da59d12, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_9debc299, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_9e313203, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_9fc3dae0, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_a1123dda, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_a1c00888, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_a58fd5cc, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_a5d4aeec, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_a6b1eca9, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_a813cf9a, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_a9d88b22, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_ae53734a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_b31c2e97, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_b343892a, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_b43e7930, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_b4407292, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_b44ecf75, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_b4b5c03a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_b51dc29a, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_b83e6d73, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_b857bf4e, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_b8bffe55, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_b90a29b1, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_b9272d6c, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_b9e09e03, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_bab0eed9, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_bafaade3, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_bcf98408, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_bd8382d1, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_bdceeac1, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_be9602ff, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_bf061958, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_bfec0f01, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_c4db48cb, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_c4f596e3, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_c79a189f, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_c8ce0b5c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_cd374165, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_cf8126ae, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_cfd8378a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_d08ee0f4, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_d1aa9eaa, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_d2e63d61, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_d5b7b0c1, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_d5c0729a, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_d63f638c, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_d65dbf51, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_d773585a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_d9d43ecb, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_da4a37ed, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_da97ee82, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_db2bce9c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_de4df740, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_de554571, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_df3319ed, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_e06f432a, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e4a7f9f0, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_e4b3cb20, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_e78647bd, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_e86aa961, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_e93a3d71, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_e95795ec, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e9f3243f, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_f429765c, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f675fee8, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f8e23f0b, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_f9058dd7, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_fc3999b4, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_fcc3ddf9, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_fe211424, [InstrStage<1, [SLOT0]>]> ];
+ InstrItinData <tc_011e0e9d, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_01d44cb2, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_01e1be3b, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_02fe1c65, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_0655b949, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_075c8dd8, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_0a195f2c, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_0a6c20ae, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_0ba0d5da, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_0dfac0a7, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_0fac1eb8, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_1044324a, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_10b884b7, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_112d30d6, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_1242dc2a, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_1248597c, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_14ab4f41, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_151bf368, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_158aa3f7, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_197dce51, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_1981450d, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_1b8138fc, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_1c2c7a4a, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_1c7522a8, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_1d41f8b7, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_1e7875f0, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_1fcb8495, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_1fe4ab69, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_20131976, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_2237d952, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_234f8560, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_23708a21, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_24e109c7, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_24f426ab, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_27106296, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_280f7fe1, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_28e55c6f, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_2c13e7f5, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_2c3e17fc, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_2f573607, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_2f669c77, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_362b0be2, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_38382228, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_388f9897, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_38e0bae9, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_3d14a17b, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_3edca78f, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_3fbf1042, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_407e96f9, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_40d64c94, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_4222e6bf, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_42ff66ba, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_442395f3, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_449acf79, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_44d5a428, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_44fffc58, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_45791fb8, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_45f9d1be, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_49fdfd4b, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_4a55d03c, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_4abdbdc6, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_4ac61d92, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_4c1520ae, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_503ce0f3, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_53c851ab, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_5502c366, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_55255f2b, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_556f6577, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_55a9a350, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_55b33fda, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_56a124a7, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_57a55b54, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_5944960d, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_59a7822c, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_5a4b5e58, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_5b347363, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_5ceb2f9e, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_5d636bc7, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_5da50c4b, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_5deb5e47, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_5e4cf0e8, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_5f2afaf7, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_60e324ff, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_63567288, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_64b00d8a, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_651cbe02, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_65279839, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_65cbd974, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_69bfb303, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_6ae3426b, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_6d861a95, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_6e20402a, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_6f42bc60, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_6fb32599, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_6fc5dbea, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_711c805f, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_713b66bf, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_7401744f, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_7476d766, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_74a42bda, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_76bb5435, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_77f94a5e, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_788b1d09, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_7b9187d3, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_7c31e19a, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_7c6d32e4, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_7dc63b5c, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_7dcd9d89, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_7f7f45f5, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_7f8ae742, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_8035e91f, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_822c3c68, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_829d8a86, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_838c4d7a, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_84a7500d, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_86173609, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_887d1bb7, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_8a6d0d94, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_8a825db2, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_8b5bd4f5, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_8e82e8ca, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_9124c04f, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_9165014d, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_92240447, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_934753bb, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_937dd41c, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_9406230a, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_95a33176, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_96ef76ef, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_975a4e54, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_9783714b, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_988416e3, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_9b34f5e0, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_9b3c0462, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_9bcfb2ee, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_9c52f549, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_9e27f2f9, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_9e72dc89, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_9edb7c77, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_9edefe01, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_9f6cd987, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_a08b630b, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_a1297125, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_a154b476, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_a2b365d2, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_a3070909, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_a32e03e7, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_a38c45dc, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_a4e22bbd, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_a4ee89db, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_a7a13fac, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_a7bdb22c, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_a9edeffa, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_abfd9a6d, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_ac65613f, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_addc37a8, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_ae5babd7, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_aee6250c, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_b1ae5f67, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_b34eb232, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_b4dc7630, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_b570493d, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_b7c4062a, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_b837298f, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_ba9255a6, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_bb07f2c5, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_bb831a7c, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_bf2ffc0f, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_c20701f0, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_c21d7447, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_c57d9f39, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_c818ff7f, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_ce59038e, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_cfa0e29b, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_d03278fd, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_d33e5eee, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_d3632d88, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_d45ba9cd, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_d47648a2, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_d57d649c, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_d61dfdc3, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_d68dca5c, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_d7718fbe, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_db596beb, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_db96aa6b, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_dc51281d, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_decdde8a, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_df4536ae, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_df5d53f9, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_e3d699e3, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_e9170fb7, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_ed03645c, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_eed07714, [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData <tc_eeda4109, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_ef921005, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_f098b237, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_f0cdeccf, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_f0e8e832, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_f34c1c21, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_f38f92e1, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_f529831b, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_f6e2aff9, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_f7569068, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_f999c66e, [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData <tc_fae9dfa5, [InstrStage<1, [SLOT3]>]>,
+ InstrItinData <tc_fedb7e19, [InstrStage<1, [SLOT0, SLOT1]>]> ];
}
class DepScalarItinV55 {
list<InstrItinData> DepScalarItinV55_list = [
- InstrItinData <tc_002cb246, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_011e0e9d, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_01d44cb2, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0371abea, /*tc_st*/
+ InstrItinData <tc_01e1be3b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_02fe1c65, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0655b949, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05c070ec, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
+ InstrItinData <tc_075c8dd8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05d3a09b, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ InstrItinData <tc_0a195f2c, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0663f615, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_0a6c20ae, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ba0d5da, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_0dfac0a7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_096199d3, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ InstrItinData <tc_0fac1eb8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0a705168, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_1044324a, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_10b884b7, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_112d30d6, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1242dc2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1248597c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14ab4f41, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0ae0825c, /*tc_1*/
+ InstrItinData <tc_151bf368, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0b2be201, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ InstrItinData <tc_158aa3f7, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_197dce51, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0d8f5752, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_1981450d, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1b8138fc, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_13bfbcf9, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ InstrItinData <tc_1c2c7a4a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14b272fa, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1c7522a8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14b5c689, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1d41f8b7, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_15aa71c5, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_1e7875f0, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_174516e8, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
- [Hex_FWD]>,
-
- InstrItinData <tc_17e0d2cd, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ InstrItinData <tc_1fcb8495, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1a2fd869, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1fe4ab69, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1ad90acd, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_20131976, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2237d952, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1ae57e39, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
+ InstrItinData <tc_234f8560, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1b6f7cec, /*tc_2early*/
+ InstrItinData <tc_23708a21, /*tc_2early*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
- InstrItinData <tc_1c4528a2, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_24e109c7, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_24f426ab, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_27106296, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_280f7fe1, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_28e55c6f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c13e7f5, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c3e17fc, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_2f573607, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1c80410a, /*tc_1*/
+ InstrItinData <tc_2f669c77, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_362b0be2, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_38382228, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_388f9897, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1d81e60e, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_38e0bae9, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3d14a17b, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3edca78f, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1fc97744, /*tc_1*/
+ InstrItinData <tc_3fbf1042, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_407e96f9, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_20cdee80, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_40d64c94, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2332b92e, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 2],
+ InstrItinData <tc_4222e6bf, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_42ff66ba, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_24b66c99, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ InstrItinData <tc_442395f3, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_449acf79, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_25a78932, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ InstrItinData <tc_44d5a428, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44fffc58, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_45791fb8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b8da4c2, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 2, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_45f9d1be, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_2eabeebe, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_49fdfd4b, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2f7c551d, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ InstrItinData <tc_4a55d03c, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2ff964b4, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4abdbdc6, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_30b9bb4a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ InstrItinData <tc_4ac61d92, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_32779c6f, /*tc_3x*/
+ InstrItinData <tc_4c1520ae, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_36153880, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_503ce0f3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_362c6592, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ InstrItinData <tc_53c851ab, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3962fa26, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5502c366, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_39dfefe8, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [],
+ InstrItinData <tc_55255f2b, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [],
[]>,
- InstrItinData <tc_3a867367, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_3b470976, /*tc_3x*/
+ InstrItinData <tc_556f6577, /*tc_3x*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3b5b7ef9, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ InstrItinData <tc_55a9a350, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_55b33fda, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56a124a7, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_57a55b54, /*tc_2early*/
+ [InstrStage<1, [SLOT3]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5944960d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3bd75825, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_59a7822c, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3c76b0ff, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5a4b5e58, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3d495a39, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 2, 2],
+ InstrItinData <tc_5b347363, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_40116ca8, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
+ InstrItinData <tc_5ceb2f9e, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_434c8e1e, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_5d636bc7, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4414d8b1, /*tc_1*/
+ InstrItinData <tc_5da50c4b, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_44d3da28, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ InstrItinData <tc_5deb5e47, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5e4cf0e8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4560740b, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1, 2],
+ InstrItinData <tc_5f2afaf7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4837eefb, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_49a8207d, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [2],
+ InstrItinData <tc_60e324ff, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_4ae7b58b, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ InstrItinData <tc_63567288, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_4b68bce4, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ InstrItinData <tc_64b00d8a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_651cbe02, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4c5ba658, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_65279839, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_65cbd974, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d5fa3a1, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_69bfb303, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53559e35, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6ae3426b, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_56336eb0, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6d861a95, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_56f114f4, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
+ InstrItinData <tc_6e20402a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6f42bc60, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_57890846, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_6fb32599, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_6fc5dbea, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_711c805f, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5a2711e5, /*tc_1*/
+ InstrItinData <tc_713b66bf, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5abb5e3f, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ InstrItinData <tc_7401744f, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7476d766, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_74a42bda, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5aee39f7, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_76bb5435, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_77f94a5e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
- InstrItinData <tc_5b54b33f, /*tc_3*/
+ InstrItinData <tc_788b1d09, /*tc_3*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5b7c0967, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_7b9187d3, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5bf126a6, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 3],
+ InstrItinData <tc_7c31e19a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7c6d32e4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7dc63b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5d7f5414, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_7dcd9d89, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5ef37dc4, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ InstrItinData <tc_7f7f45f5, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7f8ae742, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6132ba3d, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_8035e91f, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_822c3c68, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_829d8a86, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_838c4d7a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_61830035, /*tc_2*/
+ InstrItinData <tc_84a7500d, /*tc_2*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_640086b5, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_86173609, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_643b4717, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
+ InstrItinData <tc_887d1bb7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8a6d0d94, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8a825db2, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_67435e81, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ InstrItinData <tc_8b5bd4f5, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8e82e8ca, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9124c04f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9165014d, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_92240447, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_675e4897, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 2],
+ InstrItinData <tc_934753bb, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_937dd41c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
+
+ InstrItinData <tc_9406230a, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_679309b8, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_95a33176, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6b25e783, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_96ef76ef, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_703e822c, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_975a4e54, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9783714b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7186d325, /*tc_st*/
+ InstrItinData <tc_988416e3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_9b34f5e0, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_9b3c0462, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9bcfb2ee, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7646c131, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ InstrItinData <tc_9c52f549, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_76851da1, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_9e27f2f9, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_779080bf, /*tc_2*/
+ InstrItinData <tc_9e72dc89, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9edb7c77, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9edefe01, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f6cd987, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a08b630b, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_784490da, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a1297125, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_785f65a7, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a154b476, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7a91e76a, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ InstrItinData <tc_a2b365d2, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_838b34ea, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ InstrItinData <tc_a3070909, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a32e03e7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_85c9c08f, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_a38c45dc, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a4e22bbd, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a4ee89db, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_a7a13fac, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7bdb22c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_85d5d03f, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_a9edeffa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_862b3e70, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_abfd9a6d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_88b4f13d, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_ac65613f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_addc37a8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ae5babd7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_89e94ad3, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_aee6250c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b121f4a, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [],
+ InstrItinData <tc_b1ae5f67, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b34eb232, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_8b3e402a, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
- [Hex_FWD]>,
+ InstrItinData <tc_b4dc7630, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8c945be0, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [3, 2],
+ InstrItinData <tc_b570493d, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8c99de45, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [],
+ InstrItinData <tc_b7c4062a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b837298f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
- InstrItinData <tc_8d9d0154, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_ba9255a6, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fb7ab1b, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ InstrItinData <tc_bb07f2c5, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb831a7c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bf2ffc0f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9461ff31, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_c20701f0, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_946df596, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_c21d7447, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9ad9998f, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [],
+ InstrItinData <tc_c57d9f39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c818ff7f, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_9bfd761f, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ InstrItinData <tc_ce59038e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_cfa0e29b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9c3ecd83, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d03278fd, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9ca930f7, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1, 2],
+ InstrItinData <tc_d33e5eee, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d3632d88, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9da59d12, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 3, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d45ba9cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_9debc299, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d47648a2, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d57d649c, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_9e313203, /*tc_3x*/
+ InstrItinData <tc_d61dfdc3, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9fc3dae0, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
+ InstrItinData <tc_d68dca5c, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d7718fbe, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_db596beb, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_db96aa6b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_dc51281d, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a1123dda, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ InstrItinData <tc_decdde8a, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_a1c00888, /*tc_1*/
+ InstrItinData <tc_df4536ae, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_df5d53f9, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e3d699e3, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a58fd5cc, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 1],
+ InstrItinData <tc_e9170fb7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ed03645c, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_eed07714, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_eeda4109, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ef921005, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f098b237, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f0cdeccf, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a5d4aeec, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
+ InstrItinData <tc_f0e8e832, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f34c1c21, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f38f92e1, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_f529831b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a6b1eca9, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
+ InstrItinData <tc_f6e2aff9, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a813cf9a, /*tc_2*/
+ InstrItinData <tc_f7569068, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f999c66e, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fae9dfa5, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a9d88b22, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_fedb7e19, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>
+ ];
+}
+
+class DepScalarItinV60 {
+ list<InstrItinData> DepScalarItinV60_list = [
+ InstrItinData <tc_011e0e9d, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_01d44cb2, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_01e1be3b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_02fe1c65, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0655b949, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ae53734a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_075c8dd8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b31c2e97, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_0a195f2c, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b343892a, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 3, 2],
+ InstrItinData <tc_0a6c20ae, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ba0d5da, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_0dfac0a7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b43e7930, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1],
+ InstrItinData <tc_0fac1eb8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1044324a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b4407292, /*tc_2early*/
- [InstrStage<1, [SLOT0]>], [],
+ InstrItinData <tc_10b884b7, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
[]>,
- InstrItinData <tc_b44ecf75, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_112d30d6, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_b4b5c03a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1242dc2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_b51dc29a, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 1],
+ InstrItinData <tc_1248597c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b83e6d73, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ InstrItinData <tc_14ab4f41, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b857bf4e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_151bf368, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b8bffe55, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1],
+ InstrItinData <tc_158aa3f7, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b90a29b1, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ InstrItinData <tc_197dce51, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1981450d, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1b8138fc, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1c2c7a4a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1c7522a8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9272d6c, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_1d41f8b7, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1e7875f0, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9e09e03, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 2, 2],
+ InstrItinData <tc_1fcb8495, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bab0eed9, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1fe4ab69, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bafaade3, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_20131976, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bcf98408, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_2237d952, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bd8382d1, /*tc_3stall*/
+ InstrItinData <tc_234f8560, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_23708a21, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_24e109c7, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bdceeac1, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_24f426ab, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_be9602ff, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ InstrItinData <tc_27106296, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bf061958, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ InstrItinData <tc_280f7fe1, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bfec0f01, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c4db48cb, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_28e55c6f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c4f596e3, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ InstrItinData <tc_2c13e7f5, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c79a189f, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2c3e17fc, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_2f573607, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2f669c77, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_362b0be2, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_c8ce0b5c, /*tc_3x*/
+ InstrItinData <tc_38382228, /*tc_3x*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cd374165, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_388f9897, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_38e0bae9, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3d14a17b, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf8126ae, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_3edca78f, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cfd8378a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
+ InstrItinData <tc_3fbf1042, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_407e96f9, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_40d64c94, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d08ee0f4, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_4222e6bf, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d1aa9eaa, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 1],
+ InstrItinData <tc_42ff66ba, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_442395f3, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_449acf79, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d2e63d61, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_44d5a428, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d5b7b0c1, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
+ InstrItinData <tc_44fffc58, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_d5c0729a, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ InstrItinData <tc_45791fb8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d63f638c, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1],
+ InstrItinData <tc_45f9d1be, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_d65dbf51, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_49fdfd4b, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d773585a, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4a55d03c, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9d43ecb, /*tc_2early*/
- [InstrStage<1, [SLOT3]>], [1, 2],
+ InstrItinData <tc_4abdbdc6, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_da4a37ed, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_4ac61d92, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_da97ee82, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ InstrItinData <tc_4c1520ae, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_503ce0f3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_db2bce9c, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_53c851ab, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_de4df740, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_5502c366, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_de554571, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_55255f2b, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_556f6577, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_55a9a350, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_55b33fda, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_df3319ed, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_56a124a7, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e06f432a, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [3],
- [Hex_FWD]>,
+ InstrItinData <tc_57a55b54, /*tc_2early*/
+ [InstrStage<1, [SLOT3]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e4a7f9f0, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_5944960d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e4b3cb20, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_e78647bd, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_59a7822c, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e86aa961, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5a4b5e58, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e93a3d71, /*tc_ld*/
+ InstrItinData <tc_5b347363, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5ceb2f9e, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e95795ec, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5d636bc7, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9f3243f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5da50c4b, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f429765c, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5deb5e47, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f675fee8, /*tc_2*/
+ InstrItinData <tc_5e4cf0e8, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f8e23f0b, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5f2afaf7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f9058dd7, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_60e324ff, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_fc3999b4, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_63567288, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_fcc3ddf9, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ InstrItinData <tc_64b00d8a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_651cbe02, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_65279839, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_65cbd974, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fe211424, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 1],
- [Hex_FWD, Hex_FWD]>
- ];
-}
+ InstrItinData <tc_69bfb303, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
-class DepScalarItinV60 {
- list<InstrItinData> DepScalarItinV60_list = [
- InstrItinData <tc_002cb246, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6ae3426b, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0371abea, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ InstrItinData <tc_6d861a95, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05c070ec, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
+ InstrItinData <tc_6e20402a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6f42bc60, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6fb32599, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_6fc5dbea, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05d3a09b, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ InstrItinData <tc_711c805f, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_713b66bf, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7401744f, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0663f615, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_7476d766, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_74a42bda, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_096199d3, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ InstrItinData <tc_76bb5435, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_77f94a5e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_788b1d09, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7b9187d3, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7c31e19a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0a705168, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_7c6d32e4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0ae0825c, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_7dc63b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7dcd9d89, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0b2be201, /*tc_st*/
+ InstrItinData <tc_7f7f45f5, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7f8ae742, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8035e91f, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0d8f5752, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_822c3c68, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_13bfbcf9, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_829d8a86, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_838c4d7a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_84a7500d, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14b272fa, /*tc_st*/
+ InstrItinData <tc_86173609, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_887d1bb7, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14b5c689, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_15aa71c5, /*tc_ld*/
+ InstrItinData <tc_8a6d0d94, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_174516e8, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_8a825db2, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_17e0d2cd, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ InstrItinData <tc_8b5bd4f5, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1a2fd869, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_8e82e8ca, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1ad90acd, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_9124c04f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1ae57e39, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
+ InstrItinData <tc_9165014d, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_92240447, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_934753bb, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1b6f7cec, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ InstrItinData <tc_937dd41c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
[]>,
- InstrItinData <tc_1c4528a2, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_9406230a, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1c80410a, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_95a33176, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1d81e60e, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_96ef76ef, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_975a4e54, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9783714b, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1fc97744, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_988416e3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_9b34f5e0, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_9b3c0462, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9bcfb2ee, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9c52f549, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_20cdee80, /*tc_2early*/
+ InstrItinData <tc_9e27f2f9, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e72dc89, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9edb7c77, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9edefe01, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f6cd987, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2332b92e, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 2],
+ InstrItinData <tc_a08b630b, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a1297125, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a154b476, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a2b365d2, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a3070909, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_24b66c99, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ InstrItinData <tc_a32e03e7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_25a78932, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ InstrItinData <tc_a38c45dc, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b8da4c2, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a4e22bbd, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2eabeebe, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ InstrItinData <tc_a4ee89db, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_2f7c551d, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ InstrItinData <tc_a7a13fac, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7bdb22c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a9edeffa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2ff964b4, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
+ InstrItinData <tc_abfd9a6d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ac65613f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_30b9bb4a, /*tc_st*/
+ InstrItinData <tc_addc37a8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ae5babd7, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_32779c6f, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_aee6250c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_36153880, /*tc_newvjump*/
+ InstrItinData <tc_b1ae5f67, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b34eb232, /*tc_3stall*/
[InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_362c6592, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b4dc7630, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3962fa26, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ InstrItinData <tc_b570493d, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b7c4062a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_39dfefe8, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [],
+ InstrItinData <tc_b837298f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
- InstrItinData <tc_3a867367, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_3b470976, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ InstrItinData <tc_ba9255a6, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3b5b7ef9, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ InstrItinData <tc_bb07f2c5, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3bd75825, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
-
- InstrItinData <tc_3c76b0ff, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ InstrItinData <tc_bb831a7c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3d495a39, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 2, 2],
+ InstrItinData <tc_bf2ffc0f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c20701f0, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_40116ca8, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_c21d7447, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_434c8e1e, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_c57d9f39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4414d8b1, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_c818ff7f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_ce59038e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_cfa0e29b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_44d3da28, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ InstrItinData <tc_d03278fd, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4560740b, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d33e5eee, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d3632d88, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d45ba9cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d47648a2, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d57d649c, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_4837eefb, /*tc_3stall*/
+ InstrItinData <tc_d61dfdc3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d68dca5c, /*tc_3stall*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_49a8207d, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [2],
+ InstrItinData <tc_d7718fbe, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_db596beb, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_db96aa6b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_4ae7b58b, /*tc_2early*/
+ InstrItinData <tc_dc51281d, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_decdde8a, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_4b68bce4, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ InstrItinData <tc_df4536ae, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4c5ba658, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_df5d53f9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e3d699e3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e9170fb7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ed03645c, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_eed07714, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_eeda4109, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ef921005, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f098b237, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f0cdeccf, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d5fa3a1, /*tc_newvjump*/
+ InstrItinData <tc_f0e8e832, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f34c1c21, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f38f92e1, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_53559e35, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
+ InstrItinData <tc_f529831b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_56336eb0, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 2],
+ InstrItinData <tc_f6e2aff9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_56f114f4, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_f7569068, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_57890846, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_f999c66e, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5a2711e5, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_fae9dfa5, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5abb5e3f, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_fedb7e19, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>
+ ];
+}
- InstrItinData <tc_5aee39f7, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
+class DepScalarItinV60se {
+ list<InstrItinData> DepScalarItinV60se_list = [
+ InstrItinData <tc_011e0e9d, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5b54b33f, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ InstrItinData <tc_01d44cb2, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_01e1be3b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5b7c0967, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_02fe1c65, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5bf126a6, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 3],
+ InstrItinData <tc_0655b949, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5d7f5414, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_075c8dd8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5ef37dc4, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ InstrItinData <tc_0a195f2c, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6132ba3d, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_0a6c20ae, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ba0d5da, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_0dfac0a7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_61830035, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_0fac1eb8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_640086b5, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_1044324a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_10b884b7, /*tc_3stall*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_ST]>], [],
+ []>,
+
+ InstrItinData <tc_112d30d6, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1242dc2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1248597c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14ab4f41, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_643b4717, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
+ InstrItinData <tc_151bf368, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_158aa3f7, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_67435e81, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ InstrItinData <tc_197dce51, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_675e4897, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2],
+ InstrItinData <tc_1981450d, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1b8138fc, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_679309b8, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_1c2c7a4a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1c7522a8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1d41f8b7, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1e7875f0, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1fcb8495, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6b25e783, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_1fe4ab69, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_703e822c, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_20131976, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2237d952, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7186d325, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ InstrItinData <tc_234f8560, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7646c131, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ InstrItinData <tc_23708a21, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ST]>], [],
+ []>,
+
+ InstrItinData <tc_24e109c7, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_76851da1, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_24f426ab, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_779080bf, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_27106296, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_784490da, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_280f7fe1, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_785f65a7, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_28e55c6f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7a91e76a, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ InstrItinData <tc_2c13e7f5, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_838b34ea, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2c3e17fc, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_85c9c08f, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_2f573607, /*tc_2early*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_ST]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_85d5d03f, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2f669c77, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_862b3e70, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ InstrItinData <tc_362b0be2, /*tc_2early*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_ST]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_88b4f13d, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_38382228, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_89e94ad3, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_388f9897, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_38e0bae9, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3d14a17b, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b121f4a, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [],
- []>,
+ InstrItinData <tc_3edca78f, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b3e402a, /*tc_1*/
+ InstrItinData <tc_3fbf1042, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_8c945be0, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2],
+ InstrItinData <tc_407e96f9, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_40d64c94, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8c99de45, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_4222e6bf, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8d9d0154, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_42ff66ba, /*tc_2early*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_ST]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_442395f3, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_449acf79, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44d5a428, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44fffc58, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ST]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_8fb7ab1b, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ InstrItinData <tc_45791fb8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_45f9d1be, /*tc_2early*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_ST]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_49fdfd4b, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4a55d03c, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4abdbdc6, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4ac61d92, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4c1520ae, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_503ce0f3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9461ff31, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_53c851ab, /*tc_2early*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_946df596, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_5502c366, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9ad9998f, /*tc_3stall*/
+ InstrItinData <tc_55255f2b, /*tc_3stall*/
[InstrStage<1, [SLOT3]>], [],
[]>,
- InstrItinData <tc_9bfd761f, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_556f6577, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9c3ecd83, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_55a9a350, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9ca930f7, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1, 2],
+ InstrItinData <tc_55b33fda, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9da59d12, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 3, 2],
+ InstrItinData <tc_56a124a7, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ST]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_57a55b54, /*tc_2early*/
+ [InstrStage<1, [SLOT3], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5944960d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9debc299, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ InstrItinData <tc_59a7822c, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5a4b5e58, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5b347363, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5ceb2f9e, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9e313203, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_5d636bc7, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9fc3dae0, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5da50c4b, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5deb5e47, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5e4cf0e8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a1123dda, /*tc_1*/
+ InstrItinData <tc_5f2afaf7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_60e324ff, /*tc_2early*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_ST]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_63567288, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_a1c00888, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_64b00d8a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a58fd5cc, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ InstrItinData <tc_651cbe02, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_65279839, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_65cbd974, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a5d4aeec, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_69bfb303, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ST]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a6b1eca9, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6ae3426b, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a813cf9a, /*tc_2*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_6d861a95, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a9d88b22, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_6e20402a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ae53734a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ InstrItinData <tc_6f42bc60, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6fb32599, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_b31c2e97, /*tc_2early*/
+ InstrItinData <tc_6fc5dbea, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b343892a, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2, 3, 2],
+ InstrItinData <tc_711c805f, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ST]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_713b66bf, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b43e7930, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1],
+ InstrItinData <tc_7401744f, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7476d766, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b4407292, /*tc_2early*/
+ InstrItinData <tc_74a42bda, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_76bb5435, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_77f94a5e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_b44ecf75, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_b4b5c03a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_788b1d09, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b51dc29a, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 1],
+ InstrItinData <tc_7b9187d3, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b83e6d73, /*tc_st*/
+ InstrItinData <tc_7c31e19a, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b857bf4e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_7c6d32e4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7dc63b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7dcd9d89, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b8bffe55, /*tc_4x*/
+ InstrItinData <tc_7f7f45f5, /*tc_4x*/
[InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b90a29b1, /*tc_st*/
+ InstrItinData <tc_7f8ae742, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8035e91f, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_822c3c68, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_829d8a86, /*tc_st*/
[InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9272d6c, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_838c4d7a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_84a7500d, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_86173609, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_887d1bb7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8a6d0d94, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9e09e03, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 2, 2],
+ InstrItinData <tc_8a825db2, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bab0eed9, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ InstrItinData <tc_8b5bd4f5, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8e82e8ca, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9124c04f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9165014d, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_92240447, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bafaade3, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_934753bb, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bcf98408, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_937dd41c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
+
+ InstrItinData <tc_9406230a, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bd8382d1, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_95a33176, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bdceeac1, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ InstrItinData <tc_96ef76ef, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_be9602ff, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ InstrItinData <tc_975a4e54, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [2, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bf061958, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ InstrItinData <tc_9783714b, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_988416e3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_9b34f5e0, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_9b3c0462, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9bcfb2ee, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9c52f549, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e27f2f9, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e72dc89, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bfec0f01, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ InstrItinData <tc_9edb7c77, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c4db48cb, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_9edefe01, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f6cd987, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c4f596e3, /*tc_st*/
+ InstrItinData <tc_a08b630b, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a1297125, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a154b476, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a2b365d2, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c79a189f, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
+ InstrItinData <tc_a3070909, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a32e03e7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a38c45dc, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a4e22bbd, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c8ce0b5c, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a4ee89db, /*tc_2early*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [],
+ []>,
- InstrItinData <tc_cd374165, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a7a13fac, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf8126ae, /*tc_2*/
+ InstrItinData <tc_a7bdb22c, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cfd8378a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_d08ee0f4, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_a9edeffa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d1aa9eaa, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 1],
+ InstrItinData <tc_abfd9a6d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ac65613f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_addc37a8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d2e63d61, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ InstrItinData <tc_ae5babd7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d5b7b0c1, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
+ InstrItinData <tc_aee6250c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b1ae5f67, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_d5c0729a, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ InstrItinData <tc_b34eb232, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_b4dc7630, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d63f638c, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_b570493d, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d65dbf51, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
+ InstrItinData <tc_b7c4062a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d773585a, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_b837298f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
- InstrItinData <tc_d9d43ecb, /*tc_2early*/
- [InstrStage<1, [SLOT3]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_ba9255a6, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_da4a37ed, /*tc_st*/
+ InstrItinData <tc_bb07f2c5, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_da97ee82, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ InstrItinData <tc_bb831a7c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bf2ffc0f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_db2bce9c, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_c20701f0, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c21d7447, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c57d9f39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_de4df740, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_c818ff7f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [],
+ []>,
+
+ InstrItinData <tc_ce59038e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_cfa0e29b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_de554571, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_d03278fd, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d33e5eee, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d3632d88, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_df3319ed, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_d45ba9cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d47648a2, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e06f432a, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3],
+ InstrItinData <tc_d57d649c, /*tc_3stall*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_ST]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_e4a7f9f0, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_d61dfdc3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d68dca5c, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e4b3cb20, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d7718fbe, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_e78647bd, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_db596beb, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_db96aa6b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_dc51281d, /*tc_2early*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_ST]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e86aa961, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_decdde8a, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ST]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_e93a3d71, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_df4536ae, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e95795ec, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_df5d53f9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9f3243f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e3d699e3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f429765c, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e9170fb7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f675fee8, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_ed03645c, /*tc_2early*/
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f8e23f0b, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_eed07714, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f9058dd7, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ InstrItinData <tc_eeda4109, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ef921005, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f098b237, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f0cdeccf, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fc3999b4, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_f0e8e832, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f34c1c21, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f38f92e1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_fcc3ddf9, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ InstrItinData <tc_f529831b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f6e2aff9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f7569068, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fe211424, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 1],
- [Hex_FWD, Hex_FWD]>
+ InstrItinData <tc_f999c66e, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fae9dfa5, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fedb7e19, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>
];
}
class DepScalarItinV62 {
list<InstrItinData> DepScalarItinV62_list = [
- InstrItinData <tc_002cb246, /*tc_2*/
+ InstrItinData <tc_011e0e9d, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_01d44cb2, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0371abea, /*tc_st*/
+ InstrItinData <tc_01e1be3b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_02fe1c65, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0655b949, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05c070ec, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
+ InstrItinData <tc_075c8dd8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05d3a09b, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ InstrItinData <tc_0a195f2c, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0663f615, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_0a6c20ae, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ba0d5da, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_0dfac0a7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_096199d3, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ InstrItinData <tc_0fac1eb8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0a705168, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_1044324a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_10b884b7, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_112d30d6, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1242dc2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1248597c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14ab4f41, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0ae0825c, /*tc_1*/
+ InstrItinData <tc_151bf368, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0b2be201, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ InstrItinData <tc_158aa3f7, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_197dce51, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0d8f5752, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_1981450d, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1b8138fc, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_13bfbcf9, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ InstrItinData <tc_1c2c7a4a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14b272fa, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1c7522a8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14b5c689, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1d41f8b7, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_15aa71c5, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_1e7875f0, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_174516e8, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
- [Hex_FWD]>,
-
- InstrItinData <tc_17e0d2cd, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ InstrItinData <tc_1fcb8495, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1a2fd869, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1fe4ab69, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1ad90acd, /*tc_3*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_20131976, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2237d952, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1ae57e39, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
+ InstrItinData <tc_234f8560, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1b6f7cec, /*tc_2early*/
+ InstrItinData <tc_23708a21, /*tc_2early*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
- InstrItinData <tc_1c4528a2, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_24e109c7, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_24f426ab, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_27106296, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_280f7fe1, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_28e55c6f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c13e7f5, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c3e17fc, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_2f573607, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2f669c77, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1c80410a, /*tc_1*/
+ InstrItinData <tc_362b0be2, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_38382228, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_388f9897, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1d81e60e, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_38e0bae9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3d14a17b, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3edca78f, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1fc97744, /*tc_1*/
+ InstrItinData <tc_3fbf1042, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_407e96f9, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_20cdee80, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_40d64c94, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2332b92e, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 2],
+ InstrItinData <tc_4222e6bf, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_42ff66ba, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_24b66c99, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ InstrItinData <tc_442395f3, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_449acf79, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_25a78932, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ InstrItinData <tc_44d5a428, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44fffc58, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_45791fb8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b8da4c2, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_45f9d1be, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_2eabeebe, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_49fdfd4b, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2f7c551d, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ InstrItinData <tc_4a55d03c, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2ff964b4, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4abdbdc6, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_30b9bb4a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ InstrItinData <tc_4ac61d92, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_32779c6f, /*tc_3stall*/
+ InstrItinData <tc_4c1520ae, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_36153880, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_503ce0f3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_362c6592, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ InstrItinData <tc_53c851ab, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3962fa26, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5502c366, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_39dfefe8, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [],
+ InstrItinData <tc_55255f2b, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
[]>,
- InstrItinData <tc_3a867367, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ InstrItinData <tc_556f6577, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_55a9a350, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_55b33fda, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3b470976, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_56a124a7, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3b5b7ef9, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ InstrItinData <tc_57a55b54, /*tc_2early*/
+ [InstrStage<1, [SLOT3]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5944960d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3bd75825, /*tc_3*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_59a7822c, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3c76b0ff, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5a4b5e58, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3d495a39, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 2, 2],
+ InstrItinData <tc_5b347363, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_40116ca8, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
+ InstrItinData <tc_5ceb2f9e, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_434c8e1e, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_5d636bc7, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4414d8b1, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_5da50c4b, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_44d3da28, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ InstrItinData <tc_5deb5e47, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5e4cf0e8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4560740b, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
+ InstrItinData <tc_5f2afaf7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4837eefb, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_49a8207d, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [2],
+ InstrItinData <tc_60e324ff, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_4ae7b58b, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ InstrItinData <tc_63567288, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_4b68bce4, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ InstrItinData <tc_64b00d8a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_651cbe02, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4c5ba658, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_65279839, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_65cbd974, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d5fa3a1, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_69bfb303, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_53559e35, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6ae3426b, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_56336eb0, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6d861a95, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_56f114f4, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
+ InstrItinData <tc_6e20402a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6f42bc60, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_57890846, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_6fb32599, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_6fc5dbea, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_711c805f, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5a2711e5, /*tc_1*/
+ InstrItinData <tc_713b66bf, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5abb5e3f, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ InstrItinData <tc_7401744f, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7476d766, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_74a42bda, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5aee39f7, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_76bb5435, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5b54b33f, /*tc_3x*/
+ InstrItinData <tc_77f94a5e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_788b1d09, /*tc_3x*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5b7c0967, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_7b9187d3, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5bf126a6, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 3],
+ InstrItinData <tc_7c31e19a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7c6d32e4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7dc63b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5d7f5414, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_7dcd9d89, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5ef37dc4, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ InstrItinData <tc_7f7f45f5, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7f8ae742, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6132ba3d, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_8035e91f, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_822c3c68, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_61830035, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_829d8a86, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_838c4d7a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_640086b5, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_84a7500d, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_643b4717, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
+ InstrItinData <tc_86173609, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_67435e81, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ InstrItinData <tc_887d1bb7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_675e4897, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2],
+ InstrItinData <tc_8a6d0d94, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_679309b8, /*tc_2*/
+ InstrItinData <tc_8a825db2, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6b25e783, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_8b5bd4f5, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_703e822c, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_8e82e8ca, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9124c04f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7186d325, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ InstrItinData <tc_9165014d, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_92240447, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_934753bb, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7646c131, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ InstrItinData <tc_937dd41c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
+
+ InstrItinData <tc_9406230a, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_95a33176, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_76851da1, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_96ef76ef, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_975a4e54, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_779080bf, /*tc_2*/
+ InstrItinData <tc_9783714b, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_988416e3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_9b34f5e0, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_9b3c0462, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_784490da, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_9bcfb2ee, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9c52f549, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e27f2f9, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e72dc89, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_785f65a7, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_9edb7c77, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7a91e76a, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ InstrItinData <tc_9edefe01, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f6cd987, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a08b630b, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a1297125, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a154b476, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_838b34ea, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ InstrItinData <tc_a2b365d2, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a3070909, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a32e03e7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_85c9c08f, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_a38c45dc, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a4e22bbd, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a4ee89db, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_a7a13fac, /*tc_2early*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7bdb22c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_85d5d03f, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_a9edeffa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_862b3e70, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_abfd9a6d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_88b4f13d, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_ac65613f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_addc37a8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ae5babd7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_89e94ad3, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_aee6250c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b121f4a, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [],
+ InstrItinData <tc_b1ae5f67, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b34eb232, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_8b3e402a, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
- [Hex_FWD]>,
+ InstrItinData <tc_b4dc7630, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8c945be0, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2],
+ InstrItinData <tc_b570493d, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8c99de45, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [],
+ InstrItinData <tc_b7c4062a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b837298f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
- InstrItinData <tc_8d9d0154, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_ba9255a6, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fb7ab1b, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ InstrItinData <tc_bb07f2c5, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb831a7c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bf2ffc0f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9461ff31, /*tc_2*/
+ InstrItinData <tc_c20701f0, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_946df596, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_c21d7447, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9ad9998f, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [],
+ InstrItinData <tc_c57d9f39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c818ff7f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_9bfd761f, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_ce59038e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_cfa0e29b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9c3ecd83, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d03278fd, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9ca930f7, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1, 2],
+ InstrItinData <tc_d33e5eee, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d3632d88, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9da59d12, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 3, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d45ba9cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_9debc299, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d47648a2, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9e313203, /*tc_2*/
+ InstrItinData <tc_d57d649c, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d61dfdc3, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9fc3dae0, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
+ InstrItinData <tc_d68dca5c, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d7718fbe, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_db596beb, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_db96aa6b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_dc51281d, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a1123dda, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ InstrItinData <tc_decdde8a, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_a1c00888, /*tc_1*/
+ InstrItinData <tc_df4536ae, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_df5d53f9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e3d699e3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e9170fb7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ed03645c, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_eed07714, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_eeda4109, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ef921005, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a58fd5cc, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ InstrItinData <tc_f098b237, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f0cdeccf, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a5d4aeec, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
+ InstrItinData <tc_f0e8e832, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f34c1c21, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f38f92e1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_f529831b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a6b1eca9, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
+ InstrItinData <tc_f6e2aff9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a813cf9a, /*tc_2*/
+ InstrItinData <tc_f7569068, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f999c66e, /*tc_2early*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fae9dfa5, /*tc_3x*/
[InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a9d88b22, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_fedb7e19, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>
+ ];
+}
+
+class DepScalarItinV65 {
+ list<InstrItinData> DepScalarItinV65_list = [
+ InstrItinData <tc_011e0e9d, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_01d44cb2, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_01e1be3b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_02fe1c65, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0655b949, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ae53734a, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_075c8dd8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b31c2e97, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_0a195f2c, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b343892a, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2, 3, 2],
+ InstrItinData <tc_0a6c20ae, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ba0d5da, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_0dfac0a7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b43e7930, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1],
+ InstrItinData <tc_0fac1eb8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1044324a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b4407292, /*tc_2early*/
- [InstrStage<1, [SLOT0]>], [],
+ InstrItinData <tc_10b884b7, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
[]>,
- InstrItinData <tc_b44ecf75, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_112d30d6, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_b4b5c03a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1242dc2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_b51dc29a, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 1],
+ InstrItinData <tc_1248597c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b83e6d73, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ InstrItinData <tc_14ab4f41, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b857bf4e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_151bf368, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b8bffe55, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ InstrItinData <tc_158aa3f7, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b90a29b1, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ InstrItinData <tc_197dce51, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1981450d, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1b8138fc, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1c2c7a4a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1c7522a8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9272d6c, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_1d41f8b7, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1e7875f0, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9e09e03, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 2, 2],
+ InstrItinData <tc_1fcb8495, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bab0eed9, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_1fe4ab69, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bafaade3, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_20131976, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bcf98408, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_2237d952, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bd8382d1, /*tc_newvjump*/
+ InstrItinData <tc_234f8560, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_23708a21, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_24e109c7, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bdceeac1, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_24f426ab, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_be9602ff, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ InstrItinData <tc_27106296, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bf061958, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ InstrItinData <tc_280f7fe1, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bfec0f01, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c4db48cb, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_28e55c6f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c4f596e3, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ InstrItinData <tc_2c13e7f5, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c79a189f, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2c3e17fc, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_2f573607, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2f669c77, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c8ce0b5c, /*tc_3x*/
+ InstrItinData <tc_362b0be2, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_38382228, /*tc_3x*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cd374165, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_388f9897, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_38e0bae9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3d14a17b, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf8126ae, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_3edca78f, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cfd8378a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
+ InstrItinData <tc_3fbf1042, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_407e96f9, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_40d64c94, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d08ee0f4, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_4222e6bf, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d1aa9eaa, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
+ InstrItinData <tc_42ff66ba, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_442395f3, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_449acf79, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d2e63d61, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_44d5a428, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d5b7b0c1, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
+ InstrItinData <tc_44fffc58, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_d5c0729a, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ InstrItinData <tc_45791fb8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d63f638c, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1],
+ InstrItinData <tc_45f9d1be, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_d65dbf51, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_49fdfd4b, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d773585a, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4a55d03c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9d43ecb, /*tc_2early*/
- [InstrStage<1, [SLOT3]>], [1, 2],
+ InstrItinData <tc_4abdbdc6, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_da4a37ed, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_4ac61d92, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_da97ee82, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ InstrItinData <tc_4c1520ae, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_503ce0f3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_db2bce9c, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_53c851ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_de4df740, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
+ InstrItinData <tc_5502c366, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_55255f2b, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_556f6577, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_de554571, /*tc_2early*/
+ InstrItinData <tc_55a9a350, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_55b33fda, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_df3319ed, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_56a124a7, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e06f432a, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3],
- [Hex_FWD]>,
+ InstrItinData <tc_57a55b54, /*tc_1*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e4a7f9f0, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_5944960d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e4b3cb20, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_e78647bd, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_59a7822c, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e86aa961, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5a4b5e58, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5b347363, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e93a3d71, /*tc_ld*/
+ InstrItinData <tc_5ceb2f9e, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e95795ec, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5d636bc7, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9f3243f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5da50c4b, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f429765c, /*tc_2*/
+ InstrItinData <tc_5deb5e47, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5e4cf0e8, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f675fee8, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_5f2afaf7, /*tc_latepredldaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_60e324ff, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_63567288, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_64b00d8a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_651cbe02, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_65279839, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_65cbd974, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f8e23f0b, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ InstrItinData <tc_69bfb303, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6ae3426b, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6d861a95, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6e20402a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6f42bc60, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6fb32599, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_6fc5dbea, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f9058dd7, /*tc_2*/
+ InstrItinData <tc_711c805f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_713b66bf, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7401744f, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fc3999b4, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_7476d766, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fcc3ddf9, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ InstrItinData <tc_74a42bda, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_76bb5435, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_77f94a5e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_788b1d09, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fe211424, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 1],
- [Hex_FWD, Hex_FWD]>
- ];
-}
+ InstrItinData <tc_7b9187d3, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
-class DepScalarItinV65 {
- list<InstrItinData> DepScalarItinV65_list = [
- InstrItinData <tc_002cb246, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_7c31e19a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0371abea, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ InstrItinData <tc_7c6d32e4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7dc63b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05c070ec, /*tc_2latepred*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2],
+ InstrItinData <tc_7dcd9d89, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7f7f45f5, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7f8ae742, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05d3a09b, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ InstrItinData <tc_8035e91f, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0663f615, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_822c3c68, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_096199d3, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ InstrItinData <tc_829d8a86, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_838c4d7a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0a705168, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_84a7500d, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0ae0825c, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_86173609, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0b2be201, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ InstrItinData <tc_887d1bb7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0d8f5752, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_8a6d0d94, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_13bfbcf9, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_8a825db2, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14b272fa, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_8b5bd4f5, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14b5c689, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_8e82e8ca, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9124c04f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_15aa71c5, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ InstrItinData <tc_9165014d, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_174516e8, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_92240447, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_17e0d2cd, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ InstrItinData <tc_934753bb, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1a2fd869, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_937dd41c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
- InstrItinData <tc_1ad90acd, /*tc_3*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_9406230a, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1ae57e39, /*tc_2latepred*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
+ InstrItinData <tc_95a33176, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1b6f7cec, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
+ InstrItinData <tc_96ef76ef, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1c4528a2, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_975a4e54, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9783714b, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1c80410a, /*tc_1*/
+ InstrItinData <tc_988416e3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_9b34f5e0, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_9b3c0462, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9bcfb2ee, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9c52f549, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1d81e60e, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_9e27f2f9, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e72dc89, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9edb7c77, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9edefe01, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f6cd987, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1fc97744, /*tc_1*/
+ InstrItinData <tc_a08b630b, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a1297125, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_20cdee80, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a154b476, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2332b92e, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_a2b365d2, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a3070909, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_24b66c99, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
+ InstrItinData <tc_a32e03e7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_25a78932, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ InstrItinData <tc_a38c45dc, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b8da4c2, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a4e22bbd, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2eabeebe, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ InstrItinData <tc_a4ee89db, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_2f7c551d, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ InstrItinData <tc_a7a13fac, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7bdb22c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a9edeffa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2ff964b4, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
+ InstrItinData <tc_abfd9a6d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ac65613f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_30b9bb4a, /*tc_st*/
+ InstrItinData <tc_addc37a8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ae5babd7, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_32779c6f, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_aee6250c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_36153880, /*tc_newvjump*/
+ InstrItinData <tc_b1ae5f67, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b34eb232, /*tc_3stall*/
[InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_362c6592, /*tc_st*/
+ InstrItinData <tc_b4dc7630, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b570493d, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b7c4062a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b837298f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_ba9255a6, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3962fa26, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
+ InstrItinData <tc_bb07f2c5, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb831a7c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_39dfefe8, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [],
+ InstrItinData <tc_bf2ffc0f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c20701f0, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c21d7447, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c57d9f39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c818ff7f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_3a867367, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ InstrItinData <tc_ce59038e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_cfa0e29b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d03278fd, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d33e5eee, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d3632d88, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d45ba9cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d47648a2, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d57d649c, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d61dfdc3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d68dca5c, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d7718fbe, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_db596beb, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_db96aa6b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_dc51281d, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_decdde8a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_df4536ae, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_df5d53f9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e3d699e3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e9170fb7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3b470976, /*tc_4x*/
+ InstrItinData <tc_ed03645c, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_eed07714, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_eeda4109, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ef921005, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f098b237, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f0cdeccf, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f0e8e832, /*tc_4x*/
[InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3b5b7ef9, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ InstrItinData <tc_f34c1c21, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3bd75825, /*tc_3*/
- [InstrStage<1, [SLOT2]>], [1],
+ InstrItinData <tc_f38f92e1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_3c76b0ff, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ InstrItinData <tc_f529831b, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3d495a39, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ InstrItinData <tc_f6e2aff9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_40116ca8, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_f7569068, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_434c8e1e, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_f999c66e, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fae9dfa5, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4414d8b1, /*tc_2*/
+ InstrItinData <tc_fedb7e19, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>
+ ];
+}
+
+class DepScalarItinV66 {
+ list<InstrItinData> DepScalarItinV66_list = [
+ InstrItinData <tc_011e0e9d, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_01d44cb2, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_44d3da28, /*tc_ld*/
+ InstrItinData <tc_01e1be3b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_02fe1c65, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0655b949, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_075c8dd8, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4560740b, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
+ InstrItinData <tc_0a195f2c, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0a6c20ae, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4837eefb, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_0ba0d5da, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_0dfac0a7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_49a8207d, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [2],
+ InstrItinData <tc_0fac1eb8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1044324a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_10b884b7, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_112d30d6, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_4ae7b58b, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ InstrItinData <tc_1242dc2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_4b68bce4, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ InstrItinData <tc_1248597c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14ab4f41, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4c5ba658, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_151bf368, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_158aa3f7, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_197dce51, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d5fa3a1, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2],
+ InstrItinData <tc_1981450d, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_53559e35, /*tc_latepredstaia*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1],
+ InstrItinData <tc_1b8138fc, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1c2c7a4a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1c7522a8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_56336eb0, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ InstrItinData <tc_1d41f8b7, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1e7875f0, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1fcb8495, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_56f114f4, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_1fe4ab69, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_20131976, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_57890846, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_2237d952, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5a2711e5, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_234f8560, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5abb5e3f, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ InstrItinData <tc_23708a21, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_24e109c7, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5aee39f7, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
+ InstrItinData <tc_24f426ab, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_27106296, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_280f7fe1, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5b54b33f, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ InstrItinData <tc_28e55c6f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c13e7f5, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5b7c0967, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ InstrItinData <tc_2c3e17fc, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_2f573607, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5bf126a6, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 3],
+ InstrItinData <tc_2f669c77, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5d7f5414, /*tc_3stall*/
+ InstrItinData <tc_362b0be2, /*tc_3*/
[InstrStage<1, [SLOT2]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_5ef37dc4, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_38382228, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6132ba3d, /*tc_1*/
+ InstrItinData <tc_388f9897, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_38e0bae9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3d14a17b, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3edca78f, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3fbf1042, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_407e96f9, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_61830035, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_40d64c94, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4222e6bf, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_640086b5, /*tc_1*/
+ InstrItinData <tc_42ff66ba, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_442395f3, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_449acf79, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44d5a428, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44fffc58, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_45791fb8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_45f9d1be, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_49fdfd4b, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4a55d03c, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_643b4717, /*tc_1*/
+ InstrItinData <tc_4abdbdc6, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4ac61d92, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4c1520ae, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_503ce0f3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_53c851ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5502c366, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_67435e81, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ InstrItinData <tc_55255f2b, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_556f6577, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_55a9a350, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_675e4897, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 1],
+ InstrItinData <tc_55b33fda, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_679309b8, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_56a124a7, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6b25e783, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_57a55b54, /*tc_1*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_703e822c, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_5944960d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_59a7822c, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7186d325, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ InstrItinData <tc_5a4b5e58, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7646c131, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ InstrItinData <tc_5b347363, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_76851da1, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_5ceb2f9e, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5d636bc7, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5da50c4b, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_779080bf, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_5deb5e47, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_784490da, /*tc_2*/
+ InstrItinData <tc_5e4cf0e8, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_785f65a7, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5f2afaf7, /*tc_latepredldaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7a91e76a, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_60e324ff, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_838b34ea, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_63567288, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4],
+ [Hex_FWD]>,
- InstrItinData <tc_85c9c08f, /*tc_1*/
- [InstrStage<1, [SLOT2]>], [2, 2],
+ InstrItinData <tc_64b00d8a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_85d5d03f, /*tc_1*/
+ InstrItinData <tc_651cbe02, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_862b3e70, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ InstrItinData <tc_65279839, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_65cbd974, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_69bfb303, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6ae3426b, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6d861a95, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6e20402a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6f42bc60, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6fb32599, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_88b4f13d, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_6fc5dbea, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_711c805f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_713b66bf, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_89e94ad3, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_7401744f, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7476d766, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b121f4a, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [],
+ InstrItinData <tc_74a42bda, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_76bb5435, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_77f94a5e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_8b3e402a, /*tc_2latepred*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4],
- [Hex_FWD]>,
+ InstrItinData <tc_788b1d09, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8c945be0, /*tc_newvjump*/
+ InstrItinData <tc_7b9187d3, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8c99de45, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_7c31e19a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8d9d0154, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_7c6d32e4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8fb7ab1b, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ InstrItinData <tc_7dc63b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7dcd9d89, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7f7f45f5, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7f8ae742, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9461ff31, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_8035e91f, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_822c3c68, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_946df596, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_829d8a86, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_838c4d7a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9ad9998f, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [],
- []>,
+ InstrItinData <tc_84a7500d, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9bfd761f, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_86173609, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9c3ecd83, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_887d1bb7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9ca930f7, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1, 2],
+ InstrItinData <tc_8a6d0d94, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9da59d12, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 3, 1],
+ InstrItinData <tc_8a825db2, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9debc299, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ InstrItinData <tc_8b5bd4f5, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8e82e8ca, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9e313203, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_9124c04f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9fc3dae0, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
+ InstrItinData <tc_9165014d, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a1123dda, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ InstrItinData <tc_92240447, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_934753bb, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_937dd41c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
+ []>,
+
+ InstrItinData <tc_9406230a, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_95a33176, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_96ef76ef, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_975a4e54, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9783714b, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_988416e3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_a1c00888, /*tc_1*/
+ InstrItinData <tc_9b34f5e0, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_9b3c0462, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9bcfb2ee, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9c52f549, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e27f2f9, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e72dc89, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9edb7c77, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9edefe01, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f6cd987, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a58fd5cc, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ InstrItinData <tc_a08b630b, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a1297125, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a154b476, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a5d4aeec, /*tc_ld*/
+ InstrItinData <tc_a2b365d2, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a3070909, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a32e03e7, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a6b1eca9, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a38c45dc, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a813cf9a, /*tc_2*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_a4e22bbd, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a4ee89db, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_a7a13fac, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7bdb22c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a9d88b22, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_a9edeffa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_abfd9a6d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ac65613f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_addc37a8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ae5babd7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_aee6250c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ae53734a, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ InstrItinData <tc_b1ae5f67, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_b31c2e97, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_b34eb232, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_b4dc7630, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b570493d, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b7c4062a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b837298f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_ba9255a6, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb07f2c5, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb831a7c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bf2ffc0f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b343892a, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2, 3, 2],
+ InstrItinData <tc_c20701f0, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b43e7930, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1],
+ InstrItinData <tc_c21d7447, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c57d9f39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b4407292, /*tc_2early*/
+ InstrItinData <tc_c818ff7f, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_b44ecf75, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ InstrItinData <tc_ce59038e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_cfa0e29b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b4b5c03a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_d03278fd, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b51dc29a, /*tc_1*/
- [InstrStage<1, [SLOT2]>], [3, 2],
+ InstrItinData <tc_d33e5eee, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d3632d88, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b83e6d73, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ InstrItinData <tc_d45ba9cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d47648a2, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d57d649c, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d61dfdc3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d68dca5c, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d7718fbe, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_db596beb, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b857bf4e, /*tc_st*/
+ InstrItinData <tc_db96aa6b, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_b8bffe55, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ InstrItinData <tc_dc51281d, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_decdde8a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_df4536ae, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b90a29b1, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_df5d53f9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9272d6c, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_e3d699e3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9e09e03, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [4, 1, 2],
+ InstrItinData <tc_e9170fb7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ed03645c, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_eed07714, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bab0eed9, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ InstrItinData <tc_eeda4109, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ef921005, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f098b237, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f0cdeccf, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bafaade3, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_f0e8e832, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bcf98408, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_f34c1c21, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bd8382d1, /*tc_newvjump*/
+ InstrItinData <tc_f38f92e1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_f529831b, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f6e2aff9, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bdceeac1, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ InstrItinData <tc_f7569068, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_be9602ff, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ InstrItinData <tc_f999c66e, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fae9dfa5, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fedb7e19, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>
+ ];
+}
+
+class DepScalarItinV67 {
+ list<InstrItinData> DepScalarItinV67_list = [
+ InstrItinData <tc_011e0e9d, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_01d44cb2, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bf061958, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ InstrItinData <tc_01e1be3b, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bfec0f01, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_02fe1c65, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c4db48cb, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_0655b949, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 3],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c4f596e3, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ InstrItinData <tc_075c8dd8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c79a189f, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
+ InstrItinData <tc_0a195f2c, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c8ce0b5c, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ InstrItinData <tc_0a6c20ae, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ba0d5da, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_0dfac0a7, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cd374165, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_0fac1eb8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf8126ae, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_1044324a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cfd8378a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_10b884b7, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_112d30d6, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1242dc2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1248597c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d08ee0f4, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_14ab4f41, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d1aa9eaa, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_151bf368, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d2e63d61, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ InstrItinData <tc_158aa3f7, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d5b7b0c1, /*tc_1*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_197dce51, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1981450d, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_d5c0729a, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ InstrItinData <tc_1b8138fc, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1c2c7a4a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1c7522a8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d63f638c, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_1d41f8b7, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d65dbf51, /*tc_latepredstaia*/
- [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3],
+ InstrItinData <tc_1e7875f0, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1fcb8495, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1fe4ab69, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d773585a, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_20131976, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9d43ecb, /*tc_1*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_2237d952, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_da4a37ed, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
+ InstrItinData <tc_234f8560, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_da97ee82, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ InstrItinData <tc_23708a21, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_24e109c7, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_24f426ab, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_27106296, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_280f7fe1, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_db2bce9c, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_28e55c6f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_de4df740, /*tc_1*/
+ InstrItinData <tc_2c13e7f5, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2c3e17fc, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_2f573607, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2f669c77, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_362b0be2, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_38382228, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_388f9897, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_de554571, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_38e0bae9, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3d14a17b, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_df3319ed, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_3edca78f, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e06f432a, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3],
+ InstrItinData <tc_3fbf1042, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_e4a7f9f0, /*tc_1*/
+ InstrItinData <tc_407e96f9, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e4b3cb20, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_40d64c94, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e78647bd, /*tc_1*/
+ InstrItinData <tc_4222e6bf, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_42ff66ba, /*tc_1*/
[InstrStage<1, [SLOT2]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e86aa961, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_442395f3, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e93a3d71, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ InstrItinData <tc_449acf79, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e95795ec, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_44d5a428, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9f3243f, /*tc_latepredldaia*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2],
+ InstrItinData <tc_44fffc58, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_45791fb8, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f429765c, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_45f9d1be, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_f675fee8, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_49fdfd4b, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f8e23f0b, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4a55d03c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f9058dd7, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_4abdbdc6, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fc3999b4, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_4ac61d92, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fcc3ddf9, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ InstrItinData <tc_4c1520ae, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_503ce0f3, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fe211424, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 1],
- [Hex_FWD, Hex_FWD]>
- ];
-}
+ InstrItinData <tc_53c851ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
-class DepScalarItinV66 {
- list<InstrItinData> DepScalarItinV66_list = [
- InstrItinData <tc_002cb246, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_5502c366, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0371abea, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 3],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_55255f2b, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [],
+ []>,
- InstrItinData <tc_05c070ec, /*tc_2latepred*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_556f6577, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_05d3a09b, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ InstrItinData <tc_55a9a350, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0663f615, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_55b33fda, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_56a124a7, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_57a55b54, /*tc_1*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5944960d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_096199d3, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
+ InstrItinData <tc_59a7822c, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5a4b5e58, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0a705168, /*tc_1*/
+ InstrItinData <tc_5b347363, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0ae0825c, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_5ceb2f9e, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5d636bc7, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0b2be201, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
+ InstrItinData <tc_5da50c4b, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5deb5e47, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5e4cf0e8, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0d8f5752, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_5f2afaf7, /*tc_latepredldaia*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_60e324ff, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_63567288, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_64b00d8a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_13bfbcf9, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_651cbe02, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14b272fa, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 3],
+ InstrItinData <tc_65279839, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_65cbd974, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_14b5c689, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_69bfb303, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_15aa71c5, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ InstrItinData <tc_6ae3426b, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6d861a95, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6e20402a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_174516e8, /*tc_3x*/
+ InstrItinData <tc_6f42bc60, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6fb32599, /*tc_3stall*/
[InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_17e0d2cd, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ InstrItinData <tc_6fc5dbea, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_711c805f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_713b66bf, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1a2fd869, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
+ InstrItinData <tc_7401744f, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1ad90acd, /*tc_3*/
- [InstrStage<1, [SLOT2]>], [2, 1],
+ InstrItinData <tc_7476d766, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1ae57e39, /*tc_2latepred*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
+ InstrItinData <tc_74a42bda, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1b6f7cec, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ InstrItinData <tc_76bb5435, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_77f94a5e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_1c4528a2, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_788b1d09, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7b9187d3, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7c31e19a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7c6d32e4, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7dc63b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7dcd9d89, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 3],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1c80410a, /*tc_2*/
+ InstrItinData <tc_7f7f45f5, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7f8ae742, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8035e91f, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_822c3c68, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_829d8a86, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_838c4d7a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_84a7500d, /*tc_2*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1d81e60e, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_86173609, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_887d1bb7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8a6d0d94, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1fc97744, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_8a825db2, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8b5bd4f5, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_20cdee80, /*tc_1*/
+ InstrItinData <tc_8e82e8ca, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9124c04f, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2332b92e, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_9165014d, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_24b66c99, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_25a78932, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_92240447, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2b8da4c2, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1],
+ InstrItinData <tc_934753bb, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2eabeebe, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ InstrItinData <tc_937dd41c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [],
[]>,
- InstrItinData <tc_2f7c551d, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ InstrItinData <tc_9406230a, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_95a33176, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2ff964b4, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
+ InstrItinData <tc_96ef76ef, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_30b9bb4a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
+ InstrItinData <tc_975a4e54, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_32779c6f, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_9783714b, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_36153880, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [],
+ InstrItinData <tc_988416e3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_9b34f5e0, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
[]>,
- InstrItinData <tc_362c6592, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 3],
+ InstrItinData <tc_9b3c0462, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3962fa26, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 3],
+ InstrItinData <tc_9bcfb2ee, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9c52f549, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e27f2f9, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e72dc89, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9edb7c77, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_39dfefe8, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [],
- []>,
+ InstrItinData <tc_9edefe01, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3a867367, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
+ InstrItinData <tc_9f6cd987, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3b470976, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ InstrItinData <tc_a08b630b, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3b5b7ef9, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
+ InstrItinData <tc_a1297125, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3bd75825, /*tc_3*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_a154b476, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3c76b0ff, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
+ InstrItinData <tc_a2b365d2, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a3070909, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a32e03e7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_3d495a39, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ InstrItinData <tc_a38c45dc, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a4e22bbd, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a4ee89db, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_a7a13fac, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7bdb22c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a9edeffa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_40116ca8, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3],
+ InstrItinData <tc_abfd9a6d, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ac65613f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_addc37a8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ae5babd7, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_aee6250c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b1ae5f67, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_b34eb232, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_b4dc7630, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_434c8e1e, /*tc_3stall*/
+ InstrItinData <tc_b570493d, /*tc_3stall*/
[InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4414d8b1, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_b7c4062a, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b837298f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_ba9255a6, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_44d3da28, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_bb07f2c5, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4560740b, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
+ InstrItinData <tc_bb831a7c, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4837eefb, /*tc_3stall*/
+ InstrItinData <tc_bf2ffc0f, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c20701f0, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c21d7447, /*tc_3x*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_49a8207d, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_c57d9f39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4ae7b58b, /*tc_3*/
- [InstrStage<1, [SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
+ InstrItinData <tc_c818ff7f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
- InstrItinData <tc_4b68bce4, /*tc_st*/
+ InstrItinData <tc_ce59038e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_cfa0e29b, /*tc_st*/
[InstrStage<1, [SLOT0]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4c5ba658, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ InstrItinData <tc_d03278fd, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_4d5fa3a1, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [2],
+ InstrItinData <tc_d33e5eee, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d3632d88, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d45ba9cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_53559e35, /*tc_latepredstaia*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d47648a2, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_56336eb0, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
+ InstrItinData <tc_d57d649c, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d61dfdc3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d68dca5c, /*tc_3stall*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_56f114f4, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_d7718fbe, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_db596beb, /*tc_3x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_57890846, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_db96aa6b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_dc51281d, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5a2711e5, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_decdde8a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_df4536ae, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5abb5e3f, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ InstrItinData <tc_df5d53f9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5aee39f7, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e3d699e3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_e9170fb7, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ed03645c, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_eed07714, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_eeda4109, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ef921005, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f098b237, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5b54b33f, /*tc_3x*/
+ InstrItinData <tc_f0cdeccf, /*tc_3x*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5b7c0967, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
+ InstrItinData <tc_f0e8e832, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f34c1c21, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f38f92e1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_f529831b, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f6e2aff9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f7569068, /*tc_4x*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f999c66e, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5bf126a6, /*tc_st*/
+ InstrItinData <tc_fae9dfa5, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fedb7e19, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>
+ ];
+}
+
+class DepScalarItinV67T {
+ list<InstrItinData> DepScalarItinV67T_list = [
+ InstrItinData <tc_011e0e9d, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_01d44cb2, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_01e1be3b, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_02fe1c65, /*tc_4x*/
+ [InstrStage<1, [SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0655b949, /*tc_st*/
[InstrStage<1, [SLOT0]>], [2, 3],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5d7f5414, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_075c8dd8, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_5ef37dc4, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
+ InstrItinData <tc_0a195f2c, /*tc_4x*/
+ [InstrStage<1, [SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6132ba3d, /*tc_2*/
+ InstrItinData <tc_0a6c20ae, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_0ba0d5da, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_0dfac0a7, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_61830035, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_0fac1eb8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_640086b5, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_1044324a, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_10b884b7, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_112d30d6, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1242dc2a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1248597c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_14ab4f41, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_643b4717, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ InstrItinData <tc_151bf368, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_158aa3f7, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_67435e81, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
+ InstrItinData <tc_197dce51, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_675e4897, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 1],
+ InstrItinData <tc_1981450d, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_1b8138fc, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_679309b8, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_1c2c7a4a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1c7522a8, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1d41f8b7, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1e7875f0, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_1fcb8495, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_6b25e783, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_1fe4ab69, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_703e822c, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_20131976, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_2237d952, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7186d325, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ InstrItinData <tc_234f8560, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7646c131, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
+ InstrItinData <tc_23708a21, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_24e109c7, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_76851da1, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_24f426ab, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_779080bf, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_27106296, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_784490da, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
+ InstrItinData <tc_280f7fe1, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_785f65a7, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_28e55c6f, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_7a91e76a, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
+ InstrItinData <tc_2c13e7f5, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_838b34ea, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2c3e17fc, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_85c9c08f, /*tc_1*/
+ InstrItinData <tc_2f573607, /*tc_1*/
[InstrStage<1, [SLOT2]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_85d5d03f, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_2f669c77, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_862b3e70, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ InstrItinData <tc_362b0be2, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_88b4f13d, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_38382228, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_89e94ad3, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
+ InstrItinData <tc_388f9897, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_38e0bae9, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_3d14a17b, /*tc_1*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b121f4a, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [],
- []>,
+ InstrItinData <tc_3edca78f, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8b3e402a, /*tc_2latepred*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4],
+ InstrItinData <tc_3fbf1042, /*tc_1*/
+ [InstrStage<1, [SLOT0]>], [3],
[Hex_FWD]>,
- InstrItinData <tc_8c945be0, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2],
+ InstrItinData <tc_407e96f9, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_40d64c94, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8c99de45, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
+ InstrItinData <tc_4222e6bf, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_42ff66ba, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_442395f3, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_449acf79, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44d5a428, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_44fffc58, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_45791fb8, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8d9d0154, /*tc_3stall*/
+ InstrItinData <tc_45f9d1be, /*tc_2early*/
[InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_8fb7ab1b, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ InstrItinData <tc_49fdfd4b, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4a55d03c, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4abdbdc6, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4ac61d92, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_4c1520ae, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_503ce0f3, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9461ff31, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_53c851ab, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_946df596, /*tc_1*/
+ InstrItinData <tc_5502c366, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9ad9998f, /*tc_3stall*/
+ InstrItinData <tc_55255f2b, /*tc_3stall*/
[InstrStage<1, [SLOT3]>], [],
[]>,
- InstrItinData <tc_9bfd761f, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 3, 2],
+ InstrItinData <tc_556f6577, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9c3ecd83, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_55a9a350, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9ca930f7, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1, 2],
+ InstrItinData <tc_55b33fda, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9da59d12, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 3, 1],
+ InstrItinData <tc_56a124a7, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_57a55b54, /*tc_1*/
+ [InstrStage<1, [SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5944960d, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9debc299, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
+ InstrItinData <tc_59a7822c, /*tc_1*/
+ [InstrStage<1, [SLOT0]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5a4b5e58, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5b347363, /*tc_1*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5ceb2f9e, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9e313203, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ InstrItinData <tc_5d636bc7, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_9fc3dae0, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_5da50c4b, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a1123dda, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
+ InstrItinData <tc_5deb5e47, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5e4cf0e8, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_5f2afaf7, /*tc_latepredldaia*/
+ [InstrStage<1, [SLOT0]>], [4, 4, 3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_60e324ff, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_a1c00888, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_63567288, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0]>], [4],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_64b00d8a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a58fd5cc, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
+ InstrItinData <tc_651cbe02, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_65279839, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_65cbd974, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a5d4aeec, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_69bfb303, /*tc_3*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a6b1eca9, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_6ae3426b, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a813cf9a, /*tc_2*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_6d861a95, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_a9d88b22, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_6e20402a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 3],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ae53734a, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
+ InstrItinData <tc_6f42bc60, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_6fb32599, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_b31c2e97, /*tc_1*/
+ InstrItinData <tc_6fc5dbea, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b343892a, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 3, 2],
+ InstrItinData <tc_711c805f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_713b66bf, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b43e7930, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1],
+ InstrItinData <tc_7401744f, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7476d766, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b4407292, /*tc_2early*/
+ InstrItinData <tc_74a42bda, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_76bb5435, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_77f94a5e, /*tc_st*/
[InstrStage<1, [SLOT0]>], [],
[]>,
- InstrItinData <tc_b44ecf75, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2],
+ InstrItinData <tc_788b1d09, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7b9187d3, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7c31e19a, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b4b5c03a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_7c6d32e4, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b51dc29a, /*tc_1*/
- [InstrStage<1, [SLOT2]>], [3, 2],
+ InstrItinData <tc_7dc63b5c, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b83e6d73, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ InstrItinData <tc_7dcd9d89, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 3],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_7f7f45f5, /*tc_4x*/
+ [InstrStage<1, [SLOT3]>], [5, 5, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b857bf4e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_7f8ae742, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b8bffe55, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
+ InstrItinData <tc_8035e91f, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_822c3c68, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b90a29b1, /*tc_st*/
+ InstrItinData <tc_829d8a86, /*tc_st*/
[InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9272d6c, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [4, 2],
+ InstrItinData <tc_838c4d7a, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_84a7500d, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_86173609, /*tc_2latepred*/
+ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [4, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_887d1bb7, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8a6d0d94, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b9e09e03, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [4, 1, 2],
+ InstrItinData <tc_8a825db2, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bab0eed9, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
+ InstrItinData <tc_8b5bd4f5, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_8e82e8ca, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9124c04f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9165014d, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_92240447, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bafaade3, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
+ InstrItinData <tc_934753bb, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bcf98408, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
+ InstrItinData <tc_937dd41c, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_9406230a, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bd8382d1, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ InstrItinData <tc_95a33176, /*tc_2*/
+ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bdceeac1, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ InstrItinData <tc_96ef76ef, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_be9602ff, /*tc_st*/
+ InstrItinData <tc_975a4e54, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 3, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9783714b, /*tc_4x*/
+ [InstrStage<1, [SLOT3]>], [5, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_988416e3, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_9b34f5e0, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [],
+ []>,
+
+ InstrItinData <tc_9b3c0462, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9bcfb2ee, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bf061958, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
+ InstrItinData <tc_9c52f549, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e27f2f9, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9e72dc89, /*tc_4x*/
+ [InstrStage<1, [SLOT3]>], [5, 2, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bfec0f01, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
+ InstrItinData <tc_9edb7c77, /*tc_4x*/
+ [InstrStage<1, [SLOT3]>], [5, 2, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c4db48cb, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
+ InstrItinData <tc_9edefe01, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_9f6cd987, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c4f596e3, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
+ InstrItinData <tc_a08b630b, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a1297125, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a154b476, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c79a189f, /*tc_st*/
+ InstrItinData <tc_a2b365d2, /*tc_st*/
[InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_c8ce0b5c, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_cd374165, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_a3070909, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [1, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cf8126ae, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_a32e03e7, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_cfd8378a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_a38c45dc, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 1, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a4e22bbd, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a4ee89db, /*tc_2early*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_a7a13fac, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_a7bdb22c, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d08ee0f4, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_a9edeffa, /*tc_st*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d1aa9eaa, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 1, 1],
+ InstrItinData <tc_abfd9a6d, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ac65613f, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_addc37a8, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d2e63d61, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ InstrItinData <tc_ae5babd7, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d5b7b0c1, /*tc_1*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_aee6250c, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_b1ae5f67, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_d5c0729a, /*tc_st*/
+ InstrItinData <tc_b34eb232, /*tc_3stall*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_b4dc7630, /*tc_st*/
[InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d63f638c, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
+ InstrItinData <tc_b570493d, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d65dbf51, /*tc_latepredstaia*/
- [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3],
+ InstrItinData <tc_b7c4062a, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d773585a, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
+ InstrItinData <tc_b837298f, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [],
+ []>,
+
+ InstrItinData <tc_ba9255a6, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb07f2c5, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bb831a7c, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2, 2, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_bf2ffc0f, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_d9d43ecb, /*tc_1*/
- [InstrStage<1, [SLOT3]>], [2, 2],
+ InstrItinData <tc_c20701f0, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c21d7447, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_c57d9f39, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [3, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_da4a37ed, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 3],
+ InstrItinData <tc_c818ff7f, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [],
+ []>,
+
+ InstrItinData <tc_ce59038e, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_cfa0e29b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_da97ee82, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 3],
+ InstrItinData <tc_d03278fd, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_db2bce9c, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
+ InstrItinData <tc_d33e5eee, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d3632d88, /*tc_2*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_de4df740, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_d45ba9cd, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_de554571, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ InstrItinData <tc_d47648a2, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [1, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_df3319ed, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
+ InstrItinData <tc_d57d649c, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [2],
+ [Hex_FWD]>,
+
+ InstrItinData <tc_d61dfdc3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e06f432a, /*tc_newvjump*/
- [InstrStage<1, [SLOT0]>], [3],
+ InstrItinData <tc_d68dca5c, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_d7718fbe, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [1],
[Hex_FWD]>,
- InstrItinData <tc_e4a7f9f0, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ InstrItinData <tc_db596beb, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e4b3cb20, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_db96aa6b, /*tc_st*/
+ [InstrStage<1, [SLOT0]>], [1],
+ [Hex_FWD]>,
- InstrItinData <tc_e78647bd, /*tc_1*/
- [InstrStage<1, [SLOT2]>], [2, 2],
+ InstrItinData <tc_dc51281d, /*tc_3*/
+ [InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e86aa961, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_decdde8a, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [2],
+ [Hex_FWD]>,
- InstrItinData <tc_e93a3d71, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_df4536ae, /*tc_3stall*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e95795ec, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_df5d53f9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e9f3243f, /*tc_latepredldaia*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e3d699e3, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f429765c, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_e9170fb7, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f675fee8, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_ed03645c, /*tc_1*/
+ [InstrStage<1, [SLOT2]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f8e23f0b, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_eed07714, /*tc_ld*/
+ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f9058dd7, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
+ InstrItinData <tc_eeda4109, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_ef921005, /*tc_1*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f098b237, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f0cdeccf, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fc3999b4, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [2],
+ InstrItinData <tc_f0e8e832, /*tc_4x*/
+ [InstrStage<1, [SLOT3]>], [5, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f34c1c21, /*tc_2*/
+ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f38f92e1, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [2],
[Hex_FWD]>,
- InstrItinData <tc_fcc3ddf9, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
+ InstrItinData <tc_f529831b, /*tc_latepredstaia*/
+ [InstrStage<1, [SLOT0]>], [4, 3, 1, 2, 3],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f6e2aff9, /*tc_newvjump*/
+ [InstrStage<1, [SLOT0]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_f7569068, /*tc_4x*/
+ [InstrStage<1, [SLOT3]>], [5, 5, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_fe211424, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 1],
- [Hex_FWD, Hex_FWD]>
+ InstrItinData <tc_f999c66e, /*tc_1*/
+ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [2, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fae9dfa5, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
+ InstrItinData <tc_fedb7e19, /*tc_ld*/
+ [InstrStage<1, [SLOT0]>], [4, 2, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>
];
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepITypes.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepITypes.h
index 358345e027d8..b261b4653127 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepITypes.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepITypes.h
@@ -5,7 +5,7 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// Automatically generated file, please consult code owner before editing.
+// Automatically generated file, do not edit!
//===----------------------------------------------------------------------===//
namespace llvm {
@@ -16,49 +16,48 @@ enum Type {
TypeALU32_ADDI = 2,
TypeALU64 = 3,
TypeCJ = 4,
- TypeCOPROC_VX = 5,
- TypeCR = 6,
- TypeCVI_4SLOT_MPY = 7,
- TypeCVI_GATHER = 8,
- TypeCVI_GATHER_RST = 9,
- TypeCVI_HIST = 10,
- TypeCVI_SCATTER = 11,
- TypeCVI_SCATTER_DV = 12,
- TypeCVI_SCATTER_NEW_RST = 13,
- TypeCVI_SCATTER_NEW_ST = 14,
- TypeCVI_SCATTER_RST = 15,
- TypeCVI_VA = 16,
- TypeCVI_VA_DV = 17,
- TypeCVI_VINLANESAT = 18,
- TypeCVI_VM_LD = 19,
- TypeCVI_VM_NEW_ST = 20,
- TypeCVI_VM_ST = 21,
- TypeCVI_VM_STU = 22,
- TypeCVI_VM_TMP_LD = 23,
- TypeCVI_VM_VP_LDU = 24,
- TypeCVI_VP = 25,
- TypeCVI_VP_VS = 26,
- TypeCVI_VS = 27,
- TypeCVI_VS_VX = 28,
- TypeCVI_VX = 29,
- TypeCVI_VX_DV = 30,
- TypeCVI_VX_LATE = 31,
- TypeCVI_ZW = 32,
- TypeDUPLEX = 33,
- TypeENDLOOP = 34,
- TypeEXTENDER = 35,
- TypeJ = 36,
- TypeLD = 37,
- TypeM = 38,
- TypeMAPPING = 39,
- TypeNCJ = 40,
- TypePSEUDO = 41,
- TypeST = 42,
- TypeSUBINSN = 43,
- TypeS_2op = 44,
- TypeS_3op = 45,
- TypeV2LDST = 48,
- TypeV4LDST = 49,
+ TypeCR = 7,
+ TypeCVI_4SLOT_MPY = 8,
+ TypeCVI_GATHER = 9,
+ TypeCVI_GATHER_DV = 10,
+ TypeCVI_GATHER_RST = 11,
+ TypeCVI_HIST = 12,
+ TypeCVI_SCATTER = 13,
+ TypeCVI_SCATTER_DV = 14,
+ TypeCVI_SCATTER_NEW_RST = 15,
+ TypeCVI_SCATTER_NEW_ST = 16,
+ TypeCVI_SCATTER_RST = 17,
+ TypeCVI_VA = 18,
+ TypeCVI_VA_DV = 19,
+ TypeCVI_VM_LD = 20,
+ TypeCVI_VM_NEW_ST = 21,
+ TypeCVI_VM_ST = 22,
+ TypeCVI_VM_STU = 23,
+ TypeCVI_VM_TMP_LD = 24,
+ TypeCVI_VM_VP_LDU = 25,
+ TypeCVI_VP = 26,
+ TypeCVI_VP_VS = 27,
+ TypeCVI_VS = 28,
+ TypeCVI_VS_VX = 29,
+ TypeCVI_VX = 30,
+ TypeCVI_VX_DV = 31,
+ TypeCVI_VX_LATE = 32,
+ TypeCVI_ZW = 33,
+ TypeDUPLEX = 34,
+ TypeENDLOOP = 35,
+ TypeEXTENDER = 36,
+ TypeJ = 37,
+ TypeLD = 38,
+ TypeM = 39,
+ TypeMAPPING = 40,
+ TypeNCJ = 41,
+ TypePSEUDO = 42,
+ TypeST = 43,
+ TypeSUBINSN = 44,
+ TypeS_2op = 45,
+ TypeS_3op = 46,
+ TypeV2LDST = 49,
+ TypeV4LDST = 50,
};
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepITypes.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepITypes.td
index 91c02b84b87c..f251a291c23c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepITypes.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepITypes.td
@@ -5,7 +5,7 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// Automatically generated file, please consult code owner before editing.
+// Automatically generated file, do not edit!
//===----------------------------------------------------------------------===//
class IType<bits<7> t> { bits<7> Value = t; }
@@ -14,46 +14,45 @@ def TypeALU32_3op : IType<1>;
def TypeALU32_ADDI : IType<2>;
def TypeALU64 : IType<3>;
def TypeCJ : IType<4>;
-def TypeCOPROC_VX : IType<5>;
-def TypeCR : IType<6>;
-def TypeCVI_4SLOT_MPY : IType<7>;
-def TypeCVI_GATHER : IType<8>;
-def TypeCVI_GATHER_RST : IType<9>;
-def TypeCVI_HIST : IType<10>;
-def TypeCVI_SCATTER : IType<11>;
-def TypeCVI_SCATTER_DV : IType<12>;
-def TypeCVI_SCATTER_NEW_RST : IType<13>;
-def TypeCVI_SCATTER_NEW_ST : IType<14>;
-def TypeCVI_SCATTER_RST : IType<15>;
-def TypeCVI_VA : IType<16>;
-def TypeCVI_VA_DV : IType<17>;
-def TypeCVI_VINLANESAT : IType<18>;
-def TypeCVI_VM_LD : IType<19>;
-def TypeCVI_VM_NEW_ST : IType<20>;
-def TypeCVI_VM_ST : IType<21>;
-def TypeCVI_VM_STU : IType<22>;
-def TypeCVI_VM_TMP_LD : IType<23>;
-def TypeCVI_VM_VP_LDU : IType<24>;
-def TypeCVI_VP : IType<25>;
-def TypeCVI_VP_VS : IType<26>;
-def TypeCVI_VS : IType<27>;
-def TypeCVI_VS_VX : IType<28>;
-def TypeCVI_VX : IType<29>;
-def TypeCVI_VX_DV : IType<30>;
-def TypeCVI_VX_LATE : IType<31>;
-def TypeCVI_ZW : IType<32>;
-def TypeDUPLEX : IType<33>;
-def TypeENDLOOP : IType<34>;
-def TypeEXTENDER : IType<35>;
-def TypeJ : IType<36>;
-def TypeLD : IType<37>;
-def TypeM : IType<38>;
-def TypeMAPPING : IType<39>;
-def TypeNCJ : IType<40>;
-def TypePSEUDO : IType<41>;
-def TypeST : IType<42>;
-def TypeSUBINSN : IType<43>;
-def TypeS_2op : IType<44>;
-def TypeS_3op : IType<45>;
-def TypeV2LDST : IType<48>;
-def TypeV4LDST : IType<49>;
+def TypeCR : IType<7>;
+def TypeCVI_4SLOT_MPY : IType<8>;
+def TypeCVI_GATHER : IType<9>;
+def TypeCVI_GATHER_DV : IType<10>;
+def TypeCVI_GATHER_RST : IType<11>;
+def TypeCVI_HIST : IType<12>;
+def TypeCVI_SCATTER : IType<13>;
+def TypeCVI_SCATTER_DV : IType<14>;
+def TypeCVI_SCATTER_NEW_RST : IType<15>;
+def TypeCVI_SCATTER_NEW_ST : IType<16>;
+def TypeCVI_SCATTER_RST : IType<17>;
+def TypeCVI_VA : IType<18>;
+def TypeCVI_VA_DV : IType<19>;
+def TypeCVI_VM_LD : IType<20>;
+def TypeCVI_VM_NEW_ST : IType<21>;
+def TypeCVI_VM_ST : IType<22>;
+def TypeCVI_VM_STU : IType<23>;
+def TypeCVI_VM_TMP_LD : IType<24>;
+def TypeCVI_VM_VP_LDU : IType<25>;
+def TypeCVI_VP : IType<26>;
+def TypeCVI_VP_VS : IType<27>;
+def TypeCVI_VS : IType<28>;
+def TypeCVI_VS_VX : IType<29>;
+def TypeCVI_VX : IType<30>;
+def TypeCVI_VX_DV : IType<31>;
+def TypeCVI_VX_LATE : IType<32>;
+def TypeCVI_ZW : IType<33>;
+def TypeDUPLEX : IType<34>;
+def TypeENDLOOP : IType<35>;
+def TypeEXTENDER : IType<36>;
+def TypeJ : IType<37>;
+def TypeLD : IType<38>;
+def TypeM : IType<39>;
+def TypeMAPPING : IType<40>;
+def TypeNCJ : IType<41>;
+def TypePSEUDO : IType<42>;
+def TypeST : IType<43>;
+def TypeSUBINSN : IType<44>;
+def TypeS_2op : IType<45>;
+def TypeS_3op : IType<46>;
+def TypeV2LDST : IType<49>;
+def TypeV4LDST : IType<50>;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
index c08d9a388d3e..305115da5763 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
@@ -5,113 +5,138 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// Automatically generated file, please consult code owner before editing.
+// Automatically generated file, do not edit!
//===----------------------------------------------------------------------===//
-class Enc_890909 : OpcodeHexagon {
+class Enc_5e2823 : OpcodeHexagon {
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
- bits <2> Pe4;
- let Inst{6-5} = Pe4{1-0};
}
-class Enc_9be1de : OpcodeHexagon {
- bits <2> Qs4;
- let Inst{6-5} = Qs4{1-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vw32;
- let Inst{4-0} = Vw32{4-0};
+class Enc_b9c5fb : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
}
-class Enc_527412 : OpcodeHexagon {
- bits <2> Ps4;
- let Inst{17-16} = Ps4{1-0};
- bits <2> Pt4;
- let Inst{9-8} = Pt4{1-0};
+class Enc_5ab2be : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_efaed8 : OpcodeHexagon {
- bits <1> Ii;
- let Inst{8-8} = Ii{0-0};
-}
-class Enc_a568d4 : OpcodeHexagon {
+class Enc_bd6011 : OpcodeHexagon {
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Rx32;
- let Inst{4-0} = Rx32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
-class Enc_27b757 : OpcodeHexagon {
- bits <4> Ii;
- let Inst{13-13} = Ii{3-3};
- let Inst{10-8} = Ii{2-0};
- bits <2> Pv4;
- let Inst{12-11} = Pv4{1-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vs32;
- let Inst{4-0} = Vs32{4-0};
+class Enc_cb9321 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{27-21} = Ii{15-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
-class Enc_1de724 : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <4> Rs16;
- let Inst{19-16} = Rs16{3-0};
- bits <4> n1;
- let Inst{28-28} = n1{3-3};
- let Inst{24-22} = n1{2-0};
+class Enc_a56825 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
}
-class Enc_0e41fa : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{12-8} = Vuu32{4-0};
+class Enc_140c83 : OpcodeHexagon {
+ bits <10> Ii;
+ let Inst{21-21} = Ii{9-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_18c338 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <8> II;
+ let Inst{22-16} = II{7-1};
+ let Inst{13-13} = II{0-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_be32a5 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
}
-class Enc_3d6d37 : OpcodeHexagon {
- bits <2> Qs4;
- let Inst{6-5} = Qs4{1-0};
+class Enc_ea23e4 : OpcodeHexagon {
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_e3b0c4 : OpcodeHexagon {
+
+}
+class Enc_ea4c54 : OpcodeHexagon {
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Vvv32;
- let Inst{12-8} = Vvv32{4-0};
- bits <5> Vw32;
- let Inst{4-0} = Vw32{4-0};
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
-class Enc_a641d0 : OpcodeHexagon {
+class Enc_e38e1f : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <2> Pu4;
+ let Inst{22-21} = Pu4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_9b0bc1 : OpcodeHexagon {
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Vvv32;
- let Inst{12-8} = Vvv32{4-0};
- bits <5> Vw32;
- let Inst{4-0} = Vw32{4-0};
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
-class Enc_802dc0 : OpcodeHexagon {
- bits <1> Ii;
- let Inst{8-8} = Ii{0-0};
- bits <2> Qv4;
- let Inst{23-22} = Qv4{1-0};
+class Enc_90cd8b : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
-class Enc_6b197f : OpcodeHexagon {
- bits <4> Ii;
- let Inst{8-5} = Ii{3-0};
- bits <5> Ryy32;
- let Inst{4-0} = Ryy32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_3a3d62 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_0cb018 : OpcodeHexagon {
+ bits <5> Cs32;
+ let Inst{20-16} = Cs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
class Enc_51436c : OpcodeHexagon {
bits <16> Ii;
@@ -120,142 +145,281 @@ class Enc_51436c : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_c7a204 : OpcodeHexagon {
- bits <6> II;
- let Inst{5-0} = II{5-0};
+class Enc_bd811a : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Cd32;
+ let Inst{4-0} = Cd32{4-0};
+}
+class Enc_5e87ce : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{23-22} = Ii{15-14};
+ let Inst{20-16} = Ii{13-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_fcf7a7 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
bits <5> Rtt32;
let Inst{12-8} = Rtt32{4-0};
- bits <5> Re32;
- let Inst{20-16} = Re32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
}
-class Enc_db40cd : OpcodeHexagon {
- bits <6> Ii;
- let Inst{6-3} = Ii{5-2};
+class Enc_88c16c : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
+}
+class Enc_2b3f60 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+ bits <2> Px4;
+ let Inst{6-5} = Px4{1-0};
+}
+class Enc_311abd : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_c2b48e : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
}
-class Enc_a1e29d : OpcodeHexagon {
+class Enc_08d755 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_02553a : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{11-5} = Ii{6-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_f0cca7 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <6> II;
+ let Inst{20-16} = II{5-1};
+ let Inst{13-13} = II{0-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_9cdba7 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_a05677 : OpcodeHexagon {
bits <5> Ii;
let Inst{12-8} = Ii{4-0};
- bits <5> II;
- let Inst{22-21} = II{4-3};
- let Inst{7-5} = II{2-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Rx32;
- let Inst{4-0} = Rx32{4-0};
-}
-class Enc_d15d19 : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Vs32;
- let Inst{4-0} = Vs32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
-class Enc_e90a15 : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <3> Ns8;
- let Inst{18-16} = Ns8{2-0};
- bits <4> n1;
- let Inst{29-29} = n1{3-3};
- let Inst{26-25} = n1{2-1};
- let Inst{22-22} = n1{0-0};
+class Enc_2b518f : OpcodeHexagon {
+ bits <32> Ii;
+ let Inst{27-16} = Ii{31-20};
+ let Inst{13-0} = Ii{19-6};
}
-class Enc_e0a47a : OpcodeHexagon {
- bits <4> Ii;
- let Inst{8-5} = Ii{3-0};
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
+class Enc_fb6577 : OpcodeHexagon {
+ bits <2> Pu4;
+ let Inst{9-8} = Pu4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
}
-class Enc_140c83 : OpcodeHexagon {
- bits <10> Ii;
- let Inst{21-21} = Ii{9-9};
- let Inst{13-5} = Ii{8-0};
+class Enc_b8c967 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_7eee72 : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
+class Enc_667b39 : OpcodeHexagon {
+ bits <5> Css32;
+ let Inst{20-16} = Css32{4-0};
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
}
-class Enc_310ba1 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
+class Enc_0ed752 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Cdd32;
+ let Inst{4-0} = Cdd32{4-0};
+}
+class Enc_03833b : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_0d8adb : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_3680c2 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{11-5} = Ii{6-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_412ff0 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Ru32;
+ let Inst{4-0} = Ru32{4-0};
+ bits <5> Rxx32;
+ let Inst{12-8} = Rxx32{4-0};
+}
+class Enc_831a7d : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
bits <5> Rtt32;
- let Inst{20-16} = Rtt32{4-0};
- bits <5> Vx32;
- let Inst{4-0} = Vx32{4-0};
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
+ bits <2> Pe4;
+ let Inst{6-5} = Pe4{1-0};
}
-class Enc_d7dc10 : OpcodeHexagon {
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
+class Enc_d2216a : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
bits <5> Rtt32;
let Inst{12-8} = Rtt32{4-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
-class Enc_6baed4 : OpcodeHexagon {
- bits <3> Ii;
- let Inst{10-8} = Ii{2-0};
- bits <2> Pv4;
- let Inst{12-11} = Pv4{1-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_d2c7f1 : OpcodeHexagon {
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+ bits <2> Pe4;
+ let Inst{6-5} = Pe4{1-0};
}
-class Enc_736575 : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <4> Rs16;
- let Inst{19-16} = Rs16{3-0};
- bits <4> n1;
- let Inst{28-28} = n1{3-3};
- let Inst{25-23} = n1{2-0};
+class Enc_5eac98 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-8} = Ii{5-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
}
-class Enc_8dec2e : OpcodeHexagon {
+class Enc_927852 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_7e5a82 : OpcodeHexagon {
bits <5> Ii;
let Inst{12-8} = Ii{4-0};
bits <5> Rss32;
let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_65d691 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_454a26 : OpcodeHexagon {
+ bits <2> Pt4;
+ let Inst{9-8} = Pt4{1-0};
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_5d6c34 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-8} = Ii{5-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_cb4b4e : OpcodeHexagon {
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_cda00a : OpcodeHexagon {
+ bits <12> Ii;
+ let Inst{19-16} = Ii{11-8};
+ let Inst{12-5} = Ii{7-0};
+ bits <2> Pu4;
+ let Inst{22-21} = Pu4{1-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_28dcbb : OpcodeHexagon {
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Vvv32;
- let Inst{4-0} = Vvv32{4-0};
+class Enc_bd0b33 : OpcodeHexagon {
+ bits <10> Ii;
+ let Inst{21-21} = Ii{9-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
}
-class Enc_eaa9f8 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{20-16} = Vv32{4-0};
- bits <2> Qx4;
- let Inst{1-0} = Qx4{1-0};
+class Enc_c0cdde : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
}
-class Enc_509701 : OpcodeHexagon {
- bits <19> Ii;
- let Inst{26-25} = Ii{18-17};
- let Inst{20-16} = Ii{16-12};
- let Inst{13-5} = Ii{11-3};
+class Enc_78e566 : OpcodeHexagon {
+ bits <2> Pt4;
+ let Inst{9-8} = Pt4{1-0};
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
@@ -270,80 +434,29 @@ class Enc_830e5d : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_79b8c8 : OpcodeHexagon {
- bits <6> Ii;
- let Inst{6-3} = Ii{5-2};
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
-class Enc_58a8bf : OpcodeHexagon {
- bits <3> Ii;
- let Inst{10-8} = Ii{2-0};
- bits <2> Pv4;
- let Inst{12-11} = Pv4{1-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
-class Enc_041d7b : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <4> Rs16;
- let Inst{19-16} = Rs16{3-0};
- bits <5> n1;
- let Inst{28-28} = n1{4-4};
- let Inst{24-23} = n1{3-2};
- let Inst{13-13} = n1{1-1};
- let Inst{8-8} = n1{0-0};
+class Enc_f5e933 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
-class Enc_f44229 : OpcodeHexagon {
- bits <7> Ii;
- let Inst{13-13} = Ii{6-6};
- let Inst{7-3} = Ii{5-1};
- bits <2> Pv4;
- let Inst{1-0} = Pv4{1-0};
+class Enc_48b75f : OpcodeHexagon {
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <3> Nt8;
- let Inst{10-8} = Nt8{2-0};
-}
-class Enc_aad80c : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{12-8} = Vuu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vdd32;
- let Inst{4-0} = Vdd32{4-0};
-}
-class Enc_87c142 : OpcodeHexagon {
- bits <7> Ii;
- let Inst{8-4} = Ii{6-2};
- bits <4> Rt16;
- let Inst{3-0} = Rt16{3-0};
-}
-class Enc_86a14b : OpcodeHexagon {
- bits <8> Ii;
- let Inst{7-3} = Ii{7-3};
- bits <3> Rdd8;
- let Inst{2-0} = Rdd8{2-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
}
-class Enc_9a33d5 : OpcodeHexagon {
- bits <7> Ii;
- let Inst{6-3} = Ii{6-3};
- bits <2> Pv4;
- let Inst{1-0} = Pv4{1-0};
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_527412 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <2> Pt4;
+ let Inst{9-8} = Pt4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
-class Enc_a56825 : OpcodeHexagon {
+class Enc_329361 : OpcodeHexagon {
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
bits <5> Rss32;
let Inst{20-16} = Rss32{4-0};
bits <5> Rtt32;
@@ -351,81 +464,85 @@ class Enc_a56825 : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_9ea4cf : OpcodeHexagon {
- bits <2> Ii;
- let Inst{13-13} = Ii{1-1};
- let Inst{6-6} = Ii{0-0};
- bits <6> II;
- let Inst{5-0} = II{5-0};
- bits <5> Ru32;
- let Inst{20-16} = Ru32{4-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
+class Enc_284ebb : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <2> Pt4;
+ let Inst{9-8} = Pt4{1-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
}
-class Enc_ee5ed0 : OpcodeHexagon {
- bits <4> Rs16;
- let Inst{7-4} = Rs16{3-0};
- bits <4> Rd16;
- let Inst{3-0} = Rd16{3-0};
- bits <2> n1;
- let Inst{9-8} = n1{1-0};
+class Enc_607661 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{12-7} = Ii{5-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
-class Enc_bddee3 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vyyyy32;
- let Inst{4-0} = Vyyyy32{4-0};
- bits <3> Rx8;
- let Inst{18-16} = Rx8{2-0};
+class Enc_9ac432 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <2> Pt4;
+ let Inst{9-8} = Pt4{1-0};
+ bits <2> Pu4;
+ let Inst{7-6} = Pu4{1-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
}
-class Enc_935d9b : OpcodeHexagon {
+class Enc_1f19b5 : OpcodeHexagon {
bits <5> Ii;
- let Inst{6-3} = Ii{4-1};
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+ let Inst{9-5} = Ii{4-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
}
-class Enc_61f0b0 : OpcodeHexagon {
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Rxx32;
- let Inst{4-0} = Rxx32{4-0};
+class Enc_e6c957 : OpcodeHexagon {
+ bits <10> Ii;
+ let Inst{21-21} = Ii{9-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
}
-class Enc_bd6011 : OpcodeHexagon {
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
+class Enc_83ee64 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
-}
-class Enc_65d691 : OpcodeHexagon {
- bits <2> Ps4;
- let Inst{17-16} = Ps4{1-0};
bits <2> Pd4;
let Inst{1-0} = Pd4{1-0};
}
-class Enc_e8c45e : OpcodeHexagon {
- bits <7> Ii;
- let Inst{13-13} = Ii{6-6};
- let Inst{7-3} = Ii{5-1};
- bits <2> Pv4;
- let Inst{1-0} = Pv4{1-0};
+class Enc_2ae154 : OpcodeHexagon {
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
}
-class Enc_ca3887 : OpcodeHexagon {
+class Enc_437f33 : OpcodeHexagon {
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
+}
+class Enc_6c9440 : OpcodeHexagon {
+ bits <10> Ii;
+ let Inst{21-21} = Ii{9-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_890909 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <2> Pe4;
+ let Inst{6-5} = Pe4{1-0};
}
class Enc_a94f3b : OpcodeHexagon {
bits <5> Rs32;
@@ -437,51 +554,98 @@ class Enc_a94f3b : OpcodeHexagon {
bits <2> Pe4;
let Inst{6-5} = Pe4{1-0};
}
-class Enc_625deb : OpcodeHexagon {
- bits <4> Ii;
- let Inst{10-8} = Ii{3-1};
- bits <4> Rs16;
- let Inst{7-4} = Rs16{3-0};
- bits <4> Rt16;
- let Inst{3-0} = Rt16{3-0};
+class Enc_0aa344 : OpcodeHexagon {
+ bits <5> Gss32;
+ let Inst{20-16} = Gss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
}
-class Enc_1f5ba6 : OpcodeHexagon {
- bits <4> Rd16;
- let Inst{3-0} = Rd16{3-0};
+class Enc_44271f : OpcodeHexagon {
+ bits <5> Gs32;
+ let Inst{20-16} = Gs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
-class Enc_cd82bc : OpcodeHexagon {
- bits <4> Ii;
- let Inst{21-21} = Ii{3-3};
- let Inst{7-5} = Ii{2-0};
- bits <6> II;
- let Inst{13-8} = II{5-0};
+class Enc_ed5027 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Gdd32;
+ let Inst{4-0} = Gdd32{4-0};
+}
+class Enc_621fba : OpcodeHexagon {
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Rx32;
- let Inst{4-0} = Rx32{4-0};
+ bits <5> Gd32;
+ let Inst{4-0} = Gd32{4-0};
}
-class Enc_399e12 : OpcodeHexagon {
- bits <4> Rs16;
- let Inst{7-4} = Rs16{3-0};
- bits <3> Rdd8;
- let Inst{2-0} = Rdd8{2-0};
+class Enc_81ac1d : OpcodeHexagon {
+ bits <24> Ii;
+ let Inst{24-16} = Ii{23-15};
+ let Inst{13-1} = Ii{14-2};
}
-class Enc_d7a65e : OpcodeHexagon {
- bits <6> Ii;
- let Inst{12-7} = Ii{5-0};
- bits <6> II;
- let Inst{13-13} = II{5-5};
- let Inst{4-0} = II{4-0};
- bits <2> Pv4;
- let Inst{6-5} = Pv4{1-0};
+class Enc_daea09 : OpcodeHexagon {
+ bits <17> Ii;
+ let Inst{23-22} = Ii{16-15};
+ let Inst{20-16} = Ii{14-10};
+ let Inst{13-13} = Ii{9-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <2> Pu4;
+ let Inst{9-8} = Pu4{1-0};
+}
+class Enc_ecbcc8 : OpcodeHexagon {
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
}
-class Enc_607661 : OpcodeHexagon {
- bits <6> Ii;
- let Inst{12-7} = Ii{5-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
+class Enc_88d4d9 : OpcodeHexagon {
+ bits <2> Pu4;
+ let Inst{9-8} = Pu4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_0fa531 : OpcodeHexagon {
+ bits <15> Ii;
+ let Inst{21-21} = Ii{14-14};
+ let Inst{13-13} = Ii{13-13};
+ let Inst{11-1} = Ii{12-2};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_4dc228 : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{12-8} = Ii{8-4};
+ let Inst{4-3} = Ii{3-2};
+ bits <10> II;
+ let Inst{20-16} = II{9-5};
+ let Inst{7-5} = II{4-2};
+ let Inst{1-0} = II{1-0};
+}
+class Enc_864a5a : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{12-8} = Ii{8-4};
+ let Inst{4-3} = Ii{3-2};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_a51a9a : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-8} = Ii{7-3};
+ let Inst{4-2} = Ii{2-0};
+}
+class Enc_33f8ba : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-8} = Ii{7-3};
+ let Inst{4-2} = Ii{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_c9a18e : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
}
class Enc_6a5972 : OpcodeHexagon {
bits <11> Ii;
@@ -492,74 +656,56 @@ class Enc_6a5972 : OpcodeHexagon {
bits <4> Rt16;
let Inst{11-8} = Rt16{3-0};
}
-class Enc_ff3442 : OpcodeHexagon {
- bits <4> Ii;
- let Inst{13-13} = Ii{3-3};
- let Inst{10-8} = Ii{2-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
+class Enc_eafd18 : OpcodeHexagon {
+ bits <5> II;
+ let Inst{12-8} = II{4-0};
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
}
-class Enc_53dca9 : OpcodeHexagon {
- bits <6> Ii;
- let Inst{11-8} = Ii{5-2};
+class Enc_14d27a : OpcodeHexagon {
+ bits <5> II;
+ let Inst{12-8} = II{4-0};
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
bits <4> Rs16;
- let Inst{7-4} = Rs16{3-0};
- bits <4> Rd16;
- let Inst{3-0} = Rd16{3-0};
-}
-class Enc_27fd0e : OpcodeHexagon {
- bits <6> Ii;
- let Inst{8-5} = Ii{5-2};
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
-class Enc_d7bc34 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <3> Rt8;
- let Inst{18-16} = Rt8{2-0};
- bits <5> Vyyyy32;
- let Inst{4-0} = Vyyyy32{4-0};
-}
-class Enc_93af4c : OpcodeHexagon {
- bits <7> Ii;
- let Inst{10-4} = Ii{6-0};
- bits <4> Rx16;
- let Inst{3-0} = Rx16{3-0};
-}
-class Enc_621fba : OpcodeHexagon {
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Gd32;
- let Inst{4-0} = Gd32{4-0};
+ let Inst{19-16} = Rs16{3-0};
}
-class Enc_5bdd42 : OpcodeHexagon {
- bits <7> Ii;
- let Inst{8-5} = Ii{6-3};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_e90a15 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <4> n1;
+ let Inst{29-29} = n1{3-3};
+ let Inst{26-25} = n1{2-1};
+ let Inst{22-22} = n1{0-0};
}
-class Enc_ad9bef : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rtt32;
- let Inst{20-16} = Rtt32{4-0};
- bits <5> Vxx32;
- let Inst{4-0} = Vxx32{4-0};
+class Enc_5a18b3 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <5> n1;
+ let Inst{29-29} = n1{4-4};
+ let Inst{26-25} = n1{3-2};
+ let Inst{22-22} = n1{1-1};
+ let Inst{13-13} = n1{0-0};
}
-class Enc_71f1b4 : OpcodeHexagon {
- bits <6> Ii;
- let Inst{8-5} = Ii{5-2};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_1de724 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <4> n1;
+ let Inst{28-28} = n1{3-3};
+ let Inst{24-22} = n1{2-0};
}
class Enc_14640c : OpcodeHexagon {
bits <11> Ii;
@@ -572,165 +718,215 @@ class Enc_14640c : OpcodeHexagon {
let Inst{24-22} = n1{3-1};
let Inst{13-13} = n1{0-0};
}
-class Enc_31db33 : OpcodeHexagon {
- bits <2> Qt4;
- let Inst{6-5} = Qt4{1-0};
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{20-16} = Vv32{4-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
+class Enc_668704 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <5> n1;
+ let Inst{28-28} = n1{4-4};
+ let Inst{25-22} = n1{3-0};
}
-class Enc_65f095 : OpcodeHexagon {
- bits <6> Ii;
- let Inst{6-3} = Ii{5-2};
- bits <2> Pv4;
- let Inst{1-0} = Pv4{1-0};
- bits <3> Nt8;
- let Inst{10-8} = Nt8{2-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_800e04 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <6> n1;
+ let Inst{28-28} = n1{5-5};
+ let Inst{25-22} = n1{4-1};
+ let Inst{13-13} = n1{0-0};
}
-class Enc_784502 : OpcodeHexagon {
- bits <3> Ii;
- let Inst{10-8} = Ii{2-0};
- bits <2> Pv4;
- let Inst{12-11} = Pv4{1-0};
- bits <3> Os8;
- let Inst{2-0} = Os8{2-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_4aca3a : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <3> n1;
+ let Inst{29-29} = n1{2-2};
+ let Inst{26-25} = n1{1-0};
}
-class Enc_6413b6 : OpcodeHexagon {
+class Enc_f7ea77 : OpcodeHexagon {
bits <11> Ii;
let Inst{21-20} = Ii{10-9};
let Inst{7-1} = Ii{8-2};
bits <3> Ns8;
let Inst{18-16} = Ns8{2-0};
- bits <5> n1;
- let Inst{29-29} = n1{4-4};
- let Inst{26-25} = n1{3-2};
- let Inst{23-23} = n1{1-1};
+ bits <4> n1;
+ let Inst{29-29} = n1{3-3};
+ let Inst{26-25} = n1{2-1};
let Inst{13-13} = n1{0-0};
}
-class Enc_7a0ea6 : OpcodeHexagon {
- bits <4> Rd16;
- let Inst{3-0} = Rd16{3-0};
- bits <1> n1;
- let Inst{9-9} = n1{0-0};
+class Enc_405228 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <3> n1;
+ let Inst{28-28} = n1{2-2};
+ let Inst{24-23} = n1{1-0};
}
-class Enc_84bff1 : OpcodeHexagon {
- bits <2> Ii;
- let Inst{13-13} = Ii{1-1};
- let Inst{7-7} = Ii{0-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
+class Enc_3a2484 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <4> n1;
+ let Inst{28-28} = n1{3-3};
+ let Inst{24-23} = n1{2-1};
+ let Inst{13-13} = n1{0-0};
}
-class Enc_f4413a : OpcodeHexagon {
- bits <4> Ii;
- let Inst{8-5} = Ii{3-0};
- bits <2> Pt4;
- let Inst{10-9} = Pt4{1-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_736575 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <4> n1;
+ let Inst{28-28} = n1{3-3};
+ let Inst{25-23} = n1{2-0};
}
-class Enc_78e566 : OpcodeHexagon {
- bits <2> Pt4;
- let Inst{9-8} = Pt4{1-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
+class Enc_8e583a : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <5> n1;
+ let Inst{28-28} = n1{4-4};
+ let Inst{25-23} = n1{3-1};
+ let Inst{13-13} = n1{0-0};
}
-class Enc_437f33 : OpcodeHexagon {
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <2> Pu4;
- let Inst{6-5} = Pu4{1-0};
- bits <5> Rx32;
- let Inst{4-0} = Rx32{4-0};
+class Enc_3694bd : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <5> n1;
+ let Inst{29-29} = n1{4-4};
+ let Inst{26-25} = n1{3-2};
+ let Inst{23-22} = n1{1-0};
}
-class Enc_0527db : OpcodeHexagon {
+class Enc_a6853f : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <6> n1;
+ let Inst{29-29} = n1{5-5};
+ let Inst{26-25} = n1{4-3};
+ let Inst{23-22} = n1{2-1};
+ let Inst{13-13} = n1{0-0};
+}
+class Enc_a42857 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
bits <4> Rs16;
- let Inst{7-4} = Rs16{3-0};
- bits <4> Rx16;
- let Inst{3-0} = Rx16{3-0};
+ let Inst{19-16} = Rs16{3-0};
+ bits <5> n1;
+ let Inst{28-28} = n1{4-4};
+ let Inst{24-22} = n1{3-1};
+ let Inst{8-8} = n1{0-0};
}
-class Enc_420cf3 : OpcodeHexagon {
- bits <6> Ii;
- let Inst{22-21} = Ii{5-4};
- let Inst{13-13} = Ii{3-3};
- let Inst{7-5} = Ii{2-0};
- bits <5> Ru32;
- let Inst{4-0} = Ru32{4-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rd32;
- let Inst{12-8} = Rd32{4-0};
+class Enc_f6fe0b : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <6> n1;
+ let Inst{28-28} = n1{5-5};
+ let Inst{24-22} = n1{4-2};
+ let Inst{13-13} = n1{1-1};
+ let Inst{8-8} = n1{0-0};
}
-class Enc_e39bb2 : OpcodeHexagon {
- bits <6> Ii;
- let Inst{9-4} = Ii{5-0};
- bits <4> Rd16;
- let Inst{3-0} = Rd16{3-0};
+class Enc_3e3989 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <6> n1;
+ let Inst{28-28} = n1{5-5};
+ let Inst{25-22} = n1{4-1};
+ let Inst{8-8} = n1{0-0};
}
-class Enc_1b64fb : OpcodeHexagon {
- bits <16> Ii;
- let Inst{26-25} = Ii{15-14};
- let Inst{20-16} = Ii{13-9};
- let Inst{13-13} = Ii{8-8};
- let Inst{7-0} = Ii{7-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
+class Enc_b909d2 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <7> n1;
+ let Inst{28-28} = n1{6-6};
+ let Inst{25-22} = n1{5-2};
+ let Inst{13-13} = n1{1-1};
+ let Inst{8-8} = n1{0-0};
}
-class Enc_c1d806 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{20-16} = Vv32{4-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
- bits <2> Qe4;
- let Inst{6-5} = Qe4{1-0};
+class Enc_f82302 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <4> n1;
+ let Inst{29-29} = n1{3-3};
+ let Inst{26-25} = n1{2-1};
+ let Inst{23-23} = n1{0-0};
}
-class Enc_c6220b : OpcodeHexagon {
- bits <2> Ii;
- let Inst{13-13} = Ii{1-1};
- let Inst{7-7} = Ii{0-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Ru32;
- let Inst{12-8} = Ru32{4-0};
- bits <3> Nt8;
- let Inst{2-0} = Nt8{2-0};
+class Enc_6413b6 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <5> n1;
+ let Inst{29-29} = n1{4-4};
+ let Inst{26-25} = n1{3-2};
+ let Inst{23-23} = n1{1-1};
+ let Inst{13-13} = n1{0-0};
}
-class Enc_322e1b : OpcodeHexagon {
- bits <6> Ii;
- let Inst{22-21} = Ii{5-4};
- let Inst{13-13} = Ii{3-3};
- let Inst{7-5} = Ii{2-0};
- bits <6> II;
- let Inst{23-23} = II{5-5};
- let Inst{4-0} = II{4-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rd32;
- let Inst{12-8} = Rd32{4-0};
+class Enc_b78edd : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <4> n1;
+ let Inst{28-28} = n1{3-3};
+ let Inst{24-23} = n1{2-1};
+ let Inst{8-8} = n1{0-0};
}
-class Enc_989021 : OpcodeHexagon {
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vy32;
- let Inst{12-8} = Vy32{4-0};
- bits <5> Vx32;
- let Inst{4-0} = Vx32{4-0};
+class Enc_041d7b : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <5> n1;
+ let Inst{28-28} = n1{4-4};
+ let Inst{24-23} = n1{3-2};
+ let Inst{13-13} = n1{1-1};
+ let Inst{8-8} = n1{0-0};
+}
+class Enc_b1e1fb : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <5> n1;
+ let Inst{28-28} = n1{4-4};
+ let Inst{25-23} = n1{3-1};
+ let Inst{8-8} = n1{0-0};
}
class Enc_178717 : OpcodeHexagon {
bits <11> Ii;
@@ -744,62 +940,63 @@ class Enc_178717 : OpcodeHexagon {
let Inst{13-13} = n1{1-1};
let Inst{8-8} = n1{0-0};
}
-class Enc_78cbf0 : OpcodeHexagon {
- bits <18> Ii;
- let Inst{26-25} = Ii{17-16};
- let Inst{20-16} = Ii{15-11};
- let Inst{13-13} = Ii{10-10};
- let Inst{7-0} = Ii{9-2};
- bits <3> Nt8;
- let Inst{10-8} = Nt8{2-0};
-}
-class Enc_052c7d : OpcodeHexagon {
- bits <5> Ii;
- let Inst{6-3} = Ii{4-1};
+class Enc_5de85f : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
}
-class Enc_fcf7a7 : OpcodeHexagon {
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
+class Enc_9e4c3f : OpcodeHexagon {
+ bits <6> II;
+ let Inst{13-8} = II{5-0};
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rd16;
+ let Inst{19-16} = Rd16{3-0};
}
-class Enc_55355c : OpcodeHexagon {
- bits <2> Ii;
- let Inst{13-13} = Ii{1-1};
- let Inst{7-7} = Ii{0-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Ru32;
- let Inst{12-8} = Ru32{4-0};
- bits <5> Rtt32;
- let Inst{4-0} = Rtt32{4-0};
+class Enc_66bce1 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{11-8} = Rd16{3-0};
}
-class Enc_211aaa : OpcodeHexagon {
+class Enc_69d63b : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+}
+class Enc_ad1c74 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+}
+class Enc_a27588 : OpcodeHexagon {
bits <11> Ii;
let Inst{26-25} = Ii{10-9};
let Inst{13-5} = Ii{8-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
}
-class Enc_6185fe : OpcodeHexagon {
- bits <2> Ii;
- let Inst{13-13} = Ii{1-1};
- let Inst{7-7} = Ii{0-0};
- bits <6> II;
- let Inst{11-8} = II{5-2};
- let Inst{6-5} = II{1-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
+class Enc_1f5d8f : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
class Enc_74aef2 : OpcodeHexagon {
bits <4> Ii;
@@ -811,127 +1008,123 @@ class Enc_74aef2 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_cd4705 : OpcodeHexagon {
- bits <3> Ii;
- let Inst{7-5} = Ii{2-0};
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{20-16} = Vv32{4-0};
- bits <5> Vx32;
- let Inst{4-0} = Vx32{4-0};
+class Enc_6b197f : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{8-5} = Ii{3-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_2ebe3b : OpcodeHexagon {
+class Enc_5cd7e9 : OpcodeHexagon {
+ bits <12> Ii;
+ let Inst{26-25} = Ii{11-10};
+ let Inst{13-5} = Ii{9-1};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+}
+class Enc_9e2e1c : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{8-5} = Ii{4-1};
bits <1> Mu2;
let Inst{13-13} = Mu2{0-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_3d5b28 : OpcodeHexagon {
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
+class Enc_bd1cbc : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{8-5} = Ii{4-1};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_5ab2be : OpcodeHexagon {
+class Enc_de0214 : OpcodeHexagon {
+ bits <12> Ii;
+ let Inst{26-25} = Ii{11-10};
+ let Inst{13-5} = Ii{9-1};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_fef969 : OpcodeHexagon {
- bits <6> Ii;
- let Inst{20-16} = Ii{5-1};
- let Inst{5-5} = Ii{0-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
+class Enc_74d4e5 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_63eaeb : OpcodeHexagon {
- bits <2> Ii;
- let Inst{1-0} = Ii{1-0};
- bits <4> Rs16;
- let Inst{7-4} = Rs16{3-0};
+class Enc_e83554 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{8-5} = Ii{4-1};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_95441f : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{20-16} = Vv32{4-0};
- bits <2> Qd4;
- let Inst{1-0} = Qd4{1-0};
+class Enc_152467 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{8-5} = Ii{4-1};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_372c9d : OpcodeHexagon {
- bits <2> Pv4;
- let Inst{12-11} = Pv4{1-0};
+class Enc_2d7491 : OpcodeHexagon {
+ bits <13> Ii;
+ let Inst{26-25} = Ii{12-11};
+ let Inst{13-5} = Ii{10-2};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_7eee72 : OpcodeHexagon {
bits <1> Mu2;
let Inst{13-13} = Mu2{0-0};
- bits <3> Os8;
- let Inst{2-0} = Os8{2-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_4dff07 : OpcodeHexagon {
- bits <2> Qv4;
- let Inst{12-11} = Qv4{1-0};
+class Enc_70b24b : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{8-5} = Ii{5-2};
bits <1> Mu2;
let Inst{13-13} = Mu2{0-0};
- bits <5> Vs32;
- let Inst{4-0} = Vs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_04c959 : OpcodeHexagon {
- bits <2> Ii;
- let Inst{13-13} = Ii{1-1};
- let Inst{7-7} = Ii{0-0};
- bits <6> II;
- let Inst{11-8} = II{5-2};
- let Inst{6-5} = II{1-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Ryy32;
- let Inst{4-0} = Ryy32{4-0};
-}
-class Enc_b62ef7 : OpcodeHexagon {
- bits <3> Ii;
- let Inst{10-8} = Ii{2-0};
- bits <5> Vs32;
- let Inst{4-0} = Vs32{4-0};
+class Enc_71f1b4 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{8-5} = Ii{5-2};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_2b518f : OpcodeHexagon {
- bits <32> Ii;
- let Inst{27-16} = Ii{31-20};
- let Inst{13-0} = Ii{19-6};
-}
-class Enc_b388cf : OpcodeHexagon {
- bits <5> Ii;
- let Inst{12-8} = Ii{4-0};
- bits <5> II;
- let Inst{22-21} = II{4-3};
- let Inst{7-5} = II{2-0};
+class Enc_211aaa : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{26-25} = Ii{10-9};
+ let Inst{13-5} = Ii{8-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_ad1c74 : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <4> Rs16;
- let Inst{19-16} = Rs16{3-0};
-}
-class Enc_74d4e5 : OpcodeHexagon {
+class Enc_e0a47a : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{8-5} = Ii{3-0};
bits <1> Mu2;
let Inst{13-13} = Mu2{0-0};
bits <5> Rd32;
@@ -939,14 +1132,6 @@ class Enc_74d4e5 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_c90aca : OpcodeHexagon {
- bits <8> Ii;
- let Inst{12-5} = Ii{7-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rx32;
- let Inst{4-0} = Rx32{4-0};
-}
class Enc_222336 : OpcodeHexagon {
bits <4> Ii;
let Inst{8-5} = Ii{3-0};
@@ -955,338 +1140,162 @@ class Enc_222336 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_5e87ce : OpcodeHexagon {
+class Enc_25bef0 : OpcodeHexagon {
bits <16> Ii;
- let Inst{23-22} = Ii{15-14};
+ let Inst{26-25} = Ii{15-14};
let Inst{20-16} = Ii{13-9};
let Inst{13-5} = Ii{8-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_158beb : OpcodeHexagon {
- bits <2> Qs4;
- let Inst{6-5} = Qs4{1-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Vv32;
- let Inst{4-0} = Vv32{4-0};
-}
-class Enc_f7ea77 : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <3> Ns8;
- let Inst{18-16} = Ns8{2-0};
- bits <4> n1;
- let Inst{29-29} = n1{3-3};
- let Inst{26-25} = n1{2-1};
- let Inst{13-13} = n1{0-0};
-}
-class Enc_245865 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{23-19} = Vv32{4-0};
- bits <3> Rt8;
- let Inst{18-16} = Rt8{2-0};
- bits <5> Vx32;
- let Inst{4-0} = Vx32{4-0};
-}
-class Enc_88d4d9 : OpcodeHexagon {
- bits <2> Pu4;
- let Inst{9-8} = Pu4{1-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
-}
-class Enc_226535 : OpcodeHexagon {
- bits <8> Ii;
- let Inst{12-7} = Ii{7-2};
+class Enc_fa3ba4 : OpcodeHexagon {
+ bits <14> Ii;
+ let Inst{26-25} = Ii{13-12};
+ let Inst{13-5} = Ii{11-3};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Rt32;
- let Inst{4-0} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
}
-class Enc_31aa6a : OpcodeHexagon {
- bits <5> Ii;
- let Inst{6-3} = Ii{4-1};
- bits <2> Pv4;
- let Inst{1-0} = Pv4{1-0};
- bits <3> Nt8;
- let Inst{10-8} = Nt8{2-0};
+class Enc_b05839 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{8-5} = Ii{6-3};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_397f23 : OpcodeHexagon {
- bits <8> Ii;
- let Inst{13-13} = Ii{7-7};
- let Inst{7-3} = Ii{6-2};
- bits <2> Pv4;
- let Inst{1-0} = Pv4{1-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
-}
-class Enc_865390 : OpcodeHexagon {
- bits <3> Ii;
- let Inst{10-8} = Ii{2-0};
- bits <2> Pv4;
- let Inst{12-11} = Pv4{1-0};
- bits <5> Vs32;
- let Inst{4-0} = Vs32{4-0};
+class Enc_5bdd42 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{8-5} = Ii{6-3};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_98c0b8 : OpcodeHexagon {
- bits <2> Ii;
- let Inst{13-13} = Ii{1-1};
- let Inst{7-7} = Ii{0-0};
- bits <2> Pv4;
- let Inst{6-5} = Pv4{1-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
+class Enc_509701 : OpcodeHexagon {
+ bits <19> Ii;
+ let Inst{26-25} = Ii{18-17};
+ let Inst{20-16} = Ii{16-12};
+ let Inst{13-5} = Ii{11-3};
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_bfbf03 : OpcodeHexagon {
- bits <2> Qs4;
- let Inst{9-8} = Qs4{1-0};
- bits <2> Qd4;
- let Inst{1-0} = Qd4{1-0};
+class Enc_8df4be : OpcodeHexagon {
+ bits <17> Ii;
+ let Inst{26-25} = Ii{16-15};
+ let Inst{20-16} = Ii{14-10};
+ let Inst{13-5} = Ii{9-1};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
-class Enc_ecbcc8 : OpcodeHexagon {
+class Enc_2a3787 : OpcodeHexagon {
+ bits <13> Ii;
+ let Inst{26-25} = Ii{12-11};
+ let Inst{13-5} = Ii{10-2};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
-}
-class Enc_f5e933 : OpcodeHexagon {
- bits <2> Ps4;
- let Inst{17-16} = Ps4{1-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_3fc427 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{20-16} = Vv32{4-0};
- bits <5> Vxx32;
- let Inst{4-0} = Vxx32{4-0};
-}
-class Enc_01d3d0 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vdd32;
- let Inst{4-0} = Vdd32{4-0};
-}
-class Enc_b0e9d8 : OpcodeHexagon {
- bits <10> Ii;
- let Inst{21-21} = Ii{9-9};
- let Inst{13-5} = Ii{8-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
+class Enc_27fd0e : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{8-5} = Ii{5-2};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
bits <5> Rx32;
- let Inst{4-0} = Rx32{4-0};
-}
-class Enc_1bd127 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <3> Rt8;
- let Inst{18-16} = Rt8{2-0};
- bits <5> Vdddd32;
- let Inst{4-0} = Vdddd32{4-0};
-}
-class Enc_3694bd : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <3> Ns8;
- let Inst{18-16} = Ns8{2-0};
- bits <5> n1;
- let Inst{29-29} = n1{4-4};
- let Inst{26-25} = n1{3-2};
- let Inst{23-22} = n1{1-0};
-}
-class Enc_a42857 : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <4> Rs16;
- let Inst{19-16} = Rs16{3-0};
- bits <5> n1;
- let Inst{28-28} = n1{4-4};
- let Inst{24-22} = n1{3-1};
- let Inst{8-8} = n1{0-0};
-}
-class Enc_b7fad3 : OpcodeHexagon {
- bits <2> Pv4;
- let Inst{9-8} = Pv4{1-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_223005 : OpcodeHexagon {
+class Enc_3d920a : OpcodeHexagon {
bits <6> Ii;
- let Inst{6-3} = Ii{5-2};
- bits <3> Nt8;
- let Inst{10-8} = Nt8{2-0};
+ let Inst{8-5} = Ii{5-2};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_9e4c3f : OpcodeHexagon {
- bits <6> II;
- let Inst{13-8} = II{5-0};
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <4> Rd16;
- let Inst{19-16} = Rd16{3-0};
+class Enc_4f4ed7 : OpcodeHexagon {
+ bits <18> Ii;
+ let Inst{26-25} = Ii{17-16};
+ let Inst{20-16} = Ii{15-11};
+ let Inst{13-5} = Ii{10-2};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
-class Enc_8b8d61 : OpcodeHexagon {
+class Enc_a21d47 : OpcodeHexagon {
bits <6> Ii;
- let Inst{22-21} = Ii{5-4};
- let Inst{13-13} = Ii{3-3};
- let Inst{7-5} = Ii{2-0};
+ let Inst{10-5} = Ii{5-0};
+ bits <2> Pt4;
+ let Inst{12-11} = Pt4{1-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Ru32;
- let Inst{4-0} = Ru32{4-0};
bits <5> Rd32;
- let Inst{12-8} = Rd32{4-0};
-}
-class Enc_88c16c : OpcodeHexagon {
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
- bits <5> Rxx32;
- let Inst{4-0} = Rxx32{4-0};
+ let Inst{4-0} = Rd32{4-0};
}
-class Enc_770858 : OpcodeHexagon {
- bits <2> Ps4;
- let Inst{6-5} = Ps4{1-0};
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
+class Enc_f4413a : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{8-5} = Ii{3-0};
+ bits <2> Pt4;
+ let Inst{10-9} = Pt4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_bd811a : OpcodeHexagon {
+class Enc_acd6ed : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{10-5} = Ii{8-3};
+ bits <2> Pt4;
+ let Inst{12-11} = Pt4{1-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Cd32;
- let Inst{4-0} = Cd32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
}
-class Enc_b05839 : OpcodeHexagon {
+class Enc_9d1247 : OpcodeHexagon {
bits <7> Ii;
let Inst{8-5} = Ii{6-3};
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
+ bits <2> Pt4;
+ let Inst{10-9} = Pt4{1-0};
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_bc03e5 : OpcodeHexagon {
- bits <17> Ii;
- let Inst{26-25} = Ii{16-15};
- let Inst{20-16} = Ii{14-10};
- let Inst{13-13} = Ii{9-9};
- let Inst{7-0} = Ii{8-1};
- bits <3> Nt8;
- let Inst{10-8} = Nt8{2-0};
-}
-class Enc_412ff0 : OpcodeHexagon {
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <5> Ru32;
- let Inst{4-0} = Ru32{4-0};
- bits <5> Rxx32;
- let Inst{12-8} = Rxx32{4-0};
-}
-class Enc_ef601b : OpcodeHexagon {
- bits <4> Ii;
- let Inst{13-13} = Ii{3-3};
- let Inst{10-8} = Ii{2-0};
- bits <2> Pv4;
- let Inst{12-11} = Pv4{1-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
-}
-class Enc_c9a18e : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <3> Ns8;
- let Inst{18-16} = Ns8{2-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
-}
-class Enc_be32a5 : OpcodeHexagon {
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
-}
-class Enc_e6abcf : OpcodeHexagon {
+class Enc_a198f6 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{10-5} = Ii{6-1};
+ bits <2> Pt4;
+ let Inst{12-11} = Pt4{1-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
-}
-class Enc_d6990d : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{12-8} = Vuu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vxx32;
- let Inst{4-0} = Vxx32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
-class Enc_6c9440 : OpcodeHexagon {
- bits <10> Ii;
- let Inst{21-21} = Ii{9-9};
- let Inst{13-5} = Ii{8-0};
+class Enc_733b27 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{8-5} = Ii{4-1};
+ bits <2> Pt4;
+ let Inst{10-9} = Pt4{1-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_0d8adb : OpcodeHexagon {
+class Enc_f82eaf : OpcodeHexagon {
bits <8> Ii;
- let Inst{12-5} = Ii{7-0};
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
-}
-class Enc_50e578 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
+ let Inst{10-5} = Ii{7-2};
+ bits <2> Pt4;
+ let Inst{12-11} = Pt4{1-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_1cf4ca : OpcodeHexagon {
- bits <6> Ii;
- let Inst{17-16} = Ii{5-4};
- let Inst{6-3} = Ii{3-0};
- bits <2> Pv4;
- let Inst{1-0} = Pv4{1-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
-}
-class Enc_48b75f : OpcodeHexagon {
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
-}
class Enc_b97f71 : OpcodeHexagon {
bits <6> Ii;
let Inst{8-5} = Ii{5-2};
@@ -1297,379 +1306,267 @@ class Enc_b97f71 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_9d1247 : OpcodeHexagon {
- bits <7> Ii;
- let Inst{8-5} = Ii{6-3};
- bits <2> Pt4;
- let Inst{10-9} = Pt4{1-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
-class Enc_7b7ba8 : OpcodeHexagon {
- bits <2> Qu4;
- let Inst{9-8} = Qu4{1-0};
+class Enc_d44e31 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{12-7} = Ii{5-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
+ let Inst{4-0} = Rt32{4-0};
}
-class Enc_f7430e : OpcodeHexagon {
- bits <4> Ii;
- let Inst{13-13} = Ii{3-3};
- let Inst{10-8} = Ii{2-0};
- bits <2> Pv4;
- let Inst{12-11} = Pv4{1-0};
+class Enc_163a3c : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{12-7} = Ii{6-1};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <3> Os8;
- let Inst{2-0} = Os8{2-0};
-}
-class Enc_e7581c : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
-}
-class Enc_2301d6 : OpcodeHexagon {
- bits <6> Ii;
- let Inst{20-16} = Ii{5-1};
- let Inst{8-8} = Ii{0-0};
- bits <2> Pt4;
- let Inst{10-9} = Pt4{1-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
+ let Inst{4-0} = Rt32{4-0};
}
-class Enc_c31910 : OpcodeHexagon {
+class Enc_226535 : OpcodeHexagon {
bits <8> Ii;
- let Inst{23-21} = Ii{7-5};
- let Inst{13-13} = Ii{4-4};
- let Inst{7-5} = Ii{3-1};
- let Inst{3-3} = Ii{0-0};
- bits <5> II;
- let Inst{12-8} = II{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
-class Enc_2f2f04 : OpcodeHexagon {
- bits <1> Ii;
- let Inst{5-5} = Ii{0-0};
- bits <5> Vuu32;
- let Inst{12-8} = Vuu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vdd32;
- let Inst{4-0} = Vdd32{4-0};
-}
-class Enc_8d8a30 : OpcodeHexagon {
- bits <4> Ii;
- let Inst{13-13} = Ii{3-3};
- let Inst{10-8} = Ii{2-0};
- bits <2> Pv4;
- let Inst{12-11} = Pv4{1-0};
+ let Inst{12-7} = Ii{7-2};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
+ let Inst{4-0} = Rt32{4-0};
}
-class Enc_2d7491 : OpcodeHexagon {
- bits <13> Ii;
- let Inst{26-25} = Ii{12-11};
- let Inst{13-5} = Ii{10-2};
+class Enc_46c951 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{12-7} = Ii{5-0};
+ bits <5> II;
+ let Inst{4-0} = II{4-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
}
-class Enc_a803e0 : OpcodeHexagon {
+class Enc_e66a97 : OpcodeHexagon {
bits <7> Ii;
let Inst{12-7} = Ii{6-1};
- bits <8> II;
- let Inst{13-13} = II{7-7};
- let Inst{6-0} = II{6-0};
+ bits <5> II;
+ let Inst{4-0} = II{4-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
}
-class Enc_45364e : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{20-16} = Vv32{4-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
+class Enc_84b2cd : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-7} = Ii{7-2};
+ bits <5> II;
+ let Inst{4-0} = II{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
}
-class Enc_b909d2 : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <4> Rs16;
- let Inst{19-16} = Rs16{3-0};
- bits <7> n1;
- let Inst{28-28} = n1{6-6};
- let Inst{25-22} = n1{5-2};
- let Inst{13-13} = n1{1-1};
- let Inst{8-8} = n1{0-0};
+class Enc_f394d3 : OpcodeHexagon {
+ bits <6> II;
+ let Inst{11-8} = II{5-2};
+ let Inst{6-5} = II{1-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Re32;
+ let Inst{20-16} = Re32{4-0};
}
-class Enc_e6c957 : OpcodeHexagon {
- bits <10> Ii;
- let Inst{21-21} = Ii{9-9};
- let Inst{13-5} = Ii{8-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
+class Enc_04c959 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <6> II;
+ let Inst{11-8} = II{5-2};
+ let Inst{6-5} = II{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
}
-class Enc_0d8870 : OpcodeHexagon {
- bits <12> Ii;
- let Inst{26-25} = Ii{11-10};
- let Inst{13-13} = Ii{9-9};
- let Inst{7-0} = Ii{8-1};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <3> Nt8;
- let Inst{10-8} = Nt8{2-0};
+class Enc_323f2d : OpcodeHexagon {
+ bits <6> II;
+ let Inst{11-8} = II{5-2};
+ let Inst{6-5} = II{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Re32;
+ let Inst{20-16} = Re32{4-0};
}
-class Enc_9fae8a : OpcodeHexagon {
- bits <6> Ii;
- let Inst{13-8} = Ii{5-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
+class Enc_4f677b : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <6> II;
+ let Inst{11-8} = II{5-2};
+ let Inst{6-5} = II{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_18c338 : OpcodeHexagon {
- bits <8> Ii;
- let Inst{12-5} = Ii{7-0};
- bits <8> II;
- let Inst{22-16} = II{7-1};
- let Inst{13-13} = II{0-0};
+class Enc_7fa7f6 : OpcodeHexagon {
+ bits <6> II;
+ let Inst{11-8} = II{5-2};
+ let Inst{6-5} = II{1-0};
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
+ bits <5> Re32;
+ let Inst{20-16} = Re32{4-0};
}
-class Enc_5ccba9 : OpcodeHexagon {
- bits <8> Ii;
- let Inst{12-7} = Ii{7-2};
+class Enc_6185fe : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
bits <6> II;
- let Inst{13-13} = II{5-5};
- let Inst{4-0} = II{4-0};
- bits <2> Pv4;
- let Inst{6-5} = Pv4{1-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
-}
-class Enc_0ed752 : OpcodeHexagon {
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <5> Cdd32;
- let Inst{4-0} = Cdd32{4-0};
+ let Inst{11-8} = II{5-2};
+ let Inst{6-5} = II{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
}
-class Enc_143445 : OpcodeHexagon {
- bits <13> Ii;
- let Inst{26-25} = Ii{12-11};
- let Inst{13-13} = Ii{10-10};
- let Inst{7-0} = Ii{9-2};
+class Enc_da664b : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
-class Enc_3a3d62 : OpcodeHexagon {
+class Enc_84bff1 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_3e3989 : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <4> Rs16;
- let Inst{19-16} = Rs16{3-0};
- bits <6> n1;
- let Inst{28-28} = n1{5-5};
- let Inst{25-22} = n1{4-1};
- let Inst{8-8} = n1{0-0};
+class Enc_2301d6 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{20-16} = Ii{5-1};
+ let Inst{8-8} = Ii{0-0};
+ bits <2> Pt4;
+ let Inst{10-9} = Pt4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
-class Enc_152467 : OpcodeHexagon {
- bits <5> Ii;
- let Inst{8-5} = Ii{4-1};
+class Enc_2e1979 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
}
-class Enc_9ac432 : OpcodeHexagon {
- bits <2> Ps4;
- let Inst{17-16} = Ps4{1-0};
+class Enc_2a7b91 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{20-16} = Ii{5-1};
+ let Inst{8-8} = Ii{0-0};
bits <2> Pt4;
- let Inst{9-8} = Pt4{1-0};
- bits <2> Pu4;
- let Inst{7-6} = Pu4{1-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
-}
-class Enc_a90628 : OpcodeHexagon {
- bits <2> Qv4;
- let Inst{23-22} = Qv4{1-0};
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vx32;
- let Inst{4-0} = Vx32{4-0};
+ let Inst{10-9} = Pt4{1-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
}
-class Enc_f37377 : OpcodeHexagon {
- bits <8> Ii;
- let Inst{12-7} = Ii{7-2};
- bits <8> II;
- let Inst{13-13} = II{7-7};
- let Inst{6-0} = II{6-0};
+class Enc_98c0b8 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
}
-class Enc_a198f6 : OpcodeHexagon {
- bits <7> Ii;
- let Inst{10-5} = Ii{6-1};
- bits <2> Pt4;
- let Inst{12-11} = Pt4{1-0};
+class Enc_b7fad3 : OpcodeHexagon {
+ bits <2> Pv4;
+ let Inst{9-8} = Pv4{1-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
}
-class Enc_4e4a80 : OpcodeHexagon {
- bits <2> Qs4;
- let Inst{6-5} = Qs4{1-0};
+class Enc_a75aa6 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
+ let Inst{12-8} = Rt32{4-0};
bits <1> Mu2;
let Inst{13-13} = Mu2{0-0};
- bits <5> Vvv32;
- let Inst{4-0} = Vvv32{4-0};
-}
-class Enc_3dac0b : OpcodeHexagon {
- bits <2> Qt4;
- let Inst{6-5} = Qt4{1-0};
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{20-16} = Vv32{4-0};
- bits <5> Vdd32;
- let Inst{4-0} = Vdd32{4-0};
}
-class Enc_e38e1f : OpcodeHexagon {
+class Enc_c90aca : OpcodeHexagon {
bits <8> Ii;
let Inst{12-5} = Ii{7-0};
- bits <2> Pu4;
- let Inst{22-21} = Pu4{1-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
-}
-class Enc_f8ecf9 : OpcodeHexagon {
- bits <5> Vuu32;
- let Inst{12-8} = Vuu32{4-0};
- bits <5> Vvv32;
- let Inst{20-16} = Vvv32{4-0};
- bits <5> Vdd32;
- let Inst{4-0} = Vdd32{4-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
}
-class Enc_7f1a05 : OpcodeHexagon {
- bits <5> Ru32;
- let Inst{4-0} = Ru32{4-0};
+class Enc_61f0b0 : OpcodeHexagon {
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Ry32;
- let Inst{12-8} = Ry32{4-0};
-}
-class Enc_2df31d : OpcodeHexagon {
- bits <8> Ii;
- let Inst{9-4} = Ii{7-2};
- bits <4> Rd16;
- let Inst{3-0} = Rd16{3-0};
-}
-class Enc_25bef0 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{26-25} = Ii{15-14};
- let Inst{20-16} = Ii{13-9};
- let Inst{13-5} = Ii{8-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
-}
-class Enc_f82302 : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <3> Ns8;
- let Inst{18-16} = Ns8{2-0};
- bits <4> n1;
- let Inst{29-29} = n1{3-3};
- let Inst{26-25} = n1{2-1};
- let Inst{23-23} = n1{0-0};
-}
-class Enc_44271f : OpcodeHexagon {
- bits <5> Gs32;
- let Inst{20-16} = Gs32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
}
-class Enc_83ee64 : OpcodeHexagon {
- bits <5> Ii;
- let Inst{12-8} = Ii{4-0};
+class Enc_a568d4 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
}
-class Enc_adf111 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
+class Enc_3d5b28 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <2> Qx4;
- let Inst{1-0} = Qx4{1-0};
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
-class Enc_46c951 : OpcodeHexagon {
+class Enc_322e1b : OpcodeHexagon {
bits <6> Ii;
- let Inst{12-7} = Ii{5-0};
- bits <5> II;
+ let Inst{22-21} = Ii{5-4};
+ let Inst{13-13} = Ii{3-3};
+ let Inst{7-5} = Ii{2-0};
+ bits <6> II;
+ let Inst{23-23} = II{5-5};
let Inst{4-0} = II{4-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{12-8} = Rd32{4-0};
}
-class Enc_5d6c34 : OpcodeHexagon {
+class Enc_420cf3 : OpcodeHexagon {
bits <6> Ii;
- let Inst{13-8} = Ii{5-0};
+ let Inst{22-21} = Ii{5-4};
+ let Inst{13-13} = Ii{3-3};
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Ru32;
+ let Inst{4-0} = Ru32{4-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
+ bits <5> Rd32;
+ let Inst{12-8} = Rd32{4-0};
}
-class Enc_4df4e9 : OpcodeHexagon {
- bits <11> Ii;
- let Inst{26-25} = Ii{10-9};
- let Inst{13-13} = Ii{8-8};
- let Inst{7-0} = Ii{7-0};
+class Enc_277737 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{22-21} = Ii{7-6};
+ let Inst{13-13} = Ii{5-5};
+ let Inst{7-5} = Ii{4-2};
+ bits <5> Ru32;
+ let Inst{4-0} = Ru32{4-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <3> Nt8;
- let Inst{10-8} = Nt8{2-0};
-}
-class Enc_263841 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Rtt32;
- let Inst{20-16} = Rtt32{4-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
-}
-class Enc_91b9fe : OpcodeHexagon {
- bits <5> Ii;
- let Inst{6-3} = Ii{4-1};
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <3> Nt8;
- let Inst{10-8} = Nt8{2-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+ bits <5> Rd32;
+ let Inst{12-8} = Rd32{4-0};
}
class Enc_a7b8e8 : OpcodeHexagon {
bits <6> Ii;
@@ -1683,175 +1580,151 @@ class Enc_a7b8e8 : OpcodeHexagon {
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_2b3f60 : OpcodeHexagon {
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
+class Enc_7f1a05 : OpcodeHexagon {
+ bits <5> Ru32;
+ let Inst{4-0} = Ru32{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ry32;
+ let Inst{12-8} = Ry32{4-0};
+}
+class Enc_1b64fb : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{26-25} = Ii{15-14};
+ let Inst{20-16} = Ii{13-9};
+ let Inst{13-13} = Ii{8-8};
+ let Inst{7-0} = Ii{7-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_ad1831 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{26-25} = Ii{15-14};
+ let Inst{20-16} = Ii{13-9};
+ let Inst{13-13} = Ii{8-8};
+ let Inst{7-0} = Ii{7-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_5c124a : OpcodeHexagon {
+ bits <19> Ii;
+ let Inst{26-25} = Ii{18-17};
+ let Inst{20-16} = Ii{16-12};
+ let Inst{13-13} = Ii{11-11};
+ let Inst{7-0} = Ii{10-3};
bits <5> Rtt32;
let Inst{12-8} = Rtt32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
- bits <2> Px4;
- let Inst{6-5} = Px4{1-0};
}
-class Enc_bd1cbc : OpcodeHexagon {
- bits <5> Ii;
- let Inst{8-5} = Ii{4-1};
- bits <5> Ryy32;
- let Inst{4-0} = Ryy32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
-}
-class Enc_c85e2a : OpcodeHexagon {
- bits <5> Ii;
- let Inst{12-8} = Ii{4-0};
- bits <5> II;
- let Inst{22-21} = II{4-3};
- let Inst{7-5} = II{2-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
+class Enc_fda92c : OpcodeHexagon {
+ bits <17> Ii;
+ let Inst{26-25} = Ii{16-15};
+ let Inst{20-16} = Ii{14-10};
+ let Inst{13-13} = Ii{9-9};
+ let Inst{7-0} = Ii{8-1};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
}
-class Enc_a30110 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{23-19} = Vv32{4-0};
- bits <3> Rt8;
- let Inst{18-16} = Rt8{2-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
+class Enc_bc03e5 : OpcodeHexagon {
+ bits <17> Ii;
+ let Inst{26-25} = Ii{16-15};
+ let Inst{20-16} = Ii{14-10};
+ let Inst{13-13} = Ii{9-9};
+ let Inst{7-0} = Ii{8-1};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
}
-class Enc_33f8ba : OpcodeHexagon {
- bits <8> Ii;
- let Inst{12-8} = Ii{7-3};
- let Inst{4-2} = Ii{2-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_541f26 : OpcodeHexagon {
+ bits <18> Ii;
+ let Inst{26-25} = Ii{17-16};
+ let Inst{20-16} = Ii{15-11};
+ let Inst{13-13} = Ii{10-10};
+ let Inst{7-0} = Ii{9-2};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
}
-class Enc_690862 : OpcodeHexagon {
- bits <13> Ii;
- let Inst{26-25} = Ii{12-11};
+class Enc_78cbf0 : OpcodeHexagon {
+ bits <18> Ii;
+ let Inst{26-25} = Ii{17-16};
+ let Inst{20-16} = Ii{15-11};
let Inst{13-13} = Ii{10-10};
let Inst{7-0} = Ii{9-2};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
bits <3> Nt8;
let Inst{10-8} = Nt8{2-0};
}
-class Enc_2a3787 : OpcodeHexagon {
- bits <13> Ii;
- let Inst{26-25} = Ii{12-11};
- let Inst{13-5} = Ii{10-2};
+class Enc_47ef61 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_d5c73f : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
+class Enc_22c845 : OpcodeHexagon {
+ bits <14> Ii;
+ let Inst{10-0} = Ii{13-3};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_3f97c8 : OpcodeHexagon {
+class Enc_70fb07 : OpcodeHexagon {
bits <6> Ii;
- let Inst{6-3} = Ii{5-2};
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <3> Nt8;
- let Inst{10-8} = Nt8{2-0};
+ let Inst{13-8} = Ii{5-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
+}
+class Enc_28a2dc : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+ let Inst{4-0} = Rx32{4-0};
}
-class Enc_d50cd3 : OpcodeHexagon {
- bits <3> Ii;
- let Inst{7-5} = Ii{2-0};
+class Enc_12b6e9 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{11-8} = Ii{3-0};
bits <5> Rss32;
let Inst{20-16} = Rss32{4-0};
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_729ff7 : OpcodeHexagon {
- bits <3> Ii;
- let Inst{7-5} = Ii{2-0};
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
+class Enc_1aa186 : OpcodeHexagon {
bits <5> Rss32;
let Inst{20-16} = Rss32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
-}
-class Enc_217147 : OpcodeHexagon {
- bits <2> Qv4;
- let Inst{23-22} = Qv4{1-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
}
-class Enc_b9c5fb : OpcodeHexagon {
+class Enc_8dec2e : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
bits <5> Rss32;
let Inst{20-16} = Rss32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
-}
-class Enc_f394d3 : OpcodeHexagon {
- bits <6> II;
- let Inst{11-8} = II{5-2};
- let Inst{6-5} = II{1-0};
- bits <5> Ryy32;
- let Inst{4-0} = Ryy32{4-0};
- bits <5> Re32;
- let Inst{20-16} = Re32{4-0};
-}
-class Enc_0cb018 : OpcodeHexagon {
- bits <5> Cs32;
- let Inst{20-16} = Cs32{4-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_541f26 : OpcodeHexagon {
- bits <18> Ii;
- let Inst{26-25} = Ii{17-16};
- let Inst{20-16} = Ii{15-11};
- let Inst{13-13} = Ii{10-10};
- let Inst{7-0} = Ii{9-2};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
-}
-class Enc_724154 : OpcodeHexagon {
- bits <6> II;
- let Inst{5-0} = II{5-0};
- bits <3> Nt8;
- let Inst{10-8} = Nt8{2-0};
- bits <5> Re32;
- let Inst{20-16} = Re32{4-0};
-}
-class Enc_179b35 : OpcodeHexagon {
+class Enc_b388cf : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> II;
+ let Inst{22-21} = II{4-3};
+ let Inst{7-5} = II{2-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
- bits <5> Rx32;
- let Inst{4-0} = Rx32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
-class Enc_585242 : OpcodeHexagon {
- bits <6> Ii;
- let Inst{13-13} = Ii{5-5};
- let Inst{7-3} = Ii{4-0};
- bits <2> Pv4;
- let Inst{1-0} = Pv4{1-0};
+class Enc_e07374 : OpcodeHexagon {
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <3> Nt8;
- let Inst{10-8} = Nt8{2-0};
-}
-class Enc_cf1927 : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <3> Os8;
- let Inst{2-0} = Os8{2-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
class Enc_b84c4c : OpcodeHexagon {
bits <6> Ii;
@@ -1864,121 +1737,87 @@ class Enc_b84c4c : OpcodeHexagon {
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_8203bb : OpcodeHexagon {
- bits <6> Ii;
- let Inst{12-7} = Ii{5-0};
- bits <8> II;
- let Inst{13-13} = II{7-7};
- let Inst{6-0} = II{6-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
-}
-class Enc_e66a97 : OpcodeHexagon {
- bits <7> Ii;
- let Inst{12-7} = Ii{6-1};
+class Enc_a1e29d : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
bits <5> II;
- let Inst{4-0} = II{4-0};
+ let Inst{22-21} = II{4-3};
+ let Inst{7-5} = II{2-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
-}
-class Enc_8c2412 : OpcodeHexagon {
- bits <2> Ps4;
- let Inst{6-5} = Ps4{1-0};
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{20-16} = Vv32{4-0};
- bits <5> Vdd32;
- let Inst{4-0} = Vdd32{4-0};
-}
-class Enc_284ebb : OpcodeHexagon {
- bits <2> Ps4;
- let Inst{17-16} = Ps4{1-0};
- bits <2> Pt4;
- let Inst{9-8} = Pt4{1-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
-}
-class Enc_733b27 : OpcodeHexagon {
- bits <5> Ii;
- let Inst{8-5} = Ii{4-1};
- bits <2> Pt4;
- let Inst{10-9} = Pt4{1-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+ let Inst{4-0} = Rx32{4-0};
}
-class Enc_22c845 : OpcodeHexagon {
- bits <14> Ii;
- let Inst{10-0} = Ii{13-3};
+class Enc_179b35 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+ let Inst{4-0} = Rx32{4-0};
}
-class Enc_ed5027 : OpcodeHexagon {
+class Enc_143a3c : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-8} = Ii{5-0};
+ bits <6> II;
+ let Inst{23-21} = II{5-3};
+ let Inst{7-5} = II{2-0};
bits <5> Rss32;
let Inst{20-16} = Rss32{4-0};
- bits <5> Gdd32;
- let Inst{4-0} = Gdd32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
}
-class Enc_9b0bc1 : OpcodeHexagon {
- bits <2> Pu4;
- let Inst{6-5} = Pu4{1-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
+class Enc_c85e2a : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> II;
+ let Inst{22-21} = II{4-3};
+ let Inst{7-5} = II{2-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_ea4c54 : OpcodeHexagon {
- bits <2> Pu4;
- let Inst{6-5} = Pu4{1-0};
+class Enc_da8d43 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-13} = Ii{5-5};
+ let Inst{7-3} = Ii{4-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
}
-class Enc_b72622 : OpcodeHexagon {
- bits <2> Ii;
- let Inst{13-13} = Ii{1-1};
- let Inst{5-5} = Ii{0-0};
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
+class Enc_cc449f : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{6-3} = Ii{3-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
- bits <5> Rxx32;
- let Inst{4-0} = Rxx32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_569cfe : OpcodeHexagon {
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vx32;
- let Inst{4-0} = Vx32{4-0};
+class Enc_585242 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-13} = Ii{5-5};
+ let Inst{7-3} = Ii{4-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
}
-class Enc_96ce4f : OpcodeHexagon {
+class Enc_52a5dd : OpcodeHexagon {
bits <4> Ii;
let Inst{6-3} = Ii{3-0};
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
bits <3> Nt8;
let Inst{10-8} = Nt8{2-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_143a3c : OpcodeHexagon {
- bits <6> Ii;
- let Inst{13-8} = Ii{5-0};
- bits <6> II;
- let Inst{23-21} = II{5-3};
- let Inst{7-5} = II{2-0};
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <5> Rxx32;
- let Inst{4-0} = Rxx32{4-0};
-}
class Enc_57a33e : OpcodeHexagon {
bits <9> Ii;
let Inst{13-13} = Ii{8-8};
@@ -1990,206 +1829,145 @@ class Enc_57a33e : OpcodeHexagon {
bits <5> Rtt32;
let Inst{12-8} = Rtt32{4-0};
}
-class Enc_311abd : OpcodeHexagon {
- bits <5> Ii;
- let Inst{12-8} = Ii{4-0};
+class Enc_9a33d5 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{6-3} = Ii{6-3};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_e8c45e : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{13-13} = Ii{6-6};
+ let Inst{7-3} = Ii{5-1};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
}
-class Enc_a1640c : OpcodeHexagon {
- bits <6> Ii;
- let Inst{13-8} = Ii{5-0};
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
+class Enc_b886fd : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{6-3} = Ii{4-1};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_de0214 : OpcodeHexagon {
- bits <12> Ii;
- let Inst{26-25} = Ii{11-10};
- let Inst{13-5} = Ii{9-1};
+class Enc_f44229 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{13-13} = Ii{6-6};
+ let Inst{7-3} = Ii{5-1};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
}
-class Enc_daea09 : OpcodeHexagon {
- bits <17> Ii;
- let Inst{23-22} = Ii{16-15};
- let Inst{20-16} = Ii{14-10};
- let Inst{13-13} = Ii{9-9};
- let Inst{7-1} = Ii{8-2};
- bits <2> Pu4;
- let Inst{9-8} = Pu4{1-0};
+class Enc_31aa6a : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{6-3} = Ii{4-1};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_fda92c : OpcodeHexagon {
- bits <17> Ii;
- let Inst{26-25} = Ii{16-15};
- let Inst{20-16} = Ii{14-10};
- let Inst{13-13} = Ii{9-9};
- let Inst{7-0} = Ii{8-1};
+class Enc_397f23 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{13-13} = Ii{7-7};
+ let Inst{7-3} = Ii{6-2};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
}
-class Enc_831a7d : OpcodeHexagon {
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
- bits <5> Rxx32;
- let Inst{4-0} = Rxx32{4-0};
- bits <2> Pe4;
- let Inst{6-5} = Pe4{1-0};
-}
-class Enc_11a146 : OpcodeHexagon {
- bits <4> Ii;
- let Inst{11-8} = Ii{3-0};
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
-}
-class Enc_b15941 : OpcodeHexagon {
- bits <4> Ii;
- let Inst{6-3} = Ii{3-0};
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
+class Enc_7eaeb6 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{6-3} = Ii{5-2};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_b78edd : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <4> Rs16;
- let Inst{19-16} = Rs16{3-0};
- bits <4> n1;
- let Inst{28-28} = n1{3-3};
- let Inst{24-23} = n1{2-1};
- let Inst{8-8} = n1{0-0};
-}
-class Enc_a27588 : OpcodeHexagon {
- bits <11> Ii;
- let Inst{26-25} = Ii{10-9};
- let Inst{13-5} = Ii{8-0};
+class Enc_8dbdfe : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{13-13} = Ii{7-7};
+ let Inst{7-3} = Ii{6-2};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Ryy32;
- let Inst{4-0} = Ryy32{4-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
}
-class Enc_2a7b91 : OpcodeHexagon {
+class Enc_65f095 : OpcodeHexagon {
bits <6> Ii;
- let Inst{20-16} = Ii{5-1};
- let Inst{8-8} = Ii{0-0};
- bits <2> Pt4;
- let Inst{10-9} = Pt4{1-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
-}
-class Enc_b43b67 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{20-16} = Vv32{4-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
- bits <2> Qx4;
- let Inst{6-5} = Qx4{1-0};
+ let Inst{6-3} = Ii{5-2};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_4aca3a : OpcodeHexagon {
+class Enc_448f7f : OpcodeHexagon {
bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <3> Ns8;
- let Inst{18-16} = Ns8{2-0};
- bits <3> n1;
- let Inst{29-29} = n1{2-2};
- let Inst{26-25} = n1{1-0};
-}
-class Enc_b38ffc : OpcodeHexagon {
- bits <4> Ii;
- let Inst{11-8} = Ii{3-0};
- bits <4> Rs16;
- let Inst{7-4} = Rs16{3-0};
- bits <4> Rt16;
- let Inst{3-0} = Rt16{3-0};
-}
-class Enc_cda00a : OpcodeHexagon {
- bits <12> Ii;
- let Inst{19-16} = Ii{11-8};
- let Inst{12-5} = Ii{7-0};
- bits <2> Pu4;
- let Inst{22-21} = Pu4{1-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
-}
-class Enc_2fbf3c : OpcodeHexagon {
- bits <3> Ii;
- let Inst{10-8} = Ii{2-0};
- bits <4> Rs16;
- let Inst{7-4} = Rs16{3-0};
- bits <4> Rd16;
- let Inst{3-0} = Rd16{3-0};
+ let Inst{26-25} = Ii{10-9};
+ let Inst{13-13} = Ii{8-8};
+ let Inst{7-0} = Ii{7-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
}
-class Enc_70b24b : OpcodeHexagon {
- bits <6> Ii;
- let Inst{8-5} = Ii{5-2};
+class Enc_d5c73f : OpcodeHexagon {
bits <1> Mu2;
let Inst{13-13} = Mu2{0-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_2ae154 : OpcodeHexagon {
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
+class Enc_b15941 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{6-3} = Ii{3-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
bits <5> Rx32;
- let Inst{4-0} = Rx32{4-0};
-}
-class Enc_50b5ac : OpcodeHexagon {
- bits <6> Ii;
- let Inst{17-16} = Ii{5-4};
- let Inst{6-3} = Ii{3-0};
- bits <2> Pv4;
- let Inst{1-0} = Pv4{1-0};
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_2ea740 : OpcodeHexagon {
+class Enc_10bc21 : OpcodeHexagon {
bits <4> Ii;
- let Inst{13-13} = Ii{3-3};
- let Inst{10-8} = Ii{2-0};
- bits <2> Qv4;
- let Inst{12-11} = Qv4{1-0};
+ let Inst{6-3} = Ii{3-0};
bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vs32;
- let Inst{4-0} = Vs32{4-0};
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_08d755 : OpcodeHexagon {
- bits <8> Ii;
- let Inst{12-5} = Ii{7-0};
+class Enc_4df4e9 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{26-25} = Ii{10-9};
+ let Inst{13-13} = Ii{8-8};
+ let Inst{7-0} = Ii{7-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
-}
-class Enc_1178da : OpcodeHexagon {
- bits <3> Ii;
- let Inst{7-5} = Ii{2-0};
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{20-16} = Vv32{4-0};
- bits <5> Vxx32;
- let Inst{4-0} = Vxx32{4-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
}
class Enc_8dbe85 : OpcodeHexagon {
bits <1> Mu2;
@@ -2199,275 +1977,299 @@ class Enc_8dbe85 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_5a18b3 : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <3> Ns8;
- let Inst{18-16} = Ns8{2-0};
- bits <5> n1;
- let Inst{29-29} = n1{4-4};
- let Inst{26-25} = n1{3-2};
- let Inst{22-22} = n1{1-1};
- let Inst{13-13} = n1{0-0};
+class Enc_96ce4f : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{6-3} = Ii{3-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_14d27a : OpcodeHexagon {
- bits <5> II;
- let Inst{12-8} = II{4-0};
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <4> Rs16;
- let Inst{19-16} = Rs16{3-0};
+class Enc_c7cd90 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{6-3} = Ii{3-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_a05677 : OpcodeHexagon {
- bits <5> Ii;
- let Inst{12-8} = Ii{4-0};
+class Enc_ce6828 : OpcodeHexagon {
+ bits <14> Ii;
+ let Inst{26-25} = Ii{13-12};
+ let Inst{13-13} = Ii{11-11};
+ let Inst{7-0} = Ii{10-3};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
-}
-class Enc_f0cca7 : OpcodeHexagon {
- bits <8> Ii;
- let Inst{12-5} = Ii{7-0};
- bits <6> II;
- let Inst{20-16} = II{5-1};
- let Inst{13-13} = II{0-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
-}
-class Enc_500cb0 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vxx32;
- let Inst{4-0} = Vxx32{4-0};
-}
-class Enc_7e5a82 : OpcodeHexagon {
- bits <5> Ii;
- let Inst{12-8} = Ii{4-0};
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
-}
-class Enc_12b6e9 : OpcodeHexagon {
- bits <4> Ii;
- let Inst{11-8} = Ii{3-0};
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
-}
-class Enc_6f70ca : OpcodeHexagon {
- bits <8> Ii;
- let Inst{8-4} = Ii{7-3};
-}
-class Enc_7222b7 : OpcodeHexagon {
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <2> Qd4;
- let Inst{1-0} = Qd4{1-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
}
-class Enc_e3b0c4 : OpcodeHexagon {
-
+class Enc_928ca1 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_a255dc : OpcodeHexagon {
- bits <3> Ii;
- let Inst{10-8} = Ii{2-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
+class Enc_395cc4 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{6-3} = Ii{6-3};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_cb785b : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
+class Enc_85bf58 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{6-3} = Ii{6-3};
bits <5> Rtt32;
- let Inst{20-16} = Rtt32{4-0};
- bits <5> Vdd32;
- let Inst{4-0} = Vdd32{4-0};
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_cb4b4e : OpcodeHexagon {
- bits <2> Pu4;
- let Inst{6-5} = Pu4{1-0};
+class Enc_e957fb : OpcodeHexagon {
+ bits <12> Ii;
+ let Inst{26-25} = Ii{11-10};
+ let Inst{13-13} = Ii{9-9};
+ let Inst{7-0} = Ii{8-1};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
}
-class Enc_1f5d8f : OpcodeHexagon {
+class Enc_935d9b : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{6-3} = Ii{4-1};
bits <1> Mu2;
let Inst{13-13} = Mu2{0-0};
- bits <5> Ryy32;
- let Inst{4-0} = Ryy32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_9cdba7 : OpcodeHexagon {
- bits <8> Ii;
- let Inst{12-5} = Ii{7-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
+class Enc_052c7d : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{6-3} = Ii{4-1};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_5cd7e9 : OpcodeHexagon {
+class Enc_0d8870 : OpcodeHexagon {
bits <12> Ii;
let Inst{26-25} = Ii{11-10};
- let Inst{13-5} = Ii{9-1};
+ let Inst{13-13} = Ii{9-9};
+ let Inst{7-0} = Ii{8-1};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Ryy32;
- let Inst{4-0} = Ryy32{4-0};
-}
-class Enc_454a26 : OpcodeHexagon {
- bits <2> Pt4;
- let Inst{9-8} = Pt4{1-0};
- bits <2> Ps4;
- let Inst{17-16} = Ps4{1-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
}
-class Enc_a6853f : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <3> Ns8;
- let Inst{18-16} = Ns8{2-0};
- bits <6> n1;
- let Inst{29-29} = n1{5-5};
- let Inst{26-25} = n1{4-3};
- let Inst{23-22} = n1{2-1};
- let Inst{13-13} = n1{0-0};
+class Enc_91b9fe : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{6-3} = Ii{4-1};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_c175d0 : OpcodeHexagon {
- bits <4> Ii;
- let Inst{11-8} = Ii{3-0};
- bits <4> Rs16;
- let Inst{7-4} = Rs16{3-0};
- bits <4> Rd16;
- let Inst{3-0} = Rd16{3-0};
+class Enc_e26546 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{6-3} = Ii{4-1};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_16c48b : OpcodeHexagon {
+class Enc_143445 : OpcodeHexagon {
+ bits <13> Ii;
+ let Inst{26-25} = Ii{12-11};
+ let Inst{13-13} = Ii{10-10};
+ let Inst{7-0} = Ii{9-2};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_79b8c8 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{6-3} = Ii{5-2};
bits <1> Mu2;
let Inst{13-13} = Mu2{0-0};
- bits <5> Vv32;
- let Inst{12-8} = Vv32{4-0};
- bits <5> Vw32;
- let Inst{4-0} = Vw32{4-0};
-}
-class Enc_895bd9 : OpcodeHexagon {
- bits <2> Qu4;
- let Inst{9-8} = Qu4{1-0};
bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vx32;
- let Inst{4-0} = Vx32{4-0};
-}
-class Enc_ea23e4 : OpcodeHexagon {
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
-}
-class Enc_4dc228 : OpcodeHexagon {
- bits <9> Ii;
- let Inst{12-8} = Ii{8-4};
- let Inst{4-3} = Ii{3-2};
- bits <10> II;
- let Inst{20-16} = II{9-5};
- let Inst{7-5} = II{4-2};
- let Inst{1-0} = II{1-0};
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_10bc21 : OpcodeHexagon {
- bits <4> Ii;
- let Inst{6-3} = Ii{3-0};
+class Enc_db40cd : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{6-3} = Ii{5-2};
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_1aaec1 : OpcodeHexagon {
- bits <3> Ii;
- let Inst{10-8} = Ii{2-0};
- bits <3> Os8;
- let Inst{2-0} = Os8{2-0};
+class Enc_690862 : OpcodeHexagon {
+ bits <13> Ii;
+ let Inst{26-25} = Ii{12-11};
+ let Inst{13-13} = Ii{10-10};
+ let Inst{7-0} = Ii{9-2};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_3f97c8 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{6-3} = Ii{5-2};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_329361 : OpcodeHexagon {
- bits <2> Pu4;
- let Inst{6-5} = Pu4{1-0};
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
+class Enc_223005 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{6-3} = Ii{5-2};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_cd82bc : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{21-21} = Ii{3-3};
+ let Inst{7-5} = Ii{2-0};
+ bits <6> II;
+ let Inst{13-8} = II{5-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
+}
+class Enc_729ff7 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
bits <5> Rtt32;
let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
}
-class Enc_d2c7f1 : OpcodeHexagon {
+class Enc_8c6530 : OpcodeHexagon {
bits <5> Rtt32;
let Inst{12-8} = Rtt32{4-0};
bits <5> Rss32;
let Inst{20-16} = Rss32{4-0};
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
bits <5> Rdd32;
let Inst{4-0} = Rdd32{4-0};
- bits <2> Pe4;
- let Inst{6-5} = Pe4{1-0};
}
-class Enc_3680c2 : OpcodeHexagon {
- bits <7> Ii;
- let Inst{11-5} = Ii{6-0};
+class Enc_d50cd3 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
bits <5> Rss32;
let Inst{20-16} = Rss32{4-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
}
-class Enc_1ef990 : OpcodeHexagon {
- bits <2> Pv4;
- let Inst{12-11} = Pv4{1-0};
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Vs32;
- let Inst{4-0} = Vs32{4-0};
+class Enc_dbd70c : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_8b8d61 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{22-21} = Ii{5-4};
+ let Inst{13-13} = Ii{3-3};
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{4-0} = Ru32{4-0};
+ bits <5> Rd32;
+ let Inst{12-8} = Rd32{4-0};
+}
+class Enc_c31910 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{23-21} = Ii{7-5};
+ let Inst{13-13} = Ii{4-4};
+ let Inst{7-5} = Ii{3-1};
+ let Inst{3-3} = Ii{0-0};
+ bits <5> II;
+ let Inst{12-8} = II{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_e957fb : OpcodeHexagon {
- bits <12> Ii;
- let Inst{26-25} = Ii{11-10};
- let Inst{13-13} = Ii{9-9};
- let Inst{7-0} = Ii{8-1};
+class Enc_9fae8a : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-8} = Ii{5-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_a1640c : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-8} = Ii{5-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_fef969 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{20-16} = Ii{5-1};
+ let Inst{5-5} = Ii{0-0};
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
}
-class Enc_c0cdde : OpcodeHexagon {
- bits <9> Ii;
+class Enc_b0e9d8 : OpcodeHexagon {
+ bits <10> Ii;
+ let Inst{21-21} = Ii{9-9};
let Inst{13-5} = Ii{8-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
}
-class Enc_c9e3bc : OpcodeHexagon {
- bits <4> Ii;
- let Inst{13-13} = Ii{3-3};
- let Inst{10-8} = Ii{2-0};
+class Enc_b4e6cf : OpcodeHexagon {
+ bits <10> Ii;
+ let Inst{21-21} = Ii{9-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Ru32;
+ let Inst{4-0} = Ru32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_1cf4ca : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{17-16} = Ii{5-4};
+ let Inst{6-3} = Ii{3-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vs32;
- let Inst{4-0} = Vs32{4-0};
+ let Inst{12-8} = Rt32{4-0};
}
-class Enc_2e1979 : OpcodeHexagon {
+class Enc_6339d5 : OpcodeHexagon {
bits <2> Ii;
let Inst{13-13} = Ii{1-1};
let Inst{7-7} = Ii{0-0};
@@ -2475,28 +2277,21 @@ class Enc_2e1979 : OpcodeHexagon {
let Inst{6-5} = Pv4{1-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{12-8} = Ru32{4-0};
bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
-}
-class Enc_0b2e5b : OpcodeHexagon {
- bits <3> Ii;
- let Inst{7-5} = Ii{2-0};
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{20-16} = Vv32{4-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
+ let Inst{4-0} = Rt32{4-0};
}
-class Enc_6f83e7 : OpcodeHexagon {
- bits <2> Qv4;
- let Inst{23-22} = Qv4{1-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
+class Enc_44215c : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{17-16} = Ii{5-4};
+ let Inst{6-3} = Ii{3-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
}
-class Enc_6339d5 : OpcodeHexagon {
+class Enc_47ee5e : OpcodeHexagon {
bits <2> Ii;
let Inst{13-13} = Ii{1-1};
let Inst{7-7} = Ii{0-0};
@@ -2506,108 +2301,141 @@ class Enc_6339d5 : OpcodeHexagon {
let Inst{20-16} = Rs32{4-0};
bits <5> Ru32;
let Inst{12-8} = Ru32{4-0};
- bits <5> Rt32;
- let Inst{4-0} = Rt32{4-0};
+ bits <3> Nt8;
+ let Inst{2-0} = Nt8{2-0};
}
-class Enc_d483b9 : OpcodeHexagon {
- bits <1> Ii;
- let Inst{5-5} = Ii{0-0};
- bits <5> Vuu32;
- let Inst{12-8} = Vuu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vxx32;
- let Inst{4-0} = Vxx32{4-0};
+class Enc_50b5ac : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{17-16} = Ii{5-4};
+ let Inst{6-3} = Ii{3-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
}
-class Enc_51635c : OpcodeHexagon {
- bits <7> Ii;
- let Inst{8-4} = Ii{6-2};
- bits <4> Rd16;
- let Inst{3-0} = Rd16{3-0};
+class Enc_1a9974 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{12-8} = Ru32{4-0};
+ bits <5> Rtt32;
+ let Inst{4-0} = Rtt32{4-0};
}
-class Enc_e26546 : OpcodeHexagon {
- bits <5> Ii;
- let Inst{6-3} = Ii{4-1};
- bits <3> Nt8;
- let Inst{10-8} = Nt8{2-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_d7dc10 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
}
-class Enc_70fb07 : OpcodeHexagon {
+class Enc_8203bb : OpcodeHexagon {
bits <6> Ii;
- let Inst{13-8} = Ii{5-0};
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <5> Rxx32;
- let Inst{4-0} = Rxx32{4-0};
+ let Inst{12-7} = Ii{5-0};
+ bits <8> II;
+ let Inst{13-13} = II{7-7};
+ let Inst{6-0} = II{6-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
}
-class Enc_6c9ee0 : OpcodeHexagon {
- bits <3> Ii;
- let Inst{10-8} = Ii{2-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_d7a65e : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{12-7} = Ii{5-0};
+ bits <6> II;
+ let Inst{13-13} = II{5-5};
+ let Inst{4-0} = II{4-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
}
-class Enc_fa3ba4 : OpcodeHexagon {
- bits <14> Ii;
- let Inst{26-25} = Ii{13-12};
- let Inst{13-5} = Ii{11-3};
+class Enc_a803e0 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{12-7} = Ii{6-1};
+ bits <8> II;
+ let Inst{13-13} = II{7-7};
+ let Inst{6-0} = II{6-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
}
-class Enc_44661f : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_f20719 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{12-7} = Ii{6-1};
+ bits <6> II;
+ let Inst{13-13} = II{5-5};
+ let Inst{4-0} = II{4-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
}
-class Enc_277737 : OpcodeHexagon {
+class Enc_f37377 : OpcodeHexagon {
bits <8> Ii;
- let Inst{22-21} = Ii{7-6};
- let Inst{13-13} = Ii{5-5};
- let Inst{7-5} = Ii{4-2};
- bits <5> Ru32;
- let Inst{4-0} = Ru32{4-0};
+ let Inst{12-7} = Ii{7-2};
+ bits <8> II;
+ let Inst{13-13} = II{7-7};
+ let Inst{6-0} = II{6-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <5> Rd32;
- let Inst{12-8} = Rd32{4-0};
}
-class Enc_5c124a : OpcodeHexagon {
- bits <19> Ii;
- let Inst{26-25} = Ii{18-17};
- let Inst{20-16} = Ii{16-12};
- let Inst{13-13} = Ii{11-11};
- let Inst{7-0} = Ii{10-3};
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
+class Enc_5ccba9 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-7} = Ii{7-2};
+ bits <6> II;
+ let Inst{13-13} = II{5-5};
+ let Inst{4-0} = II{4-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
}
-class Enc_928ca1 : OpcodeHexagon {
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_8bcba4 : OpcodeHexagon {
+ bits <6> II;
+ let Inst{5-0} = II{5-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Re32;
+ let Inst{20-16} = Re32{4-0};
}
-class Enc_da664b : OpcodeHexagon {
+class Enc_eca7c8 : OpcodeHexagon {
bits <2> Ii;
let Inst{13-13} = Ii{1-1};
let Inst{7-7} = Ii{0-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{12-8} = Ru32{4-0};
+ bits <5> Rt32;
+ let Inst{4-0} = Rt32{4-0};
+}
+class Enc_9ea4cf : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{6-6} = Ii{0-0};
+ bits <6> II;
+ let Inst{5-0} = II{5-0};
+ bits <5> Ru32;
+ let Inst{20-16} = Ru32{4-0};
bits <5> Rt32;
let Inst{12-8} = Rt32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
}
-class Enc_47ee5e : OpcodeHexagon {
+class Enc_724154 : OpcodeHexagon {
+ bits <6> II;
+ let Inst{5-0} = II{5-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Re32;
+ let Inst{20-16} = Re32{4-0};
+}
+class Enc_c6220b : OpcodeHexagon {
bits <2> Ii;
let Inst{13-13} = Ii{1-1};
let Inst{7-7} = Ii{0-0};
- bits <2> Pv4;
- let Inst{6-5} = Pv4{1-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
bits <5> Ru32;
@@ -2615,776 +2443,1334 @@ class Enc_47ee5e : OpcodeHexagon {
bits <3> Nt8;
let Inst{2-0} = Nt8{2-0};
}
-class Enc_8bcba4 : OpcodeHexagon {
+class Enc_7eb485 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{6-6} = Ii{0-0};
bits <6> II;
let Inst{5-0} = II{5-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
+ bits <5> Ru32;
+ let Inst{20-16} = Ru32{4-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_c7a204 : OpcodeHexagon {
+ bits <6> II;
+ let Inst{5-0} = II{5-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
bits <5> Re32;
let Inst{20-16} = Re32{4-0};
}
-class Enc_3a2484 : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <4> Rs16;
- let Inst{19-16} = Rs16{3-0};
- bits <4> n1;
- let Inst{28-28} = n1{3-3};
- let Inst{24-23} = n1{2-1};
- let Inst{13-13} = n1{0-0};
+class Enc_55355c : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{12-8} = Ru32{4-0};
+ bits <5> Rtt32;
+ let Inst{4-0} = Rtt32{4-0};
}
-class Enc_a5ed8a : OpcodeHexagon {
+class Enc_f79415 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{6-6} = Ii{0-0};
+ bits <6> II;
+ let Inst{5-0} = II{5-0};
+ bits <5> Ru32;
+ let Inst{20-16} = Ru32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+}
+class Enc_645d54 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{5-5} = Ii{0-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
}
-class Enc_cb9321 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{27-21} = Ii{15-9};
- let Inst{13-5} = Ii{8-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
+class Enc_b72622 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{5-5} = Ii{0-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
+}
+class Enc_11a146 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{11-8} = Ii{3-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_668704 : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
+class Enc_93af4c : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{10-4} = Ii{6-0};
+ bits <4> Rx16;
+ let Inst{3-0} = Rx16{3-0};
+}
+class Enc_0527db : OpcodeHexagon {
bits <4> Rs16;
- let Inst{19-16} = Rs16{3-0};
- bits <5> n1;
- let Inst{28-28} = n1{4-4};
- let Inst{25-22} = n1{3-0};
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rx16;
+ let Inst{3-0} = Rx16{3-0};
}
-class Enc_a7341a : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{20-16} = Vv32{4-0};
- bits <5> Vx32;
- let Inst{4-0} = Vx32{4-0};
+class Enc_2df31d : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{9-4} = Ii{7-2};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
}
-class Enc_5eac98 : OpcodeHexagon {
+class Enc_97d666 : OpcodeHexagon {
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_1f5ba6 : OpcodeHexagon {
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_63eaeb : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{1-0} = Ii{1-0};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+}
+class Enc_ed48be : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{6-5} = Ii{1-0};
+ bits <3> Rdd8;
+ let Inst{2-0} = Rdd8{2-0};
+}
+class Enc_399e12 : OpcodeHexagon {
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <3> Rdd8;
+ let Inst{2-0} = Rdd8{2-0};
+}
+class Enc_ee5ed0 : OpcodeHexagon {
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+ bits <2> n1;
+ let Inst{9-8} = n1{1-0};
+}
+class Enc_e39bb2 : OpcodeHexagon {
bits <6> Ii;
- let Inst{13-8} = Ii{5-0};
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
+ let Inst{9-4} = Ii{5-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
}
-class Enc_02553a : OpcodeHexagon {
+class Enc_7a0ea6 : OpcodeHexagon {
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+ bits <1> n1;
+ let Inst{9-9} = n1{0-0};
+}
+class Enc_53dca9 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{11-8} = Ii{5-2};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_c175d0 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{11-8} = Ii{3-0};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_2fbf3c : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_86a14b : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{7-3} = Ii{7-3};
+ bits <3> Rdd8;
+ let Inst{2-0} = Rdd8{2-0};
+}
+class Enc_2bae10 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{10-8} = Ii{3-1};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_51635c : OpcodeHexagon {
bits <7> Ii;
- let Inst{11-5} = Ii{6-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
+ let Inst{8-4} = Ii{6-2};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
}
-class Enc_acd6ed : OpcodeHexagon {
+class Enc_b38ffc : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{11-8} = Ii{3-0};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rt16;
+ let Inst{3-0} = Rt16{3-0};
+}
+class Enc_f55a0c : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{11-8} = Ii{5-2};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rt16;
+ let Inst{3-0} = Rt16{3-0};
+}
+class Enc_6f70ca : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{8-4} = Ii{7-3};
+}
+class Enc_84d359 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{3-0} = Ii{3-0};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+}
+class Enc_b8309d : OpcodeHexagon {
bits <9> Ii;
- let Inst{10-5} = Ii{8-3};
- bits <2> Pt4;
- let Inst{12-11} = Pt4{1-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
+ let Inst{8-3} = Ii{8-3};
+ bits <3> Rtt8;
+ let Inst{2-0} = Rtt8{2-0};
}
-class Enc_8e583a : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
+class Enc_625deb : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{10-8} = Ii{3-1};
bits <4> Rs16;
- let Inst{19-16} = Rs16{3-0};
- bits <5> n1;
- let Inst{28-28} = n1{4-4};
- let Inst{25-23} = n1{3-1};
- let Inst{13-13} = n1{0-0};
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rt16;
+ let Inst{3-0} = Rt16{3-0};
}
-class Enc_b886fd : OpcodeHexagon {
- bits <5> Ii;
- let Inst{6-3} = Ii{4-1};
- bits <2> Pv4;
- let Inst{1-0} = Pv4{1-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_87c142 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{8-4} = Ii{6-2};
+ bits <4> Rt16;
+ let Inst{3-0} = Rt16{3-0};
}
-class Enc_24a7dc : OpcodeHexagon {
+class Enc_a6ce9c : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{3-0} = Ii{5-2};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+}
+class Enc_2146c1 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <3> Qss8;
+ let Inst{2-0} = Qss8{2-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_843e80 : OpcodeHexagon {
bits <5> Vu32;
let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+ bits <3> Qxx8;
+ let Inst{2-0} = Qxx8{2-0};
+}
+class Enc_1f3376 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
bits <5> Vv32;
- let Inst{23-19} = Vv32{4-0};
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vxx32;
+ let Inst{7-3} = Vxx32{4-0};
+}
+class Enc_8e9fbd : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
bits <3> Rt8;
- let Inst{18-16} = Rt8{2-0};
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+ bits <5> Vy32;
+ let Inst{12-8} = Vy32{4-0};
+}
+class Enc_57e245 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
bits <5> Vdd32;
- let Inst{4-0} = Vdd32{4-0};
+ let Inst{7-3} = Vdd32{4-0};
+ bits <5> Vy32;
+ let Inst{12-8} = Vy32{4-0};
}
-class Enc_2d829e : OpcodeHexagon {
- bits <14> Ii;
- let Inst{10-0} = Ii{13-3};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
+class Enc_274a4c : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+ bits <5> Vy32;
+ let Inst{12-8} = Vy32{4-0};
}
-class Enc_4f4ed7 : OpcodeHexagon {
- bits <18> Ii;
- let Inst{26-25} = Ii{17-16};
- let Inst{20-16} = Ii{15-11};
- let Inst{13-5} = Ii{10-2};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
+class Enc_fbacc2 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vxx32;
+ let Inst{7-3} = Vxx32{4-0};
+ bits <5> Vy32;
+ let Inst{12-8} = Vy32{4-0};
}
-class Enc_84b2cd : OpcodeHexagon {
- bits <8> Ii;
- let Inst{12-7} = Ii{7-2};
- bits <5> II;
- let Inst{4-0} = II{4-0};
+class Enc_2a736a : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_b8513b : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_b5e54d : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
}
-class Enc_8dbdfe : OpcodeHexagon {
- bits <8> Ii;
- let Inst{13-13} = Ii{7-7};
- let Inst{7-3} = Ii{6-2};
- bits <2> Pv4;
- let Inst{1-0} = Pv4{1-0};
+class Enc_50e578 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
bits <5> Rs32;
let Inst{20-16} = Rs32{4-0};
- bits <3> Nt8;
- let Inst{10-8} = Nt8{2-0};
-}
-class Enc_90cd8b : OpcodeHexagon {
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
bits <5> Rd32;
let Inst{4-0} = Rd32{4-0};
}
-class Enc_bd0b33 : OpcodeHexagon {
- bits <10> Ii;
- let Inst{21-21} = Ii{9-9};
- let Inst{13-5} = Ii{8-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
+class Enc_b5b643 : OpcodeHexagon {
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
}
-class Enc_8b8927 : OpcodeHexagon {
+class Enc_2516bf : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_8d04c3 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_2ad23d : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+}
+class Enc_85daf5 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+}
+class Enc_e570b0 : OpcodeHexagon {
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_41dcc3 : OpcodeHexagon {
bits <5> Rt32;
let Inst{20-16} = Rt32{4-0};
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_3126d7 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
bits <5> Vv32;
- let Inst{4-0} = Vv32{4-0};
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
}
-class Enc_c7cd90 : OpcodeHexagon {
- bits <4> Ii;
- let Inst{6-3} = Ii{3-0};
- bits <3> Nt8;
- let Inst{10-8} = Nt8{2-0};
+class Enc_1cd70f : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_12dd8f : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+}
+class Enc_8d5d98 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vxx32;
+ let Inst{7-3} = Vxx32{4-0};
+}
+class Enc_fc563d : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_c84567 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_334c2b : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_3c46e8 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_129701 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_790d6e : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_880793 : OpcodeHexagon {
+ bits <3> Qt8;
+ let Inst{2-0} = Qt8{2-0};
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_a265b7 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_6b1bc4 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <3> Qt8;
+ let Inst{10-8} = Qt8{2-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_b2ffce : OpcodeHexagon {
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_fde0e3 : OpcodeHexagon {
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_b3bac4 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_e7c9de : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+}
+class Enc_5c3a80 : OpcodeHexagon {
+ bits <3> Qt8;
+ let Inst{10-8} = Qt8{2-0};
+ bits <3> Qd8;
+ let Inst{5-3} = Qd8{2-0};
+}
+class Enc_8f7cc3 : OpcodeHexagon {
+ bits <3> Qtt8;
+ let Inst{10-8} = Qtt8{2-0};
+ bits <3> Qdd8;
+ let Inst{5-3} = Qdd8{2-0};
+}
+class Enc_f106e0 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{8-4} = Vv32{4-0};
+ bits <5> Vt32;
+ let Inst{13-9} = Vt32{4-0};
+ bits <4> Vdd16;
+ let Inst{3-0} = Vdd16{3-0};
+}
+class Enc_7db2f8 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{13-9} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{8-4} = Vv32{4-0};
+ bits <4> Vdd16;
+ let Inst{3-0} = Vdd16{3-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_405228 : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <4> Rs16;
- let Inst{19-16} = Rs16{3-0};
- bits <3> n1;
- let Inst{28-28} = n1{2-2};
- let Inst{24-23} = n1{1-0};
+class Enc_37c406 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <4> Vdd16;
+ let Inst{7-4} = Vdd16{3-0};
}
-class Enc_81ac1d : OpcodeHexagon {
- bits <24> Ii;
- let Inst{24-16} = Ii{23-15};
- let Inst{13-1} = Ii{14-2};
+class Enc_72a92d : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vxx32;
+ let Inst{7-3} = Vxx32{4-0};
}
-class Enc_395cc4 : OpcodeHexagon {
- bits <7> Ii;
- let Inst{6-3} = Ii{6-3};
+class Enc_d7e8ba : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_ce4c54 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_3a81ac : OpcodeHexagon {
bits <1> Mu2;
let Inst{13-13} = Mu2{0-0};
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_a51a9a : OpcodeHexagon {
- bits <8> Ii;
- let Inst{12-8} = Ii{7-3};
- let Inst{4-2} = Ii{2-0};
+class Enc_6c4697 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_d44e31 : OpcodeHexagon {
- bits <6> Ii;
- let Inst{12-7} = Ii{5-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
+class Enc_b0e553 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_5883d0 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
bits <5> Rt32;
- let Inst{4-0} = Rt32{4-0};
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
}
-class Enc_f77fbc : OpcodeHexagon {
- bits <4> Ii;
- let Inst{13-13} = Ii{3-3};
- let Inst{10-8} = Ii{2-0};
+class Enc_9a895f : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_f3adb6 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_b5d5a7 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
bits <5> Rt32;
let Inst{20-16} = Rt32{4-0};
- bits <3> Os8;
- let Inst{2-0} = Os8{2-0};
+ bits <5> Vs32;
+ let Inst{7-3} = Vs32{4-0};
}
-class Enc_d2216a : OpcodeHexagon {
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
+class Enc_5b76ab : OpcodeHexagon {
+ bits <10> Ii;
+ let Inst{21-21} = Ii{9-9};
+ let Inst{13-8} = Ii{8-3};
+ let Inst{2-0} = Ii{2-0};
+ bits <5> Vs32;
+ let Inst{7-3} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_85bf58 : OpcodeHexagon {
- bits <7> Ii;
- let Inst{6-3} = Ii{6-3};
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
+class Enc_17a474 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vs32;
+ let Inst{7-3} = Vs32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_71bb9b : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
- bits <5> Vv32;
- let Inst{20-16} = Vv32{4-0};
- bits <5> Vdd32;
- let Inst{4-0} = Vdd32{4-0};
+class Enc_9a9d62 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Vs32;
+ let Inst{7-3} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_52a5dd : OpcodeHexagon {
- bits <4> Ii;
- let Inst{6-3} = Ii{3-0};
- bits <2> Pv4;
- let Inst{1-0} = Pv4{1-0};
- bits <3> Nt8;
- let Inst{10-8} = Nt8{2-0};
+class Enc_3a527f : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Vs32;
+ let Inst{7-3} = Vs32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_5e2823 : OpcodeHexagon {
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
+class Enc_c39a8b : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vss32;
+ let Inst{7-3} = Vss32{4-0};
}
-class Enc_28a2dc : OpcodeHexagon {
- bits <5> Ii;
- let Inst{12-8} = Ii{4-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
+class Enc_908985 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vss32;
+ let Inst{7-3} = Vss32{4-0};
bits <5> Rx32;
- let Inst{4-0} = Rx32{4-0};
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_5138b3 : OpcodeHexagon {
+class Enc_e8ddd5 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Vss32;
+ let Inst{7-3} = Vss32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_6a4549 : OpcodeHexagon {
bits <5> Vu32;
let Inst{12-8} = Vu32{4-0};
bits <5> Rt32;
let Inst{20-16} = Rt32{4-0};
- bits <5> Vx32;
- let Inst{4-0} = Vx32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
}
-class Enc_84d359 : OpcodeHexagon {
- bits <4> Ii;
- let Inst{3-0} = Ii{3-0};
- bits <4> Rs16;
- let Inst{7-4} = Rs16{3-0};
+class Enc_932b58 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
}
-class Enc_e07374 : OpcodeHexagon {
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
+class Enc_124cac : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vxx32;
+ let Inst{7-3} = Vxx32{4-0};
}
-class Enc_e0820b : OpcodeHexagon {
+class Enc_aceeef : OpcodeHexagon {
bits <5> Vu32;
let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_2c3281 : OpcodeHexagon {
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_a4ae28 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
bits <5> Vv32;
- let Inst{20-16} = Vv32{4-0};
- bits <2> Qs4;
- let Inst{6-5} = Qs4{1-0};
+ let Inst{12-8} = Vv32{4-0};
+ bits <3> Qd8;
+ let Inst{5-3} = Qd8{2-0};
+}
+class Enc_c1652e : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <3> Qd8;
+ let Inst{5-3} = Qd8{2-0};
+}
+class Enc_9aae4a : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+ bits <3> Qd8;
+ let Inst{2-0} = Qd8{2-0};
+}
+class Enc_dcfcbb : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
+ let Inst{7-3} = Vd32{4-0};
}
-class Enc_323f2d : OpcodeHexagon {
- bits <6> II;
- let Inst{11-8} = II{5-2};
- let Inst{6-5} = II{1-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
- bits <5> Re32;
- let Inst{20-16} = Re32{4-0};
+class Enc_a7ca29 : OpcodeHexagon {
+ bits <3> Qt8;
+ let Inst{2-0} = Qt8{2-0};
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
}
-class Enc_1a9974 : OpcodeHexagon {
- bits <2> Ii;
- let Inst{13-13} = Ii{1-1};
- let Inst{7-7} = Ii{0-0};
- bits <2> Pv4;
- let Inst{6-5} = Pv4{1-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Ru32;
- let Inst{12-8} = Ru32{4-0};
- bits <5> Rtt32;
- let Inst{4-0} = Rtt32{4-0};
+class Enc_dd5f9f : OpcodeHexagon {
+ bits <3> Qtt8;
+ let Inst{2-0} = Qtt8{2-0};
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
}
-class Enc_5de85f : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
+class Enc_7dc746 : OpcodeHexagon {
+ bits <3> Quu8;
+ let Inst{10-8} = Quu8{2-0};
bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <3> Ns8;
- let Inst{18-16} = Ns8{2-0};
+ let Inst{20-16} = Rt32{4-0};
+ bits <3> Qdd8;
+ let Inst{5-3} = Qdd8{2-0};
}
-class Enc_dd766a : OpcodeHexagon {
+class Enc_fa5efc : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+}
+class Enc_aac08c : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+}
+class Enc_9a8c1f : OpcodeHexagon {
bits <5> Vu32;
let Inst{12-8} = Vu32{4-0};
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
bits <5> Vdd32;
- let Inst{4-0} = Vdd32{4-0};
+ let Inst{7-3} = Vdd32{4-0};
}
-class Enc_0b51ce : OpcodeHexagon {
- bits <3> Ii;
- let Inst{10-8} = Ii{2-0};
- bits <2> Qv4;
- let Inst{12-11} = Qv4{1-0};
- bits <5> Vs32;
- let Inst{4-0} = Vs32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+class Enc_a9eee0 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vxx32;
+ let Inst{7-3} = Vxx32{4-0};
}
-class Enc_b4e6cf : OpcodeHexagon {
+class Enc_9ce456 : OpcodeHexagon {
bits <10> Ii;
let Inst{21-21} = Ii{9-9};
- let Inst{13-5} = Ii{8-0};
- bits <5> Ru32;
- let Inst{4-0} = Ru32{4-0};
+ let Inst{13-8} = Ii{8-3};
+ let Inst{2-0} = Ii{2-0};
+ bits <5> Vss32;
+ let Inst{7-3} = Vss32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_44215c : OpcodeHexagon {
- bits <6> Ii;
- let Inst{17-16} = Ii{5-4};
- let Inst{6-3} = Ii{3-0};
- bits <2> Pv4;
- let Inst{1-0} = Pv4{1-0};
- bits <3> Nt8;
- let Inst{10-8} = Nt8{2-0};
+class Enc_96f0fd : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+ bits <3> Qdd8;
+ let Inst{2-0} = Qdd8{2-0};
}
-class Enc_0aa344 : OpcodeHexagon {
- bits <5> Gss32;
- let Inst{20-16} = Gss32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
+class Enc_a662ae : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
}
-class Enc_a21d47 : OpcodeHexagon {
- bits <6> Ii;
- let Inst{10-5} = Ii{5-0};
- bits <2> Pt4;
- let Inst{12-11} = Pt4{1-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
+class Enc_ec09c9 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <3> Qdd8;
+ let Inst{5-3} = Qdd8{2-0};
}
-class Enc_cc449f : OpcodeHexagon {
- bits <4> Ii;
- let Inst{6-3} = Ii{3-0};
- bits <2> Pv4;
- let Inst{1-0} = Pv4{1-0};
+class Enc_400b42 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+ let Inst{20-16} = Rt32{4-0};
+ bits <3> Qdd8;
+ let Inst{5-3} = Qdd8{2-0};
}
-class Enc_645d54 : OpcodeHexagon {
- bits <2> Ii;
- let Inst{13-13} = Ii{1-1};
- let Inst{5-5} = Ii{0-0};
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
+class Enc_a5ed8a : OpcodeHexagon {
bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
}
-class Enc_667b39 : OpcodeHexagon {
- bits <5> Css32;
- let Inst{20-16} = Css32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
+class Enc_134437 : OpcodeHexagon {
+ bits <2> Qs4;
+ let Inst{9-8} = Qs4{1-0};
+ bits <2> Qt4;
+ let Inst{23-22} = Qt4{1-0};
+ bits <2> Qd4;
+ let Inst{1-0} = Qd4{1-0};
}
-class Enc_927852 : OpcodeHexagon {
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
+class Enc_bfbf03 : OpcodeHexagon {
+ bits <2> Qs4;
+ let Inst{9-8} = Qs4{1-0};
+ bits <2> Qd4;
+ let Inst{1-0} = Qd4{1-0};
}
-class Enc_163a3c : OpcodeHexagon {
- bits <7> Ii;
- let Inst{12-7} = Ii{6-1};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
+class Enc_7222b7 : OpcodeHexagon {
bits <5> Rt32;
- let Inst{4-0} = Rt32{4-0};
+ let Inst{20-16} = Rt32{4-0};
+ bits <2> Qd4;
+ let Inst{1-0} = Qd4{1-0};
}
-class Enc_a75aa6 : OpcodeHexagon {
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
+class Enc_f3f408 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_a255dc : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_2ebe3b : OpcodeHexagon {
bits <1> Mu2;
let Inst{13-13} = Mu2{0-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_b087ac : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
+class Enc_8d8a30 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
bits <5> Rt32;
let Inst{20-16} = Rt32{4-0};
bits <5> Vd32;
let Inst{4-0} = Vd32{4-0};
}
-class Enc_691712 : OpcodeHexagon {
+class Enc_58a8bf : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
bits <2> Pv4;
let Inst{12-11} = Pv4{1-0};
- bits <1> Mu2;
- let Inst{13-13} = Mu2{0-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_b1e1fb : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <4> Rs16;
- let Inst{19-16} = Rs16{3-0};
- bits <5> n1;
- let Inst{28-28} = n1{4-4};
- let Inst{25-23} = n1{3-1};
- let Inst{8-8} = n1{0-0};
-}
-class Enc_1f19b5 : OpcodeHexagon {
- bits <5> Ii;
- let Inst{9-5} = Ii{4-0};
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
-}
-class Enc_b8c967 : OpcodeHexagon {
- bits <8> Ii;
- let Inst{12-5} = Ii{7-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
-}
-class Enc_fb6577 : OpcodeHexagon {
- bits <2> Pu4;
- let Inst{9-8} = Pu4{1-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
-}
-class Enc_2bae10 : OpcodeHexagon {
- bits <4> Ii;
- let Inst{10-8} = Ii{3-1};
- bits <4> Rs16;
- let Inst{7-4} = Rs16{3-0};
- bits <4> Rd16;
- let Inst{3-0} = Rd16{3-0};
-}
-class Enc_c4dc92 : OpcodeHexagon {
- bits <2> Qv4;
- let Inst{23-22} = Qv4{1-0};
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
+class Enc_f8c1c4 : OpcodeHexagon {
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
bits <5> Vd32;
let Inst{4-0} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_03833b : OpcodeHexagon {
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
+class Enc_c9e3bc : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
}
-class Enc_dbd70c : OpcodeHexagon {
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
- bits <2> Pu4;
- let Inst{6-5} = Pu4{1-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
+class Enc_27b757 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
}
-class Enc_f6fe0b : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <4> Rs16;
- let Inst{19-16} = Rs16{3-0};
- bits <6> n1;
- let Inst{28-28} = n1{5-5};
- let Inst{24-22} = n1{4-2};
- let Inst{13-13} = n1{1-1};
- let Inst{8-8} = n1{0-0};
+class Enc_865390 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_9e2e1c : OpcodeHexagon {
- bits <5> Ii;
- let Inst{8-5} = Ii{4-1};
+class Enc_1ef990 : OpcodeHexagon {
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
bits <1> Mu2;
let Inst{13-13} = Mu2{0-0};
- bits <5> Ryy32;
- let Inst{4-0} = Ryy32{4-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_8df4be : OpcodeHexagon {
- bits <17> Ii;
- let Inst{26-25} = Ii{16-15};
- let Inst{20-16} = Ii{14-10};
- let Inst{13-5} = Ii{9-1};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
-}
-class Enc_66bce1 : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <4> Rs16;
- let Inst{19-16} = Rs16{3-0};
- bits <4> Rd16;
- let Inst{11-8} = Rd16{3-0};
+class Enc_b62ef7 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_b8309d : OpcodeHexagon {
- bits <9> Ii;
- let Inst{8-3} = Ii{8-3};
- bits <3> Rtt8;
- let Inst{2-0} = Rtt8{2-0};
+class Enc_d15d19 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_5e8512 : OpcodeHexagon {
- bits <5> Vu32;
- let Inst{12-8} = Vu32{4-0};
+class Enc_f77fbc : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
bits <5> Rt32;
let Inst{20-16} = Rt32{4-0};
- bits <5> Vxx32;
- let Inst{4-0} = Vxx32{4-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
}
-class Enc_4f677b : OpcodeHexagon {
- bits <2> Ii;
- let Inst{13-13} = Ii{1-1};
- let Inst{7-7} = Ii{0-0};
- bits <6> II;
- let Inst{11-8} = II{5-2};
- let Inst{6-5} = II{1-0};
+class Enc_f7430e : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
bits <5> Rt32;
let Inst{20-16} = Rt32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
}
-class Enc_3d920a : OpcodeHexagon {
- bits <6> Ii;
- let Inst{8-5} = Ii{5-2};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
+class Enc_784502 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_e83554 : OpcodeHexagon {
- bits <5> Ii;
- let Inst{8-5} = Ii{4-1};
+class Enc_372c9d : OpcodeHexagon {
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
bits <1> Mu2;
let Inst{13-13} = Mu2{0-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_ed48be : OpcodeHexagon {
- bits <2> Ii;
- let Inst{6-5} = Ii{1-0};
- bits <3> Rdd8;
- let Inst{2-0} = Rdd8{2-0};
+class Enc_1aaec1 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_f8c1c4 : OpcodeHexagon {
- bits <2> Pv4;
- let Inst{12-11} = Pv4{1-0};
+class Enc_cf1927 : OpcodeHexagon {
bits <1> Mu2;
let Inst{13-13} = Mu2{0-0};
- bits <5> Vd32;
- let Inst{4-0} = Vd32{4-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
-class Enc_1aa186 : OpcodeHexagon {
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
+class Enc_2ea740 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Qv4;
+ let Inst{12-11} = Qv4{1-0};
bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Rxx32;
- let Inst{4-0} = Rxx32{4-0};
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
}
-class Enc_134437 : OpcodeHexagon {
- bits <2> Qs4;
- let Inst{9-8} = Qs4{1-0};
- bits <2> Qt4;
- let Inst{23-22} = Qt4{1-0};
- bits <2> Qd4;
- let Inst{1-0} = Qd4{1-0};
+class Enc_0b51ce : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Qv4;
+ let Inst{12-11} = Qv4{1-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
}
-class Enc_f3f408 : OpcodeHexagon {
+class Enc_4dff07 : OpcodeHexagon {
+ bits <2> Qv4;
+ let Inst{12-11} = Qv4{1-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_ff3442 : OpcodeHexagon {
bits <4> Ii;
let Inst{13-13} = Ii{3-3};
let Inst{10-8} = Ii{2-0};
bits <5> Rt32;
let Inst{20-16} = Rt32{4-0};
+}
+class Enc_6c9ee0 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_44661f : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_e7581c : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
bits <5> Vd32;
let Inst{4-0} = Vd32{4-0};
}
-class Enc_97d666 : OpcodeHexagon {
- bits <4> Rs16;
- let Inst{7-4} = Rs16{3-0};
- bits <4> Rd16;
- let Inst{3-0} = Rd16{3-0};
+class Enc_45364e : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
}
-class Enc_f82eaf : OpcodeHexagon {
- bits <8> Ii;
- let Inst{10-5} = Ii{7-2};
- bits <2> Pt4;
- let Inst{12-11} = Pt4{1-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
+class Enc_f8ecf9 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Vvv32;
+ let Inst{20-16} = Vvv32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
}
-class Enc_69d63b : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <3> Ns8;
- let Inst{18-16} = Ns8{2-0};
+class Enc_a90628 : OpcodeHexagon {
+ bits <2> Qv4;
+ let Inst{23-22} = Qv4{1-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
}
-class Enc_f79415 : OpcodeHexagon {
- bits <2> Ii;
- let Inst{13-13} = Ii{1-1};
- let Inst{6-6} = Ii{0-0};
- bits <6> II;
- let Inst{5-0} = II{5-0};
- bits <5> Ru32;
- let Inst{20-16} = Ru32{4-0};
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
+class Enc_b43b67 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <2> Qx4;
+ let Inst{6-5} = Qx4{1-0};
}
-class Enc_ce6828 : OpcodeHexagon {
- bits <14> Ii;
- let Inst{26-25} = Ii{13-12};
- let Inst{13-13} = Ii{11-11};
- let Inst{7-0} = Ii{10-3};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
+class Enc_c1d806 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <2> Qe4;
+ let Inst{6-5} = Qe4{1-0};
}
-class Enc_800e04 : OpcodeHexagon {
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <4> Rs16;
- let Inst{19-16} = Rs16{3-0};
- bits <6> n1;
- let Inst{28-28} = n1{5-5};
- let Inst{25-22} = n1{4-1};
- let Inst{13-13} = n1{0-0};
+class Enc_e0820b : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <2> Qs4;
+ let Inst{6-5} = Qs4{1-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
}
-class Enc_ad1831 : OpcodeHexagon {
- bits <16> Ii;
- let Inst{26-25} = Ii{15-14};
- let Inst{20-16} = Ii{13-9};
- let Inst{13-13} = Ii{8-8};
- let Inst{7-0} = Ii{7-0};
- bits <3> Nt8;
- let Inst{10-8} = Nt8{2-0};
+class Enc_71bb9b : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
}
-class Enc_0fa531 : OpcodeHexagon {
- bits <15> Ii;
- let Inst{21-21} = Ii{14-14};
- let Inst{13-13} = Ii{13-13};
- let Inst{11-1} = Ii{12-2};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
+class Enc_3fc427 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
}
-class Enc_7eaeb6 : OpcodeHexagon {
- bits <6> Ii;
- let Inst{6-3} = Ii{5-2};
- bits <2> Pv4;
- let Inst{1-0} = Pv4{1-0};
+class Enc_a30110 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{23-19} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{18-16} = Rt8{2-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_0b2e5b : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_7b7ba8 : OpcodeHexagon {
+ bits <2> Qu4;
+ let Inst{9-8} = Qu4{1-0};
bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Rx32;
- let Inst{20-16} = Rx32{4-0};
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
}
-class Enc_f55a0c : OpcodeHexagon {
- bits <6> Ii;
- let Inst{11-8} = Ii{5-2};
- bits <4> Rs16;
- let Inst{7-4} = Rs16{3-0};
- bits <4> Rt16;
- let Inst{3-0} = Rt16{3-0};
+class Enc_895bd9 : OpcodeHexagon {
+ bits <2> Qu4;
+ let Inst{9-8} = Qu4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
}
-class Enc_f20719 : OpcodeHexagon {
- bits <7> Ii;
- let Inst{12-7} = Ii{6-1};
- bits <6> II;
- let Inst{13-13} = II{5-5};
- let Inst{4-0} = II{4-0};
- bits <2> Pv4;
- let Inst{6-5} = Pv4{1-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
+class Enc_c4dc92 : OpcodeHexagon {
+ bits <2> Qv4;
+ let Inst{23-22} = Qv4{1-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
}
-class Enc_eafd18 : OpcodeHexagon {
- bits <5> II;
- let Inst{12-8} = II{4-0};
- bits <11> Ii;
- let Inst{21-20} = Ii{10-9};
- let Inst{7-1} = Ii{8-2};
- bits <3> Ns8;
- let Inst{18-16} = Ns8{2-0};
+class Enc_0f8bab : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <2> Qd4;
+ let Inst{1-0} = Qd4{1-0};
}
-class Enc_7b523d : OpcodeHexagon {
+class Enc_adf111 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <2> Qx4;
+ let Inst{1-0} = Qx4{1-0};
+}
+class Enc_b087ac : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_5138b3 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_8c2412 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{6-5} = Ps4{1-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_770858 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{6-5} = Ps4{1-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_989021 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vy32;
+ let Inst{12-8} = Vy32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_24a7dc : OpcodeHexagon {
bits <5> Vu32;
let Inst{12-8} = Vu32{4-0};
bits <5> Vv32;
let Inst{23-19} = Vv32{4-0};
bits <3> Rt8;
let Inst{18-16} = Rt8{2-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_aad80c : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_d6990d : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
bits <5> Vxx32;
let Inst{4-0} = Vxx32{4-0};
}
-class Enc_47ef61 : OpcodeHexagon {
- bits <3> Ii;
- let Inst{7-5} = Ii{2-0};
+class Enc_0e41fa : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Rd32;
- let Inst{4-0} = Rd32{4-0};
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
}
class Enc_cc857d : OpcodeHexagon {
bits <5> Vuu32;
@@ -3394,85 +3780,225 @@ class Enc_cc857d : OpcodeHexagon {
bits <5> Vx32;
let Inst{4-0} = Vx32{4-0};
}
-class Enc_7fa7f6 : OpcodeHexagon {
- bits <6> II;
- let Inst{11-8} = II{5-2};
- let Inst{6-5} = II{1-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
- bits <5> Re32;
- let Inst{20-16} = Re32{4-0};
+class Enc_a7341a : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
}
-class Enc_0f8bab : OpcodeHexagon {
+class Enc_95441f : OpcodeHexagon {
bits <5> Vu32;
let Inst{12-8} = Vu32{4-0};
- bits <5> Rt32;
- let Inst{20-16} = Rt32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
bits <2> Qd4;
let Inst{1-0} = Qd4{1-0};
}
-class Enc_7eb485 : OpcodeHexagon {
- bits <2> Ii;
- let Inst{13-13} = Ii{1-1};
- let Inst{6-6} = Ii{0-0};
- bits <6> II;
- let Inst{5-0} = II{5-0};
- bits <5> Ru32;
- let Inst{20-16} = Ru32{4-0};
- bits <3> Nt8;
- let Inst{10-8} = Nt8{2-0};
+class Enc_eaa9f8 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <2> Qx4;
+ let Inst{1-0} = Qx4{1-0};
}
-class Enc_864a5a : OpcodeHexagon {
- bits <9> Ii;
- let Inst{12-8} = Ii{8-4};
- let Inst{4-3} = Ii{3-2};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
+class Enc_8b8927 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vv32;
+ let Inst{4-0} = Vv32{4-0};
}
-class Enc_c2b48e : OpcodeHexagon {
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
+class Enc_158beb : OpcodeHexagon {
+ bits <2> Qs4;
+ let Inst{6-5} = Qs4{1-0};
bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
- bits <2> Pd4;
- let Inst{1-0} = Pd4{1-0};
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vv32;
+ let Inst{4-0} = Vv32{4-0};
}
-class Enc_8c6530 : OpcodeHexagon {
+class Enc_28dcbb : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vvv32;
+ let Inst{4-0} = Vvv32{4-0};
+}
+class Enc_4e4a80 : OpcodeHexagon {
+ bits <2> Qs4;
+ let Inst{6-5} = Qs4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vvv32;
+ let Inst{4-0} = Vvv32{4-0};
+}
+class Enc_217147 : OpcodeHexagon {
+ bits <2> Qv4;
+ let Inst{23-22} = Qv4{1-0};
+}
+class Enc_569cfe : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_263841 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
bits <5> Rtt32;
- let Inst{12-8} = Rtt32{4-0};
- bits <5> Rss32;
- let Inst{20-16} = Rss32{4-0};
- bits <2> Pu4;
- let Inst{6-5} = Pu4{1-0};
- bits <5> Rdd32;
- let Inst{4-0} = Rdd32{4-0};
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
}
-class Enc_448f7f : OpcodeHexagon {
- bits <11> Ii;
- let Inst{26-25} = Ii{10-9};
- let Inst{13-13} = Ii{8-8};
- let Inst{7-0} = Ii{7-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
+class Enc_245865 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{23-19} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{18-16} = Rt8{2-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_cd4705 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_7b523d : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{23-19} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{18-16} = Rt8{2-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_1178da : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_4b39e4 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_310ba1 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_01d3d0 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
}
-class Enc_da8d43 : OpcodeHexagon {
- bits <6> Ii;
- let Inst{13-13} = Ii{5-5};
- let Inst{7-3} = Ii{4-0};
- bits <2> Pv4;
- let Inst{1-0} = Pv4{1-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
+class Enc_5e8512 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
bits <5> Rt32;
- let Inst{12-8} = Rt32{4-0};
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
}
-class Enc_a6ce9c : OpcodeHexagon {
- bits <6> Ii;
- let Inst{3-0} = Ii{5-2};
- bits <4> Rs16;
- let Inst{7-4} = Rs16{3-0};
+class Enc_31db33 : OpcodeHexagon {
+ bits <2> Qt4;
+ let Inst{6-5} = Qt4{1-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_6f83e7 : OpcodeHexagon {
+ bits <2> Qv4;
+ let Inst{23-22} = Qv4{1-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_cb785b : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_ad9bef : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_2f2f04 : OpcodeHexagon {
+ bits <1> Ii;
+ let Inst{5-5} = Ii{0-0};
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_d483b9 : OpcodeHexagon {
+ bits <1> Ii;
+ let Inst{5-5} = Ii{0-0};
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_1bd127 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <3> Rt8;
+ let Inst{18-16} = Rt8{2-0};
+ bits <5> Vdddd32;
+ let Inst{4-0} = Vdddd32{4-0};
+}
+class Enc_d7bc34 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <3> Rt8;
+ let Inst{18-16} = Rt8{2-0};
+ bits <5> Vyyyy32;
+ let Inst{4-0} = Vyyyy32{4-0};
}
class Enc_3b7631 : OpcodeHexagon {
bits <5> Vu32;
@@ -3482,20 +4008,67 @@ class Enc_3b7631 : OpcodeHexagon {
bits <3> Rx8;
let Inst{18-16} = Rx8{2-0};
}
-class Enc_eca7c8 : OpcodeHexagon {
- bits <2> Ii;
- let Inst{13-13} = Ii{1-1};
- let Inst{7-7} = Ii{0-0};
- bits <5> Rs32;
- let Inst{20-16} = Rs32{4-0};
- bits <5> Ru32;
- let Inst{12-8} = Ru32{4-0};
+class Enc_bddee3 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vyyyy32;
+ let Inst{4-0} = Vyyyy32{4-0};
+ bits <3> Rx8;
+ let Inst{18-16} = Rx8{2-0};
+}
+class Enc_dd766a : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_16c48b : OpcodeHexagon {
bits <5> Rt32;
- let Inst{4-0} = Rt32{4-0};
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vw32;
+ let Inst{4-0} = Vw32{4-0};
}
-class Enc_4b39e4 : OpcodeHexagon {
- bits <3> Ii;
- let Inst{7-5} = Ii{2-0};
+class Enc_9be1de : OpcodeHexagon {
+ bits <2> Qs4;
+ let Inst{6-5} = Qs4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vw32;
+ let Inst{4-0} = Vw32{4-0};
+}
+class Enc_a641d0 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <5> Vw32;
+ let Inst{4-0} = Vw32{4-0};
+}
+class Enc_3d6d37 : OpcodeHexagon {
+ bits <2> Qs4;
+ let Inst{6-5} = Qs4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <5> Vw32;
+ let Inst{4-0} = Vw32{4-0};
+}
+class Enc_3dac0b : OpcodeHexagon {
+ bits <2> Qt4;
+ let Inst{6-5} = Qt4{1-0};
bits <5> Vu32;
let Inst{12-8} = Vu32{4-0};
bits <5> Vv32;
@@ -3503,3 +4076,117 @@ class Enc_4b39e4 : OpcodeHexagon {
bits <5> Vdd32;
let Inst{4-0} = Vdd32{4-0};
}
+class Enc_500cb0 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_efaed8 : OpcodeHexagon {
+ bits <1> Ii;
+ let Inst{8-8} = Ii{0-0};
+}
+class Enc_802dc0 : OpcodeHexagon {
+ bits <1> Ii;
+ let Inst{8-8} = Ii{0-0};
+ bits <2> Qv4;
+ let Inst{23-22} = Qv4{1-0};
+}
+class Enc_ef601b : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+}
+class Enc_6baed4 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_691712 : OpcodeHexagon {
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_403871 : OpcodeHexagon {
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_2d829e : OpcodeHexagon {
+ bits <14> Ii;
+ let Inst{10-0} = Ii{13-3};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_ca3887 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_9e9047 : OpcodeHexagon {
+ bits <2> Pt4;
+ let Inst{9-8} = Pt4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_7d1542 : OpcodeHexagon {
+ bits <7> Ss128;
+ let Inst{22-16} = Ss128{6-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_8f7633 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <7> Sd128;
+ let Inst{6-0} = Sd128{6-0};
+}
+class Enc_46f33d : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_d0fe02 : OpcodeHexagon {
+ bits <5> Rxx32;
+ let Inst{20-16} = Rxx32{4-0};
+ bits <0> sgp10;
+}
+class Enc_e32517 : OpcodeHexagon {
+ bits <7> Sss128;
+ let Inst{22-16} = Sss128{6-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_a705fc : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <7> Sdd128;
+ let Inst{6-0} = Sdd128{6-0};
+}
+class Enc_e6abcf : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+}
+class Enc_b00112 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+}
+class Enc_598f6c : OpcodeHexagon {
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
index a49051888c77..ccc3f98d8378 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -5,14 +5,14 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// Automatically generated file, please consult code owner before editing.
+// Automatically generated file, do not edit!
//===----------------------------------------------------------------------===//
def A2_abs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = abs($Rs32)",
-tc_cf8126ae, TypeS_2op>, Enc_5e2823 {
+tc_d61dfdc3, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -23,7 +23,7 @@ def A2_absp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = abs($Rss32)",
-tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
+tc_d61dfdc3, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000100;
let prefersSlot3 = 1;
@@ -32,7 +32,7 @@ def A2_abssat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = abs($Rs32):sat",
-tc_cf8126ae, TypeS_2op>, Enc_5e2823 {
+tc_d61dfdc3, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -44,15 +44,15 @@ def A2_add : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = add($Rs32,$Rt32)",
-tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
+tc_713b66bf, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011000;
let hasNewValue = 1;
let opNewValue = 0;
+let BaseOpcode = "A2_add";
let CextOpcode = "A2_add";
let InputType = "reg";
-let BaseOpcode = "A2_add";
let isCommutable = 1;
let isPredicable = 1;
}
@@ -60,7 +60,7 @@ def A2_addh_h16_hh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.h,$Rs32.h):<<16",
-tc_679309b8, TypeALU64>, Enc_bd6011 {
+tc_01d44cb2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -72,7 +72,7 @@ def A2_addh_h16_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.h,$Rs32.l):<<16",
-tc_679309b8, TypeALU64>, Enc_bd6011 {
+tc_01d44cb2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -84,7 +84,7 @@ def A2_addh_h16_lh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.h):<<16",
-tc_679309b8, TypeALU64>, Enc_bd6011 {
+tc_01d44cb2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -96,7 +96,7 @@ def A2_addh_h16_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.l):<<16",
-tc_679309b8, TypeALU64>, Enc_bd6011 {
+tc_01d44cb2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -108,7 +108,7 @@ def A2_addh_h16_sat_hh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.h,$Rs32.h):sat:<<16",
-tc_779080bf, TypeALU64>, Enc_bd6011 {
+tc_8a825db2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -121,7 +121,7 @@ def A2_addh_h16_sat_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.h,$Rs32.l):sat:<<16",
-tc_779080bf, TypeALU64>, Enc_bd6011 {
+tc_8a825db2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -134,7 +134,7 @@ def A2_addh_h16_sat_lh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.h):sat:<<16",
-tc_779080bf, TypeALU64>, Enc_bd6011 {
+tc_8a825db2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -147,7 +147,7 @@ def A2_addh_h16_sat_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.l):sat:<<16",
-tc_779080bf, TypeALU64>, Enc_bd6011 {
+tc_8a825db2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101010;
@@ -160,7 +160,7 @@ def A2_addh_l16_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.h)",
-tc_4414d8b1, TypeALU64>, Enc_bd6011 {
+tc_f34c1c21, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101000;
@@ -172,7 +172,7 @@ def A2_addh_l16_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.l)",
-tc_4414d8b1, TypeALU64>, Enc_bd6011 {
+tc_f34c1c21, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101000;
@@ -184,7 +184,7 @@ def A2_addh_l16_sat_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.h):sat",
-tc_779080bf, TypeALU64>, Enc_bd6011 {
+tc_8a825db2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101000;
@@ -197,7 +197,7 @@ def A2_addh_l16_sat_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = add($Rt32.l,$Rs32.l):sat",
-tc_779080bf, TypeALU64>, Enc_bd6011 {
+tc_8a825db2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101000;
@@ -210,13 +210,13 @@ def A2_addi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = add($Rs32,#$Ii)",
-tc_5a2711e5, TypeALU32_ADDI>, Enc_cb9321, PredNewRel, ImmRegRel {
+tc_713b66bf, TypeALU32_ADDI>, Enc_cb9321, PredNewRel, ImmRegRel {
let Inst{31-28} = 0b1011;
let hasNewValue = 1;
let opNewValue = 0;
+let BaseOpcode = "A2_addi";
let CextOpcode = "A2_add";
let InputType = "imm";
-let BaseOpcode = "A2_addi";
let isPredicable = 1;
let isAdd = 1;
let isExtendable = 1;
@@ -229,7 +229,7 @@ def A2_addp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = add($Rss32,$Rtt32)",
-tc_946df596, TypeALU64>, Enc_a56825 {
+tc_5da50c4b, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -240,7 +240,7 @@ def A2_addpsat : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = add($Rss32,$Rtt32):sat",
-tc_779080bf, TypeALU64>, Enc_a56825 {
+tc_8a825db2, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
@@ -252,7 +252,7 @@ def A2_addsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = add($Rs32,$Rt32):sat",
-tc_61830035, TypeALU32_3op>, Enc_5ab2be {
+tc_95a33176, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110010;
@@ -267,14 +267,14 @@ def A2_addsp : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"$Rdd32 = add($Rs32,$Rtt32)",
-tc_679309b8, TypeALU64> {
+tc_01d44cb2, TypeALU64> {
let isPseudo = 1;
}
def A2_addsph : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = add($Rss32,$Rtt32):raw:hi",
-tc_679309b8, TypeALU64>, Enc_a56825 {
+tc_01d44cb2, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
@@ -284,7 +284,7 @@ def A2_addspl : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = add($Rss32,$Rtt32):raw:lo",
-tc_679309b8, TypeALU64>, Enc_a56825 {
+tc_01d44cb2, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
@@ -294,15 +294,15 @@ def A2_and : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = and($Rs32,$Rt32)",
-tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
+tc_713b66bf, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110001000;
let hasNewValue = 1;
let opNewValue = 0;
+let BaseOpcode = "A2_and";
let CextOpcode = "A2_and";
let InputType = "reg";
-let BaseOpcode = "A2_and";
let isCommutable = 1;
let isPredicable = 1;
}
@@ -310,7 +310,7 @@ def A2_andir : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = and($Rs32,#$Ii)",
-tc_5a2711e5, TypeALU32_2op>, Enc_140c83, ImmRegRel {
+tc_713b66bf, TypeALU32_2op>, Enc_140c83, ImmRegRel {
let Inst{31-22} = 0b0111011000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -326,7 +326,7 @@ def A2_andp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = and($Rss32,$Rtt32)",
-tc_946df596, TypeALU64>, Enc_a56825 {
+tc_5da50c4b, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -336,7 +336,7 @@ def A2_aslh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = aslh($Rs32)",
-tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_c57d9f39, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000000;
let hasNewValue = 1;
@@ -348,7 +348,7 @@ def A2_asrh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = asrh($Rs32)",
-tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_c57d9f39, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000001;
let hasNewValue = 1;
@@ -360,7 +360,7 @@ def A2_combine_hh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = combine($Rt32.h,$Rs32.h)",
-tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
+tc_713b66bf, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011100;
@@ -372,7 +372,7 @@ def A2_combine_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = combine($Rt32.h,$Rs32.l)",
-tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
+tc_713b66bf, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011101;
@@ -384,7 +384,7 @@ def A2_combine_lh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = combine($Rt32.l,$Rs32.h)",
-tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
+tc_713b66bf, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011110;
@@ -396,7 +396,7 @@ def A2_combine_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = combine($Rt32.l,$Rs32.l)",
-tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
+tc_713b66bf, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011111;
@@ -408,7 +408,7 @@ def A2_combineii : HInst<
(outs DoubleRegs:$Rdd32),
(ins s32_0Imm:$Ii, s8_0Imm:$II),
"$Rdd32 = combine(#$Ii,#$II)",
-tc_5a2711e5, TypeALU32_2op>, Enc_18c338 {
+tc_713b66bf, TypeALU32_2op>, Enc_18c338 {
let Inst{31-23} = 0b011111000;
let isReMaterializable = 1;
let isAsCheapAsAMove = 1;
@@ -423,19 +423,19 @@ def A2_combinew : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = combine($Rs32,$Rt32)",
-tc_5a2711e5, TypeALU32_3op>, Enc_be32a5, PredNewRel {
+tc_713b66bf, TypeALU32_3op>, Enc_be32a5, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110101000;
-let InputType = "reg";
let BaseOpcode = "A2_combinew";
+let InputType = "reg";
let isPredicable = 1;
}
def A2_max : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = max($Rs32,$Rt32)",
-tc_779080bf, TypeALU64>, Enc_5ab2be {
+tc_8a825db2, TypeALU64>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101110;
@@ -447,7 +447,7 @@ def A2_maxp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = max($Rss32,$Rtt32)",
-tc_779080bf, TypeALU64>, Enc_a56825 {
+tc_8a825db2, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -457,7 +457,7 @@ def A2_maxu : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = maxu($Rs32,$Rt32)",
-tc_779080bf, TypeALU64>, Enc_5ab2be {
+tc_8a825db2, TypeALU64>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101110;
@@ -469,7 +469,7 @@ def A2_maxup : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = maxu($Rss32,$Rtt32)",
-tc_779080bf, TypeALU64>, Enc_a56825 {
+tc_8a825db2, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -479,7 +479,7 @@ def A2_min : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = min($Rt32,$Rs32)",
-tc_779080bf, TypeALU64>, Enc_bd6011 {
+tc_8a825db2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101101;
@@ -491,7 +491,7 @@ def A2_minp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = min($Rtt32,$Rss32)",
-tc_779080bf, TypeALU64>, Enc_ea23e4 {
+tc_8a825db2, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -501,7 +501,7 @@ def A2_minu : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = minu($Rt32,$Rs32)",
-tc_779080bf, TypeALU64>, Enc_bd6011 {
+tc_8a825db2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101101;
@@ -513,7 +513,7 @@ def A2_minup : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = minu($Rtt32,$Rss32)",
-tc_779080bf, TypeALU64>, Enc_ea23e4 {
+tc_8a825db2, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -523,7 +523,7 @@ def A2_neg : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = neg($Rs32)",
-tc_57890846, TypeALU32_2op> {
+tc_c57d9f39, TypeALU32_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -533,7 +533,7 @@ def A2_negp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = neg($Rss32)",
-tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
+tc_9f6cd987, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000000100;
}
@@ -541,7 +541,7 @@ def A2_negsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = neg($Rs32):sat",
-tc_cf8126ae, TypeS_2op>, Enc_5e2823 {
+tc_d61dfdc3, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -553,7 +553,7 @@ def A2_nop : HInst<
(outs),
(ins),
"nop",
-tc_2eabeebe, TypeALU32_2op>, Enc_e3b0c4 {
+tc_b837298f, TypeALU32_2op>, Enc_e3b0c4 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-16} = 0b0111111100000000;
}
@@ -561,7 +561,7 @@ def A2_not : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = not($Rs32)",
-tc_57890846, TypeALU32_2op> {
+tc_c57d9f39, TypeALU32_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -571,7 +571,7 @@ def A2_notp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = not($Rss32)",
-tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
+tc_9f6cd987, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000000100;
}
@@ -579,15 +579,15 @@ def A2_or : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = or($Rs32,$Rt32)",
-tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
+tc_713b66bf, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110001001;
let hasNewValue = 1;
let opNewValue = 0;
+let BaseOpcode = "A2_or";
let CextOpcode = "A2_or";
let InputType = "reg";
-let BaseOpcode = "A2_or";
let isCommutable = 1;
let isPredicable = 1;
}
@@ -595,7 +595,7 @@ def A2_orir : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = or($Rs32,#$Ii)",
-tc_5a2711e5, TypeALU32_2op>, Enc_140c83, ImmRegRel {
+tc_713b66bf, TypeALU32_2op>, Enc_140c83, ImmRegRel {
let Inst{31-22} = 0b0111011010;
let hasNewValue = 1;
let opNewValue = 0;
@@ -611,7 +611,7 @@ def A2_orp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = or($Rss32,$Rtt32)",
-tc_946df596, TypeALU64>, Enc_a56825 {
+tc_5da50c4b, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -621,7 +621,7 @@ def A2_paddf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4) $Rd32 = add($Rs32,$Rt32)",
-tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
+tc_1c2c7a4a, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111011000;
@@ -629,15 +629,15 @@ let isPredicated = 1;
let isPredicatedFalse = 1;
let hasNewValue = 1;
let opNewValue = 0;
+let BaseOpcode = "A2_add";
let CextOpcode = "A2_add";
let InputType = "reg";
-let BaseOpcode = "A2_add";
}
def A2_paddfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4.new) $Rd32 = add($Rs32,$Rt32)",
-tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
+tc_442395f3, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111011000;
@@ -646,24 +646,24 @@ let isPredicatedFalse = 1;
let hasNewValue = 1;
let opNewValue = 0;
let isPredicatedNew = 1;
+let BaseOpcode = "A2_add";
let CextOpcode = "A2_add";
let InputType = "reg";
-let BaseOpcode = "A2_add";
}
def A2_paddif : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
"if (!$Pu4) $Rd32 = add($Rs32,#$Ii)",
-tc_4c5ba658, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
+tc_1c2c7a4a, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b011101001;
let isPredicated = 1;
let isPredicatedFalse = 1;
let hasNewValue = 1;
let opNewValue = 0;
+let BaseOpcode = "A2_addi";
let CextOpcode = "A2_add";
let InputType = "imm";
-let BaseOpcode = "A2_addi";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 1;
@@ -674,7 +674,7 @@ def A2_paddifnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
"if (!$Pu4.new) $Rd32 = add($Rs32,#$Ii)",
-tc_05c070ec, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
+tc_442395f3, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{31-23} = 0b011101001;
let isPredicated = 1;
@@ -682,9 +682,9 @@ let isPredicatedFalse = 1;
let hasNewValue = 1;
let opNewValue = 0;
let isPredicatedNew = 1;
+let BaseOpcode = "A2_addi";
let CextOpcode = "A2_add";
let InputType = "imm";
-let BaseOpcode = "A2_addi";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 1;
@@ -695,15 +695,15 @@ def A2_paddit : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
"if ($Pu4) $Rd32 = add($Rs32,#$Ii)",
-tc_4c5ba658, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
+tc_1c2c7a4a, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b011101000;
let isPredicated = 1;
let hasNewValue = 1;
let opNewValue = 0;
+let BaseOpcode = "A2_addi";
let CextOpcode = "A2_add";
let InputType = "imm";
-let BaseOpcode = "A2_addi";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 1;
@@ -714,16 +714,16 @@ def A2_padditnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
"if ($Pu4.new) $Rd32 = add($Rs32,#$Ii)",
-tc_05c070ec, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
+tc_442395f3, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{31-23} = 0b011101000;
let isPredicated = 1;
let hasNewValue = 1;
let opNewValue = 0;
let isPredicatedNew = 1;
+let BaseOpcode = "A2_addi";
let CextOpcode = "A2_add";
let InputType = "imm";
-let BaseOpcode = "A2_addi";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 1;
@@ -734,22 +734,22 @@ def A2_paddt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4) $Rd32 = add($Rs32,$Rt32)",
-tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
+tc_1c2c7a4a, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111011000;
let isPredicated = 1;
let hasNewValue = 1;
let opNewValue = 0;
+let BaseOpcode = "A2_add";
let CextOpcode = "A2_add";
let InputType = "reg";
-let BaseOpcode = "A2_add";
}
def A2_paddtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4.new) $Rd32 = add($Rs32,$Rt32)",
-tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
+tc_442395f3, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111011000;
@@ -757,15 +757,15 @@ let isPredicated = 1;
let hasNewValue = 1;
let opNewValue = 0;
let isPredicatedNew = 1;
+let BaseOpcode = "A2_add";
let CextOpcode = "A2_add";
let InputType = "reg";
-let BaseOpcode = "A2_add";
}
def A2_pandf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4) $Rd32 = and($Rs32,$Rt32)",
-tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_1c2c7a4a, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001000;
@@ -779,7 +779,7 @@ def A2_pandfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4.new) $Rd32 = and($Rs32,$Rt32)",
-tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_442395f3, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001000;
@@ -794,7 +794,7 @@ def A2_pandt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4) $Rd32 = and($Rs32,$Rt32)",
-tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_1c2c7a4a, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001000;
@@ -807,7 +807,7 @@ def A2_pandtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4.new) $Rd32 = and($Rs32,$Rt32)",
-tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_442395f3, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001000;
@@ -821,7 +821,7 @@ def A2_porf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4) $Rd32 = or($Rs32,$Rt32)",
-tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_1c2c7a4a, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001001;
@@ -835,7 +835,7 @@ def A2_porfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4.new) $Rd32 = or($Rs32,$Rt32)",
-tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_442395f3, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001001;
@@ -850,7 +850,7 @@ def A2_port : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4) $Rd32 = or($Rs32,$Rt32)",
-tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_1c2c7a4a, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001001;
@@ -863,7 +863,7 @@ def A2_portnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4.new) $Rd32 = or($Rs32,$Rt32)",
-tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_442395f3, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001001;
@@ -877,7 +877,7 @@ def A2_psubf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = sub($Rt32,$Rs32)",
-tc_4c5ba658, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
+tc_1c2c7a4a, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111011001;
@@ -891,7 +891,7 @@ def A2_psubfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = sub($Rt32,$Rs32)",
-tc_05c070ec, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
+tc_442395f3, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111011001;
@@ -906,7 +906,7 @@ def A2_psubt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = sub($Rt32,$Rs32)",
-tc_4c5ba658, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
+tc_1c2c7a4a, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111011001;
@@ -919,7 +919,7 @@ def A2_psubtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = sub($Rt32,$Rs32)",
-tc_05c070ec, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
+tc_442395f3, TypeALU32_3op>, Enc_9b0bc1, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111011001;
@@ -933,7 +933,7 @@ def A2_pxorf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4) $Rd32 = xor($Rs32,$Rt32)",
-tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_1c2c7a4a, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001011;
@@ -947,7 +947,7 @@ def A2_pxorfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4.new) $Rd32 = xor($Rs32,$Rt32)",
-tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_442395f3, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001011;
@@ -962,7 +962,7 @@ def A2_pxort : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4) $Rd32 = xor($Rs32,$Rt32)",
-tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_1c2c7a4a, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111001011;
@@ -975,7 +975,7 @@ def A2_pxortnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4.new) $Rd32 = xor($Rs32,$Rt32)",
-tc_05c070ec, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
+tc_442395f3, TypeALU32_3op>, Enc_ea4c54, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111001011;
@@ -989,7 +989,7 @@ def A2_roundsat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = round($Rss32):sat",
-tc_cf8126ae, TypeS_2op>, Enc_90cd8b {
+tc_d61dfdc3, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000110;
let hasNewValue = 1;
@@ -1001,7 +1001,7 @@ def A2_sat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = sat($Rss32)",
-tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
+tc_9f6cd987, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001000110;
let hasNewValue = 1;
@@ -1012,7 +1012,7 @@ def A2_satb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = satb($Rs32)",
-tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
+tc_9f6cd987, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10001100110;
let hasNewValue = 1;
@@ -1023,7 +1023,7 @@ def A2_sath : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = sath($Rs32)",
-tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
+tc_9f6cd987, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001100110;
let hasNewValue = 1;
@@ -1034,7 +1034,7 @@ def A2_satub : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = satub($Rs32)",
-tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
+tc_9f6cd987, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001100110;
let hasNewValue = 1;
@@ -1045,7 +1045,7 @@ def A2_satuh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = satuh($Rs32)",
-tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
+tc_9f6cd987, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10001100110;
let hasNewValue = 1;
@@ -1056,22 +1056,22 @@ def A2_sub : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32,$Rs32)",
-tc_5a2711e5, TypeALU32_3op>, Enc_bd6011, PredNewRel, ImmRegRel {
+tc_713b66bf, TypeALU32_3op>, Enc_bd6011, PredNewRel, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011001;
let hasNewValue = 1;
let opNewValue = 0;
+let BaseOpcode = "A2_sub";
let CextOpcode = "A2_sub";
let InputType = "reg";
-let BaseOpcode = "A2_sub";
let isPredicable = 1;
}
def A2_subh_h16_hh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.h,$Rs32.h):<<16",
-tc_679309b8, TypeALU64>, Enc_bd6011 {
+tc_01d44cb2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1083,7 +1083,7 @@ def A2_subh_h16_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.h,$Rs32.l):<<16",
-tc_679309b8, TypeALU64>, Enc_bd6011 {
+tc_01d44cb2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1095,7 +1095,7 @@ def A2_subh_h16_lh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.h):<<16",
-tc_679309b8, TypeALU64>, Enc_bd6011 {
+tc_01d44cb2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1107,7 +1107,7 @@ def A2_subh_h16_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.l):<<16",
-tc_679309b8, TypeALU64>, Enc_bd6011 {
+tc_01d44cb2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1119,7 +1119,7 @@ def A2_subh_h16_sat_hh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.h,$Rs32.h):sat:<<16",
-tc_779080bf, TypeALU64>, Enc_bd6011 {
+tc_8a825db2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1132,7 +1132,7 @@ def A2_subh_h16_sat_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.h,$Rs32.l):sat:<<16",
-tc_779080bf, TypeALU64>, Enc_bd6011 {
+tc_8a825db2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1145,7 +1145,7 @@ def A2_subh_h16_sat_lh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.h):sat:<<16",
-tc_779080bf, TypeALU64>, Enc_bd6011 {
+tc_8a825db2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1158,7 +1158,7 @@ def A2_subh_h16_sat_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.l):sat:<<16",
-tc_779080bf, TypeALU64>, Enc_bd6011 {
+tc_8a825db2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101011;
@@ -1171,7 +1171,7 @@ def A2_subh_l16_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.h)",
-tc_4414d8b1, TypeALU64>, Enc_bd6011 {
+tc_f34c1c21, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101001;
@@ -1183,7 +1183,7 @@ def A2_subh_l16_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.l)",
-tc_4414d8b1, TypeALU64>, Enc_bd6011 {
+tc_f34c1c21, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101001;
@@ -1195,7 +1195,7 @@ def A2_subh_l16_sat_hl : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.h):sat",
-tc_779080bf, TypeALU64>, Enc_bd6011 {
+tc_8a825db2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101001;
@@ -1208,7 +1208,7 @@ def A2_subh_l16_sat_ll : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32.l,$Rs32.l):sat",
-tc_779080bf, TypeALU64>, Enc_bd6011 {
+tc_8a825db2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101001;
@@ -1221,7 +1221,7 @@ def A2_subp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = sub($Rtt32,$Rss32)",
-tc_946df596, TypeALU64>, Enc_ea23e4 {
+tc_5da50c4b, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -1230,7 +1230,7 @@ def A2_subri : HInst<
(outs IntRegs:$Rd32),
(ins s32_0Imm:$Ii, IntRegs:$Rs32),
"$Rd32 = sub(#$Ii,$Rs32)",
-tc_5a2711e5, TypeALU32_2op>, Enc_140c83, PredNewRel, ImmRegRel {
+tc_713b66bf, TypeALU32_2op>, Enc_140c83, PredNewRel, ImmRegRel {
let Inst{31-22} = 0b0111011001;
let hasNewValue = 1;
let opNewValue = 0;
@@ -1246,7 +1246,7 @@ def A2_subsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32,$Rs32):sat",
-tc_61830035, TypeALU32_3op>, Enc_bd6011 {
+tc_95a33176, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110110;
@@ -1260,7 +1260,7 @@ def A2_svaddh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vaddh($Rs32,$Rt32)",
-tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be {
+tc_713b66bf, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110000;
@@ -1273,7 +1273,7 @@ def A2_svaddhs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vaddh($Rs32,$Rt32):sat",
-tc_61830035, TypeALU32_3op>, Enc_5ab2be {
+tc_95a33176, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110001;
@@ -1288,7 +1288,7 @@ def A2_svadduhs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vadduh($Rs32,$Rt32):sat",
-tc_61830035, TypeALU32_3op>, Enc_5ab2be {
+tc_95a33176, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110011;
@@ -1303,7 +1303,7 @@ def A2_svavgh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vavgh($Rs32,$Rt32)",
-tc_1c80410a, TypeALU32_3op>, Enc_5ab2be {
+tc_8b5bd4f5, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110111000;
@@ -1317,7 +1317,7 @@ def A2_svavghs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vavgh($Rs32,$Rt32):rnd",
-tc_d08ee0f4, TypeALU32_3op>, Enc_5ab2be {
+tc_84a7500d, TypeALU32_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110111001;
@@ -1331,7 +1331,7 @@ def A2_svnavgh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = vnavgh($Rt32,$Rs32)",
-tc_1c80410a, TypeALU32_3op>, Enc_bd6011 {
+tc_8b5bd4f5, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110111011;
@@ -1344,7 +1344,7 @@ def A2_svsubh : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = vsubh($Rt32,$Rs32)",
-tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
+tc_713b66bf, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110100;
@@ -1356,7 +1356,7 @@ def A2_svsubhs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = vsubh($Rt32,$Rs32):sat",
-tc_61830035, TypeALU32_3op>, Enc_bd6011 {
+tc_95a33176, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110101;
@@ -1370,7 +1370,7 @@ def A2_svsubuhs : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = vsubuh($Rt32,$Rs32):sat",
-tc_61830035, TypeALU32_3op>, Enc_bd6011 {
+tc_95a33176, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110110111;
@@ -1384,7 +1384,7 @@ def A2_swiz : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = swiz($Rs32)",
-tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
+tc_9f6cd987, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -1394,7 +1394,7 @@ def A2_sxtb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = sxtb($Rs32)",
-tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_c57d9f39, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000101;
let hasNewValue = 1;
@@ -1406,7 +1406,7 @@ def A2_sxth : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = sxth($Rs32)",
-tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_c57d9f39, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000111;
let hasNewValue = 1;
@@ -1418,7 +1418,7 @@ def A2_sxtw : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = sxtw($Rs32)",
-tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
+tc_9f6cd987, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10000100010;
}
@@ -1426,20 +1426,20 @@ def A2_tfr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = $Rs32",
-tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_c57d9f39, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000011;
let hasNewValue = 1;
let opNewValue = 0;
-let InputType = "reg";
let BaseOpcode = "A2_tfr";
+let InputType = "reg";
let isPredicable = 1;
}
def A2_tfrcrr : HInst<
(outs IntRegs:$Rd32),
(ins CtrRegs:$Cs32),
"$Rd32 = $Cs32",
-tc_b9272d6c, TypeCR>, Enc_0cb018 {
+tc_7476d766, TypeCR>, Enc_0cb018 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01101010000;
let hasNewValue = 1;
@@ -1449,14 +1449,14 @@ def A2_tfrf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = $Rs32",
-tc_4c5ba658, TypeALU32_2op>, PredNewRel, ImmRegRel {
+tc_1c2c7a4a, TypeALU32_2op>, PredNewRel, ImmRegRel {
let isPredicated = 1;
let isPredicatedFalse = 1;
let hasNewValue = 1;
let opNewValue = 0;
+let BaseOpcode = "A2_tfr";
let CextOpcode = "A2_tfr";
let InputType = "reg";
-let BaseOpcode = "A2_tfr";
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -1464,15 +1464,15 @@ def A2_tfrfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = $Rs32",
-tc_05c070ec, TypeALU32_2op>, PredNewRel, ImmRegRel {
+tc_442395f3, TypeALU32_2op>, PredNewRel, ImmRegRel {
let isPredicated = 1;
let isPredicatedFalse = 1;
let hasNewValue = 1;
let opNewValue = 0;
let isPredicatedNew = 1;
+let BaseOpcode = "A2_tfr";
let CextOpcode = "A2_tfr";
let InputType = "reg";
-let BaseOpcode = "A2_tfr";
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -1480,7 +1480,7 @@ def A2_tfrih : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, u16_0Imm:$Ii),
"$Rx32.h = #$Ii",
-tc_5a2711e5, TypeALU32_2op>, Enc_51436c {
+tc_713b66bf, TypeALU32_2op>, Enc_51436c {
let Inst{21-21} = 0b1;
let Inst{31-24} = 0b01110010;
let hasNewValue = 1;
@@ -1491,7 +1491,7 @@ def A2_tfril : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, u16_0Imm:$Ii),
"$Rx32.l = #$Ii",
-tc_5a2711e5, TypeALU32_2op>, Enc_51436c {
+tc_713b66bf, TypeALU32_2op>, Enc_51436c {
let Inst{21-21} = 0b1;
let Inst{31-24} = 0b01110001;
let hasNewValue = 1;
@@ -1502,7 +1502,7 @@ def A2_tfrp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = $Rss32",
-tc_5a2711e5, TypeALU32_2op>, PredNewRel {
+tc_713b66bf, TypeALU32_2op>, PredNewRel {
let BaseOpcode = "A2_tfrp";
let isPredicable = 1;
let isPseudo = 1;
@@ -1511,7 +1511,7 @@ def A2_tfrpf : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
"if (!$Pu4) $Rdd32 = $Rss32",
-tc_5a2711e5, TypeALU32_2op>, PredNewRel {
+tc_713b66bf, TypeALU32_2op>, PredNewRel {
let isPredicated = 1;
let isPredicatedFalse = 1;
let BaseOpcode = "A2_tfrp";
@@ -1521,7 +1521,7 @@ def A2_tfrpfnew : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
"if (!$Pu4.new) $Rdd32 = $Rss32",
-tc_1ae57e39, TypeALU32_2op>, PredNewRel {
+tc_86173609, TypeALU32_2op>, PredNewRel {
let isPredicated = 1;
let isPredicatedFalse = 1;
let isPredicatedNew = 1;
@@ -1532,7 +1532,7 @@ def A2_tfrpi : HInst<
(outs DoubleRegs:$Rdd32),
(ins s8_0Imm:$Ii),
"$Rdd32 = #$Ii",
-tc_5a2711e5, TypeALU64> {
+tc_713b66bf, TypeALU64> {
let isReMaterializable = 1;
let isAsCheapAsAMove = 1;
let isMoveImm = 1;
@@ -1542,7 +1542,7 @@ def A2_tfrpt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
"if ($Pu4) $Rdd32 = $Rss32",
-tc_5a2711e5, TypeALU32_2op>, PredNewRel {
+tc_713b66bf, TypeALU32_2op>, PredNewRel {
let isPredicated = 1;
let BaseOpcode = "A2_tfrp";
let isPseudo = 1;
@@ -1551,7 +1551,7 @@ def A2_tfrptnew : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
"if ($Pu4.new) $Rdd32 = $Rss32",
-tc_1ae57e39, TypeALU32_2op>, PredNewRel {
+tc_86173609, TypeALU32_2op>, PredNewRel {
let isPredicated = 1;
let isPredicatedNew = 1;
let BaseOpcode = "A2_tfrp";
@@ -1561,7 +1561,7 @@ def A2_tfrrcr : HInst<
(outs CtrRegs:$Cd32),
(ins IntRegs:$Rs32),
"$Cd32 = $Rs32",
-tc_434c8e1e, TypeCR>, Enc_bd811a {
+tc_49fdfd4b, TypeCR>, Enc_bd811a {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01100010001;
let hasNewValue = 1;
@@ -1571,14 +1571,14 @@ def A2_tfrsi : HInst<
(outs IntRegs:$Rd32),
(ins s32_0Imm:$Ii),
"$Rd32 = #$Ii",
-tc_57890846, TypeALU32_2op>, Enc_5e87ce, PredNewRel, ImmRegRel {
+tc_c57d9f39, TypeALU32_2op>, Enc_5e87ce, PredNewRel, ImmRegRel {
let Inst{21-21} = 0b0;
let Inst{31-24} = 0b01111000;
let hasNewValue = 1;
let opNewValue = 0;
+let BaseOpcode = "A2_tfrsi";
let CextOpcode = "A2_tfr";
let InputType = "imm";
-let BaseOpcode = "A2_tfrsi";
let isPredicable = 1;
let isReMaterializable = 1;
let isAsCheapAsAMove = 1;
@@ -1593,13 +1593,13 @@ def A2_tfrt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = $Rs32",
-tc_4c5ba658, TypeALU32_2op>, PredNewRel, ImmRegRel {
+tc_1c2c7a4a, TypeALU32_2op>, PredNewRel, ImmRegRel {
let isPredicated = 1;
let hasNewValue = 1;
let opNewValue = 0;
+let BaseOpcode = "A2_tfr";
let CextOpcode = "A2_tfr";
let InputType = "reg";
-let BaseOpcode = "A2_tfr";
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -1607,14 +1607,14 @@ def A2_tfrtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = $Rs32",
-tc_05c070ec, TypeALU32_2op>, PredNewRel, ImmRegRel {
+tc_442395f3, TypeALU32_2op>, PredNewRel, ImmRegRel {
let isPredicated = 1;
let hasNewValue = 1;
let opNewValue = 0;
let isPredicatedNew = 1;
+let BaseOpcode = "A2_tfr";
let CextOpcode = "A2_tfr";
let InputType = "reg";
-let BaseOpcode = "A2_tfr";
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -1622,7 +1622,7 @@ def A2_vabsh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vabsh($Rss32)",
-tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
+tc_d61dfdc3, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000000010;
let prefersSlot3 = 1;
@@ -1631,7 +1631,7 @@ def A2_vabshsat : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vabsh($Rss32):sat",
-tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
+tc_d61dfdc3, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000000010;
let prefersSlot3 = 1;
@@ -1641,7 +1641,7 @@ def A2_vabsw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vabsw($Rss32)",
-tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
+tc_d61dfdc3, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000010;
let prefersSlot3 = 1;
@@ -1650,7 +1650,7 @@ def A2_vabswsat : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vabsw($Rss32):sat",
-tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
+tc_d61dfdc3, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10000000010;
let prefersSlot3 = 1;
@@ -1660,7 +1660,7 @@ def A2_vaddb_map : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddb($Rss32,$Rtt32)",
-tc_946df596, TypeMAPPING> {
+tc_5da50c4b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -1668,7 +1668,7 @@ def A2_vaddh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddh($Rss32,$Rtt32)",
-tc_946df596, TypeALU64>, Enc_a56825 {
+tc_5da50c4b, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1677,7 +1677,7 @@ def A2_vaddhs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddh($Rss32,$Rtt32):sat",
-tc_779080bf, TypeALU64>, Enc_a56825 {
+tc_8a825db2, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1688,7 +1688,7 @@ def A2_vaddub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddub($Rss32,$Rtt32)",
-tc_946df596, TypeALU64>, Enc_a56825 {
+tc_5da50c4b, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1697,7 +1697,7 @@ def A2_vaddubs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddub($Rss32,$Rtt32):sat",
-tc_779080bf, TypeALU64>, Enc_a56825 {
+tc_8a825db2, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1708,7 +1708,7 @@ def A2_vadduhs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vadduh($Rss32,$Rtt32):sat",
-tc_779080bf, TypeALU64>, Enc_a56825 {
+tc_8a825db2, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1719,7 +1719,7 @@ def A2_vaddw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddw($Rss32,$Rtt32)",
-tc_946df596, TypeALU64>, Enc_a56825 {
+tc_5da50c4b, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1728,7 +1728,7 @@ def A2_vaddws : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vaddw($Rss32,$Rtt32):sat",
-tc_779080bf, TypeALU64>, Enc_a56825 {
+tc_8a825db2, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011000;
@@ -1739,7 +1739,7 @@ def A2_vavgh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgh($Rss32,$Rtt32)",
-tc_6132ba3d, TypeALU64>, Enc_a56825 {
+tc_f098b237, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
@@ -1749,7 +1749,7 @@ def A2_vavghcr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgh($Rss32,$Rtt32):crnd",
-tc_002cb246, TypeALU64>, Enc_a56825 {
+tc_0dfac0a7, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
@@ -1759,7 +1759,7 @@ def A2_vavghr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgh($Rss32,$Rtt32):rnd",
-tc_e4a7f9f0, TypeALU64>, Enc_a56825 {
+tc_20131976, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
@@ -1769,7 +1769,7 @@ def A2_vavgub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgub($Rss32,$Rtt32)",
-tc_6132ba3d, TypeALU64>, Enc_a56825 {
+tc_f098b237, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
@@ -1779,7 +1779,7 @@ def A2_vavgubr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgub($Rss32,$Rtt32):rnd",
-tc_e4a7f9f0, TypeALU64>, Enc_a56825 {
+tc_20131976, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
@@ -1789,7 +1789,7 @@ def A2_vavguh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavguh($Rss32,$Rtt32)",
-tc_6132ba3d, TypeALU64>, Enc_a56825 {
+tc_f098b237, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
@@ -1799,7 +1799,7 @@ def A2_vavguhr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavguh($Rss32,$Rtt32):rnd",
-tc_e4a7f9f0, TypeALU64>, Enc_a56825 {
+tc_20131976, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011010;
@@ -1809,7 +1809,7 @@ def A2_vavguw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavguw($Rss32,$Rtt32)",
-tc_6132ba3d, TypeALU64>, Enc_a56825 {
+tc_f098b237, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
@@ -1819,7 +1819,7 @@ def A2_vavguwr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavguw($Rss32,$Rtt32):rnd",
-tc_e4a7f9f0, TypeALU64>, Enc_a56825 {
+tc_20131976, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
@@ -1829,7 +1829,7 @@ def A2_vavgw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgw($Rss32,$Rtt32)",
-tc_6132ba3d, TypeALU64>, Enc_a56825 {
+tc_f098b237, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
@@ -1839,7 +1839,7 @@ def A2_vavgwcr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgw($Rss32,$Rtt32):crnd",
-tc_002cb246, TypeALU64>, Enc_a56825 {
+tc_0dfac0a7, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
@@ -1849,7 +1849,7 @@ def A2_vavgwr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vavgw($Rss32,$Rtt32):rnd",
-tc_e4a7f9f0, TypeALU64>, Enc_a56825 {
+tc_20131976, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011011;
@@ -1859,7 +1859,7 @@ def A2_vcmpbeq : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpb.eq($Rss32,$Rtt32)",
-tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
+tc_4a55d03c, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b110000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1868,7 +1868,7 @@ def A2_vcmpbgtu : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpb.gtu($Rss32,$Rtt32)",
-tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
+tc_4a55d03c, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b111000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1877,7 +1877,7 @@ def A2_vcmpheq : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmph.eq($Rss32,$Rtt32)",
-tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
+tc_4a55d03c, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1886,7 +1886,7 @@ def A2_vcmphgt : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmph.gt($Rss32,$Rtt32)",
-tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
+tc_4a55d03c, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1895,7 +1895,7 @@ def A2_vcmphgtu : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmph.gtu($Rss32,$Rtt32)",
-tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
+tc_4a55d03c, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b101000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1904,7 +1904,7 @@ def A2_vcmpweq : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpw.eq($Rss32,$Rtt32)",
-tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
+tc_4a55d03c, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1913,7 +1913,7 @@ def A2_vcmpwgt : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpw.gt($Rss32,$Rtt32)",
-tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
+tc_4a55d03c, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1922,7 +1922,7 @@ def A2_vcmpwgtu : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpw.gtu($Rss32,$Rtt32)",
-tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
+tc_4a55d03c, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010000;
@@ -1931,7 +1931,7 @@ def A2_vconj : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vconj($Rss32):sat",
-tc_cf8126ae, TypeS_2op>, Enc_b9c5fb {
+tc_d61dfdc3, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10000000100;
let prefersSlot3 = 1;
@@ -1941,7 +1941,7 @@ def A2_vmaxb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxb($Rtt32,$Rss32)",
-tc_779080bf, TypeALU64>, Enc_ea23e4 {
+tc_8a825db2, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -1951,7 +1951,7 @@ def A2_vmaxh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxh($Rtt32,$Rss32)",
-tc_779080bf, TypeALU64>, Enc_ea23e4 {
+tc_8a825db2, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -1961,7 +1961,7 @@ def A2_vmaxub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxub($Rtt32,$Rss32)",
-tc_779080bf, TypeALU64>, Enc_ea23e4 {
+tc_8a825db2, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -1971,7 +1971,7 @@ def A2_vmaxuh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxuh($Rtt32,$Rss32)",
-tc_779080bf, TypeALU64>, Enc_ea23e4 {
+tc_8a825db2, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -1981,7 +1981,7 @@ def A2_vmaxuw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxuw($Rtt32,$Rss32)",
-tc_779080bf, TypeALU64>, Enc_ea23e4 {
+tc_8a825db2, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -1991,7 +1991,7 @@ def A2_vmaxw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vmaxw($Rtt32,$Rss32)",
-tc_779080bf, TypeALU64>, Enc_ea23e4 {
+tc_8a825db2, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -2001,7 +2001,7 @@ def A2_vminb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminb($Rtt32,$Rss32)",
-tc_779080bf, TypeALU64>, Enc_ea23e4 {
+tc_8a825db2, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011110;
@@ -2011,7 +2011,7 @@ def A2_vminh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminh($Rtt32,$Rss32)",
-tc_779080bf, TypeALU64>, Enc_ea23e4 {
+tc_8a825db2, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -2021,7 +2021,7 @@ def A2_vminub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminub($Rtt32,$Rss32)",
-tc_779080bf, TypeALU64>, Enc_ea23e4 {
+tc_8a825db2, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -2031,7 +2031,7 @@ def A2_vminuh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminuh($Rtt32,$Rss32)",
-tc_779080bf, TypeALU64>, Enc_ea23e4 {
+tc_8a825db2, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -2041,7 +2041,7 @@ def A2_vminuw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminuw($Rtt32,$Rss32)",
-tc_779080bf, TypeALU64>, Enc_ea23e4 {
+tc_8a825db2, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -2051,7 +2051,7 @@ def A2_vminw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vminw($Rtt32,$Rss32)",
-tc_779080bf, TypeALU64>, Enc_ea23e4 {
+tc_8a825db2, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011101;
@@ -2061,7 +2061,7 @@ def A2_vnavgh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgh($Rtt32,$Rss32)",
-tc_6132ba3d, TypeALU64>, Enc_ea23e4 {
+tc_f098b237, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
@@ -2071,7 +2071,7 @@ def A2_vnavghcr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgh($Rtt32,$Rss32):crnd:sat",
-tc_002cb246, TypeALU64>, Enc_ea23e4 {
+tc_0dfac0a7, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
@@ -2082,7 +2082,7 @@ def A2_vnavghr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgh($Rtt32,$Rss32):rnd:sat",
-tc_002cb246, TypeALU64>, Enc_ea23e4 {
+tc_0dfac0a7, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
@@ -2093,7 +2093,7 @@ def A2_vnavgw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgw($Rtt32,$Rss32)",
-tc_6132ba3d, TypeALU64>, Enc_ea23e4 {
+tc_f098b237, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
@@ -2103,7 +2103,7 @@ def A2_vnavgwcr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgw($Rtt32,$Rss32):crnd:sat",
-tc_002cb246, TypeALU64>, Enc_ea23e4 {
+tc_0dfac0a7, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
@@ -2114,7 +2114,7 @@ def A2_vnavgwr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vnavgw($Rtt32,$Rss32):rnd:sat",
-tc_002cb246, TypeALU64>, Enc_ea23e4 {
+tc_0dfac0a7, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011100;
@@ -2125,7 +2125,7 @@ def A2_vraddub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vraddub($Rss32,$Rtt32)",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -2135,7 +2135,7 @@ def A2_vraddub_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vraddub($Rss32,$Rtt32)",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -2146,7 +2146,7 @@ def A2_vrsadub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrsadub($Rss32,$Rtt32)",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -2156,7 +2156,7 @@ def A2_vrsadub_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrsadub($Rss32,$Rtt32)",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -2167,7 +2167,7 @@ def A2_vsubb_map : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vsubb($Rss32,$Rtt32)",
-tc_946df596, TypeMAPPING> {
+tc_5da50c4b, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -2175,7 +2175,7 @@ def A2_vsubh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubh($Rtt32,$Rss32)",
-tc_946df596, TypeALU64>, Enc_ea23e4 {
+tc_5da50c4b, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2184,7 +2184,7 @@ def A2_vsubhs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubh($Rtt32,$Rss32):sat",
-tc_779080bf, TypeALU64>, Enc_ea23e4 {
+tc_8a825db2, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2195,7 +2195,7 @@ def A2_vsubub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubub($Rtt32,$Rss32)",
-tc_946df596, TypeALU64>, Enc_ea23e4 {
+tc_5da50c4b, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2204,7 +2204,7 @@ def A2_vsububs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubub($Rtt32,$Rss32):sat",
-tc_779080bf, TypeALU64>, Enc_ea23e4 {
+tc_8a825db2, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2215,7 +2215,7 @@ def A2_vsubuhs : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubuh($Rtt32,$Rss32):sat",
-tc_779080bf, TypeALU64>, Enc_ea23e4 {
+tc_8a825db2, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2226,7 +2226,7 @@ def A2_vsubw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubw($Rtt32,$Rss32)",
-tc_946df596, TypeALU64>, Enc_ea23e4 {
+tc_5da50c4b, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2235,7 +2235,7 @@ def A2_vsubws : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vsubw($Rtt32,$Rss32):sat",
-tc_779080bf, TypeALU64>, Enc_ea23e4 {
+tc_8a825db2, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011001;
@@ -2246,14 +2246,14 @@ def A2_xor : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = xor($Rs32,$Rt32)",
-tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, PredNewRel {
+tc_713b66bf, TypeALU32_3op>, Enc_5ab2be, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110001011;
let hasNewValue = 1;
let opNewValue = 0;
-let InputType = "reg";
let BaseOpcode = "A2_xor";
+let InputType = "reg";
let isCommutable = 1;
let isPredicable = 1;
}
@@ -2261,7 +2261,7 @@ def A2_xorp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = xor($Rss32,$Rtt32)",
-tc_946df596, TypeALU64>, Enc_a56825 {
+tc_5da50c4b, TypeALU64>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -2271,7 +2271,7 @@ def A2_zxtb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = zxtb($Rs32)",
-tc_5a2711e5, TypeALU32_2op>, PredNewRel {
+tc_713b66bf, TypeALU32_2op>, PredNewRel {
let hasNewValue = 1;
let opNewValue = 0;
let BaseOpcode = "A2_zxtb";
@@ -2283,7 +2283,7 @@ def A2_zxth : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = zxth($Rs32)",
-tc_57890846, TypeALU32_2op>, Enc_5e2823, PredNewRel {
+tc_c57d9f39, TypeALU32_2op>, Enc_5e2823, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01110000110;
let hasNewValue = 1;
@@ -2295,7 +2295,7 @@ def A4_addp_c : HInst<
(outs DoubleRegs:$Rdd32, PredRegs:$Px4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Px4in),
"$Rdd32 = add($Rss32,$Rtt32,$Px4):carry",
-tc_9c3ecd83, TypeS_3op>, Enc_2b3f60 {
+tc_1d41f8b7, TypeS_3op>, Enc_2b3f60 {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000010110;
@@ -2306,7 +2306,7 @@ def A4_andn : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = and($Rt32,~$Rs32)",
-tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
+tc_713b66bf, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110001100;
@@ -2318,7 +2318,7 @@ def A4_andnp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = and($Rtt32,~$Rss32)",
-tc_946df596, TypeALU64>, Enc_ea23e4 {
+tc_5da50c4b, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -2327,7 +2327,7 @@ def A4_bitsplit : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = bitsplit($Rs32,$Rt32)",
-tc_4414d8b1, TypeALU64>, Enc_be32a5 {
+tc_f34c1c21, TypeALU64>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010100001;
@@ -2337,7 +2337,7 @@ def A4_bitspliti : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rdd32 = bitsplit($Rs32,#$Ii)",
-tc_4414d8b1, TypeS_2op>, Enc_311abd {
+tc_f34c1c21, TypeS_2op>, Enc_311abd {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001000110;
@@ -2347,14 +2347,14 @@ def A4_boundscheck : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"$Pd4 = boundscheck($Rs32,$Rtt32)",
-tc_85d5d03f, TypeALU64> {
+tc_4a55d03c, TypeALU64> {
let isPseudo = 1;
}
def A4_boundscheck_hi : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = boundscheck($Rss32,$Rtt32):raw:hi",
-tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
+tc_4a55d03c, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b101000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -2363,7 +2363,7 @@ def A4_boundscheck_lo : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = boundscheck($Rss32,$Rtt32):raw:lo",
-tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
+tc_4a55d03c, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -2372,7 +2372,7 @@ def A4_cmpbeq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmpb.eq($Rs32,$Rt32)",
-tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_4a55d03c, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b110000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2385,7 +2385,7 @@ def A4_cmpbeqi : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u8_0Imm:$Ii),
"$Pd4 = cmpb.eq($Rs32,#$Ii)",
-tc_643b4717, TypeALU64>, Enc_08d755, ImmRegRel {
+tc_a1297125, TypeALU64>, Enc_08d755, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011101000;
@@ -2398,7 +2398,7 @@ def A4_cmpbgt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmpb.gt($Rs32,$Rt32)",
-tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_4a55d03c, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2410,7 +2410,7 @@ def A4_cmpbgti : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s8_0Imm:$Ii),
"$Pd4 = cmpb.gt($Rs32,#$Ii)",
-tc_643b4717, TypeALU64>, Enc_08d755, ImmRegRel {
+tc_a1297125, TypeALU64>, Enc_08d755, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011101001;
@@ -2422,7 +2422,7 @@ def A4_cmpbgtu : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmpb.gtu($Rs32,$Rt32)",
-tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_4a55d03c, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b111000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2434,7 +2434,7 @@ def A4_cmpbgtui : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u32_0Imm:$Ii),
"$Pd4 = cmpb.gtu($Rs32,#$Ii)",
-tc_643b4717, TypeALU64>, Enc_02553a, ImmRegRel {
+tc_a1297125, TypeALU64>, Enc_02553a, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b11011101010;
@@ -2451,7 +2451,7 @@ def A4_cmpheq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmph.eq($Rs32,$Rt32)",
-tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_4a55d03c, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2464,7 +2464,7 @@ def A4_cmpheqi : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = cmph.eq($Rs32,#$Ii)",
-tc_643b4717, TypeALU64>, Enc_08d755, ImmRegRel {
+tc_a1297125, TypeALU64>, Enc_08d755, ImmRegRel {
let Inst{4-2} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011101000;
@@ -2482,7 +2482,7 @@ def A4_cmphgt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmph.gt($Rs32,$Rt32)",
-tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_4a55d03c, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2494,7 +2494,7 @@ def A4_cmphgti : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = cmph.gt($Rs32,#$Ii)",
-tc_643b4717, TypeALU64>, Enc_08d755, ImmRegRel {
+tc_a1297125, TypeALU64>, Enc_08d755, ImmRegRel {
let Inst{4-2} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011101001;
@@ -2511,7 +2511,7 @@ def A4_cmphgtu : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmph.gtu($Rs32,$Rt32)",
-tc_85d5d03f, TypeS_3op>, Enc_c2b48e, ImmRegRel {
+tc_4a55d03c, TypeS_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b101000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111110;
@@ -2523,7 +2523,7 @@ def A4_cmphgtui : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u32_0Imm:$Ii),
"$Pd4 = cmph.gtu($Rs32,#$Ii)",
-tc_643b4717, TypeALU64>, Enc_02553a, ImmRegRel {
+tc_a1297125, TypeALU64>, Enc_02553a, ImmRegRel {
let Inst{4-2} = 0b010;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b11011101010;
@@ -2540,7 +2540,7 @@ def A4_combineii : HInst<
(outs DoubleRegs:$Rdd32),
(ins s8_0Imm:$Ii, u32_0Imm:$II),
"$Rdd32 = combine(#$Ii,#$II)",
-tc_5a2711e5, TypeALU32_2op>, Enc_f0cca7 {
+tc_713b66bf, TypeALU32_2op>, Enc_f0cca7 {
let Inst{31-21} = 0b01111100100;
let isExtendable = 1;
let opExtendable = 2;
@@ -2552,7 +2552,7 @@ def A4_combineir : HInst<
(outs DoubleRegs:$Rdd32),
(ins s32_0Imm:$Ii, IntRegs:$Rs32),
"$Rdd32 = combine(#$Ii,$Rs32)",
-tc_5a2711e5, TypeALU32_2op>, Enc_9cdba7 {
+tc_713b66bf, TypeALU32_2op>, Enc_9cdba7 {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b01110011001;
let isExtendable = 1;
@@ -2565,7 +2565,7 @@ def A4_combineri : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rdd32 = combine($Rs32,#$Ii)",
-tc_5a2711e5, TypeALU32_2op>, Enc_9cdba7 {
+tc_713b66bf, TypeALU32_2op>, Enc_9cdba7 {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b01110011000;
let isExtendable = 1;
@@ -2578,7 +2578,7 @@ def A4_cround_ri : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = cround($Rs32,#$Ii)",
-tc_002cb246, TypeS_2op>, Enc_a05677 {
+tc_0dfac0a7, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100111;
@@ -2590,7 +2590,7 @@ def A4_cround_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cround($Rs32,$Rt32)",
-tc_002cb246, TypeS_3op>, Enc_5ab2be {
+tc_0dfac0a7, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110110;
@@ -2602,14 +2602,14 @@ def A4_ext : HInst<
(outs),
(ins u26_6Imm:$Ii),
"immext(#$Ii)",
-tc_862b3e70, TypeEXTENDER>, Enc_2b518f {
+tc_112d30d6, TypeEXTENDER>, Enc_2b518f {
let Inst{31-28} = 0b0000;
}
def A4_modwrapu : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = modwrap($Rs32,$Rt32)",
-tc_779080bf, TypeALU64>, Enc_5ab2be {
+tc_8a825db2, TypeALU64>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -2621,7 +2621,7 @@ def A4_orn : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = or($Rt32,~$Rs32)",
-tc_5a2711e5, TypeALU32_3op>, Enc_bd6011 {
+tc_713b66bf, TypeALU32_3op>, Enc_bd6011 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110001101;
@@ -2633,7 +2633,7 @@ def A4_ornp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = or($Rtt32,~$Rss32)",
-tc_946df596, TypeALU64>, Enc_ea23e4 {
+tc_5da50c4b, TypeALU64>, Enc_ea23e4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010011111;
@@ -2642,7 +2642,7 @@ def A4_paslhf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = aslh($Rs32)",
-tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_713b66bf, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000000;
@@ -2656,7 +2656,7 @@ def A4_paslhfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = aslh($Rs32)",
-tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_86173609, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000000;
@@ -2671,7 +2671,7 @@ def A4_paslht : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = aslh($Rs32)",
-tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_713b66bf, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000000;
@@ -2684,7 +2684,7 @@ def A4_paslhtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = aslh($Rs32)",
-tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_86173609, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000000;
@@ -2698,7 +2698,7 @@ def A4_pasrhf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = asrh($Rs32)",
-tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_713b66bf, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000001;
@@ -2712,7 +2712,7 @@ def A4_pasrhfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = asrh($Rs32)",
-tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_86173609, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000001;
@@ -2727,7 +2727,7 @@ def A4_pasrht : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = asrh($Rs32)",
-tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_713b66bf, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000001;
@@ -2740,7 +2740,7 @@ def A4_pasrhtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = asrh($Rs32)",
-tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_86173609, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000001;
@@ -2754,7 +2754,7 @@ def A4_psxtbf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = sxtb($Rs32)",
-tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_713b66bf, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000101;
@@ -2768,7 +2768,7 @@ def A4_psxtbfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = sxtb($Rs32)",
-tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_86173609, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000101;
@@ -2783,7 +2783,7 @@ def A4_psxtbt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = sxtb($Rs32)",
-tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_713b66bf, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000101;
@@ -2796,7 +2796,7 @@ def A4_psxtbtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = sxtb($Rs32)",
-tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_86173609, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000101;
@@ -2810,7 +2810,7 @@ def A4_psxthf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = sxth($Rs32)",
-tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_713b66bf, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000111;
@@ -2824,7 +2824,7 @@ def A4_psxthfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = sxth($Rs32)",
-tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_86173609, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000111;
@@ -2839,7 +2839,7 @@ def A4_psxtht : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = sxth($Rs32)",
-tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_713b66bf, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000111;
@@ -2852,7 +2852,7 @@ def A4_psxthtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = sxth($Rs32)",
-tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_86173609, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000111;
@@ -2866,7 +2866,7 @@ def A4_pzxtbf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = zxtb($Rs32)",
-tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_713b66bf, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000100;
@@ -2880,7 +2880,7 @@ def A4_pzxtbfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = zxtb($Rs32)",
-tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_86173609, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000100;
@@ -2895,7 +2895,7 @@ def A4_pzxtbt : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = zxtb($Rs32)",
-tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_713b66bf, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000100;
@@ -2908,7 +2908,7 @@ def A4_pzxtbtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = zxtb($Rs32)",
-tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_86173609, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000100;
@@ -2922,7 +2922,7 @@ def A4_pzxthf : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) $Rd32 = zxth($Rs32)",
-tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_713b66bf, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b01110000110;
@@ -2936,7 +2936,7 @@ def A4_pzxthfnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) $Rd32 = zxth($Rs32)",
-tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_86173609, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1011;
let Inst{31-21} = 0b01110000110;
@@ -2951,7 +2951,7 @@ def A4_pzxtht : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) $Rd32 = zxth($Rs32)",
-tc_5a2711e5, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_713b66bf, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1000;
let Inst{31-21} = 0b01110000110;
@@ -2964,7 +2964,7 @@ def A4_pzxthtnew : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) $Rd32 = zxth($Rs32)",
-tc_1ae57e39, TypeALU32_2op>, Enc_fb6577, PredNewRel {
+tc_86173609, TypeALU32_2op>, Enc_fb6577, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b01110000110;
@@ -2978,7 +2978,7 @@ def A4_rcmpeq : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cmp.eq($Rs32,$Rt32)",
-tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, ImmRegRel {
+tc_713b66bf, TypeALU32_3op>, Enc_5ab2be, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011010;
@@ -2992,7 +2992,7 @@ def A4_rcmpeqi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = cmp.eq($Rs32,#$Ii)",
-tc_5a2711e5, TypeALU32_2op>, Enc_b8c967, ImmRegRel {
+tc_713b66bf, TypeALU32_2op>, Enc_b8c967, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b01110011010;
let hasNewValue = 1;
@@ -3009,7 +3009,7 @@ def A4_rcmpneq : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = !cmp.eq($Rs32,$Rt32)",
-tc_5a2711e5, TypeALU32_3op>, Enc_5ab2be, ImmRegRel {
+tc_713b66bf, TypeALU32_3op>, Enc_5ab2be, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110011011;
@@ -3023,7 +3023,7 @@ def A4_rcmpneqi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = !cmp.eq($Rs32,#$Ii)",
-tc_5a2711e5, TypeALU32_2op>, Enc_b8c967, ImmRegRel {
+tc_713b66bf, TypeALU32_2op>, Enc_b8c967, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b01110011011;
let hasNewValue = 1;
@@ -3040,7 +3040,7 @@ def A4_round_ri : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = round($Rs32,#$Ii)",
-tc_002cb246, TypeS_2op>, Enc_a05677 {
+tc_0dfac0a7, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100111;
@@ -3052,7 +3052,7 @@ def A4_round_ri_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = round($Rs32,#$Ii):sat",
-tc_002cb246, TypeS_2op>, Enc_a05677 {
+tc_0dfac0a7, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100111;
@@ -3065,7 +3065,7 @@ def A4_round_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = round($Rs32,$Rt32)",
-tc_002cb246, TypeS_3op>, Enc_5ab2be {
+tc_0dfac0a7, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110110;
@@ -3077,7 +3077,7 @@ def A4_round_rr_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = round($Rs32,$Rt32):sat",
-tc_002cb246, TypeS_3op>, Enc_5ab2be {
+tc_0dfac0a7, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110110;
@@ -3090,7 +3090,7 @@ def A4_subp_c : HInst<
(outs DoubleRegs:$Rdd32, PredRegs:$Px4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Px4in),
"$Rdd32 = sub($Rss32,$Rtt32,$Px4):carry",
-tc_9c3ecd83, TypeS_3op>, Enc_2b3f60 {
+tc_1d41f8b7, TypeS_3op>, Enc_2b3f60 {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000010111;
@@ -3101,7 +3101,7 @@ def A4_tfrcpp : HInst<
(outs DoubleRegs:$Rdd32),
(ins CtrRegs64:$Css32),
"$Rdd32 = $Css32",
-tc_b9272d6c, TypeCR>, Enc_667b39 {
+tc_7476d766, TypeCR>, Enc_667b39 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01101000000;
}
@@ -3109,7 +3109,7 @@ def A4_tfrpcp : HInst<
(outs CtrRegs64:$Cdd32),
(ins DoubleRegs:$Rss32),
"$Cdd32 = $Rss32",
-tc_434c8e1e, TypeCR>, Enc_0ed752 {
+tc_49fdfd4b, TypeCR>, Enc_0ed752 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01100011001;
}
@@ -3117,7 +3117,7 @@ def A4_tlbmatch : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Pd4 = tlbmatch($Rss32,$Rt32)",
-tc_4837eefb, TypeALU64>, Enc_03833b {
+tc_d68dca5c, TypeALU64>, Enc_03833b {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -3127,7 +3127,7 @@ def A4_vcmpbeq_any : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = any8(vcmpb.eq($Rss32,$Rtt32))",
-tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
+tc_4a55d03c, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -3136,7 +3136,7 @@ def A4_vcmpbeqi : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u8_0Imm:$Ii),
"$Pd4 = vcmpb.eq($Rss32,#$Ii)",
-tc_643b4717, TypeALU64>, Enc_0d8adb {
+tc_a1297125, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100000;
@@ -3145,7 +3145,7 @@ def A4_vcmpbgt : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = vcmpb.gt($Rss32,$Rtt32)",
-tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
+tc_4a55d03c, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -3154,7 +3154,7 @@ def A4_vcmpbgti : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
"$Pd4 = vcmpb.gt($Rss32,#$Ii)",
-tc_643b4717, TypeALU64>, Enc_0d8adb {
+tc_a1297125, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100001;
@@ -3163,7 +3163,7 @@ def A4_vcmpbgtui : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u7_0Imm:$Ii),
"$Pd4 = vcmpb.gtu($Rss32,#$Ii)",
-tc_643b4717, TypeALU64>, Enc_3680c2 {
+tc_a1297125, TypeALU64>, Enc_3680c2 {
let Inst{4-2} = 0b000;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b11011100010;
@@ -3172,7 +3172,7 @@ def A4_vcmpheqi : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
"$Pd4 = vcmph.eq($Rss32,#$Ii)",
-tc_643b4717, TypeALU64>, Enc_0d8adb {
+tc_a1297125, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100000;
@@ -3181,7 +3181,7 @@ def A4_vcmphgti : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
"$Pd4 = vcmph.gt($Rss32,#$Ii)",
-tc_643b4717, TypeALU64>, Enc_0d8adb {
+tc_a1297125, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100001;
@@ -3190,7 +3190,7 @@ def A4_vcmphgtui : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u7_0Imm:$Ii),
"$Pd4 = vcmph.gtu($Rss32,#$Ii)",
-tc_643b4717, TypeALU64>, Enc_3680c2 {
+tc_a1297125, TypeALU64>, Enc_3680c2 {
let Inst{4-2} = 0b010;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b11011100010;
@@ -3199,7 +3199,7 @@ def A4_vcmpweqi : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
"$Pd4 = vcmpw.eq($Rss32,#$Ii)",
-tc_643b4717, TypeALU64>, Enc_0d8adb {
+tc_a1297125, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100000;
@@ -3208,7 +3208,7 @@ def A4_vcmpwgti : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
"$Pd4 = vcmpw.gt($Rss32,#$Ii)",
-tc_643b4717, TypeALU64>, Enc_0d8adb {
+tc_a1297125, TypeALU64>, Enc_0d8adb {
let Inst{4-2} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11011100001;
@@ -3217,7 +3217,7 @@ def A4_vcmpwgtui : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u7_0Imm:$Ii),
"$Pd4 = vcmpw.gtu($Rss32,#$Ii)",
-tc_643b4717, TypeALU64>, Enc_3680c2 {
+tc_a1297125, TypeALU64>, Enc_3680c2 {
let Inst{4-2} = 0b100;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b11011100010;
@@ -3226,7 +3226,7 @@ def A4_vrmaxh : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrmaxh($Rss32,$Ru32)",
-tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
+tc_788b1d09, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011001;
@@ -3237,7 +3237,7 @@ def A4_vrmaxuh : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrmaxuh($Rss32,$Ru32)",
-tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
+tc_788b1d09, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11001011001;
@@ -3248,7 +3248,7 @@ def A4_vrmaxuw : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrmaxuw($Rss32,$Ru32)",
-tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
+tc_788b1d09, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11001011001;
@@ -3259,7 +3259,7 @@ def A4_vrmaxw : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrmaxw($Rss32,$Ru32)",
-tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
+tc_788b1d09, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011001;
@@ -3270,7 +3270,7 @@ def A4_vrminh : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrminh($Rss32,$Ru32)",
-tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
+tc_788b1d09, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011001;
@@ -3281,7 +3281,7 @@ def A4_vrminuh : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrminuh($Rss32,$Ru32)",
-tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
+tc_788b1d09, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11001011001;
@@ -3292,7 +3292,7 @@ def A4_vrminuw : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrminuw($Rss32,$Ru32)",
-tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
+tc_788b1d09, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11001011001;
@@ -3303,7 +3303,7 @@ def A4_vrminw : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
"$Rxx32 = vrminw($Rss32,$Ru32)",
-tc_5b54b33f, TypeS_3op>, Enc_412ff0 {
+tc_788b1d09, TypeS_3op>, Enc_412ff0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011001;
@@ -3314,7 +3314,7 @@ def A5_ACS : HInst<
(outs DoubleRegs:$Rxx32, PredRegs:$Pe4),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32,$Pe4 = vacsh($Rss32,$Rtt32)",
-tc_d1aa9eaa, TypeM>, Enc_831a7d, Requires<[HasV55]> {
+tc_38e0bae9, TypeM>, Enc_831a7d, Requires<[HasV55]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -3327,7 +3327,7 @@ def A5_vaddhubs : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vaddhub($Rss32,$Rtt32):sat",
-tc_002cb246, TypeS_3op>, Enc_d2216a {
+tc_0dfac0a7, TypeS_3op>, Enc_d2216a {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -3340,7 +3340,7 @@ def A6_vcmpbeq_notany : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = !any8(vcmpb.eq($Rss32,$Rtt32))",
-tc_1fc97744, TypeALU64>, Enc_fcf7a7, Requires<[HasV65]> {
+tc_407e96f9, TypeALU64>, Enc_fcf7a7, Requires<[HasV65]> {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -3349,18 +3349,57 @@ def A6_vminub_RdP : HInst<
(outs DoubleRegs:$Rdd32, PredRegs:$Pe4),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32,$Pe4 = vminub($Rtt32,$Rss32)",
-tc_f9058dd7, TypeM>, Enc_d2c7f1, Requires<[HasV62]> {
+tc_7401744f, TypeM>, Enc_d2c7f1, Requires<[HasV62]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
let isPredicateLate = 1;
let prefersSlot3 = 1;
}
+def A7_clip : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = clip($Rs32,#$Ii)",
+tc_407e96f9, TypeS_2op>, Enc_a05677, Requires<[HasV67,UseAudio]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001000110;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A7_croundd_ri : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rdd32 = cround($Rss32,#$Ii)",
+tc_9b3c0462, TypeS_2op>, Enc_5eac98, Requires<[HasV67,UseAudio]> {
+let Inst{7-5} = 0b010;
+let Inst{31-21} = 0b10001100111;
+let prefersSlot3 = 1;
+}
+def A7_croundd_rr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = cround($Rss32,$Rt32)",
+tc_9b3c0462, TypeS_3op>, Enc_927852, Requires<[HasV67,UseAudio]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110110;
+let prefersSlot3 = 1;
+}
+def A7_vclip : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
+"$Rdd32 = vclip($Rss32,#$Ii)",
+tc_407e96f9, TypeS_2op>, Enc_7e5a82, Requires<[HasV67,UseAudio]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001000110;
+}
def C2_all8 : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4),
"$Pd4 = all8($Ps4)",
-tc_de554571, TypeCR>, Enc_65d691 {
+tc_151bf368, TypeCR>, Enc_65d691 {
let Inst{13-2} = 0b000000000000;
let Inst{31-18} = 0b01101011101000;
}
@@ -3368,7 +3407,7 @@ def C2_and : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Pt4, PredRegs:$Ps4),
"$Pd4 = and($Pt4,$Ps4)",
-tc_640086b5, TypeCR>, Enc_454a26 {
+tc_651cbe02, TypeCR>, Enc_454a26 {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011000000;
@@ -3377,7 +3416,7 @@ def C2_andn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Pt4, PredRegs:$Ps4),
"$Pd4 = and($Pt4,!$Ps4)",
-tc_640086b5, TypeCR>, Enc_454a26 {
+tc_651cbe02, TypeCR>, Enc_454a26 {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011011000;
@@ -3386,7 +3425,7 @@ def C2_any8 : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4),
"$Pd4 = any8($Ps4)",
-tc_de554571, TypeCR>, Enc_65d691 {
+tc_151bf368, TypeCR>, Enc_65d691 {
let Inst{13-2} = 0b000000000000;
let Inst{31-18} = 0b01101011100000;
}
@@ -3394,7 +3433,7 @@ def C2_bitsclr : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = bitsclr($Rs32,$Rt32)",
-tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
+tc_4a55d03c, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111100;
@@ -3403,7 +3442,7 @@ def C2_bitsclri : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u6_0Imm:$Ii),
"$Pd4 = bitsclr($Rs32,#$Ii)",
-tc_643b4717, TypeS_2op>, Enc_5d6c34 {
+tc_a1297125, TypeS_2op>, Enc_5d6c34 {
let Inst{7-2} = 0b000000;
let Inst{31-21} = 0b10000101100;
}
@@ -3411,7 +3450,7 @@ def C2_bitsset : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = bitsset($Rs32,$Rt32)",
-tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
+tc_4a55d03c, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111010;
@@ -3420,7 +3459,7 @@ def C2_ccombinewf : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4) $Rdd32 = combine($Rs32,$Rt32)",
-tc_4c5ba658, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
+tc_1c2c7a4a, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111101000;
@@ -3432,7 +3471,7 @@ def C2_ccombinewnewf : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pu4.new) $Rdd32 = combine($Rs32,$Rt32)",
-tc_05c070ec, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
+tc_442395f3, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111101000;
@@ -3445,7 +3484,7 @@ def C2_ccombinewnewt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4.new) $Rdd32 = combine($Rs32,$Rt32)",
-tc_05c070ec, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
+tc_442395f3, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11111101000;
@@ -3457,7 +3496,7 @@ def C2_ccombinewt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pu4) $Rdd32 = combine($Rs32,$Rt32)",
-tc_4c5ba658, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
+tc_1c2c7a4a, TypeALU32_3op>, Enc_cb4b4e, PredNewRel {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11111101000;
@@ -3468,7 +3507,7 @@ def C2_cmoveif : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii),
"if (!$Pu4) $Rd32 = #$Ii",
-tc_5a2711e5, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
+tc_713b66bf, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{20-20} = 0b0;
let Inst{31-23} = 0b011111101;
@@ -3476,9 +3515,9 @@ let isPredicated = 1;
let isPredicatedFalse = 1;
let hasNewValue = 1;
let opNewValue = 0;
+let BaseOpcode = "A2_tfrsi";
let CextOpcode = "A2_tfr";
let InputType = "imm";
-let BaseOpcode = "A2_tfrsi";
let isMoveImm = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -3490,16 +3529,16 @@ def C2_cmoveit : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii),
"if ($Pu4) $Rd32 = #$Ii",
-tc_5a2711e5, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
+tc_713b66bf, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{20-20} = 0b0;
let Inst{31-23} = 0b011111100;
let isPredicated = 1;
let hasNewValue = 1;
let opNewValue = 0;
+let BaseOpcode = "A2_tfrsi";
let CextOpcode = "A2_tfr";
let InputType = "imm";
-let BaseOpcode = "A2_tfrsi";
let isMoveImm = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -3511,7 +3550,7 @@ def C2_cmovenewif : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii),
"if (!$Pu4.new) $Rd32 = #$Ii",
-tc_1ae57e39, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
+tc_86173609, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{20-20} = 0b0;
let Inst{31-23} = 0b011111101;
@@ -3520,9 +3559,9 @@ let isPredicatedFalse = 1;
let hasNewValue = 1;
let opNewValue = 0;
let isPredicatedNew = 1;
+let BaseOpcode = "A2_tfrsi";
let CextOpcode = "A2_tfr";
let InputType = "imm";
-let BaseOpcode = "A2_tfrsi";
let isMoveImm = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -3534,7 +3573,7 @@ def C2_cmovenewit : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii),
"if ($Pu4.new) $Rd32 = #$Ii",
-tc_1ae57e39, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
+tc_86173609, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel {
let Inst{13-13} = 0b1;
let Inst{20-20} = 0b0;
let Inst{31-23} = 0b011111100;
@@ -3542,9 +3581,9 @@ let isPredicated = 1;
let hasNewValue = 1;
let opNewValue = 0;
let isPredicatedNew = 1;
+let BaseOpcode = "A2_tfrsi";
let CextOpcode = "A2_tfr";
let InputType = "imm";
-let BaseOpcode = "A2_tfrsi";
let isMoveImm = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -3556,7 +3595,7 @@ def C2_cmpeq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmp.eq($Rs32,$Rt32)",
-tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_9c52f549, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010000;
@@ -3569,7 +3608,7 @@ def C2_cmpeqi : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = cmp.eq($Rs32,#$Ii)",
-tc_56f114f4, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
+tc_d33e5eee, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{31-22} = 0b0111010100;
let CextOpcode = "C2_cmpeq";
@@ -3585,7 +3624,7 @@ def C2_cmpeqp : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = cmp.eq($Rss32,$Rtt32)",
-tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
+tc_4a55d03c, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010100;
@@ -3596,7 +3635,7 @@ def C2_cmpgei : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s8_0Imm:$Ii),
"$Pd4 = cmp.ge($Rs32,#$Ii)",
-tc_56f114f4, TypeALU32_2op> {
+tc_d33e5eee, TypeALU32_2op> {
let isCompare = 1;
let isPseudo = 1;
}
@@ -3604,7 +3643,7 @@ def C2_cmpgeui : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u8_0Imm:$Ii),
"$Pd4 = cmp.geu($Rs32,#$Ii)",
-tc_56f114f4, TypeALU32_2op> {
+tc_d33e5eee, TypeALU32_2op> {
let isCompare = 1;
let isPseudo = 1;
}
@@ -3612,7 +3651,7 @@ def C2_cmpgt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmp.gt($Rs32,$Rt32)",
-tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_9c52f549, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010010;
@@ -3624,7 +3663,7 @@ def C2_cmpgti : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = cmp.gt($Rs32,#$Ii)",
-tc_56f114f4, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
+tc_d33e5eee, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{31-22} = 0b0111010101;
let CextOpcode = "C2_cmpgt";
@@ -3640,7 +3679,7 @@ def C2_cmpgtp : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = cmp.gt($Rss32,$Rtt32)",
-tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
+tc_4a55d03c, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010100;
@@ -3650,7 +3689,7 @@ def C2_cmpgtu : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmp.gtu($Rs32,$Rt32)",
-tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_9c52f549, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010011;
@@ -3662,7 +3701,7 @@ def C2_cmpgtui : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u32_0Imm:$Ii),
"$Pd4 = cmp.gtu($Rs32,#$Ii)",
-tc_56f114f4, TypeALU32_2op>, Enc_c0cdde, ImmRegRel {
+tc_d33e5eee, TypeALU32_2op>, Enc_c0cdde, ImmRegRel {
let Inst{4-2} = 0b000;
let Inst{31-21} = 0b01110101100;
let CextOpcode = "C2_cmpgtu";
@@ -3678,7 +3717,7 @@ def C2_cmpgtup : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = cmp.gtu($Rss32,$Rtt32)",
-tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
+tc_4a55d03c, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010100;
@@ -3688,7 +3727,7 @@ def C2_cmplt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmp.lt($Rs32,$Rt32)",
-tc_56f114f4, TypeALU32_3op> {
+tc_d33e5eee, TypeALU32_3op> {
let isCompare = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -3697,7 +3736,7 @@ def C2_cmpltu : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = cmp.ltu($Rs32,$Rt32)",
-tc_56f114f4, TypeALU32_3op> {
+tc_d33e5eee, TypeALU32_3op> {
let isCompare = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -3706,7 +3745,7 @@ def C2_mask : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4),
"$Rdd32 = mask($Pt4)",
-tc_0ae0825c, TypeS_2op>, Enc_78e566 {
+tc_9f6cd987, TypeS_2op>, Enc_78e566 {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b0000;
let Inst{31-16} = 0b1000011000000000;
@@ -3715,7 +3754,7 @@ def C2_mux : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mux($Pu4,$Rs32,$Rt32)",
-tc_4c5ba658, TypeALU32_3op>, Enc_ea4c54 {
+tc_1c2c7a4a, TypeALU32_3op>, Enc_ea4c54 {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110100000;
@@ -3727,7 +3766,7 @@ def C2_muxii : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii, s8_0Imm:$II),
"$Rd32 = mux($Pu4,#$Ii,#$II)",
-tc_4c5ba658, TypeALU32_2op>, Enc_830e5d {
+tc_1c2c7a4a, TypeALU32_2op>, Enc_830e5d {
let Inst{31-25} = 0b0111101;
let hasNewValue = 1;
let opNewValue = 0;
@@ -3741,7 +3780,7 @@ def C2_muxir : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = mux($Pu4,$Rs32,#$Ii)",
-tc_4c5ba658, TypeALU32_2op>, Enc_e38e1f {
+tc_1c2c7a4a, TypeALU32_2op>, Enc_e38e1f {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b011100110;
let hasNewValue = 1;
@@ -3757,7 +3796,7 @@ def C2_muxri : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pu4, s32_0Imm:$Ii, IntRegs:$Rs32),
"$Rd32 = mux($Pu4,#$Ii,$Rs32)",
-tc_4c5ba658, TypeALU32_2op>, Enc_e38e1f {
+tc_1c2c7a4a, TypeALU32_2op>, Enc_e38e1f {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b011100111;
let hasNewValue = 1;
@@ -3773,7 +3812,7 @@ def C2_not : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4),
"$Pd4 = not($Ps4)",
-tc_de554571, TypeCR>, Enc_65d691 {
+tc_151bf368, TypeCR>, Enc_65d691 {
let Inst{13-2} = 0b000000000000;
let Inst{31-18} = 0b01101011110000;
}
@@ -3781,7 +3820,7 @@ def C2_or : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Pt4, PredRegs:$Ps4),
"$Pd4 = or($Pt4,$Ps4)",
-tc_640086b5, TypeCR>, Enc_454a26 {
+tc_651cbe02, TypeCR>, Enc_454a26 {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011001000;
@@ -3790,7 +3829,7 @@ def C2_orn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Pt4, PredRegs:$Ps4),
"$Pd4 = or($Pt4,!$Ps4)",
-tc_640086b5, TypeCR>, Enc_454a26 {
+tc_651cbe02, TypeCR>, Enc_454a26 {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011111000;
@@ -3799,7 +3838,7 @@ def C2_pxfer_map : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4),
"$Pd4 = $Ps4",
-tc_640086b5, TypeMAPPING> {
+tc_651cbe02, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -3807,7 +3846,7 @@ def C2_tfrpr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Ps4),
"$Rd32 = $Ps4",
-tc_0ae0825c, TypeS_2op>, Enc_f5e933 {
+tc_9f6cd987, TypeS_2op>, Enc_f5e933 {
let Inst{13-5} = 0b000000000;
let Inst{31-18} = 0b10001001010000;
let hasNewValue = 1;
@@ -3817,7 +3856,7 @@ def C2_tfrrp : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32),
"$Pd4 = $Rs32",
-tc_cfd8378a, TypeS_2op>, Enc_48b75f {
+tc_55b33fda, TypeS_2op>, Enc_48b75f {
let Inst{13-2} = 0b000000000000;
let Inst{31-21} = 0b10000101010;
}
@@ -3825,7 +3864,7 @@ def C2_vitpack : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Ps4, PredRegs:$Pt4),
"$Rd32 = vitpack($Ps4,$Pt4)",
-tc_4414d8b1, TypeS_2op>, Enc_527412 {
+tc_f34c1c21, TypeS_2op>, Enc_527412 {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b10001001000000;
@@ -3837,7 +3876,7 @@ def C2_vmux : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pu4, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmux($Pu4,$Rss32,$Rtt32)",
-tc_b4b5c03a, TypeALU64>, Enc_329361 {
+tc_6fc5dbea, TypeALU64>, Enc_329361 {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010001000;
@@ -3846,7 +3885,7 @@ def C2_xor : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4),
"$Pd4 = xor($Ps4,$Pt4)",
-tc_640086b5, TypeCR>, Enc_284ebb {
+tc_651cbe02, TypeCR>, Enc_284ebb {
let Inst{7-2} = 0b000000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011010000;
@@ -3855,7 +3894,7 @@ def C4_addipc : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii),
"$Rd32 = add(pc,#$Ii)",
-tc_a813cf9a, TypeCR>, Enc_607661 {
+tc_3edca78f, TypeCR>, Enc_607661 {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0110101001001001;
@@ -3871,7 +3910,7 @@ def C4_and_and : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = and($Ps4,and($Pt4,$Pu4))",
-tc_b31c2e97, TypeCR>, Enc_9ac432 {
+tc_a7a13fac, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011000100;
@@ -3880,7 +3919,7 @@ def C4_and_andn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = and($Ps4,and($Pt4,!$Pu4))",
-tc_b31c2e97, TypeCR>, Enc_9ac432 {
+tc_a7a13fac, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011100100;
@@ -3889,7 +3928,7 @@ def C4_and_or : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = and($Ps4,or($Pt4,$Pu4))",
-tc_b31c2e97, TypeCR>, Enc_9ac432 {
+tc_a7a13fac, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011001100;
@@ -3898,7 +3937,7 @@ def C4_and_orn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = and($Ps4,or($Pt4,!$Pu4))",
-tc_b31c2e97, TypeCR>, Enc_9ac432 {
+tc_a7a13fac, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011101100;
@@ -3907,7 +3946,7 @@ def C4_cmplte : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !cmp.gt($Rs32,$Rt32)",
-tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_9c52f549, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010010;
@@ -3919,7 +3958,7 @@ def C4_cmpltei : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = !cmp.gt($Rs32,#$Ii)",
-tc_56f114f4, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
+tc_d33e5eee, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
let Inst{4-2} = 0b100;
let Inst{31-22} = 0b0111010101;
let CextOpcode = "C4_cmplte";
@@ -3935,7 +3974,7 @@ def C4_cmplteu : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !cmp.gtu($Rs32,$Rt32)",
-tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_9c52f549, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010011;
@@ -3947,7 +3986,7 @@ def C4_cmplteui : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u32_0Imm:$Ii),
"$Pd4 = !cmp.gtu($Rs32,#$Ii)",
-tc_56f114f4, TypeALU32_2op>, Enc_c0cdde, ImmRegRel {
+tc_d33e5eee, TypeALU32_2op>, Enc_c0cdde, ImmRegRel {
let Inst{4-2} = 0b100;
let Inst{31-21} = 0b01110101100;
let CextOpcode = "C4_cmplteu";
@@ -3963,7 +4002,7 @@ def C4_cmpneq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !cmp.eq($Rs32,$Rt32)",
-tc_de4df740, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
+tc_9c52f549, TypeALU32_3op>, Enc_c2b48e, ImmRegRel {
let Inst{7-2} = 0b000100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110010000;
@@ -3976,7 +4015,7 @@ def C4_cmpneqi : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Pd4 = !cmp.eq($Rs32,#$Ii)",
-tc_56f114f4, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
+tc_d33e5eee, TypeALU32_2op>, Enc_bd0b33, ImmRegRel {
let Inst{4-2} = 0b100;
let Inst{31-22} = 0b0111010100;
let CextOpcode = "C4_cmpneq";
@@ -3992,7 +4031,7 @@ def C4_fastcorner9 : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4),
"$Pd4 = fastcorner9($Ps4,$Pt4)",
-tc_640086b5, TypeCR>, Enc_284ebb {
+tc_651cbe02, TypeCR>, Enc_284ebb {
let Inst{7-2} = 0b100100;
let Inst{13-10} = 0b1000;
let Inst{31-18} = 0b01101011000000;
@@ -4001,7 +4040,7 @@ def C4_fastcorner9_not : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4),
"$Pd4 = !fastcorner9($Ps4,$Pt4)",
-tc_640086b5, TypeCR>, Enc_284ebb {
+tc_651cbe02, TypeCR>, Enc_284ebb {
let Inst{7-2} = 0b100100;
let Inst{13-10} = 0b1000;
let Inst{31-18} = 0b01101011000100;
@@ -4010,7 +4049,7 @@ def C4_nbitsclr : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !bitsclr($Rs32,$Rt32)",
-tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
+tc_4a55d03c, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111101;
@@ -4019,7 +4058,7 @@ def C4_nbitsclri : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u6_0Imm:$Ii),
"$Pd4 = !bitsclr($Rs32,#$Ii)",
-tc_643b4717, TypeS_2op>, Enc_5d6c34 {
+tc_a1297125, TypeS_2op>, Enc_5d6c34 {
let Inst{7-2} = 0b000000;
let Inst{31-21} = 0b10000101101;
}
@@ -4027,7 +4066,7 @@ def C4_nbitsset : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !bitsset($Rs32,$Rt32)",
-tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
+tc_4a55d03c, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111011;
@@ -4036,7 +4075,7 @@ def C4_or_and : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = or($Ps4,and($Pt4,$Pu4))",
-tc_b31c2e97, TypeCR>, Enc_9ac432 {
+tc_a7a13fac, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011010100;
@@ -4045,7 +4084,7 @@ def C4_or_andn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = or($Ps4,and($Pt4,!$Pu4))",
-tc_b31c2e97, TypeCR>, Enc_9ac432 {
+tc_a7a13fac, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011110100;
@@ -4054,7 +4093,7 @@ def C4_or_or : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = or($Ps4,or($Pt4,$Pu4))",
-tc_b31c2e97, TypeCR>, Enc_9ac432 {
+tc_a7a13fac, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011011100;
@@ -4063,7 +4102,7 @@ def C4_or_orn : HInst<
(outs PredRegs:$Pd4),
(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
"$Pd4 = or($Ps4,or($Pt4,!$Pu4))",
-tc_b31c2e97, TypeCR>, Enc_9ac432 {
+tc_a7a13fac, TypeCR>, Enc_9ac432 {
let Inst{5-2} = 0b0000;
let Inst{13-10} = 0b0000;
let Inst{31-18} = 0b01101011111100;
@@ -4072,7 +4111,7 @@ def F2_conv_d2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_d2df($Rss32)",
-tc_3a867367, TypeS_2op>, Enc_b9c5fb {
+tc_9783714b, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000011;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4082,7 +4121,7 @@ def F2_conv_d2sf : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_d2sf($Rss32)",
-tc_3a867367, TypeS_2op>, Enc_90cd8b {
+tc_9783714b, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000010;
let hasNewValue = 1;
@@ -4094,7 +4133,7 @@ def F2_conv_df2d : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2d($Rss32)",
-tc_3a867367, TypeS_2op>, Enc_b9c5fb {
+tc_9783714b, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4104,7 +4143,7 @@ def F2_conv_df2d_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2d($Rss32):chop",
-tc_3a867367, TypeS_2op>, Enc_b9c5fb {
+tc_9783714b, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4114,7 +4153,7 @@ def F2_conv_df2sf : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2sf($Rss32)",
-tc_3a867367, TypeS_2op>, Enc_90cd8b {
+tc_9783714b, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -4126,7 +4165,7 @@ def F2_conv_df2ud : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2ud($Rss32)",
-tc_3a867367, TypeS_2op>, Enc_b9c5fb {
+tc_9783714b, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4136,7 +4175,7 @@ def F2_conv_df2ud_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2ud($Rss32):chop",
-tc_3a867367, TypeS_2op>, Enc_b9c5fb {
+tc_9783714b, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4146,7 +4185,7 @@ def F2_conv_df2uw : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2uw($Rss32)",
-tc_3a867367, TypeS_2op>, Enc_90cd8b {
+tc_9783714b, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000011;
let hasNewValue = 1;
@@ -4158,7 +4197,7 @@ def F2_conv_df2uw_chop : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2uw($Rss32):chop",
-tc_3a867367, TypeS_2op>, Enc_90cd8b {
+tc_9783714b, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000101;
let hasNewValue = 1;
@@ -4170,7 +4209,7 @@ def F2_conv_df2w : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2w($Rss32)",
-tc_3a867367, TypeS_2op>, Enc_90cd8b {
+tc_9783714b, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -4182,7 +4221,7 @@ def F2_conv_df2w_chop : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2w($Rss32):chop",
-tc_3a867367, TypeS_2op>, Enc_90cd8b {
+tc_9783714b, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000111;
let hasNewValue = 1;
@@ -4194,7 +4233,7 @@ def F2_conv_sf2d : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2d($Rs32)",
-tc_3a867367, TypeS_2op>, Enc_3a3d62 {
+tc_9783714b, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4204,7 +4243,7 @@ def F2_conv_sf2d_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2d($Rs32):chop",
-tc_3a867367, TypeS_2op>, Enc_3a3d62 {
+tc_9783714b, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4214,7 +4253,7 @@ def F2_conv_sf2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2df($Rs32)",
-tc_3a867367, TypeS_2op>, Enc_3a3d62 {
+tc_9783714b, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4224,7 +4263,7 @@ def F2_conv_sf2ud : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2ud($Rs32)",
-tc_3a867367, TypeS_2op>, Enc_3a3d62 {
+tc_9783714b, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000011;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4234,7 +4273,7 @@ def F2_conv_sf2ud_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2ud($Rs32):chop",
-tc_3a867367, TypeS_2op>, Enc_3a3d62 {
+tc_9783714b, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4244,7 +4283,7 @@ def F2_conv_sf2uw : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2uw($Rs32)",
-tc_3a867367, TypeS_2op>, Enc_5e2823 {
+tc_9783714b, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011011;
let hasNewValue = 1;
@@ -4256,7 +4295,7 @@ def F2_conv_sf2uw_chop : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2uw($Rs32):chop",
-tc_3a867367, TypeS_2op>, Enc_5e2823 {
+tc_9783714b, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001011011;
let hasNewValue = 1;
@@ -4268,7 +4307,7 @@ def F2_conv_sf2w : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2w($Rs32)",
-tc_3a867367, TypeS_2op>, Enc_5e2823 {
+tc_9783714b, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011100;
let hasNewValue = 1;
@@ -4280,7 +4319,7 @@ def F2_conv_sf2w_chop : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2w($Rs32):chop",
-tc_3a867367, TypeS_2op>, Enc_5e2823 {
+tc_9783714b, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001011100;
let hasNewValue = 1;
@@ -4292,7 +4331,7 @@ def F2_conv_ud2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_ud2df($Rss32)",
-tc_3a867367, TypeS_2op>, Enc_b9c5fb {
+tc_9783714b, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4302,7 +4341,7 @@ def F2_conv_ud2sf : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_ud2sf($Rss32)",
-tc_3a867367, TypeS_2op>, Enc_90cd8b {
+tc_9783714b, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000001;
let hasNewValue = 1;
@@ -4314,7 +4353,7 @@ def F2_conv_uw2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_uw2df($Rs32)",
-tc_3a867367, TypeS_2op>, Enc_3a3d62 {
+tc_9783714b, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4324,7 +4363,7 @@ def F2_conv_uw2sf : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_uw2sf($Rs32)",
-tc_3a867367, TypeS_2op>, Enc_5e2823 {
+tc_9783714b, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011001;
let hasNewValue = 1;
@@ -4336,7 +4375,7 @@ def F2_conv_w2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_w2df($Rs32)",
-tc_3a867367, TypeS_2op>, Enc_3a3d62 {
+tc_9783714b, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4346,7 +4385,7 @@ def F2_conv_w2sf : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_w2sf($Rs32)",
-tc_3a867367, TypeS_2op>, Enc_5e2823 {
+tc_9783714b, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011010;
let hasNewValue = 1;
@@ -4358,7 +4397,7 @@ def F2_dfadd : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = dfadd($Rss32,$Rtt32)",
-tc_2f7c551d, TypeM>, Enc_a56825, Requires<[HasV66]> {
+tc_f0e8e832, TypeM>, Enc_a56825, Requires<[HasV66]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -4369,7 +4408,7 @@ def F2_dfclass : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Pd4 = dfclass($Rss32,#$Ii)",
-tc_643b4717, TypeALU64>, Enc_1f19b5 {
+tc_a1297125, TypeALU64>, Enc_1f19b5 {
let Inst{4-2} = 0b100;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b11011100100;
@@ -4380,7 +4419,7 @@ def F2_dfcmpeq : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.eq($Rss32,$Rtt32)",
-tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
+tc_4a55d03c, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@@ -4392,7 +4431,7 @@ def F2_dfcmpge : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.ge($Rss32,$Rtt32)",
-tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
+tc_4a55d03c, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@@ -4404,7 +4443,7 @@ def F2_dfcmpgt : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.gt($Rss32,$Rtt32)",
-tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
+tc_4a55d03c, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@@ -4416,7 +4455,7 @@ def F2_dfcmpuo : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.uo($Rss32,$Rtt32)",
-tc_85d5d03f, TypeALU64>, Enc_fcf7a7 {
+tc_4a55d03c, TypeALU64>, Enc_fcf7a7 {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@@ -4428,7 +4467,7 @@ def F2_dfimm_n : HInst<
(outs DoubleRegs:$Rdd32),
(ins u10_0Imm:$Ii),
"$Rdd32 = dfmake(#$Ii):neg",
-tc_9e313203, TypeALU64>, Enc_e6c957 {
+tc_65279839, TypeALU64>, Enc_e6c957 {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101100101;
let prefersSlot3 = 1;
@@ -4437,16 +4476,84 @@ def F2_dfimm_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins u10_0Imm:$Ii),
"$Rdd32 = dfmake(#$Ii):pos",
-tc_9e313203, TypeALU64>, Enc_e6c957 {
+tc_65279839, TypeALU64>, Enc_e6c957 {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101100100;
let prefersSlot3 = 1;
}
+def F2_dfmax : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = dfmax($Rss32,$Rtt32)",
+tc_9b3c0462, TypeM>, Enc_a56825, Requires<[HasV67]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000001;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_dfmin : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = dfmin($Rss32,$Rtt32)",
+tc_9b3c0462, TypeM>, Enc_a56825, Requires<[HasV67]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000110;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_dfmpyfix : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = dfmpyfix($Rss32,$Rtt32)",
+tc_f0e8e832, TypeM>, Enc_a56825, Requires<[HasV67]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000010;
+let isFP = 1;
+let Uses = [USR];
+}
+def F2_dfmpyhh : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += dfmpyhh($Rss32,$Rtt32)",
+tc_0a195f2c, TypeM>, Enc_88c16c, Requires<[HasV67]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010100;
+let isFP = 1;
+let Uses = [USR];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def F2_dfmpylh : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += dfmpylh($Rss32,$Rtt32)",
+tc_01e1be3b, TypeM>, Enc_88c16c, Requires<[HasV67]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def F2_dfmpyll : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = dfmpyll($Rss32,$Rtt32)",
+tc_556f6577, TypeM>, Enc_a56825, Requires<[HasV67]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000101;
+let prefersSlot3 = 1;
+}
def F2_dfsub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = dfsub($Rss32,$Rtt32)",
-tc_2f7c551d, TypeM>, Enc_a56825, Requires<[HasV66]> {
+tc_f0e8e832, TypeM>, Enc_a56825, Requires<[HasV66]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -4457,7 +4564,7 @@ def F2_sfadd : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfadd($Rs32,$Rt32)",
-tc_3b470976, TypeM>, Enc_5ab2be {
+tc_02fe1c65, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011000;
@@ -4471,7 +4578,7 @@ def F2_sfclass : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Pd4 = sfclass($Rs32,#$Ii)",
-tc_643b4717, TypeS_2op>, Enc_83ee64 {
+tc_a1297125, TypeS_2op>, Enc_83ee64 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000101111;
@@ -4482,7 +4589,7 @@ def F2_sfcmpeq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.eq($Rs32,$Rt32)",
-tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
+tc_4a55d03c, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@@ -4494,7 +4601,7 @@ def F2_sfcmpge : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.ge($Rs32,$Rt32)",
-tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
+tc_4a55d03c, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@@ -4506,7 +4613,7 @@ def F2_sfcmpgt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.gt($Rs32,$Rt32)",
-tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
+tc_4a55d03c, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@@ -4518,7 +4625,7 @@ def F2_sfcmpuo : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.uo($Rs32,$Rt32)",
-tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
+tc_4a55d03c, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@@ -4530,7 +4637,7 @@ def F2_sffixupd : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sffixupd($Rs32,$Rt32)",
-tc_3b470976, TypeM>, Enc_5ab2be {
+tc_02fe1c65, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011110;
@@ -4542,7 +4649,7 @@ def F2_sffixupn : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sffixupn($Rs32,$Rt32)",
-tc_3b470976, TypeM>, Enc_5ab2be {
+tc_02fe1c65, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011110;
@@ -4554,7 +4661,7 @@ def F2_sffixupr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = sffixupr($Rs32)",
-tc_3a867367, TypeS_2op>, Enc_5e2823 {
+tc_9783714b, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011101;
let hasNewValue = 1;
@@ -4565,7 +4672,7 @@ def F2_sffma : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += sfmpy($Rs32,$Rt32)",
-tc_a58fd5cc, TypeM>, Enc_2ae154 {
+tc_9e72dc89, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -4579,7 +4686,7 @@ def F2_sffma_lib : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += sfmpy($Rs32,$Rt32):lib",
-tc_a58fd5cc, TypeM>, Enc_2ae154 {
+tc_9e72dc89, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -4593,7 +4700,7 @@ def F2_sffma_sc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32, PredRegs:$Pu4),
"$Rx32 += sfmpy($Rs32,$Rt32,$Pu4):scale",
-tc_4560740b, TypeM>, Enc_437f33 {
+tc_9edb7c77, TypeM>, Enc_437f33 {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111011;
@@ -4607,7 +4714,7 @@ def F2_sffms : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= sfmpy($Rs32,$Rt32)",
-tc_a58fd5cc, TypeM>, Enc_2ae154 {
+tc_9e72dc89, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -4621,7 +4728,7 @@ def F2_sffms_lib : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= sfmpy($Rs32,$Rt32):lib",
-tc_a58fd5cc, TypeM>, Enc_2ae154 {
+tc_9e72dc89, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -4635,7 +4742,7 @@ def F2_sfimm_n : HInst<
(outs IntRegs:$Rd32),
(ins u10_0Imm:$Ii),
"$Rd32 = sfmake(#$Ii):neg",
-tc_9e313203, TypeALU64>, Enc_6c9440 {
+tc_65279839, TypeALU64>, Enc_6c9440 {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101011001;
let hasNewValue = 1;
@@ -4646,7 +4753,7 @@ def F2_sfimm_p : HInst<
(outs IntRegs:$Rd32),
(ins u10_0Imm:$Ii),
"$Rd32 = sfmake(#$Ii):pos",
-tc_9e313203, TypeALU64>, Enc_6c9440 {
+tc_65279839, TypeALU64>, Enc_6c9440 {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101011000;
let hasNewValue = 1;
@@ -4657,7 +4764,7 @@ def F2_sfinvsqrta : HInst<
(outs IntRegs:$Rd32, PredRegs:$Pe4),
(ins IntRegs:$Rs32),
"$Rd32,$Pe4 = sfinvsqrta($Rs32)",
-tc_b8bffe55, TypeS_2op>, Enc_890909 {
+tc_7f7f45f5, TypeS_2op>, Enc_890909 {
let Inst{13-7} = 0b0000000;
let Inst{31-21} = 0b10001011111;
let hasNewValue = 1;
@@ -4669,7 +4776,7 @@ def F2_sfmax : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfmax($Rs32,$Rt32)",
-tc_88b4f13d, TypeM>, Enc_5ab2be {
+tc_c20701f0, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011100;
@@ -4683,7 +4790,7 @@ def F2_sfmin : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfmin($Rs32,$Rt32)",
-tc_88b4f13d, TypeM>, Enc_5ab2be {
+tc_c20701f0, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011100;
@@ -4697,7 +4804,7 @@ def F2_sfmpy : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfmpy($Rs32,$Rt32)",
-tc_3b470976, TypeM>, Enc_5ab2be {
+tc_02fe1c65, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011010;
@@ -4711,7 +4818,7 @@ def F2_sfrecipa : HInst<
(outs IntRegs:$Rd32, PredRegs:$Pe4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32,$Pe4 = sfrecipa($Rs32,$Rt32)",
-tc_2ff964b4, TypeM>, Enc_a94f3b {
+tc_f7569068, TypeM>, Enc_a94f3b {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011111;
@@ -4724,7 +4831,7 @@ def F2_sfsub : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfsub($Rs32,$Rt32)",
-tc_3b470976, TypeM>, Enc_5ab2be {
+tc_02fe1c65, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011000;
@@ -4737,7 +4844,7 @@ def G4_tfrgcpp : HInst<
(outs DoubleRegs:$Rdd32),
(ins GuestRegs64:$Gss32),
"$Rdd32 = $Gss32",
-tc_0d8f5752, TypeCR>, Enc_0aa344 {
+tc_fae9dfa5, TypeCR>, Enc_0aa344 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01101000001;
}
@@ -4745,7 +4852,7 @@ def G4_tfrgcrr : HInst<
(outs IntRegs:$Rd32),
(ins GuestRegs:$Gs32),
"$Rd32 = $Gs32",
-tc_0d8f5752, TypeCR>, Enc_44271f {
+tc_fae9dfa5, TypeCR>, Enc_44271f {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01101010001;
let hasNewValue = 1;
@@ -4755,7 +4862,7 @@ def G4_tfrgpcp : HInst<
(outs GuestRegs64:$Gdd32),
(ins DoubleRegs:$Rss32),
"$Gdd32 = $Rss32",
-tc_bcf98408, TypeCR>, Enc_ed5027 {
+tc_6ae3426b, TypeCR>, Enc_ed5027 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01100011000;
let hasNewValue = 1;
@@ -4765,7 +4872,7 @@ def G4_tfrgrcr : HInst<
(outs GuestRegs:$Gd32),
(ins IntRegs:$Rs32),
"$Gd32 = $Rs32",
-tc_bcf98408, TypeCR>, Enc_621fba {
+tc_6ae3426b, TypeCR>, Enc_621fba {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b01100010000;
let hasNewValue = 1;
@@ -4775,7 +4882,7 @@ def J2_call : HInst<
(outs),
(ins a30_2Imm:$Ii),
"call $Ii",
-tc_4ae7b58b, TypeJ>, Enc_81ac1d, PredRel {
+tc_44fffc58, TypeJ>, Enc_81ac1d, PredRel {
let Inst{0-0} = 0b0;
let Inst{31-25} = 0b0101101;
let isCall = 1;
@@ -4797,7 +4904,7 @@ def J2_callf : HInst<
(outs),
(ins PredRegs:$Pu4, a30_2Imm:$Ii),
"if (!$Pu4) call $Ii",
-tc_1d81e60e, TypeJ>, Enc_daea09, PredRel {
+tc_69bfb303, TypeJ>, Enc_daea09, PredRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b000;
let Inst{21-21} = 0b1;
@@ -4824,7 +4931,7 @@ def J2_callr : HInst<
(outs),
(ins IntRegs:$Rs32),
"callr $Rs32",
-tc_3bd75825, TypeJ>, Enc_ecbcc8 {
+tc_362b0be2, TypeJ>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01010000101;
let isCall = 1;
@@ -4838,7 +4945,7 @@ def J2_callrf : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) callr $Rs32",
-tc_1ad90acd, TypeJ>, Enc_88d4d9 {
+tc_dc51281d, TypeJ>, Enc_88d4d9 {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b01010001001;
@@ -4856,7 +4963,7 @@ def J2_callrt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) callr $Rs32",
-tc_1ad90acd, TypeJ>, Enc_88d4d9 {
+tc_dc51281d, TypeJ>, Enc_88d4d9 {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b01010001000;
@@ -4873,7 +4980,7 @@ def J2_callt : HInst<
(outs),
(ins PredRegs:$Pu4, a30_2Imm:$Ii),
"if ($Pu4) call $Ii",
-tc_1d81e60e, TypeJ>, Enc_daea09, PredRel {
+tc_69bfb303, TypeJ>, Enc_daea09, PredRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b000;
let Inst{21-21} = 0b0;
@@ -4899,7 +5006,7 @@ def J2_endloop0 : HInst<
(outs),
(ins),
"endloop0",
-tc_1b6f7cec, TypeJ> {
+tc_23708a21, TypeJ> {
let Uses = [LC0, SA0];
let Defs = [LC0, P3, PC, USR];
let isBranch = 1;
@@ -4910,7 +5017,7 @@ def J2_endloop01 : HInst<
(outs),
(ins),
"endloop01",
-tc_1b6f7cec, TypeJ> {
+tc_23708a21, TypeJ> {
let Uses = [LC0, LC1, SA0, SA1];
let Defs = [LC0, LC1, P3, PC, USR];
let isPseudo = 1;
@@ -4919,7 +5026,7 @@ def J2_endloop1 : HInst<
(outs),
(ins),
"endloop1",
-tc_1b6f7cec, TypeJ> {
+tc_23708a21, TypeJ> {
let Uses = [LC1, SA1];
let Defs = [LC1, PC];
let isBranch = 1;
@@ -4930,7 +5037,7 @@ def J2_jump : HInst<
(outs),
(ins b30_2Imm:$Ii),
"jump $Ii",
-tc_ae53734a, TypeJ>, Enc_81ac1d, PredNewRel {
+tc_decdde8a, TypeJ>, Enc_81ac1d, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{31-25} = 0b0101100;
let isTerminator = 1;
@@ -4938,8 +5045,8 @@ let isBranch = 1;
let cofRelax2 = 1;
let cofMax1 = 1;
let Defs = [PC];
-let InputType = "imm";
let BaseOpcode = "J2_jump";
+let InputType = "imm";
let isBarrier = 1;
let isPredicable = 1;
let isExtendable = 1;
@@ -4952,7 +5059,7 @@ def J2_jumpf : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if (!$Pu4) jump:nt $Ii",
-tc_db2bce9c, TypeJ>, Enc_daea09, PredNewRel {
+tc_56a124a7, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b000;
let Inst{21-21} = 0b1;
@@ -4965,8 +5072,8 @@ let cofRelax1 = 1;
let cofRelax2 = 1;
let cofMax1 = 1;
let Defs = [PC];
-let InputType = "imm";
let BaseOpcode = "J2_jump";
+let InputType = "imm";
let isTaken = Inst{12};
let isExtendable = 1;
let opExtendable = 1;
@@ -4978,7 +5085,7 @@ def J2_jumpf_nopred_map : HInst<
(outs),
(ins PredRegs:$Pu4, b15_2Imm:$Ii),
"if (!$Pu4) jump $Ii",
-tc_db2bce9c, TypeMAPPING>, Requires<[HasV60]> {
+tc_56a124a7, TypeMAPPING>, Requires<[HasV60]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -4986,7 +5093,7 @@ def J2_jumpfnew : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if (!$Pu4.new) jump:nt $Ii",
-tc_20cdee80, TypeJ>, Enc_daea09, PredNewRel {
+tc_eeda4109, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b010;
let Inst{21-21} = 0b1;
@@ -5000,8 +5107,8 @@ let cofRelax1 = 1;
let cofRelax2 = 1;
let cofMax1 = 1;
let Defs = [PC];
-let InputType = "imm";
let BaseOpcode = "J2_jump";
+let InputType = "imm";
let isTaken = Inst{12};
let isExtendable = 1;
let opExtendable = 1;
@@ -5013,7 +5120,7 @@ def J2_jumpfnewpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if (!$Pu4.new) jump:t $Ii",
-tc_20cdee80, TypeJ>, Enc_daea09, PredNewRel {
+tc_eeda4109, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b110;
let Inst{21-21} = 0b1;
@@ -5027,8 +5134,8 @@ let cofRelax1 = 1;
let cofRelax2 = 1;
let cofMax1 = 1;
let Defs = [PC];
-let InputType = "imm";
let BaseOpcode = "J2_jump";
+let InputType = "imm";
let isTaken = Inst{12};
let isExtendable = 1;
let opExtendable = 1;
@@ -5040,7 +5147,7 @@ def J2_jumpfpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if (!$Pu4) jump:t $Ii",
-tc_cd374165, TypeJ>, Enc_daea09, Requires<[HasV60]>, PredNewRel {
+tc_711c805f, TypeJ>, Enc_daea09, Requires<[HasV60]>, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b100;
let Inst{21-21} = 0b1;
@@ -5053,8 +5160,8 @@ let cofRelax1 = 1;
let cofRelax2 = 1;
let cofMax1 = 1;
let Defs = [PC];
-let InputType = "imm";
let BaseOpcode = "J2_jump";
+let InputType = "imm";
let isTaken = Inst{12};
let isExtendable = 1;
let opExtendable = 1;
@@ -5066,7 +5173,7 @@ def J2_jumpr : HInst<
(outs),
(ins IntRegs:$Rs32),
"jumpr $Rs32",
-tc_d5b7b0c1, TypeJ>, Enc_ecbcc8, PredNewRel {
+tc_60e324ff, TypeJ>, Enc_ecbcc8, PredNewRel {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01010010100;
let isTerminator = 1;
@@ -5074,8 +5181,8 @@ let isIndirectBranch = 1;
let isBranch = 1;
let cofMax1 = 1;
let Defs = [PC];
-let InputType = "reg";
let BaseOpcode = "J2_jumpr";
+let InputType = "reg";
let isBarrier = 1;
let isPredicable = 1;
}
@@ -5083,7 +5190,7 @@ def J2_jumprf : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) jumpr:nt $Rs32",
-tc_85c9c08f, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_2f573607, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b01010011011;
@@ -5094,15 +5201,15 @@ let isIndirectBranch = 1;
let isBranch = 1;
let cofMax1 = 1;
let Defs = [PC];
-let InputType = "reg";
let BaseOpcode = "J2_jumpr";
+let InputType = "reg";
let isTaken = Inst{12};
}
def J2_jumprf_nopred_map : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) jumpr $Rs32",
-tc_85c9c08f, TypeMAPPING>, Requires<[HasV60]> {
+tc_2f573607, TypeMAPPING>, Requires<[HasV60]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -5110,7 +5217,7 @@ def J2_jumprfnew : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) jumpr:nt $Rs32",
-tc_b51dc29a, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_ed03645c, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0010;
let Inst{31-21} = 0b01010011011;
@@ -5122,15 +5229,15 @@ let isBranch = 1;
let isPredicatedNew = 1;
let cofMax1 = 1;
let Defs = [PC];
-let InputType = "reg";
let BaseOpcode = "J2_jumpr";
+let InputType = "reg";
let isTaken = Inst{12};
}
def J2_jumprfnewpt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4.new) jumpr:t $Rs32",
-tc_b51dc29a, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_ed03645c, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0110;
let Inst{31-21} = 0b01010011011;
@@ -5142,15 +5249,15 @@ let isBranch = 1;
let isPredicatedNew = 1;
let cofMax1 = 1;
let Defs = [PC];
-let InputType = "reg";
let BaseOpcode = "J2_jumpr";
+let InputType = "reg";
let isTaken = Inst{12};
}
def J2_jumprfpt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) jumpr:t $Rs32",
-tc_e78647bd, TypeJ>, Enc_88d4d9, Requires<[HasV60]>, PredNewRel {
+tc_42ff66ba, TypeJ>, Enc_88d4d9, Requires<[HasV60]>, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0100;
let Inst{31-21} = 0b01010011011;
@@ -5161,15 +5268,15 @@ let isIndirectBranch = 1;
let isBranch = 1;
let cofMax1 = 1;
let Defs = [PC];
-let InputType = "reg";
let BaseOpcode = "J2_jumpr";
+let InputType = "reg";
let isTaken = Inst{12};
}
def J2_jumprgtez : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32>=#0) jump:nt $Ii",
-tc_d9d43ecb, TypeCR>, Enc_0fa531 {
+tc_57a55b54, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b0;
let Inst{31-22} = 0b0110000101;
@@ -5187,7 +5294,7 @@ def J2_jumprgtezpt : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32>=#0) jump:t $Ii",
-tc_d9d43ecb, TypeCR>, Enc_0fa531 {
+tc_57a55b54, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b1;
let Inst{31-22} = 0b0110000101;
@@ -5205,7 +5312,7 @@ def J2_jumprltez : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32<=#0) jump:nt $Ii",
-tc_d9d43ecb, TypeCR>, Enc_0fa531 {
+tc_57a55b54, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b0;
let Inst{31-22} = 0b0110000111;
@@ -5223,7 +5330,7 @@ def J2_jumprltezpt : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32<=#0) jump:t $Ii",
-tc_d9d43ecb, TypeCR>, Enc_0fa531 {
+tc_57a55b54, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b1;
let Inst{31-22} = 0b0110000111;
@@ -5241,7 +5348,7 @@ def J2_jumprnz : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32==#0) jump:nt $Ii",
-tc_d9d43ecb, TypeCR>, Enc_0fa531 {
+tc_57a55b54, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b0;
let Inst{31-22} = 0b0110000110;
@@ -5259,7 +5366,7 @@ def J2_jumprnzpt : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32==#0) jump:t $Ii",
-tc_d9d43ecb, TypeCR>, Enc_0fa531 {
+tc_57a55b54, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b1;
let Inst{31-22} = 0b0110000110;
@@ -5277,7 +5384,7 @@ def J2_jumprt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) jumpr:nt $Rs32",
-tc_85c9c08f, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_2f573607, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b01010011010;
@@ -5287,15 +5394,15 @@ let isIndirectBranch = 1;
let isBranch = 1;
let cofMax1 = 1;
let Defs = [PC];
-let InputType = "reg";
let BaseOpcode = "J2_jumpr";
+let InputType = "reg";
let isTaken = Inst{12};
}
def J2_jumprt_nopred_map : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) jumpr $Rs32",
-tc_85c9c08f, TypeMAPPING>, Requires<[HasV60]> {
+tc_2f573607, TypeMAPPING>, Requires<[HasV60]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -5303,7 +5410,7 @@ def J2_jumprtnew : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) jumpr:nt $Rs32",
-tc_b51dc29a, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_ed03645c, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0010;
let Inst{31-21} = 0b01010011010;
@@ -5314,15 +5421,15 @@ let isBranch = 1;
let isPredicatedNew = 1;
let cofMax1 = 1;
let Defs = [PC];
-let InputType = "reg";
let BaseOpcode = "J2_jumpr";
+let InputType = "reg";
let isTaken = Inst{12};
}
def J2_jumprtnewpt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4.new) jumpr:t $Rs32",
-tc_b51dc29a, TypeJ>, Enc_88d4d9, PredNewRel {
+tc_ed03645c, TypeJ>, Enc_88d4d9, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0110;
let Inst{31-21} = 0b01010011010;
@@ -5333,15 +5440,15 @@ let isBranch = 1;
let isPredicatedNew = 1;
let cofMax1 = 1;
let Defs = [PC];
-let InputType = "reg";
let BaseOpcode = "J2_jumpr";
+let InputType = "reg";
let isTaken = Inst{12};
}
def J2_jumprtpt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) jumpr:t $Rs32",
-tc_e78647bd, TypeJ>, Enc_88d4d9, Requires<[HasV60]>, PredNewRel {
+tc_42ff66ba, TypeJ>, Enc_88d4d9, Requires<[HasV60]>, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0100;
let Inst{31-21} = 0b01010011010;
@@ -5351,15 +5458,15 @@ let isIndirectBranch = 1;
let isBranch = 1;
let cofMax1 = 1;
let Defs = [PC];
-let InputType = "reg";
let BaseOpcode = "J2_jumpr";
+let InputType = "reg";
let isTaken = Inst{12};
}
def J2_jumprz : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32!=#0) jump:nt $Ii",
-tc_d9d43ecb, TypeCR>, Enc_0fa531 {
+tc_57a55b54, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b0;
let Inst{31-22} = 0b0110000100;
@@ -5377,7 +5484,7 @@ def J2_jumprzpt : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
"if ($Rs32!=#0) jump:t $Ii",
-tc_d9d43ecb, TypeCR>, Enc_0fa531 {
+tc_57a55b54, TypeCR>, Enc_0fa531 {
let Inst{0-0} = 0b0;
let Inst{12-12} = 0b1;
let Inst{31-22} = 0b0110000100;
@@ -5395,7 +5502,7 @@ def J2_jumpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if ($Pu4) jump:nt $Ii",
-tc_db2bce9c, TypeJ>, Enc_daea09, PredNewRel {
+tc_56a124a7, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b000;
let Inst{21-21} = 0b0;
@@ -5407,8 +5514,8 @@ let cofRelax1 = 1;
let cofRelax2 = 1;
let cofMax1 = 1;
let Defs = [PC];
-let InputType = "imm";
let BaseOpcode = "J2_jump";
+let InputType = "imm";
let isTaken = Inst{12};
let isExtendable = 1;
let opExtendable = 1;
@@ -5420,7 +5527,7 @@ def J2_jumpt_nopred_map : HInst<
(outs),
(ins PredRegs:$Pu4, b15_2Imm:$Ii),
"if ($Pu4) jump $Ii",
-tc_db2bce9c, TypeMAPPING>, Requires<[HasV60]> {
+tc_56a124a7, TypeMAPPING>, Requires<[HasV60]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -5428,7 +5535,7 @@ def J2_jumptnew : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if ($Pu4.new) jump:nt $Ii",
-tc_20cdee80, TypeJ>, Enc_daea09, PredNewRel {
+tc_eeda4109, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b010;
let Inst{21-21} = 0b0;
@@ -5441,8 +5548,8 @@ let cofRelax1 = 1;
let cofRelax2 = 1;
let cofMax1 = 1;
let Defs = [PC];
-let InputType = "imm";
let BaseOpcode = "J2_jump";
+let InputType = "imm";
let isTaken = Inst{12};
let isExtendable = 1;
let opExtendable = 1;
@@ -5454,7 +5561,7 @@ def J2_jumptnewpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if ($Pu4.new) jump:t $Ii",
-tc_20cdee80, TypeJ>, Enc_daea09, PredNewRel {
+tc_eeda4109, TypeJ>, Enc_daea09, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b110;
let Inst{21-21} = 0b0;
@@ -5467,8 +5574,8 @@ let cofRelax1 = 1;
let cofRelax2 = 1;
let cofMax1 = 1;
let Defs = [PC];
-let InputType = "imm";
let BaseOpcode = "J2_jump";
+let InputType = "imm";
let isTaken = Inst{12};
let isExtendable = 1;
let opExtendable = 1;
@@ -5480,7 +5587,7 @@ def J2_jumptpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if ($Pu4) jump:t $Ii",
-tc_cd374165, TypeJ>, Enc_daea09, Requires<[HasV60]>, PredNewRel {
+tc_711c805f, TypeJ>, Enc_daea09, Requires<[HasV60]>, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b100;
let Inst{21-21} = 0b0;
@@ -5492,8 +5599,8 @@ let cofRelax1 = 1;
let cofRelax2 = 1;
let cofMax1 = 1;
let Defs = [PC];
-let InputType = "imm";
let BaseOpcode = "J2_jump";
+let InputType = "imm";
let isTaken = Inst{12};
let isExtendable = 1;
let opExtendable = 1;
@@ -5505,7 +5612,7 @@ def J2_loop0i : HInst<
(outs),
(ins b30_2Imm:$Ii, u10_0Imm:$II),
"loop0($Ii,#$II)",
-tc_a9d88b22, TypeCR>, Enc_4dc228 {
+tc_1248597c, TypeCR>, Enc_4dc228 {
let Inst{2-2} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01101001000;
@@ -5522,7 +5629,7 @@ def J2_loop0r : HInst<
(outs),
(ins b30_2Imm:$Ii, IntRegs:$Rs32),
"loop0($Ii,$Rs32)",
-tc_df3319ed, TypeCR>, Enc_864a5a {
+tc_9406230a, TypeCR>, Enc_864a5a {
let Inst{2-0} = 0b000;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5540,7 +5647,7 @@ def J2_loop1i : HInst<
(outs),
(ins b30_2Imm:$Ii, u10_0Imm:$II),
"loop1($Ii,#$II)",
-tc_a9d88b22, TypeCR>, Enc_4dc228 {
+tc_1248597c, TypeCR>, Enc_4dc228 {
let Inst{2-2} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01101001001;
@@ -5557,7 +5664,7 @@ def J2_loop1r : HInst<
(outs),
(ins b30_2Imm:$Ii, IntRegs:$Rs32),
"loop1($Ii,$Rs32)",
-tc_df3319ed, TypeCR>, Enc_864a5a {
+tc_9406230a, TypeCR>, Enc_864a5a {
let Inst{2-0} = 0b000;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5575,7 +5682,7 @@ def J2_pause : HInst<
(outs),
(ins u8_0Imm:$Ii),
"pause(#$Ii)",
-tc_8d9d0154, TypeJ>, Enc_a51a9a {
+tc_d57d649c, TypeJ>, Enc_a51a9a {
let Inst{1-0} = 0b00;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5586,7 +5693,7 @@ def J2_ploop1si : HInst<
(outs),
(ins b30_2Imm:$Ii, u10_0Imm:$II),
"p3 = sp1loop0($Ii,#$II)",
-tc_1c4528a2, TypeCR>, Enc_4dc228 {
+tc_4abdbdc6, TypeCR>, Enc_4dc228 {
let Inst{2-2} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01101001101;
@@ -5604,7 +5711,7 @@ def J2_ploop1sr : HInst<
(outs),
(ins b30_2Imm:$Ii, IntRegs:$Rs32),
"p3 = sp1loop0($Ii,$Rs32)",
-tc_32779c6f, TypeCR>, Enc_864a5a {
+tc_6d861a95, TypeCR>, Enc_864a5a {
let Inst{2-0} = 0b000;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5623,7 +5730,7 @@ def J2_ploop2si : HInst<
(outs),
(ins b30_2Imm:$Ii, u10_0Imm:$II),
"p3 = sp2loop0($Ii,#$II)",
-tc_1c4528a2, TypeCR>, Enc_4dc228 {
+tc_4abdbdc6, TypeCR>, Enc_4dc228 {
let Inst{2-2} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01101001110;
@@ -5641,7 +5748,7 @@ def J2_ploop2sr : HInst<
(outs),
(ins b30_2Imm:$Ii, IntRegs:$Rs32),
"p3 = sp2loop0($Ii,$Rs32)",
-tc_32779c6f, TypeCR>, Enc_864a5a {
+tc_6d861a95, TypeCR>, Enc_864a5a {
let Inst{2-0} = 0b000;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5660,7 +5767,7 @@ def J2_ploop3si : HInst<
(outs),
(ins b30_2Imm:$Ii, u10_0Imm:$II),
"p3 = sp3loop0($Ii,#$II)",
-tc_1c4528a2, TypeCR>, Enc_4dc228 {
+tc_4abdbdc6, TypeCR>, Enc_4dc228 {
let Inst{2-2} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01101001111;
@@ -5678,7 +5785,7 @@ def J2_ploop3sr : HInst<
(outs),
(ins b30_2Imm:$Ii, IntRegs:$Rs32),
"p3 = sp3loop0($Ii,$Rs32)",
-tc_32779c6f, TypeCR>, Enc_864a5a {
+tc_6d861a95, TypeCR>, Enc_864a5a {
let Inst{2-0} = 0b000;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5697,7 +5804,7 @@ def J2_trap0 : HInst<
(outs),
(ins u8_0Imm:$Ii),
"trap0(#$Ii)",
-tc_fc3999b4, TypeJ>, Enc_a51a9a {
+tc_45f9d1be, TypeJ>, Enc_a51a9a {
let Inst{1-0} = 0b00;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5709,7 +5816,7 @@ def J2_trap1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, u8_0Imm:$Ii),
"trap1($Rx32,#$Ii)",
-tc_b9e09e03, TypeJ>, Enc_33f8ba {
+tc_53c851ab, TypeJ>, Enc_33f8ba, Requires<[HasV65]> {
let Inst{1-0} = 0b00;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
@@ -5726,7 +5833,7 @@ def J2_trap1_noregmap : HInst<
(outs),
(ins u8_0Imm:$Ii),
"trap1(#$Ii)",
-tc_b9e09e03, TypeMAPPING> {
+tc_53c851ab, TypeMAPPING>, Requires<[HasV65]> {
let hasSideEffects = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -5735,7 +5842,7 @@ def J4_cmpeq_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_24e109c7, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -5761,7 +5868,7 @@ def J4_cmpeq_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,$Rt32)) jump:t $Ii",
-tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_24e109c7, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -5787,7 +5894,7 @@ def J4_cmpeq_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,$Rt16); if (!p0.new) jump:nt $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010001;
@@ -5813,7 +5920,7 @@ def J4_cmpeq_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,$Rt16); if (!p0.new) jump:t $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010001;
@@ -5839,7 +5946,7 @@ def J4_cmpeq_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,$Rt16); if (!p1.new) jump:nt $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010001;
@@ -5865,7 +5972,7 @@ def J4_cmpeq_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,$Rt16); if (!p1.new) jump:t $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010001;
@@ -5891,7 +5998,7 @@ def J4_cmpeq_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_24e109c7, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -5916,7 +6023,7 @@ def J4_cmpeq_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,$Rt32)) jump:t $Ii",
-tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_24e109c7, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -5941,7 +6048,7 @@ def J4_cmpeq_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,$Rt16); if (p0.new) jump:nt $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010000;
@@ -5966,7 +6073,7 @@ def J4_cmpeq_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,$Rt16); if (p0.new) jump:t $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010000;
@@ -5991,7 +6098,7 @@ def J4_cmpeq_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,$Rt16); if (p1.new) jump:nt $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010000;
@@ -6016,7 +6123,7 @@ def J4_cmpeq_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,$Rt16); if (p1.new) jump:t $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010000;
@@ -6041,7 +6148,7 @@ def J4_cmpeqi_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,#$II)) jump:nt $Ii",
-tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
+tc_f6e2aff9, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -6067,7 +6174,7 @@ def J4_cmpeqi_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,#$II)) jump:t $Ii",
-tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
+tc_f6e2aff9, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -6093,7 +6200,7 @@ def J4_cmpeqi_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$II); if (!p0.new) jump:nt $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000001;
@@ -6119,7 +6226,7 @@ def J4_cmpeqi_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$II); if (!p0.new) jump:t $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000001;
@@ -6145,7 +6252,7 @@ def J4_cmpeqi_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$II); if (!p1.new) jump:nt $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001001;
@@ -6171,7 +6278,7 @@ def J4_cmpeqi_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$II); if (!p1.new) jump:t $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001001;
@@ -6197,7 +6304,7 @@ def J4_cmpeqi_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,#$II)) jump:nt $Ii",
-tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
+tc_f6e2aff9, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -6222,7 +6329,7 @@ def J4_cmpeqi_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,#$II)) jump:t $Ii",
-tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
+tc_f6e2aff9, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -6247,7 +6354,7 @@ def J4_cmpeqi_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$II); if (p0.new) jump:nt $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000000;
@@ -6272,7 +6379,7 @@ def J4_cmpeqi_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$II); if (p0.new) jump:t $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000000;
@@ -6297,7 +6404,7 @@ def J4_cmpeqi_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$II); if (p1.new) jump:nt $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001000;
@@ -6322,7 +6429,7 @@ def J4_cmpeqi_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$II); if (p1.new) jump:t $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001000;
@@ -6347,7 +6454,7 @@ def J4_cmpeqn1_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,#$n1)) jump:nt $Ii",
-tc_bd8382d1, TypeNCJ>, Enc_e90a15, PredRel {
+tc_f6e2aff9, TypeNCJ>, Enc_e90a15, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -6373,7 +6480,7 @@ def J4_cmpeqn1_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (!cmp.eq($Ns8.new,#$n1)) jump:t $Ii",
-tc_bd8382d1, TypeNCJ>, Enc_5a18b3, PredRel {
+tc_f6e2aff9, TypeNCJ>, Enc_5a18b3, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -6399,7 +6506,7 @@ def J4_cmpeqn1_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$n1); if (!p0.new) jump:nt $Ii",
-tc_3d495a39, TypeCJ>, Enc_1de724, PredRel {
+tc_24f426ab, TypeCJ>, Enc_1de724, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{31-22} = 0b0001000111;
@@ -6425,7 +6532,7 @@ def J4_cmpeqn1_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$n1); if (!p0.new) jump:t $Ii",
-tc_3d495a39, TypeCJ>, Enc_14640c, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14640c, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{31-22} = 0b0001000111;
@@ -6451,7 +6558,7 @@ def J4_cmpeqn1_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$n1); if (!p1.new) jump:nt $Ii",
-tc_3d495a39, TypeCJ>, Enc_668704, PredRel {
+tc_24f426ab, TypeCJ>, Enc_668704, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{31-22} = 0b0001001111;
@@ -6477,7 +6584,7 @@ def J4_cmpeqn1_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$n1); if (!p1.new) jump:t $Ii",
-tc_3d495a39, TypeCJ>, Enc_800e04, PredRel {
+tc_24f426ab, TypeCJ>, Enc_800e04, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{31-22} = 0b0001001111;
@@ -6503,7 +6610,7 @@ def J4_cmpeqn1_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,#$n1)) jump:nt $Ii",
-tc_bd8382d1, TypeNCJ>, Enc_4aca3a, PredRel {
+tc_f6e2aff9, TypeNCJ>, Enc_4aca3a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -6528,7 +6635,7 @@ def J4_cmpeqn1_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (cmp.eq($Ns8.new,#$n1)) jump:t $Ii",
-tc_bd8382d1, TypeNCJ>, Enc_f7ea77, PredRel {
+tc_f6e2aff9, TypeNCJ>, Enc_f7ea77, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -6553,7 +6660,7 @@ def J4_cmpeqn1_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$n1); if (p0.new) jump:nt $Ii",
-tc_3d495a39, TypeCJ>, Enc_405228, PredRel {
+tc_24f426ab, TypeCJ>, Enc_405228, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{31-22} = 0b0001000110;
@@ -6578,7 +6685,7 @@ def J4_cmpeqn1_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.eq($Rs16,#$n1); if (p0.new) jump:t $Ii",
-tc_3d495a39, TypeCJ>, Enc_3a2484, PredRel {
+tc_24f426ab, TypeCJ>, Enc_3a2484, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{31-22} = 0b0001000110;
@@ -6603,7 +6710,7 @@ def J4_cmpeqn1_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$n1); if (p1.new) jump:nt $Ii",
-tc_3d495a39, TypeCJ>, Enc_736575, PredRel {
+tc_24f426ab, TypeCJ>, Enc_736575, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{31-22} = 0b0001001110;
@@ -6628,7 +6735,7 @@ def J4_cmpeqn1_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.eq($Rs16,#$n1); if (p1.new) jump:t $Ii",
-tc_3d495a39, TypeCJ>, Enc_8e583a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_8e583a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{31-22} = 0b0001001110;
@@ -6653,7 +6760,7 @@ def J4_cmpgt_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_24e109c7, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -6679,7 +6786,7 @@ def J4_cmpgt_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,$Rt32)) jump:t $Ii",
-tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_24e109c7, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -6705,7 +6812,7 @@ def J4_cmpgt_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,$Rt16); if (!p0.new) jump:nt $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010011;
@@ -6731,7 +6838,7 @@ def J4_cmpgt_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,$Rt16); if (!p0.new) jump:t $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010011;
@@ -6757,7 +6864,7 @@ def J4_cmpgt_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,$Rt16); if (!p1.new) jump:nt $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010011;
@@ -6783,7 +6890,7 @@ def J4_cmpgt_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,$Rt16); if (!p1.new) jump:t $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010011;
@@ -6809,7 +6916,7 @@ def J4_cmpgt_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_24e109c7, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -6834,7 +6941,7 @@ def J4_cmpgt_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,$Rt32)) jump:t $Ii",
-tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_24e109c7, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -6859,7 +6966,7 @@ def J4_cmpgt_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,$Rt16); if (p0.new) jump:nt $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010010;
@@ -6884,7 +6991,7 @@ def J4_cmpgt_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,$Rt16); if (p0.new) jump:t $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010010;
@@ -6909,7 +7016,7 @@ def J4_cmpgt_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,$Rt16); if (p1.new) jump:nt $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010010;
@@ -6934,7 +7041,7 @@ def J4_cmpgt_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,$Rt16); if (p1.new) jump:t $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010010;
@@ -6959,7 +7066,7 @@ def J4_cmpgti_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,#$II)) jump:nt $Ii",
-tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
+tc_f6e2aff9, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -6985,7 +7092,7 @@ def J4_cmpgti_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,#$II)) jump:t $Ii",
-tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
+tc_f6e2aff9, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -7011,7 +7118,7 @@ def J4_cmpgti_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$II); if (!p0.new) jump:nt $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000011;
@@ -7037,7 +7144,7 @@ def J4_cmpgti_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$II); if (!p0.new) jump:t $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000011;
@@ -7063,7 +7170,7 @@ def J4_cmpgti_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$II); if (!p1.new) jump:nt $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001011;
@@ -7089,7 +7196,7 @@ def J4_cmpgti_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$II); if (!p1.new) jump:t $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001011;
@@ -7115,7 +7222,7 @@ def J4_cmpgti_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,#$II)) jump:nt $Ii",
-tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
+tc_f6e2aff9, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -7140,7 +7247,7 @@ def J4_cmpgti_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,#$II)) jump:t $Ii",
-tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
+tc_f6e2aff9, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -7165,7 +7272,7 @@ def J4_cmpgti_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$II); if (p0.new) jump:nt $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000010;
@@ -7190,7 +7297,7 @@ def J4_cmpgti_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$II); if (p0.new) jump:t $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000010;
@@ -7215,7 +7322,7 @@ def J4_cmpgti_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$II); if (p1.new) jump:nt $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001010;
@@ -7240,7 +7347,7 @@ def J4_cmpgti_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$II); if (p1.new) jump:t $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001010;
@@ -7265,7 +7372,7 @@ def J4_cmpgtn1_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,#$n1)) jump:nt $Ii",
-tc_bd8382d1, TypeNCJ>, Enc_3694bd, PredRel {
+tc_f6e2aff9, TypeNCJ>, Enc_3694bd, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -7291,7 +7398,7 @@ def J4_cmpgtn1_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (!cmp.gt($Ns8.new,#$n1)) jump:t $Ii",
-tc_bd8382d1, TypeNCJ>, Enc_a6853f, PredRel {
+tc_f6e2aff9, TypeNCJ>, Enc_a6853f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -7317,7 +7424,7 @@ def J4_cmpgtn1_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$n1); if (!p0.new) jump:nt $Ii",
-tc_3d495a39, TypeCJ>, Enc_a42857, PredRel {
+tc_24f426ab, TypeCJ>, Enc_a42857, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000001;
let Inst{31-22} = 0b0001000111;
@@ -7343,7 +7450,7 @@ def J4_cmpgtn1_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$n1); if (!p0.new) jump:t $Ii",
-tc_3d495a39, TypeCJ>, Enc_f6fe0b, PredRel {
+tc_24f426ab, TypeCJ>, Enc_f6fe0b, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100001;
let Inst{31-22} = 0b0001000111;
@@ -7369,7 +7476,7 @@ def J4_cmpgtn1_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$n1); if (!p1.new) jump:nt $Ii",
-tc_3d495a39, TypeCJ>, Enc_3e3989, PredRel {
+tc_24f426ab, TypeCJ>, Enc_3e3989, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000001;
let Inst{31-22} = 0b0001001111;
@@ -7395,7 +7502,7 @@ def J4_cmpgtn1_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$n1); if (!p1.new) jump:t $Ii",
-tc_3d495a39, TypeCJ>, Enc_b909d2, PredRel {
+tc_24f426ab, TypeCJ>, Enc_b909d2, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100001;
let Inst{31-22} = 0b0001001111;
@@ -7421,7 +7528,7 @@ def J4_cmpgtn1_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,#$n1)) jump:nt $Ii",
-tc_bd8382d1, TypeNCJ>, Enc_f82302, PredRel {
+tc_f6e2aff9, TypeNCJ>, Enc_f82302, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -7446,7 +7553,7 @@ def J4_cmpgtn1_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
"if (cmp.gt($Ns8.new,#$n1)) jump:t $Ii",
-tc_bd8382d1, TypeNCJ>, Enc_6413b6, PredRel {
+tc_f6e2aff9, TypeNCJ>, Enc_6413b6, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -7471,7 +7578,7 @@ def J4_cmpgtn1_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$n1); if (p0.new) jump:nt $Ii",
-tc_3d495a39, TypeCJ>, Enc_b78edd, PredRel {
+tc_24f426ab, TypeCJ>, Enc_b78edd, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000001;
let Inst{31-22} = 0b0001000110;
@@ -7496,7 +7603,7 @@ def J4_cmpgtn1_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p0 = cmp.gt($Rs16,#$n1); if (p0.new) jump:t $Ii",
-tc_3d495a39, TypeCJ>, Enc_041d7b, PredRel {
+tc_24f426ab, TypeCJ>, Enc_041d7b, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100001;
let Inst{31-22} = 0b0001000110;
@@ -7521,7 +7628,7 @@ def J4_cmpgtn1_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$n1); if (p1.new) jump:nt $Ii",
-tc_3d495a39, TypeCJ>, Enc_b1e1fb, PredRel {
+tc_24f426ab, TypeCJ>, Enc_b1e1fb, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000001;
let Inst{31-22} = 0b0001001110;
@@ -7546,7 +7653,7 @@ def J4_cmpgtn1_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
"p1 = cmp.gt($Rs16,#$n1); if (p1.new) jump:t $Ii",
-tc_3d495a39, TypeCJ>, Enc_178717, PredRel {
+tc_24f426ab, TypeCJ>, Enc_178717, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100001;
let Inst{31-22} = 0b0001001110;
@@ -7571,7 +7678,7 @@ def J4_cmpgtu_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.gtu($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_24e109c7, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -7597,7 +7704,7 @@ def J4_cmpgtu_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (!cmp.gtu($Ns8.new,$Rt32)) jump:t $Ii",
-tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_24e109c7, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -7623,7 +7730,7 @@ def J4_cmpgtu_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,$Rt16); if (!p0.new) jump:nt $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010101;
@@ -7649,7 +7756,7 @@ def J4_cmpgtu_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,$Rt16); if (!p0.new) jump:t $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010101;
@@ -7675,7 +7782,7 @@ def J4_cmpgtu_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,$Rt16); if (!p1.new) jump:nt $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010101;
@@ -7701,7 +7808,7 @@ def J4_cmpgtu_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,$Rt16); if (!p1.new) jump:t $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010101;
@@ -7727,7 +7834,7 @@ def J4_cmpgtu_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.gtu($Ns8.new,$Rt32)) jump:nt $Ii",
-tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_24e109c7, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -7752,7 +7859,7 @@ def J4_cmpgtu_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
"if (cmp.gtu($Ns8.new,$Rt32)) jump:t $Ii",
-tc_9bfd761f, TypeNCJ>, Enc_c9a18e, PredRel {
+tc_24e109c7, TypeNCJ>, Enc_c9a18e, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -7777,7 +7884,7 @@ def J4_cmpgtu_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,$Rt16); if (p0.new) jump:nt $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001010100;
@@ -7802,7 +7909,7 @@ def J4_cmpgtu_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,$Rt16); if (p0.new) jump:t $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b10;
let Inst{31-22} = 0b0001010100;
@@ -7827,7 +7934,7 @@ def J4_cmpgtu_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,$Rt16); if (p1.new) jump:nt $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-22} = 0b0001010100;
@@ -7852,7 +7959,7 @@ def J4_cmpgtu_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,$Rt16); if (p1.new) jump:t $Ii",
-tc_56336eb0, TypeCJ>, Enc_6a5972, PredRel {
+tc_9e27f2f9, TypeCJ>, Enc_6a5972, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b11;
let Inst{31-22} = 0b0001010100;
@@ -7877,7 +7984,7 @@ def J4_cmpgtui_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.gtu($Ns8.new,#$II)) jump:nt $Ii",
-tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
+tc_f6e2aff9, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -7903,7 +8010,7 @@ def J4_cmpgtui_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (!cmp.gtu($Ns8.new,#$II)) jump:t $Ii",
-tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
+tc_f6e2aff9, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -7929,7 +8036,7 @@ def J4_cmpgtui_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,#$II); if (!p0.new) jump:nt $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000101;
@@ -7955,7 +8062,7 @@ def J4_cmpgtui_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,#$II); if (!p0.new) jump:t $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000101;
@@ -7981,7 +8088,7 @@ def J4_cmpgtui_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,#$II); if (!p1.new) jump:nt $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001101;
@@ -8007,7 +8114,7 @@ def J4_cmpgtui_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,#$II); if (!p1.new) jump:t $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001101;
@@ -8033,7 +8140,7 @@ def J4_cmpgtui_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.gtu($Ns8.new,#$II)) jump:nt $Ii",
-tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
+tc_f6e2aff9, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -8058,7 +8165,7 @@ def J4_cmpgtui_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
"if (cmp.gtu($Ns8.new,#$II)) jump:t $Ii",
-tc_bd8382d1, TypeNCJ>, Enc_eafd18, PredRel {
+tc_f6e2aff9, TypeNCJ>, Enc_eafd18, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -8083,7 +8190,7 @@ def J4_cmpgtui_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,#$II); if (p0.new) jump:nt $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001000100;
@@ -8108,7 +8215,7 @@ def J4_cmpgtui_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p0 = cmp.gtu($Rs16,#$II); if (p0.new) jump:t $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001000100;
@@ -8133,7 +8240,7 @@ def J4_cmpgtui_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,#$II); if (p1.new) jump:nt $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-22} = 0b0001001100;
@@ -8158,7 +8265,7 @@ def J4_cmpgtui_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
"p1 = cmp.gtu($Rs16,#$II); if (p1.new) jump:t $Ii",
-tc_3d495a39, TypeCJ>, Enc_14d27a, PredRel {
+tc_24f426ab, TypeCJ>, Enc_14d27a, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-22} = 0b0001001100;
@@ -8183,7 +8290,7 @@ def J4_cmplt_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!cmp.gt($Rt32,$Ns8.new)) jump:nt $Ii",
-tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
+tc_975a4e54, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -8209,7 +8316,7 @@ def J4_cmplt_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!cmp.gt($Rt32,$Ns8.new)) jump:t $Ii",
-tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
+tc_975a4e54, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -8235,7 +8342,7 @@ def J4_cmplt_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (cmp.gt($Rt32,$Ns8.new)) jump:nt $Ii",
-tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
+tc_975a4e54, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -8260,7 +8367,7 @@ def J4_cmplt_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (cmp.gt($Rt32,$Ns8.new)) jump:t $Ii",
-tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
+tc_975a4e54, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -8285,7 +8392,7 @@ def J4_cmpltu_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!cmp.gtu($Rt32,$Ns8.new)) jump:nt $Ii",
-tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
+tc_975a4e54, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -8311,7 +8418,7 @@ def J4_cmpltu_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!cmp.gtu($Rt32,$Ns8.new)) jump:t $Ii",
-tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
+tc_975a4e54, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -8337,7 +8444,7 @@ def J4_cmpltu_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (cmp.gtu($Rt32,$Ns8.new)) jump:nt $Ii",
-tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
+tc_975a4e54, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b0;
let Inst{19-19} = 0b0;
@@ -8362,7 +8469,7 @@ def J4_cmpltu_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
"if (cmp.gtu($Rt32,$Ns8.new)) jump:t $Ii",
-tc_b343892a, TypeNCJ>, Enc_5de85f, PredRel {
+tc_975a4e54, TypeNCJ>, Enc_5de85f, PredRel {
let Inst{0-0} = 0b0;
let Inst{13-13} = 0b1;
let Inst{19-19} = 0b0;
@@ -8387,7 +8494,7 @@ def J4_hintjumpr : HInst<
(outs),
(ins IntRegs:$Rs32),
"hintjr($Rs32)",
-tc_d5b7b0c1, TypeJ>, Enc_ecbcc8 {
+tc_60e324ff, TypeJ>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01010010101;
let isTerminator = 1;
@@ -8399,7 +8506,7 @@ def J4_jumpseti : HInst<
(outs GeneralSubRegs:$Rd16),
(ins u6_0Imm:$II, b30_2Imm:$Ii),
"$Rd16 = #$II ; jump $Ii",
-tc_0663f615, TypeCJ>, Enc_9e4c3f {
+tc_5502c366, TypeCJ>, Enc_9e4c3f {
let Inst{0-0} = 0b0;
let Inst{31-22} = 0b0001011000;
let hasNewValue = 1;
@@ -8419,7 +8526,7 @@ def J4_jumpsetr : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"$Rd16 = $Rs16 ; jump $Ii",
-tc_0663f615, TypeCJ>, Enc_66bce1 {
+tc_5502c366, TypeCJ>, Enc_66bce1 {
let Inst{0-0} = 0b0;
let Inst{13-12} = 0b00;
let Inst{31-22} = 0b0001011100;
@@ -8440,7 +8547,7 @@ def J4_tstbit0_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!tstbit($Ns8.new,#0)) jump:nt $Ii",
-tc_8c945be0, TypeNCJ>, Enc_69d63b {
+tc_7b9187d3, TypeNCJ>, Enc_69d63b {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -8465,7 +8572,7 @@ def J4_tstbit0_f_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, b30_2Imm:$Ii),
"if (!tstbit($Ns8.new,#0)) jump:t $Ii",
-tc_8c945be0, TypeNCJ>, Enc_69d63b {
+tc_7b9187d3, TypeNCJ>, Enc_69d63b {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -8490,7 +8597,7 @@ def J4_tstbit0_fp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p0 = tstbit($Rs16,#0); if (!p0.new) jump:nt $Ii",
-tc_2332b92e, TypeCJ>, Enc_ad1c74 {
+tc_f999c66e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000011;
let Inst{31-22} = 0b0001000111;
@@ -8515,7 +8622,7 @@ def J4_tstbit0_fp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p0 = tstbit($Rs16,#0); if (!p0.new) jump:t $Ii",
-tc_2332b92e, TypeCJ>, Enc_ad1c74 {
+tc_f999c66e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100011;
let Inst{31-22} = 0b0001000111;
@@ -8540,7 +8647,7 @@ def J4_tstbit0_fp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p1 = tstbit($Rs16,#0); if (!p1.new) jump:nt $Ii",
-tc_2332b92e, TypeCJ>, Enc_ad1c74 {
+tc_f999c66e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000011;
let Inst{31-22} = 0b0001001111;
@@ -8565,7 +8672,7 @@ def J4_tstbit0_fp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p1 = tstbit($Rs16,#0); if (!p1.new) jump:t $Ii",
-tc_2332b92e, TypeCJ>, Enc_ad1c74 {
+tc_f999c66e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100011;
let Inst{31-22} = 0b0001001111;
@@ -8590,7 +8697,7 @@ def J4_tstbit0_t_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, b30_2Imm:$Ii),
"if (tstbit($Ns8.new,#0)) jump:nt $Ii",
-tc_8c945be0, TypeNCJ>, Enc_69d63b {
+tc_7b9187d3, TypeNCJ>, Enc_69d63b {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000000;
let Inst{19-19} = 0b0;
@@ -8614,7 +8721,7 @@ def J4_tstbit0_t_jumpnv_t : HInst<
(outs),
(ins IntRegs:$Ns8, b30_2Imm:$Ii),
"if (tstbit($Ns8.new,#0)) jump:t $Ii",
-tc_8c945be0, TypeNCJ>, Enc_69d63b {
+tc_7b9187d3, TypeNCJ>, Enc_69d63b {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100000;
let Inst{19-19} = 0b0;
@@ -8638,7 +8745,7 @@ def J4_tstbit0_tp0_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p0 = tstbit($Rs16,#0); if (p0.new) jump:nt $Ii",
-tc_2332b92e, TypeCJ>, Enc_ad1c74 {
+tc_f999c66e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000011;
let Inst{31-22} = 0b0001000110;
@@ -8662,7 +8769,7 @@ def J4_tstbit0_tp0_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p0 = tstbit($Rs16,#0); if (p0.new) jump:t $Ii",
-tc_2332b92e, TypeCJ>, Enc_ad1c74 {
+tc_f999c66e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100011;
let Inst{31-22} = 0b0001000110;
@@ -8686,7 +8793,7 @@ def J4_tstbit0_tp1_jump_nt : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p1 = tstbit($Rs16,#0); if (p1.new) jump:nt $Ii",
-tc_2332b92e, TypeCJ>, Enc_ad1c74 {
+tc_f999c66e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b000011;
let Inst{31-22} = 0b0001001110;
@@ -8710,7 +8817,7 @@ def J4_tstbit0_tp1_jump_t : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
"p1 = tstbit($Rs16,#0); if (p1.new) jump:t $Ii",
-tc_2332b92e, TypeCJ>, Enc_ad1c74 {
+tc_f999c66e, TypeCJ>, Enc_ad1c74 {
let Inst{0-0} = 0b0;
let Inst{13-8} = 0b100011;
let Inst{31-22} = 0b0001001110;
@@ -8734,7 +8841,7 @@ def L2_deallocframe : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = deallocframe($Rs32):raw",
-tc_15aa71c5, TypeLD>, Enc_3a3d62 {
+tc_e9170fb7, TypeLD>, Enc_3a3d62 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10010000000;
let accessSize = DoubleWordAccess;
@@ -8746,7 +8853,7 @@ def L2_loadalignb_io : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Ryy32 = memb_fifo($Rs32+#$Ii)",
-tc_5ef37dc4, TypeLD>, Enc_a27588 {
+tc_fedb7e19, TypeLD>, Enc_a27588 {
let Inst{24-21} = 0b0100;
let Inst{31-27} = 0b10010;
let addrMode = BaseImmOffset;
@@ -8763,7 +8870,7 @@ def L2_loadalignb_pbr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memb_fifo($Rx32++$Mu2:brev)",
-tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
+tc_1c7522a8, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110100;
let addrMode = PostInc;
@@ -8775,7 +8882,7 @@ def L2_loadalignb_pci : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2),
"$Ryy32 = memb_fifo($Rx32++#$Ii:circ($Mu2))",
-tc_785f65a7, TypeLD>, Enc_74aef2 {
+tc_76bb5435, TypeLD>, Enc_74aef2 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000100;
let addrMode = PostInc;
@@ -8788,7 +8895,7 @@ def L2_loadalignb_pcr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memb_fifo($Rx32++I:circ($Mu2))",
-tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
+tc_1c7522a8, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000100;
let addrMode = PostInc;
@@ -8801,7 +8908,7 @@ def L2_loadalignb_pi : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_0Imm:$Ii),
"$Ryy32 = memb_fifo($Rx32++#$Ii)",
-tc_3c76b0ff, TypeLD>, Enc_6b197f {
+tc_1c7522a8, TypeLD>, Enc_6b197f {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010100;
let addrMode = PostInc;
@@ -8813,7 +8920,7 @@ def L2_loadalignb_pr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memb_fifo($Rx32++$Mu2)",
-tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
+tc_1c7522a8, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100100;
let addrMode = PostInc;
@@ -8825,7 +8932,7 @@ def L2_loadalignb_zomap : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32),
"$Ryy32 = memb_fifo($Rs32)",
-tc_5ef37dc4, TypeMAPPING> {
+tc_fedb7e19, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let Constraints = "$Ryy32 = $Ryy32in";
@@ -8834,7 +8941,7 @@ def L2_loadalignh_io : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32, s31_1Imm:$Ii),
"$Ryy32 = memh_fifo($Rs32+#$Ii)",
-tc_5ef37dc4, TypeLD>, Enc_5cd7e9 {
+tc_fedb7e19, TypeLD>, Enc_5cd7e9 {
let Inst{24-21} = 0b0010;
let Inst{31-27} = 0b10010;
let addrMode = BaseImmOffset;
@@ -8851,7 +8958,7 @@ def L2_loadalignh_pbr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memh_fifo($Rx32++$Mu2:brev)",
-tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
+tc_1c7522a8, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110010;
let addrMode = PostInc;
@@ -8863,7 +8970,7 @@ def L2_loadalignh_pci : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
"$Ryy32 = memh_fifo($Rx32++#$Ii:circ($Mu2))",
-tc_785f65a7, TypeLD>, Enc_9e2e1c {
+tc_76bb5435, TypeLD>, Enc_9e2e1c {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000010;
let addrMode = PostInc;
@@ -8876,7 +8983,7 @@ def L2_loadalignh_pcr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memh_fifo($Rx32++I:circ($Mu2))",
-tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
+tc_1c7522a8, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000010;
let addrMode = PostInc;
@@ -8889,7 +8996,7 @@ def L2_loadalignh_pi : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_1Imm:$Ii),
"$Ryy32 = memh_fifo($Rx32++#$Ii)",
-tc_3c76b0ff, TypeLD>, Enc_bd1cbc {
+tc_1c7522a8, TypeLD>, Enc_bd1cbc {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010010;
let addrMode = PostInc;
@@ -8901,7 +9008,7 @@ def L2_loadalignh_pr : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
"$Ryy32 = memh_fifo($Rx32++$Mu2)",
-tc_3c76b0ff, TypeLD>, Enc_1f5d8f {
+tc_1c7522a8, TypeLD>, Enc_1f5d8f {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100010;
let addrMode = PostInc;
@@ -8913,7 +9020,7 @@ def L2_loadalignh_zomap : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32),
"$Ryy32 = memh_fifo($Rs32)",
-tc_5ef37dc4, TypeMAPPING> {
+tc_fedb7e19, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let Constraints = "$Ryy32 = $Ryy32in";
@@ -8922,7 +9029,7 @@ def L2_loadbsw2_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s31_1Imm:$Ii),
"$Rd32 = membh($Rs32+#$Ii)",
-tc_17e0d2cd, TypeLD>, Enc_de0214 {
+tc_4222e6bf, TypeLD>, Enc_de0214 {
let Inst{24-21} = 0b0001;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -8940,7 +9047,7 @@ def L2_loadbsw2_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = membh($Rx32++$Mu2:brev)",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110001;
let hasNewValue = 1;
@@ -8954,7 +9061,7 @@ def L2_loadbsw2_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = membh($Rx32++#$Ii:circ($Mu2))",
-tc_e93a3d71, TypeLD>, Enc_e83554 {
+tc_5ceb2f9e, TypeLD>, Enc_e83554 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000001;
let hasNewValue = 1;
@@ -8969,7 +9076,7 @@ def L2_loadbsw2_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = membh($Rx32++I:circ($Mu2))",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000001;
let hasNewValue = 1;
@@ -8984,7 +9091,7 @@ def L2_loadbsw2_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
"$Rd32 = membh($Rx32++#$Ii)",
-tc_44d3da28, TypeLD>, Enc_152467 {
+tc_075c8dd8, TypeLD>, Enc_152467 {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010001;
let hasNewValue = 1;
@@ -8998,7 +9105,7 @@ def L2_loadbsw2_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = membh($Rx32++$Mu2)",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100001;
let hasNewValue = 1;
@@ -9012,7 +9119,7 @@ def L2_loadbsw2_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = membh($Rs32)",
-tc_17e0d2cd, TypeMAPPING> {
+tc_4222e6bf, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9022,7 +9129,7 @@ def L2_loadbsw4_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, s30_2Imm:$Ii),
"$Rdd32 = membh($Rs32+#$Ii)",
-tc_17e0d2cd, TypeLD>, Enc_2d7491 {
+tc_4222e6bf, TypeLD>, Enc_2d7491 {
let Inst{24-21} = 0b0111;
let Inst{31-27} = 0b10010;
let addrMode = BaseImmOffset;
@@ -9038,7 +9145,7 @@ def L2_loadbsw4_pbr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = membh($Rx32++$Mu2:brev)",
-tc_44d3da28, TypeLD>, Enc_7eee72 {
+tc_075c8dd8, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110111;
let addrMode = PostInc;
@@ -9050,7 +9157,7 @@ def L2_loadbsw4_pci : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2),
"$Rdd32 = membh($Rx32++#$Ii:circ($Mu2))",
-tc_e93a3d71, TypeLD>, Enc_70b24b {
+tc_5ceb2f9e, TypeLD>, Enc_70b24b {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000111;
let addrMode = PostInc;
@@ -9063,7 +9170,7 @@ def L2_loadbsw4_pcr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = membh($Rx32++I:circ($Mu2))",
-tc_44d3da28, TypeLD>, Enc_7eee72 {
+tc_075c8dd8, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000111;
let addrMode = PostInc;
@@ -9076,7 +9183,7 @@ def L2_loadbsw4_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii),
"$Rdd32 = membh($Rx32++#$Ii)",
-tc_44d3da28, TypeLD>, Enc_71f1b4 {
+tc_075c8dd8, TypeLD>, Enc_71f1b4 {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010111;
let addrMode = PostInc;
@@ -9088,7 +9195,7 @@ def L2_loadbsw4_pr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = membh($Rx32++$Mu2)",
-tc_44d3da28, TypeLD>, Enc_7eee72 {
+tc_075c8dd8, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100111;
let addrMode = PostInc;
@@ -9100,7 +9207,7 @@ def L2_loadbsw4_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = membh($Rs32)",
-tc_17e0d2cd, TypeMAPPING> {
+tc_4222e6bf, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -9108,7 +9215,7 @@ def L2_loadbzw2_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s31_1Imm:$Ii),
"$Rd32 = memubh($Rs32+#$Ii)",
-tc_17e0d2cd, TypeLD>, Enc_de0214 {
+tc_4222e6bf, TypeLD>, Enc_de0214 {
let Inst{24-21} = 0b0011;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9126,7 +9233,7 @@ def L2_loadbzw2_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memubh($Rx32++$Mu2:brev)",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110011;
let hasNewValue = 1;
@@ -9140,7 +9247,7 @@ def L2_loadbzw2_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memubh($Rx32++#$Ii:circ($Mu2))",
-tc_e93a3d71, TypeLD>, Enc_e83554 {
+tc_5ceb2f9e, TypeLD>, Enc_e83554 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000011;
let hasNewValue = 1;
@@ -9155,7 +9262,7 @@ def L2_loadbzw2_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memubh($Rx32++I:circ($Mu2))",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000011;
let hasNewValue = 1;
@@ -9170,7 +9277,7 @@ def L2_loadbzw2_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
"$Rd32 = memubh($Rx32++#$Ii)",
-tc_44d3da28, TypeLD>, Enc_152467 {
+tc_075c8dd8, TypeLD>, Enc_152467 {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010011;
let hasNewValue = 1;
@@ -9184,7 +9291,7 @@ def L2_loadbzw2_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memubh($Rx32++$Mu2)",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100011;
let hasNewValue = 1;
@@ -9198,7 +9305,7 @@ def L2_loadbzw2_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memubh($Rs32)",
-tc_17e0d2cd, TypeMAPPING> {
+tc_4222e6bf, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9208,7 +9315,7 @@ def L2_loadbzw4_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, s30_2Imm:$Ii),
"$Rdd32 = memubh($Rs32+#$Ii)",
-tc_17e0d2cd, TypeLD>, Enc_2d7491 {
+tc_4222e6bf, TypeLD>, Enc_2d7491 {
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b10010;
let addrMode = BaseImmOffset;
@@ -9224,7 +9331,7 @@ def L2_loadbzw4_pbr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memubh($Rx32++$Mu2:brev)",
-tc_44d3da28, TypeLD>, Enc_7eee72 {
+tc_075c8dd8, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011110101;
let addrMode = PostInc;
@@ -9236,7 +9343,7 @@ def L2_loadbzw4_pci : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2),
"$Rdd32 = memubh($Rx32++#$Ii:circ($Mu2))",
-tc_e93a3d71, TypeLD>, Enc_70b24b {
+tc_5ceb2f9e, TypeLD>, Enc_70b24b {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011000101;
let addrMode = PostInc;
@@ -9249,7 +9356,7 @@ def L2_loadbzw4_pcr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memubh($Rx32++I:circ($Mu2))",
-tc_44d3da28, TypeLD>, Enc_7eee72 {
+tc_075c8dd8, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011000101;
let addrMode = PostInc;
@@ -9262,7 +9369,7 @@ def L2_loadbzw4_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii),
"$Rdd32 = memubh($Rx32++#$Ii)",
-tc_44d3da28, TypeLD>, Enc_71f1b4 {
+tc_075c8dd8, TypeLD>, Enc_71f1b4 {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011010101;
let addrMode = PostInc;
@@ -9274,7 +9381,7 @@ def L2_loadbzw4_pr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memubh($Rx32++$Mu2)",
-tc_44d3da28, TypeLD>, Enc_7eee72 {
+tc_075c8dd8, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011100101;
let addrMode = PostInc;
@@ -9286,7 +9393,7 @@ def L2_loadbzw4_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = memubh($Rs32)",
-tc_17e0d2cd, TypeMAPPING> {
+tc_4222e6bf, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -9294,7 +9401,7 @@ def L2_loadrb_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = memb($Rs32+#$Ii)",
-tc_17e0d2cd, TypeLD>, Enc_211aaa, AddrModeRel, PostInc_BaseImm {
+tc_4222e6bf, TypeLD>, Enc_211aaa, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1000;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9302,8 +9409,8 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadrb";
let BaseOpcode = "L2_loadrb_io";
+let CextOpcode = "L2_loadrb";
let isPredicable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -9315,7 +9422,7 @@ def L2_loadrb_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memb($Rx32++$Mu2:brev)",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111000;
let hasNewValue = 1;
@@ -9329,7 +9436,7 @@ def L2_loadrb_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memb($Rx32++#$Ii:circ($Mu2))",
-tc_e93a3d71, TypeLD>, Enc_e0a47a {
+tc_5ceb2f9e, TypeLD>, Enc_e0a47a {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001000;
let hasNewValue = 1;
@@ -9344,7 +9451,7 @@ def L2_loadrb_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memb($Rx32++I:circ($Mu2))",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001000;
let hasNewValue = 1;
@@ -9359,7 +9466,7 @@ def L2_loadrb_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii),
"$Rd32 = memb($Rx32++#$Ii)",
-tc_44d3da28, TypeLD>, Enc_222336, PredNewRel, PostInc_BaseImm {
+tc_075c8dd8, TypeLD>, Enc_222336, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011000;
let hasNewValue = 1;
@@ -9367,8 +9474,8 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = ByteAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadrb";
let BaseOpcode = "L2_loadrb_pi";
+let CextOpcode = "L2_loadrb";
let isPredicable = 1;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -9376,7 +9483,7 @@ def L2_loadrb_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memb($Rx32++$Mu2)",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101000;
let hasNewValue = 1;
@@ -9390,7 +9497,7 @@ def L2_loadrb_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memb($Rs32)",
-tc_17e0d2cd, TypeMAPPING> {
+tc_4222e6bf, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9400,7 +9507,7 @@ def L2_loadrbgp : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii),
"$Rd32 = memb(gp+#$Ii)",
-tc_c4db48cb, TypeV2LDST>, Enc_25bef0, AddrModeRel {
+tc_8a6d0d94, TypeV2LDST>, Enc_25bef0, AddrModeRel {
let Inst{24-21} = 0b1000;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -9419,14 +9526,14 @@ def L2_loadrd_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, s29_3Imm:$Ii),
"$Rdd32 = memd($Rs32+#$Ii)",
-tc_17e0d2cd, TypeLD>, Enc_fa3ba4, AddrModeRel, PostInc_BaseImm {
+tc_4222e6bf, TypeLD>, Enc_fa3ba4, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1110;
let Inst{31-27} = 0b10010;
let addrMode = BaseImmOffset;
let accessSize = DoubleWordAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadrd";
let BaseOpcode = "L2_loadrd_io";
+let CextOpcode = "L2_loadrd";
let isPredicable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -9438,7 +9545,7 @@ def L2_loadrd_pbr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memd($Rx32++$Mu2:brev)",
-tc_44d3da28, TypeLD>, Enc_7eee72 {
+tc_075c8dd8, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111110;
let addrMode = PostInc;
@@ -9450,7 +9557,7 @@ def L2_loadrd_pci : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_3Imm:$Ii, ModRegs:$Mu2),
"$Rdd32 = memd($Rx32++#$Ii:circ($Mu2))",
-tc_e93a3d71, TypeLD>, Enc_b05839 {
+tc_5ceb2f9e, TypeLD>, Enc_b05839 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001110;
let addrMode = PostInc;
@@ -9463,7 +9570,7 @@ def L2_loadrd_pcr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memd($Rx32++I:circ($Mu2))",
-tc_44d3da28, TypeLD>, Enc_7eee72 {
+tc_075c8dd8, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001110;
let addrMode = PostInc;
@@ -9476,14 +9583,14 @@ def L2_loadrd_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_3Imm:$Ii),
"$Rdd32 = memd($Rx32++#$Ii)",
-tc_44d3da28, TypeLD>, Enc_5bdd42, PredNewRel, PostInc_BaseImm {
+tc_075c8dd8, TypeLD>, Enc_5bdd42, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011110;
let addrMode = PostInc;
let accessSize = DoubleWordAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadrd";
let BaseOpcode = "L2_loadrd_pi";
+let CextOpcode = "L2_loadrd";
let isPredicable = 1;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -9491,7 +9598,7 @@ def L2_loadrd_pr : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rdd32 = memd($Rx32++$Mu2)",
-tc_44d3da28, TypeLD>, Enc_7eee72 {
+tc_075c8dd8, TypeLD>, Enc_7eee72 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101110;
let addrMode = PostInc;
@@ -9503,7 +9610,7 @@ def L2_loadrd_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = memd($Rs32)",
-tc_17e0d2cd, TypeMAPPING> {
+tc_4222e6bf, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -9511,7 +9618,7 @@ def L2_loadrdgp : HInst<
(outs DoubleRegs:$Rdd32),
(ins u29_3Imm:$Ii),
"$Rdd32 = memd(gp+#$Ii)",
-tc_c4db48cb, TypeV2LDST>, Enc_509701, AddrModeRel {
+tc_8a6d0d94, TypeV2LDST>, Enc_509701, AddrModeRel {
let Inst{24-21} = 0b1110;
let Inst{31-27} = 0b01001;
let accessSize = DoubleWordAccess;
@@ -9528,7 +9635,7 @@ def L2_loadrh_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s31_1Imm:$Ii),
"$Rd32 = memh($Rs32+#$Ii)",
-tc_17e0d2cd, TypeLD>, Enc_de0214, AddrModeRel, PostInc_BaseImm {
+tc_4222e6bf, TypeLD>, Enc_de0214, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1010;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9536,8 +9643,8 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadrh";
let BaseOpcode = "L2_loadrh_io";
+let CextOpcode = "L2_loadrh";
let isPredicable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -9549,7 +9656,7 @@ def L2_loadrh_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memh($Rx32++$Mu2:brev)",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111010;
let hasNewValue = 1;
@@ -9563,7 +9670,7 @@ def L2_loadrh_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memh($Rx32++#$Ii:circ($Mu2))",
-tc_e93a3d71, TypeLD>, Enc_e83554 {
+tc_5ceb2f9e, TypeLD>, Enc_e83554 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001010;
let hasNewValue = 1;
@@ -9578,7 +9685,7 @@ def L2_loadrh_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memh($Rx32++I:circ($Mu2))",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001010;
let hasNewValue = 1;
@@ -9593,7 +9700,7 @@ def L2_loadrh_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
"$Rd32 = memh($Rx32++#$Ii)",
-tc_44d3da28, TypeLD>, Enc_152467, PredNewRel, PostInc_BaseImm {
+tc_075c8dd8, TypeLD>, Enc_152467, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011010;
let hasNewValue = 1;
@@ -9601,8 +9708,8 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadrh";
let BaseOpcode = "L2_loadrh_pi";
+let CextOpcode = "L2_loadrh";
let isPredicable = 1;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -9610,7 +9717,7 @@ def L2_loadrh_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memh($Rx32++$Mu2)",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101010;
let hasNewValue = 1;
@@ -9624,7 +9731,7 @@ def L2_loadrh_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memh($Rs32)",
-tc_17e0d2cd, TypeMAPPING> {
+tc_4222e6bf, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9634,7 +9741,7 @@ def L2_loadrhgp : HInst<
(outs IntRegs:$Rd32),
(ins u31_1Imm:$Ii),
"$Rd32 = memh(gp+#$Ii)",
-tc_c4db48cb, TypeV2LDST>, Enc_8df4be, AddrModeRel {
+tc_8a6d0d94, TypeV2LDST>, Enc_8df4be, AddrModeRel {
let Inst{24-21} = 0b1010;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -9653,7 +9760,7 @@ def L2_loadri_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s30_2Imm:$Ii),
"$Rd32 = memw($Rs32+#$Ii)",
-tc_17e0d2cd, TypeLD>, Enc_2a3787, AddrModeRel, PostInc_BaseImm {
+tc_4222e6bf, TypeLD>, Enc_2a3787, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1100;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9661,8 +9768,8 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadri";
let BaseOpcode = "L2_loadri_io";
+let CextOpcode = "L2_loadri";
let isPredicable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -9674,7 +9781,7 @@ def L2_loadri_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memw($Rx32++$Mu2:brev)",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111100;
let hasNewValue = 1;
@@ -9688,7 +9795,7 @@ def L2_loadri_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memw($Rx32++#$Ii:circ($Mu2))",
-tc_e93a3d71, TypeLD>, Enc_27fd0e {
+tc_5ceb2f9e, TypeLD>, Enc_27fd0e {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001100;
let hasNewValue = 1;
@@ -9703,7 +9810,7 @@ def L2_loadri_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memw($Rx32++I:circ($Mu2))",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001100;
let hasNewValue = 1;
@@ -9718,7 +9825,7 @@ def L2_loadri_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii),
"$Rd32 = memw($Rx32++#$Ii)",
-tc_44d3da28, TypeLD>, Enc_3d920a, PredNewRel, PostInc_BaseImm {
+tc_075c8dd8, TypeLD>, Enc_3d920a, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011100;
let hasNewValue = 1;
@@ -9726,8 +9833,8 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = WordAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadri";
let BaseOpcode = "L2_loadri_pi";
+let CextOpcode = "L2_loadri";
let isPredicable = 1;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -9735,7 +9842,7 @@ def L2_loadri_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memw($Rx32++$Mu2)",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101100;
let hasNewValue = 1;
@@ -9749,7 +9856,7 @@ def L2_loadri_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memw($Rs32)",
-tc_17e0d2cd, TypeMAPPING> {
+tc_4222e6bf, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9759,7 +9866,7 @@ def L2_loadrigp : HInst<
(outs IntRegs:$Rd32),
(ins u30_2Imm:$Ii),
"$Rd32 = memw(gp+#$Ii)",
-tc_c4db48cb, TypeV2LDST>, Enc_4f4ed7, AddrModeRel {
+tc_8a6d0d94, TypeV2LDST>, Enc_4f4ed7, AddrModeRel {
let Inst{24-21} = 0b1100;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -9778,7 +9885,7 @@ def L2_loadrub_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rd32 = memub($Rs32+#$Ii)",
-tc_17e0d2cd, TypeLD>, Enc_211aaa, AddrModeRel, PostInc_BaseImm {
+tc_4222e6bf, TypeLD>, Enc_211aaa, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1001;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9786,8 +9893,8 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadrub";
let BaseOpcode = "L2_loadrub_io";
+let CextOpcode = "L2_loadrub";
let isPredicable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -9799,7 +9906,7 @@ def L2_loadrub_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memub($Rx32++$Mu2:brev)",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111001;
let hasNewValue = 1;
@@ -9813,7 +9920,7 @@ def L2_loadrub_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memub($Rx32++#$Ii:circ($Mu2))",
-tc_e93a3d71, TypeLD>, Enc_e0a47a {
+tc_5ceb2f9e, TypeLD>, Enc_e0a47a {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001001;
let hasNewValue = 1;
@@ -9828,7 +9935,7 @@ def L2_loadrub_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memub($Rx32++I:circ($Mu2))",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001001;
let hasNewValue = 1;
@@ -9843,7 +9950,7 @@ def L2_loadrub_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii),
"$Rd32 = memub($Rx32++#$Ii)",
-tc_44d3da28, TypeLD>, Enc_222336, PredNewRel, PostInc_BaseImm {
+tc_075c8dd8, TypeLD>, Enc_222336, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011001;
let hasNewValue = 1;
@@ -9851,8 +9958,8 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = ByteAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadrub";
let BaseOpcode = "L2_loadrub_pi";
+let CextOpcode = "L2_loadrub";
let isPredicable = 1;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -9860,7 +9967,7 @@ def L2_loadrub_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memub($Rx32++$Mu2)",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101001;
let hasNewValue = 1;
@@ -9874,7 +9981,7 @@ def L2_loadrub_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memub($Rs32)",
-tc_17e0d2cd, TypeMAPPING> {
+tc_4222e6bf, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -9884,7 +9991,7 @@ def L2_loadrubgp : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii),
"$Rd32 = memub(gp+#$Ii)",
-tc_c4db48cb, TypeV2LDST>, Enc_25bef0, AddrModeRel {
+tc_8a6d0d94, TypeV2LDST>, Enc_25bef0, AddrModeRel {
let Inst{24-21} = 0b1001;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -9903,7 +10010,7 @@ def L2_loadruh_io : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s31_1Imm:$Ii),
"$Rd32 = memuh($Rs32+#$Ii)",
-tc_17e0d2cd, TypeLD>, Enc_de0214, AddrModeRel, PostInc_BaseImm {
+tc_4222e6bf, TypeLD>, Enc_de0214, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1011;
let Inst{31-27} = 0b10010;
let hasNewValue = 1;
@@ -9911,8 +10018,8 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadruh";
let BaseOpcode = "L2_loadruh_io";
+let CextOpcode = "L2_loadruh";
let isPredicable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -9924,7 +10031,7 @@ def L2_loadruh_pbr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memuh($Rx32++$Mu2:brev)",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011111011;
let hasNewValue = 1;
@@ -9938,7 +10045,7 @@ def L2_loadruh_pci : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
"$Rd32 = memuh($Rx32++#$Ii:circ($Mu2))",
-tc_e93a3d71, TypeLD>, Enc_e83554 {
+tc_5ceb2f9e, TypeLD>, Enc_e83554 {
let Inst{12-9} = 0b0000;
let Inst{31-21} = 0b10011001011;
let hasNewValue = 1;
@@ -9953,7 +10060,7 @@ def L2_loadruh_pcr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memuh($Rx32++I:circ($Mu2))",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b10011001011;
let hasNewValue = 1;
@@ -9968,7 +10075,7 @@ def L2_loadruh_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
"$Rd32 = memuh($Rx32++#$Ii)",
-tc_44d3da28, TypeLD>, Enc_152467, PredNewRel, PostInc_BaseImm {
+tc_075c8dd8, TypeLD>, Enc_152467, PredNewRel, PostInc_BaseImm {
let Inst{13-9} = 0b00000;
let Inst{31-21} = 0b10011011011;
let hasNewValue = 1;
@@ -9976,8 +10083,8 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadruh";
let BaseOpcode = "L2_loadruh_pi";
+let CextOpcode = "L2_loadruh";
let isPredicable = 1;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -9985,7 +10092,7 @@ def L2_loadruh_pr : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2),
"$Rd32 = memuh($Rx32++$Mu2)",
-tc_44d3da28, TypeLD>, Enc_74d4e5 {
+tc_075c8dd8, TypeLD>, Enc_74d4e5 {
let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b10011101011;
let hasNewValue = 1;
@@ -9999,7 +10106,7 @@ def L2_loadruh_zomap : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memuh($Rs32)",
-tc_17e0d2cd, TypeMAPPING> {
+tc_4222e6bf, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10009,7 +10116,7 @@ def L2_loadruhgp : HInst<
(outs IntRegs:$Rd32),
(ins u31_1Imm:$Ii),
"$Rd32 = memuh(gp+#$Ii)",
-tc_c4db48cb, TypeV2LDST>, Enc_8df4be, AddrModeRel {
+tc_8a6d0d94, TypeV2LDST>, Enc_8df4be, AddrModeRel {
let Inst{24-21} = 0b1011;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -10028,7 +10135,7 @@ def L2_loadw_locked : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = memw_locked($Rs32)",
-tc_b43e7930, TypeLD>, Enc_5e2823 {
+tc_64b00d8a, TypeLD>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10010010000;
let hasNewValue = 1;
@@ -10041,7 +10148,7 @@ def L2_ploadrbf_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memb($Rs32+#$Ii)",
-tc_5ef37dc4, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_fedb7e19, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101000;
let isPredicated = 1;
@@ -10051,8 +10158,8 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadrb";
let BaseOpcode = "L2_loadrb_io";
+let CextOpcode = "L2_loadrb";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -10063,7 +10170,7 @@ def L2_ploadrbf_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memb($Rx32++#$Ii)",
-tc_3c76b0ff, TypeLD>, Enc_f4413a, PredNewRel {
+tc_1c7522a8, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011000;
let isPredicated = 1;
@@ -10080,7 +10187,7 @@ def L2_ploadrbf_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rd32 = memb($Rs32)",
-tc_5ef37dc4, TypeMAPPING> {
+tc_fedb7e19, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10090,7 +10197,7 @@ def L2_ploadrbfnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memb($Rs32+#$Ii)",
-tc_44d3da28, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_075c8dd8, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111000;
let isPredicated = 1;
@@ -10101,8 +10208,8 @@ let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
-let CextOpcode = "L2_loadrb";
let BaseOpcode = "L2_loadrb_io";
+let CextOpcode = "L2_loadrb";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -10113,7 +10220,7 @@ def L2_ploadrbfnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memb($Rx32++#$Ii)",
-tc_e9f3243f, TypeLD>, Enc_f4413a, PredNewRel {
+tc_5f2afaf7, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011000;
let isPredicated = 1;
@@ -10131,7 +10238,7 @@ def L2_ploadrbfnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rd32 = memb($Rs32)",
-tc_44d3da28, TypeMAPPING> {
+tc_075c8dd8, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10141,7 +10248,7 @@ def L2_ploadrbt_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memb($Rs32+#$Ii)",
-tc_5ef37dc4, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_fedb7e19, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001000;
let isPredicated = 1;
@@ -10150,8 +10257,8 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadrb";
let BaseOpcode = "L2_loadrb_io";
+let CextOpcode = "L2_loadrb";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -10162,7 +10269,7 @@ def L2_ploadrbt_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if ($Pt4) $Rd32 = memb($Rx32++#$Ii)",
-tc_3c76b0ff, TypeLD>, Enc_f4413a, PredNewRel {
+tc_1c7522a8, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011000;
let isPredicated = 1;
@@ -10178,7 +10285,7 @@ def L2_ploadrbt_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rd32 = memb($Rs32)",
-tc_5ef37dc4, TypeMAPPING> {
+tc_fedb7e19, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10188,7 +10295,7 @@ def L2_ploadrbtnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memb($Rs32+#$Ii)",
-tc_44d3da28, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_075c8dd8, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011000;
let isPredicated = 1;
@@ -10198,8 +10305,8 @@ let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
-let CextOpcode = "L2_loadrb";
let BaseOpcode = "L2_loadrb_io";
+let CextOpcode = "L2_loadrb";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -10210,7 +10317,7 @@ def L2_ploadrbtnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memb($Rx32++#$Ii)",
-tc_e9f3243f, TypeLD>, Enc_f4413a, PredNewRel {
+tc_5f2afaf7, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011000;
let isPredicated = 1;
@@ -10227,7 +10334,7 @@ def L2_ploadrbtnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rd32 = memb($Rs32)",
-tc_44d3da28, TypeMAPPING> {
+tc_075c8dd8, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10237,7 +10344,7 @@ def L2_ploadrdf_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
"if (!$Pt4) $Rdd32 = memd($Rs32+#$Ii)",
-tc_5ef37dc4, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
+tc_fedb7e19, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101110;
let isPredicated = 1;
@@ -10245,8 +10352,8 @@ let isPredicatedFalse = 1;
let addrMode = BaseImmOffset;
let accessSize = DoubleWordAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadrd";
let BaseOpcode = "L2_loadrd_io";
+let CextOpcode = "L2_loadrd";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -10257,7 +10364,7 @@ def L2_ploadrdf_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
"if (!$Pt4) $Rdd32 = memd($Rx32++#$Ii)",
-tc_3c76b0ff, TypeLD>, Enc_9d1247, PredNewRel {
+tc_1c7522a8, TypeLD>, Enc_9d1247, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011110;
let isPredicated = 1;
@@ -10272,7 +10379,7 @@ def L2_ploadrdf_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rdd32 = memd($Rs32)",
-tc_5ef37dc4, TypeMAPPING> {
+tc_fedb7e19, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -10280,7 +10387,7 @@ def L2_ploadrdfnew_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
"if (!$Pt4.new) $Rdd32 = memd($Rs32+#$Ii)",
-tc_44d3da28, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
+tc_075c8dd8, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111110;
let isPredicated = 1;
@@ -10289,8 +10396,8 @@ let addrMode = BaseImmOffset;
let accessSize = DoubleWordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
-let CextOpcode = "L2_loadrd";
let BaseOpcode = "L2_loadrd_io";
+let CextOpcode = "L2_loadrd";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -10301,7 +10408,7 @@ def L2_ploadrdfnew_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
"if (!$Pt4.new) $Rdd32 = memd($Rx32++#$Ii)",
-tc_e9f3243f, TypeLD>, Enc_9d1247, PredNewRel {
+tc_5f2afaf7, TypeLD>, Enc_9d1247, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011110;
let isPredicated = 1;
@@ -10317,7 +10424,7 @@ def L2_ploadrdfnew_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rdd32 = memd($Rs32)",
-tc_44d3da28, TypeMAPPING> {
+tc_075c8dd8, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -10325,15 +10432,15 @@ def L2_ploadrdt_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
"if ($Pt4) $Rdd32 = memd($Rs32+#$Ii)",
-tc_5ef37dc4, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
+tc_fedb7e19, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001110;
let isPredicated = 1;
let addrMode = BaseImmOffset;
let accessSize = DoubleWordAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadrd";
let BaseOpcode = "L2_loadrd_io";
+let CextOpcode = "L2_loadrd";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -10344,7 +10451,7 @@ def L2_ploadrdt_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
"if ($Pt4) $Rdd32 = memd($Rx32++#$Ii)",
-tc_3c76b0ff, TypeLD>, Enc_9d1247, PredNewRel {
+tc_1c7522a8, TypeLD>, Enc_9d1247, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011110;
let isPredicated = 1;
@@ -10358,7 +10465,7 @@ def L2_ploadrdt_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rdd32 = memd($Rs32)",
-tc_5ef37dc4, TypeMAPPING> {
+tc_fedb7e19, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -10366,7 +10473,7 @@ def L2_ploadrdtnew_io : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
"if ($Pt4.new) $Rdd32 = memd($Rs32+#$Ii)",
-tc_44d3da28, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
+tc_075c8dd8, TypeV2LDST>, Enc_acd6ed, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011110;
let isPredicated = 1;
@@ -10374,8 +10481,8 @@ let addrMode = BaseImmOffset;
let accessSize = DoubleWordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
-let CextOpcode = "L2_loadrd";
let BaseOpcode = "L2_loadrd_io";
+let CextOpcode = "L2_loadrd";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -10386,7 +10493,7 @@ def L2_ploadrdtnew_pi : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
"if ($Pt4.new) $Rdd32 = memd($Rx32++#$Ii)",
-tc_e9f3243f, TypeLD>, Enc_9d1247, PredNewRel {
+tc_5f2afaf7, TypeLD>, Enc_9d1247, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011110;
let isPredicated = 1;
@@ -10401,7 +10508,7 @@ def L2_ploadrdtnew_zomap : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rdd32 = memd($Rs32)",
-tc_44d3da28, TypeMAPPING> {
+tc_075c8dd8, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -10409,7 +10516,7 @@ def L2_ploadrhf_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if (!$Pt4) $Rd32 = memh($Rs32+#$Ii)",
-tc_5ef37dc4, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_fedb7e19, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101010;
let isPredicated = 1;
@@ -10419,8 +10526,8 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadrh";
let BaseOpcode = "L2_loadrh_io";
+let CextOpcode = "L2_loadrh";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -10431,7 +10538,7 @@ def L2_ploadrhf_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if (!$Pt4) $Rd32 = memh($Rx32++#$Ii)",
-tc_3c76b0ff, TypeLD>, Enc_733b27, PredNewRel {
+tc_1c7522a8, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011010;
let isPredicated = 1;
@@ -10448,7 +10555,7 @@ def L2_ploadrhf_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rd32 = memh($Rs32)",
-tc_5ef37dc4, TypeMAPPING> {
+tc_fedb7e19, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10458,7 +10565,7 @@ def L2_ploadrhfnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memh($Rs32+#$Ii)",
-tc_44d3da28, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_075c8dd8, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111010;
let isPredicated = 1;
@@ -10469,8 +10576,8 @@ let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
-let CextOpcode = "L2_loadrh";
let BaseOpcode = "L2_loadrh_io";
+let CextOpcode = "L2_loadrh";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -10481,7 +10588,7 @@ def L2_ploadrhfnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memh($Rx32++#$Ii)",
-tc_e9f3243f, TypeLD>, Enc_733b27, PredNewRel {
+tc_5f2afaf7, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011010;
let isPredicated = 1;
@@ -10499,7 +10606,7 @@ def L2_ploadrhfnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rd32 = memh($Rs32)",
-tc_44d3da28, TypeMAPPING> {
+tc_075c8dd8, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10509,7 +10616,7 @@ def L2_ploadrht_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if ($Pt4) $Rd32 = memh($Rs32+#$Ii)",
-tc_5ef37dc4, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_fedb7e19, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001010;
let isPredicated = 1;
@@ -10518,8 +10625,8 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadrh";
let BaseOpcode = "L2_loadrh_io";
+let CextOpcode = "L2_loadrh";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -10530,7 +10637,7 @@ def L2_ploadrht_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if ($Pt4) $Rd32 = memh($Rx32++#$Ii)",
-tc_3c76b0ff, TypeLD>, Enc_733b27, PredNewRel {
+tc_1c7522a8, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011010;
let isPredicated = 1;
@@ -10546,7 +10653,7 @@ def L2_ploadrht_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rd32 = memh($Rs32)",
-tc_5ef37dc4, TypeMAPPING> {
+tc_fedb7e19, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10556,7 +10663,7 @@ def L2_ploadrhtnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if ($Pt4.new) $Rd32 = memh($Rs32+#$Ii)",
-tc_44d3da28, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_075c8dd8, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011010;
let isPredicated = 1;
@@ -10566,8 +10673,8 @@ let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
-let CextOpcode = "L2_loadrh";
let BaseOpcode = "L2_loadrh_io";
+let CextOpcode = "L2_loadrh";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -10578,7 +10685,7 @@ def L2_ploadrhtnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if ($Pt4.new) $Rd32 = memh($Rx32++#$Ii)",
-tc_e9f3243f, TypeLD>, Enc_733b27, PredNewRel {
+tc_5f2afaf7, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011010;
let isPredicated = 1;
@@ -10595,7 +10702,7 @@ def L2_ploadrhtnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rd32 = memh($Rs32)",
-tc_44d3da28, TypeMAPPING> {
+tc_075c8dd8, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10605,7 +10712,7 @@ def L2_ploadrif_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
"if (!$Pt4) $Rd32 = memw($Rs32+#$Ii)",
-tc_5ef37dc4, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
+tc_fedb7e19, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101100;
let isPredicated = 1;
@@ -10615,8 +10722,8 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadri";
let BaseOpcode = "L2_loadri_io";
+let CextOpcode = "L2_loadri";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -10627,7 +10734,7 @@ def L2_ploadrif_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
"if (!$Pt4) $Rd32 = memw($Rx32++#$Ii)",
-tc_3c76b0ff, TypeLD>, Enc_b97f71, PredNewRel {
+tc_1c7522a8, TypeLD>, Enc_b97f71, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011100;
let isPredicated = 1;
@@ -10644,7 +10751,7 @@ def L2_ploadrif_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rd32 = memw($Rs32)",
-tc_5ef37dc4, TypeMAPPING> {
+tc_fedb7e19, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10654,7 +10761,7 @@ def L2_ploadrifnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memw($Rs32+#$Ii)",
-tc_44d3da28, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
+tc_075c8dd8, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111100;
let isPredicated = 1;
@@ -10665,8 +10772,8 @@ let addrMode = BaseImmOffset;
let accessSize = WordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
-let CextOpcode = "L2_loadri";
let BaseOpcode = "L2_loadri_io";
+let CextOpcode = "L2_loadri";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -10677,7 +10784,7 @@ def L2_ploadrifnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memw($Rx32++#$Ii)",
-tc_e9f3243f, TypeLD>, Enc_b97f71, PredNewRel {
+tc_5f2afaf7, TypeLD>, Enc_b97f71, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011100;
let isPredicated = 1;
@@ -10695,7 +10802,7 @@ def L2_ploadrifnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rd32 = memw($Rs32)",
-tc_44d3da28, TypeMAPPING> {
+tc_075c8dd8, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10705,7 +10812,7 @@ def L2_ploadrit_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
"if ($Pt4) $Rd32 = memw($Rs32+#$Ii)",
-tc_5ef37dc4, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
+tc_fedb7e19, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001100;
let isPredicated = 1;
@@ -10714,8 +10821,8 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadri";
let BaseOpcode = "L2_loadri_io";
+let CextOpcode = "L2_loadri";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -10726,7 +10833,7 @@ def L2_ploadrit_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
"if ($Pt4) $Rd32 = memw($Rx32++#$Ii)",
-tc_3c76b0ff, TypeLD>, Enc_b97f71, PredNewRel {
+tc_1c7522a8, TypeLD>, Enc_b97f71, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011100;
let isPredicated = 1;
@@ -10742,7 +10849,7 @@ def L2_ploadrit_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rd32 = memw($Rs32)",
-tc_5ef37dc4, TypeMAPPING> {
+tc_fedb7e19, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10752,7 +10859,7 @@ def L2_ploadritnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
"if ($Pt4.new) $Rd32 = memw($Rs32+#$Ii)",
-tc_44d3da28, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
+tc_075c8dd8, TypeV2LDST>, Enc_f82eaf, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011100;
let isPredicated = 1;
@@ -10762,8 +10869,8 @@ let addrMode = BaseImmOffset;
let accessSize = WordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
-let CextOpcode = "L2_loadri";
let BaseOpcode = "L2_loadri_io";
+let CextOpcode = "L2_loadri";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -10774,7 +10881,7 @@ def L2_ploadritnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
"if ($Pt4.new) $Rd32 = memw($Rx32++#$Ii)",
-tc_e9f3243f, TypeLD>, Enc_b97f71, PredNewRel {
+tc_5f2afaf7, TypeLD>, Enc_b97f71, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011100;
let isPredicated = 1;
@@ -10791,7 +10898,7 @@ def L2_ploadritnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rd32 = memw($Rs32)",
-tc_44d3da28, TypeMAPPING> {
+tc_075c8dd8, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10801,7 +10908,7 @@ def L2_ploadrubf_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memub($Rs32+#$Ii)",
-tc_5ef37dc4, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_fedb7e19, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101001;
let isPredicated = 1;
@@ -10811,8 +10918,8 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadrub";
let BaseOpcode = "L2_loadrub_io";
+let CextOpcode = "L2_loadrub";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -10823,7 +10930,7 @@ def L2_ploadrubf_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memub($Rx32++#$Ii)",
-tc_3c76b0ff, TypeLD>, Enc_f4413a, PredNewRel {
+tc_1c7522a8, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011001;
let isPredicated = 1;
@@ -10840,7 +10947,7 @@ def L2_ploadrubf_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rd32 = memub($Rs32)",
-tc_5ef37dc4, TypeMAPPING> {
+tc_fedb7e19, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10850,7 +10957,7 @@ def L2_ploadrubfnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memub($Rs32+#$Ii)",
-tc_44d3da28, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_075c8dd8, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111001;
let isPredicated = 1;
@@ -10861,8 +10968,8 @@ let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
-let CextOpcode = "L2_loadrub";
let BaseOpcode = "L2_loadrub_io";
+let CextOpcode = "L2_loadrub";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -10873,7 +10980,7 @@ def L2_ploadrubfnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memub($Rx32++#$Ii)",
-tc_e9f3243f, TypeLD>, Enc_f4413a, PredNewRel {
+tc_5f2afaf7, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011001;
let isPredicated = 1;
@@ -10891,7 +10998,7 @@ def L2_ploadrubfnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rd32 = memub($Rs32)",
-tc_44d3da28, TypeMAPPING> {
+tc_075c8dd8, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10901,7 +11008,7 @@ def L2_ploadrubt_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memub($Rs32+#$Ii)",
-tc_5ef37dc4, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_fedb7e19, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001001;
let isPredicated = 1;
@@ -10910,8 +11017,8 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadrub";
let BaseOpcode = "L2_loadrub_io";
+let CextOpcode = "L2_loadrub";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -10922,7 +11029,7 @@ def L2_ploadrubt_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if ($Pt4) $Rd32 = memub($Rx32++#$Ii)",
-tc_3c76b0ff, TypeLD>, Enc_f4413a, PredNewRel {
+tc_1c7522a8, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011001;
let isPredicated = 1;
@@ -10938,7 +11045,7 @@ def L2_ploadrubt_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rd32 = memub($Rs32)",
-tc_5ef37dc4, TypeMAPPING> {
+tc_fedb7e19, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10948,7 +11055,7 @@ def L2_ploadrubtnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memub($Rs32+#$Ii)",
-tc_44d3da28, TypeV2LDST>, Enc_a21d47, AddrModeRel {
+tc_075c8dd8, TypeV2LDST>, Enc_a21d47, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011001;
let isPredicated = 1;
@@ -10958,8 +11065,8 @@ let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
-let CextOpcode = "L2_loadrub";
let BaseOpcode = "L2_loadrub_io";
+let CextOpcode = "L2_loadrub";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -10970,7 +11077,7 @@ def L2_ploadrubtnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memub($Rx32++#$Ii)",
-tc_e9f3243f, TypeLD>, Enc_f4413a, PredNewRel {
+tc_5f2afaf7, TypeLD>, Enc_f4413a, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011001;
let isPredicated = 1;
@@ -10987,7 +11094,7 @@ def L2_ploadrubtnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rd32 = memub($Rs32)",
-tc_44d3da28, TypeMAPPING> {
+tc_075c8dd8, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -10997,7 +11104,7 @@ def L2_ploadruhf_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if (!$Pt4) $Rd32 = memuh($Rs32+#$Ii)",
-tc_5ef37dc4, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_fedb7e19, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000101011;
let isPredicated = 1;
@@ -11007,8 +11114,8 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadruh";
let BaseOpcode = "L2_loadruh_io";
+let CextOpcode = "L2_loadruh";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -11019,7 +11126,7 @@ def L2_ploadruhf_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if (!$Pt4) $Rd32 = memuh($Rx32++#$Ii)",
-tc_3c76b0ff, TypeLD>, Enc_733b27, PredNewRel {
+tc_1c7522a8, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011011011;
let isPredicated = 1;
@@ -11036,7 +11143,7 @@ def L2_ploadruhf_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4) $Rd32 = memuh($Rs32)",
-tc_5ef37dc4, TypeMAPPING> {
+tc_fedb7e19, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -11046,7 +11153,7 @@ def L2_ploadruhfnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memuh($Rs32+#$Ii)",
-tc_44d3da28, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_075c8dd8, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000111011;
let isPredicated = 1;
@@ -11057,8 +11164,8 @@ let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
-let CextOpcode = "L2_loadruh";
let BaseOpcode = "L2_loadruh_io";
+let CextOpcode = "L2_loadruh";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -11069,7 +11176,7 @@ def L2_ploadruhfnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memuh($Rx32++#$Ii)",
-tc_e9f3243f, TypeLD>, Enc_733b27, PredNewRel {
+tc_5f2afaf7, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011011011;
let isPredicated = 1;
@@ -11087,7 +11194,7 @@ def L2_ploadruhfnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if (!$Pt4.new) $Rd32 = memuh($Rs32)",
-tc_44d3da28, TypeMAPPING> {
+tc_075c8dd8, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -11097,7 +11204,7 @@ def L2_ploadruht_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if ($Pt4) $Rd32 = memuh($Rs32+#$Ii)",
-tc_5ef37dc4, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_fedb7e19, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000001011;
let isPredicated = 1;
@@ -11106,8 +11213,8 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let mayLoad = 1;
-let CextOpcode = "L2_loadruh";
let BaseOpcode = "L2_loadruh_io";
+let CextOpcode = "L2_loadruh";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -11118,7 +11225,7 @@ def L2_ploadruht_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if ($Pt4) $Rd32 = memuh($Rx32++#$Ii)",
-tc_3c76b0ff, TypeLD>, Enc_733b27, PredNewRel {
+tc_1c7522a8, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011011011;
let isPredicated = 1;
@@ -11134,7 +11241,7 @@ def L2_ploadruht_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4) $Rd32 = memuh($Rs32)",
-tc_5ef37dc4, TypeMAPPING> {
+tc_fedb7e19, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -11144,7 +11251,7 @@ def L2_ploadruhtnew_io : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
"if ($Pt4.new) $Rd32 = memuh($Rs32+#$Ii)",
-tc_44d3da28, TypeV2LDST>, Enc_a198f6, AddrModeRel {
+tc_075c8dd8, TypeV2LDST>, Enc_a198f6, AddrModeRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b01000011011;
let isPredicated = 1;
@@ -11154,8 +11261,8 @@ let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
-let CextOpcode = "L2_loadruh";
let BaseOpcode = "L2_loadruh_io";
+let CextOpcode = "L2_loadruh";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 0;
@@ -11166,7 +11273,7 @@ def L2_ploadruhtnew_pi : HInst<
(outs IntRegs:$Rd32, IntRegs:$Rx32),
(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
"if ($Pt4.new) $Rd32 = memuh($Rx32++#$Ii)",
-tc_e9f3243f, TypeLD>, Enc_733b27, PredNewRel {
+tc_5f2afaf7, TypeLD>, Enc_733b27, PredNewRel {
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011011011;
let isPredicated = 1;
@@ -11183,7 +11290,7 @@ def L2_ploadruhtnew_zomap : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, IntRegs:$Rs32),
"if ($Pt4.new) $Rd32 = memuh($Rs32)",
-tc_44d3da28, TypeMAPPING> {
+tc_075c8dd8, TypeMAPPING> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -11193,7 +11300,7 @@ def L4_add_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) += $Rt32",
-tc_7186d325, TypeV4LDST>, Enc_d44e31 {
+tc_9bcfb2ee, TypeV4LDST>, Enc_d44e31 {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110000;
@@ -11212,7 +11319,7 @@ def L4_add_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memb($Rs32) += $Rt32",
-tc_7186d325, TypeMAPPING> {
+tc_9bcfb2ee, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11220,7 +11327,7 @@ def L4_add_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) += $Rt32",
-tc_7186d325, TypeV4LDST>, Enc_163a3c {
+tc_9bcfb2ee, TypeV4LDST>, Enc_163a3c {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110001;
@@ -11239,7 +11346,7 @@ def L4_add_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) += $Rt32",
-tc_7186d325, TypeMAPPING> {
+tc_9bcfb2ee, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11247,7 +11354,7 @@ def L4_add_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) += $Rt32",
-tc_7186d325, TypeV4LDST>, Enc_226535 {
+tc_9bcfb2ee, TypeV4LDST>, Enc_226535 {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110010;
@@ -11266,7 +11373,7 @@ def L4_add_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw($Rs32) += $Rt32",
-tc_7186d325, TypeMAPPING> {
+tc_9bcfb2ee, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11274,7 +11381,7 @@ def L4_and_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) &= $Rt32",
-tc_7186d325, TypeV4LDST>, Enc_d44e31 {
+tc_9bcfb2ee, TypeV4LDST>, Enc_d44e31 {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110000;
@@ -11293,7 +11400,7 @@ def L4_and_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memb($Rs32) &= $Rt32",
-tc_7186d325, TypeMAPPING> {
+tc_9bcfb2ee, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11301,7 +11408,7 @@ def L4_and_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) &= $Rt32",
-tc_7186d325, TypeV4LDST>, Enc_163a3c {
+tc_9bcfb2ee, TypeV4LDST>, Enc_163a3c {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110001;
@@ -11320,7 +11427,7 @@ def L4_and_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) &= $Rt32",
-tc_7186d325, TypeMAPPING> {
+tc_9bcfb2ee, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11328,7 +11435,7 @@ def L4_and_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) &= $Rt32",
-tc_7186d325, TypeV4LDST>, Enc_226535 {
+tc_9bcfb2ee, TypeV4LDST>, Enc_226535 {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110010;
@@ -11347,7 +11454,7 @@ def L4_and_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw($Rs32) &= $Rt32",
-tc_7186d325, TypeMAPPING> {
+tc_9bcfb2ee, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11355,7 +11462,7 @@ def L4_iadd_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
"memb($Rs32+#$Ii) += #$II",
-tc_096199d3, TypeV4LDST>, Enc_46c951 {
+tc_158aa3f7, TypeV4LDST>, Enc_46c951 {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111000;
@@ -11374,7 +11481,7 @@ def L4_iadd_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memb($Rs32) += #$II",
-tc_096199d3, TypeMAPPING> {
+tc_158aa3f7, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11382,7 +11489,7 @@ def L4_iadd_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
"memh($Rs32+#$Ii) += #$II",
-tc_096199d3, TypeV4LDST>, Enc_e66a97 {
+tc_158aa3f7, TypeV4LDST>, Enc_e66a97 {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111001;
@@ -11401,7 +11508,7 @@ def L4_iadd_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memh($Rs32) += #$II",
-tc_096199d3, TypeMAPPING> {
+tc_158aa3f7, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11409,7 +11516,7 @@ def L4_iadd_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
"memw($Rs32+#$Ii) += #$II",
-tc_096199d3, TypeV4LDST>, Enc_84b2cd {
+tc_158aa3f7, TypeV4LDST>, Enc_84b2cd {
let Inst{6-5} = 0b00;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111010;
@@ -11428,7 +11535,7 @@ def L4_iadd_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memw($Rs32) += #$II",
-tc_096199d3, TypeMAPPING> {
+tc_158aa3f7, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11436,7 +11543,7 @@ def L4_iand_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
"memb($Rs32+#$Ii) = clrbit(#$II)",
-tc_096199d3, TypeV4LDST>, Enc_46c951 {
+tc_158aa3f7, TypeV4LDST>, Enc_46c951 {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111000;
@@ -11455,7 +11562,7 @@ def L4_iand_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memb($Rs32) = clrbit(#$II)",
-tc_096199d3, TypeMAPPING> {
+tc_158aa3f7, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11463,7 +11570,7 @@ def L4_iand_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
"memh($Rs32+#$Ii) = clrbit(#$II)",
-tc_096199d3, TypeV4LDST>, Enc_e66a97 {
+tc_158aa3f7, TypeV4LDST>, Enc_e66a97 {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111001;
@@ -11482,7 +11589,7 @@ def L4_iand_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memh($Rs32) = clrbit(#$II)",
-tc_096199d3, TypeMAPPING> {
+tc_158aa3f7, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11490,7 +11597,7 @@ def L4_iand_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
"memw($Rs32+#$Ii) = clrbit(#$II)",
-tc_096199d3, TypeV4LDST>, Enc_84b2cd {
+tc_158aa3f7, TypeV4LDST>, Enc_84b2cd {
let Inst{6-5} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111010;
@@ -11509,7 +11616,7 @@ def L4_iand_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memw($Rs32) = clrbit(#$II)",
-tc_096199d3, TypeMAPPING> {
+tc_158aa3f7, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11517,7 +11624,7 @@ def L4_ior_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
"memb($Rs32+#$Ii) = setbit(#$II)",
-tc_096199d3, TypeV4LDST>, Enc_46c951 {
+tc_158aa3f7, TypeV4LDST>, Enc_46c951 {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111000;
@@ -11536,7 +11643,7 @@ def L4_ior_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memb($Rs32) = setbit(#$II)",
-tc_096199d3, TypeMAPPING> {
+tc_158aa3f7, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11544,7 +11651,7 @@ def L4_ior_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
"memh($Rs32+#$Ii) = setbit(#$II)",
-tc_096199d3, TypeV4LDST>, Enc_e66a97 {
+tc_158aa3f7, TypeV4LDST>, Enc_e66a97 {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111001;
@@ -11563,7 +11670,7 @@ def L4_ior_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memh($Rs32) = setbit(#$II)",
-tc_096199d3, TypeMAPPING> {
+tc_158aa3f7, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11571,7 +11678,7 @@ def L4_ior_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
"memw($Rs32+#$Ii) = setbit(#$II)",
-tc_096199d3, TypeV4LDST>, Enc_84b2cd {
+tc_158aa3f7, TypeV4LDST>, Enc_84b2cd {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111010;
@@ -11590,7 +11697,7 @@ def L4_ior_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memw($Rs32) = setbit(#$II)",
-tc_096199d3, TypeMAPPING> {
+tc_158aa3f7, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11598,7 +11705,7 @@ def L4_isub_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
"memb($Rs32+#$Ii) -= #$II",
-tc_096199d3, TypeV4LDST>, Enc_46c951 {
+tc_158aa3f7, TypeV4LDST>, Enc_46c951 {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111000;
@@ -11617,7 +11724,7 @@ def L4_isub_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memb($Rs32) -= #$II",
-tc_096199d3, TypeMAPPING> {
+tc_158aa3f7, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11625,7 +11732,7 @@ def L4_isub_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
"memh($Rs32+#$Ii) -= #$II",
-tc_096199d3, TypeV4LDST>, Enc_e66a97 {
+tc_158aa3f7, TypeV4LDST>, Enc_e66a97 {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111001;
@@ -11644,7 +11751,7 @@ def L4_isub_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memh($Rs32) -= #$II",
-tc_096199d3, TypeMAPPING> {
+tc_158aa3f7, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11652,7 +11759,7 @@ def L4_isub_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
"memw($Rs32+#$Ii) -= #$II",
-tc_096199d3, TypeV4LDST>, Enc_84b2cd {
+tc_158aa3f7, TypeV4LDST>, Enc_84b2cd {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111111010;
@@ -11671,7 +11778,7 @@ def L4_isub_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, u5_0Imm:$II),
"memw($Rs32) -= #$II",
-tc_096199d3, TypeMAPPING> {
+tc_158aa3f7, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -11679,7 +11786,7 @@ def L4_loadalignb_ap : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Re32),
(ins DoubleRegs:$Ryy32in, u32_0Imm:$II),
"$Ryy32 = memb_fifo($Re32=#$II)",
-tc_7a91e76a, TypeLD>, Enc_f394d3 {
+tc_ac65613f, TypeLD>, Enc_f394d3 {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010100;
@@ -11699,7 +11806,7 @@ def L4_loadalignb_ur : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Ryy32 = memb_fifo($Rt32<<#$Ii+#$II)",
-tc_a5d4aeec, TypeLD>, Enc_04c959 {
+tc_a32e03e7, TypeLD>, Enc_04c959 {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100100;
let addrMode = BaseLongOffset;
@@ -11719,7 +11826,7 @@ def L4_loadalignh_ap : HInst<
(outs DoubleRegs:$Ryy32, IntRegs:$Re32),
(ins DoubleRegs:$Ryy32in, u32_0Imm:$II),
"$Ryy32 = memh_fifo($Re32=#$II)",
-tc_7a91e76a, TypeLD>, Enc_f394d3 {
+tc_ac65613f, TypeLD>, Enc_f394d3 {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010010;
@@ -11739,7 +11846,7 @@ def L4_loadalignh_ur : HInst<
(outs DoubleRegs:$Ryy32),
(ins DoubleRegs:$Ryy32in, IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Ryy32 = memh_fifo($Rt32<<#$Ii+#$II)",
-tc_a5d4aeec, TypeLD>, Enc_04c959 {
+tc_a32e03e7, TypeLD>, Enc_04c959 {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100010;
let addrMode = BaseLongOffset;
@@ -11759,7 +11866,7 @@ def L4_loadbsw2_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = membh($Re32=#$II)",
-tc_3b5b7ef9, TypeLD>, Enc_323f2d {
+tc_822c3c68, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010001;
@@ -11780,7 +11887,7 @@ def L4_loadbsw2_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = membh($Rt32<<#$Ii+#$II)",
-tc_bab0eed9, TypeLD>, Enc_4f677b {
+tc_abfd9a6d, TypeLD>, Enc_4f677b {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100001;
let hasNewValue = 1;
@@ -11801,7 +11908,7 @@ def L4_loadbsw4_ap : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rdd32 = membh($Re32=#$II)",
-tc_3b5b7ef9, TypeLD>, Enc_7fa7f6 {
+tc_822c3c68, TypeLD>, Enc_7fa7f6 {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010111;
@@ -11820,7 +11927,7 @@ def L4_loadbsw4_ur : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rdd32 = membh($Rt32<<#$Ii+#$II)",
-tc_bab0eed9, TypeLD>, Enc_6185fe {
+tc_abfd9a6d, TypeLD>, Enc_6185fe {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100111;
let addrMode = BaseLongOffset;
@@ -11839,7 +11946,7 @@ def L4_loadbzw2_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memubh($Re32=#$II)",
-tc_3b5b7ef9, TypeLD>, Enc_323f2d {
+tc_822c3c68, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010011;
@@ -11860,7 +11967,7 @@ def L4_loadbzw2_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memubh($Rt32<<#$Ii+#$II)",
-tc_bab0eed9, TypeLD>, Enc_4f677b {
+tc_abfd9a6d, TypeLD>, Enc_4f677b {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100011;
let hasNewValue = 1;
@@ -11881,7 +11988,7 @@ def L4_loadbzw4_ap : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rdd32 = memubh($Re32=#$II)",
-tc_3b5b7ef9, TypeLD>, Enc_7fa7f6 {
+tc_822c3c68, TypeLD>, Enc_7fa7f6 {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011010101;
@@ -11900,7 +12007,7 @@ def L4_loadbzw4_ur : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rdd32 = memubh($Rt32<<#$Ii+#$II)",
-tc_bab0eed9, TypeLD>, Enc_6185fe {
+tc_abfd9a6d, TypeLD>, Enc_6185fe {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011100101;
let addrMode = BaseLongOffset;
@@ -11919,7 +12026,7 @@ def L4_loadd_locked : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = memd_locked($Rs32)",
-tc_b43e7930, TypeLD>, Enc_3a3d62 {
+tc_64b00d8a, TypeLD>, Enc_3a3d62 {
let Inst{13-5} = 0b010000000;
let Inst{31-21} = 0b10010010000;
let accessSize = DoubleWordAccess;
@@ -11930,7 +12037,7 @@ def L4_loadrb_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memb($Re32=#$II)",
-tc_3b5b7ef9, TypeLD>, Enc_323f2d {
+tc_822c3c68, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011000;
@@ -11951,7 +12058,7 @@ def L4_loadrb_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rd32 = memb($Rs32+$Rt32<<#$Ii)",
-tc_bf061958, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
+tc_bf2ffc0f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010000;
let hasNewValue = 1;
@@ -11959,16 +12066,16 @@ let opNewValue = 0;
let addrMode = BaseRegOffset;
let accessSize = ByteAccess;
let mayLoad = 1;
+let BaseOpcode = "L4_loadrb_rr";
let CextOpcode = "L2_loadrb";
let InputType = "reg";
-let BaseOpcode = "L4_loadrb_rr";
let isPredicable = 1;
}
def L4_loadrb_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memb($Rt32<<#$Ii+#$II)",
-tc_bab0eed9, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
+tc_abfd9a6d, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101000;
let hasNewValue = 1;
@@ -11990,7 +12097,7 @@ def L4_loadrd_ap : HInst<
(outs DoubleRegs:$Rdd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rdd32 = memd($Re32=#$II)",
-tc_3b5b7ef9, TypeLD>, Enc_7fa7f6 {
+tc_822c3c68, TypeLD>, Enc_7fa7f6 {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011110;
@@ -12009,22 +12116,22 @@ def L4_loadrd_rr : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
-tc_bf061958, TypeLD>, Enc_84bff1, AddrModeRel, ImmRegShl {
+tc_bf2ffc0f, TypeLD>, Enc_84bff1, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010110;
let addrMode = BaseRegOffset;
let accessSize = DoubleWordAccess;
let mayLoad = 1;
+let BaseOpcode = "L4_loadrd_rr";
let CextOpcode = "L2_loadrd";
let InputType = "reg";
-let BaseOpcode = "L4_loadrd_rr";
let isPredicable = 1;
}
def L4_loadrd_ur : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rdd32 = memd($Rt32<<#$Ii+#$II)",
-tc_bab0eed9, TypeLD>, Enc_6185fe, AddrModeRel, ImmRegShl {
+tc_abfd9a6d, TypeLD>, Enc_6185fe, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101110;
let addrMode = BaseLongOffset;
@@ -12044,7 +12151,7 @@ def L4_loadrh_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memh($Re32=#$II)",
-tc_3b5b7ef9, TypeLD>, Enc_323f2d {
+tc_822c3c68, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011010;
@@ -12065,7 +12172,7 @@ def L4_loadrh_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rd32 = memh($Rs32+$Rt32<<#$Ii)",
-tc_bf061958, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
+tc_bf2ffc0f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010010;
let hasNewValue = 1;
@@ -12073,16 +12180,16 @@ let opNewValue = 0;
let addrMode = BaseRegOffset;
let accessSize = HalfWordAccess;
let mayLoad = 1;
+let BaseOpcode = "L4_loadrh_rr";
let CextOpcode = "L2_loadrh";
let InputType = "reg";
-let BaseOpcode = "L4_loadrh_rr";
let isPredicable = 1;
}
def L4_loadrh_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memh($Rt32<<#$Ii+#$II)",
-tc_bab0eed9, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
+tc_abfd9a6d, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101010;
let hasNewValue = 1;
@@ -12104,7 +12211,7 @@ def L4_loadri_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memw($Re32=#$II)",
-tc_3b5b7ef9, TypeLD>, Enc_323f2d {
+tc_822c3c68, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011100;
@@ -12125,7 +12232,7 @@ def L4_loadri_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rd32 = memw($Rs32+$Rt32<<#$Ii)",
-tc_bf061958, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
+tc_bf2ffc0f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010100;
let hasNewValue = 1;
@@ -12133,16 +12240,16 @@ let opNewValue = 0;
let addrMode = BaseRegOffset;
let accessSize = WordAccess;
let mayLoad = 1;
+let BaseOpcode = "L4_loadri_rr";
let CextOpcode = "L2_loadri";
let InputType = "reg";
-let BaseOpcode = "L4_loadri_rr";
let isPredicable = 1;
}
def L4_loadri_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memw($Rt32<<#$Ii+#$II)",
-tc_bab0eed9, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
+tc_abfd9a6d, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101100;
let hasNewValue = 1;
@@ -12164,7 +12271,7 @@ def L4_loadrub_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memub($Re32=#$II)",
-tc_3b5b7ef9, TypeLD>, Enc_323f2d {
+tc_822c3c68, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011001;
@@ -12185,7 +12292,7 @@ def L4_loadrub_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rd32 = memub($Rs32+$Rt32<<#$Ii)",
-tc_bf061958, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
+tc_bf2ffc0f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010001;
let hasNewValue = 1;
@@ -12193,16 +12300,16 @@ let opNewValue = 0;
let addrMode = BaseRegOffset;
let accessSize = ByteAccess;
let mayLoad = 1;
+let BaseOpcode = "L4_loadrub_rr";
let CextOpcode = "L2_loadrub";
let InputType = "reg";
-let BaseOpcode = "L4_loadrub_rr";
let isPredicable = 1;
}
def L4_loadrub_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memub($Rt32<<#$Ii+#$II)",
-tc_bab0eed9, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
+tc_abfd9a6d, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101001;
let hasNewValue = 1;
@@ -12224,7 +12331,7 @@ def L4_loadruh_ap : HInst<
(outs IntRegs:$Rd32, IntRegs:$Re32),
(ins u32_0Imm:$II),
"$Rd32 = memuh($Re32=#$II)",
-tc_3b5b7ef9, TypeLD>, Enc_323f2d {
+tc_822c3c68, TypeLD>, Enc_323f2d {
let Inst{7-7} = 0b0;
let Inst{13-12} = 0b01;
let Inst{31-21} = 0b10011011011;
@@ -12245,7 +12352,7 @@ def L4_loadruh_rr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
-tc_bf061958, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
+tc_bf2ffc0f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111010011;
let hasNewValue = 1;
@@ -12253,16 +12360,16 @@ let opNewValue = 0;
let addrMode = BaseRegOffset;
let accessSize = HalfWordAccess;
let mayLoad = 1;
+let BaseOpcode = "L4_loadruh_rr";
let CextOpcode = "L2_loadruh";
let InputType = "reg";
-let BaseOpcode = "L4_loadruh_rr";
let isPredicable = 1;
}
def L4_loadruh_ur : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
"$Rd32 = memuh($Rt32<<#$Ii+#$II)",
-tc_bab0eed9, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
+tc_abfd9a6d, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl {
let Inst{12-12} = 0b1;
let Inst{31-21} = 0b10011101011;
let hasNewValue = 1;
@@ -12284,7 +12391,7 @@ def L4_or_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) |= $Rt32",
-tc_7186d325, TypeV4LDST>, Enc_d44e31 {
+tc_9bcfb2ee, TypeV4LDST>, Enc_d44e31 {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110000;
@@ -12303,7 +12410,7 @@ def L4_or_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memb($Rs32) |= $Rt32",
-tc_7186d325, TypeMAPPING> {
+tc_9bcfb2ee, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -12311,7 +12418,7 @@ def L4_or_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) |= $Rt32",
-tc_7186d325, TypeV4LDST>, Enc_163a3c {
+tc_9bcfb2ee, TypeV4LDST>, Enc_163a3c {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110001;
@@ -12330,7 +12437,7 @@ def L4_or_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) |= $Rt32",
-tc_7186d325, TypeMAPPING> {
+tc_9bcfb2ee, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -12338,7 +12445,7 @@ def L4_or_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) |= $Rt32",
-tc_7186d325, TypeV4LDST>, Enc_226535 {
+tc_9bcfb2ee, TypeV4LDST>, Enc_226535 {
let Inst{6-5} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110010;
@@ -12357,7 +12464,7 @@ def L4_or_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw($Rs32) |= $Rt32",
-tc_7186d325, TypeMAPPING> {
+tc_9bcfb2ee, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -12365,7 +12472,7 @@ def L4_ploadrbf_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memb(#$Ii)",
-tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7c6d32e4, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111000;
@@ -12377,8 +12484,8 @@ let addrMode = Absolute;
let accessSize = ByteAccess;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadrb";
let BaseOpcode = "L4_loadrb_abs";
+let CextOpcode = "L2_loadrb";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -12390,7 +12497,7 @@ def L4_ploadrbf_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
-tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_45791fb8, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110001000;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12399,15 +12506,15 @@ let opNewValue = 0;
let addrMode = BaseRegOffset;
let accessSize = ByteAccess;
let mayLoad = 1;
+let BaseOpcode = "L4_loadrb_rr";
let CextOpcode = "L2_loadrb";
let InputType = "reg";
-let BaseOpcode = "L4_loadrb_rr";
}
def L4_ploadrbfnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memb(#$Ii)",
-tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_822c3c68, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111000;
@@ -12420,8 +12527,8 @@ let accessSize = ByteAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadrb";
let BaseOpcode = "L4_loadrb_abs";
+let CextOpcode = "L2_loadrb";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -12433,7 +12540,7 @@ def L4_ploadrbfnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
-tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_b7c4062a, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110011000;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12443,15 +12550,15 @@ let addrMode = BaseRegOffset;
let accessSize = ByteAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
+let BaseOpcode = "L4_loadrb_rr";
let CextOpcode = "L2_loadrb";
let InputType = "reg";
-let BaseOpcode = "L4_loadrb_rr";
}
def L4_ploadrbt_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memb(#$Ii)",
-tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7c6d32e4, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111000;
@@ -12462,8 +12569,8 @@ let addrMode = Absolute;
let accessSize = ByteAccess;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadrb";
let BaseOpcode = "L4_loadrb_abs";
+let CextOpcode = "L2_loadrb";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -12475,7 +12582,7 @@ def L4_ploadrbt_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
-tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_45791fb8, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110000000;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12483,15 +12590,15 @@ let opNewValue = 0;
let addrMode = BaseRegOffset;
let accessSize = ByteAccess;
let mayLoad = 1;
+let BaseOpcode = "L4_loadrb_rr";
let CextOpcode = "L2_loadrb";
let InputType = "reg";
-let BaseOpcode = "L4_loadrb_rr";
}
def L4_ploadrbtnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memb(#$Ii)",
-tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_822c3c68, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111000;
@@ -12503,8 +12610,8 @@ let accessSize = ByteAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadrb";
let BaseOpcode = "L4_loadrb_abs";
+let CextOpcode = "L2_loadrb";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -12516,7 +12623,7 @@ def L4_ploadrbtnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
-tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_b7c4062a, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110010000;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12525,15 +12632,15 @@ let addrMode = BaseRegOffset;
let accessSize = ByteAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
+let BaseOpcode = "L4_loadrb_rr";
let CextOpcode = "L2_loadrb";
let InputType = "reg";
-let BaseOpcode = "L4_loadrb_rr";
}
def L4_ploadrdf_abs : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rdd32 = memd(#$Ii)",
-tc_7646c131, TypeLD>, Enc_2a7b91, AddrModeRel {
+tc_7c6d32e4, TypeLD>, Enc_2a7b91, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111110;
@@ -12543,8 +12650,8 @@ let addrMode = Absolute;
let accessSize = DoubleWordAccess;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadrd";
let BaseOpcode = "L4_loadrd_abs";
+let CextOpcode = "L2_loadrd";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -12556,22 +12663,22 @@ def L4_ploadrdf_rr : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
-tc_e4b3cb20, TypeLD>, Enc_98c0b8, AddrModeRel {
+tc_45791fb8, TypeLD>, Enc_98c0b8, AddrModeRel {
let Inst{31-21} = 0b00110001110;
let isPredicated = 1;
let isPredicatedFalse = 1;
let addrMode = BaseRegOffset;
let accessSize = DoubleWordAccess;
let mayLoad = 1;
+let BaseOpcode = "L4_loadrd_rr";
let CextOpcode = "L2_loadrd";
let InputType = "reg";
-let BaseOpcode = "L4_loadrd_rr";
}
def L4_ploadrdfnew_abs : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rdd32 = memd(#$Ii)",
-tc_3b5b7ef9, TypeLD>, Enc_2a7b91, AddrModeRel {
+tc_822c3c68, TypeLD>, Enc_2a7b91, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111110;
@@ -12582,8 +12689,8 @@ let accessSize = DoubleWordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadrd";
let BaseOpcode = "L4_loadrd_abs";
+let CextOpcode = "L2_loadrd";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -12595,7 +12702,7 @@ def L4_ploadrdfnew_rr : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
-tc_25a78932, TypeLD>, Enc_98c0b8, AddrModeRel {
+tc_b7c4062a, TypeLD>, Enc_98c0b8, AddrModeRel {
let Inst{31-21} = 0b00110011110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12603,15 +12710,15 @@ let addrMode = BaseRegOffset;
let accessSize = DoubleWordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
+let BaseOpcode = "L4_loadrd_rr";
let CextOpcode = "L2_loadrd";
let InputType = "reg";
-let BaseOpcode = "L4_loadrd_rr";
}
def L4_ploadrdt_abs : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rdd32 = memd(#$Ii)",
-tc_7646c131, TypeLD>, Enc_2a7b91, AddrModeRel {
+tc_7c6d32e4, TypeLD>, Enc_2a7b91, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111110;
@@ -12620,8 +12727,8 @@ let addrMode = Absolute;
let accessSize = DoubleWordAccess;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadrd";
let BaseOpcode = "L4_loadrd_abs";
+let CextOpcode = "L2_loadrd";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -12633,21 +12740,21 @@ def L4_ploadrdt_rr : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
-tc_e4b3cb20, TypeLD>, Enc_98c0b8, AddrModeRel {
+tc_45791fb8, TypeLD>, Enc_98c0b8, AddrModeRel {
let Inst{31-21} = 0b00110000110;
let isPredicated = 1;
let addrMode = BaseRegOffset;
let accessSize = DoubleWordAccess;
let mayLoad = 1;
+let BaseOpcode = "L4_loadrd_rr";
let CextOpcode = "L2_loadrd";
let InputType = "reg";
-let BaseOpcode = "L4_loadrd_rr";
}
def L4_ploadrdtnew_abs : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rdd32 = memd(#$Ii)",
-tc_3b5b7ef9, TypeLD>, Enc_2a7b91, AddrModeRel {
+tc_822c3c68, TypeLD>, Enc_2a7b91, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111110;
@@ -12657,8 +12764,8 @@ let accessSize = DoubleWordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadrd";
let BaseOpcode = "L4_loadrd_abs";
+let CextOpcode = "L2_loadrd";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -12670,22 +12777,22 @@ def L4_ploadrdtnew_rr : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
-tc_25a78932, TypeLD>, Enc_98c0b8, AddrModeRel {
+tc_b7c4062a, TypeLD>, Enc_98c0b8, AddrModeRel {
let Inst{31-21} = 0b00110010110;
let isPredicated = 1;
let addrMode = BaseRegOffset;
let accessSize = DoubleWordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
+let BaseOpcode = "L4_loadrd_rr";
let CextOpcode = "L2_loadrd";
let InputType = "reg";
-let BaseOpcode = "L4_loadrd_rr";
}
def L4_ploadrhf_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memh(#$Ii)",
-tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7c6d32e4, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111010;
@@ -12697,8 +12804,8 @@ let addrMode = Absolute;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadrh";
let BaseOpcode = "L4_loadrh_abs";
+let CextOpcode = "L2_loadrh";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -12710,7 +12817,7 @@ def L4_ploadrhf_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
-tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_45791fb8, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110001010;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12719,15 +12826,15 @@ let opNewValue = 0;
let addrMode = BaseRegOffset;
let accessSize = HalfWordAccess;
let mayLoad = 1;
+let BaseOpcode = "L4_loadrh_rr";
let CextOpcode = "L2_loadrh";
let InputType = "reg";
-let BaseOpcode = "L4_loadrh_rr";
}
def L4_ploadrhfnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memh(#$Ii)",
-tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_822c3c68, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111010;
@@ -12740,8 +12847,8 @@ let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadrh";
let BaseOpcode = "L4_loadrh_abs";
+let CextOpcode = "L2_loadrh";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -12753,7 +12860,7 @@ def L4_ploadrhfnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
-tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_b7c4062a, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110011010;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12763,15 +12870,15 @@ let addrMode = BaseRegOffset;
let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
+let BaseOpcode = "L4_loadrh_rr";
let CextOpcode = "L2_loadrh";
let InputType = "reg";
-let BaseOpcode = "L4_loadrh_rr";
}
def L4_ploadrht_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memh(#$Ii)",
-tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7c6d32e4, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111010;
@@ -12782,8 +12889,8 @@ let addrMode = Absolute;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadrh";
let BaseOpcode = "L4_loadrh_abs";
+let CextOpcode = "L2_loadrh";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -12795,7 +12902,7 @@ def L4_ploadrht_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
-tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_45791fb8, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110000010;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12803,15 +12910,15 @@ let opNewValue = 0;
let addrMode = BaseRegOffset;
let accessSize = HalfWordAccess;
let mayLoad = 1;
+let BaseOpcode = "L4_loadrh_rr";
let CextOpcode = "L2_loadrh";
let InputType = "reg";
-let BaseOpcode = "L4_loadrh_rr";
}
def L4_ploadrhtnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memh(#$Ii)",
-tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_822c3c68, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111010;
@@ -12823,8 +12930,8 @@ let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadrh";
let BaseOpcode = "L4_loadrh_abs";
+let CextOpcode = "L2_loadrh";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -12836,7 +12943,7 @@ def L4_ploadrhtnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
-tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_b7c4062a, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110010010;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12845,15 +12952,15 @@ let addrMode = BaseRegOffset;
let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
+let BaseOpcode = "L4_loadrh_rr";
let CextOpcode = "L2_loadrh";
let InputType = "reg";
-let BaseOpcode = "L4_loadrh_rr";
}
def L4_ploadrif_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memw(#$Ii)",
-tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7c6d32e4, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111100;
@@ -12865,8 +12972,8 @@ let addrMode = Absolute;
let accessSize = WordAccess;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadri";
let BaseOpcode = "L4_loadri_abs";
+let CextOpcode = "L2_loadri";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -12878,7 +12985,7 @@ def L4_ploadrif_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
-tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_45791fb8, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110001100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12887,15 +12994,15 @@ let opNewValue = 0;
let addrMode = BaseRegOffset;
let accessSize = WordAccess;
let mayLoad = 1;
+let BaseOpcode = "L4_loadri_rr";
let CextOpcode = "L2_loadri";
let InputType = "reg";
-let BaseOpcode = "L4_loadri_rr";
}
def L4_ploadrifnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memw(#$Ii)",
-tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_822c3c68, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111100;
@@ -12908,8 +13015,8 @@ let accessSize = WordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadri";
let BaseOpcode = "L4_loadri_abs";
+let CextOpcode = "L2_loadri";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -12921,7 +13028,7 @@ def L4_ploadrifnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
-tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_b7c4062a, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110011100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -12931,15 +13038,15 @@ let addrMode = BaseRegOffset;
let accessSize = WordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
+let BaseOpcode = "L4_loadri_rr";
let CextOpcode = "L2_loadri";
let InputType = "reg";
-let BaseOpcode = "L4_loadri_rr";
}
def L4_ploadrit_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memw(#$Ii)",
-tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7c6d32e4, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111100;
@@ -12950,8 +13057,8 @@ let addrMode = Absolute;
let accessSize = WordAccess;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadri";
let BaseOpcode = "L4_loadri_abs";
+let CextOpcode = "L2_loadri";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -12963,7 +13070,7 @@ def L4_ploadrit_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
-tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_45791fb8, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110000100;
let isPredicated = 1;
let hasNewValue = 1;
@@ -12971,15 +13078,15 @@ let opNewValue = 0;
let addrMode = BaseRegOffset;
let accessSize = WordAccess;
let mayLoad = 1;
+let BaseOpcode = "L4_loadri_rr";
let CextOpcode = "L2_loadri";
let InputType = "reg";
-let BaseOpcode = "L4_loadri_rr";
}
def L4_ploadritnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memw(#$Ii)",
-tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_822c3c68, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111100;
@@ -12991,8 +13098,8 @@ let accessSize = WordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadri";
let BaseOpcode = "L4_loadri_abs";
+let CextOpcode = "L2_loadri";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -13004,7 +13111,7 @@ def L4_ploadritnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
-tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_b7c4062a, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110010100;
let isPredicated = 1;
let hasNewValue = 1;
@@ -13013,15 +13120,15 @@ let addrMode = BaseRegOffset;
let accessSize = WordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
+let BaseOpcode = "L4_loadri_rr";
let CextOpcode = "L2_loadri";
let InputType = "reg";
-let BaseOpcode = "L4_loadri_rr";
}
def L4_ploadrubf_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memub(#$Ii)",
-tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7c6d32e4, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111001;
@@ -13033,8 +13140,8 @@ let addrMode = Absolute;
let accessSize = ByteAccess;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadrub";
let BaseOpcode = "L4_loadrub_abs";
+let CextOpcode = "L2_loadrub";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -13046,7 +13153,7 @@ def L4_ploadrubf_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
-tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_45791fb8, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110001001;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -13055,15 +13162,15 @@ let opNewValue = 0;
let addrMode = BaseRegOffset;
let accessSize = ByteAccess;
let mayLoad = 1;
+let BaseOpcode = "L4_loadrub_rr";
let CextOpcode = "L2_loadrub";
let InputType = "reg";
-let BaseOpcode = "L4_loadrub_rr";
}
def L4_ploadrubfnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memub(#$Ii)",
-tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_822c3c68, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111001;
@@ -13076,8 +13183,8 @@ let accessSize = ByteAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadrub";
let BaseOpcode = "L4_loadrub_abs";
+let CextOpcode = "L2_loadrub";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -13089,7 +13196,7 @@ def L4_ploadrubfnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
-tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_b7c4062a, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110011001;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -13099,15 +13206,15 @@ let addrMode = BaseRegOffset;
let accessSize = ByteAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
+let BaseOpcode = "L4_loadrub_rr";
let CextOpcode = "L2_loadrub";
let InputType = "reg";
-let BaseOpcode = "L4_loadrub_rr";
}
def L4_ploadrubt_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memub(#$Ii)",
-tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7c6d32e4, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111001;
@@ -13118,8 +13225,8 @@ let addrMode = Absolute;
let accessSize = ByteAccess;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadrub";
let BaseOpcode = "L4_loadrub_abs";
+let CextOpcode = "L2_loadrub";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -13131,7 +13238,7 @@ def L4_ploadrubt_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
-tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_45791fb8, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110000001;
let isPredicated = 1;
let hasNewValue = 1;
@@ -13139,15 +13246,15 @@ let opNewValue = 0;
let addrMode = BaseRegOffset;
let accessSize = ByteAccess;
let mayLoad = 1;
+let BaseOpcode = "L4_loadrub_rr";
let CextOpcode = "L2_loadrub";
let InputType = "reg";
-let BaseOpcode = "L4_loadrub_rr";
}
def L4_ploadrubtnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memub(#$Ii)",
-tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_822c3c68, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111001;
@@ -13159,8 +13266,8 @@ let accessSize = ByteAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadrub";
let BaseOpcode = "L4_loadrub_abs";
+let CextOpcode = "L2_loadrub";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -13172,7 +13279,7 @@ def L4_ploadrubtnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
-tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_b7c4062a, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110010001;
let isPredicated = 1;
let hasNewValue = 1;
@@ -13181,15 +13288,15 @@ let addrMode = BaseRegOffset;
let accessSize = ByteAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
+let BaseOpcode = "L4_loadrub_rr";
let CextOpcode = "L2_loadrub";
let InputType = "reg";
-let BaseOpcode = "L4_loadrub_rr";
}
def L4_ploadruhf_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4) $Rd32 = memuh(#$Ii)",
-tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7c6d32e4, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b101;
let Inst{31-21} = 0b10011111011;
@@ -13201,8 +13308,8 @@ let addrMode = Absolute;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadruh";
let BaseOpcode = "L4_loadruh_abs";
+let CextOpcode = "L2_loadruh";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -13214,7 +13321,7 @@ def L4_ploadruhf_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
-tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_45791fb8, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110001011;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -13223,15 +13330,15 @@ let opNewValue = 0;
let addrMode = BaseRegOffset;
let accessSize = HalfWordAccess;
let mayLoad = 1;
+let BaseOpcode = "L4_loadruh_rr";
let CextOpcode = "L2_loadruh";
let InputType = "reg";
-let BaseOpcode = "L4_loadruh_rr";
}
def L4_ploadruhfnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if (!$Pt4.new) $Rd32 = memuh(#$Ii)",
-tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_822c3c68, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b111;
let Inst{31-21} = 0b10011111011;
@@ -13244,8 +13351,8 @@ let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadruh";
let BaseOpcode = "L4_loadruh_abs";
+let CextOpcode = "L2_loadruh";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -13257,7 +13364,7 @@ def L4_ploadruhfnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if (!$Pv4.new) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
-tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_b7c4062a, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110011011;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -13267,15 +13374,15 @@ let addrMode = BaseRegOffset;
let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
+let BaseOpcode = "L4_loadruh_rr";
let CextOpcode = "L2_loadruh";
let InputType = "reg";
-let BaseOpcode = "L4_loadruh_rr";
}
def L4_ploadruht_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4) $Rd32 = memuh(#$Ii)",
-tc_7646c131, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_7c6d32e4, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b100;
let Inst{31-21} = 0b10011111011;
@@ -13286,8 +13393,8 @@ let addrMode = Absolute;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadruh";
let BaseOpcode = "L4_loadruh_abs";
+let CextOpcode = "L2_loadruh";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -13299,7 +13406,7 @@ def L4_ploadruht_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
-tc_e4b3cb20, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_45791fb8, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110000011;
let isPredicated = 1;
let hasNewValue = 1;
@@ -13307,15 +13414,15 @@ let opNewValue = 0;
let addrMode = BaseRegOffset;
let accessSize = HalfWordAccess;
let mayLoad = 1;
+let BaseOpcode = "L4_loadruh_rr";
let CextOpcode = "L2_loadruh";
let InputType = "reg";
-let BaseOpcode = "L4_loadruh_rr";
}
def L4_ploadruhtnew_abs : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pt4, u32_0Imm:$Ii),
"if ($Pt4.new) $Rd32 = memuh(#$Ii)",
-tc_3b5b7ef9, TypeLD>, Enc_2301d6, AddrModeRel {
+tc_822c3c68, TypeLD>, Enc_2301d6, AddrModeRel {
let Inst{7-5} = 0b100;
let Inst{13-11} = 0b110;
let Inst{31-21} = 0b10011111011;
@@ -13327,8 +13434,8 @@ let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadruh";
let BaseOpcode = "L4_loadruh_abs";
+let CextOpcode = "L2_loadruh";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -13340,7 +13447,7 @@ def L4_ploadruhtnew_rr : HInst<
(outs IntRegs:$Rd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
"if ($Pv4.new) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
-tc_25a78932, TypeLD>, Enc_2e1979, AddrModeRel {
+tc_b7c4062a, TypeLD>, Enc_2e1979, AddrModeRel {
let Inst{31-21} = 0b00110010011;
let isPredicated = 1;
let hasNewValue = 1;
@@ -13349,15 +13456,15 @@ let addrMode = BaseRegOffset;
let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayLoad = 1;
+let BaseOpcode = "L4_loadruh_rr";
let CextOpcode = "L2_loadruh";
let InputType = "reg";
-let BaseOpcode = "L4_loadruh_rr";
}
def L4_return : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = dealloc_return($Rs32):raw",
-tc_675e4897, TypeLD>, Enc_3a3d62, PredNewRel {
+tc_40d64c94, TypeLD>, Enc_3a3d62, PredNewRel {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10010110000;
let isTerminator = 1;
@@ -13378,7 +13485,7 @@ def L4_return_f : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if (!$Pv4) $Rdd32 = dealloc_return($Rs32):raw",
-tc_2b8da4c2, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_df5d53f9, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1100;
let Inst{31-21} = 0b10010110000;
@@ -13400,7 +13507,7 @@ def L4_return_fnew_pnt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if (!$Pv4.new) $Rdd32 = dealloc_return($Rs32):nt:raw",
-tc_9da59d12, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_14ab4f41, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1010;
let Inst{31-21} = 0b10010110000;
@@ -13423,7 +13530,7 @@ def L4_return_fnew_pt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if (!$Pv4.new) $Rdd32 = dealloc_return($Rs32):t:raw",
-tc_9da59d12, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_14ab4f41, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b1110;
let Inst{31-21} = 0b10010110000;
@@ -13446,7 +13553,7 @@ def L4_return_map_to_raw_f : HInst<
(outs),
(ins PredRegs:$Pv4),
"if (!$Pv4) dealloc_return",
-tc_2b8da4c2, TypeMAPPING>, Requires<[HasV65]> {
+tc_df5d53f9, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13454,7 +13561,7 @@ def L4_return_map_to_raw_fnew_pnt : HInst<
(outs),
(ins PredRegs:$Pv4),
"if (!$Pv4.new) dealloc_return:nt",
-tc_9da59d12, TypeMAPPING>, Requires<[HasV65]> {
+tc_14ab4f41, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13462,7 +13569,7 @@ def L4_return_map_to_raw_fnew_pt : HInst<
(outs),
(ins PredRegs:$Pv4),
"if (!$Pv4.new) dealloc_return:t",
-tc_9da59d12, TypeMAPPING>, Requires<[HasV65]> {
+tc_14ab4f41, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13470,7 +13577,7 @@ def L4_return_map_to_raw_t : HInst<
(outs),
(ins PredRegs:$Pv4),
"if ($Pv4) dealloc_return",
-tc_4d5fa3a1, TypeMAPPING>, Requires<[HasV65]> {
+tc_f38f92e1, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13478,7 +13585,7 @@ def L4_return_map_to_raw_tnew_pnt : HInst<
(outs),
(ins PredRegs:$Pv4),
"if ($Pv4.new) dealloc_return:nt",
-tc_e06f432a, TypeMAPPING>, Requires<[HasV65]> {
+tc_1981450d, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13486,7 +13593,7 @@ def L4_return_map_to_raw_tnew_pt : HInst<
(outs),
(ins PredRegs:$Pv4),
"if ($Pv4.new) dealloc_return:t",
-tc_e06f432a, TypeMAPPING>, Requires<[HasV65]> {
+tc_1981450d, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13494,7 +13601,7 @@ def L4_return_t : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if ($Pv4) $Rdd32 = dealloc_return($Rs32):raw",
-tc_2b8da4c2, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_df5d53f9, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b0100;
let Inst{31-21} = 0b10010110000;
@@ -13515,7 +13622,7 @@ def L4_return_tnew_pnt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if ($Pv4.new) $Rdd32 = dealloc_return($Rs32):nt:raw",
-tc_9da59d12, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_14ab4f41, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b0010;
let Inst{31-21} = 0b10010110000;
@@ -13537,7 +13644,7 @@ def L4_return_tnew_pt : HInst<
(outs DoubleRegs:$Rdd32),
(ins PredRegs:$Pv4, IntRegs:$Rs32),
"if ($Pv4.new) $Rdd32 = dealloc_return($Rs32):t:raw",
-tc_9da59d12, TypeLD>, Enc_b7fad3, PredNewRel {
+tc_14ab4f41, TypeLD>, Enc_b7fad3, PredNewRel {
let Inst{7-5} = 0b000;
let Inst{13-10} = 0b0110;
let Inst{31-21} = 0b10010110000;
@@ -13559,7 +13666,7 @@ def L4_sub_memopb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) -= $Rt32",
-tc_7186d325, TypeV4LDST>, Enc_d44e31 {
+tc_9bcfb2ee, TypeV4LDST>, Enc_d44e31 {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110000;
@@ -13578,7 +13685,7 @@ def L4_sub_memopb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memb($Rs32) -= $Rt32",
-tc_7186d325, TypeMAPPING> {
+tc_9bcfb2ee, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13586,7 +13693,7 @@ def L4_sub_memoph_io : HInst<
(outs),
(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) -= $Rt32",
-tc_7186d325, TypeV4LDST>, Enc_163a3c {
+tc_9bcfb2ee, TypeV4LDST>, Enc_163a3c {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110001;
@@ -13605,7 +13712,7 @@ def L4_sub_memoph_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) -= $Rt32",
-tc_7186d325, TypeMAPPING> {
+tc_9bcfb2ee, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13613,7 +13720,7 @@ def L4_sub_memopw_io : HInst<
(outs),
(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) -= $Rt32",
-tc_7186d325, TypeV4LDST>, Enc_226535 {
+tc_9bcfb2ee, TypeV4LDST>, Enc_226535 {
let Inst{6-5} = 0b01;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00111110010;
@@ -13632,7 +13739,7 @@ def L4_sub_memopw_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw($Rs32) -= $Rt32",
-tc_7186d325, TypeMAPPING> {
+tc_9bcfb2ee, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13640,7 +13747,7 @@ def L6_deallocframe_map_to_raw : HInst<
(outs),
(ins),
"deallocframe",
-tc_15aa71c5, TypeMAPPING>, Requires<[HasV65]> {
+tc_e9170fb7, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13648,7 +13755,7 @@ def L6_memcpy : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32, ModRegs:$Mu2),
"memcpy($Rs32,$Rt32,$Mu2)",
-tc_a6b1eca9, TypeLD>, Enc_a75aa6, Requires<[HasV66]> {
+tc_5944960d, TypeLD>, Enc_a75aa6, Requires<[HasV66]> {
let Inst{7-0} = 0b01000000;
let Inst{31-21} = 0b10010010000;
let mayLoad = 1;
@@ -13659,7 +13766,7 @@ def L6_return_map_to_raw : HInst<
(outs),
(ins),
"dealloc_return",
-tc_675e4897, TypeMAPPING>, Requires<[HasV65]> {
+tc_40d64c94, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13667,7 +13774,7 @@ def M2_acci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += add($Rs32,$Rt32)",
-tc_f675fee8, TypeM>, Enc_2ae154, ImmRegRel {
+tc_2c13e7f5, TypeM>, Enc_2ae154, ImmRegRel {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -13682,7 +13789,7 @@ def M2_accii : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rx32 += add($Rs32,#$Ii)",
-tc_f675fee8, TypeM>, Enc_c90aca, ImmRegRel {
+tc_2c13e7f5, TypeM>, Enc_c90aca, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100010000;
let hasNewValue = 1;
@@ -13701,7 +13808,7 @@ def M2_cmaci_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpyi($Rs32,$Rt32)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -13712,7 +13819,7 @@ def M2_cmacr_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpyr($Rs32,$Rt32)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -13723,7 +13830,7 @@ def M2_cmacs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpy($Rs32,$Rt32):sat",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -13735,7 +13842,7 @@ def M2_cmacs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpy($Rs32,$Rt32):<<1:sat",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111100;
@@ -13747,7 +13854,7 @@ def M2_cmacsc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpy($Rs32,$Rt32*):sat",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111010;
@@ -13759,7 +13866,7 @@ def M2_cmacsc_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += cmpy($Rs32,$Rt32*):<<1:sat",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111110;
@@ -13771,7 +13878,7 @@ def M2_cmpyi_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpyi($Rs32,$Rt32)",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -13781,7 +13888,7 @@ def M2_cmpyr_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpyr($Rs32,$Rt32)",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -13791,7 +13898,7 @@ def M2_cmpyrs_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cmpy($Rs32,$Rt32):rnd:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101001;
@@ -13804,7 +13911,7 @@ def M2_cmpyrs_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cmpy($Rs32,$Rt32):<<1:rnd:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -13817,7 +13924,7 @@ def M2_cmpyrsc_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cmpy($Rs32,$Rt32*):rnd:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101011;
@@ -13830,7 +13937,7 @@ def M2_cmpyrsc_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = cmpy($Rs32,$Rt32*):<<1:rnd:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101111;
@@ -13843,7 +13950,7 @@ def M2_cmpys_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpy($Rs32,$Rt32):sat",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -13854,7 +13961,7 @@ def M2_cmpys_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpy($Rs32,$Rt32):<<1:sat",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101100;
@@ -13865,7 +13972,7 @@ def M2_cmpysc_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpy($Rs32,$Rt32*):sat",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101010;
@@ -13876,7 +13983,7 @@ def M2_cmpysc_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = cmpy($Rs32,$Rt32*):<<1:sat",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101110;
@@ -13887,7 +13994,7 @@ def M2_cnacs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= cmpy($Rs32,$Rt32):sat",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -13899,7 +14006,7 @@ def M2_cnacs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= cmpy($Rs32,$Rt32):<<1:sat",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111100;
@@ -13911,7 +14018,7 @@ def M2_cnacsc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= cmpy($Rs32,$Rt32*):sat",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111010;
@@ -13923,7 +14030,7 @@ def M2_cnacsc_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= cmpy($Rs32,$Rt32*):<<1:sat",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111110;
@@ -13935,7 +14042,7 @@ def M2_dpmpyss_acc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32,$Rt32)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -13946,7 +14053,7 @@ def M2_dpmpyss_nac_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32,$Rt32)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111001;
@@ -13957,7 +14064,7 @@ def M2_dpmpyss_rnd_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32):rnd",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101001;
@@ -13969,7 +14076,7 @@ def M2_dpmpyss_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32,$Rt32)",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -13979,7 +14086,7 @@ def M2_dpmpyuu_acc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32,$Rt32)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111010;
@@ -13990,7 +14097,7 @@ def M2_dpmpyuu_nac_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32,$Rt32)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111011;
@@ -14001,7 +14108,7 @@ def M2_dpmpyuu_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32,$Rt32)",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101010;
@@ -14011,7 +14118,7 @@ def M2_hmmpyh_rs1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32.h):<<1:rnd:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -14024,7 +14131,7 @@ def M2_hmmpyh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32.h):<<1:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -14037,7 +14144,7 @@ def M2_hmmpyl_rs1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32.l):<<1:rnd:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101111;
@@ -14050,7 +14157,7 @@ def M2_hmmpyl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32.l):<<1:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -14063,7 +14170,7 @@ def M2_maci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyi($Rs32,$Rt32)",
-tc_d773585a, TypeM>, Enc_2ae154, ImmRegRel {
+tc_7f8ae742, TypeM>, Enc_2ae154, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -14078,7 +14185,7 @@ def M2_macsin : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u32_0Imm:$Ii),
"$Rx32 -= mpyi($Rs32,#$Ii)",
-tc_05d3a09b, TypeM>, Enc_c90aca {
+tc_a154b476, TypeM>, Enc_c90aca {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100001100;
let hasNewValue = 1;
@@ -14096,7 +14203,7 @@ def M2_macsip : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u32_0Imm:$Ii),
"$Rx32 += mpyi($Rs32,#$Ii)",
-tc_05d3a09b, TypeM>, Enc_c90aca, ImmRegRel {
+tc_a154b476, TypeM>, Enc_c90aca, ImmRegRel {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100001000;
let hasNewValue = 1;
@@ -14115,7 +14222,7 @@ def M2_mmachs_rs0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywoh($Rss32,$Rtt32):rnd:sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -14127,7 +14234,7 @@ def M2_mmachs_rs1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywoh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -14139,7 +14246,7 @@ def M2_mmachs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywoh($Rss32,$Rtt32):sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -14151,7 +14258,7 @@ def M2_mmachs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywoh($Rss32,$Rtt32):<<1:sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010100;
@@ -14163,7 +14270,7 @@ def M2_mmacls_rs0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweh($Rss32,$Rtt32):rnd:sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -14175,7 +14282,7 @@ def M2_mmacls_rs1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -14187,7 +14294,7 @@ def M2_mmacls_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweh($Rss32,$Rtt32):sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -14199,7 +14306,7 @@ def M2_mmacls_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweh($Rss32,$Rtt32):<<1:sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010100;
@@ -14211,7 +14318,7 @@ def M2_mmacuhs_rs0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywouh($Rss32,$Rtt32):rnd:sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010011;
@@ -14223,7 +14330,7 @@ def M2_mmacuhs_rs1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywouh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
@@ -14235,7 +14342,7 @@ def M2_mmacuhs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywouh($Rss32,$Rtt32):sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -14247,7 +14354,7 @@ def M2_mmacuhs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpywouh($Rss32,$Rtt32):<<1:sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010110;
@@ -14259,7 +14366,7 @@ def M2_mmaculs_rs0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweuh($Rss32,$Rtt32):rnd:sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010011;
@@ -14271,7 +14378,7 @@ def M2_mmaculs_rs1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweuh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
@@ -14283,7 +14390,7 @@ def M2_mmaculs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweuh($Rss32,$Rtt32):sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -14295,7 +14402,7 @@ def M2_mmaculs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyweuh($Rss32,$Rtt32):<<1:sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010110;
@@ -14307,7 +14414,7 @@ def M2_mmpyh_rs0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywoh($Rss32,$Rtt32):rnd:sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000001;
@@ -14318,7 +14425,7 @@ def M2_mmpyh_rs1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywoh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -14329,7 +14436,7 @@ def M2_mmpyh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywoh($Rss32,$Rtt32):sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -14340,7 +14447,7 @@ def M2_mmpyh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywoh($Rss32,$Rtt32):<<1:sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -14351,7 +14458,7 @@ def M2_mmpyl_rs0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweh($Rss32,$Rtt32):rnd:sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000001;
@@ -14362,7 +14469,7 @@ def M2_mmpyl_rs1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -14373,7 +14480,7 @@ def M2_mmpyl_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweh($Rss32,$Rtt32):sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -14384,7 +14491,7 @@ def M2_mmpyl_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweh($Rss32,$Rtt32):<<1:sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -14395,7 +14502,7 @@ def M2_mmpyuh_rs0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywouh($Rss32,$Rtt32):rnd:sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000011;
@@ -14406,7 +14513,7 @@ def M2_mmpyuh_rs1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywouh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000111;
@@ -14417,7 +14524,7 @@ def M2_mmpyuh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywouh($Rss32,$Rtt32):sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -14428,7 +14535,7 @@ def M2_mmpyuh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpywouh($Rss32,$Rtt32):<<1:sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000110;
@@ -14439,7 +14546,7 @@ def M2_mmpyul_rs0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweuh($Rss32,$Rtt32):rnd:sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000011;
@@ -14450,7 +14557,7 @@ def M2_mmpyul_rs1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweuh($Rss32,$Rtt32):<<1:rnd:sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000111;
@@ -14461,7 +14568,7 @@ def M2_mmpyul_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweuh($Rss32,$Rtt32):sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -14472,7 +14579,7 @@ def M2_mmpyul_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyweuh($Rss32,$Rtt32):<<1:sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000110;
@@ -14483,7 +14590,7 @@ def M2_mnaci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyi($Rs32,$Rt32)",
-tc_bdceeac1, TypeM>, Enc_2ae154, Requires<[HasV66]> {
+tc_01e1be3b, TypeM>, Enc_2ae154, Requires<[HasV66]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111100;
@@ -14496,7 +14603,7 @@ def M2_mpy_acc_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.h)",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14509,7 +14616,7 @@ def M2_mpy_acc_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.h):<<1",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14522,7 +14629,7 @@ def M2_mpy_acc_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.l)",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14535,7 +14642,7 @@ def M2_mpy_acc_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.l):<<1",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14548,7 +14655,7 @@ def M2_mpy_acc_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.h)",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14561,7 +14668,7 @@ def M2_mpy_acc_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.h):<<1",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14574,7 +14681,7 @@ def M2_mpy_acc_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.l)",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14587,7 +14694,7 @@ def M2_mpy_acc_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.l):<<1",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14600,7 +14707,7 @@ def M2_mpy_acc_sat_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.h):sat",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14614,7 +14721,7 @@ def M2_mpy_acc_sat_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.h):<<1:sat",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14628,7 +14735,7 @@ def M2_mpy_acc_sat_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.l):sat",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14642,7 +14749,7 @@ def M2_mpy_acc_sat_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.h,$Rt32.l):<<1:sat",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14656,7 +14763,7 @@ def M2_mpy_acc_sat_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.h):sat",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14670,7 +14777,7 @@ def M2_mpy_acc_sat_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.h):<<1:sat",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14684,7 +14791,7 @@ def M2_mpy_acc_sat_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.l):sat",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110000;
@@ -14698,7 +14805,7 @@ def M2_mpy_acc_sat_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32.l,$Rt32.l):<<1:sat",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110100;
@@ -14712,7 +14819,7 @@ def M2_mpy_hh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h)",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -14724,7 +14831,7 @@ def M2_mpy_hh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -14736,7 +14843,7 @@ def M2_mpy_hl_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l)",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -14748,7 +14855,7 @@ def M2_mpy_hl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -14760,7 +14867,7 @@ def M2_mpy_lh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h)",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -14772,7 +14879,7 @@ def M2_mpy_lh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -14784,7 +14891,7 @@ def M2_mpy_ll_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l)",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -14796,7 +14903,7 @@ def M2_mpy_ll_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -14808,7 +14915,7 @@ def M2_mpy_nac_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.h)",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14821,7 +14928,7 @@ def M2_mpy_nac_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.h):<<1",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14834,7 +14941,7 @@ def M2_mpy_nac_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.l)",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14847,7 +14954,7 @@ def M2_mpy_nac_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.l):<<1",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14860,7 +14967,7 @@ def M2_mpy_nac_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.h)",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14873,7 +14980,7 @@ def M2_mpy_nac_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.h):<<1",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14886,7 +14993,7 @@ def M2_mpy_nac_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.l)",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14899,7 +15006,7 @@ def M2_mpy_nac_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.l):<<1",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14912,7 +15019,7 @@ def M2_mpy_nac_sat_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.h):sat",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14926,7 +15033,7 @@ def M2_mpy_nac_sat_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.h):<<1:sat",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14940,7 +15047,7 @@ def M2_mpy_nac_sat_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.l):sat",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14954,7 +15061,7 @@ def M2_mpy_nac_sat_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.h,$Rt32.l):<<1:sat",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14968,7 +15075,7 @@ def M2_mpy_nac_sat_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.h):sat",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -14982,7 +15089,7 @@ def M2_mpy_nac_sat_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.h):<<1:sat",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -14996,7 +15103,7 @@ def M2_mpy_nac_sat_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.l):sat",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110001;
@@ -15010,7 +15117,7 @@ def M2_mpy_nac_sat_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32.l,$Rt32.l):<<1:sat",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110101;
@@ -15024,7 +15131,7 @@ def M2_mpy_rnd_hh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):rnd",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15036,7 +15143,7 @@ def M2_mpy_rnd_hh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15048,7 +15155,7 @@ def M2_mpy_rnd_hl_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):rnd",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15060,7 +15167,7 @@ def M2_mpy_rnd_hl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15072,7 +15179,7 @@ def M2_mpy_rnd_lh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):rnd",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15084,7 +15191,7 @@ def M2_mpy_rnd_lh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15096,7 +15203,7 @@ def M2_mpy_rnd_ll_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):rnd",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15108,7 +15215,7 @@ def M2_mpy_rnd_ll_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15120,7 +15227,7 @@ def M2_mpy_sat_hh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -15133,7 +15240,7 @@ def M2_mpy_sat_hh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -15146,7 +15253,7 @@ def M2_mpy_sat_hl_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -15159,7 +15266,7 @@ def M2_mpy_sat_hl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -15172,7 +15279,7 @@ def M2_mpy_sat_lh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -15185,7 +15292,7 @@ def M2_mpy_sat_lh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -15198,7 +15305,7 @@ def M2_mpy_sat_ll_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100000;
@@ -15211,7 +15318,7 @@ def M2_mpy_sat_ll_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100100;
@@ -15224,7 +15331,7 @@ def M2_mpy_sat_rnd_hh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):rnd:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15237,7 +15344,7 @@ def M2_mpy_sat_rnd_hh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15250,7 +15357,7 @@ def M2_mpy_sat_rnd_hl_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):rnd:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15263,7 +15370,7 @@ def M2_mpy_sat_rnd_hl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15276,7 +15383,7 @@ def M2_mpy_sat_rnd_lh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):rnd:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15289,7 +15396,7 @@ def M2_mpy_sat_rnd_lh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15302,7 +15409,7 @@ def M2_mpy_sat_rnd_ll_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):rnd:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100001;
@@ -15315,7 +15422,7 @@ def M2_mpy_sat_rnd_ll_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100101;
@@ -15328,7 +15435,7 @@ def M2_mpy_up : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32)",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101000;
@@ -15340,7 +15447,7 @@ def M2_mpy_up_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32):<<1",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -15352,7 +15459,7 @@ def M2_mpy_up_s1_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpy($Rs32,$Rt32):<<1:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101111;
@@ -15365,7 +15472,7 @@ def M2_mpyd_acc_hh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.h,$Rt32.h)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110000;
@@ -15376,7 +15483,7 @@ def M2_mpyd_acc_hh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.h,$Rt32.h):<<1",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110100;
@@ -15387,7 +15494,7 @@ def M2_mpyd_acc_hl_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.h,$Rt32.l)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110000;
@@ -15398,7 +15505,7 @@ def M2_mpyd_acc_hl_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.h,$Rt32.l):<<1",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110100;
@@ -15409,7 +15516,7 @@ def M2_mpyd_acc_lh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.l,$Rt32.h)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110000;
@@ -15420,7 +15527,7 @@ def M2_mpyd_acc_lh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.l,$Rt32.h):<<1",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110100;
@@ -15431,7 +15538,7 @@ def M2_mpyd_acc_ll_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.l,$Rt32.l)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110000;
@@ -15442,7 +15549,7 @@ def M2_mpyd_acc_ll_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpy($Rs32.l,$Rt32.l):<<1",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110100;
@@ -15453,7 +15560,7 @@ def M2_mpyd_hh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.h)",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100000;
@@ -15463,7 +15570,7 @@ def M2_mpyd_hh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.h):<<1",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100100;
@@ -15473,7 +15580,7 @@ def M2_mpyd_hl_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.l)",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100000;
@@ -15483,7 +15590,7 @@ def M2_mpyd_hl_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.l):<<1",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100100;
@@ -15493,7 +15600,7 @@ def M2_mpyd_lh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.h)",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100000;
@@ -15503,7 +15610,7 @@ def M2_mpyd_lh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.h):<<1",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100100;
@@ -15513,7 +15620,7 @@ def M2_mpyd_ll_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.l)",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100000;
@@ -15523,7 +15630,7 @@ def M2_mpyd_ll_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.l):<<1",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100100;
@@ -15533,7 +15640,7 @@ def M2_mpyd_nac_hh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.h,$Rt32.h)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110001;
@@ -15544,7 +15651,7 @@ def M2_mpyd_nac_hh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.h,$Rt32.h):<<1",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110101;
@@ -15555,7 +15662,7 @@ def M2_mpyd_nac_hl_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.h,$Rt32.l)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110001;
@@ -15566,7 +15673,7 @@ def M2_mpyd_nac_hl_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.h,$Rt32.l):<<1",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110101;
@@ -15577,7 +15684,7 @@ def M2_mpyd_nac_lh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.l,$Rt32.h)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110001;
@@ -15588,7 +15695,7 @@ def M2_mpyd_nac_lh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.l,$Rt32.h):<<1",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110101;
@@ -15599,7 +15706,7 @@ def M2_mpyd_nac_ll_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.l,$Rt32.l)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110001;
@@ -15610,7 +15717,7 @@ def M2_mpyd_nac_ll_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpy($Rs32.l,$Rt32.l):<<1",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110101;
@@ -15621,7 +15728,7 @@ def M2_mpyd_rnd_hh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.h):rnd",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100001;
@@ -15631,7 +15738,7 @@ def M2_mpyd_rnd_hh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100101;
@@ -15641,7 +15748,7 @@ def M2_mpyd_rnd_hl_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.l):rnd",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100001;
@@ -15651,7 +15758,7 @@ def M2_mpyd_rnd_hl_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100101;
@@ -15661,7 +15768,7 @@ def M2_mpyd_rnd_lh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.h):rnd",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100001;
@@ -15671,7 +15778,7 @@ def M2_mpyd_rnd_lh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100101;
@@ -15681,7 +15788,7 @@ def M2_mpyd_rnd_ll_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.l):rnd",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100001;
@@ -15691,7 +15798,7 @@ def M2_mpyd_rnd_ll_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100101;
@@ -15701,7 +15808,7 @@ def M2_mpyi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyi($Rs32,$Rt32)",
-tc_bafaade3, TypeM>, Enc_5ab2be, ImmRegRel {
+tc_c21d7447, TypeM>, Enc_5ab2be, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101000;
@@ -15715,7 +15822,7 @@ def M2_mpysin : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u8_0Imm:$Ii),
"$Rd32 = -mpyi($Rs32,#$Ii)",
-tc_c8ce0b5c, TypeM>, Enc_b8c967 {
+tc_38382228, TypeM>, Enc_b8c967 {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100000100;
let hasNewValue = 1;
@@ -15726,7 +15833,7 @@ def M2_mpysip : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u32_0Imm:$Ii),
"$Rd32 = +mpyi($Rs32,#$Ii)",
-tc_c8ce0b5c, TypeM>, Enc_b8c967 {
+tc_38382228, TypeM>, Enc_b8c967 {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100000000;
let hasNewValue = 1;
@@ -15742,7 +15849,7 @@ def M2_mpysmi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, m32_0Imm:$Ii),
"$Rd32 = mpyi($Rs32,#$Ii)",
-tc_c8ce0b5c, TypeM>, ImmRegRel {
+tc_38382228, TypeM>, ImmRegRel {
let hasNewValue = 1;
let opNewValue = 0;
let CextOpcode = "M2_mpyi";
@@ -15758,7 +15865,7 @@ def M2_mpysu_up : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpysu($Rs32,$Rt32)",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101011;
@@ -15770,7 +15877,7 @@ def M2_mpyu_acc_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.h,$Rt32.h)",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110010;
@@ -15783,7 +15890,7 @@ def M2_mpyu_acc_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.h,$Rt32.h):<<1",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110110;
@@ -15796,7 +15903,7 @@ def M2_mpyu_acc_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.h,$Rt32.l)",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110010;
@@ -15809,7 +15916,7 @@ def M2_mpyu_acc_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.h,$Rt32.l):<<1",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110110;
@@ -15822,7 +15929,7 @@ def M2_mpyu_acc_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.l,$Rt32.h)",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110010;
@@ -15835,7 +15942,7 @@ def M2_mpyu_acc_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.l,$Rt32.h):<<1",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110110;
@@ -15848,7 +15955,7 @@ def M2_mpyu_acc_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.l,$Rt32.l)",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110010;
@@ -15861,7 +15968,7 @@ def M2_mpyu_acc_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpyu($Rs32.l,$Rt32.l):<<1",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110110;
@@ -15874,7 +15981,7 @@ def M2_mpyu_hh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.h,$Rt32.h)",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100010;
@@ -15886,7 +15993,7 @@ def M2_mpyu_hh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.h,$Rt32.h):<<1",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100110;
@@ -15898,7 +16005,7 @@ def M2_mpyu_hl_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.h,$Rt32.l)",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100010;
@@ -15910,7 +16017,7 @@ def M2_mpyu_hl_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.h,$Rt32.l):<<1",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100110;
@@ -15922,7 +16029,7 @@ def M2_mpyu_lh_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.l,$Rt32.h)",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100010;
@@ -15934,7 +16041,7 @@ def M2_mpyu_lh_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.l,$Rt32.h):<<1",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100110;
@@ -15946,7 +16053,7 @@ def M2_mpyu_ll_s0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.l,$Rt32.l)",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100010;
@@ -15958,7 +16065,7 @@ def M2_mpyu_ll_s1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32.l,$Rt32.l):<<1",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101100110;
@@ -15970,7 +16077,7 @@ def M2_mpyu_nac_hh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.h,$Rt32.h)",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110011;
@@ -15983,7 +16090,7 @@ def M2_mpyu_nac_hh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.h,$Rt32.h):<<1",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110111;
@@ -15996,7 +16103,7 @@ def M2_mpyu_nac_hl_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.h,$Rt32.l)",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110011;
@@ -16009,7 +16116,7 @@ def M2_mpyu_nac_hl_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.h,$Rt32.l):<<1",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110111;
@@ -16022,7 +16129,7 @@ def M2_mpyu_nac_lh_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.l,$Rt32.h)",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110011;
@@ -16035,7 +16142,7 @@ def M2_mpyu_nac_lh_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.l,$Rt32.h):<<1",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110111;
@@ -16048,7 +16155,7 @@ def M2_mpyu_nac_ll_s0 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.l,$Rt32.l)",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110011;
@@ -16061,7 +16168,7 @@ def M2_mpyu_nac_ll_s1 : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpyu($Rs32.l,$Rt32.l):<<1",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101110111;
@@ -16074,7 +16181,7 @@ def M2_mpyu_up : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyu($Rs32,$Rt32)",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101010;
@@ -16086,7 +16193,7 @@ def M2_mpyud_acc_hh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.h,$Rt32.h)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110010;
@@ -16097,7 +16204,7 @@ def M2_mpyud_acc_hh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.h,$Rt32.h):<<1",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110110;
@@ -16108,7 +16215,7 @@ def M2_mpyud_acc_hl_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.h,$Rt32.l)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110010;
@@ -16119,7 +16226,7 @@ def M2_mpyud_acc_hl_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.h,$Rt32.l):<<1",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110110;
@@ -16130,7 +16237,7 @@ def M2_mpyud_acc_lh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.l,$Rt32.h)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110010;
@@ -16141,7 +16248,7 @@ def M2_mpyud_acc_lh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.l,$Rt32.h):<<1",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110110;
@@ -16152,7 +16259,7 @@ def M2_mpyud_acc_ll_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.l,$Rt32.l)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110010;
@@ -16163,7 +16270,7 @@ def M2_mpyud_acc_ll_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += mpyu($Rs32.l,$Rt32.l):<<1",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110110;
@@ -16174,7 +16281,7 @@ def M2_mpyud_hh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.h,$Rt32.h)",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100010;
@@ -16184,7 +16291,7 @@ def M2_mpyud_hh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.h,$Rt32.h):<<1",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100110;
@@ -16194,7 +16301,7 @@ def M2_mpyud_hl_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.h,$Rt32.l)",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100010;
@@ -16204,7 +16311,7 @@ def M2_mpyud_hl_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.h,$Rt32.l):<<1",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100110;
@@ -16214,7 +16321,7 @@ def M2_mpyud_lh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.l,$Rt32.h)",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100010;
@@ -16224,7 +16331,7 @@ def M2_mpyud_lh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.l,$Rt32.h):<<1",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100110;
@@ -16234,7 +16341,7 @@ def M2_mpyud_ll_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.l,$Rt32.l)",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100010;
@@ -16244,7 +16351,7 @@ def M2_mpyud_ll_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = mpyu($Rs32.l,$Rt32.l):<<1",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100100110;
@@ -16254,7 +16361,7 @@ def M2_mpyud_nac_hh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.h,$Rt32.h)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110011;
@@ -16265,7 +16372,7 @@ def M2_mpyud_nac_hh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.h,$Rt32.h):<<1",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110111;
@@ -16276,7 +16383,7 @@ def M2_mpyud_nac_hl_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.h,$Rt32.l)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110011;
@@ -16287,7 +16394,7 @@ def M2_mpyud_nac_hl_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.h,$Rt32.l):<<1",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110111;
@@ -16298,7 +16405,7 @@ def M2_mpyud_nac_lh_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.l,$Rt32.h)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110011;
@@ -16309,7 +16416,7 @@ def M2_mpyud_nac_lh_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.l,$Rt32.h):<<1",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110111;
@@ -16320,7 +16427,7 @@ def M2_mpyud_nac_ll_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.l,$Rt32.l)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110011;
@@ -16331,7 +16438,7 @@ def M2_mpyud_nac_ll_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 -= mpyu($Rs32.l,$Rt32.l):<<1",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100110111;
@@ -16342,7 +16449,7 @@ def M2_mpyui : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = mpyui($Rs32,$Rt32)",
-tc_bafaade3, TypeM> {
+tc_c21d7447, TypeM> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -16352,7 +16459,7 @@ def M2_nacci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= add($Rs32,$Rt32)",
-tc_f675fee8, TypeM>, Enc_2ae154 {
+tc_2c13e7f5, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111100;
@@ -16366,7 +16473,7 @@ def M2_naccii : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rx32 -= add($Rs32,#$Ii)",
-tc_f675fee8, TypeM>, Enc_c90aca {
+tc_2c13e7f5, TypeM>, Enc_c90aca {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100010100;
let hasNewValue = 1;
@@ -16384,7 +16491,7 @@ def M2_subacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rt32, IntRegs:$Rs32),
"$Rx32 += sub($Rt32,$Rs32)",
-tc_f675fee8, TypeM>, Enc_a568d4 {
+tc_2c13e7f5, TypeM>, Enc_a568d4 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -16398,7 +16505,7 @@ def M2_vabsdiffh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vabsdiffh($Rtt32,$Rss32)",
-tc_002cb246, TypeM>, Enc_ea23e4 {
+tc_0dfac0a7, TypeM>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000011;
@@ -16408,7 +16515,7 @@ def M2_vabsdiffw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vabsdiffw($Rtt32,$Rss32)",
-tc_002cb246, TypeM>, Enc_ea23e4 {
+tc_0dfac0a7, TypeM>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000001;
@@ -16418,7 +16525,7 @@ def M2_vcmac_s0_sat_i : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vcmpyi($Rss32,$Rtt32):sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -16430,7 +16537,7 @@ def M2_vcmac_s0_sat_r : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vcmpyr($Rss32,$Rtt32):sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -16442,7 +16549,7 @@ def M2_vcmpy_s0_sat_i : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vcmpyi($Rss32,$Rtt32):sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -16453,7 +16560,7 @@ def M2_vcmpy_s0_sat_r : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vcmpyr($Rss32,$Rtt32):sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000001;
@@ -16464,7 +16571,7 @@ def M2_vcmpy_s1_sat_i : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vcmpyi($Rss32,$Rtt32):<<1:sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000110;
@@ -16475,7 +16582,7 @@ def M2_vcmpy_s1_sat_r : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vcmpyr($Rss32,$Rtt32):<<1:sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -16486,7 +16593,7 @@ def M2_vdmacs_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vdmpy($Rss32,$Rtt32):sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -16498,7 +16605,7 @@ def M2_vdmacs_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vdmpy($Rss32,$Rtt32):<<1:sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010100;
@@ -16510,7 +16617,7 @@ def M2_vdmpyrs_s0 : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vdmpy($Rss32,$Rtt32):rnd:sat",
-tc_bafaade3, TypeM>, Enc_d2216a {
+tc_c21d7447, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001000;
@@ -16523,7 +16630,7 @@ def M2_vdmpyrs_s1 : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vdmpy($Rss32,$Rtt32):<<1:rnd:sat",
-tc_bafaade3, TypeM>, Enc_d2216a {
+tc_c21d7447, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001100;
@@ -16536,7 +16643,7 @@ def M2_vdmpys_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vdmpy($Rss32,$Rtt32):sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -16547,7 +16654,7 @@ def M2_vdmpys_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vdmpy($Rss32,$Rtt32):<<1:sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -16558,7 +16665,7 @@ def M2_vmac2 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpyh($Rs32,$Rt32)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111001;
@@ -16569,7 +16676,7 @@ def M2_vmac2es : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyeh($Rss32,$Rtt32)",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -16580,7 +16687,7 @@ def M2_vmac2es_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyeh($Rss32,$Rtt32):sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -16592,7 +16699,7 @@ def M2_vmac2es_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vmpyeh($Rss32,$Rtt32):<<1:sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010100;
@@ -16604,7 +16711,7 @@ def M2_vmac2s_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpyh($Rs32,$Rt32):sat",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111000;
@@ -16616,7 +16723,7 @@ def M2_vmac2s_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpyh($Rs32,$Rt32):<<1:sat",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111100;
@@ -16628,7 +16735,7 @@ def M2_vmac2su_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpyhsu($Rs32,$Rt32):sat",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111011;
@@ -16640,7 +16747,7 @@ def M2_vmac2su_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpyhsu($Rs32,$Rt32):<<1:sat",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111111;
@@ -16652,7 +16759,7 @@ def M2_vmpy2es_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyeh($Rss32,$Rtt32):sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -16663,7 +16770,7 @@ def M2_vmpy2es_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vmpyeh($Rss32,$Rtt32):<<1:sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -16674,7 +16781,7 @@ def M2_vmpy2s_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpyh($Rs32,$Rt32):sat",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -16685,7 +16792,7 @@ def M2_vmpy2s_s0pack : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vmpyh($Rs32,$Rt32):rnd:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101001;
@@ -16698,7 +16805,7 @@ def M2_vmpy2s_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpyh($Rs32,$Rt32):<<1:sat",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101100;
@@ -16709,7 +16816,7 @@ def M2_vmpy2s_s1pack : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = vmpyh($Rs32,$Rt32):<<1:rnd:sat",
-tc_bafaade3, TypeM>, Enc_5ab2be {
+tc_c21d7447, TypeM>, Enc_5ab2be {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101101101;
@@ -16722,7 +16829,7 @@ def M2_vmpy2su_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpyhsu($Rs32,$Rt32):sat",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101000;
@@ -16733,7 +16840,7 @@ def M2_vmpy2su_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpyhsu($Rs32,$Rt32):<<1:sat",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101100;
@@ -16744,7 +16851,7 @@ def M2_vraddh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vraddh($Rss32,$Rtt32)",
-tc_bafaade3, TypeM>, Enc_d2216a {
+tc_c21d7447, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001001;
@@ -16756,7 +16863,7 @@ def M2_vradduh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vradduh($Rss32,$Rtt32)",
-tc_bafaade3, TypeM>, Enc_d2216a {
+tc_c21d7447, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001000;
@@ -16768,7 +16875,7 @@ def M2_vrcmaci_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpyi($Rss32,$Rtt32)",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -16779,7 +16886,7 @@ def M2_vrcmaci_s0c : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpyi($Rss32,$Rtt32*)",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010010;
@@ -16790,7 +16897,7 @@ def M2_vrcmacr_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpyr($Rss32,$Rtt32)",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -16801,7 +16908,7 @@ def M2_vrcmacr_s0c : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpyr($Rss32,$Rtt32*)",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010011;
@@ -16812,7 +16919,7 @@ def M2_vrcmpyi_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpyi($Rss32,$Rtt32)",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -16822,7 +16929,7 @@ def M2_vrcmpyi_s0c : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpyi($Rss32,$Rtt32*)",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -16832,7 +16939,7 @@ def M2_vrcmpyr_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpyr($Rss32,$Rtt32)",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -16842,7 +16949,7 @@ def M2_vrcmpyr_s0c : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpyr($Rss32,$Rtt32*)",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000011;
@@ -16852,7 +16959,7 @@ def M2_vrcmpys_acc_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += vrcmpys($Rss32,$Rt32):<<1:sat",
-tc_d773585a, TypeM> {
+tc_7f8ae742, TypeM> {
let isPseudo = 1;
let Constraints = "$Rxx32 = $Rxx32in";
}
@@ -16860,7 +16967,7 @@ def M2_vrcmpys_acc_s1_h : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpys($Rss32,$Rtt32):<<1:sat:raw:hi",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -16872,7 +16979,7 @@ def M2_vrcmpys_acc_s1_l : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrcmpys($Rss32,$Rtt32):<<1:sat:raw:lo",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
@@ -16884,14 +16991,14 @@ def M2_vrcmpys_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vrcmpys($Rss32,$Rt32):<<1:sat",
-tc_bafaade3, TypeM> {
+tc_c21d7447, TypeM> {
let isPseudo = 1;
}
def M2_vrcmpys_s1_h : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpys($Rss32,$Rtt32):<<1:sat:raw:hi",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -16902,7 +17009,7 @@ def M2_vrcmpys_s1_l : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrcmpys($Rss32,$Rtt32):<<1:sat:raw:lo",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000111;
@@ -16913,7 +17020,7 @@ def M2_vrcmpys_s1rp : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = vrcmpys($Rss32,$Rt32):<<1:rnd:sat",
-tc_bafaade3, TypeM> {
+tc_c21d7447, TypeM> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -16922,7 +17029,7 @@ def M2_vrcmpys_s1rp_h : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vrcmpys($Rss32,$Rtt32):<<1:rnd:sat:raw:hi",
-tc_bafaade3, TypeM>, Enc_d2216a {
+tc_c21d7447, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001101;
@@ -16935,7 +17042,7 @@ def M2_vrcmpys_s1rp_l : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vrcmpys($Rss32,$Rtt32):<<1:rnd:sat:raw:lo",
-tc_bafaade3, TypeM>, Enc_d2216a {
+tc_c21d7447, TypeM>, Enc_d2216a {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101001101;
@@ -16948,7 +17055,7 @@ def M2_vrmac_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpyh($Rss32,$Rtt32)",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010000;
@@ -16959,7 +17066,7 @@ def M2_vrmpy_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpyh($Rss32,$Rtt32)",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000000;
@@ -16969,7 +17076,7 @@ def M2_xor_xacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 ^= xor($Rs32,$Rt32)",
-tc_f429765c, TypeM>, Enc_2ae154 {
+tc_a4e22bbd, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111100;
@@ -16983,7 +17090,7 @@ def M4_and_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= and($Rs32,$Rt32)",
-tc_f429765c, TypeM>, Enc_2ae154 {
+tc_a4e22bbd, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111010;
@@ -16997,7 +17104,7 @@ def M4_and_andn : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= and($Rs32,~$Rt32)",
-tc_f429765c, TypeM>, Enc_2ae154 {
+tc_a4e22bbd, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111001;
@@ -17011,7 +17118,7 @@ def M4_and_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= or($Rs32,$Rt32)",
-tc_f429765c, TypeM>, Enc_2ae154 {
+tc_a4e22bbd, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111010;
@@ -17025,7 +17132,7 @@ def M4_and_xor : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= xor($Rs32,$Rt32)",
-tc_f429765c, TypeM>, Enc_2ae154 {
+tc_a4e22bbd, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111010;
@@ -17039,7 +17146,7 @@ def M4_cmpyi_wh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = cmpyiwh($Rss32,$Rt32):<<1:rnd:sat",
-tc_bafaade3, TypeS_3op>, Enc_3d5b28 {
+tc_c21d7447, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -17052,7 +17159,7 @@ def M4_cmpyi_whc : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = cmpyiwh($Rss32,$Rt32*):<<1:rnd:sat",
-tc_bafaade3, TypeS_3op>, Enc_3d5b28 {
+tc_c21d7447, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -17065,7 +17172,7 @@ def M4_cmpyr_wh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = cmpyrwh($Rss32,$Rt32):<<1:rnd:sat",
-tc_bafaade3, TypeS_3op>, Enc_3d5b28 {
+tc_c21d7447, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -17078,7 +17185,7 @@ def M4_cmpyr_whc : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = cmpyrwh($Rss32,$Rt32*):<<1:rnd:sat",
-tc_bafaade3, TypeS_3op>, Enc_3d5b28 {
+tc_c21d7447, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -17091,7 +17198,7 @@ def M4_mac_up_s1_sat : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += mpy($Rs32,$Rt32):<<1:sat",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111011;
@@ -17106,7 +17213,7 @@ def M4_mpyri_addi : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii, IntRegs:$Rs32, u6_0Imm:$II),
"$Rd32 = add(#$Ii,mpyi($Rs32,#$II))",
-tc_05d3a09b, TypeALU64>, Enc_322e1b, ImmRegRel {
+tc_a154b476, TypeALU64>, Enc_322e1b, Requires<[UseCompound]>, ImmRegRel {
let Inst{31-24} = 0b11011000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -17122,7 +17229,7 @@ def M4_mpyri_addr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Ru32, IntRegs:$Rs32, u32_0Imm:$Ii),
"$Rd32 = add($Ru32,mpyi($Rs32,#$Ii))",
-tc_05d3a09b, TypeALU64>, Enc_420cf3, ImmRegRel {
+tc_a154b476, TypeALU64>, Enc_420cf3, Requires<[UseCompound]>, ImmRegRel {
let Inst{31-23} = 0b110111111;
let hasNewValue = 1;
let opNewValue = 0;
@@ -17139,7 +17246,7 @@ def M4_mpyri_addr_u2 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Ru32, u6_2Imm:$Ii, IntRegs:$Rs32),
"$Rd32 = add($Ru32,mpyi(#$Ii,$Rs32))",
-tc_1a2fd869, TypeALU64>, Enc_277737 {
+tc_503ce0f3, TypeALU64>, Enc_277737, Requires<[UseCompound]> {
let Inst{31-23} = 0b110111110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -17149,7 +17256,7 @@ def M4_mpyrr_addi : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = add(#$Ii,mpyi($Rs32,$Rt32))",
-tc_d773585a, TypeALU64>, Enc_a7b8e8, ImmRegRel {
+tc_7f8ae742, TypeALU64>, Enc_a7b8e8, Requires<[UseCompound]>, ImmRegRel {
let Inst{31-23} = 0b110101110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -17166,7 +17273,7 @@ def M4_mpyrr_addr : HInst<
(outs IntRegs:$Ry32),
(ins IntRegs:$Ru32, IntRegs:$Ry32in, IntRegs:$Rs32),
"$Ry32 = add($Ru32,mpyi($Ry32in,$Rs32))",
-tc_d773585a, TypeM>, Enc_7f1a05, ImmRegRel {
+tc_7f8ae742, TypeM>, Enc_7f1a05, Requires<[UseCompound]>, ImmRegRel {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100011000;
@@ -17181,7 +17288,7 @@ def M4_nac_up_s1_sat : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= mpy($Rs32,$Rt32):<<1:sat",
-tc_d773585a, TypeM>, Enc_2ae154 {
+tc_7f8ae742, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111011;
@@ -17196,7 +17303,7 @@ def M4_or_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= and($Rs32,$Rt32)",
-tc_f429765c, TypeM>, Enc_2ae154 {
+tc_a4e22bbd, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111010;
@@ -17210,7 +17317,7 @@ def M4_or_andn : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= and($Rs32,~$Rt32)",
-tc_f429765c, TypeM>, Enc_2ae154 {
+tc_a4e22bbd, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111001;
@@ -17224,7 +17331,7 @@ def M4_or_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= or($Rs32,$Rt32)",
-tc_f429765c, TypeM>, Enc_2ae154 {
+tc_a4e22bbd, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111110;
@@ -17238,7 +17345,7 @@ def M4_or_xor : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= xor($Rs32,$Rt32)",
-tc_f429765c, TypeM>, Enc_2ae154 {
+tc_a4e22bbd, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111110;
@@ -17252,7 +17359,7 @@ def M4_pmpyw : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = pmpyw($Rs32,$Rt32)",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101010;
@@ -17262,7 +17369,7 @@ def M4_pmpyw_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 ^= pmpyw($Rs32,$Rt32)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111001;
@@ -17273,7 +17380,7 @@ def M4_vpmpyh : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vpmpyh($Rs32,$Rt32)",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101110;
@@ -17283,7 +17390,7 @@ def M4_vpmpyh_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 ^= vpmpyh($Rs32,$Rt32)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111101;
@@ -17294,7 +17401,7 @@ def M4_vrmpyeh_acc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpyweh($Rss32,$Rtt32)",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -17305,7 +17412,7 @@ def M4_vrmpyeh_acc_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpyweh($Rss32,$Rtt32):<<1",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -17316,7 +17423,7 @@ def M4_vrmpyeh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpyweh($Rss32,$Rtt32)",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000010;
@@ -17326,7 +17433,7 @@ def M4_vrmpyeh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpyweh($Rss32,$Rtt32):<<1",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000110;
@@ -17336,7 +17443,7 @@ def M4_vrmpyoh_acc_s0 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpywoh($Rss32,$Rtt32)",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010011;
@@ -17347,7 +17454,7 @@ def M4_vrmpyoh_acc_s1 : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpywoh($Rss32,$Rtt32):<<1",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
@@ -17358,7 +17465,7 @@ def M4_vrmpyoh_s0 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpywoh($Rss32,$Rtt32)",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000001;
@@ -17368,7 +17475,7 @@ def M4_vrmpyoh_s1 : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpywoh($Rss32,$Rtt32):<<1",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -17378,7 +17485,7 @@ def M4_xor_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 ^= and($Rs32,$Rt32)",
-tc_f429765c, TypeM>, Enc_2ae154 {
+tc_a4e22bbd, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111110;
@@ -17392,7 +17499,7 @@ def M4_xor_andn : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 ^= and($Rs32,~$Rt32)",
-tc_f429765c, TypeM>, Enc_2ae154 {
+tc_a4e22bbd, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111001;
@@ -17406,7 +17513,7 @@ def M4_xor_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 ^= or($Rs32,$Rt32)",
-tc_f429765c, TypeM>, Enc_2ae154 {
+tc_a4e22bbd, TypeM>, Enc_2ae154 {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111110;
@@ -17420,7 +17527,7 @@ def M4_xor_xacc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 ^= xor($Rss32,$Rtt32)",
-tc_f429765c, TypeS_3op>, Enc_88c16c {
+tc_a4e22bbd, TypeS_3op>, Enc_88c16c {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001010100;
@@ -17431,7 +17538,7 @@ def M5_vdmacbsu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vdmpybsu($Rss32,$Rtt32):sat",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -17443,7 +17550,7 @@ def M5_vdmpybsu : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vdmpybsu($Rss32,$Rtt32):sat",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -17454,7 +17561,7 @@ def M5_vmacbsu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpybsu($Rs32,$Rt32)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111110;
@@ -17465,7 +17572,7 @@ def M5_vmacbuu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rxx32 += vmpybu($Rs32,$Rt32)",
-tc_d773585a, TypeM>, Enc_61f0b0 {
+tc_7f8ae742, TypeM>, Enc_61f0b0 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100111100;
@@ -17476,7 +17583,7 @@ def M5_vmpybsu : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpybsu($Rs32,$Rt32)",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101010;
@@ -17486,7 +17593,7 @@ def M5_vmpybuu : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = vmpybu($Rs32,$Rt32)",
-tc_bafaade3, TypeM>, Enc_be32a5 {
+tc_c21d7447, TypeM>, Enc_be32a5 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11100101100;
@@ -17496,7 +17603,7 @@ def M5_vrmacbsu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpybsu($Rss32,$Rtt32)",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010110;
@@ -17507,7 +17614,7 @@ def M5_vrmacbuu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vrmpybu($Rss32,$Rtt32)",
-tc_d773585a, TypeM>, Enc_88c16c {
+tc_7f8ae742, TypeM>, Enc_88c16c {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010100;
@@ -17518,7 +17625,7 @@ def M5_vrmpybsu : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpybsu($Rss32,$Rtt32)",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000110;
@@ -17528,7 +17635,7 @@ def M5_vrmpybuu : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vrmpybu($Rss32,$Rtt32)",
-tc_bafaade3, TypeM>, Enc_a56825 {
+tc_c21d7447, TypeM>, Enc_a56825 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000100;
@@ -17538,7 +17645,7 @@ def M6_vabsdiffb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vabsdiffb($Rtt32,$Rss32)",
-tc_9461ff31, TypeM>, Enc_ea23e4, Requires<[HasV62]> {
+tc_9b3c0462, TypeM>, Enc_ea23e4, Requires<[HasV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000111;
@@ -17548,17 +17655,222 @@ def M6_vabsdiffub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vabsdiffub($Rtt32,$Rss32)",
-tc_9461ff31, TypeM>, Enc_ea23e4, Requires<[HasV62]> {
+tc_9b3c0462, TypeM>, Enc_ea23e4, Requires<[HasV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
let prefersSlot3 = 1;
}
+def M7_dcmpyiw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = cmpyiw($Rss32,$Rtt32)",
+tc_5a4b5e58, TypeM>, Enc_a56825, Requires<[HasV67,UseAudio]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000011;
+let prefersSlot3 = 1;
+}
+def M7_dcmpyiw_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += cmpyiw($Rss32,$Rtt32)",
+tc_197dce51, TypeM>, Enc_88c16c, Requires<[HasV67,UseAudio]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M7_dcmpyiwc : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = cmpyiw($Rss32,$Rtt32*)",
+tc_5a4b5e58, TypeM>, Enc_a56825, Requires<[HasV67,UseAudio]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000111;
+let prefersSlot3 = 1;
+}
+def M7_dcmpyiwc_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += cmpyiw($Rss32,$Rtt32*)",
+tc_197dce51, TypeM>, Enc_88c16c, Requires<[HasV67,UseAudio]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M7_dcmpyrw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = cmpyrw($Rss32,$Rtt32)",
+tc_5a4b5e58, TypeM>, Enc_a56825, Requires<[HasV67,UseAudio]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000100;
+let prefersSlot3 = 1;
+}
+def M7_dcmpyrw_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += cmpyrw($Rss32,$Rtt32)",
+tc_197dce51, TypeM>, Enc_88c16c, Requires<[HasV67,UseAudio]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M7_dcmpyrwc : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = cmpyrw($Rss32,$Rtt32*)",
+tc_5a4b5e58, TypeM>, Enc_a56825, Requires<[HasV67,UseAudio]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000110;
+let prefersSlot3 = 1;
+}
+def M7_dcmpyrwc_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += cmpyrw($Rss32,$Rtt32*)",
+tc_197dce51, TypeM>, Enc_88c16c, Requires<[HasV67,UseAudio]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M7_vdmpy : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vdmpyw($Rss32,$Rtt32)",
+tc_5a4b5e58, TypeM>, Requires<[HasV67]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def M7_vdmpy_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vdmpyw($Rss32,$Rtt32)",
+tc_197dce51, TypeM>, Requires<[HasV67]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M7_wcmpyiw : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = cmpyiw($Rss32,$Rtt32):<<1:sat",
+tc_5a4b5e58, TypeM>, Enc_d2216a, Requires<[HasV67,UseAudio]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M7_wcmpyiw_rnd : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = cmpyiw($Rss32,$Rtt32):<<1:rnd:sat",
+tc_5a4b5e58, TypeM>, Enc_d2216a, Requires<[HasV67,UseAudio]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M7_wcmpyiwc : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = cmpyiw($Rss32,$Rtt32*):<<1:sat",
+tc_5a4b5e58, TypeM>, Enc_d2216a, Requires<[HasV67,UseAudio]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M7_wcmpyiwc_rnd : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = cmpyiw($Rss32,$Rtt32*):<<1:rnd:sat",
+tc_5a4b5e58, TypeM>, Enc_d2216a, Requires<[HasV67,UseAudio]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M7_wcmpyrw : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = cmpyrw($Rss32,$Rtt32):<<1:sat",
+tc_5a4b5e58, TypeM>, Enc_d2216a, Requires<[HasV67,UseAudio]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M7_wcmpyrw_rnd : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = cmpyrw($Rss32,$Rtt32):<<1:rnd:sat",
+tc_5a4b5e58, TypeM>, Enc_d2216a, Requires<[HasV67,UseAudio]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M7_wcmpyrwc : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = cmpyrw($Rss32,$Rtt32*):<<1:sat",
+tc_5a4b5e58, TypeM>, Enc_d2216a, Requires<[HasV67,UseAudio]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M7_wcmpyrwc_rnd : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = cmpyrw($Rss32,$Rtt32*):<<1:rnd:sat",
+tc_5a4b5e58, TypeM>, Enc_d2216a, Requires<[HasV67,UseAudio]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101001111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
def PS_loadrbabs : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii),
"$Rd32 = memb(#$Ii)",
-tc_c4db48cb, TypeV2LDST>, Enc_25bef0, AddrModeRel {
+tc_8a6d0d94, TypeV2LDST>, Enc_25bef0, AddrModeRel {
let Inst{24-21} = 0b1000;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -17567,8 +17879,8 @@ let addrMode = Absolute;
let accessSize = ByteAccess;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadrb";
let BaseOpcode = "L4_loadrb_abs";
+let CextOpcode = "L2_loadrb";
let isPredicable = 1;
let DecoderNamespace = "MustExtend";
let isExtended = 1;
@@ -17581,15 +17893,15 @@ def PS_loadrdabs : HInst<
(outs DoubleRegs:$Rdd32),
(ins u29_3Imm:$Ii),
"$Rdd32 = memd(#$Ii)",
-tc_c4db48cb, TypeV2LDST>, Enc_509701, AddrModeRel {
+tc_8a6d0d94, TypeV2LDST>, Enc_509701, AddrModeRel {
let Inst{24-21} = 0b1110;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
let accessSize = DoubleWordAccess;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadrd";
let BaseOpcode = "L4_loadrd_abs";
+let CextOpcode = "L2_loadrd";
let isPredicable = 1;
let DecoderNamespace = "MustExtend";
let isExtended = 1;
@@ -17602,7 +17914,7 @@ def PS_loadrhabs : HInst<
(outs IntRegs:$Rd32),
(ins u31_1Imm:$Ii),
"$Rd32 = memh(#$Ii)",
-tc_c4db48cb, TypeV2LDST>, Enc_8df4be, AddrModeRel {
+tc_8a6d0d94, TypeV2LDST>, Enc_8df4be, AddrModeRel {
let Inst{24-21} = 0b1010;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -17611,8 +17923,8 @@ let addrMode = Absolute;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadrh";
let BaseOpcode = "L4_loadrh_abs";
+let CextOpcode = "L2_loadrh";
let isPredicable = 1;
let DecoderNamespace = "MustExtend";
let isExtended = 1;
@@ -17625,7 +17937,7 @@ def PS_loadriabs : HInst<
(outs IntRegs:$Rd32),
(ins u30_2Imm:$Ii),
"$Rd32 = memw(#$Ii)",
-tc_c4db48cb, TypeV2LDST>, Enc_4f4ed7, AddrModeRel {
+tc_8a6d0d94, TypeV2LDST>, Enc_4f4ed7, AddrModeRel {
let Inst{24-21} = 0b1100;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -17634,8 +17946,8 @@ let addrMode = Absolute;
let accessSize = WordAccess;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadri";
let BaseOpcode = "L4_loadri_abs";
+let CextOpcode = "L2_loadri";
let isPredicable = 1;
let DecoderNamespace = "MustExtend";
let isExtended = 1;
@@ -17648,7 +17960,7 @@ def PS_loadrubabs : HInst<
(outs IntRegs:$Rd32),
(ins u32_0Imm:$Ii),
"$Rd32 = memub(#$Ii)",
-tc_c4db48cb, TypeV2LDST>, Enc_25bef0, AddrModeRel {
+tc_8a6d0d94, TypeV2LDST>, Enc_25bef0, AddrModeRel {
let Inst{24-21} = 0b1001;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -17657,8 +17969,8 @@ let addrMode = Absolute;
let accessSize = ByteAccess;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadrub";
let BaseOpcode = "L4_loadrub_abs";
+let CextOpcode = "L2_loadrub";
let isPredicable = 1;
let DecoderNamespace = "MustExtend";
let isExtended = 1;
@@ -17671,7 +17983,7 @@ def PS_loadruhabs : HInst<
(outs IntRegs:$Rd32),
(ins u31_1Imm:$Ii),
"$Rd32 = memuh(#$Ii)",
-tc_c4db48cb, TypeV2LDST>, Enc_8df4be, AddrModeRel {
+tc_8a6d0d94, TypeV2LDST>, Enc_8df4be, AddrModeRel {
let Inst{24-21} = 0b1011;
let Inst{31-27} = 0b01001;
let hasNewValue = 1;
@@ -17680,8 +17992,8 @@ let addrMode = Absolute;
let accessSize = HalfWordAccess;
let mayLoad = 1;
let isExtended = 1;
-let CextOpcode = "L2_loadruh";
let BaseOpcode = "L4_loadruh_abs";
+let CextOpcode = "L2_loadruh";
let isPredicable = 1;
let DecoderNamespace = "MustExtend";
let isExtended = 1;
@@ -17694,15 +18006,15 @@ def PS_storerbabs : HInst<
(outs),
(ins u32_0Imm:$Ii, IntRegs:$Rt32),
"memb(#$Ii) = $Rt32",
-tc_0371abea, TypeV2LDST>, Enc_1b64fb, AddrModeRel {
+tc_0655b949, TypeV2LDST>, Enc_1b64fb, AddrModeRel {
let Inst{24-21} = 0b0000;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
let accessSize = ByteAccess;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerb";
let BaseOpcode = "S2_storerbabs";
+let CextOpcode = "S2_storerb";
let isPredicable = 1;
let isNVStorable = 1;
let DecoderNamespace = "MustExtend";
@@ -17716,7 +18028,7 @@ def PS_storerbnewabs : HInst<
(outs),
(ins u32_0Imm:$Ii, IntRegs:$Nt8),
"memb(#$Ii) = $Nt8.new",
-tc_5bf126a6, TypeV2LDST>, Enc_ad1831, AddrModeRel {
+tc_6e20402a, TypeV2LDST>, Enc_ad1831, AddrModeRel {
let Inst{12-11} = 0b00;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -17727,8 +18039,8 @@ let isNewValue = 1;
let isExtended = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerb";
let BaseOpcode = "S2_storerbabs";
+let CextOpcode = "S2_storerb";
let isPredicable = 1;
let DecoderNamespace = "MustExtend";
let isExtended = 1;
@@ -17742,15 +18054,15 @@ def PS_storerdabs : HInst<
(outs),
(ins u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"memd(#$Ii) = $Rtt32",
-tc_0371abea, TypeV2LDST>, Enc_5c124a, AddrModeRel {
+tc_0655b949, TypeV2LDST>, Enc_5c124a, AddrModeRel {
let Inst{24-21} = 0b0110;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
let accessSize = DoubleWordAccess;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerd";
let BaseOpcode = "S2_storerdabs";
+let CextOpcode = "S2_storerd";
let isPredicable = 1;
let DecoderNamespace = "MustExtend";
let isExtended = 1;
@@ -17763,15 +18075,15 @@ def PS_storerfabs : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Rt32),
"memh(#$Ii) = $Rt32.h",
-tc_0371abea, TypeV2LDST>, Enc_fda92c, AddrModeRel {
+tc_0655b949, TypeV2LDST>, Enc_fda92c, AddrModeRel {
let Inst{24-21} = 0b0011;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
let accessSize = HalfWordAccess;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerf";
let BaseOpcode = "S2_storerfabs";
+let CextOpcode = "S2_storerf";
let isPredicable = 1;
let DecoderNamespace = "MustExtend";
let isExtended = 1;
@@ -17784,15 +18096,15 @@ def PS_storerhabs : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Rt32),
"memh(#$Ii) = $Rt32",
-tc_0371abea, TypeV2LDST>, Enc_fda92c, AddrModeRel {
+tc_0655b949, TypeV2LDST>, Enc_fda92c, AddrModeRel {
let Inst{24-21} = 0b0010;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
let accessSize = HalfWordAccess;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerh";
let BaseOpcode = "S2_storerhabs";
+let CextOpcode = "S2_storerh";
let isPredicable = 1;
let isNVStorable = 1;
let DecoderNamespace = "MustExtend";
@@ -17806,7 +18118,7 @@ def PS_storerhnewabs : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Nt8),
"memh(#$Ii) = $Nt8.new",
-tc_5bf126a6, TypeV2LDST>, Enc_bc03e5, AddrModeRel {
+tc_6e20402a, TypeV2LDST>, Enc_bc03e5, AddrModeRel {
let Inst{12-11} = 0b01;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -17817,8 +18129,8 @@ let isNewValue = 1;
let isExtended = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerh";
let BaseOpcode = "S2_storerhabs";
+let CextOpcode = "S2_storerh";
let isPredicable = 1;
let DecoderNamespace = "MustExtend";
let isExtended = 1;
@@ -17832,15 +18144,15 @@ def PS_storeriabs : HInst<
(outs),
(ins u30_2Imm:$Ii, IntRegs:$Rt32),
"memw(#$Ii) = $Rt32",
-tc_0371abea, TypeV2LDST>, Enc_541f26, AddrModeRel {
+tc_0655b949, TypeV2LDST>, Enc_541f26, AddrModeRel {
let Inst{24-21} = 0b0100;
let Inst{31-27} = 0b01001;
let addrMode = Absolute;
let accessSize = WordAccess;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storeri";
let BaseOpcode = "S2_storeriabs";
+let CextOpcode = "S2_storeri";
let isPredicable = 1;
let isNVStorable = 1;
let DecoderNamespace = "MustExtend";
@@ -17854,7 +18166,7 @@ def PS_storerinewabs : HInst<
(outs),
(ins u30_2Imm:$Ii, IntRegs:$Nt8),
"memw(#$Ii) = $Nt8.new",
-tc_5bf126a6, TypeV2LDST>, Enc_78cbf0, AddrModeRel {
+tc_6e20402a, TypeV2LDST>, Enc_78cbf0, AddrModeRel {
let Inst{12-11} = 0b10;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -17865,8 +18177,8 @@ let isNewValue = 1;
let isExtended = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storeri";
let BaseOpcode = "S2_storeriabs";
+let CextOpcode = "S2_storeri";
let isPredicable = 1;
let DecoderNamespace = "MustExtend";
let isExtended = 1;
@@ -17876,11 +18188,22 @@ let opExtentBits = 18;
let opExtentAlign = 2;
let opNewValue = 1;
}
+def PS_trap1 : HInst<
+(outs),
+(ins u8_0Imm:$Ii),
+"trap1(#$Ii)",
+tc_53c851ab, TypeJ>, Enc_a51a9a, Requires<[HasPreV65]> {
+let Inst{1-0} = 0b00;
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0101010010000000;
+let isSolo = 1;
+}
def S2_addasl_rrri : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32, u3_0Imm:$Ii),
"$Rd32 = addasl($Rt32,$Rs32,#$Ii)",
-tc_f675fee8, TypeS_3op>, Enc_47ef61 {
+tc_2c13e7f5, TypeS_3op>, Enc_47ef61 {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000100000;
let hasNewValue = 1;
@@ -17891,7 +18214,7 @@ def S2_allocframe : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, u11_3Imm:$Ii),
"allocframe($Rx32,#$Ii):raw",
-tc_b44ecf75, TypeST>, Enc_22c845 {
+tc_934753bb, TypeST>, Enc_22c845 {
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b10100000100;
let hasNewValue = 1;
@@ -17907,7 +18230,7 @@ def S2_asl_i_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = asl($Rss32,#$Ii)",
-tc_946df596, TypeS_2op>, Enc_5eac98 {
+tc_5da50c4b, TypeS_2op>, Enc_5eac98 {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b10000000000;
}
@@ -17915,7 +18238,7 @@ def S2_asl_i_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 += asl($Rss32,#$Ii)",
-tc_f675fee8, TypeS_2op>, Enc_70fb07 {
+tc_2c13e7f5, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b110;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -17925,7 +18248,7 @@ def S2_asl_i_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 &= asl($Rss32,#$Ii)",
-tc_f429765c, TypeS_2op>, Enc_70fb07 {
+tc_a4e22bbd, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -17935,7 +18258,7 @@ def S2_asl_i_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 -= asl($Rss32,#$Ii)",
-tc_f675fee8, TypeS_2op>, Enc_70fb07 {
+tc_2c13e7f5, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -17945,7 +18268,7 @@ def S2_asl_i_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 |= asl($Rss32,#$Ii)",
-tc_f429765c, TypeS_2op>, Enc_70fb07 {
+tc_a4e22bbd, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b110;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -17955,7 +18278,7 @@ def S2_asl_i_p_xacc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 ^= asl($Rss32,#$Ii)",
-tc_f429765c, TypeS_2op>, Enc_70fb07 {
+tc_a4e22bbd, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b10000010100;
let prefersSlot3 = 1;
@@ -17965,7 +18288,7 @@ def S2_asl_i_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = asl($Rs32,#$Ii)",
-tc_946df596, TypeS_2op>, Enc_a05677 {
+tc_5da50c4b, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100000;
@@ -17976,7 +18299,7 @@ def S2_asl_i_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 += asl($Rs32,#$Ii)",
-tc_f675fee8, TypeS_2op>, Enc_28a2dc {
+tc_2c13e7f5, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -17989,7 +18312,7 @@ def S2_asl_i_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 &= asl($Rs32,#$Ii)",
-tc_f429765c, TypeS_2op>, Enc_28a2dc {
+tc_a4e22bbd, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -18002,7 +18325,7 @@ def S2_asl_i_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 -= asl($Rs32,#$Ii)",
-tc_f675fee8, TypeS_2op>, Enc_28a2dc {
+tc_2c13e7f5, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -18015,7 +18338,7 @@ def S2_asl_i_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 |= asl($Rs32,#$Ii)",
-tc_f429765c, TypeS_2op>, Enc_28a2dc {
+tc_a4e22bbd, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -18028,7 +18351,7 @@ def S2_asl_i_r_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = asl($Rs32,#$Ii):sat",
-tc_779080bf, TypeS_2op>, Enc_a05677 {
+tc_8a825db2, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100010;
@@ -18041,7 +18364,7 @@ def S2_asl_i_r_xacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 ^= asl($Rs32,#$Ii)",
-tc_f429765c, TypeS_2op>, Enc_28a2dc {
+tc_a4e22bbd, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110100;
@@ -18054,7 +18377,7 @@ def S2_asl_i_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vaslh($Rss32,#$Ii)",
-tc_946df596, TypeS_2op>, Enc_12b6e9 {
+tc_5da50c4b, TypeS_2op>, Enc_12b6e9 {
let Inst{7-5} = 0b010;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10000000100;
@@ -18063,7 +18386,7 @@ def S2_asl_i_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Rdd32 = vaslw($Rss32,#$Ii)",
-tc_946df596, TypeS_2op>, Enc_7e5a82 {
+tc_5da50c4b, TypeS_2op>, Enc_7e5a82 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000000010;
@@ -18072,7 +18395,7 @@ def S2_asl_r_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = asl($Rss32,$Rt32)",
-tc_946df596, TypeS_3op>, Enc_927852 {
+tc_5da50c4b, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011100;
@@ -18081,7 +18404,7 @@ def S2_asl_r_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += asl($Rss32,$Rt32)",
-tc_f675fee8, TypeS_3op>, Enc_1aa186 {
+tc_2c13e7f5, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011110;
@@ -18092,7 +18415,7 @@ def S2_asl_r_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 &= asl($Rss32,$Rt32)",
-tc_f429765c, TypeS_3op>, Enc_1aa186 {
+tc_a4e22bbd, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011010;
@@ -18103,7 +18426,7 @@ def S2_asl_r_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 -= asl($Rss32,$Rt32)",
-tc_f675fee8, TypeS_3op>, Enc_1aa186 {
+tc_2c13e7f5, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011100;
@@ -18114,7 +18437,7 @@ def S2_asl_r_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 |= asl($Rss32,$Rt32)",
-tc_f429765c, TypeS_3op>, Enc_1aa186 {
+tc_a4e22bbd, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011000;
@@ -18125,7 +18448,7 @@ def S2_asl_r_p_xor : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 ^= asl($Rss32,$Rt32)",
-tc_f429765c, TypeS_3op>, Enc_1aa186 {
+tc_a4e22bbd, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011011;
@@ -18136,7 +18459,7 @@ def S2_asl_r_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = asl($Rs32,$Rt32)",
-tc_946df596, TypeS_3op>, Enc_5ab2be {
+tc_5da50c4b, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110010;
@@ -18147,7 +18470,7 @@ def S2_asl_r_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += asl($Rs32,$Rt32)",
-tc_f675fee8, TypeS_3op>, Enc_2ae154 {
+tc_2c13e7f5, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100110;
@@ -18160,7 +18483,7 @@ def S2_asl_r_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= asl($Rs32,$Rt32)",
-tc_f429765c, TypeS_3op>, Enc_2ae154 {
+tc_a4e22bbd, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100010;
@@ -18173,7 +18496,7 @@ def S2_asl_r_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= asl($Rs32,$Rt32)",
-tc_f675fee8, TypeS_3op>, Enc_2ae154 {
+tc_2c13e7f5, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100100;
@@ -18186,7 +18509,7 @@ def S2_asl_r_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= asl($Rs32,$Rt32)",
-tc_f429765c, TypeS_3op>, Enc_2ae154 {
+tc_a4e22bbd, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100000;
@@ -18199,7 +18522,7 @@ def S2_asl_r_r_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = asl($Rs32,$Rt32):sat",
-tc_779080bf, TypeS_3op>, Enc_5ab2be {
+tc_8a825db2, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110000;
@@ -18212,7 +18535,7 @@ def S2_asl_r_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vaslh($Rss32,$Rt32)",
-tc_946df596, TypeS_3op>, Enc_927852 {
+tc_5da50c4b, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011010;
@@ -18221,7 +18544,7 @@ def S2_asl_r_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vaslw($Rss32,$Rt32)",
-tc_946df596, TypeS_3op>, Enc_927852 {
+tc_5da50c4b, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011000;
@@ -18230,7 +18553,7 @@ def S2_asr_i_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = asr($Rss32,#$Ii)",
-tc_946df596, TypeS_2op>, Enc_5eac98 {
+tc_5da50c4b, TypeS_2op>, Enc_5eac98 {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b10000000000;
}
@@ -18238,7 +18561,7 @@ def S2_asr_i_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 += asr($Rss32,#$Ii)",
-tc_f675fee8, TypeS_2op>, Enc_70fb07 {
+tc_2c13e7f5, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b100;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -18248,7 +18571,7 @@ def S2_asr_i_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 &= asr($Rss32,#$Ii)",
-tc_f429765c, TypeS_2op>, Enc_70fb07 {
+tc_a4e22bbd, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -18258,7 +18581,7 @@ def S2_asr_i_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 -= asr($Rss32,#$Ii)",
-tc_f675fee8, TypeS_2op>, Enc_70fb07 {
+tc_2c13e7f5, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -18268,7 +18591,7 @@ def S2_asr_i_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 |= asr($Rss32,#$Ii)",
-tc_f429765c, TypeS_2op>, Enc_70fb07 {
+tc_a4e22bbd, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b100;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -18278,7 +18601,7 @@ def S2_asr_i_p_rnd : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = asr($Rss32,#$Ii):rnd",
-tc_002cb246, TypeS_2op>, Enc_5eac98 {
+tc_0dfac0a7, TypeS_2op>, Enc_5eac98 {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b10000000110;
let prefersSlot3 = 1;
@@ -18287,14 +18610,14 @@ def S2_asr_i_p_rnd_goodsyntax : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = asrrnd($Rss32,#$Ii)",
-tc_002cb246, TypeS_2op> {
+tc_0dfac0a7, TypeS_2op> {
let isPseudo = 1;
}
def S2_asr_i_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = asr($Rs32,#$Ii)",
-tc_946df596, TypeS_2op>, Enc_a05677 {
+tc_5da50c4b, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100000;
@@ -18305,7 +18628,7 @@ def S2_asr_i_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 += asr($Rs32,#$Ii)",
-tc_f675fee8, TypeS_2op>, Enc_28a2dc {
+tc_2c13e7f5, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -18318,7 +18641,7 @@ def S2_asr_i_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 &= asr($Rs32,#$Ii)",
-tc_f429765c, TypeS_2op>, Enc_28a2dc {
+tc_a4e22bbd, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -18331,7 +18654,7 @@ def S2_asr_i_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 -= asr($Rs32,#$Ii)",
-tc_f675fee8, TypeS_2op>, Enc_28a2dc {
+tc_2c13e7f5, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -18344,7 +18667,7 @@ def S2_asr_i_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 |= asr($Rs32,#$Ii)",
-tc_f429765c, TypeS_2op>, Enc_28a2dc {
+tc_a4e22bbd, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -18357,7 +18680,7 @@ def S2_asr_i_r_rnd : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = asr($Rs32,#$Ii):rnd",
-tc_002cb246, TypeS_2op>, Enc_a05677 {
+tc_0dfac0a7, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100010;
@@ -18369,7 +18692,7 @@ def S2_asr_i_r_rnd_goodsyntax : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = asrrnd($Rs32,#$Ii)",
-tc_002cb246, TypeS_2op> {
+tc_0dfac0a7, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -18378,7 +18701,7 @@ def S2_asr_i_svw_trun : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Rd32 = vasrw($Rss32,#$Ii)",
-tc_4414d8b1, TypeS_2op>, Enc_8dec2e {
+tc_f34c1c21, TypeS_2op>, Enc_8dec2e {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001000110;
@@ -18390,7 +18713,7 @@ def S2_asr_i_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vasrh($Rss32,#$Ii)",
-tc_946df596, TypeS_2op>, Enc_12b6e9 {
+tc_5da50c4b, TypeS_2op>, Enc_12b6e9 {
let Inst{7-5} = 0b000;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10000000100;
@@ -18399,7 +18722,7 @@ def S2_asr_i_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Rdd32 = vasrw($Rss32,#$Ii)",
-tc_946df596, TypeS_2op>, Enc_7e5a82 {
+tc_5da50c4b, TypeS_2op>, Enc_7e5a82 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000000010;
@@ -18408,7 +18731,7 @@ def S2_asr_r_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = asr($Rss32,$Rt32)",
-tc_946df596, TypeS_3op>, Enc_927852 {
+tc_5da50c4b, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011100;
@@ -18417,7 +18740,7 @@ def S2_asr_r_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += asr($Rss32,$Rt32)",
-tc_f675fee8, TypeS_3op>, Enc_1aa186 {
+tc_2c13e7f5, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011110;
@@ -18428,7 +18751,7 @@ def S2_asr_r_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 &= asr($Rss32,$Rt32)",
-tc_f429765c, TypeS_3op>, Enc_1aa186 {
+tc_a4e22bbd, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011010;
@@ -18439,7 +18762,7 @@ def S2_asr_r_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 -= asr($Rss32,$Rt32)",
-tc_f675fee8, TypeS_3op>, Enc_1aa186 {
+tc_2c13e7f5, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011100;
@@ -18450,7 +18773,7 @@ def S2_asr_r_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 |= asr($Rss32,$Rt32)",
-tc_f429765c, TypeS_3op>, Enc_1aa186 {
+tc_a4e22bbd, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011000;
@@ -18461,7 +18784,7 @@ def S2_asr_r_p_xor : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 ^= asr($Rss32,$Rt32)",
-tc_f429765c, TypeS_3op>, Enc_1aa186 {
+tc_a4e22bbd, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011011;
@@ -18472,7 +18795,7 @@ def S2_asr_r_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = asr($Rs32,$Rt32)",
-tc_946df596, TypeS_3op>, Enc_5ab2be {
+tc_5da50c4b, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110010;
@@ -18483,7 +18806,7 @@ def S2_asr_r_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += asr($Rs32,$Rt32)",
-tc_f675fee8, TypeS_3op>, Enc_2ae154 {
+tc_2c13e7f5, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100110;
@@ -18496,7 +18819,7 @@ def S2_asr_r_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= asr($Rs32,$Rt32)",
-tc_f429765c, TypeS_3op>, Enc_2ae154 {
+tc_a4e22bbd, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100010;
@@ -18509,7 +18832,7 @@ def S2_asr_r_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= asr($Rs32,$Rt32)",
-tc_f675fee8, TypeS_3op>, Enc_2ae154 {
+tc_2c13e7f5, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100100;
@@ -18522,7 +18845,7 @@ def S2_asr_r_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= asr($Rs32,$Rt32)",
-tc_f429765c, TypeS_3op>, Enc_2ae154 {
+tc_a4e22bbd, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100000;
@@ -18535,7 +18858,7 @@ def S2_asr_r_r_sat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = asr($Rs32,$Rt32):sat",
-tc_779080bf, TypeS_3op>, Enc_5ab2be {
+tc_8a825db2, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110000;
@@ -18548,7 +18871,7 @@ def S2_asr_r_svw_trun : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = vasrw($Rss32,$Rt32)",
-tc_4414d8b1, TypeS_3op>, Enc_3d5b28 {
+tc_f34c1c21, TypeS_3op>, Enc_3d5b28 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -18560,7 +18883,7 @@ def S2_asr_r_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vasrh($Rss32,$Rt32)",
-tc_946df596, TypeS_3op>, Enc_927852 {
+tc_5da50c4b, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011010;
@@ -18569,7 +18892,7 @@ def S2_asr_r_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vasrw($Rss32,$Rt32)",
-tc_946df596, TypeS_3op>, Enc_927852 {
+tc_5da50c4b, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011000;
@@ -18578,7 +18901,7 @@ def S2_brev : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = brev($Rs32)",
-tc_14b5c689, TypeS_2op>, Enc_5e2823 {
+tc_a7bdb22c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001100010;
let hasNewValue = 1;
@@ -18589,7 +18912,7 @@ def S2_brevp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = brev($Rss32)",
-tc_14b5c689, TypeS_2op>, Enc_b9c5fb {
+tc_a7bdb22c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000110;
let prefersSlot3 = 1;
@@ -18598,7 +18921,7 @@ def S2_cabacdecbin : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = decbin($Rss32,$Rtt32)",
-tc_76851da1, TypeS_3op>, Enc_a56825 {
+tc_db596beb, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001110;
@@ -18610,7 +18933,7 @@ def S2_cl0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = cl0($Rs32)",
-tc_14b5c689, TypeS_2op>, Enc_5e2823 {
+tc_a7bdb22c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10001100000;
let hasNewValue = 1;
@@ -18621,7 +18944,7 @@ def S2_cl0p : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = cl0($Rss32)",
-tc_14b5c689, TypeS_2op>, Enc_90cd8b {
+tc_a7bdb22c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10001000010;
let hasNewValue = 1;
@@ -18632,7 +18955,7 @@ def S2_cl1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = cl1($Rs32)",
-tc_14b5c689, TypeS_2op>, Enc_5e2823 {
+tc_a7bdb22c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001100000;
let hasNewValue = 1;
@@ -18643,7 +18966,7 @@ def S2_cl1p : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = cl1($Rss32)",
-tc_14b5c689, TypeS_2op>, Enc_90cd8b {
+tc_a7bdb22c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001000010;
let hasNewValue = 1;
@@ -18654,7 +18977,7 @@ def S2_clb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = clb($Rs32)",
-tc_14b5c689, TypeS_2op>, Enc_5e2823 {
+tc_a7bdb22c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001100000;
let hasNewValue = 1;
@@ -18665,7 +18988,7 @@ def S2_clbnorm : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = normamt($Rs32)",
-tc_14b5c689, TypeS_2op>, Enc_5e2823 {
+tc_a7bdb22c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10001100000;
let hasNewValue = 1;
@@ -18676,7 +18999,7 @@ def S2_clbp : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = clb($Rss32)",
-tc_14b5c689, TypeS_2op>, Enc_90cd8b {
+tc_a7bdb22c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001000010;
let hasNewValue = 1;
@@ -18687,7 +19010,7 @@ def S2_clrbit_i : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = clrbit($Rs32,#$Ii)",
-tc_946df596, TypeS_2op>, Enc_a05677 {
+tc_5da50c4b, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100110;
@@ -18698,7 +19021,7 @@ def S2_clrbit_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = clrbit($Rs32,$Rt32)",
-tc_946df596, TypeS_3op>, Enc_5ab2be {
+tc_5da50c4b, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110100;
@@ -18709,7 +19032,7 @@ def S2_ct0 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = ct0($Rs32)",
-tc_14b5c689, TypeS_2op>, Enc_5e2823 {
+tc_a7bdb22c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001100010;
let hasNewValue = 1;
@@ -18720,7 +19043,7 @@ def S2_ct0p : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = ct0($Rss32)",
-tc_14b5c689, TypeS_2op>, Enc_90cd8b {
+tc_a7bdb22c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10001000111;
let hasNewValue = 1;
@@ -18731,7 +19054,7 @@ def S2_ct1 : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = ct1($Rs32)",
-tc_14b5c689, TypeS_2op>, Enc_5e2823 {
+tc_a7bdb22c, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10001100010;
let hasNewValue = 1;
@@ -18742,7 +19065,7 @@ def S2_ct1p : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = ct1($Rss32)",
-tc_14b5c689, TypeS_2op>, Enc_90cd8b {
+tc_a7bdb22c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001000111;
let hasNewValue = 1;
@@ -18753,7 +19076,7 @@ def S2_deinterleave : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = deinterleave($Rss32)",
-tc_14b5c689, TypeS_2op>, Enc_b9c5fb {
+tc_a7bdb22c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000000110;
let prefersSlot3 = 1;
@@ -18762,7 +19085,7 @@ def S2_extractu : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II),
"$Rd32 = extractu($Rs32,#$Ii,#$II)",
-tc_f675fee8, TypeS_2op>, Enc_b388cf {
+tc_2c13e7f5, TypeS_2op>, Enc_b388cf {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b100011010;
let hasNewValue = 1;
@@ -18773,7 +19096,7 @@ def S2_extractu_rp : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"$Rd32 = extractu($Rs32,$Rtt32)",
-tc_002cb246, TypeS_3op>, Enc_e07374 {
+tc_a08b630b, TypeS_3op>, Enc_e07374 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001001000;
@@ -18785,7 +19108,7 @@ def S2_extractup : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II),
"$Rdd32 = extractu($Rss32,#$Ii,#$II)",
-tc_f675fee8, TypeS_2op>, Enc_b84c4c {
+tc_2c13e7f5, TypeS_2op>, Enc_b84c4c {
let Inst{31-24} = 0b10000001;
let prefersSlot3 = 1;
}
@@ -18793,7 +19116,7 @@ def S2_extractup_rp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = extractu($Rss32,$Rtt32)",
-tc_002cb246, TypeS_3op>, Enc_a56825 {
+tc_a08b630b, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001000;
@@ -18803,7 +19126,7 @@ def S2_insert : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II),
"$Rx32 = insert($Rs32,#$Ii,#$II)",
-tc_bfec0f01, TypeS_2op>, Enc_a1e29d {
+tc_bb831a7c, TypeS_2op>, Enc_a1e29d {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b100011110;
let hasNewValue = 1;
@@ -18815,7 +19138,7 @@ def S2_insert_rp : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, DoubleRegs:$Rtt32),
"$Rx32 = insert($Rs32,$Rtt32)",
-tc_f429765c, TypeS_3op>, Enc_179b35 {
+tc_a4e22bbd, TypeS_3op>, Enc_179b35 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001000000;
@@ -18828,7 +19151,7 @@ def S2_insertp : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II),
"$Rxx32 = insert($Rss32,#$Ii,#$II)",
-tc_bfec0f01, TypeS_2op>, Enc_143a3c {
+tc_bb831a7c, TypeS_2op>, Enc_143a3c {
let Inst{31-24} = 0b10000011;
let prefersSlot3 = 1;
let Constraints = "$Rxx32 = $Rxx32in";
@@ -18837,7 +19160,7 @@ def S2_insertp_rp : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 = insert($Rss32,$Rtt32)",
-tc_f429765c, TypeS_3op>, Enc_88c16c {
+tc_a4e22bbd, TypeS_3op>, Enc_88c16c {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001010000;
@@ -18848,7 +19171,7 @@ def S2_interleave : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = interleave($Rss32)",
-tc_14b5c689, TypeS_2op>, Enc_b9c5fb {
+tc_a7bdb22c, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000000110;
let prefersSlot3 = 1;
@@ -18857,7 +19180,7 @@ def S2_lfsp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = lfs($Rss32,$Rtt32)",
-tc_002cb246, TypeS_3op>, Enc_a56825 {
+tc_a08b630b, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -18867,7 +19190,7 @@ def S2_lsl_r_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = lsl($Rss32,$Rt32)",
-tc_946df596, TypeS_3op>, Enc_927852 {
+tc_5da50c4b, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011100;
@@ -18876,7 +19199,7 @@ def S2_lsl_r_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += lsl($Rss32,$Rt32)",
-tc_f675fee8, TypeS_3op>, Enc_1aa186 {
+tc_2c13e7f5, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011110;
@@ -18887,7 +19210,7 @@ def S2_lsl_r_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 &= lsl($Rss32,$Rt32)",
-tc_f429765c, TypeS_3op>, Enc_1aa186 {
+tc_a4e22bbd, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011010;
@@ -18898,7 +19221,7 @@ def S2_lsl_r_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 -= lsl($Rss32,$Rt32)",
-tc_f675fee8, TypeS_3op>, Enc_1aa186 {
+tc_2c13e7f5, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011100;
@@ -18909,7 +19232,7 @@ def S2_lsl_r_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 |= lsl($Rss32,$Rt32)",
-tc_f429765c, TypeS_3op>, Enc_1aa186 {
+tc_a4e22bbd, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011000;
@@ -18920,7 +19243,7 @@ def S2_lsl_r_p_xor : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 ^= lsl($Rss32,$Rt32)",
-tc_f429765c, TypeS_3op>, Enc_1aa186 {
+tc_a4e22bbd, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011011;
@@ -18931,7 +19254,7 @@ def S2_lsl_r_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = lsl($Rs32,$Rt32)",
-tc_946df596, TypeS_3op>, Enc_5ab2be {
+tc_5da50c4b, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110010;
@@ -18942,7 +19265,7 @@ def S2_lsl_r_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += lsl($Rs32,$Rt32)",
-tc_f675fee8, TypeS_3op>, Enc_2ae154 {
+tc_2c13e7f5, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100110;
@@ -18955,7 +19278,7 @@ def S2_lsl_r_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= lsl($Rs32,$Rt32)",
-tc_f429765c, TypeS_3op>, Enc_2ae154 {
+tc_a4e22bbd, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100010;
@@ -18968,7 +19291,7 @@ def S2_lsl_r_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= lsl($Rs32,$Rt32)",
-tc_f675fee8, TypeS_3op>, Enc_2ae154 {
+tc_2c13e7f5, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100100;
@@ -18981,7 +19304,7 @@ def S2_lsl_r_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= lsl($Rs32,$Rt32)",
-tc_f429765c, TypeS_3op>, Enc_2ae154 {
+tc_a4e22bbd, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100000;
@@ -18994,7 +19317,7 @@ def S2_lsl_r_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vlslh($Rss32,$Rt32)",
-tc_946df596, TypeS_3op>, Enc_927852 {
+tc_5da50c4b, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011010;
@@ -19003,7 +19326,7 @@ def S2_lsl_r_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vlslw($Rss32,$Rt32)",
-tc_946df596, TypeS_3op>, Enc_927852 {
+tc_5da50c4b, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011000;
@@ -19012,7 +19335,7 @@ def S2_lsr_i_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = lsr($Rss32,#$Ii)",
-tc_946df596, TypeS_2op>, Enc_5eac98 {
+tc_5da50c4b, TypeS_2op>, Enc_5eac98 {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b10000000000;
}
@@ -19020,7 +19343,7 @@ def S2_lsr_i_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 += lsr($Rss32,#$Ii)",
-tc_f675fee8, TypeS_2op>, Enc_70fb07 {
+tc_2c13e7f5, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b101;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -19030,7 +19353,7 @@ def S2_lsr_i_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 &= lsr($Rss32,#$Ii)",
-tc_f429765c, TypeS_2op>, Enc_70fb07 {
+tc_a4e22bbd, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -19040,7 +19363,7 @@ def S2_lsr_i_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 -= lsr($Rss32,#$Ii)",
-tc_f675fee8, TypeS_2op>, Enc_70fb07 {
+tc_2c13e7f5, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -19050,7 +19373,7 @@ def S2_lsr_i_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 |= lsr($Rss32,#$Ii)",
-tc_f429765c, TypeS_2op>, Enc_70fb07 {
+tc_a4e22bbd, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b101;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -19060,7 +19383,7 @@ def S2_lsr_i_p_xacc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 ^= lsr($Rss32,#$Ii)",
-tc_f429765c, TypeS_2op>, Enc_70fb07 {
+tc_a4e22bbd, TypeS_2op>, Enc_70fb07 {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b10000010100;
let prefersSlot3 = 1;
@@ -19070,7 +19393,7 @@ def S2_lsr_i_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = lsr($Rs32,#$Ii)",
-tc_946df596, TypeS_2op>, Enc_a05677 {
+tc_5da50c4b, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100000;
@@ -19081,7 +19404,7 @@ def S2_lsr_i_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 += lsr($Rs32,#$Ii)",
-tc_f675fee8, TypeS_2op>, Enc_28a2dc {
+tc_2c13e7f5, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -19094,7 +19417,7 @@ def S2_lsr_i_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 &= lsr($Rs32,#$Ii)",
-tc_f429765c, TypeS_2op>, Enc_28a2dc {
+tc_a4e22bbd, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -19107,7 +19430,7 @@ def S2_lsr_i_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 -= lsr($Rs32,#$Ii)",
-tc_f675fee8, TypeS_2op>, Enc_28a2dc {
+tc_2c13e7f5, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -19120,7 +19443,7 @@ def S2_lsr_i_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 |= lsr($Rs32,#$Ii)",
-tc_f429765c, TypeS_2op>, Enc_28a2dc {
+tc_a4e22bbd, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -19133,7 +19456,7 @@ def S2_lsr_i_r_xacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 ^= lsr($Rs32,#$Ii)",
-tc_f429765c, TypeS_2op>, Enc_28a2dc {
+tc_a4e22bbd, TypeS_2op>, Enc_28a2dc {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110100;
@@ -19146,7 +19469,7 @@ def S2_lsr_i_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vlsrh($Rss32,#$Ii)",
-tc_946df596, TypeS_2op>, Enc_12b6e9 {
+tc_5da50c4b, TypeS_2op>, Enc_12b6e9 {
let Inst{7-5} = 0b001;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10000000100;
@@ -19155,7 +19478,7 @@ def S2_lsr_i_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Rdd32 = vlsrw($Rss32,#$Ii)",
-tc_946df596, TypeS_2op>, Enc_7e5a82 {
+tc_5da50c4b, TypeS_2op>, Enc_7e5a82 {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000000010;
@@ -19164,7 +19487,7 @@ def S2_lsr_r_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = lsr($Rss32,$Rt32)",
-tc_946df596, TypeS_3op>, Enc_927852 {
+tc_5da50c4b, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011100;
@@ -19173,7 +19496,7 @@ def S2_lsr_r_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += lsr($Rss32,$Rt32)",
-tc_f675fee8, TypeS_3op>, Enc_1aa186 {
+tc_2c13e7f5, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011110;
@@ -19184,7 +19507,7 @@ def S2_lsr_r_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 &= lsr($Rss32,$Rt32)",
-tc_f429765c, TypeS_3op>, Enc_1aa186 {
+tc_a4e22bbd, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011010;
@@ -19195,7 +19518,7 @@ def S2_lsr_r_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 -= lsr($Rss32,$Rt32)",
-tc_f675fee8, TypeS_3op>, Enc_1aa186 {
+tc_2c13e7f5, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011100;
@@ -19206,7 +19529,7 @@ def S2_lsr_r_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 |= lsr($Rss32,$Rt32)",
-tc_f429765c, TypeS_3op>, Enc_1aa186 {
+tc_a4e22bbd, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011000;
@@ -19217,7 +19540,7 @@ def S2_lsr_r_p_xor : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 ^= lsr($Rss32,$Rt32)",
-tc_f429765c, TypeS_3op>, Enc_1aa186 {
+tc_a4e22bbd, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001011011;
@@ -19228,7 +19551,7 @@ def S2_lsr_r_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = lsr($Rs32,$Rt32)",
-tc_946df596, TypeS_3op>, Enc_5ab2be {
+tc_5da50c4b, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110010;
@@ -19239,7 +19562,7 @@ def S2_lsr_r_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += lsr($Rs32,$Rt32)",
-tc_f675fee8, TypeS_3op>, Enc_2ae154 {
+tc_2c13e7f5, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100110;
@@ -19252,7 +19575,7 @@ def S2_lsr_r_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 &= lsr($Rs32,$Rt32)",
-tc_f429765c, TypeS_3op>, Enc_2ae154 {
+tc_a4e22bbd, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100010;
@@ -19265,7 +19588,7 @@ def S2_lsr_r_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= lsr($Rs32,$Rt32)",
-tc_f675fee8, TypeS_3op>, Enc_2ae154 {
+tc_2c13e7f5, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100100;
@@ -19278,7 +19601,7 @@ def S2_lsr_r_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 |= lsr($Rs32,$Rt32)",
-tc_f429765c, TypeS_3op>, Enc_2ae154 {
+tc_a4e22bbd, TypeS_3op>, Enc_2ae154 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001100000;
@@ -19291,7 +19614,7 @@ def S2_lsr_r_vh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vlsrh($Rss32,$Rt32)",
-tc_946df596, TypeS_3op>, Enc_927852 {
+tc_5da50c4b, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011010;
@@ -19300,7 +19623,7 @@ def S2_lsr_r_vw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vlsrw($Rss32,$Rt32)",
-tc_946df596, TypeS_3op>, Enc_927852 {
+tc_5da50c4b, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011000;
@@ -19309,7 +19632,7 @@ def S2_mask : HInst<
(outs IntRegs:$Rd32),
(ins u5_0Imm:$Ii, u5_0Imm:$II),
"$Rd32 = mask(#$Ii,#$II)",
-tc_9461ff31, TypeS_2op>, Enc_c85e2a, Requires<[HasV66]> {
+tc_1fcb8495, TypeS_2op>, Enc_c85e2a, Requires<[HasV66]> {
let Inst{13-13} = 0b1;
let Inst{20-16} = 0b00000;
let Inst{31-23} = 0b100011010;
@@ -19321,7 +19644,7 @@ def S2_packhl : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = packhl($Rs32,$Rt32)",
-tc_5a2711e5, TypeALU32_3op>, Enc_be32a5 {
+tc_713b66bf, TypeALU32_3op>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11110101100;
@@ -19331,7 +19654,7 @@ def S2_parityp : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = parity($Rss32,$Rtt32)",
-tc_002cb246, TypeALU64>, Enc_d2216a {
+tc_a08b630b, TypeALU64>, Enc_d2216a {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010000000;
@@ -19343,7 +19666,7 @@ def S2_pstorerbf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memb($Rs32+#$Ii) = $Rt32",
-tc_f8e23f0b, TypeV2LDST>, Enc_da8d43, AddrModeRel {
+tc_8035e91f, TypeV2LDST>, Enc_da8d43, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000100000;
let isPredicated = 1;
@@ -19351,9 +19674,9 @@ let isPredicatedFalse = 1;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storerb_io";
let CextOpcode = "S2_storerb";
let InputType = "imm";
-let BaseOpcode = "S2_storerb_io";
let isNVStorable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -19365,7 +19688,7 @@ def S2_pstorerbf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memb($Rx32++#$Ii) = $Rt32",
-tc_24b66c99, TypeST>, Enc_cc449f, AddrModeRel {
+tc_9edefe01, TypeST>, Enc_cc449f, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19383,7 +19706,7 @@ def S2_pstorerbf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4) memb($Rs32) = $Rt32",
-tc_f8e23f0b, TypeMAPPING> {
+tc_8035e91f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19391,7 +19714,7 @@ def S2_pstorerbfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memb($Rx32++#$Ii) = $Rt32",
-tc_53559e35, TypeST>, Enc_cc449f, AddrModeRel {
+tc_449acf79, TypeST>, Enc_cc449f, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19410,7 +19733,7 @@ def S2_pstorerbnewf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memb($Rs32+#$Ii) = $Nt8.new",
-tc_8fb7ab1b, TypeV2LDST>, Enc_585242, AddrModeRel {
+tc_011e0e9d, TypeV2LDST>, Enc_585242, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b01000100101;
@@ -19422,9 +19745,9 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerb_io";
let CextOpcode = "S2_storerb";
let InputType = "imm";
-let BaseOpcode = "S2_storerb_io";
let isExtendable = 1;
let opExtendable = 2;
let isExtentSigned = 0;
@@ -19436,7 +19759,7 @@ def S2_pstorerbnewf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memb($Rx32++#$Ii) = $Nt8.new",
-tc_838b34ea, TypeST>, Enc_52a5dd, AddrModeRel {
+tc_ce59038e, TypeST>, Enc_52a5dd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b100;
@@ -19449,8 +19772,8 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerb";
let BaseOpcode = "S2_storerb_pi";
+let CextOpcode = "S2_storerb";
let opNewValue = 4;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -19458,7 +19781,7 @@ def S2_pstorerbnewf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4) memb($Rs32) = $Nt8.new",
-tc_8fb7ab1b, TypeMAPPING> {
+tc_011e0e9d, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -19467,7 +19790,7 @@ def S2_pstorerbnewfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memb($Rx32++#$Ii) = $Nt8.new",
-tc_d65dbf51, TypeST>, Enc_52a5dd, AddrModeRel {
+tc_f529831b, TypeST>, Enc_52a5dd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b100;
@@ -19481,8 +19804,8 @@ let isPredicatedNew = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerb";
let BaseOpcode = "S2_storerb_pi";
+let CextOpcode = "S2_storerb";
let opNewValue = 4;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -19490,7 +19813,7 @@ def S2_pstorerbnewt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memb($Rs32+#$Ii) = $Nt8.new",
-tc_8fb7ab1b, TypeV2LDST>, Enc_585242, AddrModeRel {
+tc_011e0e9d, TypeV2LDST>, Enc_585242, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b01000000101;
@@ -19501,9 +19824,9 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerb_io";
let CextOpcode = "S2_storerb";
let InputType = "imm";
-let BaseOpcode = "S2_storerb_io";
let isExtendable = 1;
let opExtendable = 2;
let isExtentSigned = 0;
@@ -19515,7 +19838,7 @@ def S2_pstorerbnewt_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memb($Rx32++#$Ii) = $Nt8.new",
-tc_838b34ea, TypeST>, Enc_52a5dd, AddrModeRel {
+tc_ce59038e, TypeST>, Enc_52a5dd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b100;
@@ -19527,8 +19850,8 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerb";
let BaseOpcode = "S2_storerb_pi";
+let CextOpcode = "S2_storerb";
let opNewValue = 4;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -19536,7 +19859,7 @@ def S2_pstorerbnewt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4) memb($Rs32) = $Nt8.new",
-tc_8fb7ab1b, TypeMAPPING> {
+tc_011e0e9d, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -19545,7 +19868,7 @@ def S2_pstorerbnewtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memb($Rx32++#$Ii) = $Nt8.new",
-tc_d65dbf51, TypeST>, Enc_52a5dd, AddrModeRel {
+tc_f529831b, TypeST>, Enc_52a5dd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b100;
@@ -19558,8 +19881,8 @@ let isPredicatedNew = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerb";
let BaseOpcode = "S2_storerb_pi";
+let CextOpcode = "S2_storerb";
let opNewValue = 4;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -19567,16 +19890,16 @@ def S2_pstorerbt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memb($Rs32+#$Ii) = $Rt32",
-tc_f8e23f0b, TypeV2LDST>, Enc_da8d43, AddrModeRel {
+tc_8035e91f, TypeV2LDST>, Enc_da8d43, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000000000;
let isPredicated = 1;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storerb_io";
let CextOpcode = "S2_storerb";
let InputType = "imm";
-let BaseOpcode = "S2_storerb_io";
let isNVStorable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -19588,7 +19911,7 @@ def S2_pstorerbt_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memb($Rx32++#$Ii) = $Rt32",
-tc_24b66c99, TypeST>, Enc_cc449f, AddrModeRel {
+tc_9edefe01, TypeST>, Enc_cc449f, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19605,7 +19928,7 @@ def S2_pstorerbt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4) memb($Rs32) = $Rt32",
-tc_f8e23f0b, TypeMAPPING> {
+tc_8035e91f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19613,7 +19936,7 @@ def S2_pstorerbtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memb($Rx32++#$Ii) = $Rt32",
-tc_53559e35, TypeST>, Enc_cc449f, AddrModeRel {
+tc_449acf79, TypeST>, Enc_cc449f, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19631,7 +19954,7 @@ def S2_pstorerdf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4) memd($Rs32+#$Ii) = $Rtt32",
-tc_f8e23f0b, TypeV2LDST>, Enc_57a33e, AddrModeRel {
+tc_8035e91f, TypeV2LDST>, Enc_57a33e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000100110;
let isPredicated = 1;
@@ -19639,9 +19962,9 @@ let isPredicatedFalse = 1;
let addrMode = BaseImmOffset;
let accessSize = DoubleWordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storerd_io";
let CextOpcode = "S2_storerd";
let InputType = "imm";
-let BaseOpcode = "S2_storerd_io";
let isExtendable = 1;
let opExtendable = 2;
let isExtentSigned = 0;
@@ -19652,7 +19975,7 @@ def S2_pstorerdf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4) memd($Rx32++#$Ii) = $Rtt32",
-tc_24b66c99, TypeST>, Enc_9a33d5, AddrModeRel {
+tc_9edefe01, TypeST>, Enc_9a33d5, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19662,15 +19985,15 @@ let isPredicatedFalse = 1;
let addrMode = PostInc;
let accessSize = DoubleWordAccess;
let mayStore = 1;
-let CextOpcode = "S2_storerd";
let BaseOpcode = "S2_storerd_pi";
+let CextOpcode = "S2_storerd";
let Constraints = "$Rx32 = $Rx32in";
}
def S2_pstorerdf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
"if (!$Pv4) memd($Rs32) = $Rtt32",
-tc_f8e23f0b, TypeMAPPING> {
+tc_8035e91f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19678,7 +20001,7 @@ def S2_pstorerdfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4.new) memd($Rx32++#$Ii) = $Rtt32",
-tc_53559e35, TypeST>, Enc_9a33d5, AddrModeRel {
+tc_449acf79, TypeST>, Enc_9a33d5, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19689,24 +20012,24 @@ let addrMode = PostInc;
let accessSize = DoubleWordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerd";
let BaseOpcode = "S2_storerd_pi";
+let CextOpcode = "S2_storerd";
let Constraints = "$Rx32 = $Rx32in";
}
def S2_pstorerdt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4) memd($Rs32+#$Ii) = $Rtt32",
-tc_f8e23f0b, TypeV2LDST>, Enc_57a33e, AddrModeRel {
+tc_8035e91f, TypeV2LDST>, Enc_57a33e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000000110;
let isPredicated = 1;
let addrMode = BaseImmOffset;
let accessSize = DoubleWordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storerd_io";
let CextOpcode = "S2_storerd";
let InputType = "imm";
-let BaseOpcode = "S2_storerd_io";
let isExtendable = 1;
let opExtendable = 2;
let isExtentSigned = 0;
@@ -19717,7 +20040,7 @@ def S2_pstorerdt_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4) memd($Rx32++#$Ii) = $Rtt32",
-tc_24b66c99, TypeST>, Enc_9a33d5, AddrModeRel {
+tc_9edefe01, TypeST>, Enc_9a33d5, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19726,15 +20049,15 @@ let isPredicated = 1;
let addrMode = PostInc;
let accessSize = DoubleWordAccess;
let mayStore = 1;
-let CextOpcode = "S2_storerd";
let BaseOpcode = "S2_storerd_pi";
+let CextOpcode = "S2_storerd";
let Constraints = "$Rx32 = $Rx32in";
}
def S2_pstorerdt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
"if ($Pv4) memd($Rs32) = $Rtt32",
-tc_f8e23f0b, TypeMAPPING> {
+tc_8035e91f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19742,7 +20065,7 @@ def S2_pstorerdtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4.new) memd($Rx32++#$Ii) = $Rtt32",
-tc_53559e35, TypeST>, Enc_9a33d5, AddrModeRel {
+tc_449acf79, TypeST>, Enc_9a33d5, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19752,15 +20075,15 @@ let addrMode = PostInc;
let accessSize = DoubleWordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerd";
let BaseOpcode = "S2_storerd_pi";
+let CextOpcode = "S2_storerd";
let Constraints = "$Rx32 = $Rx32in";
}
def S2_pstorerff_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32+#$Ii) = $Rt32.h",
-tc_f8e23f0b, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_8035e91f, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000100011;
let isPredicated = 1;
@@ -19768,9 +20091,9 @@ let isPredicatedFalse = 1;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storerf_io";
let CextOpcode = "S2_storerf";
let InputType = "imm";
-let BaseOpcode = "S2_storerf_io";
let isExtendable = 1;
let opExtendable = 2;
let isExtentSigned = 0;
@@ -19781,7 +20104,7 @@ def S2_pstorerff_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rx32++#$Ii) = $Rt32.h",
-tc_24b66c99, TypeST>, Enc_b886fd, AddrModeRel {
+tc_9edefe01, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19791,15 +20114,15 @@ let isPredicatedFalse = 1;
let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayStore = 1;
-let CextOpcode = "S2_storerf";
let BaseOpcode = "S2_storerf_pi";
+let CextOpcode = "S2_storerf";
let Constraints = "$Rx32 = $Rx32in";
}
def S2_pstorerff_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32) = $Rt32.h",
-tc_f8e23f0b, TypeMAPPING> {
+tc_8035e91f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19807,7 +20130,7 @@ def S2_pstorerffnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rx32++#$Ii) = $Rt32.h",
-tc_53559e35, TypeST>, Enc_b886fd, AddrModeRel {
+tc_449acf79, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19818,24 +20141,24 @@ let addrMode = PostInc;
let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerf";
let BaseOpcode = "S2_storerf_pi";
+let CextOpcode = "S2_storerf";
let Constraints = "$Rx32 = $Rx32in";
}
def S2_pstorerft_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32+#$Ii) = $Rt32.h",
-tc_f8e23f0b, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_8035e91f, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000000011;
let isPredicated = 1;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storerf_io";
let CextOpcode = "S2_storerf";
let InputType = "imm";
-let BaseOpcode = "S2_storerf_io";
let isExtendable = 1;
let opExtendable = 2;
let isExtentSigned = 0;
@@ -19846,7 +20169,7 @@ def S2_pstorerft_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rx32++#$Ii) = $Rt32.h",
-tc_24b66c99, TypeST>, Enc_b886fd, AddrModeRel {
+tc_9edefe01, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19855,15 +20178,15 @@ let isPredicated = 1;
let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayStore = 1;
-let CextOpcode = "S2_storerf";
let BaseOpcode = "S2_storerf_pi";
+let CextOpcode = "S2_storerf";
let Constraints = "$Rx32 = $Rx32in";
}
def S2_pstorerft_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32) = $Rt32.h",
-tc_f8e23f0b, TypeMAPPING> {
+tc_8035e91f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19871,7 +20194,7 @@ def S2_pstorerftnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rx32++#$Ii) = $Rt32.h",
-tc_53559e35, TypeST>, Enc_b886fd, AddrModeRel {
+tc_449acf79, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19881,15 +20204,15 @@ let addrMode = PostInc;
let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerf";
let BaseOpcode = "S2_storerf_pi";
+let CextOpcode = "S2_storerf";
let Constraints = "$Rx32 = $Rx32in";
}
def S2_pstorerhf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32+#$Ii) = $Rt32",
-tc_f8e23f0b, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_8035e91f, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000100010;
let isPredicated = 1;
@@ -19897,9 +20220,9 @@ let isPredicatedFalse = 1;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_io";
let CextOpcode = "S2_storerh";
let InputType = "imm";
-let BaseOpcode = "S2_storerh_io";
let isNVStorable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -19911,7 +20234,7 @@ def S2_pstorerhf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rx32++#$Ii) = $Rt32",
-tc_24b66c99, TypeST>, Enc_b886fd, AddrModeRel {
+tc_9edefe01, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -19929,7 +20252,7 @@ def S2_pstorerhf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32) = $Rt32",
-tc_f8e23f0b, TypeMAPPING> {
+tc_8035e91f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -19937,7 +20260,7 @@ def S2_pstorerhfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rx32++#$Ii) = $Rt32",
-tc_53559e35, TypeST>, Enc_b886fd, AddrModeRel {
+tc_449acf79, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -19956,7 +20279,7 @@ def S2_pstorerhnewf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memh($Rs32+#$Ii) = $Nt8.new",
-tc_8fb7ab1b, TypeV2LDST>, Enc_f44229, AddrModeRel {
+tc_011e0e9d, TypeV2LDST>, Enc_f44229, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b01000100101;
@@ -19968,9 +20291,9 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_io";
let CextOpcode = "S2_storerh";
let InputType = "imm";
-let BaseOpcode = "S2_storerh_io";
let isExtendable = 1;
let opExtendable = 2;
let isExtentSigned = 0;
@@ -19982,7 +20305,7 @@ def S2_pstorerhnewf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memh($Rx32++#$Ii) = $Nt8.new",
-tc_838b34ea, TypeST>, Enc_31aa6a, AddrModeRel {
+tc_ce59038e, TypeST>, Enc_31aa6a, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b101;
@@ -19995,8 +20318,8 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerh";
let BaseOpcode = "S2_storerh_pi";
+let CextOpcode = "S2_storerh";
let opNewValue = 4;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -20004,7 +20327,7 @@ def S2_pstorerhnewf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4) memh($Rs32) = $Nt8.new",
-tc_8fb7ab1b, TypeMAPPING> {
+tc_011e0e9d, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -20013,7 +20336,7 @@ def S2_pstorerhnewfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memh($Rx32++#$Ii) = $Nt8.new",
-tc_d65dbf51, TypeST>, Enc_31aa6a, AddrModeRel {
+tc_f529831b, TypeST>, Enc_31aa6a, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b101;
@@ -20027,8 +20350,8 @@ let isPredicatedNew = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerh";
let BaseOpcode = "S2_storerh_pi";
+let CextOpcode = "S2_storerh";
let opNewValue = 4;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -20036,7 +20359,7 @@ def S2_pstorerhnewt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memh($Rs32+#$Ii) = $Nt8.new",
-tc_8fb7ab1b, TypeV2LDST>, Enc_f44229, AddrModeRel {
+tc_011e0e9d, TypeV2LDST>, Enc_f44229, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b01000000101;
@@ -20047,9 +20370,9 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_io";
let CextOpcode = "S2_storerh";
let InputType = "imm";
-let BaseOpcode = "S2_storerh_io";
let isExtendable = 1;
let opExtendable = 2;
let isExtentSigned = 0;
@@ -20061,7 +20384,7 @@ def S2_pstorerhnewt_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memh($Rx32++#$Ii) = $Nt8.new",
-tc_838b34ea, TypeST>, Enc_31aa6a, AddrModeRel {
+tc_ce59038e, TypeST>, Enc_31aa6a, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b101;
@@ -20073,8 +20396,8 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerh";
let BaseOpcode = "S2_storerh_pi";
+let CextOpcode = "S2_storerh";
let opNewValue = 4;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -20082,7 +20405,7 @@ def S2_pstorerhnewt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4) memh($Rs32) = $Nt8.new",
-tc_8fb7ab1b, TypeMAPPING> {
+tc_011e0e9d, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -20091,7 +20414,7 @@ def S2_pstorerhnewtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memh($Rx32++#$Ii) = $Nt8.new",
-tc_d65dbf51, TypeST>, Enc_31aa6a, AddrModeRel {
+tc_f529831b, TypeST>, Enc_31aa6a, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b101;
@@ -20104,8 +20427,8 @@ let isPredicatedNew = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerh";
let BaseOpcode = "S2_storerh_pi";
+let CextOpcode = "S2_storerh";
let opNewValue = 4;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -20113,16 +20436,16 @@ def S2_pstorerht_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32+#$Ii) = $Rt32",
-tc_f8e23f0b, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_8035e91f, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000000010;
let isPredicated = 1;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_io";
let CextOpcode = "S2_storerh";
let InputType = "imm";
-let BaseOpcode = "S2_storerh_io";
let isNVStorable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -20134,7 +20457,7 @@ def S2_pstorerht_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rx32++#$Ii) = $Rt32",
-tc_24b66c99, TypeST>, Enc_b886fd, AddrModeRel {
+tc_9edefe01, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -20151,7 +20474,7 @@ def S2_pstorerht_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32) = $Rt32",
-tc_f8e23f0b, TypeMAPPING> {
+tc_8035e91f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20159,7 +20482,7 @@ def S2_pstorerhtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rx32++#$Ii) = $Rt32",
-tc_53559e35, TypeST>, Enc_b886fd, AddrModeRel {
+tc_449acf79, TypeST>, Enc_b886fd, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -20177,7 +20500,7 @@ def S2_pstorerif_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memw($Rs32+#$Ii) = $Rt32",
-tc_f8e23f0b, TypeV2LDST>, Enc_397f23, AddrModeRel {
+tc_8035e91f, TypeV2LDST>, Enc_397f23, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000100100;
let isPredicated = 1;
@@ -20185,9 +20508,9 @@ let isPredicatedFalse = 1;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_io";
let CextOpcode = "S2_storeri";
let InputType = "imm";
-let BaseOpcode = "S2_storeri_io";
let isNVStorable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -20199,7 +20522,7 @@ def S2_pstorerif_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memw($Rx32++#$Ii) = $Rt32",
-tc_24b66c99, TypeST>, Enc_7eaeb6, AddrModeRel {
+tc_9edefe01, TypeST>, Enc_7eaeb6, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -20217,7 +20540,7 @@ def S2_pstorerif_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4) memw($Rs32) = $Rt32",
-tc_f8e23f0b, TypeMAPPING> {
+tc_8035e91f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20225,7 +20548,7 @@ def S2_pstorerifnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memw($Rx32++#$Ii) = $Rt32",
-tc_53559e35, TypeST>, Enc_7eaeb6, AddrModeRel {
+tc_449acf79, TypeST>, Enc_7eaeb6, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -20236,8 +20559,8 @@ let addrMode = PostInc;
let accessSize = WordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
-let CextOpcode = "S2_storeri";
let BaseOpcode = "S2_storeri_pi";
+let CextOpcode = "S2_storeri";
let isNVStorable = 1;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -20245,7 +20568,7 @@ def S2_pstorerinewf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memw($Rs32+#$Ii) = $Nt8.new",
-tc_8fb7ab1b, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
+tc_011e0e9d, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b01000100101;
@@ -20257,9 +20580,9 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_io";
let CextOpcode = "S2_storeri";
let InputType = "imm";
-let BaseOpcode = "S2_storeri_io";
let isExtendable = 1;
let opExtendable = 2;
let isExtentSigned = 0;
@@ -20271,7 +20594,7 @@ def S2_pstorerinewf_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memw($Rx32++#$Ii) = $Nt8.new",
-tc_838b34ea, TypeST>, Enc_65f095, AddrModeRel {
+tc_ce59038e, TypeST>, Enc_65f095, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b110;
@@ -20284,8 +20607,8 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storeri";
let BaseOpcode = "S2_storeri_pi";
+let CextOpcode = "S2_storeri";
let opNewValue = 4;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -20293,7 +20616,7 @@ def S2_pstorerinewf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4) memw($Rs32) = $Nt8.new",
-tc_8fb7ab1b, TypeMAPPING> {
+tc_011e0e9d, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -20302,7 +20625,7 @@ def S2_pstorerinewfnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memw($Rx32++#$Ii) = $Nt8.new",
-tc_d65dbf51, TypeST>, Enc_65f095, AddrModeRel {
+tc_f529831b, TypeST>, Enc_65f095, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b110;
@@ -20316,8 +20639,8 @@ let isPredicatedNew = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storeri";
let BaseOpcode = "S2_storeri_pi";
+let CextOpcode = "S2_storeri";
let opNewValue = 4;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -20325,7 +20648,7 @@ def S2_pstorerinewt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memw($Rs32+#$Ii) = $Nt8.new",
-tc_8fb7ab1b, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
+tc_011e0e9d, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b01000000101;
@@ -20336,9 +20659,9 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_io";
let CextOpcode = "S2_storeri";
let InputType = "imm";
-let BaseOpcode = "S2_storeri_io";
let isExtendable = 1;
let opExtendable = 2;
let isExtentSigned = 0;
@@ -20350,7 +20673,7 @@ def S2_pstorerinewt_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memw($Rx32++#$Ii) = $Nt8.new",
-tc_838b34ea, TypeST>, Enc_65f095, AddrModeRel {
+tc_ce59038e, TypeST>, Enc_65f095, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b110;
@@ -20362,8 +20685,8 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storeri";
let BaseOpcode = "S2_storeri_pi";
+let CextOpcode = "S2_storeri";
let opNewValue = 4;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -20371,7 +20694,7 @@ def S2_pstorerinewt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4) memw($Rs32) = $Nt8.new",
-tc_8fb7ab1b, TypeMAPPING> {
+tc_011e0e9d, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -20380,7 +20703,7 @@ def S2_pstorerinewtnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memw($Rx32++#$Ii) = $Nt8.new",
-tc_d65dbf51, TypeST>, Enc_65f095, AddrModeRel {
+tc_f529831b, TypeST>, Enc_65f095, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b110;
@@ -20393,8 +20716,8 @@ let isPredicatedNew = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storeri";
let BaseOpcode = "S2_storeri_pi";
+let CextOpcode = "S2_storeri";
let opNewValue = 4;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -20402,16 +20725,16 @@ def S2_pstorerit_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memw($Rs32+#$Ii) = $Rt32",
-tc_f8e23f0b, TypeV2LDST>, Enc_397f23, AddrModeRel {
+tc_8035e91f, TypeV2LDST>, Enc_397f23, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000000100;
let isPredicated = 1;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_io";
let CextOpcode = "S2_storeri";
let InputType = "imm";
-let BaseOpcode = "S2_storeri_io";
let isNVStorable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -20423,7 +20746,7 @@ def S2_pstorerit_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memw($Rx32++#$Ii) = $Rt32",
-tc_24b66c99, TypeST>, Enc_7eaeb6, AddrModeRel {
+tc_9edefe01, TypeST>, Enc_7eaeb6, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
@@ -20440,7 +20763,7 @@ def S2_pstorerit_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4) memw($Rs32) = $Rt32",
-tc_f8e23f0b, TypeMAPPING> {
+tc_8035e91f, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20448,7 +20771,7 @@ def S2_pstoreritnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memw($Rx32++#$Ii) = $Rt32",
-tc_53559e35, TypeST>, Enc_7eaeb6, AddrModeRel {
+tc_449acf79, TypeST>, Enc_7eaeb6, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -20466,7 +20789,7 @@ def S2_setbit_i : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = setbit($Rs32,#$Ii)",
-tc_946df596, TypeS_2op>, Enc_a05677 {
+tc_5da50c4b, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100110;
@@ -20477,7 +20800,7 @@ def S2_setbit_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = setbit($Rs32,$Rt32)",
-tc_946df596, TypeS_3op>, Enc_5ab2be {
+tc_5da50c4b, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110100;
@@ -20488,7 +20811,7 @@ def S2_shuffeb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = shuffeb($Rss32,$Rtt32)",
-tc_946df596, TypeS_3op>, Enc_a56825 {
+tc_5da50c4b, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001000;
@@ -20497,7 +20820,7 @@ def S2_shuffeh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = shuffeh($Rss32,$Rtt32)",
-tc_946df596, TypeS_3op>, Enc_a56825 {
+tc_5da50c4b, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001000;
@@ -20506,7 +20829,7 @@ def S2_shuffob : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = shuffob($Rtt32,$Rss32)",
-tc_946df596, TypeS_3op>, Enc_ea23e4 {
+tc_5da50c4b, TypeS_3op>, Enc_ea23e4 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001000;
@@ -20515,7 +20838,7 @@ def S2_shuffoh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = shuffoh($Rtt32,$Rss32)",
-tc_946df596, TypeS_3op>, Enc_ea23e4 {
+tc_5da50c4b, TypeS_3op>, Enc_ea23e4 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -20524,15 +20847,15 @@ def S2_storerb_io : HInst<
(outs),
(ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+#$Ii) = $Rt32",
-tc_30b9bb4a, TypeST>, Enc_448f7f, AddrModeRel, PostInc_BaseImm {
+tc_ae5babd7, TypeST>, Enc_448f7f, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1000;
let Inst{31-27} = 0b10100;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storerb_io";
let CextOpcode = "S2_storerb";
let InputType = "imm";
-let BaseOpcode = "S2_storerb_io";
let isPredicable = 1;
let isNVStorable = 1;
let isExtendable = 1;
@@ -20545,7 +20868,7 @@ def S2_storerb_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memb($Rx32++$Mu2:brev) = $Rt32",
-tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_a2b365d2, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101111000;
let addrMode = PostInc;
@@ -20559,7 +20882,7 @@ def S2_storerb_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
"memb($Rx32++#$Ii:circ($Mu2)) = $Rt32",
-tc_e86aa961, TypeST>, Enc_b15941, AddrModeRel {
+tc_b4dc7630, TypeST>, Enc_b15941, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b10101001000;
@@ -20575,7 +20898,7 @@ def S2_storerb_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memb($Rx32++I:circ($Mu2)) = $Rt32",
-tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_a2b365d2, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{31-21} = 0b10101001000;
let addrMode = PostInc;
@@ -20590,7 +20913,7 @@ def S2_storerb_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rx32++#$Ii) = $Rt32",
-tc_da97ee82, TypeST>, Enc_10bc21, AddrModeRel, PostInc_BaseImm {
+tc_a2b365d2, TypeST>, Enc_10bc21, AddrModeRel, PostInc_BaseImm {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
@@ -20598,8 +20921,8 @@ let Inst{31-21} = 0b10101011000;
let addrMode = PostInc;
let accessSize = ByteAccess;
let mayStore = 1;
-let CextOpcode = "S2_storerb";
let BaseOpcode = "S2_storerb_pi";
+let CextOpcode = "S2_storerb";
let isPredicable = 1;
let isNVStorable = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -20608,12 +20931,13 @@ def S2_storerb_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memb($Rx32++$Mu2) = $Rt32",
-tc_da97ee82, TypeST>, Enc_d5c73f {
+tc_a2b365d2, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101101000;
let addrMode = PostInc;
let accessSize = ByteAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storerb_pr";
let isNVStorable = 1;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -20621,7 +20945,7 @@ def S2_storerb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memb($Rs32) = $Rt32",
-tc_30b9bb4a, TypeMAPPING> {
+tc_ae5babd7, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20629,7 +20953,7 @@ def S2_storerbgp : HInst<
(outs),
(ins u32_0Imm:$Ii, IntRegs:$Rt32),
"memb(gp+#$Ii) = $Rt32",
-tc_0371abea, TypeV2LDST>, Enc_1b64fb, AddrModeRel {
+tc_0655b949, TypeV2LDST>, Enc_1b64fb, AddrModeRel {
let Inst{24-21} = 0b0000;
let Inst{31-27} = 0b01001;
let accessSize = ByteAccess;
@@ -20647,7 +20971,7 @@ def S2_storerbnew_io : HInst<
(outs),
(ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Nt8),
"memb($Rs32+#$Ii) = $Nt8.new",
-tc_be9602ff, TypeST>, Enc_4df4e9, AddrModeRel {
+tc_5deb5e47, TypeST>, Enc_4df4e9, AddrModeRel {
let Inst{12-11} = 0b00;
let Inst{24-21} = 0b1101;
let Inst{31-27} = 0b10100;
@@ -20657,9 +20981,9 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerb_io";
let CextOpcode = "S2_storerb";
let InputType = "imm";
-let BaseOpcode = "S2_storerb_io";
let isPredicable = 1;
let isExtendable = 1;
let opExtendable = 1;
@@ -20672,7 +20996,7 @@ def S2_storerbnew_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memb($Rx32++$Mu2:brev) = $Nt8.new",
-tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_92240447, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b10101111101;
@@ -20690,7 +21014,7 @@ def S2_storerbnew_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8),
"memb($Rx32++#$Ii:circ($Mu2)) = $Nt8.new",
-tc_d5c0729a, TypeST>, Enc_96ce4f, AddrModeRel {
+tc_addc37a8, TypeST>, Enc_96ce4f, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{12-11} = 0b00;
@@ -20710,7 +21034,7 @@ def S2_storerbnew_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memb($Rx32++I:circ($Mu2)) = $Nt8.new",
-tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_92240447, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b10101001101;
@@ -20729,7 +21053,7 @@ def S2_storerbnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
"memb($Rx32++#$Ii) = $Nt8.new",
-tc_c79a189f, TypeST>, Enc_c7cd90, AddrModeRel {
+tc_92240447, TypeST>, Enc_c7cd90, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b000;
@@ -20750,7 +21074,7 @@ def S2_storerbnew_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memb($Rx32++$Mu2) = $Nt8.new",
-tc_c79a189f, TypeST>, Enc_8dbe85 {
+tc_92240447, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b10101101101;
@@ -20760,6 +21084,7 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerb_pr";
let opNewValue = 3;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -20767,7 +21092,7 @@ def S2_storerbnew_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Nt8),
"memb($Rs32) = $Nt8.new",
-tc_be9602ff, TypeMAPPING> {
+tc_5deb5e47, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 1;
@@ -20776,7 +21101,7 @@ def S2_storerbnewgp : HInst<
(outs),
(ins u32_0Imm:$Ii, IntRegs:$Nt8),
"memb(gp+#$Ii) = $Nt8.new",
-tc_5bf126a6, TypeV2LDST>, Enc_ad1831, AddrModeRel {
+tc_6e20402a, TypeV2LDST>, Enc_ad1831, AddrModeRel {
let Inst{12-11} = 0b00;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -20798,15 +21123,15 @@ def S2_storerd_io : HInst<
(outs),
(ins IntRegs:$Rs32, s29_3Imm:$Ii, DoubleRegs:$Rtt32),
"memd($Rs32+#$Ii) = $Rtt32",
-tc_30b9bb4a, TypeST>, Enc_ce6828, AddrModeRel, PostInc_BaseImm {
+tc_ae5babd7, TypeST>, Enc_ce6828, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1110;
let Inst{31-27} = 0b10100;
let addrMode = BaseImmOffset;
let accessSize = DoubleWordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storerd_io";
let CextOpcode = "S2_storerd";
let InputType = "imm";
-let BaseOpcode = "S2_storerd_io";
let isPredicable = 1;
let isExtendable = 1;
let opExtendable = 1;
@@ -20818,7 +21143,7 @@ def S2_storerd_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32),
"memd($Rx32++$Mu2:brev) = $Rtt32",
-tc_da97ee82, TypeST>, Enc_928ca1 {
+tc_a2b365d2, TypeST>, Enc_928ca1 {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101111110;
let addrMode = PostInc;
@@ -20830,7 +21155,7 @@ def S2_storerd_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_3Imm:$Ii, ModRegs:$Mu2, DoubleRegs:$Rtt32),
"memd($Rx32++#$Ii:circ($Mu2)) = $Rtt32",
-tc_e86aa961, TypeST>, Enc_395cc4 {
+tc_b4dc7630, TypeST>, Enc_395cc4 {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b10101001110;
@@ -20844,7 +21169,7 @@ def S2_storerd_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32),
"memd($Rx32++I:circ($Mu2)) = $Rtt32",
-tc_da97ee82, TypeST>, Enc_928ca1 {
+tc_a2b365d2, TypeST>, Enc_928ca1 {
let Inst{7-0} = 0b00000010;
let Inst{31-21} = 0b10101001110;
let addrMode = PostInc;
@@ -20857,7 +21182,7 @@ def S2_storerd_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
"memd($Rx32++#$Ii) = $Rtt32",
-tc_da97ee82, TypeST>, Enc_85bf58, AddrModeRel, PostInc_BaseImm {
+tc_a2b365d2, TypeST>, Enc_85bf58, AddrModeRel, PostInc_BaseImm {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
@@ -20865,8 +21190,8 @@ let Inst{31-21} = 0b10101011110;
let addrMode = PostInc;
let accessSize = DoubleWordAccess;
let mayStore = 1;
-let CextOpcode = "S2_storerd";
let BaseOpcode = "S2_storerd_pi";
+let CextOpcode = "S2_storerd";
let isPredicable = 1;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -20874,7 +21199,7 @@ def S2_storerd_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32),
"memd($Rx32++$Mu2) = $Rtt32",
-tc_da97ee82, TypeST>, Enc_928ca1 {
+tc_a2b365d2, TypeST>, Enc_928ca1 {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101101110;
let addrMode = PostInc;
@@ -20886,7 +21211,7 @@ def S2_storerd_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"memd($Rs32) = $Rtt32",
-tc_30b9bb4a, TypeMAPPING> {
+tc_ae5babd7, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -20894,7 +21219,7 @@ def S2_storerdgp : HInst<
(outs),
(ins u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"memd(gp+#$Ii) = $Rtt32",
-tc_0371abea, TypeV2LDST>, Enc_5c124a, AddrModeRel {
+tc_0655b949, TypeV2LDST>, Enc_5c124a, AddrModeRel {
let Inst{24-21} = 0b0110;
let Inst{31-27} = 0b01001;
let accessSize = DoubleWordAccess;
@@ -20911,15 +21236,15 @@ def S2_storerf_io : HInst<
(outs),
(ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) = $Rt32.h",
-tc_30b9bb4a, TypeST>, Enc_e957fb, AddrModeRel, PostInc_BaseImm {
+tc_ae5babd7, TypeST>, Enc_e957fb, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1011;
let Inst{31-27} = 0b10100;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storerf_io";
let CextOpcode = "S2_storerf";
let InputType = "imm";
-let BaseOpcode = "S2_storerf_io";
let isPredicable = 1;
let isExtendable = 1;
let opExtendable = 1;
@@ -20931,7 +21256,7 @@ def S2_storerf_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++$Mu2:brev) = $Rt32.h",
-tc_da97ee82, TypeST>, Enc_d5c73f {
+tc_a2b365d2, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101111011;
let addrMode = PostInc;
@@ -20943,7 +21268,7 @@ def S2_storerf_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++#$Ii:circ($Mu2)) = $Rt32.h",
-tc_e86aa961, TypeST>, Enc_935d9b {
+tc_b4dc7630, TypeST>, Enc_935d9b {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b10101001011;
@@ -20957,7 +21282,7 @@ def S2_storerf_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++I:circ($Mu2)) = $Rt32.h",
-tc_da97ee82, TypeST>, Enc_d5c73f {
+tc_a2b365d2, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000010;
let Inst{31-21} = 0b10101001011;
let addrMode = PostInc;
@@ -20970,7 +21295,7 @@ def S2_storerf_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rx32++#$Ii) = $Rt32.h",
-tc_da97ee82, TypeST>, Enc_052c7d, AddrModeRel, PostInc_BaseImm {
+tc_a2b365d2, TypeST>, Enc_052c7d, AddrModeRel, PostInc_BaseImm {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
@@ -20978,8 +21303,8 @@ let Inst{31-21} = 0b10101011011;
let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayStore = 1;
-let CextOpcode = "S2_storerf";
let BaseOpcode = "S2_storerf_pi";
+let CextOpcode = "S2_storerf";
let isPredicable = 1;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -20987,7 +21312,7 @@ def S2_storerf_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++$Mu2) = $Rt32.h",
-tc_da97ee82, TypeST>, Enc_d5c73f {
+tc_a2b365d2, TypeST>, Enc_d5c73f {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101101011;
let addrMode = PostInc;
@@ -20999,7 +21324,7 @@ def S2_storerf_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) = $Rt32.h",
-tc_30b9bb4a, TypeMAPPING> {
+tc_ae5babd7, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -21007,7 +21332,7 @@ def S2_storerfgp : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Rt32),
"memh(gp+#$Ii) = $Rt32.h",
-tc_0371abea, TypeV2LDST>, Enc_fda92c, AddrModeRel {
+tc_0655b949, TypeV2LDST>, Enc_fda92c, AddrModeRel {
let Inst{24-21} = 0b0011;
let Inst{31-27} = 0b01001;
let accessSize = HalfWordAccess;
@@ -21024,15 +21349,15 @@ def S2_storerh_io : HInst<
(outs),
(ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+#$Ii) = $Rt32",
-tc_30b9bb4a, TypeST>, Enc_e957fb, AddrModeRel, PostInc_BaseImm {
+tc_ae5babd7, TypeST>, Enc_e957fb, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1010;
let Inst{31-27} = 0b10100;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_io";
let CextOpcode = "S2_storerh";
let InputType = "imm";
-let BaseOpcode = "S2_storerh_io";
let isPredicable = 1;
let isNVStorable = 1;
let isExtendable = 1;
@@ -21045,7 +21370,7 @@ def S2_storerh_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++$Mu2:brev) = $Rt32",
-tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_a2b365d2, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101111010;
let addrMode = PostInc;
@@ -21059,7 +21384,7 @@ def S2_storerh_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++#$Ii:circ($Mu2)) = $Rt32",
-tc_e86aa961, TypeST>, Enc_935d9b, AddrModeRel {
+tc_b4dc7630, TypeST>, Enc_935d9b, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b10101001010;
@@ -21075,7 +21400,7 @@ def S2_storerh_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++I:circ($Mu2)) = $Rt32",
-tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_a2b365d2, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{31-21} = 0b10101001010;
let addrMode = PostInc;
@@ -21090,7 +21415,7 @@ def S2_storerh_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
"memh($Rx32++#$Ii) = $Rt32",
-tc_da97ee82, TypeST>, Enc_052c7d, AddrModeRel, PostInc_BaseImm {
+tc_a2b365d2, TypeST>, Enc_052c7d, AddrModeRel, PostInc_BaseImm {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
@@ -21098,8 +21423,8 @@ let Inst{31-21} = 0b10101011010;
let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayStore = 1;
-let CextOpcode = "S2_storerh";
let BaseOpcode = "S2_storerh_pi";
+let CextOpcode = "S2_storerh";
let isPredicable = 1;
let isNVStorable = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -21108,12 +21433,13 @@ def S2_storerh_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memh($Rx32++$Mu2) = $Rt32",
-tc_da97ee82, TypeST>, Enc_d5c73f {
+tc_a2b365d2, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101101010;
let addrMode = PostInc;
let accessSize = HalfWordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_pr";
let isNVStorable = 1;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -21121,7 +21447,7 @@ def S2_storerh_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memh($Rs32) = $Rt32",
-tc_30b9bb4a, TypeMAPPING> {
+tc_ae5babd7, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -21129,7 +21455,7 @@ def S2_storerhgp : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Rt32),
"memh(gp+#$Ii) = $Rt32",
-tc_0371abea, TypeV2LDST>, Enc_fda92c, AddrModeRel {
+tc_0655b949, TypeV2LDST>, Enc_fda92c, AddrModeRel {
let Inst{24-21} = 0b0010;
let Inst{31-27} = 0b01001;
let accessSize = HalfWordAccess;
@@ -21147,7 +21473,7 @@ def S2_storerhnew_io : HInst<
(outs),
(ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Nt8),
"memh($Rs32+#$Ii) = $Nt8.new",
-tc_be9602ff, TypeST>, Enc_0d8870, AddrModeRel {
+tc_5deb5e47, TypeST>, Enc_0d8870, AddrModeRel {
let Inst{12-11} = 0b01;
let Inst{24-21} = 0b1101;
let Inst{31-27} = 0b10100;
@@ -21157,9 +21483,9 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_io";
let CextOpcode = "S2_storerh";
let InputType = "imm";
-let BaseOpcode = "S2_storerh_io";
let isPredicable = 1;
let isExtendable = 1;
let opExtendable = 1;
@@ -21172,7 +21498,7 @@ def S2_storerhnew_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memh($Rx32++$Mu2:brev) = $Nt8.new",
-tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_92240447, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b10101111101;
@@ -21190,7 +21516,7 @@ def S2_storerhnew_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8),
"memh($Rx32++#$Ii:circ($Mu2)) = $Nt8.new",
-tc_d5c0729a, TypeST>, Enc_91b9fe, AddrModeRel {
+tc_addc37a8, TypeST>, Enc_91b9fe, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{12-11} = 0b01;
@@ -21210,7 +21536,7 @@ def S2_storerhnew_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memh($Rx32++I:circ($Mu2)) = $Nt8.new",
-tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_92240447, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b10101001101;
@@ -21229,7 +21555,7 @@ def S2_storerhnew_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
"memh($Rx32++#$Ii) = $Nt8.new",
-tc_c79a189f, TypeST>, Enc_e26546, AddrModeRel {
+tc_92240447, TypeST>, Enc_e26546, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b001;
@@ -21250,7 +21576,7 @@ def S2_storerhnew_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memh($Rx32++$Mu2) = $Nt8.new",
-tc_c79a189f, TypeST>, Enc_8dbe85 {
+tc_92240447, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b10101101101;
@@ -21260,6 +21586,7 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_pr";
let opNewValue = 3;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -21267,7 +21594,7 @@ def S2_storerhnew_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Nt8),
"memh($Rs32) = $Nt8.new",
-tc_be9602ff, TypeMAPPING> {
+tc_5deb5e47, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 1;
@@ -21276,7 +21603,7 @@ def S2_storerhnewgp : HInst<
(outs),
(ins u31_1Imm:$Ii, IntRegs:$Nt8),
"memh(gp+#$Ii) = $Nt8.new",
-tc_5bf126a6, TypeV2LDST>, Enc_bc03e5, AddrModeRel {
+tc_6e20402a, TypeV2LDST>, Enc_bc03e5, AddrModeRel {
let Inst{12-11} = 0b01;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -21298,15 +21625,15 @@ def S2_storeri_io : HInst<
(outs),
(ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+#$Ii) = $Rt32",
-tc_30b9bb4a, TypeST>, Enc_143445, AddrModeRel, PostInc_BaseImm {
+tc_ae5babd7, TypeST>, Enc_143445, AddrModeRel, PostInc_BaseImm {
let Inst{24-21} = 0b1100;
let Inst{31-27} = 0b10100;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_io";
let CextOpcode = "S2_storeri";
let InputType = "imm";
-let BaseOpcode = "S2_storeri_io";
let isPredicable = 1;
let isNVStorable = 1;
let isExtendable = 1;
@@ -21319,7 +21646,7 @@ def S2_storeri_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memw($Rx32++$Mu2:brev) = $Rt32",
-tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_a2b365d2, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101111100;
let addrMode = PostInc;
@@ -21333,7 +21660,7 @@ def S2_storeri_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
"memw($Rx32++#$Ii:circ($Mu2)) = $Rt32",
-tc_e86aa961, TypeST>, Enc_79b8c8, AddrModeRel {
+tc_b4dc7630, TypeST>, Enc_79b8c8, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b10101001100;
@@ -21349,7 +21676,7 @@ def S2_storeri_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memw($Rx32++I:circ($Mu2)) = $Rt32",
-tc_da97ee82, TypeST>, Enc_d5c73f, AddrModeRel {
+tc_a2b365d2, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{31-21} = 0b10101001100;
let addrMode = PostInc;
@@ -21364,7 +21691,7 @@ def S2_storeri_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
"memw($Rx32++#$Ii) = $Rt32",
-tc_da97ee82, TypeST>, Enc_db40cd, AddrModeRel, PostInc_BaseImm {
+tc_a2b365d2, TypeST>, Enc_db40cd, AddrModeRel, PostInc_BaseImm {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
@@ -21372,8 +21699,8 @@ let Inst{31-21} = 0b10101011100;
let addrMode = PostInc;
let accessSize = WordAccess;
let mayStore = 1;
-let CextOpcode = "S2_storeri";
let BaseOpcode = "S2_storeri_pi";
+let CextOpcode = "S2_storeri";
let isPredicable = 1;
let isNVStorable = 1;
let Constraints = "$Rx32 = $Rx32in";
@@ -21382,12 +21709,13 @@ def S2_storeri_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
"memw($Rx32++$Mu2) = $Rt32",
-tc_da97ee82, TypeST>, Enc_d5c73f {
+tc_a2b365d2, TypeST>, Enc_d5c73f, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b10101101100;
let addrMode = PostInc;
let accessSize = WordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_pr";
let isNVStorable = 1;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -21395,7 +21723,7 @@ def S2_storeri_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw($Rs32) = $Rt32",
-tc_30b9bb4a, TypeMAPPING> {
+tc_ae5babd7, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -21403,7 +21731,7 @@ def S2_storerigp : HInst<
(outs),
(ins u30_2Imm:$Ii, IntRegs:$Rt32),
"memw(gp+#$Ii) = $Rt32",
-tc_0371abea, TypeV2LDST>, Enc_541f26, AddrModeRel {
+tc_0655b949, TypeV2LDST>, Enc_541f26, AddrModeRel {
let Inst{24-21} = 0b0100;
let Inst{31-27} = 0b01001;
let accessSize = WordAccess;
@@ -21421,7 +21749,7 @@ def S2_storerinew_io : HInst<
(outs),
(ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Nt8),
"memw($Rs32+#$Ii) = $Nt8.new",
-tc_be9602ff, TypeST>, Enc_690862, AddrModeRel {
+tc_5deb5e47, TypeST>, Enc_690862, AddrModeRel {
let Inst{12-11} = 0b10;
let Inst{24-21} = 0b1101;
let Inst{31-27} = 0b10100;
@@ -21431,9 +21759,9 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_io";
let CextOpcode = "S2_storeri";
let InputType = "imm";
-let BaseOpcode = "S2_storeri_io";
let isPredicable = 1;
let isExtendable = 1;
let opExtendable = 1;
@@ -21446,7 +21774,7 @@ def S2_storerinew_pbr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memw($Rx32++$Mu2:brev) = $Nt8.new",
-tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_92240447, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b10101111101;
@@ -21464,7 +21792,7 @@ def S2_storerinew_pci : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8),
"memw($Rx32++#$Ii:circ($Mu2)) = $Nt8.new",
-tc_d5c0729a, TypeST>, Enc_3f97c8, AddrModeRel {
+tc_addc37a8, TypeST>, Enc_3f97c8, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{12-11} = 0b10;
@@ -21484,7 +21812,7 @@ def S2_storerinew_pcr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memw($Rx32++I:circ($Mu2)) = $Nt8.new",
-tc_c79a189f, TypeST>, Enc_8dbe85, AddrModeRel {
+tc_92240447, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000010;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b10101001101;
@@ -21503,7 +21831,7 @@ def S2_storerinew_pi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
"memw($Rx32++#$Ii) = $Nt8.new",
-tc_c79a189f, TypeST>, Enc_223005, AddrModeRel {
+tc_92240447, TypeST>, Enc_223005, AddrModeRel {
let Inst{2-0} = 0b000;
let Inst{7-7} = 0b0;
let Inst{13-11} = 0b010;
@@ -21523,7 +21851,7 @@ def S2_storerinew_pr : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
"memw($Rx32++$Mu2) = $Nt8.new",
-tc_c79a189f, TypeST>, Enc_8dbe85 {
+tc_92240447, TypeST>, Enc_8dbe85, AddrModeRel {
let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b10101101101;
@@ -21533,6 +21861,7 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_pr";
let opNewValue = 3;
let Constraints = "$Rx32 = $Rx32in";
}
@@ -21540,7 +21869,7 @@ def S2_storerinew_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Nt8),
"memw($Rs32) = $Nt8.new",
-tc_be9602ff, TypeMAPPING> {
+tc_5deb5e47, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 1;
@@ -21549,7 +21878,7 @@ def S2_storerinewgp : HInst<
(outs),
(ins u30_2Imm:$Ii, IntRegs:$Nt8),
"memw(gp+#$Ii) = $Nt8.new",
-tc_5bf126a6, TypeV2LDST>, Enc_78cbf0, AddrModeRel {
+tc_6e20402a, TypeV2LDST>, Enc_78cbf0, AddrModeRel {
let Inst{12-11} = 0b10;
let Inst{24-21} = 0b0101;
let Inst{31-27} = 0b01001;
@@ -21571,7 +21900,7 @@ def S2_storew_locked : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"memw_locked($Rs32,$Pd4) = $Rt32",
-tc_5abb5e3f, TypeST>, Enc_c2b48e {
+tc_6f42bc60, TypeST>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10100000101;
@@ -21584,7 +21913,7 @@ def S2_svsathb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = vsathb($Rs32)",
-tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
+tc_9f6cd987, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -21595,7 +21924,7 @@ def S2_svsathub : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = vsathub($Rs32)",
-tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
+tc_9f6cd987, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10001100100;
let hasNewValue = 1;
@@ -21606,7 +21935,7 @@ def S2_tableidxb : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
"$Rx32 = tableidxb($Rs32,#$Ii,#$II):raw",
-tc_bfec0f01, TypeS_2op>, Enc_cd82bc {
+tc_bb831a7c, TypeS_2op>, Enc_cd82bc {
let Inst{31-22} = 0b1000011100;
let hasNewValue = 1;
let opNewValue = 0;
@@ -21617,7 +21946,7 @@ def S2_tableidxb_goodsyntax : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
"$Rx32 = tableidxb($Rs32,#$Ii,#$II)",
-tc_bfec0f01, TypeS_2op> {
+tc_bb831a7c, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -21628,7 +21957,7 @@ def S2_tableidxd : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
"$Rx32 = tableidxd($Rs32,#$Ii,#$II):raw",
-tc_bfec0f01, TypeS_2op>, Enc_cd82bc {
+tc_bb831a7c, TypeS_2op>, Enc_cd82bc {
let Inst{31-22} = 0b1000011111;
let hasNewValue = 1;
let opNewValue = 0;
@@ -21639,7 +21968,7 @@ def S2_tableidxd_goodsyntax : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
"$Rx32 = tableidxd($Rs32,#$Ii,#$II)",
-tc_bfec0f01, TypeS_2op> {
+tc_bb831a7c, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -21649,7 +21978,7 @@ def S2_tableidxh : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
"$Rx32 = tableidxh($Rs32,#$Ii,#$II):raw",
-tc_bfec0f01, TypeS_2op>, Enc_cd82bc {
+tc_bb831a7c, TypeS_2op>, Enc_cd82bc {
let Inst{31-22} = 0b1000011101;
let hasNewValue = 1;
let opNewValue = 0;
@@ -21660,7 +21989,7 @@ def S2_tableidxh_goodsyntax : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
"$Rx32 = tableidxh($Rs32,#$Ii,#$II)",
-tc_bfec0f01, TypeS_2op> {
+tc_bb831a7c, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -21670,7 +21999,7 @@ def S2_tableidxw : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
"$Rx32 = tableidxw($Rs32,#$Ii,#$II):raw",
-tc_bfec0f01, TypeS_2op>, Enc_cd82bc {
+tc_bb831a7c, TypeS_2op>, Enc_cd82bc {
let Inst{31-22} = 0b1000011110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -21681,7 +22010,7 @@ def S2_tableidxw_goodsyntax : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
"$Rx32 = tableidxw($Rs32,#$Ii,#$II)",
-tc_bfec0f01, TypeS_2op> {
+tc_bb831a7c, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -21691,7 +22020,7 @@ def S2_togglebit_i : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = togglebit($Rs32,#$Ii)",
-tc_946df596, TypeS_2op>, Enc_a05677 {
+tc_5da50c4b, TypeS_2op>, Enc_a05677 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100110;
@@ -21702,7 +22031,7 @@ def S2_togglebit_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = togglebit($Rs32,$Rt32)",
-tc_946df596, TypeS_3op>, Enc_5ab2be {
+tc_5da50c4b, TypeS_3op>, Enc_5ab2be {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110100;
@@ -21713,7 +22042,7 @@ def S2_tstbit_i : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Pd4 = tstbit($Rs32,#$Ii)",
-tc_643b4717, TypeS_2op>, Enc_83ee64 {
+tc_a1297125, TypeS_2op>, Enc_83ee64 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000101000;
@@ -21722,7 +22051,7 @@ def S2_tstbit_r : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = tstbit($Rs32,$Rt32)",
-tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
+tc_4a55d03c, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111000;
@@ -21731,7 +22060,7 @@ def S2_valignib : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32, u3_0Imm:$Ii),
"$Rdd32 = valignb($Rtt32,$Rss32,#$Ii)",
-tc_b4b5c03a, TypeS_3op>, Enc_729ff7 {
+tc_6fc5dbea, TypeS_3op>, Enc_729ff7 {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000000000;
}
@@ -21739,7 +22068,7 @@ def S2_valignrb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32, PredRegs:$Pu4),
"$Rdd32 = valignb($Rtt32,$Rss32,$Pu4)",
-tc_b4b5c03a, TypeS_3op>, Enc_8c6530 {
+tc_6fc5dbea, TypeS_3op>, Enc_8c6530 {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000010000;
@@ -21748,7 +22077,7 @@ def S2_vcnegh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vcnegh($Rss32,$Rt32)",
-tc_779080bf, TypeS_3op>, Enc_927852 {
+tc_8a825db2, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011110;
@@ -21759,7 +22088,7 @@ def S2_vcrotate : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rdd32 = vcrotate($Rss32,$Rt32)",
-tc_002cb246, TypeS_3op>, Enc_927852 {
+tc_0dfac0a7, TypeS_3op>, Enc_927852 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000011110;
@@ -21770,7 +22099,7 @@ def S2_vrcnegh : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rxx32 += vrcnegh($Rss32,$Rt32)",
-tc_d773585a, TypeS_3op>, Enc_1aa186 {
+tc_7f8ae742, TypeS_3op>, Enc_1aa186 {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11001011001;
@@ -21781,7 +22110,7 @@ def S2_vrndpackwh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vrndwh($Rss32)",
-tc_14b5c689, TypeS_2op>, Enc_90cd8b {
+tc_e3d699e3, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -21792,7 +22121,7 @@ def S2_vrndpackwhs : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vrndwh($Rss32):sat",
-tc_cf8126ae, TypeS_2op>, Enc_90cd8b {
+tc_d61dfdc3, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -21804,7 +22133,7 @@ def S2_vsathb : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vsathb($Rss32)",
-tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
+tc_9f6cd987, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -21815,7 +22144,7 @@ def S2_vsathb_nopack : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vsathb($Rss32)",
-tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
+tc_9f6cd987, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10000000000;
let Defs = [USR_OVF];
@@ -21824,7 +22153,7 @@ def S2_vsathub : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vsathub($Rss32)",
-tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
+tc_9f6cd987, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -21835,7 +22164,7 @@ def S2_vsathub_nopack : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vsathub($Rss32)",
-tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
+tc_9f6cd987, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000000000;
let Defs = [USR_OVF];
@@ -21844,7 +22173,7 @@ def S2_vsatwh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vsatwh($Rss32)",
-tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
+tc_9f6cd987, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -21855,7 +22184,7 @@ def S2_vsatwh_nopack : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vsatwh($Rss32)",
-tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
+tc_9f6cd987, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000000;
let Defs = [USR_OVF];
@@ -21864,7 +22193,7 @@ def S2_vsatwuh : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vsatwuh($Rss32)",
-tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
+tc_9f6cd987, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -21875,7 +22204,7 @@ def S2_vsatwuh_nopack : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = vsatwuh($Rss32)",
-tc_0ae0825c, TypeS_2op>, Enc_b9c5fb {
+tc_9f6cd987, TypeS_2op>, Enc_b9c5fb {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000000000;
let Defs = [USR_OVF];
@@ -21884,7 +22213,7 @@ def S2_vsplatrb : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = vsplatb($Rs32)",
-tc_0ae0825c, TypeS_2op>, Enc_5e2823 {
+tc_9f6cd987, TypeS_2op>, Enc_5e2823 {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10001100010;
let hasNewValue = 1;
@@ -21896,7 +22225,7 @@ def S2_vsplatrh : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vsplath($Rs32)",
-tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
+tc_9f6cd987, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10000100010;
let isReMaterializable = 1;
@@ -21906,7 +22235,7 @@ def S2_vspliceib : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, u3_0Imm:$Ii),
"$Rdd32 = vspliceb($Rss32,$Rtt32,#$Ii)",
-tc_b4b5c03a, TypeS_3op>, Enc_d50cd3 {
+tc_6fc5dbea, TypeS_3op>, Enc_d50cd3 {
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000000100;
}
@@ -21914,7 +22243,7 @@ def S2_vsplicerb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Pu4),
"$Rdd32 = vspliceb($Rss32,$Rtt32,$Pu4)",
-tc_b4b5c03a, TypeS_3op>, Enc_dbd70c {
+tc_6fc5dbea, TypeS_3op>, Enc_dbd70c {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000010100;
@@ -21923,7 +22252,7 @@ def S2_vsxtbh : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vsxtbh($Rs32)",
-tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
+tc_9f6cd987, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10000100000;
let isReMaterializable = 1;
@@ -21933,7 +22262,7 @@ def S2_vsxthw : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vsxthw($Rs32)",
-tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
+tc_9f6cd987, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000100000;
let isReMaterializable = 1;
@@ -21943,7 +22272,7 @@ def S2_vtrunehb : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vtrunehb($Rss32)",
-tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
+tc_9f6cd987, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -21953,7 +22282,7 @@ def S2_vtrunewh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vtrunewh($Rss32,$Rtt32)",
-tc_946df596, TypeS_3op>, Enc_a56825 {
+tc_5da50c4b, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -21962,7 +22291,7 @@ def S2_vtrunohb : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = vtrunohb($Rss32)",
-tc_0ae0825c, TypeS_2op>, Enc_90cd8b {
+tc_9f6cd987, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -21972,7 +22301,7 @@ def S2_vtrunowh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vtrunowh($Rss32,$Rtt32)",
-tc_946df596, TypeS_3op>, Enc_a56825 {
+tc_5da50c4b, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -21981,7 +22310,7 @@ def S2_vzxtbh : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vzxtbh($Rs32)",
-tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
+tc_9f6cd987, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10000100000;
let isReMaterializable = 1;
@@ -21991,7 +22320,7 @@ def S2_vzxthw : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vzxthw($Rs32)",
-tc_0ae0825c, TypeS_2op>, Enc_3a3d62 {
+tc_9f6cd987, TypeS_2op>, Enc_3a3d62 {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000100000;
let isReMaterializable = 1;
@@ -22001,7 +22330,7 @@ def S4_addaddi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Ru32, s32_0Imm:$Ii),
"$Rd32 = add($Rs32,add($Ru32,#$Ii))",
-tc_f675fee8, TypeALU64>, Enc_8b8d61 {
+tc_2c13e7f5, TypeALU64>, Enc_8b8d61, Requires<[UseCompound]> {
let Inst{31-23} = 0b110110110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -22016,7 +22345,7 @@ def S4_addi_asl_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = add(#$Ii,asl($Rx32in,#$II))",
-tc_f675fee8, TypeALU64>, Enc_c31910 {
+tc_2c13e7f5, TypeALU64>, Enc_c31910, Requires<[UseCompound]> {
let Inst{2-0} = 0b100;
let Inst{4-4} = 0b0;
let Inst{31-24} = 0b11011110;
@@ -22034,7 +22363,7 @@ def S4_addi_lsr_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = add(#$Ii,lsr($Rx32in,#$II))",
-tc_f675fee8, TypeALU64>, Enc_c31910 {
+tc_2c13e7f5, TypeALU64>, Enc_c31910, Requires<[UseCompound]> {
let Inst{2-0} = 0b100;
let Inst{4-4} = 0b1;
let Inst{31-24} = 0b11011110;
@@ -22052,7 +22381,7 @@ def S4_andi_asl_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = and(#$Ii,asl($Rx32in,#$II))",
-tc_f429765c, TypeALU64>, Enc_c31910 {
+tc_a4e22bbd, TypeALU64>, Enc_c31910, Requires<[UseCompound]> {
let Inst{2-0} = 0b000;
let Inst{4-4} = 0b0;
let Inst{31-24} = 0b11011110;
@@ -22070,7 +22399,7 @@ def S4_andi_lsr_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = and(#$Ii,lsr($Rx32in,#$II))",
-tc_f429765c, TypeALU64>, Enc_c31910 {
+tc_a4e22bbd, TypeALU64>, Enc_c31910, Requires<[UseCompound]> {
let Inst{2-0} = 0b000;
let Inst{4-4} = 0b1;
let Inst{31-24} = 0b11011110;
@@ -22088,7 +22417,7 @@ def S4_clbaddi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s6_0Imm:$Ii),
"$Rd32 = add(clb($Rs32),#$Ii)",
-tc_002cb246, TypeS_2op>, Enc_9fae8a {
+tc_a08b630b, TypeS_2op>, Enc_9fae8a {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b10001100001;
let hasNewValue = 1;
@@ -22099,7 +22428,7 @@ def S4_clbpaddi : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, s6_0Imm:$Ii),
"$Rd32 = add(clb($Rss32),#$Ii)",
-tc_002cb246, TypeS_2op>, Enc_a1640c {
+tc_a08b630b, TypeS_2op>, Enc_a1640c {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b10001000011;
let hasNewValue = 1;
@@ -22110,7 +22439,7 @@ def S4_clbpnorm : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = normamt($Rss32)",
-tc_14b5c689, TypeS_2op>, Enc_90cd8b {
+tc_a7bdb22c, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001000011;
let hasNewValue = 1;
@@ -22121,7 +22450,7 @@ def S4_extract : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II),
"$Rd32 = extract($Rs32,#$Ii,#$II)",
-tc_f675fee8, TypeS_2op>, Enc_b388cf {
+tc_2c13e7f5, TypeS_2op>, Enc_b388cf {
let Inst{13-13} = 0b0;
let Inst{31-23} = 0b100011011;
let hasNewValue = 1;
@@ -22132,7 +22461,7 @@ def S4_extract_rp : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"$Rd32 = extract($Rs32,$Rtt32)",
-tc_002cb246, TypeS_3op>, Enc_e07374 {
+tc_a08b630b, TypeS_3op>, Enc_e07374 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11001001000;
@@ -22144,7 +22473,7 @@ def S4_extractp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II),
"$Rdd32 = extract($Rss32,#$Ii,#$II)",
-tc_f675fee8, TypeS_2op>, Enc_b84c4c {
+tc_2c13e7f5, TypeS_2op>, Enc_b84c4c {
let Inst{31-24} = 0b10001010;
let prefersSlot3 = 1;
}
@@ -22152,7 +22481,7 @@ def S4_extractp_rp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = extract($Rss32,$Rtt32)",
-tc_002cb246, TypeS_3op>, Enc_a56825 {
+tc_a08b630b, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001110;
@@ -22162,7 +22491,7 @@ def S4_lsli : HInst<
(outs IntRegs:$Rd32),
(ins s6_0Imm:$Ii, IntRegs:$Rt32),
"$Rd32 = lsl(#$Ii,$Rt32)",
-tc_946df596, TypeS_3op>, Enc_fef969 {
+tc_5da50c4b, TypeS_3op>, Enc_fef969 {
let Inst{7-6} = 0b11;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000110100;
@@ -22173,7 +22502,7 @@ def S4_ntstbit_i : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Pd4 = !tstbit($Rs32,#$Ii)",
-tc_643b4717, TypeS_2op>, Enc_83ee64 {
+tc_a1297125, TypeS_2op>, Enc_83ee64 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000101001;
@@ -22182,7 +22511,7 @@ def S4_ntstbit_r : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = !tstbit($Rs32,$Rt32)",
-tc_85d5d03f, TypeS_3op>, Enc_c2b48e {
+tc_4a55d03c, TypeS_3op>, Enc_c2b48e {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111001;
@@ -22191,7 +22520,7 @@ def S4_or_andi : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rx32 |= and($Rs32,#$Ii)",
-tc_f429765c, TypeALU64>, Enc_b0e9d8 {
+tc_a4e22bbd, TypeALU64>, Enc_b0e9d8 {
let Inst{31-22} = 0b1101101000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -22208,7 +22537,7 @@ def S4_or_andix : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Ru32, IntRegs:$Rx32in, s32_0Imm:$Ii),
"$Rx32 = or($Ru32,and($Rx32in,#$Ii))",
-tc_f429765c, TypeALU64>, Enc_b4e6cf {
+tc_a4e22bbd, TypeALU64>, Enc_b4e6cf, Requires<[UseCompound]> {
let Inst{31-22} = 0b1101101001;
let hasNewValue = 1;
let opNewValue = 0;
@@ -22224,7 +22553,7 @@ def S4_or_ori : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
"$Rx32 |= or($Rs32,#$Ii)",
-tc_f429765c, TypeALU64>, Enc_b0e9d8 {
+tc_a4e22bbd, TypeALU64>, Enc_b0e9d8 {
let Inst{31-22} = 0b1101101010;
let hasNewValue = 1;
let opNewValue = 0;
@@ -22241,7 +22570,7 @@ def S4_ori_asl_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = or(#$Ii,asl($Rx32in,#$II))",
-tc_f429765c, TypeALU64>, Enc_c31910 {
+tc_a4e22bbd, TypeALU64>, Enc_c31910, Requires<[UseCompound]> {
let Inst{2-0} = 0b010;
let Inst{4-4} = 0b0;
let Inst{31-24} = 0b11011110;
@@ -22259,7 +22588,7 @@ def S4_ori_lsr_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = or(#$Ii,lsr($Rx32in,#$II))",
-tc_f429765c, TypeALU64>, Enc_c31910 {
+tc_a4e22bbd, TypeALU64>, Enc_c31910, Requires<[UseCompound]> {
let Inst{2-0} = 0b010;
let Inst{4-4} = 0b1;
let Inst{31-24} = 0b11011110;
@@ -22277,7 +22606,7 @@ def S4_parity : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = parity($Rs32,$Rt32)",
-tc_002cb246, TypeALU64>, Enc_5ab2be {
+tc_a08b630b, TypeALU64>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101111;
@@ -22289,7 +22618,7 @@ def S4_pstorerbf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memb(#$Ii) = $Rt32",
-tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_ba9255a6, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -22300,8 +22629,8 @@ let addrMode = Absolute;
let accessSize = ByteAccess;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerb";
let BaseOpcode = "S2_storerbabs";
+let CextOpcode = "S2_storerb";
let isNVStorable = 1;
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
@@ -22314,23 +22643,23 @@ def S4_pstorerbf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
+tc_1fe4ab69, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110101000;
let isPredicated = 1;
let isPredicatedFalse = 1;
let addrMode = BaseRegOffset;
let accessSize = ByteAccess;
let mayStore = 1;
+let BaseOpcode = "S4_storerb_rr";
let CextOpcode = "S2_storerb";
let InputType = "reg";
-let BaseOpcode = "S4_storerb_rr";
let isNVStorable = 1;
}
def S4_pstorerbfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memb(#$Ii) = $Rt32",
-tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_bb07f2c5, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -22342,8 +22671,8 @@ let accessSize = ByteAccess;
let isPredicatedNew = 1;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerb";
let BaseOpcode = "S2_storerbabs";
+let CextOpcode = "S2_storerb";
let isNVStorable = 1;
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
@@ -22356,7 +22685,7 @@ def S4_pstorerbfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memb($Rs32+#$Ii) = $Rt32",
-tc_da97ee82, TypeV2LDST>, Enc_da8d43, AddrModeRel {
+tc_a2b365d2, TypeV2LDST>, Enc_da8d43, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000110000;
let isPredicated = 1;
@@ -22365,9 +22694,9 @@ let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerb_io";
let CextOpcode = "S2_storerb";
let InputType = "imm";
-let BaseOpcode = "S2_storerb_io";
let isNVStorable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -22379,7 +22708,7 @@ def S4_pstorerbfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
+tc_8e82e8ca, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110111000;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -22387,16 +22716,16 @@ let addrMode = BaseRegOffset;
let accessSize = ByteAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S4_storerb_rr";
let CextOpcode = "S2_storerb";
let InputType = "reg";
-let BaseOpcode = "S4_storerb_rr";
let isNVStorable = 1;
}
def S4_pstorerbfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4.new) memb($Rs32) = $Rt32",
-tc_da97ee82, TypeMAPPING> {
+tc_a2b365d2, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -22404,7 +22733,7 @@ def S4_pstorerbnewf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memb(#$Ii) = $Nt8.new",
-tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
+tc_cfa0e29b, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b000;
@@ -22418,8 +22747,8 @@ let isNewValue = 1;
let isExtended = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerb";
let BaseOpcode = "S2_storerbabs";
+let CextOpcode = "S2_storerb";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 1;
@@ -22432,7 +22761,7 @@ def S4_pstorerbnewf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_0a6c20ae, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b00;
let Inst{31-21} = 0b00110101101;
let isPredicated = 1;
@@ -22443,16 +22772,16 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S4_storerb_rr";
let CextOpcode = "S2_storerb";
let InputType = "reg";
-let BaseOpcode = "S4_storerb_rr";
let opNewValue = 4;
}
def S4_pstorerbnewfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memb(#$Ii) = $Nt8.new",
-tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
+tc_0fac1eb8, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b100;
@@ -22467,8 +22796,8 @@ let isNewValue = 1;
let isExtended = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerb";
let BaseOpcode = "S2_storerbabs";
+let CextOpcode = "S2_storerb";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 1;
@@ -22481,7 +22810,7 @@ def S4_pstorerbnewfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memb($Rs32+#$Ii) = $Nt8.new",
-tc_c79a189f, TypeV2LDST>, Enc_585242, AddrModeRel {
+tc_92240447, TypeV2LDST>, Enc_585242, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b01000110101;
@@ -22494,9 +22823,9 @@ let isPredicatedNew = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerb_io";
let CextOpcode = "S2_storerb";
let InputType = "imm";
-let BaseOpcode = "S2_storerb_io";
let isExtendable = 1;
let opExtendable = 2;
let isExtentSigned = 0;
@@ -22508,7 +22837,7 @@ def S4_pstorerbnewfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_829d8a86, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b00;
let Inst{31-21} = 0b00110111101;
let isPredicated = 1;
@@ -22520,16 +22849,16 @@ let isPredicatedNew = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S4_storerb_rr";
let CextOpcode = "S2_storerb";
let InputType = "reg";
-let BaseOpcode = "S4_storerb_rr";
let opNewValue = 4;
}
def S4_pstorerbnewfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4.new) memb($Rs32) = $Nt8.new",
-tc_c79a189f, TypeMAPPING> {
+tc_92240447, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -22538,7 +22867,7 @@ def S4_pstorerbnewt_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memb(#$Ii) = $Nt8.new",
-tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
+tc_cfa0e29b, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b000;
@@ -22551,8 +22880,8 @@ let isNewValue = 1;
let isExtended = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerb";
let BaseOpcode = "S2_storerbabs";
+let CextOpcode = "S2_storerb";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 1;
@@ -22565,7 +22894,7 @@ def S4_pstorerbnewt_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_0a6c20ae, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b00;
let Inst{31-21} = 0b00110100101;
let isPredicated = 1;
@@ -22575,16 +22904,16 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S4_storerb_rr";
let CextOpcode = "S2_storerb";
let InputType = "reg";
-let BaseOpcode = "S4_storerb_rr";
let opNewValue = 4;
}
def S4_pstorerbnewtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memb(#$Ii) = $Nt8.new",
-tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
+tc_0fac1eb8, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b100;
@@ -22598,8 +22927,8 @@ let isNewValue = 1;
let isExtended = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerb";
let BaseOpcode = "S2_storerbabs";
+let CextOpcode = "S2_storerb";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 1;
@@ -22612,7 +22941,7 @@ def S4_pstorerbnewtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memb($Rs32+#$Ii) = $Nt8.new",
-tc_c79a189f, TypeV2LDST>, Enc_585242, AddrModeRel {
+tc_92240447, TypeV2LDST>, Enc_585242, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b01000010101;
@@ -22624,9 +22953,9 @@ let isPredicatedNew = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerb_io";
let CextOpcode = "S2_storerb";
let InputType = "imm";
-let BaseOpcode = "S2_storerb_io";
let isExtendable = 1;
let opExtendable = 2;
let isExtentSigned = 0;
@@ -22638,7 +22967,7 @@ def S4_pstorerbnewtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_829d8a86, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b00;
let Inst{31-21} = 0b00110110101;
let isPredicated = 1;
@@ -22649,16 +22978,16 @@ let isPredicatedNew = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S4_storerb_rr";
let CextOpcode = "S2_storerb";
let InputType = "reg";
-let BaseOpcode = "S4_storerb_rr";
let opNewValue = 4;
}
def S4_pstorerbnewtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4.new) memb($Rs32) = $Nt8.new",
-tc_c79a189f, TypeMAPPING> {
+tc_92240447, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -22667,7 +22996,7 @@ def S4_pstorerbt_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memb(#$Ii) = $Rt32",
-tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_ba9255a6, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -22677,8 +23006,8 @@ let addrMode = Absolute;
let accessSize = ByteAccess;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerb";
let BaseOpcode = "S2_storerbabs";
+let CextOpcode = "S2_storerb";
let isNVStorable = 1;
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
@@ -22691,22 +23020,22 @@ def S4_pstorerbt_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
+tc_1fe4ab69, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110100000;
let isPredicated = 1;
let addrMode = BaseRegOffset;
let accessSize = ByteAccess;
let mayStore = 1;
+let BaseOpcode = "S4_storerb_rr";
let CextOpcode = "S2_storerb";
let InputType = "reg";
-let BaseOpcode = "S4_storerb_rr";
let isNVStorable = 1;
}
def S4_pstorerbtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memb(#$Ii) = $Rt32",
-tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_bb07f2c5, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -22717,8 +23046,8 @@ let accessSize = ByteAccess;
let isPredicatedNew = 1;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerb";
let BaseOpcode = "S2_storerbabs";
+let CextOpcode = "S2_storerb";
let isNVStorable = 1;
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
@@ -22731,7 +23060,7 @@ def S4_pstorerbtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memb($Rs32+#$Ii) = $Rt32",
-tc_da97ee82, TypeV2LDST>, Enc_da8d43, AddrModeRel {
+tc_a2b365d2, TypeV2LDST>, Enc_da8d43, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000010000;
let isPredicated = 1;
@@ -22739,9 +23068,9 @@ let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerb_io";
let CextOpcode = "S2_storerb";
let InputType = "imm";
-let BaseOpcode = "S2_storerb_io";
let isNVStorable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -22753,23 +23082,23 @@ def S4_pstorerbtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
+tc_8e82e8ca, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110110000;
let isPredicated = 1;
let addrMode = BaseRegOffset;
let accessSize = ByteAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S4_storerb_rr";
let CextOpcode = "S2_storerb";
let InputType = "reg";
-let BaseOpcode = "S4_storerb_rr";
let isNVStorable = 1;
}
def S4_pstorerbtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4.new) memb($Rs32) = $Rt32",
-tc_da97ee82, TypeMAPPING> {
+tc_a2b365d2, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -22777,7 +23106,7 @@ def S4_pstorerdf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4) memd(#$Ii) = $Rtt32",
-tc_362c6592, TypeST>, Enc_50b5ac, AddrModeRel {
+tc_ba9255a6, TypeST>, Enc_50b5ac, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -22788,8 +23117,8 @@ let addrMode = Absolute;
let accessSize = DoubleWordAccess;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerd";
let BaseOpcode = "S2_storerdabs";
+let CextOpcode = "S2_storerd";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 1;
@@ -22801,22 +23130,22 @@ def S4_pstorerdf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
-tc_3962fa26, TypeST>, Enc_1a9974, AddrModeRel {
+tc_1fe4ab69, TypeST>, Enc_1a9974, AddrModeRel {
let Inst{31-21} = 0b00110101110;
let isPredicated = 1;
let isPredicatedFalse = 1;
let addrMode = BaseRegOffset;
let accessSize = DoubleWordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storerd_rr";
let CextOpcode = "S2_storerd";
let InputType = "reg";
-let BaseOpcode = "S2_storerd_rr";
}
def S4_pstorerdfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4.new) memd(#$Ii) = $Rtt32",
-tc_da4a37ed, TypeST>, Enc_50b5ac, AddrModeRel {
+tc_bb07f2c5, TypeST>, Enc_50b5ac, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -22828,8 +23157,8 @@ let accessSize = DoubleWordAccess;
let isPredicatedNew = 1;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerd";
let BaseOpcode = "S2_storerdabs";
+let CextOpcode = "S2_storerd";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 1;
@@ -22841,7 +23170,7 @@ def S4_pstorerdfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4.new) memd($Rs32+#$Ii) = $Rtt32",
-tc_da97ee82, TypeV2LDST>, Enc_57a33e, AddrModeRel {
+tc_a2b365d2, TypeV2LDST>, Enc_57a33e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000110110;
let isPredicated = 1;
@@ -22850,9 +23179,9 @@ let addrMode = BaseImmOffset;
let accessSize = DoubleWordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerd_io";
let CextOpcode = "S2_storerd";
let InputType = "imm";
-let BaseOpcode = "S2_storerd_io";
let isExtendable = 1;
let opExtendable = 2;
let isExtentSigned = 0;
@@ -22863,7 +23192,7 @@ def S4_pstorerdfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
"if (!$Pv4.new) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
-tc_40116ca8, TypeST>, Enc_1a9974, AddrModeRel {
+tc_8e82e8ca, TypeST>, Enc_1a9974, AddrModeRel {
let Inst{31-21} = 0b00110111110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -22871,15 +23200,15 @@ let addrMode = BaseRegOffset;
let accessSize = DoubleWordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerd_rr";
let CextOpcode = "S2_storerd";
let InputType = "reg";
-let BaseOpcode = "S2_storerd_rr";
}
def S4_pstorerdfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
"if (!$Pv4.new) memd($Rs32) = $Rtt32",
-tc_da97ee82, TypeMAPPING> {
+tc_a2b365d2, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -22887,7 +23216,7 @@ def S4_pstorerdt_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4) memd(#$Ii) = $Rtt32",
-tc_362c6592, TypeST>, Enc_50b5ac, AddrModeRel {
+tc_ba9255a6, TypeST>, Enc_50b5ac, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -22897,8 +23226,8 @@ let addrMode = Absolute;
let accessSize = DoubleWordAccess;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerd";
let BaseOpcode = "S2_storerdabs";
+let CextOpcode = "S2_storerd";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 1;
@@ -22910,21 +23239,21 @@ def S4_pstorerdt_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
-tc_3962fa26, TypeST>, Enc_1a9974, AddrModeRel {
+tc_1fe4ab69, TypeST>, Enc_1a9974, AddrModeRel {
let Inst{31-21} = 0b00110100110;
let isPredicated = 1;
let addrMode = BaseRegOffset;
let accessSize = DoubleWordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storerd_rr";
let CextOpcode = "S2_storerd";
let InputType = "reg";
-let BaseOpcode = "S2_storerd_rr";
}
def S4_pstorerdtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4.new) memd(#$Ii) = $Rtt32",
-tc_da4a37ed, TypeST>, Enc_50b5ac, AddrModeRel {
+tc_bb07f2c5, TypeST>, Enc_50b5ac, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -22935,8 +23264,8 @@ let accessSize = DoubleWordAccess;
let isPredicatedNew = 1;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerd";
let BaseOpcode = "S2_storerdabs";
+let CextOpcode = "S2_storerd";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 1;
@@ -22948,7 +23277,7 @@ def S4_pstorerdtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4.new) memd($Rs32+#$Ii) = $Rtt32",
-tc_da97ee82, TypeV2LDST>, Enc_57a33e, AddrModeRel {
+tc_a2b365d2, TypeV2LDST>, Enc_57a33e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000010110;
let isPredicated = 1;
@@ -22956,9 +23285,9 @@ let addrMode = BaseImmOffset;
let accessSize = DoubleWordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerd_io";
let CextOpcode = "S2_storerd";
let InputType = "imm";
-let BaseOpcode = "S2_storerd_io";
let isExtendable = 1;
let opExtendable = 2;
let isExtentSigned = 0;
@@ -22969,22 +23298,22 @@ def S4_pstorerdtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
"if ($Pv4.new) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
-tc_40116ca8, TypeST>, Enc_1a9974, AddrModeRel {
+tc_8e82e8ca, TypeST>, Enc_1a9974, AddrModeRel {
let Inst{31-21} = 0b00110110110;
let isPredicated = 1;
let addrMode = BaseRegOffset;
let accessSize = DoubleWordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerd_rr";
let CextOpcode = "S2_storerd";
let InputType = "reg";
-let BaseOpcode = "S2_storerd_rr";
}
def S4_pstorerdtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
"if ($Pv4.new) memd($Rs32) = $Rtt32",
-tc_da97ee82, TypeMAPPING> {
+tc_a2b365d2, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -22992,7 +23321,7 @@ def S4_pstorerff_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh(#$Ii) = $Rt32.h",
-tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_ba9255a6, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -23003,8 +23332,8 @@ let addrMode = Absolute;
let accessSize = HalfWordAccess;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerf";
let BaseOpcode = "S2_storerfabs";
+let CextOpcode = "S2_storerf";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 1;
@@ -23016,22 +23345,22 @@ def S4_pstorerff_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
-tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
+tc_1fe4ab69, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110101011;
let isPredicated = 1;
let isPredicatedFalse = 1;
let addrMode = BaseRegOffset;
let accessSize = HalfWordAccess;
let mayStore = 1;
+let BaseOpcode = "S4_storerf_rr";
let CextOpcode = "S2_storerf";
let InputType = "reg";
-let BaseOpcode = "S4_storerf_rr";
}
def S4_pstorerffnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh(#$Ii) = $Rt32.h",
-tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_bb07f2c5, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -23043,8 +23372,8 @@ let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerf";
let BaseOpcode = "S2_storerfabs";
+let CextOpcode = "S2_storerf";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 1;
@@ -23056,7 +23385,7 @@ def S4_pstorerffnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32+#$Ii) = $Rt32.h",
-tc_da97ee82, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_a2b365d2, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000110011;
let isPredicated = 1;
@@ -23065,9 +23394,9 @@ let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerf_io";
let CextOpcode = "S2_storerf";
let InputType = "imm";
-let BaseOpcode = "S2_storerf_io";
let isExtendable = 1;
let opExtendable = 2;
let isExtentSigned = 0;
@@ -23078,7 +23407,7 @@ def S4_pstorerffnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
-tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
+tc_8e82e8ca, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110111011;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -23086,15 +23415,15 @@ let addrMode = BaseRegOffset;
let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S4_storerf_rr";
let CextOpcode = "S2_storerf";
let InputType = "reg";
-let BaseOpcode = "S4_storerf_rr";
}
def S4_pstorerffnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32) = $Rt32.h",
-tc_da97ee82, TypeMAPPING> {
+tc_a2b365d2, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -23102,7 +23431,7 @@ def S4_pstorerft_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh(#$Ii) = $Rt32.h",
-tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_ba9255a6, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -23112,8 +23441,8 @@ let addrMode = Absolute;
let accessSize = HalfWordAccess;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerf";
let BaseOpcode = "S2_storerfabs";
+let CextOpcode = "S2_storerf";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 1;
@@ -23125,21 +23454,21 @@ def S4_pstorerft_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
-tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
+tc_1fe4ab69, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110100011;
let isPredicated = 1;
let addrMode = BaseRegOffset;
let accessSize = HalfWordAccess;
let mayStore = 1;
+let BaseOpcode = "S4_storerf_rr";
let CextOpcode = "S2_storerf";
let InputType = "reg";
-let BaseOpcode = "S4_storerf_rr";
}
def S4_pstorerftnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh(#$Ii) = $Rt32.h",
-tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_bb07f2c5, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -23150,8 +23479,8 @@ let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerf";
let BaseOpcode = "S2_storerfabs";
+let CextOpcode = "S2_storerf";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 1;
@@ -23163,7 +23492,7 @@ def S4_pstorerftnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32+#$Ii) = $Rt32.h",
-tc_da97ee82, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_a2b365d2, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000010011;
let isPredicated = 1;
@@ -23171,9 +23500,9 @@ let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerf_io";
let CextOpcode = "S2_storerf";
let InputType = "imm";
-let BaseOpcode = "S2_storerf_io";
let isExtendable = 1;
let opExtendable = 2;
let isExtentSigned = 0;
@@ -23184,22 +23513,22 @@ def S4_pstorerftnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
-tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
+tc_8e82e8ca, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110110011;
let isPredicated = 1;
let addrMode = BaseRegOffset;
let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S4_storerf_rr";
let CextOpcode = "S2_storerf";
let InputType = "reg";
-let BaseOpcode = "S4_storerf_rr";
}
def S4_pstorerftnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32) = $Rt32.h",
-tc_da97ee82, TypeMAPPING> {
+tc_a2b365d2, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -23207,7 +23536,7 @@ def S4_pstorerhf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh(#$Ii) = $Rt32",
-tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_ba9255a6, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -23218,8 +23547,8 @@ let addrMode = Absolute;
let accessSize = HalfWordAccess;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerh";
let BaseOpcode = "S2_storerhabs";
+let CextOpcode = "S2_storerh";
let isNVStorable = 1;
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
@@ -23232,23 +23561,23 @@ def S4_pstorerhf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
+tc_1fe4ab69, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110101010;
let isPredicated = 1;
let isPredicatedFalse = 1;
let addrMode = BaseRegOffset;
let accessSize = HalfWordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_rr";
let CextOpcode = "S2_storerh";
let InputType = "reg";
-let BaseOpcode = "S2_storerh_rr";
let isNVStorable = 1;
}
def S4_pstorerhfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh(#$Ii) = $Rt32",
-tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_bb07f2c5, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -23260,8 +23589,8 @@ let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerh";
let BaseOpcode = "S2_storerhabs";
+let CextOpcode = "S2_storerh";
let isNVStorable = 1;
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
@@ -23274,7 +23603,7 @@ def S4_pstorerhfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32+#$Ii) = $Rt32",
-tc_da97ee82, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_a2b365d2, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000110010;
let isPredicated = 1;
@@ -23283,9 +23612,9 @@ let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_io";
let CextOpcode = "S2_storerh";
let InputType = "imm";
-let BaseOpcode = "S2_storerh_io";
let isNVStorable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -23297,7 +23626,7 @@ def S4_pstorerhfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
+tc_8e82e8ca, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110111010;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -23305,16 +23634,16 @@ let addrMode = BaseRegOffset;
let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_rr";
let CextOpcode = "S2_storerh";
let InputType = "reg";
-let BaseOpcode = "S2_storerh_rr";
let isNVStorable = 1;
}
def S4_pstorerhfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4.new) memh($Rs32) = $Rt32",
-tc_da97ee82, TypeMAPPING> {
+tc_a2b365d2, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -23322,7 +23651,7 @@ def S4_pstorerhnewf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memh(#$Ii) = $Nt8.new",
-tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
+tc_cfa0e29b, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b001;
@@ -23336,8 +23665,8 @@ let isNewValue = 1;
let isExtended = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerh";
let BaseOpcode = "S2_storerhabs";
+let CextOpcode = "S2_storerh";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 1;
@@ -23350,7 +23679,7 @@ def S4_pstorerhnewf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_0a6c20ae, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b01;
let Inst{31-21} = 0b00110101101;
let isPredicated = 1;
@@ -23361,16 +23690,16 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_rr";
let CextOpcode = "S2_storerh";
let InputType = "reg";
-let BaseOpcode = "S2_storerh_rr";
let opNewValue = 4;
}
def S4_pstorerhnewfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memh(#$Ii) = $Nt8.new",
-tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
+tc_0fac1eb8, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b101;
@@ -23385,8 +23714,8 @@ let isNewValue = 1;
let isExtended = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerh";
let BaseOpcode = "S2_storerhabs";
+let CextOpcode = "S2_storerh";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 1;
@@ -23399,7 +23728,7 @@ def S4_pstorerhnewfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memh($Rs32+#$Ii) = $Nt8.new",
-tc_c79a189f, TypeV2LDST>, Enc_f44229, AddrModeRel {
+tc_92240447, TypeV2LDST>, Enc_f44229, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b01000110101;
@@ -23412,9 +23741,9 @@ let isPredicatedNew = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_io";
let CextOpcode = "S2_storerh";
let InputType = "imm";
-let BaseOpcode = "S2_storerh_io";
let isExtendable = 1;
let opExtendable = 2;
let isExtentSigned = 0;
@@ -23426,7 +23755,7 @@ def S4_pstorerhnewfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_829d8a86, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b01;
let Inst{31-21} = 0b00110111101;
let isPredicated = 1;
@@ -23438,16 +23767,16 @@ let isPredicatedNew = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_rr";
let CextOpcode = "S2_storerh";
let InputType = "reg";
-let BaseOpcode = "S2_storerh_rr";
let opNewValue = 4;
}
def S4_pstorerhnewfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4.new) memh($Rs32) = $Nt8.new",
-tc_c79a189f, TypeMAPPING> {
+tc_92240447, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -23456,7 +23785,7 @@ def S4_pstorerhnewt_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memh(#$Ii) = $Nt8.new",
-tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
+tc_cfa0e29b, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b001;
@@ -23469,8 +23798,8 @@ let isNewValue = 1;
let isExtended = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerh";
let BaseOpcode = "S2_storerhabs";
+let CextOpcode = "S2_storerh";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 1;
@@ -23483,7 +23812,7 @@ def S4_pstorerhnewt_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_0a6c20ae, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b01;
let Inst{31-21} = 0b00110100101;
let isPredicated = 1;
@@ -23493,16 +23822,16 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_rr";
let CextOpcode = "S2_storerh";
let InputType = "reg";
-let BaseOpcode = "S2_storerh_rr";
let opNewValue = 4;
}
def S4_pstorerhnewtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memh(#$Ii) = $Nt8.new",
-tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
+tc_0fac1eb8, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b101;
@@ -23516,8 +23845,8 @@ let isNewValue = 1;
let isExtended = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerh";
let BaseOpcode = "S2_storerhabs";
+let CextOpcode = "S2_storerh";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 1;
@@ -23530,7 +23859,7 @@ def S4_pstorerhnewtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memh($Rs32+#$Ii) = $Nt8.new",
-tc_c79a189f, TypeV2LDST>, Enc_f44229, AddrModeRel {
+tc_92240447, TypeV2LDST>, Enc_f44229, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b01000010101;
@@ -23542,9 +23871,9 @@ let isPredicatedNew = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_io";
let CextOpcode = "S2_storerh";
let InputType = "imm";
-let BaseOpcode = "S2_storerh_io";
let isExtendable = 1;
let opExtendable = 2;
let isExtentSigned = 0;
@@ -23556,7 +23885,7 @@ def S4_pstorerhnewtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_829d8a86, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b01;
let Inst{31-21} = 0b00110110101;
let isPredicated = 1;
@@ -23567,16 +23896,16 @@ let isPredicatedNew = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_rr";
let CextOpcode = "S2_storerh";
let InputType = "reg";
-let BaseOpcode = "S2_storerh_rr";
let opNewValue = 4;
}
def S4_pstorerhnewtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4.new) memh($Rs32) = $Nt8.new",
-tc_c79a189f, TypeMAPPING> {
+tc_92240447, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -23585,7 +23914,7 @@ def S4_pstorerht_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh(#$Ii) = $Rt32",
-tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_ba9255a6, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -23595,8 +23924,8 @@ let addrMode = Absolute;
let accessSize = HalfWordAccess;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerh";
let BaseOpcode = "S2_storerhabs";
+let CextOpcode = "S2_storerh";
let isNVStorable = 1;
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
@@ -23609,22 +23938,22 @@ def S4_pstorerht_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
+tc_1fe4ab69, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110100010;
let isPredicated = 1;
let addrMode = BaseRegOffset;
let accessSize = HalfWordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_rr";
let CextOpcode = "S2_storerh";
let InputType = "reg";
-let BaseOpcode = "S2_storerh_rr";
let isNVStorable = 1;
}
def S4_pstorerhtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh(#$Ii) = $Rt32",
-tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_bb07f2c5, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -23635,8 +23964,8 @@ let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerh";
let BaseOpcode = "S2_storerhabs";
+let CextOpcode = "S2_storerh";
let isNVStorable = 1;
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
@@ -23649,7 +23978,7 @@ def S4_pstorerhtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32+#$Ii) = $Rt32",
-tc_da97ee82, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
+tc_a2b365d2, TypeV2LDST>, Enc_e8c45e, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000010010;
let isPredicated = 1;
@@ -23657,9 +23986,9 @@ let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_io";
let CextOpcode = "S2_storerh";
let InputType = "imm";
-let BaseOpcode = "S2_storerh_io";
let isNVStorable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -23671,23 +24000,23 @@ def S4_pstorerhtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
+tc_8e82e8ca, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110110010;
let isPredicated = 1;
let addrMode = BaseRegOffset;
let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_rr";
let CextOpcode = "S2_storerh";
let InputType = "reg";
-let BaseOpcode = "S2_storerh_rr";
let isNVStorable = 1;
}
def S4_pstorerhtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4.new) memh($Rs32) = $Rt32",
-tc_da97ee82, TypeMAPPING> {
+tc_a2b365d2, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -23695,7 +24024,7 @@ def S4_pstorerif_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memw(#$Ii) = $Rt32",
-tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_ba9255a6, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -23706,8 +24035,8 @@ let addrMode = Absolute;
let accessSize = WordAccess;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storeri";
let BaseOpcode = "S2_storeriabs";
+let CextOpcode = "S2_storeri";
let isNVStorable = 1;
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
@@ -23720,23 +24049,23 @@ def S4_pstorerif_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
+tc_1fe4ab69, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110101100;
let isPredicated = 1;
let isPredicatedFalse = 1;
let addrMode = BaseRegOffset;
let accessSize = WordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_rr";
let CextOpcode = "S2_storeri";
let InputType = "reg";
-let BaseOpcode = "S2_storeri_rr";
let isNVStorable = 1;
}
def S4_pstorerifnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memw(#$Ii) = $Rt32",
-tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_bb07f2c5, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -23748,8 +24077,8 @@ let accessSize = WordAccess;
let isPredicatedNew = 1;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storeri";
let BaseOpcode = "S2_storeriabs";
+let CextOpcode = "S2_storeri";
let isNVStorable = 1;
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
@@ -23762,7 +24091,7 @@ def S4_pstorerifnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memw($Rs32+#$Ii) = $Rt32",
-tc_da97ee82, TypeV2LDST>, Enc_397f23, AddrModeRel {
+tc_a2b365d2, TypeV2LDST>, Enc_397f23, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000110100;
let isPredicated = 1;
@@ -23771,9 +24100,9 @@ let addrMode = BaseImmOffset;
let accessSize = WordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_io";
let CextOpcode = "S2_storeri";
let InputType = "imm";
-let BaseOpcode = "S2_storeri_io";
let isNVStorable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -23785,7 +24114,7 @@ def S4_pstorerifnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if (!$Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
+tc_8e82e8ca, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110111100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -23793,16 +24122,16 @@ let addrMode = BaseRegOffset;
let accessSize = WordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_rr";
let CextOpcode = "S2_storeri";
let InputType = "reg";
-let BaseOpcode = "S2_storeri_rr";
let isNVStorable = 1;
}
def S4_pstorerifnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if (!$Pv4.new) memw($Rs32) = $Rt32",
-tc_da97ee82, TypeMAPPING> {
+tc_a2b365d2, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -23810,7 +24139,7 @@ def S4_pstorerinewf_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memw(#$Ii) = $Nt8.new",
-tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
+tc_cfa0e29b, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b010;
@@ -23824,8 +24153,8 @@ let isNewValue = 1;
let isExtended = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storeri";
let BaseOpcode = "S2_storeriabs";
+let CextOpcode = "S2_storeri";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 1;
@@ -23838,7 +24167,7 @@ def S4_pstorerinewf_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_0a6c20ae, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b10;
let Inst{31-21} = 0b00110101101;
let isPredicated = 1;
@@ -23849,16 +24178,16 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_rr";
let CextOpcode = "S2_storeri";
let InputType = "reg";
-let BaseOpcode = "S2_storeri_rr";
let opNewValue = 4;
}
def S4_pstorerinewfnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memw(#$Ii) = $Nt8.new",
-tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
+tc_0fac1eb8, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b1;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b110;
@@ -23873,8 +24202,8 @@ let isNewValue = 1;
let isExtended = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storeri";
let BaseOpcode = "S2_storeriabs";
+let CextOpcode = "S2_storeri";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 1;
@@ -23887,7 +24216,7 @@ def S4_pstorerinewfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memw($Rs32+#$Ii) = $Nt8.new",
-tc_c79a189f, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
+tc_92240447, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b01000110101;
@@ -23900,9 +24229,9 @@ let isPredicatedNew = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_io";
let CextOpcode = "S2_storeri";
let InputType = "imm";
-let BaseOpcode = "S2_storeri_io";
let isExtendable = 1;
let opExtendable = 2;
let isExtentSigned = 0;
@@ -23914,7 +24243,7 @@ def S4_pstorerinewfnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if (!$Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_829d8a86, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b10;
let Inst{31-21} = 0b00110111101;
let isPredicated = 1;
@@ -23926,16 +24255,16 @@ let isPredicatedNew = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_rr";
let CextOpcode = "S2_storeri";
let InputType = "reg";
-let BaseOpcode = "S2_storeri_rr";
let opNewValue = 4;
}
def S4_pstorerinewfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if (!$Pv4.new) memw($Rs32) = $Nt8.new",
-tc_c79a189f, TypeMAPPING> {
+tc_92240447, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -23944,7 +24273,7 @@ def S4_pstorerinewt_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memw(#$Ii) = $Nt8.new",
-tc_4b68bce4, TypeST>, Enc_44215c, AddrModeRel {
+tc_cfa0e29b, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b010;
@@ -23957,8 +24286,8 @@ let isNewValue = 1;
let isExtended = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storeri";
let BaseOpcode = "S2_storeriabs";
+let CextOpcode = "S2_storeri";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 1;
@@ -23971,7 +24300,7 @@ def S4_pstorerinewt_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_e95795ec, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_0a6c20ae, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b10;
let Inst{31-21} = 0b00110100101;
let isPredicated = 1;
@@ -23981,16 +24310,16 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_rr";
let CextOpcode = "S2_storeri";
let InputType = "reg";
-let BaseOpcode = "S2_storeri_rr";
let opNewValue = 4;
}
def S4_pstorerinewtnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memw(#$Ii) = $Nt8.new",
-tc_d2e63d61, TypeST>, Enc_44215c, AddrModeRel {
+tc_0fac1eb8, TypeST>, Enc_44215c, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-11} = 0b110;
@@ -24004,8 +24333,8 @@ let isNewValue = 1;
let isExtended = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storeri";
let BaseOpcode = "S2_storeriabs";
+let CextOpcode = "S2_storeri";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 1;
@@ -24018,7 +24347,7 @@ def S4_pstorerinewtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memw($Rs32+#$Ii) = $Nt8.new",
-tc_c79a189f, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
+tc_92240447, TypeV2LDST>, Enc_8dbdfe, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b01000010101;
@@ -24030,9 +24359,9 @@ let isPredicatedNew = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_io";
let CextOpcode = "S2_storeri";
let InputType = "imm";
-let BaseOpcode = "S2_storeri_io";
let isExtendable = 1;
let opExtendable = 2;
let isExtentSigned = 0;
@@ -24044,7 +24373,7 @@ def S4_pstorerinewtnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"if ($Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_b90a29b1, TypeST>, Enc_47ee5e, AddrModeRel {
+tc_829d8a86, TypeST>, Enc_47ee5e, AddrModeRel {
let Inst{4-3} = 0b10;
let Inst{31-21} = 0b00110110101;
let isPredicated = 1;
@@ -24055,16 +24384,16 @@ let isPredicatedNew = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_rr";
let CextOpcode = "S2_storeri";
let InputType = "reg";
-let BaseOpcode = "S2_storeri_rr";
let opNewValue = 4;
}
def S4_pstorerinewtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
"if ($Pv4.new) memw($Rs32) = $Nt8.new",
-tc_c79a189f, TypeMAPPING> {
+tc_92240447, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let opNewValue = 2;
@@ -24073,7 +24402,7 @@ def S4_pstorerit_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memw(#$Ii) = $Rt32",
-tc_362c6592, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_ba9255a6, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
@@ -24083,8 +24412,8 @@ let addrMode = Absolute;
let accessSize = WordAccess;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storeri";
let BaseOpcode = "S2_storeriabs";
+let CextOpcode = "S2_storeri";
let isNVStorable = 1;
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
@@ -24097,22 +24426,22 @@ def S4_pstorerit_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_3962fa26, TypeST>, Enc_6339d5, AddrModeRel {
+tc_1fe4ab69, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110100100;
let isPredicated = 1;
let addrMode = BaseRegOffset;
let accessSize = WordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_rr";
let CextOpcode = "S2_storeri";
let InputType = "reg";
-let BaseOpcode = "S2_storeri_rr";
let isNVStorable = 1;
}
def S4_pstoreritnew_abs : HInst<
(outs),
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memw(#$Ii) = $Rt32",
-tc_da4a37ed, TypeST>, Enc_1cf4ca, AddrModeRel {
+tc_bb07f2c5, TypeST>, Enc_1cf4ca, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
@@ -24123,8 +24452,8 @@ let accessSize = WordAccess;
let isPredicatedNew = 1;
let isExtended = 1;
let mayStore = 1;
-let CextOpcode = "S2_storeri";
let BaseOpcode = "S2_storeriabs";
+let CextOpcode = "S2_storeri";
let isNVStorable = 1;
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
@@ -24137,7 +24466,7 @@ def S4_pstoreritnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memw($Rs32+#$Ii) = $Rt32",
-tc_da97ee82, TypeV2LDST>, Enc_397f23, AddrModeRel {
+tc_a2b365d2, TypeV2LDST>, Enc_397f23, AddrModeRel {
let Inst{2-2} = 0b0;
let Inst{31-21} = 0b01000010100;
let isPredicated = 1;
@@ -24145,9 +24474,9 @@ let addrMode = BaseImmOffset;
let accessSize = WordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_io";
let CextOpcode = "S2_storeri";
let InputType = "imm";
-let BaseOpcode = "S2_storeri_io";
let isNVStorable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -24159,23 +24488,23 @@ def S4_pstoreritnew_rr : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"if ($Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_40116ca8, TypeST>, Enc_6339d5, AddrModeRel {
+tc_8e82e8ca, TypeST>, Enc_6339d5, AddrModeRel {
let Inst{31-21} = 0b00110110100;
let isPredicated = 1;
let addrMode = BaseRegOffset;
let accessSize = WordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_rr";
let CextOpcode = "S2_storeri";
let InputType = "reg";
-let BaseOpcode = "S2_storeri_rr";
let isNVStorable = 1;
}
def S4_pstoreritnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
"if ($Pv4.new) memw($Rs32) = $Rt32",
-tc_da97ee82, TypeMAPPING> {
+tc_a2b365d2, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24183,7 +24512,7 @@ def S4_stored_locked : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"memd_locked($Rs32,$Pd4) = $Rtt32",
-tc_5abb5e3f, TypeST>, Enc_d7dc10 {
+tc_6f42bc60, TypeST>, Enc_d7dc10 {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10100000111;
@@ -24196,14 +24525,14 @@ def S4_storeirb_io : HInst<
(outs),
(ins IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"memb($Rs32+#$Ii) = #$II",
-tc_b83e6d73, TypeST>, Enc_8203bb, PredNewRel {
+tc_7c31e19a, TypeST>, Enc_8203bb, PredNewRel {
let Inst{31-21} = 0b00111100000;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let mayStore = 1;
+let BaseOpcode = "S4_storeirb_io";
let CextOpcode = "S2_storerb";
let InputType = "imm";
-let BaseOpcode = "S4_storeirb_io";
let isPredicable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -24215,7 +24544,7 @@ def S4_storeirb_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, s8_0Imm:$II),
"memb($Rs32) = #$II",
-tc_b83e6d73, TypeMAPPING> {
+tc_7c31e19a, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24223,16 +24552,16 @@ def S4_storeirbf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4) memb($Rs32+#$Ii) = #$II",
-tc_0b2be201, TypeST>, Enc_d7a65e, PredNewRel {
+tc_d03278fd, TypeST>, Enc_d7a65e, PredNewRel {
let Inst{31-21} = 0b00111000100;
let isPredicated = 1;
let isPredicatedFalse = 1;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let mayStore = 1;
+let BaseOpcode = "S4_storeirb_io";
let CextOpcode = "S2_storerb";
let InputType = "imm";
-let BaseOpcode = "S4_storeirb_io";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 1;
@@ -24243,7 +24572,7 @@ def S4_storeirbf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4) memb($Rs32) = #$II",
-tc_0b2be201, TypeMAPPING> {
+tc_d03278fd, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24251,7 +24580,7 @@ def S4_storeirbfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4.new) memb($Rs32+#$Ii) = #$II",
-tc_c4f596e3, TypeST>, Enc_d7a65e, PredNewRel {
+tc_65cbd974, TypeST>, Enc_d7a65e, PredNewRel {
let Inst{31-21} = 0b00111001100;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24259,9 +24588,9 @@ let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S4_storeirb_io";
let CextOpcode = "S2_storerb";
let InputType = "imm";
-let BaseOpcode = "S4_storeirb_io";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 1;
@@ -24272,7 +24601,7 @@ def S4_storeirbfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4.new) memb($Rs32) = #$II",
-tc_c4f596e3, TypeMAPPING> {
+tc_65cbd974, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24280,15 +24609,15 @@ def S4_storeirbt_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"if ($Pv4) memb($Rs32+#$Ii) = #$II",
-tc_0b2be201, TypeST>, Enc_d7a65e, PredNewRel {
+tc_d03278fd, TypeST>, Enc_d7a65e, PredNewRel {
let Inst{31-21} = 0b00111000000;
let isPredicated = 1;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let mayStore = 1;
+let BaseOpcode = "S4_storeirb_io";
let CextOpcode = "S2_storerb";
let InputType = "imm";
-let BaseOpcode = "S4_storeirb_io";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 1;
@@ -24299,7 +24628,7 @@ def S4_storeirbt_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4) memb($Rs32) = #$II",
-tc_0b2be201, TypeMAPPING> {
+tc_d03278fd, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24307,16 +24636,16 @@ def S4_storeirbtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
"if ($Pv4.new) memb($Rs32+#$Ii) = #$II",
-tc_c4f596e3, TypeST>, Enc_d7a65e, PredNewRel {
+tc_65cbd974, TypeST>, Enc_d7a65e, PredNewRel {
let Inst{31-21} = 0b00111001000;
let isPredicated = 1;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S4_storeirb_io";
let CextOpcode = "S2_storerb";
let InputType = "imm";
-let BaseOpcode = "S4_storeirb_io";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 1;
@@ -24327,7 +24656,7 @@ def S4_storeirbtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4.new) memb($Rs32) = #$II",
-tc_c4f596e3, TypeMAPPING> {
+tc_65cbd974, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24335,14 +24664,14 @@ def S4_storeirh_io : HInst<
(outs),
(ins IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
"memh($Rs32+#$Ii) = #$II",
-tc_b83e6d73, TypeST>, Enc_a803e0, PredNewRel {
+tc_7c31e19a, TypeST>, Enc_a803e0, PredNewRel {
let Inst{31-21} = 0b00111100001;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let mayStore = 1;
+let BaseOpcode = "S4_storeirh_io";
let CextOpcode = "S2_storerh";
let InputType = "imm";
-let BaseOpcode = "S4_storeirh_io";
let isPredicable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -24354,7 +24683,7 @@ def S4_storeirh_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, s8_0Imm:$II),
"memh($Rs32) = #$II",
-tc_b83e6d73, TypeMAPPING> {
+tc_7c31e19a, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24362,16 +24691,16 @@ def S4_storeirhf_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4) memh($Rs32+#$Ii) = #$II",
-tc_0b2be201, TypeST>, Enc_f20719, PredNewRel {
+tc_d03278fd, TypeST>, Enc_f20719, PredNewRel {
let Inst{31-21} = 0b00111000101;
let isPredicated = 1;
let isPredicatedFalse = 1;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let mayStore = 1;
+let BaseOpcode = "S4_storeirh_io";
let CextOpcode = "S2_storerh";
let InputType = "imm";
-let BaseOpcode = "S4_storeirh_io";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 1;
@@ -24382,7 +24711,7 @@ def S4_storeirhf_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4) memh($Rs32) = #$II",
-tc_0b2be201, TypeMAPPING> {
+tc_d03278fd, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24390,7 +24719,7 @@ def S4_storeirhfnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4.new) memh($Rs32+#$Ii) = #$II",
-tc_c4f596e3, TypeST>, Enc_f20719, PredNewRel {
+tc_65cbd974, TypeST>, Enc_f20719, PredNewRel {
let Inst{31-21} = 0b00111001101;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24398,9 +24727,9 @@ let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S4_storeirh_io";
let CextOpcode = "S2_storerh";
let InputType = "imm";
-let BaseOpcode = "S4_storeirh_io";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 1;
@@ -24411,7 +24740,7 @@ def S4_storeirhfnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4.new) memh($Rs32) = #$II",
-tc_c4f596e3, TypeMAPPING> {
+tc_65cbd974, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24419,15 +24748,15 @@ def S4_storeirht_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
"if ($Pv4) memh($Rs32+#$Ii) = #$II",
-tc_0b2be201, TypeST>, Enc_f20719, PredNewRel {
+tc_d03278fd, TypeST>, Enc_f20719, PredNewRel {
let Inst{31-21} = 0b00111000001;
let isPredicated = 1;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let mayStore = 1;
+let BaseOpcode = "S4_storeirh_io";
let CextOpcode = "S2_storerh";
let InputType = "imm";
-let BaseOpcode = "S4_storeirh_io";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 1;
@@ -24438,7 +24767,7 @@ def S4_storeirht_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4) memh($Rs32) = #$II",
-tc_0b2be201, TypeMAPPING> {
+tc_d03278fd, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24446,16 +24775,16 @@ def S4_storeirhtnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
"if ($Pv4.new) memh($Rs32+#$Ii) = #$II",
-tc_c4f596e3, TypeST>, Enc_f20719, PredNewRel {
+tc_65cbd974, TypeST>, Enc_f20719, PredNewRel {
let Inst{31-21} = 0b00111001001;
let isPredicated = 1;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S4_storeirh_io";
let CextOpcode = "S2_storerh";
let InputType = "imm";
-let BaseOpcode = "S4_storeirh_io";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 1;
@@ -24466,7 +24795,7 @@ def S4_storeirhtnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4.new) memh($Rs32) = #$II",
-tc_c4f596e3, TypeMAPPING> {
+tc_65cbd974, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24474,14 +24803,14 @@ def S4_storeiri_io : HInst<
(outs),
(ins IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"memw($Rs32+#$Ii) = #$II",
-tc_b83e6d73, TypeST>, Enc_f37377, PredNewRel {
+tc_7c31e19a, TypeST>, Enc_f37377, PredNewRel {
let Inst{31-21} = 0b00111100010;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
let mayStore = 1;
+let BaseOpcode = "S4_storeiri_io";
let CextOpcode = "S2_storeri";
let InputType = "imm";
-let BaseOpcode = "S4_storeiri_io";
let isPredicable = 1;
let isExtendable = 1;
let opExtendable = 2;
@@ -24493,7 +24822,7 @@ def S4_storeiri_zomap : HInst<
(outs),
(ins IntRegs:$Rs32, s8_0Imm:$II),
"memw($Rs32) = #$II",
-tc_b83e6d73, TypeMAPPING> {
+tc_7c31e19a, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24501,16 +24830,16 @@ def S4_storeirif_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4) memw($Rs32+#$Ii) = #$II",
-tc_0b2be201, TypeST>, Enc_5ccba9, PredNewRel {
+tc_d03278fd, TypeST>, Enc_5ccba9, PredNewRel {
let Inst{31-21} = 0b00111000110;
let isPredicated = 1;
let isPredicatedFalse = 1;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
let mayStore = 1;
+let BaseOpcode = "S4_storeiri_io";
let CextOpcode = "S2_storeri";
let InputType = "imm";
-let BaseOpcode = "S4_storeiri_io";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 1;
@@ -24521,7 +24850,7 @@ def S4_storeirif_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4) memw($Rs32) = #$II",
-tc_0b2be201, TypeMAPPING> {
+tc_d03278fd, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24529,7 +24858,7 @@ def S4_storeirifnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"if (!$Pv4.new) memw($Rs32+#$Ii) = #$II",
-tc_c4f596e3, TypeST>, Enc_5ccba9, PredNewRel {
+tc_65cbd974, TypeST>, Enc_5ccba9, PredNewRel {
let Inst{31-21} = 0b00111001110;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -24537,9 +24866,9 @@ let addrMode = BaseImmOffset;
let accessSize = WordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S4_storeiri_io";
let CextOpcode = "S2_storeri";
let InputType = "imm";
-let BaseOpcode = "S4_storeiri_io";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 1;
@@ -24550,7 +24879,7 @@ def S4_storeirifnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if (!$Pv4.new) memw($Rs32) = #$II",
-tc_c4f596e3, TypeMAPPING> {
+tc_65cbd974, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24558,15 +24887,15 @@ def S4_storeirit_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"if ($Pv4) memw($Rs32+#$Ii) = #$II",
-tc_0b2be201, TypeST>, Enc_5ccba9, PredNewRel {
+tc_d03278fd, TypeST>, Enc_5ccba9, PredNewRel {
let Inst{31-21} = 0b00111000010;
let isPredicated = 1;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
let mayStore = 1;
+let BaseOpcode = "S4_storeiri_io";
let CextOpcode = "S2_storeri";
let InputType = "imm";
-let BaseOpcode = "S4_storeiri_io";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 1;
@@ -24577,7 +24906,7 @@ def S4_storeirit_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4) memw($Rs32) = #$II",
-tc_0b2be201, TypeMAPPING> {
+tc_d03278fd, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24585,16 +24914,16 @@ def S4_storeiritnew_io : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
"if ($Pv4.new) memw($Rs32+#$Ii) = #$II",
-tc_c4f596e3, TypeST>, Enc_5ccba9, PredNewRel {
+tc_65cbd974, TypeST>, Enc_5ccba9, PredNewRel {
let Inst{31-21} = 0b00111001010;
let isPredicated = 1;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
let isPredicatedNew = 1;
let mayStore = 1;
+let BaseOpcode = "S4_storeiri_io";
let CextOpcode = "S2_storeri";
let InputType = "imm";
-let BaseOpcode = "S4_storeiri_io";
let isExtendable = 1;
let opExtendable = 3;
let isExtentSigned = 1;
@@ -24605,7 +24934,7 @@ def S4_storeiritnew_zomap : HInst<
(outs),
(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
"if ($Pv4.new) memw($Rs32) = #$II",
-tc_c4f596e3, TypeMAPPING> {
+tc_65cbd974, TypeMAPPING> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -24613,7 +24942,7 @@ def S4_storerb_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Rt32),
"memb($Re32=#$II) = $Rt32",
-tc_da4a37ed, TypeST>, Enc_8bcba4, AddrModeRel {
+tc_bb07f2c5, TypeST>, Enc_8bcba4, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10101011000;
@@ -24634,15 +24963,15 @@ def S4_storerb_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"memb($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5aee39f7, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
+tc_280f7fe1, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111011000;
let addrMode = BaseRegOffset;
let accessSize = ByteAccess;
let mayStore = 1;
+let BaseOpcode = "S4_storerb_rr";
let CextOpcode = "S2_storerb";
let InputType = "reg";
-let BaseOpcode = "S4_storerb_rr";
let isNVStorable = 1;
let isPredicable = 1;
}
@@ -24650,16 +24979,16 @@ def S4_storerb_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
"memb($Ru32<<#$Ii+#$II) = $Rt32",
-tc_14b272fa, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
+tc_887d1bb7, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b10101101000;
let addrMode = BaseLongOffset;
let accessSize = ByteAccess;
let isExtended = 1;
let mayStore = 1;
+let BaseOpcode = "S4_storerb_ur";
let CextOpcode = "S2_storerb";
let InputType = "imm";
-let BaseOpcode = "S4_storerb_ur";
let isNVStorable = 1;
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
@@ -24672,7 +25001,7 @@ def S4_storerbnew_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Nt8),
"memb($Re32=#$II) = $Nt8.new",
-tc_d2e63d61, TypeST>, Enc_724154, AddrModeRel {
+tc_0fac1eb8, TypeST>, Enc_724154, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b10101011101;
@@ -24696,7 +25025,7 @@ def S4_storerbnew_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_67435e81, TypeST>, Enc_c6220b, AddrModeRel {
+tc_96ef76ef, TypeST>, Enc_c6220b, AddrModeRel {
let Inst{6-3} = 0b0000;
let Inst{31-21} = 0b00111011101;
let addrMode = BaseRegOffset;
@@ -24705,9 +25034,9 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S4_storerb_rr";
let CextOpcode = "S2_storerb";
let InputType = "reg";
-let BaseOpcode = "S4_storerb_rr";
let isPredicable = 1;
let opNewValue = 3;
}
@@ -24715,7 +25044,7 @@ def S4_storerbnew_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8),
"memb($Ru32<<#$Ii+#$II) = $Nt8.new",
-tc_fcc3ddf9, TypeST>, Enc_7eb485, AddrModeRel {
+tc_55a9a350, TypeST>, Enc_7eb485, AddrModeRel {
let Inst{7-7} = 0b1;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b10101101101;
@@ -24726,8 +25055,8 @@ let isNewValue = 1;
let isExtended = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerb";
let BaseOpcode = "S4_storerb_ur";
+let CextOpcode = "S2_storerb";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -24740,7 +25069,7 @@ def S4_storerd_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, DoubleRegs:$Rtt32),
"memd($Re32=#$II) = $Rtt32",
-tc_da4a37ed, TypeST>, Enc_c7a204 {
+tc_bb07f2c5, TypeST>, Enc_c7a204 {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10101011110;
@@ -24760,31 +25089,31 @@ def S4_storerd_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
"memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
-tc_5aee39f7, TypeST>, Enc_55355c, AddrModeRel, ImmRegShl {
+tc_280f7fe1, TypeST>, Enc_55355c, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111011110;
let addrMode = BaseRegOffset;
let accessSize = DoubleWordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storerd_rr";
let CextOpcode = "S2_storerd";
let InputType = "reg";
-let BaseOpcode = "S2_storerd_rr";
let isPredicable = 1;
}
def S4_storerd_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, DoubleRegs:$Rtt32),
"memd($Ru32<<#$Ii+#$II) = $Rtt32",
-tc_14b272fa, TypeST>, Enc_f79415, AddrModeRel, ImmRegShl {
+tc_887d1bb7, TypeST>, Enc_f79415, AddrModeRel, ImmRegShl {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b10101101110;
let addrMode = BaseLongOffset;
let accessSize = DoubleWordAccess;
let isExtended = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerd_ur";
let CextOpcode = "S2_storerd";
let InputType = "imm";
-let BaseOpcode = "S2_storerd_ur";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -24796,7 +25125,7 @@ def S4_storerf_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Rt32),
"memh($Re32=#$II) = $Rt32.h",
-tc_da4a37ed, TypeST>, Enc_8bcba4 {
+tc_bb07f2c5, TypeST>, Enc_8bcba4 {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10101011011;
@@ -24816,31 +25145,31 @@ def S4_storerf_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
-tc_5aee39f7, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
+tc_280f7fe1, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111011011;
let addrMode = BaseRegOffset;
let accessSize = HalfWordAccess;
let mayStore = 1;
+let BaseOpcode = "S4_storerf_rr";
let CextOpcode = "S2_storerf";
let InputType = "reg";
-let BaseOpcode = "S4_storerf_rr";
let isPredicable = 1;
}
def S4_storerf_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
"memh($Ru32<<#$Ii+#$II) = $Rt32.h",
-tc_14b272fa, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
+tc_887d1bb7, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b10101101011;
let addrMode = BaseLongOffset;
let accessSize = HalfWordAccess;
let isExtended = 1;
let mayStore = 1;
+let BaseOpcode = "S4_storerf_rr";
let CextOpcode = "S2_storerf";
let InputType = "imm";
-let BaseOpcode = "S4_storerf_rr";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -24852,7 +25181,7 @@ def S4_storerh_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Rt32),
"memh($Re32=#$II) = $Rt32",
-tc_da4a37ed, TypeST>, Enc_8bcba4, AddrModeRel {
+tc_bb07f2c5, TypeST>, Enc_8bcba4, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10101011010;
@@ -24873,15 +25202,15 @@ def S4_storerh_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"memh($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5aee39f7, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
+tc_280f7fe1, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111011010;
let addrMode = BaseRegOffset;
let accessSize = HalfWordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_rr";
let CextOpcode = "S2_storerh";
let InputType = "reg";
-let BaseOpcode = "S2_storerh_rr";
let isNVStorable = 1;
let isPredicable = 1;
}
@@ -24889,16 +25218,16 @@ def S4_storerh_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
"memh($Ru32<<#$Ii+#$II) = $Rt32",
-tc_14b272fa, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
+tc_887d1bb7, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b10101101010;
let addrMode = BaseLongOffset;
let accessSize = HalfWordAccess;
let isExtended = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_ur";
let CextOpcode = "S2_storerh";
let InputType = "imm";
-let BaseOpcode = "S2_storerh_ur";
let isNVStorable = 1;
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
@@ -24911,7 +25240,7 @@ def S4_storerhnew_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Nt8),
"memh($Re32=#$II) = $Nt8.new",
-tc_d2e63d61, TypeST>, Enc_724154, AddrModeRel {
+tc_0fac1eb8, TypeST>, Enc_724154, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-11} = 0b001;
let Inst{31-21} = 0b10101011101;
@@ -24935,7 +25264,7 @@ def S4_storerhnew_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_67435e81, TypeST>, Enc_c6220b, AddrModeRel {
+tc_96ef76ef, TypeST>, Enc_c6220b, AddrModeRel {
let Inst{6-3} = 0b0001;
let Inst{31-21} = 0b00111011101;
let addrMode = BaseRegOffset;
@@ -24944,9 +25273,9 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storerh_rr";
let CextOpcode = "S2_storerh";
let InputType = "reg";
-let BaseOpcode = "S2_storerh_rr";
let isPredicable = 1;
let opNewValue = 3;
}
@@ -24954,7 +25283,7 @@ def S4_storerhnew_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8),
"memh($Ru32<<#$Ii+#$II) = $Nt8.new",
-tc_fcc3ddf9, TypeST>, Enc_7eb485, AddrModeRel {
+tc_55a9a350, TypeST>, Enc_7eb485, AddrModeRel {
let Inst{7-7} = 0b1;
let Inst{12-11} = 0b01;
let Inst{31-21} = 0b10101101101;
@@ -24965,8 +25294,8 @@ let isNewValue = 1;
let isExtended = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storerh";
let BaseOpcode = "S2_storerh_ur";
+let CextOpcode = "S2_storerh";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -24979,7 +25308,7 @@ def S4_storeri_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Rt32),
"memw($Re32=#$II) = $Rt32",
-tc_da4a37ed, TypeST>, Enc_8bcba4, AddrModeRel {
+tc_bb07f2c5, TypeST>, Enc_8bcba4, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10101011100;
@@ -25000,15 +25329,15 @@ def S4_storeri_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
"memw($Rs32+$Ru32<<#$Ii) = $Rt32",
-tc_5aee39f7, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
+tc_280f7fe1, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl {
let Inst{6-5} = 0b00;
let Inst{31-21} = 0b00111011100;
let addrMode = BaseRegOffset;
let accessSize = WordAccess;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_rr";
let CextOpcode = "S2_storeri";
let InputType = "reg";
-let BaseOpcode = "S2_storeri_rr";
let isNVStorable = 1;
let isPredicable = 1;
}
@@ -25016,16 +25345,16 @@ def S4_storeri_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
"memw($Ru32<<#$Ii+#$II) = $Rt32",
-tc_14b272fa, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
+tc_887d1bb7, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b10101101100;
let addrMode = BaseLongOffset;
let accessSize = WordAccess;
let isExtended = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_ur";
let CextOpcode = "S2_storeri";
let InputType = "imm";
-let BaseOpcode = "S2_storeri_ur";
let isNVStorable = 1;
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
@@ -25038,7 +25367,7 @@ def S4_storerinew_ap : HInst<
(outs IntRegs:$Re32),
(ins u32_0Imm:$II, IntRegs:$Nt8),
"memw($Re32=#$II) = $Nt8.new",
-tc_d2e63d61, TypeST>, Enc_724154, AddrModeRel {
+tc_0fac1eb8, TypeST>, Enc_724154, AddrModeRel {
let Inst{7-6} = 0b10;
let Inst{13-11} = 0b010;
let Inst{31-21} = 0b10101011101;
@@ -25062,7 +25391,7 @@ def S4_storerinew_rr : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
"memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
-tc_67435e81, TypeST>, Enc_c6220b, AddrModeRel {
+tc_96ef76ef, TypeST>, Enc_c6220b, AddrModeRel {
let Inst{6-3} = 0b0010;
let Inst{31-21} = 0b00111011101;
let addrMode = BaseRegOffset;
@@ -25071,9 +25400,9 @@ let isNVStore = 1;
let isNewValue = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
+let BaseOpcode = "S2_storeri_rr";
let CextOpcode = "S2_storeri";
let InputType = "reg";
-let BaseOpcode = "S2_storeri_rr";
let isPredicable = 1;
let opNewValue = 3;
}
@@ -25081,7 +25410,7 @@ def S4_storerinew_ur : HInst<
(outs),
(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8),
"memw($Ru32<<#$Ii+#$II) = $Nt8.new",
-tc_fcc3ddf9, TypeST>, Enc_7eb485, AddrModeRel {
+tc_55a9a350, TypeST>, Enc_7eb485, AddrModeRel {
let Inst{7-7} = 0b1;
let Inst{12-11} = 0b10;
let Inst{31-21} = 0b10101101101;
@@ -25092,8 +25421,8 @@ let isNewValue = 1;
let isExtended = 1;
let isRestrictNoSlot1Store = 1;
let mayStore = 1;
-let CextOpcode = "S2_storeri";
let BaseOpcode = "S2_storeri_ur";
+let CextOpcode = "S2_storeri";
let DecoderNamespace = "MustExtend";
let isExtendable = 1;
let opExtendable = 2;
@@ -25106,7 +25435,7 @@ def S4_subaddi : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Ru32),
"$Rd32 = add($Rs32,sub(#$Ii,$Ru32))",
-tc_f675fee8, TypeALU64>, Enc_8b8d61 {
+tc_2c13e7f5, TypeALU64>, Enc_8b8d61, Requires<[UseCompound]> {
let Inst{31-23} = 0b110110111;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25121,7 +25450,7 @@ def S4_subi_asl_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = sub(#$Ii,asl($Rx32in,#$II))",
-tc_f675fee8, TypeALU64>, Enc_c31910 {
+tc_2c13e7f5, TypeALU64>, Enc_c31910, Requires<[UseCompound]> {
let Inst{2-0} = 0b110;
let Inst{4-4} = 0b0;
let Inst{31-24} = 0b11011110;
@@ -25139,7 +25468,7 @@ def S4_subi_lsr_ri : HInst<
(outs IntRegs:$Rx32),
(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
"$Rx32 = sub(#$Ii,lsr($Rx32in,#$II))",
-tc_f675fee8, TypeALU64>, Enc_c31910 {
+tc_2c13e7f5, TypeALU64>, Enc_c31910, Requires<[UseCompound]> {
let Inst{2-0} = 0b110;
let Inst{4-4} = 0b1;
let Inst{31-24} = 0b11011110;
@@ -25157,7 +25486,7 @@ def S4_vrcrotate : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rdd32 = vrcrotate($Rss32,$Rt32,#$Ii)",
-tc_13bfbcf9, TypeS_3op>, Enc_645d54 {
+tc_f0cdeccf, TypeS_3op>, Enc_645d54 {
let Inst{7-6} = 0b11;
let Inst{31-21} = 0b11000011110;
let prefersSlot3 = 1;
@@ -25166,7 +25495,7 @@ def S4_vrcrotate_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32, u2_0Imm:$Ii),
"$Rxx32 += vrcrotate($Rss32,$Rt32,#$Ii)",
-tc_9debc299, TypeS_3op>, Enc_b72622 {
+tc_a38c45dc, TypeS_3op>, Enc_b72622 {
let Inst{7-6} = 0b00;
let Inst{31-21} = 0b11001011101;
let prefersSlot3 = 1;
@@ -25176,7 +25505,7 @@ def S4_vxaddsubh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxaddsubh($Rss32,$Rtt32):sat",
-tc_779080bf, TypeS_3op>, Enc_a56825 {
+tc_8a825db2, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -25187,7 +25516,7 @@ def S4_vxaddsubhr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxaddsubh($Rss32,$Rtt32):rnd:>>1:sat",
-tc_002cb246, TypeS_3op>, Enc_a56825 {
+tc_0dfac0a7, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001110;
@@ -25198,7 +25527,7 @@ def S4_vxaddsubw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxaddsubw($Rss32,$Rtt32):sat",
-tc_779080bf, TypeS_3op>, Enc_a56825 {
+tc_8a825db2, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -25209,7 +25538,7 @@ def S4_vxsubaddh : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxsubaddh($Rss32,$Rtt32):sat",
-tc_779080bf, TypeS_3op>, Enc_a56825 {
+tc_8a825db2, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -25220,7 +25549,7 @@ def S4_vxsubaddhr : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxsubaddh($Rss32,$Rtt32):rnd:>>1:sat",
-tc_002cb246, TypeS_3op>, Enc_a56825 {
+tc_0dfac0a7, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001110;
@@ -25231,7 +25560,7 @@ def S4_vxsubaddw : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vxsubaddw($Rss32,$Rtt32):sat",
-tc_779080bf, TypeS_3op>, Enc_a56825 {
+tc_8a825db2, TypeS_3op>, Enc_a56825 {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -25242,7 +25571,7 @@ def S5_asrhub_rnd_sat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rd32 = vasrhub($Rss32,#$Ii):raw",
-tc_002cb246, TypeS_2op>, Enc_11a146 {
+tc_0dfac0a7, TypeS_2op>, Enc_11a146 {
let Inst{7-5} = 0b100;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10001000011;
@@ -25255,7 +25584,7 @@ def S5_asrhub_rnd_sat_goodsyntax : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rd32 = vasrhub($Rss32,#$Ii):rnd:sat",
-tc_002cb246, TypeS_2op> {
+tc_0dfac0a7, TypeS_2op> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -25264,7 +25593,7 @@ def S5_asrhub_sat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rd32 = vasrhub($Rss32,#$Ii):sat",
-tc_002cb246, TypeS_2op>, Enc_11a146 {
+tc_0dfac0a7, TypeS_2op>, Enc_11a146 {
let Inst{7-5} = 0b101;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10001000011;
@@ -25277,7 +25606,7 @@ def S5_popcountp : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = popcount($Rss32)",
-tc_703e822c, TypeS_2op>, Enc_90cd8b {
+tc_d3632d88, TypeS_2op>, Enc_90cd8b {
let Inst{13-5} = 0b000000011;
let Inst{31-21} = 0b10001000011;
let hasNewValue = 1;
@@ -25288,7 +25617,7 @@ def S5_vasrhrnd : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vasrh($Rss32,#$Ii):raw",
-tc_002cb246, TypeS_2op>, Enc_12b6e9 {
+tc_0dfac0a7, TypeS_2op>, Enc_12b6e9 {
let Inst{7-5} = 0b000;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10000000001;
@@ -25298,14 +25627,14 @@ def S5_vasrhrnd_goodsyntax : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vasrh($Rss32,#$Ii):rnd",
-tc_002cb246, TypeS_2op> {
+tc_0dfac0a7, TypeS_2op> {
let isPseudo = 1;
}
def S6_allocframe_to_raw : HInst<
(outs),
(ins u11_3Imm:$Ii),
"allocframe(#$Ii)",
-tc_b44ecf75, TypeMAPPING>, Requires<[HasV65]> {
+tc_934753bb, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -25313,7 +25642,7 @@ def S6_rol_i_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = rol($Rss32,#$Ii)",
-tc_1fc97744, TypeS_2op>, Enc_5eac98, Requires<[HasV60]> {
+tc_407e96f9, TypeS_2op>, Enc_5eac98, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b10000000000;
}
@@ -25321,7 +25650,7 @@ def S6_rol_i_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 += rol($Rss32,#$Ii)",
-tc_784490da, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
+tc_5e4cf0e8, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -25331,7 +25660,7 @@ def S6_rol_i_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 &= rol($Rss32,#$Ii)",
-tc_784490da, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
+tc_5e4cf0e8, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -25341,7 +25670,7 @@ def S6_rol_i_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 -= rol($Rss32,#$Ii)",
-tc_784490da, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
+tc_5e4cf0e8, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -25351,7 +25680,7 @@ def S6_rol_i_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 |= rol($Rss32,#$Ii)",
-tc_784490da, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
+tc_5e4cf0e8, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -25361,7 +25690,7 @@ def S6_rol_i_p_xacc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 ^= rol($Rss32,#$Ii)",
-tc_784490da, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
+tc_5e4cf0e8, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b10000010100;
let prefersSlot3 = 1;
@@ -25371,7 +25700,7 @@ def S6_rol_i_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = rol($Rs32,#$Ii)",
-tc_1fc97744, TypeS_2op>, Enc_a05677, Requires<[HasV60]> {
+tc_407e96f9, TypeS_2op>, Enc_a05677, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100000;
@@ -25382,7 +25711,7 @@ def S6_rol_i_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 += rol($Rs32,#$Ii)",
-tc_784490da, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
+tc_5e4cf0e8, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -25395,7 +25724,7 @@ def S6_rol_i_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 &= rol($Rs32,#$Ii)",
-tc_784490da, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
+tc_5e4cf0e8, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -25408,7 +25737,7 @@ def S6_rol_i_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 -= rol($Rs32,#$Ii)",
-tc_784490da, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
+tc_5e4cf0e8, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -25421,7 +25750,7 @@ def S6_rol_i_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 |= rol($Rs32,#$Ii)",
-tc_784490da, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
+tc_5e4cf0e8, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -25434,7 +25763,7 @@ def S6_rol_i_r_xacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 ^= rol($Rs32,#$Ii)",
-tc_784490da, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
+tc_5e4cf0e8, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110100;
@@ -25447,7 +25776,7 @@ def S6_vsplatrbp : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vsplatb($Rs32)",
-tc_a1c00888, TypeS_2op>, Enc_3a3d62, Requires<[HasV62]> {
+tc_ef921005, TypeS_2op>, Enc_3a3d62, Requires<[HasV62]> {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000100010;
}
@@ -25455,7 +25784,7 @@ def S6_vtrunehb_ppp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vtrunehb($Rss32,$Rtt32)",
-tc_1fc97744, TypeS_3op>, Enc_a56825, Requires<[HasV62]> {
+tc_407e96f9, TypeS_3op>, Enc_a56825, Requires<[HasV62]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -25464,7 +25793,7 @@ def S6_vtrunohb_ppp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vtrunohb($Rss32,$Rtt32)",
-tc_1fc97744, TypeS_3op>, Enc_a56825, Requires<[HasV62]> {
+tc_407e96f9, TypeS_3op>, Enc_a56825, Requires<[HasV62]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -25473,7 +25802,7 @@ def SA1_addi : HInst<
(outs GeneralSubRegs:$Rx16),
(ins IntRegs:$Rx16in, s32_0Imm:$Ii),
"$Rx16 = add($Rx16in,#$Ii)",
-tc_0a705168, TypeSUBINSN>, Enc_93af4c {
+tc_5b347363, TypeSUBINSN>, Enc_93af4c {
let Inst{12-11} = 0b00;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25490,7 +25819,7 @@ def SA1_addrx : HInst<
(outs GeneralSubRegs:$Rx16),
(ins IntRegs:$Rx16in, GeneralSubRegs:$Rs16),
"$Rx16 = add($Rx16in,$Rs16)",
-tc_0a705168, TypeSUBINSN>, Enc_0527db {
+tc_5b347363, TypeSUBINSN>, Enc_0527db {
let Inst{12-8} = 0b11000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25502,7 +25831,7 @@ def SA1_addsp : HInst<
(outs GeneralSubRegs:$Rd16),
(ins u6_2Imm:$Ii),
"$Rd16 = add(r29,#$Ii)",
-tc_9fc3dae0, TypeSUBINSN>, Enc_2df31d {
+tc_3d14a17b, TypeSUBINSN>, Enc_2df31d {
let Inst{12-10} = 0b011;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25514,7 +25843,7 @@ def SA1_and1 : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = and($Rs16,#1)",
-tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
+tc_3d14a17b, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10010;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25525,7 +25854,7 @@ def SA1_clrf : HInst<
(outs GeneralSubRegs:$Rd16),
(ins),
"if (!p0) $Rd16 = #0",
-tc_a1123dda, TypeSUBINSN>, Enc_1f5ba6 {
+tc_3fbf1042, TypeSUBINSN>, Enc_1f5ba6 {
let Inst{12-4} = 0b110100111;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25539,7 +25868,7 @@ def SA1_clrfnew : HInst<
(outs GeneralSubRegs:$Rd16),
(ins),
"if (!p0.new) $Rd16 = #0",
-tc_8b3e402a, TypeSUBINSN>, Enc_1f5ba6 {
+tc_63567288, TypeSUBINSN>, Enc_1f5ba6 {
let Inst{12-4} = 0b110100101;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25554,7 +25883,7 @@ def SA1_clrt : HInst<
(outs GeneralSubRegs:$Rd16),
(ins),
"if (p0) $Rd16 = #0",
-tc_a1123dda, TypeSUBINSN>, Enc_1f5ba6 {
+tc_3fbf1042, TypeSUBINSN>, Enc_1f5ba6 {
let Inst{12-4} = 0b110100110;
let isPredicated = 1;
let hasNewValue = 1;
@@ -25567,7 +25896,7 @@ def SA1_clrtnew : HInst<
(outs GeneralSubRegs:$Rd16),
(ins),
"if (p0.new) $Rd16 = #0",
-tc_8b3e402a, TypeSUBINSN>, Enc_1f5ba6 {
+tc_63567288, TypeSUBINSN>, Enc_1f5ba6 {
let Inst{12-4} = 0b110100100;
let isPredicated = 1;
let hasNewValue = 1;
@@ -25581,7 +25910,7 @@ def SA1_cmpeqi : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u2_0Imm:$Ii),
"p0 = cmp.eq($Rs16,#$Ii)",
-tc_5b7c0967, TypeSUBINSN>, Enc_63eaeb {
+tc_59a7822c, TypeSUBINSN>, Enc_63eaeb {
let Inst{3-2} = 0b00;
let Inst{12-8} = 0b11001;
let AsmVariantName = "NonParsable";
@@ -25592,7 +25921,7 @@ def SA1_combine0i : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins u2_0Imm:$Ii),
"$Rdd8 = combine(#0,#$Ii)",
-tc_9fc3dae0, TypeSUBINSN>, Enc_ed48be {
+tc_3d14a17b, TypeSUBINSN>, Enc_ed48be {
let Inst{4-3} = 0b00;
let Inst{12-7} = 0b111000;
let hasNewValue = 1;
@@ -25604,7 +25933,7 @@ def SA1_combine1i : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins u2_0Imm:$Ii),
"$Rdd8 = combine(#1,#$Ii)",
-tc_9fc3dae0, TypeSUBINSN>, Enc_ed48be {
+tc_3d14a17b, TypeSUBINSN>, Enc_ed48be {
let Inst{4-3} = 0b01;
let Inst{12-7} = 0b111000;
let hasNewValue = 1;
@@ -25616,7 +25945,7 @@ def SA1_combine2i : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins u2_0Imm:$Ii),
"$Rdd8 = combine(#2,#$Ii)",
-tc_9fc3dae0, TypeSUBINSN>, Enc_ed48be {
+tc_3d14a17b, TypeSUBINSN>, Enc_ed48be {
let Inst{4-3} = 0b10;
let Inst{12-7} = 0b111000;
let hasNewValue = 1;
@@ -25628,7 +25957,7 @@ def SA1_combine3i : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins u2_0Imm:$Ii),
"$Rdd8 = combine(#3,#$Ii)",
-tc_9fc3dae0, TypeSUBINSN>, Enc_ed48be {
+tc_3d14a17b, TypeSUBINSN>, Enc_ed48be {
let Inst{4-3} = 0b11;
let Inst{12-7} = 0b111000;
let hasNewValue = 1;
@@ -25640,7 +25969,7 @@ def SA1_combinerz : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins GeneralSubRegs:$Rs16),
"$Rdd8 = combine($Rs16,#0)",
-tc_9fc3dae0, TypeSUBINSN>, Enc_399e12 {
+tc_3d14a17b, TypeSUBINSN>, Enc_399e12 {
let Inst{3-3} = 0b1;
let Inst{12-8} = 0b11101;
let hasNewValue = 1;
@@ -25652,7 +25981,7 @@ def SA1_combinezr : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins GeneralSubRegs:$Rs16),
"$Rdd8 = combine(#0,$Rs16)",
-tc_9fc3dae0, TypeSUBINSN>, Enc_399e12 {
+tc_3d14a17b, TypeSUBINSN>, Enc_399e12 {
let Inst{3-3} = 0b0;
let Inst{12-8} = 0b11101;
let hasNewValue = 1;
@@ -25664,7 +25993,7 @@ def SA1_dec : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, n1Const:$n1),
"$Rd16 = add($Rs16,#$n1)",
-tc_0a705168, TypeSUBINSN>, Enc_ee5ed0 {
+tc_5b347363, TypeSUBINSN>, Enc_ee5ed0 {
let Inst{12-8} = 0b10011;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25675,7 +26004,7 @@ def SA1_inc : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = add($Rs16,#1)",
-tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
+tc_3d14a17b, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10001;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25686,7 +26015,7 @@ def SA1_seti : HInst<
(outs GeneralSubRegs:$Rd16),
(ins u32_0Imm:$Ii),
"$Rd16 = #$Ii",
-tc_9fc3dae0, TypeSUBINSN>, Enc_e39bb2 {
+tc_3d14a17b, TypeSUBINSN>, Enc_e39bb2 {
let Inst{12-10} = 0b010;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25702,7 +26031,7 @@ def SA1_setin1 : HInst<
(outs GeneralSubRegs:$Rd16),
(ins n1Const:$n1),
"$Rd16 = #$n1",
-tc_9fc3dae0, TypeSUBINSN>, Enc_7a0ea6 {
+tc_3d14a17b, TypeSUBINSN>, Enc_7a0ea6 {
let Inst{12-4} = 0b110100000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25713,7 +26042,7 @@ def SA1_sxtb : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = sxtb($Rs16)",
-tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
+tc_3d14a17b, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10101;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25724,7 +26053,7 @@ def SA1_sxth : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = sxth($Rs16)",
-tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
+tc_3d14a17b, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10100;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25735,7 +26064,7 @@ def SA1_tfr : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = $Rs16",
-tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
+tc_3d14a17b, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10000;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25746,7 +26075,7 @@ def SA1_zxtb : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = and($Rs16,#255)",
-tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
+tc_3d14a17b, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10111;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25757,7 +26086,7 @@ def SA1_zxth : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16),
"$Rd16 = zxth($Rs16)",
-tc_9fc3dae0, TypeSUBINSN>, Enc_97d666 {
+tc_3d14a17b, TypeSUBINSN>, Enc_97d666 {
let Inst{12-8} = 0b10110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25768,7 +26097,7 @@ def SL1_loadri_io : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii),
"$Rd16 = memw($Rs16+#$Ii)",
-tc_17e0d2cd, TypeSUBINSN>, Enc_53dca9 {
+tc_4222e6bf, TypeSUBINSN>, Enc_53dca9 {
let Inst{12-12} = 0b0;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25782,7 +26111,7 @@ def SL1_loadrub_io : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii),
"$Rd16 = memub($Rs16+#$Ii)",
-tc_17e0d2cd, TypeSUBINSN>, Enc_c175d0 {
+tc_4222e6bf, TypeSUBINSN>, Enc_c175d0 {
let Inst{12-12} = 0b1;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25796,20 +26125,20 @@ def SL2_deallocframe : HInst<
(outs),
(ins),
"deallocframe",
-tc_39dfefe8, TypeSUBINSN>, Enc_e3b0c4 {
+tc_937dd41c, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111100000000;
let accessSize = DoubleWordAccess;
let AsmVariantName = "NonParsable";
let mayLoad = 1;
let Uses = [FRAMEKEY, R30];
-let Defs = [R30, R29, R31];
+let Defs = [R29, R30, R31];
let DecoderNamespace = "SUBINSN_L2";
}
def SL2_jumpr31 : HInst<
(outs),
(ins),
"jumpr r31",
-tc_b4407292, TypeSUBINSN>, Enc_e3b0c4 {
+tc_a4ee89db, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111111000000;
let isTerminator = 1;
let isIndirectBranch = 1;
@@ -25824,7 +26153,7 @@ def SL2_jumpr31_f : HInst<
(outs),
(ins),
"if (!p0) jumpr r31",
-tc_b4407292, TypeSUBINSN>, Enc_e3b0c4 {
+tc_a4ee89db, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111111000101;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25842,7 +26171,7 @@ def SL2_jumpr31_fnew : HInst<
(outs),
(ins),
"if (!p0.new) jumpr:nt r31",
-tc_b4407292, TypeSUBINSN>, Enc_e3b0c4 {
+tc_a4ee89db, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111111000111;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25861,7 +26190,7 @@ def SL2_jumpr31_t : HInst<
(outs),
(ins),
"if (p0) jumpr r31",
-tc_b4407292, TypeSUBINSN>, Enc_e3b0c4 {
+tc_a4ee89db, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111111000100;
let isPredicated = 1;
let isTerminator = 1;
@@ -25878,7 +26207,7 @@ def SL2_jumpr31_tnew : HInst<
(outs),
(ins),
"if (p0.new) jumpr:nt r31",
-tc_b4407292, TypeSUBINSN>, Enc_e3b0c4 {
+tc_a4ee89db, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111111000110;
let isPredicated = 1;
let isTerminator = 1;
@@ -25896,7 +26225,7 @@ def SL2_loadrb_io : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, u3_0Imm:$Ii),
"$Rd16 = memb($Rs16+#$Ii)",
-tc_17e0d2cd, TypeSUBINSN>, Enc_2fbf3c {
+tc_4222e6bf, TypeSUBINSN>, Enc_2fbf3c {
let Inst{12-11} = 0b10;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25910,7 +26239,7 @@ def SL2_loadrd_sp : HInst<
(outs GeneralDoubleLow8Regs:$Rdd8),
(ins u5_3Imm:$Ii),
"$Rdd8 = memd(r29+#$Ii)",
-tc_c4db48cb, TypeSUBINSN>, Enc_86a14b {
+tc_8a6d0d94, TypeSUBINSN>, Enc_86a14b {
let Inst{12-8} = 0b11110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25925,7 +26254,7 @@ def SL2_loadrh_io : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii),
"$Rd16 = memh($Rs16+#$Ii)",
-tc_17e0d2cd, TypeSUBINSN>, Enc_2bae10 {
+tc_4222e6bf, TypeSUBINSN>, Enc_2bae10 {
let Inst{12-11} = 0b00;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25939,7 +26268,7 @@ def SL2_loadri_sp : HInst<
(outs GeneralSubRegs:$Rd16),
(ins u5_2Imm:$Ii),
"$Rd16 = memw(r29+#$Ii)",
-tc_c4db48cb, TypeSUBINSN>, Enc_51635c {
+tc_8a6d0d94, TypeSUBINSN>, Enc_51635c {
let Inst{12-9} = 0b1110;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25954,7 +26283,7 @@ def SL2_loadruh_io : HInst<
(outs GeneralSubRegs:$Rd16),
(ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii),
"$Rd16 = memuh($Rs16+#$Ii)",
-tc_17e0d2cd, TypeSUBINSN>, Enc_2bae10 {
+tc_4222e6bf, TypeSUBINSN>, Enc_2bae10 {
let Inst{12-11} = 0b01;
let hasNewValue = 1;
let opNewValue = 0;
@@ -25968,7 +26297,7 @@ def SL2_return : HInst<
(outs),
(ins),
"dealloc_return",
-tc_36153880, TypeSUBINSN>, Enc_e3b0c4 {
+tc_c818ff7f, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111101000000;
let isTerminator = 1;
let isIndirectBranch = 1;
@@ -25979,14 +26308,14 @@ let cofMax1 = 1;
let isRestrictNoSlot1Store = 1;
let isReturn = 1;
let Uses = [FRAMEKEY, R30];
-let Defs = [PC, R30, R29, R31];
+let Defs = [PC, R29, R30, R31];
let DecoderNamespace = "SUBINSN_L2";
}
def SL2_return_f : HInst<
(outs),
(ins),
"if (!p0) dealloc_return",
-tc_36153880, TypeSUBINSN>, Enc_e3b0c4 {
+tc_c818ff7f, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111101000101;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -25999,7 +26328,7 @@ let cofMax1 = 1;
let isRestrictNoSlot1Store = 1;
let isReturn = 1;
let Uses = [FRAMEKEY, P0, R30];
-let Defs = [PC, R30, R29, R31];
+let Defs = [PC, R29, R30, R31];
let isTaken = Inst{4};
let DecoderNamespace = "SUBINSN_L2";
}
@@ -26007,7 +26336,7 @@ def SL2_return_fnew : HInst<
(outs),
(ins),
"if (!p0.new) dealloc_return:nt",
-tc_36153880, TypeSUBINSN>, Enc_e3b0c4 {
+tc_c818ff7f, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111101000111;
let isPredicated = 1;
let isPredicatedFalse = 1;
@@ -26021,7 +26350,7 @@ let cofMax1 = 1;
let isRestrictNoSlot1Store = 1;
let isReturn = 1;
let Uses = [FRAMEKEY, P0, R30];
-let Defs = [PC, R30, R29, R31];
+let Defs = [PC, R29, R30, R31];
let isTaken = Inst{4};
let DecoderNamespace = "SUBINSN_L2";
}
@@ -26029,7 +26358,7 @@ def SL2_return_t : HInst<
(outs),
(ins),
"if (p0) dealloc_return",
-tc_36153880, TypeSUBINSN>, Enc_e3b0c4 {
+tc_c818ff7f, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111101000100;
let isPredicated = 1;
let isTerminator = 1;
@@ -26041,7 +26370,7 @@ let cofMax1 = 1;
let isRestrictNoSlot1Store = 1;
let isReturn = 1;
let Uses = [FRAMEKEY, P0, R30];
-let Defs = [PC, R30, R29, R31];
+let Defs = [PC, R29, R30, R31];
let isTaken = Inst{4};
let DecoderNamespace = "SUBINSN_L2";
}
@@ -26049,7 +26378,7 @@ def SL2_return_tnew : HInst<
(outs),
(ins),
"if (p0.new) dealloc_return:nt",
-tc_36153880, TypeSUBINSN>, Enc_e3b0c4 {
+tc_c818ff7f, TypeSUBINSN>, Enc_e3b0c4 {
let Inst{12-0} = 0b1111101000110;
let isPredicated = 1;
let isTerminator = 1;
@@ -26062,7 +26391,7 @@ let cofMax1 = 1;
let isRestrictNoSlot1Store = 1;
let isReturn = 1;
let Uses = [FRAMEKEY, P0, R30];
-let Defs = [PC, R30, R29, R31];
+let Defs = [PC, R29, R30, R31];
let isTaken = Inst{4};
let DecoderNamespace = "SUBINSN_L2";
}
@@ -26070,7 +26399,7 @@ def SS1_storeb_io : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii, GeneralSubRegs:$Rt16),
"memb($Rs16+#$Ii) = $Rt16",
-tc_30b9bb4a, TypeSUBINSN>, Enc_b38ffc {
+tc_ae5babd7, TypeSUBINSN>, Enc_b38ffc {
let Inst{12-12} = 0b1;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
@@ -26082,7 +26411,7 @@ def SS1_storew_io : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii, GeneralSubRegs:$Rt16),
"memw($Rs16+#$Ii) = $Rt16",
-tc_30b9bb4a, TypeSUBINSN>, Enc_f55a0c {
+tc_ae5babd7, TypeSUBINSN>, Enc_f55a0c {
let Inst{12-12} = 0b0;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
@@ -26094,22 +26423,22 @@ def SS2_allocframe : HInst<
(outs),
(ins u5_3Imm:$Ii),
"allocframe(#$Ii)",
-tc_49a8207d, TypeSUBINSN>, Enc_6f70ca {
+tc_1242dc2a, TypeSUBINSN>, Enc_6f70ca {
let Inst{3-0} = 0b0000;
let Inst{12-9} = 0b1110;
let addrMode = BaseImmOffset;
let accessSize = DoubleWordAccess;
let AsmVariantName = "NonParsable";
let mayStore = 1;
-let Uses = [FRAMEKEY, FRAMELIMIT, R30, R29, R31];
-let Defs = [R30, R29];
+let Uses = [FRAMEKEY, FRAMELIMIT, R29, R30, R31];
+let Defs = [R29, R30];
let DecoderNamespace = "SUBINSN_S2";
}
def SS2_storebi0 : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii),
"memb($Rs16+#$Ii) = #0",
-tc_89e94ad3, TypeSUBINSN>, Enc_84d359 {
+tc_44d5a428, TypeSUBINSN>, Enc_84d359 {
let Inst{12-8} = 0b10010;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
@@ -26121,7 +26450,7 @@ def SS2_storebi1 : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii),
"memb($Rs16+#$Ii) = #1",
-tc_89e94ad3, TypeSUBINSN>, Enc_84d359 {
+tc_44d5a428, TypeSUBINSN>, Enc_84d359 {
let Inst{12-8} = 0b10011;
let addrMode = BaseImmOffset;
let accessSize = ByteAccess;
@@ -26133,7 +26462,7 @@ def SS2_stored_sp : HInst<
(outs),
(ins s6_3Imm:$Ii, GeneralDoubleLow8Regs:$Rtt8),
"memd(r29+#$Ii) = $Rtt8",
-tc_0371abea, TypeSUBINSN>, Enc_b8309d {
+tc_0655b949, TypeSUBINSN>, Enc_b8309d {
let Inst{12-9} = 0b0101;
let addrMode = BaseImmOffset;
let accessSize = DoubleWordAccess;
@@ -26146,7 +26475,7 @@ def SS2_storeh_io : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii, GeneralSubRegs:$Rt16),
"memh($Rs16+#$Ii) = $Rt16",
-tc_30b9bb4a, TypeSUBINSN>, Enc_625deb {
+tc_ae5babd7, TypeSUBINSN>, Enc_625deb {
let Inst{12-11} = 0b00;
let addrMode = BaseImmOffset;
let accessSize = HalfWordAccess;
@@ -26158,7 +26487,7 @@ def SS2_storew_sp : HInst<
(outs),
(ins u5_2Imm:$Ii, GeneralSubRegs:$Rt16),
"memw(r29+#$Ii) = $Rt16",
-tc_0371abea, TypeSUBINSN>, Enc_87c142 {
+tc_0655b949, TypeSUBINSN>, Enc_87c142 {
let Inst{12-9} = 0b0100;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
@@ -26171,7 +26500,7 @@ def SS2_storewi0 : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii),
"memw($Rs16+#$Ii) = #0",
-tc_89e94ad3, TypeSUBINSN>, Enc_a6ce9c {
+tc_44d5a428, TypeSUBINSN>, Enc_a6ce9c {
let Inst{12-8} = 0b10000;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
@@ -26183,7 +26512,7 @@ def SS2_storewi1 : HInst<
(outs),
(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii),
"memw($Rs16+#$Ii) = #1",
-tc_89e94ad3, TypeSUBINSN>, Enc_a6ce9c {
+tc_44d5a428, TypeSUBINSN>, Enc_a6ce9c {
let Inst{12-8} = 0b10001;
let addrMode = BaseImmOffset;
let accessSize = WordAccess;
@@ -26198,6 +26527,7 @@ def V6_MAP_equb : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26207,6 +26537,7 @@ def V6_MAP_equb_and : HInst<
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.eq($Vu32.ub,$Vv32.ub)",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26218,6 +26549,7 @@ def V6_MAP_equb_ior : HInst<
"$Qx4 |= vcmp.eq($Vu32.ub,$Vv32.ub)",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26228,6 +26560,7 @@ def V6_MAP_equb_xor : HInst<
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.eq($Vu32.ub,$Vv32.ub)",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26240,6 +26573,7 @@ def V6_MAP_equh : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26249,6 +26583,7 @@ def V6_MAP_equh_and : HInst<
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.eq($Vu32.uh,$Vv32.uh)",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26260,6 +26595,7 @@ def V6_MAP_equh_ior : HInst<
"$Qx4 |= vcmp.eq($Vu32.uh,$Vv32.uh)",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26270,6 +26606,7 @@ def V6_MAP_equh_xor : HInst<
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.eq($Vu32.uh,$Vv32.uh)",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26282,6 +26619,7 @@ def V6_MAP_equw : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26291,6 +26629,7 @@ def V6_MAP_equw_and : HInst<
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 &= vcmp.eq($Vu32.uw,$Vv32.uw)",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26302,6 +26641,7 @@ def V6_MAP_equw_ior : HInst<
"$Qx4 |= vcmp.eq($Vu32.uw,$Vv32.uw)",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26312,6 +26652,7 @@ def V6_MAP_equw_xor : HInst<
(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32),
"$Qx4 ^= vcmp.eq($Vu32.uw,$Vv32.uw)",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26327,6 +26668,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10010010000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isSolo = 1;
let mayLoad = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26338,6 +26680,7 @@ def V6_extractw_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26349,6 +26692,7 @@ def V6_hi : HInst<
CVI_VA, TypeCVI_VA>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let DecoderNamespace = "EXT_mmvec";
}
@@ -26359,6 +26703,7 @@ def V6_ld0 : HInst<
PSEUDO, TypeCVI_VM_LD>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26370,6 +26715,7 @@ def V6_ldcnp0 : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26381,6 +26727,7 @@ def V6_ldcnpnt0 : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26392,6 +26739,7 @@ def V6_ldcp0 : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26403,6 +26751,7 @@ def V6_ldcpnt0 : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26414,6 +26763,7 @@ def V6_ldnp0 : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26425,6 +26775,7 @@ def V6_ldnpnt0 : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26436,6 +26787,7 @@ def V6_ldnt0 : HInst<
PSEUDO, TypeCVI_VM_LD>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26458,6 +26810,7 @@ def V6_ldp0 : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26469,6 +26822,7 @@ def V6_ldpnt0 : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26480,6 +26834,7 @@ def V6_ldtnp0 : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26491,6 +26846,7 @@ def V6_ldtnpnt0 : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26502,6 +26858,7 @@ def V6_ldtp0 : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26513,6 +26870,7 @@ def V6_ldtpnt0 : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26524,6 +26882,7 @@ def V6_ldu0 : HInst<
PSEUDO, TypeCVI_VM_LD>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26535,6 +26894,7 @@ def V6_lo : HInst<
CVI_VA, TypeCVI_VA>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let DecoderNamespace = "EXT_mmvec";
}
@@ -26542,22 +26902,24 @@ def V6_lvsplatb : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32),
"$Vd32.b = vsplat($Rt32)",
-tc_c4edf264, TypeCVI_VX>, Enc_a5ed8a, Requires<[UseHVXV62]> {
+tc_c4edf264, TypeCVI_VX_LATE>, Enc_a5ed8a, Requires<[UseHVXV62]> {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b00011001110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_lvsplath : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32),
"$Vd32.h = vsplat($Rt32)",
-tc_c4edf264, TypeCVI_VX>, Enc_a5ed8a, Requires<[UseHVXV62]> {
+tc_c4edf264, TypeCVI_VX_LATE>, Enc_a5ed8a, Requires<[UseHVXV62]> {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b00011001110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_lvsplatw : HInst<
@@ -26569,6 +26931,7 @@ let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_pred_and : HInst<
@@ -26582,6 +26945,7 @@ let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_pred_and_n : HInst<
@@ -26595,6 +26959,7 @@ let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_pred_not : HInst<
@@ -26607,6 +26972,7 @@ let Inst{13-10} = 0b0000;
let Inst{31-16} = 0b0001111000000011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_pred_or : HInst<
@@ -26620,6 +26986,7 @@ let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_pred_or_n : HInst<
@@ -26633,6 +27000,7 @@ let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_pred_scalar2 : HInst<
@@ -26644,6 +27012,7 @@ let Inst{13-2} = 0b000000010001;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_pred_scalar2v2 : HInst<
@@ -26655,6 +27024,7 @@ let Inst{13-2} = 0b000000010011;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_pred_xor : HInst<
@@ -26668,6 +27038,7 @@ let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_shuffeqh : HInst<
@@ -26681,6 +27052,7 @@ let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_shuffeqw : HInst<
@@ -26694,6 +27066,7 @@ let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_st0 : HInst<
@@ -26701,6 +27074,7 @@ def V6_st0 : HInst<
(ins IntRegs:$Rt32, HvxVR:$Vs32),
"vmem($Rt32) = $Vs32",
PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26710,6 +27084,7 @@ def V6_stn0 : HInst<
(ins IntRegs:$Rt32, HvxVR:$Os8),
"vmem($Rt32) = $Os8.new",
PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26720,6 +27095,7 @@ def V6_stnnt0 : HInst<
(ins IntRegs:$Rt32, HvxVR:$Os8),
"vmem($Rt32):nt = $Os8.new",
PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26730,6 +27106,7 @@ def V6_stnp0 : HInst<
(ins PredRegs:$Pv4, IntRegs:$Rt32, HvxVR:$Vs32),
"if (!$Pv4) vmem($Rt32) = $Vs32",
PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26739,6 +27116,7 @@ def V6_stnpnt0 : HInst<
(ins PredRegs:$Pv4, IntRegs:$Rt32, HvxVR:$Vs32),
"if (!$Pv4) vmem($Rt32):nt = $Vs32",
PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26748,6 +27126,7 @@ def V6_stnq0 : HInst<
(ins HvxQR:$Qv4, IntRegs:$Rt32, HvxVR:$Vs32),
"if (!$Qv4) vmem($Rt32) = $Vs32",
PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26757,6 +27136,7 @@ def V6_stnqnt0 : HInst<
(ins HvxQR:$Qv4, IntRegs:$Rt32, HvxVR:$Vs32),
"if (!$Qv4) vmem($Rt32):nt = $Vs32",
PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26766,6 +27146,7 @@ def V6_stnt0 : HInst<
(ins IntRegs:$Rt32, HvxVR:$Vs32),
"vmem($Rt32):nt = $Vs32",
PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26775,6 +27156,7 @@ def V6_stp0 : HInst<
(ins PredRegs:$Pv4, IntRegs:$Rt32, HvxVR:$Vs32),
"if ($Pv4) vmem($Rt32) = $Vs32",
PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26784,6 +27166,7 @@ def V6_stpnt0 : HInst<
(ins PredRegs:$Pv4, IntRegs:$Rt32, HvxVR:$Vs32),
"if ($Pv4) vmem($Rt32):nt = $Vs32",
PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26793,6 +27176,7 @@ def V6_stq0 : HInst<
(ins HvxQR:$Qv4, IntRegs:$Rt32, HvxVR:$Vs32),
"if ($Qv4) vmem($Rt32) = $Vs32",
PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26802,6 +27186,7 @@ def V6_stqnt0 : HInst<
(ins HvxQR:$Qv4, IntRegs:$Rt32, HvxVR:$Vs32),
"if ($Qv4) vmem($Rt32):nt = $Vs32",
PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26811,6 +27196,7 @@ def V6_stu0 : HInst<
(ins IntRegs:$Rt32, HvxVR:$Vs32),
"vmemu($Rt32) = $Vs32",
PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26820,6 +27206,7 @@ def V6_stunp0 : HInst<
(ins PredRegs:$Pv4, IntRegs:$Rt32, HvxVR:$Vs32),
"if (!$Pv4) vmemu($Rt32) = $Vs32",
PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26829,6 +27216,7 @@ def V6_stup0 : HInst<
(ins PredRegs:$Pv4, IntRegs:$Rt32, HvxVR:$Vs32),
"if ($Pv4) vmemu($Rt32) = $Vs32",
PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26846,6 +27234,7 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26863,6 +27252,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_pi";
@@ -26881,6 +27271,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -26899,6 +27290,7 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_ai";
@@ -26919,6 +27311,7 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let CVINew = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
@@ -26940,6 +27333,7 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let CVINew = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
@@ -26961,6 +27355,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let CVINew = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
@@ -26982,6 +27377,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let CVINew = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
@@ -27002,6 +27398,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let CVINew = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
@@ -27022,6 +27419,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let CVINew = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
@@ -27043,6 +27441,7 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let CVINew = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
@@ -27063,6 +27462,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let CVINew = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
@@ -27083,6 +27483,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let CVINew = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
@@ -27104,6 +27505,7 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_ai";
@@ -27124,6 +27526,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_pi";
@@ -27144,6 +27547,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_ppu";
@@ -27163,6 +27567,7 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -27184,6 +27589,7 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let CVINew = 1;
let mayLoad = 1;
let isNonTemporal = 1;
@@ -27206,6 +27612,7 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let CVINew = 1;
let mayLoad = 1;
let isNonTemporal = 1;
@@ -27228,6 +27635,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let CVINew = 1;
let mayLoad = 1;
let isNonTemporal = 1;
@@ -27250,6 +27658,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let CVINew = 1;
let mayLoad = 1;
let isNonTemporal = 1;
@@ -27271,6 +27680,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let CVINew = 1;
let mayLoad = 1;
let isNonTemporal = 1;
@@ -27292,6 +27702,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let CVINew = 1;
let mayLoad = 1;
let isNonTemporal = 1;
@@ -27314,6 +27725,7 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let CVINew = 1;
let mayLoad = 1;
let isNonTemporal = 1;
@@ -27335,6 +27747,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let CVINew = 1;
let mayLoad = 1;
let isNonTemporal = 1;
@@ -27356,6 +27769,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let CVINew = 1;
let mayLoad = 1;
let isNonTemporal = 1;
@@ -27378,6 +27792,7 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -27399,6 +27814,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -27420,6 +27836,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -27440,6 +27857,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -27461,6 +27879,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -27483,6 +27902,7 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -27503,6 +27923,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -27523,6 +27944,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -27543,6 +27965,7 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -27564,6 +27987,7 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -27585,6 +28009,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -27606,6 +28031,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -27626,6 +28052,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -27646,6 +28073,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -27667,6 +28095,7 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -27687,6 +28116,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -27707,6 +28137,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isNonTemporal = 1;
let isRestrictNoSlot1Store = 1;
@@ -27727,6 +28158,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_pi";
@@ -27747,6 +28179,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_ppu";
@@ -27768,6 +28201,7 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_ai";
@@ -27787,6 +28221,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_pi";
@@ -27806,6 +28241,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_ppu";
@@ -27825,6 +28261,7 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_tmp_ai";
@@ -27845,6 +28282,7 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_tmp_ai";
@@ -27865,6 +28303,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_tmp_pi";
@@ -27885,6 +28324,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_tmp_ppu";
@@ -27904,6 +28344,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_tmp_pi";
@@ -27923,6 +28364,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_tmp_ppu";
@@ -27943,6 +28385,7 @@ let opNewValue = 0;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_tmp_ai";
@@ -27962,6 +28405,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_tmp_pi";
@@ -27981,6 +28425,7 @@ let opNewValue = 0;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_tmp_ppu";
@@ -27997,6 +28442,7 @@ let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000001;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32Ub_ai";
let isPredicable = 1;
@@ -28013,6 +28459,7 @@ let isPredicated = 1;
let isPredicatedFalse = 1;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32Ub_ai";
let DecoderNamespace = "EXT_mmvec";
@@ -28029,6 +28476,7 @@ let isPredicated = 1;
let isPredicatedFalse = 1;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32Ub_pi";
let DecoderNamespace = "EXT_mmvec";
@@ -28045,6 +28493,7 @@ let isPredicated = 1;
let isPredicatedFalse = 1;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32Ub_ppu";
let DecoderNamespace = "EXT_mmvec";
@@ -28060,6 +28509,7 @@ let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001001;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32Ub_pi";
let isPredicable = 1;
@@ -28075,6 +28525,7 @@ let Inst{12-5} = 0b00000111;
let Inst{31-21} = 0b00101011001;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32Ub_ppu";
let isPredicable = 1;
@@ -28091,6 +28542,7 @@ let Inst{31-21} = 0b00101000101;
let isPredicated = 1;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32Ub_ai";
let DecoderNamespace = "EXT_mmvec";
@@ -28106,6 +28558,7 @@ let Inst{31-21} = 0b00101001101;
let isPredicated = 1;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32Ub_pi";
let DecoderNamespace = "EXT_mmvec";
@@ -28121,6 +28574,7 @@ let Inst{31-21} = 0b00101011101;
let isPredicated = 1;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32Ub_ppu";
let DecoderNamespace = "EXT_mmvec";
@@ -28136,6 +28590,7 @@ let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000001;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_ai";
let isNVStorable = 1;
@@ -28153,6 +28608,7 @@ let Inst{31-21} = 0b00101000001;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isNVStore = 1;
+let isCVI = 1;
let CVINew = 1;
let isNewValue = 1;
let mayStore = 1;
@@ -28173,6 +28629,7 @@ let isPredicatedFalse = 1;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isNVStore = 1;
+let isCVI = 1;
let CVINew = 1;
let isNewValue = 1;
let mayStore = 1;
@@ -28193,6 +28650,7 @@ let isPredicatedFalse = 1;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isNVStore = 1;
+let isCVI = 1;
let CVINew = 1;
let isNewValue = 1;
let mayStore = 1;
@@ -28213,6 +28671,7 @@ let isPredicatedFalse = 1;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isNVStore = 1;
+let isCVI = 1;
let CVINew = 1;
let isNewValue = 1;
let mayStore = 1;
@@ -28232,6 +28691,7 @@ let Inst{31-21} = 0b00101001001;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isNVStore = 1;
+let isCVI = 1;
let CVINew = 1;
let isNewValue = 1;
let mayStore = 1;
@@ -28251,6 +28711,7 @@ let Inst{31-21} = 0b00101011001;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isNVStore = 1;
+let isCVI = 1;
let CVINew = 1;
let isNewValue = 1;
let mayStore = 1;
@@ -28271,6 +28732,7 @@ let isPredicated = 1;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isNVStore = 1;
+let isCVI = 1;
let CVINew = 1;
let isNewValue = 1;
let mayStore = 1;
@@ -28290,6 +28752,7 @@ let isPredicated = 1;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isNVStore = 1;
+let isCVI = 1;
let CVINew = 1;
let isNewValue = 1;
let mayStore = 1;
@@ -28309,6 +28772,7 @@ let isPredicated = 1;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isNVStore = 1;
+let isCVI = 1;
let CVINew = 1;
let isNewValue = 1;
let mayStore = 1;
@@ -28328,6 +28792,7 @@ let isPredicated = 1;
let isPredicatedFalse = 1;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_ai";
let isNVStorable = 1;
@@ -28345,6 +28810,7 @@ let isPredicated = 1;
let isPredicatedFalse = 1;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_pi";
let isNVStorable = 1;
@@ -28362,6 +28828,7 @@ let isPredicated = 1;
let isPredicatedFalse = 1;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_ppu";
let isNVStorable = 1;
@@ -28377,6 +28844,7 @@ let Inst{7-5} = 0b001;
let Inst{31-21} = 0b00101000100;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
}
@@ -28390,6 +28858,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Rx32 = $Rx32in";
@@ -28403,6 +28872,7 @@ let Inst{10-5} = 0b000001;
let Inst{31-21} = 0b00101011100;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Rx32 = $Rx32in";
@@ -28417,6 +28887,7 @@ let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000011;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let isNonTemporal = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_ai";
@@ -28435,6 +28906,7 @@ let Inst{31-21} = 0b00101000011;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isNVStore = 1;
+let isCVI = 1;
let CVINew = 1;
let isNewValue = 1;
let isNonTemporal = 1;
@@ -28456,6 +28928,7 @@ let isPredicatedFalse = 1;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isNVStore = 1;
+let isCVI = 1;
let CVINew = 1;
let isNewValue = 1;
let isNonTemporal = 1;
@@ -28477,6 +28950,7 @@ let isPredicatedFalse = 1;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isNVStore = 1;
+let isCVI = 1;
let CVINew = 1;
let isNewValue = 1;
let isNonTemporal = 1;
@@ -28498,6 +28972,7 @@ let isPredicatedFalse = 1;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isNVStore = 1;
+let isCVI = 1;
let CVINew = 1;
let isNewValue = 1;
let isNonTemporal = 1;
@@ -28518,6 +28993,7 @@ let Inst{31-21} = 0b00101001011;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isNVStore = 1;
+let isCVI = 1;
let CVINew = 1;
let isNewValue = 1;
let isNonTemporal = 1;
@@ -28538,6 +29014,7 @@ let Inst{31-21} = 0b00101011011;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isNVStore = 1;
+let isCVI = 1;
let CVINew = 1;
let isNewValue = 1;
let isNonTemporal = 1;
@@ -28559,6 +29036,7 @@ let isPredicated = 1;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
let isNVStore = 1;
+let isCVI = 1;
let CVINew = 1;
let isNewValue = 1;
let isNonTemporal = 1;
@@ -28579,6 +29057,7 @@ let isPredicated = 1;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isNVStore = 1;
+let isCVI = 1;
let CVINew = 1;
let isNewValue = 1;
let isNonTemporal = 1;
@@ -28599,6 +29078,7 @@ let isPredicated = 1;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
let isNVStore = 1;
+let isCVI = 1;
let CVINew = 1;
let isNewValue = 1;
let isNonTemporal = 1;
@@ -28619,6 +29099,7 @@ let isPredicated = 1;
let isPredicatedFalse = 1;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let isNonTemporal = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_ai";
@@ -28637,6 +29118,7 @@ let isPredicated = 1;
let isPredicatedFalse = 1;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let isNonTemporal = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_pi";
@@ -28655,6 +29137,7 @@ let isPredicated = 1;
let isPredicatedFalse = 1;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let isNonTemporal = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_ppu";
@@ -28671,6 +29154,7 @@ let Inst{7-5} = 0b001;
let Inst{31-21} = 0b00101000110;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let isNonTemporal = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -28685,6 +29169,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let isNonTemporal = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -28699,6 +29184,7 @@ let Inst{10-5} = 0b000001;
let Inst{31-21} = 0b00101011110;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let isNonTemporal = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -28714,6 +29200,7 @@ let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001011;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let isNonTemporal = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_pi";
@@ -28731,6 +29218,7 @@ let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b00101011011;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let isNonTemporal = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_ppu";
@@ -28749,6 +29237,7 @@ let Inst{31-21} = 0b00101000111;
let isPredicated = 1;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let isNonTemporal = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_ai";
@@ -28766,6 +29255,7 @@ let Inst{31-21} = 0b00101001111;
let isPredicated = 1;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let isNonTemporal = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_pi";
@@ -28783,6 +29273,7 @@ let Inst{31-21} = 0b00101011111;
let isPredicated = 1;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let isNonTemporal = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_ppu";
@@ -28799,6 +29290,7 @@ let Inst{7-5} = 0b000;
let Inst{31-21} = 0b00101000110;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let isNonTemporal = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -28813,6 +29305,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001110;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let isNonTemporal = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -28827,6 +29320,7 @@ let Inst{10-5} = 0b000000;
let Inst{31-21} = 0b00101011110;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let isNonTemporal = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -28842,6 +29336,7 @@ let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001001;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_pi";
let isNVStorable = 1;
@@ -28858,7 +29353,9 @@ let Inst{12-5} = 0b00000000;
let Inst{31-21} = 0b00101011001;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
+let BaseOpcode = "V6_vS32b_ppu";
let isNVStorable = 1;
let isPredicable = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -28874,6 +29371,7 @@ let Inst{31-21} = 0b00101000101;
let isPredicated = 1;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_ai";
let isNVStorable = 1;
@@ -28890,6 +29388,7 @@ let Inst{31-21} = 0b00101001101;
let isPredicated = 1;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_pi";
let isNVStorable = 1;
@@ -28906,6 +29405,7 @@ let Inst{31-21} = 0b00101011101;
let isPredicated = 1;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let BaseOpcode = "V6_vS32b_ppu";
let isNVStorable = 1;
@@ -28921,6 +29421,7 @@ let Inst{7-5} = 0b000;
let Inst{31-21} = 0b00101000100;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
}
@@ -28934,6 +29435,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101001100;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Rx32 = $Rx32in";
@@ -28947,6 +29449,7 @@ let Inst{10-5} = 0b000000;
let Inst{31-21} = 0b00101011100;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Rx32 = $Rx32in";
@@ -28961,6 +29464,7 @@ let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101000001;
let addrMode = BaseImmOffset;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let CVINew = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -28975,6 +29479,7 @@ let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101001001;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let CVINew = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -28989,6 +29494,7 @@ let Inst{12-0} = 0b0000000101000;
let Inst{31-21} = 0b00101011001;
let addrMode = PostInc;
let accessSize = HVXVectorAccess;
+let isCVI = 1;
let CVINew = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29004,6 +29510,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vabsb_alt : HInst<
@@ -29013,6 +29520,7 @@ def V6_vabsb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29027,6 +29535,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vabsb_sat_alt : HInst<
@@ -29036,6 +29545,7 @@ def V6_vabsb_sat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29050,6 +29560,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vabsdiffh_alt : HInst<
@@ -29059,6 +29570,7 @@ def V6_vabsdiffh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29073,6 +29585,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vabsdiffub_alt : HInst<
@@ -29082,6 +29595,7 @@ def V6_vabsdiffub_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29096,6 +29610,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vabsdiffuh_alt : HInst<
@@ -29105,6 +29620,7 @@ def V6_vabsdiffuh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29119,6 +29635,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vabsdiffw_alt : HInst<
@@ -29128,6 +29645,7 @@ def V6_vabsdiffw_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29142,6 +29660,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vabsh_alt : HInst<
@@ -29151,6 +29670,7 @@ def V6_vabsh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29165,6 +29685,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vabsh_sat_alt : HInst<
@@ -29174,6 +29695,7 @@ def V6_vabsh_sat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29185,6 +29707,7 @@ def V6_vabsub_alt : HInst<
tc_0ec46cf9, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29196,6 +29719,7 @@ def V6_vabsuh_alt : HInst<
tc_0ec46cf9, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29207,6 +29731,7 @@ def V6_vabsuw_alt : HInst<
tc_0ec46cf9, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29221,6 +29746,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vabsw_alt : HInst<
@@ -29230,6 +29756,7 @@ def V6_vabsw_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29244,6 +29771,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vabsw_sat_alt : HInst<
@@ -29253,6 +29781,7 @@ def V6_vabsw_sat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29267,6 +29796,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaddb_alt : HInst<
@@ -29276,6 +29806,7 @@ def V6_vaddb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29290,6 +29821,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaddb_dv_alt : HInst<
@@ -29299,6 +29831,7 @@ def V6_vaddb_dv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29315,6 +29848,7 @@ let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -29326,6 +29860,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29343,6 +29878,7 @@ let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -29354,6 +29890,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29369,6 +29906,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaddbsat_alt : HInst<
@@ -29378,6 +29916,7 @@ def V6_vaddbsat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29392,6 +29931,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaddbsat_dv_alt : HInst<
@@ -29401,6 +29941,7 @@ def V6_vaddbsat_dv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29415,6 +29956,7 @@ let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -29422,7 +29964,7 @@ def V6_vaddcarryo : HInst<
(outs HvxVR:$Vd32, HvxQR:$Qe4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w,$Qe4 = vadd($Vu32.w,$Vv32.w):carry",
-tc_e35c1e93, TypeCOPROC_VX>, Enc_c1d806, Requires<[UseHVXV66]> {
+tc_e35c1e93, TypeCVI_VA>, Enc_c1d806, Requires<[UseHVXV66]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011101101;
@@ -29430,6 +29972,7 @@ let hasNewValue = 1;
let opNewValue = 0;
let hasNewValue2 = 1;
let opNewValue2 = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaddcarrysat : HInst<
@@ -29442,6 +29985,7 @@ let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011101100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaddclbh : HInst<
@@ -29454,6 +29998,7 @@ let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaddclbw : HInst<
@@ -29466,6 +30011,7 @@ let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaddh : HInst<
@@ -29478,6 +30024,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaddh_alt : HInst<
@@ -29487,6 +30034,7 @@ def V6_vaddh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29501,6 +30049,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaddh_dv_alt : HInst<
@@ -29510,6 +30059,7 @@ def V6_vaddh_dv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29526,6 +30076,7 @@ let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -29537,6 +30088,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29554,6 +30106,7 @@ let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -29565,6 +30118,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29580,6 +30134,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaddhsat_alt : HInst<
@@ -29589,6 +30144,7 @@ def V6_vaddhsat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29603,6 +30159,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaddhsat_dv_alt : HInst<
@@ -29612,6 +30169,7 @@ def V6_vaddhsat_dv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29626,6 +30184,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaddhw_acc : HInst<
@@ -29639,6 +30198,7 @@ let Inst{31-21} = 0b00011100001;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -29650,6 +30210,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29662,6 +30223,7 @@ def V6_vaddhw_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29676,6 +30238,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaddubh_acc : HInst<
@@ -29689,6 +30252,7 @@ let Inst{31-21} = 0b00011100010;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -29700,6 +30264,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29712,6 +30277,7 @@ def V6_vaddubh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29726,6 +30292,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaddubsat_alt : HInst<
@@ -29735,6 +30302,7 @@ def V6_vaddubsat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29749,6 +30317,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaddubsat_dv_alt : HInst<
@@ -29758,6 +30327,7 @@ def V6_vaddubsat_dv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29772,6 +30342,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vadduhsat : HInst<
@@ -29784,6 +30355,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vadduhsat_alt : HInst<
@@ -29793,6 +30365,7 @@ def V6_vadduhsat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29807,6 +30380,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vadduhsat_dv_alt : HInst<
@@ -29816,6 +30390,7 @@ def V6_vadduhsat_dv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29830,6 +30405,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vadduhw_acc : HInst<
@@ -29843,6 +30419,7 @@ let Inst{31-21} = 0b00011100010;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -29854,6 +30431,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29866,6 +30444,7 @@ def V6_vadduhw_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29880,6 +30459,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vadduwsat_alt : HInst<
@@ -29889,6 +30469,7 @@ def V6_vadduwsat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29903,6 +30484,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vadduwsat_dv_alt : HInst<
@@ -29912,6 +30494,7 @@ def V6_vadduwsat_dv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29926,6 +30509,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaddw_alt : HInst<
@@ -29935,6 +30519,7 @@ def V6_vaddw_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29949,6 +30534,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaddw_dv_alt : HInst<
@@ -29958,6 +30544,7 @@ def V6_vaddw_dv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -29974,6 +30561,7 @@ let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -29985,6 +30573,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30002,6 +30591,7 @@ let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -30013,6 +30603,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30028,6 +30619,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaddwsat_alt : HInst<
@@ -30037,6 +30629,7 @@ def V6_vaddwsat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30051,6 +30644,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaddwsat_dv_alt : HInst<
@@ -30060,6 +30654,7 @@ def V6_vaddwsat_dv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30074,6 +30669,7 @@ let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_valignbi : HInst<
@@ -30085,6 +30681,7 @@ let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011110001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vand : HInst<
@@ -30097,31 +30694,34 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vandnqrt : HInst<
(outs HvxVR:$Vd32),
(ins HvxQR:$Qu4, IntRegs:$Rt32),
"$Vd32 = vand(!$Qu4,$Rt32)",
-tc_ac4046bc, TypeCVI_VX>, Enc_7b7ba8, Requires<[UseHVXV62]> {
+tc_ac4046bc, TypeCVI_VX_LATE>, Enc_7b7ba8, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b101;
let Inst{13-10} = 0b0001;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vandnqrt_acc : HInst<
(outs HvxVR:$Vx32),
(ins HvxVR:$Vx32in, HvxQR:$Qu4, IntRegs:$Rt32),
"$Vx32 |= vand(!$Qu4,$Rt32)",
-tc_2e8f5f6e, TypeCVI_VX>, Enc_895bd9, Requires<[UseHVXV62]> {
+tc_2e8f5f6e, TypeCVI_VX_LATE>, Enc_895bd9, Requires<[UseHVXV62]> {
let Inst{7-5} = 0b011;
let Inst{13-10} = 0b1001;
let Inst{31-21} = 0b00011001011;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -30133,6 +30733,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30145,6 +30746,7 @@ def V6_vandnqrt_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30159,6 +30761,7 @@ let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vandqrt_acc : HInst<
@@ -30172,6 +30775,7 @@ let Inst{31-21} = 0b00011001011;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -30183,6 +30787,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30195,6 +30800,7 @@ def V6_vandqrt_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30210,6 +30816,7 @@ let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vandvqv : HInst<
@@ -30223,6 +30830,7 @@ let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vandvrt : HInst<
@@ -30235,6 +30843,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vandvrt_acc : HInst<
@@ -30246,6 +30855,7 @@ let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001011;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -30255,6 +30865,7 @@ def V6_vandvrt_acc_alt : HInst<
"$Qx4.ub |= vand($Vu32.ub,$Rt32.ub)",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30267,6 +30878,7 @@ def V6_vandvrt_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30281,6 +30893,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaslh_acc : HInst<
@@ -30294,6 +30907,7 @@ let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -30305,6 +30919,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30317,6 +30932,7 @@ def V6_vaslh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30331,6 +30947,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaslhv_alt : HInst<
@@ -30340,6 +30957,7 @@ def V6_vaslhv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30354,6 +30972,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaslw_acc : HInst<
@@ -30367,6 +30986,7 @@ let Inst{31-21} = 0b00011001011;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -30378,6 +30998,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30390,6 +31011,7 @@ def V6_vaslw_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30404,6 +31026,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vaslwv_alt : HInst<
@@ -30413,6 +31036,7 @@ def V6_vaslwv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30427,6 +31051,7 @@ let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011010101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -30437,6 +31062,7 @@ def V6_vasr_into_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV66]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30452,6 +31078,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vasrh_acc : HInst<
@@ -30465,6 +31092,7 @@ let Inst{31-21} = 0b00011001100;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -30476,6 +31104,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30488,6 +31117,7 @@ def V6_vasrh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30502,18 +31132,9 @@ let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
-def V6_vasrhbrndsat_alt : HInst<
-(outs HvxVR:$Vd32),
-(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
-"$Vd32 = vasrhb($Vu32,$Vv32,$Rt8):rnd:sat",
-tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
-let hasNewValue = 1;
-let opNewValue = 0;
-let isPseudo = 1;
-let isCodeGenOnly = 1;
-}
def V6_vasrhbsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
@@ -30524,6 +31145,7 @@ let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vasrhubrndsat : HInst<
@@ -30536,18 +31158,9 @@ let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
-def V6_vasrhubrndsat_alt : HInst<
-(outs HvxVR:$Vd32),
-(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
-"$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):rnd:sat",
-tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
-let hasNewValue = 1;
-let opNewValue = 0;
-let isPseudo = 1;
-let isCodeGenOnly = 1;
-}
def V6_vasrhubsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
@@ -30558,18 +31171,9 @@ let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
-def V6_vasrhubsat_alt : HInst<
-(outs HvxVR:$Vd32),
-(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
-"$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):sat",
-tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
-let hasNewValue = 1;
-let opNewValue = 0;
-let isPseudo = 1;
-let isCodeGenOnly = 1;
-}
def V6_vasrhv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
@@ -30580,6 +31184,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vasrhv_alt : HInst<
@@ -30589,6 +31194,7 @@ def V6_vasrhv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30603,6 +31209,7 @@ let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vasruhubsat : HInst<
@@ -30615,6 +31222,7 @@ let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vasruwuhrndsat : HInst<
@@ -30627,6 +31235,7 @@ let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vasruwuhsat : HInst<
@@ -30639,6 +31248,7 @@ let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vasrw : HInst<
@@ -30651,6 +31261,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vasrw_acc : HInst<
@@ -30664,6 +31275,7 @@ let Inst{31-21} = 0b00011001011;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -30675,6 +31287,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30687,6 +31300,7 @@ def V6_vasrw_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30701,18 +31315,9 @@ let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
-def V6_vasrwh_alt : HInst<
-(outs HvxVR:$Vd32),
-(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
-"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8)",
-tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
-let hasNewValue = 1;
-let opNewValue = 0;
-let isPseudo = 1;
-let isCodeGenOnly = 1;
-}
def V6_vasrwhrndsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
@@ -30723,18 +31328,9 @@ let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
-def V6_vasrwhrndsat_alt : HInst<
-(outs HvxVR:$Vd32),
-(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
-"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):rnd:sat",
-tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
-let hasNewValue = 1;
-let opNewValue = 0;
-let isPseudo = 1;
-let isCodeGenOnly = 1;
-}
def V6_vasrwhsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
@@ -30745,18 +31341,9 @@ let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
-def V6_vasrwhsat_alt : HInst<
-(outs HvxVR:$Vd32),
-(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
-"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):sat",
-tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
-let hasNewValue = 1;
-let opNewValue = 0;
-let isPseudo = 1;
-let isCodeGenOnly = 1;
-}
def V6_vasrwuhrndsat : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
@@ -30767,6 +31354,7 @@ let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vasrwuhsat : HInst<
@@ -30779,18 +31367,9 @@ let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
-def V6_vasrwuhsat_alt : HInst<
-(outs HvxVR:$Vd32),
-(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
-"$Vd32 = vasrwuh($Vu32,$Vv32,$Rt8):sat",
-tc_16ff9ef8, TypeMAPPING>, Requires<[HasV60]> {
-let hasNewValue = 1;
-let opNewValue = 0;
-let isPseudo = 1;
-let isCodeGenOnly = 1;
-}
def V6_vasrwv : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
@@ -30801,6 +31380,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vasrwv_alt : HInst<
@@ -30810,6 +31390,7 @@ def V6_vasrwv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30824,6 +31405,7 @@ let Inst{13-13} = 0b1;
let Inst{31-16} = 0b0001111000000011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vassignp : HInst<
@@ -30833,6 +31415,7 @@ def V6_vassignp : HInst<
CVI_VA, TypeCVI_VA_DV>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let DecoderNamespace = "EXT_mmvec";
}
@@ -30846,6 +31429,7 @@ let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vavgb_alt : HInst<
@@ -30855,6 +31439,7 @@ def V6_vavgb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30869,6 +31454,7 @@ let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vavgbrnd_alt : HInst<
@@ -30878,6 +31464,7 @@ def V6_vavgbrnd_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30892,6 +31479,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vavgh_alt : HInst<
@@ -30901,6 +31489,7 @@ def V6_vavgh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30915,6 +31504,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vavghrnd_alt : HInst<
@@ -30924,6 +31514,7 @@ def V6_vavghrnd_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30938,6 +31529,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vavgub_alt : HInst<
@@ -30947,6 +31539,7 @@ def V6_vavgub_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30961,6 +31554,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vavgubrnd_alt : HInst<
@@ -30970,6 +31564,7 @@ def V6_vavgubrnd_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -30984,6 +31579,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vavguh_alt : HInst<
@@ -30993,6 +31589,7 @@ def V6_vavguh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31007,6 +31604,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vavguhrnd_alt : HInst<
@@ -31016,6 +31614,7 @@ def V6_vavguhrnd_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31030,6 +31629,7 @@ let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vavguw_alt : HInst<
@@ -31039,6 +31639,7 @@ def V6_vavguw_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31053,6 +31654,7 @@ let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vavguwrnd_alt : HInst<
@@ -31062,6 +31664,7 @@ def V6_vavguwrnd_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31076,6 +31679,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vavgw_alt : HInst<
@@ -31085,6 +31689,7 @@ def V6_vavgw_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31099,6 +31704,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vavgwrnd_alt : HInst<
@@ -31108,6 +31714,7 @@ def V6_vavgwrnd_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31123,6 +31730,7 @@ let Inst{31-21} = 0b00011010011;
let isPredicated = 1;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vcl0h : HInst<
@@ -31135,6 +31743,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vcl0h_alt : HInst<
@@ -31144,6 +31753,7 @@ def V6_vcl0h_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31158,6 +31768,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vcl0w_alt : HInst<
@@ -31167,6 +31778,7 @@ def V6_vcl0w_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31182,6 +31794,7 @@ let Inst{31-16} = 0b0001101000000000;
let isPredicated = 1;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vcombine : HInst<
@@ -31194,6 +31807,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isRegSequence = 1;
let DecoderNamespace = "EXT_mmvec";
}
@@ -31204,6 +31818,7 @@ def V6_vd0 : HInst<
CVI_VA, TypeCVI_VA>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31215,6 +31830,7 @@ def V6_vdd0 : HInst<
tc_718b5c53, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31231,6 +31847,7 @@ let hasNewValue = 1;
let opNewValue = 0;
let hasNewValue2 = 1;
let opNewValue2 = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vy32 = $Vy32in, $Vx32 = $Vx32in";
}
@@ -31244,6 +31861,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vdealb4w : HInst<
@@ -31256,6 +31874,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vdealb4w_alt : HInst<
@@ -31265,6 +31884,7 @@ def V6_vdealb4w_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31276,6 +31896,7 @@ def V6_vdealb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31290,6 +31911,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vdealh_alt : HInst<
@@ -31299,6 +31921,7 @@ def V6_vdealh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31313,6 +31936,7 @@ let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vdelta : HInst<
@@ -31325,6 +31949,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vdmpybus : HInst<
@@ -31337,6 +31962,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vdmpybus_acc : HInst<
@@ -31350,6 +31976,7 @@ let Inst{31-21} = 0b00011001000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -31361,6 +31988,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31373,6 +32001,7 @@ def V6_vdmpybus_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31387,6 +32016,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vdmpybus_dv_acc : HInst<
@@ -31400,6 +32030,7 @@ let Inst{31-21} = 0b00011001000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -31411,6 +32042,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31423,6 +32055,7 @@ def V6_vdmpybus_dv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31437,6 +32070,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vdmpyhb_acc : HInst<
@@ -31450,6 +32084,7 @@ let Inst{31-21} = 0b00011001000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -31461,6 +32096,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31473,6 +32109,7 @@ def V6_vdmpyhb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31487,6 +32124,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vdmpyhb_dv_acc : HInst<
@@ -31500,6 +32138,7 @@ let Inst{31-21} = 0b00011001001;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -31511,6 +32150,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31523,6 +32163,7 @@ def V6_vdmpyhb_dv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31537,6 +32178,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vdmpyhisat_acc : HInst<
@@ -31550,6 +32192,7 @@ let Inst{31-21} = 0b00011001001;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -31561,6 +32204,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31573,6 +32217,7 @@ def V6_vdmpyhisat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31587,6 +32232,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vdmpyhsat_acc : HInst<
@@ -31600,6 +32246,7 @@ let Inst{31-21} = 0b00011001001;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -31611,6 +32258,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31623,6 +32271,7 @@ def V6_vdmpyhsat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31637,6 +32286,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vdmpyhsuisat_acc : HInst<
@@ -31650,6 +32300,7 @@ let Inst{31-21} = 0b00011001001;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -31661,6 +32312,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31673,6 +32325,7 @@ def V6_vdmpyhsuisat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31687,6 +32340,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vdmpyhsusat_acc : HInst<
@@ -31700,6 +32354,7 @@ let Inst{31-21} = 0b00011001001;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -31711,6 +32366,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31723,6 +32379,7 @@ def V6_vdmpyhsusat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31737,6 +32394,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vdmpyhvsat_acc : HInst<
@@ -31750,6 +32408,7 @@ let Inst{31-21} = 0b00011100000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -31761,6 +32420,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31773,6 +32433,7 @@ def V6_vdmpyhvsat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31787,6 +32448,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vdsaduh_acc : HInst<
@@ -31800,6 +32462,7 @@ let Inst{31-21} = 0b00011001011;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -31811,6 +32474,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31823,6 +32487,7 @@ def V6_vdsaduh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -31837,6 +32502,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_veqb_and : HInst<
@@ -31847,6 +32513,7 @@ tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -31859,6 +32526,7 @@ let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -31870,6 +32538,7 @@ tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -31883,6 +32552,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_veqh_and : HInst<
@@ -31893,6 +32563,7 @@ tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -31905,6 +32576,7 @@ let Inst{7-2} = 0b010001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -31916,6 +32588,7 @@ tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -31929,6 +32602,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_veqw_and : HInst<
@@ -31939,6 +32613,7 @@ tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -31951,6 +32626,7 @@ let Inst{7-2} = 0b010010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -31962,6 +32638,7 @@ tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -31976,6 +32653,7 @@ let hasNewValue = 1;
let opNewValue = 0;
let accessSize = HalfWordAccess;
let isCVLoad = 1;
+let isCVI = 1;
let hasTmpDst = 1;
let mayLoad = 1;
let Defs = [VTMP];
@@ -31992,6 +32670,7 @@ let hasNewValue = 1;
let opNewValue = 0;
let accessSize = HalfWordAccess;
let isCVLoad = 1;
+let isCVI = 1;
let hasTmpDst = 1;
let mayLoad = 1;
let Defs = [VTMP];
@@ -32001,13 +32680,14 @@ def V6_vgathermhw : HInst<
(outs),
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32),
"vtmp.h = vgather($Rt32,$Mu2,$Vvv32.w).h",
-tc_05058f6f, TypeCVI_GATHER>, Enc_28dcbb, Requires<[UseHVXV65]> {
+tc_05058f6f, TypeCVI_GATHER_DV>, Enc_28dcbb, Requires<[UseHVXV65]> {
let Inst{12-5} = 0b00010000;
let Inst{31-21} = 0b00101111000;
let hasNewValue = 1;
let opNewValue = 0;
let accessSize = HalfWordAccess;
let isCVLoad = 1;
+let isCVI = 1;
let hasTmpDst = 1;
let mayLoad = 1;
let Defs = [VTMP];
@@ -32017,13 +32697,14 @@ def V6_vgathermhwq : HInst<
(outs),
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32),
"if ($Qs4) vtmp.h = vgather($Rt32,$Mu2,$Vvv32.w).h",
-tc_fd7610da, TypeCVI_GATHER>, Enc_4e4a80, Requires<[UseHVXV65]> {
+tc_fd7610da, TypeCVI_GATHER_DV>, Enc_4e4a80, Requires<[UseHVXV65]> {
let Inst{12-7} = 0b001100;
let Inst{31-21} = 0b00101111000;
let hasNewValue = 1;
let opNewValue = 0;
let accessSize = HalfWordAccess;
let isCVLoad = 1;
+let isCVI = 1;
let hasTmpDst = 1;
let mayLoad = 1;
let Defs = [VTMP];
@@ -32040,6 +32721,7 @@ let hasNewValue = 1;
let opNewValue = 0;
let accessSize = WordAccess;
let isCVLoad = 1;
+let isCVI = 1;
let hasTmpDst = 1;
let mayLoad = 1;
let Defs = [VTMP];
@@ -32056,6 +32738,7 @@ let hasNewValue = 1;
let opNewValue = 0;
let accessSize = WordAccess;
let isCVLoad = 1;
+let isCVI = 1;
let hasTmpDst = 1;
let mayLoad = 1;
let Defs = [VTMP];
@@ -32071,6 +32754,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vgtb_and : HInst<
@@ -32081,6 +32765,7 @@ tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -32093,6 +32778,7 @@ let Inst{7-2} = 0b010100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -32104,6 +32790,7 @@ tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100100;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -32117,6 +32804,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vgth_and : HInst<
@@ -32127,6 +32815,7 @@ tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -32139,6 +32828,7 @@ let Inst{7-2} = 0b010101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -32150,6 +32840,7 @@ tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100101;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -32163,6 +32854,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vgtub_and : HInst<
@@ -32173,6 +32865,7 @@ tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -32185,6 +32878,7 @@ let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -32196,6 +32890,7 @@ tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b101000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -32209,6 +32904,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vgtuh_and : HInst<
@@ -32219,6 +32915,7 @@ tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -32231,6 +32928,7 @@ let Inst{7-2} = 0b011001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -32242,6 +32940,7 @@ tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b101001;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -32255,6 +32954,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vgtuw_and : HInst<
@@ -32265,6 +32965,7 @@ tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b001010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -32277,6 +32978,7 @@ let Inst{7-2} = 0b011010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -32288,6 +32990,7 @@ tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b101010;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -32301,6 +33004,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vgtw_and : HInst<
@@ -32311,6 +33015,7 @@ tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b000110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -32323,6 +33028,7 @@ let Inst{7-2} = 0b010110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -32334,6 +33040,7 @@ tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> {
let Inst{7-2} = 0b100110;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100100;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -32344,6 +33051,7 @@ def V6_vhist : HInst<
tc_1381a97c, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV60]> {
let Inst{13-0} = 0b10000010000000;
let Inst{31-16} = 0b0001111000000000;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vhistq : HInst<
@@ -32354,6 +33062,7 @@ tc_e3f68a46, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV60]> {
let Inst{13-0} = 0b10000010000000;
let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vinsertwr : HInst<
@@ -32365,6 +33074,7 @@ let Inst{13-5} = 0b100000001;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -32378,6 +33088,7 @@ let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vlalignbi : HInst<
@@ -32389,6 +33100,7 @@ let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011110011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vlsrb : HInst<
@@ -32401,6 +33113,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vlsrh : HInst<
@@ -32413,6 +33126,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vlsrh_alt : HInst<
@@ -32422,6 +33136,7 @@ def V6_vlsrh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -32436,6 +33151,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vlsrhv_alt : HInst<
@@ -32445,6 +33161,7 @@ def V6_vlsrhv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -32459,6 +33176,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vlsrw_alt : HInst<
@@ -32468,6 +33186,7 @@ def V6_vlsrw_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -32482,6 +33201,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vlsrwv_alt : HInst<
@@ -32491,6 +33211,7 @@ def V6_vlsrwv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -32505,6 +33226,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vlutvvb : HInst<
@@ -32517,6 +33239,7 @@ let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vlutvvb_nm : HInst<
@@ -32529,6 +33252,7 @@ let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vlutvvb_oracc : HInst<
@@ -32542,6 +33266,7 @@ let Inst{31-24} = 0b00011011;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -32555,6 +33280,7 @@ let Inst{31-21} = 0b00011100110;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -32567,6 +33293,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vlutvwh : HInst<
@@ -32579,6 +33306,7 @@ let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vlutvwh_nm : HInst<
@@ -32591,6 +33319,7 @@ let Inst{13-13} = 0b0;
let Inst{31-24} = 0b00011000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vlutvwh_oracc : HInst<
@@ -32604,6 +33333,7 @@ let Inst{31-24} = 0b00011011;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -32617,6 +33347,7 @@ let Inst{31-21} = 0b00011100111;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -32629,6 +33360,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmaxb : HInst<
@@ -32641,6 +33373,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmaxb_alt : HInst<
@@ -32650,6 +33383,7 @@ def V6_vmaxb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -32664,6 +33398,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmaxh_alt : HInst<
@@ -32673,6 +33408,7 @@ def V6_vmaxh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -32687,6 +33423,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmaxub_alt : HInst<
@@ -32696,6 +33433,7 @@ def V6_vmaxub_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -32710,6 +33448,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmaxuh_alt : HInst<
@@ -32719,6 +33458,7 @@ def V6_vmaxuh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -32733,6 +33473,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmaxw_alt : HInst<
@@ -32742,6 +33483,7 @@ def V6_vmaxw_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -32756,6 +33498,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vminb_alt : HInst<
@@ -32765,6 +33508,7 @@ def V6_vminb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -32779,6 +33523,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vminh_alt : HInst<
@@ -32788,6 +33533,7 @@ def V6_vminh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -32802,6 +33548,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vminub_alt : HInst<
@@ -32811,6 +33558,7 @@ def V6_vminub_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -32825,6 +33573,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vminuh_alt : HInst<
@@ -32834,6 +33583,7 @@ def V6_vminuh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -32848,6 +33598,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vminw_alt : HInst<
@@ -32857,6 +33608,7 @@ def V6_vminw_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -32871,6 +33623,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpabus_acc : HInst<
@@ -32884,6 +33637,7 @@ let Inst{31-21} = 0b00011001001;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -32895,6 +33649,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -32907,6 +33662,7 @@ def V6_vmpabus_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -32921,6 +33677,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpabusv_alt : HInst<
@@ -32930,6 +33687,7 @@ def V6_vmpabusv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -32944,6 +33702,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpabuu_acc : HInst<
@@ -32957,6 +33716,7 @@ let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -32968,6 +33728,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -32980,6 +33741,7 @@ def V6_vmpabuu_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -32994,6 +33756,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpabuuv_alt : HInst<
@@ -33003,6 +33766,7 @@ def V6_vmpabuuv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33017,6 +33781,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpahb_acc : HInst<
@@ -33030,6 +33795,7 @@ let Inst{31-21} = 0b00011001001;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -33041,6 +33807,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33053,6 +33820,7 @@ def V6_vmpahb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33067,6 +33835,7 @@ let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -33080,6 +33849,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpauhb_acc : HInst<
@@ -33093,6 +33863,7 @@ let Inst{31-21} = 0b00011001100;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -33104,6 +33875,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33116,6 +33888,7 @@ def V6_vmpauhb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33130,6 +33903,7 @@ let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -33143,6 +33917,7 @@ let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011001100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -33156,6 +33931,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpybus_acc : HInst<
@@ -33169,6 +33945,7 @@ let Inst{31-21} = 0b00011001001;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -33180,6 +33957,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33192,6 +33970,7 @@ def V6_vmpybus_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33206,6 +33985,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpybusv_acc : HInst<
@@ -33219,6 +33999,7 @@ let Inst{31-21} = 0b00011100000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -33230,6 +34011,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33242,6 +34024,7 @@ def V6_vmpybusv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33256,6 +34039,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpybv_acc : HInst<
@@ -33269,6 +34053,7 @@ let Inst{31-21} = 0b00011100000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -33280,6 +34065,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33292,6 +34078,7 @@ def V6_vmpybv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33306,6 +34093,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyewuh_64 : HInst<
@@ -33318,6 +34106,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyewuh_alt : HInst<
@@ -33327,6 +34116,7 @@ def V6_vmpyewuh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33341,6 +34131,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyh_acc : HInst<
@@ -33354,6 +34145,7 @@ let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -33365,6 +34157,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33377,6 +34170,7 @@ def V6_vmpyh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33392,6 +34186,7 @@ let Inst{31-21} = 0b00011001010;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -33403,6 +34198,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33418,6 +34214,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyhsrs_alt : HInst<
@@ -33427,6 +34224,7 @@ def V6_vmpyhsrs_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33441,6 +34239,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyhss_alt : HInst<
@@ -33450,6 +34249,7 @@ def V6_vmpyhss_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33464,6 +34264,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyhus_acc : HInst<
@@ -33477,6 +34278,7 @@ let Inst{31-21} = 0b00011100001;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -33488,6 +34290,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33500,6 +34303,7 @@ def V6_vmpyhus_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33514,6 +34318,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyhv_acc : HInst<
@@ -33527,6 +34332,7 @@ let Inst{31-21} = 0b00011100000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -33538,6 +34344,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33550,6 +34357,7 @@ def V6_vmpyhv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33564,6 +34372,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyhvsrs_alt : HInst<
@@ -33573,6 +34382,7 @@ def V6_vmpyhvsrs_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33587,6 +34397,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyiewh_acc : HInst<
@@ -33600,6 +34411,7 @@ let Inst{31-21} = 0b00011100010;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -33611,6 +34423,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33626,6 +34439,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyiewuh_acc : HInst<
@@ -33639,6 +34453,7 @@ let Inst{31-21} = 0b00011100001;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -33650,6 +34465,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33662,6 +34478,7 @@ def V6_vmpyiewuh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33676,6 +34493,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyih_acc : HInst<
@@ -33689,6 +34507,7 @@ let Inst{31-21} = 0b00011100001;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -33700,6 +34519,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33712,6 +34532,7 @@ def V6_vmpyih_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33726,6 +34547,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyihb_acc : HInst<
@@ -33739,6 +34561,7 @@ let Inst{31-21} = 0b00011001011;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -33750,6 +34573,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33762,6 +34586,7 @@ def V6_vmpyihb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33776,6 +34601,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyiowh_alt : HInst<
@@ -33785,6 +34611,7 @@ def V6_vmpyiowh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33799,6 +34626,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyiwb_acc : HInst<
@@ -33812,6 +34640,7 @@ let Inst{31-21} = 0b00011001010;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -33823,6 +34652,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33835,6 +34665,7 @@ def V6_vmpyiwb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33849,6 +34680,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyiwh_acc : HInst<
@@ -33862,6 +34694,7 @@ let Inst{31-21} = 0b00011001010;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -33873,6 +34706,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33885,6 +34719,7 @@ def V6_vmpyiwh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33899,6 +34734,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyiwub_acc : HInst<
@@ -33912,6 +34748,7 @@ let Inst{31-21} = 0b00011001100;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -33923,6 +34760,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33935,6 +34773,7 @@ def V6_vmpyiwub_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33949,6 +34788,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyowh_64_acc : HInst<
@@ -33962,6 +34802,7 @@ let Inst{31-21} = 0b00011100001;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -33972,6 +34813,7 @@ def V6_vmpyowh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -33986,6 +34828,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyowh_rnd_alt : HInst<
@@ -33995,6 +34838,7 @@ def V6_vmpyowh_rnd_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34010,6 +34854,7 @@ let Inst{31-21} = 0b00011100001;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -34021,6 +34866,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
@@ -34036,6 +34882,7 @@ let Inst{31-21} = 0b00011100001;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -34047,6 +34894,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
@@ -34061,6 +34909,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyub_acc : HInst<
@@ -34074,6 +34923,7 @@ let Inst{31-21} = 0b00011001100;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -34085,6 +34935,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34097,6 +34948,7 @@ def V6_vmpyub_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34111,6 +34963,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyubv_acc : HInst<
@@ -34124,6 +34977,7 @@ let Inst{31-21} = 0b00011100000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -34135,6 +34989,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34147,6 +35002,7 @@ def V6_vmpyubv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34161,6 +35017,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyuh_acc : HInst<
@@ -34174,6 +35031,7 @@ let Inst{31-21} = 0b00011001010;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -34185,6 +35043,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34197,6 +35056,7 @@ def V6_vmpyuh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34211,6 +35071,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyuhe_acc : HInst<
@@ -34224,6 +35085,7 @@ let Inst{31-21} = 0b00011001100;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -34237,6 +35099,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vmpyuhv_acc : HInst<
@@ -34250,6 +35113,7 @@ let Inst{31-21} = 0b00011100001;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -34261,6 +35125,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34273,6 +35138,7 @@ def V6_vmpyuhv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34287,6 +35153,7 @@ let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011110111;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vnavgb : HInst<
@@ -34299,6 +35166,7 @@ let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011111000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vnavgb_alt : HInst<
@@ -34308,6 +35176,7 @@ def V6_vnavgb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34322,6 +35191,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vnavgh_alt : HInst<
@@ -34331,6 +35201,7 @@ def V6_vnavgh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34345,6 +35216,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vnavgub_alt : HInst<
@@ -34354,6 +35226,7 @@ def V6_vnavgub_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34368,6 +35241,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100111;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vnavgw_alt : HInst<
@@ -34377,6 +35251,7 @@ def V6_vnavgw_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34393,6 +35268,7 @@ let isPredicated = 1;
let isPredicatedFalse = 1;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vncmov : HInst<
@@ -34407,6 +35283,7 @@ let isPredicated = 1;
let isPredicatedFalse = 1;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vnormamth : HInst<
@@ -34419,6 +35296,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vnormamth_alt : HInst<
@@ -34428,6 +35306,7 @@ def V6_vnormamth_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34442,6 +35321,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vnormamtw_alt : HInst<
@@ -34451,6 +35331,7 @@ def V6_vnormamtw_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34465,6 +35346,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vor : HInst<
@@ -34477,6 +35359,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vpackeb : HInst<
@@ -34489,6 +35372,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vpackeb_alt : HInst<
@@ -34498,6 +35382,7 @@ def V6_vpackeb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34512,6 +35397,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vpackeh_alt : HInst<
@@ -34521,6 +35407,7 @@ def V6_vpackeh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34535,6 +35422,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vpackhb_sat_alt : HInst<
@@ -34544,6 +35432,7 @@ def V6_vpackhb_sat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34558,6 +35447,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vpackhub_sat_alt : HInst<
@@ -34567,6 +35457,7 @@ def V6_vpackhub_sat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34581,6 +35472,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vpackob_alt : HInst<
@@ -34590,6 +35482,7 @@ def V6_vpackob_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34604,6 +35497,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vpackoh_alt : HInst<
@@ -34613,6 +35507,7 @@ def V6_vpackoh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34627,6 +35522,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vpackwh_sat_alt : HInst<
@@ -34636,6 +35532,7 @@ def V6_vpackwh_sat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34650,6 +35547,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vpackwuh_sat_alt : HInst<
@@ -34659,6 +35557,7 @@ def V6_vpackwuh_sat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34673,6 +35572,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vpopcounth_alt : HInst<
@@ -34682,6 +35582,7 @@ def V6_vpopcounth_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34696,6 +35597,7 @@ let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vprefixqh : HInst<
@@ -34708,6 +35610,7 @@ let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vprefixqw : HInst<
@@ -34720,6 +35623,7 @@ let Inst{21-16} = 0b000011;
let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrdelta : HInst<
@@ -34732,6 +35636,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrmpybub_rtt : HInst<
@@ -34744,6 +35649,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrmpybub_rtt_acc : HInst<
@@ -34757,6 +35663,7 @@ let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -34768,6 +35675,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34780,6 +35688,7 @@ def V6_vrmpybub_rtt_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34794,6 +35703,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrmpybus_acc : HInst<
@@ -34807,6 +35717,7 @@ let Inst{31-21} = 0b00011001000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -34818,6 +35729,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34830,6 +35742,7 @@ def V6_vrmpybus_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34844,6 +35757,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrmpybusi_acc : HInst<
@@ -34857,6 +35771,7 @@ let Inst{31-21} = 0b00011001010;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -34868,6 +35783,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34880,6 +35796,7 @@ def V6_vrmpybusi_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34894,6 +35811,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrmpybusv_acc : HInst<
@@ -34907,6 +35825,7 @@ let Inst{31-21} = 0b00011100000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -34918,6 +35837,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34930,6 +35850,7 @@ def V6_vrmpybusv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34944,6 +35865,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrmpybv_acc : HInst<
@@ -34957,6 +35879,7 @@ let Inst{31-21} = 0b00011100000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -34968,6 +35891,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34980,6 +35904,7 @@ def V6_vrmpybv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -34994,6 +35919,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrmpyub_acc : HInst<
@@ -35007,6 +35933,7 @@ let Inst{31-21} = 0b00011001000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -35018,6 +35945,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35030,6 +35958,7 @@ def V6_vrmpyub_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35044,6 +35973,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrmpyub_rtt_acc : HInst<
@@ -35057,6 +35987,7 @@ let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -35068,6 +35999,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35080,6 +36012,7 @@ def V6_vrmpyub_rtt_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35094,6 +36027,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrmpyubi_acc : HInst<
@@ -35107,6 +36041,7 @@ let Inst{31-21} = 0b00011001011;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -35118,6 +36053,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35130,6 +36066,7 @@ def V6_vrmpyubi_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35144,6 +36081,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrmpyubv_acc : HInst<
@@ -35157,6 +36095,7 @@ let Inst{31-21} = 0b00011100000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -35168,6 +36107,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35180,6 +36120,7 @@ def V6_vrmpyubv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35194,6 +36135,7 @@ let Inst{13-13} = 0b0;
let Inst{31-19} = 0b0001100111101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrmpyzbb_rt_acc : HInst<
@@ -35207,6 +36149,7 @@ let Inst{31-19} = 0b0001100111000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vyyyy32 = $Vyyyy32in";
}
@@ -35220,6 +36163,7 @@ let Inst{13-13} = 0b0;
let Inst{31-19} = 0b0001100111100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Rx8 = $Rx8in";
}
@@ -35234,6 +36178,7 @@ let Inst{31-19} = 0b0001100111001;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vyyyy32 = $Vyyyy32in, $Rx8 = $Rx8in";
}
@@ -35247,6 +36192,7 @@ let Inst{13-13} = 0b0;
let Inst{31-19} = 0b0001100111111;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrmpyzbub_rt_acc : HInst<
@@ -35260,6 +36206,7 @@ let Inst{31-19} = 0b0001100111010;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vyyyy32 = $Vyyyy32in";
}
@@ -35273,6 +36220,7 @@ let Inst{13-13} = 0b0;
let Inst{31-19} = 0b0001100111110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Rx8 = $Rx8in";
}
@@ -35287,6 +36235,7 @@ let Inst{31-19} = 0b0001100111011;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vyyyy32 = $Vyyyy32in, $Rx8 = $Rx8in";
}
@@ -35300,6 +36249,7 @@ let Inst{13-13} = 0b0;
let Inst{31-19} = 0b0001100111101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrmpyzcb_rt_acc : HInst<
@@ -35313,6 +36263,7 @@ let Inst{31-19} = 0b0001100111000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vyyyy32 = $Vyyyy32in";
}
@@ -35326,6 +36277,7 @@ let Inst{13-13} = 0b0;
let Inst{31-19} = 0b0001100111100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Rx8 = $Rx8in";
}
@@ -35340,6 +36292,7 @@ let Inst{31-19} = 0b0001100111001;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vyyyy32 = $Vyyyy32in, $Rx8 = $Rx8in";
}
@@ -35353,6 +36306,7 @@ let Inst{13-13} = 0b0;
let Inst{31-19} = 0b0001100111101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrmpyzcbs_rt_acc : HInst<
@@ -35366,6 +36320,7 @@ let Inst{31-19} = 0b0001100111000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vyyyy32 = $Vyyyy32in";
}
@@ -35379,6 +36334,7 @@ let Inst{13-13} = 0b0;
let Inst{31-19} = 0b0001100111100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Rx8 = $Rx8in";
}
@@ -35393,6 +36349,7 @@ let Inst{31-19} = 0b0001100111001;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vyyyy32 = $Vyyyy32in, $Rx8 = $Rx8in";
}
@@ -35406,6 +36363,7 @@ let Inst{13-13} = 0b0;
let Inst{31-19} = 0b0001100111111;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrmpyznb_rt_acc : HInst<
@@ -35419,6 +36377,7 @@ let Inst{31-19} = 0b0001100111010;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vyyyy32 = $Vyyyy32in";
}
@@ -35432,6 +36391,7 @@ let Inst{13-13} = 0b0;
let Inst{31-19} = 0b0001100111110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Rx8 = $Rx8in";
}
@@ -35446,6 +36406,7 @@ let Inst{31-19} = 0b0001100111011;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vyyyy32 = $Vyyyy32in, $Rx8 = $Rx8in";
}
@@ -35459,6 +36420,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrotr : HInst<
@@ -35471,6 +36433,7 @@ let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011010100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrotr_alt : HInst<
@@ -35480,6 +36443,7 @@ def V6_vrotr_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV66]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35494,6 +36458,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vroundhb_alt : HInst<
@@ -35503,6 +36468,7 @@ def V6_vroundhb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35517,6 +36483,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vroundhub_alt : HInst<
@@ -35526,6 +36493,7 @@ def V6_vroundhub_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35540,6 +36508,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrounduhub_alt : HInst<
@@ -35549,6 +36518,7 @@ def V6_vrounduhub_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35563,6 +36533,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111111;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrounduwuh_alt : HInst<
@@ -35572,6 +36543,7 @@ def V6_vrounduwuh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35586,6 +36558,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vroundwh_alt : HInst<
@@ -35595,6 +36568,7 @@ def V6_vroundwh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35609,6 +36583,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vroundwuh_alt : HInst<
@@ -35618,6 +36593,7 @@ def V6_vroundwuh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35632,6 +36608,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vrsadubi_acc : HInst<
@@ -35645,6 +36622,7 @@ let Inst{31-21} = 0b00011001010;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -35656,6 +36634,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35668,6 +36647,7 @@ def V6_vrsadubi_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35682,18 +36662,20 @@ let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011101100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsathub : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.ub = vsat($Vu32.h,$Vv32.h)",
-tc_8772086c, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_8772086c, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b010;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsathub_alt : HInst<
@@ -35703,6 +36685,7 @@ def V6_vsathub_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35717,6 +36700,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsatuwuh_alt : HInst<
@@ -35726,6 +36710,7 @@ def V6_vsatuwuh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35734,12 +36719,13 @@ def V6_vsatwh : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.h = vsat($Vu32.w,$Vv32.w)",
-tc_8772086c, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[UseHVXV60]> {
+tc_8772086c, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsatwh_alt : HInst<
@@ -35749,6 +36735,7 @@ def V6_vsatwh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35763,6 +36750,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsb_alt : HInst<
@@ -35772,6 +36760,7 @@ def V6_vsb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35784,6 +36773,7 @@ tc_9f363d21, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b001;
let Inst{31-21} = 0b00101111001;
let accessSize = HalfWordAccess;
+let isCVI = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
}
@@ -35796,6 +36786,7 @@ let Inst{7-5} = 0b101;
let Inst{31-21} = 0b00101111001;
let accessSize = HalfWordAccess;
let isAccumulator = 1;
+let isCVI = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
}
@@ -35805,6 +36796,7 @@ def V6_vscattermh_add_alt : HInst<
"vscatter($Rt32,$Mu2,$Vv32.h) += $Vw32.h",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35814,6 +36806,7 @@ def V6_vscattermh_alt : HInst<
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vv32.h) = $Vw32.h",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35826,6 +36819,7 @@ tc_8e420e4d, TypeCVI_SCATTER>, Enc_9be1de, Requires<[UseHVXV65]> {
let Inst{7-7} = 0b1;
let Inst{31-21} = 0b00101111100;
let accessSize = HalfWordAccess;
+let isCVI = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
}
@@ -35834,6 +36828,7 @@ def V6_vscattermhq_alt : HInst<
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"if ($Qs4) vscatter($Rt32,$Mu2,$Vv32.h) = $Vw32.h",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35846,6 +36841,7 @@ tc_7273323b, TypeCVI_SCATTER_DV>, Enc_a641d0, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b010;
let Inst{31-21} = 0b00101111001;
let accessSize = HalfWordAccess;
+let isCVI = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
}
@@ -35858,6 +36854,7 @@ let Inst{7-5} = 0b110;
let Inst{31-21} = 0b00101111001;
let accessSize = HalfWordAccess;
let isAccumulator = 1;
+let isCVI = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
}
@@ -35869,6 +36866,7 @@ tc_58d21193, TypeCVI_SCATTER_DV>, Enc_3d6d37, Requires<[UseHVXV65]> {
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b00101111101;
let accessSize = HalfWordAccess;
+let isCVI = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
}
@@ -35880,6 +36878,7 @@ tc_9f363d21, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> {
let Inst{7-5} = 0b000;
let Inst{31-21} = 0b00101111001;
let accessSize = WordAccess;
+let isCVI = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
}
@@ -35892,6 +36891,7 @@ let Inst{7-5} = 0b100;
let Inst{31-21} = 0b00101111001;
let accessSize = WordAccess;
let isAccumulator = 1;
+let isCVI = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
}
@@ -35901,6 +36901,7 @@ def V6_vscattermw_add_alt : HInst<
"vscatter($Rt32,$Mu2,$Vv32.w) += $Vw32.w",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35910,6 +36911,7 @@ def V6_vscattermw_alt : HInst<
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vv32.w) = $Vw32.w",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35920,6 +36922,7 @@ def V6_vscattermwh_add_alt : HInst<
"vscatter($Rt32,$Mu2,$Vvv32.w) += $Vw32.h",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35929,6 +36932,7 @@ def V6_vscattermwh_alt : HInst<
(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32, HvxVR:$Vw32),
"vscatter($Rt32,$Mu2,$Vvv32.w) = $Vw32.h",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35938,6 +36942,7 @@ def V6_vscattermwhq_alt : HInst<
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32, HvxVR:$Vw32),
"if ($Qs4) vscatter($Rt32,$Mu2,$Vvv32.w) = $Vw32.h",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35950,6 +36955,7 @@ tc_8e420e4d, TypeCVI_SCATTER>, Enc_9be1de, Requires<[UseHVXV65]> {
let Inst{7-7} = 0b0;
let Inst{31-21} = 0b00101111100;
let accessSize = WordAccess;
+let isCVI = 1;
let mayStore = 1;
let DecoderNamespace = "EXT_mmvec";
}
@@ -35958,6 +36964,7 @@ def V6_vscattermwq_alt : HInst<
(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32),
"if ($Qs4) vscatter($Rt32,$Mu2,$Vv32.w) = $Vw32.w",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35972,6 +36979,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsh_alt : HInst<
@@ -35981,6 +36989,7 @@ def V6_vsh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -35995,6 +37004,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vshufeh_alt : HInst<
@@ -36004,6 +37014,7 @@ def V6_vshufeh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36020,6 +37031,7 @@ let hasNewValue = 1;
let opNewValue = 0;
let hasNewValue2 = 1;
let opNewValue2 = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vy32 = $Vy32in, $Vx32 = $Vx32in";
}
@@ -36033,6 +37045,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vshuffb_alt : HInst<
@@ -36042,6 +37055,7 @@ def V6_vshuffb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36056,6 +37070,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vshuffeb_alt : HInst<
@@ -36065,6 +37080,7 @@ def V6_vshuffeb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36079,6 +37095,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vshuffh_alt : HInst<
@@ -36088,6 +37105,7 @@ def V6_vshuffh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36102,6 +37120,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vshuffob_alt : HInst<
@@ -36111,6 +37130,7 @@ def V6_vshuffob_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36125,6 +37145,7 @@ let Inst{13-13} = 0b1;
let Inst{31-24} = 0b00011011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vshufoeb : HInst<
@@ -36137,6 +37158,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vshufoeb_alt : HInst<
@@ -36146,6 +37168,7 @@ def V6_vshufoeb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36160,6 +37183,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vshufoeh_alt : HInst<
@@ -36169,6 +37193,7 @@ def V6_vshufoeh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36183,6 +37208,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vshufoh_alt : HInst<
@@ -36192,6 +37218,7 @@ def V6_vshufoh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36206,6 +37233,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsubb_alt : HInst<
@@ -36215,6 +37243,7 @@ def V6_vsubb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36229,6 +37258,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsubb_dv_alt : HInst<
@@ -36238,6 +37268,7 @@ def V6_vsubb_dv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36253,6 +37284,7 @@ let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -36263,6 +37295,7 @@ def V6_vsubbnq_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36279,6 +37312,7 @@ let Inst{21-16} = 0b000001;
let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -36289,6 +37323,7 @@ def V6_vsubbq_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36304,6 +37339,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsubbsat_alt : HInst<
@@ -36313,6 +37349,7 @@ def V6_vsubbsat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36327,6 +37364,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsubbsat_dv_alt : HInst<
@@ -36336,6 +37374,7 @@ def V6_vsubbsat_dv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36350,6 +37389,7 @@ let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011100101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
@@ -36357,7 +37397,7 @@ def V6_vsubcarryo : HInst<
(outs HvxVR:$Vd32, HvxQR:$Qe4),
(ins HvxVR:$Vu32, HvxVR:$Vv32),
"$Vd32.w,$Qe4 = vsub($Vu32.w,$Vv32.w):carry",
-tc_e35c1e93, TypeCOPROC_VX>, Enc_c1d806, Requires<[UseHVXV66]> {
+tc_e35c1e93, TypeCVI_VA>, Enc_c1d806, Requires<[UseHVXV66]> {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011101101;
@@ -36365,6 +37405,7 @@ let hasNewValue = 1;
let opNewValue = 0;
let hasNewValue2 = 1;
let opNewValue2 = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsubh : HInst<
@@ -36377,6 +37418,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsubh_alt : HInst<
@@ -36386,6 +37428,7 @@ def V6_vsubh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36400,6 +37443,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsubh_dv_alt : HInst<
@@ -36409,6 +37453,7 @@ def V6_vsubh_dv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36424,6 +37469,7 @@ let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -36434,6 +37480,7 @@ def V6_vsubhnq_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36450,6 +37497,7 @@ let Inst{21-16} = 0b000001;
let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -36460,6 +37508,7 @@ def V6_vsubhq_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36475,6 +37524,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsubhsat_alt : HInst<
@@ -36484,6 +37534,7 @@ def V6_vsubhsat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36498,6 +37549,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsubhsat_dv_alt : HInst<
@@ -36507,6 +37559,7 @@ def V6_vsubhsat_dv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36521,6 +37574,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsubhw_alt : HInst<
@@ -36530,6 +37584,7 @@ def V6_vsubhw_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36544,6 +37599,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsububh_alt : HInst<
@@ -36553,6 +37609,7 @@ def V6_vsububh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36567,6 +37624,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsububsat_alt : HInst<
@@ -36576,6 +37634,7 @@ def V6_vsububsat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36590,6 +37649,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsububsat_dv_alt : HInst<
@@ -36599,6 +37659,7 @@ def V6_vsububsat_dv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36613,6 +37674,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsubuhsat : HInst<
@@ -36625,6 +37687,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsubuhsat_alt : HInst<
@@ -36634,6 +37697,7 @@ def V6_vsubuhsat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36648,6 +37712,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsubuhsat_dv_alt : HInst<
@@ -36657,6 +37722,7 @@ def V6_vsubuhsat_dv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36671,6 +37737,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsubuhw_alt : HInst<
@@ -36680,6 +37747,7 @@ def V6_vsubuhw_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36694,6 +37762,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011111110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsubuwsat_alt : HInst<
@@ -36703,6 +37772,7 @@ def V6_vsubuwsat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36717,6 +37787,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011110101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsubuwsat_dv_alt : HInst<
@@ -36726,6 +37797,7 @@ def V6_vsubuwsat_dv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36740,6 +37812,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsubw_alt : HInst<
@@ -36749,6 +37822,7 @@ def V6_vsubw_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36763,6 +37837,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100100;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsubw_dv_alt : HInst<
@@ -36772,6 +37847,7 @@ def V6_vsubw_dv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36787,6 +37863,7 @@ let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -36797,6 +37874,7 @@ def V6_vsubwnq_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36813,6 +37891,7 @@ let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vx32 = $Vx32in";
}
@@ -36823,6 +37902,7 @@ def V6_vsubwq_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36838,6 +37918,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100011;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsubwsat_alt : HInst<
@@ -36847,6 +37928,7 @@ def V6_vsubwsat_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36861,6 +37943,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vsubwsat_dv_alt : HInst<
@@ -36870,6 +37953,7 @@ def V6_vsubwsat_dv_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36884,6 +37968,7 @@ let Inst{13-13} = 0b1;
let Inst{31-21} = 0b00011110101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vtmpyb : HInst<
@@ -36896,6 +37981,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vtmpyb_acc : HInst<
@@ -36909,6 +37995,7 @@ let Inst{31-21} = 0b00011001000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -36920,6 +38007,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36932,6 +38020,7 @@ def V6_vtmpyb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36946,6 +38035,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001000;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vtmpybus_acc : HInst<
@@ -36959,6 +38049,7 @@ let Inst{31-21} = 0b00011001000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -36970,6 +38061,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36982,6 +38074,7 @@ def V6_vtmpybus_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -36996,6 +38089,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vtmpyhb_acc : HInst<
@@ -37009,6 +38103,7 @@ let Inst{31-21} = 0b00011001000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -37020,6 +38115,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -37032,6 +38128,7 @@ def V6_vtmpyhb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -37045,6 +38142,7 @@ let hasNewValue = 1;
let opNewValue = 0;
let hasNewValue2 = 1;
let opNewValue2 = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -37060,6 +38158,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vunpackb_alt : HInst<
@@ -37069,6 +38168,7 @@ def V6_vunpackb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -37083,6 +38183,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vunpackh_alt : HInst<
@@ -37092,6 +38193,7 @@ def V6_vunpackh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -37107,6 +38209,7 @@ let Inst{31-16} = 0b0001111000000000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -37118,6 +38221,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
@@ -37133,6 +38237,7 @@ let Inst{31-16} = 0b0001111000000000;
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Vxx32 = $Vxx32in";
}
@@ -37144,6 +38249,7 @@ PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isAccumulator = 1;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -37159,6 +38265,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vunpackub_alt : HInst<
@@ -37168,6 +38275,7 @@ def V6_vunpackub_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -37182,6 +38290,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vunpackuh_alt : HInst<
@@ -37191,6 +38300,7 @@ def V6_vunpackuh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -37202,6 +38312,7 @@ def V6_vwhist128 : HInst<
tc_1381a97c, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10010010000000;
let Inst{31-16} = 0b0001111000000000;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vwhist128m : HInst<
@@ -37212,6 +38323,7 @@ tc_b28e51aa, TypeCVI_HIST>, Enc_efaed8, Requires<[UseHVXV62]> {
let Inst{7-0} = 0b10000000;
let Inst{13-9} = 0b10011;
let Inst{31-16} = 0b0001111000000000;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vwhist128q : HInst<
@@ -37222,6 +38334,7 @@ tc_e3f68a46, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10010010000000;
let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vwhist128qm : HInst<
@@ -37233,6 +38346,7 @@ let Inst{7-0} = 0b10000000;
let Inst{13-9} = 0b10011;
let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vwhist256 : HInst<
@@ -37242,6 +38356,7 @@ def V6_vwhist256 : HInst<
tc_1381a97c, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10001010000000;
let Inst{31-16} = 0b0001111000000000;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vwhist256_sat : HInst<
@@ -37251,6 +38366,7 @@ def V6_vwhist256_sat : HInst<
tc_1381a97c, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10001110000000;
let Inst{31-16} = 0b0001111000000000;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vwhist256q : HInst<
@@ -37261,6 +38377,7 @@ tc_e3f68a46, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10001010000000;
let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vwhist256q_sat : HInst<
@@ -37271,6 +38388,7 @@ tc_e3f68a46, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> {
let Inst{13-0} = 0b10001110000000;
let Inst{21-16} = 0b000010;
let Inst{31-24} = 0b00011110;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vxor : HInst<
@@ -37283,6 +38401,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00011100001;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vzb : HInst<
@@ -37295,6 +38414,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vzb_alt : HInst<
@@ -37304,6 +38424,7 @@ def V6_vzb_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -37318,6 +38439,7 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0001111000000010;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_vzh_alt : HInst<
@@ -37327,6 +38449,7 @@ def V6_vzh_alt : HInst<
PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> {
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -37340,6 +38463,7 @@ let Inst{7-0} = 0b00000000;
let Inst{12-11} = 0b00;
let Inst{31-21} = 0b00101100000;
let addrMode = BaseImmOffset;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -37353,6 +38477,7 @@ let Inst{7-0} = 0b00000000;
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b00101101000;
let addrMode = PostInc;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -37366,6 +38491,7 @@ tc_a0dbea28, TypeCVI_ZW>, Enc_44661f, Requires<[UseHVXV66,UseZReg]> {
let Inst{12-0} = 0b0000000000001;
let Inst{31-21} = 0b00101101000;
let addrMode = PostInc;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -37380,6 +38506,7 @@ let Inst{7-0} = 0b00000000;
let Inst{31-21} = 0b00101100100;
let isPredicated = 1;
let addrMode = BaseImmOffset;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -37394,6 +38521,7 @@ let Inst{13-13} = 0b0;
let Inst{31-21} = 0b00101101100;
let isPredicated = 1;
let addrMode = PostInc;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -37408,6 +38536,7 @@ let Inst{10-0} = 0b00000000001;
let Inst{31-21} = 0b00101101100;
let isPredicated = 1;
let addrMode = PostInc;
+let isCVI = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -37422,6 +38551,7 @@ let Inst{13-5} = 0b000001001;
let Inst{31-21} = 0b00011001101;
let hasNewValue = 1;
let opNewValue = 0;
+let isCVI = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_zld0 : HInst<
@@ -37429,6 +38559,7 @@ def V6_zld0 : HInst<
(ins IntRegs:$Rt32),
"z = vmem($Rt32)",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV66]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -37438,6 +38569,7 @@ def V6_zldp0 : HInst<
(ins PredRegs:$Pv4, IntRegs:$Rt32),
"if ($Pv4) z = vmem($Rt32)",
PSEUDO, TypeMAPPING>, Requires<[UseHVXV66]> {
+let isCVI = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -37446,7 +38578,7 @@ def Y2_barrier : HInst<
(outs),
(ins),
"barrier",
-tc_8c99de45, TypeST>, Enc_e3b0c4 {
+tc_77f94a5e, TypeST>, Enc_e3b0c4 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-16} = 0b1010100000000000;
let isSoloAX = 1;
@@ -37456,7 +38588,7 @@ def Y2_break : HInst<
(outs),
(ins),
"brkpt",
-tc_9ad9998f, TypeCR>, Enc_e3b0c4 {
+tc_55255f2b, TypeCR>, Enc_e3b0c4 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-16} = 0b0110110000100000;
let isSolo = 1;
@@ -37465,7 +38597,7 @@ def Y2_dccleana : HInst<
(outs),
(ins IntRegs:$Rs32),
"dccleana($Rs32)",
-tc_b857bf4e, TypeST>, Enc_ecbcc8 {
+tc_b1ae5f67, TypeST>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b10100000000;
let isRestrictSlot1AOK = 1;
@@ -37475,7 +38607,7 @@ def Y2_dccleaninva : HInst<
(outs),
(ins IntRegs:$Rs32),
"dccleaninva($Rs32)",
-tc_b857bf4e, TypeST>, Enc_ecbcc8 {
+tc_b1ae5f67, TypeST>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b10100000010;
let isRestrictSlot1AOK = 1;
@@ -37485,7 +38617,7 @@ def Y2_dcfetch : HInst<
(outs),
(ins IntRegs:$Rs32),
"dcfetch($Rs32)",
-tc_d63f638c, TypeMAPPING> {
+tc_d45ba9cd, TypeMAPPING> {
let hasSideEffects = 1;
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -37494,7 +38626,7 @@ def Y2_dcfetchbo : HInst<
(outs),
(ins IntRegs:$Rs32, u11_3Imm:$Ii),
"dcfetch($Rs32+#$Ii)",
-tc_9ca930f7, TypeLD>, Enc_2d829e {
+tc_2237d952, TypeLD>, Enc_2d829e {
let Inst{13-11} = 0b000;
let Inst{31-21} = 0b10010100000;
let addrMode = BaseImmOffset;
@@ -37505,7 +38637,7 @@ def Y2_dcinva : HInst<
(outs),
(ins IntRegs:$Rs32),
"dcinva($Rs32)",
-tc_b857bf4e, TypeST>, Enc_ecbcc8 {
+tc_b1ae5f67, TypeST>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b10100000001;
let isRestrictSlot1AOK = 1;
@@ -37515,7 +38647,7 @@ def Y2_dczeroa : HInst<
(outs),
(ins IntRegs:$Rs32),
"dczeroa($Rs32)",
-tc_b857bf4e, TypeST>, Enc_ecbcc8 {
+tc_b1ae5f67, TypeST>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b10100000110;
let isRestrictSlot1AOK = 1;
@@ -37526,7 +38658,7 @@ def Y2_icinva : HInst<
(outs),
(ins IntRegs:$Rs32),
"icinva($Rs32)",
-tc_5d7f5414, TypeJ>, Enc_ecbcc8 {
+tc_0ba0d5da, TypeJ>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01010110110;
let isSolo = 1;
@@ -37535,7 +38667,7 @@ def Y2_isync : HInst<
(outs),
(ins),
"isync",
-tc_8b121f4a, TypeJ>, Enc_e3b0c4 {
+tc_9b34f5e0, TypeJ>, Enc_e3b0c4 {
let Inst{13-0} = 0b00000000000010;
let Inst{31-16} = 0b0101011111000000;
let isSolo = 1;
@@ -37544,7 +38676,7 @@ def Y2_syncht : HInst<
(outs),
(ins),
"syncht",
-tc_8c99de45, TypeST>, Enc_e3b0c4 {
+tc_77f94a5e, TypeST>, Enc_e3b0c4 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-16} = 0b1010100001000000;
let isSolo = 1;
@@ -37553,7 +38685,7 @@ def Y2_wait : HInst<
(outs),
(ins IntRegs:$Rs32),
"wait($Rs32)",
-tc_174516e8, TypeCR>, Enc_ecbcc8, Requires<[HasV65]> {
+tc_d7718fbe, TypeCR>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01100100010;
let isSolo = 1;
@@ -37562,7 +38694,7 @@ def Y4_l2fetch : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"l2fetch($Rs32,$Rt32)",
-tc_fe211424, TypeST>, Enc_ca3887 {
+tc_a3070909, TypeST>, Enc_ca3887 {
let Inst{7-0} = 0b00000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10100110000;
@@ -37574,7 +38706,7 @@ def Y4_trace : HInst<
(outs),
(ins IntRegs:$Rs32),
"trace($Rs32)",
-tc_6b25e783, TypeCR>, Enc_ecbcc8 {
+tc_d7718fbe, TypeCR>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01100010010;
let isSoloAX = 1;
@@ -37583,7 +38715,7 @@ def Y5_l2fetch : HInst<
(outs),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"l2fetch($Rs32,$Rtt32)",
-tc_fe211424, TypeST>, Enc_e6abcf {
+tc_a3070909, TypeST>, Enc_e6abcf {
let Inst{7-0} = 0b00000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10100110100;
@@ -37591,11 +38723,37 @@ let isSoloAX = 1;
let mayStore = 1;
let hasSideEffects = 1;
}
+def Y6_diag : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"diag($Rs32)",
+tc_2c3e17fc, TypeCR>, Enc_ecbcc8, Requires<[HasV67]> {
+let Inst{13-0} = 0b00000000100000;
+let Inst{31-21} = 0b01100010010;
+}
+def Y6_diag0 : HInst<
+(outs),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"diag0($Rss32,$Rtt32)",
+tc_28e55c6f, TypeCR>, Enc_b00112, Requires<[HasV67]> {
+let Inst{7-0} = 0b01000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01100010010;
+}
+def Y6_diag1 : HInst<
+(outs),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"diag1($Rss32,$Rtt32)",
+tc_28e55c6f, TypeCR>, Enc_b00112, Requires<[HasV67]> {
+let Inst{7-0} = 0b01100000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01100010010;
+}
def dep_A2_addsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = add($Rs32,$Rt32):sat:deprecated",
-tc_779080bf, TypeALU64>, Enc_5ab2be {
+tc_8a825db2, TypeALU64>, Enc_5ab2be {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101100;
@@ -37608,7 +38766,7 @@ def dep_A2_subsat : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rt32, IntRegs:$Rs32),
"$Rd32 = sub($Rt32,$Rs32):sat:deprecated",
-tc_779080bf, TypeALU64>, Enc_bd6011 {
+tc_8a825db2, TypeALU64>, Enc_bd6011 {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010101100;
@@ -37621,8 +38779,476 @@ def dep_S2_packhl : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rdd32 = packhl($Rs32,$Rt32):deprecated",
-tc_946df596, TypeALU64>, Enc_be32a5 {
+tc_5da50c4b, TypeALU64>, Enc_be32a5 {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010100000;
}
+def dup_A2_add : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = add($Rs32,$Rt32)",
+tc_388f9897, TypeALU32_3op>, Requires<[HasV67]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let isPseudo = 1;
+}
+def dup_A2_addi : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rd32 = add($Rs32,#$Ii)",
+tc_388f9897, TypeALU32_ADDI>, Requires<[HasV67]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+}
+def dup_A2_andir : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rd32 = and($Rs32,#$Ii)",
+tc_388f9897, TypeALU32_2op>, Requires<[HasV67]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+}
+def dup_A2_combineii : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins s32_0Imm:$Ii, s8_0Imm:$II),
+"$Rdd32 = combine(#$Ii,#$II)",
+tc_388f9897, TypeALU32_2op>, Requires<[HasV67]> {
+let AsmVariantName = "NonParsable";
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def dup_A2_sxtb : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = sxtb($Rs32)",
+tc_9124c04f, TypeALU32_2op>, Requires<[HasV67]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let isPseudo = 1;
+}
+def dup_A2_sxth : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = sxth($Rs32)",
+tc_9124c04f, TypeALU32_2op>, Requires<[HasV67]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let isPseudo = 1;
+}
+def dup_A2_tfr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = $Rs32",
+tc_9124c04f, TypeALU32_2op>, Requires<[HasV67]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let isPseudo = 1;
+}
+def dup_A2_tfrsi : HInst<
+(outs IntRegs:$Rd32),
+(ins s32_0Imm:$Ii),
+"$Rd32 = #$Ii",
+tc_9124c04f, TypeALU32_2op>, Requires<[HasV67]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+}
+def dup_A2_zxtb : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = zxtb($Rs32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV67]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let isPseudo = 1;
+}
+def dup_A2_zxth : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = zxth($Rs32)",
+tc_9124c04f, TypeALU32_2op>, Requires<[HasV67]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let isPseudo = 1;
+}
+def dup_A4_combineii : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins s8_0Imm:$Ii, u32_0Imm:$II),
+"$Rdd32 = combine(#$Ii,#$II)",
+tc_388f9897, TypeALU32_2op>, Requires<[HasV67]> {
+let AsmVariantName = "NonParsable";
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def dup_A4_combineir : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins s32_0Imm:$Ii, IntRegs:$Rs32),
+"$Rdd32 = combine(#$Ii,$Rs32)",
+tc_388f9897, TypeALU32_2op>, Requires<[HasV67]> {
+let AsmVariantName = "NonParsable";
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def dup_A4_combineri : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rdd32 = combine($Rs32,#$Ii)",
+tc_388f9897, TypeALU32_2op>, Requires<[HasV67]> {
+let AsmVariantName = "NonParsable";
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def dup_C2_cmoveif : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, s32_0Imm:$Ii),
+"if (!$Pu4) $Rd32 = #$Ii",
+tc_388f9897, TypeALU32_2op>, Requires<[HasV67]> {
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 0;
+}
+def dup_C2_cmoveit : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, s32_0Imm:$Ii),
+"if ($Pu4) $Rd32 = #$Ii",
+tc_388f9897, TypeALU32_2op>, Requires<[HasV67]> {
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 0;
+}
+def dup_C2_cmovenewif : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, s32_0Imm:$Ii),
+"if (!$Pu4.new) $Rd32 = #$Ii",
+tc_4ac61d92, TypeALU32_2op>, Requires<[HasV67]> {
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let isPredicatedNew = 1;
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 0;
+}
+def dup_C2_cmovenewit : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, s32_0Imm:$Ii),
+"if ($Pu4.new) $Rd32 = #$Ii",
+tc_4ac61d92, TypeALU32_2op>, Requires<[HasV67]> {
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let isPredicatedNew = 1;
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 0;
+}
+def dup_C2_cmpeqi : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Pd4 = cmp.eq($Rs32,#$Ii)",
+tc_388f9897, TypeALU32_2op>, Requires<[HasV67]> {
+let AsmVariantName = "NonParsable";
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+}
+def dup_L2_deallocframe : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = deallocframe($Rs32):raw",
+tc_aee6250c, TypeLD>, Requires<[HasV67]> {
+let accessSize = DoubleWordAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let Uses = [FRAMEKEY];
+let Defs = [R29];
+let isPseudo = 1;
+}
+def dup_L2_loadrb_io : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rd32 = memb($Rs32+#$Ii)",
+tc_eed07714, TypeLD>, Requires<[HasV67]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 0;
+}
+def dup_L2_loadrd_io : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, s29_3Imm:$Ii),
+"$Rdd32 = memd($Rs32+#$Ii)",
+tc_eed07714, TypeLD>, Requires<[HasV67]> {
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 14;
+let opExtentAlign = 3;
+}
+def dup_L2_loadrh_io : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s31_1Imm:$Ii),
+"$Rd32 = memh($Rs32+#$Ii)",
+tc_eed07714, TypeLD>, Requires<[HasV67]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 1;
+}
+def dup_L2_loadri_io : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s30_2Imm:$Ii),
+"$Rd32 = memw($Rs32+#$Ii)",
+tc_eed07714, TypeLD>, Requires<[HasV67]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 13;
+let opExtentAlign = 2;
+}
+def dup_L2_loadrub_io : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rd32 = memub($Rs32+#$Ii)",
+tc_eed07714, TypeLD>, Requires<[HasV67]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 0;
+}
+def dup_L2_loadruh_io : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s31_1Imm:$Ii),
+"$Rd32 = memuh($Rs32+#$Ii)",
+tc_eed07714, TypeLD>, Requires<[HasV67]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 1;
+}
+def dup_S2_allocframe : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, u11_3Imm:$Ii),
+"allocframe($Rx32,#$Ii):raw",
+tc_74a42bda, TypeST>, Requires<[HasV67]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let Uses = [FRAMEKEY, FRAMELIMIT, R30, R31];
+let Defs = [R30];
+let isPseudo = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def dup_S2_storerb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Rt32),
+"memb($Rs32+#$Ii) = $Rt32",
+tc_a9edeffa, TypeST>, Requires<[HasV67]> {
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 0;
+}
+def dup_S2_storerd_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, s29_3Imm:$Ii, DoubleRegs:$Rtt32),
+"memd($Rs32+#$Ii) = $Rtt32",
+tc_a9edeffa, TypeST>, Requires<[HasV67]> {
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 14;
+let opExtentAlign = 3;
+}
+def dup_S2_storerh_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32),
+"memh($Rs32+#$Ii) = $Rt32",
+tc_a9edeffa, TypeST>, Requires<[HasV67]> {
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 1;
+}
+def dup_S2_storeri_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Rt32),
+"memw($Rs32+#$Ii) = $Rt32",
+tc_a9edeffa, TypeST>, Requires<[HasV67]> {
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 13;
+let opExtentAlign = 2;
+}
+def dup_S4_storeirb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
+"memb($Rs32+#$Ii) = #$II",
+tc_838c4d7a, TypeV4LDST>, Requires<[HasV67]> {
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def dup_S4_storeiri_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
+"memw($Rs32+#$Ii) = #$II",
+tc_838c4d7a, TypeV4LDST>, Requires<[HasV67]> {
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
index 61a1df5eb94b..0143d6f44d88 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
@@ -1,2750 +1,2680 @@
-//===-------------------------------------------------------*- tablegen -*-===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// Automatically generated file, please consult code owner before editing.
+// Automatically generated file, do not edit!
//===----------------------------------------------------------------------===//
// V5 Scalar Instructions.
-def: Pat<(int_hexagon_S2_asr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (S2_asr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vsatwh DoubleRegs:$src1),
- (S2_vsatwh DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpysu_up IntRegs:$src1, IntRegs:$src2),
- (M2_mpysu_up IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyud_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_acc_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyud_acc_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_cmpysc_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_cmpysc_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_cmpysc_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_cmpysc_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_cmpyi_whc DoubleRegs:$src1, IntRegs:$src2),
- (M4_cmpyi_whc DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_sat_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_sat_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_sat_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_sat_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_shuffoh DoubleRegs:$src1, DoubleRegs:$src2),
- (S2_shuffoh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sfmax IntRegs:$src1, IntRegs:$src2),
- (F2_sfmax IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vabswsat DoubleRegs:$src1),
- (A2_vabswsat DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2),
- (S2_asr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2),
- (S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_combineri IntRegs:$src1, s32_0ImmPred_timm:$src2),
- (A4_combineri IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_nac_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_nac_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_vpmpyh_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M4_vpmpyh_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vcmpy_s0_sat_i DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_vcmpy_s0_sat_i DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_notp DoubleRegs:$src1),
- (A2_notp DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_hl_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_hl_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_or_and PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
- (C4_or_and PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vmac2s_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_vmac2s_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vmac2s_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_vmac2s_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_brevp DoubleRegs:$src1),
- (S2_brevp DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_pmpyw_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M4_pmpyw_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_cl1 IntRegs:$src1),
- (S2_cl1 IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_cmplte IntRegs:$src1, IntRegs:$src2),
- (C4_cmplte IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmpyul_s0 DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_mmpyul_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vaddws DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vaddws DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_maxup DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_maxup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2),
- (A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_interleave DoubleRegs:$src1),
- (S2_interleave DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vrcmpyi_s0 DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_vrcmpyi_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_abssat IntRegs:$src1),
- (A2_abssat IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vcmpwgtu DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vcmpwgtu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpeq IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (C2_cmpeq IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgt IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (C2_cmpgt IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
def: Pat<(int_hexagon_C2_cmpgtu IntRegs:$src1, IntRegs:$src2),
- (C2_cmpgtu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+ (C2_tfrpr (C2_cmpgtu IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_tfrpr (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>, Requires<[HasV5]>;
def: Pat<(int_hexagon_C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2),
- (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cmphgtui IntRegs:$src1, u32_0ImmPred_timm:$src2),
- (A4_cmphgtui IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_cmpgti IntRegs:$src1, s32_0ImmPred_timm:$src2),
- (C2_cmpgti IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyi IntRegs:$src1, IntRegs:$src2),
- (M2_mpyi IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_df2uw_chop DoubleRegs:$src1),
- (F2_conv_df2uw_chop DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cmpheq IntRegs:$src1, IntRegs:$src2),
- (A4_cmpheq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_lh_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_lh_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vrcnegh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (S2_vrcnegh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_extractup DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3),
- (S2_extractup DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_ntstbit_r IntRegs:$src1, IntRegs:$src2),
- (S4_ntstbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_w2sf IntRegs:$src1),
- (F2_conv_w2sf IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_not PredRegs:$src1),
- (C2_not PredRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_tfrpr PredRegs:$src1),
- (C2_tfrpr PredRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_ll_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_ll_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cmpbgt IntRegs:$src1, IntRegs:$src2),
- (A4_cmpbgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (S2_asr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+ (C2_tfrpr (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_tfrpr (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_rcmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (A4_rcmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_rcmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2),
(A4_rcmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_subacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_subacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_orp DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_orp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_up IntRegs:$src1, IntRegs:$src2),
- (M2_mpyu_up IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_acc_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_acc_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2),
- (S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2),
- (S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cmpbgtu IntRegs:$src1, IntRegs:$src2),
- (A4_cmpbgtu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_rcmpeq IntRegs:$src1, IntRegs:$src2),
+ (A4_rcmpeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_rcmpneq IntRegs:$src1, IntRegs:$src2),
+ (A4_rcmpneq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_bitsset IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (C2_bitsset IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_bitsclr IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (C2_bitsclr IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_nbitsset IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (C4_nbitsset IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_nbitsclr IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (C4_nbitsclr IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (C2_tfrpr (C2_cmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgti IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (C2_tfrpr (C2_cmpgti IntRegs:$src1, s32_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_cmpgtui IntRegs:$src1, u32_0ImmPred_timm:$src2),
+ (C2_tfrpr (C2_cmpgtui IntRegs:$src1, u32_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_bitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2),
+ (C2_tfrpr (C2_bitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_nbitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2),
+ (C2_tfrpr (C4_nbitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (C2_tfrpr (C4_cmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmpltei IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (C2_tfrpr (C4_cmpltei IntRegs:$src1, s32_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmplteui IntRegs:$src1, u32_0ImmPred_timm:$src2),
+ (C2_tfrpr (C4_cmplteui IntRegs:$src1, u32_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmpneq IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (C4_cmpneq IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmplte IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (C4_cmplte IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_cmplteu IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (C4_cmplteu IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_and PredRegs:$src1, PredRegs:$src2),
+ (C2_tfrpr (C2_and (C2_tfrrp PredRegs:$src1), (C2_tfrrp PredRegs:$src2)))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_or PredRegs:$src1, PredRegs:$src2),
+ (C2_tfrpr (C2_or (C2_tfrrp PredRegs:$src1), (C2_tfrrp PredRegs:$src2)))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_xor PredRegs:$src1, PredRegs:$src2),
+ (C2_tfrpr (C2_xor (C2_tfrrp PredRegs:$src1), (C2_tfrrp PredRegs:$src2)))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_andn PredRegs:$src1, PredRegs:$src2),
+ (C2_tfrpr (C2_andn (C2_tfrrp PredRegs:$src1), (C2_tfrrp PredRegs:$src2)))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_not PredRegs:$src1),
+ (C2_tfrpr (C2_not (C2_tfrrp PredRegs:$src1)))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_orn PredRegs:$src1, PredRegs:$src2),
+ (C2_tfrpr (C2_orn (C2_tfrrp PredRegs:$src1), (C2_tfrrp PredRegs:$src2)))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_and_and PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C2_tfrpr (C4_and_and (C2_tfrrp PredRegs:$src1), (C2_tfrrp PredRegs:$src2), (C2_tfrrp PredRegs:$src3)))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_and_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C2_tfrpr (C4_and_or (C2_tfrrp PredRegs:$src1), (C2_tfrrp PredRegs:$src2), (C2_tfrrp PredRegs:$src3)))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_or_and PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C2_tfrpr (C4_or_and (C2_tfrrp PredRegs:$src1), (C2_tfrrp PredRegs:$src2), (C2_tfrrp PredRegs:$src3)))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_or_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C2_tfrpr (C4_or_or (C2_tfrrp PredRegs:$src1), (C2_tfrrp PredRegs:$src2), (C2_tfrrp PredRegs:$src3)))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_and_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C2_tfrpr (C4_and_andn (C2_tfrrp PredRegs:$src1), (C2_tfrrp PredRegs:$src2), (C2_tfrrp PredRegs:$src3)))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_and_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C2_tfrpr (C4_and_orn (C2_tfrrp PredRegs:$src1), (C2_tfrrp PredRegs:$src2), (C2_tfrrp PredRegs:$src3)))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_or_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C2_tfrpr (C4_or_andn (C2_tfrrp PredRegs:$src1), (C2_tfrrp PredRegs:$src2), (C2_tfrrp PredRegs:$src3)))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_or_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (C2_tfrpr (C4_or_orn (C2_tfrrp PredRegs:$src1), (C2_tfrrp PredRegs:$src2), (C2_tfrrp PredRegs:$src3)))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_pxfer_map PredRegs:$src1),
+ (C2_tfrpr (C2_pxfer_map (C2_tfrrp PredRegs:$src1)))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_any8 PredRegs:$src1),
+ (C2_tfrpr (C2_any8 (C2_tfrrp PredRegs:$src1)))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_all8 PredRegs:$src1),
+ (C2_tfrpr (C2_all8 (C2_tfrrp PredRegs:$src1)))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_vitpack PredRegs:$src1, PredRegs:$src2),
+ (C2_vitpack (C2_tfrrp PredRegs:$src1), (C2_tfrrp PredRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_mux PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (C2_mux (C2_tfrrp PredRegs:$src1), IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_muxii PredRegs:$src1, s32_0ImmPred_timm:$src2, s8_0ImmPred_timm:$src3),
+ (C2_muxii (C2_tfrrp PredRegs:$src1), s32_0ImmPred_timm:$src2, s8_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
+ (C2_muxir (C2_tfrrp PredRegs:$src1), IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_muxri PredRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3),
+ (C2_muxri (C2_tfrrp PredRegs:$src1), s32_0ImmPred_timm:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_vmux PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (C2_vmux (C2_tfrrp PredRegs:$src1), DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_mask PredRegs:$src1),
+ (C2_mask (C2_tfrrp PredRegs:$src1))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpbeq DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_tfrpr (A2_vcmpbeq DoubleRegs:$src1, DoubleRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred_timm:$src2),
+ (C2_tfrpr (A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_vcmpbeq_any DoubleRegs:$src1, DoubleRegs:$src2),
- (A4_vcmpbeq_any DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+ (C2_tfrpr (A4_vcmpbeq_any DoubleRegs:$src1, DoubleRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpbgtu DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_tfrpr (A2_vcmpbgtu DoubleRegs:$src1, DoubleRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2),
+ (C2_tfrpr (A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbgt DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_tfrpr (A4_vcmpbgt DoubleRegs:$src1, DoubleRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2),
+ (C2_tfrpr (A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbeq IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (A4_cmpbeq IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbeqi IntRegs:$src1, u8_0ImmPred_timm:$src2),
+ (C2_tfrpr (A4_cmpbeqi IntRegs:$src1, u8_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbgtu IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (A4_cmpbgtu IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbgtui IntRegs:$src1, u32_0ImmPred_timm:$src2),
+ (C2_tfrpr (A4_cmpbgtui IntRegs:$src1, u32_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpbgt IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (A4_cmpbgt IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_cmpbgti IntRegs:$src1, s8_0ImmPred_timm:$src2),
- (A4_cmpbgti IntRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_lh_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_addsp IntRegs:$src1, DoubleRegs:$src2),
- (A2_addsp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_vxsubaddw DoubleRegs:$src1, DoubleRegs:$src2),
- (S4_vxsubaddw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+ (C2_tfrpr (A4_cmpbgti IntRegs:$src1, s8_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpheq DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_tfrpr (A2_vcmpheq DoubleRegs:$src1, DoubleRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmphgt DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_tfrpr (A2_vcmphgt DoubleRegs:$src1, DoubleRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmphgtu DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_tfrpr (A2_vcmphgtu DoubleRegs:$src1, DoubleRegs:$src2))>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2),
- (A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_vxsubaddh DoubleRegs:$src1, DoubleRegs:$src2),
- (S4_vxsubaddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_pmpyw IntRegs:$src1, IntRegs:$src2),
- (M4_pmpyw IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vsathb DoubleRegs:$src1),
- (S2_vsathb DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (S2_asr_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_acc_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyu_acc_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_acc_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyu_acc_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsl_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (S2_lsl_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_pxorf PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (A2_pxorf PredRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2),
- (S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2),
- (S2_asl_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vrminuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (A4_vrminuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sffma IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (F2_sffma IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_absp DoubleRegs:$src1),
- (A2_absp DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_all8 PredRegs:$src1),
- (C2_all8 PredRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vrminuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (A4_vrminuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sffma_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (F2_sffma_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_vrmpyoh_s0 DoubleRegs:$src1, DoubleRegs:$src2),
- (M4_vrmpyoh_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_vrmpyoh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
- (M4_vrmpyoh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_bitsset IntRegs:$src1, IntRegs:$src2),
- (C2_bitsset IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpysip IntRegs:$src1, u32_0ImmPred_timm:$src2),
- (M2_mpysip IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpysin IntRegs:$src1, u8_0ImmPred_timm:$src2),
- (M2_mpysin IntRegs:$src1, u8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+ (C2_tfrpr (A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2),
+ (C2_tfrpr (A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2),
+ (C2_tfrpr (A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpheq IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (A4_cmpheq IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmphgt IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (A4_cmphgt IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmphgtu IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (A4_cmphgtu IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmpheqi IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (C2_tfrpr (A4_cmpheqi IntRegs:$src1, s32_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmphgti IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (C2_tfrpr (A4_cmphgti IntRegs:$src1, s32_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cmphgtui IntRegs:$src1, u32_0ImmPred_timm:$src2),
+ (C2_tfrpr (A4_cmphgtui IntRegs:$src1, u32_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpweq DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_tfrpr (A2_vcmpweq DoubleRegs:$src1, DoubleRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpwgt DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_tfrpr (A2_vcmpwgt DoubleRegs:$src1, DoubleRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vcmpwgtu DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_tfrpr (A2_vcmpwgtu DoubleRegs:$src1, DoubleRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2),
+ (C2_tfrpr (A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2),
+ (C2_tfrpr (A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2),
+ (C2_tfrpr (A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A4_boundscheck IntRegs:$src1, DoubleRegs:$src2),
- (A4_boundscheck IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M5_vrmpybuu DoubleRegs:$src1, DoubleRegs:$src2),
- (M5_vrmpybuu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+ (C2_tfrpr (A4_boundscheck IntRegs:$src1, DoubleRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_tlbmatch DoubleRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (A4_tlbmatch DoubleRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_tfrpr PredRegs:$src1),
+ (C2_tfrpr (C2_tfrrp PredRegs:$src1))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C2_tfrrp IntRegs:$src1),
+ (C2_tfrpr (C2_tfrrp IntRegs:$src1))>, Requires<[HasV5]>;
def: Pat<(int_hexagon_C4_fastcorner9 PredRegs:$src1, PredRegs:$src2),
- (C4_fastcorner9 PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vrcmpys_s1rp DoubleRegs:$src1, IntRegs:$src2),
- (M2_vrcmpys_s1rp DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_subsat IntRegs:$src1, IntRegs:$src2),
- (A2_subsat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_r_r IntRegs:$src1, IntRegs:$src2),
- (S2_asl_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_r_p DoubleRegs:$src1, IntRegs:$src2),
- (S2_asl_r_p DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vnavgh DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vnavgh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+ (C2_tfrpr (C4_fastcorner9 (C2_tfrrp PredRegs:$src1), (C2_tfrrp PredRegs:$src2)))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_C4_fastcorner9_not PredRegs:$src1, PredRegs:$src2),
+ (C2_tfrpr (C4_fastcorner9_not (C2_tfrrp PredRegs:$src1), (C2_tfrrp PredRegs:$src2)))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_acc_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_acc_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_nac_sat_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpy_nac_sat_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_ud2df DoubleRegs:$src1),
- (F2_conv_ud2df DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vnavgw DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vnavgw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_subi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S4_subi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vzxthw IntRegs:$src1),
- (S2_vzxthw IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sfadd IntRegs:$src1, IntRegs:$src2),
- (F2_sfadd IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_sub IntRegs:$src1, IntRegs:$src2),
- (A2_sub IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vmac2su_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_vmac2su_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vmac2su_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_vmac2su_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3, u5_0ImmPred_timm:$src4),
- (S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3, u5_0ImmPred_timm:$src4)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_packhl IntRegs:$src1, IntRegs:$src2),
- (S2_packhl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2),
- (A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vavguwr DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vavguwr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (S2_asl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_svsubhs IntRegs:$src1, IntRegs:$src2),
- (A2_svsubhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_addh_l16_hl IntRegs:$src1, IntRegs:$src2),
- (A2_addh_l16_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_and_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M4_and_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_d2df DoubleRegs:$src1),
- (F2_conv_d2df DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_cmpgtui IntRegs:$src1, u32_0ImmPred_timm:$src2),
- (C2_cmpgtui IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vconj DoubleRegs:$src1),
- (A2_vconj DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_r_vw DoubleRegs:$src1, IntRegs:$src2),
- (S2_lsr_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_r_vh DoubleRegs:$src1, IntRegs:$src2),
- (S2_lsr_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_subh_l16_hl IntRegs:$src1, IntRegs:$src2),
- (A2_subh_l16_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_vxsubaddhr DoubleRegs:$src1, DoubleRegs:$src2),
- (S4_vxsubaddhr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_clbp DoubleRegs:$src1),
- (S2_clbp DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_deinterleave DoubleRegs:$src1),
- (S2_deinterleave DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_any8 PredRegs:$src1),
- (C2_any8 PredRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_togglebit_r IntRegs:$src1, IntRegs:$src2),
- (S2_togglebit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_togglebit_i IntRegs:$src1, u5_0ImmPred_timm:$src2),
- (S2_togglebit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_uw2sf IntRegs:$src1),
- (F2_conv_uw2sf IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vsathb_nopack DoubleRegs:$src1),
- (S2_vsathb_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_cmacs_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_cmacs_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_cmacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_cmacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_nac_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpy_nac_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_sat_hh_s0 IntRegs:$src1, IntRegs:$src2),
(M2_mpy_sat_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_sat_hh_s1 IntRegs:$src1, IntRegs:$src2),
(M2_mpy_sat_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmacuhs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_mmacuhs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmacuhs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_mmacuhs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_clrbit_r IntRegs:$src1, IntRegs:$src2),
- (S2_clrbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_or_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
- (C4_or_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (S2_asl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
- (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2),
- (A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_vrmpyoh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M4_vrmpyoh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_vrmpyoh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M4_vrmpyoh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vrmaxh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (A4_vrmaxh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vcmpbeq DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vcmpbeq DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vcmphgt DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vcmphgt DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vnavgwcr DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vnavgwcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vrcmacr_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_vrcmacr_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vavgwcr DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vavgwcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
- (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vrmaxw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (A4_vrmaxw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vnavghr DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vnavghr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_cmpyi_wh DoubleRegs:$src1, IntRegs:$src2),
- (M4_cmpyi_wh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_tfrsi s32_0ImmPred_timm:$src1),
- (A2_tfrsi s32_0ImmPred_timm:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_svnavgh IntRegs:$src1, IntRegs:$src2),
- (A2_svnavgh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2),
- (S2_lsr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vmac2 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_vmac2 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2),
- (A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_svavgh IntRegs:$src1, IntRegs:$src2),
- (A2_svavgh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_vrmpyeh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M4_vrmpyeh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_vrmpyeh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M4_vrmpyeh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2),
- (S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_combine_hl IntRegs:$src1, IntRegs:$src2),
- (A2_combine_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_up IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_up IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_combine_hh IntRegs:$src1, IntRegs:$src2),
- (A2_combine_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_negsat IntRegs:$src1),
- (A2_negsat IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_hl_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyd_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_hl_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_bitsplit IntRegs:$src1, IntRegs:$src2),
- (A4_bitsplit IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vabshsat DoubleRegs:$src1),
- (A2_vabshsat DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyui IntRegs:$src1, IntRegs:$src2),
- (M2_mpyui IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_addh_l16_sat_ll IntRegs:$src1, IntRegs:$src2),
- (A2_addh_l16_sat_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (S2_lsl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmpyul_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_mmpyul_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (S2_lsr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_cmacr_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_cmacr_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_or_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M4_or_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_mpyrr_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, IntRegs:$src3),
- (M4_mpyrr_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
- (S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_sat_hl_s0 IntRegs:$src1, IntRegs:$src2),
(M2_mpy_sat_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_sat_hl_s1 IntRegs:$src1, IntRegs:$src2),
(M2_mpy_sat_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmachs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_mmachs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmachs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_mmachs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vrcmpyr_s0c DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_vrcmpyr_s0c DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_acc_sat_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_acc_sat_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_acc_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyd_acc_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sffixupn IntRegs:$src1, IntRegs:$src2),
- (F2_sffixupn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyd_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyd_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2),
(M2_mpy_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2),
(M2_mpy_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vadduhs DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vadduhs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vsubuhs DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vsubuhs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_subh_h16_hl IntRegs:$src1, IntRegs:$src2),
- (A2_subh_h16_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_subh_h16_hh IntRegs:$src1, IntRegs:$src2),
- (A2_subh_h16_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_xorp DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_xorp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_tfrpcp DoubleRegs:$src1),
- (A4_tfrpcp DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_addh_h16_lh IntRegs:$src1, IntRegs:$src2),
- (A2_addh_h16_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_addh_h16_sat_hl IntRegs:$src1, IntRegs:$src2),
- (A2_addh_h16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_addh_h16_ll IntRegs:$src1, IntRegs:$src2),
- (A2_addh_h16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_addh_h16_sat_hh IntRegs:$src1, IntRegs:$src2),
- (A2_addh_h16_sat_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_zxtb IntRegs:$src1),
- (A2_zxtb IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_zxth IntRegs:$src1),
- (A2_zxth IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vnavgwr DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vnavgwr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_or_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M4_or_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyud_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyud_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M5_vmacbsu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M5_vmacbsu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2),
(M2_mpy_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2),
(M2_mpy_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sffms_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (F2_sffms_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_cmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2),
- (C4_cmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_and_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M4_and_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_sat DoubleRegs:$src1),
- (A2_sat DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyd_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_sat_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_sat_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_acc_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_acc_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyd_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpyd_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_addsat IntRegs:$src1, IntRegs:$src2),
- (A2_addsat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_svavghs IntRegs:$src1, IntRegs:$src2),
- (A2_svavghs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vrsadub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (A2_vrsadub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_bitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2),
- (C2_bitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_subh_h16_sat_hh IntRegs:$src1, IntRegs:$src2),
- (A2_subh_h16_sat_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_subh_h16_sat_hl IntRegs:$src1, IntRegs:$src2),
- (A2_subh_h16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmaculs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_mmaculs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmaculs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_mmaculs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vradduh DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_vradduh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_addp_c DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
- (A4_addp_c DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_xor PredRegs:$src1, PredRegs:$src2),
- (C2_xor PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (S2_lsl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmpyh_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_mmpyh_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmpyh_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_mmpyh_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_df2ud_chop DoubleRegs:$src1),
- (F2_conv_df2ud_chop DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_or_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
- (C4_or_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_vxaddsubhr DoubleRegs:$src1, DoubleRegs:$src2),
- (S4_vxaddsubhr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vsathub DoubleRegs:$src1),
- (S2_vsathub DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_df2sf DoubleRegs:$src1),
- (F2_conv_df2sf DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_hmmpyh_rs1 IntRegs:$src1, IntRegs:$src2),
- (M2_hmmpyh_rs1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_hmmpyh_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_hmmpyh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vavgwr DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vavgwr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_sxth IntRegs:$src1),
- (A2_sxth IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_sxtb IntRegs:$src1),
- (A2_sxtb IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_or_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
- (C4_or_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vrcmaci_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_vrcmaci_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_sxtw IntRegs:$src1),
- (A2_sxtw IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vabsdiffh DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_vabsdiffh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_acc_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_acc_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_acc_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_acc_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_hmmpyl_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_hmmpyl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_cl1p DoubleRegs:$src1),
- (S2_cl1p DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vabsdiffw DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_vabsdiffw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_andnp DoubleRegs:$src1, DoubleRegs:$src2),
- (A4_andnp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_vmux PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (C2_vmux PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_parityp DoubleRegs:$src1, DoubleRegs:$src2),
- (S2_parityp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
- (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_nac_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyd_nac_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyd_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyd_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_nac_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyu_nac_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyu_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpyu_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyu_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpyu_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sfcmpeq IntRegs:$src1, IntRegs:$src2),
- (F2_sfcmpeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vaddb_map DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vaddub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (S2_lsr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vcmpheq DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vcmpheq DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_clbnorm IntRegs:$src1),
- (S2_clbnorm IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_cnacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_cnacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_cnacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_cnacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_subaddi IntRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3),
- (S4_subaddi IntRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyud_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_acc_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_acc_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpyud_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mpyud_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_tstbit_r IntRegs:$src1, IntRegs:$src2),
- (S2_tstbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred_timm:$src3),
- (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmachs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_mmachs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmachs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_mmachs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_tstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2),
- (S2_tstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_nac_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_mpyud_nac_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_hh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_hh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_hl_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_hl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_lh_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_lh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_ll_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyud_ll_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyud_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpysmi IntRegs:$src1, m32_0ImmPred_timm:$src2),
+ (M2_mpysmi IntRegs:$src1, m32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3),
+ (M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3),
+ (M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_up IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_up IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mpy_up_s1 IntRegs:$src1, IntRegs:$src2),
(M2_mpy_up_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_extractu_rp IntRegs:$src1, DoubleRegs:$src2),
- (S2_extractu_rp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmpyuh_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_mmpyuh_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2),
- (S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_or_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M4_or_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_hh_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyu_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_hh_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyu_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (S2_asl_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_nac_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyu_nac_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_nac_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyu_nac_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_sat_ll_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_sat_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_sat_ll_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_sat_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_w2df IntRegs:$src1),
- (F2_conv_w2df IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_subh_l16_sat_hl IntRegs:$src1, IntRegs:$src2),
- (A2_subh_l16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_cmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2),
- (C2_cmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vcnegh DoubleRegs:$src1, IntRegs:$src2),
- (S2_vcnegh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2),
- (A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpy_up_s1_sat IntRegs:$src1, IntRegs:$src2),
+ (M2_mpy_up_s1_sat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyu_up IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyu_up IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpysu_up IntRegs:$src1, IntRegs:$src2),
+ (M2_mpysu_up IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_dpmpyss_rnd_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_dpmpyss_rnd_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_mac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_nac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_nac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyi IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyi IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mpyui IntRegs:$src1, IntRegs:$src2),
+ (M2_mpyui IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
+ (M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_nacci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_nacci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
+ (M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_subacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_subacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[UseCompound, HasV5]>;
+def: Pat<(int_hexagon_M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred_timm:$src2, IntRegs:$src3),
+ (M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred_timm:$src2, IntRegs:$src3)>, Requires<[UseCompound, HasV5]>;
+def: Pat<(int_hexagon_M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3),
+ (M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3)>, Requires<[UseCompound, HasV5]>;
+def: Pat<(int_hexagon_M4_mpyri_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, u6_0ImmPred_timm:$src3),
+ (M4_mpyri_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[UseCompound, HasV5]>;
+def: Pat<(int_hexagon_M4_mpyrr_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_mpyrr_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[UseCompound, HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2s_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2s_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2s_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2s_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2s_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_vmac2s_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2s_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_vmac2s_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2su_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2su_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2su_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2su_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2su_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_vmac2su_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2su_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_vmac2su_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2s_s0pack IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2s_s0pack IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2s_s1pack IntRegs:$src1, IntRegs:$src2),
+ (M2_vmpy2s_s1pack IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_vmac2 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2es_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vmpy2es_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmpy2es_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vmpy2es_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2es_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vmac2es_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2es_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vmac2es_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vmac2es DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vmac2es DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrmac_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vrmac_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrmpy_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vrmpy_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2),
(M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2),
(M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_xor_xacc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M4_xor_xacc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vdmpys_s1 DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_vdmpys_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vrmpybuu DoubleRegs:$src1, DoubleRegs:$src2),
+ (M5_vrmpybuu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vrmacbuu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M5_vrmacbuu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vrmpybsu DoubleRegs:$src1, DoubleRegs:$src2),
+ (M5_vrmpybsu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vrmacbsu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M5_vrmacbsu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vmpybuu IntRegs:$src1, IntRegs:$src2),
+ (M5_vmpybuu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vmpybsu IntRegs:$src1, IntRegs:$src2),
+ (M5_vmpybsu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vmacbuu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M5_vmacbuu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vmacbsu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M5_vmacbsu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vdmpybsu DoubleRegs:$src1, DoubleRegs:$src2),
+ (M5_vdmpybsu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M5_vdmacbsu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M5_vdmacbsu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmacs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vdmacs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmacs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vdmacs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_vdmpys_s0 DoubleRegs:$src1, DoubleRegs:$src2),
(M2_vdmpys_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vavgubr DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vavgubr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_hl_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyu_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_hl_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyu_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (S2_asl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_cl0p DoubleRegs:$src1),
- (S2_cl0p DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3),
- (S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sffixupd IntRegs:$src1, IntRegs:$src2),
- (F2_sffixupd IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_sat_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_sat_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_sat_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_sat_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vdmpys_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vdmpys_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyrs_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyrs_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyrs_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyrs_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyrsc_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyrsc_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyrsc_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyrsc_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmacs_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmacs_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_cmacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_cmacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_cmacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_cmacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_ct1 IntRegs:$src1),
- (S2_ct1 IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_ct0 IntRegs:$src1),
- (S2_ct0 IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpys_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpys_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpys_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpys_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpysc_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpysc_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpysc_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpysc_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cnacs_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cnacs_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cnacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cnacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cnacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cnacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cnacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cnacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpys_s1 DoubleRegs:$src1, IntRegs:$src2),
+ (M2_vrcmpys_s1 DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpys_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (M2_vrcmpys_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpys_s1rp DoubleRegs:$src1, IntRegs:$src2),
+ (M2_vrcmpys_s1rp DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacls_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacls_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacls_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacls_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmachs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmachs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmachs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmachs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyl_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyl_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyl_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyl_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyh_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyh_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacls_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacls_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacls_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacls_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmachs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmachs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmachs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmachs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyl_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyl_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyl_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyl_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyh_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyh_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyh_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyh_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyeh_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M4_vrmpyeh_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyeh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M4_vrmpyeh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyeh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M4_vrmpyeh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyeh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M4_vrmpyeh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyoh_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M4_vrmpyoh_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyoh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M4_vrmpyoh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyoh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M4_vrmpyoh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vrmpyoh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M4_vrmpyoh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_hmmpyl_rs1 IntRegs:$src1, IntRegs:$src2),
+ (M2_hmmpyl_rs1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_hmmpyh_rs1 IntRegs:$src1, IntRegs:$src2),
+ (M2_hmmpyh_rs1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_hmmpyl_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_hmmpyl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_hmmpyh_s1 IntRegs:$src1, IntRegs:$src2),
+ (M2_hmmpyh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmaculs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmaculs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmaculs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmaculs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacuhs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacuhs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacuhs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacuhs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyul_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyul_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyul_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyul_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyuh_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyuh_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyuh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyuh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmaculs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmaculs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmaculs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmaculs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacuhs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacuhs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmacuhs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_mmacuhs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyul_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyul_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mmpyul_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
(M2_mmpyul_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_ntstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2),
- (S4_ntstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sffixupr IntRegs:$src1),
- (F2_sffixupr IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyud_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyud_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vcmphgtu DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vcmphgtu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_andn PredRegs:$src1, PredRegs:$src2),
- (C2_andn PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vmpy2s_s0pack IntRegs:$src1, IntRegs:$src2),
- (M2_vmpy2s_s0pack IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
- (S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyd_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_acc_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_acc_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_rcmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2),
- (A4_rcmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_xor_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M4_xor_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
- (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_mmpyuh_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_mmpyuh_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_mmpyuh_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
(M2_mmpyuh_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (S2_asr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_round_ri IntRegs:$src1, u5_0ImmPred_timm:$src2),
- (A4_round_ri IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_max IntRegs:$src1, IntRegs:$src2),
- (A2_max IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_round_rr IntRegs:$src1, IntRegs:$src2),
- (A4_round_rr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_combineii s8_0ImmPred_timm:$src1, u32_0ImmPred_timm:$src2),
- (A4_combineii s8_0ImmPred_timm:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_combineir s32_0ImmPred_timm:$src1, IntRegs:$src2),
- (A4_combineir s32_0ImmPred_timm:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_and_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
- (C4_and_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M5_vmacbuu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M5_vmacbuu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_rcmpeq IntRegs:$src1, IntRegs:$src2),
- (A4_rcmpeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_cmpyr_whc DoubleRegs:$src1, IntRegs:$src2),
- (M4_cmpyr_whc DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vzxtbh IntRegs:$src1),
- (S2_vzxtbh IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmacuhs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_mmacuhs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_r_r_sat IntRegs:$src1, IntRegs:$src2),
- (S2_asr_r_r_sat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_combinew IntRegs:$src1, IntRegs:$src2),
- (A2_combinew IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_acc_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_acc_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmaci_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vrcmaci_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmacr_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vrcmacr_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmaci_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vrcmaci_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmacr_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vrcmacr_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmaci_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmaci_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmacr_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_cmacr_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpyi_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vrcmpyi_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpyr_s0 DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vrcmpyr_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpyi_s0c DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vrcmpyi_s0c DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vrcmpyr_s0c DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vrcmpyr_s0c DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M2_cmpyi_s0 IntRegs:$src1, IntRegs:$src2),
(M2_cmpyi_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_ori_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S4_ori_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_nbitsset IntRegs:$src1, IntRegs:$src2),
- (C4_nbitsset IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyu_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyu_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_ll_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyu_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_ll_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyu_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_addh_l16_ll IntRegs:$src1, IntRegs:$src2),
- (A2_addh_l16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (S2_lsr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_modwrapu IntRegs:$src1, IntRegs:$src2),
- (A4_modwrapu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_rcmpneq IntRegs:$src1, IntRegs:$src2),
- (A4_rcmpneq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyd_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyd_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sfimm_p u10_0ImmPred_timm:$src1),
- (F2_sfimm_p u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sfimm_n u10_0ImmPred_timm:$src1),
- (F2_sfimm_n u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_cmpyr_s0 IntRegs:$src1, IntRegs:$src2),
+ (M2_cmpyr_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_cmpyi_wh DoubleRegs:$src1, IntRegs:$src2),
+ (M4_cmpyi_wh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_M4_cmpyr_wh DoubleRegs:$src1, IntRegs:$src2),
(M4_cmpyr_wh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vavgub DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vavgub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_d2sf DoubleRegs:$src1),
- (F2_conv_d2sf DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vavguh DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vavguh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cmpbeqi IntRegs:$src1, u8_0ImmPred_timm:$src2),
- (A4_cmpbeqi IntRegs:$src1, u8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sfcmpuo IntRegs:$src1, IntRegs:$src2),
- (F2_sfcmpuo IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vavguw DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vavguw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
- (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vsatwh_nopack DoubleRegs:$src1),
- (S2_vsatwh_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_hh_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyd_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_hh_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (S2_lsl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_cmpyi_whc DoubleRegs:$src1, IntRegs:$src2),
+ (M4_cmpyi_whc DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_cmpyr_whc DoubleRegs:$src1, IntRegs:$src2),
+ (M4_cmpyr_whc DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmpy_s0_sat_i DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vcmpy_s0_sat_i DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmpy_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vcmpy_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmpy_s1_sat_i DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vcmpy_s1_sat_i DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmpy_s1_sat_r DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vcmpy_s1_sat_r DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmac_s0_sat_i DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vcmac_s0_sat_i DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vcmac_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M2_vcmac_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vcrotate DoubleRegs:$src1, IntRegs:$src2),
+ (S2_vcrotate DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred_timm:$src4),
+ (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred_timm:$src4)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred_timm:$src3),
+ (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vcnegh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_vcnegh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vrcnegh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_vrcnegh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_pmpyw IntRegs:$src1, IntRegs:$src2),
+ (M4_pmpyw IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vpmpyh IntRegs:$src1, IntRegs:$src2),
+ (M4_vpmpyh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_pmpyw_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_pmpyw_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_vpmpyh_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_vpmpyh_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_add IntRegs:$src1, IntRegs:$src2),
+ (A2_add IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sub IntRegs:$src1, IntRegs:$src2),
+ (A2_sub IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addsat IntRegs:$src1, IntRegs:$src2),
+ (A2_addsat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subsat IntRegs:$src1, IntRegs:$src2),
+ (A2_subsat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addi IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (A2_addi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_l16_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_l16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_l16_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_l16_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_l16_sat_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_l16_sat_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_l16_sat_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_l16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_l16_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_l16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_l16_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_l16_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_l16_sat_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_l16_sat_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_l16_sat_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_l16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_lh IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_hh IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_sat_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_sat_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_sat_lh IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_sat_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_sat_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addh_h16_sat_hh IntRegs:$src1, IntRegs:$src2),
+ (A2_addh_h16_sat_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_lh IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_hh IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_sat_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_sat_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_sat_lh IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_sat_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_sat_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subh_h16_sat_hh IntRegs:$src1, IntRegs:$src2),
+ (A2_subh_h16_sat_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_aslh IntRegs:$src1),
+ (A2_aslh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_asrh IntRegs:$src1),
+ (A2_asrh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_addp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addpsat DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_addpsat DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_addsp IntRegs:$src1, DoubleRegs:$src2),
+ (A2_addsp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_subp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_negsat IntRegs:$src1),
+ (A2_negsat IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_abs IntRegs:$src1),
+ (A2_abs IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_abssat IntRegs:$src1),
+ (A2_abssat IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vconj DoubleRegs:$src1),
+ (A2_vconj DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_negp DoubleRegs:$src1),
+ (A2_negp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_absp DoubleRegs:$src1),
+ (A2_absp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_max IntRegs:$src1, IntRegs:$src2),
+ (A2_max IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_maxu IntRegs:$src1, IntRegs:$src2),
+ (A2_maxu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_min IntRegs:$src1, IntRegs:$src2),
+ (A2_min IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_minu IntRegs:$src1, IntRegs:$src2),
(A2_minu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_sat_lh_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_sat_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_or_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M4_or_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_maxp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_maxp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_maxup DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_maxup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_minp DoubleRegs:$src1, DoubleRegs:$src2),
(A2_minp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_minup DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_minup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfr IntRegs:$src1),
+ (A2_tfr IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrsi s32_0ImmPred_timm:$src1),
+ (A2_tfrsi s32_0ImmPred_timm:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrp DoubleRegs:$src1),
+ (A2_tfrp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_zxtb IntRegs:$src1),
+ (A2_zxtb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sxtb IntRegs:$src1),
+ (A2_sxtb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_zxth IntRegs:$src1),
+ (A2_zxth IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sxth IntRegs:$src1),
+ (A2_sxth IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combinew IntRegs:$src1, IntRegs:$src2),
+ (A2_combinew IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_combineri IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (A4_combineri IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_combineir s32_0ImmPred_timm:$src1, IntRegs:$src2),
+ (A4_combineir s32_0ImmPred_timm:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combineii s32_0ImmPred_timm:$src1, s8_0ImmPred_timm:$src2),
+ (A2_combineii s32_0ImmPred_timm:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combine_hh IntRegs:$src1, IntRegs:$src2),
+ (A2_combine_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combine_hl IntRegs:$src1, IntRegs:$src2),
+ (A2_combine_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combine_lh IntRegs:$src1, IntRegs:$src2),
+ (A2_combine_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_combine_ll IntRegs:$src1, IntRegs:$src2),
+ (A2_combine_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfril IntRegs:$src1, u16_0ImmPred_timm:$src2),
+ (A2_tfril IntRegs:$src1, u16_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_tfrih IntRegs:$src1, u16_0ImmPred_timm:$src2),
+ (A2_tfrih IntRegs:$src1, u16_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_and IntRegs:$src1, IntRegs:$src2),
+ (A2_and IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_or IntRegs:$src1, IntRegs:$src2),
+ (A2_or IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_xor IntRegs:$src1, IntRegs:$src2),
+ (A2_xor IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_xor_xacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M2_xor_xacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_xor_xacc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M4_xor_xacc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_andn IntRegs:$src1, IntRegs:$src2),
+ (A4_andn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_orn IntRegs:$src1, IntRegs:$src2),
+ (A4_orn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_andnp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A4_andnp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_ornp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A4_ornp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
+ (S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[UseCompound, HasV5]>;
+def: Pat<(int_hexagon_S4_subaddi IntRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3),
+ (S4_subaddi IntRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3)>, Requires<[UseCompound, HasV5]>;
+def: Pat<(int_hexagon_M4_and_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_and_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_and_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_and_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_and_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_and_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_and_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_and_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_or_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_or_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_or_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_or_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_or_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_or_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_or_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_or_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
- (S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmpyuh_s0 DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_mmpyuh_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmpyuh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_mmpyuh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_acc_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_acc_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sfcmpge IntRegs:$src1, IntRegs:$src2),
- (F2_sfcmpge IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sfmin IntRegs:$src1, IntRegs:$src2),
- (F2_sfmin IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sfcmpgt IntRegs:$src1, IntRegs:$src2),
- (F2_sfcmpgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_vpmpyh IntRegs:$src1, IntRegs:$src2),
- (M4_vpmpyh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmacuhs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_mmacuhs_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyd_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyd_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+ (S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[UseCompound, HasV5]>;
+def: Pat<(int_hexagon_S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
+ (S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
+ (S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_xor_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_xor_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_xor_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_xor_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M4_xor_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (M4_xor_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_subri s32_0ImmPred_timm:$src1, IntRegs:$src2),
+ (A2_subri s32_0ImmPred_timm:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_andir IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (A2_andir IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_orir IntRegs:$src1, s32_0ImmPred_timm:$src2),
+ (A2_orir IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_andp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_andp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_orp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_orp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_xorp DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_xorp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_notp DoubleRegs:$src1),
+ (A2_notp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sxtw IntRegs:$src1),
+ (A2_sxtw IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_sat DoubleRegs:$src1),
+ (A2_sat DoubleRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_roundsat DoubleRegs:$src1),
(A2_roundsat DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_ct1p DoubleRegs:$src1),
- (S2_ct1p DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_extract_rp IntRegs:$src1, DoubleRegs:$src2),
- (S4_extract_rp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (S2_lsl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_cmplteui IntRegs:$src1, u32_0ImmPred_timm:$src2),
- (C4_cmplteui IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_addi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S4_addi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_tfrcpp CtrRegs64:$src1),
- (A4_tfrcpp CtrRegs64:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred_timm:$src2),
- (S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cmphgti IntRegs:$src1, s32_0ImmPred_timm:$src2),
- (A4_cmphgti IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vrminh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (A4_vrminh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vrminw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (A4_vrminw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cmphgtu IntRegs:$src1, IntRegs:$src2),
- (A4_cmphgtu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_insertp_rp DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (S2_insertp_rp DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vnavghcr DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vnavghcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_subi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S4_subi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsl_r_vh DoubleRegs:$src1, IntRegs:$src2),
- (S2_lsl_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_hh_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vsubws DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vsubws DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_sath IntRegs:$src1),
(A2_sath IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_satuh IntRegs:$src1),
+ (A2_satuh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_satub IntRegs:$src1),
+ (A2_satub IntRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_satb IntRegs:$src1),
(A2_satb IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3, u6_0ImmPred_timm:$src4),
- (S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3, u6_0ImmPred_timm:$src4)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyd_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyd_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
- (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_extractup_rp DoubleRegs:$src1, DoubleRegs:$src2),
- (S2_extractup_rp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddb_map DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddubs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddubs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddhs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vadduhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vadduhs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A5_vaddhubs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A5_vaddhubs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vaddws DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vaddws DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S4_vxaddsubw DoubleRegs:$src1, DoubleRegs:$src2),
(S4_vxaddsubw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxsubaddw DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxsubaddw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S4_vxaddsubh DoubleRegs:$src1, DoubleRegs:$src2),
(S4_vxaddsubh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_asrh IntRegs:$src1),
- (A2_asrh IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_extractp_rp DoubleRegs:$src1, DoubleRegs:$src2),
- (S4_extractp_rp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (S2_lsr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_nac_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyd_nac_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_nac_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyd_nac_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_or PredRegs:$src1, PredRegs:$src2),
- (C2_or PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmpyul_s1 DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_mmpyul_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vrcmacr_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_vrcmacr_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_xor IntRegs:$src1, IntRegs:$src2),
- (A2_xor IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_add IntRegs:$src1, IntRegs:$src2),
- (A2_add IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vsububs DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vsububs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vmpy2s_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_vmpy2s_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vmpy2s_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_vmpy2s_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vraddub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (A2_vraddub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sfinvsqrta IntRegs:$src1),
- (F2_sfinvsqrta IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_ct0p DoubleRegs:$src1),
- (S2_ct0p DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxsubaddh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxsubaddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxaddsubhr DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxaddsubhr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_vxsubaddhr DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_vxsubaddhr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svavgh IntRegs:$src1, IntRegs:$src2),
+ (A2_svavgh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svavghs IntRegs:$src1, IntRegs:$src2),
+ (A2_svavghs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svnavgh IntRegs:$src1, IntRegs:$src2),
+ (A2_svnavgh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_svaddh IntRegs:$src1, IntRegs:$src2),
(A2_svaddh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vcrotate DoubleRegs:$src1, IntRegs:$src2),
- (S2_vcrotate DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_aslh IntRegs:$src1),
- (A2_aslh IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_subh_h16_lh IntRegs:$src1, IntRegs:$src2),
- (A2_subh_h16_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_subh_h16_ll IntRegs:$src1, IntRegs:$src2),
- (A2_subh_h16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_hmmpyl_rs1 IntRegs:$src1, IntRegs:$src2),
- (M2_hmmpyl_rs1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_r_p DoubleRegs:$src1, IntRegs:$src2),
- (S2_asr_r_p DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vsplatrh IntRegs:$src1),
- (S2_vsplatrh IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_r_r IntRegs:$src1, IntRegs:$src2),
- (S2_asr_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_addh_h16_hl IntRegs:$src1, IntRegs:$src2),
- (A2_addh_h16_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vsplatrb IntRegs:$src1),
- (S2_vsplatrb IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_addh_h16_hh IntRegs:$src1, IntRegs:$src2),
- (A2_addh_h16_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_cmpyr_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_cmpyr_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_dpmpyss_rnd_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_dpmpyss_rnd_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_muxri PredRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3),
- (C2_muxri PredRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vmac2es_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_vmac2es_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vmac2es_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_vmac2es_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_pxfer_map PredRegs:$src1),
- (C2_pxfer_map PredRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_lh_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyu_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_lh_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyu_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyd_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyd_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vaddw DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vaddw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vaddh DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vaddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_nac_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_nac_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_nac_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_nac_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2),
- (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_mpyri_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, u6_0ImmPred_timm:$src3),
- (M4_mpyri_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_andi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S4_andi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3),
- (M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_tfrcrr CtrRegs:$src1),
- (A2_tfrcrr CtrRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3),
- (M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_orn PredRegs:$src1, PredRegs:$src2),
- (C2_orn PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_and_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M4_and_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sfmpy IntRegs:$src1, IntRegs:$src2),
- (F2_sfmpy IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyud_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyud_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (S2_lsr_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_r_vw DoubleRegs:$src1, IntRegs:$src2),
- (S2_asr_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_and_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M4_and_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_r_vh DoubleRegs:$src1, IntRegs:$src2),
- (S2_asr_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_mask PredRegs:$src1),
- (C2_mask PredRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_up_s1_sat IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_up_s1_sat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmpbgt DoubleRegs:$src1, DoubleRegs:$src2),
- (A4_vcmpbgt DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M5_vrmacbsu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M5_vrmacbsu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svaddhs IntRegs:$src1, IntRegs:$src2),
+ (A2_svaddhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svadduhs IntRegs:$src1, IntRegs:$src2),
+ (A2_svadduhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svsubh IntRegs:$src1, IntRegs:$src2),
+ (A2_svsubh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svsubhs IntRegs:$src1, IntRegs:$src2),
+ (A2_svsubhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_svsubuhs IntRegs:$src1, IntRegs:$src2),
+ (A2_svsubuhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vraddub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vraddub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vraddub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (A2_vraddub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vradduh DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vradduh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubb_map DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsububs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsububs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubhs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubuhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubuhs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vsubws DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vsubws DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vabsh DoubleRegs:$src1),
+ (A2_vabsh DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vabshsat DoubleRegs:$src1),
+ (A2_vabshsat DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vabsw DoubleRegs:$src1),
+ (A2_vabsw DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vabswsat DoubleRegs:$src1),
+ (A2_vabswsat DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vabsdiffw DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vabsdiffw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_M2_vabsdiffh DoubleRegs:$src1, DoubleRegs:$src2),
+ (M2_vabsdiffh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vrsadub DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vrsadub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_tfrrcr IntRegs:$src1),
- (A2_tfrrcr IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vrcmpys_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (M2_vrcmpys_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_dfcmpge DoubleRegs:$src1, DoubleRegs:$src2),
- (F2_dfcmpge DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
- (M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A5_vaddhubs DoubleRegs:$src1, DoubleRegs:$src2),
- (A5_vaddhubs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vmaxw DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vmaxw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vrsadub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (A2_vrsadub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavguh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavguh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavgh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavgh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavgw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavgw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgwr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgwr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavgwr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavgwr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgwcr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgwcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavgwcr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavgwcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavghcr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavghcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavghcr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavghcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavguw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavguw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavguwr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavguwr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavgubr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavgubr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavguhr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavguhr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vavghr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vavghr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vnavghr DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vnavghr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_round_ri IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (A4_round_ri IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_round_rr IntRegs:$src1, IntRegs:$src2),
+ (A4_round_rr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_round_ri_sat IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (A4_round_ri_sat IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_round_rr_sat IntRegs:$src1, IntRegs:$src2),
+ (A4_round_rr_sat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cround_ri IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (A4_cround_ri IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_cround_rr IntRegs:$src1, IntRegs:$src2),
+ (A4_cround_rr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrminh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrminh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrmaxh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrmaxh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrminuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrminuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrmaxuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrmaxuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrminw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrminw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrmaxw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrmaxw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrminuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrminuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_vrmaxuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (A4_vrmaxuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminb DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminb DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vmaxb DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vmaxb DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxub DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_A2_vmaxh DoubleRegs:$src1, DoubleRegs:$src2),
(A2_vmaxh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vsxthw IntRegs:$src1),
- (S2_vsxthw IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_andi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S4_andi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
- (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_cmpgt IntRegs:$src1, IntRegs:$src2),
- (C2_cmpgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_df2d_chop DoubleRegs:$src1),
- (F2_conv_df2d_chop DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyu_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyu_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_sf2w IntRegs:$src1),
- (F2_conv_sf2w IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminuh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminuh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxuh DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxuh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vminuw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vminuw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_vmaxuw DoubleRegs:$src1, DoubleRegs:$src2),
+ (A2_vmaxuw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_modwrapu IntRegs:$src1, IntRegs:$src2),
+ (A4_modwrapu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfadd IntRegs:$src1, IntRegs:$src2),
+ (F2_sfadd IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfsub IntRegs:$src1, IntRegs:$src2),
+ (F2_sfsub IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfmpy IntRegs:$src1, IntRegs:$src2),
+ (F2_sfmpy IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffma IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (F2_sffma IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffma_sc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, PredRegs:$src4),
+ (F2_sffma_sc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, (C2_tfrrp PredRegs:$src4))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffms IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (F2_sffms IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffma_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (F2_sffma_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffms_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (F2_sffms_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfcmpeq IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (F2_sfcmpeq IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfcmpgt IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (F2_sfcmpgt IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfcmpge IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (F2_sfcmpge IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfcmpuo IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (F2_sfcmpuo IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfmax IntRegs:$src1, IntRegs:$src2),
+ (F2_sfmax IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfmin IntRegs:$src1, IntRegs:$src2),
+ (F2_sfmin IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_sfclass IntRegs:$src1, u5_0ImmPred_timm:$src2),
- (F2_sfclass IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyud_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_xor_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M4_xor_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred_timm:$src3),
- (S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M5_vdmpybsu DoubleRegs:$src1, DoubleRegs:$src2),
- (M5_vdmpybsu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyu_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyu_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_addi IntRegs:$src1, s32_0ImmPred_timm:$src2),
- (A2_addi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_addp DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_addp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vmpy2s_s1pack IntRegs:$src1, IntRegs:$src2),
- (M2_vmpy2s_s1pack IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_clbpnorm DoubleRegs:$src1),
- (S4_clbpnorm DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_round_rr_sat IntRegs:$src1, IntRegs:$src2),
- (A4_round_rr_sat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_nacci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_nacci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_shuffeh DoubleRegs:$src1, DoubleRegs:$src2),
- (S2_shuffeh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_sat_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_sat_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_sat_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_sat_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+ (C2_tfrpr (F2_sfclass IntRegs:$src1, u5_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfimm_p u10_0ImmPred_timm:$src1),
+ (F2_sfimm_p u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sfimm_n u10_0ImmPred_timm:$src1),
+ (F2_sfimm_n u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffixupn IntRegs:$src1, IntRegs:$src2),
+ (F2_sffixupn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffixupd IntRegs:$src1, IntRegs:$src2),
+ (F2_sffixupd IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_sffixupr IntRegs:$src1),
+ (F2_sffixupr IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfcmpeq DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_tfrpr (F2_dfcmpeq DoubleRegs:$src1, DoubleRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfcmpgt DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_tfrpr (F2_dfcmpgt DoubleRegs:$src1, DoubleRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfcmpge DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_tfrpr (F2_dfcmpge DoubleRegs:$src1, DoubleRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfcmpuo DoubleRegs:$src1, DoubleRegs:$src2),
+ (C2_tfrpr (F2_dfcmpuo DoubleRegs:$src1, DoubleRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfclass DoubleRegs:$src1, u5_0ImmPred_timm:$src2),
+ (C2_tfrpr (F2_dfclass DoubleRegs:$src1, u5_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfimm_p u10_0ImmPred_timm:$src1),
+ (F2_dfimm_p u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_dfimm_n u10_0ImmPred_timm:$src1),
+ (F2_dfimm_n u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2df IntRegs:$src1),
+ (F2_conv_sf2df IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2sf DoubleRegs:$src1),
+ (F2_conv_df2sf DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_uw2sf IntRegs:$src1),
+ (F2_conv_uw2sf IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_uw2df IntRegs:$src1),
+ (F2_conv_uw2df IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_w2sf IntRegs:$src1),
+ (F2_conv_w2sf IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_w2df IntRegs:$src1),
+ (F2_conv_w2df IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_ud2sf DoubleRegs:$src1),
+ (F2_conv_ud2sf DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_ud2df DoubleRegs:$src1),
+ (F2_conv_ud2df DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_d2sf DoubleRegs:$src1),
+ (F2_conv_d2sf DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_d2df DoubleRegs:$src1),
+ (F2_conv_d2df DoubleRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_conv_sf2uw IntRegs:$src1),
(F2_conv_sf2uw IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vsubh DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vsubh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2w IntRegs:$src1),
+ (F2_conv_sf2w IntRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_conv_sf2ud IntRegs:$src1),
(F2_conv_sf2ud IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vsubw DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vsubw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vcmpwgt DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vcmpwgt DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_xor_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M4_xor_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2d IntRegs:$src1),
+ (F2_conv_sf2d IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2uw DoubleRegs:$src1),
+ (F2_conv_df2uw DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2w DoubleRegs:$src1),
+ (F2_conv_df2w DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2ud DoubleRegs:$src1),
+ (F2_conv_df2ud DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2d DoubleRegs:$src1),
+ (F2_conv_df2d DoubleRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_F2_conv_sf2uw_chop IntRegs:$src1),
(F2_conv_sf2uw_chop IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_r_vw DoubleRegs:$src1, IntRegs:$src2),
- (S2_asl_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vsatwuh_nopack DoubleRegs:$src1),
- (S2_vsatwuh_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_r_vh DoubleRegs:$src1, IntRegs:$src2),
- (S2_asl_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_svsubuhs IntRegs:$src1, IntRegs:$src2),
- (A2_svsubuhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M5_vmpybsu IntRegs:$src1, IntRegs:$src2),
- (M5_vmpybsu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_subh_l16_sat_ll IntRegs:$src1, IntRegs:$src2),
- (A2_subh_l16_sat_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_and_and PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
- (C4_and_and PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyu_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_acc_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyu_acc_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_r_p DoubleRegs:$src1, IntRegs:$src2),
- (S2_lsr_r_p DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2w_chop IntRegs:$src1),
+ (F2_conv_sf2w_chop IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2ud_chop IntRegs:$src1),
+ (F2_conv_sf2ud_chop IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_sf2d_chop IntRegs:$src1),
+ (F2_conv_sf2d_chop IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2uw_chop DoubleRegs:$src1),
+ (F2_conv_df2uw_chop DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2w_chop DoubleRegs:$src1),
+ (F2_conv_df2w_chop DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2ud_chop DoubleRegs:$src1),
+ (F2_conv_df2ud_chop DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_F2_conv_df2d_chop DoubleRegs:$src1),
+ (F2_conv_df2d_chop DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r IntRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r IntRegs:$src1, IntRegs:$src2),
+ (S2_asl_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_lsr_r_r IntRegs:$src1, IntRegs:$src2),
(S2_lsr_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_subp_c DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
- (A4_subp_c DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vsubhs DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vsubhs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_vitpack PredRegs:$src1, PredRegs:$src2),
- (C2_vitpack PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vavguhr DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vavguhr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vsplicerb DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
- (S2_vsplicerb DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_nbitsclr IntRegs:$src1, IntRegs:$src2),
- (C4_nbitsclr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vcmpbgtu DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vcmpbgtu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_cmpys_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_cmpys_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_cmpys_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_cmpys_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_dfcmpuo DoubleRegs:$src1, DoubleRegs:$src2),
- (F2_dfcmpuo DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_shuffob DoubleRegs:$src1, DoubleRegs:$src2),
- (S2_shuffob DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_and PredRegs:$src1, PredRegs:$src2),
- (C2_and PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S5_popcountp DoubleRegs:$src1),
- (S5_popcountp DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_extractp DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3),
- (S4_extractp DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_cl0 IntRegs:$src1),
- (S2_cl0 IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2),
- (A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmacls_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_mmacls_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmacls_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_mmacls_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_cmpneq IntRegs:$src1, IntRegs:$src2),
- (C4_cmpneq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vmac2es DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_vmac2es DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vdmacs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_vdmacs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vdmacs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_vdmacs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_ll_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyud_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_ll_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyud_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_clb IntRegs:$src1),
- (S2_clb IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyd_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyd_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vmaxuh DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vmaxuh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_bitspliti IntRegs:$src1, u5_0ImmPred_timm:$src2),
- (A4_bitspliti IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vmaxub DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vmaxub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_hh_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyud_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_hh_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyud_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vrmac_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_vrmac_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_sat_lh_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_sat_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_r_r_sat IntRegs:$src1, IntRegs:$src2),
- (S2_asl_r_r_sat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_sf2d IntRegs:$src1),
- (F2_conv_sf2d IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_r IntRegs:$src1, IntRegs:$src2),
+ (S2_lsl_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_p DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asl_r_p DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsr_r_p DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_asr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(S2_asr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_dfimm_n u10_0ImmPred_timm:$src1),
- (F2_dfimm_n u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cmphgt IntRegs:$src1, IntRegs:$src2),
- (A4_cmphgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_dfimm_p u10_0ImmPred_timm:$src1),
- (F2_dfimm_p u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyud_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vcmpy_s1_sat_r DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_vcmpy_s1_sat_r DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred_timm:$src2, IntRegs:$src3),
- (M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred_timm:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vcmpy_s1_sat_i DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_vcmpy_s1_sat_i DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
(S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M5_vrmacbuu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M5_vrmacbuu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3),
- (S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_cnacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_cnacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_cnacs_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_cnacs_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_maxu IntRegs:$src1, IntRegs:$src2),
- (A2_maxu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_maxp DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_maxp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_andir IntRegs:$src1, s32_0ImmPred_timm:$src2),
- (A2_andir IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sfrecipa IntRegs:$src1, IntRegs:$src2),
- (F2_sfrecipa IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_combineii s32_0ImmPred_timm:$src1, s8_0ImmPred_timm:$src2),
- (A2_combineii s32_0ImmPred_timm:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_orn IntRegs:$src1, IntRegs:$src2),
- (A4_orn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cmpbgtui IntRegs:$src1, u32_0ImmPred_timm:$src2),
- (A4_cmpbgtui IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_lsr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(S2_lsr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred_timm:$src2),
- (A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsl_r_r IntRegs:$src1, IntRegs:$src2),
- (S2_lsl_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2),
- (S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_or IntRegs:$src1, IntRegs:$src2),
- (A2_or IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_dfcmpeq DoubleRegs:$src1, DoubleRegs:$src2),
- (F2_dfcmpeq DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_cmpeq IntRegs:$src1, IntRegs:$src2),
- (C2_cmpeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_tfrp DoubleRegs:$src1),
- (A2_tfrp DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_and_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
- (C4_and_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vsathub_nopack DoubleRegs:$src1),
- (S2_vsathub_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_satuh IntRegs:$src1),
- (A2_satuh IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_satub IntRegs:$src1),
- (A2_satub IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vrcmpys_s1 DoubleRegs:$src1, IntRegs:$src2),
- (M2_vrcmpys_s1 DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
- (S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_fastcorner9_not PredRegs:$src1, PredRegs:$src2),
- (C4_fastcorner9_not PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_tfrih IntRegs:$src1, u16_0ImmPred_timm:$src2),
- (A2_tfrih IntRegs:$src1, u16_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_tfril IntRegs:$src1, u16_0ImmPred_timm:$src2),
- (A2_tfril IntRegs:$src1, u16_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3),
- (M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vtrunehb DoubleRegs:$src1),
- (S2_vtrunehb DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vabsw DoubleRegs:$src1),
- (A2_vabsw DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vabsh DoubleRegs:$src1),
- (A2_vabsh DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sfsub IntRegs:$src1, IntRegs:$src2),
- (F2_sfsub IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_muxii PredRegs:$src1, s32_0ImmPred_timm:$src2, s8_0ImmPred_timm:$src3),
- (C2_muxii PredRegs:$src1, s32_0ImmPred_timm:$src2, s8_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
- (C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_swiz IntRegs:$src1),
- (A2_swiz IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
- (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_cmpyrsc_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_cmpyrsc_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_cmpyrsc_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_cmpyrsc_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vraddub DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vraddub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_tlbmatch DoubleRegs:$src1, IntRegs:$src2),
- (A4_tlbmatch DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_df2w_chop DoubleRegs:$src1),
- (F2_conv_df2w_chop DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_and IntRegs:$src1, IntRegs:$src2),
- (A2_and IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_lsr_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
(S2_lsr_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_nac_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_nac_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_nac_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_nac_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_extract IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3),
- (S4_extract IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vcmpweq DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vcmpweq DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ (S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_r_sat IntRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_r_sat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_r_sat IntRegs:$src1, IntRegs:$src2),
+ (S2_asl_r_r_sat IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_asr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_lsr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_asl_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2),
+ (S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2),
+ (S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2),
+ (S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
(S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
- (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_ud2sf DoubleRegs:$src1),
- (F2_conv_ud2sf DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_tfr IntRegs:$src1),
- (A2_tfr IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
- (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_subri s32_0ImmPred_timm:$src1, IntRegs:$src2),
- (A2_subri s32_0ImmPred_timm:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vrmaxuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (A4_vrmaxuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M5_vmpybuu IntRegs:$src1, IntRegs:$src2),
- (M5_vmpybuu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vrmaxuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (A4_vrmaxuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2),
- (S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vavgw DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vavgw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_brev IntRegs:$src1),
- (S2_brev IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vavgh DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vavgh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_clrbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2),
- (S2_clrbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2),
- (S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmpyl_rs1 DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_mmpyl_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_hl_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyud_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmpyl_s0 DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_mmpyl_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmpyl_s1 DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_mmpyl_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3),
- (M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vrndpackwhs DoubleRegs:$src1),
- (S2_vrndpackwhs DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vtrunewh DoubleRegs:$src1, DoubleRegs:$src2),
- (S2_vtrunewh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_ll_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_ll_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_mac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M4_mac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred_timm:$src4),
- (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred_timm:$src4)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_uw2df IntRegs:$src1),
- (F2_conv_uw2df IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vaddubs DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vaddubs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (S2_asr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_orir IntRegs:$src1, s32_0ImmPred_timm:$src2),
- (A2_orir IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_andp DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_andp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lfsp DoubleRegs:$src1, DoubleRegs:$src2),
- (S2_lfsp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_min IntRegs:$src1, IntRegs:$src2),
- (A2_min IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpysmi IntRegs:$src1, m32_0ImmPred_timm:$src2),
- (M2_mpysmi IntRegs:$src1, m32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vcmpy_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_vcmpy_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyu_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyu_acc_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyu_acc_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_r_svw_trun DoubleRegs:$src1, IntRegs:$src2),
- (S2_asr_r_svw_trun DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmpyh_s0 DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_mmpyh_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmpyh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_mmpyh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_sf2df IntRegs:$src1),
- (F2_conv_sf2df IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vtrunohb DoubleRegs:$src1),
- (S2_vtrunohb DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_sf2d_chop IntRegs:$src1),
- (F2_conv_sf2d_chop IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_lh_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_df2w DoubleRegs:$src1),
- (F2_conv_df2w DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred_timm:$src2),
- (S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
(S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_df2d DoubleRegs:$src1),
- (F2_conv_df2d DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmaculs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_mmaculs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmaculs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_mmaculs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_svadduhs IntRegs:$src1, IntRegs:$src2),
- (A2_svadduhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_sf2w_chop IntRegs:$src1),
- (F2_conv_sf2w_chop IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_svsathub IntRegs:$src1),
- (S2_svsathub IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyd_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyd_rnd_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_setbit_r IntRegs:$src1, IntRegs:$src2),
- (S2_setbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vavghr DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vavghr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sffma_sc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, PredRegs:$src4),
- (F2_sffma_sc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, PredRegs:$src4)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_dfclass DoubleRegs:$src1, u5_0ImmPred_timm:$src2),
- (F2_dfclass DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_df2ud DoubleRegs:$src1),
- (F2_conv_df2ud DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_df2uw DoubleRegs:$src1),
- (F2_conv_df2uw DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_cmpyrs_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_cmpyrs_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_cmpyrs_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_cmpyrs_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_cmpltei IntRegs:$src1, s32_0ImmPred_timm:$src2),
- (C4_cmpltei IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_cmplteu IntRegs:$src1, IntRegs:$src2),
- (C4_cmplteu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vsubb_map DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_subh_l16_ll IntRegs:$src1, IntRegs:$src2),
- (A2_subh_l16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred_timm:$src2),
(S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vrmpy_s0 DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_vrmpy_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyd_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyd_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_minup DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_minup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_valignrb DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
- (S2_valignrb DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (S2_asr_r_p_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmpyl_rs0 DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_mmpyl_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vrcmaci_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_vrcmaci_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vaddub DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vaddub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_combine_lh IntRegs:$src1, IntRegs:$src2),
- (A2_combine_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M5_vdmacbsu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M5_vdmacbsu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_combine_ll IntRegs:$src1, IntRegs:$src2),
- (A2_combine_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_hl_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyud_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vrcmpyi_s0c DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_vrcmpyi_s0c DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred_timm:$src2),
(S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_addpsat DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_addpsat DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_svaddhs IntRegs:$src1, IntRegs:$src2),
- (A2_svaddhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_ori_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S4_ori_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_sat_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_sat_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_sat_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_sat_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vminw DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vminw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vminh DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vminh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vrcmpyr_s0 DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_vrcmpyr_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vminb DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vminb DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vcmac_s0_sat_i DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_vcmac_s0_sat_i DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_lh_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyud_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_lh_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpyud_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (S2_asl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S4_lsli s6_0ImmPred_timm:$src1, IntRegs:$src2),
(S4_lsli s6_0ImmPred_timm:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred_timm:$src3),
+ (S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_andi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S4_andi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[UseCompound, HasV5]>;
+def: Pat<(int_hexagon_S4_ori_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S4_ori_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[UseCompound, HasV5]>;
+def: Pat<(int_hexagon_S4_addi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S4_addi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[UseCompound, HasV5]>;
+def: Pat<(int_hexagon_S4_subi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S4_subi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[UseCompound, HasV5]>;
+def: Pat<(int_hexagon_S4_andi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S4_andi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[UseCompound, HasV5]>;
+def: Pat<(int_hexagon_S4_ori_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S4_ori_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[UseCompound, HasV5]>;
+def: Pat<(int_hexagon_S4_addi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S4_addi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[UseCompound, HasV5]>;
+def: Pat<(int_hexagon_S4_subi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S4_subi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[UseCompound, HasV5]>;
+def: Pat<(int_hexagon_S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3),
+ (S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_valignrb DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
+ (S2_valignrb DoubleRegs:$src1, DoubleRegs:$src2, (C2_tfrrp PredRegs:$src3))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3),
+ (S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsplicerb DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
+ (S2_vsplicerb DoubleRegs:$src1, DoubleRegs:$src2, (C2_tfrrp PredRegs:$src3))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsplatrh IntRegs:$src1),
+ (S2_vsplatrh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsplatrb IntRegs:$src1),
+ (S2_vsplatrb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3, u5_0ImmPred_timm:$src4),
+ (S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3, u5_0ImmPred_timm:$src4)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_bitspliti IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (A4_bitspliti IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A4_bitsplit IntRegs:$src1, IntRegs:$src2),
+ (A4_bitsplit IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_extract IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3),
+ (S4_extract IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_extractu IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3),
+ (S2_extractu IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3, u6_0ImmPred_timm:$src4),
+ (S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3, u6_0ImmPred_timm:$src4)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_extractp DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3),
+ (S4_extractp DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_extractup DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3),
+ (S2_extractup DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_insert_rp IntRegs:$src1, IntRegs:$src2, DoubleRegs:$src3),
+ (S2_insert_rp IntRegs:$src1, IntRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_extract_rp IntRegs:$src1, DoubleRegs:$src2),
+ (S4_extract_rp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_extractu_rp IntRegs:$src1, DoubleRegs:$src2),
+ (S2_extractu_rp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_insertp_rp DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (S2_insertp_rp DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_extractp_rp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S4_extractp_rp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_extractup_rp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_extractup_rp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_tstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (C2_tfrpr (S2_tstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_ntstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (C2_tfrpr (S4_ntstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_setbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_setbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_togglebit_i IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_togglebit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clrbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_clrbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_tstbit_r IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (S2_tstbit_r IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_ntstbit_r IntRegs:$src1, IntRegs:$src2),
+ (C2_tfrpr (S4_ntstbit_r IntRegs:$src1, IntRegs:$src2))>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_setbit_r IntRegs:$src1, IntRegs:$src2),
+ (S2_setbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_togglebit_r IntRegs:$src1, IntRegs:$src2),
+ (S2_togglebit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clrbit_r IntRegs:$src1, IntRegs:$src2),
+ (S2_clrbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2),
+ (S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2),
+ (S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2),
+ (S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_vh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred_timm:$src2),
+ (S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_vh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asl_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_vh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsr_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsl_r_vh DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsl_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_svw_trun DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_svw_trun DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asr_r_vw DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asr_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_asl_r_vw DoubleRegs:$src1, IntRegs:$src2),
+ (S2_asl_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lsr_r_vw DoubleRegs:$src1, IntRegs:$src2),
+ (S2_lsr_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_lsl_r_vw DoubleRegs:$src1, IntRegs:$src2),
(S2_lsl_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_hh_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_mpy_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_vrmpyeh_s0 DoubleRegs:$src1, DoubleRegs:$src2),
- (M4_vrmpyeh_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_vrmpyeh_s1 DoubleRegs:$src1, DoubleRegs:$src2),
- (M4_vrmpyeh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_nac_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_nac_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_nac_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_nac_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_tfrrp IntRegs:$src1),
- (C2_tfrrp IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_acc_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_acc_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_acc_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_acc_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vrndpackwh DoubleRegs:$src1),
+ (S2_vrndpackwh DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vrndpackwhs DoubleRegs:$src1),
+ (S2_vrndpackwhs DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsxtbh IntRegs:$src1),
+ (S2_vsxtbh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vzxtbh IntRegs:$src1),
+ (S2_vzxtbh IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsathub DoubleRegs:$src1),
+ (S2_vsathub DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_svsathub IntRegs:$src1),
+ (S2_svsathub IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_svsathb IntRegs:$src1),
+ (S2_svsathb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsathb DoubleRegs:$src1),
+ (S2_vsathb DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vtrunohb DoubleRegs:$src1),
+ (S2_vtrunohb DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vtrunewh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_vtrunewh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_vtrunowh DoubleRegs:$src1, DoubleRegs:$src2),
(S2_vtrunowh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_abs IntRegs:$src1),
- (A2_abs IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cmpbeq IntRegs:$src1, IntRegs:$src2),
- (A4_cmpbeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_negp DoubleRegs:$src1),
- (A2_negp DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred_timm:$src2),
- (S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_addh_l16_sat_hl IntRegs:$src1, IntRegs:$src2),
- (A2_addh_l16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vtrunehb DoubleRegs:$src1),
+ (S2_vtrunehb DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsxthw IntRegs:$src1),
+ (S2_vsxthw IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vzxthw IntRegs:$src1),
+ (S2_vzxthw IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsatwh DoubleRegs:$src1),
+ (S2_vsatwh DoubleRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_vsatwuh DoubleRegs:$src1),
(S2_vsatwuh DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_dfcmpgt DoubleRegs:$src1, DoubleRegs:$src2),
- (F2_dfcmpgt DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_svsathb IntRegs:$src1),
- (S2_svsathb IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2),
- (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cround_ri IntRegs:$src1, u5_0ImmPred_timm:$src2),
- (A4_cround_ri IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred_timm:$src2),
- (S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cround_rr IntRegs:$src1, IntRegs:$src2),
- (A4_cround_rr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_mux PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (C2_mux PredRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_packhl IntRegs:$src1, IntRegs:$src2),
+ (S2_packhl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_A2_swiz IntRegs:$src1),
+ (A2_swiz IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsathub_nopack DoubleRegs:$src1),
+ (S2_vsathub_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsathb_nopack DoubleRegs:$src1),
+ (S2_vsathb_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsatwh_nopack DoubleRegs:$src1),
+ (S2_vsatwh_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_vsatwuh_nopack DoubleRegs:$src1),
+ (S2_vsatwuh_nopack DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_shuffob DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_shuffob DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S2_shuffeb DoubleRegs:$src1, DoubleRegs:$src2),
(S2_shuffeb DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vminuw DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vminuw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vaddhs DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vaddhs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_insert_rp IntRegs:$src1, IntRegs:$src2, DoubleRegs:$src3),
- (S2_insert_rp IntRegs:$src1, IntRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vminuh DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vminuh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vminub DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vminub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_extractu IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3),
- (S2_extractu IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_svsubh IntRegs:$src1, IntRegs:$src2),
- (A2_svsubh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_clbaddi IntRegs:$src1, s6_0ImmPred_timm:$src2),
- (S4_clbaddi IntRegs:$src1, s6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_sffms IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (F2_sffms IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vsxtbh IntRegs:$src1),
- (S2_vsxtbh IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_nac_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyud_nac_ll_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_nac_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyud_nac_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_subp DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_subp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vmpy2es_s1 DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_vmpy2es_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vmpy2es_s0 DoubleRegs:$src1, DoubleRegs:$src2),
- (M2_vmpy2es_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_shuffoh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_shuffoh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_shuffeh DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_shuffeh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S5_popcountp DoubleRegs:$src1),
+ (S5_popcountp DoubleRegs:$src1)>, Requires<[HasV5]>;
def: Pat<(int_hexagon_S4_parity IntRegs:$src1, IntRegs:$src2),
(S4_parity IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S4_addi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S4_addi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyd_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyd_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyd_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_cmpheqi IntRegs:$src1, s32_0ImmPred_timm:$src2),
- (A4_cmpheqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_acc_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_acc_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_F2_conv_sf2ud_chop IntRegs:$src1),
- (F2_conv_sf2ud_chop IntRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_acc_sat_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_acc_sat_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_acc_sat_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_acc_sat_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
- (S2_asl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_addh_h16_sat_lh IntRegs:$src1, IntRegs:$src2),
- (A2_addh_h16_sat_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_addh_h16_sat_ll IntRegs:$src1, IntRegs:$src2),
- (A2_addh_h16_sat_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M4_nac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M4_nac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyud_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpyud_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpyud_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_round_ri_sat IntRegs:$src1, u5_0ImmPred_timm:$src2),
- (A4_round_ri_sat IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vavghcr DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vavghcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmacls_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_mmacls_rs0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mmacls_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_mmacls_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_cmaci_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_cmaci_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_setbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2),
- (S2_setbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
- (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_andn IntRegs:$src1, IntRegs:$src2),
- (A4_andn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M5_vrmpybsu DoubleRegs:$src1, DoubleRegs:$src2),
- (M5_vrmpybsu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_vrndpackwh DoubleRegs:$src1),
- (S2_vrndpackwh DoubleRegs:$src1)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vcmac_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (M2_vcmac_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_vmaxuw DoubleRegs:$src1, DoubleRegs:$src2),
- (A2_vmaxuw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C2_bitsclr IntRegs:$src1, IntRegs:$src2),
- (C2_bitsclr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_xor_xacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_xor_xacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2),
- (A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A4_ornp DoubleRegs:$src1, DoubleRegs:$src2),
- (A4_ornp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_and_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
- (C4_and_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_nac_sat_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_nac_sat_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_mpy_nac_sat_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- (M2_mpy_nac_sat_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_subh_h16_sat_ll IntRegs:$src1, IntRegs:$src2),
- (A2_subh_h16_sat_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_A2_subh_h16_sat_lh IntRegs:$src1, IntRegs:$src2),
- (A2_subh_h16_sat_lh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vmpy2su_s1 IntRegs:$src1, IntRegs:$src2),
- (M2_vmpy2su_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_M2_vmpy2su_s0 IntRegs:$src1, IntRegs:$src2),
- (M2_vmpy2su_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
- (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_C4_nbitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2),
- (C4_nbitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2),
- (S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
-def: Pat<(int_hexagon_S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
- (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>;
-
-// V55 Scalar Instructions.
-
-def: Pat<(int_hexagon_A5_ACS DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
- (A5_ACS DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV55]>;
+def: Pat<(int_hexagon_S2_parityp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_parityp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_lfsp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S2_lfsp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clbnorm IntRegs:$src1),
+ (S2_clbnorm IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_clbaddi IntRegs:$src1, s6_0ImmPred_timm:$src2),
+ (S4_clbaddi IntRegs:$src1, s6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_clbpnorm DoubleRegs:$src1),
+ (S4_clbpnorm DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred_timm:$src2),
+ (S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred_timm:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clb IntRegs:$src1),
+ (S2_clb IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_cl0 IntRegs:$src1),
+ (S2_cl0 IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_cl1 IntRegs:$src1),
+ (S2_cl1 IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_clbp DoubleRegs:$src1),
+ (S2_clbp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_cl0p DoubleRegs:$src1),
+ (S2_cl0p DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_cl1p DoubleRegs:$src1),
+ (S2_cl1p DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_brev IntRegs:$src1),
+ (S2_brev IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_brevp DoubleRegs:$src1),
+ (S2_brevp DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_ct0 IntRegs:$src1),
+ (S2_ct0 IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_ct1 IntRegs:$src1),
+ (S2_ct1 IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_ct0p DoubleRegs:$src1),
+ (S2_ct0p DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_ct1p DoubleRegs:$src1),
+ (S2_ct1p DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_interleave DoubleRegs:$src1),
+ (S2_interleave DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_S2_deinterleave DoubleRegs:$src1),
+ (S2_deinterleave DoubleRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_Y2_dczeroa IntRegs:$src1),
+ (Y2_dczeroa IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_Y2_dccleana IntRegs:$src1),
+ (Y2_dccleana IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_Y2_dccleaninva IntRegs:$src1),
+ (Y2_dccleaninva IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_Y2_dcinva IntRegs:$src1),
+ (Y2_dcinva IntRegs:$src1)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_Y4_l2fetch IntRegs:$src1, IntRegs:$src2),
+ (Y4_l2fetch IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>;
+def: Pat<(int_hexagon_Y5_l2fetch IntRegs:$src1, DoubleRegs:$src2),
+ (Y5_l2fetch IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>;
// V60 Scalar Instructions.
-def: Pat<(int_hexagon_S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
- (S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
-def: Pat<(int_hexagon_S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
-def: Pat<(int_hexagon_S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
-def: Pat<(int_hexagon_S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
-def: Pat<(int_hexagon_S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
- (S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (S6_rol_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV60]>;
def: Pat<(int_hexagon_S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2),
(S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV60]>;
-def: Pat<(int_hexagon_S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
- (S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
def: Pat<(int_hexagon_S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
(S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
-def: Pat<(int_hexagon_S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
- (S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
-def: Pat<(int_hexagon_S6_rol_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2),
- (S6_rol_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV60]>;
def: Pat<(int_hexagon_S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
(S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3),
+ (S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
+def: Pat<(int_hexagon_S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
+ (S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
def: Pat<(int_hexagon_S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3),
(S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>;
// V62 Scalar Instructions.
-def: Pat<(int_hexagon_S6_vtrunehb_ppp DoubleRegs:$src1, DoubleRegs:$src2),
- (S6_vtrunehb_ppp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
-def: Pat<(int_hexagon_V6_ldntnt0 IntRegs:$src1),
- (V6_ldntnt0 IntRegs:$src1)>, Requires<[HasV62]>;
-def: Pat<(int_hexagon_M6_vabsdiffub DoubleRegs:$src1, DoubleRegs:$src2),
- (M6_vabsdiffub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
-def: Pat<(int_hexagon_S6_vtrunohb_ppp DoubleRegs:$src1, DoubleRegs:$src2),
- (S6_vtrunohb_ppp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
def: Pat<(int_hexagon_M6_vabsdiffb DoubleRegs:$src1, DoubleRegs:$src2),
(M6_vabsdiffb DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
-def: Pat<(int_hexagon_A6_vminub_RdP DoubleRegs:$src1, DoubleRegs:$src2),
- (A6_vminub_RdP DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_M6_vabsdiffub DoubleRegs:$src1, DoubleRegs:$src2),
+ (M6_vabsdiffub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
def: Pat<(int_hexagon_S6_vsplatrbp IntRegs:$src1),
(S6_vsplatrbp IntRegs:$src1)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_S6_vtrunehb_ppp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S6_vtrunehb_ppp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
+def: Pat<(int_hexagon_S6_vtrunohb_ppp DoubleRegs:$src1, DoubleRegs:$src2),
+ (S6_vtrunohb_ppp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV62]>;
// V65 Scalar Instructions.
def: Pat<(int_hexagon_A6_vcmpbeq_notany DoubleRegs:$src1, DoubleRegs:$src2),
- (A6_vcmpbeq_notany DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV65]>;
+ (C2_tfrpr (A6_vcmpbeq_notany DoubleRegs:$src1, DoubleRegs:$src2))>, Requires<[HasV65]>;
// V66 Scalar Instructions.
-def: Pat<(int_hexagon_F2_dfsub DoubleRegs:$src1, DoubleRegs:$src2),
- (F2_dfsub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV66]>;
-def: Pat<(int_hexagon_F2_dfadd DoubleRegs:$src1, DoubleRegs:$src2),
- (F2_dfadd DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV66]>;
def: Pat<(int_hexagon_M2_mnaci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
(M2_mnaci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV66]>;
+def: Pat<(int_hexagon_F2_dfadd DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfadd DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV66]>;
+def: Pat<(int_hexagon_F2_dfsub DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfsub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV66]>;
def: Pat<(int_hexagon_S2_mask u5_0ImmPred_timm:$src1, u5_0ImmPred_timm:$src2),
(S2_mask u5_0ImmPred_timm:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV66]>;
+// V67 Scalar Instructions.
+
+def: Pat<(int_hexagon_M7_dcmpyrw DoubleRegs:$src1, DoubleRegs:$src2),
+ (M7_dcmpyrw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_M7_dcmpyrw_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M7_dcmpyrw_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_M7_dcmpyrwc DoubleRegs:$src1, DoubleRegs:$src2),
+ (M7_dcmpyrwc DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_M7_dcmpyrwc_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M7_dcmpyrwc_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_M7_dcmpyiw DoubleRegs:$src1, DoubleRegs:$src2),
+ (M7_dcmpyiw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_M7_dcmpyiw_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M7_dcmpyiw_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_M7_dcmpyiwc DoubleRegs:$src1, DoubleRegs:$src2),
+ (M7_dcmpyiwc DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_M7_dcmpyiwc_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M7_dcmpyiwc_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_M7_vdmpy DoubleRegs:$src1, DoubleRegs:$src2),
+ (M7_dcmpyrwc DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_M7_vdmpy_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (M7_dcmpyrwc_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_M7_wcmpyrw DoubleRegs:$src1, DoubleRegs:$src2),
+ (M7_wcmpyrw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_M7_wcmpyrwc DoubleRegs:$src1, DoubleRegs:$src2),
+ (M7_wcmpyrwc DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_M7_wcmpyiw DoubleRegs:$src1, DoubleRegs:$src2),
+ (M7_wcmpyiw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_M7_wcmpyiwc DoubleRegs:$src1, DoubleRegs:$src2),
+ (M7_wcmpyiwc DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_M7_wcmpyrw_rnd DoubleRegs:$src1, DoubleRegs:$src2),
+ (M7_wcmpyrw_rnd DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_M7_wcmpyrwc_rnd DoubleRegs:$src1, DoubleRegs:$src2),
+ (M7_wcmpyrwc_rnd DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_M7_wcmpyiw_rnd DoubleRegs:$src1, DoubleRegs:$src2),
+ (M7_wcmpyiw_rnd DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_M7_wcmpyiwc_rnd DoubleRegs:$src1, DoubleRegs:$src2),
+ (M7_wcmpyiwc_rnd DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_A7_croundd_ri DoubleRegs:$src1, u6_0ImmPred_timm:$src2),
+ (A7_croundd_ri DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_A7_croundd_rr DoubleRegs:$src1, IntRegs:$src2),
+ (A7_croundd_rr DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_A7_clip IntRegs:$src1, u5_0ImmPred_timm:$src2),
+ (A7_clip IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_A7_vclip DoubleRegs:$src1, u5_0ImmPred_timm:$src2),
+ (A7_vclip DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_F2_dfmax DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfmax DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_F2_dfmin DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfmin DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_F2_dfmpyfix DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfmpyfix DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_F2_dfmpyll DoubleRegs:$src1, DoubleRegs:$src2),
+ (F2_dfmpyll DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_F2_dfmpylh DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (F2_dfmpylh DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV67]>;
+def: Pat<(int_hexagon_F2_dfmpyhh DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (F2_dfmpyhh DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV67]>;
+
// V60 HVX Instructions.
-def: Pat<(int_hexagon_V6_veqb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_veqb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_veqb_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_veqb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vminub HvxVR:$src1, HvxVR:$src2),
- (V6_vminub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vminub_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vminub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaslw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vaslw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaslw_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vaslw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyhvsrs HvxVR:$src1, HvxVR:$src2),
- (V6_vmpyhvsrs HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyhvsrs_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vmpyhvsrs HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsathub HvxVR:$src1, HvxVR:$src2),
- (V6_vsathub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsathub_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vsathub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddh_dv HvxWR:$src1, HvxWR:$src2),
- (V6_vaddh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddh_dv_128B HvxWR:$src1, HvxWR:$src2),
- (V6_vaddh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3),
- (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpybusi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3),
- (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vshufoh HvxVR:$src1, HvxVR:$src2),
- (V6_vshufoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vshufoh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vshufoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vasrwv HvxVR:$src1, HvxVR:$src2),
- (V6_vasrwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vasrwv_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vasrwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2),
- (V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdmpyhsuisat_128B HvxWR:$src1, IntRegs:$src2),
- (V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4),
- (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrsadubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4),
- (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vnavgw HvxVR:$src1, HvxVR:$src2),
- (V6_vnavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vnavgw_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vnavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vnavgh HvxVR:$src1, HvxVR:$src2),
- (V6_vnavgh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vnavgh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vnavgh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vavgub HvxVR:$src1, HvxVR:$src2),
- (V6_vavgub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vavgub_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vavgub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubb HvxVR:$src1, HvxVR:$src2),
- (V6_vsubb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubb_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vsubb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vgtw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vgtw_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vavgubrnd HvxVR:$src1, HvxVR:$src2),
- (V6_vavgubrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vavgubrnd_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vavgubrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpybusv HvxVR:$src1, HvxVR:$src2),
- (V6_vrmpybusv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpybusv_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vrmpybusv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vsubbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubbnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vsubbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vroundhb HvxVR:$src1, HvxVR:$src2),
- (V6_vroundhb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vroundhb_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vroundhb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vadduhsat_dv HvxWR:$src1, HvxWR:$src2),
- (V6_vadduhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vadduhsat_dv_128B HvxWR:$src1, HvxWR:$src2),
- (V6_vadduhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsububsat HvxVR:$src1, HvxVR:$src2),
- (V6_vsububsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsububsat_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vsububsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpabus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vmpabus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpabus_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vmpabus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmux HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmux HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmux_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmux HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyhus HvxVR:$src1, HvxVR:$src2),
- (V6_vmpyhus HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyhus_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vmpyhus HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vS32b_qpred_ai HvxQR:$src1, IntRegs:$src2, HvxVR:$src3),
+ (V6_vS32b_qpred_ai HvxQR:$src1, IntRegs:$src2, 0, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vS32b_qpred_ai_128B HvxQR:$src1, IntRegs:$src2, HvxVR:$src3),
+ (V6_vS32b_qpred_ai HvxQR:$src1, IntRegs:$src2, 0, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vS32b_nqpred_ai HvxQR:$src1, IntRegs:$src2, HvxVR:$src3),
+ (V6_vS32b_nqpred_ai HvxQR:$src1, IntRegs:$src2, 0, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vS32b_nqpred_ai_128B HvxQR:$src1, IntRegs:$src2, HvxVR:$src3),
+ (V6_vS32b_nqpred_ai HvxQR:$src1, IntRegs:$src2, 0, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vS32b_nt_qpred_ai HvxQR:$src1, IntRegs:$src2, HvxVR:$src3),
+ (V6_vS32b_nt_qpred_ai HvxQR:$src1, IntRegs:$src2, 0, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vS32b_nt_qpred_ai_128B HvxQR:$src1, IntRegs:$src2, HvxVR:$src3),
+ (V6_vS32b_nt_qpred_ai HvxQR:$src1, IntRegs:$src2, 0, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vS32b_nt_nqpred_ai HvxQR:$src1, IntRegs:$src2, HvxVR:$src3),
+ (V6_vS32b_nt_nqpred_ai HvxQR:$src1, IntRegs:$src2, 0, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vS32b_nt_nqpred_ai_128B HvxQR:$src1, IntRegs:$src2, HvxVR:$src3),
+ (V6_vS32b_nt_nqpred_ai HvxQR:$src1, IntRegs:$src2, 0, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_valignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_valignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_valignb_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_valignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlalignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlalignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlalignb_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlalignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
+ (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_valignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
+ (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
+ (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlalignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
+ (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vror HvxVR:$src1, IntRegs:$src2),
+ (V6_vror HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vror_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vror HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackub HvxVR:$src1),
+ (V6_vunpackub HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackub_128B HvxVR:$src1),
+ (V6_vunpackub HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackb HvxVR:$src1),
+ (V6_vunpackb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackb_128B HvxVR:$src1),
+ (V6_vunpackb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackuh HvxVR:$src1),
+ (V6_vunpackuh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackuh_128B HvxVR:$src1),
+ (V6_vunpackuh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackh HvxVR:$src1),
+ (V6_vunpackh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackh_128B HvxVR:$src1),
+ (V6_vunpackh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackob HvxWR:$src1, HvxVR:$src2),
+ (V6_vunpackob HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackob_128B HvxWR:$src1, HvxVR:$src2),
+ (V6_vunpackob HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vunpackoh HvxWR:$src1, HvxVR:$src2),
+ (V6_vunpackoh HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vunpackoh_128B HvxWR:$src1, HvxVR:$src2),
+ (V6_vunpackoh HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vpackeb HvxVR:$src1, HvxVR:$src2),
(V6_vpackeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vpackeb_128B HvxVR:$src1, HvxVR:$src2),
(V6_vpackeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vsubhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubhnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vsubhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vavghrnd HvxVR:$src1, HvxVR:$src2),
- (V6_vavghrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vavghrnd_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vavghrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vtran2x2_map HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vtran2x2_map HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vtran2x2_map_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vtran2x2_map HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdelta HvxVR:$src1, HvxVR:$src2),
- (V6_vdelta HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdelta_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vdelta HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vgtuh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtuh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vgtuh_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtuh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vtmpyhb HvxWR:$src1, IntRegs:$src2),
- (V6_vtmpyhb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vtmpyhb_128B HvxWR:$src1, IntRegs:$src2),
- (V6_vtmpyhb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackeh HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackeh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vpackob HvxVR:$src1, HvxVR:$src2),
(V6_vpackob HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vpackob_128B HvxVR:$src1, HvxVR:$src2),
(V6_vpackob HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmaxh HvxVR:$src1, HvxVR:$src2),
- (V6_vmaxh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmaxh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vmaxh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vtmpybus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vtmpybus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vtmpybus_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vtmpybus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubuhsat HvxVR:$src1, HvxVR:$src2),
- (V6_vsubuhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubuhsat_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vsubuhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vasrw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vasrw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vasrw_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vasrw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_pred_or HvxQR:$src1, HvxQR:$src2),
- (V6_pred_or HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_pred_or_128B HvxQR:$src1, HvxQR:$src2),
- (V6_pred_or HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpyub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vrmpyub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpyub_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vrmpyub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_lo HvxWR:$src1),
- (V6_lo HvxWR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_lo_128B HvxWR:$src1),
- (V6_lo HvxWR:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubb_dv HvxWR:$src1, HvxWR:$src2),
- (V6_vsubb_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubb_dv_128B HvxWR:$src1, HvxWR:$src2),
- (V6_vsubb_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubhsat_dv HvxWR:$src1, HvxWR:$src2),
- (V6_vsubhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubhsat_dv_128B HvxWR:$src1, HvxWR:$src2),
- (V6_vsubhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyiwh HvxVR:$src1, IntRegs:$src2),
- (V6_vmpyiwh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyiwh_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vmpyiwh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyiwb HvxVR:$src1, IntRegs:$src2),
- (V6_vmpyiwb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyiwb_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vmpyiwb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_ldu0 IntRegs:$src1),
- (V6_ldu0 IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_ldu0_128B IntRegs:$src1),
- (V6_ldu0 IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vgtuh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtuh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vgtuh_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtuh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vgth_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgth_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vgth_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgth_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vavgh HvxVR:$src1, HvxVR:$src2),
- (V6_vavgh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vavgh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vavgh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vlalignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vlalignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vlalignb_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vlalignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsh HvxVR:$src1),
- (V6_vsh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsh_128B HvxVR:$src1),
- (V6_vsh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_pred_and_n HvxQR:$src1, HvxQR:$src2),
- (V6_pred_and_n HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_pred_and_n_128B HvxQR:$src1, HvxQR:$src2),
- (V6_pred_and_n HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackoh HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackoh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackhub_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackhub_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackhub_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackhub_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackhb_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackhb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackhb_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackhb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackwuh_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackwuh_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackwuh_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackwuh_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpackwh_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackwh_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpackwh_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vpackwh_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vzb HvxVR:$src1),
+ (V6_vzb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vzb_128B HvxVR:$src1),
+ (V6_vzb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vsb HvxVR:$src1),
(V6_vsb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vsb_128B HvxVR:$src1),
(V6_vsb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vroundwuh HvxVR:$src1, HvxVR:$src2),
- (V6_vroundwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vroundwuh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vroundwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vasrhv HvxVR:$src1, HvxVR:$src2),
- (V6_vasrhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vasrhv_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vasrhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vshuffh HvxVR:$src1),
- (V6_vshuffh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vshuffh_128B HvxVR:$src1),
- (V6_vshuffh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddhsat_dv HvxWR:$src1, HvxWR:$src2),
- (V6_vaddhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddhsat_dv_128B HvxWR:$src1, HvxWR:$src2),
- (V6_vaddhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vnavgub HvxVR:$src1, HvxVR:$src2),
- (V6_vnavgub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vnavgub_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vnavgub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpybv HvxVR:$src1, HvxVR:$src2),
- (V6_vrmpybv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpybv_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vrmpybv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vnormamth HvxVR:$src1),
- (V6_vnormamth HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vnormamth_128B HvxVR:$src1),
- (V6_vnormamth HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdmpyhb HvxVR:$src1, IntRegs:$src2),
- (V6_vdmpyhb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdmpyhb_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vdmpyhb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vavguh HvxVR:$src1, HvxVR:$src2),
- (V6_vavguh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vavguh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vavguh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vlsrwv HvxVR:$src1, HvxVR:$src2),
- (V6_vlsrwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vlsrwv_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vlsrwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vlsrhv HvxVR:$src1, HvxVR:$src2),
- (V6_vlsrhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vlsrhv_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vlsrhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdmpyhisat HvxWR:$src1, IntRegs:$src2),
- (V6_vdmpyhisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdmpyhisat_128B HvxWR:$src1, IntRegs:$src2),
- (V6_vdmpyhisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdmpyhvsat HvxVR:$src1, HvxVR:$src2),
- (V6_vdmpyhvsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdmpyhvsat_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vdmpyhvsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddw HvxVR:$src1, HvxVR:$src2),
- (V6_vaddw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddw_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vaddw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vzh HvxVR:$src1),
(V6_vzh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vzh_128B HvxVR:$src1),
(V6_vzh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddh HvxVR:$src1, HvxVR:$src2),
- (V6_vaddh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vaddh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmaxub HvxVR:$src1, HvxVR:$src2),
- (V6_vmaxub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmaxub_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vmaxub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpyhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyhv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpyhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vadduhsat HvxVR:$src1, HvxVR:$src2),
- (V6_vadduhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vadduhsat_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vadduhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vshufoeh HvxVR:$src1, HvxVR:$src2),
- (V6_vshufoeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vshufoeh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vshufoeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyuhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpyuhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyuhv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpyuhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_veqh HvxVR:$src1, HvxVR:$src2),
- (V6_veqh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_veqh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_veqh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpabuuv HvxWR:$src1, HvxWR:$src2),
- (V6_vmpabuuv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpabuuv_128B HvxWR:$src1, HvxWR:$src2),
- (V6_vmpabuuv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vasrwhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vasrwhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vasrwhsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vasrwhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vminuh HvxVR:$src1, HvxVR:$src2),
- (V6_vminuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vminuh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vminuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vror HvxVR:$src1, IntRegs:$src2),
- (V6_vror HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vror_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vror HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyowh_rnd_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpyowh_rnd_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyowh_rnd_sacc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpyowh_rnd_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmaxuh HvxVR:$src1, HvxVR:$src2),
- (V6_vmaxuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmaxuh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vmaxuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vabsh_sat HvxVR:$src1),
- (V6_vabsh_sat HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vabsh_sat_128B HvxVR:$src1),
- (V6_vabsh_sat HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_pred_or_n HvxQR:$src1, HvxQR:$src2),
- (V6_pred_or_n HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_pred_or_n_128B HvxQR:$src1, HvxQR:$src2),
- (V6_pred_or_n HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdealb HvxVR:$src1),
- (V6_vdealb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdealb_128B HvxVR:$src1),
- (V6_vdealb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpybusv HvxVR:$src1, HvxVR:$src2),
- (V6_vmpybusv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpybusv_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vmpybusv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vzb HvxVR:$src1),
- (V6_vzb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vzb_128B HvxVR:$src1),
- (V6_vzb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsh HvxVR:$src1),
+ (V6_vsh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsh_128B HvxVR:$src1),
+ (V6_vsh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpybus HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vdmpybus_dv HvxWR:$src1, IntRegs:$src2),
(V6_vdmpybus_dv HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vdmpybus_dv_128B HvxWR:$src1, IntRegs:$src2),
(V6_vdmpybus_dv HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vaddbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddbq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vaddbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddb HvxVR:$src1, HvxVR:$src2),
- (V6_vaddb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddb_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vaddb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vaddwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddwq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vaddwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vasrhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vasrhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vasrhubrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vasrhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vasrhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vasrhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vasrhubsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vasrhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vshufoeb HvxVR:$src1, HvxVR:$src2),
- (V6_vshufoeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vshufoeb_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vshufoeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vpackhub_sat HvxVR:$src1, HvxVR:$src2),
- (V6_vpackhub_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vpackhub_sat_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vpackhub_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyiwh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vmpyiwh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyiwh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vmpyiwh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpybus_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpybus_dv_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpybus_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_dv HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhb_dv HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_dv_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhb_dv HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhb_dv_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhvsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vdmpyhvsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhvsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vdmpyhvsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhvsat_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vdmpyhvsat_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhvsat_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vdmpyhvsat_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsat HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsat HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsat_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsat HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsat_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhisat HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhisat_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhisat_acc_128B HvxVR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsusat HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsusat HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsusat_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsusat HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsusat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsusat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsusat_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsusat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsuisat_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsuisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsuisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdmpyhsuisat_acc_128B HvxVR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdmpyhsuisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vtmpyb HvxWR:$src1, IntRegs:$src2),
(V6_vtmpyb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vtmpyb_128B HvxWR:$src1, IntRegs:$src2),
(V6_vtmpyb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpabusv HvxWR:$src1, HvxWR:$src2),
- (V6_vmpabusv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpabusv_128B HvxWR:$src1, HvxWR:$src2),
- (V6_vmpabusv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_pred_and HvxQR:$src1, HvxQR:$src2),
- (V6_pred_and HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_pred_and_128B HvxQR:$src1, HvxQR:$src2),
- (V6_pred_and HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vsubwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubwnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vsubwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vpackwuh_sat HvxVR:$src1, HvxVR:$src2),
- (V6_vpackwuh_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vpackwuh_sat_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vpackwuh_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vswap HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vswap HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vswap_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vswap HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpyb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpyb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpyb_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpyb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpybus HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpybus HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpybus_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpybus HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpybus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpybus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpybus_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpybus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpyhb HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpyhb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpyhb_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vtmpyhb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vtmpyhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpyhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vtmpyhb_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vtmpyhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyub HvxVR:$src1, IntRegs:$src2),
+ (V6_vrmpyub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vrmpyub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vrmpyub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyub_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vrmpyub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyubv HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpyubv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyubv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpyubv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vrmpyubv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_vrmpyubv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vrmpyubv_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_vrmpyubv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vgtb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vgtb_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybv HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpybv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpybv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpybv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybv_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpybv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3),
+ (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3),
+ (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4),
+ (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpyubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4),
+ (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybus HvxVR:$src1, IntRegs:$src2),
+ (V6_vrmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybus_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vrmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vrmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybus_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vrmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3),
+ (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybusi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3),
+ (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4),
+ (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybusi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4),
+ (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybusv HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpybusv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybusv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrmpybusv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrmpybusv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpybusv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrmpybusv_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vrmpybusv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdsaduh HvxWR:$src1, IntRegs:$src2),
+ (V6_vdsaduh HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdsaduh_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vdsaduh HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdsaduh_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdsaduh_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdsaduh_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vdsaduh_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3),
+ (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrsadubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3),
+ (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4),
+ (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrsadubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4),
+ (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrw HvxVR:$src1, IntRegs:$src2),
+ (V6_vasrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrw_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vasrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vaslw HvxVR:$src1, IntRegs:$src2),
(V6_vaslw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vaslw_128B HvxVR:$src1, IntRegs:$src2),
(V6_vaslw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vpackhb_sat HvxVR:$src1, HvxVR:$src2),
- (V6_vpackhb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vpackhb_sat_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vpackhb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyih_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpyih_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyih_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpyih_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vshuffvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vshuffvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vshuffvdd_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vshuffvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddb_dv HvxWR:$src1, HvxWR:$src2),
- (V6_vaddb_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddb_dv_128B HvxWR:$src1, HvxWR:$src2),
- (V6_vaddb_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vunpackub HvxVR:$src1),
- (V6_vunpackub HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vunpackub_128B HvxVR:$src1),
- (V6_vunpackub HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vgtuw HvxVR:$src1, HvxVR:$src2),
- (V6_vgtuw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vgtuw_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vgtuw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vlutvwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vlutvwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vlutvwh_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vlutvwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vgtub HvxVR:$src1, HvxVR:$src2),
- (V6_vgtub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vgtub_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vgtub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyowh HvxVR:$src1, HvxVR:$src2),
- (V6_vmpyowh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyowh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vmpyowh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyieoh HvxVR:$src1, HvxVR:$src2),
- (V6_vmpyieoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyieoh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vmpyieoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_extractw HvxVR:$src1, IntRegs:$src2),
- (V6_extractw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_extractw_128B HvxVR:$src1, IntRegs:$src2),
- (V6_extractw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vavgwrnd HvxVR:$src1, HvxVR:$src2),
- (V6_vavgwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vavgwrnd_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vavgwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdmpyhsat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vdmpyhsat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdmpyhsat_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vdmpyhsat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vgtub_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtub_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vgtub_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtub_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyub HvxVR:$src1, IntRegs:$src2),
- (V6_vmpyub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyub_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vmpyub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyuh HvxVR:$src1, IntRegs:$src2),
- (V6_vmpyuh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyuh_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vmpyuh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vunpackob HvxWR:$src1, HvxVR:$src2),
- (V6_vunpackob HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vunpackob_128B HvxWR:$src1, HvxVR:$src2),
- (V6_vunpackob HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpahb HvxWR:$src1, IntRegs:$src2),
- (V6_vmpahb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpahb_128B HvxWR:$src1, IntRegs:$src2),
- (V6_vmpahb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_veqw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_veqw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_veqw_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_veqw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vandqrt HvxQR:$src1, IntRegs:$src2),
- (V6_vandqrt HvxQR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vandqrt_128B HvxQR:$src1, IntRegs:$src2),
- (V6_vandqrt HvxQR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vxor HvxVR:$src1, HvxVR:$src2),
- (V6_vxor HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vxor_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vxor HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlsrw HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlsrw_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwv HvxVR:$src1, HvxVR:$src2),
+ (V6_vasrwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vasrwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslwv HvxVR:$src1, HvxVR:$src2),
+ (V6_vaslwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslwv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaslwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlsrwv HvxVR:$src1, HvxVR:$src2),
+ (V6_vlsrwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlsrwv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vlsrwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrh HvxVR:$src1, IntRegs:$src2),
+ (V6_vasrh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vasrh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslh HvxVR:$src1, IntRegs:$src2),
+ (V6_vaslh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vaslh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlsrh HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlsrh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vlsrh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrhv HvxVR:$src1, HvxVR:$src2),
+ (V6_vasrhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrhv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vasrhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslhv HvxVR:$src1, HvxVR:$src2),
+ (V6_vaslhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslhv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaslhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlsrhv HvxVR:$src1, HvxVR:$src2),
+ (V6_vlsrhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlsrhv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vlsrhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwh_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwhsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vasrwhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
(V6_vasrwhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vasrwhrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
(V6_vasrwhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyhsat_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vmpyhsat_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyhsat_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vmpyhsat_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vrmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpybus_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vrmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubhw HvxVR:$src1, HvxVR:$src2),
- (V6_vsubhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubhw_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vsubhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdealb4w HvxVR:$src1, HvxVR:$src2),
- (V6_vdealb4w HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdealb4w_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vdealb4w HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyowh_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpyowh_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyowh_sacc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpyowh_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpybv HvxVR:$src1, HvxVR:$src2),
- (V6_vmpybv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpybv_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vmpybv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vabsdiffh HvxVR:$src1, HvxVR:$src2),
- (V6_vabsdiffh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vabsdiffh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vabsdiffh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vshuffob HvxVR:$src1, HvxVR:$src2),
- (V6_vshuffob HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vshuffob_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vshuffob HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyub_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vmpyub_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyub_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vmpyub_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vnormamtw HvxVR:$src1),
- (V6_vnormamtw HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vnormamtw_128B HvxVR:$src1),
- (V6_vnormamtw HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vunpackuh HvxVR:$src1),
- (V6_vunpackuh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vunpackuh_128B HvxVR:$src1),
- (V6_vunpackuh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vgtuh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtuh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vgtuh_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtuh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyiewuh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpyiewuh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyiewuh_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpyiewuh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vunpackoh HvxWR:$src1, HvxVR:$src2),
- (V6_vunpackoh HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vunpackoh_128B HvxWR:$src1, HvxVR:$src2),
- (V6_vunpackoh HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdmpyhsat HvxVR:$src1, IntRegs:$src2),
- (V6_vdmpyhsat HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdmpyhsat_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vdmpyhsat HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyubv HvxVR:$src1, HvxVR:$src2),
- (V6_vmpyubv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyubv_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vmpyubv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyhss HvxVR:$src1, IntRegs:$src2),
- (V6_vmpyhss HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyhss_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vmpyhss HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_hi HvxWR:$src1),
- (V6_hi HvxWR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_hi_128B HvxWR:$src1),
- (V6_hi HvxWR:$src1)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vasrwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
(V6_vasrwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vasrwuhsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
(V6_vasrwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_veqw HvxVR:$src1, HvxVR:$src2),
- (V6_veqw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_veqw_128B HvxVR:$src1, HvxVR:$src2),
- (V6_veqw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdsaduh HvxWR:$src1, IntRegs:$src2),
- (V6_vdsaduh HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdsaduh_128B HvxWR:$src1, IntRegs:$src2),
- (V6_vdsaduh HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vroundwh HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vroundwh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vroundwuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vroundwuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrhubsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrhubrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrhbrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhbrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrhbrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhbrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vroundhb HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundhb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vroundhb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundhb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vroundhub HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundhub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vroundhub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vroundhub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vaslw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslw_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vaslw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vasrw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrw_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vasrw_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddb HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubb HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddb_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddb_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddb_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddb_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubb_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubb_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubb_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubb_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddh HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubh HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddh_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddh_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubh_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubh_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddw HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vsubw HvxVR:$src1, HvxVR:$src2),
(V6_vsubw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vsubw_128B HvxVR:$src1, HvxVR:$src2),
(V6_vsubw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddw_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddw_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddw_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddw_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vsubw_dv HvxWR:$src1, HvxWR:$src2),
(V6_vsubw_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vsubw_dv_128B HvxWR:$src1, HvxWR:$src2),
(V6_vsubw_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_veqb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_veqb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_veqb_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_veqb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyih HvxVR:$src1, HvxVR:$src2),
- (V6_vmpyih HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyih_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vmpyih HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vtmpyb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vtmpyb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vtmpyb_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vtmpyb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpybus HvxVR:$src1, IntRegs:$src2),
- (V6_vrmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpybus_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vrmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpybus_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vmpybus_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpybus_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vmpybus_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vgth_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgth_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vgth_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgth_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddubsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddubsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddubsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddubsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddubsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddubsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddubsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddubsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsububsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsububsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsububsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsububsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsububsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsububsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsububsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsububsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduhsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduhsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduhsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vadduhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduhsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vadduhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubuhsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubuhsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubuhsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubuhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubuhsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubuhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vsubhsat HvxVR:$src1, HvxVR:$src2),
(V6_vsubhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vsubhsat_128B HvxVR:$src1, HvxVR:$src2),
(V6_vsubhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4),
- (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpyubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4),
- (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vabsw HvxVR:$src1),
- (V6_vabsw HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vabsw_128B HvxVR:$src1),
- (V6_vabsw HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubhsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubhsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddwsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddwsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vaddwsat_dv HvxWR:$src1, HvxWR:$src2),
(V6_vaddwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vaddwsat_dv_128B HvxWR:$src1, HvxWR:$src2),
(V6_vaddwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vlsrw HvxVR:$src1, IntRegs:$src2),
- (V6_vlsrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vlsrw_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vlsrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vabsh HvxVR:$src1),
- (V6_vabsh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vabsh_128B HvxVR:$src1),
- (V6_vabsh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vlsrh HvxVR:$src1, IntRegs:$src2),
- (V6_vlsrh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vlsrh_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vlsrh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_valignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_valignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_valignb_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_valignb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubwsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubwsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubwsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubwsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgub HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgubrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgubrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgubrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgubrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavguh HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavguh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavguhrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguhrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavguhrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguhrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgh HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavghrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavghrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavghrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavghrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnavgh HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnavgh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgw HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgwrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgwrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnavgw HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnavgw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsdiffub HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsdiffub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsdiffuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsdiffuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsdiffh HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsdiffh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsdiffw HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsdiffw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vabsdiffw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnavgub HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnavgub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vnavgub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddubh HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddubh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsububh HvxVR:$src1, HvxVR:$src2),
+ (V6_vsububh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsububh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsububh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhw HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubhw HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubhw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduhw HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduhw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vadduhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubuhw HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubuhw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vd0 ),
+ (V6_vd0 )>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vd0_128B ),
+ (V6_vd0 )>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddbq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubbq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddbnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubbnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vsubhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_vsubhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vsubhq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_vsubhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vpackoh HvxVR:$src1, HvxVR:$src2),
- (V6_vpackoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vpackoh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vpackoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vdmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdmpybus_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vdmpybus_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdmpyhvsat_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vdmpyhvsat_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdmpyhvsat_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vdmpyhvsat_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpybv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vrmpybv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpybv_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vrmpybv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddhsat HvxVR:$src1, HvxVR:$src2),
- (V6_vaddhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddhsat_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vaddhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vcombine HvxVR:$src1, HvxVR:$src2),
- (V6_vcombine HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vcombine_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vcombine HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vandqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3),
- (V6_vandqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vandqrt_acc_128B HvxVR:$src1, HvxQR:$src2, IntRegs:$src3),
- (V6_vandqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaslhv HvxVR:$src1, HvxVR:$src2),
- (V6_vaslhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaslhv_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vaslhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vinsertwr HvxVR:$src1, IntRegs:$src2),
- (V6_vinsertwr HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vinsertwr_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vinsertwr HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubh_dv HvxWR:$src1, HvxWR:$src2),
- (V6_vsubh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubh_dv_128B HvxWR:$src1, HvxWR:$src2),
- (V6_vsubh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vshuffb HvxVR:$src1),
- (V6_vshuffb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vshuffb_128B HvxVR:$src1),
- (V6_vshuffb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vand HvxVR:$src1, HvxVR:$src2),
- (V6_vand HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vand_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vand HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubhnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddwq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubwq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddwnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubwnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vsubwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsh HvxVR:$src1),
+ (V6_vabsh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsh_128B HvxVR:$src1),
+ (V6_vabsh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsh_sat HvxVR:$src1),
+ (V6_vabsh_sat HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsh_sat_128B HvxVR:$src1),
+ (V6_vabsh_sat HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsw HvxVR:$src1),
+ (V6_vabsw HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsw_128B HvxVR:$src1),
+ (V6_vabsw HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsw_sat HvxVR:$src1),
+ (V6_vabsw_sat HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsw_sat_128B HvxVR:$src1),
+ (V6_vabsw_sat HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybv HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpybv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpybv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyubv HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyubv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyubv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyubv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyubv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyubv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyubv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyubv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybusv HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpybusv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybusv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpybusv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybusv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpybusv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybusv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpybusv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabusv HvxWR:$src1, HvxWR:$src2),
+ (V6_vmpabusv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabusv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vmpabusv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabuuv HvxWR:$src1, HvxWR:$src2),
+ (V6_vmpabuuv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabuuv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vmpabuuv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vmpyhv HvxVR:$src1, HvxVR:$src2),
(V6_vmpyhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vmpyhv_128B HvxVR:$src1, HvxVR:$src2),
(V6_vmpyhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdmpyhsuisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vdmpyhsuisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdmpyhsuisat_acc_128B HvxVR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vdmpyhsuisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsububsat_dv HvxWR:$src1, HvxWR:$src2),
- (V6_vsububsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsububsat_dv_128B HvxWR:$src1, HvxWR:$src2),
- (V6_vsububsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vgtb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vgtb_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdsaduh_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vdsaduh_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdsaduh_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vdsaduh_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpyub HvxVR:$src1, IntRegs:$src2),
- (V6_vrmpyub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpyub_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vrmpyub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyuh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vmpyuh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyuh_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vmpyuh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vcl0h HvxVR:$src1),
- (V6_vcl0h HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vcl0h_128B HvxVR:$src1),
- (V6_vcl0h HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuhv HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyuhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuhv_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyuhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyuhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuhv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyuhv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhvsrs HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhvsrs HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhvsrs_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhvsrs HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhus HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhus HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhus_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyhus HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vmpyhus_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_vmpyhus_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vmpyhus_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_vmpyhus_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpybv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3),
- (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrsadubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3),
- (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdmpyhb_dv_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vshufeh HvxVR:$src1, HvxVR:$src2),
- (V6_vshufeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vshufeh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vshufeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyih HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyih HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyih_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyih HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyih_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyih_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyih_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyih_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vmpyewuh HvxVR:$src1, HvxVR:$src2),
(V6_vmpyewuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vmpyewuh_128B HvxVR:$src1, HvxVR:$src2),
(V6_vmpyewuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyhsrs HvxVR:$src1, IntRegs:$src2),
- (V6_vmpyhsrs HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyhsrs_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vmpyhsrs HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdmpybus_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vdmpybus_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdmpybus_dv_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vdmpybus_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddubh HvxVR:$src1, HvxVR:$src2),
- (V6_vaddubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddubh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vaddubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vasrwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vasrwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vasrwh_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vasrwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_ld0 IntRegs:$src1),
- (V6_ld0 IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_ld0_128B IntRegs:$src1),
- (V6_ld0 IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vpopcounth HvxVR:$src1),
- (V6_vpopcounth HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vpopcounth_128B HvxVR:$src1),
- (V6_vpopcounth HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_ldnt0 IntRegs:$src1),
- (V6_ldnt0 IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_ldnt0_128B IntRegs:$src1),
- (V6_ldnt0 IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vgth_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgth_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vgth_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgth_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddubsat_dv HvxWR:$src1, HvxWR:$src2),
- (V6_vaddubsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddubsat_dv_128B HvxWR:$src1, HvxWR:$src2),
- (V6_vaddubsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vpackeh HvxVR:$src1, HvxVR:$src2),
- (V6_vpackeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vpackeh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vpackeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyh HvxVR:$src1, IntRegs:$src2),
- (V6_vmpyh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyh_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vmpyh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vminh HvxVR:$src1, HvxVR:$src2),
- (V6_vminh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vminh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vminh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_pred_scalar2 IntRegs:$src1),
- (V6_pred_scalar2 IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_pred_scalar2_128B IntRegs:$src1),
- (V6_pred_scalar2 IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdealh HvxVR:$src1),
- (V6_vdealh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdealh_128B HvxVR:$src1),
- (V6_vdealh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vpackwh_sat HvxVR:$src1, HvxVR:$src2),
- (V6_vpackwh_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vpackwh_sat_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vpackwh_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaslh HvxVR:$src1, IntRegs:$src2),
- (V6_vaslh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaslh_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vaslh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vgtuw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtuw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vgtuw_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtuw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vor HvxVR:$src1, HvxVR:$src2),
- (V6_vor HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vor_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vor HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vlutvvb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vlutvvb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vlutvvb_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vlutvvb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyowh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyowh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyowh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_rnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyowh_rnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_rnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyowh_rnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_sacc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_rnd_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_rnd_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_rnd_sacc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_rnd_sacc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyieoh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyieoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyieoh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyieoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiewuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyiewuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiewuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyiewuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vmpyiowh HvxVR:$src1, HvxVR:$src2),
(V6_vmpyiowh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vmpyiowh_128B HvxVR:$src1, HvxVR:$src2),
(V6_vmpyiowh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vlutvvb_oracc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4),
- (V6_vlutvvb_oracc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vlutvvb_oracc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4),
- (V6_vlutvvb_oracc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vandvrt HvxVR:$src1, IntRegs:$src2),
- (V6_vandvrt HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vandvrt_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vandvrt HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_veqh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_veqh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_veqh_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_veqh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vadduhw HvxVR:$src1, HvxVR:$src2),
- (V6_vadduhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vadduhw_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vadduhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vcl0w HvxVR:$src1),
- (V6_vcl0w HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vcl0w_128B HvxVR:$src1),
- (V6_vcl0w HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyihb HvxVR:$src1, IntRegs:$src2),
- (V6_vmpyihb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyihb_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vmpyihb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vtmpybus HvxWR:$src1, IntRegs:$src2),
- (V6_vtmpybus HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vtmpybus_128B HvxWR:$src1, IntRegs:$src2),
- (V6_vtmpybus HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vd0 ),
- (V6_vd0 )>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vd0_128B ),
- (V6_vd0 )>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_veqh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_veqh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_veqh_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_veqh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vgtw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vgtw_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdmpybus HvxVR:$src1, IntRegs:$src2),
- (V6_vdmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdmpybus_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vdmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vgtub_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtub_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vgtub_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtub_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiewh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyiewh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiewh_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyiewh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiewuh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyiewuh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiewuh_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyiewuh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyub HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyub_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyub_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyub_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyub_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyub_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vmpybus HvxVR:$src1, IntRegs:$src2),
(V6_vmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vmpybus_128B HvxVR:$src1, IntRegs:$src2),
(V6_vmpybus HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdmpyhb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vdmpyhb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdmpyhb_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vdmpyhb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vandvrt_acc HvxQR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vandvrt_acc HvxQR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vandvrt_acc_128B HvxQR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vandvrt_acc HvxQR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vassign HvxVR:$src1),
- (V6_vassign HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vassign_128B HvxVR:$src1),
- (V6_vassign HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vaddwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddwnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vaddwnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vgtub_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtub_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vgtub_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtub_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdmpyhb_dv HvxWR:$src1, IntRegs:$src2),
- (V6_vdmpyhb_dv HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdmpyhb_dv_128B HvxWR:$src1, IntRegs:$src2),
- (V6_vdmpyhb_dv HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vunpackb HvxVR:$src1),
- (V6_vunpackb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vunpackb_128B HvxVR:$src1),
- (V6_vunpackb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vunpackh HvxVR:$src1),
- (V6_vunpackh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vunpackh_128B HvxVR:$src1),
- (V6_vunpackh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpybus_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpybus_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpybus_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpybus_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabus HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpabus HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabus_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpabus HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpabus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabus_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpabus_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpahb HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpahb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpahb_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpahb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vmpahb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
(V6_vmpahb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vmpahb_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
(V6_vmpahb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddbnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
- (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vlalignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
- (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsatwh HvxVR:$src1, HvxVR:$src2),
- (V6_vsatwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsatwh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vsatwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vgtuh HvxVR:$src1, HvxVR:$src2),
- (V6_vgtuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vgtuh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vgtuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyh HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhsat_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyhsat_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhsat_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyhsat_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhss HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyhss HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhss_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyhss HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyhsrs HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyhsrs HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyhsrs_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyhsrs HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuh HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyuh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyuh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyuh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuh_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyuh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyihb HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyihb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyihb_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyihb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vmpyihb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
(V6_vmpyihb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vmpyihb_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
(V6_vmpyihb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpybusv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vrmpybusv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpybusv_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vrmpybusv_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrdelta HvxVR:$src1, HvxVR:$src2),
- (V6_vrdelta HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrdelta_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vrdelta HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vroundwh HvxVR:$src1, HvxVR:$src2),
- (V6_vroundwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vroundwh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vroundwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddw_dv HvxWR:$src1, HvxWR:$src2),
- (V6_vaddw_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddw_dv_128B HvxWR:$src1, HvxWR:$src2),
- (V6_vaddw_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwb HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwb_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vmpyiwb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
(V6_vmpyiwb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vmpyiwb_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
(V6_vmpyiwb_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vsubbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubbq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vsubbq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_veqh_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
- (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_valignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
- (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddwsat HvxVR:$src1, HvxVR:$src2),
- (V6_vaddwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddwsat_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vaddwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwh HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwh_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyiwh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyiwh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vand HvxVR:$src1, HvxVR:$src2),
+ (V6_vand HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vand_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vand HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vor HvxVR:$src1, HvxVR:$src2),
+ (V6_vor HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vor_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vor HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vxor HvxVR:$src1, HvxVR:$src2),
+ (V6_vxor HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vxor_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vxor HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnot HvxVR:$src1),
+ (V6_vnot HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnot_128B HvxVR:$src1),
+ (V6_vnot HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandqrt HvxQR:$src1, IntRegs:$src2),
+ (V6_vandqrt HvxQR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandqrt_128B HvxQR:$src1, IntRegs:$src2),
+ (V6_vandqrt HvxQR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3),
+ (V6_vandqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandqrt_acc_128B HvxVR:$src1, HvxQR:$src2, IntRegs:$src3),
+ (V6_vandqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandvrt HvxVR:$src1, IntRegs:$src2),
+ (V6_vandvrt HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandvrt_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vandvrt HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandvrt_acc HvxQR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vandvrt_acc HvxQR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandvrt_acc_128B HvxQR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vandvrt_acc HvxQR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtw HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtw_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtw_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtw_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqw HvxVR:$src1, HvxVR:$src2),
+ (V6_veqw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_veqw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_veqw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_veqw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_veqw_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_veqw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vabsdiffub HvxVR:$src1, HvxVR:$src2),
- (V6_vabsdiffub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vabsdiffub_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vabsdiffub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vshuffeb HvxVR:$src1, HvxVR:$src2),
- (V6_vshuffeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vshuffeb_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vshuffeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vabsdiffuh HvxVR:$src1, HvxVR:$src2),
- (V6_vabsdiffuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vabsdiffuh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vabsdiffuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqw_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_veqw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_veqw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_veqw_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
@@ -2753,584 +2683,664 @@ def: Pat<(int_hexagon_V6_vgth HvxVR:$src1, HvxVR:$src2),
(V6_vgth HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vgth_128B HvxVR:$src1, HvxVR:$src2),
(V6_vgth HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vgtuw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtuw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vgtuw_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtuw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgth_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgth_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgth_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgth_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgth_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgth_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgth_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqh HvxVR:$src1, HvxVR:$src2),
+ (V6_veqh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_veqh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqh_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqh_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqh_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vgtb HvxVR:$src1, HvxVR:$src2),
(V6_vgtb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vgtb_128B HvxVR:$src1, HvxVR:$src2),
(V6_vgtb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vgtw HvxVR:$src1, HvxVR:$src2),
- (V6_vgtw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vgtw_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vgtw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vsubwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubwq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vsubwq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vnot HvxVR:$src1),
- (V6_vnot HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vnot_128B HvxVR:$src1),
- (V6_vnot HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtb_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vgtb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_vgtb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vgtb_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_vgtb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtb_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqb HvxVR:$src1, HvxVR:$src2),
+ (V6_veqb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_veqb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqb_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqb_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_veqb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_veqb_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_veqb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuw HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtuw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtuw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuw_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vgtuw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_vgtuw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vgtuw_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_vgtuw_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddubsat HvxVR:$src1, HvxVR:$src2),
- (V6_vaddubsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddubsat_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vaddubsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuw_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuh_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuh_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtuh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtuh_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtuh_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtub HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vgtub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtub_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtub_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtub_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtub_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vgtub_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vgtub_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vgtub_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_or HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_or HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_or_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_or HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_and HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_and HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_and_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_and HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_not HvxQR:$src1),
+ (V6_pred_not HvxQR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_not_128B HvxQR:$src1),
+ (V6_pred_not HvxQR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_xor HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_xor HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_xor_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_xor HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_and_n HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_and_n HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_and_n_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_and_n HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_or_n HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_or_n HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_or_n_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_pred_or_n HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_pred_scalar2 IntRegs:$src1),
+ (V6_pred_scalar2 IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_pred_scalar2_128B IntRegs:$src1),
+ (V6_pred_scalar2 IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmux HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmux HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmux_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmux HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vswap HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vswap HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vswap_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vswap HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmaxub HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmaxub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vminub HvxVR:$src1, HvxVR:$src2),
+ (V6_vminub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vminub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vminub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmaxuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmaxuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vminuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vminuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vminuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vminuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmaxh HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmaxh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vminh HvxVR:$src1, HvxVR:$src2),
+ (V6_vminh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vminh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vminh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vmaxw HvxVR:$src1, HvxVR:$src2),
(V6_vmaxw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vmaxw_128B HvxVR:$src1, HvxVR:$src2),
(V6_vmaxw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaslwv HvxVR:$src1, HvxVR:$src2),
- (V6_vaslwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaslwv_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vaslwv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vabsw_sat HvxVR:$src1),
- (V6_vabsw_sat HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vabsw_sat_128B HvxVR:$src1),
- (V6_vabsw_sat HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubwsat_dv HvxWR:$src1, HvxWR:$src2),
- (V6_vsubwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubwsat_dv_128B HvxWR:$src1, HvxWR:$src2),
- (V6_vsubwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vroundhub HvxVR:$src1, HvxVR:$src2),
- (V6_vroundhub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vroundhub_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vroundhub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdmpyhisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vdmpyhisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdmpyhisat_acc_128B HvxVR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vdmpyhisat_acc HvxVR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpabus HvxWR:$src1, IntRegs:$src2),
- (V6_vmpabus HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpabus_128B HvxWR:$src1, IntRegs:$src2),
- (V6_vmpabus HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vassignp HvxWR:$src1),
- (V6_vassignp HvxWR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vassignp_128B HvxWR:$src1),
- (V6_vassignp HvxWR:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_veqb HvxVR:$src1, HvxVR:$src2),
- (V6_veqb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_veqb_128B HvxVR:$src1, HvxVR:$src2),
- (V6_veqb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsububh HvxVR:$src1, HvxVR:$src2),
- (V6_vsububh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsububh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vsububh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vminw HvxVR:$src1, HvxVR:$src2),
+ (V6_vminw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vminw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vminw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsathub HvxVR:$src1, HvxVR:$src2),
+ (V6_vsathub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsathub_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsathub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsatwh HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsatwh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshuffeb HvxVR:$src1, HvxVR:$src2),
+ (V6_vshuffeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshuffeb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshuffeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshuffob HvxVR:$src1, HvxVR:$src2),
+ (V6_vshuffob HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshuffob_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshuffob HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshufeh HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshufeh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshufoh HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshufoh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshuffvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vshuffvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshuffvdd_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vshuffvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdealvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vdealvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdealvdd_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vdealvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshufoeh HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshufoeh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoeh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshufoeb HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshufoeb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vshufoeb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdealh HvxVR:$src1),
+ (V6_vdealh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdealh_128B HvxVR:$src1),
+ (V6_vdealh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdealb HvxVR:$src1),
+ (V6_vdealb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdealb_128B HvxVR:$src1),
+ (V6_vdealb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdealb4w HvxVR:$src1, HvxVR:$src2),
+ (V6_vdealb4w HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdealb4w_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vdealb4w HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshuffh HvxVR:$src1),
+ (V6_vshuffh HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshuffh_128B HvxVR:$src1),
+ (V6_vshuffh HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vshuffb HvxVR:$src1),
+ (V6_vshuffb HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vshuffb_128B HvxVR:$src1),
+ (V6_vshuffb HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_extractw HvxVR:$src1, IntRegs:$src2),
+ (V6_extractw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_extractw_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_extractw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vinsertwr HvxVR:$src1, IntRegs:$src2),
+ (V6_vinsertwr HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vinsertwr_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vinsertwr HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_lvsplatw IntRegs:$src1),
(V6_lvsplatw IntRegs:$src1)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_lvsplatw_128B IntRegs:$src1),
(V6_lvsplatw IntRegs:$src1)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vaddhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddhnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vaddhnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdmpyhsusat HvxVR:$src1, IntRegs:$src2),
- (V6_vdmpyhsusat HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdmpyhsusat_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vdmpyhsusat HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_pred_not HvxQR:$src1),
- (V6_pred_not HvxQR:$src1)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_pred_not_128B HvxQR:$src1),
- (V6_pred_not HvxQR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vassignp HvxWR:$src1),
+ (V6_vassignp HvxWR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vassignp_128B HvxWR:$src1),
+ (V6_vassignp HvxWR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vassign HvxVR:$src1),
+ (V6_vassign HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vassign_128B HvxVR:$src1),
+ (V6_vassign HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcombine HvxVR:$src1, HvxVR:$src2),
+ (V6_vcombine HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcombine_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vcombine HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdelta HvxVR:$src1, HvxVR:$src2),
+ (V6_vdelta HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdelta_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vdelta HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrdelta HvxVR:$src1, HvxVR:$src2),
+ (V6_vrdelta HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrdelta_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrdelta HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcl0w HvxVR:$src1),
+ (V6_vcl0w HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcl0w_128B HvxVR:$src1),
+ (V6_vcl0w HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vcl0h HvxVR:$src1),
+ (V6_vcl0h HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vcl0h_128B HvxVR:$src1),
+ (V6_vcl0h HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnormamtw HvxVR:$src1),
+ (V6_vnormamtw HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnormamtw_128B HvxVR:$src1),
+ (V6_vnormamtw HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vnormamth HvxVR:$src1),
+ (V6_vnormamth HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vnormamth_128B HvxVR:$src1),
+ (V6_vnormamth HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vpopcounth HvxVR:$src1),
+ (V6_vpopcounth HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vpopcounth_128B HvxVR:$src1),
+ (V6_vpopcounth HvxVR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvvb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvvb HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_oracc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4),
+ (V6_vlutvvb_oracc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_oracc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4),
+ (V6_vlutvvb_oracc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvwh HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vlutvwh_oracc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4),
(V6_vlutvwh_oracc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4)>, Requires<[HasV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vlutvwh_oracc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4),
(V6_vlutvwh_oracc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, IntRegsLow8:$src4)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyiewh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpyiewh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyiewh_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpyiewh_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdealvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vdealvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdealvdd_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vdealvdd HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vavgw HvxVR:$src1, HvxVR:$src2),
- (V6_vavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vavgw_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdmpyhsusat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vdmpyhsusat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdmpyhsusat_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vdmpyhsusat_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vgtw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vgtw_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vgtw_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vtmpyhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vtmpyhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vtmpyhb_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vtmpyhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddhw HvxVR:$src1, HvxVR:$src2),
- (V6_vaddhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddhw_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vaddhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vaddhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddhq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vaddhq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpyubv HvxVR:$src1, HvxVR:$src2),
- (V6_vrmpyubv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpyubv_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vrmpyubv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubh HvxVR:$src1, HvxVR:$src2),
- (V6_vsubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vsubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3),
- (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpyubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3),
- (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vminw HvxVR:$src1, HvxVR:$src2),
- (V6_vminw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vminw_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vminw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyubv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpyubv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyubv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpyubv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_pred_xor HvxQR:$src1, HvxQR:$src2),
- (V6_pred_xor HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_pred_xor_128B HvxQR:$src1, HvxQR:$src2),
- (V6_pred_xor HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_veqb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_veqb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_veqb_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_veqb_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyiewuh HvxVR:$src1, HvxVR:$src2),
- (V6_vmpyiewuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyiewuh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vmpyiewuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpybusv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpybusv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpybusv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpybusv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vavguhrnd HvxVR:$src1, HvxVR:$src2),
- (V6_vavguhrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vavguhrnd_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vavguhrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyowh_rnd HvxVR:$src1, HvxVR:$src2),
- (V6_vmpyowh_rnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyowh_rnd_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vmpyowh_rnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubwsat HvxVR:$src1, HvxVR:$src2),
- (V6_vsubwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubwsat_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vsubwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubuhw HvxVR:$src1, HvxVR:$src2),
- (V6_vsubuhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubuhw_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vsubuhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4),
- (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpybusi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4),
- (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vasrw HvxVR:$src1, IntRegs:$src2),
- (V6_vasrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vasrw_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vasrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vasrh HvxVR:$src1, IntRegs:$src2),
- (V6_vasrh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vasrh_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vasrh HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyuhv HvxVR:$src1, HvxVR:$src2),
- (V6_vmpyuhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyuhv_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vmpyuhv HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vasrhbrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vasrhbrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vasrhbrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vasrhbrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubuhsat_dv HvxWR:$src1, HvxWR:$src2),
- (V6_vsubuhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubuhsat_dv_128B HvxWR:$src1, HvxWR:$src2),
- (V6_vsubuhsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vabsdiffw HvxVR:$src1, HvxVR:$src2),
- (V6_vabsdiffw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vabsdiffw_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vabsdiffw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_hi HvxWR:$src1),
+ (V6_hi HvxWR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_hi_128B HvxWR:$src1),
+ (V6_hi HvxWR:$src1)>, Requires<[HasV60, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_lo HvxWR:$src1),
+ (V6_lo HvxWR:$src1)>, Requires<[HasV60, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_lo_128B HvxWR:$src1),
+ (V6_lo HvxWR:$src1)>, Requires<[HasV60, UseHVX128B]>;
// V62 HVX Instructions.
-def: Pat<(int_hexagon_V6_vandnqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3),
- (V6_vandnqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vandnqrt_acc_128B HvxVR:$src1, HvxQR:$src2, IntRegs:$src3),
- (V6_vandnqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddclbh HvxVR:$src1, HvxVR:$src2),
- (V6_vaddclbh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddclbh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vaddclbh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyowh_64_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpyowh_64_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyowh_64_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vmpyowh_64_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyewuh_64 HvxVR:$src1, HvxVR:$src2),
- (V6_vmpyewuh_64 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyewuh_64_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vmpyewuh_64 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsatuwuh HvxVR:$src1, HvxVR:$src2),
- (V6_vsatuwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsatuwuh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vsatuwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_shuffeqh HvxQR:$src1, HvxQR:$src2),
- (V6_shuffeqh HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_shuffeqh_128B HvxQR:$src1, HvxQR:$src2),
- (V6_shuffeqh HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_shuffeqw HvxQR:$src1, HvxQR:$src2),
- (V6_shuffeqw HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_shuffeqw_128B HvxQR:$src1, HvxQR:$src2),
- (V6_shuffeqw HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_ldcnpnt0 PredRegs:$src1, IntRegs:$src2),
- (V6_ldcnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_ldcnpnt0_128B PredRegs:$src1, IntRegs:$src2),
- (V6_ldcnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
- (V6_vsubcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubcarry_128B HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
- (V6_vsubcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vasrhbsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vasrhbsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vasrhbsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vasrhbsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vminb HvxVR:$src1, HvxVR:$src2),
- (V6_vminb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vminb_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vminb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpauhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vmpauhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpauhb_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vmpauhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vaddhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddhw_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vaddhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vlsrb HvxVR:$src1, IntRegs:$src2),
(V6_vlsrb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vlsrb_128B HvxVR:$src1, IntRegs:$src2),
(V6_vlsrb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
- (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vlutvwhi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
- (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2),
- (V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddububb_sat_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubbsat_dv HvxWR:$src1, HvxWR:$src2),
- (V6_vsubbsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubbsat_dv_128B HvxWR:$src1, HvxWR:$src2),
- (V6_vsubbsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_ldtp0 PredRegs:$src1, IntRegs:$src2),
- (V6_ldtp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_ldtp0_128B PredRegs:$src1, IntRegs:$src2),
- (V6_ldtp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4),
- (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vlutvvb_oracci_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4),
- (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2),
- (V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubuwsat_dv_128B HvxWR:$src1, HvxWR:$src2),
- (V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_ldpnt0 PredRegs:$src1, IntRegs:$src2),
- (V6_ldpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_ldpnt0_128B PredRegs:$src1, IntRegs:$src2),
- (V6_ldpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vandvnqv HvxQR:$src1, HvxVR:$src2),
- (V6_vandvnqv HvxQR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vandvnqv_128B HvxQR:$src1, HvxVR:$src2),
- (V6_vandvnqv HvxQR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_lvsplatb IntRegs:$src1),
- (V6_lvsplatb IntRegs:$src1)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_lvsplatb_128B IntRegs:$src1),
- (V6_lvsplatb IntRegs:$src1)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_lvsplath IntRegs:$src1),
- (V6_lvsplath IntRegs:$src1)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_lvsplath_128B IntRegs:$src1),
- (V6_lvsplath IntRegs:$src1)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_ldtpnt0 PredRegs:$src1, IntRegs:$src2),
- (V6_ldtpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_ldtpnt0_128B PredRegs:$src1, IntRegs:$src2),
- (V6_ldtpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vlutvwh_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vlutvwh_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vlutvwh_nm_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vlutvwh_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_ldnpnt0 PredRegs:$src1, IntRegs:$src2),
- (V6_ldnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_ldnpnt0_128B PredRegs:$src1, IntRegs:$src2),
- (V6_ldnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpauhb HvxWR:$src1, IntRegs:$src2),
- (V6_vmpauhb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpauhb_128B HvxWR:$src1, IntRegs:$src2),
- (V6_vmpauhb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_ldtnp0 PredRegs:$src1, IntRegs:$src2),
- (V6_ldtnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_ldtnp0_128B PredRegs:$src1, IntRegs:$src2),
- (V6_ldtnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrwuhrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasruwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasruwuhrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrhbsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhbsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrhbsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasrhbsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrounduwuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vrounduwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrounduwuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrounduwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vrounduhub HvxVR:$src1, HvxVR:$src2),
(V6_vrounduhub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vrounduhub_128B HvxVR:$src1, HvxVR:$src2),
(V6_vrounduhub HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vadduhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vadduhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vadduhw_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vadduhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_ldcp0 PredRegs:$src1, IntRegs:$src2),
- (V6_ldcp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_ldcp0_128B PredRegs:$src1, IntRegs:$src2),
- (V6_ldcp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vadduwsat HvxVR:$src1, HvxVR:$src2),
(V6_vadduwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vadduwsat_128B HvxVR:$src1, HvxVR:$src2),
(V6_vadduwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_ldtnpnt0 PredRegs:$src1, IntRegs:$src2),
- (V6_ldtnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_ldtnpnt0_128B PredRegs:$src1, IntRegs:$src2),
- (V6_ldtnpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduwsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vadduwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduwsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vadduwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubuwsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubuwsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubuwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubuwsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vaddbsat HvxVR:$src1, HvxVR:$src2),
(V6_vaddbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vaddbsat_128B HvxVR:$src1, HvxVR:$src2),
(V6_vaddbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vandnqrt HvxQR:$src1, IntRegs:$src2),
- (V6_vandnqrt HvxQR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vandnqrt_128B HvxQR:$src1, IntRegs:$src2),
- (V6_vandnqrt HvxQR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddbsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddbsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddbsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vaddbsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubbsat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubbsat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubbsat_dv HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubbsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubbsat_dv_128B HvxWR:$src1, HvxWR:$src2),
+ (V6_vsubbsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vaddcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddcarry_128B HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vaddcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vsubcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubcarry_128B HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vsubcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddububb_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsubububb_sat HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsubububb_sat_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsubububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddhw_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vadduhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vadduhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vadduhw_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vadduhw_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddubh_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddubh_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddubh_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vaddubh_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyewuh_64 HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyewuh_64 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyewuh_64_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmpyewuh_64 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_64_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_64_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyowh_64_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
+ (V6_vmpyowh_64_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpauhb HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpauhb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpauhb_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpauhb HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpauhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpauhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpauhb_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpauhb_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyiwub HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyiwub_128B HvxVR:$src1, IntRegs:$src2),
+ (V6_vmpyiwub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vmpyiwub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
(V6_vmpyiwub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vmpyiwub_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
(V6_vmpyiwub_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmaxb HvxVR:$src1, HvxVR:$src2),
- (V6_vmaxb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmaxb_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vmaxb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandnqrt HvxQR:$src1, IntRegs:$src2),
+ (V6_vandnqrt HvxQR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandnqrt_128B HvxQR:$src1, IntRegs:$src2),
+ (V6_vandnqrt HvxQR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandnqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3),
+ (V6_vandnqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandnqrt_acc_128B HvxVR:$src1, HvxQR:$src2, IntRegs:$src3),
+ (V6_vandnqrt_acc HvxVR:$src1, HvxQR:$src2, IntRegs:$src3)>, Requires<[HasV62, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vandvqv HvxQR:$src1, HvxVR:$src2),
(V6_vandvqv HvxQR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vandvqv_128B HvxQR:$src1, HvxVR:$src2),
(V6_vandvqv HvxQR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
- (V6_vaddcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddcarry_128B HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
- (V6_vaddcarry HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vasrwuhrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
- (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vlutvvbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
- (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubuwsat HvxVR:$src1, HvxVR:$src2),
- (V6_vsubuwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubuwsat_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vsubuwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddbsat_dv HvxWR:$src1, HvxWR:$src2),
- (V6_vaddbsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddbsat_dv_128B HvxWR:$src1, HvxWR:$src2),
- (V6_vaddbsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_ldnp0 PredRegs:$src1, IntRegs:$src2),
- (V6_ldnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_ldnp0_128B PredRegs:$src1, IntRegs:$src2),
- (V6_ldnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vasruwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vasruwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vasruwuhrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vasruwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrounduwuh HvxVR:$src1, HvxVR:$src2),
- (V6_vrounduwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrounduwuh_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vrounduwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vlutvvb_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vlutvvb_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vlutvvb_nm_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vlutvvb_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vandvnqv HvxQR:$src1, HvxVR:$src2),
+ (V6_vandvnqv HvxQR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vandvnqv_128B HvxQR:$src1, HvxVR:$src2),
+ (V6_vandvnqv HvxQR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
def: Pat<(int_hexagon_V6_pred_scalar2v2 IntRegs:$src1),
(V6_pred_scalar2v2 IntRegs:$src1)>, Requires<[HasV62, UseHVX64B]>;
def: Pat<(int_hexagon_V6_pred_scalar2v2_128B IntRegs:$src1),
(V6_pred_scalar2v2 IntRegs:$src1)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_ldp0 PredRegs:$src1, IntRegs:$src2),
- (V6_ldp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_ldp0_128B PredRegs:$src1, IntRegs:$src2),
- (V6_ldp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaddubh_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vaddubh_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddubh_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
- (V6_vaddubh_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_shuffeqw HvxQR:$src1, HvxQR:$src2),
+ (V6_shuffeqw HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_shuffeqw_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_shuffeqw HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_shuffeqh HvxQR:$src1, HvxQR:$src2),
+ (V6_shuffeqh HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_shuffeqh_128B HvxQR:$src1, HvxQR:$src2),
+ (V6_shuffeqh HvxQR:$src1, HvxQR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmaxb HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmaxb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vmaxb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vminb HvxVR:$src1, HvxVR:$src2),
+ (V6_vminb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vminb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vminb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vsatuwuh HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatuwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vsatuwuh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vsatuwuh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_lvsplath IntRegs:$src1),
+ (V6_lvsplath IntRegs:$src1)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_lvsplath_128B IntRegs:$src1),
+ (V6_lvsplath IntRegs:$src1)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_lvsplatb IntRegs:$src1),
+ (V6_lvsplatb IntRegs:$src1)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_lvsplatb_128B IntRegs:$src1),
+ (V6_lvsplatb IntRegs:$src1)>, Requires<[HasV62, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vaddclbw HvxVR:$src1, HvxVR:$src2),
(V6_vaddclbw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vaddclbw_128B HvxVR:$src1, HvxVR:$src2),
(V6_vaddclbw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_ldcpnt0 PredRegs:$src1, IntRegs:$src2),
- (V6_ldcpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_ldcpnt0_128B PredRegs:$src1, IntRegs:$src2),
- (V6_ldcpnt0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vadduwsat_dv HvxWR:$src1, HvxWR:$src2),
- (V6_vadduwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vadduwsat_dv_128B HvxWR:$src1, HvxWR:$src2),
- (V6_vadduwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyiwub HvxVR:$src1, IntRegs:$src2),
- (V6_vmpyiwub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyiwub_128B HvxVR:$src1, IntRegs:$src2),
- (V6_vmpyiwub HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubububb_sat HvxVR:$src1, HvxVR:$src2),
- (V6_vsubububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubububb_sat_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vsubububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_ldcnp0 PredRegs:$src1, IntRegs:$src2),
- (V6_ldcnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_ldcnp0_128B PredRegs:$src1, IntRegs:$src2),
- (V6_ldcnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddclbh HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddclbh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddclbh_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vaddclbh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
+ (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
+ (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4),
+ (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_oracci_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4),
+ (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
+ (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvwhi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3),
+ (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4),
(V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vlutvwh_oracci_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4),
(V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vsubbsat HvxVR:$src1, HvxVR:$src2),
- (V6_vsubbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vsubbsat_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vsubbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvvb_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvvb_nm_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvvb_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvwh_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlutvwh_nm_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vlutvwh_nm HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>;
// V65 HVX Instructions.
+def: Pat<(int_hexagon_V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasruwuhsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasruhubsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
+ (V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
(V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vasruhubrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
(V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2),
- (V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpybub_rtt_128B HvxVR:$src1, DoubleRegs:$src2),
- (V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
- (V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpahhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
- (V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaslh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vasrh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavguw HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavguw_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavguw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vavguwrnd HvxVR:$src1, HvxVR:$src2),
(V6_vavguwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vavguwrnd_128B HvxVR:$src1, HvxVR:$src2),
(V6_vavguwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgb HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgb_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vavgbrnd HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgbrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vavgbrnd_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vavgbrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vnavgb HvxVR:$src1, HvxVR:$src2),
(V6_vnavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vnavgb_128B HvxVR:$src1, HvxVR:$src2),
(V6_vnavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vasrh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
- (V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpauhuhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
- (V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vdd0 ),
+ (V6_vdd0 )>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vdd0_128B ),
+ (V6_vdd0 )>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsb HvxVR:$src1),
+ (V6_vabsb HvxVR:$src1)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsb_128B HvxVR:$src1),
+ (V6_vabsb HvxVR:$src1)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vabsb_sat HvxVR:$src1),
+ (V6_vabsb_sat HvxVR:$src1)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vabsb_sat_128B HvxVR:$src1),
+ (V6_vabsb_sat HvxVR:$src1)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabuu HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpabuu HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabuu_128B HvxWR:$src1, IntRegs:$src2),
+ (V6_vmpabuu HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpabuu_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
+ (V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
(V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vmpyh_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3),
(V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3),
- (V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpybub_rtt_acc_128B HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3),
- (V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vavgb HvxVR:$src1, HvxVR:$src2),
- (V6_vavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vavgb_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaslh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vavguw HvxVR:$src1, HvxVR:$src2),
- (V6_vavguw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vavguw_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vavguw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vlut4 HvxVR:$src1, DoubleRegs:$src2),
- (V6_vlut4 HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vlut4_128B HvxVR:$src1, DoubleRegs:$src2),
- (V6_vlut4 HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpyuhe_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
- (V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2),
- (V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpyub_rtt_128B HvxVR:$src1, DoubleRegs:$src2),
- (V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpahhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpauhuhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
+ (V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
(V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vmpsuhuhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3),
(V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vasruhubsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vlut4 HvxVR:$src1, DoubleRegs:$src2),
+ (V6_vlut4 HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vlut4_128B HvxVR:$src1, DoubleRegs:$src2),
+ (V6_vlut4 HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vmpyuhe HvxVR:$src1, IntRegs:$src2),
(V6_vmpyuhe HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vmpyuhe_128B HvxVR:$src1, IntRegs:$src2),
(V6_vmpyuhe HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3),
- (V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrmpyub_rtt_acc_128B HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3),
- (V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vasruwuhsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3),
- (V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpabuu_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3),
- (V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vprefixqw HvxQR:$src1),
- (V6_vprefixqw HvxQR:$src1)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vprefixqw_128B HvxQR:$src1),
- (V6_vprefixqw HvxQR:$src1)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vprefixqh HvxQR:$src1),
- (V6_vprefixqh HvxQR:$src1)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vprefixqh_128B HvxQR:$src1),
- (V6_vprefixqh HvxQR:$src1)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vmpyuhe_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3),
+ (V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vscattermw_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vscattermh_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vscattermw_add_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vscattermh_add_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4),
+ (V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5),
+ (V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vscattermwq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5),
+ (V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5),
+ (V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vscattermhq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5),
+ (V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4),
+ (V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vscattermhw_128B IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4),
+ (V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5),
+ (V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vscattermhwq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5),
+ (V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4),
+ (V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vscattermhw_add_128B IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4),
+ (V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vprefixqb HvxQR:$src1),
(V6_vprefixqb HvxQR:$src1)>, Requires<[HasV65, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vprefixqb_128B HvxQR:$src1),
(V6_vprefixqb HvxQR:$src1)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vabsb HvxVR:$src1),
- (V6_vabsb HvxVR:$src1)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vabsb_128B HvxVR:$src1),
- (V6_vabsb HvxVR:$src1)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vavgbrnd HvxVR:$src1, HvxVR:$src2),
- (V6_vavgbrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vavgbrnd_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vavgbrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vdd0 ),
- (V6_vdd0 )>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vdd0_128B ),
- (V6_vdd0 )>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vmpabuu HvxWR:$src1, IntRegs:$src2),
- (V6_vmpabuu HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vmpabuu_128B HvxWR:$src1, IntRegs:$src2),
- (V6_vmpabuu HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vabsb_sat HvxVR:$src1),
- (V6_vabsb_sat HvxVR:$src1)>, Requires<[HasV65, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vabsb_sat_128B HvxVR:$src1),
- (V6_vabsb_sat HvxVR:$src1)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vprefixqh HvxQR:$src1),
+ (V6_vprefixqh HvxQR:$src1)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vprefixqh_128B HvxQR:$src1),
+ (V6_vprefixqh HvxQR:$src1)>, Requires<[HasV65, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vprefixqw HvxQR:$src1),
+ (V6_vprefixqw HvxQR:$src1)>, Requires<[HasV65, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vprefixqw_128B HvxQR:$src1),
+ (V6_vprefixqw HvxQR:$src1)>, Requires<[HasV65, UseHVX128B]>;
// V66 HVX Instructions.
-def: Pat<(int_hexagon_V6_vaddcarrysat HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
- (V6_vaddcarrysat HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV66, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vaddcarrysat_128B HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
- (V6_vaddcarrysat HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV66, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vrotr HvxVR:$src1, HvxVR:$src2),
+ (V6_vrotr HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV66, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vrotr_128B HvxVR:$src1, HvxVR:$src2),
+ (V6_vrotr HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV66, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vasr_into HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_vasr_into HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV66, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vasr_into_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3),
(V6_vasr_into HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV66, UseHVX128B]>;
+def: Pat<(int_hexagon_V6_vaddcarrysat HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vaddcarrysat HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV66, UseHVX64B]>;
+def: Pat<(int_hexagon_V6_vaddcarrysat_128B HvxVR:$src1, HvxVR:$src2, HvxQR:$src3),
+ (V6_vaddcarrysat HvxVR:$src1, HvxVR:$src2, HvxQR:$src3)>, Requires<[HasV66, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vsatdw HvxVR:$src1, HvxVR:$src2),
(V6_vsatdw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV66, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vsatdw_128B HvxVR:$src1, HvxVR:$src2),
(V6_vsatdw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV66, UseHVX128B]>;
-def: Pat<(int_hexagon_V6_vrotr HvxVR:$src1, HvxVR:$src2),
- (V6_vrotr HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV66, UseHVX64B]>;
-def: Pat<(int_hexagon_V6_vrotr_128B HvxVR:$src1, HvxVR:$src2),
- (V6_vrotr HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV66, UseHVX128B]>;
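The "def: Pat<...>" records above are instruction-selection patterns: each one maps an int_hexagon_V6_* intrinsic (and its _128B variant) onto the matching HVX machine instruction, gated on the subtarget predicates in Requires<[...]> (HasV62/HasV65/HasV66 together with UseHVX64B or UseHVX128B). A minimal sketch of what one such mapping looks like from user code follows; the builtin name assumes the usual clang __builtin_HEXAGON_<intrinsic> convention and the vector typedef assumes 64-byte HVX mode, neither of which is taken from this diff.

/* Hedged sketch, not part of this change: calling one HVX intrinsic
   whose selection pattern appears above.  Assumed build line
   (approximate): clang --target=hexagon -mv62 -mhvx -mhvx-length=64B */
typedef long HVX_Vector __attribute__((__vector_size__(64), __aligned__(64)));

HVX_Vector max_bytes(HVX_Vector a, HVX_Vector b) {
  /* The pattern for int_hexagon_V6_vmaxb is what lets instruction
     selection lower this call to a single V6_vmaxb instruction. */
  return __builtin_HEXAGON_V6_vmaxb(a, b);
}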
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepMappings.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepMappings.td
index 22ee495b25e6..3fca1aee9a60 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepMappings.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepMappings.td
@@ -5,7 +5,7 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// Automatically generated file, please consult code owner before editing.
+// Automatically generated file, do not edit!
//===----------------------------------------------------------------------===//
def A2_negAlias : InstAlias<"$Rd32 = neg($Rs32)", (A2_subri IntRegs:$Rd32, 0, IntRegs:$Rs32)>;
@@ -94,6 +94,8 @@ def L4_sub_memopw_zomapAlias : InstAlias<"memw($Rs32) -= $Rt32", (L4_sub_memopw_
def L6_deallocframe_map_to_rawAlias : InstAlias<"deallocframe", (L2_deallocframe D15, R30)>;
def L6_return_map_to_rawAlias : InstAlias<"dealloc_return", (L4_return D15, R30)>;
def M2_mpyuiAlias : InstAlias<"$Rd32 = mpyui($Rs32,$Rt32)", (M2_mpyi IntRegs:$Rd32, IntRegs:$Rs32, IntRegs:$Rt32)>;
+def M7_vdmpyAlias : InstAlias<"$Rdd32 = vdmpyw($Rss32,$Rtt32)", (M7_dcmpyrwc DoubleRegs:$Rdd32, DoubleRegs:$Rss32, DoubleRegs:$Rtt32)>;
+def M7_vdmpy_accAlias : InstAlias<"$Rxx32 += vdmpyw($Rss32,$Rtt32)", (M7_dcmpyrwc_acc DoubleRegs:$Rxx32, DoubleRegs:$Rss32, DoubleRegs:$Rtt32)>;
def S2_pstorerbf_zomapAlias : InstAlias<"if (!$Pv4) memb($Rs32) = $Rt32", (S2_pstorerbf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
def S2_pstorerbnewf_zomapAlias : InstAlias<"if (!$Pv4) memb($Rs32) = $Nt8.new", (S2_pstorerbnewf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
def S2_pstorerbnewt_zomapAlias : InstAlias<"if ($Pv4) memb($Rs32) = $Nt8.new", (S2_pstorerbnewt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
@@ -253,16 +255,9 @@ def V6_vaslwv_altAlias : InstAlias<"$Vd32 = vaslw($Vu32,$Vv32)", (V6_vaslwv HvxV
def V6_vasr_into_altAlias : InstAlias<"$Vxx32 = vasrinto($Vu32,$Vv32)", (V6_vasr_into HvxWR:$Vxx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
def V6_vasrh_acc_altAlias : InstAlias<"$Vx32 += vasrh($Vu32,$Rt32)", (V6_vasrh_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
def V6_vasrh_altAlias : InstAlias<"$Vd32 = vasrh($Vu32,$Rt32)", (V6_vasrh HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
-def V6_vasrhbrndsat_altAlias : InstAlias<"$Vd32 = vasrhb($Vu32,$Vv32,$Rt8):rnd:sat", (V6_vasrhbrndsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>;
-def V6_vasrhubrndsat_altAlias : InstAlias<"$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):rnd:sat", (V6_vasrhubrndsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>;
-def V6_vasrhubsat_altAlias : InstAlias<"$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):sat", (V6_vasrhubsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>;
def V6_vasrhv_altAlias : InstAlias<"$Vd32 = vasrh($Vu32,$Vv32)", (V6_vasrhv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
def V6_vasrw_acc_altAlias : InstAlias<"$Vx32 += vasrw($Vu32,$Rt32)", (V6_vasrw_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
def V6_vasrw_altAlias : InstAlias<"$Vd32 = vasrw($Vu32,$Rt32)", (V6_vasrw HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
-def V6_vasrwh_altAlias : InstAlias<"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8)", (V6_vasrwhsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>;
-def V6_vasrwhrndsat_altAlias : InstAlias<"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):rnd:sat", (V6_vasrwhrndsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>;
-def V6_vasrwhsat_altAlias : InstAlias<"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):sat", (V6_vasrwhsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>;
-def V6_vasrwuhsat_altAlias : InstAlias<"$Vd32 = vasrwuh($Vu32,$Vv32,$Rt8):sat", (V6_vasrwuhsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>;
def V6_vasrwv_altAlias : InstAlias<"$Vd32 = vasrw($Vu32,$Vv32)", (V6_vasrwv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
def V6_vavgb_altAlias : InstAlias<"$Vd32 = vavgb($Vu32,$Vv32)", (V6_vavgb HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
def V6_vavgbrnd_altAlias : InstAlias<"$Vd32 = vavgb($Vu32,$Vv32):rnd", (V6_vavgbrnd HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>;
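The new HexagonDepMask.h introduced below carries a generated InstructionEncodings[] table in which each entry pairs an instruction tag with three 32-bit mask/value words and a trailing flag. A minimal sketch of how a (mask, expected-value) table of this shape is typically consulted follows; the struct and field names are illustrative assumptions, since the real HexagonInstruction type is defined elsewhere in the backend and is not part of this hunk.

#include <stdint.h>
#include <stddef.h>

/* Illustrative stand-in for the generated table's element type.  The
   field names and their meanings are assumptions, matched to the
   column order seen below: fixed-bit mask, expected opcode bits, a
   further bit mask, and a trailing flag word. */
typedef struct {
  uint32_t CompareMask;
  uint32_t Expected;
  uint32_t BitMask;
  uint32_t Flags;
} EncodingRowSketch;

/* Return the index of the first row whose fixed bits match insn, or -1
   if no row matches: the usual (insn & mask) == expected test applied
   to a mask/opcode table such as InstructionEncodings[]. */
static int match_encoding(uint32_t insn, const EncodingRowSketch *rows, size_t n) {
  for (size_t i = 0; i < n; ++i)
    if ((insn & rows[i].CompareMask) == rows[i].Expected)
      return (int)i;
  return -1;
}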
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepMask.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepMask.h
new file mode 100644
index 000000000000..742fe2d14d5b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepMask.h
@@ -0,0 +1,2821 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, do not edit!
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPMASK_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPMASK_H
+
+// clang-format off
+HexagonInstruction InstructionEncodings[] = {
+{ /*Tag:A2_addi*/
+ /*Rd32=add(Rs32,#s16)*/
+ 0xf0000000,
+ 0xb0000000,
+ 0x0fe03fe0,
+ 0 },
+{ /*Tag:A2_andir*/
+ /*Rd32=and(Rs32,#s10)*/
+ 0xffc00000,
+ 0x76000000,
+ 0x00203fe0,
+ 0 },
+{ /*Tag:A2_combineii*/
+ /*Rdd32=combine(#s8,#S8)*/
+ 0xff800000,
+ 0x7c000000,
+ 0x00001fe0,
+ 0 },
+{ /*Tag:A2_orir*/
+ /*Rd32=or(Rs32,#s10)*/
+ 0xffc00000,
+ 0x76800000,
+ 0x00203fe0,
+ 0 },
+{ /*Tag:A2_paddif*/
+ /*if (!Pu4) Rd32=add(Rs32,#s8)*/
+ 0xff802000,
+ 0x74800000,
+ 0x00001fe0,
+ 0 },
+{ /*Tag:A2_paddifnew*/
+ /*if (!Pu4.new) Rd32=add(Rs32,#s8)*/
+ 0xff802000,
+ 0x74802000,
+ 0x00001fe0,
+ 0 },
+{ /*Tag:A2_paddit*/
+ /*if (Pu4) Rd32=add(Rs32,#s8)*/
+ 0xff802000,
+ 0x74000000,
+ 0x00001fe0,
+ 0 },
+{ /*Tag:A2_padditnew*/
+ /*if (Pu4.new) Rd32=add(Rs32,#s8)*/
+ 0xff802000,
+ 0x74002000,
+ 0x00001fe0,
+ 0 },
+{ /*Tag:A2_subri*/
+ /*Rd32=sub(#s10,Rs32)*/
+ 0xffc00000,
+ 0x76400000,
+ 0x00203fe0,
+ 0 },
+{ /*Tag:A2_tfrsi*/
+ /*Rd32=#s16*/
+ 0xff000000,
+ 0x78000000,
+ 0x00df3fe0,
+ 0 },
+{ /*Tag:A4_cmpbgtui*/
+ /*Pd4=cmpb.gtu(Rs32,#u7)*/
+ 0xff601018,
+ 0xdd400000,
+ 0x00000fe0,
+ 0 },
+{ /*Tag:A4_cmpheqi*/
+ /*Pd4=cmph.eq(Rs32,#s8)*/
+ 0xff600018,
+ 0xdd000008,
+ 0x00001fe0,
+ 0 },
+{ /*Tag:A4_cmphgti*/
+ /*Pd4=cmph.gt(Rs32,#s8)*/
+ 0xff600018,
+ 0xdd200008,
+ 0x00001fe0,
+ 0 },
+{ /*Tag:A4_cmphgtui*/
+ /*Pd4=cmph.gtu(Rs32,#u7)*/
+ 0xff601018,
+ 0xdd400008,
+ 0x00000fe0,
+ 0 },
+{ /*Tag:A4_combineii*/
+ /*Rdd32=combine(#s8,#U6)*/
+ 0xff800000,
+ 0x7c800000,
+ 0x001f2000,
+ 0 },
+{ /*Tag:A4_combineir*/
+ /*Rdd32=combine(#s8,Rs32)*/
+ 0xff602000,
+ 0x73202000,
+ 0x00001fe0,
+ 0 },
+{ /*Tag:A4_combineri*/
+ /*Rdd32=combine(Rs32,#s8)*/
+ 0xff602000,
+ 0x73002000,
+ 0x00001fe0,
+ 0 },
+{ /*Tag:A4_rcmpeqi*/
+ /*Rd32=cmp.eq(Rs32,#s8)*/
+ 0xff602000,
+ 0x73402000,
+ 0x00001fe0,
+ 0 },
+{ /*Tag:A4_rcmpneqi*/
+ /*Rd32=!cmp.eq(Rs32,#s8)*/
+ 0xff602000,
+ 0x73602000,
+ 0x00001fe0,
+ 0 },
+{ /*Tag:C2_cmoveif*/
+ /*if (!Pu4) Rd32=#s12*/
+ 0xff902000,
+ 0x7e800000,
+ 0x000f1fe0,
+ 0 },
+{ /*Tag:C2_cmoveit*/
+ /*if (Pu4) Rd32=#s12*/
+ 0xff902000,
+ 0x7e000000,
+ 0x000f1fe0,
+ 0 },
+{ /*Tag:C2_cmovenewif*/
+ /*if (!Pu4.new) Rd32=#s12*/
+ 0xff902000,
+ 0x7e802000,
+ 0x000f1fe0,
+ 0 },
+{ /*Tag:C2_cmovenewit*/
+ /*if (Pu4.new) Rd32=#s12*/
+ 0xff902000,
+ 0x7e002000,
+ 0x000f1fe0,
+ 0 },
+{ /*Tag:C2_cmpeqi*/
+ /*Pd4=cmp.eq(Rs32,#s10)*/
+ 0xffc0001c,
+ 0x75000000,
+ 0x00203fe0,
+ 0 },
+{ /*Tag:C2_cmpgti*/
+ /*Pd4=cmp.gt(Rs32,#s10)*/
+ 0xffc0001c,
+ 0x75400000,
+ 0x00203fe0,
+ 0 },
+{ /*Tag:C2_cmpgtui*/
+ /*Pd4=cmp.gtu(Rs32,#u9)*/
+ 0xffe0001c,
+ 0x75800000,
+ 0x00003fe0,
+ 0 },
+{ /*Tag:C2_muxii*/
+ /*Rd32=mux(Pu4,#s8,#S8)*/
+ 0xfe000000,
+ 0x7a000000,
+ 0x00001fe0,
+ 0 },
+{ /*Tag:C2_muxir*/
+ /*Rd32=mux(Pu4,Rs32,#s8)*/
+ 0xff802000,
+ 0x73000000,
+ 0x00001fe0,
+ 0 },
+{ /*Tag:C2_muxri*/
+ /*Rd32=mux(Pu4,#s8,Rs32)*/
+ 0xff802000,
+ 0x73800000,
+ 0x00001fe0,
+ 0 },
+{ /*Tag:C4_addipc*/
+ /*Rd32=add(pc,#u6)*/
+ 0xffff0000,
+ 0x6a490000,
+ 0x00001f80,
+ 0 },
+{ /*Tag:C4_cmpltei*/
+ /*Pd4=!cmp.gt(Rs32,#s10)*/
+ 0xffc0001c,
+ 0x75400010,
+ 0x00203fe0,
+ 0 },
+{ /*Tag:C4_cmplteui*/
+ /*Pd4=!cmp.gtu(Rs32,#u9)*/
+ 0xffe0001c,
+ 0x75800010,
+ 0x00003fe0,
+ 0 },
+{ /*Tag:C4_cmpneqi*/
+ /*Pd4=!cmp.eq(Rs32,#s10)*/
+ 0xffc0001c,
+ 0x75000010,
+ 0x00203fe0,
+ 0 },
+{ /*Tag:J2_call*/
+ /*call #r22:2*/
+ 0xfe000001,
+ 0x5a000000,
+ 0x01ff3ffe,
+ 0 },
+{ /*Tag:J2_callf*/
+ /*if (!Pu4) call #r15:2*/
+ 0xff200800,
+ 0x5d200000,
+ 0x00df20fe,
+ 0 },
+{ /*Tag:J2_callt*/
+ /*if (Pu4) call #r15:2*/
+ 0xff200800,
+ 0x5d000000,
+ 0x00df20fe,
+ 0 },
+{ /*Tag:J2_jump*/
+ /*jump #r22:2*/
+ 0xfe000000,
+ 0x58000000,
+ 0x01ff3ffe,
+ 0 },
+{ /*Tag:J2_jumpf*/
+ /*if (!Pu4) jump:nt #r15:2*/
+ 0xff201800,
+ 0x5c200000,
+ 0x00df20fe,
+ 0 },
+{ /*Tag:J2_jumpfnew*/
+ /*if (!Pu4.new) jump:nt #r15:2*/
+ 0xff201800,
+ 0x5c200800,
+ 0x00df20fe,
+ 0 },
+{ /*Tag:J2_jumpfnewpt*/
+ /*if (!Pu4.new) jump:t #r15:2*/
+ 0xff201800,
+ 0x5c201800,
+ 0x00df20fe,
+ 0 },
+{ /*Tag:J2_jumpfpt*/
+ /*if (!Pu4) jump:t #r15:2*/
+ 0xff201800,
+ 0x5c201000,
+ 0x00df20fe,
+ 0 },
+{ /*Tag:J2_jumpt*/
+ /*if (Pu4) jump:nt #r15:2*/
+ 0xff201800,
+ 0x5c000000,
+ 0x00df20fe,
+ 0 },
+{ /*Tag:J2_jumptnew*/
+ /*if (Pu4.new) jump:nt #r15:2*/
+ 0xff201800,
+ 0x5c000800,
+ 0x00df20fe,
+ 0 },
+{ /*Tag:J2_jumptnewpt*/
+ /*if (Pu4.new) jump:t #r15:2*/
+ 0xff201800,
+ 0x5c001800,
+ 0x00df20fe,
+ 0 },
+{ /*Tag:J2_jumptpt*/
+ /*if (Pu4) jump:t #r15:2*/
+ 0xff201800,
+ 0x5c001000,
+ 0x00df20fe,
+ 0 },
+{ /*Tag:J2_loop0i*/
+ /*loop0(#r7:2,#U10)*/
+ 0xffe00000,
+ 0x69000000,
+ 0x00001f18,
+ 0 },
+{ /*Tag:J2_loop0r*/
+ /*loop0(#r7:2,Rs32)*/
+ 0xffe00000,
+ 0x60000000,
+ 0x00001f18,
+ 0 },
+{ /*Tag:J2_loop1i*/
+ /*loop1(#r7:2,#U10)*/
+ 0xffe00000,
+ 0x69200000,
+ 0x00001f18,
+ 0 },
+{ /*Tag:J2_loop1r*/
+ /*loop1(#r7:2,Rs32)*/
+ 0xffe00000,
+ 0x60200000,
+ 0x00001f18,
+ 0 },
+{ /*Tag:J2_ploop1si*/
+ /*p3=sp1loop0(#r7:2,#U10)*/
+ 0xffe00000,
+ 0x69a00000,
+ 0x00001f18,
+ 0 },
+{ /*Tag:J2_ploop1sr*/
+ /*p3=sp1loop0(#r7:2,Rs32)*/
+ 0xffe00000,
+ 0x60a00000,
+ 0x00001f18,
+ 0 },
+{ /*Tag:J2_ploop2si*/
+ /*p3=sp2loop0(#r7:2,#U10)*/
+ 0xffe00000,
+ 0x69c00000,
+ 0x00001f18,
+ 0 },
+{ /*Tag:J2_ploop2sr*/
+ /*p3=sp2loop0(#r7:2,Rs32)*/
+ 0xffe00000,
+ 0x60c00000,
+ 0x00001f18,
+ 0 },
+{ /*Tag:J2_ploop3si*/
+ /*p3=sp3loop0(#r7:2,#U10)*/
+ 0xffe00000,
+ 0x69e00000,
+ 0x00001f18,
+ 0 },
+{ /*Tag:J2_ploop3sr*/
+ /*p3=sp3loop0(#r7:2,Rs32)*/
+ 0xffe00000,
+ 0x60e00000,
+ 0x00001f18,
+ 0 },
+{ /*Tag:J4_cmpeq_f_jumpnv_nt*/
+ /*if (!cmp.eq(Ns8.new,Rt32)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x20400000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeq_f_jumpnv_t*/
+ /*if (!cmp.eq(Ns8.new,Rt32)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x20402000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeq_fp0_jump_nt*/
+ /*p0=cmp.eq(Rs16,Rt16); if (!p0.new) jump:nt #r9:2*/
+ 0xffc03000,
+ 0x14400000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeq_fp0_jump_t*/
+ /*p0=cmp.eq(Rs16,Rt16); if (!p0.new) jump:t #r9:2*/
+ 0xffc03000,
+ 0x14402000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeq_fp1_jump_nt*/
+ /*p1=cmp.eq(Rs16,Rt16); if (!p1.new) jump:nt #r9:2*/
+ 0xffc03000,
+ 0x14401000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeq_fp1_jump_t*/
+ /*p1=cmp.eq(Rs16,Rt16); if (!p1.new) jump:t #r9:2*/
+ 0xffc03000,
+ 0x14403000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeq_t_jumpnv_nt*/
+ /*if (cmp.eq(Ns8.new,Rt32)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x20000000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeq_t_jumpnv_t*/
+ /*if (cmp.eq(Ns8.new,Rt32)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x20002000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeq_tp0_jump_nt*/
+ /*p0=cmp.eq(Rs16,Rt16); if (p0.new) jump:nt #r9:2*/
+ 0xffc03000,
+ 0x14000000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeq_tp0_jump_t*/
+ /*p0=cmp.eq(Rs16,Rt16); if (p0.new) jump:t #r9:2*/
+ 0xffc03000,
+ 0x14002000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeq_tp1_jump_nt*/
+ /*p1=cmp.eq(Rs16,Rt16); if (p1.new) jump:nt #r9:2*/
+ 0xffc03000,
+ 0x14001000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeq_tp1_jump_t*/
+ /*p1=cmp.eq(Rs16,Rt16); if (p1.new) jump:t #r9:2*/
+ 0xffc03000,
+ 0x14003000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqi_f_jumpnv_nt*/
+ /*if (!cmp.eq(Ns8.new,#U5)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x24400000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqi_f_jumpnv_t*/
+ /*if (!cmp.eq(Ns8.new,#U5)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x24402000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqi_fp0_jump_nt*/
+ /*p0=cmp.eq(Rs16,#U5); if (!p0.new) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x10400000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqi_fp0_jump_t*/
+ /*p0=cmp.eq(Rs16,#U5); if (!p0.new) jump:t #r9:2*/
+ 0xffc02000,
+ 0x10402000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqi_fp1_jump_nt*/
+ /*p1=cmp.eq(Rs16,#U5); if (!p1.new) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x12400000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqi_fp1_jump_t*/
+ /*p1=cmp.eq(Rs16,#U5); if (!p1.new) jump:t #r9:2*/
+ 0xffc02000,
+ 0x12402000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqi_t_jumpnv_nt*/
+ /*if (cmp.eq(Ns8.new,#U5)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x24000000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqi_t_jumpnv_t*/
+ /*if (cmp.eq(Ns8.new,#U5)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x24002000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqi_tp0_jump_nt*/
+ /*p0=cmp.eq(Rs16,#U5); if (p0.new) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x10000000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqi_tp0_jump_t*/
+ /*p0=cmp.eq(Rs16,#U5); if (p0.new) jump:t #r9:2*/
+ 0xffc02000,
+ 0x10002000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqi_tp1_jump_nt*/
+ /*p1=cmp.eq(Rs16,#U5); if (p1.new) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x12000000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqi_tp1_jump_t*/
+ /*p1=cmp.eq(Rs16,#U5); if (p1.new) jump:t #r9:2*/
+ 0xffc02000,
+ 0x12002000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqn1_f_jumpnv_nt*/
+ /*if (!cmp.eq(Ns8.new,#-1)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x26400000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqn1_f_jumpnv_t*/
+ /*if (!cmp.eq(Ns8.new,#-1)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x26402000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqn1_fp0_jump_nt*/
+ /*p0=cmp.eq(Rs16,#-1); if (!p0.new) jump:nt #r9:2*/
+ 0xffc02300,
+ 0x11c00000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqn1_fp0_jump_t*/
+ /*p0=cmp.eq(Rs16,#-1); if (!p0.new) jump:t #r9:2*/
+ 0xffc02300,
+ 0x11c02000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqn1_fp1_jump_nt*/
+ /*p1=cmp.eq(Rs16,#-1); if (!p1.new) jump:nt #r9:2*/
+ 0xffc02300,
+ 0x13c00000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqn1_fp1_jump_t*/
+ /*p1=cmp.eq(Rs16,#-1); if (!p1.new) jump:t #r9:2*/
+ 0xffc02300,
+ 0x13c02000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqn1_t_jumpnv_nt*/
+ /*if (cmp.eq(Ns8.new,#-1)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x26000000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqn1_t_jumpnv_t*/
+ /*if (cmp.eq(Ns8.new,#-1)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x26002000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqn1_tp0_jump_nt*/
+ /*p0=cmp.eq(Rs16,#-1); if (p0.new) jump:nt #r9:2*/
+ 0xffc02300,
+ 0x11800000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqn1_tp0_jump_t*/
+ /*p0=cmp.eq(Rs16,#-1); if (p0.new) jump:t #r9:2*/
+ 0xffc02300,
+ 0x11802000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqn1_tp1_jump_nt*/
+ /*p1=cmp.eq(Rs16,#-1); if (p1.new) jump:nt #r9:2*/
+ 0xffc02300,
+ 0x13800000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpeqn1_tp1_jump_t*/
+ /*p1=cmp.eq(Rs16,#-1); if (p1.new) jump:t #r9:2*/
+ 0xffc02300,
+ 0x13802000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgt_f_jumpnv_nt*/
+ /*if (!cmp.gt(Ns8.new,Rt32)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x20c00000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgt_f_jumpnv_t*/
+ /*if (!cmp.gt(Ns8.new,Rt32)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x20c02000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgt_fp0_jump_nt*/
+ /*p0=cmp.gt(Rs16,Rt16); if (!p0.new) jump:nt #r9:2*/
+ 0xffc03000,
+ 0x14c00000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgt_fp0_jump_t*/
+ /*p0=cmp.gt(Rs16,Rt16); if (!p0.new) jump:t #r9:2*/
+ 0xffc03000,
+ 0x14c02000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgt_fp1_jump_nt*/
+ /*p1=cmp.gt(Rs16,Rt16); if (!p1.new) jump:nt #r9:2*/
+ 0xffc03000,
+ 0x14c01000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgt_fp1_jump_t*/
+ /*p1=cmp.gt(Rs16,Rt16); if (!p1.new) jump:t #r9:2*/
+ 0xffc03000,
+ 0x14c03000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgt_t_jumpnv_nt*/
+ /*if (cmp.gt(Ns8.new,Rt32)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x20800000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgt_t_jumpnv_t*/
+ /*if (cmp.gt(Ns8.new,Rt32)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x20802000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgt_tp0_jump_nt*/
+ /*p0=cmp.gt(Rs16,Rt16); if (p0.new) jump:nt #r9:2*/
+ 0xffc03000,
+ 0x14800000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgt_tp0_jump_t*/
+ /*p0=cmp.gt(Rs16,Rt16); if (p0.new) jump:t #r9:2*/
+ 0xffc03000,
+ 0x14802000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgt_tp1_jump_nt*/
+ /*p1=cmp.gt(Rs16,Rt16); if (p1.new) jump:nt #r9:2*/
+ 0xffc03000,
+ 0x14801000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgt_tp1_jump_t*/
+ /*p1=cmp.gt(Rs16,Rt16); if (p1.new) jump:t #r9:2*/
+ 0xffc03000,
+ 0x14803000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgti_f_jumpnv_nt*/
+ /*if (!cmp.gt(Ns8.new,#U5)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x24c00000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgti_f_jumpnv_t*/
+ /*if (!cmp.gt(Ns8.new,#U5)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x24c02000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgti_fp0_jump_nt*/
+ /*p0=cmp.gt(Rs16,#U5); if (!p0.new) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x10c00000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgti_fp0_jump_t*/
+ /*p0=cmp.gt(Rs16,#U5); if (!p0.new) jump:t #r9:2*/
+ 0xffc02000,
+ 0x10c02000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgti_fp1_jump_nt*/
+ /*p1=cmp.gt(Rs16,#U5); if (!p1.new) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x12c00000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgti_fp1_jump_t*/
+ /*p1=cmp.gt(Rs16,#U5); if (!p1.new) jump:t #r9:2*/
+ 0xffc02000,
+ 0x12c02000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgti_t_jumpnv_nt*/
+ /*if (cmp.gt(Ns8.new,#U5)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x24800000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgti_t_jumpnv_t*/
+ /*if (cmp.gt(Ns8.new,#U5)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x24802000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgti_tp0_jump_nt*/
+ /*p0=cmp.gt(Rs16,#U5); if (p0.new) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x10800000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgti_tp0_jump_t*/
+ /*p0=cmp.gt(Rs16,#U5); if (p0.new) jump:t #r9:2*/
+ 0xffc02000,
+ 0x10802000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgti_tp1_jump_nt*/
+ /*p1=cmp.gt(Rs16,#U5); if (p1.new) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x12800000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgti_tp1_jump_t*/
+ /*p1=cmp.gt(Rs16,#U5); if (p1.new) jump:t #r9:2*/
+ 0xffc02000,
+ 0x12802000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtn1_f_jumpnv_nt*/
+ /*if (!cmp.gt(Ns8.new,#-1)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x26c00000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtn1_f_jumpnv_t*/
+ /*if (!cmp.gt(Ns8.new,#-1)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x26c02000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtn1_fp0_jump_nt*/
+ /*p0=cmp.gt(Rs16,#-1); if (!p0.new) jump:nt #r9:2*/
+ 0xffc02300,
+ 0x11c00100,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtn1_fp0_jump_t*/
+ /*p0=cmp.gt(Rs16,#-1); if (!p0.new) jump:t #r9:2*/
+ 0xffc02300,
+ 0x11c02100,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtn1_fp1_jump_nt*/
+ /*p1=cmp.gt(Rs16,#-1); if (!p1.new) jump:nt #r9:2*/
+ 0xffc02300,
+ 0x13c00100,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtn1_fp1_jump_t*/
+ /*p1=cmp.gt(Rs16,#-1); if (!p1.new) jump:t #r9:2*/
+ 0xffc02300,
+ 0x13c02100,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtn1_t_jumpnv_nt*/
+ /*if (cmp.gt(Ns8.new,#-1)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x26800000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtn1_t_jumpnv_t*/
+ /*if (cmp.gt(Ns8.new,#-1)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x26802000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtn1_tp0_jump_nt*/
+ /*p0=cmp.gt(Rs16,#-1); if (p0.new) jump:nt #r9:2*/
+ 0xffc02300,
+ 0x11800100,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtn1_tp0_jump_t*/
+ /*p0=cmp.gt(Rs16,#-1); if (p0.new) jump:t #r9:2*/
+ 0xffc02300,
+ 0x11802100,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtn1_tp1_jump_nt*/
+ /*p1=cmp.gt(Rs16,#-1); if (p1.new) jump:nt #r9:2*/
+ 0xffc02300,
+ 0x13800100,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtn1_tp1_jump_t*/
+ /*p1=cmp.gt(Rs16,#-1); if (p1.new) jump:t #r9:2*/
+ 0xffc02300,
+ 0x13802100,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtu_f_jumpnv_nt*/
+ /*if (!cmp.gtu(Ns8.new,Rt32)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x21400000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtu_f_jumpnv_t*/
+ /*if (!cmp.gtu(Ns8.new,Rt32)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x21402000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtu_fp0_jump_nt*/
+ /*p0=cmp.gtu(Rs16,Rt16); if (!p0.new) jump:nt #r9:2*/
+ 0xffc03000,
+ 0x15400000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtu_fp0_jump_t*/
+ /*p0=cmp.gtu(Rs16,Rt16); if (!p0.new) jump:t #r9:2*/
+ 0xffc03000,
+ 0x15402000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtu_fp1_jump_nt*/
+ /*p1=cmp.gtu(Rs16,Rt16); if (!p1.new) jump:nt #r9:2*/
+ 0xffc03000,
+ 0x15401000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtu_fp1_jump_t*/
+ /*p1=cmp.gtu(Rs16,Rt16); if (!p1.new) jump:t #r9:2*/
+ 0xffc03000,
+ 0x15403000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtu_t_jumpnv_nt*/
+ /*if (cmp.gtu(Ns8.new,Rt32)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x21000000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtu_t_jumpnv_t*/
+ /*if (cmp.gtu(Ns8.new,Rt32)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x21002000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtu_tp0_jump_nt*/
+ /*p0=cmp.gtu(Rs16,Rt16); if (p0.new) jump:nt #r9:2*/
+ 0xffc03000,
+ 0x15000000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtu_tp0_jump_t*/
+ /*p0=cmp.gtu(Rs16,Rt16); if (p0.new) jump:t #r9:2*/
+ 0xffc03000,
+ 0x15002000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtu_tp1_jump_nt*/
+ /*p1=cmp.gtu(Rs16,Rt16); if (p1.new) jump:nt #r9:2*/
+ 0xffc03000,
+ 0x15001000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtu_tp1_jump_t*/
+ /*p1=cmp.gtu(Rs16,Rt16); if (p1.new) jump:t #r9:2*/
+ 0xffc03000,
+ 0x15003000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtui_f_jumpnv_nt*/
+ /*if (!cmp.gtu(Ns8.new,#U5)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x25400000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtui_f_jumpnv_t*/
+ /*if (!cmp.gtu(Ns8.new,#U5)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x25402000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtui_fp0_jump_nt*/
+ /*p0=cmp.gtu(Rs16,#U5); if (!p0.new) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x11400000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtui_fp0_jump_t*/
+ /*p0=cmp.gtu(Rs16,#U5); if (!p0.new) jump:t #r9:2*/
+ 0xffc02000,
+ 0x11402000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtui_fp1_jump_nt*/
+ /*p1=cmp.gtu(Rs16,#U5); if (!p1.new) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x13400000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtui_fp1_jump_t*/
+ /*p1=cmp.gtu(Rs16,#U5); if (!p1.new) jump:t #r9:2*/
+ 0xffc02000,
+ 0x13402000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtui_t_jumpnv_nt*/
+ /*if (cmp.gtu(Ns8.new,#U5)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x25000000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtui_t_jumpnv_t*/
+ /*if (cmp.gtu(Ns8.new,#U5)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x25002000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtui_tp0_jump_nt*/
+ /*p0=cmp.gtu(Rs16,#U5); if (p0.new) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x11000000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtui_tp0_jump_t*/
+ /*p0=cmp.gtu(Rs16,#U5); if (p0.new) jump:t #r9:2*/
+ 0xffc02000,
+ 0x11002000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtui_tp1_jump_nt*/
+ /*p1=cmp.gtu(Rs16,#U5); if (p1.new) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x13000000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpgtui_tp1_jump_t*/
+ /*p1=cmp.gtu(Rs16,#U5); if (p1.new) jump:t #r9:2*/
+ 0xffc02000,
+ 0x13002000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmplt_f_jumpnv_nt*/
+ /*if (!cmp.gt(Rt32,Ns8.new)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x21c00000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmplt_f_jumpnv_t*/
+ /*if (!cmp.gt(Rt32,Ns8.new)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x21c02000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmplt_t_jumpnv_nt*/
+ /*if (cmp.gt(Rt32,Ns8.new)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x21800000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmplt_t_jumpnv_t*/
+ /*if (cmp.gt(Rt32,Ns8.new)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x21802000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpltu_f_jumpnv_nt*/
+ /*if (!cmp.gtu(Rt32,Ns8.new)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x22400000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpltu_f_jumpnv_t*/
+ /*if (!cmp.gtu(Rt32,Ns8.new)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x22402000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpltu_t_jumpnv_nt*/
+ /*if (cmp.gtu(Rt32,Ns8.new)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x22000000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_cmpltu_t_jumpnv_t*/
+ /*if (cmp.gtu(Rt32,Ns8.new)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x22002000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_jumpseti*/
+ /*Rd16=#U6 ; jump #r9:2*/
+ 0xff000000,
+ 0x16000000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_jumpsetr*/
+ /*Rd16=Rs16 ; jump #r9:2*/
+ 0xff000000,
+ 0x17000000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_tstbit0_f_jumpnv_nt*/
+ /*if (!tstbit(Ns8.new,#0)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x25c00000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_tstbit0_f_jumpnv_t*/
+ /*if (!tstbit(Ns8.new,#0)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x25c02000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_tstbit0_fp0_jump_nt*/
+ /*p0=tstbit(Rs16,#0); if (!p0.new) jump:nt #r9:2*/
+ 0xffc02300,
+ 0x11c00300,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_tstbit0_fp0_jump_t*/
+ /*p0=tstbit(Rs16,#0); if (!p0.new) jump:t #r9:2*/
+ 0xffc02300,
+ 0x11c02300,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_tstbit0_fp1_jump_nt*/
+ /*p1=tstbit(Rs16,#0); if (!p1.new) jump:nt #r9:2*/
+ 0xffc02300,
+ 0x13c00300,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_tstbit0_fp1_jump_t*/
+ /*p1=tstbit(Rs16,#0); if (!p1.new) jump:t #r9:2*/
+ 0xffc02300,
+ 0x13c02300,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_tstbit0_t_jumpnv_nt*/
+ /*if (tstbit(Ns8.new,#0)) jump:nt #r9:2*/
+ 0xffc02000,
+ 0x25800000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_tstbit0_t_jumpnv_t*/
+ /*if (tstbit(Ns8.new,#0)) jump:t #r9:2*/
+ 0xffc02000,
+ 0x25802000,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_tstbit0_tp0_jump_nt*/
+ /*p0=tstbit(Rs16,#0); if (p0.new) jump:nt #r9:2*/
+ 0xffc02300,
+ 0x11800300,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_tstbit0_tp0_jump_t*/
+ /*p0=tstbit(Rs16,#0); if (p0.new) jump:t #r9:2*/
+ 0xffc02300,
+ 0x11802300,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_tstbit0_tp1_jump_nt*/
+ /*p1=tstbit(Rs16,#0); if (p1.new) jump:nt #r9:2*/
+ 0xffc02300,
+ 0x13800300,
+ 0x003000fe,
+ 0 },
+{ /*Tag:J4_tstbit0_tp1_jump_t*/
+ /*p1=tstbit(Rs16,#0); if (p1.new) jump:t #r9:2*/
+ 0xffc02300,
+ 0x13802300,
+ 0x003000fe,
+ 0 },
+{ /*Tag:L2_loadalignb_io*/
+ /*Ryy32=memb_fifo(Rs32+#s11:0)*/
+ 0xf9e00000,
+ 0x90800000,
+ 0x06003fe0,
+ 0 },
+{ /*Tag:L2_loadalignh_io*/
+ /*Ryy32=memh_fifo(Rs32+#s11:1)*/
+ 0xf9e00000,
+ 0x90400000,
+ 0x06003fe0,
+ 0 },
+{ /*Tag:L2_loadbsw2_io*/
+ /*Rd32=membh(Rs32+#s11:1)*/
+ 0xf9e00000,
+ 0x90200000,
+ 0x06003fe0,
+ 0 },
+{ /*Tag:L2_loadbsw4_io*/
+ /*Rdd32=membh(Rs32+#s11:2)*/
+ 0xf9e00000,
+ 0x90e00000,
+ 0x06003fe0,
+ 0 },
+{ /*Tag:L2_loadbzw2_io*/
+ /*Rd32=memubh(Rs32+#s11:1)*/
+ 0xf9e00000,
+ 0x90600000,
+ 0x06003fe0,
+ 0 },
+{ /*Tag:L2_loadbzw4_io*/
+ /*Rdd32=memubh(Rs32+#s11:2)*/
+ 0xf9e00000,
+ 0x90a00000,
+ 0x06003fe0,
+ 0 },
+{ /*Tag:L2_loadrb_io*/
+ /*Rd32=memb(Rs32+#s11:0)*/
+ 0xf9e00000,
+ 0x91000000,
+ 0x06003fe0,
+ 0 },
+{ /*Tag:L2_loadrbgp*/
+ /*Rd32=memb(gp+#u16:0)*/
+ 0xf9e00000,
+ 0x49000000,
+ 0x061f3fe0,
+ 0 },
+{ /*Tag:L2_loadrd_io*/
+ /*Rdd32=memd(Rs32+#s11:3)*/
+ 0xf9e00000,
+ 0x91c00000,
+ 0x06003fe0,
+ 0 },
+{ /*Tag:L2_loadrdgp*/
+ /*Rdd32=memd(gp+#u16:3)*/
+ 0xf9e00000,
+ 0x49c00000,
+ 0x061f3fe0,
+ 0 },
+{ /*Tag:L2_loadrh_io*/
+ /*Rd32=memh(Rs32+#s11:1)*/
+ 0xf9e00000,
+ 0x91400000,
+ 0x06003fe0,
+ 0 },
+{ /*Tag:L2_loadrhgp*/
+ /*Rd32=memh(gp+#u16:1)*/
+ 0xf9e00000,
+ 0x49400000,
+ 0x061f3fe0,
+ 0 },
+{ /*Tag:L2_loadri_io*/
+ /*Rd32=memw(Rs32+#s11:2)*/
+ 0xf9e00000,
+ 0x91800000,
+ 0x06003fe0,
+ 0 },
+{ /*Tag:L2_loadrigp*/
+ /*Rd32=memw(gp+#u16:2)*/
+ 0xf9e00000,
+ 0x49800000,
+ 0x061f3fe0,
+ 0 },
+{ /*Tag:L2_loadrub_io*/
+ /*Rd32=memub(Rs32+#s11:0)*/
+ 0xf9e00000,
+ 0x91200000,
+ 0x06003fe0,
+ 0 },
+{ /*Tag:L2_loadrubgp*/
+ /*Rd32=memub(gp+#u16:0)*/
+ 0xf9e00000,
+ 0x49200000,
+ 0x061f3fe0,
+ 0 },
+{ /*Tag:L2_loadruh_io*/
+ /*Rd32=memuh(Rs32+#s11:1)*/
+ 0xf9e00000,
+ 0x91600000,
+ 0x06003fe0,
+ 0 },
+{ /*Tag:L2_loadruhgp*/
+ /*Rd32=memuh(gp+#u16:1)*/
+ 0xf9e00000,
+ 0x49600000,
+ 0x061f3fe0,
+ 0 },
+{ /*Tag:L2_ploadrbf_io*/
+ /*if (!Pt4) Rd32=memb(Rs32+#u6:0)*/
+ 0xffe02000,
+ 0x45000000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadrbfnew_io*/
+ /*if (!Pt4.new) Rd32=memb(Rs32+#u6:0)*/
+ 0xffe02000,
+ 0x47000000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadrbt_io*/
+ /*if (Pt4) Rd32=memb(Rs32+#u6:0)*/
+ 0xffe02000,
+ 0x41000000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadrbtnew_io*/
+ /*if (Pt4.new) Rd32=memb(Rs32+#u6:0)*/
+ 0xffe02000,
+ 0x43000000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadrdf_io*/
+ /*if (!Pt4) Rdd32=memd(Rs32+#u6:3)*/
+ 0xffe02000,
+ 0x45c00000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadrdfnew_io*/
+ /*if (!Pt4.new) Rdd32=memd(Rs32+#u6:3)*/
+ 0xffe02000,
+ 0x47c00000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadrdt_io*/
+ /*if (Pt4) Rdd32=memd(Rs32+#u6:3)*/
+ 0xffe02000,
+ 0x41c00000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadrdtnew_io*/
+ /*if (Pt4.new) Rdd32=memd(Rs32+#u6:3)*/
+ 0xffe02000,
+ 0x43c00000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadrhf_io*/
+ /*if (!Pt4) Rd32=memh(Rs32+#u6:1)*/
+ 0xffe02000,
+ 0x45400000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadrhfnew_io*/
+ /*if (!Pt4.new) Rd32=memh(Rs32+#u6:1)*/
+ 0xffe02000,
+ 0x47400000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadrht_io*/
+ /*if (Pt4) Rd32=memh(Rs32+#u6:1)*/
+ 0xffe02000,
+ 0x41400000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadrhtnew_io*/
+ /*if (Pt4.new) Rd32=memh(Rs32+#u6:1)*/
+ 0xffe02000,
+ 0x43400000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadrif_io*/
+ /*if (!Pt4) Rd32=memw(Rs32+#u6:2)*/
+ 0xffe02000,
+ 0x45800000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadrifnew_io*/
+ /*if (!Pt4.new) Rd32=memw(Rs32+#u6:2)*/
+ 0xffe02000,
+ 0x47800000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadrit_io*/
+ /*if (Pt4) Rd32=memw(Rs32+#u6:2)*/
+ 0xffe02000,
+ 0x41800000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadritnew_io*/
+ /*if (Pt4.new) Rd32=memw(Rs32+#u6:2)*/
+ 0xffe02000,
+ 0x43800000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadrubf_io*/
+ /*if (!Pt4) Rd32=memub(Rs32+#u6:0)*/
+ 0xffe02000,
+ 0x45200000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadrubfnew_io*/
+ /*if (!Pt4.new) Rd32=memub(Rs32+#u6:0)*/
+ 0xffe02000,
+ 0x47200000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadrubt_io*/
+ /*if (Pt4) Rd32=memub(Rs32+#u6:0)*/
+ 0xffe02000,
+ 0x41200000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadrubtnew_io*/
+ /*if (Pt4.new) Rd32=memub(Rs32+#u6:0)*/
+ 0xffe02000,
+ 0x43200000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadruhf_io*/
+ /*if (!Pt4) Rd32=memuh(Rs32+#u6:1)*/
+ 0xffe02000,
+ 0x45600000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadruhfnew_io*/
+ /*if (!Pt4.new) Rd32=memuh(Rs32+#u6:1)*/
+ 0xffe02000,
+ 0x47600000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadruht_io*/
+ /*if (Pt4) Rd32=memuh(Rs32+#u6:1)*/
+ 0xffe02000,
+ 0x41600000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L2_ploadruhtnew_io*/
+ /*if (Pt4.new) Rd32=memuh(Rs32+#u6:1)*/
+ 0xffe02000,
+ 0x43600000,
+ 0x000007e0,
+ 0 },
+{ /*Tag:L4_add_memopb_io*/
+ /*memb(Rs32+#u6:0)+=Rt32*/
+ 0xff602060,
+ 0x3e000000,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_add_memoph_io*/
+ /*memh(Rs32+#u6:1)+=Rt32*/
+ 0xff602060,
+ 0x3e200000,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_add_memopw_io*/
+ /*memw(Rs32+#u6:2)+=Rt32*/
+ 0xff602060,
+ 0x3e400000,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_and_memopb_io*/
+ /*memb(Rs32+#u6:0)&=Rt32*/
+ 0xff602060,
+ 0x3e000040,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_and_memoph_io*/
+ /*memh(Rs32+#u6:1)&=Rt32*/
+ 0xff602060,
+ 0x3e200040,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_and_memopw_io*/
+ /*memw(Rs32+#u6:2)&=Rt32*/
+ 0xff602060,
+ 0x3e400040,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_iadd_memopb_io*/
+ /*memb(Rs32+#u6:0)+=#U5*/
+ 0xff602060,
+ 0x3f000000,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_iadd_memoph_io*/
+ /*memh(Rs32+#u6:1)+=#U5*/
+ 0xff602060,
+ 0x3f200000,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_iadd_memopw_io*/
+ /*memw(Rs32+#u6:2)+=#U5*/
+ 0xff602060,
+ 0x3f400000,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_iand_memopb_io*/
+ /*memb(Rs32+#u6:0)=clrbit(#U5)*/
+ 0xff602060,
+ 0x3f000040,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_iand_memoph_io*/
+ /*memh(Rs32+#u6:1)=clrbit(#U5)*/
+ 0xff602060,
+ 0x3f200040,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_iand_memopw_io*/
+ /*memw(Rs32+#u6:2)=clrbit(#U5)*/
+ 0xff602060,
+ 0x3f400040,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_ior_memopb_io*/
+ /*memb(Rs32+#u6:0)=setbit(#U5)*/
+ 0xff602060,
+ 0x3f000060,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_ior_memoph_io*/
+ /*memh(Rs32+#u6:1)=setbit(#U5)*/
+ 0xff602060,
+ 0x3f200060,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_ior_memopw_io*/
+ /*memw(Rs32+#u6:2)=setbit(#U5)*/
+ 0xff602060,
+ 0x3f400060,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_isub_memopb_io*/
+ /*memb(Rs32+#u6:0)-=#U5*/
+ 0xff602060,
+ 0x3f000020,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_isub_memoph_io*/
+ /*memh(Rs32+#u6:1)-=#U5*/
+ 0xff602060,
+ 0x3f200020,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_isub_memopw_io*/
+ /*memw(Rs32+#u6:2)-=#U5*/
+ 0xff602060,
+ 0x3f400020,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_loadalignb_ap*/
+ /*Ryy32=memb_fifo(Re32=#U6)*/
+ 0xffe03000,
+ 0x9a801000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadalignb_ur*/
+ /*Ryy32=memb_fifo(Rt32<<#u2+#U6)*/
+ 0xffe01000,
+ 0x9c801000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadalignh_ap*/
+ /*Ryy32=memh_fifo(Re32=#U6)*/
+ 0xffe03000,
+ 0x9a401000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadalignh_ur*/
+ /*Ryy32=memh_fifo(Rt32<<#u2+#U6)*/
+ 0xffe01000,
+ 0x9c401000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadbsw2_ap*/
+ /*Rd32=membh(Re32=#U6)*/
+ 0xffe03000,
+ 0x9a201000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadbsw2_ur*/
+ /*Rd32=membh(Rt32<<#u2+#U6)*/
+ 0xffe01000,
+ 0x9c201000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadbsw4_ap*/
+ /*Rdd32=membh(Re32=#U6)*/
+ 0xffe03000,
+ 0x9ae01000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadbsw4_ur*/
+ /*Rdd32=membh(Rt32<<#u2+#U6)*/
+ 0xffe01000,
+ 0x9ce01000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadbzw2_ap*/
+ /*Rd32=memubh(Re32=#U6)*/
+ 0xffe03000,
+ 0x9a601000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadbzw2_ur*/
+ /*Rd32=memubh(Rt32<<#u2+#U6)*/
+ 0xffe01000,
+ 0x9c601000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadbzw4_ap*/
+ /*Rdd32=memubh(Re32=#U6)*/
+ 0xffe03000,
+ 0x9aa01000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadbzw4_ur*/
+ /*Rdd32=memubh(Rt32<<#u2+#U6)*/
+ 0xffe01000,
+ 0x9ca01000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadrb_ap*/
+ /*Rd32=memb(Re32=#U6)*/
+ 0xffe03000,
+ 0x9b001000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadrb_ur*/
+ /*Rd32=memb(Rt32<<#u2+#U6)*/
+ 0xffe01000,
+ 0x9d001000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadrd_ap*/
+ /*Rdd32=memd(Re32=#U6)*/
+ 0xffe03000,
+ 0x9bc01000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadrd_ur*/
+ /*Rdd32=memd(Rt32<<#u2+#U6)*/
+ 0xffe01000,
+ 0x9dc01000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadrh_ap*/
+ /*Rd32=memh(Re32=#U6)*/
+ 0xffe03000,
+ 0x9b401000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadrh_ur*/
+ /*Rd32=memh(Rt32<<#u2+#U6)*/
+ 0xffe01000,
+ 0x9d401000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadri_ap*/
+ /*Rd32=memw(Re32=#U6)*/
+ 0xffe03000,
+ 0x9b801000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadri_ur*/
+ /*Rd32=memw(Rt32<<#u2+#U6)*/
+ 0xffe01000,
+ 0x9d801000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadrub_ap*/
+ /*Rd32=memub(Re32=#U6)*/
+ 0xffe03000,
+ 0x9b201000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadrub_ur*/
+ /*Rd32=memub(Rt32<<#u2+#U6)*/
+ 0xffe01000,
+ 0x9d201000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadruh_ap*/
+ /*Rd32=memuh(Re32=#U6)*/
+ 0xffe03000,
+ 0x9b601000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_loadruh_ur*/
+ /*Rd32=memuh(Rt32<<#u2+#U6)*/
+ 0xffe01000,
+ 0x9d601000,
+ 0x00000f60,
+ 0 },
+{ /*Tag:L4_or_memopb_io*/
+ /*memb(Rs32+#u6:0)|=Rt32*/
+ 0xff602060,
+ 0x3e000060,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_or_memoph_io*/
+ /*memh(Rs32+#u6:1)|=Rt32*/
+ 0xff602060,
+ 0x3e200060,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_or_memopw_io*/
+ /*memw(Rs32+#u6:2)|=Rt32*/
+ 0xff602060,
+ 0x3e400060,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_ploadrbf_abs*/
+ /*if (!Pt4) Rd32=memb(#u6)*/
+ 0xffe03880,
+ 0x9f002880,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadrbfnew_abs*/
+ /*if (!Pt4.new) Rd32=memb(#u6)*/
+ 0xffe03880,
+ 0x9f003880,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadrbt_abs*/
+ /*if (Pt4) Rd32=memb(#u6)*/
+ 0xffe03880,
+ 0x9f002080,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadrbtnew_abs*/
+ /*if (Pt4.new) Rd32=memb(#u6)*/
+ 0xffe03880,
+ 0x9f003080,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadrdf_abs*/
+ /*if (!Pt4) Rdd32=memd(#u6)*/
+ 0xffe03880,
+ 0x9fc02880,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadrdfnew_abs*/
+ /*if (!Pt4.new) Rdd32=memd(#u6)*/
+ 0xffe03880,
+ 0x9fc03880,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadrdt_abs*/
+ /*if (Pt4) Rdd32=memd(#u6)*/
+ 0xffe03880,
+ 0x9fc02080,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadrdtnew_abs*/
+ /*if (Pt4.new) Rdd32=memd(#u6)*/
+ 0xffe03880,
+ 0x9fc03080,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadrhf_abs*/
+ /*if (!Pt4) Rd32=memh(#u6)*/
+ 0xffe03880,
+ 0x9f402880,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadrhfnew_abs*/
+ /*if (!Pt4.new) Rd32=memh(#u6)*/
+ 0xffe03880,
+ 0x9f403880,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadrht_abs*/
+ /*if (Pt4) Rd32=memh(#u6)*/
+ 0xffe03880,
+ 0x9f402080,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadrhtnew_abs*/
+ /*if (Pt4.new) Rd32=memh(#u6)*/
+ 0xffe03880,
+ 0x9f403080,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadrif_abs*/
+ /*if (!Pt4) Rd32=memw(#u6)*/
+ 0xffe03880,
+ 0x9f802880,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadrifnew_abs*/
+ /*if (!Pt4.new) Rd32=memw(#u6)*/
+ 0xffe03880,
+ 0x9f803880,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadrit_abs*/
+ /*if (Pt4) Rd32=memw(#u6)*/
+ 0xffe03880,
+ 0x9f802080,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadritnew_abs*/
+ /*if (Pt4.new) Rd32=memw(#u6)*/
+ 0xffe03880,
+ 0x9f803080,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadrubf_abs*/
+ /*if (!Pt4) Rd32=memub(#u6)*/
+ 0xffe03880,
+ 0x9f202880,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadrubfnew_abs*/
+ /*if (!Pt4.new) Rd32=memub(#u6)*/
+ 0xffe03880,
+ 0x9f203880,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadrubt_abs*/
+ /*if (Pt4) Rd32=memub(#u6)*/
+ 0xffe03880,
+ 0x9f202080,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadrubtnew_abs*/
+ /*if (Pt4.new) Rd32=memub(#u6)*/
+ 0xffe03880,
+ 0x9f203080,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadruhf_abs*/
+ /*if (!Pt4) Rd32=memuh(#u6)*/
+ 0xffe03880,
+ 0x9f602880,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadruhfnew_abs*/
+ /*if (!Pt4.new) Rd32=memuh(#u6)*/
+ 0xffe03880,
+ 0x9f603880,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadruht_abs*/
+ /*if (Pt4) Rd32=memuh(#u6)*/
+ 0xffe03880,
+ 0x9f602080,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_ploadruhtnew_abs*/
+ /*if (Pt4.new) Rd32=memuh(#u6)*/
+ 0xffe03880,
+ 0x9f603080,
+ 0x001f0100,
+ 0 },
+{ /*Tag:L4_sub_memopb_io*/
+ /*memb(Rs32+#u6:0)-=Rt32*/
+ 0xff602060,
+ 0x3e000020,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_sub_memoph_io*/
+ /*memh(Rs32+#u6:1)-=Rt32*/
+ 0xff602060,
+ 0x3e200020,
+ 0x00001f80,
+ 0 },
+{ /*Tag:L4_sub_memopw_io*/
+ /*memw(Rs32+#u6:2)-=Rt32*/
+ 0xff602060,
+ 0x3e400020,
+ 0x00001f80,
+ 0 },
+{ /*Tag:M2_accii*/
+ /*Rx32+=add(Rs32,#s8)*/
+ 0xff802000,
+ 0xe2000000,
+ 0x00001fe0,
+ 0 },
+{ /*Tag:M2_macsin*/
+ /*Rx32-=mpyi(Rs32,#u8)*/
+ 0xff802000,
+ 0xe1800000,
+ 0x00001fe0,
+ 0 },
+{ /*Tag:M2_macsip*/
+ /*Rx32+=mpyi(Rs32,#u8)*/
+ 0xff802000,
+ 0xe1000000,
+ 0x00001fe0,
+ 0 },
+{ /*Tag:M2_mpysip*/
+ /*Rd32=+mpyi(Rs32,#u8)*/
+ 0xff802000,
+ 0xe0000000,
+ 0x00001fe0,
+ 0 },
+{ /*Tag:M2_naccii*/
+ /*Rx32-=add(Rs32,#s8)*/
+ 0xff802000,
+ 0xe2800000,
+ 0x00001fe0,
+ 0 },
+{ /*Tag:M4_mpyri_addi*/
+ /*Rd32=add(#u6,mpyi(Rs32,#U6))*/
+ 0xff000000,
+ 0xd8000000,
+ 0x006020e0,
+ 0 },
+{ /*Tag:M4_mpyri_addr*/
+ /*Rd32=add(Ru32,mpyi(Rs32,#u6))*/
+ 0xff800000,
+ 0xdf800000,
+ 0x006020e0,
+ 0 },
+{ /*Tag:M4_mpyrr_addi*/
+ /*Rd32=add(#u6,mpyi(Rs32,Rt32))*/
+ 0xff800000,
+ 0xd7000000,
+ 0x006020e0,
+ 0 },
+{ /*Tag:PS_loadrbabs*/
+ /*Rd32=memb(#u16:0)*/
+ 0xf9e00000,
+ 0x49000000,
+ 0x061f3fe0,
+ 0 },
+{ /*Tag:PS_loadrdabs*/
+ /*Rdd32=memd(#u16:3)*/
+ 0xf9e00000,
+ 0x49c00000,
+ 0x061f3fe0,
+ 0 },
+{ /*Tag:PS_loadrhabs*/
+ /*Rd32=memh(#u16:1)*/
+ 0xf9e00000,
+ 0x49400000,
+ 0x061f3fe0,
+ 0 },
+{ /*Tag:PS_loadriabs*/
+ /*Rd32=memw(#u16:2)*/
+ 0xf9e00000,
+ 0x49800000,
+ 0x061f3fe0,
+ 0 },
+{ /*Tag:PS_loadrubabs*/
+ /*Rd32=memub(#u16:0)*/
+ 0xf9e00000,
+ 0x49200000,
+ 0x061f3fe0,
+ 0 },
+{ /*Tag:PS_loadruhabs*/
+ /*Rd32=memuh(#u16:1)*/
+ 0xf9e00000,
+ 0x49600000,
+ 0x061f3fe0,
+ 0 },
+{ /*Tag:PS_storerbabs*/
+ /*memb(#u16:0)=Rt32*/
+ 0xf9e00000,
+ 0x48000000,
+ 0x061f20ff,
+ 0 },
+{ /*Tag:PS_storerbnewabs*/
+ /*memb(#u16:0)=Nt8.new*/
+ 0xf9e01800,
+ 0x48a00000,
+ 0x061f20ff,
+ 0 },
+{ /*Tag:PS_storerdabs*/
+ /*memd(#u16:3)=Rtt32*/
+ 0xf9e00000,
+ 0x48c00000,
+ 0x061f20ff,
+ 0 },
+{ /*Tag:PS_storerfabs*/
+ /*memh(#u16:1)=Rt32.h*/
+ 0xf9e00000,
+ 0x48600000,
+ 0x061f20ff,
+ 0 },
+{ /*Tag:PS_storerhabs*/
+ /*memh(#u16:1)=Rt32*/
+ 0xf9e00000,
+ 0x48400000,
+ 0x061f20ff,
+ 0 },
+{ /*Tag:PS_storerhnewabs*/
+ /*memh(#u16:1)=Nt8.new*/
+ 0xf9e01800,
+ 0x48a00800,
+ 0x061f20ff,
+ 0 },
+{ /*Tag:PS_storeriabs*/
+ /*memw(#u16:2)=Rt32*/
+ 0xf9e00000,
+ 0x48800000,
+ 0x061f20ff,
+ 0 },
+{ /*Tag:PS_storerinewabs*/
+ /*memw(#u16:2)=Nt8.new*/
+ 0xf9e01800,
+ 0x48a01000,
+ 0x061f20ff,
+ 0 },
+{ /*Tag:S2_pstorerbf_io*/
+ /*if (!Pv4) memb(Rs32+#u6:0)=Rt32*/
+ 0xffe00004,
+ 0x44000000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S2_pstorerbnewf_io*/
+ /*if (!Pv4) memb(Rs32+#u6:0)=Nt8.new*/
+ 0xffe01804,
+ 0x44a00000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S2_pstorerbnewt_io*/
+ /*if (Pv4) memb(Rs32+#u6:0)=Nt8.new*/
+ 0xffe01804,
+ 0x40a00000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S2_pstorerbt_io*/
+ /*if (Pv4) memb(Rs32+#u6:0)=Rt32*/
+ 0xffe00004,
+ 0x40000000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S2_pstorerdf_io*/
+ /*if (!Pv4) memd(Rs32+#u6:3)=Rtt32*/
+ 0xffe00004,
+ 0x44c00000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S2_pstorerdt_io*/
+ /*if (Pv4) memd(Rs32+#u6:3)=Rtt32*/
+ 0xffe00004,
+ 0x40c00000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S2_pstorerff_io*/
+ /*if (!Pv4) memh(Rs32+#u6:1)=Rt32.h*/
+ 0xffe00004,
+ 0x44600000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S2_pstorerft_io*/
+ /*if (Pv4) memh(Rs32+#u6:1)=Rt32.h*/
+ 0xffe00004,
+ 0x40600000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S2_pstorerhf_io*/
+ /*if (!Pv4) memh(Rs32+#u6:1)=Rt32*/
+ 0xffe00004,
+ 0x44400000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S2_pstorerhnewf_io*/
+ /*if (!Pv4) memh(Rs32+#u6:1)=Nt8.new*/
+ 0xffe01804,
+ 0x44a00800,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S2_pstorerhnewt_io*/
+ /*if (Pv4) memh(Rs32+#u6:1)=Nt8.new*/
+ 0xffe01804,
+ 0x40a00800,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S2_pstorerht_io*/
+ /*if (Pv4) memh(Rs32+#u6:1)=Rt32*/
+ 0xffe00004,
+ 0x40400000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S2_pstorerif_io*/
+ /*if (!Pv4) memw(Rs32+#u6:2)=Rt32*/
+ 0xffe00004,
+ 0x44800000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S2_pstorerinewf_io*/
+ /*if (!Pv4) memw(Rs32+#u6:2)=Nt8.new*/
+ 0xffe01804,
+ 0x44a01000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S2_pstorerinewt_io*/
+ /*if (Pv4) memw(Rs32+#u6:2)=Nt8.new*/
+ 0xffe01804,
+ 0x40a01000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S2_pstorerit_io*/
+ /*if (Pv4) memw(Rs32+#u6:2)=Rt32*/
+ 0xffe00004,
+ 0x40800000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S2_storerb_io*/
+ /*memb(Rs32+#s11:0)=Rt32*/
+ 0xf9e00000,
+ 0xa1000000,
+ 0x060020ff,
+ 0 },
+{ /*Tag:S2_storerbgp*/
+ /*memb(gp+#u16:0)=Rt32*/
+ 0xf9e00000,
+ 0x48000000,
+ 0x061f20ff,
+ 0 },
+{ /*Tag:S2_storerbnew_io*/
+ /*memb(Rs32+#s11:0)=Nt8.new*/
+ 0xf9e01800,
+ 0xa1a00000,
+ 0x060020ff,
+ 0 },
+{ /*Tag:S2_storerbnewgp*/
+ /*memb(gp+#u16:0)=Nt8.new*/
+ 0xf9e01800,
+ 0x48a00000,
+ 0x061f20ff,
+ 0 },
+{ /*Tag:S2_storerd_io*/
+ /*memd(Rs32+#s11:3)=Rtt32*/
+ 0xf9e00000,
+ 0xa1c00000,
+ 0x060020ff,
+ 0 },
+{ /*Tag:S2_storerdgp*/
+ /*memd(gp+#u16:3)=Rtt32*/
+ 0xf9e00000,
+ 0x48c00000,
+ 0x061f20ff,
+ 0 },
+{ /*Tag:S2_storerf_io*/
+ /*memh(Rs32+#s11:1)=Rt32.h*/
+ 0xf9e00000,
+ 0xa1600000,
+ 0x060020ff,
+ 0 },
+{ /*Tag:S2_storerfgp*/
+ /*memh(gp+#u16:1)=Rt32.h*/
+ 0xf9e00000,
+ 0x48600000,
+ 0x061f20ff,
+ 0 },
+{ /*Tag:S2_storerh_io*/
+ /*memh(Rs32+#s11:1)=Rt32*/
+ 0xf9e00000,
+ 0xa1400000,
+ 0x060020ff,
+ 0 },
+{ /*Tag:S2_storerhgp*/
+ /*memh(gp+#u16:1)=Rt32*/
+ 0xf9e00000,
+ 0x48400000,
+ 0x061f20ff,
+ 0 },
+{ /*Tag:S2_storerhnew_io*/
+ /*memh(Rs32+#s11:1)=Nt8.new*/
+ 0xf9e01800,
+ 0xa1a00800,
+ 0x060020ff,
+ 0 },
+{ /*Tag:S2_storerhnewgp*/
+ /*memh(gp+#u16:1)=Nt8.new*/
+ 0xf9e01800,
+ 0x48a00800,
+ 0x061f20ff,
+ 0 },
+{ /*Tag:S2_storeri_io*/
+ /*memw(Rs32+#s11:2)=Rt32*/
+ 0xf9e00000,
+ 0xa1800000,
+ 0x060020ff,
+ 0 },
+{ /*Tag:S2_storerigp*/
+ /*memw(gp+#u16:2)=Rt32*/
+ 0xf9e00000,
+ 0x48800000,
+ 0x061f20ff,
+ 0 },
+{ /*Tag:S2_storerinew_io*/
+ /*memw(Rs32+#s11:2)=Nt8.new*/
+ 0xf9e01800,
+ 0xa1a01000,
+ 0x060020ff,
+ 0 },
+{ /*Tag:S2_storerinewgp*/
+ /*memw(gp+#u16:2)=Nt8.new*/
+ 0xf9e01800,
+ 0x48a01000,
+ 0x061f20ff,
+ 0 },
+{ /*Tag:S4_addaddi*/
+ /*Rd32=add(Rs32,add(Ru32,#s6))*/
+ 0xff800000,
+ 0xdb000000,
+ 0x006020e0,
+ 0 },
+{ /*Tag:S4_addi_asl_ri*/
+ /*Rx32=add(#u8,asl(Rx32,#U5))*/
+ 0xff000016,
+ 0xde000004,
+ 0x00e020e8,
+ 0 },
+{ /*Tag:S4_addi_lsr_ri*/
+ /*Rx32=add(#u8,lsr(Rx32,#U5))*/
+ 0xff000016,
+ 0xde000014,
+ 0x00e020e8,
+ 0 },
+{ /*Tag:S4_andi_asl_ri*/
+ /*Rx32=and(#u8,asl(Rx32,#U5))*/
+ 0xff000016,
+ 0xde000000,
+ 0x00e020e8,
+ 0 },
+{ /*Tag:S4_andi_lsr_ri*/
+ /*Rx32=and(#u8,lsr(Rx32,#U5))*/
+ 0xff000016,
+ 0xde000010,
+ 0x00e020e8,
+ 0 },
+{ /*Tag:S4_or_andi*/
+ /*Rx32|=and(Rs32,#s10)*/
+ 0xffc00000,
+ 0xda000000,
+ 0x00203fe0,
+ 0 },
+{ /*Tag:S4_or_andix*/
+ /*Rx32=or(Ru32,and(Rx32,#s10))*/
+ 0xffc00000,
+ 0xda400000,
+ 0x00203fe0,
+ 0 },
+{ /*Tag:S4_or_ori*/
+ /*Rx32|=or(Rs32,#s10)*/
+ 0xffc00000,
+ 0xda800000,
+ 0x00203fe0,
+ 0 },
+{ /*Tag:S4_ori_asl_ri*/
+ /*Rx32=or(#u8,asl(Rx32,#U5))*/
+ 0xff000016,
+ 0xde000002,
+ 0x00e020e8,
+ 0 },
+{ /*Tag:S4_ori_lsr_ri*/
+ /*Rx32=or(#u8,lsr(Rx32,#U5))*/
+ 0xff000016,
+ 0xde000012,
+ 0x00e020e8,
+ 0 },
+{ /*Tag:S4_pstorerbf_abs*/
+ /*if (!Pv4) memb(#u6)=Rt32*/
+ 0xffe02084,
+ 0xaf000084,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerbfnew_abs*/
+ /*if (!Pv4.new) memb(#u6)=Rt32*/
+ 0xffe02084,
+ 0xaf002084,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerbfnew_io*/
+ /*if (!Pv4.new) memb(Rs32+#u6:0)=Rt32*/
+ 0xffe00004,
+ 0x46000000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S4_pstorerbnewf_abs*/
+ /*if (!Pv4) memb(#u6)=Nt8.new*/
+ 0xffe03884,
+ 0xafa00084,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerbnewfnew_abs*/
+ /*if (!Pv4.new) memb(#u6)=Nt8.new*/
+ 0xffe03884,
+ 0xafa02084,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerbnewfnew_io*/
+ /*if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new*/
+ 0xffe01804,
+ 0x46a00000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S4_pstorerbnewt_abs*/
+ /*if (Pv4) memb(#u6)=Nt8.new*/
+ 0xffe03884,
+ 0xafa00080,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerbnewtnew_abs*/
+ /*if (Pv4.new) memb(#u6)=Nt8.new*/
+ 0xffe03884,
+ 0xafa02080,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerbnewtnew_io*/
+ /*if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new*/
+ 0xffe01804,
+ 0x42a00000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S4_pstorerbt_abs*/
+ /*if (Pv4) memb(#u6)=Rt32*/
+ 0xffe02084,
+ 0xaf000080,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerbtnew_abs*/
+ /*if (Pv4.new) memb(#u6)=Rt32*/
+ 0xffe02084,
+ 0xaf002080,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerbtnew_io*/
+ /*if (Pv4.new) memb(Rs32+#u6:0)=Rt32*/
+ 0xffe00004,
+ 0x42000000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S4_pstorerdf_abs*/
+ /*if (!Pv4) memd(#u6)=Rtt32*/
+ 0xffe02084,
+ 0xafc00084,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerdfnew_abs*/
+ /*if (!Pv4.new) memd(#u6)=Rtt32*/
+ 0xffe02084,
+ 0xafc02084,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerdfnew_io*/
+ /*if (!Pv4.new) memd(Rs32+#u6:3)=Rtt32*/
+ 0xffe00004,
+ 0x46c00000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S4_pstorerdt_abs*/
+ /*if (Pv4) memd(#u6)=Rtt32*/
+ 0xffe02084,
+ 0xafc00080,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerdtnew_abs*/
+ /*if (Pv4.new) memd(#u6)=Rtt32*/
+ 0xffe02084,
+ 0xafc02080,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerdtnew_io*/
+ /*if (Pv4.new) memd(Rs32+#u6:3)=Rtt32*/
+ 0xffe00004,
+ 0x42c00000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S4_pstorerff_abs*/
+ /*if (!Pv4) memh(#u6)=Rt32.h*/
+ 0xffe02084,
+ 0xaf600084,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerffnew_abs*/
+ /*if (!Pv4.new) memh(#u6)=Rt32.h*/
+ 0xffe02084,
+ 0xaf602084,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerffnew_io*/
+ /*if (!Pv4.new) memh(Rs32+#u6:1)=Rt32.h*/
+ 0xffe00004,
+ 0x46600000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S4_pstorerft_abs*/
+ /*if (Pv4) memh(#u6)=Rt32.h*/
+ 0xffe02084,
+ 0xaf600080,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerftnew_abs*/
+ /*if (Pv4.new) memh(#u6)=Rt32.h*/
+ 0xffe02084,
+ 0xaf602080,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerftnew_io*/
+ /*if (Pv4.new) memh(Rs32+#u6:1)=Rt32.h*/
+ 0xffe00004,
+ 0x42600000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S4_pstorerhf_abs*/
+ /*if (!Pv4) memh(#u6)=Rt32*/
+ 0xffe02084,
+ 0xaf400084,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerhfnew_abs*/
+ /*if (!Pv4.new) memh(#u6)=Rt32*/
+ 0xffe02084,
+ 0xaf402084,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerhfnew_io*/
+ /*if (!Pv4.new) memh(Rs32+#u6:1)=Rt32*/
+ 0xffe00004,
+ 0x46400000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S4_pstorerhnewf_abs*/
+ /*if (!Pv4) memh(#u6)=Nt8.new*/
+ 0xffe03884,
+ 0xafa00884,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerhnewfnew_abs*/
+ /*if (!Pv4.new) memh(#u6)=Nt8.new*/
+ 0xffe03884,
+ 0xafa02884,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerhnewfnew_io*/
+ /*if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new*/
+ 0xffe01804,
+ 0x46a00800,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S4_pstorerhnewt_abs*/
+ /*if (Pv4) memh(#u6)=Nt8.new*/
+ 0xffe03884,
+ 0xafa00880,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerhnewtnew_abs*/
+ /*if (Pv4.new) memh(#u6)=Nt8.new*/
+ 0xffe03884,
+ 0xafa02880,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerhnewtnew_io*/
+ /*if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new*/
+ 0xffe01804,
+ 0x42a00800,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S4_pstorerht_abs*/
+ /*if (Pv4) memh(#u6)=Rt32*/
+ 0xffe02084,
+ 0xaf400080,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerhtnew_abs*/
+ /*if (Pv4.new) memh(#u6)=Rt32*/
+ 0xffe02084,
+ 0xaf402080,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerhtnew_io*/
+ /*if (Pv4.new) memh(Rs32+#u6:1)=Rt32*/
+ 0xffe00004,
+ 0x42400000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S4_pstorerif_abs*/
+ /*if (!Pv4) memw(#u6)=Rt32*/
+ 0xffe02084,
+ 0xaf800084,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerifnew_abs*/
+ /*if (!Pv4.new) memw(#u6)=Rt32*/
+ 0xffe02084,
+ 0xaf802084,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerifnew_io*/
+ /*if (!Pv4.new) memw(Rs32+#u6:2)=Rt32*/
+ 0xffe00004,
+ 0x46800000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S4_pstorerinewf_abs*/
+ /*if (!Pv4) memw(#u6)=Nt8.new*/
+ 0xffe03884,
+ 0xafa01084,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerinewfnew_abs*/
+ /*if (!Pv4.new) memw(#u6)=Nt8.new*/
+ 0xffe03884,
+ 0xafa03084,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerinewfnew_io*/
+ /*if (!Pv4.new) memw(Rs32+#u6:2)=Nt8.new*/
+ 0xffe01804,
+ 0x46a01000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S4_pstorerinewt_abs*/
+ /*if (Pv4) memw(#u6)=Nt8.new*/
+ 0xffe03884,
+ 0xafa01080,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerinewtnew_abs*/
+ /*if (Pv4.new) memw(#u6)=Nt8.new*/
+ 0xffe03884,
+ 0xafa03080,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstorerinewtnew_io*/
+ /*if (Pv4.new) memw(Rs32+#u6:2)=Nt8.new*/
+ 0xffe01804,
+ 0x42a01000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S4_pstorerit_abs*/
+ /*if (Pv4) memw(#u6)=Rt32*/
+ 0xffe02084,
+ 0xaf800080,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstoreritnew_abs*/
+ /*if (Pv4.new) memw(#u6)=Rt32*/
+ 0xffe02084,
+ 0xaf802080,
+ 0x00030078,
+ 0 },
+{ /*Tag:S4_pstoreritnew_io*/
+ /*if (Pv4.new) memw(Rs32+#u6:2)=Rt32*/
+ 0xffe00004,
+ 0x42800000,
+ 0x000020f8,
+ 0 },
+{ /*Tag:S4_storeirb_io*/
+ /*memb(Rs32+#u6:0)=#S8*/
+ 0xfe600000,
+ 0x3c000000,
+ 0x0000207f,
+ 0 },
+{ /*Tag:S4_storeirbf_io*/
+ /*if (!Pv4) memb(Rs32+#u6:0)=#S6*/
+ 0xffe00000,
+ 0x38800000,
+ 0x0000201f,
+ 0 },
+{ /*Tag:S4_storeirbfnew_io*/
+ /*if (!Pv4.new) memb(Rs32+#u6:0)=#S6*/
+ 0xffe00000,
+ 0x39800000,
+ 0x0000201f,
+ 0 },
+{ /*Tag:S4_storeirbt_io*/
+ /*if (Pv4) memb(Rs32+#u6:0)=#S6*/
+ 0xffe00000,
+ 0x38000000,
+ 0x0000201f,
+ 0 },
+{ /*Tag:S4_storeirbtnew_io*/
+ /*if (Pv4.new) memb(Rs32+#u6:0)=#S6*/
+ 0xffe00000,
+ 0x39000000,
+ 0x0000201f,
+ 0 },
+{ /*Tag:S4_storeirh_io*/
+ /*memh(Rs32+#u6:1)=#S8*/
+ 0xfe600000,
+ 0x3c200000,
+ 0x0000207f,
+ 0 },
+{ /*Tag:S4_storeirhf_io*/
+ /*if (!Pv4) memh(Rs32+#u6:1)=#S6*/
+ 0xffe00000,
+ 0x38a00000,
+ 0x0000201f,
+ 0 },
+{ /*Tag:S4_storeirhfnew_io*/
+ /*if (!Pv4.new) memh(Rs32+#u6:1)=#S6*/
+ 0xffe00000,
+ 0x39a00000,
+ 0x0000201f,
+ 0 },
+{ /*Tag:S4_storeirht_io*/
+ /*if (Pv4) memh(Rs32+#u6:1)=#S6*/
+ 0xffe00000,
+ 0x38200000,
+ 0x0000201f,
+ 0 },
+{ /*Tag:S4_storeirhtnew_io*/
+ /*if (Pv4.new) memh(Rs32+#u6:1)=#S6*/
+ 0xffe00000,
+ 0x39200000,
+ 0x0000201f,
+ 0 },
+{ /*Tag:S4_storeiri_io*/
+ /*memw(Rs32+#u6:2)=#S8*/
+ 0xfe600000,
+ 0x3c400000,
+ 0x0000207f,
+ 0 },
+{ /*Tag:S4_storeirif_io*/
+ /*if (!Pv4) memw(Rs32+#u6:2)=#S6*/
+ 0xffe00000,
+ 0x38c00000,
+ 0x0000201f,
+ 0 },
+{ /*Tag:S4_storeirifnew_io*/
+ /*if (!Pv4.new) memw(Rs32+#u6:2)=#S6*/
+ 0xffe00000,
+ 0x39c00000,
+ 0x0000201f,
+ 0 },
+{ /*Tag:S4_storeirit_io*/
+ /*if (Pv4) memw(Rs32+#u6:2)=#S6*/
+ 0xffe00000,
+ 0x38400000,
+ 0x0000201f,
+ 0 },
+{ /*Tag:S4_storeiritnew_io*/
+ /*if (Pv4.new) memw(Rs32+#u6:2)=#S6*/
+ 0xffe00000,
+ 0x39400000,
+ 0x0000201f,
+ 0 },
+{ /*Tag:S4_storerb_ap*/
+ /*memb(Re32=#U6)=Rt32*/
+ 0xffe02080,
+ 0xab000080,
+ 0x0000003f,
+ 0 },
+{ /*Tag:S4_storerb_ur*/
+ /*memb(Ru32<<#u2+#U6)=Rt32*/
+ 0xffe00080,
+ 0xad000080,
+ 0x0000003f,
+ 0 },
+{ /*Tag:S4_storerbnew_ap*/
+ /*memb(Re32=#U6)=Nt8.new*/
+ 0xffe03880,
+ 0xaba00080,
+ 0x0000003f,
+ 0 },
+{ /*Tag:S4_storerbnew_ur*/
+ /*memb(Ru32<<#u2+#U6)=Nt8.new*/
+ 0xffe01880,
+ 0xada00080,
+ 0x0000003f,
+ 0 },
+{ /*Tag:S4_storerd_ap*/
+ /*memd(Re32=#U6)=Rtt32*/
+ 0xffe02080,
+ 0xabc00080,
+ 0x0000003f,
+ 0 },
+{ /*Tag:S4_storerd_ur*/
+ /*memd(Ru32<<#u2+#U6)=Rtt32*/
+ 0xffe00080,
+ 0xadc00080,
+ 0x0000003f,
+ 0 },
+{ /*Tag:S4_storerf_ap*/
+ /*memh(Re32=#U6)=Rt32.h*/
+ 0xffe02080,
+ 0xab600080,
+ 0x0000003f,
+ 0 },
+{ /*Tag:S4_storerf_ur*/
+ /*memh(Ru32<<#u2+#U6)=Rt32.h*/
+ 0xffe00080,
+ 0xad600080,
+ 0x0000003f,
+ 0 },
+{ /*Tag:S4_storerh_ap*/
+ /*memh(Re32=#U6)=Rt32*/
+ 0xffe02080,
+ 0xab400080,
+ 0x0000003f,
+ 0 },
+{ /*Tag:S4_storerh_ur*/
+ /*memh(Ru32<<#u2+#U6)=Rt32*/
+ 0xffe00080,
+ 0xad400080,
+ 0x0000003f,
+ 0 },
+{ /*Tag:S4_storerhnew_ap*/
+ /*memh(Re32=#U6)=Nt8.new*/
+ 0xffe03880,
+ 0xaba00880,
+ 0x0000003f,
+ 0 },
+{ /*Tag:S4_storerhnew_ur*/
+ /*memh(Ru32<<#u2+#U6)=Nt8.new*/
+ 0xffe01880,
+ 0xada00880,
+ 0x0000003f,
+ 0 },
+{ /*Tag:S4_storeri_ap*/
+ /*memw(Re32=#U6)=Rt32*/
+ 0xffe02080,
+ 0xab800080,
+ 0x0000003f,
+ 0 },
+{ /*Tag:S4_storeri_ur*/
+ /*memw(Ru32<<#u2+#U6)=Rt32*/
+ 0xffe00080,
+ 0xad800080,
+ 0x0000003f,
+ 0 },
+{ /*Tag:S4_storerinew_ap*/
+ /*memw(Re32=#U6)=Nt8.new*/
+ 0xffe03880,
+ 0xaba01080,
+ 0x0000003f,
+ 0 },
+{ /*Tag:S4_storerinew_ur*/
+ /*memw(Ru32<<#u2+#U6)=Nt8.new*/
+ 0xffe01880,
+ 0xada01080,
+ 0x0000003f,
+ 0 },
+{ /*Tag:S4_subaddi*/
+ /*Rd32=add(Rs32,sub(#s6,Ru32))*/
+ 0xff800000,
+ 0xdb800000,
+ 0x006020e0,
+ 0 },
+{ /*Tag:S4_subi_asl_ri*/
+ /*Rx32=sub(#u8,asl(Rx32,#U5))*/
+ 0xff000016,
+ 0xde000006,
+ 0x00e020e8,
+ 0 },
+{ /*Tag:S4_subi_lsr_ri*/
+ /*Rx32=sub(#u8,lsr(Rx32,#U5))*/
+ 0xff000016,
+ 0xde000016,
+ 0x00e020e8,
+ 0 },
+{ /*Tag:SA1_addi*/
+ /*Rx16=add(Rx16,#s7)*/
+ 0xf8002000,
+ 0x20002000,
+ 0x07f00000,
+ 1 },
+{ /*Tag:SA1_addi*/
+ /*Rx16=add(Rx16,#s7)*/
+ 0xf8002000,
+ 0x40000000,
+ 0x07f00000,
+ 1 },
+{ /*Tag:SA1_addi*/
+ /*Rx16=add(Rx16,#s7)*/
+ 0xf8002000,
+ 0x40002000,
+ 0x07f00000,
+ 1 },
+{ /*Tag:SA1_addi*/
+ /*Rx16=add(Rx16,#s7)*/
+ 0xf8002000,
+ 0x60000000,
+ 0x07f00000,
+ 1 },
+{ /*Tag:SA1_addi*/
+ /*Rx16=add(Rx16,#s7)*/
+ 0xf8002000,
+ 0x60002000,
+ 0x07f00000,
+ 1 },
+{ /*Tag:SA1_seti*/
+ /*Rd16=#u6*/
+ 0xfc002000,
+ 0x28002000,
+ 0x03f00000,
+ 1 },
+{ /*Tag:SA1_seti*/
+ /*Rd16=#u6*/
+ 0xfc002000,
+ 0x48000000,
+ 0x03f00000,
+ 1 },
+{ /*Tag:SA1_seti*/
+ /*Rd16=#u6*/
+ 0xfc002000,
+ 0x48002000,
+ 0x03f00000,
+ 1 },
+{ /*Tag:SA1_seti*/
+ /*Rd16=#u6*/
+ 0xfc002000,
+ 0x68000000,
+ 0x03f00000,
+ 1 },
+{ /*Tag:SA1_seti*/
+ /*Rd16=#u6*/
+ 0xfc002000,
+ 0x68002000,
+ 0x03f00000,
+ 1 },
+{ /*Tag:dup_A2_addi*/
+ /*Rd32=add(Rs32,#s16)*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_A2_andir*/
+ /*Rd32=and(Rs32,#s10)*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_A2_combineii*/
+ /*Rdd32=combine(#s8,#S8)*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_A2_tfrsi*/
+ /*Rd32=#s16*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_A4_combineii*/
+ /*Rdd32=combine(#s8,#U6)*/
+ 0x00000000,
+ 0x00000000,
+ 0x00002404,
+ 0 },
+{ /*Tag:dup_A4_combineir*/
+ /*Rdd32=combine(#s8,Rs32)*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_A4_combineri*/
+ /*Rdd32=combine(Rs32,#s8)*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_C2_cmoveif*/
+ /*if (!Pu4) Rd32=#s12*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_C2_cmoveit*/
+ /*if (Pu4) Rd32=#s12*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_C2_cmovenewif*/
+ /*if (!Pu4.new) Rd32=#s12*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_C2_cmovenewit*/
+ /*if (Pu4.new) Rd32=#s12*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_C2_cmpeqi*/
+ /*Pd4=cmp.eq(Rs32,#s10)*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_L2_loadrb_io*/
+ /*Rd32=memb(Rs32+#s11:0)*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_L2_loadrd_io*/
+ /*Rdd32=memd(Rs32+#s11:3)*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_L2_loadrh_io*/
+ /*Rd32=memh(Rs32+#s11:1)*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_L2_loadri_io*/
+ /*Rd32=memw(Rs32+#s11:2)*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_L2_loadrub_io*/
+ /*Rd32=memub(Rs32+#s11:0)*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_L2_loadruh_io*/
+ /*Rd32=memuh(Rs32+#s11:1)*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_S2_storerb_io*/
+ /*memb(Rs32+#s11:0)=Rt32*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_S2_storerd_io*/
+ /*memd(Rs32+#s11:3)=Rtt32*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_S2_storerh_io*/
+ /*memh(Rs32+#s11:1)=Rt32*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_S2_storeri_io*/
+ /*memw(Rs32+#s11:2)=Rt32*/
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0 },
+{ /*Tag:dup_S4_storeirb_io*/
+ /*memb(Rs32+#u6:0)=#S8*/
+ 0x00000000,
+ 0x00000000,
+ 0x00002404,
+ 0 },
+{ /*Tag:dup_S4_storeiri_io*/
+ /*memw(Rs32+#u6:2)=#S8*/
+ 0x00000000,
+ 0x00000000,
+ 0x00002404,
+ 0 }
+};
+// clang-format on
+
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPMASK_H
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepOperands.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepOperands.td
index 8a94d96522cc..6ef668d30764 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepOperands.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepOperands.td
@@ -5,7 +5,7 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// Automatically generated file, do not edit!
+// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
multiclass ImmOpPred<code pred, ValueType vt = i32> {
@@ -13,120 +13,120 @@ multiclass ImmOpPred<code pred, ValueType vt = i32> {
def _timm : PatLeaf<(vt timm), pred>;
}
-def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; let RenderMethod = "addSignedImmOperands"; }
-def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand; let DecoderMethod = "s4_0ImmDecoder"; }
-defm s4_0ImmPred : ImmOpPred<[{ return isShiftedInt<4, 0>(N->getSExtValue());}]>;
-def s29_3ImmOperand : AsmOperandClass { let Name = "s29_3Imm"; let RenderMethod = "addSignedImmOperands"; }
-def s29_3Imm : Operand<i32> { let ParserMatchClass = s29_3ImmOperand; let DecoderMethod = "s29_3ImmDecoder"; }
-defm s29_3ImmPred : ImmOpPred<[{ return isShiftedInt<32, 3>(N->getSExtValue());}]>;
-def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; let RenderMethod = "addImmOperands"; }
-def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-defm u6_0ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>;
-def a30_2ImmOperand : AsmOperandClass { let Name = "a30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
-def a30_2Imm : Operand<i32> { let ParserMatchClass = a30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
-defm a30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
-def u29_3ImmOperand : AsmOperandClass { let Name = "u29_3Imm"; let RenderMethod = "addImmOperands"; }
-def u29_3Imm : Operand<i32> { let ParserMatchClass = u29_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-defm u29_3ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 3>(N->getSExtValue());}]>;
+def s32_0ImmOperand : AsmOperandClass { let Name = "s32_0Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s32_0Imm : Operand<i32> { let ParserMatchClass = s32_0ImmOperand; let DecoderMethod = "s32_0ImmDecoder"; }
+defm s32_0ImmPred : ImmOpPred<[{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
def s8_0ImmOperand : AsmOperandClass { let Name = "s8_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s8_0Imm : Operand<i32> { let ParserMatchClass = s8_0ImmOperand; let DecoderMethod = "s8_0ImmDecoder"; }
defm s8_0ImmPred : ImmOpPred<[{ return isShiftedInt<8, 0>(N->getSExtValue());}]>;
+def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; let RenderMethod = "addImmOperands"; }
+def u16_0Imm : Operand<i32> { let ParserMatchClass = u16_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+defm u16_0ImmPred : ImmOpPred<[{ return isShiftedUInt<16, 0>(N->getSExtValue());}]>;
+def u5_0ImmOperand : AsmOperandClass { let Name = "u5_0Imm"; let RenderMethod = "addImmOperands"; }
+def u5_0Imm : Operand<i32> { let ParserMatchClass = u5_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+defm u5_0ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 0>(N->getSExtValue());}]>;
+def u8_0ImmOperand : AsmOperandClass { let Name = "u8_0Imm"; let RenderMethod = "addImmOperands"; }
+def u8_0Imm : Operand<i32> { let ParserMatchClass = u8_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+defm u8_0ImmPred : ImmOpPred<[{ return isShiftedUInt<8, 0>(N->getSExtValue());}]>;
def u32_0ImmOperand : AsmOperandClass { let Name = "u32_0Imm"; let RenderMethod = "addImmOperands"; }
def u32_0Imm : Operand<i32> { let ParserMatchClass = u32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u32_0ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 0>(N->getSExtValue());}]>;
-def u4_2ImmOperand : AsmOperandClass { let Name = "u4_2Imm"; let RenderMethod = "addImmOperands"; }
-def u4_2Imm : Operand<i32> { let ParserMatchClass = u4_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-defm u4_2ImmPred : ImmOpPred<[{ return isShiftedUInt<4, 2>(N->getSExtValue());}]>;
-def u3_0ImmOperand : AsmOperandClass { let Name = "u3_0Imm"; let RenderMethod = "addImmOperands"; }
-def u3_0Imm : Operand<i32> { let ParserMatchClass = u3_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-defm u3_0ImmPred : ImmOpPred<[{ return isShiftedUInt<3, 0>(N->getSExtValue());}]>;
+def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; let RenderMethod = "addImmOperands"; }
+def u26_6Imm : Operand<i32> { let ParserMatchClass = u26_6ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+defm u26_6ImmPred : ImmOpPred<[{ return isShiftedUInt<26, 6>(N->getSExtValue());}]>;
+def u7_0ImmOperand : AsmOperandClass { let Name = "u7_0Imm"; let RenderMethod = "addImmOperands"; }
+def u7_0Imm : Operand<i32> { let ParserMatchClass = u7_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+defm u7_0ImmPred : ImmOpPred<[{ return isShiftedUInt<7, 0>(N->getSExtValue());}]>;
+def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; let RenderMethod = "addImmOperands"; }
+def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+defm u6_0ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>;
+def u10_0ImmOperand : AsmOperandClass { let Name = "u10_0Imm"; let RenderMethod = "addImmOperands"; }
+def u10_0Imm : Operand<i32> { let ParserMatchClass = u10_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+defm u10_0ImmPred : ImmOpPred<[{ return isShiftedUInt<10, 0>(N->getSExtValue());}]>;
+def a30_2ImmOperand : AsmOperandClass { let Name = "a30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
+def a30_2Imm : Operand<i32> { let ParserMatchClass = a30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
+defm a30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
+def b30_2ImmOperand : AsmOperandClass { let Name = "b30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
+def b30_2Imm : Operand<OtherVT> { let ParserMatchClass = b30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
+defm b30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
def b15_2ImmOperand : AsmOperandClass { let Name = "b15_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def b15_2Imm : Operand<OtherVT> { let ParserMatchClass = b15_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
defm b15_2ImmPred : ImmOpPred<[{ return isShiftedInt<15, 2>(N->getSExtValue());}]>;
-def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; let RenderMethod = "addImmOperands"; }
-def u11_3Imm : Operand<i32> { let ParserMatchClass = u11_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-defm u11_3ImmPred : ImmOpPred<[{ return isShiftedUInt<11, 3>(N->getSExtValue());}]>;
-def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; let RenderMethod = "addSignedImmOperands"; }
-def s4_3Imm : Operand<i32> { let ParserMatchClass = s4_3ImmOperand; let DecoderMethod = "s4_3ImmDecoder"; }
-defm s4_3ImmPred : ImmOpPred<[{ return isShiftedInt<4, 3>(N->getSExtValue());}]>;
-def m32_0ImmOperand : AsmOperandClass { let Name = "m32_0Imm"; let RenderMethod = "addImmOperands"; }
-def m32_0Imm : Operand<i32> { let ParserMatchClass = m32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-defm m32_0ImmPred : ImmOpPred<[{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
-def u3_1ImmOperand : AsmOperandClass { let Name = "u3_1Imm"; let RenderMethod = "addImmOperands"; }
-def u3_1Imm : Operand<i32> { let ParserMatchClass = u3_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-defm u3_1ImmPred : ImmOpPred<[{ return isShiftedUInt<3, 1>(N->getSExtValue());}]>;
-def u1_0ImmOperand : AsmOperandClass { let Name = "u1_0Imm"; let RenderMethod = "addImmOperands"; }
-def u1_0Imm : Operand<i32> { let ParserMatchClass = u1_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-defm u1_0ImmPred : ImmOpPred<[{ return isShiftedUInt<1, 0>(N->getSExtValue());}]>;
+def b13_2ImmOperand : AsmOperandClass { let Name = "b13_2Imm"; let RenderMethod = "addSignedImmOperands"; }
+def b13_2Imm : Operand<OtherVT> { let ParserMatchClass = b13_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
+defm b13_2ImmPred : ImmOpPred<[{ return isShiftedInt<13, 2>(N->getSExtValue());}]>;
+def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand; let DecoderMethod = "s4_0ImmDecoder"; }
+defm s4_0ImmPred : ImmOpPred<[{ return isShiftedInt<4, 0>(N->getSExtValue());}]>;
def s31_1ImmOperand : AsmOperandClass { let Name = "s31_1Imm"; let RenderMethod = "addSignedImmOperands"; }
def s31_1Imm : Operand<i32> { let ParserMatchClass = s31_1ImmOperand; let DecoderMethod = "s31_1ImmDecoder"; }
defm s31_1ImmPred : ImmOpPred<[{ return isShiftedInt<32, 1>(N->getSExtValue());}]>;
-def s3_0ImmOperand : AsmOperandClass { let Name = "s3_0Imm"; let RenderMethod = "addSignedImmOperands"; }
-def s3_0Imm : Operand<i32> { let ParserMatchClass = s3_0ImmOperand; let DecoderMethod = "s3_0ImmDecoder"; }
-defm s3_0ImmPred : ImmOpPred<[{ return isShiftedInt<3, 0>(N->getSExtValue());}]>;
+def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s4_1Imm : Operand<i32> { let ParserMatchClass = s4_1ImmOperand; let DecoderMethod = "s4_1ImmDecoder"; }
+defm s4_1ImmPred : ImmOpPred<[{ return isShiftedInt<4, 1>(N->getSExtValue());}]>;
def s30_2ImmOperand : AsmOperandClass { let Name = "s30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def s30_2Imm : Operand<i32> { let ParserMatchClass = s30_2ImmOperand; let DecoderMethod = "s30_2ImmDecoder"; }
defm s30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
+def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s4_2Imm : Operand<i32> { let ParserMatchClass = s4_2ImmOperand; let DecoderMethod = "s4_2ImmDecoder"; }
+defm s4_2ImmPred : ImmOpPred<[{ return isShiftedInt<4, 2>(N->getSExtValue());}]>;
+def s29_3ImmOperand : AsmOperandClass { let Name = "s29_3Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s29_3Imm : Operand<i32> { let ParserMatchClass = s29_3ImmOperand; let DecoderMethod = "s29_3ImmDecoder"; }
+defm s29_3ImmPred : ImmOpPred<[{ return isShiftedInt<32, 3>(N->getSExtValue());}]>;
+def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s4_3Imm : Operand<i32> { let ParserMatchClass = s4_3ImmOperand; let DecoderMethod = "s4_3ImmDecoder"; }
+defm s4_3ImmPred : ImmOpPred<[{ return isShiftedInt<4, 3>(N->getSExtValue());}]>;
+def u29_3ImmOperand : AsmOperandClass { let Name = "u29_3Imm"; let RenderMethod = "addImmOperands"; }
+def u29_3Imm : Operand<i32> { let ParserMatchClass = u29_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+defm u29_3ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 3>(N->getSExtValue());}]>;
+def u31_1ImmOperand : AsmOperandClass { let Name = "u31_1Imm"; let RenderMethod = "addImmOperands"; }
+def u31_1Imm : Operand<i32> { let ParserMatchClass = u31_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+defm u31_1ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 1>(N->getSExtValue());}]>;
+def u30_2ImmOperand : AsmOperandClass { let Name = "u30_2Imm"; let RenderMethod = "addImmOperands"; }
+def u30_2Imm : Operand<i32> { let ParserMatchClass = u30_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+defm u30_2ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>;
+def u2_0ImmOperand : AsmOperandClass { let Name = "u2_0Imm"; let RenderMethod = "addImmOperands"; }
+def u2_0Imm : Operand<i32> { let ParserMatchClass = u2_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+defm u2_0ImmPred : ImmOpPred<[{ return isShiftedUInt<2, 0>(N->getSExtValue());}]>;
+def m32_0ImmOperand : AsmOperandClass { let Name = "m32_0Imm"; let RenderMethod = "addImmOperands"; }
+def m32_0Imm : Operand<i32> { let ParserMatchClass = m32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+defm m32_0ImmPred : ImmOpPred<[{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
+def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; let RenderMethod = "addImmOperands"; }
+def u6_2Imm : Operand<i32> { let ParserMatchClass = u6_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+defm u6_2ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 2>(N->getSExtValue());}]>;
+def u3_0ImmOperand : AsmOperandClass { let Name = "u3_0Imm"; let RenderMethod = "addImmOperands"; }
+def u3_0Imm : Operand<i32> { let ParserMatchClass = u3_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+defm u3_0ImmPred : ImmOpPred<[{ return isShiftedUInt<3, 0>(N->getSExtValue());}]>;
+def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; let RenderMethod = "addImmOperands"; }
+def u11_3Imm : Operand<i32> { let ParserMatchClass = u11_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+defm u11_3ImmPred : ImmOpPred<[{ return isShiftedUInt<11, 3>(N->getSExtValue());}]>;
def u4_0ImmOperand : AsmOperandClass { let Name = "u4_0Imm"; let RenderMethod = "addImmOperands"; }
def u4_0Imm : Operand<i32> { let ParserMatchClass = u4_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u4_0ImmPred : ImmOpPred<[{ return isShiftedUInt<4, 0>(N->getSExtValue());}]>;
def s6_0ImmOperand : AsmOperandClass { let Name = "s6_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s6_0Imm : Operand<i32> { let ParserMatchClass = s6_0ImmOperand; let DecoderMethod = "s6_0ImmDecoder"; }
defm s6_0ImmPred : ImmOpPred<[{ return isShiftedInt<6, 0>(N->getSExtValue());}]>;
-def u5_3ImmOperand : AsmOperandClass { let Name = "u5_3Imm"; let RenderMethod = "addImmOperands"; }
-def u5_3Imm : Operand<i32> { let ParserMatchClass = u5_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-defm u5_3ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 3>(N->getSExtValue());}]>;
-def s32_0ImmOperand : AsmOperandClass { let Name = "s32_0Imm"; let RenderMethod = "addSignedImmOperands"; }
-def s32_0Imm : Operand<i32> { let ParserMatchClass = s32_0ImmOperand; let DecoderMethod = "s32_0ImmDecoder"; }
-defm s32_0ImmPred : ImmOpPred<[{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
-def s6_3ImmOperand : AsmOperandClass { let Name = "s6_3Imm"; let RenderMethod = "addSignedImmOperands"; }
-def s6_3Imm : Operand<i32> { let ParserMatchClass = s6_3ImmOperand; let DecoderMethod = "s6_3ImmDecoder"; }
-defm s6_3ImmPred : ImmOpPred<[{ return isShiftedInt<6, 3>(N->getSExtValue());}]>;
-def u10_0ImmOperand : AsmOperandClass { let Name = "u10_0Imm"; let RenderMethod = "addImmOperands"; }
-def u10_0Imm : Operand<i32> { let ParserMatchClass = u10_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-defm u10_0ImmPred : ImmOpPred<[{ return isShiftedUInt<10, 0>(N->getSExtValue());}]>;
-def u31_1ImmOperand : AsmOperandClass { let Name = "u31_1Imm"; let RenderMethod = "addImmOperands"; }
-def u31_1Imm : Operand<i32> { let ParserMatchClass = u31_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-defm u31_1ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 1>(N->getSExtValue());}]>;
-def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; let RenderMethod = "addSignedImmOperands"; }
-def s4_1Imm : Operand<i32> { let ParserMatchClass = s4_1ImmOperand; let DecoderMethod = "s4_1ImmDecoder"; }
-defm s4_1ImmPred : ImmOpPred<[{ return isShiftedInt<4, 1>(N->getSExtValue());}]>;
-def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; let RenderMethod = "addImmOperands"; }
-def u16_0Imm : Operand<i32> { let ParserMatchClass = u16_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-defm u16_0ImmPred : ImmOpPred<[{ return isShiftedUInt<16, 0>(N->getSExtValue());}]>;
def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; let RenderMethod = "addImmOperands"; }
def u6_1Imm : Operand<i32> { let ParserMatchClass = u6_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u6_1ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 1>(N->getSExtValue());}]>;
+def u4_2ImmOperand : AsmOperandClass { let Name = "u4_2Imm"; let RenderMethod = "addImmOperands"; }
+def u4_2Imm : Operand<i32> { let ParserMatchClass = u4_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+defm u4_2ImmPred : ImmOpPred<[{ return isShiftedUInt<4, 2>(N->getSExtValue());}]>;
+def u5_3ImmOperand : AsmOperandClass { let Name = "u5_3Imm"; let RenderMethod = "addImmOperands"; }
+def u5_3Imm : Operand<i32> { let ParserMatchClass = u5_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+defm u5_3ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 3>(N->getSExtValue());}]>;
+def u3_1ImmOperand : AsmOperandClass { let Name = "u3_1Imm"; let RenderMethod = "addImmOperands"; }
+def u3_1Imm : Operand<i32> { let ParserMatchClass = u3_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+defm u3_1ImmPred : ImmOpPred<[{ return isShiftedUInt<3, 1>(N->getSExtValue());}]>;
def u5_2ImmOperand : AsmOperandClass { let Name = "u5_2Imm"; let RenderMethod = "addImmOperands"; }
def u5_2Imm : Operand<i32> { let ParserMatchClass = u5_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
defm u5_2ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 2>(N->getSExtValue());}]>;
-def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; let RenderMethod = "addImmOperands"; }
-def u26_6Imm : Operand<i32> { let ParserMatchClass = u26_6ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-defm u26_6ImmPred : ImmOpPred<[{ return isShiftedUInt<26, 6>(N->getSExtValue());}]>;
-def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; let RenderMethod = "addImmOperands"; }
-def u6_2Imm : Operand<i32> { let ParserMatchClass = u6_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-defm u6_2ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 2>(N->getSExtValue());}]>;
-def u7_0ImmOperand : AsmOperandClass { let Name = "u7_0Imm"; let RenderMethod = "addImmOperands"; }
-def u7_0Imm : Operand<i32> { let ParserMatchClass = u7_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-defm u7_0ImmPred : ImmOpPred<[{ return isShiftedUInt<7, 0>(N->getSExtValue());}]>;
-def b13_2ImmOperand : AsmOperandClass { let Name = "b13_2Imm"; let RenderMethod = "addSignedImmOperands"; }
-def b13_2Imm : Operand<OtherVT> { let ParserMatchClass = b13_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
-defm b13_2ImmPred : ImmOpPred<[{ return isShiftedInt<13, 2>(N->getSExtValue());}]>;
-def u5_0ImmOperand : AsmOperandClass { let Name = "u5_0Imm"; let RenderMethod = "addImmOperands"; }
-def u5_0Imm : Operand<i32> { let ParserMatchClass = u5_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-defm u5_0ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 0>(N->getSExtValue());}]>;
-def u2_0ImmOperand : AsmOperandClass { let Name = "u2_0Imm"; let RenderMethod = "addImmOperands"; }
-def u2_0Imm : Operand<i32> { let ParserMatchClass = u2_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-defm u2_0ImmPred : ImmOpPred<[{ return isShiftedUInt<2, 0>(N->getSExtValue());}]>;
-def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; let RenderMethod = "addSignedImmOperands"; }
-def s4_2Imm : Operand<i32> { let ParserMatchClass = s4_2ImmOperand; let DecoderMethod = "s4_2ImmDecoder"; }
-defm s4_2ImmPred : ImmOpPred<[{ return isShiftedInt<4, 2>(N->getSExtValue());}]>;
-def b30_2ImmOperand : AsmOperandClass { let Name = "b30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
-def b30_2Imm : Operand<OtherVT> { let ParserMatchClass = b30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
-defm b30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
-def u8_0ImmOperand : AsmOperandClass { let Name = "u8_0Imm"; let RenderMethod = "addImmOperands"; }
-def u8_0Imm : Operand<i32> { let ParserMatchClass = u8_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-defm u8_0ImmPred : ImmOpPred<[{ return isShiftedUInt<8, 0>(N->getSExtValue());}]>;
-def u30_2ImmOperand : AsmOperandClass { let Name = "u30_2Imm"; let RenderMethod = "addImmOperands"; }
-def u30_2Imm : Operand<i32> { let ParserMatchClass = u30_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
-defm u30_2ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>;
+def s6_3ImmOperand : AsmOperandClass { let Name = "s6_3Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s6_3Imm : Operand<i32> { let ParserMatchClass = s6_3ImmOperand; let DecoderMethod = "s6_3ImmDecoder"; }
+defm s6_3ImmPred : ImmOpPred<[{ return isShiftedInt<6, 3>(N->getSExtValue());}]>;
+def s3_0ImmOperand : AsmOperandClass { let Name = "s3_0Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s3_0Imm : Operand<i32> { let ParserMatchClass = s3_0ImmOperand; let DecoderMethod = "s3_0ImmDecoder"; }
+defm s3_0ImmPred : ImmOpPred<[{ return isShiftedInt<3, 0>(N->getSExtValue());}]>;
+def u1_0ImmOperand : AsmOperandClass { let Name = "u1_0Imm"; let RenderMethod = "addImmOperands"; }
+def u1_0Imm : Operand<i32> { let ParserMatchClass = u1_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+defm u1_0ImmPred : ImmOpPred<[{ return isShiftedUInt<1, 0>(N->getSExtValue());}]>;
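These TableGen operands pair each immediate with an ImmOpPred built on the isShiftedInt/isShiftedUInt helpers from llvm/Support/MathExtras.h. A minimal sketch of what those predicates check, assuming the usual MathExtras semantics (an N-bit value scaled by 2^S, with the low S bits zero):

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    // isShiftedInt<N, S>(x): x fits in N+S signed bits and its low S bits are zero.
    // isShiftedUInt<N, S>(x) is the unsigned counterpart.
    void checkShiftedImmPredicates() {
      assert(llvm::isShiftedInt<4, 2>(-32));   // -8 * 4, valid for s4_2Imm
      assert(!llvm::isShiftedInt<4, 2>(-34));  // low bits not zero
      assert(llvm::isShiftedUInt<5, 3>(248));  // 31 * 8, valid for u5_3Imm
      assert(!llvm::isShiftedUInt<5, 3>(256)); // 32 * 8 needs more than 5 bits
    }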
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h
index b6be74f848bb..dba39232433d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h
@@ -5,137 +5,147 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// Automatically generated file, please consult code owner before editing.
+// Automatically generated file, do not edit!
//===----------------------------------------------------------------------===//
-#ifndef TARGET_HEXAGON_HEXAGON_DEP_TIMING_CLASSES_H
-#define TARGET_HEXAGON_HEXAGON_DEP_TIMING_CLASSES_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPTIMINGCLASSES_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPTIMINGCLASSES_H
#include "HexagonInstrInfo.h"
namespace llvm {
-inline bool is_TC3x(unsigned SchedClass) {
+inline bool is_TC1(unsigned SchedClass) {
switch (SchedClass) {
- case Hexagon::Sched::tc_05d3a09b:
- case Hexagon::Sched::tc_0d8f5752:
- case Hexagon::Sched::tc_13bfbcf9:
- case Hexagon::Sched::tc_174516e8:
- case Hexagon::Sched::tc_1a2fd869:
- case Hexagon::Sched::tc_1c4528a2:
- case Hexagon::Sched::tc_32779c6f:
- case Hexagon::Sched::tc_5b54b33f:
- case Hexagon::Sched::tc_6b25e783:
- case Hexagon::Sched::tc_76851da1:
- case Hexagon::Sched::tc_9debc299:
- case Hexagon::Sched::tc_a9d88b22:
- case Hexagon::Sched::tc_bafaade3:
- case Hexagon::Sched::tc_bcf98408:
- case Hexagon::Sched::tc_bdceeac1:
- case Hexagon::Sched::tc_c8ce0b5c:
- case Hexagon::Sched::tc_d1aa9eaa:
- case Hexagon::Sched::tc_d773585a:
- case Hexagon::Sched::tc_df3319ed:
+ case Hexagon::Sched::tc_112d30d6:
+ case Hexagon::Sched::tc_151bf368:
+ case Hexagon::Sched::tc_1c2c7a4a:
+ case Hexagon::Sched::tc_1d41f8b7:
+ case Hexagon::Sched::tc_23708a21:
+ case Hexagon::Sched::tc_24f426ab:
+ case Hexagon::Sched::tc_2f573607:
+ case Hexagon::Sched::tc_388f9897:
+ case Hexagon::Sched::tc_3d14a17b:
+ case Hexagon::Sched::tc_3fbf1042:
+ case Hexagon::Sched::tc_407e96f9:
+ case Hexagon::Sched::tc_42ff66ba:
+ case Hexagon::Sched::tc_4a55d03c:
+ case Hexagon::Sched::tc_5502c366:
+ case Hexagon::Sched::tc_55b33fda:
+ case Hexagon::Sched::tc_56a124a7:
+ case Hexagon::Sched::tc_57a55b54:
+ case Hexagon::Sched::tc_59a7822c:
+ case Hexagon::Sched::tc_5b347363:
+ case Hexagon::Sched::tc_5da50c4b:
+ case Hexagon::Sched::tc_60e324ff:
+ case Hexagon::Sched::tc_651cbe02:
+ case Hexagon::Sched::tc_6fc5dbea:
+ case Hexagon::Sched::tc_711c805f:
+ case Hexagon::Sched::tc_713b66bf:
+ case Hexagon::Sched::tc_9124c04f:
+ case Hexagon::Sched::tc_9c52f549:
+ case Hexagon::Sched::tc_9e27f2f9:
+ case Hexagon::Sched::tc_9f6cd987:
+ case Hexagon::Sched::tc_a1297125:
+ case Hexagon::Sched::tc_a7a13fac:
+ case Hexagon::Sched::tc_b837298f:
+ case Hexagon::Sched::tc_c57d9f39:
+ case Hexagon::Sched::tc_d33e5eee:
+ case Hexagon::Sched::tc_decdde8a:
+ case Hexagon::Sched::tc_ed03645c:
+ case Hexagon::Sched::tc_eeda4109:
+ case Hexagon::Sched::tc_ef921005:
+ case Hexagon::Sched::tc_f999c66e:
return true;
default:
return false;
}
}
-inline bool is_TC2early(unsigned SchedClass) {
+inline bool is_TC2(unsigned SchedClass) {
switch (SchedClass) {
- case Hexagon::Sched::tc_b4407292:
- case Hexagon::Sched::tc_fc3999b4:
+ case Hexagon::Sched::tc_01d44cb2:
+ case Hexagon::Sched::tc_0dfac0a7:
+ case Hexagon::Sched::tc_1fcb8495:
+ case Hexagon::Sched::tc_20131976:
+ case Hexagon::Sched::tc_2c13e7f5:
+ case Hexagon::Sched::tc_3edca78f:
+ case Hexagon::Sched::tc_5e4cf0e8:
+ case Hexagon::Sched::tc_65279839:
+ case Hexagon::Sched::tc_7401744f:
+ case Hexagon::Sched::tc_84a7500d:
+ case Hexagon::Sched::tc_8a825db2:
+ case Hexagon::Sched::tc_8b5bd4f5:
+ case Hexagon::Sched::tc_95a33176:
+ case Hexagon::Sched::tc_9b3c0462:
+ case Hexagon::Sched::tc_a08b630b:
+ case Hexagon::Sched::tc_a4e22bbd:
+ case Hexagon::Sched::tc_a7bdb22c:
+ case Hexagon::Sched::tc_bb831a7c:
+ case Hexagon::Sched::tc_c20701f0:
+ case Hexagon::Sched::tc_d3632d88:
+ case Hexagon::Sched::tc_d61dfdc3:
+ case Hexagon::Sched::tc_e3d699e3:
+ case Hexagon::Sched::tc_f098b237:
+ case Hexagon::Sched::tc_f34c1c21:
return true;
default:
return false;
}
}
-inline bool is_TC4x(unsigned SchedClass) {
+inline bool is_TC3x(unsigned SchedClass) {
switch (SchedClass) {
- case Hexagon::Sched::tc_2f7c551d:
- case Hexagon::Sched::tc_2ff964b4:
- case Hexagon::Sched::tc_3a867367:
- case Hexagon::Sched::tc_3b470976:
- case Hexagon::Sched::tc_4560740b:
- case Hexagon::Sched::tc_a58fd5cc:
- case Hexagon::Sched::tc_b8bffe55:
+ case Hexagon::Sched::tc_01e1be3b:
+ case Hexagon::Sched::tc_1248597c:
+ case Hexagon::Sched::tc_197dce51:
+ case Hexagon::Sched::tc_28e55c6f:
+ case Hexagon::Sched::tc_2c3e17fc:
+ case Hexagon::Sched::tc_38382228:
+ case Hexagon::Sched::tc_38e0bae9:
+ case Hexagon::Sched::tc_4abdbdc6:
+ case Hexagon::Sched::tc_503ce0f3:
+ case Hexagon::Sched::tc_556f6577:
+ case Hexagon::Sched::tc_5a4b5e58:
+ case Hexagon::Sched::tc_6ae3426b:
+ case Hexagon::Sched::tc_6d861a95:
+ case Hexagon::Sched::tc_788b1d09:
+ case Hexagon::Sched::tc_7f8ae742:
+ case Hexagon::Sched::tc_9406230a:
+ case Hexagon::Sched::tc_a154b476:
+ case Hexagon::Sched::tc_a38c45dc:
+ case Hexagon::Sched::tc_c21d7447:
+ case Hexagon::Sched::tc_d7718fbe:
+ case Hexagon::Sched::tc_db596beb:
+ case Hexagon::Sched::tc_f0cdeccf:
+ case Hexagon::Sched::tc_fae9dfa5:
return true;
default:
return false;
}
}
-inline bool is_TC2(unsigned SchedClass) {
+inline bool is_TC2early(unsigned SchedClass) {
switch (SchedClass) {
- case Hexagon::Sched::tc_002cb246:
- case Hexagon::Sched::tc_14b5c689:
- case Hexagon::Sched::tc_1c80410a:
- case Hexagon::Sched::tc_4414d8b1:
- case Hexagon::Sched::tc_6132ba3d:
- case Hexagon::Sched::tc_61830035:
- case Hexagon::Sched::tc_679309b8:
- case Hexagon::Sched::tc_703e822c:
- case Hexagon::Sched::tc_779080bf:
- case Hexagon::Sched::tc_784490da:
- case Hexagon::Sched::tc_88b4f13d:
- case Hexagon::Sched::tc_9461ff31:
- case Hexagon::Sched::tc_9e313203:
- case Hexagon::Sched::tc_a813cf9a:
- case Hexagon::Sched::tc_bfec0f01:
- case Hexagon::Sched::tc_cf8126ae:
- case Hexagon::Sched::tc_d08ee0f4:
- case Hexagon::Sched::tc_e4a7f9f0:
- case Hexagon::Sched::tc_f429765c:
- case Hexagon::Sched::tc_f675fee8:
- case Hexagon::Sched::tc_f9058dd7:
+ case Hexagon::Sched::tc_45f9d1be:
+ case Hexagon::Sched::tc_a4ee89db:
return true;
default:
return false;
}
}
-inline bool is_TC1(unsigned SchedClass) {
+inline bool is_TC4x(unsigned SchedClass) {
switch (SchedClass) {
- case Hexagon::Sched::tc_0663f615:
- case Hexagon::Sched::tc_0a705168:
- case Hexagon::Sched::tc_0ae0825c:
- case Hexagon::Sched::tc_1b6f7cec:
- case Hexagon::Sched::tc_1fc97744:
- case Hexagon::Sched::tc_20cdee80:
- case Hexagon::Sched::tc_2332b92e:
- case Hexagon::Sched::tc_2eabeebe:
- case Hexagon::Sched::tc_3d495a39:
- case Hexagon::Sched::tc_4c5ba658:
- case Hexagon::Sched::tc_56336eb0:
- case Hexagon::Sched::tc_56f114f4:
- case Hexagon::Sched::tc_57890846:
- case Hexagon::Sched::tc_5a2711e5:
- case Hexagon::Sched::tc_5b7c0967:
- case Hexagon::Sched::tc_640086b5:
- case Hexagon::Sched::tc_643b4717:
- case Hexagon::Sched::tc_85c9c08f:
- case Hexagon::Sched::tc_85d5d03f:
- case Hexagon::Sched::tc_862b3e70:
- case Hexagon::Sched::tc_946df596:
- case Hexagon::Sched::tc_9c3ecd83:
- case Hexagon::Sched::tc_9fc3dae0:
- case Hexagon::Sched::tc_a1123dda:
- case Hexagon::Sched::tc_a1c00888:
- case Hexagon::Sched::tc_ae53734a:
- case Hexagon::Sched::tc_b31c2e97:
- case Hexagon::Sched::tc_b4b5c03a:
- case Hexagon::Sched::tc_b51dc29a:
- case Hexagon::Sched::tc_cd374165:
- case Hexagon::Sched::tc_cfd8378a:
- case Hexagon::Sched::tc_d5b7b0c1:
- case Hexagon::Sched::tc_d9d43ecb:
- case Hexagon::Sched::tc_db2bce9c:
- case Hexagon::Sched::tc_de4df740:
- case Hexagon::Sched::tc_de554571:
- case Hexagon::Sched::tc_e78647bd:
+ case Hexagon::Sched::tc_02fe1c65:
+ case Hexagon::Sched::tc_0a195f2c:
+ case Hexagon::Sched::tc_7f7f45f5:
+ case Hexagon::Sched::tc_9783714b:
+ case Hexagon::Sched::tc_9e72dc89:
+ case Hexagon::Sched::tc_9edb7c77:
+ case Hexagon::Sched::tc_f0e8e832:
+ case Hexagon::Sched::tc_f7569068:
return true;
default:
return false;
@@ -143,4 +153,4 @@ inline bool is_TC1(unsigned SchedClass) {
}
} // namespace llvm
-#endif
\ No newline at end of file
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPTIMINGCLASSES_H
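The regenerated header changes only the scheduling-class hashes; the query helpers keep their shape. A hypothetical caller, shown only to illustrate the interface (isSingleCycle is an invented name, not part of the header):

    #include "HexagonDepTimingClasses.h"
    #include "llvm/CodeGen/MachineInstr.h"

    // The helpers take an MCSchedClass id, obtainable from the instruction
    // descriptor.
    static bool isSingleCycle(const llvm::MachineInstr &MI) {
      unsigned SC = MI.getDesc().getSchedClass();
      return llvm::is_TC1(SC) || llvm::is_TC2early(SC);
    }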
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index d0285a7aa377..a431af17e6d0 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -1017,18 +1017,20 @@ void HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock *PredB,
PredB->removeSuccessor(SuccB);
PredB->splice(PredB->end(), SuccB, SuccB->begin(), SuccB->end());
PredB->transferSuccessorsAndUpdatePHIs(SuccB);
+ MachineBasicBlock *OldLayoutSuccessor = SuccB->getNextNode();
removeBlock(SuccB);
if (!TermOk)
- PredB->updateTerminator();
+ PredB->updateTerminator(OldLayoutSuccessor);
}
void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) {
+ MachineBasicBlock *OldLayoutSuccessor = FP.SplitB->getNextNode();
if (FP.TrueB)
removeBlock(FP.TrueB);
if (FP.FalseB)
removeBlock(FP.FalseB);
- FP.SplitB->updateTerminator();
+ FP.SplitB->updateTerminator(OldLayoutSuccessor);
if (FP.SplitB->succ_size() != 1)
return;
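The change above tracks the new MachineBasicBlock::updateTerminator signature, which takes the previous layout successor explicitly. A sketch of the pattern, assuming the LLVM 11 API (eraseFromParent stands in for the pass's removeBlock helper):

    #include "llvm/CodeGen/MachineBasicBlock.h"
    using llvm::MachineBasicBlock;

    // Remember the block that used to follow in layout order before the CFG
    // is edited; updateTerminator() no longer deduces it on its own.
    static void eraseAndFixTerminator(MachineBasicBlock *PredB,
                                      MachineBasicBlock *SuccB) {
      MachineBasicBlock *OldLayoutSuccessor = SuccB->getNextNode();
      SuccB->eraseFromParent();
      PredB->updateTerminator(OldLayoutSuccessor);
    }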
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
index d21de8ccb5ab..97a4b351af66 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -20,7 +20,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/PassSupport.h"
+#include "llvm/Pass.h"
using namespace llvm;
@@ -114,7 +114,7 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
// First pass - compute the offset of each basic block.
for (const MachineBasicBlock &MBB : MF) {
- if (MBB.getAlignment() != Align::None()) {
+ if (MBB.getAlignment() != Align(1)) {
// Although we don't know the exact layout of the final code, we need
// to account for alignment padding somehow. This heuristic pads each
// aligned basic block according to the alignment value.
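Align::None() is gone in this LLVM version; Align(1) spells the same thing, namely "no extra alignment". A minimal sketch of the Align type as used in this hunk, assuming llvm/Support/Alignment.h semantics:

    #include "llvm/Support/Alignment.h"
    #include <cassert>
    using llvm::Align;

    void alignBasics() {
      Align A(16);                         // must be a power of two
      assert(A.value() == 16 && llvm::Log2(A) == 4);
      assert(Align(1) <= A);               // Align(1) == the old Align::None()
      assert(llvm::alignTo(13, A) == 16);  // round up to the next 16-byte boundary
    }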
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index aff8e57b0a94..010b7171ce17 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -261,20 +261,20 @@ static unsigned getMax32BitSubRegister(unsigned Reg,
}
/// Returns the callee saved register with the largest id in the vector.
-static unsigned getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> &CSI,
+static unsigned getMaxCalleeSavedReg(ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo &TRI) {
- static_assert(Hexagon::R1 > 0,
- "Assume physical registers are encoded as positive integers");
- if (CSI.empty())
- return 0;
-
- unsigned Max = getMax32BitSubRegister(CSI[0].getReg(), TRI);
- for (unsigned I = 1, E = CSI.size(); I < E; ++I) {
- unsigned Reg = getMax32BitSubRegister(CSI[I].getReg(), TRI);
- if (Reg > Max)
- Max = Reg;
- }
- return Max;
+ static_assert(Hexagon::R1 > 0,
+ "Assume physical registers are encoded as positive integers");
+ if (CSI.empty())
+ return 0;
+
+ unsigned Max = getMax32BitSubRegister(CSI[0].getReg(), TRI);
+ for (unsigned I = 1, E = CSI.size(); I < E; ++I) {
+ unsigned Reg = getMax32BitSubRegister(CSI[I].getReg(), TRI);
+ if (Reg > Max)
+ Max = Reg;
+ }
+ return Max;
}
/// Checks if the basic block contains any instruction that needs a stack
@@ -395,6 +395,9 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const {
static unsigned ShrinkCounter = 0;
+ if (MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl() &&
+ MF.getFunction().isVarArg())
+ return;
if (ShrinkLimit.getPosition()) {
if (ShrinkCounter >= ShrinkLimit)
return;
@@ -588,7 +591,7 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
auto &HII = *HST.getInstrInfo();
auto &HRI = *HST.getRegisterInfo();
- unsigned MaxAlign = std::max(MFI.getMaxAlignment(), getStackAlignment());
+ Align MaxAlign = std::max(MFI.getMaxAlign(), getStackAlign());
// Calculate the total stack frame size.
// Get the number of bytes to allocate from the FrameInfo.
@@ -600,7 +603,7 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
FrameSize = MaxCFA + alignTo(FrameSize, MaxAlign);
MFI.setStackSize(FrameSize);
- bool AlignStack = (MaxAlign > getStackAlignment());
+ bool AlignStack = (MaxAlign > getStackAlign());
// Get the number of bytes to allocate from the FrameInfo.
unsigned NumBytes = MFI.getStackSize();
@@ -622,12 +625,124 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
DebugLoc dl = MBB.findDebugLoc(InsertPt);
+ if (MF.getFunction().isVarArg() &&
+ MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl()) {
+ // Calculate the size of register saved area.
+ int NumVarArgRegs = 6 - FirstVarArgSavedReg;
+ int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0)
+ ? NumVarArgRegs * 4
+ : NumVarArgRegs * 4 + 4;
+ if (RegisterSavedAreaSizePlusPadding > 0) {
+ // Decrement the stack pointer by size of register saved area plus
+ // padding if any.
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
+ .addReg(SP)
+ .addImm(-RegisterSavedAreaSizePlusPadding)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ int NumBytes = 0;
+ // Copy all the named arguments below register saved area.
+ auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
+ for (int i = HMFI.getFirstNamedArgFrameIndex(),
+ e = HMFI.getLastNamedArgFrameIndex(); i >= e; --i) {
+ uint64_t ObjSize = MFI.getObjectSize(i);
+ Align ObjAlign = MFI.getObjectAlign(i);
+
+ // Determine the kind of load/store that should be used.
+ unsigned LDOpc, STOpc;
+ uint64_t OpcodeChecker = ObjAlign.value();
+
+ // Handle cases where alignment of an object is > its size.
+ if (ObjAlign > ObjSize) {
+ if (ObjSize <= 1)
+ OpcodeChecker = 1;
+ else if (ObjSize <= 2)
+ OpcodeChecker = 2;
+ else if (ObjSize <= 4)
+ OpcodeChecker = 4;
+ else if (ObjSize > 4)
+ OpcodeChecker = 8;
+ }
+
+ switch (OpcodeChecker) {
+ case 1:
+ LDOpc = Hexagon::L2_loadrb_io;
+ STOpc = Hexagon::S2_storerb_io;
+ break;
+ case 2:
+ LDOpc = Hexagon::L2_loadrh_io;
+ STOpc = Hexagon::S2_storerh_io;
+ break;
+ case 4:
+ LDOpc = Hexagon::L2_loadri_io;
+ STOpc = Hexagon::S2_storeri_io;
+ break;
+ case 8:
+ default:
+ LDOpc = Hexagon::L2_loadrd_io;
+ STOpc = Hexagon::S2_storerd_io;
+ break;
+ }
+
+ unsigned RegUsed = LDOpc == Hexagon::L2_loadrd_io ? Hexagon::D3
+ : Hexagon::R6;
+ int LoadStoreCount = ObjSize / OpcodeChecker;
+
+ if (ObjSize % OpcodeChecker)
+ ++LoadStoreCount;
+
+ // Get the start location of the load. NumBytes is basically the
+ // offset from the stack pointer of previous function, which would be
+ // the caller in this case, as this function has variable argument
+ // list.
+ if (NumBytes != 0)
+ NumBytes = alignTo(NumBytes, ObjAlign);
+
+ int Count = 0;
+ while (Count < LoadStoreCount) {
+ // Load the value of the named argument on stack.
+ BuildMI(MBB, InsertPt, dl, HII.get(LDOpc), RegUsed)
+ .addReg(SP)
+ .addImm(RegisterSavedAreaSizePlusPadding +
+ ObjAlign.value() * Count + NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // Store it below the register saved area plus padding.
+ BuildMI(MBB, InsertPt, dl, HII.get(STOpc))
+ .addReg(SP)
+ .addImm(ObjAlign.value() * Count + NumBytes)
+ .addReg(RegUsed)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ Count++;
+ }
+ NumBytes += MFI.getObjectSize(i);
+ }
+
+ // Make NumBytes 8 byte aligned
+ NumBytes = alignTo(NumBytes, 8);
+
+ // If the number of registers having variable arguments is odd,
+ // leave 4 bytes of padding to get to the location where first
+ // variable argument which was passed through register was copied.
+ NumBytes = (NumVarArgRegs % 2 == 0) ? NumBytes : NumBytes + 4;
+
+ for (int j = FirstVarArgSavedReg, i = 0; j < 6; ++j, ++i) {
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_storeri_io))
+ .addReg(SP)
+ .addImm(NumBytes + 4 * i)
+ .addReg(Hexagon::R0 + j)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
+ }
+
if (hasFP(MF)) {
insertAllocframe(MBB, InsertPt, NumBytes);
if (AlignStack) {
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)
.addReg(SP)
- .addImm(-int64_t(MaxAlign));
+ .addImm(-int64_t(MaxAlign.value()));
}
// If the stack-checking is enabled, and we spilled the callee-saved
// registers inline (i.e. did not use a spill function), then call
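The prologue code above sizes the register-saved area for musl varargs from the first register that may carry a variable argument. A sketch of that computation, with the same names as the code:

    // R0..R5 carry arguments; FirstVarArgSavedReg is the first one that may
    // hold a "..." argument. Odd register counts get 4 bytes of padding so the
    // stack pointer stays 8-byte aligned.
    static int registerSavedAreaSize(int FirstVarArgSavedReg) {
      int NumVarArgRegs = 6 - FirstVarArgSavedReg;   // registers to spill
      int Bytes = NumVarArgRegs * 4;                 // 4 bytes per register
      return (NumVarArgRegs % 2 == 0) ? Bytes : Bytes + 4;
    }
    // e.g. FirstVarArgSavedReg == 3 spills R3..R5: 12 bytes plus 4 of padding.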
@@ -655,7 +770,16 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
if (!hasFP(MF)) {
MachineFrameInfo &MFI = MF.getFrameInfo();
- if (unsigned NumBytes = MFI.getStackSize()) {
+ unsigned NumBytes = MFI.getStackSize();
+ if (MF.getFunction().isVarArg() &&
+ MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl()) {
+ // On Hexagon Linux, deallocate the stack for the register saved area.
+ int NumVarArgRegs = 6 - FirstVarArgSavedReg;
+ int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0) ?
+ (NumVarArgRegs * 4) : (NumVarArgRegs * 4 + 4);
+ NumBytes += RegisterSavedAreaSizePlusPadding;
+ }
+ if (NumBytes) {
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
.addReg(SP)
.addImm(NumBytes);
@@ -710,24 +834,49 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
NeedsDeallocframe = false;
}
- if (!NeedsDeallocframe)
- return;
- // If the returning instruction is PS_jmpret, replace it with dealloc_return,
- // otherwise just add deallocframe. The function could be returning via a
- // tail call.
- if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) {
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
+ if (!MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl() ||
+ !MF.getFunction().isVarArg()) {
+ if (!NeedsDeallocframe)
+ return;
+ // If the returning instruction is PS_jmpret, replace it with
+ // dealloc_return, otherwise just add deallocframe. The function
+ // could be returning via a tail call.
+ if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) {
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
.addDef(Hexagon::D15)
.addReg(Hexagon::R30);
- return;
- }
- unsigned NewOpc = Hexagon::L4_return;
- MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc))
+ return;
+ }
+ unsigned NewOpc = Hexagon::L4_return;
+ MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc))
.addDef(Hexagon::D15)
.addReg(Hexagon::R30);
- // Transfer the function live-out registers.
- NewI->copyImplicitOps(MF, *RetI);
- MBB.erase(RetI);
+ // Transfer the function live-out registers.
+ NewI->copyImplicitOps(MF, *RetI);
+ MBB.erase(RetI);
+ } else {
+ // L2_deallocframe instruction after it.
+ // Calculate the size of register saved area.
+ int NumVarArgRegs = 6 - FirstVarArgSavedReg;
+ int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0) ?
+ (NumVarArgRegs * 4) : (NumVarArgRegs * 4 + 4);
+
+ MachineBasicBlock::iterator Term = MBB.getFirstTerminator();
+ MachineBasicBlock::iterator I = (Term == MBB.begin()) ? MBB.end()
+ : std::prev(Term);
+ if (I == MBB.end() ||
+ (I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT &&
+ I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC &&
+ I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4 &&
+ I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC))
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
+ .addDef(Hexagon::D15)
+ .addReg(Hexagon::R30);
+ if (RegisterSavedAreaSizePlusPadding != 0)
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
+ .addReg(SP)
+ .addImm(RegisterSavedAreaSizePlusPadding);
+ }
}
void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB,
@@ -744,7 +893,7 @@ void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB,
// Create a dummy memory operand to avoid allocframe from being treated as
// a volatile memory reference.
auto *MMO = MF.getMachineMemOperand(MachinePointerInfo::getStack(MF, 0),
- MachineMemOperand::MOStore, 4, 4);
+ MachineMemOperand::MOStore, 4, Align(4));
DebugLoc dl = MBB.findDebugLoc(InsertPt);
unsigned SP = HRI.getStackRegister();
@@ -907,9 +1056,9 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
// | +-- Old SP (before allocframe)
// +-- New FP (after allocframe)
//
- // MCCFIInstruction::createDefCfa subtracts the offset from the register.
+ // MCCFIInstruction::cfiDefCfa adds the offset from the register.
// MCCFIInstruction::createOffset takes the offset without sign change.
- auto DefCfa = MCCFIInstruction::createDefCfa(FrameLabel, DwFPReg, -8);
+ auto DefCfa = MCCFIInstruction::cfiDefCfa(FrameLabel, DwFPReg, 8);
BuildMI(MBB, At, DL, CFID)
.addCFIIndex(MF.addFrameInst(DefCfa));
// R31 (return addr) = CFA - 4
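The CFI hunk switches to MCCFIInstruction::cfiDefCfa, whose offset is added to the register rather than subtracted, so the same frame layout is now written as +8. A sketch of the directives being emitted (FrameLabel and the Dwarf register numbers such as DwRAReg are placeholders standing in for values from the surrounding code):

    // CFA = FP + 8: allocframe pushes R30:R31, leaving the old SP 8 bytes
    // above the new FP.
    auto DefCfa = MCCFIInstruction::cfiDefCfa(FrameLabel, DwFPReg, 8);
    // R31 (return address) is saved at CFA - 4, R30 (frame pointer) at CFA - 8.
    auto OffRA = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4);
    auto OffFP = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8);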
@@ -954,7 +1103,7 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
// Instead, get the offset (relative to the FP) directly.
Offset = MFI.getObjectOffset(F->getFrameIdx());
} else {
- unsigned FrameReg;
+ Register FrameReg;
Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg);
}
// Subtract 8 to make room for R30 and R31, which are added above.
@@ -1108,7 +1257,8 @@ static const char *getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType,
}
int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
- int FI, unsigned &FrameReg) const {
+ int FI,
+ Register &FrameReg) const {
auto &MFI = MF.getFrameInfo();
auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
@@ -1119,9 +1269,9 @@ int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
unsigned FrameSize = MFI.getStackSize();
- unsigned SP = HRI.getStackRegister();
- unsigned FP = HRI.getFrameRegister();
- unsigned AP = HMFI.getStackAlignBasePhysReg();
+ Register SP = HRI.getStackRegister();
+ Register FP = HRI.getFrameRegister();
+ Register AP = HMFI.getStackAlignBasePhysReg();
// It may happen that AP will be absent even HasAlloca && HasExtraAlign
// is true. HasExtraAlign may be set because of vector spills, without
// aligned locals or aligned outgoing function arguments. Since vector
@@ -1358,7 +1508,7 @@ void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
// via AP, which may not be available at the particular place in the program.
MachineFrameInfo &MFI = MF.getFrameInfo();
bool HasAlloca = MFI.hasVarSizedObjects();
- bool NeedsAlign = (MFI.getMaxAlignment() > getStackAlignment());
+ bool NeedsAlign = (MFI.getMaxAlign() > getStackAlign());
if (!HasAlloca || !NeedsAlign)
return;
@@ -1371,8 +1521,8 @@ void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
unsigned S = MFI.getObjectSize(i);
// Reduce the alignment to at most 8. This will require unaligned vector
// stores if they happen here.
- unsigned A = std::max(MFI.getObjectAlignment(i), 8U);
- MFI.setObjectAlignment(i, 8);
+ Align A = std::max(MFI.getObjectAlign(i), Align(8));
+ MFI.setObjectAlignment(i, Align(8));
LFS = alignTo(LFS+S, A);
MFI.mapLocalFrameObject(i, -static_cast<int64_t>(LFS));
DealignSlots.insert(i);
@@ -1398,12 +1548,11 @@ void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
if (auto *FS = dyn_cast_or_null<FixedStackPseudoSourceValue>(PV)) {
int FI = FS->getFrameIndex();
if (DealignSlots.count(FI)) {
- unsigned A = MFI.getObjectAlignment(FI);
- auto *NewMMO = MF.getMachineMemOperand(MMO->getPointerInfo(),
- MMO->getFlags(), MMO->getSize(), A,
- MMO->getAAInfo(), MMO->getRanges(),
- MMO->getSyncScopeID(), MMO->getOrdering(),
- MMO->getFailureOrdering());
+ auto *NewMMO = MF.getMachineMemOperand(
+ MMO->getPointerInfo(), MMO->getFlags(), MMO->getSize(),
+ MFI.getObjectAlign(FI), MMO->getAAInfo(), MMO->getRanges(),
+ MMO->getSyncScopeID(), MMO->getOrdering(),
+ MMO->getFailureOrdering());
new_memops.push_back(NewMMO);
KeepOld = false;
continue;
@@ -1562,9 +1711,8 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R);
unsigned Size = TRI->getSpillSize(*RC);
int Off = MinOffset - Size;
- unsigned Align = std::min(TRI->getSpillAlignment(*RC), getStackAlignment());
- assert(isPowerOf2_32(Align));
- Off &= -Align;
+ Align Alignment = std::min(TRI->getSpillAlign(*RC), getStackAlign());
+ Off &= -Alignment.value();
int FI = MFI.CreateFixedSpillStackObject(Size, Off);
MinOffset = std::min(MinOffset, Off);
CSI.push_back(CalleeSavedInfo(R, FI));
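The callee-save slot assignment above rounds the offset down with Off &= -Alignment.value(), a bit trick worth spelling out:

    #include <cassert>

    // ANDing with the negated alignment clears the low log2(Alignment) bits,
    // rounding a (negative) offset down to the previous aligned boundary.
    void roundDownExample() {
      int Off = -12;
      Off &= -8;            // Alignment.value() == 8
      assert(Off == -16);
    }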
@@ -1787,11 +1935,11 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
bool NeedsAligna = needsAligna(MF);
unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);
- unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
- unsigned HasAlign = MFI.getObjectAlignment(FI);
+ Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
+ Align HasAlign = MFI.getObjectAlign(FI);
unsigned StoreOpc;
- auto UseAligned = [&] (unsigned NeedAlign, unsigned HasAlign) {
+ auto UseAligned = [&](Align NeedAlign, Align HasAlign) {
return !NeedsAligna && (NeedAlign <= HasAlign);
};
@@ -1839,11 +1987,11 @@ bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B,
bool NeedsAligna = needsAligna(MF);
unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);
- unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
- unsigned HasAlign = MFI.getObjectAlignment(FI);
+ Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
+ Align HasAlign = MFI.getObjectAlign(FI);
unsigned LoadOpc;
- auto UseAligned = [&] (unsigned NeedAlign, unsigned HasAlign) {
+ auto UseAligned = [&](Align NeedAlign, Align HasAlign) {
return !NeedsAligna && (NeedAlign <= HasAlign);
};
@@ -1883,8 +2031,8 @@ bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B,
bool IsKill = MI->getOperand(2).isKill();
int FI = MI->getOperand(0).getIndex();
- unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
- unsigned HasAlign = MFI.getObjectAlignment(FI);
+ Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
+ Align HasAlign = MFI.getObjectAlign(FI);
bool UseAligned = !NeedsAligna && (NeedAlign <= HasAlign);
unsigned StoreOpc = UseAligned ? Hexagon::V6_vS32b_ai
: Hexagon::V6_vS32Ub_ai;
@@ -1913,8 +2061,8 @@ bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B,
Register DstR = MI->getOperand(0).getReg();
int FI = MI->getOperand(1).getIndex();
- unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
- unsigned HasAlign = MFI.getObjectAlignment(FI);
+ Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
+ Align HasAlign = MFI.getObjectAlign(FI);
bool UseAligned = !NeedsAligna && (NeedAlign <= HasAlign);
unsigned LoadOpc = UseAligned ? Hexagon::V6_vL32b_ai
: Hexagon::V6_vL32Ub_ai;
@@ -2016,7 +2164,8 @@ void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
Num = 2; // Vector predicate spills also need a vector register.
break;
}
- unsigned S = HRI.getSpillSize(*RC), A = HRI.getSpillAlignment(*RC);
+ unsigned S = HRI.getSpillSize(*RC);
+ Align A = HRI.getSpillAlign(*RC);
for (unsigned i = 0; i < Num; i++) {
int NewFI = MFI.CreateSpillStackObject(S, A);
RS->addScavengingFrameIndex(NewFI);
@@ -2473,6 +2622,8 @@ void HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI,
/// checks are performed, which may still lead to the inline code.
bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF,
const CSIVect &CSI) const {
+ if (MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl())
+ return true;
if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
return true;
if (!hasFP(MF))
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
index 27265dd53794..87d385e1ce3c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -29,8 +29,10 @@ class TargetRegisterClass;
class HexagonFrameLowering : public TargetFrameLowering {
public:
+ // First register which could possibly hold a variable argument.
+ int FirstVarArgSavedReg;
explicit HexagonFrameLowering()
- : TargetFrameLowering(StackGrowsDown, Align(8), 0, Align::None(), true) {}
+ : TargetFrameLowering(StackGrowsDown, Align(8), 0, Align(1), true) {}
// All of the prolog/epilog functionality, including saving and restoring
// callee-saved registers is handled in emitPrologue. This is to have the
@@ -43,14 +45,17 @@ public:
bool enableCalleeSaveSkip(const MachineFunction &MF) const override;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const override {
+ MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override {
return true;
}
- bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const override {
+ bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override {
return true;
}
@@ -78,7 +83,7 @@ public:
}
int getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const override;
+ Register &FrameReg) const override;
bool hasFP(const MachineFunction &MF) const override;
const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries)
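The spill/restore hooks now take (Mutable)ArrayRef instead of std::vector references, so existing callers bind without copies. A sketch under the usual ADT semantics:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/CodeGen/MachineFrameInfo.h"  // CalleeSavedInfo
    #include <vector>
    using namespace llvm;

    void views(std::vector<CalleeSavedInfo> &CSI) {
      ArrayRef<CalleeSavedInfo> ReadOnly(CSI);         // spill path: read-only view
      MutableArrayRef<CalleeSavedInfo> Writable(CSI);  // restore path may update entries
      (void)ReadOnly;
      (void)Writable;
    }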
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
index 342ca21525c5..d9307190ae16 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
@@ -221,15 +221,16 @@ bool HexagonGenExtract::convert(Instruction *In) {
}
bool HexagonGenExtract::visitBlock(BasicBlock *B) {
+ bool Changed = false;
+
// Depth-first, bottom-up traversal.
for (auto *DTN : children<DomTreeNode*>(DT->getNode(B)))
- visitBlock(DTN->getBlock());
+ Changed |= visitBlock(DTN->getBlock());
// Allow limiting the number of generated extracts for debugging purposes.
bool HasCutoff = ExtractCutoff.getPosition();
unsigned Cutoff = ExtractCutoff;
- bool Changed = false;
BasicBlock::iterator I = std::prev(B->end()), NextI, Begin = B->begin();
while (true) {
if (HasCutoff && (ExtractCount >= Cutoff))
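The fix above makes visitBlock propagate changes reported by child blocks instead of discarding them. A generic sketch of the pattern (Node and processBlock are illustrative, not LLVM API):

    #include <vector>

    struct Node { std::vector<Node *> Children; };

    static bool processBlock(Node *) { return false; }  // placeholder local work

    // Accumulate "changed" over the depth-first, bottom-up walk so the pass
    // reports modifications made anywhere in the subtree.
    static bool visit(Node *N) {
      bool Changed = false;
      for (Node *C : N->Children)
        Changed |= visit(C);       // previously the result was dropped
      Changed |= processBlock(N);
      return Changed;
    }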
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 1cf1500bc832..4833935f8d24 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -467,7 +467,7 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
if (!PredI->isCompare())
return false;
- unsigned CmpReg1 = 0, CmpReg2 = 0;
+ Register CmpReg1, CmpReg2;
int CmpImm = 0, CmpMask = 0;
bool CmpAnalyzed =
TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm);
@@ -640,7 +640,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
if (!TB || (FB && TB != Header && FB != Header))
return nullptr;
- // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch
+ // Branches of form "if (!P) ..." cause HexagonInstrInfo::analyzeBranch
// to put imm(0), followed by P in the vector Cond.
// If TB is not the header, it means that the "not-taken" path must lead
// to the header.
@@ -651,7 +651,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
MachineInstr *CondI = MRI->getVRegDef(PredReg);
unsigned CondOpc = CondI->getOpcode();
- unsigned CmpReg1 = 0, CmpReg2 = 0;
+ Register CmpReg1, CmpReg2;
int Mask = 0, ImmValue = 0;
bool AnalyzedCmp =
TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue);
@@ -1455,7 +1455,7 @@ bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg),
E = MRI->use_instr_nodbg_end(); I != E; ++I) {
MachineInstr *MI = &*I;
- unsigned CmpReg1 = 0, CmpReg2 = 0;
+ Register CmpReg1, CmpReg2;
int CmpMask = 0, CmpValue = 0;
if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
@@ -1657,7 +1657,7 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
MachineBasicBlock *TB = nullptr, *FB = nullptr;
SmallVector<MachineOperand,2> Cond;
- // AnalyzeBranch returns true if it fails to analyze branch.
+ // analyzeBranch returns true if it fails to analyze branch.
bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
if (NotAnalyzed || Cond.empty())
return false;
@@ -1693,7 +1693,7 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
// Expecting a predicate register as a condition. It won't be a hardware
// predicate register at this point yet, just a vreg.
- // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0)
+ // HexagonInstrInfo::analyzeBranch for negated branches inserts imm(0)
// into Cond, followed by the predicate register. For non-negated branches
// it's just the register.
unsigned CSz = Cond.size();
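The compare operands switch from unsigned to llvm::Register throughout this file. A sketch of why the explicit zero initializers disappear, assuming the LLVM 11 Register class:

    #include "llvm/CodeGen/Register.h"
    #include <cassert>
    using llvm::Register;

    void registerBasics() {
      Register CmpReg1, CmpReg2;   // default-constructed: "no register"
      assert(!CmpReg1.isValid());
      unsigned Raw = CmpReg2;      // implicit conversion keeps unsigned APIs working
      assert(Raw == 0);
    }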
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIICScalar.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIICScalar.td
index d37cc3a2cc3e..e9239ab5ad22 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIICScalar.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIICScalar.td
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
// These itinerary class descriptions are based on the instruction timing
-// classes as per V62. Curretnly, they are just extracted from
+// classes as per V62. Currently, they are just extracted from
// HexagonScheduleV62.td but will soon be auto-generated by HexagonGen.py.
class PseudoItin {
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 9cf5b257a00a..b4b389a7b956 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -734,8 +734,8 @@ void HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) {
MachineFrameInfo &MFI = MF->getFrameInfo();
const HexagonFrameLowering *HFI = HST->getFrameLowering();
int FX = cast<FrameIndexSDNode>(N)->getIndex();
- unsigned StkA = HFI->getStackAlignment();
- unsigned MaxA = MFI.getMaxAlignment();
+ Align StkA = HFI->getStackAlign();
+ Align MaxA = MFI.getMaxAlign();
SDValue FI = CurDAG->getTargetFrameIndex(FX, MVT::i32);
SDLoc DL(N);
SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
@@ -787,10 +787,18 @@ void HexagonDAGToDAGISel::SelectVAlign(SDNode *N) {
MVT::i64, Ops);
// Shift right by "(Addr & 0x3) * 8" bytes.
+ SDNode *C;
SDValue M0 = CurDAG->getTargetConstant(0x18, dl, MVT::i32);
SDValue M1 = CurDAG->getTargetConstant(0x03, dl, MVT::i32);
- SDNode *C = CurDAG->getMachineNode(Hexagon::S4_andi_asl_ri, dl, MVT::i32,
- M0, N->getOperand(2), M1);
+ if (HST->useCompound()) {
+ C = CurDAG->getMachineNode(Hexagon::S4_andi_asl_ri, dl, MVT::i32,
+ M0, N->getOperand(2), M1);
+ } else {
+ SDNode *T = CurDAG->getMachineNode(Hexagon::S2_asl_i_r, dl, MVT::i32,
+ N->getOperand(2), M1);
+ C = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32,
+ SDValue(T, 0), M0);
+ }
SDNode *S = CurDAG->getMachineNode(Hexagon::S2_lsr_r_p, dl, MVT::i64,
SDValue(R, 0), SDValue(C, 0));
SDValue E = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo, dl, ResTy,
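Both expansions above compute the same realignment shift for the unaligned load; only the selected instructions differ (one compound S4_andi_asl_ri versus S2_asl_i_r followed by A2_andir). A quick check of the equivalence stated in the comment:

    #include <cassert>

    // "(Addr & 0x3) * 8" equals "(Addr << 3) & 0x18" for any address.
    void shiftAmountEquivalence() {
      for (unsigned Addr = 0; Addr < 64; ++Addr)
        assert(((Addr & 0x3) * 8) == ((Addr << 3) & 0x18));
    }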
@@ -1179,7 +1187,7 @@ void HexagonDAGToDAGISel::ppHoistZextI1(std::vector<SDNode*> &&Nodes) {
Ops[i] = U->getOperand(i);
EVT BVT = Ops[I1N].getValueType();
- SDLoc dl(U);
+ const SDLoc &dl(U);
SDValue C0 = DAG.getConstant(0, dl, BVT);
SDValue C1 = DAG.getConstant(1, dl, BVT);
SDValue If0, If1;
@@ -1197,8 +1205,15 @@ void HexagonDAGToDAGISel::ppHoistZextI1(std::vector<SDNode*> &&Nodes) {
Ops[I1N] = C1;
If1 = DAG.getNode(UseOpc, dl, UVT, Ops);
}
- SDValue Sel = DAG.getNode(ISD::SELECT, dl, UVT, OpI1, If1, If0);
- DAG.ReplaceAllUsesWith(U, Sel.getNode());
+ // We're generating a SELECT way after legalization, so keep the types
+ // simple.
+ unsigned UW = UVT.getSizeInBits();
+ EVT SVT = (UW == 32 || UW == 64) ? MVT::getIntegerVT(UW) : UVT;
+ SDValue Sel = DAG.getNode(ISD::SELECT, dl, SVT, OpI1,
+ DAG.getBitcast(SVT, If1),
+ DAG.getBitcast(SVT, If0));
+ SDValue Ret = DAG.getBitcast(UVT, Sel);
+ DAG.ReplaceAllUsesWith(U, Ret.getNode());
}
}
}
@@ -1260,7 +1275,7 @@ void HexagonDAGToDAGISel::PreprocessISelDAG() {
}
}
-void HexagonDAGToDAGISel::EmitFunctionEntryCode() {
+void HexagonDAGToDAGISel::emitFunctionEntryCode() {
auto &HST = MF->getSubtarget<HexagonSubtarget>();
auto &HFI = *HST.getFrameLowering();
if (!HFI.needsAligna(*MF))
@@ -1269,9 +1284,9 @@ void HexagonDAGToDAGISel::EmitFunctionEntryCode() {
MachineFrameInfo &MFI = MF->getFrameInfo();
MachineBasicBlock *EntryBB = &MF->front();
unsigned AR = FuncInfo->CreateReg(MVT::i32);
- unsigned EntryMaxA = MFI.getMaxAlignment();
+ Align EntryMaxA = MFI.getMaxAlign();
BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::PS_aligna), AR)
- .addImm(EntryMaxA);
+ .addImm(EntryMaxA.value());
MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR);
}
@@ -1281,7 +1296,7 @@ void HexagonDAGToDAGISel::updateAligna() {
return;
auto *AlignaI = const_cast<MachineInstr*>(HFI.getAlignaInstr(*MF));
assert(AlignaI != nullptr);
- unsigned MaxA = MF->getFrameInfo().getMaxAlignment();
+ unsigned MaxA = MF->getFrameInfo().getMaxAlign().value();
if (AlignaI->getOperand(1).getImm() < MaxA)
AlignaI->getOperand(1).setImm(MaxA);
}
@@ -1300,28 +1315,28 @@ bool HexagonDAGToDAGISel::SelectAddrFI(SDValue &N, SDValue &R) {
}
inline bool HexagonDAGToDAGISel::SelectAddrGA(SDValue &N, SDValue &R) {
- return SelectGlobalAddress(N, R, false, 0);
+ return SelectGlobalAddress(N, R, false, Align(1));
}
inline bool HexagonDAGToDAGISel::SelectAddrGP(SDValue &N, SDValue &R) {
- return SelectGlobalAddress(N, R, true, 0);
+ return SelectGlobalAddress(N, R, true, Align(1));
}
inline bool HexagonDAGToDAGISel::SelectAnyImm(SDValue &N, SDValue &R) {
- return SelectAnyImmediate(N, R, 0);
+ return SelectAnyImmediate(N, R, Align(1));
}
inline bool HexagonDAGToDAGISel::SelectAnyImm0(SDValue &N, SDValue &R) {
- return SelectAnyImmediate(N, R, 0);
+ return SelectAnyImmediate(N, R, Align(1));
}
inline bool HexagonDAGToDAGISel::SelectAnyImm1(SDValue &N, SDValue &R) {
- return SelectAnyImmediate(N, R, 1);
+ return SelectAnyImmediate(N, R, Align(2));
}
inline bool HexagonDAGToDAGISel::SelectAnyImm2(SDValue &N, SDValue &R) {
- return SelectAnyImmediate(N, R, 2);
+ return SelectAnyImmediate(N, R, Align(4));
}
inline bool HexagonDAGToDAGISel::SelectAnyImm3(SDValue &N, SDValue &R) {
- return SelectAnyImmediate(N, R, 3);
+ return SelectAnyImmediate(N, R, Align(8));
}
inline bool HexagonDAGToDAGISel::SelectAnyInt(SDValue &N, SDValue &R) {
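These selector helpers used to pass a log2 alignment and now pass llvm::Align; the mapping applied above is simply 1 << LogAlign:

    #include "llvm/Support/Alignment.h"
    #include <cstdint>
    using llvm::Align;

    // Old interface: LogAlign in 0..3.  New interface: Align(1 << LogAlign).
    static Align fromLogAlign(uint32_t LogAlign) {
      return Align(uint64_t(1) << LogAlign);
    }
    // Hence SelectAnyImm1 -> Align(2), SelectAnyImm2 -> Align(4),
    // SelectAnyImm3 -> Align(8).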
@@ -1333,17 +1348,13 @@ inline bool HexagonDAGToDAGISel::SelectAnyInt(SDValue &N, SDValue &R) {
}
bool HexagonDAGToDAGISel::SelectAnyImmediate(SDValue &N, SDValue &R,
- uint32_t LogAlign) {
- auto IsAligned = [LogAlign] (uint64_t V) -> bool {
- return alignTo(V, (uint64_t)1 << LogAlign) == V;
- };
-
+ Align Alignment) {
switch (N.getOpcode()) {
case ISD::Constant: {
if (N.getValueType() != MVT::i32)
return false;
int32_t V = cast<const ConstantSDNode>(N)->getZExtValue();
- if (!IsAligned(V))
+ if (!isAligned(Alignment, V))
return false;
R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType());
return true;
@@ -1351,37 +1362,34 @@ bool HexagonDAGToDAGISel::SelectAnyImmediate(SDValue &N, SDValue &R,
case HexagonISD::JT:
case HexagonISD::CP:
// These are assumed to always be aligned at least 8-byte boundary.
- if (LogAlign > 3)
+ if (Alignment > Align(8))
return false;
R = N.getOperand(0);
return true;
case ISD::ExternalSymbol:
// Symbols may be aligned at any boundary.
- if (LogAlign > 0)
+ if (Alignment > Align(1))
return false;
R = N;
return true;
case ISD::BlockAddress:
// Block address is always aligned at least 4-byte boundary.
- if (LogAlign > 2 || !IsAligned(cast<BlockAddressSDNode>(N)->getOffset()))
+ if (Alignment > Align(4) ||
+ !isAligned(Alignment, cast<BlockAddressSDNode>(N)->getOffset()))
return false;
R = N;
return true;
}
- if (SelectGlobalAddress(N, R, false, LogAlign) ||
- SelectGlobalAddress(N, R, true, LogAlign))
+ if (SelectGlobalAddress(N, R, false, Alignment) ||
+ SelectGlobalAddress(N, R, true, Alignment))
return true;
return false;
}
bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R,
- bool UseGP, uint32_t LogAlign) {
- auto IsAligned = [LogAlign] (uint64_t V) -> bool {
- return alignTo(V, (uint64_t)1 << LogAlign) == V;
- };
-
+ bool UseGP, Align Alignment) {
switch (N.getOpcode()) {
case ISD::ADD: {
SDValue N0 = N.getOperand(0);
@@ -1392,10 +1400,9 @@ bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R,
if (!UseGP && GAOpc != HexagonISD::CONST32)
return false;
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) {
- SDValue Addr = N0.getOperand(0);
- // For the purpose of alignment, sextvalue and zextvalue are the same.
- if (!IsAligned(Const->getZExtValue()))
+ if (!isAligned(Alignment, Const->getZExtValue()))
return false;
+ SDValue Addr = N0.getOperand(0);
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) {
if (GA->getOpcode() == ISD::TargetGlobalAddress) {
uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue();
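With Align threaded through, the hand-rolled IsAligned lambda becomes llvm::isAligned, which tests divisibility by the alignment. A small sketch:

    #include "llvm/Support/Alignment.h"
    #include <cassert>
    using llvm::Align;

    // isAligned(A, V) replaces "alignTo(V, 1 << LogAlign) == V".
    void isAlignedExample() {
      Align A(4);                   // former LogAlign == 2
      assert(llvm::isAligned(A, 12));
      assert(!llvm::isAligned(A, 13));
    }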
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
index 6c77d8803359..1e50385a7b4b 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
@@ -25,7 +25,6 @@ namespace llvm {
class MachineFunction;
class HexagonInstrInfo;
class HexagonRegisterInfo;
-class HexagonTargetLowering;
class HexagonDAGToDAGISel : public SelectionDAGISel {
const HexagonSubtarget *HST;
@@ -51,7 +50,7 @@ public:
return true;
}
void PreprocessISelDAG() override;
- void EmitFunctionEntryCode() override;
+ void emitFunctionEntryCode() override;
void Select(SDNode *N) override;
@@ -60,9 +59,8 @@ public:
inline bool SelectAddrGP(SDValue &N, SDValue &R);
inline bool SelectAnyImm(SDValue &N, SDValue &R);
inline bool SelectAnyInt(SDValue &N, SDValue &R);
- bool SelectAnyImmediate(SDValue &N, SDValue &R, uint32_t LogAlign);
- bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP,
- uint32_t LogAlign);
+ bool SelectAnyImmediate(SDValue &N, SDValue &R, Align Alignment);
+ bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP, Align Alignment);
bool SelectAddrFI(SDValue &N, SDValue &R);
bool DetectUseSxtw(SDValue &N, SDValue &R);
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 7e143a349400..c0f92042e5da 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -1199,7 +1199,7 @@ OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
ResultStack &Results) {
DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
MVT ByteTy = getSingleVT(MVT::i8);
- MVT BoolTy = MVT::getVectorVT(MVT::i1, 8*HwLen); // XXX
+ MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
const SDLoc &dl(Results.InpNode);
SDValue B = getVectorConstant(Bytes, dl);
Results.push(Hexagon::V6_vd0, ByteTy, {});
@@ -2201,30 +2201,30 @@ void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) {
SDNode *Result;
switch (IID) {
case Intrinsic::hexagon_V6_vaddcarry: {
- SmallVector<SDValue, 3> Ops = { N->getOperand(1), N->getOperand(2),
- N->getOperand(3) };
- SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v512i1);
+ std::array<SDValue, 3> Ops = {
+ {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
+ SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
break;
}
case Intrinsic::hexagon_V6_vaddcarry_128B: {
- SmallVector<SDValue, 3> Ops = { N->getOperand(1), N->getOperand(2),
- N->getOperand(3) };
- SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v1024i1);
+ std::array<SDValue, 3> Ops = {
+ {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
+ SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
break;
}
case Intrinsic::hexagon_V6_vsubcarry: {
- SmallVector<SDValue, 3> Ops = { N->getOperand(1), N->getOperand(2),
- N->getOperand(3) };
- SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v512i1);
+ std::array<SDValue, 3> Ops = {
+ {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
+ SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
break;
}
case Intrinsic::hexagon_V6_vsubcarry_128B: {
- SmallVector<SDValue, 3> Ops = { N->getOperand(1), N->getOperand(2),
- N->getOperand(3) };
- SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v1024i1);
+ std::array<SDValue, 3> Ops = {
+ {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
+ SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
break;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index e11ecdc7d035..768fea639cf9 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -109,6 +109,11 @@ static cl::opt<bool> AlignLoads("hexagon-align-loads",
cl::Hidden, cl::init(false),
cl::desc("Rewrite unaligned loads as a pair of aligned loads"));
+static cl::opt<bool>
+ DisableArgsMinAlignment("hexagon-disable-args-min-alignment", cl::Hidden,
+ cl::init(false),
+ cl::desc("Disable minimum alignment of 1 for "
+ "arguments passed by value on stack"));
namespace {
@@ -167,10 +172,10 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
SDValue Chain, ISD::ArgFlagsTy Flags,
SelectionDAG &DAG, const SDLoc &dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
- return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
- /*isVolatile=*/false, /*AlwaysInline=*/false,
- /*isTailCall=*/false,
- MachinePointerInfo(), MachinePointerInfo());
+ return DAG.getMemcpy(
+ Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo());
}
bool
@@ -387,19 +392,22 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachineFrameInfo &MFI = MF.getFrameInfo();
auto PtrVT = getPointerTy(MF.getDataLayout());
- unsigned NumParams = CLI.CS.getInstruction()
- ? CLI.CS.getFunctionType()->getNumParams()
- : 0;
+ unsigned NumParams = CLI.CB ? CLI.CB->getFunctionType()->getNumParams() : 0;
if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee))
Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32);
+ // Linux ABI treats var-arg calls the same way as regular ones.
+ bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;
+
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
+ HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs, *DAG.getContext(),
NumParams);
if (Subtarget.useHVXOps())
CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_HVX);
+ else if (DisableArgsMinAlignment)
+ CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_Legacy);
else
CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
@@ -429,7 +437,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);
bool NeedsArgAlign = false;
- unsigned LargestAlignSeen = 0;
+ Align LargestAlignSeen;
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -466,8 +474,8 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
StackPtr.getValueType());
MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
if (ArgAlign)
- LargestAlignSeen = std::max(LargestAlignSeen,
- (unsigned)VA.getLocVT().getStoreSizeInBits() >> 3);
+ LargestAlignSeen = std::max(
+ LargestAlignSeen, Align(VA.getLocVT().getStoreSizeInBits() / 8));
if (Flags.isByVal()) {
// The argument is a struct passed by value. According to LLVM, "Arg"
// is a pointer.
@@ -490,7 +498,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (NeedsArgAlign && Subtarget.hasV60Ops()) {
LLVM_DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
- unsigned VecAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
+ Align VecAlign(HRI.getSpillAlignment(Hexagon::HvxVRRegClass));
LargestAlignSeen = std::max(LargestAlignSeen, VecAlign);
MFI.ensureMaxAlignment(LargestAlignSeen);
}
@@ -726,7 +734,7 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
auto &HFI = *Subtarget.getFrameLowering();
// "Zero" means natural stack alignment.
if (A == 0)
- A = HFI.getStackAlignment();
+ A = HFI.getStackAlign().value();
LLVM_DEBUG({
dbgs () << __func__ << " Align: " << A << " Size: ";
@@ -750,13 +758,19 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
+ // Linux ABI treats var-arg calls the same way as regular ones.
+ bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;
+
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
+ HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs,
+ *DAG.getContext(),
MF.getFunction().getFunctionType()->getNumParams());
if (Subtarget.useHVXOps())
CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_HVX);
+ else if (DisableArgsMinAlignment)
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_Legacy);
else
CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
@@ -766,8 +780,24 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
// caller's stack is passed only when the struct size is smaller than (or
// equal to) 8 bytes. If not, no address will be passed into the callee and
// the callee returns the result directly through R0/R1.
+ auto NextSingleReg = [] (const TargetRegisterClass &RC, unsigned Reg) {
+ switch (RC.getID()) {
+ case Hexagon::IntRegsRegClassID:
+ return Reg - Hexagon::R0 + 1;
+ case Hexagon::DoubleRegsRegClassID:
+ return (Reg - Hexagon::D0 + 1) * 2;
+ case Hexagon::HvxVRRegClassID:
+ return Reg - Hexagon::V0 + 1;
+ case Hexagon::HvxWRRegClassID:
+ return (Reg - Hexagon::W0 + 1) * 2;
+ }
+ llvm_unreachable("Unexpected register class");
+ };
+ auto &HFL = const_cast<HexagonFrameLowering&>(*Subtarget.getFrameLowering());
auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
+ HFL.FirstVarArgSavedReg = 0;
+ HMFI.setFirstNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -811,6 +841,7 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
}
InVals.push_back(Copy);
MRI.addLiveIn(VA.getLocReg(), VReg);
+ HFL.FirstVarArgSavedReg = NextSingleReg(*RC, VA.getLocReg());
} else {
assert(VA.isMemLoc() && "Argument should be passed in memory");
@@ -838,8 +869,48 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
}
}
+ if (IsVarArg && Subtarget.isEnvironmentMusl()) {
+ for (int i = HFL.FirstVarArgSavedReg; i < 6; i++)
+ MRI.addLiveIn(Hexagon::R0+i);
+ }
+
+ if (IsVarArg && Subtarget.isEnvironmentMusl()) {
+ HMFI.setFirstNamedArgFrameIndex(HMFI.getFirstNamedArgFrameIndex() - 1);
+ HMFI.setLastNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));
+
+ // Create Frame index for the start of register saved area.
+ int NumVarArgRegs = 6 - HFL.FirstVarArgSavedReg;
+ bool RequiresPadding = (NumVarArgRegs & 1);
+ int RegSaveAreaSizePlusPadding = RequiresPadding
+ ? (NumVarArgRegs + 1) * 4
+ : NumVarArgRegs * 4;
+
+ if (RegSaveAreaSizePlusPadding > 0) {
+ // The offset to saved register area should be 8 byte aligned.
+ int RegAreaStart = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
+ if (!(RegAreaStart % 8))
+ RegAreaStart = (RegAreaStart + 7) & -8;
- if (IsVarArg) {
+ int RegSaveAreaFrameIndex =
+ MFI.CreateFixedObject(RegSaveAreaSizePlusPadding, RegAreaStart, true);
+ HMFI.setRegSavedAreaStartFrameIndex(RegSaveAreaFrameIndex);
+
+ // This will point to the next argument passed via stack.
+ int Offset = RegAreaStart + RegSaveAreaSizePlusPadding;
+ int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
+ HMFI.setVarArgsFrameIndex(FI);
+ } else {
+ // This will point to the next argument passed via stack, when
+ // there is no saved register area.
+ int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
+ int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
+ HMFI.setRegSavedAreaStartFrameIndex(FI);
+ HMFI.setVarArgsFrameIndex(FI);
+ }
+ }
+
+
+ if (IsVarArg && !Subtarget.isEnvironmentMusl()) {
// This will point to the next argument passed via stack.
int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
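As a quick sanity check of the register-save-area sizing added in this hunk, here is a minimal standalone sketch; it is illustrative only (not LLVM code) and simply replays the arithmetic above for a call whose named arguments consume R0-R2, i.e. FirstVarArgSavedReg == 3.

#include <cassert>

// Hedged sketch: mirrors the RegSaveAreaSizePlusPadding computation above
// for FirstVarArgSavedReg == 3 (R3..R5 still carry var-arg values).
int main() {
  int FirstVarArgSavedReg = 3;
  int NumVarArgRegs = 6 - FirstVarArgSavedReg;        // 3 registers to save
  bool RequiresPadding = (NumVarArgRegs & 1);         // odd count -> pad
  int RegSaveAreaSizePlusPadding =
      RequiresPadding ? (NumVarArgRegs + 1) * 4 : NumVarArgRegs * 4;
  assert(RegSaveAreaSizePlusPadding == 16);           // 12 bytes + 4 padding
  // The padding keeps the saved-register area 8-byte aligned; LowerVASTART
  // skips it when FirstVarArgSavedReg is odd.
  return 0;
}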
@@ -857,8 +928,81 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1),
- MachinePointerInfo(SV));
+
+ if (!Subtarget.isEnvironmentMusl()) {
+ return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1),
+ MachinePointerInfo(SV));
+ }
+ auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
+ auto &HFL = *Subtarget.getFrameLowering();
+ SDLoc DL(Op);
+ SmallVector<SDValue, 8> MemOps;
+
+ // Get frame index of va_list.
+ SDValue FIN = Op.getOperand(1);
+
+ // If the first vararg register is odd, add 4 bytes to the start of the
+ // saved register area so that it points to the first register location.
+ // This is because the saved register area has to be 8-byte aligned.
+ // In case of an odd start register, there will be 4 bytes of padding at
+ // the beginning of the saved register area. If all registers are used up,
+ // the following condition will handle it correctly.
+ SDValue SavedRegAreaStartFrameIndex =
+ DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(), MVT::i32);
+
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+
+ if (HFL.FirstVarArgSavedReg & 1)
+ SavedRegAreaStartFrameIndex =
+ DAG.getNode(ISD::ADD, DL, PtrVT,
+ DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(),
+ MVT::i32),
+ DAG.getIntPtrConstant(4, DL));
+
+ // Store the saved register area start pointer.
+ SDValue Store =
+ DAG.getStore(Op.getOperand(0), DL,
+ SavedRegAreaStartFrameIndex,
+ FIN, MachinePointerInfo(SV));
+ MemOps.push_back(Store);
+
+ // Store saved register area end pointer.
+ FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
+ FIN, DAG.getIntPtrConstant(4, DL));
+ Store = DAG.getStore(Op.getOperand(0), DL,
+ DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
+ PtrVT),
+ FIN, MachinePointerInfo(SV, 4));
+ MemOps.push_back(Store);
+
+ // Store overflow area pointer.
+ FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
+ FIN, DAG.getIntPtrConstant(4, DL));
+ Store = DAG.getStore(Op.getOperand(0), DL,
+ DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
+ PtrVT),
+ FIN, MachinePointerInfo(SV, 8));
+ MemOps.push_back(Store);
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
+}
+
+SDValue
+HexagonTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
+ // Assert that the linux ABI is enabled for the current compilation.
+ assert(Subtarget.isEnvironmentMusl() && "Linux ABI should be enabled");
+ SDValue Chain = Op.getOperand(0);
+ SDValue DestPtr = Op.getOperand(1);
+ SDValue SrcPtr = Op.getOperand(2);
+ const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+ const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+ SDLoc DL(Op);
+ // The size of the va_list is 12 bytes, as it holds 3 pointers. Therefore,
+ // we need to memcpy 12 bytes from one va_list to the other.
+ return DAG.getMemcpy(Chain, DL, DestPtr, SrcPtr,
+ DAG.getIntPtrConstant(12, DL), Align(4),
+ /*isVolatile*/ false, false, false,
+ MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
}
SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
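For context on the three stores emitted by LowerVASTART and the 12-byte memcpy in LowerVACOPY above: under the musl Hexagon ABI the va_list carries three 4-byte pointers. The sketch below is illustrative only; the field names are invented here, and the offsets are those of the stores above (0, 4 and 8).

// Hedged sketch, not the musl header: a 12-byte, three-pointer va_list as
// assumed by the lowering above (pointers are 4 bytes on Hexagon).
struct HexagonMuslVaListSketch {
  void *CurrentSavedRegArea; // offset 0: next register-save slot to read
  void *SavedRegAreaEnd;     // offset 4: end of the register-save area
  void *OverflowArea;        // offset 8: next argument passed on the stack
};
// With this layout, LowerVACOPY reduces to a plain 12-byte, 4-byte-aligned
// memcpy between two such objects, which is exactly what the code above emits.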
@@ -943,57 +1087,40 @@ HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-static Constant *convert_i1_to_i8(const Constant *ConstVal) {
- SmallVector<Constant *, 128> NewConst;
- const ConstantVector *CV = dyn_cast<ConstantVector>(ConstVal);
- if (!CV)
- return nullptr;
-
- LLVMContext &Ctx = ConstVal->getContext();
- IRBuilder<> IRB(Ctx);
- unsigned NumVectorElements = CV->getNumOperands();
- assert(isPowerOf2_32(NumVectorElements) &&
- "conversion only supported for pow2 VectorSize!");
-
- for (unsigned i = 0; i < NumVectorElements / 8; ++i) {
- uint8_t x = 0;
- for (unsigned j = 0; j < 8; ++j) {
- uint8_t y = CV->getOperand(i * 8 + j)->getUniqueInteger().getZExtValue();
- x |= y << (7 - j);
- }
- assert((x == 0 || x == 255) && "Either all 0's or all 1's expected!");
- NewConst.push_back(IRB.getInt8(x));
- }
- return ConstantVector::get(NewConst);
-}
-
SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
EVT ValTy = Op.getValueType();
ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
Constant *CVal = nullptr;
bool isVTi1Type = false;
- if (const Constant *ConstVal = dyn_cast<Constant>(CPN->getConstVal())) {
- Type *CValTy = ConstVal->getType();
- if (CValTy->isVectorTy() &&
- CValTy->getVectorElementType()->isIntegerTy(1)) {
- CVal = convert_i1_to_i8(ConstVal);
- isVTi1Type = (CVal != nullptr);
+ if (auto *CV = dyn_cast<ConstantVector>(CPN->getConstVal())) {
+ if (cast<VectorType>(CV->getType())->getElementType()->isIntegerTy(1)) {
+ IRBuilder<> IRB(CV->getContext());
+ SmallVector<Constant*, 128> NewConst;
+ unsigned VecLen = CV->getNumOperands();
+ assert(isPowerOf2_32(VecLen) &&
+ "conversion only supported for pow2 VectorSize");
+ for (unsigned i = 0; i < VecLen; ++i)
+ NewConst.push_back(IRB.getInt8(CV->getOperand(i)->isZeroValue()));
+
+ CVal = ConstantVector::get(NewConst);
+ isVTi1Type = true;
}
}
- unsigned Align = CPN->getAlignment();
+ Align Alignment = CPN->getAlign();
bool IsPositionIndependent = isPositionIndependent();
unsigned char TF = IsPositionIndependent ? HexagonII::MO_PCREL : 0;
unsigned Offset = 0;
SDValue T;
if (CPN->isMachineConstantPoolEntry())
- T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Align, Offset,
- TF);
+ T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Alignment,
+ Offset, TF);
else if (isVTi1Type)
- T = DAG.getTargetConstantPool(CVal, ValTy, Align, Offset, TF);
+ T = DAG.getTargetConstantPool(CVal, ValTy, Alignment, Offset, TF);
else
- T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, Offset, TF);
+ T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Alignment, Offset,
+ TF);
assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF &&
"Inconsistent target flag encountered");
@@ -1375,7 +1502,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
- setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ if (Subtarget.isEnvironmentMusl())
+ setOperationAction(ISD::VACOPY, MVT::Other, Custom);
+ else
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
@@ -1621,6 +1751,11 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FADD, MVT::f64, Legal);
setOperationAction(ISD::FSUB, MVT::f64, Legal);
}
+ if (Subtarget.hasV67Ops()) {
+ setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::f64, Legal);
+ }
setTargetDAGCombine(ISD::VSELECT);
@@ -1855,8 +1990,7 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// The offset value comes through Modifier register. For now, assume the
// offset is 0.
Info.offset = 0;
- Info.align =
- MaybeAlign(DL.getABITypeAlignment(Info.memVT.getTypeForEVT(Cont)));
+ Info.align = DL.getABITypeAlign(Info.memVT.getTypeForEVT(Cont));
Info.flags = MachineMemOperand::MOLoad;
return true;
}
@@ -2139,13 +2273,16 @@ HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
const SDLoc &dl(Op);
// Handle conversion from i8 to v8i1.
- if (ResTy == MVT::v8i1) {
- SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV);
- SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
- return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
+ if (InpTy == MVT::i8) {
+ if (ResTy == MVT::v8i1) {
+ SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV);
+ SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
+ return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
+ }
+ return SDValue();
}
- return SDValue();
+ return Op;
}
bool
@@ -2779,10 +2916,10 @@ HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
MachineMemOperand *WideMMO = nullptr;
if (MachineMemOperand *MMO = LN->getMemOperand()) {
MachineFunction &MF = DAG.getMachineFunction();
- WideMMO = MF.getMachineMemOperand(MMO->getPointerInfo(), MMO->getFlags(),
- 2*LoadLen, LoadLen, MMO->getAAInfo(), MMO->getRanges(),
- MMO->getSyncScopeID(), MMO->getOrdering(),
- MMO->getFailureOrdering());
+ WideMMO = MF.getMachineMemOperand(
+ MMO->getPointerInfo(), MMO->getFlags(), 2 * LoadLen, Align(LoadLen),
+ MMO->getAAInfo(), MMO->getRanges(), MMO->getSyncScopeID(),
+ MMO->getOrdering(), MMO->getFailureOrdering());
}
SDValue Load0 = DAG.getLoad(LoadTy, dl, Chain, Base0, WideMMO);
@@ -2928,6 +3065,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+ case ISD::VACOPY: return LowerVACOPY(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
@@ -2946,6 +3084,12 @@ void
HexagonTargetLowering::LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
+ if (isHvxOperation(N)) {
+ LowerHvxOperationWrapper(N, Results, DAG);
+ if (!Results.empty())
+ return;
+ }
+
// We are only custom-lowering stores to verify the alignment of the
// address if it is a compile-time constant. Since a store can be modified
// during type-legalization (the value being stored may need legalization),
@@ -2959,6 +3103,12 @@ void
HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
+ if (isHvxOperation(N)) {
+ ReplaceHvxNodeResults(N, Results, DAG);
+ if (!Results.empty())
+ return;
+ }
+
const SDLoc &dl(N);
switch (N->getOpcode()) {
case ISD::SRL:
@@ -3079,8 +3229,8 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(
switch (VT.getSizeInBits()) {
default:
return {0u, nullptr};
- case 512:
- case 1024:
+ case 64:
+ case 128:
return {0u, &Hexagon::HvxQRRegClass};
}
break;
@@ -3127,12 +3277,12 @@ bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
// The type Ty passed here would then be "void". Skip the alignment
// checks, but do not return false right away, since that confuses
// LSR into crashing.
- unsigned A = DL.getABITypeAlignment(Ty);
+ Align A = DL.getABITypeAlign(Ty);
// The base offset must be a multiple of the alignment.
- if ((AM.BaseOffs % A) != 0)
+ if (!isAligned(A, AM.BaseOffs))
return false;
// The shifted offset must fit in 11 bits.
- if (!isInt<11>(AM.BaseOffs >> Log2_32(A)))
+ if (!isInt<11>(AM.BaseOffs >> Log2(A)))
return false;
}
@@ -3232,30 +3382,36 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
/// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
/// does not need to be loaded. It returns EVT::Other if the type should be
/// determined using generic target-independent logic.
-EVT HexagonTargetLowering::getOptimalMemOpType(uint64_t Size,
- unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset,
- bool MemcpyStrSrc, const AttributeList &FuncAttributes) const {
-
- auto Aligned = [](unsigned GivenA, unsigned MinA) -> bool {
- return (GivenA % MinA) == 0;
- };
-
- if (Size >= 8 && Aligned(DstAlign, 8) && (IsMemset || Aligned(SrcAlign, 8)))
+EVT HexagonTargetLowering::getOptimalMemOpType(
+ const MemOp &Op, const AttributeList &FuncAttributes) const {
+ if (Op.size() >= 8 && Op.isAligned(Align(8)))
return MVT::i64;
- if (Size >= 4 && Aligned(DstAlign, 4) && (IsMemset || Aligned(SrcAlign, 4)))
+ if (Op.size() >= 4 && Op.isAligned(Align(4)))
return MVT::i32;
- if (Size >= 2 && Aligned(DstAlign, 2) && (IsMemset || Aligned(SrcAlign, 2)))
+ if (Op.size() >= 2 && Op.isAligned(Align(2)))
return MVT::i16;
-
return MVT::Other;
}
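A hedged reading of the new MemOp-based hook above: the chunk type is chosen purely from the operation size and its alignment. The standalone helper below is illustrative only, not LLVM API; it replays the same ladder on plain integers.

// Illustrative sketch: same size/alignment ladder as getOptimalMemOpType.
static unsigned chunkBits(unsigned long long Size, unsigned AlignBytes) {
  if (Size >= 8 && AlignBytes % 8 == 0) return 64; // -> MVT::i64
  if (Size >= 4 && AlignBytes % 4 == 0) return 32; // -> MVT::i32
  if (Size >= 2 && AlignBytes % 2 == 0) return 16; // -> MVT::i16
  return 0;                                        // -> MVT::Other
}
// e.g. chunkBits(32, 8) == 64, chunkBits(6, 2) == 16, chunkBits(3, 1) == 0.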
+bool HexagonTargetLowering::allowsMemoryAccess(
+ LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
+ Align Alignment, MachineMemOperand::Flags Flags, bool *Fast) const {
+ MVT SVT = VT.getSimpleVT();
+ if (Subtarget.isHVXVectorType(SVT, true))
+ return allowsHvxMemoryAccess(SVT, Flags, Fast);
+ return TargetLoweringBase::allowsMemoryAccess(
+ Context, DL, VT, AddrSpace, Alignment, Flags, Fast);
+}
+
bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
- EVT VT, unsigned AS, unsigned Align, MachineMemOperand::Flags Flags,
- bool *Fast) const {
+ EVT VT, unsigned AddrSpace, unsigned Alignment,
+ MachineMemOperand::Flags Flags, bool *Fast) const {
+ MVT SVT = VT.getSimpleVT();
+ if (Subtarget.isHVXVectorType(SVT, true))
+ return allowsHvxMisalignedMemoryAccesses(SVT, Flags, Fast);
if (Fast)
*Fast = false;
- return Subtarget.isHVXVectorType(VT.getSimpleVT());
+ return false;
}
std::pair<const TargetRegisterClass*, uint8_t>
@@ -3357,9 +3513,5 @@ bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
TargetLowering::AtomicExpansionKind
HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
- const DataLayout &DL = AI->getModule()->getDataLayout();
- unsigned Size = DL.getTypeStoreSize(AI->getCompareOperand()->getType());
- if (Size >= 4 && Size <= 8)
- return AtomicExpansionKind::LLSC;
- return AtomicExpansionKind::None;
+ return AtomicExpansionKind::LLSC;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index e79646de6287..7d6e6b6185c8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -235,19 +235,20 @@ namespace HexagonISD {
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
- unsigned
+ Register
getExceptionPointerRegister(const Constant *PersonalityFn) const override {
return Hexagon::R0;
}
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
- unsigned
+ Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
return Hexagon::R1;
}
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
@@ -301,12 +302,16 @@ namespace HexagonISD {
/// the immediate into a register.
bool isLegalICmpImmediate(int64_t Imm) const override;
- EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
- unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
- const AttributeList &FuncAttributes) const override;
+ EVT getOptimalMemOpType(const MemOp &Op,
+ const AttributeList &FuncAttributes) const override;
+
+ bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
+ unsigned AddrSpace, Align Alignment,
+ MachineMemOperand::Flags Flags,
+ bool *Fast) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
- unsigned Align, MachineMemOperand::Flags Flags, bool *Fast)
+ unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast)
const override;
/// Returns relocation base for the given PIC jumptable.
@@ -404,8 +409,15 @@ namespace HexagonISD {
VectorPair opSplit(SDValue Vec, const SDLoc &dl, SelectionDAG &DAG) const;
SDValue opCastElem(SDValue Vec, MVT ElemTy, SelectionDAG &DAG) const;
+ bool allowsHvxMemoryAccess(MVT VecTy, MachineMemOperand::Flags Flags,
+ bool *Fast) const;
+ bool allowsHvxMisalignedMemoryAccesses(MVT VecTy,
+ MachineMemOperand::Flags Flags,
+ bool *Fast) const;
+
bool isHvxSingleTy(MVT Ty) const;
bool isHvxPairTy(MVT Ty) const;
+ bool isHvxBoolTy(MVT Ty) const;
SDValue convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
SelectionDAG &DAG) const;
SDValue getIndexInWord32(SDValue Idx, MVT ElemTy, SelectionDAG &DAG) const;
@@ -437,6 +449,8 @@ namespace HexagonISD {
const SDLoc &dl, SelectionDAG &DAG) const;
SDValue extendHvxVectorPred(SDValue VecV, const SDLoc &dl, MVT ResTy,
bool ZeroExt, SelectionDAG &DAG) const;
+ SDValue compressHvxPred(SDValue VecQ, const SDLoc &dl, MVT ResTy,
+ SelectionDAG &DAG) const;
SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) const;
@@ -444,7 +458,7 @@ namespace HexagonISD {
SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) const;
-
+ SDValue LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const;
@@ -454,6 +468,9 @@ namespace HexagonISD {
SDValue LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxShift(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxStore(SDValue Op, SelectionDAG &DAG) const;
+ SDValue HvxVecPredBitcastComputation(SDValue Op, SelectionDAG &DAG) const;
SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const;
SDValue SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const;
@@ -463,8 +480,12 @@ namespace HexagonISD {
const override;
bool isHvxOperation(SDValue Op) const;
+ bool isHvxOperation(SDNode *N) const;
SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const;
-
+ void LowerHvxOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
+ void ReplaceHvxNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
SDValue PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 204950f9010e..7cda915fffe9 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -9,6 +9,7 @@
#include "HexagonISelLowering.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
+#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -39,7 +40,6 @@ HexagonTargetLowering::initializeHVXLowering() {
addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
- addRegisterClass(MVT::v512i1, &Hexagon::HvxQRRegClass);
} else if (Subtarget.useHVX128BOps()) {
addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
@@ -50,7 +50,6 @@ HexagonTargetLowering::initializeHVXLowering() {
addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
- addRegisterClass(MVT::v1024i1, &Hexagon::HvxQRRegClass);
}
// Set up operation actions.
@@ -66,8 +65,18 @@ HexagonTargetLowering::initializeHVXLowering() {
AddPromotedToType(Opc, FromTy, ToTy);
};
- setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal);
- setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
+ // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
+ // Note: v16i1 -> i16 is handled in type legalization instead of op
+ // legalization.
+ setOperationAction(ISD::BITCAST, MVT::i16, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i32, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i64, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v128i1, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i128, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal);
+ setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
for (MVT T : LegalV) {
setIndexedLoadAction(ISD::POST_INC, T, Legal);
@@ -194,12 +203,13 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::XOR, BoolV, Legal);
}
- if (Use64b)
+ if (Use64b) {
for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
- else
+ } else {
for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
+ }
setTargetDAGCombine(ISD::VSELECT);
}
@@ -283,6 +293,37 @@ HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
}
+bool
+HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
+ return Subtarget.isHVXVectorType(Ty, true) &&
+ Ty.getVectorElementType() == MVT::i1;
+}
+
+bool HexagonTargetLowering::allowsHvxMemoryAccess(
+ MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {
+ // Bool vectors are excluded by default, but make it explicit to
+ // emphasize that bool vectors cannot be loaded or stored.
+ // Also, disallow double vector stores (to prevent unnecessary
+ // store widening in DAG combiner).
+ if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
+ return false;
+ if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
+ return false;
+ if (Fast)
+ *Fast = true;
+ return true;
+}
+
+bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
+ MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {
+ if (!Subtarget.isHVXVectorType(VecTy))
+ return false;
+ // XXX Should this be false? vmemu are a bit slower than vmem.
+ if (Fast)
+ *Fast = true;
+ return true;
+}
+
SDValue
HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
SelectionDAG &DAG) const {
@@ -402,10 +443,11 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
(Constant**)Consts.end());
Constant *CV = ConstantVector::get(Tmp);
- unsigned Align = HwLen;
- SDValue CP = LowerConstantPool(DAG.getConstantPool(CV, VecTy, Align), DAG);
+ Align Alignment(HwLen);
+ SDValue CP =
+ LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG);
return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
- MachinePointerInfo::getConstantPool(MF), Align);
+ MachinePointerInfo::getConstantPool(MF), Alignment);
}
// A special case is a situation where the vector is built entirely from
@@ -1023,6 +1065,63 @@ HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
}
SDValue
+HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
+ MVT ResTy, SelectionDAG &DAG) const {
+ // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
+ // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
+ // vector register. The remaining bits of the vector register are
+ // unspecified.
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned HwLen = Subtarget.getVectorLength();
+ MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
+ MVT PredTy = ty(VecQ);
+ unsigned PredLen = PredTy.getVectorNumElements();
+ assert(HwLen % PredLen == 0);
+ MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);
+
+ Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
+ SmallVector<Constant*, 128> Tmp;
+ // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
+ // These are bytes with the LSB rotated left with respect to their index.
+ for (unsigned i = 0; i != HwLen/8; ++i) {
+ for (unsigned j = 0; j != 8; ++j)
+ Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
+ }
+ Constant *CV = ConstantVector::get(Tmp);
+ Align Alignment(HwLen);
+ SDValue CP =
+ LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG);
+ SDValue Bytes =
+ DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(MF), Alignment);
+
+ // Select the bytes that correspond to true bits in the vector predicate.
+ SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
+ getZero(dl, VecTy, DAG));
+ // Calculate the OR of all bytes in each group of 8. That will compress
+ // all the individual bits into a single byte.
+ // First, OR groups of 4, via vrmpy with 0x01010101.
+ SDValue All1 =
+ DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
+ SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
+ // Then rotate the accumulated vector by 4 bytes, and do the final OR.
+ SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
+ {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
+ SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});
+
+ // Pick every 8th byte and coalesce them at the beginning of the output.
+ // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
+ // byte and so on.
+ SmallVector<int,128> Mask;
+ for (unsigned i = 0; i != HwLen; ++i)
+ Mask.push_back((8*i) % HwLen + i/(HwLen/8));
+ SDValue Collect =
+ DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
+ return DAG.getBitcast(ResTy, Collect);
+}
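A scalar model of what compressHvxPred above computes may help when reading the vrmpy/rotate/shuffle sequence: each group of 8 predicate lanes is packed into one output byte, with lane i contributing bit (i % 8). The sketch below is illustrative only and not part of LLVM.

#include <cstddef>
#include <cstdint>
#include <vector>

// Hedged scalar model of compressHvxPred: Pred[i] is HVX predicate lane i;
// bit (i % 8) of Out[i / 8] reproduces it, matching the 01,02,04,...,80
// byte weights selected above before the per-group OR.
static std::vector<uint8_t> compressPredModel(const std::vector<bool> &Pred) {
  std::vector<uint8_t> Out(Pred.size() / 8, 0);
  for (std::size_t i = 0; i != Pred.size(); ++i)
    if (Pred[i])
      Out[i / 8] |= static_cast<uint8_t>(1u << (i % 8));
  return Out;
}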
+
+SDValue
HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
const {
const SDLoc &dl(Op);
@@ -1431,6 +1530,53 @@ HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
}
SDValue
+HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
+ SDValue ValQ = Op.getOperand(0);
+ MVT ResTy = ty(Op);
+ MVT VecTy = ty(ValQ);
+ const SDLoc &dl(Op);
+
+ if (isHvxBoolTy(VecTy) && ResTy.isScalarInteger()) {
+ unsigned HwLen = Subtarget.getVectorLength();
+ MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
+ SDValue VQ = compressHvxPred(ValQ, dl, WordTy, DAG);
+ unsigned BitWidth = ResTy.getSizeInBits();
+
+ if (BitWidth < 64) {
+ SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
+ dl, MVT::i32, DAG);
+ if (BitWidth == 32)
+ return W0;
+ assert(BitWidth < 32u);
+ return DAG.getZExtOrTrunc(W0, dl, ResTy);
+ }
+
+ // The result is >= 64 bits. The only options are 64 or 128.
+ assert(BitWidth == 64 || BitWidth == 128);
+ SmallVector<SDValue,4> Words;
+ for (unsigned i = 0; i != BitWidth/32; ++i) {
+ SDValue W = extractHvxElementReg(
+ VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
+ Words.push_back(W);
+ }
+ SmallVector<SDValue,2> Combines;
+ assert(Words.size() % 2 == 0);
+ for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
+ SDValue C = DAG.getNode(
+ HexagonISD::COMBINE, dl, MVT::i64, {Words[i+1], Words[i]});
+ Combines.push_back(C);
+ }
+
+ if (BitWidth == 64)
+ return Combines[0];
+
+ return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
+ }
+
+ return Op;
+}
+
+SDValue
HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
// Sign- and zero-extends are legal.
assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
@@ -1446,6 +1592,28 @@ HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
}
SDValue
+HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
+ const SDLoc &dl(Op);
+ MVT ResTy = ty(Op);
+
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ bool Use64b = Subtarget.useHVX64BOps();
+ unsigned IntPredCast = Use64b ? Intrinsic::hexagon_V6_pred_typecast
+ : Intrinsic::hexagon_V6_pred_typecast_128B;
+ if (IntNo == IntPredCast) {
+ SDValue Vs = Op.getOperand(1);
+ MVT OpTy = ty(Vs);
+ if (isHvxBoolTy(ResTy) && isHvxBoolTy(OpTy)) {
+ if (ResTy == OpTy)
+ return Vs;
+ return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Vs);
+ }
+ }
+
+ return Op;
+}
+
+SDValue
HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const {
assert(!Op.isMachineOpcode());
SmallVector<SDValue,2> OpsL, OpsH;
@@ -1566,7 +1734,7 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG);
-
+ case ISD::BITCAST: return LowerHvxBitcast(Op, DAG);
case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG);
case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
@@ -1580,6 +1748,7 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
case ISD::SETCC:
case ISD::INTRINSIC_VOID: return Op;
+ case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG);
// Unaligned loads will be handled by the default lowering.
case ISD::LOAD: return SDValue();
}
@@ -1589,6 +1758,28 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("Unhandled HVX operation");
}
+void
+HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+}
+
+void
+HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+ unsigned Opc = N->getOpcode();
+ switch (Opc) {
+ case ISD::BITCAST:
+ if (isHvxBoolTy(ty(N->getOperand(0)))) {
+ SDValue Op(N, 0);
+ SDValue C = LowerHvxBitcast(Op, DAG);
+ Results.push_back(C);
+ }
+ break;
+ default:
+ break;
+ }
+}
+
SDValue
HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
const {
@@ -1621,3 +1812,16 @@ HexagonTargetLowering::isHvxOperation(SDValue Op) const {
return Subtarget.isHVXVectorType(ty(V), true);
});
}
+
+bool
+HexagonTargetLowering::isHvxOperation(SDNode *N) const {
+ // If the type of any result, or any operand type are HVX vector types,
+ // this is an HVX operation.
+ auto IsHvxTy = [this] (EVT Ty) {
+ return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
+ };
+ auto IsHvxOp = [this] (SDValue Op) {
+ return Subtarget.isHVXVectorType(ty(Op), true);
+ };
+ return llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp);
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
index f156de671059..ef2b3040931d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -37,6 +37,8 @@ def HVXVectorAccess : MemAccessSize<5>;
// Instruction Class Declaration +
//===----------------------------------------------------------------------===//
+// "Parse" bits are explicitly NOT defined in the opcode space to prevent
+// TableGen from using them for generation of the decoder tables.
class OpcodeHexagon {
field bits<32> Inst = ?; // Default to an invalid insn.
bits<4> IClass = 0; // ICLASS
@@ -164,6 +166,9 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
bit CVINew = 0;
let TSFlags{62} = CVINew;
+ bit isCVI = 0;
+ let TSFlags{63} = isCVI;
+
// Fields used for relation models.
bit isNonTemporal = 0;
string isNT = ""; // set to "true" for non-temporal vector stores.
@@ -226,9 +231,105 @@ class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [],
OpcodeHexagon;
//===----------------------------------------------------------------------===//
+// Special Instructions -
+//===----------------------------------------------------------------------===//
+
+// The 'invalid_decode' instruction is used by the disassembler to
+// show an instruction that didn't decode correctly. This feature
+// is only leveraged in a special disassembler mode that's activated
+// by a command line flag.
+def tc_invalid : InstrItinClass;
+class Enc_invalid : OpcodeHexagon {
+}
+def invalid_decode : HInst<
+(outs ),
+(ins ),
+"<invalid>",
+tc_invalid, TypeALU32_2op>, Enc_invalid {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-16} = 0b0000000000000000;
+let isCodeGenOnly = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Duplex Instruction Class Declaration
+//===----------------------------------------------------------------------===//
+
+class OpcodeDuplex {
+ field bits<32> Inst = ?; // Default to an invalid insn.
+ bits<4> IClass = 0; // ICLASS
+ bits<13> ISubHi = 0; // Low sub-insn
+ bits<13> ISubLo = 0; // High sub-insn
+
+ let Inst{31-29} = IClass{3-1};
+ let Inst{13} = IClass{0};
+ let Inst{15-14} = 0;
+ let Inst{28-16} = ISubHi;
+ let Inst{12-0} = ISubLo;
+}
+
+class InstDuplex<bits<4> iClass, list<dag> pattern = [],
+ string cstr = "">
+ : Instruction, OpcodeDuplex {
+ let Namespace = "Hexagon";
+ IType Type = TypeDUPLEX; // uses slot 0,1
+ let isCodeGenOnly = 1;
+ let hasSideEffects = 0;
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins);
+ let IClass = iClass;
+ let Constraints = cstr;
+ let Itinerary = DUPLEX;
+ let Size = 4;
+
+ // SoftFail is a field the disassembler can use to provide a way for
+ // instructions to not match without killing the whole decode process. It is
+ // mainly used for ARM, but Tablegen expects this field to exist or it fails
+ // to build the decode table.
+ field bits<32> SoftFail = 0;
+
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+
+ let TSFlags{6-0} = Type.Value;
+
+ // Predicated instructions.
+ bits<1> isPredicated = 0;
+ let TSFlags{7} = isPredicated;
+ bits<1> isPredicatedFalse = 0;
+ let TSFlags{8} = isPredicatedFalse;
+ bits<1> isPredicatedNew = 0;
+ let TSFlags{9} = isPredicatedNew;
+
+ // New-value insn helper fields.
+ bits<1> isNewValue = 0;
+ let TSFlags{10} = isNewValue; // New-value consumer insn.
+ bits<1> hasNewValue = 0;
+ let TSFlags{11} = hasNewValue; // New-value producer insn.
+ bits<3> opNewValue = 0;
+ let TSFlags{14-12} = opNewValue; // New-value produced operand.
+ bits<1> isNVStorable = 0;
+ let TSFlags{15} = isNVStorable; // Store that can become new-value store.
+ bits<1> isNVStore = 0;
+ let TSFlags{16} = isNVStore; // New-value store insn.
+
+ // Immediate extender helper fields.
+ bits<1> isExtendable = 0;
+ let TSFlags{17} = isExtendable; // Insn may be extended.
+ bits<1> isExtended = 0;
+ let TSFlags{18} = isExtended; // Insn must be extended.
+ bits<3> opExtendable = 0;
+ let TSFlags{21-19} = opExtendable; // Which operand may be extended.
+ bits<1> isExtentSigned = 0;
+ let TSFlags{22} = isExtentSigned; // Signed or unsigned range.
+ bits<5> opExtentBits = 0;
+ let TSFlags{27-23} = opExtentBits; //Number of bits of range before extending.
+ bits<2> opExtentAlign = 0;
+ let TSFlags{29-28} = opExtentAlign; // Alignment exponent before extending.
+}
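The TSFlags layout spelled out in InstDuplex above has to stay in sync with the C++ side ("Must match MCTargetDesc/HexagonBaseInfo.h"). As a hedged illustration, the helpers below show how two of those fields would be read back; only the bit positions come from the TableGen class, and the function names are invented here.

#include <cstdint>

// Illustrative decoders only (not the real HexagonBaseInfo.h definitions);
// bit positions copied from the InstDuplex TSFlags assignments above.
static bool tsIsPredicated(uint64_t TSFlags) {
  return (TSFlags >> 7) & 0x1;        // TSFlags{7}     = isPredicated
}
static unsigned tsOpExtentBits(uint64_t TSFlags) {
  return (TSFlags >> 23) & 0x1f;      // TSFlags{27-23} = opExtentBits
}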
+
+//===----------------------------------------------------------------------===//
// Instruction Classes Definitions -
//===----------------------------------------------------------------------===//
-include "HexagonInstrFormatsV5.td"
include "HexagonInstrFormatsV60.td"
include "HexagonInstrFormatsV65.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrFormatsV5.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrFormatsV5.td
deleted file mode 100644
index 68ef2d2d3a8a..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrFormatsV5.td
+++ /dev/null
@@ -1,86 +0,0 @@
-//==- HexagonInstrFormatsV5.td - Hexagon Instruction Formats --*- tablegen -==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon V5 instruction classes in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-// Duplex Instruction Class Declaration
-//===----------------------------------------------------------------------===//
-
-class OpcodeDuplex {
- field bits<32> Inst = ?; // Default to an invalid insn.
- bits<4> IClass = 0; // ICLASS
- bits<13> ISubHi = 0; // Low sub-insn
- bits<13> ISubLo = 0; // High sub-insn
-
- let Inst{31-29} = IClass{3-1};
- let Inst{13} = IClass{0};
- let Inst{15-14} = 0;
- let Inst{28-16} = ISubHi;
- let Inst{12-0} = ISubLo;
-}
-
-class InstDuplex<bits<4> iClass, list<dag> pattern = [],
- string cstr = "">
- : Instruction, OpcodeDuplex {
- let Namespace = "Hexagon";
- IType Type = TypeDUPLEX; // uses slot 0,1
- let isCodeGenOnly = 1;
- let hasSideEffects = 0;
- dag OutOperandList = (outs);
- dag InOperandList = (ins);
- let IClass = iClass;
- let Constraints = cstr;
- let Itinerary = DUPLEX;
- let Size = 4;
-
- // SoftFail is a field the disassembler can use to provide a way for
- // instructions to not match without killing the whole decode process. It is
- // mainly used for ARM, but Tablegen expects this field to exist or it fails
- // to build the decode table.
- field bits<32> SoftFail = 0;
-
- // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
-
- let TSFlags{6-0} = Type.Value;
-
- // Predicated instructions.
- bits<1> isPredicated = 0;
- let TSFlags{7} = isPredicated;
- bits<1> isPredicatedFalse = 0;
- let TSFlags{8} = isPredicatedFalse;
- bits<1> isPredicatedNew = 0;
- let TSFlags{9} = isPredicatedNew;
-
- // New-value insn helper fields.
- bits<1> isNewValue = 0;
- let TSFlags{10} = isNewValue; // New-value consumer insn.
- bits<1> hasNewValue = 0;
- let TSFlags{11} = hasNewValue; // New-value producer insn.
- bits<3> opNewValue = 0;
- let TSFlags{14-12} = opNewValue; // New-value produced operand.
- bits<1> isNVStorable = 0;
- let TSFlags{15} = isNVStorable; // Store that can become new-value store.
- bits<1> isNVStore = 0;
- let TSFlags{16} = isNVStore; // New-value store insn.
-
- // Immediate extender helper fields.
- bits<1> isExtendable = 0;
- let TSFlags{17} = isExtendable; // Insn may be extended.
- bits<1> isExtended = 0;
- let TSFlags{18} = isExtended; // Insn must be extended.
- bits<3> opExtendable = 0;
- let TSFlags{21-19} = opExtendable; // Which operand may be extended.
- bits<1> isExtentSigned = 0;
- let TSFlags{22} = isExtentSigned; // Signed or unsigned range.
- bits<5> opExtentBits = 0;
- let TSFlags{27-23} = opExtentBits; //Number of bits of range before extending.
- bits<2> opExtentAlign = 0;
- let TSFlags{29-28} = opExtentAlign; // Alignment exponent before extending.
-}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrFormatsV65.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrFormatsV65.td
index eaecffe9c89e..246a1d364d41 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrFormatsV65.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrFormatsV65.td
@@ -11,13 +11,13 @@
//===----------------------------------------------------------------------===//
//----------------------------------------------------------------------------//
-// Hexagon Intruction Flags +
+// Hexagon Instruction Flags +
//
// *** Must match BaseInfo.h ***
//----------------------------------------------------------------------------//
//----------------------------------------------------------------------------//
-// Intruction Classes Definitions +
+// Instruction Classes Definitions +
//----------------------------------------------------------------------------//
class CVI_VA_Resource_NoOpcode<dag outs, dag ins, string asmstr,
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 39ec8936214e..d1cd23c3be3e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -118,6 +118,12 @@ HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
: HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
Subtarget(ST) {}
+namespace llvm {
+namespace HexagonFUnits {
+ bool isSlot0Only(unsigned units);
+}
+}
+
static bool isIntRegForSubInst(unsigned Reg) {
return (Reg >= Hexagon::R0 && Reg <= Hexagon::R7) ||
(Reg >= Hexagon::R16 && Reg <= Hexagon::R23);
@@ -370,7 +376,7 @@ bool HexagonInstrInfo::hasStoreToStackSlot(
/// This function can analyze one/two way branching only and should (mostly) be
/// called by target independent side.
/// First entry is always the opcode of the branching instruction, except when
-/// the Cond vector is supposed to be empty, e.g., when AnalyzeBranch fails, a
+/// the Cond vector is supposed to be empty, e.g., when analyzeBranch fails, a
/// BB with only unconditional jump. Subsequent entries depend upon the opcode,
/// e.g. Jump_c p will have
/// Cond[0] = Jump_c
@@ -784,6 +790,25 @@ bool HexagonInstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
return NumInstrs <= 4;
}
+static void getLiveInRegsAt(LivePhysRegs &Regs, const MachineInstr &MI) {
+ SmallVector<std::pair<MCPhysReg, const MachineOperand*>,2> Clobbers;
+ const MachineBasicBlock &B = *MI.getParent();
+ Regs.addLiveIns(B);
+ auto E = MachineBasicBlock::const_iterator(MI.getIterator());
+ for (auto I = B.begin(); I != E; ++I) {
+ Clobbers.clear();
+ Regs.stepForward(*I, Clobbers);
+ }
+}
+
+static void getLiveOutRegsAt(LivePhysRegs &Regs, const MachineInstr &MI) {
+ const MachineBasicBlock &B = *MI.getParent();
+ Regs.addLiveOuts(B);
+ auto E = ++MachineBasicBlock::const_iterator(MI.getIterator()).getReverse();
+ for (auto I = B.rbegin(); I != E; ++I)
+ Regs.stepBackward(*I);
+}
+
void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
@@ -849,11 +874,15 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
if (Hexagon::HvxWRRegClass.contains(SrcReg, DestReg)) {
- Register LoSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_lo);
- Register HiSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_hi);
+ LivePhysRegs LiveAtMI(HRI);
+ getLiveInRegsAt(LiveAtMI, *I);
+ Register SrcLo = HRI.getSubReg(SrcReg, Hexagon::vsub_lo);
+ Register SrcHi = HRI.getSubReg(SrcReg, Hexagon::vsub_hi);
+ unsigned UndefLo = getUndefRegState(!LiveAtMI.contains(SrcLo));
+ unsigned UndefHi = getUndefRegState(!LiveAtMI.contains(SrcHi));
BuildMI(MBB, I, DL, get(Hexagon::V6_vcombine), DestReg)
- .addReg(HiSrc, KillFlag)
- .addReg(LoSrc, KillFlag);
+ .addReg(SrcHi, KillFlag | UndefHi)
+ .addReg(SrcLo, KillFlag | UndefLo);
return;
}
if (Hexagon::HvxQRRegClass.contains(SrcReg, DestReg)) {
@@ -882,17 +911,16 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI,
+ MachineBasicBlock::iterator I, Register SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBB.findDebugLoc(I);
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned SlotAlign = MFI.getObjectAlignment(FI);
unsigned KillFlag = getKillRegState(isKill);
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
- MFI.getObjectSize(FI), SlotAlign);
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(Hexagon::S2_storeri_io))
@@ -928,17 +956,16 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
}
void HexagonInstrInfo::loadRegFromStackSlot(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
int FI, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBB.findDebugLoc(I);
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned SlotAlign = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI), SlotAlign);
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(Hexagon::L2_loadri_io), DestReg)
@@ -966,14 +993,6 @@ void HexagonInstrInfo::loadRegFromStackSlot(
}
}
-static void getLiveRegsAt(LivePhysRegs &Regs, const MachineInstr &MI) {
- const MachineBasicBlock &B = *MI.getParent();
- Regs.addLiveOuts(B);
- auto E = ++MachineBasicBlock::const_iterator(MI.getIterator()).getReverse();
- for (auto I = B.rbegin(); I != E; ++I)
- Regs.stepBackward(*I);
-}
-
/// expandPostRAPseudo - This function is called for all pseudo instructions
/// that remain after register allocation. Many pseudo instructions are
/// created to help register allocation. This is the place to convert them
@@ -985,6 +1004,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
+ LivePhysRegs LiveIn(HRI), LiveOut(HRI);
DebugLoc DL = MI.getDebugLoc();
unsigned Opc = MI.getOpcode();
@@ -1005,10 +1025,9 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
auto UseAligned = [&] (const MachineInstr &MI, unsigned NeedAlign) {
if (MI.memoperands().empty())
return false;
- return all_of(MI.memoperands(),
- [NeedAlign] (const MachineMemOperand *MMO) {
- return NeedAlign <= MMO->getAlignment();
- });
+ return all_of(MI.memoperands(), [NeedAlign](const MachineMemOperand *MMO) {
+ return MMO->getAlign() >= NeedAlign;
+ });
};
switch (Opc) {
@@ -1032,10 +1051,15 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case Hexagon::V6_vassignp: {
Register SrcReg = MI.getOperand(1).getReg();
Register DstReg = MI.getOperand(0).getReg();
+ Register SrcLo = HRI.getSubReg(SrcReg, Hexagon::vsub_lo);
+ Register SrcHi = HRI.getSubReg(SrcReg, Hexagon::vsub_hi);
+ getLiveInRegsAt(LiveIn, MI);
+ unsigned UndefLo = getUndefRegState(!LiveIn.contains(SrcLo));
+ unsigned UndefHi = getUndefRegState(!LiveIn.contains(SrcHi));
unsigned Kill = getKillRegState(MI.getOperand(1).isKill());
BuildMI(MBB, MI, DL, get(Hexagon::V6_vcombine), DstReg)
- .addReg(HRI.getSubReg(SrcReg, Hexagon::vsub_hi), Kill)
- .addReg(HRI.getSubReg(SrcReg, Hexagon::vsub_lo), Kill);
+ .addReg(SrcHi, UndefHi)
+ .addReg(SrcLo, Kill | UndefLo);
MBB.erase(MI);
return true;
}
@@ -1255,9 +1279,8 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
const MachineOperand &Op1 = MI.getOperand(1);
const MachineOperand &Op2 = MI.getOperand(2);
const MachineOperand &Op3 = MI.getOperand(3);
- LivePhysRegs LiveAtMI(HRI);
- getLiveRegsAt(LiveAtMI, MI);
- bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg());
+ getLiveOutRegsAt(LiveOut, MI);
+ bool IsDestLive = !LiveOut.available(MRI, Op0.getReg());
Register PReg = Op1.getReg();
assert(Op1.getSubReg() == 0);
unsigned PState = getRegState(Op1);
@@ -1289,9 +1312,8 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MachineOperand &Op1 = MI.getOperand(1);
MachineOperand &Op2 = MI.getOperand(2);
MachineOperand &Op3 = MI.getOperand(3);
- LivePhysRegs LiveAtMI(HRI);
- getLiveRegsAt(LiveAtMI, MI);
- bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg());
+ getLiveOutRegsAt(LiveOut, MI);
+ bool IsDestLive = !LiveOut.available(MRI, Op0.getReg());
Register PReg = Op1.getReg();
assert(Op1.getSubReg() == 0);
unsigned PState = getRegState(Op1);
@@ -1349,7 +1371,8 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
static const CrashPseudoSourceValue CrashPSV(*this);
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(&CrashPSV),
- MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 8, 1);
+ MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 8,
+ Align(1));
BuildMI(MBB, MI, DL, get(Hexagon::PS_loadrdabs), Hexagon::D13)
.addImm(0xBADC0FEE) // Misaligned load.
.addMemOperand(MMO);
@@ -1707,6 +1730,10 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
if (MI.getDesc().isTerminator() || MI.isPosition())
return true;
+ // INLINEASM_BR can jump to another block
+ if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
+ return true;
+
if (MI.isInlineAsm() && !ScheduleInlineAsm)
return true;
@@ -1735,7 +1762,7 @@ unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str,
if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
strlen(MAI.getSeparatorString())) == 0)
atInsnStart = true;
- if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
+ if (atInsnStart && !isSpace(static_cast<unsigned char>(*Str))) {
Length += MaxInstLength;
atInsnStart = false;
}
@@ -1762,8 +1789,8 @@ HexagonInstrInfo::CreateTargetPostRAHazardRecognizer(
/// \p SrcReg and \p SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
-bool HexagonInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &Mask,
+bool HexagonInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &Mask,
int &Value) const {
unsigned Opc = MI.getOpcode();
@@ -2940,12 +2967,16 @@ bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1,
}
/// Get the base register and byte offset of a load/store instr.
-bool HexagonInstrInfo::getMemOperandWithOffset(
- const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
+bool HexagonInstrInfo::getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
const TargetRegisterInfo *TRI) const {
- unsigned AccessSize = 0;
- BaseOp = getBaseAndOffset(LdSt, Offset, AccessSize);
- return BaseOp != nullptr && BaseOp->isReg();
+ OffsetIsScalable = false;
+ const MachineOperand *BaseOp = getBaseAndOffset(LdSt, Offset, Width);
+ if (!BaseOp || !BaseOp->isReg())
+ return false;
+ BaseOps.push_back(BaseOp);
+ return true;
}
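A rough sketch of how a generic client (for instance, load/store clustering in the machine scheduler) might call the widened hook; everything except the signature is an assumption, with TII, MI and TRI presumed in scope:

// Hypothetical caller of the hook defined above.
SmallVector<const MachineOperand *, 2> BaseOps;
int64_t Offset = 0;
bool OffsetIsScalable = false;
unsigned Width = 0;
if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, OffsetIsScalable,
                                       Width, TRI)) {
  // For Hexagon this reports a single register base operand, a plain byte
  // offset (never scalable), and the access width in bytes.
  const MachineOperand &Base = *BaseOps.front();
  (void)Base;
}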
/// Can these instructions execute at the same time in a bundle.
@@ -3403,6 +3434,64 @@ unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr &GA,
: Hexagon::J4_cmpeqi_tp1_jump_nt;
}
+// Returns -1 if there is no opcode found.
+int HexagonInstrInfo::getDuplexOpcode(const MachineInstr &MI,
+ bool ForBigCore) const {
+ // Static table to switch the opcodes across Tiny Core and Big Core.
+ // dup_ opcodes are Big core opcodes.
+  // NOTE: There are special instructions that need to be handled later:
+  // L4_return* instructions will only occupy SLOT0 (on big core too).
+  // PS_jmpret - this pseudo translates to J2_jumpr, which occupies only SLOT2.
+  // The compiler needs to base the root instruction on L6_return_map_to_raw,
+  // which can go in any slot.
+ static const std::map<unsigned, unsigned> DupMap = {
+ {Hexagon::A2_add, Hexagon::dup_A2_add},
+ {Hexagon::A2_addi, Hexagon::dup_A2_addi},
+ {Hexagon::A2_andir, Hexagon::dup_A2_andir},
+ {Hexagon::A2_combineii, Hexagon::dup_A2_combineii},
+ {Hexagon::A2_sxtb, Hexagon::dup_A2_sxtb},
+ {Hexagon::A2_sxth, Hexagon::dup_A2_sxth},
+ {Hexagon::A2_tfr, Hexagon::dup_A2_tfr},
+ {Hexagon::A2_tfrsi, Hexagon::dup_A2_tfrsi},
+ {Hexagon::A2_zxtb, Hexagon::dup_A2_zxtb},
+ {Hexagon::A2_zxth, Hexagon::dup_A2_zxth},
+ {Hexagon::A4_combineii, Hexagon::dup_A4_combineii},
+ {Hexagon::A4_combineir, Hexagon::dup_A4_combineir},
+ {Hexagon::A4_combineri, Hexagon::dup_A4_combineri},
+ {Hexagon::C2_cmoveif, Hexagon::dup_C2_cmoveif},
+ {Hexagon::C2_cmoveit, Hexagon::dup_C2_cmoveit},
+ {Hexagon::C2_cmovenewif, Hexagon::dup_C2_cmovenewif},
+ {Hexagon::C2_cmovenewit, Hexagon::dup_C2_cmovenewit},
+ {Hexagon::C2_cmpeqi, Hexagon::dup_C2_cmpeqi},
+ {Hexagon::L2_deallocframe, Hexagon::dup_L2_deallocframe},
+ {Hexagon::L2_loadrb_io, Hexagon::dup_L2_loadrb_io},
+ {Hexagon::L2_loadrd_io, Hexagon::dup_L2_loadrd_io},
+ {Hexagon::L2_loadrh_io, Hexagon::dup_L2_loadrh_io},
+ {Hexagon::L2_loadri_io, Hexagon::dup_L2_loadri_io},
+ {Hexagon::L2_loadrub_io, Hexagon::dup_L2_loadrub_io},
+ {Hexagon::L2_loadruh_io, Hexagon::dup_L2_loadruh_io},
+ {Hexagon::S2_allocframe, Hexagon::dup_S2_allocframe},
+ {Hexagon::S2_storerb_io, Hexagon::dup_S2_storerb_io},
+ {Hexagon::S2_storerd_io, Hexagon::dup_S2_storerd_io},
+ {Hexagon::S2_storerh_io, Hexagon::dup_S2_storerh_io},
+ {Hexagon::S2_storeri_io, Hexagon::dup_S2_storeri_io},
+ {Hexagon::S4_storeirb_io, Hexagon::dup_S4_storeirb_io},
+ {Hexagon::S4_storeiri_io, Hexagon::dup_S4_storeiri_io},
+ };
+ unsigned OpNum = MI.getOpcode();
+ // Conversion to Big core.
+ if (ForBigCore) {
+ auto Iter = DupMap.find(OpNum);
+ if (Iter != DupMap.end())
+ return Iter->second;
+ } else { // Conversion to Tiny core.
+ for (auto Iter = DupMap.begin(), End = DupMap.end(); Iter != End; ++Iter)
+ if (Iter->second == OpNum)
+ return Iter->first;
+ }
+ return -1;
+}
+
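A concrete instance of the table lookup above (the caller is hypothetical, with HII and MI assumed in scope; the changeDuplexOpcode helper added further down wraps exactly this pattern):

// ForBigCore=true maps a regular opcode to its dup_ (big-core) form.
int BigOpc = HII->getDuplexOpcode(MI, /*ForBigCore=*/true);
if (BigOpc >= 0)
  MI.setDesc(HII->get(BigOpc));    // e.g. A2_addi -> dup_A2_addi
// ForBigCore=false performs the reverse (linear) lookup over the table.
int TinyOpc = HII->getDuplexOpcode(MI, /*ForBigCore=*/false);
if (TinyOpc >= 0)
  MI.setDesc(HII->get(TinyOpc));   // e.g. dup_A2_addi -> A2_addi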
int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
enum Hexagon::PredSense inPredSense;
inPredSense = invertPredicate ? Hexagon::PredSense_false :
@@ -3735,6 +3824,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
// Rd = memw(Rs+#u4:2)
// Rd = memub(Rs+#u4:0)
case Hexagon::L2_loadri_io:
+ case Hexagon::dup_L2_loadri_io:
DstReg = MI.getOperand(0).getReg();
SrcReg = MI.getOperand(1).getReg();
// Special case this one from Group L2.
@@ -3753,6 +3843,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
}
break;
case Hexagon::L2_loadrub_io:
+ case Hexagon::dup_L2_loadrub_io:
// Rd = memub(Rs+#u4:0)
DstReg = MI.getOperand(0).getReg();
SrcReg = MI.getOperand(1).getReg();
@@ -3772,6 +3863,8 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
// [if ([!]p0[.new])] jumpr r31
case Hexagon::L2_loadrh_io:
case Hexagon::L2_loadruh_io:
+ case Hexagon::dup_L2_loadrh_io:
+ case Hexagon::dup_L2_loadruh_io:
// Rd = memh/memuh(Rs+#u3:1)
DstReg = MI.getOperand(0).getReg();
SrcReg = MI.getOperand(1).getReg();
@@ -3781,6 +3874,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
return HexagonII::HSIG_L2;
break;
case Hexagon::L2_loadrb_io:
+ case Hexagon::dup_L2_loadrb_io:
// Rd = memb(Rs+#u3:0)
DstReg = MI.getOperand(0).getReg();
SrcReg = MI.getOperand(1).getReg();
@@ -3790,6 +3884,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
return HexagonII::HSIG_L2;
break;
case Hexagon::L2_loadrd_io:
+ case Hexagon::dup_L2_loadrd_io:
// Rdd = memd(r29+#u5:3)
DstReg = MI.getOperand(0).getReg();
SrcReg = MI.getOperand(1).getReg();
@@ -3806,6 +3901,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC:
case Hexagon::L4_return:
case Hexagon::L2_deallocframe:
+ case Hexagon::dup_L2_deallocframe:
return HexagonII::HSIG_L2;
case Hexagon::EH_RETURN_JMPR:
case Hexagon::PS_jmpret:
@@ -3825,6 +3921,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
case Hexagon::SL2_jumpr31_t:
case Hexagon::SL2_jumpr31_f:
case Hexagon::SL2_jumpr31_tnew:
+ case Hexagon::SL2_jumpr31_fnew:
DstReg = MI.getOperand(1).getReg();
SrcReg = MI.getOperand(0).getReg();
// [if ([!]p0[.new])] jumpr r31
@@ -3850,6 +3947,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
// memw(Rs+#u4:2) = Rt
// memb(Rs+#u4:0) = Rt
case Hexagon::S2_storeri_io:
+ case Hexagon::dup_S2_storeri_io:
// Special case this one from Group S2.
// memw(r29+#u5:2) = Rt
Src1Reg = MI.getOperand(0).getReg();
@@ -3866,6 +3964,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
return HexagonII::HSIG_S1;
break;
case Hexagon::S2_storerb_io:
+ case Hexagon::dup_S2_storerb_io:
// memb(Rs+#u4:0) = Rt
Src1Reg = MI.getOperand(0).getReg();
Src2Reg = MI.getOperand(2).getReg();
@@ -3883,6 +3982,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
// memb(Rs+#u4) = #U1
// allocframe(#u5:3)
case Hexagon::S2_storerh_io:
+ case Hexagon::dup_S2_storerh_io:
// memh(Rs+#u3:1) = Rt
Src1Reg = MI.getOperand(0).getReg();
Src2Reg = MI.getOperand(2).getReg();
@@ -3892,6 +3992,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
return HexagonII::HSIG_S1;
break;
case Hexagon::S2_storerd_io:
+ case Hexagon::dup_S2_storerd_io:
// memd(r29+#s6:3) = Rtt
Src1Reg = MI.getOperand(0).getReg();
Src2Reg = MI.getOperand(2).getReg();
@@ -3902,6 +4003,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
return HexagonII::HSIG_S2;
break;
case Hexagon::S4_storeiri_io:
+ case Hexagon::dup_S4_storeiri_io:
// memw(Rs+#u4:2) = #U1
Src1Reg = MI.getOperand(0).getReg();
if (isIntRegForSubInst(Src1Reg) && MI.getOperand(1).isImm() &&
@@ -3910,6 +4012,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
return HexagonII::HSIG_S2;
break;
case Hexagon::S4_storeirb_io:
+ case Hexagon::dup_S4_storeirb_io:
// memb(Rs+#u4) = #U1
Src1Reg = MI.getOperand(0).getReg();
if (isIntRegForSubInst(Src1Reg) &&
@@ -3918,6 +4021,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
return HexagonII::HSIG_S2;
break;
case Hexagon::S2_allocframe:
+ case Hexagon::dup_S2_allocframe:
if (MI.getOperand(2).isImm() &&
isShiftedUInt<5,3>(MI.getOperand(2).getImm()))
return HexagonII::HSIG_S1;
@@ -3941,6 +4045,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
// Rd = sxth/sxtb/zxtb/zxth(Rs)
// Rd = and(Rs,#1)
case Hexagon::A2_addi:
+ case Hexagon::dup_A2_addi:
DstReg = MI.getOperand(0).getReg();
SrcReg = MI.getOperand(1).getReg();
if (isIntRegForSubInst(DstReg)) {
@@ -3962,6 +4067,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
}
break;
case Hexagon::A2_add:
+ case Hexagon::dup_A2_add:
// Rx = add(Rx,Rs)
DstReg = MI.getOperand(0).getReg();
Src1Reg = MI.getOperand(1).getReg();
@@ -3971,6 +4077,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
return HexagonII::HSIG_A;
break;
case Hexagon::A2_andir:
+ case Hexagon::dup_A2_andir:
// Same as zxtb.
// Rd16=and(Rs16,#255)
// Rd16=and(Rs16,#1)
@@ -3983,6 +4090,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
return HexagonII::HSIG_A;
break;
case Hexagon::A2_tfr:
+ case Hexagon::dup_A2_tfr:
// Rd = Rs
DstReg = MI.getOperand(0).getReg();
SrcReg = MI.getOperand(1).getReg();
@@ -3990,6 +4098,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
return HexagonII::HSIG_A;
break;
case Hexagon::A2_tfrsi:
+ case Hexagon::dup_A2_tfrsi:
// Rd = #u6
// Do not test for #u6 size since the const is getting extended
// regardless and compound could be formed.
@@ -4002,6 +4111,10 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
case Hexagon::C2_cmovenewit:
case Hexagon::C2_cmoveif:
case Hexagon::C2_cmovenewif:
+ case Hexagon::dup_C2_cmoveit:
+ case Hexagon::dup_C2_cmovenewit:
+ case Hexagon::dup_C2_cmoveif:
+ case Hexagon::dup_C2_cmovenewif:
// if ([!]P0[.new]) Rd = #0
// Actual form:
// %r16 = C2_cmovenewit internal %p0, 0, implicit undef %r16;
@@ -4013,6 +4126,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
return HexagonII::HSIG_A;
break;
case Hexagon::C2_cmpeqi:
+ case Hexagon::dup_C2_cmpeqi:
// P0 = cmp.eq(Rs,#u2)
DstReg = MI.getOperand(0).getReg();
SrcReg = MI.getOperand(1).getReg();
@@ -4023,6 +4137,8 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
break;
case Hexagon::A2_combineii:
case Hexagon::A4_combineii:
+ case Hexagon::dup_A2_combineii:
+ case Hexagon::dup_A4_combineii:
// Rdd = combine(#u2,#U2)
DstReg = MI.getOperand(0).getReg();
if (isDblRegForSubInst(DstReg, HRI) &&
@@ -4035,6 +4151,8 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
return HexagonII::HSIG_A;
break;
case Hexagon::A4_combineri:
+ case Hexagon::dup_A4_combineri:
// Rdd = combine(Rs,#0)
DstReg = MI.getOperand(0).getReg();
SrcReg = MI.getOperand(1).getReg();
@@ -4044,6 +4162,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
return HexagonII::HSIG_A;
break;
case Hexagon::A4_combineir:
+ case Hexagon::dup_A4_combineir:
// Rdd = combine(#0,Rs)
DstReg = MI.getOperand(0).getReg();
SrcReg = MI.getOperand(2).getReg();
@@ -4056,6 +4175,10 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
case Hexagon::A2_sxth:
case Hexagon::A2_zxtb:
case Hexagon::A2_zxth:
+ case Hexagon::dup_A2_sxtb:
+ case Hexagon::dup_A2_sxth:
+ case Hexagon::dup_A2_zxtb:
+ case Hexagon::dup_A2_zxth:
// Rd = sxth/sxtb/zxtb/zxth(Rs)
DstReg = MI.getOperand(0).getReg();
SrcReg = MI.getOperand(1).getReg();
@@ -4199,6 +4322,61 @@ bool HexagonInstrInfo::isAddrModeWithOffset(const MachineInstr &MI) const {
addrMode == HexagonII::BaseLongOffset);
}
+bool HexagonInstrInfo::isPureSlot0(const MachineInstr &MI) const {
+ // Workaround for the Global Scheduler. Sometimes, it creates
+ // A4_ext as a Pseudo instruction and calls this function to see if
+  // it can be added to an existing bundle. Since the instruction doesn't
+  // belong to any BB yet, we can't use the getUnits API.
+ if (MI.getOpcode() == Hexagon::A4_ext)
+ return false;
+
+ unsigned FuncUnits = getUnits(MI);
+ return HexagonFUnits::isSlot0Only(FuncUnits);
+}
+
+bool HexagonInstrInfo::isRestrictNoSlot1Store(const MachineInstr &MI) const {
+ const uint64_t F = MI.getDesc().TSFlags;
+ return ((F >> HexagonII::RestrictNoSlot1StorePos) &
+ HexagonII::RestrictNoSlot1StoreMask);
+}
+
+void HexagonInstrInfo::changeDuplexOpcode(MachineBasicBlock::instr_iterator MII,
+ bool ToBigInstrs) const {
+ int Opcode = -1;
+ if (ToBigInstrs) { // To BigCore Instr.
+ // Check if the instruction can form a Duplex.
+ if (getDuplexCandidateGroup(*MII))
+ // Get the opcode marked "dup_*" tag.
+ Opcode = getDuplexOpcode(*MII, ToBigInstrs);
+ } else // To TinyCore Instr.
+ Opcode = getDuplexOpcode(*MII, ToBigInstrs);
+
+ // Change the opcode of the instruction.
+ if (Opcode >= 0)
+ MII->setDesc(get(Opcode));
+}
+
+// This function is used to translate instructions to facilitate generating
+// Duplexes on TinyCore.
+void HexagonInstrInfo::translateInstrsForDup(MachineFunction &MF,
+ bool ToBigInstrs) const {
+ for (auto &MB : MF)
+ for (MachineBasicBlock::instr_iterator Instr = MB.instr_begin(),
+ End = MB.instr_end();
+ Instr != End; ++Instr)
+ changeDuplexOpcode(Instr, ToBigInstrs);
+}
+
+// This is a specialized form of the above function.
+void HexagonInstrInfo::translateInstrsForDup(
+ MachineBasicBlock::instr_iterator MII, bool ToBigInstrs) const {
+ MachineBasicBlock *MBB = MII->getParent();
+ while ((MII != MBB->instr_end()) && MII->isInsideBundle()) {
+ changeDuplexOpcode(MII, ToBigInstrs);
+ ++MII;
+ }
+}
+
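How these entry points are driven is not part of the hunks shown; a plausible round-trip sketch (an assumption about the caller, with HII and MF presumed in scope on a tiny-core subtarget with duplexes enabled):

// Translate to the dup_ (big-core) descriptions to compute slot consumption,
// then restore the original opcodes afterwards.
HII->translateInstrsForDup(MF, /*ToBigInstrs=*/true);
// ... perform slot/resource checks and packet formation here ...
HII->translateInstrsForDup(MF, /*ToBigInstrs=*/false);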
unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr &MI) const {
using namespace HexagonII;
@@ -4328,7 +4506,7 @@ uint64_t HexagonInstrInfo::getType(const MachineInstr &MI) const {
return (F >> HexagonII::TypePos) & HexagonII::TypeMask;
}
-unsigned HexagonInstrInfo::getUnits(const MachineInstr &MI) const {
+InstrStage::FuncUnits HexagonInstrInfo::getUnits(const MachineInstr &MI) const {
const InstrItineraryData &II = *Subtarget.getInstrItineraryData();
const InstrStage &IS = *II.beginStage(MI.getDesc().getSchedClass());
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 676f6f0a2a8c..847b9a672891 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -109,19 +109,19 @@ public:
bool AllowModify) const override;
/// Remove the branching code at the end of the specific MBB.
- /// This is only invoked in cases where AnalyzeBranch returns success. It
+ /// This is only invoked in cases where analyzeBranch returns success. It
/// returns the number of instructions that were removed.
unsigned removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved = nullptr) const override;
/// Insert branch code into the end of the specified MachineBasicBlock.
/// The operands to this method are the same as those
- /// returned by AnalyzeBranch. This is only invoked in cases where
- /// AnalyzeBranch returns success. It returns the number of instructions
+ /// returned by analyzeBranch. This is only invoked in cases where
+ /// analyzeBranch returns success. It returns the number of instructions
/// inserted.
///
/// It is also invoked by tail merging to add unconditional branches in
- /// cases where AnalyzeBranch doesn't apply because there was no original
+ /// cases where analyzeBranch doesn't apply because there was no original
/// branch to analyze. At least this much must be implemented, else tail
/// merging needs to be disabled.
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
@@ -182,7 +182,7 @@ public:
/// is true, the register operand is the last use and must be marked kill.
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
+ Register SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
@@ -191,7 +191,7 @@ public:
/// machine basic block before the specified machine instruction.
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
+ Register DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
@@ -204,10 +204,11 @@ public:
bool expandPostRAPseudo(MachineInstr &MI) const override;
/// Get the base register and byte offset of a load/store instr.
- bool getMemOperandWithOffset(const MachineInstr &LdSt,
- const MachineOperand *&BaseOp,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt,
+ SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+ bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const override;
/// Reverses the branch condition of the specified condition list,
/// returning false on success and true if it cannot be reversed.
@@ -268,8 +269,8 @@ public:
/// in SrcReg and SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
- bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &Mask, int &Value) const override;
+ bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &Mask, int &Value) const override;
/// Compute the instruction latency of a given instruction.
/// If the instruction has higher cost when predicated, it's returned via
@@ -341,10 +342,10 @@ public:
MachineBasicBlock *TargetBB,
SmallPtrSet<MachineBasicBlock *, 8> &Visited) const;
- bool isBaseImmOffset(const MachineInstr &MI) const;
bool isAbsoluteSet(const MachineInstr &MI) const;
bool isAccumulator(const MachineInstr &MI) const;
bool isAddrModeWithOffset(const MachineInstr &MI) const;
+ bool isBaseImmOffset(const MachineInstr &MI) const;
bool isComplex(const MachineInstr &MI) const;
bool isCompoundBranchInstr(const MachineInstr &MI) const;
bool isConstExtended(const MachineInstr &MI) const;
@@ -387,6 +388,8 @@ public:
bool isPredicated(unsigned Opcode) const;
bool isPredicateLate(unsigned Opcode) const;
bool isPredictedTaken(unsigned Opcode) const;
+ bool isPureSlot0(const MachineInstr &MI) const;
+ bool isRestrictNoSlot1Store(const MachineInstr &MI) const;
bool isSaveCalleeSavedRegsCall(const MachineInstr &MI) const;
bool isSignExtendingLoad(const MachineInstr &MI) const;
bool isSolo(const MachineInstr &MI) const;
@@ -435,6 +438,7 @@ public:
getCompoundCandidateGroup(const MachineInstr &MI) const;
unsigned getCompoundOpcode(const MachineInstr &GA,
const MachineInstr &GB) const;
+ int getDuplexOpcode(const MachineInstr &MI, bool ForBigCore = true) const;
int getCondOpcode(int Opc, bool sense) const;
int getDotCurOp(const MachineInstr &MI) const;
int getNonDotCurOp(const MachineInstr &MI) const;
@@ -461,7 +465,7 @@ public:
short getRegForm(const MachineInstr &MI) const;
unsigned getSize(const MachineInstr &MI) const;
uint64_t getType(const MachineInstr &MI) const;
- unsigned getUnits(const MachineInstr &MI) const;
+ InstrStage::FuncUnits getUnits(const MachineInstr &MI) const;
MachineBasicBlock::instr_iterator expandVGatherPseudo(MachineInstr &MI) const;
@@ -480,6 +484,17 @@ public:
void setBundleNoShuf(MachineBasicBlock::instr_iterator MIB) const;
bool getBundleNoShuf(const MachineInstr &MIB) const;
+
+  // When TinyCore with Duplexes is enabled, these functions are used to
+  // translate tiny instructions to big instructions and vice versa to get
+  // the slot consumption.
+ void changeDuplexOpcode(MachineBasicBlock::instr_iterator MII,
+ bool ToBigInstrs) const;
+ void translateInstrsForDup(MachineFunction &MF,
+ bool ToBigInstrs = true) const;
+ void translateInstrsForDup(MachineBasicBlock::instr_iterator MII,
+ bool ToBigInstrs) const;
+
// Addressing mode relations.
short changeAddrMode_abs_io(short Opc) const;
short changeAddrMode_io_abs(short Opc) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
index 8ae55b207188..10d0261a95dd 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -205,12 +205,12 @@ def: T_stc_pat<S2_storerf_pci, int_hexagon_circ_sthhi, s4_1ImmPred_timm, I32>;
multiclass MaskedStore <InstHexagon MI, Intrinsic IntID> {
def : Pat<(IntID HvxQR:$src1, IntRegs:$src2, HvxVR:$src3),
- (MI HvxQR:$src1, IntRegs:$src2, #0, HvxVR:$src3)>,
+ (MI HvxQR:$src1, IntRegs:$src2, 0, HvxVR:$src3)>,
Requires<[UseHVX]>;
def : Pat<(!cast<Intrinsic>(IntID#"_128B") HvxQR:$src1, IntRegs:$src2,
HvxVR:$src3),
- (MI HvxQR:$src1, IntRegs:$src2, #0, HvxVR:$src3)>,
+ (MI HvxQR:$src1, IntRegs:$src2, 0, HvxVR:$src3)>,
Requires<[UseHVX]>;
}
@@ -236,6 +236,8 @@ def: T_R_pat<Y2_dczeroa, int_hexagon_Y2_dczeroa>;
def: T_RR_pat<Y4_l2fetch, int_hexagon_Y4_l2fetch>;
def: T_RP_pat<Y5_l2fetch, int_hexagon_Y5_l2fetch>;
+def: Pat<(int_hexagon_Y2_dcfetch I32:$Rt), (Y2_dcfetchbo I32:$Rt, 0)>;
+
//
// Patterns for optimizing code generations for HVX.
@@ -277,76 +279,6 @@ def : Pat <(v32i32 (int_hexagon_V6_hi_128B (v64i32 HvxWR:$src1))),
Requires<[UseHVX]>;
}
-def : Pat <(v512i1 (bitconvert (v16i32 HvxVR:$src1))),
- (v512i1 (V6_vandvrt (v16i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v512i1 (bitconvert (v32i16 HvxVR:$src1))),
- (v512i1 (V6_vandvrt (v32i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v512i1 (bitconvert (v64i8 HvxVR:$src1))),
- (v512i1 (V6_vandvrt (v64i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v16i32 (bitconvert (v512i1 HvxQR:$src1))),
- (v16i32 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v32i16 (bitconvert (v512i1 HvxQR:$src1))),
- (v32i16 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v64i8 (bitconvert (v512i1 HvxQR:$src1))),
- (v64i8 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v1024i1 (bitconvert (v32i32 HvxVR:$src1))),
- (v1024i1 (V6_vandvrt (v32i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v1024i1 (bitconvert (v64i16 HvxVR:$src1))),
- (v1024i1 (V6_vandvrt (v64i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v1024i1 (bitconvert (v128i8 HvxVR:$src1))),
- (v1024i1 (V6_vandvrt (v128i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v32i32 (bitconvert (v1024i1 HvxQR:$src1))),
- (v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v64i16 (bitconvert (v1024i1 HvxQR:$src1))),
- (v64i16 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v128i8 (bitconvert (v1024i1 HvxQR:$src1))),
- (v128i8 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-let AddedComplexity = 140 in {
-def : Pat <(store (v512i1 HvxQR:$src1), (i32 IntRegs:$addr)),
- (V6_vS32b_ai IntRegs:$addr, 0,
- (v16i32 (V6_vandqrt (v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101))))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v512i1 (load (i32 IntRegs:$addr))),
- (v512i1 (V6_vandvrt
- (v16i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-
-def : Pat <(store (v1024i1 HvxQR:$src1), (i32 IntRegs:$addr)),
- (V6_vS32b_ai IntRegs:$addr, 0,
- (v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101))))>,
- Requires<[UseHVX]>;
-
-def : Pat <(v1024i1 (load (i32 IntRegs:$addr))),
- (v1024i1 (V6_vandvrt
- (v32i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>,
- Requires<[UseHVX]>;
-}
-
def: Pat<(v64i16 (trunc v64i32:$Vdd)),
(v64i16 (V6_vpackwh_sat
(v32i32 (V6_hi HvxWR:$Vdd)),
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
index a60c80beb5d6..1245ee7974b5 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
@@ -25,59 +25,59 @@ def : Pat < (v32i32 (int_hexagon_V6_hi_128B (v64i32 HvxWR:$src1))),
(v32i32 (EXTRACT_SUBREG (v64i32 HvxWR:$src1), vsub_hi)) >;
}
-def : Pat <(v512i1 (bitconvert (v16i32 HvxVR:$src1))),
- (v512i1 (V6_vandvrt(v16i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v64i1 (bitconvert (v16i32 HvxVR:$src1))),
+ (v64i1 (V6_vandvrt(v16i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v512i1 (bitconvert (v32i16 HvxVR:$src1))),
- (v512i1 (V6_vandvrt(v32i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v64i1 (bitconvert (v32i16 HvxVR:$src1))),
+ (v64i1 (V6_vandvrt(v32i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v512i1 (bitconvert (v64i8 HvxVR:$src1))),
- (v512i1 (V6_vandvrt(v64i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v64i1 (bitconvert (v64i8 HvxVR:$src1))),
+ (v64i1 (V6_vandvrt(v64i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v16i32 (bitconvert (v512i1 HvxQR:$src1))),
- (v16i32 (V6_vandqrt(v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v16i32 (bitconvert (v64i1 HvxQR:$src1))),
+ (v16i32 (V6_vandqrt(v64i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v32i16 (bitconvert (v512i1 HvxQR:$src1))),
- (v32i16 (V6_vandqrt(v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v32i16 (bitconvert (v64i1 HvxQR:$src1))),
+ (v32i16 (V6_vandqrt(v64i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v64i8 (bitconvert (v512i1 HvxQR:$src1))),
- (v64i8 (V6_vandqrt(v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v64i8 (bitconvert (v64i1 HvxQR:$src1))),
+ (v64i8 (V6_vandqrt(v64i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v1024i1 (bitconvert (v32i32 HvxVR:$src1))),
- (v1024i1 (V6_vandvrt (v32i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v128i1 (bitconvert (v32i32 HvxVR:$src1))),
+ (v128i1 (V6_vandvrt (v32i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v1024i1 (bitconvert (v64i16 HvxVR:$src1))),
- (v1024i1 (V6_vandvrt (v64i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v128i1 (bitconvert (v64i16 HvxVR:$src1))),
+ (v128i1 (V6_vandvrt (v64i16 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v1024i1 (bitconvert (v128i8 HvxVR:$src1))),
- (v1024i1 (V6_vandvrt (v128i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v128i1 (bitconvert (v128i8 HvxVR:$src1))),
+ (v128i1 (V6_vandvrt (v128i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v32i32 (bitconvert (v1024i1 HvxQR:$src1))),
- (v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v32i32 (bitconvert (v128i1 HvxQR:$src1))),
+ (v32i32 (V6_vandqrt (v128i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v64i16 (bitconvert (v1024i1 HvxQR:$src1))),
- (v64i16 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v64i16 (bitconvert (v128i1 HvxQR:$src1))),
+ (v64i16 (V6_vandqrt (v128i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
-def : Pat <(v128i8 (bitconvert (v1024i1 HvxQR:$src1))),
- (v128i8 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
+def : Pat <(v128i8 (bitconvert (v128i1 HvxQR:$src1))),
+ (v128i8 (V6_vandqrt (v128i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>;
let AddedComplexity = 140 in {
-def : Pat <(store (v512i1 HvxQR:$src1), (i32 IntRegs:$addr)),
+def : Pat <(store (v64i1 HvxQR:$src1), (i32 IntRegs:$addr)),
(V6_vS32b_ai IntRegs:$addr, 0,
- (v16i32 (V6_vandqrt (v512i1 HvxQR:$src1),
+ (v16i32 (V6_vandqrt (v64i1 HvxQR:$src1),
(A2_tfrsi 0x01010101))))>;
-def : Pat <(v512i1 (load (i32 IntRegs:$addr))),
- (v512i1 (V6_vandvrt
+def : Pat <(v64i1 (load (i32 IntRegs:$addr))),
+ (v64i1 (V6_vandvrt
(v16i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>;
-def : Pat <(store (v1024i1 HvxQR:$src1), (i32 IntRegs:$addr)),
+def : Pat <(store (v128i1 HvxQR:$src1), (i32 IntRegs:$addr)),
(V6_vS32b_ai IntRegs:$addr, 0,
- (v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1),
+ (v32i32 (V6_vandqrt (v128i1 HvxQR:$src1),
(A2_tfrsi 0x01010101))))>;
-def : Pat <(v1024i1 (load (i32 IntRegs:$addr))),
- (v1024i1 (V6_vandvrt
+def : Pat <(v128i1 (load (i32 IntRegs:$addr))),
+ (v128i1 (V6_vandvrt
(v32i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index ffaf71e23690..2c1e0cadd9ee 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -20,7 +20,6 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -57,6 +56,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <array>
#include <cassert>
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
index d1a153920e5e..188d91355a35 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -51,7 +51,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
RelocationType = MCSymbolRefExpr::VK_None;
break;
case HexagonII::MO_PCREL:
- RelocationType = MCSymbolRefExpr::VK_Hexagon_PCREL;
+ RelocationType = MCSymbolRefExpr::VK_PCREL;
break;
case HexagonII::MO_GOT:
RelocationType = MCSymbolRefExpr::VK_GOT;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
index 2961e16cc9dc..89ef5c2a891d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -30,6 +30,9 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo {
unsigned StackAlignBaseVReg = 0; // Aligned-stack base register (virtual)
unsigned StackAlignBasePhysReg = 0; // (physical)
int VarArgsFrameIndex;
+ int RegSavedAreaStartFrameIndex;
+ int FirstNamedArgFrameIndex;
+ int LastNamedArgFrameIndex;
bool HasClobberLR = false;
bool HasEHReturn = false;
std::map<const MachineInstr*, unsigned> PacketInfo;
@@ -46,6 +49,15 @@ public:
void setVarArgsFrameIndex(int v) { VarArgsFrameIndex = v; }
int getVarArgsFrameIndex() { return VarArgsFrameIndex; }
+ void setRegSavedAreaStartFrameIndex(int v) { RegSavedAreaStartFrameIndex = v;}
+ int getRegSavedAreaStartFrameIndex() { return RegSavedAreaStartFrameIndex; }
+
+ void setFirstNamedArgFrameIndex(int v) { FirstNamedArgFrameIndex = v; }
+ int getFirstNamedArgFrameIndex() { return FirstNamedArgFrameIndex; }
+
+ void setLastNamedArgFrameIndex(int v) { LastNamedArgFrameIndex = v; }
+ int getLastNamedArgFrameIndex() { return LastNamedArgFrameIndex; }
+
void setStartPacket(MachineInstr* MI) {
PacketInfo[MI] |= Hexagon::StartPacket;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
index e3579dfa9ba9..8dc1113194a8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -291,7 +291,7 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
// at machine code level, we don't need this, but if we decide
// to move new value jump prior to RA, we would be needing this.
MachineRegisterInfo &MRI = MF.getRegInfo();
- if (secondReg && !Register::isPhysicalRegister(cmpOp2)) {
+ if (!Register::isPhysicalRegister(cmpOp2)) {
MachineInstr *def = MRI.getVRegDef(cmpOp2);
if (def->getOpcode() == TargetOpcode::COPY)
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index f1fe51f5e54f..c718e5f2d9fb 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -561,6 +561,7 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
MIB.add(ImmOp);
MIB.add(OldMI->getOperand(3));
OpStart = 4;
+ Changed = true;
} else if (HII->getAddrMode(*OldMI) == HexagonII::BaseImmOffset) {
short NewOpCode = HII->changeAddrMode_io_abs(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
@@ -570,10 +571,8 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
MIB.addGlobalAddress(GV, Offset, ImmOp.getTargetFlags());
MIB.add(OldMI->getOperand(2));
OpStart = 3;
+ Changed = true;
}
- Changed = true;
- LLVM_DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
- LLVM_DEBUG(dbgs() << "[TO]: " << *MIB << "\n");
} else if (ImmOpNum == 1 && OldMI->getOperand(2).getImm() == 0) {
short NewOpCode = HII->changeAddrMode_rr_io(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
@@ -582,12 +581,14 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
MIB.add(ImmOp);
OpStart = 3;
Changed = true;
+ }
+ if (Changed) {
LLVM_DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
LLVM_DEBUG(dbgs() << "[TO]: " << *MIB << "\n");
- }
- if (Changed)
+
for (unsigned i = OpStart; i < OpEnd; ++i)
MIB.add(OldMI->getOperand(i));
+ }
return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td
index cf711058823c..cc10627955fb 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -362,6 +362,16 @@ def Rol: pf2<rotl>;
// --(1) Immediate -------------------------------------------------------
//
+def Imm64Lo: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(int32_t (N->getSExtValue()),
+ SDLoc(N), MVT::i32);
+}]>;
+def Imm64Hi: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(int32_t (N->getSExtValue()>>32),
+ SDLoc(N), MVT::i32);
+}]>;
+
+
def SDTHexagonCONST32
: SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisPtrTy<0>]>;
@@ -389,7 +399,10 @@ def: Pat<(HexagonCP tconstpool:$A), (A2_tfrsi imm:$A)>;
def: Pat<(i1 0), (PS_false)>;
def: Pat<(i1 1), (PS_true)>;
-def: Pat<(i64 imm:$v), (CONST64 imm:$v)>;
+def: Pat<(i64 imm:$v), (CONST64 imm:$v)>,
+ Requires<[UseSmallData,NotOptTinyCore]>;
+def: Pat<(i64 imm:$v),
+ (Combinew (A2_tfrsi (Imm64Hi $v)), (A2_tfrsi (Imm64Lo $v)))>;
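To make the fallback concrete: Imm64Lo/Imm64Hi just slice a 64-bit immediate into two 32-bit halves, which the Combinew of two A2_tfrsi transfers then reassembles. A small standalone sketch of that arithmetic (the constant is arbitrary):

#include <cstdint>
#include <cstdio>

int main() {
  const int64_t V = 0x123456789ABCDEF0LL; // arbitrary 64-bit immediate
  const int32_t Lo = int32_t(V);          // what Imm64Lo selects
  const int32_t Hi = int32_t(V >> 32);    // what Imm64Hi selects
  // Combinew (A2_tfrsi Hi), (A2_tfrsi Lo) concatenates the two halves.
  const uint64_t R = (uint64_t(uint32_t(Hi)) << 32) | uint32_t(Lo);
  std::printf("v=%llx hi=%x lo=%x recombined=%llx\n",
              (unsigned long long)V, (unsigned)Hi, (unsigned)Lo,
              (unsigned long long)R);
  return 0;
}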
def ftoi : SDNodeXForm<fpimm, [{
APInt I = N->getValueAPF().bitcastToAPInt();
@@ -923,6 +936,13 @@ let AddedComplexity = 100 in {
defm: MinMax_pats<F2_sfmax, F2_sfmin, select, setole, i1, F32>;
}
+let AddedComplexity = 100, Predicates = [HasV67] in {
+ defm: MinMax_pats<F2_dfmin, F2_dfmax, select, setogt, i1, F64>;
+ defm: MinMax_pats<F2_dfmin, F2_dfmax, select, setoge, i1, F64>;
+ defm: MinMax_pats<F2_dfmax, F2_dfmin, select, setolt, i1, F64>;
+ defm: MinMax_pats<F2_dfmax, F2_dfmin, select, setole, i1, F64>;
+}
+
defm: MinMax_pats<A2_vminb, A2_vmaxb, vselect, setgt, v8i1, V8I8>;
defm: MinMax_pats<A2_vminb, A2_vmaxb, vselect, setge, v8i1, V8I8>;
defm: MinMax_pats<A2_vminh, A2_vmaxh, vselect, setgt, v4i1, V4I16>;
@@ -1075,7 +1095,7 @@ def Divu64_8: SDNodeXForm<imm, [{
// Special cases:
let AddedComplexity = 100 in {
def: Pat<(fshl I32:$Rs, I32:$Rt, (i32 16)),
- (A2_combine_hl I32:$Rs, I32:$Rt)>;
+ (A2_combine_lh I32:$Rs, I32:$Rt)>;
def: Pat<(fshl I64:$Rs, I64:$Rt, IsMul8_U3:$S),
(S2_valignib I64:$Rs, I64:$Rt, (Divu64_8 $S))>;
}
@@ -1109,7 +1129,7 @@ def FShr64r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
// Special cases:
let AddedComplexity = 100 in {
def: Pat<(fshr I32:$Rs, I32:$Rt, (i32 16)),
- (A2_combine_hl I32:$Rs, I32:$Rt)>;
+ (A2_combine_lh I32:$Rs, I32:$Rt)>;
def: Pat<(fshr I64:$Rs, I64:$Rt, IsMul8_U3:$S),
(S2_valignib I64:$Rs, I64:$Rt, (Divu8 $S))>;
}
@@ -1231,7 +1251,7 @@ class OpshIRI_pat<InstHexagon MI, PatFrag Op, PatFrag ShOp,
: Pat<(Op anyimm:$u8, (ShOp RegPred:$Rs, ImmPred:$U5)),
(MI anyimm:$u8, RegPred:$Rs, imm:$U5)>;
-let AddedComplexity = 200 in {
+let AddedComplexity = 200, Predicates = [UseCompound] in {
def: OpshIRI_pat<S4_addi_asl_ri, Add, Su<Shl>, I32, u5_0ImmPred>;
def: OpshIRI_pat<S4_addi_lsr_ri, Add, Su<Srl>, I32, u5_0ImmPred>;
def: OpshIRI_pat<S4_subi_asl_ri, Sub, Su<Shl>, I32, u5_0ImmPred>;
@@ -1408,6 +1428,26 @@ let Predicates = [HasV66] in {
def: OpR_RR_pat<F2_dfsub, pf2<fsub>, f64, F64>;
}
+def DfMpy: OutPatFrag<(ops node:$Rs, node:$Rt),
+ (F2_dfmpyhh
+ (F2_dfmpylh
+ (F2_dfmpylh
+ (F2_dfmpyll $Rs, $Rt),
+ $Rs, $Rt),
+ $Rt, $Rs),
+ $Rs, $Rt)>;
+
+let Predicates = [HasV67,UseUnsafeMath], AddedComplexity = 50 in {
+ def: Pat<(fmul F64:$Rs, F64:$Rt), (DfMpy $Rs, $Rt)>;
+}
+let Predicates = [HasV67] in {
+ def: OpR_RR_pat<F2_dfmin, pf2<fminnum>, f64, F64>;
+ def: OpR_RR_pat<F2_dfmax, pf2<fmaxnum>, f64, F64>;
+
+ def: Pat<(fmul F64:$Rs, F64:$Rt), (DfMpy (F2_dfmpyfix $Rs, $Rt),
+ (F2_dfmpyfix $Rt, $Rs))>;
+}
+
// In expressions like a0*b0 + a1*b1 + ..., prefer to generate multiply-add,
// over add-add with individual multiplies as inputs.
let AddedComplexity = 10 in {
@@ -1510,7 +1550,7 @@ let AddedComplexity = 110 in { // greater than S2_asl_r_r_and/or/xor.
// S4_addaddi and S4_subaddi don't have tied operands, so give them
// a bit of preference.
-let AddedComplexity = 30 in {
+let AddedComplexity = 30, Predicates = [UseCompound] in {
def: Pat<(add I32:$Rs, (Su<Add> I32:$Ru, anyimm:$s6)),
(S4_addaddi IntRegs:$Rs, IntRegs:$Ru, imm:$s6)>;
def: Pat<(add anyimm:$s6, (Su<Add> I32:$Rs, I32:$Ru)),
@@ -1523,8 +1563,10 @@ let AddedComplexity = 30 in {
(S4_subaddi IntRegs:$Rs, imm:$s6, IntRegs:$Ru)>;
}
+let Predicates = [UseCompound] in
def: Pat<(or I32:$Ru, (Su<And> I32:$Rx, anyimm:$s10)),
(S4_or_andix IntRegs:$Ru, IntRegs:$Rx, imm:$s10)>;
+
def: Pat<(or I32:$Rx, (Su<And> I32:$Rs, anyimm:$s10)),
(S4_or_andi IntRegs:$Rx, IntRegs:$Rs, imm:$s10)>;
def: Pat<(or I32:$Rx, (Su<Or> I32:$Rs, anyimm:$s10)),
@@ -1625,7 +1667,7 @@ def : Pat <(mulhs I64:$Rss, I64:$Rtt),
// will put the immediate addend into a register, while these instructions will
// use it directly. Such a construct does not appear in the middle of a gep,
// where M2_macsip would be preferable.
-let AddedComplexity = 20 in {
+let AddedComplexity = 20, Predicates = [UseCompound] in {
def: Pat<(add (Su<Mul> I32:$Rs, u6_0ImmPred:$U6), anyimm:$u6),
(M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>;
def: Pat<(add (Su<Mul> I32:$Rs, I32:$Rt), anyimm:$u6),
@@ -1633,13 +1675,14 @@ let AddedComplexity = 20 in {
}
// Keep these instructions less preferable to M2_macsip/M2_macsin.
-def: Pat<(add I32:$Ru, (Su<Mul> I32:$Rs, u6_2ImmPred:$u6_2)),
- (M4_mpyri_addr_u2 IntRegs:$Ru, imm:$u6_2, IntRegs:$Rs)>;
-def: Pat<(add I32:$Ru, (Su<Mul> I32:$Rs, anyimm:$u6)),
- (M4_mpyri_addr IntRegs:$Ru, IntRegs:$Rs, imm:$u6)>;
-def: Pat<(add I32:$Ru, (Su<Mul> I32:$Ry, I32:$Rs)),
- (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>;
-
+let Predicates = [UseCompound] in {
+ def: Pat<(add I32:$Ru, (Su<Mul> I32:$Rs, u6_2ImmPred:$u6_2)),
+ (M4_mpyri_addr_u2 IntRegs:$Ru, imm:$u6_2, IntRegs:$Rs)>;
+ def: Pat<(add I32:$Ru, (Su<Mul> I32:$Rs, anyimm:$u6)),
+ (M4_mpyri_addr IntRegs:$Ru, IntRegs:$Rs, imm:$u6)>;
+ def: Pat<(add I32:$Ru, (Su<Mul> I32:$Ry, I32:$Rs)),
+ (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>;
+}
def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx),
(F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
@@ -1648,7 +1691,6 @@ def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx),
(F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
-
def: Pat<(mul V2I32:$Rs, V2I32:$Rt),
(PS_vmulw V2I32:$Rs, V2I32:$Rt)>;
def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)),
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
index 0ccfe64ad1e5..d0b02f035d1e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -45,7 +45,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
-#include "llvm/PassSupport.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPseudo.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPseudo.td
index d2b6d64e3c92..20c939577586 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPseudo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPseudo.td
@@ -112,7 +112,7 @@ let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2,
opExtendable = 0, hasSideEffects = 0 in
class LOOP_iBase<string mnemonic, InstHexagon rootInst>
: InstHexagon <(outs), (ins b30_2Imm:$offset, u10_0Imm:$src2),
- #mnemonic#"($offset,#$src2)",
+ mnemonic#"($offset,#$src2)",
[], "", rootInst.Itinerary, rootInst.Type>, OpcodeHexagon {
bits<9> offset;
bits<10> src2;
@@ -132,7 +132,7 @@ let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2,
opExtendable = 0, hasSideEffects = 0 in
class LOOP_rBase<string mnemonic, InstHexagon rootInst>
: InstHexagon<(outs), (ins b30_2Imm:$offset, IntRegs:$src2),
- #mnemonic#"($offset,$src2)",
+ mnemonic#"($offset,$src2)",
[], "", rootInst.Itinerary, rootInst.Type>, OpcodeHexagon {
bits<9> offset;
bits<5> src2;
@@ -490,7 +490,7 @@ def TFRI64_V4 : InstHexagon<(outs DoubleRegs:$dst),
A2_combineii.Itinerary, TypeALU32_2op>, OpcodeHexagon;
// Hexagon doesn't have a vector multiply with C semantics.
-// Instead, generate a pseudo instruction that gets expaneded into two
+// Instead, generate a pseudo instruction that gets expanded into two
// scalar MPYI instructions.
// This is expanded by ExpandPostRAPseudos.
let isPseudo = 1 in
@@ -527,13 +527,15 @@ multiclass NewCircularLoad<RegisterClass RC, MemAccessSize MS> {
let isCodeGenOnly = 1, isPseudo = 1, Defs = [CS], Uses = [CS],
addrMode = PostInc, accessSize = MS, hasSideEffects = 0 in {
+ // Use timing class of L2_loadrb_pci.
def NAME#_pci : LDInst<(outs RC:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Cs),
- ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_e93a3d71>;
+ ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_5ceb2f9e>;
+ // Use timing class of L2_loadrb_pcr.
def NAME#_pcr : LDInst<(outs RC:$Rd32, IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Cs),
- ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_44d3da28>;
+ ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_075c8dd8>;
}
}
@@ -548,13 +550,15 @@ multiclass NewCircularStore<RegisterClass RC, MemAccessSize MS> {
let isCodeGenOnly = 1, isPseudo = 1, Defs = [CS], Uses = [CS],
addrMode = PostInc, accessSize = MS, hasSideEffects = 0 in {
+ // Use timing class of S2_storerb_pci.
def NAME#_pci : STInst<(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, RC:$Rt32, IntRegs:$Cs),
- ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_e86aa961>;
+ ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_b4dc7630>;
+ // Use timing class of S2_storerb_pcr.
def NAME#_pcr : STInst<(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, ModRegs:$Mu2, RC:$Rt32, IntRegs:$Cs),
- ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_da97ee82>;
+ ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_a2b365d2>;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index d55aeaf10852..52f247977094 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -172,6 +172,13 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
Reserved.set(Hexagon::C8);
Reserved.set(Hexagon::USR_OVF);
+  // Leveraging these registers will require more work to recognize
+  // the new semantics they pose, Hi/LoVec patterns, etc.
+  // Note well: if enabled, their use should be restricted to targets
+  // where `HST.useHVXOps() && HST.hasV67Ops()` is true.
+ for (auto Reg : Hexagon_MC::GetVectRegRev())
+ Reserved.set(Reg);
+
if (MF.getSubtarget<HexagonSubtarget>().hasReservedR19())
Reserved.set(Hexagon::R19);
@@ -196,7 +203,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
auto &HII = *HST.getInstrInfo();
auto &HFI = *HST.getFrameLowering();
- unsigned BP = 0;
+ Register BP;
int FI = MI.getOperand(FIOp).getIndex();
// Select the base pointer (BP) and calculate the actual offset from BP
// to the beginning of the object at index FI.
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
index fc166b5a3410..52d15da3bcb5 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -56,10 +56,6 @@ public:
/// Returns true if the frame pointer is valid.
bool useFPForScavengingIndex(const MachineFunction &MF) const override;
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
- return true;
- }
-
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC,
unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg,
const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
index c23b837bb62f..49428db223a1 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -18,6 +18,12 @@ let Namespace = "Hexagon" in {
let HWEncoding{4-0} = num;
}
+ // These registers are used to preserve a distinction between
+ // vector register pairs of differing order.
+ class HexagonFakeReg<string n> : Register<n> {
+ let isArtificial = 1;
+ }
+
class HexagonDoubleReg<bits<5> num, string n, list<Register> subregs,
list<string> alt = []> :
RegisterWithSubRegs<n, subregs> {
@@ -30,6 +36,13 @@ let Namespace = "Hexagon" in {
class Ri<bits<5> num, string n, list<string> alt = []> :
HexagonReg<num, n, alt>;
+  // R_fake - false/pseudo registers. These registers are used
+ // to provide a distinct set of aliases for both styles of vector
+ // register pairs without encountering subregister indexing constraints.
+ class R_fake<string n> :
+ HexagonFakeReg<n>;
+
+
// Rf - 32-bit floating-point registers.
class Rf<bits<5> num, string n> : HexagonReg<num, n>;
@@ -81,6 +94,7 @@ let Namespace = "Hexagon" in {
def isub_hi : SubRegIndex<32, 32>;
def vsub_lo : SubRegIndex<512>;
def vsub_hi : SubRegIndex<512, 512>;
+ def vsub_fake: SubRegIndex<512>;
def wsub_lo : SubRegIndex<1024>;
def wsub_hi : SubRegIndex<1024, 1024>;
def subreg_overflow : SubRegIndex<1, 0>;
@@ -183,27 +197,49 @@ let Namespace = "Hexagon" in {
foreach i = 0-31 in {
def V#i : Ri<i, "v"#i>, DwarfRegNum<[!add(i, 99)]>;
+ def VF#i : R_fake<"__"#!add(i,999999)>, DwarfRegNum<[!add(i, 999999)]>;
+ def VFR#i : R_fake<"__"#!add(i,9999999)>, DwarfRegNum<[!add(i, 9999999)]>;
}
def VTMP : Ri<0, "vtmp">, DwarfRegNum<[131]>;
// Aliases of the V* registers used to hold double vec values.
- let SubRegIndices = [vsub_lo, vsub_hi], CoveredBySubRegs = 1 in {
- def W0 : Rd< 0, "v1:0", [V0, V1]>, DwarfRegNum<[99]>;
- def W1 : Rd< 2, "v3:2", [V2, V3]>, DwarfRegNum<[101]>;
- def W2 : Rd< 4, "v5:4", [V4, V5]>, DwarfRegNum<[103]>;
- def W3 : Rd< 6, "v7:6", [V6, V7]>, DwarfRegNum<[105]>;
- def W4 : Rd< 8, "v9:8", [V8, V9]>, DwarfRegNum<[107]>;
- def W5 : Rd<10, "v11:10", [V10, V11]>, DwarfRegNum<[109]>;
- def W6 : Rd<12, "v13:12", [V12, V13]>, DwarfRegNum<[111]>;
- def W7 : Rd<14, "v15:14", [V14, V15]>, DwarfRegNum<[113]>;
- def W8 : Rd<16, "v17:16", [V16, V17]>, DwarfRegNum<[115]>;
- def W9 : Rd<18, "v19:18", [V18, V19]>, DwarfRegNum<[117]>;
- def W10 : Rd<20, "v21:20", [V20, V21]>, DwarfRegNum<[119]>;
- def W11 : Rd<22, "v23:22", [V22, V23]>, DwarfRegNum<[121]>;
- def W12 : Rd<24, "v25:24", [V24, V25]>, DwarfRegNum<[123]>;
- def W13 : Rd<26, "v27:26", [V26, V27]>, DwarfRegNum<[125]>;
- def W14 : Rd<28, "v29:28", [V28, V29]>, DwarfRegNum<[127]>;
- def W15 : Rd<30, "v31:30", [V30, V31]>, DwarfRegNum<[129]>;
+ let SubRegIndices = [vsub_lo, vsub_hi, vsub_fake], CoveredBySubRegs = 1 in {
+ def W0 : Rd< 0, "v1:0", [V0, V1, VF0]>, DwarfRegNum<[99]>;
+ def W1 : Rd< 2, "v3:2", [V2, V3, VF1]>, DwarfRegNum<[101]>;
+ def W2 : Rd< 4, "v5:4", [V4, V5, VF2]>, DwarfRegNum<[103]>;
+ def W3 : Rd< 6, "v7:6", [V6, V7, VF3]>, DwarfRegNum<[105]>;
+ def W4 : Rd< 8, "v9:8", [V8, V9, VF4]>, DwarfRegNum<[107]>;
+ def W5 : Rd<10, "v11:10", [V10, V11, VF5]>, DwarfRegNum<[109]>;
+ def W6 : Rd<12, "v13:12", [V12, V13, VF6]>, DwarfRegNum<[111]>;
+ def W7 : Rd<14, "v15:14", [V14, V15, VF7]>, DwarfRegNum<[113]>;
+ def W8 : Rd<16, "v17:16", [V16, V17, VF8]>, DwarfRegNum<[115]>;
+ def W9 : Rd<18, "v19:18", [V18, V19, VF9]>, DwarfRegNum<[117]>;
+ def W10 : Rd<20, "v21:20", [V20, V21, VF10]>, DwarfRegNum<[119]>;
+ def W11 : Rd<22, "v23:22", [V22, V23, VF11]>, DwarfRegNum<[121]>;
+ def W12 : Rd<24, "v25:24", [V24, V25, VF12]>, DwarfRegNum<[123]>;
+ def W13 : Rd<26, "v27:26", [V26, V27, VF13]>, DwarfRegNum<[125]>;
+ def W14 : Rd<28, "v29:28", [V28, V29, VF14]>, DwarfRegNum<[127]>;
+ def W15 : Rd<30, "v31:30", [V30, V31, VF15]>, DwarfRegNum<[129]>;
+ }
+
+ // Reverse Aliases of the V* registers used to hold double vec values.
+ let SubRegIndices = [vsub_lo, vsub_hi, vsub_fake], CoveredBySubRegs = 1 in {
+ def WR0 : Rd< 1, "v0:1", [V0, V1, VFR0]>, DwarfRegNum<[161]>;
+ def WR1 : Rd< 3, "v2:3", [V2, V3, VFR1]>, DwarfRegNum<[162]>;
+ def WR2 : Rd< 5, "v4:5", [V4, V5, VFR2]>, DwarfRegNum<[163]>;
+ def WR3 : Rd< 7, "v6:7", [V6, V7, VFR3]>, DwarfRegNum<[164]>;
+ def WR4 : Rd< 9, "v8:9", [V8, V9, VFR4]>, DwarfRegNum<[165]>;
+ def WR5 : Rd<11, "v10:11", [V10, V11, VFR5]>, DwarfRegNum<[166]>;
+ def WR6 : Rd<13, "v12:13", [V12, V13, VFR6]>, DwarfRegNum<[167]>;
+ def WR7 : Rd<15, "v14:15", [V14, V15, VFR7]>, DwarfRegNum<[168]>;
+ def WR8 : Rd<17, "v16:17", [V16, V17, VFR8]>, DwarfRegNum<[169]>;
+ def WR9 : Rd<19, "v18:19", [V18, V19, VFR9]>, DwarfRegNum<[170]>;
+ def WR10: Rd<21, "v20:21", [V20, V21, VFR10]>, DwarfRegNum<[171]>;
+ def WR11: Rd<23, "v22:23", [V22, V23, VFR11]>, DwarfRegNum<[172]>;
+ def WR12: Rd<25, "v24:25", [V24, V25, VFR12]>, DwarfRegNum<[173]>;
+ def WR13: Rd<27, "v26:27", [V26, V27, VFR13]>, DwarfRegNum<[174]>;
+ def WR14: Rd<29, "v28:29", [V28, V29, VFR14]>, DwarfRegNum<[175]>;
+ def WR15: Rd<31, "v30:31", [V30, V31, VFR15]>, DwarfRegNum<[176]>;
}
// Aliases of the V* registers used to hold quad vec values.
@@ -283,7 +319,7 @@ let Namespace = "Hexagon" in {
// HVX types
def VecI1: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
- [v512i1, v1024i1, v512i1]>;
+ [v64i1, v128i1, v64i1]>;
def VecI8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
[v64i8, v128i8, v64i8]>;
def VecI16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
@@ -314,15 +350,15 @@ def HvxVR : RegisterClass<"Hexagon", [VecI8, VecI16, VecI32], 512,
}
def HvxWR : RegisterClass<"Hexagon", [VecPI8, VecPI16, VecPI32], 1024,
- (add (sequence "W%u", 0, 15))> {
+ (add (sequence "W%u", 0, 15), (sequence "WR%u", 0, 15))> {
let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
[RegInfo<1024,1024,1024>, RegInfo<2048,2048,2048>, RegInfo<1024,1024,1024>]>;
}
-def HvxQR : RegisterClass<"Hexagon", [VecI1, VecQ8, VecQ16, VecQ32], 512,
+def HvxQR : RegisterClass<"Hexagon", [VecI1, VecQ8, VecQ16, VecQ32], 128,
(add Q0, Q1, Q2, Q3)> {
let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
- [RegInfo<512,512,512>, RegInfo<1024,1024,1024>, RegInfo<512,512,512>]>;
+ [RegInfo<64,512,512>, RegInfo<128,1024,1024>, RegInfo<64,512,512>]>;
}
def HvxVQR : RegisterClass<"Hexagon", [untyped], 2048,
@@ -365,6 +401,10 @@ def CtrRegs : RegisterClass<"Hexagon", [i32], 32,
FRAMELIMIT, FRAMEKEY, PKTCOUNTLO, PKTCOUNTHI, UTIMERLO, UTIMERHI,
M0, M1, USR)>;
+let Size = 64 in
+def VectRegRev : RegisterClass<"Hexagon", [i64], 64,
+ (add (sequence "WR%u", 0, 15))>;
+
let isAllocatable = 0 in
def UsrBits : RegisterClass<"Hexagon", [i1], 0, (add USR_OVF)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSchedule.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSchedule.td
index 0834e9000460..5efd02ada54c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSchedule.td
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSchedule.td
@@ -56,37 +56,15 @@ def tc_ENDLOOP : InstrItinClass;
include "HexagonDepIICScalar.td"
include "HexagonDepIICHVX.td"
-//===----------------------------------------------------------------------===//
-// V5 Machine Info +
-//===----------------------------------------------------------------------===//
-
include "HexagonScheduleV5.td"
-
-// V55 Machine Info +
include "HexagonScheduleV55.td"
-//===----------------------------------------------------------------------===//
-// V60 Machine Info -
-//===----------------------------------------------------------------------===//
-
include "HexagonIICScalar.td"
include "HexagonIICHVX.td"
include "HexagonScheduleV60.td"
-//===----------------------------------------------------------------------===//
-// V62 Machine Info +
-//===----------------------------------------------------------------------===//
-
include "HexagonScheduleV62.td"
-
-//===----------------------------------------------------------------------===//
-// V65 Machine Info +
-//===----------------------------------------------------------------------===//
-
include "HexagonScheduleV65.td"
-
-//===----------------------------------------------------------------------===//
-// V66 Machine Info +
-//===----------------------------------------------------------------------===//
-
include "HexagonScheduleV66.td"
+include "HexagonScheduleV67.td"
+include "HexagonScheduleV67T.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonScheduleV67.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonScheduleV67.td
new file mode 100644
index 000000000000..4f9d861a5504
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonScheduleV67.td
@@ -0,0 +1,39 @@
+//=-HexagonScheduleV67.td - HexagonV67 Scheduling Definitions *- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//
+// ScalarItin and HVXItin contain some old itineraries
+// still used by a handful of instructions. Hopefully, we will be able
+// to get rid of them soon.
+
+def HexagonV67ItinList : DepScalarItinV66, ScalarItin,
+ DepHVXItinV66, HVXItin, PseudoItin {
+ list<InstrItinData> ItinList =
+ !listconcat(DepScalarItinV66_list, ScalarItin_list,
+ DepHVXItinV66_list, HVXItin_list, PseudoItin_list);
+}
+
+def HexagonItinerariesV67 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
+ CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
+ CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL,
+ CVI_ALL_NOMEM, CVI_ZW],
+ [Hex_FWD, HVX_FWD],
+ HexagonV67ItinList.ItinList>;
+
+def HexagonModelV67 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV67;
+ let LoadLatency = 1;
+ let CompleteModel = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon V67 Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonScheduleV67T.td b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonScheduleV67T.td
new file mode 100644
index 000000000000..f2bcb1e7256c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonScheduleV67T.td
@@ -0,0 +1,61 @@
+//=- HexagonScheduleV67T.td - Hexagon V67 Tiny Core Scheduling Definitions --=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+class HexagonV67TPseudoItin {
+ list<InstrItinData> V67TPseudoItin_list = [
+ InstrItinData<PSEUDO, [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData<DUPLEX, [InstrStage<1, [SLOT0]>],
+ [2, 1, 1]>,
+ InstrItinData<tc_ENDLOOP, [InstrStage<1, [SLOT_ENDLOOP]>], [2]>
+ ];
+}
+
+// V67TItin_list and HVXItin contain some old itineraries
+// still used by a handful of instructions. Hopefully, we will be able to
+// get rid of them soon.
+def HexagonV67TItinList : DepScalarItinV67T,
+ DepHVXItinV67, HVXItin, HexagonV67TPseudoItin {
+ list<InstrItinData> V67TItin_list = [
+ InstrItinData<LD_tc_ld_SLOT01, [InstrStage<1, [SLOT0]>],
+ [3, 1, 1],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData<ST_tc_st_SLOT01, [InstrStage<1, [SLOT0]>],
+ [1, 1, 3, 3],
+ [Hex_FWD, Hex_FWD]>
+ ];
+
+ list<InstrItinData> ItinList =
+ !listconcat(DepScalarItinV67T_list,
+ DepHVXItinV67_list, V67TItin_list,
+ HVXItin_list, V67TPseudoItin_list);
+}
+
+def HexagonItinerariesV67T :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
+ CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
+ CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL,
+ CVI_ALL_NOMEM, CVI_ZW],
+ [Hex_FWD, HVX_FWD],
+ HexagonV67TItinList.ItinList>;
+
+
+def HexagonModelV67T : SchedMachineModel {
+ let IssueWidth = 3;
+ let Itineraries = HexagonItinerariesV67T;
+ let LoadLatency = 1;
+ let CompleteModel = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon V67 Tiny Core Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
index c5ba7ced4c30..1b724e8fcae9 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -18,10 +18,10 @@ using namespace llvm;
SDValue HexagonSelectionDAGInfo::EmitTargetCodeForMemcpy(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
+ SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
- if (AlwaysInline || (Align & 0x3) != 0 || !ConstantSize)
+ if (AlwaysInline || Alignment < Align(4) || !ConstantSize)
return SDValue();
uint64_t SizeVal = ConstantSize->getZExtValue();
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
index af8b8318b059..0d3b1725d1bc 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -23,8 +23,8 @@ public:
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVolatile,
- bool AlwaysInline,
+ SDValue Size, Align Alignment,
+ bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const override;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
index d80e0ed50c93..b45d871e04d6 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -504,7 +504,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L,
// Get the registers on which the loop controlling compare instruction
// depends.
- unsigned CmpR1 = 0, CmpR2 = 0;
+ Register CmpR1, CmpR2;
const MachineInstr *CmpI = MRI->getVRegDef(PR);
while (CmpI->getOpcode() == Hexagon::C2_not)
CmpI = MRI->getVRegDef(CmpI->getOperand(1).getReg());
@@ -688,11 +688,12 @@ void HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI,
for (auto &MO : MI->memoperands()) {
const MachinePointerInfo &Ptr = MO->getPointerInfo();
MachineMemOperand::Flags F = MO->getFlags();
- int A = MO->getAlignment();
+ Align A = MO->getAlign();
- auto *Tmp1 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, A);
+ auto *Tmp1 = MF.getMachineMemOperand(Ptr, F, 4 /*size*/, A);
LowI->addMemOperand(MF, Tmp1);
- auto *Tmp2 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, std::min(A, 4));
+ auto *Tmp2 =
+ MF.getMachineMemOperand(Ptr, F, 4 /*size*/, std::min(A, Align(4)));
HighI->addMemOperand(MF, Tmp2);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
index aab37393ed36..2c4007145bd0 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
@@ -314,7 +314,7 @@ bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin,
MachineInstr *FirstMI = *Begin;
assert(!FirstMI->memoperands_empty() && "Expecting some memory operands");
const MachineMemOperand &FirstMMO = getStoreTarget(FirstMI);
- unsigned Alignment = FirstMMO.getAlignment();
+ unsigned Alignment = FirstMMO.getAlign().value();
unsigned SizeAccum = FirstMMO.getSize();
unsigned FirstOffset = getStoreOffset(FirstMI);
@@ -417,9 +417,8 @@ bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
DebugLoc DL = OG.back()->getDebugLoc();
const MachineMemOperand &OldM = getStoreTarget(FirstSt);
MachineMemOperand *NewM =
- MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
- TotalSize, OldM.getAlignment(),
- OldM.getAAInfo());
+ MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
+ TotalSize, OldM.getAlign(), OldM.getAAInfo());
if (Acc < 0x10000) {
// Create mem[hw] = #Acc
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index 6c706fea096b..2b7e1bcba9a3 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -75,14 +75,14 @@ static cl::opt<bool> EnableCheckBankConflict("hexagon-check-bank-conflict",
cl::Hidden, cl::ZeroOrMore, cl::init(true),
cl::desc("Enable checking for cache bank conflicts"));
-
HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
StringRef FS, const TargetMachine &TM)
: HexagonGenSubtargetInfo(TT, CPU, FS), OptLevel(TM.getOptLevel()),
- CPUString(Hexagon_MC::selectHexagonCPU(CPU)),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)),
+ CPUString(std::string(Hexagon_MC::selectHexagonCPU(CPU))),
+ TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
RegInfo(getHwMode()), TLInfo(TM, *this),
InstrItins(getInstrItineraryForCPU(CPUString)) {
+ Hexagon_MC::addArchSubtarget(this, FS);
// Beware of the default constructor of InstrItineraryData: it will
// reset all members to 0.
assert(InstrItins.Itineraries != nullptr && "InstrItins not initialized");
@@ -90,24 +90,16 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
- static std::map<StringRef, Hexagon::ArchEnum> CpuTable{
- {"generic", Hexagon::ArchEnum::V60},
- {"hexagonv5", Hexagon::ArchEnum::V5},
- {"hexagonv55", Hexagon::ArchEnum::V55},
- {"hexagonv60", Hexagon::ArchEnum::V60},
- {"hexagonv62", Hexagon::ArchEnum::V62},
- {"hexagonv65", Hexagon::ArchEnum::V65},
- {"hexagonv66", Hexagon::ArchEnum::V66},
- };
-
- auto FoundIt = CpuTable.find(CPUString);
- if (FoundIt != CpuTable.end())
- HexagonArchVersion = FoundIt->second;
+ Optional<Hexagon::ArchEnum> ArchVer =
+ Hexagon::GetCpu(Hexagon::CpuTable, CPUString);
+ if (ArchVer)
+ HexagonArchVersion = *ArchVer;
else
llvm_unreachable("Unrecognized Hexagon processor version");
UseHVX128BOps = false;
UseHVX64BOps = false;
+ UseAudioOps = false;
UseLongCalls = false;
UseBSBScheduling = hasV60Ops() && EnableBSBSched;
@@ -117,6 +109,13 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
if (OverrideLongCalls.getPosition())
UseLongCalls = OverrideLongCalls;
+ if (isTinyCore()) {
+ // Tiny core has a single thread, so back-to-back scheduling is enabled by
+ // default.
+ if (!EnableBSBSched.getPosition())
+ UseBSBScheduling = false;
+ }
+
FeatureBitset Features = getFeatureBits();
if (HexagonDisableDuplex)
setFeatureBits(Features.reset(Hexagon::FeatureDuplex));
@@ -316,13 +315,14 @@ bool HexagonSubtarget::useAA() const {
/// Perform target specific adjustments to the latency of a schedule
/// dependency.
-void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
+void HexagonSubtarget::adjustSchedDependency(SUnit *Src, int SrcOpIdx,
+ SUnit *Dst, int DstOpIdx,
SDep &Dep) const {
- MachineInstr *SrcInst = Src->getInstr();
- MachineInstr *DstInst = Dst->getInstr();
if (!Src->isInstr() || !Dst->isInstr())
return;
+ MachineInstr *SrcInst = Src->getInstr();
+ MachineInstr *DstInst = Dst->getInstr();
const HexagonInstrInfo *QII = getInstrInfo();
// Instructions with .new operands have zero latency.
@@ -424,8 +424,17 @@ void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
int DefIdx = -1;
for (unsigned OpNum = 0; OpNum < SrcI->getNumOperands(); OpNum++) {
const MachineOperand &MO = SrcI->getOperand(OpNum);
- if (MO.isReg() && MO.isDef() && MO.getReg() == DepR)
- DefIdx = OpNum;
+ bool IsSameOrSubReg = false;
+ if (MO.isReg()) {
+ unsigned MOReg = MO.getReg();
+ if (Register::isVirtualRegister(DepR)) {
+ IsSameOrSubReg = (MOReg == DepR);
+ } else {
+ IsSameOrSubReg = getRegisterInfo()->isSubRegisterEq(DepR, MOReg);
+ }
+ if (MO.isDef() && IsSameOrSubReg)
+ DefIdx = OpNum;
+ }
}
assert(DefIdx >= 0 && "Def Reg not found in Src MI");
MachineInstr *DstI = Dst->getInstr();
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index 31157a0065d9..de4f245519e4 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -13,7 +13,7 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H
#define LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H
-#include "HexagonDepArch.h"
+#include "HexagonArch.h"
#include "HexagonFrameLowering.h"
#include "HexagonISelLowering.h"
#include "HexagonInstrInfo.h"
@@ -45,14 +45,18 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
bool UseHVX64BOps = false;
bool UseHVX128BOps = false;
+ bool UseAudioOps = false;
+ bool UseCompound = false;
bool UseLongCalls = false;
bool UseMemops = false;
bool UsePackets = false;
bool UseNewValueJumps = false;
bool UseNewValueStores = false;
bool UseSmallData = false;
+ bool UseUnsafeMath = false;
bool UseZRegOps = false;
+ bool HasPreV65 = false;
bool HasMemNoShuf = false;
bool EnableDuplex = false;
bool ReservedR19 = false;
@@ -83,7 +87,14 @@ public:
};
private:
+ enum HexagonProcFamilyEnum { Others, TinyCore };
+
std::string CPUString;
+ Triple TargetTriple;
+
+ // The following objects can use the TargetTriple, so they must be
+ // declared after it.
+ HexagonProcFamilyEnum HexagonProcFamily = Others;
HexagonInstrInfo InstrInfo;
HexagonRegisterInfo RegInfo;
HexagonTargetLowering TLInfo;
@@ -95,6 +106,11 @@ public:
HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
const TargetMachine &TM);
+ const Triple &getTargetTriple() const { return TargetTriple; }
+ bool isEnvironmentMusl() const {
+ return TargetTriple.getEnvironment() == Triple::Musl;
+ }
+
/// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
const InstrItineraryData *getInstrItineraryData() const override {
@@ -157,18 +173,45 @@ public:
bool hasV66OpsOnly() const {
return getHexagonArchVersion() == Hexagon::ArchEnum::V66;
}
+ bool hasV67Ops() const {
+ return getHexagonArchVersion() >= Hexagon::ArchEnum::V67;
+ }
+ bool hasV67OpsOnly() const {
+ return getHexagonArchVersion() == Hexagon::ArchEnum::V67;
+ }
+ bool useAudioOps() const { return UseAudioOps; }
+ bool useCompound() const { return UseCompound; }
bool useLongCalls() const { return UseLongCalls; }
bool useMemops() const { return UseMemops; }
bool usePackets() const { return UsePackets; }
bool useNewValueJumps() const { return UseNewValueJumps; }
bool useNewValueStores() const { return UseNewValueStores; }
bool useSmallData() const { return UseSmallData; }
+ bool useUnsafeMath() const { return UseUnsafeMath; }
bool useZRegOps() const { return UseZRegOps; }
+ bool isTinyCore() const { return HexagonProcFamily == TinyCore; }
+ bool isTinyCoreWithDuplex() const { return isTinyCore() && EnableDuplex; }
+
bool useHVXOps() const {
return HexagonHVXVersion > Hexagon::ArchEnum::NoArch;
}
+ bool useHVXV60Ops() const {
+ return HexagonHVXVersion >= Hexagon::ArchEnum::V60;
+ }
+ bool useHVXV62Ops() const {
+ return HexagonHVXVersion >= Hexagon::ArchEnum::V62;
+ }
+ bool useHVXV65Ops() const {
+ return HexagonHVXVersion >= Hexagon::ArchEnum::V65;
+ }
+ bool useHVXV66Ops() const {
+ return HexagonHVXVersion >= Hexagon::ArchEnum::V66;
+ }
+ bool useHVXV67Ops() const {
+ return HexagonHVXVersion >= Hexagon::ArchEnum::V67;
+ }
bool useHVX128BOps() const { return useHVXOps() && UseHVX128BOps; }
bool useHVX64BOps() const { return useHVXOps() && UseHVX64BOps; }
@@ -186,7 +229,11 @@ public:
// compiler time and will be removed eventually anyway.
bool enableMachineSchedDefaultSched() const override { return false; }
+ // For use with PostRAScheduling: get the anti-dependence breaking that should
+ // be performed before post-RA scheduling.
AntiDepBreakMode getAntiDepBreakMode() const override { return ANTIDEP_ALL; }
+ /// True if the subtarget should run a scheduler after register
+ /// allocation.
bool enablePostRAScheduler() const override { return true; }
bool enableSubRegLiveness() const override;
@@ -211,7 +258,8 @@ public:
/// Perform target specific adjustments to the latency of a schedule
/// dependency.
- void adjustSchedDependency(SUnit *def, SUnit *use, SDep& dep) const override;
+ void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
+ SDep &Dep) const override;
unsigned getVectorLength() const {
assert(useHVXOps());
@@ -239,9 +287,6 @@ public:
ArrayRef<MVT> ElemTypes = getHVXElementTypes();
if (IncludeBool && ElemTy == MVT::i1) {
- // Special case for the v512i1, etc.
- if (8*HwLen == NumElems)
- return true;
// Boolean HVX vector types are formed from regular HVX vector types
// by replacing the element type with i1.
for (MVT T : ElemTypes)
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 9e9ce209a825..3fe42ea13f51 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -237,6 +237,14 @@ HexagonTargetMachine::getSubtargetImpl(const Function &F) const {
std::string FS = !FSAttr.hasAttribute(Attribute::None)
? FSAttr.getValueAsString().str()
: TargetFS;
+ // Append the preexisting target features last, so that +mattr overrides
+ // the "unsafe-fp-math" function attribute.
+ // Creating a separate target feature is not strictly necessary; it exists
+ // only to make "unsafe-fp-math" force the creation of a new subtarget.
+
+ if (FnAttrs.hasFnAttribute("unsafe-fp-math") &&
+ F.getFnAttribute("unsafe-fp-math").getValueAsString() == "true")
+ FS = FS.empty() ? "+unsafe-fp" : "+unsafe-fp," + FS;
auto &I = SubtargetMap[CPU + FS];
if (!I) {
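
The ordering here matters: the synthetic "+unsafe-fp" feature is placed first so that the preexisting features in FS, appended after it, win on any conflict. A minimal standalone C++ sketch of just that string-building step (not the in-tree code; addUnsafeFP is a hypothetical helper name):

#include <iostream>
#include <string>

// Hypothetical helper mirroring the hunk above: the synthetic feature goes
// first so later, user-supplied features take precedence.
static std::string addUnsafeFP(const std::string &FS) {
  return FS.empty() ? "+unsafe-fp" : "+unsafe-fp," + FS;
}

int main() {
  std::cout << addUnsafeFP("") << '\n';                      // "+unsafe-fp"
  std::cout << addUnsafeFP("+hvx,+hvx-length128b") << '\n';  // synthetic bit first
  return 0;
}
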
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index fdcc41a4ca41..cfc8ed813c92 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/SectionKind.h"
@@ -112,7 +113,6 @@ static const char *getSectionSuffixForSize(unsigned Size) {
void HexagonTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- InitializeELF(TM.Options.UseInitArray);
SmallDataSection =
getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
@@ -308,7 +308,8 @@ unsigned HexagonTargetObjectFile::getSmallestAddressableSize(const Type *Ty,
const ArrayType *ATy = cast<const ArrayType>(Ty);
return getSmallestAddressableSize(ATy->getElementType(), GV, TM);
}
- case Type::VectorTyID: {
+ case Type::FixedVectorTyID:
+ case Type::ScalableVectorTyID: {
const VectorType *PTy = cast<const VectorType>(Ty);
return getSmallestAddressableSize(PTy->getElementType(), GV, TM);
}
@@ -323,6 +324,7 @@ unsigned HexagonTargetObjectFile::getSmallestAddressableSize(const Type *Ty,
}
case Type::FunctionTyID:
case Type::VoidTyID:
+ case Type::BFloatTyID:
case Type::X86_FP80TyID:
case Type::FP128TyID:
case Type::PPC_FP128TyID:
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
index b36282578950..550aac72346f 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -13,6 +13,7 @@
#include "llvm/MC/MCSectionELF.h"
namespace llvm {
+ class Type;
class HexagonTargetObjectFile : public TargetLoweringObjectFileELF {
public:
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h
index c5200b76933e..a5b14a7e0764 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h
@@ -15,13 +15,13 @@ namespace llvm {
class HexagonTargetStreamer : public MCTargetStreamer {
public:
HexagonTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
- virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ virtual void emitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit = 0){};
virtual void emitFAlign(unsigned Size, unsigned MaxBytesToEmit){};
- virtual void EmitCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
+ virtual void emitCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment,
unsigned AccessGranularity){};
- virtual void EmitLocalCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
+ virtual void emitLocalCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlign,
unsigned AccessGranularity){};
};
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 4d4627cd2071..80c8736cb74a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -45,7 +45,7 @@ bool HexagonTTIImpl::useHVX() const {
bool HexagonTTIImpl::isTypeForHVX(Type *VecTy) const {
assert(VecTy->isVectorTy());
- if (cast<VectorType>(VecTy)->isScalable())
+ if (isa<ScalableVectorType>(VecTy))
return false;
// Avoid types like <2 x i32*>.
if (!cast<VectorType>(VecTy)->getElementType()->isIntegerTy())
@@ -60,8 +60,8 @@ bool HexagonTTIImpl::isTypeForHVX(Type *VecTy) const {
}
unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const {
- if (Ty->isVectorTy())
- return Ty->getVectorNumElements();
+ if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
+ return VTy->getNumElements();
assert((Ty->isIntegerTy() || Ty->isFloatingPointTy()) &&
"Expecting scalar type");
return 1;
@@ -78,12 +78,17 @@ HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
UP.Runtime = UP.Partial = true;
+}
+
+void HexagonTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ BaseT::getPeelingPreferences(L, SE, PP);
// Only try to peel innermost loops with small runtime trip counts.
if (L && L->empty() && canPeel(L) &&
SE.getSmallConstantTripCount(L) == 0 &&
SE.getSmallConstantMaxTripCount(L) > 0 &&
SE.getSmallConstantMaxTripCount(L) <= 5) {
- UP.PeelCount = 2;
+ PP.PeelCount = 2;
}
}
@@ -115,9 +120,10 @@ unsigned HexagonTTIImpl::getMinimumVF(unsigned ElemWidth) const {
return (8 * ST.getVectorLength()) / ElemWidth;
}
-unsigned HexagonTTIImpl::getScalarizationOverhead(Type *Ty, bool Insert,
- bool Extract) {
- return BaseT::getScalarizationOverhead(Ty, Insert, Extract);
+unsigned HexagonTTIImpl::getScalarizationOverhead(VectorType *Ty,
+ const APInt &DemandedElts,
+ bool Insert, bool Extract) {
+ return BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
}
unsigned HexagonTTIImpl::getOperandsScalarizationOverhead(
@@ -126,24 +132,18 @@ unsigned HexagonTTIImpl::getOperandsScalarizationOverhead(
}
unsigned HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy,
- ArrayRef<Type*> Tys) {
- return BaseT::getCallInstrCost(F, RetTy, Tys);
-}
-
-unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
- return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
+ ArrayRef<Type*> Tys, TTI::TargetCostKind CostKind) {
+ return BaseT::getCallInstrCost(F, RetTy, Tys, CostKind);
}
-unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type*> Tys, FastMathFlags FMF,
- unsigned ScalarizationCostPassed) {
- if (ID == Intrinsic::bswap) {
- std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, RetTy);
+unsigned
+HexagonTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) {
+ if (ICA.getID() == Intrinsic::bswap) {
+ std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ICA.getReturnType());
return LT.first + 2;
}
- return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
- ScalarizationCostPassed);
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
@@ -154,14 +154,20 @@ unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment,
unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
+ // TODO: Handle other cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return 1;
+
if (Opcode == Instruction::Store)
- return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
+ return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+ CostKind, I);
if (Src->isVectorTy()) {
VectorType *VecTy = cast<VectorType>(Src);
- unsigned VecWidth = VecTy->getBitWidth();
+ unsigned VecWidth = VecTy->getPrimitiveSizeInBits().getFixedSize();
if (useHVX() && isTypeForHVX(VecTy)) {
unsigned RegWidth = getRegisterBitWidth(true);
assert(RegWidth && "Non-zero vector register width expected");
@@ -183,7 +189,7 @@ unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Cost =
VecTy->getElementType()->isFloatingPointTy() ? FloatFactor : 1;
- // At this point unspecified alignment is considered as Align::None().
+ // At this point unspecified alignment is considered as Align(1).
const Align BoundAlignment = std::min(Alignment.valueOrOne(), Align(8));
unsigned AlignWidth = 8 * BoundAlignment.value();
unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
@@ -195,12 +201,16 @@ unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
return (3 - LogA) * Cost * NumLoads;
}
- return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
+ return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+ CostKind, I);
}
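
For the vector-load path above, the cost scales with how many naturally aligned chunks the access decomposes into: the alignment is capped at 8 bytes, converted to bits, and the vector width is rounded up to a multiple of it. A standalone sketch of that arithmetic (local rounding, not the LLVM alignTo):

#include <algorithm>

// Number of loads needed for a VecWidthBits-wide access when only
// AlignmentBytes of alignment (capped at 8) can be assumed.
static unsigned numLoads(unsigned VecWidthBits, unsigned AlignmentBytes) {
  unsigned Bound = std::min(AlignmentBytes, 8u);   // std::min(Alignment, Align(8))
  unsigned AlignWidthBits = 8 * Bound;
  return (VecWidthBits + AlignWidthBits - 1) / AlignWidthBits;
}
// e.g. numLoads(1024, 8) == 16, numLoads(1024, 4) == 32.
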
-unsigned HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode,
- Type *Src, unsigned Alignment, unsigned AddressSpace) {
- return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+unsigned HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+ Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind) {
+ return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+ CostKind);
}
unsigned HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
@@ -208,57 +218,70 @@ unsigned HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
return 1;
}
-unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
- Value *Ptr, bool VariableMask, unsigned Alignment) {
+unsigned HexagonTTIImpl::getGatherScatterOpCost(
+ unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
+ Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
- Alignment);
+ Alignment, CostKind, I);
}
-unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
- Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
- bool UseMaskForGaps) {
+unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond, bool UseMaskForGaps) {
if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace,
+ CostKind,
UseMaskForCond, UseMaskForGaps);
return getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
- nullptr);
+ CostKind);
}
unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy, const Instruction *I) {
- if (ValTy->isVectorTy()) {
+ Type *CondTy, TTI::TargetCostKind CostKind, const Instruction *I) {
+ if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) {
std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
if (Opcode == Instruction::FCmp)
return LT.first + FloatFactor * getTypeNumElements(ValTy);
}
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
}
unsigned HexagonTTIImpl::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+ TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
const Instruction *CxtI) {
+ // TODO: Handle more cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+ Opd2Info, Opd1PropInfo,
+ Opd2PropInfo, Args, CxtI);
+
if (Ty->isVectorTy()) {
std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
if (LT.second.isFloatingPoint())
return LT.first + FloatFactor * getTypeNumElements(Ty);
}
- return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
Opd1PropInfo, Opd2PropInfo, Args, CxtI);
}
unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
- Type *SrcTy, const Instruction *I) {
+ Type *SrcTy, TTI::TargetCostKind CostKind, const Instruction *I) {
if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) {
unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;
std::pair<int, MVT> SrcLT = TLI.getTypeLegalizationCost(DL, SrcTy);
std::pair<int, MVT> DstLT = TLI.getTypeLegalizationCost(DL, DstTy);
- return std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
+ unsigned Cost = std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
+ // TODO: Allow non-throughput costs that aren't binary.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Cost == 0 ? 0 : 1;
+ return Cost;
}
return 1;
}
@@ -292,8 +315,10 @@ unsigned HexagonTTIImpl::getCacheLineSize() const {
return ST.getL1CacheLineSize();
}
-int HexagonTTIImpl::getUserCost(const User *U,
- ArrayRef<const Value *> Operands) {
+int
+HexagonTTIImpl::getUserCost(const User *U,
+ ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind) {
auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
if (!CI->isIntegerCast())
return false;
@@ -315,7 +340,7 @@ int HexagonTTIImpl::getUserCost(const User *U,
if (const CastInst *CI = dyn_cast<const CastInst>(U))
if (isCastFoldedIntoLoad(CI))
return TargetTransformInfo::TCC_Free;
- return BaseT::getUserCost(U, Operands);
+ return BaseT::getUserCost(U, Operands, CostKind);
}
bool HexagonTTIImpl::shouldBuildLookupTables() const {
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index ace0d797bbdb..5fe397486402 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -64,6 +64,9 @@ public:
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
+
/// Bias LSR towards creating post-increment opportunities.
bool shouldFavorPostInc() const;
@@ -101,34 +104,41 @@ public:
return true;
}
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
- unsigned getOperandsScalarizationOverhead(ArrayRef<const Value*> Args,
- unsigned VF);
- unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type*> Tys);
- unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF);
- unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type*> Tys, FastMathFlags FMF,
- unsigned ScalarizationCostPassed = UINT_MAX);
+ unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
+ bool Insert, bool Extract);
+ unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ unsigned VF);
+ unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type*> Tys,
+ TTI::TargetCostKind CostKind);
+ unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind);
unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
const SCEV *S);
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
- unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned
+ getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency);
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp);
- unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
- bool VariableMask, unsigned Alignment);
- unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace, bool UseMaskForCond = false,
- bool UseMaskForGaps = false);
+ unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
+ const Value *Ptr, bool VariableMask,
+ Align Alignment, TTI::TargetCostKind CostKind,
+ const Instruction *I);
+ unsigned getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false);
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
- const Instruction *I);
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
unsigned getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -136,16 +146,18 @@ public:
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- unsigned getCFInstrCost(unsigned Opcode) {
+ unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
return 1;
}
/// @}
- int getUserCost(const User *U, ArrayRef<const Value *> Operands);
+ int getUserCost(const User *U, ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind);
// Hexagon specific decision to generate a lookup table.
bool shouldBuildLookupTables() const;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVExtract.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVExtract.cpp
index b7d6dbe21c74..b5f06ebd3189 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVExtract.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVExtract.cpp
@@ -15,7 +15,7 @@
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/PassSupport.h"
+#include "llvm/Pass.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -107,7 +107,7 @@ bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) {
Register AR =
MF.getInfo<HexagonMachineFunctionInfo>()->getStackAlignBaseVReg();
std::map<unsigned, SmallVector<MachineInstr*,4>> VExtractMap;
- unsigned MaxAlign = 0;
+ MaybeAlign MaxAlign;
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
@@ -137,14 +137,14 @@ bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) {
continue;
const auto &VecRC = *MRI.getRegClass(VecR);
- unsigned Align = HRI.getSpillAlignment(VecRC);
- MaxAlign = std::max(MaxAlign, Align);
+ Align Alignment = HRI.getSpillAlign(VecRC);
+ MaxAlign = max(MaxAlign, Alignment);
// Make sure this is not a spill slot: spill slots cannot be aligned
// if there are variable-sized objects on the stack. They must be
// accessible via FP (which is not aligned), because SP is unknown,
// and AP may not be available at the location of the load/store.
- int FI = MFI.CreateStackObject(HRI.getSpillSize(VecRC), Align,
- /*isSpillSlot*/false);
+ int FI = MFI.CreateStackObject(HRI.getSpillSize(VecRC), Alignment,
+ /*isSpillSlot*/ false);
MachineInstr *DefI = MRI.getVRegDef(VecR);
MachineBasicBlock::iterator At = std::next(DefI->getIterator());
@@ -178,13 +178,13 @@ bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) {
}
}
- if (AR) {
+ if (AR && MaxAlign) {
// Update the required stack alignment.
MachineInstr *AlignaI = MRI.getVRegDef(AR);
assert(AlignaI->getOpcode() == Hexagon::PS_aligna);
MachineOperand &Op = AlignaI->getOperand(1);
- if (MaxAlign > Op.getImm())
- Op.setImm(MaxAlign);
+ if (*MaxAlign > Op.getImm())
+ Op.setImm(MaxAlign->value());
}
return Changed;
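
The switch from `unsigned MaxAlign = 0` to MaybeAlign makes the "no vector slot was created" case explicit, which is why the final update is now guarded by `AR && MaxAlign`. A sketch of the same bookkeeping with std::optional (illustrative only, not the LLVM types):

#include <algorithm>
#include <cstdint>
#include <optional>

static void updateMaxAlign(std::optional<uint64_t> &MaxAlign, uint64_t Alignment) {
  // Only meaningful once at least one vector spill slot has been created.
  MaxAlign = MaxAlign ? std::max(*MaxAlign, Alignment) : Alignment;
}

static void maybeRaiseAligna(std::optional<uint64_t> MaxAlign, int64_t &AlignaImm) {
  if (MaxAlign && static_cast<int64_t>(*MaxAlign) > AlignaImm)
    AlignaImm = static_cast<int64_t>(*MaxAlign);
}
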
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 36d71c41da54..fa1ba4f2e469 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -242,6 +242,10 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
}
}
+ // TinyCore with Duplexes: Translate to big-instructions.
+ if (HST.isTinyCoreWithDuplex())
+ HII->translateInstrsForDup(MF, true);
+
// Loop over all of the basic blocks.
for (auto &MB : MF) {
auto Begin = MB.begin(), End = MB.end();
@@ -267,6 +271,10 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
}
}
+ // TinyCore with Duplexes: Translate to tiny-instructions.
+ if (HST.isTinyCoreWithDuplex())
+ HII->translateInstrsForDup(MF, false);
+
Packetizer.unpacketizeSoloInstrs(MF);
return true;
}
@@ -1052,12 +1060,11 @@ bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr &MI,
// we ignore the instruction.
const MCInstrDesc& TID = MI.getDesc();
auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass());
- unsigned FuncUnits = IS->getUnits();
- return !FuncUnits;
+ return !IS->getUnits();
}
bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) {
- // Ensure any bundles created by gather packetize remain seperate.
+ // Ensure any bundles created by gather packetize remain separate.
if (MI.isBundle())
return true;
@@ -1802,6 +1809,8 @@ void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
setmemShufDisabled(false);
}
+ PacketHasDuplex = false;
+ PacketHasSLOT0OnlyInsn = false;
ResourceTracker->clearResources();
LLVM_DEBUG(dbgs() << "End packet\n");
}
@@ -1809,7 +1818,64 @@ void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) {
if (Minimal)
return false;
- return !producesStall(MI);
+
+ // Constraints for not packetizing this MI with existing instructions in a
+ // packet:
+ // - MI is a store instruction, and
+ // - CurrentPacketMIs has a SLOT0-only instruction with the constraint
+ //   A_RESTRICT_NOSLOT1_STORE/isRestrictNoSlot1Store.
+ if (MI.mayStore() && isPureSlot0InsnWithNoSlot1Store(MI))
+ return false;
+
+ if (producesStall(MI))
+ return false;
+
+ // If TinyCore with Duplexes is enabled, check if this MI can form a Duplex
+ // with any other instruction in the existing packet.
+ auto &HST = MI.getParent()->getParent()->getSubtarget<HexagonSubtarget>();
+ // Constraint 1: Only one duplex allowed per packet.
+ // Constraint 2: Consider duplex checks only if there is at least one
+ // instruction already in the packet.
+ // Constraint 3: If one of the existing instructions in the packet is a
+ // SLOT0-only instruction that cannot be duplexed, do not attempt to form
+ // duplexes. (TODO: This also prevents the L4_return* instructions from
+ // forming a duplex.)
+ if (HST.isTinyCoreWithDuplex() && CurrentPacketMIs.size() > 0 &&
+ !PacketHasDuplex) {
+ // Check for SLOT0 only non-duplexable instruction in packet.
+ for (auto &MJ : CurrentPacketMIs)
+ PacketHasSLOT0OnlyInsn |= HII->isPureSlot0(*MJ);
+ // Get the Big Core Opcode (dup_*).
+ int Opcode = HII->getDuplexOpcode(MI, false);
+ if (Opcode >= 0) {
+ // We now have an instruction that can be duplexed.
+ for (auto &MJ : CurrentPacketMIs) {
+ if (HII->isDuplexPair(MI, *MJ) && !PacketHasSLOT0OnlyInsn) {
+ PacketHasDuplex = true;
+ return true;
+ }
+ }
+ // If it cannot be duplexed, check if there is a valid transition in the DFA
+ // with the original opcode.
+ MachineInstr &MIRef = const_cast<MachineInstr &>(MI);
+ MIRef.setDesc(HII->get(Opcode));
+ return ResourceTracker->canReserveResources(MIRef);
+ }
+ }
+
+ return true;
+}
+
+bool HexagonPacketizerList::isPureSlot0InsnWithNoSlot1Store(
+ const MachineInstr &MI) {
+ bool noSlot1Store = false;
+ bool isSlot0Only = false;
+ for (auto J : CurrentPacketMIs) {
+ noSlot1Store |= HII->isRestrictNoSlot1Store(*J);
+ isSlot0Only |= HII->isPureSlot0(*J);
+ }
+
+ return (noSlot1Store && isSlot0Only);
}
// V60 forward scheduling.
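
Boiled down, shouldAddToPacket lets at most one duplex into a packet, considers duplexing only when the packet is non-empty, and gives up if a pure-SLOT0 instruction is already present. A simplified, self-contained model of that gating (the real pass additionally consults HexagonInstrInfo and the DFA resource tracker):

#include <vector>

struct PacketInsn {
  bool PureSlot0;                 // can only issue in SLOT0, not duplexable
  bool FormsDuplexWithCandidate;  // isDuplexPair(MI, *MJ) in the real code
};

static bool mayFormDuplex(const std::vector<PacketInsn> &Packet,
                          bool PacketAlreadyHasDuplex) {
  if (PacketAlreadyHasDuplex || Packet.empty())
    return false;                               // constraints 1 and 2
  for (const PacketInsn &I : Packet)
    if (I.PureSlot0)
      return false;                             // constraint 3
  for (const PacketInsn &I : Packet)
    if (I.FormsDuplexWithCandidate)
      return true;
  return false;
}
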
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
index 943b9ac7ecc4..27a47220570a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -57,6 +57,13 @@ class HexagonPacketizerList : public VLIWPacketizerList {
// instruction from the previous packet.
bool PacketStalls = false;
+ // Set to true if the packet has a duplex pair of sub-instructions.
+ bool PacketHasDuplex = false;
+
+ // Set to true if the packet has an instruction that can only be executed
+ // in SLOT0.
+ bool PacketHasSLOT0OnlyInsn = false;
+
protected:
/// A handle to the branch probability pass.
const MachineBranchProbabilityInfo *MBPI;
@@ -149,6 +156,7 @@ protected:
bool hasRegMaskDependence(const MachineInstr &I, const MachineInstr &J);
bool hasDualStoreDependence(const MachineInstr &I, const MachineInstr &J);
bool producesStall(const MachineInstr &MI);
+ bool isPureSlot0InsnWithNoSlot1Store(const MachineInstr &MI);
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp
index 65a8dcd75bdc..fbc5e5c344ed 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp
@@ -71,9 +71,10 @@ public:
char HexagonVectorPrint::ID = 0;
static bool isVecReg(unsigned Reg) {
- return (Reg >= Hexagon::V0 && Reg <= Hexagon::V31)
- || (Reg >= Hexagon::W0 && Reg <= Hexagon::W15)
- || (Reg >= Hexagon::Q0 && Reg <= Hexagon::Q3);
+ return (Reg >= Hexagon::V0 && Reg <= Hexagon::V31) ||
+ (Reg >= Hexagon::W0 && Reg <= Hexagon::W15) ||
+ (Reg >= Hexagon::WR0 && Reg <= Hexagon::WR15) ||
+ (Reg >= Hexagon::Q0 && Reg <= Hexagon::Q3);
}
static std::string getStringReg(unsigned R) {
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 8f1e5c1c3a97..e7069819fa57 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -22,6 +22,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/EndianStream.h"
#include "llvm/Support/TargetRegistry.h"
#include <sstream>
@@ -43,6 +44,7 @@ class HexagonAsmBackend : public MCAsmBackend {
std::unique_ptr <MCInstrInfo> MCII;
std::unique_ptr <MCInst *> RelaxTarget;
MCInst * Extender;
+ unsigned MaxPacketSize;
void ReplaceInstruction(MCCodeEmitter &E, MCRelaxableFragment &RF,
MCInst &HMB) const {
@@ -62,7 +64,8 @@ public:
StringRef CPU)
: MCAsmBackend(support::little), OSABI(OSABI), CPU(CPU), relaxedCnt(0),
MCII(T.createMCInstrInfo()), RelaxTarget(new MCInst *),
- Extender(nullptr) {}
+ Extender(nullptr), MaxPacketSize(HexagonMCInstrInfo::packetSize(CPU))
+ {}
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
@@ -648,11 +651,12 @@ public:
llvm_unreachable("Handled by fixupNeedsRelaxationAdvanced");
}
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override {
+ void relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const override {
assert(HexagonMCInstrInfo::isBundle(Inst) &&
"Hexagon relaxInstruction only works on bundles");
+ MCInst Res;
Res.setOpcode(Hexagon::BUNDLE);
Res.addOperand(MCOperand::createImm(Inst.getOperand(0).getImm()));
// Copy the results into the bundle.
@@ -676,6 +680,8 @@ public:
// now copy over the original instruction(the one we may have extended)
Res.addOperand(MCOperand::createInst(I.getInst()));
}
+
+ Inst = std::move(Res);
(void)Update;
assert(Update && "Didn't find relaxation target");
}
@@ -685,7 +691,7 @@ public:
ParseIn = 0x00004000, // In packet parse-bits.
ParseEnd = 0x0000c000; // End of packet parse-bits.
- while(Count % HEXAGON_INSTR_SIZE) {
+ while (Count % HEXAGON_INSTR_SIZE) {
LLVM_DEBUG(dbgs() << "Alignment not a multiple of the instruction size:"
<< Count % HEXAGON_INSTR_SIZE << "/"
<< HEXAGON_INSTR_SIZE << "\n");
@@ -693,11 +699,11 @@ public:
OS << '\0';
}
- while(Count) {
+ while (Count) {
Count -= HEXAGON_INSTR_SIZE;
// Close the packet whenever a multiple of the maximum packet size remains
- uint32_t ParseBits = (Count % (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE))?
- ParseIn: ParseEnd;
+ uint32_t ParseBits = (Count % (MaxPacketSize * HEXAGON_INSTR_SIZE)) ?
+ ParseIn : ParseEnd;
support::endian::write<uint32_t>(OS, Nopcode | ParseBits, Endian);
}
return true;
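
writeNopData pads the requested byte count with nop words and flips the parse bits to "end of packet" whenever the remaining count is a multiple of the maximum packet size, so the padding decodes as well-formed packets. A simplified model of that loop (the nop encoding below is a placeholder, not the real opcode):

#include <cstdint>
#include <vector>

static std::vector<uint32_t> padWithNops(uint64_t Count, unsigned MaxPacketSize) {
  const uint32_t Nop      = 0x7f000000;  // placeholder encoding
  const uint32_t ParseIn  = 0x00004000;  // in-packet parse bits
  const uint32_t ParseEnd = 0x0000c000;  // end-of-packet parse bits
  const unsigned InstrSize = 4;
  std::vector<uint32_t> Words;
  Count -= Count % InstrSize;            // the real code pads stray bytes with '\0'
  while (Count) {
    Count -= InstrSize;
    uint32_t ParseBits =
        (Count % (MaxPacketSize * InstrSize)) ? ParseIn : ParseEnd;
    Words.push_back(Nop | ParseBits);
  }
  return Words;
}
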
@@ -728,7 +734,8 @@ public:
MCContext &Context = Asm.getContext();
auto &RF = cast<MCRelaxableFragment>(*K);
auto &Inst = const_cast<MCInst &>(RF.getInst());
- while (Size > 0 && HexagonMCInstrInfo::bundleSize(Inst) < 4) {
+ while (Size > 0 &&
+ HexagonMCInstrInfo::bundleSize(Inst) < MaxPacketSize) {
MCInst *Nop = new (Context) MCInst;
Nop->setOpcode(Hexagon::A2_nop);
Inst.addOperand(MCOperand::createInst(Nop));
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index 3c64893bae45..4125566bc58a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -27,11 +27,6 @@ namespace HexagonII {
unsigned const TypeCVI_FIRST = TypeCVI_4SLOT_MPY;
unsigned const TypeCVI_LAST = TypeCVI_ZW;
- enum SubTarget {
- HasV55SubT = 0x3c,
- HasV60SubT = 0x38,
- };
-
enum AddrMode {
NoAddrMode = 0, // No addressing mode
Absolute = 1, // Absolute addressing mode
@@ -165,6 +160,9 @@ namespace HexagonII {
CVINewPos = 62,
CVINewMask = 0x1,
+
+ isCVIPos = 63,
+ isCVIMask = 0x1,
};
// *** The code above must match HexagonInstrFormat*.td *** //
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
index cdbeae38b3a1..3dba6b07c460 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -64,7 +64,7 @@ unsigned HexagonELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_HEX_IE_GOT_32;
case MCSymbolRefExpr::VariantKind::VK_Hexagon_LD_GOT:
return ELF::R_HEX_LD_GOT_32;
- case MCSymbolRefExpr::VariantKind::VK_Hexagon_PCREL:
+ case MCSymbolRefExpr::VariantKind::VK_PCREL:
return ELF::R_HEX_32_PCREL;
case MCSymbolRefExpr::VariantKind::VK_TPREL:
return ELF::R_HEX_TPREL_32;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
index f3da67562320..e5e5d08937ef 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -34,4 +34,5 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(const Triple &TT) {
UsesELFSectionDirectiveForBSS = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
UseLogicalShr = false;
+ UseIntegratedAssembler = false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
index 8b262bd0248e..fee1acdbbe8a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
@@ -81,6 +81,9 @@ void HexagonMCChecker::initReg(MCInst const &MCI, unsigned R, unsigned &PredReg,
if (!MCSubRegIterator(*SRI, &RI).isValid())
// Skip super-registers used indirectly.
Uses.insert(*SRI);
+
+ if (HexagonMCInstrInfo::IsReverseVecRegPair(R))
+ ReversePairs.insert(R);
}
void HexagonMCChecker::init(MCInst const &MCI) {
@@ -133,6 +136,9 @@ void HexagonMCChecker::init(MCInst const &MCI) {
if (R == Hexagon::C8)
R = Hexagon::USR;
+ if (HexagonMCInstrInfo::IsReverseVecRegPair(R))
+ ReversePairs.insert(R);
+
// Note register definitions, direct ones as well as indirect side-effects.
// Super-registers are not tracked directly, but their components.
for (MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
@@ -192,7 +198,7 @@ HexagonMCChecker::HexagonMCChecker(MCContext &Context, MCInstrInfo const &MCII,
MCSubtargetInfo const &STI, MCInst &mcb,
MCRegisterInfo const &ri, bool ReportErrors)
: Context(Context), MCB(mcb), RI(ri), MCII(MCII), STI(STI),
- ReportErrors(ReportErrors) {
+ ReportErrors(ReportErrors), ReversePairs() {
init();
}
@@ -200,7 +206,10 @@ HexagonMCChecker::HexagonMCChecker(HexagonMCChecker const &Other,
MCSubtargetInfo const &STI,
bool CopyReportErrors)
: Context(Other.Context), MCB(Other.MCB), RI(Other.RI), MCII(Other.MCII),
- STI(STI), ReportErrors(CopyReportErrors ? Other.ReportErrors : false) {}
+ STI(STI), ReportErrors(CopyReportErrors ? Other.ReportErrors : false),
+ ReversePairs() {
+ init();
+}
bool HexagonMCChecker::check(bool FullCheck) {
bool chkP = checkPredicates();
@@ -218,8 +227,9 @@ bool HexagonMCChecker::check(bool FullCheck) {
bool chkAXOK = checkAXOK();
bool chkCofMax1 = checkCOFMax1();
bool chkHWLoop = checkHWLoop();
+ bool chkLegalVecRegPair = checkLegalVecRegPair();
bool chk = chkP && chkNV && chkR && chkRRO && chkS && chkSh && chkSl &&
- chkAXOK && chkCofMax1 && chkHWLoop;
+ chkAXOK && chkCofMax1 && chkHWLoop && chkLegalVecRegPair;
return chk;
}
@@ -381,7 +391,7 @@ bool HexagonMCChecker::checkPredicates() {
for (const auto &I : NewPreds) {
unsigned P = I;
- if (!Defs.count(P) || LatePreds.count(P)) {
+ if (!Defs.count(P) || LatePreds.count(P) || Defs.count(Hexagon::P3_0)) {
// Error out if the new predicate register is not defined,
// or defined "late"
// (e.g., "{ if (p3.new)... ; p3 = sp1loop0(#r7:2, Rs) }").
@@ -729,3 +739,16 @@ void HexagonMCChecker::reportWarning(Twine const &Msg) {
if (ReportErrors)
Context.reportWarning(MCB.getLoc(), Msg);
}
+
+bool HexagonMCChecker::checkLegalVecRegPair() {
+ const bool IsPermitted = STI.getFeatureBits()[Hexagon::ArchV67];
+ const bool HasReversePairs = ReversePairs.size() != 0;
+
+ if (!IsPermitted && HasReversePairs) {
+ for (auto R : ReversePairs)
+ reportError("register pair `" + Twine(RI.getName(R)) +
+ "' is not permitted for this architecture");
+ return false;
+ }
+ return true;
+}
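
The new check simply refuses packets that name a reverse vector register pair (the WR0..WR15 aliases, written with the even register first) on anything older than V67. A stripped-down sketch of the shape of that check (strings stand in for MC register numbers here):

#include <iostream>
#include <set>
#include <string>

static bool checkLegalVecRegPair(bool HasV67,
                                 const std::set<std::string> &ReversePairs) {
  if (!HasV67 && !ReversePairs.empty()) {
    for (const std::string &R : ReversePairs)
      std::cerr << "register pair `" << R
                << "' is not permitted for this architecture\n";
    return false;
  }
  return true;
}
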
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
index bc55ade9ccd7..00afdb664ba5 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
@@ -72,6 +72,10 @@ class HexagonMCChecker {
using ReadOnlyIterator = std::set<unsigned>::iterator;
std::set<unsigned> ReadOnly;
+ // Contains the vector-pair registers written with the even register
+ // first (e.g. "v0:1") that are used/def'd in this packet.
+ std::set<unsigned> ReversePairs;
+
void init();
void init(MCInst const &);
void initReg(MCInst const &, unsigned, unsigned &PredReg, bool &isTrue);
@@ -94,6 +98,7 @@ class HexagonMCChecker {
bool checkAXOK();
bool checkHWLoop();
bool checkCOFMax1();
+ bool checkLegalVecRegPair();
static void compoundRegisterMap(unsigned &);
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 95e23c99868a..24169c83bdb9 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -147,7 +147,7 @@ static const std::map<unsigned, std::vector<unsigned>> ExtFixups = {
_, _, _, _,
_, _, _, _,
_ }},
- { MCSymbolRefExpr::VK_Hexagon_PCREL,
+ { MCSymbolRefExpr::VK_PCREL,
{ _, _, _, _,
_, _, P(_6_PCREL_X), _,
_, P(_9_X), _, _,
@@ -311,7 +311,7 @@ static const std::map<unsigned, std::vector<unsigned>> StdFixups = {
_, _, _, _,
_, _, _, _,
_ }},
- { MCSymbolRefExpr::VK_Hexagon_PCREL,
+ { MCSymbolRefExpr::VK_PCREL,
{ _, _, _, _,
_, _, _, _,
_, _, _, _,
@@ -391,15 +391,9 @@ void HexagonMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
static bool RegisterMatches(unsigned Consumer, unsigned Producer,
unsigned Producer2) {
- if (Consumer == Producer)
- return true;
- if (Consumer == Producer2)
- return true;
- // Calculate if we're a single vector consumer referencing a double producer
- if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15)
- if (Consumer >= Hexagon::V0 && Consumer <= Hexagon::V31)
- return ((Consumer - Hexagon::V0) >> 1) == (Producer - Hexagon::W0);
- return false;
+ return (Consumer == Producer) || (Consumer == Producer2) ||
+ HexagonMCInstrInfo::IsSingleConsumerRefPairProducer(Producer,
+ Consumer);
}
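
The removed inline logic (now folded into HexagonMCInstrInfo::IsSingleConsumerRefPairProducer) matched a single-vector consumer against a vector-pair producer by index arithmetic: Vn is half of the pair W(n/2). A tiny sketch of just that index relation (names here are illustrative, not the MC API):

// Does single vector register V<VIdx> belong to vector pair W<WIdx>?
static bool singleConsumerMatchesPairProducer(unsigned VIdx, unsigned WIdx) {
  return (VIdx >> 1) == WIdx;  // V0,V1 -> W0; V2,V3 -> W1; ...
}
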
/// EncodeSingleInstruction - Emit a single
@@ -497,7 +491,7 @@ Hexagon::Fixups HexagonMCCodeEmitter::getFixupNoBits(
{ MCSymbolRefExpr::VK_Hexagon_LD_GOT, fixup_Hexagon_LD_GOT_32_6_X },
{ MCSymbolRefExpr::VK_Hexagon_IE, fixup_Hexagon_IE_32_6_X },
{ MCSymbolRefExpr::VK_Hexagon_IE_GOT, fixup_Hexagon_IE_GOT_32_6_X },
- { MCSymbolRefExpr::VK_Hexagon_PCREL, fixup_Hexagon_B32_PCREL_X },
+ { MCSymbolRefExpr::VK_PCREL, fixup_Hexagon_B32_PCREL_X },
{ MCSymbolRefExpr::VK_Hexagon_GD_PLT, fixup_Hexagon_GD_PLT_B32_PCREL_X },
{ MCSymbolRefExpr::VK_Hexagon_LD_PLT, fixup_Hexagon_LD_PLT_B32_PCREL_X },
};
@@ -735,7 +729,8 @@ HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO,
unsigned SOffset = 0;
unsigned VOffset = 0;
unsigned UseReg = MO.getReg();
- unsigned DefReg1, DefReg2;
+ unsigned DefReg1 = Hexagon::NoRegister;
+ unsigned DefReg2 = Hexagon::NoRegister;
auto Instrs = HexagonMCInstrInfo::bundleInstructions(*State.Bundle);
const MCOperand *I = Instrs.begin() + State.Index - 1;
@@ -746,7 +741,8 @@ HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO,
if (HexagonMCInstrInfo::isImmext(Inst))
continue;
- DefReg1 = DefReg2 = 0;
+ DefReg1 = Hexagon::NoRegister;
+ DefReg2 = Hexagon::NoRegister;
++SOffset;
if (HexagonMCInstrInfo::isVector(MCII, Inst)) {
// Vector instructions don't count scalars.
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
index 3cbb8600ce7a..5154a0a1e46c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -10,12 +10,11 @@
//
//===----------------------------------------------------------------------===//
+#include "HexagonMCExpr.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -296,8 +295,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
DstReg = MCI.getOperand(1).getReg();
SrcReg = MCI.getOperand(0).getReg();
// [if ([!]p0[.new])] jumpr r31
- if ((HexagonMCInstrInfo::isPredReg(SrcReg) && (Hexagon::P0 == SrcReg)) &&
- (Hexagon::R31 == DstReg)) {
+ if ((Hexagon::P0 == SrcReg) && (Hexagon::R31 == DstReg)) {
return HexagonII::HSIG_L2;
}
break;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index a799f7f7c0b9..53e76a8b9ed7 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -58,7 +58,7 @@ HexagonMCELFStreamer::HexagonMCELFStreamer(
: MCELFStreamer(Context, std::move(TAB), std::move(OW), std::move(Emitter)),
MCII(createHexagonMCInstrInfo()) {}
-void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCB,
+void HexagonMCELFStreamer::emitInstruction(const MCInst &MCB,
const MCSubtargetInfo &STI) {
assert(MCB.getOpcode() == Hexagon::BUNDLE);
assert(HexagonMCInstrInfo::bundleSize(MCB) <= HEXAGON_PACKET_SIZE);
@@ -71,7 +71,7 @@ void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCB,
EmitSymbol(*MCI);
}
- MCObjectStreamer::EmitInstruction(MCB, STI);
+ MCObjectStreamer::emitInstruction(MCB, STI);
}
void HexagonMCELFStreamer::EmitSymbol(const MCInst &Inst) {
@@ -110,9 +110,9 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol,
SwitchSection(&Section);
if (ELFSymbol->isUndefined()) {
- EmitValueToAlignment(ByteAlignment, 0, 1, 0);
- EmitLabel(Symbol);
- EmitZeros(Size);
+ emitValueToAlignment(ByteAlignment, 0, 1, 0);
+ emitLabel(Symbol);
+ emitZeros(Size);
}
// Update the maximum alignment of the section if necessary.
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
index 6248bd25d433..edf4ce29f908 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
@@ -30,7 +30,7 @@ public:
std::unique_ptr<MCCodeEmitter> Emitter,
MCAssembler *Assembler);
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
+ void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
void EmitSymbol(const MCInst &Inst);
void HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment,
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
index 59b1326adf0c..e88f46a04dae 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
@@ -12,7 +12,6 @@
#include "llvm/MC/MCExpr.h"
namespace llvm {
-class MCInst;
class HexagonMCExpr : public MCTargetExpr {
public:
static HexagonMCExpr *create(MCExpr const *Expr, MCContext &Ctx);
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index 0750bfe74f76..f9f342a07f6d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -17,6 +17,7 @@
#include "MCTargetDesc/HexagonMCShuffler.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -119,10 +120,10 @@ size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) {
return (1);
}
-bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII,
- MCSubtargetInfo const &STI,
- MCContext &Context, MCInst &MCB,
- HexagonMCChecker *Check) {
+namespace {
+bool canonicalizePacketImpl(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCContext &Context, MCInst &MCB,
+ HexagonMCChecker *Check) {
// Check the bundle for errors.
bool CheckOk = Check ? Check->check(false) : true;
if (!CheckOk)
@@ -132,9 +133,9 @@ bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII,
if (!HexagonDisableCompound)
HexagonMCInstrInfo::tryCompound(MCII, STI, Context, MCB);
HexagonMCShuffle(Context, false, MCII, STI, MCB);
+
// Examine the packet and convert pairs of instructions to duplex
// instructions when possible.
- MCInst InstBundlePreDuplex = MCInst(MCB);
if (STI.getFeatureBits() [Hexagon::FeatureDuplex]) {
SmallVector<DuplexCandidate, 8> possibleDuplexes;
possibleDuplexes =
@@ -146,8 +147,11 @@ bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII,
HexagonMCInstrInfo::padEndloop(MCB, Context);
// If compounding and duplexing didn't reduce the size below
// 4 or less we have a packet that is too big.
- if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE)
+ if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE) {
+ if (Check)
+ Check->reportError("invalid instruction packet: out of slots");
return false;
+ }
// Check the bundle for errors.
CheckOk = Check ? Check->check(true) : true;
if (!CheckOk)
@@ -155,6 +159,27 @@ bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII,
HexagonMCShuffle(Context, true, MCII, STI, MCB);
return true;
}
+} // namespace
+
+bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI,
+ MCContext &Context, MCInst &MCB,
+ HexagonMCChecker *Check,
+ bool AttemptCompatibility) {
+ auto ArchSTI = Hexagon_MC::getArchSubtarget(&STI);
+ if (!AttemptCompatibility || ArchSTI == nullptr)
+ return canonicalizePacketImpl(MCII, STI, Context, MCB, Check);
+
+ const MCRegisterInfo *RI = Context.getRegisterInfo();
+ HexagonMCChecker DefaultCheck(Context, MCII, STI, MCB, *RI, false);
+ HexagonMCChecker *BaseCheck = (Check == nullptr) ? &DefaultCheck : Check;
+ HexagonMCChecker PerfCheck(*BaseCheck, STI, false);
+ if (canonicalizePacketImpl(MCII, STI, Context, MCB, &PerfCheck))
+ return true;
+
+ HexagonMCChecker ArchCheck(*BaseCheck, *ArchSTI, true);
+ return canonicalizePacketImpl(MCII, *ArchSTI, Context, MCB, &ArchCheck);
+}
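The compatibility path above first shuffles the bundle against a performance-tuned checker and only falls back to the cached architectural subtarget (see addArchSubtarget later in this patch) when that attempt fails. A minimal caller-side sketch, assuming the usual MC objects are already constructed; the wrapper name finalizePacket is illustrative only:

    // Sketch: opt a packet into the tiny-core compatibility fallback.
    using namespace llvm;
    static bool finalizePacket(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
                               MCContext &Ctx, MCInst &Bundle,
                               HexagonMCChecker *Check) {
      // AttemptCompatibility=true retries the shuffle against the architectural
      // (non-tiny) subtarget if the performance-checked attempt fails.
      return HexagonMCInstrInfo::canonicalizePacket(MCII, STI, Ctx, Bundle, Check,
                                                    /*AttemptCompatibility=*/true);
    }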
MCInst HexagonMCInstrInfo::deriveExtender(MCInstrInfo const &MCII,
MCInst const &Inst,
@@ -394,6 +419,26 @@ unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII,
return ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
}
+/// Return the resources used by this instruction
+unsigned HexagonMCInstrInfo::getCVIResources(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI,
+ MCInst const &MCI) {
+
+ const InstrItinerary *II = STI.getSchedModel().InstrItineraries;
+ int SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass();
+ int Size = II[SchedClass].LastStage - II[SchedClass].FirstStage;
+
+ // HVX resources used are currently located at the second-to-last stage.
+ // This could also be done with a linear search of the stages looking for:
+ // CVI_ALL, CVI_MPY01, CVI_XLSHF, CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE,
+ // CVI_ZW
+ unsigned Stage = II[SchedClass].LastStage - 1;
+
+ if (Size < 2)
+ return 0;
+ return ((Stage + HexagonStages)->getUnits());
+}
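Later in this patch, HexagonCVIResource (HexagonShuffler.cpp) consumes this raw itinerary value by translating it into a unit mask plus lane count; condensed into a sketch (MCII, STI, MCI assumed in scope):

    // Sketch of the consumer side: zero units and zero lanes mark a core
    // (non-HVX) instruction.
    unsigned Lanes = 0;
    const unsigned ItinUnits = HexagonMCInstrInfo::getCVIResources(MCII, STI, MCI);
    const unsigned Units = HexagonConvertUnits(ItinUnits, &Lanes);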
+
/// Return the slots this instruction can execute out of
unsigned HexagonMCInstrInfo::getUnits(MCInstrInfo const &MCII,
MCSubtargetInfo const &STI,
@@ -473,7 +518,7 @@ bool HexagonMCInstrInfo::hasNewValue2(MCInstrInfo const &MCII,
MCInst const &HexagonMCInstrInfo::instruction(MCInst const &MCB, size_t Index) {
assert(isBundle(MCB));
- assert(Index < HEXAGON_PACKET_SIZE);
+ assert(Index < HEXAGON_PRESHUFFLE_PACKET_SIZE);
return *MCB.getOperand(bundleInstructionsOffset + Index).getInst();
}
@@ -613,6 +658,12 @@ bool HexagonMCInstrInfo::isNewValue(MCInstrInfo const &MCII,
return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
}
+bool HexagonMCInstrInfo::isNewValueStore(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask;
+}
+
/// Return whether the operand is extendable.
bool HexagonMCInstrInfo::isOpExtendable(MCInstrInfo const &MCII,
MCInst const &MCI, unsigned short O) {
@@ -625,6 +676,45 @@ bool HexagonMCInstrInfo::isOuterLoop(MCInst const &MCI) {
return (Flags & outerLoopMask) != 0;
}
+bool HexagonMCInstrInfo::IsVecRegPair(unsigned VecReg) {
+ return (VecReg >= Hexagon::W0 && VecReg <= Hexagon::W15) ||
+ (VecReg >= Hexagon::WR0 && VecReg <= Hexagon::WR15);
+}
+
+bool HexagonMCInstrInfo::IsReverseVecRegPair(unsigned VecReg) {
+ return (VecReg >= Hexagon::WR0 && VecReg <= Hexagon::WR15);
+}
+
+bool HexagonMCInstrInfo::IsVecRegSingle(unsigned VecReg) {
+ return (VecReg >= Hexagon::V0 && VecReg <= Hexagon::V31);
+}
+
+std::pair<unsigned, unsigned>
+HexagonMCInstrInfo::GetVecRegPairIndices(unsigned VecRegPair) {
+ assert(IsVecRegPair(VecRegPair) &&
+ "VecRegPair must be a vector register pair");
+
+ const bool IsRev = IsReverseVecRegPair(VecRegPair);
+ const unsigned PairIndex =
+ 2 * (IsRev ? VecRegPair - Hexagon::WR0 : VecRegPair - Hexagon::W0);
+
+ return IsRev ? std::make_pair(PairIndex, PairIndex + 1)
+ : std::make_pair(PairIndex + 1, PairIndex);
+}
+
+bool HexagonMCInstrInfo::IsSingleConsumerRefPairProducer(unsigned Producer,
+ unsigned Consumer) {
+ if (IsVecRegPair(Producer) && IsVecRegSingle(Consumer)) {
+ const unsigned ProdPairIndex = IsReverseVecRegPair(Producer)
+ ? Producer - Hexagon::WR0
+ : Producer - Hexagon::W0;
+ const unsigned ConsumerSingleIndex = (Consumer - Hexagon::V0) >> 1;
+
+ return ConsumerSingleIndex == ProdPairIndex;
+ }
+ return false;
+}
+
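A worked example of the pair/single mapping above, assuming the consecutive register numbering these helpers already rely on (V0..V31, W0..W15, WR0..WR15 in scope):

    // Hexagon::W1 is the pair built from singles 3 and 2:
    //   GetVecRegPairIndices(Hexagon::W1) == {3, 2}
    // A single register reading either half of a W2 producer matches it:
    //   IsSingleConsumerRefPairProducer(Hexagon::W2, Hexagon::V4) == true
    //   IsSingleConsumerRefPairProducer(Hexagon::W2, Hexagon::V5) == true
    //   IsSingleConsumerRefPairProducer(Hexagon::W2, Hexagon::V7) == false
    assert(HexagonMCInstrInfo::GetVecRegPairIndices(Hexagon::W1) ==
           std::make_pair(3u, 2u));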
bool HexagonMCInstrInfo::isPredicated(MCInstrInfo const &MCII,
MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -655,8 +745,17 @@ bool HexagonMCInstrInfo::isPredicatedTrue(MCInstrInfo const &MCII,
!((F >> HexagonII::PredicatedFalsePos) & HexagonII::PredicatedFalseMask));
}
-bool HexagonMCInstrInfo::isPredReg(unsigned Reg) {
- return (Reg >= Hexagon::P0 && Reg <= Hexagon::P3_0);
+bool HexagonMCInstrInfo::isPredReg(MCRegisterInfo const &MRI, unsigned Reg) {
+ auto &PredRegClass = MRI.getRegClass(Hexagon::PredRegsRegClassID);
+ return PredRegClass.contains(Reg);
+}
+
+bool HexagonMCInstrInfo::isPredRegister(MCInstrInfo const &MCII,
+ MCInst const &Inst, unsigned I) {
+ MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, Inst);
+
+ return Inst.getOperand(I).isReg() &&
+ Desc.OpInfo[I].RegClass == Hexagon::PredRegsRegClassID;
}
/// Return whether the insn can be packaged only with A and X-type insns.
@@ -753,10 +852,8 @@ bool HexagonMCInstrInfo::isSubInstruction(MCInst const &MCI) {
}
bool HexagonMCInstrInfo::isVector(MCInstrInfo const &MCII, MCInst const &MCI) {
- if ((getType(MCII, MCI) <= HexagonII::TypeCVI_LAST) &&
- (getType(MCII, MCI) >= HexagonII::TypeCVI_FIRST))
- return true;
- return false;
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return (F >> HexagonII::isCVIPos) & HexagonII::isCVIMask;
}
int64_t HexagonMCInstrInfo::minConstant(MCInst const &MCI, size_t Index) {
@@ -802,6 +899,18 @@ bool HexagonMCInstrInfo::s27_2_reloc(MCExpr const &Expr) {
return HExpr->s27_2_reloc();
}
+unsigned HexagonMCInstrInfo::packetSizeSlots(MCSubtargetInfo const &STI) {
+ const bool IsTiny = STI.getFeatureBits()[Hexagon::ProcTinyCore];
+
+ return IsTiny ? (HEXAGON_PACKET_SIZE - 1) : HEXAGON_PACKET_SIZE;
+}
+
+unsigned HexagonMCInstrInfo::packetSize(StringRef CPU) {
+ return llvm::StringSwitch<unsigned>(CPU)
+ .Case("hexagonv67t", 3)
+ .Default(4);
+}
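Both helpers report the same capacity from different inputs, and tiny cores lose one slot. Sketch, given some MCSubtargetInfo STI:

    // hexagonv67t packets hold 3 instructions, everything else holds 4.
    const unsigned ByCpuName = HexagonMCInstrInfo::packetSize("hexagonv67t"); // 3
    const unsigned ByFeature = HexagonMCInstrInfo::packetSizeSlots(STI);
    // ByFeature == HEXAGON_PACKET_SIZE - 1 whenever ProcTinyCore is set.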
+
void HexagonMCInstrInfo::padEndloop(MCInst &MCB, MCContext &Context) {
MCInst Nop;
Nop.setOpcode(Hexagon::A2_nop);
@@ -836,6 +945,33 @@ bool HexagonMCInstrInfo::hasTmpDst(MCInstrInfo const &MCII, MCInst const &MCI) {
return (F >> HexagonII::HasTmpDstPos) & HexagonII::HasTmpDstMask;
}
+bool HexagonMCInstrInfo::requiresSlot(MCSubtargetInfo const &STI,
+ MCInst const &MCI) {
+ const unsigned OpCode = MCI.getOpcode();
+ const bool IsTiny = STI.getFeatureBits() [Hexagon::ProcTinyCore];
+ const bool NoSlotReqd = Hexagon::A4_ext == OpCode ||
+ (IsTiny && Hexagon::A2_nop == OpCode) ||
+ (IsTiny && Hexagon::J4_hintjumpr == OpCode);
+
+ return !NoSlotReqd;
+}
+
+unsigned HexagonMCInstrInfo::slotsConsumed(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI,
+ MCInst const &MCI) {
+ unsigned slotsUsed = 0;
+ for (auto HMI : bundleInstructions(MCI)) {
+ MCInst const &MCI = *HMI.getInst();
+ if (!requiresSlot(STI, MCI))
+ continue;
+ if (isDuplex(MCII, MCI))
+ slotsUsed += 2;
+ else
+ ++slotsUsed;
+ }
+ return slotsUsed;
+}
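One plausible use of the two new helpers, shown only as a sketch (the helper name fitsInPacket is illustrative; this is not how the patch itself wires them up):

    // Sketch: a packet fits if the slots it consumes do not exceed the
    // subtarget's capacity; duplexes count twice, extenders count zero.
    static bool fitsInPacket(llvm::MCInstrInfo const &MCII,
                             llvm::MCSubtargetInfo const &STI,
                             llvm::MCInst const &Bundle) {
      return llvm::HexagonMCInstrInfo::slotsConsumed(MCII, STI, Bundle) <=
             llvm::HexagonMCInstrInfo::packetSizeSlots(STI);
    }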
+
void HexagonMCInstrInfo::replaceDuplex(MCContext &Context, MCInst &MCB,
DuplexCandidate Candidate) {
assert(Candidate.packetIndexI < MCB.size());
@@ -874,9 +1010,8 @@ unsigned HexagonMCInstrInfo::SubregisterBit(unsigned Consumer,
unsigned Producer2) {
// If we're a single vector consumer of a double producer, set subreg bit
// based on if we're accessing the lower or upper register component
- if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15)
- if (Consumer >= Hexagon::V0 && Consumer <= Hexagon::V31)
- return (Consumer - Hexagon::V0) & 0x1;
+ if (IsVecRegPair(Producer) && IsVecRegSingle(Consumer))
+ return (Consumer - Hexagon::V0) & 0x1;
if (Producer2 != Hexagon::NoRegister)
return Consumer == Producer;
return 0;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index 829f872c453e..7b3c079880f8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -28,6 +28,7 @@ class MCContext;
class MCExpr;
class MCInstrDesc;
class MCInstrInfo;
+class MCRegisterInfo;
class MCSubtargetInfo;
class DuplexCandidate {
@@ -91,7 +92,8 @@ size_t bundleSize(MCInst const &MCI);
// Put the packet in to canonical form, compound, duplex, pad, and shuffle
bool canonicalizePacket(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
MCContext &Context, MCInst &MCB,
- HexagonMCChecker *Checker);
+ HexagonMCChecker *Checker,
+ bool AttemptCompatibility = false);
// Create a duplex instruction given the two subinsts
MCInst *deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0,
@@ -165,6 +167,11 @@ MCOperand const &getNewValueOperand2(MCInstrInfo const &MCII,
// Return the Hexagon ISA class for the insn.
unsigned getType(MCInstrInfo const &MCII, MCInst const &MCI);
+/// Return the resources used by this instruction
+unsigned getCVIResources(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI,
+ MCInst const &MCI);
+
/// Return the slots used by the insn.
unsigned getUnits(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
MCInst const &MCI);
@@ -252,6 +259,8 @@ bool isMemReorderDisabled(MCInst const &MCI);
// Return whether the insn is a new-value consumer.
bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+/// Return true if the insn is a new-value store.
+bool isNewValueStore(MCInstrInfo const &MCII, MCInst const &MCI);
bool isOpExtendable(MCInstrInfo const &MCII, MCInst const &MCI, unsigned short);
// Can these two instructions be duplexed
@@ -270,8 +279,11 @@ bool isPredicatedNew(MCInstrInfo const &MCII, MCInst const &MCI);
// Return whether the predicate sense is true
bool isPredicatedTrue(MCInstrInfo const &MCII, MCInst const &MCI);
-// Is this a predicate register
-bool isPredReg(unsigned Reg);
+// Return true if this is a scalar predicate register.
+bool isPredReg(MCRegisterInfo const &MRI, unsigned Reg);
+
+// Returns true if the Ith operand is a predicate register.
+bool isPredRegister(MCInstrInfo const &MCII, MCInst const &Inst, unsigned I);
// Return whether the insn is a prefix.
bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -290,6 +302,21 @@ bool isVector(MCInstrInfo const &MCII, MCInst const &MCI);
bool mustExtend(MCExpr const &Expr);
bool mustNotExtend(MCExpr const &Expr);
+// Returns true if this instruction requires a slot to execute.
+bool requiresSlot(MCSubtargetInfo const &STI, MCInst const &MCI);
+
+unsigned packetSize(StringRef CPU);
+
+// Returns the maximum number of slots available in the given
+// subtarget's packets.
+unsigned packetSizeSlots(MCSubtargetInfo const &STI);
+
+// Returns the number of slots consumed by this packet, considering duplexed
+// and compound instructions.
+unsigned slotsConsumed(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst const &MCI);
+
+
// Pad the bundle with nops to satisfy endloop requirements
void padEndloop(MCInst &MCI, MCContext &Context);
class PredicateInfo {
@@ -324,6 +351,16 @@ bool subInstWouldBeExtended(MCInst const &potentialDuplex);
unsigned SubregisterBit(unsigned Consumer, unsigned Producer,
unsigned Producer2);
+bool IsVecRegSingle(unsigned VecReg);
+bool IsVecRegPair(unsigned VecReg);
+bool IsReverseVecRegPair(unsigned VecReg);
+bool IsSingleConsumerRefPairProducer(unsigned Producer, unsigned Consumer);
+
+/// Returns an ordered pair of the constituent register ordinals for
+/// each of the elements of \a VecRegPair. For example, Hexagon::W0 ("v0:1")
+/// returns { 0, 1 } and Hexagon::W1 ("v3:2") returns { 3, 2 }.
+std::pair<unsigned, unsigned> GetVecRegPairIndices(unsigned VecRegPair);
+
// Attempt to find and replace compound pairs
void tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
MCContext &Context, MCInst &MCI);
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index f8dc0547baad..7514d0e67744 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -10,13 +10,13 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/HexagonMCTargetDesc.h"
-#include "HexagonDepArch.h"
+#include "HexagonArch.h"
#include "HexagonTargetStreamer.h"
#include "MCTargetDesc/HexagonInstPrinter.h"
#include "MCTargetDesc/HexagonMCAsmInfo.h"
#include "MCTargetDesc/HexagonMCELFStreamer.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "TargetInfo/HexagonTargetInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
@@ -37,8 +37,10 @@
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
+#include <mutex>
#include <new>
#include <string>
+#include <unordered_map>
using namespace llvm;
@@ -72,6 +74,10 @@ cl::opt<bool> MV65("mv65", cl::Hidden, cl::desc("Build for Hexagon V65"),
cl::init(false));
cl::opt<bool> MV66("mv66", cl::Hidden, cl::desc("Build for Hexagon V66"),
cl::init(false));
+cl::opt<bool> MV67("mv67", cl::Hidden, cl::desc("Build for Hexagon V67"),
+ cl::init(false));
+cl::opt<bool> MV67T("mv67t", cl::Hidden, cl::desc("Build for Hexagon V67T"),
+ cl::init(false));
cl::opt<Hexagon::ArchEnum>
EnableHVX("mhvx",
@@ -81,6 +87,7 @@ cl::opt<Hexagon::ArchEnum>
clEnumValN(Hexagon::ArchEnum::V62, "v62", "Build for HVX v62"),
clEnumValN(Hexagon::ArchEnum::V65, "v65", "Build for HVX v65"),
clEnumValN(Hexagon::ArchEnum::V66, "v66", "Build for HVX v66"),
+ clEnumValN(Hexagon::ArchEnum::V67, "v67", "Build for HVX v67"),
// Sentinel for no value specified.
clEnumValN(Hexagon::ArchEnum::Generic, "", "")),
// Sentinel for flag not present.
@@ -107,14 +114,22 @@ static StringRef HexagonGetArchVariant() {
return "hexagonv65";
if (MV66)
return "hexagonv66";
+ if (MV67)
+ return "hexagonv67";
+ if (MV67T)
+ return "hexagonv67t";
return "";
}
StringRef Hexagon_MC::selectHexagonCPU(StringRef CPU) {
StringRef ArchV = HexagonGetArchVariant();
if (!ArchV.empty() && !CPU.empty()) {
- if (ArchV != CPU)
- report_fatal_error("conflicting architectures specified.");
+ // Tiny cores have a "t" suffix that is discarded when creating a secondary
+ // non-tiny subtarget. See: addArchSubtarget
+ std::pair<StringRef,StringRef> ArchP = ArchV.split('t');
+ std::pair<StringRef,StringRef> CPUP = CPU.split('t');
+ if (!ArchP.first.equals(CPUP.first))
+ report_fatal_error("conflicting architectures specified.");
return CPU;
}
if (ArchV.empty()) {
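The split('t') comparison means a tiny-core CPU and its architectural base are now treated as the same architecture rather than a conflict; for example:

    // Sketch: -mv67t combined with -mcpu=hexagonv67 is accepted.
    llvm::StringRef Arch = "hexagonv67t", CPU = "hexagonv67";
    bool Compatible = Arch.split('t').first.equals(CPU.split('t').first); // true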
@@ -127,6 +142,56 @@ StringRef Hexagon_MC::selectHexagonCPU(StringRef CPU) {
unsigned llvm::HexagonGetLastSlot() { return HexagonItinerariesV5FU::SLOT3; }
+unsigned llvm::HexagonConvertUnits(unsigned ItinUnits, unsigned *Lanes) {
+ enum {
+ CVI_NONE = 0,
+ CVI_XLANE = 1 << 0,
+ CVI_SHIFT = 1 << 1,
+ CVI_MPY0 = 1 << 2,
+ CVI_MPY1 = 1 << 3,
+ CVI_ZW = 1 << 4
+ };
+
+ if (ItinUnits == HexagonItinerariesV62FU::CVI_ALL ||
+ ItinUnits == HexagonItinerariesV62FU::CVI_ALL_NOMEM)
+ return (*Lanes = 4, CVI_XLANE);
+ else if (ItinUnits & HexagonItinerariesV62FU::CVI_MPY01 &&
+ ItinUnits & HexagonItinerariesV62FU::CVI_XLSHF)
+ return (*Lanes = 2, CVI_XLANE | CVI_MPY0);
+ else if (ItinUnits & HexagonItinerariesV62FU::CVI_MPY01)
+ return (*Lanes = 2, CVI_MPY0);
+ else if (ItinUnits & HexagonItinerariesV62FU::CVI_XLSHF)
+ return (*Lanes = 2, CVI_XLANE);
+ else if (ItinUnits & HexagonItinerariesV62FU::CVI_XLANE &&
+ ItinUnits & HexagonItinerariesV62FU::CVI_SHIFT &&
+ ItinUnits & HexagonItinerariesV62FU::CVI_MPY0 &&
+ ItinUnits & HexagonItinerariesV62FU::CVI_MPY1)
+ return (*Lanes = 1, CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1);
+ else if (ItinUnits & HexagonItinerariesV62FU::CVI_XLANE &&
+ ItinUnits & HexagonItinerariesV62FU::CVI_SHIFT)
+ return (*Lanes = 1, CVI_XLANE | CVI_SHIFT);
+ else if (ItinUnits & HexagonItinerariesV62FU::CVI_MPY0 &&
+ ItinUnits & HexagonItinerariesV62FU::CVI_MPY1)
+ return (*Lanes = 1, CVI_MPY0 | CVI_MPY1);
+ else if (ItinUnits == HexagonItinerariesV62FU::CVI_ZW)
+ return (*Lanes = 1, CVI_ZW);
+ else if (ItinUnits == HexagonItinerariesV62FU::CVI_XLANE)
+ return (*Lanes = 1, CVI_XLANE);
+ else if (ItinUnits == HexagonItinerariesV62FU::CVI_SHIFT)
+ return (*Lanes = 1, CVI_SHIFT);
+
+ return (*Lanes = 0, CVI_NONE);
+}
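HexagonConvertUnits collapses the V62 itinerary functional units into the shuffler's local lane/unit encoding; the unambiguous "all resources" case makes a safe worked example:

    unsigned Lanes = 0;
    const unsigned Units =
        HexagonConvertUnits(HexagonItinerariesV62FU::CVI_ALL, &Lanes);
    // Lanes == 4 with only the local CVI_XLANE bit set; narrower itineraries
    // map to 1 or 2 lanes exactly as encoded in the chain of tests above.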
+
+
+namespace llvm {
+namespace HexagonFUnits {
+bool isSlot0Only(unsigned units) {
+ return HexagonItinerariesV62FU::SLOT0 == units;
+}
+} // namespace HexagonFUnits
+} // namespace llvm
+
namespace {
class HexagonTargetAsmStreamer : public HexagonTargetStreamer {
@@ -186,7 +251,7 @@ public:
}
- void EmitCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
+ void emitCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment,
unsigned AccessSize) override {
HexagonMCELFStreamer &HexagonELFStreamer =
@@ -195,7 +260,7 @@ public:
AccessSize);
}
- void EmitLocalCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
+ void emitLocalCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment,
unsigned AccessSize) override {
HexagonMCELFStreamer &HexagonELFStreamer =
@@ -225,9 +290,8 @@ static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI,
MCAsmInfo *MAI = new HexagonMCAsmInfo(TT);
// VirtualFP = (R30 + #0).
- MCCFIInstruction Inst =
- MCCFIInstruction::createDefCfa(nullptr,
- MRI.getDwarfRegNum(Hexagon::R30, true), 0);
+ MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(
+ nullptr, MRI.getDwarfRegNum(Hexagon::R30, true), 0);
MAI->addInitialFrameState(Inst);
return MAI;
@@ -296,40 +360,51 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
case Hexagon::ArchEnum::V66:
Result.push_back("+hvxv66");
break;
+ case Hexagon::ArchEnum::V67:
+ Result.push_back("+hvxv67");
+ break;
case Hexagon::ArchEnum::Generic:{
Result.push_back(StringSwitch<StringRef>(CPU)
.Case("hexagonv60", "+hvxv60")
.Case("hexagonv62", "+hvxv62")
.Case("hexagonv65", "+hvxv65")
- .Case("hexagonv66", "+hvxv66"));
+ .Case("hexagonv66", "+hvxv66")
+ .Case("hexagonv67", "+hvxv67")
+ .Case("hexagonv67t", "+hvxv67"));
break;
}
case Hexagon::ArchEnum::NoArch:
- // Sentinal if -mhvx isn't specified
+ // Sentinel if -mhvx isn't specified
break;
}
return join(Result.begin(), Result.end(), ",");
}
}
-static bool isCPUValid(std::string CPU)
-{
- std::vector<std::string> table {
- "generic", "hexagonv5", "hexagonv55", "hexagonv60",
- "hexagonv62", "hexagonv65", "hexagonv66",
- };
-
- return std::find(table.begin(), table.end(), CPU) != table.end();
+static bool isCPUValid(const std::string &CPU) {
+ return Hexagon::CpuTable.find(CPU) != Hexagon::CpuTable.cend();
}
namespace {
std::pair<std::string, std::string> selectCPUAndFS(StringRef CPU,
StringRef FS) {
std::pair<std::string, std::string> Result;
- Result.first = Hexagon_MC::selectHexagonCPU(CPU);
+ Result.first = std::string(Hexagon_MC::selectHexagonCPU(CPU));
Result.second = selectHexagonFS(Result.first, FS);
return Result;
}
+std::mutex ArchSubtargetMutex;
+std::unordered_map<std::string, std::unique_ptr<MCSubtargetInfo const>>
+ ArchSubtarget;
+} // namespace
+
+MCSubtargetInfo const *
+Hexagon_MC::getArchSubtarget(MCSubtargetInfo const *STI) {
+ std::lock_guard<std::mutex> Lock(ArchSubtargetMutex);
+ auto Existing = ArchSubtarget.find(std::string(STI->getCPU()));
+ if (Existing == ArchSubtarget.end())
+ return nullptr;
+ return Existing->second.get();
}
FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
@@ -338,7 +413,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
// turns on hvxvNN, corresponding to the existing ArchVNN.
FeatureBitset FB = S;
unsigned CpuArch = ArchV5;
- for (unsigned F : {ArchV66, ArchV65, ArchV62, ArchV60, ArchV55, ArchV5}) {
+ for (unsigned F : {ArchV67, ArchV66, ArchV65, ArchV62, ArchV60, ArchV55,
+ ArchV5}) {
if (!FB.test(F))
continue;
CpuArch = F;
@@ -353,7 +429,7 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
}
bool HasHvxVer = false;
for (unsigned F : {ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65,
- ExtensionHVXV66}) {
+ ExtensionHVXV66, ExtensionHVXV67}) {
if (!FB.test(F))
continue;
HasHvxVer = true;
@@ -366,6 +442,9 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
// HasHvxVer is false, and UseHvx is true.
switch (CpuArch) {
+ case ArchV67:
+ FB.set(ExtensionHVXV67);
+ LLVM_FALLTHROUGH;
case ArchV66:
FB.set(ExtensionHVXV66);
LLVM_FALLTHROUGH;
@@ -389,22 +468,52 @@ MCSubtargetInfo *Hexagon_MC::createHexagonMCSubtargetInfo(const Triple &TT,
StringRef CPUName = Features.first;
StringRef ArchFS = Features.second;
+ MCSubtargetInfo *X = createHexagonMCSubtargetInfoImpl(TT, CPUName, ArchFS);
+ if (X != nullptr && (CPUName == "hexagonv67t"))
+ addArchSubtarget(X, ArchFS);
+
+ if (CPU.equals("help"))
+ exit(0);
+
if (!isCPUValid(CPUName.str())) {
errs() << "error: invalid CPU \"" << CPUName.str().c_str()
<< "\" specified\n";
return nullptr;
}
- MCSubtargetInfo *X = createHexagonMCSubtargetInfoImpl(TT, CPUName, ArchFS);
if (HexagonDisableDuplex) {
llvm::FeatureBitset Features = X->getFeatureBits();
X->setFeatureBits(Features.reset(Hexagon::FeatureDuplex));
}
X->setFeatureBits(completeHVXFeatures(X->getFeatureBits()));
+
+ // The Z-buffer instructions are grandfathered in for current
+ // architectures but omitted for new ones. Future instruction
+ // sets may introduce new/conflicting z-buffer instructions.
+ const bool ZRegOnDefault =
+ (CPUName == "hexagonv67") || (CPUName == "hexagonv66");
+ if (ZRegOnDefault) {
+ llvm::FeatureBitset Features = X->getFeatureBits();
+ X->setFeatureBits(Features.set(Hexagon::ExtensionZReg));
+ }
+
return X;
}
+void Hexagon_MC::addArchSubtarget(MCSubtargetInfo const *STI,
+ StringRef FS) {
+ assert(STI != nullptr);
+ if (STI->getCPU().contains("t")) {
+ auto ArchSTI = createHexagonMCSubtargetInfo(
+ STI->getTargetTriple(),
+ STI->getCPU().substr(0, STI->getCPU().size() - 1), FS);
+ std::lock_guard<std::mutex> Lock(ArchSubtargetMutex);
+ ArchSubtarget[std::string(STI->getCPU())] =
+ std::unique_ptr<MCSubtargetInfo const>(ArchSTI);
+ }
+}
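addArchSubtarget caches a non-tiny ("t"-stripped) subtarget keyed by the tiny CPU name, and getArchSubtarget retrieves it under the same mutex. A sketch of the round trip, given some MCSubtargetInfo pointer STI:

    // For an STI created with CPU "hexagonv67t", addArchSubtarget() above has
    // cached a "hexagonv67" subtarget; other CPUs have no cached entry.
    llvm::MCSubtargetInfo const *ArchSTI = llvm::Hexagon_MC::getArchSubtarget(STI);
    // ArchSTI is the cached architectural (non-tiny) subtarget, or nullptr when
    // the CPU has no "t"-suffixed tiny variant registered.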
+
unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) {
static std::map<StringRef,unsigned> ElfFlags = {
{"hexagonv5", ELF::EF_HEXAGON_MACH_V5},
@@ -413,6 +522,8 @@ unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) {
{"hexagonv62", ELF::EF_HEXAGON_MACH_V62},
{"hexagonv65", ELF::EF_HEXAGON_MACH_V65},
{"hexagonv66", ELF::EF_HEXAGON_MACH_V66},
+ {"hexagonv67", ELF::EF_HEXAGON_MACH_V67},
+ {"hexagonv67t", ELF::EF_HEXAGON_MACH_V67T},
};
auto F = ElfFlags.find(STI.getCPU());
@@ -420,6 +531,10 @@ unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) {
return F->second;
}
+llvm::ArrayRef<MCPhysReg> Hexagon_MC::GetVectRegRev() {
+ return makeArrayRef(VectRegRev);
+}
+
namespace {
class HexagonMCInstrAnalysis : public MCInstrAnalysis {
public:
@@ -437,6 +552,10 @@ public:
bool evaluateBranch(MCInst const &Inst, uint64_t Addr,
uint64_t Size, uint64_t &Target) const override {
+ if (!(isCall(Inst) || isUnconditionalBranch(Inst) ||
+ isConditionalBranch(Inst)))
+ return false;
+
//assert(!HexagonMCInstrInfo::isBundle(Inst));
if(!HexagonMCInstrInfo::isExtendable(*Info, Inst))
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 7b42460a2a1c..5bf7c9a1a908 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H
#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include <cstdint>
#include <string>
@@ -46,7 +47,6 @@
namespace llvm {
-struct InstrItinerary;
struct InstrStage;
class FeatureBitset;
class MCAsmBackend;
@@ -60,8 +60,6 @@ class MCTargetOptions;
class Target;
class Triple;
class StringRef;
-class raw_ostream;
-class raw_pwrite_stream;
extern cl::opt<bool> HexagonDisableCompound;
extern cl::opt<bool> HexagonDisableDuplex;
@@ -78,7 +76,12 @@ namespace Hexagon_MC {
/// etc. do not need to go through TargetRegistry.
MCSubtargetInfo *createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU,
StringRef FS);
+ MCSubtargetInfo const *getArchSubtarget(MCSubtargetInfo const *STI);
+ void addArchSubtarget(MCSubtargetInfo const *STI,
+ StringRef FS);
unsigned GetELFFlags(const MCSubtargetInfo &STI);
+
+ llvm::ArrayRef<MCPhysReg> GetVectRegRev();
}
MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII,
@@ -94,6 +97,7 @@ std::unique_ptr<MCObjectTargetWriter>
createHexagonELFObjectWriter(uint8_t OSABI, StringRef CPU);
unsigned HexagonGetLastSlot();
+unsigned HexagonConvertUnits(unsigned ItinUnits, unsigned *Lanes);
} // End llvm namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index 18c7790a17cc..2788b86181e2 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -105,62 +105,30 @@ unsigned HexagonResource::setWeight(unsigned s) {
return Weight;
}
-void HexagonCVIResource::SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU) {
- (*TUL)[HexagonII::TypeCVI_VA] =
- UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
- (*TUL)[HexagonII::TypeCVI_VA_DV] = UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2);
- (*TUL)[HexagonII::TypeCVI_VX] = UnitsAndLanes(CVI_MPY0 | CVI_MPY1, 1);
- (*TUL)[HexagonII::TypeCVI_VX_LATE] = UnitsAndLanes(CVI_MPY0 | CVI_MPY1, 1);
- (*TUL)[HexagonII::TypeCVI_VX_DV] = UnitsAndLanes(CVI_MPY0, 2);
- (*TUL)[HexagonII::TypeCVI_VP] = UnitsAndLanes(CVI_XLANE, 1);
- (*TUL)[HexagonII::TypeCVI_VP_VS] = UnitsAndLanes(CVI_XLANE, 2);
- (*TUL)[HexagonII::TypeCVI_VS] = UnitsAndLanes(CVI_SHIFT, 1);
- (*TUL)[HexagonII::TypeCVI_VS_VX] = UnitsAndLanes(CVI_XLANE | CVI_SHIFT, 1);
- (*TUL)[HexagonII::TypeCVI_VINLANESAT] =
- (CPU == "hexagonv60")
- ? UnitsAndLanes(CVI_SHIFT, 1)
- : UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
- (*TUL)[HexagonII::TypeCVI_VM_LD] =
- UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
- (*TUL)[HexagonII::TypeCVI_VM_TMP_LD] = UnitsAndLanes(CVI_NONE, 0);
- (*TUL)[HexagonII::TypeCVI_VM_VP_LDU] = UnitsAndLanes(CVI_XLANE, 1);
- (*TUL)[HexagonII::TypeCVI_VM_ST] =
- UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
- (*TUL)[HexagonII::TypeCVI_VM_NEW_ST] = UnitsAndLanes(CVI_NONE, 0);
- (*TUL)[HexagonII::TypeCVI_VM_STU] = UnitsAndLanes(CVI_XLANE, 1);
- (*TUL)[HexagonII::TypeCVI_HIST] = UnitsAndLanes(CVI_XLANE, 4);
- (*TUL)[HexagonII::TypeCVI_GATHER] =
- UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
- (*TUL)[HexagonII::TypeCVI_SCATTER] =
- UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
- (*TUL)[HexagonII::TypeCVI_SCATTER_DV] =
- UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2);
- (*TUL)[HexagonII::TypeCVI_SCATTER_NEW_ST] =
- UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
- (*TUL)[HexagonII::TypeCVI_4SLOT_MPY] = UnitsAndLanes(CVI_XLANE, 4);
- (*TUL)[HexagonII::TypeCVI_ZW] = UnitsAndLanes(CVI_ZW, 1);
-}
-
-HexagonCVIResource::HexagonCVIResource(TypeUnitsAndLanes *TUL,
- MCInstrInfo const &MCII, unsigned s,
+HexagonCVIResource::HexagonCVIResource(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI,
+ unsigned s,
MCInst const *id)
: HexagonResource(s) {
- unsigned T = HexagonMCInstrInfo::getType(MCII, *id);
- if (TUL->count(T)) {
- // For an HVX insn.
- Valid = true;
- setUnits((*TUL)[T].first);
- setLanes((*TUL)[T].second);
- setLoad(HexagonMCInstrInfo::getDesc(MCII, *id).mayLoad());
- setStore(HexagonMCInstrInfo::getDesc(MCII, *id).mayStore());
- } else {
+ const unsigned ItinUnits = HexagonMCInstrInfo::getCVIResources(MCII, STI, *id);
+ unsigned Lanes;
+ const unsigned Units = HexagonConvertUnits(ItinUnits, &Lanes);
+
+ if (Units == 0 && Lanes == 0) {
// For core insns.
Valid = false;
setUnits(0);
setLanes(0);
setLoad(false);
setStore(false);
+ } else {
+ // For an HVX insn.
+ Valid = true;
+ setUnits(Units);
+ setLanes(Lanes);
+ setLoad(HexagonMCInstrInfo::getDesc(MCII, *id).mayLoad());
+ setStore(HexagonMCInstrInfo::getDesc(MCII, *id).mayStore());
}
}
@@ -201,124 +169,293 @@ HexagonShuffler::HexagonShuffler(MCContext &Context, bool ReportErrors,
MCSubtargetInfo const &STI)
: Context(Context), MCII(MCII), STI(STI), ReportErrors(ReportErrors) {
reset();
- HexagonCVIResource::SetupTUL(&TUL, STI.getCPU());
}
void HexagonShuffler::reset() {
Packet.clear();
BundleFlags = 0;
+ CheckFailure = false;
}
void HexagonShuffler::append(MCInst const &ID, MCInst const *Extender,
unsigned S) {
- HexagonInstr PI(&TUL, MCII, &ID, Extender, S);
+ HexagonInstr PI(MCII, STI, &ID, Extender, S);
Packet.push_back(PI);
}
-static struct {
- unsigned first;
- unsigned second;
-} jumpSlots[] = {{8, 4}, {8, 2}, {8, 1}, {4, 2}, {4, 1}, {2, 1}};
-#define MAX_JUMP_SLOTS (sizeof(jumpSlots) / sizeof(jumpSlots[0]))
-void HexagonShuffler::restrictSlot1AOK() {
- bool HasRestrictSlot1AOK = false;
- SMLoc RestrictLoc;
- for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
- MCInst const &Inst = ISJ->getDesc();
- if (HexagonMCInstrInfo::isRestrictSlot1AOK(MCII, Inst)) {
- HasRestrictSlot1AOK = true;
- RestrictLoc = Inst.getLoc();
- }
- }
- if (HasRestrictSlot1AOK)
- for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
- MCInst const &Inst = ISJ->getDesc();
- unsigned Type = HexagonMCInstrInfo::getType(MCII, Inst);
+static const unsigned Slot0Mask = 1 << 0;
+static const unsigned Slot1Mask = 1 << 1;
+static const unsigned Slot3Mask = 1 << 3;
+static const unsigned slotSingleLoad = Slot0Mask;
+static const unsigned slotSingleStore = Slot0Mask;
+
+void HexagonShuffler::restrictSlot1AOK(HexagonPacketSummary const &Summary) {
+ if (Summary.Slot1AOKLoc)
+ for (HexagonInstr &ISJ : insts()) {
+ MCInst const &Inst = ISJ.getDesc();
+ const unsigned Type = HexagonMCInstrInfo::getType(MCII, Inst);
if (Type != HexagonII::TypeALU32_2op &&
Type != HexagonII::TypeALU32_3op &&
Type != HexagonII::TypeALU32_ADDI) {
- unsigned Units = ISJ->Core.getUnits();
- if (Units & 2U) {
+ const unsigned Units = ISJ.Core.getUnits();
+
+ if (Units & Slot1Mask) {
AppliedRestrictions.push_back(std::make_pair(
Inst.getLoc(),
"Instruction was restricted from being in slot 1"));
- AppliedRestrictions.push_back(
- std::make_pair(RestrictLoc, "Instruction can only be combine "
- "with an ALU instruction in slot 1"));
- ISJ->Core.setUnits(Units & ~2U);
+ AppliedRestrictions.push_back(std::make_pair(
+ *Summary.Slot1AOKLoc, "Instruction can only be combined "
+ "with an ALU instruction in slot 1"));
+ ISJ.Core.setUnits(Units & ~Slot1Mask);
}
}
}
}
-void HexagonShuffler::restrictNoSlot1Store() {
- bool HasRestrictNoSlot1Store = false;
- SMLoc RestrictLoc;
- for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
- MCInst const &Inst = ISJ->getDesc();
- if (HexagonMCInstrInfo::isRestrictNoSlot1Store(MCII, Inst)) {
- HasRestrictNoSlot1Store = true;
- RestrictLoc = Inst.getLoc();
+void HexagonShuffler::restrictNoSlot1Store(
+ HexagonPacketSummary const &Summary) {
+ // If this packet contains an instruction that bars slot-1 stores,
+ // we should mask off slot 1 from all of the store instructions in
+ // this packet.
+
+ if (!Summary.NoSlot1StoreLoc)
+ return;
+
+ bool AppliedRestriction = false;
+
+ for (HexagonInstr &ISJ : insts()) {
+ MCInst const &Inst = ISJ.getDesc();
+ if (HexagonMCInstrInfo::getDesc(MCII, Inst).mayStore()) {
+ unsigned Units = ISJ.Core.getUnits();
+ if (Units & Slot1Mask) {
+ AppliedRestriction = true;
+ AppliedRestrictions.push_back(std::make_pair(
+ Inst.getLoc(), "Instruction was restricted from being in slot 1"));
+ ISJ.Core.setUnits(Units & ~Slot1Mask);
+ }
}
}
- if (HasRestrictNoSlot1Store) {
- bool AppliedRestriction = false;
- for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
- MCInst const &Inst = ISJ->getDesc();
- if (HexagonMCInstrInfo::getDesc(MCII, Inst).mayStore()) {
- unsigned Units = ISJ->Core.getUnits();
- if (Units & 2U) {
- AppliedRestriction = true;
- AppliedRestrictions.push_back(std::make_pair(
- Inst.getLoc(),
- "Instruction was restricted from being in slot 1"));
- ISJ->Core.setUnits(Units & ~2U);
+
+ if (AppliedRestriction)
+ AppliedRestrictions.push_back(
+ std::make_pair(*Summary.NoSlot1StoreLoc,
+ "Instruction does not allow a store in slot 1"));
+}
+
+bool HexagonShuffler::applySlotRestrictions(
+ HexagonPacketSummary const &Summary) {
+ // These restrictions can modify the slot masks in the instructions
+ // in the Packet member. They should run unconditionally and their
+ // order does not matter.
+ restrictSlot1AOK(Summary);
+ restrictNoSlot1Store(Summary);
+
+ permitNonSlot();
+
+ // These restrictions can modify the slot masks in the instructions
+ // in the Packet member, but they can also detect constraint failures
+ // which are fatal.
+ if (!CheckFailure)
+ restrictStoreLoadOrder(Summary);
+ if (!CheckFailure)
+ restrictBranchOrder(Summary);
+ if (!CheckFailure)
+ restrictPreferSlot3(Summary);
+ return !CheckFailure;
+}
+
+void HexagonShuffler::restrictBranchOrder(HexagonPacketSummary const &Summary) {
+ // preserve branch order
+ const bool HasMultipleBranches = Summary.branchInsts.size() > 1;
+ if (!HasMultipleBranches)
+ return;
+
+ if (Summary.branchInsts.size() > 2) {
+ reportError(Twine("too many branches in packet"));
+ return;
+ }
+
+ const static std::pair<unsigned, unsigned> jumpSlots[] = {
+ {8, 4}, {8, 2}, {8, 1}, {4, 2}, {4, 1}, {2, 1}};
+ // try all possible choices
+ for (std::pair<unsigned, unsigned> jumpSlot : jumpSlots) {
+ // validate first jump with this slot rule
+ if (!(jumpSlot.first & Summary.branchInsts[0]->Core.getUnits()))
+ continue;
+
+ // validate second jump with this slot rule
+ if (!(jumpSlot.second & Summary.branchInsts[1]->Core.getUnits()))
+ continue;
+
+ // both valid for this configuration, set new slot rules
+ const HexagonPacket PacketSave = Packet;
+ Summary.branchInsts[0]->Core.setUnits(jumpSlot.first);
+ Summary.branchInsts[1]->Core.setUnits(jumpSlot.second);
+
+ const bool HasShuffledPacket = tryAuction(Summary).hasValue();
+ if (HasShuffledPacket)
+ return;
+
+ // if yes, great, if not then restore original slot mask
+ // restore original values
+ Packet = PacketSave;
+ }
+
+ reportError("invalid instruction packet: out of slots");
+}
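For reference, each jumpSlots entry pairs a higher slot for the earlier branch with a lower slot for the later one (masks 8/4/2/1 correspond to slots 3/2/1/0), so branch order in the packet is preserved; every candidate assignment is then validated with tryAuction():

    //   {8, 4} -> first branch in slot 3, second branch in slot 2
    //   {4, 1} -> first branch in slot 2, second branch in slot 0
    //   {2, 1} -> first branch in slot 1, second branch in slot 0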
+
+
+void HexagonShuffler::permitNonSlot() {
+ for (HexagonInstr &ISJ : insts()) {
+ const bool RequiresSlot = HexagonMCInstrInfo::requiresSlot(STI, *ISJ.ID);
+ if (!RequiresSlot)
+ ISJ.Core.setAllUnits();
+ }
+}
+
+bool HexagonShuffler::ValidResourceUsage(HexagonPacketSummary const &Summary) {
+ Optional<HexagonPacket> ShuffledPacket = tryAuction(Summary);
+
+ if (!ShuffledPacket) {
+ reportError("invalid instruction packet: slot error");
+ return false;
+ } else {
+ Packet = *ShuffledPacket;
+ }
+
+ // Verify the CVI slot subscriptions.
+ std::stable_sort(begin(), end(), HexagonInstr::lessCVI);
+ // create vector of hvx instructions to check
+ HVXInstsT hvxInsts;
+ hvxInsts.clear();
+ for (const_iterator I = cbegin(); I != cend(); ++I) {
+ struct CVIUnits inst;
+ inst.Units = I->CVI.getUnits();
+ inst.Lanes = I->CVI.getLanes();
+ if (inst.Units == 0)
+ continue; // not an hvx inst or an hvx inst that doesn't use any pipes
+ hvxInsts.push_back(inst);
+ }
+
+ // if there are any hvx instructions in this packet, check pipe usage
+ if (hvxInsts.size() > 0) {
+ unsigned startIdx, usedUnits;
+ startIdx = usedUnits = 0x0;
+ if (!checkHVXPipes(hvxInsts, startIdx, usedUnits)) {
+ // too many pipes used to be valid
+ reportError(Twine("invalid instruction packet: slot error"));
+ return false;
+ }
+ }
+ return true;
+}
+
+bool HexagonShuffler::restrictStoreLoadOrder(
+ HexagonPacketSummary const &Summary) {
+ // Modify packet accordingly.
+ // TODO: need to reserve slots #0 and #1 for duplex insns.
+ static const unsigned slotFirstLoadStore = Slot1Mask;
+ static const unsigned slotLastLoadStore = Slot0Mask;
+ unsigned slotLoadStore = slotFirstLoadStore;
+
+ for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
+ MCInst const &ID = ISJ->getDesc();
+
+ if (!ISJ->Core.getUnits())
+ // Error if insn may not be executed in any slot.
+ return false;
+
+ // A single load must use slot #0.
+ if (HexagonMCInstrInfo::getDesc(MCII, ID).mayLoad()) {
+ if (Summary.loads == 1 && Summary.loads == Summary.memory &&
+ Summary.memops == 0)
+ // Pin the load to slot #0.
+ switch (ID.getOpcode()) {
+ case Hexagon::V6_vgathermw:
+ case Hexagon::V6_vgathermh:
+ case Hexagon::V6_vgathermhw:
+ case Hexagon::V6_vgathermwq:
+ case Hexagon::V6_vgathermhq:
+ case Hexagon::V6_vgathermhwq:
+ // Slot1 only loads
+ break;
+ default:
+ ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleLoad);
+ break;
+ }
+ else if (Summary.loads >= 1 && isMemReorderDisabled()) { // }:mem_noshuf
+ // Loads must keep the original order ONLY if
+ // isMemReorderDisabled() == true
+ if (slotLoadStore < slotLastLoadStore) {
+ // Error if no more slots available for loads.
+ reportError("invalid instruction packet: too many loads");
+ return false;
+ }
+ // Pin the load to the highest slot available to it.
+ ISJ->Core.setUnits(ISJ->Core.getUnits() & slotLoadStore);
+ // Update the next highest slot available to loads.
+ slotLoadStore >>= 1;
+ }
+ }
+
+ // A single store must use slot #0.
+ if (HexagonMCInstrInfo::getDesc(MCII, ID).mayStore()) {
+ if (!Summary.store0) {
+ const bool PacketHasNoOnlySlot0 =
+ llvm::none_of(insts(), [&](HexagonInstr const &I) {
+ return I.Core.getUnits() == Slot0Mask &&
+ I.ID->getOpcode() != ID.getOpcode();
+ });
+ const bool SafeToMoveToSlot0 =
+ (Summary.loads == 0) ||
+ (!isMemReorderDisabled() && PacketHasNoOnlySlot0);
+
+ if (Summary.stores == 1 && SafeToMoveToSlot0)
+ // Pin the store to slot #0 only if isMemReorderDisabled() == false
+ ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleStore);
+ else if (Summary.stores >= 1) {
+ if (slotLoadStore < slotLastLoadStore) {
+ // Error if no more slots available for stores.
+ reportError("invalid instruction packet: too many stores");
+ return false;
+ }
+ // Pin the store to the highest slot available to it.
+ ISJ->Core.setUnits(ISJ->Core.getUnits() & slotLoadStore);
+ // Update the next highest slot available to stores.
+ slotLoadStore >>= 1;
}
}
+ if (Summary.store1 && Summary.stores > 1) {
+ // Error if a single store with another store.
+ reportError("invalid instruction packet: too many stores");
+ return false;
+ }
}
- if (AppliedRestriction)
- AppliedRestrictions.push_back(std::make_pair(
- RestrictLoc, "Instruction does not allow a store in slot 1"));
}
-}
-void HexagonShuffler::applySlotRestrictions() {
- restrictSlot1AOK();
- restrictNoSlot1Store();
+ return true;
}
-/// Check that the packet is legal and enforce relative insn order.
-bool HexagonShuffler::check() {
- // Descriptive slot masks.
- const unsigned slotSingleLoad = 0x1, slotSingleStore = 0x1,
- slotThree = 0x8, // slotFirstJump = 0x8,
- slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1;
- // Highest slots for branches and stores used to keep their original order.
- // unsigned slotJump = slotFirstJump;
- unsigned slotLoadStore = slotFirstLoadStore;
- // Number of memory operations, loads, solo loads, stores, solo stores, single
- // stores.
- unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0;
- unsigned NonZCVIloads = 0, AllCVIloads = 0, CVIstores = 0;
- // Number of duplex insns
- unsigned duplex = 0;
- unsigned pSlot3Cnt = 0;
- unsigned memops = 0;
- iterator slot3ISJ = end();
- std::vector<iterator> foundBranches;
- unsigned reservedSlots = 0;
+HexagonShuffler::HexagonPacketSummary HexagonShuffler::GetPacketSummary() {
+ HexagonPacketSummary Summary = HexagonPacketSummary();
// Collect information from the insns in the packet.
for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
MCInst const &ID = ISJ->getDesc();
+ if (HexagonMCInstrInfo::isRestrictSlot1AOK(MCII, ID))
+ Summary.Slot1AOKLoc = ID.getLoc();
+ if (HexagonMCInstrInfo::isRestrictNoSlot1Store(MCII, ID))
+ Summary.NoSlot1StoreLoc = ID.getLoc();
+
if (HexagonMCInstrInfo::prefersSlot3(MCII, ID)) {
- ++pSlot3Cnt;
- slot3ISJ = ISJ;
+ ++Summary.pSlot3Cnt;
+ Summary.PrefSlot3Inst = ISJ;
}
- reservedSlots |= HexagonMCInstrInfo::getOtherReservedSlots(MCII, STI, ID);
+ Summary.ReservedSlotMask |=
+ HexagonMCInstrInfo::getOtherReservedSlots(MCII, STI, ID);
switch (HexagonMCInstrInfo::getType(MCII, ID)) {
case HexagonII::TypeS_2op:
@@ -326,26 +463,27 @@ bool HexagonShuffler::check() {
case HexagonII::TypeALU64:
break;
case HexagonII::TypeJ:
- foundBranches.push_back(ISJ);
+ Summary.branchInsts.push_back(ISJ);
break;
case HexagonII::TypeCVI_VM_VP_LDU:
case HexagonII::TypeCVI_VM_LD:
case HexagonII::TypeCVI_VM_TMP_LD:
case HexagonII::TypeCVI_GATHER:
+ case HexagonII::TypeCVI_GATHER_DV:
case HexagonII::TypeCVI_GATHER_RST:
- ++NonZCVIloads;
+ ++Summary.NonZCVIloads;
LLVM_FALLTHROUGH;
case HexagonII::TypeCVI_ZW:
- ++AllCVIloads;
+ ++Summary.AllCVIloads;
LLVM_FALLTHROUGH;
case HexagonII::TypeLD:
- ++loads;
- ++memory;
+ ++Summary.loads;
+ ++Summary.memory;
if (ISJ->Core.getUnits() == slotSingleLoad ||
HexagonMCInstrInfo::getType(MCII, ID) == HexagonII::TypeCVI_VM_VP_LDU)
- ++load0;
+ ++Summary.load0;
if (HexagonMCInstrInfo::getDesc(MCII, ID).isReturn())
- foundBranches.push_back(ISJ);
+ Summary.branchInsts.push_back(ISJ);
break;
case HexagonII::TypeCVI_VM_STU:
case HexagonII::TypeCVI_VM_ST:
@@ -355,266 +493,143 @@ bool HexagonShuffler::check() {
case HexagonII::TypeCVI_SCATTER_RST:
case HexagonII::TypeCVI_SCATTER_NEW_RST:
case HexagonII::TypeCVI_SCATTER_NEW_ST:
- ++CVIstores;
+ ++Summary.CVIstores;
LLVM_FALLTHROUGH;
case HexagonII::TypeST:
- ++stores;
- ++memory;
+ ++Summary.stores;
+ ++Summary.memory;
if (ISJ->Core.getUnits() == slotSingleStore ||
HexagonMCInstrInfo::getType(MCII, ID) == HexagonII::TypeCVI_VM_STU)
- ++store0;
+ ++Summary.store0;
break;
case HexagonII::TypeV4LDST:
- ++loads;
- ++stores;
- ++store1;
- ++memops;
- ++memory;
+ ++Summary.loads;
+ ++Summary.stores;
+ ++Summary.store1;
+ ++Summary.memops;
+ ++Summary.memory;
break;
case HexagonII::TypeNCJ:
- ++memory; // NV insns are memory-like.
- foundBranches.push_back(ISJ);
+ ++Summary.memory; // NV insns are memory-like.
+ Summary.branchInsts.push_back(ISJ);
break;
case HexagonII::TypeV2LDST:
if (HexagonMCInstrInfo::getDesc(MCII, ID).mayLoad()) {
- ++loads;
- ++memory;
+ ++Summary.loads;
+ ++Summary.memory;
if (ISJ->Core.getUnits() == slotSingleLoad ||
HexagonMCInstrInfo::getType(MCII, ID) ==
HexagonII::TypeCVI_VM_VP_LDU)
- ++load0;
+ ++Summary.load0;
} else {
assert(HexagonMCInstrInfo::getDesc(MCII, ID).mayStore());
- ++memory;
- ++stores;
+ ++Summary.memory;
+ ++Summary.stores;
}
break;
case HexagonII::TypeCR:
// Legacy conditional branch predicated on a register.
case HexagonII::TypeCJ:
if (HexagonMCInstrInfo::getDesc(MCII, ID).isBranch())
- foundBranches.push_back(ISJ);
+ Summary.branchInsts.push_back(ISJ);
break;
case HexagonII::TypeDUPLEX: {
- ++duplex;
+ ++Summary.duplex;
MCInst const &Inst0 = *ID.getOperand(0).getInst();
MCInst const &Inst1 = *ID.getOperand(1).getInst();
if (HexagonMCInstrInfo::getDesc(MCII, Inst0).isBranch())
- foundBranches.push_back(ISJ);
+ Summary.branchInsts.push_back(ISJ);
if (HexagonMCInstrInfo::getDesc(MCII, Inst1).isBranch())
- foundBranches.push_back(ISJ);
+ Summary.branchInsts.push_back(ISJ);
if (HexagonMCInstrInfo::getDesc(MCII, Inst0).isReturn())
- foundBranches.push_back(ISJ);
+ Summary.branchInsts.push_back(ISJ);
if (HexagonMCInstrInfo::getDesc(MCII, Inst1).isReturn())
- foundBranches.push_back(ISJ);
+ Summary.branchInsts.push_back(ISJ);
break;
}
}
}
- applySlotRestrictions();
+ return Summary;
+}
+bool HexagonShuffler::ValidPacketMemoryOps(
+ HexagonPacketSummary const &Summary) const {
// Check if the packet is legal.
- const unsigned ZCVIloads = AllCVIloads - NonZCVIloads;
+ const unsigned ZCVIloads = Summary.AllCVIloads - Summary.NonZCVIloads;
const bool ValidHVXMem =
- NonZCVIloads <= 1 && ZCVIloads <= 1 && CVIstores <= 1;
- if ((load0 > 1 || store0 > 1 || !ValidHVXMem) ||
- (duplex > 1 || (duplex && memory))) {
- reportError(llvm::Twine("invalid instruction packet"));
- return false;
- }
-
- // Modify packet accordingly.
- // TODO: need to reserve slots #0 and #1 for duplex insns.
- bool bOnlySlot3 = false;
- for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
- MCInst const &ID = ISJ->getDesc();
-
- if (!ISJ->Core.getUnits()) {
- // Error if insn may not be executed in any slot.
- return false;
- }
-
- // A single load must use slot #0.
- if (HexagonMCInstrInfo::getDesc(MCII, ID).mayLoad()) {
- if (loads == 1 && loads == memory && memops == 0)
- // Pin the load to slot #0.
- switch (ID.getOpcode()) {
- case Hexagon::V6_vgathermw:
- case Hexagon::V6_vgathermh:
- case Hexagon::V6_vgathermhw:
- case Hexagon::V6_vgathermwq:
- case Hexagon::V6_vgathermhq:
- case Hexagon::V6_vgathermhwq:
- // Slot1 only loads
- break;
- default:
- ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleLoad);
- break;
- }
- else if (loads >= 1 && isMemReorderDisabled()) { // }:mem_noshuf
- // Loads must keep the original order ONLY if
- // isMemReorderDisabled() == true
- if (slotLoadStore < slotLastLoadStore) {
- // Error if no more slots available for loads.
- reportError(
- llvm::Twine("invalid instruction packet: too many loads"));
- return false;
- }
- // Pin the load to the highest slot available to it.
- ISJ->Core.setUnits(ISJ->Core.getUnits() & slotLoadStore);
- // Update the next highest slot available to loads.
- slotLoadStore >>= 1;
- }
- }
+ Summary.NonZCVIloads <= 1 && ZCVIloads <= 1 && Summary.CVIstores <= 1;
+ const bool InvalidPacket =
+ ((Summary.load0 > 1 || Summary.store0 > 1 || !ValidHVXMem) ||
+ (Summary.duplex > 1 || (Summary.duplex && Summary.memory)));
- // A single store must use slot #0.
- if (HexagonMCInstrInfo::getDesc(MCII, ID).mayStore()) {
- if (!store0) {
- if (stores == 1 && (loads == 0 || !isMemReorderDisabled()))
- // Pin the store to slot #0 only if isMemReorderDisabled() == false
- ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleStore);
- else if (stores >= 1) {
- if (slotLoadStore < slotLastLoadStore) {
- // Error if no more slots available for stores.
- reportError(Twine("invalid instruction packet: too many stores"));
- return false;
- }
- // Pin the store to the highest slot available to it.
- ISJ->Core.setUnits(ISJ->Core.getUnits() & slotLoadStore);
- // Update the next highest slot available to stores.
- slotLoadStore >>= 1;
- }
- }
- if (store1 && stores > 1) {
- // Error if a single store with another store.
- reportError(Twine("invalid instruction packet: too many stores"));
- return false;
- }
- }
-
- // flag if an instruction requires to be in slot 3
- if (ISJ->Core.getUnits() == slotThree)
- bOnlySlot3 = true;
-
- if (!ISJ->Core.getUnits()) {
- // Error if insn may not be executed in any slot.
- reportError(Twine("invalid instruction packet: out of slots"));
- return false;
- }
- }
-
- // preserve branch order
- bool validateSlots = true;
- if (foundBranches.size() > 1) {
- if (foundBranches.size() > 2) {
- reportError(Twine("too many branches in packet"));
- return false;
- }
-
- // try all possible choices
- for (unsigned int i = 0; i < MAX_JUMP_SLOTS; ++i) {
- // validate first jump with this slot rule
- if (!(jumpSlots[i].first & foundBranches[0]->Core.getUnits()))
- continue;
-
- // validate second jump with this slot rule
- if (!(jumpSlots[i].second & foundBranches[1]->Core.getUnits()))
- continue;
-
- // both valid for this configuration, set new slot rules
- PacketSave = Packet;
- foundBranches[0]->Core.setUnits(jumpSlots[i].first);
- foundBranches[1]->Core.setUnits(jumpSlots[i].second);
-
- HexagonUnitAuction AuctionCore(reservedSlots);
- std::stable_sort(begin(), end(), HexagonInstr::lessCore);
-
- // see if things ok with that instruction being pinned to slot "slotJump"
- bool bFail = false;
- for (iterator I = begin(); I != end() && !bFail; ++I)
- if (!AuctionCore.bid(I->Core.getUnits()))
- bFail = true;
-
- // if yes, great, if not then restore original slot mask
- if (!bFail) {
- validateSlots = false; // all good, no need to re-do auction
- break;
- } else
- // restore original values
- Packet = PacketSave;
- }
- if (validateSlots) {
- reportError(Twine("invalid instruction packet: out of slots"));
- return false;
- }
- }
-
- if (foundBranches.size() <= 1 && bOnlySlot3 == false && pSlot3Cnt == 1 &&
- slot3ISJ != end()) {
- validateSlots = true;
- // save off slot mask of instruction marked with A_PREFER_SLOT3
- // and then pin it to slot #3
- unsigned saveUnits = slot3ISJ->Core.getUnits();
- slot3ISJ->Core.setUnits(saveUnits & slotThree);
+ return !InvalidPacket;
+}
- HexagonUnitAuction AuctionCore(reservedSlots);
- std::stable_sort(begin(), end(), HexagonInstr::lessCore);
+void HexagonShuffler::restrictPreferSlot3(HexagonPacketSummary const &Summary) {
+ // flag if an instruction requires to be in slot 3
+ const bool HasOnlySlot3 = llvm::any_of(insts(), [&](HexagonInstr const &I) {
+ return (I.Core.getUnits() == Slot3Mask);
+ });
+ const bool NeedsPrefSlot3Shuffle =
+ (Summary.branchInsts.size() <= 1 && !HasOnlySlot3 &&
+ Summary.pSlot3Cnt == 1 && Summary.PrefSlot3Inst);
+
+ if (!NeedsPrefSlot3Shuffle)
+ return;
+
+ HexagonInstr *PrefSlot3Inst = *Summary.PrefSlot3Inst;
+ // save off slot mask of instruction marked with A_PREFER_SLOT3
+ // and then pin it to slot #3
+ const unsigned saveUnits = PrefSlot3Inst->Core.getUnits();
+ PrefSlot3Inst->Core.setUnits(saveUnits & Slot3Mask);
+ const bool HasShuffledPacket = tryAuction(Summary).hasValue();
+ if (HasShuffledPacket)
+ return;
+
+ PrefSlot3Inst->Core.setUnits(saveUnits);
+}
- // see if things ok with that instruction being pinned to slot #3
- bool bFail = false;
- for (iterator I = begin(); I != end() && !bFail; ++I)
- if (!AuctionCore.bid(I->Core.getUnits()))
- bFail = true;
+/// Check that the packet is legal and enforce relative insn order.
+bool HexagonShuffler::check() {
+ const HexagonPacketSummary Summary = GetPacketSummary();
+ if (!applySlotRestrictions(Summary))
+ return false;
- // if yes, great, if not then restore original slot mask
- if (!bFail)
- validateSlots = false; // all good, no need to re-do auction
- else
- for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
- MCInst const &ID = ISJ->getDesc();
- if (HexagonMCInstrInfo::prefersSlot3(MCII, ID))
- ISJ->Core.setUnits(saveUnits);
- }
+ if (!ValidPacketMemoryOps(Summary)) {
+ reportError("invalid instruction packet");
+ return false;
}
- // Check if any slot, core or CVI, is over-subscribed.
- // Verify the core slot subscriptions.
- if (validateSlots) {
- HexagonUnitAuction AuctionCore(reservedSlots);
+ ValidResourceUsage(Summary);
- std::stable_sort(begin(), end(), HexagonInstr::lessCore);
-
- for (iterator I = begin(); I != end(); ++I)
- if (!AuctionCore.bid(I->Core.getUnits())) {
- reportError(Twine("invalid instruction packet: slot error"));
- return false;
- }
- }
- // Verify the CVI slot subscriptions.
- std::stable_sort(begin(), end(), HexagonInstr::lessCVI);
- // create vector of hvx instructions to check
- HVXInstsT hvxInsts;
- hvxInsts.clear();
- for (iterator I = begin(); I != end(); ++I) {
- struct CVIUnits inst;
- inst.Units = I->CVI.getUnits();
- inst.Lanes = I->CVI.getLanes();
- if (inst.Units == 0)
-      continue; // not an hvx inst or an hvx inst that doesn't use any pipes
- hvxInsts.push_back(inst);
- }
- // if there are any hvx instructions in this packet, check pipe usage
- if (hvxInsts.size() > 0) {
- unsigned startIdx, usedUnits;
- startIdx = usedUnits = 0x0;
- if (!checkHVXPipes(hvxInsts, startIdx, usedUnits)) {
- // too many pipes used to be valid
- reportError(Twine("invalid instruction packet: slot error"));
- return false;
- }
- }
+ return !CheckFailure;
+}
- return true;
+llvm::Optional<HexagonShuffler::HexagonPacket>
+HexagonShuffler::tryAuction(HexagonPacketSummary const &Summary) const {
+ HexagonPacket PacketResult = Packet;
+ HexagonUnitAuction AuctionCore(Summary.ReservedSlotMask);
+ std::stable_sort(PacketResult.begin(), PacketResult.end(),
+ HexagonInstr::lessCore);
+
+ const bool ValidSlots =
+ llvm::all_of(insts(PacketResult), [&AuctionCore](HexagonInstr const &I) {
+ return AuctionCore.bid(I.Core.getUnits());
+ });
+
+ LLVM_DEBUG(
+ dbgs() << "Shuffle attempt: " << (ValidSlots ? "passed" : "failed")
+ << "\n";
+ for (HexagonInstr const &ISJ : insts(PacketResult))
+ dbgs() << "\t" << HexagonMCInstrInfo::getName(MCII, *ISJ.ID) << ": "
+ << llvm::format_hex(ISJ.Core.getUnits(), 4, true) << "\n";
+ );
+
+ Optional<HexagonPacket> Res;
+ if (ValidSlots)
+ Res = PacketResult;
+
+ return Res;
}
bool HexagonShuffler::shuffle() {
@@ -653,20 +668,25 @@ bool HexagonShuffler::shuffle() {
++emptySlots;
}
- for (iterator ISJ = begin(); ISJ != end(); ++ISJ)
- LLVM_DEBUG(dbgs().write_hex(ISJ->Core.getUnits()); if (ISJ->CVI.isValid()) {
- dbgs() << '/';
- dbgs().write_hex(ISJ->CVI.getUnits()) << '|';
- dbgs() << ISJ->CVI.getLanes();
- } dbgs() << ':'
- << HexagonMCInstrInfo::getDesc(MCII, ISJ->getDesc()).getOpcode();
- dbgs() << '\n');
- LLVM_DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(
+ for (HexagonInstr const &ISJ : insts()) {
+ dbgs().write_hex(ISJ.Core.getUnits());
+ if (ISJ.CVI.isValid()) {
+ dbgs() << '/';
+ dbgs().write_hex(ISJ.CVI.getUnits()) << '|';
+ dbgs() << ISJ.CVI.getLanes();
+ }
+ dbgs() << ':'
+ << HexagonMCInstrInfo::getDesc(MCII, ISJ.getDesc()).getOpcode()
+ << '\n';
+ } dbgs() << '\n';
+ );
return Ok;
}
void HexagonShuffler::reportError(Twine const &Msg) {
+ CheckFailure = true;
if (ReportErrors) {
for (auto const &I : AppliedRestrictions) {
auto SM = Context.getSourceManager();
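
The rewrite above factors the old monolithic slot check into small helpers that share one shape: tentatively narrow an instruction's slot mask, run the auction via tryAuction(), and roll the mask back if the packet no longer shuffles. Below is a minimal stand-alone sketch of that trial-and-rollback idea; SlotMask, auctionSucceeds and tryRestrict are hypothetical stand-ins, not the HexagonUnitAuction/HexagonInstr classes, and the greedy bidding is only an approximation of the real auction.

#include <algorithm>
#include <bitset>
#include <cstddef>
#include <vector>

using SlotMask = unsigned;                // one bit per execution slot (hypothetical encoding)

// Greedy "auction": each instruction bids the set of slots it may occupy and
// claims the lowest one still free; the packet fails if any bid finds none.
static bool auctionSucceeds(std::vector<SlotMask> Masks, SlotMask Reserved) {
  // Most-constrained instructions first, loosely mirroring the stable sort
  // the shuffler performs before running its auction.
  std::stable_sort(Masks.begin(), Masks.end(), [](SlotMask A, SlotMask B) {
    return std::bitset<32>(A).count() < std::bitset<32>(B).count();
  });
  SlotMask Used = Reserved;
  for (SlotMask M : Masks) {
    const SlotMask Free = M & ~Used;
    if (!Free)
      return false;                       // no slot left for this instruction
    Used |= Free & ~(Free - 1);           // claim the lowest free slot
  }
  return true;
}

// Trial restriction: narrow one instruction's mask and keep the change only
// if the packet still shuffles -- the save/try/restore shape used above.
static void tryRestrict(std::vector<SlotMask> &Packet, std::size_t Idx,
                        SlotMask Narrow, SlotMask Reserved) {
  const SlotMask Saved = Packet[Idx];
  Packet[Idx] = Saved & Narrow;
  if (!auctionSucceeds(Packet, Reserved))
    Packet[Idx] = Saved;                  // restore the original units on failure
}
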
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index bf3bad36dfe5..1b4ebc5111db 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -17,11 +17,13 @@
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include <cstdint>
+#include <functional>
#include <utility>
namespace llvm {
@@ -45,6 +47,9 @@ public:
setWeight(s);
}
+ void setAllUnits() {
+ setUnits(((1u << HEXAGON_PACKET_SIZE) - 1));
+ }
unsigned setWeight(unsigned s);
unsigned getUnits() const { return (Slots); }
@@ -65,7 +70,6 @@ public:
class HexagonCVIResource : public HexagonResource {
public:
using UnitsAndLanes = std::pair<unsigned, unsigned>;
- using TypeUnitsAndLanes = DenseMap<unsigned, UnitsAndLanes>;
private:
// Available HVX slots.
@@ -90,11 +94,10 @@ private:
void setStore(bool f = true) { Store = f; }
public:
- HexagonCVIResource(TypeUnitsAndLanes *TUL, MCInstrInfo const &MCII,
+ HexagonCVIResource(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI,
unsigned s, MCInst const *id);
- static void SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU);
-
bool isValid() const { return Valid; }
unsigned getLanes() const { return Lanes; }
bool mayLoad() const { return Load; }
@@ -111,10 +114,10 @@ class HexagonInstr {
HexagonCVIResource CVI;
public:
- HexagonInstr(HexagonCVIResource::TypeUnitsAndLanes *T,
- MCInstrInfo const &MCII, MCInst const *id,
+ HexagonInstr(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI, MCInst const *id,
MCInst const *Extender, unsigned s)
- : ID(id), Extender(Extender), Core(s), CVI(T, MCII, s, id) {}
+ : ID(id), Extender(Extender), Core(s), CVI(MCII, STI, s, id){};
MCInst const &getDesc() const { return *ID; }
MCInst const *getExtender() const { return Extender; }
@@ -140,11 +143,30 @@ class HexagonShuffler {
using HexagonPacket =
SmallVector<HexagonInstr, HEXAGON_PRESHUFFLE_PACKET_SIZE>;
+ struct HexagonPacketSummary {
+ // Number of memory operations, loads, solo loads, stores, solo stores,
+ // single stores.
+ unsigned memory;
+ unsigned loads;
+ unsigned load0;
+ unsigned stores;
+ unsigned store0;
+ unsigned store1;
+ unsigned NonZCVIloads;
+ unsigned AllCVIloads;
+ unsigned CVIstores;
+ // Number of duplex insns
+ unsigned duplex;
+ unsigned pSlot3Cnt;
+ Optional<HexagonInstr *> PrefSlot3Inst;
+ unsigned memops;
+ unsigned ReservedSlotMask;
+ SmallVector<HexagonInstr *, HEXAGON_PRESHUFFLE_PACKET_SIZE> branchInsts;
+ Optional<SMLoc> Slot1AOKLoc;
+ Optional<SMLoc> NoSlot1StoreLoc;
+ };
// Insn handles in a bundle.
HexagonPacket Packet;
- HexagonPacket PacketSave;
-
- HexagonCVIResource::TypeUnitsAndLanes TUL;
protected:
MCContext &Context;
@@ -153,13 +175,29 @@ protected:
MCSubtargetInfo const &STI;
SMLoc Loc;
bool ReportErrors;
+ bool CheckFailure;
std::vector<std::pair<SMLoc, std::string>> AppliedRestrictions;
- void applySlotRestrictions();
- void restrictSlot1AOK();
- void restrictNoSlot1Store();
+ bool applySlotRestrictions(HexagonPacketSummary const &Summary);
+ void restrictSlot1AOK(HexagonPacketSummary const &Summary);
+ void restrictNoSlot1Store(HexagonPacketSummary const &Summary);
+ void restrictNoSlot1();
+ bool restrictStoreLoadOrder(HexagonPacketSummary const &Summary);
+ void restrictBranchOrder(HexagonPacketSummary const &Summary);
+ void restrictPreferSlot3(HexagonPacketSummary const &Summary);
+ void permitNonSlot();
+
+ Optional<HexagonPacket> tryAuction(HexagonPacketSummary const &Summary) const;
+
+ HexagonPacketSummary GetPacketSummary();
+ bool ValidPacketMemoryOps(HexagonPacketSummary const &Summary) const;
+ bool ValidResourceUsage(HexagonPacketSummary const &Summary);
+ bool validPacketInsts() const;
public:
using iterator = HexagonPacket::iterator;
+ using const_iterator = HexagonPacket::const_iterator;
+ using packet_range = iterator_range<HexagonPacket::iterator>;
+ using const_packet_range = iterator_range<HexagonPacket::const_iterator>;
HexagonShuffler(MCContext &Context, bool ReportErrors,
MCInstrInfo const &MCII, MCSubtargetInfo const &STI);
@@ -179,6 +217,25 @@ public:
iterator begin() { return (Packet.begin()); }
iterator end() { return (Packet.end()); }
+ const_iterator cbegin() const { return (Packet.begin()); }
+ const_iterator cend() const { return (Packet.end()); }
+ packet_range insts(HexagonPacket &P) {
+ return make_range(P.begin(), P.end());
+ }
+ const_packet_range insts(HexagonPacket const &P) const {
+ return make_range(P.begin(), P.end());
+ }
+ packet_range insts() { return make_range(begin(), end()); }
+ const_packet_range insts() const { return make_range(cbegin(), cend()); }
+
+ using InstPredicate = bool (*)(MCInstrInfo const &, MCInst const &);
+
+ bool HasInstWith(InstPredicate Pred) const {
+ return llvm::any_of(insts(), [&](HexagonInstr const &I) {
+ MCInst const &Inst = I.getDesc();
+ return (*Pred)(MCII, Inst);
+ });
+ }
  // Add insn handle to the bundle.
void append(MCInst const &ID, MCInst const *Extender, unsigned S);
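
The header now exposes the packet as iterator ranges (insts()) plus a predicate query (HasInstWith), so the .cpp side can use range-for and llvm::any_of/all_of instead of hand-written iterator loops. A rough equivalent built from standard-library pieces only, with a hypothetical Instr type rather than HexagonInstr:

#include <algorithm>
#include <vector>

struct Instr { unsigned Units = 0; };     // hypothetical stand-in for HexagonInstr

class Packet {
  std::vector<Instr> Insts;
public:
  // Range accessors: callers iterate with range-for instead of raw iterators.
  std::vector<Instr> &insts() { return Insts; }
  const std::vector<Instr> &insts() const { return Insts; }

  // Counterpart of HasInstWith: does any instruction satisfy the predicate?
  template <typename Pred> bool hasInstWith(Pred P) const {
    return std::any_of(Insts.begin(), Insts.end(), P);
  }
};

// Usage: is some instruction restricted to exactly one slot?
bool anySingleSlot(const Packet &P) {
  return P.hasInstWith([](const Instr &I) {
    return I.Units != 0 && (I.Units & (I.Units - 1)) == 0;   // exactly one bit set
  });
}
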
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index 8b8504978c75..639ab24b0817 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -47,7 +47,7 @@ struct LanaiOperand;
class LanaiAsmParser : public MCTargetAsmParser {
// Parse operands
- std::unique_ptr<LanaiOperand> parseRegister();
+ std::unique_ptr<LanaiOperand> parseRegister(bool RestoreOnFailure = false);
std::unique_ptr<LanaiOperand> parseImmediate();
@@ -67,6 +67,8 @@ class LanaiAsmParser : public MCTargetAsmParser {
SMLoc NameLoc, OperandVector &Operands) override;
bool ParseRegister(unsigned &RegNum, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool MatchAndEmitInstruction(SMLoc IdLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
@@ -657,7 +659,7 @@ bool LanaiAsmParser::MatchAndEmitInstruction(SMLoc IdLoc, unsigned &Opcode,
switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
case Match_Success:
- Out.EmitInstruction(Inst, SubtargetInfo);
+ Out.emitInstruction(Inst, SubtargetInfo);
Opcode = Inst.getOpcode();
return false;
case Match_MissingFeature:
@@ -687,21 +689,30 @@ bool LanaiAsmParser::MatchAndEmitInstruction(SMLoc IdLoc, unsigned &Opcode,
// backwards compatible with GCC and the different ways inline assembly is
// handled.
// TODO: see if there isn't a better way to do this.
-std::unique_ptr<LanaiOperand> LanaiAsmParser::parseRegister() {
+std::unique_ptr<LanaiOperand>
+LanaiAsmParser::parseRegister(bool RestoreOnFailure) {
SMLoc Start = Parser.getTok().getLoc();
SMLoc End = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Optional<AsmToken> PercentTok;
unsigned RegNum;
// Eat the '%'.
- if (Lexer.getKind() == AsmToken::Percent)
+ if (Lexer.getKind() == AsmToken::Percent) {
+ PercentTok = Parser.getTok();
Parser.Lex();
+ }
if (Lexer.getKind() == AsmToken::Identifier) {
RegNum = MatchRegisterName(Lexer.getTok().getIdentifier());
- if (RegNum == 0)
+ if (RegNum == 0) {
+ if (PercentTok.hasValue() && RestoreOnFailure)
+ Lexer.UnLex(PercentTok.getValue());
return nullptr;
+ }
Parser.Lex(); // Eat identifier token
return LanaiOperand::createReg(RegNum, Start, End);
}
+ if (PercentTok.hasValue() && RestoreOnFailure)
+ Lexer.UnLex(PercentTok.getValue());
return nullptr;
}
@@ -710,12 +721,25 @@ bool LanaiAsmParser::ParseRegister(unsigned &RegNum, SMLoc &StartLoc,
const AsmToken &Tok = getParser().getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
- std::unique_ptr<LanaiOperand> Op = parseRegister();
+ std::unique_ptr<LanaiOperand> Op = parseRegister(/*RestoreOnFailure=*/false);
if (Op != nullptr)
RegNum = Op->getReg();
return (Op == nullptr);
}
+OperandMatchResultTy LanaiAsmParser::tryParseRegister(unsigned &RegNum,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ const AsmToken &Tok = getParser().getTok();
+ StartLoc = Tok.getLoc();
+ EndLoc = Tok.getEndLoc();
+ std::unique_ptr<LanaiOperand> Op = parseRegister(/*RestoreOnFailure=*/true);
+ if (Op == nullptr)
+ return MatchOperand_NoMatch;
+ RegNum = Op->getReg();
+ return MatchOperand_Success;
+}
+
std::unique_ptr<LanaiOperand> LanaiAsmParser::parseIdentifier() {
SMLoc Start = Parser.getTok().getLoc();
SMLoc End = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
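
The RestoreOnFailure flag exists so the new tryParseRegister override can probe for a register without consuming input: the '%' token is remembered and pushed back with UnLex when no register name follows, leaving the lexer exactly where it started. A self-contained sketch of that save-and-restore probe with a hypothetical lexer, not the MCAsmLexer API:

#include <cstddef>
#include <optional>
#include <string>
#include <utility>
#include <vector>

struct Token { std::string Text; };       // hypothetical token type

class Lexer {                             // hypothetical lexer with one-token push-back
  std::vector<Token> Stream;
  std::size_t Pos = 0;
public:
  explicit Lexer(std::vector<Token> T) : Stream(std::move(T)) {}
  const Token &peek() const { return Stream[Pos]; }
  Token lex() { return Stream[Pos++]; }
  void unlex() { --Pos; }                 // counterpart of UnLex in the patch
  bool atEnd() const { return Pos == Stream.size(); }
};

// Probe for "%<reg>"; on failure push the '%' back so the caller can try to
// parse the same tokens as something else (the RestoreOnFailure behaviour).
std::optional<std::string> tryParseRegister(Lexer &L) {
  bool AtePercent = false;
  if (!L.atEnd() && L.peek().Text == "%") {
    L.lex();
    AtePercent = true;
  }
  if (!L.atEnd() && L.peek().Text.size() > 1 && L.peek().Text[0] == 'r')
    return L.lex().Text;                  // treat "r<N>" as a register name
  if (AtePercent)
    L.unlex();                            // restore the lexer on failure
  return std::nullopt;
}
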
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/Lanai.h b/contrib/llvm-project/llvm/lib/Target/Lanai/Lanai.h
index 2f06ea91ab03..2bd266b1b96e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/Lanai.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/Lanai.h
@@ -19,9 +19,6 @@
namespace llvm {
class FunctionPass;
class LanaiTargetMachine;
-class MachineFunctionPass;
-class TargetMachine;
-class formatted_raw_ostream;
// createLanaiISelDag - This pass converts a legalized DAG into a
// Lanai-specific DAG, ready for instruction scheduling.
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
index c13ee08e1213..6bac7c75853d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
@@ -51,7 +51,7 @@ public:
void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode, raw_ostream &O) override;
- void EmitInstruction(const MachineInstr *MI) override;
+ void emitInstruction(const MachineInstr *MI) override;
bool isBlockOnlyReachableByFallthrough(
const MachineBasicBlock *MBB) const override;
@@ -155,7 +155,7 @@ void LanaiAsmPrinter::emitCallInstruction(const MachineInstr *MI) {
// Insert save rca instruction immediately before the call.
// TODO: We should generate a pc-relative mov instruction here instead
// of pc + 16 (should be mov .+16 %rca).
- OutStreamer->EmitInstruction(MCInstBuilder(Lanai::ADD_I_LO)
+ OutStreamer->emitInstruction(MCInstBuilder(Lanai::ADD_I_LO)
.addReg(Lanai::RCA)
.addReg(Lanai::PC)
.addImm(16),
@@ -163,7 +163,7 @@ void LanaiAsmPrinter::emitCallInstruction(const MachineInstr *MI) {
// Push rca onto the stack.
// st %rca, [--%sp]
- OutStreamer->EmitInstruction(MCInstBuilder(Lanai::SW_RI)
+ OutStreamer->emitInstruction(MCInstBuilder(Lanai::SW_RI)
.addReg(Lanai::RCA)
.addReg(Lanai::SP)
.addImm(-4)
@@ -175,9 +175,9 @@ void LanaiAsmPrinter::emitCallInstruction(const MachineInstr *MI) {
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
TmpInst.setOpcode(Lanai::BT);
- OutStreamer->EmitInstruction(TmpInst, STI);
+ OutStreamer->emitInstruction(TmpInst, STI);
} else {
- OutStreamer->EmitInstruction(MCInstBuilder(Lanai::ADD_R)
+ OutStreamer->emitInstruction(MCInstBuilder(Lanai::ADD_R)
.addReg(Lanai::PC)
.addReg(MI->getOperand(0).getReg())
.addReg(Lanai::R0)
@@ -191,10 +191,10 @@ void LanaiAsmPrinter::customEmitInstruction(const MachineInstr *MI) {
MCSubtargetInfo STI = getSubtargetInfo();
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
- OutStreamer->EmitInstruction(TmpInst, STI);
+ OutStreamer->emitInstruction(TmpInst, STI);
}
-void LanaiAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void LanaiAsmPrinter::emitInstruction(const MachineInstr *MI) {
MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
@@ -211,7 +211,7 @@ void LanaiAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// isBlockOnlyReachableByFallthough - Return true if the basic block has
// exactly one predecessor and the control transfer mechanism between
// the predecessor and this block is a fall-through.
-// FIXME: could the overridden cases be handled in AnalyzeBranch?
+// FIXME: could the overridden cases be handled in analyzeBranch?
bool LanaiAsmPrinter::isBlockOnlyReachableByFallthrough(
const MachineBasicBlock *MBB) const {
// The predecessor has to be immediately before this block.
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp
index eddc2b8e61f7..3c84ed057fd1 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp
@@ -32,8 +32,8 @@ void LanaiFrameLowering::determineFrameLayout(MachineFunction &MF) const {
unsigned FrameSize = MFI.getStackSize();
// Get the alignment.
- unsigned StackAlign = LRI->needsStackRealignment(MF) ? MFI.getMaxAlignment()
- : getStackAlignment();
+ Align StackAlign =
+ LRI->needsStackRealignment(MF) ? MFI.getMaxAlign() : getStackAlign();
// Get the maximum call frame size of all the calls.
unsigned MaxCallFrameSize = MFI.getMaxCallFrameSize();
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
index 6fa0c93d4a05..32ccf7172594 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -388,7 +388,7 @@ static bool CC_Lanai32_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
}
// VarArgs get passed on stack
- unsigned Offset = State.AllocateStack(4, 4);
+ unsigned Offset = State.AllocateStack(4, Align(4));
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return false;
}
@@ -633,13 +633,13 @@ SDValue LanaiTargetLowering::LowerCCCCallTo(
SDValue Arg = OutVals[I];
unsigned Size = Flags.getByValSize();
- unsigned Align = Flags.getByValAlign();
+ Align Alignment = Flags.getNonZeroByValAlign();
- int FI = MFI.CreateStackObject(Size, Align, false);
+ int FI = MFI.CreateStackObject(Size, Alignment, false);
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue SizeNode = DAG.getConstant(Size, DL, MVT::i32);
- Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align,
+ Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
/*IsVolatile=*/false,
/*AlwaysInline=*/false,
/*isTailCall=*/false, MachinePointerInfo(),
@@ -1136,7 +1136,7 @@ SDValue LanaiTargetLowering::LowerConstantPool(SDValue Op,
if (getTargetMachine().getCodeModel() == CodeModel::Small ||
TLOF->isConstantInSmallSection(DAG.getDataLayout(), C)) {
SDValue Small = DAG.getTargetConstantPool(
- C, MVT::i32, N->getAlignment(), N->getOffset(), LanaiII::MO_NO_FLAG);
+ C, MVT::i32, N->getAlign(), N->getOffset(), LanaiII::MO_NO_FLAG);
return DAG.getNode(ISD::OR, DL, MVT::i32,
DAG.getRegister(Lanai::R0, MVT::i32),
DAG.getNode(LanaiISD::SMALL, DL, MVT::i32, Small));
@@ -1144,9 +1144,9 @@ SDValue LanaiTargetLowering::LowerConstantPool(SDValue Op,
uint8_t OpFlagHi = LanaiII::MO_ABS_HI;
uint8_t OpFlagLo = LanaiII::MO_ABS_LO;
- SDValue Hi = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ SDValue Hi = DAG.getTargetConstantPool(C, MVT::i32, N->getAlign(),
N->getOffset(), OpFlagHi);
- SDValue Lo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ SDValue Lo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlign(),
N->getOffset(), OpFlagLo);
Hi = DAG.getNode(LanaiISD::HI, DL, MVT::i32, Hi);
Lo = DAG.getNode(LanaiISD::LO, DL, MVT::i32, Lo);
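
Several hunks in this file are mechanical fallout from replacing raw unsigned alignments with the Align type (getAlign(), getNonZeroByValAlign(), AllocateStack(4, Align(4))), which enforces a non-zero power of two at construction time. A small sketch of how the type behaves, assuming the llvm/Support/Alignment.h API of roughly this LLVM version:

#include "llvm/Support/Alignment.h"
#include <cassert>

void alignExamples() {
  llvm::Align A(4);                       // asserts that 4 is a power of two
  assert(A.value() == 4);                 // read back as a plain integer

  // MaybeAlign models "alignment may be unset"; valueOrOne() supplies the
  // conservative Align(1) default.
  llvm::MaybeAlign M;
  assert(M.valueOrOne() == llvm::Align(1));
}
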
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
index 4ce72f9621ad..c82142970357 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -48,7 +48,7 @@ void LanaiInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
void LanaiInstrInfo::storeRegToStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator Position,
- unsigned SourceRegister, bool IsKill, int FrameIndex,
+ Register SourceRegister, bool IsKill, int FrameIndex,
const TargetRegisterClass *RegisterClass,
const TargetRegisterInfo * /*RegisterInfo*/) const {
DebugLoc DL;
@@ -68,7 +68,7 @@ void LanaiInstrInfo::storeRegToStackSlot(
void LanaiInstrInfo::loadRegFromStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator Position,
- unsigned DestinationRegister, int FrameIndex,
+ Register DestinationRegister, int FrameIndex,
const TargetRegisterClass *RegisterClass,
const TargetRegisterInfo * /*RegisterInfo*/) const {
DebugLoc DL;
@@ -174,8 +174,8 @@ LanaiInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
return makeArrayRef(TargetFlags);
}
-bool LanaiInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &CmpMask,
+bool LanaiInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &CmpMask,
int &CmpValue) const {
switch (MI.getOpcode()) {
default:
@@ -183,7 +183,7 @@ bool LanaiInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
case Lanai::SFSUB_F_RI_LO:
case Lanai::SFSUB_F_RI_HI:
SrcReg = MI.getOperand(0).getReg();
- SrcReg2 = 0;
+ SrcReg2 = Register();
CmpMask = ~0;
CmpValue = MI.getOperand(1).getImm();
return true;
@@ -281,7 +281,7 @@ inline static unsigned flagSettingOpcodeVariant(unsigned OldOpcode) {
}
bool LanaiInstrInfo::optimizeCompareInstr(
- MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int /*CmpMask*/,
+ MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int /*CmpMask*/,
int CmpValue, const MachineRegisterInfo *MRI) const {
// Get the unique definition of SrcReg.
MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
@@ -454,9 +454,9 @@ bool LanaiInstrInfo::analyzeSelect(const MachineInstr &MI,
// Identify instructions that can be folded into a SELECT instruction, and
// return the defining instruction.
-static MachineInstr *canFoldIntoSelect(unsigned Reg,
+static MachineInstr *canFoldIntoSelect(Register Reg,
const MachineRegisterInfo &MRI) {
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
return nullptr;
if (!MRI.hasOneNonDBGUse(Reg))
return nullptr;
@@ -795,10 +795,10 @@ bool LanaiInstrInfo::getMemOperandWithOffsetWidth(
return true;
}
-bool LanaiInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
- const MachineOperand *&BaseOp,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const {
+bool LanaiInstrInfo::getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
switch (LdSt.getOpcode()) {
default:
return false;
@@ -811,7 +811,11 @@ bool LanaiInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
case Lanai::STH_RI:
case Lanai::LDBs_RI:
case Lanai::LDBz_RI:
- unsigned Width;
- return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
+ const MachineOperand *BaseOp;
+ OffsetIsScalable = false;
+ if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
+ return false;
+ BaseOps.push_back(BaseOp);
+ return true;
}
}
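
The getMemOperandsWithOffsetWidth hook reports a list of base operands plus an explicit offset-is-scalable flag, and the Lanai version simply wraps its older single-operand helper. A hedged sketch of that adapter shape with hypothetical Operand types (the real signatures live in TargetInstrInfo):

#include <cstdint>
#include <vector>

struct Operand {};                        // hypothetical stand-in for MachineOperand

// Old-style helper: a single base operand, offset and width by reference.
static bool getSingleBase(const Operand *&Base, int64_t &Offset, unsigned &Width) {
  static Operand Dummy;                   // pretend every access is [Dummy + 0], 4 bytes wide
  Base = &Dummy;
  Offset = 0;
  Width = 4;
  return true;
}

// New-style hook: a list of base operands plus an explicit scalable-offset
// flag, implemented by forwarding to the old helper.
static bool getBases(std::vector<const Operand *> &Bases, int64_t &Offset,
                     bool &OffsetIsScalable, unsigned &Width) {
  const Operand *Base = nullptr;
  OffsetIsScalable = false;               // fixed-size accesses only
  if (!getSingleBase(Base, Offset, Width))
    return false;
  Bases.push_back(Base);
  return true;
}
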
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.h
index c7741dd7437f..44c1e629a8e6 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.h
@@ -54,23 +54,24 @@ public:
void
storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator Position,
- unsigned SourceRegister, bool IsKill, int FrameIndex,
+ Register SourceRegister, bool IsKill, int FrameIndex,
const TargetRegisterClass *RegisterClass,
const TargetRegisterInfo *RegisterInfo) const override;
void
loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator Position,
- unsigned DestinationRegister, int FrameIndex,
+ Register DestinationRegister, int FrameIndex,
const TargetRegisterClass *RegisterClass,
const TargetRegisterInfo *RegisterInfo) const override;
bool expandPostRAPseudo(MachineInstr &MI) const override;
- bool getMemOperandWithOffset(const MachineInstr &LdSt,
- const MachineOperand *&BaseOp,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt,
+ SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+ bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const override;
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
const MachineOperand *&BaseOp,
@@ -94,15 +95,15 @@ public:
// For a comparison instruction, return the source registers in SrcReg and
// SrcReg2 if having two register operands, and the value it compares against
// in CmpValue. Return true if the comparison instruction can be analyzed.
- bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &CmpMask,
+ bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &CmpMask,
int &CmpValue) const override;
// See if the comparison instruction can be converted into something more
// efficient. E.g., on Lanai register-register instructions can set the flag
// register, obviating the need for a separate compare.
- bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
- unsigned SrcReg2, int CmpMask, int CmpValue,
+ bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
+ Register SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const override;
// Analyze the given select instruction, returning true if it cannot be
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMCInstLower.h b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMCInstLower.h
index 00d3ebb05045..6323319fae43 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMCInstLower.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMCInstLower.h
@@ -18,9 +18,7 @@ class MCInst;
class MCOperand;
class MCSymbol;
class MachineInstr;
-class MachineModuleInfoMachO;
class MachineOperand;
-class Mangler;
// LanaiMCInstLower - This class is used to lower an MachineInstr
// into an MCInst.
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp
index 7b4e0750ba08..eeef1d919925 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp
@@ -11,12 +11,3 @@
using namespace llvm;
void LanaiMachineFunctionInfo::anchor() {}
-
-unsigned LanaiMachineFunctionInfo::getGlobalBaseReg() {
- // Return if it has already been initialized.
- if (GlobalBaseReg)
- return GlobalBaseReg;
-
- return GlobalBaseReg =
- MF.getRegInfo().createVirtualRegister(&Lanai::GPRRegClass);
-}
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.h
index 2c97c619c246..de712637b5a4 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.h
@@ -24,29 +24,25 @@ namespace llvm {
class LanaiMachineFunctionInfo : public MachineFunctionInfo {
virtual void anchor();
- MachineFunction &MF;
-
// SRetReturnReg - Lanai ABI require that sret lowering includes
// returning the value of the returned struct in a register. This field
// holds the virtual register into which the sret argument is passed.
- unsigned SRetReturnReg;
+ Register SRetReturnReg;
// GlobalBaseReg - keeps track of the virtual register initialized for
// use as the global base register. This is used for PIC in some PIC
// relocation models.
- unsigned GlobalBaseReg;
+ Register GlobalBaseReg;
// VarArgsFrameIndex - FrameIndex for start of varargs area.
int VarArgsFrameIndex;
public:
explicit LanaiMachineFunctionInfo(MachineFunction &MF)
- : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0) {}
-
- unsigned getSRetReturnReg() const { return SRetReturnReg; }
- void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+ : VarArgsFrameIndex(0) {}
- unsigned getGlobalBaseReg();
+ Register getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(Register Reg) { SRetReturnReg = Reg; }
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
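
SRetReturnReg and GlobalBaseReg switch from unsigned to llvm::Register, whose default-constructed state is "no register", so the explicit zero initializers can go away. A tiny sketch of the Register behaviour relied on here; treat the exact member names as assumptions checked against this era's CodeGen/Register.h:

#include "llvm/CodeGen/Register.h"
#include <cassert>

void registerExamples() {
  llvm::Register None;                    // default-constructed: the "no register" state
  assert(!None.isValid());                // so fields no longer need an explicit 0 initializer

  llvm::Register R(42);                   // wraps a raw register number
  assert(unsigned(R) == 42);              // converts back for interfaces that still take unsigned
}
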
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
index 7c28debb94dd..64f87ae5f963 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
@@ -66,11 +66,6 @@ bool LanaiRegisterInfo::requiresRegisterScavenging(
return true;
}
-bool LanaiRegisterInfo::trackLivenessAfterRegAlloc(
- const MachineFunction & /*MF*/) const {
- return true;
-}
-
static bool isALUArithLoOpcode(unsigned Opcode) {
switch (Opcode) {
case Lanai::ADD_I_LO:
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiRegisterInfo.h
index 4e4da619d366..cbc95b273e1d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiRegisterInfo.h
@@ -34,8 +34,6 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
-
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSelectionDAGInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSelectionDAGInfo.cpp
index dff87a3e264d..307619c04481 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSelectionDAGInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSelectionDAGInfo.cpp
@@ -20,7 +20,7 @@ namespace llvm {
SDValue LanaiSelectionDAGInfo::EmitTargetCodeForMemcpy(
SelectionDAG & /*DAG*/, const SDLoc & /*dl*/, SDValue /*Chain*/,
- SDValue /*Dst*/, SDValue /*Src*/, SDValue Size, unsigned /*Align*/,
+ SDValue /*Dst*/, SDValue /*Src*/, SDValue Size, Align /*Alignment*/,
bool /*isVolatile*/, bool /*AlwaysInline*/,
MachinePointerInfo /*DstPtrInfo*/,
MachinePointerInfo /*SrcPtrInfo*/) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSelectionDAGInfo.h b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSelectionDAGInfo.h
index c5650a7c1f53..8355168a7396 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSelectionDAGInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSelectionDAGInfo.h
@@ -24,8 +24,8 @@ public:
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVolatile,
- bool AlwaysInline,
+ SDValue Size, Align Alignment,
+ bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const override;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.cpp
index 9a872c789bcc..ebf91e08fbc8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiSubtarget.cpp
@@ -23,7 +23,7 @@
using namespace llvm;
void LanaiSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
- std::string CPUName = CPU;
+ std::string CPUName = std::string(CPU);
if (CPUName.empty())
CPUName = "generic";
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.h
index d2ac40007e24..fb2bc0644fe8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetMachine.h
@@ -22,7 +22,6 @@
#include "llvm/Target/TargetMachine.h"
namespace llvm {
-class formatted_raw_ostream;
class LanaiTargetMachine : public LLVMTargetMachine {
LanaiSubtarget Subtarget;
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetObjectFile.cpp
index b0f7c090bb8e..a421f3156153 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetObjectFile.cpp
@@ -28,7 +28,6 @@ static cl::opt<unsigned> SSThreshold(
void LanaiTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- InitializeELF(TM.Options.UseInitArray);
SmallDataSection = getContext().getELFSection(
".sdata", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
@@ -117,13 +116,13 @@ bool LanaiTargetObjectFile::isConstantInSmallSection(const DataLayout &DL,
return isInSmallSection(DL.getTypeAllocSize(CN->getType()));
}
-MCSection *LanaiTargetObjectFile::getSectionForConstant(const DataLayout &DL,
- SectionKind Kind,
- const Constant *C,
- unsigned &Align) const {
+MCSection *LanaiTargetObjectFile::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C,
+ Align &Alignment) const {
if (isConstantInSmallSection(DL, C))
return SmallDataSection;
// Otherwise, we work the same as ELF.
- return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C, Align);
+ return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C,
+ Alignment);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetObjectFile.h b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetObjectFile.h
index 938a1e675b6a..404349465dbc 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetObjectFile.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetObjectFile.h
@@ -12,7 +12,6 @@
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
namespace llvm {
-class LanaiTargetMachine;
class LanaiTargetObjectFile : public TargetLoweringObjectFileELF {
MCSection *SmallDataSection;
MCSection *SmallBSSSection;
@@ -38,7 +37,7 @@ public:
MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C,
- unsigned &Align) const override;
+ Align &Alignment) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h
index a22d3a34f98c..7366d5059c9f 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h
@@ -49,7 +49,7 @@ public:
return TTI::PSK_Software;
}
- int getIntImmCost(const APInt &Imm, Type *Ty) {
+ int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
if (Imm == 0)
return TTI::TCC_Free;
@@ -66,17 +66,19 @@ public:
return 4 * TTI::TCC_Basic;
}
- int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) {
- return getIntImmCost(Imm, Ty);
+ int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
+ return getIntImmCost(Imm, Ty, CostKind);
}
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) {
- return getIntImmCost(Imm, Ty);
+ Type *Ty, TTI::TargetCostKind CostKind) {
+ return getIntImmCost(Imm, Ty, CostKind);
}
unsigned getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -87,7 +89,8 @@ public:
switch (ISD) {
default:
- return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+ Opd2Info,
Opd1PropInfo, Opd2PropInfo);
case ISD::MUL:
case ISD::SDIV:
@@ -98,7 +101,8 @@ public:
// instruction cost was arbitrarily chosen to reduce the desirability
// of emitting arithmetic instructions that are emulated in software.
// TODO: Investigate the performance impact given specialized lowerings.
- return 64 * BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ return 64 * BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+ Opd2Info,
Opd1PropInfo, Opd2PropInfo);
}
}
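
The cost hooks now thread a TTI::TargetCostKind through to the base implementation, so one query can be answered for throughput, latency, or code size; the Lanai table keeps its 64x penalty for arithmetic that is emulated in software. A small illustrative model of why the kind matters, with a hypothetical enum and made-up numbers rather than Lanai's real costs:

// Hypothetical mirror of TTI::TargetCostKind: one query, several metrics.
enum class CostKind { RecipThroughput, Latency, CodeSize };

// Cost of a multiply that must be emulated in software; the sensible answer
// depends on which metric the caller is optimizing for (numbers are made up).
unsigned emulatedMulCost(CostKind Kind) {
  switch (Kind) {
  case CostKind::RecipThroughput:
    return 64;                            // long libcall, discourage it when optimizing loops
  case CostKind::Latency:
    return 32;                            // rough latency estimate
  case CostKind::CodeSize:
    return 2;                             // the call sequence itself is short
  }
  return 1;                               // unreachable, keeps compilers quiet
}
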
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
index a6ce3d5eb4ff..0fb27a926003 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
@@ -74,10 +74,6 @@ public:
return false;
}
- void relaxInstruction(const MCInst & /*Inst*/,
- const MCSubtargetInfo & /*STI*/,
- MCInst & /*Res*/) const override {}
-
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp
index ccc413995917..7027d18126bb 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp
@@ -141,7 +141,7 @@ void LanaiInstPrinter::printInst(const MCInst *MI, uint64_t Address,
StringRef Annotation,
const MCSubtargetInfo & /*STI*/,
raw_ostream &OS) {
- if (!printAlias(MI, OS) && !printAliasInstr(MI, OS))
+ if (!printAlias(MI, OS) && !printAliasInstr(MI, Address, OS))
printInstruction(MI, Address, OS);
printAnnotation(OS, Annotation);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h
index a71a9497c691..ce6df2969d73 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h
@@ -44,9 +44,10 @@ public:
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
- bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
- void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx, raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS);
+ void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpIdx, unsigned PrintMethodIdx,
+ raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp
index f1c174897047..d8c7bd15aacb 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp
@@ -28,9 +28,6 @@ LanaiMCAsmInfo::LanaiMCAsmInfo(const Triple & /*TheTriple*/,
// Lanai assembly requires ".section" before ".bss"
UsesELFSectionDirectiveForBSS = true;
- // Use the integrated assembler instead of system one.
- UseIntegratedAssembler = true;
-
// Use '!' as comment string to correspond with old toolchain.
CommentString = "!";
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
index 9de15bf61c8c..2ff893273c92 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
@@ -52,7 +52,7 @@ static MCRegisterInfo *createLanaiMCRegisterInfo(const Triple & /*TT*/) {
static MCSubtargetInfo *
createLanaiMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- std::string CPUName = CPU;
+ std::string CPUName = std::string(CPU);
if (CPUName.empty())
CPUName = "generic";
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
index cf66d3226659..651ed36cdc24 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
@@ -22,14 +22,9 @@ class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
-class MCInstrAnalysis;
class MCObjectTargetWriter;
-class MCRelocationInfo;
class MCSubtargetInfo;
class Target;
-class Triple;
-class StringRef;
-class raw_pwrite_stream;
MCCodeEmitter *createLanaiMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
index 0995e80a0a09..9529b5e802d5 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
@@ -46,6 +46,8 @@ class MSP430AsmParser : public MCTargetAsmParser {
bool MatchingInlineAsm) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
@@ -154,12 +156,12 @@ public:
addExprOperand(Inst, Mem.Offset);
}
- bool isReg() const { return Kind == k_Reg; }
- bool isImm() const { return Kind == k_Imm; }
- bool isToken() const { return Kind == k_Tok; }
- bool isMem() const { return Kind == k_Mem; }
- bool isIndReg() const { return Kind == k_IndReg; }
- bool isPostIndReg() const { return Kind == k_PostIndReg; }
+ bool isReg() const override { return Kind == k_Reg; }
+ bool isImm() const override { return Kind == k_Imm; }
+ bool isToken() const override { return Kind == k_Tok; }
+ bool isMem() const override { return Kind == k_Mem; }
+ bool isIndReg() const { return Kind == k_IndReg; }
+ bool isPostIndReg() const { return Kind == k_PostIndReg; }
bool isCGImm() const {
if (Kind != k_Imm)
@@ -180,7 +182,7 @@ public:
return Tok;
}
- unsigned getReg() const {
+ unsigned getReg() const override {
assert(Kind == k_Reg && "Invalid access!");
return Reg;
}
@@ -220,10 +222,10 @@ public:
return std::make_unique<MSP430Operand>(k_PostIndReg, RegNum, S, E);
}
- SMLoc getStartLoc() const { return Start; }
- SMLoc getEndLoc() const { return End; }
+ SMLoc getStartLoc() const override { return Start; }
+ SMLoc getEndLoc() const override { return End; }
- virtual void print(raw_ostream &O) const {
+ void print(raw_ostream &O) const override {
switch (Kind) {
case k_Tok:
O << "Token " << Tok;
@@ -261,7 +263,7 @@ bool MSP430AsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
switch (MatchResult) {
case Match_Success:
Inst.setLoc(Loc);
- Out.EmitInstruction(Inst, STI);
+ Out.emitInstruction(Inst, STI);
return false;
case Match_MnemonicFail:
return Error(Loc, "invalid instruction mnemonic");
@@ -288,13 +290,28 @@ static unsigned MatchRegisterAltName(StringRef Name);
bool MSP430AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
+ switch (tryParseRegister(RegNo, StartLoc, EndLoc)) {
+ case MatchOperand_ParseFail:
+ return Error(StartLoc, "invalid register name");
+ case MatchOperand_Success:
+ return false;
+ case MatchOperand_NoMatch:
+ return true;
+ }
+
+ llvm_unreachable("unknown match result type");
+}
+
+OperandMatchResultTy MSP430AsmParser::tryParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) {
if (getLexer().getKind() == AsmToken::Identifier) {
auto Name = getLexer().getTok().getIdentifier().lower();
RegNo = MatchRegisterName(Name);
if (RegNo == MSP430::NoRegister) {
RegNo = MatchRegisterAltName(Name);
if (RegNo == MSP430::NoRegister)
- return true;
+ return MatchOperand_NoMatch;
}
AsmToken const &T = getParser().getTok();
@@ -302,10 +319,10 @@ bool MSP430AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
EndLoc = T.getEndLoc();
getLexer().Lex(); // eat register token
- return false;
+ return MatchOperand_Success;
}
- return Error(StartLoc, "invalid register name");
+ return MatchOperand_ParseFail;
}
bool MSP430AsmParser::parseJccInstruction(ParseInstructionInfo &Info,
@@ -414,7 +431,7 @@ bool MSP430AsmParser::ParseDirectiveRefSym(AsmToken DirectiveID) {
return TokError("expected identifier in directive");
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
- getStreamer().EmitSymbolAttribute(Sym, MCSA_Global);
+ getStreamer().emitSymbolAttribute(Sym, MCSA_Global);
return false;
}
@@ -523,7 +540,7 @@ bool MSP430AsmParser::ParseLiteralValues(unsigned Size, SMLoc L) {
const MCExpr *Value;
if (getParser().parseExpression(Value))
return true;
- getParser().getStreamer().EmitValue(Value, Size, L);
+ getParser().getStreamer().emitValue(Value, Size, L);
return false;
};
return (parseMany(parseOne));
@@ -545,7 +562,7 @@ static unsigned convertGR16ToGR8(unsigned Reg) {
case MSP430::SP: return MSP430::SPB;
case MSP430::SR: return MSP430::SRB;
case MSP430::CG: return MSP430::CGB;
- case MSP430::FP: return MSP430::FPB;
+ case MSP430::R4: return MSP430::R4B;
case MSP430::R5: return MSP430::R5B;
case MSP430::R6: return MSP430::R6B;
case MSP430::R7: return MSP430::R7B;
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
index 6aa76156bf14..d2902189ec40 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
@@ -65,7 +65,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMSP430Disassembler() {
static const unsigned GR8DecoderTable[] = {
MSP430::PCB, MSP430::SPB, MSP430::SRB, MSP430::CGB,
- MSP430::FPB, MSP430::R5B, MSP430::R6B, MSP430::R7B,
+ MSP430::R4B, MSP430::R5B, MSP430::R6B, MSP430::R7B,
MSP430::R8B, MSP430::R9B, MSP430::R10B, MSP430::R11B,
MSP430::R12B, MSP430::R13B, MSP430::R14B, MSP430::R15B
};
@@ -83,7 +83,7 @@ static DecodeStatus DecodeGR8RegisterClass(MCInst &MI, uint64_t RegNo,
static const unsigned GR16DecoderTable[] = {
MSP430::PC, MSP430::SP, MSP430::SR, MSP430::CG,
- MSP430::FP, MSP430::R5, MSP430::R6, MSP430::R7,
+ MSP430::R4, MSP430::R5, MSP430::R6, MSP430::R7,
MSP430::R8, MSP430::R9, MSP430::R10, MSP430::R11,
MSP430::R12, MSP430::R13, MSP430::R14, MSP430::R15
};
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
index 365e5da74de0..958212dc77c9 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
@@ -95,9 +95,6 @@ public:
return false;
}
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override {}
-
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
index 4e054f85ccc3..87ee312424c8 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
@@ -43,26 +43,26 @@ MSP430TargetELFStreamer::MSP430TargetELFStreamer(MCStreamer &S,
Streamer.SwitchSection(AttributeSection);
// Format version.
- Streamer.EmitIntValue(0x41, 1);
+ Streamer.emitInt8(0x41);
// Subsection length.
- Streamer.EmitIntValue(22, 4);
+ Streamer.emitInt32(22);
// Vendor name string, zero-terminated.
- Streamer.EmitBytes("mspabi");
- Streamer.EmitIntValue(0, 1);
+ Streamer.emitBytes("mspabi");
+ Streamer.emitInt8(0);
// Attribute vector scope tag. 1 stands for the entire file.
- Streamer.EmitIntValue(1, 1);
+ Streamer.emitInt8(1);
// Attribute vector length.
- Streamer.EmitIntValue(11, 4);
+ Streamer.emitInt32(11);
// OFBA_MSPABI_Tag_ISA(4) = 1, MSP430
- Streamer.EmitIntValue(4, 1);
- Streamer.EmitIntValue(1, 1);
+ Streamer.emitInt8(4);
+ Streamer.emitInt8(1);
// OFBA_MSPABI_Tag_Code_Model(6) = 1, Small
- Streamer.EmitIntValue(6, 1);
- Streamer.EmitIntValue(1, 1);
+ Streamer.emitInt8(6);
+ Streamer.emitInt8(1);
// OFBA_MSPABI_Tag_Data_Model(8) = 1, Small
- Streamer.EmitIntValue(8, 1);
- Streamer.EmitIntValue(1, 1);
+ Streamer.emitInt8(8);
+ Streamer.emitInt8(1);
}
MCELFStreamer &MSP430TargetELFStreamer::getStreamer() {
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp
index 0c6da5a35c68..420893f65d5b 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
void MSP430InstPrinter::printInst(const MCInst *MI, uint64_t Address,
StringRef Annot, const MCSubtargetInfo &STI,
raw_ostream &O) {
- if (!printAliasInstr(MI, O))
+ if (!printAliasInstr(MI, Address, O))
printInstruction(MI, Address, O);
printAnnotation(O, Annot);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h
index 200dc0e6db60..6a6b07f2eba0 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h
@@ -27,9 +27,10 @@ namespace llvm {
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
- bool printAliasInstr(const MCInst *MI, raw_ostream &O);
- void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx, raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &O);
+ void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpIdx, unsigned PrintMethodIdx,
+ raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
private:
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
index cfdc44ada771..de07b47096d3 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -24,5 +24,6 @@ MSP430MCAsmInfo::MSP430MCAsmInfo(const Triple &TT,
AlignmentIsInBytes = false;
UsesELFSectionDirectiveForBSS = true;
- UseIntegratedAssembler = true;
+
+ SupportsDebugInformation = true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430.h b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430.h
index 67f35b8034d9..34f0a37bced9 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430.h
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430.h
@@ -36,7 +36,6 @@ namespace MSP430CC {
namespace llvm {
class MSP430TargetMachine;
class FunctionPass;
- class formatted_raw_ostream;
FunctionPass *createMSP430ISelDag(MSP430TargetMachine &TM,
CodeGenOpt::Level OptLevel);
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 2f871b959a71..459188434f2c 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -57,7 +57,7 @@ namespace {
const char *ExtraCode, raw_ostream &O) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode, raw_ostream &O) override;
- void EmitInstruction(const MachineInstr *MI) override;
+ void emitInstruction(const MachineInstr *MI) override;
void EmitInterruptVectorSection(MachineFunction &ISR);
};
@@ -148,7 +148,7 @@ bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
//===----------------------------------------------------------------------===//
-void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void MSP430AsmPrinter::emitInstruction(const MachineInstr *MI) {
MSP430MCInstLower MCInstLowering(OutContext, *this);
MCInst TmpInst;
@@ -169,7 +169,7 @@ void MSP430AsmPrinter::EmitInterruptVectorSection(MachineFunction &ISR) {
OutStreamer->SwitchSection(IV);
const MCSymbol *FunctionSymbol = getSymbol(F);
- OutStreamer->EmitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
+ OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
OutStreamer->SwitchSection(Cur);
}
@@ -180,7 +180,7 @@ bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
}
SetupMachineFunction(MF);
- EmitFunctionBody();
+ emitFunctionBody();
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
index de60ad9bd7e6..4be8d0760e68 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -64,16 +64,16 @@ void MSP430FrameLowering::emitPrologue(MachineFunction &MF,
// Save FP into the appropriate stack slot...
BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r))
- .addReg(MSP430::FP, RegState::Kill);
+ .addReg(MSP430::R4, RegState::Kill);
// Update FP with the new base value...
- BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FP)
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::R4)
.addReg(MSP430::SP);
// Mark the FramePtr as live-in in every block except the entry.
for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
I != E; ++I)
- I->addLiveIn(MSP430::FP);
+ I->addLiveIn(MSP430::R4);
} else
NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize();
@@ -132,7 +132,7 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
NumBytes = FrameSize - CSSize;
// pop FP.
- BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FP);
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::R4);
} else
NumBytes = StackSize - CSSize;
@@ -154,7 +154,7 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
if (MFI.hasVarSizedObjects()) {
BuildMI(MBB, MBBI, DL,
- TII.get(MSP430::MOV16rr), MSP430::SP).addReg(MSP430::FP);
+ TII.get(MSP430::MOV16rr), MSP430::SP).addReg(MSP430::R4);
if (CSSize) {
MachineInstr *MI =
BuildMI(MBB, MBBI, DL,
@@ -176,11 +176,9 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
}
// FIXME: Can we eliminate these in favour of generic code?
-bool
-MSP430FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+bool MSP430FrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -202,11 +200,9 @@ MSP430FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
-bool
-MSP430FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+bool MSP430FrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -227,8 +223,6 @@ MachineBasicBlock::iterator MSP430FrameLowering::eliminateCallFramePseudoInstr(
MachineBasicBlock::iterator I) const {
const MSP430InstrInfo &TII =
*static_cast<const MSP430InstrInfo *>(MF.getSubtarget().getInstrInfo());
- unsigned StackAlign = getStackAlignment();
-
if (!hasReservedCallFrame(MF)) {
// If the stack pointer can be changed after prologue, turn the
// adjcallstackup instruction into a 'sub SP, <amt>' and the
@@ -240,7 +234,7 @@ MachineBasicBlock::iterator MSP430FrameLowering::eliminateCallFramePseudoInstr(
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
+ Amount = alignTo(Amount, getStackAlign());
MachineInstr *New = nullptr;
if (Old.getOpcode() == TII.getCallFrameSetupOpcode()) {
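
The call-frame code above drops the hand-written round-up in favour of alignTo with the frame lowering's Align-typed stack alignment. For power-of-two alignments the two formulations agree; a quick spot check, assuming llvm/Support/Alignment.h:

#include "llvm/Support/Alignment.h"
#include <cassert>
#include <cstdint>

// (Amount + StackAlign - 1) / StackAlign * StackAlign and llvm::alignTo agree
// for power-of-two alignments; a spot check with arbitrary example values.
void checkRoundUp() {
  const uint64_t StackAlign = 4;
  for (uint64_t Amount : {0, 1, 3, 4, 5, 17}) {
    const uint64_t Manual = (Amount + StackAlign - 1) / StackAlign * StackAlign;
    assert(Manual == llvm::alignTo(Amount, llvm::Align(StackAlign)));
  }
}
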
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430FrameLowering.h b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430FrameLowering.h
index 70e284053021..f6995edf4b0a 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430FrameLowering.h
@@ -36,12 +36,13 @@ public:
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
+ ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) const override;
- bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const override;
+ bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 8550230155c8..7dabb9b4abae 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -50,7 +50,7 @@ namespace {
const BlockAddress *BlockAddr = nullptr;
const char *ES = nullptr;
int JT = -1;
- unsigned Align = 0; // CP alignment.
+ Align Alignment; // CP alignment.
MSP430ISelAddressMode() = default;
@@ -74,12 +74,12 @@ namespace {
} else if (CP) {
errs() << " CP ";
CP->dump();
- errs() << " Align" << Align << '\n';
+ errs() << " Align" << Alignment.value() << '\n';
} else if (ES) {
errs() << "ES ";
errs() << ES << '\n';
} else if (JT != -1)
- errs() << " JT" << JT << " Align" << Align << '\n';
+ errs() << " JT" << JT << " Align" << Alignment.value() << '\n';
}
#endif
};
@@ -146,7 +146,7 @@ bool MSP430DAGToDAGISel::MatchWrapper(SDValue N, MSP430ISelAddressMode &AM) {
//AM.SymbolFlags = G->getTargetFlags();
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
AM.CP = CP->getConstVal();
- AM.Align = CP->getAlignment();
+ AM.Alignment = CP->getAlign();
AM.Disp += CP->getOffset();
//AM.SymbolFlags = CP->getTargetFlags();
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
@@ -263,8 +263,8 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue N,
MVT::i16, AM.Disp,
0/*AM.SymbolFlags*/);
else if (AM.CP)
- Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i16,
- AM.Align, AM.Disp, 0/*AM.SymbolFlags*/);
+ Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i16, AM.Alignment, AM.Disp,
+ 0 /*AM.SymbolFlags*/);
else if (AM.ES)
Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i16, 0/*AM.SymbolFlags*/);
else if (AM.JT != -1)
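The address-mode hunks above swap the raw "unsigned Align" field for the llvm::Align type returned by CP->getAlign(), so the constant-pool alignment is carried as a guaranteed non-zero power of two instead of a bare byte count. A rough standalone illustration of why such a wrapper helps, written as a toy type (ToyAlign is purely illustrative and is not LLVM's Align implementation):

    #include <cassert>
    #include <cstdint>

    // Toy alignment wrapper: stores log2 of the alignment, so zero or
    // non-power-of-two alignments simply cannot be represented.
    struct ToyAlign {
      uint8_t Shift = 0; // value() == 1 << Shift, so the default is 1, never 0.

      ToyAlign() = default;
      explicit ToyAlign(uint64_t Value) {
        assert(Value != 0 && (Value & (Value - 1)) == 0 && "power of 2 only");
        while ((1ull << Shift) < Value)
          ++Shift;
      }

      uint64_t value() const { return 1ull << Shift; }
    };

    int main() {
      assert(ToyAlign(4).value() == 4);
      assert(ToyAlign().value() == 1); // a default alignment is 1 byte, not 0
      return 0;
    }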
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index 37e6ea24d088..821339f50355 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -514,7 +514,7 @@ static void AnalyzeArguments(CCState &State,
// Handle byval arguments
if (ArgFlags.isByVal()) {
- State.HandleByVal(ValNo++, ArgVT, LocVT, LocInfo, 2, 2, ArgFlags);
+ State.HandleByVal(ValNo++, ArgVT, LocVT, LocInfo, 2, Align(2), ArgFlags);
continue;
}
@@ -863,13 +863,11 @@ SDValue MSP430TargetLowering::LowerCCCCallTo(
if (Flags.isByVal()) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i16);
- MemOp = DAG.getMemcpy(Chain, dl, PtrOff, Arg, SizeNode,
- Flags.getByValAlign(),
- /*isVolatile*/false,
- /*AlwaysInline=*/true,
- /*isTailCall=*/false,
- MachinePointerInfo(),
- MachinePointerInfo());
+ MemOp = DAG.getMemcpy(
+ Chain, dl, PtrOff, Arg, SizeNode, Flags.getNonZeroByValAlign(),
+ /*isVolatile*/ false,
+ /*AlwaysInline=*/true,
+ /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo());
} else {
MemOp = DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
}
@@ -1302,7 +1300,7 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
- MSP430::FP, VT);
+ MSP430::R4, VT);
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo());
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.h b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.h
index 650f9a704062..f23042a369fd 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.h
@@ -79,6 +79,10 @@ namespace llvm {
return MVT::i8;
}
+ MVT::SimpleValueType getCmpLibcallReturnType() const override {
+ return MVT::i16;
+ }
+
/// LowerOperation - Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
index 9e03334d6b62..130211878be1 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -35,7 +35,7 @@ MSP430InstrInfo::MSP430InstrInfo(MSP430Subtarget &STI)
void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIdx,
+ Register SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
@@ -46,7 +46,7 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdx),
MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
- MFI.getObjectAlignment(FrameIdx));
+ MFI.getObjectAlign(FrameIdx));
if (RC == &MSP430::GR16RegClass)
BuildMI(MBB, MI, DL, get(MSP430::MOV16mr))
@@ -62,7 +62,7 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIdx,
+ Register DestReg, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const{
DebugLoc DL;
@@ -73,7 +73,7 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdx),
MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
- MFI.getObjectAlignment(FrameIdx));
+ MFI.getObjectAlign(FrameIdx));
if (RC == &MSP430::GR16RegClass)
BuildMI(MBB, MI, DL, get(MSP430::MOV16rm))
@@ -160,18 +160,6 @@ reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
return false;
}
-bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
- if (!MI.isTerminator())
- return false;
-
- // Conditional branch is a special case.
- if (MI.isBranch() && !MI.isBarrier())
- return true;
- if (!MI.isPredicable())
- return true;
- return !isPredicated(MI);
-}
-
bool MSP430InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.h
index e3838772c061..710913b2d36f 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430InstrInfo.h
@@ -41,13 +41,13 @@ public:
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill,
+ Register SrcReg, bool isKill,
int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIdx,
+ Register DestReg, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
@@ -56,7 +56,6 @@ public:
// Branch folding goodness
bool
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
- bool isUnpredicatedTerminator(const MachineInstr &MI) const override;
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430MCInstLower.h b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430MCInstLower.h
index 910ad4bb12d5..4d0197d9e2b1 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430MCInstLower.h
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430MCInstLower.h
@@ -18,7 +18,6 @@ namespace llvm {
class MCOperand;
class MCSymbol;
class MachineInstr;
- class MachineModuleInfoMachO;
class MachineOperand;
/// MSP430MCInstLower - This class is used to lower a MachineInstr
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index 712519cfe38a..261db9e288f5 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -35,7 +35,7 @@ class MSP430MachineFunctionInfo : public MachineFunctionInfo {
/// SRetReturnReg - Some subtargets require that sret lowering includes
/// returning the value of the returned struct in a register. This field
/// holds the virtual register into which the sret argument is passed.
- unsigned SRetReturnReg = 0;
+ Register SRetReturnReg;
public:
MSP430MachineFunctionInfo() = default;
@@ -46,8 +46,8 @@ public:
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
- unsigned getSRetReturnReg() const { return SRetReturnReg; }
- void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+ Register getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(Register Reg) { SRetReturnReg = Reg; }
int getRAIndex() const { return ReturnAddrIndex; }
void setRAIndex(int Index) { ReturnAddrIndex = Index; }
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
index bec357a1548d..5583ebee2f31 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -39,7 +39,7 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const MSP430FrameLowering *TFI = getFrameLowering(*MF);
const Function* F = &MF->getFunction();
static const MCPhysReg CalleeSavedRegs[] = {
- MSP430::FP, MSP430::R5, MSP430::R6, MSP430::R7,
+ MSP430::R4, MSP430::R5, MSP430::R6, MSP430::R7,
MSP430::R8, MSP430::R9, MSP430::R10,
0
};
@@ -49,7 +49,7 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
0
};
static const MCPhysReg CalleeSavedRegsIntr[] = {
- MSP430::FP, MSP430::R5, MSP430::R6, MSP430::R7,
+ MSP430::R4, MSP430::R5, MSP430::R6, MSP430::R7,
MSP430::R8, MSP430::R9, MSP430::R10, MSP430::R11,
MSP430::R12, MSP430::R13, MSP430::R14, MSP430::R15,
0
@@ -86,8 +86,8 @@ BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Mark frame pointer as reserved if needed.
if (TFI->hasFP(MF)) {
- Reserved.set(MSP430::FPB);
- Reserved.set(MSP430::FP);
+ Reserved.set(MSP430::R4B);
+ Reserved.set(MSP430::R4);
}
return Reserved;
@@ -112,7 +112,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
DebugLoc dl = MI.getDebugLoc();
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
- unsigned BasePtr = (TFI->hasFP(MF) ? MSP430::FP : MSP430::SP);
+ unsigned BasePtr = (TFI->hasFP(MF) ? MSP430::R4 : MSP430::SP);
int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex);
// Skip the saved PC
@@ -156,5 +156,5 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Register MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const MSP430FrameLowering *TFI = getFrameLowering(MF);
- return TFI->hasFP(MF) ? MSP430::FP : MSP430::SP;
+ return TFI->hasFP(MF) ? MSP430::R4 : MSP430::SP;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430RegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
index 11003dba383f..61cc72d494b5 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -15,6 +15,7 @@ class MSP430Reg<bits<4> num, string n, list<string> alt = []> : Register<n> {
let Namespace = "MSP430";
let HWEncoding{3-0} = num;
let AltNames = alt;
+ let DwarfNumbers = [num];
}
class MSP430RegWithSubregs<bits<4> num, string n, list<Register> subregs,
@@ -24,6 +25,7 @@ class MSP430RegWithSubregs<bits<4> num, string n, list<Register> subregs,
let Namespace = "MSP430";
let HWEncoding{3-0} = num;
let AltNames = alt;
+ let DwarfNumbers = [num];
}
//===----------------------------------------------------------------------===//
@@ -34,7 +36,7 @@ def PCB : MSP430Reg<0, "r0", ["pc"]>;
def SPB : MSP430Reg<1, "r1", ["sp"]>;
def SRB : MSP430Reg<2, "r2", ["sr"]>;
def CGB : MSP430Reg<3, "r3", ["cg"]>;
-def FPB : MSP430Reg<4, "r4", ["fp"]>;
+def R4B : MSP430Reg<4, "r4", ["fp"]>;
def R5B : MSP430Reg<5, "r5">;
def R6B : MSP430Reg<6, "r6">;
def R7B : MSP430Reg<7, "r7">;
@@ -54,7 +56,7 @@ def PC : MSP430RegWithSubregs<0, "r0", [PCB], ["pc"]>;
def SP : MSP430RegWithSubregs<1, "r1", [SPB], ["sp"]>;
def SR : MSP430RegWithSubregs<2, "r2", [SRB], ["sr"]>;
def CG : MSP430RegWithSubregs<3, "r3", [CGB], ["cg"]>;
-def FP : MSP430RegWithSubregs<4, "r4", [FPB], ["fp"]>;
+def R4 : MSP430RegWithSubregs<4, "r4", [R4B], ["fp"]>;
def R5 : MSP430RegWithSubregs<5, "r5", [R5B]>;
def R6 : MSP430RegWithSubregs<6, "r6", [R6B]>;
def R7 : MSP430RegWithSubregs<7, "r7", [R7B]>;
@@ -72,7 +74,7 @@ def GR8 : RegisterClass<"MSP430", [i8], 8,
// Volatile registers
(add R12B, R13B, R14B, R15B, R11B, R10B, R9B, R8B, R7B, R6B, R5B,
// Frame pointer, sometimes allocable
- FPB,
+ R4B,
// Volatile, but not allocable
PCB, SPB, SRB, CGB)>;
@@ -80,6 +82,6 @@ def GR16 : RegisterClass<"MSP430", [i16], 16,
// Volatile registers
(add R12, R13, R14, R15, R11, R10, R9, R8, R7, R6, R5,
// Frame pointer, sometimes allocable
- FP,
+ R4,
// Volatile, but not allocable
PC, SP, SR, CG)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430Subtarget.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
index 20168773cd53..1f3c1d34f76f 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -43,7 +43,7 @@ MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
ExtendedInsts = false;
HWMultMode = NoHWMult;
- std::string CPUName = CPU;
+ StringRef CPUName = CPU;
if (CPUName.empty())
CPUName = "msp430";
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
index 81851427c0ed..827f24daad16 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -47,7 +47,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, const Triple &TT,
Options, getEffectiveRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
TLOF(std::make_unique<TargetLoweringObjectFileELF>()),
- Subtarget(TT, CPU, FS, *this) {
+ Subtarget(TT, std::string(CPU), std::string(FS), *this) {
initAsmInfo();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index e467ed36938b..9dbbdeb34dba 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -179,6 +179,8 @@ class MipsAsmParser : public MCTargetAsmParser {
/// Parse a register as used in CFI directives
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool parseParenSuffix(StringRef Name, OperandVector &Operands);
@@ -296,6 +298,12 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandSgtImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI);
+ bool expandSle(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
+ bool expandSleImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
bool expandRotation(MCInst &Inst, SMLoc IDLoc,
MCStreamer &Out, const MCSubtargetInfo *STI);
bool expandRotationImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
@@ -332,6 +340,12 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI);
+ bool expandSne(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
+ bool expandSneI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
bool expandMXTRAlias(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI);
@@ -347,6 +361,7 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseSetArchDirective();
bool parseSetFeature(uint64_t Feature);
bool isPicAndNotNxxAbi(); // Used by .cpload, .cprestore, and .cpsetup.
+ bool parseDirectiveCpAdd(SMLoc Loc);
bool parseDirectiveCpLoad(SMLoc Loc);
bool parseDirectiveCpLocal(SMLoc Loc);
bool parseDirectiveCpRestore(SMLoc Loc);
@@ -366,6 +381,7 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseSetMsaDirective();
bool parseSetNoMsaDirective();
bool parseSetNoDspDirective();
+ bool parseSetNoMips3DDirective();
bool parseSetReorderDirective();
bool parseSetNoReorderDirective();
bool parseSetMips16Directive();
@@ -2126,10 +2142,10 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
MCSymbolRefExpr::create(JalSym, MCSymbolRefExpr::VK_None,
getContext(), IDLoc);
- TOut.getStreamer().EmitRelocDirective(*TmpExpr,
- inMicroMipsMode() ? "R_MICROMIPS_JALR" : "R_MIPS_JALR",
+ TOut.getStreamer().emitRelocDirective(
+ *TmpExpr, inMicroMipsMode() ? "R_MICROMIPS_JALR" : "R_MIPS_JALR",
RelocJalrExpr, IDLoc, *STI);
- TOut.getStreamer().EmitLabel(TmpLabel);
+ TOut.getStreamer().emitLabel(TmpLabel);
}
Inst = JalrInst;
@@ -2311,7 +2327,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
tryExpandInstruction(Inst, IDLoc, Out, STI);
switch (ExpandResult) {
case MER_NotAMacro:
- Out.EmitInstruction(Inst, *STI);
+ Out.emitInstruction(Inst, *STI);
break;
case MER_Success:
break;
@@ -2512,6 +2528,14 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
case Mips::SGTImm64:
case Mips::SGTUImm64:
return expandSgtImm(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::SLE:
+ case Mips::SLEU:
+ return expandSle(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::SLEImm:
+ case Mips::SLEUImm:
+ case Mips::SLEImm64:
+ case Mips::SLEUImm64:
+ return expandSleImm(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::SLTImm64:
if (isInt<16>(Inst.getOperand(2).getImm())) {
Inst.setOpcode(Mips::SLTi64);
@@ -2588,6 +2612,10 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return expandSeq(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::SEQIMacro:
return expandSeqI(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::SNEMacro:
+ return expandSne(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::SNEIMacro:
+ return expandSneI(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::MFTC0: case Mips::MTTC0:
case Mips::MFTGPR: case Mips::MTTGPR:
case Mips::MFTLO: case Mips::MTTLO:
@@ -2638,7 +2666,7 @@ bool MipsAsmParser::expandJalWithRegs(MCInst &Inst, SMLoc IDLoc,
const MCOperand SecondRegOp = Inst.getOperand(1);
JalrInst.addOperand(SecondRegOp);
}
- Out.EmitInstruction(JalrInst, *STI);
+ Out.emitInstruction(JalrInst, *STI);
// If .set reorder is active and branch instruction has a delay slot,
// emit a NOP after it.
@@ -3386,8 +3414,8 @@ bool MipsAsmParser::expandLoadSingleImmToFPR(MCInst &Inst, SMLoc IDLoc,
MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());
getStreamer().SwitchSection(ReadOnlySection);
- getStreamer().EmitLabel(Sym, IDLoc);
- getStreamer().EmitIntValue(ImmOp32, 4);
+ getStreamer().emitLabel(Sym, IDLoc);
+ getStreamer().emitInt32(ImmOp32);
getStreamer().SwitchSection(CS);
if (emitPartialAddress(TOut, IDLoc, Sym))
@@ -3438,9 +3466,9 @@ bool MipsAsmParser::expandLoadDoubleImmToGPR(MCInst &Inst, SMLoc IDLoc,
MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());
getStreamer().SwitchSection(ReadOnlySection);
- getStreamer().EmitLabel(Sym, IDLoc);
- getStreamer().EmitValueToAlignment(8);
- getStreamer().EmitIntValue(ImmOp64, 8);
+ getStreamer().emitLabel(Sym, IDLoc);
+ getStreamer().emitValueToAlignment(8);
+ getStreamer().emitIntValue(ImmOp64, 8);
getStreamer().SwitchSection(CS);
unsigned TmpReg = getATReg(IDLoc);
@@ -3521,9 +3549,9 @@ bool MipsAsmParser::expandLoadDoubleImmToFPR(MCInst &Inst, bool Is64FPU,
MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());
getStreamer().SwitchSection(ReadOnlySection);
- getStreamer().EmitLabel(Sym, IDLoc);
- getStreamer().EmitValueToAlignment(8);
- getStreamer().EmitIntValue(ImmOp64, 8);
+ getStreamer().emitLabel(Sym, IDLoc);
+ getStreamer().emitValueToAlignment(8);
+ getStreamer().emitIntValue(ImmOp64, 8);
getStreamer().SwitchSection(CS);
if (emitPartialAddress(TOut, IDLoc, Sym))
@@ -3569,7 +3597,7 @@ bool MipsAsmParser::expandUncondBranchMMPseudo(MCInst &Inst, SMLoc IDLoc,
Inst.addOperand(MCOperand::createImm(Offset.getImm()));
}
}
- Out.EmitInstruction(Inst, *STI);
+ Out.emitInstruction(Inst, *STI);
// If .set reorder is active and branch instruction has a delay slot,
// emit a NOP after it.
@@ -3856,7 +3884,7 @@ bool MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc,
}
Inst.setOpcode(NewOpcode);
- Out.EmitInstruction(Inst, *STI);
+ Out.emitInstruction(Inst, *STI);
return false;
}
@@ -4258,7 +4286,7 @@ bool MipsAsmParser::expandDivRem(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
if (!Signed) {
if (!UseTraps)
- TOut.getStreamer().EmitLabel(BrTarget);
+ TOut.getStreamer().emitLabel(BrTarget);
TOut.emitR(isDiv ? Mips::MFLO : Mips::MFHI, RdReg, IDLoc, STI);
return false;
@@ -4269,7 +4297,7 @@ bool MipsAsmParser::expandDivRem(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return true;
if (!UseTraps)
- TOut.getStreamer().EmitLabel(BrTarget);
+ TOut.getStreamer().emitLabel(BrTarget);
TOut.emitRRI(Mips::ADDiu, ATReg, ZeroReg, -1, IDLoc, STI);
@@ -4297,7 +4325,7 @@ bool MipsAsmParser::expandDivRem(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
TOut.emitII(Mips::BREAK, 0x6, 0, IDLoc, STI);
}
- TOut.getStreamer().EmitLabel(BrTargetEnd);
+ TOut.getStreamer().emitLabel(BrTargetEnd);
TOut.emitR(isDiv ? Mips::MFLO : Mips::MFHI, RdReg, IDLoc, STI);
return false;
}
@@ -4636,6 +4664,88 @@ bool MipsAsmParser::expandSgtImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return false;
}
+bool MipsAsmParser::expandSle(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+ MipsTargetStreamer &TOut = getTargetStreamer();
+
+ assert(Inst.getNumOperands() == 3 && "Invalid operand count");
+ assert(Inst.getOperand(0).isReg() &&
+ Inst.getOperand(1).isReg() &&
+ Inst.getOperand(2).isReg() && "Invalid instruction operand.");
+
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ unsigned SrcReg = Inst.getOperand(1).getReg();
+ unsigned OpReg = Inst.getOperand(2).getReg();
+ unsigned OpCode;
+
+ warnIfNoMacro(IDLoc);
+
+ switch (Inst.getOpcode()) {
+ case Mips::SLE:
+ OpCode = Mips::SLT;
+ break;
+ case Mips::SLEU:
+ OpCode = Mips::SLTu;
+ break;
+ default:
+    llvm_unreachable("unexpected 'sle' opcode");
+ }
+
+ // $SrcReg <= $OpReg is equal to (not ($OpReg < $SrcReg))
+ TOut.emitRRR(OpCode, DstReg, OpReg, SrcReg, IDLoc, STI);
+ TOut.emitRRI(Mips::XORi, DstReg, DstReg, 1, IDLoc, STI);
+
+ return false;
+}
+
+bool MipsAsmParser::expandSleImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+ MipsTargetStreamer &TOut = getTargetStreamer();
+
+ assert(Inst.getNumOperands() == 3 && "Invalid operand count");
+ assert(Inst.getOperand(0).isReg() &&
+ Inst.getOperand(1).isReg() &&
+ Inst.getOperand(2).isImm() && "Invalid instruction operand.");
+
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ unsigned SrcReg = Inst.getOperand(1).getReg();
+ int64_t ImmValue = Inst.getOperand(2).getImm();
+ unsigned OpRegCode;
+
+ warnIfNoMacro(IDLoc);
+
+ switch (Inst.getOpcode()) {
+ case Mips::SLEImm:
+ case Mips::SLEImm64:
+ OpRegCode = Mips::SLT;
+ break;
+ case Mips::SLEUImm:
+ case Mips::SLEUImm64:
+ OpRegCode = Mips::SLTu;
+ break;
+ default:
+    llvm_unreachable("unexpected 'sle' opcode with immediate");
+ }
+
+ // $SrcReg <= Imm is equal to (not (Imm < $SrcReg))
+ unsigned ImmReg = DstReg;
+ if (DstReg == SrcReg) {
+ unsigned ATReg = getATReg(Inst.getLoc());
+ if (!ATReg)
+ return true;
+ ImmReg = ATReg;
+ }
+
+ if (loadImmediate(ImmValue, ImmReg, Mips::NoRegister, isInt<32>(ImmValue),
+ false, IDLoc, Out, STI))
+ return true;
+
+ TOut.emitRRR(OpRegCode, DstReg, ImmReg, SrcReg, IDLoc, STI);
+ TOut.emitRRI(Mips::XORi, DstReg, DstReg, 1, IDLoc, STI);
+
+ return false;
+}
+
bool MipsAsmParser::expandAliasImmediate(MCInst &Inst, SMLoc IDLoc,
MCStreamer &Out,
const MCSubtargetInfo *STI) {
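expandSle above lowers the sle/sleu pseudo-instructions with the identity stated in its comment: rs <= rt is the negation of rt < rs, so the parser emits slt/sltu with the operands swapped and then xori dst, dst, 1 to flip the 0/1 result. A short check of that identity in C++ (slt and sleExpansion are illustrative helper names; only the signed case is shown):

    #include <cassert>
    #include <cstdint>

    // slt-style comparison: 1 when a < b, else 0.
    static int slt(int32_t a, int32_t b) { return a < b ? 1 : 0; }

    // The expansion emitted by expandSle: slt dst, rt, rs ; xori dst, dst, 1.
    static int sleExpansion(int32_t rs, int32_t rt) { return slt(rt, rs) ^ 1; }

    int main() {
      const int32_t vals[] = {-5, -1, 0, 1, 7};
      for (int32_t rs : vals)
        for (int32_t rt : vals)
          assert(sleExpansion(rs, rt) == (rs <= rt ? 1 : 0));
      return 0;
    }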
@@ -5099,7 +5209,7 @@ bool MipsAsmParser::expandMulO(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
TOut.emitNop(IDLoc, STI);
TOut.emitII(Mips::BREAK, 6, 0, IDLoc, STI);
- TOut.getStreamer().EmitLabel(BrTarget);
+ TOut.getStreamer().emitLabel(BrTarget);
}
TOut.emitR(Mips::MFLO, DstReg, IDLoc, STI);
@@ -5136,7 +5246,7 @@ bool MipsAsmParser::expandMulOU(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
TOut.emitNop(IDLoc, STI);
TOut.emitII(Mips::BREAK, 6, 0, IDLoc, STI);
- TOut.getStreamer().EmitLabel(BrTarget);
+ TOut.getStreamer().emitLabel(BrTarget);
}
return false;
@@ -5325,6 +5435,88 @@ bool MipsAsmParser::expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return false;
}
+bool MipsAsmParser::expandSne(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+
+ MipsTargetStreamer &TOut = getTargetStreamer();
+
+ assert(Inst.getNumOperands() == 3 && "Invalid operand count");
+ assert(Inst.getOperand(0).isReg() &&
+ Inst.getOperand(1).isReg() &&
+ Inst.getOperand(2).isReg() && "Invalid instruction operand.");
+
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ unsigned SrcReg = Inst.getOperand(1).getReg();
+ unsigned OpReg = Inst.getOperand(2).getReg();
+
+ warnIfNoMacro(IDLoc);
+
+ if (SrcReg != Mips::ZERO && OpReg != Mips::ZERO) {
+ TOut.emitRRR(Mips::XOR, DstReg, SrcReg, OpReg, IDLoc, STI);
+ TOut.emitRRR(Mips::SLTu, DstReg, Mips::ZERO, DstReg, IDLoc, STI);
+ return false;
+ }
+
+ unsigned Reg = SrcReg == Mips::ZERO ? OpReg : SrcReg;
+ TOut.emitRRR(Mips::SLTu, DstReg, Mips::ZERO, Reg, IDLoc, STI);
+ return false;
+}
+
+bool MipsAsmParser::expandSneI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+ MipsTargetStreamer &TOut = getTargetStreamer();
+
+ assert(Inst.getNumOperands() == 3 && "Invalid operand count");
+ assert(Inst.getOperand(0).isReg() &&
+ Inst.getOperand(1).isReg() &&
+ Inst.getOperand(2).isImm() && "Invalid instruction operand.");
+
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ unsigned SrcReg = Inst.getOperand(1).getReg();
+ int64_t ImmValue = Inst.getOperand(2).getImm();
+
+ warnIfNoMacro(IDLoc);
+
+ if (ImmValue == 0) {
+ TOut.emitRRR(Mips::SLTu, DstReg, Mips::ZERO, SrcReg, IDLoc, STI);
+ return false;
+ }
+
+ if (SrcReg == Mips::ZERO) {
+ Warning(IDLoc, "comparison is always true");
+ if (loadImmediate(1, DstReg, Mips::NoRegister, true, false, IDLoc, Out,
+ STI))
+ return true;
+ return false;
+ }
+
+ unsigned Opc;
+ if (ImmValue > -0x8000 && ImmValue < 0) {
+ ImmValue = -ImmValue;
+ Opc = isGP64bit() ? Mips::DADDiu : Mips::ADDiu;
+ } else {
+ Opc = Mips::XORi;
+ }
+
+ if (isUInt<16>(ImmValue)) {
+ TOut.emitRRI(Opc, DstReg, SrcReg, ImmValue, IDLoc, STI);
+ TOut.emitRRR(Mips::SLTu, DstReg, Mips::ZERO, DstReg, IDLoc, STI);
+ return false;
+ }
+
+ unsigned ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+
+ if (loadImmediate(ImmValue, ATReg, Mips::NoRegister, isInt<32>(ImmValue),
+ false, IDLoc, Out, STI))
+ return true;
+
+ TOut.emitRRR(Mips::XOR, DstReg, SrcReg, ATReg, IDLoc, STI);
+ TOut.emitRRR(Mips::SLTu, DstReg, Mips::ZERO, DstReg, IDLoc, STI);
+ return false;
+}
+
// Map the DSP accumulator and control register to the corresponding gpr
// operand. Unlike the other aliases, the m(f|t)t(lo|hi|acx) instructions
// do not map the DSP registers contiguously to gpr registers.
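expandSne and expandSneI above build rd = (rs != rt) from an xor plus an unsigned compare against zero: xor rd, rs, rt is non-zero exactly when the operands differ, and sltu rd, $zero, rd turns non-zero into 1. For small negative immediates the immediate form uses addiu rd, rs, -imm instead of xori (whose 16-bit immediate is zero-extended), which still produces zero precisely when rs equals the immediate. A brief check of both identities (sneReg and sneNegImm are illustrative helper names):

    #include <cassert>
    #include <cstdint>

    // sltu $d, $zero, x : 1 when x != 0 (unsigned compare against zero).
    static uint32_t sltuZero(uint32_t x) { return 0u < x ? 1u : 0u; }

    // Register form: xor rd, rs, rt ; sltu rd, $zero, rd.
    static uint32_t sneReg(uint32_t rs, uint32_t rt) { return sltuZero(rs ^ rt); }

    // Immediate form for -0x8000 < imm < 0:
    //   addiu rd, rs, -imm ; sltu rd, $zero, rd.
    static uint32_t sneNegImm(int32_t rs, int32_t imm) {
      return sltuZero((uint32_t)(rs + (-imm)));
    }

    int main() {
      assert(sneReg(5, 5) == 0 && sneReg(5, 6) == 1);
      for (int32_t rs : {-7, -3, 0, 3})
        for (int32_t imm : {-7, -3, -1})
          assert(sneNegImm(rs, imm) == (rs != imm ? 1u : 0u));
      return 0;
    }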
@@ -6202,6 +6394,12 @@ bool MipsAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
+ return tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success;
+}
+
+OperandMatchResultTy MipsAsmParser::tryParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) {
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
OperandMatchResultTy ResTy = parseAnyRegister(Operands);
if (ResTy == MatchOperand_Success) {
@@ -6219,11 +6417,12 @@ bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
RegNo = isGP64bit() ? Operand.getGPR64Reg() : Operand.getGPR32Reg();
}
- return (RegNo == (unsigned)-1);
+ return (RegNo == (unsigned)-1) ? MatchOperand_NoMatch
+ : MatchOperand_Success;
}
assert(Operands.size() == 0);
- return (RegNo == (unsigned)-1);
+ return (RegNo == (unsigned)-1) ? MatchOperand_NoMatch : MatchOperand_Success;
}
bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
@@ -6976,6 +7175,21 @@ bool MipsAsmParser::parseSetNoDspDirective() {
return false;
}
+bool MipsAsmParser::parseSetNoMips3DDirective() {
+ MCAsmParser &Parser = getParser();
+ Parser.Lex(); // Eat "nomips3d".
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ clearFeatureBits(Mips::FeatureMips3D, "mips3d");
+ getTargetStreamer().emitDirectiveSetNoMips3D();
+ return false;
+}
+
bool MipsAsmParser::parseSetMips16Directive() {
MCAsmParser &Parser = getParser();
Parser.Lex(); // Eat "mips16".
@@ -7308,6 +7522,10 @@ bool MipsAsmParser::parseSetFeature(uint64_t Feature) {
switch (Feature) {
default:
llvm_unreachable("Unimplemented feature");
+ case Mips::FeatureMips3D:
+ setFeatureBits(Mips::FeatureMips3D, "mips3d");
+ getTargetStreamer().emitDirectiveSetMips3D();
+ break;
case Mips::FeatureDSP:
setFeatureBits(Mips::FeatureDSP, "dsp");
getTargetStreamer().emitDirectiveSetDsp();
@@ -7415,6 +7633,31 @@ bool MipsAsmParser::isPicAndNotNxxAbi() {
return inPicMode() && !(isABI_N32() || isABI_N64());
}
+bool MipsAsmParser::parseDirectiveCpAdd(SMLoc Loc) {
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Reg;
+ OperandMatchResultTy ResTy = parseAnyRegister(Reg);
+ if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) {
+ reportParseError("expected register");
+ return false;
+ }
+
+ MipsOperand &RegOpnd = static_cast<MipsOperand &>(*Reg[0]);
+ if (!RegOpnd.isGPRAsmReg()) {
+ reportParseError(RegOpnd.getStartLoc(), "invalid register");
+ return false;
+ }
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+ getParser().Lex(); // Consume the EndOfStatement.
+
+ getTargetStreamer().emitDirectiveCpAdd(RegOpnd.getGPR32Reg());
+ return false;
+}
+
bool MipsAsmParser::parseDirectiveCpLoad(SMLoc Loc) {
if (AssemblerOptions.back()->isReorder())
Warning(Loc, ".cpload should be inside a noreorder section");
@@ -7723,6 +7966,10 @@ bool MipsAsmParser::parseDirectiveSet() {
return parseSetFeature(Mips::FeatureDSPR2);
if (IdVal == "nodsp")
return parseSetNoDspDirective();
+ if (IdVal == "mips3d")
+ return parseSetFeature(Mips::FeatureMips3D);
+ if (IdVal == "nomips3d")
+ return parseSetNoMips3DDirective();
if (IdVal == "msa")
return parseSetMsaDirective();
if (IdVal == "nomsa")
@@ -7761,7 +8008,7 @@ bool MipsAsmParser::parseDirectiveGpWord() {
// method to evaluate the expression.
if (getParser().parseExpression(Value))
return true;
- getParser().getStreamer().EmitGPRel32Value(Value);
+ getParser().getStreamer().emitGPRel32Value(Value);
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(getLexer().getLoc(),
@@ -7779,7 +8026,7 @@ bool MipsAsmParser::parseDirectiveGpDWord() {
// method to evaluate the expression.
if (getParser().parseExpression(Value))
return true;
- getParser().getStreamer().EmitGPRel64Value(Value);
+ getParser().getStreamer().emitGPRel64Value(Value);
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(getLexer().getLoc(),
@@ -7797,7 +8044,7 @@ bool MipsAsmParser::parseDirectiveDtpRelWord() {
// method to evaluate the expression.
if (getParser().parseExpression(Value))
return true;
- getParser().getStreamer().EmitDTPRel32Value(Value);
+ getParser().getStreamer().emitDTPRel32Value(Value);
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(getLexer().getLoc(),
@@ -7815,7 +8062,7 @@ bool MipsAsmParser::parseDirectiveDtpRelDWord() {
// method to evaluate the expression.
if (getParser().parseExpression(Value))
return true;
- getParser().getStreamer().EmitDTPRel64Value(Value);
+ getParser().getStreamer().emitDTPRel64Value(Value);
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(getLexer().getLoc(),
@@ -7833,7 +8080,7 @@ bool MipsAsmParser::parseDirectiveTpRelWord() {
// method to evaluate the expression.
if (getParser().parseExpression(Value))
return true;
- getParser().getStreamer().EmitTPRel32Value(Value);
+ getParser().getStreamer().emitTPRel32Value(Value);
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(getLexer().getLoc(),
@@ -7851,7 +8098,7 @@ bool MipsAsmParser::parseDirectiveTpRelDWord() {
// method to evaluate the expression.
if (getParser().parseExpression(Value))
return true;
- getParser().getStreamer().EmitTPRel64Value(Value);
+ getParser().getStreamer().emitTPRel64Value(Value);
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(getLexer().getLoc(),
@@ -8323,6 +8570,10 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
MCAsmParser &Parser = getParser();
StringRef IDVal = DirectiveID.getString();
+ if (IDVal == ".cpadd") {
+ parseDirectiveCpAdd(DirectiveID.getLoc());
+ return false;
+ }
if (IDVal == ".cpload") {
parseDirectiveCpLoad(DirectiveID.getLoc());
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
index fca1149453c9..c5a1a3e6286e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
@@ -57,17 +57,17 @@ namespace llvm {
MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection) {
// Write out an Elf_Internal_ABIFlags_v0 struct
- OS.EmitIntValue(ABIFlagsSection.getVersionValue(), 2); // version
- OS.EmitIntValue(ABIFlagsSection.getISALevelValue(), 1); // isa_level
- OS.EmitIntValue(ABIFlagsSection.getISARevisionValue(), 1); // isa_rev
- OS.EmitIntValue(ABIFlagsSection.getGPRSizeValue(), 1); // gpr_size
- OS.EmitIntValue(ABIFlagsSection.getCPR1SizeValue(), 1); // cpr1_size
- OS.EmitIntValue(ABIFlagsSection.getCPR2SizeValue(), 1); // cpr2_size
- OS.EmitIntValue(ABIFlagsSection.getFpABIValue(), 1); // fp_abi
- OS.EmitIntValue(ABIFlagsSection.getISAExtensionValue(), 4); // isa_ext
- OS.EmitIntValue(ABIFlagsSection.getASESetValue(), 4); // ases
- OS.EmitIntValue(ABIFlagsSection.getFlags1Value(), 4); // flags1
- OS.EmitIntValue(ABIFlagsSection.getFlags2Value(), 4); // flags2
+ OS.emitIntValue(ABIFlagsSection.getVersionValue(), 2); // version
+ OS.emitIntValue(ABIFlagsSection.getISALevelValue(), 1); // isa_level
+ OS.emitIntValue(ABIFlagsSection.getISARevisionValue(), 1); // isa_rev
+ OS.emitIntValue(ABIFlagsSection.getGPRSizeValue(), 1); // gpr_size
+ OS.emitIntValue(ABIFlagsSection.getCPR1SizeValue(), 1); // cpr1_size
+ OS.emitIntValue(ABIFlagsSection.getCPR2SizeValue(), 1); // cpr2_size
+ OS.emitIntValue(ABIFlagsSection.getFpABIValue(), 1); // fp_abi
+ OS.emitIntValue(ABIFlagsSection.getISAExtensionValue(), 4); // isa_ext
+ OS.emitIntValue(ABIFlagsSection.getASESetValue(), 4); // ases
+ OS.emitIntValue(ABIFlagsSection.getFlags1Value(), 4); // flags1
+ OS.emitIntValue(ABIFlagsSection.getFlags2Value(), 4); // flags2
return OS;
}
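The operator<< above writes the .MIPS.abiflags payload field by field, and the sizes passed to emitIntValue describe a fixed 24-byte record. The same layout expressed as a plain struct, purely as a size/ordering illustration with field names taken from the comments in the hunk (the authoritative definition lives in the ABI flags headers, not here):

    #include <cstdint>

    // 24-byte .MIPS.abiflags record, in the order the streamer writes it above.
    struct AbiFlagsV0 {
      uint16_t version;  // 2 bytes
      uint8_t isa_level; // 1
      uint8_t isa_rev;   // 1
      uint8_t gpr_size;  // 1
      uint8_t cpr1_size; // 1
      uint8_t cpr2_size; // 1
      uint8_t fp_abi;    // 1
      uint32_t isa_ext;  // 4
      uint32_t ases;     // 4
      uint32_t flags1;   // 4
      uint32_t flags2;   // 4
    };

    static_assert(sizeof(AbiFlagsV0) == 24, "matches the 24 bytes emitted above");

    int main() { return 0; }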
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
index 534e6573b63c..046cc686b311 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
@@ -18,7 +18,6 @@ namespace llvm {
template <typename T> class ArrayRef;
class MCTargetOptions;
class StringRef;
-class TargetRegisterClass;
class MipsABIInfo {
public:
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index cca75dfc45c2..1126b871cb11 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -22,9 +22,7 @@ namespace llvm {
class MCAssembler;
struct MCFixupKindInfo;
-class MCObjectWriter;
class MCRegisterInfo;
-class MCSymbolELF;
class Target;
class MipsAsmBackend : public MCAsmBackend {
@@ -74,17 +72,6 @@ public:
return false;
}
- /// RelaxInstruction - Relax the instruction in the given fragment
- /// to the next wider instruction.
- ///
- /// \param Inst - The instruction to relax, which may be the same
- /// as the output.
- /// \param [out] Res On return, the relaxed instruction.
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override {}
-
- /// @}
-
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index cc3168790b98..9c317e3f8840 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -234,14 +234,15 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
case Mips::fixup_Mips_32:
case FK_Data_4:
return IsPCRel ? ELF::R_MIPS_PC32 : ELF::R_MIPS_32;
+ case Mips::fixup_Mips_64:
+ case FK_Data_8:
+ return IsPCRel
+ ? setRTypes(ELF::R_MIPS_PC32, ELF::R_MIPS_64, ELF::R_MIPS_NONE)
+ : (unsigned)ELF::R_MIPS_64;
}
if (IsPCRel) {
switch (Kind) {
- case FK_Data_8:
- Ctx.reportError(Fixup.getLoc(),
- "MIPS does not support 64-bit PC-relative relocations");
- return ELF::R_MIPS_NONE;
case Mips::fixup_Mips_Branch_PCRel:
case Mips::fixup_Mips_PC16:
return ELF::R_MIPS_PC16;
@@ -277,9 +278,6 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
}
switch (Kind) {
- case Mips::fixup_Mips_64:
- case FK_Data_8:
- return ELF::R_MIPS_64;
case FK_DTPRel_4:
return ELF::R_MIPS_TLS_DTPREL32;
case FK_DTPRel_8:
@@ -289,14 +287,9 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
case FK_TPRel_8:
return ELF::R_MIPS_TLS_TPREL64;
case FK_GPRel_4:
- if (is64Bit()) {
- unsigned Type = (unsigned)ELF::R_MIPS_NONE;
- Type = setRType((unsigned)ELF::R_MIPS_GPREL32, Type);
- Type = setRType2((unsigned)ELF::R_MIPS_64, Type);
- Type = setRType3((unsigned)ELF::R_MIPS_NONE, Type);
- return Type;
- }
- return ELF::R_MIPS_GPREL32;
+ return setRTypes(ELF::R_MIPS_GPREL32,
+ is64Bit() ? ELF::R_MIPS_64 : ELF::R_MIPS_NONE,
+ ELF::R_MIPS_NONE);
case Mips::fixup_Mips_GPREL16:
return ELF::R_MIPS_GPREL16;
case Mips::fixup_Mips_26:
@@ -329,34 +322,16 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_MIPS_GOT_OFST;
case Mips::fixup_Mips_GOT_DISP:
return ELF::R_MIPS_GOT_DISP;
- case Mips::fixup_Mips_GPOFF_HI: {
- unsigned Type = (unsigned)ELF::R_MIPS_NONE;
- Type = setRType((unsigned)ELF::R_MIPS_GPREL16, Type);
- Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type);
- Type = setRType3((unsigned)ELF::R_MIPS_HI16, Type);
- return Type;
- }
- case Mips::fixup_MICROMIPS_GPOFF_HI: {
- unsigned Type = (unsigned)ELF::R_MIPS_NONE;
- Type = setRType((unsigned)ELF::R_MICROMIPS_GPREL16, Type);
- Type = setRType2((unsigned)ELF::R_MICROMIPS_SUB, Type);
- Type = setRType3((unsigned)ELF::R_MICROMIPS_HI16, Type);
- return Type;
- }
- case Mips::fixup_Mips_GPOFF_LO: {
- unsigned Type = (unsigned)ELF::R_MIPS_NONE;
- Type = setRType((unsigned)ELF::R_MIPS_GPREL16, Type);
- Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type);
- Type = setRType3((unsigned)ELF::R_MIPS_LO16, Type);
- return Type;
- }
- case Mips::fixup_MICROMIPS_GPOFF_LO: {
- unsigned Type = (unsigned)ELF::R_MIPS_NONE;
- Type = setRType((unsigned)ELF::R_MICROMIPS_GPREL16, Type);
- Type = setRType2((unsigned)ELF::R_MICROMIPS_SUB, Type);
- Type = setRType3((unsigned)ELF::R_MICROMIPS_LO16, Type);
- return Type;
- }
+ case Mips::fixup_Mips_GPOFF_HI:
+ return setRTypes(ELF::R_MIPS_GPREL16, ELF::R_MIPS_SUB, ELF::R_MIPS_HI16);
+ case Mips::fixup_MICROMIPS_GPOFF_HI:
+ return setRTypes(ELF::R_MICROMIPS_GPREL16, ELF::R_MICROMIPS_SUB,
+ ELF::R_MICROMIPS_HI16);
+ case Mips::fixup_Mips_GPOFF_LO:
+ return setRTypes(ELF::R_MIPS_GPREL16, ELF::R_MIPS_SUB, ELF::R_MIPS_LO16);
+ case Mips::fixup_MICROMIPS_GPOFF_LO:
+ return setRTypes(ELF::R_MICROMIPS_GPREL16, ELF::R_MICROMIPS_SUB,
+ ELF::R_MICROMIPS_LO16);
case Mips::fixup_Mips_HIGHER:
return ELF::R_MIPS_HIGHER;
case Mips::fixup_Mips_HIGHEST:
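The relocation hunks above collapse the repeated setRType/setRType2/setRType3 sequences into a single setRTypes(First, Second, Third) call. A MIPS N64-style relocation entry can carry up to three relocation types that are applied in sequence, which is what the GP-offset and 64-bit cases need. A hedged sketch of such a packing helper, with the first type assumed to sit in the low byte and the second and third types in the next two bytes (packRTypes and the numeric values are illustrative; the exact bit positions are an assumption, not taken from this diff):

    #include <cassert>

    // Pack three 8-bit relocation types into one value: first type in bits
    // 0-7, second in bits 8-15, third in bits 16-23 (assumed layout).
    static unsigned packRTypes(unsigned First, unsigned Second, unsigned Third) {
      return (First & 0xff) | ((Second & 0xff) << 8) | ((Third & 0xff) << 16);
    }

    int main() {
      // Placeholder numbers standing in for R_MIPS_GPREL16 / R_MIPS_SUB /
      // R_MIPS_HI16; the packing, not the values, is the point here.
      const unsigned GPREL16 = 7, SUB = 36, HI16 = 5;
      unsigned Packed = packRTypes(GPREL16, SUB, HI16);
      assert((Packed & 0xff) == GPREL16);
      assert(((Packed >> 8) & 0xff) == SUB);
      assert(((Packed >> 16) & 0xff) == HI16);
      return 0;
    }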
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index 1b83e9445fb5..e6e32ec7f27c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -33,9 +33,9 @@ MipsELFStreamer::MipsELFStreamer(MCContext &Context,
std::unique_ptr<MipsRegInfoRecord>(RegInfoRecord));
}
-void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
+void MipsELFStreamer::emitInstruction(const MCInst &Inst,
const MCSubtargetInfo &STI) {
- MCELFStreamer::EmitInstruction(Inst, STI);
+ MCELFStreamer::emitInstruction(Inst, STI);
MCContext &Context = getContext();
const MCRegisterInfo *MCRegInfo = Context.getRegisterInfo();
@@ -53,20 +53,20 @@ void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
createPendingLabelRelocs();
}
-void MipsELFStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
+void MipsELFStreamer::emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
Frame.Begin = getContext().createTempSymbol();
- MCELFStreamer::EmitLabel(Frame.Begin);
+ MCELFStreamer::emitLabel(Frame.Begin);
}
-MCSymbol *MipsELFStreamer::EmitCFILabel() {
+MCSymbol *MipsELFStreamer::emitCFILabel() {
MCSymbol *Label = getContext().createTempSymbol("cfi", true);
- MCELFStreamer::EmitLabel(Label);
+ MCELFStreamer::emitLabel(Label);
return Label;
}
-void MipsELFStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
+void MipsELFStreamer::emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
Frame.End = getContext().createTempSymbol();
- MCELFStreamer::EmitLabel(Frame.End);
+ MCELFStreamer::emitLabel(Frame.End);
}
void MipsELFStreamer::createPendingLabelRelocs() {
@@ -85,8 +85,8 @@ void MipsELFStreamer::createPendingLabelRelocs() {
Labels.clear();
}
-void MipsELFStreamer::EmitLabel(MCSymbol *Symbol, SMLoc Loc) {
- MCELFStreamer::EmitLabel(Symbol);
+void MipsELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
+ MCELFStreamer::emitLabel(Symbol);
Labels.push_back(Symbol);
}
@@ -96,14 +96,14 @@ void MipsELFStreamer::SwitchSection(MCSection *Section,
Labels.clear();
}
-void MipsELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+void MipsELFStreamer::emitValueImpl(const MCExpr *Value, unsigned Size,
SMLoc Loc) {
- MCELFStreamer::EmitValueImpl(Value, Size, Loc);
+ MCELFStreamer::emitValueImpl(Value, Size, Loc);
Labels.clear();
}
-void MipsELFStreamer::EmitIntValue(uint64_t Value, unsigned Size) {
- MCELFStreamer::EmitIntValue(Value, Size);
+void MipsELFStreamer::emitIntValue(uint64_t Value, unsigned Size) {
+ MCELFStreamer::emitIntValue(Value, Size);
Labels.clear();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
index 2febfbc69b6f..f6a2c039c0c3 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -41,12 +41,12 @@ public:
/// \p Inst is actually emitted. For example, we can inspect the operands and
/// gather sufficient information that allows us to reason about the register
/// usage for the translation unit.
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
+ void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
/// Overriding this function allows us to record all labels that should be
/// marked as microMIPS. Based on this data marking is done in
/// EmitInstruction.
- void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
+ void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
/// Overriding this function allows us to dismiss all labels that are
/// candidates for marking as microMIPS when .section directive is processed.
@@ -56,14 +56,14 @@ public:
/// Overriding these functions allows us to dismiss all labels that are
/// candidates for marking as microMIPS when .word/.long/.4byte etc
/// directives are emitted.
- void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override;
- void EmitIntValue(uint64_t Value, unsigned Size) override;
+ void emitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override;
+ void emitIntValue(uint64_t Value, unsigned Size) override;
// Overriding these functions allows us to avoid recording of these labels
// in EmitLabel and later marking them as microMIPS.
- void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override;
- void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override;
- MCSymbol *EmitCFILabel() override;
+ void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override;
+ void emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override;
+ MCSymbol *emitCFILabel() override;
/// Emits all the option records stored up until the point it's called.
void EmitMipsOptionRecords();
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
index 649ba20324bf..3700d6309e1a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
@@ -109,7 +109,7 @@ void MipsInstPrinter::printInst(const MCInst *MI, uint64_t Address,
}
// Try to print any aliases first.
- if (!printAliasInstr(MI, O) && !printAlias(*MI, O))
+ if (!printAliasInstr(MI, Address, O) && !printAlias(*MI, O))
printInstruction(MI, Address, O);
printAnnotation(O, Annot);
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h
index 0b1ee800e440..3f534a2f1843 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h
@@ -86,12 +86,17 @@ public:
void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
const MCSubtargetInfo &STI, raw_ostream &O) override;
- bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
- void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx, raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS);
+ void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpIdx, unsigned PrintMethodIdx,
+ raw_ostream &O);
private:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum,
+ raw_ostream &O) {
+ printOperand(MI, OpNum, O);
+ }
template <unsigned Bits, unsigned Offset = 0>
void printUImm(const MCInst *MI, int opNum, raw_ostream &O);
void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index 5182205edaea..9c85a39bc348 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -50,5 +50,4 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple,
ExceptionsType = ExceptionHandling::DwarfCFI;
DwarfRegNumForCFI = true;
HasMipsExpressions = true;
- UseIntegratedAssembler = true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 846f508005f5..9de34cc0e787 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -129,7 +129,7 @@ void MipsMCCodeEmitter::EmitByte(unsigned char C, raw_ostream &OS) const {
OS << (char)C;
}
-void MipsMCCodeEmitter::EmitInstruction(uint64_t Val, unsigned Size,
+void MipsMCCodeEmitter::emitInstruction(uint64_t Val, unsigned Size,
const MCSubtargetInfo &STI,
raw_ostream &OS) const {
// Output the instruction encoding in little endian byte order.
@@ -137,8 +137,8 @@ void MipsMCCodeEmitter::EmitInstruction(uint64_t Val, unsigned Size,
// mips32r2: 4 | 3 | 2 | 1
// microMIPS: 2 | 1 | 4 | 3
if (IsLittleEndian && Size == 4 && isMicroMips(STI)) {
- EmitInstruction(Val >> 16, 2, STI, OS);
- EmitInstruction(Val, 2, STI, OS);
+ emitInstruction(Val >> 16, 2, STI, OS);
+ emitInstruction(Val, 2, STI, OS);
} else {
for (unsigned i = 0; i < Size; ++i) {
unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8;
@@ -226,7 +226,7 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
if (!Size)
llvm_unreachable("Desc.getSize() returns 0");
- EmitInstruction(Binary, Size, STI, OS);
+ emitInstruction(Binary, Size, STI, OS);
}
/// getBranchTargetOpValue - Return binary encoding of the branch
@@ -723,21 +723,8 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
return 0;
}
- if (Kind == MCExpr::SymbolRef) {
- Mips::Fixups FixupKind = Mips::Fixups(0);
-
- switch(cast<MCSymbolRefExpr>(Expr)->getKind()) {
- default: llvm_unreachable("Unknown fixup kind!");
- break;
- case MCSymbolRefExpr::VK_None:
- // FIXME: This is ok for O32/N32 but not N64.
- FixupKind = Mips::fixup_Mips_32;
- break;
- } // switch
-
- Fixups.push_back(MCFixup::create(0, Expr, MCFixupKind(FixupKind)));
- return 0;
- }
+ if (Kind == MCExpr::SymbolRef)
+ Ctx.reportError(Expr->getLoc(), "expected an immediate");
return 0;
}
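The renamed emitInstruction above keeps the existing byte-ordering rule: on a little-endian target a 32-bit microMIPS encoding is emitted high halfword first, then low halfword, each halfword little-endian, which gives the 2 | 1 | 4 | 3 pattern in the comment, while a mips32r2 word is plain little-endian, 4 | 3 | 2 | 1. A standalone sketch of that ordering (emit32LE is an illustrative name; bytes are numbered 1-4 from most to least significant, as in the comment):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Emit a 32-bit encoding on a little-endian target. Byte 1 is the most
    // significant byte of Val, matching the comment in emitInstruction.
    static std::vector<uint8_t> emit32LE(uint32_t Val, bool MicroMips) {
      std::vector<uint8_t> Out;
      auto emit16 = [&Out](uint16_t Half) { // little-endian halfword
        Out.push_back(Half & 0xff);
        Out.push_back(Half >> 8);
      };
      if (MicroMips) {
        emit16(Val >> 16);    // high halfword first -> bytes 2 | 1
        emit16(Val & 0xffff); // then low halfword   -> bytes 4 | 3
      } else {
        for (unsigned i = 0; i < 4; ++i)          // plain little-endian word
          Out.push_back((Val >> (i * 8)) & 0xff); // -> bytes 4 | 3 | 2 | 1
      }
      return Out;
    }

    int main() {
      uint32_t Val = 0x11223344; // byte 1 = 0x11 (MSB) ... byte 4 = 0x44 (LSB)
      assert((emit32LE(Val, false) ==
              std::vector<uint8_t>{0x44, 0x33, 0x22, 0x11}));
      assert((emit32LE(Val, true) ==
              std::vector<uint8_t>{0x22, 0x11, 0x44, 0x33}));
      return 0;
    }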
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
index ff6e1d62b05f..16e94c723b34 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
@@ -44,7 +44,7 @@ public:
void EmitByte(unsigned char C, raw_ostream &OS) const;
- void EmitInstruction(uint64_t Val, unsigned Size, const MCSubtargetInfo &STI,
+ void emitInstruction(uint64_t Val, unsigned Size, const MCSubtargetInfo &STI,
raw_ostream &OS) const;
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 809be99ff3f4..b7ecb0fdca5e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -29,8 +29,6 @@ class MCTargetOptions;
class StringRef;
class Target;
class Triple;
-class raw_ostream;
-class raw_pwrite_stream;
MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index 0544758f8a25..eade2d9bd745 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -105,7 +105,7 @@ private:
MaskInst.addOperand(MCOperand::createReg(AddrReg));
MaskInst.addOperand(MCOperand::createReg(AddrReg));
MaskInst.addOperand(MCOperand::createReg(MaskReg));
- MipsELFStreamer::EmitInstruction(MaskInst, STI);
+ MipsELFStreamer::emitInstruction(MaskInst, STI);
}
// Sandbox indirect branch or return instruction by inserting mask operation
@@ -113,10 +113,10 @@ private:
void sandboxIndirectJump(const MCInst &MI, const MCSubtargetInfo &STI) {
unsigned AddrReg = MI.getOperand(0).getReg();
- EmitBundleLock(false);
+ emitBundleLock(false);
emitMask(AddrReg, IndirectBranchMaskReg, STI);
- MipsELFStreamer::EmitInstruction(MI, STI);
- EmitBundleUnlock();
+ MipsELFStreamer::emitInstruction(MI, STI);
+ emitBundleUnlock();
}
// Sandbox memory access or SP change. Insert mask operation before and/or
@@ -124,26 +124,26 @@ private:
void sandboxLoadStoreStackChange(const MCInst &MI, unsigned AddrIdx,
const MCSubtargetInfo &STI, bool MaskBefore,
bool MaskAfter) {
- EmitBundleLock(false);
+ emitBundleLock(false);
if (MaskBefore) {
// Sandbox memory access.
unsigned BaseReg = MI.getOperand(AddrIdx).getReg();
emitMask(BaseReg, LoadStoreStackMaskReg, STI);
}
- MipsELFStreamer::EmitInstruction(MI, STI);
+ MipsELFStreamer::emitInstruction(MI, STI);
if (MaskAfter) {
// Sandbox SP change.
unsigned SPReg = MI.getOperand(0).getReg();
assert((Mips::SP == SPReg) && "Unexpected stack-pointer register.");
emitMask(SPReg, LoadStoreStackMaskReg, STI);
}
- EmitBundleUnlock();
+ emitBundleUnlock();
}
public:
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to mask dangerous instructions.
- void EmitInstruction(const MCInst &Inst,
+ void emitInstruction(const MCInst &Inst,
const MCSubtargetInfo &STI) override {
// Sandbox indirect jumps.
if (isIndirectJump(Inst)) {
@@ -181,25 +181,25 @@ public:
report_fatal_error("Dangerous instruction in branch delay slot!");
// Start the sandboxing sequence by emitting call.
- EmitBundleLock(true);
+ emitBundleLock(true);
if (IsIndirectCall) {
unsigned TargetReg = Inst.getOperand(1).getReg();
emitMask(TargetReg, IndirectBranchMaskReg, STI);
}
- MipsELFStreamer::EmitInstruction(Inst, STI);
+ MipsELFStreamer::emitInstruction(Inst, STI);
PendingCall = true;
return;
}
if (PendingCall) {
// Finish the sandboxing sequence by emitting branch delay.
- MipsELFStreamer::EmitInstruction(Inst, STI);
- EmitBundleUnlock();
+ MipsELFStreamer::emitInstruction(Inst, STI);
+ emitBundleUnlock();
PendingCall = false;
return;
}
// None of the sandboxing applies, just emit the instruction.
- MipsELFStreamer::EmitInstruction(Inst, STI);
+ MipsELFStreamer::emitInstruction(Inst, STI);
}
};
@@ -270,7 +270,7 @@ MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context,
S->getAssembler().setRelaxAll(true);
// Set bundle-alignment as required by the NaCl ABI for the target.
- S->EmitBundleAlignMode(Log2(MIPS_NACL_BUNDLE_ALIGN));
+ S->emitBundleAlignMode(Log2(MIPS_NACL_BUNDLE_ALIGN));
return S;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
index bdfb70aa9813..a4a953bcd7c3 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
@@ -40,17 +40,17 @@ void MipsRegInfoRecord::EmitMipsOptionRecord() {
Sec->setAlignment(Align(8));
Streamer->SwitchSection(Sec);
- Streamer->EmitIntValue(ELF::ODK_REGINFO, 1); // kind
- Streamer->EmitIntValue(40, 1); // size
- Streamer->EmitIntValue(0, 2); // section
- Streamer->EmitIntValue(0, 4); // info
- Streamer->EmitIntValue(ri_gprmask, 4);
- Streamer->EmitIntValue(0, 4); // pad
- Streamer->EmitIntValue(ri_cprmask[0], 4);
- Streamer->EmitIntValue(ri_cprmask[1], 4);
- Streamer->EmitIntValue(ri_cprmask[2], 4);
- Streamer->EmitIntValue(ri_cprmask[3], 4);
- Streamer->EmitIntValue(ri_gp_value, 8);
+ Streamer->emitInt8(ELF::ODK_REGINFO); // kind
+ Streamer->emitInt8(40); // size
+ Streamer->emitInt16(0); // section
+ Streamer->emitInt32(0); // info
+ Streamer->emitInt32(ri_gprmask);
+ Streamer->emitInt32(0); // pad
+ Streamer->emitInt32(ri_cprmask[0]);
+ Streamer->emitInt32(ri_cprmask[1]);
+ Streamer->emitInt32(ri_cprmask[2]);
+ Streamer->emitInt32(ri_cprmask[3]);
+ Streamer->emitIntValue(ri_gp_value, 8);
} else {
MCSectionELF *Sec = Context.getELFSection(".reginfo", ELF::SHT_MIPS_REGINFO,
ELF::SHF_ALLOC, 24, "");
@@ -58,13 +58,13 @@ void MipsRegInfoRecord::EmitMipsOptionRecord() {
Sec->setAlignment(MTS->getABI().IsN32() ? Align(8) : Align(4));
Streamer->SwitchSection(Sec);
- Streamer->EmitIntValue(ri_gprmask, 4);
- Streamer->EmitIntValue(ri_cprmask[0], 4);
- Streamer->EmitIntValue(ri_cprmask[1], 4);
- Streamer->EmitIntValue(ri_cprmask[2], 4);
- Streamer->EmitIntValue(ri_cprmask[3], 4);
+ Streamer->emitInt32(ri_gprmask);
+ Streamer->emitInt32(ri_cprmask[0]);
+ Streamer->emitInt32(ri_cprmask[1]);
+ Streamer->emitInt32(ri_cprmask[2]);
+ Streamer->emitInt32(ri_cprmask[3]);
assert((ri_gp_value & 0xffffffff) == ri_gp_value);
- Streamer->EmitIntValue(ri_gp_value, 4);
+ Streamer->emitInt32(ri_gp_value);
}
Streamer->PopSection();
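
The MipsOptionRecord change swaps EmitIntValue(x, n) for the sized emitInt8/emitInt16/emitInt32 helpers wherever the width is fixed, keeping emitIntValue only for the 8-byte ri_gp_value. A standalone sketch of that wrapper relationship (a mock class, not MCStreamer; the field values are placeholders):

    #include <cstdint>
    #include <cstdio>

    // Mock streamer: emitIntValue writes `Size` little-endian bytes; the sized
    // helpers are thin wrappers, which is all the hunk above relies on.
    struct MockStreamer {
      void emitIntValue(uint64_t Value, unsigned Size) {
        for (unsigned i = 0; i < Size; ++i)
          std::printf("%02x ", unsigned((Value >> (8 * i)) & 0xff));
        std::printf("\n");
      }
      void emitInt8(uint64_t V) { emitIntValue(V, 1); }
      void emitInt16(uint64_t V) { emitIntValue(V, 2); }
      void emitInt32(uint64_t V) { emitIntValue(V, 4); }
    };

    int main() {
      MockStreamer S;
      S.emitInt8(1);        // record kind
      S.emitInt8(40);       // record size
      S.emitInt16(0);       // section
      S.emitInt32(0);       // info
      S.emitIntValue(0, 8); // the 64-bit ri_gp_value keeps the generic form
    }
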
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 054dc79f4aa9..6ec8fe805968 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -109,6 +109,9 @@ void MipsTargetStreamer::emitDirectiveSetHardFloat() {
void MipsTargetStreamer::emitDirectiveSetDsp() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetDspr2() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips3D() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetNoMips3D() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveCpAdd(unsigned RegNo) {}
void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {}
void MipsTargetStreamer::emitDirectiveCpLocal(unsigned RegNo) {
// .cplocal $reg
@@ -169,7 +172,7 @@ void MipsTargetStreamer::emitR(unsigned Opcode, unsigned Reg0, SMLoc IDLoc,
TmpInst.setOpcode(Opcode);
TmpInst.addOperand(MCOperand::createReg(Reg0));
TmpInst.setLoc(IDLoc);
- getStreamer().EmitInstruction(TmpInst, *STI);
+ getStreamer().emitInstruction(TmpInst, *STI);
}
void MipsTargetStreamer::emitRX(unsigned Opcode, unsigned Reg0, MCOperand Op1,
@@ -179,7 +182,7 @@ void MipsTargetStreamer::emitRX(unsigned Opcode, unsigned Reg0, MCOperand Op1,
TmpInst.addOperand(MCOperand::createReg(Reg0));
TmpInst.addOperand(Op1);
TmpInst.setLoc(IDLoc);
- getStreamer().EmitInstruction(TmpInst, *STI);
+ getStreamer().emitInstruction(TmpInst, *STI);
}
void MipsTargetStreamer::emitRI(unsigned Opcode, unsigned Reg0, int32_t Imm,
@@ -199,7 +202,7 @@ void MipsTargetStreamer::emitII(unsigned Opcode, int16_t Imm1, int16_t Imm2,
TmpInst.addOperand(MCOperand::createImm(Imm1));
TmpInst.addOperand(MCOperand::createImm(Imm2));
TmpInst.setLoc(IDLoc);
- getStreamer().EmitInstruction(TmpInst, *STI);
+ getStreamer().emitInstruction(TmpInst, *STI);
}
void MipsTargetStreamer::emitRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1,
@@ -211,7 +214,7 @@ void MipsTargetStreamer::emitRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1,
TmpInst.addOperand(MCOperand::createReg(Reg1));
TmpInst.addOperand(Op2);
TmpInst.setLoc(IDLoc);
- getStreamer().EmitInstruction(TmpInst, *STI);
+ getStreamer().emitInstruction(TmpInst, *STI);
}
void MipsTargetStreamer::emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1,
@@ -230,7 +233,7 @@ void MipsTargetStreamer::emitRRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1,
TmpInst.addOperand(MCOperand::createReg(Reg2));
TmpInst.addOperand(Op3);
TmpInst.setLoc(IDLoc);
- getStreamer().EmitInstruction(TmpInst, *STI);
+ getStreamer().emitInstruction(TmpInst, *STI);
}
void MipsTargetStreamer::emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1,
@@ -251,7 +254,7 @@ void MipsTargetStreamer::emitRRIII(unsigned Opcode, unsigned Reg0,
TmpInst.addOperand(MCOperand::createImm(Imm1));
TmpInst.addOperand(MCOperand::createImm(Imm2));
TmpInst.setLoc(IDLoc);
- getStreamer().EmitInstruction(TmpInst, *STI);
+ getStreamer().emitInstruction(TmpInst, *STI);
}
void MipsTargetStreamer::emitAddu(unsigned DstReg, unsigned SrcReg,
@@ -609,6 +612,16 @@ void MipsTargetAsmStreamer::emitDirectiveSetNoDsp() {
MipsTargetStreamer::emitDirectiveSetNoDsp();
}
+void MipsTargetAsmStreamer::emitDirectiveSetMips3D() {
+ OS << "\t.set\tmips3d\n";
+ MipsTargetStreamer::emitDirectiveSetMips3D();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetNoMips3D() {
+ OS << "\t.set\tnomips3d\n";
+ MipsTargetStreamer::emitDirectiveSetNoMips3D();
+}
+
void MipsTargetAsmStreamer::emitDirectiveSetPop() {
OS << "\t.set\tpop\n";
MipsTargetStreamer::emitDirectiveSetPop();
@@ -650,6 +663,12 @@ void MipsTargetAsmStreamer::emitFMask(unsigned FPUBitmask,
OS << "," << FPUTopSavedRegOff << '\n';
}
+void MipsTargetAsmStreamer::emitDirectiveCpAdd(unsigned RegNo) {
+ OS << "\t.cpadd\t$"
+ << StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << "\n";
+ forbidModuleDirective();
+}
+
void MipsTargetAsmStreamer::emitDirectiveCpLoad(unsigned RegNo) {
OS << "\t.cpload\t$"
<< StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << "\n";
@@ -883,9 +902,9 @@ void MipsTargetELFStreamer::finish() {
if (Alignment) {
OS.SwitchSection(&Section);
if (Section.UseCodeAlign())
- OS.EmitCodeAlignment(Alignment, Alignment);
+ OS.emitCodeAlignment(Alignment, Alignment);
else
- OS.EmitValueToAlignment(Alignment, 0, 1, Alignment);
+ OS.emitValueToAlignment(Alignment, 0, 1, Alignment);
}
}
}
@@ -997,17 +1016,17 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) {
OS.SwitchSection(Sec);
- OS.EmitValueImpl(ExprRef, 4);
+ OS.emitValueImpl(ExprRef, 4);
- OS.EmitIntValue(GPRInfoSet ? GPRBitMask : 0, 4); // reg_mask
- OS.EmitIntValue(GPRInfoSet ? GPROffset : 0, 4); // reg_offset
+ OS.emitIntValue(GPRInfoSet ? GPRBitMask : 0, 4); // reg_mask
+ OS.emitIntValue(GPRInfoSet ? GPROffset : 0, 4); // reg_offset
- OS.EmitIntValue(FPRInfoSet ? FPRBitMask : 0, 4); // fpreg_mask
- OS.EmitIntValue(FPRInfoSet ? FPROffset : 0, 4); // fpreg_offset
+ OS.emitIntValue(FPRInfoSet ? FPRBitMask : 0, 4); // fpreg_mask
+ OS.emitIntValue(FPRInfoSet ? FPROffset : 0, 4); // fpreg_offset
- OS.EmitIntValue(FrameInfoSet ? FrameOffset : 0, 4); // frame_offset
- OS.EmitIntValue(FrameInfoSet ? FrameReg : 0, 4); // frame_reg
- OS.EmitIntValue(FrameInfoSet ? ReturnReg : 0, 4); // return_reg
+ OS.emitIntValue(FrameInfoSet ? FrameOffset : 0, 4); // frame_offset
+ OS.emitIntValue(FrameInfoSet ? FrameReg : 0, 4); // frame_reg
+ OS.emitIntValue(FrameInfoSet ? ReturnReg : 0, 4); // return_reg
// The .end directive marks the end of a procedure. Invalidate
// the information gathered up until this point.
@@ -1017,7 +1036,7 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) {
// .end also implicitly sets the size.
MCSymbol *CurPCSym = Context.createTempSymbol();
- OS.EmitLabel(CurPCSym);
+ OS.emitLabel(CurPCSym);
const MCExpr *Size = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(CurPCSym, MCSymbolRefExpr::VK_None, Context),
ExprRef, Context);
@@ -1108,6 +1127,17 @@ void MipsTargetELFStreamer::emitFMask(unsigned FPUBitmask,
FPROffset = FPUTopSavedRegOff;
}
+void MipsTargetELFStreamer::emitDirectiveCpAdd(unsigned RegNo) {
+ // .cpadd $reg
+ // This directive inserts code to add $gp to the argument's register
+ // when support for position independent code is enabled.
+ if (!Pic)
+ return;
+
+ emitAddu(RegNo, RegNo, GPReg, getABI().IsN64(), &STI);
+ forbidModuleDirective();
+}
+
void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
// .cpload $reg
// This directive expands to:
@@ -1139,7 +1169,7 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
MCA.getContext()),
MCA.getContext());
TmpInst.addOperand(MCOperand::createExpr(HiSym));
- getStreamer().EmitInstruction(TmpInst, STI);
+ getStreamer().emitInstruction(TmpInst, STI);
TmpInst.clear();
@@ -1152,7 +1182,7 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
MCA.getContext()),
MCA.getContext());
TmpInst.addOperand(MCOperand::createExpr(LoSym));
- getStreamer().EmitInstruction(TmpInst, STI);
+ getStreamer().emitInstruction(TmpInst, STI);
TmpInst.clear();
@@ -1160,7 +1190,7 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
TmpInst.addOperand(MCOperand::createReg(GPReg));
TmpInst.addOperand(MCOperand::createReg(GPReg));
TmpInst.addOperand(MCOperand::createReg(RegNo));
- getStreamer().EmitInstruction(TmpInst, STI);
+ getStreamer().emitInstruction(TmpInst, STI);
forbidModuleDirective();
}
@@ -1269,7 +1299,7 @@ void MipsTargetELFStreamer::emitDirectiveCpreturn(unsigned SaveLocation,
Inst.addOperand(MCOperand::createReg(Mips::SP));
Inst.addOperand(MCOperand::createImm(SaveLocation));
}
- getStreamer().EmitInstruction(Inst, STI);
+ getStreamer().emitInstruction(Inst, STI);
forbidModuleDirective();
}
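
The new emitDirectiveCpAdd hook added above is a no-op unless PIC is enabled; under PIC the ELF streamer lowers ".cpadd $reg" to an add of $gp into the named register via the existing emitAddu helper (a 64-bit add under N64). A standalone sketch of that decision with the register and ABI state reduced to plain values (illustrative types, not the MipsTargetStreamer API):

    #include <cstdio>
    #include <string>

    // Minimal model of the .cpadd lowering: add $gp into the target register,
    // but only when position-independent code is being generated.
    struct CpAddLowering {
      bool Pic;   // true when PIC code generation is in effect
      bool IsN64; // the N64 ABI uses 64-bit adds

      void emitAddu(const std::string &Dst, const std::string &Src,
                    const std::string &GP) const {
        std::printf("\t%s\t%s, %s, %s\n", IsN64 ? "daddu" : "addu",
                    Dst.c_str(), Src.c_str(), GP.c_str());
      }

      void emitDirectiveCpAdd(const std::string &Reg) const {
        if (!Pic) // non-PIC: the directive expands to nothing
          return;
        emitAddu(Reg, Reg, "$gp");
      }
    };

    int main() {
      CpAddLowering PicN64{true, true}, NoPic{false, false};
      PicN64.emitDirectiveCpAdd("$t9"); // -> daddu $t9, $t9, $gp
      NoPic.emitDirectiveCpAdd("$t9");  // -> (nothing)
    }
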
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
index e9fb9b310e3b..101d080f9567 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This files descributes the formats of the microMIPS instruction set.
+// This files describes the formats of the microMIPS instruction set.
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
index b707f1b96184..269ad8b548a4 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This files describes the defintions of the microMIPSr3 instructions.
+// This files describes the definitions of the microMIPSr3 instructions.
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp
index db93b3d80ede..55d3c59cbf03 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp
@@ -376,12 +376,12 @@ static bool CheckXWPInstr(MachineInstr *MI, bool ReduceToLwp,
// Returns true if the registers Reg1 and Reg2 are consecutive
static bool ConsecutiveRegisters(unsigned Reg1, unsigned Reg2) {
- static SmallVector<unsigned, 31> Registers = {
- Mips::AT, Mips::V0, Mips::V1, Mips::A0, Mips::A1, Mips::A2, Mips::A3,
- Mips::T0, Mips::T1, Mips::T2, Mips::T3, Mips::T4, Mips::T5, Mips::T6,
- Mips::T7, Mips::S0, Mips::S1, Mips::S2, Mips::S3, Mips::S4, Mips::S5,
- Mips::S6, Mips::S7, Mips::T8, Mips::T9, Mips::K0, Mips::K1, Mips::GP,
- Mips::SP, Mips::FP, Mips::RA};
+ constexpr std::array<unsigned, 31> Registers = {
+ {Mips::AT, Mips::V0, Mips::V1, Mips::A0, Mips::A1, Mips::A2, Mips::A3,
+ Mips::T0, Mips::T1, Mips::T2, Mips::T3, Mips::T4, Mips::T5, Mips::T6,
+ Mips::T7, Mips::S0, Mips::S1, Mips::S2, Mips::S3, Mips::S4, Mips::S5,
+ Mips::S6, Mips::S7, Mips::T8, Mips::T9, Mips::K0, Mips::K1, Mips::GP,
+ Mips::SP, Mips::FP, Mips::RA}};
for (uint8_t i = 0; i < Registers.size() - 1; i++) {
if (Registers[i] == Reg1) {
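
The ConsecutiveRegisters hunk only changes the table's storage (a constexpr std::array instead of a function-local SmallVector); the lookup walk stays the same: two registers are consecutive when they sit next to each other in the ABI-ordered table. A standalone sketch over a hypothetical register enum (not the Mips:: values):

    #include <array>
    #include <cstdio>

    // Hypothetical register numbering standing in for the Mips:: enum values.
    enum Reg { AT, V0, V1, A0, A1, A2, A3, T0 };

    // Registers listed in ABI order; "consecutive" means adjacent in this table.
    constexpr std::array<Reg, 8> Registers = {{AT, V0, V1, A0, A1, A2, A3, T0}};

    static bool consecutiveRegisters(Reg R1, Reg R2) {
      for (std::size_t i = 0; i + 1 < Registers.size(); ++i)
        if (Registers[i] == R1)
          return Registers[i + 1] == R2;
      return false;
    }

    int main() {
      std::printf("%d %d\n", consecutiveRegisters(A0, A1),  // 1
                  consecutiveRegisters(A1, A0));            // 0
    }
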
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips.td b/contrib/llvm-project/llvm/lib/Target/Mips/Mips.td
index b8a69815cc12..7fe750249c58 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips.td
@@ -54,22 +54,6 @@ class AdditionalRequires<list<Predicate> preds> {
}
//===----------------------------------------------------------------------===//
-// Register File, Calling Conv, Instruction Descriptions
-//===----------------------------------------------------------------------===//
-
-include "MipsRegisterInfo.td"
-include "MipsSchedule.td"
-include "MipsInstrInfo.td"
-include "MipsCallingConv.td"
-include "MipsRegisterBanks.td"
-
-// Avoid forward declaration issues.
-include "MipsScheduleP5600.td"
-include "MipsScheduleGeneric.td"
-
-def MipsInstrInfo : InstrInfo;
-
-//===----------------------------------------------------------------------===//
// Mips Subtarget features //
//===----------------------------------------------------------------------===//
@@ -177,6 +161,8 @@ def FeatureDSPR3
: SubtargetFeature<"dspr3", "HasDSPR3", "true", "Mips DSP-R3 ASE",
[ FeatureDSP, FeatureDSPR2 ]>;
+def FeatureMips3D : SubtargetFeature<"mips3d", "Has3D", "true", "Mips 3D ASE">;
+
def FeatureMSA : SubtargetFeature<"msa", "HasMSA", "true", "Mips MSA ASE">;
def FeatureEVA : SubtargetFeature<"eva", "HasEVA", "true", "Mips EVA ASE">;
@@ -221,6 +207,23 @@ def FeatureUseIndirectJumpsHazard : SubtargetFeature<"use-indirect-jump-hazard",
"UseIndirectJumpsHazard",
"true", "Use indirect jump"
" guards to prevent certain speculation based attacks">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "MipsRegisterInfo.td"
+include "MipsSchedule.td"
+include "MipsInstrInfo.td"
+include "MipsCallingConv.td"
+include "MipsRegisterBanks.td"
+
+// Avoid forward declaration issues.
+include "MipsScheduleP5600.td"
+include "MipsScheduleGeneric.td"
+
+def MipsInstrInfo : InstrInfo;
+
//===----------------------------------------------------------------------===//
// Mips processors supported.
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
index 5a2a916a6b7a..fefa1134b021 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -62,8 +62,8 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF,
TII.makeFrame(Mips::SP, StackSize, MBB, MBBI);
// emit ".cfi_def_cfa_offset StackSize"
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, -StackSize));
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -109,11 +109,9 @@ void Mips16FrameLowering::emitEpilogue(MachineFunction &MF,
TII.restoreFrame(Mips::SP, StackSize, MBB, MBBI);
}
-bool Mips16FrameLowering::
-spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+bool Mips16FrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
//
@@ -137,10 +135,9 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
-bool Mips16FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+bool Mips16FrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
//
// Registers RA,S0,S1 are the callee saved registers and they will be restored
// with the restore instruction during emitEpilogue.
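
The spill/restore hooks now take lightweight views (ArrayRef for the read-only spill path, MutableArrayRef where restore may update entries) instead of std::vector references. A standalone sketch of the same read-only/mutable split, using C++20 std::span as a stand-in for the LLVM view types and an illustrative CalleeSavedInfo:

    #include <cstdio>
    #include <span>
    #include <vector>

    struct CalleeSavedInfo { int Reg; bool Restored = false; };

    // Read-only view: spilling only needs to inspect the entries.
    static void spill(std::span<const CalleeSavedInfo> CSI) {
      for (const CalleeSavedInfo &I : CSI)
        std::printf("spill $%d\n", I.Reg);
    }

    // Mutable view: restoring may update per-entry state in place.
    static void restore(std::span<CalleeSavedInfo> CSI) {
      for (CalleeSavedInfo &I : CSI) {
        std::printf("restore $%d\n", I.Reg);
        I.Restored = true;
      }
    }

    int main() {
      std::vector<CalleeSavedInfo> CSI = {{31}, {16}, {17}}; // RA, S0, S1
      spill(CSI);   // vector converts implicitly to the const view
      restore(CSI);
    }
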
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16FrameLowering.h b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16FrameLowering.h
index 6b62453f8dfe..7f0f1cd4ea37 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16FrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16FrameLowering.h
@@ -27,13 +27,14 @@ public:
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
+ ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) const override;
- bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const override;
+ bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16HardFloat.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16HardFloat.cpp
index e9a3c7ec4b19..cc1f72c03632 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16HardFloat.cpp
@@ -43,7 +43,7 @@ namespace {
} // end anonymous namespace
-static void EmitInlineAsm(LLVMContext &C, BasicBlock *BB, StringRef AsmText) {
+static void emitInlineAsm(LLVMContext &C, BasicBlock *BB, StringRef AsmText) {
std::vector<Type *> AsmArgTypes;
std::vector<Value *> AsmArgs;
@@ -260,7 +260,7 @@ static void assureFPCallStub(Function &F, Module *M,
return;
LLVMContext &Context = M->getContext();
bool LE = TM.isLittleEndian();
- std::string Name = F.getName();
+ std::string Name(F.getName());
std::string SectionName = ".mips16.call.fp." + Name;
std::string StubName = "__call_stub_fp_" + Name;
//
@@ -339,7 +339,7 @@ static void assureFPCallStub(Function &F, Module *M,
AsmText += "jr $$18\n";
else
AsmText += "jr $$25\n";
- EmitInlineAsm(Context, BB, AsmText);
+ emitInlineAsm(Context, BB, AsmText);
new UnreachableInst(Context, BB);
}
@@ -448,7 +448,7 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV,
bool PicMode = TM.isPositionIndependent();
bool LE = TM.isLittleEndian();
LLVMContext &Context = M->getContext();
- std::string Name = F->getName();
+ std::string Name(F->getName());
std::string SectionName = ".mips16.fn." + Name;
std::string StubName = "__fn_stub_" + Name;
std::string LocalName = "$$__fn_local_" + Name;
@@ -475,7 +475,7 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV,
AsmText += swapFPIntParams(PV, M, LE, false);
AsmText += "jr $$25\n";
AsmText += LocalName + " = " + Name + "\n";
- EmitInlineAsm(Context, BB, AsmText);
+ emitInlineAsm(Context, BB, AsmText);
new UnreachableInst(FStub->getContext(), BB);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index 768d54fc9c24..ddd28d095e51 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -72,7 +72,7 @@ void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
DebugLoc DL;
- Register V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ Register V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg(MF);
const TargetRegisterClass *RC = &Mips::CPU16RegsRegClass;
V0 = RegInfo.createVirtualRegister(RC);
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
index 5425df77d9b8..a3b86bdc2ca0 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -492,7 +492,7 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
ExternalSymbolSDNode *S = cast<ExternalSymbolSDNode>(JumpTarget);
JumpTarget = getAddrGlobal(S, CLI.DL, JumpTarget.getValueType(), DAG,
MipsII::MO_GOT, Chain,
- FuncInfo->callPtrInfo(S->getSymbol()));
+ FuncInfo->callPtrInfo(MF, S->getSymbol()));
} else
RegsToPass.push_front(std::make_pair((unsigned)Mips::T9, Callee));
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
index d2a1ba39cb0e..3403ec01aef2 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -105,7 +105,7 @@ Mips16InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
void Mips16InstrInfo::storeRegToStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
+ Register SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI,
int64_t Offset) const {
@@ -123,7 +123,7 @@ void Mips16InstrInfo::storeRegToStack(MachineBasicBlock &MBB,
void Mips16InstrInfo::loadRegFromStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- unsigned DestReg, int FI,
+ Register DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI,
int64_t Offset) const {
@@ -182,7 +182,7 @@ unsigned Mips16InstrInfo::getOppositeBranchOpc(unsigned Opc) const {
}
static void addSaveRestoreRegs(MachineInstrBuilder &MIB,
- const std::vector<CalleeSavedInfo> &CSI,
+ ArrayRef<CalleeSavedInfo> CSI,
unsigned Flags = 0) {
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
// Add the callee-saved register as live-in. Do not add if the register is
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.h
index 2ff849cb2ca2..294afd6460f6 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.h
@@ -54,14 +54,14 @@ public:
void storeRegToStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
+ Register SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI,
int64_t Offset) const override;
void loadRegFromStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
+ Register DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI,
int64_t Offset) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.td
index 19ea50c89b96..990202b23bc0 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16InstrInfo.td
@@ -1642,7 +1642,7 @@ def : Mips16Pat<(select (i32 (setle CPU16Regs:$a, CPU16Regs:$b)),
CPU16Regs:$b, CPU16Regs:$a)>;
//
-// unnsigned
+// unsigned
// x = (a <= b)? x : y
//
// if (b < a) x = y
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
index 5703f585a6a2..f6f43da9abf8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -53,12 +53,10 @@ bool Mips16RegisterInfo::useFPForScavengingIndex
return false;
}
-bool Mips16RegisterInfo::saveScavengerRegister
- (MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator &UseMI,
- const TargetRegisterClass *RC,
- unsigned Reg) const {
+bool Mips16RegisterInfo::saveScavengerRegister(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI, const TargetRegisterClass *RC,
+ Register Reg) const {
DebugLoc DL;
const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo();
TII.copyPhysReg(MBB, I, DL, Mips::T0, Reg, true);
@@ -96,7 +94,7 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
// 3. Locations for callee-saved registers.
// Everything else is referenced relative to whatever register
// getFrameRegister() returns.
- unsigned FrameReg;
+ Register FrameReg;
if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)
FrameReg = Mips::SP;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16RegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16RegisterInfo.h
index fca78b43f96b..ff115b30162b 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16RegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16RegisterInfo.h
@@ -16,7 +16,6 @@
#include "MipsRegisterInfo.h"
namespace llvm {
-class Mips16InstrInfo;
class Mips16RegisterInfo : public MipsRegisterInfo {
public:
@@ -29,10 +28,10 @@ public:
bool useFPForScavengingIndex(const MachineFunction &MF) const override;
bool saveScavengerRegister(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator &UseMI,
- const TargetRegisterClass *RC,
- unsigned Reg) const override;
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ Register Reg) const override;
const TargetRegisterClass *intRegClass(unsigned Size) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td
index 306289d56e4b..bd62a56d3008 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -1248,5 +1248,19 @@ def : MipsInstAlias<"sgtu $rs, $imm", (SGTUImm64 GPR64Opnd:$rs,
GPR64Opnd:$rs,
imm64:$imm), 0>, GPR_64;
+def SLEImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rd),
+ (ins GPR64Opnd:$rs, imm64:$imm),
+ "sle\t$rd, $rs, $imm">, GPR_64;
+def : MipsInstAlias<"sle $rs, $imm", (SLEImm64 GPR64Opnd:$rs,
+ GPR64Opnd:$rs,
+ imm64:$imm), 0>, GPR_64;
+
+def SLEUImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rd),
+ (ins GPR64Opnd:$rs, imm64:$imm),
+ "sleu\t$rd, $rs, $imm">, GPR_64;
+def : MipsInstAlias<"sleu $rs, $imm", (SLEUImm64 GPR64Opnd:$rs,
+ GPR64Opnd:$rs,
+ imm64:$imm), 0>, GPR_64;
+
def : MipsInstAlias<"rdhwr $rt, $rs",
(RDHWR64 GPR64Opnd:$rt, HWRegsOpnd:$rs, 0), 1>, GPR_64;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 8f75336dce5a..cc073fbf5231 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -174,18 +174,18 @@ static void emitDirectiveRelocJalr(const MachineInstr &MI,
MCSymbolRefExpr::create(OffsetLabel, OutContext);
const MCExpr *CaleeExpr =
MCSymbolRefExpr::create(Callee, OutContext);
- OutStreamer.EmitRelocDirective
- (*OffsetExpr,
- Subtarget.inMicroMipsMode() ? "R_MICROMIPS_JALR" : "R_MIPS_JALR",
- CaleeExpr, SMLoc(), *TM.getMCSubtargetInfo());
- OutStreamer.EmitLabel(OffsetLabel);
+ OutStreamer.emitRelocDirective(
+ *OffsetExpr,
+ Subtarget.inMicroMipsMode() ? "R_MICROMIPS_JALR" : "R_MIPS_JALR",
+ CaleeExpr, SMLoc(), *TM.getMCSubtargetInfo());
+ OutStreamer.emitLabel(OffsetLabel);
return;
}
}
}
}
-void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void MipsAsmPrinter::emitInstruction(const MachineInstr *MI) {
MipsTargetStreamer &TS = getTargetStreamer();
unsigned Opc = MI->getOpcode();
TS.forbidModuleDirective();
@@ -202,7 +202,7 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// If we just ended a constant pool, mark it as such.
if (InConstantPool && Opc != Mips::CONSTPOOL_ENTRY) {
- OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
+ OutStreamer->emitDataRegion(MCDR_DataRegionEnd);
InConstantPool = false;
}
if (Opc == Mips::CONSTPOOL_ENTRY) {
@@ -218,17 +218,17 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// If this is the first entry of the pool, mark it.
if (!InConstantPool) {
- OutStreamer->EmitDataRegion(MCDR_DataRegion);
+ OutStreamer->emitDataRegion(MCDR_DataRegion);
InConstantPool = true;
}
- OutStreamer->EmitLabel(GetCPISymbol(LabelId));
+ OutStreamer->emitLabel(GetCPISymbol(LabelId));
const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx];
if (MCPE.isMachineConstantPoolEntry())
- EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
+ emitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
else
- EmitGlobalConstant(MF->getDataLayout(), MCPE.Val.ConstVal);
+ emitGlobalConstant(MF->getDataLayout(), MCPE.Val.ConstVal);
return;
}
@@ -280,7 +280,7 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
//
if (I->isPseudo() && !Subtarget->inMips16Mode()
&& !isLongBranchPseudo(I->getOpcode()))
- llvm_unreachable("Pseudo opcode found in EmitInstruction()");
+ llvm_unreachable("Pseudo opcode found in emitInstruction()");
MCInst TmpInst0;
MCInstLowering.Lower(&*I, TmpInst0);
@@ -398,13 +398,13 @@ const char *MipsAsmPrinter::getCurrentABIString() const {
}
}
-void MipsAsmPrinter::EmitFunctionEntryLabel() {
+void MipsAsmPrinter::emitFunctionEntryLabel() {
MipsTargetStreamer &TS = getTargetStreamer();
// NaCl sandboxing requires that indirect call instructions are masked.
// This means that function entry points should be bundle-aligned.
if (Subtarget->isTargetNaCl())
- EmitAlignment(std::max(MF->getAlignment(), MIPS_NACL_BUNDLE_ALIGN));
+ emitAlignment(std::max(MF->getAlignment(), MIPS_NACL_BUNDLE_ALIGN));
if (Subtarget->inMicroMipsMode()) {
TS.emitDirectiveSetMicroMips();
@@ -419,12 +419,12 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() {
TS.emitDirectiveSetNoMips16();
TS.emitDirectiveEnt(*CurrentFnSym);
- OutStreamer->EmitLabel(CurrentFnSym);
+ OutStreamer->emitLabel(CurrentFnSym);
}
/// EmitFunctionBodyStart - Targets can override this to emit stuff before
/// the first basic block in the function.
-void MipsAsmPrinter::EmitFunctionBodyStart() {
+void MipsAsmPrinter::emitFunctionBodyStart() {
MipsTargetStreamer &TS = getTargetStreamer();
MCInstLowering.Initialize(&MF->getContext());
@@ -445,7 +445,7 @@ void MipsAsmPrinter::EmitFunctionBodyStart() {
/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
/// the last basic block in the function.
-void MipsAsmPrinter::EmitFunctionBodyEnd() {
+void MipsAsmPrinter::emitFunctionBodyEnd() {
MipsTargetStreamer &TS = getTargetStreamer();
// There are instruction for this macros, but they must
@@ -462,11 +462,11 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() {
if (!InConstantPool)
return;
InConstantPool = false;
- OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
+ OutStreamer->emitDataRegion(MCDR_DataRegionEnd);
}
-void MipsAsmPrinter::EmitBasicBlockEnd(const MachineBasicBlock &MBB) {
- AsmPrinter::EmitBasicBlockEnd(MBB);
+void MipsAsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) {
+ AsmPrinter::emitBasicBlockEnd(MBB);
MipsTargetStreamer &TS = getTargetStreamer();
if (MBB.empty())
TS.emitDirectiveInsn();
@@ -770,7 +770,7 @@ printRegisterList(const MachineInstr *MI, int opNum, raw_ostream &O) {
}
}
-void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
+void MipsAsmPrinter::emitStartOfAsmFile(Module &M) {
MipsTargetStreamer &TS = getTargetStreamer();
// MipsTargetStreamer has an initialization order problem when emitting an
@@ -860,7 +860,7 @@ void MipsAsmPrinter::EmitJal(const MCSubtargetInfo &STI, MCSymbol *Symbol) {
I.setOpcode(Mips::JAL);
I.addOperand(
MCOperand::createExpr(MCSymbolRefExpr::create(Symbol, OutContext)));
- OutStreamer->EmitInstruction(I, STI);
+ OutStreamer->emitInstruction(I, STI);
}
void MipsAsmPrinter::EmitInstrReg(const MCSubtargetInfo &STI, unsigned Opcode,
@@ -868,7 +868,7 @@ void MipsAsmPrinter::EmitInstrReg(const MCSubtargetInfo &STI, unsigned Opcode,
MCInst I;
I.setOpcode(Opcode);
I.addOperand(MCOperand::createReg(Reg));
- OutStreamer->EmitInstruction(I, STI);
+ OutStreamer->emitInstruction(I, STI);
}
void MipsAsmPrinter::EmitInstrRegReg(const MCSubtargetInfo &STI,
@@ -888,7 +888,7 @@ void MipsAsmPrinter::EmitInstrRegReg(const MCSubtargetInfo &STI,
I.setOpcode(Opcode);
I.addOperand(MCOperand::createReg(Reg1));
I.addOperand(MCOperand::createReg(Reg2));
- OutStreamer->EmitInstruction(I, STI);
+ OutStreamer->emitInstruction(I, STI);
}
void MipsAsmPrinter::EmitInstrRegRegReg(const MCSubtargetInfo &STI,
@@ -899,7 +899,7 @@ void MipsAsmPrinter::EmitInstrRegRegReg(const MCSubtargetInfo &STI,
I.addOperand(MCOperand::createReg(Reg1));
I.addOperand(MCOperand::createReg(Reg2));
I.addOperand(MCOperand::createReg(Reg3));
- OutStreamer->EmitInstruction(I, STI);
+ OutStreamer->emitInstruction(I, STI);
}
void MipsAsmPrinter::EmitMovFPIntPair(const MCSubtargetInfo &STI,
@@ -990,7 +990,7 @@ void MipsAsmPrinter::EmitFPCallStub(
//
// .global xxxx
//
- OutStreamer->EmitSymbolAttribute(MSymbol, MCSA_Global);
+ OutStreamer->emitSymbolAttribute(MSymbol, MCSA_Global);
const char *RetType;
//
// make the comment field identifying the return and parameter
@@ -1054,7 +1054,7 @@ void MipsAsmPrinter::EmitFPCallStub(
//
// .align 2
//
- OutStreamer->EmitValueToAlignment(4);
+ OutStreamer->emitValueToAlignment(4);
MipsTargetStreamer &TS = getTargetStreamer();
//
// .set nomips16
@@ -1073,8 +1073,8 @@ void MipsAsmPrinter::EmitFPCallStub(
TS.emitDirectiveEnt(*Stub);
MCSymbol *MType =
OutContext.getOrCreateSymbol("__call_stub_fp_" + Twine(Symbol));
- OutStreamer->EmitSymbolAttribute(MType, MCSA_ELF_TypeFunction);
- OutStreamer->EmitLabel(Stub);
+ OutStreamer->emitSymbolAttribute(MType, MCSA_ELF_TypeFunction);
+ OutStreamer->emitLabel(Stub);
// Only handle non-pic for now.
assert(!isPositionIndependent() &&
@@ -1113,7 +1113,7 @@ void MipsAsmPrinter::EmitFPCallStub(
EmitInstrReg(*STI, Mips::JR, Mips::S2);
MCSymbol *Tmp = OutContext.createTempSymbol();
- OutStreamer->EmitLabel(Tmp);
+ OutStreamer->emitLabel(Tmp);
const MCSymbolRefExpr *E = MCSymbolRefExpr::create(Stub, OutContext);
const MCSymbolRefExpr *T = MCSymbolRefExpr::create(Tmp, OutContext);
const MCExpr *T_min_E = MCBinaryExpr::createSub(T, E, OutContext);
@@ -1122,7 +1122,7 @@ void MipsAsmPrinter::EmitFPCallStub(
OutStreamer->PopSection();
}
-void MipsAsmPrinter::EmitEndOfAsmFile(Module &M) {
+void MipsAsmPrinter::emitEndOfAsmFile(Module &M) {
// Emit needed stubs
//
for (std::map<
@@ -1203,9 +1203,9 @@ void MipsAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) {
// LD RA, 8(SP)
// DADDIU SP, SP, 16
//
- OutStreamer->EmitCodeAlignment(4);
+ OutStreamer->emitCodeAlignment(4);
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
- OutStreamer->EmitLabel(CurSled);
+ OutStreamer->emitLabel(CurSled);
auto Target = OutContext.createTempSymbol();
// Emit "B .tmpN" instruction, which jumps over the nop sled to the actual
@@ -1223,7 +1223,7 @@ void MipsAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) {
.addReg(Mips::ZERO)
.addImm(0));
- OutStreamer->EmitLabel(Target);
+ OutStreamer->emitLabel(Target);
if (!Subtarget->isGP64bit()) {
EmitToStreamer(*OutStreamer,
@@ -1255,15 +1255,15 @@ void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
// Emit .dtprelword or .dtpreldword directive
// and value for debug thread local expression.
-void MipsAsmPrinter::EmitDebugValue(const MCExpr *Value, unsigned Size) const {
+void MipsAsmPrinter::emitDebugValue(const MCExpr *Value, unsigned Size) const {
if (auto *MipsExpr = dyn_cast<MipsMCExpr>(Value)) {
if (MipsExpr && MipsExpr->getKind() == MipsMCExpr::MEK_DTPREL) {
switch (Size) {
case 4:
- OutStreamer->EmitDTPRel32Value(MipsExpr->getSubExpr());
+ OutStreamer->emitDTPRel32Value(MipsExpr->getSubExpr());
break;
case 8:
- OutStreamer->EmitDTPRel64Value(MipsExpr->getSubExpr());
+ OutStreamer->emitDTPRel64Value(MipsExpr->getSubExpr());
break;
default:
llvm_unreachable("Unexpected size of expression value.");
@@ -1271,7 +1271,7 @@ void MipsAsmPrinter::EmitDebugValue(const MCExpr *Value, unsigned Size) const {
return;
}
}
- AsmPrinter::EmitDebugValue(Value, Size);
+ AsmPrinter::emitDebugValue(Value, Size);
}
// Align all targets of indirect branches on bundle size. Used only if target
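
The renamed emitDebugValue keeps its special case for DTPREL expressions and dispatches purely on width: a 4-byte value becomes a .dtprelword, an 8-byte one a .dtpreldword, and any other size is an error (the generic AsmPrinter path handles non-DTPREL values). A standalone sketch of that size dispatch; the printed directive text is illustrative of what the streamer calls ultimately emit.

    #include <cstdio>
    #include <stdexcept>

    // Size-based dispatch mirroring the DTPREL branch of emitDebugValue.
    static void emitDtprelValue(const char *Expr, unsigned Size) {
      switch (Size) {
      case 4:
        std::printf("\t.dtprelword\t%s\n", Expr);  // 32-bit DTP-relative word
        break;
      case 8:
        std::printf("\t.dtpreldword\t%s\n", Expr); // 64-bit DTP-relative dword
        break;
      default:
        throw std::runtime_error("Unexpected size of expression value.");
      }
    }

    int main() {
      emitDtprelValue("tls_var", 4);
      emitDtprelValue("tls_var", 8);
    }
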
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.h
index 173a1312812e..64424b181504 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.h
@@ -126,22 +126,22 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- void EmitConstantPool() override {
+ void emitConstantPool() override {
bool UsingConstantPools =
(Subtarget->inMips16Mode() && Subtarget->useConstantIslands());
if (!UsingConstantPools)
- AsmPrinter::EmitConstantPool();
+ AsmPrinter::emitConstantPool();
// we emit constant pools customly!
}
- void EmitInstruction(const MachineInstr *MI) override;
+ void emitInstruction(const MachineInstr *MI) override;
void printSavedRegsBitmask();
void emitFrameDirective();
const char *getCurrentABIString() const;
- void EmitFunctionEntryLabel() override;
- void EmitFunctionBodyStart() override;
- void EmitFunctionBodyEnd() override;
- void EmitBasicBlockEnd(const MachineBasicBlock &MBB) override;
+ void emitFunctionEntryLabel() override;
+ void emitFunctionBodyStart() override;
+ void emitFunctionBodyEnd() override;
+ void emitBasicBlockEnd(const MachineBasicBlock &MBB) override;
bool isBlockOnlyReachableByFallthrough(
const MachineBasicBlock* MBB) const override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -154,10 +154,10 @@ public:
void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier = nullptr);
void printRegisterList(const MachineInstr *MI, int opNum, raw_ostream &O);
- void EmitStartOfAsmFile(Module &M) override;
- void EmitEndOfAsmFile(Module &M) override;
+ void emitStartOfAsmFile(Module &M) override;
+ void emitEndOfAsmFile(Module &M) override;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
- void EmitDebugValue(const MCExpr *Value, unsigned Size) const override;
+ void emitDebugValue(const MCExpr *Value, unsigned Size) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsBranchExpansion.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
index 1523a6c020aa..aa8e298fa759 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
@@ -342,16 +342,25 @@ void MipsBranchExpansion::replaceBranch(MachineBasicBlock &MBB, Iter Br,
for (unsigned I = 0, E = Br->getDesc().getNumOperands(); I < E; ++I) {
MachineOperand &MO = Br->getOperand(I);
- if (!MO.isReg()) {
- assert(MO.isMBB() && "MBB operand expected.");
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ MIB.addReg(MO.getReg());
break;
+ case MachineOperand::MO_Immediate:
+ // Octeon BBIT family of branch has an immediate operand
+ // (e.g. BBIT0 $v0, 3, %bb.1).
+ if (!TII->isBranchWithImm(Br->getOpcode()))
+ llvm_unreachable("Unexpected immediate in branch instruction");
+ MIB.addImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MIB.addMBB(MBBOpnd);
+ break;
+ default:
+ llvm_unreachable("Unexpected operand type in branch instruction");
}
-
- MIB.addReg(MO.getReg());
}
- MIB.addMBB(MBBOpnd);
-
if (Br->hasDelaySlot()) {
// Bundle the instruction in the delay slot to the newly created branch
// and erase the original branch.
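
replaceBranch now copies operands through a switch on the operand type, so the Octeon BBIT branches, which carry an immediate bit index, are forwarded instead of tripping the old "MBB operand expected" assert. A standalone sketch of that dispatch over a simplified operand model (the enum, builder, and register numbers are illustrative):

    #include <cstdio>
    #include <vector>

    // Simplified machine-operand model: a branch operand is a register, an
    // immediate (the BBIT bit index), or the target basic block.
    enum class OpKind { Register, Immediate, BasicBlock };
    struct Operand { OpKind Kind; int Value; };

    struct BranchBuilder {
      void addReg(int R) { std::printf("reg $%d ", R); }
      void addImm(int I) { std::printf("imm %d ", I); }
      void addMBB(int B) { std::printf("%%bb.%d ", B); }
    };

    // Mirrors the new switch: each operand kind gets the matching builder call.
    static void copyBranchOperands(BranchBuilder &MIB,
                                   const std::vector<Operand> &Ops,
                                   int TargetBB) {
      for (const Operand &MO : Ops) {
        switch (MO.Kind) {
        case OpKind::Register:   MIB.addReg(MO.Value); break;
        case OpKind::Immediate:  MIB.addImm(MO.Value); break; // e.g. BBIT0 $v0, 3, %bb.1
        case OpKind::BasicBlock: MIB.addMBB(TargetBB);  break;
        }
      }
      std::printf("\n");
    }

    int main() {
      BranchBuilder MIB;
      copyBranchOperands(MIB, {{OpKind::Register, 2},
                               {OpKind::Immediate, 3},
                               {OpKind::BasicBlock, 0}}, /*TargetBB=*/1);
    }
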
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsCCState.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsCCState.cpp
index ef48c850a1b8..fe3fe82797c3 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsCCState.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsCCState.cpp
@@ -30,9 +30,9 @@ static bool isF128SoftLibCall(const char *CallSym) {
// Check that LibCalls is sorted alphabetically.
auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; };
- assert(std::is_sorted(std::begin(LibCalls), std::end(LibCalls), Comp));
- return std::binary_search(std::begin(LibCalls), std::end(LibCalls),
- CallSym, Comp);
+ assert(llvm::is_sorted(LibCalls, Comp));
+ return std::binary_search(std::begin(LibCalls), std::end(LibCalls), CallSym,
+ Comp);
}
/// This function returns true if Ty is fp128, {f128} or i128 which was
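
The MipsCCState hunk swaps std::is_sorted for llvm::is_sorted inside the assertion but keeps the underlying idea: the soft-float libcall names live in a sorted table so membership is a binary search rather than a linear scan. A standalone sketch with a small, made-up subset of symbol names (the real table lists the f128 soft-float runtime calls):

    #include <algorithm>
    #include <cassert>
    #include <cstdio>
    #include <cstring>
    #include <iterator>

    // Hypothetical subset of runtime-call names, kept sorted so lookup is O(log n).
    static const char *const LibCalls[] = {"__addtf3", "__divtf3", "__multf3",
                                           "__subtf3"};

    static bool isF128SoftLibCall(const char *CallSym) {
      auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; };
      // The table must stay sorted for binary_search to be valid.
      assert(std::is_sorted(std::begin(LibCalls), std::end(LibCalls), Comp));
      return std::binary_search(std::begin(LibCalls), std::end(LibCalls), CallSym,
                                Comp);
    }

    int main() {
      std::printf("%d %d\n", isF128SoftLibCall("__multf3"), // 1
                  isF128SoftLibCall("__lttf2"));            // 0
    }
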
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.cpp
index 6ba15c232867..cffd99affac1 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsCallLowering.cpp
@@ -110,10 +110,10 @@ private:
MIRBuilder.getMBB().addLiveIn(PhysReg);
}
- void buildLoad(Register Val, const CCValAssign &VA) {
+ MachineInstrBuilder buildLoad(const DstOp &Res, const CCValAssign &VA) {
MachineMemOperand *MMO;
Register Addr = getStackAddress(VA, MMO);
- MIRBuilder.buildLoad(Val, Addr, *MMO);
+ return MIRBuilder.buildLoad(Res, Addr, *MMO);
}
};
@@ -136,29 +136,19 @@ private:
void IncomingValueHandler::assignValueToReg(Register ValVReg,
const CCValAssign &VA,
const EVT &VT) {
- const MipsSubtarget &STI =
- static_cast<const MipsSubtarget &>(MIRBuilder.getMF().getSubtarget());
Register PhysReg = VA.getLocReg();
if (VT == MVT::f64 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) {
const MipsSubtarget &STI =
static_cast<const MipsSubtarget &>(MIRBuilder.getMF().getSubtarget());
-
- MIRBuilder
- .buildInstr(STI.isFP64bit() ? Mips::BuildPairF64_64
- : Mips::BuildPairF64)
- .addDef(ValVReg)
- .addUse(PhysReg + (STI.isLittle() ? 0 : 1))
- .addUse(PhysReg + (STI.isLittle() ? 1 : 0))
- .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
- *STI.getRegBankInfo());
+ bool IsEL = STI.isLittle();
+ LLT s32 = LLT::scalar(32);
+ auto Lo = MIRBuilder.buildCopy(s32, Register(PhysReg + (IsEL ? 0 : 1)));
+ auto Hi = MIRBuilder.buildCopy(s32, Register(PhysReg + (IsEL ? 1 : 0)));
+ MIRBuilder.buildMerge(ValVReg, {Lo, Hi});
markPhysRegUsed(PhysReg);
markPhysRegUsed(PhysReg + 1);
} else if (VT == MVT::f32 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) {
- MIRBuilder.buildInstr(Mips::MTC1)
- .addDef(ValVReg)
- .addUse(PhysReg)
- .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
- *STI.getRegBankInfo());
+ MIRBuilder.buildCopy(ValVReg, PhysReg);
markPhysRegUsed(PhysReg);
} else {
switch (VA.getLocInfo()) {
@@ -189,13 +179,11 @@ Register IncomingValueHandler::getStackAddress(const CCValAssign &VA,
MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
const TargetFrameLowering *TFL = MF.getSubtarget().getFrameLowering();
- unsigned Align = MinAlign(TFL->getStackAlignment(), Offset);
- MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, Size, Align);
-
- Register AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 32));
- MIRBuilder.buildFrameIndex(AddrReg, FI);
+ Align Alignment = commonAlignment(TFL->getStackAlign(), Offset);
+ MMO =
+ MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, Size, Alignment);
- return AddrReg;
+ return MIRBuilder.buildFrameIndex(LLT::pointer(0, 32), FI).getReg(0);
}
void IncomingValueHandler::assignValueToAddress(Register ValVReg,
@@ -203,9 +191,8 @@ void IncomingValueHandler::assignValueToAddress(Register ValVReg,
if (VA.getLocInfo() == CCValAssign::SExt ||
VA.getLocInfo() == CCValAssign::ZExt ||
VA.getLocInfo() == CCValAssign::AExt) {
- Register LoadReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
- buildLoad(LoadReg, VA);
- MIRBuilder.buildTrunc(ValVReg, LoadReg);
+ auto Load = buildLoad(LLT::scalar(32), VA);
+ MIRBuilder.buildTrunc(ValVReg, Load);
} else
buildLoad(ValVReg, VA);
}
@@ -251,32 +238,15 @@ void OutgoingValueHandler::assignValueToReg(Register ValVReg,
const CCValAssign &VA,
const EVT &VT) {
Register PhysReg = VA.getLocReg();
- const MipsSubtarget &STI =
- static_cast<const MipsSubtarget &>(MIRBuilder.getMF().getSubtarget());
-
if (VT == MVT::f64 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) {
- MIRBuilder
- .buildInstr(STI.isFP64bit() ? Mips::ExtractElementF64_64
- : Mips::ExtractElementF64)
- .addDef(PhysReg + (STI.isLittle() ? 1 : 0))
- .addUse(ValVReg)
- .addImm(1)
- .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
- *STI.getRegBankInfo());
- MIRBuilder
- .buildInstr(STI.isFP64bit() ? Mips::ExtractElementF64_64
- : Mips::ExtractElementF64)
- .addDef(PhysReg + (STI.isLittle() ? 0 : 1))
- .addUse(ValVReg)
- .addImm(0)
- .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
- *STI.getRegBankInfo());
+ const MipsSubtarget &STI =
+ static_cast<const MipsSubtarget &>(MIRBuilder.getMF().getSubtarget());
+ bool IsEL = STI.isLittle();
+ auto Unmerge = MIRBuilder.buildUnmerge(LLT::scalar(32), ValVReg);
+ MIRBuilder.buildCopy(Register(PhysReg + (IsEL ? 0 : 1)), Unmerge.getReg(0));
+ MIRBuilder.buildCopy(Register(PhysReg + (IsEL ? 1 : 0)), Unmerge.getReg(1));
} else if (VT == MVT::f32 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) {
- MIRBuilder.buildInstr(Mips::MFC1)
- .addDef(PhysReg)
- .addUse(ValVReg)
- .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
- *STI.getRegBankInfo());
+ MIRBuilder.buildCopy(PhysReg, ValVReg);
} else {
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
@@ -291,23 +261,21 @@ Register OutgoingValueHandler::getStackAddress(const CCValAssign &VA,
LLT p0 = LLT::pointer(0, 32);
LLT s32 = LLT::scalar(32);
- Register SPReg = MRI.createGenericVirtualRegister(p0);
- MIRBuilder.buildCopy(SPReg, Register(Mips::SP));
+ auto SPReg = MIRBuilder.buildCopy(p0, Register(Mips::SP));
- Register OffsetReg = MRI.createGenericVirtualRegister(s32);
unsigned Offset = VA.getLocMemOffset();
- MIRBuilder.buildConstant(OffsetReg, Offset);
+ auto OffsetReg = MIRBuilder.buildConstant(s32, Offset);
- Register AddrReg = MRI.createGenericVirtualRegister(p0);
- MIRBuilder.buildPtrAdd(AddrReg, SPReg, OffsetReg);
+ auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);
MachinePointerInfo MPO =
MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
unsigned Size = alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
- unsigned Align = MinAlign(TFL->getStackAlignment(), Offset);
- MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, Size, Align);
+ Align Alignment = commonAlignment(TFL->getStackAlign(), Offset);
+ MMO =
+ MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, Size, Alignment);
- return AddrReg;
+ return AddrReg.getReg(0);
}
void OutgoingValueHandler::assignValueToAddress(Register ValVReg,
@@ -323,19 +291,13 @@ Register OutgoingValueHandler::extendRegister(Register ValReg,
LLT LocTy{VA.getLocVT()};
switch (VA.getLocInfo()) {
case CCValAssign::SExt: {
- Register ExtReg = MRI.createGenericVirtualRegister(LocTy);
- MIRBuilder.buildSExt(ExtReg, ValReg);
- return ExtReg;
+ return MIRBuilder.buildSExt(LocTy, ValReg).getReg(0);
}
case CCValAssign::ZExt: {
- Register ExtReg = MRI.createGenericVirtualRegister(LocTy);
- MIRBuilder.buildZExt(ExtReg, ValReg);
- return ExtReg;
+ return MIRBuilder.buildZExt(LocTy, ValReg).getReg(0);
}
case CCValAssign::AExt: {
- Register ExtReg = MRI.createGenericVirtualRegister(LocTy);
- MIRBuilder.buildAnyExt(ExtReg, ValReg);
- return ExtReg;
+ return MIRBuilder.buildAnyExt(LocTy, ValReg).getReg(0);
}
// TODO : handle upper extends
case CCValAssign::Full:
@@ -489,7 +451,7 @@ bool MipsCallLowering::lowerFormalArguments(
static_cast<const MipsTargetMachine &>(MF.getTarget());
const MipsABIInfo &ABI = TM.getABI();
CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(F.getCallingConv()),
- 1);
+ Align(1));
CCInfo.AnalyzeFormalArguments(Ins, TLI.CCAssignFnForCall());
setLocInfo(ArgLocs, Ins);
@@ -524,9 +486,8 @@ bool MipsCallLowering::lowerFormalArguments(
MachinePointerInfo MPO = MachinePointerInfo::getFixedStack(MF, FI);
MachineInstrBuilder FrameIndex =
MIRBuilder.buildFrameIndex(LLT::pointer(MPO.getAddrSpace(), 32), FI);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, RegSize,
- /* Alignment */ RegSize);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MPO, MachineMemOperand::MOStore, RegSize, Align(RegSize));
MIRBuilder.buildStore(Copy, FrameIndex, *MMO);
}
}
@@ -611,7 +572,8 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
MipsCCState CCInfo(F.getCallingConv(), IsCalleeVarArg, MF, ArgLocs,
F.getContext());
- CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(Info.CallConv), 1);
+ CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(Info.CallConv),
+ Align(1));
const char *Call =
Info.Callee.isSymbol() ? Info.Callee.getSymbolName() : nullptr;
CCInfo.AnalyzeCallOperands(Outs, TLI.CCAssignFnForCall(), FuncOrigArgs, Call);
@@ -631,7 +593,7 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
if (IsCalleeGlobalPIC) {
MIRBuilder.buildCopy(
Register(Mips::GP),
- MF.getInfo<MipsFunctionInfo>()->getGlobalBaseRegForGlobalISel());
+ MF.getInfo<MipsFunctionInfo>()->getGlobalBaseRegForGlobalISel(MF));
MIB.addDef(Mips::GP, RegState::Implicit);
}
MIRBuilder.insertInstr(MIB);
@@ -691,7 +653,7 @@ void MipsCallLowering::subTargetRegTypeForCallingConv(
if (i == 0)
Flags.setOrigAlign(TLI.getABIAlignmentForCallingConv(Arg.Ty, DL));
else
- Flags.setOrigAlign(Align::None());
+ Flags.setOrigAlign(Align(1));
ISDArgs.emplace_back(Flags, RegisterVT, VT, true, OrigArgIndices[ArgNo],
0);
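
In the call-lowering rewrite above, an f64 passed in a GPR pair is no longer built with the BuildPairF64/ExtractElementF64 pseudos; it is unmerged into two 32-bit halves and copied to PhysReg and PhysReg+1, with the low half landing in the lower-numbered register on little-endian targets. A standalone sketch of that lo/hi ordering on a plain uint64_t bit pattern (no GlobalISel types; the register names in comments are illustrative):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    struct Halves {
      uint32_t ToPhysReg;      // value copied into PhysReg (e.g. $a2)
      uint32_t ToPhysRegPlus1; // value copied into PhysReg + 1 (e.g. $a3)
    };

    // Mirrors the ordering logic: on little-endian targets the low 32 bits go
    // to the lower-numbered register of the pair, on big-endian the high bits do.
    static Halves splitF64ForRegPair(double D, bool IsLittleEndian) {
      uint64_t Bits;
      std::memcpy(&Bits, &D, sizeof Bits);
      uint32_t Lo = uint32_t(Bits);
      uint32_t Hi = uint32_t(Bits >> 32);
      return IsLittleEndian ? Halves{Lo, Hi} : Halves{Hi, Lo};
    }

    int main() {
      Halves LE = splitF64ForRegPair(1.0, /*IsLittleEndian=*/true);
      Halves BE = splitF64ForRegPair(1.0, /*IsLittleEndian=*/false);
      std::printf("LE: %08x %08x\nBE: %08x %08x\n", LE.ToPhysReg,
                  LE.ToPhysRegPlus1, BE.ToPhysReg, BE.ToPhysRegPlus1);
    }
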
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
index 1f1a1574443c..faf7160e63e2 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -529,7 +529,7 @@ MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
MF->push_back(BB);
// MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
- const Align MaxAlign(MCP->getConstantPoolAlignment());
+ const Align MaxAlign = MCP->getConstantPoolAlign();
// Mark the basic block as required by the const-pool.
// If AlignConstantIslands isn't set, use 4-byte alignment for everything.
@@ -554,14 +554,13 @@ MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
assert(Size >= 4 && "Too small constant pool entry");
- unsigned Align = CPs[i].getAlignment();
- assert(isPowerOf2_32(Align) && "Invalid alignment");
+ Align Alignment = CPs[i].getAlign();
// Verify that all constant pool entries are a multiple of their alignment.
// If not, we would have to pad them out so that instructions stay aligned.
- assert((Size % Align) == 0 && "CP Entry not multiple of 4 bytes!");
+ assert(isAligned(Alignment, Size) && "CP Entry not multiple of 4 bytes!");
// Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
- unsigned LogAlign = Log2_32(Align);
+ unsigned LogAlign = Log2(Alignment);
MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
MachineInstr *CPEMI =
@@ -579,7 +578,7 @@ MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
CPEntries.emplace_back(1, CPEntry(CPEMI, i));
++NumCPEs;
LLVM_DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
- << Size << ", align = " << Align << '\n');
+ << Size << ", align = " << Alignment.value() << '\n');
}
LLVM_DEBUG(BB->dump());
}
@@ -628,7 +627,7 @@ Align MipsConstantIslands::getCPEAlign(const MachineInstr &CPEMI) {
unsigned CPI = CPEMI.getOperand(1).getIndex();
assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
- return Align(MCP->getConstants()[CPI].getAlignment());
+ return MCP->getConstants()[CPI].getAlign();
}
/// initializeFunctionInfo - Do the initial scan of the function, building up
@@ -940,7 +939,7 @@ bool MipsConstantIslands::isWaterInRange(unsigned UserOffset,
MachineFunction::const_iterator NextBlock = ++Water->getIterator();
if (NextBlock == MF->end()) {
NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
- NextBlockAlignment = Align::None();
+ NextBlockAlignment = Align(1);
} else {
NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
NextBlockAlignment = NextBlock->getAlignment();
@@ -1656,7 +1655,7 @@ void MipsConstantIslands::prescanForConstants() {
Type *Int32Ty =
Type::getInt32Ty(MF->getFunction().getContext());
const Constant *C = ConstantInt::get(Int32Ty, V);
- unsigned index = MCP->getConstantPoolIndex(C, 4);
+ unsigned index = MCP->getConstantPoolIndex(C, Align(4));
I->getOperand(2).ChangeToImmediate(index);
LLVM_DEBUG(dbgs() << "constant island constant " << *I << "\n");
I->setDesc(TII->get(Mips::LwRxPcTcp16));
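
Most of the MipsConstantIslands changes are mechanical moves from raw unsigned alignments to the llvm::Align value type: getAlign() instead of getAlignment(), isAligned(Alignment, Size) instead of a manual modulo check, and Log2(Alignment) for the log2 bucket. A standalone sketch of a minimal power-of-two alignment type with those three operations (a toy, not llvm::Align):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Toy power-of-two alignment type with the operations the pass relies on.
    class Align {
      uint64_t Value; // always a power of two, in bytes
    public:
      explicit Align(uint64_t V) : Value(V) { assert(V && (V & (V - 1)) == 0); }
      uint64_t value() const { return Value; }
      friend bool isAligned(Align A, uint64_t Size) {
        return Size % A.value() == 0;
      }
      friend unsigned Log2(Align A) {
        unsigned L = 0;
        for (uint64_t V = A.value(); V > 1; V >>= 1)
          ++L;
        return L;
      }
    };

    int main() {
      Align A(8);
      std::printf("isAligned: %d, Log2: %u\n", isAligned(A, 24), Log2(A)); // 1, 3
    }
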
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsDSPInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsDSPInstrFormats.td
index 6f062d0f3c25..abb6aea50710 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsDSPInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsDSPInstrFormats.td
@@ -21,11 +21,11 @@ def Dsp2MicroMips : InstrMapping {
}
def HasDSP : Predicate<"Subtarget->hasDSP()">,
- AssemblerPredicate<"FeatureDSP">;
+ AssemblerPredicate<(all_of FeatureDSP)>;
def HasDSPR2 : Predicate<"Subtarget->hasDSPR2()">,
- AssemblerPredicate<"FeatureDSPR2">;
+ AssemblerPredicate<(all_of FeatureDSPR2)>;
def HasDSPR3 : Predicate<"Subtarget->hasDSPR3()">,
- AssemblerPredicate<"FeatureDSPR3">;
+ AssemblerPredicate<(all_of FeatureDSPR3)>;
class ISA_DSPR2 {
list<Predicate> ASEPredicate = [HasDSPR2];
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 84ff674569cd..155d19ba6959 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -96,7 +96,7 @@ static cl::opt<CompactBranchPolicy> MipsCompactBranchPolicy(
cl::values(clEnumValN(CB_Never, "never",
"Do not use compact branches if possible."),
clEnumValN(CB_Optimal, "optimal",
- "Use compact branches where appropiate (default)."),
+ "Use compact branches where appropriate (default)."),
clEnumValN(CB_Always, "always",
"Always use compact branches if possible.")));
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsFastISel.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsFastISel.cpp
index 80f288ac500c..8a847eaf6618 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -420,7 +420,7 @@ unsigned MipsFastISel::materializeGV(const GlobalValue *GV, MVT VT) {
if (IsThreadLocal)
return 0;
emitInst(Mips::LW, DestReg)
- .addReg(MFI->getGlobalBaseReg())
+ .addReg(MFI->getGlobalBaseReg(*MF))
.addGlobalAddress(GV, 0, MipsII::MO_GOT);
if ((GV->hasInternalLinkage() ||
(GV->hasLocalLinkage() && !isa<Function>(GV)))) {
@@ -437,7 +437,7 @@ unsigned MipsFastISel::materializeExternalCallSym(MCSymbol *Sym) {
const TargetRegisterClass *RC = &Mips::GPR32RegClass;
unsigned DestReg = createResultReg(RC);
emitInst(Mips::LW, DestReg)
- .addReg(MFI->getGlobalBaseReg())
+ .addReg(MFI->getGlobalBaseReg(*MF))
.addSym(Sym, MipsII::MO_GOT);
return DestReg;
}
@@ -795,12 +795,11 @@ bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
}
if (Addr.isFIBase()) {
unsigned FI = Addr.getFI();
- unsigned Align = 4;
int64_t Offset = Addr.getOffset();
MachineFrameInfo &MFI = MF->getFrameInfo();
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI), Align);
+ MFI.getObjectSize(FI), Align(4));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addFrameIndex(FI)
.addImm(Offset)
@@ -846,12 +845,11 @@ bool MipsFastISel::emitStore(MVT VT, unsigned SrcReg, Address &Addr,
}
if (Addr.isFIBase()) {
unsigned FI = Addr.getFI();
- unsigned Align = 4;
int64_t Offset = Addr.getOffset();
MachineFrameInfo &MFI = MF->getFrameInfo();
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
- MFI.getObjectSize(FI), Align);
+ MFI.getObjectSize(FI), Align(4));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
.addReg(SrcReg)
.addFrameIndex(FI)
@@ -1263,7 +1261,7 @@ bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI,
Addr.setReg(Mips::SP);
Addr.setOffset(VA.getLocMemOffset() + BEAlign);
- unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
+ Align Alignment = DL.getABITypeAlign(ArgVal->getType());
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
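
The MipsFastISel hunks above are part of the LLVM-wide migration from raw unsigned alignments to the llvm::Align type when building MachineMemOperands and querying the DataLayout. A minimal sketch of the resulting pattern, assuming the LLVM 11-era API used in this merge; the helper names below are illustrative, not LLVM API.

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Alignment.h"

using namespace llvm;

// Build a fixed-stack load memory operand the way the hunks above do.
static MachineMemOperand *buildFixedStackLoadMMO(MachineFunction &MF, int FI) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // Align(4) replaces the old 'unsigned Align = 4;' local variable.
  return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
                                 MachineMemOperand::MOLoad,
                                 MFI.getObjectSize(FI), Align(4));
}

static Align abiAlignOf(const DataLayout &DL, Type *Ty) {
  // getABITypeAlign returns llvm::Align directly, with no implicit
  // conversion from an unsigned byte count.
  return DL.getABITypeAlign(Ty);
}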
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 8c36bcd5c8f2..d88696525e9e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -65,7 +65,7 @@ bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
/// getGlobalBaseReg - Output the instructions required to put the
/// GOT address into a register.
SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
- Register GlobalBaseReg = MF->getInfo<MipsFunctionInfo>()->getGlobalBaseReg();
+ Register GlobalBaseReg = MF->getInfo<MipsFunctionInfo>()->getGlobalBaseReg(*MF);
return CurDAG->getRegister(GlobalBaseReg, getTargetLowering()->getPointerTy(
CurDAG->getDataLayout()))
.getNode();
@@ -253,7 +253,7 @@ bool MipsDAGToDAGISel::selectVecAddAsVecSubIfProfitable(SDNode *Node) {
SDLoc DL(Node);
SDValue NegC = CurDAG->FoldConstantArithmetic(
- ISD::SUB, DL, VT, CurDAG->getConstant(0, DL, VT).getNode(), C.getNode());
+ ISD::SUB, DL, VT, {CurDAG->getConstant(0, DL, VT), C});
assert(NegC && "Constant-folding failed!");
SDValue NewNode = CurDAG->getNode(ISD::SUB, DL, VT, X, NegC);
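
SelectionDAG::FoldConstantArithmetic now takes its operands as a single ArrayRef<SDValue> instead of two raw SDNode pointers, which is why the call in selectVecAddAsVecSubIfProfitable switches to a braced initializer list. A hedged caller sketch under that assumption; negateConstant is an illustrative name, not LLVM API.

#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Fold (0 - C) into a constant node, the way the updated call above does.
// Returns an empty SDValue if constant folding fails.
static SDValue negateConstant(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                              SDValue C) {
  SDValue Zero = DAG.getConstant(0, DL, VT);
  // Operands are now passed as one braced ArrayRef<SDValue>.
  return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {Zero, C});
}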
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 46b1f35a6fc7..2da35020006e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -142,8 +142,9 @@ unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv(
}
SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const {
- MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>();
- return DAG.getRegister(FI->getGlobalBaseReg(), Ty);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *FI = MF.getInfo<MipsFunctionInfo>();
+ return DAG.getRegister(FI->getGlobalBaseReg(MF), Ty);
}
SDValue MipsTargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
@@ -173,7 +174,7 @@ SDValue MipsTargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
SDValue MipsTargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
- return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
+ return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
N->getOffset(), Flag);
}
@@ -1451,6 +1452,14 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case Mips::PseudoD_SELECT_I:
case Mips::PseudoD_SELECT_I64:
return emitPseudoD_SELECT(MI, BB);
+ case Mips::LDR_W:
+ return emitLDR_W(MI, BB);
+ case Mips::LDR_D:
+ return emitLDR_D(MI, BB);
+ case Mips::STR_W:
+ return emitSTR_W(MI, BB);
+ case Mips::STR_D:
+ return emitSTR_D(MI, BB);
}
}
@@ -2900,8 +2909,8 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
// argument which is not f32 or f64.
bool AllocateFloatsInIntReg = State.isVarArg() || ValNo > 1 ||
State.getFirstUnallocated(F32Regs) != ValNo;
- unsigned OrigAlign = ArgFlags.getOrigAlign();
- bool isI64 = (ValVT == MVT::i32 && OrigAlign == 8);
+ Align OrigAlign = ArgFlags.getNonZeroOrigAlign();
+ bool isI64 = (ValVT == MVT::i32 && OrigAlign == Align(8));
bool isVectorFloat = MipsState->WasOriginalArgVectorFloat(ValNo);
// The MIPS vector ABI for floats passes them in a pair of registers
@@ -3201,7 +3210,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// caller side but removing it breaks the frame size calculation.
unsigned ReservedArgArea =
MemcpyInByVal ? 0 : ABI.GetCalleeAllocdArgSizeInBytes(CallConv);
- CCInfo.AllocateStack(ReservedArgArea, 1);
+ CCInfo.AllocateStack(ReservedArgArea, Align(1));
CCInfo.AnalyzeCallOperands(Outs, CC_Mips, CLI.getArgs(),
ES ? ES->getSymbol() : nullptr);
@@ -3209,6 +3218,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NextStackOffset = CCInfo.getNextStackOffset();
+ // Call site info for function parameters tracking.
+ MachineFunction::CallSiteInfo CSInfo;
+
// Check if it's really possible to do a tail call. Restrict it to functions
// that are part of this compilation unit.
bool InternalLinkage = false;
@@ -3223,7 +3235,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
G->getGlobal()->hasProtectedVisibility());
}
}
- if (!IsTailCall && CLI.CS && CLI.CS.isMustTailCall())
+ if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
@@ -3335,6 +3347,17 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// RegsToPass vector
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+
+ // If the parameter is passed through reg $D, which splits into
+ // two physical registers, avoid creating call site info.
+ if (Mips::AFGR64RegClass.contains(VA.getLocReg()))
+ continue;
+
+ // Collect CSInfo about which register passes which parameter.
+ const TargetOptions &Options = DAG.getTarget().Options;
+ if (Options.SupportsDebugEntryValues)
+ CSInfo.emplace_back(VA.getLocReg(), i);
+
continue;
}
@@ -3398,11 +3421,11 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
else if (Subtarget.useXGOT()) {
Callee = getAddrGlobalLargeGOT(G, DL, Ty, DAG, MipsII::MO_CALL_HI16,
MipsII::MO_CALL_LO16, Chain,
- FuncInfo->callPtrInfo(Val));
+ FuncInfo->callPtrInfo(MF, Val));
IsCallReloc = true;
} else {
Callee = getAddrGlobal(G, DL, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
- FuncInfo->callPtrInfo(Val));
+ FuncInfo->callPtrInfo(MF, Val));
IsCallReloc = true;
}
} else
@@ -3420,11 +3443,11 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
else if (Subtarget.useXGOT()) {
Callee = getAddrGlobalLargeGOT(S, DL, Ty, DAG, MipsII::MO_CALL_HI16,
MipsII::MO_CALL_LO16, Chain,
- FuncInfo->callPtrInfo(Sym));
+ FuncInfo->callPtrInfo(MF, Sym));
IsCallReloc = true;
} else { // PIC
Callee = getAddrGlobal(S, DL, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
- FuncInfo->callPtrInfo(Sym));
+ FuncInfo->callPtrInfo(MF, Sym));
IsCallReloc = true;
}
@@ -3439,12 +3462,16 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (IsTailCall) {
MF.getFrameInfo().setHasTailCall();
- return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, Ops);
+ SDValue Ret = DAG.getNode(MipsISD::TailCall, DL, MVT::Other, Ops);
+ DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
+ return Ret;
}
Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, Ops);
SDValue InFlag = Chain.getValue(1);
+ DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
+
// Create the CALLSEQ_END node in the case of where it is not a call to
// memcpy.
if (!(MemcpyInByVal)) {
@@ -3605,7 +3632,7 @@ SDValue MipsTargetLowering::LowerFormalArguments(
SmallVector<CCValAssign, 16> ArgLocs;
MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
- CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1);
+ CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), Align(1));
const Function &Func = DAG.getMachineFunction().getFunction();
Function::const_arg_iterator FuncArg = Func.arg_begin();
@@ -3940,7 +3967,7 @@ MipsTargetLowering::getSingleConstraintMatchWeight(
break;
case 'f': // FPU or MSA register
if (Subtarget.hasMSA() && type->isVectorTy() &&
- cast<VectorType>(type)->getBitWidth() == 128)
+ type->getPrimitiveSizeInBits().getFixedSize() == 128)
weight = CW_Register;
else if (type->isFloatTy())
weight = CW_Register;
@@ -4269,9 +4296,7 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
}
EVT MipsTargetLowering::getOptimalMemOpType(
- uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
- bool ZeroMemset, bool MemcpyStrSrc,
- const AttributeList &FuncAttributes) const {
+ const MemOp &Op, const AttributeList &FuncAttributes) const {
if (Subtarget.hasMips64())
return MVT::i64;
@@ -4363,7 +4388,8 @@ void MipsTargetLowering::passByValArg(
unsigned ByValSizeInBytes = Flags.getByValSize();
unsigned OffsetInBytes = 0; // From beginning of struct
unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes();
- unsigned Alignment = std::min(Flags.getByValAlign(), RegSizeInBytes);
+ Align Alignment =
+ std::min(Flags.getNonZeroByValAlign(), Align(RegSizeInBytes));
EVT PtrTy = getPointerTy(DAG.getDataLayout()),
RegTy = MVT::getIntegerVT(RegSizeInBytes * 8);
unsigned NumRegs = LastReg - FirstReg;
@@ -4378,7 +4404,7 @@ void MipsTargetLowering::passByValArg(
SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
DAG.getConstant(OffsetInBytes, DL, PtrTy));
SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr,
- MachinePointerInfo(), Alignment);
+ MachinePointerInfo(), Alignment.value());
MemOpChains.push_back(LoadVal.getValue(1));
unsigned ArgReg = ArgRegs[FirstReg + I];
RegsToPass.push_back(std::make_pair(ArgReg, LoadVal));
@@ -4405,7 +4431,7 @@ void MipsTargetLowering::passByValArg(
PtrTy));
SDValue LoadVal = DAG.getExtLoad(
ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, MachinePointerInfo(),
- MVT::getIntegerVT(LoadSizeInBytes * 8), Alignment);
+ MVT::getIntegerVT(LoadSizeInBytes * 8), Alignment.value());
MemOpChains.push_back(LoadVal.getValue(1));
// Shift the loaded value.
@@ -4426,7 +4452,7 @@ void MipsTargetLowering::passByValArg(
OffsetInBytes += LoadSizeInBytes;
TotalBytesLoaded += LoadSizeInBytes;
- Alignment = std::min(Alignment, LoadSizeInBytes);
+ Alignment = std::min(Alignment, Align(LoadSizeInBytes));
}
unsigned ArgReg = ArgRegs[FirstReg + I];
@@ -4441,11 +4467,10 @@ void MipsTargetLowering::passByValArg(
DAG.getConstant(OffsetInBytes, DL, PtrTy));
SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr,
DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
- Chain = DAG.getMemcpy(Chain, DL, Dst, Src,
- DAG.getConstant(MemCpySize, DL, PtrTy),
- Alignment, /*isVolatile=*/false, /*AlwaysInline=*/false,
- /*isTailCall=*/false,
- MachinePointerInfo(), MachinePointerInfo());
+ Chain = DAG.getMemcpy(
+ Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, DL, PtrTy),
+ Align(Alignment), /*isVolatile=*/false, /*AlwaysInline=*/false,
+ /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo());
MemOpChains.push_back(Chain);
}
@@ -4497,12 +4522,12 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
}
void MipsTargetLowering::HandleByVal(CCState *State, unsigned &Size,
- unsigned Align) const {
+ Align Alignment) const {
const TargetFrameLowering *TFL = Subtarget.getFrameLowering();
assert(Size && "Byval argument's size shouldn't be 0.");
- Align = std::min(Align, TFL->getStackAlignment());
+ Alignment = std::min(Alignment, TFL->getStackAlign());
unsigned FirstReg = 0;
unsigned NumRegs = 0;
@@ -4516,17 +4541,17 @@ void MipsTargetLowering::HandleByVal(CCState *State, unsigned &Size,
// We used to check the size as well but we can't do that anymore since
// CCState::HandleByVal() rounds up the size after calling this function.
- assert(!(Align % RegSizeInBytes) &&
- "Byval argument's alignment should be a multiple of"
- "RegSizeInBytes.");
+ assert(
+ Alignment >= Align(RegSizeInBytes) &&
+ "Byval argument's alignment should be a multiple of RegSizeInBytes.");
FirstReg = State->getFirstUnallocated(IntArgRegs);
- // If Align > RegSizeInBytes, the first arg register must be even.
+ // If Alignment > RegSizeInBytes, the first arg register must be even.
// FIXME: This condition happens to do the right thing but it's not the
// right way to test it. We want to check that the stack frame offset
// of the register is aligned.
- if ((Align > RegSizeInBytes) && (FirstReg % 2)) {
+ if ((Alignment > RegSizeInBytes) && (FirstReg % 2)) {
State->AllocateReg(IntArgRegs[FirstReg], ShadowRegs[FirstReg]);
++FirstReg;
}
@@ -4717,3 +4742,274 @@ MipsTargetLowering::getRegisterByName(const char *RegName, LLT VT,
}
report_fatal_error("Invalid register name global variable");
}
+
+MachineBasicBlock *MipsTargetLowering::emitLDR_W(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const bool IsLittle = Subtarget.isLittle();
+ DebugLoc DL = MI.getDebugLoc();
+
+ Register Dest = MI.getOperand(0).getReg();
+ Register Address = MI.getOperand(1).getReg();
+ unsigned Imm = MI.getOperand(2).getImm();
+
+ MachineBasicBlock::iterator I(MI);
+
+ if (Subtarget.hasMips32r6() || Subtarget.hasMips64r6()) {
+    // Mips release 6 can load from an address that is not naturally aligned.
+ Register Temp = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ BuildMI(*BB, I, DL, TII->get(Mips::LW))
+ .addDef(Temp)
+ .addUse(Address)
+ .addImm(Imm);
+ BuildMI(*BB, I, DL, TII->get(Mips::FILL_W)).addDef(Dest).addUse(Temp);
+ } else {
+ // Mips release 5 needs to use instructions that can load from an unaligned
+ // memory address.
+ Register LoadHalf = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ Register LoadFull = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ Register Undef = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ BuildMI(*BB, I, DL, TII->get(Mips::IMPLICIT_DEF)).addDef(Undef);
+ BuildMI(*BB, I, DL, TII->get(Mips::LWR))
+ .addDef(LoadHalf)
+ .addUse(Address)
+ .addImm(Imm + (IsLittle ? 0 : 3))
+ .addUse(Undef);
+ BuildMI(*BB, I, DL, TII->get(Mips::LWL))
+ .addDef(LoadFull)
+ .addUse(Address)
+ .addImm(Imm + (IsLittle ? 3 : 0))
+ .addUse(LoadHalf);
+ BuildMI(*BB, I, DL, TII->get(Mips::FILL_W)).addDef(Dest).addUse(LoadFull);
+ }
+
+ MI.eraseFromParent();
+ return BB;
+}
+
+MachineBasicBlock *MipsTargetLowering::emitLDR_D(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const bool IsLittle = Subtarget.isLittle();
+ DebugLoc DL = MI.getDebugLoc();
+
+ Register Dest = MI.getOperand(0).getReg();
+ Register Address = MI.getOperand(1).getReg();
+ unsigned Imm = MI.getOperand(2).getImm();
+
+ MachineBasicBlock::iterator I(MI);
+
+ if (Subtarget.hasMips32r6() || Subtarget.hasMips64r6()) {
+    // Mips release 6 can load from an address that is not naturally aligned.
+ if (Subtarget.isGP64bit()) {
+ Register Temp = MRI.createVirtualRegister(&Mips::GPR64RegClass);
+ BuildMI(*BB, I, DL, TII->get(Mips::LD))
+ .addDef(Temp)
+ .addUse(Address)
+ .addImm(Imm);
+ BuildMI(*BB, I, DL, TII->get(Mips::FILL_D)).addDef(Dest).addUse(Temp);
+ } else {
+ Register Wtemp = MRI.createVirtualRegister(&Mips::MSA128WRegClass);
+ Register Lo = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ Register Hi = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ BuildMI(*BB, I, DL, TII->get(Mips::LW))
+ .addDef(Lo)
+ .addUse(Address)
+ .addImm(Imm + (IsLittle ? 0 : 4));
+ BuildMI(*BB, I, DL, TII->get(Mips::LW))
+ .addDef(Hi)
+ .addUse(Address)
+ .addImm(Imm + (IsLittle ? 4 : 0));
+ BuildMI(*BB, I, DL, TII->get(Mips::FILL_W)).addDef(Wtemp).addUse(Lo);
+ BuildMI(*BB, I, DL, TII->get(Mips::INSERT_W), Dest)
+ .addUse(Wtemp)
+ .addUse(Hi)
+ .addImm(1);
+ }
+ } else {
+ // Mips release 5 needs to use instructions that can load from an unaligned
+ // memory address.
+ Register LoHalf = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ Register LoFull = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ Register LoUndef = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ Register HiHalf = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ Register HiFull = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ Register HiUndef = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ Register Wtemp = MRI.createVirtualRegister(&Mips::MSA128WRegClass);
+ BuildMI(*BB, I, DL, TII->get(Mips::IMPLICIT_DEF)).addDef(LoUndef);
+ BuildMI(*BB, I, DL, TII->get(Mips::LWR))
+ .addDef(LoHalf)
+ .addUse(Address)
+ .addImm(Imm + (IsLittle ? 0 : 7))
+ .addUse(LoUndef);
+ BuildMI(*BB, I, DL, TII->get(Mips::LWL))
+ .addDef(LoFull)
+ .addUse(Address)
+ .addImm(Imm + (IsLittle ? 3 : 4))
+ .addUse(LoHalf);
+ BuildMI(*BB, I, DL, TII->get(Mips::IMPLICIT_DEF)).addDef(HiUndef);
+ BuildMI(*BB, I, DL, TII->get(Mips::LWR))
+ .addDef(HiHalf)
+ .addUse(Address)
+ .addImm(Imm + (IsLittle ? 4 : 3))
+ .addUse(HiUndef);
+ BuildMI(*BB, I, DL, TII->get(Mips::LWL))
+ .addDef(HiFull)
+ .addUse(Address)
+ .addImm(Imm + (IsLittle ? 7 : 0))
+ .addUse(HiHalf);
+ BuildMI(*BB, I, DL, TII->get(Mips::FILL_W)).addDef(Wtemp).addUse(LoFull);
+ BuildMI(*BB, I, DL, TII->get(Mips::INSERT_W), Dest)
+ .addUse(Wtemp)
+ .addUse(HiFull)
+ .addImm(1);
+ }
+
+ MI.eraseFromParent();
+ return BB;
+}
+
+MachineBasicBlock *MipsTargetLowering::emitSTR_W(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const bool IsLittle = Subtarget.isLittle();
+ DebugLoc DL = MI.getDebugLoc();
+
+ Register StoreVal = MI.getOperand(0).getReg();
+ Register Address = MI.getOperand(1).getReg();
+ unsigned Imm = MI.getOperand(2).getImm();
+
+ MachineBasicBlock::iterator I(MI);
+
+ if (Subtarget.hasMips32r6() || Subtarget.hasMips64r6()) {
+    // Mips release 6 can store to an address that is not naturally aligned.
+ Register BitcastW = MRI.createVirtualRegister(&Mips::MSA128WRegClass);
+ Register Tmp = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ BuildMI(*BB, I, DL, TII->get(Mips::COPY)).addDef(BitcastW).addUse(StoreVal);
+ BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W))
+ .addDef(Tmp)
+ .addUse(BitcastW)
+ .addImm(0);
+ BuildMI(*BB, I, DL, TII->get(Mips::SW))
+ .addUse(Tmp)
+ .addUse(Address)
+ .addImm(Imm);
+ } else {
+ // Mips release 5 needs to use instructions that can store to an unaligned
+ // memory address.
+ Register Tmp = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W))
+ .addDef(Tmp)
+ .addUse(StoreVal)
+ .addImm(0);
+ BuildMI(*BB, I, DL, TII->get(Mips::SWR))
+ .addUse(Tmp)
+ .addUse(Address)
+ .addImm(Imm + (IsLittle ? 0 : 3));
+ BuildMI(*BB, I, DL, TII->get(Mips::SWL))
+ .addUse(Tmp)
+ .addUse(Address)
+ .addImm(Imm + (IsLittle ? 3 : 0));
+ }
+
+ MI.eraseFromParent();
+
+ return BB;
+}
+
+MachineBasicBlock *MipsTargetLowering::emitSTR_D(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const bool IsLittle = Subtarget.isLittle();
+ DebugLoc DL = MI.getDebugLoc();
+
+ Register StoreVal = MI.getOperand(0).getReg();
+ Register Address = MI.getOperand(1).getReg();
+ unsigned Imm = MI.getOperand(2).getImm();
+
+ MachineBasicBlock::iterator I(MI);
+
+ if (Subtarget.hasMips32r6() || Subtarget.hasMips64r6()) {
+    // Mips release 6 can store to an address that is not naturally aligned.
+ if (Subtarget.isGP64bit()) {
+ Register BitcastD = MRI.createVirtualRegister(&Mips::MSA128DRegClass);
+ Register Lo = MRI.createVirtualRegister(&Mips::GPR64RegClass);
+ BuildMI(*BB, I, DL, TII->get(Mips::COPY))
+ .addDef(BitcastD)
+ .addUse(StoreVal);
+ BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_D))
+ .addDef(Lo)
+ .addUse(BitcastD)
+ .addImm(0);
+ BuildMI(*BB, I, DL, TII->get(Mips::SD))
+ .addUse(Lo)
+ .addUse(Address)
+ .addImm(Imm);
+ } else {
+ Register BitcastW = MRI.createVirtualRegister(&Mips::MSA128WRegClass);
+ Register Lo = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ Register Hi = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ BuildMI(*BB, I, DL, TII->get(Mips::COPY))
+ .addDef(BitcastW)
+ .addUse(StoreVal);
+ BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W))
+ .addDef(Lo)
+ .addUse(BitcastW)
+ .addImm(0);
+ BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W))
+ .addDef(Hi)
+ .addUse(BitcastW)
+ .addImm(1);
+ BuildMI(*BB, I, DL, TII->get(Mips::SW))
+ .addUse(Lo)
+ .addUse(Address)
+ .addImm(Imm + (IsLittle ? 0 : 4));
+ BuildMI(*BB, I, DL, TII->get(Mips::SW))
+ .addUse(Hi)
+ .addUse(Address)
+ .addImm(Imm + (IsLittle ? 4 : 0));
+ }
+ } else {
+ // Mips release 5 needs to use instructions that can store to an unaligned
+ // memory address.
+ Register Bitcast = MRI.createVirtualRegister(&Mips::MSA128WRegClass);
+ Register Lo = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ Register Hi = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ BuildMI(*BB, I, DL, TII->get(Mips::COPY)).addDef(Bitcast).addUse(StoreVal);
+ BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W))
+ .addDef(Lo)
+ .addUse(Bitcast)
+ .addImm(0);
+ BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W))
+ .addDef(Hi)
+ .addUse(Bitcast)
+ .addImm(1);
+ BuildMI(*BB, I, DL, TII->get(Mips::SWR))
+ .addUse(Lo)
+ .addUse(Address)
+ .addImm(Imm + (IsLittle ? 0 : 3));
+ BuildMI(*BB, I, DL, TII->get(Mips::SWL))
+ .addUse(Lo)
+ .addUse(Address)
+ .addImm(Imm + (IsLittle ? 3 : 0));
+ BuildMI(*BB, I, DL, TII->get(Mips::SWR))
+ .addUse(Hi)
+ .addUse(Address)
+ .addImm(Imm + (IsLittle ? 4 : 7));
+ BuildMI(*BB, I, DL, TII->get(Mips::SWL))
+ .addUse(Hi)
+ .addUse(Address)
+ .addImm(Imm + (IsLittle ? 7 : 4));
+ }
+
+ MI.eraseFromParent();
+ return BB;
+}
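
The new emitLDR_W/emitLDR_D/emitSTR_W/emitSTR_D expanders fall back to LWL/LWR (and SWL/SWR) pairs on pre-R6 cores, where the immediate offsets of the two partial accesses depend on endianness. Below is a standalone sketch of just that offset arithmetic for the 4-byte case, mirroring emitLDR_W above; the real lowering emits MachineInstrs and moves the result into an MSA register with FILL_W.

#include <cstdio>
#include <utility>

// Returns {LWR offset, LWL offset} relative to the base immediate.
// LWR fills the low bytes of the word and LWL the high bytes.
static std::pair<int, int> unalignedWordOffsets(int Imm, bool IsLittle) {
  int LWROff = Imm + (IsLittle ? 0 : 3);
  int LWLOff = Imm + (IsLittle ? 3 : 0);
  return std::make_pair(LWROff, LWLOff);
}

int main() {
  std::pair<int, int> LE = unalignedWordOffsets(8, /*IsLittle=*/true);
  std::pair<int, int> BE = unalignedWordOffsets(8, /*IsLittle=*/false);
  std::printf("little endian: lwr at %d, lwl at %d\n", LE.first, LE.second);
  std::printf("big endian:    lwr at %d, lwl at %d\n", BE.first, BE.second);
  return 0;
}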
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.h
index 92cbe1d54c5b..16b4d51d3ca6 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -306,7 +306,7 @@ class TargetRegisterClass;
/// Return the correct alignment for the current calling convention.
Align getABIAlignmentForCallingConv(Type *ArgTy,
DataLayout DL) const override {
- const Align ABIAlign(DL.getABITypeAlignment(ArgTy));
+ const Align ABIAlign = DL.getABITypeAlign(ArgTy);
if (ArgTy->isVectorTy())
return std::min(ABIAlign, Align(8));
return ABIAlign;
@@ -346,21 +346,21 @@ class TargetRegisterClass;
void AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const override;
- void HandleByVal(CCState *, unsigned &, unsigned) const override;
+ void HandleByVal(CCState *, unsigned &, Align) const override;
Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
- unsigned
+ Register
getExceptionPointerRegister(const Constant *PersonalityFn) const override {
return ABI.IsN64() ? Mips::A0_64 : Mips::A0;
}
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
- unsigned
+ Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
return ABI.IsN64() ? Mips::A1_64 : Mips::A1;
}
@@ -669,10 +669,7 @@ class TargetRegisterClass;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
- EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
- unsigned SrcAlign,
- bool IsMemset, bool ZeroMemset,
- bool MemcpyStrSrc,
+ EVT getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const override;
/// isFPImmLegal - Returns true if the target can instruction select the
@@ -709,6 +706,10 @@ class TargetRegisterClass;
bool isFPCmp, unsigned Opc) const;
MachineBasicBlock *emitPseudoD_SELECT(MachineInstr &MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitLDR_W(MachineInstr &MI, MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitLDR_D(MachineInstr &MI, MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitSTR_W(MachineInstr &MI, MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitSTR_D(MachineInstr &MI, MachineBasicBlock *BB) const;
};
/// Create MipsTargetLowering objects.
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFPU.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFPU.td
index 79776998463f..5696df96e798 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFPU.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFPU.td
@@ -48,6 +48,7 @@ def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond,
[SDNPHasChain, SDNPOptInGlue]>;
def MipsTruncIntFP : SDNode<"MipsISD::TruncIntFP", SDT_MipsTruncIntFP>;
def MipsBuildPairF64 : SDNode<"MipsISD::BuildPairF64", SDT_MipsBuildPairF64>;
+def : GINodeEquiv<G_MERGE_VALUES, MipsBuildPairF64>;
def MipsExtractElementF64 : SDNode<"MipsISD::ExtractElementF64",
SDT_MipsExtractElementF64>;
@@ -62,15 +63,17 @@ let PrintMethod = "printFCCOperand", DecoderMethod = "DecodeCondCode" in
//===----------------------------------------------------------------------===//
def IsFP64bit : Predicate<"Subtarget->isFP64bit()">,
- AssemblerPredicate<"FeatureFP64Bit">;
+ AssemblerPredicate<(all_of FeatureFP64Bit)>;
def NotFP64bit : Predicate<"!Subtarget->isFP64bit()">,
- AssemblerPredicate<"!FeatureFP64Bit">;
+ AssemblerPredicate<(all_of (not FeatureFP64Bit))>;
def IsSingleFloat : Predicate<"Subtarget->isSingleFloat()">,
- AssemblerPredicate<"FeatureSingleFloat">;
+ AssemblerPredicate<(all_of FeatureSingleFloat)>;
def IsNotSingleFloat : Predicate<"!Subtarget->isSingleFloat()">,
- AssemblerPredicate<"!FeatureSingleFloat">;
+ AssemblerPredicate<(all_of (not FeatureSingleFloat))>;
def IsNotSoftFloat : Predicate<"!Subtarget->useSoftFloat()">,
- AssemblerPredicate<"!FeatureSoftFloat">;
+ AssemblerPredicate<(all_of (not FeatureSoftFloat))>;
+def HasMips3D : Predicate<"Subtarget->has3D()">,
+ AssemblerPredicate<(all_of FeatureMips3D)>;
//===----------------------------------------------------------------------===//
// Mips FGR size adjectives.
@@ -455,6 +458,12 @@ let DecoderNamespace = "MipsFP64" in {
def PLU_PS64 : ADDS_FT<"plu.ps", FGR64Opnd, II_CVT, 0>,
ADDS_FM<0x2D, 22>,
ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ def PUL_PS64 : ADDS_FT<"pul.ps", FGR64Opnd, II_CVT, 0>,
+ ADDS_FM<0x2E, 22>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ def PUU_PS64 : ADDS_FT<"puu.ps", FGR64Opnd, II_CVT, 0>,
+ ADDS_FM<0x2F, 22>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
def CVT_S_PU64 : ABSS_FT<"cvt.s.pu", FGR32Opnd, FGR64Opnd, II_CVT>,
ABSS_FM<0x20, 22>,
@@ -470,6 +479,21 @@ let DecoderNamespace = "MipsFP64" in {
}
let DecoderNamespace = "MipsFP64" in {
+ let AdditionalPredicates = [HasMips3D] in {
+ def ADDR_PS64 : ADDS_FT<"addr.ps", FGR64Opnd, II_ADDR_PS, 0>,
+ ADDS_FM<0x18, 22>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ def MULR_PS64 : ADDS_FT<"mulr.ps", FGR64Opnd, II_MULR_PS, 0>,
+ ADDS_FM<0x1a, 22>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ def CVT_PS_PW64 : ABSS_FT<"cvt.ps.pw", FGR64Opnd, FGR64Opnd, II_CVT>,
+ ABSS_FM<0x26, 20>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ def CVT_PW_PS64 : ABSS_FT<"cvt.pw.ps", FGR64Opnd, FGR64Opnd, II_CVT>,
+ ABSS_FM<0x24, 22>,
+ ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
+ }
+}
+
+let DecoderNamespace = "MipsFP64" in {
let AdditionalPredicates = [NotInMicroMips] in {
def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32Opnd, FGR64Opnd, II_CVT>,
ABSS_FM<0x20, 21>, INSN_MIPS3_32R2, FGR_64;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFormats.td
index 4624c1f2d04a..10529c7d9e19 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrFormats.td
@@ -169,39 +169,6 @@ class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr,
}
//===----------------------------------------------------------------------===//
-// Format I instruction class in Mips : <|opcode|rs|rt|immediate|>
-//===----------------------------------------------------------------------===//
-
-class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: InstSE<outs, ins, asmstr, pattern, itin, FrmI>
-{
- bits<5> rt;
- bits<5> rs;
- bits<16> imm16;
-
- let Opcode = op;
-
- let Inst{25-21} = rs;
- let Inst{20-16} = rt;
- let Inst{15-0} = imm16;
-}
-
-class BranchBase<bits<6> op, dag outs, dag ins, string asmstr,
- list<dag> pattern, InstrItinClass itin>:
- InstSE<outs, ins, asmstr, pattern, itin, FrmI>
-{
- bits<5> rs;
- bits<5> rt;
- bits<16> imm16;
-
- let Opcode = op;
-
- let Inst{25-21} = rs;
- let Inst{20-16} = rt;
- let Inst{15-0} = imm16;
-}
-
-//===----------------------------------------------------------------------===//
// Format J instruction class in Mips : <|opcode|address|>
//===----------------------------------------------------------------------===//
@@ -711,20 +678,6 @@ class EI_FM<bits<1> sc> : StdArch
// Format FI instruction class in Mips : <|opcode|base|ft|immediate|>
//===----------------------------------------------------------------------===//
-class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
- InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmFI>
-{
- bits<5> ft;
- bits<5> base;
- bits<16> imm16;
-
- let Opcode = op;
-
- let Inst{25-21} = base;
- let Inst{20-16} = ft;
- let Inst{15-0} = imm16;
-}
-
class ADDS_FM<bits<6> funct, bits<5> fmt> : StdArch {
bits<5> fd;
bits<5> fs;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.cpp
index 25bbe5990827..0c6080258a3a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Target/TargetMachine.h"
@@ -66,10 +67,10 @@ MipsInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI,
MachineMemOperand::Flags Flags) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned Align = MFI.getObjectAlignment(FI);
return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
- Flags, MFI.getObjectSize(FI), Align);
+ Flags, MFI.getObjectSize(FI),
+ MFI.getObjectAlign(FI));
}
//===----------------------------------------------------------------------===//
@@ -841,3 +842,56 @@ MipsInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
};
return makeArrayRef(Flags);
}
+
+Optional<ParamLoadedValue>
+MipsInstrInfo::describeLoadedValue(const MachineInstr &MI, Register Reg) const {
+ DIExpression *Expr =
+ DIExpression::get(MI.getMF()->getFunction().getContext(), {});
+
+ // TODO: Special MIPS instructions that need to be described separately.
+ if (auto RegImm = isAddImmediate(MI, Reg)) {
+ Register SrcReg = RegImm->Reg;
+ int64_t Offset = RegImm->Imm;
+ // When SrcReg is $zero, treat loaded value as immediate only.
+ // Ex. $a2 = ADDiu $zero, 10
+ if (SrcReg == Mips::ZERO || SrcReg == Mips::ZERO_64) {
+ return ParamLoadedValue(MI.getOperand(2), Expr);
+ }
+ Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, Offset);
+ return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
+ } else if (auto DestSrc = isCopyInstr(MI)) {
+ const MachineFunction *MF = MI.getMF();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ Register DestReg = DestSrc->Destination->getReg();
+ // TODO: Handle cases where the Reg is sub- or super-register of the
+ // DestReg.
+ if (TRI->isSuperRegister(Reg, DestReg) || TRI->isSubRegister(Reg, DestReg))
+ return None;
+ }
+
+ return TargetInstrInfo::describeLoadedValue(MI, Reg);
+}
+
+Optional<RegImmPair> MipsInstrInfo::isAddImmediate(const MachineInstr &MI,
+ Register Reg) const {
+ // TODO: Handle cases where Reg is a super- or sub-register of the
+ // destination register.
+ const MachineOperand &Op0 = MI.getOperand(0);
+ if (!Op0.isReg() || Reg != Op0.getReg())
+ return None;
+
+ switch (MI.getOpcode()) {
+ case Mips::ADDiu:
+ case Mips::DADDiu: {
+ const MachineOperand &Dop = MI.getOperand(0);
+ const MachineOperand &Sop1 = MI.getOperand(1);
+ const MachineOperand &Sop2 = MI.getOperand(2);
+ // Value is sum of register and immediate. Immediate value could be
+ // global string address which is not supported.
+ if (Dop.isReg() && Sop1.isReg() && Sop2.isImm())
+ return RegImmPair{Sop1.getReg(), Sop2.getImm()};
+ // TODO: Handle case where Sop1 is a frame-index.
+ }
+ }
+ return None;
+} \ No newline at end of file
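
describeLoadedValue and isAddImmediate feed the debug entry-value machinery: an ADDiu/DADDiu of an immediate onto a register is described as that source register plus a DWARF offset expression, and an add onto $zero collapses to a plain immediate. A hedged sketch of how a consumer might query the hook; the surrounding pass context (TII, MI, ParamReg) is assumed and not part of this patch.

#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"

using namespace llvm;

// Illustrative consumer of the hooks added above.
static void describeParamInReg(const TargetInstrInfo &TII,
                               const MachineInstr &MI, Register ParamReg) {
  if (Optional<ParamLoadedValue> PLV = TII.describeLoadedValue(MI, ParamReg)) {
    const MachineOperand &Src = PLV->first;  // source register or immediate
    const DIExpression *Expr = PLV->second;  // e.g. DW_OP_plus_uconst <Imm>
    (void)Src;
    (void)Expr;
    // A DWARF emitter would wrap this in a DW_OP_entry_value expression.
  }
}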
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.h
index 092a960b4ba7..c96ed202df30 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.h
@@ -106,12 +106,16 @@ public:
virtual unsigned getOppositeBranchOpc(unsigned Opc) const = 0;
+ virtual bool isBranchWithImm(unsigned Opc) const {
+ return false;
+ }
+
/// Return the number of bytes of code the specified instruction may be.
unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
+ Register SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override {
storeRegToStack(MBB, MBBI, SrcReg, isKill, FrameIndex, RC, TRI, 0);
@@ -119,7 +123,7 @@ public:
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
+ Register DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override {
loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, TRI, 0);
@@ -127,14 +131,14 @@ public:
virtual void storeRegToStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIndex,
+ Register SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI,
int64_t Offset) const = 0;
virtual void loadRegFromStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIndex,
+ Register DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI,
int64_t Offset) const = 0;
@@ -161,6 +165,12 @@ public:
ArrayRef<std::pair<unsigned, const char *>>
getSerializableDirectMachineOperandTargetFlags() const override;
+ Optional<RegImmPair> isAddImmediate(const MachineInstr &MI,
+ Register Reg) const override;
+
+ Optional<ParamLoadedValue> describeLoadedValue(const MachineInstr &MI,
+ Register Reg) const override;
+
protected:
bool isZeroImm(const MachineOperand &op) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td
index d9a3ff802708..a3b928870f3f 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -156,69 +156,69 @@ def MipsSDR : SDNode<"MipsISD::SDR", SDTStore,
// Mips Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
def HasMips2 : Predicate<"Subtarget->hasMips2()">,
- AssemblerPredicate<"FeatureMips2">;
+ AssemblerPredicate<(all_of FeatureMips2)>;
def HasMips3_32 : Predicate<"Subtarget->hasMips3_32()">,
- AssemblerPredicate<"FeatureMips3_32">;
+ AssemblerPredicate<(all_of FeatureMips3_32)>;
def HasMips3_32r2 : Predicate<"Subtarget->hasMips3_32r2()">,
- AssemblerPredicate<"FeatureMips3_32r2">;
+ AssemblerPredicate<(all_of FeatureMips3_32r2)>;
def HasMips3 : Predicate<"Subtarget->hasMips3()">,
- AssemblerPredicate<"FeatureMips3">;
+ AssemblerPredicate<(all_of FeatureMips3)>;
def NotMips3 : Predicate<"!Subtarget->hasMips3()">,
- AssemblerPredicate<"!FeatureMips3">;
+ AssemblerPredicate<(all_of (not FeatureMips3))>;
def HasMips4_32 : Predicate<"Subtarget->hasMips4_32()">,
- AssemblerPredicate<"FeatureMips4_32">;
+ AssemblerPredicate<(all_of FeatureMips4_32)>;
def NotMips4_32 : Predicate<"!Subtarget->hasMips4_32()">,
- AssemblerPredicate<"!FeatureMips4_32">;
+ AssemblerPredicate<(all_of (not FeatureMips4_32))>;
def HasMips4_32r2 : Predicate<"Subtarget->hasMips4_32r2()">,
- AssemblerPredicate<"FeatureMips4_32r2">;
+ AssemblerPredicate<(all_of FeatureMips4_32r2)>;
def HasMips5_32r2 : Predicate<"Subtarget->hasMips5_32r2()">,
- AssemblerPredicate<"FeatureMips5_32r2">;
+ AssemblerPredicate<(all_of FeatureMips5_32r2)>;
def HasMips32 : Predicate<"Subtarget->hasMips32()">,
- AssemblerPredicate<"FeatureMips32">;
+ AssemblerPredicate<(all_of FeatureMips32)>;
def HasMips32r2 : Predicate<"Subtarget->hasMips32r2()">,
- AssemblerPredicate<"FeatureMips32r2">;
+ AssemblerPredicate<(all_of FeatureMips32r2)>;
def HasMips32r5 : Predicate<"Subtarget->hasMips32r5()">,
- AssemblerPredicate<"FeatureMips32r5">;
+ AssemblerPredicate<(all_of FeatureMips32r5)>;
def HasMips32r6 : Predicate<"Subtarget->hasMips32r6()">,
- AssemblerPredicate<"FeatureMips32r6">;
+ AssemblerPredicate<(all_of FeatureMips32r6)>;
def NotMips32r6 : Predicate<"!Subtarget->hasMips32r6()">,
- AssemblerPredicate<"!FeatureMips32r6">;
+ AssemblerPredicate<(all_of (not FeatureMips32r6))>;
def IsGP64bit : Predicate<"Subtarget->isGP64bit()">,
- AssemblerPredicate<"FeatureGP64Bit">;
+ AssemblerPredicate<(all_of FeatureGP64Bit)>;
def IsGP32bit : Predicate<"!Subtarget->isGP64bit()">,
- AssemblerPredicate<"!FeatureGP64Bit">;
+ AssemblerPredicate<(all_of (not FeatureGP64Bit))>;
def IsPTR64bit : Predicate<"Subtarget->isABI_N64()">,
- AssemblerPredicate<"FeaturePTR64Bit">;
+ AssemblerPredicate<(all_of FeaturePTR64Bit)>;
def IsPTR32bit : Predicate<"!Subtarget->isABI_N64()">,
- AssemblerPredicate<"!FeaturePTR64Bit">;
+ AssemblerPredicate<(all_of (not FeaturePTR64Bit))>;
def HasMips64 : Predicate<"Subtarget->hasMips64()">,
- AssemblerPredicate<"FeatureMips64">;
+ AssemblerPredicate<(all_of FeatureMips64)>;
def NotMips64 : Predicate<"!Subtarget->hasMips64()">,
- AssemblerPredicate<"!FeatureMips64">;
+ AssemblerPredicate<(all_of (not FeatureMips64))>;
def HasMips64r2 : Predicate<"Subtarget->hasMips64r2()">,
- AssemblerPredicate<"FeatureMips64r2">;
+ AssemblerPredicate<(all_of FeatureMips64r2)>;
def HasMips64r5 : Predicate<"Subtarget->hasMips64r5()">,
- AssemblerPredicate<"FeatureMips64r5">;
+ AssemblerPredicate<(all_of FeatureMips64r5)>;
def HasMips64r6 : Predicate<"Subtarget->hasMips64r6()">,
- AssemblerPredicate<"FeatureMips64r6">;
+ AssemblerPredicate<(all_of FeatureMips64r6)>;
def NotMips64r6 : Predicate<"!Subtarget->hasMips64r6()">,
- AssemblerPredicate<"!FeatureMips64r6">;
+ AssemblerPredicate<(all_of (not FeatureMips64r6))>;
def InMips16Mode : Predicate<"Subtarget->inMips16Mode()">,
- AssemblerPredicate<"FeatureMips16">;
+ AssemblerPredicate<(all_of FeatureMips16)>;
def NotInMips16Mode : Predicate<"!Subtarget->inMips16Mode()">,
- AssemblerPredicate<"!FeatureMips16">;
+ AssemblerPredicate<(all_of (not FeatureMips16))>;
def HasCnMips : Predicate<"Subtarget->hasCnMips()">,
- AssemblerPredicate<"FeatureCnMips">;
+ AssemblerPredicate<(all_of FeatureCnMips)>;
def NotCnMips : Predicate<"!Subtarget->hasCnMips()">,
- AssemblerPredicate<"!FeatureCnMips">;
+ AssemblerPredicate<(all_of (not FeatureCnMips))>;
def HasCnMipsP : Predicate<"Subtarget->hasCnMipsP()">,
- AssemblerPredicate<"FeatureCnMipsP">;
+ AssemblerPredicate<(all_of FeatureCnMipsP)>;
def NotCnMipsP : Predicate<"!Subtarget->hasCnMipsP()">,
- AssemblerPredicate<"!FeatureCnMipsP">;
+ AssemblerPredicate<(all_of (not FeatureCnMipsP))>;
def IsSym32 : Predicate<"Subtarget->hasSym32()">,
- AssemblerPredicate<"FeatureSym32">;
+ AssemblerPredicate<(all_of FeatureSym32)>;
def IsSym64 : Predicate<"!Subtarget->hasSym32()">,
- AssemblerPredicate<"!FeatureSym32">;
+ AssemblerPredicate<(all_of (not FeatureSym32))>;
def IsN64 : Predicate<"Subtarget->isABI_N64()">;
def IsNotN64 : Predicate<"!Subtarget->isABI_N64()">;
def RelocNotPIC : Predicate<"!TM.isPositionIndependent()">;
@@ -227,34 +227,34 @@ def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
def UseAbs : Predicate<"Subtarget->inAbs2008Mode() ||"
"TM.Options.NoNaNsFPMath">;
def HasStdEnc : Predicate<"Subtarget->hasStandardEncoding()">,
- AssemblerPredicate<"!FeatureMips16">;
+ AssemblerPredicate<(all_of (not FeatureMips16))>;
def NotDSP : Predicate<"!Subtarget->hasDSP()">;
def InMicroMips : Predicate<"Subtarget->inMicroMipsMode()">,
- AssemblerPredicate<"FeatureMicroMips">;
+ AssemblerPredicate<(all_of FeatureMicroMips)>;
def NotInMicroMips : Predicate<"!Subtarget->inMicroMipsMode()">,
- AssemblerPredicate<"!FeatureMicroMips">;
+ AssemblerPredicate<(all_of (not FeatureMicroMips))>;
def IsLE : Predicate<"Subtarget->isLittle()">;
def IsBE : Predicate<"!Subtarget->isLittle()">;
def IsNotNaCl : Predicate<"!Subtarget->isTargetNaCl()">;
-def UseTCCInDIV : AssemblerPredicate<"FeatureUseTCCInDIV">;
+def UseTCCInDIV : AssemblerPredicate<(all_of FeatureUseTCCInDIV)>;
def HasEVA : Predicate<"Subtarget->hasEVA()">,
- AssemblerPredicate<"FeatureEVA">;
+ AssemblerPredicate<(all_of FeatureEVA)>;
def HasMSA : Predicate<"Subtarget->hasMSA()">,
- AssemblerPredicate<"FeatureMSA">;
+ AssemblerPredicate<(all_of FeatureMSA)>;
def HasMadd4 : Predicate<"!Subtarget->disableMadd4()">,
- AssemblerPredicate<"!FeatureMadd4">;
+ AssemblerPredicate<(all_of (not FeatureMadd4))>;
def HasMT : Predicate<"Subtarget->hasMT()">,
- AssemblerPredicate<"FeatureMT">;
+ AssemblerPredicate<(all_of FeatureMT)>;
def UseIndirectJumpsHazard : Predicate<"Subtarget->useIndirectJumpsHazard()">,
- AssemblerPredicate<"FeatureUseIndirectJumpsHazard">;
+ AssemblerPredicate<(all_of FeatureUseIndirectJumpsHazard)>;
def NoIndirectJumpGuards : Predicate<"!Subtarget->useIndirectJumpsHazard()">,
- AssemblerPredicate<"!FeatureUseIndirectJumpsHazard">;
+ AssemblerPredicate<(all_of (not FeatureUseIndirectJumpsHazard))>;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
- AssemblerPredicate<"FeatureCRC">;
+ AssemblerPredicate<(all_of FeatureCRC)>;
def HasVirt : Predicate<"Subtarget->hasVirt()">,
- AssemblerPredicate<"FeatureVirt">;
+ AssemblerPredicate<(all_of FeatureVirt)>;
def HasGINV : Predicate<"Subtarget->hasGINV()">,
- AssemblerPredicate<"FeatureGINV">;
+ AssemblerPredicate<(all_of FeatureGINV)>;
// TODO: Add support for FPOpFusion::Standard
def AllowFPOpFusion : Predicate<"TM.Options.AllowFPOpFusion =="
" FPOpFusion::Fast">;
@@ -498,7 +498,7 @@ class MADD4 {
list<Predicate> AdditionalPredicates = [HasMadd4];
}
-// Classses used for separating expansions that differ based on the ABI in
+// Classes used for separating expansions that differ based on the ABI in
// use.
class ABI_N64 {
list<Predicate> AdditionalPredicates = [IsN64];
@@ -1286,7 +1286,7 @@ def LUiORiPred : PatLeaf<(imm), [{
return isInt<32>(SVal) && (SVal & 0xffff);
}]>;
-// Mips Address Mode! SDNode frameindex could possibily be a match
+// Mips Address Mode! SDNode frameindex could possibly be a match
// since load and store instructions from stack used it.
def addr :
ComplexPattern<iPTR, 2, "selectIntAddr", [frameindex]>;
@@ -1871,10 +1871,11 @@ class MTC3OP<string asmstr, RegisterOperand RO, RegisterOperand RD,
class TrapBase<Instruction RealInst>
: PseudoSE<(outs), (ins), [(trap)], II_TRAP>,
PseudoInstExpansion<(RealInst 0, 0)> {
- let isBarrier = 1;
- let isTerminator = 1;
+ let mayStore = 0;
+ let mayLoad = 0;
+ let hasSideEffects = 1;
+ let isTrap = 1;
let isCodeGenOnly = 1;
- let isCTI = 1;
}
//===----------------------------------------------------------------------===//
@@ -2588,6 +2589,22 @@ def : MipsInstAlias<"seq $rd, $imm",
(SEQIMacro GPR32Opnd:$rd, GPR32Opnd:$rd, simm32:$imm), 0>,
NOT_ASE_CNMIPS;
+def SNEMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
+ "sne $rd, $rs, $rt">, NOT_ASE_CNMIPS;
+
+def : MipsInstAlias<"sne $rd, $rs",
+ (SNEMacro GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>,
+ NOT_ASE_CNMIPS;
+
+def SNEIMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+ (ins GPR32Opnd:$rs, simm32_relaxed:$imm),
+ "sne $rd, $rs, $imm">, NOT_ASE_CNMIPS;
+
+def : MipsInstAlias<"sne $rd, $imm",
+ (SNEIMacro GPR32Opnd:$rd, GPR32Opnd:$rd, simm32:$imm), 0>,
+ NOT_ASE_CNMIPS;
+
def MULImmMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rd, GPR32Opnd:$rs,
simm32_relaxed:$imm),
"mul\t$rd, $rs, $imm">,
@@ -2735,6 +2752,34 @@ let AdditionalPredicates = [NotInMicroMips] in {
uimm32_coerced:$imm), 0>,
GPR_32;
+ def SLE : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
+ "sle\t$rd, $rs, $rt">, ISA_MIPS1;
+ def : MipsInstAlias<"sle $rs, $rt",
+ (SLE GPR32Opnd:$rs, GPR32Opnd:$rs, GPR32Opnd:$rt), 0>,
+ ISA_MIPS1;
+ def SLEImm : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+ (ins GPR32Opnd:$rs, simm32:$imm),
+ "sle\t$rd, $rs, $imm">, GPR_32;
+ def : MipsInstAlias<"sle $rs, $imm", (SLEImm GPR32Opnd:$rs,
+ GPR32Opnd:$rs,
+ simm32:$imm), 0>,
+ GPR_32;
+
+ def SLEU : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
+ "sleu\t$rd, $rs, $rt">, ISA_MIPS1;
+ def : MipsInstAlias<"sleu $rs, $rt",
+ (SLEU GPR32Opnd:$rs, GPR32Opnd:$rs, GPR32Opnd:$rt), 0>,
+ ISA_MIPS1;
+ def SLEUImm : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+ (ins GPR32Opnd:$rs, uimm32_coerced:$imm),
+ "sleu\t$rd, $rs, $imm">, GPR_32;
+ def : MipsInstAlias<"sleu $rs, $imm", (SLEUImm GPR32Opnd:$rs,
+ GPR32Opnd:$rs,
+ uimm32_coerced:$imm), 0>,
+ GPR_32;
+
def : MipsInstAlias<
"not $rt, $rs",
(NOR GPR32Opnd:$rt, GPR32Opnd:$rs, ZERO), 0>, ISA_MIPS1;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
index 2f4c9d74262e..256fb74c1d6c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
@@ -49,6 +49,12 @@ private:
getRegClassForTypeOnBank(Register Reg, MachineRegisterInfo &MRI) const;
unsigned selectLoadStoreOpCode(MachineInstr &I,
MachineRegisterInfo &MRI) const;
+ bool buildUnalignedStore(MachineInstr &I, unsigned Opc,
+ MachineOperand &BaseAddr, unsigned Offset,
+ MachineMemOperand *MMO) const;
+ bool buildUnalignedLoad(MachineInstr &I, unsigned Opc, Register Dest,
+ MachineOperand &BaseAddr, unsigned Offset,
+ Register TiedDest, MachineMemOperand *MMO) const;
const MipsTargetMachine &TM;
const MipsSubtarget &STI;
@@ -248,6 +254,35 @@ MipsInstructionSelector::selectLoadStoreOpCode(MachineInstr &I,
return Opc;
}
+bool MipsInstructionSelector::buildUnalignedStore(
+ MachineInstr &I, unsigned Opc, MachineOperand &BaseAddr, unsigned Offset,
+ MachineMemOperand *MMO) const {
+ MachineInstr *NewInst =
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opc))
+ .add(I.getOperand(0))
+ .add(BaseAddr)
+ .addImm(Offset)
+ .addMemOperand(MMO);
+ if (!constrainSelectedInstRegOperands(*NewInst, TII, TRI, RBI))
+ return false;
+ return true;
+}
+
+bool MipsInstructionSelector::buildUnalignedLoad(
+ MachineInstr &I, unsigned Opc, Register Dest, MachineOperand &BaseAddr,
+ unsigned Offset, Register TiedDest, MachineMemOperand *MMO) const {
+ MachineInstr *NewInst =
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opc))
+ .addDef(Dest)
+ .add(BaseAddr)
+ .addImm(Offset)
+ .addUse(TiedDest)
+ .addMemOperand(*I.memoperands_begin());
+ if (!constrainSelectedInstRegOperands(*NewInst, TII, TRI, RBI))
+ return false;
+ return true;
+}
+
bool MipsInstructionSelector::select(MachineInstr &I) {
MachineBasicBlock &MBB = *I.getParent();
@@ -358,7 +393,7 @@ bool MipsInstructionSelector::select(MachineInstr &I) {
.addUse(DestAddress)
.addJumpTableIndex(I.getOperand(1).getIndex(), MipsII::MO_ABS_LO)
.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo(), MachineMemOperand::MOLoad, 4, 4));
+ MachinePointerInfo(), MachineMemOperand::MOLoad, 4, Align(4)));
if (!constrainSelectedInstRegOperands(*LW, TII, TRI, RBI))
return false;
@@ -369,7 +404,7 @@ bool MipsInstructionSelector::select(MachineInstr &I) {
.addDef(Dest)
.addUse(DestTmp)
.addUse(MF.getInfo<MipsFunctionInfo>()
- ->getGlobalBaseRegForGlobalISel());
+ ->getGlobalBaseRegForGlobalISel(MF));
if (!constrainSelectedInstRegOperands(*ADDu, TII, TRI, RBI))
return false;
}
@@ -404,10 +439,7 @@ bool MipsInstructionSelector::select(MachineInstr &I) {
case G_LOAD:
case G_ZEXTLOAD:
case G_SEXTLOAD: {
- const unsigned NewOpc = selectLoadStoreOpCode(I, MRI);
- if (NewOpc == I.getOpcode())
- return false;
-
+ auto MMO = *I.memoperands_begin();
MachineOperand BaseAddr = I.getOperand(1);
int64_t SignedOffset = 0;
// Try to fold load/store + G_PTR_ADD + G_CONSTANT
@@ -429,11 +461,48 @@ bool MipsInstructionSelector::select(MachineInstr &I) {
}
}
+ // Unaligned memory access
+ if (MMO->getAlign() < MMO->getSize() &&
+ !STI.systemSupportsUnalignedAccess()) {
+ if (MMO->getSize() != 4 || !isRegInGprb(I.getOperand(0).getReg(), MRI))
+ return false;
+
+ if (I.getOpcode() == G_STORE) {
+ if (!buildUnalignedStore(I, Mips::SWL, BaseAddr, SignedOffset + 3, MMO))
+ return false;
+ if (!buildUnalignedStore(I, Mips::SWR, BaseAddr, SignedOffset, MMO))
+ return false;
+ I.eraseFromParent();
+ return true;
+ }
+
+ if (I.getOpcode() == G_LOAD) {
+ Register ImplDef = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::IMPLICIT_DEF))
+ .addDef(ImplDef);
+ Register Tmp = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ if (!buildUnalignedLoad(I, Mips::LWL, Tmp, BaseAddr, SignedOffset + 3,
+ ImplDef, MMO))
+ return false;
+ if (!buildUnalignedLoad(I, Mips::LWR, I.getOperand(0).getReg(),
+ BaseAddr, SignedOffset, Tmp, MMO))
+ return false;
+ I.eraseFromParent();
+ return true;
+ }
+
+ return false;
+ }
+
+ const unsigned NewOpc = selectLoadStoreOpCode(I, MRI);
+ if (NewOpc == I.getOpcode())
+ return false;
+
MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
.add(I.getOperand(0))
.add(BaseAddr)
.addImm(SignedOffset)
- .addMemOperand(*I.memoperands_begin());
+ .addMemOperand(MMO);
break;
}
case G_UDIV:
@@ -472,6 +541,36 @@ bool MipsInstructionSelector::select(MachineInstr &I) {
.add(I.getOperand(3));
break;
}
+ case G_UNMERGE_VALUES: {
+ if (I.getNumOperands() != 3)
+ return false;
+ Register Src = I.getOperand(2).getReg();
+ Register Lo = I.getOperand(0).getReg();
+ Register Hi = I.getOperand(1).getReg();
+ if (!isRegInFprb(Src, MRI) ||
+ !(isRegInGprb(Lo, MRI) && isRegInGprb(Hi, MRI)))
+ return false;
+
+ unsigned Opcode =
+ STI.isFP64bit() ? Mips::ExtractElementF64_64 : Mips::ExtractElementF64;
+
+ MachineInstr *ExtractLo = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Opcode))
+ .addDef(Lo)
+ .addUse(Src)
+ .addImm(0);
+ if (!constrainSelectedInstRegOperands(*ExtractLo, TII, TRI, RBI))
+ return false;
+
+ MachineInstr *ExtractHi = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Opcode))
+ .addDef(Hi)
+ .addUse(Src)
+ .addImm(1);
+ if (!constrainSelectedInstRegOperands(*ExtractHi, TII, TRI, RBI))
+ return false;
+
+ I.eraseFromParent();
+ return true;
+ }
case G_IMPLICIT_DEF: {
Register Dst = I.getOperand(0).getReg();
MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::IMPLICIT_DEF))
@@ -570,7 +669,7 @@ bool MipsInstructionSelector::select(MachineInstr &I) {
MachineInstr *LWGOT = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LW))
.addDef(I.getOperand(0).getReg())
.addReg(MF.getInfo<MipsFunctionInfo>()
- ->getGlobalBaseRegForGlobalISel())
+ ->getGlobalBaseRegForGlobalISel(MF))
.addGlobalAddress(GVal);
// Global Values that don't have local linkage are handled differently
// when they are part of call sequence. MipsCallLowering::lowerCall
@@ -582,7 +681,7 @@ bool MipsInstructionSelector::select(MachineInstr &I) {
LWGOT->getOperand(2).setTargetFlags(MipsII::MO_GOT);
LWGOT->addMemOperand(
MF, MF.getMachineMemOperand(MachinePointerInfo::getGOT(MF),
- MachineMemOperand::MOLoad, 4, 4));
+ MachineMemOperand::MOLoad, 4, Align(4)));
if (!constrainSelectedInstRegOperands(*LWGOT, TII, TRI, RBI))
return false;
@@ -626,11 +725,11 @@ bool MipsInstructionSelector::select(MachineInstr &I) {
MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LW))
.addDef(I.getOperand(0).getReg())
.addReg(MF.getInfo<MipsFunctionInfo>()
- ->getGlobalBaseRegForGlobalISel())
+ ->getGlobalBaseRegForGlobalISel(MF))
.addJumpTableIndex(I.getOperand(1).getIndex(), MipsII::MO_GOT)
- .addMemOperand(
- MF.getMachineMemOperand(MachinePointerInfo::getGOT(MF),
- MachineMemOperand::MOLoad, 4, 4));
+ .addMemOperand(MF.getMachineMemOperand(
+ MachinePointerInfo::getGOT(MF), MachineMemOperand::MOLoad, 4,
+ Align(4)));
} else {
MI =
BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LUi))
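
In the GlobalISel selector, a scalar G_LOAD/G_STORE is routed to the LWL/LWR (SWL/SWR) expansion only when the memory operand is less aligned than its size and the subtarget cannot perform the access directly. A small restatement of that guard; SupportsUnaligned stands in for MipsSubtarget::systemSupportsUnalignedAccess() and is an assumption of the sketch.

#include "llvm/CodeGen/MachineMemOperand.h"

using namespace llvm;

static bool needsUnalignedExpansion(const MachineMemOperand &MMO,
                                    bool SupportsUnaligned) {
  // Less aligned than its size, and the core cannot do it in hardware: the
  // selector then emits LWL/LWR or SWL/SWR pairs for 4-byte accesses.
  return MMO.getAlign() < MMO.getSize() && !SupportsUnaligned;
}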
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
index 9645aa24dc05..b489c8137769 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -21,22 +21,38 @@ struct TypesAndMemOps {
LLT ValTy;
LLT PtrTy;
unsigned MemSize;
- bool MustBeNaturallyAligned;
+ bool SystemSupportsUnalignedAccess;
};
+// Assumes power of 2 memory size. Subtargets that have only naturally-aligned
+// memory access need to perform additional legalization here.
+static bool isUnalignedMemmoryAccess(uint64_t MemSize, uint64_t AlignInBits) {
+ assert(isPowerOf2_64(MemSize) && "Expected power of 2 memory size");
+ assert(isPowerOf2_64(AlignInBits) && "Expected power of 2 align");
+ if (MemSize > AlignInBits)
+ return true;
+ return false;
+}
+
static bool
CheckTy0Ty1MemSizeAlign(const LegalityQuery &Query,
std::initializer_list<TypesAndMemOps> SupportedValues) {
+ unsigned QueryMemSize = Query.MMODescrs[0].SizeInBits;
+
+  // Non-power-of-two memory access is never legal.
+ if (!isPowerOf2_64(QueryMemSize))
+ return false;
+
for (auto &Val : SupportedValues) {
if (Val.ValTy != Query.Types[0])
continue;
if (Val.PtrTy != Query.Types[1])
continue;
- if (Val.MemSize != Query.MMODescrs[0].SizeInBits)
- continue;
- if (Val.MustBeNaturallyAligned &&
- Query.MMODescrs[0].SizeInBits % Query.MMODescrs[0].AlignInBits != 0)
+ if (Val.MemSize != QueryMemSize)
continue;
+ if (!Val.SystemSupportsUnalignedAccess &&
+ isUnalignedMemmoryAccess(QueryMemSize, Query.MMODescrs[0].AlignInBits))
+ return false;
return true;
}
return false;
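
The helper added above flags an access as unaligned when its power-of-two size in bits exceeds its alignment in bits. Below is a standalone restatement with a couple of sanity checks; the name is shortened for the sketch and is not the LLVM function.

#include <cassert>
#include <cstdint>

// Both quantities are in bits and must be powers of two.
static bool isUnderAligned(uint64_t MemSizeInBits, uint64_t AlignInBits) {
  assert((MemSizeInBits & (MemSizeInBits - 1)) == 0 && "size not a power of 2");
  assert((AlignInBits & (AlignInBits - 1)) == 0 && "align not a power of 2");
  return MemSizeInBits > AlignInBits;
}

int main() {
  assert(isUnderAligned(32, 16));   // 4-byte load, 2-byte aligned: unaligned
  assert(!isUnderAligned(32, 32));  // naturally aligned
  assert(!isUnderAligned(32, 64));  // over-aligned is still fine
  return 0;
}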
@@ -79,20 +95,55 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
.legalFor({s32})
.maxScalar(0, s32);
+ // MIPS32r6 does not have alignment restrictions for memory access.
+  // For MIPS32r5 and older, memory access must be naturally aligned, i.e.
+  // aligned to at least a multiple of its own size. There is, however, a
+  // two-instruction combination (lwr/lwl and swl/swr) that performs a 4-byte
+  // unaligned access, so 4-byte loads and stores are legal and use
+  // NoAlignRequirements.
+ bool NoAlignRequirements = true;
+
getActionDefinitionsBuilder({G_LOAD, G_STORE})
.legalIf([=, &ST](const LegalityQuery &Query) {
- if (CheckTy0Ty1MemSizeAlign(Query, {{s32, p0, 8, ST.hasMips32r6()},
- {s32, p0, 16, ST.hasMips32r6()},
- {s32, p0, 32, ST.hasMips32r6()},
- {p0, p0, 32, ST.hasMips32r6()},
- {s64, p0, 64, ST.hasMips32r6()}}))
+ if (CheckTy0Ty1MemSizeAlign(
+ Query, {{s32, p0, 8, NoAlignRequirements},
+ {s32, p0, 16, ST.systemSupportsUnalignedAccess()},
+ {s32, p0, 32, NoAlignRequirements},
+ {p0, p0, 32, NoAlignRequirements},
+ {s64, p0, 64, ST.systemSupportsUnalignedAccess()}}))
+ return true;
+ if (ST.hasMSA() && CheckTy0Ty1MemSizeAlign(
+ Query, {{v16s8, p0, 128, NoAlignRequirements},
+ {v8s16, p0, 128, NoAlignRequirements},
+ {v4s32, p0, 128, NoAlignRequirements},
+ {v2s64, p0, 128, NoAlignRequirements}}))
+ return true;
+ return false;
+ })
+ // Custom lower scalar memory access, up to 8 bytes, for:
+ // - non-power-of-2 MemSizes
+ // - unaligned 2 or 8 byte MemSizes for MIPS32r5 and older
+ .customIf([=, &ST](const LegalityQuery &Query) {
+ if (!Query.Types[0].isScalar() || Query.Types[1] != p0 ||
+ Query.Types[0] == s1)
+ return false;
+
+ unsigned Size = Query.Types[0].getSizeInBits();
+ unsigned QueryMemSize = Query.MMODescrs[0].SizeInBits;
+ assert(QueryMemSize <= Size && "Scalar can't hold MemSize");
+
+ if (Size > 64 || QueryMemSize > 64)
+ return false;
+
+ if (!isPowerOf2_64(Query.MMODescrs[0].SizeInBits))
return true;
- if (ST.hasMSA() &&
- CheckTy0Ty1MemSizeAlign(Query, {{v16s8, p0, 128, false},
- {v8s16, p0, 128, false},
- {v4s32, p0, 128, false},
- {v2s64, p0, 128, false}}))
+
+ if (!ST.systemSupportsUnalignedAccess() &&
+ isUnalignedMemmoryAccess(QueryMemSize,
+ Query.MMODescrs[0].AlignInBits)) {
+ assert(QueryMemSize != 32 && "4 byte load and store are legal");
return true;
+ }
+
return false;
})
.minScalar(0, s32);
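The comments in the hunk above describe when a scalar load or store stays legal: 4-byte accesses are always fine (lwl/lwr and swl/swr cover the unaligned case), while 2- and 8-byte accesses need either natural alignment or a subtarget that reports unaligned-access support. A rough standalone sketch of that decision, with invented names (Action, classifyScalarMemAccess) rather than LLVM API, and sizes given in bits:

#include <cstdint>

enum class Action { Legal, CustomLower, Unsupported };

// MemSizeInBits: size of the access; AlignInBits: known alignment;
// HasUnaligned: true for MIPS32r6-style subtargets.
Action classifyScalarMemAccess(uint64_t MemSizeInBits, uint64_t AlignInBits,
                               bool HasUnaligned) {
  if (MemSizeInBits > 64)
    return Action::Unsupported;
  // Non-power-of-2 sizes are never directly legal; they get custom-lowered.
  if ((MemSizeInBits & (MemSizeInBits - 1)) != 0)
    return Action::CustomLower;
  bool Unaligned = MemSizeInBits > AlignInBits;
  if (!Unaligned || HasUnaligned)
    return Action::Legal;
  // Unaligned 4-byte access can use the lwl/lwr (swl/swr) pair, so it is legal.
  if (MemSizeInBits == 32)
    return Action::Legal;
  // Unaligned 2- or 8-byte access on MIPS32r5 and older: custom lowering.
  return Action::CustomLower;
}

For example, a 64-bit load known to be only 4-byte aligned classifies as CustomLower on MIPS32r5 and older, and as Legal on a subtarget that supports unaligned access.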
@@ -111,7 +162,7 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
{s32, p0, 16, 8}})
.clampScalar(0, s32, s32);
- getActionDefinitionsBuilder({G_ZEXT, G_SEXT})
+ getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
.legalIf([](const LegalityQuery &Query) { return false; })
.maxScalar(0, s32);
@@ -202,6 +253,25 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
.lowerFor({s32})
.maxScalar(0, s32);
+ getActionDefinitionsBuilder(G_CTLZ)
+ .legalFor({{s32, s32}})
+ .maxScalar(0, s32)
+ .maxScalar(1, s32);
+ getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
+ .lowerFor({{s32, s32}});
+
+ getActionDefinitionsBuilder(G_CTTZ)
+ .lowerFor({{s32, s32}})
+ .maxScalar(0, s32)
+ .maxScalar(1, s32);
+ getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF)
+ .lowerFor({{s32, s32}, {s64, s64}});
+
+ getActionDefinitionsBuilder(G_CTPOP)
+ .lowerFor({{s32, s32}})
+ .clampScalar(0, s32, s32)
+ .clampScalar(1, s32, s32);
+
// FP instructions
getActionDefinitionsBuilder(G_FCONSTANT)
.legalFor({s32, s64});
@@ -256,20 +326,98 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
verify(*ST.getInstrInfo());
}
-bool MipsLegalizerInfo::legalizeCustom(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder,
- GISelChangeObserver &Observer) const {
-
+bool MipsLegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
using namespace TargetOpcode;
- MIRBuilder.setInstr(MI);
- const MipsSubtarget &STI =
- static_cast<const MipsSubtarget &>(MIRBuilder.getMF().getSubtarget());
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
switch (MI.getOpcode()) {
+ case G_LOAD:
+ case G_STORE: {
+ unsigned MemSize = (**MI.memoperands_begin()).getSize();
+ Register Val = MI.getOperand(0).getReg();
+ unsigned Size = MRI.getType(Val).getSizeInBits();
+
+ MachineMemOperand *MMOBase = *MI.memoperands_begin();
+
+ assert(MemSize <= 8 && "MemSize is too large");
+ assert(Size <= 64 && "Scalar size is too large");
+
+ // Split MemSize into two parts: P2HalfMemSize is the largest power of two
+ // smaller than MemSize, e.g. 8 = 4 + 4, 6 = 4 + 2, 3 = 2 + 1.
+ unsigned P2HalfMemSize, RemMemSize;
+ if (isPowerOf2_64(MemSize)) {
+ P2HalfMemSize = RemMemSize = MemSize / 2;
+ } else {
+ P2HalfMemSize = 1 << Log2_32(MemSize);
+ RemMemSize = MemSize - P2HalfMemSize;
+ }
+
+ Register BaseAddr = MI.getOperand(1).getReg();
+ LLT PtrTy = MRI.getType(BaseAddr);
+ MachineFunction &MF = MIRBuilder.getMF();
+
+ auto P2HalfMemOp = MF.getMachineMemOperand(MMOBase, 0, P2HalfMemSize);
+ auto RemMemOp = MF.getMachineMemOperand(MMOBase, P2HalfMemSize, RemMemSize);
+
+ if (MI.getOpcode() == G_STORE) {
+ // Widen Val to s32 or s64 in order to create legal G_LSHR or G_UNMERGE.
+ if (Size < 32)
+ Val = MIRBuilder.buildAnyExt(s32, Val).getReg(0);
+ if (Size > 32 && Size < 64)
+ Val = MIRBuilder.buildAnyExt(s64, Val).getReg(0);
+
+ auto C_P2HalfMemSize = MIRBuilder.buildConstant(s32, P2HalfMemSize);
+ auto Addr = MIRBuilder.buildPtrAdd(PtrTy, BaseAddr, C_P2HalfMemSize);
+
+ if (MI.getOpcode() == G_STORE && MemSize <= 4) {
+ MIRBuilder.buildStore(Val, BaseAddr, *P2HalfMemOp);
+ auto C_P2Half_InBits = MIRBuilder.buildConstant(s32, P2HalfMemSize * 8);
+ auto Shift = MIRBuilder.buildLShr(s32, Val, C_P2Half_InBits);
+ MIRBuilder.buildStore(Shift, Addr, *RemMemOp);
+ } else {
+ auto Unmerge = MIRBuilder.buildUnmerge(s32, Val);
+ MIRBuilder.buildStore(Unmerge.getReg(0), BaseAddr, *P2HalfMemOp);
+ MIRBuilder.buildStore(Unmerge.getReg(1), Addr, *RemMemOp);
+ }
+ }
+
+ if (MI.getOpcode() == G_LOAD) {
+
+ if (MemSize <= 4) {
+ // This is an any-extending load; use 4-byte lwl/lwr.
+ auto *Load4MMO = MF.getMachineMemOperand(MMOBase, 0, 4);
+
+ if (Size == 32)
+ MIRBuilder.buildLoad(Val, BaseAddr, *Load4MMO);
+ else {
+ auto Load = MIRBuilder.buildLoad(s32, BaseAddr, *Load4MMO);
+ MIRBuilder.buildTrunc(Val, Load.getReg(0));
+ }
+
+ } else {
+ auto C_P2HalfMemSize = MIRBuilder.buildConstant(s32, P2HalfMemSize);
+ auto Addr = MIRBuilder.buildPtrAdd(PtrTy, BaseAddr, C_P2HalfMemSize);
+
+ auto Load_P2Half = MIRBuilder.buildLoad(s32, BaseAddr, *P2HalfMemOp);
+ auto Load_Rem = MIRBuilder.buildLoad(s32, Addr, *RemMemOp);
+
+ if (Size == 64)
+ MIRBuilder.buildMerge(Val, {Load_P2Half, Load_Rem});
+ else {
+ auto Merge = MIRBuilder.buildMerge(s64, {Load_P2Half, Load_Rem});
+ MIRBuilder.buildTrunc(Val, Merge);
+ }
+ }
+ }
+ MI.eraseFromParent();
+ break;
+ }
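The split just above (the largest power of two below MemSize, plus a remainder accessed at that offset) can be written out in a few lines. A minimal standalone sketch, with invented names (largestPowerOf2Below, splitMemSize) and sizes in bytes as returned by getSize():

#include <cstdint>
#include <utility>

static uint64_t largestPowerOf2Below(uint64_t N) {
  uint64_t P = 1;
  while (P * 2 < N)
    P *= 2;
  return P;
}

// Returns {P2HalfMemSize, RemMemSize}, e.g. 8 -> {4, 4}, 6 -> {4, 2},
// 3 -> {2, 1}. The first piece is accessed at offset 0, the second at
// offset P2HalfMemSize.
std::pair<uint64_t, uint64_t> splitMemSize(uint64_t MemSize) {
  bool IsPow2 = (MemSize & (MemSize - 1)) == 0;
  uint64_t P2Half = IsPow2 ? MemSize / 2 : largestPowerOf2Below(MemSize);
  return {P2Half, MemSize - P2Half};
}

For stores of 4 bytes or less the second piece is the value shifted right by P2HalfMemSize * 8 bits (the G_LSHR above); larger stores unmerge the value into two 32-bit halves, and loads rebuild the result by merging or truncating in the same way.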
case G_UITOFP: {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
@@ -288,11 +436,8 @@ bool MipsLegalizerInfo::legalizeCustom(MachineInstr &MI,
// Next, subtract 2^52 * 0x1.0000000000000 i.e. 0x10000000000000.0 from it.
// Done. Trunc double to float if needed.
- MachineInstrBuilder Bitcast = MIRBuilder.buildInstr(
- STI.isFP64bit() ? Mips::BuildPairF64_64 : Mips::BuildPairF64, {s64},
- {Src, MIRBuilder.buildConstant(s32, UINT32_C(0x43300000))});
- Bitcast.constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
- *STI.getRegBankInfo());
+ auto C_HiMask = MIRBuilder.buildConstant(s32, UINT32_C(0x43300000));
+ auto Bitcast = MIRBuilder.buildMerge(s64, {Src, C_HiMask.getReg(0)});
MachineInstrBuilder TwoP52FP = MIRBuilder.buildFConstant(
s64, BitsToDouble(UINT64_C(0x4330000000000000)));
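The constants in this G_UITOFP lowering come from a standard IEEE-754 trick: 0x43300000 is the high word of 2^52, so packing the unsigned 32-bit source into the low word of such a double gives exactly 2^52 + Src, and subtracting 2^52 leaves the converted value. A minimal sketch in plain C++ (uint32ToDouble is an invented name):

#include <cstdint>
#include <cstring>

double uint32ToDouble(uint32_t X) {
  // 0x4330000000000000 is the bit pattern of 2^52; X fills the low 32
  // mantissa bits, so the value is exactly 2^52 + X.
  uint64_t Bits = (uint64_t(0x43300000) << 32) | X;
  double D;
  std::memcpy(&D, &Bits, sizeof(D));
  return D - 4503599627370496.0; // subtract 2^52 to recover (double)X
}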
@@ -352,15 +497,15 @@ static bool MSA2OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode,
return true;
}
-bool MipsLegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
+bool MipsLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
const MipsSubtarget &ST =
static_cast<const MipsSubtarget &>(MI.getMF()->getSubtarget());
const MipsInstrInfo &TII = *ST.getInstrInfo();
const MipsRegisterInfo &TRI = *ST.getRegisterInfo();
const RegisterBankInfo &RBI = *ST.getRegBankInfo();
- MIRBuilder.setInstr(MI);
switch (MI.getIntrinsicID()) {
case Intrinsic::memcpy:
@@ -377,14 +522,14 @@ bool MipsLegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
return constrainSelectedInstRegOperands(*Trap, TII, TRI, RBI);
}
case Intrinsic::vacopy: {
- Register Tmp = MRI.createGenericVirtualRegister(LLT::pointer(0, 32));
MachinePointerInfo MPO;
- MIRBuilder.buildLoad(Tmp, MI.getOperand(2),
- *MI.getMF()->getMachineMemOperand(
- MPO, MachineMemOperand::MOLoad, 4, 4));
+ auto Tmp =
+ MIRBuilder.buildLoad(LLT::pointer(0, 32), MI.getOperand(2),
+ *MI.getMF()->getMachineMemOperand(
+ MPO, MachineMemOperand::MOLoad, 4, Align(4)));
MIRBuilder.buildStore(Tmp, MI.getOperand(1),
*MI.getMF()->getMachineMemOperand(
- MPO, MachineMemOperand::MOStore, 4, 4));
+ MPO, MachineMemOperand::MOStore, 4, Align(4)));
MI.eraseFromParent();
return true;
}
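For the vacopy case above, the O32 va_list is a single pointer, so the lowering amounts to a 4-byte aligned load from the source va_list followed by a 4-byte aligned store into the destination; roughly (copyVaList is an invented name):

void copyVaList(void **Dest, void **Src) {
  *Dest = *Src; // one aligned 32-bit load, one aligned 32-bit store
}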
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.h
index 9696c262b2db..05027b718a85 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsLegalizerInfo.h
@@ -25,12 +25,10 @@ class MipsLegalizerInfo : public LegalizerInfo {
public:
MipsLegalizerInfo(const MipsSubtarget &ST);
- bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder,
- GISelChangeObserver &Observer) const override;
+ bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override;
- bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const override;
+ bool legalizeIntrinsic(LegalizerHelper &Helper,
+ MachineInstr &MI) const override;
};
} // end namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
index 0fef518c240e..3e32574596ca 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -2339,6 +2339,16 @@ class LDI_H_DESC : MSA_I10_LDI_DESC_BASE<"ldi.h", MSA128HOpnd>;
class LDI_W_DESC : MSA_I10_LDI_DESC_BASE<"ldi.w", MSA128WOpnd>;
class LDI_D_DESC : MSA_I10_LDI_DESC_BASE<"ldi.d", MSA128DOpnd>;
+class MSA_LOAD_PSEUDO_BASE<SDPatternOperator intrinsic, RegisterOperand RO> :
+ PseudoSE<(outs RO:$dst), (ins PtrRC:$ptr, GPR32:$imm),
+ [(set RO:$dst, (intrinsic iPTR:$ptr, GPR32:$imm))]> {
+ let hasNoSchedulingInfo = 1;
+ let usesCustomInserter = 1;
+}
+
+def LDR_D : MSA_LOAD_PSEUDO_BASE<int_mips_ldr_d, MSA128DOpnd>;
+def LDR_W : MSA_LOAD_PSEUDO_BASE<int_mips_ldr_w, MSA128WOpnd>;
+
class LSA_DESC_BASE<string instr_asm, RegisterOperand RORD,
InstrItinClass itin = NoItinerary> {
dag OutOperandList = (outs RORD:$rd);
@@ -2671,6 +2681,16 @@ class ST_W_DESC : ST_DESC_BASE<"st.w", store, v4i32, MSA128WOpnd,
class ST_D_DESC : ST_DESC_BASE<"st.d", store, v2i64, MSA128DOpnd,
mem_simm10_lsl3, addrimm10lsl3>;
+class MSA_STORE_PSEUDO_BASE<SDPatternOperator intrinsic, RegisterOperand RO> :
+ PseudoSE<(outs), (ins RO:$dst, PtrRC:$ptr, GPR32:$imm),
+ [(intrinsic RO:$dst, iPTR:$ptr, GPR32:$imm)]> {
+ let hasNoSchedulingInfo = 1;
+ let usesCustomInserter = 1;
+}
+
+def STR_D : MSA_STORE_PSEUDO_BASE<int_mips_str_d, MSA128DOpnd>;
+def STR_W : MSA_STORE_PSEUDO_BASE<int_mips_str_w, MSA128WOpnd>;
+
class SUBS_S_B_DESC : MSA_3R_DESC_BASE<"subs_s.b", int_mips_subs_s_b,
MSA128BOpnd>;
class SUBS_S_H_DESC : MSA_3R_DESC_BASE<"subs_s.h", int_mips_subs_s_h,
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsMachineFunction.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsMachineFunction.cpp
index 85b20fc58231..a7a2be30f58a 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsMachineFunction.cpp
@@ -44,22 +44,22 @@ static const TargetRegisterClass &getGlobalBaseRegClass(MachineFunction &MF) {
return Mips::GPR32RegClass;
}
-Register MipsFunctionInfo::getGlobalBaseReg() {
+Register MipsFunctionInfo::getGlobalBaseReg(MachineFunction &MF) {
if (!GlobalBaseReg)
GlobalBaseReg =
MF.getRegInfo().createVirtualRegister(&getGlobalBaseRegClass(MF));
return GlobalBaseReg;
}
-Register MipsFunctionInfo::getGlobalBaseRegForGlobalISel() {
+Register MipsFunctionInfo::getGlobalBaseRegForGlobalISel(MachineFunction &MF) {
if (!GlobalBaseReg) {
- getGlobalBaseReg();
- initGlobalBaseReg();
+ getGlobalBaseReg(MF);
+ initGlobalBaseReg(MF);
}
return GlobalBaseReg;
}
-void MipsFunctionInfo::initGlobalBaseReg() {
+void MipsFunctionInfo::initGlobalBaseReg(MachineFunction &MF) {
if (!GlobalBaseReg)
return;
@@ -68,14 +68,13 @@ void MipsFunctionInfo::initGlobalBaseReg() {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc DL;
- unsigned V0, V1;
const TargetRegisterClass *RC;
const MipsABIInfo &ABI =
static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI();
RC = (ABI.IsN64()) ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
- V0 = RegInfo.createVirtualRegister(RC);
- V1 = RegInfo.createVirtualRegister(RC);
+ Register V0 = RegInfo.createVirtualRegister(RC);
+ Register V1 = RegInfo.createVirtualRegister(RC);
if (ABI.IsN64()) {
MF.getRegInfo().addLiveIn(Mips::T9_64);
@@ -147,7 +146,7 @@ void MipsFunctionInfo::initGlobalBaseReg() {
.addReg(Mips::V0).addReg(Mips::T9);
}
-void MipsFunctionInfo::createEhDataRegsFI() {
+void MipsFunctionInfo::createEhDataRegsFI(MachineFunction &MF) {
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
for (int I = 0; I < 4; ++I) {
const TargetRegisterClass &RC =
@@ -155,12 +154,12 @@ void MipsFunctionInfo::createEhDataRegsFI() {
? Mips::GPR64RegClass
: Mips::GPR32RegClass;
- EhDataRegFI[I] = MF.getFrameInfo().CreateStackObject(TRI.getSpillSize(RC),
- TRI.getSpillAlignment(RC), false);
+ EhDataRegFI[I] = MF.getFrameInfo().CreateStackObject(
+ TRI.getSpillSize(RC), TRI.getSpillAlign(RC), false);
}
}
-void MipsFunctionInfo::createISRRegFI() {
+void MipsFunctionInfo::createISRRegFI(MachineFunction &MF) {
// ISRs require spill slots for Status & ErrorPC Coprocessor 0 registers.
// The current implementation only supports Mips32r2+ not Mips64rX. Status
// is always 32 bits, ErrorPC is 32 or 64 bits dependent on architecture,
@@ -170,7 +169,7 @@ void MipsFunctionInfo::createISRRegFI() {
for (int I = 0; I < 2; ++I)
ISRDataRegFI[I] = MF.getFrameInfo().CreateStackObject(
- TRI.getSpillSize(RC), TRI.getSpillAlignment(RC), false);
+ TRI.getSpillSize(RC), TRI.getSpillAlign(RC), false);
}
bool MipsFunctionInfo::isEhDataRegFI(int FI) const {
@@ -181,19 +180,22 @@ bool MipsFunctionInfo::isEhDataRegFI(int FI) const {
bool MipsFunctionInfo::isISRRegFI(int FI) const {
return IsISR && (FI == ISRDataRegFI[0] || FI == ISRDataRegFI[1]);
}
-MachinePointerInfo MipsFunctionInfo::callPtrInfo(const char *ES) {
+MachinePointerInfo MipsFunctionInfo::callPtrInfo(MachineFunction &MF,
+ const char *ES) {
return MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES));
}
-MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *GV) {
+MachinePointerInfo MipsFunctionInfo::callPtrInfo(MachineFunction &MF,
+ const GlobalValue *GV) {
return MachinePointerInfo(MF.getPSVManager().getGlobalValueCallEntry(GV));
}
-int MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) {
+int MipsFunctionInfo::getMoveF64ViaSpillFI(MachineFunction &MF,
+ const TargetRegisterClass *RC) {
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
if (MoveF64ViaSpillFI == -1) {
MoveF64ViaSpillFI = MF.getFrameInfo().CreateStackObject(
- TRI.getSpillSize(*RC), TRI.getSpillAlignment(*RC), false);
+ TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC), false);
}
return MoveF64ViaSpillFI;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsMachineFunction.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsMachineFunction.h
index aaa1e0e18441..786d210e2aaa 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsMachineFunction.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsMachineFunction.h
@@ -24,7 +24,7 @@ namespace llvm {
/// Mips target-specific information for each MachineFunction.
class MipsFunctionInfo : public MachineFunctionInfo {
public:
- MipsFunctionInfo(MachineFunction &MF) : MF(MF) {}
+ MipsFunctionInfo(MachineFunction &MF) {}
~MipsFunctionInfo() override;
@@ -32,12 +32,12 @@ public:
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
bool globalBaseRegSet() const;
- Register getGlobalBaseReg();
- Register getGlobalBaseRegForGlobalISel();
+ Register getGlobalBaseReg(MachineFunction &MF);
+ Register getGlobalBaseRegForGlobalISel(MachineFunction &MF);
// Insert instructions to initialize the global base register in the
// first MBB of the function.
- void initGlobalBaseReg();
+ void initGlobalBaseReg(MachineFunction &MF);
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
@@ -53,30 +53,30 @@ public:
bool callsEhReturn() const { return CallsEhReturn; }
void setCallsEhReturn() { CallsEhReturn = true; }
- void createEhDataRegsFI();
+ void createEhDataRegsFI(MachineFunction &MF);
int getEhDataRegFI(unsigned Reg) const { return EhDataRegFI[Reg]; }
bool isEhDataRegFI(int FI) const;
/// Create a MachinePointerInfo that has an ExternalSymbolPseudoSourceValue
/// object representing a GOT entry for an external function.
- MachinePointerInfo callPtrInfo(const char *ES);
+ MachinePointerInfo callPtrInfo(MachineFunction &MF, const char *ES);
// Functions with the "interrupt" attribute require special prologues,
// epilogues and additional spill slots.
bool isISR() const { return IsISR; }
void setISR() { IsISR = true; }
- void createISRRegFI();
- int getISRRegFI(unsigned Reg) const { return ISRDataRegFI[Reg]; }
+ void createISRRegFI(MachineFunction &MF);
+ int getISRRegFI(Register Reg) const { return ISRDataRegFI[Reg]; }
bool isISRRegFI(int FI) const;
/// Create a MachinePointerInfo that has a GlobalValuePseudoSourceValue object
/// representing a GOT entry for a global function.
- MachinePointerInfo callPtrInfo(const GlobalValue *GV);
+ MachinePointerInfo callPtrInfo(MachineFunction &MF, const GlobalValue *GV);
void setSaveS2() { SaveS2 = true; }
bool hasSaveS2() const { return SaveS2; }
- int getMoveF64ViaSpillFI(const TargetRegisterClass *RC);
+ int getMoveF64ViaSpillFI(MachineFunction &MF, const TargetRegisterClass *RC);
std::map<const char *, const Mips16HardFloatInfo::FuncSignature *>
StubsNeeded;
@@ -84,17 +84,15 @@ public:
private:
virtual void anchor();
- MachineFunction& MF;
-
/// SRetReturnReg - Some subtargets require that sret lowering includes
/// returning the value of the returned struct in a register. This field
/// holds the virtual register into which the sret argument is passed.
- unsigned SRetReturnReg = 0;
+ Register SRetReturnReg;
/// GlobalBaseReg - keeps track of the virtual register initialized for
/// use as the global base register. This is used for PIC in some PIC
/// relocation models.
- unsigned GlobalBaseReg = 0;
+ Register GlobalBaseReg;
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
int VarArgsFrameIndex = 0;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp
index 8bd64ff6cb27..2823d300dc6e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp
@@ -218,8 +218,7 @@ bool OptimizePICCall::runOnMachineFunction(MachineFunction &F) {
MBBI.preVisit(ScopedHT);
Changed |= visitNode(MBBI);
const MachineDomTreeNode *Node = MBBI.getNode();
- const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
- WorkList.append(Children.begin(), Children.end());
+ WorkList.append(Node->begin(), Node->end());
}
return Changed;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
index f9d93ca29658..310e54b0ea8d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
@@ -44,9 +44,22 @@ bool MipsPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
return false;
case TargetOpcode::G_LOAD:
case TargetOpcode::G_SEXTLOAD:
- case TargetOpcode::G_ZEXTLOAD:
+ case TargetOpcode::G_ZEXTLOAD: {
+ // Don't attempt to combine non-power-of-2 loads, or unaligned loads when
+ // the subtarget doesn't support them.
+ auto MMO = *MI.memoperands_begin();
+ const MipsSubtarget &STI =
+ static_cast<const MipsSubtarget &>(MI.getMF()->getSubtarget());
+ if (!isPowerOf2_64(MMO->getSize()))
+ return false;
+ bool isUnaligned = MMO->getAlign() < MMO->getSize();
+ if (!STI.systemSupportsUnalignedAccess() && isUnaligned)
+ return false;
+
return Helper.tryCombineExtendingLoads(MI);
}
+ }
+
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
index 2a3f5a05dfe0..6325e513f9f8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -132,9 +132,6 @@ static bool isFloatingPointOpcodeUse(unsigned Opc) {
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
case TargetOpcode::G_FCMP:
- case Mips::MFC1:
- case Mips::ExtractElementF64:
- case Mips::ExtractElementF64_64:
return true;
default:
return isFloatingPointOpcode(Opc);
@@ -147,15 +144,25 @@ static bool isFloatingPointOpcodeDef(unsigned Opc) {
switch (Opc) {
case TargetOpcode::G_SITOFP:
case TargetOpcode::G_UITOFP:
- case Mips::MTC1:
- case Mips::BuildPairF64:
- case Mips::BuildPairF64_64:
return true;
default:
return isFloatingPointOpcode(Opc);
}
}
+static bool isGprbTwoInstrUnalignedLoadOrStore(const MachineInstr *MI) {
+ if (MI->getOpcode() == TargetOpcode::G_LOAD ||
+ MI->getOpcode() == TargetOpcode::G_STORE) {
+ auto MMO = *MI->memoperands_begin();
+ const MipsSubtarget &STI =
+ static_cast<const MipsSubtarget &>(MI->getMF()->getSubtarget());
+ if (MMO->getSize() == 4 && (!STI.systemSupportsUnalignedAccess() &&
+ MMO->getAlign() < MMO->getSize()))
+ return true;
+ }
+ return false;
+}
+
static bool isAmbiguous(unsigned Opc) {
switch (Opc) {
case TargetOpcode::G_LOAD:
@@ -163,6 +170,8 @@ static bool isAmbiguous(unsigned Opc) {
case TargetOpcode::G_PHI:
case TargetOpcode::G_SELECT:
case TargetOpcode::G_IMPLICIT_DEF:
+ case TargetOpcode::G_UNMERGE_VALUES:
+ case TargetOpcode::G_MERGE_VALUES:
return true;
default:
return false;
@@ -247,10 +256,17 @@ MipsRegisterBankInfo::AmbiguousRegDefUseContainer::AmbiguousRegDefUseContainer(
if (MI->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
addDefUses(MI->getOperand(0).getReg(), MRI);
+
+ if (MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES)
+ addUseDef(MI->getOperand(MI->getNumOperands() - 1).getReg(), MRI);
+
+ if (MI->getOpcode() == TargetOpcode::G_MERGE_VALUES)
+ addDefUses(MI->getOperand(0).getReg(), MRI);
}
bool MipsRegisterBankInfo::TypeInfoForMF::visit(
- const MachineInstr *MI, const MachineInstr *WaitingForTypeOfMI) {
+ const MachineInstr *MI, const MachineInstr *WaitingForTypeOfMI,
+ InstType &AmbiguousTy) {
assert(isAmbiguous(MI->getOpcode()) && "Visiting non-Ambiguous opcode.\n");
if (wasVisited(MI))
return true; // InstType has already been determined for MI.
@@ -258,18 +274,28 @@ bool MipsRegisterBankInfo::TypeInfoForMF::visit(
startVisit(MI);
AmbiguousRegDefUseContainer DefUseContainer(MI);
+ if (isGprbTwoInstrUnalignedLoadOrStore(MI)) {
+ setTypes(MI, Integer);
+ return true;
+ }
+
+ if (AmbiguousTy == InstType::Ambiguous &&
+ (MI->getOpcode() == TargetOpcode::G_MERGE_VALUES ||
+ MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES))
+ AmbiguousTy = InstType::AmbiguousWithMergeOrUnmerge;
+
// Visit instructions where MI's DEF operands are USED.
- if (visitAdjacentInstrs(MI, DefUseContainer.getDefUses(), true))
+ if (visitAdjacentInstrs(MI, DefUseContainer.getDefUses(), true, AmbiguousTy))
return true;
// Visit instructions that DEFINE MI's USE operands.
- if (visitAdjacentInstrs(MI, DefUseContainer.getUseDefs(), false))
+ if (visitAdjacentInstrs(MI, DefUseContainer.getUseDefs(), false, AmbiguousTy))
return true;
// All of MI's adjacent instructions are ambiguous.
if (!WaitingForTypeOfMI) {
// This is chain of ambiguous instructions.
- setTypes(MI, InstType::Ambiguous);
+ setTypes(MI, AmbiguousTy);
return true;
}
// Excluding WaitingForTypeOfMI, MI is either connected to chains of ambiguous
@@ -286,7 +312,7 @@ bool MipsRegisterBankInfo::TypeInfoForMF::visit(
bool MipsRegisterBankInfo::TypeInfoForMF::visitAdjacentInstrs(
const MachineInstr *MI, SmallVectorImpl<MachineInstr *> &AdjacentInstrs,
- bool isDefUse) {
+ bool isDefUse, InstType &AmbiguousTy) {
while (!AdjacentInstrs.empty()) {
MachineInstr *AdjMI = AdjacentInstrs.pop_back_val();
@@ -303,9 +329,11 @@ bool MipsRegisterBankInfo::TypeInfoForMF::visitAdjacentInstrs(
return true;
}
- // Defaults to integer instruction. Includes G_MERGE_VALUES and
- // G_UNMERGE_VALUES.
- if (!isAmbiguous(AdjMI->getOpcode())) {
+ // Defaults to integer instruction. Small registers in G_MERGE (uses) and
+ // G_UNMERGE (defs) will always be gprb.
+ if ((!isDefUse && AdjMI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) ||
+ (isDefUse && AdjMI->getOpcode() == TargetOpcode::G_MERGE_VALUES) ||
+ !isAmbiguous(AdjMI->getOpcode())) {
setTypes(MI, InstType::Integer);
return true;
}
@@ -314,7 +342,7 @@ bool MipsRegisterBankInfo::TypeInfoForMF::visitAdjacentInstrs(
// adjacent instructions and determine InstType without visiting AdjMI.
if (!wasVisited(AdjMI) ||
getRecordedTypeForInstr(AdjMI) != InstType::NotDetermined) {
- if (visit(AdjMI, MI)) {
+ if (visit(AdjMI, MI, AmbiguousTy)) {
// InstType is successfully determined and is same as for AdjMI.
setTypes(MI, getRecordedTypeForInstr(AdjMI));
return true;
@@ -355,14 +383,15 @@ void MipsRegisterBankInfo::TypeInfoForMF::setTypesAccordingToPhysicalRegister(
MipsRegisterBankInfo::InstType
MipsRegisterBankInfo::TypeInfoForMF::determineInstType(const MachineInstr *MI) {
- visit(MI, nullptr);
+ InstType DefaultAmbiguousType = InstType::Ambiguous;
+ visit(MI, nullptr, DefaultAmbiguousType);
return getRecordedTypeForInstr(MI);
}
void MipsRegisterBankInfo::TypeInfoForMF::cleanupIfNewFunction(
llvm::StringRef FunctionName) {
if (MFName != FunctionName) {
- MFName = FunctionName;
+ MFName = std::string(FunctionName);
WaitingQueues.clear();
Types.clear();
}
@@ -453,6 +482,7 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case G_BRINDIRECT:
case G_VASTART:
case G_BSWAP:
+ case G_CTLZ:
OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx];
break;
case G_ADD:
@@ -467,7 +497,7 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OperandsMapping = getMSAMapping(MF);
break;
case G_STORE:
- case G_LOAD:
+ case G_LOAD: {
if (Op0Size == 128) {
OperandsMapping = getOperandsMapping(
{getMSAMapping(MF), &Mips::ValueMappings[Mips::GPRIdx]});
@@ -477,41 +507,56 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
if (!Op0Ty.isPointer())
InstTy = TI.determineInstType(&MI);
- if (InstTy == InstType::FloatingPoint ||
- (Op0Size == 64 && InstTy == InstType::Ambiguous))
+ if (isFloatingPoint_32or64(InstTy, Op0Size) ||
+ isAmbiguous_64(InstTy, Op0Size)) {
OperandsMapping = getOperandsMapping(
{getFprbMapping(Op0Size), &Mips::ValueMappings[Mips::GPRIdx]});
- else
+ } else {
+ assert((isInteger_32(InstTy, Op0Size) ||
+ isAmbiguous_32(InstTy, Op0Size) ||
+ isAmbiguousWithMergeOrUnmerge_64(InstTy, Op0Size)) &&
+ "Unexpected Inst type");
OperandsMapping =
getOperandsMapping({getGprbOrCustomMapping(Op0Size, MappingID),
&Mips::ValueMappings[Mips::GPRIdx]});
+ }
break;
- case G_PHI:
+ }
+ case G_PHI: {
if (!Op0Ty.isPointer())
InstTy = TI.determineInstType(&MI);
// PHI is copylike and should have one regbank in mapping for def register.
- if (InstTy == InstType::Integer && Op0Size == 64) {
+ if (isAmbiguousWithMergeOrUnmerge_64(InstTy, Op0Size)) {
OperandsMapping =
getOperandsMapping({&Mips::ValueMappings[Mips::DPRIdx]});
+ TI.clearTypeInfoData(&MI);
return getInstructionMapping(CustomMappingID, /*Cost=*/1, OperandsMapping,
/*NumOperands=*/1);
}
+ assert((isInteger_32(InstTy, Op0Size) ||
+ isFloatingPoint_32or64(InstTy, Op0Size) ||
+ isAmbiguous_32or64(InstTy, Op0Size)) &&
+ "Unexpected Inst type");
// Use default handling for PHI, i.e. set reg bank of def operand to match
// register banks of use operands.
return getInstrMappingImpl(MI);
+ }
case G_SELECT: {
if (!Op0Ty.isPointer())
InstTy = TI.determineInstType(&MI);
-
- if (InstTy == InstType::FloatingPoint ||
- (Op0Size == 64 && InstTy == InstType::Ambiguous)) {
+ if (isFloatingPoint_32or64(InstTy, Op0Size) ||
+ isAmbiguous_64(InstTy, Op0Size)) {
const RegisterBankInfo::ValueMapping *Bank = getFprbMapping(Op0Size);
OperandsMapping = getOperandsMapping(
{Bank, &Mips::ValueMappings[Mips::GPRIdx], Bank, Bank});
break;
} else {
+ assert((isInteger_32(InstTy, Op0Size) ||
+ isAmbiguous_32(InstTy, Op0Size) ||
+ isAmbiguousWithMergeOrUnmerge_64(InstTy, Op0Size)) &&
+ "Unexpected Inst type");
const RegisterBankInfo::ValueMapping *Bank =
getGprbOrCustomMapping(Op0Size, MappingID);
OperandsMapping = getOperandsMapping(
@@ -519,28 +564,45 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
break;
}
- case G_IMPLICIT_DEF:
+ case G_IMPLICIT_DEF: {
if (!Op0Ty.isPointer())
InstTy = TI.determineInstType(&MI);
- if (InstTy == InstType::FloatingPoint)
+ if (isFloatingPoint_32or64(InstTy, Op0Size))
OperandsMapping = getFprbMapping(Op0Size);
- else
+ else {
+ assert((isInteger_32(InstTy, Op0Size) ||
+ isAmbiguousWithMergeOrUnmerge_64(InstTy, Op0Size)) &&
+ "Unexpected Inst type");
OperandsMapping = getGprbOrCustomMapping(Op0Size, MappingID);
-
- break;
- case G_UNMERGE_VALUES:
+ }
+ } break;
+ case G_UNMERGE_VALUES: {
+ assert(MI.getNumOperands() == 3 && "Unsupported G_UNMERGE_VALUES");
+ unsigned Op3Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+ InstTy = TI.determineInstType(&MI);
+ assert((isAmbiguousWithMergeOrUnmerge_64(InstTy, Op3Size) ||
+ isFloatingPoint_64(InstTy, Op3Size)) &&
+ "Unexpected Inst type");
OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx],
&Mips::ValueMappings[Mips::GPRIdx],
&Mips::ValueMappings[Mips::DPRIdx]});
- MappingID = CustomMappingID;
+ if (isAmbiguousWithMergeOrUnmerge_64(InstTy, Op3Size))
+ MappingID = CustomMappingID;
break;
- case G_MERGE_VALUES:
+ }
+ case G_MERGE_VALUES: {
+ InstTy = TI.determineInstType(&MI);
+ assert((isAmbiguousWithMergeOrUnmerge_64(InstTy, Op0Size) ||
+ isFloatingPoint_64(InstTy, Op0Size)) &&
+ "Unexpected Inst type");
OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::DPRIdx],
&Mips::ValueMappings[Mips::GPRIdx],
&Mips::ValueMappings[Mips::GPRIdx]});
- MappingID = CustomMappingID;
+ if (isAmbiguousWithMergeOrUnmerge_64(InstTy, Op0Size))
+ MappingID = CustomMappingID;
break;
+ }
case G_FADD:
case G_FSUB:
case G_FMUL:
@@ -605,6 +667,8 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
return getInvalidInstructionMapping();
}
+ if (MappingID == CustomMappingID)
+ TI.clearTypeInfoData(&MI);
return getInstructionMapping(MappingID, /*Cost=*/1, OperandsMapping,
NumOperands);
}
@@ -652,10 +716,10 @@ void MipsRegisterBankInfo::setRegBank(MachineInstr &MI,
static void
combineAwayG_UNMERGE_VALUES(LegalizationArtifactCombiner &ArtCombiner,
- MachineInstr &MI) {
+ MachineInstr &MI, GISelObserverWrapper &Observer) {
SmallVector<Register, 4> UpdatedDefs;
SmallVector<MachineInstr *, 2> DeadInstrs;
- ArtCombiner.tryCombineMerges(MI, DeadInstrs, UpdatedDefs);
+ ArtCombiner.tryCombineMerges(MI, DeadInstrs, UpdatedDefs, Observer);
for (MachineInstr *DeadMI : DeadInstrs)
DeadMI->eraseFromParent();
}
@@ -688,7 +752,7 @@ void MipsRegisterBankInfo::applyMappingImpl(
// not be considered for regbank selection. RegBankSelect for mips
// visits/makes corresponding G_MERGE first. Combine them here.
if (NewMI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES)
- combineAwayG_UNMERGE_VALUES(ArtCombiner, *NewMI);
+ combineAwayG_UNMERGE_VALUES(ArtCombiner, *NewMI, WrapperObserver);
// This G_MERGE will be combined away when its corresponding G_UNMERGE
// gets regBankSelected.
else if (NewMI->getOpcode() == TargetOpcode::G_MERGE_VALUES)
@@ -700,7 +764,7 @@ void MipsRegisterBankInfo::applyMappingImpl(
return;
}
case TargetOpcode::G_UNMERGE_VALUES:
- combineAwayG_UNMERGE_VALUES(ArtCombiner, MI);
+ combineAwayG_UNMERGE_VALUES(ArtCombiner, MI, WrapperObserver);
return;
default:
break;
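The visit/visitAdjacentInstrs changes above walk an instruction's def-use neighborhood until something unambiguously integer or floating point is found; only when the whole connected chain is ambiguous does a default kind apply. A toy sketch of that search, with invented types (Kind, Node, classify) instead of MachineInstr walking:

#include <vector>

enum class Kind { Integer, FloatingPoint, Ambiguous };

struct Node {
  Kind K = Kind::Ambiguous;
  std::vector<Node *> Neighbors; // def-use and use-def adjacent instructions
};

// Depth-first search for the first non-ambiguous neighbor; Visited guards
// against cycles (e.g. through phis).
Kind classify(Node *N, std::vector<Node *> &Visited) {
  for (Node *V : Visited)
    if (V == N)
      return Kind::Ambiguous;
  Visited.push_back(N);
  if (N->K != Kind::Ambiguous)
    return N->K;
  for (Node *Adj : N->Neighbors) {
    Kind K = classify(Adj, Visited);
    if (K != Kind::Ambiguous)
      return K;
  }
  return Kind::Ambiguous;
}

In the real code the default for a fully ambiguous chain is Ambiguous (mapped to fprb for s64 and gprb for s32), unless the chain contains a G_MERGE_VALUES/G_UNMERGE_VALUES, in which case it becomes AmbiguousWithMergeOrUnmerge and is kept in gprb.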
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.h
index 66267f8d794d..55eeaf096b14 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterBankInfo.h
@@ -66,9 +66,55 @@ private:
/// Represents moving 'bags of bits' around. Select same bank for entire
/// chain to avoid cross bank copies. Currently we select fprb for s64 and
/// gprb for s32 Ambiguous operands.
- Ambiguous
+ Ambiguous,
+ /// Only used for s64. Unlike Ambiguous s64, AmbiguousWithMergeOrUnmerge s64
+ /// is mapped to gprb (legalized using narrow scalar to s32).
+ AmbiguousWithMergeOrUnmerge
};
+ bool isAmbiguous_64(InstType InstTy, unsigned OpSize) const {
+ if (InstTy == InstType::Ambiguous && OpSize == 64)
+ return true;
+ return false;
+ }
+
+ bool isAmbiguous_32(InstType InstTy, unsigned OpSize) const {
+ if (InstTy == InstType::Ambiguous && OpSize == 32)
+ return true;
+ return false;
+ }
+
+ bool isAmbiguous_32or64(InstType InstTy, unsigned OpSize) const {
+ if (InstTy == InstType::Ambiguous && (OpSize == 32 || OpSize == 64))
+ return true;
+ return false;
+ }
+
+ bool isAmbiguousWithMergeOrUnmerge_64(InstType InstTy,
+ unsigned OpSize) const {
+ if (InstTy == InstType::AmbiguousWithMergeOrUnmerge && OpSize == 64)
+ return true;
+ return false;
+ }
+
+ bool isFloatingPoint_32or64(InstType InstTy, unsigned OpSize) const {
+ if (InstTy == InstType::FloatingPoint && (OpSize == 32 || OpSize == 64))
+ return true;
+ return false;
+ }
+
+ bool isFloatingPoint_64(InstType InstTy, unsigned OpSize) const {
+ if (InstTy == InstType::FloatingPoint && OpSize == 64)
+ return true;
+ return false;
+ }
+
+ bool isInteger_32(InstType InstTy, unsigned OpSize) const {
+ if (InstTy == InstType::Integer && OpSize == 32)
+ return true;
+ return false;
+ }
+
/// Some generic instructions have operands that can be mapped to either fprb
/// or gprb e.g. for G_LOAD we consider only operand 0 as ambiguous, operand 1
/// is always gprb since it is a pointer.
@@ -113,12 +159,13 @@ private:
DenseMap<const MachineInstr *, InstType> Types;
/// Recursively visit MI's adjacent instructions and find MI's InstType.
- bool visit(const MachineInstr *MI, const MachineInstr *WaitingForTypeOfMI);
+ bool visit(const MachineInstr *MI, const MachineInstr *WaitingForTypeOfMI,
+ InstType &AmbiguousTy);
/// Visit MI's adjacent UseDefs or DefUses.
bool visitAdjacentInstrs(const MachineInstr *MI,
SmallVectorImpl<MachineInstr *> &AdjacentInstrs,
- bool isDefUse);
+ bool isDefUse, InstType &AmbiguousTy);
/// Set type for MI, and recursively for all instructions that are
/// waiting for MI's type.
@@ -170,6 +217,13 @@ private:
InstType determineInstType(const MachineInstr *MI);
void cleanupIfNewFunction(llvm::StringRef FunctionName);
+
+ /// MI is about to be destroyed (during narrow-scalar legalization). Internal
+ /// data is keyed by MI's address; clear it since it will no longer be valid.
+ void clearTypeInfoData(const MachineInstr *MI) {
+ Types.erase(MI);
+ WaitingQueues.erase(MI);
+ };
};
};
} // end namespace llvm
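The predicates added above all reduce to an (InstType, operand size) pair, and what they ultimately select is a register bank. A condensed sketch of that choice, using an invented pickBank helper with string results instead of real value mappings:

#include <string>

enum class InstType { Integer, FloatingPoint, Ambiguous, AmbiguousWithMergeOrUnmerge };

std::string pickBank(InstType Ty, unsigned OpSize) {
  if (Ty == InstType::FloatingPoint)
    return "fprb";               // f32/f64 operands stay in FPU registers
  if (Ty == InstType::Ambiguous && OpSize == 64)
    return "fprb";               // plain-ambiguous s64 is kept in fprb
  // Integer, ambiguous s32, and AmbiguousWithMergeOrUnmerge s64 go to gprb;
  // the last case is later narrowed to two s32 pieces.
  return "gprb";
}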
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
index 7b02d126eb28..3452bf495a34 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -245,11 +245,6 @@ MipsRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
return true;
}
-bool
-MipsRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
- return true;
-}
-
// FrameIndex represent objects inside a abstract stack.
// We must replace FrameIndex with an stack/frame pointer
// direct reference.
@@ -271,7 +266,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
<< "spOffset : " << spOffset << "\n"
<< "stackSize : " << stackSize << "\n"
<< "alignment : "
- << MF.getFrameInfo().getObjectAlignment(FrameIndex)
+ << DebugStr(MF.getFrameInfo().getObjectAlign(FrameIndex))
<< "\n");
eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset);
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterInfo.h
index 4ed32b09718b..06f214c2d6b1 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterInfo.h
@@ -58,8 +58,6 @@ public:
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
-
/// Stack Frame Processing Methods
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterInfo.td
index 8a6279da46b7..7d4dcca89e31 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsRegisterInfo.td
@@ -194,23 +194,23 @@ let Namespace = "Mips" in {
// FP control registers.
foreach I = 0-31 in
- def FCR#I : MipsReg<#I, ""#I>;
+ def FCR#I : MipsReg<I, ""#I>;
// FP condition code registers.
foreach I = 0-7 in
- def FCC#I : MipsReg<#I, "fcc"#I>;
+ def FCC#I : MipsReg<I, "fcc"#I>;
// COP0 registers.
foreach I = 0-31 in
- def COP0#I : MipsReg<#I, ""#I>;
+ def COP0#I : MipsReg<I, ""#I>;
// COP2 registers.
foreach I = 0-31 in
- def COP2#I : MipsReg<#I, ""#I>;
+ def COP2#I : MipsReg<I, ""#I>;
// COP3 registers.
foreach I = 0-31 in
- def COP3#I : MipsReg<#I, ""#I>;
+ def COP3#I : MipsReg<I, ""#I>;
// PC register
def PC : Register<"pc">;
@@ -222,11 +222,11 @@ let Namespace = "Mips" in {
def HWR3 : MipsReg<3, "hwr_ccres">;
foreach I = 4-31 in
- def HWR#I : MipsReg<#I, ""#I>;
+ def HWR#I : MipsReg<I, ""#I>;
// Accum registers
foreach I = 0-3 in
- def AC#I : ACCReg<#I, "ac"#I,
+ def AC#I : ACCReg<I, "ac"#I,
[!cast<Register>("LO"#I), !cast<Register>("HI"#I)]>;
def AC0_64 : ACCReg<0, "ac0", [LO0_64, HI0_64]>;
@@ -262,7 +262,7 @@ let Namespace = "Mips" in {
// These registers do not exist, but instructions like `cfcmsa`
// and `ctcmsa` allows to specify them.
foreach I = 8-31 in
- def MSA#I : MipsReg<#I, ""#I>;
+ def MSA#I : MipsReg<I, ""#I>;
// Octeon multiplier and product registers
def MPL0 : MipsReg<0, "mpl0">;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
index 166ddea0431f..a657bb44ac78 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -320,7 +320,7 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB,
// We re-use the same spill slot each time so that the stack frame doesn't
// grow too much in functions with a large number of moves.
- int FI = MF.getInfo<MipsFunctionInfo>()->getMoveF64ViaSpillFI(RC2);
+ int FI = MF.getInfo<MipsFunctionInfo>()->getMoveF64ViaSpillFI(MF, RC2);
if (!Subtarget.isLittle())
std::swap(LoReg, HiReg);
TII.storeRegToStack(MBB, I, LoReg, I->getOperand(1).isKill(), FI, RC,
@@ -386,7 +386,7 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB,
// We re-use the same spill slot each time so that the stack frame doesn't
// grow too much in functions with a large number of moves.
- int FI = MF.getInfo<MipsFunctionInfo>()->getMoveF64ViaSpillFI(RC);
+ int FI = MF.getInfo<MipsFunctionInfo>()->getMoveF64ViaSpillFI(MF, RC);
TII.storeRegToStack(MBB, I, SrcReg, Op1.isKill(), FI, RC, &RegInfo, 0);
TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &RegInfo, Offset);
return true;
@@ -434,8 +434,8 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
TII.adjustStackPtr(SP, -StackSize, MBB, MBBI);
// emit ".cfi_def_cfa_offset StackSize"
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, -StackSize));
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -539,11 +539,11 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
// addiu $Reg, $zero, -MaxAlignment
// andi $sp, $sp, $Reg
Register VR = MF.getRegInfo().createVirtualRegister(RC);
- assert(isInt<16>(MFI.getMaxAlignment()) &&
+ assert((Log2(MFI.getMaxAlign()) < 16) &&
"Function's alignment size requirement is not supported.");
- int MaxAlign = -(int)MFI.getMaxAlignment();
+ int64_t MaxAlign = -(int64_t)MFI.getMaxAlign().value();
- BuildMI(MBB, MBBI, dl, TII.get(ADDiu), VR).addReg(ZERO) .addImm(MaxAlign);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDiu), VR).addReg(ZERO).addImm(MaxAlign);
BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR);
if (hasBP(MF)) {
@@ -776,7 +776,7 @@ void MipsSEFrameLowering::emitInterruptEpilogueStub(
int MipsSEFrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI,
- unsigned &FrameReg) const {
+ Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
MipsABIInfo ABI = STI.getABI();
@@ -789,11 +789,9 @@ int MipsSEFrameLowering::getFrameIndexReference(const MachineFunction &MF,
getOffsetOfLocalArea() + MFI.getOffsetAdjustment();
}
-bool MipsSEFrameLowering::
-spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+bool MipsSEFrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
const TargetInstrInfo &TII = *STI.getInstrInfo();
@@ -880,11 +878,11 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF,
// Create spill slots for eh data registers if function calls eh_return.
if (MipsFI->callsEhReturn())
- MipsFI->createEhDataRegsFI();
+ MipsFI->createEhDataRegsFI(MF);
// Create spill slots for Coprocessor 0 registers if function is an ISR.
if (MipsFI->isISR())
- MipsFI->createISRRegFI();
+ MipsFI->createISRRegFI(MF);
// Expand pseudo instructions which load, store or copy accumulators.
// Add an emergency spill slot if a pseudo was expanded.
@@ -895,8 +893,7 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF,
const TargetRegisterClass &RC = STI.isGP64bit() ?
Mips::GPR64RegClass : Mips::GPR32RegClass;
int FI = MF.getFrameInfo().CreateStackObject(TRI->getSpillSize(RC),
- TRI->getSpillAlignment(RC),
- false);
+ TRI->getSpillAlign(RC), false);
RS->addScavengingFrameIndex(FI);
}
@@ -912,8 +909,7 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF,
const TargetRegisterClass &RC =
ABI.ArePtrs64bit() ? Mips::GPR64RegClass : Mips::GPR32RegClass;
int FI = MF.getFrameInfo().CreateStackObject(TRI->getSpillSize(RC),
- TRI->getSpillAlignment(RC),
- false);
+ TRI->getSpillAlign(RC), false);
RS->addScavengingFrameIndex(FI);
}
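The prologue realignment above (materialize -MaxAlign with addiu, then and it into $sp; the updated assert requires Log2(MaxAlign) < 16 so the immediate fits addiu's signed 16-bit field) simply rounds the stack pointer down to a power-of-2 boundary. A minimal sketch with an invented alignDown helper:

#include <cstdint>

uint64_t alignDown(uint64_t SP, uint64_t MaxAlign) {
  // MaxAlign is a power of 2, so -MaxAlign == ~(MaxAlign - 1) is a mask that
  // clears the low Log2(MaxAlign) bits.
  return SP & ~(MaxAlign - 1);
}
// e.g. alignDown(0x7fff1234, 16) == 0x7fff1230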
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.h
index 78ffe161d9c6..c818a65f5b14 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.h
@@ -28,11 +28,11 @@ public:
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
int getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const override;
+ Register &FrameReg) const override;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
+ ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index bef1a3657ea5..7be5fc33a0af 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -153,7 +153,7 @@ void MipsSEDAGToDAGISel::emitMCountABI(MachineInstr &MI, MachineBasicBlock &MBB,
}
void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
- MF.getInfo<MipsFunctionInfo>()->initGlobalBaseReg();
+ MF.getInfo<MipsFunctionInfo>()->initGlobalBaseReg(MF);
MachineRegisterInfo *MRI = &MF.getRegInfo();
@@ -833,7 +833,9 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
}
case ISD::INTRINSIC_W_CHAIN: {
- switch (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+ const unsigned IntrinsicOpcode =
+ cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ switch (IntrinsicOpcode) {
default:
break;
@@ -845,6 +847,41 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
ReplaceNode(Node, Reg.getNode());
return true;
}
+ case Intrinsic::mips_ldr_d:
+ case Intrinsic::mips_ldr_w: {
+ unsigned Op = (IntrinsicOpcode == Intrinsic::mips_ldr_d) ? Mips::LDR_D
+ : Mips::LDR_W;
+
+ SDLoc DL(Node);
+ assert(Node->getNumOperands() == 4 && "Unexpected number of operands.");
+ const SDValue &Chain = Node->getOperand(0);
+ const SDValue &Intrinsic = Node->getOperand(1);
+ const SDValue &Pointer = Node->getOperand(2);
+ const SDValue &Constant = Node->getOperand(3);
+
+ assert(Chain.getValueType() == MVT::Other);
+ (void)Intrinsic;
+ assert(Intrinsic.getOpcode() == ISD::TargetConstant &&
+ Constant.getOpcode() == ISD::Constant &&
+ "Invalid instruction operand.");
+
+ // Convert Constant to TargetConstant.
+ const ConstantInt *Val =
+ cast<ConstantSDNode>(Constant)->getConstantIntValue();
+ SDValue Imm =
+ CurDAG->getTargetConstant(*Val, DL, Constant.getValueType());
+
+ SmallVector<SDValue, 3> Ops{Pointer, Imm, Chain};
+
+ assert(Node->getNumValues() == 2);
+ assert(Node->getValueType(0).is128BitVector());
+ assert(Node->getValueType(1) == MVT::Other);
+ SmallVector<EVT, 2> ResTys{Node->getValueType(0), Node->getValueType(1)};
+
+ ReplaceNode(Node, CurDAG->getMachineNode(Op, DL, ResTys, Ops));
+
+ return true;
+ }
}
break;
}
@@ -866,7 +903,9 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
}
case ISD::INTRINSIC_VOID: {
- switch (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+ const unsigned IntrinsicOpcode =
+ cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ switch (IntrinsicOpcode) {
default:
break;
@@ -879,6 +918,40 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
ReplaceNode(Node, ChainOut.getNode());
return true;
}
+ case Intrinsic::mips_str_d:
+ case Intrinsic::mips_str_w: {
+ unsigned Op = (IntrinsicOpcode == Intrinsic::mips_str_d) ? Mips::STR_D
+ : Mips::STR_W;
+
+ SDLoc DL(Node);
+ assert(Node->getNumOperands() == 5 && "Unexpected number of operands.");
+ const SDValue &Chain = Node->getOperand(0);
+ const SDValue &Intrinsic = Node->getOperand(1);
+ const SDValue &Vec = Node->getOperand(2);
+ const SDValue &Pointer = Node->getOperand(3);
+ const SDValue &Constant = Node->getOperand(4);
+
+ assert(Chain.getValueType() == MVT::Other);
+ (void)Intrinsic;
+ assert(Intrinsic.getOpcode() == ISD::TargetConstant &&
+ Constant.getOpcode() == ISD::Constant &&
+ "Invalid instruction operand.");
+
+ // Convert Constant to TargetConstant.
+ const ConstantInt *Val =
+ cast<ConstantSDNode>(Constant)->getConstantIntValue();
+ SDValue Imm =
+ CurDAG->getTargetConstant(*Val, DL, Constant.getValueType());
+
+ SmallVector<SDValue, 4> Ops{Vec, Pointer, Imm, Chain};
+
+ assert(Node->getNumValues() == 1);
+ assert(Node->getValueType(0) == MVT::Other);
+ SmallVector<EVT, 1> ResTys{Node->getValueType(0)};
+
+ ReplaceNode(Node, CurDAG->getMachineNode(Op, DL, ResTys, Ops));
+ return true;
+ }
}
break;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 798e8784405f..bdf29c53cbd5 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1342,9 +1342,8 @@ static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
// Scan output.
SmallVector<EVT, 2> ResTys;
- for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end();
- I != E; ++I)
- ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I);
+ for (EVT Ty : Op->values())
+ ResTys.push_back((Ty == MVT::i64) ? MVT::Untyped : Ty);
// Create node.
SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops);
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
index d4f09a2f3586..901a4fe4e2ac 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -243,7 +243,7 @@ MipsSEInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
void MipsSEInstrInfo::
storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
+ Register SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
int64_t Offset) const {
DebugLoc DL;
@@ -317,7 +317,7 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
void MipsSEInstrInfo::
loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, int FI, const TargetRegisterClass *RC,
+ Register DestReg, int FI, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI, int64_t Offset) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -483,6 +483,20 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
+/// isBranchWithImm - Return true if the branch contains an immediate
+/// operand (\see lib/Target/Mips/MipsBranchExpansion.cpp).
+bool MipsSEInstrInfo::isBranchWithImm(unsigned Opc) const {
+ switch (Opc) {
+ default:
+ return false;
+ case Mips::BBIT0:
+ case Mips::BBIT1:
+ case Mips::BBIT032:
+ case Mips::BBIT132:
+ return true;
+ }
+}
+
/// getOppositeBranchOpc - Return the inverse of the specified
/// opcode, e.g. turning BEQ to BNE.
unsigned MipsSEInstrInfo::getOppositeBranchOpc(unsigned Opc) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEInstrInfo.h
index 08c00ec8ccef..44a6dac2ccbd 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEInstrInfo.h
@@ -48,20 +48,22 @@ public:
void storeRegToStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIndex,
+ Register SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI,
int64_t Offset) const override;
void loadRegFromStack(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIndex,
+ Register DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI,
int64_t Offset) const override;
bool expandPostRAPseudo(MachineInstr &MI) const override;
+ bool isBranchWithImm(unsigned Opc) const override;
+
unsigned getOppositeBranchOpc(unsigned Opc) const override;
/// Adjust SP by Amount bytes.
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSERegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSERegisterInfo.h
index 82ddf40f56a7..cc8496e0268b 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSERegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSERegisterInfo.h
@@ -17,7 +17,6 @@
#include "MipsRegisterInfo.h"
namespace llvm {
-class MipsSEInstrInfo;
class MipsSERegisterInfo : public MipsRegisterInfo {
public:
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSchedule.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSchedule.td
index 0c0ddeab22c4..568c85af655d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSchedule.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSchedule.td
@@ -27,6 +27,7 @@ def II_ADD : InstrItinClass;
def II_ADDU : InstrItinClass;
def II_ADD_D : InstrItinClass;
def II_ADD_S : InstrItinClass;
+def II_ADDR_PS : InstrItinClass;
def II_ALIGN : InstrItinClass;
def II_AND : InstrItinClass;
def II_ANDI : InstrItinClass;
@@ -278,6 +279,7 @@ def II_MUL : InstrItinClass;
def II_MUH : InstrItinClass;
def II_MUHU : InstrItinClass;
def II_MULU : InstrItinClass;
+def II_MULR_PS : InstrItinClass;
def II_MULT : InstrItinClass;
def II_MULTU : InstrItinClass;
def II_MUL_D : InstrItinClass;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleGeneric.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleGeneric.td
index faccb37c2361..3888ca4e82f5 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleGeneric.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleGeneric.td
@@ -822,17 +822,19 @@ def : InstRW<[GenericWriteFPUS], (instrs FABS_S, FABS_D32, FABS_D64, FADD_D32,
// madd.d, msub.dm mul.d, mul.ps, nmadd.d, nmsub.d, ceil.[wl].[sd], cvt.d.[sw],
// cvt.s.[dw], cvt.w.[sd], cvt.[sw].ps, round.[lw].[ds], floor.[lw].ds,
// trunc.w.[ds], trunc.w.ps,
-def : InstRW<[GenericWriteFPUL], (instrs CEIL_L_D64, CEIL_L_S, CEIL_W_D32,
+def : InstRW<[GenericWriteFPUL], (instrs ADDR_PS64,
+ CEIL_L_D64, CEIL_L_S, CEIL_W_D32,
CEIL_W_D64, CEIL_W_S, CVT_D32_S, CVT_D32_W,
CVT_D64_L, CVT_D64_S, CVT_D64_W, CVT_L_D64,
CVT_L_S, CVT_S_D32, CVT_S_D64, CVT_S_L,
CVT_S_W, CVT_W_D32, CVT_W_D64, CVT_W_S,
CVT_PS_S64, CVT_S_PL64, CVT_S_PU64,
+ CVT_PS_PW64, CVT_PW_PS64,
FLOOR_L_D64, FLOOR_L_S, FLOOR_W_D32,
FLOOR_W_D64, FLOOR_W_S, FMUL_D32, FMUL_D64,
- MADD_D32, MADD_D64, MSUB_D32, MSUB_D64,
+ MADD_D32, MADD_D64, MSUB_D32, MSUB_D64, MULR_PS64,
NMADD_D32, NMADD_D64, NMSUB_D32, NMSUB_D64,
- PLL_PS64, PLU_PS64,
+ PLL_PS64, PLU_PS64, PUL_PS64, PUU_PS64,
ROUND_L_D64, ROUND_L_S, ROUND_W_D32,
ROUND_W_D64, ROUND_W_S, TRUNC_L_D64,
TRUNC_L_S, TRUNC_W_D32, TRUNC_W_D64,
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleP5600.td b/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleP5600.td
index 7331917baa25..3d159d412489 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleP5600.td
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsScheduleP5600.td
@@ -20,7 +20,8 @@ def MipsP5600Model : SchedMachineModel {
IsGP64bit, IsPTR64bit,
InMicroMips, InMips16Mode,
HasCnMips, HasCnMipsP,
- HasDSP, HasDSPR2, HasMT, HasCRC];
+ HasDSP, HasDSPR2, HasMips3D, HasMT,
+ HasCRC];
}
let SchedModel = MipsP5600Model in {
@@ -457,7 +458,7 @@ def : InstRW<[P5600WriteFPUL], (instrs CVT_PS_S64, CVT_S_PL64, CVT_S_PU64)>;
def : InstRW<[P5600WriteFPUL], (instregex "^C_[A-Z]+_(S|D32|D64)$")>;
def : InstRW<[P5600WriteFPUL], (instregex "^FCMP_(S32|D32|D64)$")>;
def : InstRW<[P5600WriteFPUL], (instregex "^PseudoCVT_(S|D32|D64)_(L|W)$")>;
-def : InstRW<[P5600WriteFPUL], (instrs PLL_PS64, PLU_PS64)>;
+def : InstRW<[P5600WriteFPUL], (instrs PLL_PS64, PLU_PS64, PUL_PS64, PUU_PS64)>;
// div.[ds], div.ps
def : InstRW<[P5600WriteFPUDivS], (instrs FDIV_S)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.cpp
index 133b818114c8..ef4191cec3df 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.cpp
@@ -237,7 +237,7 @@ CodeGenOpt::Level MipsSubtarget::getOptLevelToEnablePostRAScheduler() const {
MipsSubtarget &
MipsSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS,
const TargetMachine &TM) {
- std::string CPUName = MIPS_MC::selectMipsCPU(TM.getTargetTriple(), CPU);
+ StringRef CPUName = MIPS_MC::selectMipsCPU(TM.getTargetTriple(), CPU);
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.h
index 5a1dfec41a47..26ee961fc95d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -149,6 +149,9 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// HasDSP, HasDSPR2, HasDSPR3 -- supports DSP ASE.
bool HasDSP, HasDSPR2, HasDSPR3;
+ // Has3D -- Supports Mips3D ASE.
+ bool Has3D;
+
// Allow mixed Mips16 and Mips32 in one source file
bool AllowMixed16_32;
@@ -312,6 +315,7 @@ public:
bool hasDSP() const { return HasDSP; }
bool hasDSPR2() const { return HasDSPR2; }
bool hasDSPR3() const { return HasDSPR3; }
+ bool has3D() const { return Has3D; }
bool hasMSA() const { return HasMSA; }
bool disableMadd4() const { return DisableMadd4; }
bool hasEVA() const { return HasEVA; }
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index 8fec6db00cb9..80cb6ce7ac0c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -131,6 +131,9 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, const Triple &TT,
MaybeAlign(Options.StackAlignmentOverride)) {
Subtarget = &DefaultSubtarget;
initAsmInfo();
+
+ // Mips supports debug entry values.
+ setSupportsDebugEntryValues(true);
}
MipsTargetMachine::~MipsTargetMachine() = default;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
index 0852b5a18c68..481157a8aa89 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -44,7 +44,6 @@ EmbeddedData("membedded-data", cl::Hidden,
void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- InitializeELF(TM.Options.UseInitArray);
SmallDataSection = getContext().getELFSection(
".sdata", ELF::SHT_PROGBITS,
@@ -177,12 +176,13 @@ bool MipsTargetObjectFile::IsConstantInSmallSection(
MCSection *MipsTargetObjectFile::getSectionForConstant(const DataLayout &DL,
SectionKind Kind,
const Constant *C,
- unsigned &Align) const {
+ Align &Alignment) const {
if (IsConstantInSmallSection(DL, C, *TM))
return SmallDataSection;
// Otherwise, we work the same as ELF.
- return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C, Align);
+ return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C,
+ Alignment);
}
const MCExpr *
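Several hunks in this merge swap raw unsigned alignment values for the llvm::Align and llvm::MaybeAlign types. A minimal sketch of how those types behave, assuming only llvm/Support/Alignment.h from this revision (the values are illustrative, not taken from the patch):

  #include "llvm/Support/Alignment.h"
  using namespace llvm;

  void alignSketch() {
    Align A(16);                            // always a power of two, never zero
    bool TooSmall = A < 32;                 // Align compares directly against byte counts
    Align Elt = commonAlignment(A, 8);      // Align(8): A-aligned base plus an offset of 8
    MaybeAlign MA;                          // "unknown", replacing the old 0 sentinel
    Align Chosen = MA.getValueOr(Align(4)); // supply a default when none was recorded
    (void)TooSmall; (void)Elt; (void)Chosen;
  }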
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetObjectFile.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetObjectFile.h
index bdf485f83260..07e9caf0dd09 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetObjectFile.h
@@ -40,7 +40,7 @@ class MipsTargetMachine;
MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C,
- unsigned &Align) const override;
+ Align &Alignment) const override;
/// Describe a TLS variable address within debug info.
const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetStreamer.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetStreamer.h
index b389ba8938c4..f4282f5d6974 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsTargetStreamer.h
@@ -19,8 +19,6 @@
namespace llvm {
-struct MipsABIFlagsSection;
-
class MipsTargetStreamer : public MCTargetStreamer {
public:
MipsTargetStreamer(MCStreamer &S);
@@ -84,12 +82,15 @@ public:
virtual void emitDirectiveSetDsp();
virtual void emitDirectiveSetDspr2();
virtual void emitDirectiveSetNoDsp();
+ virtual void emitDirectiveSetMips3D();
+ virtual void emitDirectiveSetNoMips3D();
virtual void emitDirectiveSetPop();
virtual void emitDirectiveSetPush();
virtual void emitDirectiveSetSoftFloat();
virtual void emitDirectiveSetHardFloat();
// PIC support
+ virtual void emitDirectiveCpAdd(unsigned RegNo);
virtual void emitDirectiveCpLoad(unsigned RegNo);
virtual void emitDirectiveCpLocal(unsigned RegNo);
virtual bool emitDirectiveCpRestore(int Offset,
@@ -263,12 +264,15 @@ public:
void emitDirectiveSetDsp() override;
void emitDirectiveSetDspr2() override;
void emitDirectiveSetNoDsp() override;
+ void emitDirectiveSetMips3D() override;
+ void emitDirectiveSetNoMips3D() override;
void emitDirectiveSetPop() override;
void emitDirectiveSetPush() override;
void emitDirectiveSetSoftFloat() override;
void emitDirectiveSetHardFloat() override;
// PIC support
+ void emitDirectiveCpAdd(unsigned RegNo) override;
void emitDirectiveCpLoad(unsigned RegNo) override;
void emitDirectiveCpLocal(unsigned RegNo) override;
@@ -341,6 +345,7 @@ public:
void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) override;
// PIC support
+ void emitDirectiveCpAdd(unsigned RegNo) override;
void emitDirectiveCpLoad(unsigned RegNo) override;
void emitDirectiveCpLocal(unsigned RegNo) override;
bool emitDirectiveCpRestore(int Offset, function_ref<unsigned()> GetATReg,
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 7e1da9b7a94b..aef0eed6ab9a 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -51,4 +51,6 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple,
// @TODO: Can we just disable this?
WeakDirective = "\t// .weak\t";
GlobalDirective = "\t// .globl\t";
+
+ UseIntegratedAssembler = false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
index ce5ca99c5397..77c4daea2b6a 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
@@ -16,7 +16,6 @@
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
-class Target;
class Triple;
class NVPTXMCAsmInfo : public MCAsmInfo {
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
index e1691d2384e6..b394566edd0d 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
@@ -15,11 +15,6 @@
#include <stdint.h>
-namespace llvm {
-class Target;
-
-} // End llvm namespace
-
// Defines symbolic names for PTX registers.
#define GET_REGINFO_ENUM
#include "NVPTXGenRegisterInfo.inc"
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
index 17f5ba7d900b..cdb70ff1f973 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
@@ -26,13 +26,13 @@ NVPTXTargetStreamer::~NVPTXTargetStreamer() = default;
void NVPTXTargetStreamer::outputDwarfFileDirectives() {
for (const std::string &S : DwarfFiles)
- getStreamer().EmitRawText(S.data());
+ getStreamer().emitRawText(S.data());
DwarfFiles.clear();
}
void NVPTXTargetStreamer::closeLastSection() {
if (HasSections)
- getStreamer().EmitRawText("\t}");
+ getStreamer().emitRawText("\t}");
}
void NVPTXTargetStreamer::emitDwarfFileDirective(StringRef Directive) {
@@ -128,7 +128,7 @@ void NVPTXTargetStreamer::emitRawBytes(StringRef Data) {
if (Label == Directive)
Label = ",";
}
- Streamer.EmitRawText(OS.str());
+ Streamer.emitRawText(OS.str());
}
#endif
}
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.h
index 0acbace5f848..dfe0b9cb5ee6 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.h
@@ -21,7 +21,6 @@ namespace llvm {
class NVPTXTargetMachine;
class FunctionPass;
class MachineFunctionPass;
-class formatted_raw_ostream;
namespace NVPTXCC {
enum CondCodes {
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.td b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.td
index 1d947ef1ce62..2b39e9f412f7 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTX.td
@@ -55,6 +55,8 @@ def SM72 : SubtargetFeature<"sm_72", "SmVersion", "72",
"Target SM 7.2">;
def SM75 : SubtargetFeature<"sm_75", "SmVersion", "75",
"Target SM 7.5">;
+def SM80 : SubtargetFeature<"sm_80", "SmVersion", "80",
+ "Target SM 8.0">;
// PTX Versions
def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32",
@@ -77,6 +79,10 @@ def PTX63 : SubtargetFeature<"ptx63", "PTXVersion", "63",
"Use PTX version 6.3">;
def PTX64 : SubtargetFeature<"ptx64", "PTXVersion", "64",
"Use PTX version 6.4">;
+def PTX65 : SubtargetFeature<"ptx65", "PTXVersion", "65",
+ "Use PTX version 6.5">;
+def PTX70 : SubtargetFeature<"ptx70", "PTXVersion", "70",
+ "Use PTX version 7.0">;
//===----------------------------------------------------------------------===//
// NVPTX supported processors.
@@ -100,6 +106,7 @@ def : Proc<"sm_62", [SM62, PTX50]>;
def : Proc<"sm_70", [SM70, PTX60]>;
def : Proc<"sm_72", [SM72, PTX61]>;
def : Proc<"sm_75", [SM75, PTX63]>;
+def : Proc<"sm_80", [SM80, PTX70]>;
def NVPTXInstrInfo : InstrInfo {
}
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 7117438dc503..da1a398a68f0 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -141,7 +141,7 @@ VisitGlobalVariableForEmission(const GlobalVariable *GV,
Visiting.erase(GV);
}
-void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
MCInst Inst;
lowerToMCInst(MI, Inst);
EmitToStreamer(*OutStreamer, Inst);
@@ -434,13 +434,13 @@ bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
return false;
}
-void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) {
- AsmPrinter::EmitBasicBlockStart(MBB);
+void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
+ AsmPrinter::emitBasicBlockStart(MBB);
if (isLoopHeaderOfNoUnroll(MBB))
- OutStreamer->EmitRawText(StringRef("\t.pragma \"nounroll\";\n"));
+ OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
}
-void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
+void NVPTXAsmPrinter::emitFunctionEntryLabel() {
SmallString<128> Str;
raw_svector_ostream O(Str);
@@ -467,11 +467,11 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
if (isKernelFunction(*F))
emitKernelFunctionDirectives(*F, O);
- OutStreamer->EmitRawText(O.str());
+ OutStreamer->emitRawText(O.str());
VRegMapping.clear();
// Emit open brace for function body.
- OutStreamer->EmitRawText(StringRef("{\n"));
+ OutStreamer->emitRawText(StringRef("{\n"));
setAndEmitFunctionVirtualRegisters(*MF);
// Emit initial .loc debug directive for correct relocation symbol data.
if (MMI && MMI->hasDebugInfo())
@@ -485,18 +485,18 @@ bool NVPTXAsmPrinter::runOnMachineFunction(MachineFunction &F) {
// debug labels/data after the last basic block.
// We need to emit the closing brace here because we don't have a function that
// finishes emission of the function body.
- OutStreamer->EmitRawText(StringRef("}\n"));
+ OutStreamer->emitRawText(StringRef("}\n"));
return Result;
}
-void NVPTXAsmPrinter::EmitFunctionBodyStart() {
+void NVPTXAsmPrinter::emitFunctionBodyStart() {
SmallString<128> Str;
raw_svector_ostream O(Str);
emitDemotedVars(&MF->getFunction(), O);
- OutStreamer->EmitRawText(O.str());
+ OutStreamer->emitRawText(O.str());
}
-void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
+void NVPTXAsmPrinter::emitFunctionBodyEnd() {
VRegMapping.clear();
}
@@ -762,13 +762,21 @@ static bool isEmptyXXStructor(GlobalVariable *GV) {
return InitList->getNumOperands() == 0;
}
-bool NVPTXAsmPrinter::doInitialization(Module &M) {
+void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
// Construct a default subtarget off of the TargetMachine defaults. The
// rest of NVPTX isn't friendly to change subtargets per function and
// so the default TargetMachine will have all of the options.
const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
+ SmallString<128> Str1;
+ raw_svector_ostream OS1(Str1);
+
+ // Emit header before any dwarf directives are emitted below.
+ emitHeader(M, OS1, *STI);
+ OutStreamer->emitRawText(OS1.str());
+}
+bool NVPTXAsmPrinter::doInitialization(Module &M) {
if (M.alias_size()) {
report_fatal_error("Module has aliases, which NVPTX does not support.");
return true; // error
@@ -784,26 +792,9 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
return true; // error
}
- SmallString<128> Str1;
- raw_svector_ostream OS1(Str1);
-
// We need to call the parent's one explicitly.
bool Result = AsmPrinter::doInitialization(M);
- // Emit header before any dwarf directives are emitted below.
- emitHeader(M, OS1, *STI);
- OutStreamer->EmitRawText(OS1.str());
-
- // Emit module-level inline asm if it exists.
- if (!M.getModuleInlineAsm().empty()) {
- OutStreamer->AddComment("Start of file scope inline assembly");
- OutStreamer->AddBlankLine();
- OutStreamer->EmitRawText(StringRef(M.getModuleInlineAsm()));
- OutStreamer->AddBlankLine();
- OutStreamer->AddComment("End of file scope inline assembly");
- OutStreamer->AddBlankLine();
- }
-
GlobalsEmitted = false;
return Result;
@@ -838,7 +829,7 @@ void NVPTXAsmPrinter::emitGlobals(const Module &M) {
OS2 << '\n';
- OutStreamer->EmitRawText(OS2.str());
+ OutStreamer->emitRawText(OS2.str());
}
void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
@@ -929,7 +920,7 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
->closeLastSection();
// Emit empty .debug_loc section for better support of the empty files.
- OutStreamer->EmitRawText("\t.section\t.debug_loc\t{\t}");
+ OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
}
// Output last DWARF .file directives, if any.
@@ -982,7 +973,7 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
msg.append("Error: ");
msg.append("Symbol ");
if (V->hasName())
- msg.append(V->getName());
+ msg.append(std::string(V->getName()));
msg.append("has unsupported appending linkage type");
llvm_unreachable(msg.c_str());
} else if (!V->hasInternalLinkage() &&
@@ -1184,7 +1175,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
case Type::IntegerTyID: // Integers larger than 64 bits
case Type::StructTyID:
case Type::ArrayTyID:
- case Type::VectorTyID:
+ case Type::FixedVectorTyID:
ElementSize = DL.getTypeStoreSize(ETy);
// Ptx allows variable initialization only for constant and
// global state spaces.
@@ -1358,7 +1349,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
switch (ETy->getTypeID()) {
case Type::StructTyID:
case Type::ArrayTyID:
- case Type::VectorTyID:
+ case Type::FixedVectorTyID:
ElementSize = DL.getTypeStoreSize(ETy);
O << " .b8 ";
getSymbol(GVar)->print(O, MAI);
@@ -1439,7 +1430,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (isKernelFunction(*F)) {
if (isSampler(*I) || isImage(*I)) {
if (isImage(*I)) {
- std::string sname = I->getName();
+ std::string sname = std::string(I->getName());
if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
if (hasImageHandles)
O << "\t.param .u64 .ptr .surfref ";
@@ -1634,8 +1625,8 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
const MachineFrameInfo &MFI = MF.getFrameInfo();
int NumBytes = (int) MFI.getStackSize();
if (NumBytes) {
- O << "\t.local .align " << MFI.getMaxAlignment() << " .b8 \t" << DEPOTNAME
- << getFunctionNumber() << "[" << NumBytes << "];\n";
+ O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
+ << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
O << "\t.reg .b64 \t%SP;\n";
O << "\t.reg .b64 \t%SPL;\n";
@@ -1684,7 +1675,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
}
}
- OutStreamer->EmitRawText(O.str());
+ OutStreamer->emitRawText(O.str());
}
void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
@@ -1815,7 +1806,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
aggBuffer->addBytes(ptr, 4, Bytes);
break;
} else if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (const auto *constInt = dyn_cast_or_null<ConstantInt>(
+ if (const auto *constInt = dyn_cast<ConstantInt>(
ConstantFoldConstant(Cexpr, DL))) {
int int32 = (int)(constInt->getZExtValue());
ConvertIntToBytes<>(ptr, int32);
@@ -1837,7 +1828,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
aggBuffer->addBytes(ptr, 8, Bytes);
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (const auto *constInt = dyn_cast_or_null<ConstantInt>(
+ if (const auto *constInt = dyn_cast<ConstantInt>(
ConstantFoldConstant(Cexpr, DL))) {
long long int64 = (long long)(constInt->getZExtValue());
ConvertIntToBytes<>(ptr, int64);
@@ -1892,7 +1883,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
}
case Type::ArrayTyID:
- case Type::VectorTyID:
+ case Type::FixedVectorTyID:
case Type::StructTyID: {
if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
int ElementSize = DL.getTypeAllocSize(CPV->getType());
@@ -1993,23 +1984,22 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
}
switch (CE->getOpcode()) {
- default:
+ default: {
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C = ConstantFoldConstant(CE, getDataLayout()))
- if (C && C != CE)
- return lowerConstantForGV(C, ProcessingGeneric);
+ Constant *C = ConstantFoldConstant(CE, getDataLayout());
+ if (C != CE)
+ return lowerConstantForGV(C, ProcessingGeneric);
// Otherwise report the problem to the user.
- {
- std::string S;
- raw_string_ostream OS(S);
- OS << "Unsupported expression in static initializer: ";
- CE->printAsOperand(OS, /*PrintType=*/false,
- !MF ? nullptr : MF->getFunction().getParent());
- report_fatal_error(OS.str());
- }
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ CE->printAsOperand(OS, /*PrintType=*/false,
+ !MF ? nullptr : MF->getFunction().getParent());
+ report_fatal_error(OS.str());
+ }
case Instruction::AddrSpaceCast: {
// Strip the addrspacecast and pass along the operand
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 7a66854d32f4..5c3a4eb470c1 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -32,7 +32,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/PassAnalysisSupport.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
@@ -200,13 +200,14 @@ private:
const Function *F;
std::string CurrentFnName;
- void EmitBasicBlockStart(const MachineBasicBlock &MBB) override;
- void EmitFunctionEntryLabel() override;
- void EmitFunctionBodyStart() override;
- void EmitFunctionBodyEnd() override;
+ void emitStartOfAsmFile(Module &M) override;
+ void emitBasicBlockStart(const MachineBasicBlock &MBB) override;
+ void emitFunctionEntryLabel() override;
+ void emitFunctionBodyStart() override;
+ void emitFunctionBodyEnd() override;
void emitImplicitDef(const MachineInstr *MI) const override;
- void EmitInstruction(const MachineInstr *) override;
+ void emitInstruction(const MachineInstr *) override;
void lowerToMCInst(const MachineInstr *MI, MCInst &OutMI);
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
MCOperand GetSymbolRef(const MCSymbol *Symbol);
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index d26912f47e50..c533921842e4 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -65,7 +65,7 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
int NVPTXFrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI,
- unsigned &FrameReg) const {
+ Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
FrameReg = NVPTX::VRDepot;
return MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
@@ -83,3 +83,8 @@ MachineBasicBlock::iterator NVPTXFrameLowering::eliminateCallFramePseudoInstr(
// ADJCALLSTACKUP instructions.
return MBB.erase(I);
}
+
+TargetFrameLowering::DwarfFrameBase
+NVPTXFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
+ return {DwarfFrameBase::CFA, {0}};
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
index 40269f58f06e..e4c2b9e77f70 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -16,7 +16,7 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
-class NVPTXSubtarget;
+
class NVPTXFrameLowering : public TargetFrameLowering {
public:
explicit NVPTXFrameLowering();
@@ -25,11 +25,12 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
int getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const override;
+ Register &FrameReg) const override;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const override;
+ DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override;
};
} // End llvm namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index b36d9b2e240a..9078ff8cfb97 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -144,7 +144,7 @@ bool GenericToNVVM::runOnModule(Module &M) {
// variable initializers, as other uses have already been removed
// while walking through the instructions in function definitions.
GV->replaceAllUsesWith(BitCastNewGV);
- std::string Name = GV->getName();
+ std::string Name = std::string(GV->getName());
GV->eraseFromParent();
NewGV->setName(Name);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 92f71c687c46..f45cc06e0a0a 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -31,7 +31,6 @@
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -88,11 +87,6 @@ static cl::opt<bool> UsePrecSqrtF32(
cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
cl::init(true));
-static cl::opt<bool> FtzEnabled(
- "nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
- cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
- cl::init(false));
-
int NVPTXTargetLowering::getDivF32Level() const {
if (UsePrecDivF32.getNumOccurrences() > 0) {
// If nvptx-prec-div32=N is used on the command-line, always honor it
@@ -117,18 +111,8 @@ bool NVPTXTargetLowering::usePrecSqrtF32() const {
}
bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const {
- // TODO: Get rid of this flag; there can be only one way to do this.
- if (FtzEnabled.getNumOccurrences() > 0) {
- // If nvptx-f32ftz is used on the command-line, always honor it
- return FtzEnabled;
- } else {
- const Function &F = MF.getFunction();
- // Otherwise, check for an nvptx-f32ftz attribute on the function
- if (F.hasFnAttribute("nvptx-f32ftz"))
- return F.getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
- else
- return false;
- }
+ return MF.getDenormalMode(APFloat::IEEEsingle()).Output ==
+ DenormalMode::PreserveSign;
}
static bool IsPTXVectorType(MVT VT) {
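The useF32FTZ() rewrite above drops the -nvptx-f32ftz flag in favour of the per-function denormal mode, which the frontend records as a function attribute; a hedged sketch of the resulting behaviour, assuming a MachineFunction &MF is in scope (the attribute spelling is an example, not taken from this patch):

  // A function carrying e.g. "denormal-fp-math-f32"="preserve-sign,preserve-sign"
  // reports DenormalMode::PreserveSign as its f32 output mode, so useF32FTZ()
  // returns true and the .ftz forms of the PTX instructions get selected.
  bool WouldFlush = MF.getDenormalMode(APFloat::IEEEsingle()).Output ==
                    DenormalMode::PreserveSign;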
@@ -233,11 +217,10 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
// covered by the vector op. Otherwise, it returns 1.
static unsigned CanMergeParamLoadStoresStartingAt(
unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs,
- const SmallVectorImpl<uint64_t> &Offsets, unsigned ParamAlignment) {
- assert(isPowerOf2_32(AccessSize) && "must be a power of 2!");
+ const SmallVectorImpl<uint64_t> &Offsets, Align ParamAlignment) {
// Can't vectorize if param alignment is not sufficient.
- if (AccessSize > ParamAlignment)
+ if (ParamAlignment < AccessSize)
return 1;
// Can't vectorize if offset is not aligned.
if (Offsets[Idx] & (AccessSize - 1))
@@ -297,7 +280,7 @@ enum ParamVectorizationFlags {
static SmallVector<ParamVectorizationFlags, 16>
VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs,
const SmallVectorImpl<uint64_t> &Offsets,
- unsigned ParamAlignment) {
+ Align ParamAlignment) {
// Set vector size to match ValueVTs and mark all elements as
// scalars by default.
SmallVector<ParamVectorizationFlags, 16> VectorInfo;
@@ -1258,8 +1241,8 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
std::string NVPTXTargetLowering::getPrototype(
const DataLayout &DL, Type *retTy, const ArgListTy &Args,
- const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment,
- ImmutableCallSite CS) const {
+ const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign retAlignment,
+ const CallBase &CB) const {
auto PtrVT = getPointerTy(DL);
bool isABI = (STI.getSmVersion() >= 20);
@@ -1294,8 +1277,8 @@ std::string NVPTXTargetLowering::getPrototype(
O << ".param .b" << PtrVT.getSizeInBits() << " _";
} else if (retTy->isAggregateType() || retTy->isVectorTy() ||
retTy->isIntegerTy(128)) {
- O << ".param .align " << retAlignment << " .b8 _["
- << DL.getTypeAllocSize(retTy) << "]";
+ O << ".param .align " << (retAlignment ? retAlignment->value() : 0)
+ << " .b8 _[" << DL.getTypeAllocSize(retTy) << "]";
} else {
llvm_unreachable("Unknown return type");
}
@@ -1316,7 +1299,7 @@ std::string NVPTXTargetLowering::getPrototype(
if (!Outs[OIdx].Flags.isByVal()) {
if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
unsigned align = 0;
- const CallInst *CallI = cast<CallInst>(CS.getInstruction());
+ const CallInst *CallI = cast<CallInst>(&CB);
// +1 because index 0 is reserved for return type alignment
if (!getAlign(*CallI, i + 1, align))
align = DL.getABITypeAlignment(Ty);
@@ -1358,9 +1341,9 @@ std::string NVPTXTargetLowering::getPrototype(
assert(PTy && "Param with byval attribute should be a pointer type");
Type *ETy = PTy->getElementType();
- unsigned align = Outs[OIdx].Flags.getByValAlign();
+ Align align = Outs[OIdx].Flags.getNonZeroByValAlign();
unsigned sz = DL.getTypeAllocSize(ETy);
- O << ".param .align " << align << " .b8 ";
+ O << ".param .align " << align.value() << " .b8 ";
O << "_";
O << "[" << sz << "]";
}
@@ -1368,31 +1351,29 @@ std::string NVPTXTargetLowering::getPrototype(
return O.str();
}
-unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
- ImmutableCallSite CS,
- Type *Ty, unsigned Idx,
- const DataLayout &DL) const {
- if (!CS) {
+Align NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
+ const CallBase *CB, Type *Ty,
+ unsigned Idx,
+ const DataLayout &DL) const {
+ if (!CB) {
// No call base available; fall back to ABI type alignment
- return DL.getABITypeAlignment(Ty);
+ return DL.getABITypeAlign(Ty);
}
- unsigned Align = 0;
- const Value *DirectCallee = CS.getCalledFunction();
+ unsigned Alignment = 0;
+ const Function *DirectCallee = CB->getCalledFunction();
if (!DirectCallee) {
// We don't have a direct function symbol, but that may be because of
// constant cast instructions in the call.
- const Instruction *CalleeI = CS.getInstruction();
- assert(CalleeI && "Call target is not a function or derived value?");
// With bitcast'd call targets, the instruction will be the call
- if (isa<CallInst>(CalleeI)) {
+ if (const auto *CI = dyn_cast<CallInst>(CB)) {
// Check if we have call alignment metadata
- if (getAlign(*cast<CallInst>(CalleeI), Idx, Align))
- return Align;
+ if (getAlign(*CI, Idx, Alignment))
+ return Align(Alignment);
- const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue();
+ const Value *CalleeV = CI->getCalledOperand();
// Ignore any bitcast instructions
while (isa<ConstantExpr>(CalleeV)) {
const ConstantExpr *CE = cast<ConstantExpr>(CalleeV);
@@ -1404,20 +1385,20 @@ unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
// We have now looked past all of the bitcasts. Do we finally have a
// Function?
- if (isa<Function>(CalleeV))
- DirectCallee = CalleeV;
+ if (const auto *CalleeF = dyn_cast<Function>(CalleeV))
+ DirectCallee = CalleeF;
}
}
// Check for function alignment information if we found that the
// ultimate target is a Function
if (DirectCallee)
- if (getAlign(*cast<Function>(DirectCallee), Idx, Align))
- return Align;
+ if (getAlign(*DirectCallee, Idx, Alignment))
+ return Align(Alignment);
// Call is indirect or alignment information is not available, fall back to
// the ABI type alignment
- return DL.getABITypeAlignment(Ty);
+ return DL.getABITypeAlign(Ty);
}
SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
@@ -1432,7 +1413,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool &isTailCall = CLI.IsTailCall;
ArgListTy &Args = CLI.getArgs();
Type *RetTy = CLI.RetTy;
- ImmutableCallSite CS = CLI.CS;
+ const CallBase *CB = CLI.CB;
const DataLayout &DL = DAG.getDataLayout();
bool isABI = (STI.getSmVersion() >= 20);
@@ -1465,15 +1446,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<EVT, 16> VTs;
SmallVector<uint64_t, 16> Offsets;
ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets);
- unsigned ArgAlign =
- getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL);
+ Align ArgAlign = getArgumentAlignment(Callee, CB, Ty, paramCount + 1, DL);
unsigned AllocSize = DL.getTypeAllocSize(Ty);
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
bool NeedAlign; // Does argument declaration specify alignment?
if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
// declare .param .align <align> .b8 .param<n>[<size>];
SDValue DeclareParamOps[] = {
- Chain, DAG.getConstant(ArgAlign, dl, MVT::i32),
+ Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32),
DAG.getConstant(paramCount, dl, MVT::i32),
DAG.getConstant(AllocSize, dl, MVT::i32), InFlag};
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
@@ -1554,8 +1534,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Adjust type of the store op if we've extended the scalar
// return value.
EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : VTs[j];
- unsigned EltAlign =
- NeedAlign ? GreatestCommonDivisor64(ArgAlign, Offsets[j]) : 0;
+ MaybeAlign EltAlign;
+ if (NeedAlign)
+ EltAlign = commonAlignment(ArgAlign, Offsets[j]);
Chain = DAG.getMemIntrinsicNode(
Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,
@@ -1585,7 +1566,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// declare .param .align <align> .b8 .param<n>[<size>];
unsigned sz = Outs[OIdx].Flags.getByValSize();
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign();
+ Align ArgAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
// The ByValAlign in the Outs[OIdx].Flags is always set at this point,
// so we don't need to worry about natural alignment or not.
// See TargetLowering::LowerCallTo().
@@ -1593,18 +1574,19 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Enforce minimum alignment of 4 to work around ptxas miscompile
// for sm_50+. See corresponding alignment adjustment in
// emitFunctionParamList() for details.
- if (ArgAlign < 4)
- ArgAlign = 4;
- SDValue DeclareParamOps[] = {Chain, DAG.getConstant(ArgAlign, dl, MVT::i32),
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(sz, dl, MVT::i32), InFlag};
+ if (ArgAlign < Align(4))
+ ArgAlign = Align(4);
+ SDValue DeclareParamOps[] = {
+ Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32),
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(sz, dl, MVT::i32), InFlag};
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps);
InFlag = Chain.getValue(1);
for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
EVT elemtype = VTs[j];
int curOffset = Offsets[j];
- unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
+ unsigned PartAlign = GreatestCommonDivisor64(ArgAlign.value(), curOffset);
auto PtrVT = getPointerTy(DL);
SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx],
DAG.getConstant(curOffset, dl, PtrVT));
@@ -1618,10 +1600,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(paramCount, dl, MVT::i32),
DAG.getConstant(curOffset, dl, MVT::i32),
theVal, InFlag };
- Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
- CopyParamOps, elemtype,
- MachinePointerInfo(), /* Align */ 0,
- MachineMemOperand::MOStore);
+ Chain = DAG.getMemIntrinsicNode(
+ NVPTXISD::StoreParam, dl, CopyParamVTs, CopyParamOps, elemtype,
+ MachinePointerInfo(), /* Align */ None, MachineMemOperand::MOStore);
InFlag = Chain.getValue(1);
}
@@ -1629,7 +1610,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
- unsigned retAlignment = 0;
+ MaybeAlign retAlignment = None;
// Handle Result
if (Ins.size() > 0) {
@@ -1657,12 +1638,13 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DeclareRetOps);
InFlag = Chain.getValue(1);
} else {
- retAlignment = getArgumentAlignment(Callee, CS, RetTy, 0, DL);
+ retAlignment = getArgumentAlignment(Callee, CB, RetTy, 0, DL);
+ assert(retAlignment && "retAlignment is guaranteed to be set");
SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareRetOps[] = { Chain,
- DAG.getConstant(retAlignment, dl, MVT::i32),
- DAG.getConstant(resultsz / 8, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32), InFlag };
+ SDValue DeclareRetOps[] = {
+ Chain, DAG.getConstant(retAlignment->value(), dl, MVT::i32),
+ DAG.getConstant(resultsz / 8, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), InFlag};
Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
DeclareRetOps);
InFlag = Chain.getValue(1);
@@ -1672,7 +1654,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Both indirect calls and libcalls have nullptr Func. In order to distinguish
// between them we must rely on the call site value which is valid for
// indirect calls but is always null for libcalls.
- bool isIndirectCall = !Func && CS;
+ bool isIndirectCall = !Func && CB;
if (isa<ExternalSymbolSDNode>(Callee)) {
Function* CalleeFunc = nullptr;
@@ -1695,7 +1677,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// The prototype is embedded in a string and put as the operand for a
// CallPrototype SDNode which will print out to the value of the string.
SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- std::string Proto = getPrototype(DL, RetTy, Args, Outs, retAlignment, CS);
+ std::string Proto = getPrototype(DL, RetTy, Args, Outs, retAlignment, *CB);
const char *ProtoStr =
nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
SDValue ProtoOps[] = {
@@ -1768,7 +1750,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0);
assert(VTs.size() == Ins.size() && "Bad value decomposition");
- unsigned RetAlign = getArgumentAlignment(Callee, CS, RetTy, 0, DL);
+ Align RetAlign = getArgumentAlignment(Callee, CB, RetTy, 0, DL);
auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
SmallVector<EVT, 6> LoadVTs;
@@ -1784,7 +1766,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool needTruncate = false;
EVT TheLoadType = VTs[i];
EVT EltType = Ins[i].VT;
- unsigned EltAlign = GreatestCommonDivisor64(RetAlign, Offsets[i]);
+ Align EltAlign = commonAlignment(RetAlign, Offsets[i]);
if (ExtendIntegerRetVal) {
TheLoadType = MVT::i32;
EltType = MVT::i32;
@@ -2320,10 +2302,10 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
MemSDNode *MemSD = cast<MemSDNode>(N);
const DataLayout &TD = DAG.getDataLayout();
- unsigned Align = MemSD->getAlignment();
- unsigned PrefAlign =
- TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
- if (Align < PrefAlign) {
+ Align Alignment = MemSD->getAlign();
+ Align PrefAlign =
+ TD.getPrefTypeAlign(ValVT.getTypeForEVT(*DAG.getContext()));
+ if (Alignment < PrefAlign) {
// This store is not sufficiently aligned, so bail out and let this vector
// store be scalarized. Note that we may still be able to emit smaller
// vector stores. For example, if we are storing a <4 x float> with an
@@ -2559,7 +2541,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0);
assert(VTs.size() > 0 && "Unexpected empty type.");
auto VectorInfo =
- VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlignment(Ty));
+ VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlign(Ty));
SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
int VecIdx = -1; // Index of the first element of the current vector.
@@ -2678,7 +2660,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
assert(VTs.size() == OutVals.size() && "Bad return value decomposition");
auto VectorInfo = VectorizePTXValueVTs(
- VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlignment(RetTy) : 1);
+ VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlign(RetTy) : Align(1));
// PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
// 32-bits are sign extended or zero extended, depending on whether
@@ -2730,10 +2712,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Adjust type of load/store op if we've extended the scalar
// return value.
EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i];
- Chain = DAG.getMemIntrinsicNode(Op, dl, DAG.getVTList(MVT::Other),
- StoreOperands, TheStoreType,
- MachinePointerInfo(), /* Align */ 1,
- MachineMemOperand::MOStore);
+ Chain = DAG.getMemIntrinsicNode(
+ Op, dl, DAG.getVTList(MVT::Other), StoreOperands, TheStoreType,
+ MachinePointerInfo(), Align(1), MachineMemOperand::MOStore);
// Cleanup vector state.
StoreOperands.clear();
}
@@ -3799,8 +3780,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align =
- MaybeAlign(cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
+ Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
return true;
}
@@ -3819,8 +3799,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align =
- MaybeAlign(cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
+ Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
return true;
}
@@ -4810,11 +4789,10 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
LoadSDNode *LD = cast<LoadSDNode>(N);
- unsigned Align = LD->getAlignment();
+ Align Alignment = LD->getAlign();
auto &TD = DAG.getDataLayout();
- unsigned PrefAlign =
- TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
- if (Align < PrefAlign) {
+ Align PrefAlign = TD.getPrefTypeAlign(ResVT.getTypeForEVT(*DAG.getContext()));
+ if (Alignment < PrefAlign) {
// This load is not sufficiently aligned, so bail out and let this vector
// load be scalarized. Note that we may still be able to emit smaller
// vector loads. For example, if we are loading a <4 x float> with an
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 546fe49808e2..df9cd4159962 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -491,8 +491,7 @@ public:
std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
const SmallVectorImpl<ISD::OutputArg> &,
- unsigned retAlignment,
- ImmutableCallSite CS) const;
+ MaybeAlign retAlignment, const CallBase &CB) const;
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -579,8 +578,8 @@ private:
SelectionDAG &DAG) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
- unsigned getArgumentAlignment(SDValue Callee, ImmutableCallSite CS, Type *Ty,
- unsigned Idx, const DataLayout &DL) const;
+ Align getArgumentAlignment(SDValue Callee, const CallBase *CB, Type *Ty,
+ unsigned Idx, const DataLayout &DL) const;
};
} // namespace llvm
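The NVPTX lowering changes above are part of the tree-wide retirement of ImmutableCallSite in favour of CallBase; a minimal sketch of the replacement calls used in these hunks, assuming a const CallBase *CB such as CallLoweringInfo::CB:

  // CallBase covers both CallInst and InvokeInst, so no CallSite wrapper is needed.
  if (const llvm::Function *Callee = CB->getCalledFunction()) {
    // Direct call: the callee is statically known.
    (void)Callee;
  } else {
    // Indirect or bitcast call: inspect the called operand, which replaces the
    // removed getCalledValue() used in the old code.
    const llvm::Value *Target = CB->getCalledOperand();
    (void)Target;
  }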
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index cff230289e60..ec0c92ccf5c5 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -69,7 +69,7 @@ void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
}
-/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
+/// analyzeBranch - Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
/// implemented for a target). Upon success, this returns false and returns
/// with the following information in various cases:
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index 83039241a7c7..6cf59d285e8d 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -113,8 +113,8 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
createMemCpyLoopKnownSize(/* ConvertedInst */ SI,
/* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
/* CopyLen */ CopyLen,
- /* SrcAlign */ LI->getAlignment(),
- /* DestAlign */ SI->getAlignment(),
+ /* SrcAlign */ LI->getAlign(),
+ /* DestAlign */ SI->getAlign(),
/* SrcIsVolatile */ LI->isVolatile(),
/* DstIsVolatile */ SI->isVolatile(), TTI);
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index c3c5f6fbcba7..e60b5eeacdae 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -159,12 +159,14 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
assert(PType && "Expecting pointer type in handleByValParam");
Type *StructType = PType->getElementType();
- unsigned AS = Func->getParent()->getDataLayout().getAllocaAddrSpace();
+ const DataLayout &DL = Func->getParent()->getDataLayout();
+ unsigned AS = DL.getAllocaAddrSpace();
AllocaInst *AllocA = new AllocaInst(StructType, AS, Arg->getName(), FirstInst);
// Set the alignment to alignment of the byval parameter. This is because,
// later load/stores assume that alignment, and we are going to replace
// the use of the byval parameter with this alloca instruction.
- AllocA->setAlignment(MaybeAlign(Func->getParamAlignment(Arg->getArgNo())));
+ AllocA->setAlignment(Func->getParamAlign(Arg->getArgNo())
+ .getValueOr(DL.getPrefTypeAlign(StructType)));
Arg->replaceAllUsesWith(AllocA);
Value *ArgInParam = new AddrSpaceCastInst(
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index a7127b0e9a99..ea2274f394e6 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -67,14 +67,14 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
if (MI.isDebugValue()) {
assert(i == 0 && "Frame indices can only appear as the first "
"operand of a DBG_VALUE machine instruction");
- unsigned Reg;
+ Register Reg;
int64_t Offset =
TFI.getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg);
MI.getOperand(0).ChangeToRegister(Reg, /*isDef=*/false);
MI.getOperand(0).setIsDebug();
auto *DIExpr = DIExpression::prepend(
MI.getDebugExpression(), DIExpression::ApplyOffset, Offset);
- MI.getOperand(3).setMetadata(DIExpr);
+ MI.getDebugExpressionOp().setMetadata(DIExpr);
continue;
}
@@ -97,22 +97,21 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
}
/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
-static inline void
-AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
- bool StackGrowsDown, int64_t &Offset,
- unsigned &MaxAlign) {
+static inline void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
+ bool StackGrowsDown, int64_t &Offset,
+ Align &MaxAlign) {
// If the stack grows down, add the object size to find the lowest address.
if (StackGrowsDown)
Offset += MFI.getObjectSize(FrameIdx);
- unsigned Align = MFI.getObjectAlignment(FrameIdx);
+ Align Alignment = MFI.getObjectAlign(FrameIdx);
// If the alignment of this object is greater than that of the stack, then
// increase the stack alignment to match.
- MaxAlign = std::max(MaxAlign, Align);
+ MaxAlign = std::max(MaxAlign, Alignment);
// Adjust to alignment boundary.
- Offset = (Offset + Align - 1) / Align * Align;
+ Offset = alignTo(Offset, Alignment);
if (StackGrowsDown) {
LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset
@@ -169,7 +168,7 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// NOTE: We do not have a call stack
- unsigned MaxAlign = MFI.getMaxAlignment();
+ Align MaxAlign = MFI.getMaxAlign();
// No scavenger
@@ -178,10 +177,10 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// frame index registers. Functions which don't want/need this optimization
// will continue to use the existing code path.
if (MFI.getUseLocalStackAllocationBlock()) {
- unsigned Align = MFI.getLocalFrameMaxAlign().value();
+ Align Alignment = MFI.getLocalFrameMaxAlign();
// Adjust to alignment boundary.
- Offset = (Offset + Align - 1) / Align * Align;
+ Offset = alignTo(Offset, Alignment);
LLVM_DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
@@ -196,7 +195,7 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Allocate the local block
Offset += MFI.getLocalFrameSize();
- MaxAlign = std::max(Align, MaxAlign);
+ MaxAlign = std::max(Alignment, MaxAlign);
}
// No stack protector
@@ -227,18 +226,16 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// ensure that the callee's frame or the alloca data is suitably aligned;
// otherwise, for leaf functions, align to the TransientStackAlignment
// value.
- unsigned StackAlign;
+ Align StackAlign;
if (MFI.adjustsStack() || MFI.hasVarSizedObjects() ||
(RegInfo->needsStackRealignment(Fn) && MFI.getObjectIndexEnd() != 0))
- StackAlign = TFI.getStackAlignment();
+ StackAlign = TFI.getStackAlign();
else
- StackAlign = TFI.getTransientStackAlignment();
+ StackAlign = TFI.getTransientStackAlign();
// If the frame pointer is eliminated, all frame offsets will be relative to
// SP not FP. Align to MaxAlign so this works.
- StackAlign = std::max(StackAlign, MaxAlign);
- unsigned AlignMask = StackAlign - 1;
- Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+ Offset = alignTo(Offset, std::max(StackAlign, MaxAlign));
}
// Update frame info to pretend that this is part of the stack...
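The AdjustStackOffset hunk above replaces the hand-rolled round-up expression with llvm::alignTo; for the power-of-two alignments MachineFrameInfo hands out, the two forms compute the same value, e.g.:

  #include <cassert>
  #include <cstdint>
  #include "llvm/Support/Alignment.h"

  int main() {
    // Round an offset of 13 bytes up to the next multiple of 8.
    uint64_t Manual = (13 + 8 - 1) / 8 * 8;              // = 16
    uint64_t Helper = llvm::alignTo(13, llvm::Align(8)); // = 16
    assert(Manual == Helper);
    return 0;
  }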
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index e213089e4085..8ae542130a14 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -152,7 +152,7 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
assert(TexHandleDef.getOperand(6).isSymbol() && "Load is not a symbol!");
StringRef Sym = TexHandleDef.getOperand(6).getSymbolName();
- std::string ParamBaseName = MF.getName();
+ std::string ParamBaseName = std::string(MF.getName());
ParamBaseName += "_param_";
assert(Sym.startswith(ParamBaseName) && "Invalid symbol reference");
unsigned Param = atoi(Sym.data()+ParamBaseName.size());
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 357826c2d19c..f1fa6416f15f 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -33,13 +33,13 @@ void NVPTXSubtarget::anchor() {}
NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
// Provide the default CPU if we don't have one.
- TargetName = CPU.empty() ? "sm_20" : CPU;
+ TargetName = std::string(CPU.empty() ? "sm_20" : CPU);
- ParseSubtargetFeatures(TargetName, FS);
+ ParseSubtargetFeatures(TargetName, FS);
- // Set default to PTX 3.2 (CUDA 5.5)
- if (PTXVersion == 0) {
- PTXVersion = 32;
+ // Set default to PTX 3.2 (CUDA 5.5)
+ if (PTXVersion == 0) {
+ PTXVersion = 32;
}
return *this;
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 0778706d936a..85709eb731e2 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -117,7 +117,7 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
getEffectiveCodeModel(CM, CodeModel::Small), OL),
is64bit(is64bit), UseShortPointers(UseShortPointersOpt),
TLOF(std::make_unique<NVPTXTargetObjectFile>()),
- Subtarget(TT, CPU, FS, *this) {
+ Subtarget(TT, std::string(CPU), std::string(FS), *this) {
if (TT.getOS() == Triple::NVCL)
drvInterface = NVPTX::NVCL;
else
@@ -276,8 +276,6 @@ void NVPTXPassConfig::addIRPasses() {
addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine()));
if (getOptLevel() != CodeGenOpt::None) {
addAddressSpaceInferencePasses();
- if (!DisableLoadStoreVectorizer)
- addPass(createLoadStoreVectorizerPass());
addStraightLineScalarOptimizationPasses();
}
@@ -295,8 +293,11 @@ void NVPTXPassConfig::addIRPasses() {
// %1 = shl %a, 2
//
// but EarlyCSE can do neither of them.
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOpt::None) {
addEarlyCSEOrGVNPass();
+ if (!DisableLoadStoreVectorizer)
+ addPass(createLoadStoreVectorizerPass());
+ }
}
bool NVPTXPassConfig::addInstSelector() {
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index ab2a93b75922..366d92a5a805 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -27,7 +27,7 @@ public:
MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C,
- unsigned &Align) const override {
+ Align &Alignment) const override {
return ReadOnlySection;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index afc40a7abed0..3873c73fb2e0 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -112,7 +112,8 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
}
int NVPTXTTIImpl::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+ TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
const Instruction *CxtI) {
@@ -123,7 +124,8 @@ int NVPTXTTIImpl::getArithmeticInstrCost(
switch (ISD) {
default:
- return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+ Opd2Info,
Opd1PropInfo, Opd2PropInfo);
case ISD::ADD:
case ISD::MUL:
@@ -136,7 +138,8 @@ int NVPTXTTIImpl::getArithmeticInstrCost(
if (LT.second.SimpleTy == MVT::i64)
return 2 * LT.first;
// Delegate other cases to the basic TTI.
- return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+ Opd2Info,
Opd1PropInfo, Opd2PropInfo);
}
}
@@ -152,3 +155,8 @@ void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
UP.Partial = UP.Runtime = true;
UP.PartialThreshold = UP.Threshold / 4;
}
+
+void NVPTXTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ BaseT::getPeelingPreferences(L, SE, PP);
+}
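The NVPTX TTI hook above gains the then-new TTI::TargetCostKind parameter and simply forwards it; a hypothetical caller-side sketch, where TTI and Int64Ty are assumed to be supplied by the surrounding pass:

  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/Instruction.h"

  // Cost queries now state what they measure; reciprocal throughput is the
  // default used by the vectorizers.
  int mulCost(const llvm::TargetTransformInfo &TTI, llvm::Type *Int64Ty) {
    return TTI.getArithmeticInstrCost(
        llvm::Instruction::Mul, Int64Ty,
        llvm::TargetTransformInfo::TCK_RecipThroughput);
  }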
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 864d8b91a89a..cb832031f1ad 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -50,13 +50,11 @@ public:
// Loads and stores can be vectorized if the alignment is at least as big as
// the load/store we want to vectorize.
- bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const {
return Alignment >= ChainSizeInBytes;
}
- bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const {
return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
}
@@ -87,6 +85,7 @@ public:
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -96,6 +95,10 @@ public:
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
+
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) {
// Volatile loads/stores are only supported for shared and global address
// spaces, or for generic AS that maps to them.
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 43c2e9920403..74d129d330f3 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -226,17 +226,17 @@ bool isManaged(const Value &val) {
std::string getTextureName(const Value &val) {
assert(val.hasName() && "Found texture variable with no name");
- return val.getName();
+ return std::string(val.getName());
}
std::string getSurfaceName(const Value &val) {
assert(val.hasName() && "Found surface variable with no name");
- return val.getName();
+ return std::string(val.getName());
}
std::string getSamplerName(const Value &val) {
assert(val.hasName() && "Found sampler variable with no name");
- return val.getName();
+ return std::string(val.getName());
}
bool getMaxNTIDx(const Function &F, unsigned &x) {
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 7e7902c27a81..13fd7d05ab9f 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -109,6 +109,8 @@ class PPCAsmParser : public MCTargetAsmParser {
bool MatchRegisterName(unsigned &RegNo, int64_t &IntVal);
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
const MCExpr *ExtractModifierFromExpr(const MCExpr *E,
PPCMCExpr::VariantKind &Variant);
@@ -356,6 +358,16 @@ public:
bool isS16ImmX16() const { return Kind == Expression ||
(Kind == Immediate && isInt<16>(getImm()) &&
(getImm() & 15) == 0); }
+ bool isS34ImmX16() const {
+ return Kind == Expression ||
+ (Kind == Immediate && isInt<34>(getImm()) && (getImm() & 15) == 0);
+ }
+ bool isS34Imm() const {
+ // Once the PC-Rel ABI is finalized, evaluate whether a 34-bit
+ // ContextImmediate is needed.
+ return Kind == Expression || (Kind == Immediate && isInt<34>(getImm()));
+ }
+
bool isS17Imm() const {
switch (Kind) {
case Expression:
@@ -388,6 +400,7 @@ public:
bool isCondBr() const { return Kind == Expression ||
(Kind == Immediate && isInt<16>(getImm()) &&
(getImm() & 3) == 0); }
+ bool isImmZero() const { return Kind == Immediate && getImm() == 0; }
bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); }
bool isVSRegNumber() const {
return Kind == Immediate && isUInt<6>(getImm());
@@ -1142,7 +1155,7 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// Post-process instructions (typically extended mnemonics)
ProcessInstruction(Inst, Operands);
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, getSTI());
+ Out.emitInstruction(Inst, getSTI());
return false;
case Match_MissingFeature:
return Error(IDLoc, "instruction use requires an option to be enabled");
@@ -1210,14 +1223,22 @@ bool PPCAsmParser::MatchRegisterName(unsigned &RegNo, int64_t &IntVal) {
bool PPCAsmParser::
ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+ if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success)
+ return TokError("invalid register name");
+ return false;
+}
+
+OperandMatchResultTy PPCAsmParser::tryParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) {
const AsmToken &Tok = getParser().getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
RegNo = 0;
int64_t IntVal;
if (MatchRegisterName(RegNo, IntVal))
- return TokError("invalid register name");
- return false;
+ return MatchOperand_NoMatch;
+ return MatchOperand_Success;
}
/// Extract \code @l/@ha \endcode modifier from expression. Recursively scan
@@ -1380,7 +1401,7 @@ ParseExpression(const MCExpr *&EVal) {
PPCMCExpr::VariantKind Variant;
const MCExpr *E = ExtractModifierFromExpr(EVal, Variant);
if (E)
- EVal = PPCMCExpr::create(Variant, E, false, getParser().getContext());
+ EVal = PPCMCExpr::create(Variant, E, getParser().getContext());
return false;
}
@@ -1427,7 +1448,7 @@ ParseDarwinExpression(const MCExpr *&EVal) {
if (getLexer().isNot(AsmToken::RParen))
return Error(Parser.getTok().getLoc(), "expected ')'");
Parser.Lex(); // Eat the ')'
- EVal = PPCMCExpr::create(Variant, EVal, false, getParser().getContext());
+ EVal = PPCMCExpr::create(Variant, EVal, getParser().getContext());
}
return false;
}
@@ -1560,12 +1581,12 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// instruction name, to match what TableGen is doing.
std::string NewOpcode;
if (parseOptionalToken(AsmToken::Plus)) {
- NewOpcode = Name;
+ NewOpcode = std::string(Name);
NewOpcode += '+';
Name = NewOpcode;
}
if (parseOptionalToken(AsmToken::Minus)) {
- NewOpcode = Name;
+ NewOpcode = std::string(Name);
NewOpcode += '-';
Name = NewOpcode;
}
@@ -1658,9 +1679,9 @@ bool PPCAsmParser::ParseDirectiveWord(unsigned Size, AsmToken ID) {
if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
return Error(ExprLoc, "literal value out of range for '" +
ID.getIdentifier() + "' directive");
- getStreamer().EmitIntValue(IntValue, Size);
+ getStreamer().emitIntValue(IntValue, Size);
} else
- getStreamer().EmitValue(Value, Size, ExprLoc);
+ getStreamer().emitValue(Value, Size, ExprLoc);
return false;
};
@@ -1681,7 +1702,7 @@ bool PPCAsmParser::ParseDirectiveTC(unsigned Size, AsmToken ID) {
return addErrorSuffix(" in '.tc' directive");
// Align to word size.
- getParser().getStreamer().EmitValueToAlignment(Size);
+ getParser().getStreamer().emitValueToAlignment(Size);
// Emit expressions.
return ParseDirectiveWord(Size, ID);
@@ -1710,10 +1731,10 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
if (parseToken(AsmToken::EndOfStatement))
return addErrorSuffix(" in '.machine' directive");
- PPCTargetStreamer &TStreamer =
- *static_cast<PPCTargetStreamer *>(
- getParser().getStreamer().getTargetStreamer());
- TStreamer.emitMachine(CPU);
+ PPCTargetStreamer *TStreamer = static_cast<PPCTargetStreamer *>(
+ getParser().getStreamer().getTargetStreamer());
+ if (TStreamer != nullptr)
+ TStreamer->emitMachine(CPU);
return false;
}
@@ -1752,10 +1773,10 @@ bool PPCAsmParser::ParseDirectiveAbiVersion(SMLoc L) {
parseToken(AsmToken::EndOfStatement))
return addErrorSuffix(" in '.abiversion' directive");
- PPCTargetStreamer &TStreamer =
- *static_cast<PPCTargetStreamer *>(
- getParser().getStreamer().getTargetStreamer());
- TStreamer.emitAbiVersion(AbiVersion);
+ PPCTargetStreamer *TStreamer = static_cast<PPCTargetStreamer *>(
+ getParser().getStreamer().getTargetStreamer());
+ if (TStreamer != nullptr)
+ TStreamer->emitAbiVersion(AbiVersion);
return false;
}
@@ -1775,10 +1796,10 @@ bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
parseToken(AsmToken::EndOfStatement))
return addErrorSuffix(" in '.localentry' directive");
- PPCTargetStreamer &TStreamer =
- *static_cast<PPCTargetStreamer *>(
- getParser().getStreamer().getTargetStreamer());
- TStreamer.emitLocalEntry(Sym, Expr);
+ PPCTargetStreamer *TStreamer = static_cast<PPCTargetStreamer *>(
+ getParser().getStreamer().getTargetStreamer());
+ if (TStreamer != nullptr)
+ TStreamer->emitLocalEntry(Sym, Expr);
return false;
}
@@ -1830,23 +1851,23 @@ PPCAsmParser::applyModifierToExpr(const MCExpr *E,
MCContext &Ctx) {
switch (Variant) {
case MCSymbolRefExpr::VK_PPC_LO:
- return PPCMCExpr::create(PPCMCExpr::VK_PPC_LO, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_LO, E, Ctx);
case MCSymbolRefExpr::VK_PPC_HI:
- return PPCMCExpr::create(PPCMCExpr::VK_PPC_HI, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HI, E, Ctx);
case MCSymbolRefExpr::VK_PPC_HA:
- return PPCMCExpr::create(PPCMCExpr::VK_PPC_HA, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HA, E, Ctx);
case MCSymbolRefExpr::VK_PPC_HIGH:
- return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGH, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGH, E, Ctx);
case MCSymbolRefExpr::VK_PPC_HIGHA:
- return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHA, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHA, E, Ctx);
case MCSymbolRefExpr::VK_PPC_HIGHER:
- return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHER, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHER, E, Ctx);
case MCSymbolRefExpr::VK_PPC_HIGHERA:
- return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHERA, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHERA, E, Ctx);
case MCSymbolRefExpr::VK_PPC_HIGHEST:
- return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHEST, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHEST, E, Ctx);
case MCSymbolRefExpr::VK_PPC_HIGHESTA:
- return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHESTA, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHESTA, E, Ctx);
default:
return nullptr;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index e3c0f958c7ed..74c6fd3733f0 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -60,9 +60,16 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCDisassembler() {
createPPCLEDisassembler);
}
-static DecodeStatus DecodePCRel24BranchTarget(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const void *Decoder) {
+static DecodeStatus decodeCondBrTarget(MCInst &Inst, unsigned Imm,
+ uint64_t /*Address*/,
+ const void * /*Decoder*/) {
+ Inst.addOperand(MCOperand::createImm(SignExtend32<14>(Imm)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeDirectBrTarget(MCInst &Inst, unsigned Imm,
+ uint64_t /*Address*/,
+ const void * /*Decoder*/) {
int32_t Offset = SignExtend32<24>(Imm);
Inst.addOperand(MCOperand::createImm(Offset));
return MCDisassembler::Success;
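
// Illustrative sketch (not part of the patch above): the two decoders differ
// only in the width of the branch field they sign extend, 14 bits for
// conditional branches and 24 bits for direct branches. A minimal stand-in
// for llvm::SignExtend32<N>, with a hypothetical helper name:
#include <cstdint>

static int32_t signExtendBranchField(uint32_t Imm, unsigned Bits) {
  // Assumes the bits above position Bits-1 are already zero, as they are for
  // decoded instruction fields.
  uint32_t SignBit = 1u << (Bits - 1);
  return (int32_t)((Imm ^ SignBit) - SignBit);
}

// signExtendBranchField(Imm, 14) mirrors decodeCondBrTarget;
// signExtendBranchField(Imm, 24) mirrors decodeDirectBrTarget.
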
@@ -191,6 +198,14 @@ static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
return MCDisassembler::Success;
}
+static DecodeStatus decodeImmZeroOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ if (Imm != 0)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(Imm));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm,
int64_t Address, const void *Decoder) {
// Decode the memri field (imm, reg), which has the low 16-bits as the
@@ -262,6 +277,35 @@ static DecodeStatus decodeMemRIX16Operands(MCInst &Inst, uint64_t Imm,
return MCDisassembler::Success;
}
+static DecodeStatus decodeMemRI34PCRelOperands(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const void *Decoder) {
+ // Decode the memri34_pcrel field (imm, reg), which has the low 34-bits as the
+ // displacement, and the next 5 bits as an immediate 0.
+ uint64_t Base = Imm >> 34;
+ uint64_t Disp = Imm & 0x3FFFFFFFFUL;
+
+ assert(Base < 32 && "Invalid base register");
+
+ Inst.addOperand(MCOperand::createImm(SignExtend64<34>(Disp)));
+ return decodeImmZeroOperand(Inst, Base, Address, Decoder);
+}
+
+static DecodeStatus decodeMemRI34Operands(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const void *Decoder) {
+ // Decode the memri34 field (imm, reg), which has the low 34-bits as the
+ // displacement, and the next 5 bits as the register #.
+ uint64_t Base = Imm >> 34;
+ uint64_t Disp = Imm & 0x3FFFFFFFFUL;
+
+ assert(Base < 32 && "Invalid base register");
+
+ Inst.addOperand(MCOperand::createImm(SignExtend64<34>(Disp)));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus decodeSPE8Operands(MCInst &Inst, uint64_t Imm,
int64_t Address, const void *Decoder) {
// Decode the spe8disp field (imm, reg), which has the low 5-bits as the
@@ -324,6 +368,29 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
raw_ostream &CS) const {
+ auto *ReadFunc = IsLittleEndian ? support::endian::read32le
+ : support::endian::read32be;
+
+ // If this is an 8-byte prefixed instruction, handle it here.
+ // Note: prefixed instructions aren't technically 8-byte entities - the prefix
+ // appears in memory at an address 4 bytes prior to that of the base
+ // instruction regardless of endianness. So we read the two pieces and
+ // rebuild the 8-byte instruction.
+ // TODO: In this function we call decodeInstruction several times with
+ // different decoder tables. It may be possible to only call once by
+ // looking at the top 6 bits of the instruction.
+ if (STI.getFeatureBits()[PPC::FeaturePrefixInstrs] && Bytes.size() >= 8) {
+ uint32_t Prefix = ReadFunc(Bytes.data());
+ uint32_t BaseInst = ReadFunc(Bytes.data() + 4);
+ uint64_t Inst = BaseInst | (uint64_t)Prefix << 32;
+ DecodeStatus result = decodeInstruction(DecoderTable64, MI, Inst, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 8;
+ return result;
+ }
+ }
+
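
// Illustrative sketch (not part of the patch above): rebuilding the 8-byte
// prefixed instruction from the two 4-byte words. The prefix always sits 4
// bytes before the base instruction in memory, so the same two reads work for
// both byte orders; only the per-word byte order differs. Helper names are
// hypothetical.
#include <cstdint>

static uint32_t readWord(const uint8_t *P, bool IsLittleEndian) {
  // Assemble a 32-bit word in the requested byte order, independent of the
  // host's endianness (the role played by read32le/read32be above).
  if (IsLittleEndian)
    return (uint32_t)P[0] | ((uint32_t)P[1] << 8) | ((uint32_t)P[2] << 16) |
           ((uint32_t)P[3] << 24);
  return ((uint32_t)P[0] << 24) | ((uint32_t)P[1] << 16) |
         ((uint32_t)P[2] << 8) | (uint32_t)P[3];
}

static uint64_t rebuildPrefixedInst(const uint8_t *Bytes, bool IsLittleEndian) {
  uint32_t Prefix = readWord(Bytes, IsLittleEndian);
  uint32_t BaseInst = readWord(Bytes + 4, IsLittleEndian);
  // Matches: Inst = BaseInst | (uint64_t)Prefix << 32.
  return ((uint64_t)Prefix << 32) | BaseInst;
}
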
// Get the four bytes of the instruction.
Size = 4;
if (Bytes.size() < 4) {
@@ -332,8 +399,7 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
// Read the instruction in the proper endianness.
- uint32_t Inst = IsLittleEndian ? support::endian::read32le(Bytes.data())
- : support::endian::read32be(Bytes.data());
+ uint64_t Inst = ReadFunc(Bytes.data());
if (STI.getFeatureBits()[PPC::FeatureQPX]) {
DecodeStatus result =
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 8778e916f7e4..dbaf221db9fc 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -28,7 +28,6 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
switch (Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
- case FK_NONE:
case FK_Data_1:
case FK_Data_2:
case FK_Data_4:
@@ -40,11 +39,14 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
return Value & 0xfffc;
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
+ case PPC::fixup_ppc_br24_notoc:
return Value & 0x3fffffc;
case PPC::fixup_ppc_half16:
return Value & 0xffff;
case PPC::fixup_ppc_half16ds:
return Value & 0xfffc;
+ case PPC::fixup_ppc_pcrel34:
+ return Value & 0x3ffffffff;
}
}
@@ -52,8 +54,6 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
switch (Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
- case FK_NONE:
- return 0;
case FK_Data_1:
return 1;
case FK_Data_2:
@@ -65,7 +65,9 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case PPC::fixup_ppc_brcond14abs:
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
+ case PPC::fixup_ppc_br24_notoc:
return 4;
+ case PPC::fixup_ppc_pcrel34:
case FK_Data_8:
return 8;
case PPC::fixup_ppc_nofixup:
@@ -91,24 +93,33 @@ public:
const static MCFixupKindInfo InfosBE[PPC::NumTargetFixupKinds] = {
// name offset bits flags
{ "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_br24_notoc", 6, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_br24abs", 6, 24, 0 },
{ "fixup_ppc_brcond14abs", 16, 14, 0 },
{ "fixup_ppc_half16", 0, 16, 0 },
{ "fixup_ppc_half16ds", 0, 14, 0 },
+ { "fixup_ppc_pcrel34", 0, 34, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_nofixup", 0, 0, 0 }
};
const static MCFixupKindInfo InfosLE[PPC::NumTargetFixupKinds] = {
// name offset bits flags
{ "fixup_ppc_br24", 2, 24, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_br24_notoc", 2, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_brcond14", 2, 14, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_br24abs", 2, 24, 0 },
{ "fixup_ppc_brcond14abs", 2, 14, 0 },
{ "fixup_ppc_half16", 0, 16, 0 },
{ "fixup_ppc_half16ds", 2, 14, 0 },
+ { "fixup_ppc_pcrel34", 0, 34, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_nofixup", 0, 0, 0 }
};
+ // Fixup kinds from .reloc directive are like R_PPC_NONE/R_PPC64_NONE. They
+ // do not require any extra processing.
+ if (Kind >= FirstLiteralRelocationKind)
+ return MCAsmBackend::getFixupKindInfo(FK_NONE);
+
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
@@ -123,11 +134,14 @@ public:
const MCValue &Target, MutableArrayRef<char> Data,
uint64_t Value, bool IsResolved,
const MCSubtargetInfo *STI) const override {
- Value = adjustFixupValue(Fixup.getKind(), Value);
+ MCFixupKind Kind = Fixup.getKind();
+ if (Kind >= FirstLiteralRelocationKind)
+ return;
+ Value = adjustFixupValue(Kind, Value);
if (!Value) return; // Doesn't change encoding.
unsigned Offset = Fixup.getOffset();
- unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+ unsigned NumBytes = getFixupKindNumBytes(Kind);
// For each byte of the fragment that the fixup touches, mask in the bits
// from the fixup value. The Value has been "split up" into the appropriate
@@ -140,13 +154,13 @@ public:
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target) override {
- switch ((unsigned)Fixup.getKind()) {
+ MCFixupKind Kind = Fixup.getKind();
+ switch ((unsigned)Kind) {
default:
- return false;
- case FK_NONE:
- return true;
+ return Kind >= FirstLiteralRelocationKind;
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
+ case PPC::fixup_ppc_br24_notoc:
// If the target symbol has a local entry point we must not attempt
// to resolve the fixup directly. Emit a relocation and leave
// resolution of the final target address to the linker.
@@ -178,8 +192,8 @@ public:
llvm_unreachable("relaxInstruction() unimplemented");
}
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override {
+ void relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const override {
// FIXME.
llvm_unreachable("relaxInstruction() unimplemented");
}
@@ -200,21 +214,6 @@ public:
// FIXME: This should be in a separate file.
namespace {
-class DarwinPPCAsmBackend : public PPCAsmBackend {
-public:
- DarwinPPCAsmBackend(const Target &T, const Triple &TT)
- : PPCAsmBackend(T, TT) {}
-
- std::unique_ptr<MCObjectTargetWriter>
- createObjectTargetWriter() const override {
- bool Is64 = TT.isPPC64();
- return createPPCMachObjectWriter(
- /*Is64Bit=*/Is64,
- (Is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC),
- MachO::CPU_SUBTYPE_POWERPC_ALL);
- }
-};
-
class ELFPPCAsmBackend : public PPCAsmBackend {
public:
ELFPPCAsmBackend(const Target &T, const Triple &TT) : PPCAsmBackend(T, TT) {}
@@ -243,14 +242,25 @@ public:
} // end anonymous namespace
Optional<MCFixupKind> ELFPPCAsmBackend::getFixupKind(StringRef Name) const {
- if (TT.isPPC64()) {
- if (Name == "R_PPC64_NONE")
- return FK_NONE;
- } else {
- if (Name == "R_PPC_NONE")
- return FK_NONE;
+ if (TT.isOSBinFormatELF()) {
+ unsigned Type;
+ if (TT.isPPC64()) {
+ Type = llvm::StringSwitch<unsigned>(Name)
+#define ELF_RELOC(X, Y) .Case(#X, Y)
+#include "llvm/BinaryFormat/ELFRelocs/PowerPC64.def"
+#undef ELF_RELOC
+ .Default(-1u);
+ } else {
+ Type = llvm::StringSwitch<unsigned>(Name)
+#define ELF_RELOC(X, Y) .Case(#X, Y)
+#include "llvm/BinaryFormat/ELFRelocs/PowerPC.def"
+#undef ELF_RELOC
+ .Default(-1u);
+ }
+ if (Type != -1u)
+ return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
}
- return MCAsmBackend::getFixupKind(Name);
+ return None;
}
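
// Illustrative sketch (not part of the patch above) of the round trip for
// fixup kinds coming from a .reloc directive: getFixupKind maps the
// relocation name to FirstLiteralRelocationKind plus the ELF type, and
// getRelocType later recovers the type by subtracting the same base. The
// numeric base and the tiny lookup table below are stand-ins, not the real
// values generated from PowerPC64.def.
#include <cstdint>
#include <map>
#include <string>

static const unsigned kFirstLiteralRelocationKind = 0x10000; // stand-in value

static unsigned fixupKindForRelocName(const std::string &Name) {
  // Sample entries only; the real table is generated by the ELF_RELOC macro.
  static const std::map<std::string, unsigned> Relocs = {
      {"R_PPC64_NONE", 0}, {"R_PPC64_ADDR32", 1}, {"R_PPC64_REL24", 10}};
  auto It = Relocs.find(Name);
  return It == Relocs.end() ? ~0u : kFirstLiteralRelocationKind + It->second;
}

static unsigned relocTypeForLiteralFixup(unsigned Kind) {
  // Mirrors: return Kind - FirstLiteralRelocationKind; in getRelocType.
  return Kind - kFirstLiteralRelocationKind;
}
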
MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
@@ -258,9 +268,6 @@ MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
const Triple &TT = STI.getTargetTriple();
- if (TT.isOSDarwin())
- return new DarwinPPCAsmBackend(T, TT);
-
if (TT.isOSBinFormatXCOFF())
return new XCOFFPPCAsmBackend(T, TT);
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 20f752c3041a..d8b3301e97f1 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -73,6 +73,9 @@ static MCSymbolRefExpr::VariantKind getAccessVariant(const MCValue &Target,
unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
+ MCFixupKind Kind = Fixup.getKind();
+ if (Kind >= FirstLiteralRelocationKind)
+ return Kind - FirstLiteralRelocationKind;
MCSymbolRefExpr::VariantKind Modifier = getAccessVariant(Target, Fixup);
// determine the type of the relocation
@@ -83,6 +86,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
llvm_unreachable("Unimplemented");
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
+ case PPC::fixup_ppc_br24_notoc:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
case MCSymbolRefExpr::VK_None:
@@ -94,6 +98,9 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_LOCAL:
Type = ELF::R_PPC_LOCAL24PC;
break;
+ case MCSymbolRefExpr::VK_PPC_NOTOC:
+ Type = ELF::R_PPC64_REL24_NOTOC;
+ break;
}
break;
case PPC::fixup_ppc_brcond14:
@@ -121,6 +128,18 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
Target.print(errs());
errs() << '\n';
report_fatal_error("Invalid PC-relative half16ds relocation");
+ case PPC::fixup_ppc_pcrel34:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unsupported Modifier for fixup_ppc_pcrel34");
+ case MCSymbolRefExpr::VK_PCREL:
+ Type = ELF::R_PPC64_PCREL34;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_PCREL:
+ Type = ELF::R_PPC64_GOT_PCREL34;
+ break;
+ }
+ break;
case FK_Data_4:
case FK_PCRel_4:
Type = ELF::R_PPC_REL32;
@@ -133,9 +152,6 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
} else {
switch (Fixup.getTargetKind()) {
default: llvm_unreachable("invalid fixup kind!");
- case FK_NONE:
- Type = ELF::R_PPC_NONE;
- break;
case PPC::fixup_ppc_br24abs:
Type = ELF::R_PPC_ADDR24;
break;
@@ -431,6 +447,7 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
return false;
case ELF::R_PPC_REL24:
+ case ELF::R_PPC64_REL24_NOTOC:
// If the target symbol has a local entry point, we must keep the
// target symbol to preserve that information for the linker.
// The "other" values are stored in the last 6 bits of the second byte.
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
new file mode 100644
index 000000000000..4373778cc96c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
@@ -0,0 +1,112 @@
+//===-------- PPCELFStreamer.cpp - ELF Object Output ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a custom MCELFStreamer for PowerPC.
+//
+// The purpose of the custom ELF streamer is to allow us to intercept
+// instructions as they are being emitted and align all 8 byte instructions
+// to a 64 byte boundary if required (by adding a 4 byte nop). This is important
+// because 8 byte instructions are not allowed to cross 64 byte boundaries,
+// and by aligning anything that is within 4 bytes of the boundary we can
+// guarantee that the 8 byte instructions do not cross that boundary.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "PPCELFStreamer.h"
+#include "PPCInstrInfo.h"
+#include "PPCMCCodeEmitter.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/SourceMgr.h"
+
+using namespace llvm;
+
+PPCELFStreamer::PPCELFStreamer(MCContext &Context,
+ std::unique_ptr<MCAsmBackend> MAB,
+ std::unique_ptr<MCObjectWriter> OW,
+ std::unique_ptr<MCCodeEmitter> Emitter)
+ : MCELFStreamer(Context, std::move(MAB), std::move(OW),
+ std::move(Emitter)), LastLabel(NULL) {
+}
+
+void PPCELFStreamer::emitPrefixedInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) {
+  // Prefixed instructions must not cross a 64-byte boundary (i.e. the prefix
+  // before the boundary and the remaining 4 bytes after it). To achieve this,
+  // a nop is added prior to any such boundary-crossing prefixed instruction.
+  // Align to 64 bytes if possible, but add at most 4 bytes of padding when
+  // doing so. If alignment would require more than 4 bytes, the instruction is
+  // left unaligned. When emitting a code alignment, a new fragment is created
+  // for that alignment. This fragment will contain all of the nops required as
+  // part of the alignment operation. In cases where no nops are added, the
+  // fragment is still created but remains empty.
+ emitCodeAlignment(64, 4);
+
+ // Emit the instruction.
+  // Since the previous emit created a new fragment, adding this instruction
+ // also forces the addition of a new fragment. Inst is now the first
+ // instruction in that new fragment.
+ MCELFStreamer::emitInstruction(Inst, STI);
+
+ // The above instruction is forced to start a new fragment because it
+ // comes after a code alignment fragment. Get that new fragment.
+ MCFragment *InstructionFragment = getCurrentFragment();
+ SMLoc InstLoc = Inst.getLoc();
+ // Check if there was a last label emitted.
+ if (LastLabel && !LastLabel->isUnset() && LastLabelLoc.isValid() &&
+ InstLoc.isValid()) {
+ const SourceMgr *SourceManager = getContext().getSourceManager();
+ unsigned InstLine = SourceManager->FindLineNumber(InstLoc);
+ unsigned LabelLine = SourceManager->FindLineNumber(LastLabelLoc);
+ // If the Label and the Instruction are on the same line then move the
+ // label to the top of the fragment containing the aligned instruction that
+ // was just added.
+ if (InstLine == LabelLine) {
+ AssignFragment(LastLabel, InstructionFragment);
+ LastLabel->setOffset(0);
+ }
+ }
+}
+
+void PPCELFStreamer::emitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) {
+ PPCMCCodeEmitter *Emitter =
+ static_cast<PPCMCCodeEmitter*>(getAssembler().getEmitterPtr());
+
+ // Special handling is only for prefixed instructions.
+ if (!Emitter->isPrefixedInstruction(Inst)) {
+ MCELFStreamer::emitInstruction(Inst, STI);
+ return;
+ }
+ emitPrefixedInstruction(Inst, STI);
+}
+
+void PPCELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
+ LastLabel = Symbol;
+ LastLabelLoc = Loc;
+ MCELFStreamer::emitLabel(Symbol);
+}
+
+MCELFStreamer *llvm::createPPCELFStreamer(
+ MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
+ std::unique_ptr<MCObjectWriter> OW,
+ std::unique_ptr<MCCodeEmitter> Emitter) {
+ return new PPCELFStreamer(Context, std::move(MAB), std::move(OW),
+ std::move(Emitter));
+}
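
// Illustrative sketch (not part of the patch above) of the boundary arithmetic
// behind emitCodeAlignment(64, 4): with 4-byte aligned code, an 8-byte
// prefixed instruction straddles a 64-byte boundary only when it would start
// at offset 60 modulo 64, so a single 4-byte nop is always enough padding.
// Helper names are hypothetical.
#include <cassert>
#include <cstdint>

static bool wouldCrossBoundary(uint64_t Offset) {
  // The prefixed instruction occupies [Offset, Offset + 8).
  return (Offset % 64) + 8 > 64;
}

static uint64_t paddingForPrefixedInst(uint64_t Offset) {
  assert(Offset % 4 == 0 && "PPC instructions are 4-byte aligned");
  // At most one nop is needed, matching the MaxBytesToEmit argument of 4.
  return wouldCrossBoundary(Offset) ? 4 : 0;
}
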
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
new file mode 100644
index 000000000000..51863232d071
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
@@ -0,0 +1,54 @@
+//===- PPCELFStreamer.h - ELF Object Output --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a custom MCELFStreamer for PowerPC.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_PPC_MCELFSTREAMER_PPCELFSTREAMER_H
+#define LLVM_LIB_TARGET_PPC_MCELFSTREAMER_PPCELFSTREAMER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include <memory>
+
+namespace llvm {
+
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCSubtargetInfo;
+
+class PPCELFStreamer : public MCELFStreamer {
+ // We need to keep track of the last label we emitted (only one) because
+ // depending on whether the label is on the same line as an aligned
+ // instruction or not, the label may refer to the instruction or the nop.
+ MCSymbol *LastLabel;
+ SMLoc LastLabelLoc;
+
+public:
+ PPCELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
+ std::unique_ptr<MCObjectWriter> OW,
+ std::unique_ptr<MCCodeEmitter> Emitter);
+
+ void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
+
+  // emitLabel updates LastLabel and LastLabelLoc when a new label is emitted.
+ void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
+private:
+ void emitPrefixedInstruction(const MCInst &Inst, const MCSubtargetInfo &STI);
+};
+
+MCELFStreamer *createPPCELFStreamer(MCContext &Context,
+ std::unique_ptr<MCAsmBackend> MAB,
+ std::unique_ptr<MCObjectWriter> OW,
+ std::unique_ptr<MCCodeEmitter> Emitter);
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_PPC_MCELFSTREAMER_PPCELFSTREAMER_H
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 845489788c86..2fb8947fd4e0 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -19,6 +19,10 @@ enum Fixups {
// 24-bit PC relative relocation for direct branches like 'b' and 'bl'.
fixup_ppc_br24 = FirstTargetFixupKind,
+ // 24-bit PC relative relocation for direct branches like 'b' and 'bl' where
+ // the caller does not use the TOC.
+ fixup_ppc_br24_notoc,
+
/// 14-bit PC relative relocation for conditional branches.
fixup_ppc_brcond14,
@@ -36,6 +40,9 @@ enum Fixups {
/// instrs like 'std'.
fixup_ppc_half16ds,
+ // A 34-bit fixup corresponding to PC-relative paddi.
+ fixup_ppc_pcrel34,
+
/// Not a true fixup, but ties a symbol to a call to __tls_get_addr for the
/// TLS general and local dynamic models, or inserts the thread-pointer
/// register number.
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
index 9cc1c539e24a..16da62a74b8c 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -116,16 +116,6 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
}
}
- if ((MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) &&
- MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
- O << "\tmr ";
- printOperand(MI, 0, O);
- O << ", ";
- printOperand(MI, 1, O);
- printAnnotation(O, Annot);
- return;
- }
-
if (MI->getOpcode() == PPC::RLDICR ||
MI->getOpcode() == PPC::RLDICR_32) {
unsigned char SH = MI->getOperand(2).getImm();
@@ -193,7 +183,7 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
}
}
- if (!printAliasInstr(MI, O))
+ if (!printAliasInstr(MI, Address, O))
printInstruction(MI, Address, O);
printAnnotation(O, Annot);
}
@@ -339,6 +329,13 @@ void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
O << (int)Value;
}
+void PPCInstPrinter::printImmZeroOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value == 0 && "Operand must be zero");
+ O << (unsigned int)Value;
+}
+
void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
@@ -391,6 +388,17 @@ void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo, O);
}
+void PPCInstPrinter::printS34ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm()) {
+ long long Value = MI->getOperand(OpNo).getImm();
+ assert(isInt<34>(Value) && "Invalid s34imm argument!");
+ O << (long long)Value;
+ }
+ else
+ printOperand(MI, OpNo, O);
+}
+
void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
if (MI->getOperand(OpNo).isImm())
@@ -399,18 +407,29 @@ void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo, O);
}
-void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+void PPCInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpNo, raw_ostream &O) {
if (!MI->getOperand(OpNo).isImm())
return printOperand(MI, OpNo, O);
-
- // Branches can take an immediate operand. This is used by the branch
- // selection pass to print .+8, an eight byte displacement from the PC.
- O << ".";
int32_t Imm = SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2);
- if (Imm >= 0)
- O << "+";
- O << Imm;
+ if (PrintBranchImmAsAddress) {
+ uint64_t Target = Address + Imm;
+ if (!TT.isPPC64())
+ Target &= 0xffffffff;
+ O << formatHex(Target);
+ } else {
+ // Branches can take an immediate operand. This is used by the branch
+ // selection pass to print, for example `.+8` (for ELF) or `$+8` (for AIX)
+ // to express an eight byte displacement from the program counter.
+ if (!TT.isOSAIX())
+ O << ".";
+ else
+ O << "$";
+
+ if (Imm >= 0)
+ O << "+";
+ O << Imm;
+ }
}
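
// Illustrative sketch (not part of the patch above) of the arithmetic
// printBranchOperand performs when PrintBranchImmAsAddress is set: the encoded
// word displacement is scaled to bytes, sign extended from 32 bits, added to
// the instruction address, and truncated on 32-bit targets. Helper name is
// hypothetical.
#include <cstdint>

static uint64_t branchTargetAddress(uint64_t Address, int64_t WordImm,
                                    bool IsPPC64) {
  // Mirrors SignExtend32<32>((unsigned)Imm << 2) followed by Address + Imm.
  int32_t ByteDisp = (int32_t)((uint32_t)WordImm << 2);
  uint64_t Target = Address + (uint64_t)(int64_t)ByteDisp;
  if (!IsPPC64)
    Target &= 0xffffffff; // 32-bit targets wrap around
  return Target;
}

// For example, a word displacement of 2 at address 0x1000 yields 0x1008,
// which the older form printed as ".+8" (or "$+8" on AIX).
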
void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
@@ -451,6 +470,22 @@ void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo,
O << ')';
}
+void PPCInstPrinter::printMemRegImm34PCRel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printS34ImmOperand(MI, OpNo, O);
+ O << '(';
+ printImmZeroOperand(MI, OpNo + 1, O);
+ O << ')';
+}
+
+void PPCInstPrinter::printMemRegImm34(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printS34ImmOperand(MI, OpNo, O);
+ O << '(';
+ printOperand(MI, OpNo + 1, O);
+ O << ')';
+}
+
void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
// When used as the base register, r0 reads constant zero rather than
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
index a3ec41aa348d..9763aeceef94 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
@@ -39,9 +39,9 @@ public:
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
- void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx,
+ bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS);
+ void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpIdx, unsigned PrintMethodIdx,
raw_ostream &OS);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -61,14 +61,19 @@ public:
void printU10ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU12ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printS34ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printImmZeroOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printBranchOperand(const MCInst *MI, uint64_t Address, unsigned OpNo,
+ raw_ostream &O);
void printAbsBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printTLSCall(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemRegImm34PCRel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemRegImm34(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index dc2c216a3efd..593dc2843c3d 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -12,6 +12,7 @@
#include "PPCMCAsmInfo.h"
#include "llvm/ADT/Triple.h"
+#include <cassert>
using namespace llvm;
@@ -50,15 +51,15 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
Data64bitsDirective = is64Bit ? "\t.quad\t" : nullptr;
AssemblerDialect = 1; // New-Style mnemonics.
LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
-
- UseIntegratedAssembler = true;
}
void PPCXCOFFMCAsmInfo::anchor() {}
PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) {
- assert(!IsLittleEndian && "Little-endian XCOFF not supported.");
+ if (T.getArch() == Triple::ppc64le)
+ report_fatal_error("XCOFF is not supported for little-endian targets");
CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 8 : 4;
- ZeroDirective = "\t.space\t";
- SymbolsHaveSMC = true;
+
+  // A size of 8 is only supported by the assembler when targeting 64-bit.
+ Data64bitsDirective = Is64Bit ? "\t.vbyte\t8, " : nullptr;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 8c52bbbd8a56..27c687686641 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -28,7 +28,7 @@ public:
};
class PPCXCOFFMCAsmInfo : public MCAsmInfoXCOFF {
- virtual void anchor();
+ void anchor() override;
public:
explicit PPCXCOFFMCAsmInfo(bool is64Bit, const Triple &);
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 676efc500455..fb65e7320f2b 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -48,7 +48,9 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_br24));
+ ((MI.getOpcode() == PPC::BL8_NOTOC)
+ ? (MCFixupKind)PPC::fixup_ppc_br24_notoc
+ : (MCFixupKind)PPC::fixup_ppc_br24)));
return 0;
}
@@ -102,6 +104,20 @@ unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo,
return 0;
}
+uint64_t
+PPCMCCodeEmitter::getImm34Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm())
+ return getMachineOpValue(MI, MO, Fixups, STI);
+
+ // Add a fixup for the immediate field.
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_pcrel34));
+ return 0;
+}
+
unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -159,6 +175,104 @@ unsigned PPCMCCodeEmitter::getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
return RegBits;
}
+uint64_t
+PPCMCCodeEmitter::getMemRI34PCRelEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Encode the PCRelative version of memri34: imm34(r0).
+ // In the PC relative version the register for the address must be zero.
+ // The 34 bit immediate can fall into one of three cases:
+ // 1) It is a relocation to be filled in by the linker represented as:
+ // (MCExpr::SymbolRef)
+ // 2) It is a relocation + SignedOffset represented as:
+ // (MCExpr::Binary(MCExpr::SymbolRef + MCExpr::Constant))
+ // 3) It is a known value at compile time.
+
+ // Make sure that the register is a zero as expected.
+ assert(MI.getOperand(OpNo + 1).isImm() && "Expecting an immediate.");
+ uint64_t RegBits =
+ getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI) << 34;
+ assert(RegBits == 0 && "Operand must be 0.");
+
+  // If this is not an MCExpr then we are in case 3) and we are dealing with
+ // a value known at compile time, not a relocation.
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (!MO.isExpr())
+ return ((getMachineOpValue(MI, MO, Fixups, STI)) & 0x3FFFFFFFFUL) | RegBits;
+
+ // At this point in the function it is known that MO is of type MCExpr.
+ // Therefore we are dealing with either case 1) a symbol ref or
+ // case 2) a symbol ref plus a constant.
+ const MCExpr *Expr = MO.getExpr();
+ switch (Expr->getKind()) {
+ default:
+ llvm_unreachable("Unsupported MCExpr for getMemRI34PCRelEncoding.");
+ case MCExpr::SymbolRef: {
+ // Relocation alone.
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(Expr);
+ (void)SRE;
+ // Currently these are the only valid PCRelative Relocations.
+ assert((SRE->getKind() == MCSymbolRefExpr::VK_PCREL ||
+ SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_PCREL) &&
+ "VariantKind must be VK_PCREL or VK_PPC_GOT_PCREL");
+ // Generate the fixup for the relocation.
+ Fixups.push_back(
+ MCFixup::create(0, Expr,
+ static_cast<MCFixupKind>(PPC::fixup_ppc_pcrel34)));
+ // Put zero in the location of the immediate. The linker will fill in the
+ // correct value based on the relocation.
+ return 0;
+ }
+ case MCExpr::Binary: {
+ // Relocation plus some offset.
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
+ assert(BE->getOpcode() == MCBinaryExpr::Add &&
+ "Binary expression opcode must be an add.");
+
+ const MCExpr *LHS = BE->getLHS();
+ const MCExpr *RHS = BE->getRHS();
+
+ // Need to check in both directions. Reloc+Offset and Offset+Reloc.
+ if (LHS->getKind() != MCExpr::SymbolRef)
+ std::swap(LHS, RHS);
+
+ if (LHS->getKind() != MCExpr::SymbolRef ||
+ RHS->getKind() != MCExpr::Constant)
+ llvm_unreachable("Expecting to have one constant and one relocation.");
+
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(LHS);
+ (void)SRE;
+ assert(isInt<34>(cast<MCConstantExpr>(RHS)->getValue()) &&
+ "Value must fit in 34 bits.");
+
+ // Currently these are the only valid PCRelative Relocations.
+ assert((SRE->getKind() == MCSymbolRefExpr::VK_PCREL ||
+ SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_PCREL) &&
+ "VariantKind must be VK_PCREL or VK_PPC_GOT_PCREL");
+ // Generate the fixup for the relocation.
+ Fixups.push_back(
+ MCFixup::create(0, Expr,
+ static_cast<MCFixupKind>(PPC::fixup_ppc_pcrel34)));
+ // Put zero in the location of the immediate. The linker will fill in the
+ // correct value based on the relocation.
+ return 0;
+ }
+ }
+}
+
+uint64_t
+PPCMCCodeEmitter::getMemRI34Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Encode (imm, reg) as a memri34, which has the low 34-bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo + 1).isReg() && "Expecting a register.");
+ uint64_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI)
+ << 34;
+ const MCOperand &MO = MI.getOperand(OpNo);
+ return ((getMachineOpValue(MI, MO, Fixups, STI)) & 0x3FFFFFFFFUL) | RegBits;
+}
+
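
// Illustrative sketch (not part of the patch above) of the memri34 bit layout
// used by getMemRI34Encoding: the low 34 bits hold the displacement and the
// next 5 bits hold the base register number. Helper names are hypothetical.
#include <cassert>
#include <cstdint>

static uint64_t packMemRI34(int64_t Disp, unsigned BaseReg) {
  assert(BaseReg < 32 && "base register number must fit in 5 bits");
  assert(Disp >= -(1LL << 33) && Disp < (1LL << 33) && "need a 34-bit value");
  return ((uint64_t)BaseReg << 34) | ((uint64_t)Disp & 0x3FFFFFFFFULL);
}

static void unpackMemRI34(uint64_t Encoded, int64_t &Disp, unsigned &BaseReg) {
  // The disassembler's decodeMemRI34Operands performs this inverse split and
  // sign extends the 34-bit displacement.
  BaseReg = (unsigned)(Encoded >> 34) & 0x1F;
  uint64_t Raw = Encoded & 0x3FFFFFFFFULL;
  Disp = (Raw & (1ULL << 33)) ? (int64_t)(Raw | ~0x3FFFFFFFFULL) : (int64_t)Raw;
}
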
unsigned PPCMCCodeEmitter::getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI)
@@ -257,7 +371,7 @@ static unsigned getOpIdxForMO(const MCInst &MI, const MCOperand &MO) {
return ~0U; // Silence any warnings about no return.
}
-unsigned PPCMCCodeEmitter::
+uint64_t PPCMCCodeEmitter::
getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -316,5 +430,11 @@ unsigned PPCMCCodeEmitter::getInstSizeInBytes(const MCInst &MI) const {
return Desc.getSize();
}
+bool PPCMCCodeEmitter::isPrefixedInstruction(const MCInst &MI) const {
+ unsigned Opcode = MI.getOpcode();
+ const PPCInstrInfo *InstrInfo = static_cast<const PPCInstrInfo*>(&MCII);
+ return InstrInfo->isPrefixed(Opcode);
+}
+
#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "PPCGenMCCodeEmitter.inc"
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
index 1324faa12553..588aa76bd806 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
@@ -50,6 +50,9 @@ public:
unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ uint64_t getImm34Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -59,6 +62,12 @@ public:
unsigned getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ uint64_t getMemRI34PCRelEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint64_t getMemRI34Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -80,7 +89,7 @@ public:
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -97,6 +106,9 @@ public:
// Get the number of bytes used to encode the given MCInst.
unsigned getInstSizeInBytes(const MCInst &MI) const;
+  // Returns true if this instruction is a prefixed instruction.
+ bool isPrefixedInstruction(const MCInst &MI) const;
+
private:
FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
void
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index fb9dd5d7aa75..abff44449131 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -17,39 +17,44 @@ using namespace llvm;
#define DEBUG_TYPE "ppcmcexpr"
-const PPCMCExpr*
-PPCMCExpr::create(VariantKind Kind, const MCExpr *Expr,
- bool IsDarwin, MCContext &Ctx) {
- return new (Ctx) PPCMCExpr(Kind, Expr, IsDarwin);
+const PPCMCExpr *PPCMCExpr::create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) PPCMCExpr(Kind, Expr);
}
void PPCMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
- if (isDarwinSyntax()) {
- switch (Kind) {
- default: llvm_unreachable("Invalid kind!");
- case VK_PPC_LO: OS << "lo16"; break;
- case VK_PPC_HI: OS << "hi16"; break;
- case VK_PPC_HA: OS << "ha16"; break;
- }
-
- OS << '(';
- getSubExpr()->print(OS, MAI);
- OS << ')';
- } else {
- getSubExpr()->print(OS, MAI);
+ getSubExpr()->print(OS, MAI);
- switch (Kind) {
- default: llvm_unreachable("Invalid kind!");
- case VK_PPC_LO: OS << "@l"; break;
- case VK_PPC_HI: OS << "@h"; break;
- case VK_PPC_HA: OS << "@ha"; break;
- case VK_PPC_HIGH: OS << "@high"; break;
- case VK_PPC_HIGHA: OS << "@higha"; break;
- case VK_PPC_HIGHER: OS << "@higher"; break;
- case VK_PPC_HIGHERA: OS << "@highera"; break;
- case VK_PPC_HIGHEST: OS << "@highest"; break;
- case VK_PPC_HIGHESTA: OS << "@highesta"; break;
- }
+ switch (Kind) {
+ default:
+ llvm_unreachable("Invalid kind!");
+ case VK_PPC_LO:
+ OS << "@l";
+ break;
+ case VK_PPC_HI:
+ OS << "@h";
+ break;
+ case VK_PPC_HA:
+ OS << "@ha";
+ break;
+ case VK_PPC_HIGH:
+ OS << "@high";
+ break;
+ case VK_PPC_HIGHA:
+ OS << "@higha";
+ break;
+ case VK_PPC_HIGHER:
+ OS << "@higher";
+ break;
+ case VK_PPC_HIGHERA:
+ OS << "@highera";
+ break;
+ case VK_PPC_HIGHEST:
+ OS << "@highest";
+ break;
+ case VK_PPC_HIGHESTA:
+ OS << "@highesta";
+ break;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index ad1454566162..1dbc7eae63c8 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -33,33 +33,29 @@ public:
private:
const VariantKind Kind;
const MCExpr *Expr;
- bool IsDarwin;
int64_t evaluateAsInt64(int64_t Value) const;
- explicit PPCMCExpr(VariantKind Kind, const MCExpr *Expr, bool IsDarwin)
- : Kind(Kind), Expr(Expr), IsDarwin(IsDarwin) {}
+ explicit PPCMCExpr(VariantKind Kind, const MCExpr *Expr)
+ : Kind(Kind), Expr(Expr) {}
public:
/// @name Construction
/// @{
static const PPCMCExpr *create(VariantKind Kind, const MCExpr *Expr,
- bool IsDarwin, MCContext &Ctx);
+ MCContext &Ctx);
- static const PPCMCExpr *createLo(const MCExpr *Expr,
- bool IsDarwin, MCContext &Ctx) {
- return create(VK_PPC_LO, Expr, IsDarwin, Ctx);
+ static const PPCMCExpr *createLo(const MCExpr *Expr, MCContext &Ctx) {
+ return create(VK_PPC_LO, Expr, Ctx);
}
- static const PPCMCExpr *createHi(const MCExpr *Expr,
- bool IsDarwin, MCContext &Ctx) {
- return create(VK_PPC_HI, Expr, IsDarwin, Ctx);
+ static const PPCMCExpr *createHi(const MCExpr *Expr, MCContext &Ctx) {
+ return create(VK_PPC_HI, Expr, Ctx);
}
- static const PPCMCExpr *createHa(const MCExpr *Expr,
- bool IsDarwin, MCContext &Ctx) {
- return create(VK_PPC_HA, Expr, IsDarwin, Ctx);
+ static const PPCMCExpr *createHa(const MCExpr *Expr, MCContext &Ctx) {
+ return create(VK_PPC_HA, Expr, Ctx);
}
/// @}
@@ -72,10 +68,6 @@ public:
/// getSubExpr - Get the child of this expression.
const MCExpr *getSubExpr() const { return Expr; }
- /// isDarwinSyntax - True if expression is to be printed using Darwin syntax.
- bool isDarwinSyntax() const { return IsDarwin; }
-
-
/// @}
void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index cbfb8e2ff282..3092d56da1c5 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -13,6 +13,7 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCInstPrinter.h"
#include "MCTargetDesc/PPCMCAsmInfo.h"
+#include "PPCELFStreamer.h"
#include "PPCTargetStreamer.h"
#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -20,11 +21,14 @@
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -91,12 +95,21 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI,
// Initial state of the frame pointer is R1.
unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1;
MCCFIInstruction Inst =
- MCCFIInstruction::createDefCfa(nullptr, MRI.getDwarfRegNum(Reg, true), 0);
+ MCCFIInstruction::cfiDefCfa(nullptr, MRI.getDwarfRegNum(Reg, true), 0);
MAI->addInitialFrameState(Inst);
return MAI;
}
+static MCStreamer *createPPCMCStreamer(const Triple &T, MCContext &Context,
+ std::unique_ptr<MCAsmBackend> &&MAB,
+ std::unique_ptr<MCObjectWriter> &&OW,
+ std::unique_ptr<MCCodeEmitter> &&Emitter,
+ bool RelaxAll) {
+ return createPPCELFStreamer(Context, std::move(MAB), std::move(OW),
+ std::move(Emitter));
+}
+
namespace {
class PPCTargetAsmStreamer : public PPCTargetStreamer {
@@ -107,14 +120,17 @@ public:
: PPCTargetStreamer(S), OS(OS) {}
void emitTCEntry(const MCSymbol &S) override {
- const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
- OS << "\t.tc ";
- OS << (MAI->getSymbolsHaveSMC()
- ? cast<MCSymbolXCOFF>(S).getUnqualifiedName()
- : S.getName());
- OS << "[TC],";
- OS << S.getName();
- OS << '\n';
+ if (const MCSymbolXCOFF *XSym = dyn_cast<MCSymbolXCOFF>(&S)) {
+ MCSymbolXCOFF *TCSym =
+ cast<MCSymbolXCOFF>(Streamer.getContext().getOrCreateSymbol(
+ XSym->getSymbolTableName() + "[TC]"));
+ if (TCSym->hasRename())
+ Streamer.emitXCOFFRenameDirective(TCSym, TCSym->getSymbolTableName());
+ OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << '\n';
+ return;
+ }
+
+ OS << "\t.tc " << S.getName() << "[TC]," << S.getName() << '\n';
}
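
// Illustrative sketch (not part of the patch above): the string produced by
// the XCOFF branch of emitTCEntry for the common case where the symbol's name
// and symbol-table name are both SymName and no rename is pending. The helper
// name is hypothetical.
#include <string>

static std::string tcEntryLine(const std::string &SymName) {
  // Matches: OS << "\t.tc " << TCSym->getName() << "," << XSym->getName();
  // where TCSym was created as SymName + "[TC]".
  return "\t.tc " + SymName + "[TC]," + SymName + "\n";
}

// tcEntryLine("foo") yields the directive ".tc foo[TC],foo".
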
void emitMachine(StringRef CPU) override {
@@ -146,8 +162,8 @@ public:
void emitTCEntry(const MCSymbol &S) override {
// Creates a R_PPC64_TOC relocation
- Streamer.EmitValueToAlignment(8);
- Streamer.EmitSymbolValue(&S, 8);
+ Streamer.emitValueToAlignment(8);
+ Streamer.emitSymbolValue(&S, 8);
}
void emitMachine(StringRef CPU) override {
@@ -166,13 +182,9 @@ public:
void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
MCAssembler &MCA = getStreamer().getAssembler();
- int64_t Res;
- if (!LocalOffset->evaluateAsAbsolute(Res, MCA))
- report_fatal_error(".localentry expression must be absolute.");
-
- unsigned Encoded = ELF::encodePPC64LocalEntryOffset(Res);
- if (Res != ELF::decodePPC64LocalEntryOffset(Encoded))
- report_fatal_error(".localentry expression cannot be encoded.");
+ // encodePPC64LocalEntryOffset will report an error if it cannot
+ // encode LocalOffset.
+ unsigned Encoded = encodePPC64LocalEntryOffset(LocalOffset);
unsigned Other = S->getOther();
Other &= ~ELF::STO_PPC64_LOCAL_MASK;
@@ -201,6 +213,10 @@ public:
for (auto *Sym : UpdateOther)
if (Sym->isVariable())
copyLocalEntry(Sym, Sym->getVariableValue());
+
+ // Clear the set of symbols that needs to be updated so the streamer can
+ // be reused without issues.
+ UpdateOther.clear();
}
private:
@@ -217,6 +233,31 @@ private:
D->setOther(Other);
return true;
}
+
+ unsigned encodePPC64LocalEntryOffset(const MCExpr *LocalOffset) {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ int64_t Offset;
+ if (!LocalOffset->evaluateAsAbsolute(Offset, MCA))
+ MCA.getContext().reportFatalError(
+ LocalOffset->getLoc(), ".localentry expression must be absolute.");
+
+ switch (Offset) {
+ default:
+ MCA.getContext().reportFatalError(
+ LocalOffset->getLoc(),
+ ".localentry expression is not a valid power of 2.");
+ case 0:
+ return 0;
+ case 1:
+ return 1 << ELF::STO_PPC64_LOCAL_BIT;
+ case 4:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ return (int)Log2(Offset) << (int)ELF::STO_PPC64_LOCAL_BIT;
+ }
+ }
};
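
// Illustrative sketch (not part of the patch above) of the .localentry
// encoding performed by encodePPC64LocalEntryOffset: a valid offset (0, 1, or
// a power of two up to 64) maps to the three-bit STO_PPC64_LOCAL field via its
// base-2 logarithm. The shift amount assumes ELF::STO_PPC64_LOCAL_BIT == 5.
#include <cstdint>

static unsigned encodeLocalEntrySketch(int64_t Offset) {
  const unsigned LocalBit = 5; // assumption standing in for STO_PPC64_LOCAL_BIT
  switch (Offset) {
  case 0:
    return 0;
  case 1:
    return 1u << LocalBit;
  case 4:
  case 8:
  case 16:
  case 32:
  case 64: {
    unsigned Log2Val = 0;
    for (int64_t V = Offset; V > 1; V >>= 1)
      ++Log2Val;
    return Log2Val << LocalBit;
  }
  default:
    return ~0u; // the real code reports a fatal error for other values
  }
}
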
class PPCTargetMachOStreamer : public PPCTargetStreamer {
@@ -248,8 +289,8 @@ public:
void emitTCEntry(const MCSymbol &S) override {
const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
const unsigned PointerSize = MAI->getCodePointerSize();
- Streamer.EmitValueToAlignment(PointerSize);
- Streamer.EmitSymbolValue(&S, PointerSize);
+ Streamer.emitValueToAlignment(PointerSize);
+ Streamer.emitSymbolValue(&S, PointerSize);
}
void emitMachine(StringRef CPU) override {
@@ -313,6 +354,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTargetMC() {
// Register the asm backend.
TargetRegistry::RegisterMCAsmBackend(*T, createPPCAsmBackend);
+ // Register the elf streamer.
+ TargetRegistry::RegisterELFStreamer(*T, createPPCMCStreamer);
+
// Register the object target streamer.
TargetRegistry::RegisterObjectTargetStreamer(*T,
createObjectTargetStreamer);
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 49443679bb31..719e005d9813 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -32,9 +32,6 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class MCTargetOptions;
class Target;
-class Triple;
-class StringRef;
-class raw_pwrite_stream;
MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
deleted file mode 100644
index 672f910ab086..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ /dev/null
@@ -1,380 +0,0 @@
-//===-- PPCMachObjectWriter.cpp - PPC Mach-O Writer -----------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/PPCFixupKinds.h"
-#include "MCTargetDesc/PPCMCTargetDesc.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/BinaryFormat/MachO.h"
-#include "llvm/MC/MCAsmLayout.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCMachObjectWriter.h"
-#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCValue.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
-
-using namespace llvm;
-
-namespace {
-class PPCMachObjectWriter : public MCMachObjectTargetWriter {
- bool recordScatteredRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup, MCValue Target,
- unsigned Log2Size, uint64_t &FixedValue);
-
- void RecordPPCRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue);
-
-public:
- PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype)
- : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {}
-
- void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
- const MCAsmLayout &Layout, const MCFragment *Fragment,
- const MCFixup &Fixup, MCValue Target,
- uint64_t &FixedValue) override {
- if (Writer->is64Bit()) {
- report_fatal_error("Relocation emission for MachO/PPC64 unimplemented.");
- } else
- RecordPPCRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
- FixedValue);
- }
-};
-}
-
-/// computes the log2 of the size of the relocation,
-/// used for relocation_info::r_length.
-static unsigned getFixupKindLog2Size(unsigned Kind) {
- switch (Kind) {
- default:
- report_fatal_error("log2size(FixupKind): Unhandled fixup kind!");
- case FK_PCRel_1:
- case FK_Data_1:
- return 0;
- case FK_PCRel_2:
- case FK_Data_2:
- return 1;
- case FK_PCRel_4:
- case PPC::fixup_ppc_brcond14:
- case PPC::fixup_ppc_half16:
- case PPC::fixup_ppc_br24:
- case FK_Data_4:
- return 2;
- case FK_PCRel_8:
- case FK_Data_8:
- return 3;
- }
- return 0;
-}
-
-/// Translates generic PPC fixup kind to Mach-O/PPC relocation type enum.
-/// Outline based on PPCELFObjectWriter::getRelocType().
-static unsigned getRelocType(const MCValue &Target,
- const MCFixupKind FixupKind, // from
- // Fixup.getKind()
- const bool IsPCRel) {
- const MCSymbolRefExpr::VariantKind Modifier =
- Target.isAbsolute() ? MCSymbolRefExpr::VK_None
- : Target.getSymA()->getKind();
- // determine the type of the relocation
- unsigned Type = MachO::GENERIC_RELOC_VANILLA;
- if (IsPCRel) { // relative to PC
- switch ((unsigned)FixupKind) {
- default:
- report_fatal_error("Unimplemented fixup kind (relative)");
- case PPC::fixup_ppc_br24:
- Type = MachO::PPC_RELOC_BR24; // R_PPC_REL24
- break;
- case PPC::fixup_ppc_brcond14:
- Type = MachO::PPC_RELOC_BR14;
- break;
- case PPC::fixup_ppc_half16:
- switch (Modifier) {
- default:
- llvm_unreachable("Unsupported modifier for half16 fixup");
- case MCSymbolRefExpr::VK_PPC_HA:
- Type = MachO::PPC_RELOC_HA16;
- break;
- case MCSymbolRefExpr::VK_PPC_LO:
- Type = MachO::PPC_RELOC_LO16;
- break;
- case MCSymbolRefExpr::VK_PPC_HI:
- Type = MachO::PPC_RELOC_HI16;
- break;
- }
- break;
- }
- } else {
- switch ((unsigned)FixupKind) {
- default:
- report_fatal_error("Unimplemented fixup kind (absolute)!");
- case PPC::fixup_ppc_half16:
- switch (Modifier) {
- default:
- llvm_unreachable("Unsupported modifier for half16 fixup");
- case MCSymbolRefExpr::VK_PPC_HA:
- Type = MachO::PPC_RELOC_HA16_SECTDIFF;
- break;
- case MCSymbolRefExpr::VK_PPC_LO:
- Type = MachO::PPC_RELOC_LO16_SECTDIFF;
- break;
- case MCSymbolRefExpr::VK_PPC_HI:
- Type = MachO::PPC_RELOC_HI16_SECTDIFF;
- break;
- }
- break;
- case FK_Data_4:
- break;
- case FK_Data_2:
- break;
- }
- }
- return Type;
-}
-
-static void makeRelocationInfo(MachO::any_relocation_info &MRE,
- const uint32_t FixupOffset, const uint32_t Index,
- const unsigned IsPCRel, const unsigned Log2Size,
- const unsigned IsExtern, const unsigned Type) {
- MRE.r_word0 = FixupOffset;
- // The bitfield offsets that work (as determined by trial-and-error)
- // are different than what is documented in the mach-o manuals.
- // This appears to be an endianness issue; reversing the order of the
- // documented bitfields in <llvm/BinaryFormat/MachO.h> fixes this (but
- // breaks x86/ARM assembly).
- MRE.r_word1 = ((Index << 8) | // was << 0
- (IsPCRel << 7) | // was << 24
- (Log2Size << 5) | // was << 25
- (IsExtern << 4) | // was << 27
- (Type << 0)); // was << 28
-}
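
To make the swapped bit layout above concrete, here is a minimal standalone sketch (hypothetical field values) that packs r_word1 the same way makeRelocationInfo does:

  #include <cassert>
  #include <cstdint>

  int main() {
    // Hypothetical relocation fields, for illustration only.
    const uint32_t Index = 5, IsPCRel = 1, Log2Size = 2, IsExtern = 0, Type = 0;
    // Same packing as makeRelocationInfo() above (not the bit positions the
    // Mach-O documentation lists; see the comment in that function).
    const uint32_t Word1 = (Index << 8) | (IsPCRel << 7) | (Log2Size << 5) |
                           (IsExtern << 4) | (Type << 0);
    assert(Word1 == 0x5C0); // 0x500 | 0x80 | 0x40
    return 0;
  }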
-
-static void
-makeScatteredRelocationInfo(MachO::any_relocation_info &MRE,
- const uint32_t Addr, const unsigned Type,
- const unsigned Log2Size, const unsigned IsPCRel,
- const uint32_t Value2) {
- // For notes on bitfield positions and endianness, see:
- // https://developer.apple.com/library/mac/documentation/developertools/conceptual/MachORuntime/Reference/reference.html#//apple_ref/doc/uid/20001298-scattered_relocation_entry
- MRE.r_word0 = ((Addr << 0) | (Type << 24) | (Log2Size << 28) |
- (IsPCRel << 30) | MachO::R_SCATTERED);
- MRE.r_word1 = Value2;
-}
-
-/// Compute fixup offset (address).
-static uint32_t getFixupOffset(const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup) {
- uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
- // On Mach-O, ppc_fixup_half16 relocations must refer to the
- // start of the instruction, not the second halfword, as ELF does
- if (Fixup.getTargetKind() == PPC::fixup_ppc_half16)
- FixupOffset &= ~uint32_t(3);
- return FixupOffset;
-}
-
-/// \return false if falling back to using non-scattered relocation,
-/// otherwise true for normal scattered relocation.
-/// based on X86MachObjectWriter::recordScatteredRelocation
-/// and ARMMachObjectWriter::recordScatteredRelocation
-bool PPCMachObjectWriter::recordScatteredRelocation(
- MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
- unsigned Log2Size, uint64_t &FixedValue) {
- // caller already computes these, can we just pass and reuse?
- const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup);
- const MCFixupKind FK = Fixup.getKind();
- const unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, FK);
- const unsigned Type = getRelocType(Target, FK, IsPCRel);
-
- // Is this a local or SECTDIFF relocation entry?
- // SECTDIFF relocation entries have symbol subtractions,
- // and require two entries, the first for the add-symbol value,
- // the second for the subtract-symbol value.
-
- // See <reloc.h>.
- const MCSymbol *A = &Target.getSymA()->getSymbol();
-
- if (!A->getFragment())
- report_fatal_error("symbol '" + A->getName() +
- "' can not be undefined in a subtraction expression");
-
- uint32_t Value = Writer->getSymbolAddress(*A, Layout);
- uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent());
- FixedValue += SecAddr;
- uint32_t Value2 = 0;
-
- if (const MCSymbolRefExpr *B = Target.getSymB()) {
- const MCSymbol *SB = &B->getSymbol();
-
- if (!SB->getFragment())
- report_fatal_error("symbol '" + SB->getName() +
- "' can not be undefined in a subtraction expression");
-
- // FIXME: is Type correct? see include/llvm/BinaryFormat/MachO.h
- Value2 = Writer->getSymbolAddress(*SB, Layout);
- FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent());
- }
- // FIXME: does FixedValue get used??
-
- // Relocations are written out in reverse order, so the PAIR comes first.
- if (Type == MachO::PPC_RELOC_SECTDIFF ||
- Type == MachO::PPC_RELOC_HI16_SECTDIFF ||
- Type == MachO::PPC_RELOC_LO16_SECTDIFF ||
- Type == MachO::PPC_RELOC_HA16_SECTDIFF ||
- Type == MachO::PPC_RELOC_LO14_SECTDIFF ||
- Type == MachO::PPC_RELOC_LOCAL_SECTDIFF) {
- // X86 had this piece, but ARM does not
- // If the offset is too large to fit in a scattered relocation,
- // we're hosed. It's an unfortunate limitation of the MachO format.
- if (FixupOffset > 0xffffff) {
- char Buffer[32];
- format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer));
- Asm.getContext().reportError(Fixup.getLoc(),
- Twine("Section too large, can't encode "
- "r_address (") +
- Buffer + ") into 24 bits of scattered "
- "relocation entry.");
- return false;
- }
-
- // Is this supposed to follow MCTarget/PPCAsmBackend.cpp:adjustFixupValue()?
- // see PPCMCExpr::evaluateAsRelocatableImpl()
- uint32_t other_half = 0;
- switch (Type) {
- case MachO::PPC_RELOC_LO16_SECTDIFF:
- other_half = (FixedValue >> 16) & 0xffff;
- // applyFixupOffset no longer extracts the high part because it now assumes
- // this was already done.
- // It looks like this is not true for the FixedValue needed with Mach-O
- // relocs.
- // So we need to adjust FixedValue again here.
- FixedValue &= 0xffff;
- break;
- case MachO::PPC_RELOC_HA16_SECTDIFF:
- other_half = FixedValue & 0xffff;
- FixedValue =
- ((FixedValue >> 16) + ((FixedValue & 0x8000) ? 1 : 0)) & 0xffff;
- break;
- case MachO::PPC_RELOC_HI16_SECTDIFF:
- other_half = FixedValue & 0xffff;
- FixedValue = (FixedValue >> 16) & 0xffff;
- break;
- default:
- llvm_unreachable("Invalid PPC scattered relocation type.");
- break;
- }
-
- MachO::any_relocation_info MRE;
- makeScatteredRelocationInfo(MRE, other_half, MachO::GENERIC_RELOC_PAIR,
- Log2Size, IsPCRel, Value2);
- Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
- } else {
- // If the offset is more than 24-bits, it won't fit in a scattered
- // relocation offset field, so we fall back to using a non-scattered
- // relocation. This is a bit risky, as if the offset reaches out of
- // the block and the linker is doing scattered loading on this
- // symbol, things can go badly.
- //
- // Required for 'as' compatibility.
- if (FixupOffset > 0xffffff)
- return false;
- }
- MachO::any_relocation_info MRE;
- makeScatteredRelocationInfo(MRE, FixupOffset, Type, Log2Size, IsPCRel, Value);
- Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
- return true;
-}
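
The HA16/LO16 math in the SECTDIFF cases above rounds the high half up whenever bit 15 of the low half is set, so that (ha << 16) plus the sign-extended low half reproduces the original value. A minimal standalone sketch with a hypothetical fixup value:

  #include <cassert>
  #include <cstdint>

  // Split a 32-bit value the way the @ha/@lo halves above are computed.
  static void splitHaLo(uint32_t Value, uint16_t &Ha, uint16_t &Lo) {
    Lo = Value & 0xffff;
    Ha = ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff;
  }

  int main() {
    uint16_t Ha, Lo;
    splitHaLo(0x12348001, Ha, Lo);        // hypothetical FixedValue
    assert(Ha == 0x1235 && Lo == 0x8001); // rounded up: bit 15 of Lo is set
    // Reassembling with a sign-extended low half gives the value back.
    assert((uint32_t)(Ha << 16) + (int16_t)Lo == 0x12348001);
    return 0;
  }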
-
-// see PPCELFObjectWriter for a general outline of cases
-void PPCMachObjectWriter::RecordPPCRelocation(
- MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
- uint64_t &FixedValue) {
- const MCFixupKind FK = Fixup.getKind(); // unsigned
- const unsigned Log2Size = getFixupKindLog2Size(FK);
- const bool IsPCRel = Writer->isFixupKindPCRel(Asm, FK);
- const unsigned RelocType = getRelocType(Target, FK, IsPCRel);
-
- // If this is a difference or a defined symbol plus an offset, then we need a
- // scattered relocation entry. Differences always require scattered
- // relocations.
- if (Target.getSymB() &&
- // Q: are branch targets ever scattered?
- RelocType != MachO::PPC_RELOC_BR24 &&
- RelocType != MachO::PPC_RELOC_BR14) {
- recordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
- Log2Size, FixedValue);
- return;
- }
-
- // this doesn't seem right for RIT_PPC_BR24
- // Get the symbol data, if any.
- const MCSymbol *A = nullptr;
- if (Target.getSymA())
- A = &Target.getSymA()->getSymbol();
-
- // See <reloc.h>.
- const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup);
- unsigned Index = 0;
- unsigned Type = RelocType;
-
- const MCSymbol *RelSymbol = nullptr;
- if (Target.isAbsolute()) { // constant
- // SymbolNum of 0 indicates the absolute section.
- //
- // FIXME: Currently, these are never generated (see code below). I cannot
- // find a case where they are actually emitted.
- report_fatal_error("FIXME: relocations to absolute targets "
- "not yet implemented");
- // the above line stolen from ARM, not sure
- } else {
- // Resolve constant variables.
- if (A->isVariable()) {
- int64_t Res;
- if (A->getVariableValue()->evaluateAsAbsolute(
- Res, Layout, Writer->getSectionAddressMap())) {
- FixedValue = Res;
- return;
- }
- }
-
- // Check whether we need an external or internal relocation.
- if (Writer->doesSymbolRequireExternRelocation(*A)) {
- RelSymbol = A;
- // For external relocations, make sure to offset the fixup value to
- // compensate for the addend of the symbol address, if it was
- // undefined. This occurs with weak definitions, for example.
- if (!A->isUndefined())
- FixedValue -= Layout.getSymbolOffset(*A);
- } else {
- // The index is the section ordinal (1-based).
- const MCSection &Sec = A->getSection();
- Index = Sec.getOrdinal() + 1;
- FixedValue += Writer->getSectionAddress(&Sec);
- }
- if (IsPCRel)
- FixedValue -= Writer->getSectionAddress(Fragment->getParent());
- }
-
- // struct relocation_info (8 bytes)
- MachO::any_relocation_info MRE;
- makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, false, Type);
- Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
-}
-
-std::unique_ptr<MCObjectTargetWriter>
-llvm::createPPCMachObjectWriter(bool Is64Bit, uint32_t CPUType,
- uint32_t CPUSubtype) {
- return std::make_unique<PPCMachObjectWriter>(Is64Bit, CPUType, CPUSubtype);
-}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
index 7fdbb8990b55..d672d54772e0 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -7,16 +7,26 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "llvm/BinaryFormat/XCOFF.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCXCOFFObjectWriter.h"
using namespace llvm;
namespace {
class PPCXCOFFObjectWriter : public MCXCOFFObjectTargetWriter {
+ static constexpr uint8_t SignBitMask = 0x80;
public:
PPCXCOFFObjectWriter(bool Is64Bit);
+
+ std::pair<uint8_t, uint8_t>
+ getRelocTypeAndSignSize(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel) const override;
};
} // end anonymous namespace
@@ -27,3 +37,40 @@ std::unique_ptr<MCObjectTargetWriter>
llvm::createPPCXCOFFObjectWriter(bool Is64Bit) {
return std::make_unique<PPCXCOFFObjectWriter>(Is64Bit);
}
+
+std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
+ const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const {
+ const MCSymbolRefExpr::VariantKind Modifier =
+ Target.isAbsolute() ? MCSymbolRefExpr::VK_None
+ : Target.getSymA()->getKind();
+ // People from the AIX OS team say that the AIX link editor does not care
+ // about the sign bit in the relocation entry "most" of the time.
+ // The system assembler seems to set the sign bit on a relocation entry
+ // based on a property similar to IsPCRel, so we do the same here.
+ // TODO: More investigation on how assembler decides to set the sign
+ // bit, and we might want to match that.
+ const uint8_t EncodedSignednessIndicator = IsPCRel ? SignBitMask : 0u;
+
+ // The magic number we use in SignAndSize has a strong relationship with
+ // the corresponding MCFixupKind. In most cases, it's the MCFixupKind
+ // number - 1, because SignAndSize encodes the bit length being
+ // relocated minus 1.
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ report_fatal_error("Unimplemented fixup kind.");
+ case PPC::fixup_ppc_half16:
+ switch (Modifier) {
+ default:
+ report_fatal_error("Unsupported modifier for half16 fixup.");
+ case MCSymbolRefExpr::VK_None:
+ return {XCOFF::RelocationType::R_TOC, EncodedSignednessIndicator | 15};
+ }
+ break;
+ case PPC::fixup_ppc_br24:
+ // Branches are 4-byte aligned, so the 24 bits we encode in
+ // the instruction actually represent a 26-bit offset.
+ return {XCOFF::RelocationType::R_RBR, EncodedSignednessIndicator | 25};
+ case FK_Data_4:
+ return {XCOFF::RelocationType::R_POS, EncodedSignednessIndicator | 31};
+ }
+}
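
As a worked example of the encoding above: a PC-relative fixup_ppc_br24 produces R_RBR with SignAndSize = 0x80 | 25 = 0x99 (sign bit plus a 26-bit field minus one), while an absolute FK_Data_4 produces R_POS with SignAndSize = 31. A small sketch that mirrors those two cases (the relocation type codes here are hypothetical stand-ins; the real values come from llvm/BinaryFormat/XCOFF.h):

  #include <cassert>
  #include <cstdint>
  #include <utility>

  // Hypothetical stand-ins for the XCOFF relocation type codes named above.
  constexpr uint8_t R_RBR_Code = 0x1A, R_POS_Code = 0x00;

  // Mirrors two cases of getRelocTypeAndSignSize(): the second byte is
  // (IsPCRel ? 0x80 : 0) | (number of bits being relocated - 1).
  static std::pair<uint8_t, uint8_t> encodeBr24(bool IsPCRel) {
    const uint8_t Sign = IsPCRel ? 0x80 : 0;
    return {R_RBR_Code, static_cast<uint8_t>(Sign | 25)}; // 26-bit branch offset
  }

  static std::pair<uint8_t, uint8_t> encodeData4(bool IsPCRel) {
    const uint8_t Sign = IsPCRel ? 0x80 : 0;
    return {R_POS_Code, static_cast<uint8_t>(Sign | 31)}; // full 32-bit word
  }

  int main() {
    assert(encodeBr24(true).second == 0x99); // 0x80 | 25
    assert(encodeData4(false).second == 31); // no sign bit for absolute data
    return 0;
  }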
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.h
index a83509f0e687..7e0aa2c6061d 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.h
@@ -51,10 +51,9 @@ namespace llvm {
FunctionPass *createPPCExpandISELPass();
FunctionPass *createPPCPreEmitPeepholePass();
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
- AsmPrinter &AP, bool IsDarwin);
+ AsmPrinter &AP);
bool LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
- MCOperand &OutMO, AsmPrinter &AP,
- bool IsDarwin);
+ MCOperand &OutMO, AsmPrinter &AP);
void initializePPCCTRLoopsPass(PassRegistry&);
#ifndef NDEBUG
@@ -99,33 +98,33 @@ namespace llvm {
/// the function's picbase, e.g. lo16(symbol-picbase).
MO_PIC_FLAG = 2,
- /// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to
- /// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase).
- MO_NLP_FLAG = 4,
+ /// MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to
+ /// the current instruction address(pc), e.g., var@pcrel. Fixup is VK_PCREL.
+ MO_PCREL_FLAG = 4,
- /// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a
- /// symbol with hidden visibility. This causes a different kind of
- /// non-lazy-pointer to be generated.
- MO_NLP_HIDDEN_FLAG = 8,
+ /// MO_GOT_FLAG - If this bit is set the symbol reference is to be computed
+ /// via the GOT. For example when combined with the MO_PCREL_FLAG it should
+ /// produce the relocation @got@pcrel. Fixup is VK_PPC_GOT_PCREL.
+ MO_GOT_FLAG = 8,
/// The next are not flags but distinct values.
- MO_ACCESS_MASK = 0xf0,
+ MO_ACCESS_MASK = 0xf00,
/// MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
- MO_LO = 1 << 4,
- MO_HA = 2 << 4,
+ MO_LO = 1 << 8,
+ MO_HA = 2 << 8,
- MO_TPREL_LO = 4 << 4,
- MO_TPREL_HA = 3 << 4,
+ MO_TPREL_LO = 4 << 8,
+ MO_TPREL_HA = 3 << 8,
/// These values identify relocations on immediates folded
/// into memory operations.
- MO_DTPREL_LO = 5 << 4,
- MO_TLSLD_LO = 6 << 4,
- MO_TOC_LO = 7 << 4,
+ MO_DTPREL_LO = 5 << 8,
+ MO_TLSLD_LO = 6 << 8,
+ MO_TOC_LO = 7 << 8,
// Symbol for VK_PPC_TLS fixup attached to an ADD instruction
- MO_TLS = 8 << 4
+ MO_TLS = 8 << 8
};
} // end namespace PPCII
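
With the access kinds moved from bits 4-7 up to bits 8-11, a target operand flag can now carry the individual bit flags and an access kind at the same time. A minimal sketch with the values defined above (the combination chosen is hypothetical, purely to show the masking):

  #include <cassert>

  int main() {
    // Values as defined in PPCII above.
    const unsigned MO_PCREL_FLAG = 4;
    const unsigned MO_GOT_FLAG = 8;
    const unsigned MO_ACCESS_MASK = 0xf00;
    const unsigned MO_TOC_LO = 7 << 8;

    const unsigned Flags = MO_PCREL_FLAG | MO_GOT_FLAG | MO_TOC_LO;
    assert((Flags & MO_ACCESS_MASK) == MO_TOC_LO); // access kind is intact
    assert((Flags & MO_PCREL_FLAG) && (Flags & MO_GOT_FLAG));
    return 0;
  }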
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td
index b805b9a93443..9ad78bf67fe6 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td
@@ -51,6 +51,7 @@ def DirectivePwr6x
def DirectivePwr7: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR7", "">;
def DirectivePwr8: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR8", "">;
def DirectivePwr9: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR9", "">;
+def DirectivePwr10: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR10", "">;
def DirectivePwrFuture
: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR_FUTURE", "">;
@@ -166,6 +167,16 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
"Enable Hardware Transactional Memory instructions">;
def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true",
"Implement mftb using the mfspr instruction">;
+def FeatureFusion : SubtargetFeature<"fusion", "HasFusion", "true",
+ "Target supports instruction fusion">;
+def FeatureAddiLoadFusion : SubtargetFeature<"fuse-addi-load",
+ "HasAddiLoadFusion", "true",
+ "Power8 Addi-Load fusion",
+ [FeatureFusion]>;
+def FeatureAddisLoadFusion : SubtargetFeature<"fuse-addis-load",
+ "HasAddisLoadFusion", "true",
+ "Power8 Addis-Load fusion",
+ [FeatureFusion]>;
def FeatureUnalignedFloats :
SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
"true", "CPU does not trap on unaligned FP access">;
@@ -194,7 +205,11 @@ def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
def FeatureISA3_0 : SubtargetFeature<"isa-v30-instructions", "IsISA3_0",
"true",
- "Enable instructions added in ISA 3.0.">;
+ "Enable instructions in ISA 3.0.">;
+def FeatureISA3_1 : SubtargetFeature<"isa-v31-instructions", "IsISA3_1",
+ "true",
+ "Enable instructions in ISA 3.1.",
+ [FeatureISA3_0]>;
def FeatureP9Altivec : SubtargetFeature<"power9-altivec", "HasP9Altivec", "true",
"Enable POWER9 Altivec instructions",
[FeatureISA3_0, FeatureP8Altivec]>;
@@ -202,6 +217,10 @@ def FeatureP9Vector : SubtargetFeature<"power9-vector", "HasP9Vector", "true",
"Enable POWER9 vector instructions",
[FeatureISA3_0, FeatureP8Vector,
FeatureP9Altivec]>;
+def FeatureP10Vector : SubtargetFeature<"power10-vector", "HasP10Vector",
+ "true",
+ "Enable POWER10 vector instructions",
+ [FeatureISA3_1, FeatureP9Vector]>;
// A separate feature for this even though it is equivalent to P9Vector
// because this is a feature of the implementation rather than the architecture
// and may go away with future CPU's.
@@ -209,6 +228,21 @@ def FeatureVectorsUseTwoUnits : SubtargetFeature<"vectors-use-two-units",
"VectorsUseTwoUnits",
"true",
"Vectors use two units">;
+def FeaturePrefixInstrs : SubtargetFeature<"prefix-instrs", "HasPrefixInstrs",
+ "true",
+ "Enable prefixed instructions",
+ [FeatureISA3_0, FeatureP8Vector,
+ FeatureP9Altivec]>;
+def FeaturePCRelativeMemops :
+ SubtargetFeature<"pcrelative-memops", "HasPCRelativeMemops", "true",
+ "Enable PC relative Memory Ops",
+ [FeatureISA3_0]>;
+
+def FeaturePredictableSelectIsExpensive :
+ SubtargetFeature<"predictable-select-expensive",
+ "PredictableSelectIsExpensive",
+ "true",
+ "Prefer likely predicted branches over selects">;
// Since new processors generally contain a superset of features of those that
// came before them, the idea is to make implementations of new processors
@@ -225,7 +259,7 @@ def FeatureVectorsUseTwoUnits : SubtargetFeature<"vectors-use-two-units",
// !listconcat(FutureProcessorInheritableFeatures,
// FutureProcessorSpecificFeatures)
-// Makes it explicit and obvious what is new in FutureProcesor vs. Power8 as
+// Makes it explicit and obvious what is new in FutureProcessor vs. Power8 as
// well as providing a single point of definition if the feature set will be
// used elsewhere.
def ProcessorFeatures {
@@ -262,25 +296,34 @@ def ProcessorFeatures {
!listconcat(P7InheritableFeatures, P7SpecificFeatures);
// Power8
- list<SubtargetFeature> P8AdditionalFeatures = [DirectivePwr8,
- FeatureP8Altivec,
- FeatureP8Vector,
- FeatureP8Crypto,
- FeatureHTM,
- FeatureDirectMove,
- FeatureICBT,
- FeaturePartwordAtomic];
- list<SubtargetFeature> P8SpecificFeatures = [];
+ list<SubtargetFeature> P8AdditionalFeatures =
+ [DirectivePwr8,
+ FeatureP8Altivec,
+ FeatureP8Vector,
+ FeatureP8Crypto,
+ FeatureHTM,
+ FeatureDirectMove,
+ FeatureICBT,
+ FeaturePartwordAtomic,
+ FeaturePredictableSelectIsExpensive
+ ];
+
+ list<SubtargetFeature> P8SpecificFeatures = [FeatureAddiLoadFusion,
+ FeatureAddisLoadFusion];
list<SubtargetFeature> P8InheritableFeatures =
!listconcat(P7InheritableFeatures, P8AdditionalFeatures);
list<SubtargetFeature> P8Features =
!listconcat(P8InheritableFeatures, P8SpecificFeatures);
// Power9
- list<SubtargetFeature> P9AdditionalFeatures = [DirectivePwr9,
- FeatureP9Altivec,
- FeatureP9Vector,
- FeatureISA3_0];
+ list<SubtargetFeature> P9AdditionalFeatures =
+ [DirectivePwr9,
+ FeatureP9Altivec,
+ FeatureP9Vector,
+ FeatureISA3_0,
+ FeaturePredictableSelectIsExpensive
+ ];
+
// Some features are unique to Power9 and there is no reason to assume
// they will be part of any future CPUs. One example is the narrower
// dispatch for vector operations than scalar ones. For the time being,
@@ -294,13 +337,25 @@ def ProcessorFeatures {
list<SubtargetFeature> P9Features =
!listconcat(P9InheritableFeatures, P9SpecificFeatures);
+ // Power10
+ // For P10 CPU we assume that all of the existing features from Power9
+ // still exist with the exception of those we know are Power9 specific.
+ list<SubtargetFeature> P10AdditionalFeatures =
+ [DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs,
+ FeaturePCRelativeMemops, FeatureP10Vector];
+ list<SubtargetFeature> P10SpecificFeatures = [];
+ list<SubtargetFeature> P10InheritableFeatures =
+ !listconcat(P9InheritableFeatures, P10AdditionalFeatures);
+ list<SubtargetFeature> P10Features =
+ !listconcat(P10InheritableFeatures, P10SpecificFeatures);
+
// Future
- // For future CPU we assume that all of the existing features from Power 9
- // still exist with the exception of those we know are Power 9 specific.
+ // For future CPU we assume that all of the existing features from Power10
+ // still exist with the exception of those we know are Power10 specific.
list<SubtargetFeature> FutureAdditionalFeatures = [];
list<SubtargetFeature> FutureSpecificFeatures = [];
list<SubtargetFeature> FutureInheritableFeatures =
- !listconcat(P9InheritableFeatures, FutureAdditionalFeatures);
+ !listconcat(P10InheritableFeatures, FutureAdditionalFeatures);
list<SubtargetFeature> FutureFeatures =
!listconcat(FutureInheritableFeatures, FutureSpecificFeatures);
}
@@ -505,6 +560,8 @@ def : ProcessorModel<"pwr6x", G5Model,
def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>;
def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>;
+// No scheduler model yet.
+def : ProcessorModel<"pwr10", NoSchedModel, ProcessorFeatures.P10Features>;
// No scheduler model for future CPU.
def : ProcessorModel<"future", NoSchedModel,
ProcessorFeatures.FutureFeatures>;
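
The ProcessorFeatures block above builds each CPU's feature list by concatenating the previous generation's inheritable list with the new additions, keeping CPU-specific features out of what later CPUs inherit. A rough C++ analogue of how P10Features is assembled (abbreviated, hypothetical lists; the real definitions are the TableGen ones above):

  #include <cassert>
  #include <string>
  #include <vector>

  using FeatureList = std::vector<std::string>;

  static FeatureList concat(FeatureList A, const FeatureList &B) {
    A.insert(A.end(), B.begin(), B.end());
    return A;
  }

  int main() {
    // Abbreviated stand-ins for the TableGen lists.
    const FeatureList P9Inheritable = {"isa-v30-instructions", "power9-vector"};
    const FeatureList P10Additional = {"isa-v31-instructions", "prefix-instrs",
                                       "pcrelative-memops", "power10-vector"};
    const FeatureList P10Specific = {}; // nothing Power10-only so far
    const FeatureList P10Inheritable = concat(P9Inheritable, P10Additional);
    const FeatureList P10Features = concat(P10Inheritable, P10Specific);
    assert(P10Features.size() == 6);
    // "future" then inherits P10Inheritable, not P10Features.
    return 0;
  }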
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 20c5ac7b378a..bf5fe741bac8 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -83,8 +83,6 @@ protected:
const PPCSubtarget *Subtarget = nullptr;
StackMaps SM;
- virtual MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO);
-
public:
explicit PPCAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
@@ -100,7 +98,7 @@ public:
return AsmPrinter::doInitialization(M);
}
- void EmitInstruction(const MachineInstr *MI) override;
+ void emitInstruction(const MachineInstr *MI) override;
/// This function is for PrintAsmOperand and PrintAsmMemoryOperand,
/// invoked by EmitMSInlineAsmStr and EmitGCCInlineAsmStr only.
@@ -113,7 +111,7 @@ public:
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode, raw_ostream &O) override;
- void EmitEndOfAsmFile(Module &M) override;
+ void emitEndOfAsmFile(Module &M) override;
void LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI);
void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
@@ -137,37 +135,41 @@ public:
return "Linux PPC Assembly Printer";
}
- bool doFinalization(Module &M) override;
- void EmitStartOfAsmFile(Module &M) override;
+ void emitStartOfAsmFile(Module &M) override;
+ void emitEndOfAsmFile(Module &) override;
- void EmitFunctionEntryLabel() override;
+ void emitFunctionEntryLabel() override;
- void EmitFunctionBodyStart() override;
- void EmitFunctionBodyEnd() override;
- void EmitInstruction(const MachineInstr *MI) override;
+ void emitFunctionBodyStart() override;
+ void emitFunctionBodyEnd() override;
+ void emitInstruction(const MachineInstr *MI) override;
};
class PPCAIXAsmPrinter : public PPCAsmPrinter {
private:
static void ValidateGV(const GlobalVariable *GV);
-protected:
- MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO) override;
public:
PPCAIXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
- : PPCAsmPrinter(TM, std::move(Streamer)) {}
+ : PPCAsmPrinter(TM, std::move(Streamer)) {
+ if (MAI->isLittleEndian())
+ report_fatal_error(
+ "cannot create AIX PPC Assembly Printer for a little-endian target");
+ }
StringRef getPassName() const override { return "AIX PPC Assembly Printer"; }
+ bool doInitialization(Module &M) override;
+
void SetupMachineFunction(MachineFunction &MF) override;
- const MCExpr *lowerConstant(const Constant *CV) override;
+ void emitGlobalVariable(const GlobalVariable *GV) override;
- void EmitGlobalVariable(const GlobalVariable *GV) override;
+ void emitFunctionDescriptor() override;
- void EmitFunctionDescriptor() override;
+ void emitEndOfAsmFile(Module &) override;
- void EmitEndOfAsmFile(Module &) override;
+ void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const override;
};
} // end anonymous namespace
@@ -176,23 +178,7 @@ void PPCAsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
raw_ostream &O) {
// Computing the address of a global symbol, not calling it.
const GlobalValue *GV = MO.getGlobal();
- MCSymbol *SymToPrint;
-
- // External or weakly linked global variables need non-lazily-resolved stubs
- if (Subtarget->hasLazyResolverStub(GV)) {
- SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
- MachineModuleInfoImpl::StubValueTy &StubSym =
- MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(
- SymToPrint);
- if (!StubSym.getPointer())
- StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV),
- !GV->hasInternalLinkage());
- } else {
- SymToPrint = getSymbol(GV);
- }
-
- SymToPrint->print(O, MAI);
-
+ getSymbol(GV)->print(O, MAI);
printOffset(MO.getOffset(), O);
}
@@ -208,9 +194,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
// Linux assembler (Others?) does not take register mnemonics.
// FIXME - What about special registers used in mfspr/mtspr?
- if (!Subtarget->isDarwin())
- RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
- O << RegName;
+ O << PPCRegisterInfo::stripRegisterPrefix(RegName);
return;
}
case MachineOperand::MO_Immediate:
@@ -304,14 +288,9 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
O << ")";
return false;
case 'y': // A memory reference for an X-form instruction
- {
- const char *RegName = "r0";
- if (!Subtarget->isDarwin())
- RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
- O << RegName << ", ";
- printOperand(MI, OpNo, O);
- return false;
- }
+ O << "0, ";
+ printOperand(MI, OpNo, O);
+ return false;
case 'U': // Print 'u' for update form.
case 'X': // Print 'x' for indexed form.
// FIXME: Currently for PowerPC memory operands are always loaded
@@ -342,7 +321,7 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(const MCSymbol *Sym) {
return TOCEntry;
}
-void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) {
+void PPCAsmPrinter::emitEndOfAsmFile(Module &M) {
emitStackMaps(SM);
}
@@ -351,7 +330,7 @@ void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) {
auto &Ctx = OutStreamer->getContext();
MCSymbol *MILabel = Ctx.createTempSymbol();
- OutStreamer->EmitLabel(MILabel);
+ OutStreamer->emitLabel(MILabel);
SM.recordStackMap(*MILabel, MI);
assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
@@ -380,7 +359,7 @@ void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) {
void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
auto &Ctx = OutStreamer->getContext();
MCSymbol *MILabel = Ctx.createTempSymbol();
- OutStreamer->EmitLabel(MILabel);
+ OutStreamer->emitLabel(MILabel);
SM.recordPatchPoint(*MILabel, MI);
PatchPointOpers Opers(&MI);
@@ -519,16 +498,17 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
/// Map a machine operand for a TOC pseudo-machine instruction to its
/// corresponding MCSymbol.
-MCSymbol *PPCAsmPrinter::getMCSymbolForTOCPseudoMO(const MachineOperand &MO) {
+static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO,
+ AsmPrinter &AP) {
switch (MO.getType()) {
case MachineOperand::MO_GlobalAddress:
- return getSymbol(MO.getGlobal());
+ return AP.getSymbol(MO.getGlobal());
case MachineOperand::MO_ConstantPoolIndex:
- return GetCPISymbol(MO.getIndex());
+ return AP.GetCPISymbol(MO.getIndex());
case MachineOperand::MO_JumpTableIndex:
- return GetJTISymbol(MO.getIndex());
+ return AP.GetJTISymbol(MO.getIndex());
case MachineOperand::MO_BlockAddress:
- return GetBlockAddressSymbol(MO.getBlockAddress());
+ return AP.GetBlockAddressSymbol(MO.getBlockAddress());
default:
llvm_unreachable("Unexpected operand type to get symbol.");
}
@@ -537,9 +517,8 @@ MCSymbol *PPCAsmPrinter::getMCSymbolForTOCPseudoMO(const MachineOperand &MO) {
/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
/// the current output stream.
///
-void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
- const bool IsDarwin = TM.getTargetTriple().isOSDarwin();
const bool IsPPC64 = Subtarget->isPPC64();
const bool IsAIX = Subtarget->isAIXABI();
const Module *M = MF->getFunction().getParent();
@@ -617,7 +596,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
// Emit the label.
- OutStreamer->EmitLabel(PICBase);
+ OutStreamer->emitLabel(PICBase);
return;
}
case PPC::UpdateGBR: {
@@ -628,7 +607,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// addis r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@ha
// addi r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@l
// Get the offset from the GOT Base Register to the GOT
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
if (Subtarget->isSecurePlt() && isPositionIndependent() ) {
unsigned PICR = TmpInst.getOperand(0).getReg();
MCSymbol *BaseSymbol = OutContext.getOrCreateSymbol(
@@ -640,19 +619,19 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *DeltaExpr = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(BaseSymbol, OutContext), PB, OutContext);
- const MCExpr *DeltaHi = PPCMCExpr::createHa(DeltaExpr, false, OutContext);
+ const MCExpr *DeltaHi = PPCMCExpr::createHa(DeltaExpr, OutContext);
EmitToStreamer(
*OutStreamer,
MCInstBuilder(PPC::ADDIS).addReg(PICR).addReg(PICR).addExpr(DeltaHi));
- const MCExpr *DeltaLo = PPCMCExpr::createLo(DeltaExpr, false, OutContext);
+ const MCExpr *DeltaLo = PPCMCExpr::createLo(DeltaExpr, OutContext);
EmitToStreamer(
*OutStreamer,
MCInstBuilder(PPC::ADDI).addReg(PICR).addReg(PICR).addExpr(DeltaLo));
return;
} else {
MCSymbol *PICOffset =
- MF->getInfo<PPCFunctionInfo>()->getPICOffsetSymbol();
+ MF->getInfo<PPCFunctionInfo>()->getPICOffsetSymbol(*MF);
TmpInst.setOpcode(PPC::LWZ);
const MCExpr *Exp =
MCSymbolRefExpr::create(PICOffset, MCSymbolRefExpr::VK_None, OutContext);
@@ -679,10 +658,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
case PPC::LWZtoc: {
- assert(!IsDarwin && "TOC is an ELF/XCOFF construct.");
-
// Transform %rN = LWZtoc @op1, %r2
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
// Change the opcode to LWZ.
TmpInst.setOpcode(PPC::LWZ);
@@ -692,7 +669,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"Invalid operand for LWZtoc.");
// Map the operand to its corresponding MCSymbol.
- const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO);
+ const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
// Create a reference to the GOT entry for the symbol. The GOT entry will be
// synthesized later.
@@ -737,10 +714,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::LDtocCPT:
case PPC::LDtocBA:
case PPC::LDtoc: {
- assert(!IsDarwin && "TOC is an ELF/XCOFF construct");
-
// Transform %x3 = LDtoc @min1, %x2
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
// Change the opcode to LD.
TmpInst.setOpcode(PPC::LD);
@@ -753,7 +728,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// global address operand to be a reference to the TOC entry we will
// synthesize later.
MCSymbol *TOCEntry =
- lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO));
+ lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO, *this));
const MCSymbolRefExpr::VariantKind VK =
IsAIX ? MCSymbolRefExpr::VK_None : MCSymbolRefExpr::VK_PPC_TOC;
@@ -769,7 +744,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
" AIX.");
// Transform %rd = ADDIStocHA %rA, @sym(%r2)
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
// Change the opcode to ADDIS.
TmpInst.setOpcode(PPC::ADDIS);
@@ -779,7 +754,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"Invalid operand for ADDIStocHA.");
// Map the machine operand to its corresponding MCSymbol.
- MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO);
+ MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
// Always use TOC on AIX. Map the global address operand to be a reference
// to the TOC entry we will synthesize later. 'TOCEntry' is a label used to
@@ -799,7 +774,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
" AIX.");
// Transform %rd = LWZtocL @sym, %rs.
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
// Change the opcode to lwz.
TmpInst.setOpcode(PPC::LWZ);
@@ -809,7 +784,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"Invalid operand for LWZtocL.");
// Map the machine operand to its corresponding MCSymbol.
- MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO);
+ MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
// Always use TOC on AIX. Map the global address operand to be a reference
// to the TOC entry we will synthesize later. 'TOCEntry' is a label used to
@@ -824,10 +799,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
case PPC::ADDIStocHA8: {
- assert(!IsDarwin && "TOC is an ELF/XCOFF construct");
-
// Transform %xd = ADDIStocHA8 %x2, @sym
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
// Change the opcode to ADDIS8. If the global address is the address of
// an external symbol, is a jump table address, is a block address, or is a
@@ -839,7 +812,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
"Invalid operand for ADDIStocHA8!");
- const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO);
+ const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
const bool GlobalToc =
MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal());
@@ -864,10 +837,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
case PPC::LDtocL: {
- assert(!IsDarwin && "TOC is an ELF/XCOFF construct");
-
// Transform %xd = LDtocL @sym, %xs
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
// Change the opcode to LD. If the global address is the address of
// an external symbol, is a jump table address, is a block address, or is
@@ -885,7 +856,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"LDtocL used on symbol that could be accessed directly is "
"invalid. Must match ADDIStocHA8."));
- const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO);
+ const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
if (!MO.isCPI() || TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
@@ -900,7 +871,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::ADDItocL: {
// Transform %xd = ADDItocL %xs, @sym
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
// Change the opcode to ADDI8. If the global address is external, then
// generate a TOC entry and reference that. Otherwise, reference the
@@ -915,7 +886,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"Interposable definitions must use indirect access."));
const MCExpr *Exp =
- MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO),
+ MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO, *this),
MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext);
TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
@@ -940,7 +911,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::LDgotTprelL:
case PPC::LDgotTprelL32: {
// Transform %xd = LDgotTprelL @sym, %xs
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
// Change the opcode to LD.
TmpInst.setOpcode(IsPPC64 ? PPC::LD : PPC::LWZ);
@@ -969,9 +940,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCBinaryExpr::createSub(MCSymbolRefExpr::create(GOTSymbol, OutContext),
MCSymbolRefExpr::create(GOTRef, OutContext),
OutContext);
- OutStreamer->EmitLabel(GOTRef);
- OutStreamer->EmitValue(OffsExpr, 4);
- OutStreamer->EmitLabel(NextInstr);
+ OutStreamer->emitLabel(GOTRef);
+ OutStreamer->emitValue(OffsExpr, 4);
+ OutStreamer->emitLabel(NextInstr);
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR)
.addReg(MI->getOperand(0).getReg()));
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LWZ)
@@ -1170,10 +1141,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// suite shows a handful of test cases that fail this check for
// Darwin. Those need to be investigated before this sanity test
// can be enabled for those subtargets.
- if (!IsDarwin) {
- unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
- const MachineOperand &MO = MI->getOperand(OpNum);
- if (MO.isGlobal() && MO.getGlobal()->getAlignment() < 4)
+ unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (MO.isGlobal()) {
+ const DataLayout &DL = MO.getGlobal()->getParent()->getDataLayout();
+ if (MO.getGlobal()->getPointerAlignment(DL) < 4)
llvm_unreachable("Global must be word-aligned for LD, STD, LWA!");
}
// Now process the instruction normally.
@@ -1181,17 +1153,17 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
EmitToStreamer(*OutStreamer, TmpInst);
}
-void PPCLinuxAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void PPCLinuxAsmPrinter::emitInstruction(const MachineInstr *MI) {
if (!Subtarget->isPPC64())
- return PPCAsmPrinter::EmitInstruction(MI);
+ return PPCAsmPrinter::emitInstruction(MI);
switch (MI->getOpcode()) {
default:
- return PPCAsmPrinter::EmitInstruction(MI);
+ return PPCAsmPrinter::emitInstruction(MI);
case TargetOpcode::PATCHABLE_FUNCTION_ENTER: {
// .begin:
// b .end # lis 0, FuncId[16..32]
@@ -1206,7 +1178,7 @@ void PPCLinuxAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// of instructions change.
MCSymbol *BeginOfSled = OutContext.createTempSymbol();
MCSymbol *EndOfSled = OutContext.createTempSymbol();
- OutStreamer->EmitLabel(BeginOfSled);
+ OutStreamer->emitLabel(BeginOfSled);
EmitToStreamer(*OutStreamer,
MCInstBuilder(PPC::B).addExpr(
MCSymbolRefExpr::create(EndOfSled, OutContext)));
@@ -1221,8 +1193,8 @@ void PPCLinuxAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutContext.getOrCreateSymbol("__xray_FunctionEntry"),
OutContext)));
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR8).addReg(PPC::X0));
- OutStreamer->EmitLabel(EndOfSled);
- recordSled(BeginOfSled, *MI, SledKind::FUNCTION_ENTER);
+ OutStreamer->emitLabel(EndOfSled);
+ recordSled(BeginOfSled, *MI, SledKind::FUNCTION_ENTER, 2);
break;
}
case TargetOpcode::PATCHABLE_RET: {
@@ -1232,7 +1204,7 @@ void PPCLinuxAsmPrinter::EmitInstruction(const MachineInstr *MI) {
for (const auto &MO :
make_range(std::next(MI->operands_begin()), MI->operands_end())) {
MCOperand MCOp;
- if (LowerPPCMachineOperandToMCOperand(MO, MCOp, *this, false))
+ if (LowerPPCMachineOperandToMCOperand(MO, MCOp, *this))
RetInst.addOperand(MCOp);
}
@@ -1292,9 +1264,9 @@ void PPCLinuxAsmPrinter::EmitInstruction(const MachineInstr *MI) {
//
// Update compiler-rt/lib/xray/xray_powerpc64.cc accordingly when number
// of instructions change.
- OutStreamer->EmitCodeAlignment(8);
+ OutStreamer->emitCodeAlignment(8);
MCSymbol *BeginOfSled = OutContext.createTempSymbol();
- OutStreamer->EmitLabel(BeginOfSled);
+ OutStreamer->emitLabel(BeginOfSled);
EmitToStreamer(*OutStreamer, RetInst);
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
EmitToStreamer(
@@ -1309,8 +1281,8 @@ void PPCLinuxAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR8).addReg(PPC::X0));
EmitToStreamer(*OutStreamer, RetInst);
if (IsConditional)
- OutStreamer->EmitLabel(FallthroughLabel);
- recordSled(BeginOfSled, *MI, SledKind::FUNCTION_EXIT);
+ OutStreamer->emitLabel(FallthroughLabel);
+ recordSled(BeginOfSled, *MI, SledKind::FUNCTION_EXIT, 2);
break;
}
case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
@@ -1323,7 +1295,7 @@ void PPCLinuxAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
-void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
+void PPCLinuxAsmPrinter::emitStartOfAsmFile(Module &M) {
if (static_cast<const PPCTargetMachine &>(TM).isELFv2ABI()) {
PPCTargetStreamer *TS =
static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
@@ -1334,10 +1306,10 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
if (static_cast<const PPCTargetMachine &>(TM).isPPC64() ||
!isPositionIndependent())
- return AsmPrinter::EmitStartOfAsmFile(M);
+ return AsmPrinter::emitStartOfAsmFile(M);
if (M.getPICLevel() == PICLevel::SmallPIC)
- return AsmPrinter::EmitStartOfAsmFile(M);
+ return AsmPrinter::emitStartOfAsmFile(M);
OutStreamer->SwitchSection(OutContext.getELFSection(
".got2", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC));
@@ -1345,7 +1317,7 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
MCSymbol *TOCSym = OutContext.getOrCreateSymbol(Twine(".LTOC"));
MCSymbol *CurrentPos = OutContext.createTempSymbol();
- OutStreamer->EmitLabel(CurrentPos);
+ OutStreamer->emitLabel(CurrentPos);
// The GOT pointer points to the middle of the GOT, in order to reference the
// entire 64kB range. 0x8000 is the midpoint.
@@ -1354,24 +1326,24 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
MCConstantExpr::create(0x8000, OutContext),
OutContext);
- OutStreamer->EmitAssignment(TOCSym, tocExpr);
+ OutStreamer->emitAssignment(TOCSym, tocExpr);
OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
}
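
A quick check of the 0x8000 bias mentioned above, with a hypothetical section address: putting .LTOC at the midpoint lets signed 16-bit displacements reach both halves of a 64kB GOT.

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint64_t GotStart = 0x10010000;       // hypothetical .got2 address
    const uint64_t TOCBase = GotStart + 0x8000; // .LTOC = start + 0x8000
    // Every slot in [GotStart, GotStart + 0x10000) is reachable from TOCBase
    // with a signed 16-bit displacement in [-0x8000, 0x7fff].
    const int64_t FirstOff = (int64_t)GotStart - (int64_t)TOCBase;
    const int64_t LastOff = (int64_t)(GotStart + 0xfffc) - (int64_t)TOCBase;
    assert(FirstOff == -0x8000 && LastOff == 0x7ffc);
    assert(FirstOff >= INT16_MIN && LastOff <= INT16_MAX);
    return 0;
  }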
-void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
+void PPCLinuxAsmPrinter::emitFunctionEntryLabel() {
// linux/ppc32 - Normal entry label.
if (!Subtarget->isPPC64() &&
(!isPositionIndependent() ||
MF->getFunction().getParent()->getPICLevel() == PICLevel::SmallPIC))
- return AsmPrinter::EmitFunctionEntryLabel();
+ return AsmPrinter::emitFunctionEntryLabel();
if (!Subtarget->isPPC64()) {
const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
if (PPCFI->usesPICBase() && !Subtarget->isSecurePlt()) {
- MCSymbol *RelocSymbol = PPCFI->getPICOffsetSymbol();
+ MCSymbol *RelocSymbol = PPCFI->getPICOffsetSymbol(*MF);
MCSymbol *PICBase = MF->getPICBaseSymbol();
- OutStreamer->EmitLabel(RelocSymbol);
+ OutStreamer->emitLabel(RelocSymbol);
const MCExpr *OffsExpr =
MCBinaryExpr::createSub(
@@ -1379,11 +1351,11 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
OutContext),
MCSymbolRefExpr::create(PICBase, OutContext),
OutContext);
- OutStreamer->EmitValue(OffsExpr, 4);
- OutStreamer->EmitLabel(CurrentFnSym);
+ OutStreamer->emitValue(OffsExpr, 4);
+ OutStreamer->emitLabel(CurrentFnSym);
return;
} else
- return AsmPrinter::EmitFunctionEntryLabel();
+ return AsmPrinter::emitFunctionEntryLabel();
}
// ELFv2 ABI - Normal entry label.
@@ -1397,17 +1369,17 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC."));
- MCSymbol *GlobalEPSymbol = PPCFI->getGlobalEPSymbol();
+ MCSymbol *GlobalEPSymbol = PPCFI->getGlobalEPSymbol(*MF);
const MCExpr *TOCDeltaExpr =
MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext),
MCSymbolRefExpr::create(GlobalEPSymbol,
OutContext),
OutContext);
- OutStreamer->EmitLabel(PPCFI->getTOCOffsetSymbol());
- OutStreamer->EmitValue(TOCDeltaExpr, 8);
+ OutStreamer->emitLabel(PPCFI->getTOCOffsetSymbol(*MF));
+ OutStreamer->emitValue(TOCDeltaExpr, 8);
}
- return AsmPrinter::EmitFunctionEntryLabel();
+ return AsmPrinter::emitFunctionEntryLabel();
}
// Emit an official procedure descriptor.
@@ -1415,61 +1387,56 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
MCSectionELF *Section = OutStreamer->getContext().getELFSection(
".opd", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
OutStreamer->SwitchSection(Section);
- OutStreamer->EmitLabel(CurrentFnSym);
- OutStreamer->EmitValueToAlignment(8);
+ OutStreamer->emitLabel(CurrentFnSym);
+ OutStreamer->emitValueToAlignment(8);
MCSymbol *Symbol1 = CurrentFnSymForSize;
// Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
// entry point.
- OutStreamer->EmitValue(MCSymbolRefExpr::create(Symbol1, OutContext),
+ OutStreamer->emitValue(MCSymbolRefExpr::create(Symbol1, OutContext),
8 /*size*/);
MCSymbol *Symbol2 = OutContext.getOrCreateSymbol(StringRef(".TOC."));
// Generates a R_PPC64_TOC relocation for TOC base insertion.
- OutStreamer->EmitValue(
+ OutStreamer->emitValue(
MCSymbolRefExpr::create(Symbol2, MCSymbolRefExpr::VK_PPC_TOCBASE, OutContext),
8/*size*/);
// Emit a null environment pointer.
- OutStreamer->EmitIntValue(0, 8 /* size */);
+ OutStreamer->emitIntValue(0, 8 /* size */);
OutStreamer->SwitchSection(Current.first, Current.second);
}
-bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
+void PPCLinuxAsmPrinter::emitEndOfAsmFile(Module &M) {
const DataLayout &DL = getDataLayout();
bool isPPC64 = DL.getPointerSizeInBits() == 64;
- PPCTargetStreamer &TS =
- static_cast<PPCTargetStreamer &>(*OutStreamer->getTargetStreamer());
+ PPCTargetStreamer *TS =
+ static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
if (!TOC.empty()) {
- MCSectionELF *Section;
-
- if (isPPC64)
- Section = OutStreamer->getContext().getELFSection(
- ".toc", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
- else
- Section = OutStreamer->getContext().getELFSection(
- ".got2", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ const char *Name = isPPC64 ? ".toc" : ".got2";
+ MCSectionELF *Section = OutContext.getELFSection(
+ Name, ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
OutStreamer->SwitchSection(Section);
+ if (!isPPC64)
+ OutStreamer->emitValueToAlignment(4);
for (const auto &TOCMapPair : TOC) {
const MCSymbol *const TOCEntryTarget = TOCMapPair.first;
MCSymbol *const TOCEntryLabel = TOCMapPair.second;
- OutStreamer->EmitLabel(TOCEntryLabel);
- if (isPPC64) {
- TS.emitTCEntry(*TOCEntryTarget);
- } else {
- OutStreamer->EmitValueToAlignment(4);
- OutStreamer->EmitSymbolValue(TOCEntryTarget, 4);
- }
+ OutStreamer->emitLabel(TOCEntryLabel);
+ if (isPPC64 && TS != nullptr)
+ TS->emitTCEntry(*TOCEntryTarget);
+ else
+ OutStreamer->emitSymbolValue(TOCEntryTarget, 4);
}
}
- return AsmPrinter::doFinalization(M);
+ PPCAsmPrinter::emitEndOfAsmFile(M);
}
/// EmitFunctionBodyStart - Emit a global entry point prefix for ELFv2.
-void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
+void PPCLinuxAsmPrinter::emitFunctionBodyStart() {
// In the ELFv2 ABI, in functions that use the TOC register, we need to
// provide two entry points. The ABI guarantees that when calling the
// local entry point, r2 is set up by the caller to contain the TOC base
@@ -1501,16 +1468,23 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
//
// This ensures we have r2 set up correctly while executing the function
// body, no matter which entry point is called.
- if (Subtarget->isELFv2ABI()
- // Only do all that if the function uses r2 in the first place.
- && !MF->getRegInfo().use_empty(PPC::X2)) {
+ const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
+ const bool UsesX2OrR2 = !MF->getRegInfo().use_empty(PPC::X2) ||
+ !MF->getRegInfo().use_empty(PPC::R2);
+ const bool PCrelGEPRequired = Subtarget->isUsingPCRelativeCalls() &&
+ UsesX2OrR2 && PPCFI->usesTOCBasePtr();
+ const bool NonPCrelGEPRequired = !Subtarget->isUsingPCRelativeCalls() &&
+ Subtarget->isELFv2ABI() && UsesX2OrR2;
+
+ // Only do all that if the function uses R2 as the TOC pointer
+ // in the first place. We don't need the global entry point if the
+ // function uses R2 as an allocatable register.
+ if (NonPCrelGEPRequired || PCrelGEPRequired) {
// Note: The logic here must be synchronized with the code in the
// branch-selection pass which sets the offset of the first block in the
// function. This matters because it affects the alignment.
- const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
-
- MCSymbol *GlobalEntryLabel = PPCFI->getGlobalEPSymbol();
- OutStreamer->EmitLabel(GlobalEntryLabel);
+ MCSymbol *GlobalEntryLabel = PPCFI->getGlobalEPSymbol(*MF);
+ OutStreamer->emitLabel(GlobalEntryLabel);
const MCSymbolRefExpr *GlobalEntryLabelExp =
MCSymbolRefExpr::create(GlobalEntryLabel, OutContext);
@@ -1520,21 +1494,19 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext),
GlobalEntryLabelExp, OutContext);
- const MCExpr *TOCDeltaHi =
- PPCMCExpr::createHa(TOCDeltaExpr, false, OutContext);
+ const MCExpr *TOCDeltaHi = PPCMCExpr::createHa(TOCDeltaExpr, OutContext);
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
.addReg(PPC::X2)
.addReg(PPC::X12)
.addExpr(TOCDeltaHi));
- const MCExpr *TOCDeltaLo =
- PPCMCExpr::createLo(TOCDeltaExpr, false, OutContext);
+ const MCExpr *TOCDeltaLo = PPCMCExpr::createLo(TOCDeltaExpr, OutContext);
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI)
.addReg(PPC::X2)
.addReg(PPC::X2)
.addExpr(TOCDeltaLo));
} else {
- MCSymbol *TOCOffset = PPCFI->getTOCOffsetSymbol();
+ MCSymbol *TOCOffset = PPCFI->getTOCOffsetSymbol(*MF);
const MCExpr *TOCOffsetDeltaExpr =
MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCOffset, OutContext),
GlobalEntryLabelExp, OutContext);
@@ -1549,8 +1521,8 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
.addReg(PPC::X12));
}
- MCSymbol *LocalEntryLabel = PPCFI->getLocalEPSymbol();
- OutStreamer->EmitLabel(LocalEntryLabel);
+ MCSymbol *LocalEntryLabel = PPCFI->getLocalEPSymbol(*MF);
+ OutStreamer->emitLabel(LocalEntryLabel);
const MCSymbolRefExpr *LocalEntryLabelExp =
MCSymbolRefExpr::create(LocalEntryLabel, OutContext);
const MCExpr *LocalOffsetExp =
@@ -1562,13 +1534,43 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
if (TS)
TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym), LocalOffsetExp);
+ } else if (Subtarget->isUsingPCRelativeCalls()) {
+ // When generating the entry point for a function we have a few scenarios
+ // based on whether or not that function uses R2 and whether or not that
+ // function makes calls (or is a leaf function).
+ // 1) A leaf function that does not use R2 (or treats it as callee-saved
+ // and preserves it). In this case st_other=0 and both
+ // the local and global entry points for the function are the same.
+ // No special entry point code is required.
+ // 2) A function uses the TOC pointer R2. This function may or may not have
+ // calls. In this case st_other=[2,6] and the global and local entry
+ // points are different. Code to correctly set up the TOC pointer in R2
+ // is put between the global and local entry points. This case is
+ // covered by the if statement above.
+ // 3) A function does not use the TOC pointer R2 but does have calls.
+ // In this case st_other=1 since we do not know whether or not any
+ // of the callees clobber R2. This case is dealt with in this else if
+ // block. Tail calls are considered calls and the st_other should also
+ // be set to 1 in that case as well.
+ // 4) The function does not use the TOC pointer but R2 is used inside
+ // the function. In this case st_other=1 once again.
+ // 5) This function uses inline asm. We mark R2 as reserved if the function
+ // has inline asm as we have to assume that it may be used.
+ if (MF->getFrameInfo().hasCalls() || MF->getFrameInfo().hasTailCall() ||
+ MF->hasInlineAsm() || (!PPCFI->usesTOCBasePtr() && UsesX2OrR2)) {
+ PPCTargetStreamer *TS =
+ static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
+ if (TS)
+ TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym),
+ MCConstantExpr::create(1, OutContext));
+ }
}
}
/// EmitFunctionBodyEnd - Print the traceback table before the .size
/// directive.
///
-void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() {
+void PPCLinuxAsmPrinter::emitFunctionBodyEnd() {
// Only the 64-bit target requires a traceback table. For now,
// we only emit the word of zeroes that GDB requires to find
// the end of the function, and zeroes for the eight-byte
@@ -1577,19 +1579,73 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() {
// the PPC64 ELF ABI (this is a low-priority item because GDB does not
// currently make use of these fields).
if (Subtarget->isPPC64()) {
- OutStreamer->EmitIntValue(0, 4/*size*/);
- OutStreamer->EmitIntValue(0, 8/*size*/);
+ OutStreamer->emitIntValue(0, 4/*size*/);
+ OutStreamer->emitIntValue(0, 8/*size*/);
}
}
+void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
+ MCSymbol *GVSym) const {
+
+ assert(MAI->hasVisibilityOnlyWithLinkage() &&
+ "AIX's linkage directives take a visibility setting.");
+
+ MCSymbolAttr LinkageAttr = MCSA_Invalid;
+ switch (GV->getLinkage()) {
+ case GlobalValue::ExternalLinkage:
+ LinkageAttr = GV->isDeclaration() ? MCSA_Extern : MCSA_Global;
+ break;
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::ExternalWeakLinkage:
+ LinkageAttr = MCSA_Weak;
+ break;
+ case GlobalValue::AvailableExternallyLinkage:
+ LinkageAttr = MCSA_Extern;
+ break;
+ case GlobalValue::PrivateLinkage:
+ return;
+ case GlobalValue::InternalLinkage:
+ assert(GV->getVisibility() == GlobalValue::DefaultVisibility &&
+ "InternalLinkage should not have other visibility setting.");
+ LinkageAttr = MCSA_LGlobal;
+ break;
+ case GlobalValue::AppendingLinkage:
+ llvm_unreachable("Should never emit this");
+ case GlobalValue::CommonLinkage:
+ llvm_unreachable("CommonLinkage of XCOFF should not come to this path");
+ }
+
+ assert(LinkageAttr != MCSA_Invalid && "LinkageAttr should not be MCSA_Invalid.");
+
+ MCSymbolAttr VisibilityAttr = MCSA_Invalid;
+ switch (GV->getVisibility()) {
+
+ // TODO: "exported" and "internal" Visibility need to go here.
+ case GlobalValue::DefaultVisibility:
+ break;
+ case GlobalValue::HiddenVisibility:
+ VisibilityAttr = MAI->getHiddenVisibilityAttr();
+ break;
+ case GlobalValue::ProtectedVisibility:
+ VisibilityAttr = MAI->getProtectedVisibilityAttr();
+ break;
+ }
+
+ OutStreamer->emitXCOFFSymbolLinkageWithVisibility(GVSym, LinkageAttr,
+ VisibilityAttr);
+}
+
void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) {
- // Get the function descriptor symbol.
- CurrentFnDescSym = getSymbol(&MF.getFunction());
- // Set the containing csect.
- MCSectionXCOFF *FnDescSec = OutStreamer->getContext().getXCOFFSection(
- CurrentFnDescSym->getName(), XCOFF::XMC_DS, XCOFF::XTY_SD,
- XCOFF::C_HIDEXT, SectionKind::getData());
- cast<MCSymbolXCOFF>(CurrentFnDescSym)->setContainingCsect(FnDescSec);
+ // Setup CurrentFnDescSym and its containing csect.
+ MCSectionXCOFF *FnDescSec =
+ cast<MCSectionXCOFF>(getObjFileLowering().getSectionForFunctionDescriptor(
+ &MF.getFunction(), TM));
+ FnDescSec->setAlignment(Align(Subtarget->isPPC64() ? 8 : 4));
+
+ CurrentFnDescSym = FnDescSec->getQualNameSymbol();
return AsmPrinter::SetupMachineFunction(MF);
}
@@ -1606,31 +1662,20 @@ void PPCAIXAsmPrinter::ValidateGV(const GlobalVariable *GV) {
report_fatal_error("COMDAT not yet supported by AIX.");
}
-const MCExpr *PPCAIXAsmPrinter::lowerConstant(const Constant *CV) {
- if (const Function *F = dyn_cast<Function>(CV)) {
- MCSymbolXCOFF *FSym = cast<MCSymbolXCOFF>(getSymbol(F));
- if (!FSym->hasContainingCsect()) {
- const XCOFF::StorageClass SC =
- F->isDeclaration()
- ? TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(F)
- : XCOFF::C_HIDEXT;
- MCSectionXCOFF *Csect = OutStreamer->getContext().getXCOFFSection(
- FSym->getName(), XCOFF::XMC_DS,
- F->isDeclaration() ? XCOFF::XTY_ER : XCOFF::XTY_SD, SC,
- SectionKind::getData());
- FSym->setContainingCsect(Csect);
- }
- return MCSymbolRefExpr::create(
- FSym->getContainingCsect()->getQualNameSymbol(), OutContext);
- }
- return PPCAsmPrinter::lowerConstant(CV);
+static bool isSpecialLLVMGlobalArrayForStaticInit(const GlobalVariable *GV) {
+ return StringSwitch<bool>(GV->getName())
+ .Cases("llvm.global_ctors", "llvm.global_dtors", true)
+ .Default(false);
}
-void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
ValidateGV(GV);
- // External global variables are already handled.
- if (!GV->hasInitializer())
+ // TODO: Update the handling of global arrays for static init when we support
+ // the ".ref" directive.
+ // Otherwise, we can skip these arrays, because the AIX linker collects
+ // static init functions simply based on their name.
+ if (isSpecialLLVMGlobalArrayForStaticInit(GV))
return;
// Create the symbol, set its storage class.
@@ -1638,156 +1683,133 @@ void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
GVSym->setStorageClass(
TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV));
+ if (GV->isDeclarationForLinker()) {
+ emitLinkage(GV, GVSym);
+ return;
+ }
+
SectionKind GVKind = getObjFileLowering().getKindForGlobal(GV, TM);
- if ((!GVKind.isGlobalWriteableData() && !GVKind.isReadOnly()) ||
- GVKind.isMergeable2ByteCString() || GVKind.isMergeable4ByteCString())
+ if (!GVKind.isGlobalWriteableData() && !GVKind.isReadOnly())
report_fatal_error("Encountered a global variable kind that is "
"not supported yet.");
- // Create the containing csect and switch to it.
MCSectionXCOFF *Csect = cast<MCSectionXCOFF>(
getObjFileLowering().SectionForGlobal(GV, GVKind, TM));
+
+ // Switch to the containing csect.
OutStreamer->SwitchSection(Csect);
- GVSym->setContainingCsect(Csect);
const DataLayout &DL = GV->getParent()->getDataLayout();
// Handle common symbols.
if (GVKind.isCommon() || GVKind.isBSSLocal()) {
- unsigned Align =
- GV->getAlignment() ? GV->getAlignment() : DL.getPreferredAlignment(GV);
+ Align Alignment = GV->getAlign().getValueOr(DL.getPreferredAlign(GV));
uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
if (GVKind.isBSSLocal())
- OutStreamer->EmitXCOFFLocalCommonSymbol(
- GVSym, Size, Csect->getQualNameSymbol(), Align);
+ OutStreamer->emitXCOFFLocalCommonSymbol(
+ OutContext.getOrCreateSymbol(GVSym->getUnqualifiedName()), Size,
+ GVSym, Alignment.value());
else
- OutStreamer->EmitCommonSymbol(Csect->getQualNameSymbol(), Size, Align);
+ OutStreamer->emitCommonSymbol(GVSym, Size, Alignment.value());
return;
}
MCSymbol *EmittedInitSym = GVSym;
- EmitLinkage(GV, EmittedInitSym);
- EmitAlignment(getGVAlignment(GV, DL), GV);
- OutStreamer->EmitLabel(EmittedInitSym);
- EmitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
+ emitLinkage(GV, EmittedInitSym);
+ emitAlignment(getGVAlignment(GV, DL), GV);
+ OutStreamer->emitLabel(EmittedInitSym);
+ emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
}
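The new alignment query above only falls back to the preferred alignment when the global carries no explicit one; a small standalone model of that getValueOr pattern using std::optional (the numbers are invented):

#include <cstdio>
#include <optional>

// Model of: Align Alignment = GV->getAlign().getValueOr(DL.getPreferredAlign(GV));
unsigned chooseAlignment(std::optional<unsigned> Explicit, unsigned Preferred) {
  return Explicit.value_or(Preferred);
}

int main() {
  std::printf("%u %u\n",
              chooseAlignment(std::nullopt, 8), // no explicit align -> 8
              chooseAlignment(16, 8));          // explicit align wins -> 16
}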
-void PPCAIXAsmPrinter::EmitFunctionDescriptor() {
+void PPCAIXAsmPrinter::emitFunctionDescriptor() {
const DataLayout &DL = getDataLayout();
const unsigned PointerSize = DL.getPointerSizeInBits() == 64 ? 8 : 4;
MCSectionSubPair Current = OutStreamer->getCurrentSection();
// Emit function descriptor.
OutStreamer->SwitchSection(
- cast<MCSymbolXCOFF>(CurrentFnDescSym)->getContainingCsect());
- OutStreamer->EmitLabel(CurrentFnDescSym);
+ cast<MCSymbolXCOFF>(CurrentFnDescSym)->getRepresentedCsect());
// Emit function entry point address.
- OutStreamer->EmitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext),
+ OutStreamer->emitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext),
PointerSize);
// Emit TOC base address.
- const MCSectionXCOFF *TOCBaseSec = OutStreamer->getContext().getXCOFFSection(
- StringRef("TOC"), XCOFF::XMC_TC0, XCOFF::XTY_SD, XCOFF::C_HIDEXT,
- SectionKind::getData());
- const MCSymbol *TOCBaseSym = TOCBaseSec->getQualNameSymbol();
- OutStreamer->EmitValue(MCSymbolRefExpr::create(TOCBaseSym, OutContext),
+ const MCSymbol *TOCBaseSym =
+ cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection())
+ ->getQualNameSymbol();
+ OutStreamer->emitValue(MCSymbolRefExpr::create(TOCBaseSym, OutContext),
PointerSize);
// Emit a null environment pointer.
- OutStreamer->EmitIntValue(0, PointerSize);
+ OutStreamer->emitIntValue(0, PointerSize);
OutStreamer->SwitchSection(Current.first, Current.second);
}
-void PPCAIXAsmPrinter::EmitEndOfAsmFile(Module &M) {
+void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
// If there are no functions in this module, we will never need to reference
// the TOC base.
if (M.empty())
return;
- // Emit TOC base.
- MCSectionXCOFF *TOCBaseSection = OutStreamer->getContext().getXCOFFSection(
- StringRef("TOC"), XCOFF::XMC_TC0, XCOFF::XTY_SD, XCOFF::C_HIDEXT,
- SectionKind::getData());
- // The TOC-base always has 0 size, but 4 byte alignment.
- TOCBaseSection->setAlignment(Align(4));
// Switch to section to emit TOC base.
- OutStreamer->SwitchSection(TOCBaseSection);
+ OutStreamer->SwitchSection(getObjFileLowering().getTOCBaseSection());
- PPCTargetStreamer &TS =
- static_cast<PPCTargetStreamer &>(*OutStreamer->getTargetStreamer());
+ PPCTargetStreamer *TS =
+ static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
+
+ const unsigned EntryByteSize = Subtarget->isPPC64() ? 8 : 4;
+ const unsigned TOCEntriesByteSize = TOC.size() * EntryByteSize;
+ // TODO: If the combined size of the TOC entries exceeds 32767 bytes, we run
+ // out of positive 16-bit displacement to reach the TOC entry. We need to
+ // decide how to handle sizes larger than that later.
+ if (TOCEntriesByteSize > 32767) {
+ report_fatal_error("Handling of TOC entry displacement larger than 32767 "
+ "is not yet implemented.");
+ }
for (auto &I : TOC) {
// Setup the csect for the current TC entry.
- MCSectionXCOFF *TCEntry = OutStreamer->getContext().getXCOFFSection(
- cast<MCSymbolXCOFF>(I.first)->getUnqualifiedName(), XCOFF::XMC_TC,
- XCOFF::XTY_SD, XCOFF::C_HIDEXT, SectionKind::getData());
- cast<MCSymbolXCOFF>(I.second)->setContainingCsect(TCEntry);
+ MCSectionXCOFF *TCEntry = cast<MCSectionXCOFF>(
+ getObjFileLowering().getSectionForTOCEntry(I.first));
OutStreamer->SwitchSection(TCEntry);
- OutStreamer->EmitLabel(I.second);
- TS.emitTCEntry(*I.first);
+ OutStreamer->emitLabel(I.second);
+ if (TS != nullptr)
+ TS->emitTCEntry(*I.first);
}
}
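The 32767 guard comes from the signed 16-bit displacement used to reach TOC entries from the TOC base; a standalone sketch of the bound check, with the per-entry sizes mirroring the code above and a made-up entry count:

#include <cstdint>
#include <cstdio>

// Each TOC entry is pointer sized: 8 bytes on PPC64, 4 bytes on PPC32.
uint64_t tocBytes(uint64_t NumEntries, bool IsPPC64) {
  return NumEntries * (IsPPC64 ? 8 : 4);
}

int main() {
  // A D-form load reaches the TOC with a signed 16-bit offset, so only
  // 32767 bytes of positive displacement are available from the TOC base.
  const uint64_t Limit = 32767;
  uint64_t Size = tocBytes(4100, /*IsPPC64=*/true); // hypothetical module
  std::printf("%llu bytes, %s\n", (unsigned long long)Size,
              Size > Limit ? "would hit the report_fatal_error path"
                           : "fits in positive displacement");
}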
-MCSymbol *
-PPCAIXAsmPrinter::getMCSymbolForTOCPseudoMO(const MachineOperand &MO) {
- const GlobalObject *GO = nullptr;
-
- // If the MO is a function or certain kind of globals, we want to make sure to
- // refer to the csect symbol, otherwise we can just do the default handling.
- if (MO.getType() != MachineOperand::MO_GlobalAddress ||
- !(GO = dyn_cast<const GlobalObject>(MO.getGlobal())))
- return PPCAsmPrinter::getMCSymbolForTOCPseudoMO(MO);
-
- // Do an early error check for globals we don't support. This will go away
- // eventually.
- const auto *GV = dyn_cast<const GlobalVariable>(GO);
- if (GV) {
- ValidateGV(GV);
- }
+bool PPCAIXAsmPrinter::doInitialization(Module &M) {
+ if (M.alias_size() > 0u)
+ report_fatal_error(
+ "module has aliases, which LLVM does not yet support for AIX");
- MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(getSymbol(GO));
-
- // If the global object is a global variable without initializer or is a
- // declaration of a function, then XSym is an external referenced symbol.
- // Hence we may need to explictly create a MCSectionXCOFF for it so that we
- // can return its symbol later.
- if (GO->isDeclaration()) {
- if (!XSym->hasContainingCsect()) {
- // Make sure the storage class is set.
- const XCOFF::StorageClass SC =
- TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO);
- XSym->setStorageClass(SC);
-
- MCSectionXCOFF *Csect = OutStreamer->getContext().getXCOFFSection(
- XSym->getName(), isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA,
- XCOFF::XTY_ER, SC, SectionKind::getMetadata());
- XSym->setContainingCsect(Csect);
- }
+ const bool Result = PPCAsmPrinter::doInitialization(M);
- return XSym->getContainingCsect()->getQualNameSymbol();
- }
+ auto setCsectAlignment = [this](const GlobalObject *GO) {
+ // Declarations have 0 alignment which is set by default.
+ if (GO->isDeclarationForLinker())
+ return;
- // Handle initialized global variables and defined functions.
- SectionKind GOKind = getObjFileLowering().getKindForGlobal(GO, TM);
-
- if (GOKind.isText()) {
- // If the MO is a function, we want to make sure to refer to the function
- // descriptor csect.
- return OutStreamer->getContext()
- .getXCOFFSection(XSym->getName(), XCOFF::XMC_DS, XCOFF::XTY_SD,
- XCOFF::C_HIDEXT, SectionKind::getData())
- ->getQualNameSymbol();
- } else if (GOKind.isCommon() || GOKind.isBSSLocal()) {
- // If the operand is a common then we should refer to the csect symbol.
- return cast<MCSectionXCOFF>(
- getObjFileLowering().SectionForGlobal(GO, GOKind, TM))
- ->getQualNameSymbol();
- }
+ SectionKind GOKind = getObjFileLowering().getKindForGlobal(GO, TM);
+ MCSectionXCOFF *Csect = cast<MCSectionXCOFF>(
+ getObjFileLowering().SectionForGlobal(GO, GOKind, TM));
+
+ Align GOAlign = getGVAlignment(GO, GO->getParent()->getDataLayout());
+ if (GOAlign > Csect->getAlignment())
+ Csect->setAlignment(GOAlign);
+ };
+
+ // We need to know, up front, the alignment of csects for the assembly path,
+ // because once a .csect directive gets emitted, we cannot change the
+ // alignment value on it.
+ for (const auto &G : M.globals())
+ setCsectAlignment(&G);
+
+ for (const auto &F : M)
+ setCsectAlignment(&F);
- // Other global variables are refered to by labels inside of a single csect,
- // so refer to the label directly.
- return getSymbol(GV);
+ return Result;
}
/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
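A standalone sketch of the pre-pass idea in doInitialization above: the maximum alignment of every global mapped to a csect is folded in before any .csect directive is printed, since the directive cannot be amended afterwards. The csect names and alignments below are illustrative only:

#include <algorithm>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct Global {
  std::string Csect; // csect the global is placed in
  unsigned Align;    // required alignment in bytes
};

int main() {
  std::vector<Global> Globals = {
      {".data", 4}, {".data", 16}, {".rodata", 8}}; // hypothetical module
  std::map<std::string, unsigned> CsectAlign;
  // Mirror of the setCsectAlignment lambda: keep the maximum alignment seen.
  for (const Global &G : Globals)
    CsectAlign[G.Csect] = std::max(CsectAlign[G.Csect], G.Align);
  for (const auto &KV : CsectAlign)
    std::printf("csect %s: max alignment %u bytes\n", KV.first.c_str(),
                KV.second);
}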
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
index 104cf2ba3c00..2259a29f838a 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -220,7 +220,7 @@ class PPCBoolRetToInt : public FunctionPass {
auto Defs = findAllDefs(U);
// If the values are all Constants or Arguments, don't bother
- if (llvm::none_of(Defs, isa<Instruction, Value *>))
+ if (llvm::none_of(Defs, [](Value *V) { return isa<Instruction>(V); }))
return false;
// Presently, we only know how to handle PHINode, Constant, Arguments and
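The PPCBoolRetToInt change replaces a template-based predicate with an explicit lambda; the same idiom is shown below with std::none_of and a placeholder Value type (not LLVM's):

#include <algorithm>
#include <cstdio>
#include <vector>

struct Value { bool IsInstruction; };

int main() {
  std::vector<Value *> Defs;
  Value A{false}, B{true};
  Defs.push_back(&A);
  Defs.push_back(&B);
  // Same shape as llvm::none_of(Defs, [](Value *V) { return isa<Instruction>(V); })
  bool AllNonInstructions = std::none_of(
      Defs.begin(), Defs.end(), [](Value *V) { return V->IsInstruction; });
  std::printf("bail out early: %s\n", AllNonInstructions ? "yes" : "no");
}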
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
index 109b665e0d57..50ae4450a837 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
@@ -272,6 +272,11 @@ bool PPCBranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) {
return false;
}
+ if (Cand.BranchBlock->mayHaveInlineAsmBr()) {
+ LLVM_DEBUG(dbgs() << "Inline Asm Br - skip\n");
+ return false;
+ }
+
// For now only consider triangles (i.e, BranchTargetBlock is set,
// FalseMBB is null, and BranchTargetBlock is a successor to BranchBlock)
if (!Cand.BranchTargetBlock || FalseMBB ||
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
index cdff4d383d23..47b9e97f0d67 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -31,6 +31,9 @@ using namespace llvm;
#define DEBUG_TYPE "ppc-branch-select"
STATISTIC(NumExpanded, "Number of branches expanded to long format");
+STATISTIC(NumPrefixed, "Number of prefixed instructions");
+STATISTIC(NumPrefixedAligned,
+ "Number of prefixed instructions that have been aligned");
namespace {
struct PPCBSel : public MachineFunctionPass {
@@ -82,7 +85,7 @@ FunctionPass *llvm::createPPCBranchSelectionPass() {
unsigned PPCBSel::GetAlignmentAdjustment(MachineBasicBlock &MBB,
unsigned Offset) {
const Align Alignment = MBB.getAlignment();
- if (Alignment == Align::None())
+ if (Alignment == Align(1))
return 0;
const Align ParentAlign = MBB.getParent()->getAlignment();
@@ -134,10 +137,38 @@ unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) {
}
unsigned BlockSize = 0;
+ unsigned UnalignedBytesRemaining = 0;
for (MachineInstr &MI : *MBB) {
- BlockSize += TII->getInstSizeInBytes(MI);
+ unsigned MINumBytes = TII->getInstSizeInBytes(MI);
if (MI.isInlineAsm() && (FirstImpreciseBlock < 0))
FirstImpreciseBlock = MBB->getNumber();
+ if (TII->isPrefixed(MI.getOpcode())) {
+ NumPrefixed++;
+
+ // All 8 byte (prefixed) instructions may require alignment: each one
+ // may need to be preceded by an extra 4 byte alignment nop.
+ // This means that an 8 byte instruction may require 12 bytes
+ // (8 for the instruction itself and 4 for the alignment nop).
+ // This happens when the 8 byte instruction can be aligned to a 64 byte
+ // boundary by adding only a 4 byte nop.
+ // We don't know the alignment at this point in the code so we have to
+ // adopt a more pessimistic approach. If an instruction may need
+ // alignment we assume that it does need alignment and add 4 bytes to
+ // it. As a result we may end up with more long branches than before
+ // but we are in the safe position where if we need a long branch we
+ // have one.
+ // The if statement checks to make sure that two 8 byte instructions
+ // are at least 64 bytes away from each other. It is not possible for
+ // two instructions that both need alignment to be within 64 bytes of
+ // each other.
+ if (!UnalignedBytesRemaining) {
+ BlockSize += 4;
+ UnalignedBytesRemaining = 60;
+ NumPrefixedAligned++;
+ }
+ }
+ UnalignedBytesRemaining -= std::min(UnalignedBytesRemaining, MINumBytes);
+ BlockSize += MINumBytes;
}
BlockSizes[MBB->getNumber()].first = BlockSize;
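A standalone sketch of the pessimistic size accounting added above: every prefixed (8 byte) instruction is assumed to need a 4 byte alignment nop, but at most once per 64 bytes, which is what UnalignedBytesRemaining tracks. The instruction stream and the "size 8 means prefixed" shortcut are made up for illustration:

#include <algorithm>
#include <cstdio>
#include <vector>

// Size in bytes of each instruction; 8 stands in for a prefixed instruction.
unsigned computeBlockSize(const std::vector<unsigned> &InstSizes) {
  unsigned BlockSize = 0;
  unsigned UnalignedBytesRemaining = 0;
  for (unsigned Bytes : InstSizes) {
    if (Bytes == 8 && !UnalignedBytesRemaining) {
      BlockSize += 4;               // pessimistic alignment nop
      UnalignedBytesRemaining = 60; // next 60 bytes cannot need another one
    }
    UnalignedBytesRemaining -= std::min(UnalignedBytesRemaining, Bytes);
    BlockSize += Bytes;
  }
  return BlockSize;
}

int main() {
  // Two prefixed instructions 12 bytes apart: only the first one is padded.
  std::printf("%u\n", computeBlockSize({8, 4, 8, 4})); // 4 + 8 + 4 + 8 + 4 = 28
}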
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index 4ce705300e1b..bb12e05173a6 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -46,7 +46,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
-#include "llvm/PassSupport.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.td
index 369b9ce1a711..1eaa7f7a44b3 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -283,15 +283,6 @@ def CC_PPC32_SVR4_ByVal : CallingConv<[
def CSR_Altivec : CalleeSavedRegs<(add V20, V21, V22, V23, V24, V25, V26, V27,
V28, V29, V30, V31)>;
-def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28,
- R29, R30, R31, F14, F15, F16, F17, F18,
- F19, F20, F21, F22, F23, F24, F25, F26,
- F27, F28, F29, F30, F31, CR2, CR3, CR4
- )>;
-
-def CSR_Darwin32_Altivec : CalleeSavedRegs<(add CSR_Darwin32, CSR_Altivec)>;
-
// SPE does not use FPRs, so break out the common register set as base.
def CSR_SVR432_COMM : CalleeSavedRegs<(add R14, R15, R16, R17, R18, R19, R20,
R21, R22, R23, R24, R25, R26, R27,
@@ -316,45 +307,20 @@ def CSR_AIX32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
F27, F28, F29, F30, F31, CR2, CR3, CR4
)>;
-def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20,
- X21, X22, X23, X24, X25, X26, X27, X28,
- X29, X30, X31, F14, F15, F16, F17, F18,
- F19, F20, F21, F22, F23, F24, F25, F26,
- F27, F28, F29, F30, F31, CR2, CR3, CR4
- )>;
-
-def CSR_Darwin64_Altivec : CalleeSavedRegs<(add CSR_Darwin64, CSR_Altivec)>;
-
-def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
+// Common CalleeSavedRegs for SVR4 and AIX.
+def CSR_PPC64 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
X21, X22, X23, X24, X25, X26, X27, X28,
X29, X30, X31, F14, F15, F16, F17, F18,
F19, F20, F21, F22, F23, F24, F25, F26,
F27, F28, F29, F30, F31, CR2, CR3, CR4
)>;
-def CSR_AIX64 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
- X21, X22, X23, X24, X25, X26, X27, X28,
- X29, X30, X31, F14, F15, F16, F17, F18,
- F19, F20, F21, F22, F23, F24, F25, F26,
- F27, F28, F29, F30, F31, CR2, CR3, CR4
- )>;
-
-// CSRs that are handled by prologue, epilogue.
-def CSR_SRV464_TLS_PE : CalleeSavedRegs<(add)>;
-
-def CSR_SVR464_ViaCopy : CalleeSavedRegs<(add CSR_SVR464)>;
-
-def CSR_SVR464_Altivec : CalleeSavedRegs<(add CSR_SVR464, CSR_Altivec)>;
-
-def CSR_SVR464_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_Altivec)>;
-
-def CSR_SVR464_R2 : CalleeSavedRegs<(add CSR_SVR464, X2)>;
-def CSR_SVR464_R2_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_R2)>;
+def CSR_PPC64_Altivec : CalleeSavedRegs<(add CSR_PPC64, CSR_Altivec)>;
-def CSR_SVR464_R2_Altivec : CalleeSavedRegs<(add CSR_SVR464_Altivec, X2)>;
+def CSR_PPC64_R2 : CalleeSavedRegs<(add CSR_PPC64, X2)>;
-def CSR_SVR464_R2_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_R2_Altivec)>;
+def CSR_PPC64_R2_Altivec : CalleeSavedRegs<(add CSR_PPC64_Altivec, X2)>;
def CSR_NoRegs : CalleeSavedRegs<(add)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
index aa5d830b549e..c9f74bbf861c 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -90,8 +90,8 @@ protected:
// This is a conditional branch to the return. Replace the branch
// with a bclr.
BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR))
- .addImm(J->getOperand(0).getImm())
- .addReg(J->getOperand(1).getReg())
+ .add(J->getOperand(0))
+ .add(J->getOperand(1))
.copyImplicitOps(*I);
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
@@ -106,7 +106,7 @@ protected:
BuildMI(
**PI, J, J->getDebugLoc(),
TII->get(J->getOpcode() == PPC::BC ? PPC::BCLR : PPC::BCLRn))
- .addReg(J->getOperand(0).getReg())
+ .add(J->getOperand(0))
.copyImplicitOps(*I);
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
index e8ef451c7ec9..4c74e82cf041 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
@@ -381,21 +381,10 @@ void PPCExpandISEL::reorganizeBlockLayout(BlockISELList &BIL,
MBB->end());
NewSuccessor->transferSuccessorsAndUpdatePHIs(MBB);
- // Copy the original liveIns of MBB to NewSuccessor.
- for (auto &LI : MBB->liveins())
- NewSuccessor->addLiveIn(LI);
-
- // After splitting the NewSuccessor block, Regs defined but not killed
- // in MBB should be treated as liveins of NewSuccessor.
- // Note: Cannot use stepBackward instead since we are using the Reg
- // liveness state at the end of MBB (liveOut of MBB) as the liveIn for
- // NewSuccessor. Otherwise, will cause cyclic dependence.
- LivePhysRegs LPR(*MF->getSubtarget<PPCSubtarget>().getRegisterInfo());
- SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 2> Clobbers;
- for (MachineInstr &MI : *MBB)
- LPR.stepForward(MI, Clobbers);
- for (auto &LI : LPR)
- NewSuccessor->addLiveIn(LI);
+ // Update the liveins for NewSuccessor.
+ LivePhysRegs LPR;
+ computeAndAddLiveIns(LPR, *NewSuccessor);
+
} else {
// Remove successor from MBB.
MBB->removeSuccessor(Successor);
@@ -441,44 +430,26 @@ void PPCExpandISEL::populateBlocks(BlockISELList &BIL) {
// condition is true
MachineOperand &FalseValue = MI->getOperand(2); // Value to store if
// condition is false
- MachineOperand &ConditionRegister = MI->getOperand(3); // Condition
LLVM_DEBUG(dbgs() << "Dest: " << Dest << "\n");
LLVM_DEBUG(dbgs() << "TrueValue: " << TrueValue << "\n");
LLVM_DEBUG(dbgs() << "FalseValue: " << FalseValue << "\n");
- LLVM_DEBUG(dbgs() << "ConditionRegister: " << ConditionRegister << "\n");
+ LLVM_DEBUG(dbgs() << "ConditionRegister: " << MI->getOperand(3) << "\n");
// If the Dest Register and True Value Register are not the same one, we
// need the True Block.
bool IsADDIInstRequired = !useSameRegister(Dest, TrueValue);
bool IsORIInstRequired = !useSameRegister(Dest, FalseValue);
- if (IsADDIInstRequired) {
- // Copy the result into the destination if the condition is true.
+ // Copy the result into the destination if the condition is true.
+ if (IsADDIInstRequired)
BuildMI(*TrueBlock, TrueBlockI, dl,
TII->get(isISEL8(*MI) ? PPC::ADDI8 : PPC::ADDI))
.add(Dest)
.add(TrueValue)
.add(MachineOperand::CreateImm(0));
- // Add the LiveIn registers required by true block.
- TrueBlock->addLiveIn(TrueValue.getReg());
- }
-
- if (IsORIInstRequired) {
- // Add the LiveIn registers required by false block.
- FalseBlock->addLiveIn(FalseValue.getReg());
- }
-
- if (NewSuccessor) {
- // Add the LiveIn registers required by NewSuccessor block.
- NewSuccessor->addLiveIn(Dest.getReg());
- NewSuccessor->addLiveIn(TrueValue.getReg());
- NewSuccessor->addLiveIn(FalseValue.getReg());
- NewSuccessor->addLiveIn(ConditionRegister.getReg());
- }
-
- // Copy the value into the destination if the condition is false.
+ // Copy the result into the destination if the condition is false.
if (IsORIInstRequired)
BuildMI(*FalseBlock, FalseBlockI, dl,
TII->get(isISEL8(*MI) ? PPC::ORI8 : PPC::ORI))
@@ -490,6 +461,18 @@ void PPCExpandISEL::populateBlocks(BlockISELList &BIL) {
NumExpanded++;
}
+
+ if (IsTrueBlockRequired) {
+ // Update the liveins for TrueBlock.
+ LivePhysRegs LPR;
+ computeAndAddLiveIns(LPR, *TrueBlock);
+ }
+
+ if (IsFalseBlockRequired) {
+ // Update the liveins for FalseBlock.
+ LivePhysRegs LPR;
+ computeAndAddLiveIns(LPR, *FalseBlock);
+ }
}
void PPCExpandISEL::expandMergeableISELs(BlockISELList &BIL) {
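The PPCExpandISEL hunks above replace hand-maintained addLiveIn calls with LLVM's computeAndAddLiveIns helper; the standalone model below captures the underlying computation, a backward walk that starts from the block's live-outs, removes defs and adds uses (register names and the Inst type are invented):

#include <cstdio>
#include <set>
#include <string>
#include <vector>

struct Inst { std::set<std::string> Defs, Uses; };

// Simplified model of computeAndAddLiveIns: start from the union of the
// successors' live-ins (the block's live-outs) and walk the instructions
// backwards, removing defs and adding uses.
std::set<std::string> computeLiveIns(const std::vector<Inst> &Block,
                                     const std::set<std::string> &LiveOut) {
  std::set<std::string> Live = LiveOut;
  for (auto I = Block.rbegin(); I != Block.rend(); ++I) {
    for (const auto &D : I->Defs) Live.erase(D);
    for (const auto &U : I->Uses) Live.insert(U);
  }
  return Live;
}

int main() {
  // r3 = r4 + r5; the successor needs r3 and r6 live on entry.
  std::vector<Inst> Block = {{{"r3"}, {"r4", "r5"}}};
  for (const auto &R : computeLiveIns(Block, {"r3", "r6"}))
    std::printf("livein: %s\n", R.c_str()); // r4 r5 r6
}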
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index d8425d89da92..39790ac9a8aa 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -87,6 +87,7 @@ class PPCFastISel final : public FastISel {
const TargetMachine &TM;
const PPCSubtarget *PPCSubTarget;
+ const PPCSubtarget *Subtarget;
PPCFunctionInfo *PPCFuncInfo;
const TargetInstrInfo &TII;
const TargetLowering &TLI;
@@ -97,12 +98,12 @@ class PPCFastISel final : public FastISel {
const TargetLibraryInfo *LibInfo)
: FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
PPCSubTarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
+ Subtarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
- TII(*PPCSubTarget->getInstrInfo()),
- TLI(*PPCSubTarget->getTargetLowering()),
+ TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()),
Context(&FuncInfo.Fn->getContext()) {}
- // Backend specific FastISel code.
+ // Backend specific FastISel code.
private:
bool fastSelectInstruction(const Instruction *I) override;
unsigned fastMaterializeConstant(const Constant *C) override;
@@ -456,7 +457,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
bool IsZExt, unsigned FP64LoadOpc) {
unsigned Opc;
bool UseOffset = true;
- bool HasSPE = PPCSubTarget->hasSPE();
+ bool HasSPE = Subtarget->hasSPE();
// If ResultReg is given, it determines the register class of the load.
// Otherwise, RC is the register class to use. If the result of the
@@ -498,7 +499,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
UseOffset = ((Addr.Offset & 3) == 0);
break;
case MVT::f32:
- Opc = PPCSubTarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
+ Opc = Subtarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
break;
case MVT::f64:
Opc = FP64LoadOpc;
@@ -536,7 +537,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
Addr.Offset),
MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
- MFI.getObjectAlignment(Addr.Base.FI));
+ MFI.getObjectAlign(Addr.Base.FI));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
@@ -614,7 +615,7 @@ bool PPCFastISel::SelectLoad(const Instruction *I) {
Register ResultReg = 0;
if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true,
- PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
+ Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
return false;
updateValueMap(I, ResultReg);
return true;
@@ -647,10 +648,10 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
UseOffset = ((Addr.Offset & 3) == 0);
break;
case MVT::f32:
- Opc = PPCSubTarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
+ Opc = Subtarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
break;
case MVT::f64:
- Opc = PPCSubTarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
+ Opc = Subtarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
break;
}
@@ -682,7 +683,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
Addr.Offset),
MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
- MFI.getObjectAlignment(Addr.Base.FI));
+ MFI.getObjectAlign(Addr.Base.FI));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
.addReg(SrcReg)
@@ -794,8 +795,9 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
return false;
BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
- .addImm(PPCSubTarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
- .addReg(CondReg).addMBB(TBB);
+ .addImm(Subtarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
+ .addReg(CondReg)
+ .addMBB(TBB);
finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
@@ -827,7 +829,7 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
- if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits())
+ if (SrcVT == MVT::i1 && Subtarget->useCRBits())
return false;
// See if operand 2 is an immediate encodeable in the compare.
@@ -836,7 +838,7 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
// similar to ARM in this regard.
long Imm = 0;
bool UseImm = false;
- const bool HasSPE = PPCSubTarget->hasSPE();
+ const bool HasSPE = Subtarget->hasSPE();
// Only 16-bit integer constants can be represented in compares for
// PowerPC. Others will be materialized into a register.
@@ -988,7 +990,7 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
// Round the result to single precision.
unsigned DestReg;
auto RC = MRI.getRegClass(SrcReg);
- if (PPCSubTarget->hasSPE()) {
+ if (Subtarget->hasSPE()) {
DestReg = createResultReg(&PPC::GPRCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::EFSCFD), DestReg)
@@ -1030,7 +1032,7 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
// Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
Address Addr;
Addr.BaseType = Address::FrameIndexBase;
- Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
+ Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);
// Store the value from the GPR.
if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
@@ -1043,10 +1045,10 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
if (SrcVT == MVT::i32) {
if (!IsSigned) {
LoadOpc = PPC::LFIWZX;
- Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
- } else if (PPCSubTarget->hasLFIWAX()) {
+ Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
+ } else if (Subtarget->hasLFIWAX()) {
LoadOpc = PPC::LFIWAX;
- Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
+ Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
}
}
@@ -1086,7 +1088,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
return false;
// Shortcut for SPE. Doesn't need to store/load, since it's all in the GPRs
- if (PPCSubTarget->hasSPE()) {
+ if (Subtarget->hasSPE()) {
unsigned Opc;
if (DstVT == MVT::f32)
Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI;
@@ -1103,7 +1105,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
// We can only lower an unsigned convert if we have the newer
// floating-point conversion operations.
- if (!IsSigned && !PPCSubTarget->hasFPCVT())
+ if (!IsSigned && !Subtarget->hasFPCVT())
return false;
// FIXME: For now we require the newer floating-point conversion operations
@@ -1111,7 +1113,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
// to single-precision float. Otherwise we have to generate a lot of
// fiddly code to avoid double rounding. If necessary, the fiddly code
// can be found in PPCTargetLowering::LowerINT_TO_FP().
- if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT())
+ if (DstVT == MVT::f32 && !Subtarget->hasFPCVT())
return false;
// Extend the input if necessary.
@@ -1159,7 +1161,7 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
// easiest code gen possible.
Address Addr;
Addr.BaseType = Address::FrameIndexBase;
- Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
+ Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);
// Store the value from the FPR.
if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
@@ -1168,7 +1170,7 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
// Reload it into a GPR. If we want an i32 on big endian, modify the
// address to have a 4-byte offset so we load from the right place.
if (VT == MVT::i32)
- Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
+ Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
// Look at the currently assigned register for this instruction
// to determine the required register class.
@@ -1196,8 +1198,8 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
return false;
// If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG.
- if (DstVT == MVT::i64 && !IsSigned &&
- !PPCSubTarget->hasFPCVT() && !PPCSubTarget->hasSPE())
+ if (DstVT == MVT::i64 && !IsSigned && !Subtarget->hasFPCVT() &&
+ !Subtarget->hasSPE())
return false;
Value *Src = I->getOperand(0);
@@ -1226,7 +1228,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
unsigned Opc;
auto RC = MRI.getRegClass(SrcReg);
- if (PPCSubTarget->hasSPE()) {
+ if (Subtarget->hasSPE()) {
DestReg = createResultReg(&PPC::GPRCRegClass);
if (IsSigned)
Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
@@ -1234,7 +1236,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
} else if (isVSFRCRegClass(RC)) {
DestReg = createResultReg(&PPC::VSFRCRegClass);
- if (DstVT == MVT::i32)
+ if (DstVT == MVT::i32)
Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
else
Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
@@ -1244,7 +1246,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
if (IsSigned)
Opc = PPC::FCTIWZ;
else
- Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
+ Opc = Subtarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
else
Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
}
@@ -1254,8 +1256,9 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
.addReg(SrcReg);
// Now move the integer value from a float register to an integer register.
- unsigned IntReg = PPCSubTarget->hasSPE() ? DestReg :
- PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
+ unsigned IntReg = Subtarget->hasSPE()
+ ? DestReg
+ : PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
if (IntReg == 0)
return false;
@@ -1383,8 +1386,8 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
// Reserve space for the linkage area on the stack.
- unsigned LinkageSize = PPCSubTarget->getFrameLowering()->getLinkageSize();
- CCInfo.AllocateStack(LinkageSize, 8);
+ unsigned LinkageSize = Subtarget->getFrameLowering()->getLinkageSize();
+ CCInfo.AllocateStack(LinkageSize, Align(8));
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
@@ -1573,7 +1576,7 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
RetVT != MVT::i8)
return false;
- else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits())
+ else if (RetVT == MVT::i1 && Subtarget->useCRBits())
// We can't handle boolean returns when CR bits are in use.
return false;
@@ -1688,9 +1691,6 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
- if (TLI.supportSplitCSR(FuncInfo.MF))
- return false;
-
const ReturnInst *Ret = cast<ReturnInst>(I);
const Function &F = *I->getParent()->getParent();
@@ -1996,10 +1996,9 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
return 0;
// All FP constants are loaded from the constant pool.
- unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
- assert(Align > 0 && "Unexpectedly missing alignment information!");
- unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
- const bool HasSPE = PPCSubTarget->hasSPE();
+ Align Alignment = DL.getPrefTypeAlign(CFP->getType());
+ unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
+ const bool HasSPE = Subtarget->hasSPE();
const TargetRegisterClass *RC;
if (HasSPE)
RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass);
@@ -2011,7 +2010,7 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
MachinePointerInfo::getConstantPool(*FuncInfo.MF),
- MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Align);
+ MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Alignment);
unsigned Opc;
@@ -2093,7 +2092,7 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA8),
HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
- if (PPCSubTarget->isGVIndirectSymbol(GV)) {
+ if (Subtarget->isGVIndirectSymbol(GV)) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
DestReg).addGlobalAddress(GV).addReg(HighPartReg);
} else {
@@ -2200,7 +2199,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
bool UseSExt) {
// If we're using CR bit registers for i1 values, handle that as a special
// case first.
- if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
+ if (VT == MVT::i1 && Subtarget->useCRBits()) {
unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
@@ -2355,7 +2354,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
Register ResultReg = MI->getOperand(0).getReg();
if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt,
- PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
+ Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
return false;
MachineBasicBlock::iterator I(MI);
@@ -2382,7 +2381,7 @@ unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
// If we're using CR bit registers for i1 values, handle that as a special
// case first.
- if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
+ if (VT == MVT::i1 && Subtarget->useCRBits()) {
unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 4c608520e265..bd9174c1973d 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPCFrameLowering.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
@@ -31,6 +32,7 @@ using namespace llvm;
#define DEBUG_TYPE "framelowering"
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
+STATISTIC(NumPrologProbed, "Number of prologues probed");
static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
@@ -47,7 +49,7 @@ static const MCPhysReg VRRegNo[] = {
};
static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
- if (STI.isDarwinABI() || STI.isAIXABI())
+ if (STI.isAIXABI())
return STI.isPPC64() ? 16 : 8;
// SVR4 ABI:
return STI.isPPC64() ? 16 : 4;
@@ -60,20 +62,12 @@ static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
}
static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
- // For the Darwin ABI:
- // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
- // for saving the frame pointer (if needed.) While the published ABI has
- // not used this slot since at least MacOSX 10.2, there is older code
- // around that does use it, and that needs to continue to work.
- if (STI.isDarwinABI())
- return STI.isPPC64() ? -8U : -4U;
-
- // SVR4 ABI: First slot in the general register save area.
+ // First slot in the general register save area.
return STI.isPPC64() ? -8U : -4U;
}
static unsigned computeLinkageSize(const PPCSubtarget &STI) {
- if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64())
+ if (STI.isAIXABI() || STI.isPPC64())
return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
// 32-bit SVR4 ABI:
@@ -81,18 +75,16 @@ static unsigned computeLinkageSize(const PPCSubtarget &STI) {
}
static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
- if (STI.isDarwinABI())
- return STI.isPPC64() ? -16U : -8U;
+ // Third slot in the general purpose register save area.
+ if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
+ return -12U;
- // SVR4 ABI: First slot in the general register save area.
- return STI.isPPC64()
- ? -16U
- : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
+ // Second slot in the general purpose register save area.
+ return STI.isPPC64() ? -16U : -8U;
}
-static unsigned computeCRSaveOffset() {
- // The condition register save offset needs to be updated for AIX PPC32.
- return 8;
+static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
+ return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
}
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
@@ -103,71 +95,97 @@ PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
LinkageSize(computeLinkageSize(Subtarget)),
BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
- CRSaveOffset(computeCRSaveOffset()) {}
+ CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
unsigned &NumEntries) const {
- if (Subtarget.isDarwinABI()) {
- NumEntries = 1;
- if (Subtarget.isPPC64()) {
- static const SpillSlot darwin64Offsets = {PPC::X31, -8};
- return &darwin64Offsets;
- } else {
- static const SpillSlot darwinOffsets = {PPC::R31, -4};
- return &darwinOffsets;
- }
- }
- // Early exit if not using the SVR4 ABI.
- if (!Subtarget.isSVR4ABI()) {
- NumEntries = 0;
- return nullptr;
- }
+// Floating-point register save area offsets.
+#define CALLEE_SAVED_FPRS \
+ {PPC::F31, -8}, \
+ {PPC::F30, -16}, \
+ {PPC::F29, -24}, \
+ {PPC::F28, -32}, \
+ {PPC::F27, -40}, \
+ {PPC::F26, -48}, \
+ {PPC::F25, -56}, \
+ {PPC::F24, -64}, \
+ {PPC::F23, -72}, \
+ {PPC::F22, -80}, \
+ {PPC::F21, -88}, \
+ {PPC::F20, -96}, \
+ {PPC::F19, -104}, \
+ {PPC::F18, -112}, \
+ {PPC::F17, -120}, \
+ {PPC::F16, -128}, \
+ {PPC::F15, -136}, \
+ {PPC::F14, -144}
+
+// 32-bit general purpose register save area offsets shared by ELF and
+// AIX. AIX has one extra CSR, r13.
+#define CALLEE_SAVED_GPRS32 \
+ {PPC::R31, -4}, \
+ {PPC::R30, -8}, \
+ {PPC::R29, -12}, \
+ {PPC::R28, -16}, \
+ {PPC::R27, -20}, \
+ {PPC::R26, -24}, \
+ {PPC::R25, -28}, \
+ {PPC::R24, -32}, \
+ {PPC::R23, -36}, \
+ {PPC::R22, -40}, \
+ {PPC::R21, -44}, \
+ {PPC::R20, -48}, \
+ {PPC::R19, -52}, \
+ {PPC::R18, -56}, \
+ {PPC::R17, -60}, \
+ {PPC::R16, -64}, \
+ {PPC::R15, -68}, \
+ {PPC::R14, -72}
+
+// 64-bit general purpose register save area offsets.
+#define CALLEE_SAVED_GPRS64 \
+ {PPC::X31, -8}, \
+ {PPC::X30, -16}, \
+ {PPC::X29, -24}, \
+ {PPC::X28, -32}, \
+ {PPC::X27, -40}, \
+ {PPC::X26, -48}, \
+ {PPC::X25, -56}, \
+ {PPC::X24, -64}, \
+ {PPC::X23, -72}, \
+ {PPC::X22, -80}, \
+ {PPC::X21, -88}, \
+ {PPC::X20, -96}, \
+ {PPC::X19, -104}, \
+ {PPC::X18, -112}, \
+ {PPC::X17, -120}, \
+ {PPC::X16, -128}, \
+ {PPC::X15, -136}, \
+ {PPC::X14, -144}
+
+// Vector register save area offsets.
+#define CALLEE_SAVED_VRS \
+ {PPC::V31, -16}, \
+ {PPC::V30, -32}, \
+ {PPC::V29, -48}, \
+ {PPC::V28, -64}, \
+ {PPC::V27, -80}, \
+ {PPC::V26, -96}, \
+ {PPC::V25, -112}, \
+ {PPC::V24, -128}, \
+ {PPC::V23, -144}, \
+ {PPC::V22, -160}, \
+ {PPC::V21, -176}, \
+ {PPC::V20, -192}
// Note that the offsets here overlap, but this is fixed up in
// processFunctionBeforeFrameFinalized.
- static const SpillSlot Offsets[] = {
- // Floating-point register save area offsets.
- {PPC::F31, -8},
- {PPC::F30, -16},
- {PPC::F29, -24},
- {PPC::F28, -32},
- {PPC::F27, -40},
- {PPC::F26, -48},
- {PPC::F25, -56},
- {PPC::F24, -64},
- {PPC::F23, -72},
- {PPC::F22, -80},
- {PPC::F21, -88},
- {PPC::F20, -96},
- {PPC::F19, -104},
- {PPC::F18, -112},
- {PPC::F17, -120},
- {PPC::F16, -128},
- {PPC::F15, -136},
- {PPC::F14, -144},
-
- // General register save area offsets.
- {PPC::R31, -4},
- {PPC::R30, -8},
- {PPC::R29, -12},
- {PPC::R28, -16},
- {PPC::R27, -20},
- {PPC::R26, -24},
- {PPC::R25, -28},
- {PPC::R24, -32},
- {PPC::R23, -36},
- {PPC::R22, -40},
- {PPC::R21, -44},
- {PPC::R20, -48},
- {PPC::R19, -52},
- {PPC::R18, -56},
- {PPC::R17, -60},
- {PPC::R16, -64},
- {PPC::R15, -68},
- {PPC::R14, -72},
+ static const SpillSlot ELFOffsets32[] = {
+ CALLEE_SAVED_FPRS,
+ CALLEE_SAVED_GPRS32,
// CR save area offset. We map each of the nonvolatile CR fields
// to the slot for CR2, which is the first of the nonvolatile CR
@@ -178,19 +196,7 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
// VRSAVE save area offset.
{PPC::VRSAVE, -4},
- // Vector register save area
- {PPC::V31, -16},
- {PPC::V30, -32},
- {PPC::V29, -48},
- {PPC::V28, -64},
- {PPC::V27, -80},
- {PPC::V26, -96},
- {PPC::V25, -112},
- {PPC::V24, -128},
- {PPC::V23, -144},
- {PPC::V22, -160},
- {PPC::V21, -176},
- {PPC::V20, -192},
+ CALLEE_SAVED_VRS,
// SPE register save area (overlaps Vector save area).
{PPC::S31, -8},
@@ -212,73 +218,48 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
{PPC::S15, -136},
{PPC::S14, -144}};
- static const SpillSlot Offsets64[] = {
- // Floating-point register save area offsets.
- {PPC::F31, -8},
- {PPC::F30, -16},
- {PPC::F29, -24},
- {PPC::F28, -32},
- {PPC::F27, -40},
- {PPC::F26, -48},
- {PPC::F25, -56},
- {PPC::F24, -64},
- {PPC::F23, -72},
- {PPC::F22, -80},
- {PPC::F21, -88},
- {PPC::F20, -96},
- {PPC::F19, -104},
- {PPC::F18, -112},
- {PPC::F17, -120},
- {PPC::F16, -128},
- {PPC::F15, -136},
- {PPC::F14, -144},
-
- // General register save area offsets.
- {PPC::X31, -8},
- {PPC::X30, -16},
- {PPC::X29, -24},
- {PPC::X28, -32},
- {PPC::X27, -40},
- {PPC::X26, -48},
- {PPC::X25, -56},
- {PPC::X24, -64},
- {PPC::X23, -72},
- {PPC::X22, -80},
- {PPC::X21, -88},
- {PPC::X20, -96},
- {PPC::X19, -104},
- {PPC::X18, -112},
- {PPC::X17, -120},
- {PPC::X16, -128},
- {PPC::X15, -136},
- {PPC::X14, -144},
+ static const SpillSlot ELFOffsets64[] = {
+ CALLEE_SAVED_FPRS,
+ CALLEE_SAVED_GPRS64,
// VRSAVE save area offset.
{PPC::VRSAVE, -4},
+ CALLEE_SAVED_VRS
+ };
- // Vector register save area
- {PPC::V31, -16},
- {PPC::V30, -32},
- {PPC::V29, -48},
- {PPC::V28, -64},
- {PPC::V27, -80},
- {PPC::V26, -96},
- {PPC::V25, -112},
- {PPC::V24, -128},
- {PPC::V23, -144},
- {PPC::V22, -160},
- {PPC::V21, -176},
- {PPC::V20, -192}};
+ static const SpillSlot AIXOffsets32[] = {
+ CALLEE_SAVED_FPRS,
+ CALLEE_SAVED_GPRS32,
+ // Add AIX's extra CSR.
+ {PPC::R13, -76},
+ // TODO: Update when we add vector support for AIX.
+ };
- if (Subtarget.isPPC64()) {
- NumEntries = array_lengthof(Offsets64);
+ static const SpillSlot AIXOffsets64[] = {
+ CALLEE_SAVED_FPRS,
+ CALLEE_SAVED_GPRS64,
+ // TODO: Update when we add vector support for AIX.
+ };
- return Offsets64;
- } else {
- NumEntries = array_lengthof(Offsets);
+ if (Subtarget.is64BitELFABI()) {
+ NumEntries = array_lengthof(ELFOffsets64);
+ return ELFOffsets64;
+ }
- return Offsets;
+ if (Subtarget.is32BitELFABI()) {
+ NumEntries = array_lengthof(ELFOffsets32);
+ return ELFOffsets32;
}
+
+ assert(Subtarget.isAIXABI() && "Unexpected ABI.");
+
+ if (Subtarget.isPPC64()) {
+ NumEntries = array_lengthof(AIXOffsets64);
+ return AIXOffsets64;
+ }
+
+ NumEntries = array_lengthof(AIXOffsets32);
+ return AIXOffsets32;
}
/// RemoveVRSaveCode - We have found that this function does not need any code
@@ -479,9 +460,9 @@ PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
// Get stack alignments. The frame must be aligned to the greatest of these:
- unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
- unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame
- unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
+ Align TargetAlign = getStackAlign(); // alignment required per the ABI
+ Align MaxAlign = MFI.getMaxAlign(); // algmt required by data in frame
+ Align Alignment = std::max(TargetAlign, MaxAlign);
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
@@ -513,7 +494,7 @@ PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
// If we have dynamic alloca then maxCallFrameSize needs to be aligned so
// that allocations will be aligned.
if (MFI.hasVarSizedObjects())
- maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+ maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
// Update the new max call frame size if the caller passes in a valid pointer.
if (NewMaxCallFrameSize)
@@ -523,7 +504,7 @@ PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
FrameSize += maxCallFrameSize;
// Make sure the frame is aligned.
- FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+ FrameSize = alignTo(FrameSize, Alignment);
return FrameSize;
}
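determineFrameLayout now uses alignTo in place of the explicit mask arithmetic it removes; for a power-of-two alignment the two are equivalent, as this small standalone check shows (alignTo below is a local stand-in, not llvm::alignTo):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Local stand-in for llvm::alignTo with a power-of-two alignment.
uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

int main() {
  const uint64_t Alignment = 16;            // e.g. the ABI stack alignment
  const uint64_t AlignMask = Alignment - 1; // the mask the old code built
  for (uint64_t FrameSize : {0u, 1u, 15u, 16u, 100u}) {
    uint64_t Old = (FrameSize + AlignMask) & ~AlignMask;
    assert(Old == alignTo(FrameSize, Alignment));
    std::printf("%llu -> %llu\n", (unsigned long long)FrameSize,
                (unsigned long long)Old);
  }
}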
@@ -613,11 +594,11 @@ bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
bool UseAtEnd,
bool TwoUniqueRegsRequired,
- unsigned *SR1,
- unsigned *SR2) const {
+ Register *SR1,
+ Register *SR2) const {
RegScavenger RS;
- unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
- unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
+ Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
+ Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
// Set the defaults for the two scratch registers.
if (SR1)
@@ -684,7 +665,7 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
if (SecondScratchReg != -1)
*SR2 = SecondScratchReg;
else
- *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
+ *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
}
// Now that we've done our best to provide both registers, double check
@@ -709,7 +690,7 @@ PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
int NegFrameSize = -FrameSize;
bool IsLargeFrame = !isInt<16>(NegFrameSize);
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned MaxAlign = MFI.getMaxAlignment();
+ Align MaxAlign = MFI.getMaxAlign();
bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
@@ -778,11 +759,13 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MachineFrameInfo &MFI = MF.getFrameInfo();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
DebugLoc dl;
- bool needsCFI = MF.needsFrameMoves();
+ // AIX assembler does not support cfi directives.
+ const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
@@ -790,8 +773,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
bool isSVR4ABI = Subtarget.isSVR4ABI();
bool isAIXABI = Subtarget.isAIXABI();
bool isELFv2ABI = Subtarget.isELFv2ABI();
- assert((Subtarget.isDarwinABI() || isSVR4ABI || isAIXABI) &&
- "Unsupported PPC ABI.");
+ assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI.");
// Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
// process it.
@@ -821,20 +803,20 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
bool MustSaveLR = FI->mustSaveLR();
bool MustSaveTOC = FI->mustSaveTOC();
- const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
+ const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
bool MustSaveCR = !MustSaveCRs.empty();
// Do we have a frame pointer and/or base pointer for this function?
bool HasFP = hasFP(MF);
bool HasBP = RegInfo->hasBasePointer(MF);
bool HasRedZone = isPPC64 || !isSVR4ABI;
- unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
Register BPReg = RegInfo->getBaseRegister(MF);
- unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
- unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
- unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
- unsigned ScratchReg = 0;
- unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
+ Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
+ Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
+ Register ScratchReg;
+ Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
// ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
: PPC::MFLR );
@@ -854,6 +836,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
: PPC::SUBFC);
const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
: PPC::SUBFIC);
+ const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
+ : PPC::MFCR);
+ const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
// Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
// LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
@@ -863,9 +848,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
"FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
// Using the same bool variable as below to suppress compiler warnings.
- bool SingleScratchReg =
- findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
- &ScratchReg, &TempReg);
+ // Stack probing requires two scratch registers: one for the old SP, and one
+ // for large frames and large probe sizes.
+ bool SingleScratchReg = findScratchRegister(
+ &MBB, false,
+ twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF),
+ &ScratchReg, &TempReg);
assert(SingleScratchReg &&
"Required number of registers not available in this block");
@@ -906,21 +894,14 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
}
// Get stack alignments.
- unsigned MaxAlign = MFI.getMaxAlignment();
+ Align MaxAlign = MFI.getMaxAlign();
if (HasBP && MaxAlign > 1)
- assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
- "Invalid alignment!");
+ assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
// Frames of 32KB & larger require special handling because they cannot be
// indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
bool isLargeFrame = !isInt<16>(NegFrameSize);
- assert((isPPC64 || !MustSaveCR) &&
- "Prologue CR saving supported only in 64-bit mode");
-
- if (MustSaveCR && isAIXABI)
- report_fatal_error("Prologue CR saving is unimplemented on AIX.");
-
// Check if we can move the stack update instruction (stdu) down the prologue
// past the callee saves. Hopefully this will avoid the situation where the
// saves are waiting for the update on the store with update to complete.
@@ -960,49 +941,42 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
}
}
- // If we need to spill the CR and the LR but we don't have two separate
- // registers available, we must spill them one at a time
- if (MustSaveCR && SingleScratchReg && MustSaveLR) {
+ // Where in the prologue we move the CR fields depends on how many scratch
+ // registers we have, and if we need to save the link register or not. This
+ // lambda is to avoid duplicating the logic in 2 places.
+ auto BuildMoveFromCR = [&]() {
+ if (isELFv2ABI && MustSaveCRs.size() == 1) {
// In the ELFv2 ABI, we are not required to save all CR fields.
- // If only one or two CR fields are clobbered, it is more efficient to use
- // mfocrf to selectively save just those fields, because mfocrf has short
+ // If only one CR field is clobbered, it is more efficient to use
+ // mfocrf to selectively save just that field, because mfocrf has short
// latency compares to mfcr.
- unsigned MfcrOpcode = PPC::MFCR8;
- unsigned CrState = RegState::ImplicitKill;
- if (isELFv2ABI && MustSaveCRs.size() == 1) {
- MfcrOpcode = PPC::MFOCRF8;
- CrState = RegState::Kill;
+ assert(isPPC64 && "V2 ABI is 64-bit only.");
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
+ MIB.addReg(MustSaveCRs[0], RegState::Kill);
+ } else {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
+ for (unsigned CRfield : MustSaveCRs)
+ MIB.addReg(CRfield, RegState::ImplicitKill);
}
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
- for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
- MIB.addReg(MustSaveCRs[i], CrState);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
- .addReg(TempReg, getKillRegState(true))
- .addImm(getCRSaveOffset())
- .addReg(SPReg);
+ };
+
+ // If we need to spill the CR and the LR but we don't have two separate
+ // registers available, we must spill them one at a time.
+ if (MustSaveCR && SingleScratchReg && MustSaveLR) {
+ BuildMoveFromCR();
+ BuildMI(MBB, MBBI, dl, StoreWordInst)
+ .addReg(TempReg, getKillRegState(true))
+ .addImm(CRSaveOffset)
+ .addReg(SPReg);
}
if (MustSaveLR)
BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
- if (MustSaveCR &&
- !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
- // In the ELFv2 ABI, we are not required to save all CR fields.
- // If only one or two CR fields are clobbered, it is more efficient to use
- // mfocrf to selectively save just those fields, because mfocrf has short
- // latency compares to mfcr.
- unsigned MfcrOpcode = PPC::MFCR8;
- unsigned CrState = RegState::ImplicitKill;
- if (isELFv2ABI && MustSaveCRs.size() == 1) {
- MfcrOpcode = PPC::MFOCRF8;
- CrState = RegState::Kill;
- }
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
- for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
- MIB.addReg(MustSaveCRs[i], CrState);
- }
+ if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
+ BuildMoveFromCR();
if (HasRedZone) {
if (HasFP)
@@ -1029,11 +1003,11 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(SPReg);
if (MustSaveCR &&
- !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
+ !(SingleScratchReg && MustSaveLR)) {
assert(HasRedZone && "A red zone is always available on PPC64");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
+ BuildMI(MBB, MBBI, dl, StoreWordInst)
.addReg(TempReg, getKillRegState(true))
- .addImm(getCRSaveOffset())
+ .addImm(CRSaveOffset)
.addReg(SPReg);
}
@@ -1055,58 +1029,81 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// the negated frame size will be placed in ScratchReg.
bool HasSTUX = false;
- // This condition must be kept in sync with canUseAsPrologue.
- if (HasBP && MaxAlign > 1) {
- if (isPPC64)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
- .addReg(SPReg)
- .addImm(0)
- .addImm(64 - Log2_32(MaxAlign));
- else // PPC32...
- BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
- .addReg(SPReg)
- .addImm(0)
- .addImm(32 - Log2_32(MaxAlign))
- .addImm(31);
- if (!isLargeFrame) {
- BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
- .addReg(ScratchReg, RegState::Kill)
+ // If FrameSize <= TLI.getStackProbeSize(MF), we get a probe for free: the
+ // PowerPC ABI requires the backchain pointer to always be stored at SP, so
+ // the mandatory STU(X) instruction already touches the newly allocated stack.
+ if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
+ // To be consistent with other targets, a pseudo instruction is emitted and
+ // will be later expanded in `inlineStackProbe`.
+ BuildMI(MBB, MBBI, dl,
+ TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
+ : PPC::PROBED_STACKALLOC_32))
+ .addDef(ScratchReg)
+ .addDef(TempReg) // TempReg stores the old sp.
.addImm(NegFrameSize);
- } else {
- assert(!SingleScratchReg && "Only a single scratch reg available");
- BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
- .addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
- .addReg(TempReg, RegState::Kill)
- .addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
- .addReg(ScratchReg, RegState::Kill)
- .addReg(TempReg, RegState::Kill);
+ // FIXME: HasSTUX is only read when HasRedZone is not set; in that case we
+ // update ScratchReg to satisfy the assumption that ScratchReg contains
+ // the NegFrameSize. This solution is rather tricky.
+ if (!HasRedZone) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
+ .addReg(TempReg)
+ .addReg(SPReg);
+ HasSTUX = true;
}
+ } else {
+ // This condition must be kept in sync with canUseAsPrologue.
+ if (HasBP && MaxAlign > 1) {
+ if (isPPC64)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
+ .addReg(SPReg)
+ .addImm(0)
+ .addImm(64 - Log2(MaxAlign));
+ else // PPC32...
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
+ .addReg(SPReg)
+ .addImm(0)
+ .addImm(32 - Log2(MaxAlign))
+ .addImm(31);
+ if (!isLargeFrame) {
+ BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(NegFrameSize);
+ } else {
+ assert(!SingleScratchReg && "Only a single scratch reg available");
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
+ .addReg(TempReg, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addReg(TempReg, RegState::Kill);
+ }
- BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
- .addReg(SPReg, RegState::Kill)
- .addReg(SPReg)
- .addReg(ScratchReg);
- HasSTUX = true;
+ BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
+ .addReg(SPReg, RegState::Kill)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
+ HasSTUX = true;
- } else if (!isLargeFrame) {
- BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
- .addReg(SPReg)
- .addImm(NegFrameSize)
- .addReg(SPReg);
+ } else if (!isLargeFrame) {
+ BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
+ .addReg(SPReg)
+ .addImm(NegFrameSize)
+ .addReg(SPReg);
- } else {
- BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
- .addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
- .addReg(ScratchReg, RegState::Kill)
- .addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
- .addReg(SPReg, RegState::Kill)
- .addReg(SPReg)
- .addReg(ScratchReg);
- HasSTUX = true;
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
+ .addReg(SPReg, RegState::Kill)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
+ HasSTUX = true;
+ }
}
// Save the TOC register after the stack pointer update if a prologue TOC
@@ -1247,7 +1244,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// Adjust the definition of CFA to account for the change in SP.
assert(NegFrameSize);
CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
}
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -1337,7 +1334,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// actually saved gets its own CFI record.
unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(CRReg, true), getCRSaveOffset()));
+ nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
continue;
@@ -1367,6 +1364,175 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
}
}
+void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologMBB) const {
+ // TODO: Generate CFI instructions.
+ bool isPPC64 = Subtarget.isPPC64();
+ const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+ // AIX assembler does not support cfi directives.
+ const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
+ auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
+ int Opc = MI.getOpcode();
+ return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
+ });
+ if (StackAllocMIPos == PrologMBB.end())
+ return;
+ const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
+ DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
+ MachineInstr &MI = *StackAllocMIPos;
+ int64_t NegFrameSize = MI.getOperand(2).getImm();
+ int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF);
+ assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
+ int64_t NumBlocks = NegFrameSize / NegProbeSize;
+ int64_t NegResidualSize = NegFrameSize % NegProbeSize;
+ Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ Register ScratchReg = MI.getOperand(0).getReg();
+ Register FPReg = MI.getOperand(1).getReg();
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ bool HasBP = RegInfo->hasBasePointer(MF);
+ Align MaxAlign = MFI.getMaxAlign();
+ // Initialize current frame pointer.
+ const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
+ BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
+ // Subroutines to generate .cfi_* directives.
+ auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, Register Reg) {
+ unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ };
+ auto buildDefCFA = [&](MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, Register Reg,
+ int Offset) {
+ unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
+ unsigned CFIIndex = MBB.getParent()->addFrameInst(
+ MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ };
+ // Subroutine to determine if we can use the Imm as part of d-form.
+ auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
+ // Subroutine to materialize the Imm into TempReg.
+ auto MaterializeImm = [&](MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, int64_t Imm,
+ Register &TempReg) {
+ assert(isInt<32>(Imm) && "Unhandled imm");
+ if (isInt<16>(Imm))
+ BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
+ .addImm(Imm);
+ else {
+ BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
+ .addImm(Imm >> 16);
+ BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
+ .addReg(TempReg)
+ .addImm(Imm & 0xFFFF);
+ }
+ };
+ // Subroutine to store frame pointer and decrease stack pointer by probe size.
+ auto allocateAndProbe = [&](MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, int64_t NegSize,
+ Register NegSizeReg, bool UseDForm) {
+ if (UseDForm)
+ BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
+ .addReg(FPReg)
+ .addImm(NegSize)
+ .addReg(SPReg);
+ else
+ BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
+ .addReg(FPReg)
+ .addReg(SPReg)
+ .addReg(NegSizeReg);
+ };
+ // Use FPReg to calculate CFA.
+ if (needsCFI)
+ buildDefCFA(PrologMBB, {MI}, FPReg, 0);
+ // When HasBP && MaxAlign > 1, we have to align the SP by performing
+ // SP = SP - SP % MaxAlign.
+ if (HasBP && MaxAlign > 1) {
+ if (isPPC64)
+ BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
+ .addReg(FPReg)
+ .addImm(0)
+ .addImm(64 - Log2(MaxAlign));
+ else
+ BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
+ .addReg(FPReg)
+ .addImm(0)
+ .addImm(32 - Log2(MaxAlign))
+ .addImm(31);
+ BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX),
+ SPReg)
+ .addReg(FPReg)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
+ }
+ // Probe residual part.
+ if (NegResidualSize) {
+ bool ResidualUseDForm = CanUseDForm(NegResidualSize);
+ if (!ResidualUseDForm)
+ MaterializeImm(PrologMBB, {MI}, NegResidualSize, ScratchReg);
+ allocateAndProbe(PrologMBB, {MI}, NegResidualSize, ScratchReg,
+ ResidualUseDForm);
+ }
+ bool UseDForm = CanUseDForm(NegProbeSize);
+ // If number of blocks is small, just probe them directly.
+ if (NumBlocks < 3) {
+ if (!UseDForm)
+ MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
+ for (int i = 0; i < NumBlocks; ++i)
+ allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm);
+ if (needsCFI) {
+ // Restore using SPReg to calculate CFA.
+ buildDefCFAReg(PrologMBB, {MI}, SPReg);
+ }
+ } else {
+ // Since CTR is a volatile register and the current shrink-wrapping
+ // implementation won't choose an MBB inside a loop as the PrologMBB, it's
+ // safe to synthesize a CTR loop for probing.
+ // Calculate the trip count and move it into CTR.
+ MaterializeImm(PrologMBB, {MI}, NumBlocks, ScratchReg);
+ BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
+ .addReg(ScratchReg, RegState::Kill);
+ if (!UseDForm)
+ MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
+ // Create MBBs of the loop.
+ MachineFunction::iterator MBBInsertPoint =
+ std::next(PrologMBB.getIterator());
+ MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, LoopMBB);
+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, ExitMBB);
+ // Synthesize the loop body.
+ allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
+ UseDForm);
+ BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
+ .addMBB(LoopMBB);
+ LoopMBB->addSuccessor(ExitMBB);
+ LoopMBB->addSuccessor(LoopMBB);
+ // Synthesize the exit MBB.
+ ExitMBB->splice(ExitMBB->end(), &PrologMBB,
+ std::next(MachineBasicBlock::iterator(MI)),
+ PrologMBB.end());
+ ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB);
+ PrologMBB.addSuccessor(LoopMBB);
+ if (needsCFI) {
+ // Restore using SPReg to calculate CFA.
+ buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
+ }
+ // Update liveins.
+ recomputeLiveIns(*LoopMBB);
+ recomputeLiveIns(*ExitMBB);
+ }
+ ++NumPrologProbed;
+ MI.eraseFromParent();
+}
+
void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
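
As an aside on the probe lowering above: the split into a residual chunk plus equally sized probe blocks can be sketched standalone. The frame and probe sizes below (40000 and 4096 bytes) are purely illustrative, and CanUseDForm is re-stated locally rather than taken from the patch.

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Minimal sketch of the arithmetic in inlineStackProbe: probe the residual
    // first, then NumBlocks equally sized blocks (unrolled when fewer than 3,
    // otherwise via a CTR loop).
    int main() {
      int64_t NegFrameSize = -40000; // illustrative frame size
      int64_t NegProbeSize = -4096;  // illustrative probe (guard) size
      int64_t NumBlocks = NegFrameSize / NegProbeSize;       // 9
      int64_t NegResidualSize = NegFrameSize % NegProbeSize; // -3136
      assert(NumBlocks * NegProbeSize + NegResidualSize == NegFrameSize);
      // A probe may use the d-form STDU/STWU only if the displacement is a
      // 16-bit signed immediate that is a multiple of 4 (cf. CanUseDForm).
      auto CanUseDForm = [](int64_t Imm) {
        return Imm >= -32768 && Imm <= 32767 && Imm % 4 == 0;
      };
      std::printf("blocks=%lld residual=%lld dform=%d\n", (long long)NumBlocks,
                  (long long)NegResidualSize, (int)CanUseDForm(NegResidualSize));
      return 0;
    }
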
@@ -1392,18 +1558,18 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Check if the link register (LR) has been saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
bool MustSaveLR = FI->mustSaveLR();
- const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
+ const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
bool MustSaveCR = !MustSaveCRs.empty();
// Do we have a frame pointer and/or base pointer for this function?
bool HasFP = hasFP(MF);
bool HasBP = RegInfo->hasBasePointer(MF);
bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
- unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
Register BPReg = RegInfo->getBaseRegister(MF);
- unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
- unsigned ScratchReg = 0;
- unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
+ Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ Register ScratchReg;
+ Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
: PPC::MTLR );
const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
@@ -1418,7 +1584,10 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
: PPC::ADDI );
const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
: PPC::ADD4 );
-
+ const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
+ : PPC::LWZ);
+ const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
+ : PPC::MTOCRF);
int LROffset = getReturnSaveOffset();
int FPOffset = 0;
@@ -1593,20 +1762,17 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// value (although not the base register). Make sure it is not overwritten
// too early.
- assert((isPPC64 || !MustSaveCR) &&
- "Epilogue CR restoring supported only in 64-bit mode");
-
// If we need to restore both the LR and the CR and we only have one
// available scratch register, we must do them one at a time.
if (MustSaveCR && SingleScratchReg && MustSaveLR) {
// Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
// is live here.
assert(HasRedZone && "Expecting red zone");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
- .addImm(getCRSaveOffset())
+ BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
+ .addImm(CRSaveOffset)
.addReg(SPReg);
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
+ BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
.addReg(TempReg, getKillRegState(i == e-1));
}
@@ -1623,11 +1789,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
- // This will only occur for PPC64.
- assert(isPPC64 && "Expecting 64-bit mode");
assert(RBReg == SPReg && "Should be using SP as a base register");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
- .addImm(getCRSaveOffset())
+ BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
+ .addImm(CRSaveOffset)
.addReg(RBReg);
}
@@ -1682,9 +1846,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
if (MustSaveCR &&
- !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
+ !(SingleScratchReg && MustSaveLR))
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
+ BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
.addReg(TempReg, getKillRegState(i == e-1));
if (MustSaveLR)
@@ -1729,13 +1893,25 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
DebugLoc dl = MBBI->getDebugLoc();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
- // Create branch instruction for pseudo tail call return instruction
+ // Create a branch instruction for the pseudo tail-call return instruction.
+ // The TCRETURNdi variants are direct calls. Valid targets for those are
+ // MO_GlobalAddress operands as well as MO_ExternalSymbol when PC-relative
+ // addressing is used, since we can tail call external functions with PC-Rel
+ // (i.e. we don't need to worry about different TOC pointers). Some external
+ // functions will be MO_GlobalAddress while others, such as memcpy, will be
+ // MO_ExternalSymbol.
unsigned RetOpcode = MBBI->getOpcode();
if (RetOpcode == PPC::TCRETURNdi) {
MBBI = MBB.getLastNonDebugInstr();
MachineOperand &JumpTarget = MBBI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ if (JumpTarget.isGlobal())
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ else if (JumpTarget.isSymbol())
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+ addExternalSymbol(JumpTarget.getSymbolName());
+ else
+ llvm_unreachable("Expecting Global or External Symbol");
} else if (RetOpcode == PPC::TCRETURNri) {
MBBI = MBB.getLastNonDebugInstr();
assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
@@ -1747,8 +1923,14 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
} else if (RetOpcode == PPC::TCRETURNdi8) {
MBBI = MBB.getLastNonDebugInstr();
MachineOperand &JumpTarget = MBBI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ if (JumpTarget.isGlobal())
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ else if (JumpTarget.isSymbol())
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+ addExternalSymbol(JumpTarget.getSymbolName());
+ else
+ llvm_unreachable("Expecting Global or External Symbol");
} else if (RetOpcode == PPC::TCRETURNri8) {
MBBI = MBB.getLastNonDebugInstr();
assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
@@ -1776,7 +1958,6 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
// Save R31 if necessary
int FPSI = FI->getFramePointerSaveIndex();
const bool isPPC64 = Subtarget.isPPC64();
- const bool IsDarwinABI = Subtarget.isDarwinABI();
MachineFrameInfo &MFI = MF.getFrameInfo();
// If the frame pointer save index hasn't been defined yet.
@@ -1823,25 +2004,26 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
}
- // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
- // function uses CR 2, 3, or 4.
- if (!isPPC64 && !IsDarwinABI &&
- (SavedRegs.test(PPC::CR2) ||
- SavedRegs.test(PPC::CR3) ||
+ // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
+ // For 64-bit SVR4 and all flavors of AIX we create a FixedStack
+ // object at the offset of the CR-save slot in the linkage area. The actual
+ // save and restore of the condition register will be created as part of the
+ // prologue and epilogue insertion, but the FixedStack object is needed to
+ // keep the CalleeSavedInfo valid.
+ if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
SavedRegs.test(PPC::CR4))) {
- int FrameIdx = MFI.CreateFixedObject((uint64_t)4, (int64_t)-4, true);
+ const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
+ const int64_t SpillOffset =
+ Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
+ int FrameIdx =
+ MFI.CreateFixedObject(SpillSize, SpillOffset,
+ /* IsImmutable */ true, /* IsAliased */ false);
FI->setCRSpillFrameIndex(FrameIdx);
}
}
void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const {
- // Early exit if not using the SVR4 ABI.
- if (!Subtarget.isSVR4ABI()) {
- addScavengingSpillSlot(MF, RS);
- return;
- }
-
// Get callee saved register information.
MachineFrameInfo &MFI = MF.getFrameInfo();
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
@@ -2014,11 +2196,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
std::min<unsigned>(TRI->getEncodingValue(MinGPR),
TRI->getEncodingValue(MinG8R));
- if (Subtarget.isPPC64()) {
- LowerBound -= (31 - MinReg + 1) * 8;
- } else {
- LowerBound -= (31 - MinReg + 1) * 4;
- }
+ const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
+ LowerBound -= (31 - MinReg + 1) * GPRegSize;
}
// For 32-bit only, the CR save area is below the general register
@@ -2026,19 +2205,13 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
// to the stack pointer and hence does not need an adjustment here.
// Only CR2 (the first nonvolatile spilled) has an associated frame
// index so that we have a single uniform save area.
- if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) {
+ if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
// Adjust the frame index of the CR spill slot.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
-
- if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
- // Leave Darwin logic as-is.
- || (!Subtarget.isSVR4ABI() &&
- (PPC::CRBITRCRegClass.contains(Reg) ||
- PPC::CRRCRegClass.contains(Reg)))) {
- int FI = CSI[i].getFrameIdx();
-
+ for (const auto &CSInfo : CSI) {
+ if (CSInfo.getReg() == PPC::CR2) {
+ int FI = CSInfo.getFrameIdx();
MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+ break;
}
}
@@ -2108,17 +2281,17 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
unsigned Size = TRI.getSpillSize(RC);
- unsigned Align = TRI.getSpillAlignment(RC);
- RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
+ Align Alignment = TRI.getSpillAlign(RC);
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
// Might we have over-aligned allocas?
- bool HasAlVars = MFI.hasVarSizedObjects() &&
- MFI.getMaxAlignment() > getStackAlignment();
+ bool HasAlVars =
+ MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
// These kinds of spills might need two registers.
if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
- RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
-
+ RS->addScavengingFrameIndex(
+ MFI.CreateStackObject(Size, Alignment, false));
}
}
@@ -2179,17 +2352,9 @@ bool PPCFrameLowering::assignCalleeSavedSpillSlots(
return AllSpilledToReg;
}
-
-bool
-PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
-
- // Currently, this function only handles SVR4 32- and 64-bit ABIs.
- // Return false otherwise to maintain pre-existing behavior.
- if (!Subtarget.isSVR4ABI())
- return false;
+bool PPCFrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
@@ -2201,10 +2366,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- // Only Darwin actually uses the VRSAVE register, but it can still appear
- // here if, for example, @llvm.eh.unwind.init() is used. If we're not on
- // Darwin, ignore it.
- if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
+ // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
+ if (Reg == PPC::VRSAVE)
continue;
// CR2 through CR4 are the nonvolatile CR fields.
@@ -2232,7 +2395,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// Insert the spill to the stack frame.
if (IsCRField) {
PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
- if (Subtarget.isPPC64()) {
+ if (!Subtarget.is32BitELFABI()) {
// The actual spill will happen at the start of the prologue.
FuncInfo->addMustSaveCR(Reg);
} else {
@@ -2260,37 +2423,37 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// Use !IsLiveIn for the kill flag.
// We do not want to kill registers that are live in this function
// before their use because they will become undefined registers.
- TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
- CSI[i].getFrameIdx(), RC, TRI);
+ // Functions without NoUnwind need to preserve the order of elements in
+ // saved vector registers.
+ if (Subtarget.needsSwapsForVSXMemOps() &&
+ !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
+ TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
+ CSI[i].getFrameIdx(), RC, TRI);
+ else
+ TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
+ RC, TRI);
}
}
}
return true;
}
-static void
-restoreCRs(bool isPPC64, bool is31,
- bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
+static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
+ bool CR4Spilled, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
DebugLoc DL;
- unsigned RestoreOp, MoveReg;
+ unsigned MoveReg = PPC::R12;
- if (isPPC64)
- // This is handled during epilogue generation.
- return;
- else {
- // 32-bit: FP-relative
- MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
- PPC::R12),
- CSI[CSIIndex].getFrameIdx()));
- RestoreOp = PPC::MTOCRF;
- MoveReg = PPC::R12;
- }
+ // 32-bit: FP-relative
+ MBB.insert(MI,
+ addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
+ CSI[CSIIndex].getFrameIdx()));
+ unsigned RestoreOp = PPC::MTOCRF;
if (CR2Spilled)
MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
.addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
@@ -2343,17 +2506,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
return MBB.erase(I);
}
-bool
-PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
-
- // Currently, this function only handles SVR4 32- and 64-bit ABIs.
- // Return false otherwise to maintain pre-existing behavior.
- if (!Subtarget.isSVR4ABI())
- return false;
+static bool isCalleeSavedCR(unsigned Reg) {
+ return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
+}
+bool PPCFrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
@@ -2374,15 +2533,18 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- // Only Darwin actually uses the VRSAVE register, but it can still appear
- // here if, for example, @llvm.eh.unwind.init() is used. If we're not on
- // Darwin, ignore it.
- if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
+ // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
+ if (Reg == PPC::VRSAVE)
continue;
if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
continue;
+ // Restore of callee saved condition register field is handled during
+ // epilogue insertion.
+ if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
+ continue;
+
if (Reg == PPC::CR2) {
CR2Spilled = true;
// The spill slot is associated only with CR2, which is the
@@ -2396,14 +2558,12 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
CR4Spilled = true;
continue;
} else {
- // When we first encounter a non-CR register after seeing at
+ // On 32-bit ELF when we first encounter a non-CR register after seeing at
// least one CR register, restore all spilled CRs together.
- if ((CR2Spilled || CR3Spilled || CR4Spilled)
- && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
+ if (CR2Spilled || CR3Spilled || CR4Spilled) {
bool is31 = needsFP(*MF);
- restoreCRs(Subtarget.isPPC64(), is31,
- CR2Spilled, CR3Spilled, CR4Spilled,
- MBB, I, CSI, CSIIndex);
+ restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
+ CSIIndex);
CR2Spilled = CR3Spilled = CR4Spilled = false;
}
@@ -2415,7 +2575,16 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
} else {
// Default behavior for non-CR saves.
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+
+ // Functions without NoUnwind need to preserve the order of elements in
+ // saved vector registers.
+ if (Subtarget.needsSwapsForVSXMemOps() &&
+ !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
+ TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
+ TRI);
+ else
+ TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+
assert(I != MBB.begin() &&
"loadRegFromStackSlot didn't insert any code!");
}
@@ -2432,9 +2601,10 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
// If we haven't yet spilled the CRs, do so now.
if (CR2Spilled || CR3Spilled || CR4Spilled) {
+ assert(Subtarget.is32BitELFABI() &&
+ "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
bool is31 = needsFP(*MF);
- restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
- MBB, I, CSI, CSIIndex);
+ restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
}
return true;
@@ -2445,14 +2615,10 @@ unsigned PPCFrameLowering::getTOCSaveOffset() const {
}
unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
- if (Subtarget.isAIXABI())
- report_fatal_error("FramePointer is not implemented on AIX yet.");
return FramePointerSaveOffset;
}
unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
- if (Subtarget.isAIXABI())
- report_fatal_error("BasePointer is not implemented on AIX yet.");
return BasePointerSaveOffset;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index a5fbc9acbb28..8bf52c0ed01a 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -61,8 +61,8 @@ class PPCFrameLowering: public TargetFrameLowering {
bool findScratchRegister(MachineBasicBlock *MBB,
bool UseAtEnd,
bool TwoUniqueRegsRequired = false,
- unsigned *SR1 = nullptr,
- unsigned *SR2 = nullptr) const;
+ Register *SR1 = nullptr,
+ Register *SR2 = nullptr) const;
bool twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const;
/**
@@ -100,6 +100,8 @@ public:
/// the function.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologMBB) const override;
bool hasFP(const MachineFunction &MF) const override;
bool needsFP(const MachineFunction &MF) const;
@@ -113,7 +115,7 @@ public:
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
+ ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) const override;
/// This function will assign callee saved gprs to volatile vector registers
/// for prologue spills when applicable. It returns false if there are any
@@ -127,10 +129,11 @@ public:
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const override;
- bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const override;
+ bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
/// targetHandlesStackFrameRounding - Returns true if the target is
/// responsible for rounding up the stack frame (probably at emitPrologue
@@ -153,10 +156,6 @@ public:
/// base pointer.
unsigned getBasePointerSaveOffset() const;
- /// getCRSaveOffset - Return the previous frame offset to save the
- /// CR register.
- unsigned getCRSaveOffset() const { return CRSaveOffset; }
-
/// getLinkageSize - Return the size of the PowerPC ABI linkage area.
///
unsigned getLinkageSize() const { return LinkageSize; }
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 776ec52e2604..8ffd89ef5ccd 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -139,6 +139,7 @@ namespace {
class PPCDAGToDAGISel : public SelectionDAGISel {
const PPCTargetMachine &TM;
const PPCSubtarget *PPCSubTarget = nullptr;
+ const PPCSubtarget *Subtarget = nullptr;
const PPCTargetLowering *PPCLowering = nullptr;
unsigned GlobalBaseReg = 0;
@@ -150,10 +151,11 @@ namespace {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
- PPCLowering = PPCSubTarget->getTargetLowering();
+ Subtarget = &MF.getSubtarget<PPCSubtarget>();
+ PPCLowering = Subtarget->getTargetLowering();
SelectionDAGISel::runOnMachineFunction(MF);
- if (!PPCSubTarget->isSVR4ABI())
+ if (!Subtarget->isSVR4ABI())
InsertVRSaveCode(MF);
return true;
@@ -204,7 +206,6 @@ namespace {
bool tryBitfieldInsert(SDNode *N);
bool tryBitPermutation(SDNode *N);
bool tryIntCompareInGPR(SDNode *N);
- bool tryAndWithMask(SDNode *N);
// tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
// an X-Form load instruction with the offset being a relocation coming from
@@ -239,7 +240,7 @@ namespace {
/// bit signed displacement.
/// Returns false if it can be represented by [r+imm], which are preferred.
bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
- return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 0);
+ return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, None);
}
/// SelectAddrIdx4 - Given the specified address, check to see if it can be
@@ -249,7 +250,8 @@ namespace {
/// displacement must be a multiple of 4.
/// Returns false if it can be represented by [r+imm], which are preferred.
bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
- return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 4);
+ return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
+ Align(4));
}
/// SelectAddrIdx16 - Given the specified address, check to see if it can be
@@ -259,7 +261,8 @@ namespace {
/// displacement must be a multiple of 16.
/// Returns false if it can be represented by [r+imm], which are preferred.
bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
- return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 16);
+ return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
+ Align(16));
}
/// SelectAddrIdxOnly - Given the specified address, force it to be
@@ -267,28 +270,29 @@ namespace {
bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
}
-
+
/// SelectAddrImm - Returns true if the address N can be represented by
/// a base register plus a signed 16-bit displacement [r+imm].
/// The last parameter \p 0 means D form has no requirement for 16-bit signed
/// displacement.
bool SelectAddrImm(SDValue N, SDValue &Disp,
SDValue &Base) {
- return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, None);
}
/// SelectAddrImmX4 - Returns true if the address N can be represented by
/// a base register plus a signed 16-bit displacement that is a multiple of
/// 4 (last parameter). Suitable for use by STD and friends.
bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
- return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
}
/// SelectAddrImmX16 - Returns true if the address N can be represented by
/// a base register plus a signed 16-bit displacement that is a multiple of
/// 16(last parameter). Suitable for use by STXV and friends.
bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
- return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
+ Align(16));
}
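
As a side note on the Align(4)/Align(16) arguments above: they encode the usual PowerPC displacement restrictions, namely that DS-form displacements (STD and friends) are 16-bit signed multiples of 4 and DQ-form displacements (STXV and friends) are multiples of 16. A minimal standalone illustration of that test follows; the helper names are ours, not LLVM APIs.

    #include <cstdint>
    #include <cstdio>

    static bool fitsDForm(int64_t D)  { return D >= -32768 && D <= 32767; }
    static bool fitsDSForm(int64_t D) { return fitsDForm(D) && D % 4 == 0; }  // Align(4)
    static bool fitsDQForm(int64_t D) { return fitsDForm(D) && D % 16 == 0; } // Align(16)

    int main() {
      std::printf("%d %d %d\n", (int)fitsDSForm(8), (int)fitsDSForm(10),
                  (int)fitsDQForm(48));
      return 0;
    }
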
// Select an address into a single register.
@@ -297,6 +301,10 @@ namespace {
return true;
}
+ bool SelectAddrPCRel(SDValue N, SDValue &Base) {
+ return PPCLowering->SelectAddressPCRel(N, Base);
+ }
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions. It is always correct to compute the value into
/// a register. The case of adding a (possibly relocatable) constant to a
@@ -317,7 +325,7 @@ namespace {
case InlineAsm::Constraint_Zy:
// We need to make sure that this one operand does not end up in r0
// (because we might end up lowering this as 0(%op)).
- const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
+ const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
SDLoc dl(Op);
SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
@@ -343,6 +351,13 @@ namespace {
private:
bool trySETCC(SDNode *N);
+ bool tryAsSingleRLDICL(SDNode *N);
+ bool tryAsSingleRLDICR(SDNode *N);
+ bool tryAsSingleRLWINM(SDNode *N);
+ bool tryAsSingleRLWINM8(SDNode *N);
+ bool tryAsSingleRLWIMI(SDNode *N);
+ bool tryAsPairOfRLDICL(SDNode *N);
+ bool tryAsSingleRLDIMI(SDNode *N);
void PeepholePPC64();
void PeepholePPC64ZExt();
@@ -394,7 +409,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
Register InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
Register UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
- const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
MachineBasicBlock &EntryBB = *Fn.begin();
DebugLoc dl;
// Emit the following code into the entry block:
@@ -429,7 +444,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
///
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
if (!GlobalBaseReg) {
- const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
// Insert the set of GlobalBaseReg into the first MBB of the function
MachineBasicBlock &FirstMBB = MF->front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
@@ -437,9 +452,9 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
DebugLoc dl;
if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
- if (PPCSubTarget->isTargetELF()) {
+ if (Subtarget->isTargetELF()) {
GlobalBaseReg = PPC::R30;
- if (!PPCSubTarget->isSecurePlt() &&
+ if (!Subtarget->isSecurePlt() &&
M->getPICLevel() == PICLevel::SmallPIC) {
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
@@ -3788,7 +3803,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
Opc = PPC::CMPD;
}
} else if (LHS.getValueType() == MVT::f32) {
- if (PPCSubTarget->hasSPE()) {
+ if (Subtarget->hasSPE()) {
switch (CC) {
default:
case ISD::SETEQ:
@@ -3815,7 +3830,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
} else
Opc = PPC::FCMPUS;
} else if (LHS.getValueType() == MVT::f64) {
- if (PPCSubTarget->hasSPE()) {
+ if (Subtarget->hasSPE()) {
switch (CC) {
default:
case ISD::SETEQ:
@@ -3840,10 +3855,10 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
break;
}
} else
- Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
+ Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
} else {
assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
- assert(PPCSubTarget->hasVSX() && "__float128 requires VSX");
+ assert(Subtarget->hasVSX() && "__float128 requires VSX");
Opc = PPC::XSCMPUQP;
}
return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
@@ -3872,10 +3887,10 @@ static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT,
return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
case ISD::SETULE:
case ISD::SETLE:
- return UseSPE ? PPC::PRED_LE : PPC::PRED_LE;
+ return PPC::PRED_LE;
case ISD::SETOGT:
case ISD::SETGT:
- return UseSPE ? PPC::PRED_GT : PPC::PRED_GT;
+ return PPC::PRED_GT;
case ISD::SETUGE:
case ISD::SETGE:
return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
@@ -4038,8 +4053,7 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
bool isPPC64 = (PtrVT == MVT::i64);
- if (!PPCSubTarget->useCRBits() &&
- isInt32Immediate(N->getOperand(1), Imm)) {
+ if (!Subtarget->useCRBits() && isInt32Immediate(N->getOperand(1), Imm)) {
// We can codegen setcc op, imm very efficiently compared to a brcond.
// Check for those cases here.
// setcc op, 0
@@ -4128,20 +4142,20 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
// Altivec Vector compare instructions do not set any CR register by default and
// vector compare operations return the same type as the operands.
if (LHS.getValueType().isVector()) {
- if (PPCSubTarget->hasQPX() || PPCSubTarget->hasSPE())
+ if (Subtarget->hasQPX() || Subtarget->hasSPE())
return false;
EVT VecVT = LHS.getValueType();
bool Swap, Negate;
- unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
- PPCSubTarget->hasVSX(), Swap, Negate);
+ unsigned int VCmpInst =
+ getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
if (Swap)
std::swap(LHS, RHS);
EVT ResVT = VecVT.changeVectorElementTypeToInteger();
if (Negate) {
SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
- CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
+ CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
ResVT, VCmp, VCmp);
return true;
}
@@ -4150,7 +4164,7 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
return true;
}
- if (PPCSubTarget->useCRBits())
+ if (Subtarget->useCRBits())
return false;
bool Inv;
@@ -4160,7 +4174,7 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
// SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
// The correct compare instruction is already set by SelectCC()
- if (PPCSubTarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
+ if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
Idx = 1;
}
@@ -4209,7 +4223,7 @@ bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
// because it is translated to r31 or r1 + slot + offset. We won't know the
// slot number until the stack frame is finalized.
const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
- unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex());
+ unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value();
if ((SlotAlign % Val) != 0)
return false;
@@ -4241,13 +4255,10 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
SDValue TrueRes = N->getOperand(2);
SDValue FalseRes = N->getOperand(3);
ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
- if (!TrueConst)
+ if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
+ N->getSimpleValueType(0) != MVT::i32))
return false;
- assert((N->getSimpleValueType(0) == MVT::i64 ||
- N->getSimpleValueType(0) == MVT::i32) &&
- "Expecting either i64 or i32 here.");
-
// We are looking for any of:
// (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
// (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
@@ -4371,142 +4382,251 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
return true;
}
-bool PPCDAGToDAGISel::tryAndWithMask(SDNode *N) {
- if (N->getOpcode() != ISD::AND)
+bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
+ unsigned Imm;
+ if (!isInt32Immediate(N->getOperand(1), Imm))
return false;
SDLoc dl(N);
SDValue Val = N->getOperand(0);
- unsigned Imm, Imm2, SH, MB, ME;
- uint64_t Imm64;
-
+ unsigned SH, MB, ME;
// If this is an and of a value rotated between 0 and 31 bits and then and'd
// with a mask, emit rlwinm
- if (isInt32Immediate(N->getOperand(1), Imm) &&
- isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
- SDValue Val = N->getOperand(0).getOperand(0);
- SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
- getI32Imm(ME, dl) };
+ if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) {
+ Val = Val.getOperand(0);
+ SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl)};
CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
return true;
}
// If this is just a masked value where the input is not handled, and
// is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
- if (isInt32Immediate(N->getOperand(1), Imm)) {
- if (isRunOfOnes(Imm, MB, ME) &&
- N->getOperand(0).getOpcode() != ISD::ROTL) {
- SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
- getI32Imm(ME, dl) };
- CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
- return true;
- }
- // AND X, 0 -> 0, not "rlwinm 32".
- if (Imm == 0) {
- ReplaceUses(SDValue(N, 0), N->getOperand(1));
- return true;
- }
+ if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
+ SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl)};
+ CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ return true;
+ }
- // ISD::OR doesn't get all the bitfield insertion fun.
- // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
- // bitfield insert.
- if (N->getOperand(0).getOpcode() == ISD::OR &&
- isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
- // The idea here is to check whether this is equivalent to:
- // (c1 & m) | (x & ~m)
- // where m is a run-of-ones mask. The logic here is that, for each bit in
- // c1 and c2:
- // - if both are 1, then the output will be 1.
- // - if both are 0, then the output will be 0.
- // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
- // come from x.
- // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
- // be 0.
- // If that last condition is never the case, then we can form m from the
- // bits that are the same between c1 and c2.
- unsigned MB, ME;
- if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {
- SDValue Ops[] = { N->getOperand(0).getOperand(0),
- N->getOperand(0).getOperand(1),
- getI32Imm(0, dl), getI32Imm(MB, dl),
- getI32Imm(ME, dl) };
- ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
- return true;
- }
- }
- } else if (isInt64Immediate(N->getOperand(1).getNode(), Imm64)) {
- // If this is a 64-bit zero-extension mask, emit rldicl.
- if (isMask_64(Imm64)) {
- MB = 64 - countTrailingOnes(Imm64);
- SH = 0;
-
- if (Val.getOpcode() == ISD::ANY_EXTEND) {
- auto Op0 = Val.getOperand(0);
- if ( Op0.getOpcode() == ISD::SRL &&
- isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
-
- auto ResultType = Val.getNode()->getValueType(0);
- auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
- ResultType);
- SDValue IDVal (ImDef, 0);
-
- Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
- ResultType, IDVal, Op0.getOperand(0),
- getI32Imm(1, dl)), 0);
- SH = 64 - Imm;
- }
- }
+ // AND X, 0 -> 0, not "rlwinm 32".
+ if (Imm == 0) {
+ ReplaceUses(SDValue(N, 0), N->getOperand(1));
+ return true;
+ }
- // If the operand is a logical right shift, we can fold it into this
- // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
- // for n <= mb. The right shift is really a left rotate followed by a
- // mask, and this mask is a more-restrictive sub-mask of the mask implied
- // by the shift.
- if (Val.getOpcode() == ISD::SRL &&
- isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
- assert(Imm < 64 && "Illegal shift amount");
- Val = Val.getOperand(0);
- SH = 64 - Imm;
- }
+ return false;
+}
- SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
- CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
- return true;
- } else if (isMask_64(~Imm64)) {
- // If this is a negated 64-bit zero-extension mask,
- // i.e. the immediate is a sequence of ones from most significant side
- // and all zero for reminder, we should use rldicr.
- MB = 63 - countTrailingOnes(~Imm64);
- SH = 0;
- SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
- CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
- return true;
- }
+bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
+ uint64_t Imm64;
+ if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
+ return false;
- // It is not 16-bit imm that means we need two instructions at least if
- // using "and" instruction. Try to exploit it with rotate mask instructions.
- if (isRunOfOnes64(Imm64, MB, ME)) {
- if (MB >= 32 && MB <= ME) {
- // MB ME
- // +----------------------+
- // |xxxxxxxxxxx00011111000|
- // +----------------------+
- // 0 32 64
- // We can only do it if the MB is larger than 32 and MB <= ME
- // as RLWINM will replace the content of [0 - 32) with [32 - 64) even
- // we didn't rotate it.
- SDValue Ops[] = { Val, getI64Imm(0, dl), getI64Imm(MB - 32, dl),
- getI64Imm(ME - 32, dl) };
- CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
- return true;
- }
- // TODO - handle it with rldicl + rldicl
- }
+ unsigned MB, ME;
+ if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) {
+ // MB ME
+ // +----------------------+
+ // |xxxxxxxxxxx00011111000|
+ // +----------------------+
+ // 0 32 64
+ // We can only do this if MB is at least 32 and MB <= ME,
+ // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even
+ // though we don't rotate.
+ SDLoc dl(N);
+ SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl),
+ getI64Imm(ME - 32, dl)};
+ CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
+ return true;
+ }
+
+ return false;
+}
+
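
A concrete instance of the condition checked above, sketched standalone: for a contiguous run of ones, isRunOfOnes64 reports MB as the number of leading zeros and ME as 63 minus the number of trailing zeros (MSB-numbered). The 0x0000000000FF0000 mask and the GCC/Clang builtins are illustrative choices.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Imm64 = 0x0000000000FF0000ULL;    // contiguous run of ones
      unsigned MB = __builtin_clzll(Imm64);      // 40 (MSB-numbered first one)
      unsigned ME = 63 - __builtin_ctzll(Imm64); // 47 (MSB-numbered last one)
      // The run lies entirely in the low word (MB >= 32), so RLWINM8 with
      // SH=0, MB-32=8, ME-32=15 masks the same bits without touching [0, 32).
      assert(MB >= 32 && MB <= ME);
      uint64_t Mask32 =
          (0xFFFFFFFFu >> (MB - 32)) & (0xFFFFFFFFu << (31 - (ME - 32)));
      uint64_t X = 0x123456789ABCDEF0ULL;
      assert((X & Imm64) == (X & Mask32));
      return 0;
    }
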
+bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
+ uint64_t Imm64;
+ if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
+ return false;
+
+ // Do nothing if it is a 16-bit imm, as the pattern in the .td file already
+ // handles it well with "andi.".
+ if (isUInt<16>(Imm64))
+ return false;
+
+ SDLoc Loc(N);
+ SDValue Val = N->getOperand(0);
+
+ // Optimize with two rldicl instructions as follows:
+ // Add the missing bits on the left of the mask and check that the result is
+ // a wrapped run of ones, i.e.
+ // Change pattern |0001111100000011111111|
+ // to |1111111100000011111111|.
+ unsigned NumOfLeadingZeros = countLeadingZeros(Imm64);
+ if (NumOfLeadingZeros != 0)
+ Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);
+
+ unsigned MB, ME;
+ if (!isRunOfOnes64(Imm64, MB, ME))
+ return false;
+
+ // ME MB MB-ME+63
+ // +----------------------+ +----------------------+
+ // |1111111100000011111111| -> |0000001111111111111111|
+ // +----------------------+ +----------------------+
+ // 0 63 0 63
+ // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
+ unsigned OnesOnLeft = ME + 1;
+ unsigned ZerosInBetween = (MB - ME + 63) & 63;
+ // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
+ // on the left the bits that are already zeros in the mask.
+ Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
+ getI64Imm(OnesOnLeft, Loc),
+ getI64Imm(ZerosInBetween, Loc)),
+ 0);
+ // MB-ME+63 ME MB
+ // +----------------------+ +----------------------+
+ // |0000001111111111111111| -> |0001111100000011111111|
+ // +----------------------+ +----------------------+
+ // 0 63 0 63
+ // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
+ // left the number of ones we previously added.
+ SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
+ getI64Imm(NumOfLeadingZeros, Loc)};
+ CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
+ return true;
+}
+
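
The two-rldicl rewrite above can be checked in isolation. Below is a minimal sketch; the mask, the sample input, and the local model of rldicl (rotate left by SH, then clear the MB high-order bits, per the ISA description) are our illustrative choices.

    #include <cassert>
    #include <cstdint>

    static uint64_t rotl64(uint64_t V, unsigned N) {
      N &= 63;
      return N ? (V << N) | (V >> (64 - N)) : V;
    }
    // rldicl RA, RS, SH, MB: rotate left by SH, then keep bits MB..63
    // (MSB-numbered), i.e. clear the MB high-order bits.
    static uint64_t rldicl(uint64_t RS, unsigned SH, unsigned MB) {
      uint64_t Mask = MB ? (~0ULL >> MB) : ~0ULL;
      return rotl64(RS, SH) & Mask;
    }

    int main() {
      // 8 leading zeros, 8 ones, 32 zeros, 16 trailing ones: not a plain run,
      // but adding the 8 missing leading ones yields a wrapped run of ones.
      uint64_t Imm64 = 0x00FF00000000FFFFULL;
      unsigned NumOfLeadingZeros = 8;
      unsigned OnesOnLeft = 16;      // ME + 1 for the wrapped mask
      unsigned ZerosInBetween = 32;  // (MB - ME + 63) & 63
      uint64_t X = 0x123456789ABCDEF0ULL;
      uint64_t Step1 = rldicl(X, OnesOnLeft, ZerosInBetween);
      uint64_t Step2 = rldicl(Step1, 64 - OnesOnLeft, NumOfLeadingZeros);
      assert(Step2 == (X & Imm64));
      return 0;
    }
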
+bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
+ unsigned Imm;
+ if (!isInt32Immediate(N->getOperand(1), Imm))
+ return false;
+
+ SDValue Val = N->getOperand(0);
+ unsigned Imm2;
+ // ISD::OR doesn't get all the bitfield insertion fun.
+ // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
+ // bitfield insert.
+ if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2))
+ return false;
+
+ // The idea here is to check whether this is equivalent to:
+ // (c1 & m) | (x & ~m)
+ // where m is a run-of-ones mask. The logic here is that, for each bit in
+ // c1 and c2:
+ // - if both are 1, then the output will be 1.
+ // - if both are 0, then the output will be 0.
+ // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
+ // come from x.
+ // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
+ // be 0.
+ // If that last condition is never the case, then we can form m from the
+ // bits that are the same between c1 and c2.
+ unsigned MB, ME;
+ if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) {
+ SDLoc dl(N);
+ SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl),
+ getI32Imm(MB, dl), getI32Imm(ME, dl)};
+ ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
+ return true;
}
return false;
}
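
The comment above boils down to the identity (x | c1) & c2 == (c1 & m) | (x & ~m) whenever m = ~(c1 ^ c2) and no bit has c1 = 1 with c2 = 0; the run-of-ones requirement only decides whether a single rlwimi can encode m. A brute-force standalone check with arbitrarily chosen constants:

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    int main() {
      // In the selector above, Imm is c2 (the AND mask) and Imm2 is c1 (the OR
      // constant). The constants here are arbitrary but satisfy both conditions.
      uint32_t C1 = 0x00F00000, C2 = 0x00FFFF00;
      uint32_t M = ~(C1 ^ C2);   // 0xFFF000FF: bits where c1 and c2 agree
      assert((~C2 & C1) == 0);   // no bit has c1 = 1 and c2 = 0
      // For a single rlwimi, M must additionally be a (possibly wrapped) run
      // of ones, which 0xFFF000FF is.
      for (uint32_t X : {0u, 0xFFFFFFFFu, 0x12345678u, 0xDEADBEEFu})
        assert(((X | C1) & C2) == ((C1 & M) | (X & ~M)));
      return 0;
    }
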
+bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
+ uint64_t Imm64;
+ if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
+ return false;
+
+ // If this is a 64-bit zero-extension mask, emit rldicl.
+ unsigned MB = 64 - countTrailingOnes(Imm64);
+ unsigned SH = 0;
+ unsigned Imm;
+ SDValue Val = N->getOperand(0);
+ SDLoc dl(N);
+
+ if (Val.getOpcode() == ISD::ANY_EXTEND) {
+ auto Op0 = Val.getOperand(0);
+ if (Op0.getOpcode() == ISD::SRL &&
+ isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
+
+ auto ResultType = Val.getNode()->getValueType(0);
+ auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType);
+ SDValue IDVal(ImDef, 0);
+
+ Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType,
+ IDVal, Op0.getOperand(0),
+ getI32Imm(1, dl)),
+ 0);
+ SH = 64 - Imm;
+ }
+ }
+
+ // If the operand is a logical right shift, we can fold it into this
+ // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
+ // for n <= mb. The right shift is really a left rotate followed by a
+ // mask, and this mask is a more-restrictive sub-mask of the mask implied
+ // by the shift.
+ if (Val.getOpcode() == ISD::SRL &&
+ isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
+ assert(Imm < 64 && "Illegal shift amount");
+ Val = Val.getOperand(0);
+ SH = 64 - Imm;
+ }
+
+ SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)};
+ CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
+ return true;
+}
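For reference, a small standalone model of the rldicl semantics relied on here, in plain C++ outside the DAG (MB counts bits cleared from the most-significant end; the mask value and shift amount below are illustrative):

#include <cassert>
#include <cstdint>

static uint64_t rotl64(uint64_t X, unsigned R) {
  R &= 63;
  return R ? (X << R) | (X >> (64 - R)) : X;
}

// rldicl X, SH, MB: rotate left by SH, then keep only the low 64 - MB bits.
static uint64_t rldicl(uint64_t X, unsigned SH, unsigned MB) {
  return rotl64(X, SH) & (~0ULL >> MB); // MB in [0, 63]
}

int main() {
  // A zero-extension mask: Imm64 = 0x00000000FFFFFFFF, so MB = 64 - 32 = 32.
  const uint64_t Imm64 = 0x00000000FFFFFFFFULL;
  const unsigned N = 8; // srl amount, N <= MB
  for (uint64_t X : {0x123456789ABCDEF0ULL, ~0ULL, 0ULL}) {
    assert(rldicl(X, 0, 32) == (X & Imm64));
    // The srl fold from the comment: (x >> N) & Imm64 == rldicl(x, 64-N, MB).
    assert(((X >> N) & Imm64) == rldicl(X, 64 - N, 32));
  }
  return 0;
}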
+
+bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
+ uint64_t Imm64;
+ if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
+ !isMask_64(~Imm64))
+ return false;
+
+ // If this is a negated 64-bit zero-extension mask,
+  // i.e. the immediate is a sequence of ones from the most significant side
+  // and all zeros for the remainder, we should use rldicr.
+ unsigned MB = 63 - countTrailingOnes(~Imm64);
+ unsigned SH = 0;
+ SDLoc dl(N);
+ SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)};
+ CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
+ return true;
+}
+
+bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
+ assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected");
+ uint64_t Imm64;
+ unsigned MB, ME;
+ SDValue N0 = N->getOperand(0);
+
+  // We won't get fewer instructions if the imm is a 32-bit integer.
+  // rldimi requires the imm to be a run of consecutive ones with zeros on
+  // both sides. Also, make sure the first Op has only one use, otherwise this
+  // may increase register pressure since rldimi is destructive.
+ if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
+ isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse())
+ return false;
+
+ unsigned SH = 63 - ME;
+ SDLoc Dl(N);
+  // Use selectI64Imm to build an LI of -1 as the insert source instead of
+  // putting Imm64 into the node directly.
+ SDValue Ops[] = {
+ N->getOperand(0),
+ SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0),
+ getI32Imm(SH, Dl), getI32Imm(MB, Dl)};
+ CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops);
+ return true;
+}
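To make the SH/MB choice above concrete, here is an illustrative plain-C++ model (outside the DAG; bit positions follow IBM numbering, bit 0 being the MSB) showing that inserting from an all-ones source under MASK(MB, ME) with SH = 63 - ME is the same as OR-ing in the run-of-ones immediate:

#include <cassert>
#include <cstdint>

// MASK(MB, ME): ones in IBM bit positions MB..ME (bit 0 = MSB), MB <= ME.
static uint64_t maskMBME(unsigned MB, unsigned ME) {
  return (~0ULL >> MB) & (~0ULL << (63 - ME));
}

static uint64_t rotl64(uint64_t X, unsigned R) {
  R &= 63;
  return R ? (X << R) | (X >> (64 - R)) : X;
}

// rldimi RA, RS, SH, MB: insert rotl(RS, SH) into RA under MASK(MB, 63 - SH).
static uint64_t rldimi(uint64_t RA, uint64_t RS, unsigned SH, unsigned MB) {
  uint64_t M = maskMBME(MB, 63 - SH);
  return (rotl64(RS, SH) & M) | (RA & ~M);
}

int main() {
  const unsigned MB = 16, ME = 47;         // a run of ones in bits 16..47
  const uint64_t Imm64 = maskMBME(MB, ME); // 0x0000FFFFFFFF0000
  const unsigned SH = 63 - ME;             // the shift chosen above
  for (uint64_t X : {0x123456789ABCDEF0ULL, 0ULL, ~0ULL})
    assert(rldimi(X, ~0ULL, SH, MB) == (X | Imm64)); // rldimi of -1 is the OR
  return 0;
}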
+
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
void PPCDAGToDAGISel::Select(SDNode *N) {
@@ -4551,7 +4671,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
case PPCISD::ADDI_TLSGD_L_ADDR: {
const Module *Mod = MF->getFunction().getParent();
if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
- !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() ||
+ !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() ||
Mod->getPICLevel() == PICLevel::SmallPIC)
break;
// Attach global base pointer on GETtlsADDR32 node in order to
@@ -4560,8 +4680,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
} break;
case PPCISD::CALL: {
if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
- !TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt() ||
- !PPCSubTarget->isTargetELF())
+ !TM.isPositionIndependent() || !Subtarget->isSecurePlt() ||
+ !Subtarget->isTargetELF())
break;
SDValue Op = N->getOperand(1);
@@ -4625,7 +4745,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
case ISD::STORE: {
// Change TLS initial-exec D-form stores to X-form stores.
StoreSDNode *ST = cast<StoreSDNode>(N);
- if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() &&
+ if (EnableTLSOpt && Subtarget->isELFv2ABI() &&
ST->getAddressingMode() != ISD::PRE_INC)
if (tryTLSXFormStore(ST))
return;
@@ -4639,7 +4759,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// Normal loads are handled by code generated from the .td file.
if (LD->getAddressingMode() != ISD::PRE_INC) {
// Change TLS initial-exec D-form loads to X-form loads.
- if (EnableTLSOpt && PPCSubTarget->isELFv2ABI())
+ if (EnableTLSOpt && Subtarget->isELFv2ABI())
if (tryTLSXFormLoad(LD))
return;
break;
@@ -4730,7 +4850,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
case ISD::AND:
// If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
- if (tryAndWithMask(N))
+ if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
+ tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))
return;
// Other cases are autogenerated.
@@ -4753,10 +4874,15 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
}
+ // If this is 'or' against an imm with consecutive ones and both sides zero,
+ // try to emit rldimi
+ if (tryAsSingleRLDIMI(N))
+ return;
+
// OR with a 32-bit immediate can be handled by ori + oris
// without creating an immediate in a GPR.
uint64_t Imm64 = 0;
- bool IsPPC64 = PPCSubTarget->isPPC64();
+ bool IsPPC64 = Subtarget->isPPC64();
if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
(Imm64 & ~0xFFFFFFFFuLL) == 0) {
      // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
@@ -4779,7 +4905,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// XOR with a 32-bit immediate can be handled by xori + xoris
// without creating an immediate in a GPR.
uint64_t Imm64 = 0;
- bool IsPPC64 = PPCSubTarget->isPPC64();
+ bool IsPPC64 = Subtarget->isPPC64();
if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
(Imm64 & ~0xFFFFFFFFuLL) == 0) {
      // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
@@ -4866,11 +4992,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
bool isPPC64 = (PtrVT == MVT::i64);
// If this is a select of i1 operands, we'll pattern match it.
- if (PPCSubTarget->useCRBits() &&
- N->getOperand(0).getValueType() == MVT::i1)
+ if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1)
break;
- if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) {
+ if (Subtarget->isISA3_0() && Subtarget->isPPC64()) {
bool NeedSwapOps = false;
bool IsUnCmp = false;
if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
@@ -4945,7 +5070,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
unsigned BROpc =
- getPredicateForSetCC(CC, N->getOperand(0).getValueType(), PPCSubTarget);
+ getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget);
unsigned SelectCCOp;
if (N->getValueType(0) == MVT::i32)
@@ -4953,28 +5078,28 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
else if (N->getValueType(0) == MVT::i64)
SelectCCOp = PPC::SELECT_CC_I8;
else if (N->getValueType(0) == MVT::f32) {
- if (PPCSubTarget->hasP8Vector())
+ if (Subtarget->hasP8Vector())
SelectCCOp = PPC::SELECT_CC_VSSRC;
- else if (PPCSubTarget->hasSPE())
+ else if (Subtarget->hasSPE())
SelectCCOp = PPC::SELECT_CC_SPE4;
else
SelectCCOp = PPC::SELECT_CC_F4;
} else if (N->getValueType(0) == MVT::f64) {
- if (PPCSubTarget->hasVSX())
+ if (Subtarget->hasVSX())
SelectCCOp = PPC::SELECT_CC_VSFRC;
- else if (PPCSubTarget->hasSPE())
+ else if (Subtarget->hasSPE())
SelectCCOp = PPC::SELECT_CC_SPE;
else
SelectCCOp = PPC::SELECT_CC_F8;
} else if (N->getValueType(0) == MVT::f128)
SelectCCOp = PPC::SELECT_CC_F16;
- else if (PPCSubTarget->hasSPE())
+ else if (Subtarget->hasSPE())
SelectCCOp = PPC::SELECT_CC_SPE;
- else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
+ else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
SelectCCOp = PPC::SELECT_CC_QFRC;
- else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
+ else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
SelectCCOp = PPC::SELECT_CC_QSRC;
- else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
+ else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
SelectCCOp = PPC::SELECT_CC_QBRC;
else if (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)
@@ -4988,8 +5113,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
case ISD::VECTOR_SHUFFLE:
- if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
- N->getValueType(0) == MVT::v2i64)) {
+ if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
+ N->getValueType(0) == MVT::v2i64)) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
@@ -5024,7 +5149,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// For little endian, we must swap the input operands and adjust
// the mask elements (reverse and invert them).
- if (PPCSubTarget->isLittleEndian()) {
+ if (Subtarget->isLittleEndian()) {
std::swap(Op1, Op2);
unsigned tmp = DM[0];
DM[0] = 1 - DM[1];
@@ -5041,7 +5166,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
break;
case PPCISD::BDNZ:
case PPCISD::BDZ: {
- bool IsPPC64 = PPCSubTarget->isPPC64();
+ bool IsPPC64 = Subtarget->isPPC64();
SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
@@ -5069,7 +5194,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
case ISD::BR_CC: {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
unsigned PCC =
- getPredicateForSetCC(CC, N->getOperand(2).getValueType(), PPCSubTarget);
+ getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);
if (N->getOperand(2).getValueType() == MVT::i1) {
unsigned Opc;
@@ -5122,11 +5247,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
case PPCISD::TOC_ENTRY: {
- const bool isPPC64 = PPCSubTarget->isPPC64();
- const bool isELFABI = PPCSubTarget->isSVR4ABI();
- const bool isAIXABI = PPCSubTarget->isAIXABI();
-
- assert(!PPCSubTarget->isDarwin() && "TOC is an ELF/XCOFF construct");
+ const bool isPPC64 = Subtarget->isPPC64();
+ const bool isELFABI = Subtarget->isSVR4ABI();
+ const bool isAIXABI = Subtarget->isAIXABI();
    // PowerPC only supports the small, medium and large code models.
const CodeModel::Model CModel = TM.getCodeModel();
@@ -5177,7 +5300,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We
// generate two instructions as described below. The first source operand
// is a symbol reference. If it must be toc-referenced according to
- // PPCSubTarget, we generate:
+ // Subtarget, we generate:
// [32-bit AIX]
// LWZtocL(@sym, ADDIStocHA(%r2, @sym))
// [64-bit ELF/AIX]
@@ -5209,7 +5332,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
case PPCISD::PPC32_PICGOT:
// Generate a PIC-safe GOT reference.
- assert(PPCSubTarget->is32BitELFABI() &&
+ assert(Subtarget->is32BitELFABI() &&
"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
PPCLowering->getPointerTy(CurDAG->getDataLayout()),
@@ -5306,7 +5429,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
"Only OR nodes are supported for CMPB");
SDValue Res;
- if (!PPCSubTarget->hasCMPB())
+ if (!Subtarget->hasCMPB())
return Res;
if (N->getValueType(0) != MVT::i32 &&
@@ -5517,7 +5640,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
// only one instruction (like a zero or one), then we should fold in those
// operations with the select.
void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
- if (!PPCSubTarget->useCRBits())
+ if (!Subtarget->useCRBits())
return;
if (N->getOpcode() != ISD::ZERO_EXTEND &&
@@ -5549,8 +5672,7 @@ void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
- User->getValueType(0),
- O0.getNode(), O1.getNode());
+ User->getValueType(0), {O0, O1});
};
// FIXME: When the semantics of the interaction between select and undef
@@ -6259,7 +6381,7 @@ static bool PeepholePPC64ZExtGather(SDValue Op32,
}
void PPCDAGToDAGISel::PeepholePPC64ZExt() {
- if (!PPCSubTarget->isPPC64())
+ if (!Subtarget->isPPC64())
return;
// When we zero-extend from i32 to i64, we use a pattern like this:
@@ -6428,10 +6550,6 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() {
}
void PPCDAGToDAGISel::PeepholePPC64() {
- // These optimizations are currently supported only for 64-bit SVR4.
- if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64())
- return;
-
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
while (Position != CurDAG->allnodes_begin()) {
@@ -6544,7 +6662,8 @@ void PPCDAGToDAGISel::PeepholePPC64() {
int MaxDisplacement = 7;
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
const GlobalValue *GV = GA->getGlobal();
- MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement);
+ Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
+ MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
}
bool UpdateHBase = false;
@@ -6610,10 +6729,10 @@ void PPCDAGToDAGISel::PeepholePPC64() {
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
+ Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
// We can't perform this optimization for data whose alignment
// is insufficient for the instruction encoding.
- if (GV->getAlignment() < 4 &&
- (RequiresMod4Offset || (Offset % 4) != 0)) {
+ if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {
LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
continue;
}
@@ -6621,8 +6740,7 @@ void PPCDAGToDAGISel::PeepholePPC64() {
} else if (ConstantPoolSDNode *CP =
dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
const Constant *C = CP->getConstVal();
- ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
- CP->getAlignment(),
+ ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),
Offset, Flags);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index ca1649fae258..ddfbd04e1ebc 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -55,7 +55,6 @@
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -118,14 +117,13 @@ cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
-static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
-cl::desc("enable quad precision float support on ppc"), cl::Hidden);
-
static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
cl::desc("use absolute jump tables on ppc"), cl::Hidden);
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
+STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
+STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
@@ -260,15 +258,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// PowerPC has no SREM/UREM instructions unless we are on P9
// On P9 we may use a hardware instruction to compute the remainder.
- // The instructions are not legalized directly because in the cases where the
- // result of both the remainder and the division is required it is more
- // efficient to compute the remainder from the result of the division rather
- // than use the remainder instruction.
+ // When the result of both the remainder and the division is required it is
+ // more efficient to compute the remainder from the result of the division
+ // rather than use the remainder instruction. The instructions are legalized
+ // directly because the DivRemPairsPass performs the transformation at the IR
+ // level.
if (Subtarget.isISA3_0()) {
- setOperationAction(ISD::SREM, MVT::i32, Custom);
- setOperationAction(ISD::UREM, MVT::i32, Custom);
- setOperationAction(ISD::SREM, MVT::i64, Custom);
- setOperationAction(ISD::UREM, MVT::i64, Custom);
+ setOperationAction(ISD::SREM, MVT::i32, Legal);
+ setOperationAction(ISD::UREM, MVT::i32, Legal);
+ setOperationAction(ISD::SREM, MVT::i64, Legal);
+ setOperationAction(ISD::UREM, MVT::i64, Legal);
} else {
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
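The rewrite the new comment refers to needs no PPC-specific code to illustrate: when both the quotient and the remainder are wanted, the remainder is recovered from the quotient with a multiply and a subtract, so no remainder instruction (or libcall) is required. A minimal sketch of that identity:

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t A : {7, -7, 100, -100, 0})
    for (int32_t B : {3, -3, 5}) {
      int32_t Quot = A / B;
      int32_t Rem = A - Quot * B; // what a DivRemPairs-style rewrite emits
      assert(Rem == A % B);       // same value the remainder op would give
    }
  return 0;
}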
@@ -286,6 +285,40 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ // Handle constrained floating-point operations of scalar.
+ // TODO: Handle SPE specific operation.
+ setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
+
+ setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
+ if (Subtarget.hasVSX())
+ setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f64, Legal);
+
+ if (Subtarget.hasFSQRT()) {
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
+ }
+
+ if (Subtarget.hasFPRND()) {
+ setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
+
+ setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
+ }
+
// We don't support sin/cos/sqrt/fmod/pow
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
@@ -390,6 +423,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (Subtarget.hasSPE()) {
// SPE has built-in conversions
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
@@ -539,9 +575,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
- if (Subtarget.hasSPE())
+ if (Subtarget.hasSPE()) {
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
- else
+ } else
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
}
@@ -584,6 +621,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
}
if (Subtarget.hasAltivec()) {
+ for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
+ setOperationAction(ISD::SADDSAT, VT, Legal);
+ setOperationAction(ISD::SSUBSAT, VT, Legal);
+ setOperationAction(ISD::UADDSAT, VT, Legal);
+ setOperationAction(ISD::USUBSAT, VT, Legal);
+ }
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
@@ -738,6 +781,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (!Subtarget.hasP8Altivec())
setOperationAction(ISD::ABS, MVT::v2i64, Expand);
+ // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
+ setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
// With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
if (Subtarget.hasAltivec())
for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
@@ -764,7 +809,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
else
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
- setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i16, Legal);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
@@ -811,12 +856,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
setOperationAction(ISD::FROUND, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
setOperationAction(ISD::MUL, MVT::v2f64, Legal);
setOperationAction(ISD::FMA, MVT::v2f64, Legal);
@@ -906,6 +955,37 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
+ // Handle constrained floating-point operations of vector.
+  // The predicate is `hasVSX` because Altivec instructions do not raise
+  // floating-point exceptions, while the VSX vector instructions do.
+ setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
+
addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
}
@@ -925,44 +1005,59 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SRL, MVT::v1i128, Legal);
setOperationAction(ISD::SRA, MVT::v1i128, Expand);
- if (EnableQuadPrecision) {
- addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
- setOperationAction(ISD::FADD, MVT::f128, Legal);
- setOperationAction(ISD::FSUB, MVT::f128, Legal);
- setOperationAction(ISD::FDIV, MVT::f128, Legal);
- setOperationAction(ISD::FMUL, MVT::f128, Legal);
- setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
- // No extending loads to f128 on PPC.
- for (MVT FPT : MVT::fp_valuetypes())
- setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
- setOperationAction(ISD::FMA, MVT::f128, Legal);
- setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
- setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
- setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
- setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
- setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
- setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
-
- setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
- setOperationAction(ISD::FRINT, MVT::f128, Legal);
- setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
- setOperationAction(ISD::FCEIL, MVT::f128, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
- setOperationAction(ISD::FROUND, MVT::f128, Legal);
-
- setOperationAction(ISD::SELECT, MVT::f128, Expand);
- setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
- setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
- setTruncStoreAction(MVT::f128, MVT::f64, Expand);
- setTruncStoreAction(MVT::f128, MVT::f32, Expand);
- setOperationAction(ISD::BITCAST, MVT::i128, Custom);
- // No implementation for these ops for PowerPC.
- setOperationAction(ISD::FSIN , MVT::f128, Expand);
- setOperationAction(ISD::FCOS , MVT::f128, Expand);
- setOperationAction(ISD::FPOW, MVT::f128, Expand);
- setOperationAction(ISD::FPOWI, MVT::f128, Expand);
- setOperationAction(ISD::FREM, MVT::f128, Expand);
- }
+ addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
+ setOperationAction(ISD::FADD, MVT::f128, Legal);
+ setOperationAction(ISD::FSUB, MVT::f128, Legal);
+ setOperationAction(ISD::FDIV, MVT::f128, Legal);
+ setOperationAction(ISD::FMUL, MVT::f128, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
+ // No extending loads to f128 on PPC.
+ for (MVT FPT : MVT::fp_valuetypes())
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
+ setOperationAction(ISD::FMA, MVT::f128, Legal);
+ setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
+
+ setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
+ setOperationAction(ISD::FRINT, MVT::f128, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f128, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
+ setOperationAction(ISD::FROUND, MVT::f128, Legal);
+
+ setOperationAction(ISD::SELECT, MVT::f128, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
+ setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i128, Custom);
+ // No implementation for these ops for PowerPC.
+ setOperationAction(ISD::FSIN, MVT::f128, Expand);
+ setOperationAction(ISD::FCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FPOW, MVT::f128, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f128, Expand);
+ setOperationAction(ISD::FREM, MVT::f128, Expand);
+
+ // Handle constrained floating-point operations of fp128
+ setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
@@ -1135,6 +1230,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
}
+
+ // TODO: Handle constrained floating-point operations of v4f64
}
if (Subtarget.has64BitSupport())
@@ -1169,6 +1266,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::MUL);
+ setTargetDAGCombine(ISD::FMA);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::BUILD_VECTOR);
if (Subtarget.hasFPCVT())
@@ -1208,34 +1306,18 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::VSELECT);
}
- // Darwin long double math library functions have $LDBL128 appended.
- if (Subtarget.isDarwin()) {
- setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
- setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
- setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
- setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
- setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
- setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
- setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
- setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
- setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
- setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
- }
-
- if (EnableQuadPrecision) {
- setLibcallName(RTLIB::LOG_F128, "logf128");
- setLibcallName(RTLIB::LOG2_F128, "log2f128");
- setLibcallName(RTLIB::LOG10_F128, "log10f128");
- setLibcallName(RTLIB::EXP_F128, "expf128");
- setLibcallName(RTLIB::EXP2_F128, "exp2f128");
- setLibcallName(RTLIB::SIN_F128, "sinf128");
- setLibcallName(RTLIB::COS_F128, "cosf128");
- setLibcallName(RTLIB::POW_F128, "powf128");
- setLibcallName(RTLIB::FMIN_F128, "fminf128");
- setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
- setLibcallName(RTLIB::POWI_F128, "__powikf2");
- setLibcallName(RTLIB::REM_F128, "fmodf128");
- }
+ setLibcallName(RTLIB::LOG_F128, "logf128");
+ setLibcallName(RTLIB::LOG2_F128, "log2f128");
+ setLibcallName(RTLIB::LOG10_F128, "log10f128");
+ setLibcallName(RTLIB::EXP_F128, "expf128");
+ setLibcallName(RTLIB::EXP2_F128, "exp2f128");
+ setLibcallName(RTLIB::SIN_F128, "sinf128");
+ setLibcallName(RTLIB::COS_F128, "cosf128");
+ setLibcallName(RTLIB::POW_F128, "powf128");
+ setLibcallName(RTLIB::FMIN_F128, "fminf128");
+ setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
+ setLibcallName(RTLIB::POWI_F128, "__powikf2");
+ setLibcallName(RTLIB::REM_F128, "fmodf128");
// With 32 condition bits, we don't need to sink (and duplicate) compares
// aggressively in CodeGenPrep.
@@ -1245,8 +1327,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
}
setMinFunctionAlignment(Align(4));
- if (Subtarget.isDarwin())
- setPrefFunctionAlignment(Align(16));
switch (Subtarget.getCPUDirective()) {
default: break;
@@ -1263,6 +1343,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
case PPC::DIR_PWR7:
case PPC::DIR_PWR8:
case PPC::DIR_PWR9:
+ case PPC::DIR_PWR10:
case PPC::DIR_PWR_FUTURE:
setPrefLoopAlignment(Align(16));
setPrefFunctionAlignment(Align(16));
@@ -1298,27 +1379,33 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
MaxLoadsPerMemcmp = 8;
MaxLoadsPerMemcmpOptSize = 4;
}
+
+ // Let the subtarget (CPU) decide if a predictable select is more expensive
+ // than the corresponding branch. This information is used in CGP to decide
+ // when to convert selects into branches.
+ PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
}
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
-static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
- unsigned MaxMaxAlign) {
+static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
if (MaxAlign == MaxMaxAlign)
return;
if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
- if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
- MaxAlign = 32;
- else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
- MaxAlign = 16;
+ if (MaxMaxAlign >= 32 &&
+ VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
+ MaxAlign = Align(32);
+ else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
+ MaxAlign < 16)
+ MaxAlign = Align(16);
} else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- unsigned EltAlign = 0;
+ Align EltAlign;
getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
for (auto *EltTy : STy->elements()) {
- unsigned EltAlign = 0;
+ Align EltAlign;
getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
@@ -1332,16 +1419,12 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const {
- // Darwin passes everything on 4 byte boundary.
- if (Subtarget.isDarwin())
- return 4;
-
// 16byte and wider vectors are passed on 16byte boundary.
// The rest is 8 on PPC64 and 4 on PPC32 boundary.
- unsigned Align = Subtarget.isPPC64() ? 8 : 4;
+ Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
if (Subtarget.hasAltivec() || Subtarget.hasQPX())
- getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
- return Align;
+ getMaxByValAlign(Ty, Alignment, Subtarget.hasQPX() ? Align(32) : Align(16));
+ return Alignment.value();
}
bool PPCTargetLowering::useSoftFloat() const {
@@ -1356,6 +1439,16 @@ bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
return VT.isScalarInteger();
}
+/// isMulhCheaperThanMulShift - Return true if a mulh[s|u] node for a specific
+/// type is cheaper than a multiply followed by a shift.
+/// This is true for words and doublewords on 64-bit PowerPC.
+bool PPCTargetLowering::isMulhCheaperThanMulShift(EVT Type) const {
+ if (Subtarget.isPPC64() && (isOperationLegal(ISD::MULHS, Type) ||
+ isOperationLegal(ISD::MULHU, Type)))
+ return true;
+ return TargetLowering::isMulhCheaperThanMulShift(Type);
+}
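For intuition, ISD::MULHU is the high 64 bits of the full 128-bit product; on PPC64 it maps to a single mulhdu, which is why the hook above reports it as cheaper than a widening multiply followed by a shift. A standalone check (uses the GCC/Clang __int128 extension purely for illustration):

#include <cassert>
#include <cstdint>

// The high half of a full 64x64 -> 128 multiply, i.e. what ISD::MULHU yields.
static uint64_t mulhu64(uint64_t A, uint64_t B) {
  return (uint64_t)(((unsigned __int128)A * B) >> 64);
}

int main() {
  assert(mulhu64(1ULL << 32, 1ULL << 32) == 1);           // 2^64 -> high 1
  assert(mulhu64(~0ULL, ~0ULL) == 0xFFFFFFFFFFFFFFFEULL); // (2^64 - 1)^2 >> 64
  assert(mulhu64(3, 7) == 0);                             // small products
  return 0;
}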
+
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;
@@ -1377,10 +1470,12 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::FRE: return "PPCISD::FRE";
case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
case PPCISD::STFIWX: return "PPCISD::STFIWX";
- case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
- case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
+ case PPCISD::XXSPLTI_SP_TO_DP:
+ return "PPCISD::XXSPLTI_SP_TO_DP";
+ case PPCISD::XXSPLTI32DX:
+ return "PPCISD::XXSPLTI32DX";
case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
case PPCISD::VECSHL: return "PPCISD::VECSHL";
@@ -1392,6 +1487,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
+ case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
case PPCISD::SRL: return "PPCISD::SRL";
case PPCISD::SRA: return "PPCISD::SRA";
@@ -1399,6 +1495,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
+ case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
@@ -1412,6 +1509,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
+ case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
+ return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
case PPCISD::ANDI_rec_1_EQ_BIT:
return "PPCISD::ANDI_rec_1_EQ_BIT";
case PPCISD::ANDI_rec_1_GT_BIT:
@@ -1425,7 +1524,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
case PPCISD::STXSIX: return "PPCISD::STXSIX";
case PPCISD::VEXTS: return "PPCISD::VEXTS";
- case PPCISD::SExtVElems: return "PPCISD::SExtVElems";
case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
@@ -1475,7 +1573,9 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
+ case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
+ case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
}
return nullptr;
}
@@ -2338,17 +2438,22 @@ bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
/// non-zero and N can be represented by a base register plus a signed 16-bit
/// displacement, make a more precise judgement by checking (displacement % \p
/// EncodingAlignment).
-bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
- SDValue &Index, SelectionDAG &DAG,
- unsigned EncodingAlignment) const {
- int16_t imm = 0;
+bool PPCTargetLowering::SelectAddressRegReg(
+ SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
+ MaybeAlign EncodingAlignment) const {
+ // If we have a PC Relative target flag don't select as [reg+reg]. It will be
+ // a [pc+imm].
+ if (SelectAddressPCRel(N, Base))
+ return false;
+
+ int16_t Imm = 0;
if (N.getOpcode() == ISD::ADD) {
// Is there any SPE load/store (f64), which can't handle 16bit offset?
// SPE load/store can only handle 8-bit offsets.
if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
return true;
- if (isIntS16Immediate(N.getOperand(1), imm) &&
- (!EncodingAlignment || !(imm % EncodingAlignment)))
+ if (isIntS16Immediate(N.getOperand(1), Imm) &&
+ (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
return false; // r+i
if (N.getOperand(1).getOpcode() == PPCISD::Lo)
return false; // r+i
@@ -2357,8 +2462,8 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
Index = N.getOperand(1);
return true;
} else if (N.getOpcode() == ISD::OR) {
- if (isIntS16Immediate(N.getOperand(1), imm) &&
- (!EncodingAlignment || !(imm % EncodingAlignment)))
+ if (isIntS16Immediate(N.getOperand(1), Imm) &&
+ (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
return false; // r+i can fold it if we can.
// If this is an or of disjoint bitfields, we can codegen this as an add
@@ -2413,8 +2518,7 @@ static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned Align = MFI.getObjectAlignment(FrameIdx);
- if (Align >= 4)
+ if (MFI.getObjectAlign(FrameIdx) >= Align(4))
return;
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
@@ -2425,12 +2529,17 @@ static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
/// displacements that are multiples of that value.
-bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
- SDValue &Base,
- SelectionDAG &DAG,
- unsigned EncodingAlignment) const {
+bool PPCTargetLowering::SelectAddressRegImm(
+ SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
+ MaybeAlign EncodingAlignment) const {
// FIXME dl should come from parent load or store, not from address
SDLoc dl(N);
+
+ // If we have a PC Relative target flag don't select as [reg+imm]. It will be
+ // a [pc+imm].
+ if (SelectAddressPCRel(N, Base))
+ return false;
+
// If this can be more profitably realized as r+r, fail.
if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
return false;
@@ -2438,7 +2547,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
if (N.getOpcode() == ISD::ADD) {
int16_t imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
- (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
+ (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
@@ -2462,7 +2571,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
} else if (N.getOpcode() == ISD::OR) {
int16_t imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
- (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
+ (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
@@ -2489,7 +2598,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// this as "d, 0"
int16_t Imm;
if (isIntS16Immediate(CN, Imm) &&
- (!EncodingAlignment || (Imm % EncodingAlignment) == 0)) {
+ (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
@@ -2499,7 +2608,8 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// Handle 32-bit sext immediates with LIS + addr mode.
if ((CN->getValueType(0) == MVT::i32 ||
(int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
- (!EncodingAlignment || (CN->getZExtValue() % EncodingAlignment) == 0)) {
+ (!EncodingAlignment ||
+ isAligned(*EncodingAlignment, CN->getZExtValue()))) {
int Addr = (int)CN->getZExtValue();
// Otherwise, break this down into an LIS + disp.
@@ -2554,6 +2664,27 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
return true;
}
+template <typename Ty> static bool isValidPCRelNode(SDValue N) {
+ Ty *PCRelCand = dyn_cast<Ty>(N);
+ return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
+}
+
+/// Returns true if this address is a PC Relative address.
+/// An address is PC Relative if it is marked with the flag
+/// PPCII::MO_PCREL_FLAG or if the node opcode is PPCISD::MAT_PCREL_ADDR.
+bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
+ // This is a materialize PC Relative node. Always select this as PC Relative.
+ Base = N;
+ if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
+ return true;
+ if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
+ isValidPCRelNode<GlobalAddressSDNode>(N) ||
+ isValidPCRelNode<JumpTableSDNode>(N) ||
+ isValidPCRelNode<BlockAddressSDNode>(N))
+ return true;
+ return false;
+}
+
/// Returns true if we should use a direct load into vector instruction
/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
@@ -2591,7 +2722,8 @@ static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
UI != UE; ++UI)
if (UI.getUse().get().getResNo() == 0 &&
- UI->getOpcode() != ISD::SCALAR_TO_VECTOR)
+ UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
+ UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
return false;
return true;
@@ -2664,14 +2796,14 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
// LDU/STU can only handle immediates that are a multiple of 4.
if (VT != MVT::i64) {
- if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
return false;
} else {
// LDU/STU need an address with at least 4-byte alignment.
if (Alignment < 4)
return false;
- if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
return false;
}
@@ -2705,18 +2837,6 @@ static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
HiOpFlags |= PPCII::MO_PIC_FLAG;
LoOpFlags |= PPCII::MO_PIC_FLAG;
}
-
- // If this is a reference to a global value that requires a non-lazy-ptr, make
- // sure that instruction lowering adds it.
- if (GV && Subtarget.hasLazyResolverStub(GV)) {
- HiOpFlags |= PPCII::MO_NLP_FLAG;
- LoOpFlags |= PPCII::MO_NLP_FLAG;
-
- if (GV->hasHiddenVisibility()) {
- HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
- LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
- }
- }
}
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
@@ -2758,7 +2878,7 @@ SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
SDValue Ops[] = { GA, Reg };
return DAG.getMemIntrinsicNode(
PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
MachineMemOperand::MOLoad);
}
@@ -2771,8 +2891,15 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
// 64-bit SVR4 ABI and AIX ABI code are always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
+ if (Subtarget.isUsingPCRelativeCalls()) {
+ SDLoc DL(CP);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ SDValue ConstPool = DAG.getTargetConstantPool(
+ C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
+ return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
+ }
setUsesTOCBasePtr(DAG);
- SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
+ SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
return getTOCEntry(DAG, SDLoc(CP), GA);
}
@@ -2781,15 +2908,15 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
if (IsPIC && Subtarget.isSVR4ABI()) {
- SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
- PPCII::MO_PIC_FLAG);
+ SDValue GA =
+ DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
return getTOCEntry(DAG, SDLoc(CP), GA);
}
SDValue CPIHi =
- DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
+ DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
SDValue CPILo =
- DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
+ DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
}
@@ -2846,6 +2973,16 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ // isUsingPCRelativeCalls() returns true when PCRelative is enabled
+ if (Subtarget.isUsingPCRelativeCalls()) {
+ SDLoc DL(JT);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ SDValue GA =
+ DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
+ SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
+ return MatAddr;
+ }
+
// 64-bit SVR4 ABI and AIX ABI code are always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
@@ -2875,6 +3012,16 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
const BlockAddress *BA = BASDN->getBlockAddress();
+ // isUsingPCRelativeCalls() returns true when PCRelative is enabled
+ if (Subtarget.isUsingPCRelativeCalls()) {
+ SDLoc DL(BASDN);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
+ PPCII::MO_PCREL_FLAG);
+ SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
+ return MatAddr;
+ }
+
// 64-bit SVR4 ABI and AIX ABI code are always position-independent.
// The actual BlockAddress is stored in the TOC.
if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
@@ -3004,6 +3151,22 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
// 64-bit SVR4 ABI & AIX ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
+ if (Subtarget.isUsingPCRelativeCalls()) {
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ if (isAccessedAsGotIndirect(Op)) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
+ PPCII::MO_PCREL_FLAG |
+ PPCII::MO_GOT_FLAG);
+ SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
+ SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
+ MachinePointerInfo());
+ return Load;
+ } else {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
+ PPCII::MO_PCREL_FLAG);
+ return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
+ }
+ }
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
return getTOCEntry(DAG, DL, GA);
@@ -3025,13 +3188,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SDValue GALo =
DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
- SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);
-
- // If the global reference is actually to a non-lazy-pointer, we have to do an
- // extra load to get the address of the global.
- if (MOHiFlag & PPCII::MO_NLP_FLAG)
- Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
- return Ptr;
+ return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
}
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
@@ -3192,10 +3349,10 @@ SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
// We have to copy the entire va_list struct:
// 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
- return DAG.getMemcpy(Op.getOperand(0), Op,
- Op.getOperand(1), Op.getOperand(2),
- DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
- false, MachinePointerInfo(), MachinePointerInfo());
+ return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
+ false, true, false, MachinePointerInfo(),
+ MachinePointerInfo());
}
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
@@ -3252,7 +3409,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
- if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
+ if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
@@ -3358,31 +3515,31 @@ static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
/// CalculateStackSlotAlignment - Calculates the alignment of this argument
/// on the stack.
-static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
- ISD::ArgFlagsTy Flags,
- unsigned PtrByteSize) {
- unsigned Align = PtrByteSize;
+static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
+ ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize) {
+ Align Alignment(PtrByteSize);
// Altivec parameters are padded to a 16 byte boundary.
if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
- Align = 16;
+ Alignment = Align(16);
// QPX vector types stored in double-precision are padded to a 32 byte
// boundary.
else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
- Align = 32;
+ Alignment = Align(32);
// ByVal parameters are aligned as requested.
if (Flags.isByVal()) {
- unsigned BVAlign = Flags.getByValAlign();
+ auto BVAlign = Flags.getNonZeroByValAlign();
if (BVAlign > PtrByteSize) {
- if (BVAlign % PtrByteSize != 0)
- llvm_unreachable(
+ if (BVAlign.value() % PtrByteSize != 0)
+ llvm_unreachable(
"ByVal alignment is not a multiple of the pointer size");
- Align = BVAlign;
+ Alignment = BVAlign;
}
}
@@ -3392,12 +3549,12 @@ static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
// needs to be aligned to the size of the full type. (Except for
// ppcf128, which is only aligned as its f64 components.)
if (Flags.isSplit() && OrigVT != MVT::ppcf128)
- Align = OrigVT.getStoreSize();
+ Alignment = Align(OrigVT.getStoreSize());
else
- Align = ArgVT.getStoreSize();
+ Alignment = Align(ArgVT.getStoreSize());
}
- return Align;
+ return Alignment;
}
/// CalculateStackSlotUsed - Return whether this argument will use its
@@ -3415,9 +3572,9 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
bool UseMemory = false;
// Respect alignment of argument on the stack.
- unsigned Align =
- CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
- ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+ Align Alignment =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = alignTo(ArgOffset, Alignment);
// If there's no space left in the argument save area, we must
// use memory (this check also catches zero-sized arguments).
if (ArgOffset >= LinkageSize + ParamAreaSize)
@@ -3461,10 +3618,7 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
/// ensure minimum alignment required for target.
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
unsigned NumBytes) {
- unsigned TargetAlign = Lowering->getStackAlignment();
- unsigned AlignMask = TargetAlign - 1;
- NumBytes = (NumBytes + AlignMask) & ~AlignMask;
- return NumBytes;
+ return alignTo(NumBytes, Lowering->getStackAlign());
}
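The alignTo() call is the same round-up that the removed mask arithmetic performed; a tiny self-contained check of that equivalence (plain C++ stand-in for llvm::alignTo, with the stack alignment simply assumed to be 16 for illustration):

#include <cassert>
#include <cstdint>

// Round N up to a multiple of A (A a power of two), as alignTo() does.
static uint64_t roundUp(uint64_t N, uint64_t A) { return (N + A - 1) / A * A; }

int main() {
  const uint64_t TargetAlign = 16; // assumed stack alignment
  const uint64_t AlignMask = TargetAlign - 1;
  for (uint64_t NumBytes : {0ULL, 1ULL, 15ULL, 16ULL, 17ULL, 100ULL})
    assert(roundUp(NumBytes, TargetAlign) ==
           ((NumBytes + AlignMask) & ~AlignMask));
  return 0;
}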
SDValue PPCTargetLowering::LowerFormalArguments(
@@ -3527,7 +3681,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
// Potential tail calls could cause overwriting of argument stack slots.
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
(CallConv == CallingConv::Fast));
- unsigned PtrByteSize = 4;
+ const Align PtrAlign(4);
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -3536,7 +3690,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
// Reserve space for the linkage area on the stack.
unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
- CCInfo.AllocateStack(LinkageSize, PtrByteSize);
+ CCInfo.AllocateStack(LinkageSize, PtrAlign);
if (useSoftFloat())
CCInfo.PreAnalyzeFormalArguments(Ins);
@@ -3645,7 +3799,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
- CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
+ CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
@@ -3692,7 +3846,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
CCInfo.getNextStackOffset(), true));
- FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
+ FuncInfo->setVarArgsFrameIndex(
+ MFI.CreateStackObject(Depth, Align(8), false));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
// The fixed integer arguments of a variadic function are stored to the
@@ -3839,11 +3994,13 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
// We re-align the argument offset for each argument, except when using the
// fast calling convention, when we need to make sure we do that only when
// we'll actually use a stack slot.
- unsigned CurArgOffset, Align;
+ unsigned CurArgOffset;
+ Align Alignment;
auto ComputeArgOffset = [&]() {
/* Respect alignment of argument on the stack. */
- Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
- ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+ Alignment =
+ CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = alignTo(ArgOffset, Alignment);
CurArgOffset = ArgOffset;
};
@@ -3891,7 +4048,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
else
- FI = MFI.CreateStackObject(ArgSize, Align, false);
+ FI = MFI.CreateStackObject(ArgSize, Alignment, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
// Handle aggregates smaller than 8 bytes.
@@ -4139,7 +4296,11 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
- if (isVarArg) {
+ // The ELFv2 ABI spec notes:
+ // C programs that are intended to be *portable* across different compilers
+ // and architectures must use the header file <stdarg.h> to deal with variable
+ // argument lists.
+ if (isVarArg && MFI.hasVAStart()) {
int Depth = ArgOffset;
FuncInfo->setVarArgsFrameIndex(
@@ -4547,30 +4708,67 @@ static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
static bool isFunctionGlobalAddress(SDValue Callee);
-static bool
-callsShareTOCBase(const Function *Caller, SDValue Callee,
- const TargetMachine &TM) {
- // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
- // don't have enough information to determine if the caller and calle share
- // the same TOC base, so we have to pessimistically assume they don't for
- // correctness.
- GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
- if (!G)
- return false;
-
- const GlobalValue *GV = G->getGlobal();
+static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
+ const TargetMachine &TM) {
+ // It does not make sense to call callsShareTOCBase() with a caller that
+ // is PC Relative since PC Relative callers do not have a TOC.
+#ifndef NDEBUG
+ const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
+ assert(!STICaller->isUsingPCRelativeCalls() &&
+ "PC Relative callers do not have a TOC and cannot share a TOC Base");
+#endif
+
+ // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
+ // don't have enough information to determine if the caller and callee share
+ // the same TOC base, so we have to pessimistically assume they don't for
+ // correctness.
+ GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ if (!G)
+ return false;
+
+ const GlobalValue *GV = G->getGlobal();
+
+ // If the callee is preemptable, then the static linker will use a plt-stub
+ // which saves the toc to the stack, and needs a nop after the call
+ // instruction to convert to a toc-restore.
+ if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
+ return false;
+
+ // Functions with PC Relative enabled may clobber the TOC in the same DSO.
+ // We may need a TOC restore in the situation where the caller requires a
+ // valid TOC but the callee is PC Relative and does not.
+ const Function *F = dyn_cast<Function>(GV);
+ const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
+
+ // If we have an Alias we can try to get the function from there.
+ if (Alias) {
+ const GlobalObject *GlobalObj = Alias->getBaseObject();
+ F = dyn_cast<Function>(GlobalObj);
+ }
+
+ // If we still have no valid function pointer we do not have enough
+ // information to determine if the callee uses PC Relative calls so we must
+ // assume that it does.
+ if (!F)
+ return false;
+
+ // If the callee uses PC Relative we cannot guarantee that the callee won't
+ // clobber the TOC of the caller and so we must assume that the two
+ // functions do not share a TOC base.
+ const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
+ if (STICallee->isUsingPCRelativeCalls())
+ return false;
+
// The medium and large code models are expected to provide a sufficiently
// large TOC to provide all data addressing needs of a module with a
- // single TOC. Since each module will be addressed with a single TOC then we
- // only need to check that caller and callee don't cross dso boundaries.
+ // single TOC.
if (CodeModel::Medium == TM.getCodeModel() ||
CodeModel::Large == TM.getCodeModel())
- return TM.shouldAssumeDSOLocal(*Caller->getParent(), GV);
+ return true;
// Otherwise we need to ensure callee and caller are in the same section,
// since the linker may allocate multiple TOCs, and we don't know which
// sections will belong to the same TOC base.
-
if (!GV->isStrongDefinitionForLinker())
return false;
@@ -4585,26 +4783,6 @@ callsShareTOCBase(const Function *Caller, SDValue Callee,
return false;
}
- // If the callee might be interposed, then we can't assume the ultimate call
- // target will be in the same section. Even in cases where we can assume that
- // interposition won't happen, in any case where the linker might insert a
- // stub to allow for interposition, we must generate code as though
- // interposition might occur. To understand why this matters, consider a
- // situation where: a -> b -> c where the arrows indicate calls. b and c are
- // in the same section, but a is in a different module (i.e. has a different
- // TOC base pointer). If the linker allows for interposition between b and c,
- // then it will generate a stub for the call edge between b and c which will
- // save the TOC pointer into the designated stack slot allocated by b. If we
- // return true here, and therefore allow a tail call between b and c, that
- // stack slot won't exist and the b -> c stub will end up saving b'c TOC base
- // pointer into the stack slot allocated by a (where the a -> b stub saved
- // a's TOC base pointer). If we're not considering a tail call, but rather,
- // whether a nop is needed after the call instruction in b, because the linker
- // will insert a stub, it might complain about a missing nop if we omit it
- // (although many don't complain in this case).
- if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
- return false;
-
return true;
}
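
The rewritten function above boils down to a chain of early-outs. A simplified sketch of that decision order, where each boolean parameter stands in for the corresponding LLVM query named in the comments (the parameter names are assumptions for illustration only):

// Sketch of the decision order in callsShareTOCBase:
//   CalleeIsGlobalAddress        -> dyn_cast<GlobalAddressSDNode>(Callee) != nullptr
//   CalleeIsDSOLocal             -> TM.shouldAssumeDSOLocal(...)
//   CalleeResolvesToFunc         -> a Function was found (possibly through an alias)
//   CalleeUsesPCRel              -> STICallee->isUsingPCRelativeCalls()
//   MediumOrLargeCodeModel       -> CodeModel::Medium or CodeModel::Large
//   CalleeIsStrongDefSameSection -> strong definition placed in the caller's section
static bool sharesTOCBaseSketch(bool CalleeIsGlobalAddress, bool CalleeIsDSOLocal,
                                bool CalleeResolvesToFunc, bool CalleeUsesPCRel,
                                bool MediumOrLargeCodeModel,
                                bool CalleeIsStrongDefSameSection) {
  if (!CalleeIsGlobalAddress)   return false; // ExternalSymbol: assume no sharing
  if (!CalleeIsDSOLocal)        return false; // preemptable: plt-stub + nop needed
  if (!CalleeResolvesToFunc)    return false; // cannot prove the callee is not PCRel
  if (CalleeUsesPCRel)          return false; // callee may clobber the caller's TOC
  if (MediumOrLargeCodeModel)   return true;  // a single TOC covers the module
  return CalleeIsStrongDefSameSection;        // small code model: must be same section
}
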
@@ -4646,13 +4824,12 @@ needStackSlotPassParameters(const PPCSubtarget &Subtarget,
return false;
}
-static bool
-hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) {
- if (CS.arg_size() != CallerFn->arg_size())
+static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
+ if (CB.arg_size() != CallerFn->arg_size())
return false;
- ImmutableCallSite::arg_iterator CalleeArgIter = CS.arg_begin();
- ImmutableCallSite::arg_iterator CalleeArgEnd = CS.arg_end();
+ auto CalleeArgIter = CB.arg_begin();
+ auto CalleeArgEnd = CB.arg_end();
Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
@@ -4694,15 +4871,10 @@ areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
return CallerCC == CallingConv::C || CallerCC == CalleeCC;
}
-bool
-PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
- SDValue Callee,
- CallingConv::ID CalleeCC,
- ImmutableCallSite CS,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const {
+bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
+ SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
if (DisableSCO && !TailCallOpt) return false;
@@ -4744,15 +4916,22 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
needStackSlotPassParameters(Subtarget, Outs))
return false;
- // No TCO/SCO on indirect call because Caller have to restore its TOC
- if (!isFunctionGlobalAddress(Callee) &&
- !isa<ExternalSymbolSDNode>(Callee))
+ // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
+ // the caller and callee share the same TOC for TCO/SCO. If the caller and
+ // callee potentially have different TOC bases then we cannot tail call since
+ // we need to restore the TOC pointer after the call.
+ // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
+ // We cannot guarantee this for indirect calls or calls to external functions.
+ // When PC-Relative addressing is used, the concept of the TOC is no longer
+ // applicable so this check is not required.
+ // Check first for indirect calls.
+ if (!Subtarget.isUsingPCRelativeCalls() &&
+ !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
return false;
- // If the caller and callee potentially have different TOC bases then we
- // cannot tail call since we need to restore the TOC pointer after the call.
- // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
- if (!callsShareTOCBase(&Caller, Callee, getTargetMachine()))
+ // Check if we share the TOC base.
+ if (!Subtarget.isUsingPCRelativeCalls() &&
+ !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
return false;
// TCO allows altering callee ABI, so we don't have to check further.
@@ -4764,10 +4943,14 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
// If callee use the same argument list that caller is using, then we can
// apply SCO on this case. If it is not, then we need to check if callee needs
// stack for passing arguments.
- if (!hasSameArgumentList(&Caller, CS) &&
- needStackSlotPassParameters(Subtarget, Outs)) {
+ // PC Relative tail calls may not have a CallBase.
+ // If there is no CallBase we cannot verify if we have the same argument
+ // list so assume that we don't have the same argument list.
+ if (CB && !hasSameArgumentList(&Caller, *CB) &&
+ needStackSlotPassParameters(Subtarget, Outs))
+ return false;
+ else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
return false;
- }
return true;
}
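
The callee-related checks added in this hunk (the earlier calling-convention and argument checks are omitted) can be summarized as a pure predicate. A hedged sketch, where each parameter stands in for the query named in the comment:

//   UsesPCRelCalls    -> Subtarget.isUsingPCRelativeCalls()
//   CalleeIsDirect    -> isFunctionGlobalAddress(Callee) || isa<ExternalSymbolSDNode>(Callee)
//   SharesTOCBase     -> callsShareTOCBase(&Caller, Callee, TM)
//   SameArgList       -> CB && hasSameArgumentList(&Caller, *CB)
//   NeedsStackForArgs -> needStackSlotPassParameters(Subtarget, Outs)
static bool tailCallCalleeChecksSketch(bool UsesPCRelCalls, bool CalleeIsDirect,
                                       bool SharesTOCBase, bool SameArgList,
                                       bool NeedsStackForArgs) {
  // Without PC-relative addressing, the caller and callee must share a TOC
  // base, which can only be proven for direct calls.
  if (!UsesPCRelCalls && !CalleeIsDirect)
    return false;
  if (!UsesPCRelCalls && !SharesTOCBase)
    return false;
  // If the argument lists differ (or there is no CallBase to compare against),
  // the callee must not need stack slots for its parameters.
  if (!SameArgList && NeedsStackForArgs)
    return false;
  return true;
}
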
@@ -4876,18 +5059,6 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
MachinePointerInfo::getFixedStack(MF, NewRetAddr));
-
- // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
- // slot as the FP is never overwritten.
- if (Subtarget.isDarwinABI()) {
- int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
- int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
- true);
- SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
- Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
- MachinePointerInfo::getFixedStack(
- DAG.getMachineFunction(), NewFPIdx));
- }
}
return Chain;
}
@@ -4922,14 +5093,6 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
LROpOut = getReturnAddrFrameIndex(DAG);
LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
Chain = SDValue(LROpOut.getNode(), 1);
-
- // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
- // slot as the FP is never overwritten.
- if (Subtarget.isDarwinABI()) {
- FPOpOut = getFramePointerFrameIndex(DAG);
- FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
- Chain = SDValue(FPOpOut.getNode(), 1);
- }
}
return Chain;
}
@@ -4944,9 +5107,9 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
SDValue Chain, ISD::ArgFlagsTy Flags,
SelectionDAG &DAG, const SDLoc &dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
- return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
- false, false, false, MachinePointerInfo(),
- MachinePointerInfo());
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
+ Flags.getNonZeroByValAlign(), false, false, false,
+ MachinePointerInfo(), MachinePointerInfo());
}
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
@@ -5097,28 +5260,37 @@ static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
return true;
}
-static unsigned getCallOpcode(bool isIndirectCall, bool isPatchPoint,
- bool isTailCall, const Function &Caller,
+// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
+static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
+ return Subtarget.isAIXABI() ||
+ (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
+}
+
+static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
+ const Function &Caller,
const SDValue &Callee,
const PPCSubtarget &Subtarget,
const TargetMachine &TM) {
- if (isTailCall)
+ if (CFlags.IsTailCall)
return PPCISD::TC_RETURN;
// This is a call through a function pointer.
- if (isIndirectCall) {
+ if (CFlags.IsIndirect) {
// AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
// indirect calls. The save of the caller's TOC pointer to the stack will be
// inserted into the DAG as part of call lowering. The restore of the TOC
// pointer is modeled by using a pseudo instruction for the call opcode that
// represents the 2 instruction sequence of an indirect branch and link,
// immediately followed by a load of the TOC pointer from the stack save
- // slot into gpr2.
- if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
- return PPCISD::BCTRL_LOAD_TOC;
+ // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
+ // as it is not saved or used.
+ return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
+ : PPCISD::BCTRL;
+ }
- // An indirect call that does not need a TOC restore.
- return PPCISD::BCTRL;
+ if (Subtarget.isUsingPCRelativeCalls()) {
+ assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
+ return PPCISD::CALL_NOTOC;
}
// The ABIs that maintain a TOC pointer across calls need to have a nop
@@ -5136,14 +5308,6 @@ static unsigned getCallOpcode(bool isIndirectCall, bool isPatchPoint,
return PPCISD::CALL;
}
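
A compact sketch of the opcode selection that getCallOpcode now performs, with the PPCISD opcodes replaced by a local enum for illustration (NeedsTOCSaveRestore stands in for isTOCSaveRestoreRequired(Subtarget); the tail of the function, not shown in this hunk, picks between CALL_NOP and CALL depending on the ABI):

enum class CallKind { TCReturn, BctrlLoadTOC, Bctrl, CallNoTOC, CallOther };

static CallKind selectCallKind(bool IsTailCall, bool IsIndirect,
                               bool NeedsTOCSaveRestore, bool UsesPCRelCalls) {
  if (IsTailCall)
    return CallKind::TCReturn;                          // PPCISD::TC_RETURN
  if (IsIndirect)                                       // call through a pointer
    return NeedsTOCSaveRestore ? CallKind::BctrlLoadTOC // bctrl + TOC reload
                               : CallKind::Bctrl;       // plain bctrl
  if (UsesPCRelCalls)
    return CallKind::CallNoTOC;                         // PPCISD::CALL_NOTOC
  return CallKind::CallOther;                           // CALL_NOP or CALL (ABI-dependent)
}
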
-static bool isValidAIXExternalSymSDNode(StringRef SymName) {
- return StringSwitch<bool>(SymName)
- .Cases("__divdi3", "__fixunsdfdi", "__floatundidf", "__floatundisf",
- "__moddi3", "__udivdi3", "__umoddi3", true)
- .Cases("ceil", "floor", "memcpy", "memmove", "memset", "round", true)
- .Default(false);
-}
-
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
const SDLoc &dl, const PPCSubtarget &Subtarget) {
if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
@@ -5179,14 +5343,14 @@ static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
MCSymbolXCOFF *S = cast<MCSymbolXCOFF>(
Context.getOrCreateSymbol(Twine(".") + Twine(FuncName)));
- if (IsDeclaration && !S->hasContainingCsect()) {
+ if (IsDeclaration && !S->hasRepresentedCsectSet()) {
// On AIX, an undefined symbol needs to be associated with a
// MCSectionXCOFF to get the correct storage mapping class.
// In this case, XCOFF::XMC_PR.
MCSectionXCOFF *Sec = Context.getXCOFFSection(
- S->getName(), XCOFF::XMC_PR, XCOFF::XTY_ER, SC,
+ S->getSymbolTableName(), XCOFF::XMC_PR, XCOFF::XTY_ER, SC,
SectionKind::getMetadata());
- S->setContainingCsect(Sec);
+ S->setRepresentedCsect(Sec);
}
MVT PtrVT =
@@ -5227,12 +5391,7 @@ static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
SC);
}
- // TODO: Remove this when the support for ExternalSymbolSDNode is complete.
- if (isValidAIXExternalSymSDNode(SymName)) {
- return getAIXFuncEntryPointSymbolSDNode(SymName, true, XCOFF::C_EXT);
- }
-
- report_fatal_error("Unexpected ExternalSymbolSDNode: " + Twine(SymName));
+ return getAIXFuncEntryPointSymbolSDNode(SymName, true, XCOFF::C_EXT);
}
// No transformation needed.
@@ -5270,7 +5429,7 @@ static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
SDValue &Glue, SDValue &Chain,
SDValue CallSeqStart,
- ImmutableCallSite CS, const SDLoc &dl,
+ const CallBase *CB, const SDLoc &dl,
bool hasNest,
const PPCSubtarget &Subtarget) {
// Function pointers in the 64-bit SVR4 ABI do not point to the function
@@ -5306,7 +5465,7 @@ static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
MachineMemOperand::MOInvariant)
: MachineMemOperand::MONone;
- MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
+ MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
// Registers used in building the DAG.
const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
@@ -5360,12 +5519,12 @@ static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
}
static void
-buildCallOperands(SmallVectorImpl<SDValue> &Ops, CallingConv::ID CallConv,
- const SDLoc &dl, bool isTailCall, bool isVarArg,
- bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
+buildCallOperands(SmallVectorImpl<SDValue> &Ops,
+ PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
+ SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
- const PPCSubtarget &Subtarget, bool isIndirect) {
+ const PPCSubtarget &Subtarget) {
const bool IsPPC64 = Subtarget.isPPC64();
// MVT for a general purpose register.
const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
@@ -5374,10 +5533,10 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops, CallingConv::ID CallConv,
Ops.push_back(Chain);
// If it's a direct call pass the callee as the second operand.
- if (!isIndirect)
+ if (!CFlags.IsIndirect)
Ops.push_back(Callee);
else {
- assert(!isPatchPoint && "Patch point call are not indirect.");
+ assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
// For the TOC based ABIs, we have saved the TOC pointer to the linkage area
// on the stack (this would have been done in `LowerCall_64SVR4` or
@@ -5386,7 +5545,9 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops, CallingConv::ID CallConv,
// pointer from the linkage area. The operand for the TOC restore is an add
// of the TOC save offset to the stack pointer. This must be the second
// operand: after the chain input but before any other variadic arguments.
- if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
+ // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
+ // saved or used.
+ if (isTOCSaveRestoreRequired(Subtarget)) {
const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
@@ -5397,18 +5558,18 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops, CallingConv::ID CallConv,
}
// Add the register used for the environment pointer.
- if (Subtarget.usesFunctionDescriptors() && !hasNest)
+ if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
RegVT));
// Add CTR register as callee so a bctr can be emitted later.
- if (isTailCall)
+ if (CFlags.IsTailCall)
Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
}
// If this is a tail call add stack pointer delta.
- if (isTailCall)
+ if (CFlags.IsTailCall)
Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
// Add argument registers to the end of the list so that they are known live
@@ -5420,17 +5581,18 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops, CallingConv::ID CallConv,
// We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
// no way to mark dependencies as implicit here.
// We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
- if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) && !isPatchPoint)
+ if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
+ !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
- if (isVarArg && Subtarget.is32BitELFABI())
+ if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
const uint32_t *Mask =
- TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
+ TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -5440,44 +5602,47 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops, CallingConv::ID CallConv,
}
SDValue PPCTargetLowering::FinishCall(
- CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
- bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
+ CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
- SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
+ SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
- if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI())
+ if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
+ Subtarget.isAIXABI())
setUsesTOCBasePtr(DAG);
- const bool isIndirect = isIndirectCall(Callee, DAG, Subtarget, isPatchPoint);
- unsigned CallOpc = getCallOpcode(isIndirect, isPatchPoint, isTailCall,
- DAG.getMachineFunction().getFunction(),
- Callee, Subtarget, DAG.getTarget());
+ unsigned CallOpc =
+ getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
+ Subtarget, DAG.getTarget());
- if (!isIndirect)
+ if (!CFlags.IsIndirect)
Callee = transformCallee(Callee, DAG, dl, Subtarget);
else if (Subtarget.usesFunctionDescriptors())
- prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CS,
- dl, hasNest, Subtarget);
+ prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
+ dl, CFlags.HasNest, Subtarget);
else
prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
// Build the operand list for the call instruction.
SmallVector<SDValue, 8> Ops;
- buildCallOperands(Ops, CallConv, dl, isTailCall, isVarArg, isPatchPoint,
- hasNest, DAG, RegsToPass, Glue, Chain, Callee, SPDiff,
- Subtarget, isIndirect);
+ buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
+ SPDiff, Subtarget);
// Emit tail call.
- if (isTailCall) {
+ if (CFlags.IsTailCall) {
+ // Indirect tail calls when using PC Relative calls do not have the same
+ // constraints.
assert(((Callee.getOpcode() == ISD::Register &&
cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
Callee.getOpcode() == ISD::TargetExternalSymbol ||
Callee.getOpcode() == ISD::TargetGlobalAddress ||
- isa<ConstantSDNode>(Callee)) &&
- "Expecting a global address, external symbol, absolute value or "
- "register");
+ isa<ConstantSDNode>(Callee) ||
+ (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
+ "Expecting a global address, external symbol, absolute value, "
+ "register or an indirect tail call when PC Relative calls are "
+ "used.");
+ // PC Relative calls also use TC_RETURN as the way to mark tail calls.
assert(CallOpc == PPCISD::TC_RETURN &&
"Unexpected call opcode for a tail call.");
DAG.getMachineFunction().getFrameInfo().setHasTailCall();
@@ -5486,12 +5651,13 @@ SDValue PPCTargetLowering::FinishCall(
std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
+ DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
Glue = Chain.getValue(1);
// When performing tail call optimization the callee pops its arguments off
// the stack. Account for this here so these bytes can be pushed back on in
// PPCFrameLowering::eliminateCallFramePseudoInstr.
- int BytesCalleePops = (CallConv == CallingConv::Fast &&
+ int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
getTargetMachine().Options.GuaranteedTailCallOpt)
? NumBytes
: 0;
@@ -5501,7 +5667,8 @@ SDValue PPCTargetLowering::FinishCall(
Glue, dl);
Glue = Chain.getValue(1);
- return LowerCallResult(Chain, Glue, CallConv, isVarArg, Ins, dl, DAG, InVals);
+ return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
+ DAG, InVals);
}
SDValue
@@ -5518,15 +5685,14 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CallingConv::ID CallConv = CLI.CallConv;
bool isVarArg = CLI.IsVarArg;
bool isPatchPoint = CLI.IsPatchPoint;
- ImmutableCallSite CS = CLI.CS;
+ const CallBase *CB = CLI.CB;
if (isTailCall) {
- if (Subtarget.useLongCalls() && !(CS && CS.isMustTailCall()))
+ if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
isTailCall = false;
else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
- isTailCall =
- IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
- isVarArg, Outs, Ins, DAG);
+ isTailCall = IsEligibleForTailCallOptimization_64SVR4(
+ Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
else
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
Ins, DAG);
@@ -5535,21 +5701,23 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (!getTargetMachine().Options.GuaranteedTailCallOpt)
++NumSiblingCalls;
- assert(isa<GlobalAddressSDNode>(Callee) &&
+ // PC Relative calls no longer guarantee that the callee is a Global
+ // Address Node. The callee could be an indirect tail call in which
+ // case the SDValue for the callee could be a load (to load the address
+ // of a function pointer) or it may be a register copy (to move the
+ // address of the callee from a function parameter into a virtual
+ // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
+ assert((Subtarget.isUsingPCRelativeCalls() ||
+ isa<GlobalAddressSDNode>(Callee)) &&
"Callee should be an llvm::Function object.");
- LLVM_DEBUG(
- const GlobalValue *GV =
- cast<GlobalAddressSDNode>(Callee)->getGlobal();
- const unsigned Width =
- 80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0");
- dbgs() << "TCO caller: "
- << left_justify(DAG.getMachineFunction().getName(), Width)
- << ", callee linkage: " << GV->getVisibility() << ", "
- << GV->getLinkage() << "\n");
+
+ LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
+ << "\nTCO callee: ");
+ LLVM_DEBUG(Callee.dump());
}
}
- if (!isTailCall && CS && CS.isMustTailCall())
+ if (!isTailCall && CB && CB->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
@@ -5560,42 +5728,49 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
!isTailCall)
Callee = LowerGlobalAddress(Callee, DAG);
+ CallFlags CFlags(
+ CallConv, isTailCall, isVarArg, isPatchPoint,
+ isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
+ // hasNest
+ Subtarget.is64BitELFABI() &&
+ any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
+ CLI.NoMerge);
+
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
- return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
- isTailCall, isPatchPoint, Outs, OutVals, Ins,
- dl, DAG, InVals, CS);
+ return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
+ InVals, CB);
if (Subtarget.isSVR4ABI())
- return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
- isTailCall, isPatchPoint, Outs, OutVals, Ins,
- dl, DAG, InVals, CS);
+ return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
+ InVals, CB);
if (Subtarget.isAIXABI())
- return LowerCall_AIX(Chain, Callee, CallConv, isVarArg,
- isTailCall, isPatchPoint, Outs, OutVals, Ins,
- dl, DAG, InVals, CS);
+ return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
+ InVals, CB);
- return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
- isTailCall, isPatchPoint, Outs, OutVals, Ins,
- dl, DAG, InVals, CS);
+ return LowerCall_Darwin(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
+ InVals, CB);
}
SDValue PPCTargetLowering::LowerCall_32SVR4(
- SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall, bool isPatchPoint,
+ SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
- ImmutableCallSite CS) const {
+ const CallBase *CB) const {
// See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
// of the 32-bit SVR4 ABI stack frame layout.
+ const CallingConv::ID CallConv = CFlags.CallConv;
+ const bool IsVarArg = CFlags.IsVarArg;
+ const bool IsTailCall = CFlags.IsTailCall;
+
assert((CallConv == CallingConv::C ||
CallConv == CallingConv::Cold ||
CallConv == CallingConv::Fast) && "Unknown calling convention!");
- unsigned PtrByteSize = 4;
+ const Align PtrAlign(4);
MachineFunction &MF = DAG.getMachineFunction();
@@ -5614,15 +5789,15 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
// Assign locations to all of the outgoing arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
+ PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
// Reserve space for the linkage area on the stack.
CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
- PtrByteSize);
+ PtrAlign);
if (useSoftFloat())
CCInfo.PreAnalyzeCallOperands(Outs);
- if (isVarArg) {
+ if (IsVarArg) {
// Handle fixed and variable vector arguments differently.
// Fixed vector arguments go into registers as long as registers are
// available. Variable vector arguments always go into memory.
@@ -5657,10 +5832,10 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
// Assign locations to all of the outgoing aggregate by value arguments.
SmallVector<CCValAssign, 16> ByValArgLocs;
- CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());
+ CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
- CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
+ CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
@@ -5671,7 +5846,7 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
- int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+ int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
@@ -5767,7 +5942,7 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
assert(VA.isMemLoc());
unsigned LocMemOffset = VA.getLocMemOffset();
- if (!isTailCall) {
+ if (!IsTailCall) {
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
StackPtr, PtrOff);
@@ -5796,7 +5971,7 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
// Set CR bit 6 to true if this is a vararg call with floating args passed in
// registers.
- if (isVarArg) {
+ if (IsVarArg) {
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, InFlag };
@@ -5806,14 +5981,12 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
InFlag = Chain.getValue(1);
}
- if (isTailCall)
+ if (IsTailCall)
PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
- /* unused except on PPC64 ELFv1 */ false, DAG,
- RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
- NumBytes, Ins, InVals, CS);
+ return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
+ Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
// Copy an argument into memory, being careful to do this outside the
@@ -5834,25 +6007,24 @@ SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
}
SDValue PPCTargetLowering::LowerCall_64SVR4(
- SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall, bool isPatchPoint,
+ SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
- ImmutableCallSite CS) const {
+ const CallBase *CB) const {
bool isELFv2ABI = Subtarget.isELFv2ABI();
bool isLittleEndian = Subtarget.isLittleEndian();
unsigned NumOps = Outs.size();
- bool hasNest = false;
bool IsSibCall = false;
+ bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
EVT PtrVT = getPointerTy(DAG.getDataLayout());
unsigned PtrByteSize = 8;
MachineFunction &MF = DAG.getMachineFunction();
- if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
+ if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
IsSibCall = true;
// Mark this function as potentially containing a function that contains a
@@ -5860,11 +6032,10 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// and restoring the callers stack pointer in this functions epilog. This is
// done because by tail calling the called function might overwrite the value
// in this function's (MF) stack pointer stack slot 0(SP).
- if (getTargetMachine().Options.GuaranteedTailCallOpt &&
- CallConv == CallingConv::Fast)
+ if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
- assert(!(CallConv == CallingConv::Fast && isVarArg) &&
+ assert(!(IsFastCall && CFlags.IsVarArg) &&
"fastcc not supported on varargs functions");
// Count how many bytes are to be pushed on the stack, including the linkage
@@ -5894,7 +6065,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// can be passed to the callee in registers.
// For the fast calling convention, there is another check below.
// Note: We should keep consistent with LowerFormalArguments_64SVR4()
- bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;
+ bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
if (!HasParameterArea) {
unsigned ParamAreaSize = NumGPRs * PtrByteSize;
unsigned AvailableFPRs = NumFPRs;
@@ -5916,7 +6087,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// Avoid allocating parameter area for fastcc functions if all the arguments
// can be passed in the registers.
- if (CallConv == CallingConv::Fast)
+ if (IsFastCall)
HasParameterArea = false;
// Add up all the space actually used.
@@ -5928,7 +6099,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (Flags.isNest())
continue;
- if (CallConv == CallingConv::Fast) {
+ if (IsFastCall) {
if (Flags.isByVal()) {
NumGPRsUsed += (Flags.getByValSize()+7)/8;
if (NumGPRsUsed > NumGPRs)
@@ -5976,9 +6147,9 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
}
/* Respect alignment of argument on the stack. */
- unsigned Align =
- CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
- NumBytes = ((NumBytes + Align - 1) / Align) * Align;
+ auto Alignment =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ NumBytes = alignTo(NumBytes, Alignment);
NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
if (Flags.isInConsecutiveRegsLast())
@@ -6001,8 +6172,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
NumBytes = LinkageSize;
// Tail call needs the stack to be aligned.
- if (getTargetMachine().Options.GuaranteedTailCallOpt &&
- CallConv == CallingConv::Fast)
+ if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
int SPDiff = 0;
@@ -6010,11 +6180,11 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
if (!IsSibCall)
- SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+ SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
// To protect arguments on the stack from being clobbered in a tail call,
// force all the loads to happen before doing any other lowering.
- if (isTailCall)
+ if (CFlags.IsTailCall)
Chain = DAG.getStackArgumentTokenFactor(Chain);
// Adjust the stack pointer for the new arguments...
@@ -6058,16 +6228,16 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// we'll actually use a stack slot.
auto ComputePtrOff = [&]() {
/* Respect alignment of argument on the stack. */
- unsigned Align =
- CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
- ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+ auto Alignment =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = alignTo(ArgOffset, Alignment);
PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
};
- if (CallConv != CallingConv::Fast) {
+ if (!IsFastCall) {
ComputePtrOff();
/* Compute GPR index associated with argument offset. */
@@ -6098,7 +6268,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (Size == 0)
continue;
- if (CallConv == CallingConv::Fast)
+ if (IsFastCall)
ComputePtrOff();
// All aggregates smaller than 8 bytes must be passed right-justified.
@@ -6203,7 +6373,6 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (Flags.isNest()) {
// The 'nest' parameter, if any, is passed in R11.
RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
- hasNest = true;
break;
}
@@ -6213,18 +6382,18 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (GPR_idx != NumGPRs) {
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
} else {
- if (CallConv == CallingConv::Fast)
+ if (IsFastCall)
ComputePtrOff();
assert(HasParameterArea &&
"Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- true, isTailCall, false, MemOpChains,
+ true, CFlags.IsTailCall, false, MemOpChains,
TailCallArguments, dl);
- if (CallConv == CallingConv::Fast)
+ if (IsFastCall)
ArgOffset += PtrByteSize;
}
- if (CallConv != CallingConv::Fast)
+ if (!IsFastCall)
ArgOffset += PtrByteSize;
break;
case MVT::f32:
@@ -6238,7 +6407,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// Unnamed arguments for vararg functions always go to GPRs and
// then the parameter save area. For now, put all arguments to vararg
// routines always in both locations (FPR *and* GPR or stack slot).
- bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
+ bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
bool NeededLoad = false;
// First load the argument into the next available FPR.
@@ -6248,7 +6417,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// Next, load the argument into GPR or stack slot if needed.
if (!NeedGPROrStack)
;
- else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
+ else if (GPR_idx != NumGPRs && !IsFastCall) {
// FIXME: We may want to re-enable this for CallingConv::Fast on the P8
// once we support fp <-> gpr moves.
@@ -6292,7 +6461,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (ArgVal.getNode())
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
} else {
- if (CallConv == CallingConv::Fast)
+ if (IsFastCall)
ComputePtrOff();
// Single-precision floating-point values are mapped to the
@@ -6306,7 +6475,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
assert(HasParameterArea &&
"Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- true, isTailCall, false, MemOpChains,
+ true, CFlags.IsTailCall, false, MemOpChains,
TailCallArguments, dl);
NeededLoad = true;
@@ -6314,7 +6483,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// When passing an array of floats, the array occupies consecutive
// space in the argument area; only round up to the next doubleword
// at the end of the array. Otherwise, each float takes 8 bytes.
- if (CallConv != CallingConv::Fast || NeededLoad) {
+ if (!IsFastCall || NeededLoad) {
ArgOffset += (Arg.getValueType() == MVT::f32 &&
Flags.isInConsecutiveRegs()) ? 4 : 8;
if (Flags.isInConsecutiveRegsLast())
@@ -6339,7 +6508,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// usual; unnamed arguments always go to the stack or the corresponding
// GPRs when within range. For now, we always put the value in both
// locations (or even all three).
- if (isVarArg) {
+ if (CFlags.IsVarArg) {
assert(HasParameterArea &&
"Parameter area must exist if we have a varargs call.");
// We could elide this store in the case where the object fits
@@ -6371,19 +6540,19 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (VR_idx != NumVRs) {
RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
} else {
- if (CallConv == CallingConv::Fast)
+ if (IsFastCall)
ComputePtrOff();
assert(HasParameterArea &&
"Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- true, isTailCall, true, MemOpChains,
+ true, CFlags.IsTailCall, true, MemOpChains,
TailCallArguments, dl);
- if (CallConv == CallingConv::Fast)
+ if (IsFastCall)
ArgOffset += 16;
}
- if (CallConv != CallingConv::Fast)
+ if (!IsFastCall)
ArgOffset += 16;
break;
} // not QPX
@@ -6395,7 +6564,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
case MVT::v4f64:
case MVT::v4i1: {
bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
- if (isVarArg) {
+ if (CFlags.IsVarArg) {
assert(HasParameterArea &&
"Parameter area must exist if we have a varargs call.");
// We could elide this store in the case where the object fits
@@ -6427,19 +6596,19 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (QFPR_idx != NumQFPRs) {
RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
} else {
- if (CallConv == CallingConv::Fast)
+ if (IsFastCall)
ComputePtrOff();
assert(HasParameterArea &&
"Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- true, isTailCall, true, MemOpChains,
+ true, CFlags.IsTailCall, true, MemOpChains,
TailCallArguments, dl);
- if (CallConv == CallingConv::Fast)
+ if (IsFastCall)
ArgOffset += (IsF32 ? 16 : 32);
}
- if (CallConv != CallingConv::Fast)
+ if (!IsFastCall)
ArgOffset += (IsF32 ? 16 : 32);
break;
}
@@ -6456,23 +6625,26 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// Check if this is an indirect call (MTCTR/BCTRL).
// See prepareDescriptorIndirectCall and buildCallOperands for more
// information about calls through function pointers in the 64-bit SVR4 ABI.
- if (!isTailCall && !isPatchPoint &&
- !isFunctionGlobalAddress(Callee) &&
- !isa<ExternalSymbolSDNode>(Callee)) {
- // Load r2 into a virtual register and store it to the TOC save area.
- setUsesTOCBasePtr(DAG);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
- // TOC save area offset.
- unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
- SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
- SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
- Chain = DAG.getStore(
- Val.getValue(1), dl, Val, AddPtr,
- MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
+ if (CFlags.IsIndirect) {
+ // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
+ // caller in the TOC save area.
+ if (isTOCSaveRestoreRequired(Subtarget)) {
+ assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
+ // Load r2 into a virtual register and store it to the TOC save area.
+ setUsesTOCBasePtr(DAG);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
+ // TOC save area offset.
+ unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
+ MachinePointerInfo::getStack(
+ DAG.getMachineFunction(), TOCSaveOffset));
+ }
// In the ELFv2 ABI, R12 must contain the address of an indirect callee.
// This does not mean the MTCTR instruction must use R12; it's easier
// to model this as an extra parameter, so do that.
- if (isELFv2ABI && !isPatchPoint)
+ if (isELFv2ABI && !CFlags.IsPatchPoint)
RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
}
@@ -6485,23 +6657,21 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
InFlag = Chain.getValue(1);
}
- if (isTailCall && !IsSibCall)
+ if (CFlags.IsTailCall && !IsSibCall)
PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
- DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
- SPDiff, NumBytes, Ins, InVals, CS);
+ return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
+ Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
SDValue PPCTargetLowering::LowerCall_Darwin(
- SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall, bool isPatchPoint,
+ SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
- ImmutableCallSite CS) const {
+ const CallBase *CB) const {
unsigned NumOps = Outs.size();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -6516,7 +6686,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
// done because by tail calling the called function might overwrite the value
// in this function's (MF) stack pointer stack slot 0(SP).
if (getTargetMachine().Options.GuaranteedTailCallOpt &&
- CallConv == CallingConv::Fast)
+ CFlags.CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
// Count how many bytes are to be pushed on the stack, including the linkage
@@ -6539,7 +6709,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
- if (!isVarArg && !isPPC64) {
+ if (!CFlags.IsVarArg && !isPPC64) {
// Non-varargs Altivec parameters go after all the non-Altivec
// parameters; handle those later so we know how much padding we need.
nAltivecParamsAtEnd++;
@@ -6566,16 +6736,16 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
// Tail call needs the stack to be aligned.
if (getTargetMachine().Options.GuaranteedTailCallOpt &&
- CallConv == CallingConv::Fast)
+ CFlags.CallConv == CallingConv::Fast)
NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
- int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+ int SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
// To protect arguments on the stack from being clobbered in a tail call,
// force all the loads to happen before doing any other lowering.
- if (isTailCall)
+ if (CFlags.IsTailCall)
Chain = DAG.getStackArgumentTokenFactor(Chain);
// Adjust the stack pointer for the new arguments...
@@ -6711,7 +6881,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
} else {
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- isPPC64, isTailCall, false, MemOpChains,
+ isPPC64, CFlags.IsTailCall, false, MemOpChains,
TailCallArguments, dl);
}
ArgOffset += PtrByteSize;
@@ -6721,7 +6891,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
if (FPR_idx != NumFPRs) {
RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
- if (isVarArg) {
+ if (CFlags.IsVarArg) {
SDValue Store =
DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
MemOpChains.push_back(Store);
@@ -6753,7 +6923,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
}
} else
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- isPPC64, isTailCall, false, MemOpChains,
+ isPPC64, CFlags.IsTailCall, false, MemOpChains,
TailCallArguments, dl);
if (isPPC64)
ArgOffset += 8;
@@ -6764,7 +6934,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
- if (isVarArg) {
+ if (CFlags.IsVarArg) {
// These go aligned on the stack, or in the corresponding R registers
// when within range. The Darwin PPC ABI doc claims they also go in
// V registers; in fact gcc does this only for arguments that are
@@ -6810,7 +6980,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
} else if (nAltivecParamsAtEnd==0) {
// We are emitting Altivec params in order.
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- isPPC64, isTailCall, true, MemOpChains,
+ isPPC64, CFlags.IsTailCall, true, MemOpChains,
TailCallArguments, dl);
ArgOffset += 16;
}
@@ -6822,7 +6992,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
// don't track this here because nobody below needs it.
// If there are more Altivec parameters than fit in registers emit
// the stores here.
- if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
+ if (!CFlags.IsVarArg && nAltivecParamsAtEnd > NumVRs) {
unsigned j = 0;
// Offset is aligned; skip 1st 12 params which go in V registers.
ArgOffset = ((ArgOffset+15)/16)*16;
@@ -6836,7 +7006,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
SDValue PtrOff;
// We are emitting Altivec params in order.
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- isPPC64, isTailCall, true, MemOpChains,
+ isPPC64, CFlags.IsTailCall, true, MemOpChains,
TailCallArguments, dl);
ArgOffset += 16;
}
@@ -6850,12 +7020,11 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
// On Darwin, R12 must contain the address of an indirect callee. This does
// not mean the MTCTR instruction must use R12; it's easier to model this as
// an extra parameter, so do that.
- if (!isTailCall &&
- !isFunctionGlobalAddress(Callee) &&
- !isa<ExternalSymbolSDNode>(Callee) &&
- !isBLACompatibleAddress(Callee, DAG))
+ if (CFlags.IsIndirect) {
+ assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
PPC::R12), Callee));
+ }
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
@@ -6866,37 +7035,37 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
InFlag = Chain.getValue(1);
}
- if (isTailCall)
+ if (CFlags.IsTailCall)
PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
- /* unused except on PPC64 ELFv1 */ false, DAG,
- RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
- NumBytes, Ins, InVals, CS);
+ return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
+ Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
CCState &State) {
+ const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
+ State.getMachineFunction().getSubtarget());
+ const bool IsPPC64 = Subtarget.isPPC64();
+ const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
+ const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
+
+ assert((!ValVT.isInteger() ||
+ (ValVT.getSizeInBits() <= RegVT.getSizeInBits())) &&
+ "Integer argument exceeds register size: should have been legalized");
+
if (ValVT == MVT::f128)
report_fatal_error("f128 is unimplemented on AIX.");
- if (ArgFlags.isByVal())
- report_fatal_error("Passing structure by value is unimplemented.");
-
if (ArgFlags.isNest())
report_fatal_error("Nest arguments are unimplemented.");
if (ValVT.isVector() || LocVT.isVector())
report_fatal_error("Vector arguments are unimplemented on AIX.");
- const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
- State.getMachineFunction().getSubtarget());
- const bool IsPPC64 = Subtarget.isPPC64();
- const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
-
static const MCPhysReg GPR_32[] = {// 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10};
@@ -6904,6 +7073,38 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10};
+ if (ArgFlags.isByVal()) {
+ if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
+ report_fatal_error("Pass-by-value arguments with alignment greater than "
+ "register width are not supported.");
+
+ const unsigned ByValSize = ArgFlags.getByValSize();
+
+ // An empty aggregate parameter takes up no storage and no registers,
+ // but needs a MemLoc for a stack slot for the formal arguments side.
+ if (ByValSize == 0) {
+ State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
+ State.getNextStackOffset(), RegVT,
+ LocInfo));
+ return false;
+ }
+
+ const unsigned StackSize = alignTo(ByValSize, PtrAlign);
+ unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
+ for (const unsigned E = Offset + StackSize; Offset < E;
+ Offset += PtrAlign.value()) {
+ if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
+ else {
+ State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
+ Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
+ LocInfo));
+ break;
+ }
+ }
+ return false;
+ }
+
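
A simplified standalone sketch of the register/stack split that the by-value loop above computes; it just counts pointer-sized words and ignores the detail that the real code adds a single MemLoc and breaks once GPRs run out. PtrSize is 8 on 64-bit AIX and 4 on 32-bit AIX; all names here are illustrative:

#include <cassert>

struct ByValSplit {
  unsigned NumRegs;       // words passed in GPRs
  unsigned NumStackWords; // remaining words covered only by the parameter save area
};

static ByValSplit splitByVal(unsigned ByValSize, unsigned PtrSize,
                             unsigned FreeGPRs) {
  unsigned StackSize = (ByValSize + PtrSize - 1) / PtrSize * PtrSize; // alignTo
  unsigned Words = StackSize / PtrSize;
  unsigned InRegs = Words < FreeGPRs ? Words : FreeGPRs;
  return {InRegs, Words - InRegs};
}

int main() {
  // A 20-byte struct on 64-bit AIX occupies 3 doublewords; with 2 GPRs left,
  // 2 doublewords travel in registers and 1 spills to the parameter save area.
  ByValSplit S = splitByVal(20, 8, 2);
  assert(S.NumRegs == 2 && S.NumStackWords == 1);
  return 0;
}
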
// Arguments always reserve parameter save area.
switch (ValVT.SimpleTy) {
default:
@@ -6913,49 +7114,55 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
assert(IsPPC64 && "PPC32 should have split i64 values.");
LLVM_FALLTHROUGH;
case MVT::i1:
- case MVT::i32:
- State.AllocateStack(PtrByteSize, PtrByteSize);
- if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
- MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
- // Promote integers if needed.
- if (ValVT.getSizeInBits() < RegVT.getSizeInBits())
- LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
- : CCValAssign::LocInfo::ZExt;
+ case MVT::i32: {
+ const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
+ // AIX integer arguments are always passed in register width.
+ if (ValVT.getSizeInBits() < RegVT.getSizeInBits())
+ LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
+ : CCValAssign::LocInfo::ZExt;
+ if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
- }
else
- report_fatal_error("Handling of placing parameters on the stack is "
- "unimplemented!");
- return false;
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
+ return false;
+ }
case MVT::f32:
case MVT::f64: {
// Parameter save area (PSA) is reserved even if the float passes in fpr.
const unsigned StoreSize = LocVT.getStoreSize();
// Floats are always 4-byte aligned in the PSA on AIX.
// This includes f64 in 64-bit mode for ABI compatibility.
- State.AllocateStack(IsPPC64 ? 8 : StoreSize, 4);
- if (unsigned Reg = State.AllocateReg(FPR))
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- else
- report_fatal_error("Handling of placing parameters on the stack is "
- "unimplemented!");
-
- // AIX requires that GPRs are reserved for float arguments.
- // Successfully reserved GPRs are only initialized for vararg calls.
- MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
- for (unsigned I = 0; I < StoreSize; I += PtrByteSize) {
+ const unsigned Offset =
+ State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
+ unsigned FReg = State.AllocateReg(FPR);
+ if (FReg)
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
+
+ // Reserve and initialize GPRs or initialize the PSA as required.
+ for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
+ assert(FReg && "An FPR should be available when a GPR is reserved.");
if (State.isVarArg()) {
+ // Successfully reserved GPRs are only initialized for vararg calls.
// Custom handling is required for:
// f64 in PPC32 needs to be split into 2 GPRs.
// f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
State.addLoc(
CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
}
- } else if (State.isVarArg()) {
- report_fatal_error("Handling of placing parameters on the stack is "
- "unimplemented!");
+ } else {
+ // If there are insufficient GPRs, the PSA needs to be initialized.
+ // Initialization occurs even if an FPR was initialized for
+ // compatibility with the AIX XL compiler. The full memory for the
+ // argument will be initialized even if a prior word is saved in a GPR.
+ // A custom memLoc is used when the argument also passes in FPR so
+ // that the callee handling can skip over it easily.
+ State.addLoc(
+ FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
+ LocInfo)
+ : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ break;
}
}
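
The loop above shadows one GPR per pointer-sized chunk of the float's store size. A small worked example of that count (standalone, names illustrative):

#include <cassert>

static unsigned gprsShadowedByFloat(unsigned StoreSize, unsigned PtrSize) {
  unsigned Count = 0;
  for (unsigned I = 0; I < StoreSize; I += PtrSize)
    ++Count; // one GPR is reserved per pointer-sized chunk, while GPRs remain
  return Count;
}

int main() {
  assert(gprsShadowedByFloat(8, 4) == 2); // f64 on 32-bit AIX shadows two GPRs
  assert(gprsShadowedByFloat(4, 8) == 1); // f32 on 64-bit AIX shadows one GPR
  assert(gprsShadowedByFloat(8, 8) == 1); // f64 on 64-bit AIX shadows one GPR
  return 0;
}
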
@@ -7000,6 +7207,64 @@ static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
}
+static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
+ const unsigned LASize = FL->getLinkageSize();
+
+ if (PPC::GPRCRegClass.contains(Reg)) {
+ assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
+ "Reg must be a valid argument register!");
+ return LASize + 4 * (Reg - PPC::R3);
+ }
+
+ if (PPC::G8RCRegClass.contains(Reg)) {
+ assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
+ "Reg must be a valid argument register!");
+ return LASize + 8 * (Reg - PPC::X3);
+ }
+
+ llvm_unreachable("Only general purpose registers expected.");
+}
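
A worked example of the offset computation above: an argument GPR maps to its slot in the parameter save area just past the linkage area. The linkage-area sizes used below (24 bytes for 32-bit AIX, 48 bytes for 64-bit AIX) are the usual values returned by PPCFrameLowering::getLinkageSize() and are assumptions here, not part of the patch:

#include <cassert>

static unsigned argRegOffset(unsigned RegIndexFromR3, unsigned LinkageSize,
                             unsigned PtrSize) {
  return LinkageSize + PtrSize * RegIndexFromR3;
}

int main() {
  assert(argRegOffset(0, 24, 4) == 24); // R3 -> first 32-bit PSA slot
  assert(argRegOffset(3, 24, 4) == 36); // R6 -> fourth 32-bit PSA slot
  assert(argRegOffset(0, 48, 8) == 48); // X3 -> first 64-bit PSA slot
  assert(argRegOffset(5, 48, 8) == 88); // X8 -> sixth 64-bit PSA slot
  return 0;
}
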
+
+// AIX ABI Stack Frame Layout:
+//
+// Low Memory +--------------------------------------------+
+// SP +---> | Back chain | ---+
+// | +--------------------------------------------+ |
+// | | Saved Condition Register | |
+// | +--------------------------------------------+ |
+// | | Saved Linkage Register | |
+// | +--------------------------------------------+ | Linkage Area
+// | | Reserved for compilers | |
+// | +--------------------------------------------+ |
+// | | Reserved for binders | |
+// | +--------------------------------------------+ |
+// | | Saved TOC pointer | ---+
+// | +--------------------------------------------+
+// | | Parameter save area |
+// | +--------------------------------------------+
+// | | Alloca space |
+// | +--------------------------------------------+
+// | | Local variable space |
+// | +--------------------------------------------+
+// | | Float/int conversion temporary |
+// | +--------------------------------------------+
+// | | Save area for AltiVec registers |
+// | +--------------------------------------------+
+// | | AltiVec alignment padding |
+// | +--------------------------------------------+
+// | | Save area for VRSAVE register |
+// | +--------------------------------------------+
+// | | Save area for General Purpose registers |
+// | +--------------------------------------------+
+// | | Save area for Floating Point registers |
+// | +--------------------------------------------+
+// +---- | Back chain |
+// High Memory +--------------------------------------------+
+//
+// Specifications:
+// AIX 7.2 Assembler Language Reference
+// Subroutine linkage convention
+
SDValue PPCTargetLowering::LowerFormalArguments_AIX(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@@ -7009,9 +7274,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
CallConv == CallingConv::Fast) &&
"Unexpected calling convention!");
- if (isVarArg)
- report_fatal_error("This call type is unimplemented on AIX.");
-
if (getTargetMachine().Options.GuaranteedTailCallOpt)
report_fatal_error("Tail call support is unimplemented on AIX.");
@@ -7029,67 +7291,214 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
+ const EVT PtrVT = getPointerTy(MF.getDataLayout());
// Reserve space for the linkage area on the stack.
const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
- // On AIX a minimum of 8 words is saved to the parameter save area.
- const unsigned MinParameterSaveArea = 8 * PtrByteSize;
- CCInfo.AllocateStack(LinkageSize + MinParameterSaveArea, PtrByteSize);
+ CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- SDValue ArgValue;
- ISD::ArgFlagsTy Flags = Ins[i].Flags;
- if (VA.isRegLoc()) {
- EVT ValVT = VA.getValVT();
- MVT LocVT = VA.getLocVT();
+ SmallVector<SDValue, 8> MemOps;
+
+ for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
+ CCValAssign &VA = ArgLocs[I++];
+ MVT LocVT = VA.getLocVT();
+ ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
+
+    // For compatibility with the AIX XL compiler, the float args in the
+    // parameter save area are initialized even if the argument is available
+    // in a register. The caller is required to initialize both the register
+    // and memory; the callee, however, can choose to read it from either.
+    // The memloc is skipped here because the argument is retrieved from
+    // the register.
+ if (VA.isMemLoc() && VA.needsCustom())
+ continue;
+
+ if (Flags.isByVal() && VA.isMemLoc()) {
+ const unsigned Size =
+ alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
+ PtrByteSize);
+ const int FI = MF.getFrameInfo().CreateFixedObject(
+ Size, VA.getLocMemOffset(), /* IsImmutable */ false,
+ /* IsAliased */ true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(FIN);
+
+ continue;
+ }
+
+ if (Flags.isByVal()) {
+ assert(VA.isRegLoc() && "MemLocs should already be handled.");
+
+ const MCPhysReg ArgReg = VA.getLocReg();
+ const PPCFrameLowering *FL = Subtarget.getFrameLowering();
+
+ if (Flags.getNonZeroByValAlign() > PtrByteSize)
+ report_fatal_error("Over aligned byvals not supported yet.");
+
+ const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
+ const int FI = MF.getFrameInfo().CreateFixedObject(
+ StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
+ /* IsAliased */ true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(FIN);
+
+ // Add live ins for all the RegLocs for the same ByVal.
+ const TargetRegisterClass *RegClass =
+ IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+
+ auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
+ unsigned Offset) {
+ const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
+        // Since the caller's side has left-justified the aggregate in the
+        // register, we can simply store the entire register into the stack
+        // slot.
+ SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
+        // The store to the fixed-stack object is needed because accessing a
+        // field of the ByVal will use a GEP and load. Ideally we would extract
+        // the value from the register directly and elide the stores when the
+        // argument's address is not taken, but that is left as future work.
+ SDValue Store =
+ DAG.getStore(CopyFrom.getValue(1), dl, CopyFrom,
+ DAG.getObjectPtrOffset(dl, FIN, Offset),
+ MachinePointerInfo::getFixedStack(MF, FI, Offset));
+
+ MemOps.push_back(Store);
+ };
+
+ unsigned Offset = 0;
+ HandleRegLoc(VA.getLocReg(), Offset);
+ Offset += PtrByteSize;
+ for (; Offset != StackSize && ArgLocs[I].isRegLoc();
+ Offset += PtrByteSize) {
+ assert(ArgLocs[I].getValNo() == VA.getValNo() &&
+ "RegLocs should be for ByVal argument.");
+
+ const CCValAssign RL = ArgLocs[I++];
+ HandleRegLoc(RL.getLocReg(), Offset);
+ }
+
+ if (Offset != StackSize) {
+ assert(ArgLocs[I].getValNo() == VA.getValNo() &&
+ "Expected MemLoc for remaining bytes.");
+ assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
+        // Consume the MemLoc. The InVal has already been emitted, so nothing
+ // more needs to be done.
+ ++I;
+ }
+
+ continue;
+ }
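    // Illustrative trace (not from the patch): on PPC32, a 12-byte by-val
    // argument assigned R8, R9 and R10 gets a 12-byte fixed object at R8's
    // home offset in the parameter save area (24 + 4 * 5 == 44), and
    // HandleRegLoc emits three 4-byte stores at offsets 0, 4 and 8 of that
    // object. Had fewer GPRs been available, the trailing MemLoc consumed
    // above would cover the bytes the caller already wrote to the PSA.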
+
+ EVT ValVT = VA.getValVT();
+ if (VA.isRegLoc() && !VA.needsCustom()) {
MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
unsigned VReg =
MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
- ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
if (ValVT.isScalarInteger() &&
(ValVT.getSizeInBits() < LocVT.getSizeInBits())) {
ArgValue =
truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
}
InVals.push_back(ArgValue);
- } else {
- report_fatal_error("Handling of formal arguments on the stack is "
- "unimplemented!");
+ continue;
+ }
+ if (VA.isMemLoc()) {
+ const unsigned LocSize = LocVT.getStoreSize();
+ const unsigned ValSize = ValVT.getStoreSize();
+ assert((ValSize <= LocSize) &&
+ "Object size is larger than size of MemLoc");
+ int CurArgOffset = VA.getLocMemOffset();
+ // Objects are right-justified because AIX is big-endian.
+ if (LocSize > ValSize)
+ CurArgOffset += LocSize - ValSize;
+ // Potential tail calls could cause overwriting of argument stack slots.
+ const bool IsImmutable =
+ !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
+ int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ SDValue ArgValue =
+ DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
+ InVals.push_back(ArgValue);
+ continue;
}
}
+ // On AIX a minimum of 8 words is saved to the parameter save area.
+ const unsigned MinParameterSaveArea = 8 * PtrByteSize;
// Area that is at least reserved in the caller of this function.
- unsigned MinReservedArea = CCInfo.getNextStackOffset();
+ unsigned CallerReservedArea =
+ std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
// Set the size that is at least reserved in caller of this function. Tail
// call optimized function's reserved stack space needs to be aligned so
// that taking the difference between two stack areas will result in an
// aligned stack.
- MinReservedArea =
- EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
+ CallerReservedArea =
+ EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
- FuncInfo->setMinReservedArea(MinReservedArea);
+ FuncInfo->setMinReservedArea(CallerReservedArea);
+
+ if (isVarArg) {
+ FuncInfo->setVarArgsFrameIndex(
+ MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+ static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10};
+
+ static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10};
+ const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
+
+ // The fixed integer arguments of a variadic function are stored to the
+ // VarArgsFrameIndex on the stack so that they may be loaded by
+    // dereferencing the result of va_arg.
+ for (unsigned GPRIndex =
+ (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
+ GPRIndex < NumGPArgRegs; ++GPRIndex) {
+
+ const unsigned VReg =
+ IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
+ : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
+ MemOps.push_back(Store);
+ // Increment the address for the next argument to store.
+ SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
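  // Illustrative trace (not from the patch), for a hypothetical PPC32
  // prototype such as int f(const char *fmt, ...): the single named pointer
  // argument consumes R3 and 4 bytes of the parameter save area, so GPRIndex
  // starts at 1 and R4 through R10 are spilled to the vararg area at offsets
  // 0, 4, ..., 24, ready to be read back through va_arg.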
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return Chain;
}
SDValue PPCTargetLowering::LowerCall_AIX(
- SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall, bool isPatchPoint,
+ SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
- ImmutableCallSite CS) const {
+ const CallBase *CB) const {
+ // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
+ // AIX ABI stack frame layout.
- assert((CallConv == CallingConv::C ||
- CallConv == CallingConv::Cold ||
- CallConv == CallingConv::Fast) && "Unexpected calling convention!");
+ assert((CFlags.CallConv == CallingConv::C ||
+ CFlags.CallConv == CallingConv::Cold ||
+ CFlags.CallConv == CallingConv::Fast) &&
+ "Unexpected calling convention!");
- if (isPatchPoint)
+ if (CFlags.IsPatchPoint)
report_fatal_error("This call type is unimplemented on AIX.");
const PPCSubtarget& Subtarget =
@@ -7101,7 +7510,8 @@ SDValue PPCTargetLowering::LowerCall_AIX(
MachineFunction &MF = DAG.getMachineFunction();
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
+ CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
+ *DAG.getContext());
// Reserve space for the linkage save area (LSA) on the stack.
// In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
@@ -7109,8 +7519,9 @@ SDValue PPCTargetLowering::LowerCall_AIX(
// The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
const bool IsPPC64 = Subtarget.isPPC64();
+ const EVT PtrVT = getPointerTy(DAG.getDataLayout());
const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
- CCInfo.AllocateStack(LinkageSize, PtrByteSize);
+ CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
// The prolog code of the callee may store up to 8 GPR argument registers to
@@ -7120,7 +7531,8 @@ SDValue PPCTargetLowering::LowerCall_AIX(
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
- const unsigned NumBytes = LinkageSize + MinParameterSaveAreaSize;
+ const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
+ CCInfo.getNextStackOffset());
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass.
@@ -7128,77 +7540,192 @@ SDValue PPCTargetLowering::LowerCall_AIX(
SDValue CallSeqStart = Chain;
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // Set up a copy of the stack pointer for loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
+ : DAG.getRegister(PPC::R1, MVT::i32);
for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
- CCValAssign &VA = ArgLocs[I++];
+ const unsigned ValNo = ArgLocs[I].getValNo();
+ SDValue Arg = OutVals[ValNo];
+ ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
- if (VA.isMemLoc())
- report_fatal_error("Handling of placing parameters on the stack is "
- "unimplemented!");
- if (!VA.isRegLoc())
- report_fatal_error(
- "Unexpected non-register location for function call argument.");
+ if (Flags.isByVal()) {
+ const unsigned ByValSize = Flags.getByValSize();
- SDValue Arg = OutVals[VA.getValNo()];
+ // Nothing to do for zero-sized ByVals on the caller side.
+ if (!ByValSize) {
+ ++I;
+ continue;
+ }
- if (!VA.needsCustom()) {
- switch (VA.getLocInfo()) {
- default:
- report_fatal_error("Unexpected argument extension type.");
- case CCValAssign::Full:
- break;
- case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
- break;
+ auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
+ return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
+ (LoadOffset != 0)
+ ? DAG.getObjectPtrOffset(dl, Arg, LoadOffset)
+ : Arg,
+ MachinePointerInfo(), VT);
+ };
+
+ unsigned LoadOffset = 0;
+
+      // Initialize the registers that are fully occupied by the by-val argument.
+ while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
+ SDValue Load = GetLoad(PtrVT, LoadOffset);
+ MemOpChains.push_back(Load.getValue(1));
+ LoadOffset += PtrByteSize;
+ const CCValAssign &ByValVA = ArgLocs[I++];
+ assert(ByValVA.getValNo() == ValNo &&
+ "Unexpected location for pass-by-value argument.");
+ RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
}
+
+ if (LoadOffset == ByValSize)
+ continue;
+
+ // There must be one more loc to handle the remainder.
+ assert(ArgLocs[I].getValNo() == ValNo &&
+ "Expected additional location for by-value argument.");
+
+ if (ArgLocs[I].isMemLoc()) {
+ assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
+ const CCValAssign &ByValVA = ArgLocs[I++];
+ ISD::ArgFlagsTy MemcpyFlags = Flags;
+        // Only memcpy the bytes that are not passed in registers.
+ MemcpyFlags.setByValSize(ByValSize - LoadOffset);
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(
+ (LoadOffset != 0) ? DAG.getObjectPtrOffset(dl, Arg, LoadOffset)
+ : Arg,
+ DAG.getObjectPtrOffset(dl, StackPtr, ByValVA.getLocMemOffset()),
+ CallSeqStart, MemcpyFlags, DAG, dl);
+ continue;
+ }
+
+ // Initialize the final register residue.
+      // Any residue that occupies the final by-val arg register must be
+      // left-justified on AIX. Loads must be a power-of-2 size and cannot be
+      // larger than the ByValSize. For example, a 7-byte by-val arg requires
+      // 4-, 2- and 1-byte loads.
+ const unsigned ResidueBytes = ByValSize % PtrByteSize;
+ assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
+ "Unexpected register residue for by-value argument.");
+ SDValue ResidueVal;
+ for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
+ const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
+ const MVT VT =
+ N == 1 ? MVT::i8
+ : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
+ SDValue Load = GetLoad(VT, LoadOffset);
+ MemOpChains.push_back(Load.getValue(1));
+ LoadOffset += N;
+ Bytes += N;
+
+        // By-val arguments are passed left-justified in registers.
+ // Every load here needs to be shifted, otherwise a full register load
+ // should have been used.
+ assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
+ "Unexpected load emitted during handling of pass-by-value "
+ "argument.");
+ unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
+ EVT ShiftAmountTy =
+ getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
+ SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
+ SDValue ShiftedLoad =
+ DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
+ ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
+ ShiftedLoad)
+ : ShiftedLoad;
+ }
+
+ const CCValAssign &ByValVA = ArgLocs[I++];
+ RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
+ continue;
+ }
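    // Illustrative trace (not from the patch): for the 7-byte by-val case
    // mentioned above on PPC32, the first loop loads bytes 0-3 with a 4-byte
    // zext load into the first GPR; the 3-byte residue is then loaded as an
    // i16 shifted left by 16 and an i8 shifted left by 8, OR'ed together,
    // leaving bytes 4-6 left-justified in the final GPR.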
+
+ CCValAssign &VA = ArgLocs[I++];
+ const MVT LocVT = VA.getLocVT();
+ const MVT ValVT = VA.getValVT();
+
+ switch (VA.getLocInfo()) {
+ default:
+ report_fatal_error("Unexpected argument extension type.");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isRegLoc() && !VA.needsCustom()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+
+ if (VA.isMemLoc()) {
+ SDValue PtrOff =
+ DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ MemOpChains.push_back(
+ DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
continue;
}
// Custom handling is used for GPR initializations for vararg float
// arguments.
- assert(isVarArg && VA.getValVT().isFloatingPoint() &&
- VA.getLocVT().isInteger() &&
- "Unexpected custom register handling for calling convention.");
+ assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
+ ValVT.isFloatingPoint() && LocVT.isInteger() &&
+ "Unexpected register handling for calling convention.");
SDValue ArgAsInt =
- DAG.getBitcast(MVT::getIntegerVT(VA.getValVT().getSizeInBits()), Arg);
+ DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
- if (Arg.getValueType().getStoreSize() == VA.getLocVT().getStoreSize())
+ if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
// f32 in 32-bit GPR
// f64 in 64-bit GPR
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
- else if (Arg.getValueType().getSizeInBits() < VA.getLocVT().getSizeInBits())
+ else if (Arg.getValueType().getSizeInBits() < LocVT.getSizeInBits())
// f32 in 64-bit GPR.
RegsToPass.push_back(std::make_pair(
- VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, VA.getLocVT())));
+ VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
else {
// f64 in two 32-bit GPRs
// The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
- assert(Arg.getValueType() == MVT::f64 && isVarArg && !IsPPC64 &&
+ assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
"Unexpected custom register for argument!");
CCValAssign &GPR1 = VA;
SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
DAG.getConstant(32, dl, MVT::i8));
RegsToPass.push_back(std::make_pair(
GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
- assert(I != E && "A second custom GPR is expected!");
- CCValAssign &GPR2 = ArgLocs[I++];
- assert(GPR2.isRegLoc() && GPR2.getValNo() == GPR1.getValNo() &&
- GPR2.needsCustom() && "A second custom GPR is expected!");
- RegsToPass.push_back(std::make_pair(
- GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
+
+ if (I != E) {
+ // If only 1 GPR was available, there will only be one custom GPR and
+ // the argument will also pass in memory.
+ CCValAssign &PeekArg = ArgLocs[I];
+        if (PeekArg.isRegLoc() && PeekArg.getValNo() == GPR1.getValNo()) {
+ assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
+ CCValAssign &GPR2 = ArgLocs[I++];
+ RegsToPass.push_back(std::make_pair(
+ GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
+ }
+ }
}
}
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
+
// For indirect calls, we need to save the TOC base to the stack for
// restoration after the call.
- if (!isTailCall && !isPatchPoint &&
- !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee)) {
+ if (CFlags.IsIndirect) {
+ assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
@@ -7224,10 +7751,8 @@ SDValue PPCTargetLowering::LowerCall_AIX(
}
const int SPDiff = 0;
- return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
- /* unused except on PPC64 ELFv1 */ false, DAG, RegsToPass,
- InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins,
- InVals, CS);
+ return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
+ Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
bool
@@ -7299,25 +7824,6 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
- const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
- const MCPhysReg *I =
- TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
- if (I) {
- for (; *I; ++I) {
-
- if (PPC::G8RCRegClass.contains(*I))
- RetOps.push_back(DAG.getRegister(*I, MVT::i64));
- else if (PPC::F8RCRegClass.contains(*I))
- RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
- else if (PPC::CRRCRegClass.contains(*I))
- RetOps.push_back(DAG.getRegister(*I, MVT::i1));
- else if (PPC::VRRCRegClass.contains(*I))
- RetOps.push_back(DAG.getRegister(*I, MVT::Other));
- else
- llvm_unreachable("Unexpected register class in CSRsViaCopy!");
- }
- }
-
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
@@ -7419,6 +7925,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
// Get the inputs.
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
@@ -7431,9 +7938,10 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
DAG.getConstant(0, dl, PtrVT), Size);
// Construct a node for the frame pointer save index.
SDValue FPSIdx = getFramePointerFrameIndex(DAG);
- // Build a DYNALLOC node.
SDValue Ops[3] = { Chain, NegSize, FPSIdx };
SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
+ if (hasInlineStackProbe(MF))
+ return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
}
@@ -7582,15 +8090,6 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
!Op.getOperand(2).getValueType().isFloatingPoint())
return Op;
- bool HasNoInfs = DAG.getTarget().Options.NoInfsFPMath;
- bool HasNoNaNs = DAG.getTarget().Options.NoNaNsFPMath;
- // We might be able to do better than this under some circumstances, but in
- // general, fsel-based lowering of select is a finite-math-only optimization.
- // For more information, see section F.3 of the 2.06 ISA specification.
- // With ISA 3.0, we have xsmaxcdp/xsmincdp which are OK to emit even in the
- // presence of infinities.
- if (!Subtarget.hasP9Vector() && (!HasNoInfs || !HasNoNaNs))
- return Op;
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
EVT ResVT = Op.getValueType();
@@ -7598,14 +8097,14 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
SDLoc dl(Op);
+ SDNodeFlags Flags = Op.getNode()->getFlags();
+ // We have xsmaxcdp/xsmincdp which are OK to emit even in the
+ // presence of infinities.
if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
switch (CC) {
default:
- // Not a min/max but with finite math, we may still be able to use fsel.
- if (HasNoInfs && HasNoNaNs)
- break;
- return Op;
+ break;
case ISD::SETOGT:
case ISD::SETGT:
return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
@@ -7615,10 +8114,13 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
}
}
- // TODO: Propagate flags from the select rather than global settings.
- SDNodeFlags Flags;
- Flags.setNoInfs(true);
- Flags.setNoNaNs(true);
+  // We might be able to do better than this under some circumstances, but in
+  // general, fsel-based lowering of select is a finite-math-only optimization.
+  // For more information, see section F.3 of the 2.06 ISA specification.
+  // With ISA 3.0, the xsmaxcdp/xsmincdp min/max cases are already handled
+  // above without these restrictions.
+ if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
+ (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
+ return Op;
// If the RHS of the comparison is a 0.0, we don't need to do the
// subtraction at all.
@@ -7738,12 +8240,12 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
// Emit a store to the stack slot.
SDValue Chain;
- unsigned Alignment = DAG.getEVTAlignment(Tmp.getValueType());
+ Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
if (i32Stack) {
MachineFunction &MF = DAG.getMachineFunction();
- Alignment = 4;
+ Alignment = Align(4);
MachineMemOperand *MMO =
- MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
+ MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
@@ -7803,7 +8305,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const {
// FP to INT conversions are legal for f128.
- if (EnableQuadPrecision && (Op->getOperand(0).getValueType() == MVT::f128))
+ if (Op->getOperand(0).getValueType() == MVT::f128)
return Op;
// Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
@@ -7899,7 +8401,7 @@ bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
RLI.MPI = LD->getPointerInfo();
RLI.IsDereferenceable = LD->isDereferenceable();
RLI.IsInvariant = LD->isInvariant();
- RLI.Alignment = LD->getAlignment();
+ RLI.Alignment = LD->getAlign();
RLI.AAInfo = LD->getAAInfo();
RLI.Ranges = LD->getRanges();
@@ -8043,16 +8545,19 @@ SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
SDValue ShuffleSrc2 =
SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
- unsigned ExtendOp =
- SignedConv ? (unsigned)PPCISD::SExtVElems : (unsigned)ISD::BITCAST;
SDValue Extend;
- if (!Subtarget.hasP9Altivec() && SignedConv) {
+ if (SignedConv) {
Arrange = DAG.getBitcast(IntermediateVT, Arrange);
+ EVT ExtVT = Op.getOperand(0).getValueType();
+ if (Subtarget.hasP9Altivec())
+ ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
+ IntermediateVT.getVectorNumElements());
+
Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
- DAG.getValueType(Op.getOperand(0).getValueType()));
+ DAG.getValueType(ExtVT));
} else
- Extend = DAG.getNode(ExtendOp, dl, IntermediateVT, Arrange);
+ Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
}
@@ -8068,7 +8573,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
return LowerINT_TO_FPVector(Op, DAG, dl);
// Conversions to f128 are legal.
- if (EnableQuadPrecision && (Op.getValueType() == MVT::f128))
+ if (Op.getValueType() == MVT::f128)
return Op;
if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
@@ -8163,8 +8668,10 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SINT, DAG.getConstant(53, dl, MVT::i32));
Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
Cond, DAG.getConstant(1, dl, MVT::i64));
- Cond = DAG.getSetCC(dl, MVT::i32,
- Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
+ Cond = DAG.getSetCC(
+ dl,
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
+ Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
}
@@ -8205,7 +8712,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
MachineFrameInfo &MFI = MF.getFrameInfo();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
- int FrameIdx = MFI.CreateStackObject(4, 4, false);
+ int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Store =
@@ -8220,7 +8727,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
RLI.Chain = Store;
RLI.MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
- RLI.Alignment = 4;
+ RLI.Alignment = Align(4);
MachineMemOperand *MMO =
MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
@@ -8257,7 +8764,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
bool ReusingLoad;
if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
DAG))) {
- int FrameIdx = MFI.CreateStackObject(4, 4, false);
+ int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Store =
@@ -8272,7 +8779,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
RLI.Chain = Store;
RLI.MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
- RLI.Alignment = 4;
+ RLI.Alignment = Align(4);
}
MachineMemOperand *MMO =
@@ -8289,7 +8796,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
assert(Subtarget.isPPC64() &&
"i32->FP without LFIWAX supported only on PPC64");
- int FrameIdx = MFI.CreateStackObject(8, 8, false);
+ int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
@@ -8341,22 +8848,20 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
EVT PtrVT = getPointerTy(MF.getDataLayout());
// Save FP Control Word to register
- EVT NodeTys[] = {
- MVT::f64, // return register
- MVT::Glue // unused in this context
- };
- SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
+ SDValue Chain = Op.getOperand(0);
+ SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
+ Chain = MFFS.getValue(1);
// Save FP register to stack slot
- int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
+ int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
- MachinePointerInfo());
+ Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
// Load FP Control Word from low 32 bits of stack slot.
SDValue Four = DAG.getConstant(4, dl, PtrVT);
SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
- SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());
+ SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
+ Chain = CWD.getValue(1);
// Transform as necessary
SDValue CWD1 =
@@ -8373,8 +8878,11 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SDValue RetVal =
DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
- return DAG.getNode((VT.getSizeInBits() < 16 ?
- ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
+ RetVal =
+ DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
+ dl, VT, RetVal);
+
+ return DAG.getMergeValues({RetVal, Chain}, dl);
}
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
@@ -8468,19 +8976,21 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
// Vector related lowering.
//
-/// BuildSplatI - Build a canonical splati of Val with an element size of
-/// SplatSize. Cast the result to VT.
-static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
- SelectionDAG &DAG, const SDLoc &dl) {
+/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
+/// element size of SplatSize. Cast the result to VT.
+static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
+ SelectionDAG &DAG, const SDLoc &dl) {
static const MVT VTys[] = { // canonical VT to use for each size.
MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
};
EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
- // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
- if (Val == -1)
+ // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
+  if (Val == ((1ULL << (SplatSize * 8)) - 1)) {
SplatSize = 1;
+ Val = 0xFF;
+ }
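  // Illustrative example (not from the patch): a v8i16 splat of 0xFFFF has
  // Val == (1 << 16) - 1, so it is rewritten as a one-byte splat of 0xFF,
  // i.e. a single vspltisb of -1 that is then bitcast to the requested type.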
EVT CanonicalVT = VTys[SplatSize-1];
@@ -8591,10 +9101,9 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Op0 = Op->getOperand(0);
- if (!EnableQuadPrecision ||
- (Op.getValueType() != MVT::f128 ) ||
+ if ((Op.getValueType() != MVT::f128) ||
(Op0.getOpcode() != ISD::BUILD_PAIR) ||
- (Op0.getOperand(0).getValueType() != MVT::i64) ||
+ (Op0.getOperand(0).getValueType() != MVT::i64) ||
(Op0.getOperand(1).getValueType() != MVT::i64))
return SDValue();
@@ -8606,7 +9115,8 @@ static const SDValue *getNormalLoadInput(const SDValue &Op) {
const SDValue *InputLoad = &Op;
if (InputLoad->getOpcode() == ISD::BITCAST)
InputLoad = &InputLoad->getOperand(0);
- if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
+ InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED)
InputLoad = &InputLoad->getOperand(0);
if (InputLoad->getOpcode() != ISD::LOAD)
return nullptr;
@@ -8614,6 +9124,34 @@ static const SDValue *getNormalLoadInput(const SDValue &Op) {
return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
}
+// Convert the argument APFloat to a single precision APFloat if there is no
+// loss in information during the conversion to single precision APFloat and the
+// resulting number is not a denormal number. Return true if successful.
+bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
+ APFloat APFloatToConvert = ArgAPFloat;
+ bool LosesInfo = true;
+ APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
+ &LosesInfo);
+ bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
+ if (Success)
+ ArgAPFloat = APFloatToConvert;
+ return Success;
+}
+
+// Bitcast the argument APInt to a double and convert it to a single precision
+// APFloat, bitcast the APFloat to an APInt and assign it to the original
+// argument if there is no loss in information during the conversion from
+// double to single precision APFloat and the resulting number is not a denormal
+// number. Return true if successful.
+bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
+ double DpValue = ArgAPInt.bitsToDouble();
+ APFloat APFloatDp(DpValue);
+ bool Success = convertToNonDenormSingle(APFloatDp);
+ if (Success)
+ ArgAPInt = APFloatDp.bitcastToAPInt();
+ return Success;
+}
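// Illustrative examples (not from the patch): the double constant 1.0
// round-trips through single precision exactly and is not denormal, so the
// conversion succeeds and a v2f64 splat of 1.0 can take the XXSPLTIDP path
// used below in LowerBUILD_VECTOR; 0.1, whose double and single roundings
// differ, reports LosesInfo and is rejected.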
+
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -8630,7 +9168,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// then convert it to a floating-point vector and compare it
// to a zero vector to get the boolean result.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(16, 16, false);
+ int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
MachinePointerInfo PtrInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -8665,8 +9203,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
Constant *CP = ConstantVector::get(CV);
- SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
- 16 /* alignment */);
+ SDValue CPIdx =
+ DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()), Align(16));
SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
@@ -8733,9 +9271,23 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
APInt APSplatBits, APSplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
- if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
- HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
- SplatBitSize > 32) {
+ bool BVNIsConstantSplat =
+ BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, 0, !Subtarget.isLittleEndian());
+
+ // If it is a splat of a double, check if we can shrink it to a 32 bit
+ // non-denormal float which when converted back to double gives us the same
+ // double. This is to exploit the XXSPLTIDP instruction.
+ if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
+ (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
+ convertToNonDenormSingle(APSplatBits)) {
+ SDValue SplatNode = DAG.getNode(
+ PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
+ DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
+ return DAG.getBitcast(Op.getValueType(), SplatNode);
+ }
+
+ if (!BVNIsConstantSplat || SplatBitSize > 32) {
const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0));
// Handle load-and-splat patterns as we have instructions that will do this
@@ -8774,8 +9326,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return SDValue();
}
- unsigned SplatBits = APSplatBits.getZExtValue();
- unsigned SplatUndef = APSplatUndef.getZExtValue();
+ uint64_t SplatBits = APSplatBits.getZExtValue();
+ uint64_t SplatUndef = APSplatUndef.getZExtValue();
unsigned SplatSize = SplatBitSize / 8;
// First, handle single instruction cases.
@@ -8790,17 +9342,30 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return Op;
}
- // We have XXSPLTIB for constant splats one byte wide
- // FIXME: SplatBits is an unsigned int being cast to an int while passing it
- // as an argument to BuildSplatiI. Given SplatSize == 1 it is okay here.
+  // We have XXSPLTIW for constant splats four bytes wide.
+  // Since the vector length is a multiple of 4 bytes, a 2-byte splat can be
+  // replaced with a 4-byte splat: we replicate the SplatBits to form a 4-byte
+  // splat element. For example, a 2-byte splat of 0xABAB can be turned into a
+  // 4-byte splat of 0xABABABAB.
+ if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
+ return getCanonicalConstSplat((SplatBits |= SplatBits << 16), SplatSize * 2,
+ Op.getValueType(), DAG, dl);
+
+ if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
+ return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
+ dl);
+
+ // We have XXSPLTIB for constant splats one byte wide.
if (Subtarget.hasP9Vector() && SplatSize == 1)
- return BuildSplatI(SplatBits, SplatSize, Op.getValueType(), DAG, dl);
+ return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
+ dl);
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
(32-SplatBitSize));
if (SextVal >= -16 && SextVal <= 15)
- return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
+ return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
+ dl);
// Two instruction sequences.
@@ -8831,7 +9396,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// for fneg/fabs.
if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
// Make -1 and vspltisw -1:
- SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
+ SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
// Make the VSLW intrinsic, computing 0x8000_0000.
SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
@@ -8859,7 +9424,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// vsplti + shl self.
if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
- SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
Intrinsic::ppc_altivec_vslw
@@ -8870,7 +9435,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// vsplti + srl self.
if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
- SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
Intrinsic::ppc_altivec_vsrw
@@ -8881,7 +9446,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// vsplti + sra self.
if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
- SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
Intrinsic::ppc_altivec_vsraw
@@ -8893,7 +9458,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// vsplti + rol self.
if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
- SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
Intrinsic::ppc_altivec_vrlw
@@ -8904,19 +9469,19 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// t = vsplti c, result = vsldoi t, t, 1
if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
- SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 2
if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
- SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 3
if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
- SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
}
@@ -9215,6 +9780,107 @@ SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
+/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
+/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
+/// return the default SDValue.
+SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG) const {
+ // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
+ // to v16i8. Peek through the bitcasts to get the actual operands.
+ SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
+ SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
+
+ auto ShuffleMask = SVN->getMask();
+ SDValue VecShuffle(SVN, 0);
+ SDLoc DL(SVN);
+
+ // Check that we have a four byte shuffle.
+ if (!isNByteElemShuffleMask(SVN, 4, 1))
+ return SDValue();
+
+ // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
+ if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
+ std::swap(LHS, RHS);
+ VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
+ ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
+ }
+
+ // Ensure that the RHS is a vector of constants.
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
+ if (!BVN)
+ return SDValue();
+
+ // Check if RHS is a splat of 4-bytes (or smaller).
+ APInt APSplatValue, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
+ SplatBitSize > 32)
+ return SDValue();
+
+ // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
+ // The instruction splats a constant C into two words of the source vector
+ // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
+ // Thus we check that the shuffle mask is the equivalent of
+ // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
+ // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
+ // within each word are consecutive, so we only need to check the first byte.
+ SDValue Index;
+ bool IsLE = Subtarget.isLittleEndian();
+ if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
+ (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
+ ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
+ Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
+ else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
+ (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
+ ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
+ Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
+ else
+ return SDValue();
+
+ // If the splat is narrower than 32-bits, we need to get the 32-bit value
+ // for XXSPLTI32DX.
+ unsigned SplatVal = APSplatValue.getZExtValue();
+ for (; SplatBitSize < 32; SplatBitSize <<= 1)
+ SplatVal |= (SplatVal << SplatBitSize);
+
+ SDValue SplatNode = DAG.getNode(
+ PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
+ Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
+}
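// Minimal sketch (not from the patch) of the splat-widening loop above, using
// plain unsigned arithmetic; the helper name is hypothetical.
static unsigned widenSplatTo32Sketch(unsigned SplatVal, unsigned SplatBitSize) {
  // e.g. an 8-bit splat of 0xAB becomes 0xABAB and then 0xABABABAB.
  for (; SplatBitSize < 32; SplatBitSize <<= 1)
    SplatVal |= (SplatVal << SplatBitSize);
  return SplatVal;
}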
+
+/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
+/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
+/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
+/// i.e (or (shl x, C1), (srl x, 128-C1)).
+SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
+ assert(Op.getValueType() == MVT::v1i128 &&
+ "Only set v1i128 as custom, other type shouldn't reach here!");
+ SDLoc dl(Op);
+ SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
+ SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
+ unsigned SHLAmt = N1.getConstantOperandVal(0);
+ if (SHLAmt % 8 == 0) {
+ SmallVector<int, 16> Mask(16, 0);
+ std::iota(Mask.begin(), Mask.end(), 0);
+ std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
+ if (SDValue Shuffle =
+ DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
+ DAG.getUNDEF(MVT::v16i8), Mask))
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
+ }
+ SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
+ SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
+ DAG.getConstant(SHLAmt, dl, MVT::i32));
+ SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
+ DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
+ SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
+}
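// Minimal sketch (not from the patch) of the byte-shuffle mask built in
// LowerROTL for rotate amounts that are multiples of 8; the helper name is
// hypothetical and reuses the same std::iota/std::rotate calls as above.
static SmallVector<int, 16> rotlByteMaskSketch(unsigned SHLAmt) {
  SmallVector<int, 16> Mask(16, 0);
  std::iota(Mask.begin(), Mask.end(), 0);
  std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
  return Mask; // SHLAmt == 16 yields {2, 3, ..., 15, 0, 1}.
}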
+
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
/// is a shuffle we can handle in a single instruction, return it. Otherwise,
/// return the code it can be lowered into. Worst case, it can always be
@@ -9225,6 +9891,18 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+
+ // Any nodes that were combined in the target-independent combiner prior
+ // to vector legalization will not be sent to the target combine. Try to
+ // combine it here.
+ if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
+ if (!isa<ShuffleVectorSDNode>(NewShuffle))
+ return NewShuffle;
+ Op = NewShuffle;
+ SVOp = cast<ShuffleVectorSDNode>(Op);
+ V1 = Op.getOperand(0);
+ V2 = Op.getOperand(1);
+ }
EVT VT = Op.getValueType();
bool isLittleEndian = Subtarget.isLittleEndian();
@@ -9250,6 +9928,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
else
Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
+
+ // If we are loading a partial vector, it does not make sense to adjust
+ // the base pointer. This happens with (splat (s_to_v_permuted (ld))).
+ if (LD->getMemoryVT().getSizeInBits() == (IsFourByte ? 32 : 64))
+ Offset = 0;
SDValue BasePtr = LD->getBasePtr();
if (Offset != 0)
BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
@@ -9288,6 +9971,12 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
+ if (Subtarget.hasPrefixInstrs()) {
+ SDValue SplatInsertNode;
+ if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
+ return SplatInsertNode;
+ }
+
if (Subtarget.hasP9Altivec()) {
SDValue NewISDNode;
if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
@@ -9523,7 +10212,13 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
MVT::i32));
}
+ ShufflesHandledWithVPERM++;
SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
+ LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
+ LLVM_DEBUG(SVOp->dump());
+ LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
+ LLVM_DEBUG(VPermMask.dump());
+
if (isLittleEndian)
return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
V2, V1, VPermMask);
@@ -9880,18 +10575,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return SDValue();
}
-SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
- // Check for a DIV with the same operands as this REM.
- for (auto UI : Op.getOperand(1)->uses()) {
- if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
- (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
- if (UI->getOperand(0) == Op.getOperand(0) &&
- UI->getOperand(1) == Op.getOperand(1))
- return SDValue();
- }
- return Op;
-}
-
// Lower scalar BSWAP64 to xxbrd.
SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -9950,7 +10633,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SDLoc dl(Op);
// Create a stack slot that is 16-byte aligned.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(16, 16, false);
+ int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
@@ -10020,7 +10703,7 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
Value);
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(16, 16, false);
+ int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
MachinePointerInfo PtrInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -10161,9 +10844,8 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
SDValue Stores[4];
for (unsigned Idx = 0; Idx < 4; ++Idx) {
- SDValue Ex = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
- DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
+ DAG.getVectorIdxConstant(Idx, dl));
SDValue Store;
if (ScalarVT != ScalarMemVT)
Store =
@@ -10220,7 +10902,7 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
Value);
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(16, 16, false);
+ int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
MachinePointerInfo PtrInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -10269,9 +10951,9 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType() == MVT::v4i32) {
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
- SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
- SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
-
+ SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
+ // +16 as shift amt.
+ SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
SDValue RHSSwap = // = vrlw RHS, 16
BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
@@ -10291,13 +10973,6 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
Neg16, DAG, dl);
return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
- } else if (Op.getValueType() == MVT::v8i16) {
- SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
-
- SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
-
- return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
- LHS, RHS, Zero, DAG, dl);
} else if (Op.getValueType() == MVT::v16i8) {
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
bool isLittleEndian = Subtarget.isLittleEndian();
@@ -10504,6 +11179,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::ABS: return LowerABS(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+ case ISD::ROTL: return LowerROTL(Op, DAG);
// For counter-based loop handling.
case ISD::INTRINSIC_W_CHAIN: return SDValue();
@@ -10516,9 +11192,6 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INTRINSIC_VOID:
return LowerINTRINSIC_VOID(Op, DAG);
- case ISD::SREM:
- case ISD::UREM:
- return LowerREM(Op, DAG);
case ISD::BSWAP:
return LowerBSWAP(Op, DAG);
case ISD::ATOMIC_CMP_SWAP:
@@ -10537,8 +11210,8 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
- Results.push_back(RTB);
- Results.push_back(RTB.getValue(1));
+ Results.push_back(
+ DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
Results.push_back(RTB.getValue(2));
break;
}
@@ -11198,13 +11871,192 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
return MBB;
}
+bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
+ // If the function specifically requests inline stack probes, emit them.
+ if (MF.getFunction().hasFnAttribute("probe-stack"))
+ return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
+ "inline-asm";
+ return false;
+}
+
+unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ unsigned StackAlign = TFI->getStackAlignment();
+ assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
+ "Unexpected stack alignment");
+ // The default stack probe size is 4096 if the function has no
+ // stack-probe-size attribute.
+ unsigned StackProbeSize = 4096;
+ const Function &Fn = MF.getFunction();
+ if (Fn.hasFnAttribute("stack-probe-size"))
+ Fn.getFnAttribute("stack-probe-size")
+ .getValueAsString()
+ .getAsInteger(0, StackProbeSize);
+ // Round down to the stack alignment.
+ StackProbeSize &= ~(StackAlign - 1);
+ return StackProbeSize ? StackProbeSize : StackAlign;
+}
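// Minimal sketch (not from the patch) of the rounding rule above on plain
// integers, assuming a power-of-two StackAlign as asserted; the helper name
// is hypothetical.
static unsigned roundProbeSizeSketch(unsigned Requested, unsigned StackAlign) {
  // e.g. 1000 with 16-byte alignment gives 992; 8 rounds down to 0 and falls
  // back to the alignment itself; the default of 4096 is left unchanged.
  unsigned Rounded = Requested & ~(StackAlign - 1);
  return Rounded ? Rounded : StackAlign;
}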
+
+// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
+// into three phases. In the first phase, it uses the pseudo instruction
+// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer
+// and FinalStackPtr. In the second phase, it generates a loop that probes
+// blocks of ProbeSize bytes. Finally, it uses the pseudo instruction
+// DYNAREAOFFSET to get the future result of MaxCallFrameSize so that it can
+// calculate the correct data area pointer.
+MachineBasicBlock *
+PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+ const bool isPPC64 = Subtarget.isPPC64();
+ MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+ const unsigned ProbeSize = getStackProbeSize(*MF);
+ const BasicBlock *ProbedBB = MBB->getBasicBlock();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ // The CFG for probing the stack looks like this:
+ //            +-----+
+ //            | MBB |
+ //            +--+--+
+ //               |
+ //          +----v----+
+ //     +--->+ TestMBB +---+
+ //     |    +----+----+   |
+ //     |         |        |
+ //     |    +----v-----+  |
+ //     +----+ BlockMBB |  |
+ //          +----------+  |
+ //                        |
+ //          +---------+   |
+ //          | TailMBB +<--+
+ //          +---------+
+ // In MBB, calculate the previous frame pointer and the final stack pointer.
+ // In TestMBB, test whether sp has reached the final stack pointer; if so,
+ // jump to TailMBB. In BlockMBB, update sp and touch the new block with a
+ // single store-with-update, then jump back to TestMBB.
+ // TailMBB receives the instructions spliced out after \p MI.
+ MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
+ MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
+ MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
+
+ MachineFunction::iterator MBBIter = ++MBB->getIterator();
+ MF->insert(MBBIter, TestMBB);
+ MF->insert(MBBIter, BlockMBB);
+ MF->insert(MBBIter, TailMBB);
+
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register NegSizeReg = MI.getOperand(1).getReg();
+ Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
+ Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
+
+ // Get the canonical FinalStackPtr in the same way
+ // PPCRegisterInfo::lowerDynamicAlloc does.
+ BuildMI(*MBB, {MI}, DL,
+ TII->get(isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64
+ : PPC::PREPARE_PROBED_ALLOCA_32),
+ FramePointer)
+ .addDef(FinalStackPtr)
+ .addReg(NegSizeReg)
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+
+ // Materialize a scratch register for update.
+ int64_t NegProbeSize = -(int64_t)ProbeSize;
+ assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
+ Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
+ if (!isInt<16>(NegProbeSize)) {
+ Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
+ BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
+ .addImm(NegProbeSize >> 16);
+ BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
+ ScratchReg)
+ .addReg(TempReg)
+ .addImm(NegProbeSize & 0xFFFF);
+ } else
+ BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
+ .addImm(NegProbeSize);
+
+ {
+ // Probing leading residual part.
+ Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
+ BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
+ .addReg(NegSizeReg)
+ .addReg(ScratchReg);
+ Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
+ BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
+ .addReg(Div)
+ .addReg(ScratchReg);
+ Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
+ BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
+ .addReg(Mul)
+ .addReg(NegSizeReg);
+ BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
+ .addReg(FramePointer)
+ .addReg(SPReg)
+ .addReg(NegMod);
+ }
+
+ {
+ // Remaining part should be multiple of ProbeSize.
+ Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
+ BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
+ .addReg(SPReg)
+ .addReg(FinalStackPtr);
+ BuildMI(TestMBB, DL, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_EQ)
+ .addReg(CmpResult)
+ .addMBB(TailMBB);
+ TestMBB->addSuccessor(BlockMBB);
+ TestMBB->addSuccessor(TailMBB);
+ }
+
+ {
+ // Touch the block.
+ // |P...|P...|P...
+ BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
+ .addReg(FramePointer)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
+ BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
+ BlockMBB->addSuccessor(TestMBB);
+ }
+
+ // The calculation of MaxCallFrameSize is deferred to prologue/epilogue
+ // insertion, so use the DYNAREAOFFSET pseudo instruction to get its future
+ // result.
+ Register MaxCallFrameSizeReg =
+ MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
+ BuildMI(TailMBB, DL,
+ TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
+ MaxCallFrameSizeReg)
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+ BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
+ .addReg(SPReg)
+ .addReg(MaxCallFrameSizeReg);
+
+ // Splice instructions after MI to TailMBB.
+ TailMBB->splice(TailMBB->end(), MBB,
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
+ MBB->addSuccessor(TestMBB);
+
+ // Delete the pseudo instruction.
+ MI.eraseFromParent();
+
+ ++NumDynamicAllocaProbed;
+ return TailMBB;
+}
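A rough C-level model of the probing scheme that the emitted machine code implements (an illustrative sketch only; the names are invented, and the real code stores the saved frame pointer at each probe rather than zero):

// SP grows downwards; NegSize is the negated allocation size (NegSize <= 0).
void probedAllocaModel(volatile char *&SP, long NegSize, long ProbeSize) {
  long NegProbeSize = -ProbeSize;
  long NegResidual = NegSize % NegProbeSize;  // leading residual part
  volatile char *FinalSP = SP + NegSize;      // canonical final stack pointer
  SP += NegResidual;                          // first stdux/stwux ...
  *SP = 0;                                    // ... touches the first block
  while (SP != FinalSP) {                     // TestMBB: cmp sp, FinalSP
    SP += NegProbeSize;                       // BlockMBB: step one block down
    *SP = 0;                                  // ... and touch it, then loop
  }
}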
+
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
if (MI.getOpcode() == TargetOpcode::STACKMAP ||
MI.getOpcode() == TargetOpcode::PATCHPOINT) {
if (Subtarget.is64BitELFABI() &&
- MI.getOpcode() == TargetOpcode::PATCHPOINT) {
+ MI.getOpcode() == TargetOpcode::PATCHPOINT &&
+ !Subtarget.isUsingPCRelativeCalls()) {
// Call lowering should have added an r2 operand to indicate a dependence
// on the TOC base pointer value. It can't however, because there is no
// way to mark the dependence as implicit there, and so the stackmap code
@@ -11886,12 +12738,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
MachineFrameInfo &MFI = F->getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(8, 8, false);
+ int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
MachineMemOperand *MMOStore = F->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
- MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
- MFI.getObjectAlignment(FrameIdx));
+ MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
+ MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlign(FrameIdx));
// Store the SrcReg into the stack.
BuildMI(*BB, MI, dl, TII->get(StoreOp))
@@ -11901,9 +12753,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.addMemOperand(MMOStore);
MachineMemOperand *MMOLoad = F->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
- MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
- MFI.getObjectAlignment(FrameIdx));
+ MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
+ MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlign(FrameIdx));
// Load from the stack where SrcReg is stored, and save to DestReg,
// so we have done the RegClass conversion from RegClass::SrcReg to
@@ -11963,6 +12815,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.addReg(NewFPSCRReg)
.addImm(0)
.addImm(0);
+ } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
+ MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
+ return emitProbedAlloca(MI, BB);
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -13167,15 +14022,20 @@ static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
DAG.getVectorShuffle(Input.getValueType(), dl, Input,
DAG.getUNDEF(Input.getValueType()), ShuffleMask);
- EVT Ty = N->getValueType(0);
- SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle);
- return BV;
+ EVT VT = N->getValueType(0);
+ SDValue Conv = DAG.getBitcast(VT, Shuffle);
+
+ EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ Input.getValueType().getVectorElementType(),
+ VT.getVectorNumElements());
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
+ DAG.getValueType(ExtVT));
}
// Look for build vector patterns where input operands come from sign
// extended vector_extract elements of specific indices. If the correct indices
-// aren't used, add a vector shuffle to fix up the indices and create a new
-// PPCISD:SExtVElems node which selects the vector sign extend instructions
+// aren't used, add a vector shuffle to fix up the indices and create a
+// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
// during instruction selection.
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
// This array encodes the indices that the vector sign extend instructions
@@ -13498,8 +14358,8 @@ SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
// Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
// aligned and the type is a vector with elements up to 4 bytes
- if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
- && VecTy.getScalarSizeInBits() <= 32 ) {
+ if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
+ VecTy.getScalarSizeInBits() <= 32) {
return SDValue();
}
@@ -13569,8 +14429,8 @@ SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
// Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
// aligned and the type is a vector with elements up to 4 bytes
- if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
- && VecTy.getScalarSizeInBits() <= 32 ) {
+ if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
+ VecTy.getScalarSizeInBits() <= 32) {
return SDValue();
}
@@ -13650,6 +14510,210 @@ SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
return Val;
}
+static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
+ // Check that the source of the element keeps flipping
+ // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
+ bool PrevElemFromFirstVec = Mask[0] < NumElts;
+ for (int i = 1, e = Mask.size(); i < e; i++) {
+ if (PrevElemFromFirstVec && Mask[i] < NumElts)
+ return false;
+ if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
+ return false;
+ PrevElemFromFirstVec = !PrevElemFromFirstVec;
+ }
+ return true;
+}
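A quick worked example of this predicate: with NumElts == 8, the mask {0, 9, 1, 11, 2, 13, 3, 15} alternates between the first vector (index < 8) and the second (index >= 8) and so passes, while {0, 1, 9, 11, 2, 13, 3, 15} fails at the second element because two consecutive elements come from the first vector.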
+
+static bool isSplatBV(SDValue Op) {
+ if (Op.getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ SDValue FirstOp;
+
+ // Find first non-undef input.
+ for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
+ FirstOp = Op.getOperand(i);
+ if (!FirstOp.isUndef())
+ break;
+ }
+
+ // All inputs are undef or the same as the first non-undef input.
+ for (int i = 1, e = Op.getNumOperands(); i < e; i++)
+ if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
+ return false;
+ return true;
+}
+
+static SDValue isScalarToVec(SDValue Op) {
+ if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return Op;
+ if (Op.getOpcode() != ISD::BITCAST)
+ return SDValue();
+ Op = Op.getOperand(0);
+ if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return Op;
+ return SDValue();
+}
+
+static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
+ int LHSMaxIdx, int RHSMinIdx,
+ int RHSMaxIdx, int HalfVec) {
+ for (int i = 0, e = ShuffV.size(); i < e; i++) {
+ int Idx = ShuffV[i];
+ if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
+ ShuffV[i] += HalfVec;
+ }
+ return;
+}
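A usage sketch in the context of this file (illustrative values only; SmallVector is from the LLVM ADT headers this file already uses):

// For a v4i32 shuffle whose inputs are both SCALAR_TO_VECTOR of an i32,
// LHSMaxIdx == 1, RHSMinIdx == 4, RHSMaxIdx == 5 and HalfVec == 2; only the
// entries that referred to element 0 of either original input are shifted,
// because the permuted form keeps the scalar in element HalfVec.
SmallVector<int, 4> ShuffV = {0, 4, 1, 5};
fixupShuffleMaskForPermutedSToV(ShuffV, /*LHSMaxIdx=*/1, /*RHSMinIdx=*/4,
                                /*RHSMaxIdx=*/5, /*HalfVec=*/2);
// ShuffV is now {2, 6, 1, 5}.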
+
+// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
+// the original is:
+// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
+// In such a case, just change the shuffle mask to extract the element
+// from the permuted index.
+static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {
+ SDLoc dl(OrigSToV);
+ EVT VT = OrigSToV.getValueType();
+ assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ "Expecting a SCALAR_TO_VECTOR here");
+ SDValue Input = OrigSToV.getOperand(0);
+
+ if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
+ SDValue OrigVector = Input.getOperand(0);
+
+ // Can't handle non-const element indices or different vector types
+ // for the input to the extract and the output of the scalar_to_vector.
+ if (Idx && VT == OrigVector.getValueType()) {
+ SmallVector<int, 16> NewMask(VT.getVectorNumElements(), -1);
+ NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue();
+ return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
+ }
+ }
+ return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
+ OrigSToV.getOperand(0));
+}
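For the special case handled above, a concrete instance (illustrative): (v4i32 (scalar_to_vector (i32 (extract_elt v4i32:%v, 3)))) is rewritten as a shuffle of %v with itself using mask <-1, -1, 3, -1>, so the extracted element lands directly in element 2 (NumElts / 2), the position a permuted scalar_to_vector would use, and no separate permute of %v is needed.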
+
+// On little endian subtargets, combine shuffles such as:
+// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
+// into:
+// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
+// because the latter can be matched to a single instruction merge.
+// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
+// to put the value into element zero. Adjust the shuffle mask so that the
+// vector can remain in permuted form (to prevent a swap prior to a shuffle).
+SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG) const {
+ SDValue LHS = SVN->getOperand(0);
+ SDValue RHS = SVN->getOperand(1);
+ auto Mask = SVN->getMask();
+ int NumElts = LHS.getValueType().getVectorNumElements();
+ SDValue Res(SVN, 0);
+ SDLoc dl(SVN);
+
+ // None of these combines are useful on big endian systems since the ISA
+ // already has a big endian bias.
+ if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())
+ return Res;
+
+ // If this is not a shuffle of a shuffle and the first element comes from
+ // the second vector, canonicalize to the commuted form. This will make it
+ // more likely to match one of the single instruction patterns.
+ if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
+ RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
+ std::swap(LHS, RHS);
+ Res = DAG.getCommutedVectorShuffle(*SVN);
+ Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
+ }
+
+ // Adjust the shuffle mask if either input vector comes from a
+ // SCALAR_TO_VECTOR and keep the respective input vector in permuted
+ // form (to prevent the need for a swap).
+ SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
+ SDValue SToVLHS = isScalarToVec(LHS);
+ SDValue SToVRHS = isScalarToVec(RHS);
+ if (SToVLHS || SToVRHS) {
+ int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
+ : SToVRHS.getValueType().getVectorNumElements();
+ int NumEltsOut = ShuffV.size();
+
+ // Initially assume that neither input is permuted. These will be adjusted
+ // accordingly if either input is.
+ int LHSMaxIdx = -1;
+ int RHSMinIdx = -1;
+ int RHSMaxIdx = -1;
+ int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
+
+ // Get the permuted scalar to vector nodes for the source(s) that come from
+ // ISD::SCALAR_TO_VECTOR.
+ if (SToVLHS) {
+ // Set up the values for the shuffle vector fixup.
+ LHSMaxIdx = NumEltsOut / NumEltsIn;
+ SToVLHS = getSToVPermuted(SToVLHS, DAG);
+ if (SToVLHS.getValueType() != LHS.getValueType())
+ SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
+ LHS = SToVLHS;
+ }
+ if (SToVRHS) {
+ RHSMinIdx = NumEltsOut;
+ RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
+ SToVRHS = getSToVPermuted(SToVRHS, DAG);
+ if (SToVRHS.getValueType() != RHS.getValueType())
+ SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
+ RHS = SToVRHS;
+ }
+
+ // Fix up the shuffle mask to reflect where the desired element actually is.
+ // The minimum and maximum indices that correspond to element zero for both
+ // the LHS and RHS are computed and will control which shuffle mask entries
+ // are to be changed. For example, if the RHS is permuted, any shuffle mask
+ // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by
+ // HalfVec to refer to the corresponding element in the permuted vector.
+ fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
+ HalfVec);
+ Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
+
+ // We may have simplified away the shuffle. We won't be able to do anything
+ // further with it here.
+ if (!isa<ShuffleVectorSDNode>(Res))
+ return Res;
+ Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
+ }
+
+ // The common case after we commuted the shuffle is that the RHS is a splat
+ // and we have elements coming in from the splat at indices that are not
+ // conducive to using a merge.
+ // Example:
+ // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
+ if (!isSplatBV(RHS))
+ return Res;
+
+ // We are looking for a mask such that all even elements are from
+ // one vector and all odd elements from the other.
+ if (!isAlternatingShuffMask(Mask, NumElts))
+ return Res;
+
+ // Adjust the mask so we are pulling in the same index from the splat
+ // as the index from the interesting vector in consecutive elements.
+ // Example (even elements from first vector):
+ // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
+ if (Mask[0] < NumElts)
+ for (int i = 1, e = Mask.size(); i < e; i += 2)
+ ShuffV[i] = (ShuffV[i - 1] + NumElts);
+ // Example (odd elements from first vector):
+ // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
+ else
+ for (int i = 0, e = Mask.size(); i < e; i += 2)
+ ShuffV[i] = (ShuffV[i + 1] + NumElts);
+
+ // If the RHS has undefs, we need to remove them since we may have created
+ // a shuffle that adds those instead of the splat value.
+ SDValue SplatVal = cast<BuildVectorSDNode>(RHS.getNode())->getSplatValue();
+ RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal);
+
+ Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
+ return Res;
+}
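The final canonicalization can be pictured with a small standalone sketch (hypothetical helper mirroring the two loops above). Because the RHS is a splat, every index >= NumElts reads the same value, so the splat-side entries may be renumbered to sit next to their neighbours, which is exactly the shape a single vector merge instruction matches:

#include <cstddef>
#include <vector>

std::vector<int> canonicalizeSplatMask(std::vector<int> Mask, int NumElts) {
  if (Mask[0] < NumElts)                    // even elements from first vector
    for (std::size_t i = 1; i < Mask.size(); i += 2)
      Mask[i] = Mask[i - 1] + NumElts;
  else                                      // odd elements from first vector
    for (std::size_t i = 0; i + 1 < Mask.size(); i += 2)
      Mask[i] = Mask[i + 1] + NumElts;
  return Mask;
}
// e.g. with NumElts == 8:
//   {0, 9, 1, 11, 2, 13, 3, 15} -> {0, 8, 1, 9, 2, 10, 3, 11}.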
+
SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
LSBaseSDNode *LSBase,
DAGCombinerInfo &DCI) const {
@@ -13721,6 +14785,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return combineSRL(N, DCI);
case ISD::MUL:
return combineMUL(N, DCI);
+ case ISD::FMA:
+ case PPCISD::FNMSUB:
+ return combineFMALike(N, DCI);
case PPCISD::SHL:
if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
return N->getOperand(0);
@@ -13756,7 +14823,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
}
- break;
+ return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
case ISD::STORE: {
EVT Op1VT = N->getOperand(1).getValueType();
@@ -13963,17 +15030,18 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
EVT MemVT = LD->getMemoryVT();
Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
+ Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
- unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
+ Align ScalarABIAlignment = DAG.getDataLayout().getABITypeAlign(STy);
if (LD->isUnindexed() && VT.isVector() &&
((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
// P8 and later hardware should just use LOAD.
- !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
- VT == MVT::v4i32 || VT == MVT::v4f32)) ||
+ !Subtarget.hasP8Vector() &&
+ (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
+ VT == MVT::v4f32)) ||
(Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
- LD->getAlignment() >= ScalarABIAlignment)) &&
- LD->getAlignment() < ABIAlignment) {
+ LD->getAlign() >= ScalarABIAlignment)) &&
+ LD->getAlign() < ABIAlignment) {
// This is a type-legal unaligned Altivec or QPX load.
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
@@ -14520,6 +15588,7 @@ Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
case PPC::DIR_PWR7:
case PPC::DIR_PWR8:
case PPC::DIR_PWR9:
+ case PPC::DIR_PWR10:
case PPC::DIR_PWR_FUTURE: {
if (!ML)
break;
@@ -14926,18 +15995,16 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const {
bool isPPC64 = Subtarget.isPPC64();
- bool IsDarwinABI = Subtarget.isDarwinABI();
bool is64Bit = isPPC64 && VT == LLT::scalar(64);
if (!is64Bit && VT != LLT::scalar(32))
report_fatal_error("Invalid register global variable type");
Register Reg = StringSwitch<Register>(RegName)
- .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
- .Case("r2", (IsDarwinABI || isPPC64) ? Register() : PPC::R2)
- .Case("r13", (!isPPC64 && IsDarwinABI) ? Register() :
- (is64Bit ? PPC::X13 : PPC::R13))
- .Default(Register());
+ .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
+ .Case("r2", isPPC64 ? Register() : PPC::R2)
+ .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
+ .Default(Register());
if (Reg)
return Reg;
@@ -15030,7 +16097,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(0);
Info.offset = -VT.getStoreSize()+1;
Info.size = 2*VT.getStoreSize()-1;
- Info.align = Align::None();
+ Info.align = Align(1);
Info.flags = MachineMemOperand::MOLoad;
return true;
}
@@ -15064,7 +16131,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.size = VT.getStoreSize();
- Info.align = Align::None();
+ Info.align = Align(1);
Info.flags = MachineMemOperand::MOLoad;
return true;
}
@@ -15116,7 +16183,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(1);
Info.offset = -VT.getStoreSize()+1;
Info.size = 2*VT.getStoreSize()-1;
- Info.align = Align::None();
+ Info.align = Align(1);
Info.flags = MachineMemOperand::MOStore;
return true;
}
@@ -15149,7 +16216,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.size = VT.getStoreSize();
- Info.align = Align::None();
+ Info.align = Align(1);
Info.flags = MachineMemOperand::MOStore;
return true;
}
@@ -15160,35 +16227,24 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return false;
}
-/// getOptimalMemOpType - Returns the target specific optimal type for load
-/// and store operations as a result of memset, memcpy, and memmove
-/// lowering. If DstAlign is zero that means it's safe to destination
-/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
-/// means there isn't a need to check it against alignment requirement,
-/// probably because the source does not need to be loaded. If 'IsMemset' is
-/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
-/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
-/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(
- uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
- bool ZeroMemset, bool MemcpyStrSrc,
- const AttributeList &FuncAttributes) const {
+ const MemOp &Op, const AttributeList &FuncAttributes) const {
if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
// When expanding a memset, require at least two QPX instructions to cover
// the cost of loading the value to be stored from the constant pool.
- if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
- (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
+ if (Subtarget.hasQPX() && Op.size() >= 32 &&
+ (Op.isMemcpy() || Op.size() >= 64) && Op.isAligned(Align(32)) &&
!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
return MVT::v4f64;
}
// We should use Altivec/VSX loads and stores when available. For unaligned
// addresses, unaligned VSX loads are only fast starting with the P8.
- if (Subtarget.hasAltivec() && Size >= 16 &&
- (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
- ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
+ if (Subtarget.hasAltivec() && Op.size() >= 16 &&
+ (Op.isAligned(Align(16)) ||
+ ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
return MVT::v4i32;
}
@@ -15304,22 +16360,48 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const {
- VT = VT.getScalarType();
-
- if (!VT.isSimple())
- return false;
+ return isFMAFasterThanFMulAndFAdd(
+ MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
+}
- switch (VT.getSimpleVT().SimpleTy) {
- case MVT::f32:
- case MVT::f64:
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
+ Type *Ty) const {
+ switch (Ty->getScalarType()->getTypeID()) {
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
return true;
- case MVT::f128:
- return (EnableQuadPrecision && Subtarget.hasP9Vector());
+ case Type::FP128TyID:
+ return Subtarget.hasP9Vector();
default:
- break;
+ return false;
}
+}
- return false;
+// Currently this is a copy from AArch64TargetLowering::isProfitableToHoist.
+// FIXME: add more patterns which are profitable to hoist.
+bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
+ if (I->getOpcode() != Instruction::FMul)
+ return true;
+
+ if (!I->hasOneUse())
+ return true;
+
+ Instruction *User = I->user_back();
+ assert(User && "A single use instruction with no uses.");
+
+ if (User->getOpcode() != Instruction::FSub &&
+ User->getOpcode() != Instruction::FAdd)
+ return true;
+
+ const TargetOptions &Options = getTargetMachine().Options;
+ const Function *F = I->getFunction();
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ Type *Ty = User->getOperand(0)->getType();
+
+ return !(
+ isFMAFasterThanFMulAndFAdd(*F, Ty) &&
+ isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
}
const MCPhysReg *
@@ -15335,12 +16417,12 @@ PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
return ScratchRegs;
}
-unsigned PPCTargetLowering::getExceptionPointerRegister(
+Register PPCTargetLowering::getExceptionPointerRegister(
const Constant *PersonalityFn) const {
return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
}
-unsigned PPCTargetLowering::getExceptionSelectorRegister(
+Register PPCTargetLowering::getExceptionSelectorRegister(
const Constant *PersonalityFn) const {
return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
}
@@ -15371,58 +16453,83 @@ PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
return PPC::createFastISel(FuncInfo, LibInfo);
}
-void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
- if (Subtarget.isDarwinABI()) return;
- if (!Subtarget.isPPC64()) return;
-
- // Update IsSplitCSR in PPCFunctionInfo
- PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
- PFI->setIsSplitCSR(true);
+// 'Inverted' means the FMA opcode after negating one multiplicand.
+// For example, (fma -a b c) = (fnmsub a b c)
+static unsigned invertFMAOpcode(unsigned Opc) {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Invalid FMA opcode for PowerPC!");
+ case ISD::FMA:
+ return PPCISD::FNMSUB;
+ case PPCISD::FNMSUB:
+ return ISD::FMA;
+ }
}
-void PPCTargetLowering::insertCopiesSplitCSR(
- MachineBasicBlock *Entry,
- const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
- const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
- const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
- if (!IStart)
- return;
+SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOps, bool OptForSize,
+ NegatibleCost &Cost,
+ unsigned Depth) const {
+ if (Depth > SelectionDAG::MaxRecursionDepth)
+ return SDValue();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
- MachineBasicBlock::iterator MBBI = Entry->begin();
- for (const MCPhysReg *I = IStart; *I; ++I) {
- const TargetRegisterClass *RC = nullptr;
- if (PPC::G8RCRegClass.contains(*I))
- RC = &PPC::G8RCRegClass;
- else if (PPC::F8RCRegClass.contains(*I))
- RC = &PPC::F8RCRegClass;
- else if (PPC::CRRCRegClass.contains(*I))
- RC = &PPC::CRRCRegClass;
- else if (PPC::VRRCRegClass.contains(*I))
- RC = &PPC::VRRCRegClass;
- else
- llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+ unsigned Opc = Op.getOpcode();
+ EVT VT = Op.getValueType();
+ SDNodeFlags Flags = Op.getNode()->getFlags();
+
+ switch (Opc) {
+ case PPCISD::FNMSUB:
+ // TODO: QPX subtarget is deprecated. No transformation here.
+ if (!Op.hasOneUse() || !isTypeLegal(VT) || Subtarget.hasQPX())
+ break;
- Register NewVR = MRI->createVirtualRegister(RC);
- // Create copy from CSR to a virtual register.
- // FIXME: this currently does not emit CFI pseudo-instructions, it works
- // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
- // nounwind. If we want to generalize this later, we may need to emit
- // CFI pseudo-instructions.
- assert(Entry->getParent()->getFunction().hasFnAttribute(
- Attribute::NoUnwind) &&
- "Function should be nounwind in insertCopiesSplitCSR!");
- Entry->addLiveIn(*I);
- BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
- .addReg(*I);
+ const TargetOptions &Options = getTargetMachine().Options;
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+ SDLoc Loc(Op);
- // Insert the copy-back instructions right before the terminator.
- for (auto *Exit : Exits)
- BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
- TII->get(TargetOpcode::COPY), *I)
- .addReg(NewVR);
+ NegatibleCost N2Cost = NegatibleCost::Expensive;
+ SDValue NegN2 =
+ getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
+
+ if (!NegN2)
+ return SDValue();
+
+ // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
+ // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
+ // These transformations may change sign of zeroes. For example,
+ // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
+ if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
+ // Try and choose the cheaper one to negate.
+ NegatibleCost N0Cost = NegatibleCost::Expensive;
+ SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
+ N0Cost, Depth + 1);
+
+ NegatibleCost N1Cost = NegatibleCost::Expensive;
+ SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
+ N1Cost, Depth + 1);
+
+ if (NegN0 && N0Cost <= N1Cost) {
+ Cost = std::min(N0Cost, N2Cost);
+ return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
+ } else if (NegN1) {
+ Cost = std::min(N1Cost, N2Cost);
+ return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
+ }
+ }
+
+ // (fneg (fnmsub a b c)) => (fma a b (fneg c))
+ if (isOperationLegal(ISD::FMA, VT)) {
+ Cost = N2Cost;
+ return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
+ }
+
+ break;
}
+
+ return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
+ Cost, Depth);
}
// Override to enable LOAD_STACK_GUARD lowering on Linux.
@@ -15450,6 +16557,13 @@ bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
return false;
case MVT::f32:
case MVT::f64:
+ if (Subtarget.hasPrefixInstrs()) {
+ // With prefixed instructions, we can materialize anything that can be
+ // represented with a 32-bit immediate, not just positive zero.
+ APFloat APFloatOfImm = Imm;
+ return convertToNonDenormSingle(APFloatOfImm);
+ }
+ LLVM_FALLTHROUGH;
case MVT::ppcf128:
return Imm.isPosZero();
}
@@ -15620,10 +16734,59 @@ static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Transform
+//   (add C1, (MAT_PCREL_ADDR GlobalAddr+C2))
+// into
+//   (MAT_PCREL_ADDR GlobalAddr+(C1+C2)).
+// Both C1 and C2 must be known constants, and C1+C2 must fit into a 34-bit
+// signed integer.
+static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) {
+ if (!Subtarget.isUsingPCRelativeCalls())
+ return SDValue();
+
+ // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
+ // If we find that node try to cast the Global Address and the Constant.
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
+ std::swap(LHS, RHS);
+
+ if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
+ return SDValue();
+
+ // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
+ GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
+ ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
+
+ // Check that both casts succeeded.
+ if (!GSDN || !ConstNode)
+ return SDValue();
+
+ int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
+ SDLoc DL(GSDN);
+
+ // The signed int offset needs to fit in 34 bits.
+ if (!isInt<34>(NewOffset))
+ return SDValue();
+
+ // The new global address is a copy of the old global address except
+ // that it has the updated Offset.
+ SDValue GA =
+ DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
+ NewOffset, GSDN->getTargetFlags());
+ SDValue MatPCRel =
+ DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
+ return MatPCRel;
+}
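The only numeric constraint here is the displacement range: the prefixed PC-relative forms (e.g. paddi, pld) carry a 34-bit signed displacement, so the folded offset must satisfy isInt<34>. A standalone sketch of that check (hypothetical helper; overflow of the int64_t addition is ignored for brevity, as in the code above):

#include <cstdint>

static bool canFoldPCRelOffset(int64_t GAOffset, int64_t AddConst) {
  const int64_t NewOffset = GAOffset + AddConst;
  // Same condition as isInt<34>(NewOffset): NewOffset in [-2^33, 2^33 - 1].
  return NewOffset >= -(int64_t(1) << 33) &&
         NewOffset <= (int64_t(1) << 33) - 1;
}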
+
SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
return Value;
+ if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
+ return Value;
+
return SDValue();
}
@@ -15648,6 +16811,24 @@ SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
SDLoc dl(N);
SDValue Op0 = N->getOperand(0);
+ // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
+ if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
+ return SDValue();
+ SDValue Sub = Op0.getOperand(0);
+ if (Sub.getOpcode() == ISD::SUB) {
+ SDValue SubOp0 = Sub.getOperand(0);
+ SDValue SubOp1 = Sub.getOperand(1);
+ if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
+ (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
+ return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
+ SubOp1.getOperand(0),
+ DCI.DAG.getTargetConstant(0, dl, MVT::i32));
+ }
+ }
+ }
+
// Looking for a truncate of i128 to i64.
if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
return SDValue();
@@ -15702,6 +16883,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
// vector 7 2 2
return true;
case PPC::DIR_PWR9:
+ case PPC::DIR_PWR10:
case PPC::DIR_PWR_FUTURE:
// type mul add shl
// scalar 5 2 2
@@ -15763,6 +16945,44 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
}
}
+// Combine an fma-like op (such as fnmsub) with fnegs into the appropriate op.
+// Do this in the combiner because we need to check the SDNode flags and other
+// subtarget features.
+SDValue PPCTargetLowering::combineFMALike(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDNodeFlags Flags = N->getFlags();
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetOptions &Options = getTargetMachine().Options;
+ unsigned Opc = N->getOpcode();
+ bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
+ bool LegalOps = !DCI.isBeforeLegalizeOps();
+ SDLoc Loc(N);
+
+ // TODO: QPX subtarget is deprecated. No transformation here.
+ if (Subtarget.hasQPX() || !isOperationLegal(ISD::FMA, VT))
+ return SDValue();
+
+ // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
+ // since (fnmsub a b c)=-0 while c-ab=+0.
+ if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
+ return SDValue();
+
+ // (fma (fneg a) b c) => (fnmsub a b c)
+ // (fnmsub (fneg a) b c) => (fma a b c)
+ if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
+ return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
+
+ // (fma a (fneg b) c) => (fnmsub a b c)
+ // (fnmsub a (fneg b) c) => (fma a b c)
+ if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
+ return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
+
+ return SDValue();
+}
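As a sanity check of the algebra (not part of the patch): modelling FNMSUB as -(a*b - c), as the comments in this file define it, the two rewrites preserve the value exactly whenever no rounding occurs; the sign-of-zero caveat above is the only reason the combine is gated on no-signed-zeros.

#include <cassert>
#include <cmath>

static double fnmsub(double a, double b, double c) { return -(a * b - c); }

int main() {
  const double a = 1.5, b = -2.25, c = 0.75; // products and sums are exact
  // (fma (fneg a) b c) => (fnmsub a b c)
  assert(std::fma(-a, b, c) == fnmsub(a, b, c));
  // (fma a (fneg b) c) => (fnmsub a b c)
  assert(std::fma(a, -b, c) == fnmsub(a, b, c));
  return 0;
}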
+
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
// Only duplicate to increase tail-calls for the 64bit SysV ABIs.
if (!Subtarget.is64BitELFABI())
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 2e1485373d19..768eaa43e013 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -85,19 +85,10 @@ namespace llvm {
/// VSFRC that is sign-extended from ByteWidth to a 64-bit integer.
VEXTS,
- /// SExtVElems, takes an input vector of a smaller type and sign
- /// extends to an output vector of a larger type.
- SExtVElems,
-
/// Reciprocal estimate instructions (unary FP ops).
FRE,
FRSQRTE,
- // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
- // three v4f32 operands and producing a v4f32 result.
- VMADDFP,
- VNMSUBFP,
-
/// VPERM - The PPC VPERM Instruction.
///
VPERM,
@@ -106,6 +97,15 @@ namespace llvm {
///
XXSPLT,
+ /// XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates, used
+ /// to convert a single precision immediate to a double precision vector or
+ /// scalar.
+ XXSPLTI_SP_TO_DP,
+
+ /// XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
+ ///
+ XXSPLTI32DX,
+
/// VECINSERT - The PPC vector insert instruction
///
VECINSERT,
@@ -142,6 +142,10 @@ namespace llvm {
/// dynamic alloca.
DYNAREAOFFSET,
+ /// To avoid stack clash, allocation is performed block by block and each
+ /// block is probed.
+ PROBED_ALLOCA,
+
/// GlobalBaseReg - On Darwin, this node represents the result of the mflr
/// at function entry, used for PIC code.
GlobalBaseReg,
@@ -157,6 +161,9 @@ namespace llvm {
SRA,
SHL,
+ /// FNMSUB - Negated multiply-subtract instruction.
+ FNMSUB,
+
/// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign
/// word and shift left immediate.
EXTSWSLI,
@@ -169,9 +176,11 @@ namespace llvm {
/// CALL - A direct function call.
/// CALL_NOP is a call with the special NOP which follows 64-bit
/// SVR4 calls and 32-bit/64-bit AIX calls.
+ /// CALL_NOTOC is a call in which the caller does not use the TOC, so no
+ /// nop for a TOC restore is needed after it.
CALL,
CALL_NOP,
+ CALL_NOTOC,
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
/// MTCTR instruction.
@@ -225,6 +234,14 @@ namespace llvm {
/// As with SINT_VEC_TO_FP, used for converting illegal types.
UINT_VEC_TO_FP,
+ /// PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to
+ /// place the value into the least significant element of the most
+ /// significant doubleword in the vector. This is not element zero for
+ /// anything smaller than a doubleword on either endianness. This node has
+ /// the same semantics as SCALAR_TO_VECTOR except that the value remains in
+ /// the aforementioned location in the vector register.
+ SCALAR_TO_VECTOR_PERMUTED,
+
// FIXME: Remove these once the ANDI glue bug is fixed:
/// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
/// eq or gt bit of CR0 after executing andi. x, 1. This is used to
@@ -430,6 +447,11 @@ namespace llvm {
/// lower (IDX=1) half of v4f32 to v2f64.
FP_EXTEND_HALF,
+ /// MAT_PCREL_ADDR = Materialize a PC Relative address. This can be done
+ /// either through an add like PADDI or through a PC Relative load like
+ /// PLD.
+ MAT_PCREL_ADDR,
+
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
@@ -676,17 +698,9 @@ namespace llvm {
return VT.isScalarInteger();
}
- bool supportSplitCSR(MachineFunction *MF) const override {
- return
- MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
- MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
- }
-
- void initializeSplitCSR(MachineBasicBlock *Entry) const override;
-
- void insertCopiesSplitCSR(
- MachineBasicBlock *Entry,
- const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
+ SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps,
+ bool OptForSize, NegatibleCost &Cost,
+ unsigned Depth = 0) const override;
/// getSetCCResultType - Return the ISD::SETCC ValueType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
@@ -716,7 +730,7 @@ namespace llvm {
/// Returns false if it can be represented by [r+imm], which are preferred.
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
SelectionDAG &DAG,
- unsigned EncodingAlignment = 0) const;
+ MaybeAlign EncodingAlignment = None) const;
/// SelectAddressRegImm - Returns true if the address N can be represented
/// by a base register plus a signed 16-bit displacement [r+imm], and if it
@@ -725,13 +739,17 @@ namespace llvm {
/// requirement, i.e. multiples of 4 for DS form.
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
SelectionDAG &DAG,
- unsigned EncodingAlignment) const;
+ MaybeAlign EncodingAlignment) const;
/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
/// represented as an indexed [r+r] operation.
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
SelectionDAG &DAG) const;
+ /// SelectAddressPCRel - Represent the specified address as pc relative to
+ /// be represented as [pc+imm]
+ bool SelectAddressPCRel(SDValue N, SDValue &Base) const;
+
Sched::Preference getSchedulingPreference(SDNode *N) const override;
/// LowerOperation - Provide custom lowering hooks for some operations.
@@ -794,6 +812,13 @@ namespace llvm {
MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const;
+ MachineBasicBlock *emitProbedAlloca(MachineInstr &MI,
+ MachineBasicBlock *MBB) const;
+
+ bool hasInlineStackProbe(MachineFunction &MF) const override;
+
+ unsigned getStackProbeSize(MachineFunction &MF) const;
+
ConstraintType getConstraintType(StringRef Constraint) const override;
/// Examine constraint string and operand type and determine a weight value.
@@ -879,7 +904,7 @@ namespace llvm {
if (VT != MVT::f32 && VT != MVT::f64)
return false;
- return true;
+ return true;
}
// Returns true if the address of the global is stored in TOC entry.
@@ -892,21 +917,10 @@ namespace llvm {
MachineFunction &MF,
unsigned Intrinsic) const override;
- /// getOptimalMemOpType - Returns the target specific optimal type for load
- /// and store operations as a result of memset, memcpy, and memmove
- /// lowering. If DstAlign is zero that means it's safe to destination
- /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
- /// means there isn't a need to check it against alignment requirement,
- /// probably because the source does not need to be loaded. If 'IsMemset' is
- /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
- /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
- /// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
- EVT
- getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
- bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
- const AttributeList &FuncAttributes) const override;
+ EVT getOptimalMemOpType(const MemOp &Op,
+ const AttributeList &FuncAttributes) const override;
/// Is unaligned memory access allowed for the given type, and is it fast
/// relative to software emulation.
@@ -922,6 +936,14 @@ namespace llvm {
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const override;
+ bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
+
+ /// isProfitableToHoist - Check if it is profitable to hoist instruction
+ /// \p I to its dominator block.
+ /// For example, it is not profitable if \p I and its only user can form an
+ /// FMA instruction, because PowerPC prefers FMADD.
+ bool isProfitableToHoist(Instruction *I) const override;
+
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
// Should we expand the build vector with shuffles?
@@ -950,14 +972,19 @@ namespace llvm {
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
- unsigned
+ Register
getExceptionPointerRegister(const Constant *PersonalityFn) const override;
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
- unsigned
+ Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
+ /// isMulhCheaperThanMulShift - Return true if a mulh[s|u] node for a
+ /// specific type is cheaper than a multiply followed by a shift.
+ /// This is true for words and doublewords on 64-bit PowerPC.
+ bool isMulhCheaperThanMulShift(EVT Type) const override;
+
/// Override to support customized stack guard loading.
bool useLoadStackGuardNode() const override;
void insertSSPDeclarations(Module &M) const override;
@@ -973,6 +1000,24 @@ namespace llvm {
unsigned JTI,
MCContext &Ctx) const override;
+ /// Structure that collects some common arguments that get passed around
+ /// between the functions for call lowering.
+ struct CallFlags {
+ const CallingConv::ID CallConv;
+ const bool IsTailCall : 1;
+ const bool IsVarArg : 1;
+ const bool IsPatchPoint : 1;
+ const bool IsIndirect : 1;
+ const bool HasNest : 1;
+ const bool NoMerge : 1;
+
+ CallFlags(CallingConv::ID CC, bool IsTailCall, bool IsVarArg,
+ bool IsPatchPoint, bool IsIndirect, bool HasNest, bool NoMerge)
+ : CallConv(CC), IsTailCall(IsTailCall), IsVarArg(IsVarArg),
+ IsPatchPoint(IsPatchPoint), IsIndirect(IsIndirect),
+ HasNest(HasNest), NoMerge(NoMerge) {}
+ };
+
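A construction sketch (illustrative only; the lower-case variable names are placeholders, not identifiers from this patch): call lowering can bundle the per-call booleans once and hand a single CallFlags value to the LowerCall_* and FinishCall helpers declared below.

CallFlags CFlags(CallConv, /*IsTailCall=*/isTailCall, /*IsVarArg=*/isVarArg,
                 /*IsPatchPoint=*/isPatchPoint, /*IsIndirect=*/isIndirect,
                 /*HasNest=*/hasNest, /*NoMerge=*/false);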
private:
struct ReuseLoadInfo {
SDValue Ptr;
@@ -981,7 +1026,7 @@ namespace llvm {
MachinePointerInfo MPI;
bool IsDereferenceable = false;
bool IsInvariant = false;
- unsigned Alignment = 0;
+ Align Alignment;
AAMDNodes AAInfo;
const MDNode *Ranges = nullptr;
@@ -1032,15 +1077,10 @@ namespace llvm {
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const;
- bool
- IsEligibleForTailCallOptimization_64SVR4(
- SDValue Callee,
- CallingConv::ID CalleeCC,
- ImmutableCallSite CS,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const;
+ bool IsEligibleForTailCallOptimization_64SVR4(
+ SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB,
+ bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG &DAG, int SPDiff,
SDValue Chain, SDValue &LROpOut,
@@ -1083,7 +1123,6 @@ namespace llvm {
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
@@ -1091,6 +1130,7 @@ namespace llvm {
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
@@ -1100,15 +1140,14 @@ namespace llvm {
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
- SDValue FinishCall(CallingConv::ID CallConv, const SDLoc &dl,
- bool isTailCall, bool isVarArg, bool isPatchPoint,
- bool hasNest, SelectionDAG &DAG,
+
+ SDValue FinishCall(CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
SDValue InFlag, SDValue Chain, SDValue CallSeqStart,
SDValue &Callee, int SPDiff, unsigned NumBytes,
const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<SDValue> &InVals,
- ImmutableCallSite CS) const;
+ const CallBase *CB) const;
SDValue
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
@@ -1155,42 +1194,34 @@ namespace llvm {
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
const SDLoc &dl) const;
- SDValue LowerCall_Darwin(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall, bool isPatchPoint,
+ SDValue LowerCall_Darwin(SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals,
- ImmutableCallSite CS) const;
- SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall, bool isPatchPoint,
+ const CallBase *CB) const;
+ SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals,
- ImmutableCallSite CS) const;
- SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall, bool isPatchPoint,
+ const CallBase *CB) const;
+ SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals,
- ImmutableCallSite CS) const;
- SDValue LowerCall_AIX(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall, bool isPatchPoint,
+ const CallBase *CB) const;
+ SDValue LowerCall_AIX(SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals,
- ImmutableCallSite CS) const;
+ const CallBase *CB) const;
SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
@@ -1206,10 +1237,13 @@ namespace llvm {
SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineFMALike(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineVectorShuffle(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG) const;
SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase,
DAGCombinerInfo &DCI) const;
@@ -1240,6 +1274,10 @@ namespace llvm {
/// essentially v16i8 vector version of VINSERTH.
SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
+ /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
+ /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1.
+ SDValue lowerToXXSPLTI32DX(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
+
// Return whether the call instruction can potentially be optimized to a
// tail call. This will cause the optimizers to attempt to move, or
// duplicate return instructions to help enable tail call optimizations.
@@ -1258,6 +1296,9 @@ namespace llvm {
bool isIntS16Immediate(SDNode *N, int16_t &Imm);
bool isIntS16Immediate(SDValue Op, int16_t &Imm);
+ bool convertToNonDenormSingle(APInt &ArgAPInt);
+ bool convertToNonDenormSingle(APFloat &ArgAPFloat);
+
} // end namespace llvm
#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index cd81e40e35f1..1c457d4170d5 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -140,6 +140,15 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
(outs), (ins abscalltarget:$func),
"bla $func\n\tnop", IIC_BrB,
[(PPCcall_nop (i64 imm:$func))]>;
+ let Predicates = [PCRelativeMemops] in {
+ // BL8_NOTOC means that the caller does not use the TOC pointer; if the
+ // caller does use R2, it is just an ordinary caller-saved register.
+ // Therefore it is safe to emit only the bl and not the nop for this
+ // instruction. The linker will not try to restore R2 after the call.
+ def BL8_NOTOC : IForm<18, 0, 1, (outs),
+ (ins calltarget:$func),
+ "bl $func", IIC_BrB, []>;
+ }
}
let Uses = [CTR8, RM] in {
let isPredicable = 1 in
@@ -194,6 +203,11 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)),
def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
(BL8_NOP texternalsym:$dst)>;
+def : Pat<(PPCcall_notoc (i64 tglobaladdr:$dst)),
+ (BL8_NOTOC tglobaladdr:$dst)>;
+def : Pat<(PPCcall_notoc (i64 texternalsym:$dst)),
+ (BL8_NOTOC texternalsym:$dst)>;
+
// Calls for AIX
def : Pat<(PPCcall (i64 mcsym:$dst)),
(BL8 mcsym:$dst)>;
@@ -411,6 +425,19 @@ def DYNALLOC8 : PPCEmitTimePseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri
(PPCdynalloc i64:$negsize, iaddr:$fpsi))]>;
def DYNAREAOFFSET8 : PPCEmitTimePseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8",
[(set i64:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
+// Probed alloca to support stack clash protection.
+let Defs = [X1], Uses = [X1], hasNoSchedulingInfo = 1 in {
+def PROBED_ALLOCA_64 : PPCCustomInserterPseudo<(outs g8rc:$result),
+ (ins g8rc:$negsize, memri:$fpsi), "#PROBED_ALLOCA_64",
+ [(set i64:$result,
+ (PPCprobedalloca i64:$negsize, iaddr:$fpsi))]>;
+def PREPARE_PROBED_ALLOCA_64 : PPCEmitTimePseudo<(outs g8rc:$fp,
+ g8rc:$sp),
+ (ins g8rc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_64", []>;
+def PROBED_STACKALLOC_64 : PPCEmitTimePseudo<(outs g8rc:$scratch, g8rc:$temp),
+ (ins i64imm:$stacksize),
+ "#PROBED_STACKALLOC_64", []>;
+}
let hasSideEffects = 0 in {
let Defs = [LR8] in {
@@ -772,8 +799,9 @@ def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS),
"popcntw $rA, $rS", IIC_IntGeneral,
[(set i32:$rA, (ctpop i32:$rS))]>;
-def POPCNTB : XForm_11<31, 122, (outs gprc:$rA), (ins gprc:$rS),
- "popcntb $rA, $rS", IIC_IntGeneral, []>;
+def POPCNTB : XForm_11<31, 122, (outs g8rc:$rA), (ins g8rc:$rS),
+ "popcntb $rA, $rS", IIC_IntGeneral,
+ [(set i64:$rA, (int_ppc_popcntb i64:$rS))]>;
defm DIVD : XOForm_1rcr<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"divd", "$rT, $rA, $rB", IIC_IntDivD,
@@ -909,6 +937,104 @@ def ISEL8 : AForm_4<31, 15,
} // hasSideEffects = 0
} // End FXU Operations.
+def : InstAlias<"li $rD, $imm", (ADDI8 g8rc:$rD, ZERO8, s16imm64:$imm)>;
+def : InstAlias<"lis $rD, $imm", (ADDIS8 g8rc:$rD, ZERO8, s17imm64:$imm)>;
+
+def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+def : InstAlias<"mr. $rA, $rB", (OR8_rec g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+
+def : InstAlias<"not $rA, $rB", (NOR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+def : InstAlias<"not. $rA, $rB", (NOR8_rec g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+
+def : InstAlias<"mtcr $rA", (MTCRF8 255, g8rc:$rA)>;
+
+def : InstAlias<"sub $rA, $rB, $rC", (SUBF8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8_rec g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"subc $rA, $rB, $rC", (SUBFC8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8_rec g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+
+def : InstAlias<"rotlwi $rA, $rS, $n", (RLWINM8 g8rc:$rA, g8rc:$rS, u5imm:$n, 0, 31)>;
+def : InstAlias<"rotlwi. $rA, $rS, $n", (RLWINM8_rec g8rc:$rA, g8rc:$rS, u5imm:$n, 0, 31)>;
+def : InstAlias<"rotlw $rA, $rS, $rB", (RLWNM8 g8rc:$rA, g8rc:$rS, g8rc:$rB, 0, 31)>;
+def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNM8_rec g8rc:$rA, g8rc:$rS, g8rc:$rB, 0, 31)>;
+def : InstAlias<"clrlwi $rA, $rS, $n", (RLWINM8 g8rc:$rA, g8rc:$rS, 0, u5imm:$n, 31)>;
+def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINM8_rec g8rc:$rA, g8rc:$rS, 0, u5imm:$n, 31)>;
+
+def : InstAlias<"isellt $rT, $rA, $rB",
+ (ISEL8 g8rc:$rT, g8rc_nox0:$rA, g8rc:$rB, CR0LT)>;
+def : InstAlias<"iselgt $rT, $rA, $rB",
+ (ISEL8 g8rc:$rT, g8rc_nox0:$rA, g8rc:$rB, CR0GT)>;
+def : InstAlias<"iseleq $rT, $rA, $rB",
+ (ISEL8 g8rc:$rT, g8rc_nox0:$rA, g8rc:$rB, CR0EQ)>;
+
+def : InstAlias<"nop", (ORI8 X0, X0, 0)>;
+def : InstAlias<"xnop", (XORI8 X0, X0, 0)>;
+
+def : InstAlias<"cntlzw $rA, $rS", (CNTLZW8 g8rc:$rA, g8rc:$rS)>;
+def : InstAlias<"cntlzw. $rA, $rS", (CNTLZW8_rec g8rc:$rA, g8rc:$rS)>;
+
+def : InstAlias<"mtxer $Rx", (MTSPR8 1, g8rc:$Rx)>;
+def : InstAlias<"mfxer $Rx", (MFSPR8 g8rc:$Rx, 1)>;
+
+def : InstAlias<"mtudscr $Rx", (MTSPR8 3, g8rc:$Rx)>;
+def : InstAlias<"mfudscr $Rx", (MFSPR8 g8rc:$Rx, 3)>;
+
+def : InstAlias<"mfrtcu $Rx", (MFSPR8 g8rc:$Rx, 4)>;
+def : InstAlias<"mfrtcl $Rx", (MFSPR8 g8rc:$Rx, 5)>;
+
+def : InstAlias<"mtlr $Rx", (MTSPR8 8, g8rc:$Rx)>;
+def : InstAlias<"mflr $Rx", (MFSPR8 g8rc:$Rx, 8)>;
+
+def : InstAlias<"mtctr $Rx", (MTSPR8 9, g8rc:$Rx)>;
+def : InstAlias<"mfctr $Rx", (MFSPR8 g8rc:$Rx, 9)>;
+
+def : InstAlias<"mtuamr $Rx", (MTSPR8 13, g8rc:$Rx)>;
+def : InstAlias<"mfuamr $Rx", (MFSPR8 g8rc:$Rx, 13)>;
+
+def : InstAlias<"mtdscr $Rx", (MTSPR8 17, g8rc:$Rx)>;
+def : InstAlias<"mfdscr $Rx", (MFSPR8 g8rc:$Rx, 17)>;
+
+def : InstAlias<"mtdsisr $Rx", (MTSPR8 18, g8rc:$Rx)>;
+def : InstAlias<"mfdsisr $Rx", (MFSPR8 g8rc:$Rx, 18)>;
+
+def : InstAlias<"mtdar $Rx", (MTSPR8 19, g8rc:$Rx)>;
+def : InstAlias<"mfdar $Rx", (MFSPR8 g8rc:$Rx, 19)>;
+
+def : InstAlias<"mtdec $Rx", (MTSPR8 22, g8rc:$Rx)>;
+def : InstAlias<"mfdec $Rx", (MFSPR8 g8rc:$Rx, 22)>;
+
+def : InstAlias<"mtsdr1 $Rx", (MTSPR8 25, g8rc:$Rx)>;
+def : InstAlias<"mfsdr1 $Rx", (MFSPR8 g8rc:$Rx, 25)>;
+
+def : InstAlias<"mtsrr0 $Rx", (MTSPR8 26, g8rc:$Rx)>;
+def : InstAlias<"mfsrr0 $Rx", (MFSPR8 g8rc:$Rx, 26)>;
+
+def : InstAlias<"mtsrr1 $Rx", (MTSPR8 27, g8rc:$Rx)>;
+def : InstAlias<"mfsrr1 $Rx", (MFSPR8 g8rc:$Rx, 27)>;
+
+def : InstAlias<"mtcfar $Rx", (MTSPR8 28, g8rc:$Rx)>;
+def : InstAlias<"mfcfar $Rx", (MFSPR8 g8rc:$Rx, 28)>;
+
+def : InstAlias<"mtamr $Rx", (MTSPR8 29, g8rc:$Rx)>;
+def : InstAlias<"mfamr $Rx", (MFSPR8 g8rc:$Rx, 29)>;
+
+foreach SPRG = 0-3 in {
+ def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR8 g8rc:$RT, !add(SPRG, 272))>;
+ def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR8 g8rc:$RT, !add(SPRG, 272))>;
+ def : InstAlias<"mfsprg "#SPRG#", $RT", (MTSPR8 !add(SPRG, 272), g8rc:$RT)>;
+ def : InstAlias<"mfsprg"#SPRG#" $RT", (MTSPR8 !add(SPRG, 272), g8rc:$RT)>;
+}
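// For SPRG = 0 the loop above expands to the aliases "mfsprg $RT, 0",
// "mfsprg0 $RT", "mtsprg 0, $RT" and "mtsprg0 $RT", all referring to SPR 272;
// SPRG values 1-3 map to SPRs 273-275 in the same way.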
+
+def : InstAlias<"mfasr $RT", (MFSPR8 g8rc:$RT, 280)>;
+def : InstAlias<"mtasr $RT", (MTSPR8 280, g8rc:$RT)>;
+
+def : InstAlias<"mttbl $Rx", (MTSPR8 284, g8rc:$Rx)>;
+def : InstAlias<"mttbu $Rx", (MTSPR8 285, g8rc:$Rx)>;
+
+def : InstAlias<"mfpvr $RT", (MFSPR8 g8rc:$RT, 287)>;
+
+def : InstAlias<"mfspefscr $Rx", (MFSPR8 g8rc:$Rx, 512)>;
+def : InstAlias<"mtspefscr $Rx", (MTSPR8 512, g8rc:$Rx)>;
//===----------------------------------------------------------------------===//
// Load/Store instructions.
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 6e8635f2413c..920eeed9d41f 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -341,7 +341,7 @@ class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
//===----------------------------------------------------------------------===//
// Instruction Definitions.
-def HasAltivec : Predicate<"PPCSubTarget->hasAltivec()">;
+def HasAltivec : Predicate<"Subtarget->hasAltivec()">;
let Predicates = [HasAltivec] in {
def DSS : DSS_Form<0, 822, (outs), (ins u5imm:$STRM),
@@ -491,7 +491,7 @@ let isCommutable = 1 in {
def VADDFP : VXForm_1<10, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vaddfp $vD, $vA, $vB", IIC_VecFP,
[(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>;
-
+
def VADDUBM : VXForm_1<0, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vaddubm $vD, $vA, $vB", IIC_VecGeneral,
[(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>;
@@ -501,7 +501,7 @@ def VADDUHM : VXForm_1<64, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
def VADDUWM : VXForm_1<128, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vadduwm $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>;
-
+
def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>;
def VADDSBS : VX1_Int_Ty<768, "vaddsbs", int_ppc_altivec_vaddsbs, v16i8>;
def VADDSHS : VX1_Int_Ty<832, "vaddshs", int_ppc_altivec_vaddshs, v8i16>;
@@ -635,7 +635,7 @@ def VMULOUB : VX1_Int_Ty2< 8, "vmuloub", int_ppc_altivec_vmuloub,
def VMULOUH : VX1_Int_Ty2< 72, "vmulouh", int_ppc_altivec_vmulouh,
v4i32, v8i16>;
} // isCommutable
-
+
def VREFP : VX2_Int_SP<266, "vrefp", int_ppc_altivec_vrefp>;
def VRFIM : VX2_Int_SP<714, "vrfim", int_ppc_altivec_vrfim>;
def VRFIN : VX2_Int_SP<522, "vrfin", int_ppc_altivec_vrfin>;
@@ -657,7 +657,7 @@ def VSUBUHM : VXForm_1<1088, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
def VSUBUWM : VXForm_1<1152, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vsubuwm $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>;
-
+
def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>;
def VSUBSHS : VX1_Int_Ty<1856, "vsubshs" , int_ppc_altivec_vsubshs, v8i16>;
def VSUBSWS : VX1_Int_Ty<1920, "vsubsws" , int_ppc_altivec_vsubsws, v4i32>;
@@ -869,6 +869,26 @@ def : Pat<(v8i16 (rotl v8i16:$vA, v8i16:$vB)),
def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)),
(v4i32 (VRLW v4i32:$vA, v4i32:$vB))>;
+// Multiply
+def : Pat<(mul v8i16:$vA, v8i16:$vB), (VMLADDUHM $vA, $vB, (v8i16(V_SET0H)))>;
+
+// Add
+def : Pat<(add (mul v8i16:$vA, v8i16:$vB), v8i16:$vC), (VMLADDUHM $vA, $vB, $vC)>;
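// vmladduhm computes (vA * vB + vC) modulo 2^16 in each halfword lane, so the
// two patterns above use the same instruction both as a plain 16-bit multiply
// (zero accumulator provided by V_SET0H) and as an integer multiply-add.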
+
+// Saturating adds/subtracts.
+def : Pat<(v16i8 (saddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDSBS $vA, $vB))>;
+def : Pat<(v16i8 (uaddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDUBS $vA, $vB))>;
+def : Pat<(v8i16 (saddsat v8i16:$vA, v8i16:$vB)), (v8i16 (VADDSHS $vA, $vB))>;
+def : Pat<(v8i16 (uaddsat v8i16:$vA, v8i16:$vB)), (v8i16 (VADDUHS $vA, $vB))>;
+def : Pat<(v4i32 (saddsat v4i32:$vA, v4i32:$vB)), (v4i32 (VADDSWS $vA, $vB))>;
+def : Pat<(v4i32 (uaddsat v4i32:$vA, v4i32:$vB)), (v4i32 (VADDUWS $vA, $vB))>;
+def : Pat<(v16i8 (ssubsat v16i8:$vA, v16i8:$vB)), (v16i8 (VSUBSBS $vA, $vB))>;
+def : Pat<(v16i8 (usubsat v16i8:$vA, v16i8:$vB)), (v16i8 (VSUBUBS $vA, $vB))>;
+def : Pat<(v8i16 (ssubsat v8i16:$vA, v8i16:$vB)), (v8i16 (VSUBSHS $vA, $vB))>;
+def : Pat<(v8i16 (usubsat v8i16:$vA, v8i16:$vB)), (v8i16 (VSUBUHS $vA, $vB))>;
+def : Pat<(v4i32 (ssubsat v4i32:$vA, v4i32:$vB)), (v4i32 (VSUBSWS $vA, $vB))>;
+def : Pat<(v4i32 (usubsat v4i32:$vA, v4i32:$vB)), (v4i32 (VSUBUWS $vA, $vB))>;
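// With these patterns, the @llvm.sadd.sat, @llvm.uadd.sat, @llvm.ssub.sat and
// @llvm.usub.sat intrinsics on 128-bit vector types select directly to the
// Altivec saturating instructions instead of being expanded.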
+
// Loads.
def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
@@ -1002,14 +1022,9 @@ def : Pat<(and v4i32:$A, (vnot_ppc v4i32:$B)),
def : Pat<(fmul v4f32:$vA, v4f32:$vB),
(VMADDFP $vA, $vB,
- (v4i32 (VSLW (v4i32 (V_SETALLONES)), (v4i32 (V_SETALLONES)))))>;
+ (v4i32 (VSLW (v4i32 (V_SETALLONES)), (v4i32 (V_SETALLONES)))))>;
-// Fused multiply add and multiply sub for packed float. These are represented
-// separately from the real instructions above, for operations that must have
-// the additional precision, such as Newton-Rhapson (used by divide, sqrt)
-def : Pat<(PPCvmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
- (VMADDFP $A, $B, $C)>;
-def : Pat<(PPCvnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
+def : Pat<(PPCfnmsub v4f32:$A, v4f32:$B, v4f32:$C),
(VNMSUBFP $A, $B, $C)>;
def : Pat<(int_ppc_altivec_vmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
@@ -1121,8 +1136,8 @@ def : Pat<(v16i8 (srl (sub v16i8:$vA, (v16i8 (bitconvert(vnot_ppc v4i32:$vB)))),
} // end HasAltivec
-def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
-def HasP8Crypto : Predicate<"PPCSubTarget->hasP8Crypto()">;
+def HasP8Altivec : Predicate<"Subtarget->hasP8Altivec()">;
+def HasP8Crypto : Predicate<"Subtarget->hasP8Crypto()">;
let Predicates = [HasP8Altivec] in {
let isCommutable = 1 in {
@@ -1143,7 +1158,7 @@ def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>;
def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>;
} // isCommutable
-// Vector merge
+// Vector merge
def VMRGEW : VXForm_1<1932, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmrgew $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD,
@@ -1251,16 +1266,16 @@ def VPOPCNTD : VXForm_2<1987, (outs vrrc:$vD), (ins vrrc:$vB),
[(set v2i64:$vD, (ctpop v2i64:$vB))]>;
let isCommutable = 1 in {
-// FIXME: Use AddedComplexity > 400 to ensure these patterns match before the
+// FIXME: Use AddedComplexity > 400 to ensure these patterns match before the
// VSX equivalents. We need to fix this up at some point. Two possible
// solutions for this problem:
// 1. Disable Altivec patterns that compete with VSX patterns using the
-// !HasVSX predicate. This essentially favours VSX over Altivec, in
-// hopes of reducing register pressure (larger register set using VSX
+// !HasVSX predicate. This essentially favours VSX over Altivec, in
+// hopes of reducing register pressure (larger register set using VSX
// instructions than VMX instructions)
// 2. Employ a more disciplined use of AddedComplexity, which would provide
// more fine-grained control than option 1. This would be beneficial
-// if we find situations where Altivec is really preferred over VSX.
+// if we find situations where Altivec is really preferred over VSX.
def VEQV : VXForm_1<1668, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"veqv $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (vnot_ppc (xor v4i32:$vA, v4i32:$vB)))]>;
@@ -1339,7 +1354,7 @@ def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
} // HasP8Crypto
// The following altivec instructions were introduced in Power ISA 3.0
-def HasP9Altivec : Predicate<"PPCSubTarget->hasP9Altivec()">;
+def HasP9Altivec : Predicate<"Subtarget->hasP9Altivec()">;
let Predicates = [HasP9Altivec] in {
// Vector Multiply-Sum
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 115bd44ea202..632d4d9deb8a 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -39,7 +39,11 @@ class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin>
// Indicate that this instruction is of type X-Form Load or Store
bits<1> XFormMemOp = 0;
- let TSFlags{7} = XFormMemOp;
+ let TSFlags{6} = XFormMemOp;
+
+ // Indicate that this instruction is prefixed.
+ bits<1> Prefixed = 0;
+ let TSFlags{7} = Prefixed;
// Fields used for relation models.
string BaseName = "";
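// A hedged C++ sketch of how the two TSFlags bits above could be consumed on
// the MachineInstr side (the helpers and constants here are illustrative, not
// taken from this patch). The bit positions must match the TableGen
// assignments: bit 6 for XFormMemOp and bit 7 for Prefixed.
#include "llvm/MC/MCInstrDesc.h"
#include <cstdint>

namespace {
constexpr uint64_t XFormMemOpBit = 1ULL << 6; // let TSFlags{6} = XFormMemOp
constexpr uint64_t PrefixedBit = 1ULL << 7;   // let TSFlags{7} = Prefixed

bool isXFormMemOpDesc(const llvm::MCInstrDesc &Desc) {
  return Desc.TSFlags & XFormMemOpBit;
}
bool isPrefixedDesc(const llvm::MCInstrDesc &Desc) {
  return Desc.TSFlags & PrefixedBit;
}
} // namespace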
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrHTM.td
index 6cbf999ca73d..992ad8216f3b 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrHTM.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrHTM.td
@@ -13,7 +13,7 @@
-def HasHTM : Predicate<"PPCSubTarget->hasHTM()">;
+def HasHTM : Predicate<"Subtarget->hasHTM()">;
def HTM_get_imm : SDNodeXForm<imm, [{
return getI32Imm (N->getZExtValue(), SDLoc(N));
@@ -169,3 +169,8 @@ def : Pat<(i64 (int_ppc_ttest)),
36, 28)>;
} // [HasHTM]
+
+def : InstAlias<"tend.", (TEND 0)>, Requires<[HasHTM]>;
+def : InstAlias<"tendall.", (TEND 1)>, Requires<[HasHTM]>;
+def : InstAlias<"tsuspend.", (TSR 0)>, Requires<[HasHTM]>;
+def : InstAlias<"tresume.", (TSR 1)>, Requires<[HasHTM]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index d7925befcd37..11c97210ead9 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -19,6 +19,7 @@
#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -72,27 +73,6 @@ static cl::opt<bool>
UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
cl::desc("Use the old (incorrect) instruction latency calculation"));
-// Index into the OpcodesForSpill array.
-enum SpillOpcodeKey {
- SOK_Int4Spill,
- SOK_Int8Spill,
- SOK_Float8Spill,
- SOK_Float4Spill,
- SOK_CRSpill,
- SOK_CRBitSpill,
- SOK_VRVectorSpill,
- SOK_VSXVectorSpill,
- SOK_VectorFloat8Spill,
- SOK_VectorFloat4Spill,
- SOK_VRSaveSpill,
- SOK_QuadFloat8Spill,
- SOK_QuadFloat4Spill,
- SOK_QuadBitSpill,
- SOK_SpillToVSR,
- SOK_SPESpill,
- SOK_LastOpcodeSpill // This must be last on the enum.
-};
-
// Pin the vtable to this file.
void PPCInstrInfo::anchor() {}
@@ -225,13 +205,42 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
+/// This is an architecture-specific helper function of reassociateOps.
+/// Set special operand attributes for new instructions after reassociation.
+void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
+ MachineInstr &OldMI2,
+ MachineInstr &NewMI1,
+ MachineInstr &NewMI2) const {
+ // Propagate FP flags from the original instructions.
+ // But clear poison-generating flags because those may not be valid now.
+ uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
+ NewMI1.setFlags(IntersectedFlags);
+ NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap);
+ NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap);
+ NewMI1.clearFlag(MachineInstr::MIFlag::IsExact);
+
+ NewMI2.setFlags(IntersectedFlags);
+ NewMI2.clearFlag(MachineInstr::MIFlag::NoSWrap);
+ NewMI2.clearFlag(MachineInstr::MIFlag::NoUWrap);
+ NewMI2.clearFlag(MachineInstr::MIFlag::IsExact);
+}
+
+void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &MI,
+ uint16_t Flags) const {
+ MI.setFlags(Flags);
+ MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
+ MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
+ MI.clearFlag(MachineInstr::MIFlag::IsExact);
+}
+
// This function does not list all associative and commutative operations, but
// only those worth feeding through the machine combiner in an attempt to
// reduce the critical path. Mostly, this means floating-point operations,
-// because they have high latencies (compared to other operations, such and
+// because they have high latencies (>= 5) (compared to other operations, such as
// and/or, which are also associative and commutative, but have low latencies).
bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
switch (Inst.getOpcode()) {
+ // Floating point:
// FP Add:
case PPC::FADD:
case PPC::FADDS:
@@ -258,12 +267,157 @@ bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
case PPC::QVFMUL:
case PPC::QVFMULS:
case PPC::QVFMULSs:
+ return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
+ Inst.getFlag(MachineInstr::MIFlag::FmNsz);
+ // Fixed point:
+ // Multiply:
+ case PPC::MULHD:
+ case PPC::MULLD:
+ case PPC::MULHW:
+ case PPC::MULLW:
return true;
default:
return false;
}
}
+#define InfoArrayIdxFMAInst 0
+#define InfoArrayIdxFAddInst 1
+#define InfoArrayIdxFMULInst 2
+#define InfoArrayIdxAddOpIdx 3
+#define InfoArrayIdxMULOpIdx 4
+// Array keeps info for FMA instructions:
+// Index 0(InfoArrayIdxFMAInst): FMA instruction;
+// Index 1(InfoArrayIdxFAddInst): ADD instruction associated with the FMA;
+// Index 2(InfoArrayIdxFMULInst): MUL instruction associated with the FMA;
+// Index 3(InfoArrayIdxAddOpIdx): ADD operand index in the FMA operands;
+// Index 4(InfoArrayIdxMULOpIdx): index of the first MUL operand in the FMA
+// operands; the second MUL operand index is this value plus 1.
+static const uint16_t FMAOpIdxInfo[][5] = {
+ // FIXME: Add more FMA instructions like XSNMADDADP and so on.
+ {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2},
+ {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2},
+ {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2},
+ {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2},
+ {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1},
+ {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1},
+ {PPC::QVFMADDSs, PPC::QVFADDSs, PPC::QVFMULSs, 3, 1},
+ {PPC::QVFMADD, PPC::QVFADD, PPC::QVFMUL, 3, 1}};
+
+// Check if an opcode is a FMA instruction. If it is, return the index in array
+// FMAOpIdxInfo. Otherwise, return -1.
+int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
+ for (unsigned I = 0; I < array_lengthof(FMAOpIdxInfo); I++)
+ if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)
+ return I;
+ return -1;
+}
+
+// Try to reassociate FMA chains like below:
+//
+// Pattern 1:
+// A = FADD X, Y (Leaf)
+// B = FMA A, M21, M22 (Prev)
+// C = FMA B, M31, M32 (Root)
+// -->
+// A = FMA X, M21, M22
+// B = FMA Y, M31, M32
+// C = FADD A, B
+//
+// Pattern 2:
+// A = FMA X, M11, M12 (Leaf)
+// B = FMA A, M21, M22 (Prev)
+// C = FMA B, M31, M32 (Root)
+// -->
+// A = FMUL M11, M12
+// B = FMA X, M21, M22
+// D = FMA A, M31, M32
+// C = FADD B, D
+//
+// breaking the dependency between A and B, allowing the FMAs to be executed
+// in parallel (or back-to-back in a pipeline) instead of waiting on each
+// other.
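// A source-level illustration of Pattern 1 (a sketch under the assumption
// that fast-math reassociation is allowed; it is not code from this patch).
// The "before" form chains two dependent FMAs; the "after" form matches the
// rewritten DAG: two independent FMAs joined by a single add.
#include <cmath>

double pattern1_before(double X, double Y, double M21, double M22, double M31,
                       double M32) {
  double A = X + Y;                 // Leaf
  double B = std::fma(M21, M22, A); // Prev, depends on A
  return std::fma(M31, M32, B);     // Root, depends on B
}

double pattern1_after(double X, double Y, double M21, double M22, double M31,
                      double M32) {
  double A = std::fma(M21, M22, X); // independent of B
  double B = std::fma(M31, M32, Y); // independent of A
  return A + B;                     // C = FADD A, B
}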
+bool PPCInstrInfo::getFMAPatterns(
+ MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+ MachineBasicBlock *MBB = Root.getParent();
+ const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {
+ for (const auto &MO : Instr.explicit_operands())
+ if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg())))
+ return false;
+ return true;
+ };
+
+ auto IsReassociable = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
+ bool IsLeaf, bool IsAdd) {
+ int16_t Idx = -1;
+ if (!IsAdd) {
+ Idx = getFMAOpIdxInfo(Instr.getOpcode());
+ if (Idx < 0)
+ return false;
+ } else if (Instr.getOpcode() !=
+ FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())]
+ [InfoArrayIdxFAddInst])
+ return false;
+
+ // The instruction may be reassociable; however, its fast-math flags can
+ // prohibit reassociation.
+ if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
+ Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
+ return false;
+
+ // Instruction operands are virtual registers for reassociation.
+ if (!IsAllOpsVirtualReg(Instr))
+ return false;
+
+ if (IsAdd && IsLeaf)
+ return true;
+
+ AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];
+
+ const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx);
+ MachineInstr *MIAdd = MRI.getUniqueVRegDef(OpAdd.getReg());
+ // If 'add' operand's def is not in current block, don't do ILP related opt.
+ if (!MIAdd || MIAdd->getParent() != MBB)
+ return false;
+
+ // If this is not the leaf FMA instruction, its 'add' operand should have
+ // only one use, as this FMA will be rewritten later.
+ return IsLeaf ? true : MRI.hasOneNonDBGUse(OpAdd.getReg());
+ };
+
+ int16_t AddOpIdx = -1;
+ // Root must be a valid FMA like instruction.
+ if (!IsReassociable(Root, AddOpIdx, false, false))
+ return false;
+
+ assert((AddOpIdx >= 0) && "add operand index not right!");
+
+ Register RegB = Root.getOperand(AddOpIdx).getReg();
+ MachineInstr *Prev = MRI.getUniqueVRegDef(RegB);
+
+ // Prev must be a valid FMA like instruction.
+ AddOpIdx = -1;
+ if (!IsReassociable(*Prev, AddOpIdx, false, false))
+ return false;
+
+ assert((AddOpIdx >= 0) && "add operand index not right!");
+
+ Register RegA = Prev->getOperand(AddOpIdx).getReg();
+ MachineInstr *Leaf = MRI.getUniqueVRegDef(RegA);
+ AddOpIdx = -1;
+ if (IsReassociable(*Leaf, AddOpIdx, true, false)) {
+ Patterns.push_back(MachineCombinerPattern::REASSOC_XMM_AMM_BMM);
+ return true;
+ }
+ if (IsReassociable(*Leaf, AddOpIdx, true, true)) {
+ Patterns.push_back(MachineCombinerPattern::REASSOC_XY_AMM_BMM);
+ return true;
+ }
+ return false;
+}
+
bool PPCInstrInfo::getMachineCombinerPatterns(
MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
@@ -272,16 +426,201 @@ bool PPCInstrInfo::getMachineCombinerPatterns(
if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive)
return false;
- // FP reassociation is only legal when we don't need strict IEEE semantics.
- if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
- return false;
+ if (getFMAPatterns(Root, Patterns))
+ return true;
return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
}
+void PPCInstrInfo::genAlternativeCodeSequence(
+ MachineInstr &Root, MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
+ switch (Pattern) {
+ case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
+ case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
+ reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
+ break;
+ default:
+ // Reassociate default patterns.
+ TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
+ DelInstrs, InstrIdxForVirtReg);
+ break;
+ }
+}
+
+// Currently, only handle two patterns REASSOC_XY_AMM_BMM and
+// REASSOC_XMM_AMM_BMM. See comments for getFMAPatterns.
+void PPCInstrInfo::reassociateFMA(
+ MachineInstr &Root, MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
+ MachineFunction *MF = Root.getMF();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineOperand &OpC = Root.getOperand(0);
+ Register RegC = OpC.getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(RegC);
+ MRI.constrainRegClass(RegC, RC);
+
+ unsigned FmaOp = Root.getOpcode();
+ int16_t Idx = getFMAOpIdxInfo(FmaOp);
+ assert(Idx >= 0 && "Root must be a FMA instruction");
+
+ uint16_t AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];
+ uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
+ MachineInstr *Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
+ MachineInstr *Leaf =
+ MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
+ uint16_t IntersectedFlags =
+ Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
+
+ auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,
+ bool &KillFlag) {
+ Reg = Operand.getReg();
+ MRI.constrainRegClass(Reg, RC);
+ KillFlag = Operand.isKill();
+ };
+
+ auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,
+ Register &MulOp2, bool &MulOp1KillFlag,
+ bool &MulOp2KillFlag) {
+ GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag);
+ GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag);
+ };
+
+ Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32;
+ bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,
+ KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false;
+
+ GetFMAInstrInfo(Root, RegM31, RegM32, KillM31, KillM32);
+ GetFMAInstrInfo(*Prev, RegM21, RegM22, KillM21, KillM22);
+
+ if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
+ GetFMAInstrInfo(*Leaf, RegM11, RegM12, KillM11, KillM12);
+ GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);
+ } else if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) {
+ GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
+ GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
+ }
+
+ // Create new virtual registers for the new results instead of
+ // recycling legacy ones because the MachineCombiner's computation of the
+ // critical path requires a new register definition rather than an existing
+ // one.
+ Register NewVRA = MRI.createVirtualRegister(RC);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0));
+
+ Register NewVRB = MRI.createVirtualRegister(RC);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
+
+ Register NewVRD = 0;
+ if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
+ NewVRD = MRI.createVirtualRegister(RC);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2));
+ }
+
+ auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd,
+ Register RegMul1, bool KillRegMul1,
+ Register RegMul2, bool KillRegMul2) {
+ MI->getOperand(AddOpIdx).setReg(RegAdd);
+ MI->getOperand(AddOpIdx).setIsKill(KillAdd);
+ MI->getOperand(FirstMulOpIdx).setReg(RegMul1);
+ MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1);
+ MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2);
+ MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2);
+ };
+
+ if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) {
+ // Create new instructions for insertion.
+ MachineInstrBuilder MINewB =
+ BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
+ .addReg(RegX, getKillRegState(KillX))
+ .addReg(RegM21, getKillRegState(KillM21))
+ .addReg(RegM22, getKillRegState(KillM22));
+ MachineInstrBuilder MINewA =
+ BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
+ .addReg(RegY, getKillRegState(KillY))
+ .addReg(RegM31, getKillRegState(KillM31))
+ .addReg(RegM32, getKillRegState(KillM32));
+ // If AddOpIdx is not 1, adjust the order.
+ if (AddOpIdx != 1) {
+ AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
+ AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32);
+ }
+
+ MachineInstrBuilder MINewC =
+ BuildMI(*MF, Root.getDebugLoc(),
+ get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC)
+ .addReg(NewVRB, getKillRegState(true))
+ .addReg(NewVRA, getKillRegState(true));
+
+ // Update flags for newly created instructions.
+ setSpecialOperandAttr(*MINewA, IntersectedFlags);
+ setSpecialOperandAttr(*MINewB, IntersectedFlags);
+ setSpecialOperandAttr(*MINewC, IntersectedFlags);
+
+ // Record new instructions for insertion.
+ InsInstrs.push_back(MINewA);
+ InsInstrs.push_back(MINewB);
+ InsInstrs.push_back(MINewC);
+ } else if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
+ assert(NewVRD && "new FMA register not created!");
+ // Create new instructions for insertion.
+ MachineInstrBuilder MINewA =
+ BuildMI(*MF, Leaf->getDebugLoc(),
+ get(FMAOpIdxInfo[Idx][InfoArrayIdxFMULInst]), NewVRA)
+ .addReg(RegM11, getKillRegState(KillM11))
+ .addReg(RegM12, getKillRegState(KillM12));
+ MachineInstrBuilder MINewB =
+ BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
+ .addReg(RegX, getKillRegState(KillX))
+ .addReg(RegM21, getKillRegState(KillM21))
+ .addReg(RegM22, getKillRegState(KillM22));
+ MachineInstrBuilder MINewD =
+ BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD)
+ .addReg(NewVRA, getKillRegState(true))
+ .addReg(RegM31, getKillRegState(KillM31))
+ .addReg(RegM32, getKillRegState(KillM32));
+ // If AddOpIdx is not 1, adjust the order.
+ if (AddOpIdx != 1) {
+ AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
+ AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32,
+ KillM32);
+ }
+
+ MachineInstrBuilder MINewC =
+ BuildMI(*MF, Root.getDebugLoc(),
+ get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC)
+ .addReg(NewVRB, getKillRegState(true))
+ .addReg(NewVRD, getKillRegState(true));
+
+ // Update flags for newly created instructions.
+ setSpecialOperandAttr(*MINewA, IntersectedFlags);
+ setSpecialOperandAttr(*MINewB, IntersectedFlags);
+ setSpecialOperandAttr(*MINewD, IntersectedFlags);
+ setSpecialOperandAttr(*MINewC, IntersectedFlags);
+
+ // Record new instructions for insertion.
+ InsInstrs.push_back(MINewA);
+ InsInstrs.push_back(MINewB);
+ InsInstrs.push_back(MINewD);
+ InsInstrs.push_back(MINewC);
+ }
+
+ assert(!InsInstrs.empty() &&
+ "Insertion instructions set should not be empty!");
+
+ // Record old instructions for deletion.
+ DelInstrs.push_back(Leaf);
+ DelInstrs.push_back(Prev);
+ DelInstrs.push_back(&Root);
+}
+
// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
+ Register &SrcReg, Register &DstReg,
unsigned &SubIdx) const {
switch (MI.getOpcode()) {
default: return false;
@@ -753,9 +1092,10 @@ unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB,
// Select analysis.
bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
- ArrayRef<MachineOperand> Cond,
- unsigned TrueReg, unsigned FalseReg,
- int &CondCycles, int &TrueCycles, int &FalseCycles) const {
+ ArrayRef<MachineOperand> Cond,
+ Register DstReg, Register TrueReg,
+ Register FalseReg, int &CondCycles,
+ int &TrueCycles, int &FalseCycles) const {
if (Cond.size() != 2)
return false;
@@ -791,9 +1131,9 @@ bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const DebugLoc &dl, unsigned DestReg,
- ArrayRef<MachineOperand> Cond, unsigned TrueReg,
- unsigned FalseReg) const {
+ const DebugLoc &dl, Register DestReg,
+ ArrayRef<MachineOperand> Cond, Register TrueReg,
+ Register FalseReg) const {
assert(Cond.size() == 2 &&
"PPC branch conditions have two components!");
@@ -852,7 +1192,7 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
}
- unsigned FirstReg = SwapOps ? FalseReg : TrueReg,
+ Register FirstReg = SwapOps ? FalseReg : TrueReg,
SecondReg = SwapOps ? TrueReg : FalseReg;
// The first input register of isel cannot be r0. If it is a member
@@ -863,7 +1203,7 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
const TargetRegisterClass *FirstRC =
MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
&PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
- unsigned OldFirstReg = FirstReg;
+ Register OldFirstReg = FirstReg;
FirstReg = MRI.createVirtualRegister(FirstRC);
BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
.addReg(OldFirstReg);
@@ -1024,183 +1364,66 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
}
-unsigned PPCInstrInfo::getStoreOpcodeForSpill(unsigned Reg,
- const TargetRegisterClass *RC)
- const {
- const unsigned *OpcodesForSpill = getStoreOpcodesForSpillArray();
+static unsigned getSpillIndex(const TargetRegisterClass *RC) {
int OpcodeIndex = 0;
- if (RC != nullptr) {
- if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
- PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_Int4Spill;
- } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
- PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_Int8Spill;
- } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_Float8Spill;
- } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_Float4Spill;
- } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_SPESpill;
- } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_CRSpill;
- } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_CRBitSpill;
- } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VRVectorSpill;
- } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VSXVectorSpill;
- } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VectorFloat8Spill;
- } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VectorFloat4Spill;
- } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VRSaveSpill;
- } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_QuadFloat8Spill;
- } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_QuadFloat4Spill;
- } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_QuadBitSpill;
- } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_SpillToVSR;
- } else {
- llvm_unreachable("Unknown regclass!");
- }
+ if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
+ PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_Int4Spill;
+ } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
+ PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_Int8Spill;
+ } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_Float8Spill;
+ } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_Float4Spill;
+ } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_SPESpill;
+ } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_CRSpill;
+ } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_CRBitSpill;
+ } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_VRVectorSpill;
+ } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_VSXVectorSpill;
+ } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_VectorFloat8Spill;
+ } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_VectorFloat4Spill;
+ } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_VRSaveSpill;
+ } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_QuadFloat8Spill;
+ } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_QuadFloat4Spill;
+ } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_QuadBitSpill;
+ } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_SpillToVSR;
} else {
- if (PPC::GPRCRegClass.contains(Reg) ||
- PPC::GPRC_NOR0RegClass.contains(Reg)) {
- OpcodeIndex = SOK_Int4Spill;
- } else if (PPC::G8RCRegClass.contains(Reg) ||
- PPC::G8RC_NOX0RegClass.contains(Reg)) {
- OpcodeIndex = SOK_Int8Spill;
- } else if (PPC::F8RCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_Float8Spill;
- } else if (PPC::F4RCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_Float4Spill;
- } else if (PPC::SPERCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_SPESpill;
- } else if (PPC::CRRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_CRSpill;
- } else if (PPC::CRBITRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_CRBitSpill;
- } else if (PPC::VRRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_VRVectorSpill;
- } else if (PPC::VSRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_VSXVectorSpill;
- } else if (PPC::VSFRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_VectorFloat8Spill;
- } else if (PPC::VSSRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_VectorFloat4Spill;
- } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_VRSaveSpill;
- } else if (PPC::QFRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_QuadFloat8Spill;
- } else if (PPC::QSRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_QuadFloat4Spill;
- } else if (PPC::QBRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_QuadBitSpill;
- } else if (PPC::SPILLTOVSRRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_SpillToVSR;
- } else {
- llvm_unreachable("Unknown regclass!");
- }
+ llvm_unreachable("Unknown regclass!");
}
- return OpcodesForSpill[OpcodeIndex];
+ return OpcodeIndex;
}
unsigned
-PPCInstrInfo::getLoadOpcodeForSpill(unsigned Reg,
- const TargetRegisterClass *RC) const {
- const unsigned *OpcodesForSpill = getLoadOpcodesForSpillArray();
- int OpcodeIndex = 0;
+PPCInstrInfo::getStoreOpcodeForSpill(const TargetRegisterClass *RC) const {
+ const unsigned *OpcodesForSpill = getStoreOpcodesForSpillArray();
+ return OpcodesForSpill[getSpillIndex(RC)];
+}
- if (RC != nullptr) {
- if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
- PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_Int4Spill;
- } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
- PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_Int8Spill;
- } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_Float8Spill;
- } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_Float4Spill;
- } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_SPESpill;
- } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_CRSpill;
- } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_CRBitSpill;
- } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VRVectorSpill;
- } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VSXVectorSpill;
- } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VectorFloat8Spill;
- } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VectorFloat4Spill;
- } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VRSaveSpill;
- } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_QuadFloat8Spill;
- } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_QuadFloat4Spill;
- } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_QuadBitSpill;
- } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_SpillToVSR;
- } else {
- llvm_unreachable("Unknown regclass!");
- }
- } else {
- if (PPC::GPRCRegClass.contains(Reg) ||
- PPC::GPRC_NOR0RegClass.contains(Reg)) {
- OpcodeIndex = SOK_Int4Spill;
- } else if (PPC::G8RCRegClass.contains(Reg) ||
- PPC::G8RC_NOX0RegClass.contains(Reg)) {
- OpcodeIndex = SOK_Int8Spill;
- } else if (PPC::F8RCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_Float8Spill;
- } else if (PPC::F4RCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_Float4Spill;
- } else if (PPC::SPERCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_SPESpill;
- } else if (PPC::CRRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_CRSpill;
- } else if (PPC::CRBITRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_CRBitSpill;
- } else if (PPC::VRRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_VRVectorSpill;
- } else if (PPC::VSRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_VSXVectorSpill;
- } else if (PPC::VSFRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_VectorFloat8Spill;
- } else if (PPC::VSSRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_VectorFloat4Spill;
- } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_VRSaveSpill;
- } else if (PPC::QFRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_QuadFloat8Spill;
- } else if (PPC::QSRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_QuadFloat4Spill;
- } else if (PPC::QBRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_QuadBitSpill;
- } else if (PPC::SPILLTOVSRRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_SpillToVSR;
- } else {
- llvm_unreachable("Unknown regclass!");
- }
- }
- return OpcodesForSpill[OpcodeIndex];
+unsigned
+PPCInstrInfo::getLoadOpcodeForSpill(const TargetRegisterClass *RC) const {
+ const unsigned *OpcodesForSpill = getLoadOpcodesForSpillArray();
+ return OpcodesForSpill[getSpillIndex(RC)];
}
void PPCInstrInfo::StoreRegToStackSlot(
MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr *> &NewMIs) const {
- unsigned Opcode = getStoreOpcodeForSpill(PPC::NoRegister, RC);
+ unsigned Opcode = getStoreOpcodeForSpill(RC);
DebugLoc DL;
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
@@ -1221,24 +1444,13 @@ void PPCInstrInfo::StoreRegToStackSlot(
FuncInfo->setHasNonRISpills();
}
-void PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill,
- int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+void PPCInstrInfo::storeRegToStackSlotNoUpd(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg,
+ bool isKill, int FrameIdx, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
SmallVector<MachineInstr *, 4> NewMIs;
- // We need to avoid a situation in which the value from a VRRC register is
- // spilled using an Altivec instruction and reloaded into a VSRC register
- // using a VSX instruction. The issue with this is that the VSX
- // load/store instructions swap the doublewords in the vector and the Altivec
- // ones don't. The register classes on the spill/reload may be different if
- // the register is defined using an Altivec instruction and is then used by a
- // VSX instruction.
- RC = updatedRC(RC);
-
StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs);
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
@@ -1248,16 +1460,33 @@ void PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdx),
MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
- MFI.getObjectAlignment(FrameIdx));
+ MFI.getObjectAlign(FrameIdx));
NewMIs.back()->addMemOperand(MF, MMO);
}
+void PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ Register SrcReg, bool isKill,
+ int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ // We need to avoid a situation in which the value from a VRRC register is
+ // spilled using an Altivec instruction and reloaded into a VSRC register
+ // using a VSX instruction. The issue with this is that the VSX
+ // load/store instructions swap the doublewords in the vector and the Altivec
+ // ones don't. The register classes on the spill/reload may be different if
+ // the register is defined using an Altivec instruction and is then used by a
+ // VSX instruction.
+ RC = updatedRC(RC);
+ storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC, TRI);
+}
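// Concrete instance of the hazard described above: a VRRC value spilled with
// the Altivec stvx but reloaded with the VSX lxvd2x would come back with its
// two doublewords swapped; widening the class to VSRC keeps the spill and
// reload opcodes consistent.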
+
void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr *> &NewMIs)
const {
- unsigned Opcode = getLoadOpcodeForSpill(PPC::NoRegister, RC);
+ unsigned Opcode = getLoadOpcodeForSpill(RC);
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opcode), DestReg),
FrameIdx));
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
@@ -1273,12 +1502,10 @@ void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
FuncInfo->setHasNonRISpills();
}
-void
-PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+void PPCInstrInfo::loadRegFromStackSlotNoUpd(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg,
+ int FrameIdx, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
SmallVector<MachineInstr*, 4> NewMIs;
DebugLoc DL;
@@ -1287,16 +1514,6 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setHasSpills();
- // We need to avoid a situation in which the value from a VRRC register is
- // spilled using an Altivec instruction and reloaded into a VSRC register
- // using a VSX instruction. The issue with this is that the VSX
- // load/store instructions swap the doublewords in the vector and the Altivec
- // ones don't. The register classes on the spill/reload may be different if
- // the register is defined using an Altivec instruction and is then used by a
- // VSX instruction.
- if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
- RC = &PPC::VSRCRegClass;
-
LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
@@ -1306,10 +1523,27 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdx),
MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
- MFI.getObjectAlignment(FrameIdx));
+ MFI.getObjectAlign(FrameIdx));
NewMIs.back()->addMemOperand(MF, MMO);
}
+void PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ Register DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ // We need to avoid a situation in which the value from a VRRC register is
+ // spilled using an Altivec instruction and reloaded into a VSRC register
+ // using a VSX instruction. The issue with this is that the VSX
+ // load/store instructions swap the doublewords in the vector and the Altivec
+ // ones don't. The register classes on the spill/reload may be different if
+ // the register is defined using an Altivec instruction and is then used by a
+ // VSX instruction.
+ RC = updatedRC(RC);
+
+ loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC, TRI);
+}
+
bool PPCInstrInfo::
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
@@ -1321,9 +1555,11 @@ reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
return false;
}
-bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
- unsigned Reg, MachineRegisterInfo *MRI) const {
- // For some instructions, it is legal to fold ZERO into the RA register field.
+// For some instructions, it is legal to fold ZERO into the RA register field.
+// This function performs that fold by replacing the operand with PPC::ZERO;
+// it does not check whether the defining load-immediate is still in use.
+bool PPCInstrInfo::onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+ Register Reg) const {
// A zero immediate should always be loaded with a single li.
unsigned DefOpc = DefMI.getOpcode();
if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
@@ -1343,6 +1579,8 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (UseMCID.isPseudo())
return false;
+ // Find which of UseMI's operands is to be folded: the operand that matches
+ // the given register.
unsigned UseIdx;
for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx)
if (UseMI.getOperand(UseIdx).isReg() &&
@@ -1371,7 +1609,7 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (UseInfo->Constraints != 0)
return false;
- unsigned ZeroReg;
+ MCRegister ZeroReg;
if (UseInfo->isLookupPtrRegClass()) {
bool isPPC64 = Subtarget.isPPC64();
ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
@@ -1380,13 +1618,19 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
PPC::ZERO8 : PPC::ZERO;
}
- bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
UseMI.getOperand(UseIdx).setReg(ZeroReg);
+ return true;
+}
- if (DeleteDef)
+// Folds zero into instructions that have a load-immediate of zero as an
+// operand and that treat register zero in that position as the literal value
+// zero. If the defining load-immediate has no remaining users, it is deleted.
+bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+ Register Reg, MachineRegisterInfo *MRI) const {
+ bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg);
+ if (MRI->use_nodbg_empty(Reg))
DefMI.eraseFromParent();
-
- return true;
+ return Changed;
}
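// Illustrative example of the fold above (the assembly in this comment is for
// exposition only): given
//   li   r5, 0
//   lfdx f1, r5, r4
// the RA operand is rewritten to the zero register, giving
//   lfdx f1, 0, r4
// onlyFoldImmediate leaves the li in place; FoldImmediate also erases it once
// r5 has no remaining non-debug uses.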
static bool MBBDefinesCTR(MachineBasicBlock &MBB) {
@@ -1423,17 +1667,6 @@ bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const {
return false;
}
-bool PPCInstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
- if (!MI.isTerminator())
- return false;
-
- // Conditional branch is a special case.
- if (MI.isBranch() && !MI.isBarrier())
- return true;
-
- return !isPredicated(MI);
-}
-
bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
ArrayRef<MachineOperand> Pred) const {
unsigned OpC = MI.getOpcode();
@@ -1587,8 +1820,8 @@ bool PPCInstrInfo::DefinesPredicate(MachineInstr &MI,
return Found;
}
-bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &Mask,
+bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &Mask,
int &Value) const {
unsigned Opc = MI.getOpcode();
@@ -1617,8 +1850,8 @@ bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
}
}
-bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
- unsigned SrcReg2, int Mask, int Value,
+bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
+ Register SrcReg2, int Mask, int Value,
const MachineRegisterInfo *MRI) const {
if (DisableCmpOpt)
return false;
@@ -1646,8 +1879,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
// Look through copies unless that gets us to a physical register.
- unsigned ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
- if (Register::isVirtualRegister(ActualSrc))
+ Register ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
+ if (ActualSrc.isVirtual())
SrcReg = ActualSrc;
// Get the unique definition of SrcReg.
@@ -2036,8 +2269,8 @@ PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_PLT, "ppc-plt"},
{MO_PIC_FLAG, "ppc-pic"},
- {MO_NLP_FLAG, "ppc-nlp"},
- {MO_NLP_HIDDEN_FLAG, "ppc-nlp-hidden"}};
+ {MO_PCREL_FLAG, "ppc-pcrel"},
+ {MO_GOT_FLAG, "ppc-got"}};
return makeArrayRef(TargetFlags);
}
@@ -2330,7 +2563,8 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
const TargetRegisterInfo *TRI = &getRegisterInfo();
// If we're in SSA, get the defs through the MRI. Otherwise, only look
- // within the basic block to see if the register is defined using an LI/LI8.
+ // within the basic block to see if the register is defined using an
+ // LI/LI8/ADDI/ADDI8.
if (MRI->isSSA()) {
for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
if (!MI.getOperand(i).isReg())
@@ -2341,9 +2575,16 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
unsigned TrueReg = TRI->lookThruCopyLike(Reg, MRI);
if (Register::isVirtualRegister(TrueReg)) {
DefMI = MRI->getVRegDef(TrueReg);
- if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) {
+ if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8 ||
+ DefMI->getOpcode() == PPC::ADDI ||
+ DefMI->getOpcode() == PPC::ADDI8) {
OpNoForForwarding = i;
- break;
+ // The same instruction may have one operand fed by an ADDI and another fed
+ // by an LI. We prefer to fold the LI operand, since LI has only one
+ // immediate operand and is more likely to be convertible. So if the current
+ // DefMI is an ADDI/ADDI8, keep looking for a possible LI/LI8.
+ if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8)
+ break;
}
}
}
@@ -2400,44 +2641,20 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
return OpNoForForwarding == ~0U ? nullptr : DefMI;
}
+unsigned PPCInstrInfo::getSpillTarget() const {
+ return Subtarget.hasP9Vector() ? 1 : 0;
+}
+
const unsigned *PPCInstrInfo::getStoreOpcodesForSpillArray() const {
- static const unsigned OpcodesForSpill[2][SOK_LastOpcodeSpill] = {
- // Power 8
- {PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR,
- PPC::SPILL_CRBIT, PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX,
- PPC::SPILL_VRSAVE, PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb,
- PPC::SPILLTOVSR_ST, PPC::EVSTDD},
- // Power 9
- {PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR,
- PPC::SPILL_CRBIT, PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32,
- PPC::SPILL_VRSAVE, PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb,
- PPC::SPILLTOVSR_ST}};
-
- return OpcodesForSpill[(Subtarget.hasP9Vector()) ? 1 : 0];
+ return StoreSpillOpcodesArray[getSpillTarget()];
}
const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const {
- static const unsigned OpcodesForSpill[2][SOK_LastOpcodeSpill] = {
- // Power 8
- {PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR,
- PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX,
- PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, PPC::QVLFDXb,
- PPC::SPILLTOVSR_LD, PPC::EVLDD},
- // Power 9
- {PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR,
- PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, PPC::DFLOADf32,
- PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, PPC::QVLFDXb,
- PPC::SPILLTOVSR_LD}};
-
- return OpcodesForSpill[(Subtarget.hasP9Vector()) ? 1 : 0];
+ return LoadSpillOpcodesArray[getSpillTarget()];
}
void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
unsigned RegNo) const {
- const MachineRegisterInfo &MRI =
- StartMI.getParent()->getParent()->getRegInfo();
- if (MRI.isSSA())
- return;
// Instructions between [StartMI, EndMI] should be in same basic block.
assert((StartMI.getParent() == EndMI.getParent()) &&
@@ -2588,6 +2805,13 @@ bool PPCInstrInfo::foldFrameOffset(MachineInstr &MI) const {
return true;
return false;
};
+
+ // We are trying to replace the operand at ImmOpNo with ScaleReg. Give up if
+ // ScaleReg is R0/X0, since that operand position treats register zero as the
+ // literal value zero.
+ if (III.ZeroIsSpecialOrig == III.ImmOpNo &&
+ (ScaleReg == PPC::R0 || ScaleReg == PPC::X0))
+ return false;
+
// Make sure no other def for ToBeChangedReg and ScaleReg between ADD Instr
// and Imm Instr.
if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI))
@@ -2750,10 +2974,16 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
"The forwarding operand needs to be valid at this point");
bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
- Register ForwardingOperandReg = MI.getOperand(ForwardingOperand).getReg();
if (KilledDef && KillFwdDefMI)
*KilledDef = DefMI;
+ // If this is an imm-form instruction and its register operand is produced by
+ // an ADDI, fold the immediate into the instruction directly.
+ if (RI.getMappedIdxOpcForImmOpc(MI.getOpcode()) !=
+ PPC::INSTRUCTION_LIST_END &&
+ transformToNewImmFormFedByAdd(MI, *DefMI, ForwardingOperand))
+ return true;
+
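// Illustrative sketch of the intent only (assuming the combined displacement
// stays in the immediate range); not output from this patch:
//   addi r4, r3, 16
//   lwz  r5, 8(r4)
// can be rewritten as
//   lwz  r5, 24(r3)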
ImmInstrInfo III;
bool IsVFReg = MI.getOperand(0).isReg()
? isVFRegister(MI.getOperand(0).getReg())
@@ -2767,228 +2997,17 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
KillFwdDefMI))
return true;
- if ((DefMI->getOpcode() != PPC::LI && DefMI->getOpcode() != PPC::LI8) ||
- !DefMI->getOperand(1).isImm())
- return false;
-
- int64_t Immediate = DefMI->getOperand(1).getImm();
- // Sign-extend to 64-bits.
- int64_t SExtImm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ?
- (Immediate | 0xFFFFFFFFFFFF0000) : Immediate;
-
// If this is a reg+reg instruction that has a reg+imm form,
// and one of the operands is produced by LI, convert it now.
- if (HasImmForm)
- return transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI, SExtImm);
-
- bool ReplaceWithLI = false;
- bool Is64BitLI = false;
- int64_t NewImm = 0;
- bool SetCR = false;
- unsigned Opc = MI.getOpcode();
- switch (Opc) {
- default: return false;
-
- // FIXME: Any branches conditional on such a comparison can be made
- // unconditional. At this time, this happens too infrequently to be worth
- // the implementation effort, but if that ever changes, we could convert
- // such a pattern here.
- case PPC::CMPWI:
- case PPC::CMPLWI:
- case PPC::CMPDI:
- case PPC::CMPLDI: {
- // Doing this post-RA would require dataflow analysis to reliably find uses
- // of the CR register set by the compare.
- // No need to fixup killed/dead flag since this transformation is only valid
- // before RA.
- if (PostRA)
- return false;
- // If a compare-immediate is fed by an immediate and is itself an input of
- // an ISEL (the most common case) into a COPY of the correct register.
- bool Changed = false;
- Register DefReg = MI.getOperand(0).getReg();
- int64_t Comparand = MI.getOperand(2).getImm();
- int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0 ?
- (Comparand | 0xFFFFFFFFFFFF0000) : Comparand;
-
- for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {
- unsigned UseOpc = CompareUseMI.getOpcode();
- if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
- continue;
- unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
- Register TrueReg = CompareUseMI.getOperand(1).getReg();
- Register FalseReg = CompareUseMI.getOperand(2).getReg();
- unsigned RegToCopy = selectReg(SExtImm, SExtComparand, Opc, TrueReg,
- FalseReg, CRSubReg);
- if (RegToCopy == PPC::NoRegister)
- continue;
- // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
- if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
- CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
- replaceInstrOperandWithImm(CompareUseMI, 1, 0);
- CompareUseMI.RemoveOperand(3);
- CompareUseMI.RemoveOperand(2);
- continue;
- }
- LLVM_DEBUG(
- dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
- LLVM_DEBUG(DefMI->dump(); MI.dump(); CompareUseMI.dump());
- LLVM_DEBUG(dbgs() << "Is converted to:\n");
- // Convert to copy and remove unneeded operands.
- CompareUseMI.setDesc(get(PPC::COPY));
- CompareUseMI.RemoveOperand(3);
- CompareUseMI.RemoveOperand(RegToCopy == TrueReg ? 2 : 1);
- CmpIselsConverted++;
- Changed = true;
- LLVM_DEBUG(CompareUseMI.dump());
- }
- if (Changed)
- return true;
- // This may end up incremented multiple times since this function is called
- // during a fixed-point transformation, but it is only meant to indicate the
- // presence of this opportunity.
- MissedConvertibleImmediateInstrs++;
- return false;
- }
-
- // Immediate forms - may simply be convertable to an LI.
- case PPC::ADDI:
- case PPC::ADDI8: {
- // Does the sum fit in a 16-bit signed field?
- int64_t Addend = MI.getOperand(2).getImm();
- if (isInt<16>(Addend + SExtImm)) {
- ReplaceWithLI = true;
- Is64BitLI = Opc == PPC::ADDI8;
- NewImm = Addend + SExtImm;
- break;
- }
- return false;
- }
- case PPC::RLDICL:
- case PPC::RLDICL_rec:
- case PPC::RLDICL_32:
- case PPC::RLDICL_32_64: {
- // Use APInt's rotate function.
- int64_t SH = MI.getOperand(2).getImm();
- int64_t MB = MI.getOperand(3).getImm();
- APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32,
- SExtImm, true);
- InVal = InVal.rotl(SH);
- uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
- InVal &= Mask;
- // Can't replace negative values with an LI as that will sign-extend
- // and not clear the left bits. If we're setting the CR bit, we will use
- // ANDI_rec which won't sign extend, so that's safe.
- if (isUInt<15>(InVal.getSExtValue()) ||
- (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) {
- ReplaceWithLI = true;
- Is64BitLI = Opc != PPC::RLDICL_32;
- NewImm = InVal.getSExtValue();
- SetCR = Opc == PPC::RLDICL_rec;
- break;
- }
- return false;
- }
- case PPC::RLWINM:
- case PPC::RLWINM8:
- case PPC::RLWINM_rec:
- case PPC::RLWINM8_rec: {
- int64_t SH = MI.getOperand(2).getImm();
- int64_t MB = MI.getOperand(3).getImm();
- int64_t ME = MI.getOperand(4).getImm();
- APInt InVal(32, SExtImm, true);
- InVal = InVal.rotl(SH);
- // Set the bits ( MB + 32 ) to ( ME + 32 ).
- uint64_t Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
- InVal &= Mask;
- // Can't replace negative values with an LI as that will sign-extend
- // and not clear the left bits. If we're setting the CR bit, we will use
- // ANDI_rec which won't sign extend, so that's safe.
- bool ValueFits = isUInt<15>(InVal.getSExtValue());
- ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) &&
- isUInt<16>(InVal.getSExtValue()));
- if (ValueFits) {
- ReplaceWithLI = true;
- Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec;
- NewImm = InVal.getSExtValue();
- SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec;
- break;
- }
- return false;
- }
- case PPC::ORI:
- case PPC::ORI8:
- case PPC::XORI:
- case PPC::XORI8: {
- int64_t LogicalImm = MI.getOperand(2).getImm();
- int64_t Result = 0;
- if (Opc == PPC::ORI || Opc == PPC::ORI8)
- Result = LogicalImm | SExtImm;
- else
- Result = LogicalImm ^ SExtImm;
- if (isInt<16>(Result)) {
- ReplaceWithLI = true;
- Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;
- NewImm = Result;
- break;
- }
- return false;
- }
- }
-
- if (ReplaceWithLI) {
- // We need to be careful with CR-setting instructions we're replacing.
- if (SetCR) {
- // We don't know anything about uses when we're out of SSA, so only
- // replace if the new immediate will be reproduced.
- bool ImmChanged = (SExtImm & NewImm) != NewImm;
- if (PostRA && ImmChanged)
- return false;
-
- if (!PostRA) {
- // If the defining load-immediate has no other uses, we can just replace
- // the immediate with the new immediate.
- if (MRI->hasOneUse(DefMI->getOperand(0).getReg()))
- DefMI->getOperand(1).setImm(NewImm);
-
- // If we're not using the GPR result of the CR-setting instruction, we
- // just need to and with zero/non-zero depending on the new immediate.
- else if (MRI->use_empty(MI.getOperand(0).getReg())) {
- if (NewImm) {
- assert(Immediate && "Transformation converted zero to non-zero?");
- NewImm = Immediate;
- }
- }
- else if (ImmChanged)
- return false;
- }
- }
-
- LLVM_DEBUG(dbgs() << "Replacing instruction:\n");
- LLVM_DEBUG(MI.dump());
- LLVM_DEBUG(dbgs() << "Fed by:\n");
- LLVM_DEBUG(DefMI->dump());
- LoadImmediateInfo LII;
- LII.Imm = NewImm;
- LII.Is64Bit = Is64BitLI;
- LII.SetCR = SetCR;
- // If we're setting the CR, the original load-immediate must be kept (as an
- // operand to ANDI_rec/ANDI8_rec).
- if (KilledDef && SetCR)
- *KilledDef = nullptr;
- replaceInstrWithLI(MI, LII);
-
- // Fixup killed/dead flag after transformation.
- // Pattern:
- // ForwardingOperandReg = LI imm1
- // y = op2 imm2, ForwardingOperandReg(killed)
- if (IsForwardingOperandKilled)
- fixupIsDeadOrKill(*DefMI, MI, ForwardingOperandReg);
+ if (HasImmForm &&
+ transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI))
+ return true;
- LLVM_DEBUG(dbgs() << "With:\n");
- LLVM_DEBUG(MI.dump());
+ // If this is not a reg+reg, but the DefMI is LI/LI8, check if its user MI
+ // can be simplified to LI.
+ if (!HasImmForm && simplifyToLI(MI, *DefMI, ForwardingOperand, KilledDef))
return true;
- }
+
return false;
}
@@ -3505,6 +3524,10 @@ bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
RegMO = &DefMI.getOperand(1);
ImmMO = &DefMI.getOperand(2);
+ // Before RA, ADDI's first operand could be a frame index.
+ if (!RegMO->isReg())
+ return false;
+
// This DefMI is eligible for forwarding if it is:
// 1. add inst
// 2. one of the operands is Imm/CPI/Global.
@@ -3553,7 +3576,8 @@ bool PPCInstrInfo::isRegElgibleForForwarding(
bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
const MachineInstr &DefMI,
const ImmInstrInfo &III,
- int64_t &Imm) const {
+ int64_t &Imm,
+ int64_t BaseImm) const {
assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
if (DefMI.getOpcode() == PPC::ADDItocL) {
// The operand for ADDItocL is CPI, which isn't imm at compiling time,
@@ -3567,19 +3591,21 @@ bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
// not just an immediate but also a multiple of 4, or 16 depending on the
// load. A DForm load cannot be represented if it is a multiple of say 2.
// XForm loads do not have this restriction.
- if (ImmMO.isGlobal() &&
- ImmMO.getGlobal()->getAlignment() < III.ImmMustBeMultipleOf)
- return false;
+ if (ImmMO.isGlobal()) {
+ const DataLayout &DL = ImmMO.getGlobal()->getParent()->getDataLayout();
+ if (ImmMO.getGlobal()->getPointerAlignment(DL) < III.ImmMustBeMultipleOf)
+ return false;
+ }
return true;
}
if (ImmMO.isImm()) {
// It is Imm, we need to check if the Imm fit the range.
- int64_t Immediate = ImmMO.getImm();
// Sign-extend to 64-bits.
- Imm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ?
- (Immediate | 0xFFFFFFFFFFFF0000) : Immediate;
+ // DefMI may be folded with another imm-form instruction; the resulting Imm
+ // is the sum of DefMI's Imm and BaseImm, which comes from that imm-form
+ // instruction.
+ Imm = SignExtend64<16>(ImmMO.getImm() + BaseImm);
if (Imm % III.ImmMustBeMultipleOf)
return false;
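
A rough sketch of the updated eligibility check (freestanding C++; the helper name and the explicit zero guard are my own additions, the real code works on MachineOperands and ImmInstrInfo): the forwarded immediate and the instruction's existing displacement are summed, sign-extended to 16 bits, and rejected if the result violates the D/DS/DQ-form multiple-of constraint.

#include <cstdint>

static bool immEligibleWithBase(int64_t DefImm, int64_t BaseImm,
                                unsigned MustBeMultipleOf, int64_t &Imm) {
  // SignExtend64<16>(DefImm + BaseImm) in the real code.
  Imm = static_cast<int16_t>(DefImm + BaseImm);
  // DS-form needs a multiple of 4, DQ-form a multiple of 16; guard the
  // modulo against a zero divisor since this sketch has no ImmInstrInfo.
  return MustBeMultipleOf == 0 || (Imm % MustBeMultipleOf) == 0;
}
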
@@ -3603,6 +3629,328 @@ bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
return true;
}
+bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
+ unsigned OpNoForForwarding,
+ MachineInstr **KilledDef) const {
+ if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
+ !DefMI.getOperand(1).isImm())
+ return false;
+
+ MachineFunction *MF = MI.getParent()->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ bool PostRA = !MRI->isSSA();
+
+ int64_t Immediate = DefMI.getOperand(1).getImm();
+ // Sign-extend to 64-bits.
+ int64_t SExtImm = SignExtend64<16>(Immediate);
+
+ bool IsForwardingOperandKilled = MI.getOperand(OpNoForForwarding).isKill();
+ Register ForwardingOperandReg = MI.getOperand(OpNoForForwarding).getReg();
+
+ bool ReplaceWithLI = false;
+ bool Is64BitLI = false;
+ int64_t NewImm = 0;
+ bool SetCR = false;
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ default:
+ return false;
+
+ // FIXME: Any branches conditional on such a comparison can be made
+ // unconditional. At this time, this happens too infrequently to be worth
+ // the implementation effort, but if that ever changes, we could convert
+ // such a pattern here.
+ case PPC::CMPWI:
+ case PPC::CMPLWI:
+ case PPC::CMPDI:
+ case PPC::CMPLDI: {
+ // Doing this post-RA would require dataflow analysis to reliably find uses
+ // of the CR register set by the compare.
+ // No need to fixup killed/dead flag since this transformation is only valid
+ // before RA.
+ if (PostRA)
+ return false;
+ // If a compare-immediate is fed by an immediate and is itself an input of
+ // an ISEL (the most common case), convert the ISEL into a COPY of the
+ // correct register.
+ bool Changed = false;
+ Register DefReg = MI.getOperand(0).getReg();
+ int64_t Comparand = MI.getOperand(2).getImm();
+ int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0
+ ? (Comparand | 0xFFFFFFFFFFFF0000)
+ : Comparand;
+
+ for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {
+ unsigned UseOpc = CompareUseMI.getOpcode();
+ if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
+ continue;
+ unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
+ Register TrueReg = CompareUseMI.getOperand(1).getReg();
+ Register FalseReg = CompareUseMI.getOperand(2).getReg();
+ unsigned RegToCopy =
+ selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg);
+ if (RegToCopy == PPC::NoRegister)
+ continue;
+ // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
+ if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
+ CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
+ replaceInstrOperandWithImm(CompareUseMI, 1, 0);
+ CompareUseMI.RemoveOperand(3);
+ CompareUseMI.RemoveOperand(2);
+ continue;
+ }
+ LLVM_DEBUG(
+ dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
+ LLVM_DEBUG(DefMI.dump(); MI.dump(); CompareUseMI.dump());
+ LLVM_DEBUG(dbgs() << "Is converted to:\n");
+ // Convert to copy and remove unneeded operands.
+ CompareUseMI.setDesc(get(PPC::COPY));
+ CompareUseMI.RemoveOperand(3);
+ CompareUseMI.RemoveOperand(RegToCopy == TrueReg ? 2 : 1);
+ CmpIselsConverted++;
+ Changed = true;
+ LLVM_DEBUG(CompareUseMI.dump());
+ }
+ if (Changed)
+ return true;
+ // This may end up incremented multiple times since this function is called
+ // during a fixed-point transformation, but it is only meant to indicate the
+ // presence of this opportunity.
+ MissedConvertibleImmediateInstrs++;
+ return false;
+ }
+
+ // Immediate forms - may simply be convertible to an LI.
+ case PPC::ADDI:
+ case PPC::ADDI8: {
+ // Does the sum fit in a 16-bit signed field?
+ int64_t Addend = MI.getOperand(2).getImm();
+ if (isInt<16>(Addend + SExtImm)) {
+ ReplaceWithLI = true;
+ Is64BitLI = Opc == PPC::ADDI8;
+ NewImm = Addend + SExtImm;
+ break;
+ }
+ return false;
+ }
+ case PPC::RLDICL:
+ case PPC::RLDICL_rec:
+ case PPC::RLDICL_32:
+ case PPC::RLDICL_32_64: {
+ // Use APInt's rotate function.
+ int64_t SH = MI.getOperand(2).getImm();
+ int64_t MB = MI.getOperand(3).getImm();
+ APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32,
+ SExtImm, true);
+ InVal = InVal.rotl(SH);
+ uint64_t Mask = MB == 0 ? -1LLU : (1LLU << (63 - MB + 1)) - 1;
+ InVal &= Mask;
+ // Can't replace negative values with an LI as that will sign-extend
+ // and not clear the left bits. If we're setting the CR bit, we will use
+ // ANDI_rec which won't sign extend, so that's safe.
+ if (isUInt<15>(InVal.getSExtValue()) ||
+ (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) {
+ ReplaceWithLI = true;
+ Is64BitLI = Opc != PPC::RLDICL_32;
+ NewImm = InVal.getSExtValue();
+ SetCR = Opc == PPC::RLDICL_rec;
+ break;
+ }
+ return false;
+ }
+ case PPC::RLWINM:
+ case PPC::RLWINM8:
+ case PPC::RLWINM_rec:
+ case PPC::RLWINM8_rec: {
+ int64_t SH = MI.getOperand(2).getImm();
+ int64_t MB = MI.getOperand(3).getImm();
+ int64_t ME = MI.getOperand(4).getImm();
+ APInt InVal(32, SExtImm, true);
+ InVal = InVal.rotl(SH);
+ APInt Mask = APInt::getBitsSetWithWrap(32, 32 - ME - 1, 32 - MB);
+ InVal &= Mask;
+ // Can't replace negative values with an LI as that will sign-extend
+ // and not clear the left bits. If we're setting the CR bit, we will use
+ // ANDI_rec which won't sign extend, so that's safe.
+ bool ValueFits = isUInt<15>(InVal.getSExtValue());
+ ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) &&
+ isUInt<16>(InVal.getSExtValue()));
+ if (ValueFits) {
+ ReplaceWithLI = true;
+ Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec;
+ NewImm = InVal.getSExtValue();
+ SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec;
+ break;
+ }
+ return false;
+ }
+ case PPC::ORI:
+ case PPC::ORI8:
+ case PPC::XORI:
+ case PPC::XORI8: {
+ int64_t LogicalImm = MI.getOperand(2).getImm();
+ int64_t Result = 0;
+ if (Opc == PPC::ORI || Opc == PPC::ORI8)
+ Result = LogicalImm | SExtImm;
+ else
+ Result = LogicalImm ^ SExtImm;
+ if (isInt<16>(Result)) {
+ ReplaceWithLI = true;
+ Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;
+ NewImm = Result;
+ break;
+ }
+ return false;
+ }
+ }
+
+ if (ReplaceWithLI) {
+ // We need to be careful with CR-setting instructions we're replacing.
+ if (SetCR) {
+ // We don't know anything about uses when we're out of SSA, so only
+ // replace if the new immediate will be reproduced.
+ bool ImmChanged = (SExtImm & NewImm) != NewImm;
+ if (PostRA && ImmChanged)
+ return false;
+
+ if (!PostRA) {
+ // If the defining load-immediate has no other uses, we can just replace
+ // the immediate with the new immediate.
+ if (MRI->hasOneUse(DefMI.getOperand(0).getReg()))
+ DefMI.getOperand(1).setImm(NewImm);
+
+ // If we're not using the GPR result of the CR-setting instruction, we
+ // just need to and with zero/non-zero depending on the new immediate.
+ else if (MRI->use_empty(MI.getOperand(0).getReg())) {
+ if (NewImm) {
+ assert(Immediate && "Transformation converted zero to non-zero?");
+ NewImm = Immediate;
+ }
+ } else if (ImmChanged)
+ return false;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Replacing instruction:\n");
+ LLVM_DEBUG(MI.dump());
+ LLVM_DEBUG(dbgs() << "Fed by:\n");
+ LLVM_DEBUG(DefMI.dump());
+ LoadImmediateInfo LII;
+ LII.Imm = NewImm;
+ LII.Is64Bit = Is64BitLI;
+ LII.SetCR = SetCR;
+ // If we're setting the CR, the original load-immediate must be kept (as an
+ // operand to ANDI_rec/ANDI8_rec).
+ if (KilledDef && SetCR)
+ *KilledDef = nullptr;
+ replaceInstrWithLI(MI, LII);
+
+ // Fixup killed/dead flag after transformation.
+ // Pattern:
+ // ForwardingOperandReg = LI imm1
+ // y = op2 imm2, ForwardingOperandReg(killed)
+ if (IsForwardingOperandKilled)
+ fixupIsDeadOrKill(DefMI, MI, ForwardingOperandReg);
+
+ LLVM_DEBUG(dbgs() << "With:\n");
+ LLVM_DEBUG(MI.dump());
+ return true;
+ }
+ return false;
+}
+
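
A minimal standalone sketch of the ADDI/ADDI8 case handled in simplifyToLI above (plain C++, invented names, not part of the patch): the LI immediate is sign-extended from 16 bits, added to the ADDI addend, and the pair collapses to a single LI only when the sum still fits in a signed 16-bit field.

#include <cstdint>
#include <optional>

// Interpret the low 16 bits as signed, mirroring SignExtend64<16>.
static int64_t signExtend16(int64_t V) { return static_cast<int16_t>(V); }

// "li rX, LIImm" feeding "addi rD, rX, Addend" can become a single
// "li rD, signExtend16(LIImm) + Addend" when the sum is a valid signed
// 16-bit immediate; otherwise the fold is rejected.
static std::optional<int64_t> foldLIThenADDI(int64_t LIImm, int64_t Addend) {
  int64_t NewImm = signExtend16(LIImm) + Addend;
  if (NewImm >= INT16_MIN && NewImm <= INT16_MAX) // isInt<16>(NewImm)
    return NewImm;
  return std::nullopt;
}
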
+bool PPCInstrInfo::transformToNewImmFormFedByAdd(
+ MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding) const {
+ MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
+ bool PostRA = !MRI->isSSA();
+ // FIXME: extend this to post-RA. Some changes are needed in
+ // getForwardingDefMI for post-RA.
+ if (PostRA)
+ return false;
+
+ // Only handle load/store.
+ if (!MI.mayLoadOrStore())
+ return false;
+
+ unsigned XFormOpcode = RI.getMappedIdxOpcForImmOpc(MI.getOpcode());
+
+ assert((XFormOpcode != PPC::INSTRUCTION_LIST_END) &&
+ "MI must have x-form opcode");
+
+ // get Imm Form info.
+ ImmInstrInfo III;
+ bool IsVFReg = MI.getOperand(0).isReg()
+ ? isVFRegister(MI.getOperand(0).getReg())
+ : false;
+
+ if (!instrHasImmForm(XFormOpcode, IsVFReg, III, PostRA))
+ return false;
+
+ if (!III.IsSummingOperands)
+ return false;
+
+ if (OpNoForForwarding != III.OpNoForForwarding)
+ return false;
+
+ MachineOperand ImmOperandMI = MI.getOperand(III.ImmOpNo);
+ if (!ImmOperandMI.isImm())
+ return false;
+
+ // Check DefMI.
+ MachineOperand *ImmMO = nullptr;
+ MachineOperand *RegMO = nullptr;
+ if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
+ return false;
+ assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
+
+ // Check Imm.
+ // Use the immediate of the imm-form instruction as the base (ImmBase) and
+ // compute the new Imm inside isImmElgibleForForwarding.
+ int64_t ImmBase = ImmOperandMI.getImm();
+ int64_t Imm = 0;
+ if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm, ImmBase))
+ return false;
+
+ // Get killed info in case fixup needed after transformation.
+ unsigned ForwardKilledOperandReg = ~0U;
+ if (MI.getOperand(III.OpNoForForwarding).isKill())
+ ForwardKilledOperandReg = MI.getOperand(III.OpNoForForwarding).getReg();
+
+ // Do the transform
+ LLVM_DEBUG(dbgs() << "Replacing instruction:\n");
+ LLVM_DEBUG(MI.dump());
+ LLVM_DEBUG(dbgs() << "Fed by:\n");
+ LLVM_DEBUG(DefMI.dump());
+
+ MI.getOperand(III.OpNoForForwarding).setReg(RegMO->getReg());
+ MI.getOperand(III.OpNoForForwarding).setIsKill(RegMO->isKill());
+ MI.getOperand(III.ImmOpNo).setImm(Imm);
+
+ // FIXME: fix the kill/dead flag if MI and DefMI are not in the same basic
+ // block.
+ if (DefMI.getParent() == MI.getParent()) {
+ // Check if reg is killed between MI and DefMI.
+ auto IsKilledFor = [&](unsigned Reg) {
+ MachineBasicBlock::const_reverse_iterator It = MI;
+ MachineBasicBlock::const_reverse_iterator E = DefMI;
+ It++;
+ for (; It != E; ++It) {
+ if (It->killsRegister(Reg))
+ return true;
+ }
+ return false;
+ };
+
+ // Update kill flag
+ if (RegMO->isKill() || IsKilledFor(RegMO->getReg()))
+ fixupIsDeadOrKill(DefMI, MI, RegMO->getReg());
+ if (ForwardKilledOperandReg != ~0U)
+ fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
+ }
+
+ LLVM_DEBUG(dbgs() << "With:\n");
+ LLVM_DEBUG(MI.dump());
+ return true;
+}
+
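
A sketch of the operand rewrite that transformToNewImmFormFedByAdd performs, using invented structs in freestanding C++ (the real code edits MachineOperands in place and first runs the ImmInstrInfo range/alignment checks): the D-form memory access keeps its opcode, takes the ADDI's source register as its new base, and folds the ADDI immediate into its existing displacement.

#include <cstdint>

struct AddImm   { unsigned SrcReg;  int64_t Imm;  }; // "addi rT, SrcReg, Imm"
struct DFormMem { unsigned BaseReg; int64_t Disp; }; // "lwz rD, Disp(BaseReg)"

// Fold the feeding add-immediate into the D-form load/store: the new
// displacement is the 16-bit sign-extended sum of the old displacement and
// the ADDI immediate, and the ADDI's source register becomes the base.
static void foldAddIntoDForm(const AddImm &Add, DFormMem &Mem) {
  Mem.Disp    = static_cast<int16_t>(Mem.Disp + Add.Imm);
  Mem.BaseReg = Add.SrcReg;
}
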
// If an X-Form instruction is fed by an add-immediate and one of its operands
// is the literal zero, attempt to forward the source of the add-immediate to
// the corresponding D-Form instruction with the displacement coming from
@@ -3722,8 +4070,15 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(
bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
const ImmInstrInfo &III,
unsigned ConstantOpNo,
- MachineInstr &DefMI,
- int64_t Imm) const {
+ MachineInstr &DefMI) const {
+ // DefMI must be LI or LI8.
+ if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
+ !DefMI.getOperand(1).isImm())
+ return false;
+
+ // Get Imm operand and Sign-extend to 64-bits.
+ int64_t Imm = SignExtend64<16>(DefMI.getOperand(1).getImm());
+
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
bool PostRA = !MRI.isSSA();
// Exit early if we can't convert this.
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 2fe8df0e1d68..d98597f48340 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -65,7 +65,9 @@ enum {
NewDef_Shift = 6,
/// This instruction is an X-Form memory operation.
- XFormMemOp = 0x1 << (NewDef_Shift+1)
+ XFormMemOp = 0x1 << NewDef_Shift,
+ /// This instruction is prefixed.
+ Prefixed = 0x1 << (NewDef_Shift+1)
};
} // end namespace PPCII
@@ -108,10 +110,74 @@ struct LoadImmediateInfo {
unsigned SetCR : 1;
};
+// Index into the OpcodesForSpill array.
+enum SpillOpcodeKey {
+ SOK_Int4Spill,
+ SOK_Int8Spill,
+ SOK_Float8Spill,
+ SOK_Float4Spill,
+ SOK_CRSpill,
+ SOK_CRBitSpill,
+ SOK_VRVectorSpill,
+ SOK_VSXVectorSpill,
+ SOK_VectorFloat8Spill,
+ SOK_VectorFloat4Spill,
+ SOK_VRSaveSpill,
+ SOK_QuadFloat8Spill,
+ SOK_QuadFloat4Spill,
+ SOK_QuadBitSpill,
+ SOK_SpillToVSR,
+ SOK_SPESpill,
+ SOK_LastOpcodeSpill // This must be last on the enum.
+};
+
+// Define list of load and store spill opcodes.
+#define Pwr8LoadOpcodes \
+ { \
+ PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
+ PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX, \
+ PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, PPC::QVLFDXb, \
+ PPC::SPILLTOVSR_LD, PPC::EVLDD \
+ }
+
+#define Pwr9LoadOpcodes \
+ { \
+ PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
+ PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \
+ PPC::DFLOADf32, PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, \
+ PPC::QVLFDXb, PPC::SPILLTOVSR_LD \
+ }
+
+#define Pwr8StoreOpcodes \
+ { \
+ PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
+ PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, PPC::SPILL_VRSAVE, \
+ PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, PPC::SPILLTOVSR_ST, \
+ PPC::EVSTDD \
+ }
+
+#define Pwr9StoreOpcodes \
+ { \
+ PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
+ PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \
+ PPC::SPILL_VRSAVE, PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, \
+ PPC::SPILLTOVSR_ST \
+ }
+
+// Initialize arrays for load and store spill opcodes on supported subtargets.
+#define StoreOpcodesForSpill \
+ { Pwr8StoreOpcodes, Pwr9StoreOpcodes }
+#define LoadOpcodesForSpill \
+ { Pwr8LoadOpcodes, Pwr9LoadOpcodes }
+
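
The macro pairs above populate the two-row opcode tables declared just below; a hypothetical illustration of how such a table gets consulted (the helper and the row parameter are assumptions, and the snippet relies on the SpillOpcodeKey enum defined earlier in this header; the real code routes through getSpillTarget()):

// Row 0 carries the Power8 opcodes, row 1 the Power9 ones; the column is
// the SpillOpcodeKey of the register class being spilled or reloaded.
static unsigned lookupSpillOpcode(const unsigned Table[2][SOK_LastOpcodeSpill],
                                  unsigned SpillTargetRow, SpillOpcodeKey Key) {
  return Table[SpillTargetRow][Key];
}
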
class PPCSubtarget;
class PPCInstrInfo : public PPCGenInstrInfo {
PPCSubtarget &Subtarget;
const PPCRegisterInfo RI;
+ const unsigned StoreSpillOpcodesArray[2][SOK_LastOpcodeSpill] =
+ StoreOpcodesForSpill;
+ const unsigned LoadSpillOpcodesArray[2][SOK_LastOpcodeSpill] =
+ LoadOpcodesForSpill;
void StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill,
int FrameIdx, const TargetRegisterClass *RC,
@@ -121,13 +187,21 @@ class PPCInstrInfo : public PPCGenInstrInfo {
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr *> &NewMIs) const;
- // If the inst has imm-form and one of its operand is produced by a LI,
- // put the imm into the inst directly and remove the LI if possible.
+ // Replace the instruction with a single LI if possible. \p DefMI must be LI
+ // or LI8.
+ bool simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
+ unsigned OpNoForForwarding, MachineInstr **KilledDef) const;
+ // If the inst is imm-form and its register operand is produced by an ADDI,
+ // put the imm into the inst directly and remove the ADDI if possible.
+ bool transformToNewImmFormFedByAdd(MachineInstr &MI, MachineInstr &DefMI,
+ unsigned OpNoForForwarding) const;
+ // If the inst is x-form and has an imm-form and one of its operands is
+ // produced by an LI, put the imm into the inst directly and remove the LI
+ // if possible.
bool transformToImmFormFedByLI(MachineInstr &MI, const ImmInstrInfo &III,
- unsigned ConstantOpNo, MachineInstr &DefMI,
- int64_t Imm) const;
- // If the inst has imm-form and one of its operand is produced by an
- // add-immediate, try to transform it when possible.
+ unsigned ConstantOpNo,
+ MachineInstr &DefMI) const;
+ // If the inst is x-form and has an imm-form and one of its operands is
+ // produced by an add-immediate, try to transform it when possible.
bool transformToImmFormFedByAdd(MachineInstr &MI, const ImmInstrInfo &III,
unsigned ConstantOpNo, MachineInstr &DefMI,
bool KillDefMI) const;
@@ -151,13 +225,20 @@ class PPCInstrInfo : public PPCGenInstrInfo {
bool isImmElgibleForForwarding(const MachineOperand &ImmMO,
const MachineInstr &DefMI,
const ImmInstrInfo &III,
- int64_t &Imm) const;
+ int64_t &Imm,
+ int64_t BaseImm = 0) const;
bool isRegElgibleForForwarding(const MachineOperand &RegMO,
const MachineInstr &DefMI,
const MachineInstr &MI, bool KillDefMI,
bool &IsFwdFeederRegKilled) const;
+ unsigned getSpillTarget() const;
const unsigned *getStoreOpcodesForSpillArray() const;
const unsigned *getLoadOpcodesForSpillArray() const;
+ int16_t getFMAOpIdxInfo(unsigned Opcode) const;
+ void reassociateFMA(MachineInstr &Root, MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const;
virtual void anchor();
protected:
@@ -187,6 +268,10 @@ public:
bool isXFormMemOp(unsigned Opcode) const {
return get(Opcode).TSFlags & PPCII::XFormMemOp;
}
+ bool isPrefixed(unsigned Opcode) const {
+ return get(Opcode).TSFlags & PPCII::Prefixed;
+ }
+
static bool isSameClassPhysRegCopy(unsigned Opcode) {
unsigned CopyOpcodes[] =
{ PPC::OR, PPC::OR8, PPC::FMR, PPC::VOR, PPC::XXLOR, PPC::XXLORf,
@@ -233,6 +318,20 @@ public:
return true;
}
+ /// When getMachineCombinerPatterns() finds patterns, this function generates
+ /// the instructions that could replace the original code sequence
+ void genAlternativeCodeSequence(
+ MachineInstr &Root, MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
+
+ /// Return true when there is potentially a faster code sequence for an FMA
+ /// chain ending in \p Root. All potential patterns are output in the \p
+ /// P array.
+ bool getFMAPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &P) const;
+
/// Return true when there is potentially a faster code sequence
/// for an instruction chain ending in <Root>. All potential patterns are
/// output in the <Pattern> array.
@@ -242,8 +341,24 @@ public:
bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
+ /// On PowerPC, we try to reassociate FMA chains, which can increase code
+ /// size. Set the extension resource length limit to 1 to handle an edge
+ /// case: resource length is calculated from scaled resource usage in
+ /// getCycles(), and because of the division in getCycles(), the new
+ /// resource length may either match the legacy one or be 1 bigger than it.
+ /// Allowing the 1-bigger case, even though the resource length is not
+ /// strictly preserved, enables more FMA chain reassociations on PowerPC.
+ int getExtendResourceLenLimit() const override { return 1; }
+
+ void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
+ MachineInstr &NewMI1,
+ MachineInstr &NewMI2) const override;
+
+ void setSpecialOperandAttr(MachineInstr &MI, uint16_t Flags) const override;
+
bool isCoalescableExtInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
+ Register &SrcReg, Register &DstReg,
unsigned &SubIdx) const override;
unsigned isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
@@ -273,11 +388,12 @@ public:
// Select analysis.
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
- unsigned, unsigned, int &, int &, int &) const override;
+ Register, Register, Register, int &, int &,
+ int &) const override;
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DstReg,
- ArrayRef<MachineOperand> Cond, unsigned TrueReg,
- unsigned FalseReg) const override;
+ const DebugLoc &DL, Register DstReg,
+ ArrayRef<MachineOperand> Cond, Register TrueReg,
+ Register FalseReg) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
@@ -285,28 +401,47 @@ public:
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
+ Register SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
+ // Emits a register spill without updating the register class for vector
+ // registers. This ensures that when we spill a vector register the
+ // element order in the register is the same as it was in memory.
+ void storeRegToStackSlotNoUpd(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
+ Register DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
- unsigned getStoreOpcodeForSpill(unsigned Reg,
- const TargetRegisterClass *RC = nullptr) const;
+ // Emits a register reload without updating the register class for vector
+ // registers. This ensures that when we reload a vector register the
+ // element order in the register is the same as it was in memory.
+ void loadRegFromStackSlotNoUpd(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ unsigned getStoreOpcodeForSpill(const TargetRegisterClass *RC) const;
- unsigned getLoadOpcodeForSpill(unsigned Reg,
- const TargetRegisterClass *RC = nullptr) const;
+ unsigned getLoadOpcodeForSpill(const TargetRegisterClass *RC) const;
bool
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
- bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
+ bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
MachineRegisterInfo *MRI) const override;
+ bool onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+ Register Reg) const;
+
// If conversion by predication (only supported by some branch instructions).
// All of the profitability checks always return true; it is always
// profitable to use the predicated branches.
@@ -335,8 +470,6 @@ public:
// Predication support.
bool isPredicated(const MachineInstr &MI) const override;
- bool isUnpredicatedTerminator(const MachineInstr &MI) const override;
-
bool PredicateInstruction(MachineInstr &MI,
ArrayRef<MachineOperand> Pred) const override;
@@ -348,11 +481,11 @@ public:
// Comparison optimization.
- bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &Mask, int &Value) const override;
+ bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &Mask, int &Value) const override;
- bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
- unsigned SrcReg2, int Mask, int Value,
+ bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
+ Register SrcReg2, int Mask, int Value,
const MachineRegisterInfo *MRI) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index f0153fc170ab..673ab63039cf 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -37,9 +37,6 @@ def SDT_PPCstore_scal_int_from_vsr : SDTypeProfile<0, 3, [
def SDT_PPCVexts : SDTypeProfile<1, 2, [
SDTCisVT<0, f64>, SDTCisVT<1, f64>, SDTCisPtrTy<2>
]>;
-def SDT_PPCSExtVElems : SDTypeProfile<1, 1, [
- SDTCisVec<0>, SDTCisVec<1>
-]>;
def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
@@ -53,6 +50,10 @@ def SDT_PPCVecSplat : SDTypeProfile<1, 2, [ SDTCisVec<0>,
SDTCisVec<1>, SDTCisInt<2>
]>;
+def SDT_PPCSpToDp : SDTypeProfile<1, 1, [ SDTCisVT<0, v2f64>,
+ SDTCisInt<1>
+]>;
+
def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>,
SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3>
]>;
@@ -151,19 +152,19 @@ def PPClxsizx : SDNode<"PPCISD::LXSIZX", SDT_PPCLxsizx,
def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix,
[SDNPHasChain, SDNPMayStore]>;
def PPCVexts : SDNode<"PPCISD::VEXTS", SDT_PPCVexts, []>;
-def PPCSExtVElems : SDNode<"PPCISD::SExtVElems", SDT_PPCSExtVElems, []>;
// Extract FPSCR (not modeled at the DAG level).
def PPCmffs : SDNode<"PPCISD::MFFS",
- SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>;
+ SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
+ [SDNPHasChain]>;
// Perform FADD in round-to-zero mode.
def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;
-def PPCfsel : SDNode<"PPCISD::FSEL",
+def PPCfsel : SDNode<"PPCISD::FSEL",
// Type constraint for fsel.
- SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
SDTCisFP<0>, SDTCisVT<1, f64>]>, []>;
def PPCxsmaxc : SDNode<"PPCISD::XSMAXCDP", SDT_PPCFPMinMax, []>;
def PPCxsminc : SDNode<"PPCISD::XSMINCDP", SDT_PPCFPMinMax, []>;
@@ -171,8 +172,6 @@ def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp,
[SDNPMayLoad, SDNPMemOperand]>;
-def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
-def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
def PPCppc32GOT : SDNode<"PPCISD::PPC32_GOT", SDTIntLeaf, []>;
@@ -199,6 +198,7 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>;
+def PPCxxspltidp : SDNode<"PPCISD::XXSPLTI_SP_TO_DP", SDT_PPCSpToDp, []>;
def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>;
def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>;
def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
@@ -221,6 +221,8 @@ def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>;
def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>;
+def PPCfnmsub : SDNode<"PPCISD::FNMSUB" , SDTFPTernaryOp>;
+
def PPCextswsli : SDNode<"PPCISD::EXTSWSLI" , SDT_PPCextswsli>;
// Move 2 i64 values into a VSX register
@@ -255,6 +257,9 @@ def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+def PPCcall_notoc : SDNode<"PPCISD::CALL_NOTOC", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
@@ -318,11 +323,32 @@ def SDTDynOp : SDTypeProfile<1, 2, []>;
def SDTDynAreaOp : SDTypeProfile<1, 1, []>;
def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
def PPCdynareaoffset : SDNode<"PPCISD::DYNAREAOFFSET", SDTDynAreaOp, [SDNPHasChain]>;
+def PPCprobedalloca : SDNode<"PPCISD::PROBED_ALLOCA", SDTDynOp, [SDNPHasChain]>;
+
+// PC Relative Specific Nodes
+def PPCmatpcreladdr : SDNode<"PPCISD::MAT_PCREL_ADDR", SDTIntUnaryOp, []>;
//===----------------------------------------------------------------------===//
// PowerPC specific transformation functions and pattern fragments.
//
+// A floating point immediate that is not a positive zero and can be converted
+// to a single precision floating point non-denormal immediate without loss of
+// information.
+def nzFPImmAsi32 : PatLeaf<(fpimm), [{
+ APFloat APFloatOfN = N->getValueAPF();
+ return convertToNonDenormSingle(APFloatOfN) && !N->isExactlyValue(+0.0);
+}]>;
+
+// Convert the floating point immediate into a 32-bit floating point immediate
+// and get an i32 with the resulting bits.
+def getFPAs32BitInt : SDNodeXForm<fpimm, [{
+ APFloat APFloatOfN = N->getValueAPF();
+ convertToNonDenormSingle(APFloatOfN);
+ return CurDAG->getTargetConstant(APFloatOfN.bitcastToAPInt().getZExtValue(),
+ SDLoc(N), MVT::i32);
+}]>;
+
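
A rough freestanding analogue of the nzFPImmAsi32 / getFPAs32BitInt pair above, in plain C++ instead of APFloat (the helper name and the std::isnormal shortcut are my own; the real predicate only excludes positive zero and denormals): accept a double only if it round-trips through a normal single-precision value, and if so return its raw 32-bit pattern.

#include <cmath>
#include <cstdint>
#include <cstring>
#include <limits>
#include <optional>

static std::optional<uint32_t> asSingleBitsIfLossless(double D) {
  // Bail out early on values that cannot be narrowed to float at all.
  if (!std::isfinite(D) || std::fabs(D) > std::numeric_limits<float>::max())
    return std::nullopt;
  float F = static_cast<float>(D);
  // Reject values that change when narrowed to single precision, plus
  // zeros and denormals (std::isnormal is false for both).
  if (static_cast<double>(F) != D || !std::isnormal(F))
    return std::nullopt;
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // bitcast float -> i32 bit pattern
  return Bits;
}
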
def SHL32 : SDNodeXForm<imm, [{
// Transformation function: 31 - imm
return getI32Imm(31 - N->getZExtValue(), SDLoc(N));
@@ -386,9 +412,10 @@ def immZExt16 : PatLeaf<(imm), [{
// field. Used by instructions like 'ori'.
return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
}], LO16>;
-def immNonAllOneAnyExt8 : ImmLeaf<i32, [{
+def immNonAllOneAnyExt8 : ImmLeaf<i32, [{
return (isInt<8>(Imm) && (Imm != -1)) || (isUInt<8>(Imm) && (Imm != 0xFF));
}]>;
+def i32immNonAllOneNonZero : ImmLeaf<i32, [{ return Imm && (Imm != -1); }]>;
def immSExt5NonZero : ImmLeaf<i32, [{ return Imm && isInt<5>(Imm); }]>;
// imm16Shifted* - These match immediates where the low 16-bits are zero. There
@@ -404,7 +431,7 @@ def imm16ShiftedZExt : PatLeaf<(imm), [{
def imm16ShiftedSExt : PatLeaf<(imm), [{
// imm16ShiftedSExt predicate - True if only bits in the top 16-bits of the
- // immediate are set. Used by instructions like 'addis'. Identical to
+ // immediate are set. Used by instructions like 'addis'. Identical to
// imm16ShiftedZExt in 32-bit mode.
if (N->getZExtValue() & 0xFFFF) return false;
if (N->getValueType(0) == MVT::i32)
@@ -723,6 +750,27 @@ def s17imm : Operand<i32> {
let ParserMatchClass = PPCS17ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<16>";
}
+def PPCS34ImmAsmOperand : AsmOperandClass {
+ let Name = "S34Imm";
+ let PredicateMethod = "isS34Imm";
+ let RenderMethod = "addImmOperands";
+}
+def s34imm : Operand<i64> {
+ let PrintMethod = "printS34ImmOperand";
+ let EncoderMethod = "getImm34Encoding";
+ let ParserMatchClass = PPCS34ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<34>";
+}
+def PPCImmZeroAsmOperand : AsmOperandClass {
+ let Name = "ImmZero";
+ let PredicateMethod = "isImmZero";
+ let RenderMethod = "addImmOperands";
+}
+def immZero : Operand<i32> {
+ let PrintMethod = "printImmZeroOperand";
+ let ParserMatchClass = PPCImmZeroAsmOperand;
+ let DecoderMethod = "decodeImmZeroOperand";
+}
def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;
@@ -733,7 +781,9 @@ def PPCDirectBrAsmOperand : AsmOperandClass {
def directbrtarget : Operand<OtherVT> {
let PrintMethod = "printBranchOperand";
let EncoderMethod = "getDirectBrEncoding";
+ let DecoderMethod = "decodeDirectBrTarget";
let ParserMatchClass = PPCDirectBrAsmOperand;
+ let OperandType = "OPERAND_PCREL";
}
def absdirectbrtarget : Operand<OtherVT> {
let PrintMethod = "printAbsBranchOperand";
@@ -747,7 +797,9 @@ def PPCCondBrAsmOperand : AsmOperandClass {
def condbrtarget : Operand<OtherVT> {
let PrintMethod = "printBranchOperand";
let EncoderMethod = "getCondBrEncoding";
+ let DecoderMethod = "decodeCondBrTarget";
let ParserMatchClass = PPCCondBrAsmOperand;
+ let OperandType = "OPERAND_PCREL";
}
def abscondbrtarget : Operand<OtherVT> {
let PrintMethod = "printAbsBranchOperand";
@@ -757,7 +809,7 @@ def abscondbrtarget : Operand<OtherVT> {
def calltarget : Operand<iPTR> {
let PrintMethod = "printBranchOperand";
let EncoderMethod = "getDirectBrEncoding";
- let DecoderMethod = "DecodePCRel24BranchTarget";
+ let DecoderMethod = "decodeDirectBrTarget";
let ParserMatchClass = PPCDirectBrAsmOperand;
let OperandType = "OPERAND_PCREL";
}
@@ -783,6 +835,30 @@ def PPCRegGxRCNoR0Operand : AsmOperandClass {
def ptr_rc_nor0 : Operand<iPTR>, PointerLikeRegClass<1> {
let ParserMatchClass = PPCRegGxRCNoR0Operand;
}
+
+// New addressing modes with 34 bit immediates.
+def PPCDispRI34Operand : AsmOperandClass {
+ let Name = "DispRI34"; let PredicateMethod = "isS34Imm";
+ let RenderMethod = "addImmOperands";
+}
+def dispRI34 : Operand<iPTR> {
+ let ParserMatchClass = PPCDispRI34Operand;
+}
+def memri34 : Operand<iPTR> { // memri, imm is a 34-bit value.
+ let PrintMethod = "printMemRegImm34";
+ let MIOperandInfo = (ops dispRI34:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getMemRI34Encoding";
+ let DecoderMethod = "decodeMemRI34Operands";
+}
+// memri, imm is a 34-bit value for pc-relative instructions where the
+// base register is set to zero.
+def memri34_pcrel : Operand<iPTR> { // memri, imm is a 34-bit value.
+ let PrintMethod = "printMemRegImm34PCRel";
+ let MIOperandInfo = (ops dispRI34:$imm, immZero:$reg);
+ let EncoderMethod = "getMemRI34PCRelEncoding";
+ let DecoderMethod = "decodeMemRI34PCRelOperands";
+}
+
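
For reference, the signed 34-bit range that the isS34Imm predicate named above has to enforce for these displacements can be sketched as follows (a hypothetical helper, not part of the patch):

#include <cstdint>

// A prefixed D-form displacement (s34imm / dispRI34) must fit in a signed
// 34-bit field, i.e. the range [-2^33, 2^33 - 1].
static bool fitsInSigned34(int64_t Imm) {
  return Imm >= -(INT64_C(1) << 33) && Imm <= (INT64_C(1) << 33) - 1;
}
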
// A version of ptr_rc usable with the asm parser.
def PPCRegGxRCOperand : AsmOperandClass {
let Name = "RegGxRC"; let PredicateMethod = "isRegNumber";
@@ -876,7 +952,7 @@ def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned.
}
// A single-register address. This is used with the SjLj
-// pseudo-instructions which tranlates to LD/LWZ. These instructions requires
+// pseudo-instructions which translate to LD/LWZ. These instructions require
// G8RC_NOX0 registers.
def memr : Operand<iPTR> {
let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg);
@@ -913,11 +989,11 @@ def iaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv"
// Below forms are all x-form addressing mode, use three different ones so we
// can make an accurate check for x-form instructions in ISEL.
-// x-form addressing mode whose associated diplacement form is D.
+// x-form addressing mode whose associated displacement form is D.
def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>; // "stbx"
-// x-form addressing mode whose associated diplacement form is DS.
+// x-form addressing mode whose associated displacement form is DS.
def xaddrX4 : ComplexPattern<iPTR, 2, "SelectAddrIdxX4", [], []>; // "stdx"
-// x-form addressing mode whose associated diplacement form is DQ.
+// x-form addressing mode whose associated displacement form is DQ.
def xaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrIdxX16", [], []>; // "stxvx"
def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
@@ -929,26 +1005,32 @@ def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>;
/// This is just the offset part of iaddr, used for preinc.
def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
+// PC Relative Address
+def pcreladdr : ComplexPattern<iPTR, 1, "SelectAddrPCRel", [], []>;
+
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
-def In32BitMode : Predicate<"!PPCSubTarget->isPPC64()">;
-def In64BitMode : Predicate<"PPCSubTarget->isPPC64()">;
-def IsBookE : Predicate<"PPCSubTarget->isBookE()">;
-def IsNotBookE : Predicate<"!PPCSubTarget->isBookE()">;
-def HasOnlyMSYNC : Predicate<"PPCSubTarget->hasOnlyMSYNC()">;
-def HasSYNC : Predicate<"!PPCSubTarget->hasOnlyMSYNC()">;
-def IsPPC4xx : Predicate<"PPCSubTarget->isPPC4xx()">;
-def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">;
-def IsE500 : Predicate<"PPCSubTarget->isE500()">;
-def HasSPE : Predicate<"PPCSubTarget->hasSPE()">;
-def HasICBT : Predicate<"PPCSubTarget->hasICBT()">;
-def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">;
-def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
-def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">;
-def HasBPERMD : Predicate<"PPCSubTarget->hasBPERMD()">;
-def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">;
-def IsISA3_0 : Predicate<"PPCSubTarget->isISA3_0()">;
-def HasFPU : Predicate<"PPCSubTarget->hasFPU()">;
+def In32BitMode : Predicate<"!Subtarget->isPPC64()">;
+def In64BitMode : Predicate<"Subtarget->isPPC64()">;
+def IsBookE : Predicate<"Subtarget->isBookE()">;
+def IsNotBookE : Predicate<"!Subtarget->isBookE()">;
+def HasOnlyMSYNC : Predicate<"Subtarget->hasOnlyMSYNC()">;
+def HasSYNC : Predicate<"!Subtarget->hasOnlyMSYNC()">;
+def IsPPC4xx : Predicate<"Subtarget->isPPC4xx()">;
+def IsPPC6xx : Predicate<"Subtarget->isPPC6xx()">;
+def IsE500 : Predicate<"Subtarget->isE500()">;
+def HasSPE : Predicate<"Subtarget->hasSPE()">;
+def HasICBT : Predicate<"Subtarget->hasICBT()">;
+def HasPartwordAtomics : Predicate<"Subtarget->hasPartwordAtomics()">;
+def NoNaNsFPMath
+ : Predicate<"Subtarget->getTargetMachine().Options.NoNaNsFPMath">;
+def NaNsFPMath
+ : Predicate<"!Subtarget->getTargetMachine().Options.NoNaNsFPMath">;
+def HasBPERMD : Predicate<"Subtarget->hasBPERMD()">;
+def HasExtDiv : Predicate<"Subtarget->hasExtDiv()">;
+def IsISA3_0 : Predicate<"Subtarget->isISA3_0()">;
+def HasFPU : Predicate<"Subtarget->hasFPU()">;
+def PCRelativeMemops : Predicate<"Subtarget->hasPCRelativeMemops()">;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
@@ -1318,7 +1400,20 @@ def DYNALLOC : PPCEmitTimePseudo<(outs gprc:$result), (ins gprc:$negsize, memri:
(PPCdynalloc i32:$negsize, iaddr:$fpsi))]>;
def DYNAREAOFFSET : PPCEmitTimePseudo<(outs i32imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET",
[(set i32:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
-
+// Probed alloca to support stack clash protection.
+let Defs = [R1], Uses = [R1], hasNoSchedulingInfo = 1 in {
+def PROBED_ALLOCA_32 : PPCCustomInserterPseudo<(outs gprc:$result),
+ (ins gprc:$negsize, memri:$fpsi), "#PROBED_ALLOCA_32",
+ [(set i32:$result,
+ (PPCprobedalloca i32:$negsize, iaddr:$fpsi))]>;
+def PREPARE_PROBED_ALLOCA_32 : PPCEmitTimePseudo<(outs gprc:$fp,
+ gprc:$sp),
+ (ins gprc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_32", []>;
+def PROBED_STACKALLOC_32 : PPCEmitTimePseudo<(outs gprc:$scratch, gprc:$temp),
+ (ins i64imm:$stacksize),
+ "#PROBED_STACKALLOC_32", []>;
+}
+
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
let PPC970_Single = 1 in {
@@ -1412,7 +1507,7 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
}
// Set the float rounding mode.
-let Uses = [RM], Defs = [RM] in {
+let Uses = [RM], Defs = [RM] in {
def SETRNDi : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins u2imm:$RND),
"#SETRNDi", [(set f64:$FRT, (int_ppc_setrnd (i32 imm:$RND)))]>;
@@ -1724,6 +1819,8 @@ def RFEBB : XLForm_S<19, 146, (outs), (ins u1imm:$imm), "rfebb $imm",
IIC_BrB, [(PPCrfebb (i32 imm:$imm))]>,
PPC970_DGroup_Single;
+def : InstAlias<"rfebb", (RFEBB 1)>;
+
// DCB* instructions.
def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), "dcba $dst",
IIC_LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
@@ -1777,6 +1874,11 @@ def : Pat<(prefetch xoaddr:$dst, (i32 1), imm, (i32 1)),
def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)),
(ICBT 0, xoaddr:$dst)>, Requires<[HasICBT]>; // inst prefetch (for read)
+def : Pat<(int_ppc_dcbt_with_hint xoaddr:$dst, i32:$TH),
+ (DCBT i32:$TH, xoaddr:$dst)>;
+def : Pat<(int_ppc_dcbtst_with_hint xoaddr:$dst, i32:$TH),
+ (DCBTST i32:$TH, xoaddr:$dst)>;
+
// Atomic operations
// FIXME: some of these might be used with constant operands. This will result
// in constant materialization instructions that may be redundant. We currently
@@ -1969,7 +2071,7 @@ def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB),
// PPC32 Load Instructions.
//
-// Unindexed (r+i) Loads.
+// Unindexed (r+i) Loads.
let PPC970_Unit = 2 in {
def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src),
"lbz $rD, $src", IIC_LdStLoad,
@@ -2112,7 +2214,7 @@ def LFIWZX : XForm_25_memOp<31, 887, (outs f8rc:$frD), (ins memrr:$src),
}
// Load Multiple
-let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
"lmw $rD, $src", IIC_LdStLMW, []>;
@@ -2281,10 +2383,15 @@ let isCodeGenOnly = 1 in {
}
}
+// We used to have EIEIO as value but E[0-9A-Z] is a reserved name
+def EnforceIEIO : XForm_24_eieio<31, 854, (outs), (ins),
+ "eieio", IIC_LdStLoad, []>;
+
def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[HasSYNC]>;
def : Pat<(int_ppc_lwsync), (SYNC 1)>, Requires<[HasSYNC]>;
def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
def : Pat<(int_ppc_lwsync), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
+def : Pat<(int_ppc_eieio), (EnforceIEIO)>;
//===----------------------------------------------------------------------===//
// PPC32 Arithmetic Instructions.
@@ -2331,6 +2438,9 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
}
}
+def : InstAlias<"li $rD, $imm", (ADDI gprc:$rD, ZERO, s16imm:$imm)>;
+def : InstAlias<"lis $rD, $imm", (ADDIS gprc:$rD, ZERO, s17imm:$imm)>;
+
let PPC970_Unit = 1 in { // FXU Operations.
let Defs = [CR0] in {
def ANDI_rec : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
@@ -2419,6 +2529,14 @@ defm SRAW : XForm_6rc<31, 792, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
[(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>;
}
+def : InstAlias<"mr $rA, $rB", (OR gprc:$rA, gprc:$rB, gprc:$rB)>;
+def : InstAlias<"mr. $rA, $rB", (OR_rec gprc:$rA, gprc:$rB, gprc:$rB)>;
+
+def : InstAlias<"not $rA, $rS", (NOR gprc:$rA, gprc:$rS, gprc:$rS)>;
+def : InstAlias<"not. $rA, $rS", (NOR_rec gprc:$rA, gprc:$rS, gprc:$rS)>;
+
+def : InstAlias<"nop", (ORI R0, R0, 0)>;
+
let PPC970_Unit = 1 in { // FXU Operations.
let hasSideEffects = 0 in {
defm SRAWI : XForm_10rc<31, 824, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH),
@@ -2465,7 +2583,7 @@ def FTDIV: XForm_17<63, 128, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
def FTSQRT: XForm_17a<63, 160, (outs crrc:$crD), (ins f8rc:$fB),
"ftsqrt $crD, $fB", IIC_FPCompare>;
-let Uses = [RM] in {
+let Uses = [RM], mayRaiseFPException = 1 in {
let hasSideEffects = 0 in {
defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiw", "$frD, $frB", IIC_FPGeneral,
@@ -2479,46 +2597,46 @@ let Uses = [RM] in {
defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB),
"frsp", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (fpround f64:$frB))]>;
+ [(set f32:$frD, (any_fpround f64:$frB))]>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB),
"frin", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (fround f64:$frB))]>;
+ [(set f64:$frD, (any_fround f64:$frB))]>;
defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB),
"frin", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (fround f32:$frB))]>;
+ [(set f32:$frD, (any_fround f32:$frB))]>;
}
let hasSideEffects = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIPD : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB),
"frip", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (fceil f64:$frB))]>;
+ [(set f64:$frD, (any_fceil f64:$frB))]>;
defm FRIPS : XForm_26r<63, 456, (outs f4rc:$frD), (ins f4rc:$frB),
"frip", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (fceil f32:$frB))]>;
+ [(set f32:$frD, (any_fceil f32:$frB))]>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIZD : XForm_26r<63, 424, (outs f8rc:$frD), (ins f8rc:$frB),
"friz", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (ftrunc f64:$frB))]>;
+ [(set f64:$frD, (any_ftrunc f64:$frB))]>;
defm FRIZS : XForm_26r<63, 424, (outs f4rc:$frD), (ins f4rc:$frB),
"friz", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (ftrunc f32:$frB))]>;
+ [(set f32:$frD, (any_ftrunc f32:$frB))]>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIMD : XForm_26r<63, 488, (outs f8rc:$frD), (ins f8rc:$frB),
"frim", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (ffloor f64:$frB))]>;
+ [(set f64:$frD, (any_ffloor f64:$frB))]>;
defm FRIMS : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB),
"frim", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (ffloor f32:$frB))]>;
+ [(set f32:$frD, (any_ffloor f32:$frB))]>;
defm FSQRT : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB),
"fsqrt", "$frD, $frB", IIC_FPSqrtD,
- [(set f64:$frD, (fsqrt f64:$frB))]>;
+ [(set f64:$frD, (any_fsqrt f64:$frB))]>;
defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB),
"fsqrts", "$frD, $frB", IIC_FPSqrtS,
- [(set f32:$frD, (fsqrt f32:$frB))]>;
+ [(set f32:$frD, (any_fsqrt f32:$frB))]>;
}
}
}
@@ -2786,6 +2904,8 @@ def MCRXRX : X_BF3<31, 576, (outs crrc:$BF), (ins),
"mcrxrx $BF", IIC_BrMCRX>, Requires<[IsISA3_0]>;
} // hasSideEffects = 0
+def : InstAlias<"mtcr $rA", (MTCRF 255, gprc:$rA)>;
+
let Predicates = [HasFPU] in {
// Custom inserter instruction to perform FADD in round-to-zero mode.
let Uses = [RM] in {
@@ -2795,7 +2915,7 @@ let Uses = [RM] in {
// The above pseudo gets expanded to make use of the following instructions
// to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level.
-let Uses = [RM], Defs = [RM] in {
+let Uses = [RM], Defs = [RM] in {
def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
"mtfsb0 $FM", IIC_IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
@@ -2931,49 +3051,54 @@ defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA),
}
}
+def : InstAlias<"sub $rA, $rB, $rC", (SUBF gprc:$rA, gprc:$rC, gprc:$rB)>;
+def : InstAlias<"sub. $rA, $rB, $rC", (SUBF_rec gprc:$rA, gprc:$rC, gprc:$rB)>;
+def : InstAlias<"subc $rA, $rB, $rC", (SUBFC gprc:$rA, gprc:$rC, gprc:$rB)>;
+def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC_rec gprc:$rA, gprc:$rC, gprc:$rB)>;
+
// A-Form instructions. Most of the instructions executed in the FPU are of
// this type.
//
let PPC970_Unit = 3, hasSideEffects = 0, Predicates = [HasFPU] in { // FPU Operations.
let Uses = [RM] in {
let isCommutable = 1 in {
- defm FMADD : AForm_1r<63, 29,
+ defm FMADD : AForm_1r<63, 29,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
"fmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
+ [(set f64:$FRT, (any_fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
defm FMADDS : AForm_1r<59, 29,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
"fmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
- [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
+ [(set f32:$FRT, (any_fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
defm FMSUB : AForm_1r<63, 28,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
"fmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set f64:$FRT,
- (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
+ (any_fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
defm FMSUBS : AForm_1r<59, 28,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
"fmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
[(set f32:$FRT,
- (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
+ (any_fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
defm FNMADD : AForm_1r<63, 31,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
"fnmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set f64:$FRT,
- (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
+ (fneg (any_fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
defm FNMADDS : AForm_1r<59, 31,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
"fnmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
[(set f32:$FRT,
- (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
+ (fneg (any_fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
defm FNMSUB : AForm_1r<63, 30,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
"fnmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC,
+ [(set f64:$FRT, (fneg (any_fma f64:$FRA, f64:$FRC,
(fneg f64:$FRB))))]>;
defm FNMSUBS : AForm_1r<59, 30,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
"fnmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
- [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC,
+ [(set f32:$FRT, (fneg (any_fma f32:$FRA, f32:$FRC,
(fneg f32:$FRB))))]>;
} // isCommutable
}
@@ -2990,43 +3115,43 @@ defm FSELS : AForm_1r<63, 23,
(outs f4rc:$FRT), (ins f8rc:$FRA, f4rc:$FRC, f4rc:$FRB),
"fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
[(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>;
-let Uses = [RM] in {
+let Uses = [RM], mayRaiseFPException = 1 in {
let isCommutable = 1 in {
defm FADD : AForm_2r<63, 21,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
"fadd", "$FRT, $FRA, $FRB", IIC_FPAddSub,
- [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>;
+ [(set f64:$FRT, (any_fadd f64:$FRA, f64:$FRB))]>;
defm FADDS : AForm_2r<59, 21,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
"fadds", "$FRT, $FRA, $FRB", IIC_FPGeneral,
- [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>;
+ [(set f32:$FRT, (any_fadd f32:$FRA, f32:$FRB))]>;
} // isCommutable
defm FDIV : AForm_2r<63, 18,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
"fdiv", "$FRT, $FRA, $FRB", IIC_FPDivD,
- [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>;
+ [(set f64:$FRT, (any_fdiv f64:$FRA, f64:$FRB))]>;
defm FDIVS : AForm_2r<59, 18,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
"fdivs", "$FRT, $FRA, $FRB", IIC_FPDivS,
- [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>;
+ [(set f32:$FRT, (any_fdiv f32:$FRA, f32:$FRB))]>;
let isCommutable = 1 in {
defm FMUL : AForm_3r<63, 25,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC),
"fmul", "$FRT, $FRA, $FRC", IIC_FPFused,
- [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>;
+ [(set f64:$FRT, (any_fmul f64:$FRA, f64:$FRC))]>;
defm FMULS : AForm_3r<59, 25,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC),
"fmuls", "$FRT, $FRA, $FRC", IIC_FPGeneral,
- [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>;
+ [(set f32:$FRT, (any_fmul f32:$FRA, f32:$FRC))]>;
} // isCommutable
defm FSUB : AForm_2r<63, 20,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
"fsub", "$FRT, $FRA, $FRB", IIC_FPAddSub,
- [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>;
+ [(set f64:$FRT, (any_fsub f64:$FRA, f64:$FRB))]>;
defm FSUBS : AForm_2r<59, 20,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
"fsubs", "$FRT, $FRA, $FRB", IIC_FPGeneral,
- [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>;
+ [(set f32:$FRT, (any_fsub f32:$FRA, f32:$FRB))]>;
}
}
@@ -3158,13 +3283,13 @@ def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
(ADDIS $in, tblockaddress:$g)>;
// Support for thread-local storage.
-def PPC32GOT: PPCEmitTimePseudo<(outs gprc:$rD), (ins), "#PPC32GOT",
+def PPC32GOT: PPCEmitTimePseudo<(outs gprc:$rD), (ins), "#PPC32GOT",
[(set i32:$rD, (PPCppc32GOT))]>;
// Get the _GLOBAL_OFFSET_TABLE_ in PIC mode.
// This uses two output registers, the first as the real output, the second as a
// temporary register, used internally in code generation.
-def PPC32PICGOT: PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT",
+def PPC32PICGOT: PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT",
[]>, NoEncode<"$rT">;
def LDgotTprelL32: PPCEmitTimePseudo<(outs gprc_nor0:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
@@ -3302,15 +3427,19 @@ def : Pat<(atomic_fence (timm), (timm)), (SYNC 1)>, Requires<[HasSYNC]>;
def : Pat<(atomic_fence (timm), (timm)), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
let Predicates = [HasFPU] in {
-// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
-def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
- (FNMSUB $A, $C, $B)>;
-def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
- (FNMSUB $A, $C, $B)>;
-def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B),
- (FNMSUBS $A, $C, $B)>;
-def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B),
- (FNMSUBS $A, $C, $B)>;
+// Additional fnmsub patterns for custom node
+def : Pat<(PPCfnmsub f64:$A, f64:$B, f64:$C),
+ (FNMSUB $A, $B, $C)>;
+def : Pat<(PPCfnmsub f32:$A, f32:$B, f32:$C),
+ (FNMSUBS $A, $B, $C)>;
+def : Pat<(fneg (PPCfnmsub f64:$A, f64:$B, f64:$C)),
+ (FMSUB $A, $B, $C)>;
+def : Pat<(fneg (PPCfnmsub f32:$A, f32:$B, f32:$C)),
+ (FMSUBS $A, $B, $C)>;
+def : Pat<(PPCfnmsub f64:$A, f64:$B, (fneg f64:$C)),
+ (FNMADD $A, $B, $C)>;
+def : Pat<(PPCfnmsub f32:$A, f32:$B, (fneg f32:$C)),
+ (FNMADDS $A, $B, $C)>;
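The fneg foldings above fall out of the semantics the custom node carries, roughly PPCfnmsub(a, b, c) = -(a*b - c). A short worked check under that assumed definition (a sketch, not lines from the patch):

//   fneg(PPCfnmsub(a, b, c))  =   a*b - c    ->  FMSUB  a, b, c
//   PPCfnmsub(a, b, fneg(c))  = -(a*b + c)   ->  FNMADD a, b, c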
// FCOPYSIGN's operand types need not agree.
def : Pat<(fcopysign f64:$frB, f32:$frA),
@@ -3331,6 +3460,10 @@ def crnot : OutPatFrag<(ops node:$in),
def : Pat<(not i1:$in),
(crnot $in)>;
+// Prefixed instructions may require access to the above defs at a later
+// time, so we include this after the def.
+include "PPCInstrPrefix.td"
+
// Patterns for arithmetic i1 operations.
def : Pat<(add i1:$a, i1:$b),
(CRXOR $a, $b)>;
@@ -3510,7 +3643,7 @@ defm : ExtSetCCPat<SETEQ,
(RLWINM (CNTLZW $in), 27, 31, 31)>,
OutPatFrag<(ops node:$in),
(RLDICL (CNTLZD $in), 58, 63)> >;
-
+
defm : ExtSetCCPat<SETNE,
PatFrag<(ops node:$in, node:$cc),
(setcc $in, 0, $cc)>,
@@ -3518,7 +3651,7 @@ defm : ExtSetCCPat<SETNE,
(RLWINM (i32not (CNTLZW $in)), 27, 31, 31)>,
OutPatFrag<(ops node:$in),
(RLDICL (i64not (CNTLZD $in)), 58, 63)> >;
-
+
defm : ExtSetCCPat<SETLT,
PatFrag<(ops node:$in, node:$cc),
(setcc $in, 0, $cc)>,
@@ -4128,10 +4261,6 @@ def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),
def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src),
"icbi $src", IIC_LdStICBI, []>;
-// We used to have EIEIO as value but E[0-9A-Z] is a reserved name
-def EnforceIEIO : XForm_24_eieio<31, 854, (outs), (ins),
- "eieio", IIC_LdStLoad, []>;
-
def WAIT : XForm_24_sync<31, 30, (outs), (ins i32imm:$L),
"wait $L", IIC_LdStLoad, []>;
@@ -4421,17 +4550,53 @@ def DCBFx : PPCAsmPseudo<"dcbf $dst", (ins memrr:$dst)>;
def DCBFL : PPCAsmPseudo<"dcbfl $dst", (ins memrr:$dst)>;
def DCBFLP : PPCAsmPseudo<"dcbflp $dst", (ins memrr:$dst)>;
+def : Pat<(int_ppc_isync), (ISYNC)>;
+def : Pat<(int_ppc_dcbfl xoaddr:$dst),
+ (DCBF 1, xoaddr:$dst)>;
+def : Pat<(int_ppc_dcbflp xoaddr:$dst),
+ (DCBF 3, xoaddr:$dst)>;
+
def : InstAlias<"crset $bx", (CREQV crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
def : InstAlias<"crnot $bx, $by", (CRNOR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
+def : InstAlias<"mftb $Rx", (MFTB gprc:$Rx, 268)>;
+def : InstAlias<"mftbl $Rx", (MFTB gprc:$Rx, 268)>;
+def : InstAlias<"mftbu $Rx", (MFTB gprc:$Rx, 269)>;
+
+def : InstAlias<"xnop", (XORI R0, R0, 0)>;
+
+foreach BR = 0-7 in {
+ def : InstAlias<"mfbr"#BR#" $Rx",
+ (MFDCR gprc:$Rx, !add(BR, 0x80))>,
+ Requires<[IsPPC4xx]>;
+ def : InstAlias<"mtbr"#BR#" $Rx",
+ (MTDCR gprc:$Rx, !add(BR, 0x80))>,
+ Requires<[IsPPC4xx]>;
+}
+
+def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>;
+def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>;
+
def : InstAlias<"mtxer $Rx", (MTSPR 1, gprc:$Rx)>;
def : InstAlias<"mfxer $Rx", (MFSPR gprc:$Rx, 1)>;
+def : InstAlias<"mtudscr $Rx", (MTSPR 3, gprc:$Rx)>;
+def : InstAlias<"mfudscr $Rx", (MFSPR gprc:$Rx, 3)>;
+
def : InstAlias<"mfrtcu $Rx", (MFSPR gprc:$Rx, 4)>;
def : InstAlias<"mfrtcl $Rx", (MFSPR gprc:$Rx, 5)>;
+def : InstAlias<"mtlr $Rx", (MTSPR 8, gprc:$Rx)>;
+def : InstAlias<"mflr $Rx", (MFSPR gprc:$Rx, 8)>;
+
+def : InstAlias<"mtctr $Rx", (MTSPR 9, gprc:$Rx)>;
+def : InstAlias<"mfctr $Rx", (MFSPR gprc:$Rx, 9)>;
+
+def : InstAlias<"mtuamr $Rx", (MTSPR 13, gprc:$Rx)>;
+def : InstAlias<"mfuamr $Rx", (MFSPR gprc:$Rx, 13)>;
+
def : InstAlias<"mtdscr $Rx", (MTSPR 17, gprc:$Rx)>;
def : InstAlias<"mfdscr $Rx", (MFSPR gprc:$Rx, 17)>;
@@ -4453,12 +4618,6 @@ def : InstAlias<"mfsrr0 $Rx", (MFSPR gprc:$Rx, 26)>;
def : InstAlias<"mtsrr1 $Rx", (MTSPR 27, gprc:$Rx)>;
def : InstAlias<"mfsrr1 $Rx", (MFSPR gprc:$Rx, 27)>;
-def : InstAlias<"mtsrr2 $Rx", (MTSPR 990, gprc:$Rx)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mfsrr2 $Rx", (MFSPR gprc:$Rx, 990)>, Requires<[IsPPC4xx]>;
-
-def : InstAlias<"mtsrr3 $Rx", (MTSPR 991, gprc:$Rx)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mfsrr3 $Rx", (MFSPR gprc:$Rx, 991)>, Requires<[IsPPC4xx]>;
-
def : InstAlias<"mtcfar $Rx", (MTSPR 28, gprc:$Rx)>;
def : InstAlias<"mfcfar $Rx", (MFSPR gprc:$Rx, 28)>;
@@ -4468,27 +4627,34 @@ def : InstAlias<"mfamr $Rx", (MFSPR gprc:$Rx, 29)>;
def : InstAlias<"mtpid $Rx", (MTSPR 48, gprc:$Rx)>, Requires<[IsBookE]>;
def : InstAlias<"mfpid $Rx", (MFSPR gprc:$Rx, 48)>, Requires<[IsBookE]>;
-def : InstAlias<"mftb $Rx", (MFTB gprc:$Rx, 268)>;
-def : InstAlias<"mftbl $Rx", (MFTB gprc:$Rx, 268)>;
-def : InstAlias<"mftbu $Rx", (MFTB gprc:$Rx, 269)>;
-
-def : InstAlias<"mttbl $Rx", (MTSPR 284, gprc:$Rx)>;
-def : InstAlias<"mttbu $Rx", (MTSPR 285, gprc:$Rx)>;
+foreach SPRG = 4-7 in {
+ def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 256))>,
+ Requires<[IsBookE]>;
+ def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 256))>,
+ Requires<[IsBookE]>;
+ def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>,
+ Requires<[IsBookE]>;
+ def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>,
+ Requires<[IsBookE]>;
+}
-def : InstAlias<"mftblo $Rx", (MFSPR gprc:$Rx, 989)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mttblo $Rx", (MTSPR 989, gprc:$Rx)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mftbhi $Rx", (MFSPR gprc:$Rx, 988)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mttbhi $Rx", (MTSPR 988, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+foreach SPRG = 0-3 in {
+ def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 272))>;
+ def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 272))>;
+ def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>;
+ def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>;
+}
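The two foreach blocks above stamp out one alias pair per SPRG, with the SPR number produced by !add. Roughly, the expansion looks like this (a sketch, not lines from the patch):

//   mfsprg $RT, 2  /  mfsprg2 $RT   ->  (MFSPR gprc:$RT, 274)
//   mtsprg 2, $RT  /  mtsprg2 $RT   ->  (MTSPR 274, gprc:$RT)
//   SPRG4-SPRG7 map to SPRs 260-263 and additionally require IsBookE.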
-def : InstAlias<"xnop", (XORI R0, R0, 0)>;
+def : InstAlias<"mfasr $RT", (MFSPR gprc:$RT, 280)>;
+def : InstAlias<"mtasr $RT", (MTSPR 280, gprc:$RT)>;
-def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
-def : InstAlias<"mr. $rA, $rB", (OR8_rec g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+def : InstAlias<"mttbl $Rx", (MTSPR 284, gprc:$Rx)>;
+def : InstAlias<"mttbu $Rx", (MTSPR 285, gprc:$Rx)>;
-def : InstAlias<"not $rA, $rB", (NOR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
-def : InstAlias<"not. $rA, $rB", (NOR8_rec g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+def : InstAlias<"mfpvr $RT", (MFSPR gprc:$RT, 287)>;
-def : InstAlias<"mtcr $rA", (MTCRF8 255, g8rc:$rA)>;
+def : InstAlias<"mfspefscr $Rx", (MFSPR gprc:$Rx, 512)>;
+def : InstAlias<"mtspefscr $Rx", (MTSPR 512, gprc:$Rx)>;
foreach BATR = 0-3 in {
def : InstAlias<"mtdbatu "#BATR#", $Rx",
@@ -4517,86 +4683,36 @@ foreach BATR = 0-3 in {
Requires<[IsPPC6xx]>;
}
-foreach BR = 0-7 in {
- def : InstAlias<"mfbr"#BR#" $Rx",
- (MFDCR gprc:$Rx, !add(BR, 0x80))>,
- Requires<[IsPPC4xx]>;
- def : InstAlias<"mtbr"#BR#" $Rx",
- (MTDCR gprc:$Rx, !add(BR, 0x80))>,
- Requires<[IsPPC4xx]>;
-}
-
-def : InstAlias<"mtdccr $Rx", (MTSPR 1018, gprc:$Rx)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mfdccr $Rx", (MFSPR gprc:$Rx, 1018)>, Requires<[IsPPC4xx]>;
-
-def : InstAlias<"mticcr $Rx", (MTSPR 1019, gprc:$Rx)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mficcr $Rx", (MFSPR gprc:$Rx, 1019)>, Requires<[IsPPC4xx]>;
-
-def : InstAlias<"mtdear $Rx", (MTSPR 981, gprc:$Rx)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mfdear $Rx", (MFSPR gprc:$Rx, 981)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mtppr $RT", (MTSPR 896, gprc:$RT)>;
+def : InstAlias<"mfppr $RT", (MFSPR gprc:$RT, 896)>;
def : InstAlias<"mtesr $Rx", (MTSPR 980, gprc:$Rx)>, Requires<[IsPPC4xx]>;
def : InstAlias<"mfesr $Rx", (MFSPR gprc:$Rx, 980)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mfspefscr $Rx", (MFSPR gprc:$Rx, 512)>;
-def : InstAlias<"mtspefscr $Rx", (MTSPR 512, gprc:$Rx)>;
+def : InstAlias<"mtdear $Rx", (MTSPR 981, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfdear $Rx", (MFSPR gprc:$Rx, 981)>, Requires<[IsPPC4xx]>;
def : InstAlias<"mttcr $Rx", (MTSPR 986, gprc:$Rx)>, Requires<[IsPPC4xx]>;
def : InstAlias<"mftcr $Rx", (MFSPR gprc:$Rx, 986)>, Requires<[IsPPC4xx]>;
-def LAx : PPCAsmPseudo<"la $rA, $addr", (ins gprc:$rA, memri:$addr)>;
-
-def SUBI : PPCAsmPseudo<"subi $rA, $rB, $imm",
- (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
-def SUBIS : PPCAsmPseudo<"subis $rA, $rB, $imm",
- (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
-def SUBIC : PPCAsmPseudo<"subic $rA, $rB, $imm",
- (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
-def SUBIC_rec : PPCAsmPseudo<"subic. $rA, $rB, $imm",
- (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
-
-def : InstAlias<"sub $rA, $rB, $rC", (SUBF8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
-def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8_rec g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
-def : InstAlias<"subc $rA, $rB, $rC", (SUBFC8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
-def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8_rec g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
-
-def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>;
-def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>;
-
-def : InstAlias<"mfasr $RT", (MFSPR gprc:$RT, 280)>;
-def : InstAlias<"mtasr $RT", (MTSPR 280, gprc:$RT)>;
+def : InstAlias<"mftbhi $Rx", (MFSPR gprc:$Rx, 988)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mttbhi $Rx", (MTSPR 988, gprc:$Rx)>, Requires<[IsPPC4xx]>;
-foreach SPRG = 0-3 in {
- def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 272))>;
- def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 272))>;
- def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>;
- def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>;
-}
-foreach SPRG = 4-7 in {
- def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 256))>,
- Requires<[IsBookE]>;
- def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 256))>,
- Requires<[IsBookE]>;
- def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>,
- Requires<[IsBookE]>;
- def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>,
- Requires<[IsBookE]>;
-}
+def : InstAlias<"mftblo $Rx", (MFSPR gprc:$Rx, 989)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mttblo $Rx", (MTSPR 989, gprc:$Rx)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mtasr $RS", (MTSPR 280, gprc:$RS)>;
+def : InstAlias<"mtsrr2 $Rx", (MTSPR 990, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfsrr2 $Rx", (MFSPR gprc:$Rx, 990)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mfdec $RT", (MFSPR gprc:$RT, 22)>;
-def : InstAlias<"mtdec $RT", (MTSPR 22, gprc:$RT)>;
+def : InstAlias<"mtsrr3 $Rx", (MTSPR 991, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfsrr3 $Rx", (MFSPR gprc:$Rx, 991)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mfpvr $RT", (MFSPR gprc:$RT, 287)>;
+def : InstAlias<"mtdccr $Rx", (MTSPR 1018, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfdccr $Rx", (MFSPR gprc:$Rx, 1018)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mfsdr1 $RT", (MFSPR gprc:$RT, 25)>;
-def : InstAlias<"mtsdr1 $RT", (MTSPR 25, gprc:$RT)>;
+def : InstAlias<"mticcr $Rx", (MTSPR 1019, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mficcr $Rx", (MFSPR gprc:$Rx, 1019)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mfsrr0 $RT", (MFSPR gprc:$RT, 26)>;
-def : InstAlias<"mfsrr1 $RT", (MFSPR gprc:$RT, 27)>;
-def : InstAlias<"mtsrr0 $RT", (MTSPR 26, gprc:$RT)>;
-def : InstAlias<"mtsrr1 $RT", (MTSPR 27, gprc:$RT)>;
def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>;
@@ -4609,6 +4725,17 @@ def : InstAlias<"tlbwehi $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 0)>,
def : InstAlias<"tlbwelo $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 1)>,
Requires<[IsPPC4xx]>;
+def LAx : PPCAsmPseudo<"la $rA, $addr", (ins gprc:$rA, memri:$addr)>;
+
+def SUBI : PPCAsmPseudo<"subi $rA, $rB, $imm",
+ (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
+def SUBIS : PPCAsmPseudo<"subis $rA, $rB, $imm",
+ (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
+def SUBIC : PPCAsmPseudo<"subic $rA, $rB, $imm",
+ (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
+def SUBIC_rec : PPCAsmPseudo<"subic. $rA, $rB, $imm",
+ (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
+
def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def EXTLWI_rec : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b",
@@ -4646,6 +4773,13 @@ def CLRLSLWI : PPCAsmPseudo<"clrlslwi $rA, $rS, $b, $n",
def CLRLSLWI_rec : PPCAsmPseudo<"clrlslwi. $rA, $rS, $b, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>;
+def : InstAlias<"isellt $rT, $rA, $rB",
+ (ISEL gprc:$rT, gprc_nor0:$rA, gprc:$rB, CR0LT)>;
+def : InstAlias<"iselgt $rT, $rA, $rB",
+ (ISEL gprc:$rT, gprc_nor0:$rA, gprc:$rB, CR0GT)>;
+def : InstAlias<"iseleq $rT, $rA, $rB",
+ (ISEL gprc:$rT, gprc_nor0:$rA, gprc:$rB, CR0EQ)>;
+
def : InstAlias<"rotlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>;
def : InstAlias<"rotlwi. $rA, $rS, $n", (RLWINM_rec gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>;
def : InstAlias<"rotlw $rA, $rS, $rB", (RLWNM gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>;
@@ -4694,6 +4828,8 @@ def CLRLSLDI_rec : PPCAsmPseudo<"clrlsldi. $rA, $rS, $b, $n",
def SUBPCIS : PPCAsmPseudo<"subpcis $RT, $D", (ins g8rc:$RT, s16imm:$D)>;
def : InstAlias<"rotldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>;
+def : InstAlias<"rotldi $rA, $rS, $n",
+ (RLDICL_32_64 g8rc:$rA, gprc:$rS, u6imm:$n, 0)>;
def : InstAlias<"rotldi. $rA, $rS, $n", (RLDICL_rec g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>;
def : InstAlias<"rotld $rA, $rS, $rB", (RLDCL g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCL_rec g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
@@ -4892,6 +5028,8 @@ def : InstAlias<"cmp $bf, 1, $rA, $rB", (CMPD crrc:$bf, g8rc:$rA, g8rc:$rB)>;
def : InstAlias<"cmpli $bf, 1, $rA, $imm", (CMPLDI crrc:$bf, g8rc:$rA, u16imm64:$imm)>;
def : InstAlias<"cmpl $bf, 1, $rA, $rB", (CMPLD crrc:$bf, g8rc:$rA, g8rc:$rB)>;
+def : InstAlias<"trap", (TW 31, R0, R0)>;
+
multiclass TrapExtendedMnemonic<string name, int to> {
def : InstAlias<"td"#name#"i $rA, $imm", (TDI to, g8rc:$rA, s16imm:$imm)>;
def : InstAlias<"td"#name#" $rA, $rB", (TD to, g8rc:$rA, g8rc:$rB)>;
@@ -5025,8 +5163,11 @@ def RotateInsertByte1 {
dag Left = (RLWIMI RotateInsertByte3.Left, Swap4.Bits, 8, 24, 31);
}
-def : Pat<(i32 (bitreverse i32:$A)),
- (RLDICL_32 RotateInsertByte1.Left, 0, 32)>;
+// Clear the upper half of the register when in 64-bit mode
+let Predicates = [In64BitMode] in
+def : Pat<(i32 (bitreverse i32:$A)), (RLDICL_32 RotateInsertByte1.Left, 0, 32)>;
+let Predicates = [In32BitMode] in
+def : Pat<(i32 (bitreverse i32:$A)), RotateInsertByte1.Left>;
// Fast 64-bit reverse bits algorithm:
// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit):
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
new file mode 100644
index 000000000000..2bab73418e10
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -0,0 +1,1035 @@
+//===----------------------------------------------------------------------===//
+// PowerPC ISA 3.1 specific type constraints.
+//
+
+def SDT_PPCSplat32 : SDTypeProfile<1, 3, [ SDTCisVT<0, v2i64>,
+ SDTCisVec<1>, SDTCisInt<2>, SDTCisInt<3>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ISA 3.1 specific PPCISD nodes.
+//
+
+def PPCxxsplti32dx : SDNode<"PPCISD::XXSPLTI32DX", SDT_PPCSplat32, []>;
+
+//===----------------------------------------------------------------------===//
+
+// PC Relative flag (for instructions that use the address of the prefix for
+// address computations).
+class isPCRel { bit PCRel = 1; }
+
+// Top-level class for prefixed instructions.
+class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : Instruction {
+ field bits<64> Inst;
+ field bits<64> SoftFail = 0;
+ bit PCRel = 0; // Default value, set by isPCRel.
+ let Size = 8;
+
+ let Namespace = "PPC";
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+ let Inst{0-5} = pref;
+ let Inst{32-37} = opcode;
+
+ bits<1> PPC970_First = 0;
+ bits<1> PPC970_Single = 0;
+ bits<1> PPC970_Cracked = 0;
+ bits<3> PPC970_Unit = 0;
+
+ /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
+ /// these must be reflected there! See comments there for what these are.
+ let TSFlags{0} = PPC970_First;
+ let TSFlags{1} = PPC970_Single;
+ let TSFlags{2} = PPC970_Cracked;
+ let TSFlags{5-3} = PPC970_Unit;
+
+ bits<1> Prefixed = 1; // This is a prefixed instruction.
+ let TSFlags{7} = Prefixed;
+
+ // For cases where multiple instruction definitions really represent the
+ // same underlying instruction but with one definition for 64-bit arguments
+ // and one for 32-bit arguments, this bit breaks the degeneracy between
+ // the two forms and allows TableGen to generate mapping tables.
+ bit Interpretation64Bit = 0;
+
+ // Fields used for relation models.
+ string BaseName = "";
+}
+
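The D-form subclasses below split their wide immediate between the prefix word (Inst{0-31}) and the suffix word (Inst{32-63}). As a sketch of what MLS_DForm_SI34_RT5 does with a 34-bit SI, assuming the illustrative value 0x1_2345_6789 (not an example taken from the patch):

//   SI{33-16} = 0x12345  ->  Inst{14-31}   (d0, carried in the prefix)
//   SI{15-0}  = 0x6789   ->  Inst{48-63}   (d1, carried in the suffix)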
+class MLS_DForm_R_SI34_RTA5_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRS;
+ bits<39> D_RA;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 2;
+ let Inst{8-10} = 0;
+ let Inst{11} = PCRel;
+ let Inst{12-13} = 0;
+ let Inst{14-31} = D_RA{33-16}; // d0
+
+ // The instruction.
+ let Inst{38-42} = FRS{4-0};
+ let Inst{43-47} = D_RA{38-34}; // RA
+ let Inst{48-63} = D_RA{15-0}; // d1
+}
+
+class MLS_DForm_R_SI34_RTA5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<5> RA;
+ bits<34> SI;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 2;
+ let Inst{8-10} = 0;
+ let Inst{11} = PCRel;
+ let Inst{12-13} = 0;
+ let Inst{14-31} = SI{33-16};
+
+ // The instruction.
+ let Inst{38-42} = RT;
+ let Inst{43-47} = RA;
+ let Inst{48-63} = SI{15-0};
+}
+
+class MLS_DForm_SI34_RT5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<34> SI;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 2;
+ let Inst{8-10} = 0;
+ let Inst{11} = 0;
+ let Inst{12-13} = 0;
+ let Inst{14-31} = SI{33-16};
+
+ // The instruction.
+ let Inst{38-42} = RT;
+ let Inst{43-47} = 0;
+ let Inst{48-63} = SI{15-0};
+}
+
+multiclass MLS_DForm_R_SI34_RTA5_p<bits<6> opcode, dag OOL, dag IOL,
+ dag PCRel_IOL, string asmstr,
+ InstrItinClass itin> {
+ def NAME : MLS_DForm_R_SI34_RTA5<opcode, OOL, IOL,
+ !strconcat(asmstr, ", 0"), itin, []>;
+ def pc : MLS_DForm_R_SI34_RTA5<opcode, OOL, PCRel_IOL,
+ !strconcat(asmstr, ", 1"), itin, []>, isPCRel;
+}
+
+class 8LS_DForm_R_SI34_RTA5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<39> D_RA;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-10} = 0;
+ let Inst{11} = PCRel;
+ let Inst{12-13} = 0;
+ let Inst{14-31} = D_RA{33-16}; // d0
+
+ // The instruction.
+ let Inst{38-42} = RT{4-0};
+ let Inst{43-47} = D_RA{38-34}; // RA
+ let Inst{48-63} = D_RA{15-0}; // d1
+}
+
+// 8LS:D-Form: [ 1 0 0 // R // d0
+// PO TX T RA d1 ]
+class 8LS_DForm_R_SI34_XT6_RA5<bits<5> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : PI<1, { opcode, ? }, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<39> D_RA;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 0;
+ let Inst{8} = 0;
+ let Inst{9-10} = 0; // reserved
+ let Inst{11} = PCRel;
+ let Inst{12-13} = 0; // reserved
+ let Inst{14-31} = D_RA{33-16}; // d0
+
+ // The instruction.
+ let Inst{37} = XT{5};
+ let Inst{38-42} = XT{4-0};
+ let Inst{43-47} = D_RA{38-34}; // RA
+ let Inst{48-63} = D_RA{15-0}; // d1
+}
+
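In this form the 6-bit VSX register number XT is split across the suffix word, with its high bit landing in the TX position. A sketch, assuming the illustrative target register vs52 (not an example from the patch):

//   XT = 52 = 0b110100
//   XT{5}   = 1   ->  Inst{37}      (TX)
//   XT{4-0} = 20  ->  Inst{38-42}   (T)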
+// X-Form: [PO T IMM VRB XO TX]
+class XForm_XT6_IMM5_VB5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<5> VRB;
+ bits<5> IMM;
+
+ let Pattern = pattern;
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = IMM;
+ let Inst{16-20} = VRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = XT{5};
+}
+
+class 8RR_XX4Form_IMM8_XTAB6<bits<6> opcode, bits<2> xo,
+ dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<6> XC;
+ bits<8> IMM;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 1;
+ let Inst{8} = 0;
+ let Inst{9-11} = 0;
+ let Inst{12-13} = 0;
+ let Inst{14-23} = 0;
+ let Inst{24-31} = IMM;
+
+ // The instruction.
+ let Inst{38-42} = XT{4-0};
+ let Inst{43-47} = XA{4-0};
+ let Inst{48-52} = XB{4-0};
+ let Inst{53-57} = XC{4-0};
+ let Inst{58-59} = xo;
+ let Inst{60} = XC{5};
+ let Inst{61} = XA{5};
+ let Inst{62} = XB{5};
+ let Inst{63} = XT{5};
+}
+
+class VXForm_RD5_N3_VB5<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> RD;
+ bits<5> VB;
+ bits<3> N;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RD;
+ let Inst{11-12} = 0;
+ let Inst{13-15} = N;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+
+// VX-Form: [PO VRT / UIM RB XO].
+// We use VXForm_1 to implement it, that is, we use "VRA" (5-bit) to represent
+// "/ UIM" (unused bit followed by a 4-bit immediate)
+// Destructive (insert) forms are suffixed with _ins.
+class VXForm_VRT5_UIM5_RB5_ins<bits<11> xo, string opc, list<dag> pattern>
+ : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, g8rc:$rB),
+ !strconcat(opc, " $vD, $rB, $UIM"), IIC_VecGeneral, pattern>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+
+// VX-Form: [PO VRT RA VRB XO].
+// Destructive (insert) forms are suffixed with _ins.
+class VXForm_VTB5_RA5_ins<bits<11> xo, string opc, list<dag> pattern>
+ : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, vrrc:$vB),
+ !strconcat(opc, " $vD, $rA, $vB"), IIC_VecGeneral, pattern>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+
+// VX-Form: [PO VRT RA RB XO].
+// Destructive (insert) forms are suffixed with _ins.
+class VXForm_VRT5_RAB5_ins<bits<11> xo, string opc, list<dag> pattern>
+ : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, g8rc:$rB),
+ !strconcat(opc, " $vD, $rA, $rB"), IIC_VecGeneral, pattern>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+
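Because of RegConstraint<"$vDi = $vD"> and NoEncode<"$vDi">, each _ins class behaves as a two-address record: the incoming vector and the result must land in the same register, and only $vD is encoded. The instantiations further down therefore print roughly as follows (a sketch, not lines from the patch):

//   vinsw    vD, rB, UIM   ; vD supplies the prior contents and receives the result
//   vinsbvlx vD, rA, vB    ; likewise for the other insert variants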
+// VN-Form: [PO VRT VRA VRB PS SD XO]
+// SD is "Shift Direction"
+class VNForm_VTAB5_SD3<bits<6> xo, bits<2> ps, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VRT;
+ bits<5> VRA;
+ bits<5> VRB;
+ bits<3> SD;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VRT;
+ let Inst{11-15} = VRA;
+ let Inst{16-20} = VRB;
+ let Inst{21-22} = ps;
+ let Inst{23-25} = SD;
+ let Inst{26-31} = xo;
+}
+
+// 8RR:D-Form: [ 1 1 0 // // imm0
+// PO T XO TX imm1 ].
+class 8RR_DForm_IMM32_XT6<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<32> IMM32;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 1;
+ let Inst{8-11} = 0;
+ let Inst{12-13} = 0; // reserved
+ let Inst{14-15} = 0; // reserved
+ let Inst{16-31} = IMM32{31-16};
+
+ // The instruction.
+ let Inst{38-42} = XT{4-0};
+ let Inst{43-46} = xo;
+ let Inst{47} = XT{5};
+ let Inst{48-63} = IMM32{15-0};
+}
+
+// 8RR:D-Form: [ 1 1 0 // // imm0
+// PO T XO IX TX imm1 ].
+class 8RR_DForm_IMM32_XT6_IX<bits<6> opcode, bits<3> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bit IX;
+ bits<32> IMM32;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 1;
+ let Inst{8-11} = 0;
+ let Inst{12-13} = 0; // reserved
+ let Inst{14-15} = 0; // reserved
+ let Inst{16-31} = IMM32{31-16};
+
+ // The instruction.
+ let Inst{38-42} = XT{4-0};
+ let Inst{43-45} = xo;
+ let Inst{46} = IX;
+ let Inst{47} = XT{5};
+ let Inst{48-63} = IMM32{15-0};
+}
+
+class 8RR_XX4Form_XTABC6<bits<6> opcode, bits<2> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<6> XC;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 1;
+ let Inst{8-11} = 0;
+ let Inst{12-13} = 0;
+ let Inst{14-31} = 0;
+
+ // The instruction.
+ let Inst{38-42} = XT{4-0};
+ let Inst{43-47} = XA{4-0};
+ let Inst{48-52} = XB{4-0};
+ let Inst{53-57} = XC{4-0};
+ let Inst{58-59} = xo;
+ let Inst{60} = XC{5};
+ let Inst{61} = XA{5};
+ let Inst{62} = XB{5};
+ let Inst{63} = XT{5};
+}
+
+class 8RR_XX4Form_IMM3_XTABC6<bits<6> opcode, bits<2> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<6> XC;
+ bits<3> IMM;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 1;
+ let Inst{8-11} = 0;
+ let Inst{12-13} = 0;
+ let Inst{14-28} = 0;
+ let Inst{29-31} = IMM;
+
+ // The instruction.
+ let Inst{38-42} = XT{4-0};
+ let Inst{43-47} = XA{4-0};
+ let Inst{48-52} = XB{4-0};
+ let Inst{53-57} = XC{4-0};
+ let Inst{58-59} = xo;
+ let Inst{60} = XC{5};
+ let Inst{61} = XA{5};
+ let Inst{62} = XB{5};
+ let Inst{63} = XT{5};
+}
+
+// [PO BF / XO2 B XO BX /]
+class XX2_BF3_XO5_XB6_XO9<bits<6> opcode, bits<5> xo2, bits<9> xo, dag OOL,
+ dag IOL, string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = xo2;
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5};
+ let Inst{31} = 0;
+}
+
+multiclass MLS_DForm_R_SI34_RTA5_MEM_p<bits<6> opcode, dag OOL, dag IOL,
+ dag PCRel_IOL, string asmstr,
+ InstrItinClass itin> {
+ def NAME : MLS_DForm_R_SI34_RTA5_MEM<opcode, OOL, IOL,
+ !strconcat(asmstr, ", 0"), itin, []>;
+ def pc : MLS_DForm_R_SI34_RTA5_MEM<opcode, OOL, PCRel_IOL,
+ !strconcat(asmstr, ", 1"), itin, []>,
+ isPCRel;
+}
+
+multiclass 8LS_DForm_R_SI34_RTA5_p<bits<6> opcode, dag OOL, dag IOL,
+ dag PCRel_IOL, string asmstr,
+ InstrItinClass itin> {
+ def NAME : 8LS_DForm_R_SI34_RTA5<opcode, OOL, IOL,
+ !strconcat(asmstr, ", 0"), itin, []>;
+ def pc : 8LS_DForm_R_SI34_RTA5<opcode, OOL, PCRel_IOL,
+ !strconcat(asmstr, ", 1"), itin, []>, isPCRel;
+}
+
+multiclass 8LS_DForm_R_SI34_XT6_RA5_p<bits<5> opcode, dag OOL, dag IOL,
+ dag PCRel_IOL, string asmstr,
+ InstrItinClass itin> {
+ def NAME : 8LS_DForm_R_SI34_XT6_RA5<opcode, OOL, IOL,
+ !strconcat(asmstr, ", 0"), itin, []>;
+ def pc : 8LS_DForm_R_SI34_XT6_RA5<opcode, OOL, PCRel_IOL,
+ !strconcat(asmstr, ", 1"), itin, []>,
+ isPCRel;
+}
+
+def PrefixInstrs : Predicate<"Subtarget->hasPrefixInstrs()">;
+def IsISA3_1 : Predicate<"Subtarget->isISA3_1()">;
+
+let Predicates = [PrefixInstrs] in {
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ defm PADDI8 :
+ MLS_DForm_R_SI34_RTA5_p<14, (outs g8rc:$RT), (ins g8rc:$RA, s34imm:$SI),
+ (ins immZero:$RA, s34imm:$SI),
+ "paddi $RT, $RA, $SI", IIC_LdStLFD>;
+ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+ def PLI8 : MLS_DForm_SI34_RT5<14, (outs g8rc:$RT),
+ (ins s34imm:$SI),
+ "pli $RT, $SI", IIC_IntSimple, []>;
+ }
+ }
+ defm PADDI :
+ MLS_DForm_R_SI34_RTA5_p<14, (outs gprc:$RT), (ins gprc:$RA, s34imm:$SI),
+ (ins immZero:$RA, s34imm:$SI),
+ "paddi $RT, $RA, $SI", IIC_LdStLFD>;
+ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+ def PLI : MLS_DForm_SI34_RT5<14, (outs gprc:$RT),
+ (ins s34imm:$SI),
+ "pli $RT, $SI", IIC_IntSimple, []>;
+ }
+
+ let mayLoad = 1, mayStore = 0 in {
+ defm PLXV :
+ 8LS_DForm_R_SI34_XT6_RA5_p<25, (outs vsrc:$XT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plxv $XT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLFS :
+ MLS_DForm_R_SI34_RTA5_MEM_p<48, (outs f4rc:$FRT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plfs $FRT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLFD :
+ MLS_DForm_R_SI34_RTA5_MEM_p<50, (outs f8rc:$FRT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plfd $FRT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLXSSP :
+ 8LS_DForm_R_SI34_RTA5_p<43, (outs vfrc:$VRT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plxssp $VRT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLXSD :
+ 8LS_DForm_R_SI34_RTA5_p<42, (outs vfrc:$VRT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plxsd $VRT, $D_RA",
+ IIC_LdStLFD>;
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ defm PLBZ8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<34, (outs g8rc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plbz $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLHZ8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<40, (outs g8rc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plhz $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLHA8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<42, (outs g8rc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plha $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLWA8 :
+ 8LS_DForm_R_SI34_RTA5_p<41, (outs g8rc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plwa $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLWZ8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<32, (outs g8rc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plwz $RT, $D_RA",
+ IIC_LdStLFD>;
+ }
+ defm PLBZ :
+ MLS_DForm_R_SI34_RTA5_MEM_p<34, (outs gprc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plbz $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLHZ :
+ MLS_DForm_R_SI34_RTA5_MEM_p<40, (outs gprc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plhz $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLHA :
+ MLS_DForm_R_SI34_RTA5_MEM_p<42, (outs gprc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plha $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLWZ :
+ MLS_DForm_R_SI34_RTA5_MEM_p<32, (outs gprc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plwz $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLWA :
+ 8LS_DForm_R_SI34_RTA5_p<41, (outs gprc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plwa $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLD :
+ 8LS_DForm_R_SI34_RTA5_p<57, (outs g8rc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "pld $RT, $D_RA",
+ IIC_LdStLFD>;
+ }
+
+ let mayStore = 1, mayLoad = 0 in {
+ defm PSTXV :
+ 8LS_DForm_R_SI34_XT6_RA5_p<27, (outs), (ins vsrc:$XS, memri34:$D_RA),
+ (ins vsrc:$XS, memri34_pcrel:$D_RA),
+ "pstxv $XS, $D_RA", IIC_LdStLFD>;
+ defm PSTFS :
+ MLS_DForm_R_SI34_RTA5_MEM_p<52, (outs), (ins f4rc:$FRS, memri34:$D_RA),
+ (ins f4rc:$FRS, memri34_pcrel:$D_RA),
+ "pstfs $FRS, $D_RA", IIC_LdStLFD>;
+ defm PSTFD :
+ MLS_DForm_R_SI34_RTA5_MEM_p<54, (outs), (ins f8rc:$FRS, memri34:$D_RA),
+ (ins f8rc:$FRS, memri34_pcrel:$D_RA),
+ "pstfd $FRS, $D_RA", IIC_LdStLFD>;
+ defm PSTXSSP :
+ 8LS_DForm_R_SI34_RTA5_p<47, (outs), (ins vfrc:$VRS, memri34:$D_RA),
+ (ins vfrc:$VRS, memri34_pcrel:$D_RA),
+ "pstxssp $VRS, $D_RA", IIC_LdStLFD>;
+ defm PSTXSD :
+ 8LS_DForm_R_SI34_RTA5_p<46, (outs), (ins vfrc:$VRS, memri34:$D_RA),
+ (ins vfrc:$VRS, memri34_pcrel:$D_RA),
+ "pstxsd $VRS, $D_RA", IIC_LdStLFD>;
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ defm PSTB8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<38, (outs), (ins g8rc:$RS, memri34:$D_RA),
+ (ins g8rc:$RS, memri34_pcrel:$D_RA),
+ "pstb $RS, $D_RA", IIC_LdStLFD>;
+ defm PSTH8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<44, (outs), (ins g8rc:$RS, memri34:$D_RA),
+ (ins g8rc:$RS, memri34_pcrel:$D_RA),
+ "psth $RS, $D_RA", IIC_LdStLFD>;
+ defm PSTW8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<36, (outs), (ins g8rc:$RS, memri34:$D_RA),
+ (ins g8rc:$RS, memri34_pcrel:$D_RA),
+ "pstw $RS, $D_RA", IIC_LdStLFD>;
+ }
+ defm PSTB :
+ MLS_DForm_R_SI34_RTA5_MEM_p<38, (outs), (ins gprc:$RS, memri34:$D_RA),
+ (ins gprc:$RS, memri34_pcrel:$D_RA),
+ "pstb $RS, $D_RA", IIC_LdStLFD>;
+ defm PSTH :
+ MLS_DForm_R_SI34_RTA5_MEM_p<44, (outs), (ins gprc:$RS, memri34:$D_RA),
+ (ins gprc:$RS, memri34_pcrel:$D_RA),
+ "psth $RS, $D_RA", IIC_LdStLFD>;
+ defm PSTW :
+ MLS_DForm_R_SI34_RTA5_MEM_p<36, (outs), (ins gprc:$RS, memri34:$D_RA),
+ (ins gprc:$RS, memri34_pcrel:$D_RA),
+ "pstw $RS, $D_RA", IIC_LdStLFD>;
+ defm PSTD :
+ 8LS_DForm_R_SI34_RTA5_p<61, (outs), (ins g8rc:$RS, memri34:$D_RA),
+ (ins g8rc:$RS, memri34_pcrel:$D_RA),
+ "pstd $RS, $D_RA", IIC_LdStLFD>;
+ }
+}
+
+// TODO: We have an added complexity of 500 here. This is only a temporary
+// solution to have tablegen consider these patterns first. The way we do
+// addressing for PowerPC is complex depending on available D form, X form, or
+// aligned D form loads/stores like DS and DQ forms. The prefixed
+// instructions in this file also add PC-relative loads/stores and D form
+// loads/stores with 34-bit immediates. It is very difficult to force
+// instruction selection to consistently pick these first without the current
+// added complexity. Once pc-relative implementation is complete, a set of
+// follow-up patches will address this refactoring and the AddedComplexity will
+// be removed.
+let Predicates = [PCRelativeMemops], AddedComplexity = 500 in {
+ // Load i32
+ def : Pat<(i32 (zextloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLBZpc $ga, 0)>;
+ def : Pat<(i32 (extloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLBZpc $ga, 0)>;
+ def : Pat<(i32 (sextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLHApc $ga, 0)>;
+ def : Pat<(i32 (zextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLHZpc $ga, 0)>;
+ def : Pat<(i32 (extloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLHZpc $ga, 0)>;
+ def : Pat<(i32 (load (PPCmatpcreladdr pcreladdr:$ga))), (PLWZpc $ga, 0)>;
+
+ // Store i32
+ def : Pat<(truncstorei8 i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTBpc $RS, $ga, 0)>;
+ def : Pat<(truncstorei16 i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTHpc $RS, $ga, 0)>;
+ def : Pat<(store i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTWpc $RS, $ga, 0)>;
+
+ // Load i64
+ def : Pat<(i64 (zextloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLBZ8pc $ga, 0)>;
+ def : Pat<(i64 (extloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLBZ8pc $ga, 0)>;
+ def : Pat<(i64 (sextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLHA8pc $ga, 0)>;
+ def : Pat<(i64 (zextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLHZ8pc $ga, 0)>;
+ def : Pat<(i64 (extloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLHZ8pc $ga, 0)>;
+ def : Pat<(i64 (zextloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLWZ8pc $ga, 0)>;
+ def : Pat<(i64 (sextloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLWA8pc $ga, 0)>;
+ def : Pat<(i64 (extloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLWZ8pc $ga, 0)>;
+ def : Pat<(i64 (load (PPCmatpcreladdr pcreladdr:$ga))), (PLDpc $ga, 0)>;
+
+ // Store i64
+ def : Pat<(truncstorei8 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTB8pc $RS, $ga, 0)>;
+ def : Pat<(truncstorei16 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTH8pc $RS, $ga, 0)>;
+ def : Pat<(truncstorei32 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTW8pc $RS, $ga, 0)>;
+ def : Pat<(store i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTDpc $RS, $ga, 0)>;
+
+ // Load f32
+ def : Pat<(f32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLFSpc $addr, 0)>;
+
+ // Store f32
+ def : Pat<(store f32:$FRS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTFSpc $FRS, $ga, 0)>;
+
+ // Load f64
+ def : Pat<(f64 (extloadf32 (PPCmatpcreladdr pcreladdr:$addr))),
+ (COPY_TO_REGCLASS (PLFSpc $addr, 0), VSFRC)>;
+ def : Pat<(f64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLFDpc $addr, 0)>;
+
+ // Store f64
+ def : Pat<(store f64:$FRS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTFDpc $FRS, $ga, 0)>;
+
+ // Load f128
+ def : Pat<(f128 (load (PPCmatpcreladdr pcreladdr:$addr))),
+ (COPY_TO_REGCLASS (PLXVpc $addr, 0), VRRC)>;
+
+ // Store f128
+ def : Pat<(store f128:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTXVpc (COPY_TO_REGCLASS $XS, VSRC), $ga, 0)>;
+
+ // Load v4i32
+ def : Pat<(v4i32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+
+ // Store v4i32
+ def : Pat<(store v4i32:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTXVpc $XS, $ga, 0)>;
+
+ // Load v2i64
+ def : Pat<(v2i64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+
+ // Store v2i64
+ def : Pat<(store v2i64:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTXVpc $XS, $ga, 0)>;
+
+ // Load v4f32
+ def : Pat<(v4f32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+
+ // Store v4f32
+ def : Pat<(store v4f32:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTXVpc $XS, $ga, 0)>;
+
+ // Load v2f64
+ def : Pat<(v2f64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+
+ // Store v2f64
+ def : Pat<(store v2f64:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTXVpc $XS, $ga, 0)>;
+
+ // Atomic Load
+ def : Pat<(atomic_load_8 (PPCmatpcreladdr pcreladdr:$ga)),
+ (PLBZpc $ga, 0)>;
+ def : Pat<(atomic_load_16 (PPCmatpcreladdr pcreladdr:$ga)),
+ (PLHZpc $ga, 0)>;
+ def : Pat<(atomic_load_32 (PPCmatpcreladdr pcreladdr:$ga)),
+ (PLWZpc $ga, 0)>;
+ def : Pat<(atomic_load_64 (PPCmatpcreladdr pcreladdr:$ga)),
+ (PLDpc $ga, 0)>;
+
+ // Atomic Store
+ def : Pat<(atomic_store_8 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
+ (PSTBpc $RS, $ga, 0)>;
+ def : Pat<(atomic_store_16 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
+ (PSTHpc $RS, $ga, 0)>;
+ def : Pat<(atomic_store_32 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
+ (PSTWpc $RS, $ga, 0)>;
+ def : Pat<(atomic_store_8 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+ (PSTB8pc $RS, $ga, 0)>;
+ def : Pat<(atomic_store_16 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+ (PSTH8pc $RS, $ga, 0)>;
+ def : Pat<(atomic_store_32 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+ (PSTW8pc $RS, $ga, 0)>;
+ def : Pat<(atomic_store_64 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+ (PSTDpc $RS, $ga, 0)>;
+
+ // Special Cases For PPCstore_scal_int_from_vsr
+ def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)),
+ (PPCmatpcreladdr pcreladdr:$dst), 8),
+ (PSTXSDpc (XSCVDPSXDS f64:$src), $dst, 0)>;
+ def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)),
+ (PPCmatpcreladdr pcreladdr:$dst), 8),
+ (PSTXSDpc (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), $dst, 0)>;
+
+ def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)),
+ (PPCmatpcreladdr pcreladdr:$dst), 8),
+ (PSTXSDpc (XSCVDPUXDS f64:$src), $dst, 0)>;
+ def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)),
+ (PPCmatpcreladdr pcreladdr:$dst), 8),
+ (PSTXSDpc (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), $dst, 0)>;
+
+ // If the PPCmatpcreladdr node is not caught by any other pattern it should be
+ // caught here and turned into a paddi instruction to materialize the address.
+ def : Pat<(PPCmatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
+}
+
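The net effect of the block above: any access whose address arrives as a PPCmatpcreladdr node selects one of the prefixed pc-relative instructions, with the R=1 (", 1") form supplied by the *_p multiclasses, and the final pattern is the catch-all that merely materializes the address. Roughly, for a hypothetical global @gval (a sketch, not output taken from the patch):

//   %v = load i32, i32* @gval    ->  (PLWZpc @gval, 0)      ; "Load i32" above
//   store i32 %v, i32* @gval     ->  (PSTWpc $v, @gval, 0)  ; "Store i32" above
//   address of @gval by itself   ->  (PADDI8pc 0, @gval)    ; catch-all pattern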
+let Predicates = [PrefixInstrs] in {
+ def XXSPLTIW : 8RR_DForm_IMM32_XT6<32, 3, (outs vsrc:$XT),
+ (ins i32imm:$IMM32),
+ "xxspltiw $XT, $IMM32", IIC_VecGeneral,
+ []>;
+ def XXSPLTIDP : 8RR_DForm_IMM32_XT6<32, 2, (outs vsrc:$XT),
+ (ins i32imm:$IMM32),
+ "xxspltidp $XT, $IMM32", IIC_VecGeneral,
+ [(set v2f64:$XT,
+ (PPCxxspltidp i32:$IMM32))]>;
+ def XXSPLTI32DX :
+ 8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT),
+ (ins vsrc:$XTi, u1imm:$IX, i32imm:$IMM32),
+ "xxsplti32dx $XT, $IX, $IMM32", IIC_VecGeneral,
+ [(set v2i64:$XT,
+ (PPCxxsplti32dx v2i64:$XTi, i32:$IX,
+ i32:$IMM32))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XXPERMX :
+ 8RR_XX4Form_IMM3_XTABC6<34, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
+ vsrc:$XC, u3imm:$UIM),
+ "xxpermx $XT, $XA, $XB, $XC, $UIM",
+ IIC_VecPerm, []>;
+ def XXBLENDVB :
+ 8RR_XX4Form_XTABC6<33, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
+ vsrc:$XC), "xxblendvb $XT, $XA, $XB, $XC",
+ IIC_VecGeneral, []>;
+ def XXBLENDVH :
+ 8RR_XX4Form_XTABC6<33, 1, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
+ vsrc:$XC), "xxblendvh $XT, $XA, $XB, $XC",
+ IIC_VecGeneral, []>;
+ def XXBLENDVW :
+ 8RR_XX4Form_XTABC6<33, 2, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
+ vsrc:$XC), "xxblendvw $XT, $XA, $XB, $XC",
+ IIC_VecGeneral, []>;
+ def XXBLENDVD :
+ 8RR_XX4Form_XTABC6<33, 3, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
+ vsrc:$XC), "xxblendvd $XT, $XA, $XB, $XC",
+ IIC_VecGeneral, []>;
+}
+
+let Predicates = [IsISA3_1] in {
+ def VSLDBI : VNForm_VTAB5_SD3<22, 0, (outs vrrc:$VRT),
+ (ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH),
+ "vsldbi $VRT, $VRA, $VRB, $SH",
+ IIC_VecGeneral,
+ [(set v16i8:$VRT,
+ (int_ppc_altivec_vsldbi v16i8:$VRA,
+ v16i8:$VRB,
+ i32:$SH))]>;
+ def VSRDBI : VNForm_VTAB5_SD3<22, 1, (outs vrrc:$VRT),
+ (ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH),
+ "vsrdbi $VRT, $VRA, $VRB, $SH",
+ IIC_VecGeneral,
+ [(set v16i8:$VRT,
+ (int_ppc_altivec_vsrdbi v16i8:$VRA,
+ v16i8:$VRB,
+ i32:$SH))]>;
+ def VINSW :
+ VXForm_VRT5_UIM5_RB5_ins<207, "vinsw",
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vinsw v4i32:$vDi, i64:$rB,
+ timm:$UIM))]>;
+ def VINSD :
+ VXForm_VRT5_UIM5_RB5_ins<463, "vinsd",
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vinsd v2i64:$vDi, i64:$rB,
+ timm:$UIM))]>;
+ def VINSBVLX :
+ VXForm_VTB5_RA5_ins<15, "vinsbvlx",
+ [(set v16i8:$vD,
+ (int_ppc_altivec_vinsbvlx v16i8:$vDi, i64:$rA,
+ v16i8:$vB))]>;
+ def VINSBVRX :
+ VXForm_VTB5_RA5_ins<271, "vinsbvrx",
+ [(set v16i8:$vD,
+ (int_ppc_altivec_vinsbvrx v16i8:$vDi, i64:$rA,
+ v16i8:$vB))]>;
+ def VINSHVLX :
+ VXForm_VTB5_RA5_ins<79, "vinshvlx",
+ [(set v8i16:$vD,
+ (int_ppc_altivec_vinshvlx v8i16:$vDi, i64:$rA,
+ v8i16:$vB))]>;
+ def VINSHVRX :
+ VXForm_VTB5_RA5_ins<335, "vinshvrx",
+ [(set v8i16:$vD,
+ (int_ppc_altivec_vinshvrx v8i16:$vDi, i64:$rA,
+ v8i16:$vB))]>;
+ def VINSWVLX :
+ VXForm_VTB5_RA5_ins<143, "vinswvlx",
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vinswvlx v4i32:$vDi, i64:$rA,
+ v4i32:$vB))]>;
+ def VINSWVRX :
+ VXForm_VTB5_RA5_ins<399, "vinswvrx",
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vinswvrx v4i32:$vDi, i64:$rA,
+ v4i32:$vB))]>;
+ def VINSBLX :
+ VXForm_VRT5_RAB5_ins<527, "vinsblx",
+ [(set v16i8:$vD,
+ (int_ppc_altivec_vinsblx v16i8:$vDi, i64:$rA,
+ i64:$rB))]>;
+ def VINSBRX :
+ VXForm_VRT5_RAB5_ins<783, "vinsbrx",
+ [(set v16i8:$vD,
+ (int_ppc_altivec_vinsbrx v16i8:$vDi, i64:$rA,
+ i64:$rB))]>;
+ def VINSHLX :
+ VXForm_VRT5_RAB5_ins<591, "vinshlx",
+ [(set v8i16:$vD,
+ (int_ppc_altivec_vinshlx v8i16:$vDi, i64:$rA,
+ i64:$rB))]>;
+ def VINSHRX :
+ VXForm_VRT5_RAB5_ins<847, "vinshrx",
+ [(set v8i16:$vD,
+ (int_ppc_altivec_vinshrx v8i16:$vDi, i64:$rA,
+ i64:$rB))]>;
+ def VINSWLX :
+ VXForm_VRT5_RAB5_ins<655, "vinswlx",
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vinswlx v4i32:$vDi, i64:$rA,
+ i64:$rB))]>;
+ def VINSWRX :
+ VXForm_VRT5_RAB5_ins<911, "vinswrx",
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vinswrx v4i32:$vDi, i64:$rA,
+ i64:$rB))]>;
+ def VINSDLX :
+ VXForm_VRT5_RAB5_ins<719, "vinsdlx",
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vinsdlx v2i64:$vDi, i64:$rA,
+ i64:$rB))]>;
+ def VINSDRX :
+ VXForm_VRT5_RAB5_ins<975, "vinsdrx",
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vinsdrx v2i64:$vDi, i64:$rA,
+ i64:$rB))]>;
+
+ def VPDEPD : VXForm_1<1485, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vpdepd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vpdepd v2i64:$vA, v2i64:$vB))]>;
+ def VPEXTD : VXForm_1<1421, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vpextd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vpextd v2i64:$vA, v2i64:$vB))]>;
+ def PDEPD : XForm_6<31, 156, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "pdepd $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i64:$rA, (int_ppc_pdepd i64:$rS, i64:$rB))]>;
+ def PEXTD : XForm_6<31, 188, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "pextd $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i64:$rA, (int_ppc_pextd i64:$rS, i64:$rB))]>;
+ def VCFUGED : VXForm_1<1357, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vcfuged $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vcfuged v2i64:$vA, v2i64:$vB))]>;
+ def VGNB : VXForm_RD5_N3_VB5<1228, (outs g8rc:$rD), (ins vrrc:$vB, u3imm:$N),
+ "vgnb $rD, $vB, $N", IIC_VecGeneral,
+ [(set i64:$rD,
+ (int_ppc_altivec_vgnb v1i128:$vB, timm:$N))]>;
+ def CFUGED : XForm_6<31, 220, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "cfuged $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i64:$rA, (int_ppc_cfuged i64:$rS, i64:$rB))]>;
+ def XXEVAL :
+ 8RR_XX4Form_IMM8_XTAB6<34, 1, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
+ vsrc:$XC, u8imm:$IMM),
+ "xxeval $XT, $XA, $XB, $XC, $IMM", IIC_VecGeneral,
+ [(set v2i64:$XT, (int_ppc_vsx_xxeval v2i64:$XA,
+ v2i64:$XB, v2i64:$XC, timm:$IMM))]>;
+ def VCLZDM : VXForm_1<1924, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vclzdm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vclzdm v2i64:$vA, v2i64:$vB))]>;
+ def VCTZDM : VXForm_1<1988, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vctzdm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vctzdm v2i64:$vA, v2i64:$vB))]>;
+ def CNTLZDM : XForm_6<31, 59, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "cntlzdm $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i64:$rA,
+ (int_ppc_cntlzdm i64:$rS, i64:$rB))]>;
+ def CNTTZDM : XForm_6<31, 571, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "cnttzdm $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i64:$rA,
+ (int_ppc_cnttzdm i64:$rS, i64:$rB))]>;
+ def XXGENPCVBM :
+ XForm_XT6_IMM5_VB5<60, 916, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM),
+ "xxgenpcvbm $XT, $VRB, $IMM", IIC_VecGeneral, []>;
+ def XXGENPCVHM :
+ XForm_XT6_IMM5_VB5<60, 917, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM),
+ "xxgenpcvhm $XT, $VRB, $IMM", IIC_VecGeneral, []>;
+ def XXGENPCVWM :
+ XForm_XT6_IMM5_VB5<60, 948, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM),
+ "xxgenpcvwm $XT, $VRB, $IMM", IIC_VecGeneral, []>;
+ def XXGENPCVDM :
+ XForm_XT6_IMM5_VB5<60, 949, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM),
+ "xxgenpcvdm $XT, $VRB, $IMM", IIC_VecGeneral, []>;
+ def VCLRLB : VXForm_1<397, (outs vrrc:$vD), (ins vrrc:$vA, gprc:$rB),
+ "vclrlb $vD, $vA, $rB", IIC_VecGeneral,
+ [(set v16i8:$vD,
+ (int_ppc_altivec_vclrlb v16i8:$vA, i32:$rB))]>;
+ def VCLRRB : VXForm_1<461, (outs vrrc:$vD), (ins vrrc:$vA, gprc:$rB),
+ "vclrrb $vD, $vA, $rB", IIC_VecGeneral,
+ [(set v16i8:$vD,
+ (int_ppc_altivec_vclrrb v16i8:$vA, i32:$rB))]>;
+
+ def XVTLSBB : XX2_BF3_XO5_XB6_XO9<60, 2, 475, (outs crrc:$BF), (ins vsrc:$XB),
+ "xvtlsbb $BF, $XB", IIC_VecGeneral, []>;
+
+ // The XFormMemOp flag for the following 8 instructions is set on
+ // the instruction format.
+ let mayLoad = 1, mayStore = 0 in {
+ def LXVRBX : X_XT6_RA5_RB5<31, 13, "lxvrbx", vsrc, []>;
+ def LXVRHX : X_XT6_RA5_RB5<31, 45, "lxvrhx", vsrc, []>;
+ def LXVRWX : X_XT6_RA5_RB5<31, 77, "lxvrwx", vsrc, []>;
+ def LXVRDX : X_XT6_RA5_RB5<31, 109, "lxvrdx", vsrc, []>;
+ }
+
+ let mayLoad = 0, mayStore = 1 in {
+ def STXVRBX : X_XS6_RA5_RB5<31, 141, "stxvrbx", vsrc, []>;
+ def STXVRHX : X_XS6_RA5_RB5<31, 173, "stxvrhx", vsrc, []>;
+ def STXVRWX : X_XS6_RA5_RB5<31, 205, "stxvrwx", vsrc, []>;
+ def STXVRDX : X_XS6_RA5_RB5<31, 237, "stxvrdx", vsrc, []>;
+ }
+}
+
+//---------------------------- Anonymous Patterns ----------------------------//
+let Predicates = [IsISA3_1] in {
+ def : Pat<(v16i8 (int_ppc_vsx_xxgenpcvbm v16i8:$VRB, imm:$IMM)),
+ (v16i8 (COPY_TO_REGCLASS (XXGENPCVBM $VRB, imm:$IMM), VRRC))>;
+ def : Pat<(v8i16 (int_ppc_vsx_xxgenpcvhm v8i16:$VRB, imm:$IMM)),
+ (v8i16 (COPY_TO_REGCLASS (XXGENPCVHM $VRB, imm:$IMM), VRRC))>;
+ def : Pat<(v4i32 (int_ppc_vsx_xxgenpcvwm v4i32:$VRB, imm:$IMM)),
+ (v4i32 (COPY_TO_REGCLASS (XXGENPCVWM $VRB, imm:$IMM), VRRC))>;
+ def : Pat<(v2i64 (int_ppc_vsx_xxgenpcvdm v2i64:$VRB, imm:$IMM)),
+ (v2i64 (COPY_TO_REGCLASS (XXGENPCVDM $VRB, imm:$IMM), VRRC))>;
+ def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, -1)),
+ (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_lt)>;
+ def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)),
+ (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
+}
+
+let AddedComplexity = 400, Predicates = [PrefixInstrs] in {
+ def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A,
+ i32immNonAllOneNonZero:$A,
+ i32immNonAllOneNonZero:$A,
+ i32immNonAllOneNonZero:$A)),
+ (v4i32 (XXSPLTIW imm:$A))>;
+ def : Pat<(f32 nzFPImmAsi32:$A),
+ (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)),
+ VSFRC)>;
+ def : Pat<(f64 nzFPImmAsi32:$A),
+ (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)),
+ VSFRC)>;
+}
+
+let Predicates = [PrefixInstrs] in {
+ def : Pat<(v16i8 (int_ppc_vsx_xxpermx v16i8:$A, v16i8:$B, v16i8:$C, timm:$D)),
+ (COPY_TO_REGCLASS (XXPERMX (COPY_TO_REGCLASS $A, VSRC),
+ (COPY_TO_REGCLASS $B, VSRC),
+ (COPY_TO_REGCLASS $C, VSRC), $D), VSRC)>;
+ def : Pat<(v16i8 (int_ppc_vsx_xxblendvb v16i8:$A, v16i8:$B, v16i8:$C)),
+ (COPY_TO_REGCLASS
+ (XXBLENDVB (COPY_TO_REGCLASS $A, VSRC),
+ (COPY_TO_REGCLASS $B, VSRC),
+ (COPY_TO_REGCLASS $C, VSRC)), VSRC)>;
+ def : Pat<(v8i16 (int_ppc_vsx_xxblendvh v8i16:$A, v8i16:$B, v8i16:$C)),
+ (COPY_TO_REGCLASS
+ (XXBLENDVH (COPY_TO_REGCLASS $A, VSRC),
+ (COPY_TO_REGCLASS $B, VSRC),
+ (COPY_TO_REGCLASS $C, VSRC)), VSRC)>;
+ def : Pat<(int_ppc_vsx_xxblendvw v4i32:$A, v4i32:$B, v4i32:$C),
+ (XXBLENDVW $A, $B, $C)>;
+ def : Pat<(int_ppc_vsx_xxblendvd v2i64:$A, v2i64:$B, v2i64:$C),
+ (XXBLENDVD $A, $B, $C)>;
+}
+
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrQPX.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrQPX.td
index d67041d46d9f..2265af2815cb 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrQPX.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrQPX.td
@@ -1,9 +1,9 @@
//===- PPCInstrQPX.td - The PowerPC QPX Extension --*- tablegen -*-===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the QPX extension to the PowerPC instruction set.
@@ -101,7 +101,7 @@ let FastIselShouldIgnore = 1 in // FastIsel should ignore all u12 instrs.
//===----------------------------------------------------------------------===//
// Instruction Definitions.
-def HasQPX : Predicate<"PPCSubTarget->hasQPX()">;
+def HasQPX : Predicate<"Subtarget->hasQPX()">;
let Predicates = [HasQPX] in {
let DecoderNamespace = "QPX" in {
let hasSideEffects = 0 in { // QPX instructions don't have side effects.
@@ -167,48 +167,48 @@ let Uses = [RM] in {
// Multiply-add instructions
def QVFMADD : AForm_1<4, 29,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC, qfrc:$FRB),
"qvfmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC, v4f64:$FRB))]>;
let isCodeGenOnly = 1 in
def QVFMADDS : QPXA1_Int<0, 29, "qvfmadds", int_ppc_qpx_qvfmadds>;
def QVFMADDSs : AForm_1<0, 29,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC, qsrc:$FRB),
"qvfmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC, v4f32:$FRB))]>;
def QVFNMADD : AForm_1<4, 31,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC, qfrc:$FRB),
"qvfnmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC,
v4f64:$FRB)))]>;
let isCodeGenOnly = 1 in
def QVFNMADDS : QPXA1_Int<0, 31, "qvfnmadds", int_ppc_qpx_qvfnmadds>;
def QVFNMADDSs : AForm_1<0, 31,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC, qsrc:$FRB),
"qvfnmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC,
v4f32:$FRB)))]>;
def QVFMSUB : AForm_1<4, 28,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC, qfrc:$FRB),
"qvfmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC,
(fneg v4f64:$FRB)))]>;
let isCodeGenOnly = 1 in
def QVFMSUBS : QPXA1_Int<0, 28, "qvfmsubs", int_ppc_qpx_qvfmsubs>;
def QVFMSUBSs : AForm_1<0, 28,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC, qsrc:$FRB),
"qvfmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC,
(fneg v4f32:$FRB)))]>;
def QVFNMSUB : AForm_1<4, 30,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC, qfrc:$FRB),
"qvfnmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC,
(fneg v4f64:$FRB))))]>;
let isCodeGenOnly = 1 in
def QVFNMSUBS : QPXA1_Int<0, 30, "qvfnmsubs", int_ppc_qpx_qvfnmsubs>;
def QVFNMSUBSs : AForm_1<0, 30,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC, qsrc:$FRB),
"qvfnmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC,
(fneg v4f32:$FRB))))]>;
@@ -899,13 +899,13 @@ def : Pat<(int_ppc_qpx_qvfmul v4f64:$A, v4f64:$B),
// Additional QVFNMSUB patterns: -a*c + b == -(a*c - b)
def : Pat<(fma (fneg v4f64:$A), v4f64:$C, v4f64:$B),
- (QVFNMSUB $A, $B, $C)>;
+ (QVFNMSUB $A, $C, $B)>;
def : Pat<(fma v4f64:$A, (fneg v4f64:$C), v4f64:$B),
- (QVFNMSUB $A, $B, $C)>;
+ (QVFNMSUB $A, $C, $B)>;
def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
- (QVFNMSUBSs $A, $B, $C)>;
+ (QVFNMSUBSs $A, $C, $B)>;
def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
- (QVFNMSUBSs $A, $B, $C)>;
+ (QVFNMSUBSs $A, $C, $B)>;
def : Pat<(int_ppc_qpx_qvfmadd v4f64:$A, v4f64:$B, v4f64:$C),
(QVFMADD $A, $B, $C)>;
@@ -1210,4 +1210,3 @@ def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB),
(QVFTSTNANbs $FRB, $FRB), (i32 7)),
$FRB, $FRA)>;
}
-
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrSPE.td
index 935c3044ae47..858eb0c9fe50 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrSPE.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrSPE.td
@@ -158,7 +158,7 @@ def EFDCFSF : EFXForm_2a<755, (outs sperc:$RT), (ins spe4rc:$RB),
def EFDCFSI : EFXForm_2a<753, (outs sperc:$RT), (ins gprc:$RB),
"efdcfsi $RT, $RB", IIC_FPDGeneral,
- [(set f64:$RT, (sint_to_fp i32:$RB))]>;
+ [(set f64:$RT, (any_sint_to_fp i32:$RB))]>;
def EFDCFSID : EFXForm_2a<739, (outs sperc:$RT), (ins gprc:$RB),
"efdcfsid $RT, $RB", IIC_FPDGeneral,
@@ -169,7 +169,7 @@ def EFDCFUF : EFXForm_2a<754, (outs sperc:$RT), (ins spe4rc:$RB),
def EFDCFUI : EFXForm_2a<752, (outs sperc:$RT), (ins gprc:$RB),
"efdcfui $RT, $RB", IIC_FPDGeneral,
- [(set f64:$RT, (uint_to_fp i32:$RB))]>;
+ [(set f64:$RT, (any_uint_to_fp i32:$RB))]>;
def EFDCFUID : EFXForm_2a<738, (outs sperc:$RT), (ins gprc:$RB),
"efdcfuid $RT, $RB", IIC_FPDGeneral,
@@ -197,7 +197,7 @@ def EFDCTSIDZ : EFXForm_2a<747, (outs gprc:$RT), (ins sperc:$RB),
def EFDCTSIZ : EFXForm_2a<762, (outs gprc:$RT), (ins sperc:$RB),
"efdctsiz $RT, $RB", IIC_FPDGeneral,
- [(set i32:$RT, (fp_to_sint f64:$RB))]>;
+ [(set i32:$RT, (any_fp_to_sint f64:$RB))]>;
def EFDCTUF : EFXForm_2a<758, (outs sperc:$RT), (ins spe4rc:$RB),
"efdctuf $RT, $RB", IIC_FPDGeneral, []>;
@@ -212,7 +212,7 @@ def EFDCTUIDZ : EFXForm_2a<746, (outs gprc:$RT), (ins sperc:$RB),
def EFDCTUIZ : EFXForm_2a<760, (outs gprc:$RT), (ins sperc:$RB),
"efdctuiz $RT, $RB", IIC_FPDGeneral,
- [(set i32:$RT, (fp_to_uint f64:$RB))]>;
+ [(set i32:$RT, (any_fp_to_uint f64:$RB))]>;
def EFDDIV : EFXForm_1<745, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
"efddiv $RT, $RA, $RB", IIC_FPDivD,
@@ -261,14 +261,14 @@ def EFSCFSF : EFXForm_2a<723, (outs spe4rc:$RT), (ins spe4rc:$RB),
def EFSCFSI : EFXForm_2a<721, (outs spe4rc:$RT), (ins gprc:$RB),
"efscfsi $RT, $RB", IIC_FPSGeneral,
- [(set f32:$RT, (sint_to_fp i32:$RB))]>;
+ [(set f32:$RT, (any_sint_to_fp i32:$RB))]>;
def EFSCFUF : EFXForm_2a<722, (outs spe4rc:$RT), (ins spe4rc:$RB),
"efscfuf $RT, $RB", IIC_FPSGeneral, []>;
def EFSCFUI : EFXForm_2a<720, (outs spe4rc:$RT), (ins gprc:$RB),
"efscfui $RT, $RB", IIC_FPSGeneral,
- [(set f32:$RT, (uint_to_fp i32:$RB))]>;
+ [(set f32:$RT, (any_uint_to_fp i32:$RB))]>;
let isCompare = 1 in {
def EFSCMPEQ : EFXForm_3<718, (outs crrc:$crD), (ins spe4rc:$RA, spe4rc:$RB),
@@ -288,7 +288,7 @@ def EFSCTSI : EFXForm_2a<725, (outs gprc:$RT), (ins spe4rc:$RB),
def EFSCTSIZ : EFXForm_2a<730, (outs gprc:$RT), (ins spe4rc:$RB),
"efsctsiz $RT, $RB", IIC_FPSGeneral,
- [(set i32:$RT, (fp_to_sint f32:$RB))]>;
+ [(set i32:$RT, (any_fp_to_sint f32:$RB))]>;
def EFSCTUF : EFXForm_2a<726, (outs sperc:$RT), (ins spe4rc:$RB),
"efsctuf $RT, $RB", IIC_FPSGeneral, []>;
@@ -299,7 +299,7 @@ def EFSCTUI : EFXForm_2a<724, (outs gprc:$RT), (ins spe4rc:$RB),
def EFSCTUIZ : EFXForm_2a<728, (outs gprc:$RT), (ins spe4rc:$RB),
"efsctuiz $RT, $RB", IIC_FPSGeneral,
- [(set i32:$RT, (fp_to_uint f32:$RB))]>;
+ [(set i32:$RT, (any_fp_to_uint f32:$RB))]>;
def EFSDIV : EFXForm_1<713, (outs spe4rc:$RT), (ins spe4rc:$RA, spe4rc:$RB),
"efsdiv $RT, $RA, $RB", IIC_FPDivD,
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 95e5ff6b130d..9ba5058a6f81 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1,9 +1,9 @@
//===- PPCInstrVSX.td - The PowerPC VSX Extension --*- tablegen -*-===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the VSX extension to the PowerPC instruction set.
@@ -25,6 +25,32 @@
// ** in PPCVSXSwapRemoval::gatherVectorInstructions(). **
// ****************************************************************************
+// *********************************** NOTE ***********************************
+// ** When adding new anonymous patterns to this file, please add them to **
+// ** the section titled Anonymous Patterns. Chances are that the existing **
+// ** predicate blocks already contain a combination of features that you **
+// ** are after. There is a list of blocks at the top of the section. If **
+// ** you definitely need a new combination of predicates, please add that **
+// ** combination to the list. **
+// ** File Structure: **
+// ** - Custom PPCISD node definitions **
+// ** - Predicate definitions: predicates to specify the subtargets for **
+// ** which an instruction or pattern can be emitted. **
+// ** - Instruction formats: classes instantiated by the instructions. **
+// ** These generally correspond to instruction formats in section 1.6 of **
+// ** the ISA document. **
+// ** - Instruction definitions: the actual definitions of the instructions **
+// ** often including input patterns that they match. **
+// ** - Helper DAG definitions: We define a number of dag objects to use as **
+// **   input or output patterns for conciseness of the code.                 **
+// ** - Anonymous patterns: input patterns that an instruction matches can **
+// ** often not be specified as part of the instruction definition, so an **
+// ** anonymous pattern must be specified mapping an input pattern to an **
+// ** output pattern. These are generally guarded by subtarget predicates. **
+// ** - Instruction aliases: used to define extended mnemonics for assembly **
+// ** printing (for example: xxswapd for xxpermdi with 0x2 as the imm). **
+// ****************************************************************************
+
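// An "anonymous pattern" in the sense of the note above is simply an unnamed
// def : Pat<...> guarded by subtarget predicates, mapping an input DAG to an
// output (instruction) DAG. A minimal sketch, borrowed from a pattern that
// appears further down in this file:
let Predicates = [HasVSX] in
def : Pat<(v4i32 (vnot_ppc v4i32:$A)),    // input pattern to match
          (v4i32 (XXLNOR $A, $A))>;       // output instruction to emit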
def PPCRegVSRCAsmOperand : AsmOperandClass {
let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber";
}
@@ -89,6 +115,7 @@ def SDT_PPCst_vec_be : SDTypeProfile<0, 2, [
SDTCisVec<0>, SDTCisPtrTy<1>
]>;
+//--------------------------- Custom PPC nodes -------------------------------//
def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
@@ -111,7 +138,24 @@ def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-
+def PPCSToV : SDNode<"PPCISD::SCALAR_TO_VECTOR_PERMUTED",
+ SDTypeProfile<1, 1, []>, []>;
+
+//-------------------------- Predicate definitions ---------------------------//
+def HasVSX : Predicate<"Subtarget->hasVSX()">;
+def IsLittleEndian : Predicate<"Subtarget->isLittleEndian()">;
+def IsBigEndian : Predicate<"!Subtarget->isLittleEndian()">;
+def HasOnlySwappingMemOps : Predicate<"!Subtarget->hasP9Vector()">;
+def HasP8Vector : Predicate<"Subtarget->hasP8Vector()">;
+def HasDirectMove : Predicate<"Subtarget->hasDirectMove()">;
+def NoP9Vector : Predicate<"!Subtarget->hasP9Vector()">;
+def HasP9Vector : Predicate<"Subtarget->hasP9Vector()">;
+def NoP9Altivec : Predicate<"!Subtarget->hasP9Altivec()">;
+
+//--------------------- VSX-specific instruction formats ---------------------//
+// By default, all VSX instructions are to be selected over their Altivec
+// counterparts and they do not have unmodeled side effects.
+let AddedComplexity = 400, hasSideEffects = 0 in {
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
string asmstr, InstrItinClass itin, Intrinsic Int,
ValueType OutTy, ValueType InTy> {
@@ -144,14 +188,119 @@ class XX3Form_2s<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
let XB = XA;
}
-def HasVSX : Predicate<"PPCSubTarget->hasVSX()">;
-def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">;
-def IsBigEndian : Predicate<"!PPCSubTarget->isLittleEndian()">;
-def HasOnlySwappingMemOps : Predicate<"!PPCSubTarget->hasP9Vector()">;
+let Predicates = [HasVSX, HasP9Vector] in {
+class X_VT5_XO5_VB5<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+ list<dag> pattern>
+ : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vrrc:$vB),
+ !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+
+// [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
+class X_VT5_XO5_VB5_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+ list<dag> pattern>
+ : X_VT5_XO5_VB5<opcode, xo2, xo, opc, pattern>, isRecordForm;
+
+// [PO VRT XO VRB XO /], but only the left 64 bits (or less) of VRB are used,
+// so we use a different operand class for VRB
+class X_VT5_XO5_VB5_TyVB<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+ RegisterOperand vbtype, list<dag> pattern>
+ : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vbtype:$vB),
+ !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+
+// [PO VRT XO VRB XO /]
+class X_VT5_XO5_VB5_VSFR<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+ list<dag> pattern>
+ : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vfrc:$vT), (ins vrrc:$vB),
+ !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+
+// [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
+class X_VT5_XO5_VB5_VSFR_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+ list<dag> pattern>
+ : X_VT5_XO5_VB5_VSFR<opcode, xo2, xo, opc, pattern>, isRecordForm;
+
+// [PO T XO B XO BX /]
+class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
+ list<dag> pattern>
+ : XX2_RD5_XO5_RS6<opcode, xo2, xo, (outs g8rc:$rT), (ins vsfrc:$XB),
+ !strconcat(opc, " $rT, $XB"), IIC_VecFP, pattern>;
-let Predicates = [HasVSX] in {
-let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
-let hasSideEffects = 0 in { // VSX instructions don't have side effects.
+// [PO T XO B XO BX TX]
+class XX2_XT6_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
+ RegisterOperand vtype, list<dag> pattern>
+ : XX2_RD6_XO5_RS6<opcode, xo2, xo, (outs vtype:$XT), (ins vtype:$XB),
+ !strconcat(opc, " $XT, $XB"), IIC_VecFP, pattern>;
+
+// [PO T A B XO AX BX TX], src and dest register use different operand class
+class XX3_XT5_XA5_XB5<bits<6> opcode, bits<8> xo, string opc,
+ RegisterOperand xty, RegisterOperand aty, RegisterOperand bty,
+ InstrItinClass itin, list<dag> pattern>
+ : XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB),
+ !strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;
+
+// [PO VRT VRA VRB XO /]
+class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
+ list<dag> pattern>
+ : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vA, vrrc:$vB),
+ !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>;
+
+// [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
+class X_VT5_VA5_VB5_Ro<bits<6> opcode, bits<10> xo, string opc,
+ list<dag> pattern>
+ : X_VT5_VA5_VB5<opcode, xo, opc, pattern>, isRecordForm;
+
+// [PO VRT VRA VRB XO /]
+class X_VT5_VA5_VB5_FMA<bits<6> opcode, bits<10> xo, string opc,
+ list<dag> pattern>
+ : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vTi, vrrc:$vA, vrrc:$vB),
+ !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>,
+ RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">;
+
+// [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
+class X_VT5_VA5_VB5_FMA_Ro<bits<6> opcode, bits<10> xo, string opc,
+ list<dag> pattern>
+ : X_VT5_VA5_VB5_FMA<opcode, xo, opc, pattern>, isRecordForm;
+
+class Z23_VT5_R1_VB5_RMC2_EX1<bits<6> opcode, bits<8> xo, bit ex, string opc,
+ list<dag> pattern>
+ : Z23Form_8<opcode, xo,
+ (outs vrrc:$vT), (ins u1imm:$r, vrrc:$vB, u2imm:$rmc),
+ !strconcat(opc, " $r, $vT, $vB, $rmc"), IIC_VecFP, pattern> {
+ let RC = ex;
+}
+
+// [PO BF // VRA VRB XO /]
+class X_BF3_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
+ list<dag> pattern>
+ : XForm_17<opcode, xo, (outs crrc:$crD), (ins vrrc:$VA, vrrc:$VB),
+ !strconcat(opc, " $crD, $VA, $VB"), IIC_FPCompare> {
+ let Pattern = pattern;
+}
+
+// [PO T RA RB XO TX] almost equal to [PO S RA RB XO SX], but with different
+// "out" and "in" dags
+class X_XT6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
+ RegisterOperand vtype, list<dag> pattern>
+ : XX1Form_memOp<opcode, xo, (outs vtype:$XT), (ins memrr:$src),
+ !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>;
+
+// [PO S RA RB XO SX]
+class X_XS6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
+ RegisterOperand vtype, list<dag> pattern>
+ : XX1Form_memOp<opcode, xo, (outs), (ins vtype:$XT, memrr:$dst),
+ !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>;
+} // Predicates = HasP9Vector
+} // AddedComplexity = 400, hasSideEffects = 0
+
+multiclass ScalToVecWPermute<ValueType Ty, dag In, dag NonPermOut, dag PermOut> {
+ def : Pat<(Ty (scalar_to_vector In)), (Ty NonPermOut)>;
+ def : Pat<(Ty (PPCSToV In)), (Ty PermOut)>;
+}
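// ScalToVecWPermute pairs a plain scalar_to_vector pattern with the new
// PPCSToV (SCALAR_TO_VECTOR_PERMUTED) node so that a single instantiation
// covers both forms. A hypothetical instantiation, only to illustrate the
// shape (the operand DAGs here are illustrative, not taken from this patch):
defm : ScalToVecWPermute<v2i64, (i64 i64:$A),
                         (SUBREG_TO_REG (i64 1), $A, sub_64),
                         (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64),
                                   (SUBREG_TO_REG (i64 1), $A, sub_64), 0)>;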
+
+//-------------------------- Instruction definitions -------------------------//
+// VSX instructions require the VSX feature; they are to be selected over
+// equivalent Altivec patterns (as they address a larger register set) and
+// they do not have unmodeled side effects.
+let Predicates = [HasVSX], AddedComplexity = 400 in {
+let hasSideEffects = 0 in {
// Load indexed instructions
let mayLoad = 1, mayStore = 0 in {
@@ -213,53 +362,53 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
}
} // mayStore
- let Uses = [RM] in {
+ let Uses = [RM], mayRaiseFPException = 1 in {
// Add/Mul Instructions
let isCommutable = 1 in {
def XSADDDP : XX3Form<60, 32,
(outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
"xsadddp $XT, $XA, $XB", IIC_VecFP,
- [(set f64:$XT, (fadd f64:$XA, f64:$XB))]>;
+ [(set f64:$XT, (any_fadd f64:$XA, f64:$XB))]>;
def XSMULDP : XX3Form<60, 48,
(outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
"xsmuldp $XT, $XA, $XB", IIC_VecFP,
- [(set f64:$XT, (fmul f64:$XA, f64:$XB))]>;
+ [(set f64:$XT, (any_fmul f64:$XA, f64:$XB))]>;
def XVADDDP : XX3Form<60, 96,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvadddp $XT, $XA, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fadd v2f64:$XA, v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_fadd v2f64:$XA, v2f64:$XB))]>;
def XVADDSP : XX3Form<60, 64,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvaddsp $XT, $XA, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fadd v4f32:$XA, v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_fadd v4f32:$XA, v4f32:$XB))]>;
def XVMULDP : XX3Form<60, 112,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvmuldp $XT, $XA, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fmul v2f64:$XA, v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_fmul v2f64:$XA, v2f64:$XB))]>;
def XVMULSP : XX3Form<60, 80,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvmulsp $XT, $XA, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fmul v4f32:$XA, v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_fmul v4f32:$XA, v4f32:$XB))]>;
}
// Subtract Instructions
def XSSUBDP : XX3Form<60, 40,
(outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
"xssubdp $XT, $XA, $XB", IIC_VecFP,
- [(set f64:$XT, (fsub f64:$XA, f64:$XB))]>;
+ [(set f64:$XT, (any_fsub f64:$XA, f64:$XB))]>;
def XVSUBDP : XX3Form<60, 104,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvsubdp $XT, $XA, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fsub v2f64:$XA, v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_fsub v2f64:$XA, v2f64:$XB))]>;
def XVSUBSP : XX3Form<60, 72,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvsubsp $XT, $XA, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fsub v4f32:$XA, v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_fsub v4f32:$XA, v4f32:$XB))]>;
// FMA Instructions
let BaseName = "XSMADDADP" in {
@@ -267,7 +416,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XSMADDADP : XX3Form<60, 33,
(outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
"xsmaddadp $XT, $XA, $XB", IIC_VecFP,
- [(set f64:$XT, (fma f64:$XA, f64:$XB, f64:$XTi))]>,
+ [(set f64:$XT, (any_fma f64:$XA, f64:$XB, f64:$XTi))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -283,7 +432,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XSMSUBADP : XX3Form<60, 49,
(outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
"xsmsubadp $XT, $XA, $XB", IIC_VecFP,
- [(set f64:$XT, (fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>,
+ [(set f64:$XT, (any_fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -299,7 +448,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XSNMADDADP : XX3Form<60, 161,
(outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
"xsnmaddadp $XT, $XA, $XB", IIC_VecFP,
- [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, f64:$XTi)))]>,
+ [(set f64:$XT, (fneg (any_fma f64:$XA, f64:$XB, f64:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -315,7 +464,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XSNMSUBADP : XX3Form<60, 177,
(outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
"xsnmsubadp $XT, $XA, $XB", IIC_VecFP,
- [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>,
+ [(set f64:$XT, (fneg (any_fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -331,7 +480,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XVMADDADP : XX3Form<60, 97,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvmaddadp $XT, $XA, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>,
+ [(set v2f64:$XT, (any_fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -347,7 +496,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XVMADDASP : XX3Form<60, 65,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvmaddasp $XT, $XA, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>,
+ [(set v4f32:$XT, (any_fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -363,7 +512,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XVMSUBADP : XX3Form<60, 113,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvmsubadp $XT, $XA, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>,
+ [(set v2f64:$XT, (any_fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -379,7 +528,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XVMSUBASP : XX3Form<60, 81,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvmsubasp $XT, $XA, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>,
+ [(set v4f32:$XT, (any_fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -395,7 +544,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XVNMADDADP : XX3Form<60, 225,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvnmaddadp $XT, $XA, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>,
+ [(set v2f64:$XT, (fneg (any_fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -427,7 +576,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XVNMSUBADP : XX3Form<60, 241,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvnmsubadp $XT, $XA, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>,
+ [(set v2f64:$XT, (fneg (any_fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -443,7 +592,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XVNMSUBASP : XX3Form<60, 209,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvnmsubasp $XT, $XA, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>,
+ [(set v4f32:$XT, (fneg (any_fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -458,11 +607,11 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XSDIVDP : XX3Form<60, 56,
(outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
"xsdivdp $XT, $XA, $XB", IIC_FPDivD,
- [(set f64:$XT, (fdiv f64:$XA, f64:$XB))]>;
+ [(set f64:$XT, (any_fdiv f64:$XA, f64:$XB))]>;
def XSSQRTDP : XX2Form<60, 75,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xssqrtdp $XT, $XB", IIC_FPSqrtD,
- [(set f64:$XT, (fsqrt f64:$XB))]>;
+ [(set f64:$XT, (any_fsqrt f64:$XB))]>;
def XSREDP : XX2Form<60, 90,
(outs vsfrc:$XT), (ins vsfrc:$XB),
@@ -483,20 +632,20 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XVDIVDP : XX3Form<60, 120,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvdivdp $XT, $XA, $XB", IIC_FPDivD,
- [(set v2f64:$XT, (fdiv v2f64:$XA, v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_fdiv v2f64:$XA, v2f64:$XB))]>;
def XVDIVSP : XX3Form<60, 88,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvdivsp $XT, $XA, $XB", IIC_FPDivS,
- [(set v4f32:$XT, (fdiv v4f32:$XA, v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_fdiv v4f32:$XA, v4f32:$XB))]>;
def XVSQRTDP : XX2Form<60, 203,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvsqrtdp $XT, $XB", IIC_FPSqrtD,
- [(set v2f64:$XT, (fsqrt v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_fsqrt v2f64:$XB))]>;
def XVSQRTSP : XX2Form<60, 139,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvsqrtsp $XT, $XB", IIC_FPSqrtS,
- [(set v4f32:$XT, (fsqrt v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_fsqrt v4f32:$XB))]>;
def XVTDIVDP : XX3Form_1<60, 125,
(outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
@@ -740,65 +889,65 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XSRDPI : XX2Form<60, 73,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpi $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (fround f64:$XB))]>;
+ [(set f64:$XT, (any_fround f64:$XB))]>;
def XSRDPIC : XX2Form<60, 107,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpic $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (fnearbyint f64:$XB))]>;
+ [(set f64:$XT, (any_fnearbyint f64:$XB))]>;
def XSRDPIM : XX2Form<60, 121,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpim $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (ffloor f64:$XB))]>;
+ [(set f64:$XT, (any_ffloor f64:$XB))]>;
def XSRDPIP : XX2Form<60, 105,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpip $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (fceil f64:$XB))]>;
+ [(set f64:$XT, (any_fceil f64:$XB))]>;
def XSRDPIZ : XX2Form<60, 89,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpiz $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (ftrunc f64:$XB))]>;
+ [(set f64:$XT, (any_ftrunc f64:$XB))]>;
def XVRDPI : XX2Form<60, 201,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpi $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fround v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_fround v2f64:$XB))]>;
def XVRDPIC : XX2Form<60, 235,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpic $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_fnearbyint v2f64:$XB))]>;
def XVRDPIM : XX2Form<60, 249,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpim $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (ffloor v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_ffloor v2f64:$XB))]>;
def XVRDPIP : XX2Form<60, 233,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpip $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fceil v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_fceil v2f64:$XB))]>;
def XVRDPIZ : XX2Form<60, 217,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpiz $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (ftrunc v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_ftrunc v2f64:$XB))]>;
def XVRSPI : XX2Form<60, 137,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspi $XT, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fround v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_fround v4f32:$XB))]>;
def XVRSPIC : XX2Form<60, 171,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspic $XT, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_fnearbyint v4f32:$XB))]>;
def XVRSPIM : XX2Form<60, 185,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspim $XT, $XB", IIC_VecFP,
- [(set v4f32:$XT, (ffloor v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_ffloor v4f32:$XB))]>;
def XVRSPIP : XX2Form<60, 169,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspip $XT, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fceil v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_fceil v4f32:$XB))]>;
def XVRSPIZ : XX2Form<60, 153,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspiz $XT, $XB", IIC_VecFP,
- [(set v4f32:$XT, (ftrunc v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_ftrunc v4f32:$XB))]>;
// Max/Min Instructions
let isCommutable = 1 in {
@@ -835,7 +984,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
[(set vsrc:$XT,
(int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>;
} // isCommutable
-} // Uses = [RM]
+ } // Uses = [RM], mayRaiseFPException
// Logical Instructions
let isCommutable = 1 in
@@ -924,433 +1073,8 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
(outs vsrc:$XT), (ins vsfrc:$XB, u2imm:$UIM),
"xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
-} // hasSideEffects
-
-// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
-// instruction selection into a branch sequence.
-let PPC970_Single = 1 in {
-
- def SELECT_CC_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
- (ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC),
- "#SELECT_CC_VSRC",
- []>;
- def SELECT_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
- (ins crbitrc:$cond, vsrc:$T, vsrc:$F),
- "#SELECT_VSRC",
- [(set v2f64:$dst,
- (select i1:$cond, v2f64:$T, v2f64:$F))]>;
- def SELECT_CC_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
- (ins crrc:$cond, f8rc:$T, f8rc:$F,
- i32imm:$BROPC), "#SELECT_CC_VSFRC",
- []>;
- def SELECT_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
- (ins crbitrc:$cond, f8rc:$T, f8rc:$F),
- "#SELECT_VSFRC",
- [(set f64:$dst,
- (select i1:$cond, f64:$T, f64:$F))]>;
- def SELECT_CC_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
- (ins crrc:$cond, f4rc:$T, f4rc:$F,
- i32imm:$BROPC), "#SELECT_CC_VSSRC",
- []>;
- def SELECT_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
- (ins crbitrc:$cond, f4rc:$T, f4rc:$F),
- "#SELECT_VSSRC",
- [(set f32:$dst,
- (select i1:$cond, f32:$T, f32:$F))]>;
-}
-} // AddedComplexity
-
-def : InstAlias<"xvmovdp $XT, $XB",
- (XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
-def : InstAlias<"xvmovsp $XT, $XB",
- (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
-
-def : InstAlias<"xxspltd $XT, $XB, 0",
- (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>;
-def : InstAlias<"xxspltd $XT, $XB, 1",
- (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>;
-def : InstAlias<"xxmrghd $XT, $XA, $XB",
- (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>;
-def : InstAlias<"xxmrgld $XT, $XA, $XB",
- (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
-def : InstAlias<"xxswapd $XT, $XB",
- (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
-def : InstAlias<"xxspltd $XT, $XB, 0",
- (XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>;
-def : InstAlias<"xxspltd $XT, $XB, 1",
- (XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>;
-def : InstAlias<"xxswapd $XT, $XB",
- (XXPERMDIs vsrc:$XT, vsfrc:$XB, 2)>;
-
-let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
-
-def : Pat<(v4i32 (vnot_ppc v4i32:$A)),
- (v4i32 (XXLNOR $A, $A))>;
-def : Pat<(v4i32 (or (and (vnot_ppc v4i32:$C), v4i32:$A),
- (and v4i32:$B, v4i32:$C))),
- (v4i32 (XXSEL $A, $B, $C))>;
-
-let Predicates = [IsBigEndian] in {
-def : Pat<(v2f64 (scalar_to_vector f64:$A)),
- (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
-
-def : Pat<(f64 (extractelt v2f64:$S, 0)),
- (f64 (EXTRACT_SUBREG $S, sub_64))>;
-def : Pat<(f64 (extractelt v2f64:$S, 1)),
- (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
-}
-
-let Predicates = [IsLittleEndian] in {
-def : Pat<(v2f64 (scalar_to_vector f64:$A)),
- (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64),
- (SUBREG_TO_REG (i64 1), $A, sub_64), 0))>;
-
-def : Pat<(f64 (extractelt v2f64:$S, 0)),
- (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
-def : Pat<(f64 (extractelt v2f64:$S, 1)),
- (f64 (EXTRACT_SUBREG $S, sub_64))>;
-}
-
-// Additional fnmsub patterns: -a*b + c == -(a*b - c)
-def : Pat<(fma (fneg f64:$A), f64:$B, f64:$C),
- (XSNMSUBADP $C, $A, $B)>;
-def : Pat<(fma f64:$A, (fneg f64:$B), f64:$C),
- (XSNMSUBADP $C, $A, $B)>;
-
-def : Pat<(fma (fneg v2f64:$A), v2f64:$B, v2f64:$C),
- (XVNMSUBADP $C, $A, $B)>;
-def : Pat<(fma v2f64:$A, (fneg v2f64:$B), v2f64:$C),
- (XVNMSUBADP $C, $A, $B)>;
-
-def : Pat<(fma (fneg v4f32:$A), v4f32:$B, v4f32:$C),
- (XVNMSUBASP $C, $A, $B)>;
-def : Pat<(fma v4f32:$A, (fneg v4f32:$B), v4f32:$C),
- (XVNMSUBASP $C, $A, $B)>;
-
-def : Pat<(v2f64 (bitconvert v4f32:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-def : Pat<(v2f64 (bitconvert v4i32:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-def : Pat<(v2f64 (bitconvert v8i16:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-def : Pat<(v2f64 (bitconvert v16i8:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-
-def : Pat<(v4f32 (bitconvert v2f64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v4i32 (bitconvert v2f64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v8i16 (bitconvert v2f64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v16i8 (bitconvert v2f64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-
-def : Pat<(v2i64 (bitconvert v4f32:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-def : Pat<(v2i64 (bitconvert v4i32:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-def : Pat<(v2i64 (bitconvert v8i16:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-def : Pat<(v2i64 (bitconvert v16i8:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-
-def : Pat<(v4f32 (bitconvert v2i64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v4i32 (bitconvert v2i64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v8i16 (bitconvert v2i64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v16i8 (bitconvert v2i64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-
-def : Pat<(v2f64 (bitconvert v2i64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v2i64 (bitconvert v2f64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-
-def : Pat<(v2f64 (bitconvert v1i128:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v1i128 (bitconvert v2f64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-
-def : Pat<(v2i64 (bitconvert f128:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v4i32 (bitconvert f128:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v8i16 (bitconvert f128:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v16i8 (bitconvert f128:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-
-def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)),
- (v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>;
-def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 1)),
- (v2f64 (XVCVSXWDP (v2i64 (XXMRGLW $C, $C))))>;
-
-def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)),
- (v2f64 (XVCVUXWDP (v2i64 (XXMRGHW $C, $C))))>;
-def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)),
- (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>;
-
-def : Pat<(v2f64 (PPCfpexth v4f32:$C, 0)), (XVCVSPDP (XXMRGHW $C, $C))>;
-def : Pat<(v2f64 (PPCfpexth v4f32:$C, 1)), (XVCVSPDP (XXMRGLW $C, $C))>;
-
-// Loads.
-let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
- def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>;
-
- // Stores.
- def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
- (STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
-}
-
-// Load vector big endian order
-let Predicates = [IsLittleEndian, HasVSX] in {
- def : Pat<(v2f64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
- def : Pat<(PPCst_vec_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(v4f32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
- def : Pat<(PPCst_vec_be v4f32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
- def : Pat<(v2i64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
- def : Pat<(PPCst_vec_be v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(v4i32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
- def : Pat<(PPCst_vec_be v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
-}
-
-let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in {
- def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
- def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
- def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
- def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVW4X xoaddr:$src)>;
- def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
- (STXVW4X $rS, xoaddr:$dst)>;
-}
-
-// Permutes.
-def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>;
-def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>;
-def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>;
-def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
-def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>;
-
-// PPCvecshl XT, XA, XA, 2 can be selected to both XXSLDWI XT,XA,XA,2 and
-// XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2), the later one is more profitable.
-def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)), (XXPERMDI $src, $src, 2)>;
-
-// Selects.
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
- (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULT)),
- (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)),
- (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULE)),
- (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)),
- (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)),
- (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGE)),
- (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)),
- (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGT)),
- (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)),
- (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
- (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)),
- (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
- (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)),
- (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
- (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
- (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)),
- (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
- (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)),
- (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
- (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-
-// Divides.
-def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B),
- (XVDIVSP $A, $B)>;
-def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
- (XVDIVDP $A, $B)>;
-
-// Reciprocal estimate
-def : Pat<(int_ppc_vsx_xvresp v4f32:$A),
- (XVRESP $A)>;
-def : Pat<(int_ppc_vsx_xvredp v2f64:$A),
- (XVREDP $A)>;
-
-// Recip. square root estimate
-def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A),
- (XVRSQRTESP $A)>;
-def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A),
- (XVRSQRTEDP $A)>;
-
-// Vector selection
-def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)),
- (COPY_TO_REGCLASS
- (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
- (COPY_TO_REGCLASS $vB, VSRC),
- (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
-def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)),
- (COPY_TO_REGCLASS
- (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
- (COPY_TO_REGCLASS $vB, VSRC),
- (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
-def : Pat<(vselect v4i32:$vA, v4i32:$vB, v4i32:$vC),
- (XXSEL $vC, $vB, $vA)>;
-def : Pat<(vselect v2i64:$vA, v2i64:$vB, v2i64:$vC),
- (XXSEL $vC, $vB, $vA)>;
-def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC),
- (XXSEL $vC, $vB, $vA)>;
-def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
- (XXSEL $vC, $vB, $vA)>;
-
-def : Pat<(v4f32 (fmaxnum v4f32:$src1, v4f32:$src2)),
- (v4f32 (XVMAXSP $src1, $src2))>;
-def : Pat<(v4f32 (fminnum v4f32:$src1, v4f32:$src2)),
- (v4f32 (XVMINSP $src1, $src2))>;
-def : Pat<(v2f64 (fmaxnum v2f64:$src1, v2f64:$src2)),
- (v2f64 (XVMAXDP $src1, $src2))>;
-def : Pat<(v2f64 (fminnum v2f64:$src1, v2f64:$src2)),
- (v2f64 (XVMINDP $src1, $src2))>;
-
-let Predicates = [IsLittleEndian] in {
-def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
- (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
-def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
- (f64 (XSCVSXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
-def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
- (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
-def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
- (f64 (XSCVUXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
-} // IsLittleEndian
-
-let Predicates = [IsBigEndian] in {
-def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
- (f64 (XSCVSXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
-def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
- (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
-def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
- (f64 (XSCVUXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
-def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
- (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
-} // IsBigEndian
-
-} // AddedComplexity
-} // HasVSX
-
-def FpMinMax {
- dag F32Min = (COPY_TO_REGCLASS (XSMINDP (COPY_TO_REGCLASS $A, VSFRC),
- (COPY_TO_REGCLASS $B, VSFRC)),
- VSSRC);
- dag F32Max = (COPY_TO_REGCLASS (XSMAXDP (COPY_TO_REGCLASS $A, VSFRC),
- (COPY_TO_REGCLASS $B, VSFRC)),
- VSSRC);
-}
-
-let AddedComplexity = 400, Predicates = [HasVSX] in {
- // f32 Min.
- def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)),
- (f32 FpMinMax.F32Min)>;
- def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)),
- (f32 FpMinMax.F32Min)>;
- def : Pat<(f32 (fminnum_ieee f32:$A, (fcanonicalize f32:$B))),
- (f32 FpMinMax.F32Min)>;
- def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
- (f32 FpMinMax.F32Min)>;
- // F32 Max.
- def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)),
- (f32 FpMinMax.F32Max)>;
- def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)),
- (f32 FpMinMax.F32Max)>;
- def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))),
- (f32 FpMinMax.F32Max)>;
- def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
- (f32 FpMinMax.F32Max)>;
-
- // f64 Min.
- def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)),
- (f64 (XSMINDP $A, $B))>;
- def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)),
- (f64 (XSMINDP $A, $B))>;
- def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))),
- (f64 (XSMINDP $A, $B))>;
- def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
- (f64 (XSMINDP $A, $B))>;
- // f64 Max.
- def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)),
- (f64 (XSMAXDP $A, $B))>;
- def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)),
- (f64 (XSMAXDP $A, $B))>;
- def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))),
- (f64 (XSMAXDP $A, $B))>;
- def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
- (f64 (XSMAXDP $A, $B))>;
-}
-
-def ScalarLoads {
- dag Li8 = (i32 (extloadi8 xoaddr:$src));
- dag ZELi8 = (i32 (zextloadi8 xoaddr:$src));
- dag ZELi8i64 = (i64 (zextloadi8 xoaddr:$src));
- dag SELi8 = (i32 (sext_inreg (extloadi8 xoaddr:$src), i8));
- dag SELi8i64 = (i64 (sext_inreg (extloadi8 xoaddr:$src), i8));
-
- dag Li16 = (i32 (extloadi16 xoaddr:$src));
- dag ZELi16 = (i32 (zextloadi16 xoaddr:$src));
- dag ZELi16i64 = (i64 (zextloadi16 xoaddr:$src));
- dag SELi16 = (i32 (sextloadi16 xoaddr:$src));
- dag SELi16i64 = (i64 (sextloadi16 xoaddr:$src));
-
- dag Li32 = (i32 (load xoaddr:$src));
-}
-
-def DWToSPExtractConv {
- dag El0US1 = (f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
- dag El1US1 = (f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
- dag El0US2 = (f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
- dag El1US2 = (f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
- dag El0SS1 = (f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
- dag El1SS1 = (f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
- dag El0SS2 = (f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
- dag El1SS2 = (f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
- dag BVU = (v4f32 (build_vector El0US1, El1US1, El0US2, El1US2));
- dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2));
-}
-
// The following VSX instructions were introduced in Power ISA 2.07
-/* FIXME: if the operands are v2i64, these patterns will not match.
- we should define new patterns or otherwise match the same patterns
- when the elements are larger than i32.
-*/
-def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">;
-def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">;
-def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">;
-let Predicates = [HasP8Vector] in {
-let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+let Predicates = [HasVSX, HasP8Vector] in {
let isCommutable = 1 in {
def XXLEQV : XX3Form<60, 186,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
@@ -1363,9 +1087,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
v4i32:$XB)))]>;
} // isCommutable
- def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
- (XXLEQV $A, $B)>;
-
let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
isReMaterializable = 1 in {
def XXLEQVOnes : XX3Form_SameOp<60, 186, (outs vsrc:$XT), (ins),
@@ -1379,7 +1100,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
[(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
// VSX scalar loads introduced in ISA 2.07
- let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
+ let mayLoad = 1, mayStore = 0 in {
let CodeSize = 3 in
def LXSSPX : XX1Form_memOp<31, 524, (outs vssrc:$XT), (ins memrr:$src),
"lxsspx $XT, $src", IIC_LdStLFD, []>;
@@ -1404,7 +1125,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
} // mayLoad
// VSX scalar stores introduced in ISA 2.07
- let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
+ let mayStore = 1, mayLoad = 0 in {
let CodeSize = 3 in
def STXSSPX : XX1Form_memOp<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
"stxsspx $XT, $dst", IIC_LdStSTFD, []>;
@@ -1422,64 +1143,42 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
[(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
} // mayStore
- def : Pat<(f64 (extloadf32 xoaddr:$src)),
- (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>;
- def : Pat<(f32 (fpround (f64 (extloadf32 xoaddr:$src)))),
- (f32 (XFLOADf32 xoaddr:$src))>;
- def : Pat<(f64 (fpextend f32:$src)),
- (COPY_TO_REGCLASS $src, VSFRC)>;
-
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
- (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)),
- (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
- (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)),
- (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
- (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
- (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)),
- (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
- (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)),
- (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
- (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-
// VSX Elementary Scalar FP arithmetic (SP)
+ let mayRaiseFPException = 1 in {
let isCommutable = 1 in {
def XSADDSP : XX3Form<60, 0,
(outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
"xsaddsp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fadd f32:$XA, f32:$XB))]>;
+ [(set f32:$XT, (any_fadd f32:$XA, f32:$XB))]>;
def XSMULSP : XX3Form<60, 16,
(outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
"xsmulsp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fmul f32:$XA, f32:$XB))]>;
+ [(set f32:$XT, (any_fmul f32:$XA, f32:$XB))]>;
} // isCommutable
+
def XSSUBSP : XX3Form<60, 8,
(outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
"xssubsp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
+ [(set f32:$XT, (any_fsub f32:$XA, f32:$XB))]>;
def XSDIVSP : XX3Form<60, 24,
(outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
"xsdivsp $XT, $XA, $XB", IIC_FPDivS,
- [(set f32:$XT, (fdiv f32:$XA, f32:$XB))]>;
+ [(set f32:$XT, (any_fdiv f32:$XA, f32:$XB))]>;
+
def XSRESP : XX2Form<60, 26,
(outs vssrc:$XT), (ins vssrc:$XB),
"xsresp $XT, $XB", IIC_VecFP,
[(set f32:$XT, (PPCfre f32:$XB))]>;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1, mayRaiseFPException = 1 in
def XSRSP : XX2Form<60, 281,
(outs vssrc:$XT), (ins vsfrc:$XB),
- "xsrsp $XT, $XB", IIC_VecFP, []>;
+ "xsrsp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (any_fpround f64:$XB))]>;
def XSSQRTSP : XX2Form<60, 11,
(outs vssrc:$XT), (ins vssrc:$XB),
"xssqrtsp $XT, $XB", IIC_FPSqrtS,
- [(set f32:$XT, (fsqrt f32:$XB))]>;
+ [(set f32:$XT, (any_fsqrt f32:$XB))]>;
def XSRSQRTESP : XX2Form<60, 10,
(outs vssrc:$XT), (ins vssrc:$XB),
"xsrsqrtesp $XT, $XB", IIC_VecFP,
@@ -1492,10 +1191,11 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
(outs vssrc:$XT),
(ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
"xsmaddasp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fma f32:$XA, f32:$XB, f32:$XTi))]>,
+ [(set f32:$XT, (any_fma f32:$XA, f32:$XB, f32:$XTi))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
- let IsVSXFMAAlt = 1 in
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let IsVSXFMAAlt = 1, hasSideEffects = 1 in
def XSMADDMSP : XX3Form<60, 9,
(outs vssrc:$XT),
(ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
@@ -1510,11 +1210,12 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
(outs vssrc:$XT),
(ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
"xsmsubasp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fma f32:$XA, f32:$XB,
+ [(set f32:$XT, (any_fma f32:$XA, f32:$XB,
(fneg f32:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
- let IsVSXFMAAlt = 1 in
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let IsVSXFMAAlt = 1, hasSideEffects = 1 in
def XSMSUBMSP : XX3Form<60, 25,
(outs vssrc:$XT),
(ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
@@ -1529,11 +1230,12 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
(outs vssrc:$XT),
(ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
"xsnmaddasp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+ [(set f32:$XT, (fneg (any_fma f32:$XA, f32:$XB,
f32:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
- let IsVSXFMAAlt = 1 in
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let IsVSXFMAAlt = 1, hasSideEffects = 1 in
def XSNMADDMSP : XX3Form<60, 137,
(outs vssrc:$XT),
(ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
@@ -1548,11 +1250,12 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
(outs vssrc:$XT),
(ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
"xsnmsubasp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+ [(set f32:$XT, (fneg (any_fma f32:$XA, f32:$XB,
(fneg f32:$XTi))))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
- let IsVSXFMAAlt = 1 in
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let IsVSXFMAAlt = 1, hasSideEffects = 1 in
def XSNMSUBMSP : XX3Form<60, 153,
(outs vssrc:$XT),
(ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
@@ -1561,12 +1264,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
AltVSXFMARel;
}
- // Additional xsnmsubasp patterns: -a*b + c == -(a*b - c)
- def : Pat<(fma (fneg f32:$A), f32:$B, f32:$C),
- (XSNMSUBASP $C, $A, $B)>;
- def : Pat<(fma f32:$A, (fneg f32:$B), f32:$C),
- (XSNMSUBASP $C, $A, $B)>;
-
// Single Precision Conversions (FP <-> INT)
def XSCVSXDSP : XX2Form<60, 312,
(outs vssrc:$XT), (ins vsfrc:$XB),
@@ -1582,72 +1279,16 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
"xscvdpspn $XT, $XB", IIC_VecFP, []>;
def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
"xscvspdpn $XT, $XB", IIC_VecFP, []>;
+ } // mayRaiseFPException
- let Predicates = [IsLittleEndian] in {
- def : Pat<DWToSPExtractConv.El0SS1,
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
- def : Pat<DWToSPExtractConv.El1SS1,
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS
- (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
- def : Pat<DWToSPExtractConv.El0US1,
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
- def : Pat<DWToSPExtractConv.El1US1,
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS
- (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
- }
-
- let Predicates = [IsBigEndian] in {
- def : Pat<DWToSPExtractConv.El0SS1,
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
- def : Pat<DWToSPExtractConv.El1SS1,
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
- def : Pat<DWToSPExtractConv.El0US1,
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
- def : Pat<DWToSPExtractConv.El1US1,
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
- }
-
- // Instructions for converting float to i64 feeding a store.
- let Predicates = [NoP9Vector] in {
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 8),
- (STXSDX (XSCVDPSXDS f64:$src), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 8),
- (STXSDX (XSCVDPUXDS f64:$src), xoaddr:$dst)>;
- }
-
- // Instructions for converting float to i32 feeding a store.
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 4),
- (STIWX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4),
- (STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
-
- def : Pat<(v2i64 (smax v2i64:$src1, v2i64:$src2)),
- (v2i64 (VMAXSD (COPY_TO_REGCLASS $src1, VRRC),
- (COPY_TO_REGCLASS $src2, VRRC)))>;
- def : Pat<(v2i64 (umax v2i64:$src1, v2i64:$src2)),
- (v2i64 (VMAXUD (COPY_TO_REGCLASS $src1, VRRC),
- (COPY_TO_REGCLASS $src2, VRRC)))>;
- def : Pat<(v2i64 (smin v2i64:$src1, v2i64:$src2)),
- (v2i64 (VMINSD (COPY_TO_REGCLASS $src1, VRRC),
- (COPY_TO_REGCLASS $src2, VRRC)))>;
- def : Pat<(v2i64 (umin v2i64:$src1, v2i64:$src2)),
- (v2i64 (VMINUD (COPY_TO_REGCLASS $src1, VRRC),
- (COPY_TO_REGCLASS $src2, VRRC)))>;
-} // AddedComplexity = 400
-} // HasP8Vector
-
-let AddedComplexity = 400 in {
-let Predicates = [HasDirectMove] in {
+ let Predicates = [HasVSX, HasDirectMove] in {
// VSX direct move instructions
def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
"mfvsrd $rA, $XT", IIC_VecGeneral,
[(set i64:$rA, (PPCmfvsr f64:$XT))]>,
Requires<[In64BitMode]>;
- let isCodeGenOnly = 1 in
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let isCodeGenOnly = 1, hasSideEffects = 1 in
def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsrc:$XT),
"mfvsrd $rA, $XT", IIC_VecGeneral,
[]>,
@@ -1655,7 +1296,8 @@ let Predicates = [HasDirectMove] in {
def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT),
"mfvsrwz $rA, $XT", IIC_VecGeneral,
[(set i32:$rA, (PPCmfvsr f64:$XT))]>;
- let isCodeGenOnly = 1 in
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let isCodeGenOnly = 1, hasSideEffects = 1 in
def MFVRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsrc:$XT),
"mfvsrwz $rA, $XT", IIC_VecGeneral,
[]>;
@@ -1663,7 +1305,8 @@ let Predicates = [HasDirectMove] in {
"mtvsrd $XT, $rA", IIC_VecGeneral,
[(set f64:$XT, (PPCmtvsra i64:$rA))]>,
Requires<[In64BitMode]>;
- let isCodeGenOnly = 1 in
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let isCodeGenOnly = 1, hasSideEffects = 1 in
def MTVRD : XX1_RS6_RD5_XO<31, 179, (outs vsrc:$XT), (ins g8rc:$rA),
"mtvsrd $XT, $rA", IIC_VecGeneral,
[]>,
@@ -1671,56 +1314,547 @@ let Predicates = [HasDirectMove] in {
def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA),
"mtvsrwa $XT, $rA", IIC_VecGeneral,
[(set f64:$XT, (PPCmtvsra i32:$rA))]>;
- let isCodeGenOnly = 1 in
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let isCodeGenOnly = 1, hasSideEffects = 1 in
def MTVRWA : XX1_RS6_RD5_XO<31, 211, (outs vsrc:$XT), (ins gprc:$rA),
"mtvsrwa $XT, $rA", IIC_VecGeneral,
[]>;
def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA),
"mtvsrwz $XT, $rA", IIC_VecGeneral,
[(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
- let isCodeGenOnly = 1 in
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let isCodeGenOnly = 1, hasSideEffects = 1 in
def MTVRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsrc:$XT), (ins gprc:$rA),
"mtvsrwz $XT, $rA", IIC_VecGeneral,
[]>;
-} // HasDirectMove
+ } // HasDirectMove
-let Predicates = [IsISA3_0, HasDirectMove] in {
- def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$rA),
- "mtvsrws $XT, $rA", IIC_VecGeneral, []>;
+} // HasVSX, HasP8Vector
- def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc_nox0:$rA, g8rc:$rB),
- "mtvsrdd $XT, $rA, $rB", IIC_VecGeneral,
- []>, Requires<[In64BitMode]>;
+let Predicates = [HasVSX, IsISA3_0, HasDirectMove] in {
+def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$rA),
+ "mtvsrws $XT, $rA", IIC_VecGeneral, []>;
- def MFVSRLD: XX1_RS6_RD5_XO<31, 307, (outs g8rc:$rA), (ins vsrc:$XT),
- "mfvsrld $rA, $XT", IIC_VecGeneral,
- []>, Requires<[In64BitMode]>;
+def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc_nox0:$rA, g8rc:$rB),
+ "mtvsrdd $XT, $rA, $rB", IIC_VecGeneral,
+ []>, Requires<[In64BitMode]>;
-} // IsISA3_0, HasDirectMove
-} // AddedComplexity = 400
+def MFVSRLD: XX1_RS6_RD5_XO<31, 307, (outs g8rc:$rA), (ins vsrc:$XT),
+ "mfvsrld $rA, $XT", IIC_VecGeneral,
+ []>, Requires<[In64BitMode]>;
-// We want to parse this from asm, but we don't want to emit this as it would
-// be emitted with a VSX reg. So leave Emit = 0 here.
-def : InstAlias<"mfvrd $rA, $XT",
- (MFVRD g8rc:$rA, vrrc:$XT), 0>;
-def : InstAlias<"mffprd $rA, $src",
- (MFVSRD g8rc:$rA, f8rc:$src)>;
-def : InstAlias<"mtvrd $XT, $rA",
- (MTVRD vrrc:$XT, g8rc:$rA), 0>;
-def : InstAlias<"mtfprd $dst, $rA",
- (MTVSRD f8rc:$dst, g8rc:$rA)>;
-def : InstAlias<"mfvrwz $rA, $XT",
- (MFVRWZ gprc:$rA, vrrc:$XT), 0>;
-def : InstAlias<"mffprwz $rA, $src",
- (MFVSRWZ gprc:$rA, f8rc:$src)>;
-def : InstAlias<"mtvrwa $XT, $rA",
- (MTVRWA vrrc:$XT, gprc:$rA), 0>;
-def : InstAlias<"mtfprwa $dst, $rA",
- (MTVSRWA f8rc:$dst, gprc:$rA)>;
-def : InstAlias<"mtvrwz $XT, $rA",
- (MTVRWZ vrrc:$XT, gprc:$rA), 0>;
-def : InstAlias<"mtfprwz $dst, $rA",
- (MTVSRWZ f8rc:$dst, gprc:$rA)>;
+} // HasVSX, IsISA3_0, HasDirectMove
+
+let Predicates = [HasVSX, HasP9Vector] in {
+ // Quad-Precision Scalar Move Instructions:
+ // Copy Sign
+ def XSCPSGNQP : X_VT5_VA5_VB5<63, 100, "xscpsgnqp",
+ [(set f128:$vT,
+ (fcopysign f128:$vB, f128:$vA))]>;
+
+ // Absolute/Negative-Absolute/Negate
+ def XSABSQP : X_VT5_XO5_VB5<63, 0, 804, "xsabsqp",
+ [(set f128:$vT, (fabs f128:$vB))]>;
+ def XSNABSQP : X_VT5_XO5_VB5<63, 8, 804, "xsnabsqp",
+ [(set f128:$vT, (fneg (fabs f128:$vB)))]>;
+ def XSNEGQP : X_VT5_XO5_VB5<63, 16, 804, "xsnegqp",
+ [(set f128:$vT, (fneg f128:$vB))]>;
+
+ //===--------------------------------------------------------------------===//
+ // Quad-Precision Scalar Floating-Point Arithmetic Instructions:
+
+ // Add/Divide/Multiply/Subtract
+ let mayRaiseFPException = 1 in {
+ let isCommutable = 1 in {
+ def XSADDQP : X_VT5_VA5_VB5 <63, 4, "xsaddqp",
+ [(set f128:$vT, (any_fadd f128:$vA, f128:$vB))]>;
+ def XSMULQP : X_VT5_VA5_VB5 <63, 36, "xsmulqp",
+ [(set f128:$vT, (any_fmul f128:$vA, f128:$vB))]>;
+ }
+ def XSSUBQP : X_VT5_VA5_VB5 <63, 516, "xssubqp" ,
+ [(set f128:$vT, (any_fsub f128:$vA, f128:$vB))]>;
+ def XSDIVQP : X_VT5_VA5_VB5 <63, 548, "xsdivqp",
+ [(set f128:$vT, (any_fdiv f128:$vA, f128:$vB))]>;
+ // Square-Root
+ def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp",
+ [(set f128:$vT, (any_fsqrt f128:$vB))]>;
+ // (Negative) Multiply-{Add/Subtract}
+ def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp",
+ [(set f128:$vT,
+ (any_fma f128:$vA, f128:$vB, f128:$vTi))]>;
+ def XSMSUBQP : X_VT5_VA5_VB5_FMA <63, 420, "xsmsubqp" ,
+ [(set f128:$vT,
+ (any_fma f128:$vA, f128:$vB,
+ (fneg f128:$vTi)))]>;
+ def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp",
+ [(set f128:$vT,
+ (fneg (any_fma f128:$vA, f128:$vB,
+ f128:$vTi)))]>;
+ def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp",
+ [(set f128:$vT,
+ (fneg (any_fma f128:$vA, f128:$vB,
+ (fneg f128:$vTi))))]>;
+
+ let isCommutable = 1 in {
+ def XSADDQPO : X_VT5_VA5_VB5_Ro<63, 4, "xsaddqpo",
+ [(set f128:$vT,
+ (int_ppc_addf128_round_to_odd
+ f128:$vA, f128:$vB))]>;
+ def XSMULQPO : X_VT5_VA5_VB5_Ro<63, 36, "xsmulqpo",
+ [(set f128:$vT,
+ (int_ppc_mulf128_round_to_odd
+ f128:$vA, f128:$vB))]>;
+ }
+ def XSSUBQPO : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo",
+ [(set f128:$vT,
+ (int_ppc_subf128_round_to_odd
+ f128:$vA, f128:$vB))]>;
+ def XSDIVQPO : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo",
+ [(set f128:$vT,
+ (int_ppc_divf128_round_to_odd
+ f128:$vA, f128:$vB))]>;
+ def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo",
+ [(set f128:$vT,
+ (int_ppc_sqrtf128_round_to_odd f128:$vB))]>;
+
+
+ def XSMADDQPO : X_VT5_VA5_VB5_FMA_Ro<63, 388, "xsmaddqpo",
+ [(set f128:$vT,
+ (int_ppc_fmaf128_round_to_odd
+ f128:$vA,f128:$vB,f128:$vTi))]>;
+
+ def XSMSUBQPO : X_VT5_VA5_VB5_FMA_Ro<63, 420, "xsmsubqpo" ,
+ [(set f128:$vT,
+ (int_ppc_fmaf128_round_to_odd
+ f128:$vA, f128:$vB, (fneg f128:$vTi)))]>;
+ def XSNMADDQPO: X_VT5_VA5_VB5_FMA_Ro<63, 452, "xsnmaddqpo",
+ [(set f128:$vT,
+ (fneg (int_ppc_fmaf128_round_to_odd
+ f128:$vA, f128:$vB, f128:$vTi)))]>;
+ def XSNMSUBQPO: X_VT5_VA5_VB5_FMA_Ro<63, 484, "xsnmsubqpo",
+ [(set f128:$vT,
+ (fneg (int_ppc_fmaf128_round_to_odd
+ f128:$vA, f128:$vB, (fneg f128:$vTi))))]>;
+ } // mayRaiseFPException
+
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ // QP Compare Ordered/Unordered
+ let hasSideEffects = 1 in {
+ def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>;
+ def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>;
+
+ // DP/QP Compare Exponents
+ def XSCMPEXPDP : XX3Form_1<60, 59,
+ (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
+ "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>;
+ def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>;
+
+ // DP Compare ==, >=, >, !=
+ // Use vsrc for XT, because the entire register of XT is set.
+ // XT.dword[1] = 0x0000_0000_0000_0000
+ def XSCMPEQDP : XX3_XT5_XA5_XB5<60, 3, "xscmpeqdp", vsrc, vsfrc, vsfrc,
+ IIC_FPCompare, []>;
+ def XSCMPGEDP : XX3_XT5_XA5_XB5<60, 19, "xscmpgedp", vsrc, vsfrc, vsfrc,
+ IIC_FPCompare, []>;
+ def XSCMPGTDP : XX3_XT5_XA5_XB5<60, 11, "xscmpgtdp", vsrc, vsfrc, vsfrc,
+ IIC_FPCompare, []>;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Quad-Precision Floating-Point Conversion Instructions:
+
+ let mayRaiseFPException = 1 in {
+ // Convert DP -> QP
+ def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vfrc,
+ [(set f128:$vT, (any_fpextend f64:$vB))]>;
+
+ // Round & Convert QP -> DP (dword[1] is set to zero)
+ def XSCVQPDP : X_VT5_XO5_VB5_VSFR<63, 20, 836, "xscvqpdp" , []>;
+ def XSCVQPDPO : X_VT5_XO5_VB5_VSFR_Ro<63, 20, 836, "xscvqpdpo",
+ [(set f64:$vT,
+ (int_ppc_truncf128_round_to_odd
+ f128:$vB))]>;
+ }
+
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ // Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero)
+ let hasSideEffects = 1 in {
+ def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>;
+ def XSCVQPSWZ : X_VT5_XO5_VB5<63, 9, 836, "xscvqpswz", []>;
+ def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz", []>;
+ def XSCVQPUWZ : X_VT5_XO5_VB5<63, 1, 836, "xscvqpuwz", []>;
+ }
+
+ // Convert (Un)Signed DWord -> QP.
+ def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vfrc, []>;
+ def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vfrc, []>;
+
+ // (Round &) Convert DP <-> HP
+  // Note! xscvdphp's src and dest registers both use the left 64 bits, so we use
+  // vsfrc for the src and dest registers. xscvhpdp's src only uses the left 16 bits,
+  // but we still use vsfrc for it.
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in {
+ def XSCVDPHP : XX2_XT6_XO5_XB6<60, 17, 347, "xscvdphp", vsfrc, []>;
+ def XSCVHPDP : XX2_XT6_XO5_XB6<60, 16, 347, "xscvhpdp", vsfrc, []>;
+ }
+
+ // Vector HP -> SP
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in
+ def XVCVHPSP : XX2_XT6_XO5_XB6<60, 24, 475, "xvcvhpsp", vsrc, []>;
+ def XVCVSPHP : XX2_XT6_XO5_XB6<60, 25, 475, "xvcvsphp", vsrc,
+ [(set v4f32:$XT,
+ (int_ppc_vsx_xvcvsphp v4f32:$XB))]>;
+
+ let mayRaiseFPException = 1 in {
+ // Round to Quad-Precision Integer [with Inexact]
+ def XSRQPI : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 0, "xsrqpi" , []>;
+ def XSRQPIX : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 1, "xsrqpix", []>;
+ }
+
+ // Round Quad-Precision to Double-Extended Precision (fp80)
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in
+ def XSRQPXP : Z23_VT5_R1_VB5_RMC2_EX1<63, 37, 0, "xsrqpxp", []>;
+
+ //===--------------------------------------------------------------------===//
+ // Insert/Extract Instructions
+
+ // Insert Exponent DP/QP
+ // XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in {
+ def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
+ "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>;
+    // vB NOTE: only vB.dword[0] is used, which is why we don't use the
+    // X_VT5_VA5_VB5 form
+ def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB),
+ "xsiexpqp $vT, $vA, $vB", IIC_VecFP, []>;
+ }
+
+ // Extract Exponent/Significand DP/QP
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in {
+ def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>;
+ def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>;
+
+ def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>;
+ def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>;
+ }
+
+ // Vector Insert Word
+ // XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
+ def XXINSERTW :
+ XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT),
+ (ins vsrc:$XTi, vsrc:$XB, u4imm:$UIM),
+ "xxinsertw $XT, $XB, $UIM", IIC_VecFP,
+ [(set v4i32:$XT, (PPCvecinsert v4i32:$XTi, v4i32:$XB,
+ imm32SExt16:$UIM))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ // Vector Extract Unsigned Word
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in
+ def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165,
+ (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM),
+ "xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>;
+
+ // Vector Insert Exponent DP/SP
+ def XVIEXPDP : XX3_XT5_XA5_XB5<60, 248, "xviexpdp", vsrc, vsrc, vsrc,
+ IIC_VecFP, [(set v2f64: $XT,(int_ppc_vsx_xviexpdp v2i64:$XA, v2i64:$XB))]>;
+ def XVIEXPSP : XX3_XT5_XA5_XB5<60, 216, "xviexpsp", vsrc, vsrc, vsrc,
+ IIC_VecFP, [(set v4f32: $XT,(int_ppc_vsx_xviexpsp v4i32:$XA, v4i32:$XB))]>;
+
+ // Vector Extract Exponent/Significand DP/SP
+ def XVXEXPDP : XX2_XT6_XO5_XB6<60, 0, 475, "xvxexpdp", vsrc,
+ [(set v2i64: $XT,
+ (int_ppc_vsx_xvxexpdp v2f64:$XB))]>;
+ def XVXEXPSP : XX2_XT6_XO5_XB6<60, 8, 475, "xvxexpsp", vsrc,
+ [(set v4i32: $XT,
+ (int_ppc_vsx_xvxexpsp v4f32:$XB))]>;
+ def XVXSIGDP : XX2_XT6_XO5_XB6<60, 1, 475, "xvxsigdp", vsrc,
+ [(set v2i64: $XT,
+ (int_ppc_vsx_xvxsigdp v2f64:$XB))]>;
+ def XVXSIGSP : XX2_XT6_XO5_XB6<60, 9, 475, "xvxsigsp", vsrc,
+ [(set v4i32: $XT,
+ (int_ppc_vsx_xvxsigsp v4f32:$XB))]>;
+
+ // Test Data Class SP/DP/QP
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in {
+ def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298,
+ (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
+ "xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>;
+ def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362,
+ (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
+ "xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>;
+ def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708,
+ (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB),
+ "xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>;
+ }
+
+ // Vector Test Data Class SP/DP
+ def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5,
+ (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
+ "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP,
+ [(set v4i32: $XT,
+ (int_ppc_vsx_xvtstdcsp v4f32:$XB, timm:$DCMX))]>;
+ def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5,
+ (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
+ "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP,
+ [(set v2i64: $XT,
+ (int_ppc_vsx_xvtstdcdp v2f64:$XB, timm:$DCMX))]>;
+
+ // Maximum/Minimum Type-C/Type-J DP
+ def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsfrc, vsfrc, vsfrc,
+ IIC_VecFP,
+ [(set f64:$XT, (PPCxsmaxc f64:$XA, f64:$XB))]>;
+ def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsfrc, vsfrc, vsfrc,
+ IIC_VecFP,
+ [(set f64:$XT, (PPCxsminc f64:$XA, f64:$XB))]>;
+
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in {
+ def XSMAXJDP : XX3_XT5_XA5_XB5<60, 144, "xsmaxjdp", vsrc, vsfrc, vsfrc,
+ IIC_VecFP, []>;
+ def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc,
+ IIC_VecFP, []>;
+ }
+
+ // Vector Byte-Reverse H/W/D/Q Word
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in
+ def XXBRH : XX2_XT6_XO5_XB6<60, 7, 475, "xxbrh", vsrc, []>;
+ def XXBRW : XX2_XT6_XO5_XB6<60, 15, 475, "xxbrw", vsrc,
+ [(set v4i32:$XT, (bswap v4i32:$XB))]>;
+ def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc,
+ [(set v2i64:$XT, (bswap v2i64:$XB))]>;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in
+ def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>;
+
+ // Vector Permute
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in {
+ def XXPERM : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc,
+ IIC_VecPerm, []>;
+ def XXPERMR : XX3_XT5_XA5_XB5<60, 58, "xxpermr", vsrc, vsrc, vsrc,
+ IIC_VecPerm, []>;
+ }
+
+ // Vector Splat Immediate Byte
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in
+ def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8),
+ "xxspltib $XT, $IMM8", IIC_VecPerm, []>;
+
+ // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
+ // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
+ let mayLoad = 1, mayStore = 0 in {
+ // Load Vector
+ def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
+ "lxv $XT, $src", IIC_LdStLFD, []>;
+ // Load DWord
+ def LXSD : DSForm_1<57, 2, (outs vfrc:$vD), (ins memrix:$src),
+ "lxsd $vD, $src", IIC_LdStLFD, []>;
+ // Load SP from src, convert it to DP, and place in dword[0]
+ def LXSSP : DSForm_1<57, 3, (outs vfrc:$vD), (ins memrix:$src),
+ "lxssp $vD, $src", IIC_LdStLFD, []>;
+
+ // Load as Integer Byte/Halfword & Zero Indexed
+ def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc,
+ [(set f64:$XT, (PPClxsizx xoaddr:$src, 1))]>;
+ def LXSIHZX : X_XT6_RA5_RB5<31, 813, "lxsihzx", vsfrc,
+ [(set f64:$XT, (PPClxsizx xoaddr:$src, 2))]>;
+
+ // Load Vector Halfword*8/Byte*16 Indexed
+ def LXVH8X : X_XT6_RA5_RB5<31, 812, "lxvh8x" , vsrc, []>;
+ def LXVB16X : X_XT6_RA5_RB5<31, 876, "lxvb16x", vsrc, []>;
+
+ // Load Vector Indexed
+ def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc,
+ [(set v2f64:$XT, (load xaddrX16:$src))]>;
+ // Load Vector (Left-justified) with Length
+ def LXVL : XX1Form_memOp<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
+ "lxvl $XT, $src, $rB", IIC_LdStLoad,
+ [(set v4i32:$XT, (int_ppc_vsx_lxvl addr:$src, i64:$rB))]>;
+ def LXVLL : XX1Form_memOp<31,301, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
+ "lxvll $XT, $src, $rB", IIC_LdStLoad,
+ [(set v4i32:$XT, (int_ppc_vsx_lxvll addr:$src, i64:$rB))]>;
+
+ // Load Vector Word & Splat Indexed
+ def LXVWSX : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>;
+ } // mayLoad
+
+ // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
+ // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
+ let mayStore = 1, mayLoad = 0 in {
+ // Store Vector
+ def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
+ "stxv $XT, $dst", IIC_LdStSTFD, []>;
+ // Store DWord
+ def STXSD : DSForm_1<61, 2, (outs), (ins vfrc:$vS, memrix:$dst),
+ "stxsd $vS, $dst", IIC_LdStSTFD, []>;
+ // Convert DP of dword[0] to SP, and Store to dst
+ def STXSSP : DSForm_1<61, 3, (outs), (ins vfrc:$vS, memrix:$dst),
+ "stxssp $vS, $dst", IIC_LdStSTFD, []>;
+
+ // Store as Integer Byte/Halfword Indexed
+ def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc,
+ [(PPCstxsix f64:$XT, xoaddr:$dst, 1)]>;
+ def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc,
+ [(PPCstxsix f64:$XT, xoaddr:$dst, 2)]>;
+ let isCodeGenOnly = 1 in {
+ def STXSIBXv : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsrc, []>;
+ def STXSIHXv : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsrc, []>;
+ }
+
+ // Store Vector Halfword*8/Byte*16 Indexed
+ def STXVH8X : X_XS6_RA5_RB5<31, 940, "stxvh8x" , vsrc, []>;
+ def STXVB16X : X_XS6_RA5_RB5<31, 1004, "stxvb16x", vsrc, []>;
+
+ // Store Vector Indexed
+ def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc,
+ [(store v2f64:$XT, xaddrX16:$dst)]>;
+
+ // Store Vector (Left-justified) with Length
+ def STXVL : XX1Form_memOp<31, 397, (outs),
+ (ins vsrc:$XT, memr:$dst, g8rc:$rB),
+ "stxvl $XT, $dst, $rB", IIC_LdStLoad,
+ [(int_ppc_vsx_stxvl v4i32:$XT, addr:$dst,
+ i64:$rB)]>;
+ def STXVLL : XX1Form_memOp<31, 429, (outs),
+ (ins vsrc:$XT, memr:$dst, g8rc:$rB),
+ "stxvll $XT, $dst, $rB", IIC_LdStLoad,
+ [(int_ppc_vsx_stxvll v4i32:$XT, addr:$dst,
+ i64:$rB)]>;
+ } // mayStore
+
+ def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src),
+ "#DFLOADf32",
+ [(set f32:$XT, (load iaddrX4:$src))]>;
+ def DFLOADf64 : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src),
+ "#DFLOADf64",
+ [(set f64:$XT, (load iaddrX4:$src))]>;
+ def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst),
+ "#DFSTOREf32",
+ [(store f32:$XT, iaddrX4:$dst)]>;
+ def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
+ "#DFSTOREf64",
+ [(store f64:$XT, iaddrX4:$dst)]>;
+
+ let mayStore = 1 in {
+ def SPILLTOVSR_STX : PseudoXFormMemOp<(outs),
+ (ins spilltovsrrc:$XT, memrr:$dst),
+ "#SPILLTOVSR_STX", []>;
+ def SPILLTOVSR_ST : PPCPostRAExpPseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst),
+ "#SPILLTOVSR_ST", []>;
+ }
+ let mayLoad = 1 in {
+ def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT),
+ (ins memrr:$src),
+ "#SPILLTOVSR_LDX", []>;
+ def SPILLTOVSR_LD : PPCPostRAExpPseudo<(outs spilltovsrrc:$XT), (ins memrix:$src),
+ "#SPILLTOVSR_LD", []>;
+
+ }
+ } // HasP9Vector
+} // hasSideEffects = 0
+
+let PPC970_Single = 1, AddedComplexity = 400 in {
+
+ def SELECT_CC_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
+ (ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC),
+ "#SELECT_CC_VSRC",
+ []>;
+ def SELECT_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
+ (ins crbitrc:$cond, vsrc:$T, vsrc:$F),
+ "#SELECT_VSRC",
+ [(set v2f64:$dst,
+ (select i1:$cond, v2f64:$T, v2f64:$F))]>;
+ def SELECT_CC_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
+ (ins crrc:$cond, f8rc:$T, f8rc:$F,
+ i32imm:$BROPC), "#SELECT_CC_VSFRC",
+ []>;
+ def SELECT_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
+ (ins crbitrc:$cond, f8rc:$T, f8rc:$F),
+ "#SELECT_VSFRC",
+ [(set f64:$dst,
+ (select i1:$cond, f64:$T, f64:$F))]>;
+ def SELECT_CC_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
+ (ins crrc:$cond, f4rc:$T, f4rc:$F,
+ i32imm:$BROPC), "#SELECT_CC_VSSRC",
+ []>;
+ def SELECT_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
+ (ins crbitrc:$cond, f4rc:$T, f4rc:$F),
+ "#SELECT_VSSRC",
+ [(set f32:$dst,
+ (select i1:$cond, f32:$T, f32:$F))]>;
+}
+}
+
+//----------------------------- DAG Definitions ------------------------------//
+def FpMinMax {
+ dag F32Min = (COPY_TO_REGCLASS (XSMINDP (COPY_TO_REGCLASS $A, VSFRC),
+ (COPY_TO_REGCLASS $B, VSFRC)),
+ VSSRC);
+ dag F32Max = (COPY_TO_REGCLASS (XSMAXDP (COPY_TO_REGCLASS $A, VSFRC),
+ (COPY_TO_REGCLASS $B, VSFRC)),
+ VSSRC);
+}
+
+def ScalarLoads {
+ dag Li8 = (i32 (extloadi8 xoaddr:$src));
+ dag ZELi8 = (i32 (zextloadi8 xoaddr:$src));
+ dag ZELi8i64 = (i64 (zextloadi8 xoaddr:$src));
+ dag SELi8 = (i32 (sext_inreg (extloadi8 xoaddr:$src), i8));
+ dag SELi8i64 = (i64 (sext_inreg (extloadi8 xoaddr:$src), i8));
+
+ dag Li16 = (i32 (extloadi16 xoaddr:$src));
+ dag ZELi16 = (i32 (zextloadi16 xoaddr:$src));
+ dag ZELi16i64 = (i64 (zextloadi16 xoaddr:$src));
+ dag SELi16 = (i32 (sextloadi16 xoaddr:$src));
+ dag SELi16i64 = (i64 (sextloadi16 xoaddr:$src));
+
+ dag Li32 = (i32 (load xoaddr:$src));
+}
+
+def DWToSPExtractConv {
+ dag El0US1 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
+ dag El1US1 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
+ dag El0US2 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
+ dag El1US2 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
+ dag El0SS1 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
+ dag El1SS1 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
+ dag El0SS2 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
+ dag El1SS2 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
+ dag BVU = (v4f32 (build_vector El0US1, El1US1, El0US2, El1US2));
+ dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2));
+}
+
+def WToDPExtractConv {
+ dag El0S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 0))));
+ dag El1S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 1))));
+ dag El2S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 2))));
+ dag El3S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 3))));
+ dag El0U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 0))));
+ dag El1U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 1))));
+ dag El2U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 2))));
+ dag El3U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 3))));
+ dag BV02S = (v2f64 (build_vector El0S, El2S));
+ dag BV13S = (v2f64 (build_vector El1S, El3S));
+ dag BV02U = (v2f64 (build_vector El0U, El2U));
+ dag BV13U = (v2f64 (build_vector El1U, El3U));
+}
/* Direct moves of various widths from GPR's into VSR's. Each move lines
the value up into element 0 (both BE and LE). Namely, entities smaller than
@@ -2038,1806 +2172,11 @@ def VectorExtractions {
dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC);
}
-def NoP9Altivec : Predicate<"!PPCSubTarget->hasP9Altivec()">;
-let AddedComplexity = 400 in {
-// v4f32 scalar <-> vector conversions (BE)
-let Predicates = [IsBigEndian, HasP8Vector] in {
- def : Pat<(v4f32 (scalar_to_vector f32:$A)),
- (v4f32 (XSCVDPSPN $A))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 0)),
- (f32 (XSCVSPDPN $S))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 1)),
- (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 2)),
- (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 3)),
- (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
- def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
- (f32 VectorExtractions.BE_VARIABLE_FLOAT)>;
-} // IsBigEndian, HasP8Vector
-
-// Variable index vector_extract for v2f64 does not require P8Vector
-let Predicates = [IsBigEndian, HasVSX] in
- def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
- (f64 VectorExtractions.BE_VARIABLE_DOUBLE)>;
-
-let Predicates = [IsBigEndian, HasDirectMove] in {
- // v16i8 scalar <-> vector conversions (BE)
- def : Pat<(v16i8 (scalar_to_vector i32:$A)),
- (v16i8 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64))>;
- def : Pat<(v8i16 (scalar_to_vector i32:$A)),
- (v8i16 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64))>;
- def : Pat<(v4i32 (scalar_to_vector i32:$A)),
- (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>;
- def : Pat<(v2i64 (scalar_to_vector i64:$A)),
- (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>;
-
- // v2i64 scalar <-> vector conversions (BE)
- def : Pat<(i64 (vector_extract v2i64:$S, 0)),
- (i64 VectorExtractions.LE_DWORD_1)>;
- def : Pat<(i64 (vector_extract v2i64:$S, 1)),
- (i64 VectorExtractions.LE_DWORD_0)>;
- def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)),
- (i64 VectorExtractions.BE_VARIABLE_DWORD)>;
-} // IsBigEndian, HasDirectMove
-
-let Predicates = [IsBigEndian, HasDirectMove, NoP9Altivec] in {
- def : Pat<(i32 (vector_extract v16i8:$S, 0)),
- (i32 VectorExtractions.LE_BYTE_15)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 1)),
- (i32 VectorExtractions.LE_BYTE_14)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 2)),
- (i32 VectorExtractions.LE_BYTE_13)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 3)),
- (i32 VectorExtractions.LE_BYTE_12)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 4)),
- (i32 VectorExtractions.LE_BYTE_11)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 5)),
- (i32 VectorExtractions.LE_BYTE_10)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 6)),
- (i32 VectorExtractions.LE_BYTE_9)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 7)),
- (i32 VectorExtractions.LE_BYTE_8)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 8)),
- (i32 VectorExtractions.LE_BYTE_7)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 9)),
- (i32 VectorExtractions.LE_BYTE_6)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 10)),
- (i32 VectorExtractions.LE_BYTE_5)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 11)),
- (i32 VectorExtractions.LE_BYTE_4)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 12)),
- (i32 VectorExtractions.LE_BYTE_3)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 13)),
- (i32 VectorExtractions.LE_BYTE_2)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 14)),
- (i32 VectorExtractions.LE_BYTE_1)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 15)),
- (i32 VectorExtractions.LE_BYTE_0)>;
- def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
- (i32 VectorExtractions.BE_VARIABLE_BYTE)>;
-
- // v8i16 scalar <-> vector conversions (BE)
- def : Pat<(i32 (vector_extract v8i16:$S, 0)),
- (i32 VectorExtractions.LE_HALF_7)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 1)),
- (i32 VectorExtractions.LE_HALF_6)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 2)),
- (i32 VectorExtractions.LE_HALF_5)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 3)),
- (i32 VectorExtractions.LE_HALF_4)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 4)),
- (i32 VectorExtractions.LE_HALF_3)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 5)),
- (i32 VectorExtractions.LE_HALF_2)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 6)),
- (i32 VectorExtractions.LE_HALF_1)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 7)),
- (i32 VectorExtractions.LE_HALF_0)>;
- def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
- (i32 VectorExtractions.BE_VARIABLE_HALF)>;
-
- // v4i32 scalar <-> vector conversions (BE)
- def : Pat<(i32 (vector_extract v4i32:$S, 0)),
- (i32 VectorExtractions.LE_WORD_3)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 1)),
- (i32 VectorExtractions.LE_WORD_2)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 2)),
- (i32 VectorExtractions.LE_WORD_1)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 3)),
- (i32 VectorExtractions.LE_WORD_0)>;
- def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
- (i32 VectorExtractions.BE_VARIABLE_WORD)>;
-} // IsBigEndian, HasDirectMove, NoP9Altivec
-
-// v4f32 scalar <-> vector conversions (LE)
-let Predicates = [IsLittleEndian, HasP8Vector] in {
- def : Pat<(v4f32 (scalar_to_vector f32:$A)),
- (v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 0)),
- (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 1)),
- (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 2)),
- (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 3)),
- (f32 (XSCVSPDPN $S))>;
- def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
- (f32 VectorExtractions.LE_VARIABLE_FLOAT)>;
-} // IsLittleEndian, HasP8Vector
-
-// Variable index vector_extract for v2f64 does not require P8Vector
-let Predicates = [IsLittleEndian, HasVSX] in
- def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
- (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;
-
-def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
- (STXVD2X $rS, xoaddr:$dst)>;
-def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
- (STXVW4X $rS, xoaddr:$dst)>;
-def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
-def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
-
-// Variable index unsigned vector_extract on Power9
-let Predicates = [HasP9Altivec, IsLittleEndian] in {
- def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
- (VEXTUBRX $Idx, $S)>;
-
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
- (VEXTUHRX (RLWINM8 $Idx, 1, 28, 30), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
- (VEXTUHRX (LI8 0), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
- (VEXTUHRX (LI8 2), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
- (VEXTUHRX (LI8 4), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
- (VEXTUHRX (LI8 6), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
- (VEXTUHRX (LI8 8), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
- (VEXTUHRX (LI8 10), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
- (VEXTUHRX (LI8 12), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
- (VEXTUHRX (LI8 14), $S)>;
-
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
- (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S)>;
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
- (VEXTUWRX (LI8 0), $S)>;
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
- (VEXTUWRX (LI8 4), $S)>;
- // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (i32 VectorExtractions.LE_WORD_2), sub_32)>;
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
- (VEXTUWRX (LI8 12), $S)>;
-
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
- (EXTSW (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S))>;
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
- (EXTSW (VEXTUWRX (LI8 0), $S))>;
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
- (EXTSW (VEXTUWRX (LI8 4), $S))>;
- // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
- (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (i32 VectorExtractions.LE_WORD_2), sub_32))>;
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
- (EXTSW (VEXTUWRX (LI8 12), $S))>;
-
- def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX $Idx, $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 0)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 0), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 1)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 1), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 2)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 2), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 3)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 3), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 4)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 4), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 5)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 5), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 6)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 6), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 7)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 7), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 8)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 8), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 9)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 9), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 10)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 10), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 11)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 11), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 12)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 12), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 13)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 13), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 14)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 14), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 15)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 15), $S), sub_32))>;
-
- def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX
- (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 0)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 0), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 1)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 2), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 2)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 4), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 3)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 6), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 4)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 8), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 5)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 10), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 6)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 12), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 6)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 14), $S), sub_32))>;
-
- def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
- (i32 (EXTRACT_SUBREG (VEXTUWRX
- (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v4i32:$S, 0)),
- (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 0), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v4i32:$S, 1)),
- (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 4), $S), sub_32))>;
- // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
- def : Pat<(i32 (vector_extract v4i32:$S, 2)),
- (i32 VectorExtractions.LE_WORD_2)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 3)),
- (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 12), $S), sub_32))>;
-}
-
-let Predicates = [HasP9Altivec, IsBigEndian] in {
- def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
- (VEXTUBLX $Idx, $S)>;
-
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
- (VEXTUHLX (RLWINM8 $Idx, 1, 28, 30), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
- (VEXTUHLX (LI8 0), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
- (VEXTUHLX (LI8 2), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
- (VEXTUHLX (LI8 4), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
- (VEXTUHLX (LI8 6), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
- (VEXTUHLX (LI8 8), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
- (VEXTUHLX (LI8 10), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
- (VEXTUHLX (LI8 12), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
- (VEXTUHLX (LI8 14), $S)>;
-
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
- (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S)>;
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
- (VEXTUWLX (LI8 0), $S)>;
-
- // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (i32 VectorExtractions.LE_WORD_2), sub_32)>;
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
- (VEXTUWLX (LI8 8), $S)>;
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
- (VEXTUWLX (LI8 12), $S)>;
-
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
- (EXTSW (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S))>;
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
- (EXTSW (VEXTUWLX (LI8 0), $S))>;
- // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
- (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (i32 VectorExtractions.LE_WORD_2), sub_32))>;
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
- (EXTSW (VEXTUWLX (LI8 8), $S))>;
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
- (EXTSW (VEXTUWLX (LI8 12), $S))>;
-
- def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX $Idx, $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 0)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 0), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 1)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 1), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 2)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 2), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 3)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 3), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 4)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 4), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 5)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 5), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 6)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 6), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 7)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 7), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 8)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 8), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 9)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 9), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 10)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 10), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 11)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 11), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 12)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 12), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 13)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 13), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 14)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 14), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 15)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 15), $S), sub_32))>;
-
- def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX
- (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 0)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 0), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 1)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 2), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 2)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 4), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 3)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 6), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 4)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 8), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 5)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 10), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 6)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 12), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 6)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 14), $S), sub_32))>;
-
- def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
- (i32 (EXTRACT_SUBREG (VEXTUWLX
- (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v4i32:$S, 0)),
- (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 0), $S), sub_32))>;
- // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
- def : Pat<(i32 (vector_extract v4i32:$S, 1)),
- (i32 VectorExtractions.LE_WORD_2)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 2)),
- (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 8), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v4i32:$S, 3)),
- (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 12), $S), sub_32))>;
-}
-
-let Predicates = [IsLittleEndian, HasDirectMove] in {
- // v16i8 scalar <-> vector conversions (LE)
- def : Pat<(v16i8 (scalar_to_vector i32:$A)),
- (v16i8 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>;
- def : Pat<(v8i16 (scalar_to_vector i32:$A)),
- (v8i16 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>;
- def : Pat<(v4i32 (scalar_to_vector i32:$A)),
- (v4i32 MovesToVSR.LE_WORD_0)>;
- def : Pat<(v2i64 (scalar_to_vector i64:$A)),
- (v2i64 MovesToVSR.LE_DWORD_0)>;
- // v2i64 scalar <-> vector conversions (LE)
- def : Pat<(i64 (vector_extract v2i64:$S, 0)),
- (i64 VectorExtractions.LE_DWORD_0)>;
- def : Pat<(i64 (vector_extract v2i64:$S, 1)),
- (i64 VectorExtractions.LE_DWORD_1)>;
- def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)),
- (i64 VectorExtractions.LE_VARIABLE_DWORD)>;
-} // IsLittleEndian, HasDirectMove
-
-let Predicates = [IsLittleEndian, HasDirectMove, NoP9Altivec] in {
- def : Pat<(i32 (vector_extract v16i8:$S, 0)),
- (i32 VectorExtractions.LE_BYTE_0)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 1)),
- (i32 VectorExtractions.LE_BYTE_1)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 2)),
- (i32 VectorExtractions.LE_BYTE_2)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 3)),
- (i32 VectorExtractions.LE_BYTE_3)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 4)),
- (i32 VectorExtractions.LE_BYTE_4)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 5)),
- (i32 VectorExtractions.LE_BYTE_5)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 6)),
- (i32 VectorExtractions.LE_BYTE_6)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 7)),
- (i32 VectorExtractions.LE_BYTE_7)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 8)),
- (i32 VectorExtractions.LE_BYTE_8)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 9)),
- (i32 VectorExtractions.LE_BYTE_9)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 10)),
- (i32 VectorExtractions.LE_BYTE_10)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 11)),
- (i32 VectorExtractions.LE_BYTE_11)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 12)),
- (i32 VectorExtractions.LE_BYTE_12)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 13)),
- (i32 VectorExtractions.LE_BYTE_13)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 14)),
- (i32 VectorExtractions.LE_BYTE_14)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 15)),
- (i32 VectorExtractions.LE_BYTE_15)>;
- def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
- (i32 VectorExtractions.LE_VARIABLE_BYTE)>;
-
- // v8i16 scalar <-> vector conversions (LE)
- def : Pat<(i32 (vector_extract v8i16:$S, 0)),
- (i32 VectorExtractions.LE_HALF_0)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 1)),
- (i32 VectorExtractions.LE_HALF_1)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 2)),
- (i32 VectorExtractions.LE_HALF_2)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 3)),
- (i32 VectorExtractions.LE_HALF_3)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 4)),
- (i32 VectorExtractions.LE_HALF_4)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 5)),
- (i32 VectorExtractions.LE_HALF_5)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 6)),
- (i32 VectorExtractions.LE_HALF_6)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 7)),
- (i32 VectorExtractions.LE_HALF_7)>;
- def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
- (i32 VectorExtractions.LE_VARIABLE_HALF)>;
-
- // v4i32 scalar <-> vector conversions (LE)
- def : Pat<(i32 (vector_extract v4i32:$S, 0)),
- (i32 VectorExtractions.LE_WORD_0)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 1)),
- (i32 VectorExtractions.LE_WORD_1)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 2)),
- (i32 VectorExtractions.LE_WORD_2)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 3)),
- (i32 VectorExtractions.LE_WORD_3)>;
- def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
- (i32 VectorExtractions.LE_VARIABLE_WORD)>;
-} // IsLittleEndian, HasDirectMove, NoP9Altivec
-
-let Predicates = [HasDirectMove, HasVSX] in {
-// bitconvert f32 -> i32
-// (convert to 32-bit fp single, shift right 1 word, move to GPR)
-def : Pat<(i32 (bitconvert f32:$S)),
- (i32 (MFVSRWZ (EXTRACT_SUBREG
- (XXSLDWI (XSCVDPSPN $S), (XSCVDPSPN $S), 3),
- sub_64)))>;
-// bitconvert i32 -> f32
-// (move to FPR, shift left 1 word, convert to 64-bit fp single)
-def : Pat<(f32 (bitconvert i32:$A)),
- (f32 (XSCVSPDPN
- (XXSLDWI MovesToVSR.LE_WORD_1, MovesToVSR.LE_WORD_1, 1)))>;
-
-// bitconvert f64 -> i64
-// (move to GPR, nothing else needed)
-def : Pat<(i64 (bitconvert f64:$S)),
- (i64 (MFVSRD $S))>;
-
-// bitconvert i64 -> f64
-// (move to FPR, nothing else needed)
-def : Pat<(f64 (bitconvert i64:$S)),
- (f64 (MTVSRD $S))>;
-
-// Rounding to integer.
-def : Pat<(i64 (lrint f64:$S)),
- (i64 (MFVSRD (FCTID $S)))>;
-def : Pat<(i64 (lrint f32:$S)),
- (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
-def : Pat<(i64 (llrint f64:$S)),
- (i64 (MFVSRD (FCTID $S)))>;
-def : Pat<(i64 (llrint f32:$S)),
- (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
-def : Pat<(i64 (lround f64:$S)),
- (i64 (MFVSRD (FCTID (XSRDPI $S))))>;
-def : Pat<(i64 (lround f32:$S)),
- (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
-def : Pat<(i64 (llround f64:$S)),
- (i64 (MFVSRD (FCTID (XSRDPI $S))))>;
-def : Pat<(i64 (llround f32:$S)),
- (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
-}
-
-let Predicates = [HasVSX] in {
-// Rounding for single precision.
-def : Pat<(f32 (fround f32:$S)),
- (f32 (COPY_TO_REGCLASS (XSRDPI
- (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f32 (fnearbyint f32:$S)),
- (f32 (COPY_TO_REGCLASS (XSRDPIC
- (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f32 (ffloor f32:$S)),
- (f32 (COPY_TO_REGCLASS (XSRDPIM
- (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f32 (fceil f32:$S)),
- (f32 (COPY_TO_REGCLASS (XSRDPIP
- (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f32 (ftrunc f32:$S)),
- (f32 (COPY_TO_REGCLASS (XSRDPIZ
- (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-}
-
-// Materialize a zero-vector of long long
-def : Pat<(v2i64 immAllZerosV),
- (v2i64 (XXLXORz))>;
-}
-
def AlignValues {
dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3));
dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC);
}
-// The following VSX instructions were introduced in Power ISA 3.0
-def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">;
-let AddedComplexity = 400, Predicates = [HasP9Vector] in {
-
- // [PO VRT XO VRB XO /]
- class X_VT5_XO5_VB5<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
- list<dag> pattern>
- : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vrrc:$vB),
- !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
-
- // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
- class X_VT5_XO5_VB5_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
- list<dag> pattern>
- : X_VT5_XO5_VB5<opcode, xo2, xo, opc, pattern>, isRecordForm;
-
- // [PO VRT XO VRB XO /], but the VRB is only used the left 64 bits (or less),
- // So we use different operand class for VRB
- class X_VT5_XO5_VB5_TyVB<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
- RegisterOperand vbtype, list<dag> pattern>
- : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vbtype:$vB),
- !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
-
- // [PO VRT XO VRB XO /]
- class X_VT5_XO5_VB5_VSFR<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
- list<dag> pattern>
- : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vfrc:$vT), (ins vrrc:$vB),
- !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
-
- // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
- class X_VT5_XO5_VB5_VSFR_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
- list<dag> pattern>
- : X_VT5_XO5_VB5_VSFR<opcode, xo2, xo, opc, pattern>, isRecordForm;
-
- // [PO T XO B XO BX /]
- class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
- list<dag> pattern>
- : XX2_RD5_XO5_RS6<opcode, xo2, xo, (outs g8rc:$rT), (ins vsfrc:$XB),
- !strconcat(opc, " $rT, $XB"), IIC_VecFP, pattern>;
-
- // [PO T XO B XO BX TX]
- class XX2_XT6_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
- RegisterOperand vtype, list<dag> pattern>
- : XX2_RD6_XO5_RS6<opcode, xo2, xo, (outs vtype:$XT), (ins vtype:$XB),
- !strconcat(opc, " $XT, $XB"), IIC_VecFP, pattern>;
-
- // [PO T A B XO AX BX TX], src and dest register use different operand class
- class XX3_XT5_XA5_XB5<bits<6> opcode, bits<8> xo, string opc,
- RegisterOperand xty, RegisterOperand aty, RegisterOperand bty,
- InstrItinClass itin, list<dag> pattern>
- : XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB),
- !strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;
-
- // [PO VRT VRA VRB XO /]
- class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
- list<dag> pattern>
- : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>;
-
- // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
- class X_VT5_VA5_VB5_Ro<bits<6> opcode, bits<10> xo, string opc,
- list<dag> pattern>
- : X_VT5_VA5_VB5<opcode, xo, opc, pattern>, isRecordForm;
-
- // [PO VRT VRA VRB XO /]
- class X_VT5_VA5_VB5_FMA<bits<6> opcode, bits<10> xo, string opc,
- list<dag> pattern>
- : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vTi, vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>,
- RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">;
-
- // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
- class X_VT5_VA5_VB5_FMA_Ro<bits<6> opcode, bits<10> xo, string opc,
- list<dag> pattern>
- : X_VT5_VA5_VB5_FMA<opcode, xo, opc, pattern>, isRecordForm;
-
- //===--------------------------------------------------------------------===//
- // Quad-Precision Scalar Move Instructions:
-
- // Copy Sign
- def XSCPSGNQP : X_VT5_VA5_VB5<63, 100, "xscpsgnqp",
- [(set f128:$vT,
- (fcopysign f128:$vB, f128:$vA))]>;
-
- // Absolute/Negative-Absolute/Negate
- def XSABSQP : X_VT5_XO5_VB5<63, 0, 804, "xsabsqp",
- [(set f128:$vT, (fabs f128:$vB))]>;
- def XSNABSQP : X_VT5_XO5_VB5<63, 8, 804, "xsnabsqp",
- [(set f128:$vT, (fneg (fabs f128:$vB)))]>;
- def XSNEGQP : X_VT5_XO5_VB5<63, 16, 804, "xsnegqp",
- [(set f128:$vT, (fneg f128:$vB))]>;
-
- //===--------------------------------------------------------------------===//
- // Quad-Precision Scalar Floating-Point Arithmetic Instructions:
-
- // Add/Divide/Multiply/Subtract
- let isCommutable = 1 in {
- def XSADDQP : X_VT5_VA5_VB5 <63, 4, "xsaddqp",
- [(set f128:$vT, (fadd f128:$vA, f128:$vB))]>;
- def XSMULQP : X_VT5_VA5_VB5 <63, 36, "xsmulqp",
- [(set f128:$vT, (fmul f128:$vA, f128:$vB))]>;
- }
- def XSSUBQP : X_VT5_VA5_VB5 <63, 516, "xssubqp" ,
- [(set f128:$vT, (fsub f128:$vA, f128:$vB))]>;
- def XSDIVQP : X_VT5_VA5_VB5 <63, 548, "xsdivqp",
- [(set f128:$vT, (fdiv f128:$vA, f128:$vB))]>;
- // Square-Root
- def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp",
- [(set f128:$vT, (fsqrt f128:$vB))]>;
- // (Negative) Multiply-{Add/Subtract}
- def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp",
- [(set f128:$vT,
- (fma f128:$vA, f128:$vB,
- f128:$vTi))]>;
- def XSMSUBQP : X_VT5_VA5_VB5_FMA <63, 420, "xsmsubqp" ,
- [(set f128:$vT,
- (fma f128:$vA, f128:$vB,
- (fneg f128:$vTi)))]>;
- def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp",
- [(set f128:$vT,
- (fneg (fma f128:$vA, f128:$vB,
- f128:$vTi)))]>;
- def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp",
- [(set f128:$vT,
- (fneg (fma f128:$vA, f128:$vB,
- (fneg f128:$vTi))))]>;
-
- let isCommutable = 1 in {
- def XSADDQPO : X_VT5_VA5_VB5_Ro<63, 4, "xsaddqpo",
- [(set f128:$vT,
- (int_ppc_addf128_round_to_odd
- f128:$vA, f128:$vB))]>;
- def XSMULQPO : X_VT5_VA5_VB5_Ro<63, 36, "xsmulqpo",
- [(set f128:$vT,
- (int_ppc_mulf128_round_to_odd
- f128:$vA, f128:$vB))]>;
- }
- def XSSUBQPO : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo",
- [(set f128:$vT,
- (int_ppc_subf128_round_to_odd
- f128:$vA, f128:$vB))]>;
- def XSDIVQPO : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo",
- [(set f128:$vT,
- (int_ppc_divf128_round_to_odd
- f128:$vA, f128:$vB))]>;
- def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo",
- [(set f128:$vT,
- (int_ppc_sqrtf128_round_to_odd f128:$vB))]>;
-
-
- def XSMADDQPO : X_VT5_VA5_VB5_FMA_Ro<63, 388, "xsmaddqpo",
- [(set f128:$vT,
- (int_ppc_fmaf128_round_to_odd
- f128:$vA,f128:$vB,f128:$vTi))]>;
-
- def XSMSUBQPO : X_VT5_VA5_VB5_FMA_Ro<63, 420, "xsmsubqpo" ,
- [(set f128:$vT,
- (int_ppc_fmaf128_round_to_odd
- f128:$vA, f128:$vB, (fneg f128:$vTi)))]>;
- def XSNMADDQPO: X_VT5_VA5_VB5_FMA_Ro<63, 452, "xsnmaddqpo",
- [(set f128:$vT,
- (fneg (int_ppc_fmaf128_round_to_odd
- f128:$vA, f128:$vB, f128:$vTi)))]>;
- def XSNMSUBQPO: X_VT5_VA5_VB5_FMA_Ro<63, 484, "xsnmsubqpo",
- [(set f128:$vT,
- (fneg (int_ppc_fmaf128_round_to_odd
- f128:$vA, f128:$vB, (fneg f128:$vTi))))]>;
-
- // Additional fnmsub patterns: -a*b + c == -(a*b - c)
- def : Pat<(fma (fneg f128:$A), f128:$B, f128:$C), (XSNMSUBQP $C, $A, $B)>;
- def : Pat<(fma f128:$A, (fneg f128:$B), f128:$C), (XSNMSUBQP $C, $A, $B)>;
-
- //===--------------------------------------------------------------------===//
- // Quad/Double-Precision Compare Instructions:
-
- // [PO BF // VRA VRB XO /]
- class X_BF3_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
- list<dag> pattern>
- : XForm_17<opcode, xo, (outs crrc:$crD), (ins vrrc:$VA, vrrc:$VB),
- !strconcat(opc, " $crD, $VA, $VB"), IIC_FPCompare> {
- let Pattern = pattern;
- }
-
- // QP Compare Ordered/Unordered
- def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>;
- def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>;
-
- // DP/QP Compare Exponents
- def XSCMPEXPDP : XX3Form_1<60, 59,
- (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
- "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>;
- def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>;
-
- // DP Compare ==, >=, >, !=
- // Use vsrc for XT, because the entire register of XT is set.
- // XT.dword[1] = 0x0000_0000_0000_0000
- def XSCMPEQDP : XX3_XT5_XA5_XB5<60, 3, "xscmpeqdp", vsrc, vsfrc, vsfrc,
- IIC_FPCompare, []>;
- def XSCMPGEDP : XX3_XT5_XA5_XB5<60, 19, "xscmpgedp", vsrc, vsfrc, vsfrc,
- IIC_FPCompare, []>;
- def XSCMPGTDP : XX3_XT5_XA5_XB5<60, 11, "xscmpgtdp", vsrc, vsfrc, vsfrc,
- IIC_FPCompare, []>;
-
- //===--------------------------------------------------------------------===//
- // Quad-Precision Floating-Point Conversion Instructions:
-
- // Convert DP -> QP
- def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vfrc,
- [(set f128:$vT, (fpextend f64:$vB))]>;
-
- // Round & Convert QP -> DP (dword[1] is set to zero)
- def XSCVQPDP : X_VT5_XO5_VB5_VSFR<63, 20, 836, "xscvqpdp" , []>;
- def XSCVQPDPO : X_VT5_XO5_VB5_VSFR_Ro<63, 20, 836, "xscvqpdpo",
- [(set f64:$vT,
- (int_ppc_truncf128_round_to_odd
- f128:$vB))]>;
-
- // Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero)
- def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>;
- def XSCVQPSWZ : X_VT5_XO5_VB5<63, 9, 836, "xscvqpswz", []>;
- def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz", []>;
- def XSCVQPUWZ : X_VT5_XO5_VB5<63, 1, 836, "xscvqpuwz", []>;
-
- // Convert (Un)Signed DWord -> QP.
- def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vfrc, []>;
- def : Pat<(f128 (sint_to_fp i64:$src)),
- (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
- def : Pat<(f128 (sint_to_fp (i64 (PPCmfvsr f64:$src)))),
- (f128 (XSCVSDQP $src))>;
- def : Pat<(f128 (sint_to_fp (i32 (PPCmfvsr f64:$src)))),
- (f128 (XSCVSDQP (VEXTSW2Ds $src)))>;
-
- def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vfrc, []>;
- def : Pat<(f128 (uint_to_fp i64:$src)),
- (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
- def : Pat<(f128 (uint_to_fp (i64 (PPCmfvsr f64:$src)))),
- (f128 (XSCVUDQP $src))>;
-
- // Convert (Un)Signed Word -> QP.
- def : Pat<(f128 (sint_to_fp i32:$src)),
- (f128 (XSCVSDQP (MTVSRWA $src)))>;
- def : Pat<(f128 (sint_to_fp (i32 (load xoaddr:$src)))),
- (f128 (XSCVSDQP (LIWAX xoaddr:$src)))>;
- def : Pat<(f128 (uint_to_fp i32:$src)),
- (f128 (XSCVUDQP (MTVSRWZ $src)))>;
- def : Pat<(f128 (uint_to_fp (i32 (load xoaddr:$src)))),
- (f128 (XSCVUDQP (LIWZX xoaddr:$src)))>;
-
- //===--------------------------------------------------------------------===//
- // Round to Floating-Point Integer Instructions
-
- // (Round &) Convert DP <-> HP
- // Note! xscvdphp's src and dest register both use the left 64 bits, so we use
- // vsfrc for src and dest register. xscvhpdp's src only use the left 16 bits,
- // but we still use vsfrc for it.
- def XSCVDPHP : XX2_XT6_XO5_XB6<60, 17, 347, "xscvdphp", vsfrc, []>;
- def XSCVHPDP : XX2_XT6_XO5_XB6<60, 16, 347, "xscvhpdp", vsfrc, []>;
-
- // Vector HP -> SP
- def XVCVHPSP : XX2_XT6_XO5_XB6<60, 24, 475, "xvcvhpsp", vsrc, []>;
- def XVCVSPHP : XX2_XT6_XO5_XB6<60, 25, 475, "xvcvsphp", vsrc,
- [(set v4f32:$XT,
- (int_ppc_vsx_xvcvsphp v4f32:$XB))]>;
-
- // Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a
- // separate pattern so that it can convert the input register class from
- // VRRC(v8i16) to VSRC.
- def : Pat<(v4f32 (int_ppc_vsx_xvcvhpsp v8i16:$A)),
- (v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>;
-
- class Z23_VT5_R1_VB5_RMC2_EX1<bits<6> opcode, bits<8> xo, bit ex, string opc,
- list<dag> pattern>
- : Z23Form_8<opcode, xo,
- (outs vrrc:$vT), (ins u1imm:$r, vrrc:$vB, u2imm:$rmc),
- !strconcat(opc, " $r, $vT, $vB, $rmc"), IIC_VecFP, pattern> {
- let RC = ex;
- }
-
- // Round to Quad-Precision Integer [with Inexact]
- def XSRQPI : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 0, "xsrqpi" , []>;
- def XSRQPIX : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 1, "xsrqpix", []>;
-
- // Use current rounding mode
- def : Pat<(f128 (fnearbyint f128:$vB)), (f128 (XSRQPI 0, $vB, 3))>;
- // Round to nearest, ties away from zero
- def : Pat<(f128 (fround f128:$vB)), (f128 (XSRQPI 0, $vB, 0))>;
- // Round towards Zero
- def : Pat<(f128 (ftrunc f128:$vB)), (f128 (XSRQPI 1, $vB, 1))>;
- // Round towards +Inf
- def : Pat<(f128 (fceil f128:$vB)), (f128 (XSRQPI 1, $vB, 2))>;
- // Round towards -Inf
- def : Pat<(f128 (ffloor f128:$vB)), (f128 (XSRQPI 1, $vB, 3))>;
-
- // Use current rounding mode, [with Inexact]
- def : Pat<(f128 (frint f128:$vB)), (f128 (XSRQPIX 0, $vB, 3))>;
-
- // Round Quad-Precision to Double-Extended Precision (fp80)
- def XSRQPXP : Z23_VT5_R1_VB5_RMC2_EX1<63, 37, 0, "xsrqpxp", []>;
-
- //===--------------------------------------------------------------------===//
- // Insert/Extract Instructions
-
- // Insert Exponent DP/QP
- // XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU
- def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
- "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>;
- // vB NOTE: only vB.dword[0] is used, that's why we don't use
- // X_VT5_VA5_VB5 form
- def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB),
- "xsiexpqp $vT, $vA, $vB", IIC_VecFP, []>;
-
- def : Pat<(f128 (int_ppc_scalar_insert_exp_qp f128:$vA, i64:$vB)),
- (f128 (XSIEXPQP $vA, (MTVSRD $vB)))>;
-
- // Extract Exponent/Significand DP/QP
- def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>;
- def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>;
-
- def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>;
- def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>;
-
- def : Pat<(i64 (int_ppc_scalar_extract_expq f128:$vA)),
- (i64 (MFVSRD (EXTRACT_SUBREG
- (v2i64 (XSXEXPQP $vA)), sub_64)))>;
-
- // Vector Insert Word
- // XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
- def XXINSERTW :
- XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT),
- (ins vsrc:$XTi, vsrc:$XB, u4imm:$UIM),
- "xxinsertw $XT, $XB, $UIM", IIC_VecFP,
- [(set v4i32:$XT, (PPCvecinsert v4i32:$XTi, v4i32:$XB,
- imm32SExt16:$UIM))]>,
- RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
-
- // Vector Extract Unsigned Word
- def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165,
- (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM),
- "xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>;
-
- // Vector Insert Exponent DP/SP
- def XVIEXPDP : XX3_XT5_XA5_XB5<60, 248, "xviexpdp", vsrc, vsrc, vsrc,
- IIC_VecFP, [(set v2f64: $XT,(int_ppc_vsx_xviexpdp v2i64:$XA, v2i64:$XB))]>;
- def XVIEXPSP : XX3_XT5_XA5_XB5<60, 216, "xviexpsp", vsrc, vsrc, vsrc,
- IIC_VecFP, [(set v4f32: $XT,(int_ppc_vsx_xviexpsp v4i32:$XA, v4i32:$XB))]>;
-
- // Vector Extract Exponent/Significand DP/SP
- def XVXEXPDP : XX2_XT6_XO5_XB6<60, 0, 475, "xvxexpdp", vsrc,
- [(set v2i64: $XT,
- (int_ppc_vsx_xvxexpdp v2f64:$XB))]>;
- def XVXEXPSP : XX2_XT6_XO5_XB6<60, 8, 475, "xvxexpsp", vsrc,
- [(set v4i32: $XT,
- (int_ppc_vsx_xvxexpsp v4f32:$XB))]>;
- def XVXSIGDP : XX2_XT6_XO5_XB6<60, 1, 475, "xvxsigdp", vsrc,
- [(set v2i64: $XT,
- (int_ppc_vsx_xvxsigdp v2f64:$XB))]>;
- def XVXSIGSP : XX2_XT6_XO5_XB6<60, 9, 475, "xvxsigsp", vsrc,
- [(set v4i32: $XT,
- (int_ppc_vsx_xvxsigsp v4f32:$XB))]>;
-
- let AddedComplexity = 400, Predicates = [HasP9Vector] in {
- // Extra patterns expanding to vector Extract Word/Insert Word
- def : Pat<(v4i32 (int_ppc_vsx_xxinsertw v4i32:$A, v2i64:$B, imm:$IMM)),
- (v4i32 (XXINSERTW $A, $B, imm:$IMM))>;
- def : Pat<(v2i64 (int_ppc_vsx_xxextractuw v2i64:$A, imm:$IMM)),
- (v2i64 (COPY_TO_REGCLASS (XXEXTRACTUW $A, imm:$IMM), VSRC))>;
- } // AddedComplexity = 400, HasP9Vector
-
- //===--------------------------------------------------------------------===//
-
- // Test Data Class SP/DP/QP
- def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298,
- (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
- "xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>;
- def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362,
- (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
- "xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>;
- def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708,
- (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB),
- "xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>;
-
- // Vector Test Data Class SP/DP
- def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5,
- (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
- "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP,
- [(set v4i32: $XT,
- (int_ppc_vsx_xvtstdcsp v4f32:$XB, timm:$DCMX))]>;
- def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5,
- (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
- "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP,
- [(set v2i64: $XT,
- (int_ppc_vsx_xvtstdcdp v2f64:$XB, timm:$DCMX))]>;
-
- //===--------------------------------------------------------------------===//
-
- // Maximum/Minimum Type-C/Type-J DP
- def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsfrc, vsfrc, vsfrc,
- IIC_VecFP,
- [(set f64:$XT, (PPCxsmaxc f64:$XA, f64:$XB))]>;
- def XSMAXJDP : XX3_XT5_XA5_XB5<60, 144, "xsmaxjdp", vsrc, vsfrc, vsfrc,
- IIC_VecFP, []>;
- def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsfrc, vsfrc, vsfrc,
- IIC_VecFP,
- [(set f64:$XT, (PPCxsminc f64:$XA, f64:$XB))]>;
- def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc,
- IIC_VecFP, []>;
-
- //===--------------------------------------------------------------------===//
-
- // Vector Byte-Reverse H/W/D/Q Word
- def XXBRH : XX2_XT6_XO5_XB6<60, 7, 475, "xxbrh", vsrc, []>;
- def XXBRW : XX2_XT6_XO5_XB6<60, 15, 475, "xxbrw", vsrc,
- [(set v4i32:$XT, (bswap v4i32:$XB))]>;
- def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc,
- [(set v2i64:$XT, (bswap v2i64:$XB))]>;
- def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>;
-
- // Vector Reverse
- def : Pat<(v8i16 (bswap v8i16 :$A)),
- (v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
- def : Pat<(v1i128 (bswap v1i128 :$A)),
- (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
-
- // Vector Permute
- def XXPERM : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc,
- IIC_VecPerm, []>;
- def XXPERMR : XX3_XT5_XA5_XB5<60, 58, "xxpermr", vsrc, vsrc, vsrc,
- IIC_VecPerm, []>;
-
- // Vector Splat Immediate Byte
- def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8),
- "xxspltib $XT, $IMM8", IIC_VecPerm, []>;
-
- //===--------------------------------------------------------------------===//
- // Vector/Scalar Load/Store Instructions
-
- // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
- // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
- let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
- // Load Vector
- def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
- "lxv $XT, $src", IIC_LdStLFD, []>;
- // Load DWord
- def LXSD : DSForm_1<57, 2, (outs vfrc:$vD), (ins memrix:$src),
- "lxsd $vD, $src", IIC_LdStLFD, []>;
- // Load SP from src, convert it to DP, and place in dword[0]
- def LXSSP : DSForm_1<57, 3, (outs vfrc:$vD), (ins memrix:$src),
- "lxssp $vD, $src", IIC_LdStLFD, []>;
-
- // [PO T RA RB XO TX] almost equal to [PO S RA RB XO SX], but has different
- // "out" and "in" dag
- class X_XT6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
- RegisterOperand vtype, list<dag> pattern>
- : XX1Form_memOp<opcode, xo, (outs vtype:$XT), (ins memrr:$src),
- !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>;
-
- // Load as Integer Byte/Halfword & Zero Indexed
- def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc,
- [(set f64:$XT, (PPClxsizx xoaddr:$src, 1))]>;
- def LXSIHZX : X_XT6_RA5_RB5<31, 813, "lxsihzx", vsfrc,
- [(set f64:$XT, (PPClxsizx xoaddr:$src, 2))]>;
-
- // Load Vector Halfword*8/Byte*16 Indexed
- def LXVH8X : X_XT6_RA5_RB5<31, 812, "lxvh8x" , vsrc, []>;
- def LXVB16X : X_XT6_RA5_RB5<31, 876, "lxvb16x", vsrc, []>;
-
- // Load Vector Indexed
- def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc,
- [(set v2f64:$XT, (load xaddrX16:$src))]>;
- // Load Vector (Left-justified) with Length
- def LXVL : XX1Form_memOp<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
- "lxvl $XT, $src, $rB", IIC_LdStLoad,
- [(set v4i32:$XT, (int_ppc_vsx_lxvl addr:$src, i64:$rB))]>;
- def LXVLL : XX1Form_memOp<31,301, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
- "lxvll $XT, $src, $rB", IIC_LdStLoad,
- [(set v4i32:$XT, (int_ppc_vsx_lxvll addr:$src, i64:$rB))]>;
-
- // Load Vector Word & Splat Indexed
- def LXVWSX : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>;
- } // mayLoad
-
- // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
- // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
- let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
- // Store Vector
- def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
- "stxv $XT, $dst", IIC_LdStSTFD, []>;
- // Store DWord
- def STXSD : DSForm_1<61, 2, (outs), (ins vfrc:$vS, memrix:$dst),
- "stxsd $vS, $dst", IIC_LdStSTFD, []>;
- // Convert DP of dword[0] to SP, and Store to dst
- def STXSSP : DSForm_1<61, 3, (outs), (ins vfrc:$vS, memrix:$dst),
- "stxssp $vS, $dst", IIC_LdStSTFD, []>;
-
- // [PO S RA RB XO SX]
- class X_XS6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
- RegisterOperand vtype, list<dag> pattern>
- : XX1Form_memOp<opcode, xo, (outs), (ins vtype:$XT, memrr:$dst),
- !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>;
-
- // Store as Integer Byte/Halfword Indexed
- def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc,
- [(PPCstxsix f64:$XT, xoaddr:$dst, 1)]>;
- def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc,
- [(PPCstxsix f64:$XT, xoaddr:$dst, 2)]>;
- let isCodeGenOnly = 1 in {
- def STXSIBXv : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsrc, []>;
- def STXSIHXv : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsrc, []>;
- }
-
- // Store Vector Halfword*8/Byte*16 Indexed
- def STXVH8X : X_XS6_RA5_RB5<31, 940, "stxvh8x" , vsrc, []>;
- def STXVB16X : X_XS6_RA5_RB5<31, 1004, "stxvb16x", vsrc, []>;
-
- // Store Vector Indexed
- def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc,
- [(store v2f64:$XT, xaddrX16:$dst)]>;
-
- // Store Vector (Left-justified) with Length
- def STXVL : XX1Form_memOp<31, 397, (outs),
- (ins vsrc:$XT, memr:$dst, g8rc:$rB),
- "stxvl $XT, $dst, $rB", IIC_LdStLoad,
- [(int_ppc_vsx_stxvl v4i32:$XT, addr:$dst,
- i64:$rB)]>;
- def STXVLL : XX1Form_memOp<31, 429, (outs),
- (ins vsrc:$XT, memr:$dst, g8rc:$rB),
- "stxvll $XT, $dst, $rB", IIC_LdStLoad,
- [(int_ppc_vsx_stxvll v4i32:$XT, addr:$dst,
- i64:$rB)]>;
- } // mayStore
-
- let Predicates = [IsLittleEndian] in {
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
- }
-
- let Predicates = [IsBigEndian] in {
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
- }
-
- // Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead
- // of f64
- def : Pat<(v8i16 (PPCmtvsrz i32:$A)),
- (v8i16 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
- def : Pat<(v16i8 (PPCmtvsrz i32:$A)),
- (v16i8 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
-
- // Patterns for which instructions from ISA 3.0 are a better match
- let Predicates = [IsLittleEndian, HasP9Vector] in {
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
-
- def : Pat<(v8i16 (PPCld_vec_be xoaddr:$src)),
- (COPY_TO_REGCLASS (LXVH8X xoaddr:$src), VRRC)>;
- def : Pat<(PPCst_vec_be v8i16:$rS, xoaddr:$dst),
- (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
-
- def : Pat<(v16i8 (PPCld_vec_be xoaddr:$src)),
- (COPY_TO_REGCLASS (LXVB16X xoaddr:$src), VRRC)>;
- def : Pat<(PPCst_vec_be v16i8:$rS, xoaddr:$dst),
- (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
- } // IsLittleEndian, HasP9Vector
-
- let Predicates = [IsBigEndian, HasP9Vector] in {
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
- } // IsBigEndian, HasP9Vector
-
- // D-Form Load/Store
- def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
- def : Pat<(v4f32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
- def : Pat<(v2i64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
- def : Pat<(v2f64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
- def : Pat<(f128 (quadwOffsetLoad iaddrX16:$src)),
- (COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>;
- def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddrX16:$src)), (LXV memrix16:$src)>;
- def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddrX16:$src)), (LXV memrix16:$src)>;
-
- def : Pat<(quadwOffsetStore v4f32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(quadwOffsetStore v4i32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(quadwOffsetStore v2f64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(quadwOffsetStore f128:$rS, iaddrX16:$dst),
- (STXV (COPY_TO_REGCLASS $rS, VSRC), memrix16:$dst)>;
- def : Pat<(quadwOffsetStore v2i64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddrX16:$dst),
- (STXV $rS, memrix16:$dst)>;
- def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddrX16:$dst),
- (STXV $rS, memrix16:$dst)>;
-
-
- def : Pat<(v2f64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v2i64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v4f32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v4i32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(f128 (nonQuadwOffsetLoad xoaddr:$src)),
- (COPY_TO_REGCLASS (LXVX xoaddr:$src), VRRC)>;
- def : Pat<(nonQuadwOffsetStore f128:$rS, xoaddr:$dst),
- (STXVX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
- def : Pat<(nonQuadwOffsetStore v2f64:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(nonQuadwOffsetStore v2i64:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
-
- let AddedComplexity = 400 in {
- // LIWAX - This instruction is used for sign extending i32 -> i64.
- // LIWZX - This instruction will be emitted for i32, f32, and when
- // zero-extending i32 to i64 (zext i32 -> i64).
- let Predicates = [IsLittleEndian] in {
-
- def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
- (v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC), 2))>;
-
- def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
- (v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;
-
- def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
- (v4i32 (XXPERMDIs
- (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;
-
- def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
- (v4f32 (XXPERMDIs
- (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;
- }
-
- let Predicates = [IsBigEndian] in {
- def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
- (v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>;
-
- def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
- (v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>;
-
- def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
- (v4i32 (XXSLDWIs
- (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
-
- def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
- (v4f32 (XXSLDWIs
- (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
- }
-
- }
-
- // Build vectors from i8 loads
- def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)),
- (v16i8 (VSPLTBs 7, (LXSIBZX xoaddr:$src)))>;
- def : Pat<(v8i16 (scalar_to_vector ScalarLoads.ZELi8)),
- (v8i16 (VSPLTHs 3, (LXSIBZX xoaddr:$src)))>;
- def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi8)),
- (v4i32 (XXSPLTWs (LXSIBZX xoaddr:$src), 1))>;
- def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi8i64)),
- (v2i64 (XXPERMDIs (LXSIBZX xoaddr:$src), 0))>;
- def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi8)),
- (v4i32 (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1))>;
- def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi8i64)),
- (v2i64 (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0))>;
-
- // Build vectors from i16 loads
- def : Pat<(v8i16 (scalar_to_vector ScalarLoads.Li16)),
- (v8i16 (VSPLTHs 3, (LXSIHZX xoaddr:$src)))>;
- def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi16)),
- (v4i32 (XXSPLTWs (LXSIHZX xoaddr:$src), 1))>;
- def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi16i64)),
- (v2i64 (XXPERMDIs (LXSIHZX xoaddr:$src), 0))>;
- def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi16)),
- (v4i32 (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1))>;
- def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)),
- (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>;
-
- // Load/convert and convert/store patterns for f16.
- def : Pat<(f64 (extloadf16 xoaddr:$src)),
- (f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>;
- def : Pat<(truncstoref16 f64:$src, xoaddr:$dst),
- (STXSIHX (XSCVDPHP $src), xoaddr:$dst)>;
- def : Pat<(f32 (extloadf16 xoaddr:$src)),
- (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>;
- def : Pat<(truncstoref16 f32:$src, xoaddr:$dst),
- (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>;
- def : Pat<(f64 (f16_to_fp i32:$A)),
- (f64 (XSCVHPDP (MTVSRWZ $A)))>;
- def : Pat<(f32 (f16_to_fp i32:$A)),
- (f32 (COPY_TO_REGCLASS (XSCVHPDP (MTVSRWZ $A)), VSSRC))>;
- def : Pat<(i32 (fp_to_f16 f32:$A)),
- (i32 (MFVSRWZ (XSCVDPHP (COPY_TO_REGCLASS $A, VSFRC))))>;
- def : Pat<(i32 (fp_to_f16 f64:$A)), (i32 (MFVSRWZ (XSCVDPHP $A)))>;
-
- let Predicates = [IsBigEndian, HasP9Vector] in {
- // Scalar stores of i8
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
-
- // Scalar stores of i16
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
- } // IsBigEndian, HasP9Vector
-
- let Predicates = [IsLittleEndian, HasP9Vector] in {
- // Scalar stores of i8
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>;
-
- // Scalar stores of i16
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
- } // IsLittleEndian, HasP9Vector
-
-
- // Vector sign extensions
- def : Pat<(f64 (PPCVexts f64:$A, 1)),
- (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>;
- def : Pat<(f64 (PPCVexts f64:$A, 2)),
- (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>;
-
- def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src),
- "#DFLOADf32",
- [(set f32:$XT, (load iaddrX4:$src))]>;
- def DFLOADf64 : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src),
- "#DFLOADf64",
- [(set f64:$XT, (load iaddrX4:$src))]>;
- def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst),
- "#DFSTOREf32",
- [(store f32:$XT, iaddrX4:$dst)]>;
- def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
- "#DFSTOREf64",
- [(store f64:$XT, iaddrX4:$dst)]>;
-
- def : Pat<(f64 (extloadf32 iaddrX4:$src)),
- (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$src), VSFRC)>;
- def : Pat<(f32 (fpround (f64 (extloadf32 iaddrX4:$src)))),
- (f32 (DFLOADf32 iaddrX4:$src))>;
-
- def : Pat<(v4f32 (PPCldvsxlh xaddr:$src)),
- (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC)>;
- def : Pat<(v4f32 (PPCldvsxlh iaddrX4:$src)),
- (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC)>;
-
- let AddedComplexity = 400 in {
- // The following pseudoinstructions are used to ensure the utilization
- // of all 64 VSX registers.
- let Predicates = [IsLittleEndian, HasP9Vector] in {
- def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
- (v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC), 2))>;
- def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
- (v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC), 2))>;
-
- def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
- (v2f64 (XXPERMDIs
- (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC), 2))>;
- def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
- (v2f64 (XXPERMDIs
- (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC), 2))>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), xaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), xaddrX4:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), iaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
- iaddrX4:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
- } // IsLittleEndian, HasP9Vector
-
- let Predicates = [IsBigEndian, HasP9Vector] in {
- def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
- (v2i64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
- def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
- (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
-
- def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
- (v2f64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
- def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
- (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), xaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), xaddrX4:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), iaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), iaddrX4:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
- } // IsBigEndian, HasP9Vector
- }
-
- let Predicates = [IsBigEndian, HasP9Vector] in {
-
- // (Un)Signed DWord vector extract -> QP
- def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
- (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
- def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),
- (f128 (XSCVSDQP
- (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
- def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))),
- (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
- def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))),
- (f128 (XSCVUDQP
- (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
-
- // (Un)Signed Word vector extract -> QP
- def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 1)))),
- (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>;
- foreach Idx = [0,2,3] in {
- def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, Idx)))),
- (f128 (XSCVSDQP (EXTRACT_SUBREG
- (VEXTSW2D (VSPLTW Idx, $src)), sub_64)))>;
- }
- foreach Idx = 0-3 in {
- def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, Idx)))),
- (f128 (XSCVUDQP (XXEXTRACTUW $src, !shl(Idx, 2))))>;
- }
-
- // (Un)Signed HWord vector extract -> QP
- foreach Idx = 0-7 in {
- def : Pat<(f128 (sint_to_fp
- (i32 (sext_inreg
- (vector_extract v8i16:$src, Idx), i16)))),
- (f128 (XSCVSDQP (EXTRACT_SUBREG
- (VEXTSH2D (VEXTRACTUH !add(Idx, Idx), $src)),
- sub_64)))>;
- // The SDAG adds the `and` since an `i16` is being extracted as an `i32`.
- def : Pat<(f128 (uint_to_fp
- (and (i32 (vector_extract v8i16:$src, Idx)), 65535))),
- (f128 (XSCVUDQP (EXTRACT_SUBREG
- (VEXTRACTUH !add(Idx, Idx), $src), sub_64)))>;
- }
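For illustration, here is a hand-expanded sketch of the Idx = 0 iteration of the unsigned pattern in the foreach above (an editorial expansion, not a line from the commit). The `and ..., 65535` is the zero-extension the SelectionDAG inserts when the i16 element is extracted as an i32, and `!add(Idx, Idx)` turns the halfword index into the byte index VEXTRACTUH expects:

  def : Pat<(f128 (uint_to_fp
                    (and (i32 (vector_extract v8i16:$src, 0)), 65535))),
            (f128 (XSCVUDQP
                    (EXTRACT_SUBREG (VEXTRACTUH 0, $src), sub_64)))>;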
-
- // (Un)Signed Byte vector extract -> QP
- foreach Idx = 0-15 in {
- def : Pat<(f128 (sint_to_fp
- (i32 (sext_inreg (vector_extract v16i8:$src, Idx),
- i8)))),
- (f128 (XSCVSDQP (EXTRACT_SUBREG
- (VEXTSB2D (VEXTRACTUB Idx, $src)), sub_64)))>;
- def : Pat<(f128 (uint_to_fp
- (and (i32 (vector_extract v16i8:$src, Idx)), 255))),
- (f128 (XSCVUDQP
- (EXTRACT_SUBREG (VEXTRACTUB Idx, $src), sub_64)))>;
- }
-
- // Unsigned int in VSX register -> QP
- def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
- (f128 (XSCVUDQP
- (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 4)))>;
- } // IsBigEndian, HasP9Vector
-
- let Predicates = [IsLittleEndian, HasP9Vector] in {
-
- // (Un)Signed DWord vector extract -> QP
- def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
- (f128 (XSCVSDQP
- (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
- def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),
- (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
- def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))),
- (f128 (XSCVUDQP
- (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
- def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))),
- (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
-
- // (Un)Signed Word vector extract -> QP
- foreach Idx = [[0,3],[1,2],[3,0]] in {
- def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))),
- (f128 (XSCVSDQP (EXTRACT_SUBREG
- (VEXTSW2D (VSPLTW !head(!tail(Idx)), $src)),
- sub_64)))>;
- }
- def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 2)))),
- (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>;
-
- foreach Idx = [[0,12],[1,8],[2,4],[3,0]] in {
- def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))),
- (f128 (XSCVUDQP (XXEXTRACTUW $src, !head(!tail(Idx)))))>;
- }
-
- // (Un)Signed HWord vector extract -> QP
- // The nested foreach lists identify the vector element and the corresponding
- // register byte location.
- foreach Idx = [[0,14],[1,12],[2,10],[3,8],[4,6],[5,4],[6,2],[7,0]] in {
- def : Pat<(f128 (sint_to_fp
- (i32 (sext_inreg
- (vector_extract v8i16:$src, !head(Idx)), i16)))),
- (f128 (XSCVSDQP
- (EXTRACT_SUBREG (VEXTSH2D
- (VEXTRACTUH !head(!tail(Idx)), $src)),
- sub_64)))>;
- def : Pat<(f128 (uint_to_fp
- (and (i32 (vector_extract v8i16:$src, !head(Idx))),
- 65535))),
- (f128 (XSCVUDQP (EXTRACT_SUBREG
- (VEXTRACTUH !head(!tail(Idx)), $src), sub_64)))>;
- }
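As a worked example (hand-instantiated from the foreach above rather than taken from the commit), the pair Idx = [1,12] records that element 1 in little-endian element numbering sits at register byte 12 in the instruction's left-to-right byte numbering, so the signed pattern expands to roughly:

  def : Pat<(f128 (sint_to_fp
                    (i32 (sext_inreg (vector_extract v8i16:$src, 1), i16)))),
            (f128 (XSCVSDQP
                    (EXTRACT_SUBREG (VEXTSH2D (VEXTRACTUH 12, $src)),
                                    sub_64)))>;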
-
- // (Un)Signed Byte vector extract -> QP
- foreach Idx = [[0,15],[1,14],[2,13],[3,12],[4,11],[5,10],[6,9],[7,8],[8,7],
- [9,6],[10,5],[11,4],[12,3],[13,2],[14,1],[15,0]] in {
- def : Pat<(f128 (sint_to_fp
- (i32 (sext_inreg
- (vector_extract v16i8:$src, !head(Idx)), i8)))),
- (f128 (XSCVSDQP
- (EXTRACT_SUBREG
- (VEXTSB2D (VEXTRACTUB !head(!tail(Idx)), $src)),
- sub_64)))>;
- def : Pat<(f128 (uint_to_fp
- (and (i32 (vector_extract v16i8:$src, !head(Idx))),
- 255))),
- (f128 (XSCVUDQP
- (EXTRACT_SUBREG
- (VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>;
- }
-
- // Unsigned int in VSX register -> QP
- def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
- (f128 (XSCVUDQP
- (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 8)))>;
- } // IsLittleEndian, HasP9Vector
-
- // Convert (Un)Signed DWord in memory -> QP
- def : Pat<(f128 (sint_to_fp (i64 (load xaddrX4:$src)))),
- (f128 (XSCVSDQP (LXSDX xaddrX4:$src)))>;
- def : Pat<(f128 (sint_to_fp (i64 (load iaddrX4:$src)))),
- (f128 (XSCVSDQP (LXSD iaddrX4:$src)))>;
- def : Pat<(f128 (uint_to_fp (i64 (load xaddrX4:$src)))),
- (f128 (XSCVUDQP (LXSDX xaddrX4:$src)))>;
- def : Pat<(f128 (uint_to_fp (i64 (load iaddrX4:$src)))),
- (f128 (XSCVUDQP (LXSD iaddrX4:$src)))>;
-
- // Convert Unsigned HWord in memory -> QP
- def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi16)),
- (f128 (XSCVUDQP (LXSIHZX xaddr:$src)))>;
-
- // Convert Unsigned Byte in memory -> QP
- def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi8)),
- (f128 (XSCVUDQP (LXSIBZX xoaddr:$src)))>;
-
- // Truncate & Convert QP -> (Un)Signed (D)Word.
- def : Pat<(i64 (fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>;
- def : Pat<(i64 (fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>;
- def : Pat<(i32 (fp_to_sint f128:$src)),
- (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC)))>;
- def : Pat<(i32 (fp_to_uint f128:$src)),
- (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC)))>;
-
- // Instructions for store(fptosi).
- // The 8-byte version is repeated here due to availability of D-Form STXSD.
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddrX4:$dst, 8),
- (STXSDX (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
- xaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), iaddrX4:$dst, 8),
- (STXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
- iaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 4),
- (STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 2),
- (STXSIHX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 1),
- (STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddrX4:$dst, 8),
- (STXSDX (XSCVDPSXDS f64:$src), xaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), iaddrX4:$dst, 8),
- (STXSD (XSCVDPSXDS f64:$src), iaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2),
- (STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 1),
- (STXSIBX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
-
- // Instructions for store(fptoui).
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddrX4:$dst, 8),
- (STXSDX (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
- xaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), iaddrX4:$dst, 8),
- (STXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
- iaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 4),
- (STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 2),
- (STXSIHX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 1),
- (STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddrX4:$dst, 8),
- (STXSDX (XSCVDPUXDS f64:$src), xaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), iaddrX4:$dst, 8),
- (STXSD (XSCVDPUXDS f64:$src), iaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2),
- (STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 1),
- (STXSIBX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
-
- // Round & Convert QP -> DP/SP
- def : Pat<(f64 (fpround f128:$src)), (f64 (XSCVQPDP $src))>;
- def : Pat<(f32 (fpround f128:$src)), (f32 (XSRSP (XSCVQPDPO $src)))>;
-
- // Convert SP -> QP
- def : Pat<(f128 (fpextend f32:$src)),
- (f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>;
-
- def : Pat<(f32 (PPCxsmaxc f32:$XA, f32:$XB)),
- (f32 (COPY_TO_REGCLASS (XSMAXCDP (COPY_TO_REGCLASS $XA, VSSRC),
- (COPY_TO_REGCLASS $XB, VSSRC)),
- VSSRC))>;
- def : Pat<(f32 (PPCxsminc f32:$XA, f32:$XB)),
- (f32 (COPY_TO_REGCLASS (XSMINCDP (COPY_TO_REGCLASS $XA, VSSRC),
- (COPY_TO_REGCLASS $XB, VSSRC)),
- VSSRC))>;
-
-} // end HasP9Vector, AddedComplexity
-
-let AddedComplexity = 400 in {
- let Predicates = [IsISA3_0, HasP9Vector, HasDirectMove, IsBigEndian] in {
- def : Pat<(f128 (PPCbuild_fp128 i64:$rB, i64:$rA)),
- (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
- }
- let Predicates = [IsISA3_0, HasP9Vector, HasDirectMove, IsLittleEndian] in {
- def : Pat<(f128 (PPCbuild_fp128 i64:$rA, i64:$rB)),
- (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
- }
-}
-
-let Predicates = [HasP9Vector], hasSideEffects = 0 in {
- let mayStore = 1 in {
- def SPILLTOVSR_STX : PseudoXFormMemOp<(outs),
- (ins spilltovsrrc:$XT, memrr:$dst),
- "#SPILLTOVSR_STX", []>;
- def SPILLTOVSR_ST : PPCPostRAExpPseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst),
- "#SPILLTOVSR_ST", []>;
- }
- let mayLoad = 1 in {
- def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT),
- (ins memrr:$src),
- "#SPILLTOVSR_LDX", []>;
- def SPILLTOVSR_LD : PPCPostRAExpPseudo<(outs spilltovsrrc:$XT), (ins memrix:$src),
- "#SPILLTOVSR_LD", []>;
-
- }
-}
// Integer extend helper dags 32 -> 64
def AnyExts {
dag A = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32);
@@ -3847,10 +2186,10 @@ def AnyExts {
}
def DblToFlt {
- dag A0 = (f32 (fpround (f64 (extractelt v2f64:$A, 0))));
- dag A1 = (f32 (fpround (f64 (extractelt v2f64:$A, 1))));
- dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0))));
- dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1))));
+ dag A0 = (f32 (any_fpround (f64 (extractelt v2f64:$A, 0))));
+ dag A1 = (f32 (any_fpround (f64 (extractelt v2f64:$A, 1))));
+ dag B0 = (f32 (any_fpround (f64 (extractelt v2f64:$B, 0))));
+ dag B1 = (f32 (any_fpround (f64 (extractelt v2f64:$B, 1))));
}
def ExtDbl {
@@ -4041,397 +2380,2261 @@ def MrgWords {
dag CVCAU = (v4i32 (XVCVDPUXWS CA));
}
-// Patterns for BUILD_VECTOR nodes.
+//---------------------------- Anonymous Patterns ----------------------------//
+// Predicate combinations are kept in roughly chronological order in terms of
+// instruction availability in the architecture. For example, VSX came in with
+// ISA 2.06 (Power7). There have since been additions in ISA 2.07 (Power8) and
+// ISA 3.0 (Power9). However, the granularity of features on later subtargets
+// is finer for various reasons. For example, we have Power8Vector,
+// Power8Altivec, DirectMove that all came in with ISA 2.07. The situation is
+// similar with ISA 3.0 with Power9Vector, Power9Altivec, IsISA3_0. Then there
+// are orthogonal predicates such as endianness for which the order was
+// arbitrarily chosen to be Big, Little.
+//
+// Predicate combinations available:
+// [HasVSX]
+// [HasVSX, IsBigEndian]
+// [HasVSX, IsLittleEndian]
+// [HasVSX, NoP9Vector]
+// [HasVSX, HasOnlySwappingMemOps]
+// [HasVSX, HasOnlySwappingMemOps, IsBigEndian]
+// [HasVSX, HasP8Vector]
+// [HasVSX, HasP8Vector, IsBigEndian]
+// [HasVSX, HasP8Vector, IsLittleEndian]
+// [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian]
+// [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian]
+// [HasVSX, HasDirectMove]
+// [HasVSX, HasDirectMove, IsBigEndian]
+// [HasVSX, HasDirectMove, IsLittleEndian]
+// [HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian]
+// [HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian]
+// [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian]
+// [HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian]
+// [HasVSX, HasP9Vector]
+// [HasVSX, HasP9Vector, IsBigEndian]
+// [HasVSX, HasP9Vector, IsLittleEndian]
+// [HasVSX, HasP9Altivec]
+// [HasVSX, HasP9Altivec, IsBigEndian]
+// [HasVSX, HasP9Altivec, IsLittleEndian]
+// [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian]
+// [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian]
+
let AddedComplexity = 400 in {
+// Valid for any VSX subtarget, regardless of endianness.
+let Predicates = [HasVSX] in {
+def : Pat<(v4i32 (vnot_ppc v4i32:$A)),
+ (v4i32 (XXLNOR $A, $A))>;
+def : Pat<(v4i32 (or (and (vnot_ppc v4i32:$C), v4i32:$A),
+ (and v4i32:$B, v4i32:$C))),
+ (v4i32 (XXSEL $A, $B, $C))>;
- let Predicates = [HasVSX] in {
- // Build vectors of floating point converted to i32.
- def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.A,
- DblToInt.A, DblToInt.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS $A), VSRC), 1))>;
- def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.A,
- DblToUInt.A, DblToUInt.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS $A), VSRC), 1))>;
- def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)),
- (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC),
- (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), 0))>;
- def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)),
- (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC),
- (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>;
- def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
- def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
- def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
- (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
- def : Pat<(v2f64 (PPCldsplat xoaddr:$A)),
- (v2f64 (LXVDSX xoaddr:$A))>;
- def : Pat<(v2i64 (PPCldsplat xoaddr:$A)),
- (v2i64 (LXVDSX xoaddr:$A))>;
-
- // Build vectors of floating point converted to i64.
- def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),
- (v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>;
- def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)),
- (v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>;
- def : Pat<(v2i64 (scalar_to_vector DblToLongLoad.A)),
- (v2i64 (XVCVDPSXDS (LXVDSX xoaddr:$A)))>;
- def : Pat<(v2i64 (scalar_to_vector DblToULongLoad.A)),
- (v2i64 (XVCVDPUXDS (LXVDSX xoaddr:$A)))>;
- }
+// Additional fnmsub pattern for PPC specific ISD opcode
+def : Pat<(PPCfnmsub f64:$A, f64:$B, f64:$C),
+ (XSNMSUBADP $C, $A, $B)>;
+def : Pat<(fneg (PPCfnmsub f64:$A, f64:$B, f64:$C)),
+ (XSMSUBADP $C, $A, $B)>;
+def : Pat<(PPCfnmsub f64:$A, f64:$B, (fneg f64:$C)),
+ (XSNMADDADP $C, $A, $B)>;
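Read algebraically, the three scalar patterns above encode the usual fused negative-multiply-subtract identities. Assuming PPCfnmsub(A, B, C) denotes -(A*B - C), the conventional fnmsub semantics (the node is only referenced, not defined, in this hunk, so this reading is an assumption), the mapping is roughly:

  PPCfnmsub(A, B, C)        = -(A*B - C)  -->  XSNMSUBADP $C, $A, $B
  fneg(PPCfnmsub(A, B, C))  =   A*B - C   -->  XSMSUBADP  $C, $A, $B
  PPCfnmsub(A, B, fneg(C))  = -(A*B + C)  -->  XSNMADDADP $C, $A, $B

The v2f64 and v4f32 blocks further down apply the same identities element-wise via the XV* forms.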
- let Predicates = [HasVSX, NoP9Vector] in {
- // Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads).
- def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>;
- def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>;
- def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)),
- (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
- (XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
- def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)),
- (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
- (XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
- }
+def : Pat<(PPCfnmsub v2f64:$A, v2f64:$B, v2f64:$C),
+ (XVNMSUBADP $C, $A, $B)>;
+def : Pat<(fneg (PPCfnmsub v2f64:$A, v2f64:$B, v2f64:$C)),
+ (XVMSUBADP $C, $A, $B)>;
+def : Pat<(PPCfnmsub v2f64:$A, v2f64:$B, (fneg v2f64:$C)),
+ (XVNMADDADP $C, $A, $B)>;
- let Predicates = [IsBigEndian, HasP8Vector] in {
- def : Pat<DWToSPExtractConv.BVU,
- (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3),
- (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3)))>;
- def : Pat<DWToSPExtractConv.BVS,
- (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3),
- (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>;
- def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
- def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
-
- // Elements in a register on a BE system are in order <0, 1, 2, 3>.
- // The store instructions store the second word from the left.
- // So to align element zero, we need to modulo-left-shift by 3 words.
- // Similar logic applies for elements 2 and 3.
- foreach Idx = [ [0,3], [2,1], [3,2] ] in {
- def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
- sub_64), xoaddr:$src)>;
- def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
- sub_64), xoaddr:$src)>;
- }
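As a concrete instance (a hand-expanded sketch, not a line from the commit), the Idx = [0,3] iteration above becomes the pattern below: XXSLDWI $A, $A, 3 rotates word 0 into word 1, the word the store instruction writes out, which is the modulo-left-shift the comment describes:

  def : Pat<(store (i32 (extractelt v4i32:$A, 0)), xoaddr:$src),
            (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, 3), sub_64),
                   xoaddr:$src)>;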
- }
+def : Pat<(PPCfnmsub v4f32:$A, v4f32:$B, v4f32:$C),
+ (XVNMSUBASP $C, $A, $B)>;
+def : Pat<(fneg (PPCfnmsub v4f32:$A, v4f32:$B, v4f32:$C)),
+ (XVMSUBASP $C, $A, $B)>;
+def : Pat<(PPCfnmsub v4f32:$A, v4f32:$B, (fneg v4f32:$C)),
+ (XVNMADDASP $C, $A, $B)>;
- let Predicates = [HasP8Vector, IsBigEndian, NoP9Vector] in {
- def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
- xoaddr:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
- xoaddr:$src)>;
- }
-
- // Big endian, available on all targets with VSX
- let Predicates = [IsBigEndian, HasVSX] in {
- def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
- (v2f64 (XXPERMDI
- (COPY_TO_REGCLASS $A, VSRC),
- (COPY_TO_REGCLASS $B, VSRC), 0))>;
- // Using VMRGEW to assemble the final vector would be a lower latency
- // solution. However, we choose to go with the slightly higher latency
- // XXPERMDI for 2 reasons:
- // 1. This is likely to occur in unrolled loops where register pressure is high,
- // so we want to use the latter as it has access to all 64 VSX registers.
- // 2. Using Altivec instructions in this sequence would likely cause the
- // allocation of Altivec registers even for the loads which in turn would
- // force the use of LXSIWZX for the loads, adding a cycle of latency to
- // each of the loads which would otherwise be able to use LFIWZX.
- def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)),
- (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32A, MrgFP.LD32B),
- (XXMRGHW MrgFP.LD32C, MrgFP.LD32D), 3))>;
- def : Pat<(v4f32 (build_vector f32:$A, f32:$B, f32:$C, f32:$D)),
- (VMRGEW MrgFP.AC, MrgFP.BD)>;
- def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
- DblToFlt.B0, DblToFlt.B1)),
- (v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>;
-
- // Convert 4 doubles to a vector of ints.
- def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
- DblToInt.C, DblToInt.D)),
- (v4i32 (VMRGEW MrgWords.CVACS, MrgWords.CVBDS))>;
- def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
- DblToUInt.C, DblToUInt.D)),
- (v4i32 (VMRGEW MrgWords.CVACU, MrgWords.CVBDU))>;
- def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
- ExtDbl.B0S, ExtDbl.B1S)),
- (v4i32 (VMRGEW MrgWords.CVA0B0S, MrgWords.CVA1B1S))>;
- def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
- ExtDbl.B0U, ExtDbl.B1U)),
- (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;
- }
+def : Pat<(v2f64 (bitconvert v4f32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v4i32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v8i16:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v16i8:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
- let Predicates = [IsLittleEndian, HasP8Vector] in {
- def : Pat<DWToSPExtractConv.BVU,
- (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3),
- (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3)))>;
- def : Pat<DWToSPExtractConv.BVS,
- (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
- (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
- def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
- def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
-
- // Elements in a register on an LE system are in order <3, 2, 1, 0>.
- // The store instructions store the second word from the left.
- // So to align element 3, we need to modulo-left-shift by 3 words.
- // Similar logic applies for elements 0 and 1.
- foreach Idx = [ [0,2], [1,1], [3,3] ] in {
- def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
- sub_64), xoaddr:$src)>;
- def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
- sub_64), xoaddr:$src)>;
- }
- }
+def : Pat<(v4f32 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
- let Predicates = [HasP8Vector, IsLittleEndian, NoP9Vector] in {
- def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
- xoaddr:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
- xoaddr:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
- }
-
- let Predicates = [IsLittleEndian, HasVSX] in {
- // Little endian, available on all targets with VSX
- def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
- (v2f64 (XXPERMDI
- (COPY_TO_REGCLASS $B, VSRC),
- (COPY_TO_REGCLASS $A, VSRC), 0))>;
- // Using VMRGEW to assemble the final vector would be a lower latency
- // solution. However, we choose to go with the slightly higher latency
- // XXPERMDI for 2 reasons:
- // 1. This is likely to occur in unrolled loops where register pressure is high,
- // so we want to use the latter as it has access to all 64 VSX registers.
- // 2. Using Altivec instructions in this sequence would likely cause the
- // allocation of Altivec registers even for the loads which in turn would
- // force the use of LXSIWZX for the loads, adding a cycle of latency to
- // each of the loads which would otherwise be able to use LFIWZX.
- def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)),
- (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32D, MrgFP.LD32C),
- (XXMRGHW MrgFP.LD32B, MrgFP.LD32A), 3))>;
- def : Pat<(v4f32 (build_vector f32:$D, f32:$C, f32:$B, f32:$A)),
- (VMRGEW MrgFP.AC, MrgFP.BD)>;
- def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
- DblToFlt.B0, DblToFlt.B1)),
- (v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>;
-
- // Convert 4 doubles to a vector of ints.
- def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
- DblToInt.C, DblToInt.D)),
- (v4i32 (VMRGEW MrgWords.CVDBS, MrgWords.CVCAS))>;
- def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
- DblToUInt.C, DblToUInt.D)),
- (v4i32 (VMRGEW MrgWords.CVDBU, MrgWords.CVCAU))>;
- def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
- ExtDbl.B0S, ExtDbl.B1S)),
- (v4i32 (VMRGEW MrgWords.CVB1A1S, MrgWords.CVB0A0S))>;
- def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
- ExtDbl.B0U, ExtDbl.B1U)),
- (v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>;
- }
+def : Pat<(v2i64 (bitconvert v4f32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v4i32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v8i16:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v16i8:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
- let Predicates = [HasDirectMove] in {
- // Endianness-neutral constant splat on P8 and newer targets. The reason
- // for this pattern is that on targets with direct moves, we don't expand
- // BUILD_VECTOR nodes for v4i32.
- def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A,
- immSExt5NonZero:$A, immSExt5NonZero:$A)),
- (v4i32 (VSPLTISW imm:$A))>;
- }
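As a small worked instance (illustrative only), a splat of the constant 5, which is non-zero and fits in a signed 5-bit immediate, satisfies immSExt5NonZero and is therefore selected to a single splat-immediate instruction:

  (v4i32 (build_vector 5, 5, 5, 5))  -->  (v4i32 (VSPLTISW 5))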
+def : Pat<(v4f32 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
- let Predicates = [IsBigEndian, HasDirectMove, NoP9Vector] in {
- // Big endian integer vectors using direct moves.
- def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
- (v2i64 (XXPERMDI
- (COPY_TO_REGCLASS (MTVSRD $A), VSRC),
- (COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (XXPERMDI
- (COPY_TO_REGCLASS
- (MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), VSRC),
- (COPY_TO_REGCLASS
- (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
- (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
- }
+def : Pat<(v2f64 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v2i64 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
- let Predicates = [IsLittleEndian, HasDirectMove, NoP9Vector] in {
- // Little endian integer vectors using direct moves.
- def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
- (v2i64 (XXPERMDI
- (COPY_TO_REGCLASS (MTVSRD $B), VSRC),
- (COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (XXPERMDI
- (COPY_TO_REGCLASS
- (MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), VSRC),
- (COPY_TO_REGCLASS
- (MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), VSRC), 0)>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
- (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
- }
+def : Pat<(v2f64 (bitconvert v1i128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v1i128 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
- let Predicates = [HasP8Vector] in {
- def : Pat<(v1i128 (bitconvert (v16i8 immAllOnesV))),
- (v1i128 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
- def : Pat<(v2i64 (bitconvert (v16i8 immAllOnesV))),
- (v2i64 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
- def : Pat<(v8i16 (bitconvert (v16i8 immAllOnesV))),
- (v8i16 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
- def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))),
- (v16i8 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
- }
+def : Pat<(v2i64 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
- let Predicates = [HasP9Vector] in {
- // Endianness-neutral patterns for const splats with ISA 3.0 instructions.
- def : Pat<(v4i32 (scalar_to_vector i32:$A)),
- (v4i32 (MTVSRWS $A))>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
- (v4i32 (MTVSRWS $A))>;
- def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A)),
- (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>;
- def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
- (v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>;
- def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
- (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>;
- def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>;
- def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>;
- def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)),
- (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
- (DFLOADf32 iaddrX4:$A),
- VSFRC)), 0))>;
- def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),
- (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
- (DFLOADf32 iaddrX4:$A),
- VSFRC)), 0))>;
- def : Pat<(v4f32 (PPCldsplat xoaddr:$A)),
- (v4f32 (LXVWSX xoaddr:$A))>;
- def : Pat<(v4i32 (PPCldsplat xoaddr:$A)),
- (v4i32 (LXVWSX xoaddr:$A))>;
- }
+def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)),
+ (v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>;
+def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 1)),
+ (v2f64 (XVCVSXWDP (v2i64 (XXMRGLW $C, $C))))>;
- let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in {
- def : Pat<(i64 (extractelt v2i64:$A, 1)),
- (i64 (MFVSRLD $A))>;
- // Better way to build integer vectors if we have MTVSRDD. Big endian.
- def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
- (v2i64 (MTVSRDD $rB, $rA))>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (MTVSRDD
- (RLDIMI AnyExts.B, AnyExts.A, 32, 0),
- (RLDIMI AnyExts.D, AnyExts.C, 32, 0))>;
- }
+def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)),
+ (v2f64 (XVCVUXWDP (v2i64 (XXMRGHW $C, $C))))>;
+def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)),
+ (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>;
- let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {
- def : Pat<(i64 (extractelt v2i64:$A, 0)),
- (i64 (MFVSRLD $A))>;
- // Better way to build integer vectors if we have MTVSRDD. Little endian.
- def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
- (v2i64 (MTVSRDD $rB, $rA))>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (MTVSRDD
- (RLDIMI AnyExts.C, AnyExts.D, 32, 0),
- (RLDIMI AnyExts.A, AnyExts.B, 32, 0))>;
- }
- // P9 Altivec instructions that can be used to build vectors.
- // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
- // with complexities of existing build vector patterns in this file.
- let Predicates = [HasP9Altivec, IsLittleEndian] in {
- def : Pat<(v2i64 (build_vector WordToDWord.LE_A0, WordToDWord.LE_A1)),
- (v2i64 (VEXTSW2D $A))>;
- def : Pat<(v2i64 (build_vector HWordToDWord.LE_A0, HWordToDWord.LE_A1)),
- (v2i64 (VEXTSH2D $A))>;
- def : Pat<(v4i32 (build_vector HWordToWord.LE_A0, HWordToWord.LE_A1,
- HWordToWord.LE_A2, HWordToWord.LE_A3)),
- (v4i32 (VEXTSH2W $A))>;
- def : Pat<(v4i32 (build_vector ByteToWord.LE_A0, ByteToWord.LE_A1,
- ByteToWord.LE_A2, ByteToWord.LE_A3)),
- (v4i32 (VEXTSB2W $A))>;
- def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)),
- (v2i64 (VEXTSB2D $A))>;
- }
+def : Pat<(v2f64 (PPCfpexth v4f32:$C, 0)), (XVCVSPDP (XXMRGHW $C, $C))>;
+def : Pat<(v2f64 (PPCfpexth v4f32:$C, 1)), (XVCVSPDP (XXMRGLW $C, $C))>;
- let Predicates = [HasP9Altivec, IsBigEndian] in {
- def : Pat<(v2i64 (build_vector WordToDWord.BE_A0, WordToDWord.BE_A1)),
- (v2i64 (VEXTSW2D $A))>;
- def : Pat<(v2i64 (build_vector HWordToDWord.BE_A0, HWordToDWord.BE_A1)),
- (v2i64 (VEXTSH2D $A))>;
- def : Pat<(v4i32 (build_vector HWordToWord.BE_A0, HWordToWord.BE_A1,
- HWordToWord.BE_A2, HWordToWord.BE_A3)),
- (v4i32 (VEXTSH2W $A))>;
- def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1,
- ByteToWord.BE_A2, ByteToWord.BE_A3)),
- (v4i32 (VEXTSB2W $A))>;
- def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)),
- (v2i64 (VEXTSB2D $A))>;
- }
+// Permutes.
+def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>;
- let Predicates = [HasP9Altivec] in {
- def: Pat<(v2i64 (PPCSExtVElems v16i8:$A)),
- (v2i64 (VEXTSB2D $A))>;
- def: Pat<(v2i64 (PPCSExtVElems v8i16:$A)),
- (v2i64 (VEXTSH2D $A))>;
- def: Pat<(v2i64 (PPCSExtVElems v4i32:$A)),
- (v2i64 (VEXTSW2D $A))>;
- def: Pat<(v4i32 (PPCSExtVElems v16i8:$A)),
- (v4i32 (VEXTSB2W $A))>;
- def: Pat<(v4i32 (PPCSExtVElems v8i16:$A)),
- (v4i32 (VEXTSH2W $A))>;
- }
+// PPCvecshl XT, XA, XA, 2 can be selected as either XXSLDWI XT,XA,XA,2 or
+// XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2); the latter is more profitable.
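+// (For word lanes <w0, w1, w2, w3>, rotating left by two words yields
+// <w2, w3, w0, w1>, i.e. the two doublewords swapped, so both encodings
+// compute the same result for this operand pattern.)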
+def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)),
+ (XXPERMDI $src, $src, 2)>;
+
+// Selects.
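+// The compared operands below are i1 values (CR bits); a set bit reads as
+// true, i.e. -1 as a signed i1. Signed SETLT therefore holds only when $lhs
+// is set and $rhs is clear, giving CRANDC $lhs, $rhs; the unsigned variants
+// swap the operands, and SETEQ/SETNE map to CREQV/CRXOR.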
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
+ (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULT)),
+ (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)),
+ (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULE)),
+ (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)),
+ (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)),
+ (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGE)),
+ (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)),
+ (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGT)),
+ (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)),
+ (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
+ (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)),
+ (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
+ (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)),
+ (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
+ (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
+ (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)),
+ (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
+ (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)),
+ (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
+ (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+// Divides.
+def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B),
+ (XVDIVSP $A, $B)>;
+def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
+ (XVDIVDP $A, $B)>;
+
+// Reciprocal estimate
+def : Pat<(int_ppc_vsx_xvresp v4f32:$A),
+ (XVRESP $A)>;
+def : Pat<(int_ppc_vsx_xvredp v2f64:$A),
+ (XVREDP $A)>;
+
+// Reciprocal square root estimate
+def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A),
+ (XVRSQRTESP $A)>;
+def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A),
+ (XVRSQRTEDP $A)>;
+
+// Vector selection
+def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)),
+ (COPY_TO_REGCLASS
+ (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
+ (COPY_TO_REGCLASS $vB, VSRC),
+ (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)),
+ (COPY_TO_REGCLASS
+ (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
+ (COPY_TO_REGCLASS $vB, VSRC),
+ (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def : Pat<(vselect v4i32:$vA, v4i32:$vB, v4i32:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v2i64:$vA, v2i64:$vB, v2i64:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+
+def : Pat<(v4f32 (any_fmaxnum v4f32:$src1, v4f32:$src2)),
+ (v4f32 (XVMAXSP $src1, $src2))>;
+def : Pat<(v4f32 (any_fminnum v4f32:$src1, v4f32:$src2)),
+ (v4f32 (XVMINSP $src1, $src2))>;
+def : Pat<(v2f64 (any_fmaxnum v2f64:$src1, v2f64:$src2)),
+ (v2f64 (XVMAXDP $src1, $src2))>;
+def : Pat<(v2f64 (any_fminnum v2f64:$src1, v2f64:$src2)),
+ (v2f64 (XVMINDP $src1, $src2))>;
+
+// f32 abs
+def : Pat<(f32 (fabs f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSABSDP
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+
+// f32 nabs
+def : Pat<(f32 (fneg (fabs f32:$S))),
+ (f32 (COPY_TO_REGCLASS (XSNABSDP
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+
+// f32 Min.
+def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)),
+ (f32 FpMinMax.F32Min)>;
+def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)),
+ (f32 FpMinMax.F32Min)>;
+def : Pat<(f32 (fminnum_ieee f32:$A, (fcanonicalize f32:$B))),
+ (f32 FpMinMax.F32Min)>;
+def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
+ (f32 FpMinMax.F32Min)>;
+// f32 Max.
+def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)),
+ (f32 FpMinMax.F32Max)>;
+def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)),
+ (f32 FpMinMax.F32Max)>;
+def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))),
+ (f32 FpMinMax.F32Max)>;
+def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
+ (f32 FpMinMax.F32Max)>;
+
+// f64 Min.
+def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)),
+ (f64 (XSMINDP $A, $B))>;
+def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)),
+ (f64 (XSMINDP $A, $B))>;
+def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))),
+ (f64 (XSMINDP $A, $B))>;
+def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
+ (f64 (XSMINDP $A, $B))>;
+// f64 Max.
+def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)),
+ (f64 (XSMAXDP $A, $B))>;
+def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)),
+ (f64 (XSMAXDP $A, $B))>;
+def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))),
+ (f64 (XSMAXDP $A, $B))>;
+def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
+ (f64 (XSMAXDP $A, $B))>;
+
+def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
+ (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
+ (STXVW4X $rS, xoaddr:$dst)>;
+def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+
+// Rounding for single precision.
+def : Pat<(f32 (any_fround f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPI
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (any_fnearbyint f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIC
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (any_ffloor f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIM
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (any_fceil f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIP
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (any_ftrunc f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIZ
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (any_frint f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIC
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(v4f32 (frint v4f32:$S)), (v4f32 (XVRSPIC $S))>;
+
+// Rounding for double precision.
+def : Pat<(f64 (frint f64:$S)), (f64 (XSRDPIC $S))>;
+def : Pat<(v2f64 (frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
+
+// Materialize a zero-vector of long long
+def : Pat<(v2i64 immAllZerosV),
+ (v2i64 (XXLXORz))>;
+
+// Build vectors of floating point converted to i32.
+def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.A,
+ DblToInt.A, DblToInt.A)),
+ (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS $A), VSRC), 1))>;
+def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.A,
+ DblToUInt.A, DblToUInt.A)),
+ (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS $A), VSRC), 1))>;
+def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)),
+ (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC),
+ (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), 0))>;
+def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)),
+ (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC),
+ (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>;
+defm : ScalToVecWPermute<
+ v4i32, FltToIntLoad.A,
+ (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1),
+ (COPY_TO_REGCLASS (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC)>;
+defm : ScalToVecWPermute<
+ v4i32, FltToUIntLoad.A,
+ (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1),
+ (COPY_TO_REGCLASS (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC)>;
+def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
+ (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
+def : Pat<(v2f64 (PPCldsplat xoaddr:$A)),
+ (v2f64 (LXVDSX xoaddr:$A))>;
+def : Pat<(v2i64 (PPCldsplat xoaddr:$A)),
+ (v2i64 (LXVDSX xoaddr:$A))>;
+
+// Build vectors of floating point converted to i64.
+def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),
+ (v2i64 (XXPERMDIs
+ (COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>;
+def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)),
+ (v2i64 (XXPERMDIs
+ (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>;
+defm : ScalToVecWPermute<
+ v2i64, DblToLongLoad.A,
+ (XVCVDPSXDS (LXVDSX xoaddr:$A)), (XVCVDPSXDS (LXVDSX xoaddr:$A))>;
+defm : ScalToVecWPermute<
+ v2i64, DblToULongLoad.A,
+ (XVCVDPUXDS (LXVDSX xoaddr:$A)), (XVCVDPUXDS (LXVDSX xoaddr:$A))>;
+} // HasVSX
+
+// Any big endian VSX subtarget.
+let Predicates = [HasVSX, IsBigEndian] in {
+def : Pat<(v2f64 (scalar_to_vector f64:$A)),
+ (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
+
+def : Pat<(f64 (extractelt v2f64:$S, 0)),
+ (f64 (EXTRACT_SUBREG $S, sub_64))>;
+def : Pat<(f64 (extractelt v2f64:$S, 1)),
+ (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f64 (XSCVSXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f64 (XSCVUXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+
+def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
+ (f64 VectorExtractions.BE_VARIABLE_DOUBLE)>;
+
+def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
+ (v2f64 (XXPERMDI
+ (COPY_TO_REGCLASS $A, VSRC),
+ (COPY_TO_REGCLASS $B, VSRC), 0))>;
+// Using VMRGEW to assemble the final vector would be a lower-latency
+// solution. However, we choose to go with the slightly higher latency
+// XXPERMDI for two reasons:
+// 1. This is likely to occur in unrolled loops where register pressure is
+// high, so we want to use XXPERMDI as it has access to all 64 VSX registers.
+// 2. Using Altivec instructions in this sequence would likely cause the
+// allocation of Altivec registers even for the loads, which in turn would
+// force the use of LXSIWZX for the loads, adding a cycle of latency to each
+// of the loads that would otherwise be able to use LFIWZX.
+def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)),
+ (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32A, MrgFP.LD32B),
+ (XXMRGHW MrgFP.LD32C, MrgFP.LD32D), 3))>;
+def : Pat<(v4f32 (build_vector f32:$A, f32:$B, f32:$C, f32:$D)),
+ (VMRGEW MrgFP.AC, MrgFP.BD)>;
+def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
+ DblToFlt.B0, DblToFlt.B1)),
+ (v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>;
+
+// Convert 4 doubles to a vector of ints.
+def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
+ DblToInt.C, DblToInt.D)),
+ (v4i32 (VMRGEW MrgWords.CVACS, MrgWords.CVBDS))>;
+def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
+ DblToUInt.C, DblToUInt.D)),
+ (v4i32 (VMRGEW MrgWords.CVACU, MrgWords.CVBDU))>;
+def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
+ ExtDbl.B0S, ExtDbl.B1S)),
+ (v4i32 (VMRGEW MrgWords.CVA0B0S, MrgWords.CVA1B1S))>;
+def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
+ ExtDbl.B0U, ExtDbl.B1U)),
+ (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$A, 1))))),
+ (v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+ (f64 (fpextend (extractelt v4f32:$A, 0))))),
+ (v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)),
+ (XVCVSPDP (XXMRGHW $A, $A)), 2))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$A, 2))))),
+ (v2f64 (XVCVSPDP $A))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+ (f64 (fpextend (extractelt v4f32:$A, 3))))),
+ (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 3)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))),
+ (f64 (fpextend (extractelt v4f32:$A, 3))))),
+ (v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
+ (f64 (fpextend (extractelt v4f32:$A, 2))))),
+ (v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)),
+ (XVCVSPDP (XXMRGLW $A, $A)), 2))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$B, 0))))),
+ (v2f64 (XVCVSPDP (XXPERMDI $A, $B, 0)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
+ (f64 (fpextend (extractelt v4f32:$B, 3))))),
+ (v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $A, $B, 3),
+ (XXPERMDI $A, $B, 3), 1)))>;
+def : Pat<WToDPExtractConv.BV02S,
+ (v2f64 (XVCVSXWDP $A))>;
+def : Pat<WToDPExtractConv.BV13S,
+ (v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 3)))>;
+def : Pat<WToDPExtractConv.BV02U,
+ (v2f64 (XVCVUXWDP $A))>;
+def : Pat<WToDPExtractConv.BV13U,
+ (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 3)))>;
+} // HasVSX, IsBigEndian
+
+// Any little endian VSX subtarget.
+let Predicates = [HasVSX, IsLittleEndian] in {
+defm : ScalToVecWPermute<v2f64, (f64 f64:$A),
+ (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64),
+ (SUBREG_TO_REG (i64 1), $A, sub_64), 0),
+ (SUBREG_TO_REG (i64 1), $A, sub_64)>;
+
+def : Pat<(f64 (extractelt v2f64:$S, 0)),
+ (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
+def : Pat<(f64 (extractelt v2f64:$S, 1)),
+ (f64 (EXTRACT_SUBREG $S, sub_64))>;
+
+def : Pat<(v2f64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+def : Pat<(PPCst_vec_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(v4f32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+def : Pat<(PPCst_vec_be v4f32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+def : Pat<(v2i64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+def : Pat<(PPCst_vec_be v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(v4i32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+def : Pat<(PPCst_vec_be v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f64 (XSCVSXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f64 (XSCVUXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+
+def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
+ (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;
+
+// Little endian, available on all targets with VSX
+def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
+ (v2f64 (XXPERMDI
+ (COPY_TO_REGCLASS $B, VSRC),
+ (COPY_TO_REGCLASS $A, VSRC), 0))>;
+// Using VMRGEW to assemble the final vector would be a lower-latency
+// solution. However, we choose to go with the slightly higher latency
+// XXPERMDI for two reasons:
+// 1. This is likely to occur in unrolled loops where register pressure is
+// high, so we want to use XXPERMDI as it has access to all 64 VSX registers.
+// 2. Using Altivec instructions in this sequence would likely cause the
+// allocation of Altivec registers even for the loads, which in turn would
+// force the use of LXSIWZX for the loads, adding a cycle of latency to each
+// of the loads that would otherwise be able to use LFIWZX.
+def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)),
+ (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32D, MrgFP.LD32C),
+ (XXMRGHW MrgFP.LD32B, MrgFP.LD32A), 3))>;
+def : Pat<(v4f32 (build_vector f32:$D, f32:$C, f32:$B, f32:$A)),
+ (VMRGEW MrgFP.AC, MrgFP.BD)>;
+def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
+ DblToFlt.B0, DblToFlt.B1)),
+ (v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>;
+
+// Convert 4 doubles to a vector of ints.
+def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
+ DblToInt.C, DblToInt.D)),
+ (v4i32 (VMRGEW MrgWords.CVDBS, MrgWords.CVCAS))>;
+def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
+ DblToUInt.C, DblToUInt.D)),
+ (v4i32 (VMRGEW MrgWords.CVDBU, MrgWords.CVCAU))>;
+def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
+ ExtDbl.B0S, ExtDbl.B1S)),
+ (v4i32 (VMRGEW MrgWords.CVB1A1S, MrgWords.CVB0A0S))>;
+def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
+ ExtDbl.B0U, ExtDbl.B1U)),
+ (v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$A, 1))))),
+ (v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+ (f64 (fpextend (extractelt v4f32:$A, 0))))),
+ (v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)),
+ (XVCVSPDP (XXMRGLW $A, $A)), 2))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$A, 2))))),
+ (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 1)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+ (f64 (fpextend (extractelt v4f32:$A, 3))))),
+ (v2f64 (XVCVSPDP $A))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))),
+ (f64 (fpextend (extractelt v4f32:$A, 3))))),
+ (v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
+ (f64 (fpextend (extractelt v4f32:$A, 2))))),
+ (v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)),
+ (XVCVSPDP (XXMRGHW $A, $A)), 2))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$B, 0))))),
+ (v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $B, $A, 3),
+ (XXPERMDI $B, $A, 3), 1)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
+ (f64 (fpextend (extractelt v4f32:$B, 3))))),
+ (v2f64 (XVCVSPDP (XXPERMDI $B, $A, 0)))>;
+def : Pat<WToDPExtractConv.BV02S,
+ (v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 1)))>;
+def : Pat<WToDPExtractConv.BV13S,
+ (v2f64 (XVCVSXWDP $A))>;
+def : Pat<WToDPExtractConv.BV02U,
+ (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 1)))>;
+def : Pat<WToDPExtractConv.BV13U,
+ (v2f64 (XVCVUXWDP $A))>;
+} // HasVSX, IsLittleEndian
+
+// Any pre-Power9 VSX subtarget.
+let Predicates = [HasVSX, NoP9Vector] in {
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 8),
+ (STXSDX (XSCVDPSXDS f64:$src), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 8),
+ (STXSDX (XSCVDPUXDS f64:$src), xoaddr:$dst)>;
+
+// Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads).
+defm : ScalToVecWPermute<
+ v4i32, DblToIntLoad.A,
+ (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1),
+ (COPY_TO_REGCLASS (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC)>;
+defm : ScalToVecWPermute<
+ v4i32, DblToUIntLoad.A,
+ (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1),
+ (COPY_TO_REGCLASS (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC)>;
+defm : ScalToVecWPermute<
+ v2i64, FltToLongLoad.A,
+ (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), VSFRC)), 0),
+ (SUBREG_TO_REG (i64 1), (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A),
+ VSFRC)), sub_64)>;
+defm : ScalToVecWPermute<
+ v2i64, FltToULongLoad.A,
+ (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), VSFRC)), 0),
+ (SUBREG_TO_REG (i64 1), (XSCVDPUXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A),
+ VSFRC)), sub_64)>;
+} // HasVSX, NoP9Vector
+
+// Any VSX subtarget that only has loads and stores that load in big endian
+// order regardless of endianness. In practice, these are pre-Power9
+// subtargets.
+let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
+ def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+
+ // Stores.
+ def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
+ (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+} // HasVSX, HasOnlySwappingMemOps
+
+// Big endian VSX subtarget that only has loads and stores that always load
+// in big endian order. In practice, big endian pre-Power9 subtargets.
+let Predicates = [HasVSX, HasOnlySwappingMemOps, IsBigEndian] in {
+ def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+ def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+ def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+ def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+ def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
+ (STXVW4X $rS, xoaddr:$dst)>;
+} // HasVSX, HasOnlySwappingMemOps, IsBigEndian
+
+// Any Power8 VSX subtarget.
+let Predicates = [HasVSX, HasP8Vector] in {
+def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
+ (XXLEQV $A, $B)>;
+def : Pat<(f64 (extloadf32 xoaddr:$src)),
+ (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>;
+def : Pat<(f32 (fpround (f64 (extloadf32 xoaddr:$src)))),
+ (f32 (XFLOADf32 xoaddr:$src))>;
+def : Pat<(f64 (any_fpextend f32:$src)),
+ (COPY_TO_REGCLASS $src, VSFRC)>;
+
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
+ (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)),
+ (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
+ (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)),
+ (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
+ (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
+ (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)),
+ (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
+ (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)),
+ (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
+ (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+// Additional fnmsub patterns for the PPC-specific ISD opcode
+def : Pat<(PPCfnmsub f32:$A, f32:$B, f32:$C),
+ (XSNMSUBASP $C, $A, $B)>;
+def : Pat<(fneg (PPCfnmsub f32:$A, f32:$B, f32:$C)),
+ (XSMSUBASP $C, $A, $B)>;
+def : Pat<(PPCfnmsub f32:$A, f32:$B, (fneg f32:$C)),
+ (XSNMADDASP $C, $A, $B)>;
+
+// f32 neg
+// Although XSNEGDP is available on P7, we only select it starting from P8,
+// so that FNMSUBS can still be selected for the fneg-fmsub pattern on P7.
+// (The VSX version, XSNMSUBASP, is only available starting from P8.)
+def : Pat<(f32 (fneg f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSNEGDP
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+
+// Instructions for converting float to i32 feeding a store.
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 4),
+ (STIWX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4),
+ (STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
+
+def : Pat<(v2i64 (smax v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMAXSD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+def : Pat<(v2i64 (umax v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMAXUD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+def : Pat<(v2i64 (smin v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMINSD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+def : Pat<(v2i64 (umin v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMINUD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+
+def : Pat<(v1i128 (bitconvert (v16i8 immAllOnesV))),
+ (v1i128 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
+def : Pat<(v2i64 (bitconvert (v16i8 immAllOnesV))),
+ (v2i64 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
+def : Pat<(v8i16 (bitconvert (v16i8 immAllOnesV))),
+ (v8i16 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
+def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))),
+ (v16i8 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
+} // HasVSX, HasP8Vector
+
+// Big endian Power8 VSX subtarget.
+let Predicates = [HasVSX, HasP8Vector, IsBigEndian] in {
+def : Pat<DWToSPExtractConv.El0SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
+def : Pat<DWToSPExtractConv.El1SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+def : Pat<DWToSPExtractConv.El0US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
+def : Pat<DWToSPExtractConv.El1US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+
+// v4f32 scalar <-> vector conversions (BE)
+def : Pat<(v4f32 (scalar_to_vector f32:$A)),
+ (v4f32 (XSCVDPSPN $A))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 0)),
+ (f32 (XSCVSPDPN $S))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 1)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 2)),
+ (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 3)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
+def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
+ (f32 VectorExtractions.BE_VARIABLE_FLOAT)>;
+
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
+
+// LIWAX - This instruction is used for sign extending i32 -> i64.
+// LIWZX - This instruction will be emitted for i32, f32, and when
+// zero-extending i32 to i64 (zext i32 -> i64).
+def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
+ (v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
+ (v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>;
+def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
+ (v4i32 (XXSLDWIs
+ (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
+def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
+ (v4f32 (XXSLDWIs
+ (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
+
+def : Pat<DWToSPExtractConv.BVU,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3),
+ (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3)))>;
+def : Pat<DWToSPExtractConv.BVS,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3),
+ (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>;
+def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+
+// Elements in a register on a BE system are in order <0, 1, 2, 3>.
+// The store instructions store the second word from the left.
+// So to align element zero, we need to modulo-left-shift by 3 words.
+// Similar logic applies for elements 2 and 3.
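+// For example, to store element zero, rotating <0, 1, 2, 3> left by 3 words
+// gives <3, 0, 1, 2>, which places element zero in the second word that the
+// store writes out; hence the [element, shift] pairs below.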
+foreach Idx = [ [0,3], [2,1], [3,2] ] in {
+ def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+ def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
}
+} // HasVSX, HasP8Vector, IsBigEndian
+
+// Little endian Power8 VSX subtarget.
+let Predicates = [HasVSX, HasP8Vector, IsLittleEndian] in {
+def : Pat<DWToSPExtractConv.El0SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+def : Pat<DWToSPExtractConv.El1SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS
+ (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
+def : Pat<DWToSPExtractConv.El0US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+def : Pat<DWToSPExtractConv.El1US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS
+ (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
-// Put this P9Altivec related definition here since it's possible to be
-// selected to VSX instruction xvnegsp, avoid possible undef.
-let Predicates = [HasP9Altivec] in {
+// v4f32 scalar <-> vector conversions (LE)
+// The permuted version is no better than the version that puts the value
+// into the right element because XSCVDPSPN is different from all the other
+// instructions used for PPCSToV.
+defm : ScalToVecWPermute<v4f32, (f32 f32:$A),
+  (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1),
+  (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 3)>;
+def : Pat<(f32 (vector_extract v4f32:$S, 0)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 1)),
+ (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 2)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 3)),
+ (f32 (XSCVSPDPN $S))>;
+def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
+ (f32 VectorExtractions.LE_VARIABLE_FLOAT)>;
+
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
+
+// LIWAX - This instruction is used for sign extending i32 -> i64.
+// LIWZX - This instruction will be emitted for i32, f32, and when
+// zero-extending i32 to i64 (zext i32 -> i64).
+defm : ScalToVecWPermute<
+ v2i64, (i64 (sextloadi32 xoaddr:$src)),
+ (XXPERMDIs (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSFRC), 2),
+ (SUBREG_TO_REG (i64 1), (LIWAX xoaddr:$src), sub_64)>;
+
+defm : ScalToVecWPermute<
+ v2i64, (i64 (zextloadi32 xoaddr:$src)),
+ (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2),
+ (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>;
+
+defm : ScalToVecWPermute<
+ v4i32, (i32 (load xoaddr:$src)),
+ (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2),
+ (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>;
+
+defm : ScalToVecWPermute<
+ v4f32, (f32 (load xoaddr:$src)),
+ (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2),
+ (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>;
+
+def : Pat<DWToSPExtractConv.BVU,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3),
+ (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3)))>;
+def : Pat<DWToSPExtractConv.BVS,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
+ (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
+def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+
+// Elements in a register on a LE system are in order <3, 2, 1, 0>.
+// The store instructions store the second word from the left.
+// So to align element 3, we need to modulo-left-shift by 3 words.
+// Similar logic applies for elements 0 and 1.
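+// For example, to store element 3, rotating <3, 2, 1, 0> left by 3 words
+// gives <0, 3, 2, 1>, which places element 3 in the second word that the
+// store writes out; hence the [element, shift] pairs below.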
+foreach Idx = [ [0,2], [1,1], [3,3] ] in {
+ def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+ def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+}
+} // HasVSX, HasP8Vector, IsLittleEndian
+
+// Big endian pre-Power9 VSX subtarget.
+let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian] in {
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+} // HasVSX, HasP8Vector, NoP9Vector, IsBigEndian
+
+// Little endian pre-Power9 VSX subtarget.
+let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian] in {
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+} // HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian
+
+// Any VSX target with direct moves.
+let Predicates = [HasVSX, HasDirectMove] in {
+// bitconvert f32 -> i32
+// (convert to 32-bit fp single, shift right 1 word, move to GPR)
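+// (XSCVDPSPN leaves the single-precision result in the first word of the
+// VSR; rotating it right by one word puts it in the word that MFVSRWZ
+// reads.)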
+def : Pat<(i32 (bitconvert f32:$S)),
+ (i32 (MFVSRWZ (EXTRACT_SUBREG
+ (XXSLDWI (XSCVDPSPN $S), (XSCVDPSPN $S), 3),
+ sub_64)))>;
+// bitconvert i32 -> f32
+// (move to FPR, shift left 1 word, convert to 64-bit fp single)
+def : Pat<(f32 (bitconvert i32:$A)),
+ (f32 (XSCVSPDPN
+ (XXSLDWI MovesToVSR.LE_WORD_1, MovesToVSR.LE_WORD_1, 1)))>;
- def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 0))),
- (v4i32 (VABSDUW $A, $B))>;
+// bitconvert f64 -> i64
+// (move to GPR, nothing else needed)
+def : Pat<(i64 (bitconvert f64:$S)),
+ (i64 (MFVSRD $S))>;
- def : Pat<(v8i16 (PPCvabsd v8i16:$A, v8i16:$B, (i32 0))),
- (v8i16 (VABSDUH $A, $B))>;
+// bitconvert i64 -> f64
+// (move to FPR, nothing else needed)
+def : Pat<(f64 (bitconvert i64:$S)),
+ (f64 (MTVSRD $S))>;
- def : Pat<(v16i8 (PPCvabsd v16i8:$A, v16i8:$B, (i32 0))),
- (v16i8 (VABSDUB $A, $B))>;
+// Rounding to integer.
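+// lrint/llrint convert with FCTID alone, i.e. using the rounding mode
+// currently set in the FPSCR, while lround/llround first round to nearest
+// (ties away from zero) with XSRDPI and then convert.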
+def : Pat<(i64 (lrint f64:$S)),
+ (i64 (MFVSRD (FCTID $S)))>;
+def : Pat<(i64 (lrint f32:$S)),
+ (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
+def : Pat<(i64 (llrint f64:$S)),
+ (i64 (MFVSRD (FCTID $S)))>;
+def : Pat<(i64 (llrint f32:$S)),
+ (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
+def : Pat<(i64 (lround f64:$S)),
+ (i64 (MFVSRD (FCTID (XSRDPI $S))))>;
+def : Pat<(i64 (lround f32:$S)),
+ (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
+def : Pat<(i64 (llround f64:$S)),
+ (i64 (MFVSRD (FCTID (XSRDPI $S))))>;
+def : Pat<(i64 (llround f32:$S)),
+ (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
+
+// Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead
+// of f64
+def : Pat<(v8i16 (PPCmtvsrz i32:$A)),
+ (v8i16 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
+def : Pat<(v16i8 (PPCmtvsrz i32:$A)),
+ (v16i8 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
+
+// Endianness-neutral constant splat on P8 and newer targets. The reason
+// for this pattern is that on targets with direct moves, we don't expand
+// BUILD_VECTOR nodes for v4i32.
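+// For example, a v4i32 splat of the constant 5 is matched directly to
+// VSPLTISW 5, which works identically on big and little endian subtargets.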
+def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A,
+ immSExt5NonZero:$A, immSExt5NonZero:$A)),
+ (v4i32 (VSPLTISW imm:$A))>;
+} // HasVSX, HasDirectMove
+
+// Big endian VSX subtarget with direct moves.
+let Predicates = [HasVSX, HasDirectMove, IsBigEndian] in {
+// v16i8 scalar <-> vector conversions (BE)
+def : Pat<(v16i8 (scalar_to_vector i32:$A)),
+ (v16i8 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64))>;
+def : Pat<(v8i16 (scalar_to_vector i32:$A)),
+ (v8i16 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64))>;
+def : Pat<(v4i32 (scalar_to_vector i32:$A)),
+ (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>;
+def : Pat<(v2i64 (scalar_to_vector i64:$A)),
+ (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>;
+
+// v2i64 scalar <-> vector conversions (BE)
+def : Pat<(i64 (vector_extract v2i64:$S, 0)),
+ (i64 VectorExtractions.LE_DWORD_1)>;
+def : Pat<(i64 (vector_extract v2i64:$S, 1)),
+ (i64 VectorExtractions.LE_DWORD_0)>;
+def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)),
+ (i64 VectorExtractions.BE_VARIABLE_DWORD)>;
+} // HasVSX, HasDirectMove, IsBigEndian
+
+// Little endian VSX subtarget with direct moves.
+let Predicates = [HasVSX, HasDirectMove, IsLittleEndian] in {
+ // v16i8 scalar <-> vector conversions (LE)
+ defm : ScalToVecWPermute<v16i8, (i32 i32:$A),
+ (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC),
+ (COPY_TO_REGCLASS MovesToVSR.LE_WORD_1, VSRC)>;
+ defm : ScalToVecWPermute<v8i16, (i32 i32:$A),
+ (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC),
+ (COPY_TO_REGCLASS MovesToVSR.LE_WORD_1, VSRC)>;
+ defm : ScalToVecWPermute<v4i32, (i32 i32:$A), MovesToVSR.LE_WORD_0,
+ (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64)>;
+ defm : ScalToVecWPermute<v2i64, (i64 i64:$A), MovesToVSR.LE_DWORD_0,
+ MovesToVSR.LE_DWORD_1>;
+
+ // v2i64 scalar <-> vector conversions (LE)
+ def : Pat<(i64 (vector_extract v2i64:$S, 0)),
+ (i64 VectorExtractions.LE_DWORD_0)>;
+ def : Pat<(i64 (vector_extract v2i64:$S, 1)),
+ (i64 VectorExtractions.LE_DWORD_1)>;
+ def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)),
+ (i64 VectorExtractions.LE_VARIABLE_DWORD)>;
+} // HasVSX, HasDirectMove, IsLittleEndian
+
+// Big endian pre-P9 VSX subtarget with direct moves.
+let Predicates = [HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian] in {
+def : Pat<(i32 (vector_extract v16i8:$S, 0)),
+ (i32 VectorExtractions.LE_BYTE_15)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+ (i32 VectorExtractions.LE_BYTE_14)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+ (i32 VectorExtractions.LE_BYTE_13)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+ (i32 VectorExtractions.LE_BYTE_12)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+ (i32 VectorExtractions.LE_BYTE_11)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+ (i32 VectorExtractions.LE_BYTE_10)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+ (i32 VectorExtractions.LE_BYTE_9)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+ (i32 VectorExtractions.LE_BYTE_8)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+ (i32 VectorExtractions.LE_BYTE_7)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+ (i32 VectorExtractions.LE_BYTE_6)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+ (i32 VectorExtractions.LE_BYTE_5)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+ (i32 VectorExtractions.LE_BYTE_4)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+ (i32 VectorExtractions.LE_BYTE_3)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+ (i32 VectorExtractions.LE_BYTE_2)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+ (i32 VectorExtractions.LE_BYTE_1)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+ (i32 VectorExtractions.LE_BYTE_0)>;
+def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
+ (i32 VectorExtractions.BE_VARIABLE_BYTE)>;
+
+// v8i16 scalar <-> vector conversions (BE)
+def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+ (i32 VectorExtractions.LE_HALF_7)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+ (i32 VectorExtractions.LE_HALF_6)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+ (i32 VectorExtractions.LE_HALF_5)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+ (i32 VectorExtractions.LE_HALF_4)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+ (i32 VectorExtractions.LE_HALF_3)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+ (i32 VectorExtractions.LE_HALF_2)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+ (i32 VectorExtractions.LE_HALF_1)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+ (i32 VectorExtractions.LE_HALF_0)>;
+def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+ (i32 VectorExtractions.BE_VARIABLE_HALF)>;
+
+// v4i32 scalar <-> vector conversions (BE)
+def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+ (i32 VectorExtractions.LE_WORD_3)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+ (i32 VectorExtractions.LE_WORD_2)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+ (i32 VectorExtractions.LE_WORD_1)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+ (i32 VectorExtractions.LE_WORD_0)>;
+def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
+ (i32 VectorExtractions.BE_VARIABLE_WORD)>;
+} // HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian
+
+// Little endian pre-P9 VSX subtarget with direct moves.
+let Predicates = [HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian] in {
+def : Pat<(i32 (vector_extract v16i8:$S, 0)),
+ (i32 VectorExtractions.LE_BYTE_0)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+ (i32 VectorExtractions.LE_BYTE_1)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+ (i32 VectorExtractions.LE_BYTE_2)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+ (i32 VectorExtractions.LE_BYTE_3)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+ (i32 VectorExtractions.LE_BYTE_4)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+ (i32 VectorExtractions.LE_BYTE_5)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+ (i32 VectorExtractions.LE_BYTE_6)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+ (i32 VectorExtractions.LE_BYTE_7)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+ (i32 VectorExtractions.LE_BYTE_8)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+ (i32 VectorExtractions.LE_BYTE_9)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+ (i32 VectorExtractions.LE_BYTE_10)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+ (i32 VectorExtractions.LE_BYTE_11)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+ (i32 VectorExtractions.LE_BYTE_12)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+ (i32 VectorExtractions.LE_BYTE_13)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+ (i32 VectorExtractions.LE_BYTE_14)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+ (i32 VectorExtractions.LE_BYTE_15)>;
+def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
+ (i32 VectorExtractions.LE_VARIABLE_BYTE)>;
+
+// v8i16 scalar <-> vector conversions (LE)
+def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+ (i32 VectorExtractions.LE_HALF_0)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+ (i32 VectorExtractions.LE_HALF_1)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+ (i32 VectorExtractions.LE_HALF_2)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+ (i32 VectorExtractions.LE_HALF_3)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+ (i32 VectorExtractions.LE_HALF_4)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+ (i32 VectorExtractions.LE_HALF_5)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+ (i32 VectorExtractions.LE_HALF_6)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+ (i32 VectorExtractions.LE_HALF_7)>;
+def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+ (i32 VectorExtractions.LE_VARIABLE_HALF)>;
+
+// v4i32 scalar <-> vector conversions (LE)
+def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+ (i32 VectorExtractions.LE_WORD_0)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+ (i32 VectorExtractions.LE_WORD_1)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+ (i32 VectorExtractions.LE_WORD_2)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+ (i32 VectorExtractions.LE_WORD_3)>;
+def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
+ (i32 VectorExtractions.LE_VARIABLE_WORD)>;
+} // HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian
+
+// Big endian pre-Power9 VSX subtarget that has direct moves.
+let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian] in {
+// Big endian integer vectors using direct moves.
+def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
+ (v2i64 (XXPERMDI
+ (COPY_TO_REGCLASS (MTVSRD $A), VSRC),
+ (COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+ (XXPERMDI
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), VSRC),
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
+ (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
+} // HasVSX, HasDirectMove, NoP9Vector, IsBigEndian
+
+// Little endian pre-Power9 VSX subtarget that has direct moves.
+let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian] in {
+// Little endian integer vectors using direct moves.
+def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
+ (v2i64 (XXPERMDI
+ (COPY_TO_REGCLASS (MTVSRD $B), VSRC),
+ (COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+ (XXPERMDI
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), VSRC),
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), VSRC), 0)>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
+ (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
+} // HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian
+
+// Any Power9 VSX subtarget.
+let Predicates = [HasVSX, HasP9Vector] in {
+// Additional fnmsub patterns for the PPC-specific ISD opcode
+def : Pat<(PPCfnmsub f128:$A, f128:$B, f128:$C),
+ (XSNMSUBQP $C, $A, $B)>;
+def : Pat<(fneg (PPCfnmsub f128:$A, f128:$B, f128:$C)),
+ (XSMSUBQP $C, $A, $B)>;
+def : Pat<(PPCfnmsub f128:$A, f128:$B, (fneg f128:$C)),
+ (XSNMADDQP $C, $A, $B)>;
+
+def : Pat<(f128 (sint_to_fp i64:$src)),
+ (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+def : Pat<(f128 (sint_to_fp (i64 (PPCmfvsr f64:$src)))),
+ (f128 (XSCVSDQP $src))>;
+def : Pat<(f128 (sint_to_fp (i32 (PPCmfvsr f64:$src)))),
+ (f128 (XSCVSDQP (VEXTSW2Ds $src)))>;
+def : Pat<(f128 (uint_to_fp i64:$src)),
+ (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+def : Pat<(f128 (uint_to_fp (i64 (PPCmfvsr f64:$src)))),
+ (f128 (XSCVUDQP $src))>;
+
+// Convert (Un)Signed Word -> QP.
+def : Pat<(f128 (sint_to_fp i32:$src)),
+ (f128 (XSCVSDQP (MTVSRWA $src)))>;
+def : Pat<(f128 (sint_to_fp (i32 (load xoaddr:$src)))),
+ (f128 (XSCVSDQP (LIWAX xoaddr:$src)))>;
+def : Pat<(f128 (uint_to_fp i32:$src)),
+ (f128 (XSCVUDQP (MTVSRWZ $src)))>;
+def : Pat<(f128 (uint_to_fp (i32 (load xoaddr:$src)))),
+ (f128 (XSCVUDQP (LIWZX xoaddr:$src)))>;
+
+// Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a
+// separate pattern so that it can convert the input register class from
+// VRRC(v8i16) to VSRC.
+def : Pat<(v4f32 (int_ppc_vsx_xvcvhpsp v8i16:$A)),
+ (v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>;
+
+// Use current rounding mode
+def : Pat<(f128 (any_fnearbyint f128:$vB)), (f128 (XSRQPI 0, $vB, 3))>;
+// Round to nearest, ties away from zero
+def : Pat<(f128 (any_fround f128:$vB)), (f128 (XSRQPI 0, $vB, 0))>;
+// Round towards Zero
+def : Pat<(f128 (any_ftrunc f128:$vB)), (f128 (XSRQPI 1, $vB, 1))>;
+// Round towards +Inf
+def : Pat<(f128 (any_fceil f128:$vB)), (f128 (XSRQPI 1, $vB, 2))>;
+// Round towards -Inf
+def : Pat<(f128 (any_ffloor f128:$vB)), (f128 (XSRQPI 1, $vB, 3))>;
+// Use current rounding mode, signaling Inexact
+def : Pat<(f128 (any_frint f128:$vB)), (f128 (XSRQPIX 0, $vB, 3))>;
+
+def : Pat<(f128 (int_ppc_scalar_insert_exp_qp f128:$vA, i64:$vB)),
+ (f128 (XSIEXPQP $vA, (MTVSRD $vB)))>;
+
+def : Pat<(i64 (int_ppc_scalar_extract_expq f128:$vA)),
+ (i64 (MFVSRD (EXTRACT_SUBREG
+ (v2i64 (XSXEXPQP $vA)), sub_64)))>;
+
+// Extra patterns expanding to vector Extract Word/Insert Word
+def : Pat<(v4i32 (int_ppc_vsx_xxinsertw v4i32:$A, v2i64:$B, imm:$IMM)),
+ (v4i32 (XXINSERTW $A, $B, imm:$IMM))>;
+def : Pat<(v2i64 (int_ppc_vsx_xxextractuw v2i64:$A, imm:$IMM)),
+ (v2i64 (COPY_TO_REGCLASS (XXEXTRACTUW $A, imm:$IMM), VSRC))>;
+
+// Vector Reverse
+def : Pat<(v8i16 (bswap v8i16 :$A)),
+ (v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
+def : Pat<(v1i128 (bswap v1i128 :$A)),
+ (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
+
+// D-Form Load/Store
+def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+def : Pat<(v4f32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+def : Pat<(v2i64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+def : Pat<(v2f64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+def : Pat<(f128 (quadwOffsetLoad iaddrX16:$src)),
+ (COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>;
+def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddrX16:$src)), (LXV memrix16:$src)>;
+def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddrX16:$src)), (LXV memrix16:$src)>;
+
+def : Pat<(quadwOffsetStore v4f32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+def : Pat<(quadwOffsetStore v4i32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+def : Pat<(quadwOffsetStore v2f64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+def : Pat<(quadwOffsetStore f128:$rS, iaddrX16:$dst),
+ (STXV (COPY_TO_REGCLASS $rS, VSRC), memrix16:$dst)>;
+def : Pat<(quadwOffsetStore v2i64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddrX16:$dst),
+ (STXV $rS, memrix16:$dst)>;
+def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddrX16:$dst),
+ (STXV $rS, memrix16:$dst)>;
+
+def : Pat<(v2f64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+def : Pat<(v2i64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+def : Pat<(v4f32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+def : Pat<(v4i32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>;
+def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>;
+def : Pat<(f128 (nonQuadwOffsetLoad xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXVX xoaddr:$src), VRRC)>;
+def : Pat<(nonQuadwOffsetStore f128:$rS, xoaddr:$dst),
+ (STXVX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
+def : Pat<(nonQuadwOffsetStore v2f64:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+def : Pat<(nonQuadwOffsetStore v2i64:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+
+// Build vectors from i8 loads
+defm : ScalToVecWPermute<v16i8, ScalarLoads.Li8,
+ (VSPLTBs 7, (LXSIBZX xoaddr:$src)),
+ (VSPLTBs 7, (LXSIBZX xoaddr:$src))>;
+defm : ScalToVecWPermute<v8i16, ScalarLoads.ZELi8,
+ (VSPLTHs 3, (LXSIBZX xoaddr:$src)),
+ (VSPLTHs 3, (LXSIBZX xoaddr:$src))>;
+defm : ScalToVecWPermute<v4i32, ScalarLoads.ZELi8,
+ (XXSPLTWs (LXSIBZX xoaddr:$src), 1),
+ (XXSPLTWs (LXSIBZX xoaddr:$src), 1)>;
+defm : ScalToVecWPermute<v2i64, ScalarLoads.ZELi8i64,
+ (XXPERMDIs (LXSIBZX xoaddr:$src), 0),
+ (XXPERMDIs (LXSIBZX xoaddr:$src), 0)>;
+defm : ScalToVecWPermute<v4i32, ScalarLoads.SELi8,
+ (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1),
+ (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1)>;
+defm : ScalToVecWPermute<v2i64, ScalarLoads.SELi8i64,
+ (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0),
+ (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0)>;
+
+// Build vectors from i16 loads
+defm : ScalToVecWPermute<v8i16, ScalarLoads.Li16,
+ (VSPLTHs 3, (LXSIHZX xoaddr:$src)),
+ (VSPLTHs 3, (LXSIHZX xoaddr:$src))>;
+defm : ScalToVecWPermute<v4i32, ScalarLoads.ZELi16,
+ (XXSPLTWs (LXSIHZX xoaddr:$src), 1),
+ (XXSPLTWs (LXSIHZX xoaddr:$src), 1)>;
+defm : ScalToVecWPermute<v2i64, ScalarLoads.ZELi16i64,
+ (XXPERMDIs (LXSIHZX xoaddr:$src), 0),
+ (XXPERMDIs (LXSIHZX xoaddr:$src), 0)>;
+defm : ScalToVecWPermute<v4i32, ScalarLoads.SELi16,
+ (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1),
+ (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1)>;
+defm : ScalToVecWPermute<v2i64, ScalarLoads.SELi16i64,
+ (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0),
+ (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0)>;
+
+// Load/convert and convert/store patterns for f16.
+def : Pat<(f64 (extloadf16 xoaddr:$src)),
+ (f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>;
+def : Pat<(truncstoref16 f64:$src, xoaddr:$dst),
+ (STXSIHX (XSCVDPHP $src), xoaddr:$dst)>;
+def : Pat<(f32 (extloadf16 xoaddr:$src)),
+ (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>;
+def : Pat<(truncstoref16 f32:$src, xoaddr:$dst),
+ (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>;
+def : Pat<(f64 (f16_to_fp i32:$A)),
+ (f64 (XSCVHPDP (MTVSRWZ $A)))>;
+def : Pat<(f32 (f16_to_fp i32:$A)),
+ (f32 (COPY_TO_REGCLASS (XSCVHPDP (MTVSRWZ $A)), VSSRC))>;
+def : Pat<(i32 (fp_to_f16 f32:$A)),
+ (i32 (MFVSRWZ (XSCVDPHP (COPY_TO_REGCLASS $A, VSFRC))))>;
+def : Pat<(i32 (fp_to_f16 f64:$A)), (i32 (MFVSRWZ (XSCVDPHP $A)))>;
+
+// Vector sign extensions
+def : Pat<(f64 (PPCVexts f64:$A, 1)),
+ (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>;
+def : Pat<(f64 (PPCVexts f64:$A, 2)),
+ (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>;
+
+def : Pat<(f64 (extloadf32 iaddrX4:$src)),
+ (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$src), VSFRC)>;
+def : Pat<(f32 (fpround (f64 (extloadf32 iaddrX4:$src)))),
+ (f32 (DFLOADf32 iaddrX4:$src))>;
+
+def : Pat<(v4f32 (PPCldvsxlh xaddr:$src)),
+ (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC)>;
+def : Pat<(v4f32 (PPCldvsxlh iaddrX4:$src)),
+ (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC)>;
+
+// Convert (Un)Signed DWord in memory -> QP
+def : Pat<(f128 (sint_to_fp (i64 (load xaddrX4:$src)))),
+ (f128 (XSCVSDQP (LXSDX xaddrX4:$src)))>;
+def : Pat<(f128 (sint_to_fp (i64 (load iaddrX4:$src)))),
+ (f128 (XSCVSDQP (LXSD iaddrX4:$src)))>;
+def : Pat<(f128 (uint_to_fp (i64 (load xaddrX4:$src)))),
+ (f128 (XSCVUDQP (LXSDX xaddrX4:$src)))>;
+def : Pat<(f128 (uint_to_fp (i64 (load iaddrX4:$src)))),
+ (f128 (XSCVUDQP (LXSD iaddrX4:$src)))>;
+
+// Convert Unsigned HWord in memory -> QP
+def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi16)),
+ (f128 (XSCVUDQP (LXSIHZX xaddr:$src)))>;
+
+// Convert Unsigned Byte in memory -> QP
+def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi8)),
+ (f128 (XSCVUDQP (LXSIBZX xoaddr:$src)))>;
+
+// Truncate & Convert QP -> (Un)Signed (D)Word.
+def : Pat<(i64 (fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>;
+def : Pat<(i64 (fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>;
+def : Pat<(i32 (fp_to_sint f128:$src)),
+ (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC)))>;
+def : Pat<(i32 (fp_to_uint f128:$src)),
+ (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC)))>;
+
+// Instructions for store(fptosi).
+// The 8-byte version is repeated here due to the availability of the D-Form STXSD.
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddrX4:$dst, 8),
+ (STXSDX (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
+ xaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), iaddrX4:$dst, 8),
+ (STXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
+ iaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 4),
+ (STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 2),
+ (STXSIHX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 1),
+ (STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddrX4:$dst, 8),
+ (STXSDX (XSCVDPSXDS f64:$src), xaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), iaddrX4:$dst, 8),
+ (STXSD (XSCVDPSXDS f64:$src), iaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2),
+ (STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 1),
+ (STXSIBX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
+
+// Instructions for store(fptoui).
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddrX4:$dst, 8),
+ (STXSDX (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
+ xaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), iaddrX4:$dst, 8),
+ (STXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
+ iaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 4),
+ (STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 2),
+ (STXSIHX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 1),
+ (STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddrX4:$dst, 8),
+ (STXSDX (XSCVDPUXDS f64:$src), xaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), iaddrX4:$dst, 8),
+ (STXSD (XSCVDPUXDS f64:$src), iaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2),
+ (STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 1),
+ (STXSIBX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
+
+// Round & Convert QP -> DP/SP
+def : Pat<(f64 (any_fpround f128:$src)), (f64 (XSCVQPDP $src))>;
+def : Pat<(f32 (any_fpround f128:$src)), (f32 (XSRSP (XSCVQPDPO $src)))>;
+
+// Convert SP -> QP
+def : Pat<(f128 (any_fpextend f32:$src)),
+ (f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>;
+
+def : Pat<(f32 (PPCxsmaxc f32:$XA, f32:$XB)),
+ (f32 (COPY_TO_REGCLASS (XSMAXCDP (COPY_TO_REGCLASS $XA, VSSRC),
+ (COPY_TO_REGCLASS $XB, VSSRC)),
+ VSSRC))>;
+def : Pat<(f32 (PPCxsminc f32:$XA, f32:$XB)),
+ (f32 (COPY_TO_REGCLASS (XSMINCDP (COPY_TO_REGCLASS $XA, VSSRC),
+ (COPY_TO_REGCLASS $XB, VSSRC)),
+ VSSRC))>;
+
+// Endianness-neutral patterns for const splats with ISA 3.0 instructions.
+defm : ScalToVecWPermute<v4i32, (i32 i32:$A), (MTVSRWS $A), (MTVSRWS $A)>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
+ (v4i32 (MTVSRWS $A))>;
+def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A)),
+ (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>;
+defm : ScalToVecWPermute<v4i32, FltToIntLoad.A,
+ (XVCVSPSXWS (LXVWSX xoaddr:$A)),
+ (XVCVSPSXWS (LXVWSX xoaddr:$A))>;
+defm : ScalToVecWPermute<v4i32, FltToUIntLoad.A,
+ (XVCVSPUXWS (LXVWSX xoaddr:$A)),
+ (XVCVSPUXWS (LXVWSX xoaddr:$A))>;
+defm : ScalToVecWPermute<
+ v4i32, DblToIntLoadP9.A,
+ (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1),
+ (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), sub_64)>;
+defm : ScalToVecWPermute<
+ v4i32, DblToUIntLoadP9.A,
+ (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1),
+ (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), sub_64)>;
+defm : ScalToVecWPermute<
+ v2i64, FltToLongLoadP9.A,
+ (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), 0),
+ (SUBREG_TO_REG
+ (i64 1),
+ (XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), sub_64)>;
+defm : ScalToVecWPermute<
+ v2i64, FltToULongLoadP9.A,
+ (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), 0),
+ (SUBREG_TO_REG
+ (i64 1),
+ (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), sub_64)>;
+def : Pat<(v4f32 (PPCldsplat xoaddr:$A)),
+ (v4f32 (LXVWSX xoaddr:$A))>;
+def : Pat<(v4i32 (PPCldsplat xoaddr:$A)),
+ (v4i32 (LXVWSX xoaddr:$A))>;
+} // HasVSX, HasP9Vector
+
+// Big endian Power9 subtarget.
+let Predicates = [HasVSX, HasP9Vector, IsBigEndian] in {
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
+
+// Scalar stores of i8
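+// Editor's note (assumption by the editor, consistent with the patterns below):
+// STXSIBXv stores byte 7 of the source VSR, so element i is first rotated into
+// that slot with a VSLDOI of (i - 7) mod 16 bytes; element 7 needs no rotate.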
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
+
+// Scalar stores of i16
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
+
+def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
+ (v2i64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
+ (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
+
+def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
+ (v2f64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
+def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
+ (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), iaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), iaddrX4:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+
+// (Un)Signed DWord vector extract -> QP
+def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
+ (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),
+ (f128 (XSCVSDQP
+ (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
+def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))),
+ (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))),
+ (f128 (XSCVUDQP
+ (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
+
+// (Un)Signed Word vector extract -> QP
+def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 1)))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>;
+foreach Idx = [0,2,3] in {
+ def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, Idx)))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG
+ (VEXTSW2D (VSPLTW Idx, $src)), sub_64)))>;
+}
+foreach Idx = 0-3 in {
+ def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, Idx)))),
+ (f128 (XSCVUDQP (XXEXTRACTUW $src, !shl(Idx, 2))))>;
+}
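+// Editor's note: !shl(Idx, 2) is the byte offset of BE word Idx, e.g. Idx = 2
+// expands to (XXEXTRACTUW $src, 8).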
+
+// (Un)Signed HWord vector extract -> QP
+foreach Idx = 0-7 in {
+ def : Pat<(f128 (sint_to_fp
+ (i32 (sext_inreg
+ (vector_extract v8i16:$src, Idx), i16)))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG
+ (VEXTSH2D (VEXTRACTUH !add(Idx, Idx), $src)),
+ sub_64)))>;
+ // The SDAG adds the `and` since an `i16` is being extracted as an `i32`.
+ def : Pat<(f128 (uint_to_fp
+ (and (i32 (vector_extract v8i16:$src, Idx)), 65535))),
+ (f128 (XSCVUDQP (EXTRACT_SUBREG
+ (VEXTRACTUH !add(Idx, Idx), $src), sub_64)))>;
+}
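+// Editor's note (illustrative expansion, not part of the original patch): for
+// Idx = 3 the loop above instantiates
+//   def : Pat<(f128 (sint_to_fp (i32 (sext_inreg
+//                     (vector_extract v8i16:$src, 3), i16)))),
+//             (f128 (XSCVSDQP (EXTRACT_SUBREG
+//                     (VEXTSH2D (VEXTRACTUH 6, $src)), sub_64)))>;
+// i.e. BE halfword 3 starts at register byte 6 (= 2 * Idx).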
+
+// (Un)Signed Byte vector extract -> QP
+foreach Idx = 0-15 in {
+ def : Pat<(f128 (sint_to_fp
+ (i32 (sext_inreg (vector_extract v16i8:$src, Idx),
+ i8)))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG
+ (VEXTSB2D (VEXTRACTUB Idx, $src)), sub_64)))>;
+ def : Pat<(f128 (uint_to_fp
+ (and (i32 (vector_extract v16i8:$src, Idx)), 255))),
+ (f128 (XSCVUDQP
+ (EXTRACT_SUBREG (VEXTRACTUB Idx, $src), sub_64)))>;
+}
+
+// Unsigned int in VSX register -> QP
+def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
+ (f128 (XSCVUDQP
+ (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 4)))>;
+} // HasVSX, HasP9Vector, IsBigEndian
+
+// Little endian Power9 subtarget.
+let Predicates = [HasVSX, HasP9Vector, IsLittleEndian] in {
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
+
+def : Pat<(v8i16 (PPCld_vec_be xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXVH8X xoaddr:$src), VRRC)>;
+def : Pat<(PPCst_vec_be v8i16:$rS, xoaddr:$dst),
+ (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
+
+def : Pat<(v16i8 (PPCld_vec_be xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXVB16X xoaddr:$src), VRRC)>;
+def : Pat<(PPCst_vec_be v16i8:$rS, xoaddr:$dst),
+ (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
+
+// Scalar stores of i8
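+// Editor's note (assumption by the editor, consistent with the patterns below):
+// LE element i lives at big-endian register byte 15 - i, so the VSLDOI rotate
+// amount is (8 - i) mod 16; element 8 needs no rotate.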
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>;
+
+// Scalar stores of i16
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
+
+defm : ScalToVecWPermute<
+ v2i64, (i64 (load iaddrX4:$src)),
+ (XXPERMDIs (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2),
+ (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64)>;
+defm : ScalToVecWPermute<
+ v2i64, (i64 (load xaddrX4:$src)),
+ (XXPERMDIs (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2),
+ (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64)>;
+defm : ScalToVecWPermute<
+ v2f64, (f64 (load iaddrX4:$src)),
+ (XXPERMDIs (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2),
+ (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64)>;
+defm : ScalToVecWPermute<
+ v2f64, (f64 (load xaddrX4:$src)),
+ (XXPERMDIs (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2),
+ (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64)>;
+
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), iaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ iaddrX4:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+
+// (Un)Signed DWord vector extract -> QP
+def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
+ (f128 (XSCVSDQP
+ (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
+def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),
+ (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))),
+ (f128 (XSCVUDQP
+ (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
+def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))),
+ (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+
+// (Un)Signed Word vector extract -> QP
+foreach Idx = [[0,3],[1,2],[3,0]] in {
+ def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG
+ (VEXTSW2D (VSPLTW !head(!tail(Idx)), $src)),
+ sub_64)))>;
+}
+def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 2)))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>;
- // As PPCVABSD description, the last operand indicates whether do the
- // sign bit flip.
- def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 1))),
- (v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>;
+foreach Idx = [[0,12],[1,8],[2,4],[3,0]] in {
+ def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))),
+ (f128 (XSCVUDQP (XXEXTRACTUW $src, !head(!tail(Idx)))))>;
}
+
+// (Un)Signed HWord vector extract -> QP
+// The nested foreach lists identify the vector element and the corresponding
+// register byte location.
+foreach Idx = [[0,14],[1,12],[2,10],[3,8],[4,6],[5,4],[6,2],[7,0]] in {
+ def : Pat<(f128 (sint_to_fp
+ (i32 (sext_inreg
+ (vector_extract v8i16:$src, !head(Idx)), i16)))),
+ (f128 (XSCVSDQP
+ (EXTRACT_SUBREG (VEXTSH2D
+ (VEXTRACTUH !head(!tail(Idx)), $src)),
+ sub_64)))>;
+ def : Pat<(f128 (uint_to_fp
+ (and (i32 (vector_extract v8i16:$src, !head(Idx))),
+ 65535))),
+ (f128 (XSCVUDQP (EXTRACT_SUBREG
+ (VEXTRACTUH !head(!tail(Idx)), $src), sub_64)))>;
+}
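+// Editor's note: each [element, byte] pair above reflects that LE halfword e
+// starts at register byte 14 - 2 * e, e.g. [0,14] yields (VEXTRACTUH 14, $src)
+// for element 0.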
+
+// (Un)Signed Byte vector extract -> QP
+foreach Idx = [[0,15],[1,14],[2,13],[3,12],[4,11],[5,10],[6,9],[7,8],[8,7],
+ [9,6],[10,5],[11,4],[12,3],[13,2],[14,1],[15,0]] in {
+ def : Pat<(f128 (sint_to_fp
+ (i32 (sext_inreg
+ (vector_extract v16i8:$src, !head(Idx)), i8)))),
+ (f128 (XSCVSDQP
+ (EXTRACT_SUBREG
+ (VEXTSB2D (VEXTRACTUB !head(!tail(Idx)), $src)),
+ sub_64)))>;
+ def : Pat<(f128 (uint_to_fp
+ (and (i32 (vector_extract v16i8:$src, !head(Idx))),
+ 255))),
+ (f128 (XSCVUDQP
+ (EXTRACT_SUBREG
+ (VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>;
+}
+
+// Unsigned int in VSX register -> QP
+def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
+ (f128 (XSCVUDQP
+ (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 8)))>;
+} // HasVSX, HasP9Vector, IsLittleEndian
+
+// Any Power9 VSX subtarget that supports Power9 Altivec.
+let Predicates = [HasVSX, HasP9Altivec] in {
+// This P9Altivec-related definition is placed here because it can be selected
+// to the VSX instruction xvnegsp; keeping it under the VSX predicates avoids a
+// possible undefined instruction.
+def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 0))),
+ (v4i32 (VABSDUW $A, $B))>;
+
+def : Pat<(v8i16 (PPCvabsd v8i16:$A, v8i16:$B, (i32 0))),
+ (v8i16 (VABSDUH $A, $B))>;
+
+def : Pat<(v16i8 (PPCvabsd v16i8:$A, v16i8:$B, (i32 0))),
+ (v16i8 (VABSDUB $A, $B))>;
+
+// As described for PPCVABSD, the last operand indicates whether to flip the
+// sign bit.
+def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 1))),
+ (v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>;
+} // HasVSX, HasP9Altivec
+
+// Big endian Power9 VSX subtargets with P9 Altivec support.
+let Predicates = [HasVSX, HasP9Altivec, IsBigEndian] in {
+def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
+ (VEXTUBLX $Idx, $S)>;
+
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
+ (VEXTUHLX (RLWINM8 $Idx, 1, 28, 30), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
+ (VEXTUHLX (LI8 0), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
+ (VEXTUHLX (LI8 2), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
+ (VEXTUHLX (LI8 4), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
+ (VEXTUHLX (LI8 6), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
+ (VEXTUHLX (LI8 8), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
+ (VEXTUHLX (LI8 10), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
+ (VEXTUHLX (LI8 12), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
+ (VEXTUHLX (LI8 14), $S)>;
+
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+ (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S)>;
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
+ (VEXTUWLX (LI8 0), $S)>;
+
+// For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (i32 VectorExtractions.LE_WORD_2), sub_32)>;
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
+ (VEXTUWLX (LI8 8), $S)>;
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
+ (VEXTUWLX (LI8 12), $S)>;
+
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+ (EXTSW (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S))>;
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
+ (EXTSW (VEXTUWLX (LI8 0), $S))>;
+// For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
+ (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (i32 VectorExtractions.LE_WORD_2), sub_32))>;
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
+ (EXTSW (VEXTUWLX (LI8 8), $S))>;
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
+ (EXTSW (VEXTUWLX (LI8 12), $S))>;
+
+def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX $Idx, $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 0)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 0), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 1), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 2), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 3), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 4), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 5), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 6), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 7), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 8), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 9), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 10), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 11), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 12), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 13), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 14), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 15), $S), sub_32))>;
+
+def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX
+ (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 0), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 2), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 4), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 6), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 8), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 10), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 12), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 14), $S), sub_32))>;
+
+def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
+ (i32 (EXTRACT_SUBREG (VEXTUWLX
+ (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+ (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 0), $S), sub_32))>;
+// For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
+def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+ (i32 VectorExtractions.LE_WORD_2)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+ (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 8), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+ (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 12), $S), sub_32))>;
+
+// P9 Altivec instructions that can be used to build vectors.
+// They are added to PPCInstrVSX.td rather than PPCAltivecVSX.td so that they
+// can compete with the complexities of the existing build_vector patterns in
+// this file.
+def : Pat<(v2i64 (build_vector WordToDWord.BE_A0, WordToDWord.BE_A1)),
+ (v2i64 (VEXTSW2D $A))>;
+def : Pat<(v2i64 (build_vector HWordToDWord.BE_A0, HWordToDWord.BE_A1)),
+ (v2i64 (VEXTSH2D $A))>;
+def : Pat<(v4i32 (build_vector HWordToWord.BE_A0, HWordToWord.BE_A1,
+ HWordToWord.BE_A2, HWordToWord.BE_A3)),
+ (v4i32 (VEXTSH2W $A))>;
+def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1,
+ ByteToWord.BE_A2, ByteToWord.BE_A3)),
+ (v4i32 (VEXTSB2W $A))>;
+def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)),
+ (v2i64 (VEXTSB2D $A))>;
+} // HasVSX, HasP9Altivec, IsBigEndian
+
+// Little endian Power9 VSX subtargets with P9 Altivec support.
+let Predicates = [HasVSX, HasP9Altivec, IsLittleEndian] in {
+def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
+ (VEXTUBRX $Idx, $S)>;
+
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
+ (VEXTUHRX (RLWINM8 $Idx, 1, 28, 30), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
+ (VEXTUHRX (LI8 0), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
+ (VEXTUHRX (LI8 2), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
+ (VEXTUHRX (LI8 4), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
+ (VEXTUHRX (LI8 6), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
+ (VEXTUHRX (LI8 8), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
+ (VEXTUHRX (LI8 10), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
+ (VEXTUHRX (LI8 12), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
+ (VEXTUHRX (LI8 14), $S)>;
+
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+ (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S)>;
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
+ (VEXTUWRX (LI8 0), $S)>;
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
+ (VEXTUWRX (LI8 4), $S)>;
+// For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (i32 VectorExtractions.LE_WORD_2), sub_32)>;
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
+ (VEXTUWRX (LI8 12), $S)>;
+
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+ (EXTSW (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S))>;
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
+ (EXTSW (VEXTUWRX (LI8 0), $S))>;
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
+ (EXTSW (VEXTUWRX (LI8 4), $S))>;
+// For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
+ (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (i32 VectorExtractions.LE_WORD_2), sub_32))>;
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
+ (EXTSW (VEXTUWRX (LI8 12), $S))>;
+
+def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX $Idx, $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 0)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 0), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 1), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 2), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 3), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 4), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 5), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 6), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 7), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 8), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 9), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 10), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 11), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 12), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 13), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 14), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 15), $S), sub_32))>;
+
+def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX
+ (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 0), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 2), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 4), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 6), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 8), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 10), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 12), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 14), $S), sub_32))>;
+
+def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
+ (i32 (EXTRACT_SUBREG (VEXTUWRX
+ (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+ (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 0), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+ (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 4), $S), sub_32))>;
+// For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
+def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+ (i32 VectorExtractions.LE_WORD_2)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+ (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 12), $S), sub_32))>;
+
+// P9 Altivec instructions that can be used to build vectors.
+// They are added to PPCInstrVSX.td rather than PPCAltivecVSX.td so that they
+// can compete with the complexities of the existing build_vector patterns in
+// this file.
+def : Pat<(v2i64 (build_vector WordToDWord.LE_A0, WordToDWord.LE_A1)),
+ (v2i64 (VEXTSW2D $A))>;
+def : Pat<(v2i64 (build_vector HWordToDWord.LE_A0, HWordToDWord.LE_A1)),
+ (v2i64 (VEXTSH2D $A))>;
+def : Pat<(v4i32 (build_vector HWordToWord.LE_A0, HWordToWord.LE_A1,
+ HWordToWord.LE_A2, HWordToWord.LE_A3)),
+ (v4i32 (VEXTSH2W $A))>;
+def : Pat<(v4i32 (build_vector ByteToWord.LE_A0, ByteToWord.LE_A1,
+ ByteToWord.LE_A2, ByteToWord.LE_A3)),
+ (v4i32 (VEXTSB2W $A))>;
+def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)),
+ (v2i64 (VEXTSB2D $A))>;
+} // HasVSX, HasP9Altivec, IsLittleEndian
+
+// Big endian VSX subtarget that supports additional direct moves from ISA3.0.
+let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian] in {
+def : Pat<(i64 (extractelt v2i64:$A, 1)),
+ (i64 (MFVSRLD $A))>;
+// Better way to build integer vectors if we have MTVSRDD. Big endian.
+def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
+ (v2i64 (MTVSRDD $rB, $rA))>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+ (MTVSRDD
+ (RLDIMI AnyExts.B, AnyExts.A, 32, 0),
+ (RLDIMI AnyExts.D, AnyExts.C, 32, 0))>;
+
+def : Pat<(f128 (PPCbuild_fp128 i64:$rB, i64:$rA)),
+ (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
+} // HasVSX, IsISA3_0, HasDirectMove, IsBigEndian
+
+// Little endian VSX subtarget that supports direct moves from ISA3.0.
+let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian] in {
+def : Pat<(i64 (extractelt v2i64:$A, 0)),
+ (i64 (MFVSRLD $A))>;
+// Better way to build integer vectors if we have MTVSRDD. Little endian.
+def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
+ (v2i64 (MTVSRDD $rB, $rA))>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+ (MTVSRDD
+ (RLDIMI AnyExts.C, AnyExts.D, 32, 0),
+ (RLDIMI AnyExts.A, AnyExts.B, 32, 0))>;
+
+def : Pat<(f128 (PPCbuild_fp128 i64:$rA, i64:$rB)),
+ (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
+} // HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian
+} // AddedComplexity = 400
+
+//---------------------------- Instruction aliases ---------------------------//
+def : InstAlias<"xvmovdp $XT, $XB",
+ (XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
+def : InstAlias<"xvmovsp $XT, $XB",
+ (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
+
+def : InstAlias<"xxspltd $XT, $XB, 0",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>;
+def : InstAlias<"xxspltd $XT, $XB, 1",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>;
+def : InstAlias<"xxmrghd $XT, $XA, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>;
+def : InstAlias<"xxmrgld $XT, $XA, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
+def : InstAlias<"xxswapd $XT, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
+def : InstAlias<"xxspltd $XT, $XB, 0",
+ (XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>;
+def : InstAlias<"xxspltd $XT, $XB, 1",
+ (XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>;
+def : InstAlias<"xxswapd $XT, $XB",
+ (XXPERMDIs vsrc:$XT, vsfrc:$XB, 2)>;
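+// Editor's note (illustrative, based on the aliases above): the assembler
+// accepts
+//   xxswapd 34, 35
+// and encodes it as
+//   xxpermdi 34, 35, 35, 2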
+def : InstAlias<"mfvrd $rA, $XT",
+ (MFVRD g8rc:$rA, vrrc:$XT), 0>;
+def : InstAlias<"mffprd $rA, $src",
+ (MFVSRD g8rc:$rA, f8rc:$src)>;
+def : InstAlias<"mtvrd $XT, $rA",
+ (MTVRD vrrc:$XT, g8rc:$rA), 0>;
+def : InstAlias<"mtfprd $dst, $rA",
+ (MTVSRD f8rc:$dst, g8rc:$rA)>;
+def : InstAlias<"mfvrwz $rA, $XT",
+ (MFVRWZ gprc:$rA, vrrc:$XT), 0>;
+def : InstAlias<"mffprwz $rA, $src",
+ (MFVSRWZ gprc:$rA, f8rc:$src)>;
+def : InstAlias<"mtvrwa $XT, $rA",
+ (MTVRWA vrrc:$XT, gprc:$rA), 0>;
+def : InstAlias<"mtfprwa $dst, $rA",
+ (MTVSRWA f8rc:$dst, gprc:$rA)>;
+def : InstAlias<"mtvrwz $XT, $rA",
+ (MTVRWZ vrrc:$XT, gprc:$rA), 0>;
+def : InstAlias<"mtfprwz $dst, $rA",
+ (MTVSRWZ f8rc:$dst, gprc:$rA)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index b761f337533b..a7546d2be5d8 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -53,7 +53,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -74,6 +73,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>
#include <iterator>
#include <utility>
@@ -244,10 +244,10 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false)
-static const std::string PHINodeNameSuffix = ".phi";
-static const std::string CastNodeNameSuffix = ".cast";
-static const std::string GEPNodeIncNameSuffix = ".inc";
-static const std::string GEPNodeOffNameSuffix = ".off";
+static constexpr StringRef PHINodeNameSuffix = ".phi";
+static constexpr StringRef CastNodeNameSuffix = ".cast";
+static constexpr StringRef GEPNodeIncNameSuffix = ".inc";
+static constexpr StringRef GEPNodeOffNameSuffix = ".off";
FunctionPass *llvm::createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM) {
return new PPCLoopInstrFormPrep(TM);
@@ -263,7 +263,7 @@ static bool IsPtrInBounds(Value *BasePtr) {
return false;
}
-static std::string getInstrName(const Value *I, const std::string Suffix) {
+static std::string getInstrName(const Value *I, StringRef Suffix) {
assert(I && "Invalid paramater!");
if (I->hasName())
return (I->getName() + Suffix).str();
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
index 83cca11b27a3..2b0e604e0ccd 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
@@ -54,6 +54,7 @@ private:
static StringRef getCPUSuffix(const PPCSubtarget *Subtarget);
static std::string createMASSVFuncName(Function &Func,
const PPCSubtarget *Subtarget);
+ bool handlePowSpecialCases(CallInst *CI, Function &Func, Module &M);
bool lowerMASSVCall(CallInst *CI, Function &Func, Module &M,
const PPCSubtarget *Subtarget);
};
@@ -96,6 +97,34 @@ PPCLowerMASSVEntries::createMASSVFuncName(Function &Func,
return MASSVEntryName;
}
+/// If the call carries the required fast-math flags, this function redirects
+/// it to the llvm.pow intrinsic when the exponent is 0.25 or 0.75.
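+/// For example (an illustrative sketch by the editor, not from the original
+/// patch), a vector call such as
+///   %r = call ninf afn <4 x float> @__powf4_massv(<4 x float> %x,
+///                                                 <4 x float> <float 0.75, ...>)
+/// is retargeted to @llvm.pow.v4f32 so later passes may expand it into a
+/// sqrt-based sequence.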
+bool PPCLowerMASSVEntries::handlePowSpecialCases(CallInst *CI, Function &Func,
+ Module &M) {
+ if (Func.getName() != "__powf4_massv" && Func.getName() != "__powd2_massv")
+ return false;
+
+ if (Constant *Exp = dyn_cast<Constant>(CI->getArgOperand(1)))
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(Exp->getSplatValue())) {
+ // If the argument is 0.75 or 0.25, it is cheaper to turn the call into the
+ // pow intrinsic so that it can be optimized into a sequence of sqrt's.
+ if (!CI->hasNoInfs() || !CI->hasApproxFunc())
+ return false;
+
+ if (!CFP->isExactlyValue(0.75) && !CFP->isExactlyValue(0.25))
+ return false;
+
+ if (CFP->isExactlyValue(0.25) && !CI->hasNoSignedZeros())
+ return false;
+
+ CI->setCalledFunction(
+ Intrinsic::getDeclaration(&M, Intrinsic::pow, CI->getType()));
+ return true;
+ }
+
+ return false;
+}
+
/// Lowers generic MASSV entries to PowerPC subtarget-specific MASSV entries.
/// e.g.: __sind2_massv --> __sind2_P9 for a Power9 subtarget.
/// Both function prototypes and their callsites are updated during lowering.
@@ -105,11 +134,14 @@ bool PPCLowerMASSVEntries::lowerMASSVCall(CallInst *CI, Function &Func,
if (CI->use_empty())
return false;
+ // Handling pow(x, 0.25), pow(x, 0.75), powf(x, 0.25), powf(x, 0.75)
+ if (handlePowSpecialCases(CI, Func, M))
+ return true;
+
std::string MASSVEntryName = createMASSVFuncName(Func, Subtarget);
FunctionCallee FCache = M.getOrInsertFunction(
MASSVEntryName, Func.getFunctionType(), Func.getAttributes());
- CallSite CS(CI);
CI->setCalledFunction(FCache);
return true;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index b6496f189a3a..236f98f32e18 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -29,10 +29,6 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
-static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
- return AP.MMI->getObjFileInfo<MachineModuleInfoMachO>();
-}
-
static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO,
AsmPrinter &AP) {
const TargetMachine &TM = AP.TM;
@@ -41,13 +37,6 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO,
MCContext &Ctx = AP.OutContext;
SmallString<128> Name;
- StringRef Suffix;
- if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG)
- Suffix = "$non_lazy_ptr";
-
- if (!Suffix.empty())
- Name += DL.getPrivateGlobalPrefix();
-
if (!MO.isGlobal()) {
assert(MO.isSymbol() && "Isn't a symbol reference");
Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
@@ -56,30 +45,13 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO,
TM.getNameWithPrefix(Name, GV, Mang);
}
- Name += Suffix;
MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
- // If the symbol reference is actually to a non_lazy_ptr, not to the symbol,
- // then add the suffix.
- if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) {
- MachineModuleInfoMachO &MachO = getMachOMMI(AP);
-
- MachineModuleInfoImpl::StubValueTy &StubSym = MachO.getGVStubEntry(Sym);
-
- if (!StubSym.getPointer()) {
- assert(MO.isGlobal() && "Extern symbol not handled yet");
- StubSym = MachineModuleInfoImpl::
- StubValueTy(AP.getSymbol(MO.getGlobal()),
- !MO.getGlobal()->hasInternalLinkage());
- }
- return Sym;
- }
-
return Sym;
}
static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
- AsmPrinter &Printer, bool IsDarwin) {
+ AsmPrinter &Printer) {
MCContext &Ctx = Printer.OutContext;
MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
@@ -106,13 +78,30 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
break;
}
- if (MO.getTargetFlags() == PPCII::MO_PLT)
+ if (MO.getTargetFlags() == PPCII::MO_PLT)
RefKind = MCSymbolRefExpr::VK_PLT;
+ else if (MO.getTargetFlags() == PPCII::MO_PCREL_FLAG)
+ RefKind = MCSymbolRefExpr::VK_PCREL;
+ else if (MO.getTargetFlags() == (PPCII::MO_PCREL_FLAG | PPCII::MO_GOT_FLAG))
+ RefKind = MCSymbolRefExpr::VK_PPC_GOT_PCREL;
- const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+ const MachineInstr *MI = MO.getParent();
+ const MachineFunction *MF = MI->getMF();
const Module *M = MF->getFunction().getParent();
const PPCSubtarget *Subtarget = &(MF->getSubtarget<PPCSubtarget>());
const TargetMachine &TM = Printer.TM;
+
+ unsigned MIOpcode = MI->getOpcode();
+ assert((Subtarget->isUsingPCRelativeCalls() || MIOpcode != PPC::BL8_NOTOC) &&
+ "BL8_NOTOC is only valid when using PC Relative Calls.");
+ if (Subtarget->isUsingPCRelativeCalls()) {
+ if (MIOpcode == PPC::TAILB || MIOpcode == PPC::TAILB8 ||
+ MIOpcode == PPC::TCRETURNdi || MIOpcode == PPC::TCRETURNdi8 ||
+ MIOpcode == PPC::BL8_NOTOC) {
+ RefKind = MCSymbolRefExpr::VK_PPC_NOTOC;
+ }
+ }
+
const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx);
// If -msecure-plt -fPIC, add 32768 to symbol.
if (Subtarget->isSecurePlt() && TM.isPositionIndependent() &&
@@ -137,10 +126,10 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
// Add ha16() / lo16() markers if required.
switch (access) {
case PPCII::MO_LO:
- Expr = PPCMCExpr::createLo(Expr, IsDarwin, Ctx);
+ Expr = PPCMCExpr::createLo(Expr, Ctx);
break;
case PPCII::MO_HA:
- Expr = PPCMCExpr::createHa(Expr, IsDarwin, Ctx);
+ Expr = PPCMCExpr::createHa(Expr, Ctx);
break;
}
@@ -148,20 +137,18 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
}
void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
- AsmPrinter &AP, bool IsDarwin) {
+ AsmPrinter &AP) {
OutMI.setOpcode(MI->getOpcode());
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MCOperand MCOp;
- if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, AP,
- IsDarwin))
+ if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, AP))
OutMI.addOperand(MCOp);
}
}
bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
- MCOperand &OutMO, AsmPrinter &AP,
- bool IsDarwin) {
+ MCOperand &OutMO, AsmPrinter &AP) {
switch (MO.getType()) {
default:
llvm_unreachable("unknown operand type");
@@ -170,6 +157,9 @@ bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
assert(MO.getReg() > PPC::NoRegister &&
MO.getReg() < PPC::NUM_TARGET_REGS &&
"Invalid register for this target!");
+ // Ignore all implicit register operands.
+ if (MO.isImplicit())
+ return false;
OutMO = MCOperand::createReg(MO.getReg());
return true;
case MachineOperand::MO_Immediate:
@@ -181,20 +171,20 @@ bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
return true;
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol:
- OutMO = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, IsDarwin);
+ OutMO = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP);
return true;
case MachineOperand::MO_JumpTableIndex:
- OutMO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, IsDarwin);
+ OutMO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP);
return true;
case MachineOperand::MO_ConstantPoolIndex:
- OutMO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, IsDarwin);
+ OutMO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP);
return true;
case MachineOperand::MO_BlockAddress:
- OutMO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP,
- IsDarwin);
+ OutMO =
+ GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP);
return true;
case MachineOperand::MO_MCSymbol:
- OutMO = GetSymbolRef(MO, MO.getMCSymbol(), AP, IsDarwin);
+ OutMO = GetSymbolRef(MO, MO.getMCSymbol(), AP);
return true;
case MachineOperand::MO_RegisterMask:
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 74192cb20cd0..d2aba6bd6e8d 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -57,6 +57,8 @@ STATISTIC(NumRotatesCollapsed,
"Number of pairs of rotate left, clear left/right collapsed");
STATISTIC(NumEXTSWAndSLDICombined,
"Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
+STATISTIC(NumLoadImmZeroFoldedAndRemoved,
+ "Number of LI(8) reg, 0 that are folded to r0 and removed");
static cl::opt<bool>
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
@@ -124,9 +126,14 @@ public:
// Main entry point for this pass.
bool runOnMachineFunction(MachineFunction &MF) override {
+ initialize(MF);
+    // At this point, the TOC pointer should not be used in a function that
+    // uses PC-Relative addressing.
+ assert((MF.getRegInfo().use_empty(PPC::X2) ||
+ !MF.getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls()) &&
+ "TOC pointer used in a function using PC-Relative addressing!");
if (skipFunction(MF.getFunction()))
return false;
- initialize(MF);
return simplifyCode();
}
};
@@ -314,7 +321,22 @@ bool PPCMIPeephole::simplifyCode(void) {
default:
break;
-
+ case PPC::LI:
+ case PPC::LI8: {
+ // If we are materializing a zero, look for any use operands for which
+ // zero means immediate zero. All such operands can be replaced with
+ // PPC::ZERO.
+ if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
+ break;
+ unsigned MIDestReg = MI.getOperand(0).getReg();
+ for (MachineInstr& UseMI : MRI->use_instructions(MIDestReg))
+ Simplified |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
+ if (MRI->use_nodbg_empty(MIDestReg)) {
+ ++NumLoadImmZeroFoldedAndRemoved;
+ ToErase = &MI;
+ }
+ break;
+ }
case PPC::STD: {
MachineFrameInfo &MFI = MF->getFrameInfo();
if (MFI.hasVarSizedObjects() ||
@@ -541,10 +563,12 @@ bool PPCMIPeephole::simplifyCode(void) {
if (!P1 || !P2)
break;
- // Remove the passed FRSP instruction if it only feeds this MI and
- // set any uses of that FRSP (in this MI) to the source of the FRSP.
+ // Remove the passed FRSP/XSRSP instruction if it only feeds this MI
+ // and set any uses of that FRSP/XSRSP (in this MI) to the source of
+ // the FRSP/XSRSP.
auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
- if (RoundInstr->getOpcode() == PPC::FRSP &&
+ unsigned Opc = RoundInstr->getOpcode();
+ if ((Opc == PPC::FRSP || Opc == PPC::XSRSP) &&
MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
Simplified = true;
Register ConvReg1 = RoundInstr->getOperand(1).getReg();
@@ -554,7 +578,7 @@ bool PPCMIPeephole::simplifyCode(void) {
if (Use.getOperand(i).isReg() &&
Use.getOperand(i).getReg() == FRSPDefines)
Use.getOperand(i).setReg(ConvReg1);
- LLVM_DEBUG(dbgs() << "Removing redundant FRSP:\n");
+ LLVM_DEBUG(dbgs() << "Removing redundant FRSP/XSRSP:\n");
LLVM_DEBUG(RoundInstr->dump());
LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
LLVM_DEBUG(MI.dump());
@@ -882,11 +906,6 @@ bool PPCMIPeephole::simplifyCode(void) {
        // while in PowerPC ISA, the lowest bit is at index 63.
APInt MaskSrc =
APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
- // Current APInt::getBitsSetWithWrap sets all bits to 0 if loBit is
- // equal to highBit.
- // If MBSrc - MESrc == 1, we expect a full set mask instead of Null.
- if (SrcMaskFull && (MBSrc - MESrc == 1))
- MaskSrc.setAllBits();
APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
APInt FinalMask = RotatedSrcMask & MaskMI;
@@ -1540,6 +1559,12 @@ bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
LLVM_DEBUG(dbgs() << "To: ");
LLVM_DEBUG(MI.dump());
NumRotatesCollapsed++;
+  // If SrcReg has no non-debug uses, it is safe to delete its def SrcMI.
+ if (MRI->use_nodbg_empty(SrcReg)) {
+ assert(!SrcMI->hasImplicitDef() &&
+ "Not expecting an implicit def with this instr.");
+ SrcMI->eraseFromParent();
+ }
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index 2f65d6a2855b..daf88589bb52 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -10,48 +10,55 @@
#include "llvm/ADT/Twine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+static cl::opt<bool> PPCDisableNonVolatileCR(
+ "ppc-disable-non-volatile-cr",
+ cl::desc("Disable the use of non-volatile CR register fields"),
+ cl::init(false), cl::Hidden);
void PPCFunctionInfo::anchor() {}
+PPCFunctionInfo::PPCFunctionInfo(const MachineFunction &MF)
+ : DisableNonVolatileCR(PPCDisableNonVolatileCR) {}
-MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const {
+MCSymbol *PPCFunctionInfo::getPICOffsetSymbol(MachineFunction &MF) const {
const DataLayout &DL = MF.getDataLayout();
return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
Twine(MF.getFunctionNumber()) +
"$poff");
}
-MCSymbol *PPCFunctionInfo::getGlobalEPSymbol() const {
+MCSymbol *PPCFunctionInfo::getGlobalEPSymbol(MachineFunction &MF) const {
const DataLayout &DL = MF.getDataLayout();
return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
"func_gep" +
Twine(MF.getFunctionNumber()));
}
-MCSymbol *PPCFunctionInfo::getLocalEPSymbol() const {
+MCSymbol *PPCFunctionInfo::getLocalEPSymbol(MachineFunction &MF) const {
const DataLayout &DL = MF.getDataLayout();
return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
"func_lep" +
Twine(MF.getFunctionNumber()));
}
-MCSymbol *PPCFunctionInfo::getTOCOffsetSymbol() const {
+MCSymbol *PPCFunctionInfo::getTOCOffsetSymbol(MachineFunction &MF) const {
const DataLayout &DL = MF.getDataLayout();
return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
"func_toc" +
Twine(MF.getFunctionNumber()));
}
-bool PPCFunctionInfo::isLiveInSExt(unsigned VReg) const {
- for (const std::pair<unsigned, ISD::ArgFlagsTy> &LiveIn : LiveInAttrs)
+bool PPCFunctionInfo::isLiveInSExt(Register VReg) const {
+ for (const std::pair<Register, ISD::ArgFlagsTy> &LiveIn : LiveInAttrs)
if (LiveIn.first == VReg)
return LiveIn.second.isSExt();
return false;
}
-bool PPCFunctionInfo::isLiveInZExt(unsigned VReg) const {
- for (const std::pair<unsigned, ISD::ArgFlagsTy> &LiveIn : LiveInAttrs)
+bool PPCFunctionInfo::isLiveInZExt(Register VReg) const {
+ for (const std::pair<Register, ISD::ArgFlagsTy> &LiveIn : LiveInAttrs)
if (LiveIn.first == VReg)
return LiveIn.second.isZExt();
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 2b341b5952c8..29ca53e273d7 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -65,6 +65,10 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// SpillsCR - Indicates whether CR is spilled in the current function.
bool SpillsCR = false;
+  /// DisableNonVolatileCR - Indicates whether the use of non-volatile CR
+  /// fields is disabled.
+ bool DisableNonVolatileCR = false;
+
/// Indicates whether VRSAVE is spilled in the current function.
bool SpillsVRSAVE = false;
@@ -112,24 +116,17 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// If any of CR[2-4] need to be saved in the prologue and restored in the
/// epilogue then they are added to this array. This is used for the
/// 64-bit SVR4 ABI.
- SmallVector<unsigned, 3> MustSaveCRs;
-
- /// Hold onto our MachineFunction context.
- MachineFunction &MF;
+ SmallVector<Register, 3> MustSaveCRs;
/// Whether this uses the PIC Base register or not.
bool UsesPICBase = false;
- /// True if this function has a subset of CSRs that is handled explicitly via
- /// copies
- bool IsSplitCSR = false;
-
/// We keep track attributes for each live-in virtual registers
/// to use SExt/ZExt flags in later optimization.
- std::vector<std::pair<unsigned, ISD::ArgFlagsTy>> LiveInAttrs;
+ std::vector<std::pair<Register, ISD::ArgFlagsTy>> LiveInAttrs;
public:
- explicit PPCFunctionInfo(MachineFunction &MF) : MF(MF) {}
+ explicit PPCFunctionInfo(const MachineFunction &MF);
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
@@ -175,6 +172,9 @@ public:
void setSpillsCR() { SpillsCR = true; }
bool isCRSpilled() const { return SpillsCR; }
+ void setDisableNonVolatileCR() { DisableNonVolatileCR = true; }
+ bool isNonVolatileCRDisabled() const { return DisableNonVolatileCR; }
+
void setSpillsVRSAVE() { SpillsVRSAVE = true; }
bool isVRSAVESpilled() const { return SpillsVRSAVE; }
@@ -200,36 +200,33 @@ public:
void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; }
/// This function associates attributes for each live-in virtual register.
- void addLiveInAttr(unsigned VReg, ISD::ArgFlagsTy Flags) {
+ void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags) {
LiveInAttrs.push_back(std::make_pair(VReg, Flags));
}
/// This function returns true if the specified vreg is
/// a live-in register and sign-extended.
- bool isLiveInSExt(unsigned VReg) const;
+ bool isLiveInSExt(Register VReg) const;
/// This function returns true if the specified vreg is
/// a live-in register and zero-extended.
- bool isLiveInZExt(unsigned VReg) const;
+ bool isLiveInZExt(Register VReg) const;
int getCRSpillFrameIndex() const { return CRSpillFrameIndex; }
void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; }
- const SmallVectorImpl<unsigned> &
+ const SmallVectorImpl<Register> &
getMustSaveCRs() const { return MustSaveCRs; }
- void addMustSaveCR(unsigned Reg) { MustSaveCRs.push_back(Reg); }
+ void addMustSaveCR(Register Reg) { MustSaveCRs.push_back(Reg); }
void setUsesPICBase(bool uses) { UsesPICBase = uses; }
bool usesPICBase() const { return UsesPICBase; }
- bool isSplitCSR() const { return IsSplitCSR; }
- void setIsSplitCSR(bool s) { IsSplitCSR = s; }
-
- MCSymbol *getPICOffsetSymbol() const;
+ MCSymbol *getPICOffsetSymbol(MachineFunction &MF) const;
- MCSymbol *getGlobalEPSymbol() const;
- MCSymbol *getLocalEPSymbol() const;
- MCSymbol *getTOCOffsetSymbol() const;
+ MCSymbol *getGlobalEPSymbol(MachineFunction &MF) const;
+ MCSymbol *getLocalEPSymbol(MachineFunction &MF) const;
+ MCSymbol *getTOCOffsetSymbol(MachineFunction &MF) const;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
index a38c8f475066..5649d7d13966 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
@@ -15,6 +15,16 @@ static cl::opt<bool>
DisableAddiLoadHeuristic("disable-ppc-sched-addi-load",
cl::desc("Disable scheduling addi instruction before"
"load for ppc"), cl::Hidden);
+static cl::opt<bool>
+ EnableAddiHeuristic("ppc-postra-bias-addi",
+                      cl::desc("Enable scheduling addi instruction as early"
+                               " as possible post ra"),
+ cl::Hidden, cl::init(true));
+
+static bool isADDIInstr(const GenericScheduler::SchedCandidate &Cand) {
+ return Cand.SU->getInstr()->getOpcode() == PPC::ADDI ||
+ Cand.SU->getInstr()->getOpcode() == PPC::ADDI8;
+}
bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand,
@@ -22,19 +32,13 @@ bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand,
if (DisableAddiLoadHeuristic)
return false;
- auto isADDIInstr = [&] (const MachineInstr &Inst) {
- return Inst.getOpcode() == PPC::ADDI || Inst.getOpcode() == PPC::ADDI8;
- };
-
SchedCandidate &FirstCand = Zone.isTop() ? TryCand : Cand;
SchedCandidate &SecondCand = Zone.isTop() ? Cand : TryCand;
- if (isADDIInstr(*FirstCand.SU->getInstr()) &&
- SecondCand.SU->getInstr()->mayLoad()) {
+ if (isADDIInstr(FirstCand) && SecondCand.SU->getInstr()->mayLoad()) {
TryCand.Reason = Stall;
return true;
}
- if (FirstCand.SU->getInstr()->mayLoad() &&
- isADDIInstr(*SecondCand.SU->getInstr())) {
+ if (FirstCand.SU->getInstr()->mayLoad() && isADDIInstr(SecondCand)) {
TryCand.Reason = NoCand;
return true;
}
@@ -61,6 +65,38 @@ void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
return;
}
+bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand) const {
+ if (!EnableAddiHeuristic)
+ return false;
+
+ if (isADDIInstr(TryCand) && !isADDIInstr(Cand)) {
+ TryCand.Reason = Stall;
+ return true;
+ }
+ return false;
+}
+
+void PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand) {
+ PostGenericScheduler::tryCandidate(Cand, TryCand);
+
+ if (!Cand.isValid())
+ return;
+
+  // Apply the PowerPC post-RA-specific heuristic only when TryCand is not
+  // selected, or is selected only by node order.
+ if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
+ return;
+
+  // Scheduling the ADDI as early as possible post-RA helps avoid stalls caused
+  // by vector instructions that occupy all the hardware units. ADDI is also
+  // commonly used to post-increment the loop induction variable, which matters
+  // for performance.
+ if (biasAddiCandidate(Cand, TryCand))
+ return;
+}
+
void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {
// Custom PPC PostRA specific behavior here.
PostGenericScheduler::enterMBB(MBB);
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.h
index 93532d9545a6..a9734ca71859 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.h
@@ -42,6 +42,9 @@ protected:
SUnit *pickNode(bool &IsTopNode) override;
void enterMBB(MachineBasicBlock *MBB) override;
void leaveMBB() override;
+
+ void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override;
+ bool biasAddiCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) const;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
new file mode 100644
index 000000000000..815dfd1402f4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
@@ -0,0 +1,203 @@
+//===- PPCMacroFusion.cpp - PowerPC Macro Fusion --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the PowerPC implementation of the DAG scheduling
+/// mutation to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCSubtarget.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/MacroFusion.h"
+
+using namespace llvm;
+namespace {
+
+class FusionFeature {
+public:
+ typedef SmallDenseSet<unsigned> FusionOpSet;
+
+ enum FusionKind {
+ #define FUSION_KIND(KIND) FK_##KIND
+ #define FUSION_FEATURE(KIND, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2) \
+ FUSION_KIND(KIND),
+ #include "PPCMacroFusion.def"
+ FUSION_KIND(END)
+ };
+private:
+  // Each fusion feature is assigned one fusion kind. All the instructions
+  // with the same fusion kind share the same fusion characteristics.
+ FusionKind Kd;
+ // True if this feature is enabled.
+ bool Supported;
+ // li rx, si
+ // load rt, ra, rx
+  // The index of the dependent operand in the second op (the load). A negative
+  // value means it may be any operand.
+ int DepOpIdx;
+ // The first fusion op set.
+ FusionOpSet OpSet1;
+ // The second fusion op set.
+ FusionOpSet OpSet2;
+public:
+ FusionFeature(FusionKind Kind, bool HasFeature, int Index,
+ const FusionOpSet &First, const FusionOpSet &Second) :
+ Kd(Kind), Supported(HasFeature), DepOpIdx(Index), OpSet1(First),
+ OpSet2(Second) {}
+
+ bool hasOp1(unsigned Opc) const { return OpSet1.count(Opc) != 0; }
+ bool hasOp2(unsigned Opc) const { return OpSet2.count(Opc) != 0; }
+ bool isSupported() const { return Supported; }
+ Optional<unsigned> depOpIdx() const {
+ if (DepOpIdx < 0)
+ return None;
+ return DepOpIdx;
+ }
+
+ FusionKind getKind() const { return Kd; }
+};
+
+static bool matchingRegOps(const MachineInstr &FirstMI,
+ int FirstMIOpIndex,
+ const MachineInstr &SecondMI,
+ int SecondMIOpIndex) {
+ const MachineOperand &Op1 = FirstMI.getOperand(FirstMIOpIndex);
+ const MachineOperand &Op2 = SecondMI.getOperand(SecondMIOpIndex);
+ if (!Op1.isReg() || !Op2.isReg())
+ return false;
+
+ return Op1.getReg() == Op2.getReg();
+}
+
+// Return true if the FirstMI meets the constraints of SecondMI according to
+// fusion specification.
+static bool checkOpConstraints(FusionFeature::FusionKind Kd,
+ const MachineInstr &FirstMI,
+ const MachineInstr &SecondMI) {
+ switch (Kd) {
+  // The hardware does not require any specific check for the fused
+  // instructions' operands, so return true to indicate the pair is fusable.
+ default: return true;
+ // [addi rt,ra,si - lxvd2x xt,ra,rb] etc.
+ case FusionFeature::FK_AddiLoad: {
+ // lxvd2x(ra) cannot be zero
+ const MachineOperand &RA = SecondMI.getOperand(1);
+ if (!RA.isReg())
+ return true;
+
+ return Register::isVirtualRegister(RA.getReg()) ||
+ (RA.getReg() != PPC::ZERO && RA.getReg() != PPC::ZERO8);
+ }
+ // [addis rt,ra,si - ld rt,ds(ra)] etc.
+ case FusionFeature::FK_AddisLoad: {
+ const MachineOperand &RT = SecondMI.getOperand(0);
+ if (!RT.isReg())
+ return true;
+
+ // Only check it for non-virtual register.
+ if (!Register::isVirtualRegister(RT.getReg()))
+ // addis(rt) = ld(ra) = ld(rt)
+ // ld(rt) cannot be zero
+ if (!matchingRegOps(SecondMI, 0, SecondMI, 2) ||
+ (RT.getReg() == PPC::ZERO || RT.getReg() == PPC::ZERO8))
+ return false;
+
+    // The first 12 bits of addis(si) must be all 1s or all 0s.
+ const MachineOperand &SI = FirstMI.getOperand(2);
+ if (!SI.isImm())
+ return true;
+ int64_t Imm = SI.getImm();
+ if (((Imm & 0xFFF0) != 0) && ((Imm & 0xFFF0) != 0xFFF0))
+ return false;
+
+ // If si = 1111111111110000 and the msb of the d/ds field of the load equals
+ // 1, then fusion does not occur.
+ if ((Imm & 0xFFF0) == 0xFFF0) {
+ const MachineOperand &D = SecondMI.getOperand(1);
+ if (!D.isImm())
+ return true;
+
+      // 14 bits for the DS field, 16 bits for the D field.
+ int MSB = 15;
+ if (SecondMI.getOpcode() == PPC::LD)
+ MSB = 13;
+
+ return (D.getImm() & (1ULL << MSB)) == 0;
+ }
+ return true;
+ }
+ }
+
+ llvm_unreachable("All the cases should have been handled");
+ return true;
+}
+
+/// Check if the instr pair, FirstMI and SecondMI, should be fused together.
+/// Given SecondMI, when FirstMI is unspecified, check whether SecondMI may be
+/// part of a fused pair at all.
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+ const TargetSubtargetInfo &TSI,
+ const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ // We use the PPC namespace to avoid the need to prefix opcodes with PPC:: in
+ // the def file.
+ using namespace PPC;
+
+ const PPCSubtarget &ST = static_cast<const PPCSubtarget&>(TSI);
+ static const FusionFeature FusionFeatures[] = {
+ #define FUSION_FEATURE(KIND, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2) { \
+ FusionFeature::FUSION_KIND(KIND), ST.HAS_FEATURE(), DEP_OP_IDX, { OPSET1 },\
+ { OPSET2 } },
+ #include "PPCMacroFusion.def"
+ };
+ #undef FUSION_KIND
+
+ for (auto &Feature : FusionFeatures) {
+ // Skip if the feature is not supported.
+ if (!Feature.isSupported())
+ continue;
+
+    // Only when SecondMI is fusable do we start looking for a fusable FirstMI.
+ if (Feature.hasOp2(SecondMI.getOpcode())) {
+      // If FirstMI == nullptr, we are only checking whether SecondMI can be
+      // fused at all.
+ if (!FirstMI)
+ return true;
+
+ // Checking if the FirstMI is fusable with the SecondMI.
+ if (!Feature.hasOp1(FirstMI->getOpcode()))
+ continue;
+
+ auto DepOpIdx = Feature.depOpIdx();
+ if (DepOpIdx.hasValue()) {
+ // Checking if the result of the FirstMI is the desired operand of the
+ // SecondMI if the DepOpIdx is set. Otherwise, ignore it.
+ if (!matchingRegOps(*FirstMI, 0, SecondMI, *DepOpIdx))
+ return false;
+ }
+
+ // Checking more on the instruction operands.
+ if (checkOpConstraints(Feature.getKind(), *FirstMI, SecondMI))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+} // end anonymous namespace
+
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation () {
+ return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+}
+
+} // end namespace llvm
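
The FK_AddisLoad case in checkOpConstraints above encodes a bit-level rule for the addis immediate: its upper 12 bits must be all zeros or all ones, and in the all-ones case the most significant bit of the load's D/DS displacement must be clear. A standalone sketch of just that check (hypothetical helper name, not part of the patch):

// Sketch of the addis/load immediate constraint described above. IsDSForm
// selects the 14-bit DS field (e.g. LD) versus the 16-bit D field.
#include <cassert>
#include <cstdint>

static bool addisLoadImmsFusable(int64_t SI, int64_t D, bool IsDSForm) {
  // Upper 12 bits of the 16-bit SI field must be all 0s or all 1s.
  if ((SI & 0xFFF0) != 0 && (SI & 0xFFF0) != 0xFFF0)
    return false;
  // In the all-ones case, the MSB of the displacement must be clear.
  if ((SI & 0xFFF0) == 0xFFF0) {
    int MSB = IsDSForm ? 13 : 15;
    return (D & (1LL << MSB)) == 0;
  }
  return true;
}

int main() {
  assert(addisLoadImmsFusable(8, 0x10, false));        // upper bits all zero
  assert(addisLoadImmsFusable(-12, 0x10, false));      // all ones, D MSB clear
  assert(!addisLoadImmsFusable(-12, 0x8000, false));   // D MSB set -> no fusion
  assert(!addisLoadImmsFusable(0x1234, 0x10, false));  // mixed upper bits
}
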
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.def b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.def
new file mode 100644
index 000000000000..c7e4e7c22e0a
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.def
@@ -0,0 +1,45 @@
+//===-- PPCMacroFusion.def - PowerPC Macro Fusion Candidates ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains descriptions of the macro-fusion pair for PowerPC.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef FUSION_FEATURE
+
+// Each FUSION_FEATURE is assigned one TYPE and can be enabled/disabled by
+// HAS_FEATURE. An instruction pair is fusable only when the opcode of the
+// first instruction is in OPSET1 and the opcode of the second instruction is
+// in OPSET2. If DEP_OP_IDX >= 0, we also check that the result of the first op
+// is the operand of the second op at operand index DEP_OP_IDX. We assume that
+// the result of the first op is its operand zero.
+#define FUSION_FEATURE(TYPE, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2)
+
+#endif
+
+#ifndef FUSION_OP_SET
+#define FUSION_OP_SET(...) __VA_ARGS__
+#endif
+
+// Power8 User Manual Section 10.1.12, Instruction Fusion
+// {addi} followed by one of these {lxvd2x, lxvw4x, lxvdsx, lvebx, lvehx,
+// lvewx, lvx, lxsdx}
+FUSION_FEATURE(AddiLoad, hasAddiLoadFusion, 2, \
+ FUSION_OP_SET(ADDI, ADDI8, ADDItocL), \
+ FUSION_OP_SET(LXVD2X, LXVW4X, LXVDSX, LVEBX, LVEHX, LVEWX, \
+ LVX, LXSDX))
+
+// {addis} followed by one of these {ld, lbz, lhz, lwz}
+FUSION_FEATURE(AddisLoad, hasAddisLoadFusion, 2, \
+ FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8), \
+ FUSION_OP_SET(LD, LBZ, LBZ8, LHZ, LHZ8, LWZ, LWZ8))
+
+#undef FUSION_FEATURE
+#undef FUSION_OP_SET
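
PPCMacroFusion.def is consumed twice through the X-macro pattern: once in PPCMacroFusion.cpp to generate the FusionKind enumerators and once to build the table of FusionFeature entries. A minimal self-contained C++ sketch of that pattern (argument list simplified, names hypothetical; the real code re-#includes the .def, this sketch inlines the list):

#include <cstdio>

// One list, expanded twice with different definitions of the X macro.
#define FUSION_FEATURE_LIST(X)                                                 \
  X(AddiLoad, true, 2)                                                         \
  X(AddisLoad, true, 2)

// First expansion: an enum of fusion kinds.
enum FusionKind {
#define MAKE_ENUM(KIND, SUPPORTED, DEPIDX) FK_##KIND,
  FUSION_FEATURE_LIST(MAKE_ENUM)
#undef MAKE_ENUM
  FK_END
};

// Second expansion: a table describing each kind.
struct FeatureDesc { FusionKind Kind; bool Supported; int DepOpIdx; };
static const FeatureDesc Features[] = {
#define MAKE_DESC(KIND, SUPPORTED, DEPIDX) {FK_##KIND, SUPPORTED, DEPIDX},
  FUSION_FEATURE_LIST(MAKE_DESC)
#undef MAKE_DESC
};

int main() {
  for (const FeatureDesc &F : Features)
    std::printf("kind=%d supported=%d depIdx=%d\n", F.Kind, F.Supported,
                F.DepOpIdx);
}
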
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.h
new file mode 100644
index 000000000000..91cbedf4558f
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.h
@@ -0,0 +1,22 @@
+//===- PPCMacroFusion.h - PowerPC Macro Fusion ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the PowerPC definition of the DAG scheduling
+/// mutation to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// Note that you have to add:
+/// DAG.addMutation(createPowerPCMacroFusionDAGMutation());
+/// to PPCPassConfig::createMachineScheduler() to have an effect.
+std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation();
+} // llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index a4b4bf2973d1..4ea714ff15f7 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -109,6 +109,16 @@ namespace {
// Track the operand that kill Reg. We would unset the kill flag of
// the operand if there is a following redundant load immediate.
int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI);
+
+ // We can't just clear implicit kills, so if we encounter one, stop
+ // looking further.
+ if (KillIdx != -1 && AfterBBI->getOperand(KillIdx).isImplicit()) {
+ LLVM_DEBUG(dbgs()
+ << "Encountered an implicit kill, cannot proceed: ");
+ LLVM_DEBUG(AfterBBI->dump());
+ break;
+ }
+
if (KillIdx != -1) {
assert(!DeadOrKillToUnset && "Shouldn't kill same register twice");
DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx);
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 01b97ba6ab20..35f5e1fbebcd 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -142,6 +142,8 @@ const MCPhysReg*
PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>();
if (MF->getFunction().getCallingConv() == CallingConv::AnyReg) {
+ if (!TM.isPPC64() && Subtarget.isAIXABI())
+ report_fatal_error("AnyReg unimplemented on 32-bit AIX.");
if (Subtarget.hasVSX())
return CSR_64_AllRegs_VSX_SaveList;
if (Subtarget.hasAltivec())
@@ -149,21 +151,20 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_64_AllRegs_SaveList;
}
- if (Subtarget.isDarwinABI())
- return TM.isPPC64()
- ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_SaveList
- : CSR_Darwin64_SaveList)
- : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_SaveList
- : CSR_Darwin32_SaveList);
-
- if (TM.isPPC64() && MF->getInfo<PPCFunctionInfo>()->isSplitCSR())
- return CSR_SRV464_TLS_PE_SaveList;
-
// On PPC64, we might need to save r2 (but only if it is not reserved).
- bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2);
+ // We do not need to treat R2 as callee-saved when using PC-Relative calls
+ // because any direct uses of R2 will cause it to be reserved. If the function
+ // is a leaf or the only uses of R2 are implicit uses for calls, the calls
+ // will use the @notoc relocation which will cause this function to set the
+ // st_other bit to 1, thereby communicating to its caller that it arbitrarily
+ // clobbers the TOC.
+ bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2) &&
+ !Subtarget.isUsingPCRelativeCalls();
// Cold calling convention CSRs.
if (MF->getFunction().getCallingConv() == CallingConv::Cold) {
+ if (Subtarget.isAIXABI())
+ report_fatal_error("Cold calling unimplemented on AIX.");
if (TM.isPPC64()) {
if (Subtarget.hasAltivec())
return SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList
@@ -181,12 +182,13 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
// Standard calling convention CSRs.
if (TM.isPPC64()) {
if (Subtarget.hasAltivec())
- return SaveR2 ? CSR_SVR464_R2_Altivec_SaveList
- : CSR_SVR464_Altivec_SaveList;
- return SaveR2 ? CSR_SVR464_R2_SaveList
- : CSR_SVR464_SaveList;
+ return SaveR2 ? CSR_PPC64_R2_Altivec_SaveList
+ : CSR_PPC64_Altivec_SaveList;
+ return SaveR2 ? CSR_PPC64_R2_SaveList : CSR_PPC64_SaveList;
}
// 32-bit targets.
+ if (Subtarget.isAIXABI())
+ return CSR_AIX32_SaveList;
if (Subtarget.hasAltivec())
return CSR_SVR432_Altivec_SaveList;
else if (Subtarget.hasSPE())
@@ -194,31 +196,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_SVR432_SaveList;
}
-const MCPhysReg *
-PPCRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
- assert(MF && "Invalid MachineFunction pointer.");
- const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>();
- if (Subtarget.isDarwinABI())
- return nullptr;
- if (!TM.isPPC64())
- return nullptr;
- if (MF->getFunction().getCallingConv() != CallingConv::CXX_FAST_TLS)
- return nullptr;
- if (!MF->getInfo<PPCFunctionInfo>()->isSplitCSR())
- return nullptr;
-
- // On PPC64, we might need to save r2 (but only if it is not reserved).
- bool SaveR2 = !getReservedRegs(*MF).test(PPC::X2);
- if (Subtarget.hasAltivec())
- return SaveR2
- ? CSR_SVR464_R2_Altivec_ViaCopy_SaveList
- : CSR_SVR464_Altivec_ViaCopy_SaveList;
- else
- return SaveR2
- ? CSR_SVR464_R2_ViaCopy_SaveList
- : CSR_SVR464_ViaCopy_SaveList;
-}
-
const uint32_t *
PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
@@ -231,14 +208,9 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
return CSR_64_AllRegs_RegMask;
}
- if (Subtarget.isDarwinABI())
- return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_RegMask
- : CSR_Darwin64_RegMask)
- : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_RegMask
- : CSR_Darwin32_RegMask);
if (Subtarget.isAIXABI()) {
assert(!Subtarget.hasAltivec() && "Altivec is not implemented on AIX yet.");
- return TM.isPPC64() ? CSR_AIX64_RegMask : CSR_AIX32_RegMask;
+ return TM.isPPC64() ? CSR_PPC64_RegMask : CSR_AIX32_RegMask;
}
if (CC == CallingConv::Cold) {
@@ -250,12 +222,12 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
: CSR_SVR32_ColdCC_RegMask));
}
- return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR464_Altivec_RegMask
- : CSR_SVR464_RegMask)
- : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_RegMask
- : (Subtarget.hasSPE()
- ? CSR_SVR432_SPE_RegMask
- : CSR_SVR432_RegMask));
+ return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask
+ : CSR_PPC64_RegMask)
+ : (Subtarget.hasAltivec()
+ ? CSR_SVR432_Altivec_RegMask
+ : (Subtarget.hasSPE() ? CSR_SVR432_SPE_RegMask
+ : CSR_SVR432_RegMask));
}
const uint32_t*
@@ -295,8 +267,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
markSuperRegs(Reserved, PPC::LR8);
markSuperRegs(Reserved, PPC::RM);
- if (!Subtarget.isDarwinABI() || !Subtarget.hasAltivec())
- markSuperRegs(Reserved, PPC::VRSAVE);
+ markSuperRegs(Reserved, PPC::VRSAVE);
// The SVR4 ABI reserves r2 and r13
if (Subtarget.isSVR4ABI()) {
@@ -369,7 +340,8 @@ bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) co
int FrIdx = Info[i].getFrameIdx();
unsigned Reg = Info[i].getReg();
- unsigned Opcode = InstrInfo->getStoreOpcodeForSpill(Reg);
+ const TargetRegisterClass *RC = getMinimalPhysRegClass(Reg);
+ unsigned Opcode = InstrInfo->getStoreOpcodeForSpill(RC);
if (!MFI.isFixedObjectIndex(FrIdx)) {
// This is not a fixed object. If it requires alignment then we may still
// need to use the XForm.
@@ -389,7 +361,7 @@ bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) co
return false;
}
-bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg,
+bool PPCRegisterInfo::isCallerPreservedPhysReg(MCRegister PhysReg,
const MachineFunction &MF) const {
assert(Register::isPhysicalRegister(PhysReg));
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
@@ -508,15 +480,70 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Get the maximum call stack size.
unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
+ Align MaxAlign = MFI.getMaxAlign();
+ assert(isAligned(MaxAlign, maxCallFrameSize) &&
+ "Maximum call-frame size not sufficiently aligned");
+ (void)MaxAlign;
+
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ bool KillNegSizeReg = MI.getOperand(1).isKill();
+ Register NegSizeReg = MI.getOperand(1).getReg();
+
+ prepareDynamicAlloca(II, NegSizeReg, KillNegSizeReg, Reg);
+ // Grow the stack and update the stack pointer link, then determine the
+ // address of new allocated space.
+ if (LP64) {
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
+ .addReg(PPC::X1)
+ .addImm(maxCallFrameSize);
+ } else {
+ BuildMI(MBB, II, dl, TII.get(PPC::STWUX), PPC::R1)
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::R1)
+ .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
+ .addReg(PPC::R1)
+ .addImm(maxCallFrameSize);
+ }
+
+ // Discard the DYNALLOC instruction.
+ MBB.erase(II);
+}
+
+/// To accomplish dynamic stack allocation, we have to calculate the exact size
+/// to subtract from the stack pointer according to the alignment information,
+/// and obtain the previous frame pointer.
+void PPCRegisterInfo::prepareDynamicAlloca(MachineBasicBlock::iterator II,
+ Register &NegSizeReg,
+ bool &KillNegSizeReg,
+ Register &FramePointer) const {
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ // Get the instruction info.
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ // Determine whether 64-bit pointers are used.
+ bool LP64 = TM.isPPC64();
+ DebugLoc dl = MI.getDebugLoc();
// Get the total frame size.
unsigned FrameSize = MFI.getStackSize();
// Get stack alignments.
const PPCFrameLowering *TFI = getFrameLowering(MF);
- unsigned TargetAlign = TFI->getStackAlignment();
- unsigned MaxAlign = MFI.getMaxAlignment();
- assert((maxCallFrameSize & (MaxAlign-1)) == 0 &&
- "Maximum call-frame size not sufficiently aligned");
+ Align TargetAlign = TFI->getStackAlign();
+ Align MaxAlign = MFI.getMaxAlign();
// Determine the previous frame's address. If FrameSize can't be
// represented as 16 bits or we need special alignment, then we load the
@@ -526,32 +553,26 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Fortunately, a frame greater than 32K is rare.
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
if (LP64)
- BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), Reg)
- .addReg(PPC::X31)
- .addImm(FrameSize);
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), FramePointer)
+ .addReg(PPC::X31)
+ .addImm(FrameSize);
else
- BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
- .addReg(PPC::R31)
- .addImm(FrameSize);
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), FramePointer)
+ .addReg(PPC::R31)
+ .addImm(FrameSize);
} else if (LP64) {
- BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
- .addImm(0)
- .addReg(PPC::X1);
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), FramePointer)
+ .addImm(0)
+ .addReg(PPC::X1);
} else {
- BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg)
- .addImm(0)
- .addReg(PPC::R1);
+ BuildMI(MBB, II, dl, TII.get(PPC::LWZ), FramePointer)
+ .addImm(0)
+ .addReg(PPC::R1);
}
-
- bool KillNegSizeReg = MI.getOperand(1).isKill();
- Register NegSizeReg = MI.getOperand(1).getReg();
-
- // Grow the stack and update the stack pointer link, then determine the
- // address of new allocated space.
+ // Determine the actual NegSizeReg according to alignment info.
if (LP64) {
if (MaxAlign > TargetAlign) {
unsigned UnalNegSizeReg = NegSizeReg;
@@ -560,23 +581,15 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Unfortunately, there is no andi, only andi., and we can't insert that
// here because we might clobber cr0 while it is live.
BuildMI(MBB, II, dl, TII.get(PPC::LI8), NegSizeReg)
- .addImm(~(MaxAlign-1));
+ .addImm(~(MaxAlign.value() - 1));
unsigned NegSizeReg1 = NegSizeReg;
NegSizeReg = MF.getRegInfo().createVirtualRegister(G8RC);
BuildMI(MBB, II, dl, TII.get(PPC::AND8), NegSizeReg)
- .addReg(UnalNegSizeReg, getKillRegState(KillNegSizeReg))
- .addReg(NegSizeReg1, RegState::Kill);
+ .addReg(UnalNegSizeReg, getKillRegState(KillNegSizeReg))
+ .addReg(NegSizeReg1, RegState::Kill);
KillNegSizeReg = true;
}
-
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(Reg, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
- BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
- .addReg(PPC::X1)
- .addImm(maxCallFrameSize);
} else {
if (MaxAlign > TargetAlign) {
unsigned UnalNegSizeReg = NegSizeReg;
@@ -585,26 +598,47 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Unfortunately, there is no andi, only andi., and we can't insert that
// here because we might clobber cr0 while it is live.
BuildMI(MBB, II, dl, TII.get(PPC::LI), NegSizeReg)
- .addImm(~(MaxAlign-1));
+ .addImm(~(MaxAlign.value() - 1));
unsigned NegSizeReg1 = NegSizeReg;
NegSizeReg = MF.getRegInfo().createVirtualRegister(GPRC);
BuildMI(MBB, II, dl, TII.get(PPC::AND), NegSizeReg)
- .addReg(UnalNegSizeReg, getKillRegState(KillNegSizeReg))
- .addReg(NegSizeReg1, RegState::Kill);
+ .addReg(UnalNegSizeReg, getKillRegState(KillNegSizeReg))
+ .addReg(NegSizeReg1, RegState::Kill);
KillNegSizeReg = true;
}
+ }
+}
- BuildMI(MBB, II, dl, TII.get(PPC::STWUX), PPC::R1)
- .addReg(Reg, RegState::Kill)
- .addReg(PPC::R1)
- .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
- BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
- .addReg(PPC::R1)
- .addImm(maxCallFrameSize);
+void PPCRegisterInfo::lowerPrepareProbedAlloca(
+ MachineBasicBlock::iterator II) const {
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ // Get the instruction info.
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ // Determine whether 64-bit pointers are used.
+ bool LP64 = TM.isPPC64();
+ DebugLoc dl = MI.getDebugLoc();
+ Register FramePointer = MI.getOperand(0).getReg();
+ Register FinalStackPtr = MI.getOperand(1).getReg();
+ bool KillNegSizeReg = MI.getOperand(2).isKill();
+ Register NegSizeReg = MI.getOperand(2).getReg();
+ prepareDynamicAlloca(II, NegSizeReg, KillNegSizeReg, FramePointer);
+ if (LP64) {
+ BuildMI(MBB, II, dl, TII.get(PPC::ADD8), FinalStackPtr)
+ .addReg(PPC::X1)
+ .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
+
+ } else {
+ BuildMI(MBB, II, dl, TII.get(PPC::ADD4), FinalStackPtr)
+ .addReg(PPC::R1)
+ .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
}
- // Discard the DYNALLOC instruction.
MBB.erase(II);
}
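
The LI/AND sequence emitted by prepareDynamicAlloca rounds the negative allocation size down to a multiple of MaxAlign by clearing its low bits. A standalone sketch of that arithmetic (illustrative only, not part of the patch):

// ANDing a negative size with ~(Align - 1) rounds it toward -infinity, i.e.
// the stack grows by at least the requested amount and stays aligned.
#include <cassert>
#include <cstdint>

static int64_t alignNegSize(int64_t NegSize, uint64_t Align) {
  // Align must be a power of two, as MachineFrameInfo::getMaxAlign guarantees.
  assert((Align & (Align - 1)) == 0 && "alignment must be a power of two");
  return NegSize & ~int64_t(Align - 1); // clear the low bits
}

int main() {
  // Requesting 40 bytes with 32-byte alignment grows the stack by 64 bytes.
  assert(alignNegSize(-40, 32) == -64);
  // An already-aligned request is unchanged.
  assert(alignNegSize(-64, 32) == -64);
}
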
@@ -665,7 +699,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
// If the saved register wasn't CR0, shift the bits left so that they are in
// CR0's slot.
if (SrcReg != PPC::CR0) {
- unsigned Reg1 = Reg;
+ Register Reg1 = Reg;
Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
// rlwinm rA, rA, ShiftBits, 0, 31.
@@ -710,7 +744,7 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
// If the reloaded register isn't CR0, shift the bits right so that they are
// in the right CR's slot.
if (DestReg != PPC::CR0) {
- unsigned Reg1 = Reg;
+ Register Reg1 = Reg;
Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
unsigned ShiftBits = getEncodingValue(DestReg)*4;
@@ -814,7 +848,7 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
// If the saved register wasn't CR0LT, shift the bits left so that the bit
// to store is the first one. Mask all but that bit.
- unsigned Reg1 = Reg;
+ Register Reg1 = Reg;
Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
// rlwinm rA, rA, ShiftBits, 0, 0.
@@ -940,20 +974,17 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
}
bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
- unsigned Reg, int &FrameIdx) const {
- const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
- // For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4
- // ABI, return true to prevent allocating an additional frame slot.
- // For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0
- // is arbitrary and will be subsequently ignored. For 32-bit, we have
- // previously created the stack slot if needed, so return its FrameIdx.
- if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) {
- if (TM.isPPC64())
- FrameIdx = 0;
- else {
- const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
- FrameIdx = FI->getCRSpillFrameIndex();
- }
+ Register Reg, int &FrameIdx) const {
+ // For the nonvolatile condition registers (CR2, CR3, CR4) return true to
+ // prevent allocating an additional frame slot.
+ // For 64-bit ELF and AIX, the CR save area is in the linkage area at SP+8,
+ // for 32-bit AIX the CR save area is in the linkage area at SP+4.
+  // We have created a FrameIndex to that spill slot to keep the
+  // CalleeSavedInfo entries valid.
+ // For 32-bit ELF, we have previously created the stack slot if needed, so
+ // return its FrameIdx.
+ if (PPC::CR2 <= Reg && Reg <= PPC::CR4) {
+ FrameIdx = MF.getInfo<PPCFunctionInfo>()->getCRSpillFrameIndex();
return true;
}
return false;
@@ -1051,6 +1082,13 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
return;
}
+ if (FPSI && FrameIndex == FPSI &&
+ (OpC == PPC::PREPARE_PROBED_ALLOCA_64 ||
+ OpC == PPC::PREPARE_PROBED_ALLOCA_32)) {
+ lowerPrepareProbedAlloca(II);
+ return;
+ }
+
// Special case for pseudo-ops SPILL_CR and RESTORE_CR, etc.
if (OpC == PPC::SPILL_CR) {
lowerCRSpilling(II, FrameIndex);
@@ -1122,7 +1160,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC;
- unsigned SRegHi = MF.getRegInfo().createVirtualRegister(RC),
+ Register SRegHi = MF.getRegInfo().createVirtualRegister(RC),
SReg = MF.getRegInfo().createVirtualRegister(RC);
// Insert a set of rA with the full offset value before the ld, st, or add
@@ -1245,10 +1283,10 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
/// Insert defining instruction(s) for BaseReg to
/// be a pointer to FrameIdx at the beginning of the basic block.
-void PPCRegisterInfo::
-materializeFrameBaseRegister(MachineBasicBlock *MBB,
- unsigned BaseReg, int FrameIdx,
- int64_t Offset) const {
+void PPCRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ Register BaseReg,
+ int FrameIdx,
+ int64_t Offset) const {
unsigned ADDriOpc = TM.isPPC64() ? PPC::ADDI8 : PPC::ADDI;
MachineBasicBlock::iterator Ins = MBB->begin();
@@ -1267,7 +1305,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
.addFrameIndex(FrameIdx).addImm(Offset);
}
-void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const {
unsigned FIOperandNum = 0;
while (!MI.getOperand(FIOperandNum).isFI()) {
@@ -1292,7 +1330,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
}
bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
- unsigned BaseReg,
+ Register BaseReg,
int64_t Offset) const {
unsigned FIOperandNum = 0;
while (!MI->getOperand(FIOperandNum).isFI()) {
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index a5fbb0c6ec64..61acd955e1cb 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -84,7 +84,6 @@ public:
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
- const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const override;
const uint32_t *getNoPreservedMask() const override;
@@ -92,7 +91,8 @@ public:
void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
- bool isCallerPreservedPhysReg(unsigned PhysReg, const MachineFunction &MF) const override;
+ bool isCallerPreservedPhysReg(MCRegister PhysReg,
+ const MachineFunction &MF) const override;
/// We require the register scavenger.
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
@@ -101,16 +101,16 @@ public:
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
- return true;
- }
-
bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override {
return true;
}
void lowerDynamicAlloc(MachineBasicBlock::iterator II) const;
void lowerDynamicAreaOffset(MachineBasicBlock::iterator II) const;
+ void prepareDynamicAlloca(MachineBasicBlock::iterator II,
+ Register &NegSizeReg, bool &KillNegSizeReg,
+ Register &FramePointer) const;
+ void lowerPrepareProbedAlloca(MachineBasicBlock::iterator II) const;
void lowerCRSpilling(MachineBasicBlock::iterator II,
unsigned FrameIndex) const;
void lowerCRRestore(MachineBasicBlock::iterator II,
@@ -124,7 +124,7 @@ public:
void lowerVRSAVERestore(MachineBasicBlock::iterator II,
unsigned FrameIndex) const;
- bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
+ bool hasReservedSpillSlot(const MachineFunction &MF, Register Reg,
int &FrameIdx) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned FIOperandNum,
@@ -132,12 +132,12 @@ public:
// Support for virtual base registers.
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
- void materializeFrameBaseRegister(MachineBasicBlock *MBB,
- unsigned BaseReg, int FrameIdx,
+ void materializeFrameBaseRegister(MachineBasicBlock *MBB, Register BaseReg,
+ int FrameIdx,
int64_t Offset) const override;
- void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const override;
- bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
+ bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
int64_t Offset) const override;
// Debug information queries.
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 4719e947b172..b45757c1acc5 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -156,7 +156,7 @@ foreach Index = 32-63 in {
def VSX#Index : VSXReg<Index, "vs"#Index>;
}
-// The reprsentation of r0 when treated as the constant 0.
+// The representation of r0 when treated as the constant 0.
def ZERO : GPR<0, "0">, DwarfRegAlias<R0>;
def ZERO8 : GP8<ZERO, "0">, DwarfRegAlias<X0>;
@@ -363,11 +363,23 @@ def CRBITRC : RegisterClass<"PPC", [i1], 32,
CR1LT, CR1GT, CR1EQ, CR1UN,
CR0LT, CR0GT, CR0EQ, CR0UN)> {
let Size = 32;
+ let AltOrders = [(sub CRBITRC, CR2LT, CR2GT, CR2EQ, CR2UN, CR3LT, CR3GT,
+ CR3EQ, CR3UN, CR4LT, CR4GT, CR4EQ, CR4UN)];
+ let AltOrderSelect = [{
+ return MF.getSubtarget<PPCSubtarget>().isELFv2ABI() &&
+ MF.getInfo<PPCFunctionInfo>()->isNonVolatileCRDisabled();
+ }];
}
-def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
- CR7, CR2, CR3, CR4)>;
-
+def CRRC : RegisterClass<"PPC", [i32], 32,
+ (add CR0, CR1, CR5, CR6,
+ CR7, CR2, CR3, CR4)> {
+ let AltOrders = [(sub CRRC, CR2, CR3, CR4)];
+ let AltOrderSelect = [{
+ return MF.getSubtarget<PPCSubtarget>().isELFv2ABI() &&
+ MF.getInfo<PPCFunctionInfo>()->isNonVolatileCRDisabled();
+ }];
+}
// The CTR registers are not allocatable because they're used by the
// decrement-and-branch instructions, and thus need to stay live across
// multiple basic blocks.
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP9.td
index 6a79cca89194..0a1ae7e55b3c 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -20,7 +20,7 @@ def P9Model : SchedMachineModel {
// Load latency is 4 or 5 cycles depending on the load. This latency assumes
// that we have a cache hit. For a cache miss the load latency will be more.
- // There are two instructions (lxvl, lxvll) that have a latencty of 6 cycles.
+ // There are two instructions (lxvl, lxvll) that have a latency of 6 cycles.
// However it is not worth bumping this value up to 6 when the vast majority
// of instructions are 4 or 5 cycles.
let LoadLatency = 5;
@@ -40,9 +40,11 @@ def P9Model : SchedMachineModel {
let CompleteModel = 1;
- // Do not support QPX (Quad Processing eXtension) or SPE (Signal Procesing
- // Engine) on Power 9.
- let UnsupportedFeatures = [HasQPX, HasSPE];
+ // Do not support QPX (Quad Processing eXtension), SPE (Signal Processing
+ // Engine), prefixed instructions on Power 9, PC relative mem ops, or
+ // instructions introduced in ISA 3.1.
+ let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs, PCRelativeMemops,
+ IsISA3_1];
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 64566f4d6609..3836cc960394 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -78,6 +78,9 @@ void PPCSubtarget::initializeEnvironment() {
HasP8Crypto = false;
HasP9Vector = false;
HasP9Altivec = false;
+ HasP10Vector = false;
+ HasPrefixInstrs = false;
+ HasPCRelativeMemops = false;
HasFCPSGN = false;
HasFSQRT = false;
HasFRE = false;
@@ -100,8 +103,8 @@ void PPCSubtarget::initializeEnvironment() {
IsPPC6xx = false;
IsE500 = false;
FeatureMFTB = false;
+ AllowsUnalignedFPAccess = false;
DeprecatedDST = false;
- HasLazyResolverStubs = false;
HasICBT = false;
HasInvariantFunctionDescriptors = false;
HasPartwordAtomics = false;
@@ -109,19 +112,24 @@ void PPCSubtarget::initializeEnvironment() {
IsQPXStackUnaligned = false;
HasHTM = false;
HasFloat128 = false;
+ HasFusion = false;
+ HasAddiLoadFusion = false;
+ HasAddisLoadFusion = false;
IsISA3_0 = false;
+ IsISA3_1 = false;
UseLongCalls = false;
SecurePlt = false;
VectorsUseTwoUnits = false;
UsePPCPreRASchedStrategy = false;
UsePPCPostRASchedStrategy = false;
+ PredictableSelectIsExpensive = false;
HasPOPCNTD = POPCNTD_Unavailable;
}
void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// Determine default and user specified characteristics
- std::string CPUName = CPU;
+ std::string CPUName = std::string(CPU);
if (CPUName.empty() || CPU == "generic") {
// If cross-compiling with -march=ppc64le without -mcpu
if (TargetTriple.getArch() == Triple::ppc64le)
@@ -143,10 +151,6 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (IsPPC64 && has64BitSupport())
Use64BitRegs = true;
- // Set up darwin-specific properties.
- if (isDarwin())
- HasLazyResolverStubs = true;
-
if ((TargetTriple.isOSFreeBSD() && TargetTriple.getOSMajorVersion() >= 13) ||
TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD() ||
TargetTriple.isMusl())
@@ -173,26 +177,10 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le);
}
-/// Return true if accesses to the specified global have to go through a dyld
-/// lazy resolution stub. This means that an extra load is required to get the
-/// address of the global.
-bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const {
- if (!HasLazyResolverStubs)
- return false;
- if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
- return true;
- // 32 bit macho has no relocation for a-b if a is undefined, even if b is in
- // the section that is being relocated. This means we have to use o load even
- // for GVs that are known to be local to the dso.
- if (GV->isDeclarationForLinker() || GV->hasCommonLinkage())
- return true;
- return false;
-}
-
bool PPCSubtarget::enableMachineScheduler() const { return true; }
bool PPCSubtarget::enableMachinePipeliner() const {
- return (CPUDirective == PPC::DIR_PWR9) && EnableMachinePipeliner;
+ return getSchedModel().hasInstrSchedModel() && EnableMachinePipeliner;
}
bool PPCSubtarget::useDFAforSMS() const { return false; }
@@ -242,3 +230,8 @@ bool PPCSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const {
bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); }
bool PPCSubtarget::isPPC64() const { return TM.isPPC64(); }
+
+bool PPCSubtarget::isUsingPCRelativeCalls() const {
+ return isPPC64() && hasPCRelativeMemops() && isELFv2ABI() &&
+ CodeModel::Medium == getTargetMachine().getCodeModel();
+}
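
Aside from the diff itself, the new predicate added above is simply a conjunction of four existing subtarget/target-machine queries. A minimal sketch of the same gating logic, using plain bools in place of the real PPCSubtarget and PPCTargetMachine accessors (all names below are illustrative, not the actual API):

#include <cassert>

// Hypothetical stand-ins for the queries combined in isUsingPCRelativeCalls().
struct PCRelCheckInputs {
  bool IsPPC64;              // 64-bit target
  bool HasPCRelativeMemops;  // ISA 3.1 PC-relative loads/stores available
  bool IsELFv2ABI;           // ELFv2 ABI in use
  bool MediumCodeModel;      // CodeModel::Medium selected
};

// Mirrors the conjunction in the hunk above.
static bool isUsingPCRelativeCallsSketch(const PCRelCheckInputs &In) {
  return In.IsPPC64 && In.HasPCRelativeMemops && In.IsELFv2ABI &&
         In.MediumCodeModel;
}

int main() {
  // Missing any one condition disables PC-relative calls.
  assert(!isUsingPCRelativeCallsSketch({true, true, true, false}));
  assert(isUsingPCRelativeCallsSketch({true, true, true, true}));
  return 0;
}
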
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 044e982740e9..ec329022c457 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -34,36 +34,36 @@ class StringRef;
namespace PPC {
// -m directive values.
- enum {
- DIR_NONE,
- DIR_32,
- DIR_440,
- DIR_601,
- DIR_602,
- DIR_603,
- DIR_7400,
- DIR_750,
- DIR_970,
- DIR_A2,
- DIR_E500,
- DIR_E500mc,
- DIR_E5500,
- DIR_PWR3,
- DIR_PWR4,
- DIR_PWR5,
- DIR_PWR5X,
- DIR_PWR6,
- DIR_PWR6X,
- DIR_PWR7,
- DIR_PWR8,
- DIR_PWR9,
- DIR_PWR_FUTURE,
- DIR_64
- };
+enum {
+ DIR_NONE,
+ DIR_32,
+ DIR_440,
+ DIR_601,
+ DIR_602,
+ DIR_603,
+ DIR_7400,
+ DIR_750,
+ DIR_970,
+ DIR_A2,
+ DIR_E500,
+ DIR_E500mc,
+ DIR_E5500,
+ DIR_PWR3,
+ DIR_PWR4,
+ DIR_PWR5,
+ DIR_PWR5X,
+ DIR_PWR6,
+ DIR_PWR6X,
+ DIR_PWR7,
+ DIR_PWR8,
+ DIR_PWR9,
+ DIR_PWR10,
+ DIR_PWR_FUTURE,
+ DIR_64
+};
}
class GlobalValue;
-class TargetMachine;
class PPCSubtarget : public PPCGenSubtargetInfo {
public:
@@ -105,6 +105,9 @@ protected:
bool HasP8Crypto;
bool HasP9Vector;
bool HasP9Altivec;
+ bool HasP10Vector;
+ bool HasPrefixInstrs;
+ bool HasPCRelativeMemops;
bool HasFCPSGN;
bool HasFSQRT;
bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
@@ -126,7 +129,6 @@ protected:
bool FeatureMFTB;
bool AllowsUnalignedFPAccess;
bool DeprecatedDST;
- bool HasLazyResolverStubs;
bool IsLittleEndian;
bool HasICBT;
bool HasInvariantFunctionDescriptors;
@@ -134,12 +136,17 @@ protected:
bool HasDirectMove;
bool HasHTM;
bool HasFloat128;
+ bool HasFusion;
+ bool HasAddiLoadFusion;
+ bool HasAddisLoadFusion;
bool IsISA3_0;
+ bool IsISA3_1;
bool UseLongCalls;
bool SecurePlt;
bool VectorsUseTwoUnits;
bool UsePPCPreRASchedStrategy;
bool UsePPCPostRASchedStrategy;
+ bool PredictableSelectIsExpensive;
POPCNTDKind HasPOPCNTD;
@@ -230,11 +237,6 @@ public:
/// the individual condition register bits.
bool useCRBits() const { return UseCRBits; }
- /// hasLazyResolverStub - Return true if accesses to the specified global have
- /// to go through a dyld lazy resolution stub. This means that an extra load
- /// is required to get the address of the global.
- bool hasLazyResolverStub(const GlobalValue *GV) const;
-
// isLittleEndian - True if generating little-endian code
bool isLittleEndian() const { return IsLittleEndian; }
@@ -261,6 +263,9 @@ public:
bool hasP8Crypto() const { return HasP8Crypto; }
bool hasP9Vector() const { return HasP9Vector; }
bool hasP9Altivec() const { return HasP9Altivec; }
+ bool hasP10Vector() const { return HasP10Vector; }
+ bool hasPrefixInstrs() const { return HasPrefixInstrs; }
+ bool hasPCRelativeMemops() const { return HasPCRelativeMemops; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasBPERMD() const { return HasBPERMD; }
@@ -294,16 +299,24 @@ public:
return Align(16);
}
- // DarwinABI has a 224-byte red zone. PPC32 SVR4ABI(Non-DarwinABI) has no
- // red zone and PPC64 SVR4ABI has a 288-byte red zone.
unsigned getRedZoneSize() const {
- return isDarwinABI() ? 224 : (isPPC64() ? 288 : 0);
+ if (isPPC64())
+ // 288 bytes = 18*8 (FPRs) + 18*8 (GPRs, GPR13 reserved)
+ return 288;
+
+ // AIX PPC32: 220 bytes = 18*8 (FPRs) + 19*4 (GPRs);
+ // PPC32 SVR4ABI has no redzone.
+ return isAIXABI() ? 220 : 0;
}
bool hasHTM() const { return HasHTM; }
bool hasFloat128() const { return HasFloat128; }
bool isISA3_0() const { return IsISA3_0; }
+ bool isISA3_1() const { return IsISA3_1; }
bool useLongCalls() const { return UseLongCalls; }
+ bool hasFusion() const { return HasFusion; }
+ bool hasAddiLoadFusion() const { return HasAddiLoadFusion; }
+ bool hasAddisLoadFusion() const { return HasAddisLoadFusion; }
bool needsSwapsForVSXMemOps() const {
return hasVSX() && isLittleEndian() && !hasP9Vector();
}
@@ -312,8 +325,6 @@ public:
const Triple &getTargetTriple() const { return TargetTriple; }
- /// isDarwin - True if this is any darwin platform.
- bool isDarwin() const { return TargetTriple.isMacOSX(); }
/// isBGQ - True if this is a BG/Q platform.
bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
@@ -321,13 +332,13 @@ public:
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
- bool isDarwinABI() const { return isTargetMachO() || isDarwin(); }
bool isAIXABI() const { return TargetTriple.isOSAIX(); }
- bool isSVR4ABI() const { return !isDarwinABI() && !isAIXABI(); }
+ bool isSVR4ABI() const { return !isAIXABI(); }
bool isELFv2ABI() const;
bool is64BitELFABI() const { return isSVR4ABI() && isPPC64(); }
bool is32BitELFABI() const { return isSVR4ABI() && !isPPC64(); }
+ bool isUsingPCRelativeCalls() const;
/// Originally, this function return hasISEL(). Now we always enable it,
/// but may expand the ISEL instruction later.
@@ -389,6 +400,10 @@ public:
}
bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; }
+
+ bool isPredictableSelectIsExpensive() const {
+ return PredictableSelectIsExpensive;
+ }
};
} // End llvm namespace
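
The getRedZoneSize() hunk above drops the Darwin 224-byte case and keeps the 64-bit (288-byte) and AIX 32-bit (220-byte) values; its comments spell out the arithmetic behind those constants. A quick compile-time check of that arithmetic (a standalone sketch, not code from the patch):

// 64-bit: 18 FPRs + 18 GPRs saved at 8 bytes each (GPR13 is reserved).
constexpr unsigned RedZone64 = 18 * 8 + 18 * 8;
static_assert(RedZone64 == 288, "PPC64 red zone");

// AIX 32-bit: 18 FPRs at 8 bytes + 19 GPRs at 4 bytes.
constexpr unsigned RedZoneAIX32 = 18 * 8 + 19 * 4;
static_assert(RedZoneAIX32 == 220, "AIX PPC32 red zone");

// 32-bit SVR4 has no red zone at all.
constexpr unsigned RedZoneSVR432 = 0;
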
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 17e1196eea59..4b809e0c8553 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -78,9 +78,9 @@ protected:
Register OutReg = MI.getOperand(0).getReg();
Register InReg = MI.getOperand(1).getReg();
DebugLoc DL = MI.getDebugLoc();
- unsigned GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
+ Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
unsigned Opc1, Opc2;
- const unsigned OrigRegs[] = {OutReg, InReg, GPR3};
+ const Register OrigRegs[] = {OutReg, InReg, GPR3};
switch (MI.getOpcode()) {
default:
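
The small PPCTLSDynamicCall.cpp change above is part of the wider unsigned-to-Register cleanup: a Register-style type is a thin wrapper around the raw register number, so arrays and comparisons keep working while the type documents intent. A hedged illustration with a minimal wrapper of my own (not the actual llvm::Register definition):

// Minimal stand-in showing why swapping 'unsigned' for a Register-like
// wrapper is source-compatible in code such as 'const Register OrigRegs[]'.
class RegisterSketch {
  unsigned Id = 0;
public:
  constexpr RegisterSketch() = default;
  constexpr RegisterSketch(unsigned Id) : Id(Id) {}   // from a raw number
  constexpr operator unsigned() const { return Id; }  // back to a raw number
};

constexpr RegisterSketch GPR3 = 3u;
constexpr RegisterSketch OrigRegs[] = {1u, 2u, GPR3};
static_assert(OrigRegs[2] == 3u, "wrapper still compares like an unsigned");
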
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 2caf4c99a1f8..f15f9c7f4942 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -14,6 +14,7 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "PPC.h"
#include "PPCMachineScheduler.h"
+#include "PPCMacroFusion.h"
#include "PPCSubtarget.h"
#include "PPCTargetObjectFile.h"
#include "PPCTargetTransformInfo.h"
@@ -158,7 +159,7 @@ static std::string getDataLayoutString(const Triple &T) {
static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
const Triple &TT) {
- std::string FullFS = FS;
+ std::string FullFS = std::string(FS);
// Make sure 64-bit features are available when CPUname is generic
if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) {
@@ -223,6 +224,9 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
Optional<Reloc::Model> RM) {
+ assert((!TT.isOSAIX() || !RM.hasValue() || *RM == Reloc::PIC_) &&
+ "Invalid relocation model for AIX.");
+
if (RM.hasValue())
return *RM;
@@ -230,8 +234,8 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
if (TT.isOSDarwin())
return Reloc::DynamicNoPIC;
- // Big Endian PPC is PIC by default.
- if (TT.getArch() == Triple::ppc64)
+ // Big Endian PPC and AIX default to PIC.
+ if (TT.getArch() == Triple::ppc64 || TT.isOSAIX())
return Reloc::PIC_;
// Rest are static by default.
@@ -272,6 +276,9 @@ static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) {
std::make_unique<GenericScheduler>(C));
// add DAG Mutations here.
DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
+ if (ST.hasFusion())
+ DAG->addMutation(createPowerPCMacroFusionDAGMutation());
+
return DAG;
}
@@ -283,6 +290,8 @@ static ScheduleDAGInstrs *createPPCPostMachineScheduler(
std::make_unique<PPCPostRASchedStrategy>(C) :
std::make_unique<PostGenericScheduler>(C), true);
// add DAG Mutations here.
+ if (ST.hasFusion())
+ DAG->addMutation(createPowerPCMacroFusionDAGMutation());
return DAG;
}
@@ -495,7 +504,7 @@ void PPCPassConfig::addPreRegAlloc() {
// PPCTLSDynamicCallPass uses LiveIntervals which previously dependent on
// LiveVariables. This (unnecessary) dependency has been removed now,
// however a stage-2 clang build fails without LiveVariables computed here.
- addPass(&LiveVariablesID, false);
+ addPass(&LiveVariablesID);
addPass(createPPCTLSDynamicCallPass());
}
if (EnableExtraTOCRegDeps)
@@ -522,9 +531,9 @@ void PPCPassConfig::addPreEmitPass() {
addPass(createPPCExpandISELPass());
if (getOptLevel() != CodeGenOpt::None)
- addPass(createPPCEarlyReturnPass(), false);
+ addPass(createPPCEarlyReturnPass());
// Must run branch selection immediately preceding the asm printer.
- addPass(createPPCBranchSelectionPass(), false);
+ addPass(createPPCBranchSelectionPass());
}
TargetTransformInfo
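
The relocation-model hunks above assert that any explicitly requested model on AIX is PIC and make AIX default to PIC alongside big-endian ppc64. The selection order — explicit model first, then Darwin, then ppc64/AIX, then static — can be sketched on its own as follows (illustrative enum and function names, not the LLVM API):

#include <optional>

enum class RelocModelSketch { Static, PIC, DynamicNoPIC };

struct TripleInfo {
  bool IsOSAIX;
  bool IsOSDarwin;
  bool IsPPC64BigEndian; // Triple::ppc64
};

// Mirrors the decision order in getEffectiveRelocModel() after this patch.
static RelocModelSketch
effectiveRelocModelSketch(const TripleInfo &TT,
                          std::optional<RelocModelSketch> Requested) {
  // An explicitly requested model always wins (AIX additionally asserts
  // that such a request is PIC).
  if (Requested)
    return *Requested;
  if (TT.IsOSDarwin)
    return RelocModelSketch::DynamicNoPIC;
  // Big-endian ppc64 and AIX default to PIC.
  if (TT.IsPPC64BigEndian || TT.IsOSAIX)
    return RelocModelSketch::PIC;
  // Everything else is static by default.
  return RelocModelSketch::Static;
}
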
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
index e237fab1b267..d52c9f92bd1d 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "PPCTargetObjectFile.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -18,7 +19,6 @@ void
PPC64LinuxTargetObjectFile::
Initialize(MCContext &Ctx, const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- InitializeELF(TM.Options.UseInitArray);
}
MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal(
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index e05699cc95ec..53556ffc267d 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -33,6 +33,10 @@ EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false),
cl::desc("Enable using coldcc calling conv for cold "
"internal functions"));
+static cl::opt<bool>
+LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false),
+ cl::desc("Do not add instruction count to lsr cost model"));
+
// The latency of mtctr is only justified if there are more than 4
// comparisons that will be removed as a result.
static cl::opt<unsigned>
@@ -55,9 +59,10 @@ PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
-int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
if (DisablePPCConstHoist)
- return BaseT::getIntImmCost(Imm, Ty);
+ return BaseT::getIntImmCost(Imm, Ty, CostKind);
assert(Ty->isIntegerTy());
@@ -85,9 +90,10 @@ int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
}
int PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
if (DisablePPCConstHoist)
- return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty);
+ return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
assert(Ty->isIntegerTy());
@@ -115,13 +121,14 @@ int PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
return TTI::TCC_Free;
break;
}
- return PPCTTIImpl::getIntImmCost(Imm, Ty);
+ return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
if (DisablePPCConstHoist)
- return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty);
+ return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind);
assert(Ty->isIntegerTy());
@@ -199,22 +206,28 @@ int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
return TTI::TCC_Free;
}
- return PPCTTIImpl::getIntImmCost(Imm, Ty);
+ return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
-unsigned PPCTTIImpl::getUserCost(const User *U,
- ArrayRef<const Value *> Operands) {
+unsigned
+PPCTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind) {
+ // We already implement getCastInstrCost and getMemoryOpCost where we perform
+ // the vector adjustment there.
+ if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U))
+ return BaseT::getUserCost(U, Operands, CostKind);
+
if (U->getType()->isVectorTy()) {
// Instructions that need to be split should cost more.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, U->getType());
- return LT.first * BaseT::getUserCost(U, Operands);
+ return LT.first * BaseT::getUserCost(U, Operands, CostKind);
}
- return BaseT::getUserCost(U, Operands);
+ return BaseT::getUserCost(U, Operands, CostKind);
}
-bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
- TargetLibraryInfo *LibInfo) {
+bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
+ SmallPtrSetImpl<const Value *> &Visited) {
const PPCTargetMachine &TM = ST->getTargetMachine();
// Loop through the inline asm constraints and look for something that
@@ -233,8 +246,11 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
// Determining the address of a TLS variable results in a function call in
// certain TLS models.
- std::function<bool(const Value*)> memAddrUsesCTR =
- [&memAddrUsesCTR, &TM](const Value *MemAddr) -> bool {
+ std::function<bool(const Value *)> memAddrUsesCTR =
+ [&memAddrUsesCTR, &TM, &Visited](const Value *MemAddr) -> bool {
+ // No need to traverse again if we already checked this operand.
+ if (!Visited.insert(MemAddr).second)
+ return false;
const auto *GV = dyn_cast<GlobalValue>(MemAddr);
if (!GV) {
// Recurse to check for constants that refer to TLS global variables.
@@ -264,7 +280,7 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
J != JE; ++J) {
if (CallInst *CI = dyn_cast<CallInst>(J)) {
// Inline ASM is okay, unless it clobbers the ctr register.
- if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
+ if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand())) {
if (asmClobbersCTR(IA))
return true;
continue;
@@ -277,7 +293,7 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
switch (F->getIntrinsicID()) {
default: continue;
- // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
+ // If we have a call to loop_decrement or set_loop_iterations,
// we're definitely using CTR.
case Intrinsic::set_loop_iterations:
case Intrinsic::loop_decrement:
@@ -308,6 +324,7 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
return true;
else
continue; // ISD::FCOPYSIGN is never a library call.
+ case Intrinsic::fma: Opcode = ISD::FMA; break;
case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
@@ -412,8 +429,9 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
return true;
} else if (isa<BinaryOperator>(J) &&
- J->getType()->getScalarType()->isPPC_FP128Ty()) {
- // Most operations on ppc_f128 values become calls.
+ (J->getType()->getScalarType()->isFP128Ty() ||
+ J->getType()->getScalarType()->isPPC_FP128Ty())) {
+ // Most operations on f128 or ppc_f128 values become calls.
return true;
} else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
@@ -498,9 +516,10 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
// We don't want to spill/restore the counter register, and so we don't
// want to use the counter register if the loop contains calls.
+ SmallPtrSet<const Value *, 4> Visited;
for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
I != IE; ++I)
- if (mightUseCTR(*I, LibInfo))
+ if (mightUseCTR(*I, LibInfo, Visited))
return false;
SmallVector<BasicBlock*, 4> ExitingBlocks;
@@ -549,6 +568,10 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
BaseT::getUnrollingPreferences(L, SE, UP);
}
+void PPCTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ BaseT::getPeelingPreferences(L, SE, PP);
+}
// This function returns true to allow using coldcc calling convention.
// Returning true results in coldcc being used for functions which are cold at
// all call sites when the callers of the functions are not calling any other
@@ -637,11 +660,12 @@ unsigned PPCTTIImpl::getCacheLineSize() const {
if (CacheLineSize.getNumOccurrences() > 0)
return CacheLineSize;
- // On P7, P8 or P9 we have a cache line size of 128.
+ // Starting with P7 we have a cache line size of 128.
unsigned Directive = ST->getCPUDirective();
// Assume that Future CPU has the same cache line size as the others.
if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
- Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR_FUTURE)
+ Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
+ Directive == PPC::DIR_PWR_FUTURE)
return 128;
// On other processors return a default of 64 bytes.
@@ -673,9 +697,11 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
// For P7 and P8, floating-point instructions have a 6-cycle latency and
// there are two execution units, so unroll by 12x for latency hiding.
// FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
+ // FIXME: the same for P10 as previous gen until POWER10 scheduling is ready
// Assume that future is the same as the others.
if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
- Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR_FUTURE)
+ Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
+ Directive == PPC::DIR_PWR_FUTURE)
return 12;
// For most things, modern systems have two execution units (and
@@ -711,6 +737,7 @@ int PPCTTIImpl::vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1,
}
int PPCTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind,
TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info,
TTI::OperandValueProperties Opd1PropInfo,
@@ -718,9 +745,15 @@ int PPCTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
ArrayRef<const Value *> Args,
const Instruction *CxtI) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+ // TODO: Handle more cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
+ Op2Info, Opd1PropInfo,
+ Opd2PropInfo, Args, CxtI);
// Fallback to the default implementation.
- int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
+ Op2Info,
Opd1PropInfo, Opd2PropInfo);
return vectorCostAdjustment(Cost, Opcode, Ty, nullptr);
}
@@ -739,17 +772,33 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
nullptr);
}
+int PPCTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Opcode == Instruction::PHI ? 0 : 1;
+ // Branches are assumed to be predicted.
+ return CostKind == TTI::TCK_RecipThroughput ? 0 : 1;
+}
+
int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
- int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src);
- return vectorCostAdjustment(Cost, Opcode, Dst, Src);
+ int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
+ Cost = vectorCostAdjustment(Cost, Opcode, Dst, Src);
+ // TODO: Allow non-throughput costs that aren't binary.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Cost == 0 ? 0 : 1;
+ return Cost;
}
int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
- int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+ int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+ // TODO: Handle other cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Cost;
return vectorCostAdjustment(Cost, Opcode, ValTy, nullptr);
}
@@ -828,13 +877,22 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
+ if (TLI->getValueType(DL, Src, true) == MVT::Other)
+ return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+ CostKind);
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
"Invalid Opcode");
- int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+ CostKind);
+ // TODO: Handle other cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Cost;
+
Cost = vectorCostAdjustment(Cost, Opcode, Src, nullptr);
bool IsAltivecType = ST->hasAltivec() &&
@@ -856,7 +914,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// Aligned loads and stores are easy.
unsigned SrcBytes = LT.second.getStoreSize();
- if (!SrcBytes || !Alignment || Alignment >= SrcBytes)
+ if (!SrcBytes || !Alignment || *Alignment >= SrcBytes)
return Cost;
// If we can use the permutation-based load sequence, then this is also
@@ -868,7 +926,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// longer true.
if (Opcode == Instruction::Load &&
((!ST->hasP8Vector() && IsAltivecType) || IsQPXType) &&
- Alignment >= LT.second.getScalarType().getStoreSize())
+ *Alignment >= LT.second.getScalarType().getStoreSize())
return Cost + LT.first; // Add the cost of the permutations.
// For VSX, we can do unaligned loads and stores on Altivec/VSX types. On the
@@ -893,22 +951,20 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// stores, loads are expanded using the vector-load + permutation sequence,
// which is much less expensive).
if (Src->isVectorTy() && Opcode == Instruction::Store)
- for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
+ for (int i = 0, e = cast<FixedVectorType>(Src)->getNumElements(); i < e;
+ ++i)
Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
return Cost;
}
-int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- bool UseMaskForCond,
- bool UseMaskForGaps) {
+int PPCTTIImpl::getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond, bool UseMaskForGaps) {
if (UseMaskForCond || UseMaskForGaps)
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace,
+ Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);
assert(isa<VectorType>(VecTy) &&
@@ -919,7 +975,8 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
// Firstly, the cost of load/store operation.
int Cost =
- getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
+ getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
+ CostKind);
// PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
// (at least in the sense that there need only be one non-loop-invariant
@@ -931,18 +988,9 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
return Cost;
}
-unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
- return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
-}
-
-unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type*> Tys, FastMathFlags FMF,
- unsigned ScalarizationCostPassed) {
- if (ID == Intrinsic::bswap && ST->hasP9Vector())
- return TLI->getTypeLegalizationCost(DL, RetTy).first;
- return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
- ScalarizationCostPassed);
+unsigned PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) {
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
@@ -967,3 +1015,16 @@ bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
*BI = HWLoopInfo.ExitBranch;
return true;
}
+
+bool PPCTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+ TargetTransformInfo::LSRCost &C2) {
+ // PowerPC default behaviour here is "instruction number 1st priority".
+ // If LsrNoInsnsCost is set, call default implementation.
+ if (!LsrNoInsnsCost)
+ return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls,
+ C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
+ std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls,
+ C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
+ else
+ return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
+}
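
The new isLSRCostLess() above compares the two LSR cost tuples lexicographically with instruction count as the leading key, deferring to the base implementation only when -ppc-lsr-no-insns-cost is given. std::tie provides exactly that ordering; a small self-contained illustration (hypothetical Cost struct, not the real TargetTransformInfo::LSRCost):

#include <cassert>
#include <tuple>

// A trimmed-down stand-in for TargetTransformInfo::LSRCost.
struct CostSketch {
  unsigned Insns;
  unsigned NumRegs;
  unsigned SetupCost;
};

// Lexicographic comparison with Insns as the most significant key,
// matching the ordering the patch builds with std::tie.
static bool lessByInsnsFirst(const CostSketch &A, const CostSketch &B) {
  return std::tie(A.Insns, A.NumRegs, A.SetupCost) <
         std::tie(B.Insns, B.NumRegs, B.SetupCost);
}

int main() {
  // Fewer instructions wins even with more registers...
  assert(lessByInsnsFirst({3, 9, 1}, {4, 2, 0}));
  // ...and later keys only break ties.
  assert(lessByInsnsFirst({4, 2, 0}, {4, 2, 5}));
  return 0;
}
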
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 35388d14f606..d998521084e1 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -33,7 +33,8 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
const PPCSubtarget *getST() const { return ST; }
const PPCTargetLowering *getTLI() const { return TLI; }
- bool mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo);
+ bool mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
+ SmallPtrSetImpl<const Value *> &Visited);
public:
explicit PPCTTIImpl(const PPCTargetMachine *TM, const Function &F)
@@ -44,14 +45,16 @@ public:
/// @{
using BaseT::getIntImmCost;
- int getIntImmCost(const APInt &Imm, Type *Ty);
+ int getIntImmCost(const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind);
int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ Type *Ty, TTI::TargetCostKind CostKind);
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ Type *Ty, TTI::TargetCostKind CostKind);
- unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
+ unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
@@ -63,6 +66,10 @@ public:
TargetLibraryInfo *LibInfo);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
+ bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+ TargetTransformInfo::LSRCost &C2);
/// @}
@@ -87,6 +94,7 @@ public:
int vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, Type *Ty2);
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -95,24 +103,24 @@ public:
const Instruction *CxtI = nullptr);
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
+ int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
- unsigned AddressSpace, const Instruction *I = nullptr);
- int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false);
- unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF);
- unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type*> Tys, FastMathFlags FMF,
- unsigned ScalarizationCostPassed = UINT_MAX);
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
+ int getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false);
+ unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind);
/// @}
};
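
A recurring pattern across the PPCTargetTransformInfo changes above is that each cost hook now takes a TTI::TargetCostKind and falls back to the base implementation for kinds it does not yet model (typically anything other than recip-throughput). Sketched generically with made-up types — only the control flow mirrors the patch:

enum class CostKindSketch { RecipThroughput, Latency, CodeSize, SizeAndLatency };

// Hypothetical base-class fallback; in the real code this is BaseT::get...Cost.
static int baseCost(CostKindSketch /*Kind*/) { return 1; }

// Shape of the hooks after this patch: only the throughput kind gets the
// target-specific adjustment, everything else defers to the generic model.
static int arithmeticCostSketch(CostKindSketch Kind) {
  if (Kind != CostKindSketch::RecipThroughput)
    return baseCost(Kind);
  int Cost = baseCost(Kind);
  // ... vectorCostAdjustment-style tweaks would go here ...
  return Cost;
}

int main() { return arithmeticCostSketch(CostKindSketch::CodeSize) >= 1 ? 0 : 1; }
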
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index c7efdf42a7c6..407f980bd35e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -10,10 +10,13 @@
#include "MCTargetDesc/RISCVMCExpr.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "MCTargetDesc/RISCVTargetStreamer.h"
+#include "RISCVInstrInfo.h"
#include "TargetInfo/RISCVTargetInfo.h"
#include "Utils/RISCVBaseInfo.h"
#include "Utils/RISCVMatInt.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
@@ -32,6 +35,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/RISCVAttributes.h"
#include "llvm/Support/TargetRegistry.h"
#include <limits>
@@ -50,9 +54,16 @@ STATISTIC(RISCVNumInstrsCompressed,
namespace {
struct RISCVOperand;
+struct ParserOptionsSet {
+ bool IsPicEnabled;
+};
+
class RISCVAsmParser : public MCTargetAsmParser {
SmallVector<FeatureBitset, 4> FeatureBitStack;
+ SmallVector<ParserOptionsSet, 4> ParserOptionsStack;
+ ParserOptionsSet ParserOptions;
+
SMLoc getLoc() const { return getParser().getTok().getLoc(); }
bool isRV64() const { return getSTI().hasFeature(RISCV::Feature64Bit); }
bool isRV32E() const { return getSTI().hasFeature(RISCV::FeatureRV32E); }
@@ -74,6 +85,8 @@ class RISCVAsmParser : public MCTargetAsmParser {
bool MatchingInlineAsm) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
@@ -118,6 +131,9 @@ class RISCVAsmParser : public MCTargetAsmParser {
// 'add' is an overloaded mnemonic.
bool checkPseudoAddTPRel(MCInst &Inst, OperandVector &Operands);
+ // Check instruction constraints.
+ bool validateInstruction(MCInst &Inst, OperandVector &Operands);
+
/// Helper for processing MC instructions that have been successfully matched
/// by MatchAndEmitInstruction. Modifications to the emitted instructions,
/// like the expansion of pseudo instructions (e.g., "li"), can be performed
@@ -138,11 +154,15 @@ class RISCVAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseOperandWithModifier(OperandVector &Operands);
OperandMatchResultTy parseBareSymbol(OperandVector &Operands);
OperandMatchResultTy parseCallSymbol(OperandVector &Operands);
+ OperandMatchResultTy parsePseudoJumpSymbol(OperandVector &Operands);
OperandMatchResultTy parseJALOffset(OperandVector &Operands);
+ OperandMatchResultTy parseVTypeI(OperandVector &Operands);
+ OperandMatchResultTy parseMaskReg(OperandVector &Operands);
bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
bool parseDirectiveOption();
+ bool parseDirectiveAttribute();
void setFeatureBits(uint64_t Feature, StringRef FeatureString) {
if (!(getSTI().getFeatureBits()[Feature])) {
@@ -152,6 +172,10 @@ class RISCVAsmParser : public MCTargetAsmParser {
}
}
+ bool getFeatureBits(uint64_t Feature) {
+ return getSTI().getFeatureBits()[Feature];
+ }
+
void clearFeatureBits(uint64_t Feature, StringRef FeatureString) {
if (getSTI().getFeatureBits()[Feature]) {
MCSubtargetInfo &STI = copySTI();
@@ -161,10 +185,15 @@ class RISCVAsmParser : public MCTargetAsmParser {
}
void pushFeatureBits() {
+ assert(FeatureBitStack.size() == ParserOptionsStack.size() &&
+ "These two stacks must be kept synchronized");
FeatureBitStack.push_back(getSTI().getFeatureBits());
+ ParserOptionsStack.push_back(ParserOptions);
}
bool popFeatureBits() {
+ assert(FeatureBitStack.size() == ParserOptionsStack.size() &&
+ "These two stacks must be kept synchronized");
if (FeatureBitStack.empty())
return true;
@@ -172,8 +201,13 @@ class RISCVAsmParser : public MCTargetAsmParser {
copySTI().setFeatureBits(FeatureBits);
setAvailableFeatures(ComputeAvailableFeatures(FeatureBits));
+ ParserOptions = ParserOptionsStack.pop_back_val();
+
return false;
}
+
+ std::unique_ptr<RISCVOperand> defaultMaskRegOp() const;
+
public:
enum RISCVMatchResultTy {
Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
@@ -207,6 +241,9 @@ public:
"doesn't support the D instruction set extension (ignoring "
"target-abi)\n";
}
+
+ const MCObjectFileInfo *MOFI = Parser.getContext().getObjectFileInfo();
+ ParserOptions.IsPicEnabled = MOFI->isPositionIndependent();
}
};
@@ -218,7 +255,8 @@ struct RISCVOperand : public MCParsedAsmOperand {
Token,
Register,
Immediate,
- SystemRegister
+ SystemRegister,
+ VType,
} Kind;
bool IsRV64;
@@ -239,12 +277,32 @@ struct RISCVOperand : public MCParsedAsmOperand {
// e.g.: read/write or user/supervisor/machine privileges.
};
+ enum class VSEW {
+ SEW_8 = 0,
+ SEW_16,
+ SEW_32,
+ SEW_64,
+ SEW_128,
+ SEW_256,
+ SEW_512,
+ SEW_1024,
+ };
+
+ enum class VLMUL { LMUL_1 = 0, LMUL_2, LMUL_4, LMUL_8 };
+
+ struct VTypeOp {
+ VSEW Sew;
+ VLMUL Lmul;
+ unsigned Encoding;
+ };
+
SMLoc StartLoc, EndLoc;
union {
StringRef Tok;
RegOp Reg;
ImmOp Imm;
struct SysRegOp SysReg;
+ struct VTypeOp VType;
};
RISCVOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -268,14 +326,21 @@ public:
case KindTy::SystemRegister:
SysReg = o.SysReg;
break;
+ case KindTy::VType:
+ VType = o.VType;
+ break;
}
}
bool isToken() const override { return Kind == KindTy::Token; }
bool isReg() const override { return Kind == KindTy::Register; }
+ bool isV0Reg() const {
+ return Kind == KindTy::Register && Reg.RegNum == RISCV::V0;
+ }
bool isImm() const override { return Kind == KindTy::Immediate; }
bool isMem() const override { return false; }
bool isSystemRegister() const { return Kind == KindTy::SystemRegister; }
+ bool isVType() const { return Kind == KindTy::VType; }
bool isGPR() const {
return Kind == KindTy::Register &&
@@ -337,6 +402,16 @@ public:
VK == RISCVMCExpr::VK_RISCV_CALL_PLT);
}
+ bool isPseudoJumpSymbol() const {
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ // Must be of 'immediate' type but not a constant.
+ if (!isImm() || evaluateConstantImm(getImm(), Imm, VK))
+ return false;
+ return RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm) &&
+ VK == RISCVMCExpr::VK_RISCV_CALL;
+ }
+
bool isTPRelAddSymbol() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
@@ -349,6 +424,8 @@ public:
bool isCSRSystemRegister() const { return isSystemRegister(); }
+ bool isVTypeI() const { return isVType(); }
+
/// Return true if the operand is a valid for the fence instruction e.g.
/// ('iorw').
bool isFenceArg() const {
@@ -426,6 +503,17 @@ public:
return (isRV64() && isUInt<6>(Imm)) || isUInt<5>(Imm);
}
+ bool isUImmLog2XLenHalf() const {
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ if (!isImm())
+ return false;
+ if (!evaluateConstantImm(getImm(), Imm, VK) ||
+ VK != RISCVMCExpr::VK_RISCV_None)
+ return false;
+ return (isRV64() && isUInt<5>(Imm)) || isUInt<4>(Imm);
+ }
+
bool isUImm5() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
@@ -445,6 +533,15 @@ public:
VK == RISCVMCExpr::VK_RISCV_None;
}
+ bool isSImm5() const {
+ if (!isImm())
+ return false;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ int64_t Imm;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isInt<5>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
+ }
+
bool isSImm6() const {
if (!isImm())
return false;
@@ -452,7 +549,7 @@ public:
int64_t Imm;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isInt<6>(Imm) &&
- VK == RISCVMCExpr::VK_RISCV_None;
+ VK == RISCVMCExpr::VK_RISCV_None;
}
bool isSImm6NonZero() const {
@@ -610,6 +707,16 @@ public:
return IsConstantImm && (Imm == 0) && VK == RISCVMCExpr::VK_RISCV_None;
}
+ bool isSImm5Plus1() const {
+ if (!isImm())
+ return false;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ int64_t Imm;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isInt<5>(Imm - 1) &&
+ VK == RISCVMCExpr::VK_RISCV_None;
+ }
+
/// getStartLoc - Gets location of the first token of this operand
SMLoc getStartLoc() const override { return StartLoc; }
/// getEndLoc - Gets location of the last token of this operand
@@ -637,6 +744,51 @@ public:
return Tok;
}
+ static StringRef getSEWStr(VSEW Sew) {
+ switch (Sew) {
+ case VSEW::SEW_8:
+ return "e8";
+ case VSEW::SEW_16:
+ return "e16";
+ case VSEW::SEW_32:
+ return "e32";
+ case VSEW::SEW_64:
+ return "e64";
+ case VSEW::SEW_128:
+ return "e128";
+ case VSEW::SEW_256:
+ return "e256";
+ case VSEW::SEW_512:
+ return "e512";
+ case VSEW::SEW_1024:
+ return "e1024";
+ }
+ return "";
+ }
+
+ static StringRef getLMULStr(VLMUL Lmul) {
+ switch (Lmul) {
+ case VLMUL::LMUL_1:
+ return "m1";
+ case VLMUL::LMUL_2:
+ return "m2";
+ case VLMUL::LMUL_4:
+ return "m4";
+ case VLMUL::LMUL_8:
+ return "m8";
+ }
+ return "";
+ }
+
+ StringRef getVType(SmallString<32> &Buf) const {
+ assert(Kind == KindTy::VType && "Invalid access!");
+ Buf.append(getSEWStr(VType.Sew));
+ Buf.append(",");
+ Buf.append(getLMULStr(VType.Lmul));
+
+ return Buf.str();
+ }
+
void print(raw_ostream &OS) const override {
switch (Kind) {
case KindTy::Immediate:
@@ -652,6 +804,10 @@ public:
case KindTy::SystemRegister:
OS << "<sysreg: " << getSysReg() << '>';
break;
+ case KindTy::VType:
+ SmallString<32> VTypeBuf;
+ OS << "<vtype: " << getVType(VTypeBuf) << '>';
+ break;
}
}
@@ -696,6 +852,20 @@ public:
return Op;
}
+ static std::unique_ptr<RISCVOperand> createVType(APInt Sew, APInt Lmul,
+ SMLoc S, bool IsRV64) {
+ auto Op = std::make_unique<RISCVOperand>(KindTy::VType);
+ Sew.ashrInPlace(3);
+ unsigned SewLog2 = Sew.logBase2();
+ unsigned LmulLog2 = Lmul.logBase2();
+ Op->VType.Sew = static_cast<VSEW>(SewLog2);
+ Op->VType.Lmul = static_cast<VLMUL>(LmulLog2);
+ Op->VType.Encoding = (SewLog2 << 2) | LmulLog2;
+ Op->StartLoc = S;
+ Op->IsRV64 = IsRV64;
+ return Op;
+ }
+
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
assert(Expr && "Expr shouldn't be null!");
int64_t Imm = 0;
@@ -719,6 +889,16 @@ public:
addExpr(Inst, getImm());
}
+ void addSImm5Plus1Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ int64_t Imm = 0;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ bool IsConstant = evaluateConstantImm(getImm(), Imm, VK);
+ assert(IsConstant && "Expect constant value!");
+ (void)IsConstant;
+ Inst.addOperand(MCOperand::createImm(Imm - 1));
+ }
+
void addFenceArgOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// isFenceArg has validated the operand, meaning this cast is safe
@@ -743,6 +923,11 @@ public:
Inst.addOperand(MCOperand::createImm(SysReg.Encoding));
}
+ void addVTypeIOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(VType.Encoding));
+ }
+
// Returns the rounding mode represented by this RISCVOperand. Should only
// be called after checking isFRMArg.
RISCVFPRndMode::RoundingMode getRoundingMode() const {
@@ -820,6 +1005,8 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
default:
break;
case Match_Success:
+ if (validateInstruction(Inst, Operands))
+ return true;
return processInstruction(Inst, IDLoc, Operands, Out);
case Match_MissingFeature: {
assert(MissingFeatures.any() && "Unknown missing features!");
@@ -886,6 +1073,10 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (isRV64())
return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 6) - 1);
return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 5) - 1);
+ case Match_InvalidUImmLog2XLenHalf:
+ if (isRV64())
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 5) - 1);
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 4) - 1);
case Match_InvalidUImm5:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 5) - 1);
case Match_InvalidSImm6:
@@ -976,6 +1167,10 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "operand must be a bare symbol name");
}
+ case Match_InvalidPseudoJumpSymbol: {
+ SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "operand must be a valid jump target");
+ }
case Match_InvalidCallSymbol: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "operand must be a bare symbol name");
@@ -984,6 +1179,20 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "operand must be a symbol with %tprel_add modifier");
}
+ case Match_InvalidVTypeI: {
+ SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc,
+ "operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8]");
+ }
+ case Match_InvalidVMaskRegister: {
+ SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "operand must be v0.t");
+ }
+ case Match_InvalidSImm5Plus1: {
+ return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 4) + 1,
+ (1 << 4),
+ "immediate must be in the range");
+ }
}
llvm_unreachable("Unknown match type detected!");
@@ -1010,17 +1219,25 @@ static bool matchRegisterNameHelper(bool IsRV32E, Register &RegNo,
bool RISCVAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
+ if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success)
+ return Error(StartLoc, "invalid register name");
+ return false;
+}
+
+OperandMatchResultTy RISCVAsmParser::tryParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) {
const AsmToken &Tok = getParser().getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
RegNo = 0;
StringRef Name = getLexer().getTok().getIdentifier();
- if (matchRegisterNameHelper(isRV32E(), (Register&)RegNo, Name))
- return Error(StartLoc, "invalid register name");
+ if (matchRegisterNameHelper(isRV32E(), (Register &)RegNo, Name))
+ return MatchOperand_NoMatch;
getParser().Lex(); // Eat identifier token.
- return false;
+ return MatchOperand_Success;
}
OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands,
@@ -1113,6 +1330,8 @@ RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) {
return MatchOperand_ParseFail;
auto SysReg = RISCVSysReg::lookupSysRegByName(Identifier);
+ if (!SysReg)
+ SysReg = RISCVSysReg::lookupSysRegByAltName(Identifier);
// Accept a named Sys Reg if the required features are present.
if (SysReg) {
if (!SysReg->haveRequiredFeatures(getSTI().getFeatureBits())) {
@@ -1287,6 +1506,27 @@ OperandMatchResultTy RISCVAsmParser::parseCallSymbol(OperandVector &Operands) {
return MatchOperand_Success;
}
+OperandMatchResultTy
+RISCVAsmParser::parsePseudoJumpSymbol(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ const MCExpr *Res;
+
+ if (getParser().parseExpression(Res))
+ return MatchOperand_ParseFail;
+
+ if (Res->getKind() != MCExpr::ExprKind::SymbolRef ||
+ cast<MCSymbolRefExpr>(Res)->getKind() ==
+ MCSymbolRefExpr::VariantKind::VK_PLT) {
+ Error(S, "operand must be a valid jump target");
+ return MatchOperand_ParseFail;
+ }
+
+ Res = RISCVMCExpr::create(Res, RISCVMCExpr::VK_RISCV_CALL, getContext());
+ Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
+ return MatchOperand_Success;
+}
+
OperandMatchResultTy RISCVAsmParser::parseJALOffset(OperandVector &Operands) {
// Parsing jal operands is fiddly due to the `jal foo` and `jal ra, foo`
// both being acceptable forms. When parsing `jal ra, foo` this function
@@ -1304,6 +1544,74 @@ OperandMatchResultTy RISCVAsmParser::parseJALOffset(OperandVector &Operands) {
return parseImmediate(Operands);
}
+OperandMatchResultTy RISCVAsmParser::parseVTypeI(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return MatchOperand_NoMatch;
+
+ // Parse "e8,m1"
+ StringRef Name = getLexer().getTok().getIdentifier();
+ if (!Name.consume_front("e"))
+ return MatchOperand_NoMatch;
+ APInt Sew(16, Name, 10);
+ if (Sew != 8 && Sew != 16 && Sew != 32 && Sew != 64 && Sew != 128 &&
+ Sew != 256 && Sew != 512 && Sew != 1024)
+ return MatchOperand_NoMatch;
+ getLexer().Lex();
+
+ if (getLexer().getKind() == AsmToken::EndOfStatement) {
+ Operands.push_back(
+ RISCVOperand::createVType(Sew, APInt(16, 1), S, isRV64()));
+
+ return MatchOperand_Success;
+ }
+
+ if (!getLexer().is(AsmToken::Comma))
+ return MatchOperand_NoMatch;
+ getLexer().Lex();
+
+ Name = getLexer().getTok().getIdentifier();
+ if (!Name.consume_front("m"))
+ return MatchOperand_NoMatch;
+ APInt Lmul(16, Name, 10);
+ if (Lmul != 1 && Lmul != 2 && Lmul != 4 && Lmul != 8)
+ return MatchOperand_NoMatch;
+ getLexer().Lex();
+
+ if (getLexer().getKind() != AsmToken::EndOfStatement)
+ return MatchOperand_NoMatch;
+
+ Operands.push_back(RISCVOperand::createVType(Sew, Lmul, S, isRV64()));
+
+ return MatchOperand_Success;
+}
+
+OperandMatchResultTy RISCVAsmParser::parseMaskReg(OperandVector &Operands) {
+ switch (getLexer().getKind()) {
+ default:
+ return MatchOperand_NoMatch;
+ case AsmToken::Identifier:
+ StringRef Name = getLexer().getTok().getIdentifier();
+ if (!Name.consume_back(".t")) {
+ Error(getLoc(), "expected '.t' suffix");
+ return MatchOperand_ParseFail;
+ }
+ Register RegNo;
+ matchRegisterNameHelper(isRV32E(), RegNo, Name);
+
+ if (RegNo == RISCV::NoRegister)
+ return MatchOperand_NoMatch;
+ if (RegNo != RISCV::V0)
+ return MatchOperand_NoMatch;
+ SMLoc S = getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ getLexer().Lex();
+ Operands.push_back(RISCVOperand::createReg(RegNo, S, E, isRV64()));
+ }
+
+ return MatchOperand_Success;
+}
+
OperandMatchResultTy
RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) {
if (getLexer().isNot(AsmToken::LParen)) {
@@ -1533,6 +1841,8 @@ bool RISCVAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".option")
return parseDirectiveOption();
+ else if (IDVal == ".attribute")
+ return parseDirectiveAttribute();
return true;
}
@@ -1599,6 +1909,30 @@ bool RISCVAsmParser::parseDirectiveOption() {
return false;
}
+ if (Option == "pic") {
+ getTargetStreamer().emitDirectiveOptionPIC();
+
+ Parser.Lex();
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(),
+ "unexpected token, expected end of statement");
+
+ ParserOptions.IsPicEnabled = true;
+ return false;
+ }
+
+ if (Option == "nopic") {
+ getTargetStreamer().emitDirectiveOptionNoPIC();
+
+ Parser.Lex();
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement))
+ return Error(Parser.getTok().getLoc(),
+ "unexpected token, expected end of statement");
+
+ ParserOptions.IsPicEnabled = false;
+ return false;
+ }
+
if (Option == "relax") {
getTargetStreamer().emitDirectiveOptionRelax();
@@ -1631,12 +1965,157 @@ bool RISCVAsmParser::parseDirectiveOption() {
return false;
}
+/// parseDirectiveAttribute
+/// ::= .attribute expression ',' ( expression | "string" )
+/// ::= .attribute identifier ',' ( expression | "string" )
+bool RISCVAsmParser::parseDirectiveAttribute() {
+ MCAsmParser &Parser = getParser();
+ int64_t Tag;
+ SMLoc TagLoc;
+ TagLoc = Parser.getTok().getLoc();
+ if (Parser.getTok().is(AsmToken::Identifier)) {
+ StringRef Name = Parser.getTok().getIdentifier();
+ Optional<unsigned> Ret =
+ ELFAttrs::attrTypeFromString(Name, RISCVAttrs::RISCVAttributeTags);
+ if (!Ret.hasValue()) {
+ Error(TagLoc, "attribute name not recognised: " + Name);
+ return false;
+ }
+ Tag = Ret.getValue();
+ Parser.Lex();
+ } else {
+ const MCExpr *AttrExpr;
+
+ TagLoc = Parser.getTok().getLoc();
+ if (Parser.parseExpression(AttrExpr))
+ return true;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(AttrExpr);
+ if (check(!CE, TagLoc, "expected numeric constant"))
+ return true;
+
+ Tag = CE->getValue();
+ }
+
+ if (Parser.parseToken(AsmToken::Comma, "comma expected"))
+ return true;
+
+ StringRef StringValue;
+ int64_t IntegerValue = 0;
+ bool IsIntegerValue = true;
+
+ // RISC-V attributes have a string value if the tag number is odd
+ // and an integer value if the tag number is even.
+ if (Tag % 2)
+ IsIntegerValue = false;
+
+ SMLoc ValueExprLoc = Parser.getTok().getLoc();
+ if (IsIntegerValue) {
+ const MCExpr *ValueExpr;
+ if (Parser.parseExpression(ValueExpr))
+ return true;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ValueExpr);
+ if (!CE)
+ return Error(ValueExprLoc, "expected numeric constant");
+ IntegerValue = CE->getValue();
+ } else {
+ if (Parser.getTok().isNot(AsmToken::String))
+ return Error(Parser.getTok().getLoc(), "expected string constant");
+
+ StringValue = Parser.getTok().getStringContents();
+ Parser.Lex();
+ }
+
+ if (Parser.parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.attribute' directive"))
+ return true;
+
+ if (Tag == RISCVAttrs::ARCH) {
+ StringRef Arch = StringValue;
+ if (Arch.consume_front("rv32"))
+ clearFeatureBits(RISCV::Feature64Bit, "64bit");
+ else if (Arch.consume_front("rv64"))
+ setFeatureBits(RISCV::Feature64Bit, "64bit");
+ else
+ return Error(ValueExprLoc, "bad arch string " + Arch);
+
+ while (!Arch.empty()) {
+ if (Arch[0] == 'i')
+ clearFeatureBits(RISCV::FeatureRV32E, "e");
+ else if (Arch[0] == 'e')
+ setFeatureBits(RISCV::FeatureRV32E, "e");
+ else if (Arch[0] == 'g') {
+ clearFeatureBits(RISCV::FeatureRV32E, "e");
+ setFeatureBits(RISCV::FeatureStdExtM, "m");
+ setFeatureBits(RISCV::FeatureStdExtA, "a");
+ setFeatureBits(RISCV::FeatureStdExtF, "f");
+ setFeatureBits(RISCV::FeatureStdExtD, "d");
+ } else if (Arch[0] == 'm')
+ setFeatureBits(RISCV::FeatureStdExtM, "m");
+ else if (Arch[0] == 'a')
+ setFeatureBits(RISCV::FeatureStdExtA, "a");
+ else if (Arch[0] == 'f')
+ setFeatureBits(RISCV::FeatureStdExtF, "f");
+ else if (Arch[0] == 'd') {
+ setFeatureBits(RISCV::FeatureStdExtF, "f");
+ setFeatureBits(RISCV::FeatureStdExtD, "d");
+ } else if (Arch[0] == 'c') {
+ setFeatureBits(RISCV::FeatureStdExtC, "c");
+ } else
+ return Error(ValueExprLoc, "bad arch string " + Arch);
+
+ Arch = Arch.drop_front(1);
+ int major = 0;
+ int minor = 0;
+ Arch.consumeInteger(10, major);
+ Arch.consume_front("p");
+ Arch.consumeInteger(10, minor);
+ if (major != 0 || minor != 0) {
+ Arch = Arch.drop_until([](char c) { return c == '_' || c == '"'; });
+ Arch = Arch.drop_while([](char c) { return c == '_'; });
+ }
+ }
+ }
+
+ if (IsIntegerValue)
+ getTargetStreamer().emitAttribute(Tag, IntegerValue);
+ else {
+ if (Tag != RISCVAttrs::ARCH) {
+ getTargetStreamer().emitTextAttribute(Tag, StringValue);
+ } else {
+ std::string formalArchStr = "rv32";
+ if (getFeatureBits(RISCV::Feature64Bit))
+ formalArchStr = "rv64";
+ if (getFeatureBits(RISCV::FeatureRV32E))
+ formalArchStr = (Twine(formalArchStr) + "e1p9").str();
+ else
+ formalArchStr = (Twine(formalArchStr) + "i2p0").str();
+
+ if (getFeatureBits(RISCV::FeatureStdExtM))
+ formalArchStr = (Twine(formalArchStr) + "_m2p0").str();
+ if (getFeatureBits(RISCV::FeatureStdExtA))
+ formalArchStr = (Twine(formalArchStr) + "_a2p0").str();
+ if (getFeatureBits(RISCV::FeatureStdExtF))
+ formalArchStr = (Twine(formalArchStr) + "_f2p0").str();
+ if (getFeatureBits(RISCV::FeatureStdExtD))
+ formalArchStr = (Twine(formalArchStr) + "_d2p0").str();
+ if (getFeatureBits(RISCV::FeatureStdExtC))
+ formalArchStr = (Twine(formalArchStr) + "_c2p0").str();
+
+ getTargetStreamer().emitTextAttribute(Tag, formalArchStr);
+ }
+ }
+
+ return false;
+}
+
void RISCVAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) {
MCInst CInst;
bool Res = compressInst(CInst, Inst, getSTI(), S.getContext());
if (Res)
++RISCVNumInstrsCompressed;
- S.EmitInstruction((Res ? CInst : Inst), getSTI());
+ S.emitInstruction((Res ? CInst : Inst), getSTI());
}
void RISCVAsmParser::emitLoadImm(Register DestReg, int64_t Value,
@@ -1672,7 +2151,7 @@ void RISCVAsmParser::emitAuipcInstPair(MCOperand DestReg, MCOperand TmpReg,
MCSymbol *TmpLabel = Ctx.createTempSymbol(
"pcrel_hi", /* AlwaysAddSuffix */ true, /* CanBeUnnamed */ false);
- Out.EmitLabel(TmpLabel);
+ Out.emitLabel(TmpLabel);
const RISCVMCExpr *SymbolHi = RISCVMCExpr::create(Symbol, VKHi, Ctx);
emitToStreamer(
@@ -1717,8 +2196,7 @@ void RISCVAsmParser::emitLoadAddress(MCInst &Inst, SMLoc IDLoc,
const MCExpr *Symbol = Inst.getOperand(1).getExpr();
unsigned SecondOpcode;
RISCVMCExpr::VariantKind VKHi;
- // FIXME: Should check .option (no)pic when implemented
- if (getContext().getObjectFileInfo()->isPositionIndependent()) {
+ if (ParserOptions.IsPicEnabled) {
SecondOpcode = isRV64() ? RISCV::LD : RISCV::LW;
VKHi = RISCVMCExpr::VK_RISCV_GOT_HI;
} else {
@@ -1789,6 +2267,89 @@ bool RISCVAsmParser::checkPseudoAddTPRel(MCInst &Inst,
return false;
}
+std::unique_ptr<RISCVOperand> RISCVAsmParser::defaultMaskRegOp() const {
+ return RISCVOperand::createReg(RISCV::NoRegister, llvm::SMLoc(),
+ llvm::SMLoc(), isRV64());
+}
+
+bool RISCVAsmParser::validateInstruction(MCInst &Inst,
+ OperandVector &Operands) {
+ const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
+ unsigned TargetFlags =
+ (MCID.TSFlags >> RISCV::ConstraintOffset) & RISCV::ConstraintMask;
+ if (TargetFlags == RISCV::NoConstraint)
+ return false;
+
+ unsigned DestReg = Inst.getOperand(0).getReg();
+ // Operands[1] will be the first operand, DestReg.
+ SMLoc Loc = Operands[1]->getStartLoc();
+ if ((TargetFlags == RISCV::WidenV) || (TargetFlags == RISCV::WidenW) ||
+ (TargetFlags == RISCV::SlideUp) || (TargetFlags == RISCV::Vrgather) ||
+ (TargetFlags == RISCV::Vcompress)) {
+ if (TargetFlags != RISCV::WidenW) {
+ unsigned Src2Reg = Inst.getOperand(1).getReg();
+ if (DestReg == Src2Reg)
+ return Error(Loc, "The destination vector register group cannot overlap"
+ " the source vector register group.");
+ if (TargetFlags == RISCV::WidenV) {
+ // Assume DestReg LMUL is 2 at least for widening/narrowing operations.
+ if (DestReg + 1 == Src2Reg)
+ return Error(Loc,
+ "The destination vector register group cannot overlap"
+ " the source vector register group.");
+ }
+ }
+ if (Inst.getOperand(2).isReg()) {
+ unsigned Src1Reg = Inst.getOperand(2).getReg();
+ if (DestReg == Src1Reg)
+ return Error(Loc, "The destination vector register group cannot overlap"
+ " the source vector register group.");
+ if (TargetFlags == RISCV::WidenV || TargetFlags == RISCV::WidenW) {
+ // Assume DestReg LMUL is 2 at least for widening/narrowing operations.
+ if (DestReg + 1 == Src1Reg)
+ return Error(Loc,
+ "The destination vector register group cannot overlap"
+ " the source vector register group.");
+ }
+ }
+ if (Inst.getNumOperands() == 4) {
+ unsigned MaskReg = Inst.getOperand(3).getReg();
+
+ if (DestReg == MaskReg)
+ return Error(Loc, "The destination vector register group cannot overlap"
+ " the mask register.");
+ }
+ } else if (TargetFlags == RISCV::Narrow) {
+ unsigned Src2Reg = Inst.getOperand(1).getReg();
+ if (DestReg == Src2Reg)
+ return Error(Loc, "The destination vector register group cannot overlap"
+ " the source vector register group.");
+ // Assume Src2Reg LMUL is at least 2 for widening/narrowing operations.
+ if (DestReg == Src2Reg + 1)
+ return Error(Loc, "The destination vector register group cannot overlap"
+ " the source vector register group.");
+ } else if (TargetFlags == RISCV::WidenCvt || TargetFlags == RISCV::Iota) {
+ unsigned Src2Reg = Inst.getOperand(1).getReg();
+ if (DestReg == Src2Reg)
+ return Error(Loc, "The destination vector register group cannot overlap"
+ " the source vector register group.");
+ if (TargetFlags == RISCV::WidenCvt) {
+ // Assume DestReg LMUL is at least 2 for widening/narrowing operations.
+ if (DestReg + 1 == Src2Reg)
+ return Error(Loc, "The destination vector register group cannot overlap"
+ " the source vector register group.");
+ }
+ if (Inst.getNumOperands() == 3) {
+ unsigned MaskReg = Inst.getOperand(2).getReg();
+
+ if (DestReg == MaskReg)
+ return Error(Loc, "The destination vector register group cannot overlap"
+ " the mask register.");
+ }
+ }
+ return false;
+}
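
The checks above encode the RVV overlap constraints for widening, narrowing and masked forms: a widening destination occupies a register group twice as wide as its LMUL=1 sources, so overlap is rejected both at the source index and one register above it. A minimal sketch of that rule, assuming an LMUL=1 source and an LMUL=2 destination group (the helper name is hypothetical):

    // Minimal sketch of the widening overlap rule used above; assumes the
    // destination group covers DestReg and DestReg + 1 (LMUL = 2) while the
    // source occupies a single register (LMUL = 1).
    static bool widenDestOverlapsSrc(unsigned DestReg, unsigned SrcReg) {
      return SrcReg == DestReg || SrcReg == DestReg + 1;
    }
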
+
bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
OperandVector &Operands,
MCStreamer &Out) {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 1461a40227bf..37edc19398a5 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -18,6 +18,7 @@
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Endian.h"
@@ -31,10 +32,12 @@ typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
class RISCVDisassembler : public MCDisassembler {
+ std::unique_ptr<MCInstrInfo const> const MCII;
public:
- RISCVDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
- : MCDisassembler(STI, Ctx) {}
+ RISCVDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
+ MCInstrInfo const *MCII)
+ : MCDisassembler(STI, Ctx), MCII(MCII) {}
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -45,7 +48,7 @@ public:
static MCDisassembler *createRISCVDisassembler(const Target &T,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
- return new RISCVDisassembler(STI, Ctx);
+ return new RISCVDisassembler(STI, Ctx, T.createMCInstrInfo());
}
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVDisassembler() {
@@ -148,6 +151,33 @@ static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeVRRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo >= 32)
+ return MCDisassembler::Fail;
+
+ Register Reg = RISCV::V0 + RegNo;
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeVMaskReg(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address, const void *Decoder) {
+ Register Reg = RISCV::NoRegister;
+ switch (RegNo) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ Reg = RISCV::V0;
+ break;
+ case 1:
+ break;
+ }
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
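
The vm field decoded here is a single bit: encoding 0 selects v0 as the mask register, while encoding 1 means the instruction is unmasked, which is modelled as RISCV::NoRegister so the printer can drop the mask suffix entirely. A small sketch of the round trip, matching this decoder and the getVMaskReg encoder added later in this patch:

    // Round trip of the one-bit vm field implied by decodeVMaskReg and the
    // matching encoder: V0 <-> 0 (masked), NoRegister <-> 1 (unmasked).
    static unsigned encodeVMaskBit(unsigned Reg) {
      return Reg == RISCV::V0 ? 0 : 1;
    }
    static unsigned decodeVMaskBit(unsigned Bit) {
      return Bit == 0 ? RISCV::V0 : RISCV::NoRegister;
    }
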
+
// Add the implied SP operand for C.*SP compressed instructions. The SP
// operand isn't explicitly encoded in the instruction.
static void addImplySP(MCInst &Inst, int64_t Address, const void *Decoder) {
@@ -349,6 +379,19 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
+ if (STI.getFeatureBits()[RISCV::FeatureExtZbproposedc] &&
+ STI.getFeatureBits()[RISCV::FeatureStdExtC]) {
+ LLVM_DEBUG(
+ dbgs() << "Trying RVBC32 table (BitManip 16-bit Instruction):\n");
+ // Calling the auto-generated decoder function.
+ Result = decodeInstruction(DecoderTableRVBC16, MI, Insn, Address,
+ this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 2;
+ return Result;
+ }
+ }
+
LLVM_DEBUG(dbgs() << "Trying RISCV_C table (16-bit Instruction):\n");
// Calling the auto-generated decoder function.
Result = decodeInstruction(DecoderTable16, MI, Insn, Address, this, STI);
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 373d0ccb1857..bb1f1cc7f49a 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -23,12 +23,75 @@
using namespace llvm;
+Optional<MCFixupKind> RISCVAsmBackend::getFixupKind(StringRef Name) const {
+ if (STI.getTargetTriple().isOSBinFormatELF()) {
+ unsigned Type;
+ Type = llvm::StringSwitch<unsigned>(Name)
+#define ELF_RELOC(X, Y) .Case(#X, Y)
+#include "llvm/BinaryFormat/ELFRelocs/RISCV.def"
+#undef ELF_RELOC
+ .Default(-1u);
+ if (Type != -1u)
+ return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
+ }
+ return None;
+}
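
This hook lets a .reloc directive name any RISC-V ELF relocation directly; the result is the raw relocation number biased by FirstLiteralRelocationKind, and the object writer later subtracts the bias back out (see the getRelocType change further down). A rough illustration, assuming R_RISCV_32 is relocation number 1 in ELFRelocs/RISCV.def:

    // Sketch only: resolves a .reloc-style relocation name through the hook
    // above; Backend stands for an existing RISCVAsmBackend.
    static bool isLiteralReloc(const RISCVAsmBackend &Backend, StringRef Name) {
      Optional<MCFixupKind> K = Backend.getFixupKind(Name);
      return K && *K >= FirstLiteralRelocationKind;
    }

Under that assumption, isLiteralReloc(Backend, "R_RISCV_32") would be true on an ELF target, with the literal kind carrying relocation number 1.
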
+
+const MCFixupKindInfo &
+RISCVAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[] = {
+ // This table *must* be in the order that the fixup_* kinds are defined in
+ // RISCVFixupKinds.h.
+ //
+ // name offset bits flags
+ {"fixup_riscv_hi20", 12, 20, 0},
+ {"fixup_riscv_lo12_i", 20, 12, 0},
+ {"fixup_riscv_lo12_s", 0, 32, 0},
+ {"fixup_riscv_pcrel_hi20", 12, 20,
+ MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsTarget},
+ {"fixup_riscv_pcrel_lo12_i", 20, 12,
+ MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsTarget},
+ {"fixup_riscv_pcrel_lo12_s", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsTarget},
+ {"fixup_riscv_got_hi20", 12, 20, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_riscv_tprel_hi20", 12, 20, 0},
+ {"fixup_riscv_tprel_lo12_i", 20, 12, 0},
+ {"fixup_riscv_tprel_lo12_s", 0, 32, 0},
+ {"fixup_riscv_tprel_add", 0, 0, 0},
+ {"fixup_riscv_tls_got_hi20", 12, 20, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_riscv_tls_gd_hi20", 12, 20, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_riscv_jal", 12, 20, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_riscv_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_riscv_rvc_jump", 2, 11, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_riscv_rvc_branch", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_riscv_call", 0, 64, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_riscv_call_plt", 0, 64, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_riscv_relax", 0, 0, 0},
+ {"fixup_riscv_align", 0, 0, 0}};
+ static_assert((array_lengthof(Infos)) == RISCV::NumTargetFixupKinds,
+ "Not all fixup kinds added to Infos array");
+
+ // Fixup kinds from .reloc directive are like R_RISCV_NONE. They
+ // do not require any extra processing.
+ if (Kind >= FirstLiteralRelocationKind)
+ return MCAsmBackend::getFixupKindInfo(FK_NONE);
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+}
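
Each row of this table records where the fixup's bits land: a bit offset, a width, and PC-relative or target-specific flags. For example, the {"fixup_riscv_jal", 12, 20, FKF_IsPCRel} row describes a PC-relative value whose 20 bits are shifted up by 12 before being OR'd into the instruction word, i.e. the J-type immediate field. A sketch of the corresponding field mask, derived only from that row:

    // Field mask implied by the "fixup_riscv_jal" row above: 20 bits wide,
    // starting at bit offset 12 of the 32-bit instruction word.
    static const uint32_t JalImmFieldMask = ((1u << 20) - 1) << 12; // 0xfffff000
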
+
// If linker relaxation is enabled, or the relax option had previously been
// enabled, always emit relocations even if the fixup can be resolved. This is
// necessary for correctness as offsets may change during relaxation.
bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
const MCFixup &Fixup,
const MCValue &Target) {
+ if (Fixup.getKind() >= FirstLiteralRelocationKind)
+ return true;
switch (Fixup.getTargetKind()) {
default:
break;
@@ -76,10 +139,10 @@ bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
}
}
-void RISCVAsmBackend::relaxInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI,
- MCInst &Res) const {
+void RISCVAsmBackend::relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const {
+ // TODO: replace this with a call to the auto-generated uncompressInst() function.
+ MCInst Res;
switch (Inst.getOpcode()) {
default:
llvm_unreachable("Opcode not expected!");
@@ -110,6 +173,7 @@ void RISCVAsmBackend::relaxInstruction(const MCInst &Inst,
Res.addOperand(Inst.getOperand(0));
break;
}
+ Inst = std::move(Res);
}
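
The override now matches the updated MCAsmBackend interface: relaxation is done in place on Inst rather than through a separate Res out-parameter. A hedged sketch of a call site using the new shape (helper and variable names are illustrative):

    // Illustrative caller for the in-place interface; previously the relaxed
    // instruction came back through a second MCInst parameter.
    static void relaxIfNeeded(const RISCVAsmBackend &Backend, MCInst &Inst,
                              const MCSubtargetInfo &STI) {
      if (Backend.mayNeedRelaxation(Inst, STI))
        Backend.relaxInstruction(Inst, STI); // Inst is rewritten in place
    }
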
// Given a compressed control flow instruction this function returns
@@ -318,8 +382,11 @@ void RISCVAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
MutableArrayRef<char> Data, uint64_t Value,
bool IsResolved,
const MCSubtargetInfo *STI) const {
+ MCFixupKind Kind = Fixup.getKind();
+ if (Kind >= FirstLiteralRelocationKind)
+ return;
MCContext &Ctx = Asm.getContext();
- MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
+ MCFixupKindInfo Info = getFixupKindInfo(Kind);
if (!Value)
return; // Doesn't change encoding.
// Apply any target-specific value adjustments.
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
index 1c3c587355a2..090132af3585 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -97,55 +97,16 @@ public:
return RISCV::NumTargetFixupKinds;
}
- const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
- const static MCFixupKindInfo Infos[] = {
- // This table *must* be in the order that the fixup_* kinds are defined in
- // RISCVFixupKinds.h.
- //
- // name offset bits flags
- { "fixup_riscv_hi20", 12, 20, 0 },
- { "fixup_riscv_lo12_i", 20, 12, 0 },
- { "fixup_riscv_lo12_s", 0, 32, 0 },
- { "fixup_riscv_pcrel_hi20", 12, 20,
- MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsTarget },
- { "fixup_riscv_pcrel_lo12_i", 20, 12,
- MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsTarget },
- { "fixup_riscv_pcrel_lo12_s", 0, 32,
- MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsTarget },
- { "fixup_riscv_got_hi20", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_tprel_hi20", 12, 20, 0 },
- { "fixup_riscv_tprel_lo12_i", 20, 12, 0 },
- { "fixup_riscv_tprel_lo12_s", 0, 32, 0 },
- { "fixup_riscv_tprel_add", 0, 0, 0 },
- { "fixup_riscv_tls_got_hi20", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_tls_gd_hi20", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_jal", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_rvc_jump", 2, 11, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_rvc_branch", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_call", 0, 64, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_call_plt", 0, 64, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_relax", 0, 0, 0 },
- { "fixup_riscv_align", 0, 0, 0 }
- };
- static_assert((array_lengthof(Infos)) == RISCV::NumTargetFixupKinds,
- "Not all fixup kinds added to Infos array");
-
- if (Kind < FirstTargetFixupKind)
- return MCAsmBackend::getFixupKindInfo(Kind);
-
- assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
- "Invalid kind!");
- return Infos[Kind - FirstTargetFixupKind];
- }
+ Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
bool mayNeedRelaxation(const MCInst &Inst,
const MCSubtargetInfo &STI) const override;
unsigned getRelaxedOpcode(unsigned Op) const;
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override;
-
+ void relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const override;
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
index 08b75795ed4b..b38ba2bff582 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
@@ -52,6 +52,8 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
const MCExpr *Expr = Fixup.getValue();
// Determine the type of the relocation
unsigned Kind = Fixup.getTargetKind();
+ if (Kind >= FirstLiteralRelocationKind)
+ return Kind - FirstLiteralRelocationKind;
if (IsPCRel) {
switch (Kind) {
default:
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
index 40fa195f3790..079dc919928a 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
@@ -15,14 +15,18 @@
#include "RISCVMCTargetDesc.h"
#include "Utils/RISCVBaseInfo.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/RISCVAttributes.h"
using namespace llvm;
// This part is for ELF object output.
RISCVTargetELFStreamer::RISCVTargetELFStreamer(MCStreamer &S,
const MCSubtargetInfo &STI)
- : RISCVTargetStreamer(S) {
+ : RISCVTargetStreamer(S), CurrentVendor("riscv") {
MCAssembler &MCA = getStreamer().getAssembler();
const FeatureBitset &Features = STI.getFeatureBits();
auto &MAB = static_cast<RISCVAsmBackend &>(MCA.getBackend());
@@ -62,7 +66,104 @@ MCELFStreamer &RISCVTargetELFStreamer::getStreamer() {
void RISCVTargetELFStreamer::emitDirectiveOptionPush() {}
void RISCVTargetELFStreamer::emitDirectiveOptionPop() {}
+void RISCVTargetELFStreamer::emitDirectiveOptionPIC() {}
+void RISCVTargetELFStreamer::emitDirectiveOptionNoPIC() {}
void RISCVTargetELFStreamer::emitDirectiveOptionRVC() {}
void RISCVTargetELFStreamer::emitDirectiveOptionNoRVC() {}
void RISCVTargetELFStreamer::emitDirectiveOptionRelax() {}
void RISCVTargetELFStreamer::emitDirectiveOptionNoRelax() {}
+
+void RISCVTargetELFStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
+ setAttributeItem(Attribute, Value, /*OverwriteExisting=*/true);
+}
+
+void RISCVTargetELFStreamer::emitTextAttribute(unsigned Attribute,
+ StringRef String) {
+ setAttributeItem(Attribute, String, /*OverwriteExisting=*/true);
+}
+
+void RISCVTargetELFStreamer::emitIntTextAttribute(unsigned Attribute,
+ unsigned IntValue,
+ StringRef StringValue) {
+ setAttributeItems(Attribute, IntValue, StringValue,
+ /*OverwriteExisting=*/true);
+}
+
+void RISCVTargetELFStreamer::finishAttributeSection() {
+ if (Contents.empty())
+ return;
+
+ if (AttributeSection) {
+ Streamer.SwitchSection(AttributeSection);
+ } else {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ AttributeSection = MCA.getContext().getELFSection(
+ ".riscv.attributes", ELF::SHT_RISCV_ATTRIBUTES, 0);
+ Streamer.SwitchSection(AttributeSection);
+
+ Streamer.emitInt8(ELFAttrs::Format_Version);
+ }
+
+ // Vendor size + Vendor name + '\0'
+ const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1;
+
+ // Tag + Tag Size
+ const size_t TagHeaderSize = 1 + 4;
+
+ const size_t ContentsSize = calculateContentSize();
+
+ Streamer.emitInt32(VendorHeaderSize + TagHeaderSize + ContentsSize);
+ Streamer.emitBytes(CurrentVendor);
+ Streamer.emitInt8(0); // '\0'
+
+ Streamer.emitInt8(ELFAttrs::File);
+ Streamer.emitInt32(TagHeaderSize + ContentsSize);
+
+ // The size has already been accounted for; now emit each field according
+ // to its type (ULEB128 or string).
+ for (AttributeItem item : Contents) {
+ Streamer.emitULEB128IntValue(item.Tag);
+ switch (item.Type) {
+ default:
+ llvm_unreachable("Invalid attribute type");
+ case AttributeType::Numeric:
+ Streamer.emitULEB128IntValue(item.IntValue);
+ break;
+ case AttributeType::Text:
+ Streamer.emitBytes(item.StringValue);
+ Streamer.emitInt8(0); // '\0'
+ break;
+ case AttributeType::NumericAndText:
+ Streamer.emitULEB128IntValue(item.IntValue);
+ Streamer.emitBytes(item.StringValue);
+ Streamer.emitInt8(0); // '\0'
+ break;
+ }
+ }
+
+ Contents.clear();
+}
+
+size_t RISCVTargetELFStreamer::calculateContentSize() const {
+ size_t Result = 0;
+ for (AttributeItem item : Contents) {
+ switch (item.Type) {
+ case AttributeType::Hidden:
+ break;
+ case AttributeType::Numeric:
+ Result += getULEB128Size(item.Tag);
+ Result += getULEB128Size(item.IntValue);
+ break;
+ case AttributeType::Text:
+ Result += getULEB128Size(item.Tag);
+ Result += item.StringValue.size() + 1; // string + '\0'
+ break;
+ case AttributeType::NumericAndText:
+ Result += getULEB128Size(item.Tag);
+ Result += getULEB128Size(item.IntValue);
+ Result += item.StringValue.size() + 1; // string + '\0';
+ break;
+ }
+ }
+ return Result;
+}
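
Together these two functions lay out a standard build-attributes subsection: a one-byte format version, a length-prefixed vendor record for "riscv", and a file-scope tag whose payload is the ULEB128-encoded tag/value pairs gathered in Contents. A rough size sketch for a subsection holding a single text attribute, assuming the tag fits in one ULEB128 byte:

    #include <cstddef>
    #include <cstring>
    // Mirrors the arithmetic in finishAttributeSection() for one text
    // attribute; the emitted 32-bit length word is exactly this value.
    static size_t attributeSubsectionSize(const char *Vendor, const char *Text) {
      size_t VendorHeader = 4 + std::strlen(Vendor) + 1; // length + name + '\0'
      size_t TagHeader = 1 + 4;                          // file tag + length
      size_t Contents = 1 /*ULEB tag*/ + std::strlen(Text) + 1 /*'\0'*/;
      return VendorHeader + TagHeader + Contents;
    }

For instance, a lone "rv64i2p0" arch string under the "riscv" vendor comes to 10 + 5 + 10 = 25 bytes.
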
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
index 138df786eaf3..392c87054d43 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
@@ -15,16 +15,94 @@
namespace llvm {
class RISCVTargetELFStreamer : public RISCVTargetStreamer {
+private:
+ enum class AttributeType { Hidden, Numeric, Text, NumericAndText };
+
+ struct AttributeItem {
+ AttributeType Type;
+ unsigned Tag;
+ unsigned IntValue;
+ std::string StringValue;
+ };
+
+ StringRef CurrentVendor;
+ SmallVector<AttributeItem, 64> Contents;
+
+ MCSection *AttributeSection = nullptr;
+
+ AttributeItem *getAttributeItem(unsigned Attribute) {
+ for (size_t i = 0; i < Contents.size(); ++i)
+ if (Contents[i].Tag == Attribute)
+ return &Contents[i];
+ return nullptr;
+ }
+
+ void setAttributeItem(unsigned Attribute, unsigned Value,
+ bool OverwriteExisting) {
+ // Look for existing attribute item.
+ if (AttributeItem *Item = getAttributeItem(Attribute)) {
+ if (!OverwriteExisting)
+ return;
+ Item->Type = AttributeType::Numeric;
+ Item->IntValue = Value;
+ return;
+ }
+
+ // Create new attribute item.
+ Contents.push_back({AttributeType::Numeric, Attribute, Value, ""});
+ }
+
+ void setAttributeItem(unsigned Attribute, StringRef Value,
+ bool OverwriteExisting) {
+ // Look for existing attribute item.
+ if (AttributeItem *Item = getAttributeItem(Attribute)) {
+ if (!OverwriteExisting)
+ return;
+ Item->Type = AttributeType::Text;
+ Item->StringValue = std::string(Value);
+ return;
+ }
+
+ // Create new attribute item.
+ Contents.push_back({AttributeType::Text, Attribute, 0, std::string(Value)});
+ }
+
+ void setAttributeItems(unsigned Attribute, unsigned IntValue,
+ StringRef StringValue, bool OverwriteExisting) {
+ // Look for existing attribute item.
+ if (AttributeItem *Item = getAttributeItem(Attribute)) {
+ if (!OverwriteExisting)
+ return;
+ Item->Type = AttributeType::NumericAndText;
+ Item->IntValue = IntValue;
+ Item->StringValue = std::string(StringValue);
+ return;
+ }
+
+ // Create new attribute item.
+ Contents.push_back({AttributeType::NumericAndText, Attribute, IntValue,
+ std::string(StringValue)});
+ }
+
+ void emitAttribute(unsigned Attribute, unsigned Value) override;
+ void emitTextAttribute(unsigned Attribute, StringRef String) override;
+ void emitIntTextAttribute(unsigned Attribute, unsigned IntValue,
+ StringRef StringValue) override;
+ void finishAttributeSection() override;
+ size_t calculateContentSize() const;
+
public:
MCELFStreamer &getStreamer();
RISCVTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
- virtual void emitDirectiveOptionPush();
- virtual void emitDirectiveOptionPop();
- virtual void emitDirectiveOptionRVC();
- virtual void emitDirectiveOptionNoRVC();
- virtual void emitDirectiveOptionRelax();
- virtual void emitDirectiveOptionNoRelax();
+ void emitDirectiveOptionPush() override;
+ void emitDirectiveOptionPop() override;
+ void emitDirectiveOptionPIC() override;
+ void emitDirectiveOptionNoPIC() override;
+ void emitDirectiveOptionRVC() override;
+ void emitDirectiveOptionNoRVC() override;
+ void emitDirectiveOptionRelax() override;
+ void emitDirectiveOptionNoRelax() override;
};
}
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index 22bb80ae34e2..eae3e13dbe40 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -73,7 +73,7 @@ void RISCVInstPrinter::printInst(const MCInst *MI, uint64_t Address,
Res = uncompressInst(UncompressedMI, *MI, MRI, STI);
if (Res)
NewMI = const_cast<MCInst *>(&UncompressedMI);
- if (NoAliases || !printAliasInstr(NewMI, STI, O))
+ if (NoAliases || !printAliasInstr(NewMI, Address, STI, O))
printInstruction(NewMI, Address, STI, O);
printAnnotation(O, Annot);
}
@@ -150,6 +150,39 @@ void RISCVInstPrinter::printAtomicMemOp(const MCInst *MI, unsigned OpNo,
return;
}
+void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNo).getImm();
+ unsigned Sew = (Imm >> 2) & 0x7;
+ unsigned Lmul = Imm & 0x3;
+
+ Lmul = 0x1 << Lmul;
+ Sew = 0x1 << (Sew + 3);
+ O << "e" << Sew << ",m" << Lmul;
+}
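
The vtype immediate is unpacked as two power-of-two fields in the encoding used here: the low two bits give LMUL as 1 << bits, and the next three bits give SEW as 8 << bits. A stand-alone sketch for checking examples:

    #include <utility>
    // decodeVType(0b01001) == {32, 2}, which printVTypeI prints as "e32,m2".
    static std::pair<unsigned, unsigned> decodeVType(unsigned Imm) {
      unsigned Sew = 1u << (((Imm >> 2) & 0x7) + 3);
      unsigned Lmul = 1u << (Imm & 0x3);
      return {Sew, Lmul};
    }
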
+
+void RISCVInstPrinter::printVMaskReg(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+
+ assert(MO.isReg() && "printVMaskReg can only print register operands");
+ if (MO.getReg() == RISCV::NoRegister)
+ return;
+ O << ", ";
+ printRegName(O, MO.getReg());
+ O << ".t";
+}
+
+void RISCVInstPrinter::printSImm5Plus1(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+
+ assert(MO.isImm() && "printSImm5Plus1 can only print constant operands");
+ O << MO.getImm() + 1;
+}
+
const char *RISCVInstPrinter::getRegisterName(unsigned RegNo) {
return getRegisterName(RegNo, ArchRegNames ? RISCV::NoRegAltName
: RISCV::ABIRegAltName);
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
index aeb2ea636060..fdaa00c5f8eb 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
@@ -17,7 +17,6 @@
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
-class MCOperand;
class RISCVInstPrinter : public MCInstPrinter {
public:
@@ -41,14 +40,20 @@ public:
raw_ostream &O);
void printAtomicMemOp(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVTypeI(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printVMaskReg(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSImm5Plus1(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, uint64_t Address,
const MCSubtargetInfo &STI, raw_ostream &O);
- bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
- raw_ostream &O);
- void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx,
+ bool printAliasInstr(const MCInst *MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpIdx, unsigned PrintMethodIdx,
const MCSubtargetInfo &STI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
static const char *getRegisterName(unsigned RegNo, unsigned AltIdx);
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index de99960848a5..816206c477df 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -80,6 +80,10 @@ public:
unsigned getImmOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+
+ unsigned getVMaskReg(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
};
} // end anonymous namespace
@@ -89,13 +93,14 @@ MCCodeEmitter *llvm::createRISCVMCCodeEmitter(const MCInstrInfo &MCII,
return new RISCVMCCodeEmitter(Ctx, MCII);
}
-// Expand PseudoCALL(Reg) and PseudoTAIL to AUIPC and JALR with relocation
-// types. We expand PseudoCALL(Reg) and PseudoTAIL while encoding, meaning AUIPC
-// and JALR won't go through RISCV MC to MC compressed instruction
-// transformation. This is acceptable because AUIPC has no 16-bit form and
-// C_JALR have no immediate operand field. We let linker relaxation deal with
-// it. When linker relaxation enabled, AUIPC and JALR have chance relax to JAL.
-// If C extension is enabled, JAL has chance relax to C_JAL.
+// Expand PseudoCALL(Reg), PseudoTAIL and PseudoJump to AUIPC and JALR with
+// relocation types. We expand those pseudo-instructions while encoding them,
+// meaning AUIPC and JALR won't go through RISCV MC to MC compressed
+// instruction transformation. This is acceptable because AUIPC has no 16-bit
+// form and C_JALR has no immediate operand field. We let linker relaxation
+// deal with it. When linker relaxation is enabled, AUIPC and JALR have a
+// chance to relax to JAL.
+// If the C extension is enabled, JAL has a chance to relax to C_JAL.
void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -108,9 +113,12 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, raw_ostream &OS,
} else if (MI.getOpcode() == RISCV::PseudoCALLReg) {
Func = MI.getOperand(1);
Ra = MI.getOperand(0).getReg();
- } else {
+ } else if (MI.getOpcode() == RISCV::PseudoCALL) {
Func = MI.getOperand(0);
Ra = RISCV::X1;
+ } else if (MI.getOpcode() == RISCV::PseudoJump) {
+ Func = MI.getOperand(1);
+ Ra = MI.getOperand(0).getReg();
}
uint32_t Binary;
@@ -125,8 +133,9 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, raw_ostream &OS,
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
support::endian::write(OS, Binary, support::little);
- if (MI.getOpcode() == RISCV::PseudoTAIL)
- // Emit JALR X0, X6, 0
+ if (MI.getOpcode() == RISCV::PseudoTAIL ||
+ MI.getOpcode() == RISCV::PseudoJump)
+ // Emit JALR X0, Ra, 0
TmpInst = MCInstBuilder(RISCV::JALR).addReg(RISCV::X0).addReg(Ra).addImm(0);
else
// Emit JALR Ra, Ra, 0
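
As described in the comment at the top of expandFunctionCall, each of these pseudos becomes an AUIPC/JALR pair at encoding time. A sketch of the pair a plain PseudoCALL turns into, mirroring the MCInstBuilder calls above (Func stands for the already-lowered callee operand):

    // Sketch of the two MCInsts a plain PseudoCALL becomes: auipc puts the
    // high part of the PC-relative offset into ra, jalr then jumps and links.
    static void buildCallPair(const MCOperand &Func, MCInst &Hi, MCInst &Lo) {
      Hi = MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X1).addOperand(Func);
      Lo = MCInstBuilder(RISCV::JALR)
               .addReg(RISCV::X1).addReg(RISCV::X1).addImm(0);
    }
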
@@ -180,9 +189,13 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
// Get byte count of instruction.
unsigned Size = Desc.getSize();
+ // RISCVInstrInfo::getInstSizeInBytes hard-codes the number of expanded
+ // instructions for each pseudo, and must be updated when adding new pseudos
+ // or changing existing ones.
if (MI.getOpcode() == RISCV::PseudoCALLReg ||
MI.getOpcode() == RISCV::PseudoCALL ||
- MI.getOpcode() == RISCV::PseudoTAIL) {
+ MI.getOpcode() == RISCV::PseudoTAIL ||
+ MI.getOpcode() == RISCV::PseudoJump) {
expandFunctionCall(MI, OS, Fixups, STI);
MCNumEmitted += 2;
return;
@@ -368,4 +381,20 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
return 0;
}
+unsigned RISCVMCCodeEmitter::getVMaskReg(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ MCOperand MO = MI.getOperand(OpNo);
+ assert(MO.isReg() && "Expected a register.");
+
+ switch (MO.getReg()) {
+ default:
+ llvm_unreachable("Invalid mask register.");
+ case RISCV::V0:
+ return 0;
+ case RISCV::NoRegister:
+ return 1;
+ }
+}
+
#include "RISCVGenMCCodeEmitter.inc"
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
index 167e7d553e7d..77038cee4e9d 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
@@ -19,7 +19,7 @@
namespace llvm {
class StringRef;
-class MCOperand;
+
class RISCVMCExpr : public MCTargetExpr {
public:
enum VariantKind {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index c37482be3c2c..a474224e1a4e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -56,7 +57,7 @@ static MCAsmInfo *createRISCVMCAsmInfo(const MCRegisterInfo &MRI,
MCAsmInfo *MAI = new RISCVMCAsmInfo(TT);
Register SP = MRI.getDwarfRegNum(RISCV::X2, true);
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, SP, 0);
+ MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, SP, 0);
MAI->addInitialFrameState(Inst);
return MAI;
@@ -64,7 +65,7 @@ static MCAsmInfo *createRISCVMCAsmInfo(const MCRegisterInfo &MRI,
static MCSubtargetInfo *createRISCVMCSubtargetInfo(const Triple &TT,
StringRef CPU, StringRef FS) {
- std::string CPUName = CPU;
+ std::string CPUName = std::string(CPU);
if (CPUName.empty())
CPUName = TT.isArch64Bit() ? "generic-rv64" : "generic-rv32";
return createRISCVMCSubtargetInfoImpl(TT, CPUName, FS);
@@ -93,6 +94,49 @@ static MCTargetStreamer *createRISCVAsmTargetStreamer(MCStreamer &S,
return new RISCVTargetAsmStreamer(S, OS);
}
+static MCTargetStreamer *createRISCVNullTargetStreamer(MCStreamer &S) {
+ return new RISCVTargetStreamer(S);
+}
+
+namespace {
+
+class RISCVMCInstrAnalysis : public MCInstrAnalysis {
+public:
+ explicit RISCVMCInstrAnalysis(const MCInstrInfo *Info)
+ : MCInstrAnalysis(Info) {}
+
+ bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+ uint64_t &Target) const override {
+ if (isConditionalBranch(Inst)) {
+ int64_t Imm;
+ if (Size == 2)
+ Imm = Inst.getOperand(1).getImm();
+ else
+ Imm = Inst.getOperand(2).getImm();
+ Target = Addr + Imm;
+ return true;
+ }
+
+ if (Inst.getOpcode() == RISCV::C_JAL || Inst.getOpcode() == RISCV::C_J) {
+ Target = Addr + Inst.getOperand(0).getImm();
+ return true;
+ }
+
+ if (Inst.getOpcode() == RISCV::JAL) {
+ Target = Addr + Inst.getOperand(1).getImm();
+ return true;
+ }
+
+ return false;
+ }
+};
+
+} // end anonymous namespace
+
+static MCInstrAnalysis *createRISCVInstrAnalysis(const MCInstrInfo *Info) {
+ return new RISCVMCInstrAnalysis(Info);
+}
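
Registering an MCInstrAnalysis lets generic MC consumers such as llvm-objdump resolve branch and jump targets symbolically; as the override above shows, the target is simply the instruction address plus the immediate operand selected by opcode and size. A hedged usage sketch (names are illustrative):

    // Illustrative consumer of the analysis above, e.g. while walking a
    // disassembled function; Inst/Addr/Size come from the disassembler.
    static void noteBranchTarget(const MCInstrAnalysis &MIA, const MCInst &Inst,
                                 uint64_t Addr, uint64_t Size, raw_ostream &OS) {
      uint64_t Target;
      if (MIA.evaluateBranch(Inst, Addr, Size, Target))
        OS << "branch/jump to 0x" << Twine::utohexstr(Target) << "\n";
    }
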
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetMC() {
for (Target *T : {&getTheRISCV32Target(), &getTheRISCV64Target()}) {
TargetRegistry::RegisterMCAsmInfo(*T, createRISCVMCAsmInfo);
@@ -104,8 +148,12 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetMC() {
TargetRegistry::RegisterMCSubtargetInfo(*T, createRISCVMCSubtargetInfo);
TargetRegistry::RegisterObjectTargetStreamer(
*T, createRISCVObjectTargetStreamer);
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createRISCVInstrAnalysis);
// Register the asm target streamer.
TargetRegistry::RegisterAsmTargetStreamer(*T, createRISCVAsmTargetStreamer);
+ // Register the null target streamer.
+ TargetRegistry::RegisterNullTargetStreamer(*T,
+ createRISCVNullTargetStreamer);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
index b30997533ddf..5216a689715a 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
@@ -26,11 +26,7 @@ class MCInstrInfo;
class MCObjectTargetWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
-class StringRef;
class Target;
-class Triple;
-class raw_ostream;
-class raw_pwrite_stream;
MCCodeEmitter *createRISCVMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
index 913e1f744192..54a2fb288579 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
@@ -11,12 +11,59 @@
//===----------------------------------------------------------------------===//
#include "RISCVTargetStreamer.h"
+#include "RISCVSubtarget.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/RISCVAttributes.h"
using namespace llvm;
RISCVTargetStreamer::RISCVTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
+void RISCVTargetStreamer::finish() { finishAttributeSection(); }
+
+void RISCVTargetStreamer::emitDirectiveOptionPush() {}
+void RISCVTargetStreamer::emitDirectiveOptionPop() {}
+void RISCVTargetStreamer::emitDirectiveOptionPIC() {}
+void RISCVTargetStreamer::emitDirectiveOptionNoPIC() {}
+void RISCVTargetStreamer::emitDirectiveOptionRVC() {}
+void RISCVTargetStreamer::emitDirectiveOptionNoRVC() {}
+void RISCVTargetStreamer::emitDirectiveOptionRelax() {}
+void RISCVTargetStreamer::emitDirectiveOptionNoRelax() {}
+void RISCVTargetStreamer::emitAttribute(unsigned Attribute, unsigned Value) {}
+void RISCVTargetStreamer::finishAttributeSection() {}
+void RISCVTargetStreamer::emitTextAttribute(unsigned Attribute,
+ StringRef String) {}
+void RISCVTargetStreamer::emitIntTextAttribute(unsigned Attribute,
+ unsigned IntValue,
+ StringRef StringValue) {}
+
+void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
+ if (STI.hasFeature(RISCV::FeatureRV32E))
+ emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_4);
+ else
+ emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_16);
+
+ std::string Arch = "rv32";
+ if (STI.hasFeature(RISCV::Feature64Bit))
+ Arch = "rv64";
+ if (STI.hasFeature(RISCV::FeatureRV32E))
+ Arch += "e1p9";
+ else
+ Arch += "i2p0";
+ if (STI.hasFeature(RISCV::FeatureStdExtM))
+ Arch += "_m2p0";
+ if (STI.hasFeature(RISCV::FeatureStdExtA))
+ Arch += "_a2p0";
+ if (STI.hasFeature(RISCV::FeatureStdExtF))
+ Arch += "_f2p0";
+ if (STI.hasFeature(RISCV::FeatureStdExtD))
+ Arch += "_d2p0";
+ if (STI.hasFeature(RISCV::FeatureStdExtC))
+ Arch += "_c2p0";
+
+ emitTextAttribute(RISCVAttrs::ARCH, Arch);
+}
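
For an RV64 target with the M, A, F, D and C extensions enabled this produces the arch string "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0", while RV32E contributes the "rv32e1p9" base instead. A stand-alone rendition for checking such strings, deliberately limited to the non-RV32E cases handled above:

    #include <string>
    // archString(true, true, true, true, true, true)
    //   == "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0"
    static std::string archString(bool RV64, bool M, bool A, bool F, bool D,
                                  bool C) {
      std::string Arch = RV64 ? "rv64i2p0" : "rv32i2p0";
      if (M) Arch += "_m2p0";
      if (A) Arch += "_a2p0";
      if (F) Arch += "_f2p0";
      if (D) Arch += "_d2p0";
      if (C) Arch += "_c2p0";
      return Arch;
    }
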
+
// This part is for ascii assembly output
RISCVTargetAsmStreamer::RISCVTargetAsmStreamer(MCStreamer &S,
formatted_raw_ostream &OS)
@@ -30,6 +77,14 @@ void RISCVTargetAsmStreamer::emitDirectiveOptionPop() {
OS << "\t.option\tpop\n";
}
+void RISCVTargetAsmStreamer::emitDirectiveOptionPIC() {
+ OS << "\t.option\tpic\n";
+}
+
+void RISCVTargetAsmStreamer::emitDirectiveOptionNoPIC() {
+ OS << "\t.option\tnopic\n";
+}
+
void RISCVTargetAsmStreamer::emitDirectiveOptionRVC() {
OS << "\t.option\trvc\n";
}
@@ -45,3 +100,18 @@ void RISCVTargetAsmStreamer::emitDirectiveOptionRelax() {
void RISCVTargetAsmStreamer::emitDirectiveOptionNoRelax() {
OS << "\t.option\tnorelax\n";
}
+
+void RISCVTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
+ OS << "\t.attribute\t" << Attribute << ", " << Twine(Value) << "\n";
+}
+
+void RISCVTargetAsmStreamer::emitTextAttribute(unsigned Attribute,
+ StringRef String) {
+ OS << "\t.attribute\t" << Attribute << ", \"" << String << "\"\n";
+}
+
+void RISCVTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute,
+ unsigned IntValue,
+ StringRef StringValue) {}
+
+void RISCVTargetAsmStreamer::finishAttributeSection() {}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
index 1becc134b2a2..32fa20f25d82 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
@@ -10,30 +10,49 @@
#define LLVM_LIB_TARGET_RISCV_RISCVTARGETSTREAMER_H
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
class RISCVTargetStreamer : public MCTargetStreamer {
public:
RISCVTargetStreamer(MCStreamer &S);
-
- virtual void emitDirectiveOptionPush() = 0;
- virtual void emitDirectiveOptionPop() = 0;
- virtual void emitDirectiveOptionRVC() = 0;
- virtual void emitDirectiveOptionNoRVC() = 0;
- virtual void emitDirectiveOptionRelax() = 0;
- virtual void emitDirectiveOptionNoRelax() = 0;
+ void finish() override;
+
+ virtual void emitDirectiveOptionPush();
+ virtual void emitDirectiveOptionPop();
+ virtual void emitDirectiveOptionPIC();
+ virtual void emitDirectiveOptionNoPIC();
+ virtual void emitDirectiveOptionRVC();
+ virtual void emitDirectiveOptionNoRVC();
+ virtual void emitDirectiveOptionRelax();
+ virtual void emitDirectiveOptionNoRelax();
+ virtual void emitAttribute(unsigned Attribute, unsigned Value);
+ virtual void finishAttributeSection();
+ virtual void emitTextAttribute(unsigned Attribute, StringRef String);
+ virtual void emitIntTextAttribute(unsigned Attribute, unsigned IntValue,
+ StringRef StringValue);
+
+ void emitTargetAttributes(const MCSubtargetInfo &STI);
};
// This part is for ascii assembly output
class RISCVTargetAsmStreamer : public RISCVTargetStreamer {
formatted_raw_ostream &OS;
+ void finishAttributeSection() override;
+ void emitAttribute(unsigned Attribute, unsigned Value) override;
+ void emitTextAttribute(unsigned Attribute, StringRef String) override;
+ void emitIntTextAttribute(unsigned Attribute, unsigned IntValue,
+ StringRef StringValue) override;
+
public:
RISCVTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
void emitDirectiveOptionPush() override;
void emitDirectiveOptionPop() override;
+ void emitDirectiveOptionPIC() override;
+ void emitDirectiveOptionNoPIC() override;
void emitDirectiveOptionRVC() override;
void emitDirectiveOptionNoRVC() override;
void emitDirectiveOptionRelax() override;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h
index f23f742a4782..9baa2cc2741a 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h
@@ -43,6 +43,9 @@ void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &);
FunctionPass *createRISCVExpandPseudoPass();
void initializeRISCVExpandPseudoPass(PassRegistry &);
+FunctionPass *createRISCVExpandAtomicPseudoPass();
+void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
+
InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
RISCVSubtarget &,
RISCVRegisterBankInfo &);
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td
index 770e883221d1..f0583f691936 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td
@@ -16,21 +16,21 @@ def FeatureStdExtM
: SubtargetFeature<"m", "HasStdExtM", "true",
"'M' (Integer Multiplication and Division)">;
def HasStdExtM : Predicate<"Subtarget->hasStdExtM()">,
- AssemblerPredicate<"FeatureStdExtM",
+ AssemblerPredicate<(all_of FeatureStdExtM),
"'M' (Integer Multiplication and Division)">;
def FeatureStdExtA
: SubtargetFeature<"a", "HasStdExtA", "true",
"'A' (Atomic Instructions)">;
def HasStdExtA : Predicate<"Subtarget->hasStdExtA()">,
- AssemblerPredicate<"FeatureStdExtA",
+ AssemblerPredicate<(all_of FeatureStdExtA),
"'A' (Atomic Instructions)">;
def FeatureStdExtF
: SubtargetFeature<"f", "HasStdExtF", "true",
"'F' (Single-Precision Floating-Point)">;
def HasStdExtF : Predicate<"Subtarget->hasStdExtF()">,
- AssemblerPredicate<"FeatureStdExtF",
+ AssemblerPredicate<(all_of FeatureStdExtF),
"'F' (Single-Precision Floating-Point)">;
def FeatureStdExtD
@@ -38,30 +38,130 @@ def FeatureStdExtD
"'D' (Double-Precision Floating-Point)",
[FeatureStdExtF]>;
def HasStdExtD : Predicate<"Subtarget->hasStdExtD()">,
- AssemblerPredicate<"FeatureStdExtD",
+ AssemblerPredicate<(all_of FeatureStdExtD),
"'D' (Double-Precision Floating-Point)">;
def FeatureStdExtC
: SubtargetFeature<"c", "HasStdExtC", "true",
"'C' (Compressed Instructions)">;
def HasStdExtC : Predicate<"Subtarget->hasStdExtC()">,
- AssemblerPredicate<"FeatureStdExtC",
+ AssemblerPredicate<(all_of FeatureStdExtC),
"'C' (Compressed Instructions)">;
-def FeatureRVCHints
- : SubtargetFeature<"rvc-hints", "EnableRVCHintInstrs", "true",
- "Enable RVC Hint Instructions.">;
+def FeatureExtZbb
+ : SubtargetFeature<"experimental-zbb", "HasStdExtZbb", "true",
+ "'Zbb' (Base 'B' Instructions)">;
+def HasStdExtZbb : Predicate<"Subtarget->hasStdExtZbb()">,
+ AssemblerPredicate<(all_of FeatureExtZbb),
+ "'Zbb' (Base 'B' Instructions)">;
+
+def FeatureExtZbc
+ : SubtargetFeature<"experimental-zbc", "HasStdExtZbc", "true",
+ "'Zbc' (Carry-Less 'B' Instructions)">;
+def HasStdExtZbc : Predicate<"Subtarget->hasStdExtZbc()">,
+ AssemblerPredicate<(all_of FeatureExtZbc),
+ "'Zbc' (Carry-Less 'B' Instructions)">;
+
+def FeatureExtZbe
+ : SubtargetFeature<"experimental-zbe", "HasStdExtZbe", "true",
+ "'Zbe' (Extract-Deposit 'B' Instructions)">;
+def HasStdExtZbe : Predicate<"Subtarget->hasStdExtZbe()">,
+ AssemblerPredicate<(all_of FeatureExtZbe),
+ "'Zbe' (Extract-Deposit 'B' Instructions)">;
+
+def FeatureExtZbf
+ : SubtargetFeature<"experimental-zbf", "HasStdExtZbf", "true",
+ "'Zbf' (Bit-Field 'B' Instructions)">;
+def HasStdExtZbf : Predicate<"Subtarget->hasStdExtZbf()">,
+ AssemblerPredicate<(all_of FeatureExtZbf),
+ "'Zbf' (Bit-Field 'B' Instructions)">;
+
+def FeatureExtZbm
+ : SubtargetFeature<"experimental-zbm", "HasStdExtZbm", "true",
+ "'Zbm' (Matrix 'B' Instructions)">;
+def HasStdExtZbm : Predicate<"Subtarget->hasStdExtZbm()">,
+ AssemblerPredicate<(all_of FeatureExtZbm),
+ "'Zbm' (Matrix 'B' Instructions)">;
+
+def FeatureExtZbp
+ : SubtargetFeature<"experimental-zbp", "HasStdExtZbp", "true",
+ "'Zbp' (Permutation 'B' Instructions)">;
+def HasStdExtZbp : Predicate<"Subtarget->hasStdExtZbp()">,
+ AssemblerPredicate<(all_of FeatureExtZbp),
+ "'Zbp' (Permutation 'B' Instructions)">;
+
+def FeatureExtZbr
+ : SubtargetFeature<"experimental-zbr", "HasStdExtZbr", "true",
+ "'Zbr' (Polynomial Reduction 'B' Instructions)">;
+def HasStdExtZbr : Predicate<"Subtarget->hasStdExtZbr()">,
+ AssemblerPredicate<(all_of FeatureExtZbr),
+ "'Zbr' (Polynomial Reduction 'B' Instructions)">;
+
+def FeatureExtZbs
+ : SubtargetFeature<"experimental-zbs", "HasStdExtZbs", "true",
+ "'Zbs' (Single-Bit 'B' Instructions)">;
+def HasStdExtZbs : Predicate<"Subtarget->hasStdExtZbs()">,
+ AssemblerPredicate<(all_of FeatureExtZbs),
+ "'Zbs' (Single-Bit 'B' Instructions)">;
+
+def FeatureExtZbt
+ : SubtargetFeature<"experimental-zbt", "HasStdExtZbt", "true",
+ "'Zbt' (Ternary 'B' Instructions)">;
+def HasStdExtZbt : Predicate<"Subtarget->hasStdExtZbt()">,
+ AssemblerPredicate<(all_of FeatureExtZbt),
+ "'Zbt' (Ternary 'B' Instructions)">;
+
+// Some instructions belong to both the basic and the permutation
+// subextensions. They should be enabled if either has been specified.
+def HasStdExtZbbOrZbp
+ : Predicate<"Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp()">,
+ AssemblerPredicate<(any_of FeatureExtZbb, FeatureExtZbp)>;
+
+def FeatureExtZbproposedc
+ : SubtargetFeature<"experimental-zbproposedc", "HasStdExtZbproposedc", "true",
+ "'Zbproposedc' (Proposed Compressed 'B' Instructions)">;
+def HasStdExtZbproposedc : Predicate<"Subtarget->hasStdExtZbproposedc()">,
+ AssemblerPredicate<(all_of FeatureExtZbproposedc),
+ "'Zbproposedc' (Proposed Compressed 'B' Instructions)">;
+
+def FeatureStdExtB
+ : SubtargetFeature<"experimental-b", "HasStdExtB", "true",
+ "'B' (Bit Manipulation Instructions)",
+ [FeatureExtZbb,
+ FeatureExtZbc,
+ FeatureExtZbe,
+ FeatureExtZbf,
+ FeatureExtZbm,
+ FeatureExtZbp,
+ FeatureExtZbr,
+ FeatureExtZbs,
+ FeatureExtZbt]>;
+def HasStdExtB : Predicate<"Subtarget->hasStdExtB()">,
+ AssemblerPredicate<(all_of FeatureStdExtB),
+ "'B' (Bit Manipulation Instructions)">;
+
+def FeatureNoRVCHints
+ : SubtargetFeature<"no-rvc-hints", "EnableRVCHintInstrs", "false",
+ "Disable RVC Hint Instructions.">;
def HasRVCHints : Predicate<"Subtarget->enableRVCHintInstrs()">,
- AssemblerPredicate<"FeatureRVCHints",
- "RVC Hint Instructions">;
+ AssemblerPredicate<(all_of(not FeatureNoRVCHints)),
+ "RVC Hint Instructions">;
+
+def FeatureStdExtV
+ : SubtargetFeature<"experimental-v", "HasStdExtV", "true",
+ "'V' (Vector Instructions)",
+ [FeatureStdExtF]>;
+def HasStdExtV : Predicate<"Subtarget->hasStdExtV()">,
+ AssemblerPredicate<(all_of FeatureStdExtV),
+ "'V' (Vector Instructions)">;
def Feature64Bit
: SubtargetFeature<"64bit", "HasRV64", "true", "Implements RV64">;
def IsRV64 : Predicate<"Subtarget->is64Bit()">,
- AssemblerPredicate<"Feature64Bit",
+ AssemblerPredicate<(all_of Feature64Bit),
"RV64I Base Instruction Set">;
def IsRV32 : Predicate<"!Subtarget->is64Bit()">,
- AssemblerPredicate<"!Feature64Bit",
+ AssemblerPredicate<(all_of (not Feature64Bit)),
"RV32I Base Instruction Set">;
def RV64 : HwMode<"+64bit">;
@@ -71,7 +171,7 @@ def FeatureRV32E
: SubtargetFeature<"e", "IsRV32E", "true",
"Implements RV32E (provides 16 rather than 32 GPRs)">;
def IsRV32E : Predicate<"Subtarget->isRV32E()">,
- AssemblerPredicate<"FeatureRV32E">;
+ AssemblerPredicate<(all_of FeatureRV32E)>;
def FeatureRelax
: SubtargetFeature<"relax", "EnableLinkerRelax", "true",
@@ -82,6 +182,9 @@ foreach i = {1-31} in
SubtargetFeature<"reserve-x"#i, "UserReservedRegister[RISCV::X"#i#"]",
"true", "Reserve X"#i>;
+def FeatureSaveRestore : SubtargetFeature<"save-restore", "EnableSaveRestore",
+ "true", "Enable save/restore.">;
+
//===----------------------------------------------------------------------===//
// Named operands for CSR instructions.
//===----------------------------------------------------------------------===//
@@ -104,15 +207,13 @@ include "RISCVSchedRocket64.td"
// RISC-V processors supported.
//===----------------------------------------------------------------------===//
-def : ProcessorModel<"generic-rv32", NoSchedModel, [FeatureRVCHints]>;
+def : ProcessorModel<"generic-rv32", NoSchedModel, []>;
-def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit,
- FeatureRVCHints]>;
+def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit]>;
-def : ProcessorModel<"rocket-rv32", Rocket32Model, [FeatureRVCHints]>;
+def : ProcessorModel<"rocket-rv32", Rocket32Model, []>;
-def : ProcessorModel<"rocket-rv64", Rocket64Model, [Feature64Bit,
- FeatureRVCHints]>;
+def : ProcessorModel<"rocket-rv64", Rocket64Model, [Feature64Bit]>;
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index f4aa28bcc0c1..8955994b1c2e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -11,9 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "RISCV.h"
#include "MCTargetDesc/RISCVInstPrinter.h"
#include "MCTargetDesc/RISCVMCExpr.h"
+#include "MCTargetDesc/RISCVTargetStreamer.h"
+#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "TargetInfo/RISCVTargetInfo.h"
#include "llvm/ADT/Statistic.h"
@@ -37,14 +38,18 @@ STATISTIC(RISCVNumInstrsCompressed,
namespace {
class RISCVAsmPrinter : public AsmPrinter {
+ const MCSubtargetInfo *STI;
+
public:
explicit RISCVAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)) {}
+ : AsmPrinter(TM, std::move(Streamer)), STI(TM.getMCSubtargetInfo()) {}
StringRef getPassName() const override { return "RISCV Assembly Printer"; }
- void EmitInstruction(const MachineInstr *MI) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void emitInstruction(const MachineInstr *MI) override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode, raw_ostream &OS) override;
@@ -59,6 +64,12 @@ public:
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const {
return LowerRISCVMachineOperandToMCOperand(MO, MCOp, *this);
}
+
+ void emitStartOfAsmFile(Module &M) override;
+ void emitEndOfAsmFile(Module &M) override;
+
+private:
+ void emitAttributes();
};
}
@@ -66,8 +77,7 @@ public:
#include "RISCVGenCompressInstEmitter.inc"
void RISCVAsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
MCInst CInst;
- bool Res = compressInst(CInst, Inst, *TM.getMCSubtargetInfo(),
- OutStreamer->getContext());
+ bool Res = compressInst(CInst, Inst, *STI, OutStreamer->getContext());
if (Res)
++RISCVNumInstrsCompressed;
AsmPrinter::EmitToStreamer(*OutStreamer, Res ? CInst : Inst);
@@ -77,7 +87,7 @@ void RISCVAsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
// instructions) auto-generated.
#include "RISCVGenMCPseudoLowering.inc"
-void RISCVAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void RISCVAsmPrinter::emitInstruction(const MachineInstr *MI) {
// Do any auto-generated pseudo lowerings.
if (emitPseudoExpansionLowering(*OutStreamer, MI))
return;
@@ -154,6 +164,45 @@ bool RISCVAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, ExtraCode, OS);
}
+bool RISCVAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ // Set the current MCSubtargetInfo to a copy which has the correct
+ // feature bits for the current MachineFunction.
+ MCSubtargetInfo &NewSTI =
+ OutStreamer->getContext().getSubtargetCopy(*TM.getMCSubtargetInfo());
+ NewSTI.setFeatureBits(MF.getSubtarget().getFeatureBits());
+ STI = &NewSTI;
+
+ SetupMachineFunction(MF);
+ emitFunctionBody();
+ return false;
+}
+
+void RISCVAsmPrinter::emitStartOfAsmFile(Module &M) {
+ if (TM.getTargetTriple().isOSBinFormatELF())
+ emitAttributes();
+}
+
+void RISCVAsmPrinter::emitEndOfAsmFile(Module &M) {
+ RISCVTargetStreamer &RTS =
+ static_cast<RISCVTargetStreamer &>(*OutStreamer->getTargetStreamer());
+
+ if (TM.getTargetTriple().isOSBinFormatELF())
+ RTS.finishAttributeSection();
+}
+
+void RISCVAsmPrinter::emitAttributes() {
+ RISCVTargetStreamer &RTS =
+ static_cast<RISCVTargetStreamer &>(*OutStreamer->getTargetStreamer());
+
+ const Triple &TT = TM.getTargetTriple();
+ StringRef CPU = TM.getTargetCPU();
+ StringRef FS = TM.getTargetFeatureString();
+ const RISCVTargetMachine &RTM = static_cast<const RISCVTargetMachine &>(TM);
+ const RISCVSubtarget STI(TT, CPU, FS, /*ABIName=*/"", RTM);
+
+ RTS.emitTargetAttributes(STI);
+}
+
// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVAsmPrinter() {
RegisterAsmPrinter<RISCVAsmPrinter> X(getTheRISCV32Target());
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
new file mode 100644
index 000000000000..26ce16486bd9
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
@@ -0,0 +1,618 @@
+//===-- RISCVExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands atomic pseudo instructions into
+// target instructions. This pass should be run at the last possible moment,
+// so that no later pass can break the forward-progress requirements of the
+// LR/SC sequences.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVTargetMachine.h"
+
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+#define RISCV_EXPAND_ATOMIC_PSEUDO_NAME \
+ "RISCV atomic pseudo instruction expansion pass"
+
+namespace {
+
+class RISCVExpandAtomicPseudo : public MachineFunctionPass {
+public:
+ const RISCVInstrInfo *TII;
+ static char ID;
+
+ RISCVExpandAtomicPseudo() : MachineFunctionPass(ID) {
+ initializeRISCVExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return RISCV_EXPAND_ATOMIC_PSEUDO_NAME;
+ }
+
+private:
+ bool expandMBB(MachineBasicBlock &MBB);
+ bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandAtomicBinOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
+ bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ AtomicRMWInst::BinOp, bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, bool IsMasked,
+ int Width, MachineBasicBlock::iterator &NextMBBI);
+};
+
+char RISCVExpandAtomicPseudo::ID = 0;
+
+bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ bool Modified = false;
+ for (auto &MBB : MF)
+ Modified |= expandMBB(MBB);
+ return Modified;
+}
+
+bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+ Modified |= expandMI(MBB, MBBI, NMBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ // RISCVInstrInfo::getInstSizeInBytes hard-codes the number of expanded
+ // instructions for each pseudo, and must be updated when adding new pseudos
+ // or changing existing ones.
+ switch (MBBI->getOpcode()) {
+ case RISCV::PseudoAtomicLoadNand32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
+ NextMBBI);
+ case RISCV::PseudoAtomicLoadNand64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicSwap32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadAdd32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadSub32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadNand32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadUMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadUMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
+ NextMBBI);
+ case RISCV::PseudoCmpXchg32:
+ return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
+ case RISCV::PseudoCmpXchg64:
+ return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
+ case RISCV::PseudoMaskedCmpXchg32:
+ return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
+ }
+
+ return false;
+}
+
+static unsigned getLRForRMW32(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ default:
+ llvm_unreachable("Unexpected AtomicOrdering");
+ case AtomicOrdering::Monotonic:
+ return RISCV::LR_W;
+ case AtomicOrdering::Acquire:
+ return RISCV::LR_W_AQ;
+ case AtomicOrdering::Release:
+ return RISCV::LR_W;
+ case AtomicOrdering::AcquireRelease:
+ return RISCV::LR_W_AQ;
+ case AtomicOrdering::SequentiallyConsistent:
+ return RISCV::LR_W_AQ_RL;
+ }
+}
+
+static unsigned getSCForRMW32(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ default:
+ llvm_unreachable("Unexpected AtomicOrdering");
+ case AtomicOrdering::Monotonic:
+ return RISCV::SC_W;
+ case AtomicOrdering::Acquire:
+ return RISCV::SC_W;
+ case AtomicOrdering::Release:
+ return RISCV::SC_W_RL;
+ case AtomicOrdering::AcquireRelease:
+ return RISCV::SC_W_RL;
+ case AtomicOrdering::SequentiallyConsistent:
+ return RISCV::SC_W_AQ_RL;
+ }
+}
+
+static unsigned getLRForRMW64(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ default:
+ llvm_unreachable("Unexpected AtomicOrdering");
+ case AtomicOrdering::Monotonic:
+ return RISCV::LR_D;
+ case AtomicOrdering::Acquire:
+ return RISCV::LR_D_AQ;
+ case AtomicOrdering::Release:
+ return RISCV::LR_D;
+ case AtomicOrdering::AcquireRelease:
+ return RISCV::LR_D_AQ;
+ case AtomicOrdering::SequentiallyConsistent:
+ return RISCV::LR_D_AQ_RL;
+ }
+}
+
+static unsigned getSCForRMW64(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ default:
+ llvm_unreachable("Unexpected AtomicOrdering");
+ case AtomicOrdering::Monotonic:
+ return RISCV::SC_D;
+ case AtomicOrdering::Acquire:
+ return RISCV::SC_D;
+ case AtomicOrdering::Release:
+ return RISCV::SC_D_RL;
+ case AtomicOrdering::AcquireRelease:
+ return RISCV::SC_D_RL;
+ case AtomicOrdering::SequentiallyConsistent:
+ return RISCV::SC_D_AQ_RL;
+ }
+}
+
+static unsigned getLRForRMW(AtomicOrdering Ordering, int Width) {
+ if (Width == 32)
+ return getLRForRMW32(Ordering);
+ if (Width == 64)
+ return getLRForRMW64(Ordering);
+ llvm_unreachable("Unexpected LR width\n");
+}
+
+static unsigned getSCForRMW(AtomicOrdering Ordering, int Width) {
+ if (Width == 32)
+ return getSCForRMW32(Ordering);
+ if (Width == 64)
+ return getSCForRMW64(Ordering);
+ llvm_unreachable("Unexpected SC width\n");
+}
+
+static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
+ DebugLoc DL, MachineBasicBlock *ThisMBB,
+ MachineBasicBlock *LoopMBB,
+ MachineBasicBlock *DoneMBB,
+ AtomicRMWInst::BinOp BinOp, int Width) {
+ Register DestReg = MI.getOperand(0).getReg();
+ Register ScratchReg = MI.getOperand(1).getReg();
+ Register AddrReg = MI.getOperand(2).getReg();
+ Register IncrReg = MI.getOperand(3).getReg();
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
+
+ // .loop:
+ // lr.[w|d] dest, (addr)
+ // binop scratch, dest, val
+ // sc.[w|d] scratch, scratch, (addr)
+ // bnez scratch, loop
+ BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
+ .addReg(AddrReg);
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Nand:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(-1);
+ break;
+ }
+ BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
+ .addReg(AddrReg)
+ .addReg(ScratchReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopMBB);
+}
+
+static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
+ MachineBasicBlock *MBB, Register DestReg,
+ Register OldValReg, Register NewValReg,
+ Register MaskReg, Register ScratchReg) {
+ assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
+ assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
+ assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");
+
+ // We select bits from newval and oldval using:
+ // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
+ // r = oldval ^ ((oldval ^ newval) & masktargetdata);
+ BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
+ .addReg(OldValReg)
+ .addReg(NewValReg);
+ BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(ScratchReg)
+ .addReg(MaskReg);
+ BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
+ .addReg(OldValReg)
+ .addReg(ScratchReg);
+}
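The identity used by insertMaskedMerge is easy to sanity-check outside the MIR builders. Below is a minimal stand-alone C++ sketch (the helper name and the values are made up for illustration) showing that r = oldval ^ ((oldval ^ newval) & mask) takes bits from newval where the mask is set and keeps oldval elsewhere:

#include <cassert>
#include <cstdint>

// Select bits from NewVal where Mask is 1 and from OldVal where Mask is 0.
static uint32_t maskedMerge(uint32_t OldVal, uint32_t NewVal, uint32_t Mask) {
  return OldVal ^ ((OldVal ^ NewVal) & Mask);
}

int main() {
  // Byte 1 (bits 8..15) is the masked field; every other bit is preserved.
  uint32_t Old = 0xAABBCCDD, New = 0x00007700, Mask = 0x0000FF00;
  assert(maskedMerge(Old, New, Mask) == 0xAABB77DD);
  return 0;
}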
+
+static void doMaskedAtomicBinOpExpansion(
+ const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
+ MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
+ MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
+ assert(Width == 32 && "Should never need to expand masked 64-bit operations");
+ Register DestReg = MI.getOperand(0).getReg();
+ Register ScratchReg = MI.getOperand(1).getReg();
+ Register AddrReg = MI.getOperand(2).getReg();
+ Register IncrReg = MI.getOperand(3).getReg();
+ Register MaskReg = MI.getOperand(4).getReg();
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
+
+ // .loop:
+ // lr.w destreg, (alignedaddr)
+ // binop scratch, destreg, incr
+ // xor scratch, destreg, scratch
+ // and scratch, scratch, masktargetdata
+ // xor scratch, destreg, scratch
+ // sc.w scratch, scratch, (alignedaddr)
+ // bnez scratch, loop
+ BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ .addReg(AddrReg);
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Xchg:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
+ .addReg(IncrReg)
+ .addImm(0);
+ break;
+ case AtomicRMWInst::Add:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Sub:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Nand:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(-1);
+ break;
+ }
+
+ insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
+ ScratchReg);
+
+ BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ .addReg(AddrReg)
+ .addReg(ScratchReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopMBB);
+}
+
+bool RISCVExpandAtomicPseudo::expandAtomicBinOp(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+
+ MachineFunction *MF = MBB.getParent();
+ auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Insert new MBBs.
+ MF->insert(++MBB.getIterator(), LoopMBB);
+ MF->insert(++LoopMBB->getIterator(), DoneMBB);
+
+ // Set up successors and transfer remaining instructions to DoneMBB.
+ LoopMBB->addSuccessor(LoopMBB);
+ LoopMBB->addSuccessor(DoneMBB);
+ DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+ DoneMBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(LoopMBB);
+
+ if (!IsMasked)
+ doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);
+ else
+ doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
+ Width);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *LoopMBB);
+ computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+ return true;
+}
+
+static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
+ MachineBasicBlock *MBB, Register ValReg,
+ Register ShamtReg) {
+ BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
+ .addReg(ValReg)
+ .addReg(ShamtReg);
+ BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
+ .addReg(ValReg)
+ .addReg(ShamtReg);
+}
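insertSext models sign extension with a shift-left/arithmetic-shift-right pair. A stand-alone C++ sketch of the same idiom, assuming an 8-bit field and a shift amount of 24 (the in-tree code instead takes the shift amount from a register computed by the caller):

#include <cassert>
#include <cstdint>

// Sign-extend the low (32 - Shamt) bits of Val, mirroring the SLL + SRA pair.
static int32_t sextViaShifts(uint32_t Val, unsigned Shamt) {
  // Move the field's sign bit up to bit 31, then arithmetic-shift it back.
  // (Right-shifting a negative signed value is implementation-defined before
  // C++20, but is an arithmetic shift on the usual targets.)
  return static_cast<int32_t>(Val << Shamt) >> Shamt;
}

int main() {
  assert(sextViaShifts(0xFFu, 24) == -1);  // 8-bit field 0xFF -> -1
  assert(sextViaShifts(0x7Fu, 24) == 127); // 8-bit field 0x7F -> 127
  return 0;
}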
+
+bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI) {
+ assert(IsMasked == true &&
+ "Should only need to expand masked atomic max/min");
+ assert(Width == 32 && "Should never need to expand masked 64-bit operations");
+
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MBB.getParent();
+ auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Insert new MBBs.
+ MF->insert(++MBB.getIterator(), LoopHeadMBB);
+ MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
+ MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
+ MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+
+ // Set up successors and transfer remaining instructions to DoneMBB.
+ LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
+ LoopHeadMBB->addSuccessor(LoopTailMBB);
+ LoopIfBodyMBB->addSuccessor(LoopTailMBB);
+ LoopTailMBB->addSuccessor(LoopHeadMBB);
+ LoopTailMBB->addSuccessor(DoneMBB);
+ DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+ DoneMBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(LoopHeadMBB);
+
+ Register DestReg = MI.getOperand(0).getReg();
+ Register Scratch1Reg = MI.getOperand(1).getReg();
+ Register Scratch2Reg = MI.getOperand(2).getReg();
+ Register AddrReg = MI.getOperand(3).getReg();
+ Register IncrReg = MI.getOperand(4).getReg();
+ Register MaskReg = MI.getOperand(5).getReg();
+ bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());
+
+ //
+ // .loophead:
+ // lr.w destreg, (alignedaddr)
+ // and scratch2, destreg, mask
+ // mv scratch1, destreg
+ // [sext scratch2 if signed min/max]
+ // ifnochangeneeded scratch2, incr, .looptail
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ .addReg(AddrReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
+ .addReg(DestReg)
+ .addReg(MaskReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
+ .addReg(DestReg)
+ .addImm(0);
+
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Max: {
+ insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+ .addReg(Scratch2Reg)
+ .addReg(IncrReg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+ case AtomicRMWInst::Min: {
+ insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+ .addReg(IncrReg)
+ .addReg(Scratch2Reg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+ case AtomicRMWInst::UMax:
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+ .addReg(Scratch2Reg)
+ .addReg(IncrReg)
+ .addMBB(LoopTailMBB);
+ break;
+ case AtomicRMWInst::UMin:
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+ .addReg(IncrReg)
+ .addReg(Scratch2Reg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+
+ // .loopifbody:
+ // xor scratch1, destreg, incr
+ // and scratch1, scratch1, mask
+ // xor scratch1, destreg, scratch1
+ insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
+ MaskReg, Scratch1Reg);
+
+ // .looptail:
+  //   sc.w scratch1, scratch1, (alignedaddr)
+  //   bnez scratch1, loophead
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), Scratch1Reg)
+ .addReg(AddrReg)
+ .addReg(Scratch1Reg);
+ BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
+ .addReg(Scratch1Reg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopHeadMBB);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
+ computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
+ computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
+ computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+ return true;
+}
+
+bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
+ int Width, MachineBasicBlock::iterator &NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MBB.getParent();
+ auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Insert new MBBs.
+ MF->insert(++MBB.getIterator(), LoopHeadMBB);
+ MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
+ MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+
+ // Set up successors and transfer remaining instructions to DoneMBB.
+ LoopHeadMBB->addSuccessor(LoopTailMBB);
+ LoopHeadMBB->addSuccessor(DoneMBB);
+ LoopTailMBB->addSuccessor(DoneMBB);
+ LoopTailMBB->addSuccessor(LoopHeadMBB);
+ DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+ DoneMBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(LoopHeadMBB);
+
+ Register DestReg = MI.getOperand(0).getReg();
+ Register ScratchReg = MI.getOperand(1).getReg();
+ Register AddrReg = MI.getOperand(2).getReg();
+ Register CmpValReg = MI.getOperand(3).getReg();
+ Register NewValReg = MI.getOperand(4).getReg();
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
+
+ if (!IsMasked) {
+ // .loophead:
+ // lr.[w|d] dest, (addr)
+ // bne dest, cmpval, done
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
+ .addReg(AddrReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
+ .addReg(DestReg)
+ .addReg(CmpValReg)
+ .addMBB(DoneMBB);
+ // .looptail:
+ // sc.[w|d] scratch, newval, (addr)
+ // bnez scratch, loophead
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
+ .addReg(AddrReg)
+ .addReg(NewValReg);
+ BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopHeadMBB);
+ } else {
+ // .loophead:
+ // lr.w dest, (addr)
+ // and scratch, dest, mask
+ // bne scratch, cmpval, done
+ Register MaskReg = MI.getOperand(5).getReg();
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
+ .addReg(AddrReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(DestReg)
+ .addReg(MaskReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(CmpValReg)
+ .addMBB(DoneMBB);
+
+ // .looptail:
+ // xor scratch, dest, newval
+ // and scratch, scratch, mask
+ // xor scratch, dest, scratch
+    //   sc.w scratch, scratch, (addr)
+ // bnez scratch, loophead
+ insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
+ MaskReg, ScratchReg);
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
+ .addReg(AddrReg)
+ .addReg(ScratchReg);
+ BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopHeadMBB);
+ }
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
+ computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
+ computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+ return true;
+}
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVExpandAtomicPseudo, "riscv-expand-atomic-pseudo",
+ RISCV_EXPAND_ATOMIC_PSEUDO_NAME, false, false)
+
+namespace llvm {
+
+FunctionPass *createRISCVExpandAtomicPseudoPass() {
+ return new RISCVExpandAtomicPseudo();
+}
+
+} // end of namespace llvm
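For context, the new pass only takes effect once it is created via createRISCVExpandAtomicPseudoPass() and added to the backend pipeline; that wiring lives in RISCVTargetMachine.cpp rather than in this file. Presumably it is added in one of the late pre-emit hooks (e.g. an addPass(createRISCVExpandAtomicPseudoPass()) call) so that earlier machine passes cannot split up the LR/SC sequences built here, but the exact hook is an assumption since it is not shown in this hunk.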
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 84bce0f48562..504355fb8bf8 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -43,17 +43,6 @@ private:
bool expandMBB(MachineBasicBlock &MBB);
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
- bool expandAtomicBinOp(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
- bool IsMasked, int Width,
- MachineBasicBlock::iterator &NextMBBI);
- bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- AtomicRMWInst::BinOp, bool IsMasked, int Width,
- MachineBasicBlock::iterator &NextMBBI);
- bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, bool IsMasked,
- int Width, MachineBasicBlock::iterator &NextMBBI);
bool expandAuipcInstPair(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI,
@@ -98,41 +87,10 @@ bool RISCVExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
+ // RISCVInstrInfo::getInstSizeInBytes hard-codes the number of expanded
+ // instructions for each pseudo, and must be updated when adding new pseudos
+ // or changing existing ones.
switch (MBBI->getOpcode()) {
- case RISCV::PseudoAtomicLoadNand32:
- return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
- NextMBBI);
- case RISCV::PseudoAtomicLoadNand64:
- return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
- NextMBBI);
- case RISCV::PseudoMaskedAtomicSwap32:
- return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
- NextMBBI);
- case RISCV::PseudoMaskedAtomicLoadAdd32:
- return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
- case RISCV::PseudoMaskedAtomicLoadSub32:
- return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
- case RISCV::PseudoMaskedAtomicLoadNand32:
- return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
- NextMBBI);
- case RISCV::PseudoMaskedAtomicLoadMax32:
- return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
- NextMBBI);
- case RISCV::PseudoMaskedAtomicLoadMin32:
- return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
- NextMBBI);
- case RISCV::PseudoMaskedAtomicLoadUMax32:
- return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
- NextMBBI);
- case RISCV::PseudoMaskedAtomicLoadUMin32:
- return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
- NextMBBI);
- case RISCV::PseudoCmpXchg32:
- return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
- case RISCV::PseudoCmpXchg64:
- return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
- case RISCV::PseudoMaskedCmpXchg32:
- return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
case RISCV::PseudoLLA:
return expandLoadLocalAddress(MBB, MBBI, NextMBBI);
case RISCV::PseudoLA:
@@ -146,481 +104,6 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
return false;
}
-static unsigned getLRForRMW32(AtomicOrdering Ordering) {
- switch (Ordering) {
- default:
- llvm_unreachable("Unexpected AtomicOrdering");
- case AtomicOrdering::Monotonic:
- return RISCV::LR_W;
- case AtomicOrdering::Acquire:
- return RISCV::LR_W_AQ;
- case AtomicOrdering::Release:
- return RISCV::LR_W;
- case AtomicOrdering::AcquireRelease:
- return RISCV::LR_W_AQ;
- case AtomicOrdering::SequentiallyConsistent:
- return RISCV::LR_W_AQ_RL;
- }
-}
-
-static unsigned getSCForRMW32(AtomicOrdering Ordering) {
- switch (Ordering) {
- default:
- llvm_unreachable("Unexpected AtomicOrdering");
- case AtomicOrdering::Monotonic:
- return RISCV::SC_W;
- case AtomicOrdering::Acquire:
- return RISCV::SC_W;
- case AtomicOrdering::Release:
- return RISCV::SC_W_RL;
- case AtomicOrdering::AcquireRelease:
- return RISCV::SC_W_RL;
- case AtomicOrdering::SequentiallyConsistent:
- return RISCV::SC_W_AQ_RL;
- }
-}
-
-static unsigned getLRForRMW64(AtomicOrdering Ordering) {
- switch (Ordering) {
- default:
- llvm_unreachable("Unexpected AtomicOrdering");
- case AtomicOrdering::Monotonic:
- return RISCV::LR_D;
- case AtomicOrdering::Acquire:
- return RISCV::LR_D_AQ;
- case AtomicOrdering::Release:
- return RISCV::LR_D;
- case AtomicOrdering::AcquireRelease:
- return RISCV::LR_D_AQ;
- case AtomicOrdering::SequentiallyConsistent:
- return RISCV::LR_D_AQ_RL;
- }
-}
-
-static unsigned getSCForRMW64(AtomicOrdering Ordering) {
- switch (Ordering) {
- default:
- llvm_unreachable("Unexpected AtomicOrdering");
- case AtomicOrdering::Monotonic:
- return RISCV::SC_D;
- case AtomicOrdering::Acquire:
- return RISCV::SC_D;
- case AtomicOrdering::Release:
- return RISCV::SC_D_RL;
- case AtomicOrdering::AcquireRelease:
- return RISCV::SC_D_RL;
- case AtomicOrdering::SequentiallyConsistent:
- return RISCV::SC_D_AQ_RL;
- }
-}
-
-static unsigned getLRForRMW(AtomicOrdering Ordering, int Width) {
- if (Width == 32)
- return getLRForRMW32(Ordering);
- if (Width == 64)
- return getLRForRMW64(Ordering);
- llvm_unreachable("Unexpected LR width\n");
-}
-
-static unsigned getSCForRMW(AtomicOrdering Ordering, int Width) {
- if (Width == 32)
- return getSCForRMW32(Ordering);
- if (Width == 64)
- return getSCForRMW64(Ordering);
- llvm_unreachable("Unexpected SC width\n");
-}
-
-static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
- DebugLoc DL, MachineBasicBlock *ThisMBB,
- MachineBasicBlock *LoopMBB,
- MachineBasicBlock *DoneMBB,
- AtomicRMWInst::BinOp BinOp, int Width) {
- Register DestReg = MI.getOperand(0).getReg();
- Register ScratchReg = MI.getOperand(1).getReg();
- Register AddrReg = MI.getOperand(2).getReg();
- Register IncrReg = MI.getOperand(3).getReg();
- AtomicOrdering Ordering =
- static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
-
- // .loop:
- // lr.[w|d] dest, (addr)
- // binop scratch, dest, val
- // sc.[w|d] scratch, scratch, (addr)
- // bnez scratch, loop
- BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
- .addReg(AddrReg);
- switch (BinOp) {
- default:
- llvm_unreachable("Unexpected AtomicRMW BinOp");
- case AtomicRMWInst::Nand:
- BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
- .addReg(DestReg)
- .addReg(IncrReg);
- BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
- .addReg(ScratchReg)
- .addImm(-1);
- break;
- }
- BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
- .addReg(AddrReg)
- .addReg(ScratchReg);
- BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
- .addReg(ScratchReg)
- .addReg(RISCV::X0)
- .addMBB(LoopMBB);
-}
-
-static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
- MachineBasicBlock *MBB, Register DestReg,
- Register OldValReg, Register NewValReg,
- Register MaskReg, Register ScratchReg) {
- assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
- assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
- assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");
-
- // We select bits from newval and oldval using:
- // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
- // r = oldval ^ ((oldval ^ newval) & masktargetdata);
- BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
- .addReg(OldValReg)
- .addReg(NewValReg);
- BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
- .addReg(ScratchReg)
- .addReg(MaskReg);
- BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
- .addReg(OldValReg)
- .addReg(ScratchReg);
-}
-
-static void doMaskedAtomicBinOpExpansion(
- const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
- MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
- MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
- assert(Width == 32 && "Should never need to expand masked 64-bit operations");
- Register DestReg = MI.getOperand(0).getReg();
- Register ScratchReg = MI.getOperand(1).getReg();
- Register AddrReg = MI.getOperand(2).getReg();
- Register IncrReg = MI.getOperand(3).getReg();
- Register MaskReg = MI.getOperand(4).getReg();
- AtomicOrdering Ordering =
- static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
-
- // .loop:
- // lr.w destreg, (alignedaddr)
- // binop scratch, destreg, incr
- // xor scratch, destreg, scratch
- // and scratch, scratch, masktargetdata
- // xor scratch, destreg, scratch
- // sc.w scratch, scratch, (alignedaddr)
- // bnez scratch, loop
- BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
- .addReg(AddrReg);
- switch (BinOp) {
- default:
- llvm_unreachable("Unexpected AtomicRMW BinOp");
- case AtomicRMWInst::Xchg:
- BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
- .addReg(IncrReg)
- .addImm(0);
- break;
- case AtomicRMWInst::Add:
- BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
- .addReg(DestReg)
- .addReg(IncrReg);
- break;
- case AtomicRMWInst::Sub:
- BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
- .addReg(DestReg)
- .addReg(IncrReg);
- break;
- case AtomicRMWInst::Nand:
- BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
- .addReg(DestReg)
- .addReg(IncrReg);
- BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
- .addReg(ScratchReg)
- .addImm(-1);
- break;
- }
-
- insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
- ScratchReg);
-
- BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
- .addReg(AddrReg)
- .addReg(ScratchReg);
- BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
- .addReg(ScratchReg)
- .addReg(RISCV::X0)
- .addMBB(LoopMBB);
-}
-
-bool RISCVExpandPseudo::expandAtomicBinOp(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
- MachineBasicBlock::iterator &NextMBBI) {
- MachineInstr &MI = *MBBI;
- DebugLoc DL = MI.getDebugLoc();
-
- MachineFunction *MF = MBB.getParent();
- auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
- auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-
- // Insert new MBBs.
- MF->insert(++MBB.getIterator(), LoopMBB);
- MF->insert(++LoopMBB->getIterator(), DoneMBB);
-
- // Set up successors and transfer remaining instructions to DoneMBB.
- LoopMBB->addSuccessor(LoopMBB);
- LoopMBB->addSuccessor(DoneMBB);
- DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
- DoneMBB->transferSuccessors(&MBB);
- MBB.addSuccessor(LoopMBB);
-
- if (!IsMasked)
- doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);
- else
- doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
- Width);
-
- NextMBBI = MBB.end();
- MI.eraseFromParent();
-
- LivePhysRegs LiveRegs;
- computeAndAddLiveIns(LiveRegs, *LoopMBB);
- computeAndAddLiveIns(LiveRegs, *DoneMBB);
-
- return true;
-}
-
-static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
- MachineBasicBlock *MBB, Register ValReg,
- Register ShamtReg) {
- BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
- .addReg(ValReg)
- .addReg(ShamtReg);
- BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
- .addReg(ValReg)
- .addReg(ShamtReg);
-}
-
-bool RISCVExpandPseudo::expandAtomicMinMaxOp(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
- MachineBasicBlock::iterator &NextMBBI) {
- assert(IsMasked == true &&
- "Should only need to expand masked atomic max/min");
- assert(Width == 32 && "Should never need to expand masked 64-bit operations");
-
- MachineInstr &MI = *MBBI;
- DebugLoc DL = MI.getDebugLoc();
- MachineFunction *MF = MBB.getParent();
- auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
- auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
- auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
- auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-
- // Insert new MBBs.
- MF->insert(++MBB.getIterator(), LoopHeadMBB);
- MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
- MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
- MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
-
- // Set up successors and transfer remaining instructions to DoneMBB.
- LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
- LoopHeadMBB->addSuccessor(LoopTailMBB);
- LoopIfBodyMBB->addSuccessor(LoopTailMBB);
- LoopTailMBB->addSuccessor(LoopHeadMBB);
- LoopTailMBB->addSuccessor(DoneMBB);
- DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
- DoneMBB->transferSuccessors(&MBB);
- MBB.addSuccessor(LoopHeadMBB);
-
- Register DestReg = MI.getOperand(0).getReg();
- Register Scratch1Reg = MI.getOperand(1).getReg();
- Register Scratch2Reg = MI.getOperand(2).getReg();
- Register AddrReg = MI.getOperand(3).getReg();
- Register IncrReg = MI.getOperand(4).getReg();
- Register MaskReg = MI.getOperand(5).getReg();
- bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
- AtomicOrdering Ordering =
- static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());
-
- //
- // .loophead:
- // lr.w destreg, (alignedaddr)
- // and scratch2, destreg, mask
- // mv scratch1, destreg
- // [sext scratch2 if signed min/max]
- // ifnochangeneeded scratch2, incr, .looptail
- BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
- .addReg(AddrReg);
- BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
- .addReg(DestReg)
- .addReg(MaskReg);
- BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
- .addReg(DestReg)
- .addImm(0);
-
- switch (BinOp) {
- default:
- llvm_unreachable("Unexpected AtomicRMW BinOp");
- case AtomicRMWInst::Max: {
- insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
- BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
- .addReg(Scratch2Reg)
- .addReg(IncrReg)
- .addMBB(LoopTailMBB);
- break;
- }
- case AtomicRMWInst::Min: {
- insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
- BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
- .addReg(IncrReg)
- .addReg(Scratch2Reg)
- .addMBB(LoopTailMBB);
- break;
- }
- case AtomicRMWInst::UMax:
- BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
- .addReg(Scratch2Reg)
- .addReg(IncrReg)
- .addMBB(LoopTailMBB);
- break;
- case AtomicRMWInst::UMin:
- BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
- .addReg(IncrReg)
- .addReg(Scratch2Reg)
- .addMBB(LoopTailMBB);
- break;
- }
-
- // .loopifbody:
- // xor scratch1, destreg, incr
- // and scratch1, scratch1, mask
- // xor scratch1, destreg, scratch1
- insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
- MaskReg, Scratch1Reg);
-
- // .looptail:
- // sc.w scratch1, scratch1, (addr)
- // bnez scratch1, loop
- BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), Scratch1Reg)
- .addReg(AddrReg)
- .addReg(Scratch1Reg);
- BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
- .addReg(Scratch1Reg)
- .addReg(RISCV::X0)
- .addMBB(LoopHeadMBB);
-
- NextMBBI = MBB.end();
- MI.eraseFromParent();
-
- LivePhysRegs LiveRegs;
- computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
- computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
- computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
- computeAndAddLiveIns(LiveRegs, *DoneMBB);
-
- return true;
-}
-
-bool RISCVExpandPseudo::expandAtomicCmpXchg(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
- int Width, MachineBasicBlock::iterator &NextMBBI) {
- MachineInstr &MI = *MBBI;
- DebugLoc DL = MI.getDebugLoc();
- MachineFunction *MF = MBB.getParent();
- auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
- auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
- auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-
- // Insert new MBBs.
- MF->insert(++MBB.getIterator(), LoopHeadMBB);
- MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
- MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
-
- // Set up successors and transfer remaining instructions to DoneMBB.
- LoopHeadMBB->addSuccessor(LoopTailMBB);
- LoopHeadMBB->addSuccessor(DoneMBB);
- LoopTailMBB->addSuccessor(DoneMBB);
- LoopTailMBB->addSuccessor(LoopHeadMBB);
- DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
- DoneMBB->transferSuccessors(&MBB);
- MBB.addSuccessor(LoopHeadMBB);
-
- Register DestReg = MI.getOperand(0).getReg();
- Register ScratchReg = MI.getOperand(1).getReg();
- Register AddrReg = MI.getOperand(2).getReg();
- Register CmpValReg = MI.getOperand(3).getReg();
- Register NewValReg = MI.getOperand(4).getReg();
- AtomicOrdering Ordering =
- static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
-
- if (!IsMasked) {
- // .loophead:
- // lr.[w|d] dest, (addr)
- // bne dest, cmpval, done
- BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
- .addReg(AddrReg);
- BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
- .addReg(DestReg)
- .addReg(CmpValReg)
- .addMBB(DoneMBB);
- // .looptail:
- // sc.[w|d] scratch, newval, (addr)
- // bnez scratch, loophead
- BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
- .addReg(AddrReg)
- .addReg(NewValReg);
- BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
- .addReg(ScratchReg)
- .addReg(RISCV::X0)
- .addMBB(LoopHeadMBB);
- } else {
- // .loophead:
- // lr.w dest, (addr)
- // and scratch, dest, mask
- // bne scratch, cmpval, done
- Register MaskReg = MI.getOperand(5).getReg();
- BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
- .addReg(AddrReg);
- BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
- .addReg(DestReg)
- .addReg(MaskReg);
- BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
- .addReg(ScratchReg)
- .addReg(CmpValReg)
- .addMBB(DoneMBB);
-
- // .looptail:
- // xor scratch, dest, newval
- // and scratch, scratch, mask
- // xor scratch, dest, scratch
- // sc.w scratch, scratch, (adrr)
- // bnez scratch, loophead
- insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
- MaskReg, ScratchReg);
- BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
- .addReg(AddrReg)
- .addReg(ScratchReg);
- BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
- .addReg(ScratchReg)
- .addReg(RISCV::X0)
- .addMBB(LoopHeadMBB);
- }
-
- NextMBBI = MBB.end();
- MI.eraseFromParent();
-
- LivePhysRegs LiveRegs;
- computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
- computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
- computeAndAddLiveIns(LiveRegs, *DoneMBB);
-
- return true;
-}
-
bool RISCVExpandPseudo::expandAuipcInstPair(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI, unsigned FlagsHi,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index f7cd19cbb8e7..43adc7426c79 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -23,6 +23,100 @@
using namespace llvm;
+// Get the ID of the libcall used for spilling and restoring callee saved
+// registers. The ID is representative of the number of registers saved or
+// restored by the libcall, except it is zero-indexed - ID 0 corresponds to a
+// single register.
+static int getLibCallID(const MachineFunction &MF,
+ const std::vector<CalleeSavedInfo> &CSI) {
+ const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
+
+ if (CSI.empty() || !RVFI->useSaveRestoreLibCalls(MF))
+ return -1;
+
+ Register MaxReg = RISCV::NoRegister;
+ for (auto &CS : CSI)
+ // RISCVRegisterInfo::hasReservedSpillSlot assigns negative frame indexes to
+ // registers which can be saved by libcall.
+ if (CS.getFrameIdx() < 0)
+ MaxReg = std::max(MaxReg.id(), CS.getReg());
+
+ if (MaxReg == RISCV::NoRegister)
+ return -1;
+
+ switch (MaxReg) {
+ default:
+ llvm_unreachable("Something has gone wrong!");
+ case /*s11*/ RISCV::X27: return 12;
+ case /*s10*/ RISCV::X26: return 11;
+ case /*s9*/ RISCV::X25: return 10;
+ case /*s8*/ RISCV::X24: return 9;
+ case /*s7*/ RISCV::X23: return 8;
+ case /*s6*/ RISCV::X22: return 7;
+ case /*s5*/ RISCV::X21: return 6;
+ case /*s4*/ RISCV::X20: return 5;
+ case /*s3*/ RISCV::X19: return 4;
+ case /*s2*/ RISCV::X18: return 3;
+ case /*s1*/ RISCV::X9: return 2;
+ case /*s0*/ RISCV::X8: return 1;
+ case /*ra*/ RISCV::X1: return 0;
+ }
+}
+
+// Get the name of the libcall used for spilling callee saved registers.
+// If this function will not use save/restore libcalls, then return a nullptr.
+static const char *
+getSpillLibCallName(const MachineFunction &MF,
+ const std::vector<CalleeSavedInfo> &CSI) {
+ static const char *const SpillLibCalls[] = {
+ "__riscv_save_0",
+ "__riscv_save_1",
+ "__riscv_save_2",
+ "__riscv_save_3",
+ "__riscv_save_4",
+ "__riscv_save_5",
+ "__riscv_save_6",
+ "__riscv_save_7",
+ "__riscv_save_8",
+ "__riscv_save_9",
+ "__riscv_save_10",
+ "__riscv_save_11",
+ "__riscv_save_12"
+ };
+
+ int LibCallID = getLibCallID(MF, CSI);
+ if (LibCallID == -1)
+ return nullptr;
+ return SpillLibCalls[LibCallID];
+}
+
+// Get the name of the libcall used for restoring callee saved registers.
+// If this function will not use save/restore libcalls, then return a nullptr.
+static const char *
+getRestoreLibCallName(const MachineFunction &MF,
+ const std::vector<CalleeSavedInfo> &CSI) {
+ static const char *const RestoreLibCalls[] = {
+ "__riscv_restore_0",
+ "__riscv_restore_1",
+ "__riscv_restore_2",
+ "__riscv_restore_3",
+ "__riscv_restore_4",
+ "__riscv_restore_5",
+ "__riscv_restore_6",
+ "__riscv_restore_7",
+ "__riscv_restore_8",
+ "__riscv_restore_9",
+ "__riscv_restore_10",
+ "__riscv_restore_11",
+ "__riscv_restore_12"
+ };
+
+ int LibCallID = getLibCallID(MF, CSI);
+ if (LibCallID == -1)
+ return nullptr;
+ return RestoreLibCalls[LibCallID];
+}
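A worked example of the ID scheme, assuming the callee-saved registers given negative frame indices are ra, s0 and s1: the highest of these is s1 (X9), so getLibCallID returns 2 and the prologue/epilogue end up calling __riscv_save_2 and __riscv_restore_2, which cover at least ra, s0 and s1.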
+
bool RISCVFrameLowering::hasFP(const MachineFunction &MF) const {
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
@@ -48,10 +142,10 @@ void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const {
uint64_t FrameSize = MFI.getStackSize();
// Get the alignment.
- unsigned StackAlign = getStackAlignment();
+ Align StackAlign = getStackAlign();
if (RI->needsStackRealignment(MF)) {
- unsigned MaxStackAlign = std::max(StackAlign, MFI.getMaxAlignment());
- FrameSize += (MaxStackAlign - StackAlign);
+ Align MaxStackAlign = std::max(StackAlign, MFI.getMaxAlign());
+ FrameSize += (MaxStackAlign.value() - StackAlign.value());
StackAlign = MaxStackAlign;
}
@@ -105,6 +199,17 @@ static Register getFPReg(const RISCVSubtarget &STI) { return RISCV::X8; }
// Returns the register used to hold the stack pointer.
static Register getSPReg(const RISCVSubtarget &STI) { return RISCV::X2; }
+static SmallVector<CalleeSavedInfo, 8>
+getNonLibcallCSI(const std::vector<CalleeSavedInfo> &CSI) {
+ SmallVector<CalleeSavedInfo, 8> NonLibcallCSI;
+
+ for (auto &CS : CSI)
+ if (CS.getFrameIdx() >= 0)
+ NonLibcallCSI.push_back(CS);
+
+ return NonLibcallCSI;
+}
+
void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -117,6 +222,11 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
Register SPReg = getSPReg(STI);
Register BPReg = RISCVABI::getBPReg();
+ // Since spillCalleeSavedRegisters may have inserted a libcall, skip past
+ // any instructions marked as FrameSetup
+ while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
+ ++MBBI;
+
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
DebugLoc DL;
@@ -124,12 +234,38 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// Determine the correct frame layout
determineFrameLayout(MF);
+ // If libcalls are used to spill and restore callee-saved registers, the frame
+ // has two sections; the opaque section managed by the libcalls, and the
+ // section managed by MachineFrameInfo which can also hold callee saved
+ // registers in fixed stack slots, both of which have negative frame indices.
+ // This gets even more complicated when incoming arguments are passed via the
+ // stack, as these too have negative frame indices. An example is detailed
+ // below:
+ //
+ // | incoming arg | <- FI[-3]
+ // | libcallspill |
+ // | calleespill | <- FI[-2]
+ // | calleespill | <- FI[-1]
+ // | this_frame | <- FI[0]
+ //
+ // For negative frame indices, the offset from the frame pointer will differ
+ // depending on which of these groups the frame index applies to.
+ // The following calculates the correct offset knowing the number of callee
+  // saved registers spilled by the two methods.
+ if (int LibCallRegs = getLibCallID(MF, MFI.getCalleeSavedInfo()) + 1) {
+ // Calculate the size of the frame managed by the libcall. The libcalls are
+ // implemented such that the stack will always be 16 byte aligned.
+ unsigned LibCallFrameSize = alignTo((STI.getXLen() / 8) * LibCallRegs, 16);
+ RVFI->setLibCallStackSize(LibCallFrameSize);
+ }
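As a concrete check of this calculation (register set assumed for illustration): on RV64, a function whose libcall-saved registers are ra, s0 and s1 has LibCallRegs = 3, so (XLen / 8) * LibCallRegs = 8 * 3 = 24 bytes and alignTo(24, 16) records a 32-byte libcall section; the same set on RV32 gives alignTo(12, 16) = 16 bytes.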
+
// FIXME (note copied from Lanai): This appears to be overallocating. Needs
// investigation. Get the number of bytes to allocate from the FrameInfo.
uint64_t StackSize = MFI.getStackSize();
+ uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize();
// Early exit if there is no need to allocate on the stack
- if (StackSize == 0 && !MFI.adjustsStack())
+ if (RealStackSize == 0 && !MFI.adjustsStack())
return;
// If the stack pointer has been marked as reserved, then produce an error if
@@ -140,31 +276,42 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
// Split the SP adjustment to reduce the offsets of callee saved spill.
- if (FirstSPAdjustAmount)
+ if (FirstSPAdjustAmount) {
StackSize = FirstSPAdjustAmount;
+ RealStackSize = FirstSPAdjustAmount;
+ }
// Allocate space on the stack if necessary.
adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup);
- // Emit ".cfi_def_cfa_offset StackSize"
+ // Emit ".cfi_def_cfa_offset RealStackSize"
unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, -StackSize));
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, RealStackSize));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
+ const auto &CSI = MFI.getCalleeSavedInfo();
+
// The frame pointer is callee-saved, and code has been generated for us to
// save it to the stack. We need to skip over the storing of callee-saved
// registers as the frame pointer must be modified after it has been saved
// to the stack, not before.
// FIXME: assumes exactly one instruction is used to save each callee-saved
// register.
- const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
- std::advance(MBBI, CSI.size());
+ std::advance(MBBI, getNonLibcallCSI(CSI).size());
// Iterate over list of callee-saved registers and emit .cfi_offset
// directives.
for (const auto &Entry : CSI) {
- int64_t Offset = MFI.getObjectOffset(Entry.getFrameIdx());
+ int FrameIdx = Entry.getFrameIdx();
+ int64_t Offset;
+    // Offsets for objects with fixed locations (i.e., those saved by a
+    // libcall) are calculated directly from the frame index.
+ if (FrameIdx < 0)
+ Offset = FrameIdx * (int64_t) STI.getXLen() / 8;
+ else
+ Offset = MFI.getObjectOffset(Entry.getFrameIdx()) -
+ RVFI->getLibCallStackSize();
Register Reg = Entry.getReg();
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, RI->getDwarfRegNum(Reg, true), Offset));
@@ -179,11 +326,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
MF.getFunction(), "Frame pointer required, but has been reserved."});
adjustReg(MBB, MBBI, DL, FPReg, SPReg,
- StackSize - RVFI->getVarArgsSaveSize(), MachineInstr::FrameSetup);
+ RealStackSize - RVFI->getVarArgsSaveSize(),
+ MachineInstr::FrameSetup);
- // Emit ".cfi_def_cfa $fp, -RVFI->getVarArgsSaveSize()"
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
- nullptr, RI->getDwarfRegNum(FPReg, true), -RVFI->getVarArgsSaveSize()));
+ // Emit ".cfi_def_cfa $fp, RVFI->getVarArgsSaveSize()"
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
+ nullptr, RI->getDwarfRegNum(FPReg, true), RVFI->getVarArgsSaveSize()));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
@@ -201,7 +349,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
if (!hasFP(MF)) {
// Emit ".cfi_def_cfa_offset StackSize"
unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize()));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
@@ -211,15 +359,15 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// Realign Stack
const RISCVRegisterInfo *RI = STI.getRegisterInfo();
if (RI->needsStackRealignment(MF)) {
- unsigned MaxAlignment = MFI.getMaxAlignment();
+ Align MaxAlignment = MFI.getMaxAlign();
const RISCVInstrInfo *TII = STI.getInstrInfo();
- if (isInt<12>(-(int)MaxAlignment)) {
+ if (isInt<12>(-(int)MaxAlignment.value())) {
BuildMI(MBB, MBBI, DL, TII->get(RISCV::ANDI), SPReg)
.addReg(SPReg)
- .addImm(-(int)MaxAlignment);
+ .addImm(-(int)MaxAlignment.value());
} else {
- unsigned ShiftAmount = countTrailingZeros(MaxAlignment);
+ unsigned ShiftAmount = Log2(MaxAlignment);
Register VR =
MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, MBBI, DL, TII->get(RISCV::SRLI), VR)
@@ -264,15 +412,26 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
// last instruction.
if (!MBBI->isTerminator())
MBBI = std::next(MBBI);
+
+ // If callee-saved registers are saved via libcall, place stack adjustment
+ // before this call.
+ while (MBBI != MBB.begin() &&
+ std::prev(MBBI)->getFlag(MachineInstr::FrameDestroy))
+ --MBBI;
}
+ const auto &CSI = getNonLibcallCSI(MFI.getCalleeSavedInfo());
+
// Skip to before the restores of callee-saved registers
// FIXME: assumes exactly one instruction is used to restore each
// callee-saved register.
- auto LastFrameDestroy = std::prev(MBBI, MFI.getCalleeSavedInfo().size());
+ auto LastFrameDestroy = MBBI;
+ if (!CSI.empty())
+ LastFrameDestroy = std::prev(MBBI, CSI.size());
uint64_t StackSize = MFI.getStackSize();
- uint64_t FPOffset = StackSize - RVFI->getVarArgsSaveSize();
+ uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize();
+ uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize();
// Restore the stack pointer using the value of the frame pointer. Only
// necessary if the stack pointer was modified, meaning the stack size is
@@ -302,7 +461,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI,
- unsigned &FrameReg) const {
+ Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
@@ -310,7 +469,7 @@ int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF,
// Callee-saved registers should be referenced relative to the stack
// pointer (positive offset), otherwise use the frame pointer (negative
// offset).
- const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ const auto &CSI = getNonLibcallCSI(MFI.getCalleeSavedInfo());
int MinCSFI = 0;
int MaxCSFI = -1;
@@ -330,7 +489,7 @@ int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF,
if (FirstSPAdjustAmount)
Offset += FirstSPAdjustAmount;
else
- Offset += MF.getFrameInfo().getStackSize();
+ Offset += MFI.getStackSize();
} else if (RI->needsStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) {
// If the stack was realigned, the frame pointer is set in order to allow
// SP to be restored, so we need another base register to record the stack
@@ -339,13 +498,20 @@ int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF,
FrameReg = RISCVABI::getBPReg();
else
FrameReg = RISCV::X2;
- Offset += MF.getFrameInfo().getStackSize();
+ Offset += MFI.getStackSize();
+ if (FI < 0)
+ Offset += RVFI->getLibCallStackSize();
} else {
FrameReg = RI->getFrameRegister(MF);
- if (hasFP(MF))
+ if (hasFP(MF)) {
Offset += RVFI->getVarArgsSaveSize();
- else
- Offset += MF.getFrameInfo().getStackSize();
+ if (FI >= 0)
+ Offset -= RVFI->getLibCallStackSize();
+ } else {
+ Offset += MFI.getStackSize();
+ if (FI < 0)
+ Offset += RVFI->getLibCallStackSize();
+ }
}
return Offset;
}
@@ -407,8 +573,8 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
// still needs an emergency spill slot for branch relaxation. This case
// would currently be missed.
if (!isInt<11>(MFI.estimateStackSize(MF))) {
- int RegScavFI = MFI.CreateStackObject(
- RegInfo->getSpillSize(*RC), RegInfo->getSpillAlignment(*RC), false);
+ int RegScavFI = MFI.CreateStackObject(RegInfo->getSpillSize(*RC),
+ RegInfo->getSpillAlign(*RC), false);
RS->addScavengingFrameIndex(RegScavFI);
}
}
@@ -461,16 +627,17 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
// add sp,sp,-64
uint64_t
RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const {
+ const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
uint64_t StackSize = MFI.getStackSize();
- uint64_t StackAlign = getStackAlignment();
- // FIXME: Disable SplitSPAdjust if save-restore libcall enabled when the patch
- // landing. The callee saved registers will be pushed by the
- // save-restore libcalls, so we don't have to split the SP adjustment
- // in this case.
- //
+ // Disable SplitSPAdjust if save-restore libcall used. The callee saved
+ // registers will be pushed by the save-restore libcalls, so we don't have to
+ // split the SP adjustment in this case.
+ if (RVFI->getLibCallStackSize())
+ return 0;
+
// Return the FirstSPAdjustAmount if the StackSize can not fit in signed
// 12-bit and there exists a callee saved register need to be pushed.
if (!isInt<12>(StackSize) && (CSI.size() > 0)) {
@@ -480,7 +647,130 @@ RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const {
// load/store instruction and we have to stick with the stack alignment.
// 2048 is 16-byte alignment. The stack alignment for RV32 and RV64 is 16,
// for RV32E is 4. So (2048 - StackAlign) will satisfy the stack alignment.
- return 2048 - StackAlign;
+ return 2048 - getStackAlign().value();
}
return 0;
}
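With the standard 16-byte stack alignment for RV32 and RV64 this first adjustment is therefore 2048 - 16 = 2032 bytes; the callee-saved spills are addressed relative to that adjustment, so their offsets stay within the signed 12-bit range of a single load/store, and the remainder of the frame is allocated by a second SP adjustment.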
+
+bool RISCVFrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return true;
+
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
+ DebugLoc DL;
+ if (MI != MBB.end() && !MI->isDebugInstr())
+ DL = MI->getDebugLoc();
+
+ const char *SpillLibCall = getSpillLibCallName(*MF, CSI);
+ if (SpillLibCall) {
+ // Add spill libcall via non-callee-saved register t0.
+ BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoCALLReg), RISCV::X5)
+ .addExternalSymbol(SpillLibCall, RISCVII::MO_CALL)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // Add registers spilled in libcall as liveins.
+ for (auto &CS : CSI)
+ MBB.addLiveIn(CS.getReg());
+ }
+
+ // Manually spill values not spilled by libcall.
+ const auto &NonLibcallCSI = getNonLibcallCSI(CSI);
+ for (auto &CS : NonLibcallCSI) {
+ // Insert the spill to the stack frame.
+ Register Reg = CS.getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CS.getFrameIdx(), RC, TRI);
+ }
+
+ return true;
+}
+
+bool RISCVFrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return true;
+
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
+ DebugLoc DL;
+ if (MI != MBB.end() && !MI->isDebugInstr())
+ DL = MI->getDebugLoc();
+
+ // Manually restore values not restored by libcall. Insert in reverse order.
+ // loadRegFromStackSlot can insert multiple instructions.
+ const auto &NonLibcallCSI = getNonLibcallCSI(CSI);
+ for (auto &CS : reverse(NonLibcallCSI)) {
+ Register Reg = CS.getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI);
+ assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
+ }
+
+ const char *RestoreLibCall = getRestoreLibCallName(*MF, CSI);
+ if (RestoreLibCall) {
+ // Add restore libcall via tail call.
+ MachineBasicBlock::iterator NewMI =
+ BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoTAIL))
+ .addExternalSymbol(RestoreLibCall, RISCVII::MO_CALL)
+ .setMIFlag(MachineInstr::FrameDestroy);
+
+ // Remove trailing returns, since the terminator is now a tail call to the
+ // restore function.
+ if (MI != MBB.end() && MI->getOpcode() == RISCV::PseudoRET) {
+ NewMI->copyImplicitOps(*MF, *MI);
+ MI->eraseFromParent();
+ }
+ }
+
+ return true;
+}
+
+bool RISCVFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
+ MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
+ const MachineFunction *MF = MBB.getParent();
+ const auto *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
+
+ if (!RVFI->useSaveRestoreLibCalls(*MF))
+ return true;
+
+ // Inserting a call to a __riscv_save libcall requires the use of the register
+ // t0 (X5) to hold the return address. Therefore if this register is already
+ // used we can't insert the call.
+
+ RegScavenger RS;
+ RS.enterBasicBlock(*TmpMBB);
+ return !RS.isRegUsed(RISCV::X5);
+}
+
+bool RISCVFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
+ const MachineFunction *MF = MBB.getParent();
+ MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
+ const auto *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
+
+ if (!RVFI->useSaveRestoreLibCalls(*MF))
+ return true;
+
+ // Using the __riscv_restore libcalls to restore CSRs requires a tail call.
+ // This means if we still need to continue executing code within this function
+ // the restore cannot take place in this basic block.
+
+ if (MBB.succ_size() > 1)
+ return false;
+
+ MachineBasicBlock *SuccMBB =
+ MBB.succ_empty() ? TmpMBB->getFallThrough() : *MBB.succ_begin();
+
+ // Doing a tail call should be safe if there are no successors, because either
+ // we have a returning block or the end of the block is unreachable, so the
+ // restore will be eliminated regardless.
+ if (!SuccMBB)
+ return true;
+
+ // The successor can only contain a return, since we would effectively be
+ // replacing the successor with our own tail return at the end of our block.
+ return SuccMBB->isReturnBlock() && SuccMBB->size() == 1;
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index 3a16cf93cf10..1517c847a04c 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -30,7 +30,7 @@ public:
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
int getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const override;
+ Register &FrameReg) const override;
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;
@@ -46,12 +46,24 @@ public:
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
+ bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
// Get the first stack adjustment amount for SplitSPAdjust.
   // Return 0 if we don't want to split the SP adjustment in prologue and
// epilogue.
uint64_t getFirstSPAdjustAmount(const MachineFunction &MF) const;
+ bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
+ bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
+
protected:
const RISCVSubtarget &STI;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index f66d06c20e37..a0ae05081adc 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -10,55 +10,19 @@
//
//===----------------------------------------------------------------------===//
+#include "RISCVISelDAGToDAG.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
-#include "RISCV.h"
-#include "RISCVTargetMachine.h"
#include "Utils/RISCVMatInt.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
#define DEBUG_TYPE "riscv-isel"
-// RISCV-specific code to select RISCV machine instructions for
-// SelectionDAG operations.
-namespace {
-class RISCVDAGToDAGISel final : public SelectionDAGISel {
- const RISCVSubtarget *Subtarget = nullptr;
-
-public:
- explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine)
- : SelectionDAGISel(TargetMachine) {}
-
- StringRef getPassName() const override {
- return "RISCV DAG->DAG Pattern Instruction Selection";
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override {
- Subtarget = &MF.getSubtarget<RISCVSubtarget>();
- return SelectionDAGISel::runOnMachineFunction(MF);
- }
-
- void PostprocessISelDAG() override;
-
- void Select(SDNode *Node) override;
-
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
- std::vector<SDValue> &OutOps) override;
-
- bool SelectAddrFI(SDValue Addr, SDValue &Base);
-
-// Include the pieces autogenerated from the target description.
-#include "RISCVGenDAGISel.inc"
-
-private:
- void doPeepholeLoadStoreADDI();
-};
-}
-
void RISCVDAGToDAGISel::PostprocessISelDAG() {
doPeepholeLoadStoreADDI();
}
@@ -111,6 +75,30 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
EVT VT = Node->getValueType(0);
switch (Opcode) {
+ case ISD::ADD: {
+ // Optimize (add r, imm) to (addi (addi r, imm0) imm1) if applicable. The
+ // immediate must be in specific ranges and have a single use.
+ if (auto *ConstOp = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
+ if (!(ConstOp->hasOneUse()))
+ break;
+ // The imm must be in range [-4096,-2049] or [2048,4094].
+ int64_t Imm = ConstOp->getSExtValue();
+ if (!(-4096 <= Imm && Imm <= -2049) && !(2048 <= Imm && Imm <= 4094))
+ break;
+      // Break the imm into imm0 + imm1.
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
+ const SDValue ImmOp0 = CurDAG->getTargetConstant(Imm - Imm / 2, DL, VT);
+ const SDValue ImmOp1 = CurDAG->getTargetConstant(Imm / 2, DL, VT);
+ auto *NodeAddi0 = CurDAG->getMachineNode(RISCV::ADDI, DL, VT,
+ Node->getOperand(0), ImmOp0);
+ auto *NodeAddi1 = CurDAG->getMachineNode(RISCV::ADDI, DL, VT,
+ SDValue(NodeAddi0, 0), ImmOp1);
+ ReplaceNode(Node, NodeAddi1);
+ return;
+ }
+ break;
+ }
case ISD::Constant: {
auto ConstNode = cast<ConstantSDNode>(Node);
if (VT == XLenVT && ConstNode->isNullValue()) {
@@ -197,8 +185,9 @@ bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
}
// Merge an ADDI into the offset of a load/store instruction where possible.
-// (load (add base, off), 0) -> (load base, off)
-// (store val, (add base, off)) -> (store val, base, off)
+// (load (addi base, off1), off2) -> (load base, off1+off2)
+// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
+// This is possible when off1+off2 fits a 12-bit immediate.
void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
++Position;
@@ -239,10 +228,7 @@ void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
break;
}
- // Currently, the load/store offset must be 0 to be considered for this
- // peephole optimisation.
- if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)) ||
- N->getConstantOperandVal(OffsetOpIdx) != 0)
+ if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
continue;
SDValue Base = N->getOperand(BaseOpIdx);
@@ -252,14 +238,39 @@ void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
continue;
SDValue ImmOperand = Base.getOperand(1);
+ uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);
if (auto Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
- ImmOperand = CurDAG->getTargetConstant(
- Const->getSExtValue(), SDLoc(ImmOperand), ImmOperand.getValueType());
+ int64_t Offset1 = Const->getSExtValue();
+ int64_t CombinedOffset = Offset1 + Offset2;
+ if (!isInt<12>(CombinedOffset))
+ continue;
+ ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand),
+ ImmOperand.getValueType());
} else if (auto GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
+ // If the off1 in (addi base, off1) is a global variable's address (its
+ // low part, really), then we can rely on the alignment of that variable
+ // to provide a margin of safety before off1 can overflow the 12 bits.
+      // Check if off2 falls within that margin; if so, off1+off2 can't overflow.
+ const DataLayout &DL = CurDAG->getDataLayout();
+ Align Alignment = GA->getGlobal()->getPointerAlignment(DL);
+ if (Offset2 != 0 && Alignment <= Offset2)
+ continue;
+ int64_t Offset1 = GA->getOffset();
+ int64_t CombinedOffset = Offset1 + Offset2;
ImmOperand = CurDAG->getTargetGlobalAddress(
GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
- GA->getOffset(), GA->getTargetFlags());
+ CombinedOffset, GA->getTargetFlags());
+ } else if (auto CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
+ // Ditto.
+ Align Alignment = CP->getAlign();
+ if (Offset2 != 0 && Alignment <= Offset2)
+ continue;
+ int64_t Offset1 = CP->getOffset();
+ int64_t CombinedOffset = Offset1 + Offset2;
+ ImmOperand = CurDAG->getTargetConstantPool(
+ CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
+ CombinedOffset, CP->getTargetFlags());
} else {
continue;
}
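Two of the hunks above encode small arithmetic invariants that are easy to sanity-check in isolation: the ISD::ADD case splits an out-of-range immediate into two halves that must each fit an ADDI, and the ADDI-folding peephole only fires when off1+off2 still fits 12 signed bits. A minimal standalone sketch that exhaustively verifies the split over the documented range (fitsInt12 is a stand-in for llvm::isInt<12>, not code from this patch):

  #include <cassert>
  #include <cstdint>

  static bool fitsInt12(int64_t V) { return V >= -2048 && V <= 2047; }

  int main() {
    for (int64_t Imm = -4096; Imm <= 4094; ++Imm) {
      bool InRange = (-4096 <= Imm && Imm <= -2049) || (2048 <= Imm && Imm <= 4094);
      if (!InRange)
        continue;
      int64_t Imm1 = Imm / 2;     // second ADDI (C++ division rounds toward zero)
      int64_t Imm0 = Imm - Imm1;  // first ADDI
      assert(fitsInt12(Imm0) && fitsInt12(Imm1) && Imm0 + Imm1 == Imm);
    }
    // 4095 is excluded because 4095 - 4095/2 == 2048, which no longer fits an ADDI.
    assert(!fitsInt12(4095 - 4095 / 2));
    return 0;
  }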
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
new file mode 100644
index 000000000000..dcf733ec3675
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -0,0 +1,56 @@
+//===---- RISCVISelDAGToDAG.h - A dag to dag inst selector for RISCV ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the RISCV target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_RISCVISELDAGTODAG_H
+#define LLVM_LIB_TARGET_RISCV_RISCVISELDAGTODAG_H
+
+#include "RISCV.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+// RISCV-specific code to select RISCV machine instructions for
+// SelectionDAG operations.
+namespace llvm {
+class RISCVDAGToDAGISel : public SelectionDAGISel {
+ const RISCVSubtarget *Subtarget = nullptr;
+
+public:
+ explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine)
+ : SelectionDAGISel(TargetMachine) {}
+
+ StringRef getPassName() const override {
+ return "RISCV DAG->DAG Pattern Instruction Selection";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ Subtarget = &MF.getSubtarget<RISCVSubtarget>();
+ return SelectionDAGISel::runOnMachineFunction(MF);
+ }
+
+ void PostprocessISelDAG() override;
+
+ void Select(SDNode *Node) override;
+
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ std::vector<SDValue> &OutOps) override;
+
+ bool SelectAddrFI(SDValue Addr, SDValue &Base);
+
+// Include the pieces autogenerated from the target description.
+#include "RISCVGenDAGISel.inc"
+
+private:
+ void doPeepholeLoadStoreADDI();
+};
+}
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a1e3e326a97a..91fc69b5bc10 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -218,6 +219,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
if (Subtarget.hasStdExtA()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
@@ -235,6 +237,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Effectively disable jump table generation.
setMinimumJumpTableEntries(INT_MAX);
+
+ // Jumps are expensive, compared to logic
+ setJumpIsExpensive();
+
+ // We can use any register for comparisons
+ setHasMultipleConditionRegisters();
}
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
@@ -344,6 +352,17 @@ bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}
+bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const {
+ if (VT == MVT::f32 && !Subtarget.hasStdExtF())
+ return false;
+ if (VT == MVT::f64 && !Subtarget.hasStdExtD())
+ return false;
+ if (Imm.isNegZero())
+ return false;
+ return Imm.isZero();
+}
+
bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
return (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
(VT == MVT::f64 && Subtarget.hasStdExtD());
@@ -426,6 +445,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
return FPConv;
}
+ case ISD::INTRINSIC_WO_CHAIN:
+ return LowerINTRINSIC_WO_CHAIN(Op, DAG);
}
}
@@ -442,7 +463,7 @@ static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
SelectionDAG &DAG, unsigned Flags) {
- return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
+ return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
N->getOffset(), Flags);
}
@@ -829,6 +850,20 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
return DAG.getMergeValues(Parts, DL);
}
+SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ SDLoc DL(Op);
+ switch (IntNo) {
+ default:
+ return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::thread_pointer: {
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ return DAG.getRegister(RISCV::X4, PtrVT);
+ }
+ }
+}
+
// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
@@ -918,8 +953,8 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
SDValue RCW =
DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
- Results.push_back(RCW);
- Results.push_back(RCW.getValue(1));
+ Results.push_back(
+ DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
Results.push_back(RCW.getValue(2));
break;
}
@@ -1206,13 +1241,13 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
Register HiReg = MI.getOperand(1).getReg();
Register SrcReg = MI.getOperand(2).getReg();
const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
- int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
+ int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
RI);
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
- MachineMemOperand::MOLoad, 8, 8);
+ MachineMemOperand::MOLoad, 8, Align(8));
BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
.addFrameIndex(FI)
.addImm(0)
@@ -1238,11 +1273,11 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
Register LoReg = MI.getOperand(1).getReg();
Register HiReg = MI.getOperand(2).getReg();
const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
- int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
+ int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
- MachineMemOperand::MOStore, 8, 8);
+ MachineMemOperand::MOStore, 8, Align(8));
BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
.addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
.addFrameIndex(FI)
@@ -1464,14 +1499,15 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
VA1.getLocVT(), CCValAssign::Full));
} else {
// Both halves must be passed on the stack, with proper alignment.
- unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign());
+ Align StackAlign =
+ std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
State.addLoc(
CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
State.AllocateStack(XLenInBytes, StackAlign),
VA1.getLocVT(), CCValAssign::Full));
State.addLoc(CCValAssign::getMem(
- ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
- CCValAssign::Full));
+ ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
+ LocVT2, CCValAssign::Full));
return false;
}
@@ -1482,8 +1518,8 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
} else {
// The second half is passed via the stack, without additional alignment.
State.addLoc(CCValAssign::getMem(
- ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2,
- CCValAssign::Full));
+ ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
+ LocVT2, CCValAssign::Full));
}
return false;
@@ -1551,7 +1587,7 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
// original type is larger than 2*XLEN, so the register alignment rule does
// not apply.
unsigned TwoXLenInBytes = (2 * XLen) / 8;
- if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes &&
+ if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
// Skip 'odd' register if necessary.
@@ -1578,13 +1614,13 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
Register Reg = State.AllocateReg(ArgGPRs);
LocVT = MVT::i32;
if (!Reg) {
- unsigned StackOffset = State.AllocateStack(8, 8);
+ unsigned StackOffset = State.AllocateStack(8, Align(8));
State.addLoc(
CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
return false;
}
if (!State.AllocateReg(ArgGPRs))
- State.AllocateStack(4, 4);
+ State.AllocateStack(4, Align(4));
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
@@ -1624,7 +1660,8 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
Reg = State.AllocateReg(ArgFPR64s, ArgFPR32s);
else
Reg = State.AllocateReg(ArgGPRs);
- unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8);
+ unsigned StackOffset =
+ Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8));
// If we reach this point and PendingLocs is non-empty, we must be at the
// end of a split argument that must be passed indirectly.
@@ -1679,7 +1716,7 @@ void RISCVTargetLowering::analyzeInputArgs(
RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
- ArgFlags, CCInfo, /*IsRet=*/true, IsRet, ArgTy)) {
+ ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
<< EVT(ArgVT).getEVTString() << '\n');
llvm_unreachable(nullptr);
@@ -1893,13 +1930,13 @@ static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
}
if (LocVT == MVT::i32 || LocVT == MVT::f32) {
- unsigned Offset4 = State.AllocateStack(4, 4);
+ unsigned Offset4 = State.AllocateStack(4, Align(4));
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
return false;
}
if (LocVT == MVT::i64 || LocVT == MVT::f64) {
- unsigned Offset5 = State.AllocateStack(8, 8);
+ unsigned Offset5 = State.AllocateStack(8, Align(8));
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
return false;
}
@@ -2158,7 +2195,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (IsTailCall)
++NumTailCalls;
- else if (CLI.CS && CLI.CS.isMustTailCall())
+ else if (CLI.CB && CLI.CB->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
@@ -2174,17 +2211,17 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue Arg = OutVals[i];
unsigned Size = Flags.getByValSize();
- unsigned Align = Flags.getByValAlign();
+ Align Alignment = Flags.getNonZeroByValAlign();
- int FI = MF.getFrameInfo().CreateStackObject(Size, Align, /*isSS=*/false);
+ int FI =
+ MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
- Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align,
+ Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
/*IsVolatile=*/false,
- /*AlwaysInline=*/false,
- IsTailCall, MachinePointerInfo(),
- MachinePointerInfo());
+ /*AlwaysInline=*/false, IsTailCall,
+ MachinePointerInfo(), MachinePointerInfo());
ByValArgs.push_back(FIPtr);
}
@@ -2359,6 +2396,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
}
Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
+ DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
Glue = Chain.getValue(1);
// Mark the end of the call, which is glued to the call itself.
@@ -2528,6 +2566,10 @@ void RISCVTargetLowering::validateCCReservedRegs(
F, "Argument register required, but has been reserved."});
}
+bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
+ return CI->isTailCall();
+}
+
const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((RISCVISD::NodeType)Opcode) {
case RISCVISD::FIRST_NUMBER:
@@ -2917,12 +2959,12 @@ Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
return Result;
}
-unsigned RISCVTargetLowering::getExceptionPointerRegister(
+Register RISCVTargetLowering::getExceptionPointerRegister(
const Constant *PersonalityFn) const {
return RISCV::X10;
}
-unsigned RISCVTargetLowering::getExceptionSelectorRegister(
+Register RISCVTargetLowering::getExceptionSelectorRegister(
const Constant *PersonalityFn) const {
return RISCV::X11;
}
@@ -2937,6 +2979,26 @@ bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
return true;
}
+bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
+ SDValue C) const {
+ // Check integral scalar types.
+ if (VT.isScalarInteger()) {
+ // Do not perform the transformation on riscv32 with the M extension.
+ if (!Subtarget.is64Bit() && Subtarget.hasStdExtM())
+ return false;
+ if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
+ if (ConstNode->getAPIntValue().getBitWidth() > 8 * sizeof(int64_t))
+ return false;
+ int64_t Imm = ConstNode->getSExtValue();
+ if (isPowerOf2_64(Imm + 1) || isPowerOf2_64(Imm - 1) ||
+ isPowerOf2_64(1 - Imm) || isPowerOf2_64(-1 - Imm))
+ return true;
+ }
+ }
+
+ return false;
+}
+
#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
index b2ad75d67024..e420e879efc9 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -74,6 +74,8 @@ public:
bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
+ bool isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const override;
bool hasBitPreservingFPLogic(EVT VT) const override;
@@ -114,6 +116,7 @@ public:
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
return VT.isScalarInteger();
}
+ bool convertSelectOfConstantsToMath(EVT VT) const override { return true; }
bool shouldInsertFencesForAtomic(const Instruction *I) const override {
return isa<LoadInst>(I) || isa<StoreInst>(I);
@@ -127,6 +130,10 @@ public:
return ISD::SIGN_EXTEND;
}
+ ISD::NodeType getExtendForAtomicCmpSwapArg() const override {
+ return ISD::SIGN_EXTEND;
+ }
+
bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
if (DAG.getMachineFunction().getFunction().hasMinSize())
return false;
@@ -137,12 +144,12 @@ public:
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
- unsigned
+ Register
getExceptionPointerRegister(const Constant *PersonalityFn) const override;
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
- unsigned
+ Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
bool shouldExtendTypeInLibCall(EVT Type) const override;
@@ -154,13 +161,6 @@ public:
Register getRegisterByName(const char *RegName, LLT VT,
const MachineFunction &MF) const override;
-private:
- void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- bool IsRet) const;
- void analyzeOutputArgs(MachineFunction &MF, CCState &CCInfo,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- bool IsRet, CallLoweringInfo *CLI) const;
// Lower incoming arguments, copy physregs into vregs
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
@@ -177,10 +177,38 @@ private:
SelectionDAG &DAG) const override;
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
+
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override {
return true;
}
+ bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
+ bool shouldConsiderGEPOffsetSplit() const override { return true; }
+
+ bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
+ SDValue C) const override;
+
+ TargetLowering::AtomicExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ Value *emitMaskedAtomicRMWIntrinsic(IRBuilder<> &Builder, AtomicRMWInst *AI,
+ Value *AlignedAddr, Value *Incr,
+ Value *Mask, Value *ShiftAmt,
+ AtomicOrdering Ord) const override;
+ TargetLowering::AtomicExpansionKind
+ shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override;
+ Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilder<> &Builder,
+ AtomicCmpXchgInst *CI,
+ Value *AlignedAddr, Value *CmpVal,
+ Value *NewVal, Value *Mask,
+ AtomicOrdering Ord) const override;
+
+private:
+ void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ bool IsRet) const;
+ void analyzeOutputArgs(MachineFunction &MF, CCState &CCInfo,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ bool IsRet, CallLoweringInfo *CLI) const;
template <class NodeTy>
SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
@@ -189,7 +217,6 @@ private:
bool UseGOT) const;
SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
- bool shouldConsiderGEPOffsetSplit() const override { return true; }
SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
@@ -200,24 +227,12 @@ private:
SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
bool isEligibleForTailCallOptimization(
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
const SmallVector<CCValAssign, 16> &ArgLocs) const;
- TargetLowering::AtomicExpansionKind
- shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
- virtual Value *emitMaskedAtomicRMWIntrinsic(
- IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
- Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override;
- TargetLowering::AtomicExpansionKind
- shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override;
- virtual Value *
- emitMaskedAtomicCmpXchgIntrinsic(IRBuilder<> &Builder, AtomicCmpXchgInst *CI,
- Value *AlignedAddr, Value *CmpVal,
- Value *NewVal, Value *Mask,
- AtomicOrdering Ord) const override;
-
/// Generate error diagnostics if any register used by CC has been marked
/// reserved.
void validateCCReservedRegs(
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 3ed10cca5377..a47945a6a515 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -49,6 +49,19 @@ def InstFormatCB : InstFormat<15>;
def InstFormatCJ : InstFormat<16>;
def InstFormatOther : InstFormat<17>;
+class RISCVVConstraint<bits<4> val> {
+ bits<4> Value = val;
+}
+def NoConstraint : RISCVVConstraint<0>;
+def WidenV : RISCVVConstraint<1>;
+def WidenW : RISCVVConstraint<2>;
+def WidenCvt : RISCVVConstraint<3>;
+def Narrow : RISCVVConstraint<4>;
+def Iota : RISCVVConstraint<5>;
+def SlideUp : RISCVVConstraint<6>;
+def Vrgather : RISCVVConstraint<7>;
+def Vcompress : RISCVVConstraint<8>;
+
// The following opcode names match those given in Table 19.1 in the
// RISC-V User-level ISA specification ("RISC-V base opcode map").
class RISCVOpcode<bits<7> val> {
@@ -71,6 +84,7 @@ def OPC_MSUB : RISCVOpcode<0b1000111>;
def OPC_NMSUB : RISCVOpcode<0b1001011>;
def OPC_NMADD : RISCVOpcode<0b1001111>;
def OPC_OP_FP : RISCVOpcode<0b1010011>;
+def OPC_OP_V : RISCVOpcode<0b1010111>;
def OPC_BRANCH : RISCVOpcode<0b1100011>;
def OPC_JALR : RISCVOpcode<0b1100111>;
def OPC_JAL : RISCVOpcode<0b1101111>;
@@ -99,6 +113,10 @@ class RVInst<dag outs, dag ins, string opcodestr, string argstr,
let Pattern = pattern;
let TSFlags{4-0} = format.Value;
+
+ // Defaults
+ RISCVVConstraint RVVConstraint = NoConstraint;
+ let TSFlags{8-5} = RVVConstraint.Value;
}
// Pseudo instructions
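The two TSFlags fields declared above (instruction format in bits 4-0, RVVConstraint in bits 8-5) are meant to be unpacked with the ConstraintOffset/ConstraintMask constants that RISCVInstrInfo.h introduces further down in this patch. A quick standalone round-trip check of that layout (packTSFlags and the format value 3 are illustrative assumptions, not LLVM code):

  #include <cassert>
  #include <cstdint>

  constexpr unsigned ConstraintOffset = 5;   // matches RISCVInstrInfo.h below
  constexpr unsigned ConstraintMask = 0b1111;
  constexpr unsigned Vcompress = 8;

  static uint64_t packTSFlags(unsigned Format, unsigned Constraint) {
    return (Format & 0b11111) |                                            // TSFlags{4-0}
           (uint64_t(Constraint & ConstraintMask) << ConstraintOffset);    // TSFlags{8-5}
  }

  int main() {
    uint64_t TSFlags = packTSFlags(/*assumed format value*/ 3, Vcompress);
    assert(((TSFlags >> ConstraintOffset) & ConstraintMask) == Vcompress);
    assert((TSFlags & 0b11111) == 3);
    return 0;
  }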
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td
new file mode 100644
index 000000000000..e5f154966ba6
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td
@@ -0,0 +1,300 @@
+//===-- RISCVInstrFormatsV.td - RISCV V Instruction Formats --*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the RISC-V V extension instruction formats.
+//
+//===----------------------------------------------------------------------===//
+
+class RISCVVFormat<bits<3> val> {
+ bits<3> Value = val;
+}
+def OPIVV : RISCVVFormat<0b000>;
+def OPFVV : RISCVVFormat<0b001>;
+def OPMVV : RISCVVFormat<0b010>;
+def OPIVI : RISCVVFormat<0b011>;
+def OPIVX : RISCVVFormat<0b100>;
+def OPFVF : RISCVVFormat<0b101>;
+def OPMVX : RISCVVFormat<0b110>;
+
+class RISCVMOP<bits<3> val> {
+ bits<3> Value = val;
+}
+def MOPLDUnitStrideU : RISCVMOP<0b000>;
+def MOPLDStridedU : RISCVMOP<0b010>;
+def MOPLDIndexedU : RISCVMOP<0b011>;
+def MOPLDUnitStrideS : RISCVMOP<0b100>;
+def MOPLDStridedS : RISCVMOP<0b110>;
+def MOPLDIndexedS : RISCVMOP<0b111>;
+
+def MOPSTUnitStride : RISCVMOP<0b000>;
+def MOPSTStrided : RISCVMOP<0b010>;
+def MOPSTIndexedOrder: RISCVMOP<0b011>;
+def MOPSTIndexedUnOrd: RISCVMOP<0b111>;
+
+class RISCVLSUMOP<bits<5> val> {
+ bits<5> Value = val;
+}
+def LUMOPUnitStride : RISCVLSUMOP<0b00000>;
+def LUMOPUnitStrideWholeReg : RISCVLSUMOP<0b01000>;
+def LUMOPUnitStrideFF: RISCVLSUMOP<0b10000>;
+def SUMOPUnitStride : RISCVLSUMOP<0b00000>;
+def SUMOPUnitStrideWholeReg : RISCVLSUMOP<0b01000>;
+
+class RISCVWidth<bits<3> val> {
+ bits<3> Value = val;
+}
+def LSWidthVByte : RISCVWidth<0b000>;
+def LSWidthVHalf : RISCVWidth<0b101>;
+def LSWidthVWord : RISCVWidth<0b110>;
+def LSWidthVSEW : RISCVWidth<0b111>;
+
+class RVInstSetVLi<dag outs, dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+ bits<5> rs1;
+ bits<5> rd;
+ bits<11> vtypei;
+
+ let Inst{31} = 0;
+ let Inst{30-20} = vtypei;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = 0b111;
+ let Inst{11-7} = rd;
+ let Opcode = OPC_OP_V.Value;
+
+ let Defs = [VTYPE, VL];
+}
+
+class RVInstSetVL<dag outs, dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
+ bits<5> rs2;
+ bits<5> rs1;
+ bits<5> rd;
+
+ let Inst{31} = 1;
+ let Inst{30-25} = 0b000000;
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = 0b111;
+ let Inst{11-7} = rd;
+ let Opcode = OPC_OP_V.Value;
+
+ let Defs = [VTYPE, VL];
+}
+
+class RVInstVV<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
+ string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
+ bits<5> vs2;
+ bits<5> vs1;
+ bits<5> vd;
+ bit vm;
+
+ let Inst{31-26} = funct6;
+ let Inst{25} = vm;
+ let Inst{24-20} = vs2;
+ let Inst{19-15} = vs1;
+ let Inst{14-12} = opv.Value;
+ let Inst{11-7} = vd;
+ let Opcode = OPC_OP_V.Value;
+
+ let Uses = [VTYPE, VL];
+}
+
+class RVInstVX<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
+ string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
+ bits<5> vs2;
+ bits<5> rs1;
+ bits<5> vd;
+ bit vm;
+
+ let Inst{31-26} = funct6;
+ let Inst{25} = vm;
+ let Inst{24-20} = vs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = opv.Value;
+ let Inst{11-7} = vd;
+ let Opcode = OPC_OP_V.Value;
+
+ let Uses = [VTYPE, VL];
+}
+
+class RVInstV2<bits<6> funct6, bits<5> vs2, RISCVVFormat opv, dag outs, dag ins,
+ string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
+ bits<5> rs1;
+ bits<5> vd;
+ bit vm;
+
+ let Inst{31-26} = funct6;
+ let Inst{25} = vm;
+ let Inst{24-20} = vs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = opv.Value;
+ let Inst{11-7} = vd;
+ let Opcode = OPC_OP_V.Value;
+
+ let Uses = [VTYPE, VL];
+}
+
+class RVInstIVI<bits<6> funct6, dag outs, dag ins, string opcodestr,
+ string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
+ bits<5> vs2;
+ bits<5> imm;
+ bits<5> vd;
+ bit vm;
+
+ let Inst{31-26} = funct6;
+ let Inst{25} = vm;
+ let Inst{24-20} = vs2;
+ let Inst{19-15} = imm;
+ let Inst{14-12} = 0b011;
+ let Inst{11-7} = vd;
+ let Opcode = OPC_OP_V.Value;
+
+ let Uses = [VTYPE, VL];
+}
+
+class RVInstV<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, dag outs,
+ dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
+ bits<5> vs2;
+ bits<5> vd;
+ bit vm;
+
+ let Inst{31-26} = funct6;
+ let Inst{25} = vm;
+ let Inst{24-20} = vs2;
+ let Inst{19-15} = vs1;
+ let Inst{14-12} = opv.Value;
+ let Inst{11-7} = vd;
+ let Opcode = OPC_OP_V.Value;
+
+ let Uses = [VTYPE, VL];
+}
+
+class RVInstVLU<bits<3> nf, RISCVMOP mop, RISCVLSUMOP lumop,
+ RISCVWidth width, dag outs, dag ins, string opcodestr,
+ string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
+ bits<5> rs1;
+ bits<5> vd;
+ bit vm;
+
+ let Inst{31-29} = nf;
+ let Inst{28-26} = mop.Value;
+ let Inst{25} = vm;
+ let Inst{24-20} = lumop.Value;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = width.Value;
+ let Inst{11-7} = vd;
+ let Opcode = OPC_LOAD_FP.Value;
+
+ let Uses = [VTYPE, VL];
+}
+
+class RVInstVLS<bits<3> nf, RISCVMOP mop, RISCVWidth width,
+ dag outs, dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
+ bits<5> rs2;
+ bits<5> rs1;
+ bits<5> vd;
+ bit vm;
+
+ let Inst{31-29} = nf;
+ let Inst{28-26} = mop.Value;
+ let Inst{25} = vm;
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = width.Value;
+ let Inst{11-7} = vd;
+ let Opcode = OPC_LOAD_FP.Value;
+
+ let Uses = [VTYPE, VL];
+}
+
+class RVInstVLX<bits<3> nf, RISCVMOP mop, RISCVWidth width,
+ dag outs, dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
+ bits<5> vs2;
+ bits<5> rs1;
+ bits<5> vd;
+ bit vm;
+
+ let Inst{31-29} = nf;
+ let Inst{28-26} = mop.Value;
+ let Inst{25} = vm;
+ let Inst{24-20} = vs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = width.Value;
+ let Inst{11-7} = vd;
+ let Opcode = OPC_LOAD_FP.Value;
+
+ let Uses = [VTYPE, VL];
+}
+
+class RVInstVSU<bits<3> nf, RISCVMOP mop, RISCVLSUMOP sumop,
+ RISCVWidth width, dag outs, dag ins, string opcodestr,
+ string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
+ bits<5> rs1;
+ bits<5> vs3;
+ bit vm;
+
+ let Inst{31-29} = nf;
+ let Inst{28-26} = mop.Value;
+ let Inst{25} = vm;
+ let Inst{24-20} = sumop.Value;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = width.Value;
+ let Inst{11-7} = vs3;
+ let Opcode = OPC_STORE_FP.Value;
+
+ let Uses = [VTYPE, VL];
+}
+
+class RVInstVSS<bits<3> nf, RISCVMOP mop, RISCVWidth width,
+ dag outs, dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
+ bits<5> rs2;
+ bits<5> rs1;
+ bits<5> vs3;
+ bit vm;
+
+ let Inst{31-29} = nf;
+ let Inst{28-26} = mop.Value;
+ let Inst{25} = vm;
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = width.Value;
+ let Inst{11-7} = vs3;
+ let Opcode = OPC_STORE_FP.Value;
+
+ let Uses = [VTYPE, VL];
+}
+
+class RVInstVSX<bits<3> nf, RISCVMOP mop, RISCVWidth width,
+ dag outs, dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
+ bits<5> vs2;
+ bits<5> rs1;
+ bits<5> vs3;
+ bit vm;
+
+ let Inst{31-29} = nf;
+ let Inst{28-26} = mop.Value;
+ let Inst{25} = vm;
+ let Inst{24-20} = vs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = width.Value;
+ let Inst{11-7} = vs3;
+ let Opcode = OPC_STORE_FP.Value;
+
+ let Uses = [VTYPE, VL];
+}
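To make the RVInstVV bit layout above concrete, here is a minimal standalone packer for the same fields (encodeVV is an illustrative helper, not part of the patch); OPC_OP_V is 0b1010111 as defined in RISCVInstrFormats.td earlier in this patch:

  #include <cassert>
  #include <cstdint>

  static uint32_t encodeVV(uint32_t Funct6, uint32_t VM, uint32_t VS2,
                           uint32_t VS1, uint32_t OPV, uint32_t VD) {
    const uint32_t OPC_OP_V = 0b1010111;
    return (Funct6 << 26) | (VM << 25) | (VS2 << 20) | (VS1 << 15) |
           (OPV << 12) | (VD << 7) | OPC_OP_V;
  }

  int main() {
    // With every field zero except vm=1, only bit 25 and the opcode remain set.
    assert(encodeVV(0, 1, 0, 0, 0, 0) == ((1u << 25) | 0b1010111u));
    return 0;
  }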
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index f6bc52968a33..d39ec505127c 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -76,10 +76,10 @@ unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
break;
}
- if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
- MI.getOperand(1).getImm() == 0) {
- FrameIndex = MI.getOperand(0).getIndex();
- return MI.getOperand(2).getReg();
+ if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
+ MI.getOperand(2).getImm() == 0) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
}
return 0;
@@ -112,7 +112,7 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- unsigned SrcReg, bool IsKill, int FI,
+ Register SrcReg, bool IsKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
@@ -139,7 +139,7 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- unsigned DstReg, int FI,
+ Register DstReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
@@ -342,7 +342,7 @@ unsigned RISCVInstrInfo::insertBranch(
*BytesAdded = 0;
// Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 3 || Cond.size() == 0) &&
"RISCV branch conditions have two components!");
@@ -471,14 +471,38 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
case TargetOpcode::KILL:
case TargetOpcode::DBG_VALUE:
return 0;
+ // These values are determined based on RISCVExpandAtomicPseudoInsts,
+ // RISCVExpandPseudoInsts and RISCVMCCodeEmitter, depending on where the
+ // pseudos are expanded.
case RISCV::PseudoCALLReg:
case RISCV::PseudoCALL:
+ case RISCV::PseudoJump:
case RISCV::PseudoTAIL:
case RISCV::PseudoLLA:
case RISCV::PseudoLA:
case RISCV::PseudoLA_TLS_IE:
case RISCV::PseudoLA_TLS_GD:
return 8;
+ case RISCV::PseudoAtomicLoadNand32:
+ case RISCV::PseudoAtomicLoadNand64:
+ return 20;
+ case RISCV::PseudoMaskedAtomicSwap32:
+ case RISCV::PseudoMaskedAtomicLoadAdd32:
+ case RISCV::PseudoMaskedAtomicLoadSub32:
+ return 28;
+ case RISCV::PseudoMaskedAtomicLoadNand32:
+ return 32;
+ case RISCV::PseudoMaskedAtomicLoadMax32:
+ case RISCV::PseudoMaskedAtomicLoadMin32:
+ return 44;
+ case RISCV::PseudoMaskedAtomicLoadUMax32:
+ case RISCV::PseudoMaskedAtomicLoadUMin32:
+ return 36;
+ case RISCV::PseudoCmpXchg32:
+ case RISCV::PseudoCmpXchg64:
+ return 16;
+ case RISCV::PseudoMaskedCmpXchg32:
+ return 32;
case TargetOpcode::INLINEASM:
case TargetOpcode::INLINEASM_BR: {
const MachineFunction &MF = *MI.getParent()->getParent();
@@ -777,6 +801,8 @@ void RISCVInstrInfo::buildOutlinedFrame(
}
}
+ MBB.addLiveIn(RISCV::X5);
+
// Add in a return instruction to the end of the outlined frame.
MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))
.addReg(RISCV::X0, RegState::Define)
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 625b61875133..21bc508cdc9c 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -38,13 +38,13 @@ public:
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, unsigned SrcReg,
+ MachineBasicBlock::iterator MBBI, Register SrcReg,
bool IsKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, unsigned DstReg,
+ MachineBasicBlock::iterator MBBI, Register DstReg,
int FrameIndex, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
@@ -133,5 +133,24 @@ public:
protected:
const RISCVSubtarget &STI;
};
-}
+
+namespace RISCV {
+// Match with the definitions in RISCVInstrFormatsV.td
+enum RVVConstraintType {
+ NoConstraint = 0,
+ WidenV = 1,
+ WidenW = 2,
+ WidenCvt = 3,
+ Narrow = 4,
+ Iota = 5,
+ SlideUp = 6,
+ Vrgather = 7,
+ Vcompress = 8,
+
+ ConstraintOffset = 5,
+ ConstraintMask = 0b1111
+};
+} // end namespace RISCV
+
+} // end namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 81f1abe8337e..b9483062ddeb 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -144,6 +144,20 @@ def simm12 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<12>(Imm);}]> {
let OperandNamespace = "RISCVOp";
}
+// A 12-bit signed immediate plus one, i.e. an immediate in the range [-2047, 2048].
+def simm12_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT,
+ [{return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;}]> {
+ let ParserMatchClass = SImmAsmOperand<12>;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeSImmOperand<12>";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;
+ return MCOp.isBareSymbolRef();
+ }];
+}
+
// A 13-bit signed immediate where the least significant bit is zero.
def simm13_lsb0 : Operand<OtherVT> {
let ParserMatchClass = SImmAsmOperand<13, "Lsb0">;
@@ -222,6 +236,18 @@ def call_symbol : Operand<XLenVT> {
let ParserMatchClass = CallSymbol;
}
+def PseudoJumpSymbol : AsmOperandClass {
+ let Name = "PseudoJumpSymbol";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "InvalidPseudoJumpSymbol";
+ let ParserMethod = "parsePseudoJumpSymbol";
+}
+
+// A bare symbol used for pseudo jumps only.
+def pseudo_jump_symbol : Operand<XLenVT> {
+ let ParserMatchClass = PseudoJumpSymbol;
+}
+
def TPRelAddSymbol : AsmOperandClass {
let Name = "TPRelAddSymbol";
let RenderMethod = "addImmOperands";
@@ -284,6 +310,12 @@ def HI20 : SDNodeXForm<imm, [{
SDLoc(N), N->getValueType(0));
}]>;
+// Return the negation of an immediate value.
+def NegImm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-N->getSExtValue(), SDLoc(N),
+ N->getValueType(0));
+}]>;
+
//===----------------------------------------------------------------------===//
// Instruction Formats
//===----------------------------------------------------------------------===//
@@ -299,7 +331,7 @@ class BranchCC_rri<bits<3> funct3, string opcodestr>
: RVInstB<funct3, OPC_BRANCH, (outs),
(ins GPR:$rs1, GPR:$rs2, simm13_lsb0:$imm12),
opcodestr, "$rs1, $rs2, $imm12">,
- Sched<[WriteJmp]> {
+ Sched<[WriteJmp, ReadJmp, ReadJmp]> {
let isBranch = 1;
let isTerminator = 1;
}
@@ -557,6 +589,18 @@ def SFENCE_VMA : RVInstR<0b0001001, 0b000, OPC_SYSTEM, (outs),
}
//===----------------------------------------------------------------------===//
+// Debug instructions
+//===----------------------------------------------------------------------===//
+
+let isBarrier = 1, isReturn = 1, isTerminator = 1 in {
+def DRET : Priv<"dret", 0b0111101>, Sched<[]> {
+ let rd = 0;
+ let rs1 = 0;
+ let rs2 = 0b10010;
+}
+} // isBarrier = 1, isReturn = 1, isTerminator = 1
+
+//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20)
//===----------------------------------------------------------------------===//
@@ -752,7 +796,7 @@ def : MnemonicAlias<"sbreak", "ebreak">;
//
// Naming convention: For 'generic' pattern classes, we use the naming
// convention PatTy1Ty2. For pattern classes which offer a more complex
-// expension, prefix the class name, e.g. BccPat.
+// expansion, prefix the class name, e.g. BccPat.
//===----------------------------------------------------------------------===//
/// Generic pattern classes
@@ -845,12 +889,12 @@ def : PatGprSimm12<setult, SLTIU>;
// handled by a RISC-V instruction.
def : Pat<(seteq GPR:$rs1, 0), (SLTIU GPR:$rs1, 1)>;
def : Pat<(seteq GPR:$rs1, GPR:$rs2), (SLTIU (XOR GPR:$rs1, GPR:$rs2), 1)>;
-def : Pat<(seteq GPR:$rs1, simm12:$imm12),
- (SLTIU (XORI GPR:$rs1, simm12:$imm12), 1)>;
+def : Pat<(seteq GPR:$rs1, simm12_plus1:$imm12),
+ (SLTIU (ADDI GPR:$rs1, (NegImm simm12_plus1:$imm12)), 1)>;
def : Pat<(setne GPR:$rs1, 0), (SLTU X0, GPR:$rs1)>;
def : Pat<(setne GPR:$rs1, GPR:$rs2), (SLTU X0, (XOR GPR:$rs1, GPR:$rs2))>;
-def : Pat<(setne GPR:$rs1, simm12:$imm12),
- (SLTU X0, (XORI GPR:$rs1, simm12:$imm12))>;
+def : Pat<(setne GPR:$rs1, simm12_plus1:$imm12),
+ (SLTU X0, (ADDI GPR:$rs1, (NegImm simm12_plus1:$imm12)))>;
def : Pat<(setugt GPR:$rs1, GPR:$rs2), (SLTU GPR:$rs2, GPR:$rs1)>;
def : Pat<(setuge GPR:$rs1, GPR:$rs2), (XORI (SLTU GPR:$rs1, GPR:$rs2), 1)>;
def : Pat<(setule GPR:$rs1, GPR:$rs2), (XORI (SLTU GPR:$rs2, GPR:$rs1), 1)>;
@@ -968,6 +1012,12 @@ def : Pat<(riscv_tail (iPTR tglobaladdr:$dst)),
def : Pat<(riscv_tail (iPTR texternalsym:$dst)),
(PseudoTAIL texternalsym:$dst)>;
+let isCall = 0, isBarrier = 0, isCodeGenOnly = 0, hasSideEffects = 0,
+ mayStore = 0, mayLoad = 0 in
+def PseudoJump : Pseudo<(outs GPR:$rd), (ins pseudo_jump_symbol:$target), []> {
+ let AsmString = "jump\t$target, $rd";
+}
+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
@@ -1126,3 +1176,5 @@ include "RISCVInstrInfoA.td"
include "RISCVInstrInfoF.td"
include "RISCVInstrInfoD.td"
include "RISCVInstrInfoC.td"
+include "RISCVInstrInfoB.td"
+include "RISCVInstrInfoV.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index de73c8df9367..7fce37519b93 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -235,13 +235,13 @@ class PseudoMaskedAMOUMinUMax
class PseudoMaskedAMOPat<Intrinsic intrin, Pseudo AMOInst>
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
- (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering)>;
+ (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>;
class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst>
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
timm:$ordering),
(AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
- imm:$ordering)>;
+ timm:$ordering)>;
def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i32,
@@ -310,7 +310,7 @@ def PseudoMaskedCmpXchg32
def : Pat<(int_riscv_masked_cmpxchg_i32
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
(PseudoMaskedCmpXchg32
- GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>;
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
} // Predicates = [HasStdExtA]
@@ -387,5 +387,5 @@ defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64>;
def : Pat<(int_riscv_masked_cmpxchg_i64
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
(PseudoMaskedCmpXchg32
- GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>;
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
} // Predicates = [HasStdExtA, IsRV64]
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
new file mode 100644
index 000000000000..34a463626e29
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -0,0 +1,634 @@
+//===-- RISCVInstrInfoB.td - RISC-V 'B' instructions -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the RISC-V instructions from the standard 'B' Bitmanip
+// extension, version 0.92.
+// This version is still experimental as the 'B' extension hasn't been
+// ratified yet.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Operand definitions.
+//===----------------------------------------------------------------------===//
+
+def UImmLog2XLenHalfAsmOperand : AsmOperandClass {
+ let Name = "UImmLog2XLenHalf";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "InvalidUImmLog2XLenHalf";
+}
+
+def shfl_uimm : Operand<XLenVT>, ImmLeaf<XLenVT, [{
+ if (Subtarget->is64Bit())
+ return isUInt<5>(Imm);
+ return isUInt<4>(Imm);
+}]> {
+ let ParserMatchClass = UImmLog2XLenHalfAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<5>";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ if (STI.getTargetTriple().isArch64Bit())
+ return isUInt<5>(Imm);
+ return isUInt<4>(Imm);
+ }];
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction class templates
+//===----------------------------------------------------------------------===//
+
+// Some of these templates should be moved to RISCVInstrFormats.td once the B
+// extension has been ratified.
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVBUnary<bits<7> funct7, bits<5> funct5, bits<3> funct3,
+ RISCVOpcode opcode, string opcodestr>
+ : RVInstR<funct7, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1),
+ opcodestr, "$rd, $rs1"> {
+ let Inst{24-20} = funct5;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVBALUW_ri<bits<3> funct3, string opcodestr>
+ : RVInstI<funct3, OPC_OP_IMM_32, (outs GPR:$rd),
+ (ins GPR:$rs1, simm12:$imm12), opcodestr, "$rd, $rs1, $imm12">;
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVBShift_ri<bits<5> funct5, bits<3> funct3, RISCVOpcode opcode,
+ string opcodestr>
+ : RVInstI<funct3, opcode, (outs GPR:$rd),
+ (ins GPR:$rs1, uimmlog2xlen:$shamt), opcodestr,
+ "$rd, $rs1, $shamt"> {
+ bits<6> shamt;
+
+ let Inst{31-27} = funct5;
+ // NOTE: the bit op(26)=1 is used to select funnel shifts. All other
+  // shift operations, and operations that live in the encoding space
+  // of the shifts (single-bit operations, grev, gorc), use op(26) = 0.
+ let Inst{26} = 0;
+ let Inst{25-20} = shamt;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVBShiftW_ri<bits<7> funct7, bits<3> funct3, RISCVOpcode opcode,
+ string opcodestr>
+ : RVInstI<funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1, uimm5:$shamt),
+ opcodestr, "$rd, $rs1, $shamt"> {
+ bits<5> shamt;
+
+ let Inst{31-25} = funct7;
+ let Inst{24-20} = shamt;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVBShfl_ri<bits<6> funct6, bits<3> funct3, RISCVOpcode opcode,
+ string opcodestr>
+ : RVInstI<funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1, shfl_uimm:$shamt),
+ opcodestr, "$rd, $rs1, $shamt"> {
+ bits<6> shamt;
+
+ let Inst{31-26} = funct6;
+ let Inst{25-20} = shamt;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVBTernaryR<bits<2> funct2, bits<3> funct3_b, RISCVOpcode opcode,
+ string opcodestr, string argstr>
+ : RVInstR4<funct2, opcode, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, GPR:$rs3), opcodestr, argstr> {
+ let Inst{14-12} = funct3_b;
+}
+
+// Currently used by FSRI only
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVBTernaryImm6<bits<3> funct3_b, RISCVOpcode opcode,
+ string opcodestr, string argstr>
+ : RVInstR4<0b10, opcode, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs3, uimmlog2xlen:$shamt),
+ opcodestr, argstr> {
+ bits<6> shamt;
+
+  // NOTE: the first argument of RVInstR4 is hardcoded to 0b10, like the other
+  // funnel shift instructions. The second bit of that argument, though, is
+  // overwritten by the shamt, as the encoding of this particular instruction
+  // requires. This obtains op(26) = 1, as required by funnel shift
+  // instructions, without the need for a confusing extra argument in the
+  // definition of the instruction.
+ let Inst{25-20} = shamt;
+ let Inst{14-12} = funct3_b;
+}
+
+// Currently used by FSRIW only
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVBTernaryImm5<bits<2> funct2, bits<3> funct3_b, RISCVOpcode opcode,
+ string opcodestr, string argstr>
+ : RVInstR4<funct2, opcode, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs3, uimm5:$shamt), opcodestr, argstr> {
+ bits<5> shamt;
+
+ let Inst{24-20} = shamt;
+ let Inst{14-12} = funct3_b;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZbbOrZbp] in {
+def ANDN : ALU_rr<0b0100000, 0b111, "andn">, Sched<[]>;
+def ORN : ALU_rr<0b0100000, 0b110, "orn">, Sched<[]>;
+def XNOR : ALU_rr<0b0100000, 0b100, "xnor">, Sched<[]>;
+} // Predicates = [HasStdExtZbbOrZbp]
+
+let Predicates = [HasStdExtZbb] in {
+def SLO : ALU_rr<0b0010000, 0b001, "slo">, Sched<[]>;
+def SRO : ALU_rr<0b0010000, 0b101, "sro">, Sched<[]>;
+} // Predicates = [HasStdExtZbb]
+
+let Predicates = [HasStdExtZbbOrZbp] in {
+def ROL : ALU_rr<0b0110000, 0b001, "rol">, Sched<[]>;
+def ROR : ALU_rr<0b0110000, 0b101, "ror">, Sched<[]>;
+} // Predicates = [HasStdExtZbbOrZbp]
+
+let Predicates = [HasStdExtZbs] in {
+def SBCLR : ALU_rr<0b0100100, 0b001, "sbclr">, Sched<[]>;
+def SBSET : ALU_rr<0b0010100, 0b001, "sbset">, Sched<[]>;
+def SBINV : ALU_rr<0b0110100, 0b001, "sbinv">, Sched<[]>;
+def SBEXT : ALU_rr<0b0100100, 0b101, "sbext">, Sched<[]>;
+} // Predicates = [HasStdExtZbs]
+
+let Predicates = [HasStdExtZbp] in {
+def GORC : ALU_rr<0b0010100, 0b101, "gorc">, Sched<[]>;
+def GREV : ALU_rr<0b0110100, 0b101, "grev">, Sched<[]>;
+} // Predicates = [HasStdExtZbp]
+
+let Predicates = [HasStdExtZbb] in {
+def SLOI : RVBShift_ri<0b00100, 0b001, OPC_OP_IMM, "sloi">, Sched<[]>;
+def SROI : RVBShift_ri<0b00100, 0b101, OPC_OP_IMM, "sroi">, Sched<[]>;
+} // Predicates = [HasStdExtZbb]
+
+let Predicates = [HasStdExtZbbOrZbp] in
+def RORI : RVBShift_ri<0b01100, 0b101, OPC_OP_IMM, "rori">, Sched<[]>;
+
+let Predicates = [HasStdExtZbs] in {
+def SBCLRI : RVBShift_ri<0b01001, 0b001, OPC_OP_IMM, "sbclri">, Sched<[]>;
+def SBSETI : RVBShift_ri<0b00101, 0b001, OPC_OP_IMM, "sbseti">, Sched<[]>;
+def SBINVI : RVBShift_ri<0b01101, 0b001, OPC_OP_IMM, "sbinvi">, Sched<[]>;
+def SBEXTI : RVBShift_ri<0b01001, 0b101, OPC_OP_IMM, "sbexti">, Sched<[]>;
+} // Predicates = [HasStdExtZbs]
+
+let Predicates = [HasStdExtZbp] in {
+def GREVI : RVBShift_ri<0b01101, 0b101, OPC_OP_IMM, "grevi">, Sched<[]>;
+def GORCI : RVBShift_ri<0b00101, 0b101, OPC_OP_IMM, "gorci">, Sched<[]>;
+} // Predicates = [HasStdExtZbp]
+
+let Predicates = [HasStdExtZbt] in {
+def CMIX : RVBTernaryR<0b11, 0b001, OPC_OP, "cmix", "$rd, $rs2, $rs1, $rs3">,
+ Sched<[]>;
+def CMOV : RVBTernaryR<0b11, 0b101, OPC_OP, "cmov", "$rd, $rs2, $rs1, $rs3">,
+ Sched<[]>;
+def FSL : RVBTernaryR<0b10, 0b001, OPC_OP, "fsl", "$rd, $rs1, $rs3, $rs2">,
+ Sched<[]>;
+def FSR : RVBTernaryR<0b10, 0b101, OPC_OP, "fsr", "$rd, $rs1, $rs3, $rs2">,
+ Sched<[]>;
+def FSRI : RVBTernaryImm6<0b101, OPC_OP_IMM, "fsri",
+ "$rd, $rs1, $rs3, $shamt">, Sched<[]>;
+} // Predicates = [HasStdExtZbt]
+
+let Predicates = [HasStdExtZbb] in {
+def CLZ : RVBUnary<0b0110000, 0b00000, 0b001, RISCVOpcode<0b0010011>, "clz">,
+ Sched<[]>;
+def CTZ : RVBUnary<0b0110000, 0b00001, 0b001, RISCVOpcode<0b0010011>, "ctz">,
+ Sched<[]>;
+def PCNT : RVBUnary<0b0110000, 0b00010, 0b001, RISCVOpcode<0b0010011>, "pcnt">,
+ Sched<[]>;
+} // Predicates = [HasStdExtZbb]
+
+let Predicates = [HasStdExtZbm, IsRV64] in
+def BMATFLIP : RVBUnary<0b0110000, 0b00011, 0b001, RISCVOpcode<0b0010011>,
+ "bmatflip">, Sched<[]>;
+
+let Predicates = [HasStdExtZbb] in {
+def SEXTB : RVBUnary<0b0110000, 0b00100, 0b001, RISCVOpcode<0b0010011>,
+ "sext.b">, Sched<[]>;
+def SEXTH : RVBUnary<0b0110000, 0b00101, 0b001, RISCVOpcode<0b0010011>,
+ "sext.h">, Sched<[]>;
+} // Predicates = [HasStdExtZbb]
+
+let Predicates = [HasStdExtZbr] in {
+def CRC32B : RVBUnary<0b0110000, 0b10000, 0b001, RISCVOpcode<0b0010011>,
+ "crc32.b">, Sched<[]>;
+def CRC32H : RVBUnary<0b0110000, 0b10001, 0b001, RISCVOpcode<0b0010011>,
+ "crc32.h">, Sched<[]>;
+def CRC32W : RVBUnary<0b0110000, 0b10010, 0b001, RISCVOpcode<0b0010011>,
+ "crc32.w">, Sched<[]>;
+} // Predicates = [HasStdExtZbr]
+
+let Predicates = [HasStdExtZbr, IsRV64] in
+def CRC32D : RVBUnary<0b0110000, 0b10011, 0b001, RISCVOpcode<0b0010011>,
+ "crc32.d">, Sched<[]>;
+
+let Predicates = [HasStdExtZbr] in {
+def CRC32CB : RVBUnary<0b0110000, 0b11000, 0b001, RISCVOpcode<0b0010011>,
+ "crc32c.b">, Sched<[]>;
+def CRC32CH : RVBUnary<0b0110000, 0b11001, 0b001, RISCVOpcode<0b0010011>,
+ "crc32c.h">, Sched<[]>;
+def CRC32CW : RVBUnary<0b0110000, 0b11010, 0b001, RISCVOpcode<0b0010011>,
+ "crc32c.w">, Sched<[]>;
+} // Predicates = [HasStdExtZbr]
+
+let Predicates = [HasStdExtZbr, IsRV64] in
+def CRC32CD : RVBUnary<0b0110000, 0b11011, 0b001, RISCVOpcode<0b0010011>,
+ "crc32c.d">, Sched<[]>;
+
+let Predicates = [HasStdExtZbc] in {
+def CLMUL : ALU_rr<0b0000101, 0b001, "clmul">, Sched<[]>;
+def CLMULR : ALU_rr<0b0000101, 0b010, "clmulr">, Sched<[]>;
+def CLMULH : ALU_rr<0b0000101, 0b011, "clmulh">, Sched<[]>;
+} // Predicates = [HasStdExtZbc]
+
+let Predicates = [HasStdExtZbb] in {
+def MIN : ALU_rr<0b0000101, 0b100, "min">, Sched<[]>;
+def MAX : ALU_rr<0b0000101, 0b101, "max">, Sched<[]>;
+def MINU : ALU_rr<0b0000101, 0b110, "minu">, Sched<[]>;
+def MAXU : ALU_rr<0b0000101, 0b111, "maxu">, Sched<[]>;
+} // Predicates = [HasStdExtZbb]
+
+let Predicates = [HasStdExtZbp] in {
+def SHFL : ALU_rr<0b0000100, 0b001, "shfl">, Sched<[]>;
+def UNSHFL : ALU_rr<0b0000100, 0b101, "unshfl">, Sched<[]>;
+} // Predicates = [HasStdExtZbp]
+
+let Predicates = [HasStdExtZbe] in {
+def BDEP : ALU_rr<0b0100100, 0b110, "bdep">, Sched<[]>;
+def BEXT : ALU_rr<0b0000100, 0b110, "bext">, Sched<[]>;
+} // Predicates = [HasStdExtZbe]
+
+let Predicates = [HasStdExtZbbOrZbp] in {
+def PACK : ALU_rr<0b0000100, 0b100, "pack">, Sched<[]>;
+def PACKU : ALU_rr<0b0100100, 0b100, "packu">, Sched<[]>;
+} // Predicates = [HasStdExtZbbOrZbp]
+
+let Predicates = [HasStdExtZbm, IsRV64] in {
+def BMATOR : ALU_rr<0b0000100, 0b011, "bmator">, Sched<[]>;
+def BMATXOR : ALU_rr<0b0100100, 0b011, "bmatxor">, Sched<[]>;
+} // Predicates = [HasStdExtZbm, IsRV64]
+
+let Predicates = [HasStdExtZbbOrZbp] in
+def PACKH : ALU_rr<0b0000100, 0b111, "packh">, Sched<[]>;
+
+let Predicates = [HasStdExtZbf] in
+def BFP : ALU_rr<0b0100100, 0b111, "bfp">, Sched<[]>;
+
+let Predicates = [HasStdExtZbp] in {
+def SHFLI : RVBShfl_ri<0b000010, 0b001, OPC_OP_IMM, "shfli">, Sched<[]>;
+def UNSHFLI : RVBShfl_ri<0b000010, 0b101, OPC_OP_IMM, "unshfli">, Sched<[]>;
+} // Predicates = [HasStdExtZbp]
+
+let Predicates = [HasStdExtZbb, IsRV64] in {
+def ADDIWU : RVBALUW_ri<0b100, "addiwu">, Sched<[]>;
+def SLLIUW : RVBShift_ri<0b00001, 0b001, OPC_OP_IMM_32, "slliu.w">, Sched<[]>;
+def ADDWU : ALUW_rr<0b0000101, 0b000, "addwu">, Sched<[]>;
+def SUBWU : ALUW_rr<0b0100101, 0b000, "subwu">, Sched<[]>;
+def ADDUW : ALUW_rr<0b0000100, 0b000, "addu.w">, Sched<[]>;
+def SUBUW : ALUW_rr<0b0100100, 0b000, "subu.w">, Sched<[]>;
+} // Predicates = [HasStdExtZbb, IsRV64]
+
+let Predicates = [HasStdExtZbb, IsRV64] in {
+def SLOW : ALUW_rr<0b0010000, 0b001, "slow">, Sched<[]>;
+def SROW : ALUW_rr<0b0010000, 0b101, "srow">, Sched<[]>;
+} // Predicates = [HasStdExtZbb, IsRV64]
+
+let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
+def ROLW : ALUW_rr<0b0110000, 0b001, "rolw">, Sched<[]>;
+def RORW : ALUW_rr<0b0110000, 0b101, "rorw">, Sched<[]>;
+} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
+
+let Predicates = [HasStdExtZbs, IsRV64] in {
+def SBCLRW : ALUW_rr<0b0100100, 0b001, "sbclrw">, Sched<[]>;
+def SBSETW : ALUW_rr<0b0010100, 0b001, "sbsetw">, Sched<[]>;
+def SBINVW : ALUW_rr<0b0110100, 0b001, "sbinvw">, Sched<[]>;
+def SBEXTW : ALUW_rr<0b0100100, 0b101, "sbextw">, Sched<[]>;
+} // Predicates = [HasStdExtZbs, IsRV64]
+
+let Predicates = [HasStdExtZbp, IsRV64] in {
+def GORCW : ALUW_rr<0b0010100, 0b101, "gorcw">, Sched<[]>;
+def GREVW : ALUW_rr<0b0110100, 0b101, "grevw">, Sched<[]>;
+} // Predicates = [HasStdExtZbp, IsRV64]
+
+let Predicates = [HasStdExtZbb, IsRV64] in {
+def SLOIW : RVBShiftW_ri<0b0010000, 0b001, OPC_OP_IMM_32, "sloiw">, Sched<[]>;
+def SROIW : RVBShiftW_ri<0b0010000, 0b101, OPC_OP_IMM_32, "sroiw">, Sched<[]>;
+} // Predicates = [HasStdExtZbb, IsRV64]
+
+let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
+def RORIW : RVBShiftW_ri<0b0110000, 0b101, OPC_OP_IMM_32, "roriw">, Sched<[]>;
+
+let Predicates = [HasStdExtZbs, IsRV64] in {
+def SBCLRIW : RVBShiftW_ri<0b0100100, 0b001, OPC_OP_IMM_32, "sbclriw">,
+ Sched<[]>;
+def SBSETIW : RVBShiftW_ri<0b0010100, 0b001, OPC_OP_IMM_32, "sbsetiw">,
+ Sched<[]>;
+def SBINVIW : RVBShiftW_ri<0b0110100, 0b001, OPC_OP_IMM_32, "sbinviw">,
+ Sched<[]>;
+} // Predicates = [HasStdExtZbs, IsRV64]
+
+let Predicates = [HasStdExtZbp, IsRV64] in {
+def GORCIW : RVBShiftW_ri<0b0010100, 0b101, OPC_OP_IMM_32, "gorciw">, Sched<[]>;
+def GREVIW : RVBShiftW_ri<0b0110100, 0b101, OPC_OP_IMM_32, "greviw">, Sched<[]>;
+} // Predicates = [HasStdExtZbp, IsRV64]
+
+let Predicates = [HasStdExtZbt, IsRV64] in {
+def FSLW : RVBTernaryR<0b10, 0b001, OPC_OP_32,
+ "fslw", "$rd, $rs1, $rs3, $rs2">, Sched<[]>;
+def FSRW : RVBTernaryR<0b10, 0b101, OPC_OP_32, "fsrw",
+ "$rd, $rs1, $rs3, $rs2">, Sched<[]>;
+def FSRIW : RVBTernaryImm5<0b10, 0b101, OPC_OP_IMM_32,
+ "fsriw", "$rd, $rs1, $rs3, $shamt">, Sched<[]>;
+} // Predicates = [HasStdExtZbt, IsRV64]
+
+let Predicates = [HasStdExtZbb, IsRV64] in {
+def CLZW : RVBUnary<0b0110000, 0b00000, 0b001, RISCVOpcode<0b0011011>,
+ "clzw">, Sched<[]>;
+def CTZW : RVBUnary<0b0110000, 0b00001, 0b001, RISCVOpcode<0b0011011>,
+ "ctzw">, Sched<[]>;
+def PCNTW : RVBUnary<0b0110000, 0b00010, 0b001, RISCVOpcode<0b0011011>,
+ "pcntw">, Sched<[]>;
+} // Predicates = [HasStdExtZbb, IsRV64]
+
+let Predicates = [HasStdExtZbc, IsRV64] in {
+def CLMULW : ALUW_rr<0b0000101, 0b001, "clmulw">, Sched<[]>;
+def CLMULRW : ALUW_rr<0b0000101, 0b010, "clmulrw">, Sched<[]>;
+def CLMULHW : ALUW_rr<0b0000101, 0b011, "clmulhw">, Sched<[]>;
+} // Predicates = [HasStdExtZbc, IsRV64]
+
+let Predicates = [HasStdExtZbp, IsRV64] in {
+def SHFLW : ALUW_rr<0b0000100, 0b001, "shflw">, Sched<[]>;
+def UNSHFLW : ALUW_rr<0b0000100, 0b101, "unshflw">, Sched<[]>;
+} // Predicates = [HasStdExtZbp, IsRV64]
+
+let Predicates = [HasStdExtZbe, IsRV64] in {
+def BDEPW : ALUW_rr<0b0100100, 0b110, "bdepw">, Sched<[]>;
+def BEXTW : ALUW_rr<0b0000100, 0b110, "bextw">, Sched<[]>;
+} // Predicates = [HasStdExtZbe, IsRV64]
+
+let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
+def PACKW : ALUW_rr<0b0000100, 0b100, "packw">, Sched<[]>;
+def PACKUW : ALUW_rr<0b0100100, 0b100, "packuw">, Sched<[]>;
+} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
+
+let Predicates = [HasStdExtZbf, IsRV64] in
+def BFPW : ALUW_rr<0b0100100, 0b111, "bfpw">, Sched<[]>;
+
+//===----------------------------------------------------------------------===//
+// Future compressed instructions
+//===----------------------------------------------------------------------===//
+
+// The presence of these instructions in the B extension is purely experimental
+// and they should be moved to the C extension as soon as they are ratified.
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVBInstC<bits<2> funct2, string opcodestr>
+ : RVInst16<(outs GPRC:$rs_wb), (ins GPRC:$rs), opcodestr, "$rs", [],
+ InstFormatCR> {
+ bits<3> rs;
+ let Constraints = "$rs = $rs_wb";
+
+ let Inst{15-12} = 0b0110;
+ let Inst{11-10} = funct2;
+ let Inst{9-7} = rs;
+ let Inst{6-0} = 0b0000001;
+}
+
+// The namespace RVBC exists to avoid encoding conflicts with the compressed
+// instructions c.addi16sp and c.lui already implemented in the C extension.
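+// (These proposed instructions are encoded with funct3=0b011 and op=0b01, the
+// same quadrant-1 space occupied by c.addi16sp and c.lui, hence the separate
+// decoder table.)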
+
+let DecoderNamespace = "RVBC", Predicates = [HasStdExtZbproposedc, HasStdExtC] in {
+def C_NOT : RVBInstC<0b00, "c.not">, Sched<[]>;
+def C_NEG : RVBInstC<0b01, "c.neg">, Sched<[]>;
+} // DecoderNamespace = "RVBC", Predicates = [HasStdExtZbproposedc, HasStdExtC]
+
+let DecoderNamespace = "RVBC", Predicates = [HasStdExtZbproposedc, HasStdExtZbbOrZbp, HasStdExtC, IsRV64] in
+def C_ZEXTW : RVBInstC<0b10, "c.zext.w">, Sched<[]>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZbb, IsRV32] in {
+def : InstAlias<"zext.b $rd, $rs", (ANDI GPR:$rd, GPR:$rs, 0xFF)>;
+def : InstAlias<"zext.h $rd, $rs", (PACK GPR:$rd, GPR:$rs, X0)>;
+} // Predicates = [HasStdExtZbb, IsRV32]
+
+let Predicates = [HasStdExtZbb, IsRV64] in {
+def : InstAlias<"zext.b $rd, $rs", (ANDI GPR:$rd, GPR:$rs, 0xFF)>;
+def : InstAlias<"zext.h $rd, $rs", (PACKW GPR:$rd, GPR:$rs, X0)>;
+def : InstAlias<"zext.w $rd, $rs", (PACK GPR:$rd, GPR:$rs, X0)>;
+} // Predicates = [HasStdExtZbb, IsRV64]
+
+let Predicates = [HasStdExtZbbOrZbp] in {
+def : InstAlias<"rev.p $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00001)>,
+ Sched<[]>;
+def : InstAlias<"rev2.n $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00010)>,
+ Sched<[]>;
+def : InstAlias<"rev.n $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00011)>,
+ Sched<[]>;
+def : InstAlias<"rev4.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00100)>,
+ Sched<[]>;
+def : InstAlias<"rev2.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00110)>,
+ Sched<[]>;
+def : InstAlias<"rev.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00111)>,
+ Sched<[]>;
+def : InstAlias<"rev8.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01000)>,
+ Sched<[]>;
+def : InstAlias<"rev4.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01100)>,
+ Sched<[]>;
+def : InstAlias<"rev2.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01110)>,
+ Sched<[]>;
+def : InstAlias<"rev.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01111)>,
+ Sched<[]>;
+
+def : InstAlias<"zip.n $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0001)>,
+ Sched<[]>;
+def : InstAlias<"unzip.n $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0001)>,
+ Sched<[]>;
+def : InstAlias<"zip2.b $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0010)>,
+ Sched<[]>;
+def : InstAlias<"unzip2.b $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0010)>,
+ Sched<[]>;
+def : InstAlias<"zip.b $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0011)>,
+ Sched<[]>;
+def : InstAlias<"unzip.b $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0011)>,
+ Sched<[]>;
+def : InstAlias<"zip4.h $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0100)>,
+ Sched<[]>;
+def : InstAlias<"unzip4.h $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0100)>,
+ Sched<[]>;
+def : InstAlias<"zip2.h $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0110)>,
+ Sched<[]>;
+def : InstAlias<"unzip2.h $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0110)>,
+ Sched<[]>;
+def : InstAlias<"zip.h $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0111)>,
+ Sched<[]>;
+def : InstAlias<"unzip.h $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0111)>,
+ Sched<[]>;
+
+def : InstAlias<"orc.p $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00001)>,
+ Sched<[]>;
+def : InstAlias<"orc2.n $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00010)>,
+ Sched<[]>;
+def : InstAlias<"orc.n $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00011)>,
+ Sched<[]>;
+def : InstAlias<"orc4.b $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00100)>,
+ Sched<[]>;
+def : InstAlias<"orc2.b $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00110)>,
+ Sched<[]>;
+def : InstAlias<"orc.b $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b00111)>,
+ Sched<[]>;
+def : InstAlias<"orc8.h $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b01000)>,
+ Sched<[]>;
+def : InstAlias<"orc4.h $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b01100)>,
+ Sched<[]>;
+def : InstAlias<"orc2.h $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b01110)>,
+ Sched<[]>;
+def : InstAlias<"orc.h $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b01111)>,
+ Sched<[]>;
+} // Predicates = [HasStdExtZbbOrZbp]
+
+let Predicates = [HasStdExtZbbOrZbp, IsRV32] in {
+def : InstAlias<"rev16 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b10000)>, Sched<[]>;
+def : InstAlias<"rev8 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b11000)>, Sched<[]>;
+def : InstAlias<"rev4 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b11100)>, Sched<[]>;
+def : InstAlias<"rev2 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b11110)>, Sched<[]>;
+def : InstAlias<"rev $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b11111)>, Sched<[]>;
+
+def : InstAlias<"zip8 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1000)>,
+ Sched<[]>;
+def : InstAlias<"unzip8 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1000)>,
+ Sched<[]>;
+def : InstAlias<"zip4 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1100)>,
+ Sched<[]>;
+def : InstAlias<"unzip4 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1100)>,
+ Sched<[]>;
+def : InstAlias<"zip2 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1110)>,
+ Sched<[]>;
+def : InstAlias<"unzip2 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1110)>,
+ Sched<[]>;
+def : InstAlias<"zip $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1111)>,
+ Sched<[]>;
+def : InstAlias<"unzip $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1111)>,
+ Sched<[]>;
+
+def : InstAlias<"orc16 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b10000)>, Sched<[]>;
+def : InstAlias<"orc8 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11000)>, Sched<[]>;
+def : InstAlias<"orc4 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11100)>, Sched<[]>;
+def : InstAlias<"orc2 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11110)>, Sched<[]>;
+def : InstAlias<"orc $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11111)>, Sched<[]>;
+} // Predicates = [HasStdExtZbbOrZbp, IsRV32]
+
+let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
+def : InstAlias<"rev16.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b010000)>,
+ Sched<[]>;
+def : InstAlias<"rev8.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b011000)>,
+ Sched<[]>;
+def : InstAlias<"rev4.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b011100)>,
+ Sched<[]>;
+def : InstAlias<"rev2.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b011110)>,
+ Sched<[]>;
+def : InstAlias<"rev.w $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b011111)>,
+ Sched<[]>;
+def : InstAlias<"rev32 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b100000)>,
+ Sched<[]>;
+def : InstAlias<"rev16 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b110000)>,
+ Sched<[]>;
+def : InstAlias<"rev8 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b111000)>,
+ Sched<[]>;
+def : InstAlias<"rev4 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b111100)>,
+ Sched<[]>;
+def : InstAlias<"rev2 $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b111110)>,
+ Sched<[]>;
+def : InstAlias<"rev $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b111111)>,
+ Sched<[]>;
+
+def : InstAlias<"zip8.w $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b01000)>,
+ Sched<[]>;
+def : InstAlias<"unzip8.w $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b01000)>,
+ Sched<[]>;
+def : InstAlias<"zip4.w $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b01100)>,
+ Sched<[]>;
+def : InstAlias<"unzip4.w $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b01100)>,
+ Sched<[]>;
+def : InstAlias<"zip2.w $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b01110)>,
+ Sched<[]>;
+def : InstAlias<"unzip2.w $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b01110)>,
+ Sched<[]>;
+def : InstAlias<"zip.w $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b01111)>,
+ Sched<[]>;
+def : InstAlias<"unzip.w $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b01111)>,
+ Sched<[]>;
+def : InstAlias<"zip16 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b10000)>,
+ Sched<[]>;
+def : InstAlias<"unzip16 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b10000)>,
+ Sched<[]>;
+def : InstAlias<"zip8 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b11000)>,
+ Sched<[]>;
+def : InstAlias<"unzip8 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b11000)>,
+ Sched<[]>;
+def : InstAlias<"zip4 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b11100)>,
+ Sched<[]>;
+def : InstAlias<"unzip4 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b11100)>,
+ Sched<[]>;
+def : InstAlias<"zip2 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b11110)>,
+ Sched<[]>;
+def : InstAlias<"unzip2 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b11110)>,
+ Sched<[]>;
+def : InstAlias<"zip $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b11111)>,
+ Sched<[]>;
+def : InstAlias<"unzip $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b11111)>,
+ Sched<[]>;
+
+def : InstAlias<"orc16.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b010000)>,
+ Sched<[]>;
+def : InstAlias<"orc8.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b011000)>,
+ Sched<[]>;
+def : InstAlias<"orc4.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b011100)>,
+ Sched<[]>;
+def : InstAlias<"orc2.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b011110)>,
+ Sched<[]>;
+def : InstAlias<"orc.w $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b011111)>,
+ Sched<[]>;
+def : InstAlias<"orc32 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b100000)>,
+ Sched<[]>;
+def : InstAlias<"orc16 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b110000)>,
+ Sched<[]>;
+def : InstAlias<"orc8 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b111000)>,
+ Sched<[]>;
+def : InstAlias<"orc4 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b111100)>,
+ Sched<[]>;
+def : InstAlias<"orc2 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b111110)>,
+ Sched<[]>;
+def : InstAlias<"orc $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b111111)>,
+ Sched<[]>;
+} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
+
+//===----------------------------------------------------------------------===//
+// Compressed Instruction patterns
+//===----------------------------------------------------------------------===//
+let Predicates = [HasStdExtZbproposedc, HasStdExtC] in {
+def : CompressPat<(XORI GPRC:$rs1, GPRC:$rs1, -1),
+ (C_NOT GPRC:$rs1)>;
+def : CompressPat<(SUB GPRC:$rs1, X0, GPRC:$rs1),
+ (C_NEG GPRC:$rs1)>;
+} // Predicates = [HasStdExtZbproposedc, HasStdExtC]
+
+let Predicates = [HasStdExtZbproposedc, HasStdExtZbbOrZbp, HasStdExtC, IsRV64] in {
+def : CompressPat<(PACK GPRC:$rs1, GPRC:$rs1, X0),
+ (C_ZEXTW GPRC:$rs1)>;
+} // Predicates = [HasStdExtZbproposedc, HasStdExtZbbOrZbp, HasStdExtC, IsRV64]
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 4a036eb52bb8..6c36f53cd563 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -42,15 +42,13 @@ class FPFMADDynFrmAlias<FPFMAD_rrr_frm Inst, string OpcodeStr>
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class FPALUD_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
: RVInstR<funct7, funct3, OPC_OP_FP, (outs FPR64:$rd),
- (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">,
- Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>;
+ (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class FPALUD_rr_frm<bits<7> funct7, string opcodestr>
: RVInstRFrm<funct7, OPC_OP_FP, (outs FPR64:$rd),
(ins FPR64:$rs1, FPR64:$rs2, frmarg:$funct3), opcodestr,
- "$rd, $rs1, $rs2, $funct3">,
- Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>;
+ "$rd, $rs1, $rs2, $funct3">;
class FPALUDDynFrmAlias<FPALUD_rr_frm Inst, string OpcodeStr>
: InstAlias<OpcodeStr#" $rd, $rs1, $rs2",
@@ -72,7 +70,7 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
def FLD : RVInstI<0b011, OPC_LOAD_FP, (outs FPR64:$rd),
(ins GPR:$rs1, simm12:$imm12),
"fld", "$rd, ${imm12}(${rs1})">,
- Sched<[WriteFLD64, ReadMemBase]>;
+ Sched<[WriteFLD64, ReadFMemBase]>;
// Operands for stores are in the order srcreg, base, offset rather than
// reflecting the order these fields are specified in the instruction
@@ -81,7 +79,7 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
def FSD : RVInstS<0b011, OPC_STORE_FP, (outs),
(ins FPR64:$rs2, GPR:$rs1, simm12:$imm12),
"fsd", "$rs2, ${imm12}(${rs1})">,
- Sched<[WriteFST64, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteFST64, ReadStoreData, ReadFMemBase]>;
def FMADD_D : FPFMAD_rrr_frm<OPC_MADD, "fmadd.d">,
Sched<[WriteFMulAdd64, ReadFMulAdd64, ReadFMulAdd64, ReadFMulAdd64]>;
@@ -96,26 +94,35 @@ def FNMADD_D : FPFMAD_rrr_frm<OPC_NMADD, "fnmadd.d">,
Sched<[WriteFMulAdd64, ReadFMulAdd64, ReadFMulAdd64, ReadFMulAdd64]>;
def : FPFMADDynFrmAlias<FNMADD_D, "fnmadd.d">;
-def FADD_D : FPALUD_rr_frm<0b0000001, "fadd.d">;
+def FADD_D : FPALUD_rr_frm<0b0000001, "fadd.d">,
+ Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>;
def : FPALUDDynFrmAlias<FADD_D, "fadd.d">;
-def FSUB_D : FPALUD_rr_frm<0b0000101, "fsub.d">;
+def FSUB_D : FPALUD_rr_frm<0b0000101, "fsub.d">,
+ Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>;
def : FPALUDDynFrmAlias<FSUB_D, "fsub.d">;
-def FMUL_D : FPALUD_rr_frm<0b0001001, "fmul.d">;
+def FMUL_D : FPALUD_rr_frm<0b0001001, "fmul.d">,
+ Sched<[WriteFMul64, ReadFMul64, ReadFMul64]>;
def : FPALUDDynFrmAlias<FMUL_D, "fmul.d">;
-def FDIV_D : FPALUD_rr_frm<0b0001101, "fdiv.d">;
+def FDIV_D : FPALUD_rr_frm<0b0001101, "fdiv.d">,
+ Sched<[WriteFDiv64, ReadFDiv64, ReadFDiv64]>;
def : FPALUDDynFrmAlias<FDIV_D, "fdiv.d">;
def FSQRT_D : FPUnaryOp_r_frm<0b0101101, FPR64, FPR64, "fsqrt.d">,
- Sched<[WriteFSqrt32, ReadFSqrt32]> {
+ Sched<[WriteFSqrt64, ReadFSqrt64]> {
let rs2 = 0b00000;
}
def : FPUnaryOpDynFrmAlias<FSQRT_D, "fsqrt.d", FPR64, FPR64>;
-def FSGNJ_D : FPALUD_rr<0b0010001, 0b000, "fsgnj.d">;
-def FSGNJN_D : FPALUD_rr<0b0010001, 0b001, "fsgnjn.d">;
-def FSGNJX_D : FPALUD_rr<0b0010001, 0b010, "fsgnjx.d">;
-def FMIN_D : FPALUD_rr<0b0010101, 0b000, "fmin.d">;
-def FMAX_D : FPALUD_rr<0b0010101, 0b001, "fmax.d">;
+def FSGNJ_D : FPALUD_rr<0b0010001, 0b000, "fsgnj.d">,
+ Sched<[WriteFSGNJ64, ReadFSGNJ64, ReadFSGNJ64]>;
+def FSGNJN_D : FPALUD_rr<0b0010001, 0b001, "fsgnjn.d">,
+ Sched<[WriteFSGNJ64, ReadFSGNJ64, ReadFSGNJ64]>;
+def FSGNJX_D : FPALUD_rr<0b0010001, 0b010, "fsgnjx.d">,
+ Sched<[WriteFSGNJ64, ReadFSGNJ64, ReadFSGNJ64]>;
+def FMIN_D : FPALUD_rr<0b0010101, 0b000, "fmin.d">,
+ Sched<[WriteFMinMax64, ReadFMinMax64, ReadFMinMax64]>;
+def FMAX_D : FPALUD_rr<0b0010101, 0b001, "fmax.d">,
+ Sched<[WriteFMinMax64, ReadFMinMax64, ReadFMinMax64]>;
def FCVT_S_D : FPUnaryOp_r_frm<0b0100000, FPR32, FPR64, "fcvt.s.d">,
Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]> {
@@ -299,11 +306,15 @@ def : PatFpr64Fpr64<setole, FLE_D>;
def : Pat<(seto FPR64:$rs1, FPR64:$rs2),
(AND (FEQ_D FPR64:$rs1, FPR64:$rs1),
(FEQ_D FPR64:$rs2, FPR64:$rs2))>;
+def : Pat<(seto FPR64:$rs1, FPR64:$rs1),
+ (FEQ_D $rs1, $rs1)>;
def : Pat<(setuo FPR64:$rs1, FPR64:$rs2),
(SLTIU (AND (FEQ_D FPR64:$rs1, FPR64:$rs1),
(FEQ_D FPR64:$rs2, FPR64:$rs2)),
1)>;
+def : Pat<(setuo FPR64:$rs1, FPR64:$rs1),
+ (SLTIU (FEQ_D $rs1, $rs1), 1)>;
def Select_FPR64_Using_CC_GPR : SelectCC_rrirr<FPR64, GPR>;
@@ -332,6 +343,10 @@ def SplitF64Pseudo
} // Predicates = [HasStdExtD]
let Predicates = [HasStdExtD, IsRV32] in {
+
+/// Float constants
+def : Pat<(f64 (fpimm0)), (FCVT_D_W X0)>;
+
// double->[u]int. Round-to-zero must be used.
def : Pat<(fp_to_sint FPR64:$rs1), (FCVT_W_D FPR64:$rs1, 0b001)>;
def : Pat<(fp_to_uint FPR64:$rs1), (FCVT_WU_D FPR64:$rs1, 0b001)>;
@@ -342,6 +357,10 @@ def : Pat<(uint_to_fp GPR:$rs1), (FCVT_D_WU GPR:$rs1)>;
} // Predicates = [HasStdExtD, IsRV32]
let Predicates = [HasStdExtD, IsRV64] in {
+
+/// Float constants
+def : Pat<(f64 (fpimm0)), (FMV_D_X X0)>;
+
def : Pat<(bitconvert GPR:$rs1), (FMV_D_X GPR:$rs1)>;
def : Pat<(bitconvert FPR64:$rs1), (FMV_X_D FPR64:$rs1)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 782c3f65af14..ce5c3abb6a06 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -60,8 +60,7 @@ class FPFMASDynFrmAlias<FPFMAS_rrr_frm Inst, string OpcodeStr>
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class FPALUS_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
: RVInstR<funct7, funct3, OPC_OP_FP, (outs FPR32:$rd),
- (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">,
- Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>;
+ (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class FPALUS_rr_frm<bits<7> funct7, string opcodestr>
@@ -106,7 +105,7 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
def FLW : RVInstI<0b010, OPC_LOAD_FP, (outs FPR32:$rd),
(ins GPR:$rs1, simm12:$imm12),
"flw", "$rd, ${imm12}(${rs1})">,
- Sched<[WriteFLD32, ReadMemBase]>;
+ Sched<[WriteFLD32, ReadFMemBase]>;
// Operands for stores are in the order srcreg, base, offset rather than
// reflecting the order these fields are specified in the instruction
@@ -115,7 +114,7 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
def FSW : RVInstS<0b010, OPC_STORE_FP, (outs),
(ins FPR32:$rs2, GPR:$rs1, simm12:$imm12),
"fsw", "$rs2, ${imm12}(${rs1})">,
- Sched<[WriteFST32, ReadStoreData, ReadMemBase]>;
+ Sched<[WriteFST32, ReadStoreData, ReadFMemBase]>;
def FMADD_S : FPFMAS_rrr_frm<OPC_MADD, "fmadd.s">,
Sched<[WriteFMulAdd32, ReadFMulAdd32, ReadFMulAdd32, ReadFMulAdd32]>;
@@ -149,11 +148,16 @@ def FSQRT_S : FPUnaryOp_r_frm<0b0101100, FPR32, FPR32, "fsqrt.s">,
}
def : FPUnaryOpDynFrmAlias<FSQRT_S, "fsqrt.s", FPR32, FPR32>;
-def FSGNJ_S : FPALUS_rr<0b0010000, 0b000, "fsgnj.s">;
-def FSGNJN_S : FPALUS_rr<0b0010000, 0b001, "fsgnjn.s">;
-def FSGNJX_S : FPALUS_rr<0b0010000, 0b010, "fsgnjx.s">;
-def FMIN_S : FPALUS_rr<0b0010100, 0b000, "fmin.s">;
-def FMAX_S : FPALUS_rr<0b0010100, 0b001, "fmax.s">;
+def FSGNJ_S : FPALUS_rr<0b0010000, 0b000, "fsgnj.s">,
+ Sched<[WriteFSGNJ32, ReadFSGNJ32, ReadFSGNJ32]>;
+def FSGNJN_S : FPALUS_rr<0b0010000, 0b001, "fsgnjn.s">,
+ Sched<[WriteFSGNJ32, ReadFSGNJ32, ReadFSGNJ32]>;
+def FSGNJX_S : FPALUS_rr<0b0010000, 0b010, "fsgnjx.s">,
+ Sched<[WriteFSGNJ32, ReadFSGNJ32, ReadFSGNJ32]>;
+def FMIN_S : FPALUS_rr<0b0010100, 0b000, "fmin.s">,
+ Sched<[WriteFMinMax32, ReadFMinMax32, ReadFMinMax32]>;
+def FMAX_S : FPALUS_rr<0b0010100, 0b001, "fmax.s">,
+ Sched<[WriteFMinMax32, ReadFMinMax32, ReadFMinMax32]>;
def FCVT_W_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.w.s">,
Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]> {
@@ -282,6 +286,9 @@ def PseudoFSW : PseudoStore<"fsw", FPR32>;
// Pseudo-instructions and codegen patterns
//===----------------------------------------------------------------------===//
+/// Floating point constants
+def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;
+
/// Generic pattern classes
class PatFpr32Fpr32<SDPatternOperator OpNode, RVInstR Inst>
: Pat<(OpNode FPR32:$rs1, FPR32:$rs2), (Inst $rs1, $rs2)>;
@@ -291,6 +298,9 @@ class PatFpr32Fpr32DynFrm<SDPatternOperator OpNode, RVInstRFrm Inst>
let Predicates = [HasStdExtF] in {
+/// Float constants
+def : Pat<(f32 (fpimm0)), (FMV_W_X X0)>;
+
/// Float conversion operations
// Moves (no conversion)
@@ -356,11 +366,15 @@ def : PatFpr32Fpr32<setole, FLE_S>;
def : Pat<(seto FPR32:$rs1, FPR32:$rs2),
(AND (FEQ_S FPR32:$rs1, FPR32:$rs1),
(FEQ_S FPR32:$rs2, FPR32:$rs2))>;
+def : Pat<(seto FPR32:$rs1, FPR32:$rs1),
+ (FEQ_S $rs1, $rs1)>;
def : Pat<(setuo FPR32:$rs1, FPR32:$rs2),
(SLTIU (AND (FEQ_S FPR32:$rs1, FPR32:$rs1),
(FEQ_S FPR32:$rs2, FPR32:$rs2)),
1)>;
+def : Pat<(setuo FPR32:$rs1, FPR32:$rs1),
+ (SLTIU (FEQ_S $rs1, $rs1), 1)>;
def Select_FPR32_Using_CC_GPR : SelectCC_rrirr<FPR32, GPR>;
@@ -384,16 +398,6 @@ def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_W $rs1, 0b111)>;
def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1, 0b111)>;
} // Predicates = [HasStdExtF, IsRV32]
-let Predicates = [HasStdExtF, IsRV32] in {
-// FP->[u]int. Round-to-zero must be used
-def : Pat<(fp_to_sint FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>;
-def : Pat<(fp_to_uint FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
-
-// [u]int->fp. Match GCC and default to using dynamic rounding mode.
-def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_W $rs1, 0b111)>;
-def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1, 0b111)>;
-} // Predicates = [HasStdExtF, IsRV32]
-
let Predicates = [HasStdExtF, IsRV64] in {
def : Pat<(riscv_fmv_w_x_rv64 GPR:$src), (FMV_W_X GPR:$src)>;
def : Pat<(riscv_fmv_x_anyextw_rv64 FPR32:$src), (FMV_X_W FPR32:$src)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
new file mode 100644
index 000000000000..1c7f53fecb8c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -0,0 +1,873 @@
+//===-- RISCVInstrInfoV.td - RISC-V 'V' instructions -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file describes the RISC-V instructions from the standard 'V' Vector
+/// extension, version 0.8.
+/// This version is still experimental as the 'V' extension hasn't been
+/// ratified yet.
+///
+//===----------------------------------------------------------------------===//
+
+include "RISCVInstrFormatsV.td"
+
+//===----------------------------------------------------------------------===//
+// Operand and SDNode transformation definitions.
+//===----------------------------------------------------------------------===//
+
+def VTypeIAsmOperand : AsmOperandClass {
+ let Name = "VTypeI";
+ let ParserMethod = "parseVTypeI";
+ let DiagnosticType = "InvalidVTypeI";
+}
+
+def VTypeIOp : Operand<XLenVT> {
+ let ParserMatchClass = VTypeIAsmOperand;
+ let PrintMethod = "printVTypeI";
+ let DecoderMethod = "decodeUImmOperand<11>";
+}
+
+def VRegAsmOperand : AsmOperandClass {
+ let Name = "RVVRegOpOperand";
+ let RenderMethod = "addRegOperands";
+ let PredicateMethod = "isReg";
+ let ParserMethod = "parseRegister";
+}
+
+def VRegOp : RegisterOperand<VR> {
+ let ParserMatchClass = VRegAsmOperand;
+ let PrintMethod = "printOperand";
+}
+
+def VMaskAsmOperand : AsmOperandClass {
+ let Name = "RVVMaskRegOpOperand";
+ let RenderMethod = "addRegOperands";
+ let PredicateMethod = "isV0Reg";
+ let ParserMethod = "parseMaskReg";
+ let IsOptional = 1;
+ let DefaultMethod = "defaultMaskRegOp";
+ let DiagnosticType = "InvalidVMaskRegister";
+}
+
+def VMaskOp : RegisterOperand<VMV0> {
+ let ParserMatchClass = VMaskAsmOperand;
+ let PrintMethod = "printVMaskReg";
+ let EncoderMethod = "getVMaskReg";
+ let DecoderMethod = "decodeVMaskReg";
+}
+
+def simm5 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<5>(Imm);}]> {
+ let ParserMatchClass = SImmAsmOperand<5>;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeSImmOperand<5>";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isInt<5>(Imm);
+ return MCOp.isBareSymbolRef();
+ }];
+}
+
+def SImm5Plus1AsmOperand : AsmOperandClass {
+ let Name = "SImm5Plus1";
+ let RenderMethod = "addSImm5Plus1Operands";
+ let DiagnosticType = "InvalidSImm5Plus1";
+}
+
+def simm5_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT,
+ [{return isInt<5>(Imm - 1);}]> {
+ let ParserMatchClass = SImm5Plus1AsmOperand;
+ let PrintMethod = "printSImm5Plus1";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isInt<5>(Imm - 1);
+ return MCOp.isBareSymbolRef();
+ }];
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction class templates
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+// load vd, (rs1), vm
+class VUnitStrideLoad<RISCVMOP mop, RISCVLSUMOP lumop, RISCVWidth width,
+ string opcodestr>
+ : RVInstVLU<0b000, mop, lumop, width, (outs VRegOp:$vd),
+ (ins GPR:$rs1, VMaskOp:$vm), opcodestr, "$vd, (${rs1})$vm">;
+
+// load vd, (rs1), rs2, vm
+class VStridedLoad<RISCVMOP mop, RISCVWidth width, string opcodestr>
+ : RVInstVLS<0b000, mop, width, (outs VRegOp:$vd),
+ (ins GPR:$rs1, GPR:$rs2, VMaskOp:$vm), opcodestr,
+ "$vd, (${rs1}), $rs2$vm">;
+
+// load vd, (rs1), vs2, vm
+class VIndexedLoad<RISCVMOP mop, RISCVWidth width, string opcodestr>
+ : RVInstVLX<0b000, mop, width, (outs VRegOp:$vd),
+ (ins GPR:$rs1, VRegOp:$vs2, VMaskOp:$vm), opcodestr,
+ "$vd, (${rs1}), $vs2$vm">;
+
+// vl<nf>r.v vd, (rs1)
+class VWholeLoad<bits<3> nf, string opcodestr>
+ : RVInstVLU<nf, MOPLDUnitStrideU, LUMOPUnitStrideWholeReg,
+ LSWidthVSEW, (outs VRegOp:$vd), (ins GPR:$rs1),
+ opcodestr, "$vd, (${rs1})"> {
+ let vm = 1;
+ let Uses = [];
+}
+} // hasSideEffects = 0, mayLoad = 1, mayStore = 0
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
+// store vs3, (rs1), vm
+class VUnitStrideStore<RISCVMOP mop, RISCVLSUMOP sumop, RISCVWidth width,
+ string opcodestr>
+ : RVInstVSU<0b000, mop, sumop, width, (outs),
+ (ins VRegOp:$vs3, GPR:$rs1, VMaskOp:$vm), opcodestr,
+ "$vs3, (${rs1})$vm">;
+
+// store vs3, (rs1), rs2, vm
+class VStridedStore<RISCVMOP mop, RISCVWidth width, string opcodestr>
+ : RVInstVSS<0b000, mop, width, (outs),
+ (ins VRegOp:$vs3, GPR:$rs1, GPR:$rs2, VMaskOp:$vm),
+ opcodestr, "$vs3, (${rs1}), $rs2$vm">;
+
+// store vs3, (rs1), vs2, vm
+class VIndexedStore<RISCVMOP mop, RISCVWidth width, string opcodestr>
+ : RVInstVSX<0b000, mop, width, (outs),
+ (ins VRegOp:$vs3, GPR:$rs1, VRegOp:$vs2, VMaskOp:$vm),
+ opcodestr, "$vs3, (${rs1}), $vs2$vm">;
+
+// vs<nf>r.v vd, (rs1)
+class VWholeStore<bits<3> nf, string opcodestr>
+ : RVInstVSU<nf, MOPSTUnitStride, SUMOPUnitStrideWholeReg,
+ LSWidthVSEW, (outs), (ins VRegOp:$vs3, GPR:$rs1),
+ opcodestr, "$vs3, (${rs1})"> {
+ let vm = 1;
+ let Uses = [];
+}
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 1
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+// op vd, vs2, vs1, vm
+class VALUVV<bits<6> funct6, RISCVVFormat opv, string opcodestr>
+ : RVInstVV<funct6, opv, (outs VRegOp:$vd),
+ (ins VRegOp:$vs2, VRegOp:$vs1, VMaskOp:$vm),
+ opcodestr, "$vd, $vs2, $vs1$vm">;
+
+// op vd, vs2, vs1, v0 (without mask, use v0 as carry input)
+class VALUmVV<bits<6> funct6, RISCVVFormat opv, string opcodestr>
+ : RVInstVV<funct6, opv, (outs VRegOp:$vd),
+ (ins VRegOp:$vs2, VRegOp:$vs1, VMV0:$v0),
+ opcodestr, "$vd, $vs2, $vs1, v0"> {
+ let vm = 0;
+}
+
+// op vd, vs1, vs2, vm (reverse the order of vs1 and vs2)
+class VALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr>
+ : RVInstVV<funct6, opv, (outs VRegOp:$vd),
+ (ins VRegOp:$vs1, VRegOp:$vs2, VMaskOp:$vm),
+ opcodestr, "$vd, $vs1, $vs2$vm">;
+
+// op vd, vs2, vs1
+class VALUVVNoVm<bits<6> funct6, RISCVVFormat opv, string opcodestr>
+ : RVInstVV<funct6, opv, (outs VRegOp:$vd),
+ (ins VRegOp:$vs2, VRegOp:$vs1),
+ opcodestr, "$vd, $vs2, $vs1"> {
+ let vm = 1;
+}
+
+// op vd, vs2, rs1, vm
+class VALUVX<bits<6> funct6, RISCVVFormat opv, string opcodestr>
+ : RVInstVX<funct6, opv, (outs VRegOp:$vd),
+ (ins VRegOp:$vs2, GPR:$rs1, VMaskOp:$vm),
+ opcodestr, "$vd, $vs2, $rs1$vm">;
+
+// op vd, vs2, rs1, v0 (without mask, use v0 as carry input)
+class VALUmVX<bits<6> funct6, RISCVVFormat opv, string opcodestr>
+ : RVInstVX<funct6, opv, (outs VRegOp:$vd),
+ (ins VRegOp:$vs2, GPR:$rs1, VMV0:$v0),
+ opcodestr, "$vd, $vs2, $rs1, v0"> {
+ let vm = 0;
+}
+
+// op vd, rs1, vs2, vm (reverse the order of rs1 and vs2)
+class VALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr>
+ : RVInstVX<funct6, opv, (outs VRegOp:$vd),
+ (ins GPR:$rs1, VRegOp:$vs2, VMaskOp:$vm),
+ opcodestr, "$vd, $rs1, $vs2$vm">;
+
+// op vd, vs2, rs1
+class VALUVXNoVm<bits<6> funct6, RISCVVFormat opv, string opcodestr>
+ : RVInstVX<funct6, opv, (outs VRegOp:$vd),
+ (ins VRegOp:$vs2, GPR:$rs1),
+ opcodestr, "$vd, $vs2, $rs1"> {
+ let vm = 1;
+}
+
+// op vd, vs2, imm, vm
+class VALUVI<bits<6> funct6, string opcodestr, Operand optype = simm5>
+ : RVInstIVI<funct6, (outs VRegOp:$vd),
+ (ins VRegOp:$vs2, optype:$imm, VMaskOp:$vm),
+ opcodestr, "$vd, $vs2, $imm$vm">;
+
+// op vd, vs2, imm, v0 (without mask, use v0 as carry input)
+class VALUmVI<bits<6> funct6, string opcodestr, Operand optype = simm5>
+ : RVInstIVI<funct6, (outs VRegOp:$vd),
+ (ins VRegOp:$vs2, optype:$imm, VMV0:$v0),
+ opcodestr, "$vd, $vs2, $imm, v0"> {
+ let vm = 0;
+}
+
+// op vd, vs2, imm
+class VALUVINoVm<bits<6> funct6, string opcodestr, Operand optype = simm5>
+ : RVInstIVI<funct6, (outs VRegOp:$vd),
+ (ins VRegOp:$vs2, optype:$imm),
+ opcodestr, "$vd, $vs2, $imm"> {
+ let vm = 1;
+}
+
+// op vd, vs2, rs1, vm (Float)
+class VALUVF<bits<6> funct6, RISCVVFormat opv, string opcodestr>
+ : RVInstVX<funct6, opv, (outs VRegOp:$vd),
+ (ins VRegOp:$vs2, FPR32:$rs1, VMaskOp:$vm),
+ opcodestr, "$vd, $vs2, $rs1$vm">;
+
+// op vd, rs1, vs2, vm (Float) (with mask, reverse the order of rs1 and vs2)
+class VALUrVF<bits<6> funct6, RISCVVFormat opv, string opcodestr>
+ : RVInstVX<funct6, opv, (outs VRegOp:$vd),
+ (ins FPR32:$rs1, VRegOp:$vs2, VMaskOp:$vm),
+ opcodestr, "$vd, $rs1, $vs2$vm">;
+
+// op vd, vs2, vm (the vs1 field is used as additional opcode bits)
+class VALUVs2<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, string opcodestr>
+ : RVInstV<funct6, vs1, opv, (outs VRegOp:$vd),
+ (ins VRegOp:$vs2, VMaskOp:$vm),
+ opcodestr, "$vd, $vs2$vm">;
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
+
+//===----------------------------------------------------------------------===//
+// Combination of instruction classes.
+// Use these multiclasses to define instructions more easily.
+//===----------------------------------------------------------------------===//
+multiclass VALU_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>;
+}
+
+multiclass VALU_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">;
+}
+
+multiclass VALUr_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUrVV<funct6, OPIVV, opcodestr # "." # vw # "v">;
+ def X : VALUrVX<funct6, OPIVX, opcodestr # "." # vw # "x">;
+}
+
+multiclass VALU_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>;
+}
+
+multiclass VALU_IV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPIVV, opcodestr # ".vs">;
+}
+
+multiclass VALUr_IV_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def X : VALUrVX<funct6, OPIVX, opcodestr # "." # vw # "x">;
+}
+
+multiclass VALU_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">;
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">;
+}
+
+multiclass VALU_MV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPMVV, opcodestr # ".vs">;
+}
+
+multiclass VALU_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> {
+ def M : VALUVVNoVm<funct6, OPMVV, opcodestr # "." # vm # "m">;
+}
+
+multiclass VALU_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">;
+}
+
+multiclass VALUr_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUrVV<funct6, OPMVV, opcodestr # "." # vw # "v">;
+ def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">;
+}
+
+multiclass VALUr_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">;
+}
+
+multiclass VALU_MV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>;
+}
+
+multiclass VALUm_IV_V_X_I<string opcodestr, bits<6> funct6> {
+ def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">;
+ def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">;
+ def IM : VALUmVI<funct6, opcodestr # ".vim">;
+}
+
+multiclass VALUm_IV_V_X<string opcodestr, bits<6> funct6> {
+ def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">;
+ def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">;
+}
+
+multiclass VALUNoVm_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5> {
+ def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">;
+ def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">;
+ def I : VALUVINoVm<funct6, opcodestr # ".vi", optype>;
+}
+
+multiclass VALUNoVm_IV_V_X<string opcodestr, bits<6> funct6> {
+ def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">;
+ def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">;
+}
+
+multiclass VALU_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">;
+}
+
+multiclass VALU_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">;
+}
+
+multiclass VALUr_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUrVV<funct6, OPFVV, opcodestr # "." # vw # "v">;
+ def F : VALUrVF<funct6, OPFVF, opcodestr # "." # vw # "f">;
+}
+
+multiclass VALU_FV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">;
+}
+
+multiclass VALU_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtV] in {
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
+def VSETVLI : RVInstSetVLi<(outs GPR:$rd), (ins GPR:$rs1, VTypeIOp:$vtypei),
+ "vsetvli", "$rd, $rs1, $vtypei">;
+
+def VSETVL : RVInstSetVL<(outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2),
+ "vsetvl", "$rd, $rs1, $rs2">;
+} // hasSideEffects = 1, mayLoad = 0, mayStore = 0
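+
+// A minimal usage sketch (illustrative register choices, assuming the v0.8
+// vtype assembly syntax): a loop prologue can request 32-bit elements with
+// LMUL=2 via
+//   vsetvli t0, a0, e32,m2
+// where a0 holds the requested element count and t0 receives the granted vl.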
+
+// Vector Unit-Stride Instructions
+def VLB_V : VUnitStrideLoad<MOPLDUnitStrideS, LUMOPUnitStride, LSWidthVByte, "vlb.v">;
+def VLH_V : VUnitStrideLoad<MOPLDUnitStrideS, LUMOPUnitStride, LSWidthVHalf, "vlh.v">;
+def VLW_V : VUnitStrideLoad<MOPLDUnitStrideS, LUMOPUnitStride, LSWidthVWord, "vlw.v">;
+
+def VLBU_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStride, LSWidthVByte, "vlbu.v">;
+def VLHU_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStride, LSWidthVHalf, "vlhu.v">;
+def VLWU_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStride, LSWidthVWord, "vlwu.v">;
+
+def VLE_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStride, LSWidthVSEW, "vle.v">;
+
+def VLBFF_V : VUnitStrideLoad<MOPLDUnitStrideS, LUMOPUnitStrideFF, LSWidthVByte, "vlbff.v">;
+def VLHFF_V : VUnitStrideLoad<MOPLDUnitStrideS, LUMOPUnitStrideFF, LSWidthVHalf, "vlhff.v">;
+def VLWFF_V : VUnitStrideLoad<MOPLDUnitStrideS, LUMOPUnitStrideFF, LSWidthVWord, "vlwff.v">;
+
+def VLBUFF_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStrideFF, LSWidthVByte, "vlbuff.v">;
+def VLHUFF_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStrideFF, LSWidthVHalf, "vlhuff.v">;
+def VLWUFF_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStrideFF, LSWidthVWord, "vlwuff.v">;
+
+def VLEFF_V : VUnitStrideLoad<MOPLDUnitStrideU, LUMOPUnitStrideFF, LSWidthVSEW, "vleff.v">;
+
+def VSB_V : VUnitStrideStore<MOPSTUnitStride, SUMOPUnitStride, LSWidthVByte, "vsb.v">;
+def VSH_V : VUnitStrideStore<MOPSTUnitStride, SUMOPUnitStride, LSWidthVHalf, "vsh.v">;
+def VSW_V : VUnitStrideStore<MOPSTUnitStride, SUMOPUnitStride, LSWidthVWord, "vsw.v">;
+
+def VSE_V : VUnitStrideStore<MOPSTUnitStride, SUMOPUnitStride, LSWidthVSEW, "vse.v">;
+
+// Vector Strided Instructions
+def VLSB_V : VStridedLoad<MOPLDStridedS, LSWidthVByte, "vlsb.v">;
+def VLSH_V : VStridedLoad<MOPLDStridedS, LSWidthVHalf, "vlsh.v">;
+def VLSW_V : VStridedLoad<MOPLDStridedS, LSWidthVWord, "vlsw.v">;
+
+def VLSBU_V : VStridedLoad<MOPLDStridedU, LSWidthVByte, "vlsbu.v">;
+def VLSHU_V : VStridedLoad<MOPLDStridedU, LSWidthVHalf, "vlshu.v">;
+def VLSWU_V : VStridedLoad<MOPLDStridedU, LSWidthVWord, "vlswu.v">;
+
+def VLSE_V : VStridedLoad<MOPLDStridedU, LSWidthVSEW, "vlse.v">;
+
+def VSSB_V : VStridedStore<MOPSTStrided, LSWidthVByte, "vssb.v">;
+def VSSH_V : VStridedStore<MOPSTStrided, LSWidthVHalf, "vssh.v">;
+def VSSW_V : VStridedStore<MOPSTStrided, LSWidthVWord, "vssw.v">;
+def VSSE_V : VStridedStore<MOPSTStrided, LSWidthVSEW, "vsse.v">;
+
+// Vector Indexed Instructions
+def VLXB_V : VIndexedLoad<MOPLDIndexedS, LSWidthVByte, "vlxb.v">;
+def VLXH_V : VIndexedLoad<MOPLDIndexedS, LSWidthVHalf, "vlxh.v">;
+def VLXW_V : VIndexedLoad<MOPLDIndexedS, LSWidthVWord, "vlxw.v">;
+
+def VLXBU_V : VIndexedLoad<MOPLDIndexedU, LSWidthVByte, "vlxbu.v">;
+def VLXHU_V : VIndexedLoad<MOPLDIndexedU, LSWidthVHalf, "vlxhu.v">;
+def VLXWU_V : VIndexedLoad<MOPLDIndexedU, LSWidthVWord, "vlxwu.v">;
+
+def VLXE_V : VIndexedLoad<MOPLDIndexedU, LSWidthVSEW, "vlxe.v">;
+
+def VSXB_V : VIndexedStore<MOPSTIndexedOrder, LSWidthVByte, "vsxb.v">;
+def VSXH_V : VIndexedStore<MOPSTIndexedOrder, LSWidthVHalf, "vsxh.v">;
+def VSXW_V : VIndexedStore<MOPSTIndexedOrder, LSWidthVWord, "vsxw.v">;
+def VSXE_V : VIndexedStore<MOPSTIndexedOrder, LSWidthVSEW, "vsxe.v">;
+
+def VSUXB_V : VIndexedStore<MOPSTIndexedUnOrd, LSWidthVByte, "vsuxb.v">;
+def VSUXH_V : VIndexedStore<MOPSTIndexedUnOrd, LSWidthVHalf, "vsuxh.v">;
+def VSUXW_V : VIndexedStore<MOPSTIndexedUnOrd, LSWidthVWord, "vsuxw.v">;
+def VSUXE_V : VIndexedStore<MOPSTIndexedUnOrd, LSWidthVSEW, "vsuxe.v">;
+
+def VL1R_V : VWholeLoad<0, "vl1r.v">;
+def VS1R_V : VWholeStore<0, "vs1r.v">;
+
+// Vector Single-Width Integer Add and Subtract
+defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>;
+defm VSUB_V : VALU_IV_V_X<"vsub", 0b000010>;
+defm VRSUB_V : VALU_IV_X_I<"vrsub", 0b000011>;
+
+// Vector Widening Integer Add/Subtract
+// Refer to 11.2 Widening Vector Arithmetic Instructions
+// The destination vector register group cannot overlap a source vector
+// register group of a different element width (including the mask register
+// if masked), otherwise an illegal instruction exception is raised.
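+// For example (illustrative operands, assuming SEW=32 and LMUL=1):
+//   vwaddu.vv v2, v2, v4   // illegal: the 64-bit-element destination group
+//                          // v2-v3 overlaps the 32-bit-element source v2
+//   vwaddu.vv v2, v4, v5   // legal: the destination group v2-v3 is disjoint
+//                          // from both sources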
+let Constraints = "@earlyclobber $vd" in {
+let RVVConstraint = WidenV in {
+defm VWADDU_V : VALU_MV_V_X<"vwaddu", 0b110000>;
+defm VWSUBU_V : VALU_MV_V_X<"vwsubu", 0b110010>;
+defm VWADD_V : VALU_MV_V_X<"vwadd", 0b110001>;
+defm VWSUB_V : VALU_MV_V_X<"vwsub", 0b110011>;
+} // RVVConstraint = WidenV
+// Set earlyclobber on the following instructions for their second and mask
+// operands.
+// This has the downside that the earlyclobber constraint is too coarse and
+// will impose unnecessary restrictions by not allowing the destination to
+// overlap with the first (wide) operand.
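+// For example, the overlap rule above only forbids overlap with a source of a
+// different element width, so "vwaddu.wv v2, v2, v6" (illustrative operands,
+// wide destination equal to the wide first source) would be acceptable, yet
+// the coarse earlyclobber keeps the register allocator from producing it.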
+let RVVConstraint = WidenW in {
+defm VWADDU_W : VALU_MV_V_X<"vwaddu", 0b110100, "w">;
+defm VWSUBU_W : VALU_MV_V_X<"vwsubu", 0b110110, "w">;
+defm VWADD_W : VALU_MV_V_X<"vwadd", 0b110101, "w">;
+defm VWSUB_W : VALU_MV_V_X<"vwsub", 0b110111, "w">;
+} // RVVConstraint = WidenW
+} // Constraints = "@earlyclobber $vd"
+
+def : InstAlias<"vwcvt.x.x.v $vd, $vs$vm",
+ (VWADD_VX VRegOp:$vd, VRegOp:$vs, X0, VMaskOp:$vm)>;
+def : InstAlias<"vwcvtu.x.x.v $vd, $vs$vm",
+ (VWADDU_VX VRegOp:$vd, VRegOp:$vs, X0, VMaskOp:$vm)>;
+
+// Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
+defm VADC_V : VALUm_IV_V_X_I<"vadc", 0b010000>;
+defm VMADC_V : VALUm_IV_V_X_I<"vmadc", 0b010001>;
+defm VMADC_V : VALUNoVm_IV_V_X_I<"vmadc", 0b010001>;
+defm VSBC_V : VALUm_IV_V_X<"vsbc", 0b010010>;
+defm VMSBC_V : VALUm_IV_V_X<"vmsbc", 0b010011>;
+defm VMSBC_V : VALUNoVm_IV_V_X<"vmsbc", 0b010011>;
+
+// Vector Bitwise Logical Instructions
+defm VAND_V : VALU_IV_V_X_I<"vand", 0b001001>;
+defm VOR_V : VALU_IV_V_X_I<"vor", 0b001010>;
+defm VXOR_V : VALU_IV_V_X_I<"vxor", 0b001011>;
+
+def : InstAlias<"vnot.v $vd, $vs$vm",
+ (VXOR_VI VRegOp:$vd, VRegOp:$vs, -1, VMaskOp:$vm)>;
+
+// Vector Single-Width Bit Shift Instructions
+defm VSLL_V : VALU_IV_V_X_I<"vsll", 0b100101, uimm5>;
+defm VSRL_V : VALU_IV_V_X_I<"vsrl", 0b101000, uimm5>;
+defm VSRA_V : VALU_IV_V_X_I<"vsra", 0b101001, uimm5>;
+
+// Vector Narrowing Integer Right Shift Instructions
+// Refer to 11.3. Narrowing Vector Arithmetic Instructions
+// The destination vector register group cannot overlap the first source
+// vector register group (specified by vs2). The destination vector register
+// group cannot overlap the mask register if used, unless LMUL=1.
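+// For example (illustrative operands, assuming SEW=32 and LMUL=1):
+//   vnsrl.wv v2, v2, v4    // illegal: destination v2 overlaps the wide
+//                          // source group v2-v3 selected by vs2
+//   vnsrl.wv v6, v2, v4    // legal: destination v6 lies outside v2-v3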
+let Constraints = "@earlyclobber $vd", RVVConstraint = Narrow in {
+defm VNSRL_W : VALU_IV_V_X_I<"vnsrl", 0b101100, uimm5, "w">;
+defm VNSRA_W : VALU_IV_V_X_I<"vnsra", 0b101101, uimm5, "w">;
+} // Constraints = "@earlyclobber $vd", RVVConstraint = Narrow
+
+// Vector Integer Comparison Instructions
+defm VMSEQ_V : VALU_IV_V_X_I<"vmseq", 0b011000>;
+defm VMSNE_V : VALU_IV_V_X_I<"vmsne", 0b011001>;
+defm VMSLTU_V : VALU_IV_V_X<"vmsltu", 0b011010>;
+defm VMSLT_V : VALU_IV_V_X<"vmslt", 0b011011>;
+defm VMSLEU_V : VALU_IV_V_X_I<"vmsleu", 0b011100>;
+defm VMSLE_V : VALU_IV_V_X_I<"vmsle", 0b011101>;
+defm VMSGTU_V : VALU_IV_X_I<"vmsgtu", 0b011110>;
+defm VMSGT_V : VALU_IV_X_I<"vmsgt", 0b011111>;
+
+def : InstAlias<"vmsgtu.vv $vd, $va, $vb$vm",
+ (VMSLTU_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>;
+def : InstAlias<"vmsgt.vv $vd, $va, $vb$vm",
+ (VMSLT_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>;
+def : InstAlias<"vmsgeu.vv $vd, $va, $vb$vm",
+ (VMSLEU_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>;
+def : InstAlias<"vmsge.vv $vd, $va, $vb$vm",
+ (VMSLE_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>;
+def : InstAlias<"vmsltu.vi $vd, $va, $imm$vm",
+ (VMSLEU_VI VRegOp:$vd, VRegOp:$va, simm5_plus1:$imm,
+ VMaskOp:$vm), 0>;
+def : InstAlias<"vmslt.vi $vd, $va, $imm$vm",
+ (VMSLE_VI VRegOp:$vd, VRegOp:$va, simm5_plus1:$imm,
+ VMaskOp:$vm), 0>;
+def : InstAlias<"vmsgeu.vi $vd, $va, $imm$vm",
+ (VMSGTU_VI VRegOp:$vd, VRegOp:$va, simm5_plus1:$imm,
+ VMaskOp:$vm), 0>;
+def : InstAlias<"vmsge.vi $vd, $va, $imm$vm",
+ (VMSGT_VI VRegOp:$vd, VRegOp:$va, simm5_plus1:$imm,
+ VMaskOp:$vm), 0>;
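+
+// For example (illustrative operands), the assembler accepts
+//   vmslt.vi v8, v4, 5
+// and, through the simm5_plus1 operand, encodes it as
+//   vmsle.vi v8, v4, 4
+// i.e. "less than i" is emitted as "less than or equal to i-1".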
+
+// Vector Integer Min/Max Instructions
+defm VMINU_V : VALU_IV_V_X<"vminu", 0b000100>;
+defm VMIN_V : VALU_IV_V_X<"vmin", 0b000101>;
+defm VMAXU_V : VALU_IV_V_X<"vmaxu", 0b000110>;
+defm VMAX_V : VALU_IV_V_X<"vmax", 0b000111>;
+
+// Vector Single-Width Integer Multiply Instructions
+defm VMUL_V : VALU_MV_V_X<"vmul", 0b100101>;
+defm VMULH_V : VALU_MV_V_X<"vmulh", 0b100111>;
+defm VMULHU_V : VALU_MV_V_X<"vmulhu", 0b100100>;
+defm VMULHSU_V : VALU_MV_V_X<"vmulhsu", 0b100110>;
+
+// Vector Integer Divide Instructions
+defm VDIVU_V : VALU_MV_V_X<"vdivu", 0b100000>;
+defm VDIV_V : VALU_MV_V_X<"vdiv", 0b100001>;
+defm VREMU_V : VALU_MV_V_X<"vremu", 0b100010>;
+defm VREM_V : VALU_MV_V_X<"vrem", 0b100011>;
+
+// Vector Widening Integer Multiply Instructions
+let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
+defm VWMUL_V : VALU_MV_V_X<"vwmul", 0b111011>;
+defm VWMULU_V : VALU_MV_V_X<"vwmulu", 0b111000>;
+defm VWMULSU_V : VALU_MV_V_X<"vwmulsu", 0b111010>;
+} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
+
+// Vector Single-Width Integer Multiply-Add Instructions
+defm VMACC_V : VALUr_MV_V_X<"vmacc", 0b101101>;
+defm VNMSAC_V : VALUr_MV_V_X<"vnmsac", 0b101111>;
+defm VMADD_V : VALUr_MV_V_X<"vmadd", 0b101001>;
+defm VNMSUB_V : VALUr_MV_V_X<"vnmsub", 0b101011>;
+
+// Vector Widening Integer Multiply-Add Instructions
+let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
+defm VWMACCU_V : VALUr_MV_V_X<"vwmaccu", 0b111100>;
+defm VWMACC_V : VALUr_MV_V_X<"vwmacc", 0b111101>;
+defm VWMACCSU_V : VALUr_MV_V_X<"vwmaccsu", 0b111111>;
+defm VWMACCUS_V : VALUr_MV_X<"vwmaccus", 0b111110>;
+} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
+
+// Vector Integer Merge Instructions
+defm VMERGE_V : VALUm_IV_V_X_I<"vmerge", 0b010111>;
+
+// Vector Integer Move Instructions
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vs2 = 0, vm = 1 in {
+// op vd, vs1
+def VMV_V_V : RVInstVV<0b010111, OPIVV, (outs VRegOp:$vd),
+ (ins VRegOp:$vs1), "vmv.v.v", "$vd, $vs1">;
+// op vd, rs1
+def VMV_V_X : RVInstVX<0b010111, OPIVX, (outs VRegOp:$vd),
+ (ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">;
+// op vd, imm
+def VMV_V_I : RVInstIVI<0b010111, (outs VRegOp:$vd),
+ (ins simm5:$imm), "vmv.v.i", "$vd, $imm">;
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 0, vs2 = 0, vm = 1
+
+// Vector Fixed-Point Arithmetic Instructions
+defm VSADDU_V : VALU_IV_V_X_I<"vsaddu", 0b100000>;
+defm VSADD_V : VALU_IV_V_X_I<"vsadd", 0b100001>;
+defm VSSUBU_V : VALU_IV_V_X<"vssubu", 0b100010>;
+defm VSSUB_V : VALU_IV_V_X<"vssub", 0b100011>;
+
+// Vector Single-Width Averaging Add and Subtract
+defm VAADDU_V : VALU_MV_V_X<"vaaddu", 0b001000>;
+defm VAADD_V : VALU_MV_V_X<"vaadd", 0b001001>;
+defm VASUBU_V : VALU_MV_V_X<"vasubu", 0b001010>;
+defm VASUB_V : VALU_MV_V_X<"vasub", 0b001011>;
+
+// Vector Single-Width Fractional Multiply with Rounding and Saturation
+defm VSMUL_V : VALU_IV_V_X<"vsmul", 0b100111>;
+
+// Vector Single-Width Scaling Shift Instructions
+defm VSSRL_V : VALU_IV_V_X_I<"vssrl", 0b101010, uimm5>;
+defm VSSRA_V : VALU_IV_V_X_I<"vssra", 0b101011, uimm5>;
+
+// Vector Narrowing Fixed-Point Clip Instructions
+let Constraints = "@earlyclobber $vd", RVVConstraint = Narrow in {
+defm VNCLIPU_W : VALU_IV_V_X_I<"vnclipu", 0b101110, uimm5, "w">;
+defm VNCLIP_W : VALU_IV_V_X_I<"vnclip", 0b101111, uimm5, "w">;
+} // Constraints = "@earlyclobber $vd", RVVConstraint = Narrow
+
+// Vector Single-Width Floating-Point Add/Subtract Instructions
+defm VFADD_V : VALU_FV_V_F<"vfadd", 0b000000>;
+defm VFSUB_V : VALU_FV_V_F<"vfsub", 0b000010>;
+defm VFRSUB_V : VALU_FV_F<"vfrsub", 0b100111>;
+
+// Vector Widening Floating-Point Add/Subtract Instructions
+let Constraints = "@earlyclobber $vd" in {
+let RVVConstraint = WidenV in {
+defm VFWADD_V : VALU_FV_V_F<"vfwadd", 0b110000>;
+defm VFWSUB_V : VALU_FV_V_F<"vfwsub", 0b110010>;
+} // RVVConstraint = WidenV
+// Set earlyclobber on the following instructions for their second and mask
+// operands.
+// This has the downside that the earlyclobber constraint is too coarse and
+// will impose unnecessary restrictions by not allowing the destination to
+// overlap with the first (wide) operand.
+let RVVConstraint = WidenW in {
+defm VFWADD_W : VALU_FV_V_F<"vfwadd", 0b110100, "w">;
+defm VFWSUB_W : VALU_FV_V_F<"vfwsub", 0b110110, "w">;
+} // RVVConstraint = WidenW
+} // Constraints = "@earlyclobber $vd"
+
+// Vector Single-Width Floating-Point Multiply/Divide Instructions
+defm VFMUL_V : VALU_FV_V_F<"vfmul", 0b100100>;
+defm VFDIV_V : VALU_FV_V_F<"vfdiv", 0b100000>;
+defm VFRDIV_V : VALU_FV_F<"vfrdiv", 0b100001>;
+
+// Vector Widening Floating-Point Multiply
+let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
+defm VFWMUL_V : VALU_FV_V_F<"vfwmul", 0b111000>;
+} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
+
+// Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+defm VFMACC_V : VALUr_FV_V_F<"vfmacc", 0b101100>;
+defm VFNMACC_V : VALUr_FV_V_F<"vfnmacc", 0b101101>;
+defm VFMSAC_V : VALUr_FV_V_F<"vfmsac", 0b101110>;
+defm VFNMSAC_V : VALUr_FV_V_F<"vfnmsac", 0b101111>;
+defm VFMADD_V : VALUr_FV_V_F<"vfmadd", 0b101000>;
+defm VFNMADD_V : VALUr_FV_V_F<"vfnmadd", 0b101001>;
+defm VFMSUB_V : VALUr_FV_V_F<"vfmsub", 0b101010>;
+defm VFNMSUB_V : VALUr_FV_V_F<"vfnmsub", 0b101011>;
+
+// Vector Widening Floating-Point Fused Multiply-Add Instructions
+let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
+defm VFWMACC_V : VALUr_FV_V_F<"vfwmacc", 0b111100>;
+defm VFWNMACC_V : VALUr_FV_V_F<"vfwnmacc", 0b111101>;
+defm VFWMSAC_V : VALUr_FV_V_F<"vfwmsac", 0b111110>;
+defm VFWNMSAC_V : VALUr_FV_V_F<"vfwnmsac", 0b111111>;
+} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
+
+// Vector Floating-Point Square-Root Instruction
+defm VFSQRT_V : VALU_FV_VS2<"vfsqrt.v", 0b100011, 0b00000>;
+
+// Vector Floating-Point MIN/MAX Instructions
+defm VFMIN_V : VALU_FV_V_F<"vfmin", 0b000100>;
+defm VFMAX_V : VALU_FV_V_F<"vfmax", 0b000110>;
+
+// Vector Floating-Point Sign-Injection Instructions
+defm VFSGNJ_V : VALU_FV_V_F<"vfsgnj", 0b001000>;
+defm VFSGNJN_V : VALU_FV_V_F<"vfsgnjn", 0b001001>;
+defm VFSGNJX_V : VALU_FV_V_F<"vfsgnjx", 0b001010>;
+
+// Vector Floating-Point Compare Instructions
+defm VMFEQ_V : VALU_FV_V_F<"vmfeq", 0b011000>;
+defm VMFNE_V : VALU_FV_V_F<"vmfne", 0b011100>;
+defm VMFLT_V : VALU_FV_V_F<"vmflt", 0b011011>;
+defm VMFLE_V : VALU_FV_V_F<"vmfle", 0b011001>;
+defm VMFGT_V : VALU_FV_F<"vmfgt", 0b011101>;
+defm VMFGE_V : VALU_FV_F<"vmfge", 0b011111>;
+
+def : InstAlias<"vmfgt.vv $vd, $va, $vb$vm",
+ (VMFLT_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>;
+def : InstAlias<"vmfge.vv $vd, $va, $vb$vm",
+ (VMFLE_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>;
+
+// Vector Floating-Point Classify Instruction
+defm VFCLASS_V : VALU_FV_VS2<"vfclass.v", 0b100011, 0b10000>;
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+// Vector Floating-Point Merge Instruction
+def VFMERGE_VFM : RVInstVX<0b010111, OPFVF, (outs VRegOp:$vd),
+ (ins VRegOp:$vs2, FPR32:$rs1, VMV0:$v0),
+ "vfmerge.vfm", "$vd, $vs2, $rs1, v0"> {
+ let vm = 0;
+}
+
+// Vector Floating-Point Move Instruction
+def VFMV_V_F : RVInstVX<0b010111, OPFVF, (outs VRegOp:$vd),
+ (ins FPR32:$rs1), "vfmv.v.f", "$vd, $rs1"> {
+ let vs2 = 0;
+ let vm = 1;
+}
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
+
+// Single-Width Floating-Point/Integer Type-Convert Instructions
+defm VFCVT_XU_F_V : VALU_FV_VS2<"vfcvt.xu.f.v", 0b100010, 0b00000>;
+defm VFCVT_X_F_V : VALU_FV_VS2<"vfcvt.x.f.v", 0b100010, 0b00001>;
+defm VFCVT_F_XU_V : VALU_FV_VS2<"vfcvt.f.xu.v", 0b100010, 0b00010>;
+defm VFCVT_F_X_V : VALU_FV_VS2<"vfcvt.f.x.v", 0b100010, 0b00011>;
+
+// Widening Floating-Point/Integer Type-Convert Instructions
+let Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt in {
+defm VFWCVT_XU_F_V : VALU_FV_VS2<"vfwcvt.xu.f.v", 0b100010, 0b01000>;
+defm VFWCVT_X_F_V : VALU_FV_VS2<"vfwcvt.x.f.v", 0b100010, 0b01001>;
+defm VFWCVT_F_XU_V : VALU_FV_VS2<"vfwcvt.f.xu.v", 0b100010, 0b01010>;
+defm VFWCVT_F_X_V : VALU_FV_VS2<"vfwcvt.f.x.v", 0b100010, 0b01011>;
+defm VFWCVT_F_F_V : VALU_FV_VS2<"vfwcvt.f.f.v", 0b100010, 0b01100>;
+} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt
+
+// Narrowing Floating-Point/Integer Type-Convert Instructions
+let Constraints = "@earlyclobber $vd", RVVConstraint = Narrow in {
+defm VFNCVT_XU_F_W : VALU_FV_VS2<"vfncvt.xu.f.w", 0b100010, 0b10000>;
+defm VFNCVT_X_F_W : VALU_FV_VS2<"vfncvt.x.f.w", 0b100010, 0b10001>;
+defm VFNCVT_F_XU_W : VALU_FV_VS2<"vfncvt.f.xu.w", 0b100010, 0b10010>;
+defm VFNCVT_F_X_W : VALU_FV_VS2<"vfncvt.f.x.w", 0b100010, 0b10011>;
+defm VFNCVT_F_F_W : VALU_FV_VS2<"vfncvt.f.f.w", 0b100010, 0b10100>;
+defm VFNCVT_ROD_F_F_W : VALU_FV_VS2<"vfncvt.rod.f.f.w", 0b100010, 0b10101>;
+} // Constraints = "@earlyclobber $vd", RVVConstraint = Narrow
+
+// Vector Single-Width Integer Reduction Instructions
+defm VREDSUM : VALU_MV_V<"vredsum", 0b000000>;
+defm VREDMAXU : VALU_MV_V<"vredmaxu", 0b000110>;
+defm VREDMAX : VALU_MV_V<"vredmax", 0b000111>;
+defm VREDMINU : VALU_MV_V<"vredminu", 0b000100>;
+defm VREDMIN : VALU_MV_V<"vredmin", 0b000101>;
+defm VREDAND : VALU_MV_V<"vredand", 0b000001>;
+defm VREDOR : VALU_MV_V<"vredor", 0b000010>;
+defm VREDXOR : VALU_MV_V<"vredxor", 0b000011>;
+
+// Vector Widening Integer Reduction Instructions
+let Constraints = "@earlyclobber $vd" in {
+// Set earlyclobber on the following instructions for their second and mask
+// operands.
+// This has the downside that the earlyclobber constraint is too coarse and
+// will impose unnecessary restrictions by not allowing the destination to
+// overlap with the first (wide) operand.
+defm VWREDSUMU : VALU_IV_V<"vwredsumu", 0b110000>;
+defm VWREDSUM : VALU_IV_V<"vwredsum", 0b110001>;
+} // Constraints = "@earlyclobber $vd"
+
+// Vector Single-Width Floating-Point Reduction Instructions
+defm VFREDOSUM : VALU_FV_V<"vfredosum", 0b000011>;
+defm VFREDSUM : VALU_FV_V<"vfredsum", 0b000001>;
+defm VFREDMAX : VALU_FV_V<"vfredmax", 0b000111>;
+defm VFREDMIN : VALU_FV_V<"vfredmin", 0b000101>;
+
+// Vector Widening Floating-Point Reduction Instructions
+let Constraints = "@earlyclobber $vd" in {
+// Set earlyclobber for the following instructions for the second and mask
+// operands. This has the downside that the earlyclobber constraint is too
+// coarse and will impose unnecessary restrictions by not allowing the
+// destination to overlap with the first (wide) operand.
+defm VFWREDOSUM : VALU_FV_V<"vfwredosum", 0b110011>;
+defm VFWREDSUM : VALU_FV_V<"vfwredsum", 0b110001>;
+} // Constraints = "@earlyclobber $vd"
+
+// Vector Mask-Register Logical Instructions
+defm VMAND_M : VALU_MV_Mask<"vmand", 0b011001, "m">;
+defm VMNAND_M : VALU_MV_Mask<"vmnand", 0b011101, "m">;
+defm VMANDNOT_M : VALU_MV_Mask<"vmandnot", 0b011000, "m">;
+defm VMXOR_M : VALU_MV_Mask<"vmxor", 0b011011, "m">;
+defm VMOR_M : VALU_MV_Mask<"vmor", 0b011010, "m">;
+defm VMNOR_M : VALU_MV_Mask<"vmnor", 0b011110, "m">;
+defm VMORNOT_M : VALU_MV_Mask<"vmornot", 0b011100, "m">;
+defm VMXNOR_M : VALU_MV_Mask<"vmxnor", 0b011111, "m">;
+
+def : InstAlias<"vmcpy.m $vd, $vs",
+ (VMAND_MM VRegOp:$vd, VRegOp:$vs, VRegOp:$vs)>;
+def : InstAlias<"vmclr.m $vd",
+ (VMXOR_MM VRegOp:$vd, VRegOp:$vd, VRegOp:$vd)>;
+def : InstAlias<"vmset.m $vd",
+ (VMXNOR_MM VRegOp:$vd, VRegOp:$vd, VRegOp:$vd)>;
+def : InstAlias<"vmnot.m $vd, $vs",
+ (VMNAND_MM VRegOp:$vd, VRegOp:$vs, VRegOp:$vs)>;
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+// Vector mask population count vpopc
+def VPOPC_M : RVInstV<0b010000, 0b10000, OPMVV, (outs GPR:$vd),
+ (ins VRegOp:$vs2, VMaskOp:$vm),
+ "vpopc.m", "$vd, $vs2$vm">;
+
+// vfirst find-first-set mask bit
+def VFIRST_M : RVInstV<0b010000, 0b10001, OPMVV, (outs GPR:$vd),
+ (ins VRegOp:$vs2, VMaskOp:$vm),
+ "vfirst.m", "$vd, $vs2$vm">;
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
+
+// vmsbf.m set-before-first mask bit
+defm VMSBF_M : VALU_MV_VS2<"vmsbf.m", 0b010100, 0b00001>;
+
+// vmsif.m set-including-first mask bit
+defm VMSIF_M : VALU_MV_VS2<"vmsif.m", 0b010100, 0b00011>;
+
+// vmsof.m set-only-first mask bit
+defm VMSOF_M : VALU_MV_VS2<"vmsof.m", 0b010100, 0b00010>;
+
+// Vector Iota Instruction
+let Constraints = "@earlyclobber $vd", RVVConstraint = Iota in {
+defm VIOTA_M : VALU_MV_VS2<"viota.m", 0b010100, 0b10000>;
+} // Constraints = "@earlyclobber $vd", RVVConstraint = Iota
+
+// Vector Element Index Instruction
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+def VID_V : RVInstV<0b010100, 0b10001, OPMVV, (outs VRegOp:$vd),
+ (ins VMaskOp:$vm), "vid.v", "$vd$vm"> {
+ let vs2 = 0;
+}
+
+// Integer Scalar Move Instructions
+let vm = 1 in {
+def VMV_X_S : RVInstV<0b010000, 0b00000, OPMVV, (outs GPR:$vd),
+ (ins VRegOp:$vs2), "vmv.x.s", "$vd, $vs2">;
+def VMV_S_X : RVInstV2<0b010000, 0b00000, OPMVX, (outs VRegOp:$vd),
+ (ins GPR:$rs1), "vmv.s.x", "$vd, $rs1">;
+
+}
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1 in {
+// Floating-Point Scalar Move Instructions
+def VFMV_F_S : RVInstV<0b010000, 0b00000, OPFVV, (outs FPR32:$vd),
+ (ins VRegOp:$vs2), "vfmv.f.s", "$vd, $vs2">;
+def VFMV_S_F : RVInstV2<0b010000, 0b00000, OPFVF, (outs VRegOp:$vd),
+ (ins FPR32:$rs1), "vfmv.s.f", "$vd, $rs1">;
+
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1
+
+// Vector Slide Instructions
+let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in {
+defm VSLIDEUP_V : VALU_IV_X_I<"vslideup", 0b001110, uimm5>;
+} // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
+defm VSLIDEDOWN_V : VALU_IV_X_I<"vslidedown", 0b001111, uimm5>;
+
+let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in {
+defm VSLIDE1UP_V : VALU_MV_X<"vslide1up", 0b001110>;
+} // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
+defm VSLIDE1DOWN_V : VALU_MV_X<"vslide1down", 0b001111>;
+
+// Vector Register Gather Instruction
+let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather in {
+defm VRGATHER_V : VALU_IV_V_X_I<"vrgather", 0b001100, uimm5>;
+} // Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather
+
+// Vector Compress Instruction
+let Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress in {
+defm VCOMPRESS_V : VALU_MV_Mask<"vcompress", 0b010111>;
+} // Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+foreach nf = [1, 2, 4, 8] in {
+ def VMV#nf#R_V : RVInstV<0b100111, !add(nf, -1), OPIVI, (outs VRegOp:$vd),
+ (ins VRegOp:$vs2), "vmv" # nf # "r.v",
+ "$vd, $vs2"> {
+ let Uses = [];
+ let vm = 1;
+ }
+}
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
+} // Predicates = [HasStdExtV]
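The vmcpy.m/vmclr.m/vmset.m/vmnot.m aliases defined earlier in this hunk are pure identities on the mask bits: clear is x ^ x, set is ~(x ^ x), logical-not is nand(x, x), and copy is and(x, x). A small scalar C++ check of those identities (not RVV code and not part of the patch, just the bit-level reasoning applied to one 64-bit chunk of a mask register):

#include <cstdint>
#include <cstdio>

// Scalar demonstration of the identities behind the vmclr.m / vmset.m /
// vmnot.m / vmcpy.m aliases; the vector instructions apply the same logic
// to every bit of the mask register.
int main() {
  uint64_t v = 0x00ff00ff12345678ull; // arbitrary mask contents

  uint64_t clr = v ^ v;    // vmclr.m == vmxor.mm  vd, vd, vd -> all zeros
  uint64_t set = ~(v ^ v); // vmset.m == vmxnor.mm vd, vd, vd -> all ones
  uint64_t inv = ~(v & v); // vmnot.m == vmnand.mm vd, vs, vs -> ~vs
  uint64_t cpy = v & v;    // vmcpy.m == vmand.mm  vd, vs, vs -> vs

  std::printf("clr=%016llx set=%016llx inv=%016llx cpy=%016llx\n",
              (unsigned long long)clr, (unsigned long long)set,
              (unsigned long long)inv, (unsigned long long)cpy);
  return 0;
}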
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp
index 5bd09a546114..4d1f47da209d 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp
@@ -16,6 +16,7 @@
#include "RISCVTargetMachine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "riscv-isel"
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
index 585bff2bc20a..c379a8d8f0d6 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_RISCV_RISCVMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_RISCV_RISCVMACHINEFUNCTIONINFO_H
+#include "RISCVSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -22,7 +23,6 @@ namespace llvm {
/// and contains private RISCV-specific information for each MachineFunction.
class RISCVMachineFunctionInfo : public MachineFunctionInfo {
private:
- MachineFunction &MF;
/// FrameIndex for start of varargs area
int VarArgsFrameIndex = 0;
/// Size of the save area used for varargs
@@ -30,9 +30,11 @@ private:
/// FrameIndex used for transferring values between 64-bit FPRs and a pair
/// of 32-bit GPRs via the stack.
int MoveF64FrameIndex = -1;
+ /// Size of any opaque stack adjustment due to save/restore libcalls.
+ unsigned LibCallStackSize = 0;
public:
- RISCVMachineFunctionInfo(MachineFunction &MF) : MF(MF) {}
+ RISCVMachineFunctionInfo(const MachineFunction &MF) {}
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
@@ -40,11 +42,22 @@ public:
unsigned getVarArgsSaveSize() const { return VarArgsSaveSize; }
void setVarArgsSaveSize(int Size) { VarArgsSaveSize = Size; }
- int getMoveF64FrameIndex() {
+ int getMoveF64FrameIndex(MachineFunction &MF) {
if (MoveF64FrameIndex == -1)
- MoveF64FrameIndex = MF.getFrameInfo().CreateStackObject(8, 8, false);
+ MoveF64FrameIndex =
+ MF.getFrameInfo().CreateStackObject(8, Align(8), false);
return MoveF64FrameIndex;
}
+
+ unsigned getLibCallStackSize() const { return LibCallStackSize; }
+ void setLibCallStackSize(unsigned Size) { LibCallStackSize = Size; }
+
+ bool useSaveRestoreLibCalls(const MachineFunction &MF) const {
+ // We cannot use fixed locations for the callee saved spill slots if the
+ // function uses a varargs save area, and the save/restore libcalls cannot
+ // be used when the function contains a tail call.
+ return MF.getSubtarget<RISCVSubtarget>().enableSaveRestore() &&
+ VarArgsSaveSize == 0 && !MF.getFrameInfo().hasTailCall();
+ }
};
} // end namespace llvm
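For readers unfamiliar with the RISC-V save/restore libcall mechanism: whether a function may use the __riscv_save_N/__riscv_restore_N routines is decided from exactly the three conditions checked by useSaveRestoreLibCalls() above. The standalone C++ sketch below mirrors that decision; the type and field names (FunctionFrameModel and friends) are invented for illustration and do not come from the patch.

#include <cstdio>

// Hypothetical, simplified model of the save/restore libcall decision.
struct FunctionFrameModel {
  bool SaveRestoreEnabled;  // subtarget has the save-restore feature enabled
  unsigned VarArgsSaveSize; // size of the varargs register save area
  bool HasTailCall;         // frame contains a tail call
};

// Mirrors: enableSaveRestore() && VarArgsSaveSize == 0 && !hasTailCall()
static bool useSaveRestoreLibCalls(const FunctionFrameModel &F) {
  return F.SaveRestoreEnabled && F.VarArgsSaveSize == 0 && !F.HasTailCall;
}

int main() {
  FunctionFrameModel Leaf{true, 0, false};
  FunctionFrameModel VarArg{true, 16, false};
  std::printf("leaf: %d, vararg: %d\n",
              useSaveRestoreLibCalls(Leaf), useSaveRestoreLibCalls(VarArg));
  return 0;
}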
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 207742520ed6..cb7d55eb0f0c 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -12,6 +12,7 @@
#include "RISCVRegisterInfo.h"
#include "RISCV.h"
+#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -34,6 +35,8 @@ static_assert(RISCV::F31_F == RISCV::F0_F + 31,
static_assert(RISCV::F1_D == RISCV::F0_D + 1, "Register list not consecutive");
static_assert(RISCV::F31_D == RISCV::F0_D + 31,
"Register list not consecutive");
+static_assert(RISCV::V1 == RISCV::V0 + 1, "Register list not consecutive");
+static_assert(RISCV::V31 == RISCV::V0 + 31, "Register list not consecutive");
RISCVRegisterInfo::RISCVRegisterInfo(unsigned HwMode)
: RISCVGenRegisterInfo(RISCV::X1, /*DwarfFlavour*/0, /*EHFlavor*/0,
@@ -91,11 +94,11 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
bool RISCVRegisterInfo::isAsmClobberable(const MachineFunction &MF,
- unsigned PhysReg) const {
+ MCRegister PhysReg) const {
return !MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(PhysReg);
}
-bool RISCVRegisterInfo::isConstantPhysReg(unsigned PhysReg) const {
+bool RISCVRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
return PhysReg == RISCV::X0;
}
@@ -103,6 +106,39 @@ const uint32_t *RISCVRegisterInfo::getNoPreservedMask() const {
return CSR_NoRegs_RegMask;
}
+// Frame indexes representing locations of CSRs which are given a fixed location
+// by save/restore libcalls.
+static const std::map<unsigned, int> FixedCSRFIMap = {
+ {/*ra*/ RISCV::X1, -1},
+ {/*s0*/ RISCV::X8, -2},
+ {/*s1*/ RISCV::X9, -3},
+ {/*s2*/ RISCV::X18, -4},
+ {/*s3*/ RISCV::X19, -5},
+ {/*s4*/ RISCV::X20, -6},
+ {/*s5*/ RISCV::X21, -7},
+ {/*s6*/ RISCV::X22, -8},
+ {/*s7*/ RISCV::X23, -9},
+ {/*s8*/ RISCV::X24, -10},
+ {/*s9*/ RISCV::X25, -11},
+ {/*s10*/ RISCV::X26, -12},
+ {/*s11*/ RISCV::X27, -13}
+};
+
+bool RISCVRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
+ Register Reg,
+ int &FrameIdx) const {
+ const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
+ if (!RVFI->useSaveRestoreLibCalls(MF))
+ return false;
+
+ auto FII = FixedCSRFIMap.find(Reg);
+ if (FII == FixedCSRFIMap.end())
+ return false;
+
+ FrameIdx = FII->second;
+ return true;
+}
+
void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const {
@@ -115,7 +151,7 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
DebugLoc DL = MI.getDebugLoc();
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
- unsigned FrameReg;
+ Register FrameReg;
int Offset =
getFrameLowering(MF)->getFrameIndexReference(MF, FrameIndex, FrameReg) +
MI.getOperand(FIOperandNum + 1).getImm();
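To see how the FixedCSRFIMap and hasReservedSpillSlot() additions above fit together: when the save/restore libcalls are in use, the target reports a negative, pre-reserved frame index for each callee-saved GPR instead of letting generic code create a spill slot. The self-contained approximation below uses made-up register numbers standing in for the RISCV::X* enumerators; it is a sketch of the lookup, not the patch itself.

#include <cstdio>
#include <map>

// Hypothetical register numbers standing in for RISCV::X1, X8, X9, X18, ...;
// the real values come from the generated RISCVGenRegisterInfo tables.
enum FakeReg { RA = 1, S0 = 8, S1 = 9, S2 = 18 };

// Same shape as FixedCSRFIMap above: callee-saved register -> fixed frame index.
static const std::map<unsigned, int> FixedCSRFIMap = {
    {RA, -1}, {S0, -2}, {S1, -3}, {S2, -4}};

// Mirrors hasReservedSpillSlot(): report a fixed slot only when the
// save/restore libcalls are in use for this function.
static bool hasReservedSpillSlot(bool UsesSaveRestoreLibCalls, unsigned Reg,
                                 int &FrameIdx) {
  if (!UsesSaveRestoreLibCalls)
    return false;
  auto It = FixedCSRFIMap.find(Reg);
  if (It == FixedCSRFIMap.end())
    return false;
  FrameIdx = It->second;
  return true;
}

int main() {
  int FI = 0;
  if (hasReservedSpillSlot(true, S1, FI))
    std::printf("s1 spills to fixed frame index %d\n", FI);
  return 0;
}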
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index 30b639517fde..ffbb60abf755 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -31,12 +31,15 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const override;
bool isAsmClobberable(const MachineFunction &MF,
- unsigned PhysReg) const override;
+ MCRegister PhysReg) const override;
- bool isConstantPhysReg(unsigned PhysReg) const override;
+ bool isConstantPhysReg(MCRegister PhysReg) const override;
const uint32_t *getNoPreservedMask() const override;
+ bool hasReservedSpillSlot(const MachineFunction &MF, Register Reg,
+ int &FrameIdx) const override;
+
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
@@ -51,10 +54,6 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
return true;
}
- bool trackLivenessAfterRegAlloc(const MachineFunction &) const override {
- return true;
- }
-
const TargetRegisterClass *
getPointerRegClass(const MachineFunction &MF,
unsigned Kind = 0) const override {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 82b37afd0805..7544b4b3b845 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -33,7 +33,21 @@ class RISCVReg64<RISCVReg32 subreg> : Register<""> {
let AltNames = subreg.AltNames;
}
+class RISCVRegWithSubRegs<bits<5> Enc, string n, list<Register> subregs,
+ list<string> alt = []>
+ : RegisterWithSubRegs<n, subregs> {
+ let HWEncoding{4-0} = Enc;
+ let AltNames = alt;
+}
+
def ABIRegAltName : RegAltNameIndex;
+
+def sub_vrm2 : SubRegIndex<64, -1>;
+def sub_vrm2_hi : SubRegIndex<64, -1>;
+def sub_vrm4 : SubRegIndex<128, -1>;
+def sub_vrm4_hi : SubRegIndex<128, -1>;
+def sub_vrm8 : SubRegIndex<256, -1>;
+def sub_vrm8_hi : SubRegIndex<256, -1>;
} // Namespace = "RISCV"
// Integer registers
@@ -233,3 +247,88 @@ def FPR64C : RegisterClass<"RISCV", [f64], 64, (add
(sequence "F%u_D", 10, 15),
(sequence "F%u_D", 8, 9)
)>;
+
+// Vector registers
+let RegAltNameIndices = [ABIRegAltName] in {
+ foreach Index = 0-31 in {
+ def V#Index : RISCVReg<Index, "v"#Index, ["v"#Index]>, DwarfRegNum<[!add(Index, 64)]>;
+ }
+
+ foreach Index = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22,
+ 24, 26, 28, 30] in {
+ def V#Index#M2 : RISCVRegWithSubRegs<Index, "v"#Index,
+ [!cast<Register>("V"#Index),
+ !cast<Register>("V"#!add(Index, 1))],
+ ["v"#Index]>,
+ DwarfRegAlias<!cast<Register>("V"#Index)> {
+ let SubRegIndices = [sub_vrm2, sub_vrm2_hi];
+ }
+ }
+
+ foreach Index = [0, 4, 8, 12, 16, 20, 24, 28] in {
+ def V#Index#M4 : RISCVRegWithSubRegs<Index, "v"#Index,
+ [!cast<Register>("V"#Index#"M2"),
+ !cast<Register>("V"#!add(Index, 2)#"M2")],
+ ["v"#Index]>,
+ DwarfRegAlias<!cast<Register>("V"#Index)> {
+ let SubRegIndices = [sub_vrm4, sub_vrm4_hi];
+ }
+ }
+
+ foreach Index = [0, 8, 16, 24] in {
+ def V#Index#M8 : RISCVRegWithSubRegs<Index, "v"#Index,
+ [!cast<Register>("V"#Index#"M4"),
+ !cast<Register>("V"#!add(Index, 4)#"M4")],
+ ["v"#Index]>,
+ DwarfRegAlias<!cast<Register>("V"#Index)> {
+ let SubRegIndices = [sub_vrm8, sub_vrm8_hi];
+ }
+ }
+
+ def VTYPE : RISCVReg<0, "vtype", ["vtype"]>;
+ def VL : RISCVReg<0, "vl", ["vl"]>;
+}
+
+class RegisterTypes<list<ValueType> reg_types> {
+ list<ValueType> types = reg_types;
+}
+
+// The order of registers represents the preferred allocation sequence,
+// meaning caller-save regs are listed before callee-save.
+def VR : RegisterClass<"RISCV", [nxv8i8, nxv4i16, nxv2i32, nxv1i64],
+ 64, (add
+ (sequence "V%u", 25, 31),
+ (sequence "V%u", 8, 24),
+ (sequence "V%u", 0, 7)
+ )> {
+ let Size = 64;
+}
+
+def VRM2 : RegisterClass<"RISCV", [nxv16i8, nxv8i16, nxv4i32, nxv2i64], 64,
+ (add V26M2, V28M2, V30M2, V8M2, V10M2, V12M2, V14M2, V16M2,
+ V18M2, V20M2, V22M2, V24M2, V0M2, V2M2, V4M2, V6M2)> {
+ let Size = 128;
+}
+
+def VRM4 : RegisterClass<"RISCV", [nxv32i8, nxv16i16, nxv8i32, nxv4i64], 64,
+ (add V28M4, V8M4, V12M4, V16M4, V20M4, V24M4, V0M4, V4M4)> {
+ let Size = 256;
+}
+
+def VRM8 : RegisterClass<"RISCV", [nxv32i16, nxv16i32, nxv8i64], 64,
+ (add V8M8, V16M8, V24M8, V0M8)> {
+ let Size = 512;
+}
+
+def VMaskVT : RegisterTypes<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1, nxv32i1]>;
+
+def VM : RegisterClass<"RISCV", VMaskVT.types, 64, (add
+ (sequence "V%u", 25, 31),
+ (sequence "V%u", 8, 24),
+ (sequence "V%u", 0, 7))> {
+ let Size = 64;
+}
+
+def VMV0 : RegisterClass<"RISCV", VMaskVT.types, 64, (add V0)> {
+ let Size = 64;
+}
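The grouped registers defined above (V0M2, V8M4, V24M8 and so on) simply alias LMUL consecutive base vector registers, with the base aligned to the group size, as expressed by the nested sub_vrm* sub-register indices. A small illustrative helper, assuming nothing beyond that nesting rule (this is not LLVM API, just a standalone sketch):

#include <cstdio>

// Hypothetical helper: list the base V registers covered by a grouped
// register V<Base>M<LMUL>, mirroring the sub_vrm* nesting above
// (V8M2 = {v8, v9}, V8M4 = {v8..v11}, V8M8 = {v8..v15}).
static void printGroup(unsigned Base, unsigned LMUL) {
  if (Base % LMUL != 0) { // groups must be aligned to the group size
    std::printf("v%u is not a valid base for LMUL=%u\n", Base, LMUL);
    return;
  }
  std::printf("V%uM%u covers:", Base, LMUL);
  for (unsigned I = 0; I < LMUL; ++I)
    std::printf(" v%u", Base + I);
  std::printf("\n");
}

int main() {
  printGroup(8, 2);  // V8M2  -> v8 v9
  printGroup(8, 4);  // V8M4  -> v8 v9 v10 v11
  printGroup(24, 8); // V24M8 -> v24 ... v31
  return 0;
}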
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket32.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket32.td
index 8a91a70b61c7..305e2b9b5927 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket32.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket32.td
@@ -17,6 +17,7 @@ def Rocket32Model : SchedMachineModel {
let LoadLatency = 3;
let MispredictPenalty = 3;
let CompleteModel = 1;
+ let UnsupportedFeatures = [HasStdExtV];
}
//===----------------------------------------------------------------------===//
@@ -88,10 +89,18 @@ def : WriteRes<WriteAtomicLDW, [Rocket32UnitMem]>;
def : WriteRes<WriteAtomicSTW, [Rocket32UnitMem]>;
// Most FP single precision operations are 4 cycles
-def : WriteRes<WriteFALU32, [Rocket32UnitFPALU]> { let Latency = 4; }
+let Latency = 4 in {
+def : WriteRes<WriteFALU32, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFSGNJ32, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFMinMax32, [Rocket32UnitFPALU]>;
+}
// Most FP double precision operations are 6 cycles
-def : WriteRes<WriteFALU64, [Rocket32UnitFPALU]> { let Latency = 6; }
+let Latency = 6 in {
+def : WriteRes<WriteFALU64, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFSGNJ64, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFMinMax64, [Rocket32UnitFPALU]>;
+}
let Latency = 2 in {
def : WriteRes<WriteFCvtI32ToF32, [Rocket32UnitFPALU]>;
@@ -180,6 +189,7 @@ def : ReadAdvance<ReadAtomicLDW, 0>;
def : ReadAdvance<ReadAtomicLDD, 0>;
def : ReadAdvance<ReadAtomicSTW, 0>;
def : ReadAdvance<ReadAtomicSTD, 0>;
+def : ReadAdvance<ReadFMemBase, 0>;
def : ReadAdvance<ReadFALU32, 0>;
def : ReadAdvance<ReadFALU64, 0>;
def : ReadAdvance<ReadFMul32, 0>;
@@ -194,6 +204,10 @@ def : ReadAdvance<ReadFSqrt32, 0>;
def : ReadAdvance<ReadFSqrt64, 0>;
def : ReadAdvance<ReadFCmp32, 0>;
def : ReadAdvance<ReadFCmp64, 0>;
+def : ReadAdvance<ReadFSGNJ32, 0>;
+def : ReadAdvance<ReadFSGNJ64, 0>;
+def : ReadAdvance<ReadFMinMax32, 0>;
+def : ReadAdvance<ReadFMinMax64, 0>;
def : ReadAdvance<ReadFCvtF32ToI32, 0>;
def : ReadAdvance<ReadFCvtF32ToI64, 0>;
def : ReadAdvance<ReadFCvtF64ToI32, 0>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket64.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket64.td
index 79e79f90f2f0..e8514a275c45 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket64.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedRocket64.td
@@ -16,6 +16,7 @@ def Rocket64Model : SchedMachineModel {
let IssueWidth = 1; // 1 micro-op is dispatched per cycle.
let LoadLatency = 3;
let MispredictPenalty = 3;
+ let UnsupportedFeatures = [HasStdExtV];
}
//===----------------------------------------------------------------------===//
@@ -101,10 +102,18 @@ def : WriteRes<WriteAtomicSTW, [Rocket64UnitMem]>;
def : WriteRes<WriteAtomicSTD, [Rocket64UnitMem]>;
// Most FP single precision operations are 4 cycles
-def : WriteRes<WriteFALU32, [Rocket64UnitFPALU]> { let Latency = 4; }
+let Latency = 4 in {
+def : WriteRes<WriteFALU32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFSGNJ32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFMinMax32, [Rocket64UnitFPALU]>;
+}
+let Latency = 6 in {
// Most FP double precision operations are 6 cycles
-def : WriteRes<WriteFALU64, [Rocket64UnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFALU64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFSGNJ64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFMinMax64, [Rocket64UnitFPALU]>;
+}
// Conversion instructions
let Latency = 2 in {
@@ -181,6 +190,7 @@ def : ReadAdvance<ReadAtomicLDW, 0>;
def : ReadAdvance<ReadAtomicLDD, 0>;
def : ReadAdvance<ReadAtomicSTW, 0>;
def : ReadAdvance<ReadAtomicSTD, 0>;
+def : ReadAdvance<ReadFMemBase, 0>;
def : ReadAdvance<ReadFALU32, 0>;
def : ReadAdvance<ReadFALU64, 0>;
def : ReadAdvance<ReadFMul32, 0>;
@@ -195,6 +205,10 @@ def : ReadAdvance<ReadFSqrt32, 0>;
def : ReadAdvance<ReadFSqrt64, 0>;
def : ReadAdvance<ReadFCmp32, 0>;
def : ReadAdvance<ReadFCmp64, 0>;
+def : ReadAdvance<ReadFSGNJ32, 0>;
+def : ReadAdvance<ReadFSGNJ64, 0>;
+def : ReadAdvance<ReadFMinMax32, 0>;
+def : ReadAdvance<ReadFMinMax64, 0>;
def : ReadAdvance<ReadFCvtF32ToI32, 0>;
def : ReadAdvance<ReadFCvtF32ToI64, 0>;
def : ReadAdvance<ReadFCvtF64ToI32, 0>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td
index 9e2762a5d171..bbcd03d46236 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -71,6 +71,10 @@ def WriteFClass32 : SchedWrite; // 32-bit floating point classify
def WriteFClass64 : SchedWrite; // 64-bit floating point classify
def WriteFCmp32 : SchedWrite; // 32-bit floating point compare
def WriteFCmp64 : SchedWrite; // 64-bit floating point compare
+def WriteFSGNJ32 : SchedWrite; // 32-bit floating point sign-injection
+def WriteFSGNJ64 : SchedWrite; // 64-bit floating point sign-injection
+def WriteFMinMax32 : SchedWrite; // 32-bit floating point min or max
+def WriteFMinMax64 : SchedWrite; // 64-bit floating point min or max
def WriteFMovF32ToI32 : SchedWrite;
def WriteFMovI32ToF32 : SchedWrite;
@@ -89,6 +93,7 @@ def ReadJmp : SchedRead;
def ReadJalr : SchedRead;
def ReadCSR : SchedRead;
def ReadMemBase : SchedRead;
+def ReadFMemBase : SchedRead;
def ReadStoreData : SchedRead;
def ReadIALU : SchedRead;
def ReadIALU32 : SchedRead; // 32-bit integer ALU operations on RV64I
@@ -120,6 +125,10 @@ def ReadFSqrt32 : SchedRead; // 32-bit floating point sqrt
def ReadFSqrt64 : SchedRead; // 64-bit floating point sqrt
def ReadFCmp32 : SchedRead;
def ReadFCmp64 : SchedRead;
+def ReadFSGNJ32 : SchedRead;
+def ReadFSGNJ64 : SchedRead;
+def ReadFMinMax32 : SchedRead;
+def ReadFMinMax64 : SchedRead;
def ReadFCvtF32ToI32 : SchedRead;
def ReadFCvtF32ToI64 : SchedRead;
def ReadFCvtF64ToI32 : SchedRead;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 83e7e2d52cc1..47a48c820a29 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -33,7 +33,7 @@ RISCVSubtarget &RISCVSubtarget::initializeSubtargetDependencies(
const Triple &TT, StringRef CPU, StringRef FS, StringRef ABIName) {
// Determine default and user-specified characteristics
bool Is64Bit = TT.isArch64Bit();
- std::string CPUName = CPU;
+ std::string CPUName = std::string(CPU);
if (CPUName.empty())
CPUName = Is64Bit ? "generic-rv64" : "generic-rv32";
ParseSubtargetFeatures(CPUName, FS);
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 605d4abcc9ae..fe1285f23b15 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -39,10 +39,23 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool HasStdExtF = false;
bool HasStdExtD = false;
bool HasStdExtC = false;
+ bool HasStdExtB = false;
+ bool HasStdExtZbb = false;
+ bool HasStdExtZbc = false;
+ bool HasStdExtZbe = false;
+ bool HasStdExtZbf = false;
+ bool HasStdExtZbm = false;
+ bool HasStdExtZbp = false;
+ bool HasStdExtZbr = false;
+ bool HasStdExtZbs = false;
+ bool HasStdExtZbt = false;
+ bool HasStdExtZbproposedc = false;
+ bool HasStdExtV = false;
bool HasRV64 = false;
bool IsRV32E = false;
bool EnableLinkerRelax = false;
- bool EnableRVCHintInstrs = false;
+ bool EnableRVCHintInstrs = true;
+ bool EnableSaveRestore = false;
unsigned XLen = 32;
MVT XLenVT = MVT::i32;
RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
@@ -87,10 +100,23 @@ public:
bool hasStdExtF() const { return HasStdExtF; }
bool hasStdExtD() const { return HasStdExtD; }
bool hasStdExtC() const { return HasStdExtC; }
+ bool hasStdExtB() const { return HasStdExtB; }
+ bool hasStdExtZbb() const { return HasStdExtZbb; }
+ bool hasStdExtZbc() const { return HasStdExtZbc; }
+ bool hasStdExtZbe() const { return HasStdExtZbe; }
+ bool hasStdExtZbf() const { return HasStdExtZbf; }
+ bool hasStdExtZbm() const { return HasStdExtZbm; }
+ bool hasStdExtZbp() const { return HasStdExtZbp; }
+ bool hasStdExtZbr() const { return HasStdExtZbr; }
+ bool hasStdExtZbs() const { return HasStdExtZbs; }
+ bool hasStdExtZbt() const { return HasStdExtZbt; }
+ bool hasStdExtZbproposedc() const { return HasStdExtZbproposedc; }
+ bool hasStdExtV() const { return HasStdExtV; }
bool is64Bit() const { return HasRV64; }
bool isRV32E() const { return IsRV32E; }
bool enableLinkerRelax() const { return EnableLinkerRelax; }
bool enableRVCHintInstrs() const { return EnableRVCHintInstrs; }
+ bool enableSaveRestore() const { return EnableSaveRestore; }
MVT getXLenVT() const { return XLenVT; }
unsigned getXLen() const { return XLen; }
RISCVABI::ABI getTargetABI() const { return TargetABI; }
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td
index a46a32c4e7f2..8e75647bd4a9 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td
@@ -20,6 +20,8 @@ include "llvm/TableGen/SearchableTable.td"
class SysReg<string name, bits<12> op> {
string Name = name;
bits<12> Encoding = op;
+ // A maximum of one alias is supported right now.
+ string AltName = name;
// FIXME: add these additional fields when needed.
// Privilege Access: Read and Write = 0, 1, 2; Read-Only = 3.
// Privilege Mode: User = 0, System = 1 or Machine = 3.
@@ -36,7 +38,7 @@ class SysReg<string name, bits<12> op> {
def SysRegsList : GenericTable {
let FilterClass = "SysReg";
// FIXME: add "ReadWrite", "Mode", "Extra", "Number" fields when needed.
- let Fields = [ "Name", "Encoding", "FeaturesRequired", "isRV32Only" ];
+ let Fields = [ "Name", "Encoding", "AltName", "FeaturesRequired", "isRV32Only" ];
let PrimaryKey = [ "Encoding" ];
let PrimaryKeyName = "lookupSysRegByEncoding";
@@ -47,6 +49,11 @@ def lookupSysRegByName : SearchIndex {
let Key = [ "Name" ];
}
+def lookupSysRegByAltName : SearchIndex {
+ let Table = SysRegsList;
+ let Key = [ "AltName" ];
+}
+
// The following CSR encodings match those given in Tables 2.2,
// 2.3, 2.4 and 2.5 in the RISC-V Instruction Set Manual
// Volume II: Privileged Architecture.
@@ -303,6 +310,7 @@ def: SysReg<"mhpmcounter31h", 0xB9F>;
//===--------------------------
// Machine Counter Setup
//===--------------------------
+def : SysReg<"mcountinhibit", 0x320>;
def : SysReg<"mhpmevent3", 0x323>;
def : SysReg<"mhpmevent4", 0x324>;
def : SysReg<"mhpmevent5", 0x325>;
@@ -346,4 +354,19 @@ def : SysReg<"tdata3", 0x7A3>;
//===-----------------------------------------------
def : SysReg<"dcsr", 0x7B0>;
def : SysReg<"dpc", 0x7B1>;
-def : SysReg<"dscratch", 0x7B2>;
+
+// "dscratch" is an alternative name for "dscratch0" which appeared in earlier
+// drafts of the RISC-V debug spec
+let AltName = "dscratch" in
+def : SysReg<"dscratch0", 0x7B2>;
+def : SysReg<"dscratch1", 0x7B3>;
+
+//===-----------------------------------------------
+// User Vector CSRs
+//===-----------------------------------------------
+def : SysReg<"vstart", 0x008>;
+def : SysReg<"vxsat", 0x009>;
+def : SysReg<"vxrm", 0x00A>;
+def : SysReg<"vl", 0xC20>;
+def : SysReg<"vtype", 0xC21>;
+def : SysReg<"vlenb", 0xC22>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index de71c01753de..75683e2fd8e9 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -128,6 +128,7 @@ public:
bool addGlobalInstructionSelect() override;
void addPreEmitPass() override;
void addPreEmitPass2() override;
+ void addPreSched2() override;
void addPreRegAlloc() override;
};
}
@@ -167,13 +168,16 @@ bool RISCVPassConfig::addGlobalInstructionSelect() {
return false;
}
+void RISCVPassConfig::addPreSched2() {}
+
void RISCVPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
void RISCVPassConfig::addPreEmitPass2() {
+ addPass(createRISCVExpandPseudoPass());
// Schedule the expansion of AMOs at the last possible moment, avoiding the
// possibility for other passes to break the requirements for forward
// progress in the LR/SC block.
- addPass(createRISCVExpandPseudoPass());
+ addPass(createRISCVExpandAtomicPseudoPass());
}
void RISCVPassConfig::addPreRegAlloc() {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetObjectFile.cpp
index bbd45c970d3d..fba86b463764 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetObjectFile.cpp
@@ -17,7 +17,6 @@ using namespace llvm;
void RISCVELFTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- InitializeELF(TM.Options.UseInitArray);
SmallDataSection = getContext().getELFSection(
".sdata", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
@@ -105,10 +104,11 @@ bool RISCVELFTargetObjectFile::isConstantInSmallSection(
MCSection *RISCVELFTargetObjectFile::getSectionForConstant(
const DataLayout &DL, SectionKind Kind, const Constant *C,
- unsigned &Align) const {
+ Align &Alignment) const {
if (isConstantInSmallSection(DL, C))
return SmallDataSection;
// Otherwise, we work the same as ELF.
- return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C, Align);
+ return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C,
+ Alignment);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetObjectFile.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetObjectFile.h
index b2daaaa9d364..830a7d813c15 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetObjectFile.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetObjectFile.h
@@ -12,7 +12,6 @@
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
namespace llvm {
-class RISCVTargetMachine;
/// This implementation is used for RISCV ELF targets.
class RISCVELFTargetObjectFile : public TargetLoweringObjectFileELF {
@@ -36,7 +35,7 @@ public:
MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C,
- unsigned &Align) const override;
+ Align &Alignment) const override;
void getModuleMetadata(Module &M) override;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 90fcd679c523..bd78f801c59a 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -15,7 +15,8 @@ using namespace llvm;
#define DEBUG_TYPE "riscvtti"
-int RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy() &&
"getIntImmCost can only estimate cost of materialising integers");
@@ -30,7 +31,7 @@ int RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
}
int RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty) {
+ Type *Ty, TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy() &&
"getIntImmCost can only estimate cost of materialising integers");
@@ -78,7 +79,7 @@ int RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &
}
// Otherwise, use the full materialisation cost.
- return getIntImmCost(Imm, Ty);
+ return getIntImmCost(Imm, Ty, CostKind);
}
// By default, prevent hoisting.
@@ -86,7 +87,8 @@ int RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &
}
int RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
// Prevent hoisting in unknown cases.
return TTI::TCC_Free;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index d219ba81bb56..392700707760 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -41,12 +41,13 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
- int getIntImmCost(const APInt &Imm, Type *Ty);
- int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+ int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
+ int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind);
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ Type *Ty, TTI::TargetCostKind CostKind);
};
} // end namespace llvm
-#endif // LLVM_LIB_TARGET_RISCV_RISCVTARGETTRANSFORMINFO_H
\ No newline at end of file
+#endif // LLVM_LIB_TARGET_RISCV_RISCVTARGETTRANSFORMINFO_H
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h
index d36c528bba1e..4e6cdd8606b1 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h
@@ -45,7 +45,7 @@ enum {
InstFormatCJ = 16,
InstFormatOther = 17,
- InstFormatMask = 31
+ InstFormatMask = 31,
};
// RISC-V Specific Machine Operand Flags
@@ -157,6 +157,7 @@ namespace RISCVSysReg {
struct SysReg {
const char *Name;
unsigned Encoding;
+ const char *AltName;
// FIXME: add these additional fields when needed.
// Privilege Access: Read, Write, Read-Only.
// unsigned ReadWrite;
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 2d3137f38821..16e159621672 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -68,6 +68,8 @@ class SparcAsmParser : public MCTargetAsmParser {
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
@@ -600,7 +602,7 @@ bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
for (const MCInst &I : Instructions) {
- Out.EmitInstruction(I, getSTI());
+ Out.emitInstruction(I, getSTI());
}
return false;
}
@@ -630,20 +632,29 @@ bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
bool SparcAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
+ if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success)
+ return Error(StartLoc, "invalid register name");
+ return false;
+}
+
+OperandMatchResultTy SparcAsmParser::tryParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) {
const AsmToken &Tok = Parser.getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
RegNo = 0;
if (getLexer().getKind() != AsmToken::Percent)
- return false;
+ return MatchOperand_Success;
Parser.Lex();
unsigned regKind = SparcOperand::rk_None;
if (matchRegisterName(Tok, RegNo, regKind)) {
Parser.Lex();
- return false;
+ return MatchOperand_Success;
}
- return Error(StartLoc, "invalid register name");
+ getLexer().UnLex(Tok);
+ return MatchOperand_NoMatch;
}
static void applyMnemonicAliases(StringRef &Mnemonic,
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/LeonFeatures.td b/contrib/llvm-project/llvm/lib/Target/Sparc/LeonFeatures.td
index e0ea4e9c7645..75273eff1868 100755
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/LeonFeatures.td
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/LeonFeatures.td
@@ -16,9 +16,9 @@
//support to casa instruction; for leon3 subtarget only
def UMACSMACSupport : SubtargetFeature<
- "hasumacsmac",
- "HasUmacSmac",
- "true",
+ "hasumacsmac",
+ "HasUmacSmac",
+ "true",
"Enable UMAC and SMAC for LEON3 and LEON4 processors"
>;
@@ -30,9 +30,9 @@ def UMACSMACSupport : SubtargetFeature<
//support to casa instruction; for leon3 subtarget only
def LeonCASA : SubtargetFeature<
- "hasleoncasa",
- "HasLeonCasa",
- "true",
+ "hasleoncasa",
+ "HasLeonCasa",
+ "true",
"Enable CASA instruction for LEON3 and LEON4 processors"
>;
@@ -40,7 +40,7 @@ def InsertNOPLoad: SubtargetFeature<
"insertnopload",
"InsertNOPLoad",
"true",
- "LEON3 erratum fix: Insert a NOP instruction after every single-cycle load instruction when the next instruction is another load/store instruction"
+ "LEON3 erratum fix: Insert a NOP instruction after every single-cycle load instruction when the next instruction is another load/store instruction"
>;
def DetectRoundChange : SubtargetFeature<
@@ -55,7 +55,7 @@ def FixAllFDIVSQRT : SubtargetFeature<
"fixallfdivsqrt",
"FixAllFDIVSQRT",
"true",
- "LEON erratum fix: Fix FDIVS/FDIVD/FSQRTS/FSQRTD instructions with NOPs and floating-point store"
+ "LEON erratum fix: Fix FDIVS/FDIVD/FSQRTS/FSQRTD instructions with NOPs and floating-point store"
>;
def LeonCycleCounter
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index 2e8fa0dbaf4c..83c44e0682ce 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -15,6 +15,7 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/EndianStream.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -270,8 +271,8 @@ namespace {
llvm_unreachable("fixupNeedsRelaxation() unimplemented");
return false;
}
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override {
+ void relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const override {
// FIXME.
llvm_unreachable("relaxInstruction() unimplemented");
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
index 8a673de69911..f6728a070736 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
@@ -46,7 +46,8 @@ void SparcInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const
void SparcInstPrinter::printInst(const MCInst *MI, uint64_t Address,
StringRef Annot, const MCSubtargetInfo &STI,
raw_ostream &O) {
- if (!printAliasInstr(MI, STI, O) && !printSparcAliasInstr(MI, STI, O))
+ if (!printAliasInstr(MI, Address, STI, O) &&
+ !printSparcAliasInstr(MI, STI, O))
printInstruction(MI, Address, STI, O);
printAnnotation(O, Annot);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h
index cb85fe98ed42..11587f165ef2 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h
@@ -33,10 +33,10 @@ public:
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, uint64_t Address,
const MCSubtargetInfo &STI, raw_ostream &O);
- bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
- raw_ostream &O);
- void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx,
+ bool printAliasInstr(const MCInst *MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpIdx, unsigned PrintMethodIdx,
const MCSubtargetInfo &STI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index 1a2a040990ae..c5cc2ea34bb7 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -42,8 +42,6 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Triple &TheTriple) {
SunStyleELFSectionSwitchSyntax = true;
UsesELFSectionDirectiveForBSS = true;
-
- UseIntegratedAssembler = true;
}
const MCExpr*
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index 7eb27f55baac..fb2bcdc6c91b 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -37,7 +37,7 @@ static MCAsmInfo *createSparcMCAsmInfo(const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
MCAsmInfo *MAI = new SparcELFMCAsmInfo(TT);
unsigned Reg = MRI.getDwarfRegNum(SP::O6, true);
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 0);
+ MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, Reg, 0);
MAI->addInitialFrameState(Inst);
return MAI;
}
@@ -47,7 +47,7 @@ static MCAsmInfo *createSparcV9MCAsmInfo(const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
MCAsmInfo *MAI = new SparcELFMCAsmInfo(TT);
unsigned Reg = MRI.getDwarfRegNum(SP::O6, true);
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 2047);
+ MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, Reg, 2047);
MAI->addInitialFrameState(Inst);
return MAI;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
index e5699bb1c133..f360946b9a79 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -27,10 +27,6 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class MCTargetOptions;
class Target;
-class Triple;
-class StringRef;
-class raw_pwrite_stream;
-class raw_ostream;
MCCodeEmitter *createSparcMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/Sparc.h b/contrib/llvm-project/llvm/lib/Target/Sparc/Sparc.h
index 967c463f5281..aabc4f149829 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/Sparc.h
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/Sparc.h
@@ -21,7 +21,6 @@
namespace llvm {
class FunctionPass;
class SparcTargetMachine;
- class formatted_raw_ostream;
class AsmPrinter;
class MCInst;
class MachineInstr;
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/Sparc.td b/contrib/llvm-project/llvm/lib/Target/Sparc/Sparc.td
index ca6147edc46b..da95602309a1 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/Sparc.td
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/Sparc.td
@@ -150,7 +150,7 @@ def : Processor<"ut699", LEON3Itineraries,
[FeatureLeon, InsertNOPLoad, FeatureNoFSMULD, FeatureNoFMULS, FixAllFDIVSQRT]>;
// LEON3 FT (GR712RC). Provides features for the GR712RC processor.
-// - covers all the erratum fixed for LEON3 and support for the CASA instruction.
+// - covers all the erratum fixed for LEON3 and support for the CASA instruction.
def : Processor<"gr712rc", LEON3Itineraries,
[FeatureLeon, LeonCASA]>;
@@ -158,9 +158,9 @@ def : Processor<"gr712rc", LEON3Itineraries,
def : Processor<"leon4", LEON4Itineraries,
[FeatureLeon, UMACSMACSupport, LeonCASA]>;
-// LEON 4 FT (GR740)
+// LEON 4 FT (GR740)
// TO DO: Place-holder: Processor specific features will be added *very* soon here.
-def : Processor<"gr740", LEON4Itineraries,
+def : Processor<"gr740", LEON4Itineraries,
[FeatureLeon, UMACSMACSupport, LeonCASA, LeonCycleCounter,
FeaturePWRPSR]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index f0caf3bc284f..069e43c6f544 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -52,8 +52,8 @@ namespace {
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
const char *Modifier = nullptr);
- void EmitFunctionBodyStart() override;
- void EmitInstruction(const MachineInstr *MI) override;
+ void emitFunctionBodyStart() override;
+ void emitInstruction(const MachineInstr *MI) override;
static const char *getRegisterName(unsigned RegNo) {
return SparcInstPrinter::getRegisterName(RegNo);
@@ -108,7 +108,7 @@ static void EmitCall(MCStreamer &OutStreamer,
MCInst CallInst;
CallInst.setOpcode(SP::CALL);
CallInst.addOperand(Callee);
- OutStreamer.EmitInstruction(CallInst, STI);
+ OutStreamer.emitInstruction(CallInst, STI);
}
static void EmitSETHI(MCStreamer &OutStreamer,
@@ -119,7 +119,7 @@ static void EmitSETHI(MCStreamer &OutStreamer,
SETHIInst.setOpcode(SP::SETHIi);
SETHIInst.addOperand(RD);
SETHIInst.addOperand(Imm);
- OutStreamer.EmitInstruction(SETHIInst, STI);
+ OutStreamer.emitInstruction(SETHIInst, STI);
}
static void EmitBinary(MCStreamer &OutStreamer, unsigned Opcode,
@@ -131,7 +131,7 @@ static void EmitBinary(MCStreamer &OutStreamer, unsigned Opcode,
Inst.addOperand(RD);
Inst.addOperand(RS1);
Inst.addOperand(Src2);
- OutStreamer.EmitInstruction(Inst, STI);
+ OutStreamer.emitInstruction(Inst, STI);
}
static void EmitOR(MCStreamer &OutStreamer,
@@ -233,15 +233,15 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI,
// or <MO>, %lo(_GLOBAL_OFFSET_TABLE_+(<EndLabel>-<StartLabel>))), <MO>
// add <MO>, %o7, <MO>
- OutStreamer->EmitLabel(StartLabel);
+ OutStreamer->emitLabel(StartLabel);
MCOperand Callee = createPCXCallOP(EndLabel, OutContext);
EmitCall(*OutStreamer, Callee, STI);
- OutStreamer->EmitLabel(SethiLabel);
+ OutStreamer->emitLabel(SethiLabel);
MCOperand hiImm = createPCXRelExprOp(SparcMCExpr::VK_Sparc_PC22,
GOTLabel, StartLabel, SethiLabel,
OutContext);
EmitSETHI(*OutStreamer, hiImm, MCRegOP, STI);
- OutStreamer->EmitLabel(EndLabel);
+ OutStreamer->emitLabel(EndLabel);
MCOperand loImm = createPCXRelExprOp(SparcMCExpr::VK_Sparc_PC10,
GOTLabel, StartLabel, EndLabel,
OutContext);
@@ -249,8 +249,7 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI,
EmitADD(*OutStreamer, MCRegOP, RegO7, MCRegOP, STI);
}
-void SparcAsmPrinter::EmitInstruction(const MachineInstr *MI)
-{
+void SparcAsmPrinter::emitInstruction(const MachineInstr *MI) {
switch (MI->getOpcode()) {
default: break;
@@ -270,7 +269,7 @@ void SparcAsmPrinter::EmitInstruction(const MachineInstr *MI)
} while ((++I != E) && I->isInsideBundle()); // Delay slot check.
}
-void SparcAsmPrinter::EmitFunctionBodyStart() {
+void SparcAsmPrinter::emitFunctionBodyStart() {
if (!MF->getSubtarget<SparcSubtarget>().is64Bit())
return;
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcCallingConv.td b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcCallingConv.td
index 4be432211f1d..db540d6f0c42 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcCallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcCallingConv.td
@@ -67,7 +67,7 @@ def RetCC_Sparc32 : CallingConv<[
// bits of an integer register while the float goes in a floating point
// register.
//
-// The difference is encoded in LLVM IR using the inreg atttribute on function
+// The difference is encoded in LLVM IR using the inreg attribute on function
// arguments:
//
// C: void f(float, float);
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
index 0f74f2bb344c..8d8424641cd9 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -104,7 +104,7 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF,
// rather than reporting an error, as would be sensible. This is
// poor, but fixing that bogosity is going to be a large project.
// For now, just see if it's lied, and report an error here.
- if (!NeedsStackRealignment && MFI.getMaxAlignment() > getStackAlignment())
+ if (!NeedsStackRealignment && MFI.getMaxAlign() > getStackAlign())
report_fatal_error("Function \"" + Twine(MF.getName()) + "\" required "
"stack re-alignment, but LLVM couldn't handle it "
"(probably because it has a dynamic alloca).");
@@ -146,9 +146,7 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF,
// Finally, ensure that the size is sufficiently aligned for the
// data on the stack.
- if (MFI.getMaxAlignment() > 0) {
- NumBytes = alignTo(NumBytes, MFI.getMaxAlignment());
- }
+ NumBytes = alignTo(NumBytes, MFI.getMaxAlign());
// Update stack size with corrected value.
MFI.setStackSize(NumBytes);
@@ -189,9 +187,10 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF,
regUnbiased = SP::O6;
// andn %regUnbiased, MaxAlign-1, %regUnbiased
- int MaxAlign = MFI.getMaxAlignment();
+ Align MaxAlign = MFI.getMaxAlign();
BuildMI(MBB, MBBI, dl, TII.get(SP::ANDNri), regUnbiased)
- .addReg(regUnbiased).addImm(MaxAlign - 1);
+ .addReg(regUnbiased)
+ .addImm(MaxAlign.value() - 1U);
if (Bias) {
// add %g1, -BIAS, %o6
@@ -258,9 +257,9 @@ bool SparcFrameLowering::hasFP(const MachineFunction &MF) const {
MFI.isFrameAddressTaken();
}
-
-int SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const {
+int SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ Register &FrameReg) const {
const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
const SparcRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.h
index 8e6001da05db..3ec9dc8b85dd 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.h
@@ -39,7 +39,7 @@ public:
RegScavenger *RS = nullptr) const override;
int getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const override;
+ Register &FrameReg) const override;
/// targetHandlesStackFrameRounding - Returns true if the target is
/// responsible for rounding up the stack frame (probably at emitPrologue
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index d853d0608519..116352e08382 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -59,23 +59,21 @@ static bool CC_Sparc_Assign_Split_64(unsigned &ValNo, MVT &ValVT,
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};
// Try to get first reg.
- if (unsigned Reg = State.AllocateReg(RegList)) {
+ if (Register Reg = State.AllocateReg(RegList)) {
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
} else {
// Assign whole thing in stack.
- State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
- State.AllocateStack(8,4),
- LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomMem(
+ ValNo, ValVT, State.AllocateStack(8, Align(4)), LocVT, LocInfo));
return true;
}
// Try to get second reg.
- if (unsigned Reg = State.AllocateReg(RegList))
+ if (Register Reg = State.AllocateReg(RegList))
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
else
- State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
- State.AllocateStack(4,4),
- LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomMem(
+ ValNo, ValVT, State.AllocateStack(4, Align(4)), LocVT, LocInfo));
return true;
}
@@ -88,13 +86,13 @@ static bool CC_Sparc_Assign_Ret_Split_64(unsigned &ValNo, MVT &ValVT,
};
// Try to get first reg.
- if (unsigned Reg = State.AllocateReg(RegList))
+ if (Register Reg = State.AllocateReg(RegList))
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
else
return false;
// Try to get second reg.
- if (unsigned Reg = State.AllocateReg(RegList))
+ if (Register Reg = State.AllocateReg(RegList))
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
else
return false;
@@ -112,7 +110,7 @@ static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT,
// Stack space is allocated for all arguments starting from [%fp+BIAS+128].
unsigned size = (LocVT == MVT::f128) ? 16 : 8;
- unsigned alignment = (LocVT == MVT::f128) ? 16 : 8;
+ Align alignment = (LocVT == MVT::f128) ? Align(16) : Align(8);
unsigned Offset = State.AllocateStack(size, alignment);
unsigned Reg = 0;
@@ -152,7 +150,7 @@ static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT,
MVT &LocVT, CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
assert(LocVT.getSizeInBits() == 32 && "Can't handle non-32 bits locations");
- unsigned Offset = State.AllocateStack(4, 4);
+ unsigned Offset = State.AllocateStack(4, Align(4));
if (LocVT == MVT::f32 && Offset < 16*8) {
// Promote floats to %f0-%f31.
@@ -266,7 +264,7 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain, CallingConv::ID CallConv,
// If the function returns a struct, copy the SRetReturnReg to I0
if (MF.getFunction().hasStructRetAttr()) {
SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
- unsigned Reg = SFI->getSRetReturnReg();
+ Register Reg = SFI->getSRetReturnReg();
if (!Reg)
llvm_unreachable("sret virtual register not created in the entry block");
auto PtrVT = getPointerTy(DAG.getDataLayout());
@@ -431,7 +429,7 @@ SDValue SparcTargetLowering::LowerFormalArguments_32(
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, MachinePointerInfo());
} else {
- unsigned loReg = MF.addLiveIn(NextVA.getLocReg(),
+ Register loReg = MF.addLiveIn(NextVA.getLocReg(),
&SP::IntRegsRegClass);
LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32);
}
@@ -522,7 +520,7 @@ SDValue SparcTargetLowering::LowerFormalArguments_32(
if (MF.getFunction().hasStructRetAttr()) {
// Copy the SRet Argument to SRetReturnReg.
SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
- unsigned Reg = SFI->getSRetReturnReg();
+ Register Reg = SFI->getSRetReturnReg();
if (!Reg) {
Reg = MF.getRegInfo().createVirtualRegister(&SP::IntRegsRegClass);
SFI->setSRetReturnReg(Reg);
@@ -597,7 +595,7 @@ SDValue SparcTargetLowering::LowerFormalArguments_64(
// All integer register arguments are promoted by the caller to i64.
// Create a virtual register for the promoted live-in value.
- unsigned VReg = MF.addLiveIn(VA.getLocReg(),
+ Register VReg = MF.addLiveIn(VA.getLocReg(),
getRegClassFor(VA.getLocVT()));
SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
@@ -668,7 +666,7 @@ SDValue SparcTargetLowering::LowerFormalArguments_64(
// of how many arguments were actually passed.
SmallVector<SDValue, 8> OutChains;
for (; ArgOffset < 6*8; ArgOffset += 8) {
- unsigned VReg = MF.addLiveIn(SP::I0 + ArgOffset/8, &SP::I64RegsRegClass);
+ Register VReg = MF.addLiveIn(SP::I0 + ArgOffset/8, &SP::I64RegsRegClass);
SDValue VArg = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
int FI = MF.getFrameInfo().CreateFixedObject(8, ArgOffset + ArgArea, true);
auto PtrVT = getPointerTy(MF.getDataLayout());
@@ -692,9 +690,9 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
static bool hasReturnsTwiceAttr(SelectionDAG &DAG, SDValue Callee,
- ImmutableCallSite CS) {
- if (CS)
- return CS.hasFnAttr(Attribute::ReturnsTwice);
+ const CallBase *Call) {
+ if (Call)
+ return Call->hasFnAttr(Attribute::ReturnsTwice);
const Function *CalleeFn = nullptr;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
@@ -753,14 +751,14 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
SDValue Arg = OutVals[i];
unsigned Size = Flags.getByValSize();
- unsigned Align = Flags.getByValAlign();
+ Align Alignment = Flags.getNonZeroByValAlign();
if (Size > 0U) {
- int FI = MFI.CreateStackObject(Size, Align, false);
+ int FI = MFI.CreateStackObject(Size, Alignment, false);
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue SizeNode = DAG.getConstant(Size, dl, MVT::i32);
- Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align,
+ Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Alignment,
false, // isVolatile,
(Size <= 32), // AlwaysInline if size <= 32,
false, // isTailCall
@@ -931,12 +929,12 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// stuck together.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- unsigned Reg = toCallerWindow(RegsToPass[i].first);
+ Register Reg = toCallerWindow(RegsToPass[i].first);
Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
- bool hasReturnsTwice = hasReturnsTwiceAttr(DAG, Callee, CLI.CS);
+ bool hasReturnsTwice = hasReturnsTwiceAttr(DAG, Callee, CLI.CB);
// If the callee is a GlobalAddress node (quite common, every direct call is)
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
@@ -1018,7 +1016,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// this table could be generated automatically from RegInfo.
Register SparcTargetLowering::getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const {
- Register Reg = StringSwitch<unsigned>(RegName)
+ Register Reg = StringSwitch<Register>(RegName)
.Case("i0", SP::I0).Case("i1", SP::I1).Case("i2", SP::I2).Case("i3", SP::I3)
.Case("i4", SP::I4).Case("i5", SP::I5).Case("i6", SP::I6).Case("i7", SP::I7)
.Case("o0", SP::O0).Case("o1", SP::O1).Case("o2", SP::O2).Case("o3", SP::O3)
@@ -1060,7 +1058,7 @@ static void fixupVariableFloatArgs(SmallVectorImpl<CCValAssign> &ArgLocs,
CCValAssign NewVA;
// Determine the offset into the argument array.
- unsigned firstReg = (ValTy == MVT::f64) ? SP::D0 : SP::Q0;
+ Register firstReg = (ValTy == MVT::f64) ? SP::D0 : SP::Q0;
unsigned argSize = (ValTy == MVT::f64) ? 8 : 16;
unsigned Offset = argSize * (VA.getLocReg() - firstReg);
assert(Offset < 16*8 && "Offset out of range, bad register enum?");
@@ -1127,7 +1125,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
// Collect the set of registers to pass to the function and their values.
// This will be emitted as a sequence of CopyToReg nodes glued to the call
// instruction.
- SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
// Collect chains from all the memory operations that copy arguments to the
// stack. They must follow the stack pointer adjustment above and precede the
@@ -1243,7 +1241,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
SDValue Callee = CLI.Callee;
- bool hasReturnsTwice = hasReturnsTwiceAttr(DAG, Callee, CLI.CS);
+ bool hasReturnsTwice = hasReturnsTwiceAttr(DAG, Callee, CLI.CB);
unsigned TF = isPositionIndependent() ? SparcMCExpr::VK_Sparc_WPLT30 : 0;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT, 0, TF);
@@ -1292,7 +1290,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
// Set inreg flag manually for codegen generated library calls that
// return float.
- if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CS)
+ if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
CLI.Ins[0].Flags.setInReg();
RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_Sparc64);
@@ -1467,6 +1465,7 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
// Turn FP extload into load/fpextend
for (MVT VT : MVT::fp_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
}
@@ -1476,6 +1475,8 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
// Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
@@ -1517,6 +1518,12 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ // Lower f16 conversion operations into library calls
+ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
+
setOperationAction(ISD::BITCAST, MVT::f32, Expand);
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
@@ -1906,10 +1913,8 @@ SDValue SparcTargetLowering::withTargetFlags(SDValue Op, unsigned TF,
GA->getOffset(), TF);
if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
- return DAG.getTargetConstantPool(CP->getConstVal(),
- CP->getValueType(0),
- CP->getAlignment(),
- CP->getOffset(), TF);
+ return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
+ CP->getAlign(), CP->getOffset(), TF);
if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
return DAG.getTargetBlockAddress(BA->getBlockAddress(),
@@ -2131,7 +2136,7 @@ SDValue SparcTargetLowering::LowerF128_LibCallArg(SDValue Chain,
if (ArgTy->isFP128Ty()) {
// Create a stack object and pass the pointer to the library function.
- int FI = MFI.CreateStackObject(16, 8, false);
+ int FI = MFI.CreateStackObject(16, Align(8), false);
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
Chain = DAG.getStore(Chain, DL, Entry.Node, FIPtr, MachinePointerInfo(),
/* Alignment = */ 8);
@@ -2162,7 +2167,7 @@ SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG,
if (RetTy->isFP128Ty()) {
// Create a Stack Object to receive the return value of type f128.
ArgListEntry Entry;
- int RetFI = MFI.CreateStackObject(16, 8, false);
+ int RetFI = MFI.CreateStackObject(16, Align(8), false);
RetPtr = DAG.getFrameIndex(RetFI, PtrVT);
Entry.Node = RetPtr;
Entry.Ty = PointerType::getUnqual(RetTy);
@@ -2239,54 +2244,54 @@ SDValue SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS,
switch(SPCC) {
default: {
- SDValue RHS = DAG.getTargetConstant(0, DL, Result.getValueType());
+ SDValue RHS = DAG.getConstant(0, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_UL : {
SDValue Mask = DAG.getConstant(1, DL, Result.getValueType());
Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
- SDValue RHS = DAG.getTargetConstant(0, DL, Result.getValueType());
+ SDValue RHS = DAG.getConstant(0, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_ULE: {
- SDValue RHS = DAG.getTargetConstant(2, DL, Result.getValueType());
+ SDValue RHS = DAG.getConstant(2, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_UG : {
- SDValue RHS = DAG.getTargetConstant(1, DL, Result.getValueType());
+ SDValue RHS = DAG.getConstant(1, DL, Result.getValueType());
SPCC = SPCC::ICC_G;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_UGE: {
- SDValue RHS = DAG.getTargetConstant(1, DL, Result.getValueType());
+ SDValue RHS = DAG.getConstant(1, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_U : {
- SDValue RHS = DAG.getTargetConstant(3, DL, Result.getValueType());
+ SDValue RHS = DAG.getConstant(3, DL, Result.getValueType());
SPCC = SPCC::ICC_E;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_O : {
- SDValue RHS = DAG.getTargetConstant(3, DL, Result.getValueType());
+ SDValue RHS = DAG.getConstant(3, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_LG : {
SDValue Mask = DAG.getConstant(3, DL, Result.getValueType());
Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
- SDValue RHS = DAG.getTargetConstant(0, DL, Result.getValueType());
+ SDValue RHS = DAG.getConstant(0, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_UE : {
SDValue Mask = DAG.getConstant(3, DL, Result.getValueType());
Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
- SDValue RHS = DAG.getTargetConstant(0, DL, Result.getValueType());
+ SDValue RHS = DAG.getConstant(0, DL, Result.getValueType());
SPCC = SPCC::ICC_E;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
@@ -2544,15 +2549,16 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
const SparcSubtarget *Subtarget) {
SDValue Chain = Op.getOperand(0); // Legalize the chain.
SDValue Size = Op.getOperand(1); // Legalize the size.
- unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
- unsigned StackAlign = Subtarget->getFrameLowering()->getStackAlignment();
+ MaybeAlign Alignment =
+ cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
+ Align StackAlign = Subtarget->getFrameLowering()->getStackAlign();
EVT VT = Size->getValueType(0);
SDLoc dl(Op);
// TODO: implement over-aligned alloca. (Note: also implies
// supporting overaligned function frames + dynamic
// allocations, at all, which currently isn't supported)
- if (Align > StackAlign) {
+ if (Alignment && *Alignment > StackAlign) {
const MachineFunction &MF = DAG.getMachineFunction();
report_fatal_error("Function \"" + Twine(MF.getName()) + "\": "
"over-aligned dynamic alloca not supported.");
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.h
index 2838ca4bdc66..c6d0011b88a5 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -103,14 +103,14 @@ namespace llvm {
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
- unsigned
+ Register
getExceptionPointerRegister(const Constant *PersonalityFn) const override {
return SP::I0;
}
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
- unsigned
+ Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
return SP::I1;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrAliases.td b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrAliases.td
index d4d056ea0af6..4a0e8c856f27 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrAliases.td
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrAliases.td
@@ -281,7 +281,7 @@ defm : int_cond_alias<"pos", 0b1110>;
defm : int_cond_alias<"neg", 0b0110>;
defm : int_cond_alias<"vc", 0b1111>;
defm : int_cond_alias<"vs", 0b0111>;
-let EmitPriority = 0 in
+let EmitPriority = 0 in
{
defm : int_cond_alias<"", 0b1000>; // same as a; gnu asm, not in manual
defm : int_cond_alias<"nz", 0b1001>; // same as ne
@@ -306,7 +306,7 @@ defm : fp_cond_alias<"uge", 0b1100>;
defm : fp_cond_alias<"le", 0b1101>;
defm : fp_cond_alias<"ule", 0b1110>;
defm : fp_cond_alias<"o", 0b1111>;
-let EmitPriority = 0 in
+let EmitPriority = 0 in
{
defm : fp_cond_alias<"", 0b1000>; // same as a; gnu asm, not in manual
defm : fp_cond_alias<"nz", 0b0001>; // same as ne
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrFormats.td
index fbf08b49d60c..2d8f063f7ed1 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrFormats.td
@@ -24,7 +24,7 @@ class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern,
let DecoderNamespace = "Sparc";
field bits<32> SoftFail = 0;
-
+
let Itinerary = itin;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index 31185aa508af..dc3a41c63098 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -393,7 +393,7 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
void SparcInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
+ Register SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
@@ -403,7 +403,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const MachineFrameInfo &MFI = MF->getFrameInfo();
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
- MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
// On the order of operands here: think "[FrameIdx + 0] = SrcReg".
if (RC == &SP::I64RegsRegClass)
@@ -432,7 +432,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
void SparcInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, int FI,
+ Register DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
@@ -442,7 +442,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const MachineFrameInfo &MFI = MF->getFrameInfo();
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
if (RC == &SP::I64RegsRegClass)
BuildMI(MBB, I, DL, get(SP::LDXri), DestReg).addFrameIndex(FI).addImm(0)
@@ -468,11 +468,10 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Can't load this register from stack slot");
}
-unsigned SparcInstrInfo::getGlobalBaseReg(MachineFunction *MF) const
-{
+Register SparcInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
SparcMachineFunctionInfo *SparcFI = MF->getInfo<SparcMachineFunctionInfo>();
- unsigned GlobalBaseReg = SparcFI->getGlobalBaseReg();
- if (GlobalBaseReg != 0)
+ Register GlobalBaseReg = SparcFI->getGlobalBaseReg();
+ if (GlobalBaseReg)
return GlobalBaseReg;
// Insert the set of GlobalBaseReg into the first MBB of the function
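
The unsigned-to-Register migration in this hunk (and throughout the Sparc changes above) works because Register is a thin wrapper that still converts to and from unsigned and tests as false when it holds the invalid register 0. A minimal sketch, assuming llvm/CodeGen/Register.h; the helper name pickReg is hypothetical and not part of the patch:

    #include "llvm/CodeGen/Register.h"

    using namespace llvm;

    // A zero/default Register tests as false, and a Register converts back to
    // unsigned implicitly, so code like getGlobalBaseReg() keeps its original
    // "if (Reg)" / "return Reg" shape after the type change.
    static unsigned pickReg(Register Cached, unsigned Fallback) {
      if (Cached)
        return Cached;
      return Fallback;
    }
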
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.h
index f0b3dde6dec3..b25de8e5a690 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.h
@@ -86,17 +86,17 @@ public:
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
+ Register SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
+ Register DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
- unsigned getGlobalBaseReg(MachineFunction *MF) const;
+ Register getGlobalBaseReg(MachineFunction *MF) const;
// Lower pseudo instructions after register allocation.
bool expandPostRAPseudo(MachineInstr &MI) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.td
index f26f4a1c1a84..8b01313c7911 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -27,12 +27,12 @@ def Is32Bit : Predicate<"!Subtarget->is64Bit()">;
def Is64Bit : Predicate<"Subtarget->is64Bit()">;
def UseSoftMulDiv : Predicate<"Subtarget->useSoftMulDiv()">,
- AssemblerPredicate<"FeatureSoftMulDiv">;
+ AssemblerPredicate<(all_of FeatureSoftMulDiv)>;
// HasV9 - This predicate is true when the target processor supports V9
// instructions. Note that the machine may be running in 32-bit mode.
def HasV9 : Predicate<"Subtarget->isV9()">,
- AssemblerPredicate<"FeatureV9">;
+ AssemblerPredicate<(all_of FeatureV9)>;
// HasNoV9 - This predicate is true when the target doesn't have V9
// instructions. Use of this is just a hack for the isel not having proper
@@ -41,11 +41,11 @@ def HasNoV9 : Predicate<"!Subtarget->isV9()">;
// HasVIS - This is true when the target processor has VIS extensions.
def HasVIS : Predicate<"Subtarget->isVIS()">,
- AssemblerPredicate<"FeatureVIS">;
+ AssemblerPredicate<(all_of FeatureVIS)>;
def HasVIS2 : Predicate<"Subtarget->isVIS2()">,
- AssemblerPredicate<"FeatureVIS2">;
+ AssemblerPredicate<(all_of FeatureVIS2)>;
def HasVIS3 : Predicate<"Subtarget->isVIS3()">,
- AssemblerPredicate<"FeatureVIS3">;
+ AssemblerPredicate<(all_of FeatureVIS3)>;
// HasHardQuad - This is true when the target processor supports quad floating
// point instructions.
@@ -58,7 +58,7 @@ def HasLeonCASA : Predicate<"Subtarget->hasLeonCasa()">;
// HasPWRPSR - This is true when the target processor supports partial
// writes to the PSR register that only affect the ET field.
def HasPWRPSR : Predicate<"Subtarget->hasPWRPSR()">,
- AssemblerPredicate<"FeaturePWRPSR">;
+ AssemblerPredicate<(all_of FeaturePWRPSR)>;
// HasUMAC_SMAC - This is true when the target processor supports the
// UMAC and SMAC instructions
@@ -529,7 +529,7 @@ let DecoderMethod = "DecodeLoadCP", Defs = [CPSR] in {
"ld [$addr], %csr", []>;
}
}
-
+
let DecoderMethod = "DecodeLoadFP" in
let Defs = [FSR] in {
let rd = 0 in {
@@ -571,12 +571,12 @@ let DecoderMethod = "DecodeStoreQFP" in
defm STQF : StoreA<"stq", 0b100110, 0b110110, store, QFPRegs, f128>,
Requires<[HasV9, HasHardQuad]>;
-let DecoderMethod = "DecodeStoreCP" in
- defm STC : Store<"st", 0b110100, store, CoprocRegs, i32>;
-
-let DecoderMethod = "DecodeStoreCPPair" in
+let DecoderMethod = "DecodeStoreCP" in
+ defm STC : Store<"st", 0b110100, store, CoprocRegs, i32>;
+
+let DecoderMethod = "DecodeStoreCPPair" in
defm STDC : Store<"std", 0b110111, store, CoprocPair, v2i32, IIC_std>;
-
+
let DecoderMethod = "DecodeStoreCP", rd = 0 in {
let Defs = [CPSR] in {
def STCSRrr : F3_1<3, 0b110101, (outs MEMrr:$addr), (ins),
@@ -897,7 +897,7 @@ def CBCOND : CPBranchSP<(ins brtarget:$imm22, CCOp:$cond),
[(SPbrfcc bb:$imm22, imm:$cond)]>;
def CBCONDA : CPBranchSPA<(ins brtarget:$imm22, CCOp:$cond),
"cb$cond,a $imm22", []>;
-
+
// Section B.24 - Call and Link Instruction, p. 125
// This is the only Format 1 instruction
let Uses = [O6],
@@ -1080,7 +1080,7 @@ let hasSideEffects = 1, rd = 0, rs1 = 0b01111, rs2 = 0 in
def STBAR : F3_1<2, 0b101000, (outs), (ins), "stbar", []>;
-// Section B.31 - Unimplmented Instruction
+// Section B.31 - Unimplemented Instruction
let rd = 0 in
def UNIMP : F2_1<0b000, (outs), (ins i32imm:$imm22),
"unimp $imm22", []>;
@@ -1186,7 +1186,7 @@ def FABSS : F3_3u<2, 0b110100, 0b000001001,
// Floating-point Square Root Instructions, p.145
// FSQRTS generates an erratum on LEON processors, so when this instruction is
// disabled the operation is promoted to use FSQRTD with doubles instead.
-let Predicates = [HasNoFdivSqrtFix] in
+let Predicates = [HasNoFdivSqrtFix] in
def FSQRTS : F3_3u<2, 0b110100, 0b000101001,
(outs FPRegs:$rd), (ins FPRegs:$rs2),
"fsqrts $rs2, $rd",
@@ -1515,8 +1515,8 @@ let Predicates = [HasV9], hasSideEffects = 1, rd = 0, rs1 = 0b01111 in
def MEMBARi : F3_2<2, 0b101000, (outs), (ins MembarTag:$simm13),
"membar $simm13", []>;
-// The CAS instruction, unlike other instructions, only comes in a
-// form which requires an ASI be provided. The ASI value hardcoded
+// The CAS instruction, unlike other instructions, only comes in a
+// form which requires an ASI be provided. The ASI value hardcoded
// here is ASI_PRIMARY, the default unprivileged ASI for SparcV9.
let Predicates = [HasV9], Constraints = "$swap = $rd", asi = 0b10000000 in
def CASrr: F3_1_asi<3, 0b111100,
@@ -1536,18 +1536,18 @@ let Predicates = [HasLeonCASA], Constraints = "$swap = $rd", asi = 0b00001010 in
"casa [$rs1] 10, $rs2, $rd",
[(set i32:$rd,
(atomic_cmp_swap_32 iPTR:$rs1, i32:$rs2, i32:$swap))]>;
-
+
// CASA is supported on some LEON3 and all LEON4 processors. Same pattern as
// CASrr, above, but with a different ASI. This version is supported for
-// inline assembly lowering only.
+// inline assembly lowering only.
let Predicates = [HasLeonCASA], Constraints = "$swap = $rd" in
def CASArr: F3_1_asi<3, 0b111100,
(outs IntRegs:$rd), (ins IntRegs:$rs1, IntRegs:$rs2,
IntRegs:$swap, i8imm:$asi),
"casa [$rs1] $asi, $rs2, $rd", []>;
-
+
// TODO: Add DAG sequence to lower these instructions. Currently, only provided
-// as inline assembler-supported instructions.
+// as inline assembler-supported instructions.
let Predicates = [HasUMAC_SMAC], Defs = [Y, ASR18], Uses = [Y, ASR18] in {
def SMACrr : F3_1<2, 0b111111,
(outs IntRegs:$rd), (ins IntRegs:$rs1, IntRegs:$rs2, ASRRegs:$asr18),
@@ -1558,12 +1558,12 @@ let Predicates = [HasUMAC_SMAC], Defs = [Y, ASR18], Uses = [Y, ASR18] in {
(outs IntRegs:$rd), (ins IntRegs:$rs1, simm13Op:$simm13, ASRRegs:$asr18),
"smac $rs1, $simm13, $rd",
[], IIC_smac_umac>;
-
+
def UMACrr : F3_1<2, 0b111110,
(outs IntRegs:$rd), (ins IntRegs:$rs1, IntRegs:$rs2, ASRRegs:$asr18),
"umac $rs1, $rs2, $rd",
[], IIC_smac_umac>;
-
+
def UMACri : F3_2<2, 0b111110,
(outs IntRegs:$rd), (ins IntRegs:$rs1, simm13Op:$simm13, ASRRegs:$asr18),
"umac $rs1, $simm13, $rd",
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
index fe5705878693..d557c8ea22e2 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -19,14 +19,14 @@ namespace llvm {
class SparcMachineFunctionInfo : public MachineFunctionInfo {
virtual void anchor();
private:
- unsigned GlobalBaseReg;
+ Register GlobalBaseReg;
/// VarArgsFrameOffset - Frame offset to start of varargs area.
int VarArgsFrameOffset;
/// SRetReturnReg - Holds the virtual register into which the sret
/// argument is passed.
- unsigned SRetReturnReg;
+ Register SRetReturnReg;
/// IsLeafProc - True if the function is a leaf procedure.
bool IsLeafProc;
@@ -38,14 +38,14 @@ namespace llvm {
: GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0),
IsLeafProc(false) {}
- unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
- void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+ Register getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(Register Reg) { GlobalBaseReg = Reg; }
int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; }
- unsigned getSRetReturnReg() const { return SRetReturnReg; }
- void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+ Register getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(Register Reg) { SRetReturnReg = Reg; }
void setLeafProc(bool rhs) { IsLeafProc = rhs; }
bool isLeafProc() const { return IsLeafProc; }
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
index 19a90e98db7e..990dbe23e7ac 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -173,7 +173,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>();
const SparcFrameLowering *TFI = getFrameLowering(MF);
- unsigned FrameReg;
+ Register FrameReg;
int Offset;
Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg);
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcRegisterInfo.td
index 98959d512955..8225bc21e8fe 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcRegisterInfo.td
@@ -359,14 +359,14 @@ let isAllocatable = 0 in {
// Ancillary state registers
def ASRRegs : RegisterClass<"SP", [i32], 32,
(add Y, (sequence "ASR%u", 1, 31))>;
-
+
// This register class should not be used to hold i64 values.
def CoprocRegs : RegisterClass<"SP", [i32], 32,
(add (sequence "C%u", 0, 31))>;
// Should be in the same order as CoprocRegs.
def CoprocPair : RegisterClass<"SP", [v2i32], 64,
- (add C0_C1, C2_C3, C4_C5, C6_C7,
+ (add C0_C1, C2_C3, C4_C5, C6_C7,
C8_C9, C10_C11, C12_C13, C14_C15,
C16_C17, C18_C19, C20_C21, C22_C23,
C24_C25, C26_C27, C28_C29, C30_C31)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcSchedule.td b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcSchedule.td
index 31e43c9bd95d..0f05372b7050 100755
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcSchedule.td
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcSchedule.td
@@ -1,4 +1,4 @@
-//===-- SparcSchedule.td - Describe the Sparc Itineries ----*- tablegen -*-===//
+//===-- SparcSchedule.td - Describe the Sparc Itineraries ----*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcSubtarget.cpp
index 075a002a358d..dbc6cf8e5b86 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@@ -50,7 +50,7 @@ SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU,
HasLeonCycleCounter = false;
// Determine default and user specified characteristics
- std::string CPUName = CPU;
+ std::string CPUName = std::string(CPU);
if (CPUName.empty())
CPUName = (Is64Bit) ? "v9" : "v8";
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index 76f387842f73..d48d94e2faf1 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -99,7 +99,8 @@ SparcTargetMachine::SparcTargetMachine(
CM, getEffectiveRelocModel(RM), is64bit, JIT),
OL),
TLOF(std::make_unique<SparcELFTargetObjectFile>()),
- Subtarget(TT, CPU, FS, *this, is64bit), is64Bit(is64bit) {
+ Subtarget(TT, std::string(CPU), std::string(FS), *this, is64bit),
+ is64Bit(is64bit) {
initAsmInfo();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetObjectFile.cpp
index e6ad4d2d67aa..c03510fa090d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcTargetObjectFile.cpp
@@ -11,13 +11,13 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
void SparcELFTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- InitializeELF(TM.Options.UseInitArray);
}
const MCExpr *SparcELFTargetObjectFile::getTTypeGlobalReference(
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 607266d552a6..d5a3a19446c7 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -53,8 +53,6 @@ enum RegisterKind {
GRH32Reg,
GR64Reg,
GR128Reg,
- ADDR32Reg,
- ADDR64Reg,
FP32Reg,
FP64Reg,
FP128Reg,
@@ -109,7 +107,7 @@ private:
// Base + Disp + Index, where Base and Index are LLVM registers or 0.
// MemKind says what type of memory this is and RegKind says what type
- // the base register has (ADDR32Reg or ADDR64Reg). Length is the operand
+ // the base register has (GR32Reg or GR64Reg). Length is the operand
// length for D(L,B)-style operands, otherwise it is null.
struct MemOp {
unsigned Base : 12;
@@ -348,8 +346,8 @@ public:
bool isGRX32() const { return false; }
bool isGR64() const { return isReg(GR64Reg); }
bool isGR128() const { return isReg(GR128Reg); }
- bool isADDR32() const { return isReg(ADDR32Reg); }
- bool isADDR64() const { return isReg(ADDR64Reg); }
+ bool isADDR32() const { return isReg(GR32Reg); }
+ bool isADDR64() const { return isReg(GR64Reg); }
bool isADDR128() const { return false; }
bool isFP32() const { return isReg(FP32Reg); }
bool isFP64() const { return isReg(FP64Reg); }
@@ -361,16 +359,16 @@ public:
bool isAR32() const { return isReg(AR32Reg); }
bool isCR64() const { return isReg(CR64Reg); }
bool isAnyReg() const { return (isReg() || isImm(0, 15)); }
- bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, ADDR32Reg); }
- bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, ADDR32Reg); }
- bool isBDAddr64Disp12() const { return isMemDisp12(BDMem, ADDR64Reg); }
- bool isBDAddr64Disp20() const { return isMemDisp20(BDMem, ADDR64Reg); }
- bool isBDXAddr64Disp12() const { return isMemDisp12(BDXMem, ADDR64Reg); }
- bool isBDXAddr64Disp20() const { return isMemDisp20(BDXMem, ADDR64Reg); }
- bool isBDLAddr64Disp12Len4() const { return isMemDisp12Len4(ADDR64Reg); }
- bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(ADDR64Reg); }
- bool isBDRAddr64Disp12() const { return isMemDisp12(BDRMem, ADDR64Reg); }
- bool isBDVAddr64Disp12() const { return isMemDisp12(BDVMem, ADDR64Reg); }
+ bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, GR32Reg); }
+ bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, GR32Reg); }
+ bool isBDAddr64Disp12() const { return isMemDisp12(BDMem, GR64Reg); }
+ bool isBDAddr64Disp20() const { return isMemDisp20(BDMem, GR64Reg); }
+ bool isBDXAddr64Disp12() const { return isMemDisp12(BDXMem, GR64Reg); }
+ bool isBDXAddr64Disp20() const { return isMemDisp20(BDXMem, GR64Reg); }
+ bool isBDLAddr64Disp12Len4() const { return isMemDisp12Len4(GR64Reg); }
+ bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(GR64Reg); }
+ bool isBDRAddr64Disp12() const { return isMemDisp12(BDRMem, GR64Reg); }
+ bool isBDVAddr64Disp12() const { return isMemDisp12(BDVMem, GR64Reg); }
bool isU1Imm() const { return isImm(0, 1); }
bool isU2Imm() const { return isImm(0, 3); }
bool isU3Imm() const { return isImm(0, 7); }
@@ -405,26 +403,24 @@ private:
SMLoc StartLoc, EndLoc;
};
- bool parseRegister(Register &Reg);
+ bool parseRegister(Register &Reg, bool RestoreOnFailure = false);
- bool parseRegister(Register &Reg, RegisterGroup Group, const unsigned *Regs,
- bool IsAddress = false);
+ bool parseIntegerRegister(Register &Reg, RegisterGroup Group);
OperandMatchResultTy parseRegister(OperandVector &Operands,
- RegisterGroup Group, const unsigned *Regs,
RegisterKind Kind);
OperandMatchResultTy parseAnyRegister(OperandVector &Operands);
- bool parseAddress(bool &HaveReg1, Register &Reg1,
- bool &HaveReg2, Register &Reg2,
- const MCExpr *&Disp, const MCExpr *&Length);
+ bool parseAddress(bool &HaveReg1, Register &Reg1, bool &HaveReg2,
+ Register &Reg2, const MCExpr *&Disp, const MCExpr *&Length,
+ bool HasLength = false, bool HasVectorIndex = false);
bool parseAddressRegister(Register &Reg);
bool ParseDirectiveInsn(SMLoc L);
OperandMatchResultTy parseAddress(OperandVector &Operands,
- MemoryKind MemKind, const unsigned *Regs,
+ MemoryKind MemKind,
RegisterKind RegKind);
OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal,
@@ -449,6 +445,10 @@ public:
// Override MCTargetAsmParser.
bool ParseDirective(AsmToken DirectiveID) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
+ bool RestoreOnFailure);
+ OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -458,76 +458,78 @@ public:
// Used by the TableGen code to parse particular operand types.
OperandMatchResultTy parseGR32(OperandVector &Operands) {
- return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, GR32Reg);
+ return parseRegister(Operands, GR32Reg);
}
OperandMatchResultTy parseGRH32(OperandVector &Operands) {
- return parseRegister(Operands, RegGR, SystemZMC::GRH32Regs, GRH32Reg);
+ return parseRegister(Operands, GRH32Reg);
}
OperandMatchResultTy parseGRX32(OperandVector &Operands) {
llvm_unreachable("GRX32 should only be used for pseudo instructions");
}
OperandMatchResultTy parseGR64(OperandVector &Operands) {
- return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, GR64Reg);
+ return parseRegister(Operands, GR64Reg);
}
OperandMatchResultTy parseGR128(OperandVector &Operands) {
- return parseRegister(Operands, RegGR, SystemZMC::GR128Regs, GR128Reg);
+ return parseRegister(Operands, GR128Reg);
}
OperandMatchResultTy parseADDR32(OperandVector &Operands) {
- return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, ADDR32Reg);
+ // For the AsmParser, we will accept %r0 for ADDR32 as well.
+ return parseRegister(Operands, GR32Reg);
}
OperandMatchResultTy parseADDR64(OperandVector &Operands) {
- return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, ADDR64Reg);
+ // For the AsmParser, we will accept %r0 for ADDR64 as well.
+ return parseRegister(Operands, GR64Reg);
}
OperandMatchResultTy parseADDR128(OperandVector &Operands) {
llvm_unreachable("Shouldn't be used as an operand");
}
OperandMatchResultTy parseFP32(OperandVector &Operands) {
- return parseRegister(Operands, RegFP, SystemZMC::FP32Regs, FP32Reg);
+ return parseRegister(Operands, FP32Reg);
}
OperandMatchResultTy parseFP64(OperandVector &Operands) {
- return parseRegister(Operands, RegFP, SystemZMC::FP64Regs, FP64Reg);
+ return parseRegister(Operands, FP64Reg);
}
OperandMatchResultTy parseFP128(OperandVector &Operands) {
- return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, FP128Reg);
+ return parseRegister(Operands, FP128Reg);
}
OperandMatchResultTy parseVR32(OperandVector &Operands) {
- return parseRegister(Operands, RegV, SystemZMC::VR32Regs, VR32Reg);
+ return parseRegister(Operands, VR32Reg);
}
OperandMatchResultTy parseVR64(OperandVector &Operands) {
- return parseRegister(Operands, RegV, SystemZMC::VR64Regs, VR64Reg);
+ return parseRegister(Operands, VR64Reg);
}
OperandMatchResultTy parseVF128(OperandVector &Operands) {
llvm_unreachable("Shouldn't be used as an operand");
}
OperandMatchResultTy parseVR128(OperandVector &Operands) {
- return parseRegister(Operands, RegV, SystemZMC::VR128Regs, VR128Reg);
+ return parseRegister(Operands, VR128Reg);
}
OperandMatchResultTy parseAR32(OperandVector &Operands) {
- return parseRegister(Operands, RegAR, SystemZMC::AR32Regs, AR32Reg);
+ return parseRegister(Operands, AR32Reg);
}
OperandMatchResultTy parseCR64(OperandVector &Operands) {
- return parseRegister(Operands, RegCR, SystemZMC::CR64Regs, CR64Reg);
+ return parseRegister(Operands, CR64Reg);
}
OperandMatchResultTy parseAnyReg(OperandVector &Operands) {
return parseAnyRegister(Operands);
}
OperandMatchResultTy parseBDAddr32(OperandVector &Operands) {
- return parseAddress(Operands, BDMem, SystemZMC::GR32Regs, ADDR32Reg);
+ return parseAddress(Operands, BDMem, GR32Reg);
}
OperandMatchResultTy parseBDAddr64(OperandVector &Operands) {
- return parseAddress(Operands, BDMem, SystemZMC::GR64Regs, ADDR64Reg);
+ return parseAddress(Operands, BDMem, GR64Reg);
}
OperandMatchResultTy parseBDXAddr64(OperandVector &Operands) {
- return parseAddress(Operands, BDXMem, SystemZMC::GR64Regs, ADDR64Reg);
+ return parseAddress(Operands, BDXMem, GR64Reg);
}
OperandMatchResultTy parseBDLAddr64(OperandVector &Operands) {
- return parseAddress(Operands, BDLMem, SystemZMC::GR64Regs, ADDR64Reg);
+ return parseAddress(Operands, BDLMem, GR64Reg);
}
OperandMatchResultTy parseBDRAddr64(OperandVector &Operands) {
- return parseAddress(Operands, BDRMem, SystemZMC::GR64Regs, ADDR64Reg);
+ return parseAddress(Operands, BDRMem, GR64Reg);
}
OperandMatchResultTy parseBDVAddr64(OperandVector &Operands) {
- return parseAddress(Operands, BDVMem, SystemZMC::GR64Regs, ADDR64Reg);
+ return parseAddress(Operands, BDVMem, GR64Reg);
}
OperandMatchResultTy parsePCRel12(OperandVector &Operands) {
return parsePCRel(Operands, -(1LL << 12), (1LL << 12) - 1, false);
@@ -691,27 +693,37 @@ void SystemZOperand::print(raw_ostream &OS) const {
}
// Parse one register of the form %<prefix><number>.
-bool SystemZAsmParser::parseRegister(Register &Reg) {
+bool SystemZAsmParser::parseRegister(Register &Reg, bool RestoreOnFailure) {
Reg.StartLoc = Parser.getTok().getLoc();
// Eat the % prefix.
if (Parser.getTok().isNot(AsmToken::Percent))
return Error(Parser.getTok().getLoc(), "register expected");
+ const AsmToken &PercentTok = Parser.getTok();
Parser.Lex();
// Expect a register name.
- if (Parser.getTok().isNot(AsmToken::Identifier))
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ if (RestoreOnFailure)
+ getLexer().UnLex(PercentTok);
return Error(Reg.StartLoc, "invalid register");
+ }
// Check that there's a prefix.
StringRef Name = Parser.getTok().getString();
- if (Name.size() < 2)
+ if (Name.size() < 2) {
+ if (RestoreOnFailure)
+ getLexer().UnLex(PercentTok);
return Error(Reg.StartLoc, "invalid register");
+ }
char Prefix = Name[0];
// Treat the rest of the register name as a register number.
- if (Name.substr(1).getAsInteger(10, Reg.Num))
+ if (Name.substr(1).getAsInteger(10, Reg.Num)) {
+ if (RestoreOnFailure)
+ getLexer().UnLex(PercentTok);
return Error(Reg.StartLoc, "invalid register");
+ }
// Look for valid combinations of prefix and number.
if (Prefix == 'r' && Reg.Num < 16)
@@ -724,49 +736,102 @@ bool SystemZAsmParser::parseRegister(Register &Reg) {
Reg.Group = RegAR;
else if (Prefix == 'c' && Reg.Num < 16)
Reg.Group = RegCR;
- else
+ else {
+ if (RestoreOnFailure)
+ getLexer().UnLex(PercentTok);
return Error(Reg.StartLoc, "invalid register");
+ }
Reg.EndLoc = Parser.getTok().getLoc();
Parser.Lex();
return false;
}
-// Parse a register of group Group. If Regs is nonnull, use it to map
-// the raw register number to LLVM numbering, with zero entries
-// indicating an invalid register. IsAddress says whether the
-// register appears in an address context. Allow FP Group if expecting
-// RegV Group, since the f-prefix yields the FP group even while used
-// with vector instructions.
-bool SystemZAsmParser::parseRegister(Register &Reg, RegisterGroup Group,
- const unsigned *Regs, bool IsAddress) {
- if (parseRegister(Reg))
- return true;
- if (Reg.Group != Group && !(Reg.Group == RegFP && Group == RegV))
- return Error(Reg.StartLoc, "invalid operand for instruction");
- if (Regs && Regs[Reg.Num] == 0)
- return Error(Reg.StartLoc, "invalid register pair");
- if (Reg.Num == 0 && IsAddress)
- return Error(Reg.StartLoc, "%r0 used in an address");
- if (Regs)
- Reg.Num = Regs[Reg.Num];
- return false;
-}
-
-// Parse a register and add it to Operands. The other arguments are as above.
+// Parse a register of kind Kind and add it to Operands.
OperandMatchResultTy
-SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterGroup Group,
- const unsigned *Regs, RegisterKind Kind) {
- if (Parser.getTok().isNot(AsmToken::Percent))
+SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterKind Kind) {
+ Register Reg;
+ RegisterGroup Group;
+ switch (Kind) {
+ case GR32Reg:
+ case GRH32Reg:
+ case GR64Reg:
+ case GR128Reg:
+ Group = RegGR;
+ break;
+ case FP32Reg:
+ case FP64Reg:
+ case FP128Reg:
+ Group = RegFP;
+ break;
+ case VR32Reg:
+ case VR64Reg:
+ case VR128Reg:
+ Group = RegV;
+ break;
+ case AR32Reg:
+ Group = RegAR;
+ break;
+ case CR64Reg:
+ Group = RegCR;
+ break;
+ }
+
+ // Handle register names of the form %<prefix><number>
+ if (Parser.getTok().is(AsmToken::Percent)) {
+ if (parseRegister(Reg))
+ return MatchOperand_ParseFail;
+
+ // Check the parsed register group "Reg.Group" against the expected "Group";
+ // we have to error out if the user specified the wrong prefix.
+ switch (Group) {
+ case RegGR:
+ case RegFP:
+ case RegAR:
+ case RegCR:
+ if (Group != Reg.Group) {
+ Error(Reg.StartLoc, "invalid operand for instruction");
+ return MatchOperand_ParseFail;
+ }
+ break;
+ case RegV:
+ if (Reg.Group != RegV && Reg.Group != RegFP) {
+ Error(Reg.StartLoc, "invalid operand for instruction");
+ return MatchOperand_ParseFail;
+ }
+ break;
+ }
+ } else if (Parser.getTok().is(AsmToken::Integer)) {
+ if (parseIntegerRegister(Reg, Group))
+ return MatchOperand_ParseFail;
+ }
+ // Otherwise we didn't match a register operand.
+ else
return MatchOperand_NoMatch;
- Register Reg;
- bool IsAddress = (Kind == ADDR32Reg || Kind == ADDR64Reg);
- if (parseRegister(Reg, Group, Regs, IsAddress))
+ // Determine the LLVM register number according to Kind.
+ const unsigned *Regs;
+ switch (Kind) {
+ case GR32Reg: Regs = SystemZMC::GR32Regs; break;
+ case GRH32Reg: Regs = SystemZMC::GRH32Regs; break;
+ case GR64Reg: Regs = SystemZMC::GR64Regs; break;
+ case GR128Reg: Regs = SystemZMC::GR128Regs; break;
+ case FP32Reg: Regs = SystemZMC::FP32Regs; break;
+ case FP64Reg: Regs = SystemZMC::FP64Regs; break;
+ case FP128Reg: Regs = SystemZMC::FP128Regs; break;
+ case VR32Reg: Regs = SystemZMC::VR32Regs; break;
+ case VR64Reg: Regs = SystemZMC::VR64Regs; break;
+ case VR128Reg: Regs = SystemZMC::VR128Regs; break;
+ case AR32Reg: Regs = SystemZMC::AR32Regs; break;
+ case CR64Reg: Regs = SystemZMC::CR64Regs; break;
+ }
+ if (Regs[Reg.Num] == 0) {
+ Error(Reg.StartLoc, "invalid register pair");
return MatchOperand_ParseFail;
+ }
- Operands.push_back(SystemZOperand::createReg(Kind, Reg.Num,
- Reg.StartLoc, Reg.EndLoc));
+ Operands.push_back(
+ SystemZOperand::createReg(Kind, Regs[Reg.Num], Reg.StartLoc, Reg.EndLoc));
return MatchOperand_Success;
}
@@ -831,11 +896,39 @@ SystemZAsmParser::parseAnyRegister(OperandVector &Operands) {
return MatchOperand_Success;
}
+bool SystemZAsmParser::parseIntegerRegister(Register &Reg,
+ RegisterGroup Group) {
+ Reg.StartLoc = Parser.getTok().getLoc();
+ // We have an integer token
+ const MCExpr *Register;
+ if (Parser.parseExpression(Register))
+ return true;
+
+ const auto *CE = dyn_cast<MCConstantExpr>(Register);
+ if (!CE)
+ return true;
+
+ int64_t MaxRegNum = (Group == RegV) ? 31 : 15;
+ int64_t Value = CE->getValue();
+ if (Value < 0 || Value > MaxRegNum) {
+ Error(Parser.getTok().getLoc(), "invalid register");
+ return true;
+ }
+
+ // Assign the Register Number
+ Reg.Num = (unsigned)Value;
+ Reg.Group = Group;
+ Reg.EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ // At this point, successfully parsed an integer register.
+ return false;
+}
+
// Parse a memory operand into Reg1, Reg2, Disp, and Length.
bool SystemZAsmParser::parseAddress(bool &HaveReg1, Register &Reg1,
bool &HaveReg2, Register &Reg2,
- const MCExpr *&Disp,
- const MCExpr *&Length) {
+ const MCExpr *&Disp, const MCExpr *&Length,
+ bool HasLength, bool HasVectorIndex) {
// Parse the displacement, which must always be present.
if (getParser().parseExpression(Disp))
return true;
@@ -844,6 +937,27 @@ bool SystemZAsmParser::parseAddress(bool &HaveReg1, Register &Reg1,
HaveReg1 = false;
HaveReg2 = false;
Length = nullptr;
+
+ // If we have a scenario as below:
+ // vgef %v0, 0(0), 0
+ // This is an example of a "BDVMem" instruction type.
+ //
+ // So when we parse this as an integer register, the register group
+ // needs to be tied to "RegV". Usually when the prefix is passed in
+ // as %<prefix><reg-number> it's easy to check which group it should belong to.
+ // However, if we're passing in just the integer there's no real way to
+ // "check" what register group it should belong to.
+ //
+ // When the user passes in the register as an integer, the user assumes that
+ // the compiler is responsible for substituting it as the right kind of
+ // register. Whereas, when the user specifies a "prefix", the onus is on
+ // the user to make sure they pass in the right kind of register.
+ //
+ // The restriction only applies to the first Register (i.e. Reg1). Reg2 is
+ // always a general register. Reg1 should be of group RegV if "HasVectorIndex"
+ // (i.e. insn is of type BDVMem) is true.
+ RegisterGroup RegGroup = HasVectorIndex ? RegV : RegGR;
+
if (getLexer().is(AsmToken::LParen)) {
Parser.Lex();
@@ -852,18 +966,47 @@ bool SystemZAsmParser::parseAddress(bool &HaveReg1, Register &Reg1,
HaveReg1 = true;
if (parseRegister(Reg1))
return true;
+ }
+ // So if we have an integer as the first token in ([tok1], ..), it could:
+ // 1. Refer to a "Register" (i.e X,R,V fields in BD[X|R|V]Mem type of
+ // instructions)
+ // 2. Refer to a "Length" field (i.e L field in BDLMem type of instructions)
+ else if (getLexer().is(AsmToken::Integer)) {
+ if (HasLength) {
+ // Instruction has a "Length" field, safe to parse the first token as
+ // the "Length" field
+ if (getParser().parseExpression(Length))
+ return true;
+ } else {
+ // Otherwise, if the instruction has no "Length" field, parse the
+ // token as a "Register". We don't have to worry about whether the
+ // instruction is invalid here, because the caller will take care of
+ // error reporting.
+ HaveReg1 = true;
+ if (parseIntegerRegister(Reg1, RegGroup))
+ return true;
+ }
} else {
- // Parse the length.
- if (getParser().parseExpression(Length))
- return true;
+ // If it's not an integer or a percent token, then if the instruction
+ // is reported to have a "Length", parse it as "Length".
+ if (HasLength) {
+ if (getParser().parseExpression(Length))
+ return true;
+ }
}
// Check whether there's a second register.
if (getLexer().is(AsmToken::Comma)) {
Parser.Lex();
HaveReg2 = true;
- if (parseRegister(Reg2))
- return true;
+
+ if (getLexer().is(AsmToken::Integer)) {
+ if (parseIntegerRegister(Reg2, RegGR))
+ return true;
+ } else {
+ if (parseRegister(Reg2))
+ return true;
+ }
}
// Consume the closing bracket.
@@ -883,9 +1026,6 @@ SystemZAsmParser::parseAddressRegister(Register &Reg) {
} else if (Reg.Group != RegGR) {
Error(Reg.StartLoc, "invalid address register");
return true;
- } else if (Reg.Num == 0) {
- Error(Reg.StartLoc, "%r0 used in an address");
- return true;
}
return false;
}
@@ -894,16 +1034,27 @@ SystemZAsmParser::parseAddressRegister(Register &Reg) {
// are as above.
OperandMatchResultTy
SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
- const unsigned *Regs, RegisterKind RegKind) {
+ RegisterKind RegKind) {
SMLoc StartLoc = Parser.getTok().getLoc();
unsigned Base = 0, Index = 0, LengthReg = 0;
Register Reg1, Reg2;
bool HaveReg1, HaveReg2;
const MCExpr *Disp;
const MCExpr *Length;
- if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Disp, Length))
+
+ bool HasLength = (MemKind == BDLMem) ? true : false;
+ bool HasVectorIndex = (MemKind == BDVMem) ? true : false;
+ if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Disp, Length, HasLength,
+ HasVectorIndex))
return MatchOperand_ParseFail;
+ const unsigned *Regs;
+ switch (RegKind) {
+ case GR32Reg: Regs = SystemZMC::GR32Regs; break;
+ case GR64Reg: Regs = SystemZMC::GR64Regs; break;
+ default: llvm_unreachable("invalid RegKind");
+ }
+
switch (MemKind) {
case BDMem:
// If we have Reg1, it must be an address register.
@@ -912,11 +1063,7 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
return MatchOperand_ParseFail;
Base = Regs[Reg1.Num];
}
- // There must be no Reg2 or length.
- if (Length) {
- Error(StartLoc, "invalid use of length addressing");
- return MatchOperand_ParseFail;
- }
+ // There must be no Reg2.
if (HaveReg2) {
Error(StartLoc, "invalid use of indexed addressing");
return MatchOperand_ParseFail;
@@ -940,11 +1087,6 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
return MatchOperand_ParseFail;
Base = Regs[Reg2.Num];
}
- // There must be no length.
- if (Length) {
- Error(StartLoc, "invalid use of length addressing");
- return MatchOperand_ParseFail;
- }
break;
case BDLMem:
// If we have Reg2, it must be an address register.
@@ -977,11 +1119,6 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
return MatchOperand_ParseFail;
Base = Regs[Reg2.Num];
}
- // There must be no length.
- if (Length) {
- Error(StartLoc, "invalid use of length addressing");
- return MatchOperand_ParseFail;
- }
break;
case BDVMem:
// We must have Reg1, and it must be a vector register.
@@ -996,16 +1133,11 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
return MatchOperand_ParseFail;
Base = Regs[Reg2.Num];
}
- // There must be no length.
- if (Length) {
- Error(StartLoc, "invalid use of length addressing");
- return MatchOperand_ParseFail;
- }
break;
}
SMLoc EndLoc =
- SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(SystemZOperand::createMem(MemKind, RegKind, Base, Disp,
Index, Length, LengthReg,
StartLoc, EndLoc));
@@ -1118,15 +1250,15 @@ bool SystemZAsmParser::ParseDirectiveInsn(SMLoc L) {
}
// Emit as a regular instruction.
- Parser.getStreamer().EmitInstruction(Inst, getSTI());
+ Parser.getStreamer().emitInstruction(Inst, getSTI());
return false;
}
bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) {
+ SMLoc &EndLoc, bool RestoreOnFailure) {
Register Reg;
- if (parseRegister(Reg))
+ if (parseRegister(Reg, RestoreOnFailure))
return true;
if (Reg.Group == RegGR)
RegNo = SystemZMC::GR64Regs[Reg.Num];
@@ -1143,6 +1275,25 @@ bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
return false;
}
+bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
+}
+
+OperandMatchResultTy SystemZAsmParser::tryParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ bool Result =
+ ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
+ bool PendingErrors = getParser().hasPendingError();
+ getParser().clearPendingErrors();
+ if (PendingErrors)
+ return MatchOperand_ParseFail;
+ if (Result)
+ return MatchOperand_NoMatch;
+ return MatchOperand_Success;
+}
+
bool SystemZAsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
@@ -1215,7 +1366,8 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands,
bool HaveReg1, HaveReg2;
const MCExpr *Expr;
const MCExpr *Length;
- if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Expr, Length))
+ if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Expr, Length,
+ /*HasLength*/ true, /*HasVectorIndex*/ true))
return true;
// If the register combination is not valid for any instruction, reject it.
// Otherwise, fall back to reporting an unrecognized instruction.
@@ -1252,7 +1404,7 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
switch (MatchResult) {
case Match_Success:
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, getSTI());
+ Out.emitInstruction(Inst, getSTI());
return false;
case Match_MissingFeature: {
@@ -1322,7 +1474,7 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
}
int64_t Value = CE->getValue();
MCSymbol *Sym = Ctx.createTempSymbol();
- Out.EmitLabel(Sym);
+ Out.emitLabel(Sym);
const MCExpr *Base = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
Ctx);
Expr = Value == 0 ? Base : MCBinaryExpr::createAdd(Base, Expr, Ctx);
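
The AsmParser changes above (parseIntegerRegister and the extended parseAddress) allow register operands in SystemZ assembly to be written as bare integers, as in the "vgef %v0, 0(0), 0" example quoted in the comments, with the register kind inferred from the instruction format. A minimal sketch of the range check such a bare number goes through, mirroring parseIntegerRegister; the helper name isValidBareRegisterNumber is hypothetical and not part of the patch:

    #include <cstdint>

    // Mirrors the bound check in parseIntegerRegister: vector registers accept
    // 0-31 (%v0-%v31), every other register group accepts 0-15.
    static bool isValidBareRegisterNumber(int64_t Value, bool IsVectorGroup) {
      const int64_t MaxRegNum = IsVectorGroup ? 31 : 15;
      return Value >= 0 && Value <= MaxRegNum;
    }
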
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
index 5893b227c08c..fac363cae713 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
@@ -155,7 +155,8 @@ void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum,
MO.getExpr()->print(O, &MAI);
}
-void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, int OpNum,
+void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI,
+ uint64_t Address, int OpNum,
raw_ostream &O) {
// Output the PC-relative operand.
printPCRelOperand(MI, OpNum, O);
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
index 5628e9252f03..cfe1bd89c3eb 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
@@ -46,6 +46,10 @@ public:
private:
// Print various types of operand.
void printOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum,
+ raw_ostream &O) {
+ printOperand(MI, OpNum, O);
+ }
void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
@@ -65,7 +69,12 @@ private:
void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU48ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O);
- void printPCRelTLSOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printPCRelOperand(const MCInst *MI, uint64_t /*Address*/, int OpNum,
+ raw_ostream &O) {
+ printPCRelOperand(MI, OpNum, O);
+ }
+ void printPCRelTLSOperand(const MCInst *MI, uint64_t Address, int OpNum,
+ raw_ostream &O);
// Print the mnemonic for a condition-code mask ("ne", "lh", etc.)
// This forms part of the instruction name rather than the operand list.
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 23d8585095cc..e62f5040898f 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -63,10 +63,6 @@ public:
const MCAsmLayout &Layout) const override {
return false;
}
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override {
- llvm_unreachable("SystemZ does do not have assembler relaxation");
- }
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index d6cdacfcab92..e540ff4e4811 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -23,6 +23,4 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(const Triple &TT) {
UsesELFSectionDirectiveForBSS = true;
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
-
- UseIntegratedAssembler = true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index eb2112674a12..f2ef1ad6c698 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -150,10 +150,9 @@ static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI,
const Triple &TT,
const MCTargetOptions &Options) {
MCAsmInfo *MAI = new SystemZMCAsmInfo(TT);
- MCCFIInstruction Inst =
- MCCFIInstruction::createDefCfa(nullptr,
- MRI.getDwarfRegNum(SystemZ::R15D, true),
- SystemZMC::CFAOffsetFromInitialSP);
+ MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(
+ nullptr, MRI.getDwarfRegNum(SystemZ::R15D, true),
+ SystemZMC::CFAOffsetFromInitialSP);
MAI->addInitialFrameState(Inst);
return MAI;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.h
index 0808160f627c..bedbd061ea5c 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZ.h
@@ -193,6 +193,7 @@ FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM);
FunctionPass *createSystemZTDCPass();
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 67c4aa08f90d..4109bfc11337 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -92,9 +92,9 @@ static void lowerAlignmentHint(const MachineInstr *MI, MCInst &LoweredMI,
return;
const MachineMemOperand *MMO = *MI->memoperands_begin();
unsigned AlignmentHint = 0;
- if (MMO->getAlignment() >= 16)
+ if (MMO->getAlign() >= Align(16))
AlignmentHint = 4;
- else if (MMO->getAlignment() >= 8)
+ else if (MMO->getAlign() >= Align(8))
AlignmentHint = 3;
if (AlignmentHint == 0)
return;
@@ -124,7 +124,7 @@ static MCInst lowerSubvectorStore(const MachineInstr *MI, unsigned Opcode) {
.addImm(0);
}
-void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
SystemZMCInstLower Lower(MF->getContext(), *this);
MCInst LoweredMI;
switch (MI->getOpcode()) {
@@ -479,7 +479,7 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// that instead.
case SystemZ::Trap: {
MCSymbol *DotSym = OutContext.createTempSymbol();
- OutStreamer->EmitLabel(DotSym);
+ OutStreamer->emitLabel(DotSym);
const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(DotSym, OutContext);
const MCConstantExpr *ConstExpr = MCConstantExpr::create(2, OutContext);
@@ -492,7 +492,7 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// to the relative immediate field of the jump instruction. (eg. "jo .+2")
case SystemZ::CondTrap: {
MCSymbol *DotSym = OutContext.createTempSymbol();
- OutStreamer->EmitLabel(DotSym);
+ OutStreamer->emitLabel(DotSym);
const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(DotSym, OutContext);
const MCConstantExpr *ConstExpr = MCConstantExpr::create(2, OutContext);
@@ -522,7 +522,6 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, LoweredMI);
}
-
// Emit the largest nop instruction smaller than or equal to NumBytes
// bytes. Return the size of nop emitted.
static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer,
@@ -532,22 +531,22 @@ static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer,
return 0;
}
else if (NumBytes < 4) {
- OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BCRAsm)
- .addImm(0).addReg(SystemZ::R0D), STI);
+ OutStreamer.emitInstruction(
+ MCInstBuilder(SystemZ::BCRAsm).addImm(0).addReg(SystemZ::R0D), STI);
return 2;
}
else if (NumBytes < 6) {
- OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BCAsm)
- .addImm(0).addReg(0).addImm(0).addReg(0),
- STI);
+ OutStreamer.emitInstruction(
+ MCInstBuilder(SystemZ::BCAsm).addImm(0).addReg(0).addImm(0).addReg(0),
+ STI);
return 4;
}
else {
MCSymbol *DotSym = OutContext.createTempSymbol();
const MCSymbolRefExpr *Dot = MCSymbolRefExpr::create(DotSym, OutContext);
- OutStreamer.EmitLabel(DotSym);
- OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BRCLAsm)
- .addImm(0).addExpr(Dot), STI);
+ OutStreamer.emitLabel(DotSym);
+ OutStreamer.emitInstruction(
+ MCInstBuilder(SystemZ::BRCLAsm).addImm(0).addExpr(Dot), STI);
return 6;
}
}
@@ -560,9 +559,9 @@ void SystemZAsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
OutStreamer->PushSection();
OutStreamer->SwitchSection(
Ctx.getELFSection("__mcount_loc", ELF::SHT_PROGBITS, ELF::SHF_ALLOC));
- OutStreamer->EmitSymbolValue(DotSym, 8);
+ OutStreamer->emitSymbolValue(DotSym, 8);
OutStreamer->PopSection();
- OutStreamer->EmitLabel(DotSym);
+ OutStreamer->emitLabel(DotSym);
}
if (MF->getFunction().hasFnAttribute("mnop-mcount")) {
@@ -573,8 +572,9 @@ void SystemZAsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
const MCSymbolRefExpr *Op =
MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_PLT, Ctx);
- OutStreamer->EmitInstruction(MCInstBuilder(SystemZ::BRASL)
- .addReg(SystemZ::R0D).addExpr(Op), getSubtargetInfo());
+ OutStreamer->emitInstruction(
+ MCInstBuilder(SystemZ::BRASL).addReg(SystemZ::R0D).addExpr(Op),
+ getSubtargetInfo());
}
void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
@@ -585,7 +585,7 @@ void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
auto &Ctx = OutStreamer->getContext();
MCSymbol *MILabel = Ctx.createTempSymbol();
- OutStreamer->EmitLabel(MILabel);
+ OutStreamer->emitLabel(MILabel);
SM.recordStackMap(*MILabel, MI);
assert(NumNOPBytes % 2 == 0 && "Invalid number of NOP bytes requested!");
@@ -618,7 +618,7 @@ void SystemZAsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
SystemZMCInstLower &Lower) {
auto &Ctx = OutStreamer->getContext();
MCSymbol *MILabel = Ctx.createTempSymbol();
- OutStreamer->EmitLabel(MILabel);
+ OutStreamer->emitLabel(MILabel);
SM.recordPatchPoint(*MILabel, MI);
PatchPointOpers Opers(&MI);
@@ -685,8 +685,8 @@ getModifierVariantKind(SystemZCP::SystemZCPModifier Modifier) {
llvm_unreachable("Invalid SystemCPModifier!");
}
-void SystemZAsmPrinter::
-EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+void SystemZAsmPrinter::emitMachineConstantPoolValue(
+ MachineConstantPoolValue *MCPV) {
auto *ZCPV = static_cast<SystemZConstantPoolValue*>(MCPV);
const MCExpr *Expr =
@@ -695,7 +695,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
OutContext);
uint64_t Size = getDataLayout().getTypeAllocSize(ZCPV->getType());
- OutStreamer->EmitValue(Expr, Size);
+ OutStreamer->emitValue(Expr, Size);
}
bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -719,7 +719,7 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
-void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) {
+void SystemZAsmPrinter::emitEndOfAsmFile(Module &M) {
emitStackMaps(SM);
}
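EmitNop() in the printer above returns the size of the nop it actually emitted (2, 4, or 6 bytes, or 0 when fewer than 2 bytes remain), so callers pad a region by looping on the remainder. A minimal sketch of that consumption pattern, assuming it sits in the same file as EmitNop() and runs from an AsmPrinter helper such as LowerSTACKMAP():

// Sketch only: emitNopPadding is a hypothetical helper; EmitNop is the
// static function shown in the diff above.
static void emitNopPadding(MCContext &Ctx, MCStreamer &OS, unsigned NumBytes,
                           const MCSubtargetInfo &STI) {
  assert(NumBytes % 2 == 0 && "SystemZ nops come in 2/4/6-byte sizes");
  while (NumBytes > 0)
    NumBytes -= EmitNop(Ctx, OS, NumBytes, STI); // always consumes >= 2 bytes
}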
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
index d01a17c2ebe2..2d7562c7238d 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -32,9 +32,9 @@ public:
// Override AsmPrinter.
StringRef getPassName() const override { return "SystemZ Assembly Printer"; }
- void EmitInstruction(const MachineInstr *MI) override;
- void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override;
- void EmitEndOfAsmFile(Module &M) override;
+ void emitInstruction(const MachineInstr *MI) override;
+ void emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override;
+ void emitEndOfAsmFile(Module &M) override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode, raw_ostream &OS) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.h
index 4432adc6a269..d4c7ce07420b 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.h
@@ -108,7 +108,7 @@ inline bool CC_SystemZ_I128Indirect(unsigned &ValNo, MVT &ValVT,
// the location (register or stack slot) for the indirect pointer.
// (This duplicates the usual i64 calling convention rules.)
unsigned Reg = State.AllocateReg(SystemZ::ArgGPRs);
- unsigned Offset = Reg ? 0 : State.AllocateStack(8, 8);
+ unsigned Offset = Reg ? 0 : State.AllocateStack(8, Align(8));
// Use that same location for all the pending parts.
for (auto &It : PendingMembers) {
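The AllocateStack change above is part of the LLVM-wide move from raw unsigned alignments to the llvm::Align type. A small sketch of that type, using only calls from llvm/Support/Alignment.h; the helper name is made up for illustration:

#include "llvm/Support/Alignment.h"
#include <cstdint>

// roundToEight is a hypothetical helper; Align and alignTo are the real APIs.
static uint64_t roundToEight(uint64_t Size) {
  llvm::Align Eight(8);               // asserts unless the value is a power of two
  return llvm::alignTo(Size, Eight);  // e.g. 12 -> 16
}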
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
index ffeee4da95cc..86c6b2985385 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
@@ -25,13 +25,12 @@ SystemZConstantPoolValue::Create(const GlobalValue *GV,
return new SystemZConstantPoolValue(GV, Modifier);
}
-int SystemZConstantPoolValue::
-getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) {
- unsigned AlignMask = Alignment - 1;
+int SystemZConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
+ Align Alignment) {
const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
for (unsigned I = 0, E = Constants.size(); I != E; ++I) {
if (Constants[I].isMachineConstantPoolEntry() &&
- (Constants[I].getAlignment() & AlignMask) == 0) {
+ Constants[I].getAlign() >= Alignment) {
auto *ZCPV =
static_cast<SystemZConstantPoolValue *>(Constants[I].Val.MachineCPVal);
if (ZCPV->GV == GV && ZCPV->Modifier == Modifier)
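For the power-of-two alignments that MachineConstantPool entries carry, the rewritten reuse check behaves the same as the old mask test, since one power of two is a multiple of another exactly when it is at least as large. Two compile-time spot checks of that equivalence:

static_assert((16 & (8 - 1)) == 0 && 16 >= 8,
              "a 16-byte-aligned entry satisfies an 8-byte request either way");
static_assert((4 & (8 - 1)) != 0 && !(4 >= 8),
              "a 4-byte-aligned entry fails an 8-byte request either way");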
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
index 6cb7710abdfe..da610ab45070 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
@@ -43,7 +43,7 @@ public:
// Override MachineConstantPoolValue.
int getExistingMachineCPValue(MachineConstantPool *CP,
- unsigned Alignment) override;
+ Align Alignment) override;
void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
void print(raw_ostream &O) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp
new file mode 100644
index 000000000000..7d21d29d270e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp
@@ -0,0 +1,120 @@
+//===--------- SystemZCopyPhysRegs.cpp - Handle phys reg copies -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass makes sure that a COPY of a physical register will be
+// implementable after register allocation in copyPhysReg() (this could be
+// done in EmitInstrWithCustomInserter() instead if COPY instructions were
+// passed to it).
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define SYSTEMZ_COPYPHYSREGS_NAME "SystemZ Copy Physregs"
+
+namespace llvm {
+ void initializeSystemZCopyPhysRegsPass(PassRegistry&);
+}
+
+namespace {
+
+class SystemZCopyPhysRegs : public MachineFunctionPass {
+public:
+ static char ID;
+ SystemZCopyPhysRegs()
+ : MachineFunctionPass(ID), TII(nullptr), MRI(nullptr) {
+ initializeSystemZCopyPhysRegsPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return SYSTEMZ_COPYPHYSREGS_NAME; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+
+ bool visitMBB(MachineBasicBlock &MBB);
+
+ const SystemZInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+};
+
+char SystemZCopyPhysRegs::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(SystemZCopyPhysRegs, "systemz-copy-physregs",
+ SYSTEMZ_COPYPHYSREGS_NAME, false, false)
+
+FunctionPass *llvm::createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM) {
+ return new SystemZCopyPhysRegs();
+}
+
+void SystemZCopyPhysRegs::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool SystemZCopyPhysRegs::visitMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ // Certain special registers can only be copied from a subset of the
+ // default register class of the type. It is therefore necessary to create
+ // the target copy instructions before regalloc instead of in copyPhysReg().
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E; ) {
+ MachineInstr *MI = &*MBBI++;
+ if (!MI->isCopy())
+ continue;
+
+ DebugLoc DL = MI->getDebugLoc();
+ Register SrcReg = MI->getOperand(1).getReg();
+ Register DstReg = MI->getOperand(0).getReg();
+ if (DstReg.isVirtual() &&
+ (SrcReg == SystemZ::CC || SystemZ::AR32BitRegClass.contains(SrcReg))) {
+ Register Tmp = MRI->createVirtualRegister(&SystemZ::GR32BitRegClass);
+ if (SrcReg == SystemZ::CC)
+ BuildMI(MBB, MI, DL, TII->get(SystemZ::IPM), Tmp);
+ else
+ BuildMI(MBB, MI, DL, TII->get(SystemZ::EAR), Tmp).addReg(SrcReg);
+ MI->getOperand(1).setReg(Tmp);
+ Modified = true;
+ }
+ else if (SrcReg.isVirtual() &&
+ SystemZ::AR32BitRegClass.contains(DstReg)) {
+ Register Tmp = MRI->createVirtualRegister(&SystemZ::GR32BitRegClass);
+ MI->getOperand(0).setReg(Tmp);
+ BuildMI(MBB, MBBI, DL, TII->get(SystemZ::SAR), DstReg).addReg(Tmp);
+ Modified = true;
+ }
+ }
+
+ return Modified;
+}
+
+bool SystemZCopyPhysRegs::runOnMachineFunction(MachineFunction &F) {
+ TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
+ MRI = &F.getRegInfo();
+
+ bool Modified = false;
+ for (auto &MBB : F)
+ Modified |= visitMBB(MBB);
+
+ return Modified;
+}
+
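How the new pass gets scheduled is not shown in this section; a hedged sketch of the expected wiring, assuming the usual TargetPassConfig hook inside SystemZTargetMachine.cpp:

// Assumed placement: the pass needs to run before register allocation so the
// IPM/EAR/SAR copies it builds can still use fresh virtual registers.
void SystemZPassConfig::addPreRegAlloc() {
  addPass(createSystemZCopyPhysRegsPass(getSystemZTargetMachine()));
}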
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td
index dae795e845b0..28f58cb310af 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td
@@ -10,13 +10,13 @@
//
//===----------------------------------------------------------------------===//
-class SystemZFeature<string extname, string intname, string desc>
- : Predicate<"Subtarget->has"##intname##"()">,
- AssemblerPredicate<"Feature"##intname, extname>,
- SubtargetFeature<extname, "Has"##intname, "true", desc>;
+class SystemZFeature<string extname, string intname, dag featdag, string desc>
+ : Predicate<"Subtarget->has"#intname#"()">,
+ AssemblerPredicate<featdag, extname>,
+ SubtargetFeature<extname, "Has"#intname, "true", desc>;
class SystemZMissingFeature<string intname>
- : Predicate<"!Subtarget->has"##intname##"()">;
+ : Predicate<"!Subtarget->has"#intname#"()">;
class SystemZFeatureList<list<SystemZFeature> x> {
list<SystemZFeature> List = x;
@@ -25,6 +25,13 @@ class SystemZFeatureList<list<SystemZFeature> x> {
class SystemZFeatureAdd<list<SystemZFeature> x, list<SystemZFeature> y>
: SystemZFeatureList<!listconcat(x, y)>;
+// This feature is added as a subtarget feature whenever the function is
+// compiled to use soft-float.
+def FeatureSoftFloat : SystemZFeature<
+ "soft-float", "SoftFloat", (all_of FeatureSoftFloat),
+ "Use software emulation for floating point"
+>;
+
//===----------------------------------------------------------------------===//
//
// New features added in the Ninth Edition of the z/Architecture
@@ -32,54 +39,54 @@ class SystemZFeatureAdd<list<SystemZFeature> x, list<SystemZFeature> y>
//===----------------------------------------------------------------------===//
def FeatureDistinctOps : SystemZFeature<
- "distinct-ops", "DistinctOps",
+ "distinct-ops", "DistinctOps", (all_of FeatureDistinctOps),
"Assume that the distinct-operands facility is installed"
>;
def FeatureFastSerialization : SystemZFeature<
- "fast-serialization", "FastSerialization",
+ "fast-serialization", "FastSerialization", (all_of FeatureFastSerialization),
"Assume that the fast-serialization facility is installed"
>;
def FeatureFPExtension : SystemZFeature<
- "fp-extension", "FPExtension",
+ "fp-extension", "FPExtension", (all_of FeatureFPExtension),
"Assume that the floating-point extension facility is installed"
>;
def FeatureHighWord : SystemZFeature<
- "high-word", "HighWord",
+ "high-word", "HighWord", (all_of FeatureHighWord),
"Assume that the high-word facility is installed"
>;
def FeatureInterlockedAccess1 : SystemZFeature<
- "interlocked-access1", "InterlockedAccess1",
+ "interlocked-access1", "InterlockedAccess1", (all_of FeatureInterlockedAccess1),
"Assume that interlocked-access facility 1 is installed"
>;
def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">;
def FeatureLoadStoreOnCond : SystemZFeature<
- "load-store-on-cond", "LoadStoreOnCond",
+ "load-store-on-cond", "LoadStoreOnCond", (all_of FeatureLoadStoreOnCond),
"Assume that the load/store-on-condition facility is installed"
>;
def FeatureNoLoadStoreOnCond : SystemZMissingFeature<"LoadStoreOnCond">;
def FeaturePopulationCount : SystemZFeature<
- "population-count", "PopulationCount",
+ "population-count", "PopulationCount", (all_of FeaturePopulationCount),
"Assume that the population-count facility is installed"
>;
def FeatureMessageSecurityAssist3 : SystemZFeature<
- "message-security-assist-extension3", "MessageSecurityAssist3",
+ "message-security-assist-extension3", "MessageSecurityAssist3", (all_of FeatureMessageSecurityAssist3),
"Assume that the message-security-assist extension facility 3 is installed"
>;
def FeatureMessageSecurityAssist4 : SystemZFeature<
- "message-security-assist-extension4", "MessageSecurityAssist4",
+ "message-security-assist-extension4", "MessageSecurityAssist4", (all_of FeatureMessageSecurityAssist4),
"Assume that the message-security-assist extension facility 4 is installed"
>;
def FeatureResetReferenceBitsMultiple : SystemZFeature<
- "reset-reference-bits-multiple", "ResetReferenceBitsMultiple",
+ "reset-reference-bits-multiple", "ResetReferenceBitsMultiple", (all_of FeatureResetReferenceBitsMultiple),
"Assume that the reset-reference-bits-multiple facility is installed"
>;
@@ -103,37 +110,37 @@ def Arch9NewFeatures : SystemZFeatureList<[
//===----------------------------------------------------------------------===//
def FeatureExecutionHint : SystemZFeature<
- "execution-hint", "ExecutionHint",
+ "execution-hint", "ExecutionHint", (all_of FeatureExecutionHint),
"Assume that the execution-hint facility is installed"
>;
def FeatureLoadAndTrap : SystemZFeature<
- "load-and-trap", "LoadAndTrap",
+ "load-and-trap", "LoadAndTrap", (all_of FeatureLoadAndTrap),
"Assume that the load-and-trap facility is installed"
>;
def FeatureMiscellaneousExtensions : SystemZFeature<
- "miscellaneous-extensions", "MiscellaneousExtensions",
+ "miscellaneous-extensions", "MiscellaneousExtensions", (all_of FeatureMiscellaneousExtensions),
"Assume that the miscellaneous-extensions facility is installed"
>;
def FeatureProcessorAssist : SystemZFeature<
- "processor-assist", "ProcessorAssist",
+ "processor-assist", "ProcessorAssist", (all_of FeatureProcessorAssist),
"Assume that the processor-assist facility is installed"
>;
def FeatureTransactionalExecution : SystemZFeature<
- "transactional-execution", "TransactionalExecution",
+ "transactional-execution", "TransactionalExecution", (all_of FeatureTransactionalExecution),
"Assume that the transactional-execution facility is installed"
>;
def FeatureDFPZonedConversion : SystemZFeature<
- "dfp-zoned-conversion", "DFPZonedConversion",
+ "dfp-zoned-conversion", "DFPZonedConversion", (all_of FeatureDFPZonedConversion),
"Assume that the DFP zoned-conversion facility is installed"
>;
def FeatureEnhancedDAT2 : SystemZFeature<
- "enhanced-dat-2", "EnhancedDAT2",
+ "enhanced-dat-2", "EnhancedDAT2", (all_of FeatureEnhancedDAT2),
"Assume that the enhanced-DAT facility 2 is installed"
>;
@@ -154,27 +161,27 @@ def Arch10NewFeatures : SystemZFeatureList<[
//===----------------------------------------------------------------------===//
def FeatureLoadAndZeroRightmostByte : SystemZFeature<
- "load-and-zero-rightmost-byte", "LoadAndZeroRightmostByte",
+ "load-and-zero-rightmost-byte", "LoadAndZeroRightmostByte", (all_of FeatureLoadAndZeroRightmostByte),
"Assume that the load-and-zero-rightmost-byte facility is installed"
>;
def FeatureLoadStoreOnCond2 : SystemZFeature<
- "load-store-on-cond-2", "LoadStoreOnCond2",
+ "load-store-on-cond-2", "LoadStoreOnCond2", (all_of FeatureLoadStoreOnCond2),
"Assume that the load/store-on-condition facility 2 is installed"
>;
def FeatureMessageSecurityAssist5 : SystemZFeature<
- "message-security-assist-extension5", "MessageSecurityAssist5",
+ "message-security-assist-extension5", "MessageSecurityAssist5", (all_of FeatureMessageSecurityAssist5),
"Assume that the message-security-assist extension facility 5 is installed"
>;
def FeatureDFPPackedConversion : SystemZFeature<
- "dfp-packed-conversion", "DFPPackedConversion",
+ "dfp-packed-conversion", "DFPPackedConversion", (all_of FeatureDFPPackedConversion),
"Assume that the DFP packed-conversion facility is installed"
>;
def FeatureVector : SystemZFeature<
- "vector", "Vector",
+ "vector", "Vector", (all_of FeatureVector),
"Assume that the vectory facility is installed"
>;
def FeatureNoVector : SystemZMissingFeature<"Vector">;
@@ -194,38 +201,38 @@ def Arch11NewFeatures : SystemZFeatureList<[
//===----------------------------------------------------------------------===//
def FeatureMiscellaneousExtensions2 : SystemZFeature<
- "miscellaneous-extensions-2", "MiscellaneousExtensions2",
+ "miscellaneous-extensions-2", "MiscellaneousExtensions2", (all_of FeatureMiscellaneousExtensions2),
"Assume that the miscellaneous-extensions facility 2 is installed"
>;
def FeatureGuardedStorage : SystemZFeature<
- "guarded-storage", "GuardedStorage",
+ "guarded-storage", "GuardedStorage", (all_of FeatureGuardedStorage),
"Assume that the guarded-storage facility is installed"
>;
def FeatureMessageSecurityAssist7 : SystemZFeature<
- "message-security-assist-extension7", "MessageSecurityAssist7",
+ "message-security-assist-extension7", "MessageSecurityAssist7", (all_of FeatureMessageSecurityAssist7),
"Assume that the message-security-assist extension facility 7 is installed"
>;
def FeatureMessageSecurityAssist8 : SystemZFeature<
- "message-security-assist-extension8", "MessageSecurityAssist8",
+ "message-security-assist-extension8", "MessageSecurityAssist8", (all_of FeatureMessageSecurityAssist8),
"Assume that the message-security-assist extension facility 8 is installed"
>;
def FeatureVectorEnhancements1 : SystemZFeature<
- "vector-enhancements-1", "VectorEnhancements1",
+ "vector-enhancements-1", "VectorEnhancements1", (all_of FeatureVectorEnhancements1),
"Assume that the vector enhancements facility 1 is installed"
>;
def FeatureNoVectorEnhancements1 : SystemZMissingFeature<"VectorEnhancements1">;
def FeatureVectorPackedDecimal : SystemZFeature<
- "vector-packed-decimal", "VectorPackedDecimal",
+ "vector-packed-decimal", "VectorPackedDecimal", (all_of FeatureVectorPackedDecimal),
"Assume that the vector packed decimal facility is installed"
>;
def FeatureInsertReferenceBitsMultiple : SystemZFeature<
- "insert-reference-bits-multiple", "InsertReferenceBitsMultiple",
+ "insert-reference-bits-multiple", "InsertReferenceBitsMultiple", (all_of FeatureInsertReferenceBitsMultiple),
"Assume that the insert-reference-bits-multiple facility is installed"
>;
@@ -246,32 +253,32 @@ def Arch12NewFeatures : SystemZFeatureList<[
//===----------------------------------------------------------------------===//
def FeatureMiscellaneousExtensions3 : SystemZFeature<
- "miscellaneous-extensions-3", "MiscellaneousExtensions3",
+ "miscellaneous-extensions-3", "MiscellaneousExtensions3", (all_of FeatureMiscellaneousExtensions3),
"Assume that the miscellaneous-extensions facility 3 is installed"
>;
def FeatureMessageSecurityAssist9 : SystemZFeature<
- "message-security-assist-extension9", "MessageSecurityAssist9",
+ "message-security-assist-extension9", "MessageSecurityAssist9", (all_of FeatureMessageSecurityAssist9),
"Assume that the message-security-assist extension facility 9 is installed"
>;
def FeatureVectorEnhancements2 : SystemZFeature<
- "vector-enhancements-2", "VectorEnhancements2",
+ "vector-enhancements-2", "VectorEnhancements2", (all_of FeatureVectorEnhancements2),
"Assume that the vector enhancements facility 2 is installed"
>;
def FeatureVectorPackedDecimalEnhancement : SystemZFeature<
- "vector-packed-decimal-enhancement", "VectorPackedDecimalEnhancement",
+ "vector-packed-decimal-enhancement", "VectorPackedDecimalEnhancement", (all_of FeatureVectorPackedDecimalEnhancement),
"Assume that the vector packed decimal enhancement facility is installed"
>;
def FeatureEnhancedSort : SystemZFeature<
- "enhanced-sort", "EnhancedSort",
+ "enhanced-sort", "EnhancedSort", (all_of FeatureEnhancedSort),
"Assume that the enhanced-sort facility is installed"
>;
def FeatureDeflateConversion : SystemZFeature<
- "deflate-conversion", "DeflateConversion",
+ "deflate-conversion", "DeflateConversion", (all_of FeatureDeflateConversion),
"Assume that the deflate-conversion facility is installed"
>;
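FeatureSoftFloat above is an ordinary SubtargetFeature named "soft-float"; a hedged sketch of requesting it per function through the target-features attribute (the exact frontend plumbing is outside this diff, and in practice the string is appended to the function's existing feature list):

#include "llvm/IR/Function.h"

// Assumption: "+soft-float" in target-features is how the new feature reaches
// SystemZSubtarget::hasSoftFloat(); requestSoftFloat is a made-up helper.
static void requestSoftFloat(llvm::Function &F) {
  F.addFnAttr("target-features", "+soft-float");
}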
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 3cdf6bf98ee0..985722fdcab4 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -62,18 +63,6 @@ SystemZFrameLowering::SystemZFrameLowering()
RegSpillOffsets[SpillOffsetTable[I].Reg] = SpillOffsetTable[I].Offset;
}
-static bool usePackedStack(MachineFunction &MF) {
- bool HasPackedStackAttr = MF.getFunction().hasFnAttribute("packed-stack");
- bool IsVarArg = MF.getFunction().isVarArg();
- bool CallConv = MF.getFunction().getCallingConv() != CallingConv::GHC;
- bool BackChain = MF.getFunction().hasFnAttribute("backchain");
- bool FramAddressTaken = MF.getFrameInfo().isFrameAddressTaken();
- if (HasPackedStackAttr && BackChain)
- report_fatal_error("packed-stack with backchain is currently unsupported.");
- return HasPackedStackAttr && !IsVarArg && CallConv && !BackChain &&
- !FramAddressTaken;
-}
-
bool SystemZFrameLowering::
assignCalleeSavedSpillSlots(MachineFunction &MF,
const TargetRegisterInfo *TRI,
@@ -87,71 +76,44 @@ assignCalleeSavedSpillSlots(MachineFunction &MF,
unsigned LowGPR = 0;
unsigned HighGPR = SystemZ::R15D;
int StartSPOffset = SystemZMC::CallFrameSize;
- int CurrOffset;
- if (!usePackedStack(MF)) {
- for (auto &CS : CSI) {
- unsigned Reg = CS.getReg();
- int Offset = RegSpillOffsets[Reg];
- if (Offset) {
- if (SystemZ::GR64BitRegClass.contains(Reg) && StartSPOffset > Offset) {
- LowGPR = Reg;
- StartSPOffset = Offset;
- }
- Offset -= SystemZMC::CallFrameSize;
- int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset);
- CS.setFrameIdx(FrameIdx);
- } else
- CS.setFrameIdx(INT32_MAX);
- }
-
- // Save the range of call-saved registers, for use by the
- // prologue/epilogue inserters.
- ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset);
- if (IsVarArg) {
- // Also save the GPR varargs, if any. R6D is call-saved, so would
- // already be included, but we also need to handle the call-clobbered
- // argument registers.
- unsigned FirstGPR = ZFI->getVarArgsFirstGPR();
- if (FirstGPR < SystemZ::NumArgGPRs) {
- unsigned Reg = SystemZ::ArgGPRs[FirstGPR];
- int Offset = RegSpillOffsets[Reg];
- if (StartSPOffset > Offset) {
- LowGPR = Reg; StartSPOffset = Offset;
- }
+ for (auto &CS : CSI) {
+ unsigned Reg = CS.getReg();
+ int Offset = getRegSpillOffset(MF, Reg);
+ if (Offset) {
+ if (SystemZ::GR64BitRegClass.contains(Reg) && StartSPOffset > Offset) {
+ LowGPR = Reg;
+ StartSPOffset = Offset;
}
- }
- ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset);
+ Offset -= SystemZMC::CallFrameSize;
+ int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset);
+ CS.setFrameIdx(FrameIdx);
+ } else
+ CS.setFrameIdx(INT32_MAX);
+ }
- CurrOffset = -SystemZMC::CallFrameSize;
- } else {
- // Packed stack: put all the GPRs at the top of the Register save area.
- uint32_t LowGR64Num = UINT32_MAX;
- for (auto &CS : CSI) {
- unsigned Reg = CS.getReg();
- if (SystemZ::GR64BitRegClass.contains(Reg)) {
- unsigned GR64Num = SystemZMC::getFirstReg(Reg);
- int Offset = -8 * (15 - GR64Num + 1);
- if (LowGR64Num > GR64Num) {
- LowGR64Num = GR64Num;
- StartSPOffset = SystemZMC::CallFrameSize + Offset;
- }
- int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset);
- CS.setFrameIdx(FrameIdx);
- } else
- CS.setFrameIdx(INT32_MAX);
+ // Save the range of call-saved registers, for use by the
+ // prologue/epilogue inserters.
+ ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset);
+ if (IsVarArg) {
+ // Also save the GPR varargs, if any. R6D is call-saved, so would
+ // already be included, but we also need to handle the call-clobbered
+ // argument registers.
+ unsigned FirstGPR = ZFI->getVarArgsFirstGPR();
+ if (FirstGPR < SystemZ::NumArgGPRs) {
+ unsigned Reg = SystemZ::ArgGPRs[FirstGPR];
+ int Offset = getRegSpillOffset(MF, Reg);
+ if (StartSPOffset > Offset) {
+ LowGPR = Reg; StartSPOffset = Offset;
+ }
}
- if (LowGR64Num < UINT32_MAX)
- LowGPR = SystemZMC::GR64Regs[LowGR64Num];
-
- // Save the range of call-saved registers, for use by the
- // prologue/epilogue inserters.
- ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset);
- ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset);
-
- CurrOffset = LowGPR ? -(SystemZMC::CallFrameSize - StartSPOffset) : 0;
}
+ ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset);
// Create fixed stack objects for the remaining registers.
+ int CurrOffset = -SystemZMC::CallFrameSize;
+ if (usePackedStack(MF))
+ CurrOffset += StartSPOffset;
+
for (auto &CS : CSI) {
if (CS.getFrameIdx() != INT32_MAX)
continue;
@@ -234,11 +196,9 @@ static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB,
}
}
-bool SystemZFrameLowering::
-spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+bool SystemZFrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -296,11 +256,9 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
-bool SystemZFrameLowering::
-restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+bool SystemZFrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -358,9 +316,10 @@ void SystemZFrameLowering::
processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const {
MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ bool BackChain = MF.getFunction().hasFnAttribute("backchain");
- if (!usePackedStack(MF))
- // Always create the full incoming register save area.
+ if (!usePackedStack(MF) || BackChain)
+ // Create the incoming register save area.
getOrCreateFramePointerSaveIndex(MF);
// Get the size of our stack frame to be allocated ...
@@ -382,16 +341,15 @@ processFunctionBeforeFrameFinalized(MachineFunction &MF,
// are outside the reach of an unsigned 12-bit displacement.
// Create 2 for the case where both addresses in an MVC are
// out of range.
- RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, 8, false));
- RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, 8, false));
+ RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, Align(8), false));
+ RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, Align(8), false));
}
}
// Emit instructions before MBBI (in MBB) to add NumBytes to Reg.
static void emitIncrement(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- const DebugLoc &DL,
- unsigned Reg, int64_t NumBytes,
+ MachineBasicBlock::iterator &MBBI, const DebugLoc &DL,
+ Register Reg, int64_t NumBytes,
const TargetInstrInfo *TII) {
while (NumBytes) {
unsigned Opcode;
@@ -416,12 +374,39 @@ static void emitIncrement(MachineBasicBlock &MBB,
}
}
+// Add CFI for the new CFA offset.
+static void buildCFAOffs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, int Offset,
+ const SystemZInstrInfo *ZII) {
+ unsigned CFIIndex = MBB.getParent()->addFrameInst(
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset));
+ BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+}
+
+// Add CFI for the new frame location.
+static void buildDefCFAReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned Reg,
+ const SystemZInstrInfo *ZII) {
+ MachineFunction &MF = *MBB.getParent();
+ MachineModuleInfo &MMI = MF.getMMI();
+ const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+ unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
+ BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+}
+
void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
+ const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>();
+ const SystemZTargetLowering &TLI = *STI.getTargetLowering();
MachineFrameInfo &MFFrame = MF.getFrameInfo();
- auto *ZII =
- static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ auto *ZII = static_cast<const SystemZInstrInfo *>(STI.getInstrInfo());
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineModuleInfo &MMI = MF.getMMI();
@@ -504,19 +489,31 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
// Allocate StackSize bytes.
int64_t Delta = -int64_t(StackSize);
- emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
-
- // Add CFI for the allocation.
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, SPOffsetFromCFA + Delta));
- BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ const unsigned ProbeSize = TLI.getStackProbeSize(MF);
+ bool FreeProbe = (ZFI->getSpillGPRRegs().GPROffset &&
+ (ZFI->getSpillGPRRegs().GPROffset + StackSize) < ProbeSize);
+ if (!FreeProbe &&
+ MF.getSubtarget().getTargetLowering()->hasInlineStackProbe(MF)) {
+ // Stack probing may involve looping, but splitting the prologue block
+ // is not possible at this point since it would invalidate the
+ // SaveBlocks / RestoreBlocks sets of PEI in the single block function
+ // case. Build a pseudo to be handled later by inlineStackProbe().
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::PROBED_STACKALLOC))
+ .addImm(StackSize);
+ }
+ else {
+ emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
+ buildCFAOffs(MBB, MBBI, DL, SPOffsetFromCFA + Delta, ZII);
+ }
SPOffsetFromCFA += Delta;
- if (StoreBackchain)
+ if (StoreBackchain) {
+ // The back chain is stored topmost with packed-stack.
+ int Offset = usePackedStack(MF) ? SystemZMC::CallFrameSize - 8 : 0;
BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
- .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0)
- .addReg(0);
+ .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D)
+ .addImm(Offset).addReg(0);
+ }
}
if (HasFP) {
@@ -525,11 +522,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(SystemZ::R15D);
// Add CFI for the new frame location.
- unsigned HardFP = MRI->getDwarfRegNum(SystemZ::R11D, true);
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaRegister(nullptr, HardFP));
- BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ buildDefCFAReg(MBB, MBBI, DL, SystemZ::R11D, ZII);
// Mark the FramePtr as live at the beginning of every block except
// the entry block. (We'll have marked R11 as live on entry when
@@ -560,7 +553,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
// Add CFI for the this save.
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
- unsigned IgnoredFrameReg;
+ Register IgnoredFrameReg;
int64_t Offset =
getFrameIndexReference(MF, Save.getFrameIdx(), IgnoredFrameReg);
@@ -622,6 +615,91 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
+void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologMBB) const {
+ auto *ZII =
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>();
+ const SystemZTargetLowering &TLI = *STI.getTargetLowering();
+
+ MachineInstr *StackAllocMI = nullptr;
+ for (MachineInstr &MI : PrologMBB)
+ if (MI.getOpcode() == SystemZ::PROBED_STACKALLOC) {
+ StackAllocMI = &MI;
+ break;
+ }
+ if (StackAllocMI == nullptr)
+ return;
+ uint64_t StackSize = StackAllocMI->getOperand(0).getImm();
+ const unsigned ProbeSize = TLI.getStackProbeSize(MF);
+ uint64_t NumFullBlocks = StackSize / ProbeSize;
+ uint64_t Residual = StackSize % ProbeSize;
+ int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP;
+ MachineBasicBlock *MBB = &PrologMBB;
+ MachineBasicBlock::iterator MBBI = StackAllocMI;
+ const DebugLoc DL = StackAllocMI->getDebugLoc();
+
+ // Allocate a block of Size bytes on the stack and probe it.
+ auto allocateAndProbe = [&](MachineBasicBlock &InsMBB,
+ MachineBasicBlock::iterator InsPt, unsigned Size,
+ bool EmitCFI) -> void {
+ emitIncrement(InsMBB, InsPt, DL, SystemZ::R15D, -int64_t(Size), ZII);
+ if (EmitCFI) {
+ SPOffsetFromCFA -= Size;
+ buildCFAOffs(InsMBB, InsPt, DL, SPOffsetFromCFA, ZII);
+ }
+ // Probe by means of a volatile compare.
+ MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
+ BuildMI(InsMBB, InsPt, DL, ZII->get(SystemZ::CG))
+ .addReg(SystemZ::R0D, RegState::Undef)
+ .addReg(SystemZ::R15D).addImm(Size - 8).addReg(0)
+ .addMemOperand(MMO);
+ };
+
+ if (NumFullBlocks < 3) {
+ // Emit unrolled probe statements.
+ for (unsigned int i = 0; i < NumFullBlocks; i++)
+ allocateAndProbe(*MBB, MBBI, ProbeSize, true/*EmitCFI*/);
+ } else {
+ // Emit a loop probing the pages.
+ uint64_t LoopAlloc = ProbeSize * NumFullBlocks;
+ SPOffsetFromCFA -= LoopAlloc;
+
+ BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R1D)
+ .addReg(SystemZ::R15D);
+ buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R1D, ZII);
+ emitIncrement(*MBB, MBBI, DL, SystemZ::R1D, -int64_t(LoopAlloc), ZII);
+ buildCFAOffs(*MBB, MBBI, DL, -int64_t(SystemZMC::CallFrameSize + LoopAlloc),
+ ZII);
+
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MBBI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB);
+ MBB->addSuccessor(LoopMBB);
+ LoopMBB->addSuccessor(LoopMBB);
+ LoopMBB->addSuccessor(DoneMBB);
+
+ MBB = LoopMBB;
+ allocateAndProbe(*MBB, MBB->end(), ProbeSize, false/*EmitCFI*/);
+ BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::CLGR))
+ .addReg(SystemZ::R15D).addReg(SystemZ::R1D);
+ BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_GT).addMBB(MBB);
+
+ MBB = DoneMBB;
+ MBBI = DoneMBB->begin();
+ buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R15D, ZII);
+
+ recomputeLiveIns(*DoneMBB);
+ recomputeLiveIns(*LoopMBB);
+ }
+
+ if (Residual)
+ allocateAndProbe(*MBB, MBBI, Residual, true/*EmitCFI*/);
+
+ StackAllocMI->eraseFromParent();
+}
+
bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
MF.getFrameInfo().hasVarSizedObjects() ||
@@ -639,7 +717,7 @@ SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI,
- unsigned &FrameReg) const {
+ Register &FrameReg) const {
// Our incoming SP is actually SystemZMC::CallFrameSize below the CFA, so
// add that difference here.
int64_t Offset =
@@ -664,14 +742,43 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
}
}
+unsigned SystemZFrameLowering::getRegSpillOffset(MachineFunction &MF,
+ Register Reg) const {
+ bool IsVarArg = MF.getFunction().isVarArg();
+ bool BackChain = MF.getFunction().hasFnAttribute("backchain");
+ bool SoftFloat = MF.getSubtarget<SystemZSubtarget>().hasSoftFloat();
+ unsigned Offset = RegSpillOffsets[Reg];
+ if (usePackedStack(MF) && !(IsVarArg && !SoftFloat)) {
+ if (SystemZ::GR64BitRegClass.contains(Reg))
+ // Put all GPRs at the top of the Register save area with packed
+ // stack. Make room for the backchain if needed.
+ Offset += BackChain ? 24 : 32;
+ else
+ Offset = 0;
+ }
+ return Offset;
+}
+
int SystemZFrameLowering::
getOrCreateFramePointerSaveIndex(MachineFunction &MF) const {
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
int FI = ZFI->getFramePointerSaveIndex();
if (!FI) {
MachineFrameInfo &MFFrame = MF.getFrameInfo();
- FI = MFFrame.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
+ // The back chain is stored topmost with packed-stack.
+ int Offset = usePackedStack(MF) ? -8 : -SystemZMC::CallFrameSize;
+ FI = MFFrame.CreateFixedObject(8, Offset, false);
ZFI->setFramePointerSaveIndex(FI);
}
return FI;
}
+
+bool SystemZFrameLowering::usePackedStack(MachineFunction &MF) const {
+ bool HasPackedStackAttr = MF.getFunction().hasFnAttribute("packed-stack");
+ bool BackChain = MF.getFunction().hasFnAttribute("backchain");
+ bool SoftFloat = MF.getSubtarget<SystemZSubtarget>().hasSoftFloat();
+ if (HasPackedStackAttr && BackChain && !SoftFloat)
+ report_fatal_error("packed-stack + backchain + hard-float is unsupported.");
+ bool CallConv = MF.getFunction().getCallingConv() != CallingConv::GHC;
+ return HasPackedStackAttr && CallConv;
+}
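inlineStackProbe() above splits the frame into ProbeSize blocks plus a residual. A worked example of that arithmetic, assuming the common 4096-byte probe size (getStackProbeSize() may return something else when a "stack-probe-size" attribute is set):

#include <cstdint>

static void probeSplitExample() {
  uint64_t StackSize = 10000, ProbeSize = 4096;    // ProbeSize is an assumption
  uint64_t NumFullBlocks = StackSize / ProbeSize;  // 2: emitted as unrolled probes
  uint64_t Residual = StackSize % ProbeSize;       // 1808: allocated and probed last
  (void)NumFullBlocks; (void)Residual;             // a loop is used only for >= 3 blocks
}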
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 4189a92b8294..8752acc7e5ae 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -32,33 +32,36 @@ public:
RegScavenger *RS) const override;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- const std::vector<CalleeSavedInfo> &CSI,
+ ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) const override;
- bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBII,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const
- override;
+ bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBII,
+ MutableArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const override;
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologMBB) const override;
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
int getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const override;
+ Register &FrameReg) const override;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
// Return the byte offset from the incoming stack pointer of Reg's
- // ABI-defined save slot. Return 0 if no slot is defined for Reg.
- unsigned getRegSpillOffset(unsigned Reg) const {
- return RegSpillOffsets[Reg];
- }
+ // ABI-defined save slot. Return 0 if no slot is defined for Reg. Adjust
+ // the offset in case MF has packed-stack.
+ unsigned getRegSpillOffset(MachineFunction &MF, Register Reg) const;
// Get or create the frame index of where the old frame pointer is stored.
int getOrCreateFramePointerSaveIndex(MachineFunction &MF) const;
+
+ bool usePackedStack(MachineFunction &MF) const;
};
} // end namespace llvm
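The new getRegSpillOffset(MF, Reg) signature lets the packed-stack adjustment from SystemZFrameLowering.cpp above be checked numerically. Assuming the usual 160-byte register save area with r15's ABI slot at offset 120, the +32/+24 adjustments land the GPRs at the very top, just below the optional back-chain slot:

static_assert(120 + 32 == 160 - 8,
              "without backchain, r15 moves into the topmost 8-byte slot");
static_assert(120 + 24 == 160 - 16,
              "with backchain, the topmost slot is left for the back chain");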
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 3927a977e6fc..37328684399b 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -1456,7 +1456,8 @@ bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N,
auto *StoreA = cast<StoreSDNode>(N);
auto *LoadA = cast<LoadSDNode>(StoreA->getValue().getOperand(1 - I));
auto *LoadB = cast<LoadSDNode>(StoreA->getValue().getOperand(I));
- return !LoadA->isVolatile() && canUseBlockOperation(StoreA, LoadB);
+ return !LoadA->isVolatile() && LoadA->getMemoryVT() == LoadB->getMemoryVT() &&
+ canUseBlockOperation(StoreA, LoadB);
}
void SystemZDAGToDAGISel::Select(SDNode *Node) {
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index ab00069497af..eb1e51341ec4 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -88,25 +88,27 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
else
addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
- if (Subtarget.hasVector()) {
- addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
- addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
- } else {
- addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
- addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
- }
- if (Subtarget.hasVectorEnhancements1())
- addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
- else
- addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
+ if (!useSoftFloat()) {
+ if (Subtarget.hasVector()) {
+ addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
+ addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
+ } else {
+ addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
+ addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
+ }
+ if (Subtarget.hasVectorEnhancements1())
+ addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
+ else
+ addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
- if (Subtarget.hasVector()) {
- addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
- addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
- addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
- addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
- addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
- addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
+ if (Subtarget.hasVector()) {
+ addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
+ }
}
// Compute derived properties from the register classes
@@ -639,12 +641,16 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::STRICT_FP_ROUND);
setTargetDAGCombine(ISD::FP_EXTEND);
+ setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
setTargetDAGCombine(ISD::BSWAP);
setTargetDAGCombine(ISD::SDIV);
setTargetDAGCombine(ISD::UDIV);
setTargetDAGCombine(ISD::SREM);
setTargetDAGCombine(ISD::UREM);
+ setTargetDAGCombine(ISD::INTRINSIC_VOID);
+ setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
// Handle intrinsics.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -666,6 +672,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
IsStrictFPEnabled = true;
}
+bool SystemZTargetLowering::useSoftFloat() const {
+ return Subtarget.hasSoftFloat();
+}
+
EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext &, EVT VT) const {
if (!VT.isVector())
@@ -816,6 +826,15 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
}
+/// Returns true if stack probing through inline assembly is requested.
+bool SystemZTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
+ // If the function specifically requests inline stack probes, emit them.
+ if (MF.getFunction().hasFnAttribute("probe-stack"))
+ return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
+ "inline-asm";
+ return false;
+}
+
bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
// We can use CGFI or CLGFI.
return isInt<32>(Imm) || isUInt<32>(Imm);
@@ -1123,12 +1142,14 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
case 'f': // Floating-point register
- if (VT == MVT::f64)
- return std::make_pair(0U, &SystemZ::FP64BitRegClass);
- else if (VT == MVT::f128)
- return std::make_pair(0U, &SystemZ::FP128BitRegClass);
- return std::make_pair(0U, &SystemZ::FP32BitRegClass);
-
+ if (!useSoftFloat()) {
+ if (VT == MVT::f64)
+ return std::make_pair(0U, &SystemZ::FP64BitRegClass);
+ else if (VT == MVT::f128)
+ return std::make_pair(0U, &SystemZ::FP128BitRegClass);
+ return std::make_pair(0U, &SystemZ::FP32BitRegClass);
+ }
+ break;
case 'v': // Vector register
if (Subtarget.hasVector()) {
if (VT == MVT::f32)
@@ -1156,6 +1177,9 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
SystemZMC::GR64Regs, 16);
}
if (Constraint[1] == 'f') {
+ if (useSoftFloat())
+ return std::make_pair(
+ 0u, static_cast<const TargetRegisterClass *>(nullptr));
if (VT == MVT::f32)
return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
SystemZMC::FP32Regs, 16);
@@ -1166,6 +1190,9 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
SystemZMC::FP64Regs, 16);
}
if (Constraint[1] == 'v') {
+ if (!Subtarget.hasVector())
+ return std::make_pair(
+ 0u, static_cast<const TargetRegisterClass *>(nullptr));
if (VT == MVT::f32)
return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
SystemZMC::VR32Regs, 32);
@@ -1179,6 +1206,19 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
+// FIXME? Maybe this could be a TableGen attribute on some registers and
+// this table could be generated automatically from RegInfo.
+Register SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
+ const MachineFunction &MF) const {
+
+ Register Reg = StringSwitch<Register>(RegName)
+ .Case("r15", SystemZ::R15D)
+ .Default(0);
+ if (Reg)
+ return Reg;
+ report_fatal_error("Invalid register name global variable");
+}
+
void SystemZTargetLowering::
LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
std::vector<SDValue> &Ops,
@@ -1437,17 +1477,19 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
// ...and a similar frame index for the caller-allocated save area
// that will be used to store the incoming registers.
- int64_t RegSaveOffset = -SystemZMC::CallFrameSize;
+ int64_t RegSaveOffset =
+ -SystemZMC::CallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
// Store the FPR varargs in the reserved frame slots. (We store the
// GPRs as part of the prologue.)
- if (NumFixedFPRs < SystemZ::NumArgFPRs) {
+ if (NumFixedFPRs < SystemZ::NumArgFPRs && !useSoftFloat()) {
SDValue MemOps[SystemZ::NumArgFPRs];
for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
- unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
- int FI = MFI.CreateFixedObject(8, RegSaveOffset + Offset, true);
+ unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ArgFPRs[I]);
+ int FI =
+ MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize + Offset, true);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
&SystemZ::FP64BitRegClass);
@@ -1633,6 +1675,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (IsTailCall)
return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
+ DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
Glue = Chain.getValue(1);
// Mark the end of the call, which is glued to the call itself.
@@ -2020,8 +2063,9 @@ static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
// We must have an 8- or 16-bit load.
auto *Load = cast<LoadSDNode>(C.Op0);
- unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
- if (NumBits != 8 && NumBits != 16)
+ unsigned NumBits = Load->getMemoryVT().getSizeInBits();
+ if ((NumBits != 8 && NumBits != 16) ||
+ NumBits != Load->getMemoryVT().getStoreSizeInBits())
return;
// The load must be an extending one and the constant must be within the
@@ -2161,15 +2205,6 @@ static bool shouldSwapCmpOperands(const Comparison &C) {
return false;
}
-// Return a version of comparison CC mask CCMask in which the LT and GT
-// actions are swapped.
-static unsigned reverseCCMask(unsigned CCMask) {
- return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
- (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
- (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
- (CCMask & SystemZ::CCMASK_CMP_UO));
-}
-
// Check whether C tests for equality between X and Y and whether X - Y
// or Y - X is also computed. In that case it's better to compare the
// result of the subtraction against zero.
@@ -2205,7 +2240,7 @@ static void adjustForFNeg(Comparison &C) {
SDNode *N = *I;
if (N->getOpcode() == ISD::FNEG) {
C.Op0 = SDValue(N, 0);
- C.CCMask = reverseCCMask(C.CCMask);
+ C.CCMask = SystemZ::reverseCCMask(C.CCMask);
return;
}
}
@@ -2572,7 +2607,7 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
if (shouldSwapCmpOperands(C)) {
std::swap(C.Op0, C.Op1);
- C.CCMask = reverseCCMask(C.CCMask);
+ C.CCMask = SystemZ::reverseCCMask(C.CCMask);
}
adjustForTestUnderMask(DAG, DL, C);
@@ -3103,7 +3138,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SystemZConstantPoolValue *CPV =
SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
- Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
Offset = DAG.getLoad(
PtrVT, DL, DAG.getEntryNode(), Offset,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
@@ -3118,7 +3153,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SystemZConstantPoolValue *CPV =
SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
- Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
Offset = DAG.getLoad(
PtrVT, DL, DAG.getEntryNode(), Offset,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
@@ -3136,7 +3171,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
// Add the per-symbol offset.
CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
- SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
+ SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
DTPOffset = DAG.getLoad(
PtrVT, DL, DAG.getEntryNode(), DTPOffset,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
@@ -3161,7 +3196,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SystemZConstantPoolValue *CPV =
SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
- Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
Offset = DAG.getLoad(
PtrVT, DL, DAG.getEntryNode(), Offset,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
@@ -3202,11 +3237,11 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
SDValue Result;
if (CP->isMachineConstantPoolEntry())
- Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
- CP->getAlignment());
+ Result =
+ DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
else
- Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
- CP->getAlignment(), CP->getOffset());
+ Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
+ CP->getOffset());
// Use LARL to load the address of the constant pool entry.
return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
@@ -3214,6 +3249,8 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
+ auto *TFL =
+ static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setFrameAddressIsTaken(true);
@@ -3222,9 +3259,12 @@ SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ // Return null if the back chain is not present.
+ bool HasBackChain = MF.getFunction().hasFnAttribute("backchain");
+ if (TFL->usePackedStack(MF) && !HasBackChain)
+ return DAG.getConstant(0, DL, PtrVT);
+
// By definition, the frame address is the address of the back chain.
- auto *TFL =
- static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
@@ -3355,9 +3395,9 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
SDLoc DL(Op);
return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
- /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
- /*isTailCall*/false,
- MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
+ Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
+ /*isTailCall*/ false, MachinePointerInfo(DstSV),
+ MachinePointerInfo(SrcSV));
}
SDValue SystemZTargetLowering::
@@ -3398,10 +3438,17 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
// Get the new stack pointer value.
- SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
-
- // Copy the new stack pointer back.
- Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
+ SDValue NewSP;
+ if (hasInlineStackProbe(MF)) {
+ NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
+ DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
+ Chain = NewSP.getValue(1);
+ }
+ else {
+ NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
+ // Copy the new stack pointer back.
+ Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
+ }
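
The PROBED_ALLOCA path is taken only when hasInlineStackProbe(MF) is true. As a hedged sketch, the check presumably keys off the usual probe-stack function attribute; the exact spelling below is an assumption based on how other targets expose inline probing, not something stated in this patch:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Function.h"

// Returns true when the function asks for inline stack probing.
static bool wantsInlineStackProbe(const llvm::MachineFunction &MF) {
  const llvm::Function &F = MF.getFunction();
  return F.hasFnAttribute("probe-stack") &&
         F.getFnAttribute("probe-stack").getValueAsString() == "inline-asm";
}
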
// The allocated data lives above the 160 bytes allocated for the standard
// frame, plus any outgoing stack arguments. We don't know how much that
@@ -3995,7 +4042,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
}
MachineMemOperand::Flags
-SystemZTargetLowering::getMMOFlags(const Instruction &I) const {
+SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
// Because of how we convert atomic_load and atomic_store to normal loads and
// stores in the DAG, we need to ensure that the MMOs are marked volatile
// since DAGCombine hasn't been updated to account for atomic, but non
@@ -4362,7 +4409,7 @@ static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
}
// Bytes is a VPERM-like permute vector, except that -1 is used for
-// undefined bytes. Return true if it can be performed using VSLDI.
+// undefined bytes. Return true if it can be performed using VSLDB.
// When returning true, set StartIndex to the shift amount and OpNo0
// and OpNo1 to the VPERM operands that should be used as the first
// and second shift operand respectively.
@@ -4420,23 +4467,86 @@ static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
return Op;
}
+static bool isZeroVector(SDValue N) {
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+ if (N->getOpcode() == ISD::SPLAT_VECTOR)
+ if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
+ return Op->getZExtValue() == 0;
+ return ISD::isBuildVectorAllZeros(N.getNode());
+}
+
+// Return the index of the zero/undef vector, or UINT32_MAX if not found.
+static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
+ for (unsigned I = 0; I < Num ; I++)
+ if (isZeroVector(Ops[I]))
+ return I;
+ return UINT32_MAX;
+}
+
// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
-// VSLDI or VPERM.
+// VSLDB or VPERM.
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
SDValue *Ops,
const SmallVectorImpl<int> &Bytes) {
for (unsigned I = 0; I < 2; ++I)
Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
- // First see whether VSLDI can be used.
+ // First see whether VSLDB can be used.
unsigned StartIndex, OpNo0, OpNo1;
if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
Ops[OpNo1],
DAG.getTargetConstant(StartIndex, DL, MVT::i32));
- // Fall back on VPERM. Construct an SDNode for the permute vector.
+ // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
+ // eliminate a zero vector by reusing any zero index in the permute vector.
+ unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
+ if (ZeroVecIdx != UINT32_MAX) {
+ bool MaskFirst = true;
+ int ZeroIdx = -1;
+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
+ unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
+ unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
+ if (OpNo == ZeroVecIdx && I == 0) {
+ // If the first byte is zero, use mask as first operand.
+ ZeroIdx = 0;
+ break;
+ }
+ if (OpNo != ZeroVecIdx && Byte == 0) {
+ // If mask contains a zero, use it by placing that vector first.
+ ZeroIdx = I + SystemZ::VectorBytes;
+ MaskFirst = false;
+ break;
+ }
+ }
+ if (ZeroIdx != -1) {
+ SDValue IndexNodes[SystemZ::VectorBytes];
+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
+ if (Bytes[I] >= 0) {
+ unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
+ unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
+ if (OpNo == ZeroVecIdx)
+ IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
+ else {
+ unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
+ IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
+ }
+ } else
+ IndexNodes[I] = DAG.getUNDEF(MVT::i32);
+ }
+ SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
+ SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
+ if (MaskFirst)
+ return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
+ Mask);
+ else
+ return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
+ Mask);
+ }
+ }
+
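
The mask construction above leans on the byte-selection semantics of VPERM. A minimal software model of those semantics (a simplification for illustration, not the backend's own code) makes it clear why a mask byte that points at a byte known to be zero produces zero, which is what allows the zero operand to be replaced by the mask vector itself:

#include <array>
#include <cstdint>

using V16 = std::array<uint8_t, 16>;

// Each mask byte selects, by its low five bits, one byte out of the 32-byte
// concatenation of the two source operands.
static V16 vperm(const V16 &A, const V16 &B, const V16 &Mask) {
  V16 Result{};
  for (unsigned I = 0; I < 16; ++I) {
    unsigned Idx = Mask[I] & 31;
    Result[I] = Idx < 16 ? A[Idx] : B[Idx - 16];
  }
  return Result;
}
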
SDValue IndexNodes[SystemZ::VectorBytes];
for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
if (Bytes[I] >= 0)
@@ -4444,16 +4554,20 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
else
IndexNodes[I] = DAG.getUNDEF(MVT::i32);
SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
- return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
+ return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
+ (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
}
namespace {
// Describes a general N-operand vector shuffle.
struct GeneralShuffle {
- GeneralShuffle(EVT vt) : VT(vt) {}
+ GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
void addUndef();
bool add(SDValue, unsigned);
SDValue getNode(SelectionDAG &, const SDLoc &);
+ void tryPrepareForUnpack();
+ bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
+ SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
// The operands of the shuffle.
SmallVector<SDValue, SystemZ::VectorBytes> Ops;
@@ -4465,6 +4579,9 @@ struct GeneralShuffle {
// The type of the shuffle result.
EVT VT;
+
+ // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
+ unsigned UnpackFromEltSize;
};
}
@@ -4547,6 +4664,9 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
if (Ops.size() == 0)
return DAG.getUNDEF(VT);
+ // Use a single unpack if possible as the last operation.
+ tryPrepareForUnpack();
+
// Make sure that there are at least two shuffle operands.
if (Ops.size() == 1)
Ops.push_back(DAG.getUNDEF(MVT::v16i8));
@@ -4612,13 +4732,117 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
// to VPERM.
unsigned OpNo0, OpNo1;
SDValue Op;
- if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
+ if (unpackWasPrepared() && Ops[1].isUndef())
+ Op = Ops[0];
+ else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
else
Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
+
+ Op = insertUnpackIfPrepared(DAG, DL, Op);
+
return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
+#ifndef NDEBUG
+static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
+ dbgs() << Msg.c_str() << " { ";
+ for (unsigned i = 0; i < Bytes.size(); i++)
+ dbgs() << Bytes[i] << " ";
+ dbgs() << "}\n";
+}
+#endif
+
+// If the Bytes vector matches an unpack operation, prepare to do the unpack
+// after all else by removing the zero vector and the effect of the unpack on
+// Bytes.
+void GeneralShuffle::tryPrepareForUnpack() {
+ uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
+ if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
+ return;
+
+ // Only do this if removing the zero vector reduces the depth, otherwise
+ // the critical path will increase with the final unpack.
+ if (Ops.size() > 2 &&
+ Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
+ return;
+
+ // Find an unpack that would allow removing the zero vector from Ops.
+ UnpackFromEltSize = 1;
+ for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
+ bool MatchUnpack = true;
+ SmallVector<int, SystemZ::VectorBytes> SrcBytes;
+ for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
+ unsigned ToEltSize = UnpackFromEltSize * 2;
+ bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
+ if (!IsZextByte)
+ SrcBytes.push_back(Bytes[Elt]);
+ if (Bytes[Elt] != -1) {
+ unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
+ if (IsZextByte != (OpNo == ZeroVecOpNo)) {
+ MatchUnpack = false;
+ break;
+ }
+ }
+ }
+ if (MatchUnpack) {
+ if (Ops.size() == 2) {
+ // Don't use unpack if a single source operand needs rearrangement.
+ for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
+ if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
+ UnpackFromEltSize = UINT_MAX;
+ return;
+ }
+ }
+ break;
+ }
+ }
+ if (UnpackFromEltSize > 4)
+ return;
+
+ LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
+ << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
+ << ".\n";
+ dumpBytes(Bytes, "Original Bytes vector:"););
+
+ // Apply the unpack in reverse to the Bytes array.
+ unsigned B = 0;
+ for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
+ Elt += UnpackFromEltSize;
+ for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
+ Bytes[B] = Bytes[Elt];
+ }
+ while (B < SystemZ::VectorBytes)
+ Bytes[B++] = -1;
+
+ // Remove the zero vector from Ops
+ Ops.erase(&Ops[ZeroVecOpNo]);
+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
+ if (Bytes[I] >= 0) {
+ unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
+ if (OpNo > ZeroVecOpNo)
+ Bytes[I] -= SystemZ::VectorBytes;
+ }
+
+ LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
+ dbgs() << "\n";);
+}
+
+SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
+ const SDLoc &DL,
+ SDValue Op) {
+ if (!unpackWasPrepared())
+ return Op;
+ unsigned InBits = UnpackFromEltSize * 8;
+ EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
+ SystemZ::VectorBits / InBits);
+ SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
+ unsigned OutBits = InBits * 2;
+ EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
+ SystemZ::VectorBits / OutBits);
+ return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
+}
+
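
For readers following tryPrepareForUnpack(), a rough model of the final unpack that insertUnpackIfPrepared() emits (an illustration for the byte case, not the backend code) shows why the zero vector can be dropped and the Bytes array compacted beforehand: the leftmost half of the elements is widened and every new high half is filled with zeroes.

#include <array>
#include <cstdint>

// Model of "unpack logical high" from 16 x i8 to 8 x i16.
static std::array<uint16_t, 8> unpackLogicalHigh(const std::array<uint8_t, 16> &In) {
  std::array<uint16_t, 8> Out{};
  for (unsigned I = 0; I < 8; ++I)
    Out[I] = In[I]; // zero-extend byte I of the leftmost half
  return Out;
}
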
// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
static bool isScalarToVector(SDValue Op) {
for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
@@ -5013,9 +5237,8 @@ SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
return DAG.getNode(ISD::BITCAST, DL, VT, Res);
}
-SDValue
-SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
- unsigned UnpackHigh) const {
+SDValue SystemZTargetLowering::
+lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
SDValue PackedOp = Op.getOperand(0);
EVT OutVT = Op.getValueType();
EVT InVT = PackedOp.getValueType();
@@ -5025,11 +5248,39 @@ SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
FromBits *= 2;
EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
SystemZ::VectorBits / FromBits);
- PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp);
+ PackedOp =
+ DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
} while (FromBits != ToBits);
return PackedOp;
}
+// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
+SDValue SystemZTargetLowering::
+lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
+ SDValue PackedOp = Op.getOperand(0);
+ SDLoc DL(Op);
+ EVT OutVT = Op.getValueType();
+ EVT InVT = PackedOp.getValueType();
+ unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned OutNumElts = OutVT.getVectorNumElements();
+ unsigned NumInPerOut = InNumElts / OutNumElts;
+
+ SDValue ZeroVec =
+ DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
+
+ SmallVector<int, 16> Mask(InNumElts);
+ unsigned ZeroVecElt = InNumElts;
+ for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
+ unsigned MaskElt = PackedElt * NumInPerOut;
+ unsigned End = MaskElt + NumInPerOut - 1;
+ for (; MaskElt < End; MaskElt++)
+ Mask[MaskElt] = ZeroVecElt++;
+ Mask[MaskElt] = PackedElt;
+ }
+ SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
+ return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
+}
+
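
A worked instance of the mask construction above (assuming a v16i8 operand zero-extended in-register to v4i32, so NumInPerOut is 4) makes the interleaving explicit: each output element is three bytes taken from the zero vector followed by the original byte.

#include <cstdio>
#include <vector>

int main() {
  unsigned InNumElts = 16, OutNumElts = 4, NumInPerOut = InNumElts / OutNumElts;
  std::vector<int> Mask(InNumElts);
  unsigned ZeroVecElt = InNumElts; // elements >= InNumElts select the zero vector
  for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
    unsigned MaskElt = PackedElt * NumInPerOut;
    unsigned End = MaskElt + NumInPerOut - 1;
    for (; MaskElt < End; MaskElt++)
      Mask[MaskElt] = ZeroVecElt++;
    Mask[MaskElt] = PackedElt;
  }
  for (int M : Mask)
    std::printf("%d ", M); // prints: 16 17 18 0 19 20 21 1 22 23 24 2 25 26 27 3
  std::printf("\n");
  return 0;
}
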
SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
unsigned ByScalar) const {
// Look for cases where a vector shift can use the *_BY_SCALAR form.
@@ -5195,9 +5446,9 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
case ISD::EXTRACT_VECTOR_ELT:
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::SIGN_EXTEND_VECTOR_INREG:
- return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
+ return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
case ISD::ZERO_EXTEND_VECTOR_INREG:
- return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
+ return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
case ISD::SHL:
return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
case ISD::SRL:
@@ -5315,6 +5566,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(BR_CCMASK);
OPCODE(SELECT_CCMASK);
OPCODE(ADJDYNALLOC);
+ OPCODE(PROBED_ALLOCA);
OPCODE(POPCNT);
OPCODE(SMUL_LOHI);
OPCODE(UMUL_LOHI);
@@ -6056,6 +6308,32 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
return SDValue();
}
+SDValue SystemZTargetLowering::combineINT_TO_FP(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ if (DCI.Level != BeforeLegalizeTypes)
+ return SDValue();
+ unsigned Opcode = N->getOpcode();
+ EVT OutVT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op = N->getOperand(0);
+ unsigned OutScalarBits = OutVT.getScalarSizeInBits();
+ unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
+
+ // Insert an extension before type-legalization to avoid scalarization, e.g.:
+ // v2f64 = uint_to_fp v2i16
+ // =>
+ // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
+ if (OutVT.isVector() && OutScalarBits > InScalarBits) {
+ MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(OutVT.getScalarSizeInBits()),
+ OutVT.getVectorNumElements());
+ unsigned ExtOpcode =
+ (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
+ SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
+ return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
+ }
+ return SDValue();
+}
+
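
At the source level, the pattern this combine targets looks like the conversion below (assuming Clang vector extensions and __builtin_convertvector; the combine itself operates on the DAG nodes shown in the comment above). Without the inserted extension, the narrow-to-wide conversion would be scalarized during type legalization.

typedef unsigned short v2u16 __attribute__((vector_size(4)));
typedef double v2f64 __attribute__((vector_size(16)));

// uint_to_fp of v2i16 to v2f64, now combined into zero_extend + uint_to_fp.
v2f64 widenAndConvert(v2u16 In) {
  return __builtin_convertvector(In, v2f64);
}
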
SDValue SystemZTargetLowering::combineBSWAP(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -6243,15 +6521,7 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
return false;
// Compute the effective CC mask for the new branch or select.
- switch (CCMask) {
- case SystemZ::CCMASK_CMP_EQ: break;
- case SystemZ::CCMASK_CMP_NE: break;
- case SystemZ::CCMASK_CMP_LT: CCMask = SystemZ::CCMASK_CMP_GT; break;
- case SystemZ::CCMASK_CMP_GT: CCMask = SystemZ::CCMASK_CMP_LT; break;
- case SystemZ::CCMASK_CMP_LE: CCMask = SystemZ::CCMASK_CMP_GE; break;
- case SystemZ::CCMASK_CMP_GE: CCMask = SystemZ::CCMASK_CMP_LE; break;
- default: return false;
- }
+ CCMask = SystemZ::reverseCCMask(CCMask);
// Return the updated CCReg link.
CCReg = IPM->getOperand(0);
@@ -6367,6 +6637,34 @@ SDValue SystemZTargetLowering::combineIntDIVREM(
return SDValue();
}
+SDValue SystemZTargetLowering::combineINTRINSIC(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ unsigned Id = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (Id) {
+ // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
+ // or larger is simply a vector load.
+ case Intrinsic::s390_vll:
+ case Intrinsic::s390_vlrl:
+ if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ if (C->getZExtValue() >= 15)
+ return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
+ N->getOperand(3), MachinePointerInfo());
+ break;
+ // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
+ case Intrinsic::s390_vstl:
+ case Intrinsic::s390_vstrl:
+ if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+ if (C->getZExtValue() >= 15)
+ return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
+ N->getOperand(4), MachinePointerInfo());
+ break;
+ }
+
+ return SDValue();
+}
+
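
A small software model of VECTOR LOAD WITH LENGTH (an approximation for illustration; the real instruction leaves the untouched bytes unpredictable rather than zero) shows why a constant length operand of 15 or more degenerates to a plain 16-byte load, which is exactly what the combine folds it into:

#include <algorithm>
#include <array>
#include <cstdint>
#include <cstring>

// Len is the highest byte index to load; values above 15 behave like 15.
static std::array<uint8_t, 16> vectorLoadWithLength(const uint8_t *Src,
                                                    uint32_t Len) {
  std::array<uint8_t, 16> V{};
  std::memcpy(V.data(), Src, std::min<uint32_t>(Len, 15) + 1);
  return V;
}
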
SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
return N->getOperand(0);
@@ -6391,6 +6689,8 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
case ISD::BSWAP: return combineBSWAP(N, DCI);
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
@@ -6399,6 +6699,8 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM: return combineIntDIVREM(N, DCI);
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
}
return SDValue();
@@ -6580,7 +6882,7 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
if (IsLogical) {
- Known = Known.zext(BitWidth, true);
+ Known = Known.zext(BitWidth);
} else
Known = Known.sext(BitWidth);
break;
@@ -6609,7 +6911,7 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
// Known has the width of the source operand(s). Adjust if needed to match
// the passed bitwidth.
if (Known.getBitWidth() != BitWidth)
- Known = Known.zextOrTrunc(BitWidth, false);
+ Known = Known.anyextOrTrunc(BitWidth);
}
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
@@ -6690,38 +6992,29 @@ SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
return 1;
}
+unsigned
+SystemZTargetLowering::getStackProbeSize(MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ unsigned StackAlign = TFI->getStackAlignment();
+ assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
+ "Unexpected stack alignment");
+ // The default stack probe size is 4096 if the function has no
+ // stack-probe-size attribute.
+ unsigned StackProbeSize = 4096;
+ const Function &Fn = MF.getFunction();
+ if (Fn.hasFnAttribute("stack-probe-size"))
+ Fn.getFnAttribute("stack-probe-size")
+ .getValueAsString()
+ .getAsInteger(0, StackProbeSize);
+ // Round down to the stack alignment.
+ StackProbeSize &= ~(StackAlign - 1);
+ return StackProbeSize ? StackProbeSize : StackAlign;
+}
+
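
A small numeric check of the round-down step above (assuming the 8-byte SystemZ stack alignment and an attribute-supplied probe size of 4100 chosen for illustration):

#include <cassert>

int main() {
  unsigned StackAlign = 8;
  unsigned StackProbeSize = 4100;   // e.g. from a "stack-probe-size" attribute
  StackProbeSize &= ~(StackAlign - 1);
  assert(StackProbeSize == 4096);   // rounded down to the alignment
  return 0;
}
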
//===----------------------------------------------------------------------===//
// Custom insertion
//===----------------------------------------------------------------------===//
-// Create a new basic block after MBB.
-static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
- MachineFunction &MF = *MBB->getParent();
- MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
- MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
- return NewMBB;
-}
-
-// Split MBB after MI and return the new block (the one that contains
-// instructions after MI).
-static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
- MachineBasicBlock *MBB) {
- MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
- NewMBB->splice(NewMBB->begin(), MBB,
- std::next(MachineBasicBlock::iterator(MI)), MBB->end());
- NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
- return NewMBB;
-}
-
-// Split MBB before MI and return the new block (the one that contains MI).
-static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
- MachineBasicBlock *MBB) {
- MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
- NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
- NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
- return NewMBB;
-}
-
// Force base value Base into a register before MI. Return the register.
static Register forceReg(MachineInstr &MI, MachineOperand &Base,
const SystemZInstrInfo *TII) {
@@ -6892,8 +7185,8 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
bool CCKilled =
(LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB));
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *JoinMBB = splitBlockAfter(LastMI, MBB);
- MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
+ MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
// Unless CC was killed in the last Select instruction, mark it as
// live-in to both FalseMBB and JoinMBB.
@@ -6986,8 +7279,8 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
CCMask ^= CCValid;
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
// Unless CC was killed in the CondStore instruction, mark it as
// live-in to both FalseMBB and JoinMBB.
@@ -7070,8 +7363,8 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
// Insert a basic block for the main loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
// StartMBB:
// ...
@@ -7188,10 +7481,10 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
// Insert 3 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
- MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
- MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
+ MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
+ MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
// StartMBB:
// ...
@@ -7299,9 +7592,9 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
// Insert 2 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
- MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB);
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
+ MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
// StartMBB:
// ...
@@ -7461,7 +7754,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
// When generating more than one CLC, all but the last will need to
// branch to the end when a difference is found.
MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
- splitBlockAfter(MI, MBB) : nullptr);
+ SystemZ::splitBlockAfter(MI, MBB) : nullptr);
// Check for the loop form, in which operand 5 is the trip count.
if (MI.getNumExplicitOperands() > 5) {
@@ -7485,9 +7778,10 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
Register NextCountReg = MRI.createVirtualRegister(RC);
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
- MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
+ MachineBasicBlock *NextMBB =
+ (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
// StartMBB:
// # fall through to LoopMBB
@@ -7603,7 +7897,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
// If there's another CLC to go, branch to the end if a difference
// was found.
if (EndMBB && Length > 0) {
- MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
+ MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
.addMBB(EndMBB);
@@ -7643,8 +7937,8 @@ MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
uint64_t End2Reg = MRI.createVirtualRegister(RC);
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
// StartMBB:
// # fall through to LoopMBB
@@ -7755,6 +8049,97 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
return MBB;
}
+MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
+ MachineInstr &MI, MachineBasicBlock *MBB) const {
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ DebugLoc DL = MI.getDebugLoc();
+ const unsigned ProbeSize = getStackProbeSize(MF);
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SizeReg = MI.getOperand(2).getReg();
+
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
+ MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
+ MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
+ MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
+ MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
+
+ MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
+
+ Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+
+ // LoopTestMBB
+ // BRC TailTestMBB
+ // # fallthrough to LoopBodyMBB
+ StartMBB->addSuccessor(LoopTestMBB);
+ MBB = LoopTestMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
+ .addReg(SizeReg)
+ .addMBB(StartMBB)
+ .addReg(IncReg)
+ .addMBB(LoopBodyMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
+ .addReg(PHIReg)
+ .addImm(ProbeSize);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
+ .addMBB(TailTestMBB);
+ MBB->addSuccessor(LoopBodyMBB);
+ MBB->addSuccessor(TailTestMBB);
+
+ // LoopBodyMBB: Allocate and probe by means of a volatile compare.
+ // J LoopTestMBB
+ MBB = LoopBodyMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
+ .addReg(PHIReg)
+ .addImm(ProbeSize);
+ BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
+ .addReg(SystemZ::R15D)
+ .addImm(ProbeSize);
+ BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
+ .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
+ .setMemRefs(VolLdMMO);
+ BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
+ MBB->addSuccessor(LoopTestMBB);
+
+ // TailTestMBB
+ // BRC DoneMBB
+ // # fallthrough to TailMBB
+ MBB = TailTestMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
+ .addReg(PHIReg)
+ .addImm(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
+ .addMBB(DoneMBB);
+ MBB->addSuccessor(TailMBB);
+ MBB->addSuccessor(DoneMBB);
+
+ // TailMBB
+ // # fallthrough to DoneMBB
+ MBB = TailMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
+ .addReg(SystemZ::R15D)
+ .addReg(PHIReg);
+ BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
+ .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
+ .setMemRefs(VolLdMMO);
+ MBB->addSuccessor(DoneMBB);
+
+ // DoneMBB
+ MBB = DoneMBB;
+ BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
+ .addReg(SystemZ::R15D);
+
+ MI.eraseFromParent();
+ return DoneMBB;
+}
+
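
A rough C++ model of what the block structure above computes (an illustration, not the emitted machine code; the real probe is an 8-byte volatile compare rather than a byte read): the stack pointer moves down at most one probe-sized block at a time, and each newly exposed block is touched before the next step, so a guard page cannot be skipped.

#include <cstdint>

static void probedAllocaModel(volatile char *&SP, uint64_t Size,
                              uint64_t ProbeSize) {
  uint64_t Remaining = Size;
  while (Remaining >= ProbeSize) {            // LoopTestMBB / LoopBodyMBB
    SP -= ProbeSize;
    volatile char Probe = SP[ProbeSize - 8];  // probe 8 bytes below the old SP
    (void)Probe;
    Remaining -= ProbeSize;
  }
  if (Remaining != 0) {                       // TailTestMBB / TailMBB
    SP -= Remaining;
    volatile char Probe = SP[static_cast<int64_t>(Remaining) - 8];
    (void)Probe;
  }
}
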
MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *MBB) const {
switch (MI.getOpcode()) {
@@ -8015,6 +8400,9 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
case SystemZ::LTXBRCompare_VecPseudo:
return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
+ case SystemZ::PROBED_ALLOCA:
+ return emitProbedAlloca(MI, MBB);
+
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, MBB);
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index defcaa6eb6eb..27637762296a 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -83,6 +83,10 @@ enum NodeType : unsigned {
// base of the dynamically-allocatable area.
ADJDYNALLOC,
+ // For allocating stack space when using stack clash protector.
+ // Allocation is performed by block, and each block is probed.
+ PROBED_ALLOCA,
+
// Count number of bits set in operand 0 per byte.
POPCNT,
@@ -393,6 +397,8 @@ public:
explicit SystemZTargetLowering(const TargetMachine &TM,
const SystemZSubtarget &STI);
+ bool useSoftFloat() const override;
+
// Override TargetLowering.
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
return MVT::i32;
@@ -426,6 +432,7 @@ public:
EVT VT) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
+ bool hasInlineStackProbe(MachineFunction &MF) const override;
bool isLegalICmpImmediate(int64_t Imm) const override;
bool isLegalAddImmediate(int64_t Imm) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
@@ -437,6 +444,14 @@ public:
bool *Fast) const override;
bool isTruncateFree(Type *, Type *) const override;
bool isTruncateFree(EVT, EVT) const override;
+
+ bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
+ bool MathUsed) const override {
+ // Form add and sub with overflow intrinsics regardless of any extra
+ // users of the math result.
+ return VT == MVT::i32 || VT == MVT::i64;
+ }
+
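
As an example of what this hook enables (assuming the standard Clang/GCC overflow builtin; the hook itself only answers a legality query), source like the following now always maps to an add-with-overflow node on i32/i64, regardless of whether the sum is also used:

#include <cstdint>

bool addWouldOverflow(int64_t A, int64_t B, int64_t &Sum) {
  return __builtin_add_overflow(A, B, &Sum);
}
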
const char *getTargetNodeName(unsigned Opcode) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
@@ -471,16 +486,19 @@ public:
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
+ Register getRegisterByName(const char *RegName, LLT VT,
+ const MachineFunction &MF) const override;
+
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
- unsigned
+ Register
getExceptionPointerRegister(const Constant *PersonalityFn) const override {
return SystemZ::R6D;
}
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
- unsigned
+ Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
return SystemZ::R7D;
}
@@ -543,6 +561,8 @@ public:
return true;
}
+ unsigned getStackProbeSize(MachineFunction &MF) const;
+
private:
const SystemZSubtarget &Subtarget;
@@ -607,8 +627,8 @@ private:
SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
- unsigned UnpackHigh) const;
+ SDValue lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
bool canTreatAsByteVector(EVT VT) const;
@@ -629,11 +649,13 @@ private:
SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFP_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineINT_TO_FP(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineIntDIVREM(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineINTRINSIC(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue unwrapAddress(SDValue N) const override;
@@ -676,8 +698,11 @@ private:
MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI,
MachineBasicBlock *MBB,
unsigned Opcode) const;
+ MachineBasicBlock *emitProbedAlloca(MachineInstr &MI,
+ MachineBasicBlock *MBB) const;
- MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override;
+ MachineMemOperand::Flags
+ getTargetMMOFlags(const Instruction &I) const override;
const TargetRegisterClass *getRepRegClassFor(MVT VT) const override;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
index ec7639e71f81..9fc786f92635 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
namespace llvm {
@@ -36,7 +35,7 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI) {
int64_t Offset = 0;
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI, Offset), Flags,
- MFFrame.getObjectSize(FI), MFFrame.getObjectAlignment(FI));
+ MFFrame.getObjectSize(FI), MFFrame.getObjectAlign(FI));
return MIB.addFrameIndex(FI).addImm(Offset).addReg(0).addMemOperand(MMO);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 6d03274fe8a6..337164d55e5f 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -438,8 +438,8 @@ let Uses = [FPC], mayRaiseFPException = 1,
def ADBR : BinaryRRE<"adbr", 0xB31A, any_fadd, FP64, FP64>;
def AXBR : BinaryRRE<"axbr", 0xB34A, any_fadd, FP128, FP128>;
}
- def AEB : BinaryRXE<"aeb", 0xED0A, any_fadd, FP32, load, 4>;
- def ADB : BinaryRXE<"adb", 0xED1A, any_fadd, FP64, load, 8>;
+ defm AEB : BinaryRXEAndPseudo<"aeb", 0xED0A, any_fadd, FP32, load, 4>;
+ defm ADB : BinaryRXEAndPseudo<"adb", 0xED1A, any_fadd, FP64, load, 8>;
}
// Subtraction.
@@ -449,8 +449,8 @@ let Uses = [FPC], mayRaiseFPException = 1,
def SDBR : BinaryRRE<"sdbr", 0xB31B, any_fsub, FP64, FP64>;
def SXBR : BinaryRRE<"sxbr", 0xB34B, any_fsub, FP128, FP128>;
- def SEB : BinaryRXE<"seb", 0xED0B, any_fsub, FP32, load, 4>;
- def SDB : BinaryRXE<"sdb", 0xED1B, any_fsub, FP64, load, 8>;
+ defm SEB : BinaryRXEAndPseudo<"seb", 0xED0B, any_fsub, FP32, load, 4>;
+ defm SDB : BinaryRXEAndPseudo<"sdb", 0xED1B, any_fsub, FP64, load, 8>;
}
// Multiplication.
@@ -460,8 +460,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def MDBR : BinaryRRE<"mdbr", 0xB31C, any_fmul, FP64, FP64>;
def MXBR : BinaryRRE<"mxbr", 0xB34C, any_fmul, FP128, FP128>;
}
- def MEEB : BinaryRXE<"meeb", 0xED17, any_fmul, FP32, load, 4>;
- def MDB : BinaryRXE<"mdb", 0xED1C, any_fmul, FP64, load, 8>;
+ defm MEEB : BinaryRXEAndPseudo<"meeb", 0xED17, any_fmul, FP32, load, 4>;
+ defm MDB : BinaryRXEAndPseudo<"mdb", 0xED1C, any_fmul, FP64, load, 8>;
}
// f64 multiplication of two FP32 registers.
@@ -503,8 +503,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>;
def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>;
- def MAEB : TernaryRXF<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>;
- def MADB : TernaryRXF<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>;
+ defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>;
+ defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>;
}
// Fused multiply-subtract.
@@ -512,8 +512,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>;
def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>;
- def MSEB : TernaryRXF<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>;
- def MSDB : TernaryRXF<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>;
+ defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>;
+ defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>;
}
// Division.
@@ -522,8 +522,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def DDBR : BinaryRRE<"ddbr", 0xB31D, any_fdiv, FP64, FP64>;
def DXBR : BinaryRRE<"dxbr", 0xB34D, any_fdiv, FP128, FP128>;
- def DEB : BinaryRXE<"deb", 0xED0D, any_fdiv, FP32, load, 4>;
- def DDB : BinaryRXE<"ddb", 0xED1D, any_fdiv, FP64, load, 8>;
+ defm DEB : BinaryRXEAndPseudo<"deb", 0xED0D, any_fdiv, FP32, load, 4>;
+ defm DDB : BinaryRXEAndPseudo<"ddb", 0xED1D, any_fdiv, FP64, load, 8>;
}
// Divide to integer.
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index f064d33ac2f3..50f1e09c6ee5 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -2334,49 +2334,49 @@ class FixedCmpBranchRSYb<CondVariant V, string mnemonic, bits<16> opcode,
class BranchUnaryRI<string mnemonic, bits<12> opcode, RegisterOperand cls>
: InstRIb<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget16:$RI2),
- mnemonic##"\t$R1, $RI2", []> {
+ mnemonic#"\t$R1, $RI2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
class BranchUnaryRIL<string mnemonic, bits<12> opcode, RegisterOperand cls>
: InstRILb<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget32:$RI2),
- mnemonic##"\t$R1, $RI2", []> {
+ mnemonic#"\t$R1, $RI2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
class BranchUnaryRR<string mnemonic, bits<8> opcode, RegisterOperand cls>
: InstRR<opcode, (outs cls:$R1), (ins cls:$R1src, GR64:$R2),
- mnemonic##"\t$R1, $R2", []> {
+ mnemonic#"\t$R1, $R2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
class BranchUnaryRRE<string mnemonic, bits<16> opcode, RegisterOperand cls>
: InstRRE<opcode, (outs cls:$R1), (ins cls:$R1src, GR64:$R2),
- mnemonic##"\t$R1, $R2", []> {
+ mnemonic#"\t$R1, $R2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
class BranchUnaryRX<string mnemonic, bits<8> opcode, RegisterOperand cls>
: InstRXa<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr12only:$XBD2),
- mnemonic##"\t$R1, $XBD2", []> {
+ mnemonic#"\t$R1, $XBD2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
class BranchUnaryRXY<string mnemonic, bits<16> opcode, RegisterOperand cls>
: InstRXYa<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr20only:$XBD2),
- mnemonic##"\t$R1, $XBD2", []> {
+ mnemonic#"\t$R1, $XBD2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
class BranchBinaryRSI<string mnemonic, bits<8> opcode, RegisterOperand cls>
: InstRSI<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, brtarget16:$RI2),
- mnemonic##"\t$R1, $R3, $RI2", []> {
+ mnemonic#"\t$R1, $R3, $RI2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
@@ -2384,7 +2384,7 @@ class BranchBinaryRSI<string mnemonic, bits<8> opcode, RegisterOperand cls>
class BranchBinaryRIEe<string mnemonic, bits<16> opcode, RegisterOperand cls>
: InstRIEe<opcode, (outs cls:$R1),
(ins cls:$R1src, cls:$R3, brtarget16:$RI2),
- mnemonic##"\t$R1, $R3, $RI2", []> {
+ mnemonic#"\t$R1, $R3, $RI2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
@@ -2392,7 +2392,7 @@ class BranchBinaryRIEe<string mnemonic, bits<16> opcode, RegisterOperand cls>
class BranchBinaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls>
: InstRSa<opcode, (outs cls:$R1),
(ins cls:$R1src, cls:$R3, bdaddr12only:$BD2),
- mnemonic##"\t$R1, $R3, $BD2", []> {
+ mnemonic#"\t$R1, $R3, $BD2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
@@ -2400,7 +2400,7 @@ class BranchBinaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls>
class BranchBinaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
: InstRSYa<opcode,
(outs cls:$R1), (ins cls:$R1src, cls:$R3, bdaddr20only:$BD2),
- mnemonic##"\t$R1, $R3, $BD2", []> {
+ mnemonic#"\t$R1, $R3, $BD2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
@@ -2421,7 +2421,7 @@ class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
multiclass LoadMultipleRSPair<string mnemonic, bits<8> rsOpcode,
bits<16> rsyOpcode, RegisterOperand cls> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : LoadMultipleRS<mnemonic, rsOpcode, cls, bdaddr12pair>;
let DispSize = "20" in
@@ -2487,7 +2487,7 @@ class StoreRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
SDPatternOperator operator, RegisterOperand cls,
bits<5> bytes> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : StoreRX<mnemonic, rxOpcode, operator, cls, bytes, bdxaddr12pair>;
let DispSize = "20" in
@@ -2567,7 +2567,7 @@ class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
multiclass StoreMultipleRSPair<string mnemonic, bits<8> rsOpcode,
bits<16> rsyOpcode, RegisterOperand cls> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : StoreMultipleRS<mnemonic, rsOpcode, cls, bdaddr12pair>;
let DispSize = "20" in
@@ -2807,6 +2807,10 @@ class CondUnaryRSY<string mnemonic, bits<16> opcode,
let mayLoad = 1;
let AccessBytes = bytes;
let CCMaskLast = 1;
+ let OpKey = mnemonic#"r"#cls;
+ let OpType = "mem";
+ let MemKey = mnemonic#cls;
+ let MemType = "target";
}
// Like CondUnaryRSY, but used for the raw assembly form. The condition-code
@@ -2884,7 +2888,7 @@ class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
SDPatternOperator operator, RegisterOperand cls,
bits<5> bytes> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : UnaryRX<mnemonic, rxOpcode, operator, cls, bytes, bdxaddr12pair>;
let DispSize = "20" in
@@ -2907,13 +2911,15 @@ class UnaryVRIaGeneric<string mnemonic, bits<16> opcode, ImmOpWithPattern imm>
class UnaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m4 = 0,
- bits<4> m5 = 0>
+ bits<4> m5 = 0, string fp_mnemonic = "">
: InstVRRa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2),
mnemonic#"\t$V1, $V2",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2)))]> {
let M3 = type;
let M4 = m4;
let M5 = m5;
+ let OpKey = fp_mnemonic#!subst("VR", "FP", !cast<string>(tr1.op));
+ let OpType = "reg";
}
class UnaryVRRaGeneric<string mnemonic, bits<16> opcode, bits<4> m4 = 0,
@@ -2948,7 +2954,7 @@ multiclass UnaryExtraVRRaSPair<string mnemonic, bits<16> opcode,
def : InstAlias<mnemonic#"\t$V1, $V2",
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, 0)>;
let Defs = [CC] in
- def S : UnaryVRRa<mnemonic##"s", opcode, operator_cc, tr1, tr2,
+ def S : UnaryVRRa<mnemonic#"s", opcode, operator_cc, tr1, tr2,
type, 0, 1>;
}
@@ -2992,17 +2998,17 @@ multiclass UnaryVRXAlign<string mnemonic, bits<16> opcode> {
class SideEffectBinaryRX<string mnemonic, bits<8> opcode,
RegisterOperand cls>
: InstRXa<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2),
- mnemonic##"\t$R1, $XBD2", []>;
+ mnemonic#"\t$R1, $XBD2", []>;
class SideEffectBinaryRXY<string mnemonic, bits<16> opcode,
RegisterOperand cls>
: InstRXYa<opcode, (outs), (ins cls:$R1, bdxaddr20only:$XBD2),
- mnemonic##"\t$R1, $XBD2", []>;
+ mnemonic#"\t$R1, $XBD2", []>;
class SideEffectBinaryRILPC<string mnemonic, bits<12> opcode,
RegisterOperand cls>
: InstRILb<opcode, (outs), (ins cls:$R1, pcrel32:$RI2),
- mnemonic##"\t$R1, $RI2", []> {
+ mnemonic#"\t$R1, $RI2", []> {
// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
// However, BDXs have two extra operands and are therefore 6 units more
// complex.
@@ -3045,16 +3051,16 @@ class SideEffectBinarySIL<string mnemonic, bits<16> opcode,
class SideEffectBinarySSa<string mnemonic, bits<8> opcode>
: InstSSa<opcode, (outs), (ins bdladdr12onlylen8:$BDL1, bdaddr12only:$BD2),
- mnemonic##"\t$BDL1, $BD2", []>;
+ mnemonic#"\t$BDL1, $BD2", []>;
class SideEffectBinarySSb<string mnemonic, bits<8> opcode>
: InstSSb<opcode,
(outs), (ins bdladdr12onlylen4:$BDL1, bdladdr12onlylen4:$BDL2),
- mnemonic##"\t$BDL1, $BDL2", []>;
+ mnemonic#"\t$BDL1, $BDL2", []>;
class SideEffectBinarySSf<string mnemonic, bits<8> opcode>
: InstSSf<opcode, (outs), (ins bdaddr12only:$BD1, bdladdr12onlylen8:$BDL2),
- mnemonic##"\t$BD1, $BDL2", []>;
+ mnemonic#"\t$BD1, $BDL2", []>;
class SideEffectBinarySSE<string mnemonic, bits<16> opcode>
: InstSSE<opcode, (outs), (ins bdaddr12only:$BD1, bdaddr12only:$BD2),
@@ -3211,6 +3217,8 @@ class CondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
let CCMaskLast = 1;
let NumOpsKey = !subst("loc", "sel", mnemonic);
let NumOpsValue = "2";
+ let OpKey = mnemonic#cls1;
+ let OpType = "reg";
}
// Like CondBinaryRRF, but used for the raw assembly form. The condition-code
@@ -3252,6 +3260,8 @@ class CondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1,
let CCMaskLast = 1;
let NumOpsKey = mnemonic;
let NumOpsValue = "3";
+ let OpKey = mnemonic#cls1;
+ let OpType = "reg";
}
// Like CondBinaryRRFa, but used for the raw assembly form. The condition-code
@@ -3299,7 +3309,7 @@ multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2,
ImmOpWithPattern imm> {
let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in
- def K : BinaryRIE<mnemonic##"k", opcode2, operator, cls, imm>,
+ def K : BinaryRIE<mnemonic#"k", opcode2, operator, cls, imm>,
Requires<[FeatureDistinctOps]>;
let NumOpsValue = "2" in
def "" : BinaryRI<mnemonic, opcode1, operator, cls, imm>;
@@ -3376,7 +3386,7 @@ multiclass BinaryRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
SDPatternOperator operator, RegisterOperand cls> {
let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in
- def K : BinaryRSY<mnemonic##"k", opcode2, operator, cls>,
+ def K : BinaryRSY<mnemonic#"k", opcode2, operator, cls>,
Requires<[FeatureDistinctOps]>;
let NumOpsValue = "2" in
def "" : BinaryRS<mnemonic, opcode1, operator, cls>;
@@ -3448,7 +3458,7 @@ class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
multiclass BinaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
SDPatternOperator operator, RegisterOperand cls,
SDPatternOperator load, bits<5> bytes> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bytes,
bdxaddr12pair>;
@@ -3479,7 +3489,7 @@ class BinarySIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
multiclass BinarySIPair<string mnemonic, bits<8> siOpcode,
bits<16> siyOpcode, SDPatternOperator operator,
Operand imm> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : BinarySI<mnemonic, siOpcode, operator, imm, bdaddr12pair>;
let DispSize = "20" in
@@ -3575,7 +3585,7 @@ multiclass BinaryVRRbSPair<string mnemonic, bits<16> opcode,
def "" : BinaryVRRb<mnemonic, opcode, operator, tr1, tr2, type,
!and (modifier, 14)>;
let Defs = [CC] in
- def S : BinaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
+ def S : BinaryVRRb<mnemonic#"s", opcode, operator_cc, tr1, tr2, type,
!add (!and (modifier, 14), 1)>;
}
@@ -3604,7 +3614,7 @@ multiclass BinaryExtraVRRbSPair<string mnemonic, bits<16> opcode,
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, 0)>;
let Defs = [CC] in
- def S : BinaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, 1>;
+ def S : BinaryVRRb<mnemonic#"s", opcode, operator_cc, tr1, tr2, type, 1>;
}
multiclass BinaryExtraVRRbSPairGeneric<string mnemonic, bits<16> opcode> {
@@ -3619,7 +3629,7 @@ multiclass BinaryExtraVRRbSPairGeneric<string mnemonic, bits<16> opcode> {
class BinaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m5 = 0,
- bits<4> m6 = 0>
+ bits<4> m6 = 0, string fp_mnemonic = "">
: InstVRRc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3),
mnemonic#"\t$V1, $V2, $V3",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
@@ -3627,6 +3637,8 @@ class BinaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M4 = type;
let M5 = m5;
let M6 = m6;
+ let OpKey = fp_mnemonic#"MemFold"#!subst("VR", "FP", !cast<string>(tr1.op));
+ let OpType = "reg";
}
class BinaryVRRcGeneric<string mnemonic, bits<16> opcode, bits<4> m5 = 0,
@@ -3655,7 +3667,7 @@ multiclass BinaryVRRcSPair<string mnemonic, bits<16> opcode,
def "" : BinaryVRRc<mnemonic, opcode, operator, tr1, tr2, type,
m5, !and (modifier, 14)>;
let Defs = [CC] in
- def S : BinaryVRRc<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
+ def S : BinaryVRRc<mnemonic#"s", opcode, operator_cc, tr1, tr2, type,
m5, !add (!and (modifier, 14), 1)>;
}
@@ -3752,7 +3764,7 @@ class StoreBinaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
multiclass StoreBinaryRSPair<string mnemonic, bits<8> rsOpcode,
bits<16> rsyOpcode, RegisterOperand cls,
bits<5> bytes> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : StoreBinaryRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>;
let DispSize = "20" in
@@ -3892,7 +3904,7 @@ class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
multiclass CompareRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
SDPatternOperator operator, RegisterOperand cls,
SDPatternOperator load, bits<5> bytes> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : CompareRX<mnemonic, rxOpcode, operator, cls,
load, bytes, bdxaddr12pair>;
@@ -3920,7 +3932,7 @@ class CompareRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
multiclass CompareRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
RegisterOperand cls, bits<5> bytes> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : CompareRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>;
let DispSize = "20" in
@@ -3931,7 +3943,7 @@ multiclass CompareRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
class CompareSSb<string mnemonic, bits<8> opcode>
: InstSSb<opcode,
(outs), (ins bdladdr12onlylen4:$BDL1, bdladdr12onlylen4:$BDL2),
- mnemonic##"\t$BDL1, $BDL2", []> {
+ mnemonic#"\t$BDL1, $BDL2", []> {
let isCompare = 1;
let mayLoad = 1;
}
@@ -3978,7 +3990,7 @@ multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
}
class CompareVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- TypedReg tr, bits<4> type>
+ TypedReg tr, bits<4> type, string fp_mnemonic = "">
: InstVRRa<opcode, (outs), (ins tr.op:$V1, tr.op:$V2),
mnemonic#"\t$V1, $V2",
[(set CC, (operator (tr.vt tr.op:$V1), (tr.vt tr.op:$V2)))]> {
@@ -3986,6 +3998,8 @@ class CompareVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M3 = type;
let M4 = 0;
let M5 = 0;
+ let OpKey = fp_mnemonic#!subst("VR", "FP", !cast<string>(tr.op));
+ let OpType = "reg";
}
class CompareVRRaGeneric<string mnemonic, bits<16> opcode>
@@ -4043,7 +4057,7 @@ class TestVRRg<string mnemonic, bits<16> opcode>
class SideEffectTernarySSc<string mnemonic, bits<8> opcode>
: InstSSc<opcode, (outs), (ins bdladdr12onlylen4:$BDL1,
shift12only:$BD2, imm32zx4:$I3),
- mnemonic##"\t$BDL1, $BD2, $I3", []>;
+ mnemonic#"\t$BDL1, $BD2, $I3", []>;
class SideEffectTernaryRRFa<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2,
@@ -4179,7 +4193,7 @@ class TernaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
multiclass TernaryRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
RegisterOperand cls, bits<5> bytes> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : TernaryRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>;
let DispSize = "20" in
@@ -4303,7 +4317,7 @@ multiclass TernaryOptVRRbSPair<string mnemonic, bits<16> opcode,
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, 0)>;
let Defs = [CC] in
- def S : TernaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
+ def S : TernaryVRRb<mnemonic#"s", opcode, operator_cc, tr1, tr2, type,
imm32zx4even_timm, !add(!and (modifier, 14), 1)>;
def : InstAlias<mnemonic#"s\t$V1, $V2, $V3",
(!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
@@ -4371,7 +4385,7 @@ class TernaryVRRdGeneric<string mnemonic, bits<16> opcode>
}
// Ternary operation where the assembler mnemonic has an extra operand to
-// optionally allow specifiying arbitrary M6 values.
+// optionally allow specifying arbitrary M6 values.
multiclass TernaryExtraVRRd<string mnemonic, bits<16> opcode,
SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type> {
@@ -4399,7 +4413,8 @@ multiclass TernaryExtraVRRdGeneric<string mnemonic, bits<16> opcode> {
}
class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0>
+ TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0,
+ string fp_mnemonic = "">
: InstVRRe<opcode, (outs tr1.op:$V1),
(ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4),
mnemonic#"\t$V1, $V2, $V3, $V4",
@@ -4408,6 +4423,8 @@ class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
(tr1.vt tr1.op:$V4)))]> {
let M5 = m5;
let M6 = type;
+ let OpKey = fp_mnemonic#"MemFold"#!subst("VR", "FP", !cast<string>(tr1.op));
+ let OpType = "reg";
}
class TernaryVRReFloatGeneric<string mnemonic, bits<16> opcode>
@@ -4536,7 +4553,7 @@ multiclass QuaternaryOptVRRdSPair<string mnemonic, bits<16> opcode,
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, tr2.op:$V4, 0)>;
let Defs = [CC] in
- def S : QuaternaryVRRd<mnemonic##"s", opcode, operator_cc,
+ def S : QuaternaryVRRd<mnemonic#"s", opcode, operator_cc,
tr1, tr2, tr2, tr2, type,
imm32zx4even_timm, !add (!and (modifier, 14), 1)>;
def : InstAlias<mnemonic#"s\t$V1, $V2, $V3, $V4",
@@ -4630,7 +4647,7 @@ class CmpSwapRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
SDPatternOperator operator, RegisterOperand cls> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : CmpSwapRS<mnemonic, rsOpcode, operator, cls, bdaddr12pair>;
let DispSize = "20" in
@@ -4650,13 +4667,13 @@ class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator>
: InstRXYb<opcode, (outs), (ins imm32zx4:$M1, bdxaddr20only:$XBD2),
- mnemonic##"\t$M1, $XBD2",
+ mnemonic#"\t$M1, $XBD2",
[(operator imm32zx4_timm:$M1, bdxaddr20only:$XBD2)]>;
class PrefetchRILPC<string mnemonic, bits<12> opcode,
SDPatternOperator operator>
: InstRILc<opcode, (outs), (ins imm32zx4_timm:$M1, pcrel32:$RI2),
- mnemonic##"\t$M1, $RI2",
+ mnemonic#"\t$M1, $RI2",
[(operator imm32zx4_timm:$M1, pcrel32:$RI2)]> {
// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
// However, BDXs have two extra operands and are therefore 6 units more
@@ -4765,7 +4782,9 @@ multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator,
class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes,
AddressingMode mode>
: Pseudo<(outs cls:$R1), (ins cls:$R2, mode:$XBD2), []> {
- let OpKey = mnemonic#"rk"#cls;
+ let OpKey = !subst("mscrk", "msrkc",
+ !subst("msgcrk", "msgrkc",
+ mnemonic#"rk"#cls));
let OpType = "mem";
let MemKey = mnemonic#cls;
let MemType = "pseudo";
@@ -4775,6 +4794,40 @@ class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes,
let hasNoSchedulingInfo = 1;
}
+// Same as MemFoldPseudo but for mapping a W... vector instruction
+class MemFoldPseudo_FP<string mnemonic, RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode>
+ : MemFoldPseudo<mnemonic, cls, bytes, mode> {
+ let OpKey = mnemonic#"r"#"MemFold"#cls;
+}
+
+class MemFoldPseudo_FPTern<string mnemonic, RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode>
+ : Pseudo<(outs cls:$R1), (ins cls:$R2, cls:$R3, mode:$XBD2), []> {
+ let OpKey = mnemonic#"r"#"MemFold"#cls;
+ let OpType = "mem";
+ let MemKey = mnemonic#cls;
+ let MemType = "pseudo";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+ let HasIndex = 1;
+ let hasNoSchedulingInfo = 1;
+}
+
+// Same as MemFoldPseudo but for Load On Condition with CC operands.
+class MemFoldPseudo_CondMove<string mnemonic, RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode>
+ : Pseudo<(outs cls:$R1),
+ (ins cls:$R2, mode:$XBD2, cond4:$valid, cond4:$M3), []> {
+ let OpKey = !subst("loc", "sel", mnemonic)#"r"#cls;
+ let OpType = "mem";
+ let MemKey = mnemonic#cls;
+ let MemType = "pseudo";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+ let hasNoSchedulingInfo = 1;
+}
+
// Like CompareRI, but expanded after RA depending on the choice of register.
class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,
ImmOpWithPattern imm>
@@ -4813,6 +4866,8 @@ class CondBinaryRRFPseudo<string mnemonic, RegisterOperand cls1,
let CCMaskLast = 1;
let NumOpsKey = !subst("loc", "sel", mnemonic);
let NumOpsValue = "2";
+ let OpKey = mnemonic#cls1;
+ let OpType = "reg";
}
// Like CondBinaryRRFa, but expanded after RA depending on the choice of
@@ -4826,6 +4881,8 @@ class CondBinaryRRFaPseudo<string mnemonic, RegisterOperand cls1,
let CCMaskLast = 1;
let NumOpsKey = mnemonic;
let NumOpsValue = "3";
+ let OpKey = mnemonic#cls1;
+ let OpType = "reg";
}
// Like CondBinaryRIE, but expanded after RA depending on the choice of
@@ -4842,8 +4899,9 @@ class CondBinaryRIEPseudo<RegisterOperand cls, ImmOpWithPattern imm>
// Like CondUnaryRSY, but expanded after RA depending on the choice of
// register.
-class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls,
- bits<5> bytes, AddressingMode mode = bdaddr20only>
+class CondUnaryRSYPseudo<string mnemonic, SDPatternOperator operator,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only>
: Pseudo<(outs cls:$R1),
(ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$R3),
[(set cls:$R1,
@@ -4854,6 +4912,10 @@ class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls,
let mayLoad = 1;
let AccessBytes = bytes;
let CCMaskLast = 1;
+ let OpKey = mnemonic#"r"#cls;
+ let OpType = "mem";
+ let MemKey = mnemonic#cls;
+ let MemType = "target";
}
// Like CondStoreRSY, but expanded after RA depending on the choice of
@@ -5039,7 +5101,6 @@ multiclass BinaryRXYAndPseudo<string mnemonic, bits<16> opcode,
SDPatternOperator operator, RegisterOperand cls,
SDPatternOperator load, bits<5> bytes,
AddressingMode mode = bdxaddr20only> {
-
def "" : BinaryRXY<mnemonic, opcode, operator, cls, load, bytes, mode> {
let MemKey = mnemonic#cls;
let MemType = "target";
@@ -5052,7 +5113,7 @@ multiclass BinaryRXPairAndPseudo<string mnemonic, bits<8> rxOpcode,
bits<16> rxyOpcode, SDPatternOperator operator,
RegisterOperand cls,
SDPatternOperator load, bits<5> bytes> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bytes,
bdxaddr12pair> {
let DispSize = "12";
@@ -5066,6 +5127,43 @@ multiclass BinaryRXPairAndPseudo<string mnemonic, bits<8> rxOpcode,
def _MemFoldPseudo : MemFoldPseudo<mnemonic, cls, bytes, bdxaddr12pair>;
}
+multiclass BinaryRXEAndPseudo<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, RegisterOperand cls,
+ SDPatternOperator load, bits<5> bytes> {
+ def "" : BinaryRXE<mnemonic, opcode, operator, cls, load, bytes> {
+ let MemKey = mnemonic#cls;
+ let MemType = "target";
+ }
+ def _MemFoldPseudo : MemFoldPseudo_FP<mnemonic, cls, bytes, bdxaddr12pair>;
+}
+
+multiclass TernaryRXFAndPseudo<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, RegisterOperand cls1,
+ RegisterOperand cls2, SDPatternOperator load,
+ bits<5> bytes> {
+ def "" : TernaryRXF<mnemonic, opcode, operator, cls1, cls2, load, bytes> {
+ let MemKey = mnemonic#cls1;
+ let MemType = "target";
+ }
+ def _MemFoldPseudo : MemFoldPseudo_FPTern<mnemonic, cls1, bytes, bdxaddr12pair>;
+}
+
+multiclass CondUnaryRSYPairAndMemFold<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only> {
+ defm "" : CondUnaryRSYPair<mnemonic, opcode, operator, cls, bytes, mode>;
+ def _MemFoldPseudo : MemFoldPseudo_CondMove<mnemonic, cls, bytes, mode>;
+}
+
+multiclass CondUnaryRSYPseudoAndMemFold<string mnemonic,
+ SDPatternOperator operator,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only> {
+ def "" : CondUnaryRSYPseudo<mnemonic, operator, cls, bytes, mode>;
+ def _MemFoldPseudo : MemFoldPseudo_CondMove<mnemonic, cls, bytes, mode>;
+}
+
// Define an instruction that operates on two fixed-length blocks of memory,
// and associated pseudo instructions for operating on blocks of any size.
// The Sequence form uses a straight-line sequence of instructions and
@@ -5086,7 +5184,7 @@ multiclass MemorySS<string mnemonic, bits<8> opcode,
}
}
-// The same, but setting a CC result as comparion operator.
+// The same, but setting a CC result as comparison operator.
multiclass CompareMemorySS<string mnemonic, bits<8> opcode,
SDPatternOperator sequence, SDPatternOperator loop> {
def "" : SideEffectBinarySSa<mnemonic, opcode>;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 97c8fa7aa32e..223cfcba2fac 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -513,8 +513,8 @@ unsigned SystemZInstrInfo::insertBranch(MachineBasicBlock &MBB,
return Count;
}
-bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &Mask,
+bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &Mask,
int &Value) const {
assert(MI.isCompare() && "Caller should have checked for a comparison");
@@ -532,8 +532,9 @@ bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Pred,
- unsigned TrueReg, unsigned FalseReg,
- int &CondCycles, int &TrueCycles,
+ Register DstReg, Register TrueReg,
+ Register FalseReg, int &CondCycles,
+ int &TrueCycles,
int &FalseCycles) const {
// Not all subtargets have LOCR instructions.
if (!STI.hasLoadStoreOnCond())
@@ -565,10 +566,10 @@ bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DstReg,
+ const DebugLoc &DL, Register DstReg,
ArrayRef<MachineOperand> Pred,
- unsigned TrueReg,
- unsigned FalseReg) const {
+ Register TrueReg,
+ Register FalseReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
@@ -606,7 +607,7 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
}
bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
- unsigned Reg,
+ Register Reg,
MachineRegisterInfo *MRI) const {
unsigned DefOpc = DefMI.getOpcode();
if (DefOpc != SystemZ::LHIMux && DefOpc != SystemZ::LHI &&
@@ -819,18 +820,11 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- // Move CC value from/to a GR32.
- if (SrcReg == SystemZ::CC) {
- auto MIB = BuildMI(MBB, MBBI, DL, get(SystemZ::IPM), DestReg);
- if (KillSrc) {
- const MachineFunction *MF = MBB.getParent();
- const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- MIB->addRegisterKilled(SrcReg, TRI);
- }
- return;
- }
+ // Move CC value from a GR32.
if (DestReg == SystemZ::CC) {
- BuildMI(MBB, MBBI, DL, get(SystemZ::TMLH))
+ unsigned Opcode =
+ SystemZ::GR32BitRegClass.contains(SrcReg) ? SystemZ::TMLH : SystemZ::TMHH;
+ BuildMI(MBB, MBBI, DL, get(Opcode))
.addReg(SrcReg, getKillRegState(KillSrc))
.addImm(3 << (SystemZ::IPM_CC - 16));
return;
@@ -855,12 +849,6 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opcode = SystemZ::VLR;
else if (SystemZ::AR32BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::CPYA;
- else if (SystemZ::AR32BitRegClass.contains(DestReg) &&
- SystemZ::GR32BitRegClass.contains(SrcReg))
- Opcode = SystemZ::SAR;
- else if (SystemZ::GR32BitRegClass.contains(DestReg) &&
- SystemZ::AR32BitRegClass.contains(SrcReg))
- Opcode = SystemZ::EAR;
else
llvm_unreachable("Impossible reg-to-reg copy");
@@ -869,7 +857,7 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
void SystemZInstrInfo::storeRegToStackSlot(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
bool isKill, int FrameIdx, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -884,7 +872,7 @@ void SystemZInstrInfo::storeRegToStackSlot(
}
void SystemZInstrInfo::loadRegFromStackSlot(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
int FrameIdx, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -1005,33 +993,36 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
MachineBasicBlock::iterator InsertPt, int FrameIndex,
LiveIntervals *LIS, VirtRegMap *VRM) const {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
const MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Size = MFI.getObjectSize(FrameIndex);
unsigned Opcode = MI.getOpcode();
+ // Check CC liveness if the new instruction introduces a dead def of CC.
+ MCRegUnitIterator CCUnit(SystemZ::CC, TRI);
+ SlotIndex MISlot = SlotIndex();
+ LiveRange *CCLiveRange = nullptr;
+ bool CCLiveAtMI = true;
+ if (LIS) {
+ MISlot = LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
+ CCLiveRange = &LIS->getRegUnit(*CCUnit);
+ CCLiveAtMI = CCLiveRange->liveAt(MISlot);
+ }
+ ++CCUnit;
+ assert(!CCUnit.isValid() && "CC only has one reg unit.");
+
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
- if (LIS != nullptr && (Opcode == SystemZ::LA || Opcode == SystemZ::LAY) &&
+ if (!CCLiveAtMI && (Opcode == SystemZ::LA || Opcode == SystemZ::LAY) &&
isInt<8>(MI.getOperand(2).getImm()) && !MI.getOperand(3).getReg()) {
-
- // Check CC liveness, since new instruction introduces a dead
- // def of CC.
- MCRegUnitIterator CCUnit(SystemZ::CC, TRI);
- LiveRange &CCLiveRange = LIS->getRegUnit(*CCUnit);
- ++CCUnit;
- assert(!CCUnit.isValid() && "CC only has one reg unit.");
- SlotIndex MISlot =
- LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
- if (!CCLiveRange.liveAt(MISlot)) {
- // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST
- MachineInstr *BuiltMI = BuildMI(*InsertPt->getParent(), InsertPt,
- MI.getDebugLoc(), get(SystemZ::AGSI))
- .addFrameIndex(FrameIndex)
- .addImm(0)
- .addImm(MI.getOperand(2).getImm());
- BuiltMI->findRegisterDefOperand(SystemZ::CC)->setIsDead(true);
- CCLiveRange.createDeadDef(MISlot, LIS->getVNInfoAllocator());
- return BuiltMI;
- }
+ // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST
+ MachineInstr *BuiltMI = BuildMI(*InsertPt->getParent(), InsertPt,
+ MI.getDebugLoc(), get(SystemZ::AGSI))
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addImm(MI.getOperand(2).getImm());
+ BuiltMI->findRegisterDefOperand(SystemZ::CC)->setIsDead(true);
+ CCLiveRange->createDeadDef(MISlot, LIS->getVNInfoAllocator());
+ return BuiltMI;
}
return nullptr;
}
@@ -1090,6 +1081,32 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
return BuiltMI;
}
+ unsigned MemImmOpc = 0;
+ switch (Opcode) {
+ case SystemZ::LHIMux:
+ case SystemZ::LHI: MemImmOpc = SystemZ::MVHI; break;
+ case SystemZ::LGHI: MemImmOpc = SystemZ::MVGHI; break;
+ case SystemZ::CHIMux:
+ case SystemZ::CHI: MemImmOpc = SystemZ::CHSI; break;
+ case SystemZ::CGHI: MemImmOpc = SystemZ::CGHSI; break;
+ case SystemZ::CLFIMux:
+ case SystemZ::CLFI:
+ if (isUInt<16>(MI.getOperand(1).getImm()))
+ MemImmOpc = SystemZ::CLFHSI;
+ break;
+ case SystemZ::CLGFI:
+ if (isUInt<16>(MI.getOperand(1).getImm()))
+ MemImmOpc = SystemZ::CLGHSI;
+ break;
+ default: break;
+ }
+ if (MemImmOpc)
+ return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
+ get(MemImmOpc))
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addImm(MI.getOperand(1).getImm());
+
if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) {
bool Op0IsGPR = (Opcode == SystemZ::LGDR);
bool Op1IsGPR = (Opcode == SystemZ::LDGR);
@@ -1159,57 +1176,144 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
}
// If the spilled operand is the final one or the instruction is
- // commutable, try to change <INSN>R into <INSN>.
+ // commutable, try to change <INSN>R into <INSN>. Don't introduce a def of
+ // CC if it is live and MI does not define it.
unsigned NumOps = MI.getNumExplicitOperands();
int MemOpcode = SystemZ::getMemOpcode(Opcode);
+ if (MemOpcode == -1 ||
+ (CCLiveAtMI && !MI.definesRegister(SystemZ::CC) &&
+ get(MemOpcode).hasImplicitDefOfPhysReg(SystemZ::CC)))
+ return nullptr;
+
+ // Check if all other vregs have a usable allocation in the case of vector
+ // to FP conversion.
+ const MCInstrDesc &MCID = MI.getDesc();
+ for (unsigned I = 0, E = MCID.getNumOperands(); I != E; ++I) {
+ const MCOperandInfo &MCOI = MCID.OpInfo[I];
+ if (MCOI.OperandType != MCOI::OPERAND_REGISTER || I == OpNum)
+ continue;
+ const TargetRegisterClass *RC = TRI->getRegClass(MCOI.RegClass);
+ if (RC == &SystemZ::VR32BitRegClass || RC == &SystemZ::VR64BitRegClass) {
+ Register Reg = MI.getOperand(I).getReg();
+ Register PhysReg = Register::isVirtualRegister(Reg)
+ ? (VRM ? VRM->getPhys(Reg) : Register())
+ : Reg;
+ if (!PhysReg ||
+ !(SystemZ::FP32BitRegClass.contains(PhysReg) ||
+ SystemZ::FP64BitRegClass.contains(PhysReg) ||
+ SystemZ::VF128BitRegClass.contains(PhysReg)))
+ return nullptr;
+ }
+ }
+ // Fused multiply and add/sub need to have the same dst and accumulator reg.
+ bool FusedFPOp = (Opcode == SystemZ::WFMADB || Opcode == SystemZ::WFMASB ||
+ Opcode == SystemZ::WFMSDB || Opcode == SystemZ::WFMSSB);
+ if (FusedFPOp) {
+ Register DstReg = VRM->getPhys(MI.getOperand(0).getReg());
+ Register AccReg = VRM->getPhys(MI.getOperand(3).getReg());
+ if (OpNum == 0 || OpNum == 3 || DstReg != AccReg)
+ return nullptr;
+ }
+
+ // Try to swap compare operands if possible.
+ bool NeedsCommute = false;
+ if ((MI.getOpcode() == SystemZ::CR || MI.getOpcode() == SystemZ::CGR ||
+ MI.getOpcode() == SystemZ::CLR || MI.getOpcode() == SystemZ::CLGR ||
+ MI.getOpcode() == SystemZ::WFCDB || MI.getOpcode() == SystemZ::WFCSB ||
+ MI.getOpcode() == SystemZ::WFKDB || MI.getOpcode() == SystemZ::WFKSB) &&
+ OpNum == 0 && prepareCompareSwapOperands(MI))
+ NeedsCommute = true;
+
+ bool CCOperands = false;
+ if (MI.getOpcode() == SystemZ::LOCRMux || MI.getOpcode() == SystemZ::LOCGR ||
+ MI.getOpcode() == SystemZ::SELRMux || MI.getOpcode() == SystemZ::SELGR) {
+ assert(MI.getNumOperands() == 6 && NumOps == 5 &&
+ "LOCR/SELR instruction operands corrupt?");
+ NumOps -= 2;
+ CCOperands = true;
+ }
// See if this is a 3-address instruction that is convertible to 2-address
// and suitable for folding below. Only try this with virtual registers
// and a provided VRM (during regalloc).
- bool NeedsCommute = false;
- if (SystemZ::getTwoOperandOpcode(Opcode) != -1 && MemOpcode != -1) {
+ if (NumOps == 3 && SystemZ::getTargetMemOpcode(MemOpcode) != -1) {
if (VRM == nullptr)
- MemOpcode = -1;
+ return nullptr;
else {
- assert(NumOps == 3 && "Expected two source registers.");
Register DstReg = MI.getOperand(0).getReg();
Register DstPhys =
(Register::isVirtualRegister(DstReg) ? VRM->getPhys(DstReg) : DstReg);
Register SrcReg = (OpNum == 2 ? MI.getOperand(1).getReg()
: ((OpNum == 1 && MI.isCommutable())
? MI.getOperand(2).getReg()
- : Register()));
+ : Register()));
if (DstPhys && !SystemZ::GRH32BitRegClass.contains(DstPhys) && SrcReg &&
Register::isVirtualRegister(SrcReg) &&
DstPhys == VRM->getPhys(SrcReg))
NeedsCommute = (OpNum == 1);
else
- MemOpcode = -1;
+ return nullptr;
}
}
- if (MemOpcode >= 0) {
- if ((OpNum == NumOps - 1) || NeedsCommute) {
- const MCInstrDesc &MemDesc = get(MemOpcode);
- uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
- assert(AccessBytes != 0 && "Size of access should be known");
- assert(AccessBytes <= Size && "Access outside the frame index");
- uint64_t Offset = Size - AccessBytes;
- MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
- MI.getDebugLoc(), get(MemOpcode));
+ if ((OpNum == NumOps - 1) || NeedsCommute || FusedFPOp) {
+ const MCInstrDesc &MemDesc = get(MemOpcode);
+ uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
+ assert(AccessBytes != 0 && "Size of access should be known");
+ assert(AccessBytes <= Size && "Access outside the frame index");
+ uint64_t Offset = Size - AccessBytes;
+ MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
+ MI.getDebugLoc(), get(MemOpcode));
+ if (MI.isCompare()) {
+ assert(NumOps == 2 && "Expected 2 register operands for a compare.");
+ MIB.add(MI.getOperand(NeedsCommute ? 1 : 0));
+ }
+ else if (FusedFPOp) {
+ MIB.add(MI.getOperand(0));
+ MIB.add(MI.getOperand(3));
+ MIB.add(MI.getOperand(OpNum == 1 ? 2 : 1));
+ }
+ else {
MIB.add(MI.getOperand(0));
if (NeedsCommute)
MIB.add(MI.getOperand(2));
else
for (unsigned I = 1; I < OpNum; ++I)
MIB.add(MI.getOperand(I));
- MIB.addFrameIndex(FrameIndex).addImm(Offset);
- if (MemDesc.TSFlags & SystemZII::HasIndex)
- MIB.addReg(0);
- transferDeadCC(&MI, MIB);
- transferMIFlag(&MI, MIB, MachineInstr::NoSWrap);
- return MIB;
}
+ MIB.addFrameIndex(FrameIndex).addImm(Offset);
+ if (MemDesc.TSFlags & SystemZII::HasIndex)
+ MIB.addReg(0);
+ if (CCOperands) {
+ unsigned CCValid = MI.getOperand(NumOps).getImm();
+ unsigned CCMask = MI.getOperand(NumOps + 1).getImm();
+ MIB.addImm(CCValid);
+ MIB.addImm(NeedsCommute ? CCMask ^ CCValid : CCMask);
+ }
+ if (MIB->definesRegister(SystemZ::CC) &&
+ (!MI.definesRegister(SystemZ::CC) ||
+ MI.registerDefIsDead(SystemZ::CC))) {
+ MIB->addRegisterDead(SystemZ::CC, TRI);
+ if (CCLiveRange)
+ CCLiveRange->createDeadDef(MISlot, LIS->getVNInfoAllocator());
+ }
+ // Constrain the register classes if converted from a vector opcode. The
+ // allocated regs are in an FP reg-class per previous check above.
+ for (const MachineOperand &MO : MIB->operands())
+ if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+ if (MRI.getRegClass(Reg) == &SystemZ::VR32BitRegClass)
+ MRI.setRegClass(Reg, &SystemZ::FP32BitRegClass);
+ else if (MRI.getRegClass(Reg) == &SystemZ::VR64BitRegClass)
+ MRI.setRegClass(Reg, &SystemZ::FP64BitRegClass);
+ else if (MRI.getRegClass(Reg) == &SystemZ::VR128BitRegClass)
+ MRI.setRegClass(Reg, &SystemZ::VF128BitRegClass);
+ }
+
+ transferDeadCC(&MI, MIB);
+ transferMIFlag(&MI, MIB, MachineInstr::NoSWrap);
+ transferMIFlag(&MI, MIB, MachineInstr::NoFPExcept);
+ return MIB;
}
return nullptr;
@@ -1718,6 +1822,80 @@ unsigned SystemZInstrInfo::getFusedCompare(unsigned Opcode,
return 0;
}
+bool SystemZInstrInfo::
+prepareCompareSwapOperands(MachineBasicBlock::iterator const MBBI) const {
+ assert(MBBI->isCompare() && MBBI->getOperand(0).isReg() &&
+ MBBI->getOperand(1).isReg() && !MBBI->mayLoad() &&
+ "Not a compare reg/reg.");
+
+ MachineBasicBlock *MBB = MBBI->getParent();
+ bool CCLive = true;
+ SmallVector<MachineInstr *, 4> CCUsers;
+ for (MachineBasicBlock::iterator Itr = std::next(MBBI);
+ Itr != MBB->end(); ++Itr) {
+ if (Itr->readsRegister(SystemZ::CC)) {
+ unsigned Flags = Itr->getDesc().TSFlags;
+ if ((Flags & SystemZII::CCMaskFirst) || (Flags & SystemZII::CCMaskLast))
+ CCUsers.push_back(&*Itr);
+ else
+ return false;
+ }
+ if (Itr->definesRegister(SystemZ::CC)) {
+ CCLive = false;
+ break;
+ }
+ }
+ if (CCLive) {
+ LivePhysRegs LiveRegs(*MBB->getParent()->getSubtarget().getRegisterInfo());
+ LiveRegs.addLiveOuts(*MBB);
+ if (LiveRegs.contains(SystemZ::CC))
+ return false;
+ }
+
+ // Update all CC users.
+ for (unsigned Idx = 0; Idx < CCUsers.size(); ++Idx) {
+ unsigned Flags = CCUsers[Idx]->getDesc().TSFlags;
+ unsigned FirstOpNum = ((Flags & SystemZII::CCMaskFirst) ?
+ 0 : CCUsers[Idx]->getNumExplicitOperands() - 2);
+ MachineOperand &CCMaskMO = CCUsers[Idx]->getOperand(FirstOpNum + 1);
+ unsigned NewCCMask = SystemZ::reverseCCMask(CCMaskMO.getImm());
+ CCMaskMO.setImm(NewCCMask);
+ }
+
+ return true;
+}
+
+unsigned SystemZ::reverseCCMask(unsigned CCMask) {
+ return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
+ (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
+ (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
+ (CCMask & SystemZ::CCMASK_CMP_UO));
+}
+
+MachineBasicBlock *SystemZ::emitBlockAfter(MachineBasicBlock *MBB) {
+ MachineFunction &MF = *MBB->getParent();
+ MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
+ MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
+ return NewMBB;
+}
+
+MachineBasicBlock *SystemZ::splitBlockAfter(MachineBasicBlock::iterator MI,
+ MachineBasicBlock *MBB) {
+ MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
+ NewMBB->splice(NewMBB->begin(), MBB,
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
+ return NewMBB;
+}
+
+MachineBasicBlock *SystemZ::splitBlockBefore(MachineBasicBlock::iterator MI,
+ MachineBasicBlock *MBB) {
+ MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
+ NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
+ NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
+ return NewMBB;
+}
+
unsigned SystemZInstrInfo::getLoadAndTrap(unsigned Opcode) const {
if (!STI.hasLoadAndTrap())
return 0;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 8391970c7d9d..72dafc3c93c2 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -155,6 +155,20 @@ enum FusedCompareType {
namespace SystemZ {
int getTwoOperandOpcode(uint16_t Opcode);
int getTargetMemOpcode(uint16_t Opcode);
+
+// Return a version of comparison CC mask CCMask in which the LT and GT
+// actions are swapped.
+unsigned reverseCCMask(unsigned CCMask);
+
+// Create a new basic block after MBB.
+MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB);
+// Split MBB after MI and return the new block (the one that contains
+// instructions after MI).
+MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
+ MachineBasicBlock *MBB);
+// Split MBB before MI and return the new block (the one that contains MI).
+MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
+ MachineBasicBlock *MBB);
}
class SystemZInstrInfo : public SystemZGenInstrInfo {
@@ -219,15 +233,16 @@ public:
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
const DebugLoc &DL,
int *BytesAdded = nullptr) const override;
- bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &Mask, int &Value) const override;
- bool canInsertSelect(const MachineBasicBlock&, ArrayRef<MachineOperand> Cond,
- unsigned, unsigned, int&, int&, int&) const override;
+ bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &Mask, int &Value) const override;
+ bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
+ Register, Register, Register, int &, int &,
+ int &) const override;
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DstReg,
- ArrayRef<MachineOperand> Cond, unsigned TrueReg,
- unsigned FalseReg) const override;
- bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
+ const DebugLoc &DL, Register DstReg,
+ ArrayRef<MachineOperand> Cond, Register TrueReg,
+ Register FalseReg) const override;
+ bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
MachineRegisterInfo *MRI) const override;
bool isPredicable(const MachineInstr &MI) const override;
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
@@ -247,12 +262,12 @@ public:
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
+ Register SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIdx,
+ Register DestReg, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
@@ -313,6 +328,12 @@ public:
SystemZII::FusedCompareType Type,
const MachineInstr *MI = nullptr) const;
+ // Try to find all CC users of the compare instruction (MBBI) and update
+ // all of them to maintain equivalent behavior after swapping the compare
+ // operands. Return false if not all users can be conclusively found and
+ // handled. The compare instruction is *not* changed.
+ bool prepareCompareSwapOperands(MachineBasicBlock::iterator MBBI) const;
+
// If Opcode is a LOAD opcode for which an associated LOAD AND TRAP
// operation exists, return the opcode for the latter, otherwise return 0.
unsigned getLoadAndTrap(unsigned Opcode) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 9579dcc0d1b6..d5d56ecf6e47 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -29,6 +29,15 @@ let hasNoSchedulingInfo = 1, hasSideEffects = 1 in {
def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src),
[(set GR64:$dst, dynalloc12only:$src)]>;
+let Defs = [R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1,
+ usesCustomInserter = 1 in
+ def PROBED_ALLOCA : Pseudo<(outs GR64:$dst),
+ (ins GR64:$oldSP, GR64:$space),
+ [(set GR64:$dst, (z_probed_alloca GR64:$oldSP, GR64:$space))]>;
+
+let Defs = [R1D, R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1,
+ hasSideEffects = 1 in
+ def PROBED_STACKALLOC : Pseudo<(outs), (ins i64imm:$stacksize), []>;
//===----------------------------------------------------------------------===//
// Branch instructions
@@ -492,7 +501,7 @@ let Predicates = [FeatureMiscellaneousExtensions3], Uses = [CC] in {
let isCommutable = 1 in {
// Expands to SELR or SELFHR or a branch-and-move sequence,
// depending on the choice of registers.
- def SELRMux : CondBinaryRRFaPseudo<"selrmux", GRX32, GRX32, GRX32>;
+ def SELRMux : CondBinaryRRFaPseudo<"MUXselr", GRX32, GRX32, GRX32>;
defm SELFHR : CondBinaryRRFaPair<"selfhr", 0xB9C0, GRH32, GRH32, GRH32>;
defm SELR : CondBinaryRRFaPair<"selr", 0xB9F0, GR32, GR32, GR32>;
defm SELGR : CondBinaryRRFaPair<"selgr", 0xB9E3, GR64, GR64, GR64>;
@@ -525,13 +534,13 @@ let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {
let isCommutable = 1 in {
// Expands to LOCR or LOCFHR or a branch-and-move sequence,
// depending on the choice of registers.
- def LOCRMux : CondBinaryRRFPseudo<"locrmux", GRX32, GRX32>;
+ def LOCRMux : CondBinaryRRFPseudo<"MUXlocr", GRX32, GRX32>;
defm LOCFHR : CondBinaryRRFPair<"locfhr", 0xB9E0, GRH32, GRH32>;
}
// Load on condition. Matched via DAG pattern.
// Expands to LOC or LOCFH, depending on the choice of register.
- def LOCMux : CondUnaryRSYPseudo<simple_load, GRX32, 4>;
+ defm LOCMux : CondUnaryRSYPseudoAndMemFold<"MUXloc", simple_load, GRX32, 4>;
defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, simple_load, GRH32, 4>;
// Store on condition. Expanded from CondStore* pseudos.
@@ -564,7 +573,7 @@ let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in {
// Load on condition. Matched via DAG pattern.
defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, simple_load, GR32, 4>;
- defm LOCG : CondUnaryRSYPair<"locg", 0xEBE2, simple_load, GR64, 8>;
+ defm LOCG : CondUnaryRSYPairAndMemFold<"locg", 0xEBE2, simple_load, GR64, 8>;
// Store on condition. Expanded from CondStore* pseudos.
defm STOC : CondStoreRSYPair<"stoc", 0xEBF3, GR32, 4>;
@@ -1348,8 +1357,8 @@ def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>;
// Multiplication of memory, setting the condition code.
let Predicates = [FeatureMiscellaneousExtensions2], Defs = [CC] in {
- def MSC : BinaryRXY<"msc", 0xE353, null_frag, GR32, load, 4>;
- def MSGC : BinaryRXY<"msgc", 0xE383, null_frag, GR64, load, 8>;
+ defm MSC : BinaryRXYAndPseudo<"msc", 0xE353, null_frag, GR32, load, 4>;
+ defm MSGC : BinaryRXYAndPseudo<"msgc", 0xE383, null_frag, GR64, load, 8>;
}
// Multiplication of a register, producing two results.
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index c945122ee577..e73f1e429c3c 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -177,9 +177,13 @@ let Predicates = [FeatureVector] in {
let Predicates = [FeatureVectorPackedDecimal] in {
// Load rightmost with length. The number of loaded bytes is only known
- // at run time.
- def VLRL : BinaryVSI<"vlrl", 0xE635, int_s390_vlrl, 0>;
+ // at run time. Note that while the instruction will accept immediate
+ // lengths larger than 15 at runtime, those will always result in a trap,
+ // so we never emit them here.
+ def VLRL : BinaryVSI<"vlrl", 0xE635, null_frag, 0>;
def VLRLR : BinaryVRSd<"vlrlr", 0xE637, int_s390_vlrl, 0>;
+ def : Pat<(int_s390_vlrl imm32zx4:$len, bdaddr12only:$addr),
+ (VLRL bdaddr12only:$addr, imm32zx4:$len)>;
}
// Use replicating loads if we're inserting a single element into an
@@ -243,9 +247,13 @@ let Predicates = [FeatureVector] in {
let Predicates = [FeatureVectorPackedDecimal] in {
// Store rightmost with length. The number of stored bytes is only known
- // at run time.
- def VSTRL : StoreLengthVSI<"vstrl", 0xE63D, int_s390_vstrl, 0>;
+ // at run time. Note that while the instruction will accept immediate
+ // lengths larger than 15 at runtime, those will always result in a trap,
+ // so we never emit them here.
+ def VSTRL : StoreLengthVSI<"vstrl", 0xE63D, null_frag, 0>;
def VSTRLR : StoreLengthVRSd<"vstrlr", 0xE63F, int_s390_vstrl, 0>;
+ def : Pat<(int_s390_vstrl VR128:$val, imm32zx4:$len, bdaddr12only:$addr),
+ (VSTRL VR128:$val, bdaddr12only:$addr, imm32zx4:$len)>;
}
//===----------------------------------------------------------------------===//
@@ -463,49 +471,56 @@ defm : GenericVectorOps<v2f64, v2i64>;
//===----------------------------------------------------------------------===//
let Predicates = [FeatureVector] in {
- // Add.
- def VA : BinaryVRRcGeneric<"va", 0xE7F3>;
- def VAB : BinaryVRRc<"vab", 0xE7F3, add, v128b, v128b, 0>;
- def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>;
- def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>;
- def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>;
- def VAQ : BinaryVRRc<"vaq", 0xE7F3, int_s390_vaq, v128q, v128q, 4>;
-
- // Add compute carry.
- def VACC : BinaryVRRcGeneric<"vacc", 0xE7F1>;
- def VACCB : BinaryVRRc<"vaccb", 0xE7F1, int_s390_vaccb, v128b, v128b, 0>;
- def VACCH : BinaryVRRc<"vacch", 0xE7F1, int_s390_vacch, v128h, v128h, 1>;
- def VACCF : BinaryVRRc<"vaccf", 0xE7F1, int_s390_vaccf, v128f, v128f, 2>;
- def VACCG : BinaryVRRc<"vaccg", 0xE7F1, int_s390_vaccg, v128g, v128g, 3>;
- def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, int_s390_vaccq, v128q, v128q, 4>;
-
- // Add with carry.
- def VAC : TernaryVRRdGeneric<"vac", 0xE7BB>;
- def VACQ : TernaryVRRd<"vacq", 0xE7BB, int_s390_vacq, v128q, v128q, 4>;
-
- // Add with carry compute carry.
- def VACCC : TernaryVRRdGeneric<"vaccc", 0xE7B9>;
- def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, int_s390_vacccq, v128q, v128q, 4>;
+ let isCommutable = 1 in {
+ // Add.
+ def VA : BinaryVRRcGeneric<"va", 0xE7F3>;
+ def VAB : BinaryVRRc<"vab", 0xE7F3, add, v128b, v128b, 0>;
+ def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>;
+ def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>;
+ def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>;
+ def VAQ : BinaryVRRc<"vaq", 0xE7F3, int_s390_vaq, v128q, v128q, 4>;
+ }
+
+ let isCommutable = 1 in {
+ // Add compute carry.
+ def VACC : BinaryVRRcGeneric<"vacc", 0xE7F1>;
+ def VACCB : BinaryVRRc<"vaccb", 0xE7F1, int_s390_vaccb, v128b, v128b, 0>;
+ def VACCH : BinaryVRRc<"vacch", 0xE7F1, int_s390_vacch, v128h, v128h, 1>;
+ def VACCF : BinaryVRRc<"vaccf", 0xE7F1, int_s390_vaccf, v128f, v128f, 2>;
+ def VACCG : BinaryVRRc<"vaccg", 0xE7F1, int_s390_vaccg, v128g, v128g, 3>;
+ def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, int_s390_vaccq, v128q, v128q, 4>;
+
+ // Add with carry.
+ def VAC : TernaryVRRdGeneric<"vac", 0xE7BB>;
+ def VACQ : TernaryVRRd<"vacq", 0xE7BB, int_s390_vacq, v128q, v128q, 4>;
+
+ // Add with carry compute carry.
+ def VACCC : TernaryVRRdGeneric<"vaccc", 0xE7B9>;
+ def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, int_s390_vacccq, v128q, v128q, 4>;
+ }
// And.
- def VN : BinaryVRRc<"vn", 0xE768, null_frag, v128any, v128any>;
+ let isCommutable = 1 in
+ def VN : BinaryVRRc<"vn", 0xE768, null_frag, v128any, v128any>;
// And with complement.
def VNC : BinaryVRRc<"vnc", 0xE769, null_frag, v128any, v128any>;
- // Average.
- def VAVG : BinaryVRRcGeneric<"vavg", 0xE7F2>;
- def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, int_s390_vavgb, v128b, v128b, 0>;
- def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>;
- def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>;
- def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>;
-
- // Average logical.
- def VAVGL : BinaryVRRcGeneric<"vavgl", 0xE7F0>;
- def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, int_s390_vavglb, v128b, v128b, 0>;
- def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>;
- def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>;
- def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, int_s390_vavglg, v128g, v128g, 3>;
+ let isCommutable = 1 in {
+ // Average.
+ def VAVG : BinaryVRRcGeneric<"vavg", 0xE7F2>;
+ def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, int_s390_vavgb, v128b, v128b, 0>;
+ def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>;
+ def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>;
+ def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>;
+
+ // Average logical.
+ def VAVGL : BinaryVRRcGeneric<"vavgl", 0xE7F0>;
+ def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, int_s390_vavglb, v128b, v128b, 0>;
+ def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>;
+ def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>;
+ def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, int_s390_vavglg, v128g, v128g, 3>;
+ }
// Checksum.
def VCKSM : BinaryVRRc<"vcksm", 0xE766, int_s390_vcksm, v128f, v128f>;
@@ -524,12 +539,14 @@ let Predicates = [FeatureVector] in {
def VCTZF : UnaryVRRa<"vctzf", 0xE752, cttz, v128f, v128f, 2>;
def VCTZG : UnaryVRRa<"vctzg", 0xE752, cttz, v128g, v128g, 3>;
- // Not exclusive or.
- let Predicates = [FeatureVectorEnhancements1] in
- def VNX : BinaryVRRc<"vnx", 0xE76C, null_frag, v128any, v128any>;
+ let isCommutable = 1 in {
+ // Not exclusive or.
+ let Predicates = [FeatureVectorEnhancements1] in
+ def VNX : BinaryVRRc<"vnx", 0xE76C, null_frag, v128any, v128any>;
- // Exclusive or.
- def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>;
+ // Exclusive or.
+ def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>;
+ }
// Galois field multiply sum.
def VGFM : BinaryVRRcGeneric<"vgfm", 0xE7B4>;
@@ -559,135 +576,145 @@ let Predicates = [FeatureVector] in {
def VLPF : UnaryVRRa<"vlpf", 0xE7DF, z_viabs32, v128f, v128f, 2>;
def VLPG : UnaryVRRa<"vlpg", 0xE7DF, z_viabs64, v128g, v128g, 3>;
- // Maximum.
- def VMX : BinaryVRRcGeneric<"vmx", 0xE7FF>;
- def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>;
- def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>;
- def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>;
- def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>;
-
- // Maximum logical.
- def VMXL : BinaryVRRcGeneric<"vmxl", 0xE7FD>;
- def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>;
- def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>;
- def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>;
- def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>;
+ let isCommutable = 1 in {
+ // Maximum.
+ def VMX : BinaryVRRcGeneric<"vmx", 0xE7FF>;
+ def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>;
+ def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>;
+ def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>;
+ def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>;
+
+ // Maximum logical.
+ def VMXL : BinaryVRRcGeneric<"vmxl", 0xE7FD>;
+ def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>;
+ def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>;
+ def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>;
+ def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>;
+ }
- // Minimum.
- def VMN : BinaryVRRcGeneric<"vmn", 0xE7FE>;
- def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>;
- def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>;
- def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>;
- def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>;
-
- // Minimum logical.
- def VMNL : BinaryVRRcGeneric<"vmnl", 0xE7FC>;
- def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>;
- def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>;
- def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>;
- def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>;
-
- // Multiply and add low.
- def VMAL : TernaryVRRdGeneric<"vmal", 0xE7AA>;
- def VMALB : TernaryVRRd<"vmalb", 0xE7AA, z_muladd, v128b, v128b, 0>;
- def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, z_muladd, v128h, v128h, 1>;
- def VMALF : TernaryVRRd<"vmalf", 0xE7AA, z_muladd, v128f, v128f, 2>;
-
- // Multiply and add high.
- def VMAH : TernaryVRRdGeneric<"vmah", 0xE7AB>;
- def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>;
- def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>;
- def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>;
-
- // Multiply and add logical high.
- def VMALH : TernaryVRRdGeneric<"vmalh", 0xE7A9>;
- def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>;
- def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>;
- def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>;
-
- // Multiply and add even.
- def VMAE : TernaryVRRdGeneric<"vmae", 0xE7AE>;
- def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>;
- def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>;
- def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>;
-
- // Multiply and add logical even.
- def VMALE : TernaryVRRdGeneric<"vmale", 0xE7AC>;
- def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>;
- def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>;
- def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>;
-
- // Multiply and add odd.
- def VMAO : TernaryVRRdGeneric<"vmao", 0xE7AF>;
- def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>;
- def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>;
- def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>;
-
- // Multiply and add logical odd.
- def VMALO : TernaryVRRdGeneric<"vmalo", 0xE7AD>;
- def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>;
- def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>;
- def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>;
-
- // Multiply high.
- def VMH : BinaryVRRcGeneric<"vmh", 0xE7A3>;
- def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>;
- def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>;
- def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>;
-
- // Multiply logical high.
- def VMLH : BinaryVRRcGeneric<"vmlh", 0xE7A1>;
- def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>;
- def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>;
- def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>;
-
- // Multiply low.
- def VML : BinaryVRRcGeneric<"vml", 0xE7A2>;
- def VMLB : BinaryVRRc<"vmlb", 0xE7A2, mul, v128b, v128b, 0>;
- def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, mul, v128h, v128h, 1>;
- def VMLF : BinaryVRRc<"vmlf", 0xE7A2, mul, v128f, v128f, 2>;
-
- // Multiply even.
- def VME : BinaryVRRcGeneric<"vme", 0xE7A6>;
- def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>;
- def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>;
- def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>;
-
- // Multiply logical even.
- def VMLE : BinaryVRRcGeneric<"vmle", 0xE7A4>;
- def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>;
- def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>;
- def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>;
-
- // Multiply odd.
- def VMO : BinaryVRRcGeneric<"vmo", 0xE7A7>;
- def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>;
- def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>;
- def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>;
-
- // Multiply logical odd.
- def VMLO : BinaryVRRcGeneric<"vmlo", 0xE7A5>;
- def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>;
- def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>;
- def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>;
+ let isCommutable = 1 in {
+ // Minimum.
+ def VMN : BinaryVRRcGeneric<"vmn", 0xE7FE>;
+ def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>;
+ def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>;
+ def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>;
+ def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>;
+
+ // Minimum logical.
+ def VMNL : BinaryVRRcGeneric<"vmnl", 0xE7FC>;
+ def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>;
+ def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>;
+ def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>;
+ def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>;
+ }
+
+ let isCommutable = 1 in {
+ // Multiply and add low.
+ def VMAL : TernaryVRRdGeneric<"vmal", 0xE7AA>;
+ def VMALB : TernaryVRRd<"vmalb", 0xE7AA, z_muladd, v128b, v128b, 0>;
+ def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, z_muladd, v128h, v128h, 1>;
+ def VMALF : TernaryVRRd<"vmalf", 0xE7AA, z_muladd, v128f, v128f, 2>;
+
+ // Multiply and add high.
+ def VMAH : TernaryVRRdGeneric<"vmah", 0xE7AB>;
+ def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>;
+ def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>;
+ def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>;
+
+ // Multiply and add logical high.
+ def VMALH : TernaryVRRdGeneric<"vmalh", 0xE7A9>;
+ def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>;
+ def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>;
+ def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>;
+
+ // Multiply and add even.
+ def VMAE : TernaryVRRdGeneric<"vmae", 0xE7AE>;
+ def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>;
+ def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>;
+ def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>;
+
+ // Multiply and add logical even.
+ def VMALE : TernaryVRRdGeneric<"vmale", 0xE7AC>;
+ def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>;
+ def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>;
+ def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>;
+
+ // Multiply and add odd.
+ def VMAO : TernaryVRRdGeneric<"vmao", 0xE7AF>;
+ def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>;
+ def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>;
+ def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>;
+
+ // Multiply and add logical odd.
+ def VMALO : TernaryVRRdGeneric<"vmalo", 0xE7AD>;
+ def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>;
+ def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>;
+ def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>;
+ }
+
+ let isCommutable = 1 in {
+ // Multiply high.
+ def VMH : BinaryVRRcGeneric<"vmh", 0xE7A3>;
+ def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>;
+ def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>;
+ def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>;
+
+ // Multiply logical high.
+ def VMLH : BinaryVRRcGeneric<"vmlh", 0xE7A1>;
+ def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>;
+ def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>;
+ def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>;
+
+ // Multiply low.
+ def VML : BinaryVRRcGeneric<"vml", 0xE7A2>;
+ def VMLB : BinaryVRRc<"vmlb", 0xE7A2, mul, v128b, v128b, 0>;
+ def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, mul, v128h, v128h, 1>;
+ def VMLF : BinaryVRRc<"vmlf", 0xE7A2, mul, v128f, v128f, 2>;
+
+ // Multiply even.
+ def VME : BinaryVRRcGeneric<"vme", 0xE7A6>;
+ def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>;
+ def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>;
+ def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>;
+
+ // Multiply logical even.
+ def VMLE : BinaryVRRcGeneric<"vmle", 0xE7A4>;
+ def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>;
+ def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>;
+ def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>;
+
+ // Multiply odd.
+ def VMO : BinaryVRRcGeneric<"vmo", 0xE7A7>;
+ def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>;
+ def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>;
+ def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>;
+
+ // Multiply logical odd.
+ def VMLO : BinaryVRRcGeneric<"vmlo", 0xE7A5>;
+ def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>;
+ def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>;
+ def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>;
+ }
// Multiply sum logical.
- let Predicates = [FeatureVectorEnhancements1] in {
+ let Predicates = [FeatureVectorEnhancements1], isCommutable = 1 in {
def VMSL : QuaternaryVRRdGeneric<"vmsl", 0xE7B8>;
def VMSLG : QuaternaryVRRd<"vmslg", 0xE7B8, int_s390_vmslg,
v128q, v128g, v128g, v128q, 3>;
}
// Nand.
- let Predicates = [FeatureVectorEnhancements1] in
+ let Predicates = [FeatureVectorEnhancements1], isCommutable = 1 in
def VNN : BinaryVRRc<"vnn", 0xE76E, null_frag, v128any, v128any>;
// Nor.
- def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>;
+ let isCommutable = 1 in
+ def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>;
def : InstAlias<"vnot\t$V1, $V2", (VNO VR128:$V1, VR128:$V2, VR128:$V2), 0>;
// Or.
- def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>;
+ let isCommutable = 1 in
+ def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>;
// Or with complement.
let Predicates = [FeatureVectorEnhancements1] in
@@ -1017,13 +1044,15 @@ multiclass VectorRounding<Instruction insn, TypedReg tr> {
let Predicates = [FeatureVector] in {
// Add.
- let Uses = [FPC], mayRaiseFPException = 1 in {
+ let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in {
def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>;
def VFADB : BinaryVRRc<"vfadb", 0xE7E3, any_fadd, v128db, v128db, 3, 0>;
- def WFADB : BinaryVRRc<"wfadb", 0xE7E3, any_fadd, v64db, v64db, 3, 8>;
+ def WFADB : BinaryVRRc<"wfadb", 0xE7E3, any_fadd, v64db, v64db, 3, 8, 0,
+ "adbr">;
let Predicates = [FeatureVectorEnhancements1] in {
def VFASB : BinaryVRRc<"vfasb", 0xE7E3, any_fadd, v128sb, v128sb, 2, 0>;
- def WFASB : BinaryVRRc<"wfasb", 0xE7E3, any_fadd, v32sb, v32sb, 2, 8>;
+ def WFASB : BinaryVRRc<"wfasb", 0xE7E3, any_fadd, v32sb, v32sb, 2, 8, 0,
+ "aebr">;
def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, any_fadd, v128xb, v128xb, 4, 8>;
}
}
@@ -1104,10 +1133,12 @@ let Predicates = [FeatureVector] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>;
def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, any_fdiv, v128db, v128db, 3, 0>;
- def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, any_fdiv, v64db, v64db, 3, 8>;
+ def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, any_fdiv, v64db, v64db, 3, 8, 0,
+ "ddbr">;
let Predicates = [FeatureVectorEnhancements1] in {
def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, any_fdiv, v128sb, v128sb, 2, 0>;
- def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, any_fdiv, v32sb, v32sb, 2, 8>;
+ def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, any_fdiv, v32sb, v32sb, 2, 8, 0,
+ "debr">;
def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, any_fdiv, v128xb, v128xb, 4, 8>;
}
}
@@ -1135,7 +1166,8 @@ let Predicates = [FeatureVector] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;
def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_any_vextend, v128db, v128sb, 2, 0>;
- def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8>;
+ def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8, 0,
+ "ldebr">;
}
let Predicates = [FeatureVectorEnhancements1] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
@@ -1178,7 +1210,7 @@ let Predicates = [FeatureVector] in {
def : FPMinMax<insn, any_fmaximum, tr, 1>;
}
let Predicates = [FeatureVectorEnhancements1] in {
- let Uses = [FPC], mayRaiseFPException = 1 in {
+ let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in {
def VFMAX : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>;
def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb,
v128db, v128db, 3, 0>;
@@ -1204,7 +1236,7 @@ let Predicates = [FeatureVector] in {
def : FPMinMax<insn, any_fminimum, tr, 1>;
}
let Predicates = [FeatureVectorEnhancements1] in {
- let Uses = [FPC], mayRaiseFPException = 1 in {
+ let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in {
def VFMIN : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>;
def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb,
v128db, v128db, 3, 0>;
@@ -1225,43 +1257,49 @@ let Predicates = [FeatureVector] in {
}
// Multiply.
- let Uses = [FPC], mayRaiseFPException = 1 in {
+ let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in {
def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>;
def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, any_fmul, v128db, v128db, 3, 0>;
- def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, any_fmul, v64db, v64db, 3, 8>;
+ def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, any_fmul, v64db, v64db, 3, 8, 0,
+ "mdbr">;
let Predicates = [FeatureVectorEnhancements1] in {
def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, any_fmul, v128sb, v128sb, 2, 0>;
- def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, any_fmul, v32sb, v32sb, 2, 8>;
+ def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, any_fmul, v32sb, v32sb, 2, 8, 0,
+ "meebr">;
def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, any_fmul, v128xb, v128xb, 4, 8>;
}
}
// Multiply and add.
- let Uses = [FPC], mayRaiseFPException = 1 in {
+ let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in {
def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>;
def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, any_fma, v128db, v128db, 0, 3>;
- def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, any_fma, v64db, v64db, 8, 3>;
+ def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, any_fma, v64db, v64db, 8, 3,
+ "madbr">;
let Predicates = [FeatureVectorEnhancements1] in {
def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, any_fma, v128sb, v128sb, 0, 2>;
- def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, any_fma, v32sb, v32sb, 8, 2>;
+ def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, any_fma, v32sb, v32sb, 8, 2,
+ "maebr">;
def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, any_fma, v128xb, v128xb, 8, 4>;
}
}
// Multiply and subtract.
- let Uses = [FPC], mayRaiseFPException = 1 in {
+ let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in {
def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>;
def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, any_fms, v128db, v128db, 0, 3>;
- def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, any_fms, v64db, v64db, 8, 3>;
+ def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, any_fms, v64db, v64db, 8, 3,
+ "msdbr">;
let Predicates = [FeatureVectorEnhancements1] in {
def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, any_fms, v128sb, v128sb, 0, 2>;
- def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, any_fms, v32sb, v32sb, 8, 2>;
+ def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, any_fms, v32sb, v32sb, 8, 2,
+ "msebr">;
def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, any_fms, v128xb, v128xb, 8, 4>;
}
}
// Negative multiply and add.
- let Uses = [FPC], mayRaiseFPException = 1,
+ let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1,
Predicates = [FeatureVectorEnhancements1] in {
def VFNMA : TernaryVRReFloatGeneric<"vfnma", 0xE79F>;
def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, any_fnma, v128db, v128db, 0, 3>;
@@ -1272,7 +1310,7 @@ let Predicates = [FeatureVector] in {
}
// Negative multiply and subtract.
- let Uses = [FPC], mayRaiseFPException = 1,
+ let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1,
Predicates = [FeatureVectorEnhancements1] in {
def VFNMS : TernaryVRReFloatGeneric<"vfnms", 0xE79E>;
def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, any_fnms, v128db, v128db, 0, 3>;
@@ -1323,10 +1361,12 @@ let Predicates = [FeatureVector] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>;
def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, any_fsqrt, v128db, v128db, 3, 0>;
- def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, any_fsqrt, v64db, v64db, 3, 8>;
+ def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, any_fsqrt, v64db, v64db, 3, 8, 0,
+ "sqdbr">;
let Predicates = [FeatureVectorEnhancements1] in {
def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, any_fsqrt, v128sb, v128sb, 2, 0>;
- def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, any_fsqrt, v32sb, v32sb, 2, 8>;
+ def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, any_fsqrt, v32sb, v32sb, 2, 8, 0,
+ "sqebr">;
def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, any_fsqrt, v128xb, v128xb, 4, 8>;
}
}
@@ -1335,10 +1375,12 @@ let Predicates = [FeatureVector] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>;
def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, any_fsub, v128db, v128db, 3, 0>;
- def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, any_fsub, v64db, v64db, 3, 8>;
+ def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, any_fsub, v64db, v64db, 3, 8, 0,
+ "sdbr">;
let Predicates = [FeatureVectorEnhancements1] in {
def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, any_fsub, v128sb, v128sb, 2, 0>;
- def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, any_fsub, v32sb, v32sb, 2, 8>;
+ def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, any_fsub, v32sb, v32sb, 2, 8, 0,
+ "sebr">;
def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, any_fsub, v128xb, v128xb, 4, 8>;
}
}
@@ -1364,9 +1406,9 @@ let Predicates = [FeatureVector] in {
// Compare scalar.
let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>;
- def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_any_fcmp, v64db, 3>;
+ def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_any_fcmp, v64db, 3, "cdbr">;
let Predicates = [FeatureVectorEnhancements1] in {
- def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_any_fcmp, v32sb, 2>;
+ def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_any_fcmp, v32sb, 2, "cebr">;
def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_any_fcmp, v128xb, 4>;
}
}
@@ -1374,9 +1416,9 @@ let Predicates = [FeatureVector] in {
// Compare and signal scalar.
let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>;
- def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, z_strict_fcmps, v64db, 3>;
+ def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, z_strict_fcmps, v64db, 3, "kdbr">;
let Predicates = [FeatureVectorEnhancements1] in {
- def WFKSB : CompareVRRa<"wfksb", 0xE7CA, z_strict_fcmps, v32sb, 2>;
+ def WFKSB : CompareVRRa<"wfksb", 0xE7CA, z_strict_fcmps, v32sb, 2, "kebr">;
def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, z_strict_fcmps, v128xb, 4>;
}
}
@@ -1545,7 +1587,7 @@ def : VectorReplicateScalar<v16i8, VREPB, 7>;
def : VectorReplicateScalar<v8i16, VREPH, 3>;
def : VectorReplicateScalar<v4i32, VREPF, 1>;
-// i64 replications are just a single isntruction.
+// i64 replications are just a single instruction.
def : Pat<(v2i64 (z_replicate GR64:$scalar)),
(VLVGP GR64:$scalar, GR64:$scalar)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index d1f6511ceea3..f755d5cd3d5b 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -29,8 +29,8 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo {
SystemZ::GPRRegs SpillGPRRegs;
SystemZ::GPRRegs RestoreGPRRegs;
- unsigned VarArgsFirstGPR;
- unsigned VarArgsFirstFPR;
+ Register VarArgsFirstGPR;
+ Register VarArgsFirstFPR;
unsigned VarArgsFrameIndex;
unsigned RegSaveFrameIndex;
int FramePointerSaveIndex;
@@ -47,7 +47,7 @@ public:
// this function and the SP offset for the STMG. These are 0 if no GPRs
// need to be saved or restored.
SystemZ::GPRRegs getSpillGPRRegs() const { return SpillGPRRegs; }
- void setSpillGPRRegs(unsigned Low, unsigned High, unsigned Offs) {
+ void setSpillGPRRegs(Register Low, Register High, unsigned Offs) {
SpillGPRRegs.LowGPR = Low;
SpillGPRRegs.HighGPR = High;
SpillGPRRegs.GPROffset = Offs;
@@ -57,7 +57,7 @@ public:
// this function and the SP offset for the LMG. These are 0 if no GPRs
// need to be saved or restored.
SystemZ::GPRRegs getRestoreGPRRegs() const { return RestoreGPRRegs; }
- void setRestoreGPRRegs(unsigned Low, unsigned High, unsigned Offs) {
+ void setRestoreGPRRegs(Register Low, Register High, unsigned Offs) {
RestoreGPRRegs.LowGPR = Low;
RestoreGPRRegs.HighGPR = High;
RestoreGPRRegs.GPROffset = Offs;
@@ -65,12 +65,12 @@ public:
// Get and set the number of fixed (as opposed to variable) arguments
// that are passed in GPRs to this function.
- unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; }
- void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; }
+ Register getVarArgsFirstGPR() const { return VarArgsFirstGPR; }
+ void setVarArgsFirstGPR(Register GPR) { VarArgsFirstGPR = GPR; }
// Likewise FPRs.
- unsigned getVarArgsFirstFPR() const { return VarArgsFirstFPR; }
- void setVarArgsFirstFPR(unsigned FPR) { VarArgsFirstFPR = FPR; }
+ Register getVarArgsFirstFPR() const { return VarArgsFirstFPR; }
+ void setVarArgsFirstFPR(Register FPR) { VarArgsFirstFPR = FPR; }
// Get and set the frame index of the first stack vararg.
unsigned getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
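The hunks in this header, and similar ones in SystemZRegisterInfo.cpp/.h further down, migrate raw unsigned register numbers to llvm::Register. A minimal stand-in illustrating the idea (hypothetical class, not the real llvm::Register):

// Hypothetical stand-in: still just an integer id, but a distinct type, so
// register numbers no longer mix silently with other unsigned quantities.
class Register {
  unsigned Id = 0;
public:
  Register() = default;
  Register(unsigned Id) : Id(Id) {}
  operator unsigned() const { return Id; } // legacy call sites keep working
};

static_assert(sizeof(Register) == sizeof(unsigned), "no extra overhead");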
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperands.td
index bd40f6d7bf40..a883daad73e7 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperands.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperands.td
@@ -22,8 +22,8 @@ class ImmediateTLSAsmOperand<string name>
}
class ImmediateOp<ValueType vt, string asmop> : Operand<vt> {
- let PrintMethod = "print"##asmop##"Operand";
- let DecoderMethod = "decode"##asmop##"Operand";
+ let PrintMethod = "print"#asmop#"Operand";
+ let DecoderMethod = "decode"#asmop#"Operand";
let ParserMatchClass = !cast<AsmOperandClass>(asmop);
let OperandType = "OPERAND_IMMEDIATE";
}
@@ -52,14 +52,14 @@ multiclass Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop> {
// Constructs an asm operand for a PC-relative address. SIZE says how
// many bits there are.
-class PCRelAsmOperand<string size> : ImmediateAsmOperand<"PCRel"##size> {
+class PCRelAsmOperand<string size> : ImmediateAsmOperand<"PCRel"#size> {
let PredicateMethod = "isImm";
- let ParserMethod = "parsePCRel"##size;
+ let ParserMethod = "parsePCRel"#size;
}
class PCRelTLSAsmOperand<string size>
- : ImmediateTLSAsmOperand<"PCRelTLS"##size> {
+ : ImmediateTLSAsmOperand<"PCRelTLS"#size> {
let PredicateMethod = "isImmTLS";
- let ParserMethod = "parsePCRelTLS"##size;
+ let ParserMethod = "parsePCRelTLS"#size;
}
// Constructs an operand for a PC-relative address with address type VT.
@@ -92,9 +92,9 @@ class PCRelAddress<ValueType vt, string self, AsmOperandClass asmop>
class AddressAsmOperand<string format, string bitsize, string dispsize,
string length = "">
: AsmOperandClass {
- let Name = format##bitsize##"Disp"##dispsize##length;
- let ParserMethod = "parse"##format##bitsize;
- let RenderMethod = "add"##format##"Operands";
+ let Name = format#bitsize#"Disp"#dispsize#length;
+ let ParserMethod = "parse"#format#bitsize;
+ let RenderMethod = "add"#format#"Operands";
}
// Constructs an instruction operand for an addressing mode. FORMAT,
@@ -103,15 +103,15 @@ class AddressAsmOperand<string format, string bitsize, string dispsize,
// (base register, displacement, etc.).
class AddressOperand<string bitsize, string dispsize, string length,
string format, dag operands>
- : Operand<!cast<ValueType>("i"##bitsize)> {
- let PrintMethod = "print"##format##"Operand";
- let EncoderMethod = "get"##format##dispsize##length##"Encoding";
+ : Operand<!cast<ValueType>("i"#bitsize)> {
+ let PrintMethod = "print"#format#"Operand";
+ let EncoderMethod = "get"#format#dispsize#length#"Encoding";
let DecoderMethod =
- "decode"##format##bitsize##"Disp"##dispsize##length##"Operand";
+ "decode"#format#bitsize#"Disp"#dispsize#length#"Operand";
let OperandType = "OPERAND_MEMORY";
let MIOperandInfo = operands;
let ParserMatchClass =
- !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize##length);
+ !cast<AddressAsmOperand>(format#bitsize#"Disp"#dispsize#length);
}
// Constructs both a DAG pattern and instruction operand for an addressing mode.
@@ -126,45 +126,45 @@ class AddressOperand<string bitsize, string dispsize, string length,
class AddressingMode<string seltype, string bitsize, string dispsize,
string suffix, string length, int numops, string format,
dag operands>
- : ComplexPattern<!cast<ValueType>("i"##bitsize), numops,
- "select"##seltype##dispsize##suffix##length,
+ : ComplexPattern<!cast<ValueType>("i"#bitsize), numops,
+ "select"#seltype#dispsize#suffix#length,
[add, sub, or, frameindex, z_adjdynalloc]>,
AddressOperand<bitsize, dispsize, length, format, operands>;
// An addressing mode with a base and displacement but no index.
class BDMode<string type, string bitsize, string dispsize, string suffix>
: AddressingMode<type, bitsize, dispsize, suffix, "", 2, "BDAddr",
- (ops !cast<RegisterOperand>("ADDR"##bitsize),
- !cast<Operand>("disp"##dispsize##"imm"##bitsize))>;
+ (ops !cast<RegisterOperand>("ADDR"#bitsize),
+ !cast<Operand>("disp"#dispsize#"imm"#bitsize))>;
// An addressing mode with a base, displacement and index.
class BDXMode<string type, string bitsize, string dispsize, string suffix>
: AddressingMode<type, bitsize, dispsize, suffix, "", 3, "BDXAddr",
- (ops !cast<RegisterOperand>("ADDR"##bitsize),
- !cast<Operand>("disp"##dispsize##"imm"##bitsize),
- !cast<RegisterOperand>("ADDR"##bitsize))>;
+ (ops !cast<RegisterOperand>("ADDR"#bitsize),
+ !cast<Operand>("disp"#dispsize#"imm"#bitsize),
+ !cast<RegisterOperand>("ADDR"#bitsize))>;
// A BDMode paired with an immediate length operand of LENSIZE bits.
class BDLMode<string type, string bitsize, string dispsize, string suffix,
string lensize>
- : AddressingMode<type, bitsize, dispsize, suffix, "Len"##lensize, 3,
+ : AddressingMode<type, bitsize, dispsize, suffix, "Len"#lensize, 3,
"BDLAddr",
- (ops !cast<RegisterOperand>("ADDR"##bitsize),
- !cast<Operand>("disp"##dispsize##"imm"##bitsize),
- !cast<Operand>("imm"##bitsize))>;
+ (ops !cast<RegisterOperand>("ADDR"#bitsize),
+ !cast<Operand>("disp"#dispsize#"imm"#bitsize),
+ !cast<Operand>("imm"#bitsize))>;
// A BDMode paired with a register length operand.
class BDRMode<string type, string bitsize, string dispsize, string suffix>
: AddressingMode<type, bitsize, dispsize, suffix, "", 3, "BDRAddr",
- (ops !cast<RegisterOperand>("ADDR"##bitsize),
- !cast<Operand>("disp"##dispsize##"imm"##bitsize),
- !cast<RegisterOperand>("GR"##bitsize))>;
+ (ops !cast<RegisterOperand>("ADDR"#bitsize),
+ !cast<Operand>("disp"#dispsize#"imm"#bitsize),
+ !cast<RegisterOperand>("GR"#bitsize))>;
// An addressing mode with a base, displacement and a vector index.
class BDVMode<string bitsize, string dispsize>
: AddressOperand<bitsize, dispsize, "", "BDVAddr",
- (ops !cast<RegisterOperand>("ADDR"##bitsize),
- !cast<Operand>("disp"##dispsize##"imm"##bitsize),
+ (ops !cast<RegisterOperand>("ADDR"#bitsize),
+ !cast<Operand>("disp"#dispsize#"imm"#bitsize),
!cast<RegisterOperand>("VR128"))>;
//===----------------------------------------------------------------------===//
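The ## to # rewrites in SystemZOperands.td above, and in the .td files that follow, replace TableGen's legacy double-hash paste operator with the single # concatenation operator; the semantics are unchanged: strings are still spliced together when the record is instantiated. A small illustrative TableGen sketch (made-up class and def names, not from the patch):

// "print" # asmop # "Operand" evaluates to e.g. "printS16ImmOperand".
class PrintableOperand<string asmop> {
  string PrintMethod = "print" # asmop # "Operand";
}
def S16ImmExample : PrintableOperand<"S16Imm">;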
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td
index a6a72903e573..81af5fd854db 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -40,6 +40,10 @@ def SDT_ZWrapOffset : SDTypeProfile<1, 2,
SDTCisSameAs<0, 2>,
SDTCisPtrTy<0>]>;
def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
+def SDT_ZProbedAlloca : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisPtrTy<0>]>;
def SDT_ZGR128Binary : SDTypeProfile<1, 2,
[SDTCisVT<0, untyped>,
SDTCisInt<1>,
@@ -269,6 +273,8 @@ def z_select_ccmask_1 : SDNode<"SystemZISD::SELECT_CCMASK",
SDT_ZSelectCCMask>;
def z_ipm_1 : SDNode<"SystemZISD::IPM", SDT_ZIPM>;
def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
+def z_probed_alloca : SDNode<"SystemZISD::PROBED_ALLOCA", SDT_ZProbedAlloca,
+ [SDNPHasChain]>;
def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
def z_smul_lohi : SDNode<"SystemZISD::SMUL_LOHI", SDT_ZGR128Binary>;
def z_umul_lohi : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>;
@@ -374,7 +380,7 @@ def z_vstrsz_cc : SDNode<"SystemZISD::VSTRSZ_CC",
def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvIntCC>;
class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW>
- : SDNode<"SystemZISD::"##name, profile,
+ : SDNode<"SystemZISD::"#name, profile,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def z_atomic_swapw : AtomicWOp<"ATOMIC_SWAPW">;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZPatterns.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZPatterns.td
index 501a69488397..e3190eddb9f1 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZPatterns.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZPatterns.td
@@ -57,10 +57,10 @@ multiclass RMWIByte<SDPatternOperator operator, AddressingMode mode,
// The inserted operand is loaded using LOAD from an address of mode MODE.
multiclass InsertMem<string type, Instruction insn, RegisterOperand cls,
SDPatternOperator load, AddressingMode mode> {
- def : Pat<(!cast<SDPatternOperator>("or_as_"##type)
+ def : Pat<(!cast<SDPatternOperator>("or_as_"#type)
cls:$src1, (load mode:$src2)),
(insn cls:$src1, mode:$src2)>;
- def : Pat<(!cast<SDPatternOperator>("or_as_rev"##type)
+ def : Pat<(!cast<SDPatternOperator>("or_as_rev"#type)
(load mode:$src2), cls:$src1),
(insn cls:$src1, mode:$src2)>;
}
@@ -167,7 +167,7 @@ class FPConversion<Instruction insn, SDPatternOperator operator, TypedReg tr1,
: Pat<(tr1.vt (operator (tr2.vt tr2.op:$vec))),
(insn tr2.op:$vec, suppress, mode)>;
-// Use INSN to perform mininum/maximum operation OPERATOR on type TR.
+// Use INSN to perform minimum/maximum operation OPERATOR on type TR.
// FUNCTION is the type of minimum/maximum function to perform.
class FPMinMax<Instruction insn, SDPatternOperator operator, TypedReg tr,
bits<4> function>
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZProcessors.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZProcessors.td
index af33a0300552..57c2411b8dcf 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZProcessors.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZProcessors.td
@@ -9,7 +9,7 @@
// Processor definitions.
//
// For compatibility with other compilers on the platform, each model can
-// be identifed either by the system name (e.g. z10) or the level of the
+// be identified either by the system name (e.g. z10) or the level of the
// architecture the model supports, as identified by the edition level
// of the z/Architecture Principles of Operation document (e.g. arch8).
//
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 0d5e7af92523..fe2aaca8429a 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -73,13 +73,10 @@ static void addHints(ArrayRef<MCPhysReg> Order,
Hints.push_back(Reg);
}
-bool
-SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
- ArrayRef<MCPhysReg> Order,
- SmallVectorImpl<MCPhysReg> &Hints,
- const MachineFunction &MF,
- const VirtRegMap *VRM,
- const LiveRegMatrix *Matrix) const {
+bool SystemZRegisterInfo::getRegAllocationHints(
+ Register VirtReg, ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF,
+ const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
@@ -134,11 +131,11 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
}
if (MRI->getRegClass(VirtReg) == &SystemZ::GRX32BitRegClass) {
- SmallVector<unsigned, 8> Worklist;
- SmallSet<unsigned, 4> DoneRegs;
+ SmallVector<Register, 8> Worklist;
+ SmallSet<Register, 4> DoneRegs;
Worklist.push_back(VirtReg);
while (Worklist.size()) {
- unsigned Reg = Worklist.pop_back_val();
+ Register Reg = Worklist.pop_back_val();
if (!DoneRegs.insert(Reg).second)
continue;
@@ -267,14 +264,14 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// Decompose the frame index into a base and offset.
int FrameIndex = MI->getOperand(FIOperandNum).getIndex();
- unsigned BasePtr;
+ Register BasePtr;
int64_t Offset = (TFI->getFrameIndexReference(MF, FrameIndex, BasePtr) +
MI->getOperand(FIOperandNum + 1).getImm());
// Special handling of dbg_value instructions.
if (MI->isDebugValue()) {
MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, /*isDef*/ false);
- MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+ MI->getDebugOffset().ChangeToImmediate(Offset);
return;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
index 7044efef1ac6..9f2cca0c83f6 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -58,11 +58,9 @@ public:
const TargetRegisterClass *
getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
- bool getRegAllocationHints(unsigned VirtReg,
- ArrayRef<MCPhysReg> Order,
+ bool getRegAllocationHints(Register VirtReg, ArrayRef<MCPhysReg> Order,
SmallVectorImpl<MCPhysReg> &Hints,
- const MachineFunction &MF,
- const VirtRegMap *VRM,
+ const MachineFunction &MF, const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const override;
// Override TargetRegisterInfo.h.
@@ -72,9 +70,6 @@ public:
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override {
return true;
}
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
- return true;
- }
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
index 3567b0f3acf8..a85862e62749 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -35,15 +35,15 @@ multiclass SystemZRegClass<string name, list<ValueType> types, int size,
dag regList, bit allocatable = 1> {
def AsmOperand : AsmOperandClass {
let Name = name;
- let ParserMethod = "parse"##name;
+ let ParserMethod = "parse"#name;
let RenderMethod = "addRegOperands";
}
let isAllocatable = allocatable in
def Bit : RegisterClass<"SystemZ", types, size, regList> {
let Size = size;
}
- def "" : RegisterOperand<!cast<RegisterClass>(name##"Bit")> {
- let ParserMatchClass = !cast<AsmOperandClass>(name##"AsmOperand");
+ def "" : RegisterOperand<!cast<RegisterClass>(name#"Bit")> {
+ let ParserMatchClass = !cast<AsmOperandClass>(name#"AsmOperand");
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index 47c925dcf730..6b4f35e5ba2b 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -47,7 +47,7 @@ static SDValue emitMemMem(SelectionDAG &DAG, const SDLoc &DL, unsigned Sequence,
SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy(
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool IsVolatile, bool AlwaysInline,
+ SDValue Size, Align Alignment, bool IsVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
if (IsVolatile)
return SDValue();
@@ -74,7 +74,7 @@ static SDValue memsetStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst,
- SDValue Byte, SDValue Size, unsigned Align, bool IsVolatile,
+ SDValue Byte, SDValue Size, Align Alignment, bool IsVolatile,
MachinePointerInfo DstPtrInfo) const {
EVT PtrVT = Dst.getValueType();
@@ -97,20 +97,22 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes);
unsigned Size2 = Bytes - Size1;
SDValue Chain1 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size1,
- Align, DstPtrInfo);
+ Alignment.value(), DstPtrInfo);
if (Size2 == 0)
return Chain1;
Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
DAG.getConstant(Size1, DL, PtrVT));
DstPtrInfo = DstPtrInfo.getWithOffset(Size1);
- SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2,
- std::min(Align, Size1), DstPtrInfo);
+ SDValue Chain2 = memsetStore(
+ DAG, DL, Chain, Dst, ByteVal, Size2,
+ std::min((unsigned)Alignment.value(), Size1), DstPtrInfo);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2);
}
} else {
// Handle one and two bytes using STC.
if (Bytes <= 2) {
- SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Align);
+ SDValue Chain1 =
+ DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Alignment);
if (Bytes == 1)
return Chain1;
SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
@@ -131,7 +133,7 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
// Copy the byte to the first location and then use MVC to copy
// it to the rest.
- Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Align);
+ Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Alignment);
SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
DAG.getConstant(1, DL, PtrVT));
return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
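EmitTargetCodeForMemcpy and EmitTargetCodeForMemset now take the Align value type instead of a raw unsigned; call sites that still need a plain byte count go through .value(), as in the std::min cast above. A rough stand-in for such a type, assuming (like the real llvm::Align) it stores the log2 of a power-of-two alignment:

#include <cassert>
#include <cstdint>

// Hypothetical Align stand-in: a power-of-two byte alignment kept as its log2,
// with value() recovering the byte count for interfaces that want an integer.
class Align {
  uint8_t ShiftValue = 0; // log2(alignment)
public:
  explicit Align(uint64_t Value) {
    assert(Value > 0 && (Value & (Value - 1)) == 0 && "not a power of two");
    while (Value > 1) { Value >>= 1; ++ShiftValue; }
  }
  uint64_t value() const { return uint64_t(1) << ShiftValue; }
};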
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index 7d63bae83cf3..a4a5b1fbdf90 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -25,14 +25,15 @@ public:
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &DL,
SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool IsVolatile,
- bool AlwaysInline,
+ SDValue Size, Align Alignment,
+ bool IsVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const override;
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL,
SDValue Chain, SDValue Dst, SDValue Byte,
- SDValue Size, unsigned Align, bool IsVolatile,
+ SDValue Size, Align Alignment,
+ bool IsVolatile,
MachinePointerInfo DstPtrInfo) const override;
std::pair<SDValue, SDValue>
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
index f6184cec795a..3d27b70d6ef9 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -46,6 +46,7 @@ private:
bool shortenOn001(MachineInstr &MI, unsigned Opcode);
bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode);
bool shortenFPConv(MachineInstr &MI, unsigned Opcode);
+ bool shortenFusedFPOp(MachineInstr &MI, unsigned Opcode);
const SystemZInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -64,7 +65,7 @@ SystemZShortenInst::SystemZShortenInst(const SystemZTargetMachine &tm)
// Tie operands if MI has become a two-address instruction.
static void tieOpsIfNeeded(MachineInstr &MI) {
- if (MI.getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
+ if (MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0 &&
!MI.getOperand(0).isTied())
MI.tieOperands(0, 1);
}
@@ -175,6 +176,32 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
return false;
}
+bool SystemZShortenInst::shortenFusedFPOp(MachineInstr &MI, unsigned Opcode) {
+ MachineOperand &DstMO = MI.getOperand(0);
+ MachineOperand &LHSMO = MI.getOperand(1);
+ MachineOperand &RHSMO = MI.getOperand(2);
+ MachineOperand &AccMO = MI.getOperand(3);
+ if (SystemZMC::getFirstReg(DstMO.getReg()) < 16 &&
+ SystemZMC::getFirstReg(LHSMO.getReg()) < 16 &&
+ SystemZMC::getFirstReg(RHSMO.getReg()) < 16 &&
+ SystemZMC::getFirstReg(AccMO.getReg()) < 16 &&
+ DstMO.getReg() == AccMO.getReg()) {
+ MachineOperand Lhs(LHSMO);
+ MachineOperand Rhs(RHSMO);
+ MachineOperand Src(AccMO);
+ MI.RemoveOperand(3);
+ MI.RemoveOperand(2);
+ MI.RemoveOperand(1);
+ MI.setDesc(TII->get(Opcode));
+ MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
+ .add(Src)
+ .add(Lhs)
+ .add(Rhs);
+ return true;
+ }
+ return false;
+}
+
// Process all instructions in MBB. Return true if something changed.
bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
@@ -235,6 +262,22 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
Changed |= shortenOn001(MI, SystemZ::MEEBR);
break;
+ case SystemZ::WFMADB:
+ Changed |= shortenFusedFPOp(MI, SystemZ::MADBR);
+ break;
+
+ case SystemZ::WFMASB:
+ Changed |= shortenFusedFPOp(MI, SystemZ::MAEBR);
+ break;
+
+ case SystemZ::WFMSDB:
+ Changed |= shortenFusedFPOp(MI, SystemZ::MSDBR);
+ break;
+
+ case SystemZ::WFMSSB:
+ Changed |= shortenFusedFPOp(MI, SystemZ::MSEBR);
+ break;
+
case SystemZ::WFLCDB:
Changed |= shortenOn01(MI, SystemZ::LCDFR);
break;
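The new shortenFusedFPOp hook above turns the vector-register fused multiply ops (WFMADB, WFMASB, WFMSDB, WFMSSB) into their two-address scalar counterparts (MADBR, MAEBR, MSDBR, MSEBR) when every operand lives in FP0-FP15 and the destination already equals the accumulator. A plain-data sketch of that check, using a hypothetical Instr struct rather than MachineInstr and assuming the wide form orders its operands as dst, lhs, rhs, acc:

#include <vector>

struct Instr {
  unsigned Opcode;
  std::vector<unsigned> Ops; // hardware register numbers
};

static bool shortenFusedFPOp(Instr &MI, unsigned ScalarOpcode) {
  unsigned Dst = MI.Ops[0], Lhs = MI.Ops[1], Rhs = MI.Ops[2], Acc = MI.Ops[3];
  // Only FP0-FP15 are encodable in the scalar form, and the two-address
  // destination must already be the accumulator.
  if (Dst >= 16 || Lhs >= 16 || Rhs >= 16 || Acc >= 16 || Dst != Acc)
    return false;                  // keep the vector-register encoding
  MI.Opcode = ScalarOpcode;        // e.g. WFMADB -> MADBR
  MI.Ops = {Dst, Acc, Lhs, Rhs};   // destination stays tied to the accumulator
  return true;
}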
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
index 5e8af81842c4..68e0b7ae66a4 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -9,6 +9,7 @@
#include "SystemZSubtarget.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -28,11 +29,16 @@ void SystemZSubtarget::anchor() {}
SystemZSubtarget &
SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
- std::string CPUName = CPU;
+ StringRef CPUName = CPU;
if (CPUName.empty())
CPUName = "generic";
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
+
+ // -msoft-float implies -mno-vx.
+ if (HasSoftFloat)
+ HasVector = false;
+
return *this;
}
@@ -57,7 +63,7 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
HasInsertReferenceBitsMultiple(false),
HasMiscellaneousExtensions3(false), HasMessageSecurityAssist9(false),
HasVectorEnhancements2(false), HasVectorPackedDecimalEnhancement(false),
- HasEnhancedSort(false), HasDeflateConversion(false),
+ HasEnhancedSort(false), HasDeflateConversion(false), HasSoftFloat(false),
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this), TSInfo(), FrameLowering() {}
@@ -68,9 +74,12 @@ bool SystemZSubtarget::enableSubRegLiveness() const {
bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV,
CodeModel::Model CM) const {
- // PC32DBL accesses require the low bit to be clear. Note that a zero
- // value selects the default alignment and is therefore OK.
- if (GV->getAlignment() == 1)
+ // PC32DBL accesses require the low bit to be clear.
+ //
+ // FIXME: Explicitly check for functions: the datalayout is currently
+ // missing information about function pointers.
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ if (GV->getPointerAlignment(DL) == 1 && !GV->getValueType()->isFunctionTy())
return false;
// For the small model, all locally-binding symbols are in range.
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.h
index fa3f65d93c91..4b49c37fe4e6 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -68,6 +68,7 @@ protected:
bool HasVectorPackedDecimalEnhancement;
bool HasEnhancedSort;
bool HasDeflateConversion;
+ bool HasSoftFloat;
private:
Triple TargetTriple;
@@ -239,6 +240,9 @@ public:
// Return true if the target has the deflate-conversion facility.
bool hasDeflateConversion() const { return HasDeflateConversion; }
+ // Return true if soft float should be used.
+ bool hasSoftFloat() const { return HasSoftFloat; }
+
// Return true if GV can be accessed using LARL for reloc model RM
// and code model CM.
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTDC.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTDC.cpp
index f103812eb096..7cb7dca2ea28 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTDC.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTDC.cpp
@@ -44,7 +44,9 @@
//===----------------------------------------------------------------------===//
#include "SystemZ.h"
+#include "SystemZSubtarget.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
@@ -53,6 +55,7 @@
#include "llvm/IR/IntrinsicsS390.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
+#include "llvm/Target/TargetMachine.h"
#include <deque>
#include <set>
@@ -72,6 +75,11 @@ public:
}
bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetPassConfig>();
+ }
+
private:
// Maps seen instructions that can be mapped to a TDC, values are
// (TDC operand, TDC mask, worthy flag) triples.
@@ -310,6 +318,12 @@ void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {
}
bool SystemZTDCPass::runOnFunction(Function &F) {
+ auto &TPC = getAnalysis<TargetPassConfig>();
+ if (TPC.getTM<TargetMachine>()
+ .getSubtarget<SystemZSubtarget>(F)
+ .hasSoftFloat())
+ return false;
+
ConvertedInsts.clear();
LogicOpsWorklist.clear();
PossibleJunk.clear();
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index dfcdb5356485..3f467b200852 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -40,8 +40,10 @@ static bool UsesVectorABI(StringRef CPU, StringRef FS) {
// This is the case by default if CPU is z13 or later, and can be
// overridden via "[+-]vector" feature string elements.
bool VectorABI = true;
+ bool SoftFloat = false;
if (CPU.empty() || CPU == "generic" ||
- CPU == "z10" || CPU == "z196" || CPU == "zEC12")
+ CPU == "z10" || CPU == "z196" || CPU == "zEC12" ||
+ CPU == "arch8" || CPU == "arch9" || CPU == "arch10")
VectorABI = false;
SmallVector<StringRef, 3> Features;
@@ -51,9 +53,13 @@ static bool UsesVectorABI(StringRef CPU, StringRef FS) {
VectorABI = true;
if (Feature == "-vector")
VectorABI = false;
+ if (Feature == "soft-float" || Feature == "+soft-float")
+ SoftFloat = true;
+ if (Feature == "-soft-float")
+ SoftFloat = false;
}
- return VectorABI;
+ return VectorABI && !SoftFloat;
}
static std::string computeDataLayout(const Triple &TT, StringRef CPU,
@@ -154,13 +160,46 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
getEffectiveRelocModel(RM),
getEffectiveSystemZCodeModel(CM, getEffectiveRelocModel(RM), JIT),
OL),
- TLOF(std::make_unique<TargetLoweringObjectFileELF>()),
- Subtarget(TT, CPU, FS, *this) {
+ TLOF(std::make_unique<TargetLoweringObjectFileELF>()) {
initAsmInfo();
}
SystemZTargetMachine::~SystemZTargetMachine() = default;
+const SystemZSubtarget *
+SystemZTargetMachine::getSubtargetImpl(const Function &F) const {
+ Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute FSAttr = F.getFnAttribute("target-features");
+
+ std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+ ? CPUAttr.getValueAsString().str()
+ : TargetCPU;
+ std::string FS = !FSAttr.hasAttribute(Attribute::None)
+ ? FSAttr.getValueAsString().str()
+ : TargetFS;
+
+ // FIXME: This is related to the code below to reset the target options,
+ // we need to know whether or not the soft float flag is set on the
+ // function, so we can enable it as a subtarget feature.
+ bool softFloat =
+ F.hasFnAttribute("use-soft-float") &&
+ F.getFnAttribute("use-soft-float").getValueAsString() == "true";
+
+ if (softFloat)
+ FS += FS.empty() ? "+soft-float" : ",+soft-float";
+
+ auto &I = SubtargetMap[CPU + FS];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = std::make_unique<SystemZSubtarget>(TargetTriple, CPU, FS, *this);
+ }
+
+ return I.get();
+}
+
namespace {
/// SystemZ Code Generator Pass Configuration Options.
@@ -183,6 +222,7 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
bool addILPOpts() override;
+ void addPreRegAlloc() override;
void addPostRewrite() override;
void addPostRegAlloc() override;
void addPreSched2() override;
@@ -214,6 +254,10 @@ bool SystemZPassConfig::addILPOpts() {
return true;
}
+void SystemZPassConfig::addPreRegAlloc() {
+ addPass(createSystemZCopyPhysRegsPass(getSystemZTargetMachine()));
+}
+
void SystemZPassConfig::addPostRewrite() {
addPass(createSystemZPostRewritePass(getSystemZTargetMachine()));
}
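getSubtargetImpl(const Function &) above caches one SystemZSubtarget per distinct CPU/feature-string pair, appending "+soft-float" when the function carries use-soft-float="true". A standalone sketch of that keying scheme with plain C++ stand-ins (not the LLVM classes):

#include <map>
#include <memory>
#include <string>

struct Subtarget { std::string CPU, Features; };

// Returns the cached subtarget for this (CPU, features, soft-float)
// combination, creating it on first use; the map key combines CPU and FS.
static Subtarget *getOrCreateSubtarget(
    std::map<std::string, std::unique_ptr<Subtarget>> &Cache,
    std::string CPU, std::string FS, bool UseSoftFloat) {
  if (UseSoftFloat)
    FS += FS.empty() ? "+soft-float" : ",+soft-float";
  std::unique_ptr<Subtarget> &Slot = Cache[CPU + FS];
  if (!Slot)
    Slot.reset(new Subtarget{CPU, FS});
  return Slot.get();
}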
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
index ac04a080f580..9ea03e104fc9 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -26,7 +26,8 @@ namespace llvm {
class SystemZTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- SystemZSubtarget Subtarget;
+
+ mutable StringMap<std::unique_ptr<SystemZSubtarget>> SubtargetMap;
public:
SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
@@ -35,11 +36,11 @@ public:
CodeGenOpt::Level OL, bool JIT);
~SystemZTargetMachine() override;
- const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; }
-
- const SystemZSubtarget *getSubtargetImpl(const Function &) const override {
- return &Subtarget;
- }
+ const SystemZSubtarget *getSubtargetImpl(const Function &) const override;
+ // DO NOT IMPLEMENT: There is no such thing as a valid default subtarget,
+ // subtargets are per-function entities based on the target-specific
+ // attributes of each function.
+ const SystemZSubtarget *getSubtargetImpl() const = delete;
// Override LLVMTargetMachine
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index acec3c533585..864200e5f71c 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -30,7 +30,8 @@ using namespace llvm;
//
//===----------------------------------------------------------------------===//
-int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -63,7 +64,8 @@ int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
}
int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -177,11 +179,12 @@ int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
break;
}
- return SystemZTTIImpl::getIntImmCost(Imm, Ty);
+ return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -226,7 +229,7 @@ int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
return TTI::TCC_Free;
break;
}
- return SystemZTTIImpl::getIntImmCost(Imm, Ty);
+ return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
TargetTransformInfo::PopcntSupportKind
@@ -246,8 +249,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
for (auto &BB : L->blocks())
for (auto &I : *BB) {
if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
- ImmutableCallSite CS(&I);
- if (const Function *F = CS.getCalledFunction()) {
+ if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
if (isLoweredToCall(F))
HasCall = true;
if (F->getIntrinsicID() == Intrinsic::memcpy ||
@@ -259,7 +261,8 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
}
if (isa<StoreInst>(&I)) {
Type *MemAccessTy = I.getOperand(0)->getType();
- NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0);
+ NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0,
+ TTI::TCK_RecipThroughput);
}
}
@@ -291,6 +294,10 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
UP.Force = true;
}
+void SystemZTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ BaseT::getPeelingPreferences(L, SE, PP);
+}
bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) {
@@ -323,6 +330,23 @@ unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) const {
return 0;
}
+unsigned SystemZTTIImpl::getMinPrefetchStride(unsigned NumMemAccesses,
+ unsigned NumStridedMemAccesses,
+ unsigned NumPrefetches,
+ bool HasCall) const {
+ // Don't prefetch a loop with many far apart accesses.
+ if (NumPrefetches > 16)
+ return UINT_MAX;
+
+ // Emit prefetch instructions for smaller strides in cases where we think
+ // the hardware prefetcher might not be able to keep up.
+ if (NumStridedMemAccesses > 32 &&
+ NumStridedMemAccesses == NumMemAccesses && !HasCall)
+ return 1;
+
+ return ST->hasMiscellaneousExtensions3() ? 8192 : 2048;
+}
+
bool SystemZTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
EVT VT = TLI->getValueType(DL, DataType);
return (VT.isScalarInteger() && TLI->isTypeLegal(VT));
@@ -341,18 +365,25 @@ static unsigned getScalarSizeInBits(Type *Ty) {
// type until it is legal. This would e.g. return 4 for <6 x i64>, instead of
// 3.
static unsigned getNumVectorRegs(Type *Ty) {
- assert(Ty->isVectorTy() && "Expected vector type");
- unsigned WideBits = getScalarSizeInBits(Ty) * Ty->getVectorNumElements();
+ auto *VTy = cast<FixedVectorType>(Ty);
+ unsigned WideBits = getScalarSizeInBits(Ty) * VTy->getNumElements();
assert(WideBits > 0 && "Could not compute size of vector");
return ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
}
int SystemZTTIImpl::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+ TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
const Instruction *CxtI) {
+ // TODO: Handle more cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
+ Op2Info, Opd1PropInfo,
+ Opd2PropInfo, Args, CxtI);
+
// TODO: return a good value for BB-VECTORIZER that includes the
// immediate loads, which we do not want to count for the loop
// vectorizer, since they are hopefully hoisted out of the loop. This
@@ -391,10 +422,59 @@ int SystemZTTIImpl::getArithmeticInstrCost(
}
}
- if (Ty->isVectorTy()) {
- assert(ST->hasVector() &&
- "getArithmeticInstrCost() called with vector type.");
- unsigned VF = Ty->getVectorNumElements();
+ if (!Ty->isVectorTy()) {
+ // These FP operations are supported with a dedicated instruction for
+ // float, double and fp128 (base implementation assumes float generally
+ // costs 2).
+ if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
+ Opcode == Instruction::FMul || Opcode == Instruction::FDiv)
+ return 1;
+
+ // There is no native support for FRem.
+ if (Opcode == Instruction::FRem)
+ return LIBCALL_COST;
+
+ // Give discount for some combined logical operations if supported.
+ if (Args.size() == 2 && ST->hasMiscellaneousExtensions3()) {
+ if (Opcode == Instruction::Xor) {
+ for (const Value *A : Args) {
+ if (const Instruction *I = dyn_cast<Instruction>(A))
+ if (I->hasOneUse() &&
+ (I->getOpcode() == Instruction::And ||
+ I->getOpcode() == Instruction::Or ||
+ I->getOpcode() == Instruction::Xor))
+ return 0;
+ }
+ }
+ else if (Opcode == Instruction::Or || Opcode == Instruction::And) {
+ for (const Value *A : Args) {
+ if (const Instruction *I = dyn_cast<Instruction>(A))
+ if (I->hasOneUse() && I->getOpcode() == Instruction::Xor)
+ return 0;
+ }
+ }
+ }
+
+ // Or requires one instruction, although it has custom handling for i64.
+ if (Opcode == Instruction::Or)
+ return 1;
+
+ if (Opcode == Instruction::Xor && ScalarBits == 1) {
+ if (ST->hasLoadStoreOnCond2())
+ return 5; // 2 * (li 0; loc 1); xor
+ return 7; // 2 * ipm sequences ; xor ; shift ; compare
+ }
+
+ if (DivRemConstPow2)
+ return (SignedDivRem ? SDivPow2Cost : 1);
+ if (DivRemConst)
+ return DivMulSeqCost;
+ if (SignedDivRem || UnsignedDivRem)
+ return DivInstrCost;
+ }
+ else if (ST->hasVector()) {
+ auto *VTy = cast<FixedVectorType>(Ty);
+ unsigned VF = VTy->getNumElements();
unsigned NumVectors = getNumVectorRegs(Ty);
// These vector operations are custom handled, but are still supported
@@ -407,7 +487,7 @@ int SystemZTTIImpl::getArithmeticInstrCost(
if (DivRemConstPow2)
return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1));
if (DivRemConst)
- return VF * DivMulSeqCost + getScalarizationOverhead(Ty, Args);
+ return VF * DivMulSeqCost + getScalarizationOverhead(VTy, Args);
if ((SignedDivRem || UnsignedDivRem) && VF > 4)
// Temporary hack: disable high vectorization factors with integer
// division/remainder, which will get scalarized and handled with
@@ -429,8 +509,8 @@ int SystemZTTIImpl::getArithmeticInstrCost(
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
unsigned ScalarCost =
- getArithmeticInstrCost(Opcode, Ty->getScalarType());
- unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(Ty, Args);
+ getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind);
+ unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(VTy, Args);
// FIXME: VF 2 for these FP operations are currently just as
// expensive as for VF 4.
if (VF == 2)
@@ -447,101 +527,51 @@ int SystemZTTIImpl::getArithmeticInstrCost(
// There is no native support for FRem.
if (Opcode == Instruction::FRem) {
- unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(Ty, Args);
+ unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(VTy, Args);
// FIXME: VF 2 for float is currently just as expensive as for VF 4.
if (VF == 2 && ScalarBits == 32)
Cost *= 2;
return Cost;
}
}
- else { // Scalar:
- // These FP operations are supported with a dedicated instruction for
- // float, double and fp128 (base implementation assumes float generally
- // costs 2).
- if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
- Opcode == Instruction::FMul || Opcode == Instruction::FDiv)
- return 1;
-
- // There is no native support for FRem.
- if (Opcode == Instruction::FRem)
- return LIBCALL_COST;
-
- // Give discount for some combined logical operations if supported.
- if (Args.size() == 2 && ST->hasMiscellaneousExtensions3()) {
- if (Opcode == Instruction::Xor) {
- for (const Value *A : Args) {
- if (const Instruction *I = dyn_cast<Instruction>(A))
- if (I->hasOneUse() &&
- (I->getOpcode() == Instruction::And ||
- I->getOpcode() == Instruction::Or ||
- I->getOpcode() == Instruction::Xor))
- return 0;
- }
- }
- else if (Opcode == Instruction::Or || Opcode == Instruction::And) {
- for (const Value *A : Args) {
- if (const Instruction *I = dyn_cast<Instruction>(A))
- if (I->hasOneUse() && I->getOpcode() == Instruction::Xor)
- return 0;
- }
- }
- }
-
- // Or requires one instruction, although it has custom handling for i64.
- if (Opcode == Instruction::Or)
- return 1;
-
- if (Opcode == Instruction::Xor && ScalarBits == 1) {
- if (ST->hasLoadStoreOnCond2())
- return 5; // 2 * (li 0; loc 1); xor
- return 7; // 2 * ipm sequences ; xor ; shift ; compare
- }
-
- if (DivRemConstPow2)
- return (SignedDivRem ? SDivPow2Cost : 1);
- if (DivRemConst)
- return DivMulSeqCost;
- if (SignedDivRem || UnsignedDivRem)
- return DivInstrCost;
- }
// Fallback to the default implementation.
- return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
Opd1PropInfo, Opd2PropInfo, Args, CxtI);
}
-int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) {
- assert (Tp->isVectorTy());
- assert (ST->hasVector() && "getShuffleCost() called.");
- unsigned NumVectors = getNumVectorRegs(Tp);
+int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
+ int Index, VectorType *SubTp) {
+ if (ST->hasVector()) {
+ unsigned NumVectors = getNumVectorRegs(Tp);
- // TODO: Since fp32 is expanded, the shuffle cost should always be 0.
+ // TODO: Since fp32 is expanded, the shuffle cost should always be 0.
- // FP128 values are always in scalar registers, so there is no work
- // involved with a shuffle, except for broadcast. In that case register
- // moves are done with a single instruction per element.
- if (Tp->getScalarType()->isFP128Ty())
- return (Kind == TargetTransformInfo::SK_Broadcast ? NumVectors - 1 : 0);
+ // FP128 values are always in scalar registers, so there is no work
+ // involved with a shuffle, except for broadcast. In that case register
+ // moves are done with a single instruction per element.
+ if (Tp->getScalarType()->isFP128Ty())
+ return (Kind == TargetTransformInfo::SK_Broadcast ? NumVectors - 1 : 0);
- switch (Kind) {
- case TargetTransformInfo::SK_ExtractSubvector:
- // ExtractSubvector Index indicates start offset.
+ switch (Kind) {
+ case TargetTransformInfo::SK_ExtractSubvector:
+ // ExtractSubvector Index indicates start offset.
- // Extracting a subvector from first index is a noop.
- return (Index == 0 ? 0 : NumVectors);
+ // Extracting a subvector from first index is a noop.
+ return (Index == 0 ? 0 : NumVectors);
- case TargetTransformInfo::SK_Broadcast:
- // Loop vectorizer calls here to figure out the extra cost of
- // broadcasting a loaded value to all elements of a vector. Since vlrep
- // loads and replicates with a single instruction, adjust the returned
- // value.
- return NumVectors - 1;
+ case TargetTransformInfo::SK_Broadcast:
+ // Loop vectorizer calls here to figure out the extra cost of
+ // broadcasting a loaded value to all elements of a vector. Since vlrep
+ // loads and replicates with a single instruction, adjust the returned
+ // value.
+ return NumVectors - 1;
- default:
+ default:
- // SystemZ supports single instruction permutation / replication.
- return NumVectors;
+ // SystemZ supports single instruction permutation / replication.
+ return NumVectors;
+ }
}
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
@@ -564,8 +594,9 @@ getVectorTruncCost(Type *SrcTy, Type *DstTy) {
assert (SrcTy->isVectorTy() && DstTy->isVectorTy());
assert (SrcTy->getPrimitiveSizeInBits() > DstTy->getPrimitiveSizeInBits() &&
"Packing must reduce size of vector type.");
- assert (SrcTy->getVectorNumElements() == DstTy->getVectorNumElements() &&
- "Packing should not change number of elements.");
+ assert(cast<FixedVectorType>(SrcTy)->getNumElements() ==
+ cast<FixedVectorType>(DstTy)->getNumElements() &&
+ "Packing should not change number of elements.");
// TODO: Since fp32 is expanded, the extract cost should always be 0.
@@ -580,7 +611,7 @@ getVectorTruncCost(Type *SrcTy, Type *DstTy) {
unsigned Cost = 0;
unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy);
- unsigned VF = SrcTy->getVectorNumElements();
+ unsigned VF = cast<FixedVectorType>(SrcTy)->getNumElements();
for (unsigned P = 0; P < Log2Diff; ++P) {
if (NumParts > 1)
NumParts /= 2;
@@ -642,7 +673,7 @@ static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
// Return the potentially vectorized type based on 'I' and 'VF'. 'I' may
// be either scalar or already vectorized with a same or lesser VF.
Type *ElTy = OpTy->getScalarType();
- return VectorType::get(ElTy, VF);
+ return FixedVectorType::get(ElTy, VF);
}
return nullptr;
@@ -653,8 +684,8 @@ static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
unsigned SystemZTTIImpl::
getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
const Instruction *I) {
- assert (Dst->isVectorTy());
- unsigned VF = Dst->getVectorNumElements();
+ auto *DstVTy = cast<FixedVectorType>(Dst);
+ unsigned VF = DstVTy->getNumElements();
unsigned Cost = 0;
// If we know what the widths of the compared operands, get any cost of
// converting it to match Dst. Otherwise assume same widths.
@@ -668,14 +699,50 @@ getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
}
int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
+ // FIXME: Can the logic below also be used for these cost kinds?
+ if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) {
+ int BaseCost = BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
+ return BaseCost == 0 ? BaseCost : 1;
+ }
+
unsigned DstScalarBits = Dst->getScalarSizeInBits();
unsigned SrcScalarBits = Src->getScalarSizeInBits();
- if (Src->isVectorTy()) {
- assert (ST->hasVector() && "getCastInstrCost() called with vector type.");
- assert (Dst->isVectorTy());
- unsigned VF = Src->getVectorNumElements();
+ if (!Src->isVectorTy()) {
+ assert (!Dst->isVectorTy());
+
+ if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
+ if (SrcScalarBits >= 32 ||
+ (I != nullptr && isa<LoadInst>(I->getOperand(0))))
+ return 1;
+ return SrcScalarBits > 1 ? 2 /*i8/i16 extend*/ : 5 /*branch seq.*/;
+ }
+
+ if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+ Src->isIntegerTy(1)) {
+ if (ST->hasLoadStoreOnCond2())
+ return 2; // li 0; loc 1
+
+ // This should be extension of a compare i1 result, which is done with
+ // ipm and a varying sequence of instructions.
+ unsigned Cost = 0;
+ if (Opcode == Instruction::SExt)
+ Cost = (DstScalarBits < 64 ? 3 : 4);
+ if (Opcode == Instruction::ZExt)
+ Cost = 3;
+ Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
+ if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
+ // If operands of an fp-type was compared, this costs +1.
+ Cost++;
+ return Cost;
+ }
+ }
+ else if (ST->hasVector()) {
+ auto *SrcVecTy = cast<FixedVectorType>(Src);
+ auto *DstVecTy = cast<FixedVectorType>(Dst);
+ unsigned VF = SrcVecTy->getNumElements();
unsigned NumDstVectors = getNumVectorRegs(Dst);
unsigned NumSrcVectors = getNumVectorRegs(Src);
@@ -720,7 +787,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
// inserting and extracting the values. Base implementation does not
// realize float->int gets scalarized.
unsigned ScalarCost = getCastInstrCost(Opcode, Dst->getScalarType(),
- Src->getScalarType());
+ Src->getScalarType(), CostKind);
unsigned TotCost = VF * ScalarCost;
bool NeedsInserts = true, NeedsExtracts = true;
// FP128 registers do not get inserted or extracted.
@@ -731,8 +798,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
(Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))
NeedsExtracts = false;
- TotCost += getScalarizationOverhead(Src, false, NeedsExtracts);
- TotCost += getScalarizationOverhead(Dst, NeedsInserts, false);
+ TotCost += getScalarizationOverhead(SrcVecTy, false, NeedsExtracts);
+ TotCost += getScalarizationOverhead(DstVecTy, NeedsInserts, false);
// FIXME: VF 2 for float<->i32 is currently just as expensive as for VF 4.
if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32)
@@ -743,7 +810,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
if (Opcode == Instruction::FPTrunc) {
if (SrcScalarBits == 128) // fp128 -> double/float + inserts of elements.
- return VF /*ldxbr/lexbr*/ + getScalarizationOverhead(Dst, true, false);
+ return VF /*ldxbr/lexbr*/ +
+ getScalarizationOverhead(DstVecTy, true, false);
else // double -> float
return VF / 2 /*vledb*/ + std::max(1U, VF / 4 /*vperm*/);
}
@@ -756,40 +824,11 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
return VF * 2;
}
// -> fp128. VF * lxdb/lxeb + extraction of elements.
- return VF + getScalarizationOverhead(Src, false, true);
+ return VF + getScalarizationOverhead(SrcVecTy, false, true);
}
}
- else { // Scalar
- assert (!Dst->isVectorTy());
-
- if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
- if (SrcScalarBits >= 32 ||
- (I != nullptr && isa<LoadInst>(I->getOperand(0))))
- return 1;
- return SrcScalarBits > 1 ? 2 /*i8/i16 extend*/ : 5 /*branch seq.*/;
- }
- if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
- Src->isIntegerTy(1)) {
- if (ST->hasLoadStoreOnCond2())
- return 2; // li 0; loc 1
-
- // This should be extension of a compare i1 result, which is done with
- // ipm and a varying sequence of instructions.
- unsigned Cost = 0;
- if (Opcode == Instruction::SExt)
- Cost = (DstScalarBits < 64 ? 3 : 4);
- if (Opcode == Instruction::ZExt)
- Cost = 3;
- Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
- if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
- // If operands of an fp-type was compared, this costs +1.
- Cost++;
- return Cost;
- }
- }
-
- return BaseT::getCastInstrCost(Opcode, Dst, Src, I);
+ return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
}
// Scalar i8 / i16 operations will typically be made after first extending
@@ -805,10 +844,38 @@ static unsigned getOperandsExtensionCost(const Instruction *I) {
}
int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy, const Instruction *I) {
- if (ValTy->isVectorTy()) {
- assert (ST->hasVector() && "getCmpSelInstrCost() called with vector type.");
- unsigned VF = ValTy->getVectorNumElements();
+ Type *CondTy,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind);
+
+ if (!ValTy->isVectorTy()) {
+ switch (Opcode) {
+ case Instruction::ICmp: {
+ // A loaded value compared with 0 with multiple users becomes Load and
+ // Test. The load is then not foldable, so return 0 cost for the ICmp.
+ unsigned ScalarBits = ValTy->getScalarSizeInBits();
+ if (I != nullptr && ScalarBits >= 32)
+ if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
+ if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() &&
+ C->getZExtValue() == 0)
+ return 0;
+
+ unsigned Cost = 1;
+ if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
+ Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2);
+ return Cost;
+ }
+ case Instruction::Select:
+ if (ValTy->isFloatingPointTy())
+ return 4; // No load on condition for FP - costs a conditional jump.
+ return 1; // Load On Condition / Select Register.
+ }
+ }
+ else if (ST->hasVector()) {
+ unsigned VF = cast<FixedVectorType>(ValTy)->getNumElements();
// Called with a compare instruction.
if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
@@ -856,32 +923,8 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return getNumVectorRegs(ValTy) /*vsel*/ + PackCost;
}
}
- else { // Scalar
- switch (Opcode) {
- case Instruction::ICmp: {
- // A loaded value compared with 0 with multiple users becomes Load and
- // Test. The load is then not foldable, so return 0 cost for the ICmp.
- unsigned ScalarBits = ValTy->getScalarSizeInBits();
- if (I != nullptr && ScalarBits >= 32)
- if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
- if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
- if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() &&
- C->getZExtValue() == 0)
- return 0;
-
- unsigned Cost = 1;
- if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
- Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2);
- return Cost;
- }
- case Instruction::Select:
- if (ValTy->isFloatingPointTy())
- return 4; // No load on condition for FP - costs a conditional jump.
- return 1; // Load On Condition / Select Register.
- }
- }
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, nullptr);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind);
}
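A quick illustration of the cost-kind parameter threaded through above: getCmpSelInstrCost now takes an explicit TTI::TargetCostKind and only applies the SystemZ-specific modeling for reciprocal throughput. The sketch below is not part of the patch; it assumes a SystemZTTIImpl instance TTI and an LLVMContext Ctx are already in scope.

    // Scalar i64 integer compares cost 1 under TCK_RecipThroughput per the
    // code above; other cost kinds defer to the base implementation.
    llvm::Type *I64 = llvm::Type::getInt64Ty(Ctx);
    int Cost = TTI.getCmpSelInstrCost(
        llvm::Instruction::ICmp, I64, /*CondTy=*/nullptr,
        llvm::TargetTransformInfo::TCK_RecipThroughput);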
int SystemZTTIImpl::
@@ -995,9 +1038,14 @@ static bool isBswapIntrinsicCall(const Value *V) {
int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
assert(!Src->isVoidTy() && "Invalid type");
+ // TODO: Handle other cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return 1;
+
if (!Src->isVectorTy() && Opcode == Instruction::Load && I != nullptr) {
// Store the load or its truncated or extended value in FoldedValue.
const Instruction *FoldedValue = nullptr;
@@ -1058,16 +1106,13 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// needed for using / defining the vector operands. The SystemZ version does
// roughly the same but bases the computations on vector permutations
// instead.
-int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- bool UseMaskForCond,
- bool UseMaskForGaps) {
+int SystemZTTIImpl::getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond, bool UseMaskForGaps) {
if (UseMaskForCond || UseMaskForGaps)
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace,
+ Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);
assert(isa<VectorType>(VecTy) &&
"Expect a vector type for interleaved memory op");
@@ -1075,7 +1120,7 @@ int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
// Return the ceiling of dividing A by B.
auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
- unsigned NumElts = VecTy->getVectorNumElements();
+ unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();
assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
unsigned VF = NumElts / Factor;
unsigned NumEltsPerVecReg = (128U / getScalarSizeInBits(VecTy));
@@ -1125,22 +1170,10 @@ static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy) {
return -1;
}
-int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args,
- FastMathFlags FMF, unsigned VF) {
- int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
- if (Cost != -1)
- return Cost;
- return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
-}
-
-int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys,
- FastMathFlags FMF,
- unsigned ScalarizationCostPassed) {
- int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
+int SystemZTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) {
+ int Cost = getVectorIntrinsicInstrCost(ICA.getID(), ICA.getReturnType());
if (Cost != -1)
return Cost;
- return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys,
- FMF, ScalarizationCostPassed);
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
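The two previous getIntrinsicInstrCost overloads (value-based and type-based) are folded into a single hook keyed on IntrinsicCostAttributes. As a rough sketch of the new shape, here is what an equivalent override could look like in another backend; MyTTIImpl and lookupTableCost are hypothetical names used only for illustration.

    int MyTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                         TTI::TargetCostKind CostKind) {
      // Intrinsic ID and return type now come from the attributes object.
      int Cost = lookupTableCost(ICA.getID(), ICA.getReturnType()); // assumed helper
      if (Cost != -1)
        return Cost;
      return BaseT::getIntrinsicInstrCost(ICA, CostKind);
    }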
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index bc4d066881c1..7f8f7f6f923f 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -38,17 +38,21 @@ public:
unsigned getInliningThresholdMultiplier() { return 3; }
- int getIntImmCost(const APInt &Imm, Type *Ty);
+ int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
- int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+ int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty, TTI::TargetCostKind CostKind);
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ Type *Ty, TTI::TargetCostKind CostKind);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
+
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2);
/// @}
@@ -60,8 +64,12 @@ public:
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getCacheLineSize() const override { return 256; }
- unsigned getPrefetchDistance() const override { return 2000; }
- unsigned getMinPrefetchStride() const override { return 2048; }
+ unsigned getPrefetchDistance() const override { return 4500; }
+ unsigned getMinPrefetchStride(unsigned NumMemAccesses,
+ unsigned NumStridedMemAccesses,
+ unsigned NumPrefetches,
+ bool HasCall) const override;
+ bool enableWritePrefetching() const override { return true; }
bool hasDivRemOp(Type *DataType, bool IsSigned);
bool prefersVectorizedAddressing() { return false; }
@@ -71,40 +79,39 @@ public:
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
- int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+ int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+ VectorType *SubTp);
unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
const Instruction *I);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
- unsigned AddressSpace, const Instruction *I = nullptr);
-
- int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false);
-
- int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF,
- unsigned VF = 1);
- int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF,
- unsigned ScalarizationCostPassed = UINT_MAX);
+ unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
+
+ int getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false);
+
+ int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind);
/// @}
};
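Besides threading TTI::TargetCostKind through every cost hook, the header reworks the software-prefetch tuning: the prefetch distance grows from 2000 to 4500, write prefetching is enabled, and getMinPrefetchStride becomes context-sensitive. A hedged sketch of the new hook shape for a hypothetical target follows; the policy and thresholds are illustrative only, not SystemZ's actual tuning.

    unsigned MyTTIImpl::getMinPrefetchStride(unsigned NumMemAccesses,
                                             unsigned NumStridedMemAccesses,
                                             unsigned NumPrefetches,
                                             bool HasCall) const {
      // Illustrative policy: back off in call-heavy or sparsely strided loops.
      if (HasCall || NumStridedMemAccesses * 4 < NumMemAccesses)
        return 4096;
      return 2048;
    }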
diff --git a/contrib/llvm-project/llvm/lib/Target/Target.cpp b/contrib/llvm-project/llvm/lib/Target/Target.cpp
index 8a46c77492c5..7411687e2ca3 100644
--- a/contrib/llvm-project/llvm/lib/Target/Target.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Target.cpp
@@ -111,11 +111,11 @@ unsigned long long LLVMABISizeOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
}
unsigned LLVMABIAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
- return unwrap(TD)->getABITypeAlignment(unwrap(Ty));
+ return unwrap(TD)->getABITypeAlign(unwrap(Ty)).value();
}
unsigned LLVMCallFrameAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
- return unwrap(TD)->getABITypeAlignment(unwrap(Ty));
+ return unwrap(TD)->getABITypeAlign(unwrap(Ty)).value();
}
unsigned LLVMPreferredAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
@@ -124,7 +124,9 @@ unsigned LLVMPreferredAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
unsigned LLVMPreferredAlignmentOfGlobal(LLVMTargetDataRef TD,
LLVMValueRef GlobalVar) {
- return unwrap(TD)->getPreferredAlignment(unwrap<GlobalVariable>(GlobalVar));
+ return unwrap(TD)
+ ->getPreferredAlign(unwrap<GlobalVariable>(GlobalVar))
+ .value();
}
unsigned LLVMElementAtOffset(LLVMTargetDataRef TD, LLVMTypeRef StructTy,
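These C API shims now go through the llvm::Align-based DataLayout accessors instead of the older unsigned-returning ones. A tiny sketch of the pattern, assuming a DataLayout DL and a Type *Ty in scope:

    // Align carries the alignment as a typed value; .value() converts back to
    // a plain integer for C-facing interfaces such as the ones above.
    llvm::Align ABIAlign = DL.getABITypeAlign(Ty);
    unsigned Bytes = static_cast<unsigned>(ABIAlign.value());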
diff --git a/contrib/llvm-project/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/TargetLoweringObjectFile.cpp
index dcd3934de0fa..eea0aeea2c45 100644
--- a/contrib/llvm-project/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -19,10 +19,12 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/SectionKind.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -38,11 +40,10 @@ using namespace llvm;
/// lowering implementations a chance to set up their default sections.
void TargetLoweringObjectFile::Initialize(MCContext &ctx,
const TargetMachine &TM) {
- Ctx = &ctx;
// `Initialize` can be called more than once.
delete Mang;
Mang = new Mangler();
- InitMCObjectFileInfo(TM.getTargetTriple(), TM.isPositionIndependent(), *Ctx,
+ InitMCObjectFileInfo(TM.getTargetTriple(), TM.isPositionIndependent(), ctx,
TM.getCodeModel() == CodeModel::Large);
// Reset various EH DWARF encodings.
@@ -121,7 +122,7 @@ MCSymbol *TargetLoweringObjectFile::getSymbolWithGlobalValueBase(
NameStr += GV->getParent()->getDataLayout().getPrivateGlobalPrefix();
TM.getNameWithPrefix(NameStr, GV, *Mang);
NameStr.append(Suffix.begin(), Suffix.end());
- return Ctx->getOrCreateSymbol(NameStr);
+ return getContext().getOrCreateSymbol(NameStr);
}
MCSymbol *TargetLoweringObjectFile::getCFIPersonalitySymbol(
@@ -142,13 +143,17 @@ void TargetLoweringObjectFile::emitPersonalityValue(MCStreamer &Streamer,
/// may be overridden by the target implementation.
SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalObject *GO,
const TargetMachine &TM){
- assert(!GO->isDeclaration() && !GO->hasAvailableExternallyLinkage() &&
+ assert(!GO->isDeclarationForLinker() &&
"Can only be used for global definitions");
// Functions are classified as text sections.
if (isa<Function>(GO))
return SectionKind::getText();
+ // Basic blocks are classified as text sections.
+ if (isa<BasicBlock>(GO))
+ return SectionKind::getText();
+
// Global variables require more detailed analysis.
const auto *GVar = cast<GlobalVariable>(GO);
@@ -268,12 +273,21 @@ MCSection *TargetLoweringObjectFile::SectionForGlobal(
return SelectSectionForGlobal(GO, Kind, TM);
}
+/// This method computes the appropriate section to emit the specified global
+/// variable or function definition. This should not be passed external (or
+/// available externally) globals.
+MCSection *
+TargetLoweringObjectFile::SectionForGlobal(const GlobalObject *GO,
+ const TargetMachine &TM) const {
+ return SectionForGlobal(GO, getKindForGlobal(GO, TM), TM);
+}
+
MCSection *TargetLoweringObjectFile::getSectionForJumpTable(
const Function &F, const TargetMachine &TM) const {
- unsigned Align = 0;
+ Align Alignment(1);
return getSectionForConstant(F.getParent()->getDataLayout(),
SectionKind::getReadOnly(), /*C=*/nullptr,
- Align);
+ Alignment);
}
bool TargetLoweringObjectFile::shouldPutJumpTableInFunctionSection(
@@ -295,13 +309,19 @@ bool TargetLoweringObjectFile::shouldPutJumpTableInFunctionSection(
/// information, return a section that it should be placed in.
MCSection *TargetLoweringObjectFile::getSectionForConstant(
const DataLayout &DL, SectionKind Kind, const Constant *C,
- unsigned &Align) const {
+ Align &Alignment) const {
if (Kind.isReadOnly() && ReadOnlySection != nullptr)
return ReadOnlySection;
return DataSection;
}
+MCSection *TargetLoweringObjectFile::getSectionForMachineBasicBlock(
+ const Function &F, const MachineBasicBlock &MBB,
+ const TargetMachine &TM) const {
+ return nullptr;
+}
+
/// getTTypeGlobalReference - Return an MCExpr to use for a
/// reference to the specified global variable from exception
/// handling information.
@@ -327,7 +347,7 @@ getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding,
// Emit a label to the streamer for the current position. This gives us
// .-foo addressing.
MCSymbol *PCSym = getContext().createTempSymbol();
- Streamer.EmitLabel(PCSym);
+ Streamer.emitLabel(PCSym);
const MCExpr *PC = MCSymbolRefExpr::create(PCSym, getContext());
return MCBinaryExpr::createSub(Sym, PC, getContext());
}
@@ -337,7 +357,7 @@ getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding,
const MCExpr *TargetLoweringObjectFile::getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
// FIXME: It's not clear what, if any, default this should have - perhaps a
// null return could mean 'no location' & we should just do that here.
- return MCSymbolRefExpr::create(Sym, *Ctx);
+ return MCSymbolRefExpr::create(Sym, getContext());
}
void TargetLoweringObjectFile::getNameWithPrefix(
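TargetLoweringObjectFile picks up a SectionForGlobal overload that derives the SectionKind itself, a getSectionForMachineBasicBlock hook (returning null by default), and symbol creation now goes through getContext() rather than a cached Ctx pointer. A short sketch of the convenience overload, assuming a TargetLoweringObjectFile &TLOF, a const GlobalObject *GO, and a TargetMachine &TM in scope:

    // New form: the kind is computed internally via getKindForGlobal().
    llvm::MCSection *S = TLOF.SectionForGlobal(GO, TM);
    // Equivalent to the pre-existing form:
    //   TLOF.SectionForGlobal(GO, TargetLoweringObjectFile::getKindForGlobal(GO, TM), TM);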
diff --git a/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp
index 97a1eb2f190a..074e9fde79e6 100644
--- a/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/TargetMachine.cpp
@@ -34,10 +34,10 @@ using namespace llvm;
TargetMachine::TargetMachine(const Target &T, StringRef DataLayoutString,
const Triple &TT, StringRef CPU, StringRef FS,
const TargetOptions &Options)
- : TheTarget(T), DL(DataLayoutString), TargetTriple(TT), TargetCPU(CPU),
- TargetFS(FS), AsmInfo(nullptr), MRI(nullptr), MII(nullptr), STI(nullptr),
- RequireStructuredCFG(false), O0WantsFastISel(false),
- DefaultOptions(Options), Options(Options) {}
+ : TheTarget(T), DL(DataLayoutString), TargetTriple(TT),
+ TargetCPU(std::string(CPU)), TargetFS(std::string(FS)), AsmInfo(nullptr),
+ MRI(nullptr), MII(nullptr), STI(nullptr), RequireStructuredCFG(false),
+ O0WantsFastISel(false), DefaultOptions(Options), Options(Options) {}
TargetMachine::~TargetMachine() = default;
@@ -46,17 +46,17 @@ bool TargetMachine::isPositionIndependent() const {
}
/// Reset the target options based on the function's attributes.
+/// setFunctionAttributes should have made the raw attribute value consistent
+/// with the command line flag if used.
+//
// FIXME: This function needs to go away for a number of reasons:
// a) global state on the TargetMachine is terrible in general,
// b) these target options should be passed only on the function
// and not on the TargetMachine (via TargetOptions) at all.
void TargetMachine::resetTargetOptions(const Function &F) const {
-#define RESET_OPTION(X, Y) \
- do { \
- if (F.hasFnAttribute(Y)) \
- Options.X = (F.getFnAttribute(Y).getValueAsString() == "true"); \
- else \
- Options.X = DefaultOptions.X; \
+#define RESET_OPTION(X, Y) \
+ do { \
+ Options.X = (F.getFnAttribute(Y).getValueAsString() == "true"); \
} while (0)
RESET_OPTION(UnsafeFPMath, "unsafe-fp-math");
@@ -193,6 +193,14 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
// Check if we can use copy relocations.
if (!(GV && GV->isThreadLocal()) && RM == Reloc::Static)
return true;
+ } else if (TT.isOSBinFormatELF()) {
+ // If dso_local allows AsmPrinter::getSymbolPreferLocal to use a local
+ // alias, set the flag. We cannot set dso_local for other global values,
+ // because otherwise direct accesses to a probably interposable symbol (even
+ // if the codegen assumes not) will be rejected by the linker.
+ if (!GV || !GV->canBenefitFromLocalAlias())
+ return false;
+ return TT.isX86() && M.noSemanticInterposition();
}
// ELF & wasm support preemption of other symbols.
@@ -258,6 +266,10 @@ void TargetMachine::getNameWithPrefix(SmallVectorImpl<char> &Name,
MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV) const {
const TargetLoweringObjectFile *TLOF = getObjFileLowering();
+ // XCOFF symbols could have special naming convention.
+ if (MCSymbol *TargetSymbol = TLOF->getTargetSymbol(GV, *this))
+ return TargetSymbol;
+
SmallString<128> NameStr;
getNameWithPrefix(NameStr, GV, TLOF->getMangler());
return TLOF->getContext().getOrCreateSymbol(NameStr);
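resetTargetOptions no longer checks hasFnAttribute before reading each string attribute; per the updated comment, setFunctionAttributes is expected to have already made the attribute consistent with the command-line flag. Spelled out, RESET_OPTION(UnsafeFPMath, "unsafe-fp-math") now expands to roughly the following (F is the Function passed in):

    Options.UnsafeFPMath =
        (F.getFnAttribute("unsafe-fp-math").getValueAsString() == "true");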
diff --git a/contrib/llvm-project/llvm/lib/Target/TargetMachineC.cpp b/contrib/llvm-project/llvm/lib/Target/TargetMachineC.cpp
index a38633e1f27e..60fe84cadacc 100644
--- a/contrib/llvm-project/llvm/lib/Target/TargetMachineC.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/TargetMachineC.cpp
@@ -164,12 +164,12 @@ char* LLVMGetTargetMachineTriple(LLVMTargetMachineRef T) {
}
char* LLVMGetTargetMachineCPU(LLVMTargetMachineRef T) {
- std::string StringRep = unwrap(T)->getTargetCPU();
+ std::string StringRep = std::string(unwrap(T)->getTargetCPU());
return strdup(StringRep.c_str());
}
char* LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T) {
- std::string StringRep = unwrap(T)->getTargetFeatureString();
+ std::string StringRep = std::string(unwrap(T)->getTargetFeatureString());
return strdup(StringRep.c_str());
}
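The std::string(...) wrappers above are needed because conversion from llvm::StringRef to std::string is no longer implicit; it has to be spelled out at each use. A minimal sketch, with TM standing in for an arbitrary TargetMachine reference:

    llvm::StringRef CPU = TM.getTargetCPU();
    std::string A = std::string(CPU); // explicit conversion, as in the diff
    std::string B = CPU.str();        // equivalent StringRef::str() spelling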
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
new file mode 100644
index 000000000000..7a899b4b38e2
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
@@ -0,0 +1,1454 @@
+//===-- VEAsmParser.cpp - Parse VE assembly to MCInst instructions --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/VEMCExpr.h"
+#include "MCTargetDesc/VEMCTargetDesc.h"
+#include "TargetInfo/VETargetInfo.h"
+#include "VE.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <memory>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ve-asmparser"
+
+namespace {
+
+class VEOperand;
+
+class VEAsmParser : public MCTargetAsmParser {
+ MCAsmParser &Parser;
+
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "VEGenAsmMatcher.inc"
+
+ /// }
+
+ // public interface of the MCTargetAsmParser.
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) override;
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ int parseRegisterName(unsigned (*matchFn)(StringRef));
+ OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
+ bool ParseDirective(AsmToken DirectiveID) override;
+
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
+ unsigned Kind) override;
+
+ // Custom parse functions for VE specific operands.
+ OperandMatchResultTy parseMEMOperand(OperandVector &Operands);
+ OperandMatchResultTy parseMEMAsOperand(OperandVector &Operands);
+ OperandMatchResultTy parseCCOpOperand(OperandVector &Operands);
+ OperandMatchResultTy parseRDOpOperand(OperandVector &Operands);
+ OperandMatchResultTy parseMImmOperand(OperandVector &Operands);
+ OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Name);
+ OperandMatchResultTy parseVEAsmOperand(std::unique_ptr<VEOperand> &Operand);
+
+ // Helper function to parse expression with a symbol.
+ const MCExpr *extractModifierFromExpr(const MCExpr *E,
+ VEMCExpr::VariantKind &Variant);
+ const MCExpr *fixupVariantKind(const MCExpr *E);
+ bool parseExpression(const MCExpr *&EVal);
+
+ // Split the mnemonic stripping conditional code and quantifiers
+ StringRef splitMnemonic(StringRef Name, SMLoc NameLoc,
+ OperandVector *Operands);
+
+public:
+ VEAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(Options, sti, MII), Parser(parser) {
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
+ }
+};
+
+} // end anonymous namespace
+
+static const MCPhysReg I32Regs[64] = {
+ VE::SW0, VE::SW1, VE::SW2, VE::SW3, VE::SW4, VE::SW5, VE::SW6,
+ VE::SW7, VE::SW8, VE::SW9, VE::SW10, VE::SW11, VE::SW12, VE::SW13,
+ VE::SW14, VE::SW15, VE::SW16, VE::SW17, VE::SW18, VE::SW19, VE::SW20,
+ VE::SW21, VE::SW22, VE::SW23, VE::SW24, VE::SW25, VE::SW26, VE::SW27,
+ VE::SW28, VE::SW29, VE::SW30, VE::SW31, VE::SW32, VE::SW33, VE::SW34,
+ VE::SW35, VE::SW36, VE::SW37, VE::SW38, VE::SW39, VE::SW40, VE::SW41,
+ VE::SW42, VE::SW43, VE::SW44, VE::SW45, VE::SW46, VE::SW47, VE::SW48,
+ VE::SW49, VE::SW50, VE::SW51, VE::SW52, VE::SW53, VE::SW54, VE::SW55,
+ VE::SW56, VE::SW57, VE::SW58, VE::SW59, VE::SW60, VE::SW61, VE::SW62,
+ VE::SW63};
+
+static const MCPhysReg F32Regs[64] = {
+ VE::SF0, VE::SF1, VE::SF2, VE::SF3, VE::SF4, VE::SF5, VE::SF6,
+ VE::SF7, VE::SF8, VE::SF9, VE::SF10, VE::SF11, VE::SF12, VE::SF13,
+ VE::SF14, VE::SF15, VE::SF16, VE::SF17, VE::SF18, VE::SF19, VE::SF20,
+ VE::SF21, VE::SF22, VE::SF23, VE::SF24, VE::SF25, VE::SF26, VE::SF27,
+ VE::SF28, VE::SF29, VE::SF30, VE::SF31, VE::SF32, VE::SF33, VE::SF34,
+ VE::SF35, VE::SF36, VE::SF37, VE::SF38, VE::SF39, VE::SF40, VE::SF41,
+ VE::SF42, VE::SF43, VE::SF44, VE::SF45, VE::SF46, VE::SF47, VE::SF48,
+ VE::SF49, VE::SF50, VE::SF51, VE::SF52, VE::SF53, VE::SF54, VE::SF55,
+ VE::SF56, VE::SF57, VE::SF58, VE::SF59, VE::SF60, VE::SF61, VE::SF62,
+ VE::SF63};
+
+static const MCPhysReg F128Regs[32] = {
+ VE::Q0, VE::Q1, VE::Q2, VE::Q3, VE::Q4, VE::Q5, VE::Q6, VE::Q7,
+ VE::Q8, VE::Q9, VE::Q10, VE::Q11, VE::Q12, VE::Q13, VE::Q14, VE::Q15,
+ VE::Q16, VE::Q17, VE::Q18, VE::Q19, VE::Q20, VE::Q21, VE::Q22, VE::Q23,
+ VE::Q24, VE::Q25, VE::Q26, VE::Q27, VE::Q28, VE::Q29, VE::Q30, VE::Q31};
+
+static const MCPhysReg MISCRegs[31] = {
+ VE::USRCC, VE::PSW, VE::SAR, VE::NoRegister,
+ VE::NoRegister, VE::NoRegister, VE::NoRegister, VE::PMMR,
+ VE::PMCR0, VE::PMCR1, VE::PMCR2, VE::PMCR3,
+ VE::NoRegister, VE::NoRegister, VE::NoRegister, VE::NoRegister,
+ VE::PMC0, VE::PMC1, VE::PMC2, VE::PMC3,
+ VE::PMC4, VE::PMC5, VE::PMC6, VE::PMC7,
+ VE::PMC8, VE::PMC9, VE::PMC10, VE::PMC11,
+ VE::PMC12, VE::PMC13, VE::PMC14};
+
+namespace {
+
+/// VEOperand - Instances of this class represent a parsed VE machine
+/// instruction.
+class VEOperand : public MCParsedAsmOperand {
+private:
+ enum KindTy {
+ k_Token,
+ k_Register,
+ k_Immediate,
+ // SX-Aurora ASX form is disp(index, base).
+ k_MemoryRegRegImm, // base=reg, index=reg, disp=imm
+ k_MemoryRegImmImm, // base=reg, index=imm, disp=imm
+ k_MemoryZeroRegImm, // base=0, index=reg, disp=imm
+ k_MemoryZeroImmImm, // base=0, index=imm, disp=imm
+ // SX-Aurora AS form is disp(base).
+ k_MemoryRegImm, // base=reg, disp=imm
+ k_MemoryZeroImm, // base=0, disp=imm
+ // Other special cases for Aurora VE
+ k_CCOp, // condition code
+ k_RDOp, // rounding mode
+ k_MImmOp, // Special immediate value: a sequential bit stream of 0s or 1s.
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+
+ struct Token {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned Base;
+ unsigned IndexReg;
+ const MCExpr *Index;
+ const MCExpr *Offset;
+ };
+
+ struct CCOp {
+ unsigned CCVal;
+ };
+
+ struct RDOp {
+ unsigned RDVal;
+ };
+
+ struct MImmOp {
+ const MCExpr *Val;
+ bool M0Flag;
+ };
+
+ union {
+ struct Token Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
+ struct CCOp CC;
+ struct RDOp RD;
+ struct MImmOp MImm;
+ };
+
+public:
+ VEOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+
+ bool isToken() const override { return Kind == k_Token; }
+ bool isReg() const override { return Kind == k_Register; }
+ bool isImm() const override { return Kind == k_Immediate; }
+ bool isMem() const override {
+ return isMEMrri() || isMEMrii() || isMEMzri() || isMEMzii() || isMEMri() ||
+ isMEMzi();
+ }
+ bool isMEMrri() const { return Kind == k_MemoryRegRegImm; }
+ bool isMEMrii() const { return Kind == k_MemoryRegImmImm; }
+ bool isMEMzri() const { return Kind == k_MemoryZeroRegImm; }
+ bool isMEMzii() const { return Kind == k_MemoryZeroImmImm; }
+ bool isMEMri() const { return Kind == k_MemoryRegImm; }
+ bool isMEMzi() const { return Kind == k_MemoryZeroImm; }
+ bool isCCOp() const { return Kind == k_CCOp; }
+ bool isRDOp() const { return Kind == k_RDOp; }
+ bool isZero() {
+ if (!isImm())
+ return false;
+
+ // Constant case
+ if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Value = ConstExpr->getValue();
+ return Value == 0;
+ }
+ return false;
+ }
+ bool isUImm0to2() {
+ if (!isImm())
+ return false;
+
+ // Constant case
+ if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Value = ConstExpr->getValue();
+ return Value >= 0 && Value < 3;
+ }
+ return false;
+ }
+ bool isUImm1() {
+ if (!isImm())
+ return false;
+
+ // Constant case
+ if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Value = ConstExpr->getValue();
+ return isUInt<1>(Value);
+ }
+ return false;
+ }
+ bool isUImm2() {
+ if (!isImm())
+ return false;
+
+ // Constant case
+ if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Value = ConstExpr->getValue();
+ return isUInt<2>(Value);
+ }
+ return false;
+ }
+ bool isUImm3() {
+ if (!isImm())
+ return false;
+
+ // Constant case
+ if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Value = ConstExpr->getValue();
+ return isUInt<3>(Value);
+ }
+ return false;
+ }
+ bool isUImm6() {
+ if (!isImm())
+ return false;
+
+ // Constant case
+ if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Value = ConstExpr->getValue();
+ return isUInt<6>(Value);
+ }
+ return false;
+ }
+ bool isUImm7() {
+ if (!isImm())
+ return false;
+
+ // Constant case
+ if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Value = ConstExpr->getValue();
+ return isUInt<7>(Value);
+ }
+ return false;
+ }
+ bool isSImm7() {
+ if (!isImm())
+ return false;
+
+ // Constant case
+ if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Value = ConstExpr->getValue();
+ return isInt<7>(Value);
+ }
+ return false;
+ }
+ bool isMImm() const {
+ if (Kind != k_MImmOp)
+ return false;
+
+ // Constant case
+ if (const auto *ConstExpr = dyn_cast<MCConstantExpr>(MImm.Val)) {
+ int64_t Value = ConstExpr->getValue();
+ return isUInt<6>(Value);
+ }
+ return false;
+ }
+
+ StringRef getToken() const {
+ assert(Kind == k_Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ unsigned getReg() const override {
+ assert((Kind == k_Register) && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const MCExpr *getImm() const {
+ assert((Kind == k_Immediate) && "Invalid access!");
+ return Imm.Val;
+ }
+
+ unsigned getMemBase() const {
+ assert((Kind == k_MemoryRegRegImm || Kind == k_MemoryRegImmImm ||
+ Kind == k_MemoryRegImm) &&
+ "Invalid access!");
+ return Mem.Base;
+ }
+
+ unsigned getMemIndexReg() const {
+ assert((Kind == k_MemoryRegRegImm || Kind == k_MemoryZeroRegImm) &&
+ "Invalid access!");
+ return Mem.IndexReg;
+ }
+
+ const MCExpr *getMemIndex() const {
+ assert((Kind == k_MemoryRegImmImm || Kind == k_MemoryZeroImmImm) &&
+ "Invalid access!");
+ return Mem.Index;
+ }
+
+ const MCExpr *getMemOffset() const {
+ assert((Kind == k_MemoryRegRegImm || Kind == k_MemoryRegImmImm ||
+ Kind == k_MemoryZeroImmImm || Kind == k_MemoryZeroRegImm ||
+ Kind == k_MemoryRegImm || Kind == k_MemoryZeroImm) &&
+ "Invalid access!");
+ return Mem.Offset;
+ }
+
+ void setMemOffset(const MCExpr *off) {
+ assert((Kind == k_MemoryRegRegImm || Kind == k_MemoryRegImmImm ||
+ Kind == k_MemoryZeroImmImm || Kind == k_MemoryZeroRegImm ||
+ Kind == k_MemoryRegImm || Kind == k_MemoryZeroImm) &&
+ "Invalid access!");
+ Mem.Offset = off;
+ }
+
+ unsigned getCCVal() const {
+ assert((Kind == k_CCOp) && "Invalid access!");
+ return CC.CCVal;
+ }
+
+ unsigned getRDVal() const {
+ assert((Kind == k_RDOp) && "Invalid access!");
+ return RD.RDVal;
+ }
+
+ const MCExpr *getMImmVal() const {
+ assert((Kind == k_MImmOp) && "Invalid access!");
+ return MImm.Val;
+ }
+ bool getM0Flag() const {
+ assert((Kind == k_MImmOp) && "Invalid access!");
+ return MImm.M0Flag;
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const override { return StartLoc; }
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const override { return EndLoc; }
+
+ void print(raw_ostream &OS) const override {
+ switch (Kind) {
+ case k_Token:
+ OS << "Token: " << getToken() << "\n";
+ break;
+ case k_Register:
+ OS << "Reg: #" << getReg() << "\n";
+ break;
+ case k_Immediate:
+ OS << "Imm: " << getImm() << "\n";
+ break;
+ case k_MemoryRegRegImm:
+ assert(getMemOffset() != nullptr);
+ OS << "Mem: #" << getMemBase() << "+#" << getMemIndexReg() << "+"
+ << *getMemOffset() << "\n";
+ break;
+ case k_MemoryRegImmImm:
+ assert(getMemIndex() != nullptr && getMemOffset() != nullptr);
+ OS << "Mem: #" << getMemBase() << "+" << *getMemIndex() << "+"
+ << *getMemOffset() << "\n";
+ break;
+ case k_MemoryZeroRegImm:
+ assert(getMemOffset() != nullptr);
+ OS << "Mem: 0+#" << getMemIndexReg() << "+" << *getMemOffset() << "\n";
+ break;
+ case k_MemoryZeroImmImm:
+ assert(getMemIndex() != nullptr && getMemOffset() != nullptr);
+ OS << "Mem: 0+" << *getMemIndex() << "+" << *getMemOffset() << "\n";
+ break;
+ case k_MemoryRegImm:
+ assert(getMemOffset() != nullptr);
+ OS << "Mem: #" << getMemBase() << "+" << *getMemOffset() << "\n";
+ break;
+ case k_MemoryZeroImm:
+ assert(getMemOffset() != nullptr);
+ OS << "Mem: 0+" << *getMemOffset() << "\n";
+ break;
+ case k_CCOp:
+ OS << "CCOp: " << getCCVal() << "\n";
+ break;
+ case k_RDOp:
+ OS << "RDOp: " << getRDVal() << "\n";
+ break;
+ case k_MImmOp:
+ OS << "MImm: (" << getMImmVal() << (getM0Flag() ? ")0" : ")1") << "\n";
+ break;
+ }
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getReg()));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCExpr *Expr = getImm();
+ addExpr(Inst, Expr);
+ }
+
+ void addZeroOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
+ void addUImm0to2Operands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
+ void addUImm1Operands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
+ void addUImm2Operands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
+ void addUImm3Operands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
+ void addUImm6Operands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
+ void addUImm7Operands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
+ void addSImm7Operands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediate when possible. Null MCExpr = 0.
+ if (!Expr)
+ Inst.addOperand(MCOperand::createImm(0));
+ else if (const auto *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ }
+
+ void addMEMrriOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::createReg(getMemBase()));
+ Inst.addOperand(MCOperand::createReg(getMemIndexReg()));
+ addExpr(Inst, getMemOffset());
+ }
+
+ void addMEMriiOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::createReg(getMemBase()));
+ addExpr(Inst, getMemIndex());
+ addExpr(Inst, getMemOffset());
+ }
+
+ void addMEMzriOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::createImm(0));
+ Inst.addOperand(MCOperand::createReg(getMemIndexReg()));
+ addExpr(Inst, getMemOffset());
+ }
+
+ void addMEMziiOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::createImm(0));
+ addExpr(Inst, getMemIndex());
+ addExpr(Inst, getMemOffset());
+ }
+
+ void addMEMriOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::createReg(getMemBase()));
+ addExpr(Inst, getMemOffset());
+ }
+
+ void addMEMziOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::createImm(0));
+ addExpr(Inst, getMemOffset());
+ }
+
+ void addCCOpOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::createImm(getCCVal()));
+ }
+
+ void addRDOpOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::createImm(getRDVal()));
+ }
+
+ void addMImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const auto *ConstExpr = dyn_cast<MCConstantExpr>(getMImmVal());
+ assert(ConstExpr && "Null operands!");
+ int64_t Value = ConstExpr->getValue();
+ if (getM0Flag())
+ Value += 64;
+ Inst.addOperand(MCOperand::createImm(Value));
+ }
+
+ static std::unique_ptr<VEOperand> CreateToken(StringRef Str, SMLoc S) {
+ auto Op = std::make_unique<VEOperand>(k_Token);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static std::unique_ptr<VEOperand> CreateReg(unsigned RegNum, SMLoc S,
+ SMLoc E) {
+ auto Op = std::make_unique<VEOperand>(k_Register);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<VEOperand> CreateImm(const MCExpr *Val, SMLoc S,
+ SMLoc E) {
+ auto Op = std::make_unique<VEOperand>(k_Immediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<VEOperand> CreateCCOp(unsigned CCVal, SMLoc S,
+ SMLoc E) {
+ auto Op = std::make_unique<VEOperand>(k_CCOp);
+ Op->CC.CCVal = CCVal;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<VEOperand> CreateRDOp(unsigned RDVal, SMLoc S,
+ SMLoc E) {
+ auto Op = std::make_unique<VEOperand>(k_RDOp);
+ Op->RD.RDVal = RDVal;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<VEOperand> CreateMImm(const MCExpr *Val, bool Flag,
+ SMLoc S, SMLoc E) {
+ auto Op = std::make_unique<VEOperand>(k_MImmOp);
+ Op->MImm.Val = Val;
+ Op->MImm.M0Flag = Flag;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static bool MorphToI32Reg(VEOperand &Op) {
+ unsigned Reg = Op.getReg();
+ unsigned regIdx = Reg - VE::SX0;
+ if (regIdx > 63)
+ return false;
+ Op.Reg.RegNum = I32Regs[regIdx];
+ return true;
+ }
+
+ static bool MorphToF32Reg(VEOperand &Op) {
+ unsigned Reg = Op.getReg();
+ unsigned regIdx = Reg - VE::SX0;
+ if (regIdx > 63)
+ return false;
+ Op.Reg.RegNum = F32Regs[regIdx];
+ return true;
+ }
+
+ static bool MorphToF128Reg(VEOperand &Op) {
+ unsigned Reg = Op.getReg();
+ unsigned regIdx = Reg - VE::SX0;
+ if (regIdx % 2 || regIdx > 63)
+ return false;
+ Op.Reg.RegNum = F128Regs[regIdx / 2];
+ return true;
+ }
+
+ static bool MorphToMISCReg(VEOperand &Op) {
+ const auto *ConstExpr = dyn_cast<MCConstantExpr>(Op.getImm());
+ if (!ConstExpr)
+ return false;
+ unsigned regIdx = ConstExpr->getValue();
+ if (regIdx > 31 || MISCRegs[regIdx] == VE::NoRegister)
+ return false;
+ Op.Kind = k_Register;
+ Op.Reg.RegNum = MISCRegs[regIdx];
+ return true;
+ }
+
+ static std::unique_ptr<VEOperand>
+ MorphToMEMri(unsigned Base, std::unique_ptr<VEOperand> Op) {
+ const MCExpr *Imm = Op->getImm();
+ Op->Kind = k_MemoryRegImm;
+ Op->Mem.Base = Base;
+ Op->Mem.IndexReg = 0;
+ Op->Mem.Index = nullptr;
+ Op->Mem.Offset = Imm;
+ return Op;
+ }
+
+ static std::unique_ptr<VEOperand>
+ MorphToMEMzi(std::unique_ptr<VEOperand> Op) {
+ const MCExpr *Imm = Op->getImm();
+ Op->Kind = k_MemoryZeroImm;
+ Op->Mem.Base = 0;
+ Op->Mem.IndexReg = 0;
+ Op->Mem.Index = nullptr;
+ Op->Mem.Offset = Imm;
+ return Op;
+ }
+
+ static std::unique_ptr<VEOperand>
+ MorphToMEMrri(unsigned Base, unsigned Index, std::unique_ptr<VEOperand> Op) {
+ const MCExpr *Imm = Op->getImm();
+ Op->Kind = k_MemoryRegRegImm;
+ Op->Mem.Base = Base;
+ Op->Mem.IndexReg = Index;
+ Op->Mem.Index = nullptr;
+ Op->Mem.Offset = Imm;
+ return Op;
+ }
+
+ static std::unique_ptr<VEOperand>
+ MorphToMEMrii(unsigned Base, const MCExpr *Index,
+ std::unique_ptr<VEOperand> Op) {
+ const MCExpr *Imm = Op->getImm();
+ Op->Kind = k_MemoryRegImmImm;
+ Op->Mem.Base = Base;
+ Op->Mem.IndexReg = 0;
+ Op->Mem.Index = Index;
+ Op->Mem.Offset = Imm;
+ return Op;
+ }
+
+ static std::unique_ptr<VEOperand>
+ MorphToMEMzri(unsigned Index, std::unique_ptr<VEOperand> Op) {
+ const MCExpr *Imm = Op->getImm();
+ Op->Kind = k_MemoryZeroRegImm;
+ Op->Mem.Base = 0;
+ Op->Mem.IndexReg = Index;
+ Op->Mem.Index = nullptr;
+ Op->Mem.Offset = Imm;
+ return Op;
+ }
+
+ static std::unique_ptr<VEOperand>
+ MorphToMEMzii(const MCExpr *Index, std::unique_ptr<VEOperand> Op) {
+ const MCExpr *Imm = Op->getImm();
+ Op->Kind = k_MemoryZeroImmImm;
+ Op->Mem.Base = 0;
+ Op->Mem.IndexReg = 0;
+ Op->Mem.Index = Index;
+ Op->Mem.Offset = Imm;
+ return Op;
+ }
+};
+
+} // end anonymous namespace
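One detail of the VEOperand class worth spelling out: an MImm operand is written as "(N)0" or "(N)1", and addMImmOperands() folds the trailing flag into the encoded immediate by adding 64 for the "(N)0" form. A tiny sketch of that encoding; the helper name is made up for illustration.

    // Mirrors the `Value += 64` in VEOperand::addMImmOperands().
    static int64_t encodeMImm(int64_t N, bool M0Flag) {
      return M0Flag ? N + 64 : N; // "(N)0" -> N + 64, "(N)1" -> N
    }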
+
+bool VEAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out, uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+ unsigned MatchResult =
+ MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
+ switch (MatchResult) {
+ case Match_Success:
+ Inst.setLoc(IDLoc);
+ Out.emitInstruction(Inst, getSTI());
+ return false;
+
+ case Match_MissingFeature:
+ return Error(IDLoc,
+ "instruction requires a CPU feature not currently enabled");
+
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0ULL) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((VEOperand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ case Match_MnemonicFail:
+ return Error(IDLoc, "invalid instruction mnemonic");
+ }
+ llvm_unreachable("Implement any new match types added!");
+}
+
+bool VEAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success)
+ return Error(StartLoc, "invalid register name");
+ return false;
+}
+
+/// Parses a register name using a given matching function.
+/// Checks for lowercase or uppercase if necessary.
+int VEAsmParser::parseRegisterName(unsigned (*matchFn)(StringRef)) {
+ StringRef Name = Parser.getTok().getString();
+
+ int RegNum = matchFn(Name);
+
+ // GCC supports case-insensitive register names. All of the VE register
+ // names are lower case.
+ if (RegNum == VE::NoRegister) {
+ RegNum = matchFn(Name.lower());
+ }
+
+ return RegNum;
+}
+
+/// Maps from the set of all register names to a register number.
+/// \note Generated by TableGen.
+static unsigned MatchRegisterName(StringRef Name);
+
+/// Maps from the set of all alternative register names to a register number.
+/// \note Generated by TableGen.
+static unsigned MatchRegisterAltName(StringRef Name);
+
+OperandMatchResultTy
+VEAsmParser::tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+ const AsmToken Tok = Parser.getTok();
+ StartLoc = Tok.getLoc();
+ EndLoc = Tok.getEndLoc();
+ RegNo = 0;
+ if (getLexer().getKind() != AsmToken::Percent)
+ return MatchOperand_NoMatch;
+ Parser.Lex();
+
+ RegNo = parseRegisterName(&MatchRegisterName);
+ if (RegNo == VE::NoRegister)
+ RegNo = parseRegisterName(&MatchRegisterAltName);
+
+ if (RegNo != VE::NoRegister) {
+ Parser.Lex();
+ return MatchOperand_Success;
+ }
+
+ getLexer().UnLex(Tok);
+ return MatchOperand_NoMatch;
+}
+
+static StringRef parseCC(StringRef Name, unsigned Prefix, unsigned Suffix,
+ bool IntegerCC, bool OmitCC, SMLoc NameLoc,
+ OperandVector *Operands) {
+ // Parse instructions with a conditional code. For example, 'bne' is
+ // converted into two operands 'b' and 'ne'.
+ StringRef Cond = Name.slice(Prefix, Suffix);
+ VECC::CondCode CondCode =
+ IntegerCC ? stringToVEICondCode(Cond) : stringToVEFCondCode(Cond);
+
+ // If OmitCC is enabled, CC_AT and CC_AF are treated as part of the mnemonic.
+ if (CondCode != VECC::UNKNOWN &&
+ (!OmitCC || (CondCode != VECC::CC_AT && CondCode != VECC::CC_AF))) {
+ StringRef SuffixStr = Name.substr(Suffix);
+ // Push "b".
+ Name = Name.slice(0, Prefix);
+ Operands->push_back(VEOperand::CreateToken(Name, NameLoc));
+ // Push $cond part.
+ SMLoc CondLoc = SMLoc::getFromPointer(NameLoc.getPointer() + Prefix);
+ SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() + Suffix);
+ Operands->push_back(VEOperand::CreateCCOp(CondCode, CondLoc, SuffixLoc));
+ // push suffix like ".l.t"
+ if (!SuffixStr.empty())
+ Operands->push_back(VEOperand::CreateToken(SuffixStr, SuffixLoc));
+ } else {
+ Operands->push_back(VEOperand::CreateToken(Name, NameLoc));
+ }
+ return Name;
+}
+
+static StringRef parseRD(StringRef Name, unsigned Prefix, SMLoc NameLoc,
+ OperandVector *Operands) {
+ // Parse instructions with a rounding-mode suffix. For example,
+ // 'cvt.w.d.sx.rz' is converted into two operands 'cvt.w.d.sx' and '.rz'.
+ StringRef RD = Name.substr(Prefix);
+ VERD::RoundingMode RoundingMode = stringToVERD(RD);
+
+ if (RoundingMode != VERD::UNKNOWN) {
+ Name = Name.slice(0, Prefix);
+ // push 1st like `cvt.w.d.sx`
+ Operands->push_back(VEOperand::CreateToken(Name, NameLoc));
+ SMLoc SuffixLoc =
+ SMLoc::getFromPointer(NameLoc.getPointer() + (RD.data() - Name.data()));
+ SMLoc SuffixEnd =
+ SMLoc::getFromPointer(NameLoc.getPointer() + (RD.end() - Name.data()));
+ // push $round if it has rounding mode
+ Operands->push_back(
+ VEOperand::CreateRDOp(RoundingMode, SuffixLoc, SuffixEnd));
+ } else {
+ Operands->push_back(VEOperand::CreateToken(Name, NameLoc));
+ }
+ return Name;
+}
+
+// Split the mnemonic into ASM operand, conditional code and instruction
+// qualifier (half-word, byte).
+StringRef VEAsmParser::splitMnemonic(StringRef Name, SMLoc NameLoc,
+ OperandVector *Operands) {
+ // Create the leading tokens for the mnemonic
+ StringRef Mnemonic = Name;
+
+ if (Name[0] == 'b') {
+ // Match b?? or br??.
+ size_t Start = 1;
+ size_t Next = Name.find('.');
+ // Adjust position of CondCode.
+ if (Name.size() > 1 && Name[1] == 'r')
+ Start = 2;
+ // Check suffix.
+ bool ICC = true;
+ if (Next + 1 < Name.size() &&
+ (Name[Next + 1] == 'd' || Name[Next + 1] == 's'))
+ ICC = false;
+ Mnemonic = parseCC(Name, Start, Next, ICC, true, NameLoc, Operands);
+ } else if (Name.startswith("cmov.l.") || Name.startswith("cmov.w.") ||
+ Name.startswith("cmov.d.") || Name.startswith("cmov.s.")) {
+ bool ICC = Name[5] == 'l' || Name[5] == 'w';
+ Mnemonic = parseCC(Name, 7, Name.size(), ICC, false, NameLoc, Operands);
+ } else if (Name.startswith("cvt.w.d.sx") || Name.startswith("cvt.w.d.zx") ||
+ Name.startswith("cvt.w.s.sx") || Name.startswith("cvt.w.s.zx")) {
+ Mnemonic = parseRD(Name, 10, NameLoc, Operands);
+ } else if (Name.startswith("cvt.l.d")) {
+ Mnemonic = parseRD(Name, 7, NameLoc, Operands);
+ } else {
+ Operands->push_back(VEOperand::CreateToken(Mnemonic, NameLoc));
+ }
+
+ return Mnemonic;
+}
+
+static void applyMnemonicAliases(StringRef &Mnemonic,
+ const FeatureBitset &Features,
+ unsigned VariantID);
+
+bool VEAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) {
+ // If the target architecture uses MnemonicAlias, call it here to parse
+ // operands correctly.
+ applyMnemonicAliases(Name, getAvailableFeatures(), 0);
+
+ // Split the name into the first token and the rest, e.g. "bgt.l.t" into "b",
+ // "gt", and ".l.t". We treat "b" as the mnemonic, "gt" as the first operand,
+ // and ".l.t" as the second operand.
+ StringRef Mnemonic = splitMnemonic(Name, NameLoc, &Operands);
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ // Read the first operand.
+ if (parseOperand(Operands, Mnemonic) != MatchOperand_Success) {
+ SMLoc Loc = getLexer().getLoc();
+ return Error(Loc, "unexpected token");
+ }
+
+ while (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+ // Parse and remember the operand.
+ if (parseOperand(Operands, Mnemonic) != MatchOperand_Success) {
+ SMLoc Loc = getLexer().getLoc();
+ return Error(Loc, "unexpected token");
+ }
+ }
+ }
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ return Error(Loc, "unexpected token");
+ }
+ Parser.Lex(); // Consume the EndOfStatement.
+ return false;
+}
+
+bool VEAsmParser::ParseDirective(AsmToken DirectiveID) {
+ // Let the MC layer handle other directives.
+ return true;
+}
+
+/// Extract \code @lo32/@hi32/etc \endcode modifier from expression.
+/// Recursively scan the expression and check for VK_VE_HI32/LO32/etc
+/// symbol variants. If all symbols with modifier use the same
+/// variant, return the corresponding VEMCExpr::VariantKind,
+/// and a modified expression using the default symbol variant.
+/// Otherwise, return NULL.
+const MCExpr *
+VEAsmParser::extractModifierFromExpr(const MCExpr *E,
+ VEMCExpr::VariantKind &Variant) {
+ MCContext &Context = getParser().getContext();
+ Variant = VEMCExpr::VK_VE_None;
+
+ switch (E->getKind()) {
+ case MCExpr::Target:
+ case MCExpr::Constant:
+ return nullptr;
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
+
+ switch (SRE->getKind()) {
+ case MCSymbolRefExpr::VK_None:
+ // Use VK_VE_REFLONG for a symbol without modifiers.
+ Variant = VEMCExpr::VK_VE_REFLONG;
+ break;
+ case MCSymbolRefExpr::VK_VE_HI32:
+ Variant = VEMCExpr::VK_VE_HI32;
+ break;
+ case MCSymbolRefExpr::VK_VE_LO32:
+ Variant = VEMCExpr::VK_VE_LO32;
+ break;
+ case MCSymbolRefExpr::VK_VE_PC_HI32:
+ Variant = VEMCExpr::VK_VE_PC_HI32;
+ break;
+ case MCSymbolRefExpr::VK_VE_PC_LO32:
+ Variant = VEMCExpr::VK_VE_PC_LO32;
+ break;
+ case MCSymbolRefExpr::VK_VE_GOT_HI32:
+ Variant = VEMCExpr::VK_VE_GOT_HI32;
+ break;
+ case MCSymbolRefExpr::VK_VE_GOT_LO32:
+ Variant = VEMCExpr::VK_VE_GOT_LO32;
+ break;
+ case MCSymbolRefExpr::VK_VE_GOTOFF_HI32:
+ Variant = VEMCExpr::VK_VE_GOTOFF_HI32;
+ break;
+ case MCSymbolRefExpr::VK_VE_GOTOFF_LO32:
+ Variant = VEMCExpr::VK_VE_GOTOFF_LO32;
+ break;
+ case MCSymbolRefExpr::VK_VE_PLT_HI32:
+ Variant = VEMCExpr::VK_VE_PLT_HI32;
+ break;
+ case MCSymbolRefExpr::VK_VE_PLT_LO32:
+ Variant = VEMCExpr::VK_VE_PLT_LO32;
+ break;
+ case MCSymbolRefExpr::VK_VE_TLS_GD_HI32:
+ Variant = VEMCExpr::VK_VE_TLS_GD_HI32;
+ break;
+ case MCSymbolRefExpr::VK_VE_TLS_GD_LO32:
+ Variant = VEMCExpr::VK_VE_TLS_GD_LO32;
+ break;
+ case MCSymbolRefExpr::VK_VE_TPOFF_HI32:
+ Variant = VEMCExpr::VK_VE_TPOFF_HI32;
+ break;
+ case MCSymbolRefExpr::VK_VE_TPOFF_LO32:
+ Variant = VEMCExpr::VK_VE_TPOFF_LO32;
+ break;
+ default:
+ return nullptr;
+ }
+
+ return MCSymbolRefExpr::create(&SRE->getSymbol(), Context);
+ }
+
+ case MCExpr::Unary: {
+ const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
+ const MCExpr *Sub = extractModifierFromExpr(UE->getSubExpr(), Variant);
+ if (!Sub)
+ return nullptr;
+ return MCUnaryExpr::create(UE->getOpcode(), Sub, Context);
+ }
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+ VEMCExpr::VariantKind LHSVariant, RHSVariant;
+ const MCExpr *LHS = extractModifierFromExpr(BE->getLHS(), LHSVariant);
+ const MCExpr *RHS = extractModifierFromExpr(BE->getRHS(), RHSVariant);
+
+ if (!LHS && !RHS)
+ return nullptr;
+
+ if (!LHS)
+ LHS = BE->getLHS();
+ if (!RHS)
+ RHS = BE->getRHS();
+
+ if (LHSVariant == VEMCExpr::VK_VE_None)
+ Variant = RHSVariant;
+ else if (RHSVariant == VEMCExpr::VK_VE_None)
+ Variant = LHSVariant;
+ else if (LHSVariant == RHSVariant)
+ Variant = LHSVariant;
+ else
+ return nullptr;
+
+ return MCBinaryExpr::create(BE->getOpcode(), LHS, RHS, Context);
+ }
+ }
+
+ llvm_unreachable("Invalid expression kind!");
+}
+
+const MCExpr *VEAsmParser::fixupVariantKind(const MCExpr *E) {
+ MCContext &Context = getParser().getContext();
+
+ switch (E->getKind()) {
+ case MCExpr::Target:
+ case MCExpr::Constant:
+ case MCExpr::SymbolRef:
+ return E;
+
+ case MCExpr::Unary: {
+ const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
+ const MCExpr *Sub = fixupVariantKind(UE->getSubExpr());
+ if (Sub == UE->getSubExpr())
+ return E;
+ return MCUnaryExpr::create(UE->getOpcode(), Sub, Context);
+ }
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+ const MCExpr *LHS = fixupVariantKind(BE->getLHS());
+ const MCExpr *RHS = fixupVariantKind(BE->getRHS());
+ if (LHS == BE->getLHS() && RHS == BE->getRHS())
+ return E;
+ return MCBinaryExpr::create(BE->getOpcode(), LHS, RHS, Context);
+ }
+ }
+
+ llvm_unreachable("Invalid expression kind!");
+}
+
+/// ParseExpression. This differs from the default "parseExpression" in that
+/// it handles modifiers.
+bool VEAsmParser::parseExpression(const MCExpr *&EVal) {
+ // Handle \code symbol @lo32/@hi32/etc \endcode.
+ if (getParser().parseExpression(EVal))
+ return true;
+
+ // Convert MCSymbolRefExpr with VK_* to MCExpr with VK_*.
+ EVal = fixupVariantKind(EVal);
+ VEMCExpr::VariantKind Variant;
+ const MCExpr *E = extractModifierFromExpr(EVal, Variant);
+ if (E)
+ EVal = VEMCExpr::create(Variant, E, getParser().getContext());
+
+ return false;
+}
+
+OperandMatchResultTy VEAsmParser::parseMEMOperand(OperandVector &Operands) {
+ LLVM_DEBUG(dbgs() << "parseMEMOperand\n");
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ SMLoc E = Tok.getEndLoc();
+ // Parse ASX format
+ // disp
+ // disp(, base)
+ // disp(index)
+ // disp(index, base)
+ // (, base)
+ // (index)
+ // (index, base)
+
+ std::unique_ptr<VEOperand> Offset;
+ switch (getLexer().getKind()) {
+ default:
+ return MatchOperand_NoMatch;
+
+ case AsmToken::Minus:
+ case AsmToken::Integer:
+ case AsmToken::Dot:
+ case AsmToken::Identifier: {
+ const MCExpr *EVal;
+ if (!parseExpression(EVal))
+ Offset = VEOperand::CreateImm(EVal, S, E);
+ else
+ return MatchOperand_NoMatch;
+ break;
+ }
+
+ case AsmToken::LParen:
+ // empty disp (= 0)
+ Offset =
+ VEOperand::CreateImm(MCConstantExpr::create(0, getContext()), S, E);
+ break;
+ }
+
+ switch (getLexer().getKind()) {
+ default:
+ return MatchOperand_ParseFail;
+
+ case AsmToken::EndOfStatement:
+ Operands.push_back(VEOperand::MorphToMEMzii(
+ MCConstantExpr::create(0, getContext()), std::move(Offset)));
+ return MatchOperand_Success;
+
+ case AsmToken::LParen:
+ Parser.Lex(); // Eat the (
+ break;
+ }
+
+ const MCExpr *IndexValue = nullptr;
+ unsigned IndexReg = 0;
+
+ switch (getLexer().getKind()) {
+ default:
+ if (ParseRegister(IndexReg, S, E))
+ return MatchOperand_ParseFail;
+ break;
+
+ case AsmToken::Minus:
+ case AsmToken::Integer:
+ case AsmToken::Dot:
+ if (getParser().parseExpression(IndexValue, E))
+ return MatchOperand_ParseFail;
+ break;
+
+ case AsmToken::Comma:
+ // empty index
+ IndexValue = MCConstantExpr::create(0, getContext());
+ break;
+ }
+
+ switch (getLexer().getKind()) {
+ default:
+ return MatchOperand_ParseFail;
+
+ case AsmToken::RParen:
+ Parser.Lex(); // Eat the )
+ Operands.push_back(
+ IndexValue ? VEOperand::MorphToMEMzii(IndexValue, std::move(Offset))
+ : VEOperand::MorphToMEMzri(IndexReg, std::move(Offset)));
+ return MatchOperand_Success;
+
+ case AsmToken::Comma:
+ Parser.Lex(); // Eat the ,
+ break;
+ }
+
+ unsigned BaseReg = 0;
+ if (ParseRegister(BaseReg, S, E))
+ return MatchOperand_ParseFail;
+
+ if (!Parser.getTok().is(AsmToken::RParen))
+ return MatchOperand_ParseFail;
+
+ Parser.Lex(); // Eat the )
+ Operands.push_back(
+ IndexValue
+ ? VEOperand::MorphToMEMrii(BaseReg, IndexValue, std::move(Offset))
+ : VEOperand::MorphToMEMrri(BaseReg, IndexReg, std::move(Offset)));
+
+ return MatchOperand_Success;
+}
+
+OperandMatchResultTy VEAsmParser::parseMEMAsOperand(OperandVector &Operands) {
+ LLVM_DEBUG(dbgs() << "parseMEMAsOperand\n");
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+ SMLoc E = Tok.getEndLoc();
+ // Parse AS format
+ // disp
+ // disp(, base)
+ // disp(base)
+ // disp()
+ // (, base)
+ // (base)
+ // base
+
+ unsigned BaseReg = VE::NoRegister;
+ std::unique_ptr<VEOperand> Offset;
+ switch (getLexer().getKind()) {
+ default:
+ return MatchOperand_NoMatch;
+
+ case AsmToken::Minus:
+ case AsmToken::Integer:
+ case AsmToken::Dot:
+ case AsmToken::Identifier: {
+ const MCExpr *EVal;
+ if (!parseExpression(EVal))
+ Offset = VEOperand::CreateImm(EVal, S, E);
+ else
+ return MatchOperand_NoMatch;
+ break;
+ }
+
+ case AsmToken::Percent:
+ if (ParseRegister(BaseReg, S, E))
+ return MatchOperand_NoMatch;
+ Offset =
+ VEOperand::CreateImm(MCConstantExpr::create(0, getContext()), S, E);
+ break;
+
+ case AsmToken::LParen:
+ // empty disp (= 0)
+ Offset =
+ VEOperand::CreateImm(MCConstantExpr::create(0, getContext()), S, E);
+ break;
+ }
+
+ switch (getLexer().getKind()) {
+ default:
+ return MatchOperand_ParseFail;
+
+ case AsmToken::EndOfStatement:
+ case AsmToken::Comma:
+ Operands.push_back(BaseReg != VE::NoRegister
+ ? VEOperand::MorphToMEMri(BaseReg, std::move(Offset))
+ : VEOperand::MorphToMEMzi(std::move(Offset)));
+ return MatchOperand_Success;
+
+ case AsmToken::LParen:
+ if (BaseReg != VE::NoRegister)
+ return MatchOperand_ParseFail;
+ Parser.Lex(); // Eat the (
+ break;
+ }
+
+ switch (getLexer().getKind()) {
+ default:
+ if (ParseRegister(BaseReg, S, E))
+ return MatchOperand_ParseFail;
+ break;
+
+ case AsmToken::Comma:
+ Parser.Lex(); // Eat the ,
+ if (ParseRegister(BaseReg, S, E))
+ return MatchOperand_ParseFail;
+ break;
+
+ case AsmToken::RParen:
+ break;
+ }
+
+ if (!Parser.getTok().is(AsmToken::RParen))
+ return MatchOperand_ParseFail;
+
+ Parser.Lex(); // Eat the )
+ Operands.push_back(BaseReg != VE::NoRegister
+ ? VEOperand::MorphToMEMri(BaseReg, std::move(Offset))
+ : VEOperand::MorphToMEMzi(std::move(Offset)));
+
+ return MatchOperand_Success;
+}
+
+OperandMatchResultTy VEAsmParser::parseMImmOperand(OperandVector &Operands) {
+ LLVM_DEBUG(dbgs() << "parseMImmOperand\n");
+
+ // Parsing "(" + number + ")0/1"
+ const AsmToken Tok1 = Parser.getTok();
+ if (!Tok1.is(AsmToken::LParen))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat the '('.
+
+ const AsmToken Tok2 = Parser.getTok();
+ SMLoc E;
+ const MCExpr *EVal;
+ if (!Tok2.is(AsmToken::Integer) || getParser().parseExpression(EVal, E)) {
+ getLexer().UnLex(Tok1);
+ return MatchOperand_NoMatch;
+ }
+
+ const AsmToken Tok3 = Parser.getTok();
+ if (!Tok3.is(AsmToken::RParen)) {
+ getLexer().UnLex(Tok2);
+ getLexer().UnLex(Tok1);
+ return MatchOperand_NoMatch;
+ }
+ Parser.Lex(); // Eat the ')'.
+
+ const AsmToken &Tok4 = Parser.getTok();
+ StringRef Suffix = Tok4.getString();
+ if (Suffix != "1" && Suffix != "0") {
+ getLexer().UnLex(Tok3);
+ getLexer().UnLex(Tok2);
+ getLexer().UnLex(Tok1);
+ return MatchOperand_NoMatch;
+ }
+ Parser.Lex(); // Eat the value.
+ SMLoc EndLoc = SMLoc::getFromPointer(Suffix.end());
+ Operands.push_back(
+ VEOperand::CreateMImm(EVal, Suffix == "0", Tok1.getLoc(), EndLoc));
+ return MatchOperand_Success;
+}
+
+OperandMatchResultTy VEAsmParser::parseOperand(OperandVector &Operands,
+ StringRef Mnemonic) {
+ LLVM_DEBUG(dbgs() << "parseOperand\n");
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+
+ // If there wasn't a custom match, try the generic matcher below. Otherwise,
+ // there was a match, but an error occurred, in which case, just return that
+ // the operand parsing failed.
+ if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
+ return ResTy;
+
+ switch (getLexer().getKind()) {
+ case AsmToken::LParen:
+ // FIXME: Parsing "(" + %vreg + ", " + %vreg + ")"
+ // FALLTHROUGH
+ default: {
+ std::unique_ptr<VEOperand> Op;
+ ResTy = parseVEAsmOperand(Op);
+ if (ResTy != MatchOperand_Success || !Op)
+ return MatchOperand_ParseFail;
+
+ // Push the parsed operand into the list of operands
+ Operands.push_back(std::move(Op));
+
+ if (!Parser.getTok().is(AsmToken::LParen))
+ break;
+
+    // FIXME: Parsing %vec-reg + "(" + %scalar-reg/number + ")"
+ break;
+ }
+ }
+
+ return MatchOperand_Success;
+}
+
+OperandMatchResultTy
+VEAsmParser::parseVEAsmOperand(std::unique_ptr<VEOperand> &Op) {
+ LLVM_DEBUG(dbgs() << "parseVEAsmOperand\n");
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ const MCExpr *EVal;
+
+ Op = nullptr;
+ switch (getLexer().getKind()) {
+ default:
+ break;
+
+ case AsmToken::Percent:
+ unsigned RegNo;
+ if (tryParseRegister(RegNo, S, E) == MatchOperand_Success)
+ Op = VEOperand::CreateReg(RegNo, S, E);
+ break;
+
+ case AsmToken::Minus:
+ case AsmToken::Integer:
+ case AsmToken::Dot:
+ case AsmToken::Identifier:
+ if (!parseExpression(EVal))
+ Op = VEOperand::CreateImm(EVal, S, E);
+ break;
+ }
+ return (Op) ? MatchOperand_Success : MatchOperand_ParseFail;
+}
+
+// Force static initialization.
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVEAsmParser() {
+ RegisterMCAsmParser<VEAsmParser> A(getTheVETarget());
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "VEGenAsmMatcher.inc"
+
+unsigned VEAsmParser::validateTargetOperandClass(MCParsedAsmOperand &GOp,
+ unsigned Kind) {
+ VEOperand &Op = (VEOperand &)GOp;
+
+  // VE uses the same register names across register classes; for example,
+  // both F32 and I32 use "%s23". Convert the operand here so it validates
+  // against the requested register class.
+ switch (Kind) {
+ default:
+ break;
+ case MCK_F32:
+ if (Op.isReg() && VEOperand::MorphToF32Reg(Op))
+ return MCTargetAsmParser::Match_Success;
+ break;
+ case MCK_I32:
+ if (Op.isReg() && VEOperand::MorphToI32Reg(Op))
+ return MCTargetAsmParser::Match_Success;
+ break;
+ case MCK_F128:
+ if (Op.isReg() && VEOperand::MorphToF128Reg(Op))
+ return MCTargetAsmParser::Match_Success;
+ break;
+ case MCK_MISC:
+ if (Op.isImm() && VEOperand::MorphToMISCReg(Op))
+ return MCTargetAsmParser::Match_Success;
+ break;
+ }
+ return Match_InvalidOperand;
+}
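As a rough, self-contained illustration of the ASX addressing forms handled by parseMEMOperand above (a sketch for orientation only, not part of the patch; the helper name classifyASX and the example strings are invented here), the resulting operand kind depends only on whether a base register and a register index were found:

#include <cstdio>

// Map the parsed ASX components onto the four MEM operand kinds used above.
// HasBase: a base register appeared after the comma; HasIndexReg: the index
// slot held a register rather than an immediate (an empty index counts as
// the immediate 0).
static const char *classifyASX(bool HasBase, bool HasIndexReg) {
  if (HasBase)
    return HasIndexReg ? "MEMrri" : "MEMrii";
  return HasIndexReg ? "MEMzri" : "MEMzii";
}

int main() {
  std::printf("8(%%s2, %%s1) -> %s\n", classifyASX(true, true));   // disp(index, base)
  std::printf("8(16, %%s1)  -> %s\n", classifyASX(true, false));   // disp(imm-index, base)
  std::printf("8(%%s2)      -> %s\n", classifyASX(false, true));   // disp(index)
  std::printf("8            -> %s\n", classifyASX(false, false));  // disp only
  return 0;
}

The parser above builds the corresponding operand via MorphToMEMrri, MorphToMEMrii, MorphToMEMzri, and MorphToMEMzii in the same four situations.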
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
new file mode 100644
index 000000000000..35885a4e3cae
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
@@ -0,0 +1,560 @@
+//===- VEDisassembler.cpp - Disassembler for VE -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the VE Disassembler.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/VEMCTargetDesc.h"
+#include "TargetInfo/VETargetInfo.h"
+#include "VE.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ve-disassembler"
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+
+/// A disassembler class for VE.
+class VEDisassembler : public MCDisassembler {
+public:
+ VEDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
+ : MCDisassembler(STI, Ctx) {}
+ virtual ~VEDisassembler() {}
+
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &CStream) const override;
+};
+} // namespace
+
+static MCDisassembler *createVEDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new VEDisassembler(STI, Ctx);
+}
+
+extern "C" void LLVMInitializeVEDisassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(getTheVETarget(),
+ createVEDisassembler);
+}
+
+static const unsigned I32RegDecoderTable[] = {
+ VE::SW0, VE::SW1, VE::SW2, VE::SW3, VE::SW4, VE::SW5, VE::SW6,
+ VE::SW7, VE::SW8, VE::SW9, VE::SW10, VE::SW11, VE::SW12, VE::SW13,
+ VE::SW14, VE::SW15, VE::SW16, VE::SW17, VE::SW18, VE::SW19, VE::SW20,
+ VE::SW21, VE::SW22, VE::SW23, VE::SW24, VE::SW25, VE::SW26, VE::SW27,
+ VE::SW28, VE::SW29, VE::SW30, VE::SW31, VE::SW32, VE::SW33, VE::SW34,
+ VE::SW35, VE::SW36, VE::SW37, VE::SW38, VE::SW39, VE::SW40, VE::SW41,
+ VE::SW42, VE::SW43, VE::SW44, VE::SW45, VE::SW46, VE::SW47, VE::SW48,
+ VE::SW49, VE::SW50, VE::SW51, VE::SW52, VE::SW53, VE::SW54, VE::SW55,
+ VE::SW56, VE::SW57, VE::SW58, VE::SW59, VE::SW60, VE::SW61, VE::SW62,
+ VE::SW63};
+
+static const unsigned I64RegDecoderTable[] = {
+ VE::SX0, VE::SX1, VE::SX2, VE::SX3, VE::SX4, VE::SX5, VE::SX6,
+ VE::SX7, VE::SX8, VE::SX9, VE::SX10, VE::SX11, VE::SX12, VE::SX13,
+ VE::SX14, VE::SX15, VE::SX16, VE::SX17, VE::SX18, VE::SX19, VE::SX20,
+ VE::SX21, VE::SX22, VE::SX23, VE::SX24, VE::SX25, VE::SX26, VE::SX27,
+ VE::SX28, VE::SX29, VE::SX30, VE::SX31, VE::SX32, VE::SX33, VE::SX34,
+ VE::SX35, VE::SX36, VE::SX37, VE::SX38, VE::SX39, VE::SX40, VE::SX41,
+ VE::SX42, VE::SX43, VE::SX44, VE::SX45, VE::SX46, VE::SX47, VE::SX48,
+ VE::SX49, VE::SX50, VE::SX51, VE::SX52, VE::SX53, VE::SX54, VE::SX55,
+ VE::SX56, VE::SX57, VE::SX58, VE::SX59, VE::SX60, VE::SX61, VE::SX62,
+ VE::SX63};
+
+static const unsigned F32RegDecoderTable[] = {
+ VE::SF0, VE::SF1, VE::SF2, VE::SF3, VE::SF4, VE::SF5, VE::SF6,
+ VE::SF7, VE::SF8, VE::SF9, VE::SF10, VE::SF11, VE::SF12, VE::SF13,
+ VE::SF14, VE::SF15, VE::SF16, VE::SF17, VE::SF18, VE::SF19, VE::SF20,
+ VE::SF21, VE::SF22, VE::SF23, VE::SF24, VE::SF25, VE::SF26, VE::SF27,
+ VE::SF28, VE::SF29, VE::SF30, VE::SF31, VE::SF32, VE::SF33, VE::SF34,
+ VE::SF35, VE::SF36, VE::SF37, VE::SF38, VE::SF39, VE::SF40, VE::SF41,
+ VE::SF42, VE::SF43, VE::SF44, VE::SF45, VE::SF46, VE::SF47, VE::SF48,
+ VE::SF49, VE::SF50, VE::SF51, VE::SF52, VE::SF53, VE::SF54, VE::SF55,
+ VE::SF56, VE::SF57, VE::SF58, VE::SF59, VE::SF60, VE::SF61, VE::SF62,
+ VE::SF63};
+
+static const unsigned F128RegDecoderTable[] = {
+ VE::Q0, VE::Q1, VE::Q2, VE::Q3, VE::Q4, VE::Q5, VE::Q6, VE::Q7,
+ VE::Q8, VE::Q9, VE::Q10, VE::Q11, VE::Q12, VE::Q13, VE::Q14, VE::Q15,
+ VE::Q16, VE::Q17, VE::Q18, VE::Q19, VE::Q20, VE::Q21, VE::Q22, VE::Q23,
+ VE::Q24, VE::Q25, VE::Q26, VE::Q27, VE::Q28, VE::Q29, VE::Q30, VE::Q31};
+
+static const unsigned MiscRegDecoderTable[] = {
+ VE::USRCC, VE::PSW, VE::SAR, VE::NoRegister,
+ VE::NoRegister, VE::NoRegister, VE::NoRegister, VE::PMMR,
+ VE::PMCR0, VE::PMCR1, VE::PMCR2, VE::PMCR3,
+ VE::NoRegister, VE::NoRegister, VE::NoRegister, VE::NoRegister,
+ VE::PMC0, VE::PMC1, VE::PMC2, VE::PMC3,
+ VE::PMC4, VE::PMC5, VE::PMC6, VE::PMC7,
+ VE::PMC8, VE::PMC9, VE::PMC10, VE::PMC11,
+ VE::PMC12, VE::PMC13, VE::PMC14};
+
+static DecodeStatus DecodeI32RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 63)
+ return MCDisassembler::Fail;
+ unsigned Reg = I32RegDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeI64RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 63)
+ return MCDisassembler::Fail;
+ unsigned Reg = I64RegDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeF32RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 63)
+ return MCDisassembler::Fail;
+ unsigned Reg = F32RegDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeF128RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo % 2 || RegNo > 63)
+ return MCDisassembler::Fail;
+ unsigned Reg = F128RegDecoderTable[RegNo / 2];
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMISCRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 30)
+ return MCDisassembler::Fail;
+ unsigned Reg = MiscRegDecoderTable[RegNo];
+ if (Reg == VE::NoRegister)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeASX(MCInst &Inst, uint64_t insn, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeLoadI32(MCInst &Inst, uint64_t insn, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeStoreI32(MCInst &Inst, uint64_t insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeLoadI64(MCInst &Inst, uint64_t insn, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeStoreI64(MCInst &Inst, uint64_t insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeLoadF32(MCInst &Inst, uint64_t insn, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeStoreF32(MCInst &Inst, uint64_t insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeLoadASI64(MCInst &Inst, uint64_t insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeStoreASI64(MCInst &Inst, uint64_t insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeTS1AMI64(MCInst &Inst, uint64_t insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeTS1AMI32(MCInst &Inst, uint64_t insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCASI64(MCInst &Inst, uint64_t insn, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeCASI32(MCInst &Inst, uint64_t insn, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeCall(MCInst &Inst, uint64_t insn, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeSIMM7(MCInst &Inst, uint64_t insn, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeSIMM32(MCInst &Inst, uint64_t insn, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeCCOperand(MCInst &Inst, uint64_t insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeRDOperand(MCInst &Inst, uint64_t insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeBranchCondition(MCInst &Inst, uint64_t insn,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeBranchConditionAlways(MCInst &Inst, uint64_t insn,
+ uint64_t Address,
+ const void *Decoder);
+
+#include "VEGenDisassemblerTables.inc"
+
+/// Read eight bytes from the ArrayRef and return the 64-bit instruction word.
+static DecodeStatus readInstruction64(ArrayRef<uint8_t> Bytes, uint64_t Address,
+ uint64_t &Size, uint64_t &Insn,
+ bool IsLittleEndian) {
+ // We want to read exactly 8 Bytes of data.
+ if (Bytes.size() < 8) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ Insn = IsLittleEndian
+ ? ((uint64_t)Bytes[0] << 0) | ((uint64_t)Bytes[1] << 8) |
+ ((uint64_t)Bytes[2] << 16) | ((uint64_t)Bytes[3] << 24) |
+ ((uint64_t)Bytes[4] << 32) | ((uint64_t)Bytes[5] << 40) |
+ ((uint64_t)Bytes[6] << 48) | ((uint64_t)Bytes[7] << 56)
+ : ((uint64_t)Bytes[7] << 0) | ((uint64_t)Bytes[6] << 8) |
+ ((uint64_t)Bytes[5] << 16) | ((uint64_t)Bytes[4] << 24) |
+ ((uint64_t)Bytes[3] << 32) | ((uint64_t)Bytes[2] << 40) |
+ ((uint64_t)Bytes[1] << 48) | ((uint64_t)Bytes[0] << 56);
+
+ return MCDisassembler::Success;
+}
+
+DecodeStatus VEDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &CStream) const {
+ uint64_t Insn;
+ bool isLittleEndian = getContext().getAsmInfo()->isLittleEndian();
+ DecodeStatus Result =
+ readInstruction64(Bytes, Address, Size, Insn, isLittleEndian);
+ if (Result == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ // Calling the auto-generated decoder function.
+
+ Result = decodeInstruction(DecoderTableVE64, Instr, Insn, Address, this, STI);
+
+ if (Result != MCDisassembler::Fail) {
+ Size = 8;
+ return Result;
+ }
+
+ return MCDisassembler::Fail;
+}
+
+typedef DecodeStatus (*DecodeFunc)(MCInst &MI, unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeASX(MCInst &MI, uint64_t insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned sy = fieldFromInstruction(insn, 40, 7);
+ bool cy = fieldFromInstruction(insn, 47, 1);
+ unsigned sz = fieldFromInstruction(insn, 32, 7);
+ bool cz = fieldFromInstruction(insn, 39, 1);
+ uint64_t simm32 = SignExtend64<32>(fieldFromInstruction(insn, 0, 32));
+ DecodeStatus status;
+
+ // Decode sz.
+ if (cz) {
+ status = DecodeI64RegisterClass(MI, sz, Address, Decoder);
+ if (status != MCDisassembler::Success)
+ return status;
+ } else {
+ MI.addOperand(MCOperand::createImm(0));
+ }
+
+ // Decode sy.
+ if (cy) {
+ status = DecodeI64RegisterClass(MI, sy, Address, Decoder);
+ if (status != MCDisassembler::Success)
+ return status;
+ } else {
+ MI.addOperand(MCOperand::createImm(SignExtend32<7>(sy)));
+ }
+
+ // Decode simm32.
+ MI.addOperand(MCOperand::createImm(simm32));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeAS(MCInst &MI, uint64_t insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned sz = fieldFromInstruction(insn, 32, 7);
+ bool cz = fieldFromInstruction(insn, 39, 1);
+ uint64_t simm32 = SignExtend64<32>(fieldFromInstruction(insn, 0, 32));
+ DecodeStatus status;
+
+ // Decode sz.
+ if (cz) {
+ status = DecodeI64RegisterClass(MI, sz, Address, Decoder);
+ if (status != MCDisassembler::Success)
+ return status;
+ } else {
+ MI.addOperand(MCOperand::createImm(0));
+ }
+
+ // Decode simm32.
+ MI.addOperand(MCOperand::createImm(simm32));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMem(MCInst &MI, uint64_t insn, uint64_t Address,
+ const void *Decoder, bool isLoad,
+ DecodeFunc DecodeSX) {
+ unsigned sx = fieldFromInstruction(insn, 48, 7);
+
+ DecodeStatus status;
+ if (isLoad) {
+ status = DecodeSX(MI, sx, Address, Decoder);
+ if (status != MCDisassembler::Success)
+ return status;
+ }
+
+ status = DecodeASX(MI, insn, Address, Decoder);
+ if (status != MCDisassembler::Success)
+ return status;
+
+ if (!isLoad) {
+ status = DecodeSX(MI, sx, Address, Decoder);
+ if (status != MCDisassembler::Success)
+ return status;
+ }
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMemAS(MCInst &MI, uint64_t insn, uint64_t Address,
+ const void *Decoder, bool isLoad,
+ DecodeFunc DecodeSX) {
+ unsigned sx = fieldFromInstruction(insn, 48, 7);
+
+ DecodeStatus status;
+ if (isLoad) {
+ status = DecodeSX(MI, sx, Address, Decoder);
+ if (status != MCDisassembler::Success)
+ return status;
+ }
+
+ status = DecodeAS(MI, insn, Address, Decoder);
+ if (status != MCDisassembler::Success)
+ return status;
+
+ if (!isLoad) {
+ status = DecodeSX(MI, sx, Address, Decoder);
+ if (status != MCDisassembler::Success)
+ return status;
+ }
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLoadI32(MCInst &Inst, uint64_t insn, uint64_t Address,
+ const void *Decoder) {
+ return DecodeMem(Inst, insn, Address, Decoder, true, DecodeI32RegisterClass);
+}
+
+static DecodeStatus DecodeStoreI32(MCInst &Inst, uint64_t insn,
+ uint64_t Address, const void *Decoder) {
+ return DecodeMem(Inst, insn, Address, Decoder, false, DecodeI32RegisterClass);
+}
+
+static DecodeStatus DecodeLoadI64(MCInst &Inst, uint64_t insn, uint64_t Address,
+ const void *Decoder) {
+ return DecodeMem(Inst, insn, Address, Decoder, true, DecodeI64RegisterClass);
+}
+
+static DecodeStatus DecodeStoreI64(MCInst &Inst, uint64_t insn,
+ uint64_t Address, const void *Decoder) {
+ return DecodeMem(Inst, insn, Address, Decoder, false, DecodeI64RegisterClass);
+}
+
+static DecodeStatus DecodeLoadF32(MCInst &Inst, uint64_t insn, uint64_t Address,
+ const void *Decoder) {
+ return DecodeMem(Inst, insn, Address, Decoder, true, DecodeF32RegisterClass);
+}
+
+static DecodeStatus DecodeStoreF32(MCInst &Inst, uint64_t insn,
+ uint64_t Address, const void *Decoder) {
+ return DecodeMem(Inst, insn, Address, Decoder, false, DecodeF32RegisterClass);
+}
+
+static DecodeStatus DecodeLoadASI64(MCInst &Inst, uint64_t insn,
+ uint64_t Address, const void *Decoder) {
+ return DecodeMemAS(Inst, insn, Address, Decoder, true,
+ DecodeI64RegisterClass);
+}
+
+static DecodeStatus DecodeStoreASI64(MCInst &Inst, uint64_t insn,
+ uint64_t Address, const void *Decoder) {
+ return DecodeMemAS(Inst, insn, Address, Decoder, false,
+ DecodeI64RegisterClass);
+}
+
+static DecodeStatus DecodeCAS(MCInst &MI, uint64_t insn, uint64_t Address,
+ const void *Decoder, bool isImmOnly, bool isUImm,
+ DecodeFunc DecodeSX) {
+ unsigned sx = fieldFromInstruction(insn, 48, 7);
+ bool cy = fieldFromInstruction(insn, 47, 1);
+ unsigned sy = fieldFromInstruction(insn, 40, 7);
+
+ // Add $sx.
+ DecodeStatus status;
+ status = DecodeSX(MI, sx, Address, Decoder);
+ if (status != MCDisassembler::Success)
+ return status;
+
+ // Add $disp($sz).
+ status = DecodeAS(MI, insn, Address, Decoder);
+ if (status != MCDisassembler::Success)
+ return status;
+
+ // Add $sy.
+ if (cy && !isImmOnly) {
+ status = DecodeSX(MI, sy, Address, Decoder);
+ if (status != MCDisassembler::Success)
+ return status;
+ } else {
+ if (isUImm)
+ MI.addOperand(MCOperand::createImm(sy));
+ else
+ MI.addOperand(MCOperand::createImm(SignExtend32<7>(sy)));
+ }
+
+ // Add $sd.
+ status = DecodeSX(MI, sx, Address, Decoder);
+ if (status != MCDisassembler::Success)
+ return status;
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeTS1AMI64(MCInst &MI, uint64_t insn, uint64_t Address,
+ const void *Decoder) {
+ return DecodeCAS(MI, insn, Address, Decoder, false, true,
+ DecodeI64RegisterClass);
+}
+
+static DecodeStatus DecodeTS1AMI32(MCInst &MI, uint64_t insn, uint64_t Address,
+ const void *Decoder) {
+ return DecodeCAS(MI, insn, Address, Decoder, false, true,
+ DecodeI32RegisterClass);
+}
+
+static DecodeStatus DecodeCASI64(MCInst &MI, uint64_t insn, uint64_t Address,
+ const void *Decoder) {
+ return DecodeCAS(MI, insn, Address, Decoder, false, false,
+ DecodeI64RegisterClass);
+}
+
+static DecodeStatus DecodeCASI32(MCInst &MI, uint64_t insn, uint64_t Address,
+ const void *Decoder) {
+ return DecodeCAS(MI, insn, Address, Decoder, false, false,
+ DecodeI32RegisterClass);
+}
+
+static DecodeStatus DecodeCall(MCInst &Inst, uint64_t insn, uint64_t Address,
+ const void *Decoder) {
+ return DecodeMem(Inst, insn, Address, Decoder, true, DecodeI64RegisterClass);
+}
+
+static DecodeStatus DecodeSIMM7(MCInst &MI, uint64_t insn, uint64_t Address,
+ const void *Decoder) {
+ uint64_t tgt = SignExtend64<7>(insn);
+ MI.addOperand(MCOperand::createImm(tgt));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeSIMM32(MCInst &MI, uint64_t insn, uint64_t Address,
+ const void *Decoder) {
+ uint64_t tgt = SignExtend64<32>(insn);
+ MI.addOperand(MCOperand::createImm(tgt));
+ return MCDisassembler::Success;
+}
+
+static bool isIntegerBCKind(MCInst &MI) {
+
+#define BCm_kind(NAME) \
+ case NAME##rri: \
+ case NAME##rzi: \
+ case NAME##iri: \
+ case NAME##izi: \
+ case NAME##rri_nt: \
+ case NAME##rzi_nt: \
+ case NAME##iri_nt: \
+ case NAME##izi_nt: \
+ case NAME##rri_t: \
+ case NAME##rzi_t: \
+ case NAME##iri_t: \
+ case NAME##izi_t:
+
+#define BCRm_kind(NAME) \
+ case NAME##rr: \
+ case NAME##ir: \
+ case NAME##rr_nt: \
+ case NAME##ir_nt: \
+ case NAME##rr_t: \
+ case NAME##ir_t:
+
+ {
+ using namespace llvm::VE;
+ switch (MI.getOpcode()) {
+ BCm_kind(BCFL) BCm_kind(BCFW) BCRm_kind(BRCFL)
+ BCRm_kind(BRCFW) return true;
+ }
+ }
+#undef BCm_kind
+#undef BCRm_kind
+
+ return false;
+}
+
+// Decode CC Operand field.
+static DecodeStatus DecodeCCOperand(MCInst &MI, uint64_t cf, uint64_t Address,
+ const void *Decoder) {
+ MI.addOperand(MCOperand::createImm(VEValToCondCode(cf, isIntegerBCKind(MI))));
+ return MCDisassembler::Success;
+}
+
+// Decode RD Operand field.
+static DecodeStatus DecodeRDOperand(MCInst &MI, uint64_t cf, uint64_t Address,
+ const void *Decoder) {
+ MI.addOperand(MCOperand::createImm(VEValToRD(cf)));
+ return MCDisassembler::Success;
+}
+
+// Decode branch condition instruction and CCOperand field in it.
+static DecodeStatus DecodeBranchCondition(MCInst &MI, uint64_t insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned cf = fieldFromInstruction(insn, 48, 4);
+ bool cy = fieldFromInstruction(insn, 47, 1);
+ unsigned sy = fieldFromInstruction(insn, 40, 7);
+
+ // Decode cf.
+ MI.addOperand(MCOperand::createImm(VEValToCondCode(cf, isIntegerBCKind(MI))));
+
+ // Decode sy.
+ DecodeStatus status;
+ if (cy) {
+ status = DecodeI64RegisterClass(MI, sy, Address, Decoder);
+ if (status != MCDisassembler::Success)
+ return status;
+ } else {
+ MI.addOperand(MCOperand::createImm(SignExtend32<7>(sy)));
+ }
+
+ // Decode MEMri.
+ return DecodeAS(MI, insn, Address, Decoder);
+}
+
+static DecodeStatus DecodeBranchConditionAlways(MCInst &MI, uint64_t insn,
+ uint64_t Address,
+ const void *Decoder) {
+ // Decode MEMri.
+ return DecodeAS(MI, insn, Address, Decoder);
+}
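A minimal standalone sketch of the two low-level pieces the disassembler above relies on: assembling the 8-byte instruction word (as readInstruction64 does) and pulling out bit fields at the positions DecodeASX uses. The byte values and helper names here are illustrative only.

#include <cstdint>
#include <cstdio>

// Assemble an 8-byte VE instruction word, mirroring readInstruction64.
static uint64_t readWord(const uint8_t Bytes[8], bool IsLittleEndian) {
  uint64_t Insn = 0;
  for (int I = 0; I < 8; ++I) {
    unsigned Shift = IsLittleEndian ? 8 * I : 8 * (7 - I);
    Insn |= (uint64_t)Bytes[I] << Shift;
  }
  return Insn;
}

// Extract Width bits starting at bit Start, like fieldFromInstruction().
static uint64_t field(uint64_t Insn, unsigned Start, unsigned Width) {
  return (Insn >> Start) & ((UINT64_C(1) << Width) - 1);
}

int main() {
  const uint8_t Bytes[8] = {0x10, 0x00, 0x00, 0x00, 0x23, 0x81, 0x45, 0x01}; // example bytes
  uint64_t Insn = readWord(Bytes, /*IsLittleEndian=*/true);
  // The same fields DecodeASX extracts: sy, cy, sz, cz and the 32-bit displacement.
  std::printf("sy=%llu cy=%llu sz=%llu cz=%llu disp=%lld\n",
              (unsigned long long)field(Insn, 40, 7),
              (unsigned long long)field(Insn, 47, 1),
              (unsigned long long)field(Insn, 32, 7),
              (unsigned long long)field(Insn, 39, 1),
              (long long)(int32_t)field(Insn, 0, 32));
  return 0;
}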
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp
deleted file mode 100644
index 4e7bcd36c32a..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp
+++ /dev/null
@@ -1,118 +0,0 @@
-//===-- VEInstPrinter.cpp - Convert VE MCInst to assembly syntax -----------==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an VE MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "VEInstPrinter.h"
-#include "VE.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "ve-asmprinter"
-
-// The generated AsmMatcher VEGenAsmWriter uses "VE" as the target
-// namespace.
-namespace llvm {
-namespace VE {
-using namespace VE;
-}
-} // namespace llvm
-
-#define GET_INSTRUCTION_NAME
-#define PRINT_ALIAS_INSTR
-#include "VEGenAsmWriter.inc"
-
-void VEInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << '%' << StringRef(getRegisterName(RegNo)).lower();
-}
-
-void VEInstPrinter::printInst(const MCInst *MI, uint64_t Address,
- StringRef Annot, const MCSubtargetInfo &STI,
- raw_ostream &OS) {
- if (!printAliasInstr(MI, STI, OS))
- printInstruction(MI, Address, STI, OS);
- printAnnotation(OS, Annot);
-}
-
-void VEInstPrinter::printOperand(const MCInst *MI, int opNum,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- const MCOperand &MO = MI->getOperand(opNum);
-
- if (MO.isReg()) {
- printRegName(O, MO.getReg());
- return;
- }
-
- if (MO.isImm()) {
- switch (MI->getOpcode()) {
- default:
- // Expects signed 32bit literals
- assert(isInt<32>(MO.getImm()) && "Immediate too large");
- int32_t TruncatedImm = static_cast<int32_t>(MO.getImm());
- O << TruncatedImm;
- return;
- }
- }
-
- assert(MO.isExpr() && "Unknown operand kind in printOperand");
- MO.getExpr()->print(O, &MAI);
-}
-
-void VEInstPrinter::printMemASXOperand(const MCInst *MI, int opNum,
- const MCSubtargetInfo &STI,
- raw_ostream &O, const char *Modifier) {
- // If this is an ADD operand, emit it like normal operands.
- if (Modifier && !strcmp(Modifier, "arith")) {
- printOperand(MI, opNum, STI, O);
- O << ", ";
- printOperand(MI, opNum + 1, STI, O);
- return;
- }
-
- const MCOperand &MO = MI->getOperand(opNum + 1);
- if (!MO.isImm() || MO.getImm() != 0) {
- printOperand(MI, opNum + 1, STI, O);
- }
- O << "(,";
- printOperand(MI, opNum, STI, O);
- O << ")";
-}
-
-void VEInstPrinter::printMemASOperand(const MCInst *MI, int opNum,
- const MCSubtargetInfo &STI,
- raw_ostream &O, const char *Modifier) {
- // If this is an ADD operand, emit it like normal operands.
- if (Modifier && !strcmp(Modifier, "arith")) {
- printOperand(MI, opNum, STI, O);
- O << ", ";
- printOperand(MI, opNum + 1, STI, O);
- return;
- }
-
- const MCOperand &MO = MI->getOperand(opNum + 1);
- if (!MO.isImm() || MO.getImm() != 0) {
- printOperand(MI, opNum + 1, STI, O);
- }
- O << "(";
- printOperand(MI, opNum, STI, O);
- O << ")";
-}
-
-void VEInstPrinter::printCCOperand(const MCInst *MI, int opNum,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- int CC = (int)MI->getOperand(opNum).getImm();
- O << VECondCodeToString((VECC::CondCodes)CC);
-}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp
new file mode 100644
index 000000000000..9a6ae90b5c73
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp
@@ -0,0 +1,224 @@
+//===-- VEAsmBackend.cpp - VE Assembler Backend ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/VEFixupKinds.h"
+#include "MCTargetDesc/VEMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_1:
+ case FK_Data_2:
+ case FK_Data_4:
+ case FK_Data_8:
+ case FK_PCRel_1:
+ case FK_PCRel_2:
+ case FK_PCRel_4:
+ case FK_PCRel_8:
+ return Value;
+ case VE::fixup_ve_hi32:
+ case VE::fixup_ve_pc_hi32:
+ case VE::fixup_ve_got_hi32:
+ case VE::fixup_ve_gotoff_hi32:
+ case VE::fixup_ve_plt_hi32:
+ case VE::fixup_ve_tls_gd_hi32:
+ case VE::fixup_ve_tpoff_hi32:
+ return (Value >> 32) & 0xffffffff;
+ case VE::fixup_ve_reflong:
+ case VE::fixup_ve_lo32:
+ case VE::fixup_ve_pc_lo32:
+ case VE::fixup_ve_got_lo32:
+ case VE::fixup_ve_gotoff_lo32:
+ case VE::fixup_ve_plt_lo32:
+ case VE::fixup_ve_tls_gd_lo32:
+ case VE::fixup_ve_tpoff_lo32:
+ return Value & 0xffffffff;
+ }
+}
+
+/// getFixupKindNumBytes - The number of bytes the fixup may change.
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_1:
+ case FK_PCRel_1:
+ return 1;
+ case FK_Data_2:
+ case FK_PCRel_2:
+ return 2;
+ case FK_Data_4:
+ case FK_PCRel_4:
+ case VE::fixup_ve_reflong:
+ case VE::fixup_ve_hi32:
+ case VE::fixup_ve_lo32:
+ case VE::fixup_ve_pc_hi32:
+ case VE::fixup_ve_pc_lo32:
+ case VE::fixup_ve_got_hi32:
+ case VE::fixup_ve_got_lo32:
+ case VE::fixup_ve_gotoff_hi32:
+ case VE::fixup_ve_gotoff_lo32:
+ case VE::fixup_ve_plt_hi32:
+ case VE::fixup_ve_plt_lo32:
+ case VE::fixup_ve_tls_gd_hi32:
+ case VE::fixup_ve_tls_gd_lo32:
+ case VE::fixup_ve_tpoff_hi32:
+ case VE::fixup_ve_tpoff_lo32:
+ return 4;
+ case FK_Data_8:
+ case FK_PCRel_8:
+ return 8;
+ }
+}
+
+namespace {
+class VEAsmBackend : public MCAsmBackend {
+protected:
+ const Target &TheTarget;
+
+public:
+ VEAsmBackend(const Target &T) : MCAsmBackend(support::little), TheTarget(T) {}
+
+ unsigned getNumFixupKinds() const override { return VE::NumTargetFixupKinds; }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
+ const static MCFixupKindInfo Infos[VE::NumTargetFixupKinds] = {
+ // name, offset, bits, flags
+ {"fixup_ve_reflong", 0, 32, 0},
+ {"fixup_ve_hi32", 0, 32, 0},
+ {"fixup_ve_lo32", 0, 32, 0},
+ {"fixup_ve_pc_hi32", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_ve_pc_lo32", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_ve_got_hi32", 0, 32, 0},
+ {"fixup_ve_got_lo32", 0, 32, 0},
+ {"fixup_ve_gotoff_hi32", 0, 32, 0},
+ {"fixup_ve_gotoff_lo32", 0, 32, 0},
+ {"fixup_ve_plt_hi32", 0, 32, 0},
+ {"fixup_ve_plt_lo32", 0, 32, 0},
+ {"fixup_ve_tls_gd_hi32", 0, 32, 0},
+ {"fixup_ve_tls_gd_lo32", 0, 32, 0},
+ {"fixup_ve_tpoff_hi32", 0, 32, 0},
+ {"fixup_ve_tpoff_lo32", 0, 32, 0},
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override {
+ switch ((VE::Fixups)Fixup.getKind()) {
+ default:
+ return false;
+ case VE::fixup_ve_tls_gd_hi32:
+ case VE::fixup_ve_tls_gd_lo32:
+ case VE::fixup_ve_tpoff_hi32:
+ case VE::fixup_ve_tpoff_lo32:
+ return true;
+ }
+ }
+
+ bool mayNeedRelaxation(const MCInst &Inst,
+ const MCSubtargetInfo &STI) const override {
+ // Not implemented yet. For example, if we have a branch with
+ // lager than SIMM32 immediate value, we want to relaxation such
+ // branch instructions.
+ return false;
+ }
+
+ /// fixupNeedsRelaxation - Target specific predicate for whether a given
+ /// fixup requires the associated instruction to be relaxed.
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+    // Not implemented yet. For example, if we have a branch with an
+    // immediate larger than SIMM32, we want to relax such branch
+    // instructions.
+ return false;
+ }
+ void relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const override {
+ // Aurora VE doesn't support relaxInstruction yet.
+ llvm_unreachable("relaxInstruction() should not be called");
+ }
+
+ bool writeNopData(raw_ostream &OS, uint64_t Count) const override {
+ if ((Count % 8) != 0)
+ return false;
+
+ for (uint64_t i = 0; i < Count; i += 8)
+ support::endian::write<uint64_t>(OS, 0x7900000000000000ULL,
+ support::little);
+
+ return true;
+ }
+};
+
+class ELFVEAsmBackend : public VEAsmBackend {
+ Triple::OSType OSType;
+
+public:
+ ELFVEAsmBackend(const Target &T, Triple::OSType OSType)
+ : VEAsmBackend(T), OSType(OSType) {}
+
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsResolved,
+ const MCSubtargetInfo *STI) const override {
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value)
+ return; // Doesn't change encoding.
+
+ MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
+
+ // Shift the value into position.
+ Value <<= Info.TargetOffset;
+
+ unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
+ // For each byte of the fragment that the fixup touches, mask in the bits
+ // from the fixup value. The Value has been "split up" into the
+ // appropriate bitfields above.
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ unsigned Idx = Endian == support::little ? i : (NumBytes - 1) - i;
+ Data[Offset + Idx] |= static_cast<uint8_t>((Value >> (i * 8)) & 0xff);
+ }
+ }
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override {
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(OSType);
+ return createVEELFObjectWriter(OSABI);
+ }
+};
+} // end anonymous namespace
+
+MCAsmBackend *llvm::createVEAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
+ const MCTargetOptions &Options) {
+ return new ELFVEAsmBackend(T, STI.getTargetTriple().getOS());
+}
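A minimal standalone sketch (not from the patch) of what adjustFixupValue above does for the paired @hi/@lo fixups, plus the 8-byte NOP word writeNopData emits; the address used is just an example.

#include <cstdint>
#include <cstdio>

// Split a 64-bit value the way adjustFixupValue handles the
// fixup_ve_*_hi32 / fixup_ve_*_lo32 pairs above.
static uint32_t hi32(uint64_t Value) { return (uint32_t)(Value >> 32); }
static uint32_t lo32(uint64_t Value) { return (uint32_t)(Value & 0xffffffff); }

int main() {
  uint64_t Addr = 0x123456789abcdef0ULL; // arbitrary example value
  std::printf("hi32=0x%08x lo32=0x%08x\n", hi32(Addr), lo32(Addr));

  // writeNopData pads in 8-byte units using this encoding.
  const uint64_t VENopWord = 0x7900000000000000ULL;
  std::printf("nop word=0x%016llx\n", (unsigned long long)VENopWord);
  return 0;
}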
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp
new file mode 100644
index 000000000000..741e8320a941
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp
@@ -0,0 +1,135 @@
+//===-- VEELFObjectWriter.cpp - VE ELF Writer -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "VEFixupKinds.h"
+#include "VEMCExpr.h"
+#include "VEMCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+class VEELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ VEELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/* Is64Bit */ true, OSABI, ELF::EM_VE,
+ /* HasRelocationAddend */ true) {}
+
+ ~VEELFObjectWriter() override {}
+
+protected:
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsPCRel) const override;
+
+ bool needsRelocateWithSymbol(const MCSymbol &Sym,
+ unsigned Type) const override;
+};
+} // namespace
+
+unsigned VEELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ if (const VEMCExpr *SExpr = dyn_cast<VEMCExpr>(Fixup.getValue())) {
+ if (SExpr->getKind() == VEMCExpr::VK_VE_PC_LO32)
+ return ELF::R_VE_PC_LO32;
+ }
+
+ if (IsPCRel) {
+ switch (Fixup.getTargetKind()) {
+ default:
+ llvm_unreachable("Unimplemented fixup -> relocation");
+ case FK_PCRel_1:
+ llvm_unreachable("Unimplemented fixup fk_data_1 -> relocation");
+ case FK_PCRel_2:
+ llvm_unreachable("Unimplemented fixup fk_data_2 -> relocation");
+ // FIXME: relative kind?
+ case FK_PCRel_4:
+ return ELF::R_VE_REFLONG;
+ case FK_PCRel_8:
+ return ELF::R_VE_REFQUAD;
+ case VE::fixup_ve_pc_hi32:
+ return ELF::R_VE_PC_HI32;
+ case VE::fixup_ve_pc_lo32:
+ return ELF::R_VE_PC_LO32;
+ }
+ }
+
+ switch (Fixup.getTargetKind()) {
+ default:
+ llvm_unreachable("Unimplemented fixup -> relocation");
+ case FK_Data_1:
+ llvm_unreachable("Unimplemented fixup fk_data_1 -> relocation");
+ case FK_Data_2:
+ llvm_unreachable("Unimplemented fixup fk_data_2 -> relocation");
+ case FK_Data_4:
+ return ELF::R_VE_REFLONG;
+ case FK_Data_8:
+ return ELF::R_VE_REFQUAD;
+ case VE::fixup_ve_reflong:
+ return ELF::R_VE_REFLONG;
+ case VE::fixup_ve_hi32:
+ return ELF::R_VE_HI32;
+ case VE::fixup_ve_lo32:
+ return ELF::R_VE_LO32;
+ case VE::fixup_ve_pc_hi32:
+ llvm_unreachable("Unimplemented fixup pc_hi32 -> relocation");
+ case VE::fixup_ve_pc_lo32:
+ llvm_unreachable("Unimplemented fixup pc_lo32 -> relocation");
+ case VE::fixup_ve_got_hi32:
+ return ELF::R_VE_GOT_HI32;
+ case VE::fixup_ve_got_lo32:
+ return ELF::R_VE_GOT_LO32;
+ case VE::fixup_ve_gotoff_hi32:
+ return ELF::R_VE_GOTOFF_HI32;
+ case VE::fixup_ve_gotoff_lo32:
+ return ELF::R_VE_GOTOFF_LO32;
+ case VE::fixup_ve_plt_hi32:
+ return ELF::R_VE_PLT_HI32;
+ case VE::fixup_ve_plt_lo32:
+ return ELF::R_VE_PLT_LO32;
+ case VE::fixup_ve_tls_gd_hi32:
+ return ELF::R_VE_TLS_GD_HI32;
+ case VE::fixup_ve_tls_gd_lo32:
+ return ELF::R_VE_TLS_GD_LO32;
+ case VE::fixup_ve_tpoff_hi32:
+ return ELF::R_VE_TPOFF_HI32;
+ case VE::fixup_ve_tpoff_lo32:
+ return ELF::R_VE_TPOFF_LO32;
+ }
+
+ return ELF::R_VE_NONE;
+}
+
+bool VEELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
+ unsigned Type) const {
+ switch (Type) {
+ default:
+ return false;
+
+ // All relocations that use a GOT need a symbol, not an offset, as
+ // the offset of the symbol within the section is irrelevant to
+  // where the GOT entry is. There is no need to list all the TLS entries,
+  // as they are all marked as requiring a symbol anyway.
+ case ELF::R_VE_GOT_HI32:
+ case ELF::R_VE_GOT_LO32:
+ case ELF::R_VE_GOTOFF_HI32:
+ case ELF::R_VE_GOTOFF_LO32:
+ case ELF::R_VE_TLS_GD_HI32:
+ case ELF::R_VE_TLS_GD_LO32:
+ return true;
+ }
+}
+
+std::unique_ptr<MCObjectTargetWriter>
+llvm::createVEELFObjectWriter(uint8_t OSABI) {
+ return std::make_unique<VEELFObjectWriter>(OSABI);
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h
new file mode 100644
index 000000000000..5d5dc1c5c891
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h
@@ -0,0 +1,61 @@
+//===-- VEFixupKinds.h - VE Specific Fixup Entries --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_VE_MCTARGETDESC_VEFIXUPKINDS_H
+#define LLVM_LIB_TARGET_VE_MCTARGETDESC_VEFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace VE {
+enum Fixups {
+ /// fixup_ve_reflong - 32-bit fixup corresponding to foo
+ fixup_ve_reflong = FirstTargetFixupKind,
+
+ /// fixup_ve_hi32 - 32-bit fixup corresponding to foo@hi
+ fixup_ve_hi32,
+
+ /// fixup_ve_lo32 - 32-bit fixup corresponding to foo@lo
+ fixup_ve_lo32,
+
+ /// fixup_ve_pc_hi32 - 32-bit fixup corresponding to foo@pc_hi
+ fixup_ve_pc_hi32,
+
+ /// fixup_ve_pc_lo32 - 32-bit fixup corresponding to foo@pc_lo
+ fixup_ve_pc_lo32,
+
+ /// fixup_ve_got_hi32 - 32-bit fixup corresponding to foo@got_hi
+ fixup_ve_got_hi32,
+
+ /// fixup_ve_got_lo32 - 32-bit fixup corresponding to foo@got_lo
+ fixup_ve_got_lo32,
+
+ /// fixup_ve_gotoff_hi32 - 32-bit fixup corresponding to foo@gotoff_hi
+ fixup_ve_gotoff_hi32,
+
+ /// fixup_ve_gotoff_lo32 - 32-bit fixup corresponding to foo@gotoff_lo
+ fixup_ve_gotoff_lo32,
+
+ /// fixup_ve_plt_hi32/lo32
+ fixup_ve_plt_hi32,
+ fixup_ve_plt_lo32,
+
+ /// fixups for Thread Local Storage
+ fixup_ve_tls_gd_hi32,
+ fixup_ve_tls_gd_lo32,
+ fixup_ve_tpoff_hi32,
+ fixup_ve_tpoff_lo32,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+} // namespace VE
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp
new file mode 100644
index 000000000000..1fe9423e01b8
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp
@@ -0,0 +1,227 @@
+//===-- VEInstPrinter.cpp - Convert VE MCInst to assembly syntax -----------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a VE MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "VEInstPrinter.h"
+#include "VE.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ve-asmprinter"
+
+// The generated AsmWriter (VEGenAsmWriter) uses "VE" as the target
+// namespace.
+namespace llvm {
+namespace VE {
+using namespace VE;
+}
+} // namespace llvm
+
+#define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "VEGenAsmWriter.inc"
+
+void VEInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+  // Generic registers share the same register name across register classes.
+  unsigned AltIdx = VE::AsmName;
+  // Misc registers each have their own name, so don't use alt-names for them.
+ if (MRI.getRegClass(VE::MISCRegClassID).contains(RegNo))
+ AltIdx = VE::NoRegAltName;
+ OS << '%' << getRegisterName(RegNo, AltIdx);
+}
+
+void VEInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &OS) {
+ if (!printAliasInstr(MI, Address, STI, OS))
+ printInstruction(MI, Address, STI, OS);
+ printAnnotation(OS, Annot);
+}
+
+void VEInstPrinter::printOperand(const MCInst *MI, int OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ if (MO.isReg()) {
+ printRegName(O, MO.getReg());
+ return;
+ }
+
+ if (MO.isImm()) {
+ switch (MI->getOpcode()) {
+ default:
+      // Expects signed 32-bit literals.
+ int32_t TruncatedImm = static_cast<int32_t>(MO.getImm());
+ O << TruncatedImm;
+ return;
+ }
+ }
+
+ assert(MO.isExpr() && "Unknown operand kind in printOperand");
+ MO.getExpr()->print(O, &MAI);
+}
+
+void VEInstPrinter::printMemASXOperand(const MCInst *MI, int OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O, const char *Modifier) {
+ // If this is an ADD operand, emit it like normal operands.
+ if (Modifier && !strcmp(Modifier, "arith")) {
+ printOperand(MI, OpNum, STI, O);
+ O << ", ";
+ printOperand(MI, OpNum + 1, STI, O);
+ return;
+ }
+
+ if (MI->getOperand(OpNum + 2).isImm() &&
+ MI->getOperand(OpNum + 2).getImm() == 0) {
+ // don't print "+0"
+ } else {
+ printOperand(MI, OpNum + 2, STI, O);
+ }
+ if (MI->getOperand(OpNum + 1).isImm() &&
+ MI->getOperand(OpNum + 1).getImm() == 0 &&
+ MI->getOperand(OpNum).isImm() && MI->getOperand(OpNum).getImm() == 0) {
+ if (MI->getOperand(OpNum + 2).isImm() &&
+ MI->getOperand(OpNum + 2).getImm() == 0) {
+ O << "0";
+ } else {
+ // don't print "+0,+0"
+ }
+ } else {
+ O << "(";
+ if (MI->getOperand(OpNum + 1).isImm() &&
+ MI->getOperand(OpNum + 1).getImm() == 0) {
+ // don't print "+0"
+ } else {
+ printOperand(MI, OpNum + 1, STI, O);
+ }
+ if (MI->getOperand(OpNum).isImm() && MI->getOperand(OpNum).getImm() == 0) {
+ // don't print "+0"
+ } else {
+ O << ", ";
+ printOperand(MI, OpNum, STI, O);
+ }
+ O << ")";
+ }
+}
+
+void VEInstPrinter::printMemASOperandASX(const MCInst *MI, int OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O, const char *Modifier) {
+ // If this is an ADD operand, emit it like normal operands.
+ if (Modifier && !strcmp(Modifier, "arith")) {
+ printOperand(MI, OpNum, STI, O);
+ O << ", ";
+ printOperand(MI, OpNum + 1, STI, O);
+ return;
+ }
+
+ if (MI->getOperand(OpNum + 1).isImm() &&
+ MI->getOperand(OpNum + 1).getImm() == 0) {
+ // don't print "+0"
+ } else {
+ printOperand(MI, OpNum + 1, STI, O);
+ }
+ if (MI->getOperand(OpNum).isImm() && MI->getOperand(OpNum).getImm() == 0) {
+ if (MI->getOperand(OpNum + 1).isImm() &&
+ MI->getOperand(OpNum + 1).getImm() == 0) {
+ O << "0";
+ } else {
+ // don't print "(0)"
+ }
+ } else {
+ O << "(, ";
+ printOperand(MI, OpNum, STI, O);
+ O << ")";
+ }
+}
+
+void VEInstPrinter::printMemASOperandRRM(const MCInst *MI, int OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O, const char *Modifier) {
+ // If this is an ADD operand, emit it like normal operands.
+ if (Modifier && !strcmp(Modifier, "arith")) {
+ printOperand(MI, OpNum, STI, O);
+ O << ", ";
+ printOperand(MI, OpNum + 1, STI, O);
+ return;
+ }
+
+ if (MI->getOperand(OpNum + 1).isImm() &&
+ MI->getOperand(OpNum + 1).getImm() == 0) {
+ // don't print "+0"
+ } else {
+ printOperand(MI, OpNum + 1, STI, O);
+ }
+ if (MI->getOperand(OpNum).isImm() && MI->getOperand(OpNum).getImm() == 0) {
+ if (MI->getOperand(OpNum + 1).isImm() &&
+ MI->getOperand(OpNum + 1).getImm() == 0) {
+ O << "0";
+ } else {
+ // don't print "(0)"
+ }
+ } else {
+ O << "(";
+ printOperand(MI, OpNum, STI, O);
+ O << ")";
+ }
+}
+
+void VEInstPrinter::printMemASOperandHM(const MCInst *MI, int OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O, const char *Modifier) {
+ // If this is an ADD operand, emit it like normal operands.
+ if (Modifier && !strcmp(Modifier, "arith")) {
+ printOperand(MI, OpNum, STI, O);
+ O << ", ";
+ printOperand(MI, OpNum + 1, STI, O);
+ return;
+ }
+
+ if (MI->getOperand(OpNum + 1).isImm() &&
+ MI->getOperand(OpNum + 1).getImm() == 0) {
+ // don't print "+0"
+ } else {
+ printOperand(MI, OpNum + 1, STI, O);
+ }
+ O << "(";
+ if (MI->getOperand(OpNum).isReg())
+ printOperand(MI, OpNum, STI, O);
+ O << ")";
+}
+
+void VEInstPrinter::printMImmOperand(const MCInst *MI, int OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ int MImm = (int)MI->getOperand(OpNum).getImm() & 0x7f;
+ if (MImm > 63)
+ O << "(" << MImm - 64 << ")0";
+ else
+ O << "(" << MImm << ")1";
+}
+
+void VEInstPrinter::printCCOperand(const MCInst *MI, int OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ int CC = (int)MI->getOperand(OpNum).getImm();
+ O << VECondCodeToString((VECC::CondCode)CC);
+}
+
+void VEInstPrinter::printRDOperand(const MCInst *MI, int OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ int RD = (int)MI->getOperand(OpNum).getImm();
+ O << VERDToString((VERD::RoundingMode)RD);
+}
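A minimal standalone sketch of the M-immediate notation printMImmOperand above produces, assuming the usual VE interpretation that "(m)1" is m leading one-bits and "(m)0" is m leading zero-bits (internal values 0-63 print as "(m)1", 64-127 as "(m-64)0"); the helper name expandMImm is invented here.

#include <cstdint>
#include <cstdio>

// Expand an M-immediate into its 64-bit value: "(m)1" has m leading ones,
// "(m)0" has m leading zeros, and the remaining low bits take the opposite
// value. m is in [0, 63].
static uint64_t expandMImm(unsigned M, bool LeadingOnes) {
  if (LeadingOnes)                       // "(m)1"
    return M == 0 ? 0 : ~UINT64_C(0) << (64 - M);
  return ~UINT64_C(0) >> M;              // "(m)0"
}

int main() {
  std::printf("(0)1  = 0x%016llx\n", (unsigned long long)expandMImm(0, true));
  std::printf("(16)1 = 0x%016llx\n", (unsigned long long)expandMImm(16, true));
  std::printf("(48)0 = 0x%016llx\n", (unsigned long long)expandMImm(48, false));
  return 0;
}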
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h
index 05a53d59e878..657cc513b3c5 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_VE_INSTPRINTER_VEINSTPRINTER_H
#define LLVM_LIB_TARGET_VE_INSTPRINTER_VEINSTPRINTER_H
+#include "VEMCTargetDesc.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
@@ -28,20 +29,32 @@ public:
const MCSubtargetInfo &STI, raw_ostream &OS) override;
// Autogenerated by tblgen.
- bool printAliasInstr(const MCInst *, const MCSubtargetInfo &, raw_ostream &);
+ bool printAliasInstr(const MCInst *, uint64_t Address,
+ const MCSubtargetInfo &, raw_ostream &);
void printInstruction(const MCInst *, uint64_t, const MCSubtargetInfo &,
raw_ostream &);
- static const char *getRegisterName(unsigned RegNo);
+ static const char *getRegisterName(unsigned RegNo,
+ unsigned AltIdx = VE::NoRegAltName);
- void printOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+ void printOperand(const MCInst *MI, int OpNum, const MCSubtargetInfo &STI,
raw_ostream &OS);
- void printMemASXOperand(const MCInst *MI, int opNum,
+ void printMemASXOperand(const MCInst *MI, int OpNum,
const MCSubtargetInfo &STI, raw_ostream &OS,
const char *Modifier = nullptr);
- void printMemASOperand(const MCInst *MI, int opNum,
- const MCSubtargetInfo &STI, raw_ostream &OS,
- const char *Modifier = nullptr);
- void printCCOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+ void printMemASOperandASX(const MCInst *MI, int OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &OS,
+ const char *Modifier = nullptr);
+ void printMemASOperandRRM(const MCInst *MI, int OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &OS,
+ const char *Modifier = nullptr);
+ void printMemASOperandHM(const MCInst *MI, int OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &OS,
+ const char *Modifier = nullptr);
+ void printMImmOperand(const MCInst *MI, int OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &OS);
+ void printCCOperand(const MCInst *MI, int OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &OS);
+ void printRDOperand(const MCInst *MI, int OpNum, const MCSubtargetInfo &STI,
raw_ostream &OS);
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp
index 9f29fc092c69..76824335239b 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp
@@ -37,4 +37,5 @@ VEELFMCAsmInfo::VEELFMCAsmInfo(const Triple &TheTriple) {
UsesELFSectionDirectiveForBSS = true;
SupportsDebugInformation = true;
+ UseIntegratedAssembler = false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp
new file mode 100644
index 000000000000..d50d8fcae9da
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp
@@ -0,0 +1,165 @@
+//===-- VEMCCodeEmitter.cpp - Convert VE code to machine code -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the VEMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/VEFixupKinds.h"
+#include "VE.h"
+#include "VEMCExpr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mccodeemitter"
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
+
+namespace {
+
+class VEMCCodeEmitter : public MCCodeEmitter {
+ const MCInstrInfo &MCII;
+ MCContext &Ctx;
+
+public:
+ VEMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : MCII(mcii), Ctx(ctx) {}
+ VEMCCodeEmitter(const VEMCCodeEmitter &) = delete;
+ VEMCCodeEmitter &operator=(const VEMCCodeEmitter &) = delete;
+ ~VEMCCodeEmitter() override = default;
+
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ uint64_t getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint64_t getCCOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint64_t getRDOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+private:
+ FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+ void
+ verifyInstructionPredicates(const MCInst &MI,
+ const FeatureBitset &AvailableFeatures) const;
+};
+
+} // end anonymous namespace
+
+void VEMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ verifyInstructionPredicates(MI,
+ computeAvailableFeatures(STI.getFeatureBits()));
+
+ uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
+ support::endian::write<uint64_t>(OS, Bits, support::little);
+
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+}
+
+unsigned VEMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ if (MO.isReg())
+ return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
+
+ if (MO.isImm())
+ return MO.getImm();
+
+ assert(MO.isExpr());
+ const MCExpr *Expr = MO.getExpr();
+ if (const VEMCExpr *SExpr = dyn_cast<VEMCExpr>(Expr)) {
+ MCFixupKind Kind = (MCFixupKind)SExpr->getFixupKind();
+ Fixups.push_back(MCFixup::create(0, Expr, Kind));
+ return 0;
+ }
+
+ int64_t Res;
+ if (Expr->evaluateAsAbsolute(Res))
+ return Res;
+
+ llvm_unreachable("Unhandled expression!");
+ return 0;
+}
+
+uint64_t
+VEMCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm())
+ return getMachineOpValue(MI, MO, Fixups, STI);
+
+ Fixups.push_back(
+ MCFixup::create(0, MO.getExpr(), (MCFixupKind)VE::fixup_ve_pc_lo32));
+ return 0;
+}
+
+uint64_t VEMCCodeEmitter::getCCOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return VECondCodeToVal(
+ static_cast<VECC::CondCode>(getMachineOpValue(MI, MO, Fixups, STI)));
+ return 0;
+}
+
+uint64_t VEMCCodeEmitter::getRDOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return VERDToVal(static_cast<VERD::RoundingMode>(
+ getMachineOpValue(MI, MO, Fixups, STI)));
+ return 0;
+}
+
+#define ENABLE_INSTR_PREDICATE_VERIFIER
+#include "VEGenMCCodeEmitter.inc"
+
+MCCodeEmitter *llvm::createVEMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new VEMCCodeEmitter(MCII, Ctx);
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp
new file mode 100644
index 000000000000..a3ce3b3309be
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp
@@ -0,0 +1,225 @@
+//===-- VEMCExpr.cpp - VE specific MC expression classes ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the assembly expression modifiers
+// accepted by the VE architecture (e.g. "%hi", "%lo", ...).
+//
+//===----------------------------------------------------------------------===//
+
+#include "VEMCExpr.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/BinaryFormat/ELF.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "vemcexpr"
+
+const VEMCExpr *VEMCExpr::create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) VEMCExpr(Kind, Expr);
+}
+
+void VEMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
+
+ bool closeParen = printVariantKind(OS, Kind);
+
+ const MCExpr *Expr = getSubExpr();
+ Expr->print(OS, MAI);
+
+ if (closeParen)
+ OS << ')';
+ printVariantKindSuffix(OS, Kind);
+}
+
+bool VEMCExpr::printVariantKind(raw_ostream &OS, VariantKind Kind) {
+ switch (Kind) {
+ case VK_VE_None:
+ case VK_VE_REFLONG:
+ return false;
+
+ case VK_VE_HI32:
+ case VK_VE_LO32:
+ case VK_VE_PC_HI32:
+ case VK_VE_PC_LO32:
+ case VK_VE_GOT_HI32:
+ case VK_VE_GOT_LO32:
+ case VK_VE_GOTOFF_HI32:
+ case VK_VE_GOTOFF_LO32:
+ case VK_VE_PLT_HI32:
+ case VK_VE_PLT_LO32:
+ case VK_VE_TLS_GD_HI32:
+ case VK_VE_TLS_GD_LO32:
+ case VK_VE_TPOFF_HI32:
+ case VK_VE_TPOFF_LO32:
+ // Use suffix for these variant kinds
+ return false;
+ }
+ return true;
+}
+
+void VEMCExpr::printVariantKindSuffix(raw_ostream &OS, VariantKind Kind) {
+ switch (Kind) {
+ case VK_VE_None:
+ case VK_VE_REFLONG:
+ break;
+ case VK_VE_HI32:
+ OS << "@hi";
+ break;
+ case VK_VE_LO32:
+ OS << "@lo";
+ break;
+ case VK_VE_PC_HI32:
+ OS << "@pc_hi";
+ break;
+ case VK_VE_PC_LO32:
+ OS << "@pc_lo";
+ break;
+ case VK_VE_GOT_HI32:
+ OS << "@got_hi";
+ break;
+ case VK_VE_GOT_LO32:
+ OS << "@got_lo";
+ break;
+ case VK_VE_GOTOFF_HI32:
+ OS << "@gotoff_hi";
+ break;
+ case VK_VE_GOTOFF_LO32:
+ OS << "@gotoff_lo";
+ break;
+ case VK_VE_PLT_HI32:
+ OS << "@plt_hi";
+ break;
+ case VK_VE_PLT_LO32:
+ OS << "@plt_lo";
+ break;
+ case VK_VE_TLS_GD_HI32:
+ OS << "@tls_gd_hi";
+ break;
+ case VK_VE_TLS_GD_LO32:
+ OS << "@tls_gd_lo";
+ break;
+ case VK_VE_TPOFF_HI32:
+ OS << "@tpoff_hi";
+ break;
+ case VK_VE_TPOFF_LO32:
+ OS << "@tpoff_lo";
+ break;
+ }
+}
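+// Illustrative example (the symbol name "foo" is hypothetical): a VEMCExpr
+// created with VK_VE_GOT_LO32 around a reference to "foo" prints as
+// "foo@got_lo", since printVariantKind above selects the suffix form for
+// every current variant kind.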
+
+VEMCExpr::VariantKind VEMCExpr::parseVariantKind(StringRef name) {
+ return StringSwitch<VEMCExpr::VariantKind>(name)
+ .Case("hi", VK_VE_HI32)
+ .Case("lo", VK_VE_LO32)
+ .Case("pc_hi", VK_VE_PC_HI32)
+ .Case("pc_lo", VK_VE_PC_LO32)
+ .Case("got_hi", VK_VE_GOT_HI32)
+ .Case("got_lo", VK_VE_GOT_LO32)
+ .Case("gotoff_hi", VK_VE_GOTOFF_HI32)
+ .Case("gotoff_lo", VK_VE_GOTOFF_LO32)
+ .Case("plt_hi", VK_VE_PLT_HI32)
+ .Case("plt_lo", VK_VE_PLT_LO32)
+ .Case("tls_gd_hi", VK_VE_TLS_GD_HI32)
+ .Case("tls_gd_lo", VK_VE_TLS_GD_LO32)
+ .Case("tpoff_hi", VK_VE_TPOFF_HI32)
+ .Case("tpoff_lo", VK_VE_TPOFF_LO32)
+ .Default(VK_VE_None);
+}
+
+VE::Fixups VEMCExpr::getFixupKind(VEMCExpr::VariantKind Kind) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unhandled VEMCExpr::VariantKind");
+ case VK_VE_REFLONG:
+ return VE::fixup_ve_reflong;
+ case VK_VE_HI32:
+ return VE::fixup_ve_hi32;
+ case VK_VE_LO32:
+ return VE::fixup_ve_lo32;
+ case VK_VE_PC_HI32:
+ return VE::fixup_ve_pc_hi32;
+ case VK_VE_PC_LO32:
+ return VE::fixup_ve_pc_lo32;
+ case VK_VE_GOT_HI32:
+ return VE::fixup_ve_got_hi32;
+ case VK_VE_GOT_LO32:
+ return VE::fixup_ve_got_lo32;
+ case VK_VE_GOTOFF_HI32:
+ return VE::fixup_ve_gotoff_hi32;
+ case VK_VE_GOTOFF_LO32:
+ return VE::fixup_ve_gotoff_lo32;
+ case VK_VE_PLT_HI32:
+ return VE::fixup_ve_plt_hi32;
+ case VK_VE_PLT_LO32:
+ return VE::fixup_ve_plt_lo32;
+ case VK_VE_TLS_GD_HI32:
+ return VE::fixup_ve_tls_gd_hi32;
+ case VK_VE_TLS_GD_LO32:
+ return VE::fixup_ve_tls_gd_lo32;
+ case VK_VE_TPOFF_HI32:
+ return VE::fixup_ve_tpoff_hi32;
+ case VK_VE_TPOFF_LO32:
+ return VE::fixup_ve_tpoff_lo32;
+ }
+}
+
+bool VEMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
+ return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup);
+}
+
+static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
+ switch (Expr->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Can't handle nested target expr!");
+ break;
+
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
+ fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm);
+ fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm);
+ break;
+ }
+
+ case MCExpr::SymbolRef: {
+ // We're known to be under a TLS fixup, so any symbol should be
+ // modified. There should be only one.
+ const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
+ cast<MCSymbolELF>(SymRef.getSymbol()).setType(ELF::STT_TLS);
+ break;
+ }
+
+ case MCExpr::Unary:
+ fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void VEMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+ Streamer.visitUsedExpr(*getSubExpr());
+}
+
+void VEMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
+ switch (getKind()) {
+ default:
+ return;
+ case VK_VE_TLS_GD_HI32:
+ case VK_VE_TLS_GD_LO32:
+ case VK_VE_TPOFF_HI32:
+ case VK_VE_TPOFF_LO32:
+ break;
+ }
+ fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h
new file mode 100644
index 000000000000..2b0c44576099
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h
@@ -0,0 +1,95 @@
+//====- VEMCExpr.h - VE specific MC expression classes --------*- C++ -*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes VE-specific MCExprs, used for modifiers like
+// "%hi" or "%lo" etc.,
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_VE_MCTARGETDESC_VEMCEXPR_H
+#define LLVM_LIB_TARGET_VE_MCTARGETDESC_VEMCEXPR_H
+
+#include "VEFixupKinds.h"
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+
+class StringRef;
+class VEMCExpr : public MCTargetExpr {
+public:
+ enum VariantKind {
+ VK_VE_None,
+ VK_VE_REFLONG,
+ VK_VE_HI32,
+ VK_VE_LO32,
+ VK_VE_PC_HI32,
+ VK_VE_PC_LO32,
+ VK_VE_GOT_HI32,
+ VK_VE_GOT_LO32,
+ VK_VE_GOTOFF_HI32,
+ VK_VE_GOTOFF_LO32,
+ VK_VE_PLT_HI32,
+ VK_VE_PLT_LO32,
+ VK_VE_TLS_GD_HI32,
+ VK_VE_TLS_GD_LO32,
+ VK_VE_TPOFF_HI32,
+ VK_VE_TPOFF_LO32,
+ };
+
+private:
+ const VariantKind Kind;
+ const MCExpr *Expr;
+
+ explicit VEMCExpr(VariantKind Kind, const MCExpr *Expr)
+ : Kind(Kind), Expr(Expr) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const VEMCExpr *create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx);
+ /// @}
+ /// @name Accessors
+ /// @{
+
+ /// getKind - Get the kind of this expression.
+ VariantKind getKind() const { return Kind; }
+
+ /// getSubExpr - Get the child of this expression.
+ const MCExpr *getSubExpr() const { return Expr; }
+
+ /// getFixupKind - Get the fixup kind of this expression.
+ VE::Fixups getFixupKind() const { return getFixupKind(Kind); }
+
+ /// @}
+ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+ bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override;
+ MCFragment *findAssociatedFragment() const override {
+ return getSubExpr()->findAssociatedFragment();
+ }
+
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+
+ static bool classof(const VEMCExpr *) { return true; }
+
+ static VariantKind parseVariantKind(StringRef name);
+ static bool printVariantKind(raw_ostream &OS, VariantKind Kind);
+ static void printVariantKindSuffix(raw_ostream &OS, VariantKind Kind);
+ static VE::Fixups getFixupKind(VariantKind Kind);
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
index b228617058a6..a39cffc8f4a6 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
@@ -11,7 +11,8 @@
//===----------------------------------------------------------------------===//
#include "VEMCTargetDesc.h"
-#include "InstPrinter/VEInstPrinter.h"
+#include "TargetInfo/VETargetInfo.h"
+#include "VEInstPrinter.h"
#include "VEMCAsmInfo.h"
#include "VETargetStreamer.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -35,7 +36,7 @@ static MCAsmInfo *createVEMCAsmInfo(const MCRegisterInfo &MRI, const Triple &TT,
const MCTargetOptions &Options) {
MCAsmInfo *MAI = new VEELFMCAsmInfo(TT);
unsigned Reg = MRI.getDwarfRegNum(VE::SX11, true);
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 0);
+ MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, Reg, 0);
MAI->addInitialFrameState(Inst);
return MAI;
}
@@ -93,6 +94,12 @@ extern "C" void LLVMInitializeVETargetMC() {
// Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(*T, createVEMCSubtargetInfo);
+ // Register the MC Code Emitter.
+ TargetRegistry::RegisterMCCodeEmitter(*T, createVEMCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(*T, createVEAsmBackend);
+
// Register the object target streamer.
TargetRegistry::RegisterObjectTargetStreamer(*T,
createObjectTargetStreamer);
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h
index 24a5c8209be2..7fb8a556aa74 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h
@@ -22,7 +22,7 @@ class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
-class MCObjectWriter;
+class MCObjectTargetWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class MCTargetOptions;
@@ -32,8 +32,12 @@ class StringRef;
class raw_pwrite_stream;
class raw_ostream;
-Target &getTheVETarget();
-
+MCCodeEmitter *createVEMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI, MCContext &Ctx);
+MCAsmBackend *createVEAsmBackend(const Target &T, const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
+ const MCTargetOptions &Options);
+std::unique_ptr<MCObjectTargetWriter> createVEELFObjectWriter(uint8_t OSABI);
} // namespace llvm
// Defines symbolic names for VE registers. This defines a mapping from
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.cpp
index dfe94bbaaa4b..29f5afb67ac1 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.cpp
@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "VETargetStreamer.h"
-#include "InstPrinter/VEInstPrinter.h"
+#include "VEInstPrinter.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp
index be68fe7d2429..65bd142fe0db 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp
@@ -6,8 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "VE.h"
-#include "llvm/IR/Module.h"
+#include "TargetInfo/VETargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.h b/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.h
new file mode 100644
index 000000000000..7879e6f069a1
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/TargetInfo/VETargetInfo.h
@@ -0,0 +1,20 @@
+//===-- VETargetInfo.h - VE Target Implementation ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_VE_TARGETINFO_VETARGETINFO_H
+#define LLVM_LIB_TARGET_VE_TARGETINFO_VETARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheVETarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_VE_TARGETINFO_VETARGETINFO_H
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VE.h b/contrib/llvm-project/llvm/lib/Target/VE/VE.h
index 9b61f2b63f36..7ed7797cbb83 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VE.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VE.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_VE_VE_H
#include "MCTargetDesc/VEMCTargetDesc.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
@@ -37,36 +38,50 @@ namespace llvm {
// Enums corresponding to VE condition codes, both icc's and fcc's. These
// values must be kept in sync with the ones in the .td file.
namespace VECC {
-enum CondCodes {
+enum CondCode {
// Integer comparison
- CC_IG = 0, // Greater
- CC_IL = 1, // Less
+ CC_IG = 0, // Greater
+ CC_IL = 1, // Less
CC_INE = 2, // Not Equal
CC_IEQ = 3, // Equal
CC_IGE = 4, // Greater or Equal
CC_ILE = 5, // Less or Equal
// Floating point comparison
- CC_AF = 0 + 6, // Never
- CC_G = 1 + 6, // Greater
- CC_L = 2 + 6, // Less
- CC_NE = 3 + 6, // Not Equal
- CC_EQ = 4 + 6, // Equal
- CC_GE = 5 + 6, // Greater or Equal
- CC_LE = 6 + 6, // Less or Equal
- CC_NUM = 7 + 6, // Number
- CC_NAN = 8 + 6, // NaN
- CC_GNAN = 9 + 6, // Greater or NaN
- CC_LNAN = 10 + 6, // Less or NaN
+ CC_AF = 0 + 6, // Never
+ CC_G = 1 + 6, // Greater
+ CC_L = 2 + 6, // Less
+ CC_NE = 3 + 6, // Not Equal
+ CC_EQ = 4 + 6, // Equal
+ CC_GE = 5 + 6, // Greater or Equal
+ CC_LE = 6 + 6, // Less or Equal
+ CC_NUM = 7 + 6, // Number
+ CC_NAN = 8 + 6, // NaN
+ CC_GNAN = 9 + 6, // Greater or NaN
+ CC_LNAN = 10 + 6, // Less or NaN
CC_NENAN = 11 + 6, // Not Equal or NaN
CC_EQNAN = 12 + 6, // Equal or NaN
CC_GENAN = 13 + 6, // Greater or Equal or NaN
CC_LENAN = 14 + 6, // Less or Equal or NaN
- CC_AT = 15 + 6, // Always
+ CC_AT = 15 + 6, // Always
+ UNKNOWN
+};
+}
+// Enums corresponding to VE Rounding Mode. These values must be kept in
+// sync with the ones in the .td file.
+namespace VERD {
+enum RoundingMode {
+ RD_NONE = 0, // According to PSW
+ RD_RZ = 8, // Round toward Zero
+ RD_RP = 9, // Round toward Plus infinity
+ RD_RM = 10, // Round toward Minus infinity
+ RD_RN = 11, // Round to Nearest (ties to Even)
+ RD_RA = 12, // Round to Nearest (ties to Away)
+ UNKNOWN
};
}
-inline static const char *VECondCodeToString(VECC::CondCodes CC) {
+inline static const char *VECondCodeToString(VECC::CondCode CC) {
switch (CC) {
case VECC::CC_IG: return "gt";
case VECC::CC_IL: return "lt";
@@ -90,20 +105,252 @@ inline static const char *VECondCodeToString(VECC::CondCodes CC) {
case VECC::CC_GENAN: return "genan";
case VECC::CC_LENAN: return "lenan";
case VECC::CC_AT: return "at";
+ default:
+ llvm_unreachable("Invalid cond code");
+ }
+}
+
+inline static VECC::CondCode stringToVEICondCode(StringRef S) {
+ return StringSwitch<VECC::CondCode>(S)
+ .Case("gt", VECC::CC_IG)
+ .Case("lt", VECC::CC_IL)
+ .Case("ne", VECC::CC_INE)
+ .Case("eq", VECC::CC_IEQ)
+ .Case("ge", VECC::CC_IGE)
+ .Case("le", VECC::CC_ILE)
+ .Case("af", VECC::CC_AF)
+ .Case("at", VECC::CC_AT)
+ .Case("", VECC::CC_AT)
+ .Default(VECC::UNKNOWN);
+}
+
+inline static VECC::CondCode stringToVEFCondCode(StringRef S) {
+ return StringSwitch<VECC::CondCode>(S)
+ .Case("gt", VECC::CC_G)
+ .Case("lt", VECC::CC_L)
+ .Case("ne", VECC::CC_NE)
+ .Case("eq", VECC::CC_EQ)
+ .Case("ge", VECC::CC_GE)
+ .Case("le", VECC::CC_LE)
+ .Case("num", VECC::CC_NUM)
+ .Case("nan", VECC::CC_NAN)
+ .Case("gtnan", VECC::CC_GNAN)
+ .Case("ltnan", VECC::CC_LNAN)
+ .Case("nenan", VECC::CC_NENAN)
+ .Case("eqnan", VECC::CC_EQNAN)
+ .Case("genan", VECC::CC_GENAN)
+ .Case("lenan", VECC::CC_LENAN)
+ .Case("af", VECC::CC_AF)
+ .Case("at", VECC::CC_AT)
+ .Case("", VECC::CC_AT)
+ .Default(VECC::UNKNOWN);
+}
+
+inline static unsigned VECondCodeToVal(VECC::CondCode CC) {
+ switch (CC) {
+ case VECC::CC_IG:
+ return 1;
+ case VECC::CC_IL:
+ return 2;
+ case VECC::CC_INE:
+ return 3;
+ case VECC::CC_IEQ:
+ return 4;
+ case VECC::CC_IGE:
+ return 5;
+ case VECC::CC_ILE:
+ return 6;
+ case VECC::CC_AF:
+ return 0;
+ case VECC::CC_G:
+ return 1;
+ case VECC::CC_L:
+ return 2;
+ case VECC::CC_NE:
+ return 3;
+ case VECC::CC_EQ:
+ return 4;
+ case VECC::CC_GE:
+ return 5;
+ case VECC::CC_LE:
+ return 6;
+ case VECC::CC_NUM:
+ return 7;
+ case VECC::CC_NAN:
+ return 8;
+ case VECC::CC_GNAN:
+ return 9;
+ case VECC::CC_LNAN:
+ return 10;
+ case VECC::CC_NENAN:
+ return 11;
+ case VECC::CC_EQNAN:
+ return 12;
+ case VECC::CC_GENAN:
+ return 13;
+ case VECC::CC_LENAN:
+ return 14;
+ case VECC::CC_AT:
+ return 15;
+ default:
+ llvm_unreachable("Invalid cond code");
+ }
+}
+
+inline static VECC::CondCode VEValToCondCode(unsigned Val, bool IsInteger) {
+ if (IsInteger) {
+ switch (Val) {
+ case 0:
+ return VECC::CC_AF;
+ case 1:
+ return VECC::CC_IG;
+ case 2:
+ return VECC::CC_IL;
+ case 3:
+ return VECC::CC_INE;
+ case 4:
+ return VECC::CC_IEQ;
+ case 5:
+ return VECC::CC_IGE;
+ case 6:
+ return VECC::CC_ILE;
+ case 15:
+ return VECC::CC_AT;
+ }
+ } else {
+ switch (Val) {
+ case 0:
+ return VECC::CC_AF;
+ case 1:
+ return VECC::CC_G;
+ case 2:
+ return VECC::CC_L;
+ case 3:
+ return VECC::CC_NE;
+ case 4:
+ return VECC::CC_EQ;
+ case 5:
+ return VECC::CC_GE;
+ case 6:
+ return VECC::CC_LE;
+ case 7:
+ return VECC::CC_NUM;
+ case 8:
+ return VECC::CC_NAN;
+ case 9:
+ return VECC::CC_GNAN;
+ case 10:
+ return VECC::CC_LNAN;
+ case 11:
+ return VECC::CC_NENAN;
+ case 12:
+ return VECC::CC_EQNAN;
+ case 13:
+ return VECC::CC_GENAN;
+ case 14:
+ return VECC::CC_LENAN;
+ case 15:
+ return VECC::CC_AT;
+ }
}
llvm_unreachable("Invalid cond code");
}
-// Different to Hi_32/Lo_32 the HI32 and LO32 functions
-// preserve the correct numerical value
-// on the LLVM data type for MC immediates (int64_t).
-inline static int64_t HI32(int64_t imm) {
- return (int32_t)(imm >> 32);
+inline static const char *VERDToString(VERD::RoundingMode R) {
+ switch (R) {
+ case VERD::RD_NONE:
+ return "";
+ case VERD::RD_RZ:
+ return ".rz";
+ case VERD::RD_RP:
+ return ".rp";
+ case VERD::RD_RM:
+ return ".rm";
+ case VERD::RD_RN:
+ return ".rn";
+ case VERD::RD_RA:
+ return ".ra";
+ default:
+ llvm_unreachable("Invalid branch predicate");
+ }
}
-inline static int64_t LO32(int64_t imm) {
- return (int32_t)(imm);
+inline static VERD::RoundingMode stringToVERD(StringRef S) {
+ return StringSwitch<VERD::RoundingMode>(S)
+ .Case("", VERD::RD_NONE)
+ .Case(".rz", VERD::RD_RZ)
+ .Case(".rp", VERD::RD_RP)
+ .Case(".rm", VERD::RD_RM)
+ .Case(".rn", VERD::RD_RN)
+ .Case(".ra", VERD::RD_RA)
+ .Default(VERD::UNKNOWN);
}
+inline static unsigned VERDToVal(VERD::RoundingMode R) {
+ switch (R) {
+ case VERD::RD_NONE:
+ case VERD::RD_RZ:
+ case VERD::RD_RP:
+ case VERD::RD_RM:
+ case VERD::RD_RN:
+ case VERD::RD_RA:
+ return static_cast<unsigned>(R);
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid branch predicates");
+}
+
+inline static VERD::RoundingMode VEValToRD(unsigned Val) {
+ switch (Val) {
+ case static_cast<unsigned>(VERD::RD_NONE):
+ return VERD::RD_NONE;
+ case static_cast<unsigned>(VERD::RD_RZ):
+ return VERD::RD_RZ;
+ case static_cast<unsigned>(VERD::RD_RP):
+ return VERD::RD_RP;
+ case static_cast<unsigned>(VERD::RD_RM):
+ return VERD::RD_RM;
+ case static_cast<unsigned>(VERD::RD_RN):
+ return VERD::RD_RN;
+ case static_cast<unsigned>(VERD::RD_RA):
+ return VERD::RD_RA;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid branch predicates");
+}
+
+// MImm - A special immediate value consisting of a sequential bit stream of
+// 0s or 1s. See VEInstrInfo.td for details.
+inline static bool isMImmVal(uint64_t Val) {
+ if (Val == 0) {
+ // (0)1 is 0
+ return true;
+ }
+ if (isMask_64(Val)) {
+ // (m)0 patterns
+ return true;
+ }
+ // (m)1 patterns
+ return (Val & (1UL << 63)) && isShiftedMask_64(Val);
+}
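+// Worked examples for isMImmVal (illustrative values, not from the original
+// sources):
+//   0x0000000000000000 -> (0)1  (zero is the degenerate (m)1 pattern)
+//   0xffff000000000000 -> (16)1 (16 leading ones, then zeros)
+//   0x00000000ffffffff -> (32)0 (32 leading zeros, then ones)
+//   0x0000ffff00000000 -> not a mimm; the run of ones is neither leading
+//                         nor trailing
+// The M0/M1 helpers below give the operand encodings: (m)0 -> m + 64 and
+// (m)1 -> m.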
+
+inline static bool isMImm32Val(uint32_t Val) {
+ if (Val == 0) {
+ // (0)1 is 0
+ return true;
+ }
+ if (isMask_32(Val)) {
+ // (m)0 patterns
+ return true;
+ }
+ // (m)1 patterns
+ return (Val & (1 << 31)) && isShiftedMask_32(Val);
+}
+
+inline unsigned M0(unsigned Val) { return Val + 64; }
+inline unsigned M1(unsigned Val) { return Val; }
+
} // namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VE.td b/contrib/llvm-project/llvm/lib/Target/VE/VE.td
index 7404321b1a06..617a6ea458b6 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VE.td
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VE.td
@@ -29,6 +29,13 @@ include "VEInstrInfo.td"
def VEInstrInfo : InstrInfo;
+def VEAsmParser : AsmParser {
+ // Use both the VE register name matcher to accept "S0~S63" register
+ // names and the default register matcher to accept other registers.
+ let AllowDuplicateRegisterNames = 1;
+ let ShouldEmitMatchRegisterAltName = 1;
+}
+
//===----------------------------------------------------------------------===//
// VE processors supported.
//===----------------------------------------------------------------------===//
@@ -51,6 +58,7 @@ def VEAsmWriter : AsmWriter {
def VE : Target {
// Pull in Instruction Info:
let InstructionSet = VEInstrInfo;
+ let AssemblyParsers = [VEAsmParser];
let AssemblyWriters = [VEAsmWriter];
let AllowRegisterRenaming = 1;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp
index 918f2a1acdaf..86e3aa3d3fa1 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEAsmPrinter.cpp
@@ -11,8 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "InstPrinter/VEInstPrinter.h"
+#include "MCTargetDesc/VEInstPrinter.h"
+#include "MCTargetDesc/VEMCExpr.h"
#include "MCTargetDesc/VETargetStreamer.h"
+#include "TargetInfo/VETargetInfo.h"
#include "VE.h"
#include "VEInstrInfo.h"
#include "VETargetMachine.h"
@@ -46,7 +48,14 @@ public:
StringRef getPassName() const override { return "VE Assembly Printer"; }
- void EmitInstruction(const MachineInstr *MI) override;
+ void lowerGETGOTAndEmitMCInsts(const MachineInstr *MI,
+ const MCSubtargetInfo &STI);
+ void lowerGETFunPLTAndEmitMCInsts(const MachineInstr *MI,
+ const MCSubtargetInfo &STI);
+ void lowerGETTLSAddrAndEmitMCInsts(const MachineInstr *MI,
+ const MCSubtargetInfo &STI);
+
+ void emitInstruction(const MachineInstr *MI) override;
static const char *getRegisterName(unsigned RegNo) {
return VEInstPrinter::getRegisterName(RegNo);
@@ -54,7 +63,265 @@ public:
};
} // end of anonymous namespace
-void VEAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+static MCOperand createVEMCOperand(VEMCExpr::VariantKind Kind, MCSymbol *Sym,
+ MCContext &OutContext) {
+ const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Sym, OutContext);
+ const VEMCExpr *expr = VEMCExpr::create(Kind, MCSym, OutContext);
+ return MCOperand::createExpr(expr);
+}
+
+static MCOperand createGOTRelExprOp(VEMCExpr::VariantKind Kind,
+ MCSymbol *GOTLabel, MCContext &OutContext) {
+ const MCSymbolRefExpr *GOT = MCSymbolRefExpr::create(GOTLabel, OutContext);
+ const VEMCExpr *expr = VEMCExpr::create(Kind, GOT, OutContext);
+ return MCOperand::createExpr(expr);
+}
+
+static void emitSIC(MCStreamer &OutStreamer, MCOperand &RD,
+ const MCSubtargetInfo &STI) {
+ MCInst SICInst;
+ SICInst.setOpcode(VE::SIC);
+ SICInst.addOperand(RD);
+ OutStreamer.emitInstruction(SICInst, STI);
+}
+
+static void emitBSIC(MCStreamer &OutStreamer, MCOperand &R1, MCOperand &R2,
+ const MCSubtargetInfo &STI) {
+ MCInst BSICInst;
+ BSICInst.setOpcode(VE::BSICrii);
+ BSICInst.addOperand(R1);
+ BSICInst.addOperand(R2);
+ MCOperand czero = MCOperand::createImm(0);
+ BSICInst.addOperand(czero);
+ BSICInst.addOperand(czero);
+ OutStreamer.emitInstruction(BSICInst, STI);
+}
+
+static void emitLEAzzi(MCStreamer &OutStreamer, MCOperand &Imm, MCOperand &RD,
+ const MCSubtargetInfo &STI) {
+ MCInst LEAInst;
+ LEAInst.setOpcode(VE::LEAzii);
+ LEAInst.addOperand(RD);
+ MCOperand CZero = MCOperand::createImm(0);
+ LEAInst.addOperand(CZero);
+ LEAInst.addOperand(CZero);
+ LEAInst.addOperand(Imm);
+ OutStreamer.emitInstruction(LEAInst, STI);
+}
+
+static void emitLEASLzzi(MCStreamer &OutStreamer, MCOperand &Imm, MCOperand &RD,
+ const MCSubtargetInfo &STI) {
+ MCInst LEASLInst;
+ LEASLInst.setOpcode(VE::LEASLzii);
+ LEASLInst.addOperand(RD);
+ MCOperand CZero = MCOperand::createImm(0);
+ LEASLInst.addOperand(CZero);
+ LEASLInst.addOperand(CZero);
+ LEASLInst.addOperand(Imm);
+ OutStreamer.emitInstruction(LEASLInst, STI);
+}
+
+static void emitLEAzii(MCStreamer &OutStreamer, MCOperand &RS1, MCOperand &Imm,
+ MCOperand &RD, const MCSubtargetInfo &STI) {
+ MCInst LEAInst;
+ LEAInst.setOpcode(VE::LEAzii);
+ LEAInst.addOperand(RD);
+ MCOperand CZero = MCOperand::createImm(0);
+ LEAInst.addOperand(CZero);
+ LEAInst.addOperand(RS1);
+ LEAInst.addOperand(Imm);
+ OutStreamer.emitInstruction(LEAInst, STI);
+}
+
+static void emitLEASLrri(MCStreamer &OutStreamer, MCOperand &RS1,
+ MCOperand &RS2, MCOperand &Imm, MCOperand &RD,
+ const MCSubtargetInfo &STI) {
+ MCInst LEASLInst;
+ LEASLInst.setOpcode(VE::LEASLrri);
+ LEASLInst.addOperand(RD);
+ LEASLInst.addOperand(RS1);
+ LEASLInst.addOperand(RS2);
+ LEASLInst.addOperand(Imm);
+ OutStreamer.emitInstruction(LEASLInst, STI);
+}
+
+static void emitBinary(MCStreamer &OutStreamer, unsigned Opcode, MCOperand &RS1,
+ MCOperand &Src2, MCOperand &RD,
+ const MCSubtargetInfo &STI) {
+ MCInst Inst;
+ Inst.setOpcode(Opcode);
+ Inst.addOperand(RD);
+ Inst.addOperand(RS1);
+ Inst.addOperand(Src2);
+ OutStreamer.emitInstruction(Inst, STI);
+}
+
+static void emitANDrm(MCStreamer &OutStreamer, MCOperand &RS1, MCOperand &Imm,
+ MCOperand &RD, const MCSubtargetInfo &STI) {
+ emitBinary(OutStreamer, VE::ANDrm, RS1, Imm, RD, STI);
+}
+
+static void emitHiLo(MCStreamer &OutStreamer, MCSymbol *GOTSym,
+ VEMCExpr::VariantKind HiKind, VEMCExpr::VariantKind LoKind,
+ MCOperand &RD, MCContext &OutContext,
+ const MCSubtargetInfo &STI) {
+
+ MCOperand hi = createVEMCOperand(HiKind, GOTSym, OutContext);
+ MCOperand lo = createVEMCOperand(LoKind, GOTSym, OutContext);
+ emitLEAzzi(OutStreamer, lo, RD, STI);
+ MCOperand M032 = MCOperand::createImm(M0(32));
+ emitANDrm(OutStreamer, RD, M032, RD, STI);
+ emitLEASLzzi(OutStreamer, hi, RD, STI);
+}
+
+void VEAsmPrinter::lowerGETGOTAndEmitMCInsts(const MachineInstr *MI,
+ const MCSubtargetInfo &STI) {
+ MCSymbol *GOTLabel =
+ OutContext.getOrCreateSymbol(Twine("_GLOBAL_OFFSET_TABLE_"));
+
+ const MachineOperand &MO = MI->getOperand(0);
+ MCOperand MCRegOP = MCOperand::createReg(MO.getReg());
+
+ if (!isPositionIndependent()) {
+ // Just load the address of GOT to MCRegOP.
+ switch (TM.getCodeModel()) {
+ default:
+ llvm_unreachable("Unsupported absolute code model");
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ case CodeModel::Large:
+ emitHiLo(*OutStreamer, GOTLabel, VEMCExpr::VK_VE_HI32,
+ VEMCExpr::VK_VE_LO32, MCRegOP, OutContext, STI);
+ break;
+ }
+ return;
+ }
+
+ MCOperand RegGOT = MCOperand::createReg(VE::SX15); // GOT
+ MCOperand RegPLT = MCOperand::createReg(VE::SX16); // PLT
+
+ // lea %got, _GLOBAL_OFFSET_TABLE_@PC_LO(-24)
+ // and %got, %got, (32)0
+ // sic %plt
+ // lea.sl %got, _GLOBAL_OFFSET_TABLE_@PC_HI(%got, %plt)
+ MCOperand cim24 = MCOperand::createImm(-24);
+ MCOperand loImm =
+ createGOTRelExprOp(VEMCExpr::VK_VE_PC_LO32, GOTLabel, OutContext);
+ emitLEAzii(*OutStreamer, cim24, loImm, MCRegOP, STI);
+ MCOperand M032 = MCOperand::createImm(M0(32));
+ emitANDrm(*OutStreamer, MCRegOP, M032, MCRegOP, STI);
+ emitSIC(*OutStreamer, RegPLT, STI);
+ MCOperand hiImm =
+ createGOTRelExprOp(VEMCExpr::VK_VE_PC_HI32, GOTLabel, OutContext);
+ emitLEASLrri(*OutStreamer, RegGOT, RegPLT, hiImm, MCRegOP, STI);
+}
+
+void VEAsmPrinter::lowerGETFunPLTAndEmitMCInsts(const MachineInstr *MI,
+ const MCSubtargetInfo &STI) {
+ const MachineOperand &MO = MI->getOperand(0);
+ MCOperand MCRegOP = MCOperand::createReg(MO.getReg());
+ const MachineOperand &Addr = MI->getOperand(1);
+ MCSymbol *AddrSym = nullptr;
+
+ switch (Addr.getType()) {
+ default:
+ llvm_unreachable("<unknown operand type>");
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ report_fatal_error("MBB is not supported yet");
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ report_fatal_error("ConstantPool is not supported yet");
+ return;
+ case MachineOperand::MO_ExternalSymbol:
+ AddrSym = GetExternalSymbolSymbol(Addr.getSymbolName());
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ AddrSym = getSymbol(Addr.getGlobal());
+ break;
+ }
+
+ if (!isPositionIndependent()) {
+ llvm_unreachable("Unsupported uses of %plt in not PIC code");
+ return;
+ }
+
+ MCOperand RegPLT = MCOperand::createReg(VE::SX16); // PLT
+
+ // lea %dst, %plt_lo(func)(-24)
+ // and %dst, %dst, (32)0
+ // sic %plt ; FIXME: is it safe to use %plt here?
+ // lea.sl %dst, %plt_hi(func)(%dst, %plt)
+ MCOperand cim24 = MCOperand::createImm(-24);
+ MCOperand loImm =
+ createGOTRelExprOp(VEMCExpr::VK_VE_PLT_LO32, AddrSym, OutContext);
+ emitLEAzii(*OutStreamer, cim24, loImm, MCRegOP, STI);
+ MCOperand M032 = MCOperand::createImm(M0(32));
+ emitANDrm(*OutStreamer, MCRegOP, M032, MCRegOP, STI);
+ emitSIC(*OutStreamer, RegPLT, STI);
+ MCOperand hiImm =
+ createGOTRelExprOp(VEMCExpr::VK_VE_PLT_HI32, AddrSym, OutContext);
+ emitLEASLrri(*OutStreamer, MCRegOP, RegPLT, hiImm, MCRegOP, STI);
+}
+
+void VEAsmPrinter::lowerGETTLSAddrAndEmitMCInsts(const MachineInstr *MI,
+ const MCSubtargetInfo &STI) {
+ const MachineOperand &Addr = MI->getOperand(0);
+ MCSymbol *AddrSym = nullptr;
+
+ switch (Addr.getType()) {
+ default:
+ llvm_unreachable("<unknown operand type>");
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ report_fatal_error("MBB is not supported yet");
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ report_fatal_error("ConstantPool is not supported yet");
+ return;
+ case MachineOperand::MO_ExternalSymbol:
+ AddrSym = GetExternalSymbolSymbol(Addr.getSymbolName());
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ AddrSym = getSymbol(Addr.getGlobal());
+ break;
+ }
+
+ MCOperand RegLR = MCOperand::createReg(VE::SX10); // LR
+ MCOperand RegS0 = MCOperand::createReg(VE::SX0); // S0
+ MCOperand RegS12 = MCOperand::createReg(VE::SX12); // S12
+ MCSymbol *GetTLSLabel = OutContext.getOrCreateSymbol(Twine("__tls_get_addr"));
+
+ // lea %s0, sym@tls_gd_lo(-24)
+ // and %s0, %s0, (32)0
+ // sic %lr
+ // lea.sl %s0, sym@tls_gd_hi(%s0, %lr)
+ // lea %s12, __tls_get_addr@plt_lo(8)
+ // and %s12, %s12, (32)0
+ // lea.sl %s12, __tls_get_addr@plt_hi(%s12, %lr)
+ // bsic %lr, (, %s12)
+ MCOperand cim24 = MCOperand::createImm(-24);
+ MCOperand loImm =
+ createGOTRelExprOp(VEMCExpr::VK_VE_TLS_GD_LO32, AddrSym, OutContext);
+ emitLEAzii(*OutStreamer, cim24, loImm, RegS0, STI);
+ MCOperand M032 = MCOperand::createImm(M0(32));
+ emitANDrm(*OutStreamer, RegS0, M032, RegS0, STI);
+ emitSIC(*OutStreamer, RegLR, STI);
+ MCOperand hiImm =
+ createGOTRelExprOp(VEMCExpr::VK_VE_TLS_GD_HI32, AddrSym, OutContext);
+ emitLEASLrri(*OutStreamer, RegS0, RegLR, hiImm, RegS0, STI);
+ MCOperand ci8 = MCOperand::createImm(8);
+ MCOperand loImm2 =
+ createGOTRelExprOp(VEMCExpr::VK_VE_PLT_LO32, GetTLSLabel, OutContext);
+ emitLEAzii(*OutStreamer, ci8, loImm2, RegS12, STI);
+ emitANDrm(*OutStreamer, RegS12, M032, RegS12, STI);
+ MCOperand hiImm2 =
+ createGOTRelExprOp(VEMCExpr::VK_VE_PLT_HI32, GetTLSLabel, OutContext);
+ emitLEASLrri(*OutStreamer, RegS12, RegLR, hiImm2, RegS12, STI);
+ emitBSIC(*OutStreamer, RegLR, RegS12, STI);
+}
+
+void VEAsmPrinter::emitInstruction(const MachineInstr *MI) {
switch (MI->getOpcode()) {
default:
@@ -62,7 +329,17 @@ void VEAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case TargetOpcode::DBG_VALUE:
// FIXME: Debug Value.
return;
+ case VE::GETGOT:
+ lowerGETGOTAndEmitMCInsts(MI, getSubtargetInfo());
+ return;
+ case VE::GETFUNPLT:
+ lowerGETFunPLTAndEmitMCInsts(MI, getSubtargetInfo());
+ return;
+ case VE::GETTLSADDR:
+ lowerGETTLSAddrAndEmitMCInsts(MI, getSubtargetInfo());
+ return;
}
+
MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
do {
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td b/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td
index 1a9097c79dd4..4f04dae884ab 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VECallingConv.td
@@ -13,7 +13,77 @@
//===----------------------------------------------------------------------===//
// Aurora VE
//===----------------------------------------------------------------------===//
+def CC_VE_C_Stack: CallingConv<[
+ // float --> need special handling like below.
+ // 0 4
+ // +------+------+
+ // | empty| float|
+ // +------+------+
+ CCIfType<[f32], CCCustom<"allocateFloat">>,
+
+ // All of the rest are assigned to the stack in 8-byte aligned units.
+ CCAssignToStack<0, 8>
+]>;
+
+def CC_VE : CallingConv<[
+ // All arguments get passed in generic registers if there is space.
+
+ // Promote i1/i8/i16 arguments to i32.
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+ // bool, char, int, enum, long --> generic integer 32 bit registers
+ CCIfType<[i32], CCAssignToRegWithShadow<
+ [SW0, SW1, SW2, SW3, SW4, SW5, SW6, SW7],
+ [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+
+ // float --> generic floating point 32 bit registers
+ CCIfType<[f32], CCAssignToRegWithShadow<
+ [SF0, SF1, SF2, SF3, SF4, SF5, SF6, SF7],
+ [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+
+ // long long/double --> generic 64 bit registers
+ CCIfType<[i64, f64],
+ CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+
+ // Alternatively, they are assigned to the stack in 8-byte aligned units.
+ CCDelegateTo<CC_VE_C_Stack>
+]>;
+
+// All arguments are passed on the stack for varargs or non-prototyped
+// functions.
+def CC_VE2 : CallingConv<[
+ // float --> need special handling like below.
+ // 0 4
+ // +------+------+
+ // | empty| float|
+ // +------+------+
+ CCIfType<[f32], CCCustom<"allocateFloat">>,
+
+ CCAssignToStack<0, 8>
+]>;
+
+def RetCC_VE : CallingConv<[
+ // Promote i1/i8/i16 arguments to i32.
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+ // bool, char, int, enum, long --> generic integer 32 bit registers
+ CCIfType<[i32], CCAssignToRegWithShadow<
+ [SW0, SW1, SW2, SW3, SW4, SW5, SW6, SW7],
+ [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+
+ // float --> generic floating point 32 bit registers
+ CCIfType<[f32], CCAssignToRegWithShadow<
+ [SF0, SF1, SF2, SF3, SF4, SF5, SF6, SF7],
+ [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+
+ // long long/double --> generic 64 bit registers
+ CCIfType<[i64, f64],
+ CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+]>;
// Callee-saved registers
def CSR : CalleeSavedRegs<(add (sequence "SX%u", 18, 33))>;
def CSR_NoRegs : CalleeSavedRegs<(add)>;
+
+// PreserveAll (clobbers s62,s63) - used for ve_grow_stack
+def CSR_preserve_all : CalleeSavedRegs<(add (sequence "SX%u", 0, 61))>;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp
index ef5b5f055911..8b10e6466123 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.cpp
@@ -12,6 +12,7 @@
#include "VEFrameLowering.h"
#include "VEInstrInfo.h"
+#include "VEMachineFunctionInfo.h"
#include "VESubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -29,12 +30,13 @@ using namespace llvm;
VEFrameLowering::VEFrameLowering(const VESubtarget &ST)
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(16), 0,
- Align(16)) {}
+ Align(16)),
+ STI(ST) {}
void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- int NumBytes,
+ uint64_t NumBytes,
bool RequireFPUpdate) const {
DebugLoc dl;
@@ -46,24 +48,35 @@ void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
// st %lr, 8(,%sp)
// st %got, 24(,%sp)
// st %plt, 32(,%sp)
+ // st %s17, 40(,%sp) iff this function is using s17 as BP
// or %fp, 0, %sp
- BuildMI(MBB, MBBI, dl, TII.get(VE::STSri))
+ BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
.addReg(VE::SX11)
.addImm(0)
+ .addImm(0)
.addReg(VE::SX9);
- BuildMI(MBB, MBBI, dl, TII.get(VE::STSri))
+ BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
.addReg(VE::SX11)
+ .addImm(0)
.addImm(8)
.addReg(VE::SX10);
- BuildMI(MBB, MBBI, dl, TII.get(VE::STSri))
+ BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
.addReg(VE::SX11)
+ .addImm(0)
.addImm(24)
.addReg(VE::SX15);
- BuildMI(MBB, MBBI, dl, TII.get(VE::STSri))
+ BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
.addReg(VE::SX11)
+ .addImm(0)
.addImm(32)
.addReg(VE::SX16);
+ if (hasBP(MF))
+ BuildMI(MBB, MBBI, dl, TII.get(VE::STrii))
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(40)
+ .addReg(VE::SX17);
BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX9)
.addReg(VE::SX11)
.addImm(0);
@@ -72,7 +85,7 @@ void VEFrameLowering::emitPrologueInsns(MachineFunction &MF,
void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- int NumBytes,
+ uint64_t NumBytes,
bool RequireFPUpdate) const {
DebugLoc dl;
@@ -81,6 +94,7 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
// Insert following codes here as epilogue
//
// or %sp, 0, %fp
+ // ld %s17, 40(,%sp) iff this function is using s17 as BP
// ld %got, 32(,%sp)
// ld %plt, 24(,%sp)
// ld %lr, 8(,%sp)
@@ -89,30 +103,40 @@ void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX11)
.addReg(VE::SX9)
.addImm(0);
- BuildMI(MBB, MBBI, dl, TII.get(VE::LDSri), VE::SX16)
+ if (hasBP(MF))
+ BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX17)
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(40);
+ BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX16)
.addReg(VE::SX11)
+ .addImm(0)
.addImm(32);
- BuildMI(MBB, MBBI, dl, TII.get(VE::LDSri), VE::SX15)
+ BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX15)
.addReg(VE::SX11)
+ .addImm(0)
.addImm(24);
- BuildMI(MBB, MBBI, dl, TII.get(VE::LDSri), VE::SX10)
+ BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX10)
.addReg(VE::SX11)
+ .addImm(0)
.addImm(8);
- BuildMI(MBB, MBBI, dl, TII.get(VE::LDSri), VE::SX9)
+ BuildMI(MBB, MBBI, dl, TII.get(VE::LDrii), VE::SX9)
.addReg(VE::SX11)
+ .addImm(0)
.addImm(0);
}
void VEFrameLowering::emitSPAdjustment(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- int NumBytes) const {
+ int64_t NumBytes,
+ MaybeAlign MaybeAlign) const {
DebugLoc dl;
const VEInstrInfo &TII =
*static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
if (NumBytes >= -64 && NumBytes < 63) {
- BuildMI(MBB, MBBI, dl, TII.get(VE::ADXri), VE::SX11)
+ BuildMI(MBB, MBBI, dl, TII.get(VE::ADDSLri), VE::SX11)
.addReg(VE::SX11)
.addImm(NumBytes);
return;
@@ -123,20 +147,28 @@ void VEFrameLowering::emitSPAdjustment(MachineFunction &MF,
// lea %s13,%lo(NumBytes)
// and %s13,%s13,(32)0
// lea.sl %sp,%hi(NumBytes)(%sp, %s13)
- BuildMI(MBB, MBBI, dl, TII.get(VE::LEAzzi), VE::SX13)
- .addImm(LO32(NumBytes));
- BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm0), VE::SX13)
+ BuildMI(MBB, MBBI, dl, TII.get(VE::LEAzii), VE::SX13)
+ .addImm(0)
+ .addImm(0)
+ .addImm(Lo_32(NumBytes));
+ BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm), VE::SX13)
.addReg(VE::SX13)
- .addImm(32);
+ .addImm(M0(32));
BuildMI(MBB, MBBI, dl, TII.get(VE::LEASLrri), VE::SX11)
.addReg(VE::SX11)
.addReg(VE::SX13)
- .addImm(HI32(NumBytes));
+ .addImm(Hi_32(NumBytes));
+
+ if (MaybeAlign) {
+ // and %sp, %sp, Align-1
+ BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm), VE::SX11)
+ .addReg(VE::SX11)
+ .addImm(M1(64 - Log2_64(MaybeAlign.valueOrOne().value())));
+ }
}
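+// Worked example for the large-offset path above (illustrative numbers): for
+// NumBytes = -2097152 (0xffffffffffe00000), Lo_32 yields 0xffe00000 and Hi_32
+// yields 0xffffffff. Assuming the usual VE lea semantics (sign-extended
+// 32-bit displacement), the "and %s13,%s13,(32)0" re-zero-extends the low
+// half, so the final lea.sl computes %sp + 0x00000000ffe00000 +
+// (0xffffffff << 32) = %sp - 2 MiB, as intended.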
void VEFrameLowering::emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- int NumBytes) const {
+ MachineBasicBlock::iterator MBBI) const {
DebugLoc dl;
const VEInstrInfo &TII =
*static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
@@ -175,11 +207,8 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
MachineFrameInfo &MFI = MF.getFrameInfo();
- const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>();
- const VEInstrInfo &TII =
- *static_cast<const VEInstrInfo *>(Subtarget.getInstrInfo());
- const VERegisterInfo &RegInfo =
- *static_cast<const VERegisterInfo *>(Subtarget.getRegisterInfo());
+ const VEInstrInfo &TII = *STI.getInstrInfo();
+ const VERegisterInfo &RegInfo = *STI.getRegisterInfo();
MachineBasicBlock::iterator MBBI = MBB.begin();
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@@ -191,39 +220,22 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF,
// rather than reporting an error, as would be sensible. This is
// poor, but fixing that bogosity is going to be a large project.
// For now, just see if it's lied, and report an error here.
- if (!NeedsStackRealignment && MFI.getMaxAlignment() > getStackAlignment())
+ if (!NeedsStackRealignment && MFI.getMaxAlign() > getStackAlign())
report_fatal_error("Function \"" + Twine(MF.getName()) +
"\" required "
"stack re-alignment, but LLVM couldn't handle it "
"(probably because it has a dynamic alloca).");
// Get the number of bytes to allocate from the FrameInfo
- int NumBytes = (int)MFI.getStackSize();
- // The VE ABI requires a reserved 176-byte area in the user's stack, starting
- // at %sp + 16. This is for the callee Register Save Area (RSA).
- //
- // We therefore need to add that offset to the total stack size
- // after all the stack objects are placed by
- // PrologEpilogInserter calculateFrameObjectOffsets. However, since the stack
- // needs to be aligned *after* the extra size is added, we need to disable
- // calculateFrameObjectOffsets's built-in stack alignment, by having
- // targetHandlesStackFrameRounding return true.
-
- // Add the extra call frame stack size, if needed. (This is the same
- // code as in PrologEpilogInserter, but also gets disabled by
- // targetHandlesStackFrameRounding)
- if (MFI.adjustsStack() && hasReservedCallFrame(MF))
- NumBytes += MFI.getMaxCallFrameSize();
-
- // Adds the VE subtarget-specific spill area to the stack
- // size. Also ensures target-required alignment.
- NumBytes = Subtarget.getAdjustedFrameSize(NumBytes);
+ uint64_t NumBytes = MFI.getStackSize();
+
+ // The VE ABI requires a reserved 176-byte area at the top of the stack,
+ // as described in VESubtarget.cpp, so we adjust the size here.
+ NumBytes = STI.getAdjustedFrameSize(NumBytes);
// Finally, ensure that the size is sufficiently aligned for the
// data on the stack.
- if (MFI.getMaxAlignment() > 0) {
- NumBytes = alignTo(NumBytes, MFI.getMaxAlignment());
- }
+ NumBytes = alignTo(NumBytes, MFI.getMaxAlign());
// Update stack size with corrected value.
MFI.setStackSize(NumBytes);
@@ -232,16 +244,25 @@ void VEFrameLowering::emitPrologue(MachineFunction &MF,
emitPrologueInsns(MF, MBB, MBBI, NumBytes, true);
// Emit stack adjust instructions
- emitSPAdjustment(MF, MBB, MBBI, -NumBytes);
+ MaybeAlign RuntimeAlign =
+ NeedsStackRealignment ? MaybeAlign(MFI.getMaxAlign()) : None;
+ emitSPAdjustment(MF, MBB, MBBI, -(int64_t)NumBytes, RuntimeAlign);
+
+ if (hasBP(MF)) {
+ // Copy SP to BP.
+ BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX17)
+ .addReg(VE::SX11)
+ .addImm(0);
+ }
// Emit stack extend instructions
- emitSPExtend(MF, MBB, MBBI, -NumBytes);
+ emitSPExtend(MF, MBB, MBBI);
- unsigned regFP = RegInfo.getDwarfRegNum(VE::SX9, true);
+ Register RegFP = RegInfo.getDwarfRegNum(VE::SX9, true);
// Emit ".cfi_def_cfa_register 30".
unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, regFP));
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, RegFP));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -256,7 +277,7 @@ MachineBasicBlock::iterator VEFrameLowering::eliminateCallFramePseudoInstr(
MachineBasicBlock::iterator I) const {
if (!hasReservedCallFrame(MF)) {
MachineInstr &MI = *I;
- int Size = MI.getOperand(0).getImm();
+ int64_t Size = MI.getOperand(0).getImm();
if (MI.getOpcode() == VE::ADJCALLSTACKDOWN)
Size = -Size;
@@ -272,20 +293,17 @@ void VEFrameLowering::emitEpilogue(MachineFunction &MF,
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo &MFI = MF.getFrameInfo();
- int NumBytes = (int)MFI.getStackSize();
+ uint64_t NumBytes = MFI.getStackSize();
// Emit Epilogue instructions to restore %lr
emitEpilogueInsns(MF, MBB, MBBI, NumBytes, true);
}
-bool VEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
- // Reserve call frame if there are no variable sized objects on the stack.
- return !MF.getFrameInfo().hasVarSizedObjects();
-}
-
// hasFP - Return true if the specified function should have a dedicated frame
-// pointer register. This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
+// pointer register. This is true if the function has variable sized allocas
+// or if frame pointer elimination is disabled. For VE, we don't implement an
+// FP eliminator yet, but we return false from this function so that the
+// generated code does not refer to %fp.
bool VEFrameLowering::hasFP(const MachineFunction &MF) const {
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
@@ -295,13 +313,41 @@ bool VEFrameLowering::hasFP(const MachineFunction &MF) const {
MFI.isFrameAddressTaken();
}
+bool VEFrameLowering::hasBP(const MachineFunction &MF) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
+ return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF);
+}
+
int VEFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const {
- // Addressable stack objects are accessed using neg. offsets from
- // %fp, or positive offsets from %sp.
+ Register &FrameReg) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const VERegisterInfo *RegInfo = STI.getRegisterInfo();
+ const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
+ bool isFixed = MFI.isFixedObjectIndex(FI);
+
int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI);
- FrameReg = VE::SX11; // %sp
- return FrameOffset + MF.getFrameInfo().getStackSize();
+
+ if (FuncInfo->isLeafProc()) {
+ // In a leaf procedure, all offsets need to be %sp-based,
+ // because we haven't caused %fp to actually point to our frame.
+ FrameReg = VE::SX11; // %sp
+ return FrameOffset + MF.getFrameInfo().getStackSize();
+ }
+ if (RegInfo->needsStackRealignment(MF) && !isFixed) {
+ // If there is dynamic stack realignment, all local object
+ // references need to be via %sp or %s17 (bp), to take account
+ // of the re-alignment.
+ if (hasBP(MF))
+ FrameReg = VE::SX17; // %bp
+ else
+ FrameReg = VE::SX11; // %sp
+ return FrameOffset + MF.getFrameInfo().getStackSize();
+ }
+ // Finally, default to using %fp.
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FrameOffset;
}
bool VEFrameLowering::isLeafProc(MachineFunction &MF) const {
@@ -321,5 +367,8 @@ void VEFrameLowering::determineCalleeSaves(MachineFunction &MF,
RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
- assert(isLeafProc(MF) && "TODO implement for non-leaf procs");
+ if (isLeafProc(MF)) {
+ VEMachineFunctionInfo *MFI = MF.getInfo<VEMachineFunctionInfo>();
+ MFI->setLeafProc(true);
+ }
}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h
index 97e31d21aa43..b548d663c504 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEFrameLowering.h
@@ -28,23 +28,28 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitPrologueInsns(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, int NumBytes,
+ MachineBasicBlock::iterator MBBI, uint64_t NumBytes,
bool RequireFPUpdate) const;
void emitEpilogueInsns(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, int NumBytes,
+ MachineBasicBlock::iterator MBBI, uint64_t NumBytes,
bool RequireFPUpdate) const;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const override;
- bool hasReservedCallFrame(const MachineFunction &MF) const override;
+ bool hasBP(const MachineFunction &MF) const;
bool hasFP(const MachineFunction &MF) const override;
+ // VE always reserves the argument space for call sites in the function
+ // immediately on entry to the current function.
+ bool hasReservedCallFrame(const MachineFunction &MF) const override {
+ return true;
+ }
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS = nullptr) const override;
int getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const override;
+ Register &FrameReg) const override;
const SpillSlot *
getCalleeSavedSpillSlots(unsigned &NumEntries) const override {
@@ -58,10 +63,8 @@ public:
return Offsets;
}
- /// targetHandlesStackFrameRounding - Returns true if the target is
- /// responsible for rounding up the stack frame (probably at emitPrologue
- /// time).
- bool targetHandlesStackFrameRounding() const override { return true; }
+protected:
+ const VESubtarget &STI;
private:
// Returns true if MF is a leaf procedure.
@@ -69,11 +72,12 @@ private:
// Emits code for adjusting SP in function prologue/epilogue.
void emitSPAdjustment(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, int NumBytes) const;
+ MachineBasicBlock::iterator MBBI, int64_t NumBytes,
+ MaybeAlign MayAlign = MaybeAlign()) const;
// Emits code for extending SP in function prologue/epilogue.
void emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, int NumBytes) const;
+ MachineBasicBlock::iterator MBBI) const;
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
index 43030993efb9..f3d067d55fdb 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
@@ -23,6 +23,105 @@ using namespace llvm;
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//
+/// Convert a DAG integer condition code to a VE ICC condition.
+inline static VECC::CondCode intCondCode2Icc(ISD::CondCode CC) {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unknown integer condition code!");
+ case ISD::SETEQ:
+ return VECC::CC_IEQ;
+ case ISD::SETNE:
+ return VECC::CC_INE;
+ case ISD::SETLT:
+ return VECC::CC_IL;
+ case ISD::SETGT:
+ return VECC::CC_IG;
+ case ISD::SETLE:
+ return VECC::CC_ILE;
+ case ISD::SETGE:
+ return VECC::CC_IGE;
+ case ISD::SETULT:
+ return VECC::CC_IL;
+ case ISD::SETULE:
+ return VECC::CC_ILE;
+ case ISD::SETUGT:
+ return VECC::CC_IG;
+ case ISD::SETUGE:
+ return VECC::CC_IGE;
+ }
+}
+
+/// Convert a DAG floating point condition code to a VE FCC condition.
+inline static VECC::CondCode fpCondCode2Fcc(ISD::CondCode CC) {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unknown fp condition code!");
+ case ISD::SETFALSE:
+ return VECC::CC_AF;
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ return VECC::CC_EQ;
+ case ISD::SETNE:
+ case ISD::SETONE:
+ return VECC::CC_NE;
+ case ISD::SETLT:
+ case ISD::SETOLT:
+ return VECC::CC_L;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ return VECC::CC_G;
+ case ISD::SETLE:
+ case ISD::SETOLE:
+ return VECC::CC_LE;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ return VECC::CC_GE;
+ case ISD::SETO:
+ return VECC::CC_NUM;
+ case ISD::SETUO:
+ return VECC::CC_NAN;
+ case ISD::SETUEQ:
+ return VECC::CC_EQNAN;
+ case ISD::SETUNE:
+ return VECC::CC_NENAN;
+ case ISD::SETULT:
+ return VECC::CC_LNAN;
+ case ISD::SETUGT:
+ return VECC::CC_GNAN;
+ case ISD::SETULE:
+ return VECC::CC_LENAN;
+ case ISD::SETUGE:
+ return VECC::CC_GENAN;
+ case ISD::SETTRUE:
+ return VECC::CC_AT;
+ }
+}
+
+/// getImmVal - get immediate representation of integer value
+inline static uint64_t getImmVal(const ConstantSDNode *N) {
+ return N->getSExtValue();
+}
+
+/// getFpImmVal - get immediate representation of floating point value
+inline static uint64_t getFpImmVal(const ConstantFPSDNode *N) {
+ const APInt &Imm = N->getValueAPF().bitcastToAPInt();
+ uint64_t Val = Imm.getZExtValue();
+ if (Imm.getBitWidth() == 32) {
+ // The immediate value of a float is placed in the higher bits on VE.
+ Val <<= 32;
+ }
+ return Val;
+}
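+// For example (illustrative): for the constant 1.0f, whose IEEE-754 bit
+// pattern is 0x3f800000, getFpImmVal returns 0x3f80000000000000, i.e. the
+// 32-bit pattern shifted into the upper half of the 64-bit operand.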
+
+/// convMImmVal - Convert a mimm integer immediate value to target immediate.
+inline static uint64_t convMImmVal(uint64_t Val) {
+ if (Val == 0)
+ return 0; // (0)1
+ if (Val & (1UL << 63))
+ return countLeadingOnes(Val); // (m)1
+ return countLeadingZeros(Val) | 0x40; // (m)0
+}
+
//===--------------------------------------------------------------------===//
/// VEDAGToDAGISel - VE specific code to select VE machine
/// instructions for SelectionDAG operations.
@@ -43,15 +142,172 @@ public:
void Select(SDNode *N) override;
+ // Complex Pattern Selectors.
+ bool selectADDRrri(SDValue N, SDValue &Base, SDValue &Index, SDValue &Offset);
+ bool selectADDRrii(SDValue N, SDValue &Base, SDValue &Index, SDValue &Offset);
+ bool selectADDRzri(SDValue N, SDValue &Base, SDValue &Index, SDValue &Offset);
+ bool selectADDRzii(SDValue N, SDValue &Base, SDValue &Index, SDValue &Offset);
+ bool selectADDRri(SDValue N, SDValue &Base, SDValue &Offset);
+
StringRef getPassName() const override {
return "VE DAG->DAG Pattern Instruction Selection";
}
// Include the pieces autogenerated from the target description.
#include "VEGenDAGISel.inc"
+
+private:
+ SDNode *getGlobalBaseReg();
+
+ bool matchADDRrr(SDValue N, SDValue &Base, SDValue &Index);
+ bool matchADDRri(SDValue N, SDValue &Base, SDValue &Offset);
};
} // end anonymous namespace
+bool VEDAGToDAGISel::selectADDRrri(SDValue Addr, SDValue &Base, SDValue &Index,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::FrameIndex)
+ return false;
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress ||
+ Addr.getOpcode() == ISD::TargetGlobalTLSAddress)
+ return false; // direct calls.
+
+ SDValue LHS, RHS;
+ if (matchADDRri(Addr, LHS, RHS)) {
+ if (matchADDRrr(LHS, Base, Index)) {
+ Offset = RHS;
+ return true;
+ }
+ // Return false to try selectADDRrii.
+ return false;
+ }
+ if (matchADDRrr(Addr, LHS, RHS)) {
+ if (matchADDRri(RHS, Index, Offset)) {
+ Base = LHS;
+ return true;
+ }
+ if (matchADDRri(LHS, Base, Offset)) {
+ Index = RHS;
+ return true;
+ }
+ Base = LHS;
+ Index = RHS;
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+ return true;
+ }
+ return false; // Let the reg+imm(=0) pattern catch this!
+}
+
+bool VEDAGToDAGISel::selectADDRrii(SDValue Addr, SDValue &Base, SDValue &Index,
+ SDValue &Offset) {
+ if (matchADDRri(Addr, Base, Offset)) {
+ Index = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+ return true;
+ }
+
+ Base = Addr;
+ Index = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+ return true;
+}
+
+bool VEDAGToDAGISel::selectADDRzri(SDValue Addr, SDValue &Base, SDValue &Index,
+ SDValue &Offset) {
+ // Prefer ADDRrii.
+ return false;
+}
+
+bool VEDAGToDAGISel::selectADDRzii(SDValue Addr, SDValue &Base, SDValue &Index,
+ SDValue &Offset) {
+  if (isa<FrameIndexSDNode>(Addr))
+    return false;
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress ||
+ Addr.getOpcode() == ISD::TargetGlobalTLSAddress)
+ return false; // direct calls.
+
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr)) {
+ if (isInt<32>(CN->getSExtValue())) {
+ Base = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+ Index = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+ Offset =
+ CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool VEDAGToDAGISel::selectADDRri(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (matchADDRri(Addr, Base, Offset))
+ return true;
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+ return true;
+}
+
+bool VEDAGToDAGISel::matchADDRrr(SDValue Addr, SDValue &Base, SDValue &Index) {
+  if (isa<FrameIndexSDNode>(Addr))
+ return false;
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress ||
+ Addr.getOpcode() == ISD::TargetGlobalTLSAddress)
+ return false; // direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ ; // Nothing to do here.
+ } else if (Addr.getOpcode() == ISD::OR) {
+ // We want to look through a transform in InstCombine and DAGCombiner that
+ // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
+ if (!CurDAG->haveNoCommonBitsSet(Addr.getOperand(0), Addr.getOperand(1)))
+ return false;
+ } else {
+ return false;
+ }
+
+ if (Addr.getOperand(0).getOpcode() == VEISD::Lo ||
+ Addr.getOperand(1).getOpcode() == VEISD::Lo)
+ return false; // Let the LEASL patterns catch this!
+
+ Base = Addr.getOperand(0);
+ Index = Addr.getOperand(1);
+ return true;
+}
+
+bool VEDAGToDAGISel::matchADDRri(SDValue Addr, SDValue &Base, SDValue &Offset) {
+ auto AddrTy = Addr->getValueType(0);
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), AddrTy);
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress ||
+ Addr.getOpcode() == ISD::TargetGlobalTLSAddress)
+ return false; // direct calls.
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isInt<32>(CN->getSExtValue())) {
+ if (FrameIndexSDNode *FIN =
+ dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
+ // Constant offset from frame ref.
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), AddrTy);
+ } else {
+ Base = Addr.getOperand(0);
+ }
+ Offset =
+ CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
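+
+// A minimal sketch of what the selectors above produce for an address of the
+// form (add (add %base, %index), 16); the operand names are illustrative only:
+//   selectADDRrri -> Base = %base, Index = %index, Offset = 16
+//   selectADDRrii -> Base = (add %base, %index), Index = 0, Offset = 16
+//   selectADDRri  -> Base = (add %base, %index), Offset = 16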
+
void VEDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
if (N->isMachineOpcode()) {
@@ -59,9 +315,22 @@ void VEDAGToDAGISel::Select(SDNode *N) {
return; // Already selected.
}
+ switch (N->getOpcode()) {
+ case VEISD::GLOBAL_BASE_REG:
+ ReplaceNode(N, getGlobalBaseReg());
+ return;
+ }
+
SelectCode(N);
}
+SDNode *VEDAGToDAGISel::getGlobalBaseReg() {
+ Register GlobalBaseReg = Subtarget->getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG
+ ->getRegister(GlobalBaseReg, TLI->getPointerTy(CurDAG->getDataLayout()))
+ .getNode();
+}
+
/// createVEISelDag - This pass converts a legalized DAG into a
/// VE-specific DAG, ready for instruction scheduling.
///
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp
index aa6c3c08bd75..ab720545dd83 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "VEISelLowering.h"
+#include "MCTargetDesc/VEMCExpr.h"
+#include "VEMachineFunctionInfo.h"
#include "VERegisterInfo.h"
#include "VETargetMachine.h"
#include "llvm/ADT/StringSwitch.h"
@@ -36,14 +38,37 @@ using namespace llvm;
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
+static bool allocateFloat(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ switch (LocVT.SimpleTy) {
+ case MVT::f32: {
+    // Allocate the stack area as below:
+ // 0 4
+ // +------+------+
+ // | empty| float|
+ // +------+------+
+    // Use align=8 for the dummy area so that the whole pair of areas starts
+    // on an 8-byte boundary.
+    State.AllocateStack(4, Align(8)); // for empty area
+    // Use align=4 for the value so that it is placed just after the dummy
+    // area.
+    unsigned Offset = State.AllocateStack(4, Align(4)); // for float value area
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return true;
+ }
+ default:
+ return false;
+ }
+}
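+
+// Illustrative only: with the layout above, each f32 stack value consumes an
+// 8-byte slot. Starting from an 8-byte aligned offset S, two consecutive f32
+// values are placed at S+4 and S+12, while S+0..3 and S+8..11 stay empty.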
+
#include "VEGenCallingConv.inc"
bool VETargetLowering::CanLowerReturn(
CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
- assert(!IsVarArg && "TODO implement var args");
- assert(Outs.empty() && "TODO implement return values");
- return true; // TODO support more than 'ret void'
+ CCAssignFn *RetCC = RetCC_VE;
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC);
}
SDValue
@@ -52,12 +77,57 @@ VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
- assert(!IsVarArg && "TODO implement var args");
- assert(Outs.empty() && "TODO implement return values");
- assert(OutVals.empty() && "TODO implement return values");
+ // CCValAssign - represent the assignment of the return value to locations.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_VE);
+
+ SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ SDValue OutVal = OutVals[i];
+
+ // Integer return values must be sign or zero extended by the callee.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt:
+ OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
+ break;
+ case CCValAssign::ZExt:
+ OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
+ break;
+ case CCValAssign::AExt:
+ OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
+ break;
+ default:
+ llvm_unreachable("Unknown loc info!");
+ }
+
+ assert(!VA.needsCustom() && "Unexpected custom lowering");
+
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
+
+ // Guarantee that all emitted copies are stuck together with flags.
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps);
}
@@ -65,8 +135,89 @@ SDValue VETargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
- assert(!IsVarArg && "TODO implement var args");
- assert(Ins.empty() && "TODO implement input arguments");
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Get the base offset of the incoming arguments stack space.
+ unsigned ArgsBaseOffset = 176;
+ // Get the size of the preserved arguments area
+ unsigned ArgsPreserved = 64;
+
+ // Analyze arguments according to CC_VE.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
+ // Allocate the preserved area first.
+ CCInfo.AllocateStack(ArgsPreserved, Align(8));
+ // We already allocated the preserved area, so the stack offset computed
+ // by CC_VE would be correct now.
+ CCInfo.AnalyzeFormalArguments(Ins, CC_VE);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc()) {
+ // This argument is passed in a register.
+ // All integer register arguments are promoted by the caller to i64.
+
+ // Create a virtual register for the promoted live-in value.
+ unsigned VReg =
+ MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
+ SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
+
+ // Get the high bits for i32 struct elements.
+ if (VA.getValVT() == MVT::i32 && VA.needsCustom())
+ Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg,
+ DAG.getConstant(32, DL, MVT::i32));
+
+ // The caller promoted the argument, so insert an Assert?ext SDNode so we
+ // won't promote the value again in this function.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
+ DAG.getValueType(VA.getValVT()));
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
+ DAG.getValueType(VA.getValVT()));
+ break;
+ default:
+ break;
+ }
+
+ // Truncate the register down to the argument type.
+ if (VA.isExtInLoc())
+ Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
+
+ InVals.push_back(Arg);
+ continue;
+ }
+
+ // The registers are exhausted. This argument was passed on the stack.
+ assert(VA.isMemLoc());
+ // The CC_VE_Full/Half functions compute stack offsets relative to the
+ // beginning of the arguments area at %fp+176.
+ unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
+ unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
+ int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
+ InVals.push_back(
+ DAG.getLoad(VA.getValVT(), DL, Chain,
+ DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
+ MachinePointerInfo::getFixedStack(MF, FI)));
+ }
+
+ if (!IsVarArg)
+ return Chain;
+
+ // This function takes variable arguments, some of which may have been passed
+ // in registers %s0-%s8.
+ //
+ // The va_start intrinsic needs to know the offset to the first variable
+ // argument.
+ // TODO: need to calculate offset correctly once we support f128.
+ unsigned ArgOffset = ArgLocs.size() * 8;
+ VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
+ // Skip the 176 bytes of register save area.
+ FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
+
return Chain;
}
@@ -78,7 +229,7 @@ Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
.Case("sp", VE::SX11) // Stack pointer
.Case("fp", VE::SX9) // Frame pointer
.Case("sl", VE::SX8) // Stack limit
- .Case("lr", VE::SX10) // Link regsiter
+ .Case("lr", VE::SX10) // Link register
.Case("tp", VE::SX14) // Thread pointer
.Case("outer", VE::SX12) // Outer regiser
.Case("info", VE::SX17) // Info area register
@@ -96,6 +247,314 @@ Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
+SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ SDLoc DL = CLI.DL;
+ SDValue Chain = CLI.Chain;
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+
+ // VE target does not yet support tail call optimization.
+ CLI.IsTailCall = false;
+
+ // Get the base offset of the outgoing arguments stack space.
+ unsigned ArgsBaseOffset = 176;
+ // Get the size of the preserved arguments area
+ unsigned ArgsPreserved = 8 * 8u;
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
+ // Allocate the preserved area first.
+ CCInfo.AllocateStack(ArgsPreserved, Align(8));
+ // We already allocated the preserved area, so the stack offset computed
+ // by CC_VE would be correct now.
+ CCInfo.AnalyzeCallOperands(CLI.Outs, CC_VE);
+
+  // VE requires both registers and the stack to be used for varargs or
+  // non-prototyped functions.
+ bool UseBoth = CLI.IsVarArg;
+
+ // Analyze operands again if it is required to store BOTH.
+ SmallVector<CCValAssign, 16> ArgLocs2;
+ CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
+ ArgLocs2, *DAG.getContext());
+ if (UseBoth)
+ CCInfo2.AnalyzeCallOperands(CLI.Outs, CC_VE2);
+
+ // Get the size of the outgoing arguments stack space requirement.
+ unsigned ArgsSize = CCInfo.getNextStackOffset();
+
+ // Keep stack frames 16-byte aligned.
+ ArgsSize = alignTo(ArgsSize, 16);
+
+ // Adjust the stack pointer to make room for the arguments.
+ // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
+ // with more than 6 arguments.
+ Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);
+
+ // Collect the set of registers to pass to the function and their values.
+ // This will be emitted as a sequence of CopyToReg nodes glued to the call
+ // instruction.
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+  // Collect chains from all the memory operations that copy arguments to the
+ // stack. They must follow the stack pointer adjustment above and precede the
+ // call instruction itself.
+ SmallVector<SDValue, 8> MemOpChains;
+
+  // VE needs the address of the callee function in a register, so prepare to
+  // copy it to SX12 here.
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ SDValue Callee = CLI.Callee;
+
+ bool IsPICCall = isPositionIndependent();
+
+ // PC-relative references to external symbols should go through $stub.
+ // If so, we need to prepare GlobalBaseReg first.
+ const TargetMachine &TM = DAG.getTarget();
+ const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
+ const GlobalValue *GV = nullptr;
+ auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
+ if (CalleeG)
+ GV = CalleeG->getGlobal();
+ bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
+ bool UsePlt = !Local;
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Turn GlobalAddress/ExternalSymbol node into a value node
+ // containing the address of them here.
+ if (CalleeG) {
+ if (IsPICCall) {
+ if (UsePlt)
+ Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
+ Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
+ Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
+ } else {
+ Callee =
+ makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
+ }
+ } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ if (IsPICCall) {
+ if (UsePlt)
+ Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
+ Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
+ } else {
+ Callee =
+ makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
+ }
+ }
+
+ RegsToPass.push_back(std::make_pair(VE::SX12, Callee));
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = CLI.OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown location info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ if (!UseBoth)
+ continue;
+ VA = ArgLocs2[i];
+ }
+
+ assert(VA.isMemLoc());
+
+ // Create a store off the stack pointer for this argument.
+ SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
+ // The argument area starts at %fp+176 in the callee frame,
+ // %sp+176 in ours.
+ SDValue PtrOff =
+ DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
+ PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
+ MemOpChains.push_back(
+ DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
+ }
+
+ // Emit all stores, make sure they occur before the call.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
+
+ // Build a sequence of CopyToReg nodes glued together with token chain and
+ // glue operands which copy the outgoing args into registers. The InGlue is
+ // necessary since all emitted instructions must be stuck together in order
+ // to pass the live physical registers.
+ SDValue InGlue;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
+ RegsToPass[i].second, InGlue);
+ InGlue = Chain.getValue(1);
+ }
+
+ // Build the operands for the call instruction itself.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const uint32_t *Mask =
+ TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ // Make sure the CopyToReg nodes are glued to the call instruction which
+ // consumes the registers.
+ if (InGlue.getNode())
+ Ops.push_back(InGlue);
+
+ // Now the call itself.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
+ InGlue = Chain.getValue(1);
+
+ // Revert the stack pointer immediately after the call.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true),
+ DAG.getIntPtrConstant(0, DL, true), InGlue, DL);
+ InGlue = Chain.getValue(1);
+
+ // Now extract the return values. This is more or less the same as
+ // LowerFormalArguments.
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
+
+ // Set inreg flag manually for codegen generated library calls that
+ // return float.
+ if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
+ CLI.Ins[0].Flags.setInReg();
+
+ RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_VE);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ unsigned Reg = VA.getLocReg();
+
+ // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
+ // reside in the same register in the high and low bits. Reuse the
+ // CopyFromReg previous node to avoid duplicate copies.
+ SDValue RV;
+ if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
+ if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
+ RV = Chain.getValue(0);
+
+ // But usually we'll create a new CopyFromReg for a different register.
+ if (!RV.getNode()) {
+ RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
+ Chain = RV.getValue(1);
+ InGlue = Chain.getValue(2);
+ }
+
+ // Get the high bits for i32 struct elements.
+ if (VA.getValVT() == MVT::i32 && VA.needsCustom())
+ RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
+ DAG.getConstant(32, DL, MVT::i32));
+
+ // The callee promoted the return value, so insert an Assert?ext SDNode so
+ // we won't promote the value again in this function.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::SExt:
+ RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
+ DAG.getValueType(VA.getValVT()));
+ break;
+ case CCValAssign::ZExt:
+ RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
+ DAG.getValueType(VA.getValVT()));
+ break;
+ default:
+ break;
+ }
+
+ // Truncate the register down to the return value type.
+ if (VA.isExtInLoc())
+ RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
+
+ InVals.push_back(RV);
+ }
+
+ return Chain;
+}
+
+/// isFPImmLegal - Returns true if the target can instruction select the
+/// specified FP immediate natively. If false, the legalizer will
+/// materialize the FP immediate as a load from a constant pool.
+bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const {
+ return VT == MVT::f32 || VT == MVT::f64;
+}
+
+/// Determine if the target supports unaligned memory accesses.
+///
+/// This function returns true if the target allows unaligned memory accesses
+/// of the specified type in the given address space. If true, it also returns
+/// whether the unaligned memory access is "fast" in the last argument by
+/// reference. This is used, for example, in situations where an array
+/// copy/move/set is converted to a sequence of store operations. Its use
+/// helps to ensure that such replacements don't generate code that causes an
+/// alignment error (trap) on the target machine.
+bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned AddrSpace,
+ unsigned Align,
+ MachineMemOperand::Flags,
+ bool *Fast) const {
+ if (Fast) {
+ // It's fast anytime on VE
+ *Fast = true;
+ }
+ return true;
+}
+
+bool VETargetLowering::hasAndNot(SDValue Y) const {
+ EVT VT = Y.getValueType();
+
+  // VE doesn't have a vector and-not instruction.
+ if (VT.isVector())
+ return false;
+
+  // VE allows different immediate forms for X and Y in ~X & Y: only simm7
+  // works for X, and only mimm works for Y. However, this hook asks whether a
+  // single immediate value is OK as both X and Y. Generating an additional
+  // instruction to materialize the immediate is no good, since the purpose of
+  // this hook is to convert a series of 3 instructions into another series of
+  // 3 instructions with better parallelism. Therefore, we return false for
+  // all immediate values for now.
+ // FIXME: Change hasAndNot function to have two operands to make it work
+ // correctly with Aurora VE.
+ if (isa<ConstantSDNode>(Y))
+ return false;
+
+ // It's ok for generic registers.
+ return true;
+}
+
VETargetLowering::VETargetLowering(const TargetMachine &TM,
const VESubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
@@ -108,7 +567,87 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
setBooleanVectorContents(ZeroOrOneBooleanContent);
// Set up the register classes.
+ addRegisterClass(MVT::i32, &VE::I32RegClass);
addRegisterClass(MVT::i64, &VE::I64RegClass);
+ addRegisterClass(MVT::f32, &VE::F32RegClass);
+ addRegisterClass(MVT::f64, &VE::I64RegClass);
+
+ /// Load & Store {
+ for (MVT FPVT : MVT::fp_valuetypes()) {
+ for (MVT OtherFPVT : MVT::fp_valuetypes()) {
+ // Turn FP extload into load/fpextend
+ setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
+
+ // Turn FP truncstore into trunc + store.
+ setTruncStoreAction(FPVT, OtherFPVT, Expand);
+ }
+ }
+
+  // VE doesn't have i1 sign-extending loads.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
+ setTruncStoreAction(VT, MVT::i1, Expand);
+ }
+ /// } Load & Store
+
+ // Custom legalize address nodes into LO/HI parts.
+ MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
+ setOperationAction(ISD::BlockAddress, PtrVT, Custom);
+ setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
+
+ /// VAARG handling {
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+  // VAARG needs to be lowered to an access with 8-byte alignment.
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ /// } VAARG handling
+
+ /// Stack {
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
+ /// } Stack
+
+ /// Int Ops {
+ for (MVT IntVT : {MVT::i32, MVT::i64}) {
+ // VE has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, IntVT, Expand);
+ setOperationAction(ISD::SREM, IntVT, Expand);
+ setOperationAction(ISD::SDIVREM, IntVT, Expand);
+ setOperationAction(ISD::UDIVREM, IntVT, Expand);
+
+ setOperationAction(ISD::CTTZ, IntVT, Expand);
+ setOperationAction(ISD::ROTL, IntVT, Expand);
+ setOperationAction(ISD::ROTR, IntVT, Expand);
+
+ // Use isel patterns for i32 and i64
+ setOperationAction(ISD::BSWAP, IntVT, Legal);
+ setOperationAction(ISD::CTLZ, IntVT, Legal);
+ setOperationAction(ISD::CTPOP, IntVT, Legal);
+
+ // Use isel patterns for i64, Promote i32
+ LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
+ setOperationAction(ISD::BITREVERSE, IntVT, Act);
+ }
+ /// } Int Ops
+
+ /// Conversion {
+  // VE doesn't have instructions for fp<->uint, so let LLVM expand them.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+
+ // fp16 not supported
+ for (MVT FPVT : MVT::fp_valuetypes()) {
+ setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
+ setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
+ }
+ /// } Conversion
setStackPointerRegisterToSaveRestore(VE::SX11);
@@ -122,16 +661,316 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
}
const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
+#define TARGET_NODE_CASE(NAME) \
+ case VEISD::NAME: \
+ return "VEISD::" #NAME;
switch ((VEISD::NodeType)Opcode) {
case VEISD::FIRST_NUMBER:
break;
- case VEISD::RET_FLAG:
- return "VEISD::RET_FLAG";
+ TARGET_NODE_CASE(Lo)
+ TARGET_NODE_CASE(Hi)
+ TARGET_NODE_CASE(GETFUNPLT)
+ TARGET_NODE_CASE(GETSTACKTOP)
+ TARGET_NODE_CASE(GETTLSADDR)
+ TARGET_NODE_CASE(CALL)
+ TARGET_NODE_CASE(RET_FLAG)
+ TARGET_NODE_CASE(GLOBAL_BASE_REG)
}
+#undef TARGET_NODE_CASE
return nullptr;
}
EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
EVT VT) const {
- return MVT::i64;
+ return MVT::i32;
+}
+
+// Convert to a target node and set target flags.
+SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
+ SelectionDAG &DAG) const {
+ if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
+ return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
+ GA->getValueType(0), GA->getOffset(), TF);
+
+ if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
+ return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
+ 0, TF);
+
+ if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
+ return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
+ TF);
+
+ llvm_unreachable("Unhandled address SDNode");
+}
+
+// Split Op into high and low parts according to HiTF and LoTF.
+// Return an ADD node combining the parts.
+SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
+ SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
+ return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
+}
+
+// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
+// or ExternalSymbol SDNode.
+SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT PtrVT = Op.getValueType();
+
+ // Handle PIC mode first. VE needs a got load for every variable!
+ if (isPositionIndependent()) {
+    // GLOBAL_BASE_REG is codegen'ed as a call. Inform MFI that this
+    // function has calls.
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI.setHasCalls(true);
+ auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);
+
+ if (isa<ConstantPoolSDNode>(Op) ||
+ (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
+      // Create the following instructions for local-linkage PIC code:
+ // lea %s35, %gotoff_lo(.LCPI0_0)
+ // and %s35, %s35, (32)0
+ // lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35)
+ // adds.l %s35, %s15, %s35 ; %s15 is GOT
+ // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15)
+ SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
+ VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
+ SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
+ return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
+ }
+    // Create the following instructions for non-local-linkage PIC code:
+ // lea %s35, %got_lo(.LCPI0_0)
+ // and %s35, %s35, (32)0
+ // lea.sl %s35, %got_hi(.LCPI0_0)(%s35)
+ // adds.l %s35, %s15, %s35 ; %s15 is GOT
+ // ld %s35, (,%s35)
+    // FIXME: use lea.sl %s35, %got_hi(.LCPI0_0)(%s35, %s15)
+ SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
+ VEMCExpr::VK_VE_GOT_LO32, DAG);
+ SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
+ SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
+ return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()));
+ }
+
+ // This is one of the absolute code models.
+ switch (getTargetMachine().getCodeModel()) {
+ default:
+ llvm_unreachable("Unsupported absolute code model");
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ case CodeModel::Large:
+ // abs64.
+ return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
+ }
+}
+
+/// Custom Lower {
+
+SDValue VETargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ return makeAddress(Op, DAG);
+}
+
+SDValue VETargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ return makeAddress(Op, DAG);
+}
+
+SDValue
+VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+
+ // Generate the following code:
+ // t1: ch,glue = callseq_start t0, 0, 0
+ // t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
+ // t3: ch,glue = callseq_end t2, 0, 0, t2:2
+ // t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
+ SDValue Label = withTargetFlags(Op, 0, DAG);
+ EVT PtrVT = Op.getValueType();
+
+ // Lowering the machine isd will make sure everything is in the right
+ // location.
+ SDValue Chain = DAG.getEntryNode();
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
+ DAG.getMachineFunction(), CallingConv::C);
+ Chain = DAG.getCALLSEQ_START(Chain, 64, 0, dl);
+ SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
+ Chain = DAG.getNode(VEISD::GETTLSADDR, dl, NodeTys, Args);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, dl, true),
+ DAG.getIntPtrConstant(0, dl, true),
+ Chain.getValue(1), dl);
+ Chain = DAG.getCopyFromReg(Chain, dl, VE::SX0, PtrVT, Chain.getValue(1));
+
+ // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI.setHasCalls(true);
+
+ // Also generate code to prepare a GOT register if it is PIC.
+ if (isPositionIndependent()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
+ }
+
+ return Chain;
+}
+
+SDValue VETargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ // The current implementation of nld (2.26) doesn't allow local exec model
+ // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
+ // generate the general dynamic model code sequence.
+ //
+ // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
+ return LowerToTLSGeneralDynamicModel(Op, DAG);
+}
+
+SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+
+ // Need frame address to find the address of VarArgsFrameIndex.
+ MF.getFrameInfo().setFrameAddressIsTaken(true);
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ SDLoc DL(Op);
+ SDValue Offset =
+ DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
+ DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
+ MachinePointerInfo(SV));
+}
+
+SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ EVT VT = Node->getValueType(0);
+ SDValue InChain = Node->getOperand(0);
+ SDValue VAListPtr = Node->getOperand(1);
+ EVT PtrVT = VAListPtr.getValueType();
+ const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ SDLoc DL(Node);
+ SDValue VAList =
+ DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
+ SDValue Chain = VAList.getValue(1);
+ SDValue NextPtr;
+
+ if (VT == MVT::f32) {
+ // float --> need special handling like below.
+ // 0 4
+ // +------+------+
+ // | empty| float|
+ // +------+------+
+ // Increment the pointer, VAList, by 8 to the next vaarg.
+ NextPtr =
+ DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
+ // Then, adjust VAList.
+ unsigned InternalOffset = 4;
+ VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+ DAG.getConstant(InternalOffset, DL, PtrVT));
+ } else {
+ // Increment the pointer, VAList, by 8 to the next vaarg.
+ NextPtr =
+ DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
+ }
+
+ // Store the incremented VAList to the legalized pointer.
+ InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));
+
+ // Load the actual argument out of the pointer VAList.
+ // We can't count on greater alignment than the word size.
+ return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(),
+ std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8);
+}
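+
+// Rough sketch of the accesses emitted above, assuming the current va_list
+// value (the address of the next argument) is P:
+//   f32:         load the value from P + 4, then write P + 8 back to va_list
+//   other types: load the value from P,     then write P + 8 back to va_list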
+
+SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Generate following code.
+ // (void)__llvm_grow_stack(size);
+ // ret = GETSTACKTOP; // pseudo instruction
+ SDLoc DL(Op);
+
+ // Get the inputs.
+ SDNode *Node = Op.getNode();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ MaybeAlign Alignment(Op.getConstantOperandVal(2));
+ EVT VT = Node->getValueType(0);
+
+ // Chain the dynamic stack allocation so that it doesn't modify the stack
+ // pointer when other instructions are using the stack.
+ Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
+
+ const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
+ Align StackAlign = TFI.getStackAlign();
+ bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
+
+ // Prepare arguments
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Size;
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Args.push_back(Entry);
+ if (NeedsAlign) {
+ Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Args.push_back(Entry);
+ }
+ Type *RetTy = Type::getVoidTy(*DAG.getContext());
+
+ EVT PtrVT = Op.getValueType();
+ SDValue Callee;
+ if (NeedsAlign) {
+ Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
+ } else {
+ Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
+ }
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(DL)
+ .setChain(Chain)
+ .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
+ .setDiscardResult(true);
+ std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
+ Chain = pair.second;
+ SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
+ if (NeedsAlign) {
+ Result = DAG.getNode(ISD::ADD, DL, VT, Result,
+ DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
+ Result = DAG.getNode(ISD::AND, DL, VT, Result,
+ DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
+ }
+ // Chain = Result.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
+ DAG.getIntPtrConstant(0, DL, true), SDValue(), DL);
+
+ SDValue Ops[2] = {Result, Chain};
+ return DAG.getMergeValues(Ops, DL);
+}
+
+SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Should not custom lower this!");
+ case ISD::BlockAddress:
+ return LowerBlockAddress(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC:
+ return lowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::VASTART:
+ return LowerVASTART(Op, DAG);
+ case ISD::VAARG:
+ return LowerVAARG(Op, DAG);
+ }
}
+/// } Custom Lower
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h
index 39b3610a0c3a..4633220efaa1 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.h
@@ -23,7 +23,18 @@ class VESubtarget;
namespace VEISD {
enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- RET_FLAG, // Return with a flag operand.
+
+ Hi,
+ Lo, // Hi/Lo operations, typically on a global address.
+
+  GETFUNPLT,   // load function address through %plt instruction
+ GETTLSADDR, // load address for TLS access
+ GETSTACKTOP, // retrieve address of stack top (first address of
+ // locals and temporaries)
+
+ CALL, // A call instruction.
+ RET_FLAG, // Return with a flag operand.
+ GLOBAL_BASE_REG, // Global base reg for PIC.
};
}
@@ -34,6 +45,9 @@ public:
VETargetLowering(const TargetMachine &TM, const VESubtarget &STI);
const char *getTargetNodeName(unsigned Opcode) const override;
+ MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
+ return MVT::i32;
+ }
Register getRegisterByName(const char *RegName, LLT VT,
const MachineFunction &MF) const override;
@@ -48,6 +62,9 @@ public:
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
@@ -56,6 +73,36 @@ public:
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
SelectionDAG &DAG) const override;
+
+ /// Custom Lower {
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ /// } Custom Lower
+
+ SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;
+ SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
+ SelectionDAG &DAG) const;
+ SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const;
+
+ bool isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const override;
+ /// Returns true if the target allows unaligned memory accesses of the
+ /// specified type.
+ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
+ MachineMemOperand::Flags Flags,
+ bool *Fast) const override;
+
+ // Block s/udiv lowering for now
+ bool isIntDivCheap(EVT VT, AttributeList Attr) const override { return true; }
+
+ bool hasAndNot(SDValue Y) const override;
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td
index a8d3e786ba89..0c02411ff916 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrFormats.td
@@ -6,6 +6,20 @@
//
//===----------------------------------------------------------------------===//
+// SX-Aurora uses little endian, but instructions are encoded in a slightly
+// different manner. Therefore, we need to translate the position of each
+// bitfield described in the ISA documentation as below.
+//
+// ISA | InstrFormats.td
+// ---------------------------
+// 0-7 => 63-56
+// 8 => 55
+// 32-63 => 31-0
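+//
+// For example, applying this mapping, a field at ISA bit positions 9-15 is
+// described below as Inst{54-48} (cf. the sx field of the RM format).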
+
+//===----------------------------------------------------------------------===//
+// Instruction Format
+//===----------------------------------------------------------------------===//
+
class InstVE<dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {
field bits<64> Inst;
@@ -14,7 +28,7 @@ class InstVE<dag outs, dag ins, string asmstr, list<dag> pattern>
let Size = 8;
bits<8> op;
- let Inst{0-7} = op;
+ let Inst{63-56} = op;
dag OutOperandList = outs;
dag InOperandList = ins;
@@ -25,50 +39,154 @@ class InstVE<dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<64> SoftFail = 0;
}
-class RM<bits<8>opVal, dag outs, dag ins, string asmstr, list<dag> pattern=[]>
+//-----------------------------------------------------------------------------
+// Section 5.1 RM Type
+//
+// RM type has sx, sy, sz, and imm32.
+// The effective address is generated by sz + sy + imm32.
+//-----------------------------------------------------------------------------
+
+class RM<bits<8>opVal, dag outs, dag ins, string asmstr, list<dag> pattern = []>
: InstVE<outs, ins, asmstr, pattern> {
bits<1> cx = 0;
bits<7> sx;
- bits<1> cy = 0;
+ bits<1> cy = 1;
+ bits<7> sz; // defines sz prior to sy to assign from sz
+ bits<7> sy;
+ bits<1> cz = 1;
+ bits<32> imm32;
+ let op = opVal;
+ let Inst{55} = cx;
+ let Inst{54-48} = sx;
+ let Inst{47} = cy;
+ let Inst{46-40} = sy;
+ let Inst{39} = cz;
+ let Inst{38-32} = sz;
+ let Inst{31-0} = imm32;
+}
+
+//-----------------------------------------------------------------------------
+// Section 5.2 RRM Type
+//
+// RRM type is identical to RM, but the effective address is generated
+// by sz + imm32. The sy field is used for other purposes.
+//-----------------------------------------------------------------------------
+
+class RRM<bits<8>opVal, dag outs, dag ins, string asmstr,
+ list<dag> pattern = []>
+ : RM<opVal, outs, ins, asmstr, pattern>;
+
+// RRMHM type is for loading/storing host memory.
+// It is similar to RRM but does not use sy.
+class RRMHM<bits<8>opVal, dag outs, dag ins, string asmstr,
+ list<dag> pattern = []>
+ : RRM<opVal, outs, ins, asmstr, pattern> {
+ bits<2> ry = 0;
+ let cy = 0;
+ let sy{6-2} = 0;
+ let sy{1-0} = ry;
+}
+
+//-----------------------------------------------------------------------------
+// Section 5.3 CF Type
+//
+// CF type is used for control flow.
+//-----------------------------------------------------------------------------
+
+class CF<bits<8>opVal, dag outs, dag ins, string asmstr, list<dag> pattern = []>
+ : InstVE<outs, ins, asmstr, pattern> {
+ bits<1> cx = 0;
+ bits<1> cx2 = 0;
+ bits<2> bpf = 0;
+ bits<4> cf;
+ bits<1> cy = 1;
bits<7> sy;
- bits<1> cz = 0;
+ bits<1> cz = 1;
bits<7> sz;
- bits<32> imm32 = 0;
+ bits<32> imm32;
let op = opVal;
- let Inst{15} = cx;
- let Inst{14-8} = sx;
- let Inst{23} = cy;
- let Inst{22-16} = sy;
- let Inst{31} = cz;
- let Inst{30-24} = sz;
- let Inst{63-32} = imm32;
+ let Inst{55} = cx;
+ let Inst{54} = cx2;
+ let Inst{53-52} = bpf;
+ let Inst{51-48} = cf;
+ let Inst{47} = cy;
+ let Inst{46-40} = sy;
+ let Inst{39} = cz;
+ let Inst{38-32} = sz;
+ let Inst{31-0} = imm32;
}
-class RR<bits<8>opVal, dag outs, dag ins, string asmstr>
- : RM<opVal, outs, ins, asmstr> {
+//-----------------------------------------------------------------------------
+// Section 5.4 RR Type
+//
+// RR type is for generic arithmetic instructions.
+//-----------------------------------------------------------------------------
+
+class RR<bits<8>opVal, dag outs, dag ins, string asmstr, list<dag> pattern = []>
+ : InstVE<outs, ins, asmstr, pattern> {
+ bits<1> cx = 0;
+ bits<7> sx;
+ bits<1> cy = 1;
+ bits<7> sy;
+ bits<1> cz = 1;
+  bits<7> sz; // the m field, when used, is placed in the sz field
+ bits<8> vx = 0;
+ bits<8> vz = 0;
bits<1> cw = 0;
bits<1> cw2 = 0;
bits<4> cfw = 0;
- let imm32{0-23} = 0;
- let imm32{24} = cw;
- let imm32{25} = cw2;
- let imm32{26-27} = 0;
- let imm32{28-31} = cfw;
+ let op = opVal;
+ let Inst{55} = cx;
+ let Inst{54-48} = sx;
+ let Inst{47} = cy;
+ let Inst{46-40} = sy;
+ let Inst{39} = cz;
+ let Inst{38-32} = sz;
+ let Inst{31-24} = vx;
+ let Inst{23-16} = 0;
+ let Inst{15-8} = vz;
+ let Inst{7} = cw;
+ let Inst{6} = cw2;
+ let Inst{5-4} = 0;
+ let Inst{3-0} = cfw;
}
-class CF<bits<8>opVal, dag outs, dag ins, string asmstr, list<dag> pattern=[]>
- : RM<opVal, outs, ins, asmstr, pattern> {
- bits<1> cx2;
- bits<2> bpf;
- bits<4> cf;
- let cx = 0;
- let sx{6} = cx2;
- let sx{5-4} = bpf;
- let sx{3-0} = cf;
+// RRFENCE type is a special RR type for a FENCE instruction.
+class RRFENCE<bits<8>opVal, dag outs, dag ins, string asmstr,
+ list<dag> pattern = []>
+ : InstVE<outs, ins, asmstr, pattern> {
+ bits<1> avo = 0;
+ bits<1> lf = 0;
+ bits<1> sf = 0;
+ bits<1> c2 = 0;
+ bits<1> c1 = 0;
+ bits<1> c0 = 0;
+ let op = opVal;
+ let Inst{55} = avo;
+ let Inst{54-50} = 0;
+ let Inst{49} = lf;
+ let Inst{48} = sf;
+ let Inst{47-43} = 0;
+ let Inst{42} = c2;
+ let Inst{41} = c1;
+ let Inst{40} = c0;
+ let Inst{39-0} = 0;
}
+//-----------------------------------------------------------------------------
+// Section 5.5 RW Type
+//-----------------------------------------------------------------------------
+
+//-----------------------------------------------------------------------------
+// Section 5.6 RVM Type
+//-----------------------------------------------------------------------------
+
+//-----------------------------------------------------------------------------
+// Section 5.7 RV Type
+//-----------------------------------------------------------------------------
+
// Pseudo instructions.
-class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern=[]>
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = []>
: InstVE<outs, ins, asmstr, pattern> {
let isCodeGenOnly = 1;
let isPseudo = 1;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp
index bc382dcef7c3..86b2ac2078b1 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -12,6 +12,7 @@
#include "VEInstrInfo.h"
#include "VE.h"
+#include "VEMachineFunctionInfo.h"
#include "VESubtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -24,7 +25,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#define DEBUG_TYPE "ve"
+#define DEBUG_TYPE "ve-instr-info"
using namespace llvm;
@@ -35,8 +36,441 @@ using namespace llvm;
void VEInstrInfo::anchor() {}
VEInstrInfo::VEInstrInfo(VESubtarget &ST)
- : VEGenInstrInfo(VE::ADJCALLSTACKDOWN, VE::ADJCALLSTACKUP), RI(),
- Subtarget(ST) {}
+ : VEGenInstrInfo(VE::ADJCALLSTACKDOWN, VE::ADJCALLSTACKUP), RI() {}
+
+static bool IsIntegerCC(unsigned CC) { return (CC < VECC::CC_AF); }
+
+static VECC::CondCode GetOppositeBranchCondition(VECC::CondCode CC) {
+ switch (CC) {
+ case VECC::CC_IG:
+ return VECC::CC_ILE;
+ case VECC::CC_IL:
+ return VECC::CC_IGE;
+ case VECC::CC_INE:
+ return VECC::CC_IEQ;
+ case VECC::CC_IEQ:
+ return VECC::CC_INE;
+ case VECC::CC_IGE:
+ return VECC::CC_IL;
+ case VECC::CC_ILE:
+ return VECC::CC_IG;
+ case VECC::CC_AF:
+ return VECC::CC_AT;
+ case VECC::CC_G:
+ return VECC::CC_LENAN;
+ case VECC::CC_L:
+ return VECC::CC_GENAN;
+ case VECC::CC_NE:
+ return VECC::CC_EQNAN;
+ case VECC::CC_EQ:
+ return VECC::CC_NENAN;
+ case VECC::CC_GE:
+ return VECC::CC_LNAN;
+ case VECC::CC_LE:
+ return VECC::CC_GNAN;
+ case VECC::CC_NUM:
+ return VECC::CC_NAN;
+ case VECC::CC_NAN:
+ return VECC::CC_NUM;
+ case VECC::CC_GNAN:
+ return VECC::CC_LE;
+ case VECC::CC_LNAN:
+ return VECC::CC_GE;
+ case VECC::CC_NENAN:
+ return VECC::CC_EQ;
+ case VECC::CC_EQNAN:
+ return VECC::CC_NE;
+ case VECC::CC_GENAN:
+ return VECC::CC_L;
+ case VECC::CC_LENAN:
+ return VECC::CC_G;
+ case VECC::CC_AT:
+ return VECC::CC_AF;
+ case VECC::UNKNOWN:
+ return VECC::UNKNOWN;
+ }
+ llvm_unreachable("Invalid cond code");
+}
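+
+// An illustrative reading of the table above: the ordered compare CC_G
+// ("greater") inverts to CC_LENAN ("less-or-equal or NaN"), so the original
+// condition and its inverse together also cover NaN operands.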
+
+// Treat br.l [BRCF AT] as an unconditional branch.
+static bool isUncondBranchOpcode(int Opc) {
+ return Opc == VE::BRCFLa || Opc == VE::BRCFWa ||
+ Opc == VE::BRCFLa_nt || Opc == VE::BRCFWa_nt ||
+ Opc == VE::BRCFLa_t || Opc == VE::BRCFWa_t ||
+ Opc == VE::BRCFDa || Opc == VE::BRCFSa ||
+ Opc == VE::BRCFDa_nt || Opc == VE::BRCFSa_nt ||
+ Opc == VE::BRCFDa_t || Opc == VE::BRCFSa_t;
+}
+
+static bool isCondBranchOpcode(int Opc) {
+ return Opc == VE::BRCFLrr || Opc == VE::BRCFLir ||
+ Opc == VE::BRCFLrr_nt || Opc == VE::BRCFLir_nt ||
+ Opc == VE::BRCFLrr_t || Opc == VE::BRCFLir_t ||
+ Opc == VE::BRCFWrr || Opc == VE::BRCFWir ||
+ Opc == VE::BRCFWrr_nt || Opc == VE::BRCFWir_nt ||
+ Opc == VE::BRCFWrr_t || Opc == VE::BRCFWir_t ||
+ Opc == VE::BRCFDrr || Opc == VE::BRCFDir ||
+ Opc == VE::BRCFDrr_nt || Opc == VE::BRCFDir_nt ||
+ Opc == VE::BRCFDrr_t || Opc == VE::BRCFDir_t ||
+ Opc == VE::BRCFSrr || Opc == VE::BRCFSir ||
+ Opc == VE::BRCFSrr_nt || Opc == VE::BRCFSir_nt ||
+ Opc == VE::BRCFSrr_t || Opc == VE::BRCFSir_t;
+}
+
+static bool isIndirectBranchOpcode(int Opc) {
+  return Opc == VE::BCFLari || Opc == VE::BCFLari_nt || Opc == VE::BCFLari_t;
+}
+
+static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
+ SmallVectorImpl<MachineOperand> &Cond) {
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOperand(0).getImm()));
+ Cond.push_back(LastInst->getOperand(1));
+ Cond.push_back(LastInst->getOperand(2));
+ Target = LastInst->getOperand(3).getMBB();
+}
+
+bool VEInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
+ return false;
+
+ if (!isUnpredicatedTerminator(*I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = &*I;
+ unsigned LastOpc = LastInst->getOpcode();
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
+ if (isUncondBranchOpcode(LastOpc)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (isCondBranchOpcode(LastOpc)) {
+ // Block ends with fall-through condbranch.
+ parseCondBranch(LastInst, TBB, Cond);
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = &*I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If AllowModify is true and the block ends with two or more unconditional
+ // branches, delete all but the first unconditional branch.
+ if (AllowModify && isUncondBranchOpcode(LastOpc)) {
+ while (isUncondBranchOpcode(SecondLastOpc)) {
+ LastInst->eraseFromParent();
+ LastInst = SecondLastInst;
+ LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
+        // Return now; the only terminator is an unconditional branch.
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ SecondLastInst = &*I;
+ SecondLastOpc = SecondLastInst->getOpcode();
+ }
+ }
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
+ return true;
+
+ // If the block ends with a B and a Bcc, handle it.
+ if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ parseCondBranch(SecondLastInst, TBB, Cond);
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed.
+ if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // ...likewise if it ends with an indirect branch followed by an unconditional
+ // branch.
+ if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned VEInstrInfo::insertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ ArrayRef<MachineOperand> Cond,
+ const DebugLoc &DL, int *BytesAdded) const {
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
+  assert((Cond.size() == 3 || Cond.size() == 0) &&
+         "VE branch conditions should have three components!");
+ assert(!BytesAdded && "code size not handled");
+ if (Cond.empty()) {
+    // Unconditional branch
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(VE::BRCFLa_t))
+ .addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch
+ // (BRCFir CC sy sz addr)
+ assert(Cond[0].isImm() && Cond[2].isReg() && "not implemented");
+
+ unsigned opc[2];
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ MachineFunction *MF = MBB.getParent();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned Reg = Cond[2].getReg();
+ if (IsIntegerCC(Cond[0].getImm())) {
+ if (TRI->getRegSizeInBits(Reg, MRI) == 32) {
+ opc[0] = VE::BRCFWir;
+ opc[1] = VE::BRCFWrr;
+ } else {
+ opc[0] = VE::BRCFLir;
+ opc[1] = VE::BRCFLrr;
+ }
+ } else {
+ if (TRI->getRegSizeInBits(Reg, MRI) == 32) {
+ opc[0] = VE::BRCFSir;
+ opc[1] = VE::BRCFSrr;
+ } else {
+ opc[0] = VE::BRCFDir;
+ opc[1] = VE::BRCFDrr;
+ }
+ }
+ if (Cond[1].isImm()) {
+ BuildMI(&MBB, DL, get(opc[0]))
+ .add(Cond[0]) // condition code
+ .add(Cond[1]) // lhs
+ .add(Cond[2]) // rhs
+ .addMBB(TBB);
+ } else {
+ BuildMI(&MBB, DL, get(opc[1]))
+ .add(Cond[0])
+ .add(Cond[1])
+ .add(Cond[2])
+ .addMBB(TBB);
+ }
+
+ if (!FBB)
+ return 1;
+
+ BuildMI(&MBB, DL, get(VE::BRCFLa_t))
+ .addMBB(FBB);
+ return 2;
+}
+
+unsigned VEInstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
+ assert(!BytesRemoved && "code size not handled");
+
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+ while (I != MBB.begin()) {
+ --I;
+
+ if (I->isDebugValue())
+ continue;
+
+ if (!isUncondBranchOpcode(I->getOpcode()) &&
+ !isCondBranchOpcode(I->getOpcode()))
+ break; // Not a branch
+
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+ return Count;
+}
+
+bool VEInstrInfo::reverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ VECC::CondCode CC = static_cast<VECC::CondCode>(Cond[0].getImm());
+ Cond[0].setImm(GetOppositeBranchCondition(CC));
+ return false;
+}
+
+static bool IsAliasOfSX(Register Reg) {
+ return VE::I8RegClass.contains(Reg) || VE::I16RegClass.contains(Reg) ||
+ VE::I32RegClass.contains(Reg) || VE::I64RegClass.contains(Reg) ||
+ VE::F32RegClass.contains(Reg);
+}
+
+void VEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, const DebugLoc &DL,
+ MCRegister DestReg, MCRegister SrcReg,
+ bool KillSrc) const {
+
+ if (IsAliasOfSX(SrcReg) && IsAliasOfSX(DestReg)) {
+ BuildMI(MBB, I, DL, get(VE::ORri), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
+ } else {
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ dbgs() << "Impossible reg-to-reg copy from " << printReg(SrcReg, TRI)
+ << " to " << printReg(DestReg, TRI) << "\n";
+ llvm_unreachable("Impossible reg-to-reg copy");
+ }
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned VEInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
+ int &FrameIndex) const {
+ if (MI.getOpcode() == VE::LDrii || // I64
+ MI.getOpcode() == VE::LDLSXrii || // I32
+ MI.getOpcode() == VE::LDUrii // F32
+ ) {
+ if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
+ MI.getOperand(2).getImm() == 0 && MI.getOperand(3).isImm() &&
+ MI.getOperand(3).getImm() == 0) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stack slot stored to. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned VEInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
+ int &FrameIndex) const {
+ if (MI.getOpcode() == VE::STrii || // I64
+ MI.getOpcode() == VE::STLrii || // I32
+ MI.getOpcode() == VE::STUrii // F32
+ ) {
+ if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
+ MI.getOperand(1).getImm() == 0 && MI.getOperand(2).isImm() &&
+ MI.getOperand(2).getImm() == 0) {
+ FrameIndex = MI.getOperand(0).getIndex();
+ return MI.getOperand(3).getReg();
+ }
+ }
+ return 0;
+}
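// A minimal caller-side usage sketch (not part of this patch; TII and MI are
// assumed to be a VEInstrInfo reference and a candidate MachineInstr):
int FI = 0;
if (unsigned Reg = TII.isLoadFromStackSlot(MI, FI)) {
  // MI is a plain reload of Reg from stack slot FI, with no other side effects.
}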
+
+void VEInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ Register SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end())
+ DL = I->getDebugLoc();
+
+ MachineFunction *MF = MBB.getParent();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
+
+ // On the order of operands here: think "[FrameIdx + 0] = SrcReg".
+ if (RC == &VE::I64RegClass) {
+ BuildMI(MBB, I, DL, get(VE::STrii))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO);
+ } else if (RC == &VE::I32RegClass) {
+ BuildMI(MBB, I, DL, get(VE::STLrii))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO);
+ } else if (RC == &VE::F32RegClass) {
+ BuildMI(MBB, I, DL, get(VE::STUrii))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO);
+ } else
+ report_fatal_error("Can't store this register to stack slot");
+}
+
+void VEInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ Register DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end())
+ DL = I->getDebugLoc();
+
+ MachineFunction *MF = MBB.getParent();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
+
+ if (RC == &VE::I64RegClass) {
+ BuildMI(MBB, I, DL, get(VE::LDrii), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addImm(0)
+ .addMemOperand(MMO);
+ } else if (RC == &VE::I32RegClass) {
+ BuildMI(MBB, I, DL, get(VE::LDLSXrii), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addImm(0)
+ .addMemOperand(MMO);
+ } else if (RC == &VE::F32RegClass) {
+ BuildMI(MBB, I, DL, get(VE::LDUrii), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addImm(0)
+ .addMemOperand(MMO);
+ } else
+ report_fatal_error("Can't load this register from stack slot");
+}
+
+Register VEInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+ VEMachineFunctionInfo *VEFI = MF->getInfo<VEMachineFunctionInfo>();
+ Register GlobalBaseReg = VEFI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // We use %s15 (%got) as a global base register
+ GlobalBaseReg = VE::SX15;
+
+  // Insert a pseudo instruction to set GlobalBaseReg at the beginning of
+  // the first MBB of the function.
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc dl;
+ BuildMI(FirstMBB, MBBI, dl, get(VE::GETGOT), GlobalBaseReg);
+ VEFI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
switch (MI.getOpcode()) {
@@ -47,6 +481,9 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.eraseFromParent(); // The pseudo instruction is gone now.
return true;
}
+ case VE::GETSTACKTOP: {
+ return expandGetStackTopPseudo(MI);
+ }
}
return false;
}
@@ -54,8 +491,8 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
bool VEInstrInfo::expandExtendStackPseudo(MachineInstr &MI) const {
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const VEInstrInfo &TII =
- *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const VESubtarget &STI = MF.getSubtarget<VESubtarget>();
+ const VEInstrInfo &TII = *STI.getInstrInfo();
DebugLoc dl = MBB.findDebugLoc(MI);
// Create following instructions and multiple basic blocks.
@@ -91,7 +528,7 @@ bool VEInstrInfo::expandExtendStackPseudo(MachineInstr &MI) const {
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(syscallMBB);
BB->addSuccessor(sinkMBB);
- BuildMI(BB, dl, TII.get(VE::BCRLrr))
+ BuildMI(BB, dl, TII.get(VE::BRCFLrr_t))
.addImm(VECC::CC_IGE)
.addReg(VE::SX11) // %sp
.addReg(VE::SX8) // %sl
@@ -102,23 +539,26 @@ bool VEInstrInfo::expandExtendStackPseudo(MachineInstr &MI) const {
// Update machine-CFG edges
BB->addSuccessor(sinkMBB);
- BuildMI(BB, dl, TII.get(VE::LDSri), VE::SX61)
+ BuildMI(BB, dl, TII.get(VE::LDrii), VE::SX61)
.addReg(VE::SX14)
+ .addImm(0)
.addImm(0x18);
BuildMI(BB, dl, TII.get(VE::ORri), VE::SX62)
.addReg(VE::SX0)
.addImm(0);
- BuildMI(BB, dl, TII.get(VE::LEAzzi), VE::SX63)
+ BuildMI(BB, dl, TII.get(VE::LEAzii), VE::SX63)
+ .addImm(0)
+ .addImm(0)
.addImm(0x13b);
- BuildMI(BB, dl, TII.get(VE::SHMri))
+ BuildMI(BB, dl, TII.get(VE::SHMLri))
.addReg(VE::SX61)
.addImm(0)
.addReg(VE::SX63);
- BuildMI(BB, dl, TII.get(VE::SHMri))
+ BuildMI(BB, dl, TII.get(VE::SHMLri))
.addReg(VE::SX61)
.addImm(8)
.addReg(VE::SX8);
- BuildMI(BB, dl, TII.get(VE::SHMri))
+ BuildMI(BB, dl, TII.get(VE::SHMLri))
.addReg(VE::SX61)
.addImm(16)
.addReg(VE::SX11);
@@ -131,3 +571,35 @@ bool VEInstrInfo::expandExtendStackPseudo(MachineInstr &MI) const {
MI.eraseFromParent(); // The pseudo instruction is gone now.
return true;
}
+
+bool VEInstrInfo::expandGetStackTopPseudo(MachineInstr &MI) const {
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineFunction &MF = *MBB->getParent();
+ const VESubtarget &STI = MF.getSubtarget<VESubtarget>();
+ const VEInstrInfo &TII = *STI.getInstrInfo();
+ DebugLoc DL = MBB->findDebugLoc(MI);
+
+  // Create the following instruction:
+  //
+  //   dst = %sp + target-specific frame size + the size of the parameter area
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const VEFrameLowering &TFL = *STI.getFrameLowering();
+
+  // The VE ABI requires a reserved 176-byte area at the top of the stack,
+  // as described in VESubtarget.cpp, so we account for it here.
+ unsigned NumBytes = STI.getAdjustedFrameSize(0);
+
+  // Also add the size of the parameter area.
+ if (MFI.adjustsStack() && TFL.hasReservedCallFrame(MF))
+ NumBytes += MFI.getMaxCallFrameSize();
+
+ BuildMI(*MBB, MI, DL, TII.get(VE::LEArii))
+ .addDef(MI.getOperand(0).getReg())
+ .addReg(VE::SX11)
+ .addImm(0)
+ .addImm(NumBytes);
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return true;
+}
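// Worked example (illustrative numbers, not from this patch): with the
// 176-byte reserved area returned by getAdjustedFrameSize(0) and a function
// whose largest outgoing call frame is 64 bytes, the expansion above computes
// NumBytes = 176 + 64 = 240 and emits
//   LEArii %dst, %s11, 0, 240
// i.e. dst = %sp + 240, one LEA with base %s11, index 0 and displacement 240.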
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h
index 6a26d0e95275..7b6662df1d60 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.h
@@ -25,7 +25,6 @@ class VESubtarget;
class VEInstrInfo : public VEGenInstrInfo {
const VERegisterInfo RI;
- const VESubtarget &Subtarget;
virtual void anchor();
public:
@@ -37,10 +36,52 @@ public:
///
const VERegisterInfo &getRegisterInfo() const { return RI; }
+ /// Branch Analysis & Modification {
+ bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const override;
+
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
+
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
+
+ bool
+ reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ /// } Branch Analysis & Modification
+
+ void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
+ bool KillSrc) const override;
+
+ /// Stack Spill & Reload {
+ unsigned isLoadFromStackSlot(const MachineInstr &MI,
+ int &FrameIndex) const override;
+ unsigned isStoreToStackSlot(const MachineInstr &MI,
+ int &FrameIndex) const override;
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, Register SrcReg,
+ bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, Register DestReg,
+ int FrameIndex, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+ /// } Stack Spill & Reload
+
+ Register getGlobalBaseReg(MachineFunction *MF) const;
+
// Lower pseudo instructions after register allocation.
bool expandPostRAPseudo(MachineInstr &MI) const override;
bool expandExtendStackPseudo(MachineInstr &MI) const;
+ bool expandGetStackTopPseudo(MachineInstr &MI) const;
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td
index dc671aaa3f8d..8500f8ef1292 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.td
@@ -17,6 +17,94 @@
include "VEInstrFormats.td"
//===----------------------------------------------------------------------===//
+// Helper functions to retrieve target constants.
+//
+// VE instructions have space to hold the following immediates:
+// $sy has 7 bits to represent simm7, uimm7, simm7fp, or uimm7fp.
+// $sz also has 7 bits to represent mimm or mimmfp.
+// $disp has 32 bits to represent simm32.
+//
+// The mimm is a special immediate value: a contiguous run of 0s followed by
+// 1s, or the reverse.
+// `(m)0`: Represents 0 sequence then 1 sequence like 0b00...0011...11,
+// where `m` is equal to the number of leading zeros.
+// `(m)1`: Represents 1 sequence then 0 sequence like 0b11...1100...00,
+// where `m` is equal to the number of leading ones.
+// Each bit of mimm's 7 bits is used like below:
+// bit 6 : If `(m)0`, this bit is 1. Otherwise, this bit is 0.
+//   bits 5-0: Represent m (0-63).
+// Use `!add(m, 64)` to generate an immediate value in pattern matching.
+//
+// The floating-point immediate value is not a compacted encoding; it is the
+// plain bit pattern interpreted as an integer, so only a few values are
+// representable, e.g. 0.0 (0x00000000) or -2.0 (0xC0000000 = (2)1).
+//===----------------------------------------------------------------------===//
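// A small illustrative helper (written for this note only, distinct from the
// convMImmVal() used by the MIMM transforms below) that applies the encoding
// rule above: an `(m)0` mask such as 0x00000000ffffffff encodes as 64 + m,
// which is why patterns later in this file write `!add(32, 64)`, while an
// `(m)1` mask such as 0xffffffff00000000 encodes as plain m.
static unsigned encodeMImmSketch(uint64_t Val) {
  if (isMask_64(Val))                        // (m)0: m leading zeros, then ones
    return 64 + countLeadingZeros(Val);
  assert(Val && isMask_64(~Val) && "not an mimm value");
  return countLeadingOnes(Val);              // (m)1: m leading ones, then zeros
}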
+
+def ULO7 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0x7f,
+ SDLoc(N), MVT::i32);
+}]>;
+def LO7 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(SignExtend32(N->getSExtValue(), 7),
+ SDLoc(N), MVT::i32);
+}]>;
+def MIMM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(convMImmVal(getImmVal(N)),
+ SDLoc(N), MVT::i32);
+}]>;
+def LO32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(Lo_32(N->getZExtValue()),
+ SDLoc(N), MVT::i32);
+}]>;
+def HI32 : SDNodeXForm<imm, [{
+  // Transformation function: shift the upper 32 bits of the immediate value
+  // down into the low bits.
+ return CurDAG->getTargetConstant(Hi_32(N->getZExtValue()),
+ SDLoc(N), MVT::i32);
+}]>;
+
+def LO7FP : SDNodeXForm<fpimm, [{
+ uint64_t Val = getFpImmVal(N);
+ return CurDAG->getTargetConstant(SignExtend32(Val, 7), SDLoc(N), MVT::i32);
+}]>;
+def MIMMFP : SDNodeXForm<fpimm, [{
+ return CurDAG->getTargetConstant(convMImmVal(getFpImmVal(N)),
+ SDLoc(N), MVT::i32);
+}]>;
+def LOFP32 : SDNodeXForm<fpimm, [{
+ return CurDAG->getTargetConstant(Lo_32(getFpImmVal(N) & 0xffffffff),
+ SDLoc(N), MVT::i32);
+}]>;
+def HIFP32 : SDNodeXForm<fpimm, [{
+ return CurDAG->getTargetConstant(Hi_32(getFpImmVal(N)), SDLoc(N), MVT::i32);
+}]>;
+
+def icond2cc : SDNodeXForm<cond, [{
+ VECC::CondCode VECC = intCondCode2Icc(N->get());
+ return CurDAG->getTargetConstant(VECC, SDLoc(N), MVT::i32);
+}]>;
+
+def icond2ccSwap : SDNodeXForm<cond, [{
+ ISD::CondCode CC = getSetCCSwappedOperands(N->get());
+ VECC::CondCode VECC = intCondCode2Icc(CC);
+ return CurDAG->getTargetConstant(VECC, SDLoc(N), MVT::i32);
+}]>;
+
+def fcond2cc : SDNodeXForm<cond, [{
+ VECC::CondCode VECC = fpCondCode2Fcc(N->get());
+ return CurDAG->getTargetConstant(VECC, SDLoc(N), MVT::i32);
+}]>;
+
+def fcond2ccSwap : SDNodeXForm<cond, [{
+ ISD::CondCode CC = getSetCCSwappedOperands(N->get());
+ VECC::CondCode VECC = fpCondCode2Fcc(CC);
+ return CurDAG->getTargetConstant(VECC, SDLoc(N), MVT::i32);
+}]>;
+
+def CCOP : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue(),
+ SDLoc(N), MVT::i32);
+}]>;
+
+//===----------------------------------------------------------------------===//
// Feature predicates.
//===----------------------------------------------------------------------===//
@@ -24,42 +112,302 @@ include "VEInstrFormats.td"
// Instruction Pattern Stuff
//===----------------------------------------------------------------------===//
-def simm7 : PatLeaf<(imm), [{ return isInt<7>(N->getSExtValue()); }]>;
+// zero
+def ZeroAsmOperand : AsmOperandClass {
+ let Name = "Zero";
+}
+def zero : Operand<i32>, PatLeaf<(imm), [{
+ return N->getSExtValue() == 0; }]> {
+ let ParserMatchClass = ZeroAsmOperand;
+}
+
+// uimm0to2 - Special immediate value representing 0, 1, or 2.
+def UImm0to2AsmOperand : AsmOperandClass {
+ let Name = "UImm0to2";
+}
+def uimm0to2 : Operand<i32>, PatLeaf<(imm), [{
+ return N->getZExtValue() < 3; }], ULO7> {
+ let ParserMatchClass = UImm0to2AsmOperand;
+}
+
+// uimm1 - Generic immediate value.
+def UImm1AsmOperand : AsmOperandClass {
+ let Name = "UImm1";
+}
+def uimm1 : Operand<i32>, PatLeaf<(imm), [{
+ return isUInt<1>(N->getZExtValue()); }], ULO7> {
+ let ParserMatchClass = UImm1AsmOperand;
+}
+
+// uimm2 - Generic immediate value.
+def UImm2AsmOperand : AsmOperandClass {
+ let Name = "UImm2";
+}
+def uimm2 : Operand<i32>, PatLeaf<(imm), [{
+ return isUInt<2>(N->getZExtValue()); }], ULO7> {
+ let ParserMatchClass = UImm2AsmOperand;
+}
+
+// uimm3 - Generic immediate value.
+def UImm3AsmOperand : AsmOperandClass {
+ let Name = "UImm3";
+}
+def uimm3 : Operand<i32>, PatLeaf<(imm), [{
+ return isUInt<3>(N->getZExtValue()); }], ULO7> {
+ let ParserMatchClass = UImm3AsmOperand;
+}
+
+// uimm6 - Generic immediate value.
+def UImm6AsmOperand : AsmOperandClass {
+ let Name = "UImm6";
+}
+def uimm6 : Operand<i32>, PatLeaf<(imm), [{
+ return isUInt<6>(N->getZExtValue()); }], ULO7> {
+ let ParserMatchClass = UImm6AsmOperand;
+}
+
+// uimm7 - Generic immediate value.
+def UImm7AsmOperand : AsmOperandClass {
+ let Name = "UImm7";
+}
+def uimm7 : Operand<i32>, PatLeaf<(imm), [{
+ return isUInt<7>(N->getZExtValue()); }], ULO7> {
+ let ParserMatchClass = UImm7AsmOperand;
+}
+
+// simm7 - Generic immediate value.
+def SImm7AsmOperand : AsmOperandClass {
+ let Name = "SImm7";
+}
+def simm7 : Operand<i32>, PatLeaf<(imm), [{
+ return isInt<7>(N->getSExtValue()); }], LO7> {
+ let ParserMatchClass = SImm7AsmOperand;
+ let DecoderMethod = "DecodeSIMM7";
+}
+
+// mimm - Special immediate value of sequential bit stream of 0 or 1.
+def MImmAsmOperand : AsmOperandClass {
+ let Name = "MImm";
+ let ParserMethod = "parseMImmOperand";
+}
+def mimm : Operand<i32>, PatLeaf<(imm), [{
+ return isMImmVal(getImmVal(N)); }], MIMM> {
+ let ParserMatchClass = MImmAsmOperand;
+ let PrintMethod = "printMImmOperand";
+}
+
+// simm7fp - Generic fp immediate value.
+def simm7fp : Operand<i32>, PatLeaf<(fpimm), [{
+ return isInt<7>(getFpImmVal(N));
+ }], LO7FP> {
+ let ParserMatchClass = SImm7AsmOperand;
+ let DecoderMethod = "DecodeSIMM7";
+}
+
+// mimmfp - Special fp immediate value of sequential bit stream of 0 or 1.
+def mimmfp : Operand<i32>, PatLeaf<(fpimm), [{
+ return isMImmVal(getFpImmVal(N)); }], MIMMFP> {
+ let ParserMatchClass = MImmAsmOperand;
+ let PrintMethod = "printMImmOperand";
+}
+
+// mimmfp32 - 32-bit-wide mimmfp.
+// The float value is placed in the upper bits, so the lower 32 bits are ignored.
+def mimmfp32 : Operand<i32>, PatLeaf<(fpimm), [{
+ return isMImm32Val(getFpImmVal(N) >> 32); }], MIMMFP> {
+ let ParserMatchClass = MImmAsmOperand;
+ let PrintMethod = "printMImmOperand";
+}
+
+// Other generic patterns used in pattern matching.
def simm32 : PatLeaf<(imm), [{ return isInt<32>(N->getSExtValue()); }]>;
-def uimm6 : PatLeaf<(imm), [{ return isUInt<6>(N->getZExtValue()); }]>;
+def uimm32 : PatLeaf<(imm), [{ return isUInt<32>(N->getZExtValue()); }]>;
+def lomsbzero : PatLeaf<(imm), [{ return (N->getZExtValue() & 0x80000000)
+ == 0; }]>;
+def lozero : PatLeaf<(imm), [{ return (N->getZExtValue() & 0xffffffff)
+ == 0; }]>;
+def fplomsbzero : PatLeaf<(fpimm), [{ return (getFpImmVal(N) & 0x80000000)
+ == 0; }]>;
+def fplozero : PatLeaf<(fpimm), [{ return (getFpImmVal(N) & 0xffffffff)
+ == 0; }]>;
+
+def CCSIOp : PatLeaf<(cond), [{
+ switch (N->get()) {
+ default: return true;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ case ISD::SETUGE: return false;
+ }
+}]>;
+
+def CCUIOp : PatLeaf<(cond), [{
+ switch (N->get()) {
+ default: return true;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE: return false;
+ }
+}]>;
-// ASX format of memory address
-def MEMri : Operand<iPTR> {
+//===----------------------------------------------------------------------===//
+// Addressing modes.
+// An SX-Aurora address has the following fields:
+// sz: register or 0
+// sy: register or immediate (-64 to 63)
+// disp: immediate (-2147483648 to 2147483647)
+//
+// There are two kinds of instruction formats.
+// ASX format uses sz + sy + disp.
+// AS format uses sz + disp.
+//
+// Moreover, there are four kinds of assembly instruction formats.
+// ASX format uses "disp", "disp(, sz)", "disp(sy)", "disp(sy, sz)",
+// "(, sz)", "(sy)", or "(sy, sz)".
+// AS format uses "disp", "disp(, sz)", or "(, sz)" in general.
+// AS format in RRM format uses "disp", "disp(sz)", or "(sz)".
+// AS format in RRM format for host memory access uses "sz", "(sz)",
+// or "disp(sz)".
+//
+// They are defined below.
+//
+// ASX format:
+// MEMrri, MEMrii, MEMzri, MEMzii
+// AS format:
+// MEMriASX, MEMziASX : simple AS format
+// MEMriRRM, MEMziRRM : AS format in RRM format
+// MEMriHM, MEMziHM : AS format in RRM format for host memory access
+//===----------------------------------------------------------------------===//
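// A minimal sketch (not part of this patch) of how the three ASX sub-operands
// appear on a MachineInstr; it restates the frame-index store emitted by
// storeRegToStackSlot() earlier in this diff, with MBB, I, DL, TII, FI,
// SrcReg and IsKill assumed from that context:
BuildMI(MBB, I, DL, TII.get(VE::STrii))
    .addFrameIndex(FI)                        // base (sz): the frame index
    .addImm(0)                                // index (sy)
    .addImm(0)                                // disp
    .addReg(SrcReg, getKillRegState(IsKill)); // value: "[FI + 0 + 0] = SrcReg"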
+
+// DAG selections for both ASX and AS formats.
+def ADDRrri : ComplexPattern<iPTR, 3, "selectADDRrri", [frameindex], []>;
+def ADDRrii : ComplexPattern<iPTR, 3, "selectADDRrii", [frameindex], []>;
+def ADDRzri : ComplexPattern<iPTR, 3, "selectADDRzri", [], []>;
+def ADDRzii : ComplexPattern<iPTR, 3, "selectADDRzii", [], []>;
+def ADDRri : ComplexPattern<iPTR, 2, "selectADDRri", [frameindex], []>;
+def ADDRzi : ComplexPattern<iPTR, 2, "selectADDRzi", [], []>;
+
+// ASX format.
+def VEMEMrriAsmOperand : AsmOperandClass {
+ let Name = "MEMrri";
+ let ParserMethod = "parseMEMOperand";
+}
+def VEMEMriiAsmOperand : AsmOperandClass {
+ let Name = "MEMrii";
+ let ParserMethod = "parseMEMOperand";
+}
+def VEMEMzriAsmOperand : AsmOperandClass {
+ let Name = "MEMzri";
+ let ParserMethod = "parseMEMOperand";
+}
+def VEMEMziiAsmOperand : AsmOperandClass {
+ let Name = "MEMzii";
+ let ParserMethod = "parseMEMOperand";
+}
+
+// ASX format uses a single assembly instruction format.
+def MEMrri : Operand<iPTR> {
+ let PrintMethod = "printMemASXOperand";
+ let MIOperandInfo = (ops ptr_rc, ptr_rc, i32imm);
+ let ParserMatchClass = VEMEMrriAsmOperand;
+}
+def MEMrii : Operand<iPTR> {
let PrintMethod = "printMemASXOperand";
- let MIOperandInfo = (ops ptr_rc, i64imm);
+ let MIOperandInfo = (ops ptr_rc, i32imm, i32imm);
+ let ParserMatchClass = VEMEMriiAsmOperand;
+}
+def MEMzri : Operand<iPTR> {
+ let PrintMethod = "printMemASXOperand";
+ let MIOperandInfo = (ops i32imm /* = 0 */, ptr_rc, i32imm);
+ let ParserMatchClass = VEMEMzriAsmOperand;
+}
+def MEMzii : Operand<iPTR> {
+ let PrintMethod = "printMemASXOperand";
+ let MIOperandInfo = (ops i32imm /* = 0 */, i32imm, i32imm);
+ let ParserMatchClass = VEMEMziiAsmOperand;
}
-// AS format of memory address
-def MEMASri : Operand<iPTR> {
- let PrintMethod = "printMemASOperand";
- let MIOperandInfo = (ops ptr_rc, i64imm);
+// AS format.
+def VEMEMriAsmOperand : AsmOperandClass {
+ let Name = "MEMri";
+ let ParserMethod = "parseMEMAsOperand";
+}
+def VEMEMziAsmOperand : AsmOperandClass {
+ let Name = "MEMzi";
+ let ParserMethod = "parseMEMAsOperand";
}
-// Branch targets have OtherVT type.
-def brtarget32 : Operand<OtherVT> {
- let EncoderMethod = "getBranchTarget32OpValue";
+// AS format uses multiple assembly instruction formats:
+// 1. AS generic assembly instruction format:
+def MEMriASX : Operand<iPTR> {
+ let PrintMethod = "printMemASOperandASX";
+ let MIOperandInfo = (ops ptr_rc, i32imm);
+ let ParserMatchClass = VEMEMriAsmOperand;
+}
+def MEMziASX : Operand<iPTR> {
+ let PrintMethod = "printMemASOperandASX";
+ let MIOperandInfo = (ops i32imm /* = 0 */, i32imm);
+ let ParserMatchClass = VEMEMziAsmOperand;
}
-def simm7Op64 : Operand<i64> {
- let DecoderMethod = "DecodeSIMM7";
+// 2. AS RRM style assembly instruction format:
+def MEMriRRM : Operand<iPTR> {
+ let PrintMethod = "printMemASOperandRRM";
+ let MIOperandInfo = (ops ptr_rc, i32imm);
+ let ParserMatchClass = VEMEMriAsmOperand;
+}
+def MEMziRRM : Operand<iPTR> {
+ let PrintMethod = "printMemASOperandRRM";
+ let MIOperandInfo = (ops i32imm /* = 0 */, i32imm);
+ let ParserMatchClass = VEMEMziAsmOperand;
}
-def simm32Op64 : Operand<i64> {
- let DecoderMethod = "DecodeSIMM32";
+// 3. AS HM style assembly instruction format:
+def MEMriHM : Operand<iPTR> {
+ let PrintMethod = "printMemASOperandHM";
+ let MIOperandInfo = (ops ptr_rc, i32imm);
+ let ParserMatchClass = VEMEMriAsmOperand;
+}
+def MEMziHM : Operand<iPTR> {
+ let PrintMethod = "printMemASOperandHM";
+ let MIOperandInfo = (ops i32imm /* = 0 */, i32imm);
+ let ParserMatchClass = VEMEMziAsmOperand;
}
-def uimm6Op64 : Operand<i64> {
- let DecoderMethod = "DecodeUIMM6";
+//===----------------------------------------------------------------------===//
+// Other operands.
+//===----------------------------------------------------------------------===//
+
+// Branch targets have OtherVT type.
+def brtarget32 : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTargetOpValue";
+ let DecoderMethod = "DecodeSIMM32";
}
// Operand for printing out a condition code.
-let PrintMethod = "printCCOperand" in
- def CCOp : Operand<i32>;
+def CCOpAsmOperand : AsmOperandClass { let Name = "CCOp"; }
+def CCOp : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 22; }], CCOP> {
+ let PrintMethod = "printCCOperand";
+ let DecoderMethod = "DecodeCCOperand";
+ let EncoderMethod = "getCCOpValue";
+ let ParserMatchClass = CCOpAsmOperand;
+}
+
+// Operand for a rounding mode code.
+def RDOpAsmOperand : AsmOperandClass {
+ let Name = "RDOp";
+}
+def RDOp : Operand<i32> {
+ let PrintMethod = "printRDOperand";
+ let DecoderMethod = "DecodeRDOperand";
+ let EncoderMethod = "getRDOpValue";
+ let ParserMatchClass = RDOpAsmOperand;
+}
+
+def VEhi : SDNode<"VEISD::Hi", SDTIntUnaryOp>;
+def VElo : SDNode<"VEISD::Lo", SDTIntUnaryOp>;
// These are target-independent nodes, but have target-specific formats.
def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i64>,
@@ -72,10 +420,29 @@ def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-// def SDT_SPCall : SDTypeProfile<0, -1, [SDTCisVT<0, i64>]>;
+def SDT_SPCall : SDTypeProfile<0, -1, [SDTCisVT<0, i64>]>;
+def call : SDNode<"VEISD::CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def retflag : SDNode<"VEISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def getGOT : Operand<iPTR>;
+
+// GETFUNPLT for PIC
+def GetFunPLT : SDNode<"VEISD::GETFUNPLT", SDTIntUnaryOp>;
+
+// GETTLSADDR for TLS
+def GetTLSAddr : SDNode<"VEISD::GETTLSADDR", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+// GETSTACKTOP
+def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+
//===----------------------------------------------------------------------===//
// VE Flag Conditions
//===----------------------------------------------------------------------===//
@@ -107,168 +474,1243 @@ def CC_LENAN : CC_VAL<20>; // Less or Equal or NaN
def CC_AT : CC_VAL<21>; // Always true
//===----------------------------------------------------------------------===//
+// VE Rounding Mode
+//===----------------------------------------------------------------------===//
+
+// Note that these values must be kept in sync with the VERD::RoundingMode enum
+// values.
+class RD_VAL<int N> : PatLeaf<(i32 N)>;
+def RD_NONE : RD_VAL< 0>; // According to PSW
+def RD_RZ : RD_VAL< 8>; // Round toward Zero
+def RD_RP : RD_VAL< 9>; // Round toward Plus infinity
+def RD_RM : RD_VAL<10>; // Round toward Minus infinity
+def RD_RN : RD_VAL<11>; // Round to Nearest (ties to Even)
+def RD_RA : RD_VAL<12>; // Round to Nearest (ties to Away)
+
+//===----------------------------------------------------------------------===//
// VE Multiclasses for common instruction formats
//===----------------------------------------------------------------------===//
-multiclass RMm<string opcStr, bits<8>opc,
- RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> {
- def rri : RM<
- opc, (outs RC:$sx), (ins RC:$sy, RC:$sz, immOp2:$imm32),
- !strconcat(opcStr, " $sx, ${imm32}($sy, ${sz})")> {
- let cy = 1;
- let cz = 1;
- let hasSideEffects = 0;
+// Multiclass for generic RR type instructions
+let hasSideEffects = 0 in
+multiclass RRbm<string opcStr, bits<8>opc,
+ RegisterClass RCo, ValueType Tyo,
+ RegisterClass RCi, ValueType Tyi,
+ SDPatternOperator OpNode = null_frag,
+ Operand immOp = simm7, Operand mOp = mimm> {
+ def rr : RR<opc, (outs RCo:$sx), (ins RCi:$sy, RCi:$sz),
+ !strconcat(opcStr, " $sx, $sy, $sz"),
+ [(set Tyo:$sx, (OpNode Tyi:$sy, Tyi:$sz))]>;
+  // VE computes (OpNode $sy, $sz), but LLVM requires the immediate to be on
+  // the RHS, so we use the following definition.
+ let cy = 0 in
+ def ri : RR<opc, (outs RCo:$sx), (ins RCi:$sz, immOp:$sy),
+ !strconcat(opcStr, " $sx, $sy, $sz"),
+ [(set Tyo:$sx, (OpNode Tyi:$sz, (Tyi immOp:$sy)))]>;
+ let cz = 0 in
+ def rm : RR<opc, (outs RCo:$sx), (ins RCi:$sy, mOp:$sz),
+ !strconcat(opcStr, " $sx, $sy, $sz"),
+ [(set Tyo:$sx, (OpNode Tyi:$sy, (Tyi mOp:$sz)))]>;
+ let cy = 0, cz = 0 in
+ def im : RR<opc, (outs RCo:$sx), (ins immOp:$sy, mOp:$sz),
+ !strconcat(opcStr, " $sx, $sy, $sz"),
+ [(set Tyo:$sx, (OpNode (Tyi immOp:$sy), (Tyi mOp:$sz)))]>;
+}
+
+// Multiclass for non-commutative RR type instructions
+let hasSideEffects = 0 in
+multiclass RRNCbm<string opcStr, bits<8>opc,
+ RegisterClass RCo, ValueType Tyo,
+ RegisterClass RCi, ValueType Tyi,
+ SDPatternOperator OpNode = null_frag,
+ Operand immOp = simm7, Operand mOp = mimm> {
+ def rr : RR<opc, (outs RCo:$sx), (ins RCi:$sy, RCi:$sz),
+ !strconcat(opcStr, " $sx, $sy, $sz"),
+ [(set Tyo:$sx, (OpNode Tyi:$sy, Tyi:$sz))]>;
+ let cy = 0 in
+ def ir : RR<opc, (outs RCo:$sx), (ins immOp:$sy, RCi:$sz),
+ !strconcat(opcStr, " $sx, $sy, $sz"),
+ [(set Tyo:$sx, (OpNode (Tyi immOp:$sy), Tyi:$sz))]>;
+ let cz = 0 in
+ def rm : RR<opc, (outs RCo:$sx), (ins RCi:$sy, mOp:$sz),
+ !strconcat(opcStr, " $sx, $sy, $sz"),
+ [(set Tyo:$sx, (OpNode Tyi:$sy, (Tyi mOp:$sz)))]>;
+ let cy = 0, cz = 0 in
+ def im : RR<opc, (outs RCo:$sx), (ins immOp:$sy, mOp:$sz),
+ !strconcat(opcStr, " $sx, $sy, $sz"),
+ [(set Tyo:$sx, (OpNode (Tyi immOp:$sy), (Tyi mOp:$sz)))]>;
+}
+
+// Generic RR multiclass with 2 arguments.
+// e.g. ADDUL, ADDSWSX, ADDSWZX, etc.
+multiclass RRm<string opcStr, bits<8>opc,
+ RegisterClass RC, ValueType Ty,
+ SDPatternOperator OpNode = null_frag,
+ Operand immOp = simm7, Operand mOp = mimm> :
+ RRbm<opcStr, opc, RC, Ty, RC, Ty, OpNode, immOp, mOp>;
+
+// Generic RR multiclass for non-commutative instructions with 2 arguments.
+// e.g. SUBUL, SUBUW, SUBSWSX, etc.
+multiclass RRNCm<string opcStr, bits<8>opc,
+ RegisterClass RC, ValueType Ty,
+ SDPatternOperator OpNode = null_frag,
+ Operand immOp = simm7, Operand mOp = mimm> :
+ RRNCbm<opcStr, opc, RC, Ty, RC, Ty, OpNode, immOp, mOp>;
+
+// Generic RR multiclass for floating point instructions with 2 arguments.
+// e.g. FADDD, FADDS, FSUBD, etc.
+multiclass RRFm<string opcStr, bits<8>opc,
+ RegisterClass RC, ValueType Ty,
+ SDPatternOperator OpNode = null_frag,
+ Operand immOp = simm7fp, Operand mOp = mimmfp> :
+ RRNCbm<opcStr, opc, RC, Ty, RC, Ty, OpNode, immOp, mOp>;
+
+// Generic RR multiclass for shift instructions with 2 arguments.
+// e.g. SLL, SRL, SLAWSX, etc.
+let hasSideEffects = 0 in
+multiclass RRIm<string opcStr, bits<8>opc,
+ RegisterClass RC, ValueType Ty,
+ SDPatternOperator OpNode = null_frag> {
+ def rr : RR<opc, (outs RC:$sx), (ins RC:$sz, I32:$sy),
+ !strconcat(opcStr, " $sx, $sz, $sy"),
+ [(set Ty:$sx, (OpNode Ty:$sz, i32:$sy))]>;
+ let cz = 0 in
+ def mr : RR<opc, (outs RC:$sx), (ins mimm:$sz, I32:$sy),
+ !strconcat(opcStr, " $sx, $sz, $sy"),
+ [(set Ty:$sx, (OpNode (Ty mimm:$sz), i32:$sy))]>;
+ let cy = 0 in
+ def ri : RR<opc, (outs RC:$sx), (ins RC:$sz, uimm7:$sy),
+ !strconcat(opcStr, " $sx, $sz, $sy"),
+ [(set Ty:$sx, (OpNode Ty:$sz, (i32 uimm7:$sy)))]>;
+ let cy = 0, cz = 0 in
+ def mi : RR<opc, (outs RC:$sx), (ins mimm:$sz, uimm7:$sy),
+ !strconcat(opcStr, " $sx, $sz, $sy"),
+ [(set Ty:$sx, (OpNode (Ty mimm:$sz), (i32 uimm7:$sy)))]>;
+}
+
+// Special RR multiclass for the 128-bit shift left instruction.
+// e.g. SLD
+let Constraints = "$hi = $sx", DisableEncoding = "$hi", hasSideEffects = 0 in
+multiclass RRILDm<string opcStr, bits<8>opc,
+ RegisterClass RC, ValueType Ty,
+ SDPatternOperator OpNode = null_frag> {
+ def rrr : RR<opc, (outs RC:$sx), (ins RC:$hi, RC:$sz, I32:$sy),
+ !strconcat(opcStr, " $sx, $sz, $sy")>;
+ let cz = 0 in
+ def rmr : RR<opc, (outs RC:$sx), (ins RC:$hi, mimm:$sz, I32:$sy),
+ !strconcat(opcStr, " $sx, $sz, $sy")>;
+ let cy = 0 in
+ def rri : RR<opc, (outs RC:$sx), (ins RC:$hi, RC:$sz, uimm7:$sy),
+ !strconcat(opcStr, " $sx, $sz, $sy")>;
+ let cy = 0, cz = 0 in
+ def rmi : RR<opc, (outs RC:$sx), (ins RC:$hi, mimm:$sz, uimm7:$sy),
+ !strconcat(opcStr, " $sx, $sz, $sy")>;
+}
+
+// Special RR multiclass for the 128-bit shift right instruction.
+// e.g. SRD
+let Constraints = "$low = $sx", DisableEncoding = "$low", hasSideEffects = 0 in
+multiclass RRIRDm<string opcStr, bits<8>opc,
+ RegisterClass RC, ValueType Ty,
+ SDPatternOperator OpNode = null_frag> {
+ def rrr : RR<opc, (outs RC:$sx), (ins RC:$sz, RC:$low, I32:$sy),
+ !strconcat(opcStr, " $sx, $sz, $sy")>;
+ let cz = 0 in
+ def mrr : RR<opc, (outs RC:$sx), (ins mimm:$sz, RC:$low, I32:$sy),
+ !strconcat(opcStr, " $sx, $sz, $sy")>;
+ let cy = 0 in
+ def rri : RR<opc, (outs RC:$sx), (ins RC:$sz, RC:$low, uimm7:$sy),
+ !strconcat(opcStr, " $sx, $sz, $sy")>;
+ let cy = 0, cz = 0 in
+ def mri : RR<opc, (outs RC:$sx), (ins mimm:$sz, RC:$low, uimm7:$sy),
+ !strconcat(opcStr, " $sx, $sz, $sy")>;
+}
+
+// Generic RR multiclass with an argument.
+// e.g. LDZ, PCNT, and BRV
+let cy = 0, sy = 0, hasSideEffects = 0 in
+multiclass RRI1m<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty,
+ SDPatternOperator OpNode = null_frag> {
+ def r : RR<opc, (outs RC:$sx), (ins RC:$sz), !strconcat(opcStr, " $sx, $sz"),
+ [(set Ty:$sx, (OpNode Ty:$sz))]>;
+ let cz = 0 in
+ def m : RR<opc, (outs RC:$sx), (ins mimm:$sz),
+ !strconcat(opcStr, " $sx, $sz"),
+ [(set Ty:$sx, (OpNode (Ty mimm:$sz)))]>;
+}
+
+// Special RR multiclass for MRG instruction.
+// e.g. MRG
+let Constraints = "$sx = $sd", DisableEncoding = "$sd", hasSideEffects = 0 in
+multiclass RRMRGm<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty> {
+ def rr : RR<opc, (outs RC:$sx), (ins RC:$sy, RC:$sz, RC:$sd),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
+ let cy = 0 in
+ def ir : RR<opc, (outs RC:$sx), (ins simm7:$sy, RC:$sz, RC:$sd),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
+ let cz = 0 in
+ def rm : RR<opc, (outs RC:$sx), (ins RC:$sy, mimm:$sz, RC:$sd),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
+ let cy = 0, cz = 0 in
+ def im : RR<opc, (outs RC:$sx), (ins simm7:$sy, mimm:$sz, RC:$sd),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
+}
+
+// Special RR multiclass for BSWP instruction.
+// e.g. BSWP
+let hasSideEffects = 0 in
+multiclass RRSWPm<string opcStr, bits<8>opc,
+ RegisterClass RC, ValueType Ty,
+ SDPatternOperator OpNode = null_frag> {
+ let cy = 0 in
+ def ri : RR<opc, (outs RC:$sx), (ins RC:$sz, uimm1:$sy),
+ !strconcat(opcStr, " $sx, $sz, $sy"),
+ [(set Ty:$sx, (OpNode Ty:$sz, (i32 uimm1:$sy)))]>;
+ let cy = 0, cz = 0 in
+ def mi : RR<opc, (outs RC:$sx), (ins mimm:$sz, uimm1:$sy),
+ !strconcat(opcStr, " $sx, $sz, $sy"),
+ [(set Ty:$sx, (OpNode (Ty mimm:$sz), (i32 uimm1:$sy)))]>;
+}
+
+// Multiclass for CMOV instructions.
+// e.g. CMOVL, CMOVW, CMOVD, etc.
+let Constraints = "$sx = $sd", DisableEncoding = "$sd", hasSideEffects = 0,
+ cfw = ? in
+multiclass RRCMOVm<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty> {
+ def rr : RR<opc, (outs I64:$sx), (ins CCOp:$cfw, RC:$sy, I64:$sz, I64:$sd),
+ !strconcat(opcStr, " $sx, $sz, $sy")>;
+ let cy = 0 in
+ def ir : RR<opc, (outs I64:$sx),
+ (ins CCOp:$cfw, simm7:$sy, I64:$sz, I64:$sd),
+ !strconcat(opcStr, " $sx, $sz, $sy")>;
+ let cz = 0 in
+ def rm : RR<opc, (outs I64:$sx),
+ (ins CCOp:$cfw, RC:$sy, mimm:$sz, I64:$sd),
+ !strconcat(opcStr, " $sx, $sz, $sy")>;
+ let cy = 0, cz = 0 in
+ def im : RR<opc, (outs I64:$sx),
+ (ins CCOp:$cfw, simm7:$sy, mimm:$sz, I64:$sd),
+ !strconcat(opcStr, " $sx, $sz, $sy")>;
+}
+
+// Multiclass for floating point conversion instructions.
+// e.g. CVTWDSX, CVTWDZX, CVTWSSX, etc.
+// sz{3-0} = rounding mode
+let cz = 0, hasSideEffects = 0 in
+multiclass CVTRDm<string opcStr, bits<8> opc, RegisterClass RCo, ValueType Tyo,
+ RegisterClass RCi, ValueType Tyi> {
+ def r : RR<opc, (outs RCo:$sx), (ins RDOp:$rd, RCi:$sy),
+ !strconcat(opcStr, "${rd} $sx, $sy")> {
+ bits<4> rd;
+ let sz{5-4} = 0;
+ let sz{3-0} = rd;
}
- def zzi : RM<
- opc, (outs RC:$sx), (ins immOp2:$imm32),
- !strconcat(opcStr, " $sx, $imm32")> {
- let cy = 0;
- let sy = 0;
- let cz = 0;
- let sz = 0;
- let hasSideEffects = 0;
+ let cy = 0 in
+ def i : RR<opc, (outs RCo:$sx), (ins RDOp:$rd, simm7:$sy),
+ !strconcat(opcStr, "${rd} $sx, $sy")> {
+ bits<4> rd;
+ let sz{5-4} = 0;
+ let sz{3-0} = rd;
}
}
-// Multiclass for RR type instructions
+// Multiclass for floating point conversion instructions.
+// e.g. CVTDW, CVTSW, CVTDL, etc.
+let cz = 0, sz = 0, hasSideEffects = 0 in
+multiclass CVTm<string opcStr, bits<8> opc, RegisterClass RCo, ValueType Tyo,
+ RegisterClass RCi, ValueType Tyi,
+ SDPatternOperator OpNode = null_frag> {
+ def r : RR<opc, (outs RCo:$sx), (ins RCi:$sy),
+ !strconcat(opcStr, " $sx, $sy"),
+ [(set Tyo:$sx, (OpNode Tyi:$sy))]>;
+ let cy = 0 in
+ def i : RR<opc, (outs RCo:$sx), (ins simm7:$sy),
+ !strconcat(opcStr, " $sx, $sy")>;
+}
-multiclass RRmrr<string opcStr, bits<8>opc,
- RegisterClass RCo, ValueType Tyo,
- RegisterClass RCi, ValueType Tyi> {
- def rr : RR<opc, (outs RCo:$sx), (ins RCi:$sy, RCi:$sz),
- !strconcat(opcStr, " $sx, $sy, $sz")>
- { let cy = 1; let cz = 1; let hasSideEffects = 0; }
+// Multiclass for PFCH instructions.
+// e.g. PFCH
+let sx = 0, hasSideEffects = 0 in
+multiclass PFCHm<string opcStr, bits<8>opc> {
+ def rri : RM<opc, (outs), (ins MEMrri:$addr), !strconcat(opcStr, " $addr"),
+ [(prefetch ADDRrri:$addr, imm, imm, (i32 1))]>;
+ let cy = 0 in
+ def rii : RM<opc, (outs), (ins MEMrii:$addr), !strconcat(opcStr, " $addr"),
+ [(prefetch ADDRrii:$addr, imm, imm, (i32 1))]>;
+ let cz = 0 in
+ def zri : RM<opc, (outs), (ins MEMzri:$addr), !strconcat(opcStr, " $addr"),
+ [(prefetch ADDRzri:$addr, imm, imm, (i32 1))]>;
+ let cy = 0, cz = 0 in
+ def zii : RM<opc, (outs), (ins MEMzii:$addr), !strconcat(opcStr, " $addr"),
+ [(prefetch ADDRzii:$addr, imm, imm, (i32 1))]>;
}
-multiclass RRmri<string opcStr, bits<8>opc,
- RegisterClass RCo, ValueType Tyo,
- RegisterClass RCi, ValueType Tyi, Operand immOp> {
- // VE calculates (OpNode $sy, $sz), but llvm requires to have immediate
- // in RHS, so we use following definition.
- def ri : RR<opc, (outs RCo:$sx), (ins RCi:$sz, immOp:$sy),
- !strconcat(opcStr, " $sx, $sy, $sz")>
- { let cy = 0; let cz = 1; let hasSideEffects = 0; }
+// Multiclass for CAS instructions.
+// e.g. TS1AML, TS1AMW, TS2AM, etc.
+let Constraints = "$dest = $sd", DisableEncoding = "$sd",
+ mayStore=1, mayLoad = 1, hasSideEffects = 0 in
+multiclass RRCAStgm<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty,
+ Operand immOp, Operand MEM, Operand ADDR,
+ SDPatternOperator OpNode = null_frag> {
+ def r : RRM<opc, (outs RC:$dest), (ins MEM:$addr, RC:$sy, RC:$sd),
+ !strconcat(opcStr, " $dest, $addr, $sy"),
+ [(set Ty:$dest, (OpNode ADDR:$addr, Ty:$sy, Ty:$sd))]>;
+ let cy = 0 in
+ def i : RRM<opc, (outs RC:$dest), (ins MEM:$addr, immOp:$sy, RC:$sd),
+ !strconcat(opcStr, " $dest, $addr, $sy"),
+ [(set Ty:$dest, (OpNode ADDR:$addr, (Ty immOp:$sy), Ty:$sd))]>;
+}
+multiclass RRCASm<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty,
+ Operand immOp, SDPatternOperator OpNode = null_frag> {
+ defm ri : RRCAStgm<opcStr, opc, RC, Ty, immOp, MEMriRRM, ADDRri, OpNode>;
+ let cz = 0 in
+ defm zi : RRCAStgm<opcStr, opc, RC, Ty, immOp, MEMziRRM, ADDRzi, OpNode>;
}
-multiclass RRmiz<string opcStr, bits<8>opc,
- RegisterClass RCo, ValueType Tyo,
- RegisterClass RCi, ValueType Tyi, Operand immOp> {
- def zi : RR<opc, (outs RCo:$sx), (ins immOp:$sy),
- !strconcat(opcStr, " $sx, $sy")>
- { let cy = 0; let cz = 0; let sz = 0; let hasSideEffects = 0; }
+// Multiclass for branch instructions
+// e.g. BCFL, BCFW, BCFD, etc.
+let isBranch = 1, isTerminator = 1, isIndirectBranch = 1, hasSideEffects = 0 in
+multiclass BCbpfm<string opcStr, string cmpStr, bits<8> opc, dag cond,
+ Operand ADDR> {
+ let bpf = 0 /* NONE */ in
+ def "" : CF<opc, (outs), !con(cond, (ins ADDR:$addr)),
+ !strconcat(opcStr, " ", cmpStr, "$addr")>;
+  let bpf = 2 /* NOT TAKEN */ in
+ def _nt : CF<opc, (outs), !con(cond, (ins ADDR:$addr)),
+ !strconcat(opcStr, ".nt ", cmpStr, "$addr")>;
+  let bpf = 3 /* TAKEN */ in
+ def _t : CF<opc, (outs), !con(cond, (ins ADDR:$addr)),
+ !strconcat(opcStr, ".t ", cmpStr, "$addr")>;
+}
+multiclass BCtgm<string opcStr, string cmpStr, bits<8> opc, dag cond> {
+ defm ri : BCbpfm<opcStr, cmpStr, opc, cond, MEMriASX>;
+ let cz = 0 in defm zi : BCbpfm<opcStr, cmpStr, opc, cond, MEMziASX>;
+}
+multiclass BCm<string opcStr, string opcStrAt, string opcStrAf, bits<8> opc,
+ RegisterClass RC, Operand immOp> {
+ let DecoderMethod = "DecodeBranchCondition" in
+ defm r : BCtgm<opcStr, "$comp, ", opc, (ins CCOp:$cond, RC:$comp)>;
+ let DecoderMethod = "DecodeBranchCondition", cy = 0 in
+ defm i : BCtgm<opcStr, "$comp, ", opc, (ins CCOp:$cond, immOp:$comp)>;
+ let DecoderMethod = "DecodeBranchConditionAlways", cy = 0, sy = 0,
+ cf = 15 /* AT */, isBarrier = 1 in
+ defm a : BCtgm<opcStrAt, "", opc, (ins)>;
+ let DecoderMethod = "DecodeBranchConditionAlways", cy = 0, sy = 0,
+ cf = 0 /* AF */ in
+ defm na : BCtgm<opcStrAf, "", opc, (ins)>;
}
-multiclass RRNDmrm<string opcStr, bits<8>opc,
- RegisterClass RCo, ValueType Tyo,
- RegisterClass RCi, ValueType Tyi, Operand immOp2> {
- def rm0 : RR<opc, (outs RCo:$sx), (ins RCi:$sy, immOp2:$sz),
- !strconcat(opcStr, " $sx, $sy, (${sz})0")> {
- let cy = 1;
- let cz = 0;
- let sz{6} = 1;
- // (guess) tblgen conservatively assumes hasSideEffects when
- // it fails to infer from a pattern.
- let hasSideEffects = 0;
- }
+// Multiclass for relative branch instructions
+// e.g. BRCFL, BRCFW, BRCFD, etc.
+let isBranch = 1, isTerminator = 1, hasSideEffects = 0 in
+multiclass BCRbpfm<string opcStr, string cmpStr, bits<8> opc, dag cond> {
+ let bpf = 0 /* NONE */ in
+ def "" : CF<opc, (outs), !con(cond, (ins brtarget32:$imm32)),
+ !strconcat(opcStr, " ", cmpStr, "$imm32")>;
+  let bpf = 2 /* NOT TAKEN */ in
+ def _nt : CF<opc, (outs), !con(cond, (ins brtarget32:$imm32)),
+ !strconcat(opcStr, ".nt ", cmpStr, "$imm32")>;
+  let bpf = 3 /* TAKEN */ in
+ def _t : CF<opc, (outs), !con(cond, (ins brtarget32:$imm32)),
+ !strconcat(opcStr, ".t ", cmpStr, "$imm32")>;
+}
+multiclass BCRm<string opcStr, string opcStrAt, string opcStrAf, bits<8> opc,
+ RegisterClass RC, Operand immOp> {
+ defm rr : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, RC:$sy, RC:$sz)>;
+ let cy = 0 in
+ defm ir : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, immOp:$sy, RC:$sz)>;
+ let cy = 0, sy = 0, cz = 0, sz = 0, cf = 15 /* AT */, isBarrier = 1 in
+ defm a : BCRbpfm<opcStrAt, "", opc, (ins)>;
+ let cy = 0, sy = 0, cz = 0, sz = 0, cf = 0 /* AF */ in
+ defm na : BCRbpfm<opcStrAf, "", opc, (ins)>;
}
-// Used by add, mul, div, and similar commutative instructions
-// The order of operands are "$sx, $sy, $sz"
+// Multiclass for communication register instructions.
+// e.g. LCR
+let hasSideEffects = 1 in
+multiclass LOADCRm<string opcStr, bits<8>opc, RegisterClass RC> {
+ def rr : RR<opc, (outs RC:$sx), (ins RC:$sz, RC:$sy),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
+ let cy = 0 in def ri : RR<opc, (outs RC:$sx), (ins RC:$sz, simm7:$sy),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
+ let cz = 0 in def zr : RR<opc, (outs RC:$sx), (ins zero:$sz, RC:$sy),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
+ let cy = 0, cz = 0 in
+ def zi : RR<opc, (outs RC:$sx), (ins zero:$sz, simm7:$sy),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
+}
-multiclass RRm<string opcStr, bits<8>opc,
- RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> :
- RRmrr<opcStr, opc, RC, Ty, RC, Ty>,
- RRmri<opcStr, opc, RC, Ty, RC, Ty, immOp>,
- RRmiz<opcStr, opc, RC, Ty, RC, Ty, immOp>,
- RRNDmrm<opcStr, opc, RC, Ty, RC, Ty, immOp2>;
-
-// Branch multiclass
-let isBranch = 1, isTerminator = 1, hasDelaySlot = 1 in
-multiclass BCRm<string opcStr, string opcStrAt, bits<8> opc,
- RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> {
- def rr : CF<
- opc, (outs),
- (ins CCOp:$cf, RC:$sy, RC:$sz, brtarget32:$imm32),
- !strconcat(opcStr, " $sy, $sz, $imm32")> {
- let cy = 1;
- let cz = 1;
- let hasSideEffects = 0;
- }
+// Multiclass for communication register instructions.
+// e.g. SCR
+let hasSideEffects = 1 in
+multiclass STORECRm<string opcStr, bits<8>opc, RegisterClass RC> {
+ def rr : RR<opc, (outs), (ins RC:$sz, RC:$sy, RC:$sx),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
+ let cy = 0 in def ri : RR<opc, (outs), (ins RC:$sz, simm7:$sy, RC:$sx),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
+ let cz = 0 in def zr : RR<opc, (outs), (ins zero:$sz, RC:$sy, RC:$sx),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
+ let cy = 0, cz = 0 in
+ def zi : RR<opc, (outs), (ins zero:$sz, simm7:$sy, RC:$sx),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
}
+// Multiclass for communication register instructions.
+// e.g. FIDCR
+let cz = 0, hasSideEffects = 1 in
+multiclass FIDCRm<string opcStr, bits<8>opc, RegisterClass RC> {
+ def ri : RR<opc, (outs RC:$sx), (ins RC:$sy, uimm3:$sz),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
+ let cy = 0 in def ii : RR<opc, (outs RC:$sx), (ins simm7:$sy, uimm3:$sz),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
+}
+
+// Multiclass for LHM instruction.
+let mayLoad = 1, hasSideEffects = 0 in
+multiclass LHMm<string opcStr, bits<8> opc, RegisterClass RC> {
+ def ri : RRMHM<opc, (outs RC:$dest), (ins MEMriHM:$addr),
+ !strconcat(opcStr, " $dest, $addr")>;
+ let cz = 0 in
+ def zi : RRMHM<opc, (outs RC:$dest), (ins MEMziHM:$addr),
+ !strconcat(opcStr, " $dest, $addr")>;
+}
+
+// Multiclass for SHM instruction.
+let mayStore = 1, hasSideEffects = 0 in
+multiclass SHMm<string opcStr, bits<8> opc, RegisterClass RC> {
+ def ri : RRMHM<opc, (outs), (ins MEMriHM:$addr, RC:$sx),
+ !strconcat(opcStr, " $sx, $addr")>;
+ let cz = 0 in
+ def zi : RRMHM<opc, (outs), (ins MEMziHM:$addr, RC:$sx),
+ !strconcat(opcStr, " $sx, $addr")>;
+}
//===----------------------------------------------------------------------===//
// Instructions
+//
+// Define here all scalar instructions defined in the SX-Aurora TSUBASA
+// Architecture Guide. For the mnemonics, we use those defined in the Vector
+// Engine Assembly Language Reference Manual.
//===----------------------------------------------------------------------===//
-// LEA and LEASL instruction (load 32 bit imm to low or high part)
-let cx = 0 in
-defm LEA : RMm<"lea", 0x06, I64, i64, simm7Op64, simm32Op64>;
+//-----------------------------------------------------------------------------
+// Section 8.2 - Load/Store instructions
+//-----------------------------------------------------------------------------
+
+// Multiclass for generic RM instructions
+multiclass RMm<string opcStr, bits<8>opc, RegisterClass RC> {
+ def rri : RM<opc, (outs RC:$dest), (ins MEMrri:$addr),
+ !strconcat(opcStr, " $dest, $addr"), []>;
+ let cy = 0 in
+ def rii : RM<opc, (outs RC:$dest), (ins MEMrii:$addr),
+ !strconcat(opcStr, " $dest, $addr"), []>;
+ let cz = 0 in
+ def zri : RM<opc, (outs RC:$dest), (ins MEMzri:$addr),
+ !strconcat(opcStr, " $dest, $addr"), []>;
+ let cy = 0, cz = 0 in
+ def zii : RM<opc, (outs RC:$dest), (ins MEMzii:$addr),
+ !strconcat(opcStr, " $dest, $addr"), []>;
+}
+
+// Section 8.2.1 - LEA
+let cx = 0, DecoderMethod = "DecodeLoadI64" in
+defm LEA : RMm<"lea", 0x06, I64>;
+let cx = 1, DecoderMethod = "DecodeLoadI64" in
+defm LEASL : RMm<"lea.sl", 0x06, I64>;
+let cx = 0, DecoderMethod = "DecodeLoadI32", isCodeGenOnly = 1 in
+defm LEA32 : RMm<"lea", 0x06, I32>;
+
+def : Pat<(iPTR ADDRrri:$addr), (LEArri MEMrri:$addr)>;
+def : Pat<(iPTR ADDRrii:$addr), (LEArii MEMrii:$addr)>;
+def : Pat<(add I64:$base, simm32:$disp), (LEArii $base, 0, (LO32 $disp))>;
+def : Pat<(add I64:$base, lozero:$disp), (LEASLrii $base, 0, (HI32 $disp))>;
+def : Pat<(add I32:$base, simm32:$disp),
+ (LEA32rii (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $base, sub_i32), 0,
+ (LO32 $disp))>;
+
+def lea_add : PatFrags<(ops node:$base, node:$idx, node:$disp),
+ [(add (add node:$base, node:$idx), node:$disp),
+ (add (add node:$base, node:$disp), node:$idx)]>;
+def : Pat<(lea_add I64:$base, simm7:$idx, simm32:$disp),
+ (LEArii $base, (LO7 $idx), (LO32 $disp))>;
+def : Pat<(lea_add I64:$base, I64:$idx, simm32:$disp),
+ (LEArri $base, $idx, (LO32 $disp))>;
+def : Pat<(lea_add I64:$base, simm7:$idx, lozero:$disp),
+ (LEASLrii $base, (LO7 $idx), (HI32 $disp))>;
+def : Pat<(lea_add I64:$base, I64:$idx, lozero:$disp),
+ (LEASLrri $base, $idx, (HI32 $disp))>;
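// Taken together, the lea_add patterns above fold a chained address
// computation such as (add (add %base, %idx), 24) into a single LEArri with
// displacement 24, while the lozero variants select LEASLrri/LEASLrii to
// materialize displacements whose low 32 bits are zero via the high half.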
+
+// Multiclass for load instructions.
+let mayLoad = 1, hasSideEffects = 0 in
+multiclass LOADm<string opcStr, bits<8> opc, RegisterClass RC, ValueType Ty,
+ SDPatternOperator OpNode = null_frag> {
+ def rri : RM<opc, (outs RC:$dest), (ins MEMrri:$addr),
+ !strconcat(opcStr, " $dest, $addr"),
+ [(set Ty:$dest, (OpNode ADDRrri:$addr))]>;
+ let cy = 0 in
+ def rii : RM<opc, (outs RC:$dest), (ins MEMrii:$addr),
+ !strconcat(opcStr, " $dest, $addr"),
+ [(set Ty:$dest, (OpNode ADDRrii:$addr))]>;
+ let cz = 0 in
+ def zri : RM<opc, (outs RC:$dest), (ins MEMzri:$addr),
+ !strconcat(opcStr, " $dest, $addr"),
+ [(set Ty:$dest, (OpNode ADDRzri:$addr))]>;
+ let cy = 0, cz = 0 in
+ def zii : RM<opc, (outs RC:$dest), (ins MEMzii:$addr),
+ !strconcat(opcStr, " $dest, $addr"),
+ [(set Ty:$dest, (OpNode ADDRzii:$addr))]>;
+}
+
+// Section 8.2.2 - LDS
+let DecoderMethod = "DecodeLoadI64" in
+defm LD : LOADm<"ld", 0x01, I64, i64, load>;
+def : Pat<(f64 (load ADDRrri:$addr)), (LDrri MEMrri:$addr)>;
+def : Pat<(f64 (load ADDRrii:$addr)), (LDrii MEMrii:$addr)>;
+def : Pat<(f64 (load ADDRzri:$addr)), (LDzri MEMzri:$addr)>;
+def : Pat<(f64 (load ADDRzii:$addr)), (LDzii MEMzii:$addr)>;
+
+// Section 8.2.3 - LDU
+let DecoderMethod = "DecodeLoadF32" in
+defm LDU : LOADm<"ldu", 0x02, F32, f32, load>;
+
+// Section 8.2.4 - LDL
+let DecoderMethod = "DecodeLoadI32" in
+defm LDLSX : LOADm<"ldl.sx", 0x03, I32, i32, load>;
+let cx = 1, DecoderMethod = "DecodeLoadI32" in
+defm LDLZX : LOADm<"ldl.zx", 0x03, I32, i32, load>;
+
+// Section 8.2.5 - LD2B
+let DecoderMethod = "DecodeLoadI32" in
+defm LD2BSX : LOADm<"ld2b.sx", 0x04, I32, i32, sextloadi16>;
+let cx = 1, DecoderMethod = "DecodeLoadI32" in
+defm LD2BZX : LOADm<"ld2b.zx", 0x04, I32, i32, zextloadi16>;
+
+// Section 8.2.6 - LD1B
+let DecoderMethod = "DecodeLoadI32" in
+defm LD1BSX : LOADm<"ld1b.sx", 0x05, I32, i32, sextloadi8>;
+let cx = 1, DecoderMethod = "DecodeLoadI32" in
+defm LD1BZX : LOADm<"ld1b.zx", 0x05, I32, i32, zextloadi8>;
+
+// Multiclass for store instructions.
+let mayStore = 1 in
+multiclass STOREm<string opcStr, bits<8> opc, RegisterClass RC, ValueType Ty,
+ SDPatternOperator OpNode = null_frag> {
+ def rri : RM<opc, (outs), (ins MEMrri:$addr, RC:$sx),
+ !strconcat(opcStr, " $sx, $addr"),
+ [(OpNode Ty:$sx, ADDRrri:$addr)]>;
+ let cy = 0 in
+ def rii : RM<opc, (outs), (ins MEMrii:$addr, RC:$sx),
+ !strconcat(opcStr, " $sx, $addr"),
+ [(OpNode Ty:$sx, ADDRrii:$addr)]>;
+ let cz = 0 in
+ def zri : RM<opc, (outs), (ins MEMzri:$addr, RC:$sx),
+ !strconcat(opcStr, " $sx, $addr"),
+ [(OpNode Ty:$sx, ADDRzri:$addr)]>;
+ let cy = 0, cz = 0 in
+ def zii : RM<opc, (outs), (ins MEMzii:$addr, RC:$sx),
+ !strconcat(opcStr, " $sx, $addr"),
+ [(OpNode Ty:$sx, ADDRzii:$addr)]>;
+}
+
+// Section 8.2.7 - STS
+let DecoderMethod = "DecodeStoreI64" in
+defm ST : STOREm<"st", 0x11, I64, i64, store>;
+def : Pat<(store f64:$src, ADDRrri:$addr), (STrri MEMrri:$addr, $src)>;
+def : Pat<(store f64:$src, ADDRrii:$addr), (STrii MEMrii:$addr, $src)>;
+def : Pat<(store f64:$src, ADDRzri:$addr), (STzri MEMzri:$addr, $src)>;
+def : Pat<(store f64:$src, ADDRzii:$addr), (STzii MEMzii:$addr, $src)>;
+
+// Section 8.2.8 - STU
+let DecoderMethod = "DecodeStoreF32" in
+defm STU : STOREm<"stu", 0x12, F32, f32, store>;
+
+// Section 8.2.9 - STL
+let DecoderMethod = "DecodeStoreI32" in
+defm STL : STOREm<"stl", 0x13, I32, i32, store>;
+
+// Section 8.2.10 - ST2B
+let DecoderMethod = "DecodeStoreI32" in
+defm ST2B : STOREm<"st2b", 0x14, I32, i32, truncstorei16>;
+
+// Section 8.2.11 - ST1B
+let DecoderMethod = "DecodeStoreI32" in
+defm ST1B : STOREm<"st1b", 0x15, I32, i32, truncstorei8>;
+
+// Section 8.2.12 - DLDS
+let DecoderMethod = "DecodeLoadI64" in
+defm DLD : LOADm<"dld", 0x09, I64, i64, load>;
+
+// Section 8.2.13 - DLDU
+let DecoderMethod = "DecodeLoadF32" in
+defm DLDU : LOADm<"dldu", 0x0a, F32, f32, load>;
+
+// Section 8.2.14 - DLDL
+let DecoderMethod = "DecodeLoadI32" in
+defm DLDLSX : LOADm<"dldl.sx", 0x0b, I32, i32, load>;
+let cx = 1, DecoderMethod = "DecodeLoadI32" in
+defm DLDLZX : LOADm<"dldl.zx", 0x0b, I32, i32, load>;
+
+// Section 8.2.15 - PFCH
+let DecoderMethod = "DecodeASX" in
+defm PFCH : PFCHm<"pfch", 0x0c>;
+
+// Section 8.2.16 - TS1AM (Test and Set 1 AM)
+let DecoderMethod = "DecodeTS1AMI64" in
+defm TS1AML : RRCASm<"ts1am.l", 0x42, I64, i64, uimm7>;
+let DecoderMethod = "DecodeTS1AMI32", cx = 1 in
+defm TS1AMW : RRCASm<"ts1am.w", 0x42, I32, i32, uimm7>;
+
+// Section 8.2.17 - TS2AM (Test and Set 2 AM)
+let DecoderMethod = "DecodeTS1AMI64" in
+defm TS2AM : RRCASm<"ts2am", 0x43, I64, i64, uimm7>;
+
+// Section 8.2.18 - TS3AM (Test and Set 3 AM)
+let DecoderMethod = "DecodeTS1AMI64" in
+defm TS3AM : RRCASm<"ts3am", 0x52, I64, i64, uimm1>;
+
+// Section 8.2.19 - ATMAM (Atomic AM)
+let DecoderMethod = "DecodeTS1AMI64" in
+defm ATMAM : RRCASm<"atmam", 0x53, I64, i64, uimm0to2>;
+
+// Section 8.2.20 - CAS (Compare and Swap)
+let DecoderMethod = "DecodeCASI64" in
+defm CASL : RRCASm<"cas.l", 0x62, I64, i64, simm7>;
+let DecoderMethod = "DecodeCASI32", cx = 1 in
+defm CASW : RRCASm<"cas.w", 0x62, I32, i32, simm7>;
+
+//-----------------------------------------------------------------------------
+// Section 8.3 - Transfer Control Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.3.1 - FENCE (Fence)
+let hasSideEffects = 1 in {
+ let avo = 1 in def FENCEI : RRFENCE<0x20, (outs), (ins), "fencei">;
+ def FENCEM : RRFENCE<0x20, (outs), (ins uimm2:$kind), "fencem $kind"> {
+ bits<2> kind;
+ let lf = kind{1};
+ let sf = kind{0};
+ }
+ def FENCEC : RRFENCE<0x20, (outs), (ins uimm3:$kind), "fencec $kind"> {
+ bits<3> kind;
+ let c2 = kind{2};
+ let c1 = kind{1};
+ let c0 = kind{0};
+ }
+}
+
+// Section 8.3.2 - SVOB (Set Vector Out-of-order memory access Boundary)
+let sx = 0, cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 1 in
+def SVOB : RR<0x30, (outs), (ins), "svob">;
+
+//-----------------------------------------------------------------------------
+// Section 8.4 - Fixed-point Operation Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.4.1 - ADD (Add)
+defm ADDUL : RRm<"addu.l", 0x48, I64, i64>;
+let cx = 1 in defm ADDUW : RRm<"addu.w", 0x48, I32, i32>;
+
+// Section 8.4.2 - ADS (Add Single)
+defm ADDSWSX : RRm<"adds.w.sx", 0x4A, I32, i32, add>;
+let cx = 1 in defm ADDSWZX : RRm<"adds.w.zx", 0x4A, I32, i32>;
+
+// Section 8.4.3 - ADX (Add)
+defm ADDSL : RRm<"adds.l", 0x59, I64, i64, add>;
+
+// Section 8.4.4 - SUB (Subtract)
+defm SUBUL : RRNCm<"subu.l", 0x58, I64, i64>;
+let cx = 1 in defm SUBUW : RRNCm<"subu.w", 0x58, I32, i32>;
+
+// Section 8.4.5 - SBS (Subtract Single)
+defm SUBSWSX : RRNCm<"subs.w.sx", 0x5A, I32, i32, sub>;
+let cx = 1 in defm SUBSWZX : RRNCm<"subs.w.zx", 0x5A, I32, i32>;
+
+// Section 8.4.6 - SBX (Subtract)
+defm SUBSL : RRNCm<"subs.l", 0x5B, I64, i64, sub>;
+
+// Section 8.4.7 - MPY (Multiply)
+defm MULUL : RRm<"mulu.l", 0x49, I64, i64>;
+let cx = 1 in defm MULUW : RRm<"mulu.w", 0x49, I32, i32>;
+
+// Section 8.4.8 - MPS (Multiply Single)
+defm MULSWSX : RRm<"muls.w.sx", 0x4B, I32, i32, mul>;
+let cx = 1 in defm MULSWZX : RRm<"muls.w.zx", 0x4B, I32, i32>;
+
+// Section 8.4.9 - MPX (Multiply)
+defm MULSL : RRm<"muls.l", 0x6E, I64, i64, mul>;
+
+// Section 8.4.10 - MPD (Multiply)
+defm MULSLW : RRbm<"muls.l.w", 0x6B, I64, i64, I32, i32>;
+
+// Section 8.4.11 - DIV (Divide)
+defm DIVUL : RRNCm<"divu.l", 0x6F, I64, i64, udiv>;
+let cx = 1 in defm DIVUW : RRNCm<"divu.w", 0x6F, I32, i32, udiv>;
+
+// Section 8.4.12 - DVS (Divide Single)
+defm DIVSWSX : RRNCm<"divs.w.sx", 0x7B, I32, i32, sdiv>;
+let cx = 1 in defm DIVSWZX : RRNCm<"divs.w.zx", 0x7B, I32, i32>;
+
+// Section 8.4.13 - DVX (Divide)
+defm DIVSL : RRNCm<"divs.l", 0x7F, I64, i64, sdiv>;
+
+// Section 8.4.14 - CMP (Compare)
+defm CMPUL : RRNCm<"cmpu.l", 0x55, I64, i64>;
+let cx = 1 in defm CMPUW : RRNCm<"cmpu.w", 0x55, I32, i32>;
+
+// Section 8.4.15 - CPS (Compare Single)
+defm CMPSWSX : RRNCm<"cmps.w.sx", 0x7A, I32, i32>;
+let cx = 1 in defm CMPSWZX : RRNCm<"cmps.w.zx", 0x7A, I32, i32>;
+
+// Section 8.4.16 - CPX (Compare)
+defm CMPSL : RRNCm<"cmps.l", 0x6A, I64, i64>;
+
+// Section 8.4.17 - CMS (Compare and Select Maximum/Minimum Single)
+// cx: sx/zx, cw: max/min
+defm MAXSWSX : RRm<"maxs.w.sx", 0x78, I32, i32>;
+let cx = 1 in defm MAXSWZX : RRm<"maxs.w.zx", 0x78, I32, i32>;
+let cw = 1 in defm MINSWSX : RRm<"mins.w.sx", 0x78, I32, i32>;
+let cx = 1, cw = 1 in defm MINSWZX : RRm<"mins.w.zx", 0x78, I32, i32>;
+
+// Section 8.4.18 - CMX (Compare and Select Maximum/Minimum)
+defm MAXSL : RRm<"maxs.l", 0x68, I64, i64>;
+let cw = 1 in defm MINSL : RRm<"mins.l", 0x68, I64, i64>;
+
+//-----------------------------------------------------------------------------
+// Section 8.5 - Logical Operation Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.5.1 - AND (AND)
+defm AND : RRm<"and", 0x44, I64, i64, and>;
+let isCodeGenOnly = 1 in defm AND32 : RRm<"and", 0x44, I32, i32, and>;
+
+// Section 8.5.2 - OR (OR)
+defm OR : RRm<"or", 0x45, I64, i64, or>;
+let isCodeGenOnly = 1 in defm OR32 : RRm<"or", 0x45, I32, i32, or>;
+
+// Section 8.5.3 - XOR (Exclusive OR)
+defm XOR : RRm<"xor", 0x46, I64, i64, xor>;
+let isCodeGenOnly = 1 in defm XOR32 : RRm<"xor", 0x46, I32, i32, xor>;
+
+// Section 8.5.4 - EQV (Equivalence)
+defm EQV : RRm<"eqv", 0x47, I64, i64>;
+
+// Section 8.5.5 - NND (Negate AND)
+def and_not : PatFrags<(ops node:$x, node:$y),
+ [(and (not node:$x), node:$y)]>;
+defm NND : RRNCm<"nnd", 0x54, I64, i64, and_not>;
+
+// Section 8.5.6 - MRG (Merge)
+defm MRG : RRMRGm<"mrg", 0x56, I64, i64>;
+
+// Section 8.5.7 - LDZ (Leading Zero Count)
+defm LDZ : RRI1m<"ldz", 0x67, I64, i64, ctlz>;
+
+// Section 8.5.8 - PCNT (Population Count)
+defm PCNT : RRI1m<"pcnt", 0x38, I64, i64, ctpop>;
+
+// Section 8.5.9 - BRV (Bit Reverse)
+defm BRV : RRI1m<"brv", 0x39, I64, i64, bitreverse>;
+
+// Section 8.5.10 - BSWP (Byte Swap)
+defm BSWP : RRSWPm<"bswp", 0x2B, I64, i64>;
+
+// Section 8.5.11 - CMOV (Conditional Move)
+let cw = 0, cw2 = 0 in defm CMOVL : RRCMOVm<"cmov.l.${cfw}", 0x3B, I64, i64>;
+let cw = 1, cw2 = 0 in defm CMOVW : RRCMOVm<"cmov.w.${cfw}", 0x3B, I32, i32>;
+let cw = 0, cw2 = 1 in defm CMOVD : RRCMOVm<"cmov.d.${cfw}", 0x3B, I64, f64>;
+let cw = 1, cw2 = 1 in defm CMOVS : RRCMOVm<"cmov.s.${cfw}", 0x3B, F32, f32>;
+def : MnemonicAlias<"cmov.l", "cmov.l.at">;
+def : MnemonicAlias<"cmov.w", "cmov.w.at">;
+def : MnemonicAlias<"cmov.d", "cmov.d.at">;
+def : MnemonicAlias<"cmov.s", "cmov.s.at">;
+
+//-----------------------------------------------------------------------------
+// Section 8.6 - Shift Operation Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.6.1 - SLL (Shift Left Logical)
+defm SLL : RRIm<"sll", 0x65, I64, i64, shl>;
+
+// Section 8.6.2 - SLD (Shift Left Double)
+defm SLD : RRILDm<"sld", 0x64, I64, i64>;
+
+// Section 8.6.3 - SRL (Shift Right Logical)
+defm SRL : RRIm<"srl", 0x75, I64, i64, srl>;
+
+// Section 8.6.4 - SRD (Shift Right Double)
+defm SRD : RRIRDm<"srd", 0x74, I64, i64>;
+
+// Section 8.6.5 - SLA (Shift Left Arithmetic)
+defm SLAWSX : RRIm<"sla.w.sx", 0x66, I32, i32, shl>;
+let cx = 1 in defm SLAWZX : RRIm<"sla.w.zx", 0x66, I32, i32>;
+
+// Section 8.6.6 - SLAX (Shift Left Arithmetic)
+defm SLAL : RRIm<"sla.l", 0x57, I64, i64>;
+
+// Section 8.6.7 - SRA (Shift Right Arithmetic)
+defm SRAWSX : RRIm<"sra.w.sx", 0x76, I32, i32, sra>;
+let cx = 1 in defm SRAWZX : RRIm<"sra.w.zx", 0x76, I32, i32>;
+
+// Section 8.6.8 - SRAX (Shift Right Arithmetic)
+defm SRAL : RRIm<"sra.l", 0x77, I64, i64, sra>;
+
+def : Pat<(i32 (srl i32:$src, (i32 simm7:$val))),
+ (EXTRACT_SUBREG (SRLri (ANDrm (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ $src, sub_i32), !add(32, 64)), imm:$val), sub_i32)>;
+def : Pat<(i32 (srl i32:$src, i32:$val)),
+ (EXTRACT_SUBREG (SRLrr (ANDrm (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ $src, sub_i32), !add(32, 64)), $val), sub_i32)>;
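+
+// A note on the "!add(32, 64)" style operands used with ANDrm here and below:
+// in these patterns a value of 64+m encodes the mask (m)0, i.e. m leading
+// zeros followed by (64-m) ones, so !add(32, 64) is (32)0 = 0x00000000ffffffff.
+// The i32 shift patterns above therefore clear the (undefined) upper half of
+// the 64-bit register before shifting; roughly (illustrative only):
+//   and %tmp, %src, (32)0     ; keep only the low 32 bits
+//   srl %res, %tmp, %amount   ; 64-bit logical shift right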
+
+//-----------------------------------------------------------------------------
+// Section 8.7 - Floating-point Arithmetic Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.7.1 - FAD (Floating Add)
+defm FADDD : RRFm<"fadd.d", 0x4C, I64, f64, fadd>;
+let cx = 1 in
+defm FADDS : RRFm<"fadd.s", 0x4C, F32, f32, fadd, simm7fp, mimmfp32>;
+
+// Section 8.7.2 - FSB (Floating Subtract)
+defm FSUBD : RRFm<"fsub.d", 0x5C, I64, f64, fsub>;
+let cx = 1 in
+defm FSUBS : RRFm<"fsub.s", 0x5C, F32, f32, fsub, simm7fp, mimmfp32>;
+
+// Section 8.7.3 - FMP (Floating Multiply)
+defm FMULD : RRFm<"fmul.d", 0x4D, I64, f64, fmul>;
+let cx = 1 in
+defm FMULS : RRFm<"fmul.s", 0x4D, F32, f32, fmul, simm7fp, mimmfp32>;
+
+// Section 8.7.4 - FDV (Floating Divide)
+defm FDIVD : RRFm<"fdiv.d", 0x5D, I64, f64, fdiv>;
+let cx = 1 in
+defm FDIVS : RRFm<"fdiv.s", 0x5D, F32, f32, fdiv, simm7fp, mimmfp32>;
+
+// Section 8.7.5 - FCP (Floating Compare)
+defm FCMPD : RRFm<"fcmp.d", 0x7E, I64, f64>;
+let cx = 1 in
+defm FCMPS : RRFm<"fcmp.s", 0x7E, F32, f32, null_frag, simm7fp, mimmfp32>;
+
+// Section 8.7.6 - FCM (Floating Compare and Select Maximum/Minimum)
+// cx: double/float, cw: max/min
+let cw = 0, cx = 0 in
+defm FMAXD : RRFm<"fmax.d", 0x3E, I64, f64, fmaxnum>;
+let cw = 0, cx = 1 in
+defm FMAXS : RRFm<"fmax.s", 0x3E, F32, f32, fmaxnum, simm7fp, mimmfp32>;
+let cw = 1, cx = 0 in
+defm FMIND : RRFm<"fmin.d", 0x3E, I64, f64, fminnum>;
+let cw = 1, cx = 1 in
+defm FMINS : RRFm<"fmin.s", 0x3E, F32, f32, fminnum, simm7fp, mimmfp32>;
+
+// Section 8.7.7 - FAQ (Floating Add Quadruple)
+defm FADDQ : RRFm<"fadd.q", 0x6C, F128, f128>;
+
+// Section 8.7.8 - FSQ (Floating Subtract Quadruple)
+defm FSUBQ : RRFm<"fsub.q", 0x7C, F128, f128>;
+
+// Section 8.7.9 - FMQ (Floating Multiply Quadruple)
+defm FMULQ : RRFm<"fmul.q", 0x6D, F128, f128>;
+
+// Section 8.7.10 - FCQ (Floating Compare Quadruple)
+defm FCMPQ : RRNCbm<"fcmp.q", 0x7D, I64, f64, F128, f128, null_frag, simm7fp,
+ mimmfp>;
+
+// Section 8.7.11 - FIX (Convert to Fixed Point)
+// cx: double/float, cw: sx/zx, sz{0-3} = round
+let cx = 0, cw = 0 /* sign extend */ in
+defm CVTWDSX : CVTRDm<"cvt.w.d.sx", 0x4E, I32, i32, I64, f64>;
+let cx = 0, cw = 1 /* zero extend */ in
+defm CVTWDZX : CVTRDm<"cvt.w.d.zx", 0x4E, I32, i32, I64, f64>;
+let cx = 1, cw = 0 /* sign extend */ in
+defm CVTWSSX : CVTRDm<"cvt.w.s.sx", 0x4E, I32, i32, F32, f32>;
+let cx = 1, cw = 1 /* zero extend */ in
+defm CVTWSZX : CVTRDm<"cvt.w.s.zx", 0x4E, I32, i32, F32, f32>;
+
+// Section 8.7.12 - FIXX (Convert to Fixed Point)
+defm CVTLD : CVTRDm<"cvt.l.d", 0x4F, I64, i64, I64, f64>;
+
+// Section 8.7.13 - FLT (Convert to Floating Point)
+defm CVTDW : CVTm<"cvt.d.w", 0x5E, I64, f64, I32, i32, sint_to_fp>;
+let cx = 1 in
+defm CVTSW : CVTm<"cvt.s.w", 0x5E, F32, f32, I32, i32, sint_to_fp>;
+
+// Section 8.7.14 - FLTX (Convert to Floating Point)
+defm CVTDL : CVTm<"cvt.d.l", 0x5F, I64, f64, I64, i64, sint_to_fp>;
+
+// Section 8.7.15 - CVS (Convert to Single-format)
+defm CVTSD : CVTm<"cvt.s.d", 0x1F, F32, f32, I64, f64, fpround>;
+let cx = 1 in
+defm CVTSQ : CVTm<"cvt.s.q", 0x1F, F32, f32, F128, f128>;
+
+// Section 8.7.16 - CVD (Convert to Double-format)
+defm CVTDS : CVTm<"cvt.d.s", 0x0F, I64, f64, F32, f32, fpextend>;
+let cx = 1 in
+defm CVTDQ : CVTm<"cvt.d.q", 0x0F, I64, f64, F128, f128>;
+
+// Section 8.7.17 - CVQ (Convert to Quadruple-format)
+defm CVTQD : CVTm<"cvt.q.d", 0x2D, F128, f128, I64, f64>;
let cx = 1 in
-defm LEASL : RMm<"lea.sl", 0x06, I64, i64, simm7Op64, simm32Op64>;
+defm CVTQS : CVTm<"cvt.q.s", 0x2D, F128, f128, F32, f32>;
+
+//-----------------------------------------------------------------------------
+// Section 8.8 - Branch instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.8.1 - BC (Branch on Condition)
+defm BCFL : BCm<"b${cond}.l", "b.l", "baf.l", 0x19, I64, simm7>;
+
+// Indirect branch aliases
+def : Pat<(brind I64:$reg), (BCFLari_t $reg, 0)>;
+def : Pat<(brind tblockaddress:$imm), (BCFLazi_t 0, $imm)>;
+
+// Return instruction is a special case of jump.
+let Uses = [SX10], bpf = 3 /* TAKEN */, cf = 15 /* AT */, cy = 0, sy = 0,
+ sz = 10 /* SX10 */, imm32 = 0, isReturn = 1, isTerminator = 1,
+ isBarrier = 1, isCodeGenOnly = 1, hasSideEffects = 0 in
+def RET : CF<0x19, (outs), (ins), "b.l.t (, %s10)", [(retflag)]>;
+
+// Section 8.8.2 - BCS (Branch on Condition Single)
+defm BCFW : BCm<"b${cond}.w", "b.w", "baf.w", 0x1B, I32, simm7>;
+
+// Section 8.8.3 - BCF (Branch on Condition Floating Point)
+defm BCFD : BCm<"b${cond}.d", "b.d", "baf.d", 0x1C, I64, simm7fp>;
+let cx = 1 in
+defm BCFS : BCm<"b${cond}.s", "b.s", "baf.s", 0x1C, F32, simm7fp>;
+
+// Section 8.8.4 - BCR (Branch on Condition Relative)
+let cx = 0, cx2 = 0 in
+defm BRCFL : BCRm<"br${cf}.l", "br.l", "braf.l", 0x18, I64, simm7>;
+let cx = 1, cx2 = 0 in
+defm BRCFW : BCRm<"br${cf}.w", "br.w", "braf.w", 0x18, I32, simm7>;
+let cx = 0, cx2 = 1 in
+defm BRCFD : BCRm<"br${cf}.d", "br.d", "braf.d", 0x18, I64, simm7fp>;
+let cx = 1, cx2 = 1 in
+defm BRCFS : BCRm<"br${cf}.s", "br.s", "braf.s", 0x18, F32, simm7fp>;
+
+// Section 8.8.5 - BSIC (Branch and Save IC)
+let isCall = 1, hasSideEffects = 0, DecoderMethod = "DecodeCall" in
+defm BSIC : RMm<"bsic", 0x08, I64>;
+
+// Call instruction is a special case of BSIC.
+let Defs = [SX10], sx = 10 /* SX10 */, cy = 0, sy = 0, imm32 = 0,
+ isCall = 1, isCodeGenOnly = 1, hasSideEffects = 0 in
+def CALLr : RM<0x08, (outs), (ins I64:$sz, variable_ops),
+ "bsic %s10, (, $sz)", [(call i64:$sz)]>;
-// 5.3.2.2. Fixed-Point Arithmetic Operation Instructions
+//-----------------------------------------------------------------------------
+// Section 8.19 - Control Instructions
+//-----------------------------------------------------------------------------
-// ADX instruction
-let cx = 0 in
-defm ADX : RRm<"adds.l", 0x59, I64, i64, simm7Op64, uimm6Op64>;
+// Section 8.19.1 - SIC (Save Instruction Counter)
+let cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 1, Uses = [IC] in
+def SIC : RR<0x28, (outs I32:$sx), (ins), "sic $sx">;
-// 5.3.2.3. Logical Arithmetic Operation Instructions
+// Section 8.19.2 - LPM (Load Program Mode Flags)
+let sx = 0, cz = 0, sz = 0, hasSideEffects = 1, Defs = [PSW] in
+def LPM : RR<0x3a, (outs), (ins I64:$sy), "lpm $sy">;
-let cx = 0 in {
- defm AND : RRm<"and", 0x44, I64, i64, simm7Op64, uimm6Op64>;
- defm OR : RRm<"or", 0x45, I64, i64, simm7Op64, uimm6Op64>;
+// Section 8.19.3 - SPM (Save Program Mode Flags)
+let cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 1, Uses = [PSW] in
+def SPM : RR<0x2a, (outs I64:$sx), (ins), "spm $sx">;
+
+// Section 8.19.4 - LFR (Load Flag Register)
+let sx = 0, cz = 0, sz = 0, hasSideEffects = 1, Defs = [PSW] in {
+ def LFRr : RR<0x69, (outs), (ins I64:$sy), "lfr $sy">;
+ let cy = 0 in def LFRi : RR<0x69, (outs), (ins uimm6:$sy), "lfr $sy">;
}
-// Load and Store instructions
-// As 1st step, only uses sz and imm32 to represent $addr
-let mayLoad = 1, hasSideEffects = 0 in {
-let cy = 0, sy = 0, cz = 1 in {
-let cx = 0 in
-def LDSri : RM<
- 0x01, (outs I64:$sx), (ins MEMri:$addr),
- "ld $sx, $addr">;
+// Section 8.19.5 - SFR (Save Flag Register)
+let cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 1, Uses = [PSW] in
+def SFR : RR<0x29, (outs I64:$sx), (ins), "sfr $sx">;
+
+// Section 8.19.6 - SMIR (Save Miscellaneous Register)
+let cy = 0, cz = 0, sz = 0, hasSideEffects = 1 in {
+ def SMIR : RR<0x22, (outs I64:$sx), (ins MISC:$sy), "smir $sx, $sy">;
+}
+
+// Section 8.19.7 - NOP (No Operation)
+let sx = 0, cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0 in
+def NOP : RR<0x79, (outs), (ins), "nop">;
+
+// Section 8.19.8 - MONC (Monitor Call)
+let sx = 0, cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 1 in {
+ def MONC : RR<0x3F, (outs), (ins), "monc">;
+ let cx = 1, isTrap = 1 in def MONCHDB : RR<0x3F, (outs), (ins), "monc.hdb">;
}
+
+// Section 8.19.9 - LCR (Load Communication Register)
+defm LCR : LOADCRm<"lcr", 0x40, I64>;
+
+// Section 8.19.10 - SCR (Save Communication Register)
+defm SCR : STORECRm<"scr", 0x50, I64>;
+
+// Section 8.19.11 - TSCR (Test & Set Communication Register)
+defm TSCR : LOADCRm<"tscr", 0x41, I64>;
+
+// Section 8.19.12 - FIDCR (Fetch & Increment/Decrement CR)
+defm FIDCR : FIDCRm<"fidcr", 0x51, I64>;
+
+//-----------------------------------------------------------------------------
+// Section 8.20 - Host Memory Access Instructions
+//-----------------------------------------------------------------------------
+
+// Section 8.20.1 - LHM (Load Host Memory)
+let ry = 3, DecoderMethod = "DecodeLoadASI64" in
+defm LHML : LHMm<"lhm.l", 0x21, I64>;
+let ry = 2, DecoderMethod = "DecodeLoadASI64" in
+defm LHMW : LHMm<"lhm.w", 0x21, I64>;
+let ry = 1, DecoderMethod = "DecodeLoadASI64" in
+defm LHMH : LHMm<"lhm.h", 0x21, I64>;
+let ry = 0, DecoderMethod = "DecodeLoadASI64" in
+defm LHMB : LHMm<"lhm.b", 0x21, I64>;
+
+// Section 8.20.2 - SHM (Store Host Memory)
+let ry = 3, DecoderMethod = "DecodeStoreASI64" in
+defm SHML : SHMm<"shm.l", 0x31, I64>;
+let ry = 2, DecoderMethod = "DecodeStoreASI64" in
+defm SHMW : SHMm<"shm.w", 0x31, I64>;
+let ry = 1, DecoderMethod = "DecodeStoreASI64" in
+defm SHMH : SHMm<"shm.h", 0x31, I64>;
+let ry = 0, DecoderMethod = "DecodeStoreASI64" in
+defm SHMB : SHMm<"shm.b", 0x31, I64>;
+
+//===----------------------------------------------------------------------===//
+// Instructions for CodeGenOnly
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Pattern Matchings
+//===----------------------------------------------------------------------===//
+
+// Small immediates.
+def : Pat<(i32 simm7:$val), (OR32im (LO7 $val), 0)>;
+def : Pat<(i64 simm7:$val), (ORim (LO7 $val), 0)>;
+// Medium immediates.
+def : Pat<(i32 simm32:$val), (LEA32zii 0, 0, (LO32 $val))>;
+def : Pat<(i64 simm32:$val), (LEAzii 0, 0, (LO32 $val))>;
+def : Pat<(i64 uimm32:$val), (ANDrm (LEAzii 0, 0, (LO32 $val)), !add(32, 64))>;
+// Arbitrary immediates.
+def : Pat<(i64 lozero:$val),
+ (LEASLzii 0, 0, (HI32 imm:$val))>;
+def : Pat<(i64 lomsbzero:$val),
+ (LEASLrii (LEAzii 0, 0, (LO32 imm:$val)), 0, (HI32 imm:$val))>;
+def : Pat<(i64 imm:$val),
+ (LEASLrii (ANDrm (LEAzii 0, 0, (LO32 imm:$val)), !add(32, 64)), 0,
+ (HI32 imm:$val))>;
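+
+// Illustrative sketch of the general case above (a full 64-bit constant;
+// register names are arbitrary):
+//   lea    %s0, lo32(imm)           ; sign-extended low 32 bits
+//   and    %s0, %s0, (32)0          ; clear the upper half
+//   lea.sl %s0, hi32(imm)(, %s0)    ; add the high 32 bits shifted left by 32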
+
+// Floating-point immediates.
+def : Pat<(f32 fpimm:$val),
+ (EXTRACT_SUBREG (LEASLzii 0, 0, (HIFP32 $val)), sub_f32)>;
+def : Pat<(f64 fplozero:$val),
+ (LEASLzii 0, 0, (HIFP32 $val))>;
+def : Pat<(f64 fplomsbzero:$val),
+ (LEASLrii (LEAzii 0, 0, (LOFP32 $val)), 0, (HIFP32 $val))>;
+def : Pat<(f64 fpimm:$val),
+ (LEASLrii (ANDrm (LEAzii 0, 0, (LOFP32 $val)), !add(32, 64)), 0,
+ (HIFP32 $val))>;
+
+// The same integer registers are used for i32 and i64 values.
+// When registers hold i32 values, the high bits are unused.
+
+// TODO Use standard expansion for shift-based lowering of sext_inreg
+
+// Cast to i1
+def : Pat<(sext_inreg I32:$src, i1),
+ (SRAWSXri (SLAWSXri $src, 31), 31)>;
+def : Pat<(sext_inreg I64:$src, i1),
+ (SRALri (SLLri $src, 63), 63)>;
+
+// Cast to i8
+def : Pat<(sext_inreg I32:$src, i8),
+ (SRAWSXri (SLAWSXri $src, 24), 24)>;
+def : Pat<(sext_inreg I64:$src, i8),
+ (SRALri (SLLri $src, 56), 56)>;
+def : Pat<(sext_inreg (i32 (trunc i64:$src)), i8),
+ (EXTRACT_SUBREG (SRALri (SLLri $src, 56), 56), sub_i32)>;
+def : Pat<(and (trunc i64:$src), 0xff),
+ (AND32rm (EXTRACT_SUBREG $src, sub_i32), !add(56, 64))>;
+
+// Cast to i16
+def : Pat<(sext_inreg I32:$src, i16),
+ (SRAWSXri (SLAWSXri $src, 16), 16)>;
+def : Pat<(sext_inreg I64:$src, i16),
+ (SRALri (SLLri $src, 48), 48)>;
+def : Pat<(sext_inreg (i32 (trunc i64:$src)), i16),
+ (EXTRACT_SUBREG (SRALri (SLLri $src, 48), 48), sub_i32)>;
+def : Pat<(and (trunc i64:$src), 0xffff),
+ (AND32rm (EXTRACT_SUBREG $src, sub_i32), !add(48, 64))>;
+
+// Cast to i32
+def : Pat<(i32 (trunc i64:$src)),
+ (ADDSWSXrm (EXTRACT_SUBREG $src, sub_i32), 0)>;
+def : Pat<(i32 (fp_to_sint I64:$reg)), (CVTWDSXr RD_RZ, $reg)>;
+def : Pat<(i32 (fp_to_sint F32:$reg)), (CVTWSSXr RD_RZ, $reg)>;
+
+// Cast to i64
+def : Pat<(sext_inreg I64:$src, i32),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (ADDSWSXrm (EXTRACT_SUBREG $src, sub_i32), 0), sub_i32)>;
+def : Pat<(i64 (sext i32:$sy)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWSXrm $sy, 0), sub_i32)>;
+def : Pat<(i64 (zext i32:$sy)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADDSWZXrm $sy, 0), sub_i32)>;
+def : Pat<(i64 (fp_to_sint f32:$sy)), (CVTLDr RD_RZ, (CVTDSr $sy))>;
+def : Pat<(i64 (fp_to_sint I64:$reg)), (CVTLDr RD_RZ, $reg)>;
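+
+// For illustration, the extensions above use the adds.w forms; e.g.
+// (i64 (zext i32:$x)) becomes roughly:
+//   adds.w.zx %res, %x, (0)1   ; add zero; the .zx form clears the upper
+//                              ; 32 bits, which gives the zero extension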
+
+// Cast to f32
+def : Pat<(f32 (sint_to_fp i64:$sy)), (CVTSDr (CVTDLr i64:$sy))>;
+
+def : Pat<(i64 (anyext i32:$sy)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $sy, sub_i32)>;
+
+
+// extload, sextload and zextload patterns producing i64 results.
+multiclass EXT64m<SDPatternOperator from,
+ SDPatternOperator torri,
+ SDPatternOperator torii,
+ SDPatternOperator tozri,
+ SDPatternOperator tozii> {
+ def : Pat<(i64 (from ADDRrri:$addr)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (torri MEMrri:$addr),
+ sub_i32)>;
+ def : Pat<(i64 (from ADDRrii:$addr)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (torii MEMrii:$addr),
+ sub_i32)>;
+ def : Pat<(i64 (from ADDRzri:$addr)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (tozri MEMzri:$addr),
+ sub_i32)>;
+ def : Pat<(i64 (from ADDRzii:$addr)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (tozii MEMzii:$addr),
+ sub_i32)>;
}
+defm : EXT64m<sextloadi8, LD1BSXrri, LD1BSXrii, LD1BSXzri, LD1BSXzii>;
+defm : EXT64m<zextloadi8, LD1BZXrri, LD1BZXrii, LD1BZXzri, LD1BZXzii>;
+defm : EXT64m<extloadi8, LD1BZXrri, LD1BZXrii, LD1BZXzri, LD1BZXzii>;
+defm : EXT64m<sextloadi16, LD2BSXrri, LD2BSXrii, LD2BSXzri, LD2BSXzii>;
+defm : EXT64m<zextloadi16, LD2BZXrri, LD2BZXrii, LD2BZXzri, LD2BZXzii>;
+defm : EXT64m<extloadi16, LD2BZXrri, LD2BZXrii, LD2BZXzri, LD2BZXzii>;
+defm : EXT64m<sextloadi32, LDLSXrri, LDLSXrii, LDLSXzri, LDLSXzii>;
+defm : EXT64m<zextloadi32, LDLZXrri, LDLZXrii, LDLZXzri, LDLZXzii>;
+defm : EXT64m<extloadi32, LDLSXrri, LDLSXrii, LDLSXzri, LDLSXzii>;
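+
+// Each EXT64m instantiation above expands to four patterns, one per addressing
+// mode; e.g. the sextloadi8 line is roughly equivalent to:
+//   def : Pat<(i64 (sextloadi8 ADDRrri:$addr)),
+//             (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LD1BSXrri MEMrri:$addr),
+//              sub_i32)>;
+// plus the corresponding rii/zri/zii forms.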
-let mayStore = 1, hasSideEffects = 0 in {
-let cx = 0, cy = 0, sy = 0, cz = 1 in {
-def STSri : RM<
- 0x11, (outs), (ins MEMri:$addr, I64:$sx),
- "st $sx, $addr">;
+// anyextload patterns producing i32 results.
+multiclass EXT32m<SDPatternOperator from,
+ SDPatternOperator torri,
+ SDPatternOperator torii,
+ SDPatternOperator tozri,
+ SDPatternOperator tozii> {
+ def : Pat<(from ADDRrri:$addr), (torri MEMrri:$addr)>;
+ def : Pat<(from ADDRrii:$addr), (torii MEMrii:$addr)>;
+ def : Pat<(from ADDRzri:$addr), (tozri MEMzri:$addr)>;
+ def : Pat<(from ADDRzii:$addr), (tozii MEMzii:$addr)>;
}
+defm : EXT32m<extloadi8, LD1BZXrri, LD1BZXrii, LD1BZXzri, LD1BZXzii>;
+defm : EXT32m<extloadi16, LD2BZXrri, LD2BZXrii, LD2BZXzri, LD2BZXzii>;
+
+// truncstore
+multiclass TRUNC64m<SDPatternOperator from,
+ SDPatternOperator torri,
+ SDPatternOperator torii,
+ SDPatternOperator tozri,
+ SDPatternOperator tozii> {
+ def : Pat<(from i64:$src, ADDRrri:$addr),
+ (torri MEMrri:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
+ def : Pat<(from i64:$src, ADDRrii:$addr),
+ (torii MEMrii:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
+ def : Pat<(from i64:$src, ADDRzri:$addr),
+ (tozri MEMzri:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
+ def : Pat<(from i64:$src, ADDRzii:$addr),
+ (tozii MEMzii:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
}
+defm : TRUNC64m<truncstorei8, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>;
+defm : TRUNC64m<truncstorei16, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>;
+defm : TRUNC64m<truncstorei32, STLrri, STLrii, STLzri, STLzii>;
+
+// Address calculation and its optimization
+def : Pat<(VEhi tglobaladdr:$in), (LEASLzii 0, 0, tglobaladdr:$in)>;
+def : Pat<(VElo tglobaladdr:$in),
+ (ANDrm (LEAzii 0, 0, tglobaladdr:$in), !add(32, 64))>;
+def : Pat<(add (VEhi tglobaladdr:$in1), (VElo tglobaladdr:$in2)),
+ (LEASLrii (ANDrm (LEAzii 0, 0, tglobaladdr:$in2), !add(32, 64)), 0,
+ (tglobaladdr:$in1))>;
-// Return instruction is also a special case of jump.
-let cx = 0, cx2 = 0, bpf = 0 /* NONE */, cf = 15 /* AT */, cy = 0, sy = 0,
- cz = 1, sz = 0x10 /* SX10 */, imm32 = 0, Uses = [SX10],
- isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1,
- isCodeGenOnly = 1, hasSideEffects = 0 in
-def RET : CF<
- 0x19, (outs), (ins),
- "b.l (,%lr)",
- [(retflag)]>;
+// GlobalTLS address calculation and its optimization
+def : Pat<(VEhi tglobaltlsaddr:$in), (LEASLzii 0, 0, tglobaltlsaddr:$in)>;
+def : Pat<(VElo tglobaltlsaddr:$in),
+ (ANDrm (LEAzii 0, 0, tglobaltlsaddr:$in), !add(32, 64))>;
+def : Pat<(add (VEhi tglobaltlsaddr:$in1), (VElo tglobaltlsaddr:$in2)),
+ (LEASLrii (ANDrm (LEAzii 0, 0, tglobaltlsaddr:$in2), !add(32, 64)), 0,
+ (tglobaltlsaddr:$in1))>;
-// Branch instruction
-let cx = 0, cx2 = 0, bpf = 0 /* NONE */ in
-defm BCRL : BCRm<"br${cf}.l", "br.l", 0x18, I64, i64, simm7Op64, uimm6Op64>;
+// Address calculation and its optimization
+def : Pat<(VEhi texternalsym:$in), (LEASLzii 0, 0, texternalsym:$in)>;
+def : Pat<(VElo texternalsym:$in),
+ (ANDrm (LEAzii 0, 0, texternalsym:$in), !add(32, 64))>;
+def : Pat<(add (VEhi texternalsym:$in1), (VElo texternalsym:$in2)),
+ (LEASLrii (ANDrm (LEAzii 0, 0, texternalsym:$in2), !add(32, 64)), 0,
+ (texternalsym:$in1))>;
-let cx = 0, cy = 0, cz = 1, hasSideEffects = 0 in {
-let sy = 3 in
-def SHMri : RM<
- 0x31, (outs), (ins MEMASri:$addr, I64:$sx),
- "shm.l $sx, $addr">;
+// Branches
+def : Pat<(br bb:$addr), (BRCFLa bb:$addr)>;
+
+// brcc
+// integer brcc
+multiclass BRCCIm<ValueType ty, SDPatternOperator BrOpNode1,
+ SDPatternOperator BrOpNode2,
+ SDPatternOperator CmpOpNode1,
+ SDPatternOperator CmpOpNode2> {
+ def : Pat<(brcc CCSIOp:$cond, ty:$l, simm7:$r, bb:$addr),
+ (BrOpNode2 (icond2ccSwap $cond), (LO7 $r), $l, bb:$addr)>;
+ def : Pat<(brcc CCSIOp:$cond, ty:$l, ty:$r, bb:$addr),
+ (BrOpNode1 (icond2cc $cond), $l, $r, bb:$addr)>;
+ def : Pat<(brcc CCUIOp:$cond, ty:$l, simm7:$r, bb:$addr),
+ (BrOpNode2 (icond2cc $cond), 0, (CmpOpNode2 (LO7 $r), $l),
+ bb:$addr)>;
+ def : Pat<(brcc CCUIOp:$cond, ty:$l, ty:$r, bb:$addr),
+ (BrOpNode2 (icond2cc $cond), 0, (CmpOpNode1 $r, $l), bb:$addr)>;
}
+defm : BRCCIm<i32, BRCFWrr, BRCFWir, CMPUWrr, CMPUWir>;
+defm : BRCCIm<i64, BRCFLrr, BRCFLir, CMPULrr, CMPULir>;
-let cx = 0, sx = 0, cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0 in
-def MONC : RR<
- 0x3F, (outs), (ins),
- "monc">;
+// floating point brcc
+multiclass BRCCFm<ValueType ty, SDPatternOperator BrOpNode1,
+ SDPatternOperator BrOpNode2> {
+ def : Pat<(brcc cond:$cond, ty:$l, simm7fp:$r, bb:$addr),
+ (BrOpNode2 (fcond2ccSwap $cond), (LO7FP $r), $l, bb:$addr)>;
+ def : Pat<(brcc cond:$cond, ty:$l, ty:$r, bb:$addr),
+ (BrOpNode1 (fcond2cc $cond), $l, $r, bb:$addr)>;
+}
+defm : BRCCFm<f32, BRCFSrr, BRCFSir>;
+defm : BRCCFm<f64, BRCFDrr, BRCFDir>;
//===----------------------------------------------------------------------===//
// Pseudo Instructions
//===----------------------------------------------------------------------===//
+// GETGOT for PIC
+let Defs = [SX15 /* %got */, SX16 /* %plt */], hasSideEffects = 0 in {
+ def GETGOT : Pseudo<(outs getGOT:$getpcseq), (ins), "$getpcseq">;
+}
+
+// GETFUNPLT for PIC
+let hasSideEffects = 0 in
+def GETFUNPLT : Pseudo<(outs I64:$dst), (ins i64imm:$addr),
+ "$dst, $addr",
+ [(set iPTR:$dst, (GetFunPLT tglobaladdr:$addr))] >;
+
+def : Pat<(GetFunPLT tglobaladdr:$dst),
+ (GETFUNPLT tglobaladdr:$dst)>;
+def : Pat<(GetFunPLT texternalsym:$dst),
+ (GETFUNPLT texternalsym:$dst)>;
+
+// GETTLSADDR for TLS
+let Defs = [SX0, SX10, SX12], hasSideEffects = 0 in
+def GETTLSADDR : Pseudo<(outs), (ins i64imm:$addr),
+ "# GETTLSADDR $addr",
+ [(GetTLSAddr tglobaltlsaddr:$addr)] >;
+
+def : Pat<(GetTLSAddr tglobaltlsaddr:$dst),
+ (GETTLSADDR tglobaltlsaddr:$dst)>;
+
let Defs = [SX11], Uses = [SX11], hasSideEffects = 0 in {
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt, i64imm:$amt2),
"# ADJCALLSTACKDOWN $amt, $amt2",
@@ -286,3 +1728,278 @@ let hasSideEffects = 0 in
def EXTEND_STACK_GUARD : Pseudo<(outs), (ins),
"# EXTEND STACK GUARD",
[]>;
+
+// Dynamic stack allocation yields a __llvm_grow_stack call for VE targets.
+// These calls are needed to probe the stack when allocating more than the
+// stack limit held in %s8 (%sl).
+
+let Uses = [SX11], hasSideEffects = 1 in
+def GETSTACKTOP : Pseudo<(outs I64:$dst), (ins),
+ "# GET STACK TOP",
+ [(set iPTR:$dst, (GetStackTop))]>;
+// SETCC pattern matches
+//
+// CMP %tmp, lhs, rhs ; compare lhs and rhs
+// or %res, 0, (0)1 ; initialize by 0
+// CMOV %res, (63)0, %tmp ; set 1 if %tmp is true
+
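+// For example, (i32 (setcc i64:$a, i64:$b, setlt)) would select to roughly
+// (register names are arbitrary):
+//   cmps.l    %tmp, %a, %b        ; signed 64-bit compare
+//   or        %res, 0, (0)1       ; %res = 0
+//   cmov.l.lt %res, (63)0, %tmp   ; %res = 1 if the compare result is "lt"
+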
+def : Pat<(i32 (setcc i64:$LHS, i64:$RHS, CCSIOp:$cond)),
+ (EXTRACT_SUBREG
+ (CMOVLrm (icond2cc $cond),
+ (CMPSLrr i64:$LHS, i64:$RHS),
+ !add(63, 64),
+ (ORim 0, 0)), sub_i32)>;
+
+def : Pat<(i32 (setcc i64:$LHS, i64:$RHS, CCUIOp:$cond)),
+ (EXTRACT_SUBREG
+ (CMOVLrm (icond2cc $cond),
+ (CMPULrr i64:$LHS, i64:$RHS),
+ !add(63, 64),
+ (ORim 0, 0)), sub_i32)>;
+
+def : Pat<(i32 (setcc i32:$LHS, i32:$RHS, CCSIOp:$cond)),
+ (EXTRACT_SUBREG
+ (CMOVWrm (icond2cc $cond),
+ (CMPSWSXrr i32:$LHS, i32:$RHS),
+ !add(63, 64),
+ (ORim 0, 0)), sub_i32)>;
+
+def : Pat<(i32 (setcc i32:$LHS, i32:$RHS, CCUIOp:$cond)),
+ (EXTRACT_SUBREG
+ (CMOVWrm (icond2cc $cond),
+ (CMPUWrr i32:$LHS, i32:$RHS),
+ !add(63, 64),
+ (ORim 0, 0)), sub_i32)>;
+
+def : Pat<(i32 (setcc f64:$LHS, f64:$RHS, cond:$cond)),
+ (EXTRACT_SUBREG
+ (CMOVDrm (fcond2cc $cond),
+ (FCMPDrr f64:$LHS, f64:$RHS),
+ !add(63, 64),
+ (ORim 0, 0)), sub_i32)>;
+
+def : Pat<(i32 (setcc f32:$LHS, f32:$RHS, cond:$cond)),
+ (EXTRACT_SUBREG
+ (CMOVSrm (fcond2cc $cond),
+ (FCMPSrr f32:$LHS, f32:$RHS),
+ !add(63, 64),
+ (ORim 0, 0)), sub_i32)>;
+
+// Special SELECTCC pattern matches
+// Use min/max for better performance.
+//
+// MAX/MIN %res, %lhs, %rhs
+
+def : Pat<(f64 (selectcc f64:$LHS, f64:$RHS, f64:$LHS, f64:$RHS, SETOGT)),
+ (FMAXDrr $LHS, $RHS)>;
+def : Pat<(f32 (selectcc f32:$LHS, f32:$RHS, f32:$LHS, f32:$RHS, SETOGT)),
+ (FMAXSrr $LHS, $RHS)>;
+def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETGT)),
+ (MAXSLrr $LHS, $RHS)>;
+def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETGT)),
+ (MAXSWSXrr $LHS, $RHS)>;
+def : Pat<(f64 (selectcc f64:$LHS, f64:$RHS, f64:$LHS, f64:$RHS, SETOGE)),
+ (FMAXDrr $LHS, $RHS)>;
+def : Pat<(f32 (selectcc f32:$LHS, f32:$RHS, f32:$LHS, f32:$RHS, SETOGE)),
+ (FMAXSrr $LHS, $RHS)>;
+def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETGE)),
+ (MAXSLrr $LHS, $RHS)>;
+def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETGE)),
+ (MAXSWSXrr $LHS, $RHS)>;
+
+def : Pat<(f64 (selectcc f64:$LHS, f64:$RHS, f64:$LHS, f64:$RHS, SETOLT)),
+ (FMINDrr $LHS, $RHS)>;
+def : Pat<(f32 (selectcc f32:$LHS, f32:$RHS, f32:$LHS, f32:$RHS, SETOLT)),
+ (FMINSrr $LHS, $RHS)>;
+def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETLT)),
+ (MINSLrr $LHS, $RHS)>;
+def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETLT)),
+ (MINSWSXrr $LHS, $RHS)>;
+def : Pat<(f64 (selectcc f64:$LHS, f64:$RHS, f64:$LHS, f64:$RHS, SETOLE)),
+ (FMINDrr $LHS, $RHS)>;
+def : Pat<(f32 (selectcc f32:$LHS, f32:$RHS, f32:$LHS, f32:$RHS, SETOLE)),
+ (FMINSrr $LHS, $RHS)>;
+def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETLE)),
+ (MINSLrr $LHS, $RHS)>;
+def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETLE)),
+ (MINSWSXrr $LHS, $RHS)>;
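+
+// For example, (f64 (selectcc f64:$a, f64:$b, f64:$a, f64:$b, SETOGT)) -- the
+// usual fmax idiom -- selects to a single instruction, roughly:
+//   fmax.d %res, %a, %b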
+
+// Generic SELECTCC pattern matches
+//
+// CMP %tmp, %l, %r ; compare %l and %r
+// or %res, %f, (0)1 ; initialize by %f
+// CMOV %res, %t, %tmp ; set %t if %tmp is true
+
+// selectcc for i64 result
+def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCSIOp:$cond)),
+ (CMOVWrr (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>;
+def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCUIOp:$cond)),
+ (CMOVWrr (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>;
+def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCSIOp:$cond)),
+ (CMOVLrr (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>;
+def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCUIOp:$cond)),
+ (CMOVLrr (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>;
+def : Pat<(i64 (selectcc f32:$l, f32:$r, i64:$t, i64:$f, cond:$cond)),
+ (CMOVSrr (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>;
+def : Pat<(i64 (selectcc f64:$l, f64:$r, i64:$t, i64:$f, cond:$cond)),
+ (CMOVDrr (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>;
+
+// selectcc for i32 result
+def : Pat<(i32 (selectcc i32:$l, i32:$r, i32:$t, i32:$f, CCSIOp:$cond)),
+ (EXTRACT_SUBREG
+ (CMOVWrr (icond2cc $cond),
+ (CMPSWSXrr $l, $r),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
+ sub_i32)>;
+def : Pat<(i32 (selectcc i32:$l, i32:$r, i32:$t, i32:$f, CCUIOp:$cond)),
+ (EXTRACT_SUBREG
+ (CMOVWrr (icond2cc $cond),
+ (CMPUWrr $l, $r),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
+ sub_i32)>;
+def : Pat<(i32 (selectcc i64:$l, i64:$r, i32:$t, i32:$f, CCSIOp:$cond)),
+ (EXTRACT_SUBREG
+ (CMOVLrr (icond2cc $cond),
+ (CMPSLrr $l, $r),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
+ sub_i32)>;
+def : Pat<(i32 (selectcc i64:$l, i64:$r, i32:$t, i32:$f, CCUIOp:$cond)),
+ (EXTRACT_SUBREG
+ (CMOVLrr (icond2cc $cond),
+ (CMPULrr $l, $r),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
+ sub_i32)>;
+def : Pat<(i32 (selectcc f32:$l, f32:$r, i32:$t, i32:$f, cond:$cond)),
+ (EXTRACT_SUBREG
+ (CMOVSrr (fcond2cc $cond),
+ (FCMPSrr $l, $r),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
+ sub_i32)>;
+def : Pat<(i32 (selectcc f64:$l, f64:$r, i32:$t, i32:$f, cond:$cond)),
+ (EXTRACT_SUBREG
+ (CMOVDrr (fcond2cc $cond),
+ (FCMPDrr $l, $r),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
+ sub_i32)>;
+
+// selectcc for f64 result
+def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCSIOp:$cond)),
+ (CMOVWrr (icond2cc $cond), (CMPSWSXrr $l, $r), $t, $f)>;
+def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCUIOp:$cond)),
+ (CMOVWrr (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>;
+def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCSIOp:$cond)),
+ (CMOVLrr (icond2cc $cond), (CMPSLrr $l, $r), $t, $f)>;
+def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCUIOp:$cond)),
+ (CMOVLrr (icond2cc $cond), (CMPULrr $l, $r), $t, $f)>;
+def : Pat<(f64 (selectcc f32:$l, f32:$r, f64:$t, f64:$f, cond:$cond)),
+ (CMOVSrr (fcond2cc $cond), (FCMPSrr $l, $r), $t, $f)>;
+def : Pat<(f64 (selectcc f64:$l, f64:$r, f64:$t, f64:$f, cond:$cond)),
+ (CMOVDrr (fcond2cc $cond), (FCMPDrr $l, $r), $t, $f)>;
+
+// selectcc for f32 result
+def : Pat<(f32 (selectcc i32:$l, i32:$r, f32:$t, f32:$f, CCSIOp:$cond)),
+ (EXTRACT_SUBREG
+ (CMOVWrr (icond2cc $cond),
+ (CMPSWSXrr $l, $r),
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32),
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)),
+ sub_f32)>;
+def : Pat<(f32 (selectcc i32:$l, i32:$r, f32:$t, f32:$f, CCUIOp:$cond)),
+ (EXTRACT_SUBREG
+ (CMOVWrr (icond2cc $cond),
+ (CMPUWrr $l, $r),
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32),
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)),
+ sub_f32)>;
+def : Pat<(f32 (selectcc i64:$l, i64:$r, f32:$t, f32:$f, CCSIOp:$cond)),
+ (EXTRACT_SUBREG
+ (CMOVLrr (icond2cc $cond),
+ (CMPSLrr $l, $r),
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32),
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)),
+ sub_f32)>;
+def : Pat<(f32 (selectcc i64:$l, i64:$r, f32:$t, f32:$f, CCUIOp:$cond)),
+ (EXTRACT_SUBREG
+ (CMOVLrr (icond2cc $cond),
+ (CMPULrr $l, $r),
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32),
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)),
+ sub_f32)>;
+def : Pat<(f32 (selectcc f32:$l, f32:$r, f32:$t, f32:$f, cond:$cond)),
+ (EXTRACT_SUBREG
+ (CMOVSrr (fcond2cc $cond),
+ (FCMPSrr $l, $r),
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32),
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)),
+ sub_f32)>;
+def : Pat<(f32 (selectcc f64:$l, f64:$r, f32:$t, f32:$f, cond:$cond)),
+ (EXTRACT_SUBREG
+ (CMOVDrr (fcond2cc $cond),
+ (FCMPDrr $l, $r),
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32),
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)),
+ sub_f32)>;
+
+// Generic SELECT pattern matches
+// Use cmov.w for all cases since %pred holds i32.
+//
+// CMOV.w.ne %res, %tval, %tmp ; set tval if %tmp is true
+
+def : Pat<(i64 (select i32:$pred, i64:$t, i64:$f)),
+ (CMOVWrr CC_INE, $pred, $t, $f)>;
+
+def : Pat<(i32 (select i32:$pred, i32:$t, i32:$f)),
+ (EXTRACT_SUBREG
+ (CMOVWrr CC_INE, $pred,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
+ sub_i32)>;
+
+def : Pat<(f64 (select i32:$pred, f64:$t, f64:$f)),
+ (CMOVWrr CC_INE, $pred, $t, $f)>;
+
+def : Pat<(f32 (select i32:$pred, f32:$t, f32:$f)),
+ (EXTRACT_SUBREG
+ (CMOVWrr CC_INE, $pred,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_f32),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_f32)),
+ sub_f32)>;
+
+// bitconvert
+def : Pat<(f64 (bitconvert i64:$src)), (COPY_TO_REGCLASS $src, I64)>;
+def : Pat<(i64 (bitconvert f64:$src)), (COPY_TO_REGCLASS $src, I64)>;
+
+def : Pat<(i32 (bitconvert f32:$op)),
+ (EXTRACT_SUBREG (SRALri (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ $op, sub_f32), 32), sub_i32)>;
+def : Pat<(f32 (bitconvert i32:$op)),
+ (EXTRACT_SUBREG (SLLri (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ $op, sub_i32), 32), sub_f32)>;
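+
+// The 32-bit shifts above follow from the register layout: f32 values live in
+// the upper half of a 64-bit register (sub_f32), i32 values in the lower half
+// (sub_i32), so an i32<->f32 bitconvert is a shift by 32 within the same
+// 64-bit register; roughly (illustrative only):
+//   sra.l %res, %src, 32   ; f32 -> i32: move the high half down
+//   sll   %res, %src, 32   ; i32 -> f32: move the low half up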
+
+// Bit operation pattern matches.
+def : Pat<(i32 (ctpop i32:$src)),
+ (EXTRACT_SUBREG (PCNTr (ANDrm (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), $src, sub_i32), !add(32, 64))), sub_i32)>;
+def : Pat<(i32 (ctlz i32:$src)),
+ (EXTRACT_SUBREG (LDZr (SLLri (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), $src, sub_i32), 32)), sub_i32)>;
+def : Pat<(i64 (bswap i64:$src)),
+ (BSWPri $src, 0)>;
+def : Pat<(i32 (bswap i32:$src)),
+ (EXTRACT_SUBREG (BSWPri (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), $src, sub_i32), 1), sub_i32)>;
+
+// Several special pattern matches to optimize code
+
+def : Pat<(i32 (and i32:$lhs, 0xff)),
+ (AND32rm $lhs, !add(56, 64))>;
+def : Pat<(i32 (and i32:$lhs, 0xffff)),
+ (AND32rm $lhs, !add(48, 64))>;
+def : Pat<(i32 (and i32:$lhs, 0xffffffff)),
+ (AND32rm $lhs, !add(32, 64))>;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp
index 6c8fc3536c34..9815610510e1 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEMCInstLower.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/VEMCExpr.h"
#include "VE.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -27,9 +28,16 @@ using namespace llvm;
static MCOperand LowerSymbolOperand(const MachineInstr *MI,
const MachineOperand &MO,
const MCSymbol *Symbol, AsmPrinter &AP) {
+ VEMCExpr::VariantKind Kind = (VEMCExpr::VariantKind)MO.getTargetFlags();
- const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Symbol, AP.OutContext);
- return MCOperand::createExpr(MCSym);
+ const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, AP.OutContext);
+ // Add the offset only if MO is neither a jump-table index nor a basic block.
+ if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
+ Expr = MCBinaryExpr::createAdd(
+ Expr, MCConstantExpr::create(MO.getOffset(), AP.OutContext),
+ AP.OutContext);
+ Expr = VEMCExpr::create(Kind, Expr, AP.OutContext);
+ return MCOperand::createExpr(Expr);
}
static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO,
@@ -43,6 +51,11 @@ static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO,
break;
return MCOperand::createReg(MO.getReg());
+ case MachineOperand::MO_ExternalSymbol:
+ return LowerSymbolOperand(
+ MI, MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP);
+ case MachineOperand::MO_GlobalAddress:
+ return LowerSymbolOperand(MI, MO, AP.getSymbol(MO.getGlobal()), AP);
case MachineOperand::MO_Immediate:
return MCOperand::createImm(MO.getImm());
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..1addfc7174eb
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp
@@ -0,0 +1,13 @@
+//===-- VEMachineFunctionInfo.cpp - VE Machine Function Info --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "VEMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void VEMachineFunctionInfo::anchor() {}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/VE/VEMachineFunctionInfo.h
new file mode 100644
index 000000000000..16b25fed3f11
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEMachineFunctionInfo.h
@@ -0,0 +1,48 @@
+//===- VEMachineFunctionInfo.h - VE Machine Function Info -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares VE specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_VE_VEMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_VE_VEMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class VEMachineFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
+private:
+ Register GlobalBaseReg;
+
+ /// VarArgsFrameOffset - Frame offset to start of varargs area.
+ int VarArgsFrameOffset;
+
+ /// IsLeafProc - True if the function is a leaf procedure.
+ bool IsLeafProc;
+
+public:
+ VEMachineFunctionInfo()
+ : GlobalBaseReg(), VarArgsFrameOffset(0), IsLeafProc(false) {}
+ explicit VEMachineFunctionInfo(MachineFunction &MF)
+ : GlobalBaseReg(), VarArgsFrameOffset(0), IsLeafProc(false) {}
+
+ Register getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(Register Reg) { GlobalBaseReg = Reg; }
+
+ int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
+ void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; }
+
+ void setLeafProc(bool rhs) { IsLeafProc = rhs; }
+ bool isLeafProc() const { return IsLeafProc; }
+};
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp
index e1ff614abc20..5783a8df69d2 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp
@@ -34,12 +34,22 @@ VERegisterInfo::VERegisterInfo() : VEGenRegisterInfo(VE::SX10) {}
const MCPhysReg *
VERegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- return CSR_SaveList;
+ switch (MF->getFunction().getCallingConv()) {
+ default:
+ return CSR_SaveList;
+ case CallingConv::PreserveAll:
+ return CSR_preserve_all_SaveList;
+ }
}
const uint32_t *VERegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
- return CSR_RegMask;
+ switch (CC) {
+ default:
+ return CSR_RegMask;
+ case CallingConv::PreserveAll:
+ return CSR_preserve_all_RegMask;
+ }
}
const uint32_t *VERegisterInfo::getNoPreservedMask() const {
@@ -48,26 +58,34 @@ const uint32_t *VERegisterInfo::getNoPreservedMask() const {
BitVector VERegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- Reserved.set(VE::SX8); // stack limit
- Reserved.set(VE::SX9); // frame pointer
- Reserved.set(VE::SX10); // link register (return address)
- Reserved.set(VE::SX11); // stack pointer
- Reserved.set(VE::SX12); // outer register
- Reserved.set(VE::SX13); // id register for dynamic linker
-
- Reserved.set(VE::SX14); // thread pointer
- Reserved.set(VE::SX15); // global offset table register
- Reserved.set(VE::SX16); // procedure linkage table register
- Reserved.set(VE::SX17); // linkage-area register
-
- // sx18-sx33 are callee-saved registers
- // sx34-sx63 are temporary registers
+ const Register ReservedRegs[] = {
+ VE::SX8, // Stack limit
+ VE::SX9, // Frame pointer
+ VE::SX10, // Link register (return address)
+ VE::SX11, // Stack pointer
+
+ // FIXME: These may not need to be reserved.
+ VE::SX12, // Outer register
+ VE::SX13, // Id register for dynamic linker
+
+ VE::SX14, // Thread pointer
+ VE::SX15, // Global offset table register
+ VE::SX16, // Procedure linkage table register
+ VE::SX17, // Linkage-area register
+ // sx18-sx33 are callee-saved registers
+ // sx34-sx63 are temporary registers
+ };
+
+ for (auto R : ReservedRegs)
+ for (MCRegAliasIterator ItAlias(R, this, true); ItAlias.isValid();
+ ++ItAlias)
+ Reserved.set(*ItAlias);
return Reserved;
}
-bool VERegisterInfo::isConstantPhysReg(unsigned PhysReg) const { return false; }
+bool VERegisterInfo::isConstantPhysReg(MCRegister PhysReg) const { return false; }
const TargetRegisterClass *
VERegisterInfo::getPointerRegClass(const MachineFunction &MF,
@@ -77,12 +95,12 @@ VERegisterInfo::getPointerRegClass(const MachineFunction &MF,
static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II,
MachineInstr &MI, const DebugLoc &dl,
- unsigned FIOperandNum, int Offset, unsigned FramePtr) {
+ unsigned FIOperandNum, int Offset, Register FrameReg) {
// Replace frame index with a frame pointer reference directly.
// VE has 32 bit offset field, so no need to expand a target instruction.
// Directly encode it.
- MI.getOperand(FIOperandNum).ChangeToRegister(FramePtr, false);
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FIOperandNum + 2).ChangeToImmediate(Offset);
}
void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
@@ -96,11 +114,11 @@ void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineFunction &MF = *MI.getParent()->getParent();
const VEFrameLowering *TFI = getFrameLowering(MF);
- unsigned FrameReg;
+ Register FrameReg;
int Offset;
Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg);
- Offset += MI.getOperand(FIOperandNum + 1).getImm();
+ Offset += MI.getOperand(FIOperandNum + 2).getImm();
replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FrameReg);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h
index 9cb475f5e174..9a32da16bea6 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.h
@@ -30,7 +30,7 @@ public:
const uint32_t *getNoPreservedMask() const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
- bool isConstantPhysReg(unsigned PhysReg) const override;
+ bool isConstantPhysReg(MCRegister PhysReg) const override;
const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
unsigned Kind) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td
index ef5b9c09705a..29708d35c730 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.td
@@ -10,28 +10,135 @@
// Declarations that describe the VE register file
//===----------------------------------------------------------------------===//
-class VEReg<bits<7> Enc, string n> : Register<n> {
+class VEReg<bits<7> enc, string n, list<Register> subregs = [],
+ list<string> altNames = [], list<Register> aliases = []>
+ : Register<n, altNames> {
let HWEncoding{15-7} = 0;
- let HWEncoding{6-0} = Enc;
+ let HWEncoding{6-0} = enc;
let Namespace = "VE";
-}
-
-// Registers are identified with 7-bit ID numbers.
-// R - 64-bit integer or floating-point registers
-class R<bits<7> Enc, string n, list<Register> subregs = [],
- list<Register> aliases = []>: VEReg<Enc, n> {
let SubRegs = subregs;
let Aliases = aliases;
}
+class VEMiscReg<bits<6> enc, string n>: Register<n> {
+ let HWEncoding{15-6} = 0;
+ let HWEncoding{5-0} = enc;
+ let Namespace = "VE";
+}
+
+let Namespace = "VE" in {
+ def sub_i8 : SubRegIndex<8, 56>; // Low 8 bit (56..63)
+ def sub_i16 : SubRegIndex<16, 48>; // Low 16 bit (48..63)
+ def sub_i32 : SubRegIndex<32, 32>; // Low 32 bit (32..63)
+ def sub_f32 : SubRegIndex<32>; // High 32 bit (0..31)
+ def sub_even : SubRegIndex<64>; // High 64 bit (0..63)
+ def sub_odd : SubRegIndex<64, 64>; // Low 64 bit (64..127)
+ def AsmName : RegAltNameIndex;
+}
+
+//-----------------------------------------------------------------------------
+// Miscellaneous Registers
+//-----------------------------------------------------------------------------
+
+def USRCC : VEMiscReg<0, "usrcc">; // User clock counter
+def PSW : VEMiscReg<1, "psw">; // Program status word
+def SAR : VEMiscReg<2, "sar">; // Store address register
+def PMMR : VEMiscReg<7, "pmmr">; // Performance monitor mode register
+
+// Performance monitor configuration registers
+foreach I = 0-3 in
+ def PMCR#I : VEMiscReg<!add(8,I), "pmcr"#I>;
+
+// Performance monitor counter
+foreach I = 0-14 in
+ def PMC#I : VEMiscReg<!add(16,I), "pmc"#I>;
+
+// Register classes.
+def MISC : RegisterClass<"VE", [i64], 64,
+ (add USRCC, PSW, SAR, PMMR,
+ (sequence "PMCR%u", 0, 3),
+ (sequence "PMC%u", 0, 14))>;
+
+//-----------------------------------------------------------------------------
+// Instruction Counter Register
+//-----------------------------------------------------------------------------
+
+def IC : VEMiscReg<62, "ic">;
+
+//-----------------------------------------------------------------------------
+// Gneric Registers
+//-----------------------------------------------------------------------------
+
+let RegAltNameIndices = [AsmName] in {
+
+// Generic integer registers - 8 bits wide
+foreach I = 0-63 in
+ def SB#I : VEReg<I, "sb"#I, [], ["s"#I]>, DwarfRegNum<[I]>;
+
+// Generic integer registers - 16 bits wide
+let SubRegIndices = [sub_i8] in
+foreach I = 0-63 in
+ def SH#I : VEReg<I, "sh"#I, [!cast<VEReg>("SB"#I)], ["s"#I]>,
+ DwarfRegNum<[I]>;
+
+// Generic integer registers - 32 bits wide
+let SubRegIndices = [sub_i16] in
+foreach I = 0-63 in
+ def SW#I : VEReg<I, "sw"#I, [!cast<VEReg>("SH"#I)], ["s"#I]>,
+ DwarfRegNum<[I]>;
+
+// Generic floating point registers - 32 bits wide
+// NOTE: Mark SF#I as an alias of SW#I temporarily to avoid register
+// allocation problems.
+foreach I = 0-63 in
+ def SF#I : VEReg<I, "sf"#I, [], ["s"#I], [!cast<VEReg>("SW"#I)]>,
+ DwarfRegNum<[I]>;
+
// Generic integer registers - 64 bits wide
+let SubRegIndices = [sub_i32, sub_f32], CoveredBySubRegs = 1 in
foreach I = 0-63 in
- def SX#I : R<I, "S"#I, []>,
- DwarfRegNum<[I]>;
+ def SX#I : VEReg<I, "s"#I, [!cast<VEReg>("SW"#I), !cast<VEReg>("SF"#I)],
+ ["s"#I]>, DwarfRegNum<[I]>;
+
+// Aliases of the S* registers used to hold 128-bit values (long doubles).
+// Following foreach represents something like:
+// def Q0 : VEReg<0, "q0", [SX0, SX1], ["s0"]>;
+// def Q1 : VEReg<2, "q1", [SX2, SX3], ["s2"]>;
+// ...
+let SubRegIndices = [sub_even, sub_odd], CoveredBySubRegs = 1 in
+foreach I = 0-31 in
+ def Q#I : VEReg<!shl(I,1), "q"#I,
+ [!cast<VEReg>("SX"#!shl(I,1)),
+ !cast<VEReg>("SX"#!add(!shl(I,1),1))],
+ ["s"#!shl(I,1)]>;
+
+} // RegAltNameIndices = [AsmName]
// Register classes.
//
// The register order is defined in terms of the preferred
// allocation order.
-def I64 : RegisterClass<"VE", [i64], 64,
- (sequence "SX%u", 0, 63)>;
+def I8 : RegisterClass<"VE", [i8], 8,
+ (add (sequence "SB%u", 0, 7),
+ (sequence "SB%u", 34, 63),
+ (sequence "SB%u", 8, 33))>;
+def I16 : RegisterClass<"VE", [i16], 16,
+ (add (sequence "SH%u", 0, 7),
+ (sequence "SH%u", 34, 63),
+ (sequence "SH%u", 8, 33))>;
+def I32 : RegisterClass<"VE", [i32], 32,
+ (add (sequence "SW%u", 0, 7),
+ (sequence "SW%u", 34, 63),
+ (sequence "SW%u", 8, 33))>;
+def I64 : RegisterClass<"VE", [i64, f64], 64,
+ (add (sequence "SX%u", 0, 7),
+ (sequence "SX%u", 34, 63),
+ (sequence "SX%u", 8, 33))>;
+def F32 : RegisterClass<"VE", [f32], 32,
+ (add (sequence "SF%u", 0, 7),
+ (sequence "SF%u", 34, 63),
+ (sequence "SF%u", 8, 33))>;
+def F128 : RegisterClass<"VE", [f128], 128,
+ (add (sequence "Q%u", 0, 3),
+ (sequence "Q%u", 17, 31),
+ (sequence "Q%u", 4, 16))>;
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp
index 861e88cdb583..a0b78d95e3cf 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.cpp
@@ -28,7 +28,7 @@ void VESubtarget::anchor() {}
VESubtarget &VESubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
// Determine default and user specified characteristics
- std::string CPUName = CPU;
+ std::string CPUName = std::string(CPU);
if (CPUName.empty())
CPUName = "ve";
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h
index e9637cc16023..f3a2c206162e 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VESubtarget.h
@@ -42,7 +42,7 @@ public:
const TargetMachine &TM);
const VEInstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const TargetFrameLowering *getFrameLowering() const override {
+ const VEFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
const VERegisterInfo *getRegisterInfo() const override {
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp
index 46f5c0dc1805..08b55eebbc98 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.cpp
@@ -10,6 +10,7 @@
//===----------------------------------------------------------------------===//
#include "VETargetMachine.h"
+#include "TargetInfo/VETargetInfo.h"
#include "VE.h"
#include "VETargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -40,8 +41,8 @@ static std::string computeDataLayout(const Triple &T) {
// VE supports 32 bit and 64 bits integer on registers
Ret += "-n32:64";
- // Stack alignment is 64 bits
- Ret += "-S64";
+ // Stack alignment is 128 bits
+ Ret += "-S128";
return Ret;
}
@@ -73,7 +74,8 @@ VETargetMachine::VETargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
- TLOF(createTLOF()), Subtarget(TT, CPU, FS, *this) {
+ TLOF(createTLOF()),
+ Subtarget(TT, std::string(CPU), std::string(FS), *this) {
initAsmInfo();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.h b/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.h
index 3191d59ec1c8..041d3b197ec3 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VETargetMachine.h
@@ -50,6 +50,8 @@ public:
bool isMachineVerifierClean() const override { return false; }
TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+
+ unsigned getSjLjDataSize() const override { return 64; }
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index ea99cee3eb3b..e29d85d7588d 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -164,6 +164,7 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
// Much like WebAssemblyAsmPrinter in the backend, we have to own these.
std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures;
+ std::vector<std::unique_ptr<std::string>> Names;
// Order of labels, directives and instructions in a .s file have no
// syntactical enforcement. This class is a callback from the actual parser,
@@ -214,6 +215,11 @@ public:
SMLoc & /*EndLoc*/) override {
llvm_unreachable("ParseRegister is not implemented.");
}
+ OperandMatchResultTy tryParseRegister(unsigned & /*RegNo*/,
+ SMLoc & /*StartLoc*/,
+ SMLoc & /*EndLoc*/) override {
+ llvm_unreachable("tryParseRegister is not implemented.");
+ }
bool error(const Twine &Msg, const AsmToken &Tok) {
return Parser.Error(Tok.getLoc(), Msg + Tok.getString());
@@ -227,6 +233,12 @@ public:
Signatures.push_back(std::move(Sig));
}
+ StringRef storeName(StringRef Name) {
+ std::unique_ptr<std::string> N = std::make_unique<std::string>(Name);
+ Names.push_back(std::move(N));
+ return *Names.back();
+ }
+
std::pair<StringRef, StringRef> nestingString(NestingType NT) {
switch (NT) {
case Function:
@@ -310,6 +322,8 @@ public:
return wasm::ValType::V128;
if (Type == "exnref")
return wasm::ValType::EXNREF;
+ if (Type == "externref")
+ return wasm::ValType::EXTERNREF;
return Optional<wasm::ValType>();
}
@@ -430,7 +444,7 @@ public:
Name = StringRef(NameLoc.getPointer(), Name.size());
// WebAssembly has instructions with / in them, which AsmLexer parses
- // as seperate tokens, so if we find such tokens immediately adjacent (no
+ // as separate tokens, so if we find such tokens immediately adjacent (no
// whitespace), expand the name to include them:
for (;;) {
auto &Sep = Lexer.getTok();
@@ -688,7 +702,7 @@ public:
// WebAssemblyAsmPrinter::EmitFunctionBodyStart.
// TODO: would be good to factor this into a common function, but the
// assembler and backend really don't share any common code, and this code
- // parses the locals seperately.
+ // parses the locals separately.
auto SymName = expectIdent();
if (SymName.empty())
return true;
@@ -720,7 +734,7 @@ public:
return true;
auto ExportName = expectIdent();
auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
- WasmSym->setExportName(ExportName);
+ WasmSym->setExportName(storeName(ExportName));
TOut.emitExportName(WasmSym, ExportName);
}
@@ -732,7 +746,7 @@ public:
return true;
auto ImportModule = expectIdent();
auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
- WasmSym->setImportModule(ImportModule);
+ WasmSym->setImportModule(storeName(ImportModule));
TOut.emitImportModule(WasmSym, ImportModule);
}
@@ -744,7 +758,7 @@ public:
return true;
auto ImportName = expectIdent();
auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
- WasmSym->setImportName(ImportName);
+ WasmSym->setImportName(storeName(ImportName));
TOut.emitImportName(WasmSym, ImportName);
}
@@ -787,7 +801,7 @@ public:
return error("Cannot parse .int expression: ", Lexer.getTok());
size_t NumBits = 0;
DirectiveID.getString().drop_front(4).getAsInteger(10, NumBits);
- Out.EmitValue(Val, NumBits / 8, End);
+ Out.emitValue(Val, NumBits / 8, End);
return expect(AsmToken::EndOfStatement, "EOL");
}
@@ -796,7 +810,7 @@ public:
std::string S;
if (Parser.parseEscapedString(S))
return error("Cannot parse string constant: ", Lexer.getTok());
- Out.EmitBytes(StringRef(S.c_str(), S.length() + 1));
+ Out.emitBytes(StringRef(S.c_str(), S.length() + 1));
return expect(AsmToken::EndOfStatement, "EOL");
}
@@ -834,7 +848,17 @@ public:
if (Op0.getImm() == -1)
Op0.setImm(Align);
}
- Out.EmitInstruction(Inst, getSTI());
+ if (getSTI().getTargetTriple().isArch64Bit()) {
+ // Upgrade 32-bit loads/stores to 64-bit. These mostly differ by having
+ // an offset64 arg instead of offset32, but to the assembler matcher
+ // they're both immediates so don't get selected for.
+ auto Opc64 = WebAssembly::getWasm64Opcode(
+ static_cast<uint16_t>(Inst.getOpcode()));
+ if (Opc64 >= 0) {
+ Inst.setOpcode(Opc64);
+ }
+ }
+ Out.emitInstruction(Inst, getSTI());
if (CurrentState == EndFunction) {
onEndOfFunction();
} else {
@@ -879,6 +903,9 @@ public:
auto SecName = ".text." + SymName;
auto WS = getContext().getWasmSection(SecName, SectionKind::getText());
getStreamer().SwitchSection(WS);
+ // Also generate DWARF for this section if requested.
+ if (getContext().getGenDwarfForAssembly())
+ getContext().addGenDwarfSection(WS);
}
void onEndOfFunction() {
@@ -886,7 +913,7 @@ public:
// user.
if (!LastFunctionLabel) return;
auto TempSym = getContext().createLinkerPrivateTempSymbol();
- getStreamer().EmitLabel(TempSym);
+ getStreamer().emitLabel(TempSym);
auto Start = MCSymbolRefExpr::create(LastFunctionLabel, getContext());
auto End = MCSymbolRefExpr::create(TempSym, getContext());
auto Expr =
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index a8cb5d18537c..42fa6d58fffd 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -46,9 +46,10 @@ class WebAssemblyDisassembler final : public MCDisassembler {
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const override;
- DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
- ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &CStream) const override;
+ Optional<DecodeStatus> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &CStream) const override;
public:
WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
@@ -120,29 +121,29 @@ bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
return true;
}
-MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart(
- StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &CStream) const {
+Optional<MCDisassembler::DecodeStatus> WebAssemblyDisassembler::onSymbolStart(
+ SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
+ uint64_t Address, raw_ostream &CStream) const {
Size = 0;
if (Address == 0) {
// Start of a code section: we're parsing only the function count.
int64_t FunctionCount;
if (!nextLEB(FunctionCount, Bytes, Size, false))
- return MCDisassembler::Fail;
+ return None;
outs() << " # " << FunctionCount << " functions in section.";
} else {
// Parse the start of a single function.
int64_t BodySize, LocalEntryCount;
if (!nextLEB(BodySize, Bytes, Size, false) ||
!nextLEB(LocalEntryCount, Bytes, Size, false))
- return MCDisassembler::Fail;
+ return None;
if (LocalEntryCount) {
outs() << " .local ";
for (int64_t I = 0; I < LocalEntryCount; I++) {
int64_t Count, Type;
if (!nextLEB(Count, Bytes, Size, false) ||
!nextLEB(Type, Bytes, Size, false))
- return MCDisassembler::Fail;
+ return None;
for (int64_t J = 0; J < Count; J++) {
if (I || J)
outs() << ", ";
@@ -198,6 +199,7 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
case WebAssembly::OPERAND_GLOBAL:
case WebAssembly::OPERAND_FUNCTION32:
case WebAssembly::OPERAND_OFFSET32:
+ case WebAssembly::OPERAND_OFFSET64:
case WebAssembly::OPERAND_P2ALIGN:
case WebAssembly::OPERAND_TYPEINDEX:
case WebAssembly::OPERAND_EVENT:
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index 8314de41021f..8ecd7c53621d 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -64,9 +64,6 @@ public:
return false;
}
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override {}
-
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
};
@@ -80,6 +77,7 @@ WebAssemblyAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_sleb128_i32", 0, 5 * 8, 0},
{"fixup_sleb128_i64", 0, 10 * 8, 0},
{"fixup_uleb128_i32", 0, 5 * 8, 0},
+ {"fixup_uleb128_i64", 0, 10 * 8, 0},
};
if (Kind < FirstTargetFixupKind)
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
index 33e8de282955..92708dadd3e0 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
@@ -17,6 +17,7 @@ enum Fixups {
fixup_sleb128_i32 = FirstTargetFixupKind, // 32-bit signed
fixup_sleb128_i64, // 64-bit signed
fixup_uleb128_i32, // 32-bit unsigned
+ fixup_uleb128_i64, // 64-bit unsigned
// Marker
LastTargetFixupKind,
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
index b262e06e55e7..f60b5fcd14ec 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
@@ -54,17 +54,28 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, uint64_t Address,
// Print any additional variadic operands.
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
if (Desc.isVariadic()) {
- if (Desc.getNumOperands() == 0 && MI->getNumOperands() > 0)
+ if ((Desc.getNumOperands() == 0 && MI->getNumOperands() > 0) ||
+ Desc.variadicOpsAreDefs())
OS << "\t";
- for (auto I = Desc.getNumOperands(), E = MI->getNumOperands(); I < E; ++I) {
- // FIXME: For CALL_INDIRECT_VOID, don't print a leading comma, because
- // we have an extra flags operand which is not currently printed, for
- // compatiblity reasons.
- if (I != 0 && ((MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID &&
- MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID_S) ||
- I != Desc.getNumOperands()))
+ unsigned Start = Desc.getNumOperands();
+ unsigned NumVariadicDefs = 0;
+ if (Desc.variadicOpsAreDefs()) {
+ // The number of variadic defs is encoded in an immediate by MCInstLower
+ NumVariadicDefs = MI->getOperand(0).getImm();
+ Start = 1;
+ }
+ bool NeedsComma = Desc.getNumOperands() > 0 && !Desc.variadicOpsAreDefs();
+ for (auto I = Start, E = MI->getNumOperands(); I < E; ++I) {
+ if (MI->getOpcode() == WebAssembly::CALL_INDIRECT &&
+ I - Start == NumVariadicDefs) {
+ // Skip type and flags arguments when printing for tests
+ ++I;
+ continue;
+ }
+ if (NeedsComma)
OS << ", ";
- printOperand(MI, I, OS);
+ printOperand(MI, I, OS, I - Start < NumVariadicDefs);
+ NeedsComma = true;
}
}
@@ -207,20 +218,21 @@ static std::string toString(const APFloat &FP) {
}
void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ raw_ostream &O, bool IsVariadicDef) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
unsigned WAReg = Op.getReg();
if (int(WAReg) >= 0)
printRegName(O, WAReg);
- else if (OpNo >= MII.get(MI->getOpcode()).getNumDefs())
+ else if (OpNo >= Desc.getNumDefs() && !IsVariadicDef)
O << "$pop" << WebAssemblyFunctionInfo::getWARegStackId(WAReg);
else if (WAReg != WebAssemblyFunctionInfo::UnusedReg)
O << "$push" << WebAssemblyFunctionInfo::getWARegStackId(WAReg);
else
O << "$drop";
// Add a '=' suffix if this is a def.
- if (OpNo < MII.get(MI->getOpcode()).getNumDefs())
+ if (OpNo < MII.get(MI->getOpcode()).getNumDefs() || IsVariadicDef)
O << '=';
} else if (Op.isImm()) {
O << Op.getImm();
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
index bee85507f044..1387a1928b3f 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
@@ -41,7 +41,8 @@ public:
const MCSubtargetInfo &STI, raw_ostream &OS) override;
// Used by tblegen code.
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+ bool IsVariadicDef = false);
void printBrList(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printWebAssemblyP2AlignOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O);
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index 1a4c57e66d2f..dfed3451e45b 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -158,6 +158,10 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
case WebAssembly::OPERAND_EVENT:
FixupKind = MCFixupKind(WebAssembly::fixup_uleb128_i32);
break;
+ case WebAssembly::OPERAND_OFFSET64:
+ FixupKind = MCFixupKind(WebAssembly::fixup_uleb128_i64);
+ PaddedSize = 10;
+ break;
default:
llvm_unreachable("unexpected symbolic operand kind");
}
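Brief note, not part of the patch: the 10-byte size chosen here (and the 10 * 8 bits in the fixup table above) comes from ULEB128 needing ceil(64 / 7) = 10 groups for a 64-bit value, versus 5 groups for 32 bits; emitting a fixed, padded width lets the later fixup be patched in place without resizing the section. A rough sketch of such a padded encoder, with a hypothetical name:

#include <cstdint>
#include <vector>

// Hypothetical: encode Value as ULEB128 padded to exactly PadTo bytes by
// emitting redundant continuation groups, so a fixup can overwrite it later.
static std::vector<uint8_t> encodePaddedULEB128(uint64_t Value, unsigned PadTo) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0 || Out.size() + 1 < PadTo)
      Byte |= 0x80; // continuation bit: more groups follow
    Out.push_back(Byte);
  } while (Value != 0);
  while (Out.size() < PadTo) // pad with 0x80 groups, end with a plain 0x00
    Out.push_back(Out.size() + 1 < PadTo ? 0x80 : 0x00);
  return Out;
}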
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index b339860a381d..02b310628ee1 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -24,14 +24,10 @@ namespace llvm {
class MCAsmBackend;
class MCCodeEmitter;
-class MCContext;
class MCInstrInfo;
class MCObjectTargetWriter;
-class MCSubtargetInfo;
class MVT;
-class Target;
class Triple;
-class raw_pwrite_stream;
MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII);
@@ -68,6 +64,8 @@ enum OperandType {
OPERAND_FUNCTION32,
/// 32-bit unsigned memory offsets.
OPERAND_OFFSET32,
+ /// 64-bit unsigned memory offsets.
+ OPERAND_OFFSET64,
/// p2align immediate for load and store address alignment.
OPERAND_P2ALIGN,
/// signature immediate for block/loop.
@@ -149,216 +147,121 @@ wasm::ValType toValType(const MVT &Ty);
/// Return the default p2align value for a load or store with the given opcode.
inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
switch (Opc) {
- case WebAssembly::LOAD8_S_I32:
- case WebAssembly::LOAD8_S_I32_S:
- case WebAssembly::LOAD8_U_I32:
- case WebAssembly::LOAD8_U_I32_S:
- case WebAssembly::LOAD8_S_I64:
- case WebAssembly::LOAD8_S_I64_S:
- case WebAssembly::LOAD8_U_I64:
- case WebAssembly::LOAD8_U_I64_S:
- case WebAssembly::ATOMIC_LOAD8_U_I32:
- case WebAssembly::ATOMIC_LOAD8_U_I32_S:
- case WebAssembly::ATOMIC_LOAD8_U_I64:
- case WebAssembly::ATOMIC_LOAD8_U_I64_S:
- case WebAssembly::STORE8_I32:
- case WebAssembly::STORE8_I32_S:
- case WebAssembly::STORE8_I64:
- case WebAssembly::STORE8_I64_S:
- case WebAssembly::ATOMIC_STORE8_I32:
- case WebAssembly::ATOMIC_STORE8_I32_S:
- case WebAssembly::ATOMIC_STORE8_I64:
- case WebAssembly::ATOMIC_STORE8_I64_S:
- case WebAssembly::ATOMIC_RMW8_U_ADD_I32:
- case WebAssembly::ATOMIC_RMW8_U_ADD_I32_S:
- case WebAssembly::ATOMIC_RMW8_U_ADD_I64:
- case WebAssembly::ATOMIC_RMW8_U_ADD_I64_S:
- case WebAssembly::ATOMIC_RMW8_U_SUB_I32:
- case WebAssembly::ATOMIC_RMW8_U_SUB_I32_S:
- case WebAssembly::ATOMIC_RMW8_U_SUB_I64:
- case WebAssembly::ATOMIC_RMW8_U_SUB_I64_S:
- case WebAssembly::ATOMIC_RMW8_U_AND_I32:
- case WebAssembly::ATOMIC_RMW8_U_AND_I32_S:
- case WebAssembly::ATOMIC_RMW8_U_AND_I64:
- case WebAssembly::ATOMIC_RMW8_U_AND_I64_S:
- case WebAssembly::ATOMIC_RMW8_U_OR_I32:
- case WebAssembly::ATOMIC_RMW8_U_OR_I32_S:
- case WebAssembly::ATOMIC_RMW8_U_OR_I64:
- case WebAssembly::ATOMIC_RMW8_U_OR_I64_S:
- case WebAssembly::ATOMIC_RMW8_U_XOR_I32:
- case WebAssembly::ATOMIC_RMW8_U_XOR_I32_S:
- case WebAssembly::ATOMIC_RMW8_U_XOR_I64:
- case WebAssembly::ATOMIC_RMW8_U_XOR_I64_S:
- case WebAssembly::ATOMIC_RMW8_U_XCHG_I32:
- case WebAssembly::ATOMIC_RMW8_U_XCHG_I32_S:
- case WebAssembly::ATOMIC_RMW8_U_XCHG_I64:
- case WebAssembly::ATOMIC_RMW8_U_XCHG_I64_S:
- case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32:
- case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32_S:
- case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64:
- case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64_S:
- case WebAssembly::LOAD_SPLAT_v8x16:
- case WebAssembly::LOAD_SPLAT_v8x16_S:
+#define WASM_LOAD_STORE(NAME) \
+ case WebAssembly::NAME##_A32: \
+ case WebAssembly::NAME##_A64: \
+ case WebAssembly::NAME##_A32_S: \
+ case WebAssembly::NAME##_A64_S:
+ WASM_LOAD_STORE(LOAD8_S_I32)
+ WASM_LOAD_STORE(LOAD8_U_I32)
+ WASM_LOAD_STORE(LOAD8_S_I64)
+ WASM_LOAD_STORE(LOAD8_U_I64)
+ WASM_LOAD_STORE(ATOMIC_LOAD8_U_I32)
+ WASM_LOAD_STORE(ATOMIC_LOAD8_U_I64)
+ WASM_LOAD_STORE(STORE8_I32)
+ WASM_LOAD_STORE(STORE8_I64)
+ WASM_LOAD_STORE(ATOMIC_STORE8_I32)
+ WASM_LOAD_STORE(ATOMIC_STORE8_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW8_U_ADD_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW8_U_ADD_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW8_U_SUB_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW8_U_SUB_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW8_U_AND_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW8_U_AND_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW8_U_OR_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW8_U_OR_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW8_U_XOR_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW8_U_XOR_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW8_U_XCHG_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW8_U_XCHG_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW8_U_CMPXCHG_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW8_U_CMPXCHG_I64)
+ WASM_LOAD_STORE(LOAD_SPLAT_v8x16)
return 0;
- case WebAssembly::LOAD16_S_I32:
- case WebAssembly::LOAD16_S_I32_S:
- case WebAssembly::LOAD16_U_I32:
- case WebAssembly::LOAD16_U_I32_S:
- case WebAssembly::LOAD16_S_I64:
- case WebAssembly::LOAD16_S_I64_S:
- case WebAssembly::LOAD16_U_I64:
- case WebAssembly::LOAD16_U_I64_S:
- case WebAssembly::ATOMIC_LOAD16_U_I32:
- case WebAssembly::ATOMIC_LOAD16_U_I32_S:
- case WebAssembly::ATOMIC_LOAD16_U_I64:
- case WebAssembly::ATOMIC_LOAD16_U_I64_S:
- case WebAssembly::STORE16_I32:
- case WebAssembly::STORE16_I32_S:
- case WebAssembly::STORE16_I64:
- case WebAssembly::STORE16_I64_S:
- case WebAssembly::ATOMIC_STORE16_I32:
- case WebAssembly::ATOMIC_STORE16_I32_S:
- case WebAssembly::ATOMIC_STORE16_I64:
- case WebAssembly::ATOMIC_STORE16_I64_S:
- case WebAssembly::ATOMIC_RMW16_U_ADD_I32:
- case WebAssembly::ATOMIC_RMW16_U_ADD_I32_S:
- case WebAssembly::ATOMIC_RMW16_U_ADD_I64:
- case WebAssembly::ATOMIC_RMW16_U_ADD_I64_S:
- case WebAssembly::ATOMIC_RMW16_U_SUB_I32:
- case WebAssembly::ATOMIC_RMW16_U_SUB_I32_S:
- case WebAssembly::ATOMIC_RMW16_U_SUB_I64:
- case WebAssembly::ATOMIC_RMW16_U_SUB_I64_S:
- case WebAssembly::ATOMIC_RMW16_U_AND_I32:
- case WebAssembly::ATOMIC_RMW16_U_AND_I32_S:
- case WebAssembly::ATOMIC_RMW16_U_AND_I64:
- case WebAssembly::ATOMIC_RMW16_U_AND_I64_S:
- case WebAssembly::ATOMIC_RMW16_U_OR_I32:
- case WebAssembly::ATOMIC_RMW16_U_OR_I32_S:
- case WebAssembly::ATOMIC_RMW16_U_OR_I64:
- case WebAssembly::ATOMIC_RMW16_U_OR_I64_S:
- case WebAssembly::ATOMIC_RMW16_U_XOR_I32:
- case WebAssembly::ATOMIC_RMW16_U_XOR_I32_S:
- case WebAssembly::ATOMIC_RMW16_U_XOR_I64:
- case WebAssembly::ATOMIC_RMW16_U_XOR_I64_S:
- case WebAssembly::ATOMIC_RMW16_U_XCHG_I32:
- case WebAssembly::ATOMIC_RMW16_U_XCHG_I32_S:
- case WebAssembly::ATOMIC_RMW16_U_XCHG_I64:
- case WebAssembly::ATOMIC_RMW16_U_XCHG_I64_S:
- case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32:
- case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32_S:
- case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64:
- case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64_S:
- case WebAssembly::LOAD_SPLAT_v16x8:
- case WebAssembly::LOAD_SPLAT_v16x8_S:
+ WASM_LOAD_STORE(LOAD16_S_I32)
+ WASM_LOAD_STORE(LOAD16_U_I32)
+ WASM_LOAD_STORE(LOAD16_S_I64)
+ WASM_LOAD_STORE(LOAD16_U_I64)
+ WASM_LOAD_STORE(ATOMIC_LOAD16_U_I32)
+ WASM_LOAD_STORE(ATOMIC_LOAD16_U_I64)
+ WASM_LOAD_STORE(STORE16_I32)
+ WASM_LOAD_STORE(STORE16_I64)
+ WASM_LOAD_STORE(ATOMIC_STORE16_I32)
+ WASM_LOAD_STORE(ATOMIC_STORE16_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW16_U_ADD_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW16_U_ADD_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW16_U_SUB_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW16_U_SUB_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW16_U_AND_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW16_U_AND_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW16_U_OR_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW16_U_OR_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW16_U_XOR_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW16_U_XOR_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW16_U_XCHG_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW16_U_XCHG_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW16_U_CMPXCHG_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW16_U_CMPXCHG_I64)
+ WASM_LOAD_STORE(LOAD_SPLAT_v16x8)
return 1;
- case WebAssembly::LOAD_I32:
- case WebAssembly::LOAD_I32_S:
- case WebAssembly::LOAD_F32:
- case WebAssembly::LOAD_F32_S:
- case WebAssembly::STORE_I32:
- case WebAssembly::STORE_I32_S:
- case WebAssembly::STORE_F32:
- case WebAssembly::STORE_F32_S:
- case WebAssembly::LOAD32_S_I64:
- case WebAssembly::LOAD32_S_I64_S:
- case WebAssembly::LOAD32_U_I64:
- case WebAssembly::LOAD32_U_I64_S:
- case WebAssembly::STORE32_I64:
- case WebAssembly::STORE32_I64_S:
- case WebAssembly::ATOMIC_LOAD_I32:
- case WebAssembly::ATOMIC_LOAD_I32_S:
- case WebAssembly::ATOMIC_LOAD32_U_I64:
- case WebAssembly::ATOMIC_LOAD32_U_I64_S:
- case WebAssembly::ATOMIC_STORE_I32:
- case WebAssembly::ATOMIC_STORE_I32_S:
- case WebAssembly::ATOMIC_STORE32_I64:
- case WebAssembly::ATOMIC_STORE32_I64_S:
- case WebAssembly::ATOMIC_RMW_ADD_I32:
- case WebAssembly::ATOMIC_RMW_ADD_I32_S:
- case WebAssembly::ATOMIC_RMW32_U_ADD_I64:
- case WebAssembly::ATOMIC_RMW32_U_ADD_I64_S:
- case WebAssembly::ATOMIC_RMW_SUB_I32:
- case WebAssembly::ATOMIC_RMW_SUB_I32_S:
- case WebAssembly::ATOMIC_RMW32_U_SUB_I64:
- case WebAssembly::ATOMIC_RMW32_U_SUB_I64_S:
- case WebAssembly::ATOMIC_RMW_AND_I32:
- case WebAssembly::ATOMIC_RMW_AND_I32_S:
- case WebAssembly::ATOMIC_RMW32_U_AND_I64:
- case WebAssembly::ATOMIC_RMW32_U_AND_I64_S:
- case WebAssembly::ATOMIC_RMW_OR_I32:
- case WebAssembly::ATOMIC_RMW_OR_I32_S:
- case WebAssembly::ATOMIC_RMW32_U_OR_I64:
- case WebAssembly::ATOMIC_RMW32_U_OR_I64_S:
- case WebAssembly::ATOMIC_RMW_XOR_I32:
- case WebAssembly::ATOMIC_RMW_XOR_I32_S:
- case WebAssembly::ATOMIC_RMW32_U_XOR_I64:
- case WebAssembly::ATOMIC_RMW32_U_XOR_I64_S:
- case WebAssembly::ATOMIC_RMW_XCHG_I32:
- case WebAssembly::ATOMIC_RMW_XCHG_I32_S:
- case WebAssembly::ATOMIC_RMW32_U_XCHG_I64:
- case WebAssembly::ATOMIC_RMW32_U_XCHG_I64_S:
- case WebAssembly::ATOMIC_RMW_CMPXCHG_I32:
- case WebAssembly::ATOMIC_RMW_CMPXCHG_I32_S:
- case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64:
- case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64_S:
- case WebAssembly::ATOMIC_NOTIFY:
- case WebAssembly::ATOMIC_NOTIFY_S:
- case WebAssembly::ATOMIC_WAIT_I32:
- case WebAssembly::ATOMIC_WAIT_I32_S:
- case WebAssembly::LOAD_SPLAT_v32x4:
- case WebAssembly::LOAD_SPLAT_v32x4_S:
+ WASM_LOAD_STORE(LOAD_I32)
+ WASM_LOAD_STORE(LOAD_F32)
+ WASM_LOAD_STORE(STORE_I32)
+ WASM_LOAD_STORE(STORE_F32)
+ WASM_LOAD_STORE(LOAD32_S_I64)
+ WASM_LOAD_STORE(LOAD32_U_I64)
+ WASM_LOAD_STORE(STORE32_I64)
+ WASM_LOAD_STORE(ATOMIC_LOAD_I32)
+ WASM_LOAD_STORE(ATOMIC_LOAD32_U_I64)
+ WASM_LOAD_STORE(ATOMIC_STORE_I32)
+ WASM_LOAD_STORE(ATOMIC_STORE32_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW_ADD_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW32_U_ADD_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW_SUB_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW32_U_SUB_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW_AND_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW32_U_AND_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW_OR_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW32_U_OR_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW_XOR_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW32_U_XOR_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW_XCHG_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW32_U_XCHG_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW_CMPXCHG_I32)
+ WASM_LOAD_STORE(ATOMIC_RMW32_U_CMPXCHG_I64)
+ WASM_LOAD_STORE(ATOMIC_NOTIFY)
+ WASM_LOAD_STORE(ATOMIC_WAIT_I32)
+ WASM_LOAD_STORE(LOAD_SPLAT_v32x4)
return 2;
- case WebAssembly::LOAD_I64:
- case WebAssembly::LOAD_I64_S:
- case WebAssembly::LOAD_F64:
- case WebAssembly::LOAD_F64_S:
- case WebAssembly::STORE_I64:
- case WebAssembly::STORE_I64_S:
- case WebAssembly::STORE_F64:
- case WebAssembly::STORE_F64_S:
- case WebAssembly::ATOMIC_LOAD_I64:
- case WebAssembly::ATOMIC_LOAD_I64_S:
- case WebAssembly::ATOMIC_STORE_I64:
- case WebAssembly::ATOMIC_STORE_I64_S:
- case WebAssembly::ATOMIC_RMW_ADD_I64:
- case WebAssembly::ATOMIC_RMW_ADD_I64_S:
- case WebAssembly::ATOMIC_RMW_SUB_I64:
- case WebAssembly::ATOMIC_RMW_SUB_I64_S:
- case WebAssembly::ATOMIC_RMW_AND_I64:
- case WebAssembly::ATOMIC_RMW_AND_I64_S:
- case WebAssembly::ATOMIC_RMW_OR_I64:
- case WebAssembly::ATOMIC_RMW_OR_I64_S:
- case WebAssembly::ATOMIC_RMW_XOR_I64:
- case WebAssembly::ATOMIC_RMW_XOR_I64_S:
- case WebAssembly::ATOMIC_RMW_XCHG_I64:
- case WebAssembly::ATOMIC_RMW_XCHG_I64_S:
- case WebAssembly::ATOMIC_RMW_CMPXCHG_I64:
- case WebAssembly::ATOMIC_RMW_CMPXCHG_I64_S:
- case WebAssembly::ATOMIC_WAIT_I64:
- case WebAssembly::ATOMIC_WAIT_I64_S:
- case WebAssembly::LOAD_SPLAT_v64x2:
- case WebAssembly::LOAD_SPLAT_v64x2_S:
- case WebAssembly::LOAD_EXTEND_S_v8i16:
- case WebAssembly::LOAD_EXTEND_S_v8i16_S:
- case WebAssembly::LOAD_EXTEND_U_v8i16:
- case WebAssembly::LOAD_EXTEND_U_v8i16_S:
- case WebAssembly::LOAD_EXTEND_S_v4i32:
- case WebAssembly::LOAD_EXTEND_S_v4i32_S:
- case WebAssembly::LOAD_EXTEND_U_v4i32:
- case WebAssembly::LOAD_EXTEND_U_v4i32_S:
- case WebAssembly::LOAD_EXTEND_S_v2i64:
- case WebAssembly::LOAD_EXTEND_S_v2i64_S:
- case WebAssembly::LOAD_EXTEND_U_v2i64:
- case WebAssembly::LOAD_EXTEND_U_v2i64_S:
+ WASM_LOAD_STORE(LOAD_I64)
+ WASM_LOAD_STORE(LOAD_F64)
+ WASM_LOAD_STORE(STORE_I64)
+ WASM_LOAD_STORE(STORE_F64)
+ WASM_LOAD_STORE(ATOMIC_LOAD_I64)
+ WASM_LOAD_STORE(ATOMIC_STORE_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW_ADD_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW_SUB_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW_AND_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW_OR_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW_XOR_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW_XCHG_I64)
+ WASM_LOAD_STORE(ATOMIC_RMW_CMPXCHG_I64)
+ WASM_LOAD_STORE(ATOMIC_WAIT_I64)
+ WASM_LOAD_STORE(LOAD_SPLAT_v64x2)
+ WASM_LOAD_STORE(LOAD_EXTEND_S_v8i16)
+ WASM_LOAD_STORE(LOAD_EXTEND_U_v8i16)
+ WASM_LOAD_STORE(LOAD_EXTEND_S_v4i32)
+ WASM_LOAD_STORE(LOAD_EXTEND_U_v4i32)
+ WASM_LOAD_STORE(LOAD_EXTEND_S_v2i64)
+ WASM_LOAD_STORE(LOAD_EXTEND_U_v2i64)
return 3;
- case WebAssembly::LOAD_V128:
- case WebAssembly::LOAD_V128_S:
- case WebAssembly::STORE_V128:
- case WebAssembly::STORE_V128_S:
+ WASM_LOAD_STORE(LOAD_V128)
+ WASM_LOAD_STORE(STORE_V128)
return 4;
default:
return -1;
}
+#undef WASM_LOAD_STORE
}
inline unsigned GetDefaultP2Align(unsigned Opc) {
@@ -441,30 +344,8 @@ inline bool isTee(unsigned Opc) {
inline bool isCallDirect(unsigned Opc) {
switch (Opc) {
- case WebAssembly::CALL_VOID:
- case WebAssembly::CALL_VOID_S:
- case WebAssembly::CALL_i32:
- case WebAssembly::CALL_i32_S:
- case WebAssembly::CALL_i64:
- case WebAssembly::CALL_i64_S:
- case WebAssembly::CALL_f32:
- case WebAssembly::CALL_f32_S:
- case WebAssembly::CALL_f64:
- case WebAssembly::CALL_f64_S:
- case WebAssembly::CALL_v16i8:
- case WebAssembly::CALL_v16i8_S:
- case WebAssembly::CALL_v8i16:
- case WebAssembly::CALL_v8i16_S:
- case WebAssembly::CALL_v4i32:
- case WebAssembly::CALL_v4i32_S:
- case WebAssembly::CALL_v2i64:
- case WebAssembly::CALL_v2i64_S:
- case WebAssembly::CALL_v4f32:
- case WebAssembly::CALL_v4f32_S:
- case WebAssembly::CALL_v2f64:
- case WebAssembly::CALL_v2f64_S:
- case WebAssembly::CALL_exnref:
- case WebAssembly::CALL_exnref_S:
+ case WebAssembly::CALL:
+ case WebAssembly::CALL_S:
case WebAssembly::RET_CALL:
case WebAssembly::RET_CALL_S:
return true;
@@ -475,30 +356,8 @@ inline bool isCallDirect(unsigned Opc) {
inline bool isCallIndirect(unsigned Opc) {
switch (Opc) {
- case WebAssembly::CALL_INDIRECT_VOID:
- case WebAssembly::CALL_INDIRECT_VOID_S:
- case WebAssembly::CALL_INDIRECT_i32:
- case WebAssembly::CALL_INDIRECT_i32_S:
- case WebAssembly::CALL_INDIRECT_i64:
- case WebAssembly::CALL_INDIRECT_i64_S:
- case WebAssembly::CALL_INDIRECT_f32:
- case WebAssembly::CALL_INDIRECT_f32_S:
- case WebAssembly::CALL_INDIRECT_f64:
- case WebAssembly::CALL_INDIRECT_f64_S:
- case WebAssembly::CALL_INDIRECT_v16i8:
- case WebAssembly::CALL_INDIRECT_v16i8_S:
- case WebAssembly::CALL_INDIRECT_v8i16:
- case WebAssembly::CALL_INDIRECT_v8i16_S:
- case WebAssembly::CALL_INDIRECT_v4i32:
- case WebAssembly::CALL_INDIRECT_v4i32_S:
- case WebAssembly::CALL_INDIRECT_v2i64:
- case WebAssembly::CALL_INDIRECT_v2i64_S:
- case WebAssembly::CALL_INDIRECT_v4f32:
- case WebAssembly::CALL_INDIRECT_v4f32_S:
- case WebAssembly::CALL_INDIRECT_v2f64:
- case WebAssembly::CALL_INDIRECT_v2f64_S:
- case WebAssembly::CALL_INDIRECT_exnref:
- case WebAssembly::CALL_INDIRECT_exnref_S:
+ case WebAssembly::CALL_INDIRECT:
+ case WebAssembly::CALL_INDIRECT_S:
case WebAssembly::RET_CALL_INDIRECT:
case WebAssembly::RET_CALL_INDIRECT_S:
return true;
@@ -507,66 +366,15 @@ inline bool isCallIndirect(unsigned Opc) {
}
}
-/// Returns the operand number of a callee, assuming the argument is a call
-/// instruction.
-inline unsigned getCalleeOpNo(unsigned Opc) {
- switch (Opc) {
- case WebAssembly::CALL_VOID:
- case WebAssembly::CALL_VOID_S:
- case WebAssembly::CALL_INDIRECT_VOID:
- case WebAssembly::CALL_INDIRECT_VOID_S:
- case WebAssembly::RET_CALL:
- case WebAssembly::RET_CALL_S:
- case WebAssembly::RET_CALL_INDIRECT:
- case WebAssembly::RET_CALL_INDIRECT_S:
- return 0;
- case WebAssembly::CALL_i32:
- case WebAssembly::CALL_i32_S:
- case WebAssembly::CALL_i64:
- case WebAssembly::CALL_i64_S:
- case WebAssembly::CALL_f32:
- case WebAssembly::CALL_f32_S:
- case WebAssembly::CALL_f64:
- case WebAssembly::CALL_f64_S:
- case WebAssembly::CALL_v16i8:
- case WebAssembly::CALL_v16i8_S:
- case WebAssembly::CALL_v8i16:
- case WebAssembly::CALL_v8i16_S:
- case WebAssembly::CALL_v4i32:
- case WebAssembly::CALL_v4i32_S:
- case WebAssembly::CALL_v2i64:
- case WebAssembly::CALL_v2i64_S:
- case WebAssembly::CALL_v4f32:
- case WebAssembly::CALL_v4f32_S:
- case WebAssembly::CALL_v2f64:
- case WebAssembly::CALL_v2f64_S:
- case WebAssembly::CALL_exnref:
- case WebAssembly::CALL_exnref_S:
- case WebAssembly::CALL_INDIRECT_i32:
- case WebAssembly::CALL_INDIRECT_i32_S:
- case WebAssembly::CALL_INDIRECT_i64:
- case WebAssembly::CALL_INDIRECT_i64_S:
- case WebAssembly::CALL_INDIRECT_f32:
- case WebAssembly::CALL_INDIRECT_f32_S:
- case WebAssembly::CALL_INDIRECT_f64:
- case WebAssembly::CALL_INDIRECT_f64_S:
- case WebAssembly::CALL_INDIRECT_v16i8:
- case WebAssembly::CALL_INDIRECT_v16i8_S:
- case WebAssembly::CALL_INDIRECT_v8i16:
- case WebAssembly::CALL_INDIRECT_v8i16_S:
- case WebAssembly::CALL_INDIRECT_v4i32:
- case WebAssembly::CALL_INDIRECT_v4i32_S:
- case WebAssembly::CALL_INDIRECT_v2i64:
- case WebAssembly::CALL_INDIRECT_v2i64_S:
- case WebAssembly::CALL_INDIRECT_v4f32:
- case WebAssembly::CALL_INDIRECT_v4f32_S:
- case WebAssembly::CALL_INDIRECT_v2f64:
- case WebAssembly::CALL_INDIRECT_v2f64_S:
- case WebAssembly::CALL_INDIRECT_exnref:
- case WebAssembly::CALL_INDIRECT_exnref_S:
- return 1;
+inline bool isBrTable(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case WebAssembly::BR_TABLE_I32:
+ case WebAssembly::BR_TABLE_I32_S:
+ case WebAssembly::BR_TABLE_I64:
+ case WebAssembly::BR_TABLE_I64_S:
+ return true;
default:
- llvm_unreachable("Not a call instruction");
+ return false;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
index 7c21ed5f974e..e954eeaebb14 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
@@ -28,7 +28,7 @@ WebAssemblyTargetStreamer::WebAssemblyTargetStreamer(MCStreamer &S)
: MCTargetStreamer(S) {}
void WebAssemblyTargetStreamer::emitValueType(wasm::ValType Type) {
- Streamer.EmitIntValue(uint8_t(Type), 1);
+ Streamer.emitIntValue(uint8_t(Type), 1);
}
WebAssemblyTargetAsmStreamer::WebAssemblyTargetAsmStreamer(
@@ -113,9 +113,9 @@ void WebAssemblyTargetWasmStreamer::emitLocal(ArrayRef<wasm::ValType> Types) {
++Grouped.back().second;
}
- Streamer.EmitULEB128IntValue(Grouped.size());
+ Streamer.emitULEB128IntValue(Grouped.size());
for (auto Pair : Grouped) {
- Streamer.EmitULEB128IntValue(Pair.second);
+ Streamer.emitULEB128IntValue(Pair.second);
emitValueType(Pair.first);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
index 9aee1a06c956..d6fba05c9986 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -21,7 +21,6 @@
namespace llvm {
-class MCWasmStreamer;
class MCSymbolWasm;
/// WebAssembly-specific streamer interface, to implement support
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
index e7a599e3e175..779e921c1d94 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
@@ -78,7 +78,8 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
return wasm::R_WASM_TABLE_INDEX_REL_SLEB;
case MCSymbolRefExpr::VK_WASM_MBREL:
assert(SymA.isData());
- return wasm::R_WASM_MEMORY_ADDR_REL_SLEB;
+ return is64Bit() ? wasm::R_WASM_MEMORY_ADDR_REL_SLEB64
+ : wasm::R_WASM_MEMORY_ADDR_REL_SLEB;
case MCSymbolRefExpr::VK_WASM_TYPEINDEX:
return wasm::R_WASM_TYPE_INDEX_LEB;
default:
@@ -91,7 +92,8 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
return wasm::R_WASM_TABLE_INDEX_SLEB;
return wasm::R_WASM_MEMORY_ADDR_SLEB;
case WebAssembly::fixup_sleb128_i64:
- llvm_unreachable("fixup_sleb128_i64 not implemented yet");
+ assert(SymA.isData());
+ return wasm::R_WASM_MEMORY_ADDR_SLEB64;
case WebAssembly::fixup_uleb128_i32:
if (SymA.isGlobal())
return wasm::R_WASM_GLOBAL_INDEX_LEB;
@@ -100,9 +102,14 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
if (SymA.isEvent())
return wasm::R_WASM_EVENT_INDEX_LEB;
return wasm::R_WASM_MEMORY_ADDR_LEB;
+ case WebAssembly::fixup_uleb128_i64:
+ assert(SymA.isData());
+ return wasm::R_WASM_MEMORY_ADDR_LEB64;
case FK_Data_4:
if (SymA.isFunction())
return wasm::R_WASM_TABLE_INDEX_I32;
+ if (SymA.isGlobal())
+ return wasm::R_WASM_GLOBAL_INDEX_I32;
if (auto Section = static_cast<const MCSectionWasm *>(
getFixupSection(Fixup.getValue()))) {
if (Section->getKind().isText())
@@ -111,6 +118,9 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
return wasm::R_WASM_SECTION_OFFSET_I32;
}
return wasm::R_WASM_MEMORY_ADDR_I32;
+ case FK_Data_8:
+ assert(SymA.isData());
+ return wasm::R_WASM_MEMORY_ADDR_I64;
default:
llvm_unreachable("unimplemented fixup kind");
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
index 87317f8a7f1e..f9a96819905f 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
@@ -32,3 +32,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyTargetInfo() {
RegisterTarget<Triple::wasm64> Y(getTheWebAssemblyTarget64(), "wasm64",
"WebAssembly 64-bit", "WebAssembly");
}
+
+// Defines llvm::WebAssembly::getWasm64Opcode and llvm::WebAssembly::getStackOpcode,
+// which have to be in a shared location between CodeGen and MC.
+#define GET_INSTRMAP_INFO 1
+#define GET_INSTRINFO_ENUM 1
+#include "WebAssemblyGenInstrInfo.inc"
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h
index a7427f78c72c..be7a632331c8 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h
@@ -21,6 +21,13 @@ class Target;
Target &getTheWebAssemblyTarget32();
Target &getTheWebAssemblyTarget64();
+namespace WebAssembly {
+
+int getStackOpcode(unsigned short Opcode);
+int getWasm64Opcode(unsigned short Opcode);
+
+} // namespace WebAssembly
+
} // namespace llvm
#endif // LLVM_LIB_TARGET_WEBASSEMBLY_TARGETINFO_WEBASSEMBLYTARGETINFO_H
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.h
index fcd48e0096b6..9ce02f7731e0 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.h
@@ -44,6 +44,7 @@ FunctionPass *createWebAssemblyOptimizeLiveIntervals();
FunctionPass *createWebAssemblyMemIntrinsicResults();
FunctionPass *createWebAssemblyRegStackify();
FunctionPass *createWebAssemblyRegColoring();
+FunctionPass *createWebAssemblyFixBrTableDefaults();
FunctionPass *createWebAssemblyFixIrreducibleControlFlow();
FunctionPass *createWebAssemblyLateEHPrepare();
FunctionPass *createWebAssemblyCFGSort();
@@ -51,8 +52,8 @@ FunctionPass *createWebAssemblyCFGStackify();
FunctionPass *createWebAssemblyExplicitLocals();
FunctionPass *createWebAssemblyLowerBrUnless();
FunctionPass *createWebAssemblyRegNumbering();
+FunctionPass *createWebAssemblyDebugFixup();
FunctionPass *createWebAssemblyPeephole();
-FunctionPass *createWebAssemblyCallIndirectFixup();
// PassRegistry initialization declarations.
void initializeWebAssemblyAddMissingPrototypesPass(PassRegistry &);
@@ -68,6 +69,7 @@ void initializeWebAssemblyOptimizeLiveIntervalsPass(PassRegistry &);
void initializeWebAssemblyMemIntrinsicResultsPass(PassRegistry &);
void initializeWebAssemblyRegStackifyPass(PassRegistry &);
void initializeWebAssemblyRegColoringPass(PassRegistry &);
+void initializeWebAssemblyFixBrTableDefaultsPass(PassRegistry &);
void initializeWebAssemblyFixIrreducibleControlFlowPass(PassRegistry &);
void initializeWebAssemblyLateEHPreparePass(PassRegistry &);
void initializeWebAssemblyExceptionInfoPass(PassRegistry &);
@@ -76,11 +78,20 @@ void initializeWebAssemblyCFGStackifyPass(PassRegistry &);
void initializeWebAssemblyExplicitLocalsPass(PassRegistry &);
void initializeWebAssemblyLowerBrUnlessPass(PassRegistry &);
void initializeWebAssemblyRegNumberingPass(PassRegistry &);
+void initializeWebAssemblyDebugFixupPass(PassRegistry &);
void initializeWebAssemblyPeepholePass(PassRegistry &);
-void initializeWebAssemblyCallIndirectFixupPass(PassRegistry &);
namespace WebAssembly {
-enum TargetIndex { TI_LOCAL_START, TI_GLOBAL_START, TI_OPERAND_STACK_START };
+enum TargetIndex {
+ // Followed by a local index (ULEB).
+ TI_LOCAL,
+ // Followed by an absolute global index (ULEB). DEPRECATED.
+ TI_GLOBAL_FIXED,
+ TI_OPERAND_STACK,
+ // Followed by a compilation unit relative global index (uint32_t)
+ // that will have an associated relocation.
+ TI_GLOBAL_RELOC
+};
} // end namespace WebAssembly
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.td
index b0b8a9b996a3..2c18bf2c3abe 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssembly.td
@@ -66,6 +66,10 @@ def FeatureMutableGlobals :
SubtargetFeature<"mutable-globals", "HasMutableGlobals", "true",
"Enable mutable globals">;
+def FeatureReferenceTypes :
+ SubtargetFeature<"reference-types", "HasReferenceTypes", "true",
+ "Enable reference types">;
+
//===----------------------------------------------------------------------===//
// Architectures.
//===----------------------------------------------------------------------===//
@@ -98,7 +102,8 @@ def : ProcessorModel<"generic", NoSchedModel, []>;
def : ProcessorModel<"bleeding-edge", NoSchedModel,
[FeatureSIMD128, FeatureAtomics,
FeatureNontrappingFPToInt, FeatureSignExt,
- FeatureMutableGlobals]>;
+ FeatureMutableGlobals, FeatureBulkMemory,
+ FeatureTailCall]>;
//===----------------------------------------------------------------------===//
// Target Declaration
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
index b7a701f15782..530a55cda0e5 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
@@ -132,7 +132,7 @@ bool WebAssemblyAddMissingPrototypes::runOnModule(Module &M) {
for (auto &Pair : Replacements) {
Function *OldF = Pair.first;
Function *NewF = Pair.second;
- std::string Name = OldF->getName();
+ std::string Name = std::string(OldF->getName());
M.getFunctionList().push_back(NewF);
OldF->replaceAllUsesWith(
ConstantExpr::getPointerBitCastOrAddrSpaceCast(NewF, OldF->getType()));
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index adcb24b4be53..96fa13d30729 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -85,7 +85,7 @@ WebAssemblyTargetStreamer *WebAssemblyAsmPrinter::getTargetStreamer() {
// WebAssemblyAsmPrinter Implementation.
//===----------------------------------------------------------------------===//
-void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
+void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) {
for (auto &It : OutContext.getSymbols()) {
// Emit a .globaltype and .eventtype declaration.
auto Sym = cast<MCSymbolWasm>(It.getValue());
@@ -103,7 +103,7 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
if (F.isDeclarationForLinker()) {
SmallVector<MVT, 4> Results;
SmallVector<MVT, 4> Params;
- computeSignatureVTs(F.getFunctionType(), F, TM, Params, Results);
+ computeSignatureVTs(F.getFunctionType(), &F, F, TM, Params, Results);
auto *Sym = cast<MCSymbolWasm>(getSymbol(&F));
Sym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
if (!Sym->getSignature()) {
@@ -122,14 +122,14 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
F.hasFnAttribute("wasm-import-module")) {
StringRef Name =
F.getFnAttribute("wasm-import-module").getValueAsString();
- Sym->setImportModule(Name);
+ Sym->setImportModule(storeName(Name));
getTargetStreamer()->emitImportModule(Sym, Name);
}
if (TM.getTargetTriple().isOSBinFormatWasm() &&
F.hasFnAttribute("wasm-import-name")) {
StringRef Name =
F.getFnAttribute("wasm-import-name").getValueAsString();
- Sym->setImportName(Name);
+ Sym->setImportName(storeName(Name));
getTargetStreamer()->emitImportName(Sym, Name);
}
}
@@ -137,7 +137,7 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
if (F.hasFnAttribute("wasm-export-name")) {
auto *Sym = cast<MCSymbolWasm>(getSymbol(&F));
StringRef Name = F.getFnAttribute("wasm-export-name").getValueAsString();
- Sym->setExportName(Name);
+ Sym->setExportName(storeName(Name));
getTargetStreamer()->emitExportName(Sym, Name);
}
}
@@ -167,7 +167,7 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
MCSectionWasm *MySection =
OutContext.getWasmSection(SectionName, SectionKind::getMetadata());
OutStreamer->SwitchSection(MySection);
- OutStreamer->EmitBytes(Contents->getString());
+ OutStreamer->emitBytes(Contents->getString());
OutStreamer->PopSection();
}
}
@@ -208,19 +208,19 @@ void WebAssemblyAsmPrinter::EmitProducerInfo(Module &M) {
".custom_section.producers", SectionKind::getMetadata());
OutStreamer->PushSection();
OutStreamer->SwitchSection(Producers);
- OutStreamer->EmitULEB128IntValue(FieldCount);
+ OutStreamer->emitULEB128IntValue(FieldCount);
for (auto &Producers : {std::make_pair("language", &Languages),
std::make_pair("processed-by", &Tools)}) {
if (Producers.second->empty())
continue;
- OutStreamer->EmitULEB128IntValue(strlen(Producers.first));
- OutStreamer->EmitBytes(Producers.first);
- OutStreamer->EmitULEB128IntValue(Producers.second->size());
+ OutStreamer->emitULEB128IntValue(strlen(Producers.first));
+ OutStreamer->emitBytes(Producers.first);
+ OutStreamer->emitULEB128IntValue(Producers.second->size());
for (auto &Producer : *Producers.second) {
- OutStreamer->EmitULEB128IntValue(Producer.first.size());
- OutStreamer->EmitBytes(Producer.first);
- OutStreamer->EmitULEB128IntValue(Producer.second.size());
- OutStreamer->EmitBytes(Producer.second);
+ OutStreamer->emitULEB128IntValue(Producer.first.size());
+ OutStreamer->emitBytes(Producer.first);
+ OutStreamer->emitULEB128IntValue(Producer.second.size());
+ OutStreamer->emitBytes(Producer.second);
}
}
OutStreamer->PopSection();
@@ -230,20 +230,20 @@ void WebAssemblyAsmPrinter::EmitProducerInfo(Module &M) {
void WebAssemblyAsmPrinter::EmitTargetFeatures(Module &M) {
struct FeatureEntry {
uint8_t Prefix;
- StringRef Name;
+ std::string Name;
};
// Read target features and linkage policies from module metadata
SmallVector<FeatureEntry, 4> EmittedFeatures;
- for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) {
- std::string MDKey = (StringRef("wasm-feature-") + KV.Key).str();
+ auto EmitFeature = [&](std::string Feature) {
+ std::string MDKey = (StringRef("wasm-feature-") + Feature).str();
Metadata *Policy = M.getModuleFlag(MDKey);
if (Policy == nullptr)
- continue;
+ return;
FeatureEntry Entry;
Entry.Prefix = 0;
- Entry.Name = KV.Key;
+ Entry.Name = Feature;
if (auto *MD = cast<ConstantAsMetadata>(Policy))
if (auto *I = cast<ConstantInt>(MD->getValue()))
@@ -253,10 +253,16 @@ void WebAssemblyAsmPrinter::EmitTargetFeatures(Module &M) {
if (Entry.Prefix != wasm::WASM_FEATURE_PREFIX_USED &&
Entry.Prefix != wasm::WASM_FEATURE_PREFIX_REQUIRED &&
Entry.Prefix != wasm::WASM_FEATURE_PREFIX_DISALLOWED)
- continue;
+ return;
EmittedFeatures.push_back(Entry);
+ };
+
+ for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) {
+ EmitFeature(KV.Key);
}
+ // This pseudo-feature tells the linker whether shared memory would be safe
+ EmitFeature("shared-mem");
if (EmittedFeatures.size() == 0)
return;
@@ -267,30 +273,31 @@ void WebAssemblyAsmPrinter::EmitTargetFeatures(Module &M) {
OutStreamer->PushSection();
OutStreamer->SwitchSection(FeaturesSection);
- OutStreamer->EmitULEB128IntValue(EmittedFeatures.size());
+ OutStreamer->emitULEB128IntValue(EmittedFeatures.size());
for (auto &F : EmittedFeatures) {
- OutStreamer->EmitIntValue(F.Prefix, 1);
- OutStreamer->EmitULEB128IntValue(F.Name.size());
- OutStreamer->EmitBytes(F.Name);
+ OutStreamer->emitIntValue(F.Prefix, 1);
+ OutStreamer->emitULEB128IntValue(F.Name.size());
+ OutStreamer->emitBytes(F.Name);
}
OutStreamer->PopSection();
}
-void WebAssemblyAsmPrinter::EmitConstantPool() {
+void WebAssemblyAsmPrinter::emitConstantPool() {
assert(MF->getConstantPool()->getConstants().empty() &&
"WebAssembly disables constant pools");
}
-void WebAssemblyAsmPrinter::EmitJumpTableInfo() {
+void WebAssemblyAsmPrinter::emitJumpTableInfo() {
// Nothing to do; jump tables are incorporated into the instruction stream.
}
-void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
+void WebAssemblyAsmPrinter::emitFunctionBodyStart() {
const Function &F = MF->getFunction();
SmallVector<MVT, 1> ResultVTs;
SmallVector<MVT, 4> ParamVTs;
- computeSignatureVTs(F.getFunctionType(), F, TM, ParamVTs, ResultVTs);
+ computeSignatureVTs(F.getFunctionType(), &F, F, TM, ParamVTs, ResultVTs);
+
auto Signature = signatureFromMVTs(ResultVTs, ParamVTs);
auto *WasmSym = cast<MCSymbolWasm>(CurrentFnSym);
WasmSym->setSignature(Signature.get());
@@ -312,10 +319,10 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
valTypesFromMVTs(MFI->getLocals(), Locals);
getTargetStreamer()->emitLocal(Locals);
- AsmPrinter::EmitFunctionBodyStart();
+ AsmPrinter::emitFunctionBodyStart();
}
-void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void WebAssemblyAsmPrinter::emitInstruction(const MachineInstr *MI) {
LLVM_DEBUG(dbgs() << "EmitInstruction: " << *MI << '\n');
switch (MI->getOpcode()) {
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
index 4e55c81dec38..d9281568638d 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
@@ -16,9 +16,7 @@
#include "llvm/Target/TargetMachine.h"
namespace llvm {
-class MCSymbol;
class WebAssemblyTargetStreamer;
-class WebAssemblyMCInstLower;
class LLVM_LIBRARY_VISIBILITY WebAssemblyAsmPrinter final : public AsmPrinter {
const WebAssemblySubtarget *Subtarget;
@@ -26,6 +24,13 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyAsmPrinter final : public AsmPrinter {
WebAssemblyFunctionInfo *MFI;
// TODO: Do the uniquing of Signatures here instead of ObjectFileWriter?
std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures;
+ std::vector<std::unique_ptr<std::string>> Names;
+
+ StringRef storeName(StringRef Name) {
+ std::unique_ptr<std::string> N = std::make_unique<std::string>(Name);
+ Names.push_back(std::move(N));
+ return *Names.back();
+ }
public:
explicit WebAssemblyAsmPrinter(TargetMachine &TM,
@@ -57,13 +62,13 @@ public:
// AsmPrinter Implementation.
//===------------------------------------------------------------------===//
- void EmitEndOfAsmFile(Module &M) override;
+ void emitEndOfAsmFile(Module &M) override;
void EmitProducerInfo(Module &M);
void EmitTargetFeatures(Module &M);
- void EmitJumpTableInfo() override;
- void EmitConstantPool() override;
- void EmitFunctionBodyStart() override;
- void EmitInstruction(const MachineInstr *MI) override;
+ void emitJumpTableInfo() override;
+ void emitConstantPool() override;
+ void emitFunctionBodyStart() override;
+ void emitInstruction(const MachineInstr *MI) override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode, raw_ostream &OS) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
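One observation, not stated in the patch itself: the setImportModule / setImportName / setExportName call sites now route strings through storeName() because MCSymbolWasm keeps these names as non-owning views, so the underlying bytes must outlive the symbol; the Names vector above gives them AsmPrinter lifetime. A tiny sketch of the hazard this avoids, using hypothetical types and names:

#include <string>
#include "llvm/ADT/StringRef.h"

// Hypothetical stand-in for a symbol that stores a non-owning name view.
struct SymbolLike {
  llvm::StringRef ImportName;
};

static void dangles(SymbolLike &Sym) {
  std::string Temp = "env";  // short-lived owner of the bytes
  Sym.ImportName = Temp;     // view into Temp's buffer
}                            // Temp is destroyed here; the view now dangles

// storeName() avoids this by copying the bytes into printer-owned storage
// and handing out a StringRef into that long-lived copy instead.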
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
index c069af9eed62..8442b49e25f4 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
@@ -79,7 +79,6 @@ template <> bool ConcreteRegion<MachineLoop>::isLoop() const { return true; }
class RegionInfo {
const MachineLoopInfo &MLI;
const WebAssemblyExceptionInfo &WEI;
- std::vector<const Region *> Regions;
DenseMap<const MachineLoop *, std::unique_ptr<Region>> LoopMap;
DenseMap<const WebAssemblyException *, std::unique_ptr<Region>> ExceptionMap;
@@ -93,7 +92,14 @@ public:
const auto *WE = WEI.getExceptionFor(MBB);
if (!ML && !WE)
return nullptr;
- if ((ML && !WE) || (ML && WE && ML->getNumBlocks() < WE->getNumBlocks())) {
+ // We determine subregion relationship by domination of their headers, i.e.,
+ // if region A's header dominates region B's header, B is a subregion of A.
+ // WebAssemblyException contains BBs in all its subregions (loops or
+ // exceptions), but MachineLoop may not, because MachineLoop does not contain
+ // BBs that don't have a path to its header even if they are dominated by
+ // its header. So here we should use WE->contains(ML->getHeader()), but not
+ // ML->contains(WE->getHeader()).
+ if ((ML && !WE) || (ML && WE && WE->contains(ML->getHeader()))) {
// If the smallest region containing MBB is a loop
if (LoopMap.count(ML))
return LoopMap[ML].get();
@@ -152,9 +158,17 @@ static void maybeUpdateTerminator(MachineBasicBlock *MBB) {
AllAnalyzable &= Term.isBranch() && !Term.isIndirectBranch();
}
assert((AnyBarrier || AllAnalyzable) &&
- "AnalyzeBranch needs to analyze any block with a fallthrough");
+ "analyzeBranch needs to analyze any block with a fallthrough");
+
+ // Find the layout successor from the original block order.
+ MachineFunction *MF = MBB->getParent();
+ MachineBasicBlock *OriginalSuccessor =
+ unsigned(MBB->getNumber() + 1) < MF->getNumBlockIDs()
+ ? MF->getBlockNumbered(MBB->getNumber() + 1)
+ : nullptr;
+
if (AllAnalyzable)
- MBB->updateTerminator();
+ MBB->updateTerminator(OriginalSuccessor);
}
namespace {
@@ -241,9 +255,12 @@ struct Entry {
static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
const WebAssemblyExceptionInfo &WEI,
const MachineDominatorTree &MDT) {
+ // Remember original layout ordering, so we can update terminators after
+ // reordering to point to the original layout successor.
+ MF.RenumberBlocks();
+
// Prepare for a topological sort: Record the number of predecessors each
// block has, ignoring loop backedges.
- MF.RenumberBlocks();
SmallVector<unsigned, 16> NumPredsLeft(MF.getNumBlockIDs(), 0);
for (MachineBasicBlock &MBB : MF) {
unsigned N = MBB.pred_size();
@@ -368,6 +385,7 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
const Region *Region = RI.getRegionFor(&MBB);
if (Region && &MBB == Region->getHeader()) {
+ // Region header.
if (Region->isLoop()) {
// Loop header. The loop predecessor should be sorted above, and the
// other predecessors should be backedges below.
@@ -377,7 +395,7 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
"Loop header predecessors must be loop predecessors or "
"backedges");
} else {
- // Not a loop header. All predecessors should be sorted above.
+ // Exception header. All predecessors should be sorted above.
for (auto Pred : MBB.predecessors())
assert(Pred->getNumber() < MBB.getNumber() &&
"Non-loop-header predecessors should be topologically sorted");
@@ -386,7 +404,7 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
"Regions should be declared at most once.");
} else {
- // Not a loop header. All predecessors should be sorted above.
+ // Not a region header. All predecessors should be sorted above.
for (auto Pred : MBB.predecessors())
assert(Pred->getNumber() < MBB.getNumber() &&
"Non-loop-header predecessors should be topologically sorted");
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index 7e867edaaa27..8cbfc98e8197 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -31,6 +31,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-cfg-stackify"
@@ -277,11 +278,19 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
#endif
}
- // All previously inserted BLOCK/TRY markers should be after the BLOCK
- // because they are all nested blocks.
+ // If there is a previously placed BLOCK/TRY marker and its corresponding
+ // END marker is before the current BLOCK's END marker, that should be
+ // placed after this BLOCK. Otherwise it should be placed before this BLOCK
+ // marker.
if (MI.getOpcode() == WebAssembly::BLOCK ||
- MI.getOpcode() == WebAssembly::TRY)
- AfterSet.insert(&MI);
+ MI.getOpcode() == WebAssembly::TRY) {
+ if (BeginToEnd[&MI]->getParent()->getNumber() <= MBB.getNumber())
+ AfterSet.insert(&MI);
+#ifndef NDEBUG
+ else
+ BeforeSet.insert(&MI);
+#endif
+ }
#ifndef NDEBUG
// All END_(BLOCK|LOOP|TRY) markers should be before the BLOCK.
@@ -661,9 +670,28 @@ void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) {
MachineBasicBlock *EHPadLayoutPred = MBB.getPrevNode();
MachineBasicBlock *Cont = BeginToEnd[EHPadToTry[&MBB]]->getParent();
bool Analyzable = !TII.analyzeBranch(*EHPadLayoutPred, TBB, FBB, Cond);
+ // This condition means either
+ // 1. This BB ends with a single unconditional branch whose destination is
+ // Cont.
+ // 2. This BB ends with a conditional branch followed by an unconditional
+ // branch, and the unconditional branch's destination is Cont.
+ // In both cases, we want to remove the last (= unconditional) branch.
if (Analyzable && ((Cond.empty() && TBB && TBB == Cont) ||
- (!Cond.empty() && FBB && FBB == Cont)))
- TII.removeBranch(*EHPadLayoutPred);
+ (!Cond.empty() && FBB && FBB == Cont))) {
+ bool ErasedUncondBr = false;
+ (void)ErasedUncondBr;
+ for (auto I = EHPadLayoutPred->end(), E = EHPadLayoutPred->begin();
+ I != E; --I) {
+ auto PrevI = std::prev(I);
+ if (PrevI->isTerminator()) {
+ assert(PrevI->getOpcode() == WebAssembly::BR);
+ PrevI->eraseFromParent();
+ ErasedUncondBr = true;
+ break;
+ }
+ }
+ assert(ErasedUncondBr && "Unconditional branch not erased!");
+ }
}
// When there are block / end_block markers that overlap with try / end_try
@@ -705,12 +733,30 @@ void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) {
}
}
+// Get the appropriate copy opcode for the given register class.
+static unsigned getCopyOpcode(const TargetRegisterClass *RC) {
+ if (RC == &WebAssembly::I32RegClass)
+ return WebAssembly::COPY_I32;
+ if (RC == &WebAssembly::I64RegClass)
+ return WebAssembly::COPY_I64;
+ if (RC == &WebAssembly::F32RegClass)
+ return WebAssembly::COPY_F32;
+ if (RC == &WebAssembly::F64RegClass)
+ return WebAssembly::COPY_F64;
+ if (RC == &WebAssembly::V128RegClass)
+ return WebAssembly::COPY_V128;
+ if (RC == &WebAssembly::EXNREFRegClass)
+ return WebAssembly::COPY_EXNREF;
+ llvm_unreachable("Unexpected register class");
+}
+
// When MBB is split into MBB and Split, we should unstackify defs in MBB that
// have their uses in Split.
static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB,
MachineBasicBlock &Split,
WebAssemblyFunctionInfo &MFI,
- MachineRegisterInfo &MRI) {
+ MachineRegisterInfo &MRI,
+ const WebAssemblyInstrInfo &TII) {
for (auto &MI : Split) {
for (auto &MO : MI.explicit_uses()) {
if (!MO.isReg() || Register::isPhysicalRegister(MO.getReg()))
@@ -720,6 +766,47 @@ static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB,
MFI.unstackifyVReg(MO.getReg());
}
}
+
+ // In RegStackify, when a register definition is used multiple times,
+ // Reg = INST ...
+ // INST ..., Reg, ...
+ // INST ..., Reg, ...
+ // INST ..., Reg, ...
+ //
+ // we introduce a TEE, which has the following form:
+ // DefReg = INST ...
+ // TeeReg, Reg = TEE_... DefReg
+ // INST ..., TeeReg, ...
+ // INST ..., Reg, ...
+ // INST ..., Reg, ...
+ // with DefReg and TeeReg stackified but Reg not stackified.
+ //
+ // But the invariant that TeeReg should be stackified can be violated while we
+ // unstackify registers in the split BB above. In this case, we convert TEEs
+ // into two COPYs. This COPY will be eventually eliminated in ExplicitLocals.
+ // DefReg = INST ...
+ // TeeReg = COPY DefReg
+ // Reg = COPY DefReg
+ // INST ..., TeeReg, ...
+ // INST ..., Reg, ...
+ // INST ..., Reg, ...
+ for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
+ MachineInstr &MI = *I++;
+ if (!WebAssembly::isTee(MI.getOpcode()))
+ continue;
+ Register TeeReg = MI.getOperand(0).getReg();
+ Register Reg = MI.getOperand(1).getReg();
+ Register DefReg = MI.getOperand(2).getReg();
+ if (!MFI.isVRegStackified(TeeReg)) {
+ // Now we are not using TEE anymore, so unstackify DefReg too
+ MFI.unstackifyVReg(DefReg);
+ unsigned CopyOpc = getCopyOpcode(MRI.getRegClass(DefReg));
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII.get(CopyOpc), TeeReg)
+ .addReg(DefReg);
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII.get(CopyOpc), Reg).addReg(DefReg);
+ MI.eraseFromParent();
+ }
+ }
}
bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
@@ -866,6 +953,10 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
// In new CFG, <destination to branch to, register containing exnref>
DenseMap<MachineBasicBlock *, unsigned> BrDestToExnReg;
+ // Destinations for branches that will be newly added, for which new
+ // BLOCK/END_BLOCK markers are necessary.
+ SmallVector<MachineBasicBlock *, 8> BrDests;
+
// Gather possibly throwing calls (i.e., previously invokes) whose current
// unwind destination is not the same as the original CFG.
for (auto &MBB : reverse(MF)) {
@@ -1036,7 +1127,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
BrDest->insert(BrDest->end(), EndTry->removeFromParent());
// Take out the handler body from EH pad to the new branch destination BB.
BrDest->splice(BrDest->end(), EHPad, SplitPos, EHPad->end());
- unstackifyVRegsUsedInSplitBB(*EHPad, *BrDest, MFI, MRI);
+ unstackifyVRegsUsedInSplitBB(*EHPad, *BrDest, MFI, MRI, TII);
// Fix predecessor-successor relationship.
BrDest->transferSuccessors(EHPad);
EHPad->addSuccessor(BrDest);
@@ -1075,6 +1166,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
? DebugLoc()
: EHPadLayoutPred->rbegin()->getDebugLoc();
BuildMI(EHPadLayoutPred, DL, TII.get(WebAssembly::BR)).addMBB(Cont);
+ BrDests.push_back(Cont);
}
}
@@ -1109,6 +1201,9 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
MachineInstr *RangeBegin = nullptr, *RangeEnd = nullptr;
std::tie(RangeBegin, RangeEnd) = Range;
auto *MBB = RangeBegin->getParent();
+ // Store the first function call from this range, because RangeBegin can
+ // be moved to point to an EH_LABEL before the call.
+ MachineInstr *RangeBeginCall = RangeBegin;
// Include possible EH_LABELs in the range
if (RangeBegin->getIterator() != MBB->begin() &&
@@ -1126,9 +1221,27 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
}
}
+ // The local expression tree before the first call of this range should go
+ // after the nested TRY.
+ SmallPtrSet<const MachineInstr *, 4> AfterSet;
+ AfterSet.insert(RangeBegin);
+ AfterSet.insert(RangeBeginCall);
+ for (auto I = MachineBasicBlock::iterator(RangeBeginCall),
+ E = MBB->begin();
+ I != E; --I) {
+ if (std::prev(I)->isDebugInstr() || std::prev(I)->isPosition())
+ continue;
+ if (WebAssembly::isChild(*std::prev(I), MFI))
+ AfterSet.insert(&*std::prev(I));
+ else
+ break;
+ }
+
// Create the nested try instruction.
+ auto InsertPos = getLatestInsertPos(
+ MBB, SmallPtrSet<const MachineInstr *, 4>(), AfterSet);
MachineInstr *NestedTry =
- BuildMI(*MBB, *RangeBegin, RangeBegin->getDebugLoc(),
+ BuildMI(*MBB, InsertPos, RangeBegin->getDebugLoc(),
TII.get(WebAssembly::TRY))
.addImm(int64_t(WebAssembly::BlockType::Void));
@@ -1152,13 +1265,21 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
// new nested continuation BB.
NestedCont->splice(NestedCont->end(), MBB,
std::next(RangeEnd->getIterator()), MBB->end());
- unstackifyVRegsUsedInSplitBB(*MBB, *NestedCont, MFI, MRI);
+ unstackifyVRegsUsedInSplitBB(*MBB, *NestedCont, MFI, MRI, TII);
registerTryScope(NestedTry, NestedEndTry, NestedEHPad);
// Fix predecessor-successor relationship.
NestedCont->transferSuccessors(MBB);
- if (EHPad)
+ if (EHPad) {
NestedCont->removeSuccessor(EHPad);
+ // If EHPad does not have any predecessors left after removing the
+ // NestedCont predecessor, remove its successor too, because this EHPad
+ // is not reachable from the entry BB anyway. We can't remove the EHPad BB
+ // itself because it can contain 'catch' or 'end', which are necessary
+ // for keeping try-catch-end structure.
+ if (EHPad->pred_empty())
+ EHPad->removeSuccessor(BrDest);
+ }
MBB->addSuccessor(NestedEHPad);
MBB->addSuccessor(NestedCont);
NestedEHPad->addSuccessor(BrDest);
@@ -1190,10 +1311,14 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
// Recompute the dominator tree.
getAnalysis<MachineDominatorTree>().runOnMachineFunction(MF);
- // Place block markers for newly added branches.
- SmallVector <MachineBasicBlock *, 8> BrDests;
- for (auto &P : BrDestToTryRanges)
- BrDests.push_back(P.first);
+ // Place block markers for newly added branches, if necessary.
+
+ // If we've created an appendix BB and a branch to it, place a block/end_block
+ // marker for that. For some new branches, their destination BBs start
+ // with a hoisted end_try marker, so we don't need a new marker there.
+ if (AppendixBB)
+ BrDests.push_back(AppendixBB);
+
llvm::sort(BrDests,
[&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
auto ANum = A->getNumber();
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
deleted file mode 100644
index 2537e6042b1e..000000000000
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
+++ /dev/null
@@ -1,150 +0,0 @@
-//===-- WebAssemblyCallIndirectFixup.cpp - Fix call_indirects -------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file converts pseudo call_indirect instructions into real
-/// call_indirects.
-///
-/// The order of arguments for a call_indirect is the arguments to the function
-/// call, followed by the function pointer. There's no natural way to express
-/// a machineinstr with varargs followed by one more arg, so we express it as
-/// the function pointer followed by varargs, then rewrite it here.
-///
-/// We need to rewrite the order of the arguments on the machineinstrs
-/// themselves so that register stackification knows the order they'll be
-/// executed in.
-///
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
-#include "WebAssembly.h"
-#include "WebAssemblyMachineFunctionInfo.h"
-#include "WebAssemblySubtarget.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "wasm-call-indirect-fixup"
-
-namespace {
-class WebAssemblyCallIndirectFixup final : public MachineFunctionPass {
- StringRef getPassName() const override {
- return "WebAssembly CallIndirect Fixup";
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
-public:
- static char ID; // Pass identification, replacement for typeid
- WebAssemblyCallIndirectFixup() : MachineFunctionPass(ID) {}
-};
-} // end anonymous namespace
-
-char WebAssemblyCallIndirectFixup::ID = 0;
-INITIALIZE_PASS(WebAssemblyCallIndirectFixup, DEBUG_TYPE,
- "Rewrite call_indirect argument orderings", false, false)
-
-FunctionPass *llvm::createWebAssemblyCallIndirectFixup() {
- return new WebAssemblyCallIndirectFixup();
-}
-
-static unsigned getNonPseudoCallIndirectOpcode(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- using namespace WebAssembly;
- case PCALL_INDIRECT_VOID:
- return CALL_INDIRECT_VOID;
- case PCALL_INDIRECT_i32:
- return CALL_INDIRECT_i32;
- case PCALL_INDIRECT_i64:
- return CALL_INDIRECT_i64;
- case PCALL_INDIRECT_f32:
- return CALL_INDIRECT_f32;
- case PCALL_INDIRECT_f64:
- return CALL_INDIRECT_f64;
- case PCALL_INDIRECT_v16i8:
- return CALL_INDIRECT_v16i8;
- case PCALL_INDIRECT_v8i16:
- return CALL_INDIRECT_v8i16;
- case PCALL_INDIRECT_v4i32:
- return CALL_INDIRECT_v4i32;
- case PCALL_INDIRECT_v2i64:
- return CALL_INDIRECT_v2i64;
- case PCALL_INDIRECT_v4f32:
- return CALL_INDIRECT_v4f32;
- case PCALL_INDIRECT_v2f64:
- return CALL_INDIRECT_v2f64;
- case PCALL_INDIRECT_exnref:
- return CALL_INDIRECT_exnref;
- case PRET_CALL_INDIRECT:
- return RET_CALL_INDIRECT;
- default:
- return INSTRUCTION_LIST_END;
- }
-}
-
-static bool isPseudoCallIndirect(const MachineInstr &MI) {
- return getNonPseudoCallIndirectOpcode(MI) !=
- WebAssembly::INSTRUCTION_LIST_END;
-}
-
-bool WebAssemblyCallIndirectFixup::runOnMachineFunction(MachineFunction &MF) {
- LLVM_DEBUG(dbgs() << "********** Fixing up CALL_INDIRECTs **********\n"
- << "********** Function: " << MF.getName() << '\n');
-
- bool Changed = false;
- const WebAssemblyInstrInfo *TII =
- MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
-
- for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : MBB) {
- if (isPseudoCallIndirect(MI)) {
- LLVM_DEBUG(dbgs() << "Found call_indirect: " << MI << '\n');
-
- // Rewrite pseudo to non-pseudo
- const MCInstrDesc &Desc = TII->get(getNonPseudoCallIndirectOpcode(MI));
- MI.setDesc(Desc);
-
- // Rewrite argument order
- SmallVector<MachineOperand, 8> Ops;
-
- // Set up a placeholder for the type signature immediate.
- Ops.push_back(MachineOperand::CreateImm(0));
-
- // Set up the flags immediate, which currently has no defined flags
- // so it's always zero.
- Ops.push_back(MachineOperand::CreateImm(0));
-
- for (const MachineOperand &MO :
- make_range(MI.operands_begin() + MI.getDesc().getNumDefs() + 1,
- MI.operands_begin() + MI.getNumExplicitOperands()))
- Ops.push_back(MO);
- Ops.push_back(MI.getOperand(MI.getDesc().getNumDefs()));
-
- // Replace the instructions operands.
- while (MI.getNumOperands() > MI.getDesc().getNumDefs())
- MI.RemoveOperand(MI.getNumOperands() - 1);
- for (const MachineOperand &MO : Ops)
- MI.addOperand(MO);
-
- LLVM_DEBUG(dbgs() << " After transform: " << MI);
- Changed = true;
- }
- }
- }
-
- LLVM_DEBUG(dbgs() << "\nDone fixing up CALL_INDIRECTs\n\n");
-
- return Changed;
-}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp
new file mode 100644
index 000000000000..655e30a29eff
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp
@@ -0,0 +1,138 @@
+//===-- WebAssemblyDebugFixup.cpp - Debug Fixup ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Several prior passes may "stackify" registers; here we ensure that any
+/// references to such registers in debug_value instructions also become stack
+/// relative.
+/// This is done in a separate pass such that not all previous passes need to
+/// track stack depth when values get stackified.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-debug-fixup"
+
+namespace {
+class WebAssemblyDebugFixup final : public MachineFunctionPass {
+ StringRef getPassName() const override { return "WebAssembly Debug Fixup"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyDebugFixup() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyDebugFixup::ID = 0;
+INITIALIZE_PASS(
+ WebAssemblyDebugFixup, DEBUG_TYPE,
+ "Ensures debug_value's that have been stackified become stack relative",
+ false, false)
+
+FunctionPass *llvm::createWebAssemblyDebugFixup() {
+ return new WebAssemblyDebugFixup();
+}
+
+bool WebAssemblyDebugFixup::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "********** Debug Fixup **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
+ WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+ const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+
+ struct StackElem {
+ unsigned Reg;
+ MachineInstr *DebugValue;
+ };
+ std::vector<StackElem> Stack;
+ for (MachineBasicBlock &MBB : MF) {
+ // We may insert into this list.
+ for (auto MII = MBB.begin(); MII != MBB.end(); ++MII) {
+ MachineInstr &MI = *MII;
+ if (MI.isDebugValue()) {
+ auto &MO = MI.getOperand(0);
+ // Also check that it is not $noreg: that is likely a DBG_VALUE we just
+ // inserted.
+ if (MO.isReg() && MO.getReg().isValid() &&
+ MFI.isVRegStackified(MO.getReg())) {
+ // Found a DBG_VALUE with a stackified register we will
+ // change into a stack operand.
+ // Search for register rather than assume it is on top (which it
+ // typically is if it appears right after the def), since
+ // DBG_VALUE's may shift under some circumstances.
+ for (auto &Elem : reverse(Stack)) {
+ if (MO.getReg() == Elem.Reg) {
+ auto Depth = static_cast<unsigned>(&Elem - &Stack[0]);
+ LLVM_DEBUG(dbgs() << "Debug Value VReg " << MO.getReg()
+ << " -> Stack Relative " << Depth << "\n");
+ MO.ChangeToTargetIndex(WebAssembly::TI_OPERAND_STACK, Depth);
+ // Save the DBG_VALUE instruction that defined this stackified
+ // variable since later we need it to construct another one on
+ // pop.
+ Elem.DebugValue = &MI;
+ break;
+ }
+ }
+ // If the Reg was not found, we have a DBG_VALUE outside of its
+ // def-use range, and we leave it unmodified as reg, which means
+ // it will be culled later.
+ }
+ } else {
+ // Track stack depth.
+ for (MachineOperand &MO : reverse(MI.explicit_uses())) {
+ if (MO.isReg() && MFI.isVRegStackified(MO.getReg())) {
+ auto Prev = Stack.back();
+ Stack.pop_back();
+ assert(Prev.Reg == MO.getReg() &&
+ "WebAssemblyDebugFixup: Pop: Register not matched!");
+ if (Prev.DebugValue) {
+ // This stackified reg is a variable that started life at
+ // Prev.DebugValue, so now that we're popping it we must insert
+ // a $noreg DBG_VALUE for the variable to end it, right after
+ // the current instruction.
+ BuildMI(*Prev.DebugValue->getParent(), std::next(MII),
+ Prev.DebugValue->getDebugLoc(),
+ TII->get(WebAssembly::DBG_VALUE), false, Register(),
+ Prev.DebugValue->getOperand(2).getMetadata(),
+ Prev.DebugValue->getOperand(3).getMetadata());
+ }
+ }
+ }
+ for (MachineOperand &MO : MI.defs()) {
+ if (MO.isReg() && MFI.isVRegStackified(MO.getReg())) {
+ Stack.push_back({MO.getReg(), nullptr});
+ }
+ }
+ }
+ }
+ assert(Stack.empty() &&
+ "WebAssemblyDebugFixup: Stack not empty at end of basic block!");
+ }
+
+ return true;
+}
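
For readers following the new pass above, here is a minimal standalone model of the operand-stack bookkeeping it performs: each stackified def pushes an element, each stackified use pops one, and a debug value naming a register currently on the stack is rewritten to its depth, with a terminator emitted when that value is popped. The types and register numbers are invented; this is not the LLVM API.

#include <cassert>
#include <cstddef>
#include <cstdio>
#include <vector>

struct StackElem {
  int Reg;
  bool HasDebugValue;
};

int main() {
  std::vector<StackElem> Stack;
  auto Push = [&](int Reg) { Stack.push_back({Reg, false}); };
  auto Pop = [&](int Reg) {
    assert(!Stack.empty() && Stack.back().Reg == Reg && "pop mismatch");
    if (Stack.back().HasDebugValue)
      std::printf("end variable tied to reg %d (emit $noreg DBG_VALUE)\n", Reg);
    Stack.pop_back();
  };
  auto DebugValue = [&](int Reg) {
    // Search the stack rather than assuming the value is on top.
    for (std::size_t I = Stack.size(); I-- > 0;)
      if (Stack[I].Reg == Reg) {
        std::printf("DBG_VALUE reg %d -> stack depth %zu\n", Reg, I);
        Stack[I].HasDebugValue = true;
        return;
      }
    std::printf("DBG_VALUE reg %d left as-is (not on stack)\n", Reg);
  };

  Push(1);       // def of stackified vreg 1
  DebugValue(1); // becomes a stack-relative operand at depth 0
  Push(2);       // def of stackified vreg 2
  Pop(2);        // use of vreg 2
  Pop(1);        // use of vreg 1 ends the variable's stack location
}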
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
index 114a50a3055d..159fb4c00ddc 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
@@ -31,7 +31,7 @@ void WebAssemblyDebugValueManager::move(MachineInstr *Insert) {
void WebAssemblyDebugValueManager::updateReg(unsigned Reg) {
for (auto *DBI : DbgValues)
- DBI->getOperand(0).setReg(Reg);
+ DBI->getDebugOperand(0).setReg(Reg);
}
void WebAssemblyDebugValueManager::clone(MachineInstr *Insert,
@@ -40,14 +40,14 @@ void WebAssemblyDebugValueManager::clone(MachineInstr *Insert,
MachineFunction *MF = MBB->getParent();
for (MachineInstr *DBI : reverse(DbgValues)) {
MachineInstr *Clone = MF->CloneMachineInstr(DBI);
- Clone->getOperand(0).setReg(NewReg);
+ Clone->getDebugOperand(0).setReg(NewReg);
MBB->insert(Insert, Clone);
}
}
void WebAssemblyDebugValueManager::replaceWithLocal(unsigned LocalId) {
for (auto *DBI : DbgValues) {
- MachineOperand &Op = DBI->getOperand(0);
- Op.ChangeToTargetIndex(llvm::WebAssembly::TI_LOCAL_START, LocalId);
+ MachineOperand &Op = DBI->getDebugOperand(0);
+ Op.ChangeToTargetIndex(llvm::WebAssembly::TI_LOCAL, LocalId);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
index a511b320b56b..c75de7aa207f 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
@@ -46,14 +46,14 @@ bool WebAssemblyExceptionInfo::runOnMachineFunction(MachineFunction &MF) {
void WebAssemblyExceptionInfo::recalculate(
MachineDominatorTree &MDT, const MachineDominanceFrontier &MDF) {
// Postorder traversal of the dominator tree.
- SmallVector<WebAssemblyException *, 8> Exceptions;
+ SmallVector<std::unique_ptr<WebAssemblyException>, 8> Exceptions;
for (auto DomNode : post_order(&MDT)) {
MachineBasicBlock *EHPad = DomNode->getBlock();
if (!EHPad->isEHPad())
continue;
- auto *WE = new WebAssemblyException(EHPad);
- discoverAndMapException(WE, MDT, MDF);
- Exceptions.push_back(WE);
+ auto WE = std::make_unique<WebAssemblyException>(EHPad);
+ discoverAndMapException(WE.get(), MDT, MDF);
+ Exceptions.push_back(std::move(WE));
}
// Add BBs to exceptions
@@ -64,17 +64,21 @@ void WebAssemblyExceptionInfo::recalculate(
WE->addBlock(MBB);
}
+ SmallVector<WebAssemblyException*, 8> ExceptionPointers;
+ ExceptionPointers.reserve(Exceptions.size());
+
// Add subexceptions to exceptions
- for (auto *WE : Exceptions) {
+ for (auto &WE : Exceptions) {
+ ExceptionPointers.push_back(WE.get());
if (WE->getParentException())
- WE->getParentException()->getSubExceptions().push_back(WE);
+ WE->getParentException()->getSubExceptions().push_back(std::move(WE));
else
- addTopLevelException(WE);
+ addTopLevelException(std::move(WE));
}
// For convenience, Blocks and SubExceptions are inserted in postorder.
// Reverse the lists.
- for (auto *WE : Exceptions) {
+ for (auto *WE : ExceptionPointers) {
WE->reverseBlock();
std::reverse(WE->getSubExceptions().begin(), WE->getSubExceptions().end());
}
@@ -82,7 +86,6 @@ void WebAssemblyExceptionInfo::recalculate(
void WebAssemblyExceptionInfo::releaseMemory() {
BBMap.clear();
- DeleteContainerPointers(TopLevelExceptions);
TopLevelExceptions.clear();
}
@@ -181,6 +184,6 @@ raw_ostream &operator<<(raw_ostream &OS, const WebAssemblyException &WE) {
}
void WebAssemblyExceptionInfo::print(raw_ostream &OS, const Module *) const {
- for (auto *WE : TopLevelExceptions)
+ for (auto &WE : TopLevelExceptions)
WE->print(OS);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h
index 9a90d7df7d47..50151ec8da5a 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h
@@ -43,13 +43,12 @@ class WebAssemblyException {
MachineBasicBlock *EHPad = nullptr;
WebAssemblyException *ParentException = nullptr;
- std::vector<WebAssemblyException *> SubExceptions;
+ std::vector<std::unique_ptr<WebAssemblyException>> SubExceptions;
std::vector<MachineBasicBlock *> Blocks;
SmallPtrSet<const MachineBasicBlock *, 8> BlockSet;
public:
WebAssemblyException(MachineBasicBlock *EHPad) : EHPad(EHPad) {}
- ~WebAssemblyException() { DeleteContainerPointers(SubExceptions); }
WebAssemblyException(const WebAssemblyException &) = delete;
const WebAssemblyException &operator=(const WebAssemblyException &) = delete;
@@ -83,14 +82,16 @@ public:
unsigned getNumBlocks() const { return Blocks.size(); }
std::vector<MachineBasicBlock *> &getBlocksVector() { return Blocks; }
- const std::vector<WebAssemblyException *> &getSubExceptions() const {
+ const std::vector<std::unique_ptr<WebAssemblyException>> &getSubExceptions() const {
return SubExceptions;
}
- std::vector<WebAssemblyException *> &getSubExceptions() {
+ std::vector<std::unique_ptr<WebAssemblyException>> &getSubExceptions() {
return SubExceptions;
}
- void addSubException(WebAssemblyException *E) { SubExceptions.push_back(E); }
- using iterator = typename std::vector<WebAssemblyException *>::const_iterator;
+ void addSubException(std::unique_ptr<WebAssemblyException> E) {
+ SubExceptions.push_back(std::move(E));
+ }
+ using iterator = typename decltype(SubExceptions)::const_iterator;
iterator begin() const { return SubExceptions.begin(); }
iterator end() const { return SubExceptions.end(); }
@@ -117,7 +118,7 @@ raw_ostream &operator<<(raw_ostream &OS, const WebAssemblyException &WE);
class WebAssemblyExceptionInfo final : public MachineFunctionPass {
// Mapping of basic blocks to the innermost exception they occur in
DenseMap<const MachineBasicBlock *, WebAssemblyException *> BBMap;
- std::vector<WebAssemblyException *> TopLevelExceptions;
+ std::vector<std::unique_ptr<WebAssemblyException>> TopLevelExceptions;
void discoverAndMapException(WebAssemblyException *WE,
const MachineDominatorTree &MDT,
@@ -156,9 +157,9 @@ public:
BBMap[MBB] = WE;
}
- void addTopLevelException(WebAssemblyException *WE) {
+ void addTopLevelException(std::unique_ptr<WebAssemblyException> WE) {
assert(!WE->getParentException() && "Not a top level exception!");
- TopLevelExceptions.push_back(WE);
+ TopLevelExceptions.push_back(std::move(WE));
}
void print(raw_ostream &OS, const Module *M = nullptr) const override;
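
The switch from raw WebAssemblyException pointers to std::unique_ptr ownership, with a parallel vector of raw pointers kept only for non-owning iteration, is a common C++ ownership pattern; a minimal sketch with invented Node names (not the LLVM classes) looks like this.

#include <cstdio>
#include <memory>
#include <vector>

// Owning tree: parents own children through unique_ptr, so each node has
// exactly one owner and no manual delete is needed.
struct Node {
  int Id = 0;
  Node *Parent = nullptr;
  std::vector<std::unique_ptr<Node>> Children;
};

int main() {
  std::vector<std::unique_ptr<Node>> TopLevel;
  std::vector<Node *> AllNodes; // non-owning view, like ExceptionPointers above

  auto Root = std::make_unique<Node>();
  auto Child = std::make_unique<Node>();
  Child->Id = 1;
  Child->Parent = Root.get();

  AllNodes.push_back(Root.get());
  AllNodes.push_back(Child.get());

  // Transfer ownership: children move into their parent, roots into TopLevel.
  Root->Children.push_back(std::move(Child));
  TopLevel.push_back(std::move(Root));

  for (Node *N : AllNodes) // raw pointers stay valid; ownership is elsewhere
    std::printf("node %d has %zu children\n", N->Id, N->Children.size());
}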
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index acbd4c9921b0..55925bcbe771 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -31,16 +31,6 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-explicit-locals"
-// A command-line option to disable this pass, and keep implicit locals
-// for the purpose of testing with lit/llc ONLY.
-// This produces output which is not valid WebAssembly, and is not supported
-// by assemblers/disassemblers and other MC based tools.
-static cl::opt<bool> WasmDisableExplicitLocals(
- "wasm-disable-explicit-locals", cl::Hidden,
- cl::desc("WebAssembly: output implicit locals in"
- " instruction output for test purposes only."),
- cl::init(false));
-
namespace {
class WebAssemblyExplicitLocals final : public MachineFunctionPass {
StringRef getPassName() const override {
@@ -69,13 +59,28 @@ FunctionPass *llvm::createWebAssemblyExplicitLocals() {
return new WebAssemblyExplicitLocals();
}
+static void checkFrameBase(WebAssemblyFunctionInfo &MFI, unsigned Local,
+ unsigned Reg) {
+ // Mark a local for the frame base vreg.
+ if (MFI.isFrameBaseVirtual() && Reg == MFI.getFrameBaseVreg()) {
+ LLVM_DEBUG({
+ dbgs() << "Allocating local " << Local << "for VReg "
+ << Register::virtReg2Index(Reg) << '\n';
+ });
+ MFI.setFrameBaseLocal(Local);
+ }
+}
+
/// Return a local id number for the given register, assigning it a new one
/// if it doesn't yet have one.
static unsigned getLocalId(DenseMap<unsigned, unsigned> &Reg2Local,
- unsigned &CurLocal, unsigned Reg) {
+ WebAssemblyFunctionInfo &MFI, unsigned &CurLocal,
+ unsigned Reg) {
auto P = Reg2Local.insert(std::make_pair(Reg, CurLocal));
- if (P.second)
+ if (P.second) {
+ checkFrameBase(MFI, CurLocal, Reg);
++CurLocal;
+ }
return P.first->second;
}
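
The getLocalId helper above uses the usual map-insert idiom: assign the next free local index only the first time a register is seen, and return the existing index otherwise. A small standalone equivalent, using std::unordered_map instead of DenseMap and invented names, is sketched here.

#include <cstdio>
#include <unordered_map>

// Return the local index for Reg, allocating the next free one on first use.
static unsigned getOrAssignLocal(std::unordered_map<unsigned, unsigned> &Reg2Local,
                                 unsigned &CurLocal, unsigned Reg) {
  auto P = Reg2Local.insert({Reg, CurLocal});
  if (P.second) // newly inserted: this register had no local yet
    ++CurLocal;
  return P.first->second;
}

int main() {
  std::unordered_map<unsigned, unsigned> Reg2Local;
  unsigned CurLocal = 2; // pretend locals 0-1 are taken by arguments
  std::printf("%u\n", getOrAssignLocal(Reg2Local, CurLocal, 100)); // 2
  std::printf("%u\n", getOrAssignLocal(Reg2Local, CurLocal, 101)); // 3
  std::printf("%u\n", getOrAssignLocal(Reg2Local, CurLocal, 100)); // 2 again
}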
@@ -168,11 +173,18 @@ static MVT typeForRegClass(const TargetRegisterClass *RC) {
/// start of the expression tree.
static MachineInstr *findStartOfTree(MachineOperand &MO,
MachineRegisterInfo &MRI,
- WebAssemblyFunctionInfo &MFI) {
+ const WebAssemblyFunctionInfo &MFI) {
Register Reg = MO.getReg();
assert(MFI.isVRegStackified(Reg));
MachineInstr *Def = MRI.getVRegDef(Reg);
+ // If this instruction has any non-stackified defs, it is the start
+ for (auto DefReg : Def->defs()) {
+ if (!MFI.isVRegStackified(DefReg.getReg())) {
+ return Def;
+ }
+ }
+
// Find the first stackified use and proceed from there.
for (MachineOperand &DefMO : Def->explicit_uses()) {
if (!DefMO.isReg())
@@ -189,10 +201,6 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
"********** Function: "
<< MF.getName() << '\n');
- // Disable this pass if directed to do so.
- if (WasmDisableExplicitLocals)
- return false;
-
bool Changed = false;
MachineRegisterInfo &MRI = MF.getRegInfo();
WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
@@ -210,7 +218,9 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
break;
Register Reg = MI.getOperand(0).getReg();
assert(!MFI.isVRegStackified(Reg));
- Reg2Local[Reg] = static_cast<unsigned>(MI.getOperand(1).getImm());
+ auto Local = static_cast<unsigned>(MI.getOperand(1).getImm());
+ Reg2Local[Reg] = Local;
+ checkFrameBase(MFI, Local, Reg);
MI.eraseFromParent();
Changed = true;
}
@@ -233,6 +243,12 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
if (MI.isDebugInstr() || MI.isLabel())
continue;
+ if (MI.getOpcode() == WebAssembly::IMPLICIT_DEF) {
+ MI.eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
// Replace tee instructions with local.tee. The difference is that tee
// instructions have two defs, while local.tee instructions have one def
// and an index of a local to write to.
@@ -244,18 +260,18 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// Stackify the input if it isn't stackified yet.
if (!MFI.isVRegStackified(OldReg)) {
- unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
+ unsigned LocalId = getLocalId(Reg2Local, MFI, CurLocal, OldReg);
Register NewReg = MRI.createVirtualRegister(RC);
unsigned Opc = getLocalGetOpcode(RC);
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Opc), NewReg)
.addImm(LocalId);
MI.getOperand(2).setReg(NewReg);
- MFI.stackifyVReg(NewReg);
+ MFI.stackifyVReg(MRI, NewReg);
}
// Replace the TEE with a LOCAL_TEE.
unsigned LocalId =
- getLocalId(Reg2Local, CurLocal, MI.getOperand(1).getReg());
+ getLocalId(Reg2Local, MFI, CurLocal, MI.getOperand(1).getReg());
unsigned Opc = getLocalTeeOpcode(RC);
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(0).getReg())
@@ -269,20 +285,13 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- // Insert local.sets for any defs that aren't stackified yet. Currently
- // we handle at most one def.
- assert(MI.getDesc().getNumDefs() <= 1);
- if (MI.getDesc().getNumDefs() == 1) {
- Register OldReg = MI.getOperand(0).getReg();
+ // Insert local.sets for any defs that aren't stackified yet.
+ for (auto &Def : MI.defs()) {
+ Register OldReg = Def.getReg();
if (!MFI.isVRegStackified(OldReg)) {
const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
Register NewReg = MRI.createVirtualRegister(RC);
auto InsertPt = std::next(MI.getIterator());
- if (MI.getOpcode() == WebAssembly::IMPLICIT_DEF) {
- MI.eraseFromParent();
- Changed = true;
- continue;
- }
if (UseEmpty[Register::virtReg2Index(OldReg)]) {
unsigned Opc = getDropOpcode(RC);
MachineInstr *Drop =
@@ -290,8 +299,10 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
.addReg(NewReg);
// After the drop instruction, this reg operand will not be used
Drop->getOperand(0).setIsKill();
+ if (MFI.isFrameBaseVirtual() && OldReg == MFI.getFrameBaseVreg())
+ MFI.clearFrameBaseVreg();
} else {
- unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
+ unsigned LocalId = getLocalId(Reg2Local, MFI, CurLocal, OldReg);
unsigned Opc = getLocalSetOpcode(RC);
WebAssemblyDebugValueManager(&MI).replaceWithLocal(LocalId);
@@ -300,12 +311,12 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
.addImm(LocalId)
.addReg(NewReg);
}
- MI.getOperand(0).setReg(NewReg);
// This register operand of the original instruction is now being used
// by the inserted drop or local.set instruction, so make it not dead
// yet.
- MI.getOperand(0).setIsDead(false);
- MFI.stackifyVReg(NewReg);
+ Def.setReg(NewReg);
+ Def.setIsDead(false);
+ MFI.stackifyVReg(MRI, NewReg);
Changed = true;
}
}
@@ -323,7 +334,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// immediates.
if (MO.isDef()) {
assert(MI.isInlineAsm());
- unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
+ unsigned LocalId = getLocalId(Reg2Local, MFI, CurLocal, OldReg);
// If this register operand is tied to another operand, we can't
// change it to an immediate. Untie it first.
MI.untieRegOperand(MI.getOperandNo(&MO));
@@ -341,7 +352,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// Our contract with inline asm register operands is to provide local
// indices as immediates.
if (MI.isInlineAsm()) {
- unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
+ unsigned LocalId = getLocalId(Reg2Local, MFI, CurLocal, OldReg);
// Untie it first if this reg operand is tied to another operand.
MI.untieRegOperand(MI.getOperandNo(&MO));
MO.ChangeToImmediate(LocalId);
@@ -349,7 +360,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
}
// Insert a local.get.
- unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
+ unsigned LocalId = getLocalId(Reg2Local, MFI, CurLocal, OldReg);
const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
Register NewReg = MRI.createVirtualRegister(RC);
unsigned Opc = getLocalGetOpcode(RC);
@@ -357,7 +368,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc), NewReg)
.addImm(LocalId);
MO.setReg(NewReg);
- MFI.stackifyVReg(NewReg);
+ MFI.stackifyVReg(MRI, NewReg);
Changed = true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index c932f985489a..8a0092a3f298 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -640,6 +640,9 @@ bool WebAssemblyFastISel::fastLowerArguments() {
if (F->isVarArg())
return false;
+ if (FuncInfo.Fn->getCallingConv() == CallingConv::Swift)
+ return false;
+
unsigned I = 0;
for (auto const &Arg : F->args()) {
const AttributeList &Attrs = F->getAttributes();
@@ -754,17 +757,18 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
if (Func && Func->isIntrinsic())
return false;
+ if (Call->getCallingConv() == CallingConv::Swift)
+ return false;
+
bool IsDirect = Func != nullptr;
- if (!IsDirect && isa<ConstantExpr>(Call->getCalledValue()))
+ if (!IsDirect && isa<ConstantExpr>(Call->getCalledOperand()))
return false;
FunctionType *FuncTy = Call->getFunctionType();
- unsigned Opc;
+ unsigned Opc = IsDirect ? WebAssembly::CALL : WebAssembly::CALL_INDIRECT;
bool IsVoid = FuncTy->getReturnType()->isVoidTy();
unsigned ResultReg;
- if (IsVoid) {
- Opc = IsDirect ? WebAssembly::CALL_VOID : WebAssembly::PCALL_INDIRECT_VOID;
- } else {
+ if (!IsVoid) {
if (!Subtarget->hasSIMD128() && Call->getType()->isVectorTy())
return false;
@@ -774,54 +778,36 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
case MVT::i8:
case MVT::i16:
case MVT::i32:
- Opc = IsDirect ? WebAssembly::CALL_i32 : WebAssembly::PCALL_INDIRECT_i32;
ResultReg = createResultReg(&WebAssembly::I32RegClass);
break;
case MVT::i64:
- Opc = IsDirect ? WebAssembly::CALL_i64 : WebAssembly::PCALL_INDIRECT_i64;
ResultReg = createResultReg(&WebAssembly::I64RegClass);
break;
case MVT::f32:
- Opc = IsDirect ? WebAssembly::CALL_f32 : WebAssembly::PCALL_INDIRECT_f32;
ResultReg = createResultReg(&WebAssembly::F32RegClass);
break;
case MVT::f64:
- Opc = IsDirect ? WebAssembly::CALL_f64 : WebAssembly::PCALL_INDIRECT_f64;
ResultReg = createResultReg(&WebAssembly::F64RegClass);
break;
case MVT::v16i8:
- Opc = IsDirect ? WebAssembly::CALL_v16i8
- : WebAssembly::PCALL_INDIRECT_v16i8;
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v8i16:
- Opc = IsDirect ? WebAssembly::CALL_v8i16
- : WebAssembly::PCALL_INDIRECT_v8i16;
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v4i32:
- Opc = IsDirect ? WebAssembly::CALL_v4i32
- : WebAssembly::PCALL_INDIRECT_v4i32;
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v2i64:
- Opc = IsDirect ? WebAssembly::CALL_v2i64
- : WebAssembly::PCALL_INDIRECT_v2i64;
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v4f32:
- Opc = IsDirect ? WebAssembly::CALL_v4f32
- : WebAssembly::PCALL_INDIRECT_v4f32;
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v2f64:
- Opc = IsDirect ? WebAssembly::CALL_v2f64
- : WebAssembly::PCALL_INDIRECT_v2f64;
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::exnref:
- Opc = IsDirect ? WebAssembly::CALL_exnref
- : WebAssembly::PCALL_INDIRECT_exnref;
ResultReg = createResultReg(&WebAssembly::EXNREFRegClass);
break;
default:
@@ -861,7 +847,7 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
unsigned CalleeReg = 0;
if (!IsDirect) {
- CalleeReg = getRegForValue(Call->getCalledValue());
+ CalleeReg = getRegForValue(Call->getCalledOperand());
if (!CalleeReg)
return false;
}
@@ -871,14 +857,20 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
if (!IsVoid)
MIB.addReg(ResultReg, RegState::Define);
- if (IsDirect)
+ if (IsDirect) {
MIB.addGlobalAddress(Func);
- else
- MIB.addReg(CalleeReg);
+ } else {
+ // Add placeholders for the type index and immediate flags
+ MIB.addImm(0);
+ MIB.addImm(0);
+ }
for (unsigned ArgReg : Args)
MIB.addReg(ArgReg);
+ if (!IsDirect)
+ MIB.addReg(CalleeReg);
+
if (!IsVoid)
updateValueMap(Call, ResultReg);
return true;
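
The reworked call lowering above emits, for an indirect call, two placeholder immediates (type index and flags), then the argument registers, and finally the callee register; for a direct call, the callee symbol comes first. A toy sketch of that operand layout, with all names invented and no relation to the real MachineInstrBuilder API, follows.

#include <cstdio>
#include <string>
#include <vector>

// Build the operand list for a toy CALL / CALL_INDIRECT in the new order.
static std::vector<std::string>
buildCallOperands(bool IsDirect, const std::string &Callee,
                  const std::vector<std::string> &Args) {
  std::vector<std::string> Ops;
  if (IsDirect) {
    Ops.push_back(Callee); // global address of the callee
  } else {
    Ops.push_back("typeindex=0"); // placeholder, filled in later
    Ops.push_back("flags=0");     // currently always zero
  }
  Ops.insert(Ops.end(), Args.begin(), Args.end());
  if (!IsDirect)
    Ops.push_back(Callee); // table index comes last for call_indirect
  return Ops;
}

int main() {
  for (const std::string &Op : buildCallOperands(false, "$fnptr", {"$a", "$b"}))
    std::printf("%s ", Op.c_str());
  std::printf("\n");
}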
@@ -1168,30 +1160,31 @@ bool WebAssemblyFastISel::selectLoad(const Instruction *I) {
unsigned Opc;
const TargetRegisterClass *RC;
+ bool A64 = Subtarget->hasAddr64();
switch (getSimpleType(Load->getType())) {
case MVT::i1:
case MVT::i8:
- Opc = WebAssembly::LOAD8_U_I32;
+ Opc = A64 ? WebAssembly::LOAD8_U_I32_A64 : WebAssembly::LOAD8_U_I32_A32;
RC = &WebAssembly::I32RegClass;
break;
case MVT::i16:
- Opc = WebAssembly::LOAD16_U_I32;
+ Opc = A64 ? WebAssembly::LOAD16_U_I32_A64 : WebAssembly::LOAD16_U_I32_A32;
RC = &WebAssembly::I32RegClass;
break;
case MVT::i32:
- Opc = WebAssembly::LOAD_I32;
+ Opc = A64 ? WebAssembly::LOAD_I32_A64 : WebAssembly::LOAD_I32_A32;
RC = &WebAssembly::I32RegClass;
break;
case MVT::i64:
- Opc = WebAssembly::LOAD_I64;
+ Opc = A64 ? WebAssembly::LOAD_I64_A64 : WebAssembly::LOAD_I64_A32;
RC = &WebAssembly::I64RegClass;
break;
case MVT::f32:
- Opc = WebAssembly::LOAD_F32;
+ Opc = A64 ? WebAssembly::LOAD_F32_A64 : WebAssembly::LOAD_F32_A32;
RC = &WebAssembly::F32RegClass;
break;
case MVT::f64:
- Opc = WebAssembly::LOAD_F64;
+ Opc = A64 ? WebAssembly::LOAD_F64_A64 : WebAssembly::LOAD_F64_A32;
RC = &WebAssembly::F64RegClass;
break;
default:
@@ -1224,27 +1217,28 @@ bool WebAssemblyFastISel::selectStore(const Instruction *I) {
unsigned Opc;
bool VTIsi1 = false;
+ bool A64 = Subtarget->hasAddr64();
switch (getSimpleType(Store->getValueOperand()->getType())) {
case MVT::i1:
VTIsi1 = true;
LLVM_FALLTHROUGH;
case MVT::i8:
- Opc = WebAssembly::STORE8_I32;
+ Opc = A64 ? WebAssembly::STORE8_I32_A64 : WebAssembly::STORE8_I32_A32;
break;
case MVT::i16:
- Opc = WebAssembly::STORE16_I32;
+ Opc = A64 ? WebAssembly::STORE16_I32_A64 : WebAssembly::STORE16_I32_A32;
break;
case MVT::i32:
- Opc = WebAssembly::STORE_I32;
+ Opc = A64 ? WebAssembly::STORE_I32_A64 : WebAssembly::STORE_I32_A32;
break;
case MVT::i64:
- Opc = WebAssembly::STORE_I64;
+ Opc = A64 ? WebAssembly::STORE_I64_A64 : WebAssembly::STORE_I64_A32;
break;
case MVT::f32:
- Opc = WebAssembly::STORE_F32;
+ Opc = A64 ? WebAssembly::STORE_F32_A64 : WebAssembly::STORE_F32_A32;
break;
case MVT::f64:
- Opc = WebAssembly::STORE_F64;
+ Opc = A64 ? WebAssembly::STORE_F64_A64 : WebAssembly::STORE_F64_A32;
break;
default:
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp
new file mode 100644
index 000000000000..7f805b34b499
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp
@@ -0,0 +1,155 @@
+//=- WebAssemblyFixBrTableDefaults.cpp - Fix br_table default branch targets -//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This file implements a pass that eliminates redundant range checks
+/// guarding br_table instructions. Since jump tables on most targets cannot
+/// handle out of range indices, LLVM emits these checks before most jump
+/// tables. But br_table takes a default branch target as an argument, so it
+/// does not need the range checks.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-fix-br-table-defaults"
+
+namespace {
+
+class WebAssemblyFixBrTableDefaults final : public MachineFunctionPass {
+ StringRef getPassName() const override {
+ return "WebAssembly Fix br_table Defaults";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyFixBrTableDefaults() : MachineFunctionPass(ID) {}
+};
+
+char WebAssemblyFixBrTableDefaults::ID = 0;
+
+// `MI` is a br_table instruction with a dummy default target argument. This
+// function finds and adds the default target argument and removes any redundant
+// range check preceding the br_table. Returns the MBB that the br_table is
+// moved into so it can be removed from further consideration, or nullptr if the
+// br_table cannot be optimized.
+MachineBasicBlock *fixBrTable(MachineInstr &MI, MachineBasicBlock *MBB,
+ MachineFunction &MF) {
+ // Get the header block, which contains the redundant range check.
+ assert(MBB->pred_size() == 1 && "Expected a single guard predecessor");
+ auto *HeaderMBB = *MBB->pred_begin();
+
+ // Find the conditional jump to the default target. If it doesn't exist, the
+ // default target is unreachable anyway, so we can keep the existing dummy
+ // target.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 2> Cond;
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ bool Analyzed = !TII.analyzeBranch(*HeaderMBB, TBB, FBB, Cond);
+ assert(Analyzed && "Could not analyze jump header branches");
+ (void)Analyzed;
+
+ // Here are the possible outcomes. '_' is nullptr, 'J' is the jump table block
+ // aka MBB, 'D' is the default block.
+ //
+ // TBB | FBB | Meaning
+ // _ | _ | No default block, header falls through to jump table
+ // J | _ | No default block, header jumps to the jump table
+ // D | _ | Header jumps to the default and falls through to the jump table
+ // D | J | Header jumps to the default and also to the jump table
+ if (TBB && TBB != MBB) {
+ assert((FBB == nullptr || FBB == MBB) &&
+ "Expected jump or fallthrough to br_table block");
+ assert(Cond.size() == 2 && Cond[1].isReg() && "Unexpected condition info");
+
+ // If the range check checks an i64 value, we cannot optimize it out because
+ // the i64 index is truncated to an i32, making values over 2^32
+ // indistinguishable from small numbers. There are also other strange edge
+ // cases that can arise in practice that we don't want to reason about, so
+ // conservatively only perform the optimization if the range check is the
+ // normal case of an i32.gt_u.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ auto *RangeCheck = MRI.getVRegDef(Cond[1].getReg());
+ assert(RangeCheck != nullptr);
+ if (RangeCheck->getOpcode() != WebAssembly::GT_U_I32)
+ return nullptr;
+
+ // Remove the dummy default target and install the real one.
+ MI.RemoveOperand(MI.getNumExplicitOperands() - 1);
+ MI.addOperand(MF, MachineOperand::CreateMBB(TBB));
+ }
+
+ // Remove any branches from the header and splice in the jump table instead
+ TII.removeBranch(*HeaderMBB, nullptr);
+ HeaderMBB->splice(HeaderMBB->end(), MBB, MBB->begin(), MBB->end());
+
+ // Update CFG to skip the old jump table block. Remove shared successors
+ // before transferring to avoid duplicated successors.
+ HeaderMBB->removeSuccessor(MBB);
+ for (auto &Succ : MBB->successors())
+ if (HeaderMBB->isSuccessor(Succ))
+ HeaderMBB->removeSuccessor(Succ);
+ HeaderMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Remove the old jump table block from the function
+ MF.erase(MBB);
+
+ return HeaderMBB;
+}
+
+bool WebAssemblyFixBrTableDefaults::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "********** Fixing br_table Default Targets **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
+ bool Changed = false;
+ SmallPtrSet<MachineBasicBlock *, 16> MBBSet;
+ for (auto &MBB : MF)
+ MBBSet.insert(&MBB);
+
+ while (!MBBSet.empty()) {
+ MachineBasicBlock *MBB = *MBBSet.begin();
+ MBBSet.erase(MBB);
+ for (auto &MI : *MBB) {
+ if (WebAssembly::isBrTable(MI)) {
+ auto *Fixed = fixBrTable(MI, MBB, MF);
+ if (Fixed != nullptr) {
+ MBBSet.erase(Fixed);
+ Changed = true;
+ }
+ break;
+ }
+ }
+ }
+
+ if (Changed) {
+ // We rewrote part of the function; recompute relevant things.
+ MF.RenumberBlocks();
+ return true;
+ }
+
+ return false;
+}
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(WebAssemblyFixBrTableDefaults, DEBUG_TYPE,
+ "Removes range checks and sets br_table default targets", false,
+ false)
+
+FunctionPass *llvm::createWebAssemblyFixBrTableDefaults() {
+ return new WebAssemblyFixBrTableDefaults();
+}
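
As a rough illustration of why the guard becomes redundant: a br_table-style dispatch that carries its own default target simply routes any out-of-range index to that default, so the explicit i32.gt_u check in the header buys nothing. A standalone sketch of that semantics (plain C++, not the pass itself):

#include <cstdio>
#include <vector>

// Toy br_table: pick a target from the table, or the default target if the
// (unsigned) index is out of range -- no separate bounds check needed.
static int brTable(unsigned Index, const std::vector<int> &Targets, int Default) {
  return Index < Targets.size() ? Targets[Index] : Default;
}

int main() {
  std::vector<int> Targets = {10, 11, 12}; // block numbers, say
  std::printf("%d\n", brTable(1, Targets, 99)); // 11
  std::printf("%d\n", brTable(7, Targets, 99)); // 99: default, no guard branch
}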
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
index 6b1bbd7a2b07..7abb6fa8905c 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
@@ -23,7 +23,6 @@
//===----------------------------------------------------------------------===//
#include "WebAssembly.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -73,11 +72,11 @@ static void findUses(Value *V, Function &F,
else if (auto *A = dyn_cast<GlobalAlias>(U.getUser()))
findUses(A, F, Uses, ConstantBCs);
else if (U.get()->getType() != F.getType()) {
- CallSite CS(U.getUser());
- if (!CS)
+ CallBase *CB = dyn_cast<CallBase>(U.getUser());
+ if (!CB)
// Skip uses that aren't immediately called
continue;
- Value *Callee = CS.getCalledValue();
+ Value *Callee = CB->getCalledOperand();
if (Callee != V)
// Skip calls where the function isn't the callee
continue;
@@ -244,6 +243,10 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
// Collect all the places that need wrappers.
for (Function &F : M) {
+ // Skip the fix when the function is swiftcc, because swiftcc allows
+ // bitcast type differences for swiftself and swifterror.
+ if (F.getCallingConv() == CallingConv::Swift)
+ continue;
findUses(&F, F, Uses, ConstantBCs);
// If we have a "main" function, and its type isn't
@@ -304,7 +307,7 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
if (CallMain) {
Main->setName("__original_main");
auto *MainWrapper =
- cast<Function>(CallMain->getCalledValue()->stripPointerCasts());
+ cast<Function>(CallMain->getCalledOperand()->stripPointerCasts());
delete CallMain;
if (Main->isDeclaration()) {
// The wrapper is not needed in this case as we don't need to export
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
index 157ea9d525c9..1ceae59dc993 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
@@ -66,6 +66,17 @@ namespace {
using BlockVector = SmallVector<MachineBasicBlock *, 4>;
using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
+static BlockVector getSortedEntries(const BlockSet &Entries) {
+ BlockVector SortedEntries(Entries.begin(), Entries.end());
+ llvm::sort(SortedEntries,
+ [](const MachineBasicBlock *A, const MachineBasicBlock *B) {
+ auto ANum = A->getNumber();
+ auto BNum = B->getNumber();
+ return ANum < BNum;
+ });
+ return SortedEntries;
+}
+
// Calculates reachability in a region. Ignores branches to blocks outside of
// the region, and ignores branches to the region entry (for the case where
// the region is the inner part of a loop).
@@ -241,7 +252,6 @@ public:
bool WebAssemblyFixIrreducibleControlFlow::processRegion(
MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) {
bool Changed = false;
-
// Remove irreducibility before processing child loops, which may take
// multiple iterations.
while (true) {
@@ -249,12 +259,18 @@ bool WebAssemblyFixIrreducibleControlFlow::processRegion(
bool FoundIrreducibility = false;
- for (auto *LoopEntry : Graph.getLoopEntries()) {
+ for (auto *LoopEntry : getSortedEntries(Graph.getLoopEntries())) {
// Find mutual entries - all entries which can reach this one, and
// are reached by it (that always includes LoopEntry itself). All mutual
// entries must be in the same loop, so if we have more than one, then we
// have irreducible control flow.
//
+ // (Note that we need to sort the entries here, as otherwise the order can
+ // matter: being mutual is a symmetric relationship, and each set of
+ // mutuals will be handled properly no matter which we see first. However,
+ // there can be multiple disjoint sets of mutuals, and which we process
+ // first changes the output.)
+ //
// Note that irreducibility may involve inner loops, e.g. imagine A
// starts one loop, and it has B inside it which starts an inner loop.
// If we add a branch from all the way on the outside to B, then in a
@@ -325,13 +341,7 @@ void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
assert(Entries.size() >= 2);
// Sort the entries to ensure a deterministic build.
- BlockVector SortedEntries(Entries.begin(), Entries.end());
- llvm::sort(SortedEntries,
- [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
- auto ANum = A->getNumber();
- auto BNum = B->getNumber();
- return ANum < BNum;
- });
+ BlockVector SortedEntries = getSortedEntries(Entries);
#ifndef NDEBUG
for (auto Block : SortedEntries)
@@ -403,31 +413,33 @@ void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
}
// Record if each entry has a layout predecessor. This map stores
- // <<Predecessor is within the loop?, loop entry>, layout predecessor>
- std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *>
+ // <<loop entry, Predecessor is within the loop?>, layout predecessor>
+ DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *>
EntryToLayoutPred;
- for (auto *Pred : AllPreds)
+ for (auto *Pred : AllPreds) {
+ bool PredInLoop = InLoop.count(Pred);
for (auto *Entry : Pred->successors())
if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry))
- EntryToLayoutPred[std::make_pair(InLoop.count(Pred), Entry)] = Pred;
+ EntryToLayoutPred[{Entry, PredInLoop}] = Pred;
+ }
// We need to create at most two routing blocks per entry: one for
// predecessors outside the loop and one for predecessors inside the loop.
// This map stores
- // <<Predecessor is within the loop?, loop entry>, routing block>
- std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *> Map;
+ // <<loop entry, Predecessor is within the loop?>, routing block>
+ DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *>
+ Map;
for (auto *Pred : AllPreds) {
bool PredInLoop = InLoop.count(Pred);
for (auto *Entry : Pred->successors()) {
- if (!Entries.count(Entry) ||
- Map.count(std::make_pair(InLoop.count(Pred), Entry)))
+ if (!Entries.count(Entry) || Map.count({Entry, PredInLoop}))
continue;
// If there exists a layout predecessor of this entry and this predecessor
// is not that one, we instead create a routing block after that layout
// predecessor to save a branch.
- if (EntryToLayoutPred.count(std::make_pair(PredInLoop, Entry)) &&
- EntryToLayoutPred[std::make_pair(PredInLoop, Entry)] != Pred)
- continue;
+ if (auto *OtherPred = EntryToLayoutPred.lookup({Entry, PredInLoop}))
+ if (OtherPred != Pred)
+ continue;
// This is a successor we need to rewrite.
MachineBasicBlock *Routing = MF.CreateMachineBasicBlock();
@@ -443,7 +455,7 @@ void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
.addImm(Indices[Entry]);
BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);
Routing->addSuccessor(Dispatch);
- Map[std::make_pair(PredInLoop, Entry)] = Routing;
+ Map[{Entry, PredInLoop}] = Routing;
}
}
@@ -453,12 +465,12 @@ void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
for (MachineInstr &Term : Pred->terminators())
for (auto &Op : Term.explicit_uses())
if (Op.isMBB() && Indices.count(Op.getMBB()))
- Op.setMBB(Map[std::make_pair(PredInLoop, Op.getMBB())]);
+ Op.setMBB(Map[{Op.getMBB(), PredInLoop}]);
for (auto *Succ : Pred->successors()) {
if (!Entries.count(Succ))
continue;
- auto *Routing = Map[std::make_pair(PredInLoop, Succ)];
+ auto *Routing = Map[{Succ, PredInLoop}];
Pred->replaceSuccessor(Succ, Routing);
}
}
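
The routing-block bookkeeping above keys its maps on the pair (loop entry, whether the predecessor is inside the loop), so at most two routing blocks exist per entry and every predecessor with the same key reuses the same one. A small standalone model of that keyed lookup, using std::map and invented Block names rather than LLVM's DenseMap/PointerIntPair:

#include <cstdio>
#include <map>
#include <string>
#include <utility>

struct Block { int Number; };

int main() {
  Block EntryA{4}, EntryB{7};
  // Key: (loop entry, predecessor-in-loop flag). Value: routing block name.
  std::map<std::pair<const Block *, bool>, std::string> Routing;

  auto GetRouting = [&](const Block *Entry, bool PredInLoop) -> std::string & {
    auto It = Routing.find({Entry, PredInLoop});
    if (It == Routing.end()) {
      std::string Name = "routing_" + std::to_string(Entry->Number) +
                         (PredInLoop ? "_in" : "_out");
      It = Routing.emplace(std::make_pair(Entry, PredInLoop), Name).first;
    }
    return It->second;
  };

  // Two predecessors outside the loop share one routing block per entry.
  std::printf("%s\n", GetRouting(&EntryA, false).c_str());
  std::printf("%s\n", GetRouting(&EntryA, false).c_str()); // reused
  std::printf("%s\n", GetRouting(&EntryB, true).c_str());  // distinct key
}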
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index 71eeebfada4b..95669932e73f 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -19,6 +19,7 @@
#include "WebAssemblyFrameLowering.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
#include "WebAssemblyInstrInfo.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
@@ -86,8 +87,8 @@ bool WebAssemblyFrameLowering::needsSPForLocalFrame(
}
// In function with EH pads, we need to make a copy of the value of
-// __stack_pointer global in SP32 register, in order to use it when restoring
-// __stack_pointer after an exception is caught.
+// __stack_pointer global in SP32/64 register, in order to use it when
+// restoring __stack_pointer after an exception is caught.
bool WebAssemblyFrameLowering::needsPrologForEH(
const MachineFunction &MF) const {
auto EHType = MF.getTarget().getMCAsmInfo()->getExceptionHandlingType();
@@ -122,6 +123,57 @@ bool WebAssemblyFrameLowering::needsSPWriteback(
return needsSPForLocalFrame(MF) && !CanUseRedZone;
}
+unsigned WebAssemblyFrameLowering::getSPReg(const MachineFunction &MF) {
+ return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()
+ ? WebAssembly::SP64
+ : WebAssembly::SP32;
+}
+
+unsigned WebAssemblyFrameLowering::getFPReg(const MachineFunction &MF) {
+ return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()
+ ? WebAssembly::FP64
+ : WebAssembly::FP32;
+}
+
+unsigned
+WebAssemblyFrameLowering::getOpcConst(const MachineFunction &MF) {
+ return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()
+ ? WebAssembly::CONST_I64
+ : WebAssembly::CONST_I32;
+}
+
+unsigned WebAssemblyFrameLowering::getOpcAdd(const MachineFunction &MF) {
+ return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()
+ ? WebAssembly::ADD_I64
+ : WebAssembly::ADD_I32;
+}
+
+unsigned WebAssemblyFrameLowering::getOpcSub(const MachineFunction &MF) {
+ return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()
+ ? WebAssembly::SUB_I64
+ : WebAssembly::SUB_I32;
+}
+
+unsigned WebAssemblyFrameLowering::getOpcAnd(const MachineFunction &MF) {
+ return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()
+ ? WebAssembly::AND_I64
+ : WebAssembly::AND_I32;
+}
+
+unsigned
+WebAssemblyFrameLowering::getOpcGlobGet(const MachineFunction &MF) {
+ return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()
+ ? WebAssembly::GLOBAL_GET_I64
+ : WebAssembly::GLOBAL_GET_I32;
+}
+
+unsigned
+WebAssemblyFrameLowering::getOpcGlobSet(const MachineFunction &MF) {
+ return MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()
+ ? WebAssembly::GLOBAL_SET_I64
+ : WebAssembly::GLOBAL_SET_I32;
+}
+
void WebAssemblyFrameLowering::writeSPToGlobal(
unsigned SrcReg, MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator &InsertStore, const DebugLoc &DL) const {
@@ -129,7 +181,8 @@ void WebAssemblyFrameLowering::writeSPToGlobal(
const char *ES = "__stack_pointer";
auto *SPSymbol = MF.createExternalSymbolName(ES);
- BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::GLOBAL_SET_I32))
+
+ BuildMI(MBB, InsertStore, DL, TII->get(getOpcGlobSet(MF)))
.addExternalSymbol(SPSymbol)
.addReg(SrcReg);
}
@@ -140,11 +193,12 @@ WebAssemblyFrameLowering::eliminateCallFramePseudoInstr(
MachineBasicBlock::iterator I) const {
assert(!I->getOperand(0).getImm() && (hasFP(MF) || hasBP(MF)) &&
"Call frame pseudos should only be used for dynamic stack adjustment");
- const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ auto &ST = MF.getSubtarget<WebAssemblySubtarget>();
+ const auto *TII = ST.getInstrInfo();
if (I->getOpcode() == TII->getCallFrameDestroyOpcode() &&
needsSPWriteback(MF)) {
DebugLoc DL = I->getDebugLoc();
- writeSPToGlobal(WebAssembly::SP32, MF, MBB, I, DL);
+ writeSPToGlobal(getSPReg(MF), MF, MBB, I, DL);
}
return MBB.erase(I);
}
@@ -160,7 +214,8 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
return;
uint64_t StackSize = MFI.getStackSize();
- const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ auto &ST = MF.getSubtarget<WebAssemblySubtarget>();
+ const auto *TII = ST.getInstrInfo();
auto &MRI = MF.getRegInfo();
auto InsertPt = MBB.begin();
@@ -171,13 +226,13 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
const TargetRegisterClass *PtrRC =
MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
- unsigned SPReg = WebAssembly::SP32;
+ unsigned SPReg = getSPReg(MF);
if (StackSize)
SPReg = MRI.createVirtualRegister(PtrRC);
const char *ES = "__stack_pointer";
auto *SPSymbol = MF.createExternalSymbolName(ES);
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GLOBAL_GET_I32), SPReg)
+ BuildMI(MBB, InsertPt, DL, TII->get(getOpcGlobGet(MF)), SPReg)
.addExternalSymbol(SPSymbol);
bool HasBP = hasBP(MF);
@@ -191,34 +246,30 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
if (StackSize) {
// Subtract the frame size
Register OffsetReg = MRI.createVirtualRegister(PtrRC);
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+ BuildMI(MBB, InsertPt, DL, TII->get(getOpcConst(MF)), OffsetReg)
.addImm(StackSize);
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::SUB_I32),
- WebAssembly::SP32)
+ BuildMI(MBB, InsertPt, DL, TII->get(getOpcSub(MF)), getSPReg(MF))
.addReg(SPReg)
.addReg(OffsetReg);
}
if (HasBP) {
Register BitmaskReg = MRI.createVirtualRegister(PtrRC);
- unsigned Alignment = MFI.getMaxAlignment();
- assert((1u << countTrailingZeros(Alignment)) == Alignment &&
- "Alignment must be a power of 2");
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), BitmaskReg)
- .addImm((int)~(Alignment - 1));
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::AND_I32),
- WebAssembly::SP32)
- .addReg(WebAssembly::SP32)
+ Align Alignment = MFI.getMaxAlign();
+ BuildMI(MBB, InsertPt, DL, TII->get(getOpcConst(MF)), BitmaskReg)
+ .addImm((int64_t) ~(Alignment.value() - 1));
+ BuildMI(MBB, InsertPt, DL, TII->get(getOpcAnd(MF)), getSPReg(MF))
+ .addReg(getSPReg(MF))
.addReg(BitmaskReg);
}
if (hasFP(MF)) {
// Unlike most conventional targets (where FP points to the saved FP),
// FP points to the bottom of the fixed-size locals, so we can use positive
// offsets in load/store instructions.
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), WebAssembly::FP32)
- .addReg(WebAssembly::SP32);
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), getFPReg(MF))
+ .addReg(getSPReg(MF));
}
if (StackSize && needsSPWriteback(MF)) {
- writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPt, DL);
+ writeSPToGlobal(getSPReg(MF), MF, MBB, InsertPt, DL);
}
}
@@ -227,7 +278,8 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
uint64_t StackSize = MF.getFrameInfo().getStackSize();
if (!needsSP(MF) || !needsSPWriteback(MF))
return;
- const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ auto &ST = MF.getSubtarget<WebAssemblySubtarget>();
+ const auto *TII = ST.getInstrInfo();
auto &MRI = MF.getRegInfo();
auto InsertPt = MBB.getFirstTerminator();
DebugLoc DL;
@@ -238,6 +290,7 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
// Restore the stack pointer. If we had fixed-size locals, add the offset
// subtracted in the prolog.
unsigned SPReg = 0;
+ unsigned SPFPReg = hasFP(MF) ? getFPReg(MF) : getSPReg(MF);
if (hasBP(MF)) {
auto FI = MF.getInfo<WebAssemblyFunctionInfo>();
SPReg = FI->getBasePointerVreg();
@@ -245,17 +298,34 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
const TargetRegisterClass *PtrRC =
MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
Register OffsetReg = MRI.createVirtualRegister(PtrRC);
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+ BuildMI(MBB, InsertPt, DL, TII->get(getOpcConst(MF)), OffsetReg)
.addImm(StackSize);
- // In the epilog we don't need to write the result back to the SP32 physreg
- // because it won't be used again. We can use a stackified register instead.
+ // In the epilog we don't need to write the result back to the SP32/64
+ // physreg because it won't be used again. We can use a stackified register
+ // instead.
SPReg = MRI.createVirtualRegister(PtrRC);
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), SPReg)
- .addReg(hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32)
+ BuildMI(MBB, InsertPt, DL, TII->get(getOpcAdd(MF)), SPReg)
+ .addReg(SPFPReg)
.addReg(OffsetReg);
} else {
- SPReg = hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32;
+ SPReg = SPFPReg;
}
writeSPToGlobal(SPReg, MF, MBB, InsertPt, DL);
}
+
+TargetFrameLowering::DwarfFrameBase
+WebAssemblyFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
+ DwarfFrameBase Loc;
+ Loc.Kind = DwarfFrameBase::WasmFrameBase;
+ const WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+ if (needsSP(MF) && MFI.isFrameBaseVirtual()) {
+ unsigned LocalNum = MFI.getFrameBaseLocal();
+ Loc.Location.WasmLoc = {WebAssembly::TI_LOCAL, LocalNum};
+ } else {
+ // TODO: This should work on a breakpoint at a function with no frame,
+ // but probably won't work for traversing up the stack.
+ Loc.Location.WasmLoc = {WebAssembly::TI_GLOBAL_RELOC, 0};
+ }
+ return Loc;
+}
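
The prologue and epilogue above stop hard-coding SP32, FP32, CONST_I32 and friends and instead go through per-function helpers (getSPReg, getOpcConst, ...) so the same code serves wasm32 and wasm64. The helper bodies are not part of this hunk; the following standalone C++ sketch, with made-up enum values and a simplified Subtarget, only illustrates the address-width dispatch and the ~(align - 1) mask the prologue uses to realign the stack pointer.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Hypothetical stand-ins for the real register/opcode enums.
enum Reg { SP32, SP64, FP32, FP64 };
enum Opc { CONST_I32, CONST_I64, SUB_I32, SUB_I64 };

struct Subtarget { bool HasAddr64; };

// Mirrors the idea behind getSPReg()/getOpcConst(): pick the 64-bit flavor
// when the subtarget uses 64-bit addresses.
static Reg getSPReg(const Subtarget &ST) { return ST.HasAddr64 ? SP64 : SP32; }
static Opc getOpcConst(const Subtarget &ST) {
  return ST.HasAddr64 ? CONST_I64 : CONST_I32;
}

// The prologue aligns SP down with a mask, as in the HasBP block above.
static uint64_t alignDown(uint64_t SP, uint64_t Alignment) {
  assert((Alignment & (Alignment - 1)) == 0 && "alignment must be a power of 2");
  return SP & ~(Alignment - 1);
}

int main() {
  Subtarget Wasm64{true};
  std::printf("SP reg: %d, const opc: %d\n", getSPReg(Wasm64),
              getOpcConst(Wasm64));
  std::printf("alignDown(1000, 16) = %llu\n",
              (unsigned long long)alignDown(1000, 16));
  return 0;
}
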
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
index fdc0f561dcd9..e16f639ff22b 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
@@ -18,7 +18,6 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
-class MachineFrameInfo;
class WebAssemblyFrameLowering final : public TargetFrameLowering {
public:
@@ -44,6 +43,7 @@ public:
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
+ DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override;
bool needsPrologForEH(const MachineFunction &MF) const;
@@ -53,6 +53,15 @@ public:
MachineBasicBlock::iterator &InsertStore,
const DebugLoc &DL) const;
+ static unsigned getSPReg(const MachineFunction &MF);
+ static unsigned getFPReg(const MachineFunction &MF);
+ static unsigned getOpcConst(const MachineFunction &MF);
+ static unsigned getOpcAdd(const MachineFunction &MF);
+ static unsigned getOpcSub(const MachineFunction &MF);
+ static unsigned getOpcAnd(const MachineFunction &MF);
+ static unsigned getOpcGlobGet(const MachineFunction &MF);
+ static unsigned getOpcGlobSet(const MachineFunction &MF);
+
private:
bool hasBP(const MachineFunction &MF) const;
bool needsSPForLocalFrame(const MachineFunction &MF) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
index ba04fd4eb9dd..dee1c4e28149 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -13,8 +13,7 @@
// NOTE: NO INCLUDE GUARD DESIRED!
-HANDLE_NODETYPE(CALL1)
-HANDLE_NODETYPE(CALL0)
+HANDLE_NODETYPE(CALL)
HANDLE_NODETYPE(RET_CALL)
HANDLE_NODETYPE(RETURN)
HANDLE_NODETYPE(ARGUMENT)
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index 531a07b829c8..d1a696f854f8 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -53,11 +53,6 @@ public:
Subtarget = &MF.getSubtarget<WebAssemblySubtarget>();
- // Wasm64 is not fully supported right now (and is not specified)
- if (Subtarget->hasAddr64())
- report_fatal_error(
- "64-bit WebAssembly (wasm64) is not currently supported");
-
return SelectionDAGISel::runOnMachineFunction(MF);
}
@@ -82,6 +77,13 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
return;
}
+ MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
+ auto GlobalGetIns = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
+ : WebAssembly::GLOBAL_GET_I32;
+ auto ConstIns =
+ PtrVT == MVT::i64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
+ auto AddIns = PtrVT == MVT::i64 ? WebAssembly::ADD_I64 : WebAssembly::ADD_I32;
+
// A few custom selection cases.
SDLoc DL(Node);
MachineFunction &MF = CurDAG->getMachineFunction();
@@ -145,20 +147,16 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
false);
}
- MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
- assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
-
SDValue TLSBaseSym = CurDAG->getTargetExternalSymbol("__tls_base", PtrVT);
SDValue TLSOffsetSym = CurDAG->getTargetGlobalAddress(
GA->getGlobal(), DL, PtrVT, GA->getOffset(), 0);
- MachineSDNode *TLSBase = CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32,
- DL, MVT::i32, TLSBaseSym);
- MachineSDNode *TLSOffset = CurDAG->getMachineNode(
- WebAssembly::CONST_I32, DL, MVT::i32, TLSOffsetSym);
- MachineSDNode *TLSAddress =
- CurDAG->getMachineNode(WebAssembly::ADD_I32, DL, MVT::i32,
- SDValue(TLSBase, 0), SDValue(TLSOffset, 0));
+ MachineSDNode *TLSBase =
+ CurDAG->getMachineNode(GlobalGetIns, DL, PtrVT, TLSBaseSym);
+ MachineSDNode *TLSOffset =
+ CurDAG->getMachineNode(ConstIns, DL, PtrVT, TLSOffsetSym);
+ MachineSDNode *TLSAddress = CurDAG->getMachineNode(
+ AddIns, DL, PtrVT, SDValue(TLSBase, 0), SDValue(TLSOffset, 0));
ReplaceNode(Node, TLSAddress);
return;
}
@@ -167,22 +165,16 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
switch (IntNo) {
case Intrinsic::wasm_tls_size: {
- MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
- assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
-
MachineSDNode *TLSSize = CurDAG->getMachineNode(
- WebAssembly::GLOBAL_GET_I32, DL, PtrVT,
- CurDAG->getTargetExternalSymbol("__tls_size", MVT::i32));
+ GlobalGetIns, DL, PtrVT,
+ CurDAG->getTargetExternalSymbol("__tls_size", PtrVT));
ReplaceNode(Node, TLSSize);
return;
}
case Intrinsic::wasm_tls_align: {
- MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
- assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
-
MachineSDNode *TLSAlign = CurDAG->getMachineNode(
- WebAssembly::GLOBAL_GET_I32, DL, PtrVT,
- CurDAG->getTargetExternalSymbol("__tls_align", MVT::i32));
+ GlobalGetIns, DL, PtrVT,
+ CurDAG->getTargetExternalSymbol("__tls_align", PtrVT));
ReplaceNode(Node, TLSAlign);
return;
}
@@ -193,11 +185,8 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
switch (IntNo) {
case Intrinsic::wasm_tls_base: {
- MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
- assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
-
MachineSDNode *TLSBase = CurDAG->getMachineNode(
- WebAssembly::GLOBAL_GET_I32, DL, MVT::i32, MVT::Other,
+ GlobalGetIns, DL, PtrVT, MVT::Other,
CurDAG->getTargetExternalSymbol("__tls_base", PtrVT),
Node->getOperand(0));
ReplaceNode(Node, TLSBase);
@@ -206,6 +195,35 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
}
break;
}
+ case WebAssemblyISD::CALL:
+ case WebAssemblyISD::RET_CALL: {
+ // CALL has both variable operands and variable results, but ISel only
+ // supports one or the other. Split calls into two nodes glued together, one
+ // for the operands and one for the results. These two nodes will be
+ // recombined in a custom inserter hook into a single MachineInstr.
+ SmallVector<SDValue, 16> Ops;
+ for (size_t i = 1; i < Node->getNumOperands(); ++i) {
+ SDValue Op = Node->getOperand(i);
+ if (i == 1 && Op->getOpcode() == WebAssemblyISD::Wrapper)
+ Op = Op->getOperand(0);
+ Ops.push_back(Op);
+ }
+
+ // Add the chain last
+ Ops.push_back(Node->getOperand(0));
+ MachineSDNode *CallParams =
+ CurDAG->getMachineNode(WebAssembly::CALL_PARAMS, DL, MVT::Glue, Ops);
+
+ unsigned Results = Node->getOpcode() == WebAssemblyISD::CALL
+ ? WebAssembly::CALL_RESULTS
+ : WebAssembly::RET_CALL_RESULTS;
+
+ SDValue Link(CallParams, 0);
+ MachineSDNode *CallResults =
+ CurDAG->getMachineNode(Results, DL, Node->getVTList(), Link);
+ ReplaceNode(Node, CallResults);
+ return;
+ }
default:
break;
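
The CALL/RET_CALL selection above splits each call into a CALL_PARAMS node and a glued CALL_RESULTS node because ISel cannot represent a node with both variable operands and variable results. A toy, self-contained sketch of that split-then-recombine shape follows; the struct names and operand strings are invented, not LLVM types.

#include <cstdio>
#include <string>
#include <vector>

// One half carries the operands (callee + args + chain), the other half
// carries the returned values; a later hook merges them into one instruction.
struct CallParams { std::vector<std::string> Ops; };
struct CallResults { std::vector<std::string> Defs; };

struct CallInstr {
  std::vector<std::string> Defs;
  std::vector<std::string> Ops;
};

// The custom-inserter step: glue the two halves back into a single call.
static CallInstr recombine(const CallParams &P, const CallResults &R) {
  CallInstr MI;
  MI.Defs = R.Defs;
  MI.Ops = P.Ops;
  return MI;
}

int main() {
  CallParams P{{"@callee", "%arg0", "%arg1", "<chain>"}};
  CallResults R{{"%ret0", "%ret1"}};
  CallInstr MI = recombine(P, R);
  std::printf("call defines %zu values and takes %zu operands\n",
              MI.Defs.size(), MI.Ops.size());
  return 0;
}
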
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 5b177c0c5d9d..a9b9eceb4130 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -61,8 +61,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
- }
- if (Subtarget->hasUnimplementedSIMD128()) {
addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
}
@@ -116,97 +114,81 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
for (auto T : {MVT::i32, MVT::i64})
setOperationAction(Op, T, Expand);
if (Subtarget->hasSIMD128())
- for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
setOperationAction(Op, T, Expand);
- if (Subtarget->hasUnimplementedSIMD128())
- setOperationAction(Op, MVT::v2i64, Expand);
}
// SIMD-specific configuration
if (Subtarget->hasSIMD128()) {
+ // Hoist bitcasts out of shuffles
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+
// Support saturating add for i8x16 and i16x8
for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
for (auto T : {MVT::v16i8, MVT::v8i16})
setOperationAction(Op, T, Legal);
+ // Support integer abs
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
+ setOperationAction(ISD::ABS, T, Legal);
+
// Custom lower BUILD_VECTORs to minimize number of replace_lanes
- for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
+ MVT::v2f64})
setOperationAction(ISD::BUILD_VECTOR, T, Custom);
- if (Subtarget->hasUnimplementedSIMD128())
- for (auto T : {MVT::v2i64, MVT::v2f64})
- setOperationAction(ISD::BUILD_VECTOR, T, Custom);
// We have custom shuffle lowering to expose the shuffle mask
- for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
+ MVT::v2f64})
setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
- if (Subtarget->hasUnimplementedSIMD128())
- for (auto T: {MVT::v2i64, MVT::v2f64})
- setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
// Custom lowering since wasm shifts must have a scalar shift amount
- for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL}) {
- for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
+ for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
setOperationAction(Op, T, Custom);
- if (Subtarget->hasUnimplementedSIMD128())
- setOperationAction(Op, MVT::v2i64, Custom);
- }
// Custom lower lane accesses to expand out variable indices
- for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT}) {
- for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
+ for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT})
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
+ MVT::v2f64})
setOperationAction(Op, T, Custom);
- if (Subtarget->hasUnimplementedSIMD128())
- for (auto T : {MVT::v2i64, MVT::v2f64})
- setOperationAction(Op, T, Custom);
- }
- // There is no i64x2.mul instruction
- setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+ // There is no i8x16.mul instruction
+ setOperationAction(ISD::MUL, MVT::v16i8, Expand);
// There are no vector select instructions
- for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT}) {
- for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
+ for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT})
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
+ MVT::v2f64})
setOperationAction(Op, T, Expand);
- if (Subtarget->hasUnimplementedSIMD128())
- for (auto T : {MVT::v2i64, MVT::v2f64})
- setOperationAction(Op, T, Expand);
- }
// Expand integer operations supported for scalars but not SIMD
for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
- ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR}) {
- for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
+ ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
setOperationAction(Op, T, Expand);
- if (Subtarget->hasUnimplementedSIMD128())
- setOperationAction(Op, MVT::v2i64, Expand);
- }
// But we do have integer min and max operations
- if (Subtarget->hasUnimplementedSIMD128()) {
- for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
- for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
- setOperationAction(Op, T, Legal);
- }
+ for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
+ setOperationAction(Op, T, Legal);
// Expand float operations supported for scalars but not SIMD
for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
- ISD::FEXP, ISD::FEXP2, ISD::FRINT}) {
- setOperationAction(Op, MVT::v4f32, Expand);
- if (Subtarget->hasUnimplementedSIMD128())
- setOperationAction(Op, MVT::v2f64, Expand);
- }
+ ISD::FEXP, ISD::FEXP2, ISD::FRINT})
+ for (auto T : {MVT::v4f32, MVT::v2f64})
+ setOperationAction(Op, T, Expand);
// Expand operations not supported for i64x2 vectors
- if (Subtarget->hasUnimplementedSIMD128())
- for (unsigned CC = 0; CC < ISD::SETCC_INVALID; ++CC)
- setCondCodeAction(static_cast<ISD::CondCode>(CC), MVT::v2i64, Custom);
-
- // Expand additional SIMD ops that V8 hasn't implemented yet
- if (!Subtarget->hasUnimplementedSIMD128()) {
- setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
- setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
- }
+ for (unsigned CC = 0; CC < ISD::SETCC_INVALID; ++CC)
+ setCondCodeAction(static_cast<ISD::CondCode>(CC), MVT::v2i64, Custom);
+
+ // 64x2 conversions are not in the spec
+ for (auto Op :
+ {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT})
+ for (auto T : {MVT::v2i64, MVT::v2f64})
+ setOperationAction(Op, T, Expand);
}
// As a special case, these operators use the type to mean the type to
@@ -227,6 +209,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);
setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+ setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
setOperationAction(ISD::CopyToReg, MVT::Other, Custom);
// Expand these forms; we pattern-match the forms that we can handle in isel.
@@ -259,12 +242,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
}
}
// But some vector extending loads are legal
- if (Subtarget->hasUnimplementedSIMD128()) {
- for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
- setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
- setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
- setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
- }
+ for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
+ setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
+ setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
+ setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
}
}
@@ -273,6 +254,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// Trap lowers to wasm unreachable
setOperationAction(ISD::TRAP, MVT::Other, Legal);
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
// Exception handling intrinsics
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -434,6 +416,58 @@ static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
return DoneMBB;
}
+static MachineBasicBlock *LowerCallResults(MachineInstr &CallResults,
+ DebugLoc DL, MachineBasicBlock *BB,
+ const TargetInstrInfo &TII) {
+ MachineInstr &CallParams = *CallResults.getPrevNode();
+ assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
+ assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
+ CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
+
+ bool IsIndirect = CallParams.getOperand(0).isReg();
+ bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
+
+ unsigned CallOp;
+ if (IsIndirect && IsRetCall) {
+ CallOp = WebAssembly::RET_CALL_INDIRECT;
+ } else if (IsIndirect) {
+ CallOp = WebAssembly::CALL_INDIRECT;
+ } else if (IsRetCall) {
+ CallOp = WebAssembly::RET_CALL;
+ } else {
+ CallOp = WebAssembly::CALL;
+ }
+
+ MachineFunction &MF = *BB->getParent();
+ const MCInstrDesc &MCID = TII.get(CallOp);
+ MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
+
+ // Move the function pointer to the end of the arguments for indirect calls
+ if (IsIndirect) {
+ auto FnPtr = CallParams.getOperand(0);
+ CallParams.RemoveOperand(0);
+ CallParams.addOperand(FnPtr);
+ }
+
+ for (auto Def : CallResults.defs())
+ MIB.add(Def);
+
+ // Add placeholders for the type index and immediate flags
+ if (IsIndirect) {
+ MIB.addImm(0);
+ MIB.addImm(0);
+ }
+
+ for (auto Use : CallParams.uses())
+ MIB.add(Use);
+
+ BB->insert(CallResults.getIterator(), MIB);
+ CallParams.eraseFromParent();
+ CallResults.eraseFromParent();
+
+ return BB;
+}
+
MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
@@ -466,7 +500,9 @@ MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
case WebAssembly::FP_TO_UINT_I64_F64:
return LowerFPToInt(MI, DL, BB, TII, true, true, true,
WebAssembly::I64_TRUNC_U_F64);
- llvm_unreachable("Unexpected instruction to emit with custom inserter");
+ case WebAssembly::CALL_RESULTS:
+ case WebAssembly::RET_CALL_RESULTS:
+ return LowerCallResults(MI, DL, BB, TII);
}
}
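
LowerCallResults above recombines the two glued nodes, picks one of four call opcodes from the (indirect, return-call) pair, and moves the function pointer to the end of the operand list for indirect calls. A hedged standalone sketch of just that dispatch and reordering, using plain strings instead of machine operands:

#include <cstdio>
#include <string>
#include <utility>
#include <vector>

static const char *pickCallOpcode(bool IsIndirect, bool IsRetCall) {
  if (IsIndirect && IsRetCall) return "return_call_indirect";
  if (IsIndirect)              return "call_indirect";
  if (IsRetCall)               return "return_call";
  return "call";
}

// For indirect calls the callee operand is moved last, mirroring the FnPtr
// reordering in the hunk above.
static void moveCalleeLast(std::vector<std::string> &Operands) {
  if (Operands.empty()) return;
  std::string Callee = Operands.front();
  Operands.erase(Operands.begin());
  Operands.push_back(std::move(Callee));
}

int main() {
  std::vector<std::string> Ops = {"%fnptr", "%arg0", "%arg1"};
  moveCalleeLast(Ops);
  std::printf("%s:", pickCallOpcode(/*IsIndirect=*/true, /*IsRetCall=*/false));
  for (const auto &Op : Ops) std::printf(" %s", Op.c_str());
  std::printf("\n");
  return 0;
}
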
@@ -565,8 +601,6 @@ bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
}
bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
- if (!Subtarget->hasUnimplementedSIMD128())
- return false;
MVT ExtT = ExtVal.getSimpleValueType();
MVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getSimpleValueType(0);
return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
@@ -580,7 +614,11 @@ EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
if (VT.isVector())
return VT.changeVectorElementTypeToInteger();
- return TargetLowering::getSetCCResultType(DL, C, VT);
+ // So far, all branch instructions in Wasm take an I32 condition.
+ // The default TargetLowering::getSetCCResultType returns the pointer size,
+ // which would be useful to reduce instruction counts when testing
+ // against 64-bit pointers/values if at some point Wasm supports that.
+ return EVT::getIntegerVT(C, 32);
}
bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
@@ -648,7 +686,8 @@ static bool callingConvSupported(CallingConv::ID CallConv) {
CallConv == CallingConv::PreserveMost ||
CallConv == CallingConv::PreserveAll ||
CallConv == CallingConv::CXX_FAST_TLS ||
- CallConv == CallingConv::WASM_EmscriptenInvoke;
+ CallConv == CallingConv::WASM_EmscriptenInvoke ||
+ CallConv == CallingConv::Swift;
}
SDValue
@@ -670,41 +709,57 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
fail(DL, DAG, "WebAssembly doesn't support patch point yet");
if (CLI.IsTailCall) {
- bool MustTail = CLI.CS && CLI.CS.isMustTailCall();
- if (Subtarget->hasTailCall() && !CLI.IsVarArg) {
- // Do not tail call unless caller and callee return types match
- const Function &F = MF.getFunction();
- const TargetMachine &TM = getTargetMachine();
- Type *RetTy = F.getReturnType();
- SmallVector<MVT, 4> CallerRetTys;
- SmallVector<MVT, 4> CalleeRetTys;
- computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
- computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
- bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
- std::equal(CallerRetTys.begin(), CallerRetTys.end(),
- CalleeRetTys.begin());
- if (!TypesMatch) {
- // musttail in this case would be an LLVM IR validation failure
- assert(!MustTail);
- CLI.IsTailCall = false;
- }
- } else {
+ auto NoTail = [&](const char *Msg) {
+ if (CLI.CB && CLI.CB->isMustTailCall())
+ fail(DL, DAG, Msg);
CLI.IsTailCall = false;
- if (MustTail) {
- if (CLI.IsVarArg) {
- // The return would pop the argument buffer
- fail(DL, DAG, "WebAssembly does not support varargs tail calls");
- } else {
- fail(DL, DAG, "WebAssembly 'tail-call' feature not enabled");
+ };
+
+ if (!Subtarget->hasTailCall())
+ NoTail("WebAssembly 'tail-call' feature not enabled");
+
+ // Varargs calls cannot be tail calls because the buffer is on the stack
+ if (CLI.IsVarArg)
+ NoTail("WebAssembly does not support varargs tail calls");
+
+ // Do not tail call unless caller and callee return types match
+ const Function &F = MF.getFunction();
+ const TargetMachine &TM = getTargetMachine();
+ Type *RetTy = F.getReturnType();
+ SmallVector<MVT, 4> CallerRetTys;
+ SmallVector<MVT, 4> CalleeRetTys;
+ computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
+ computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
+ bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
+ std::equal(CallerRetTys.begin(), CallerRetTys.end(),
+ CalleeRetTys.begin());
+ if (!TypesMatch)
+ NoTail("WebAssembly tail call requires caller and callee return types to "
+ "match");
+
+ // If pointers to local stack values are passed, we cannot tail call
+ if (CLI.CB) {
+ for (auto &Arg : CLI.CB->args()) {
+ Value *Val = Arg.get();
+ // Trace the value back through pointer operations
+ while (true) {
+ Value *Src = Val->stripPointerCastsAndAliases();
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
+ Src = GEP->getPointerOperand();
+ if (Val == Src)
+ break;
+ Val = Src;
+ }
+ if (isa<AllocaInst>(Val)) {
+ NoTail(
+ "WebAssembly does not support tail calling with stack arguments");
+ break;
}
}
}
}
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
- if (Ins.size() > 1)
- fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet");
-
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
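
The rewritten tail-call handling funnels every disqualifying condition through a single NoTail helper: a plain tail-call hint silently degrades to an ordinary call, while a musttail call turns the same condition into a hard error (fail() in the real code). A toy sketch of that pattern, using an exception where LLVM reports a diagnostic:

#include <cstdio>
#include <stdexcept>
#include <string>

struct CallPlan {
  bool IsTailCall = true;
  bool IsMustTail = false;
};

// Stand-in for the NoTail lambda: musttail makes the condition fatal,
// otherwise the call is just lowered as a non-tail call.
static void noTail(CallPlan &CLI, const std::string &Msg) {
  if (CLI.IsMustTail)
    throw std::runtime_error(Msg);
  CLI.IsTailCall = false;
}

int main() {
  CallPlan Plan{true, false};
  noTail(Plan, "WebAssembly 'tail-call' feature not enabled");
  std::printf("lowered as %s\n", Plan.IsTailCall ? "tail call" : "normal call");
  return 0;
}
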
@@ -717,10 +772,14 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
std::swap(OutVals[0], OutVals[1]);
}
+ bool HasSwiftSelfArg = false;
+ bool HasSwiftErrorArg = false;
unsigned NumFixedArgs = 0;
for (unsigned I = 0; I < Outs.size(); ++I) {
const ISD::OutputArg &Out = Outs[I];
SDValue &OutVal = OutVals[I];
+ HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
+ HasSwiftErrorArg |= Out.Flags.isSwiftError();
if (Out.Flags.isNest())
fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
if (Out.Flags.isInAlloca())
@@ -732,13 +791,13 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
auto &MFI = MF.getFrameInfo();
int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
- Out.Flags.getByValAlign(),
+ Out.Flags.getNonZeroByValAlign(),
/*isSS=*/false);
SDValue SizeNode =
DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
Chain = DAG.getMemcpy(
- Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getByValAlign(),
+ Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getNonZeroByValAlign(),
/*isVolatile*/ false, /*AlwaysInline=*/false,
/*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
OutVal = FINode;
@@ -750,6 +809,29 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
bool IsVarArg = CLI.IsVarArg;
auto PtrVT = getPointerTy(Layout);
+ // For swiftcc, emit additional swiftself and swifterror arguments if they
+ // are not already present. These additional arguments are also added to the
+ // callee signature; they are necessary to match caller and callee signatures
+ // for indirect calls.
+ if (CallConv == CallingConv::Swift) {
+ if (!HasSwiftSelfArg) {
+ NumFixedArgs++;
+ ISD::OutputArg Arg;
+ Arg.Flags.setSwiftSelf();
+ CLI.Outs.push_back(Arg);
+ SDValue ArgVal = DAG.getUNDEF(PtrVT);
+ CLI.OutVals.push_back(ArgVal);
+ }
+ if (!HasSwiftErrorArg) {
+ NumFixedArgs++;
+ ISD::OutputArg Arg;
+ Arg.Flags.setSwiftError();
+ CLI.Outs.push_back(Arg);
+ SDValue ArgVal = DAG.getUNDEF(PtrVT);
+ CLI.OutVals.push_back(ArgVal);
+ }
+ }
+
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
@@ -763,10 +845,10 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
EVT VT = Arg.getValueType();
assert(VT != MVT::iPTR && "Legalized args should be concrete");
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
- unsigned Align = std::max(Out.Flags.getOrigAlign(),
- Layout.getABITypeAlignment(Ty));
- unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty),
- Align);
+ Align Alignment =
+ std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
+ unsigned Offset =
+ CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
Offset, VT.getSimpleVT(),
CCValAssign::Full));
@@ -838,7 +920,7 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (In.Flags.isInConsecutiveRegsLast())
fail(DL, DAG,
"WebAssembly hasn't implemented cons regs last return values");
- // Ignore In.getOrigAlign() because all our arguments are passed in
+ // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
// registers.
InTys.push_back(In.VT);
}
@@ -851,17 +933,13 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
InTys.push_back(MVT::Other);
SDVTList InTyList = DAG.getVTList(InTys);
- SDValue Res =
- DAG.getNode(Ins.empty() ? WebAssemblyISD::CALL0 : WebAssemblyISD::CALL1,
- DL, InTyList, Ops);
- if (Ins.empty()) {
- Chain = Res;
- } else {
- InVals.push_back(Res);
- Chain = Res.getValue(1);
- }
+ SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);
- return Chain;
+ for (size_t I = 0; I < Ins.size(); ++I)
+ InVals.push_back(Res.getValue(I));
+
+ // Return the chain
+ return Res.getValue(Ins.size());
}
bool WebAssemblyTargetLowering::CanLowerReturn(
@@ -916,7 +994,11 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
// of the incoming values before they're represented by virtual registers.
MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
+ bool HasSwiftErrorArg = false;
+ bool HasSwiftSelfArg = false;
for (const ISD::InputArg &In : Ins) {
+ HasSwiftSelfArg |= In.Flags.isSwiftSelf();
+ HasSwiftErrorArg |= In.Flags.isSwiftError();
if (In.Flags.isInAlloca())
fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
if (In.Flags.isNest())
@@ -925,7 +1007,7 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
if (In.Flags.isInConsecutiveRegsLast())
fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
- // Ignore In.getOrigAlign() because all our arguments are passed in
+ // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
// registers.
InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
DAG.getTargetConstant(InVals.size(),
@@ -936,6 +1018,19 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
MFI->addParam(In.VT);
}
+ // For swiftcc, emit additional swiftself and swifterror arguments if they
+ // are not already present. These additional arguments are also added to the
+ // callee signature; they are necessary to match caller and callee signatures
+ // for indirect calls.
+ auto PtrVT = getPointerTy(MF.getDataLayout());
+ if (CallConv == CallingConv::Swift) {
+ if (!HasSwiftSelfArg) {
+ MFI->addParam(PtrVT);
+ }
+ if (!HasSwiftErrorArg) {
+ MFI->addParam(PtrVT);
+ }
+ }
// Varargs are copied into a buffer allocated by the caller, and a pointer to
// the buffer is passed as an argument.
if (IsVarArg) {
@@ -953,8 +1048,8 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
// Record the number and types of arguments and results.
SmallVector<MVT, 4> Params;
SmallVector<MVT, 4> Results;
- computeSignatureVTs(MF.getFunction().getFunctionType(), MF.getFunction(),
- DAG.getTarget(), Params, Results);
+ computeSignatureVTs(MF.getFunction().getFunctionType(), &MF.getFunction(),
+ MF.getFunction(), DAG.getTarget(), Params, Results);
for (MVT VT : Results)
MFI->addResult(VT);
// TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
@@ -1190,11 +1285,10 @@ SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
for (auto MBB : MBBs)
Ops.push_back(DAG.getBasicBlock(MBB));
- // TODO: For now, we just pick something arbitrary for a default case for now.
- // We really want to sniff out the guard and put in the real default case (and
- // delete the guard).
- Ops.push_back(DAG.getBasicBlock(MBBs[0]));
-
+ // Add the first MBB as a dummy default target for now. This will be replaced
+ // with the proper default target (and the preceding range check eliminated)
+ // if possible by WebAssemblyFixBrTableDefaults.
+ Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
}
@@ -1262,6 +1356,24 @@ SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
Op.getOperand(3) // thrown value
});
}
+
+ case Intrinsic::wasm_shuffle: {
+ // Drop in-chain and replace undefs, but otherwise pass through unchanged
+ SDValue Ops[18];
+ size_t OpIdx = 0;
+ Ops[OpIdx++] = Op.getOperand(1);
+ Ops[OpIdx++] = Op.getOperand(2);
+ while (OpIdx < 18) {
+ const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
+ if (MaskIdx.isUndef() ||
+ cast<ConstantSDNode>(MaskIdx.getNode())->getZExtValue() >= 32) {
+ Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32);
+ } else {
+ Ops[OpIdx++] = MaskIdx;
+ }
+ }
+ return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
+ }
}
}
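
The wasm_shuffle lowering above copies the two vector operands through and rewrites any mask element that is undef or outside the 0-31 range (two 16-lane inputs) to 0 before forming the SHUFFLE node. A small standalone sketch of that sanitization, with std::optional standing in for a possibly-undef operand:

#include <array>
#include <cstdint>
#include <cstdio>
#include <optional>

static std::array<uint8_t, 16>
sanitizeMask(const std::array<std::optional<uint8_t>, 16> &Mask) {
  std::array<uint8_t, 16> Out{};
  for (size_t I = 0; I < Mask.size(); ++I)
    // Undef or out-of-range lane indices become 0.
    Out[I] = (Mask[I] && *Mask[I] < 32) ? *Mask[I] : 0;
  return Out;
}

int main() {
  std::array<std::optional<uint8_t>, 16> Mask{};
  Mask[0] = 3;
  Mask[1] = std::nullopt; // undef lane
  Mask[2] = 40;           // out of range
  auto Clean = sanitizeMask(Mask);
  std::printf("%d %d %d\n", (int)Clean[0], (int)Clean[1], (int)Clean[2]);
  return 0;
}
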
@@ -1270,39 +1382,42 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
// If sign extension operations are disabled, allow sext_inreg only if operand
- // is a vector extract. SIMD does not depend on sign extension operations, but
- // allowing sext_inreg in this context lets us have simple patterns to select
- // extract_lane_s instructions. Expanding sext_inreg everywhere would be
- // simpler in this file, but would necessitate large and brittle patterns to
- // undo the expansion and select extract_lane_s instructions.
+ // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
+ // extension operations, but allowing sext_inreg in this context lets us have
+ // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
+ // everywhere would be simpler in this file, but would necessitate large and
+ // brittle patterns to undo the expansion and select extract_lane_s
+ // instructions.
assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
- if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
- const SDValue &Extract = Op.getOperand(0);
- MVT VecT = Extract.getOperand(0).getSimpleValueType();
- MVT ExtractedLaneT = static_cast<VTSDNode *>(Op.getOperand(1).getNode())
- ->getVT()
- .getSimpleVT();
- MVT ExtractedVecT =
- MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
- if (ExtractedVecT == VecT)
- return Op;
- // Bitcast vector to appropriate type to ensure ISel pattern coverage
- const SDValue &Index = Extract.getOperand(1);
- unsigned IndexVal =
- static_cast<ConstantSDNode *>(Index.getNode())->getZExtValue();
- unsigned Scale =
- ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
- assert(Scale > 1);
- SDValue NewIndex =
- DAG.getConstant(IndexVal * Scale, DL, Index.getValueType());
- SDValue NewExtract = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
- DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(),
- NewExtract, Op.getOperand(1));
- }
- // Otherwise expand
- return SDValue();
+ if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ const SDValue &Extract = Op.getOperand(0);
+ MVT VecT = Extract.getOperand(0).getSimpleValueType();
+ if (VecT.getVectorElementType().getSizeInBits() > 32)
+ return SDValue();
+ MVT ExtractedLaneT =
+ cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
+ MVT ExtractedVecT =
+ MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
+ if (ExtractedVecT == VecT)
+ return Op;
+
+ // Bitcast vector to appropriate type to ensure ISel pattern coverage
+ const SDNode *Index = Extract.getOperand(1).getNode();
+ if (!isa<ConstantSDNode>(Index))
+ return SDValue();
+ unsigned IndexVal = cast<ConstantSDNode>(Index)->getZExtValue();
+ unsigned Scale =
+ ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
+ assert(Scale > 1);
+ SDValue NewIndex =
+ DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
+ SDValue NewExtract = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
+ DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
+ Op.getOperand(1));
}
SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
@@ -1311,7 +1426,7 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
const EVT VecT = Op.getValueType();
const EVT LaneT = Op.getOperand(0).getValueType();
const size_t Lanes = Op.getNumOperands();
- bool CanSwizzle = Subtarget->hasUnimplementedSIMD128() && VecT == MVT::v16i8;
+ bool CanSwizzle = VecT == MVT::v16i8;
// BUILD_VECTORs are lowered to the instruction that initializes the highest
// possible number of lanes at once followed by a sequence of replace_lane
@@ -1410,38 +1525,37 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// original instruction
std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
SDValue Result;
- if (Subtarget->hasUnimplementedSIMD128()) {
- // Prefer swizzles over vector consts over splats
- if (NumSwizzleLanes >= NumSplatLanes &&
- NumSwizzleLanes >= NumConstantLanes) {
- Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
- SwizzleIndices);
- auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
- IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
- return Swizzled == GetSwizzleSrcs(I, Lane);
- };
- } else if (NumConstantLanes >= NumSplatLanes) {
- SmallVector<SDValue, 16> ConstLanes;
- for (const SDValue &Lane : Op->op_values()) {
- if (IsConstant(Lane)) {
- ConstLanes.push_back(Lane);
- } else if (LaneT.isFloatingPoint()) {
- ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
- } else {
- ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
- }
+ // Prefer swizzles over vector consts over splats
+ if (NumSwizzleLanes >= NumSplatLanes &&
+ (!Subtarget->hasUnimplementedSIMD128() ||
+ NumSwizzleLanes >= NumConstantLanes)) {
+ Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
+ SwizzleIndices);
+ auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
+ IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
+ return Swizzled == GetSwizzleSrcs(I, Lane);
+ };
+ } else if (NumConstantLanes >= NumSplatLanes &&
+ Subtarget->hasUnimplementedSIMD128()) {
+ SmallVector<SDValue, 16> ConstLanes;
+ for (const SDValue &Lane : Op->op_values()) {
+ if (IsConstant(Lane)) {
+ ConstLanes.push_back(Lane);
+ } else if (LaneT.isFloatingPoint()) {
+ ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
+ } else {
+ ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
}
- Result = DAG.getBuildVector(VecT, DL, ConstLanes);
- IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
- return IsConstant(Lane);
- };
}
+ Result = DAG.getBuildVector(VecT, DL, ConstLanes);
+ IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
+ return IsConstant(Lane);
+ };
}
if (!Result) {
// Use a splat, but possibly a load_splat
LoadSDNode *SplattedLoad;
- if (Subtarget->hasUnimplementedSIMD128() &&
- (SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
+ if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
Result = DAG.getMemIntrinsicNode(
WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT),
@@ -1502,7 +1616,6 @@ SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
// expanding all i64x2 SETCC nodes, but that seems to expand f64x2 SETCC nodes
// (which return i64x2 results) as well. So instead we manually unroll i64x2
// comparisons here.
- assert(Subtarget->hasUnimplementedSIMD128());
assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
SmallVector<SDValue, 2> LHS, RHS;
DAG.ExtractVectorElements(Op->getOperand(0), LHS);
@@ -1536,22 +1649,25 @@ static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
return DAG.UnrollVectorOp(Op.getNode());
// Otherwise mask the shift value to get proper semantics from 32-bit shift
SDLoc DL(Op);
- SDValue ShiftVal = Op.getOperand(1);
- uint64_t MaskVal = LaneT.getSizeInBits() - 1;
- SDValue MaskedShiftVal = DAG.getNode(
- ISD::AND, // mask opcode
- DL, ShiftVal.getValueType(), // masked value type
- ShiftVal, // original shift value operand
- DAG.getConstant(MaskVal, DL, ShiftVal.getValueType()) // mask operand
- );
-
- return DAG.UnrollVectorOp(
- DAG.getNode(Op.getOpcode(), // original shift opcode
- DL, Op.getValueType(), // original return type
- Op.getOperand(0), // original vector operand,
- MaskedShiftVal // new masked shift value operand
- )
- .getNode());
+ size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
+ SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
+ unsigned ShiftOpcode = Op.getOpcode();
+ SmallVector<SDValue, 16> ShiftedElements;
+ DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
+ SmallVector<SDValue, 16> ShiftElements;
+ DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
+ SmallVector<SDValue, 16> UnrolledOps;
+ for (size_t i = 0; i < NumLanes; ++i) {
+ SDValue MaskedShiftValue =
+ DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
+ SDValue ShiftedValue = ShiftedElements[i];
+ if (ShiftOpcode == ISD::SRA)
+ ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
+ ShiftedValue, DAG.getValueType(LaneT));
+ UnrolledOps.push_back(
+ DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
+ }
+ return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
}
SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
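
unrollVectorShift above now extracts the lanes to i32, masks each shift amount to the lane width, and sign-extends the lane first for arithmetic right shifts (the SIGN_EXTEND_INREG node). A scalar C++ model of those semantics; LaneBits would be 8 or 16, and the sketch relies on the usual arithmetic behavior of >> on negative ints:

#include <cstdint>
#include <cstdio>
#include <vector>

static std::vector<int32_t> unrollShiftRight(const std::vector<uint32_t> &Lanes,
                                             const std::vector<uint32_t> &Amts,
                                             unsigned LaneBits,
                                             bool Arithmetic) {
  const uint32_t LaneMask = (1u << LaneBits) - 1;
  const uint32_t SignBit = 1u << (LaneBits - 1);
  std::vector<int32_t> Out;
  for (size_t I = 0; I < Lanes.size(); ++I) {
    uint32_t Bits = Lanes[I] & LaneMask;
    uint32_t Amt = Amts[I] & (LaneBits - 1); // mask the shift amount
    if (Arithmetic) {
      // Sign-extend the lane into i32 before shifting.
      int32_t Value =
          (Bits & SignBit) ? (int32_t)(Bits | ~LaneMask) : (int32_t)Bits;
      Out.push_back(Value >> Amt);
    } else {
      Out.push_back((int32_t)(Bits >> Amt));
    }
  }
  return Out;
}

int main() {
  // i8 lane 0x80 is -128, so -128 >> 4 (arithmetic) is -8; a shift amount of
  // 9 is masked to 1, so 0x7f >> 1 is 63.
  auto R = unrollShiftRight({0x80, 0x7f}, {4, 9}, /*LaneBits=*/8,
                            /*Arithmetic=*/true);
  std::printf("%d %d\n", R[0], R[1]);
  return 0;
}
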
@@ -1561,19 +1677,13 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
// Only manually lower vector shifts
assert(Op.getSimpleValueType().isVector());
- // Unroll non-splat vector shifts
- BuildVectorSDNode *ShiftVec;
- SDValue SplatVal;
- if (!(ShiftVec = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) ||
- !(SplatVal = ShiftVec->getSplatValue()))
+ auto ShiftVal = DAG.getSplatValue(Op.getOperand(1));
+ if (!ShiftVal)
return unrollVectorShift(Op, DAG);
- // All splats except i64x2 const splats are handled by patterns
- auto *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);
- if (!SplatConst || Op.getSimpleValueType() != MVT::v2i64)
- return Op;
+ // Use anyext because none of the high bits can affect the shift
+ ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
- // i64x2 const splats are custom lowered to avoid unnecessary wraps
unsigned Opcode;
switch (Op.getOpcode()) {
case ISD::SHL:
@@ -1588,11 +1698,45 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
default:
llvm_unreachable("unexpected opcode");
}
- APInt Shift = SplatConst->getAPIntValue().zextOrTrunc(32);
- return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0),
- DAG.getConstant(Shift, DL, MVT::i32));
+
+ return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
}
//===----------------------------------------------------------------------===//
-// WebAssembly Optimization Hooks
+// Custom DAG combine hooks
//===----------------------------------------------------------------------===//
+static SDValue
+performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ auto &DAG = DCI.DAG;
+ auto Shuffle = cast<ShuffleVectorSDNode>(N);
+
+ // Hoist vector bitcasts that don't change the number of lanes out of unary
+ // shuffles, where they are less likely to get in the way of other combines.
+ // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
+ // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
+ SDValue Bitcast = N->getOperand(0);
+ if (Bitcast.getOpcode() != ISD::BITCAST)
+ return SDValue();
+ if (!N->getOperand(1).isUndef())
+ return SDValue();
+ SDValue CastOp = Bitcast.getOperand(0);
+ MVT SrcType = CastOp.getSimpleValueType();
+ MVT DstType = Bitcast.getSimpleValueType();
+ if (!SrcType.is128BitVector() ||
+ SrcType.getVectorNumElements() != DstType.getVectorNumElements())
+ return SDValue();
+ SDValue NewShuffle = DAG.getVectorShuffle(
+ SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
+ return DAG.getBitcast(DstType, NewShuffle);
+}
+
+SDValue
+WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ default:
+ return SDValue();
+ case ISD::VECTOR_SHUFFLE:
+ return performVECTOR_SHUFFLECombine(N, DCI);
+ }
+}
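
The new VECTOR_SHUFFLE combine hoists a lane-count-preserving bitcast out of a unary shuffle, which is sound because both operations act lane-by-lane. A standalone check of that equivalence on four f32 lanes reinterpreted as four i32 lanes:

#include <array>
#include <cstdint>
#include <cstdio>
#include <cstring>

using F32x4 = std::array<float, 4>;
using I32x4 = std::array<uint32_t, 4>;

// Reinterpret the four float lanes as four i32 lanes.
static I32x4 bitcast(const F32x4 &V) {
  I32x4 Out;
  std::memcpy(Out.data(), V.data(), sizeof(Out));
  return Out;
}

// Permute lanes according to a 4-entry mask.
template <typename Vec>
static Vec shuffle(const Vec &V, const std::array<int, 4> &Mask) {
  Vec Out{};
  for (int I = 0; I < 4; ++I)
    Out[I] = V[Mask[I]];
  return Out;
}

int main() {
  F32x4 X = {1.0f, 2.0f, 3.0f, 4.0f};
  std::array<int, 4> Mask = {3, 1, 2, 0};
  I32x4 A = shuffle(bitcast(X), Mask); // shuffle after bitcast
  I32x4 B = bitcast(shuffle(X, Mask)); // bitcast after shuffle
  std::printf("%s\n", A == B ? "equal" : "different");
  return 0;
}
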
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 58e088a0ba50..b8e612377529 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -39,7 +39,6 @@ enum NodeType : unsigned {
} // end namespace WebAssemblyISD
class WebAssemblySubtarget;
-class WebAssemblyTargetMachine;
class WebAssemblyTargetLowering final : public TargetLowering {
public:
@@ -119,6 +118,11 @@ private:
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerAccessVectorElement(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
+
+ // Custom DAG combine hooks
+ SDValue
+ PerformDAGCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) const override;
};
namespace WebAssembly {
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
index a9a99d38f9f1..256b77e33db9 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -13,10 +13,11 @@
let UseNamedOperandTable = 1 in
multiclass ATOMIC_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
- list<dag> pattern_r, string asmstr_r = "",
- string asmstr_s = "", bits<32> atomic_op = -1> {
+ list<dag> pattern_r, string asmstr_r,
+ string asmstr_s, bits<32> atomic_op,
+ string is64 = "false"> {
defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
- !or(0xfe00, !and(0xff, atomic_op))>,
+ !or(0xfe00, !and(0xff, atomic_op)), is64>,
Requires<[HasAtomics]>;
}
@@ -32,85 +33,166 @@ multiclass ATOMIC_NRI<dag oops, dag iops, list<dag> pattern, string asmstr = "",
//===----------------------------------------------------------------------===//
let hasSideEffects = 1 in {
-defm ATOMIC_NOTIFY :
+defm ATOMIC_NOTIFY_A32 :
ATOMIC_I<(outs I32:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$count),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
"atomic.notify \t$dst, ${off}(${addr})${p2align}, $count",
- "atomic.notify \t${off}${p2align}", 0x00>;
+ "atomic.notify \t${off}${p2align}", 0x00, "false">;
+defm ATOMIC_NOTIFY_A64 :
+ ATOMIC_I<(outs I32:$dst),
+ (ins P2Align:$p2align, offset64_op:$off, I64:$addr, I32:$count),
+ (outs), (ins P2Align:$p2align, offset64_op:$off), [],
+ "atomic.notify \t$dst, ${off}(${addr})${p2align}, $count",
+ "atomic.notify \t${off}${p2align}", 0x00, "true">;
let mayLoad = 1 in {
-defm ATOMIC_WAIT_I32 :
+defm ATOMIC_WAIT_I32_A32 :
ATOMIC_I<(outs I32:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$exp,
I64:$timeout),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
"i32.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
- "i32.atomic.wait \t${off}${p2align}", 0x01>;
-defm ATOMIC_WAIT_I64 :
+ "i32.atomic.wait \t${off}${p2align}", 0x01, "false">;
+defm ATOMIC_WAIT_I32_A64 :
+ ATOMIC_I<(outs I32:$dst),
+ (ins P2Align:$p2align, offset64_op:$off, I64:$addr, I32:$exp,
+ I64:$timeout),
+ (outs), (ins P2Align:$p2align, offset64_op:$off), [],
+ "i32.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
+ "i32.atomic.wait \t${off}${p2align}", 0x01, "true">;
+defm ATOMIC_WAIT_I64_A32 :
ATOMIC_I<(outs I32:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, I64:$exp,
I64:$timeout),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
"i64.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
- "i64.atomic.wait \t${off}${p2align}", 0x02>;
+ "i64.atomic.wait \t${off}${p2align}", 0x02, "false">;
+defm ATOMIC_WAIT_I64_A64 :
+ ATOMIC_I<(outs I32:$dst),
+ (ins P2Align:$p2align, offset64_op:$off, I64:$addr, I64:$exp,
+ I64:$timeout),
+ (outs), (ins P2Align:$p2align, offset64_op:$off), [],
+ "i64.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
+ "i64.atomic.wait \t${off}${p2align}", 0x02, "true">;
} // mayLoad = 1
} // hasSideEffects = 1
let Predicates = [HasAtomics] in {
// Select notifys with no constant offset.
-def NotifyPatNoOffset :
+def NotifyPatNoOffset_A32 :
Pat<(i32 (int_wasm_atomic_notify I32:$addr, I32:$count)),
- (ATOMIC_NOTIFY 0, 0, I32:$addr, I32:$count)>;
+ (ATOMIC_NOTIFY_A32 0, 0, I32:$addr, I32:$count)>,
+ Requires<[HasAddr32]>;
+def NotifyPatNoOffset_A64 :
+ Pat<(i32 (int_wasm_atomic_notify I64:$addr, I32:$count)),
+ (ATOMIC_NOTIFY_A64 0, 0, I64:$addr, I32:$count)>,
+ Requires<[HasAddr64]>;
// Select notifys with a constant offset.
// Pattern with address + immediate offset
-class NotifyPatImmOff<PatFrag operand> :
- Pat<(i32 (int_wasm_atomic_notify (operand I32:$addr, imm:$off), I32:$count)),
- (ATOMIC_NOTIFY 0, imm:$off, I32:$addr, I32:$count)>;
-def : NotifyPatImmOff<regPlusImm>;
-def : NotifyPatImmOff<or_is_add>;
+multiclass NotifyPatImmOff<PatFrag operand, string inst> {
+ def : Pat<(i32 (int_wasm_atomic_notify (operand I32:$addr, imm:$off),
+ I32:$count)),
+ (!cast<NI>(inst#_A32) 0, imm:$off, I32:$addr, I32:$count)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(i32 (int_wasm_atomic_notify (operand I64:$addr, imm:$off),
+ I32:$count)),
+ (!cast<NI>(inst#_A64) 0, imm:$off, I64:$addr, I32:$count)>,
+ Requires<[HasAddr64]>;
+}
+defm : NotifyPatImmOff<regPlusImm, "ATOMIC_NOTIFY">;
+defm : NotifyPatImmOff<or_is_add, "ATOMIC_NOTIFY">;
// Select notifys with just a constant offset.
-def NotifyPatOffsetOnly :
+def NotifyPatOffsetOnly_A32 :
+ Pat<(i32 (int_wasm_atomic_notify imm:$off, I32:$count)),
+ (ATOMIC_NOTIFY_A32 0, imm:$off, (CONST_I32 0), I32:$count)>,
+ Requires<[HasAddr32]>;
+def NotifyPatOffsetOnly_A64 :
Pat<(i32 (int_wasm_atomic_notify imm:$off, I32:$count)),
- (ATOMIC_NOTIFY 0, imm:$off, (CONST_I32 0), I32:$count)>;
+ (ATOMIC_NOTIFY_A64 0, imm:$off, (CONST_I64 0), I32:$count)>,
+ Requires<[HasAddr64]>;
-def NotifyPatGlobalAddrOffOnly :
+def NotifyPatGlobalAddrOffOnly_A32 :
Pat<(i32 (int_wasm_atomic_notify (WebAssemblywrapper tglobaladdr:$off),
I32:$count)),
- (ATOMIC_NOTIFY 0, tglobaladdr:$off, (CONST_I32 0), I32:$count)>;
+ (ATOMIC_NOTIFY_A32 0, tglobaladdr:$off, (CONST_I32 0), I32:$count)>,
+ Requires<[HasAddr32]>;
+def NotifyPatGlobalAddrOffOnly_A64 :
+ Pat<(i32 (int_wasm_atomic_notify (WebAssemblywrapper tglobaladdr:$off),
+ I32:$count)),
+ (ATOMIC_NOTIFY_A64 0, tglobaladdr:$off, (CONST_I64 0), I32:$count)>,
+ Requires<[HasAddr64]>;
// Select waits with no constant offset.
-class WaitPatNoOffset<ValueType ty, Intrinsic kind, NI inst> :
- Pat<(i32 (kind I32:$addr, ty:$exp, I64:$timeout)),
- (inst 0, 0, I32:$addr, ty:$exp, I64:$timeout)>;
-def : WaitPatNoOffset<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
-def : WaitPatNoOffset<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+multiclass WaitPatNoOffset<ValueType ty, Intrinsic kind,
+ string inst> {
+ def : Pat<(i32 (kind I32:$addr, ty:$exp, I64:$timeout)),
+ (!cast<NI>(inst#_A32) 0, 0, I32:$addr, ty:$exp, I64:$timeout)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(i32 (kind I64:$addr, ty:$exp, I64:$timeout)),
+ (!cast<NI>(inst#_A64) 0, 0, I64:$addr, ty:$exp, I64:$timeout)>,
+ Requires<[HasAddr64]>;
+}
+defm : WaitPatNoOffset<i32, int_wasm_atomic_wait_i32, "ATOMIC_WAIT_I32">;
+defm : WaitPatNoOffset<i64, int_wasm_atomic_wait_i64, "ATOMIC_WAIT_I64">;
// Select waits with a constant offset.
// Pattern with address + immediate offset
-class WaitPatImmOff<ValueType ty, Intrinsic kind, PatFrag operand, NI inst> :
- Pat<(i32 (kind (operand I32:$addr, imm:$off), ty:$exp, I64:$timeout)),
- (inst 0, imm:$off, I32:$addr, ty:$exp, I64:$timeout)>;
-def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, regPlusImm, ATOMIC_WAIT_I32>;
-def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, or_is_add, ATOMIC_WAIT_I32>;
-def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, regPlusImm, ATOMIC_WAIT_I64>;
-def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, or_is_add, ATOMIC_WAIT_I64>;
-
-// Select wait_i32, ATOMIC_WAIT_I32s with just a constant offset.
-class WaitPatOffsetOnly<ValueType ty, Intrinsic kind, NI inst> :
- Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)),
- (inst 0, imm:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
-def : WaitPatOffsetOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
-def : WaitPatOffsetOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
-
-class WaitPatGlobalAddrOffOnly<ValueType ty, Intrinsic kind, NI inst> :
- Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, I64:$timeout)),
- (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
-def : WaitPatGlobalAddrOffOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
-def : WaitPatGlobalAddrOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+multiclass WaitPatImmOff<ValueType ty, Intrinsic kind, PatFrag operand,
+ string inst> {
+ def : Pat<(i32 (kind (operand I32:$addr, imm:$off), ty:$exp, I64:$timeout)),
+ (!cast<NI>(inst#_A32) 0, imm:$off, I32:$addr, ty:$exp,
+ I64:$timeout)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(i32 (kind (operand I64:$addr, imm:$off), ty:$exp, I64:$timeout)),
+ (!cast<NI>(inst#_A64) 0, imm:$off, I64:$addr, ty:$exp,
+ I64:$timeout)>,
+ Requires<[HasAddr64]>;
+}
+defm : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, regPlusImm,
+ "ATOMIC_WAIT_I32">;
+defm : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, or_is_add,
+ "ATOMIC_WAIT_I32">;
+defm : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, regPlusImm,
+ "ATOMIC_WAIT_I64">;
+defm : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, or_is_add,
+ "ATOMIC_WAIT_I64">;
+
+// Select waits with just a constant offset.
+multiclass WaitPatOffsetOnly<ValueType ty, Intrinsic kind, string inst> {
+ def : Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)),
+ (!cast<NI>(inst#_A32) 0, imm:$off, (CONST_I32 0), ty:$exp,
+ I64:$timeout)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)),
+ (!cast<NI>(inst#_A64) 0, imm:$off, (CONST_I64 0), ty:$exp,
+ I64:$timeout)>,
+ Requires<[HasAddr64]>;
+}
+defm : WaitPatOffsetOnly<i32, int_wasm_atomic_wait_i32, "ATOMIC_WAIT_I32">;
+defm : WaitPatOffsetOnly<i64, int_wasm_atomic_wait_i64, "ATOMIC_WAIT_I64">;
+
+multiclass WaitPatGlobalAddrOffOnly<ValueType ty, Intrinsic kind, string inst> {
+ def : Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp,
+ I64:$timeout)),
+ (!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp,
+ I64:$timeout)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp,
+ I64:$timeout)),
+ (!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$exp,
+ I64:$timeout)>,
+ Requires<[HasAddr64]>;
+}
+defm : WaitPatGlobalAddrOffOnly<i32, int_wasm_atomic_wait_i32,
+ "ATOMIC_WAIT_I32">;
+defm : WaitPatGlobalAddrOffOnly<i64, int_wasm_atomic_wait_i64,
+ "ATOMIC_WAIT_I64">;
} // Predicates = [HasAtomics]
//===----------------------------------------------------------------------===//
@@ -131,8 +213,8 @@ defm ATOMIC_FENCE : ATOMIC_NRI<(outs), (ins i8imm:$flags), [], "atomic.fence",
//===----------------------------------------------------------------------===//
multiclass AtomicLoad<WebAssemblyRegClass rc, string name, int atomic_op> {
- defm "" : WebAssemblyLoad<rc, name, !or(0xfe00, !and(0xff, atomic_op))>,
- Requires<[HasAtomics]>;
+ defm "" : WebAssemblyLoad<rc, name, !or(0xfe00, !and(0xff, atomic_op)),
+ [HasAtomics]>;
}
defm ATOMIC_LOAD_I32 : AtomicLoad<I32, "i32.atomic.load", 0x10>;
@@ -140,23 +222,23 @@ defm ATOMIC_LOAD_I64 : AtomicLoad<I64, "i64.atomic.load", 0x11>;
// Select loads with no constant offset.
let Predicates = [HasAtomics] in {
-def : LoadPatNoOffset<i32, atomic_load_32, ATOMIC_LOAD_I32>;
-def : LoadPatNoOffset<i64, atomic_load_64, ATOMIC_LOAD_I64>;
+defm : LoadPatNoOffset<i32, atomic_load_32, "ATOMIC_LOAD_I32">;
+defm : LoadPatNoOffset<i64, atomic_load_64, "ATOMIC_LOAD_I64">;
// Select loads with a constant offset.
// Pattern with address + immediate offset
-def : LoadPatImmOff<i32, atomic_load_32, regPlusImm, ATOMIC_LOAD_I32>;
-def : LoadPatImmOff<i64, atomic_load_64, regPlusImm, ATOMIC_LOAD_I64>;
-def : LoadPatImmOff<i32, atomic_load_32, or_is_add, ATOMIC_LOAD_I32>;
-def : LoadPatImmOff<i64, atomic_load_64, or_is_add, ATOMIC_LOAD_I64>;
+defm : LoadPatImmOff<i32, atomic_load_32, regPlusImm, "ATOMIC_LOAD_I32">;
+defm : LoadPatImmOff<i64, atomic_load_64, regPlusImm, "ATOMIC_LOAD_I64">;
+defm : LoadPatImmOff<i32, atomic_load_32, or_is_add, "ATOMIC_LOAD_I32">;
+defm : LoadPatImmOff<i64, atomic_load_64, or_is_add, "ATOMIC_LOAD_I64">;
// Select loads with just a constant offset.
-def : LoadPatOffsetOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>;
-def : LoadPatOffsetOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
+defm : LoadPatOffsetOnly<i32, atomic_load_32, "ATOMIC_LOAD_I32">;
+defm : LoadPatOffsetOnly<i64, atomic_load_64, "ATOMIC_LOAD_I64">;
-def : LoadPatGlobalAddrOffOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>;
-def : LoadPatGlobalAddrOffOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
+defm : LoadPatGlobalAddrOffOnly<i32, atomic_load_32, "ATOMIC_LOAD_I32">;
+defm : LoadPatGlobalAddrOffOnly<i64, atomic_load_64, "ATOMIC_LOAD_I64">;
} // Predicates = [HasAtomics]
@@ -205,62 +287,62 @@ def sext_aload_16_64 :
let Predicates = [HasAtomics] in {
// Select zero-extending loads with no constant offset.
-def : LoadPatNoOffset<i32, zext_aload_8_32, ATOMIC_LOAD8_U_I32>;
-def : LoadPatNoOffset<i32, zext_aload_16_32, ATOMIC_LOAD16_U_I32>;
-def : LoadPatNoOffset<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatNoOffset<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-def : LoadPatNoOffset<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>;
+defm : LoadPatNoOffset<i32, zext_aload_8_32, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPatNoOffset<i32, zext_aload_16_32, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPatNoOffset<i64, zext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPatNoOffset<i64, zext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
+defm : LoadPatNoOffset<i64, zext_aload_32_64, "ATOMIC_LOAD32_U_I64">;
// Select sign-extending loads with no constant offset
-def : LoadPatNoOffset<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
-def : LoadPatNoOffset<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
-def : LoadPatNoOffset<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatNoOffset<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
+defm : LoadPatNoOffset<i32, atomic_load_8, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPatNoOffset<i32, atomic_load_16, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPatNoOffset<i64, sext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPatNoOffset<i64, sext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
// 32->64 sext load gets selected as i32.atomic.load, i64.extend_i32_s
// Zero-extending loads with constant offset
-def : LoadPatImmOff<i32, zext_aload_8_32, regPlusImm, ATOMIC_LOAD8_U_I32>;
-def : LoadPatImmOff<i32, zext_aload_16_32, regPlusImm, ATOMIC_LOAD16_U_I32>;
-def : LoadPatImmOff<i32, zext_aload_8_32, or_is_add, ATOMIC_LOAD8_U_I32>;
-def : LoadPatImmOff<i32, zext_aload_16_32, or_is_add, ATOMIC_LOAD16_U_I32>;
-def : LoadPatImmOff<i64, zext_aload_8_64, regPlusImm, ATOMIC_LOAD8_U_I64>;
-def : LoadPatImmOff<i64, zext_aload_16_64, regPlusImm, ATOMIC_LOAD16_U_I64>;
-def : LoadPatImmOff<i64, zext_aload_32_64, regPlusImm, ATOMIC_LOAD32_U_I64>;
-def : LoadPatImmOff<i64, zext_aload_8_64, or_is_add, ATOMIC_LOAD8_U_I64>;
-def : LoadPatImmOff<i64, zext_aload_16_64, or_is_add, ATOMIC_LOAD16_U_I64>;
-def : LoadPatImmOff<i64, zext_aload_32_64, or_is_add, ATOMIC_LOAD32_U_I64>;
+defm : LoadPatImmOff<i32, zext_aload_8_32, regPlusImm, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPatImmOff<i32, zext_aload_16_32, regPlusImm, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPatImmOff<i32, zext_aload_8_32, or_is_add, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPatImmOff<i32, zext_aload_16_32, or_is_add, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPatImmOff<i64, zext_aload_8_64, regPlusImm, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPatImmOff<i64, zext_aload_16_64, regPlusImm, "ATOMIC_LOAD16_U_I64">;
+defm : LoadPatImmOff<i64, zext_aload_32_64, regPlusImm, "ATOMIC_LOAD32_U_I64">;
+defm : LoadPatImmOff<i64, zext_aload_8_64, or_is_add, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPatImmOff<i64, zext_aload_16_64, or_is_add, "ATOMIC_LOAD16_U_I64">;
+defm : LoadPatImmOff<i64, zext_aload_32_64, or_is_add, "ATOMIC_LOAD32_U_I64">;
// Sign-extending loads with constant offset
-def : LoadPatImmOff<i32, atomic_load_8, regPlusImm, ATOMIC_LOAD8_U_I32>;
-def : LoadPatImmOff<i32, atomic_load_16, regPlusImm, ATOMIC_LOAD16_U_I32>;
-def : LoadPatImmOff<i32, atomic_load_8, or_is_add, ATOMIC_LOAD8_U_I32>;
-def : LoadPatImmOff<i32, atomic_load_16, or_is_add, ATOMIC_LOAD16_U_I32>;
-def : LoadPatImmOff<i64, sext_aload_8_64, regPlusImm, ATOMIC_LOAD8_U_I64>;
-def : LoadPatImmOff<i64, sext_aload_16_64, regPlusImm, ATOMIC_LOAD16_U_I64>;
-def : LoadPatImmOff<i64, sext_aload_8_64, or_is_add, ATOMIC_LOAD8_U_I64>;
-def : LoadPatImmOff<i64, sext_aload_16_64, or_is_add, ATOMIC_LOAD16_U_I64>;
+defm : LoadPatImmOff<i32, atomic_load_8, regPlusImm, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPatImmOff<i32, atomic_load_16, regPlusImm, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPatImmOff<i32, atomic_load_8, or_is_add, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPatImmOff<i32, atomic_load_16, or_is_add, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPatImmOff<i64, sext_aload_8_64, regPlusImm, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPatImmOff<i64, sext_aload_16_64, regPlusImm, "ATOMIC_LOAD16_U_I64">;
+defm : LoadPatImmOff<i64, sext_aload_8_64, or_is_add, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPatImmOff<i64, sext_aload_16_64, or_is_add, "ATOMIC_LOAD16_U_I64">;
// No 32->64 patterns, just use i32.atomic.load and i64.extend_s/i64
// Extending loads with just a constant offset
-def : LoadPatOffsetOnly<i32, zext_aload_8_32, ATOMIC_LOAD8_U_I32>;
-def : LoadPatOffsetOnly<i32, zext_aload_16_32, ATOMIC_LOAD16_U_I32>;
-def : LoadPatOffsetOnly<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatOffsetOnly<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-def : LoadPatOffsetOnly<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>;
-def : LoadPatOffsetOnly<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
-def : LoadPatOffsetOnly<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
-def : LoadPatOffsetOnly<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatOffsetOnly<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-
-def : LoadPatGlobalAddrOffOnly<i32, zext_aload_8_32, ATOMIC_LOAD8_U_I32>;
-def : LoadPatGlobalAddrOffOnly<i32, zext_aload_16_32, ATOMIC_LOAD16_U_I32>;
-def : LoadPatGlobalAddrOffOnly<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatGlobalAddrOffOnly<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-def : LoadPatGlobalAddrOffOnly<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>;
-def : LoadPatGlobalAddrOffOnly<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
-def : LoadPatGlobalAddrOffOnly<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
-def : LoadPatGlobalAddrOffOnly<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatGlobalAddrOffOnly<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
+defm : LoadPatOffsetOnly<i32, zext_aload_8_32, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPatOffsetOnly<i32, zext_aload_16_32, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPatOffsetOnly<i64, zext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPatOffsetOnly<i64, zext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
+defm : LoadPatOffsetOnly<i64, zext_aload_32_64, "ATOMIC_LOAD32_U_I64">;
+defm : LoadPatOffsetOnly<i32, atomic_load_8, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPatOffsetOnly<i32, atomic_load_16, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPatOffsetOnly<i64, sext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPatOffsetOnly<i64, sext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
+
+defm : LoadPatGlobalAddrOffOnly<i32, zext_aload_8_32, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPatGlobalAddrOffOnly<i32, zext_aload_16_32, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPatGlobalAddrOffOnly<i64, zext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPatGlobalAddrOffOnly<i64, zext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
+defm : LoadPatGlobalAddrOffOnly<i64, zext_aload_32_64, "ATOMIC_LOAD32_U_I64">;
+defm : LoadPatGlobalAddrOffOnly<i32, atomic_load_8, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPatGlobalAddrOffOnly<i32, atomic_load_16, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPatGlobalAddrOffOnly<i64, sext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPatGlobalAddrOffOnly<i64, sext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
} // Predicates = [HasAtomics]
@@ -269,8 +351,8 @@ def : LoadPatGlobalAddrOffOnly<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
//===----------------------------------------------------------------------===//
multiclass AtomicStore<WebAssemblyRegClass rc, string name, int atomic_op> {
- defm "" : WebAssemblyStore<rc, name, !or(0xfe00, !and(0xff, atomic_op))>,
- Requires<[HasAtomics]>;
+ defm "" : WebAssemblyStore<rc, name, !or(0xfe00, !and(0xff, atomic_op)),
+ [HasAtomics]>;
}
defm ATOMIC_STORE_I32 : AtomicStore<I32, "i32.atomic.store", 0x17>;
@@ -284,33 +366,54 @@ defm ATOMIC_STORE_I64 : AtomicStore<I64, "i64.atomic.store", 0x18>;
let Predicates = [HasAtomics] in {
// Select stores with no constant offset.
-class AStorePatNoOffset<ValueType ty, PatFrag kind, NI inst> :
- Pat<(kind I32:$addr, ty:$val), (inst 0, 0, I32:$addr, ty:$val)>;
-def : AStorePatNoOffset<i32, atomic_store_32, ATOMIC_STORE_I32>;
-def : AStorePatNoOffset<i64, atomic_store_64, ATOMIC_STORE_I64>;
+multiclass AStorePatNoOffset<ValueType ty, PatFrag kind, string inst> {
+ def : Pat<(kind I32:$addr, ty:$val),
+ (!cast<NI>(inst#_A32) 0, 0, I32:$addr, ty:$val)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(kind I64:$addr, ty:$val),
+ (!cast<NI>(inst#_A64) 0, 0, I64:$addr, ty:$val)>,
+ Requires<[HasAddr64]>;
+}
+defm : AStorePatNoOffset<i32, atomic_store_32, "ATOMIC_STORE_I32">;
+defm : AStorePatNoOffset<i64, atomic_store_64, "ATOMIC_STORE_I64">;
// Select stores with a constant offset.
// Pattern with address + immediate offset
-class AStorePatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> :
- Pat<(kind (operand I32:$addr, imm:$off), ty:$val),
- (inst 0, imm:$off, I32:$addr, ty:$val)>;
-def : AStorePatImmOff<i32, atomic_store_32, regPlusImm, ATOMIC_STORE_I32>;
-def : AStorePatImmOff<i64, atomic_store_64, regPlusImm, ATOMIC_STORE_I64>;
-def : AStorePatImmOff<i32, atomic_store_32, or_is_add, ATOMIC_STORE_I32>;
-def : AStorePatImmOff<i64, atomic_store_64, or_is_add, ATOMIC_STORE_I64>;
+multiclass AStorePatImmOff<ValueType ty, PatFrag kind, PatFrag operand,
+ string inst> {
+ def : Pat<(kind (operand I32:$addr, imm:$off), ty:$val),
+ (!cast<NI>(inst#_A32) 0, imm:$off, I32:$addr, ty:$val)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(kind (operand I64:$addr, imm:$off), ty:$val),
+ (!cast<NI>(inst#_A64) 0, imm:$off, I64:$addr, ty:$val)>,
+ Requires<[HasAddr64]>;
+}
+defm : AStorePatImmOff<i32, atomic_store_32, regPlusImm, "ATOMIC_STORE_I32">;
+defm : AStorePatImmOff<i64, atomic_store_64, regPlusImm, "ATOMIC_STORE_I64">;
// Select stores with just a constant offset.
-class AStorePatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
- Pat<(kind imm:$off, ty:$val), (inst 0, imm:$off, (CONST_I32 0), ty:$val)>;
-def : AStorePatOffsetOnly<i32, atomic_store_32, ATOMIC_STORE_I32>;
-def : AStorePatOffsetOnly<i64, atomic_store_64, ATOMIC_STORE_I64>;
-
-class AStorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
- Pat<(kind (WebAssemblywrapper tglobaladdr:$off), ty:$val),
- (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>;
-def : AStorePatGlobalAddrOffOnly<i32, atomic_store_32, ATOMIC_STORE_I32>;
-def : AStorePatGlobalAddrOffOnly<i64, atomic_store_64, ATOMIC_STORE_I64>;
+multiclass AStorePatOffsetOnly<ValueType ty, PatFrag kind, string inst> {
+ def : Pat<(kind imm:$off, ty:$val),
+ (!cast<NI>(inst#_A32) 0, imm:$off, (CONST_I32 0), ty:$val)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(kind imm:$off, ty:$val),
+ (!cast<NI>(inst#_A64) 0, imm:$off, (CONST_I64 0), ty:$val)>,
+ Requires<[HasAddr64]>;
+}
+defm : AStorePatOffsetOnly<i32, atomic_store_32, "ATOMIC_STORE_I32">;
+defm : AStorePatOffsetOnly<i64, atomic_store_64, "ATOMIC_STORE_I64">;
+
+multiclass AStorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> {
+ def : Pat<(kind (WebAssemblywrapper tglobaladdr:$off), ty:$val),
+ (!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(kind (WebAssemblywrapper tglobaladdr:$off), ty:$val),
+ (!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$val)>,
+ Requires<[HasAddr64]>;
+}
+defm : AStorePatGlobalAddrOffOnly<i32, atomic_store_32, "ATOMIC_STORE_I32">;
+defm : AStorePatGlobalAddrOffOnly<i64, atomic_store_64, "ATOMIC_STORE_I64">;
} // Predicates = [HasAtomics]
@@ -336,36 +439,40 @@ def trunc_astore_32_64 : trunc_astore_64<atomic_store_32>;
let Predicates = [HasAtomics] in {
// Truncating stores with no constant offset
-def : AStorePatNoOffset<i32, atomic_store_8, ATOMIC_STORE8_I32>;
-def : AStorePatNoOffset<i32, atomic_store_16, ATOMIC_STORE16_I32>;
-def : AStorePatNoOffset<i64, trunc_astore_8_64, ATOMIC_STORE8_I64>;
-def : AStorePatNoOffset<i64, trunc_astore_16_64, ATOMIC_STORE16_I64>;
-def : AStorePatNoOffset<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>;
+defm : AStorePatNoOffset<i32, atomic_store_8, "ATOMIC_STORE8_I32">;
+defm : AStorePatNoOffset<i32, atomic_store_16, "ATOMIC_STORE16_I32">;
+defm : AStorePatNoOffset<i64, trunc_astore_8_64, "ATOMIC_STORE8_I64">;
+defm : AStorePatNoOffset<i64, trunc_astore_16_64, "ATOMIC_STORE16_I64">;
+defm : AStorePatNoOffset<i64, trunc_astore_32_64, "ATOMIC_STORE32_I64">;
// Truncating stores with a constant offset
-def : AStorePatImmOff<i32, atomic_store_8, regPlusImm, ATOMIC_STORE8_I32>;
-def : AStorePatImmOff<i32, atomic_store_16, regPlusImm, ATOMIC_STORE16_I32>;
-def : AStorePatImmOff<i64, trunc_astore_8_64, regPlusImm, ATOMIC_STORE8_I64>;
-def : AStorePatImmOff<i64, trunc_astore_16_64, regPlusImm, ATOMIC_STORE16_I64>;
-def : AStorePatImmOff<i64, trunc_astore_32_64, regPlusImm, ATOMIC_STORE32_I64>;
-def : AStorePatImmOff<i32, atomic_store_8, or_is_add, ATOMIC_STORE8_I32>;
-def : AStorePatImmOff<i32, atomic_store_16, or_is_add, ATOMIC_STORE16_I32>;
-def : AStorePatImmOff<i64, trunc_astore_8_64, or_is_add, ATOMIC_STORE8_I64>;
-def : AStorePatImmOff<i64, trunc_astore_16_64, or_is_add, ATOMIC_STORE16_I64>;
-def : AStorePatImmOff<i64, trunc_astore_32_64, or_is_add, ATOMIC_STORE32_I64>;
+defm : AStorePatImmOff<i32, atomic_store_8, regPlusImm, "ATOMIC_STORE8_I32">;
+defm : AStorePatImmOff<i32, atomic_store_16, regPlusImm, "ATOMIC_STORE16_I32">;
+defm : AStorePatImmOff<i64, trunc_astore_8_64, regPlusImm, "ATOMIC_STORE8_I64">;
+defm : AStorePatImmOff<i64, trunc_astore_16_64, regPlusImm,
+ "ATOMIC_STORE16_I64">;
+defm : AStorePatImmOff<i64, trunc_astore_32_64, regPlusImm,
+ "ATOMIC_STORE32_I64">;
+defm : AStorePatImmOff<i32, atomic_store_8, or_is_add, "ATOMIC_STORE8_I32">;
+defm : AStorePatImmOff<i32, atomic_store_16, or_is_add, "ATOMIC_STORE16_I32">;
+defm : AStorePatImmOff<i64, trunc_astore_8_64, or_is_add, "ATOMIC_STORE8_I64">;
+defm : AStorePatImmOff<i64, trunc_astore_16_64, or_is_add,
+ "ATOMIC_STORE16_I64">;
+defm : AStorePatImmOff<i64, trunc_astore_32_64, or_is_add,
+ "ATOMIC_STORE32_I64">;
// Truncating stores with just a constant offset
-def : AStorePatOffsetOnly<i32, atomic_store_8, ATOMIC_STORE8_I32>;
-def : AStorePatOffsetOnly<i32, atomic_store_16, ATOMIC_STORE16_I32>;
-def : AStorePatOffsetOnly<i64, trunc_astore_8_64, ATOMIC_STORE8_I64>;
-def : AStorePatOffsetOnly<i64, trunc_astore_16_64, ATOMIC_STORE16_I64>;
-def : AStorePatOffsetOnly<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>;
-
-def : AStorePatGlobalAddrOffOnly<i32, atomic_store_8, ATOMIC_STORE8_I32>;
-def : AStorePatGlobalAddrOffOnly<i32, atomic_store_16, ATOMIC_STORE16_I32>;
-def : AStorePatGlobalAddrOffOnly<i64, trunc_astore_8_64, ATOMIC_STORE8_I64>;
-def : AStorePatGlobalAddrOffOnly<i64, trunc_astore_16_64, ATOMIC_STORE16_I64>;
-def : AStorePatGlobalAddrOffOnly<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>;
+defm : AStorePatOffsetOnly<i32, atomic_store_8, "ATOMIC_STORE8_I32">;
+defm : AStorePatOffsetOnly<i32, atomic_store_16, "ATOMIC_STORE16_I32">;
+defm : AStorePatOffsetOnly<i64, trunc_astore_8_64, "ATOMIC_STORE8_I64">;
+defm : AStorePatOffsetOnly<i64, trunc_astore_16_64, "ATOMIC_STORE16_I64">;
+defm : AStorePatOffsetOnly<i64, trunc_astore_32_64, "ATOMIC_STORE32_I64">;
+
+defm : AStorePatGlobalAddrOffOnly<i32, atomic_store_8, "ATOMIC_STORE8_I32">;
+defm : AStorePatGlobalAddrOffOnly<i32, atomic_store_16, "ATOMIC_STORE16_I32">;
+defm : AStorePatGlobalAddrOffOnly<i64, trunc_astore_8_64, "ATOMIC_STORE8_I64">;
+defm : AStorePatGlobalAddrOffOnly<i64, trunc_astore_16_64, "ATOMIC_STORE16_I64">;
+defm : AStorePatGlobalAddrOffOnly<i64, trunc_astore_32_64, "ATOMIC_STORE32_I64">;
} // Predicates = [HasAtomics]
@@ -375,12 +482,18 @@ def : AStorePatGlobalAddrOffOnly<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>;
multiclass WebAssemblyBinRMW<WebAssemblyRegClass rc, string name,
int atomic_op> {
- defm "" :
+ defm "_A32" :
ATOMIC_I<(outs rc:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
!strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $val"),
- !strconcat(name, "\t${off}${p2align}"), atomic_op>;
+ !strconcat(name, "\t${off}${p2align}"), atomic_op, "false">;
+ defm "_A64" :
+ ATOMIC_I<(outs rc:$dst),
+ (ins P2Align:$p2align, offset64_op:$off, I64:$addr, rc:$val),
+ (outs), (ins P2Align:$p2align, offset64_op:$off), [],
+ !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $val"),
+ !strconcat(name, "\t${off}${p2align}"), atomic_op, "true">;
}
defm ATOMIC_RMW_ADD_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.add", 0x1e>;
@@ -464,56 +577,78 @@ defm ATOMIC_RMW32_U_XCHG_I64 :
WebAssemblyBinRMW<I64, "i64.atomic.rmw32.xchg_u", 0x47>;
// Select binary RMWs with no constant offset.
-class BinRMWPatNoOffset<ValueType ty, PatFrag kind, NI inst> :
- Pat<(ty (kind I32:$addr, ty:$val)), (inst 0, 0, I32:$addr, ty:$val)>;
+multiclass BinRMWPatNoOffset<ValueType ty, PatFrag kind, string inst> {
+ def : Pat<(ty (kind I32:$addr, ty:$val)),
+ (!cast<NI>(inst#_A32) 0, 0, I32:$addr, ty:$val)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(ty (kind I64:$addr, ty:$val)),
+ (!cast<NI>(inst#_A64) 0, 0, I64:$addr, ty:$val)>,
+ Requires<[HasAddr64]>;
+}
// Select binary RMWs with a constant offset.
// Pattern with address + immediate offset
-class BinRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> :
- Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$val)),
- (inst 0, imm:$off, I32:$addr, ty:$val)>;
+multiclass BinRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand,
+ string inst> {
+ def : Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$val)),
+ (!cast<NI>(inst#_A32) 0, imm:$off, I32:$addr, ty:$val)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(ty (kind (operand I64:$addr, imm:$off), ty:$val)),
+ (!cast<NI>(inst#_A64) 0, imm:$off, I64:$addr, ty:$val)>,
+ Requires<[HasAddr64]>;
+}
// Select binary RMWs with just a constant offset.
-class BinRMWPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
- Pat<(ty (kind imm:$off, ty:$val)),
- (inst 0, imm:$off, (CONST_I32 0), ty:$val)>;
+multiclass BinRMWPatOffsetOnly<ValueType ty, PatFrag kind, string inst> {
+ def : Pat<(ty (kind imm:$off, ty:$val)),
+ (!cast<NI>(inst#_A32) 0, imm:$off, (CONST_I32 0), ty:$val)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(ty (kind imm:$off, ty:$val)),
+ (!cast<NI>(inst#_A64) 0, imm:$off, (CONST_I64 0), ty:$val)>,
+ Requires<[HasAddr64]>;
+}
-class BinRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
- Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$val)),
- (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>;
+multiclass BinRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> {
+ def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$val)),
+ (!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$val)),
+ (!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$val)>,
+ Requires<[HasAddr64]>;
+}
// Patterns for various addressing modes.
-multiclass BinRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32,
- NI inst_64> {
- def : BinRMWPatNoOffset<i32, rmw_32, inst_32>;
- def : BinRMWPatNoOffset<i64, rmw_64, inst_64>;
+multiclass BinRMWPattern<PatFrag rmw_32, PatFrag rmw_64, string inst_32,
+ string inst_64> {
+ defm : BinRMWPatNoOffset<i32, rmw_32, inst_32>;
+ defm : BinRMWPatNoOffset<i64, rmw_64, inst_64>;
- def : BinRMWPatImmOff<i32, rmw_32, regPlusImm, inst_32>;
- def : BinRMWPatImmOff<i64, rmw_64, regPlusImm, inst_64>;
- def : BinRMWPatImmOff<i32, rmw_32, or_is_add, inst_32>;
- def : BinRMWPatImmOff<i64, rmw_64, or_is_add, inst_64>;
+ defm : BinRMWPatImmOff<i32, rmw_32, regPlusImm, inst_32>;
+ defm : BinRMWPatImmOff<i64, rmw_64, regPlusImm, inst_64>;
+ defm : BinRMWPatImmOff<i32, rmw_32, or_is_add, inst_32>;
+ defm : BinRMWPatImmOff<i64, rmw_64, or_is_add, inst_64>;
- def : BinRMWPatOffsetOnly<i32, rmw_32, inst_32>;
- def : BinRMWPatOffsetOnly<i64, rmw_64, inst_64>;
+ defm : BinRMWPatOffsetOnly<i32, rmw_32, inst_32>;
+ defm : BinRMWPatOffsetOnly<i64, rmw_64, inst_64>;
- def : BinRMWPatGlobalAddrOffOnly<i32, rmw_32, inst_32>;
- def : BinRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>;
+ defm : BinRMWPatGlobalAddrOffOnly<i32, rmw_32, inst_32>;
+ defm : BinRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>;
}
let Predicates = [HasAtomics] in {
-defm : BinRMWPattern<atomic_load_add_32, atomic_load_add_64, ATOMIC_RMW_ADD_I32,
- ATOMIC_RMW_ADD_I64>;
-defm : BinRMWPattern<atomic_load_sub_32, atomic_load_sub_64, ATOMIC_RMW_SUB_I32,
- ATOMIC_RMW_SUB_I64>;
-defm : BinRMWPattern<atomic_load_and_32, atomic_load_and_64, ATOMIC_RMW_AND_I32,
- ATOMIC_RMW_AND_I64>;
-defm : BinRMWPattern<atomic_load_or_32, atomic_load_or_64, ATOMIC_RMW_OR_I32,
- ATOMIC_RMW_OR_I64>;
-defm : BinRMWPattern<atomic_load_xor_32, atomic_load_xor_64, ATOMIC_RMW_XOR_I32,
- ATOMIC_RMW_XOR_I64>;
-defm : BinRMWPattern<atomic_swap_32, atomic_swap_64, ATOMIC_RMW_XCHG_I32,
- ATOMIC_RMW_XCHG_I64>;
+defm : BinRMWPattern<atomic_load_add_32, atomic_load_add_64,
+ "ATOMIC_RMW_ADD_I32", "ATOMIC_RMW_ADD_I64">;
+defm : BinRMWPattern<atomic_load_sub_32, atomic_load_sub_64,
+ "ATOMIC_RMW_SUB_I32", "ATOMIC_RMW_SUB_I64">;
+defm : BinRMWPattern<atomic_load_and_32, atomic_load_and_64,
+ "ATOMIC_RMW_AND_I32", "ATOMIC_RMW_AND_I64">;
+defm : BinRMWPattern<atomic_load_or_32, atomic_load_or_64,
+ "ATOMIC_RMW_OR_I32", "ATOMIC_RMW_OR_I64">;
+defm : BinRMWPattern<atomic_load_xor_32, atomic_load_xor_64,
+ "ATOMIC_RMW_XOR_I32", "ATOMIC_RMW_XOR_I64">;
+defm : BinRMWPattern<atomic_swap_32, atomic_swap_64,
+ "ATOMIC_RMW_XCHG_I32", "ATOMIC_RMW_XCHG_I64">;
} // Predicates = [HasAtomics]
// Truncating & zero-extending binary RMW patterns.
@@ -556,87 +691,93 @@ multiclass BinRMWTruncExtPattern<
PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32, PatFrag rmw_64,
NI inst8_32, NI inst16_32, NI inst8_64, NI inst16_64, NI inst32_64> {
// Truncating-extending binary RMWs with no constant offset
- def : BinRMWPatNoOffset<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
- def : BinRMWPatNoOffset<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
- def : BinRMWPatNoOffset<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
- def : BinRMWPatNoOffset<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
- def : BinRMWPatNoOffset<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
+ defm : BinRMWPatNoOffset<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
+ defm : BinRMWPatNoOffset<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
+ defm : BinRMWPatNoOffset<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
+ defm : BinRMWPatNoOffset<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
+ defm : BinRMWPatNoOffset<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
- def : BinRMWPatNoOffset<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
- def : BinRMWPatNoOffset<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
- def : BinRMWPatNoOffset<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
- def : BinRMWPatNoOffset<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
+ defm : BinRMWPatNoOffset<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
+ defm : BinRMWPatNoOffset<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
+ defm : BinRMWPatNoOffset<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
+ defm : BinRMWPatNoOffset<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
// Truncating-extending binary RMWs with a constant offset
- def : BinRMWPatImmOff<i32, zext_bin_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
- def : BinRMWPatImmOff<i32, zext_bin_rmw_16_32<rmw_16>, regPlusImm, inst16_32>;
- def : BinRMWPatImmOff<i64, zext_bin_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
- def : BinRMWPatImmOff<i64, zext_bin_rmw_16_64<rmw_16>, regPlusImm, inst16_64>;
- def : BinRMWPatImmOff<i64, zext_bin_rmw_32_64<rmw_32>, regPlusImm, inst32_64>;
- def : BinRMWPatImmOff<i32, zext_bin_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
- def : BinRMWPatImmOff<i32, zext_bin_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
- def : BinRMWPatImmOff<i64, zext_bin_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
- def : BinRMWPatImmOff<i64, zext_bin_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
- def : BinRMWPatImmOff<i64, zext_bin_rmw_32_64<rmw_32>, or_is_add, inst32_64>;
-
- def : BinRMWPatImmOff<i32, sext_bin_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
- def : BinRMWPatImmOff<i32, sext_bin_rmw_16_32<rmw_16>, regPlusImm, inst16_32>;
- def : BinRMWPatImmOff<i64, sext_bin_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
- def : BinRMWPatImmOff<i64, sext_bin_rmw_16_64<rmw_16>, regPlusImm, inst16_64>;
- def : BinRMWPatImmOff<i32, sext_bin_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
- def : BinRMWPatImmOff<i32, sext_bin_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
- def : BinRMWPatImmOff<i64, sext_bin_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
- def : BinRMWPatImmOff<i64, sext_bin_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
+ defm : BinRMWPatImmOff<i32, zext_bin_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
+ defm : BinRMWPatImmOff<i32, zext_bin_rmw_16_32<rmw_16>, regPlusImm,
+ inst16_32>;
+ defm : BinRMWPatImmOff<i64, zext_bin_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
+ defm : BinRMWPatImmOff<i64, zext_bin_rmw_16_64<rmw_16>, regPlusImm,
+ inst16_64>;
+ defm : BinRMWPatImmOff<i64, zext_bin_rmw_32_64<rmw_32>, regPlusImm,
+ inst32_64>;
+ defm : BinRMWPatImmOff<i32, zext_bin_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
+ defm : BinRMWPatImmOff<i32, zext_bin_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
+ defm : BinRMWPatImmOff<i64, zext_bin_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
+ defm : BinRMWPatImmOff<i64, zext_bin_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
+ defm : BinRMWPatImmOff<i64, zext_bin_rmw_32_64<rmw_32>, or_is_add, inst32_64>;
+
+ defm : BinRMWPatImmOff<i32, sext_bin_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
+ defm : BinRMWPatImmOff<i32, sext_bin_rmw_16_32<rmw_16>, regPlusImm,
+ inst16_32>;
+ defm : BinRMWPatImmOff<i64, sext_bin_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
+ defm : BinRMWPatImmOff<i64, sext_bin_rmw_16_64<rmw_16>, regPlusImm,
+ inst16_64>;
+ defm : BinRMWPatImmOff<i32, sext_bin_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
+ defm : BinRMWPatImmOff<i32, sext_bin_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
+ defm : BinRMWPatImmOff<i64, sext_bin_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
+ defm : BinRMWPatImmOff<i64, sext_bin_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
// Truncating-extending binary RMWs with just a constant offset
- def : BinRMWPatOffsetOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
- def : BinRMWPatOffsetOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
- def : BinRMWPatOffsetOnly<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
- def : BinRMWPatOffsetOnly<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
- def : BinRMWPatOffsetOnly<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
-
- def : BinRMWPatOffsetOnly<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
- def : BinRMWPatOffsetOnly<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
- def : BinRMWPatOffsetOnly<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
- def : BinRMWPatOffsetOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
-
- def : BinRMWPatGlobalAddrOffOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
- def : BinRMWPatGlobalAddrOffOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
- def : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
- def : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
- def : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
-
- def : BinRMWPatGlobalAddrOffOnly<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
- def : BinRMWPatGlobalAddrOffOnly<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
- def : BinRMWPatGlobalAddrOffOnly<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
- def : BinRMWPatGlobalAddrOffOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
+ defm : BinRMWPatOffsetOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
+ defm : BinRMWPatOffsetOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
+ defm : BinRMWPatOffsetOnly<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
+ defm : BinRMWPatOffsetOnly<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
+ defm : BinRMWPatOffsetOnly<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
+
+ defm : BinRMWPatOffsetOnly<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
+ defm : BinRMWPatOffsetOnly<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
+ defm : BinRMWPatOffsetOnly<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
+ defm : BinRMWPatOffsetOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
+
+ defm : BinRMWPatGlobalAddrOffOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
+ defm : BinRMWPatGlobalAddrOffOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
+ defm : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
+ defm : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
+ defm : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
+
+ defm : BinRMWPatGlobalAddrOffOnly<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
+ defm : BinRMWPatGlobalAddrOffOnly<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
+ defm : BinRMWPatGlobalAddrOffOnly<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
+ defm : BinRMWPatGlobalAddrOffOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
}
let Predicates = [HasAtomics] in {
defm : BinRMWTruncExtPattern<
atomic_load_add_8, atomic_load_add_16, atomic_load_add_32, atomic_load_add_64,
- ATOMIC_RMW8_U_ADD_I32, ATOMIC_RMW16_U_ADD_I32,
- ATOMIC_RMW8_U_ADD_I64, ATOMIC_RMW16_U_ADD_I64, ATOMIC_RMW32_U_ADD_I64>;
+ "ATOMIC_RMW8_U_ADD_I32", "ATOMIC_RMW16_U_ADD_I32",
+ "ATOMIC_RMW8_U_ADD_I64", "ATOMIC_RMW16_U_ADD_I64", "ATOMIC_RMW32_U_ADD_I64">;
defm : BinRMWTruncExtPattern<
atomic_load_sub_8, atomic_load_sub_16, atomic_load_sub_32, atomic_load_sub_64,
- ATOMIC_RMW8_U_SUB_I32, ATOMIC_RMW16_U_SUB_I32,
- ATOMIC_RMW8_U_SUB_I64, ATOMIC_RMW16_U_SUB_I64, ATOMIC_RMW32_U_SUB_I64>;
+ "ATOMIC_RMW8_U_SUB_I32", "ATOMIC_RMW16_U_SUB_I32",
+ "ATOMIC_RMW8_U_SUB_I64", "ATOMIC_RMW16_U_SUB_I64", "ATOMIC_RMW32_U_SUB_I64">;
defm : BinRMWTruncExtPattern<
atomic_load_and_8, atomic_load_and_16, atomic_load_and_32, atomic_load_and_64,
- ATOMIC_RMW8_U_AND_I32, ATOMIC_RMW16_U_AND_I32,
- ATOMIC_RMW8_U_AND_I64, ATOMIC_RMW16_U_AND_I64, ATOMIC_RMW32_U_AND_I64>;
+ "ATOMIC_RMW8_U_AND_I32", "ATOMIC_RMW16_U_AND_I32",
+ "ATOMIC_RMW8_U_AND_I64", "ATOMIC_RMW16_U_AND_I64", "ATOMIC_RMW32_U_AND_I64">;
defm : BinRMWTruncExtPattern<
atomic_load_or_8, atomic_load_or_16, atomic_load_or_32, atomic_load_or_64,
- ATOMIC_RMW8_U_OR_I32, ATOMIC_RMW16_U_OR_I32,
- ATOMIC_RMW8_U_OR_I64, ATOMIC_RMW16_U_OR_I64, ATOMIC_RMW32_U_OR_I64>;
+ "ATOMIC_RMW8_U_OR_I32", "ATOMIC_RMW16_U_OR_I32",
+ "ATOMIC_RMW8_U_OR_I64", "ATOMIC_RMW16_U_OR_I64", "ATOMIC_RMW32_U_OR_I64">;
defm : BinRMWTruncExtPattern<
atomic_load_xor_8, atomic_load_xor_16, atomic_load_xor_32, atomic_load_xor_64,
- ATOMIC_RMW8_U_XOR_I32, ATOMIC_RMW16_U_XOR_I32,
- ATOMIC_RMW8_U_XOR_I64, ATOMIC_RMW16_U_XOR_I64, ATOMIC_RMW32_U_XOR_I64>;
+ "ATOMIC_RMW8_U_XOR_I32", "ATOMIC_RMW16_U_XOR_I32",
+ "ATOMIC_RMW8_U_XOR_I64", "ATOMIC_RMW16_U_XOR_I64", "ATOMIC_RMW32_U_XOR_I64">;
defm : BinRMWTruncExtPattern<
atomic_swap_8, atomic_swap_16, atomic_swap_32, atomic_swap_64,
- ATOMIC_RMW8_U_XCHG_I32, ATOMIC_RMW16_U_XCHG_I32,
- ATOMIC_RMW8_U_XCHG_I64, ATOMIC_RMW16_U_XCHG_I64, ATOMIC_RMW32_U_XCHG_I64>;
+ "ATOMIC_RMW8_U_XCHG_I32", "ATOMIC_RMW16_U_XCHG_I32",
+ "ATOMIC_RMW8_U_XCHG_I64", "ATOMIC_RMW16_U_XCHG_I64",
+ "ATOMIC_RMW32_U_XCHG_I64">;
} // Predicates = [HasAtomics]
//===----------------------------------------------------------------------===//
@@ -651,13 +792,20 @@ defm : BinRMWTruncExtPattern<
multiclass WebAssemblyTerRMW<WebAssemblyRegClass rc, string name,
int atomic_op> {
- defm "" :
+ defm "_A32" :
ATOMIC_I<(outs rc:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$exp,
rc:$new_),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
!strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new_"),
- !strconcat(name, "\t${off}${p2align}"), atomic_op>;
+ !strconcat(name, "\t${off}${p2align}"), atomic_op, "false">;
+ defm "_A64" :
+ ATOMIC_I<(outs rc:$dst),
+ (ins P2Align:$p2align, offset64_op:$off, I64:$addr, rc:$exp,
+ rc:$new_),
+ (outs), (ins P2Align:$p2align, offset64_op:$off), [],
+ !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new_"),
+ !strconcat(name, "\t${off}${p2align}"), atomic_op, "true">;
}
defm ATOMIC_RMW_CMPXCHG_I32 :
@@ -676,47 +824,70 @@ defm ATOMIC_RMW32_U_CMPXCHG_I64 :
WebAssemblyTerRMW<I64, "i64.atomic.rmw32.cmpxchg_u", 0x4e>;
// Select ternary RMWs with no constant offset.
-class TerRMWPatNoOffset<ValueType ty, PatFrag kind, NI inst> :
- Pat<(ty (kind I32:$addr, ty:$exp, ty:$new)),
- (inst 0, 0, I32:$addr, ty:$exp, ty:$new)>;
+multiclass TerRMWPatNoOffset<ValueType ty, PatFrag kind, string inst> {
+ def : Pat<(ty (kind I32:$addr, ty:$exp, ty:$new)),
+ (!cast<NI>(inst#_A32) 0, 0, I32:$addr, ty:$exp, ty:$new)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(ty (kind I64:$addr, ty:$exp, ty:$new)),
+ (!cast<NI>(inst#_A64) 0, 0, I64:$addr, ty:$exp, ty:$new)>,
+ Requires<[HasAddr64]>;
+}
// Select ternary RMWs with a constant offset.
// Pattern with address + immediate offset
-class TerRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> :
- Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$exp, ty:$new)),
- (inst 0, imm:$off, I32:$addr, ty:$exp, ty:$new)>;
+multiclass TerRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand,
+ string inst> {
+ def : Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$exp, ty:$new)),
+ (!cast<NI>(inst#_A32) 0, imm:$off, I32:$addr, ty:$exp, ty:$new)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(ty (kind (operand I64:$addr, imm:$off), ty:$exp, ty:$new)),
+ (!cast<NI>(inst#_A64) 0, imm:$off, I64:$addr, ty:$exp, ty:$new)>,
+ Requires<[HasAddr64]>;
+}
// Select ternary RMWs with just a constant offset.
-class TerRMWPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
- Pat<(ty (kind imm:$off, ty:$exp, ty:$new)),
- (inst 0, imm:$off, (CONST_I32 0), ty:$exp, ty:$new)>;
+multiclass TerRMWPatOffsetOnly<ValueType ty, PatFrag kind, string inst> {
+ def : Pat<(ty (kind imm:$off, ty:$exp, ty:$new)),
+ (!cast<NI>(inst#_A32) 0, imm:$off, (CONST_I32 0), ty:$exp,
+ ty:$new)>;
+ def : Pat<(ty (kind imm:$off, ty:$exp, ty:$new)),
+ (!cast<NI>(inst#_A64) 0, imm:$off, (CONST_I64 0), ty:$exp,
+ ty:$new)>;
+}
-class TerRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
- Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)),
- (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, ty:$new)>;
+multiclass TerRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> {
+ def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)),
+ (!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp,
+ ty:$new)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)),
+ (!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$exp,
+ ty:$new)>,
+ Requires<[HasAddr64]>;
+}
// Patterns for various addressing modes.
-multiclass TerRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32,
- NI inst_64> {
- def : TerRMWPatNoOffset<i32, rmw_32, inst_32>;
- def : TerRMWPatNoOffset<i64, rmw_64, inst_64>;
+multiclass TerRMWPattern<PatFrag rmw_32, PatFrag rmw_64, string inst_32,
+ string inst_64> {
+ defm : TerRMWPatNoOffset<i32, rmw_32, inst_32>;
+ defm : TerRMWPatNoOffset<i64, rmw_64, inst_64>;
- def : TerRMWPatImmOff<i32, rmw_32, regPlusImm, inst_32>;
- def : TerRMWPatImmOff<i64, rmw_64, regPlusImm, inst_64>;
- def : TerRMWPatImmOff<i32, rmw_32, or_is_add, inst_32>;
- def : TerRMWPatImmOff<i64, rmw_64, or_is_add, inst_64>;
+ defm : TerRMWPatImmOff<i32, rmw_32, regPlusImm, inst_32>;
+ defm : TerRMWPatImmOff<i64, rmw_64, regPlusImm, inst_64>;
+ defm : TerRMWPatImmOff<i32, rmw_32, or_is_add, inst_32>;
+ defm : TerRMWPatImmOff<i64, rmw_64, or_is_add, inst_64>;
- def : TerRMWPatOffsetOnly<i32, rmw_32, inst_32>;
- def : TerRMWPatOffsetOnly<i64, rmw_64, inst_64>;
+ defm : TerRMWPatOffsetOnly<i32, rmw_32, inst_32>;
+ defm : TerRMWPatOffsetOnly<i64, rmw_64, inst_64>;
- def : TerRMWPatGlobalAddrOffOnly<i32, rmw_32, inst_32>;
- def : TerRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>;
+ defm : TerRMWPatGlobalAddrOffOnly<i32, rmw_32, inst_32>;
+ defm : TerRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>;
}
let Predicates = [HasAtomics] in
defm : TerRMWPattern<atomic_cmp_swap_32, atomic_cmp_swap_64,
- ATOMIC_RMW_CMPXCHG_I32, ATOMIC_RMW_CMPXCHG_I64>;
+ "ATOMIC_RMW_CMPXCHG_I32", "ATOMIC_RMW_CMPXCHG_I64">;
// Truncating & zero-extending ternary RMW patterns.
// DAG legalization & optimization before instruction selection may introduce
@@ -759,67 +930,73 @@ class sext_ter_rmw_16_64<PatFrag kind> : sext_ter_rmw_8_64<kind>;
// Patterns for various addressing modes for truncating-extending ternary RMWs.
multiclass TerRMWTruncExtPattern<
PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32, PatFrag rmw_64,
- NI inst8_32, NI inst16_32, NI inst8_64, NI inst16_64, NI inst32_64> {
+ string inst8_32, string inst16_32, string inst8_64, string inst16_64,
+ string inst32_64> {
// Truncating-extending ternary RMWs with no constant offset
- def : TerRMWPatNoOffset<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
- def : TerRMWPatNoOffset<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
- def : TerRMWPatNoOffset<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
- def : TerRMWPatNoOffset<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
- def : TerRMWPatNoOffset<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+ defm : TerRMWPatNoOffset<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ defm : TerRMWPatNoOffset<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ defm : TerRMWPatNoOffset<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ defm : TerRMWPatNoOffset<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ defm : TerRMWPatNoOffset<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
- def : TerRMWPatNoOffset<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
- def : TerRMWPatNoOffset<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
- def : TerRMWPatNoOffset<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
- def : TerRMWPatNoOffset<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ defm : TerRMWPatNoOffset<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ defm : TerRMWPatNoOffset<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ defm : TerRMWPatNoOffset<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ defm : TerRMWPatNoOffset<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
// Truncating-extending ternary RMWs with a constant offset
- def : TerRMWPatImmOff<i32, zext_ter_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
- def : TerRMWPatImmOff<i32, zext_ter_rmw_16_32<rmw_16>, regPlusImm, inst16_32>;
- def : TerRMWPatImmOff<i64, zext_ter_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
- def : TerRMWPatImmOff<i64, zext_ter_rmw_16_64<rmw_16>, regPlusImm, inst16_64>;
- def : TerRMWPatImmOff<i64, zext_ter_rmw_32_64<rmw_32>, regPlusImm, inst32_64>;
- def : TerRMWPatImmOff<i32, zext_ter_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
- def : TerRMWPatImmOff<i32, zext_ter_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
- def : TerRMWPatImmOff<i64, zext_ter_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
- def : TerRMWPatImmOff<i64, zext_ter_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
- def : TerRMWPatImmOff<i64, zext_ter_rmw_32_64<rmw_32>, or_is_add, inst32_64>;
-
- def : TerRMWPatImmOff<i32, sext_ter_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
- def : TerRMWPatImmOff<i32, sext_ter_rmw_16_32<rmw_16>, regPlusImm, inst16_32>;
- def : TerRMWPatImmOff<i64, sext_ter_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
- def : TerRMWPatImmOff<i64, sext_ter_rmw_16_64<rmw_16>, regPlusImm, inst16_64>;
- def : TerRMWPatImmOff<i32, sext_ter_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
- def : TerRMWPatImmOff<i32, sext_ter_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
- def : TerRMWPatImmOff<i64, sext_ter_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
- def : TerRMWPatImmOff<i64, sext_ter_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
+ defm : TerRMWPatImmOff<i32, zext_ter_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
+ defm : TerRMWPatImmOff<i32, zext_ter_rmw_16_32<rmw_16>, regPlusImm,
+ inst16_32>;
+ defm : TerRMWPatImmOff<i64, zext_ter_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
+ defm : TerRMWPatImmOff<i64, zext_ter_rmw_16_64<rmw_16>, regPlusImm,
+ inst16_64>;
+ defm : TerRMWPatImmOff<i64, zext_ter_rmw_32_64<rmw_32>, regPlusImm,
+ inst32_64>;
+ defm : TerRMWPatImmOff<i32, zext_ter_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
+ defm : TerRMWPatImmOff<i32, zext_ter_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
+ defm : TerRMWPatImmOff<i64, zext_ter_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
+ defm : TerRMWPatImmOff<i64, zext_ter_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
+ defm : TerRMWPatImmOff<i64, zext_ter_rmw_32_64<rmw_32>, or_is_add, inst32_64>;
+
+ defm : TerRMWPatImmOff<i32, sext_ter_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
+ defm : TerRMWPatImmOff<i32, sext_ter_rmw_16_32<rmw_16>, regPlusImm,
+ inst16_32>;
+ defm : TerRMWPatImmOff<i64, sext_ter_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
+ defm : TerRMWPatImmOff<i64, sext_ter_rmw_16_64<rmw_16>, regPlusImm,
+ inst16_64>;
+ defm : TerRMWPatImmOff<i32, sext_ter_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
+ defm : TerRMWPatImmOff<i32, sext_ter_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
+ defm : TerRMWPatImmOff<i64, sext_ter_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
+ defm : TerRMWPatImmOff<i64, sext_ter_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
// Truncating-extending ternary RMWs with just a constant offset
- def : TerRMWPatOffsetOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
- def : TerRMWPatOffsetOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
- def : TerRMWPatOffsetOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
- def : TerRMWPatOffsetOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
- def : TerRMWPatOffsetOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
-
- def : TerRMWPatOffsetOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
- def : TerRMWPatOffsetOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
- def : TerRMWPatOffsetOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
- def : TerRMWPatOffsetOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
-
- def : TerRMWPatGlobalAddrOffOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
- def : TerRMWPatGlobalAddrOffOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
- def : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
- def : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
- def : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
-
- def : TerRMWPatGlobalAddrOffOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
- def : TerRMWPatGlobalAddrOffOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
- def : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
- def : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ defm : TerRMWPatOffsetOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ defm : TerRMWPatOffsetOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ defm : TerRMWPatOffsetOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ defm : TerRMWPatOffsetOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ defm : TerRMWPatOffsetOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+ defm : TerRMWPatOffsetOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ defm : TerRMWPatOffsetOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ defm : TerRMWPatOffsetOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ defm : TerRMWPatOffsetOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
+
+ defm : TerRMWPatGlobalAddrOffOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ defm : TerRMWPatGlobalAddrOffOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ defm : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ defm : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
+ defm : TerRMWPatGlobalAddrOffOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
+
+ defm : TerRMWPatGlobalAddrOffOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
+ defm : TerRMWPatGlobalAddrOffOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
+ defm : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
+ defm : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
}
let Predicates = [HasAtomics] in
defm : TerRMWTruncExtPattern<
atomic_cmp_swap_8, atomic_cmp_swap_16, atomic_cmp_swap_32, atomic_cmp_swap_64,
- ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32,
- ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64,
- ATOMIC_RMW32_U_CMPXCHG_I64>;
+ "ATOMIC_RMW8_U_CMPXCHG_I32", "ATOMIC_RMW16_U_CMPXCHG_I32",
+ "ATOMIC_RMW8_U_CMPXCHG_I64", "ATOMIC_RMW16_U_CMPXCHG_I64",
+ "ATOMIC_RMW32_U_CMPXCHG_I64">;
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td
index 05735cf6d31f..3e9ef6fbc7ea 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td
@@ -33,39 +33,43 @@ def wasm_memset_t : SDTypeProfile<0, 4,
def wasm_memset : SDNode<"WebAssemblyISD::MEMORY_FILL", wasm_memset_t,
[SDNPHasChain, SDNPMayStore]>;
+multiclass BulkMemoryOps<WebAssemblyRegClass rc, string B> {
+
let mayStore = 1, hasSideEffects = 1 in
-defm MEMORY_INIT :
+defm MEMORY_INIT_A#B :
BULK_I<(outs),
- (ins i32imm_op:$seg, i32imm_op:$idx, I32:$dest,
- I32:$offset, I32:$size),
+ (ins i32imm_op:$seg, i32imm_op:$idx, rc:$dest,
+ rc:$offset, rc:$size),
(outs), (ins i32imm_op:$seg, i32imm_op:$idx),
- [(int_wasm_memory_init (i32 timm:$seg), (i32 timm:$idx), I32:$dest,
- I32:$offset, I32:$size
- )],
+ [],
"memory.init\t$seg, $idx, $dest, $offset, $size",
"memory.init\t$seg, $idx", 0x08>;
let hasSideEffects = 1 in
defm DATA_DROP :
BULK_I<(outs), (ins i32imm_op:$seg), (outs), (ins i32imm_op:$seg),
- [(int_wasm_data_drop (i32 timm:$seg))],
+ [],
"data.drop\t$seg", "data.drop\t$seg", 0x09>;
let mayLoad = 1, mayStore = 1 in
-defm MEMORY_COPY :
+defm MEMORY_COPY_A#B :
BULK_I<(outs), (ins i32imm_op:$src_idx, i32imm_op:$dst_idx,
- I32:$dst, I32:$src, I32:$len),
+ rc:$dst, rc:$src, rc:$len),
(outs), (ins i32imm_op:$src_idx, i32imm_op:$dst_idx),
[(wasm_memcpy (i32 imm:$src_idx), (i32 imm:$dst_idx),
- I32:$dst, I32:$src, I32:$len
+ rc:$dst, rc:$src, rc:$len
)],
"memory.copy\t$src_idx, $dst_idx, $dst, $src, $len",
"memory.copy\t$src_idx, $dst_idx", 0x0a>;
let mayStore = 1 in
-defm MEMORY_FILL :
- BULK_I<(outs), (ins i32imm_op:$idx, I32:$dst, I32:$value, I32:$size),
+defm MEMORY_FILL_A#B :
+ BULK_I<(outs), (ins i32imm_op:$idx, rc:$dst, I32:$value, rc:$size),
(outs), (ins i32imm_op:$idx),
- [(wasm_memset (i32 imm:$idx), I32:$dst, I32:$value, I32:$size)],
+ [(wasm_memset (i32 imm:$idx), rc:$dst, I32:$value, rc:$size)],
"memory.fill\t$idx, $dst, $value, $size",
"memory.fill\t$idx", 0x0b>;
+}
+
+defm : BulkMemoryOps<I32, "32">;
+defm : BulkMemoryOps<I64, "64">;
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
index 703c15d58c93..b997c1c16fcb 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
@@ -23,155 +23,56 @@ defm ADJCALLSTACKUP : NRI<(outs), (ins i32imm:$amt, i32imm:$amt2),
[(WebAssemblycallseq_end timm:$amt, timm:$amt2)]>;
} // Uses = [SP32, SP64], Defs = [SP32, SP64], isCodeGenOnly = 1
-multiclass CALL<ValueType vt, WebAssemblyRegClass rt, string prefix,
- list<Predicate> preds = []> {
- defm CALL_#vt :
- I<(outs rt:$dst), (ins function32_op:$callee, variable_ops),
- (outs), (ins function32_op:$callee),
- [(set (vt rt:$dst), (WebAssemblycall1 (i32 imm:$callee)))],
- !strconcat(prefix, "call\t$dst, $callee"),
- !strconcat(prefix, "call\t$callee"),
- 0x10>,
- Requires<preds>;
- let isCodeGenOnly = 1 in
- defm PCALL_INDIRECT_#vt :
- I<(outs rt:$dst), (ins I32:$callee, variable_ops),
- (outs), (ins I32:$callee),
- [(set (vt rt:$dst), (WebAssemblycall1 I32:$callee))],
- "PSEUDO CALL INDIRECT\t$callee",
- "PSEUDO CALL INDIRECT\t$callee">,
- Requires<preds>;
+let Uses = [SP32, SP64], isCall = 1 in {
- defm CALL_INDIRECT_#vt :
- I<(outs rt:$dst),
- (ins TypeIndex:$type, i32imm:$flags, variable_ops),
- (outs), (ins TypeIndex:$type, i32imm:$flags),
- [],
- !strconcat(prefix, "call_indirect\t$dst"),
- !strconcat(prefix, "call_indirect\t$type"),
- 0x11>,
- Requires<preds>;
-}
+// CALL should take both variadic arguments and produce variadic results, but
+// this is not possible to model directly. Instead, we select calls to a
+// CALL_PARAMS taking variadic arguments linked with a CALL_RESULTS that handles
+// producing the call's variadic results. We recombine the two in a custom
+// inserter hook after DAG ISel, so passes over MachineInstrs will only ever
+// observe CALL nodes with all of the expected variadic uses and defs.
+let isPseudo = 1 in
+defm CALL_PARAMS :
+ I<(outs), (ins function32_op:$callee, variable_ops),
+ (outs), (ins function32_op:$callee), [],
+ "call_params\t$callee", "call_params\t$callee", -1>;
-let Uses = [SP32, SP64], isCall = 1 in {
-defm "" : CALL<i32, I32, "i32.">;
-defm "" : CALL<i64, I64, "i64.">;
-defm "" : CALL<f32, F32, "f32.">;
-defm "" : CALL<f64, F64, "f64.">;
-defm "" : CALL<exnref, EXNREF, "exnref.", [HasExceptionHandling]>;
-defm "" : CALL<v16i8, V128, "v128.", [HasSIMD128]>;
-defm "" : CALL<v8i16, V128, "v128.", [HasSIMD128]>;
-defm "" : CALL<v4i32, V128, "v128.", [HasSIMD128]>;
-defm "" : CALL<v2i64, V128, "v128.", [HasSIMD128]>;
-defm "" : CALL<v4f32, V128, "v128.", [HasSIMD128]>;
-defm "" : CALL<v2f64, V128, "v128.", [HasSIMD128]>;
+let variadicOpsAreDefs = 1, usesCustomInserter = 1, isPseudo = 1 in
+defm CALL_RESULTS :
+ I<(outs), (ins variable_ops), (outs), (ins), [],
+ "call_results", "call_results", -1>;
-let IsCanonical = 1 in {
-defm CALL_VOID :
+let variadicOpsAreDefs = 1, usesCustomInserter = 1, isPseudo = 1 in
+defm RET_CALL_RESULTS :
+ I<(outs), (ins variable_ops), (outs), (ins), [],
+ "return_call_results", "return_call_results", -1>;
+
+let variadicOpsAreDefs = 1 in
+defm CALL :
I<(outs), (ins function32_op:$callee, variable_ops),
- (outs), (ins function32_op:$callee),
- [(WebAssemblycall0 (i32 imm:$callee))],
- "call \t$callee", "call\t$callee", 0x10>;
+ (outs), (ins function32_op:$callee), [],
+ "call", "call\t$callee", 0x10>;
-let isReturn = 1 in
+let variadicOpsAreDefs = 1 in
+defm CALL_INDIRECT :
+ I<(outs), (ins TypeIndex:$type, i32imm:$flags, variable_ops),
+ (outs), (ins TypeIndex:$type, i32imm:$flags), [],
+ "call_indirect", "call_indirect\t$type", 0x11>;
+
+let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in
defm RET_CALL :
I<(outs), (ins function32_op:$callee, variable_ops),
- (outs), (ins function32_op:$callee),
- [(WebAssemblyretcall (i32 imm:$callee))],
+ (outs), (ins function32_op:$callee), [],
"return_call \t$callee", "return_call\t$callee", 0x12>,
Requires<[HasTailCall]>;
-let isCodeGenOnly = 1 in
-defm PCALL_INDIRECT_VOID :
- I<(outs), (ins I32:$callee, variable_ops),
- (outs), (ins I32:$callee),
- [(WebAssemblycall0 I32:$callee)],
- "PSEUDO CALL INDIRECT\t$callee",
- "PSEUDO CALL INDIRECT\t$callee">;
-
-defm CALL_INDIRECT_VOID :
- I<(outs), (ins TypeIndex:$type, i32imm:$flags, variable_ops),
- (outs), (ins TypeIndex:$type, i32imm:$flags),
- [],
- "call_indirect\t", "call_indirect\t$type",
- 0x11>;
-
let isReturn = 1 in
defm RET_CALL_INDIRECT :
I<(outs), (ins TypeIndex:$type, i32imm:$flags, variable_ops),
- (outs), (ins TypeIndex:$type, i32imm:$flags),
- [],
+ (outs), (ins TypeIndex:$type, i32imm:$flags), [],
"return_call_indirect\t", "return_call_indirect\t$type",
0x13>,
Requires<[HasTailCall]>;
-let isCodeGenOnly = 1, isReturn = 1 in
-defm PRET_CALL_INDIRECT:
- I<(outs), (ins I32:$callee, variable_ops),
- (outs), (ins I32:$callee),
- [(WebAssemblyretcall I32:$callee)],
- "PSEUDO RET_CALL INDIRECT\t$callee",
- "PSEUDO RET_CALL INDIRECT\t$callee">,
- Requires<[HasTailCall]>;
-
-} // IsCanonical = 1
} // Uses = [SP32,SP64], isCall = 1
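
To make the CALL_PARAMS/CALL_RESULTS scheme described in the comment above concrete, here is a hypothetical, simplified C++ sketch of the kind of custom-inserter hook that comment refers to: it stitches the two pseudos back into one variadic CALL after instruction selection. The opcode constants and the assumption that CALL_PARAMS is emitted immediately before CALL_RESULTS are placeholders for illustration; this is not the in-tree implementation.

// NOTE: hypothetical sketch only -- opcode values are placeholders and the
// real recombination is done by the target's custom inserter.
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include <cassert>

using namespace llvm;

namespace {
// Placeholder opcodes; the real ones come from the generated
// WebAssemblyGenInstrInfo.inc.
enum PseudoOpc { CALL_PARAMS_OPC = 1, CALL_RESULTS_OPC = 2, CALL_OPC = 3 };

MachineBasicBlock *recombineCall(MachineInstr &CallResults,
                                 MachineBasicBlock *BB) {
  assert(CallResults.getOpcode() == CALL_RESULTS_OPC);
  // Assumption for this sketch: ISel places CALL_PARAMS directly before
  // CALL_RESULTS, so the pair can be found by walking one instruction back.
  MachineInstr &CallParams = *CallResults.getPrevNode();
  assert(CallParams.getOpcode() == CALL_PARAMS_OPC);

  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  MachineInstrBuilder MIB = BuildMI(*BB, CallParams, CallResults.getDebugLoc(),
                                    TII.get(CALL_OPC));

  // The combined CALL defines the results first, then reads the callee and
  // the argument operands that CALL_PARAMS was carrying.
  for (MachineOperand &MO : CallResults.operands())
    MIB.add(MO);
  for (MachineOperand &MO : CallParams.operands())
    MIB.add(MO);

  CallParams.eraseFromParent();
  CallResults.eraseFromParent();
  return BB;
}
} // end anonymous namespace

Such a hook runs because CALL_RESULTS is marked with usesCustomInserter = 1, which routes the pseudo through the target's EmitInstrWithCustomInserter after DAG ISel; later machine passes therefore only ever see the recombined variadic CALL.
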
-
-// Patterns for matching a direct call to a global address.
-def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
- (CALL_i32 tglobaladdr:$callee)>;
-def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
- (CALL_i64 tglobaladdr:$callee)>;
-def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
- (CALL_f32 tglobaladdr:$callee)>;
-def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
- (CALL_f64 tglobaladdr:$callee)>;
-def : Pat<(v16i8 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
- (CALL_v16i8 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
- (CALL_v8i16 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(v4i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
- (CALL_v4i32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(v2i64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
- (CALL_v2i64 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
- (CALL_v4f32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(v2f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
- (CALL_v2f64 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(exnref (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
- (CALL_exnref tglobaladdr:$callee)>,
- Requires<[HasExceptionHandling]>;
-def : Pat<(WebAssemblycall0 (WebAssemblywrapper tglobaladdr:$callee)),
- (CALL_VOID tglobaladdr:$callee)>;
-def : Pat<(WebAssemblyretcall (WebAssemblywrapper tglobaladdr:$callee)),
- (RET_CALL tglobaladdr:$callee)>, Requires<[HasTailCall]>;
-
-// Patterns for matching a direct call to an external symbol.
-def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
- (CALL_i32 texternalsym:$callee)>;
-def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
- (CALL_i64 texternalsym:$callee)>;
-def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
- (CALL_f32 texternalsym:$callee)>;
-def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
- (CALL_f64 texternalsym:$callee)>;
-def : Pat<(v16i8 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
- (CALL_v16i8 texternalsym:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
- (CALL_v8i16 texternalsym:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(v4i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
- (CALL_v4i32 texternalsym:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(v2i64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
- (CALL_v2i64 texternalsym:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
- (CALL_v4f32 texternalsym:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(v2f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
- (CALL_v2f64 texternalsym:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(exnref (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
- (CALL_exnref texternalsym:$callee)>,
- Requires<[HasExceptionHandling]>;
-def : Pat<(WebAssemblycall0 (WebAssemblywrapper texternalsym:$callee)),
- (CALL_VOID texternalsym:$callee)>;
-def : Pat<(WebAssemblyretcall (WebAssemblywrapper texternalsym:$callee)),
- (RET_CALL texternalsym:$callee)>, Requires<[HasTailCall]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index 1afc9a8790dc..171dd9a67beb 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -40,21 +40,25 @@ def brlist : Operand<i32> {
let PrintMethod = "printBrList";
}
-// TODO: SelectionDAG's lowering insists on using a pointer as the index for
-// jump tables, so in practice we don't ever use BR_TABLE_I64 in wasm32 mode
-// currently.
-let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+// Duplicating a BR_TABLE is almost never a good idea. In particular, it can
+// lead to some nasty irreducibility due to tail merging when the br_table is in
+// a loop.
+let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1, isNotDuplicable = 1 in {
+
defm BR_TABLE_I32 : I<(outs), (ins I32:$index, variable_ops),
(outs), (ins brlist:$brl),
[(WebAssemblybr_table I32:$index)],
"br_table \t$index", "br_table \t$brl",
0x0e>;
+// TODO: SelectionDAG's lowering insists on using a pointer as the index for
+// jump tables, so in practice we don't ever use BR_TABLE_I64 in wasm32 mode
+// currently.
defm BR_TABLE_I64 : I<(outs), (ins I64:$index, variable_ops),
(outs), (ins brlist:$brl),
[(WebAssemblybr_table I64:$index)],
"br_table \t$index", "br_table \t$brl",
0x0e>;
-} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
+} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1, isNotDuplicable = 1
// This is technically a control-flow instruction, since all it affects is the
// IP.
@@ -85,8 +89,8 @@ defm END_FUNCTION : NRI<(outs), (ins), [], "end_function", 0x0b>;
} // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
-let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
-
+let hasCtrlDep = 1, isBarrier = 1 in {
+let isTerminator = 1 in {
let isReturn = 1 in {
defm RETURN : I<(outs), (ins variable_ops), (outs), (ins),
@@ -99,8 +103,21 @@ defm FALLTHROUGH_RETURN : I<(outs), (ins variable_ops), (outs), (ins), []>;
} // isReturn = 1
+let isTrap = 1 in
defm UNREACHABLE : NRI<(outs), (ins), [(trap)], "unreachable", 0x00>;
-} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
+
+} // isTerminator = 1
+
+// debugtrap explicitly returns despite trapping because it is supposed to just
+// get the attention of the debugger. Unfortunately, because UNREACHABLE is a
+// terminator, lowering debugtrap to UNREACHABLE can create an invalid
+// MachineBasicBlock when there is additional code after it. Lower it to this
+// non-terminator version instead.
+// TODO: Actually execute the debugger statement when running on the Web
+let isTrap = 1 in
+defm DEBUG_UNREACHABLE : NRI<(outs), (ins), [(debugtrap)], "unreachable", 0x00>;
+
+} // hasCtrlDep = 1, isBarrier = 1
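
As a minimal illustration of why debugtrap needs the non-terminator lowering above, consider this sketch, assuming clang and its __builtin_debugtrap() builtin (which lowers to llvm.debugtrap): the code following the trap must stay reachable, so the lowering cannot end the basic block.

// Minimal illustration: debugtrap is expected to return, so code after it in
// the same block must remain reachable.
#include <cstdio>

int probe(int x) {
  if (x < 0)
    __builtin_debugtrap(); // selected as the non-terminator DEBUG_UNREACHABLE
  std::printf("still running: %d\n", x); // must remain reachable after the trap
  return x;
}
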
//===----------------------------------------------------------------------===//
// Exception handling instructions
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
index aff4d20d8d82..0a4289c4959b 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
@@ -14,11 +14,13 @@
// WebAssembly Instruction Format.
// We instantiate 2 of these for every actual instruction (register based
// and stack based), see below.
-class WebAssemblyInst<bits<32> inst, string asmstr, string stack> : StackRel,
- Instruction {
+class WebAssemblyInst<bits<32> inst, string asmstr, string stack, string is64>
+ : StackRel, Wasm64Rel, Instruction {
bits<32> Inst = inst; // Instruction encoding.
string StackBased = stack;
string BaseName = NAME;
+ string IsWasm64 = is64;
+ string Wasm32Name = !subst("_A64", "_A32", NAME);
let Namespace = "WebAssembly";
let Pattern = [];
let AsmString = asmstr;
@@ -29,8 +31,8 @@ class WebAssemblyInst<bits<32> inst, string asmstr, string stack> : StackRel,
// Normal instructions. Default instantiation of a WebAssemblyInst.
class NI<dag oops, dag iops, list<dag> pattern, string stack,
- string asmstr = "", bits<32> inst = -1>
- : WebAssemblyInst<inst, asmstr, stack> {
+ string asmstr = "", bits<32> inst = -1, string is64 = "false">
+ : WebAssemblyInst<inst, asmstr, stack, is64> {
dag OutOperandList = oops;
dag InOperandList = iops;
let Pattern = pattern;
@@ -52,11 +54,11 @@ class NI<dag oops, dag iops, list<dag> pattern, string stack,
// there is always an equivalent pair of instructions.
multiclass I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
list<dag> pattern_r, string asmstr_r = "", string asmstr_s = "",
- bits<32> inst = -1> {
+ bits<32> inst = -1, string is64 = "false"> {
let isCodeGenOnly = 1 in
- def "" : NI<oops_r, iops_r, pattern_r, "false", asmstr_r, inst>;
+ def "" : NI<oops_r, iops_r, pattern_r, "false", asmstr_r, inst, is64>;
let BaseName = NAME in
- def _S : NI<oops_s, iops_s, [], "true", asmstr_s, inst>;
+ def _S : NI<oops_s, iops_s, [], "true", asmstr_s, inst, is64>;
}
// For instructions that have no register ops, so both sets are the same.
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index 221dacaf821b..6fe1fd2b5c5a 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -235,8 +235,9 @@ bool WebAssemblyInstrInfo::reverseBranchCondition(
ArrayRef<std::pair<int, const char *>>
WebAssemblyInstrInfo::getSerializableTargetIndices() const {
static const std::pair<int, const char *> TargetIndices[] = {
- {WebAssembly::TI_LOCAL_START, "wasm-local-start"},
- {WebAssembly::TI_GLOBAL_START, "wasm-global-start"},
- {WebAssembly::TI_OPERAND_STACK_START, "wasm-operator-stack-start"}};
+ {WebAssembly::TI_LOCAL, "wasm-local"},
+ {WebAssembly::TI_GLOBAL_FIXED, "wasm-global-fixed"},
+ {WebAssembly::TI_OPERAND_STACK, "wasm-operand-stack"},
+ {WebAssembly::TI_GLOBAL_RELOC, "wasm-global-reloc"}};
return makeArrayRef(TargetIndices);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index 044901481381..5ff0d73534a6 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -24,43 +24,47 @@ def HasAddr64 : Predicate<"Subtarget->hasAddr64()">;
def HasSIMD128 :
Predicate<"Subtarget->hasSIMD128()">,
- AssemblerPredicate<"FeatureSIMD128", "simd128">;
+ AssemblerPredicate<(all_of FeatureSIMD128), "simd128">;
def HasUnimplementedSIMD128 :
Predicate<"Subtarget->hasUnimplementedSIMD128()">,
- AssemblerPredicate<"FeatureUnimplementedSIMD128", "unimplemented-simd128">;
+ AssemblerPredicate<(all_of FeatureUnimplementedSIMD128), "unimplemented-simd128">;
def HasAtomics :
Predicate<"Subtarget->hasAtomics()">,
- AssemblerPredicate<"FeatureAtomics", "atomics">;
+ AssemblerPredicate<(all_of FeatureAtomics), "atomics">;
def HasMultivalue :
Predicate<"Subtarget->hasMultivalue()">,
- AssemblerPredicate<"FeatureMultivalue", "multivalue">;
+ AssemblerPredicate<(all_of FeatureMultivalue), "multivalue">;
def HasNontrappingFPToInt :
Predicate<"Subtarget->hasNontrappingFPToInt()">,
- AssemblerPredicate<"FeatureNontrappingFPToInt", "nontrapping-fptoint">;
+ AssemblerPredicate<(all_of FeatureNontrappingFPToInt), "nontrapping-fptoint">;
def NotHasNontrappingFPToInt :
Predicate<"!Subtarget->hasNontrappingFPToInt()">,
- AssemblerPredicate<"!FeatureNontrappingFPToInt", "nontrapping-fptoint">;
+ AssemblerPredicate<(all_of (not FeatureNontrappingFPToInt)), "nontrapping-fptoint">;
def HasSignExt :
Predicate<"Subtarget->hasSignExt()">,
- AssemblerPredicate<"FeatureSignExt", "sign-ext">;
+ AssemblerPredicate<(all_of FeatureSignExt), "sign-ext">;
def HasTailCall :
Predicate<"Subtarget->hasTailCall()">,
- AssemblerPredicate<"FeatureTailCall", "tail-call">;
+ AssemblerPredicate<(all_of FeatureTailCall), "tail-call">;
def HasExceptionHandling :
Predicate<"Subtarget->hasExceptionHandling()">,
- AssemblerPredicate<"FeatureExceptionHandling", "exception-handling">;
+ AssemblerPredicate<(all_of FeatureExceptionHandling), "exception-handling">;
def HasBulkMemory :
Predicate<"Subtarget->hasBulkMemory()">,
- AssemblerPredicate<"FeatureBulkMemory", "bulk-memory">;
+ AssemblerPredicate<(all_of FeatureBulkMemory), "bulk-memory">;
+
+def HasReferenceTypes :
+ Predicate<"Subtarget->hasReferenceTypes()">,
+ AssemblerPredicate<(all_of FeatureReferenceTypes), "reference-types">;
//===----------------------------------------------------------------------===//
// WebAssembly-specific DAG Node Types.
@@ -91,15 +95,6 @@ def WebAssemblycallseq_start :
def WebAssemblycallseq_end :
SDNode<"ISD::CALLSEQ_END", SDT_WebAssemblyCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def WebAssemblycall0 : SDNode<"WebAssemblyISD::CALL0",
- SDT_WebAssemblyCall0,
- [SDNPHasChain, SDNPVariadic]>;
-def WebAssemblycall1 : SDNode<"WebAssemblyISD::CALL1",
- SDT_WebAssemblyCall1,
- [SDNPHasChain, SDNPVariadic]>;
-def WebAssemblyretcall : SDNode<"WebAssemblyISD::RET_CALL",
- SDT_WebAssemblyCall0,
- [SDNPHasChain, SDNPVariadic]>;
def WebAssemblybr_table : SDNode<"WebAssemblyISD::BR_TABLE",
SDT_WebAssemblyBrTable,
[SDNPHasChain, SDNPVariadic]>;
@@ -171,6 +166,9 @@ def function32_op : Operand<i32>;
let OperandType = "OPERAND_OFFSET32" in
def offset32_op : Operand<i32>;
+let OperandType = "OPERAND_OFFSET64" in
+def offset64_op : Operand<i64>;
+
let OperandType = "OPERAND_P2ALIGN" in {
def P2Align : Operand<i32> {
let PrintMethod = "printWebAssemblyP2AlignOperand";
@@ -205,6 +203,19 @@ def getStackOpcode : InstrMapping {
}
//===----------------------------------------------------------------------===//
+// WebAssembly 32 to 64-bit instruction mapping
+//===----------------------------------------------------------------------===//
+
+class Wasm64Rel;
+def getWasm64Opcode : InstrMapping {
+ let FilterClass = "Wasm64Rel";
+ let RowFields = ["Wasm32Name"];
+ let ColFields = ["IsWasm64"];
+ let KeyCol = ["false"];
+ let ValueCols = [["true"]];
+}
+
+//===----------------------------------------------------------------------===//
// WebAssembly Instruction Format Definitions.
//===----------------------------------------------------------------------===//
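
The getWasm64Opcode InstrMapping above pairs each wasm32 instruction with its wasm64 counterpart through the Wasm32Name and IsWasm64 fields introduced in WebAssemblyInstrFormats.td. The net effect is a 32-bit-to-64-bit opcode lookup; the C++ sketch below mimics that shape with invented opcode names and values and is not the TableGen-generated table:

#include <optional>
#include <unordered_map>

// Stand-in opcodes for illustration only; the real enumerators live in the
// generated WebAssembly instruction info.
enum Opcode { LOAD_I32_A32, LOAD_I32_A64, STORE_I32_A32, STORE_I32_A64 };

// Map a 32-bit address-mode instruction to its 64-bit twin, if one exists.
std::optional<Opcode> getWasm64Variant(Opcode Op32) {
  static const std::unordered_map<int, Opcode> Map = {
      {LOAD_I32_A32, LOAD_I32_A64},
      {STORE_I32_A32, STORE_I32_A64},
  };
  auto It = Map.find(Op32);
  if (It == Map.end())
    return std::nullopt;
  return It->second;
}

int main() {
  return getWasm64Variant(LOAD_I32_A32) == LOAD_I32_A64 ? 0 : 1;
}
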
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index eba9b80d3286..b3c63cc1f884 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
// TODO:
-// - HasAddr64
// - WebAssemblyTargetLowering having to do with atomics
// - Each has optional alignment.
@@ -41,181 +40,222 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
// offsets folded into them, so we can just use add.
// Defines atomic and non-atomic loads, regular and extending.
-multiclass WebAssemblyLoad<WebAssemblyRegClass rc, string Name, int Opcode> {
- let mayLoad = 1, UseNamedOperandTable = 1 in
- defm "": I<(outs rc:$dst),
- (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
- (outs), (ins P2Align:$p2align, offset32_op:$off),
- [], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"),
- !strconcat(Name, "\t${off}${p2align}"), Opcode>;
+multiclass WebAssemblyLoad<WebAssemblyRegClass rc, string Name, int Opcode,
+ list<Predicate> reqs = []> {
+ let mayLoad = 1, UseNamedOperandTable = 1 in {
+ defm "_A32": I<(outs rc:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ (outs), (ins P2Align:$p2align, offset32_op:$off),
+ [], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"),
+ !strconcat(Name, "\t${off}${p2align}"), Opcode, "false">,
+ Requires<reqs>;
+ defm "_A64": I<(outs rc:$dst),
+ (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
+ (outs), (ins P2Align:$p2align, offset64_op:$off),
+ [], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"),
+ !strconcat(Name, "\t${off}${p2align}"), Opcode, "true">,
+ Requires<reqs>;
+ }
}
// Basic load.
// FIXME: When we can break syntax compatibility, reorder the fields in the
// asmstrings to match the binary encoding.
-defm LOAD_I32 : WebAssemblyLoad<I32, "i32.load", 0x28>;
-defm LOAD_I64 : WebAssemblyLoad<I64, "i64.load", 0x29>;
-defm LOAD_F32 : WebAssemblyLoad<F32, "f32.load", 0x2a>;
-defm LOAD_F64 : WebAssemblyLoad<F64, "f64.load", 0x2b>;
+defm LOAD_I32 : WebAssemblyLoad<I32, "i32.load", 0x28, []>;
+defm LOAD_I64 : WebAssemblyLoad<I64, "i64.load", 0x29, []>;
+defm LOAD_F32 : WebAssemblyLoad<F32, "f32.load", 0x2a, []>;
+defm LOAD_F64 : WebAssemblyLoad<F64, "f64.load", 0x2b, []>;
// Select loads with no constant offset.
-class LoadPatNoOffset<ValueType ty, PatFrag kind, NI inst> :
- Pat<(ty (kind I32:$addr)), (inst 0, 0, I32:$addr)>;
-
-def : LoadPatNoOffset<i32, load, LOAD_I32>;
-def : LoadPatNoOffset<i64, load, LOAD_I64>;
-def : LoadPatNoOffset<f32, load, LOAD_F32>;
-def : LoadPatNoOffset<f64, load, LOAD_F64>;
+multiclass LoadPatNoOffset<ValueType ty, PatFrag kind, string inst> {
+ def : Pat<(ty (kind I32:$addr)), (!cast<NI>(inst # "_A32") 0, 0, I32:$addr)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(ty (kind I64:$addr)), (!cast<NI>(inst # "_A64") 0, 0, I64:$addr)>,
+ Requires<[HasAddr64]>;
+}
+defm : LoadPatNoOffset<i32, load, "LOAD_I32">;
+defm : LoadPatNoOffset<i64, load, "LOAD_I64">;
+defm : LoadPatNoOffset<f32, load, "LOAD_F32">;
+defm : LoadPatNoOffset<f64, load, "LOAD_F64">;
// Select loads with a constant offset.
// Pattern with address + immediate offset
-class LoadPatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> :
- Pat<(ty (kind (operand I32:$addr, imm:$off))), (inst 0, imm:$off, I32:$addr)>;
-
-def : LoadPatImmOff<i32, load, regPlusImm, LOAD_I32>;
-def : LoadPatImmOff<i64, load, regPlusImm, LOAD_I64>;
-def : LoadPatImmOff<f32, load, regPlusImm, LOAD_F32>;
-def : LoadPatImmOff<f64, load, regPlusImm, LOAD_F64>;
-def : LoadPatImmOff<i32, load, or_is_add, LOAD_I32>;
-def : LoadPatImmOff<i64, load, or_is_add, LOAD_I64>;
-def : LoadPatImmOff<f32, load, or_is_add, LOAD_F32>;
-def : LoadPatImmOff<f64, load, or_is_add, LOAD_F64>;
+multiclass LoadPatImmOff<ValueType ty, PatFrag kind, PatFrag operand,
+ string inst> {
+ def : Pat<(ty (kind (operand I32:$addr, imm:$off))),
+ (!cast<NI>(inst # "_A32") 0, imm:$off, I32:$addr)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(ty (kind (operand I64:$addr, imm:$off))),
+ (!cast<NI>(inst # "_A64") 0, imm:$off, I64:$addr)>,
+ Requires<[HasAddr64]>;
+}
-// Select loads with just a constant offset.
-class LoadPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
- Pat<(ty (kind imm:$off)), (inst 0, imm:$off, (CONST_I32 0))>;
+defm : LoadPatImmOff<i32, load, regPlusImm, "LOAD_I32">;
+defm : LoadPatImmOff<i64, load, regPlusImm, "LOAD_I64">;
+defm : LoadPatImmOff<f32, load, regPlusImm, "LOAD_F32">;
+defm : LoadPatImmOff<f64, load, regPlusImm, "LOAD_F64">;
+defm : LoadPatImmOff<i32, load, or_is_add, "LOAD_I32">;
+defm : LoadPatImmOff<i64, load, or_is_add, "LOAD_I64">;
+defm : LoadPatImmOff<f32, load, or_is_add, "LOAD_F32">;
+defm : LoadPatImmOff<f64, load, or_is_add, "LOAD_F64">;
-def : LoadPatOffsetOnly<i32, load, LOAD_I32>;
-def : LoadPatOffsetOnly<i64, load, LOAD_I64>;
-def : LoadPatOffsetOnly<f32, load, LOAD_F32>;
-def : LoadPatOffsetOnly<f64, load, LOAD_F64>;
+// Select loads with just a constant offset.
+multiclass LoadPatOffsetOnly<ValueType ty, PatFrag kind, string inst> {
+ def : Pat<(ty (kind imm:$off)),
+ (!cast<NI>(inst # "_A32") 0, imm:$off, (CONST_I32 0))>,
+ Requires<[HasAddr32]>;
+ def : Pat<(ty (kind imm:$off)),
+ (!cast<NI>(inst # "_A64") 0, imm:$off, (CONST_I64 0))>,
+ Requires<[HasAddr64]>;
+}
-class LoadPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
- Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off))),
- (inst 0, tglobaladdr:$off, (CONST_I32 0))>, Requires<[IsNotPIC]>;
+defm : LoadPatOffsetOnly<i32, load, "LOAD_I32">;
+defm : LoadPatOffsetOnly<i64, load, "LOAD_I64">;
+defm : LoadPatOffsetOnly<f32, load, "LOAD_F32">;
+defm : LoadPatOffsetOnly<f64, load, "LOAD_F64">;
+
+multiclass LoadPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> {
+ def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off))),
+ (!cast<NI>(inst # "_A32") 0, tglobaladdr:$off, (CONST_I32 0))>,
+ Requires<[IsNotPIC, HasAddr32]>;
+ def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off))),
+ (!cast<NI>(inst # "_A64") 0, tglobaladdr:$off, (CONST_I64 0))>,
+ Requires<[IsNotPIC, HasAddr64]>;
+}
-def : LoadPatGlobalAddrOffOnly<i32, load, LOAD_I32>;
-def : LoadPatGlobalAddrOffOnly<i64, load, LOAD_I64>;
-def : LoadPatGlobalAddrOffOnly<f32, load, LOAD_F32>;
-def : LoadPatGlobalAddrOffOnly<f64, load, LOAD_F64>;
+defm : LoadPatGlobalAddrOffOnly<i32, load, "LOAD_I32">;
+defm : LoadPatGlobalAddrOffOnly<i64, load, "LOAD_I64">;
+defm : LoadPatGlobalAddrOffOnly<f32, load, "LOAD_F32">;
+defm : LoadPatGlobalAddrOffOnly<f64, load, "LOAD_F64">;
// Extending load.
-defm LOAD8_S_I32 : WebAssemblyLoad<I32, "i32.load8_s", 0x2c>;
-defm LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.load8_u", 0x2d>;
-defm LOAD16_S_I32 : WebAssemblyLoad<I32, "i32.load16_s", 0x2e>;
-defm LOAD16_U_I32 : WebAssemblyLoad<I32, "i32.load16_u", 0x2f>;
-defm LOAD8_S_I64 : WebAssemblyLoad<I64, "i64.load8_s", 0x30>;
-defm LOAD8_U_I64 : WebAssemblyLoad<I64, "i64.load8_u", 0x31>;
-defm LOAD16_S_I64 : WebAssemblyLoad<I64, "i64.load16_s", 0x32>;
-defm LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.load16_u", 0x33>;
-defm LOAD32_S_I64 : WebAssemblyLoad<I64, "i64.load32_s", 0x34>;
-defm LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.load32_u", 0x35>;
+defm LOAD8_S_I32 : WebAssemblyLoad<I32, "i32.load8_s", 0x2c, []>;
+defm LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.load8_u", 0x2d, []>;
+defm LOAD16_S_I32 : WebAssemblyLoad<I32, "i32.load16_s", 0x2e, []>;
+defm LOAD16_U_I32 : WebAssemblyLoad<I32, "i32.load16_u", 0x2f, []>;
+defm LOAD8_S_I64 : WebAssemblyLoad<I64, "i64.load8_s", 0x30, []>;
+defm LOAD8_U_I64 : WebAssemblyLoad<I64, "i64.load8_u", 0x31, []>;
+defm LOAD16_S_I64 : WebAssemblyLoad<I64, "i64.load16_s", 0x32, []>;
+defm LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.load16_u", 0x33, []>;
+defm LOAD32_S_I64 : WebAssemblyLoad<I64, "i64.load32_s", 0x34, []>;
+defm LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.load32_u", 0x35, []>;
// Select extending loads with no constant offset.
-def : LoadPatNoOffset<i32, sextloadi8, LOAD8_S_I32>;
-def : LoadPatNoOffset<i32, zextloadi8, LOAD8_U_I32>;
-def : LoadPatNoOffset<i32, sextloadi16, LOAD16_S_I32>;
-def : LoadPatNoOffset<i32, zextloadi16, LOAD16_U_I32>;
-def : LoadPatNoOffset<i64, sextloadi8, LOAD8_S_I64>;
-def : LoadPatNoOffset<i64, zextloadi8, LOAD8_U_I64>;
-def : LoadPatNoOffset<i64, sextloadi16, LOAD16_S_I64>;
-def : LoadPatNoOffset<i64, zextloadi16, LOAD16_U_I64>;
-def : LoadPatNoOffset<i64, sextloadi32, LOAD32_S_I64>;
-def : LoadPatNoOffset<i64, zextloadi32, LOAD32_U_I64>;
+defm : LoadPatNoOffset<i32, sextloadi8, "LOAD8_S_I32">;
+defm : LoadPatNoOffset<i32, zextloadi8, "LOAD8_U_I32">;
+defm : LoadPatNoOffset<i32, sextloadi16, "LOAD16_S_I32">;
+defm : LoadPatNoOffset<i32, zextloadi16, "LOAD16_U_I32">;
+defm : LoadPatNoOffset<i64, sextloadi8, "LOAD8_S_I64">;
+defm : LoadPatNoOffset<i64, zextloadi8, "LOAD8_U_I64">;
+defm : LoadPatNoOffset<i64, sextloadi16, "LOAD16_S_I64">;
+defm : LoadPatNoOffset<i64, zextloadi16, "LOAD16_U_I64">;
+defm : LoadPatNoOffset<i64, sextloadi32, "LOAD32_S_I64">;
+defm : LoadPatNoOffset<i64, zextloadi32, "LOAD32_U_I64">;
// Select extending loads with a constant offset.
-def : LoadPatImmOff<i32, sextloadi8, regPlusImm, LOAD8_S_I32>;
-def : LoadPatImmOff<i32, zextloadi8, regPlusImm, LOAD8_U_I32>;
-def : LoadPatImmOff<i32, sextloadi16, regPlusImm, LOAD16_S_I32>;
-def : LoadPatImmOff<i32, zextloadi16, regPlusImm, LOAD16_U_I32>;
-def : LoadPatImmOff<i64, sextloadi8, regPlusImm, LOAD8_S_I64>;
-def : LoadPatImmOff<i64, zextloadi8, regPlusImm, LOAD8_U_I64>;
-def : LoadPatImmOff<i64, sextloadi16, regPlusImm, LOAD16_S_I64>;
-def : LoadPatImmOff<i64, zextloadi16, regPlusImm, LOAD16_U_I64>;
-def : LoadPatImmOff<i64, sextloadi32, regPlusImm, LOAD32_S_I64>;
-def : LoadPatImmOff<i64, zextloadi32, regPlusImm, LOAD32_U_I64>;
-
-def : LoadPatImmOff<i32, sextloadi8, or_is_add, LOAD8_S_I32>;
-def : LoadPatImmOff<i32, zextloadi8, or_is_add, LOAD8_U_I32>;
-def : LoadPatImmOff<i32, sextloadi16, or_is_add, LOAD16_S_I32>;
-def : LoadPatImmOff<i32, zextloadi16, or_is_add, LOAD16_U_I32>;
-def : LoadPatImmOff<i64, sextloadi8, or_is_add, LOAD8_S_I64>;
-def : LoadPatImmOff<i64, zextloadi8, or_is_add, LOAD8_U_I64>;
-def : LoadPatImmOff<i64, sextloadi16, or_is_add, LOAD16_S_I64>;
-def : LoadPatImmOff<i64, zextloadi16, or_is_add, LOAD16_U_I64>;
-def : LoadPatImmOff<i64, sextloadi32, or_is_add, LOAD32_S_I64>;
-def : LoadPatImmOff<i64, zextloadi32, or_is_add, LOAD32_U_I64>;
+defm : LoadPatImmOff<i32, sextloadi8, regPlusImm, "LOAD8_S_I32">;
+defm : LoadPatImmOff<i32, zextloadi8, regPlusImm, "LOAD8_U_I32">;
+defm : LoadPatImmOff<i32, sextloadi16, regPlusImm, "LOAD16_S_I32">;
+defm : LoadPatImmOff<i32, zextloadi16, regPlusImm, "LOAD16_U_I32">;
+defm : LoadPatImmOff<i64, sextloadi8, regPlusImm, "LOAD8_S_I64">;
+defm : LoadPatImmOff<i64, zextloadi8, regPlusImm, "LOAD8_U_I64">;
+defm : LoadPatImmOff<i64, sextloadi16, regPlusImm, "LOAD16_S_I64">;
+defm : LoadPatImmOff<i64, zextloadi16, regPlusImm, "LOAD16_U_I64">;
+defm : LoadPatImmOff<i64, sextloadi32, regPlusImm, "LOAD32_S_I64">;
+defm : LoadPatImmOff<i64, zextloadi32, regPlusImm, "LOAD32_U_I64">;
+
+defm : LoadPatImmOff<i32, sextloadi8, or_is_add, "LOAD8_S_I32">;
+defm : LoadPatImmOff<i32, zextloadi8, or_is_add, "LOAD8_U_I32">;
+defm : LoadPatImmOff<i32, sextloadi16, or_is_add, "LOAD16_S_I32">;
+defm : LoadPatImmOff<i32, zextloadi16, or_is_add, "LOAD16_U_I32">;
+defm : LoadPatImmOff<i64, sextloadi8, or_is_add, "LOAD8_S_I64">;
+defm : LoadPatImmOff<i64, zextloadi8, or_is_add, "LOAD8_U_I64">;
+defm : LoadPatImmOff<i64, sextloadi16, or_is_add, "LOAD16_S_I64">;
+defm : LoadPatImmOff<i64, zextloadi16, or_is_add, "LOAD16_U_I64">;
+defm : LoadPatImmOff<i64, sextloadi32, or_is_add, "LOAD32_S_I64">;
+defm : LoadPatImmOff<i64, zextloadi32, or_is_add, "LOAD32_U_I64">;
// Select extending loads with just a constant offset.
-def : LoadPatOffsetOnly<i32, sextloadi8, LOAD8_S_I32>;
-def : LoadPatOffsetOnly<i32, zextloadi8, LOAD8_U_I32>;
-def : LoadPatOffsetOnly<i32, sextloadi16, LOAD16_S_I32>;
-def : LoadPatOffsetOnly<i32, zextloadi16, LOAD16_U_I32>;
-
-def : LoadPatOffsetOnly<i64, sextloadi8, LOAD8_S_I64>;
-def : LoadPatOffsetOnly<i64, zextloadi8, LOAD8_U_I64>;
-def : LoadPatOffsetOnly<i64, sextloadi16, LOAD16_S_I64>;
-def : LoadPatOffsetOnly<i64, zextloadi16, LOAD16_U_I64>;
-def : LoadPatOffsetOnly<i64, sextloadi32, LOAD32_S_I64>;
-def : LoadPatOffsetOnly<i64, zextloadi32, LOAD32_U_I64>;
-
-def : LoadPatGlobalAddrOffOnly<i32, sextloadi8, LOAD8_S_I32>;
-def : LoadPatGlobalAddrOffOnly<i32, zextloadi8, LOAD8_U_I32>;
-def : LoadPatGlobalAddrOffOnly<i32, sextloadi16, LOAD16_S_I32>;
-def : LoadPatGlobalAddrOffOnly<i32, zextloadi16, LOAD16_U_I32>;
-def : LoadPatGlobalAddrOffOnly<i64, sextloadi8, LOAD8_S_I64>;
-def : LoadPatGlobalAddrOffOnly<i64, zextloadi8, LOAD8_U_I64>;
-def : LoadPatGlobalAddrOffOnly<i64, sextloadi16, LOAD16_S_I64>;
-def : LoadPatGlobalAddrOffOnly<i64, zextloadi16, LOAD16_U_I64>;
-def : LoadPatGlobalAddrOffOnly<i64, sextloadi32, LOAD32_S_I64>;
-def : LoadPatGlobalAddrOffOnly<i64, zextloadi32, LOAD32_U_I64>;
+defm : LoadPatOffsetOnly<i32, sextloadi8, "LOAD8_S_I32">;
+defm : LoadPatOffsetOnly<i32, zextloadi8, "LOAD8_U_I32">;
+defm : LoadPatOffsetOnly<i32, sextloadi16, "LOAD16_S_I32">;
+defm : LoadPatOffsetOnly<i32, zextloadi16, "LOAD16_U_I32">;
+
+defm : LoadPatOffsetOnly<i64, sextloadi8, "LOAD8_S_I64">;
+defm : LoadPatOffsetOnly<i64, zextloadi8, "LOAD8_U_I64">;
+defm : LoadPatOffsetOnly<i64, sextloadi16, "LOAD16_S_I64">;
+defm : LoadPatOffsetOnly<i64, zextloadi16, "LOAD16_U_I64">;
+defm : LoadPatOffsetOnly<i64, sextloadi32, "LOAD32_S_I64">;
+defm : LoadPatOffsetOnly<i64, zextloadi32, "LOAD32_U_I64">;
+
+defm : LoadPatGlobalAddrOffOnly<i32, sextloadi8, "LOAD8_S_I32">;
+defm : LoadPatGlobalAddrOffOnly<i32, zextloadi8, "LOAD8_U_I32">;
+defm : LoadPatGlobalAddrOffOnly<i32, sextloadi16, "LOAD16_S_I32">;
+defm : LoadPatGlobalAddrOffOnly<i32, zextloadi16, "LOAD16_U_I32">;
+defm : LoadPatGlobalAddrOffOnly<i64, sextloadi8, "LOAD8_S_I64">;
+defm : LoadPatGlobalAddrOffOnly<i64, zextloadi8, "LOAD8_U_I64">;
+defm : LoadPatGlobalAddrOffOnly<i64, sextloadi16, "LOAD16_S_I64">;
+defm : LoadPatGlobalAddrOffOnly<i64, zextloadi16, "LOAD16_U_I64">;
+defm : LoadPatGlobalAddrOffOnly<i64, sextloadi32, "LOAD32_S_I64">;
+defm : LoadPatGlobalAddrOffOnly<i64, zextloadi32, "LOAD32_U_I64">;
// Resolve "don't care" extending loads to zero-extending loads. This is
// somewhat arbitrary, but zero-extending is conceptually simpler.
// Select "don't care" extending loads with no constant offset.
-def : LoadPatNoOffset<i32, extloadi8, LOAD8_U_I32>;
-def : LoadPatNoOffset<i32, extloadi16, LOAD16_U_I32>;
-def : LoadPatNoOffset<i64, extloadi8, LOAD8_U_I64>;
-def : LoadPatNoOffset<i64, extloadi16, LOAD16_U_I64>;
-def : LoadPatNoOffset<i64, extloadi32, LOAD32_U_I64>;
+defm : LoadPatNoOffset<i32, extloadi8, "LOAD8_U_I32">;
+defm : LoadPatNoOffset<i32, extloadi16, "LOAD16_U_I32">;
+defm : LoadPatNoOffset<i64, extloadi8, "LOAD8_U_I64">;
+defm : LoadPatNoOffset<i64, extloadi16, "LOAD16_U_I64">;
+defm : LoadPatNoOffset<i64, extloadi32, "LOAD32_U_I64">;
// Select "don't care" extending loads with a constant offset.
-def : LoadPatImmOff<i32, extloadi8, regPlusImm, LOAD8_U_I32>;
-def : LoadPatImmOff<i32, extloadi16, regPlusImm, LOAD16_U_I32>;
-def : LoadPatImmOff<i64, extloadi8, regPlusImm, LOAD8_U_I64>;
-def : LoadPatImmOff<i64, extloadi16, regPlusImm, LOAD16_U_I64>;
-def : LoadPatImmOff<i64, extloadi32, regPlusImm, LOAD32_U_I64>;
-def : LoadPatImmOff<i32, extloadi8, or_is_add, LOAD8_U_I32>;
-def : LoadPatImmOff<i32, extloadi16, or_is_add, LOAD16_U_I32>;
-def : LoadPatImmOff<i64, extloadi8, or_is_add, LOAD8_U_I64>;
-def : LoadPatImmOff<i64, extloadi16, or_is_add, LOAD16_U_I64>;
-def : LoadPatImmOff<i64, extloadi32, or_is_add, LOAD32_U_I64>;
+defm : LoadPatImmOff<i32, extloadi8, regPlusImm, "LOAD8_U_I32">;
+defm : LoadPatImmOff<i32, extloadi16, regPlusImm, "LOAD16_U_I32">;
+defm : LoadPatImmOff<i64, extloadi8, regPlusImm, "LOAD8_U_I64">;
+defm : LoadPatImmOff<i64, extloadi16, regPlusImm, "LOAD16_U_I64">;
+defm : LoadPatImmOff<i64, extloadi32, regPlusImm, "LOAD32_U_I64">;
+defm : LoadPatImmOff<i32, extloadi8, or_is_add, "LOAD8_U_I32">;
+defm : LoadPatImmOff<i32, extloadi16, or_is_add, "LOAD16_U_I32">;
+defm : LoadPatImmOff<i64, extloadi8, or_is_add, "LOAD8_U_I64">;
+defm : LoadPatImmOff<i64, extloadi16, or_is_add, "LOAD16_U_I64">;
+defm : LoadPatImmOff<i64, extloadi32, or_is_add, "LOAD32_U_I64">;
// Select "don't care" extending loads with just a constant offset.
-def : LoadPatOffsetOnly<i32, extloadi8, LOAD8_U_I32>;
-def : LoadPatOffsetOnly<i32, extloadi16, LOAD16_U_I32>;
-def : LoadPatOffsetOnly<i64, extloadi8, LOAD8_U_I64>;
-def : LoadPatOffsetOnly<i64, extloadi16, LOAD16_U_I64>;
-def : LoadPatOffsetOnly<i64, extloadi32, LOAD32_U_I64>;
-def : LoadPatGlobalAddrOffOnly<i32, extloadi8, LOAD8_U_I32>;
-def : LoadPatGlobalAddrOffOnly<i32, extloadi16, LOAD16_U_I32>;
-def : LoadPatGlobalAddrOffOnly<i64, extloadi8, LOAD8_U_I64>;
-def : LoadPatGlobalAddrOffOnly<i64, extloadi16, LOAD16_U_I64>;
-def : LoadPatGlobalAddrOffOnly<i64, extloadi32, LOAD32_U_I64>;
+defm : LoadPatOffsetOnly<i32, extloadi8, "LOAD8_U_I32">;
+defm : LoadPatOffsetOnly<i32, extloadi16, "LOAD16_U_I32">;
+defm : LoadPatOffsetOnly<i64, extloadi8, "LOAD8_U_I64">;
+defm : LoadPatOffsetOnly<i64, extloadi16, "LOAD16_U_I64">;
+defm : LoadPatOffsetOnly<i64, extloadi32, "LOAD32_U_I64">;
+defm : LoadPatGlobalAddrOffOnly<i32, extloadi8, "LOAD8_U_I32">;
+defm : LoadPatGlobalAddrOffOnly<i32, extloadi16, "LOAD16_U_I32">;
+defm : LoadPatGlobalAddrOffOnly<i64, extloadi8, "LOAD8_U_I64">;
+defm : LoadPatGlobalAddrOffOnly<i64, extloadi16, "LOAD16_U_I64">;
+defm : LoadPatGlobalAddrOffOnly<i64, extloadi32, "LOAD32_U_I64">;
// Defines atomic and non-atomic stores, regular and truncating
-multiclass WebAssemblyStore<WebAssemblyRegClass rc, string Name, int Opcode> {
+multiclass WebAssemblyStore<WebAssemblyRegClass rc, string Name, int Opcode,
+ list<Predicate> reqs = []> {
let mayStore = 1, UseNamedOperandTable = 1 in
- defm "" : I<(outs),
- (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
- (outs),
- (ins P2Align:$p2align, offset32_op:$off), [],
- !strconcat(Name, "\t${off}(${addr})${p2align}, $val"),
- !strconcat(Name, "\t${off}${p2align}"), Opcode>;
+ defm "_A32" : I<(outs),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
+ (outs),
+ (ins P2Align:$p2align, offset32_op:$off), [],
+ !strconcat(Name, "\t${off}(${addr})${p2align}, $val"),
+ !strconcat(Name, "\t${off}${p2align}"), Opcode, "false">,
+ Requires<reqs>;
+ let mayStore = 1, UseNamedOperandTable = 1 in
+ defm "_A64" : I<(outs),
+ (ins P2Align:$p2align, offset64_op:$off, I64:$addr, rc:$val),
+ (outs),
+ (ins P2Align:$p2align, offset64_op:$off), [],
+ !strconcat(Name, "\t${off}(${addr})${p2align}, $val"),
+ !strconcat(Name, "\t${off}${p2align}"), Opcode, "true">,
+ Requires<reqs>;
}
+
// Basic store.
// Note: WebAssembly inverts SelectionDAG's usual operand order.
defm STORE_I32 : WebAssemblyStore<I32, "i32.store", 0x36>;
@@ -224,43 +264,68 @@ defm STORE_F32 : WebAssemblyStore<F32, "f32.store", 0x38>;
defm STORE_F64 : WebAssemblyStore<F64, "f64.store", 0x39>;
// Select stores with no constant offset.
-class StorePatNoOffset<ValueType ty, PatFrag node, NI inst> :
- Pat<(node ty:$val, I32:$addr), (inst 0, 0, I32:$addr, ty:$val)>;
+multiclass StorePatNoOffset<ValueType ty, PatFrag node, string inst> {
+ def : Pat<(node ty:$val, I32:$addr),
+ (!cast<NI>(inst # "_A32") 0, 0, I32:$addr, ty:$val)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(node ty:$val, I64:$addr),
+ (!cast<NI>(inst # "_A64") 0, 0, I64:$addr, ty:$val)>,
+ Requires<[HasAddr64]>;
+}
-def : StorePatNoOffset<i32, store, STORE_I32>;
-def : StorePatNoOffset<i64, store, STORE_I64>;
-def : StorePatNoOffset<f32, store, STORE_F32>;
-def : StorePatNoOffset<f64, store, STORE_F64>;
+defm : StorePatNoOffset<i32, store, "STORE_I32">;
+defm : StorePatNoOffset<i64, store, "STORE_I64">;
+defm : StorePatNoOffset<f32, store, "STORE_F32">;
+defm : StorePatNoOffset<f64, store, "STORE_F64">;
// Select stores with a constant offset.
-class StorePatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> :
- Pat<(kind ty:$val, (operand I32:$addr, imm:$off)),
- (inst 0, imm:$off, I32:$addr, ty:$val)>;
-
-def : StorePatImmOff<i32, store, regPlusImm, STORE_I32>;
-def : StorePatImmOff<i64, store, regPlusImm, STORE_I64>;
-def : StorePatImmOff<f32, store, regPlusImm, STORE_F32>;
-def : StorePatImmOff<f64, store, regPlusImm, STORE_F64>;
-def : StorePatImmOff<i32, store, or_is_add, STORE_I32>;
-def : StorePatImmOff<i64, store, or_is_add, STORE_I64>;
-def : StorePatImmOff<f32, store, or_is_add, STORE_F32>;
-def : StorePatImmOff<f64, store, or_is_add, STORE_F64>;
+multiclass StorePatImmOff<ValueType ty, PatFrag kind, PatFrag operand,
+ string inst> {
+ def : Pat<(kind ty:$val, (operand I32:$addr, imm:$off)),
+ (!cast<NI>(inst # "_A32") 0, imm:$off, I32:$addr, ty:$val)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(kind ty:$val, (operand I64:$addr, imm:$off)),
+ (!cast<NI>(inst # "_A64") 0, imm:$off, I64:$addr, ty:$val)>,
+ Requires<[HasAddr64]>;
+}
+
+defm : StorePatImmOff<i32, store, regPlusImm, "STORE_I32">;
+defm : StorePatImmOff<i64, store, regPlusImm, "STORE_I64">;
+defm : StorePatImmOff<f32, store, regPlusImm, "STORE_F32">;
+defm : StorePatImmOff<f64, store, regPlusImm, "STORE_F64">;
+defm : StorePatImmOff<i32, store, or_is_add, "STORE_I32">;
+defm : StorePatImmOff<i64, store, or_is_add, "STORE_I64">;
+defm : StorePatImmOff<f32, store, or_is_add, "STORE_F32">;
+defm : StorePatImmOff<f64, store, or_is_add, "STORE_F64">;
// Select stores with just a constant offset.
-class StorePatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
- Pat<(kind ty:$val, imm:$off), (inst 0, imm:$off, (CONST_I32 0), ty:$val)>;
-def : StorePatOffsetOnly<i32, store, STORE_I32>;
-def : StorePatOffsetOnly<i64, store, STORE_I64>;
-def : StorePatOffsetOnly<f32, store, STORE_F32>;
-def : StorePatOffsetOnly<f64, store, STORE_F64>;
-
-class StorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
- Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
- (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>, Requires<[IsNotPIC]>;
-def : StorePatGlobalAddrOffOnly<i32, store, STORE_I32>;
-def : StorePatGlobalAddrOffOnly<i64, store, STORE_I64>;
-def : StorePatGlobalAddrOffOnly<f32, store, STORE_F32>;
-def : StorePatGlobalAddrOffOnly<f64, store, STORE_F64>;
+multiclass StorePatOffsetOnly<ValueType ty, PatFrag kind, string inst> {
+ def : Pat<(kind ty:$val, imm:$off),
+ (!cast<NI>(inst # "_A32") 0, imm:$off, (CONST_I32 0), ty:$val)>,
+ Requires<[HasAddr32]>;
+ def : Pat<(kind ty:$val, imm:$off),
+ (!cast<NI>(inst # "_A64") 0, imm:$off, (CONST_I64 0), ty:$val)>,
+ Requires<[HasAddr64]>;
+}
+defm : StorePatOffsetOnly<i32, store, "STORE_I32">;
+defm : StorePatOffsetOnly<i64, store, "STORE_I64">;
+defm : StorePatOffsetOnly<f32, store, "STORE_F32">;
+defm : StorePatOffsetOnly<f64, store, "STORE_F64">;
+
+multiclass StorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> {
+ def : Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (!cast<NI>(inst # "_A32") 0, tglobaladdr:$off, (CONST_I32 0),
+ ty:$val)>,
+ Requires<[IsNotPIC, HasAddr32]>;
+ def : Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (!cast<NI>(inst # "_A64") 0, tglobaladdr:$off, (CONST_I64 0),
+ ty:$val)>,
+ Requires<[IsNotPIC, HasAddr64]>;
+}
+defm : StorePatGlobalAddrOffOnly<i32, store, "STORE_I32">;
+defm : StorePatGlobalAddrOffOnly<i64, store, "STORE_I64">;
+defm : StorePatGlobalAddrOffOnly<f32, store, "STORE_F32">;
+defm : StorePatGlobalAddrOffOnly<f64, store, "STORE_F64">;
// Truncating store.
defm STORE8_I32 : WebAssemblyStore<I32, "i32.store8", 0x3a>;
@@ -270,51 +335,54 @@ defm STORE16_I64 : WebAssemblyStore<I64, "i64.store16", 0x3d>;
defm STORE32_I64 : WebAssemblyStore<I64, "i64.store32", 0x3e>;
// Select truncating stores with no constant offset.
-def : StorePatNoOffset<i32, truncstorei8, STORE8_I32>;
-def : StorePatNoOffset<i32, truncstorei16, STORE16_I32>;
-def : StorePatNoOffset<i64, truncstorei8, STORE8_I64>;
-def : StorePatNoOffset<i64, truncstorei16, STORE16_I64>;
-def : StorePatNoOffset<i64, truncstorei32, STORE32_I64>;
+defm : StorePatNoOffset<i32, truncstorei8, "STORE8_I32">;
+defm : StorePatNoOffset<i32, truncstorei16, "STORE16_I32">;
+defm : StorePatNoOffset<i64, truncstorei8, "STORE8_I64">;
+defm : StorePatNoOffset<i64, truncstorei16, "STORE16_I64">;
+defm : StorePatNoOffset<i64, truncstorei32, "STORE32_I64">;
// Select truncating stores with a constant offset.
-def : StorePatImmOff<i32, truncstorei8, regPlusImm, STORE8_I32>;
-def : StorePatImmOff<i32, truncstorei16, regPlusImm, STORE16_I32>;
-def : StorePatImmOff<i64, truncstorei8, regPlusImm, STORE8_I64>;
-def : StorePatImmOff<i64, truncstorei16, regPlusImm, STORE16_I64>;
-def : StorePatImmOff<i64, truncstorei32, regPlusImm, STORE32_I64>;
-def : StorePatImmOff<i32, truncstorei8, or_is_add, STORE8_I32>;
-def : StorePatImmOff<i32, truncstorei16, or_is_add, STORE16_I32>;
-def : StorePatImmOff<i64, truncstorei8, or_is_add, STORE8_I64>;
-def : StorePatImmOff<i64, truncstorei16, or_is_add, STORE16_I64>;
-def : StorePatImmOff<i64, truncstorei32, or_is_add, STORE32_I64>;
+defm : StorePatImmOff<i32, truncstorei8, regPlusImm, "STORE8_I32">;
+defm : StorePatImmOff<i32, truncstorei16, regPlusImm, "STORE16_I32">;
+defm : StorePatImmOff<i64, truncstorei8, regPlusImm, "STORE8_I64">;
+defm : StorePatImmOff<i64, truncstorei16, regPlusImm, "STORE16_I64">;
+defm : StorePatImmOff<i64, truncstorei32, regPlusImm, "STORE32_I64">;
+defm : StorePatImmOff<i32, truncstorei8, or_is_add, "STORE8_I32">;
+defm : StorePatImmOff<i32, truncstorei16, or_is_add, "STORE16_I32">;
+defm : StorePatImmOff<i64, truncstorei8, or_is_add, "STORE8_I64">;
+defm : StorePatImmOff<i64, truncstorei16, or_is_add, "STORE16_I64">;
+defm : StorePatImmOff<i64, truncstorei32, or_is_add, "STORE32_I64">;
// Select truncating stores with just a constant offset.
-def : StorePatOffsetOnly<i32, truncstorei8, STORE8_I32>;
-def : StorePatOffsetOnly<i32, truncstorei16, STORE16_I32>;
-def : StorePatOffsetOnly<i64, truncstorei8, STORE8_I64>;
-def : StorePatOffsetOnly<i64, truncstorei16, STORE16_I64>;
-def : StorePatOffsetOnly<i64, truncstorei32, STORE32_I64>;
-def : StorePatGlobalAddrOffOnly<i32, truncstorei8, STORE8_I32>;
-def : StorePatGlobalAddrOffOnly<i32, truncstorei16, STORE16_I32>;
-def : StorePatGlobalAddrOffOnly<i64, truncstorei8, STORE8_I64>;
-def : StorePatGlobalAddrOffOnly<i64, truncstorei16, STORE16_I64>;
-def : StorePatGlobalAddrOffOnly<i64, truncstorei32, STORE32_I64>;
-
+defm : StorePatOffsetOnly<i32, truncstorei8, "STORE8_I32">;
+defm : StorePatOffsetOnly<i32, truncstorei16, "STORE16_I32">;
+defm : StorePatOffsetOnly<i64, truncstorei8, "STORE8_I64">;
+defm : StorePatOffsetOnly<i64, truncstorei16, "STORE16_I64">;
+defm : StorePatOffsetOnly<i64, truncstorei32, "STORE32_I64">;
+defm : StorePatGlobalAddrOffOnly<i32, truncstorei8, "STORE8_I32">;
+defm : StorePatGlobalAddrOffOnly<i32, truncstorei16, "STORE16_I32">;
+defm : StorePatGlobalAddrOffOnly<i64, truncstorei8, "STORE8_I64">;
+defm : StorePatGlobalAddrOffOnly<i64, truncstorei16, "STORE16_I64">;
+defm : StorePatGlobalAddrOffOnly<i64, truncstorei32, "STORE32_I64">;
+
+multiclass MemoryOps<WebAssemblyRegClass rc, string B> {
// Current memory size.
-defm MEMORY_SIZE_I32 : I<(outs I32:$dst), (ins i32imm:$flags),
+defm MEMORY_SIZE_A#B : I<(outs rc:$dst), (ins i32imm:$flags),
(outs), (ins i32imm:$flags),
- [(set I32:$dst,
+ [(set rc:$dst,
(int_wasm_memory_size (i32 imm:$flags)))],
"memory.size\t$dst, $flags", "memory.size\t$flags",
- 0x3f>,
- Requires<[HasAddr32]>;
+ 0x3f>;
// Grow memory.
-defm MEMORY_GROW_I32 : I<(outs I32:$dst), (ins i32imm:$flags, I32:$delta),
+defm MEMORY_GROW_A#B : I<(outs rc:$dst), (ins i32imm:$flags, rc:$delta),
(outs), (ins i32imm:$flags),
- [(set I32:$dst,
+ [(set rc:$dst,
(int_wasm_memory_grow (i32 imm:$flags),
- I32:$delta))],
+ rc:$delta))],
"memory.grow\t$dst, $flags, $delta",
- "memory.grow\t$flags", 0x40>,
- Requires<[HasAddr32]>;
+ "memory.grow\t$flags", 0x40>;
+}
+
+defm : MemoryOps<I32, "32">;
+defm : MemoryOps<I64, "64">;
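
The or_is_add fragment used throughout this file treats an "or" as an "add" when the base address is known to have enough zero low bits to absorb the offset, which is what allows constant offsets to be folded into the load and store patterns above. A quick stand-alone check of that identity, with illustrative values:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Base = 0x1000; // Aligned base: low bits are all zero.
  uint64_t Off = 0x8;     // Offset fits entirely within those zero bits.
  // With no overlapping set bits, or-ing is the same as adding.
  assert((Base | Off) == (Base + Off));
  return 0;
}
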
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
index afe89de60b36..14d723750f07 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// WebAssembly refence type operand codegen constructs.
+/// WebAssembly reference type operand codegen constructs.
///
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 64033c993e3f..4f3da2f35c61 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -40,119 +40,150 @@ def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;
//===----------------------------------------------------------------------===//
// Load: v128.load
-let mayLoad = 1, UseNamedOperandTable = 1 in
-defm LOAD_V128 :
+let mayLoad = 1, UseNamedOperandTable = 1 in {
+defm LOAD_V128_A32 :
SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
"v128.load\t$dst, ${off}(${addr})$p2align",
"v128.load\t$off$p2align", 0>;
+defm LOAD_V128_A64 :
+ SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
+ (outs), (ins P2Align:$p2align, offset64_op:$off), [],
+ "v128.load\t$dst, ${off}(${addr})$p2align",
+ "v128.load\t$off$p2align", 0>;
+}
// Def load and store patterns from WebAssemblyInstrMemory.td for vector types
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
-def : LoadPatNoOffset<vec_t, load, LOAD_V128>;
-def : LoadPatImmOff<vec_t, load, regPlusImm, LOAD_V128>;
-def : LoadPatImmOff<vec_t, load, or_is_add, LOAD_V128>;
-def : LoadPatOffsetOnly<vec_t, load, LOAD_V128>;
-def : LoadPatGlobalAddrOffOnly<vec_t, load, LOAD_V128>;
+defm : LoadPatNoOffset<vec_t, load, "LOAD_V128">;
+defm : LoadPatImmOff<vec_t, load, regPlusImm, "LOAD_V128">;
+defm : LoadPatImmOff<vec_t, load, or_is_add, "LOAD_V128">;
+defm : LoadPatOffsetOnly<vec_t, load, "LOAD_V128">;
+defm : LoadPatGlobalAddrOffOnly<vec_t, load, "LOAD_V128">;
}
// vNxM.load_splat
multiclass SIMDLoadSplat<string vec, bits<32> simdop> {
- let mayLoad = 1, UseNamedOperandTable = 1,
- Predicates = [HasUnimplementedSIMD128] in
- defm LOAD_SPLAT_#vec :
- SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
- (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ let mayLoad = 1, UseNamedOperandTable = 1 in {
+ defm LOAD_SPLAT_#vec#_A32 :
+ SIMD_I<(outs V128:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ (outs),
+ (ins P2Align:$p2align, offset32_op:$off), [],
+ vec#".load_splat\t$dst, ${off}(${addr})$p2align",
+ vec#".load_splat\t$off$p2align", simdop>;
+ defm LOAD_SPLAT_#vec#_A64 :
+ SIMD_I<(outs V128:$dst),
+ (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
+ (outs),
+ (ins P2Align:$p2align, offset64_op:$off), [],
vec#".load_splat\t$dst, ${off}(${addr})$p2align",
vec#".load_splat\t$off$p2align", simdop>;
+ }
}
-defm "" : SIMDLoadSplat<"v8x16", 194>;
-defm "" : SIMDLoadSplat<"v16x8", 195>;
-defm "" : SIMDLoadSplat<"v32x4", 196>;
-defm "" : SIMDLoadSplat<"v64x2", 197>;
+defm "" : SIMDLoadSplat<"v8x16", 7>;
+defm "" : SIMDLoadSplat<"v16x8", 8>;
+defm "" : SIMDLoadSplat<"v32x4", 9>;
+defm "" : SIMDLoadSplat<"v64x2", 10>;
def wasm_load_splat_t : SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>;
def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def load_splat : PatFrag<(ops node:$addr), (wasm_load_splat node:$addr)>;
-let Predicates = [HasUnimplementedSIMD128] in
foreach args = [["v16i8", "v8x16"], ["v8i16", "v16x8"], ["v4i32", "v32x4"],
["v2i64", "v64x2"], ["v4f32", "v32x4"], ["v2f64", "v64x2"]] in {
-def : LoadPatNoOffset<!cast<ValueType>(args[0]),
- load_splat,
- !cast<NI>("LOAD_SPLAT_"#args[1])>;
-def : LoadPatImmOff<!cast<ValueType>(args[0]),
- load_splat,
- regPlusImm,
- !cast<NI>("LOAD_SPLAT_"#args[1])>;
-def : LoadPatImmOff<!cast<ValueType>(args[0]),
- load_splat,
- or_is_add,
- !cast<NI>("LOAD_SPLAT_"#args[1])>;
-def : LoadPatOffsetOnly<!cast<ValueType>(args[0]),
- load_splat,
- !cast<NI>("LOAD_SPLAT_"#args[1])>;
-def : LoadPatGlobalAddrOffOnly<!cast<ValueType>(args[0]),
- load_splat,
- !cast<NI>("LOAD_SPLAT_"#args[1])>;
+defm : LoadPatNoOffset<!cast<ValueType>(args[0]),
+ load_splat,
+ "LOAD_SPLAT_"#args[1]>;
+defm : LoadPatImmOff<!cast<ValueType>(args[0]),
+ load_splat,
+ regPlusImm,
+ "LOAD_SPLAT_"#args[1]>;
+defm : LoadPatImmOff<!cast<ValueType>(args[0]),
+ load_splat,
+ or_is_add,
+ "LOAD_SPLAT_"#args[1]>;
+defm : LoadPatOffsetOnly<!cast<ValueType>(args[0]),
+ load_splat,
+ "LOAD_SPLAT_"#args[1]>;
+defm : LoadPatGlobalAddrOffOnly<!cast<ValueType>(args[0]),
+ load_splat,
+ "LOAD_SPLAT_"#args[1]>;
}
// Load and extend
multiclass SIMDLoadExtend<ValueType vec_t, string name, bits<32> simdop> {
- let mayLoad = 1, UseNamedOperandTable = 1,
- Predicates = [HasUnimplementedSIMD128] in {
- defm LOAD_EXTEND_S_#vec_t :
- SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ let mayLoad = 1, UseNamedOperandTable = 1 in {
+ defm LOAD_EXTEND_S_#vec_t#_A32 :
+ SIMD_I<(outs V128:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
name#"_s\t$dst, ${off}(${addr})$p2align",
name#"_s\t$off$p2align", simdop>;
- defm LOAD_EXTEND_U_#vec_t :
- SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ defm LOAD_EXTEND_U_#vec_t#_A32 :
+ SIMD_I<(outs V128:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
name#"_u\t$dst, ${off}(${addr})$p2align",
name#"_u\t$off$p2align", !add(simdop, 1)>;
+ defm LOAD_EXTEND_S_#vec_t#_A64 :
+ SIMD_I<(outs V128:$dst),
+ (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
+ (outs), (ins P2Align:$p2align, offset64_op:$off), [],
+ name#"_s\t$dst, ${off}(${addr})$p2align",
+ name#"_s\t$off$p2align", simdop>;
+ defm LOAD_EXTEND_U_#vec_t#_A64 :
+ SIMD_I<(outs V128:$dst),
+ (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
+ (outs), (ins P2Align:$p2align, offset64_op:$off), [],
+ name#"_u\t$dst, ${off}(${addr})$p2align",
+ name#"_u\t$off$p2align", !add(simdop, 1)>;
}
}
-defm "" : SIMDLoadExtend<v8i16, "i16x8.load8x8", 210>;
-defm "" : SIMDLoadExtend<v4i32, "i32x4.load16x4", 212>;
-defm "" : SIMDLoadExtend<v2i64, "i64x2.load32x2", 214>;
+defm "" : SIMDLoadExtend<v8i16, "i16x8.load8x8", 1>;
+defm "" : SIMDLoadExtend<v4i32, "i32x4.load16x4", 3>;
+defm "" : SIMDLoadExtend<v2i64, "i64x2.load32x2", 5>;
-let Predicates = [HasUnimplementedSIMD128] in
foreach types = [[v8i16, i8], [v4i32, i16], [v2i64, i32]] in
foreach exts = [["sextloadv", "_S"],
["zextloadv", "_U"],
["extloadv", "_U"]] in {
-def : LoadPatNoOffset<types[0], !cast<PatFrag>(exts[0]#types[1]),
- !cast<NI>("LOAD_EXTEND"#exts[1]#"_"#types[0])>;
-def : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), regPlusImm,
- !cast<NI>("LOAD_EXTEND"#exts[1]#"_"#types[0])>;
-def : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), or_is_add,
- !cast<NI>("LOAD_EXTEND"#exts[1]#"_"#types[0])>;
-def : LoadPatOffsetOnly<types[0], !cast<PatFrag>(exts[0]#types[1]),
- !cast<NI>("LOAD_EXTEND"#exts[1]#"_"#types[0])>;
-def : LoadPatGlobalAddrOffOnly<types[0], !cast<PatFrag>(exts[0]#types[1]),
- !cast<NI>("LOAD_EXTEND"#exts[1]#"_"#types[0])>;
+defm : LoadPatNoOffset<types[0], !cast<PatFrag>(exts[0]#types[1]),
+ "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
+defm : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), regPlusImm,
+ "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
+defm : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), or_is_add,
+ "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
+defm : LoadPatOffsetOnly<types[0], !cast<PatFrag>(exts[0]#types[1]),
+ "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
+defm : LoadPatGlobalAddrOffOnly<types[0], !cast<PatFrag>(exts[0]#types[1]),
+ "LOAD_EXTEND"#exts[1]#"_"#types[0]>;
}
// Store: v128.store
-let mayStore = 1, UseNamedOperandTable = 1 in
-defm STORE_V128 :
+let mayStore = 1, UseNamedOperandTable = 1 in {
+defm STORE_V128_A32 :
SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
"v128.store\t${off}(${addr})$p2align, $vec",
- "v128.store\t$off$p2align", 1>;
-
+ "v128.store\t$off$p2align", 11>;
+defm STORE_V128_A64 :
+ SIMD_I<(outs), (ins P2Align:$p2align, offset64_op:$off, I64:$addr, V128:$vec),
+ (outs), (ins P2Align:$p2align, offset64_op:$off), [],
+ "v128.store\t${off}(${addr})$p2align, $vec",
+ "v128.store\t$off$p2align", 11>;
+}
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
// Def load and store patterns from WebAssemblyInstrMemory.td for vector types
-def : StorePatNoOffset<vec_t, store, STORE_V128>;
-def : StorePatImmOff<vec_t, store, regPlusImm, STORE_V128>;
-def : StorePatImmOff<vec_t, store, or_is_add, STORE_V128>;
-def : StorePatOffsetOnly<vec_t, store, STORE_V128>;
-def : StorePatGlobalAddrOffOnly<vec_t, store, STORE_V128>;
+defm : StorePatNoOffset<vec_t, store, "STORE_V128">;
+defm : StorePatImmOff<vec_t, store, regPlusImm, "STORE_V128">;
+defm : StorePatImmOff<vec_t, store, or_is_add, "STORE_V128">;
+defm : StorePatOffsetOnly<vec_t, store, "STORE_V128">;
+defm : StorePatGlobalAddrOffOnly<vec_t, store, "STORE_V128">;
}
//===----------------------------------------------------------------------===//
@@ -166,7 +197,7 @@ multiclass ConstVec<ValueType vec_t, dag ops, dag pat, string args> {
defm CONST_V128_#vec_t : SIMD_I<(outs V128:$dst), ops, (outs), ops,
[(set V128:$dst, (vec_t pat))],
"v128.const\t$dst, "#args,
- "v128.const\t"#args, 2>;
+ "v128.const\t"#args, 12>;
}
defm "" : ConstVec<v16i8,
@@ -244,7 +275,7 @@ defm SHUFFLE :
"v8x16.shuffle\t"#
"$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
"$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
- 3>;
+ 13>;
// Shuffles after custom lowering
def wasm_shuffle_t : SDTypeProfile<1, 18, []>;
@@ -273,12 +304,11 @@ def : Pat<(vec_t (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y),
// Swizzle lanes: v8x16.swizzle
def wasm_swizzle_t : SDTypeProfile<1, 2, []>;
def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>;
-let Predicates = [HasUnimplementedSIMD128] in
defm SWIZZLE :
SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins),
[(set (v16i8 V128:$dst),
(wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))],
- "v8x16.swizzle\t$dst, $src, $mask", "v8x16.swizzle", 192>;
+ "v8x16.swizzle\t$dst, $src, $mask", "v8x16.swizzle", 14>;
def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)),
(SWIZZLE V128:$src, V128:$mask)>;
@@ -298,19 +328,17 @@ def splat16 : PatFrag<(ops node:$x), (build_vector
multiclass Splat<ValueType vec_t, string vec, WebAssemblyRegClass reg_t,
PatFrag splat_pat, bits<32> simdop> {
- // Prefer splats over v128.const for const splats (65 is lowest that works)
- let AddedComplexity = 65 in
defm SPLAT_#vec_t : SIMD_I<(outs V128:$dst), (ins reg_t:$x), (outs), (ins),
[(set (vec_t V128:$dst), (splat_pat reg_t:$x))],
vec#".splat\t$dst, $x", vec#".splat", simdop>;
}
-defm "" : Splat<v16i8, "i8x16", I32, splat16, 4>;
-defm "" : Splat<v8i16, "i16x8", I32, splat8, 8>;
-defm "" : Splat<v4i32, "i32x4", I32, splat4, 12>;
-defm "" : Splat<v2i64, "i64x2", I64, splat2, 15>;
-defm "" : Splat<v4f32, "f32x4", F32, splat4, 18>;
-defm "" : Splat<v2f64, "f64x2", F64, splat2, 21>;
+defm "" : Splat<v16i8, "i8x16", I32, splat16, 15>;
+defm "" : Splat<v8i16, "i16x8", I32, splat8, 16>;
+defm "" : Splat<v4i32, "i32x4", I32, splat4, 17>;
+defm "" : Splat<v2i64, "i64x2", I64, splat2, 18>;
+defm "" : Splat<v4f32, "f32x4", F32, splat4, 19>;
+defm "" : Splat<v2f64, "f64x2", F64, splat2, 20>;
// scalar_to_vector leaves high lanes undefined, so can be a splat
class ScalarSplatPat<ValueType vec_t, ValueType lane_t,
@@ -330,82 +358,49 @@ def : ScalarSplatPat<v2f64, f64, F64>;
//===----------------------------------------------------------------------===//
// Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u
-multiclass ExtractLane<ValueType vec_t, string vec, ImmLeaf imm_t,
- WebAssemblyRegClass reg_t, bits<32> simdop,
- string suffix = "", SDNode extract = vector_extract> {
+multiclass ExtractLane<ValueType vec_t, string vec, WebAssemblyRegClass reg_t,
+ bits<32> simdop, string suffix = ""> {
defm EXTRACT_LANE_#vec_t#suffix :
SIMD_I<(outs reg_t:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
- (outs), (ins vec_i8imm_op:$idx),
- [(set reg_t:$dst, (extract (vec_t V128:$vec), (i32 imm_t:$idx)))],
+ (outs), (ins vec_i8imm_op:$idx), [],
vec#".extract_lane"#suffix#"\t$dst, $vec, $idx",
vec#".extract_lane"#suffix#"\t$idx", simdop>;
}
-multiclass ExtractPat<ValueType lane_t, int mask> {
- def _s : PatFrag<(ops node:$vec, node:$idx),
- (i32 (sext_inreg
- (i32 (vector_extract
- node:$vec,
- node:$idx
- )),
- lane_t
- ))>;
- def _u : PatFrag<(ops node:$vec, node:$idx),
- (i32 (and
- (i32 (vector_extract
- node:$vec,
- node:$idx
- )),
- (i32 mask)
- ))>;
-}
-
-defm extract_i8x16 : ExtractPat<i8, 0xff>;
-defm extract_i16x8 : ExtractPat<i16, 0xffff>;
-
-multiclass ExtractLaneExtended<string sign, bits<32> baseInst> {
- defm "" : ExtractLane<v16i8, "i8x16", LaneIdx16, I32, baseInst, sign,
- !cast<PatFrag>("extract_i8x16"#sign)>;
- defm "" : ExtractLane<v8i16, "i16x8", LaneIdx8, I32, !add(baseInst, 4), sign,
- !cast<PatFrag>("extract_i16x8"#sign)>;
-}
-
-defm "" : ExtractLaneExtended<"_s", 5>;
-let Predicates = [HasUnimplementedSIMD128] in
-defm "" : ExtractLaneExtended<"_u", 6>;
-defm "" : ExtractLane<v4i32, "i32x4", LaneIdx4, I32, 13>;
-defm "" : ExtractLane<v2i64, "i64x2", LaneIdx2, I64, 16>;
-defm "" : ExtractLane<v4f32, "f32x4", LaneIdx4, F32, 19>;
-defm "" : ExtractLane<v2f64, "f64x2", LaneIdx2, F64, 22>;
-
-// It would be more conventional to use unsigned extracts, but v8
-// doesn't implement them yet
-def : Pat<(i32 (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx))),
- (EXTRACT_LANE_v16i8_s V128:$vec, (i32 LaneIdx16:$idx))>;
-def : Pat<(i32 (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx))),
- (EXTRACT_LANE_v8i16_s V128:$vec, (i32 LaneIdx8:$idx))>;
-
-// Lower undef lane indices to zero
-def : Pat<(and (i32 (vector_extract (v16i8 V128:$vec), undef)), (i32 0xff)),
- (EXTRACT_LANE_v16i8_u V128:$vec, 0)>;
-def : Pat<(and (i32 (vector_extract (v8i16 V128:$vec), undef)), (i32 0xffff)),
- (EXTRACT_LANE_v8i16_u V128:$vec, 0)>;
-def : Pat<(i32 (vector_extract (v16i8 V128:$vec), undef)),
- (EXTRACT_LANE_v16i8_u V128:$vec, 0)>;
-def : Pat<(i32 (vector_extract (v8i16 V128:$vec), undef)),
- (EXTRACT_LANE_v8i16_u V128:$vec, 0)>;
-def : Pat<(sext_inreg (i32 (vector_extract (v16i8 V128:$vec), undef)), i8),
- (EXTRACT_LANE_v16i8_s V128:$vec, 0)>;
-def : Pat<(sext_inreg (i32 (vector_extract (v8i16 V128:$vec), undef)), i16),
- (EXTRACT_LANE_v8i16_s V128:$vec, 0)>;
-def : Pat<(vector_extract (v4i32 V128:$vec), undef),
- (EXTRACT_LANE_v4i32 V128:$vec, 0)>;
-def : Pat<(vector_extract (v2i64 V128:$vec), undef),
- (EXTRACT_LANE_v2i64 V128:$vec, 0)>;
-def : Pat<(vector_extract (v4f32 V128:$vec), undef),
- (EXTRACT_LANE_v4f32 V128:$vec, 0)>;
-def : Pat<(vector_extract (v2f64 V128:$vec), undef),
- (EXTRACT_LANE_v2f64 V128:$vec, 0)>;
+defm "" : ExtractLane<v16i8, "i8x16", I32, 21, "_s">;
+defm "" : ExtractLane<v16i8, "i8x16", I32, 22, "_u">;
+defm "" : ExtractLane<v8i16, "i16x8", I32, 24, "_s">;
+defm "" : ExtractLane<v8i16, "i16x8", I32, 25, "_u">;
+defm "" : ExtractLane<v4i32, "i32x4", I32, 27>;
+defm "" : ExtractLane<v2i64, "i64x2", I64, 29>;
+defm "" : ExtractLane<v4f32, "f32x4", F32, 31>;
+defm "" : ExtractLane<v2f64, "f64x2", F64, 33>;
+
+def : Pat<(vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)),
+ (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>;
+def : Pat<(vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)),
+ (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>;
+def : Pat<(vector_extract (v4i32 V128:$vec), (i32 LaneIdx4:$idx)),
+ (EXTRACT_LANE_v4i32 V128:$vec, imm:$idx)>;
+def : Pat<(vector_extract (v4f32 V128:$vec), (i32 LaneIdx4:$idx)),
+ (EXTRACT_LANE_v4f32 V128:$vec, imm:$idx)>;
+def : Pat<(vector_extract (v2i64 V128:$vec), (i32 LaneIdx2:$idx)),
+ (EXTRACT_LANE_v2i64 V128:$vec, imm:$idx)>;
+def : Pat<(vector_extract (v2f64 V128:$vec), (i32 LaneIdx2:$idx)),
+ (EXTRACT_LANE_v2f64 V128:$vec, imm:$idx)>;
+
+def : Pat<
+ (sext_inreg (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), i8),
+ (EXTRACT_LANE_v16i8_s V128:$vec, imm:$idx)>;
+def : Pat<
+ (and (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), (i32 0xff)),
+ (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>;
+def : Pat<
+ (sext_inreg (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), i16),
+ (EXTRACT_LANE_v8i16_s V128:$vec, imm:$idx)>;
+def : Pat<
+ (and (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), (i32 0xffff)),
+ (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>;
// Replace lane value: replace_lane
multiclass ReplaceLane<ValueType vec_t, string vec, ImmLeaf imm_t,
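
The extract_lane patterns a few lines up pick the signed form when the extracted lane is immediately sign-extended back (sext_inreg with i8 or i16) and the unsigned form when it is masked with 0xff or 0xffff. The scalar arithmetic being matched is plain sign- versus zero-extension of the lane bits; a small illustrative check:

#include <cassert>
#include <cstdint>

int main() {
  uint8_t Lane = 0x80;                        // Raw bits of an i8 lane.
  int32_t Signed = static_cast<int8_t>(Lane); // mirrors i8x16.extract_lane_s
  int32_t Unsigned = Lane & 0xff;             // mirrors i8x16.extract_lane_u
  assert(Signed == -128);
  assert(Unsigned == 128);
  return 0;
}
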
@@ -420,12 +415,12 @@ multiclass ReplaceLane<ValueType vec_t, string vec, ImmLeaf imm_t,
vec#".replace_lane\t$idx", simdop>;
}
-defm "" : ReplaceLane<v16i8, "i8x16", LaneIdx16, I32, i32, 7>;
-defm "" : ReplaceLane<v8i16, "i16x8", LaneIdx8, I32, i32, 11>;
-defm "" : ReplaceLane<v4i32, "i32x4", LaneIdx4, I32, i32, 14>;
-defm "" : ReplaceLane<v2i64, "i64x2", LaneIdx2, I64, i64, 17>;
-defm "" : ReplaceLane<v4f32, "f32x4", LaneIdx4, F32, f32, 20>;
-defm "" : ReplaceLane<v2f64, "f64x2", LaneIdx2, F64, f64, 23>;
+defm "" : ReplaceLane<v16i8, "i8x16", LaneIdx16, I32, i32, 23>;
+defm "" : ReplaceLane<v8i16, "i16x8", LaneIdx8, I32, i32, 26>;
+defm "" : ReplaceLane<v4i32, "i32x4", LaneIdx4, I32, i32, 28>;
+defm "" : ReplaceLane<v2i64, "i64x2", LaneIdx2, I64, i64, 30>;
+defm "" : ReplaceLane<v4f32, "f32x4", LaneIdx4, F32, f32, 32>;
+defm "" : ReplaceLane<v2f64, "f64x2", LaneIdx2, F64, f64, 34>;
// Lower undef lane indices to zero
def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef),
@@ -471,35 +466,35 @@ multiclass SIMDConditionFP<string name, CondCode cond, bits<32> baseInst> {
// Equality: eq
let isCommutable = 1 in {
-defm EQ : SIMDConditionInt<"eq", SETEQ, 24>;
-defm EQ : SIMDConditionFP<"eq", SETOEQ, 64>;
+defm EQ : SIMDConditionInt<"eq", SETEQ, 35>;
+defm EQ : SIMDConditionFP<"eq", SETOEQ, 65>;
} // isCommutable = 1
// Non-equality: ne
let isCommutable = 1 in {
-defm NE : SIMDConditionInt<"ne", SETNE, 25>;
-defm NE : SIMDConditionFP<"ne", SETUNE, 65>;
+defm NE : SIMDConditionInt<"ne", SETNE, 36>;
+defm NE : SIMDConditionFP<"ne", SETUNE, 66>;
} // isCommutable = 1
// Less than: lt_s / lt_u / lt
-defm LT_S : SIMDConditionInt<"lt_s", SETLT, 26>;
-defm LT_U : SIMDConditionInt<"lt_u", SETULT, 27>;
-defm LT : SIMDConditionFP<"lt", SETOLT, 66>;
+defm LT_S : SIMDConditionInt<"lt_s", SETLT, 37>;
+defm LT_U : SIMDConditionInt<"lt_u", SETULT, 38>;
+defm LT : SIMDConditionFP<"lt", SETOLT, 67>;
// Greater than: gt_s / gt_u / gt
-defm GT_S : SIMDConditionInt<"gt_s", SETGT, 28>;
-defm GT_U : SIMDConditionInt<"gt_u", SETUGT, 29>;
-defm GT : SIMDConditionFP<"gt", SETOGT, 67>;
+defm GT_S : SIMDConditionInt<"gt_s", SETGT, 39>;
+defm GT_U : SIMDConditionInt<"gt_u", SETUGT, 40>;
+defm GT : SIMDConditionFP<"gt", SETOGT, 68>;
// Less than or equal: le_s / le_u / le
-defm LE_S : SIMDConditionInt<"le_s", SETLE, 30>;
-defm LE_U : SIMDConditionInt<"le_u", SETULE, 31>;
-defm LE : SIMDConditionFP<"le", SETOLE, 68>;
+defm LE_S : SIMDConditionInt<"le_s", SETLE, 41>;
+defm LE_U : SIMDConditionInt<"le_u", SETULE, 42>;
+defm LE : SIMDConditionFP<"le", SETOLE, 69>;
// Greater than or equal: ge_s / ge_u / ge
-defm GE_S : SIMDConditionInt<"ge_s", SETGE, 32>;
-defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 33>;
-defm GE : SIMDConditionFP<"ge", SETOGE, 69>;
+defm GE_S : SIMDConditionInt<"ge_s", SETGE, 43>;
+defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 44>;
+defm GE : SIMDConditionFP<"ge", SETOGE, 70>;
// Lower float comparisons that don't care about NaN to standard WebAssembly
// float comparisons. These instructions are generated with nnan and in the
@@ -548,19 +543,18 @@ multiclass SIMDUnary<ValueType vec_t, string vec, SDNode node, string name,
// Bitwise logic: v128.not
foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
-defm NOT: SIMDUnary<vec_t, "v128", vnot, "not", 76>;
+defm NOT: SIMDUnary<vec_t, "v128", vnot, "not", 77>;
// Bitwise logic: v128.and / v128.or / v128.xor
let isCommutable = 1 in {
-defm AND : SIMDBitwise<and, "and", 77>;
-defm OR : SIMDBitwise<or, "or", 78>;
-defm XOR : SIMDBitwise<xor, "xor", 79>;
+defm AND : SIMDBitwise<and, "and", 78>;
+defm OR : SIMDBitwise<or, "or", 80>;
+defm XOR : SIMDBitwise<xor, "xor", 81>;
} // isCommutable = 1
// Bitwise logic: v128.andnot
def andnot : PatFrag<(ops node:$left, node:$right), (and $left, (vnot $right))>;
-let Predicates = [HasUnimplementedSIMD128] in
-defm ANDNOT : SIMDBitwise<andnot, "andnot", 216>;
+defm ANDNOT : SIMDBitwise<andnot, "andnot", 79>;
// Bitwise select: v128.bitselect
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
@@ -571,7 +565,7 @@ foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
(vec_t V128:$v1), (vec_t V128:$v2), (vec_t V128:$c)
))
)],
- "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 80>;
+ "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 82>;
// Bitselect is equivalent to (c & v1) | (~c & v2)
foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
@@ -586,9 +580,9 @@ foreach vec_t = [v16i8, v8i16, v4i32, v2i64] in
multiclass SIMDUnaryInt<SDNode node, string name, bits<32> baseInst> {
defm "" : SIMDUnary<v16i8, "i8x16", node, name, baseInst>;
- defm "" : SIMDUnary<v8i16, "i16x8", node, name, !add(baseInst, 17)>;
- defm "" : SIMDUnary<v4i32, "i32x4", node, name, !add(baseInst, 34)>;
- defm "" : SIMDUnary<v2i64, "i64x2", node, name, !add(baseInst, 51)>;
+ defm "" : SIMDUnary<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
+ defm "" : SIMDUnary<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
+ defm "" : SIMDUnary<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
}
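The !add offsets above show the opcode layout this patch moves to: within a family, the i8x16/i16x8/i32x4/i64x2 variants are spaced 32 opcodes apart rather than the previous 17. A small C++ sketch of that arithmetic, with made-up names, purely for illustration.

// Editorial sketch mirroring the TableGen offsets above (+0, +32, +64, +96).
enum class LaneShape { I8x16 = 0, I16x8 = 1, I32x4 = 2, I64x2 = 3 };
constexpr unsigned simdOpcodeFor(unsigned BaseInst, LaneShape Shape) {
  return BaseInst + 32 * static_cast<unsigned>(Shape);
}
static_assert(simdOpcodeFor(97, LaneShape::I32x4) == 161,
              "i32x4 variant of a base-97 instruction under the new layout");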
multiclass SIMDReduceVec<ValueType vec_t, string vec, SDNode op, string name,
@@ -600,22 +594,25 @@ multiclass SIMDReduceVec<ValueType vec_t, string vec, SDNode op, string name,
multiclass SIMDReduce<SDNode op, string name, bits<32> baseInst> {
defm "" : SIMDReduceVec<v16i8, "i8x16", op, name, baseInst>;
- defm "" : SIMDReduceVec<v8i16, "i16x8", op, name, !add(baseInst, 17)>;
- defm "" : SIMDReduceVec<v4i32, "i32x4", op, name, !add(baseInst, 34)>;
- defm "" : SIMDReduceVec<v2i64, "i64x2", op, name, !add(baseInst, 51)>;
+ defm "" : SIMDReduceVec<v8i16, "i16x8", op, name, !add(baseInst, 32)>;
+ defm "" : SIMDReduceVec<v4i32, "i32x4", op, name, !add(baseInst, 64)>;
+ defm "" : SIMDReduceVec<v2i64, "i64x2", op, name, !add(baseInst, 96)>;
}
// Integer vector negation
def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
+// Integer absolute value: abs
+defm ABS : SIMDUnaryInt<abs, "abs", 96>;
+
// Integer negation: neg
-defm NEG : SIMDUnaryInt<ivneg, "neg", 81>;
+defm NEG : SIMDUnaryInt<ivneg, "neg", 97>;
// Any lane true: any_true
-defm ANYTRUE : SIMDReduce<int_wasm_anytrue, "any_true", 82>;
+defm ANYTRUE : SIMDReduce<int_wasm_anytrue, "any_true", 98>;
// All lanes true: all_true
-defm ALLTRUE : SIMDReduce<int_wasm_alltrue, "all_true", 83>;
+defm ALLTRUE : SIMDReduce<int_wasm_alltrue, "all_true", 99>;
// Reductions already return 0 or 1, so and 1, setne 0, and seteq 1
// can be folded out
@@ -639,109 +636,108 @@ def : Pat<(i32 (seteq
(i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>;
}
+multiclass SIMDBitmask<ValueType vec_t, string vec, bits<32> simdop> {
+ defm _#vec_t : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
+ [(set I32:$dst,
+ (i32 (int_wasm_bitmask (vec_t V128:$vec)))
+ )],
+ vec#".bitmask\t$dst, $vec", vec#".bitmask", simdop>;
+}
+
+defm BITMASK : SIMDBitmask<v16i8, "i8x16", 100>;
+defm BITMASK : SIMDBitmask<v8i16, "i16x8", 132>;
+defm BITMASK : SIMDBitmask<v4i32, "i32x4", 164>;
+
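int_wasm_bitmask corresponds to the proposal's bitmask instructions, which gather the top (sign) bit of every lane into the low bits of an i32 result. A hedged scalar model of i8x16.bitmask, not taken from the patch:

#include <cstdint>

// Editorial sketch: bit i of the result is the sign bit of lane i.
static uint32_t i8x16_bitmask(const int8_t Lanes[16]) {
  uint32_t Mask = 0;
  for (int I = 0; I < 16; ++I)
    if (Lanes[I] < 0)
      Mask |= 1u << I;
  return Mask;
}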
//===----------------------------------------------------------------------===//
// Bit shifts
//===----------------------------------------------------------------------===//
-multiclass SIMDShift<ValueType vec_t, string vec, SDNode node, dag shift_vec,
- string name, bits<32> simdop> {
+multiclass SIMDShift<ValueType vec_t, string vec, SDNode node, string name,
+ bits<32> simdop> {
defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x),
(outs), (ins),
- [(set (vec_t V128:$dst),
- (node V128:$vec, (vec_t shift_vec)))],
+ [(set (vec_t V128:$dst), (node V128:$vec, I32:$x))],
vec#"."#name#"\t$dst, $vec, $x", vec#"."#name, simdop>;
}
multiclass SIMDShiftInt<SDNode node, string name, bits<32> baseInst> {
- defm "" : SIMDShift<v16i8, "i8x16", node, (splat16 I32:$x), name, baseInst>;
- defm "" : SIMDShift<v8i16, "i16x8", node, (splat8 I32:$x), name,
- !add(baseInst, 17)>;
- defm "" : SIMDShift<v4i32, "i32x4", node, (splat4 I32:$x), name,
- !add(baseInst, 34)>;
- defm "" : SIMDShift<v2i64, "i64x2", node, (splat2 (i64 (zext I32:$x))),
- name, !add(baseInst, 51)>;
+ defm "" : SIMDShift<v16i8, "i8x16", node, name, baseInst>;
+ defm "" : SIMDShift<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
+ defm "" : SIMDShift<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
+ defm "" : SIMDShift<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
}
-// Left shift by scalar: shl
-defm SHL : SIMDShiftInt<shl, "shl", 84>;
-
-// Right shift by scalar: shr_s / shr_u
-defm SHR_S : SIMDShiftInt<sra, "shr_s", 85>;
-defm SHR_U : SIMDShiftInt<srl, "shr_u", 86>;
-
-// Truncate i64 shift operands to i32s, except if they are already i32s
-foreach shifts = [[shl, SHL_v2i64], [sra, SHR_S_v2i64], [srl, SHR_U_v2i64]] in {
-def : Pat<(v2i64 (shifts[0]
- (v2i64 V128:$vec),
- (v2i64 (splat2 (i64 (sext I32:$x))))
- )),
- (v2i64 (shifts[1] (v2i64 V128:$vec), (i32 I32:$x)))>;
-def : Pat<(v2i64 (shifts[0] (v2i64 V128:$vec), (v2i64 (splat2 I64:$x)))),
- (v2i64 (shifts[1] (v2i64 V128:$vec), (I32_WRAP_I64 I64:$x)))>;
-}
-
-// 2xi64 shifts with constant shift amounts are custom lowered to avoid wrapping
+// WebAssembly SIMD shifts are nonstandard in that the shift amount is
+// an i32 rather than a vector, so they need custom nodes.
def wasm_shift_t : SDTypeProfile<1, 2,
[SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]
>;
def wasm_shl : SDNode<"WebAssemblyISD::VEC_SHL", wasm_shift_t>;
def wasm_shr_s : SDNode<"WebAssemblyISD::VEC_SHR_S", wasm_shift_t>;
def wasm_shr_u : SDNode<"WebAssemblyISD::VEC_SHR_U", wasm_shift_t>;
-foreach shifts = [[wasm_shl, SHL_v2i64],
- [wasm_shr_s, SHR_S_v2i64],
- [wasm_shr_u, SHR_U_v2i64]] in
-def : Pat<(v2i64 (shifts[0] (v2i64 V128:$vec), I32:$x)),
- (v2i64 (shifts[1] (v2i64 V128:$vec), I32:$x))>;
+
+// Left shift by scalar: shl
+defm SHL : SIMDShiftInt<wasm_shl, "shl", 107>;
+
+// Right shift by scalar: shr_s / shr_u
+defm SHR_S : SIMDShiftInt<wasm_shr_s, "shr_s", 108>;
+defm SHR_U : SIMDShiftInt<wasm_shr_u, "shr_u", 109>;
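The custom VEC_SHL/VEC_SHR_* nodes exist because, as the comment above notes, the shift amount is a scalar i32 rather than a vector. In the SIMD proposal the count is interpreted modulo the lane width; a hedged scalar model for i8x16.shl:

#include <cstdint>

// Editorial sketch: each lane is shifted by the scalar count, taken modulo
// the lane width in bits (8 for i8x16).
static void i8x16_shl(uint8_t Lanes[16], uint32_t Count) {
  Count %= 8;
  for (int I = 0; I < 16; ++I)
    Lanes[I] = static_cast<uint8_t>(Lanes[I] << Count);
}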
//===----------------------------------------------------------------------===//
// Integer binary arithmetic
//===----------------------------------------------------------------------===//
+multiclass SIMDBinaryIntNoI8x16<SDNode node, string name, bits<32> baseInst> {
+ defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
+ defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
+ defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
+}
+
multiclass SIMDBinaryIntSmall<SDNode node, string name, bits<32> baseInst> {
defm "" : SIMDBinary<v16i8, "i8x16", node, name, baseInst>;
- defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 17)>;
+ defm "" : SIMDBinary<v8i16, "i16x8", node, name, !add(baseInst, 32)>;
}
multiclass SIMDBinaryIntNoI64x2<SDNode node, string name, bits<32> baseInst> {
defm "" : SIMDBinaryIntSmall<node, name, baseInst>;
- defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 34)>;
+ defm "" : SIMDBinary<v4i32, "i32x4", node, name, !add(baseInst, 64)>;
}
multiclass SIMDBinaryInt<SDNode node, string name, bits<32> baseInst> {
defm "" : SIMDBinaryIntNoI64x2<node, name, baseInst>;
- defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 51)>;
+ defm "" : SIMDBinary<v2i64, "i64x2", node, name, !add(baseInst, 96)>;
}
// Integer addition: add / add_saturate_s / add_saturate_u
let isCommutable = 1 in {
-defm ADD : SIMDBinaryInt<add, "add", 87>;
-defm ADD_SAT_S : SIMDBinaryIntSmall<saddsat, "add_saturate_s", 88>;
-defm ADD_SAT_U : SIMDBinaryIntSmall<uaddsat, "add_saturate_u", 89>;
+defm ADD : SIMDBinaryInt<add, "add", 110>;
+defm ADD_SAT_S : SIMDBinaryIntSmall<saddsat, "add_saturate_s", 111>;
+defm ADD_SAT_U : SIMDBinaryIntSmall<uaddsat, "add_saturate_u", 112>;
} // isCommutable = 1
// Integer subtraction: sub / sub_saturate_s / sub_saturate_u
-defm SUB : SIMDBinaryInt<sub, "sub", 90>;
+defm SUB : SIMDBinaryInt<sub, "sub", 113>;
defm SUB_SAT_S :
- SIMDBinaryIntSmall<int_wasm_sub_saturate_signed, "sub_saturate_s", 91>;
+ SIMDBinaryIntSmall<int_wasm_sub_saturate_signed, "sub_saturate_s", 114>;
defm SUB_SAT_U :
- SIMDBinaryIntSmall<int_wasm_sub_saturate_unsigned, "sub_saturate_u", 92>;
+ SIMDBinaryIntSmall<int_wasm_sub_saturate_unsigned, "sub_saturate_u", 115>;
// Integer multiplication: mul
let isCommutable = 1 in
-defm MUL : SIMDBinaryIntNoI64x2<mul, "mul", 93>;
+defm MUL : SIMDBinaryIntNoI8x16<mul, "mul", 117>;
// Integer min_s / min_u / max_s / max_u
let isCommutable = 1 in {
-defm MIN_S : SIMDBinaryIntNoI64x2<smin, "min_s", 94>;
-defm MIN_U : SIMDBinaryIntNoI64x2<umin, "min_u", 95>;
-defm MAX_S : SIMDBinaryIntNoI64x2<smax, "max_s", 96>;
-defm MAX_U : SIMDBinaryIntNoI64x2<umax, "max_u", 97>;
+defm MIN_S : SIMDBinaryIntNoI64x2<smin, "min_s", 118>;
+defm MIN_U : SIMDBinaryIntNoI64x2<umin, "min_u", 119>;
+defm MAX_S : SIMDBinaryIntNoI64x2<smax, "max_s", 120>;
+defm MAX_U : SIMDBinaryIntNoI64x2<umax, "max_u", 121>;
} // isCommutable = 1
// Integer unsigned rounding average: avgr_u
-let isCommutable = 1, Predicates = [HasUnimplementedSIMD128] in {
-defm AVGR_U : SIMDBinary<v16i8, "i8x16", int_wasm_avgr_unsigned, "avgr_u", 217>;
-defm AVGR_U : SIMDBinary<v8i16, "i16x8", int_wasm_avgr_unsigned, "avgr_u", 218>;
+let isCommutable = 1 in {
+defm AVGR_U : SIMDBinary<v16i8, "i8x16", int_wasm_avgr_unsigned, "avgr_u", 123>;
+defm AVGR_U : SIMDBinary<v8i16, "i16x8", int_wasm_avgr_unsigned, "avgr_u", 155>;
}
def add_nuw : PatFrag<(ops node:$lhs, node:$rhs),
@@ -749,12 +745,12 @@ def add_nuw : PatFrag<(ops node:$lhs, node:$rhs),
"return N->getFlags().hasNoUnsignedWrap();">;
foreach nodes = [[v16i8, splat16], [v8i16, splat8]] in
-def : Pat<(srl
+def : Pat<(wasm_shr_u
(add_nuw
(add_nuw (nodes[0] V128:$lhs), (nodes[0] V128:$rhs)),
(nodes[1] (i32 1))
),
- (nodes[0] (nodes[1] (i32 1)))
+ (i32 1)
),
(!cast<NI>("AVGR_U_"#nodes[0]) V128:$lhs, V128:$rhs)>;
@@ -763,7 +759,7 @@ let isCommutable = 1 in
defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
[(set V128:$dst, (int_wasm_dot V128:$lhs, V128:$rhs))],
"i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", "i32x4.dot_i16x8_s",
- 219>;
+ 180>;
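For reference, i32x4.dot_i16x8_s multiplies corresponding signed 16-bit lanes and sums adjacent pairs of products into 32-bit lanes. A hedged scalar sketch, not from the patch:

#include <cstdint>

// Editorial sketch: Out[i] = A[2i]*B[2i] + A[2i+1]*B[2i+1], all signed.
static void dot_i16x8_s(const int16_t A[8], const int16_t B[8], int32_t Out[4]) {
  for (int I = 0; I < 4; ++I)
    Out[I] = int32_t(A[2 * I]) * B[2 * I] + int32_t(A[2 * I + 1]) * B[2 * I + 1];
}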
//===----------------------------------------------------------------------===//
// Floating-point unary arithmetic
@@ -771,18 +767,27 @@ defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
multiclass SIMDUnaryFP<SDNode node, string name, bits<32> baseInst> {
defm "" : SIMDUnary<v4f32, "f32x4", node, name, baseInst>;
- defm "" : SIMDUnary<v2f64, "f64x2", node, name, !add(baseInst, 11)>;
+ defm "" : SIMDUnary<v2f64, "f64x2", node, name, !add(baseInst, 12)>;
}
// Absolute value: abs
-defm ABS : SIMDUnaryFP<fabs, "abs", 149>;
+defm ABS : SIMDUnaryFP<fabs, "abs", 224>;
// Negation: neg
-defm NEG : SIMDUnaryFP<fneg, "neg", 150>;
+defm NEG : SIMDUnaryFP<fneg, "neg", 225>;
// Square root: sqrt
-let Predicates = [HasUnimplementedSIMD128] in
-defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 151>;
+defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 227>;
+
+// Rounding: ceil, floor, trunc, nearest
+defm CEIL : SIMDUnary<v4f32, "f32x4", int_wasm_ceil, "ceil", 216>;
+defm FLOOR : SIMDUnary<v4f32, "f32x4", int_wasm_floor, "floor", 217>;
+defm TRUNC: SIMDUnary<v4f32, "f32x4", int_wasm_trunc, "trunc", 218>;
+defm NEAREST: SIMDUnary<v4f32, "f32x4", int_wasm_nearest, "nearest", 219>;
+defm CEIL : SIMDUnary<v2f64, "f64x2", int_wasm_ceil, "ceil", 220>;
+defm FLOOR : SIMDUnary<v2f64, "f64x2", int_wasm_floor, "floor", 221>;
+defm TRUNC: SIMDUnary<v2f64, "f64x2", int_wasm_trunc, "trunc", 222>;
+defm NEAREST: SIMDUnary<v2f64, "f64x2", int_wasm_nearest, "nearest", 223>;
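The new rounding instructions apply the usual IEEE rounding functions per lane; nearest rounds to the nearest integer with ties to even. A hedged scalar sketch for one f32 lane, with illustrative naming:

#include <cmath>

// Editorial sketch: per-lane behaviour of the four rounding ops on one float.
// nearbyint in the default rounding mode gives round-to-nearest, ties-to-even.
static void round_lane(float X, float &C, float &F, float &T, float &N) {
  C = std::ceil(X);       // ceil
  F = std::floor(X);      // floor
  T = std::trunc(X);      // trunc
  N = std::nearbyint(X);  // nearest
}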
//===----------------------------------------------------------------------===//
// Floating-point binary arithmetic
@@ -790,29 +795,34 @@ defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 151>;
multiclass SIMDBinaryFP<SDNode node, string name, bits<32> baseInst> {
defm "" : SIMDBinary<v4f32, "f32x4", node, name, baseInst>;
- defm "" : SIMDBinary<v2f64, "f64x2", node, name, !add(baseInst, 11)>;
+ defm "" : SIMDBinary<v2f64, "f64x2", node, name, !add(baseInst, 12)>;
}
// Addition: add
let isCommutable = 1 in
-defm ADD : SIMDBinaryFP<fadd, "add", 154>;
+defm ADD : SIMDBinaryFP<fadd, "add", 228>;
// Subtraction: sub
-defm SUB : SIMDBinaryFP<fsub, "sub", 155>;
+defm SUB : SIMDBinaryFP<fsub, "sub", 229>;
// Multiplication: mul
let isCommutable = 1 in
-defm MUL : SIMDBinaryFP<fmul, "mul", 156>;
+defm MUL : SIMDBinaryFP<fmul, "mul", 230>;
// Division: div
-let Predicates = [HasUnimplementedSIMD128] in
-defm DIV : SIMDBinaryFP<fdiv, "div", 157>;
+defm DIV : SIMDBinaryFP<fdiv, "div", 231>;
// NaN-propagating minimum: min
-defm MIN : SIMDBinaryFP<fminimum, "min", 158>;
+defm MIN : SIMDBinaryFP<fminimum, "min", 232>;
// NaN-propagating maximum: max
-defm MAX : SIMDBinaryFP<fmaximum, "max", 159>;
+defm MAX : SIMDBinaryFP<fmaximum, "max", 233>;
+
+// Pseudo-minimum: pmin
+defm PMIN : SIMDBinaryFP<int_wasm_pmin, "pmin", 234>;
+
+// Pseudo-maximum: pmax
+defm PMAX : SIMDBinaryFP<int_wasm_pmax, "pmax", 235>;
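Unlike min/max above, which propagate NaN, pmin and pmax are defined purely by a comparison, so a NaN second operand is simply never selected. A hedged scalar sketch of the per-lane definition from the SIMD proposal:

// Editorial sketch: pseudo-min/max, lane-wise.
static inline float pmin(float A, float B) { return B < A ? B : A; }
static inline float pmax(float A, float B) { return A < B ? B : A; }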
//===----------------------------------------------------------------------===//
// Conversions
@@ -826,17 +836,13 @@ multiclass SIMDConvert<ValueType vec_t, ValueType arg_t, SDNode op,
name#"\t$dst, $vec", name, simdop>;
}
-// Integer to floating point: convert
-defm "" : SIMDConvert<v4f32, v4i32, sint_to_fp, "f32x4.convert_i32x4_s", 175>;
-defm "" : SIMDConvert<v4f32, v4i32, uint_to_fp, "f32x4.convert_i32x4_u", 176>;
-defm "" : SIMDConvert<v2f64, v2i64, sint_to_fp, "f64x2.convert_i64x2_s", 177>;
-defm "" : SIMDConvert<v2f64, v2i64, uint_to_fp, "f64x2.convert_i64x2_u", 178>;
-
// Floating point to integer with saturation: trunc_sat
-defm "" : SIMDConvert<v4i32, v4f32, fp_to_sint, "i32x4.trunc_sat_f32x4_s", 171>;
-defm "" : SIMDConvert<v4i32, v4f32, fp_to_uint, "i32x4.trunc_sat_f32x4_u", 172>;
-defm "" : SIMDConvert<v2i64, v2f64, fp_to_sint, "i64x2.trunc_sat_f64x2_s", 173>;
-defm "" : SIMDConvert<v2i64, v2f64, fp_to_uint, "i64x2.trunc_sat_f64x2_u", 174>;
+defm "" : SIMDConvert<v4i32, v4f32, fp_to_sint, "i32x4.trunc_sat_f32x4_s", 248>;
+defm "" : SIMDConvert<v4i32, v4f32, fp_to_uint, "i32x4.trunc_sat_f32x4_u", 249>;
+
+// Integer to floating point: convert
+defm "" : SIMDConvert<v4f32, v4i32, sint_to_fp, "f32x4.convert_i32x4_s", 250>;
+defm "" : SIMDConvert<v4f32, v4i32, uint_to_fp, "f32x4.convert_i32x4_u", 251>;
// Widening operations
multiclass SIMDWiden<ValueType vec_t, string vec, ValueType arg_t, string arg,
@@ -851,8 +857,8 @@ multiclass SIMDWiden<ValueType vec_t, string vec, ValueType arg_t, string arg,
vec#".widen_high_"#arg#"_u", !add(baseInst, 3)>;
}
-defm "" : SIMDWiden<v8i16, "i16x8", v16i8, "i8x16", 202>;
-defm "" : SIMDWiden<v4i32, "i32x4", v8i16, "i16x8", 206>;
+defm "" : SIMDWiden<v8i16, "i16x8", v16i8, "i8x16", 135>;
+defm "" : SIMDWiden<v4i32, "i32x4", v8i16, "i16x8", 167>;
// Narrowing operations
multiclass SIMDNarrow<ValueType vec_t, string vec, ValueType arg_t, string arg,
@@ -871,18 +877,14 @@ multiclass SIMDNarrow<ValueType vec_t, string vec, ValueType arg_t, string arg,
!add(baseInst, 1)>;
}
-defm "" : SIMDNarrow<v16i8, "i8x16", v8i16, "i16x8", 198>;
-defm "" : SIMDNarrow<v8i16, "i16x8", v4i32, "i32x4", 200>;
+defm "" : SIMDNarrow<v16i8, "i8x16", v8i16, "i16x8", 101>;
+defm "" : SIMDNarrow<v8i16, "i16x8", v4i32, "i32x4", 133>;
// Lower llvm.wasm.trunc.saturate.* to saturating instructions
def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))),
(fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>;
def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))),
(fp_to_uint_v4i32_v4f32 (v4f32 V128:$src))>;
-def : Pat<(v2i64 (int_wasm_trunc_saturate_signed (v2f64 V128:$src))),
- (fp_to_sint_v2i64_v2f64 (v2f64 V128:$src))>;
-def : Pat<(v2i64 (int_wasm_trunc_saturate_unsigned (v2f64 V128:$src))),
- (fp_to_uint_v2i64_v2f64 (v2f64 V128:$src))>;
// Bitcasts are nops
// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types
@@ -914,5 +916,5 @@ multiclass SIMDQFM<ValueType vec_t, string vec, bits<32> baseInst> {
vec#".qfms\t$dst, $a, $b, $c", vec#".qfms", !add(baseInst, 1)>;
}
-defm "" : SIMDQFM<v4f32, "f32x4", 0x98>;
-defm "" : SIMDQFM<v2f64, "f64x2", 0xa3>;
+defm "" : SIMDQFM<v4f32, "f32x4", 252>;
+defm "" : SIMDQFM<v2f64, "f64x2", 254>;
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
index 75d04252cbe9..346938daf1aa 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-late-eh-prepare"
@@ -31,12 +32,16 @@ class WebAssemblyLateEHPrepare final : public MachineFunctionPass {
}
bool runOnMachineFunction(MachineFunction &MF) override;
+ void recordCatchRetBBs(MachineFunction &MF);
bool addCatches(MachineFunction &MF);
bool replaceFuncletReturns(MachineFunction &MF);
bool removeUnnecessaryUnreachables(MachineFunction &MF);
bool addExceptionExtraction(MachineFunction &MF);
bool restoreStackPointer(MachineFunction &MF);
+ MachineBasicBlock *getMatchingEHPad(MachineInstr *MI);
+ SmallSet<MachineBasicBlock *, 8> CatchRetBBs;
+
public:
static char ID; // Pass identification, replacement for typeid
WebAssemblyLateEHPrepare() : MachineFunctionPass(ID) {}
@@ -57,7 +62,8 @@ FunctionPass *llvm::createWebAssemblyLateEHPrepare() {
// possible search paths should be the same.
// Returns nullptr in case it does not find any EH pad in the search, or finds
// multiple different EH pads.
-static MachineBasicBlock *getMatchingEHPad(MachineInstr *MI) {
+MachineBasicBlock *
+WebAssemblyLateEHPrepare::getMatchingEHPad(MachineInstr *MI) {
MachineFunction *MF = MI->getParent()->getParent();
SmallVector<MachineBasicBlock *, 2> WL;
SmallPtrSet<MachineBasicBlock *, 2> Visited;
@@ -76,7 +82,9 @@ static MachineBasicBlock *getMatchingEHPad(MachineInstr *MI) {
}
if (MBB == &MF->front())
return nullptr;
- WL.append(MBB->pred_begin(), MBB->pred_end());
+ for (auto *Pred : MBB->predecessors())
+ if (!CatchRetBBs.count(Pred)) // We don't go into child scopes
+ WL.push_back(Pred);
}
return EHPad;
}
@@ -110,6 +118,7 @@ bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
if (MF.getFunction().hasPersonalityFn()) {
+ recordCatchRetBBs(MF);
Changed |= addCatches(MF);
Changed |= replaceFuncletReturns(MF);
}
@@ -121,6 +130,21 @@ bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
+// Record which BB ends with 'CATCHRET' instruction, because this will be
+// replaced with BRs later. This set of 'CATCHRET' BBs is necessary in
+// 'getMatchingEHPad' function.
+void WebAssemblyLateEHPrepare::recordCatchRetBBs(MachineFunction &MF) {
+ CatchRetBBs.clear();
+ for (auto &MBB : MF) {
+ auto Pos = MBB.getFirstTerminator();
+ if (Pos == MBB.end())
+ continue;
+ MachineInstr *TI = &*Pos;
+ if (TI->getOpcode() == WebAssembly::CATCHRET)
+ CatchRetBBs.insert(&MBB);
+ }
+}
+
// Add catch instruction to beginning of catchpads and cleanuppads.
bool WebAssemblyLateEHPrepare::addCatches(MachineFunction &MF) {
bool Changed = false;
@@ -343,7 +367,7 @@ bool WebAssemblyLateEHPrepare::addExceptionExtraction(MachineFunction &MF) {
"There is no __clang_call_terminate() function");
Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
BuildMI(ElseMBB, DL, TII.get(WebAssembly::CONST_I32), Reg).addImm(0);
- BuildMI(ElseMBB, DL, TII.get(WebAssembly::CALL_VOID))
+ BuildMI(ElseMBB, DL, TII.get(WebAssembly::CALL))
.addGlobalAddress(ClangCallTerminateFn)
.addReg(Reg);
BuildMI(ElseMBB, DL, TII.get(WebAssembly::UNREACHABLE));
@@ -384,8 +408,8 @@ bool WebAssemblyLateEHPrepare::restoreStackPointer(MachineFunction &MF) {
++InsertPos;
if (InsertPos->getOpcode() == WebAssembly::CATCH)
++InsertPos;
- FrameLowering->writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPos,
- MBB.begin()->getDebugLoc());
+ FrameLowering->writeSPToGlobal(FrameLowering->getSPReg(MF), MF, MBB,
+ InsertPos, MBB.begin()->getDebugLoc());
}
return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
index 4314aa611549..01b3aa887738 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
@@ -191,7 +191,7 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
Register Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQZ_I32), Tmp)
.addReg(Cond);
- MFI.stackifyVReg(Tmp);
+ MFI.stackifyVReg(MRI, Tmp);
Cond = Tmp;
Inverted = true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index 3e905c18fa3b..5fce4a600510 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -208,7 +208,8 @@
///===----------------------------------------------------------------------===//
#include "WebAssembly.h"
-#include "llvm/IR/CallSite.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/CommandLine.h"
@@ -220,10 +221,10 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-lower-em-ehsjlj"
static cl::list<std::string>
- EHWhitelist("emscripten-cxx-exceptions-whitelist",
+ EHAllowlist("emscripten-cxx-exceptions-allowed",
cl::desc("The list of function names in which Emscripten-style "
"exception handling is enabled (see emscripten "
- "EMSCRIPTEN_CATCHING_WHITELIST options)"),
+ "EMSCRIPTEN_CATCHING_ALLOWED options)"),
cl::CommaSeparated);
namespace {
@@ -247,8 +248,8 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
DenseMap<int, Function *> FindMatchingCatches;
// Map of <function signature string, invoke_ wrappers>
StringMap<Function *> InvokeWrappers;
- // Set of whitelisted function names for exception handling
- std::set<std::string> EHWhitelistSet;
+ // Set of allowed function names for exception handling
+ std::set<std::string> EHAllowlistSet;
StringRef getPassName() const override {
return "WebAssembly Lower Emscripten Exceptions";
@@ -258,13 +259,13 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
bool runSjLjOnFunction(Function &F);
Function *getFindMatchingCatch(Module &M, unsigned NumClauses);
- template <typename CallOrInvoke> Value *wrapInvoke(CallOrInvoke *CI);
- void wrapTestSetjmp(BasicBlock *BB, Instruction *InsertPt, Value *Threw,
+ Value *wrapInvoke(CallBase *CI);
+ void wrapTestSetjmp(BasicBlock *BB, DebugLoc DL, Value *Threw,
Value *SetjmpTable, Value *SetjmpTableSize, Value *&Label,
Value *&LongjmpResult, BasicBlock *&EndBB);
- template <typename CallOrInvoke> Function *getInvokeWrapper(CallOrInvoke *CI);
+ Function *getInvokeWrapper(CallBase *CI);
- bool areAllExceptionsAllowed() const { return EHWhitelistSet.empty(); }
+ bool areAllExceptionsAllowed() const { return EHAllowlistSet.empty(); }
bool canLongjmp(Module &M, const Value *Callee) const;
bool isEmAsmCall(Module &M, const Value *Callee) const;
@@ -275,7 +276,7 @@ public:
WebAssemblyLowerEmscriptenEHSjLj(bool EnableEH = true, bool EnableSjLj = true)
: ModulePass(ID), EnableEH(EnableEH), EnableSjLj(EnableSjLj) {
- EHWhitelistSet.insert(EHWhitelist.begin(), EHWhitelist.end());
+ EHAllowlistSet.insert(EHAllowlist.begin(), EHAllowlist.end());
}
bool runOnModule(Module &M) override;
@@ -337,13 +338,31 @@ static std::string getSignature(FunctionType *FTy) {
if (FTy->isVarArg())
OS << "_...";
Sig = OS.str();
- Sig.erase(remove_if(Sig, isspace), Sig.end());
+ Sig.erase(remove_if(Sig, isSpace), Sig.end());
// When s2wasm parses .s file, a comma means the end of an argument. So a
// mangled function name can contain any character but a comma.
std::replace(Sig.begin(), Sig.end(), ',', '.');
return Sig;
}
+static Function *getEmscriptenFunction(FunctionType *Ty, const Twine &Name,
+ Module *M) {
+ Function* F = Function::Create(Ty, GlobalValue::ExternalLinkage, Name, M);
+ // Tell the linker that this function is expected to be imported from the
+ // 'env' module.
+ if (!F->hasFnAttribute("wasm-import-module")) {
+ llvm::AttrBuilder B;
+ B.addAttribute("wasm-import-module", "env");
+ F->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ }
+ if (!F->hasFnAttribute("wasm-import-name")) {
+ llvm::AttrBuilder B;
+ B.addAttribute("wasm-import-name", F->getName());
+ F->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ }
+ return F;
+}
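The helper above is what the later hunks switch the various Function::Create calls over to. A hedged usage sketch; the function type and name mirror the __resumeException registration further down, and the attribute behaviour is as described in the helper's own comment:

// Editorial sketch: creating one of the JS-provided runtime functions through
// the new helper. The string attributes ask the object writer to treat the
// symbol as an import from the "env" module rather than a local definition.
FunctionType *ResumeFTy =
    FunctionType::get(IRB.getVoidTy(), IRB.getInt8PtrTy(), false);
Function *ResumeF = getEmscriptenFunction(ResumeFTy, "__resumeException", &M);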
+
// Returns __cxa_find_matching_catch_N function, where N = NumClauses + 2.
// This is because a landingpad instruction contains two more arguments, a
// personality function and a cleanup bit, and __cxa_find_matching_catch_N
@@ -357,9 +376,8 @@ WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module &M,
PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
SmallVector<Type *, 16> Args(NumClauses, Int8PtrTy);
FunctionType *FTy = FunctionType::get(Int8PtrTy, Args, false);
- Function *F = Function::Create(
- FTy, GlobalValue::ExternalLinkage,
- "__cxa_find_matching_catch_" + Twine(NumClauses + 2), &M);
+ Function *F = getEmscriptenFunction(
+ FTy, "__cxa_find_matching_catch_" + Twine(NumClauses + 2), &M);
FindMatchingCatches[NumClauses] = F;
return F;
}
@@ -371,15 +389,14 @@ WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module &M,
// %__THREW__.val = __THREW__; __THREW__ = 0;
// Returns %__THREW__.val, which indicates whether an exception is thrown (or
// whether longjmp occurred), for future use.
-template <typename CallOrInvoke>
-Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) {
+Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) {
LLVMContext &C = CI->getModule()->getContext();
// If we are calling a function that is noreturn, we must remove that
// attribute. The code we insert here does expect it to return, after we
// catch the exception.
if (CI->doesNotReturn()) {
- if (auto *F = dyn_cast<Function>(CI->getCalledValue()))
+ if (auto *F = CI->getCalledFunction())
F->removeFnAttr(Attribute::NoReturn);
CI->removeAttribute(AttributeList::FunctionIndex, Attribute::NoReturn);
}
@@ -395,7 +412,7 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) {
SmallVector<Value *, 16> Args;
// Put the pointer to the callee as first argument, so it can be called
// within the invoke wrapper later
- Args.push_back(CI->getCalledValue());
+ Args.push_back(CI->getCalledOperand());
Args.append(CI->arg_begin(), CI->arg_end());
CallInst *NewCall = IRB.CreateCall(getInvokeWrapper(CI), Args);
NewCall->takeName(CI);
@@ -443,18 +460,10 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) {
}
// Get matching invoke wrapper based on callee signature
-template <typename CallOrInvoke>
-Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallOrInvoke *CI) {
+Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallBase *CI) {
Module *M = CI->getModule();
SmallVector<Type *, 16> ArgTys;
- Value *Callee = CI->getCalledValue();
- FunctionType *CalleeFTy;
- if (auto *F = dyn_cast<Function>(Callee))
- CalleeFTy = F->getFunctionType();
- else {
- auto *CalleeTy = cast<PointerType>(Callee->getType())->getElementType();
- CalleeFTy = cast<FunctionType>(CalleeTy);
- }
+ FunctionType *CalleeFTy = CI->getFunctionType();
std::string Sig = getSignature(CalleeFTy);
if (InvokeWrappers.find(Sig) != InvokeWrappers.end())
@@ -467,8 +476,7 @@ Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallOrInvoke *CI) {
FunctionType *FTy = FunctionType::get(CalleeFTy->getReturnType(), ArgTys,
CalleeFTy->isVarArg());
- Function *F =
- Function::Create(FTy, GlobalValue::ExternalLinkage, "__invoke_" + Sig, M);
+ Function *F = getEmscriptenFunction(FTy, "__invoke_" + Sig, M);
InvokeWrappers[Sig] = F;
return F;
}
@@ -538,13 +546,13 @@ bool WebAssemblyLowerEmscriptenEHSjLj::isEmAsmCall(Module &M,
// As output parameters. returns %label, %longjmp_result, and the BB the last
// instruction (%longjmp_result = ...) is in.
void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
- BasicBlock *BB, Instruction *InsertPt, Value *Threw, Value *SetjmpTable,
+ BasicBlock *BB, DebugLoc DL, Value *Threw, Value *SetjmpTable,
Value *SetjmpTableSize, Value *&Label, Value *&LongjmpResult,
BasicBlock *&EndBB) {
Function *F = BB->getParent();
LLVMContext &C = BB->getModule()->getContext();
IRBuilder<> IRB(C);
- IRB.SetInsertPoint(InsertPt);
+ IRB.SetCurrentDebugLocation(DL);
// if (%__THREW__.val != 0 & threwValue != 0)
IRB.SetInsertPoint(BB);
@@ -639,12 +647,11 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
// exception handling and setjmp/longjmp handling
ThrewGV = getGlobalVariableI32(M, IRB, "__THREW__");
ThrewValueGV = getGlobalVariableI32(M, IRB, "__threwValue");
- GetTempRet0Func =
- Function::Create(FunctionType::get(IRB.getInt32Ty(), false),
- GlobalValue::ExternalLinkage, "getTempRet0", &M);
- SetTempRet0Func = Function::Create(
+ GetTempRet0Func = getEmscriptenFunction(
+ FunctionType::get(IRB.getInt32Ty(), false), "getTempRet0", &M);
+ SetTempRet0Func = getEmscriptenFunction(
FunctionType::get(IRB.getVoidTy(), IRB.getInt32Ty(), false),
- GlobalValue::ExternalLinkage, "setTempRet0", &M);
+ "setTempRet0", &M);
GetTempRet0Func->setDoesNotThrow();
SetTempRet0Func->setDoesNotThrow();
@@ -655,14 +662,12 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
// Register __resumeException function
FunctionType *ResumeFTy =
FunctionType::get(IRB.getVoidTy(), IRB.getInt8PtrTy(), false);
- ResumeF = Function::Create(ResumeFTy, GlobalValue::ExternalLinkage,
- "__resumeException", &M);
+ ResumeF = getEmscriptenFunction(ResumeFTy, "__resumeException", &M);
// Register llvm_eh_typeid_for function
FunctionType *EHTypeIDTy =
FunctionType::get(IRB.getInt32Ty(), IRB.getInt8PtrTy(), false);
- EHTypeIDF = Function::Create(EHTypeIDTy, GlobalValue::ExternalLinkage,
- "llvm_eh_typeid_for", &M);
+ EHTypeIDF = getEmscriptenFunction(EHTypeIDTy, "llvm_eh_typeid_for", &M);
for (Function &F : M) {
if (F.isDeclaration())
@@ -678,34 +683,30 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
if (LongjmpF) {
// Replace all uses of longjmp with emscripten_longjmp_jmpbuf, which is
// defined in JS code
- EmLongjmpJmpbufF = Function::Create(LongjmpF->getFunctionType(),
- GlobalValue::ExternalLinkage,
- "emscripten_longjmp_jmpbuf", &M);
-
+ EmLongjmpJmpbufF = getEmscriptenFunction(LongjmpF->getFunctionType(),
+ "emscripten_longjmp_jmpbuf", &M);
LongjmpF->replaceAllUsesWith(EmLongjmpJmpbufF);
}
if (SetjmpF) {
// Register saveSetjmp function
FunctionType *SetjmpFTy = SetjmpF->getFunctionType();
- SmallVector<Type *, 4> Params = {SetjmpFTy->getParamType(0),
- IRB.getInt32Ty(), Type::getInt32PtrTy(C),
- IRB.getInt32Ty()};
FunctionType *FTy =
- FunctionType::get(Type::getInt32PtrTy(C), Params, false);
- SaveSetjmpF =
- Function::Create(FTy, GlobalValue::ExternalLinkage, "saveSetjmp", &M);
+ FunctionType::get(Type::getInt32PtrTy(C),
+ {SetjmpFTy->getParamType(0), IRB.getInt32Ty(),
+ Type::getInt32PtrTy(C), IRB.getInt32Ty()},
+ false);
+ SaveSetjmpF = getEmscriptenFunction(FTy, "saveSetjmp", &M);
// Register testSetjmp function
- Params = {IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()};
- FTy = FunctionType::get(IRB.getInt32Ty(), Params, false);
- TestSetjmpF =
- Function::Create(FTy, GlobalValue::ExternalLinkage, "testSetjmp", &M);
+ FTy = FunctionType::get(
+ IRB.getInt32Ty(),
+ {IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()}, false);
+ TestSetjmpF = getEmscriptenFunction(FTy, "testSetjmp", &M);
FTy = FunctionType::get(IRB.getVoidTy(),
{IRB.getInt32Ty(), IRB.getInt32Ty()}, false);
- EmLongjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
- "emscripten_longjmp", &M);
+ EmLongjmpF = getEmscriptenFunction(FTy, "emscripten_longjmp", &M);
// Only traverse functions that uses setjmp in order not to insert
// unnecessary prep / cleanup code in every function
@@ -744,8 +745,8 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
bool Changed = false;
SmallVector<Instruction *, 64> ToErase;
SmallPtrSet<LandingPadInst *, 32> LandingPads;
- bool AllowExceptions =
- areAllExceptionsAllowed() || EHWhitelistSet.count(F.getName());
+ bool AllowExceptions = areAllExceptionsAllowed() ||
+ EHAllowlistSet.count(std::string(F.getName()));
for (BasicBlock &BB : F) {
auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
@@ -755,7 +756,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
LandingPads.insert(II->getLandingPadInst());
IRB.SetInsertPoint(II);
- bool NeedInvoke = AllowExceptions && canThrow(II->getCalledValue());
+ bool NeedInvoke = AllowExceptions && canThrow(II->getCalledOperand());
if (NeedInvoke) {
// Wrap invoke with invoke wrapper and generate preamble/postamble
Value *Threw = wrapInvoke(II);
@@ -770,7 +771,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
// call+branch
SmallVector<Value *, 16> Args(II->arg_begin(), II->arg_end());
CallInst *NewCall =
- IRB.CreateCall(II->getFunctionType(), II->getCalledValue(), Args);
+ IRB.CreateCall(II->getFunctionType(), II->getCalledOperand(), Args);
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setDebugLoc(II->getDebugLoc());
@@ -874,6 +875,27 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
return Changed;
}
+// This tries to get debug info from the instruction before which a new
+// instruction will be inserted, and if there's no debug info in that
+// instruction, tries to get the info instead from the previous instruction (if
+// any). If none of these has debug info and a DISubprogram is provided, it
+// creates a dummy debug info with the first line of the function, because IR
+// verifier requires all inlinable callsites should have debug info when both a
+// caller and callee have DISubprogram. If none of these conditions are met,
+// returns empty info.
+static DebugLoc getOrCreateDebugLoc(const Instruction *InsertBefore,
+ DISubprogram *SP) {
+ assert(InsertBefore);
+ if (InsertBefore->getDebugLoc())
+ return InsertBefore->getDebugLoc();
+ const Instruction *Prev = InsertBefore->getPrevNode();
+ if (Prev && Prev->getDebugLoc())
+ return Prev->getDebugLoc();
+ if (SP)
+ return DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
+ return DebugLoc();
+}
+
bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
Module &M = *F.getParent();
LLVMContext &C = F.getContext();
@@ -891,13 +913,22 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
// this instruction to a constant 4, because this value will be used in
// SSAUpdater.AddAvailableValue(...) later.
BasicBlock &EntryBB = F.getEntryBlock();
+ DebugLoc FirstDL = getOrCreateDebugLoc(&*EntryBB.begin(), F.getSubprogram());
BinaryOperator *SetjmpTableSize = BinaryOperator::Create(
Instruction::Add, IRB.getInt32(4), IRB.getInt32(0), "setjmpTableSize",
&*EntryBB.getFirstInsertionPt());
+ SetjmpTableSize->setDebugLoc(FirstDL);
// setjmpTable = (int *) malloc(40);
Instruction *SetjmpTable = CallInst::CreateMalloc(
SetjmpTableSize, IRB.getInt32Ty(), IRB.getInt32Ty(), IRB.getInt32(40),
nullptr, nullptr, "setjmpTable");
+ SetjmpTable->setDebugLoc(FirstDL);
+ // CallInst::CreateMalloc may return a bitcast instruction if the result types
+ // mismatch. We need to set the debug loc for the original call too.
+ auto *MallocCall = SetjmpTable->stripPointerCasts();
+ if (auto *MallocCallI = dyn_cast<Instruction>(MallocCall)) {
+ MallocCallI->setDebugLoc(FirstDL);
+ }
// setjmpTable[0] = 0;
IRB.SetInsertPoint(SetjmpTableSize);
IRB.CreateStore(IRB.getInt32(0), SetjmpTable);
@@ -966,7 +997,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
if (!CI)
continue;
- const Value *Callee = CI->getCalledValue();
+ const Value *Callee = CI->getCalledOperand();
if (!canLongjmp(M, Callee))
continue;
if (isEmAsmCall(M, Callee))
@@ -1027,12 +1058,13 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
Value *Label = nullptr;
Value *LongjmpResult = nullptr;
BasicBlock *EndBB = nullptr;
- wrapTestSetjmp(BB, CI, Threw, SetjmpTable, SetjmpTableSize, Label,
- LongjmpResult, EndBB);
+ wrapTestSetjmp(BB, CI->getDebugLoc(), Threw, SetjmpTable, SetjmpTableSize,
+ Label, LongjmpResult, EndBB);
assert(Label && LongjmpResult && EndBB);
// Create switch instruction
IRB.SetInsertPoint(EndBB);
+ IRB.SetCurrentDebugLocation(EndBB->getInstList().back().getDebugLoc());
SwitchInst *SI = IRB.CreateSwitch(Label, Tail, SetjmpRetPHIs.size());
// -1 means no longjmp happened, continue normally (will hit the default
// switch case). 0 means a longjmp that is not ours to handle, needs a
@@ -1056,8 +1088,17 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
// Free setjmpTable buffer before each return instruction
for (BasicBlock &BB : F) {
Instruction *TI = BB.getTerminator();
- if (isa<ReturnInst>(TI))
- CallInst::CreateFree(SetjmpTable, TI);
+ if (isa<ReturnInst>(TI)) {
+ DebugLoc DL = getOrCreateDebugLoc(TI, F.getSubprogram());
+ auto *Free = CallInst::CreateFree(SetjmpTable, TI);
+ Free->setDebugLoc(DL);
+ // CallInst::CreateFree may create a bitcast instruction if its argument
+ // types mismatch. We need to set the debug loc for the bitcast too.
+ if (auto *FreeCallI = dyn_cast<CallInst>(Free)) {
+ if (auto *BitCastI = dyn_cast<BitCastInst>(FreeCallI->getArgOperand(0)))
+ BitCastI->setDebugLoc(DL);
+ }
+ }
}
// Every call to saveSetjmp can change setjmpTable and setjmpTableSize
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
index 750b2233e67a..9ccbee819c35 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
@@ -76,9 +76,13 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
!ETy->getTypeAtIndex(2U)->isPointerTy())
return false; // Not (int, ptr, ptr).
- // Collect the contents of @llvm.global_dtors, collated by priority and
- // associated symbol.
- std::map<uint16_t, MapVector<Constant *, std::vector<Constant *>>> DtorFuncs;
+ // Collect the contents of @llvm.global_dtors, ordered by priority. Within a
+ // priority, sequences of destructors with the same associated object are
+ // recorded so that we can register them as a group.
+ std::map<
+ uint16_t,
+ std::vector<std::pair<Constant *, std::vector<Constant *>>>
+ > DtorFuncs;
for (Value *O : InitList->operands()) {
auto *CS = dyn_cast<ConstantStruct>(O);
if (!CS)
@@ -96,7 +100,14 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
Constant *Associated = CS->getOperand(2);
Associated = cast<Constant>(Associated->stripPointerCasts());
- DtorFuncs[PriorityValue][Associated].push_back(DtorFunc);
+ auto &AtThisPriority = DtorFuncs[PriorityValue];
+ if (AtThisPriority.empty() || AtThisPriority.back().first != Associated) {
+ std::vector<Constant *> NewList;
+ NewList.push_back(DtorFunc);
+ AtThisPriority.push_back(std::make_pair(Associated, NewList));
+ } else {
+ AtThisPriority.back().second.push_back(DtorFunc);
+ }
}
if (DtorFuncs.empty())
return false;
@@ -131,14 +142,19 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
// first function with __cxa_atexit.
for (auto &PriorityAndMore : DtorFuncs) {
uint16_t Priority = PriorityAndMore.first;
- for (auto &AssociatedAndMore : PriorityAndMore.second) {
+ uint64_t Id = 0;
+ auto &AtThisPriority = PriorityAndMore.second;
+ for (auto &AssociatedAndMore : AtThisPriority) {
Constant *Associated = AssociatedAndMore.first;
+ auto ThisId = Id++;
Function *CallDtors = Function::Create(
AtExitFuncTy, Function::PrivateLinkage,
"call_dtors" +
(Priority != UINT16_MAX ? (Twine(".") + Twine(Priority))
: Twine()) +
+ (AtThisPriority.size() > 1 ? Twine("$") + Twine(ThisId)
+ : Twine()) +
(!Associated->isNullValue() ? (Twine(".") + Associated->getName())
: Twine()),
&M);
@@ -146,7 +162,7 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
FunctionType *VoidVoid = FunctionType::get(Type::getVoidTy(C),
/*isVarArg=*/false);
- for (auto Dtor : AssociatedAndMore.second)
+ for (auto Dtor : reverse(AssociatedAndMore.second))
CallInst::Create(VoidVoid, Dtor, "", BB);
ReturnInst::Create(C, BB);
@@ -155,6 +171,8 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
"register_call_dtors" +
(Priority != UINT16_MAX ? (Twine(".") + Twine(Priority))
: Twine()) +
+ (AtThisPriority.size() > 1 ? Twine("$") + Twine(ThisId)
+ : Twine()) +
(!Associated->isNullValue() ? (Twine(".") + Associated->getName())
: Twine()),
&M);
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index 59c10243c545..304dca2ebfe4 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -13,10 +13,11 @@
//===----------------------------------------------------------------------===//
#include "WebAssemblyMCInstLower.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "TargetInfo/WebAssemblyTargetInfo.h"
#include "WebAssemblyAsmPrinter.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblyRuntimeLibcallSignatures.h"
-#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Constants.h"
@@ -29,11 +30,6 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-// Defines llvm::WebAssembly::getStackOpcode to convert register instructions to
-// stack instructions
-#define GET_INSTRMAP_INFO 1
-#include "WebAssemblyGenInstrInfo.inc"
-
// This disables the removal of registers when lowering into MC, as required
// by some current tests.
cl::opt<bool>
@@ -56,7 +52,8 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
SmallVector<MVT, 1> ResultMVTs;
SmallVector<MVT, 4> ParamMVTs;
- computeSignatureVTs(FuncTy, CurrentFunc, TM, ParamMVTs, ResultMVTs);
+ const auto *const F = dyn_cast<Function>(Global);
+ computeSignatureVTs(FuncTy, F, CurrentFunc, TM, ParamMVTs, ResultMVTs);
auto Signature = signatureFromMVTs(ResultMVTs, ParamMVTs);
WasmSym->setSignature(Signature.get());
@@ -84,8 +81,9 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0;
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
WasmSym->setGlobalType(wasm::WasmGlobalType{
- uint8_t(Subtarget.hasAddr64() ? wasm::WASM_TYPE_I64
- : wasm::WASM_TYPE_I32),
+ uint8_t(Subtarget.hasAddr64() && strcmp(Name, "__table_base") != 0
+ ? wasm::WASM_TYPE_I64
+ : wasm::WASM_TYPE_I32),
Mutable});
return WasmSym;
}
@@ -208,6 +206,7 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI,
OutMI.setOpcode(MI->getOpcode());
const MCInstrDesc &Desc = MI->getDesc();
+ unsigned NumVariadicDefs = MI->getNumExplicitDefs() - Desc.getNumDefs();
for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
const MachineOperand &MO = MI->getOperand(I);
@@ -229,9 +228,10 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI,
MCOp = MCOperand::createReg(WAReg);
break;
}
- case MachineOperand::MO_Immediate:
- if (I < Desc.NumOperands) {
- const MCOperandInfo &Info = Desc.OpInfo[I];
+ case MachineOperand::MO_Immediate: {
+ unsigned DescIndex = I - NumVariadicDefs;
+ if (DescIndex < Desc.NumOperands) {
+ const MCOperandInfo &Info = Desc.OpInfo[DescIndex];
if (Info.OperandType == WebAssembly::OPERAND_TYPEINDEX) {
SmallVector<wasm::ValType, 4> Returns;
SmallVector<wasm::ValType, 4> Params;
@@ -270,6 +270,7 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI,
}
MCOp = MCOperand::createImm(MO.getImm());
break;
+ }
case MachineOperand::MO_FPImmediate: {
// TODO: MC converts all floating point immediate operands to double.
// This is fine for numeric values, but may cause NaNs to change bits.
@@ -306,13 +307,15 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI,
if (!WasmKeepRegisters)
removeRegisterOperands(MI, OutMI);
+ else if (Desc.variadicOpsAreDefs())
+ OutMI.insert(OutMI.begin(), MCOperand::createImm(MI->getNumExplicitDefs()));
}
static void removeRegisterOperands(const MachineInstr *MI, MCInst &OutMI) {
// Remove all uses of stackified registers to bring the instruction format
// into its final stack form used thruout MC, and transition opcodes to
// their _S variant.
- // We do this seperate from the above code that still may need these
+ // We do this separate from the above code that still may need these
// registers for e.g. call_indirect signatures.
// See comments in lib/Target/WebAssembly/WebAssemblyInstrFormats.td for
// details.
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
index e4cc2389147b..adee2f0553f9 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
@@ -16,14 +16,15 @@
#include "WebAssemblyISelLowering.h"
#include "WebAssemblySubtarget.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
WebAssemblyFunctionInfo::~WebAssemblyFunctionInfo() = default; // anchor.
-void WebAssemblyFunctionInfo::initWARegs() {
+void WebAssemblyFunctionInfo::initWARegs(MachineRegisterInfo &MRI) {
assert(WARegs.empty());
unsigned Reg = UnusedReg;
- WARegs.resize(MF.getRegInfo().getNumVirtRegs(), Reg);
+ WARegs.resize(MRI.getNumVirtRegs(), Reg);
}
void llvm::computeLegalValueVTs(const Function &F, const TargetMachine &TM,
@@ -42,15 +43,17 @@ void llvm::computeLegalValueVTs(const Function &F, const TargetMachine &TM,
}
}
-void llvm::computeSignatureVTs(const FunctionType *Ty, const Function &F,
+void llvm::computeSignatureVTs(const FunctionType *Ty,
+ const Function *TargetFunc,
+ const Function &ContextFunc,
const TargetMachine &TM,
SmallVectorImpl<MVT> &Params,
SmallVectorImpl<MVT> &Results) {
- computeLegalValueVTs(F, TM, Ty->getReturnType(), Results);
+ computeLegalValueVTs(ContextFunc, TM, Ty->getReturnType(), Results);
MVT PtrVT = MVT::getIntegerVT(TM.createDataLayout().getPointerSizeInBits());
if (Results.size() > 1 &&
- !TM.getSubtarget<WebAssemblySubtarget>(F).hasMultivalue()) {
+ !TM.getSubtarget<WebAssemblySubtarget>(ContextFunc).hasMultivalue()) {
// WebAssembly can't lower returns of multiple values without demoting to
// sret unless multivalue is enabled (see
// WebAssemblyTargetLowering::CanLowerReturn). So replace multiple return
@@ -60,9 +63,28 @@ void llvm::computeSignatureVTs(const FunctionType *Ty, const Function &F,
}
for (auto *Param : Ty->params())
- computeLegalValueVTs(F, TM, Param, Params);
+ computeLegalValueVTs(ContextFunc, TM, Param, Params);
if (Ty->isVarArg())
Params.push_back(PtrVT);
+
+ // For swiftcc, emit additional swiftself and swifterror parameters
+ // if there aren't. These additional parameters are also passed for caller.
+ // They are necessary to match callee and caller signature for indirect
+ // call.
+
+ if (TargetFunc && TargetFunc->getCallingConv() == CallingConv::Swift) {
+ MVT PtrVT = MVT::getIntegerVT(TM.createDataLayout().getPointerSizeInBits());
+ bool HasSwiftErrorArg = false;
+ bool HasSwiftSelfArg = false;
+ for (const auto &Arg : TargetFunc->args()) {
+ HasSwiftErrorArg |= Arg.hasAttribute(Attribute::SwiftError);
+ HasSwiftSelfArg |= Arg.hasAttribute(Attribute::SwiftSelf);
+ }
+ if (!HasSwiftErrorArg)
+ Params.push_back(PtrVT);
+ if (!HasSwiftSelfArg)
+ Params.push_back(PtrVT);
+ }
}
void llvm::valTypesFromMVTs(const ArrayRef<MVT> &In,
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
index 16e2f4392984..ca164fdd182c 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -30,8 +30,6 @@ struct WebAssemblyFunctionInfo;
/// This class is derived from MachineFunctionInfo and contains private
/// WebAssembly-specific information for each MachineFunction.
class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
- MachineFunction &MF;
-
std::vector<MVT> Params;
std::vector<MVT> Results;
std::vector<MVT> Locals;
@@ -55,12 +53,18 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
// A virtual register holding the base pointer for functions that have
// overaligned values on the user stack.
unsigned BasePtrVreg = -1U;
+ // A virtual register holding the frame base. This is either FP or SP
+ // after it has been replaced by a vreg
+ unsigned FrameBaseVreg = -1U;
+ // The local holding the frame base. This is either FP or SP
+ // after WebAssemblyExplicitLocals
+ unsigned FrameBaseLocal = -1U;
// Function properties.
bool CFGStackified = false;
public:
- explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {}
+ explicit WebAssemblyFunctionInfo(MachineFunction &MF) {}
~WebAssemblyFunctionInfo() override;
void initializeBaseYamlFields(const yaml::WebAssemblyFunctionInfo &YamlMFI);
@@ -90,12 +94,25 @@ public:
assert(BasePtrVreg != -1U && "Base ptr vreg hasn't been set");
return BasePtrVreg;
}
+ void setFrameBaseVreg(unsigned Reg) { FrameBaseVreg = Reg; }
+ unsigned getFrameBaseVreg() const {
+ assert(FrameBaseVreg != -1U && "Frame base vreg hasn't been set");
+ return FrameBaseVreg;
+ }
+ void clearFrameBaseVreg() { FrameBaseVreg = -1U; }
+ // Return true if the frame base physreg has been replaced by a virtual reg.
+ bool isFrameBaseVirtual() const { return FrameBaseVreg != -1U; }
+ void setFrameBaseLocal(unsigned Local) { FrameBaseLocal = Local; }
+ unsigned getFrameBaseLocal() const {
+ assert(FrameBaseLocal != -1U && "Frame base local hasn't been set");
+ return FrameBaseLocal;
+ }
void setBasePointerVreg(unsigned Reg) { BasePtrVreg = Reg; }
static const unsigned UnusedReg = -1u;
- void stackifyVReg(unsigned VReg) {
- assert(MF.getRegInfo().getUniqueVRegDef(VReg));
+ void stackifyVReg(MachineRegisterInfo &MRI, unsigned VReg) {
+ assert(MRI.getUniqueVRegDef(VReg));
auto I = Register::virtReg2Index(VReg);
if (I >= VRegStackified.size())
VRegStackified.resize(I + 1);
@@ -113,7 +130,7 @@ public:
return VRegStackified.test(I);
}
- void initWARegs();
+ void initWARegs(MachineRegisterInfo &MRI);
void setWAReg(unsigned VReg, unsigned WAReg) {
assert(WAReg != UnusedReg);
auto I = Register::virtReg2Index(VReg);
@@ -140,9 +157,10 @@ void computeLegalValueVTs(const Function &F, const TargetMachine &TM, Type *Ty,
SmallVectorImpl<MVT> &ValueVTs);
// Compute the signature for a given FunctionType (Ty). Note that it's not the
-// signature for F (F is just used to get varous context)
-void computeSignatureVTs(const FunctionType *Ty, const Function &F,
- const TargetMachine &TM, SmallVectorImpl<MVT> &Params,
+// signature for ContextFunc (ContextFunc is just used to get varous context)
+void computeSignatureVTs(const FunctionType *Ty, const Function *TargetFunc,
+ const Function &ContextFunc, const TargetMachine &TM,
+ SmallVectorImpl<MVT> &Params,
SmallVectorImpl<MVT> &Results);
void valTypesFromMVTs(const ArrayRef<MVT> &In,
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
index ac428fcc826a..9aea65cba280 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
@@ -201,8 +201,7 @@ bool WebAssemblyMemIntrinsicResults::runOnMachineFunction(MachineFunction &MF) {
switch (MI.getOpcode()) {
default:
break;
- case WebAssembly::CALL_i32:
- case WebAssembly::CALL_i64:
+ case WebAssembly::CALL:
Changed |= optimizeCall(MBB, MI, MRI, MDT, LIS, TLI, LibInfo);
break;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
index 0bd30791e57c..a2da0ea849e0 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
@@ -20,6 +20,7 @@
//===----------------------------------------------------------------------===//
#include "WebAssembly.h"
+#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -82,10 +83,22 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(
SmallVector<LiveInterval *, 4> SplitLIs;
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
unsigned Reg = Register::index2VirtReg(I);
+ auto &TRI = *MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
+
if (MRI.reg_nodbg_empty(Reg))
continue;
LIS.splitSeparateComponents(LIS.getInterval(Reg), SplitLIs);
+ if (Reg == TRI.getFrameRegister(MF) && SplitLIs.size() > 0) {
+ // The live interval for the frame register was split, resulting in a new
+ // VReg. For now we only support debug info output for a single frame base
+ // value for the function, so just use the last one. It will certainly be
+ // wrong for some part of the function, but until we are able to track
+ // values through live-range splitting and stackification, it will have to
+ // do.
+ MF.getInfo<WebAssemblyFunctionInfo>()->setFrameBaseVreg(
+ SplitLIs.back()->reg);
+ }
SplitLIs.clear();
}
@@ -103,5 +116,5 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(
}
}
- return false;
+ return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
index 9b60596e42b4..96390de8f5e7 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -42,7 +42,7 @@ public:
static char ID;
OptimizeReturned() : FunctionPass(ID) {}
- void visitCallSite(CallSite CS);
+ void visitCallBase(CallBase &CB);
};
} // End anonymous namespace
@@ -55,17 +55,16 @@ FunctionPass *llvm::createWebAssemblyOptimizeReturned() {
return new OptimizeReturned();
}
-void OptimizeReturned::visitCallSite(CallSite CS) {
- for (unsigned I = 0, E = CS.getNumArgOperands(); I < E; ++I)
- if (CS.paramHasAttr(I, Attribute::Returned)) {
- Instruction *Inst = CS.getInstruction();
- Value *Arg = CS.getArgOperand(I);
+void OptimizeReturned::visitCallBase(CallBase &CB) {
+ for (unsigned I = 0, E = CB.getNumArgOperands(); I < E; ++I)
+ if (CB.paramHasAttr(I, Attribute::Returned)) {
+ Value *Arg = CB.getArgOperand(I);
// Ignore constants, globals, undef, etc.
if (isa<Constant>(Arg))
continue;
// Like replaceDominatedUsesWith but using Instruction/Use dominance.
- Arg->replaceUsesWithIf(Inst,
- [&](Use &U) { return DT->dominates(Inst, U); });
+ Arg->replaceUsesWithIf(&CB,
+ [&](Use &U) { return DT->dominates(&CB, U); });
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
index ea6cd09a604c..a587c9d23d2b 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -66,7 +66,7 @@ static bool maybeRewriteToDrop(unsigned OldReg, unsigned NewReg,
Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
MO.setReg(NewReg);
MO.setIsDead();
- MFI.stackifyVReg(NewReg);
+ MFI.stackifyVReg(MRI, NewReg);
}
return Changed;
}
@@ -121,7 +121,7 @@ static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB,
BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(CopyLocalOpc), NewReg)
.addReg(Reg);
MO.setReg(NewReg);
- MFI.stackifyVReg(NewReg);
+ MFI.stackifyVReg(MRI, NewReg);
}
}
@@ -149,8 +149,7 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
switch (MI.getOpcode()) {
default:
break;
- case WebAssembly::CALL_i32:
- case WebAssembly::CALL_i64: {
+ case WebAssembly::CALL: {
MachineOperand &Op1 = MI.getOperand(1);
if (Op1.isSymbol()) {
StringRef Name(Op1.getSymbolName());
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
index 043b6f1b7d18..20fe2b2b7bfc 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
@@ -157,6 +157,9 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
Changed |= Old != New;
UsedColors.set(Color);
Assignments[Color].push_back(LI);
+ // If we reassigned the stack pointer, update the debug frame base info.
+ if (Old != New && MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Old)
+ MFI.setFrameBaseVreg(New);
LLVM_DEBUG(dbgs() << "Assigning vreg" << Register::virtReg2Index(LI->reg)
<< " to vreg" << Register::virtReg2Index(New) << "\n");
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
index 72e7a7cf5042..b655014f4a90 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
@@ -66,7 +66,7 @@ bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) {
WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
MachineRegisterInfo &MRI = MF.getRegInfo();
- MFI.initWARegs();
+ MFI.initWARegs(MRI);
// WebAssembly argument registers are in the same index space as local
// variables. Assign the numbers for them first.
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 421d353a89e8..1d4e2e3a8f9e 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -36,6 +36,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include <iterator>
using namespace llvm;
#define DEBUG_TYPE "wasm-reg-stackify"
@@ -120,6 +121,7 @@ static void convertImplicitDefToConstZero(MachineInstr *MI,
Type::getDoubleTy(MF.getFunction().getContext())));
MI->addOperand(MachineOperand::CreateFPImm(Val));
} else if (RegClass == &WebAssembly::V128RegClass) {
+ // TODO: Replace this with v128.const 0 once that is supported in V8
Register TempReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
MI->setDesc(TII->get(WebAssembly::SPLAT_v4i32));
MI->addOperand(MachineOperand::CreateReg(TempReg, false));
@@ -135,12 +137,12 @@ static void convertImplicitDefToConstZero(MachineInstr *MI,
// Determine whether a call to the callee referenced by
// MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
// effects.
-static void queryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
- bool &Write, bool &Effects, bool &StackPointer) {
+static void queryCallee(const MachineInstr &MI, bool &Read, bool &Write,
+ bool &Effects, bool &StackPointer) {
// All calls can use the stack pointer.
StackPointer = true;
- const MachineOperand &MO = MI.getOperand(CalleeOpNo);
+ const MachineOperand &MO = WebAssembly::getCalleeOp(MI);
if (MO.isGlobal()) {
const Constant *GV = MO.getGlobal();
if (const auto *GA = dyn_cast<GlobalAlias>(GV))
@@ -246,14 +248,14 @@ static void query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
}
// Check for writes to __stack_pointer global.
- if (MI.getOpcode() == WebAssembly::GLOBAL_SET_I32 &&
+ if ((MI.getOpcode() == WebAssembly::GLOBAL_SET_I32 ||
+ MI.getOpcode() == WebAssembly::GLOBAL_SET_I64) &&
strcmp(MI.getOperand(0).getSymbolName(), "__stack_pointer") == 0)
StackPointer = true;
// Analyze calls.
if (MI.isCall()) {
- unsigned CalleeOpNo = WebAssembly::getCalleeOpNo(MI.getOpcode());
- queryCallee(MI, CalleeOpNo, Read, Write, Effects, StackPointer);
+ queryCallee(MI, Read, Write, Effects, StackPointer);
}
}
@@ -313,25 +315,59 @@ static bool hasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI,
// walking the block.
// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
// more precise.
-static bool isSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
- AliasAnalysis &AA, const MachineRegisterInfo &MRI) {
- assert(Def->getParent() == Insert->getParent());
+static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use,
+ const MachineInstr *Insert, AliasAnalysis &AA,
+ const WebAssemblyFunctionInfo &MFI,
+ const MachineRegisterInfo &MRI) {
+ const MachineInstr *DefI = Def->getParent();
+ const MachineInstr *UseI = Use->getParent();
+ assert(DefI->getParent() == Insert->getParent());
+ assert(UseI->getParent() == Insert->getParent());
+
+ // The first def of a multivalue instruction can be stackified by moving,
+ // since the later defs can always be placed into locals if necessary. Later
+ // defs can only be stackified if all previous defs are already stackified
+ // since ExplicitLocals will not know how to place a def in a local if a
+ // subsequent def is stackified. But only one def can be stackified by moving
+ // the instruction, so it must be the first one.
+ //
+ // TODO: This could be loosened to be the first *live* def, but care would
+ // have to be taken to ensure the drops of the initial dead defs can be
+ // placed. This would require checking that no previous defs are used in the
+ // same instruction as subsequent defs.
+ if (Def != DefI->defs().begin())
+ return false;
+
+ // If any subsequent def is used prior to the current value by the same
+ // instruction in which the current value is used, we cannot
+ // stackify. Stackifying in this case would require moving that def below the
+ // current def in the stack, which cannot be achieved, even with locals.
+ for (const auto &SubsequentDef : drop_begin(DefI->defs(), 1)) {
+ for (const auto &PriorUse : UseI->uses()) {
+ if (&PriorUse == Use)
+ break;
+ if (PriorUse.isReg() && SubsequentDef.getReg() == PriorUse.getReg())
+ return false;
+ }
+ }
+
+ // If moving is a semantic nop, it is always allowed
+ const MachineBasicBlock *MBB = DefI->getParent();
+ auto NextI = std::next(MachineBasicBlock::const_iterator(DefI));
+ for (auto E = MBB->end(); NextI != E && NextI->isDebugInstr(); ++NextI)
+ ;
+ if (NextI == Insert)
+ return true;
// 'catch' and 'extract_exception' should be the first instruction of a BB and
// cannot move.
- if (Def->getOpcode() == WebAssembly::CATCH ||
- Def->getOpcode() == WebAssembly::EXTRACT_EXCEPTION_I32) {
- const MachineBasicBlock *MBB = Def->getParent();
- auto NextI = std::next(MachineBasicBlock::const_iterator(Def));
- for (auto E = MBB->end(); NextI != E && NextI->isDebugInstr(); ++NextI)
- ;
- if (NextI != Insert)
- return false;
- }
+ if (DefI->getOpcode() == WebAssembly::CATCH ||
+ DefI->getOpcode() == WebAssembly::EXTRACT_EXCEPTION_I32)
+ return false;
// Check for register dependencies.
SmallVector<unsigned, 4> MutableRegisters;
- for (const MachineOperand &MO : Def->operands()) {
+ for (const MachineOperand &MO : DefI->operands()) {
if (!MO.isReg() || MO.isUndef())
continue;
Register Reg = MO.getReg();
@@ -361,7 +397,7 @@ static bool isSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
}
bool Read = false, Write = false, Effects = false, StackPointer = false;
- query(*Def, AA, Read, Write, Effects, StackPointer);
+ query(*DefI, AA, Read, Write, Effects, StackPointer);
// If the instruction does not access memory and has no side effects, it has
// no additional dependencies.
@@ -369,8 +405,8 @@ static bool isSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
if (!Read && !Write && !Effects && !StackPointer && !HasMutableRegisters)
return true;
- // Scan through the intervening instructions between Def and Insert.
- MachineBasicBlock::const_iterator D(Def), I(Insert);
+ // Scan through the intervening instructions between DefI and Insert.
+ MachineBasicBlock::const_iterator D(DefI), I(Insert);
for (--I; I != D; --I) {
bool InterveningRead = false;
bool InterveningWrite = false;
@@ -495,7 +531,7 @@ static MachineInstr *moveForSingleUse(unsigned Reg, MachineOperand &Op,
if (MRI.hasOneDef(Reg) && MRI.hasOneUse(Reg)) {
// No one else is using this register for anything so we can just stackify
// it in place.
- MFI.stackifyVReg(Reg);
+ MFI.stackifyVReg(MRI, Reg);
} else {
// The register may have unrelated uses or defs; create a new register for
// just our one def and use so that we can stackify it.
@@ -512,7 +548,7 @@ static MachineInstr *moveForSingleUse(unsigned Reg, MachineOperand &Op,
LIS.getInstructionIndex(*Op.getParent()).getRegSlot(),
/*RemoveDeadValNo=*/true);
- MFI.stackifyVReg(NewReg);
+ MFI.stackifyVReg(MRI, NewReg);
DefDIs.updateReg(NewReg);
@@ -541,7 +577,7 @@ static MachineInstr *rematerializeCheapDef(
MachineInstr *Clone = &*std::prev(Insert);
LIS.InsertMachineInstrInMaps(*Clone);
LIS.createAndComputeVirtRegInterval(NewReg);
- MFI.stackifyVReg(NewReg);
+ MFI.stackifyVReg(MRI, NewReg);
imposeStackOrdering(Clone);
LLVM_DEBUG(dbgs() << " - Cloned to "; Clone->dump());
@@ -632,8 +668,8 @@ static MachineInstr *moveAndTeeForMultiUse(
// Finish stackifying the new regs.
LIS.createAndComputeVirtRegInterval(TeeReg);
LIS.createAndComputeVirtRegInterval(DefReg);
- MFI.stackifyVReg(DefReg);
- MFI.stackifyVReg(TeeReg);
+ MFI.stackifyVReg(MRI, DefReg);
+ MFI.stackifyVReg(MRI, TeeReg);
imposeStackOrdering(Def);
imposeStackOrdering(Tee);
@@ -801,32 +837,32 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
CommutingState Commuting;
TreeWalkerState TreeWalker(Insert);
while (!TreeWalker.done()) {
- MachineOperand &Op = TreeWalker.pop();
+ MachineOperand &Use = TreeWalker.pop();
// We're only interested in explicit virtual register operands.
- if (!Op.isReg())
+ if (!Use.isReg())
continue;
- Register Reg = Op.getReg();
- assert(Op.isUse() && "explicit_uses() should only iterate over uses");
- assert(!Op.isImplicit() &&
+ Register Reg = Use.getReg();
+ assert(Use.isUse() && "explicit_uses() should only iterate over uses");
+ assert(!Use.isImplicit() &&
"explicit_uses() should only iterate over explicit operands");
if (Register::isPhysicalRegister(Reg))
continue;
// Identify the definition for this register at this point.
- MachineInstr *Def = getVRegDef(Reg, Insert, MRI, LIS);
- if (!Def)
+ MachineInstr *DefI = getVRegDef(Reg, Insert, MRI, LIS);
+ if (!DefI)
continue;
// Don't nest an INLINE_ASM def into anything, because we don't have
// constraints for $pop outputs.
- if (Def->isInlineAsm())
+ if (DefI->isInlineAsm())
continue;
// Argument instructions represent live-in registers and not real
// instructions.
- if (WebAssembly::isArgument(Def->getOpcode()))
+ if (WebAssembly::isArgument(DefI->getOpcode()))
continue;
// Currently catch's return value register cannot be stackified, because
@@ -843,28 +879,38 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
// register should be assigned to a local to be propagated across
// 'block' boundary now.
//
- // TODO Fix this once we support the multi-value proposal.
- if (Def->getOpcode() == WebAssembly::CATCH)
+ // TODO: Fix this once we support the multivalue blocks
+ if (DefI->getOpcode() == WebAssembly::CATCH)
continue;
+ MachineOperand *Def = DefI->findRegisterDefOperand(Reg);
+ assert(Def != nullptr);
+
// Decide which strategy to take. Prefer to move a single-use value
// over cloning it, and prefer cloning over introducing a tee.
// For moving, we require the def to be in the same block as the use;
// this makes things simpler (LiveIntervals' handleMove function only
// supports intra-block moves) and it's MachineSink's job to catch all
// the sinking opportunities anyway.
- bool SameBlock = Def->getParent() == &MBB;
- bool CanMove = SameBlock && isSafeToMove(Def, Insert, AA, MRI) &&
+ bool SameBlock = DefI->getParent() == &MBB;
+ bool CanMove = SameBlock &&
+ isSafeToMove(Def, &Use, Insert, AA, MFI, MRI) &&
!TreeWalker.isOnStack(Reg);
- if (CanMove && hasOneUse(Reg, Def, MRI, MDT, LIS)) {
- Insert = moveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
- } else if (shouldRematerialize(*Def, AA, TII)) {
+ if (CanMove && hasOneUse(Reg, DefI, MRI, MDT, LIS)) {
+ Insert = moveForSingleUse(Reg, Use, DefI, MBB, Insert, LIS, MFI, MRI);
+
+ // If we are removing the frame base reg completely, remove the debug
+ // info as well.
+ // TODO: Encode this properly as a stackified value.
+ if (MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Reg)
+ MFI.clearFrameBaseVreg();
+ } else if (shouldRematerialize(*DefI, AA, TII)) {
Insert =
- rematerializeCheapDef(Reg, Op, *Def, MBB, Insert->getIterator(),
+ rematerializeCheapDef(Reg, Use, *DefI, MBB, Insert->getIterator(),
LIS, MFI, MRI, TII, TRI);
- } else if (CanMove &&
- oneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) {
- Insert = moveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
+ } else if (CanMove && oneUseDominatesOtherUses(Reg, Use, MBB, MRI, MDT,
+ LIS, MFI)) {
+ Insert = moveAndTeeForMultiUse(Reg, Use, DefI, MBB, Insert, LIS, MFI,
MRI, TII);
} else {
// We failed to stackify the operand. If the problem was ordering
@@ -875,6 +921,25 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
continue;
}
+ // Stackifying a multivalue def may unlock in-place stackification of
+ // subsequent defs. TODO: Handle the case where the consecutive uses are
+ // not all in the same instruction.
+ auto *SubsequentDef = Insert->defs().begin();
+ auto *SubsequentUse = &Use;
+ while (SubsequentDef != Insert->defs().end() &&
+ SubsequentUse != Use.getParent()->uses().end()) {
+ if (!SubsequentDef->isReg() || !SubsequentUse->isReg())
+ break;
+ unsigned DefReg = SubsequentDef->getReg();
+ unsigned UseReg = SubsequentUse->getReg();
+ // TODO: This single-use restriction could be relaxed by using tees
+ if (DefReg != UseReg || !MRI.hasOneUse(DefReg))
+ break;
+ MFI.stackifyVReg(MRI, DefReg);
+ ++SubsequentDef;
+ ++SubsequentUse;
+ }
+
// If the instruction we just stackified is an IMPLICIT_DEF, convert it
// to a constant 0 so that the def is explicit, and the push/pop
// correspondence is maintained.
@@ -912,18 +977,20 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
for (MachineInstr &MI : MBB) {
if (MI.isDebugInstr())
continue;
- for (MachineOperand &MO : reverse(MI.explicit_operands())) {
+ for (MachineOperand &MO : reverse(MI.explicit_uses())) {
if (!MO.isReg())
continue;
Register Reg = MO.getReg();
-
- if (MFI.isVRegStackified(Reg)) {
- if (MO.isDef())
- Stack.push_back(Reg);
- else
- assert(Stack.pop_back_val() == Reg &&
- "Register stack pop should be paired with a push");
- }
+ if (MFI.isVRegStackified(Reg))
+ assert(Stack.pop_back_val() == Reg &&
+ "Register stack pop should be paired with a push");
+ }
+ for (MachineOperand &MO : MI.defs()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (MFI.isVRegStackified(Reg))
+ Stack.push_back(MO.getReg());
}
}
// TODO: Generalize this code to support keeping values on the stack across
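The comments added to isSafeToMove above spell out the multivalue rule: only an instruction's first def may be stackified by moving it, and not if the consuming instruction reads one of the later defs before the value in question. A self-contained model of just that check, with plain ints standing in for registers; canStackifyFirstDef is a hypothetical name.

#include <cstddef>
#include <vector>

// Defs: registers defined by the defining instruction, in operand order.
// Uses: registers read by the consuming instruction, in operand order.
// CandidateReg: the value to stackify (assumed to appear once in Uses).
bool canStackifyFirstDef(const std::vector<int> &Defs,
                         const std::vector<int> &Uses, int CandidateReg) {
  // Only the first def may be stackified by moving the instruction.
  if (Defs.empty() || Defs.front() != CandidateReg)
    return false;
  // A later def must not be consumed before the candidate by the same user;
  // that would force the later def below the candidate on the value stack.
  for (std::size_t D = 1; D < Defs.size(); ++D)
    for (int U : Uses) {
      if (U == CandidateReg)
        break; // uses after the candidate are fine
      if (U == Defs[D])
        return false;
    }
  return true;
}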
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index 789a025794ea..130589c9df8c 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -88,16 +88,17 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
// If this is an address being added to a constant, fold the frame offset
// into the constant.
- if (MI.getOpcode() == WebAssembly::ADD_I32) {
+ if (MI.getOpcode() == WebAssemblyFrameLowering::getOpcAdd(MF)) {
MachineOperand &OtherMO = MI.getOperand(3 - FIOperandNum);
if (OtherMO.isReg()) {
Register OtherMOReg = OtherMO.getReg();
if (Register::isVirtualRegister(OtherMOReg)) {
MachineInstr *Def = MF.getRegInfo().getUniqueVRegDef(OtherMOReg);
// TODO: For now we just opportunistically do this in the case where
- // the CONST_I32 happens to have exactly one def and one use. We
+ // the CONST_I32/64 happens to have exactly one def and one use. We
// should generalize this to optimize in more cases.
- if (Def && Def->getOpcode() == WebAssembly::CONST_I32 &&
+ if (Def && Def->getOpcode() ==
+ WebAssemblyFrameLowering::getOpcConst(MF) &&
MRI.hasOneNonDBGUse(Def->getOperand(0).getReg())) {
MachineOperand &ImmMO = Def->getOperand(1);
ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset));
@@ -109,20 +110,22 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
}
}
- // Otherwise create an i32.add SP, offset and make it the operand.
+ // Otherwise create an i32/64.add SP, offset and make it the operand.
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
unsigned FIRegOperand = FrameRegister;
if (FrameOffset) {
- // Create i32.add SP, offset and make it the operand.
+ // Create i32/64.add SP, offset and make it the operand.
const TargetRegisterClass *PtrRC =
MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
Register OffsetOp = MRI.createVirtualRegister(PtrRC);
- BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32),
+ BuildMI(MBB, *II, II->getDebugLoc(),
+ TII->get(WebAssemblyFrameLowering::getOpcConst(MF)),
OffsetOp)
.addImm(FrameOffset);
FIRegOperand = MRI.createVirtualRegister(PtrRC);
- BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32),
+ BuildMI(MBB, *II, II->getDebugLoc(),
+ TII->get(WebAssemblyFrameLowering::getOpcAdd(MF)),
FIRegOperand)
.addReg(FrameRegister)
.addReg(OffsetOp);
@@ -132,6 +135,10 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
Register
WebAssemblyRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ // If the PReg has been replaced by a VReg, return that.
+ const auto &MFI = MF.getInfo<WebAssemblyFunctionInfo>();
+ if (MFI->isFrameBaseVirtual())
+ return MFI->getFrameBaseVreg();
static const unsigned Regs[2][2] = {
/* !isArch64Bit isArch64Bit */
/* !hasFP */ {WebAssembly::SP32, WebAssembly::SP64},
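getFrameRegister above now prefers the virtual frame base once WebAssemblyReplacePhysRegs has rewritten SP, and otherwise indexes a 2x2 table by hasFP and isArch64Bit. A hypothetical distillation of that selection; the frame-pointer row of the table is outside this excerpt, so FP32/FP64 below are assumed placeholders.

// SP32/SP64/FP32/FP64 are placeholders for the corresponding WebAssembly
// physical registers; the hasFP row is an assumption, not shown in the hunk.
unsigned pickFrameRegister(bool FrameBaseIsVirtual, unsigned FrameBaseVreg,
                           bool HasFP, bool IsArch64Bit, unsigned SP32,
                           unsigned SP64, unsigned FP32, unsigned FP64) {
  // Once the physical SP has been replaced, debug info should describe the
  // recorded virtual frame base instead of a physical register.
  if (FrameBaseIsVirtual)
    return FrameBaseVreg;
  const unsigned Regs[2][2] = {{SP32, SP64},   // !hasFP
                               {FP32, FP64}};  //  hasFP
  return Regs[HasFP ? 1 : 0][IsArch64Bit ? 1 : 0];
}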
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
index 5eafd6c54e78..9f5d6b2a9a47 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
@@ -67,7 +67,7 @@ bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) {
});
MachineRegisterInfo &MRI = MF.getRegInfo();
- const auto &TRI = *MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
+ auto &TRI = *MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
bool Changed = false;
assert(!mustPreserveAnalysisID(LiveIntervalsID) &&
@@ -88,8 +88,18 @@ bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) {
for (auto I = MRI.reg_begin(PReg), E = MRI.reg_end(); I != E;) {
MachineOperand &MO = *I++;
if (!MO.isImplicit()) {
- if (VReg == WebAssembly::NoRegister)
+ if (VReg == WebAssembly::NoRegister) {
VReg = MRI.createVirtualRegister(RC);
+ if (PReg == TRI.getFrameRegister(MF)) {
+ auto FI = MF.getInfo<WebAssemblyFunctionInfo>();
+ assert(!FI->isFrameBaseVirtual());
+ FI->setFrameBaseVreg(VReg);
+ LLVM_DEBUG({
+ dbgs() << "replacing preg " << PReg << " with " << VReg << " ("
+ << Register::virtReg2Index(VReg) << ")\n";
+ });
+ }
+ }
MO.setReg(VReg);
if (MO.getParent()->isDebugValue())
MO.setIsDebug();
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
index c6cf7b6bc551..6456026f4ba7 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
@@ -82,6 +82,7 @@ enum RuntimeLibcallSignature {
func_iPTR_i32,
func_iPTR_i64,
func_iPTR_i64_i64,
+ func_iPTR_i64_i64_i32,
func_iPTR_i64_i64_i64_i64,
func_iPTR_i64_i64_i64_i64_i64_i64,
i32_func_i64_i64,
@@ -173,10 +174,13 @@ struct RuntimeLibcallSignatureTable {
Table[RTLIB::FMA_F128] = func_iPTR_i64_i64_i64_i64_i64_i64;
Table[RTLIB::POWI_F32] = f32_func_f32_i32;
Table[RTLIB::POWI_F64] = f64_func_f64_i32;
- Table[RTLIB::POWI_F128] = func_iPTR_i64_i64_i64_i64;
+ Table[RTLIB::POWI_F128] = func_iPTR_i64_i64_i32;
Table[RTLIB::SQRT_F32] = f32_func_f32;
Table[RTLIB::SQRT_F64] = f64_func_f64;
Table[RTLIB::SQRT_F128] = func_iPTR_i64_i64;
+ Table[RTLIB::CBRT_F32] = f32_func_f32;
+ Table[RTLIB::CBRT_F64] = f64_func_f64;
+ Table[RTLIB::CBRT_F128] = func_iPTR_i64_i64;
Table[RTLIB::LOG_F32] = f32_func_f32;
Table[RTLIB::LOG_F64] = f64_func_f64;
Table[RTLIB::LOG_F128] = func_iPTR_i64_i64;
@@ -829,6 +833,12 @@ void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget,
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
break;
+ case func_iPTR_i64_i64_i32:
+ Params.push_back(PtrTy);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I32);
+ break;
case func_iPTR_i64_i64_i64_i64:
Params.push_back(PtrTy);
Params.push_back(wasm::ValType::I64);
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
index 890e4b8e4e2a..16e05150c64e 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
@@ -20,40 +20,40 @@ WebAssemblySelectionDAGInfo::~WebAssemblySelectionDAGInfo() = default; // anchor
SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemcpy(
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool IsVolatile, bool AlwaysInline,
+ SDValue Size, Align Alignment, bool IsVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
- if (!DAG.getMachineFunction()
- .getSubtarget<WebAssemblySubtarget>()
- .hasBulkMemory())
+ auto &ST = DAG.getMachineFunction().getSubtarget<WebAssemblySubtarget>();
+ if (!ST.hasBulkMemory())
return SDValue();
SDValue MemIdx = DAG.getConstant(0, DL, MVT::i32);
+ auto LenMVT = ST.hasAddr64() ? MVT::i64 : MVT::i32;
return DAG.getNode(WebAssemblyISD::MEMORY_COPY, DL, MVT::Other,
{Chain, MemIdx, MemIdx, Dst, Src,
- DAG.getZExtOrTrunc(Size, DL, MVT::i32)});
+ DAG.getZExtOrTrunc(Size, DL, LenMVT)});
}
SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemmove(
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Op1, SDValue Op2,
- SDValue Op3, unsigned Align, bool IsVolatile,
+ SDValue Op3, Align Alignment, bool IsVolatile,
MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
- return EmitTargetCodeForMemcpy(DAG, DL, Chain, Op1, Op2, Op3, Align,
- IsVolatile, false, DstPtrInfo,
- SrcPtrInfo);
+ return EmitTargetCodeForMemcpy(DAG, DL, Chain, Op1, Op2, Op3,
+ Alignment, IsVolatile, false,
+ DstPtrInfo, SrcPtrInfo);
}
SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemset(
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Val,
- SDValue Size, unsigned Align, bool IsVolatile,
+ SDValue Size, Align Alignment, bool IsVolatile,
MachinePointerInfo DstPtrInfo) const {
- if (!DAG.getMachineFunction()
- .getSubtarget<WebAssemblySubtarget>()
- .hasBulkMemory())
+ auto &ST = DAG.getMachineFunction().getSubtarget<WebAssemblySubtarget>();
+ if (!ST.hasBulkMemory())
return SDValue();
SDValue MemIdx = DAG.getConstant(0, DL, MVT::i32);
+ auto LenMVT = ST.hasAddr64() ? MVT::i64 : MVT::i32;
// Only low byte matters for val argument, so anyext the i8
return DAG.getNode(WebAssemblyISD::MEMORY_FILL, DL, MVT::Other, Chain, MemIdx,
Dst, DAG.getAnyExtOrTrunc(Val, DL, MVT::i32),
- DAG.getZExtOrTrunc(Size, DL, MVT::i32));
+ DAG.getZExtOrTrunc(Size, DL, LenMVT));
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
index 0b90ece27dff..f4d2132fd3af 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
@@ -24,18 +24,19 @@ public:
~WebAssemblySelectionDAGInfo() override;
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Op1, SDValue Op2,
- SDValue Op3, unsigned Align, bool isVolatile,
+ SDValue Op3, Align Alignment, bool isVolatile,
bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const override;
- SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl,
- SDValue Chain, SDValue Op1, SDValue Op2,
- SDValue Op3, unsigned Align, bool isVolatile,
- MachinePointerInfo DstPtrInfo,
- MachinePointerInfo SrcPtrInfo) const override;
+ SDValue
+ EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
+ SDValue Op1, SDValue Op2, SDValue Op3,
+ Align Alignment, bool isVolatile,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const override;
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL,
SDValue Chain, SDValue Op1, SDValue Op2,
- SDValue Op3, unsigned Align, bool IsVolatile,
+ SDValue Op3, Align Alignment, bool IsVolatile,
MachinePointerInfo DstPtrInfo) const override;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
index a249ccf17638..89ae45722e42 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
@@ -65,7 +65,7 @@ static void rewriteP2Align(MachineInstr &MI, unsigned OperandNo) {
assert(MI.getDesc().OpInfo[OperandNo].OperandType ==
WebAssembly::OPERAND_P2ALIGN &&
"Load and store instructions should have a p2align operand");
- uint64_t P2Align = Log2_64((*MI.memoperands_begin())->getAlignment());
+ uint64_t P2Align = Log2((*MI.memoperands_begin())->getAlign());
// WebAssembly does not currently support supernatural alignment.
P2Align = std::min(P2Align,
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
index 196a74565285..cacf5ab078a0 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -25,13 +25,15 @@ using namespace llvm;
#include "WebAssemblyGenSubtargetInfo.inc"
WebAssemblySubtarget &
-WebAssemblySubtarget::initializeSubtargetDependencies(StringRef FS) {
+WebAssemblySubtarget::initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS) {
// Determine default and user-specified characteristics
+ LLVM_DEBUG(llvm::dbgs() << "initializeSubtargetDependencies\n");
- if (CPUString.empty())
- CPUString = "generic";
+ if (CPU.empty())
+ CPU = "generic";
- ParseSubtargetFeatures(CPUString, FS);
+ ParseSubtargetFeatures(CPU, FS);
return *this;
}
@@ -39,10 +41,9 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT,
const std::string &CPU,
const std::string &FS,
const TargetMachine &TM)
- : WebAssemblyGenSubtargetInfo(TT, CPU, FS), CPUString(CPU),
- TargetTriple(TT), FrameLowering(),
- InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(),
- TLInfo(TM, *this) {}
+ : WebAssemblyGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT),
+ FrameLowering(), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
+ TSInfo(), TLInfo(TM, *this) {}
bool WebAssemblySubtarget::enableAtomicExpand() const {
// If atomics are disabled, atomic ops are lowered instead of expanded
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
index 8db2120f9834..8b95a3ddb837 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -47,9 +47,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
bool HasMultivalue = false;
bool HasMutableGlobals = false;
bool HasTailCall = false;
-
- /// String name of used CPU.
- std::string CPUString;
+ bool HasReferenceTypes = false;
/// What processor and OS we're targeting.
Triple TargetTriple;
@@ -59,9 +57,8 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
WebAssemblySelectionDAGInfo TSInfo;
WebAssemblyTargetLowering TLInfo;
- /// Initializes using CPUString and the passed in feature string so that we
- /// can use initializer lists for subtarget initialization.
- WebAssemblySubtarget &initializeSubtargetDependencies(StringRef FS);
+ WebAssemblySubtarget &initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS);
public:
/// This constructor initializes the data members to match that
@@ -104,6 +101,7 @@ public:
bool hasMultivalue() const { return HasMultivalue; }
bool hasMutableGlobals() const { return HasMutableGlobals; }
bool hasTailCall() const { return HasTailCall; }
+ bool hasReferenceTypes() const { return HasReferenceTypes; }
/// Parses features string setting specified subtarget options. Definition of
/// function is auto generated by tblgen.
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 4291b48c16be..7bf655c925a4 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -45,6 +45,16 @@ static cl::opt<bool> EnableEmSjLj(
cl::desc("WebAssembly Emscripten-style setjmp/longjmp handling"),
cl::init(false));
+// A command-line option to keep implicit locals
+// for the purpose of testing with lit/llc ONLY.
+// This produces output which is not valid WebAssembly, and is not supported
+// by assemblers/disassemblers and other MC based tools.
+static cl::opt<bool> WasmDisableExplicitLocals(
+ "wasm-disable-explicit-locals", cl::Hidden,
+ cl::desc("WebAssembly: output implicit locals in"
+ " instruction output for test purposes only."),
+ cl::init(false));
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyTarget() {
// Register the target.
RegisterTargetMachine<WebAssemblyTargetMachine> X(
@@ -75,8 +85,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyTarget() {
initializeWebAssemblyExplicitLocalsPass(PR);
initializeWebAssemblyLowerBrUnlessPass(PR);
initializeWebAssemblyRegNumberingPass(PR);
+ initializeWebAssemblyDebugFixupPass(PR);
initializeWebAssemblyPeepholePass(PR);
- initializeWebAssemblyCallIndirectFixupPass(PR);
}
//===----------------------------------------------------------------------===//
@@ -210,8 +220,8 @@ private:
FeatureBitset coalesceFeatures(const Module &M) {
FeatureBitset Features =
WasmTM
- ->getSubtargetImpl(WasmTM->getTargetCPU(),
- WasmTM->getTargetFeatureString())
+ ->getSubtargetImpl(std::string(WasmTM->getTargetCPU()),
+ std::string(WasmTM->getTargetFeatureString()))
->getFeatureBits();
for (auto &F : M)
Features |= WasmTM->getSubtargetImpl(F)->getFeatureBits();
@@ -274,21 +284,22 @@ private:
void recordFeatures(Module &M, const FeatureBitset &Features, bool Stripped) {
for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) {
- std::string MDKey = (StringRef("wasm-feature-") + KV.Key).str();
- if (KV.Value == WebAssembly::FeatureAtomics && Stripped) {
- // "atomics" is special: code compiled without atomics may have had its
- // atomics lowered to nonatomic operations. In that case, atomics is
- // disallowed to prevent unsafe linking with atomics-enabled objects.
- assert(!Features[WebAssembly::FeatureAtomics] ||
- !Features[WebAssembly::FeatureBulkMemory]);
- M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey,
- wasm::WASM_FEATURE_PREFIX_DISALLOWED);
- } else if (Features[KV.Value]) {
- // Otherwise features are marked Used or not mentioned
+ if (Features[KV.Value]) {
+ // Mark features as used
+ std::string MDKey = (StringRef("wasm-feature-") + KV.Key).str();
M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey,
wasm::WASM_FEATURE_PREFIX_USED);
}
}
+ // Code compiled without atomics or bulk-memory may have had its atomics or
+ // thread-local data lowered to nonatomic operations or non-thread-local
+ // data. In that case, we mark the pseudo-feature "shared-mem" as disallowed
+ // to tell the linker that it would be unsafe to allow this code to be used
+ // in a module with shared memory.
+ if (Stripped) {
+ M.addModuleFlag(Module::ModFlagBehavior::Error, "wasm-feature-shared-mem",
+ wasm::WASM_FEATURE_PREFIX_DISALLOWED);
+ }
}
};
char CoalesceFeaturesAndStripAtomics::ID = 0;
@@ -395,6 +406,10 @@ bool WebAssemblyPassConfig::addInstSelector() {
// it's inconvenient to collect. Collect it now, and update the immediate
// operands.
addPass(createWebAssemblySetP2AlignOperands());
+
+ // Eliminate range checks and add default targets to br_table instructions.
+ addPass(createWebAssemblyFixBrTableDefaults());
+
return false;
}
@@ -423,11 +438,6 @@ void WebAssemblyPassConfig::addPostRegAlloc() {
void WebAssemblyPassConfig::addPreEmitPass() {
TargetPassConfig::addPreEmitPass();
- // Rewrite pseudo call_indirect instructions as real instructions.
- // This needs to run before register stackification, because we change the
- // order of the arguments.
- addPass(createWebAssemblyCallIndirectFixup());
-
// Eliminate multiple-entry loops.
addPass(createWebAssemblyFixIrreducibleControlFlow());
@@ -472,7 +482,8 @@ void WebAssemblyPassConfig::addPreEmitPass() {
addPass(createWebAssemblyCFGStackify());
// Insert explicit local.get and local.set operators.
- addPass(createWebAssemblyExplicitLocals());
+ if (!WasmDisableExplicitLocals)
+ addPass(createWebAssemblyExplicitLocals());
// Lower br_unless into br_if.
addPass(createWebAssemblyLowerBrUnless());
@@ -483,6 +494,10 @@ void WebAssemblyPassConfig::addPreEmitPass() {
// Create a mapping from LLVM CodeGen virtual registers to wasm registers.
addPass(createWebAssemblyRegNumbering());
+
+ // Fix debug_values whose defs have been stackified.
+ if (!WasmDisableExplicitLocals)
+ addPass(createWebAssemblyDebugFixup());
}
yaml::MachineFunctionInfo *
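recordFeatures above emits one "wasm-feature-<name>" module flag marked used per enabled feature, plus a disallowed "wasm-feature-shared-mem" flag when atomics or bulk-memory were stripped. A minimal standalone model of that mapping; the output is a plain std::map and the flag-behavior values are opaque stand-ins.

#include <map>
#include <string>

// Enabled maps feature names to whether they remain on after coalescing;
// Used/Disallowed stand in for the wasm feature-prefix values recorded as
// module flags.
std::map<std::string, int>
wasmFeatureFlags(const std::map<std::string, bool> &Enabled, bool Stripped,
                 int Used, int Disallowed) {
  std::map<std::string, int> Flags;
  for (const auto &KV : Enabled)
    if (KV.second)
      Flags["wasm-feature-" + KV.first] = Used; // mark feature as used
  // Stripping atomics/TLS makes the object unsafe to link into a module with
  // shared memory, so forbid shared-mem outright.
  if (Stripped)
    Flags["wasm-feature-shared-mem"] = Disallowed;
  return Flags;
}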
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
index 850e6b9a9e9e..dd5b39773313 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
@@ -47,7 +47,7 @@ public:
TargetTransformInfo getTargetTransformInfo(const Function &F) override;
- bool usesPhysRegsForPEI() const override { return false; }
+ bool usesPhysRegsForValues() const override { return false; }
yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override;
yaml::MachineFunctionInfo *
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index ac8ad927d334..28703a2787e0 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -44,13 +44,14 @@ unsigned WebAssemblyTTIImpl::getRegisterBitWidth(bool Vector) const {
}
unsigned WebAssemblyTTIImpl::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+ TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
const Instruction *CxtI) {
unsigned Cost = BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
- Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+ Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
if (auto *VTy = dyn_cast<VectorType>(Ty)) {
switch (Opcode) {
@@ -62,10 +63,11 @@ unsigned WebAssemblyTTIImpl::getArithmeticInstrCost(
// approximation.
if (Opd2Info != TTI::OK_UniformValue &&
Opd2Info != TTI::OK_UniformConstantValue)
- Cost = VTy->getNumElements() *
- (TargetTransformInfo::TCC_Basic +
- getArithmeticInstrCost(Opcode, VTy->getElementType()) +
- TargetTransformInfo::TCC_Basic);
+ Cost =
+ cast<FixedVectorType>(VTy)->getNumElements() *
+ (TargetTransformInfo::TCC_Basic +
+ getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
+ TargetTransformInfo::TCC_Basic);
break;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index 2731dda10bec..79588a9f5669 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -57,6 +57,7 @@ public:
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
index a237da8154ab..bc2bb4fd6935 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -49,7 +49,7 @@ bool WebAssembly::mayThrow(const MachineInstr &MI) {
if (!MI.isCall())
return false;
- const MachineOperand &MO = MI.getOperand(getCalleeOpNo(MI.getOpcode()));
+ const MachineOperand &MO = getCalleeOp(MI);
assert(MO.isGlobal() || MO.isSymbol());
if (MO.isSymbol()) {
@@ -79,3 +79,20 @@ bool WebAssembly::mayThrow(const MachineInstr &MI) {
// original LLVM IR? (Even when the callee may throw)
return true;
}
+
+const MachineOperand &WebAssembly::getCalleeOp(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case WebAssembly::CALL:
+ case WebAssembly::CALL_S:
+ case WebAssembly::RET_CALL:
+ case WebAssembly::RET_CALL_S:
+ return MI.getOperand(MI.getNumExplicitDefs());
+ case WebAssembly::CALL_INDIRECT:
+ case WebAssembly::CALL_INDIRECT_S:
+ case WebAssembly::RET_CALL_INDIRECT:
+ case WebAssembly::RET_CALL_INDIRECT_S:
+ return MI.getOperand(MI.getNumOperands() - 1);
+ default:
+ llvm_unreachable("Not a call instruction");
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h
index 26cf84de89b9..4f0ed43a2481 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h
@@ -44,6 +44,10 @@ template <typename T> MachineBasicBlock *getBottom(const T *Unit) {
return Bottom;
}
+/// Returns the operand holding the callee, assuming the argument is a call
+/// instruction.
+const MachineOperand &getCalleeOp(const MachineInstr &MI);
+
} // end namespace WebAssembly
} // end namespace llvm
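getCalleeOp above hides the operand-position difference between direct calls (callee right after the explicit defs) and call_indirect (callee as the last operand). A hedged usage sketch follows, assuming MI.isCall() as in mayThrow and the target-internal header; printCallee is an illustrative name.

#include "WebAssemblyUtilities.h"          // target-internal header
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/raw_ostream.h"

static void printCallee(const llvm::MachineInstr &MI) {
  using namespace llvm;
  const MachineOperand &MO = WebAssembly::getCalleeOp(MI);
  if (MO.isGlobal())
    errs() << "calls @" << MO.getGlobal()->getName() << "\n";
  else if (MO.isSymbol())
    errs() << "calls symbol " << MO.getSymbolName() << "\n";
  else
    errs() << "indirect call through " << MO << "\n"; // call_indirect operand
}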
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt b/contrib/llvm-project/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
index 701b347bcbd7..c9f7574b9a41 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
@@ -86,7 +86,6 @@ lifetime2.C # violates C++ DR1696
# WASI doesn't have setjmp.h yet
pr56982.c
-simd-2.C
# WASI doesn't have pthread.h yet
thread_local3.C
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index d37d812df485..a3014b2aba92 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -31,6 +31,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -39,6 +40,11 @@
using namespace llvm;
+static cl::opt<bool> LVIInlineAsmHardening(
+ "x86-experimental-lvi-inline-asm-hardening",
+ cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
+ " Injection (LVI). This feature is experimental."), cl::Hidden);
+
static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
@@ -74,7 +80,7 @@ class X86AsmParser : public MCTargetAsmParser {
enum VEXEncoding {
VEXEncoding_Default,
- VEXEncoding_VEX2,
+ VEXEncoding_VEX,
VEXEncoding_VEX3,
VEXEncoding_EVEX,
};
@@ -326,6 +332,7 @@ private:
IES_PLUS,
IES_MINUS,
IES_OFFSET,
+ IES_CAST,
IES_NOT,
IES_MULTIPLY,
IES_DIVIDE,
@@ -352,6 +359,7 @@ private:
bool MemExpr;
bool OffsetOperator;
SMLoc OffsetOperatorLoc;
+ StringRef CurType;
bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
if (Sym) {
@@ -379,6 +387,7 @@ private:
unsigned getScale() { return Scale; }
const MCExpr *getSym() { return Sym; }
StringRef getSymName() { return SymName; }
+ StringRef getType() { return CurType; }
int64_t getImm() { return Imm + IC.execute(); }
bool isValidEndState() {
return State == IES_RBRAC || State == IES_INTEGER;
@@ -611,9 +620,9 @@ private:
}
bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
const InlineAsmIdentifierInfo &IDInfo,
- bool ParsingInlineAsm, StringRef &ErrMsg) {
+ bool ParsingMSInlineAsm, StringRef &ErrMsg) {
// InlineAsm: Treat an enum value as an integer
- if (ParsingInlineAsm)
+ if (ParsingMSInlineAsm)
if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
// Treat a symbolic constant like an integer
@@ -624,6 +633,7 @@ private:
default:
State = IES_ERROR;
break;
+ case IES_CAST:
case IES_PLUS:
case IES_MINUS:
case IES_NOT:
@@ -634,7 +644,7 @@ private:
MemExpr = true;
State = IES_INTEGER;
IC.pushOperand(IC_IMM);
- if (ParsingInlineAsm)
+ if (ParsingMSInlineAsm)
Info = IDInfo;
break;
}
@@ -736,6 +746,7 @@ private:
IC.pushOperator(IC_PLUS);
break;
case IES_INIT:
+ case IES_CAST:
assert(!BracCount && "BracCount should be zero on parsing's start");
State = IES_LBRAC;
break;
@@ -808,6 +819,7 @@ private:
case IES_INTEGER:
case IES_OFFSET:
case IES_REGISTER:
+ case IES_RBRAC:
case IES_RPAREN:
State = IES_RPAREN;
IC.pushOperator(IC_RPAREN);
@@ -815,7 +827,7 @@ private:
}
}
bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
- const InlineAsmIdentifierInfo &IDInfo, bool ParsingInlineAsm,
+ const InlineAsmIdentifierInfo &IDInfo, bool ParsingMSInlineAsm,
StringRef &ErrMsg) {
PrevState = State;
switch (State) {
@@ -833,13 +845,26 @@ private:
// As we cannot yet resolve the actual value (offset), we retain
// the requested semantics by pushing a '0' to the operands stack
IC.pushOperand(IC_IMM);
- if (ParsingInlineAsm) {
+ if (ParsingMSInlineAsm) {
Info = IDInfo;
}
break;
}
return false;
}
+ void onCast(StringRef Type) {
+ PrevState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_LPAREN:
+ setType(Type);
+ State = IES_CAST;
+ break;
+ }
+ }
+ void setType(StringRef Type) { CurType = Type; }
};
bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
@@ -858,6 +883,11 @@ private:
return nullptr;
}
+ bool MatchRegisterByName(unsigned &RegNo, StringRef RegName, SMLoc StartLoc,
+ SMLoc EndLoc);
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
+ bool RestoreOnFailure);
+
std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
bool IsSIReg(unsigned Reg);
@@ -896,10 +926,10 @@ private:
bool ParseIntelMemoryOperandSize(unsigned &Size);
std::unique_ptr<X86Operand>
- CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
- unsigned IndexReg, unsigned Scale, SMLoc Start,
- SMLoc End, unsigned Size, StringRef Identifier,
- const InlineAsmIdentifierInfo &Info);
+ CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
+ unsigned IndexReg, unsigned Scale, SMLoc Start,
+ SMLoc End, unsigned Size, StringRef Identifier,
+ const InlineAsmIdentifierInfo &Info);
bool parseDirectiveEven(SMLoc L);
bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
@@ -927,9 +957,14 @@ private:
bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
bool processInstruction(MCInst &Inst, const OperandVector &Ops);
- /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
+ // Load Value Injection (LVI) Mitigations for machine code
+ void emitWarningForSpecialLVIInstruction(SMLoc Loc);
+ void applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out);
+ void applyLVILoadHardeningMitigation(MCInst &Inst, MCStreamer &Out);
+
+ /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
/// instrumentation around Inst.
- void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
+ void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
@@ -1023,6 +1058,8 @@ public:
}
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
@@ -1129,36 +1166,21 @@ static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
return checkScale(Scale, ErrMsg);
}
-bool X86AsmParser::ParseRegister(unsigned &RegNo,
- SMLoc &StartLoc, SMLoc &EndLoc) {
- MCAsmParser &Parser = getParser();
- RegNo = 0;
- const AsmToken &PercentTok = Parser.getTok();
- StartLoc = PercentTok.getLoc();
-
+bool X86AsmParser::MatchRegisterByName(unsigned &RegNo, StringRef RegName,
+ SMLoc StartLoc, SMLoc EndLoc) {
// If we encounter a %, ignore it. This code handles registers with and
// without the prefix, unprefixed registers can occur in cfi directives.
- if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
- Parser.Lex(); // Eat percent token.
+ RegName.consume_front("%");
- const AsmToken &Tok = Parser.getTok();
- EndLoc = Tok.getEndLoc();
-
- if (Tok.isNot(AsmToken::Identifier)) {
- if (isParsingIntelSyntax()) return true;
- return Error(StartLoc, "invalid register name",
- SMRange(StartLoc, EndLoc));
- }
-
- RegNo = MatchRegisterName(Tok.getString());
+ RegNo = MatchRegisterName(RegName);
// If the match failed, try the register name as lowercase.
if (RegNo == 0)
- RegNo = MatchRegisterName(Tok.getString().lower());
+ RegNo = MatchRegisterName(RegName.lower());
// The "flags" and "mxcsr" registers cannot be referenced directly.
// Treat it as an identifier instead.
- if (isParsingInlineAsm() && isParsingIntelSyntax() &&
+ if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
(RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
RegNo = 0;
@@ -1172,27 +1194,137 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
X86II::isX86_64NonExtLowByteReg(RegNo) ||
X86II::isX86_64ExtendedReg(RegNo)) {
- StringRef RegName = Tok.getString();
- Parser.Lex(); // Eat register name.
return Error(StartLoc,
"register %" + RegName + " is only available in 64-bit mode",
SMRange(StartLoc, EndLoc));
}
}
+ // If this is "db[0-15]", match it as an alias
+ // for dr[0-15].
+ if (RegNo == 0 && RegName.startswith("db")) {
+ if (RegName.size() == 3) {
+ switch (RegName[2]) {
+ case '0':
+ RegNo = X86::DR0;
+ break;
+ case '1':
+ RegNo = X86::DR1;
+ break;
+ case '2':
+ RegNo = X86::DR2;
+ break;
+ case '3':
+ RegNo = X86::DR3;
+ break;
+ case '4':
+ RegNo = X86::DR4;
+ break;
+ case '5':
+ RegNo = X86::DR5;
+ break;
+ case '6':
+ RegNo = X86::DR6;
+ break;
+ case '7':
+ RegNo = X86::DR7;
+ break;
+ case '8':
+ RegNo = X86::DR8;
+ break;
+ case '9':
+ RegNo = X86::DR9;
+ break;
+ }
+ } else if (RegName.size() == 4 && RegName[2] == '1') {
+ switch (RegName[3]) {
+ case '0':
+ RegNo = X86::DR10;
+ break;
+ case '1':
+ RegNo = X86::DR11;
+ break;
+ case '2':
+ RegNo = X86::DR12;
+ break;
+ case '3':
+ RegNo = X86::DR13;
+ break;
+ case '4':
+ RegNo = X86::DR14;
+ break;
+ case '5':
+ RegNo = X86::DR15;
+ break;
+ }
+ }
+ }
+
+ if (RegNo == 0) {
+ if (isParsingIntelSyntax())
+ return true;
+ return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc));
+ }
+ return false;
+}
+
+bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc, bool RestoreOnFailure) {
+ MCAsmParser &Parser = getParser();
+ MCAsmLexer &Lexer = getLexer();
+ RegNo = 0;
+
+ SmallVector<AsmToken, 5> Tokens;
+ auto OnFailure = [RestoreOnFailure, &Lexer, &Tokens]() {
+ if (RestoreOnFailure) {
+ while (!Tokens.empty()) {
+ Lexer.UnLex(Tokens.pop_back_val());
+ }
+ }
+ };
+
+ const AsmToken &PercentTok = Parser.getTok();
+ StartLoc = PercentTok.getLoc();
+
+ // If we encounter a %, ignore it. This code handles registers with and
+ // without the prefix, unprefixed registers can occur in cfi directives.
+ if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) {
+ Tokens.push_back(PercentTok);
+ Parser.Lex(); // Eat percent token.
+ }
+
+ const AsmToken &Tok = Parser.getTok();
+ EndLoc = Tok.getEndLoc();
+
+ if (Tok.isNot(AsmToken::Identifier)) {
+ OnFailure();
+ if (isParsingIntelSyntax()) return true;
+ return Error(StartLoc, "invalid register name",
+ SMRange(StartLoc, EndLoc));
+ }
+
+ if (MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc)) {
+ OnFailure();
+ return true;
+ }
+
// Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
if (RegNo == X86::ST0) {
+ Tokens.push_back(Tok);
Parser.Lex(); // Eat 'st'
// Check to see if we have '(4)' after %st.
- if (getLexer().isNot(AsmToken::LParen))
+ if (Lexer.isNot(AsmToken::LParen))
return false;
// Lex the paren.
- getParser().Lex();
+ Tokens.push_back(Parser.getTok());
+ Parser.Lex();
const AsmToken &IntTok = Parser.getTok();
- if (IntTok.isNot(AsmToken::Integer))
+ if (IntTok.isNot(AsmToken::Integer)) {
+ OnFailure();
return Error(IntTok.getLoc(), "expected stack index");
+ }
switch (IntTok.getIntVal()) {
case 0: RegNo = X86::ST0; break;
case 1: RegNo = X86::ST1; break;
@@ -1202,11 +1334,18 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
case 5: RegNo = X86::ST5; break;
case 6: RegNo = X86::ST6; break;
case 7: RegNo = X86::ST7; break;
- default: return Error(IntTok.getLoc(), "invalid stack index");
+ default:
+ OnFailure();
+ return Error(IntTok.getLoc(), "invalid stack index");
}
- if (getParser().Lex().isNot(AsmToken::RParen))
+ // Lex IntTok
+ Tokens.push_back(IntTok);
+ Parser.Lex();
+ if (Lexer.isNot(AsmToken::RParen)) {
+ OnFailure();
return Error(Parser.getTok().getLoc(), "expected ')'");
+ }
EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat ')'
@@ -1215,41 +1354,8 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
EndLoc = Parser.getTok().getEndLoc();
- // If this is "db[0-15]", match it as an alias
- // for dr[0-15].
- if (RegNo == 0 && Tok.getString().startswith("db")) {
- if (Tok.getString().size() == 3) {
- switch (Tok.getString()[2]) {
- case '0': RegNo = X86::DR0; break;
- case '1': RegNo = X86::DR1; break;
- case '2': RegNo = X86::DR2; break;
- case '3': RegNo = X86::DR3; break;
- case '4': RegNo = X86::DR4; break;
- case '5': RegNo = X86::DR5; break;
- case '6': RegNo = X86::DR6; break;
- case '7': RegNo = X86::DR7; break;
- case '8': RegNo = X86::DR8; break;
- case '9': RegNo = X86::DR9; break;
- }
- } else if (Tok.getString().size() == 4 && Tok.getString()[2] == '1') {
- switch (Tok.getString()[3]) {
- case '0': RegNo = X86::DR10; break;
- case '1': RegNo = X86::DR11; break;
- case '2': RegNo = X86::DR12; break;
- case '3': RegNo = X86::DR13; break;
- case '4': RegNo = X86::DR14; break;
- case '5': RegNo = X86::DR15; break;
- }
- }
-
- if (RegNo != 0) {
- EndLoc = Parser.getTok().getEndLoc();
- Parser.Lex(); // Eat it.
- return false;
- }
- }
-
if (RegNo == 0) {
+ OnFailure();
if (isParsingIntelSyntax()) return true;
return Error(StartLoc, "invalid register name",
SMRange(StartLoc, EndLoc));
@@ -1259,6 +1365,25 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
return false;
}
+bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
+}
+
+OperandMatchResultTy X86AsmParser::tryParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ bool Result =
+ ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
+ bool PendingErrors = getParser().hasPendingError();
+ getParser().clearPendingErrors();
+ if (PendingErrors)
+ return MatchOperand_ParseFail;
+ if (Result)
+ return MatchOperand_NoMatch;
+ return MatchOperand_Success;
+}
+
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
bool Parse32 = is32BitMode() || Code16GCC;
unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
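The ParseRegister rework above threads a RestoreOnFailure flag through the parser: every token eaten during a speculative parse is recorded and un-lexed in reverse order if the attempt fails, which is what lets tryParseRegister behave as pure lookahead. A hypothetical standalone model of that pattern; TokenStream stands in for MCAsmLexer and ints stand in for AsmToken.

#include <vector>

struct TokenStream {
  std::vector<int> Pending;                       // back() is the next token
  int lex() { int T = Pending.back(); Pending.pop_back(); return T; }
  void unlex(int T) { Pending.push_back(T); }
};

// Speculatively consume two expected tokens; on failure, un-lex everything in
// reverse order so the stream looks untouched (the OnFailure lambda in the
// patch plays the same role).
bool tryParsePair(TokenStream &TS, int Want1, int Want2) {
  if (TS.Pending.size() < 2)
    return false;
  std::vector<int> Eaten;
  auto Restore = [&] {
    while (!Eaten.empty()) {
      TS.unlex(Eaten.back());
      Eaten.pop_back();
    }
  };
  int A = TS.lex();
  Eaten.push_back(A);
  if (A != Want1) { Restore(); return false; }
  int B = TS.lex();
  Eaten.push_back(B);
  if (B != Want2) { Restore(); return false; }
  return true; // success: consumed tokens stay consumed
}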
@@ -1405,7 +1530,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
return ParseATTOperand();
}
-std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
+std::unique_ptr<X86Operand> X86AsmParser::CreateMemForMSInlineAsm(
unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
const InlineAsmIdentifierInfo &Info) {
@@ -1445,8 +1570,9 @@ std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
} else {
BaseReg = BaseReg ? BaseReg : 1;
return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
- IndexReg, Scale, Start, End, Size, Identifier,
- Decl, FrontendSize);
+ IndexReg, Scale, Start, End, Size,
+ /*DefaultBaseReg=*/X86::RIP, Identifier, Decl,
+ FrontendSize);
}
}
@@ -1483,7 +1609,7 @@ bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
return true;
StringRef ErrMsg;
ParseError =
- SM.onOffset(Val, OffsetLoc, ID, Info, isParsingInlineAsm(), ErrMsg);
+ SM.onOffset(Val, OffsetLoc, ID, Info, isParsingMSInlineAsm(), ErrMsg);
if (ParseError)
return Error(SMLoc::getFromPointer(Name.data()), ErrMsg);
} else {
@@ -1525,12 +1651,51 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
SMLoc IdentLoc = Tok.getLoc();
StringRef Identifier = Tok.getString();
UpdateLocLex = false;
- // Register
+ // (MASM only) <TYPE> PTR operator
+ if (Parser.isParsingMasm()) {
+ const AsmToken &NextTok = getLexer().peekTok();
+ if (NextTok.is(AsmToken::Identifier) &&
+ NextTok.getIdentifier().equals_lower("ptr")) {
+ SM.onCast(Identifier);
+ // Eat type and PTR.
+ consumeToken();
+ End = consumeToken();
+ break;
+ }
+ }
+ // Register, or (MASM only) <register>.<field>
unsigned Reg;
- if (Tok.is(AsmToken::Identifier) && !ParseRegister(Reg, IdentLoc, End)) {
- if (SM.onRegister(Reg, ErrMsg))
- return Error(Tok.getLoc(), ErrMsg);
- break;
+ if (Tok.is(AsmToken::Identifier)) {
+ if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) {
+ if (SM.onRegister(Reg, ErrMsg))
+ return Error(IdentLoc, ErrMsg);
+ break;
+ }
+ if (Parser.isParsingMasm()) {
+ const std::pair<StringRef, StringRef> IDField =
+ Tok.getString().split('.');
+ const StringRef ID = IDField.first, Field = IDField.second;
+ SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size());
+ if (!Field.empty() &&
+ !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) {
+ if (SM.onRegister(Reg, ErrMsg))
+ return Error(IdentLoc, ErrMsg);
+
+ StringRef Type;
+ unsigned Offset = 0;
+ SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data());
+ if (Parser.lookUpField(Field, Type, Offset))
+ return Error(FieldStartLoc, "unknown offset");
+ else if (SM.onPlus(ErrMsg))
+ return Error(getTok().getLoc(), ErrMsg);
+ else if (SM.onInteger(Offset, ErrMsg))
+ return Error(IdentLoc, ErrMsg);
+ SM.setType(Type);
+
+ End = consumeToken();
+ break;
+ }
+ }
}
// Operator synonyms ("not", "or", etc.)
bool ParseError = false;
@@ -1542,37 +1707,40 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
// Symbol reference, when parsing assembly content
InlineAsmIdentifierInfo Info;
const MCExpr *Val;
- if (!isParsingInlineAsm()) {
- if (getParser().parsePrimaryExpr(Val, End)) {
- return Error(Tok.getLoc(), "Unexpected identifier!");
- } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) {
- return Error(IdentLoc, ErrMsg);
- } else
+ if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
+ // MS Dot Operator expression
+ if (Identifier.count('.') &&
+ (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) {
+ if (ParseIntelDotOperator(SM, End))
+ return true;
break;
+ }
}
- // MS InlineAsm operators (TYPE/LENGTH/SIZE)
- if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
- if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
- if (SM.onInteger(Val, ErrMsg))
- return Error(IdentLoc, ErrMsg);
- } else
- return true;
- break;
- }
- // MS Dot Operator expression
- if (Identifier.count('.') && PrevTK == AsmToken::RBrac) {
- if (ParseIntelDotOperator(SM, End))
+ if (isParsingMSInlineAsm()) {
+ // MS InlineAsm operators (TYPE/LENGTH/SIZE)
+ if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
+ if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
+ if (SM.onInteger(Val, ErrMsg))
+ return Error(IdentLoc, ErrMsg);
+ } else
+ return true;
+ break;
+ }
+ // MS InlineAsm identifier
+ // Call parseIdentifier() to combine @ with the identifier behind it.
+ if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
+ return Error(IdentLoc, "expected identifier");
+ if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
return true;
+ else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg))
+ return Error(IdentLoc, ErrMsg);
break;
}
- // MS InlineAsm identifier
- // Call parseIdentifier() to combine @ with the identifier behind it.
- if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
- return Error(IdentLoc, "expected identifier");
- if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
- return true;
- else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg))
+ if (getParser().parsePrimaryExpr(Val, End)) {
+ return Error(Tok.getLoc(), "Unexpected identifier!");
+ } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) {
return Error(IdentLoc, ErrMsg);
+ }
break;
}
case AsmToken::Integer: {
@@ -1593,8 +1761,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
return Error(Loc, "invalid reference to undefined symbol");
StringRef Identifier = Sym->getName();
InlineAsmIdentifierInfo Info;
- if (SM.onIdentifierExpr(Val, Identifier, Info,
- isParsingInlineAsm(), ErrMsg))
+ if (SM.onIdentifierExpr(Val, Identifier, Info, isParsingMSInlineAsm(),
+ ErrMsg))
return Error(Loc, ErrMsg);
End = consumeToken();
} else {
@@ -1688,7 +1856,7 @@ bool X86AsmParser::ParseIntelInlineAsmIdentifier(
const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info,
bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator) {
MCAsmParser &Parser = getParser();
- assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
+ assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly.");
Val = nullptr;
StringRef LineBuf(Identifier.data());
@@ -1777,9 +1945,11 @@ X86AsmParser::ParseRoundingModeOp(SMLoc Start) {
}
/// Parse the '.' operator.
-bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) {
+bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
+ SMLoc &End) {
const AsmToken &Tok = getTok();
- unsigned Offset;
+ StringRef Type;
+ unsigned Offset = 0;
// Drop the optional '.'.
StringRef DotDispStr = Tok.getString();
@@ -1791,10 +1961,15 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End)
APInt DotDisp;
DotDispStr.getAsInteger(10, DotDisp);
Offset = DotDisp.getZExtValue();
- } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
- std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
- if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
- Offset))
+ } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
+ Tok.is(AsmToken::Identifier)) {
+ const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
+ const StringRef Base = BaseMember.first, Member = BaseMember.second;
+ if (getParser().lookUpField(SM.getType(), DotDispStr, Type, Offset) &&
+ getParser().lookUpField(SM.getSymName(), DotDispStr, Type, Offset) &&
+ getParser().lookUpField(DotDispStr, Type, Offset) &&
+ (!SemaCallback ||
+ SemaCallback->LookupInlineAsmField(Base, Member, Offset)))
return Error(Tok.getLoc(), "Unable to lookup field reference!");
} else
return Error(Tok.getLoc(), "Unexpected token type!");
@@ -1805,6 +1980,7 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End)
while (Tok.getLoc().getPointer() < DotExprEndLoc)
Lex();
SM.addImm(Offset);
+ SM.setType(Type);
return false;
}
@@ -1816,7 +1992,7 @@ bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
// Eat offset, mark start of identifier.
SMLoc Start = Lex().getLoc();
ID = getTok().getString();
- if (!isParsingInlineAsm()) {
+ if (!isParsingMSInlineAsm()) {
if ((getTok().isNot(AsmToken::Identifier) &&
getTok().isNot(AsmToken::String)) ||
getParser().parsePrimaryExpr(Val, End))
@@ -1939,7 +2115,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
if (ParseIntelExpression(SM, End))
return nullptr;
- if (isParsingInlineAsm())
+ if (isParsingMSInlineAsm())
RewriteIntelExpression(SM, Start, Tok.getLoc());
int64_t Imm = SM.getImm();
@@ -1953,7 +2129,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
// RegNo != 0 specifies a valid segment register,
// and we are parsing a segment override
if (!SM.isMemExpr() && !RegNo) {
- if (isParsingInlineAsm() && SM.isOffsetOperator()) {
+ if (isParsingMSInlineAsm() && SM.isOffsetOperator()) {
const InlineAsmIdentifierInfo Info = SM.getIdentifierInfo();
if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
// Disp includes the address of a variable; make sure this is recorded
@@ -2005,10 +2181,18 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
ErrMsg))
return ErrorOperand(Start, ErrMsg);
- if (isParsingInlineAsm())
- return CreateMemForInlineAsm(RegNo, Disp, BaseReg, IndexReg,
- Scale, Start, End, Size, SM.getSymName(),
- SM.getIdentifierInfo());
+ if (isParsingMSInlineAsm())
+ return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale, Start,
+ End, Size, SM.getSymName(),
+ SM.getIdentifierInfo());
+
+ // When parsing x64 MS-style assembly, all memory operands default to
+ // RIP-relative when interpreted as non-absolute references.
+ if (Parser.isParsingMasm() && is64BitMode())
+ return X86Operand::CreateMem(getPointerWidth(), RegNo, Disp, BaseReg,
+ IndexReg, Scale, Start, End, Size,
+ /*DefaultBaseReg=*/X86::RIP);
+
if (!(BaseReg || IndexReg || RegNo))
return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
return X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
@@ -2420,8 +2604,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
return Error(Parser.getTok().getLoc(), "Expected '}'");
Parser.Lex(); // Eat curly.
- if (Prefix == "vex2")
- ForcedVEXEncoding = VEXEncoding_VEX2;
+ if (Prefix == "vex" || Prefix == "vex2")
+ ForcedVEXEncoding = VEXEncoding_VEX;
else if (Prefix == "vex3")
ForcedVEXEncoding = VEXEncoding_VEX3;
else if (Prefix == "evex")
@@ -2711,7 +2895,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// In MS inline asm curly braces mark the beginning/end of a block,
// therefore they should be interpreted as end of statement
CurlyAsEndOfStatement =
- isParsingIntelSyntax() && isParsingInlineAsm() &&
+ isParsingIntelSyntax() && isParsingMSInlineAsm() &&
(getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
return TokError("unexpected token in argument list");
@@ -3096,9 +3280,122 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
static const char *getSubtargetFeatureName(uint64_t Val);
-void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
+void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc) {
+ Warning(Loc, "Instruction may be vulnerable to LVI and "
+ "requires manual mitigation");
+ Note(SMLoc(), "See https://software.intel.com/"
+ "security-software-guidance/insights/"
+ "deep-dive-load-value-injection#specialinstructions"
+ " for more information");
+}
+
+/// RET instructions, and instructions that make indirect calls/jumps through
+/// memory, combine a load and a branch within a single instruction. To mitigate these
+/// instructions against LVI, they must be decomposed into separate load and
+/// branch instructions, with an LFENCE in between. For more details, see:
+/// - X86LoadValueInjectionRetHardening.cpp
+/// - X86LoadValueInjectionIndirectThunks.cpp
+/// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
+///
+/// Emits the mitigation (or a warning) directly; nothing is returned.
+void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) {
+ // Information on control-flow instructions that require manual mitigation can
+ // be found here:
+ // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
+ switch (Inst.getOpcode()) {
+ case X86::RETW:
+ case X86::RETL:
+ case X86::RETQ:
+ case X86::RETIL:
+ case X86::RETIQ:
+ case X86::RETIW: {
+ MCInst ShlInst, FenceInst;
+ bool Parse32 = is32BitMode() || Code16GCC;
+ unsigned Basereg =
+ is64BitMode() ? X86::RSP : (Parse32 ? X86::ESP : X86::SP);
+ const MCExpr *Disp = MCConstantExpr::create(0, getContext());
+ auto ShlMemOp = X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
+ /*BaseReg=*/Basereg, /*IndexReg=*/0,
+ /*Scale=*/1, SMLoc{}, SMLoc{}, 0);
+ ShlInst.setOpcode(X86::SHL64mi);
+ ShlMemOp->addMemOperands(ShlInst, 5);
+ ShlInst.addOperand(MCOperand::createImm(0));
+ FenceInst.setOpcode(X86::LFENCE);
+ Out.emitInstruction(ShlInst, getSTI());
+ Out.emitInstruction(FenceInst, getSTI());
+ return;
+ }
+ case X86::JMP16m:
+ case X86::JMP32m:
+ case X86::JMP64m:
+ case X86::CALL16m:
+ case X86::CALL32m:
+ case X86::CALL64m:
+ emitWarningForSpecialLVIInstruction(Inst.getLoc());
+ return;
+ }
+}
+
+/// To mitigate LVI, every instruction that performs a load can be followed by
+/// an LFENCE instruction to squash any potential mis-speculation. There are
+/// some instructions that require additional considerations, and may require
+/// manual mitigation. For more details, see:
+/// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
+///
+/// Emits an LFENCE (or a warning) directly; nothing is returned.
+void X86AsmParser::applyLVILoadHardeningMitigation(MCInst &Inst,
+ MCStreamer &Out) {
+ auto Opcode = Inst.getOpcode();
+ auto Flags = Inst.getFlags();
+ if ((Flags & X86::IP_HAS_REPEAT) || (Flags & X86::IP_HAS_REPEAT_NE)) {
+ // Information on REP string instructions that require manual mitigation can
+ // be found here:
+ // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
+ switch (Opcode) {
+ case X86::CMPSB:
+ case X86::CMPSW:
+ case X86::CMPSL:
+ case X86::CMPSQ:
+ case X86::SCASB:
+ case X86::SCASW:
+ case X86::SCASL:
+ case X86::SCASQ:
+ emitWarningForSpecialLVIInstruction(Inst.getLoc());
+ return;
+ }
+ } else if (Opcode == X86::REP_PREFIX || Opcode == X86::REPNE_PREFIX) {
+ // If a REP instruction is found on its own line, it may or may not be
+ // followed by a vulnerable instruction. Emit a warning just in case.
+ emitWarningForSpecialLVIInstruction(Inst.getLoc());
+ return;
+ }
+
+ const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
+
+ // Can't mitigate after terminators or calls. A control flow change may have
+ // already occurred.
+ if (MCID.isTerminator() || MCID.isCall())
+ return;
+
+ // LFENCE has the mayLoad property, don't double fence.
+ if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) {
+ MCInst FenceInst;
+ FenceInst.setOpcode(X86::LFENCE);
+ Out.emitInstruction(FenceInst, getSTI());
+ }
+}
+
+void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands,
MCStreamer &Out) {
- Out.EmitInstruction(Inst, getSTI());
+ if (LVIInlineAsmHardening &&
+ getSTI().getFeatureBits()[X86::FeatureLVIControlFlowIntegrity])
+ applyLVICFIMitigation(Inst, Out);
+
+ Out.emitInstruction(Inst, getSTI());
+
+ if (LVIInlineAsmHardening &&
+ getSTI().getFeatureBits()[X86::FeatureLVILoadHardening])
+ applyLVILoadHardeningMitigation(Inst, Out);
}
bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -3133,7 +3430,7 @@ void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
Inst.setOpcode(X86::WAIT);
Inst.setLoc(IDLoc);
if (!MatchingInlineAsm)
- EmitInstruction(Inst, Operands, Out);
+ emitInstruction(Inst, Operands, Out);
Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
}
}
@@ -3170,7 +3467,7 @@ unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
(MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX)
return Match_Unsupported;
- if ((ForcedVEXEncoding == VEXEncoding_VEX2 ||
+ if ((ForcedVEXEncoding == VEXEncoding_VEX ||
ForcedVEXEncoding == VEXEncoding_VEX3) &&
(MCID.TSFlags & X86II::EncodingMask) != X86II::VEX)
return Match_Unsupported;
@@ -3240,7 +3537,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
Inst.setLoc(IDLoc);
if (!MatchingInlineAsm)
- EmitInstruction(Inst, Operands, Out);
+ emitInstruction(Inst, Operands, Out);
Opcode = Inst.getOpcode();
return false;
case Match_InvalidImmUnsignedi4: {
@@ -3282,20 +3579,47 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
// Otherwise, we assume that this may be an integer instruction, which comes
// in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
+ // Memory-operand sizes (in bits) corresponding to Suffixes: { 8, 16, 32, 64 } / { 32, 64, 80, 0 }
+ const char *MemSize = Base[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";
// Check for the various suffix matches.
uint64_t ErrorInfoIgnore;
FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
unsigned Match[4];
+ // Some instructions, like VPMULDQ, are NOT variants of VPMULD but new ones.
+ // So we should make sure the suffix matcher only accepts a memory variant
+ // that has the same size as the suffix.
+ // FIXME: This flag is a workaround for legacy instructions that didn't
+ // declare non-suffix variant assembly.
+ bool HasVectorReg = false;
+ X86Operand *MemOp = nullptr;
+ for (const auto &Op : Operands) {
+ X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
+ if (X86Op->isVectorReg())
+ HasVectorReg = true;
+ else if (X86Op->isMem()) {
+ MemOp = X86Op;
+ assert(MemOp->Mem.Size == 0 && "Memory size always 0 under ATT syntax");
+ // We have found an unqualified memory operand; break, since IA allows only
+ // one memory operand.
+ break;
+ }
+ }
+
for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
Tmp.back() = Suffixes[I];
- Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
- MissingFeatures, MatchingInlineAsm,
- isParsingIntelSyntax());
- // If this returned as a missing feature failure, remember that.
- if (Match[I] == Match_MissingFeature)
- ErrorInfoMissingFeatures = MissingFeatures;
+ if (MemOp && HasVectorReg)
+ MemOp->Mem.Size = MemSize[I];
+ Match[I] = Match_MnemonicFail;
+ if (MemOp || !HasVectorReg) {
+ Match[I] =
+ MatchInstruction(Operands, Inst, ErrorInfoIgnore, MissingFeatures,
+ MatchingInlineAsm, isParsingIntelSyntax());
+ // If this returned as a missing feature failure, remember that.
+ if (Match[I] == Match_MissingFeature)
+ ErrorInfoMissingFeatures = MissingFeatures;
+ }
}
// Restore the old token.
@@ -3309,7 +3633,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
if (NumSuccessfulMatches == 1) {
Inst.setLoc(IDLoc);
if (!MatchingInlineAsm)
- EmitInstruction(Inst, Operands, Out);
+ emitInstruction(Inst, Operands, Out);
Opcode = Inst.getOpcode();
return false;
}
@@ -3562,7 +3886,7 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
;
Inst.setLoc(IDLoc);
if (!MatchingInlineAsm)
- EmitInstruction(Inst, Operands, Out);
+ emitInstruction(Inst, Operands, Out);
Opcode = Inst.getOpcode();
return false;
} else if (NumSuccessfulMatches > 1) {
@@ -3684,9 +4008,9 @@ bool X86AsmParser::parseDirectiveEven(SMLoc L) {
Section = getStreamer().getCurrentSectionOnly();
}
if (Section->UseCodeAlign())
- getStreamer().EmitCodeAlignment(2, 0);
+ getStreamer().emitCodeAlignment(2, 0);
else
- getStreamer().EmitValueToAlignment(2, 0, 1, 0);
+ getStreamer().emitValueToAlignment(2, 0, 1, 0);
return false;
}
@@ -3699,7 +4023,7 @@ bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
Parser.Lex();
if (!is16BitMode()) {
SwitchMode(X86::Mode16Bit);
- getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
}
} else if (IDVal == ".code16gcc") {
// .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
@@ -3707,19 +4031,19 @@ bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
Code16GCC = true;
if (!is16BitMode()) {
SwitchMode(X86::Mode16Bit);
- getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
}
} else if (IDVal == ".code32") {
Parser.Lex();
if (!is32BitMode()) {
SwitchMode(X86::Mode32Bit);
- getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
+ getParser().getStreamer().emitAssemblerFlag(MCAF_Code32);
}
} else if (IDVal == ".code64") {
Parser.Lex();
if (!is64BitMode()) {
SwitchMode(X86::Mode64Bit);
- getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
+ getParser().getStreamer().emitAssemblerFlag(MCAF_Code64);
}
} else {
Error(L, "unknown directive " + IDVal);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86Operand.h b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86Operand.h
index d831a63b04ee..5cf4516ede97 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86Operand.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86Operand.h
@@ -17,9 +17,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
#include <memory>
@@ -60,6 +58,7 @@ struct X86Operand final : public MCParsedAsmOperand {
unsigned SegReg;
const MCExpr *Disp;
unsigned BaseReg;
+ unsigned DefaultBaseReg;
unsigned IndexReg;
unsigned Scale;
unsigned Size;
@@ -184,6 +183,10 @@ struct X86Operand final : public MCParsedAsmOperand {
assert(Kind == Memory && "Invalid access!");
return Mem.BaseReg;
}
+ unsigned getMemDefaultBaseReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.DefaultBaseReg;
+ }
unsigned getMemIndexReg() const {
assert(Kind == Memory && "Invalid access!");
return Mem.IndexReg;
@@ -312,6 +315,11 @@ struct X86Operand final : public MCParsedAsmOperand {
bool isMem512() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 512);
}
+
+ bool isSibMem() const {
+ return isMem() && Mem.BaseReg != X86::RIP && Mem.BaseReg != X86::EIP;
+ }
+
bool isMemIndexReg(unsigned LowR, unsigned HighR) const {
assert(Kind == Memory && "Invalid access!");
return Mem.IndexReg >= LowR && Mem.IndexReg <= HighR;
@@ -458,6 +466,14 @@ struct X86Operand final : public MCParsedAsmOperand {
X86MCRegisterClasses[X86::GR64RegClassID].contains(getReg()));
}
+ bool isVectorReg() const {
+ return Kind == Register &&
+ (X86MCRegisterClasses[X86::VR64RegClassID].contains(getReg()) ||
+ X86MCRegisterClasses[X86::VR128XRegClassID].contains(getReg()) ||
+ X86MCRegisterClasses[X86::VR256XRegClassID].contains(getReg()) ||
+ X86MCRegisterClasses[X86::VR512RegClassID].contains(getReg()));
+ }
+
bool isVK1Pair() const {
return Kind == Register &&
X86MCRegisterClasses[X86::VK1RegClassID].contains(getReg());
@@ -540,7 +556,10 @@ struct X86Operand final : public MCParsedAsmOperand {
void addMemOperands(MCInst &Inst, unsigned N) const {
assert((N == 5) && "Invalid number of operands!");
- Inst.addOperand(MCOperand::createReg(getMemBaseReg()));
+ if (getMemBaseReg())
+ Inst.addOperand(MCOperand::createReg(getMemBaseReg()));
+ else
+ Inst.addOperand(MCOperand::createReg(getMemDefaultBaseReg()));
Inst.addOperand(MCOperand::createImm(getMemScale()));
Inst.addOperand(MCOperand::createReg(getMemIndexReg()));
addExpr(Inst, getMemDisp());
@@ -633,6 +652,7 @@ struct X86Operand final : public MCParsedAsmOperand {
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
Res->Mem.BaseReg = 0;
+ Res->Mem.DefaultBaseReg = 0;
Res->Mem.IndexReg = 0;
Res->Mem.Scale = 1;
Res->Mem.Size = Size;
@@ -648,11 +668,14 @@ struct X86Operand final : public MCParsedAsmOperand {
static std::unique_ptr<X86Operand>
CreateMem(unsigned ModeSize, unsigned SegReg, const MCExpr *Disp,
unsigned BaseReg, unsigned IndexReg, unsigned Scale, SMLoc StartLoc,
- SMLoc EndLoc, unsigned Size = 0, StringRef SymName = StringRef(),
- void *OpDecl = nullptr, unsigned FrontendSize = 0) {
+ SMLoc EndLoc, unsigned Size = 0,
+ unsigned DefaultBaseReg = X86::NoRegister,
+ StringRef SymName = StringRef(), void *OpDecl = nullptr,
+ unsigned FrontendSize = 0) {
// We should never just have a displacement, that should be parsed as an
// absolute memory operand.
- assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
+ assert((SegReg || BaseReg || IndexReg || DefaultBaseReg) &&
+ "Invalid memory operand!");
// The scale should always be one of {1,2,4,8}.
assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
@@ -661,6 +684,7 @@ struct X86Operand final : public MCParsedAsmOperand {
Res->Mem.SegReg = SegReg;
Res->Mem.Disp = Disp;
Res->Mem.BaseReg = BaseReg;
+ Res->Mem.DefaultBaseReg = DefaultBaseReg;
Res->Mem.IndexReg = IndexReg;
Res->Mem.Scale = Scale;
Res->Mem.Size = Size;
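// Editor's note, not part of the patch: a minimal sketch of what the new
// DefaultBaseReg field enables. Under 64-bit MASM-style parsing, a bare memory
// reference is created with BaseReg == 0 but DefaultBaseReg == X86::RIP, and
// addMemOperands() then substitutes RIP so the reference encodes RIP-relative.
// Disp, Start and End stand in for values produced by the parser.
std::unique_ptr<X86Operand> Op = X86Operand::CreateMem(
    /*ModeSize=*/64, /*SegReg=*/0, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
    /*Scale=*/1, Start, End, /*Size=*/0, /*DefaultBaseReg=*/X86::RIP);
MCInst MI;
Op->addMemOperands(MI, 5); // the base operand becomes X86::RIP, not register 0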
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index ea8c606d1564..a7fa1eb9a5ee 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -776,6 +776,10 @@ static int readModRM(struct InternalInstruction *insn) {
return prefix##_YMM0 + index; \
case TYPE_XMM: \
return prefix##_XMM0 + index; \
+ case TYPE_TMM: \
+ if (index > 7) \
+ *valid = 0; \
+ return prefix##_TMM0 + index; \
case TYPE_VK: \
index &= 0xf; \
if (index > 7) \
@@ -849,6 +853,7 @@ static int fixupReg(struct InternalInstruction *insn,
if (!valid)
return -1;
break;
+ case ENCODING_SIB:
CASE_ENCODING_RM:
if (insn->eaBase >= insn->eaRegBase) {
insn->eaBase = (EABase)fixupRMValue(
@@ -1533,6 +1538,15 @@ static int readOperands(struct InternalInstruction *insn) {
if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
break;
+ case ENCODING_SIB:
+ // Reject if SIB wasn't used.
+ if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
+ return -1;
+ if (readModRM(insn))
+ return -1;
+ if (fixupReg(insn, &Op))
+ return -1;
+ break;
case ENCODING_REG:
CASE_ENCODING_RM:
if (readModRM(insn))
@@ -2006,9 +2020,11 @@ static bool translateRMRegister(MCInst &mcInst,
/// @param mcInst - The MCInst to append to.
/// @param insn - The instruction to extract Mod, R/M, and SIB fields
/// from.
+/// @param ForceSIB - The instruction must use SIB.
/// @return - 0 on success; nonzero otherwise
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
- const MCDisassembler *Dis) {
+ const MCDisassembler *Dis,
+ bool ForceSIB = false) {
// Addresses in an MCInst are represented as five operands:
// 1. basereg (register) The R/M base, or (if there is a SIB) the
// SIB base
@@ -2067,11 +2083,12 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
// -Any base register used other than ESP/RSP/R12D/R12. Using these as a
// base always requires a SIB byte.
// -A scale other than 1 is used.
- if (insn.sibScale != 1 ||
- (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) ||
- (insn.sibBase != SIB_BASE_NONE &&
- insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&
- insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12)) {
+ if (!ForceSIB &&
+ (insn.sibScale != 1 ||
+ (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) ||
+ (insn.sibBase != SIB_BASE_NONE &&
+ insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&
+ insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) {
indexReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIZ :
X86::RIZ);
} else
@@ -2182,6 +2199,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_XMM:
case TYPE_YMM:
case TYPE_ZMM:
+ case TYPE_TMM:
case TYPE_VK_PAIR:
case TYPE_VK:
case TYPE_DEBUGREG:
@@ -2193,6 +2211,8 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_MVSIBY:
case TYPE_MVSIBZ:
return translateRMMemory(mcInst, insn, Dis);
+ case TYPE_MSIB:
+ return translateRMMemory(mcInst, insn, Dis, true);
}
}
@@ -2242,6 +2262,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
return false;
case ENCODING_WRITEMASK:
return translateMaskRegister(mcInst, insn.writemask);
+ case ENCODING_SIB:
CASE_ENCODING_RM:
CASE_ENCODING_VSIB:
return translateRM(mcInst, operand, insn, Dis);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 147fe46d81b9..4318c17f03a0 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -19,9 +19,6 @@
#include "llvm/Support/X86DisassemblerDecoderCommon.h"
namespace llvm {
-
-class MCInstrInfo;
-
namespace X86Disassembler {
// Accessor functions for various fields of an Intel instruction
@@ -383,6 +380,17 @@ namespace X86Disassembler {
ENTRY(BND2) \
ENTRY(BND3)
+#undef REGS_TMM
+#define REGS_TMM \
+ ENTRY(TMM0) \
+ ENTRY(TMM1) \
+ ENTRY(TMM2) \
+ ENTRY(TMM3) \
+ ENTRY(TMM4) \
+ ENTRY(TMM5) \
+ ENTRY(TMM6) \
+ ENTRY(TMM7)
+
#define ALL_EA_BASES \
EA_BASES_16BIT \
EA_BASES_32BIT \
@@ -407,6 +415,7 @@ namespace X86Disassembler {
REGS_DEBUG \
REGS_CONTROL \
REGS_BOUND \
+ REGS_TMM \
ENTRY(RIP)
/// All possible values of the base field for effective-address
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/ImmutableGraph.h b/contrib/llvm-project/llvm/lib/Target/X86/ImmutableGraph.h
index 5833017037a5..56738e9cfa73 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/ImmutableGraph.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/ImmutableGraph.h
@@ -28,7 +28,6 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <iterator>
#include <utility>
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
index 675a9c377b12..0134b4efce72 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
@@ -56,7 +56,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, uint64_t Address,
if (MI->getOpcode() == X86::CALLpcrel32 &&
(STI.getFeatureBits()[X86::Mode64Bit])) {
OS << "\tcallq\t";
- printPCRelImm(MI, 0, OS);
+ printPCRelImm(MI, Address, 0, OS);
}
// data16 and data32 both have the same encoding of 0x66. While data32 is
// valid only in 16 bit systems, data16 is valid in the rest.
@@ -68,8 +68,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, uint64_t Address,
OS << "\tdata32";
}
// Try to print any aliases first.
- else if (!printAliasInstr(MI, OS) &&
- !printVecCompareInstr(MI, OS))
+ else if (!printAliasInstr(MI, Address, OS) && !printVecCompareInstr(MI, OS))
printInstruction(MI, Address, OS);
// Next always print the annotation.
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
index 3d5d384dc4a0..51ddae61d251 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
@@ -30,9 +30,10 @@ public:
// Autogenerated by tblgen, returns true if we successfully printed an
// alias.
- bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
- void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx, raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS);
+ void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpIdx, unsigned PrintMethodIdx,
+ raw_ostream &O);
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &OS);
@@ -46,13 +47,6 @@ public:
void printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &OS);
void printSTiRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
- void printanymem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- printMemReference(MI, OpNo, O);
- }
- void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- printMemReference(MI, OpNo, O);
- }
-
void printbytemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 2284cd7a70b8..bf3b6bcb5463 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -12,7 +12,9 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFObjectWriter.h"
@@ -60,10 +62,9 @@ public:
else if (BranchType == "indirect")
addKind(X86::AlignBranchIndirect);
else {
- report_fatal_error(
- "'-x86-align-branch 'The branches's type is combination of jcc, "
- "fused, jmp, call, ret, indirect.(plus separated)",
- false);
+ errs() << "invalid argument " << BranchType.str()
+ << " to -x86-align-branch=; each element must be one of: fused, "
+ "jcc, jmp, call, ret, indirect.(plus separated)\n";
}
}
}
@@ -86,12 +87,13 @@ cl::opt<unsigned> X86AlignBranchBoundary(
cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
"x86-align-branch",
cl::desc(
- "Specify types of branches to align. The branches's types are "
- "combination of jcc, fused, jmp, call, ret, indirect. jcc indicates "
- "conditional jumps, fused indicates fused conditional jumps, jmp "
- "indicates unconditional jumps, call indicates direct and indirect "
- "calls, ret indicates rets, indirect indicates indirect jumps."),
- cl::value_desc("(plus separated list of types)"),
+ "Specify types of branches to align (plus separated list of types):"
+ "\njcc indicates conditional jumps"
+ "\nfused indicates fused conditional jumps"
+ "\njmp indicates direct unconditional jumps"
+ "\ncall indicates direct and indirect calls"
+ "\nret indicates rets"
+ "\nindirect indicates indirect unconditional jumps"),
cl::location(X86AlignBranchKindLoc));
cl::opt<bool> X86AlignBranchWithin32BBoundaries(
@@ -102,6 +104,18 @@ cl::opt<bool> X86AlignBranchWithin32BBoundaries(
"assumptions about labels corresponding to particular instructions, "
"and should be used with caution."));
+cl::opt<unsigned> X86PadMaxPrefixSize(
+ "x86-pad-max-prefix-size", cl::init(0),
+ cl::desc("Maximum number of prefixes to use for padding"));
+
+cl::opt<bool> X86PadForAlign(
+ "x86-pad-for-align", cl::init(true), cl::Hidden,
+ cl::desc("Pad previous instructions to implement align directives"));
+
+cl::opt<bool> X86PadForBranchAlign(
+ "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
+ cl::desc("Pad previous instructions to implement branch alignment"));
+
class X86ELFObjectWriter : public MCELFObjectTargetWriter {
public:
X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine,
@@ -114,14 +128,18 @@ class X86AsmBackend : public MCAsmBackend {
std::unique_ptr<const MCInstrInfo> MCII;
X86AlignBranchKind AlignBranchType;
Align AlignBoundary;
+ unsigned TargetPrefixMax = 0;
- bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
-
- bool needAlign(MCObjectStreamer &OS) const;
- bool needAlignInst(const MCInst &Inst) const;
- MCBoundaryAlignFragment *
- getOrCreateBoundaryAlignFragment(MCObjectStreamer &OS) const;
MCInst PrevInst;
+ MCBoundaryAlignFragment *PendingBA = nullptr;
+ std::pair<MCFragment *, size_t> PrevInstPosition;
+ bool CanPadInst;
+
+ uint8_t determinePaddingPrefix(const MCInst &Inst) const;
+ bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
+ bool needAlign(const MCInst &Inst) const;
+ bool canPadBranches(MCObjectStreamer &OS) const;
+ bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
public:
X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
@@ -142,11 +160,14 @@ public:
AlignBoundary = assumeAligned(X86AlignBranchBoundary);
if (X86AlignBranch.getNumOccurrences())
AlignBranchType = X86AlignBranchKindLoc;
+ if (X86PadMaxPrefixSize.getNumOccurrences())
+ TargetPrefixMax = X86PadMaxPrefixSize;
}
bool allowAutoPadding() const override;
- void alignBranchesBegin(MCObjectStreamer &OS, const MCInst &Inst) override;
- void alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
+ bool allowEnhancedRelaxation() const override;
+ void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst) override;
+ void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
unsigned getNumFixupKinds() const override {
return X86::NumTargetFixupKinds;
@@ -155,7 +176,7 @@ public:
Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
-
+
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target) override;
@@ -171,22 +192,34 @@ public:
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override;
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override;
+ void relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const override;
+
+ bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
+ MCCodeEmitter &Emitter,
+ unsigned &RemainingSize) const;
+
+ bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
+ unsigned &RemainingSize) const;
+
+ bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
+ unsigned &RemainingSize) const;
+
+ void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
};
} // end anonymous namespace
-static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool is16BitMode) {
+static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) {
unsigned Op = Inst.getOpcode();
switch (Op) {
default:
return Op;
case X86::JCC_1:
- return (is16BitMode) ? X86::JCC_2 : X86::JCC_4;
+ return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
case X86::JMP_1:
- return (is16BitMode) ? X86::JMP_2 : X86::JMP_4;
+ return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
}
}
@@ -275,11 +308,11 @@ static unsigned getRelaxedOpcodeArith(const MCInst &Inst) {
}
}
-static unsigned getRelaxedOpcode(const MCInst &Inst, bool is16BitMode) {
+static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) {
unsigned R = getRelaxedOpcodeArith(Inst);
if (R != Inst.getOpcode())
return R;
- return getRelaxedOpcodeBranch(Inst, is16BitMode);
+ return getRelaxedOpcodeBranch(Inst, Is16BitMode);
}
static X86::CondCode getCondFromBranch(const MCInst &MI,
@@ -316,6 +349,11 @@ static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
return (BaseReg == X86::RIP);
}
+/// Check if the instruction is a prefix.
+static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
+ return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
+}
+
/// Check if the instruction is valid as the first instruction in macro fusion.
static bool isFirstMacroFusibleInst(const MCInst &Inst,
const MCInstrInfo &MCII) {
@@ -327,6 +365,69 @@ static bool isFirstMacroFusibleInst(const MCInst &Inst,
return FIK != X86::FirstMacroFusionInstKind::Invalid;
}
+/// X86 can reduce the number of NOP bytes by padding instructions with
+/// prefixes to get better performance in some cases. Here, we determine
+/// which prefix is the most suitable.
+///
+/// If the instruction has a segment override prefix, use the existing one.
+/// If the target is 64-bit, use CS.
+/// If the target is 32-bit,
+/// - If the instruction has an ESP/EBP base register, use SS.
+/// - Otherwise use DS.
+uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
+ assert((STI.hasFeature(X86::Mode32Bit) || STI.hasFeature(X86::Mode64Bit)) &&
+ "Prefixes can be added only in 32-bit or 64-bit mode.");
+ const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
+ // Determine where the memory operand starts, if present.
+ int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
+ if (MemoryOperand != -1)
+ MemoryOperand += X86II::getOperandBias(Desc);
+
+ unsigned SegmentReg = 0;
+ if (MemoryOperand >= 0) {
+ // Check for explicit segment override on memory operand.
+ SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
+ }
+
+ switch (TSFlags & X86II::FormMask) {
+ default:
+ break;
+ case X86II::RawFrmDstSrc: {
+ // Check segment override opcode prefix as needed (not for %ds).
+ if (Inst.getOperand(2).getReg() != X86::DS)
+ SegmentReg = Inst.getOperand(2).getReg();
+ break;
+ }
+ case X86II::RawFrmSrc: {
+ // Check segment override opcode prefix as needed (not for %ds).
+ if (Inst.getOperand(1).getReg() != X86::DS)
+ SegmentReg = Inst.getOperand(1).getReg();
+ break;
+ }
+ case X86II::RawFrmMemOffs: {
+ // Check segment override opcode prefix as needed.
+ SegmentReg = Inst.getOperand(1).getReg();
+ break;
+ }
+ }
+
+ if (SegmentReg != 0)
+ return X86::getSegmentOverridePrefixForReg(SegmentReg);
+
+ if (STI.hasFeature(X86::Mode64Bit))
+ return X86::CS_Encoding;
+
+ if (MemoryOperand >= 0) {
+ unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
+ unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
+ if (BaseReg == X86::ESP || BaseReg == X86::EBP)
+ return X86::SS_Encoding;
+ }
+ return X86::DS_Encoding;
+}
+
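// Editor's note, not part of the patch: the prefix-selection policy above,
// condensed into a free-standing sketch. Only names that appear in the patch
// (X86::getSegmentOverridePrefixForReg and the *_Encoding constants) are
// reused; the helper itself is illustrative, not proposed code.
static uint8_t pickPaddingPrefix(bool Is64Bit, unsigned SegOverrideReg,
                                 unsigned BaseReg) {
  if (SegOverrideReg != 0)   // An explicit segment override is reused as-is.
    return X86::getSegmentOverridePrefixForReg(SegOverrideReg);
  if (Is64Bit)               // In 64-bit mode a CS prefix is ignored by the CPU.
    return X86::CS_Encoding;
  if (BaseReg == X86::ESP || BaseReg == X86::EBP)
    return X86::SS_Encoding; // SS is already the default segment for ESP/EBP.
  return X86::DS_Encoding;   // Otherwise DS is the default segment.
}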
/// Check if the two instructions will be macro-fused on the target cpu.
bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
@@ -355,19 +456,122 @@ static bool hasVariantSymbol(const MCInst &MI) {
}
bool X86AsmBackend::allowAutoPadding() const {
- return (AlignBoundary != Align::None() &&
- AlignBranchType != X86::AlignBranchNone);
+ return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
+}
+
+bool X86AsmBackend::allowEnhancedRelaxation() const {
+ return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
+}
+
+/// X86 has certain instructions which enable interrupts exactly one
+/// instruction *after* the instruction which stores to SS. Return true if the
+/// given instruction has such an interrupt delay slot.
+static bool hasInterruptDelaySlot(const MCInst &Inst) {
+ switch (Inst.getOpcode()) {
+ case X86::POPSS16:
+ case X86::POPSS32:
+ case X86::STI:
+ return true;
+
+ case X86::MOV16sr:
+ case X86::MOV32sr:
+ case X86::MOV64sr:
+ case X86::MOV16sm:
+ if (Inst.getOperand(0).getReg() == X86::SS)
+ return true;
+ break;
+ }
+ return false;
+}
+
+/// Check if the instruction to be emitted is right after any data.
+static bool
+isRightAfterData(MCFragment *CurrentFragment,
+ const std::pair<MCFragment *, size_t> &PrevInstPosition) {
+ MCFragment *F = CurrentFragment;
+ // Empty data fragments may be created to prevent further data being
+ // added into the previous fragment; we need to skip them since they
+ // have no contents.
+ for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
+ if (cast<MCDataFragment>(F)->getContents().size() != 0)
+ break;
+
+ // Since data is always emitted into a DataFragment, our check strategy is
+ // simple here.
+ // - If the fragment is a DataFragment
+ // - If it's not the fragment where the previous instruction is,
+ // returns true.
+ // - If it's the fragment holding the previous instruction but its
+ // size changed since the previous instruction was emitted into
+ // it, returns true.
+ // - Otherwise returns false.
+ // - If the fragment is not a DataFragment, returns false.
+ if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
+ return DF != PrevInstPosition.first ||
+ DF->getContents().size() != PrevInstPosition.second;
+
+ return false;
+}
+
+/// \returns the fragment size if it has instructions, otherwise returns 0.
+static size_t getSizeForInstFragment(const MCFragment *F) {
+ if (!F || !F->hasInstructions())
+ return 0;
+ // MCEncodedFragmentWithContents being templated makes this tricky.
+ switch (F->getKind()) {
+ default:
+ llvm_unreachable("Unknown fragment with instructions!");
+ case MCFragment::FT_Data:
+ return cast<MCDataFragment>(*F).getContents().size();
+ case MCFragment::FT_Relaxable:
+ return cast<MCRelaxableFragment>(*F).getContents().size();
+ case MCFragment::FT_CompactEncodedInst:
+ return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
+ }
}
-bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const {
+/// Return true if we can insert NOP or prefixes automatically before the
+/// instruction to be emitted.
+bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
+ if (hasVariantSymbol(Inst))
+ // The linker may rewrite the instruction with a variant symbol operand (e.g.
+ // TLSCALL).
+ return false;
+
+ if (hasInterruptDelaySlot(PrevInst))
+ // If this instruction follows an interrupt enabling instruction with a one
+ // instruction delay, inserting a nop would change behavior.
+ return false;
+
+ if (isPrefix(PrevInst, *MCII))
+ // If this instruction follows a prefix, inserting a nop/prefix would change
+ // semantics.
+ return false;
+
+ if (isPrefix(Inst, *MCII))
+ // If this instruction is a prefix, inserting a prefix would change
+ // semantics.
+ return false;
+
+ if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
+ // If this instruction follows any data, there is no clear
+ // instruction boundary, inserting a nop/prefix would change semantics.
+ return false;
+
+ return true;
+}
+
+bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
if (!OS.getAllowAutoPadding())
return false;
assert(allowAutoPadding() && "incorrect initialization!");
- MCAssembler &Assembler = OS.getAssembler();
- MCSection *Sec = OS.getCurrentSectionOnly();
+ // We only pad in the text section.
+ if (!OS.getCurrentSectionOnly()->getKind().isText())
+ return false;
+
// To be Done: Currently don't deal with Bundle cases.
- if (Assembler.isBundlingEnabled() && Sec->isBundleLocked())
+ if (OS.getAssembler().isBundlingEnabled())
return false;
// Branches only need to be aligned in 32-bit or 64-bit mode.
@@ -377,59 +581,42 @@ bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const {
return true;
}
-/// Check if the instruction operand needs to be aligned. Padding is disabled
-/// before intruction which may be rewritten by linker(e.g. TLSCALL).
-bool X86AsmBackend::needAlignInst(const MCInst &Inst) const {
- // Linker may rewrite the instruction with variant symbol operand.
- if (hasVariantSymbol(Inst))
- return false;
-
- const MCInstrDesc &InstDesc = MCII->get(Inst.getOpcode());
- return (InstDesc.isConditionalBranch() &&
+/// Check if the instruction operand needs to be aligned.
+bool X86AsmBackend::needAlign(const MCInst &Inst) const {
+ const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
+ return (Desc.isConditionalBranch() &&
(AlignBranchType & X86::AlignBranchJcc)) ||
- (InstDesc.isUnconditionalBranch() &&
+ (Desc.isUnconditionalBranch() &&
(AlignBranchType & X86::AlignBranchJmp)) ||
- (InstDesc.isCall() &&
- (AlignBranchType & X86::AlignBranchCall)) ||
- (InstDesc.isReturn() &&
- (AlignBranchType & X86::AlignBranchRet)) ||
- (InstDesc.isIndirectBranch() &&
+ (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
+ (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
+ (Desc.isIndirectBranch() &&
(AlignBranchType & X86::AlignBranchIndirect));
}
-static bool canReuseBoundaryAlignFragment(const MCBoundaryAlignFragment &F) {
- // If a MCBoundaryAlignFragment has not been used to emit NOP,we can reuse it.
- return !F.canEmitNops();
-}
+/// Insert BoundaryAlignFragment before instructions to align branches.
+void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
+ const MCInst &Inst) {
+ CanPadInst = canPadInst(Inst, OS);
-MCBoundaryAlignFragment *
-X86AsmBackend::getOrCreateBoundaryAlignFragment(MCObjectStreamer &OS) const {
- auto *F = dyn_cast_or_null<MCBoundaryAlignFragment>(OS.getCurrentFragment());
- if (!F || !canReuseBoundaryAlignFragment(*F)) {
- F = new MCBoundaryAlignFragment(AlignBoundary);
- OS.insert(F);
- }
- return F;
-}
+ if (!canPadBranches(OS))
+ return;
+
+ if (!isMacroFused(PrevInst, Inst))
+ // Macro fusion doesn't happen indeed, clear the pending.
+ PendingBA = nullptr;
-/// Insert MCBoundaryAlignFragment before instructions to align branches.
-void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS,
- const MCInst &Inst) {
- if (!needAlign(OS))
+ if (!CanPadInst)
return;
- MCFragment *CF = OS.getCurrentFragment();
- bool NeedAlignFused = AlignBranchType & X86::AlignBranchFused;
- if (NeedAlignFused && isMacroFused(PrevInst, Inst) && CF) {
+ if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
// Macro fusion actually happens and there is no other fragment inserted
- // after the previous instruction. NOP can be emitted in PF to align fused
- // jcc.
- if (auto *PF =
- dyn_cast_or_null<MCBoundaryAlignFragment>(CF->getPrevNode())) {
- const_cast<MCBoundaryAlignFragment *>(PF)->setEmitNops(true);
- const_cast<MCBoundaryAlignFragment *>(PF)->setFused(true);
- }
- } else if (needAlignInst(Inst)) {
+ // after the previous instruction.
+ //
+ // Do nothing here since we already inserted a BoundaryAlign fragment when
+ // we met the first instruction in the fused pair and we'll tie them
+ // together in emitInstructionEnd.
+ //
// Note: When there is at least one fragment, such as MCAlignFragment,
// inserted after the previous instruction, e.g.
//
@@ -441,34 +628,41 @@ void X86AsmBackend::alignBranchesBegin(MCObjectStreamer &OS,
//
// We will treat the JCC as an unfused branch although it may be fused
// with the CMP.
- auto *F = getOrCreateBoundaryAlignFragment(OS);
- F->setEmitNops(true);
- F->setFused(false);
- } else if (NeedAlignFused && isFirstMacroFusibleInst(Inst, *MCII)) {
- // We don't know if macro fusion happens until the reaching the next
- // instruction, so a place holder is put here if necessary.
- getOrCreateBoundaryAlignFragment(OS);
+ return;
}
- PrevInst = Inst;
+ if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
+ isFirstMacroFusibleInst(Inst, *MCII))) {
+ // If we meet an unfused branch or the first instruction in a fusible pair,
+ // insert a BoundaryAlign fragment.
+ OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary));
+ }
}
-/// Insert a MCBoundaryAlignFragment to mark the end of the branch to be aligned
-/// if necessary.
-void X86AsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) {
- if (!needAlign(OS))
+/// Set the last fragment to be aligned for the BoundaryAlignFragment.
+void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) {
+ PrevInst = Inst;
+ MCFragment *CF = OS.getCurrentFragment();
+ PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
+ if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
+ F->setAllowAutoPadding(CanPadInst);
+
+ if (!canPadBranches(OS))
+ return;
+
+ if (!needAlign(Inst) || !PendingBA)
return;
- // If the branch is emitted into a MCRelaxableFragment, we can determine the
- // size of the branch easily in MCAssembler::relaxBoundaryAlign. When the
- // branch is fused, the fused branch(macro fusion pair) must be emitted into
- // two fragments. Or when the branch is unfused, the branch must be emitted
- // into one fragment. The MCRelaxableFragment naturally marks the end of the
- // fused or unfused branch.
- // Otherwise, we need to insert a MCBoundaryAlignFragment to mark the end of
- // the branch. This MCBoundaryAlignFragment may be reused to emit NOP to align
- // other branch.
- if (needAlignInst(Inst) && !isa<MCRelaxableFragment>(OS.getCurrentFragment()))
- OS.insert(new MCBoundaryAlignFragment(AlignBoundary));
+
+ // Tie the aligned instructions into a pending BoundaryAlign.
+ PendingBA->setLastFragment(CF);
+ PendingBA = nullptr;
+
+ // We need to ensure that further data isn't added to the current
+ // DataFragment, so that we can get the size of instructions later in
+ // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
+ // DataFragment.
+ if (isa_and_nonnull<MCDataFragment>(CF))
+ OS.insert(new MCDataFragment());
// Update the maximum alignment on the current section if necessary.
MCSection *Sec = OS.getCurrentSectionOnly();
@@ -478,13 +672,23 @@ void X86AsmBackend::alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) {
Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
if (STI.getTargetTriple().isOSBinFormatELF()) {
+ unsigned Type;
if (STI.getTargetTriple().getArch() == Triple::x86_64) {
- if (Name == "R_X86_64_NONE")
- return FK_NONE;
+ Type = llvm::StringSwitch<unsigned>(Name)
+#define ELF_RELOC(X, Y) .Case(#X, Y)
+#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
+#undef ELF_RELOC
+ .Default(-1u);
} else {
- if (Name == "R_386_NONE")
- return FK_NONE;
+ Type = llvm::StringSwitch<unsigned>(Name)
+#define ELF_RELOC(X, Y) .Case(#X, Y)
+#include "llvm/BinaryFormat/ELFRelocs/i386.def"
+#undef ELF_RELOC
+ .Default(-1u);
}
+ if (Type == -1u)
+ return None;
+ return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
}
return MCAsmBackend::getFixupKind(Name);
}
@@ -502,6 +706,11 @@ const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
};
+ // Fixup kinds from the .reloc directive are like R_386_NONE/R_X86_64_NONE. They
+ // do not require any extra processing.
+ if (Kind >= FirstLiteralRelocationKind)
+ return MCAsmBackend::getFixupKindInfo(FK_NONE);
+
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
@@ -514,7 +723,7 @@ const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
const MCFixup &Fixup,
const MCValue &) {
- return Fixup.getKind() == FK_NONE;
+ return Fixup.getKind() >= FirstLiteralRelocationKind;
}
static unsigned getFixupKindSize(unsigned Kind) {
@@ -556,7 +765,10 @@ void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
MutableArrayRef<char> Data,
uint64_t Value, bool IsResolved,
const MCSubtargetInfo *STI) const {
- unsigned Size = getFixupKindSize(Fixup.getKind());
+ unsigned Kind = Fixup.getKind();
+ if (Kind >= FirstLiteralRelocationKind)
+ return;
+ unsigned Size = getFixupKindSize(Kind);
assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
@@ -613,12 +825,11 @@ bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
-void X86AsmBackend::relaxInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI,
- MCInst &Res) const {
+void X86AsmBackend::relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const {
// The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
- bool is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
- unsigned RelaxedOp = getRelaxedOpcode(Inst, is16BitMode);
+ bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
+ unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
if (RelaxedOp == Inst.getOpcode()) {
SmallString<256> Tmp;
@@ -628,8 +839,232 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst,
report_fatal_error("unexpected instruction to relax: " + OS.str());
}
- Res = Inst;
- Res.setOpcode(RelaxedOp);
+ Inst.setOpcode(RelaxedOp);
+}
+
+/// Return true if this instruction has been fully relaxed into its most
+/// general available form.
+static bool isFullyRelaxed(const MCRelaxableFragment &RF) {
+ auto &Inst = RF.getInst();
+ auto &STI = *RF.getSubtargetInfo();
+ bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
+ return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode();
+}
+
+bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
+ MCCodeEmitter &Emitter,
+ unsigned &RemainingSize) const {
+ if (!RF.getAllowAutoPadding())
+ return false;
+ // If the instruction isn't fully relaxed, shifting it around might require a
+ // larger value for one of the fixups than can be encoded. The outer loop
+ // will also catch this before moving to the next instruction, but we need to
+ // prevent padding this single instruction as well.
+ if (!isFullyRelaxed(RF))
+ return false;
+
+ const unsigned OldSize = RF.getContents().size();
+ if (OldSize == 15)
+ return false;
+
+ const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
+ const unsigned RemainingPrefixSize = [&]() -> unsigned {
+ SmallString<15> Code;
+ raw_svector_ostream VecOS(Code);
+ Emitter.emitPrefix(RF.getInst(), VecOS, STI);
+ assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
+
+ // TODO: It turns out we need a decent amount of plumbing for the target
+ // specific bits to determine the number of prefixes it's safe to add. Various
+ // targets (older chips mostly, but also Atom family) encounter decoder
+ // stalls with too many prefixes. For testing purposes, we set the value
+ // externally for the moment.
+ unsigned ExistingPrefixSize = Code.size();
+ if (TargetPrefixMax <= ExistingPrefixSize)
+ return 0;
+ return TargetPrefixMax - ExistingPrefixSize;
+ }();
+ const unsigned PrefixBytesToAdd =
+ std::min(MaxPossiblePad, RemainingPrefixSize);
+ if (PrefixBytesToAdd == 0)
+ return false;
+
+ const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
+
+ SmallString<256> Code;
+ Code.append(PrefixBytesToAdd, Prefix);
+ Code.append(RF.getContents().begin(), RF.getContents().end());
+ RF.getContents() = Code;
+
+ // Adjust the fixups for the change in offsets
+ for (auto &F : RF.getFixups()) {
+ F.setOffset(F.getOffset() + PrefixBytesToAdd);
+ }
+
+ RemainingSize -= PrefixBytesToAdd;
+ return true;
+}
+
+bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
+ MCCodeEmitter &Emitter,
+ unsigned &RemainingSize) const {
+ if (isFullyRelaxed(RF))
+ // TODO: There are lots of other tricks we could apply for increasing
+ // encoding size without impacting performance.
+ return false;
+
+ MCInst Relaxed = RF.getInst();
+ relaxInstruction(Relaxed, *RF.getSubtargetInfo());
+
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<15> Code;
+ raw_svector_ostream VecOS(Code);
+ Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo());
+ const unsigned OldSize = RF.getContents().size();
+ const unsigned NewSize = Code.size();
+ assert(NewSize >= OldSize && "size decrease during relaxation?");
+ unsigned Delta = NewSize - OldSize;
+ if (Delta > RemainingSize)
+ return false;
+ RF.setInst(Relaxed);
+ RF.getContents() = Code;
+ RF.getFixups() = Fixups;
+ RemainingSize -= Delta;
+ return true;
+}
+
+bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
+ MCCodeEmitter &Emitter,
+ unsigned &RemainingSize) const {
+ bool Changed = false;
+ if (RemainingSize != 0)
+ Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
+ if (RemainingSize != 0)
+ Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
+ return Changed;
+}
+
+void X86AsmBackend::finishLayout(MCAssembler const &Asm,
+ MCAsmLayout &Layout) const {
+ // See if we can further relax some instructions to cut down on the number of
+ // nop bytes required for code alignment. The actual win is in reducing
+ // instruction count, not number of bytes. Modern X86-64 can easily end up
+ // decode limited. It is often better to reduce the number of instructions
+ // (i.e. eliminate nops) even at the cost of increasing the size and
+ // complexity of others.
+ if (!X86PadForAlign && !X86PadForBranchAlign)
+ return;
+
+ DenseSet<MCFragment *> LabeledFragments;
+ for (const MCSymbol &S : Asm.symbols())
+ LabeledFragments.insert(S.getFragment(false));
+
+ for (MCSection &Sec : Asm) {
+ if (!Sec.getKind().isText())
+ continue;
+
+ SmallVector<MCRelaxableFragment *, 4> Relaxable;
+ for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
+ MCFragment &F = *I;
+
+ if (LabeledFragments.count(&F))
+ Relaxable.clear();
+
+ if (F.getKind() == MCFragment::FT_Data ||
+ F.getKind() == MCFragment::FT_CompactEncodedInst)
+ // Skip and ignore
+ continue;
+
+ if (F.getKind() == MCFragment::FT_Relaxable) {
+ auto &RF = cast<MCRelaxableFragment>(*I);
+ Relaxable.push_back(&RF);
+ continue;
+ }
+
+ auto canHandle = [](MCFragment &F) -> bool {
+ switch (F.getKind()) {
+ default:
+ return false;
+ case MCFragment::FT_Align:
+ return X86PadForAlign;
+ case MCFragment::FT_BoundaryAlign:
+ return X86PadForBranchAlign;
+ }
+ };
+ // For any unhandled kind, assume we can't change layout.
+ if (!canHandle(F)) {
+ Relaxable.clear();
+ continue;
+ }
+
+#ifndef NDEBUG
+ const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
+#endif
+ const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);
+
+ // To keep the effects local, prefer to relax instructions closest to
+ // the align directive. This is purely about human understandability
+ // of the resulting code. If we later find a reason to expand
+ // particular instructions over others, we can adjust.
+ MCFragment *FirstChangedFragment = nullptr;
+ unsigned RemainingSize = OrigSize;
+ while (!Relaxable.empty() && RemainingSize != 0) {
+ auto &RF = *Relaxable.pop_back_val();
+ // Give the backend a chance to play any tricks it wishes to increase
+ // the encoding size of the given instruction. Target-independent code
+ // will try further relaxation, but targets may play further tricks.
+ if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
+ FirstChangedFragment = &RF;
+
+ // If we have an instruction which hasn't been fully relaxed, we can't
+ // skip past it and insert bytes before it. Changing its starting
+ // offset might require a larger negative offset than it can encode.
+ // We don't need to worry about larger positive offsets as none of the
+ // possible offsets between this and our align are visible, and the
+ // ones afterwards aren't changing.
+ if (!isFullyRelaxed(RF))
+ break;
+ }
+ Relaxable.clear();
+
+ if (FirstChangedFragment) {
+ // Make sure the offsets for any fragments in the affected range get
+ // updated. Note that this (conservatively) invalidates the offsets of
+ // those following, but this is not required.
+ Layout.invalidateFragmentsFrom(FirstChangedFragment);
+ }
+
+ // BoundaryAlign explicitly tracks its size (unlike align)
+ if (F.getKind() == MCFragment::FT_BoundaryAlign)
+ cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);
+
+#ifndef NDEBUG
+ const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
+ const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
+ assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
+ "can't move start of next fragment!");
+ assert(FinalSize == RemainingSize && "inconsistent size computation?");
+#endif
+
+ // If we're looking at a boundary align, make sure we don't try to pad
+ // its target instructions for some following directive. Doing so would
+ // break the alignment of the current boundary align.
+ if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
+ const MCFragment *LastFragment = BF->getLastFragment();
+ if (!LastFragment)
+ continue;
+ while (&*I != LastFragment)
+ ++I;
+ }
+ }
+ }
+
+ // The layout is done. Mark every fragment as valid.
+ for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
+ MCSection &Section = *Layout.getSectionOrder()[i];
+ Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
+ Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
+ }
}
/// Write a sequence of optimal nops to the output, covering \p Count
@@ -661,7 +1096,7 @@ bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
// This CPU doesn't support long nops. If needed add more.
 // FIXME: We could generate something better than plain 0x90.
- if (!STI.getFeatureBits()[X86::FeatureNOPL]) {
+ if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit)) {
for (uint64_t i = 0; i < Count; ++i)
OS << '\x90';
return true;
@@ -670,7 +1105,7 @@ bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
// 15-bytes is the longest single NOP instruction, but 10-bytes is
// commonly the longest that can be efficiently decoded.
uint64_t MaxNopLength = 10;
- if (STI.getFeatureBits()[X86::ProcIntelSLM])
+ if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP])
MaxNopLength = 7;
else if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
MaxNopLength = 15;
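
A minimal sketch of how such a nop budget is consumed in chunks of at most MaxNopLength bytes; the real emitter selects the recommended multi-byte NOP encodings, while this sketch simply repeats the single-byte 0x90 for clarity:

#include <cstdint>
#include <ostream>

// Emit Count bytes of padding, never writing a run longer than MaxNopLength.
static void writeSimpleNops(std::ostream &OS, uint64_t Count,
                            uint64_t MaxNopLength) {
  while (Count != 0) {
    uint64_t Chunk = Count < MaxNopLength ? Count : MaxNopLength;
    for (uint64_t I = 0; I != Chunk; ++I)
      OS.put('\x90'); // a real backend would emit one long NOP per chunk
    Count -= Chunk;
  }
}
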
@@ -811,6 +1246,7 @@ class DarwinX86AsmBackend : public X86AsmBackend {
enum { CU_NUM_SAVED_REGS = 6 };
mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
+ Triple TT;
bool Is64Bit;
unsigned OffsetSize; ///< Offset of a "push" instruction.
@@ -838,10 +1274,140 @@ protected:
return 1;
}
+private:
+ /// Get the compact unwind number for a given register. The number
+ /// corresponds to the enum lists in compact_unwind_encoding.h.
+ int getCompactUnwindRegNum(unsigned Reg) const {
+ static const MCPhysReg CU32BitRegs[7] = {
+ X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
+ };
+ static const MCPhysReg CU64BitRegs[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+ const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
+ for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
+ if (*CURegs == Reg)
+ return Idx;
+
+ return -1;
+ }
+
+ /// Return the registers encoded for a compact encoding with a frame
+ /// pointer.
+ uint32_t encodeCompactUnwindRegistersWithFrame() const {
+ // Encode the registers in the order they were saved --- 3-bits per
+ // register. The list of saved registers is assumed to be in reverse
+ // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
+ uint32_t RegEnc = 0;
+ for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
+ unsigned Reg = SavedRegs[i];
+ if (Reg == 0) break;
+
+ int CURegNum = getCompactUnwindRegNum(Reg);
+ if (CURegNum == -1) return ~0U;
+
+ // Encode the 3-bit register number in order, skipping over 3-bits for
+ // each register.
+ RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
+ }
+
+ assert((RegEnc & 0x3FFFF) == RegEnc &&
+ "Invalid compact register encoding!");
+ return RegEnc;
+ }
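
A worked instance of the packing above, assuming SavedRegs begins with the compact-unwind numbers 6, 5 and 4 (RBP, R15 and R14 in the CU64BitRegs table):

// 6 in bits 0-2, 5 in bits 3-5, 4 in bits 6-8.
constexpr uint32_t ExampleRegEnc = 6u | (5u << 3) | (4u << 6);
static_assert(ExampleRegEnc == 0x12E, "three saved registers pack into 9 bits");
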
+
+ /// Create the permutation encoding used with frameless stacks. It is
+ /// passed the number of registers to be saved and an array of the registers
+ /// saved.
+ uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
+ // The saved registers are numbered from 1 to 6. In order to encode the
+ // order in which they were saved, we re-number them according to their
+ // place in the register order. The re-numbering is relative to the last
+ // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
+ // that order:
+ //
+ // Orig Re-Num
+ // ---- ------
+ // 6 6
+ // 2 2
+ // 4 3
+ // 5 3
+ //
+ for (unsigned i = 0; i < RegCount; ++i) {
+ int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
+ if (CUReg == -1) return ~0U;
+ SavedRegs[i] = CUReg;
+ }
+
+ // Reverse the list.
+ std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
+
+ uint32_t RenumRegs[CU_NUM_SAVED_REGS];
+ for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
+ unsigned Countless = 0;
+ for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
+ if (SavedRegs[j] < SavedRegs[i])
+ ++Countless;
+
+ RenumRegs[i] = SavedRegs[i] - Countless - 1;
+ }
+
+ // Take the renumbered values and encode them into a 10-bit number.
+ uint32_t permutationEncoding = 0;
+ switch (RegCount) {
+ case 6:
+ permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+ + 6 * RenumRegs[2] + 2 * RenumRegs[3]
+ + RenumRegs[4];
+ break;
+ case 5:
+ permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
+ + 6 * RenumRegs[3] + 2 * RenumRegs[4]
+ + RenumRegs[5];
+ break;
+ case 4:
+ permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
+ + 3 * RenumRegs[4] + RenumRegs[5];
+ break;
+ case 3:
+ permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
+ + RenumRegs[5];
+ break;
+ case 2:
+ permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
+ break;
+ case 1:
+ permutationEncoding |= RenumRegs[5];
+ break;
+ }
+
+ assert((permutationEncoding & 0x3FF) == permutationEncoding &&
+ "Invalid compact register encoding!");
+ return permutationEncoding;
+ }
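
Tracing the code above with the {6, 2, 4, 5} example from the comment (RegCount == 4): after the reverse, the last four slots hold 5, 4, 2, 6; the renumbering yields RenumRegs[2..5] = 4, 3, 1, 2; and the RegCount == 4 case packs them as follows:

// Worked permutation encoding for the comment's {6, 2, 4, 5} example.
constexpr uint32_t ExamplePermutation = 60 * 4 + 12 * 3 + 3 * 1 + 2;
static_assert(ExamplePermutation == 281, "0x119 fits the 10-bit field");
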
+
+public:
+ DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI)
+ : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
+ Is64Bit(TT.isArch64Bit()) {
+ memset(SavedRegs, 0, sizeof(SavedRegs));
+ OffsetSize = Is64Bit ? 8 : 4;
+ MoveInstrSize = Is64Bit ? 3 : 2;
+ StackDivide = Is64Bit ? 8 : 4;
+ }
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override {
+ uint32_t CPUType = cantFail(MachO::getCPUType(TT));
+ uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
+ return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
+ }
+
/// Implementation of algorithm to generate the compact unwind encoding
/// for the CFI instructions.
uint32_t
- generateCompactUnwindEncodingImpl(ArrayRef<MCCFIInstruction> Instrs) const {
+ generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const override {
if (Instrs.empty()) return 0;
// Reset the saved registers.
@@ -904,7 +1470,7 @@ protected:
// L0:
// .cfi_def_cfa_offset 80
//
- StackSize = std::abs(Inst.getOffset()) / StackDivide;
+ StackSize = Inst.getOffset() / StackDivide;
++NumDefCFAOffsets;
break;
}
@@ -991,168 +1557,6 @@ protected:
return CompactUnwindEncoding;
}
-
-private:
- /// Get the compact unwind number for a given register. The number
- /// corresponds to the enum lists in compact_unwind_encoding.h.
- int getCompactUnwindRegNum(unsigned Reg) const {
- static const MCPhysReg CU32BitRegs[7] = {
- X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
- };
- static const MCPhysReg CU64BitRegs[] = {
- X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
- };
- const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
- for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
- if (*CURegs == Reg)
- return Idx;
-
- return -1;
- }
-
- /// Return the registers encoded for a compact encoding with a frame
- /// pointer.
- uint32_t encodeCompactUnwindRegistersWithFrame() const {
- // Encode the registers in the order they were saved --- 3-bits per
- // register. The list of saved registers is assumed to be in reverse
- // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
- uint32_t RegEnc = 0;
- for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
- unsigned Reg = SavedRegs[i];
- if (Reg == 0) break;
-
- int CURegNum = getCompactUnwindRegNum(Reg);
- if (CURegNum == -1) return ~0U;
-
- // Encode the 3-bit register number in order, skipping over 3-bits for
- // each register.
- RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
- }
-
- assert((RegEnc & 0x3FFFF) == RegEnc &&
- "Invalid compact register encoding!");
- return RegEnc;
- }
-
- /// Create the permutation encoding used with frameless stacks. It is
- /// passed the number of registers to be saved and an array of the registers
- /// saved.
- uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
- // The saved registers are numbered from 1 to 6. In order to encode the
- // order in which they were saved, we re-number them according to their
- // place in the register order. The re-numbering is relative to the last
- // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
- // that order:
- //
- // Orig Re-Num
- // ---- ------
- // 6 6
- // 2 2
- // 4 3
- // 5 3
- //
- for (unsigned i = 0; i < RegCount; ++i) {
- int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
- if (CUReg == -1) return ~0U;
- SavedRegs[i] = CUReg;
- }
-
- // Reverse the list.
- std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
-
- uint32_t RenumRegs[CU_NUM_SAVED_REGS];
- for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
- unsigned Countless = 0;
- for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
- if (SavedRegs[j] < SavedRegs[i])
- ++Countless;
-
- RenumRegs[i] = SavedRegs[i] - Countless - 1;
- }
-
- // Take the renumbered values and encode them into a 10-bit number.
- uint32_t permutationEncoding = 0;
- switch (RegCount) {
- case 6:
- permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
- + 6 * RenumRegs[2] + 2 * RenumRegs[3]
- + RenumRegs[4];
- break;
- case 5:
- permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
- + 6 * RenumRegs[3] + 2 * RenumRegs[4]
- + RenumRegs[5];
- break;
- case 4:
- permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
- + 3 * RenumRegs[4] + RenumRegs[5];
- break;
- case 3:
- permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
- + RenumRegs[5];
- break;
- case 2:
- permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
- break;
- case 1:
- permutationEncoding |= RenumRegs[5];
- break;
- }
-
- assert((permutationEncoding & 0x3FF) == permutationEncoding &&
- "Invalid compact register encoding!");
- return permutationEncoding;
- }
-
-public:
- DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI, bool Is64Bit)
- : X86AsmBackend(T, STI), MRI(MRI), Is64Bit(Is64Bit) {
- memset(SavedRegs, 0, sizeof(SavedRegs));
- OffsetSize = Is64Bit ? 8 : 4;
- MoveInstrSize = Is64Bit ? 3 : 2;
- StackDivide = Is64Bit ? 8 : 4;
- }
-};
-
-class DarwinX86_32AsmBackend : public DarwinX86AsmBackend {
-public:
- DarwinX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI)
- : DarwinX86AsmBackend(T, MRI, STI, false) {}
-
- std::unique_ptr<MCObjectTargetWriter>
- createObjectTargetWriter() const override {
- return createX86MachObjectWriter(/*Is64Bit=*/false,
- MachO::CPU_TYPE_I386,
- MachO::CPU_SUBTYPE_I386_ALL);
- }
-
- /// Generate the compact unwind encoding for the CFI instructions.
- uint32_t generateCompactUnwindEncoding(
- ArrayRef<MCCFIInstruction> Instrs) const override {
- return generateCompactUnwindEncodingImpl(Instrs);
- }
-};
-
-class DarwinX86_64AsmBackend : public DarwinX86AsmBackend {
- const MachO::CPUSubTypeX86 Subtype;
-public:
- DarwinX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI, MachO::CPUSubTypeX86 st)
- : DarwinX86AsmBackend(T, MRI, STI, true), Subtype(st) {}
-
- std::unique_ptr<MCObjectTargetWriter>
- createObjectTargetWriter() const override {
- return createX86MachObjectWriter(/*Is64Bit=*/true, MachO::CPU_TYPE_X86_64,
- Subtype);
- }
-
- /// Generate the compact unwind encoding for the CFI instructions.
- uint32_t generateCompactUnwindEncoding(
- ArrayRef<MCCFIInstruction> Instrs) const override {
- return generateCompactUnwindEncodingImpl(Instrs);
- }
};
} // end anonymous namespace
@@ -1163,7 +1567,7 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
const MCTargetOptions &Options) {
const Triple &TheTriple = STI.getTargetTriple();
if (TheTriple.isOSBinFormatMachO())
- return new DarwinX86_32AsmBackend(T, MRI, STI);
+ return new DarwinX86AsmBackend(T, MRI, STI);
if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
return new WindowsX86AsmBackend(T, false, STI);
@@ -1181,13 +1585,8 @@ MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
const Triple &TheTriple = STI.getTargetTriple();
- if (TheTriple.isOSBinFormatMachO()) {
- MachO::CPUSubTypeX86 CS =
- StringSwitch<MachO::CPUSubTypeX86>(TheTriple.getArchName())
- .Case("x86_64h", MachO::CPU_SUBTYPE_X86_64_H)
- .Default(MachO::CPU_SUBTYPE_X86_64_ALL);
- return new DarwinX86_64AsmBackend(T, MRI, STI, CS);
- }
+ if (TheTriple.isOSBinFormatMachO())
+ return new DarwinX86AsmBackend(T, MRI, STI);
if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
return new WindowsX86AsmBackend(T, true, STI);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index a4f8dd669e1e..79f07d3c7792 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -91,7 +91,7 @@ namespace X86 {
COND_G = 15,
LAST_VALID_COND = COND_G,
- // Artificial condition codes. These are used by AnalyzeBranch
+ // Artificial condition codes. These are used by analyzeBranch
// to indicate a block terminated with two conditional branches that together
// form a compound condition. They occur in code using FCMP_OEQ or FCMP_UNE,
// which can't be represented on x86 with a single condition. These
@@ -356,6 +356,39 @@ namespace X86 {
AlignBranchRet = 1U << 4,
AlignBranchIndirect = 1U << 5
};
+
+ /// Defines the encoding values for the segment override prefixes.
+ enum EncodingOfSegmentOverridePrefix : uint8_t {
+ CS_Encoding = 0x2E,
+ DS_Encoding = 0x3E,
+ ES_Encoding = 0x26,
+ FS_Encoding = 0x64,
+ GS_Encoding = 0x65,
+ SS_Encoding = 0x36
+ };
+
+ /// Given a segment register, return the encoding of the segment override
+ /// prefix for it.
+ inline EncodingOfSegmentOverridePrefix
+ getSegmentOverridePrefixForReg(unsigned Reg) {
+ switch (Reg) {
+ default:
+ llvm_unreachable("Unknown segment register!");
+ case X86::CS:
+ return CS_Encoding;
+ case X86::DS:
+ return DS_Encoding;
+ case X86::ES:
+ return ES_Encoding;
+ case X86::FS:
+ return FS_Encoding;
+ case X86::GS:
+ return GS_Encoding;
+ case X86::SS:
+ return SS_Encoding;
+ }
+ }
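
As a quick sanity check of the values above (assuming this header is included): the FS and GS overrides are the familiar 0x64 and 0x65 bytes seen in front of thread-local accesses.

// The FS/GS override bytes are the usual TLS prefixes.
static_assert(X86::FS_Encoding == 0x64 && X86::GS_Encoding == 0x65,
              "segment override prefix values");
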
+
} // end namespace X86;
/// X86II - This namespace holds all of the target specific flags that
@@ -581,90 +614,107 @@ namespace X86II {
/// in the lower 4 bits of the opcode.
AddCCFrm = 9,
+ /// PrefixByte - This form is used for instructions that represent a prefix
+ /// byte like data16 or rep.
+ PrefixByte = 10,
+
/// MRM[0-7][rm] - These forms are used to represent instructions that use
/// a Mod/RM byte, and use the middle field to hold extended opcode
/// information. In the intel manual these are represented as /0, /1, ...
///
+ /// MRMr0 - Instructions that operate on a register Reg/Opcode operand, not
+ /// the r/m field.
+ MRMr0 = 21,
+
+ /// MRMSrcMemFSIB - Like MRMSrcMem, but force the use of the SIB field.
+ MRMSrcMemFSIB = 22,
+
+ /// MRMDestMemFSIB - Like MRMDestMem, but force the use of the SIB field.
+ MRMDestMemFSIB = 23,
+
/// MRMDestMem - This form is used for instructions that use the Mod/RM byte
/// to specify a destination, which in this case is memory.
///
- MRMDestMem = 32,
+ MRMDestMem = 24,
/// MRMSrcMem - This form is used for instructions that use the Mod/RM byte
/// to specify a source, which in this case is memory.
///
- MRMSrcMem = 33,
+ MRMSrcMem = 25,
/// MRMSrcMem4VOp3 - This form is used for instructions that encode
/// operand 3 with VEX.VVVV and load from memory.
///
- MRMSrcMem4VOp3 = 34,
+ MRMSrcMem4VOp3 = 26,
/// MRMSrcMemOp4 - This form is used for instructions that use the Mod/RM
/// byte to specify the fourth source, which in this case is memory.
///
- MRMSrcMemOp4 = 35,
+ MRMSrcMemOp4 = 27,
/// MRMSrcMemCC - This form is used for instructions that use the Mod/RM
/// byte to specify the operands and also encodes a condition code.
///
- MRMSrcMemCC = 36,
+ MRMSrcMemCC = 28,
/// MRMXm - This form is used for instructions that use the Mod/RM byte
/// to specify a memory source, but doesn't use the middle field. And has
/// a condition code.
///
- MRMXmCC = 38,
+ MRMXmCC = 30,
/// MRMXm - This form is used for instructions that use the Mod/RM byte
/// to specify a memory source, but doesn't use the middle field.
///
- MRMXm = 39,
+ MRMXm = 31,
// Next, instructions that operate on a memory r/m operand...
- MRM0m = 40, MRM1m = 41, MRM2m = 42, MRM3m = 43, // Format /0 /1 /2 /3
- MRM4m = 44, MRM5m = 45, MRM6m = 46, MRM7m = 47, // Format /4 /5 /6 /7
+ MRM0m = 32, MRM1m = 33, MRM2m = 34, MRM3m = 35, // Format /0 /1 /2 /3
+ MRM4m = 36, MRM5m = 37, MRM6m = 38, MRM7m = 39, // Format /4 /5 /6 /7
/// MRMDestReg - This form is used for instructions that use the Mod/RM byte
/// to specify a destination, which in this case is a register.
///
- MRMDestReg = 48,
+ MRMDestReg = 40,
/// MRMSrcReg - This form is used for instructions that use the Mod/RM byte
/// to specify a source, which in this case is a register.
///
- MRMSrcReg = 49,
+ MRMSrcReg = 41,
/// MRMSrcReg4VOp3 - This form is used for instructions that encode
/// operand 3 with VEX.VVVV and do not load from memory.
///
- MRMSrcReg4VOp3 = 50,
+ MRMSrcReg4VOp3 = 42,
/// MRMSrcRegOp4 - This form is used for instructions that use the Mod/RM
/// byte to specify the fourth source, which in this case is a register.
///
- MRMSrcRegOp4 = 51,
+ MRMSrcRegOp4 = 43,
/// MRMSrcRegCC - This form is used for instructions that use the Mod/RM
/// byte to specify the operands and also encodes a condition code
///
- MRMSrcRegCC = 52,
+ MRMSrcRegCC = 44,
/// MRMXCCr - This form is used for instructions that use the Mod/RM byte
/// to specify a register source, but doesn't use the middle field. And has
/// a condition code.
///
- MRMXrCC = 54,
+ MRMXrCC = 46,
/// MRMXr - This form is used for instructions that use the Mod/RM byte
/// to specify a register source, but doesn't use the middle field.
///
- MRMXr = 55,
+ MRMXr = 47,
// Instructions that operate on a register r/m operand...
- MRM0r = 56, MRM1r = 57, MRM2r = 58, MRM3r = 59, // Format /0 /1 /2 /3
- MRM4r = 60, MRM5r = 61, MRM6r = 62, MRM7r = 63, // Format /4 /5 /6 /7
+ MRM0r = 48, MRM1r = 49, MRM2r = 50, MRM3r = 51, // Format /0 /1 /2 /3
+ MRM4r = 52, MRM5r = 53, MRM6r = 54, MRM7r = 55, // Format /4 /5 /6 /7
+
+ // Instructions that have mod=11 and an opcode, but ignore the r/m field.
+ MRM0X = 56, MRM1X = 57, MRM2X = 58, MRM3X = 59, // Format /0 /1 /2 /3
+ MRM4X = 60, MRM5X = 61, MRM6X = 62, MRM7X = 63, // Format /4 /5 /6 /7
/// MRM_XX - A mod/rm byte of exactly 0xXX.
MRM_C0 = 64, MRM_C1 = 65, MRM_C2 = 66, MRM_C3 = 67,
@@ -900,6 +950,16 @@ namespace X86II {
NOTRACK = 1ULL << NoTrackShift
};
+ /// \returns true if the instruction described by \p TSFlags is a prefix.
+ inline bool isPrefix(uint64_t TSFlags) {
+ return (TSFlags & X86II::FormMask) == PrefixByte;
+ }
+
+ /// \returns true if the instruction described by \p TSFlags is a pseudo.
+ inline bool isPseudo(uint64_t TSFlags) {
+ return (TSFlags & X86II::FormMask) == Pseudo;
+ }
+
/// \returns the "base" X86 opcode for the specified machine
/// instruction.
inline uint8_t getBaseOpcodeFor(uint64_t TSFlags) {
@@ -1028,10 +1088,13 @@ namespace X86II {
case X86II::RawFrmDst:
case X86II::RawFrmDstSrc:
case X86II::AddCCFrm:
+ case X86II::PrefixByte:
return -1;
case X86II::MRMDestMem:
+ case X86II::MRMDestMemFSIB:
return 0;
case X86II::MRMSrcMem:
+ case X86II::MRMSrcMemFSIB:
// Start from 1, skip any registers encoded in VEX_VVVV or I8IMM, or a
// mask register.
return 1 + HasVEX_4V + HasEVEX_K;
@@ -1051,12 +1114,18 @@ namespace X86II {
case X86II::MRMSrcRegOp4:
case X86II::MRMSrcRegCC:
case X86II::MRMXrCC:
+ case X86II::MRMr0:
case X86II::MRMXr:
case X86II::MRM0r: case X86II::MRM1r:
case X86II::MRM2r: case X86II::MRM3r:
case X86II::MRM4r: case X86II::MRM5r:
case X86II::MRM6r: case X86II::MRM7r:
return -1;
+ case X86II::MRM0X: case X86II::MRM1X:
+ case X86II::MRM2X: case X86II::MRM3X:
+ case X86II::MRM4X: case X86II::MRM5X:
+ case X86II::MRM6X: case X86II::MRM7X:
+ return -1;
case X86II::MRMXmCC:
case X86II::MRMXm:
case X86II::MRM0m: case X86II::MRM1m:
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index bd009da60851..292dd17e2f51 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -317,8 +317,10 @@ static unsigned getRelocType32(MCContext &Ctx,
unsigned X86ELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
- MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
MCFixupKind Kind = Fixup.getKind();
+ if (Kind >= FirstLiteralRelocationKind)
+ return Kind - FirstLiteralRelocationKind;
+ MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
X86_64RelType Type = getType64(Kind, Modifier, IsPCRel);
if (getEMachine() == ELF::EM_X86_64)
return getRelocType64(Ctx, Fixup.getLoc(), Modifier, Type, IsPCRel, Kind);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
index 73b1969b4e82..b51011e2c52f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
@@ -15,7 +15,7 @@
#include "X86ATTInstPrinter.h"
#include "X86BaseInfo.h"
#include "X86MCTargetDesc.h"
-#include "Utils/X86ShuffleDecode.h"
+#include "X86ShuffleDecode.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/raw_ostream.h"
@@ -199,6 +199,40 @@ using namespace llvm;
CASE_AVX512_INS_COMMON(Inst##SD, Z, m_Int) \
CASE_AVX512_INS_COMMON(Inst##SS, Z, m_Int)
+#define CASE_FMA4(Inst, suf) \
+ CASE_AVX_INS_COMMON(Inst, 4, suf) \
+ CASE_AVX_INS_COMMON(Inst, 4Y, suf)
+
+#define CASE_FMA4_PACKED_RR(Inst) \
+ CASE_FMA4(Inst##PD, rr) \
+ CASE_FMA4(Inst##PS, rr)
+
+#define CASE_FMA4_PACKED_RM(Inst) \
+ CASE_FMA4(Inst##PD, rm) \
+ CASE_FMA4(Inst##PS, rm)
+
+#define CASE_FMA4_PACKED_MR(Inst) \
+ CASE_FMA4(Inst##PD, mr) \
+ CASE_FMA4(Inst##PS, mr)
+
+#define CASE_FMA4_SCALAR_RR(Inst) \
+ CASE_AVX_INS_COMMON(Inst##SD4, , rr) \
+ CASE_AVX_INS_COMMON(Inst##SS4, , rr) \
+ CASE_AVX_INS_COMMON(Inst##SD4, , rr_Int) \
+ CASE_AVX_INS_COMMON(Inst##SS4, , rr_Int)
+
+#define CASE_FMA4_SCALAR_RM(Inst) \
+ CASE_AVX_INS_COMMON(Inst##SD4, , rm) \
+ CASE_AVX_INS_COMMON(Inst##SS4, , rm) \
+ CASE_AVX_INS_COMMON(Inst##SD4, , rm_Int) \
+ CASE_AVX_INS_COMMON(Inst##SS4, , rm_Int)
+
+#define CASE_FMA4_SCALAR_MR(Inst) \
+ CASE_AVX_INS_COMMON(Inst##SD4, , mr) \
+ CASE_AVX_INS_COMMON(Inst##SS4, , mr) \
+ CASE_AVX_INS_COMMON(Inst##SD4, , mr_Int) \
+ CASE_AVX_INS_COMMON(Inst##SS4, , mr_Int)
+
static unsigned getVectorRegSize(unsigned RegNo) {
if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31)
return 512;
@@ -247,14 +281,15 @@ static void printMasking(raw_ostream &OS, const MCInst *MI,
OS << " {z}";
}
-static bool printFMA3Comments(const MCInst *MI, raw_ostream &OS) {
+static bool printFMAComments(const MCInst *MI, raw_ostream &OS,
+ const MCInstrInfo &MCII) {
const char *Mul1Name = nullptr, *Mul2Name = nullptr, *AccName = nullptr;
unsigned NumOperands = MI->getNumOperands();
bool RegForm = false;
bool Negate = false;
StringRef AccStr = "+";
- // The operands for FMA instructions without rounding fall into two forms.
+ // The operands for FMA3 instructions without rounding fall into two forms:
// dest, src1, src2, src3
// dest, src1, mask, src2, src3
// Where src3 is either a register or 5 memory address operands. So to find
@@ -262,9 +297,112 @@ static bool printFMA3Comments(const MCInst *MI, raw_ostream &OS) {
// index from the end by taking into account memory vs register form when
// finding src2.
+ // The operands for FMA4 instructions:
+ // dest, src1, src2, src3
+ // Where src2 or src3 is either a register or 5 memory address operands. So
+ // to find dest and src1 we can index from the front, src2 (reg/mem) follows
+ // and then src3 (reg) will be at the end.
+
switch (MI->getOpcode()) {
default:
return false;
+
+ CASE_FMA4_PACKED_RR(FMADD)
+ CASE_FMA4_SCALAR_RR(FMADD)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ LLVM_FALLTHROUGH;
+ CASE_FMA4_PACKED_RM(FMADD)
+ CASE_FMA4_SCALAR_RM(FMADD)
+ Mul2Name = getRegName(MI->getOperand(2).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ CASE_FMA4_PACKED_MR(FMADD)
+ CASE_FMA4_SCALAR_MR(FMADD)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+
+ CASE_FMA4_PACKED_RR(FMSUB)
+ CASE_FMA4_SCALAR_RR(FMSUB)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ LLVM_FALLTHROUGH;
+ CASE_FMA4_PACKED_RM(FMSUB)
+ CASE_FMA4_SCALAR_RM(FMSUB)
+ Mul2Name = getRegName(MI->getOperand(2).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "-";
+ break;
+ CASE_FMA4_PACKED_MR(FMSUB)
+ CASE_FMA4_SCALAR_MR(FMSUB)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "-";
+ break;
+
+ CASE_FMA4_PACKED_RR(FNMADD)
+ CASE_FMA4_SCALAR_RR(FNMADD)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ LLVM_FALLTHROUGH;
+ CASE_FMA4_PACKED_RM(FNMADD)
+ CASE_FMA4_SCALAR_RM(FNMADD)
+ Mul2Name = getRegName(MI->getOperand(2).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ Negate = true;
+ break;
+ CASE_FMA4_PACKED_MR(FNMADD)
+ CASE_FMA4_SCALAR_MR(FNMADD)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ Negate = true;
+ break;
+
+ CASE_FMA4_PACKED_RR(FNMSUB)
+ CASE_FMA4_SCALAR_RR(FNMSUB)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ LLVM_FALLTHROUGH;
+ CASE_FMA4_PACKED_RM(FNMSUB)
+ CASE_FMA4_SCALAR_RM(FNMSUB)
+ Mul2Name = getRegName(MI->getOperand(2).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "-";
+ Negate = true;
+ break;
+ CASE_FMA4_PACKED_MR(FNMSUB)
+ CASE_FMA4_SCALAR_MR(FNMSUB)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "-";
+ Negate = true;
+ break;
+
+ CASE_FMA4_PACKED_RR(FMADDSUB)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ LLVM_FALLTHROUGH;
+ CASE_FMA4_PACKED_RM(FMADDSUB)
+ Mul2Name = getRegName(MI->getOperand(2).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "+/-";
+ break;
+ CASE_FMA4_PACKED_MR(FMADDSUB)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "+/-";
+ break;
+
+ CASE_FMA4_PACKED_RR(FMSUBADD)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ LLVM_FALLTHROUGH;
+ CASE_FMA4_PACKED_RM(FMSUBADD)
+ Mul2Name = getRegName(MI->getOperand(2).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "-/+";
+ break;
+ CASE_FMA4_PACKED_MR(FMSUBADD)
+ AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ Mul1Name = getRegName(MI->getOperand(1).getReg());
+ AccStr = "-/+";
+ break;
+
CASE_FMA_PACKED_REG(FMADD132)
CASE_FMA_SCALAR_REG(FMADD132)
Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
@@ -476,8 +614,9 @@ static bool printFMA3Comments(const MCInst *MI, raw_ostream &OS) {
if (!Mul2Name) Mul2Name = "mem";
if (!AccName) AccName = "mem";
- OS << DestName << " = ";
- // TODO: Print masking information?
+ OS << DestName;
+ printMasking(OS, MI, MCII);
+ OS << " = ";
if (Negate)
OS << '-';
@@ -504,7 +643,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
unsigned NumOperands = MI->getNumOperands();
bool RegForm = false;
- if (printFMA3Comments(MI, OS))
+ if (printFMAComments(MI, OS, MCII))
return true;
switch (MI->getOpcode()) {
@@ -669,14 +808,14 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::PSLLDQri:
case X86::VPSLLDQri:
case X86::VPSLLDQYri:
- case X86::VPSLLDQZ128rr:
- case X86::VPSLLDQZ256rr:
- case X86::VPSLLDQZrr:
+ case X86::VPSLLDQZ128ri:
+ case X86::VPSLLDQZ256ri:
+ case X86::VPSLLDQZri:
Src1Name = getRegName(MI->getOperand(1).getReg());
LLVM_FALLTHROUGH;
- case X86::VPSLLDQZ128rm:
- case X86::VPSLLDQZ256rm:
- case X86::VPSLLDQZrm:
+ case X86::VPSLLDQZ128mi:
+ case X86::VPSLLDQZ256mi:
+ case X86::VPSLLDQZmi:
DestName = getRegName(MI->getOperand(0).getReg());
if (MI->getOperand(NumOperands - 1).isImm())
DecodePSLLDQMask(getRegOperandNumElts(MI, 8, 0),
@@ -687,14 +826,14 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::PSRLDQri:
case X86::VPSRLDQri:
case X86::VPSRLDQYri:
- case X86::VPSRLDQZ128rr:
- case X86::VPSRLDQZ256rr:
- case X86::VPSRLDQZrr:
+ case X86::VPSRLDQZ128ri:
+ case X86::VPSRLDQZ256ri:
+ case X86::VPSRLDQZri:
Src1Name = getRegName(MI->getOperand(1).getReg());
LLVM_FALLTHROUGH;
- case X86::VPSRLDQZ128rm:
- case X86::VPSRLDQZ256rm:
- case X86::VPSRLDQZrm:
+ case X86::VPSRLDQZ128mi:
+ case X86::VPSRLDQZ256mi:
+ case X86::VPSRLDQZmi:
DestName = getRegName(MI->getOperand(0).getReg());
if (MI->getOperand(NumOperands - 1).isImm())
DecodePSRLDQMask(getRegOperandNumElts(MI, 8, 0),
@@ -1178,28 +1317,28 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeSubVectorBroadcast(16, 8, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
- CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z128, r)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z128, rr)
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
LLVM_FALLTHROUGH;
- CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z128, m)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z128, rm)
DecodeSubVectorBroadcast(4, 2, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
- CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, r)
- CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, r)
+ CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, rr)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, rr)
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
LLVM_FALLTHROUGH;
- CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, m)
- CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, m)
+ CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, rm)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, rm)
DecodeSubVectorBroadcast(8, 2, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
- CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, r)
- CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, r)
+ CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, rr)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, rr)
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
LLVM_FALLTHROUGH;
- CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, m)
- CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, m)
+ CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, rm)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, rm)
DecodeSubVectorBroadcast(16, 2, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index a21555076976..33d70fdb1214 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -13,6 +13,7 @@
#include "X86InstPrinterCommon.h"
#include "X86BaseInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -287,16 +288,23 @@ void X86InstPrinterCommon::printRoundingControl(const MCInst *MI, unsigned Op,
}
}
-/// printPCRelImm - This is used to print an immediate value that ends up
-/// being encoded as a pc-relative value (e.g. for jumps and calls). In
-/// Intel-style these print slightly differently than normal immediates.
-/// for example, a $ is not emitted.
-void X86InstPrinterCommon::printPCRelImm(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+/// This is used to print an immediate value that ends up being encoded as a
+/// pc-relative value (e.g. for jumps and calls). In Intel-style these print
+/// slightly differently than normal immediates. For example, a $ is not emitted.
+///
+/// \p Address The address of the next instruction.
+/// \see MCInstPrinter::printInst
+void X86InstPrinterCommon::printPCRelImm(const MCInst *MI, uint64_t Address,
+ unsigned OpNo, raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.isImm())
- O << formatImm(Op.getImm());
- else {
+ if (Op.isImm()) {
+ if (PrintBranchImmAsAddress) {
+ uint64_t Target = Address + Op.getImm();
+ if (MAI.getCodePointerSize() == 4)
+ Target &= 0xffffffff;
+ O << formatHex(Target);
+ } else
+ O << formatImm(Op.getImm());
+ } else {
assert(Op.isExpr() && "unknown pcrel immediate operand");
// If a symbolic branch target was added as a constant expression then print
// that address in hex.
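
A small worked sketch of the address math above, assuming Address is the anchor the pc-relative immediate is measured from and a 4-byte code pointer (hence the 0xffffffff mask):

#include <cstdint>
#include <cstdio>

// Format a pc-relative branch immediate as an absolute 32-bit target.
static void printBranchTarget(uint64_t Address, int64_t Imm) {
  uint64_t Target = (Address + static_cast<uint64_t>(Imm)) & 0xffffffff;
  std::printf("0x%llx\n", static_cast<unsigned long long>(Target));
}

// printBranchTarget(0x401020, -32) prints 0x401000.
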
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
index 8e28f24b619a..bb12ede3b729 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
@@ -29,7 +29,9 @@ public:
void printVPCMPMnemonic(const MCInst *MI, raw_ostream &OS);
void printCMPMnemonic(const MCInst *MI, bool IsVCmp, raw_ostream &OS);
void printRoundingControl(const MCInst *MI, unsigned Op, raw_ostream &O);
- void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printPCRelImm(const MCInst *MI, uint64_t Address, unsigned OpNo,
+ raw_ostream &O);
+
protected:
void printInstFlags(const MCInst *MI, raw_ostream &O);
void printOptionalSegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
index f4bb0fbf62cd..d1eb4d09851d 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
@@ -45,8 +45,7 @@ void X86IntelInstPrinter::printInst(const MCInst *MI, uint64_t Address,
if (MI->getOpcode() == X86::DATA16_PREFIX &&
STI.getFeatureBits()[X86::Mode16Bit]) {
OS << "\tdata32";
- } else if (!printAliasInstr(MI, OS) &&
- !printVecCompareInstr(MI, OS))
+ } else if (!printAliasInstr(MI, Address, OS) && !printVecCompareInstr(MI, OS))
printInstruction(MI, Address, OS);
// Next always print the annotation.
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h
index b409b20cbea8..82baf611df03 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h
@@ -31,9 +31,10 @@ public:
// Autogenerated by tblgen, returns true if we successfully printed an
// alias.
- bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
- void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx, raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS);
+ void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpIdx, unsigned PrintMethodIdx,
+ raw_ostream &O);
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
@@ -47,14 +48,6 @@ public:
void printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &O);
void printSTiRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
- void printanymem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- printMemReference(MI, OpNo, O);
- }
-
- void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- printMemReference(MI, OpNo, O);
- }
-
void printbytemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "byte ptr ";
printMemReference(MI, OpNo, O);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index d986c829d98e..c294da6baffa 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -71,8 +71,6 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
// (actually, must, since otherwise the non-extern relocations we produce
// overwhelm ld64's tiny little mind and it fails).
DwarfFDESymbolsUseAbsDiff = true;
-
- UseIntegratedAssembler = true;
}
X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
@@ -102,10 +100,6 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
-
- // Always enable the integrated assembler by default.
- // Clang also enabled it when the OS is Solaris but that is redundant here.
- UseIntegratedAssembler = true;
}
const MCExpr *
@@ -141,8 +135,16 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
TextAlignFillValue = 0x90;
AllowAtInName = true;
+}
- UseIntegratedAssembler = true;
+void X86MCAsmInfoMicrosoftMASM::anchor() { }
+
+X86MCAsmInfoMicrosoftMASM::X86MCAsmInfoMicrosoftMASM(const Triple &Triple)
+ : X86MCAsmInfoMicrosoft(Triple) {
+ DollarIsPC = true;
+ SeparatorString = "\n";
+ CommentString = ";";
+ AllowSymbolAtNameStart = true;
}
void X86MCAsmInfoGNUCOFF::anchor() { }
@@ -164,6 +166,4 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
TextAlignFillValue = 0x90;
AllowAtInName = true;
-
- UseIntegratedAssembler = true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
index b2369647a40f..ce8e84fb96b9 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
@@ -13,7 +13,6 @@
#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCASMINFO_H
#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmInfoCOFF.h"
#include "llvm/MC/MCAsmInfoDarwin.h"
#include "llvm/MC/MCAsmInfoELF.h"
@@ -49,6 +48,13 @@ public:
explicit X86MCAsmInfoMicrosoft(const Triple &Triple);
};
+class X86MCAsmInfoMicrosoftMASM : public X86MCAsmInfoMicrosoft {
+ void anchor() override;
+
+public:
+ explicit X86MCAsmInfoMicrosoftMASM(const Triple &Triple);
+};
+
class X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF {
void anchor() override;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 54a293702bd0..7dea0760a831 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -55,83 +55,64 @@ public:
const MCSubtargetInfo &STI) const override;
private:
- unsigned getX86RegNum(const MCOperand &MO) const {
- return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()) & 0x7;
- }
+ unsigned getX86RegNum(const MCOperand &MO) const;
- unsigned getX86RegEncoding(const MCInst &MI, unsigned OpNum) const {
- return Ctx.getRegisterInfo()->getEncodingValue(
- MI.getOperand(OpNum).getReg());
- }
+ unsigned getX86RegEncoding(const MCInst &MI, unsigned OpNum) const;
/// \param MI a single low-level machine instruction.
/// \param OpNum the operand #.
/// \returns true if the OpNumth operand of MI require a bit to be set in
/// REX prefix.
- bool isREXExtendedReg(const MCInst &MI, unsigned OpNum) const {
- return (getX86RegEncoding(MI, OpNum) >> 3) & 1;
- }
-
- void emitByte(uint8_t C, unsigned &CurByte, raw_ostream &OS) const {
- OS << (char)C;
- ++CurByte;
- }
-
- void emitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
- raw_ostream &OS) const {
- // Output the constant in little endian byte order.
- for (unsigned i = 0; i != Size; ++i) {
- emitByte(Val & 255, CurByte, OS);
- Val >>= 8;
- }
- }
+ bool isREXExtendedReg(const MCInst &MI, unsigned OpNum) const;
void emitImmediate(const MCOperand &Disp, SMLoc Loc, unsigned ImmSize,
- MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS,
+ MCFixupKind FixupKind, uint64_t StartByte, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups, int ImmOffset = 0) const;
- static uint8_t modRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) {
- assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
- return RM | (RegOpcode << 3) | (Mod << 6);
- }
-
void emitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld,
- unsigned &CurByte, raw_ostream &OS) const {
- emitByte(modRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)), CurByte, OS);
- }
+ raw_ostream &OS) const;
void emitSIBByte(unsigned SS, unsigned Index, unsigned Base,
- unsigned &CurByte, raw_ostream &OS) const {
- // SIB byte is in the same format as the modRMByte.
- emitByte(modRMByte(SS, Index, Base), CurByte, OS);
- }
+ raw_ostream &OS) const;
void emitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField,
- uint64_t TSFlags, bool Rex, unsigned &CurByte,
+ uint64_t TSFlags, bool HasREX, uint64_t StartByte,
raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ const MCSubtargetInfo &STI,
+ bool ForceSIB = false) const;
- void emitPrefixImpl(uint64_t TSFlags, unsigned &CurOp, unsigned &CurByte,
- bool &Rex, const MCInst &MI, const MCInstrDesc &Desc,
- const MCSubtargetInfo &STI, raw_ostream &OS) const;
+ bool emitPrefixImpl(unsigned &CurOp, const MCInst &MI,
+ const MCSubtargetInfo &STI, raw_ostream &OS) const;
- void emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
- const MCInst &MI, const MCInstrDesc &Desc,
+ void emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
raw_ostream &OS) const;
- void emitSegmentOverridePrefix(unsigned &CurByte, unsigned SegOperand,
- const MCInst &MI, raw_ostream &OS) const;
+ void emitSegmentOverridePrefix(unsigned SegOperand, const MCInst &MI,
+ raw_ostream &OS) const;
- bool emitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
- const MCInst &MI, const MCInstrDesc &Desc,
+ bool emitOpcodePrefix(int MemOperand, const MCInst &MI,
const MCSubtargetInfo &STI, raw_ostream &OS) const;
- uint8_t determineREXPrefix(const MCInst &MI, uint64_t TSFlags, int MemOperand,
- const MCInstrDesc &Desc) const;
+ bool emitREXPrefix(int MemOperand, const MCInst &MI, raw_ostream &OS) const;
};
} // end anonymous namespace
+static uint8_t modRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) {
+ assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
+ return RM | (RegOpcode << 3) | (Mod << 6);
+}
+
+static void emitByte(uint8_t C, raw_ostream &OS) { OS << static_cast<char>(C); }
+
+static void emitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) {
+ // Output the constant in little endian byte order.
+ for (unsigned i = 0; i != Size; ++i) {
+ emitByte(Val & 255, OS);
+ Val >>= 8;
+ }
+}
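
Two quick checks against the static helpers above: modRMByte(3, 2, 1) packs mod=11, reg=010, r/m=001 into 0xD1, and emitConstant(0x12345678, 4, OS) writes the bytes 0x78 0x56 0x34 0x12 in little-endian order.

// modRMByte(3, 2, 1): r/m in bits 0-2, reg in bits 3-5, mod in bits 6-7.
static_assert((1 | (2 << 3) | (3 << 6)) == 0xD1, "ModRM packing");
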
+
+/// \returns true if this signed displacement fits in an 8-bit sign-extended
/// field.
static bool isDisp8(int Value) { return Value == (int8_t)Value; }
@@ -275,7 +256,8 @@ static bool hasSecRelSymbolRef(const MCExpr *Expr) {
static bool isPCRel32Branch(const MCInst &MI, const MCInstrInfo &MCII) {
unsigned Opcode = MI.getOpcode();
const MCInstrDesc &Desc = MCII.get(Opcode);
- if ((Opcode != X86::CALL64pcrel32 && Opcode != X86::JMP_4) ||
+ if ((Opcode != X86::CALL64pcrel32 && Opcode != X86::JMP_4 &&
+ Opcode != X86::JCC_4) ||
getImmFixupKind(Desc.TSFlags) != FK_PCRel_4)
return false;
@@ -288,9 +270,27 @@ static bool isPCRel32Branch(const MCInst &MI, const MCInstrInfo &MCII) {
return Ref && Ref->getKind() == MCSymbolRefExpr::VK_None;
}
+unsigned X86MCCodeEmitter::getX86RegNum(const MCOperand &MO) const {
+ return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()) & 0x7;
+}
+
+unsigned X86MCCodeEmitter::getX86RegEncoding(const MCInst &MI,
+ unsigned OpNum) const {
+ return Ctx.getRegisterInfo()->getEncodingValue(MI.getOperand(OpNum).getReg());
+}
+
+/// \param MI a single low-level machine instruction.
+/// \param OpNum the operand #.
+/// \returns true if the OpNumth operand of MI requires a bit to be set in the
+/// REX prefix.
+bool X86MCCodeEmitter::isREXExtendedReg(const MCInst &MI,
+ unsigned OpNum) const {
+ return (getX86RegEncoding(MI, OpNum) >> 3) & 1;
+}
+
void X86MCCodeEmitter::emitImmediate(const MCOperand &DispOp, SMLoc Loc,
unsigned Size, MCFixupKind FixupKind,
- unsigned &CurByte, raw_ostream &OS,
+ uint64_t StartByte, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
int ImmOffset) const {
const MCExpr *Expr = nullptr;
@@ -299,7 +299,7 @@ void X86MCCodeEmitter::emitImmediate(const MCOperand &DispOp, SMLoc Loc,
// relocation, emit it now.
if (FixupKind != FK_PCRel_1 && FixupKind != FK_PCRel_2 &&
FixupKind != FK_PCRel_4) {
- emitConstant(DispOp.getImm() + ImmOffset, Size, CurByte, OS);
+ emitConstant(DispOp.getImm() + ImmOffset, Size, OS);
return;
}
Expr = MCConstantExpr::create(DispOp.getImm(), Ctx);
@@ -322,7 +322,7 @@ void X86MCCodeEmitter::emitImmediate(const MCOperand &DispOp, SMLoc Loc,
}
if (Kind == GOT_Normal)
- ImmOffset = CurByte;
+ ImmOffset = static_cast<int>(OS.tell() - StartByte);
} else if (Expr->getKind() == MCExpr::SymbolRef) {
if (hasSecRelSymbolRef(Expr)) {
FixupKind = MCFixupKind(FK_SecRel_4);
@@ -361,16 +361,30 @@ void X86MCCodeEmitter::emitImmediate(const MCOperand &DispOp, SMLoc Loc,
Ctx);
// Emit a symbolic constant as a fixup and 4 zeros.
- Fixups.push_back(MCFixup::create(CurByte, Expr, FixupKind, Loc));
- emitConstant(0, Size, CurByte, OS);
+ Fixups.push_back(MCFixup::create(static_cast<uint32_t>(OS.tell() - StartByte),
+ Expr, FixupKind, Loc));
+ emitConstant(0, Size, OS);
+}
+
+void X86MCCodeEmitter::emitRegModRMByte(const MCOperand &ModRMReg,
+ unsigned RegOpcodeFld,
+ raw_ostream &OS) const {
+ emitByte(modRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)), OS);
+}
+
+void X86MCCodeEmitter::emitSIBByte(unsigned SS, unsigned Index, unsigned Base,
+ raw_ostream &OS) const {
+ // SIB byte is in the same format as the modRMByte.
+ emitByte(modRMByte(SS, Index, Base), OS);
}
void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
unsigned RegOpcodeField,
- uint64_t TSFlags, bool Rex,
- unsigned &CurByte, raw_ostream &OS,
+ uint64_t TSFlags, bool HasREX,
+ uint64_t StartByte, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+ const MCSubtargetInfo &STI,
+ bool ForceSIB) const {
const MCOperand &Disp = MI.getOperand(Op + X86::AddrDisp);
const MCOperand &Base = MI.getOperand(Op + X86::AddrBaseReg);
const MCOperand &Scale = MI.getOperand(Op + X86::AddrScaleAmt);
@@ -383,8 +397,9 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
BaseReg == X86::EIP) { // [disp32+rIP] in X86-64 mode
assert(STI.hasFeature(X86::Mode64Bit) &&
"Rip-relative addressing requires 64-bit mode");
- assert(IndexReg.getReg() == 0 && "Invalid rip-relative address");
- emitByte(modRMByte(0, RegOpcodeField, 5), CurByte, OS);
+ assert(IndexReg.getReg() == 0 && !ForceSIB &&
+ "Invalid rip-relative address");
+ emitByte(modRMByte(0, RegOpcodeField, 5), OS);
unsigned Opcode = MI.getOpcode();
// movq loads are handled with a special relocation form which allows the
@@ -395,7 +410,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
default:
return X86::reloc_riprel_4byte;
case X86::MOV64rm:
- assert(Rex);
+ assert(HasREX);
return X86::reloc_riprel_4byte_movq_load;
case X86::CALL64m:
case X86::JMP64m:
@@ -409,8 +424,8 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
case X86::SBB64rm:
case X86::SUB64rm:
case X86::XOR64rm:
- return Rex ? X86::reloc_riprel_4byte_relax_rex
- : X86::reloc_riprel_4byte_relax;
+ return HasREX ? X86::reloc_riprel_4byte_relax_rex
+ : X86::reloc_riprel_4byte_relax;
}
}();
@@ -425,7 +440,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
? X86II::getSizeOfImm(TSFlags)
: 0;
- emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), CurByte, OS,
+ emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), StartByte, OS,
Fixups, -ImmSize);
return;
}
@@ -472,23 +487,23 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
if (Disp.isImm() && isDisp8(Disp.getImm())) {
if (Disp.getImm() == 0 && RMfield != 6) {
// There is no displacement; just the register.
- emitByte(modRMByte(0, RegOpcodeField, RMfield), CurByte, OS);
+ emitByte(modRMByte(0, RegOpcodeField, RMfield), OS);
return;
}
// Use the [REG]+disp8 form, including for [BP] which cannot be encoded.
- emitByte(modRMByte(1, RegOpcodeField, RMfield), CurByte, OS);
- emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups);
+ emitByte(modRMByte(1, RegOpcodeField, RMfield), OS);
+ emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, OS, Fixups);
return;
}
// This is the [REG]+disp16 case.
- emitByte(modRMByte(2, RegOpcodeField, RMfield), CurByte, OS);
+ emitByte(modRMByte(2, RegOpcodeField, RMfield), OS);
} else {
// There is no BaseReg; this is the plain [disp16] case.
- emitByte(modRMByte(0, RegOpcodeField, 6), CurByte, OS);
+ emitByte(modRMByte(0, RegOpcodeField, 6), OS);
}
// Emit 16-bit displacement for plain disp16 or [REG]+disp16 cases.
- emitImmediate(Disp, MI.getLoc(), 2, FK_Data_2, CurByte, OS, Fixups);
+ emitImmediate(Disp, MI.getLoc(), 2, FK_Data_2, StartByte, OS, Fixups);
return;
}
@@ -498,7 +513,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// 2-7) and absolute references.
if ( // The SIB byte must be used if there is an index register.
- IndexReg.getReg() == 0 &&
+ !ForceSIB && IndexReg.getReg() == 0 &&
// The SIB byte must be used if the base is ESP/RSP/R12, all of which
// encode to an R/M value of 4, which indicates that a SIB byte is
// present.
@@ -508,8 +523,8 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
(!STI.hasFeature(X86::Mode64Bit) || BaseReg != 0)) {
if (BaseReg == 0) { // [disp32] in X86-32 mode
- emitByte(modRMByte(0, RegOpcodeField, 5), CurByte, OS);
- emitImmediate(Disp, MI.getLoc(), 4, FK_Data_4, CurByte, OS, Fixups);
+ emitByte(modRMByte(0, RegOpcodeField, 5), OS);
+ emitImmediate(Disp, MI.getLoc(), 4, FK_Data_4, StartByte, OS, Fixups);
return;
}
@@ -519,7 +534,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// by emitting a displacement of 0 below.
if (BaseRegNo != N86::EBP) {
if (Disp.isImm() && Disp.getImm() == 0) {
- emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
+ emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), OS);
return;
}
@@ -530,7 +545,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// This is exclusively used by call *a@tlscall(base). The relocation
// (R_386_TLSCALL or R_X86_64_TLSCALL) applies to the beginning.
Fixups.push_back(MCFixup::create(0, Sym, FK_NONE, MI.getLoc()));
- emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
+ emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), OS);
return;
}
}
@@ -539,27 +554,27 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
if (Disp.isImm()) {
if (!HasEVEX && isDisp8(Disp.getImm())) {
- emitByte(modRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS);
- emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups);
+ emitByte(modRMByte(1, RegOpcodeField, BaseRegNo), OS);
+ emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, OS, Fixups);
return;
}
// Try EVEX compressed 8-bit displacement first; if failed, fall back to
// 32-bit displacement.
int CDisp8 = 0;
if (HasEVEX && isCDisp8(TSFlags, Disp.getImm(), CDisp8)) {
- emitByte(modRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS);
- emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups,
+ emitByte(modRMByte(1, RegOpcodeField, BaseRegNo), OS);
+ emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, OS, Fixups,
CDisp8 - Disp.getImm());
return;
}
}
// Otherwise, emit the most general non-SIB encoding: [REG+disp32]
- emitByte(modRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
+ emitByte(modRMByte(2, RegOpcodeField, BaseRegNo), OS);
unsigned Opcode = MI.getOpcode();
unsigned FixupKind = Opcode == X86::MOV32rm ? X86::reloc_signed_4byte_relax
: X86::reloc_signed_4byte;
- emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), CurByte, OS,
+ emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), StartByte, OS,
Fixups);
return;
}
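The disp8 paths above depend on the displacement fitting in a signed byte; as a minimal standalone sketch (hypothetical helper, not the in-tree isDisp8), the check reduces to a signed 8-bit range test:

#include <cstdint>

// A displacement can use the disp8 form only when it fits in a signed 8-bit field.
static bool fitsInDisp8(int64_t Value) {
  return Value >= -128 && Value <= 127;
}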
@@ -575,30 +590,30 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
if (BaseReg == 0) {
// If there is no base register, we emit the special case SIB byte with
// MOD=0, BASE=5, to JUST get the index, scale, and displacement.
- emitByte(modRMByte(0, RegOpcodeField, 4), CurByte, OS);
+ emitByte(modRMByte(0, RegOpcodeField, 4), OS);
ForceDisp32 = true;
} else if (!Disp.isImm()) {
// Emit the normal disp32 encoding.
- emitByte(modRMByte(2, RegOpcodeField, 4), CurByte, OS);
+ emitByte(modRMByte(2, RegOpcodeField, 4), OS);
ForceDisp32 = true;
} else if (Disp.getImm() == 0 &&
// Base reg can't be anything that ends up with '5' as the base
// reg, it is the magic [*] nomenclature that indicates no base.
BaseRegNo != N86::EBP) {
// Emit no displacement ModR/M byte
- emitByte(modRMByte(0, RegOpcodeField, 4), CurByte, OS);
+ emitByte(modRMByte(0, RegOpcodeField, 4), OS);
} else if (!HasEVEX && isDisp8(Disp.getImm())) {
// Emit the disp8 encoding.
- emitByte(modRMByte(1, RegOpcodeField, 4), CurByte, OS);
+ emitByte(modRMByte(1, RegOpcodeField, 4), OS);
ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
} else if (HasEVEX && isCDisp8(TSFlags, Disp.getImm(), CDisp8)) {
// Emit the disp8 encoding.
- emitByte(modRMByte(1, RegOpcodeField, 4), CurByte, OS);
+ emitByte(modRMByte(1, RegOpcodeField, 4), OS);
ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
ImmOffset = CDisp8 - Disp.getImm();
} else {
// Emit the normal disp32 encoding.
- emitByte(modRMByte(2, RegOpcodeField, 4), CurByte, OS);
+ emitByte(modRMByte(2, RegOpcodeField, 4), OS);
}
// Calculate what the SS field value should be...
@@ -613,77 +628,78 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
IndexRegNo = getX86RegNum(IndexReg);
else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
IndexRegNo = 4;
- emitSIBByte(SS, IndexRegNo, 5, CurByte, OS);
+ emitSIBByte(SS, IndexRegNo, 5, OS);
} else {
unsigned IndexRegNo;
if (IndexReg.getReg())
IndexRegNo = getX86RegNum(IndexReg);
else
IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
- emitSIBByte(SS, IndexRegNo, getX86RegNum(Base), CurByte, OS);
+ emitSIBByte(SS, IndexRegNo, getX86RegNum(Base), OS);
}
// Do we need to output a displacement?
if (ForceDisp8)
- emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups,
+ emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, OS, Fixups,
ImmOffset);
else if (ForceDisp32 || Disp.getImm() != 0)
emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte),
- CurByte, OS, Fixups);
+ StartByte, OS, Fixups);
}
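For reference, the ModRM and SIB bytes emitted throughout this function are plain bit-packed fields; a self-contained sketch of that packing (hypothetical helpers mirroring modRMByte/emitSIBByte):

#include <cstdint>

// ModRM byte: mod (2 bits) | reg/opcode (3 bits) | r/m (3 bits).
static uint8_t packModRM(uint8_t Mod, uint8_t Reg, uint8_t RM) {
  return uint8_t(((Mod & 0x3) << 6) | ((Reg & 0x7) << 3) | (RM & 0x7));
}

// SIB byte: scale (2 bits) | index (3 bits) | base (3 bits).
static uint8_t packSIB(uint8_t Scale, uint8_t Index, uint8_t Base) {
  return uint8_t(((Scale & 0x3) << 6) | ((Index & 0x7) << 3) | (Base & 0x7));
}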
-void X86MCCodeEmitter::emitPrefixImpl(uint64_t TSFlags, unsigned &CurOp,
- unsigned &CurByte, bool &Rex,
- const MCInst &MI, const MCInstrDesc &Desc,
- const MCSubtargetInfo &STI,
- raw_ostream &OS) const {
+/// Emit all instruction prefixes.
+///
+/// \returns true if REX prefix is used, otherwise returns false.
+bool X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, const MCInst &MI,
+ const MCSubtargetInfo &STI,
+ raw_ostream &OS) const {
+ uint64_t TSFlags = MCII.get(MI.getOpcode()).TSFlags;
// Determine where the memory operand starts, if present.
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
- if (MemoryOperand != -1)
- MemoryOperand += CurOp;
-
// Emit segment override opcode prefix as needed.
- if (MemoryOperand >= 0)
- emitSegmentOverridePrefix(CurByte, MemoryOperand + X86::AddrSegmentReg, MI,
- OS);
+ if (MemoryOperand != -1) {
+ MemoryOperand += CurOp;
+ emitSegmentOverridePrefix(MemoryOperand + X86::AddrSegmentReg, MI, OS);
+ }
// Emit the repeat opcode prefix as needed.
unsigned Flags = MI.getFlags();
if (TSFlags & X86II::REP || Flags & X86::IP_HAS_REPEAT)
- emitByte(0xF3, CurByte, OS);
+ emitByte(0xF3, OS);
if (Flags & X86::IP_HAS_REPEAT_NE)
- emitByte(0xF2, CurByte, OS);
+ emitByte(0xF2, OS);
// Emit the address size opcode prefix as needed.
- bool need_address_override;
+ bool NeedAddressOverride;
uint64_t AdSize = TSFlags & X86II::AdSizeMask;
if ((STI.hasFeature(X86::Mode16Bit) && AdSize == X86II::AdSize32) ||
(STI.hasFeature(X86::Mode32Bit) && AdSize == X86II::AdSize16) ||
(STI.hasFeature(X86::Mode64Bit) && AdSize == X86II::AdSize32)) {
- need_address_override = true;
+ NeedAddressOverride = true;
} else if (MemoryOperand < 0) {
- need_address_override = false;
+ NeedAddressOverride = false;
} else if (STI.hasFeature(X86::Mode64Bit)) {
assert(!is16BitMemOperand(MI, MemoryOperand, STI));
- need_address_override = is32BitMemOperand(MI, MemoryOperand);
+ NeedAddressOverride = is32BitMemOperand(MI, MemoryOperand);
} else if (STI.hasFeature(X86::Mode32Bit)) {
assert(!is64BitMemOperand(MI, MemoryOperand));
- need_address_override = is16BitMemOperand(MI, MemoryOperand, STI);
+ NeedAddressOverride = is16BitMemOperand(MI, MemoryOperand, STI);
} else {
assert(STI.hasFeature(X86::Mode16Bit));
assert(!is64BitMemOperand(MI, MemoryOperand));
- need_address_override = !is16BitMemOperand(MI, MemoryOperand, STI);
+ NeedAddressOverride = !is16BitMemOperand(MI, MemoryOperand, STI);
}
- if (need_address_override)
- emitByte(0x67, CurByte, OS);
+ if (NeedAddressOverride)
+ emitByte(0x67, OS);
// Encoding type for this instruction.
uint64_t Encoding = TSFlags & X86II::EncodingMask;
- if (Encoding == 0)
- Rex = emitOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, STI, OS);
+ bool HasREX = false;
+ if (Encoding)
+ emitVEXOpcodePrefix(MemoryOperand, MI, OS);
else
- emitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
+ HasREX = emitOpcodePrefix(MemoryOperand, MI, STI, OS);
uint64_t Form = TSFlags & X86II::FormMask;
switch (Form) {
@@ -697,11 +713,11 @@ void X86MCCodeEmitter::emitPrefixImpl(uint64_t TSFlags, unsigned &CurOp,
"SI and DI register sizes do not match");
// Emit segment override opcode prefix as needed (not for %ds).
if (MI.getOperand(2).getReg() != X86::DS)
- emitSegmentOverridePrefix(CurByte, 2, MI, OS);
+ emitSegmentOverridePrefix(2, MI, OS);
// Emit AdSize prefix as needed.
if ((!STI.hasFeature(X86::Mode32Bit) && siReg == X86::ESI) ||
(STI.hasFeature(X86::Mode32Bit) && siReg == X86::SI))
- emitByte(0x67, CurByte, OS);
+ emitByte(0x67, OS);
CurOp += 3; // Consume operands.
break;
}
@@ -709,11 +725,11 @@ void X86MCCodeEmitter::emitPrefixImpl(uint64_t TSFlags, unsigned &CurOp,
unsigned siReg = MI.getOperand(0).getReg();
// Emit segment override opcode prefix as needed (not for %ds).
if (MI.getOperand(1).getReg() != X86::DS)
- emitSegmentOverridePrefix(CurByte, 1, MI, OS);
+ emitSegmentOverridePrefix(1, MI, OS);
// Emit AdSize prefix as needed.
if ((!STI.hasFeature(X86::Mode32Bit) && siReg == X86::ESI) ||
(STI.hasFeature(X86::Mode32Bit) && siReg == X86::SI))
- emitByte(0x67, CurByte, OS);
+ emitByte(0x67, OS);
CurOp += 2; // Consume operands.
break;
}
@@ -722,24 +738,26 @@ void X86MCCodeEmitter::emitPrefixImpl(uint64_t TSFlags, unsigned &CurOp,
// Emit AdSize prefix as needed.
if ((!STI.hasFeature(X86::Mode32Bit) && siReg == X86::EDI) ||
(STI.hasFeature(X86::Mode32Bit) && siReg == X86::DI))
- emitByte(0x67, CurByte, OS);
+ emitByte(0x67, OS);
++CurOp; // Consume operand.
break;
}
case X86II::RawFrmMemOffs: {
// Emit segment override opcode prefix as needed.
- emitSegmentOverridePrefix(CurByte, 1, MI, OS);
+ emitSegmentOverridePrefix(1, MI, OS);
break;
}
}
+
+ return HasREX;
}
-/// emitVEXOpcodePrefix - AVX instructions are encoded using a opcode prefix
-/// called VEX.
-void X86MCCodeEmitter::emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
- int MemOperand, const MCInst &MI,
- const MCInstrDesc &Desc,
+/// AVX instructions are encoded using an opcode prefix called VEX.
+void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
raw_ostream &OS) const {
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
assert(!(TSFlags & X86II::LOCK) && "Can't have LOCK VEX.");
uint64_t Encoding = TSFlags & X86II::EncodingMask;
@@ -868,8 +886,11 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
switch (TSFlags & X86II::FormMask) {
default:
llvm_unreachable("Unexpected form in emitVEXOpcodePrefix!");
+ case X86II::MRM_C0:
case X86II::RawFrm:
+ case X86II::PrefixByte:
break;
+ case X86II::MRMDestMemFSIB:
case X86II::MRMDestMem: {
// MRMDestMem instructions forms:
// MemAddr, src1(ModR/M)
@@ -900,6 +921,7 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
EVEX_R2 = ~(RegEnc >> 4) & 1;
break;
}
+ case X86II::MRMSrcMemFSIB:
case X86II::MRMSrcMem: {
// MRMSrcMem instructions forms:
// src1(ModR/M), MemAddr
@@ -1081,6 +1103,15 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
EncodeRC = true;
break;
}
+ case X86II::MRMr0: {
+ // MRMr0 instructions forms:
+ // 11:rrr:000
+ // dst(ModR/M)
+ unsigned RegEnc = getX86RegEncoding(MI, CurOp++);
+ VEX_R = ~(RegEnc >> 3) & 1;
+ EVEX_R2 = ~(RegEnc >> 4) & 1;
+ break;
+ }
case X86II::MRM0r:
case X86II::MRM1r:
case X86II::MRM2r:
@@ -1127,15 +1158,15 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// Can we use the 2 byte VEX prefix?
if (!(MI.getFlags() & X86::IP_USE_VEX3) && Encoding == X86II::VEX &&
VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) {
- emitByte(0xC5, CurByte, OS);
- emitByte(LastByte | (VEX_R << 7), CurByte, OS);
+ emitByte(0xC5, OS);
+ emitByte(LastByte | (VEX_R << 7), OS);
return;
}
// 3 byte VEX prefix
- emitByte(Encoding == X86II::XOP ? 0x8F : 0xC4, CurByte, OS);
- emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
- emitByte(LastByte | (VEX_W << 7), CurByte, OS);
+ emitByte(Encoding == X86II::XOP ? 0x8F : 0xC4, OS);
+ emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, OS);
+ emitByte(LastByte | (VEX_W << 7), OS);
} else {
assert(Encoding == X86II::EVEX && "unknown encoding!");
// EVEX opcode prefix can have 4 bytes
@@ -1146,144 +1177,137 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
assert((VEX_5M & 0x3) == VEX_5M &&
"More than 2 significant bits in VEX.m-mmmm fields for EVEX!");
- emitByte(0x62, CurByte, OS);
+ emitByte(0x62, OS);
emitByte((VEX_R << 7) | (VEX_X << 6) | (VEX_B << 5) | (EVEX_R2 << 4) |
VEX_5M,
- CurByte, OS);
- emitByte((VEX_W << 7) | (VEX_4V << 3) | (EVEX_U << 2) | VEX_PP, CurByte,
OS);
+ emitByte((VEX_W << 7) | (VEX_4V << 3) | (EVEX_U << 2) | VEX_PP, OS);
if (EncodeRC)
emitByte((EVEX_z << 7) | (EVEX_rc << 5) | (EVEX_b << 4) | (EVEX_V2 << 3) |
EVEX_aaa,
- CurByte, OS);
+ OS);
else
emitByte((EVEX_z << 7) | (EVEX_L2 << 6) | (VEX_L << 5) | (EVEX_b << 4) |
(EVEX_V2 << 3) | EVEX_aaa,
- CurByte, OS);
+ OS);
}
}
-/// Determine if the MCInst has to be encoded with a X86-64 REX prefix which
-/// specifies 1) 64-bit instructions, 2) non-default operand size, and 3) use
-/// of X86-64 extended registers.
-uint8_t X86MCCodeEmitter::determineREXPrefix(const MCInst &MI, uint64_t TSFlags,
- int MemOperand,
- const MCInstrDesc &Desc) const {
- uint8_t REX = 0;
- bool UsesHighByteReg = false;
-
- if (TSFlags & X86II::REX_W)
- REX |= 1 << 3; // set REX.W
+/// Emit REX prefix which specifies
+/// 1) 64-bit instructions,
+/// 2) non-default operand size, and
+/// 3) use of X86-64 extended registers.
+///
+/// \returns true if REX prefix is used, otherwise returns false.
+bool X86MCCodeEmitter::emitREXPrefix(int MemOperand, const MCInst &MI,
+ raw_ostream &OS) const {
+ uint8_t REX = [&, MemOperand]() {
+ uint8_t REX = 0;
+ bool UsesHighByteReg = false;
+
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
+ if (TSFlags & X86II::REX_W)
+ REX |= 1 << 3; // set REX.W
+
+ if (MI.getNumOperands() == 0)
+ return REX;
+
+ unsigned NumOps = MI.getNumOperands();
+ unsigned CurOp = X86II::getOperandBias(Desc);
+
+ // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
+ for (unsigned i = CurOp; i != NumOps; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
+ UsesHighByteReg = true;
+ if (X86II::isX86_64NonExtLowByteReg(Reg))
+ // FIXME: The caller of determineREXPrefix slaps this prefix onto
+ // anything that returns non-zero.
+ REX |= 0x40; // REX fixed encoding prefix
+ }
- if (MI.getNumOperands() == 0)
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::AddRegFrm:
+ REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
+ break;
+ case X86II::MRMSrcReg:
+ case X86II::MRMSrcRegCC:
+ REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
+ REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
+ break;
+ case X86II::MRMSrcMem:
+ case X86II::MRMSrcMemCC:
+ REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
+ REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B
+ REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X
+ CurOp += X86::AddrNumOperands;
+ break;
+ case X86II::MRMDestReg:
+ REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
+ REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
+ break;
+ case X86II::MRMDestMem:
+ REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B
+ REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X
+ CurOp += X86::AddrNumOperands;
+ REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
+ break;
+ case X86II::MRMXmCC:
+ case X86II::MRMXm:
+ case X86II::MRM0m:
+ case X86II::MRM1m:
+ case X86II::MRM2m:
+ case X86II::MRM3m:
+ case X86II::MRM4m:
+ case X86II::MRM5m:
+ case X86II::MRM6m:
+ case X86II::MRM7m:
+ REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B
+ REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X
+ break;
+ case X86II::MRMXrCC:
+ case X86II::MRMXr:
+ case X86II::MRM0r:
+ case X86II::MRM1r:
+ case X86II::MRM2r:
+ case X86II::MRM3r:
+ case X86II::MRM4r:
+ case X86II::MRM5r:
+ case X86II::MRM6r:
+ case X86II::MRM7r:
+ REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
+ break;
+ case X86II::MRMr0:
+ REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
+ break;
+ case X86II::MRMDestMemFSIB:
+ llvm_unreachable("FSIB format never need REX prefix!");
+ }
+ if (REX && UsesHighByteReg)
+ report_fatal_error(
+ "Cannot encode high byte register in REX-prefixed instruction");
return REX;
+ }();
- unsigned NumOps = MI.getNumOperands();
- unsigned CurOp = X86II::getOperandBias(Desc);
-
- // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
- for (unsigned i = CurOp; i != NumOps; ++i) {
- const MCOperand &MO = MI.getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
- UsesHighByteReg = true;
- if (X86II::isX86_64NonExtLowByteReg(Reg))
- // FIXME: The caller of determineREXPrefix slaps this prefix onto anything
- // that returns non-zero.
- REX |= 0x40; // REX fixed encoding prefix
- }
-
- switch (TSFlags & X86II::FormMask) {
- case X86II::AddRegFrm:
- REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
- break;
- case X86II::MRMSrcReg:
- case X86II::MRMSrcRegCC:
- REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
- REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
- break;
- case X86II::MRMSrcMem:
- case X86II::MRMSrcMemCC:
- REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
- REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B
- REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X
- CurOp += X86::AddrNumOperands;
- break;
- case X86II::MRMDestReg:
- REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
- REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
- break;
- case X86II::MRMDestMem:
- REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B
- REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X
- CurOp += X86::AddrNumOperands;
- REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
- break;
- case X86II::MRMXmCC:
- case X86II::MRMXm:
- case X86II::MRM0m:
- case X86II::MRM1m:
- case X86II::MRM2m:
- case X86II::MRM3m:
- case X86II::MRM4m:
- case X86II::MRM5m:
- case X86II::MRM6m:
- case X86II::MRM7m:
- REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B
- REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X
- break;
- case X86II::MRMXrCC:
- case X86II::MRMXr:
- case X86II::MRM0r:
- case X86II::MRM1r:
- case X86II::MRM2r:
- case X86II::MRM3r:
- case X86II::MRM4r:
- case X86II::MRM5r:
- case X86II::MRM6r:
- case X86II::MRM7r:
- REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
- break;
- }
- if (REX && UsesHighByteReg)
- report_fatal_error(
- "Cannot encode high byte register in REX-prefixed instruction");
+ if (!REX)
+ return false;
- return REX;
+ emitByte(0x40 | REX, OS);
+ return true;
}
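The byte composed above follows the fixed REX layout 0100WRXB, with W at bit 3, R at bit 2, X at bit 1 and B at bit 0; a simplified standalone sketch of that composition (it ignores the bare-0x40 case needed for SPL/BPL/SIL/DIL):

#include <cstdint>

// Returns the REX prefix byte, or 0 when no REX prefix is required.
static uint8_t packREX(bool W, bool R, bool X, bool B) {
  uint8_t WRXB = uint8_t((W << 3) | (R << 2) | (X << 1) | B);
  return WRXB ? uint8_t(0x40 | WRXB) : uint8_t(0);
}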
/// Emit segment override opcode prefix as needed.
-void X86MCCodeEmitter::emitSegmentOverridePrefix(unsigned &CurByte,
- unsigned SegOperand,
+void X86MCCodeEmitter::emitSegmentOverridePrefix(unsigned SegOperand,
const MCInst &MI,
raw_ostream &OS) const {
// Check for explicit segment override on memory operand.
- switch (MI.getOperand(SegOperand).getReg()) {
- default:
- llvm_unreachable("Unknown segment register!");
- case 0:
- break;
- case X86::CS:
- emitByte(0x2E, CurByte, OS);
- break;
- case X86::SS:
- emitByte(0x36, CurByte, OS);
- break;
- case X86::DS:
- emitByte(0x3E, CurByte, OS);
- break;
- case X86::ES:
- emitByte(0x26, CurByte, OS);
- break;
- case X86::FS:
- emitByte(0x64, CurByte, OS);
- break;
- case X86::GS:
- emitByte(0x65, CurByte, OS);
- break;
- }
+ if (unsigned Reg = MI.getOperand(SegOperand).getReg())
+ emitByte(X86::getSegmentOverridePrefixForReg(Reg), OS);
}
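The lookup used above replaces an explicit switch, but the segment-to-prefix mapping itself is unchanged, as in this standalone sketch (a plain enum stands in for the X86:: register numbers):

#include <cstdint>

enum class Seg { None, ES, CS, SS, DS, FS, GS };

// Legacy one-byte segment override prefixes; 0 means no override is emitted.
static uint8_t segmentOverridePrefix(Seg S) {
  switch (S) {
  case Seg::ES: return 0x26;
  case Seg::CS: return 0x2E;
  case Seg::SS: return 0x36;
  case Seg::DS: return 0x3E;
  case Seg::FS: return 0x64;
  case Seg::GS: return 0x65;
  case Seg::None: return 0;
  }
  return 0;
}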
/// Emit all instruction prefixes prior to the opcode.
@@ -1291,48 +1315,44 @@ void X86MCCodeEmitter::emitSegmentOverridePrefix(unsigned &CurByte,
/// \param MemOperand the operand # of the start of a memory operand if present.
/// If not present, it is -1.
///
-/// \returns true if a REX prefix was used.
-bool X86MCCodeEmitter::emitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
- int MemOperand, const MCInst &MI,
- const MCInstrDesc &Desc,
+/// \returns true if REX prefix is used, otherwise returns false.
+bool X86MCCodeEmitter::emitOpcodePrefix(int MemOperand, const MCInst &MI,
const MCSubtargetInfo &STI,
raw_ostream &OS) const {
- bool Ret = false;
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
// Emit the operand size opcode prefix as needed.
if ((TSFlags & X86II::OpSizeMask) ==
(STI.hasFeature(X86::Mode16Bit) ? X86II::OpSize32 : X86II::OpSize16))
- emitByte(0x66, CurByte, OS);
+ emitByte(0x66, OS);
// Emit the LOCK opcode prefix.
if (TSFlags & X86II::LOCK || MI.getFlags() & X86::IP_HAS_LOCK)
- emitByte(0xF0, CurByte, OS);
+ emitByte(0xF0, OS);
// Emit the NOTRACK opcode prefix.
if (TSFlags & X86II::NOTRACK || MI.getFlags() & X86::IP_HAS_NOTRACK)
- emitByte(0x3E, CurByte, OS);
+ emitByte(0x3E, OS);
switch (TSFlags & X86II::OpPrefixMask) {
case X86II::PD: // 66
- emitByte(0x66, CurByte, OS);
+ emitByte(0x66, OS);
break;
case X86II::XS: // F3
- emitByte(0xF3, CurByte, OS);
+ emitByte(0xF3, OS);
break;
case X86II::XD: // F2
- emitByte(0xF2, CurByte, OS);
+ emitByte(0xF2, OS);
break;
}
// Handle REX prefix.
- // FIXME: Can this come before F2 etc to simplify emission?
- if (STI.hasFeature(X86::Mode64Bit)) {
- if (uint8_t REX = determineREXPrefix(MI, TSFlags, MemOperand, Desc)) {
- emitByte(0x40 | REX, CurByte, OS);
- Ret = true;
- }
- } else {
- assert(!(TSFlags & X86II::REX_W) && "REX.W requires 64bit mode.");
- }
+ assert((STI.hasFeature(X86::Mode64Bit) || !(TSFlags & X86II::REX_W)) &&
+ "REX.W requires 64bit mode.");
+ bool HasREX = STI.hasFeature(X86::Mode64Bit)
+ ? emitREXPrefix(MemOperand, MI, OS)
+ : false;
// 0x0F escape code must be emitted just before the opcode.
switch (TSFlags & X86II::OpMapMask) {
@@ -1340,19 +1360,20 @@ bool X86MCCodeEmitter::emitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
case X86II::T8: // 0F 38
case X86II::TA: // 0F 3A
case X86II::ThreeDNow: // 0F 0F, second 0F emitted by caller.
- emitByte(0x0F, CurByte, OS);
+ emitByte(0x0F, OS);
break;
}
switch (TSFlags & X86II::OpMapMask) {
case X86II::T8: // 0F 38
- emitByte(0x38, CurByte, OS);
+ emitByte(0x38, OS);
break;
case X86II::TA: // 0F 3A
- emitByte(0x3A, CurByte, OS);
+ emitByte(0x3A, OS);
break;
}
- return Ret;
+
+ return HasREX;
}
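The ordering this function enforces — legacy prefixes, then the mandatory 66/F3/F2 prefix, then REX, then the 0x0F escape and map byte — can be summarized with a simplified standalone sketch (flat flags instead of TSFlags; NOTRACK and segment overrides omitted):

#include <cstdint>
#include <vector>

struct PrefixChoices {
  bool OpSizeOverride; // 0x66
  bool Lock;           // 0xF0
  uint8_t Mandatory;   // 0x00 (none), 0x66, 0xF3 or 0xF2
  uint8_t REX;         // 0 when unused, otherwise 0x40 | WRXB
  uint8_t Map;         // 0: one-byte map, 1: 0F, 2: 0F 38, 3: 0F 3A
};

// Append prefixes in the order used by the emitter: legacy, mandatory, REX, escape/map.
static void appendPrefixes(const PrefixChoices &P, std::vector<uint8_t> &Out) {
  if (P.OpSizeOverride)
    Out.push_back(0x66);
  if (P.Lock)
    Out.push_back(0xF0);
  if (P.Mandatory)
    Out.push_back(P.Mandatory);
  if (P.REX)
    Out.push_back(P.REX);
  if (P.Map >= 1)
    Out.push_back(0x0F);
  if (P.Map == 2)
    Out.push_back(0x38);
  else if (P.Map == 3)
    Out.push_back(0x3A);
}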
void X86MCCodeEmitter::emitPrefix(const MCInst &MI, raw_ostream &OS,
@@ -1362,16 +1383,12 @@ void X86MCCodeEmitter::emitPrefix(const MCInst &MI, raw_ostream &OS,
uint64_t TSFlags = Desc.TSFlags;
// Pseudo instructions don't get encoded.
- if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
+ if (X86II::isPseudo(TSFlags))
return;
unsigned CurOp = X86II::getOperandBias(Desc);
- // Keep track of the current byte being emitted.
- unsigned CurByte = 0;
-
- bool Rex = false;
- emitPrefixImpl(TSFlags, CurOp, CurByte, Rex, MI, Desc, STI, OS);
+ emitPrefixImpl(CurOp, MI, STI, OS);
}
void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
@@ -1382,17 +1399,15 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
uint64_t TSFlags = Desc.TSFlags;
// Pseudo instructions don't get encoded.
- if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
+ if (X86II::isPseudo(TSFlags))
return;
unsigned NumOps = Desc.getNumOperands();
unsigned CurOp = X86II::getOperandBias(Desc);
- // Keep track of the current byte being emitted.
- unsigned CurByte = 0;
+ uint64_t StartByte = OS.tell();
- bool Rex = false;
- emitPrefixImpl(TSFlags, CurOp, CurByte, Rex, MI, Desc, STI, OS);
+ bool HasREX = emitPrefixImpl(CurOp, MI, STI, OS);
// It uses the VEX.VVVV field?
bool HasVEX_4V = TSFlags & X86II::VEX_4V;
@@ -1422,7 +1437,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::RawFrmDstSrc:
case X86II::RawFrmSrc:
case X86II::RawFrmDst:
- emitByte(BaseOpcode, CurByte, OS);
+ case X86II::PrefixByte:
+ emitByte(BaseOpcode, OS);
break;
case X86II::AddCCFrm: {
// This will be added to the opcode in the fallthrough.
@@ -1431,47 +1447,47 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
--NumOps; // Drop the operand from the end.
LLVM_FALLTHROUGH;
case X86II::RawFrm:
- emitByte(BaseOpcode + OpcodeOffset, CurByte, OS);
+ emitByte(BaseOpcode + OpcodeOffset, OS);
if (!STI.hasFeature(X86::Mode64Bit) || !isPCRel32Branch(MI, MCII))
break;
const MCOperand &Op = MI.getOperand(CurOp++);
emitImmediate(Op, MI.getLoc(), X86II::getSizeOfImm(TSFlags),
- MCFixupKind(X86::reloc_branch_4byte_pcrel), CurByte, OS,
+ MCFixupKind(X86::reloc_branch_4byte_pcrel), StartByte, OS,
Fixups);
break;
}
case X86II::RawFrmMemOffs:
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
emitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
- CurByte, OS, Fixups);
+ StartByte, OS, Fixups);
++CurOp; // skip segment operand
break;
case X86II::RawFrmImm8:
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
emitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
- CurByte, OS, Fixups);
- emitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 1, FK_Data_1, CurByte,
+ StartByte, OS, Fixups);
+ emitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 1, FK_Data_1, StartByte,
OS, Fixups);
break;
case X86II::RawFrmImm16:
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
emitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
- CurByte, OS, Fixups);
- emitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 2, FK_Data_2, CurByte,
+ StartByte, OS, Fixups);
+ emitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 2, FK_Data_2, StartByte,
OS, Fixups);
break;
case X86II::AddRegFrm:
- emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++)), CurByte, OS);
+ emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++)), OS);
break;
case X86II::MRMDestReg: {
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
unsigned SrcRegNum = CurOp + 1;
if (HasEVEX_K) // Skip writemask
@@ -1481,12 +1497,13 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
++SrcRegNum;
emitRegModRMByte(MI.getOperand(CurOp),
- getX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS);
+ getX86RegNum(MI.getOperand(SrcRegNum)), OS);
CurOp = SrcRegNum + 1;
break;
}
+ case X86II::MRMDestMemFSIB:
case X86II::MRMDestMem: {
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
unsigned SrcRegNum = CurOp + X86::AddrNumOperands;
if (HasEVEX_K) // Skip writemask
@@ -1495,13 +1512,14 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
++SrcRegNum;
+ bool ForceSIB = (Form == X86II::MRMDestMemFSIB);
emitMemModRMByte(MI, CurOp, getX86RegNum(MI.getOperand(SrcRegNum)), TSFlags,
- Rex, CurByte, OS, Fixups, STI);
+ HasREX, StartByte, OS, Fixups, STI, ForceSIB);
CurOp = SrcRegNum + 1;
break;
}
case X86II::MRMSrcReg: {
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
unsigned SrcRegNum = CurOp + 1;
if (HasEVEX_K) // Skip writemask
@@ -1511,7 +1529,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
++SrcRegNum;
emitRegModRMByte(MI.getOperand(SrcRegNum),
- getX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
+ getX86RegNum(MI.getOperand(CurOp)), OS);
CurOp = SrcRegNum + 1;
if (HasVEX_I8Reg)
I8RegNum = getX86RegEncoding(MI, CurOp++);
@@ -1521,17 +1539,17 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
break;
}
case X86II::MRMSrcReg4VOp3: {
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
unsigned SrcRegNum = CurOp + 1;
emitRegModRMByte(MI.getOperand(SrcRegNum),
- getX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
+ getX86RegNum(MI.getOperand(CurOp)), OS);
CurOp = SrcRegNum + 1;
++CurOp; // Encoded in VEX.VVVV
break;
}
case X86II::MRMSrcRegOp4: {
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
unsigned SrcRegNum = CurOp + 1;
// Skip 1st src (which is encoded in VEX_VVVV)
@@ -1542,7 +1560,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
I8RegNum = getX86RegEncoding(MI, SrcRegNum++);
emitRegModRMByte(MI.getOperand(SrcRegNum),
- getX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
+ getX86RegNum(MI.getOperand(CurOp)), OS);
CurOp = SrcRegNum + 1;
break;
}
@@ -1551,12 +1569,13 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned SecondOp = CurOp++;
unsigned CC = MI.getOperand(CurOp++).getImm();
- emitByte(BaseOpcode + CC, CurByte, OS);
+ emitByte(BaseOpcode + CC, OS);
emitRegModRMByte(MI.getOperand(SecondOp),
- getX86RegNum(MI.getOperand(FirstOp)), CurByte, OS);
+ getX86RegNum(MI.getOperand(FirstOp)), OS);
break;
}
+ case X86II::MRMSrcMemFSIB:
case X86II::MRMSrcMem: {
unsigned FirstMemOp = CurOp + 1;
@@ -1566,10 +1585,11 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
if (HasVEX_4V)
++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
+ bool ForceSIB = (Form == X86II::MRMSrcMemFSIB);
emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)),
- TSFlags, Rex, CurByte, OS, Fixups, STI);
+ TSFlags, HasREX, StartByte, OS, Fixups, STI, ForceSIB);
CurOp = FirstMemOp + X86::AddrNumOperands;
if (HasVEX_I8Reg)
I8RegNum = getX86RegEncoding(MI, CurOp++);
@@ -1578,10 +1598,10 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRMSrcMem4VOp3: {
unsigned FirstMemOp = CurOp + 1;
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)),
- TSFlags, Rex, CurByte, OS, Fixups, STI);
+ TSFlags, HasREX, StartByte, OS, Fixups, STI);
CurOp = FirstMemOp + X86::AddrNumOperands;
++CurOp; // Encoded in VEX.VVVV.
break;
@@ -1595,10 +1615,10 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
assert(HasVEX_I8Reg && "MRMSrcRegOp4 should imply VEX_I8Reg");
I8RegNum = getX86RegEncoding(MI, FirstMemOp++);
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)),
- TSFlags, Rex, CurByte, OS, Fixups, STI);
+ TSFlags, HasREX, StartByte, OS, Fixups, STI);
CurOp = FirstMemOp + X86::AddrNumOperands;
break;
}
@@ -1608,10 +1628,10 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
CurOp = FirstMemOp + X86::AddrNumOperands;
unsigned CC = MI.getOperand(CurOp++).getImm();
- emitByte(BaseOpcode + CC, CurByte, OS);
+ emitByte(BaseOpcode + CC, OS);
emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(RegOp)),
- TSFlags, Rex, CurByte, OS, Fixups, STI);
+ TSFlags, HasREX, StartByte, OS, Fixups, STI);
break;
}
@@ -1619,8 +1639,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned RegOp = CurOp++;
unsigned CC = MI.getOperand(CurOp++).getImm();
- emitByte(BaseOpcode + CC, CurByte, OS);
- emitRegModRMByte(MI.getOperand(RegOp), 0, CurByte, OS);
+ emitByte(BaseOpcode + CC, OS);
+ emitRegModRMByte(MI.getOperand(RegOp), 0, OS);
break;
}
@@ -1637,10 +1657,13 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
++CurOp;
if (HasEVEX_K) // Skip writemask
++CurOp;
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
emitRegModRMByte(MI.getOperand(CurOp++),
- (Form == X86II::MRMXr) ? 0 : Form - X86II::MRM0r, CurByte,
- OS);
+ (Form == X86II::MRMXr) ? 0 : Form - X86II::MRM0r, OS);
+ break;
+ case X86II::MRMr0:
+ emitByte(BaseOpcode, OS);
+ emitByte(modRMByte(3, getX86RegNum(MI.getOperand(CurOp++)),0), OS);
break;
case X86II::MRMXmCC: {
@@ -1648,9 +1671,10 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
CurOp = FirstMemOp + X86::AddrNumOperands;
unsigned CC = MI.getOperand(CurOp++).getImm();
- emitByte(BaseOpcode + CC, CurByte, OS);
+ emitByte(BaseOpcode + CC, OS);
- emitMemModRMByte(MI, FirstMemOp, 0, TSFlags, Rex, CurByte, OS, Fixups, STI);
+ emitMemModRMByte(MI, FirstMemOp, 0, TSFlags, HasREX, StartByte, OS, Fixups,
+ STI);
break;
}
@@ -1667,13 +1691,25 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
++CurOp;
if (HasEVEX_K) // Skip writemask
++CurOp;
- emitByte(BaseOpcode, CurByte, OS);
+ emitByte(BaseOpcode, OS);
emitMemModRMByte(MI, CurOp,
(Form == X86II::MRMXm) ? 0 : Form - X86II::MRM0m, TSFlags,
- Rex, CurByte, OS, Fixups, STI);
+ HasREX, StartByte, OS, Fixups, STI);
CurOp += X86::AddrNumOperands;
break;
+ case X86II::MRM0X:
+ case X86II::MRM1X:
+ case X86II::MRM2X:
+ case X86II::MRM3X:
+ case X86II::MRM4X:
+ case X86II::MRM5X:
+ case X86II::MRM6X:
+ case X86II::MRM7X:
+ emitByte(BaseOpcode, OS);
+ emitByte(0xC0 + ((Form - X86II::MRM0X) << 3), OS);
+ break;
+
case X86II::MRM_C0:
case X86II::MRM_C1:
case X86II::MRM_C2:
@@ -1738,8 +1774,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM_FD:
case X86II::MRM_FE:
case X86II::MRM_FF:
- emitByte(BaseOpcode, CurByte, OS);
- emitByte(0xC0 + Form - X86II::MRM_C0, CurByte, OS);
+ emitByte(BaseOpcode, OS);
+ emitByte(0xC0 + Form - X86II::MRM_C0, OS);
break;
}
@@ -1754,7 +1790,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
I8RegNum |= Val;
}
emitImmediate(MCOperand::createImm(I8RegNum), MI.getLoc(), 1, FK_Data_1,
- CurByte, OS, Fixups);
+ StartByte, OS, Fixups);
} else {
// If there is a remaining operand, it must be a trailing immediate. Emit it
// according to the right size for the instruction. Some instructions
@@ -1762,13 +1798,15 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
while (CurOp != NumOps && NumOps - CurOp <= 2) {
emitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
- CurByte, OS, Fixups);
+ StartByte, OS, Fixups);
}
}
if ((TSFlags & X86II::OpMapMask) == X86II::ThreeDNow)
- emitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS);
+ emitByte(X86II::getBaseOpcodeFor(TSFlags), OS);
+ assert(OS.tell() - StartByte <= 15 &&
+ "The size of instruction must be no longer than 15.");
#ifndef NDEBUG
// FIXME: Verify.
if (/*!Desc.isVariadic() &&*/ CurOp != NumOps) {
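The new assertion records the stream position in StartByte before any bytes are written and checks the architectural 15-byte limit afterwards; the same measurement over a plain byte buffer looks like this (standalone sketch, not the MC API):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Capture Buffer.size() before encoding an instruction, then verify its length.
static void checkEncodedLength(const std::vector<uint8_t> &Buffer, size_t StartByte) {
  size_t Length = Buffer.size() - StartByte;
  assert(Length <= 15 && "x86 instructions are at most 15 bytes long");
  (void)Length;
}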
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 049a3a815984..81110ba666e9 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -30,10 +30,6 @@
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
-#if _MSC_VER
-#include <intrin.h>
-#endif
-
using namespace llvm;
#define GET_REGINFO_MC_DESC
@@ -294,7 +290,7 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(const Triple &TT,
if (!FS.empty())
ArchFS = (Twine(ArchFS) + "," + FS).str();
- std::string CPUName = CPU;
+ std::string CPUName = std::string(CPU);
if (CPUName.empty())
CPUName = "generic";
@@ -335,7 +331,10 @@ static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI,
MAI = new X86ELFMCAsmInfo(TheTriple);
} else if (TheTriple.isWindowsMSVCEnvironment() ||
TheTriple.isWindowsCoreCLREnvironment()) {
- MAI = new X86MCAsmInfoMicrosoft(TheTriple);
+ if (Options.getAssemblyLanguage().equals_lower("masm"))
+ MAI = new X86MCAsmInfoMicrosoftMASM(TheTriple);
+ else
+ MAI = new X86MCAsmInfoMicrosoft(TheTriple);
} else if (TheTriple.isOSCygMing() ||
TheTriple.isWindowsItaniumEnvironment()) {
MAI = new X86MCAsmInfoGNUCOFF(TheTriple);
@@ -350,7 +349,7 @@ static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI,
// Initial state of the frame pointer is esp+stackGrowth.
unsigned StackPtr = is64Bit ? X86::RSP : X86::ESP;
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(
+ MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(
nullptr, MRI.getDwarfRegNum(StackPtr, true), -stackGrowth);
MAI->addInitialFrameState(Inst);
@@ -401,6 +400,9 @@ public:
findPltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
uint64_t GotSectionVA,
const Triple &TargetTriple) const override;
+
+ bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+ uint64_t &Target) const override;
Optional<uint64_t> evaluateMemoryOperandAddress(const MCInst &Inst,
uint64_t Addr,
uint64_t Size) const override;
@@ -519,6 +521,15 @@ std::vector<std::pair<uint64_t, uint64_t>> X86MCInstrAnalysis::findPltEntries(
}
}
+bool X86MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size, uint64_t &Target) const {
+ if (Inst.getNumOperands() == 0 ||
+ Info->get(Inst.getOpcode()).OpInfo[0].OperandType != MCOI::OPERAND_PCREL)
+ return false;
+ Target = Addr + Size + Inst.getOperand(0).getImm();
+ return true;
+}
+
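The added evaluateBranch resolves a PC-relative branch from the instruction address, its encoded size and the signed displacement operand; the arithmetic is simply the following (standalone sketch):

#include <cstdint>

// PC-relative displacements are measured from the end of the branch instruction.
static uint64_t branchTarget(uint64_t Addr, uint64_t Size, int64_t Disp) {
  return Addr + Size + uint64_t(Disp);
}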
Optional<uint64_t> X86MCInstrAnalysis::evaluateMemoryOperandAddress(
const MCInst &Inst, uint64_t Addr, uint64_t Size) const {
const MCInstrDesc &MCID = Info->get(Inst.getOpcode());
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 0c789061f0e1..e8c72be1d9b6 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -13,27 +13,28 @@
#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCTARGETDESC_H
#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCTARGETDESC_H
-#include "llvm/MC/MCRegister.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/DataTypes.h"
+#include <memory>
#include <string>
namespace llvm {
+class formatted_raw_ostream;
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
+class MCInst;
+class MCInstPrinter;
class MCInstrInfo;
class MCObjectTargetWriter;
class MCObjectWriter;
+class MCRegister;
class MCRegisterInfo;
+class MCStreamer;
class MCSubtargetInfo;
-class MCRelocationInfo;
class MCTargetOptions;
+class MCTargetStreamer;
class Target;
class Triple;
class StringRef;
-class raw_ostream;
-class raw_pwrite_stream;
/// Flavour of dwarf regnumbers
///
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp
index 48fd3e0b7ab9..62c1c399a606 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp
@@ -12,7 +12,9 @@
//===----------------------------------------------------------------------===//
#include "X86ShuffleDecode.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
//===----------------------------------------------------------------------===//
// Vector Mask Decoding
@@ -141,9 +143,6 @@ void DecodeVALIGNMask(unsigned NumElts, unsigned Imm,
ShuffleMask.push_back(i + Imm);
}
-/// DecodePSHUFMask - This decodes the shuffle masks for pshufw, pshufd, and vpermilp*.
-/// VT indicates the type of the vector allowing it to handle different
-/// datatypes and vector widths.
void DecodePSHUFMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask) {
unsigned Size = NumElts * ScalarBits;
@@ -197,9 +196,6 @@ void DecodePSWAPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
ShuffleMask.push_back(h);
}
-/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
-/// the type of the vector allowing it to handle different datatypes and vector
-/// widths.
void DecodeSHUFPMask(unsigned NumElts, unsigned ScalarBits,
unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumLaneElts = 128 / ScalarBits;
@@ -217,9 +213,6 @@ void DecodeSHUFPMask(unsigned NumElts, unsigned ScalarBits,
}
}
-/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
-/// and punpckh*. VT indicates the type of the vector allowing it to handle
-/// different datatypes and vector widths.
void DecodeUNPCKHMask(unsigned NumElts, unsigned ScalarBits,
SmallVectorImpl<int> &ShuffleMask) {
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -236,9 +229,6 @@ void DecodeUNPCKHMask(unsigned NumElts, unsigned ScalarBits,
}
}
-/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
-/// and punpckl*. VT indicates the type of the vector allowing it to handle
-/// different datatypes and vector widths.
void DecodeUNPCKLMask(unsigned NumElts, unsigned ScalarBits,
SmallVectorImpl<int> &ShuffleMask) {
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -255,13 +245,11 @@ void DecodeUNPCKLMask(unsigned NumElts, unsigned ScalarBits,
}
}
-/// Decodes a broadcast of the first element of a vector.
void DecodeVectorBroadcast(unsigned NumElts,
SmallVectorImpl<int> &ShuffleMask) {
ShuffleMask.append(NumElts, 0);
}
-/// Decodes a broadcast of a subvector to a larger vector type.
void DecodeSubVectorBroadcast(unsigned DstNumElts, unsigned SrcNumElts,
SmallVectorImpl<int> &ShuffleMask) {
unsigned Scale = DstNumElts / SrcNumElts;
@@ -271,9 +259,6 @@ void DecodeSubVectorBroadcast(unsigned DstNumElts, unsigned SrcNumElts,
ShuffleMask.push_back(j);
}
-/// Decode a shuffle packed values at 128-bit granularity
-/// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2)
-/// immediate mask into a shuffle mask.
void decodeVSHUF64x2FamilyMask(unsigned NumElts, unsigned ScalarSize,
unsigned Imm,
SmallVectorImpl<int> &ShuffleMask) {
@@ -374,7 +359,6 @@ void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
}
}
-/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
void DecodeVPERMMask(unsigned NumElts, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask) {
for (unsigned l = 0; l != NumElts; l += 4)
@@ -384,32 +368,31 @@ void DecodeVPERMMask(unsigned NumElts, unsigned Imm,
void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
unsigned NumDstElts, bool IsAnyExtend,
- SmallVectorImpl<int> &Mask) {
+ SmallVectorImpl<int> &ShuffleMask) {
unsigned Scale = DstScalarBits / SrcScalarBits;
assert(SrcScalarBits < DstScalarBits &&
"Expected zero extension mask to increase scalar size");
+ int Sentinel = IsAnyExtend ? SM_SentinelUndef : SM_SentinelZero;
for (unsigned i = 0; i != NumDstElts; i++) {
- Mask.push_back(i);
- for (unsigned j = 1; j != Scale; j++)
- Mask.push_back(IsAnyExtend ? SM_SentinelUndef : SM_SentinelZero);
+ ShuffleMask.push_back(i);
+ ShuffleMask.append(Scale - 1, Sentinel);
}
}
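The rewritten DecodeZeroExtendMask appends, for each destination element, its source index followed by Scale - 1 sentinel entries; a standalone sketch of the same construction with plain ints (the -1/-2 values correspond to SM_SentinelUndef and SM_SentinelZero from X86ShuffleDecode.h):

#include <vector>

// Build a zero/any-extend shuffle mask: -1 marks an undef lane, -2 a zeroed lane.
static std::vector<int> zeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
                                       unsigned NumDstElts, bool IsAnyExtend) {
  unsigned Scale = DstScalarBits / SrcScalarBits;
  int Sentinel = IsAnyExtend ? -1 : -2;
  std::vector<int> Mask;
  for (unsigned I = 0; I != NumDstElts; ++I) {
    Mask.push_back(int(I));
    Mask.insert(Mask.end(), Scale - 1, Sentinel);
  }
  return Mask;
}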
void DecodeZeroMoveLowMask(unsigned NumElts,
SmallVectorImpl<int> &ShuffleMask) {
ShuffleMask.push_back(0);
- for (unsigned i = 1; i < NumElts; i++)
- ShuffleMask.push_back(SM_SentinelZero);
+ ShuffleMask.append(NumElts - 1, SM_SentinelZero);
}
void DecodeScalarMoveMask(unsigned NumElts, bool IsLoad,
- SmallVectorImpl<int> &Mask) {
+ SmallVectorImpl<int> &ShuffleMask) {
// First element comes from the first element of second source.
// Remaining elements: Load zero extends / Move copies from first source.
- Mask.push_back(NumElts);
+ ShuffleMask.push_back(NumElts);
for (unsigned i = 1; i < NumElts; i++)
- Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
+ ShuffleMask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
}
void DecodeEXTRQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.h
index f52785063071..4ef9959f7a27 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.h
@@ -14,15 +14,16 @@
#ifndef LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H
#define LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/SmallVector.h"
+#include <cstdint>
//===----------------------------------------------------------------------===//
// Vector Mask Decoding
//===----------------------------------------------------------------------===//
namespace llvm {
+class APInt;
template <typename T> class ArrayRef;
+template <typename T> class SmallVectorImpl;
enum { SM_SentinelUndef = -1, SM_SentinelZero = -2 };
@@ -61,20 +62,14 @@ void DecodeVALIGNMask(unsigned NumElts, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask);
/// Decodes the shuffle masks for pshufd/pshufw/vpermilpd/vpermilps.
-/// VT indicates the type of the vector allowing it to handle different
-/// datatypes and vector widths.
void DecodePSHUFMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask);
/// Decodes the shuffle masks for pshufhw.
-/// VT indicates the type of the vector allowing it to handle different
-/// datatypes and vector widths.
void DecodePSHUFHWMask(unsigned NumElts, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask);
/// Decodes the shuffle masks for pshuflw.
-/// VT indicates the type of the vector allowing it to handle different
-/// datatypes and vector widths.
void DecodePSHUFLWMask(unsigned NumElts, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask);
@@ -82,20 +77,14 @@ void DecodePSHUFLWMask(unsigned NumElts, unsigned Imm,
void DecodePSWAPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask);
/// Decodes the shuffle masks for shufp*.
-/// VT indicates the type of the vector allowing it to handle different
-/// datatypes and vector widths.
void DecodeSHUFPMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask);
/// Decodes the shuffle masks for unpckhps/unpckhpd and punpckh*.
-/// VT indicates the type of the vector allowing it to handle different
-/// datatypes and vector widths.
void DecodeUNPCKHMask(unsigned NumElts, unsigned ScalarBits,
SmallVectorImpl<int> &ShuffleMask);
/// Decodes the shuffle masks for unpcklps/unpcklpd and punpckl*.
-/// VT indicates the type of the vector allowing it to handle different
-/// datatypes and vector widths.
void DecodeUNPCKLMask(unsigned NumElts, unsigned ScalarBits,
SmallVectorImpl<int> &ShuffleMask);
@@ -119,6 +108,7 @@ void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a shuffle packed values at 128-bit granularity
+/// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2)
/// immediate mask into a shuffle mask.
void decodeVSHUF64x2FamilyMask(unsigned NumElts, unsigned ScalarSize,
unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index db624378d517..3bebcc24fd3a 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -28,7 +28,7 @@ public:
void EmitWinEHHandlerData(SMLoc Loc) override;
void EmitWindowsUnwindTables() override;
void EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) override;
- void FinishImpl() override;
+ void finishImpl() override;
};
void X86WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) {
@@ -52,11 +52,11 @@ void X86WinCOFFStreamer::EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) {
XTS->emitFPOData(ProcSym, Loc);
}
-void X86WinCOFFStreamer::FinishImpl() {
- EmitFrames(nullptr);
+void X86WinCOFFStreamer::finishImpl() {
+ emitFrames(nullptr);
EmitWindowsUnwindTables();
- MCWinCOFFStreamer::FinishImpl();
+ MCWinCOFFStreamer::finishImpl();
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
index d5494ef12370..11251fb2b2ba 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
@@ -159,7 +159,7 @@ bool X86WinCOFFTargetStreamer::checkInFPOPrologue(SMLoc L) {
MCSymbol *X86WinCOFFTargetStreamer::emitFPOLabel() {
MCSymbol *Label = getContext().createTempSymbol("cfi", true);
- getStreamer().EmitLabel(Label);
+ getStreamer().emitLabel(Label);
return Label;
}
@@ -372,13 +372,13 @@ void FPOStateMachine::emitFrameDataRecord(MCStreamer &OS, MCSymbol *Label) {
OS.emitAbsoluteSymbolDiff(Label, FPO->Begin, 4); // RvaStart
OS.emitAbsoluteSymbolDiff(FPO->End, Label, 4); // CodeSize
- OS.EmitIntValue(LocalSize, 4);
- OS.EmitIntValue(FPO->ParamsSize, 4);
- OS.EmitIntValue(MaxStackSize, 4);
- OS.EmitIntValue(FrameFuncStrTabOff, 4); // FrameFunc
+ OS.emitInt32(LocalSize);
+ OS.emitInt32(FPO->ParamsSize);
+ OS.emitInt32(MaxStackSize);
+ OS.emitInt32(FrameFuncStrTabOff); // FrameFunc
OS.emitAbsoluteSymbolDiff(FPO->PrologueEnd, Label, 2);
- OS.EmitIntValue(SavedRegSize, 2);
- OS.EmitIntValue(CurFlags, 4);
+ OS.emitInt16(SavedRegSize);
+ OS.emitInt32(CurFlags);
}
/// Compute and emit the real CodeView FrameData subsection.
@@ -398,12 +398,12 @@ bool X86WinCOFFTargetStreamer::emitFPOData(const MCSymbol *ProcSym, SMLoc L) {
MCSymbol *FrameBegin = Ctx.createTempSymbol(),
*FrameEnd = Ctx.createTempSymbol();
- OS.EmitIntValue(unsigned(DebugSubsectionKind::FrameData), 4);
+ OS.emitInt32(unsigned(DebugSubsectionKind::FrameData));
OS.emitAbsoluteSymbolDiff(FrameEnd, FrameBegin, 4);
- OS.EmitLabel(FrameBegin);
+ OS.emitLabel(FrameBegin);
// Start with the RVA of the function in question.
- OS.EmitValue(MCSymbolRefExpr::create(FPO->Function,
+ OS.emitValue(MCSymbolRefExpr::create(FPO->Function,
MCSymbolRefExpr::VK_COFF_IMGREL32, Ctx),
4);
@@ -437,8 +437,8 @@ bool X86WinCOFFTargetStreamer::emitFPOData(const MCSymbol *ProcSym, SMLoc L) {
FSM.emitFrameDataRecord(OS, Inst.Label);
}
- OS.EmitValueToAlignment(4, 0);
- OS.EmitLabel(FrameEnd);
+ OS.emitValueToAlignment(4, 0);
+ OS.emitLabel(FrameEnd);
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86.h b/contrib/llvm-project/llvm/lib/Target/X86/X86.h
index a0ab5c3a5b3c..91ba4e3d091e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86.h
@@ -19,9 +19,7 @@
namespace llvm {
class FunctionPass;
-class ImmutablePass;
class InstructionSelector;
-class ModulePass;
class PassRegistry;
class X86RegisterBankInfo;
class X86Subtarget;
@@ -129,14 +127,23 @@ FunctionPass *createX86DiscriminateMemOpsPass();
/// This pass applies profiling information to insert cache prefetches.
FunctionPass *createX86InsertPrefetchPass();
+/// This pass inserts wait instructions after X87 instructions which could
+/// raise fp exceptions when strict-fp is enabled.
+FunctionPass *createX86InsertX87waitPass();
+
+/// This pass optimizes arithmetic based on knowledge that is only used by
+/// a reduction sequence and is therefore safe to reassociate in interesting
+/// ways.
+FunctionPass *createX86PartialReductionPass();
+
InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
X86Subtarget &,
X86RegisterBankInfo &);
FunctionPass *createX86LoadValueInjectionLoadHardeningPass();
-FunctionPass *createX86LoadValueInjectionLoadHardeningUnoptimizedPass();
FunctionPass *createX86LoadValueInjectionRetHardeningPass();
FunctionPass *createX86SpeculativeLoadHardeningPass();
+FunctionPass *createX86SpeculativeExecutionSideEffectSuppression();
void initializeEvexToVexInstPassPass(PassRegistry &);
void initializeFixupBWInstPassPass(PassRegistry &);
@@ -144,18 +151,21 @@ void initializeFixupLEAPassPass(PassRegistry &);
void initializeFPSPass(PassRegistry &);
void initializeWinEHStatePassPass(PassRegistry &);
void initializeX86AvoidSFBPassPass(PassRegistry &);
+void initializeX86AvoidTrailingCallPassPass(PassRegistry &);
void initializeX86CallFrameOptimizationPass(PassRegistry &);
void initializeX86CmovConverterPassPass(PassRegistry &);
void initializeX86CondBrFoldingPassPass(PassRegistry &);
void initializeX86DomainReassignmentPass(PassRegistry &);
void initializeX86ExecutionDomainFixPass(PassRegistry &);
void initializeX86ExpandPseudoPass(PassRegistry &);
+void initializeX86FixupSetCCPassPass(PassRegistry &);
void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
-void initializeX86LoadValueInjectionLoadHardeningUnoptimizedPassPass(PassRegistry &);
void initializeX86LoadValueInjectionLoadHardeningPassPass(PassRegistry &);
void initializeX86LoadValueInjectionRetHardeningPassPass(PassRegistry &);
void initializeX86OptimizeLEAPassPass(PassRegistry &);
+void initializeX86PartialReductionPass(PassRegistry &);
void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
+void initializeX86SpeculativeExecutionSideEffectSuppressionPass(PassRegistry &);
namespace X86AS {
enum : unsigned {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86.td b/contrib/llvm-project/llvm/lib/Target/X86/X86.td
index bb8952f54e3a..dc1ff72add49 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86.td
@@ -52,13 +52,16 @@ def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true",
"Support xsave instructions">;
def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
- "Support xsaveopt instructions">;
+ "Support xsaveopt instructions",
+ [FeatureXSAVE]>;
def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
- "Support xsavec instructions">;
+ "Support xsavec instructions",
+ [FeatureXSAVE]>;
def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true",
- "Support xsaves instructions">;
+ "Support xsaves instructions",
+ [FeatureXSAVE]>;
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
"Enable SSE instructions">;
@@ -246,6 +249,14 @@ def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
// target-feature attribute.
def FeatureDeprecatedMPX : SubtargetFeature<"mpx", "DeprecatedHasMPX", "false",
"Deprecated. Support MPX instructions">;
+def FeatureAMXTILE : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
+ "Support AMX-TILE instructions">;
+def FeatureAMXINT8 : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
+ "Support AMX-INT8 instructions",
+ [FeatureAMXTILE]>;
+def FeatureAMXBF16 : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
+ "Support AMX-BF16 instructions",
+ [FeatureAMXTILE]>;
def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
@@ -273,6 +284,10 @@ def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
"Wait and pause enhancements">;
def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
"Has ENQCMD instructions">;
+def FeatureSERIALIZE : SubtargetFeature<"serialize", "HasSERIALIZE", "true",
+ "Has serialize instruction">;
+def FeatureTSXLDTRK : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
+ "Support TSXLDTRK instructions">;
// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
@@ -329,6 +344,11 @@ def FeatureFastLZCNT
: SubtargetFeature<
"fast-lzcnt", "HasFastLZCNT", "true",
"LZCNT instructions are as fast as most simple integer ops">;
+// If the target can efficiently decode NOPs up to 7 bytes in length.
+def FeatureFast7ByteNOP
+ : SubtargetFeature<
+ "fast-7bytenop", "HasFast7ByteNOP", "true",
+ "Target can quickly decode up to 7 byte NOPs">;
// If the target can efficiently decode NOPs up to 11 bytes in length.
def FeatureFast11ByteNOP
: SubtargetFeature<
@@ -435,6 +455,15 @@ def FeatureLVIControlFlowIntegrity
"LFENCE instruction to serialize control flow. Also decompose RET "
"instructions into a POP+LFENCE+JMP sequence.">;
+// Enable SESES to mitigate speculative execution attacks
+def FeatureSpeculativeExecutionSideEffectSuppression
+ : SubtargetFeature<
+ "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
+ "Prevent speculative execution side channel timing attacks by "
+ "inserting a speculation barrier before memory reads, memory writes, "
+ "and conditional branches. Implies LVI Control Flow integrity.",
+ [FeatureLVIControlFlowIntegrity]>;
+
// Mitigate LVI attacks against data loads
def FeatureLVILoadHardening
: SubtargetFeature<
@@ -562,7 +591,8 @@ def ProcessorFeatures {
FeatureSlow3OpsLEA,
FeatureFastScalarFSQRT,
FeatureFastSHLDRotate,
- FeatureMergeToThreeWayBranch];
+ FeatureMergeToThreeWayBranch,
+ FeatureFast15ByteNOP];
list<SubtargetFeature> SNBSpecificFeatures = [FeatureSlowUAMem32,
FeaturePOPCNTFalseDeps];
list<SubtargetFeature> SNBInheritableFeatures =
@@ -744,6 +774,7 @@ def ProcessorFeatures {
list<SubtargetFeature> SLMSpecificFeatures = [ProcIntelSLM,
FeatureSlowDivide64,
FeatureSlowPMULLD,
+ FeatureFast7ByteNOP,
FeaturePOPCNTFalseDeps];
list<SubtargetFeature> SLMInheritableFeatures =
!listconcat(AtomInheritableFeatures, SLMAdditionalFeatures);
@@ -778,15 +809,13 @@ def ProcessorFeatures {
!listconcat(GLPInheritableFeatures, GLPSpecificFeatures);
// Tremont
- list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLDEMOTE,
- FeatureGFNI,
- FeatureMOVDIRI,
- FeatureMOVDIR64B,
- FeatureWAITPKG];
+ list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB,
+ FeatureGFNI];
list<SubtargetFeature> TRMSpecificFeatures = [FeatureUseGLMDivSqrtCosts];
+ list<SubtargetFeature> TRMInheritableFeatures =
+ !listconcat(GLPInheritableFeatures, TRMAdditionalFeatures);
list<SubtargetFeature> TRMFeatures =
- !listconcat(GLPInheritableFeatures, TRMAdditionalFeatures,
- TRMSpecificFeatures);
+ !listconcat(TRMInheritableFeatures, TRMSpecificFeatures);
// Knights Landing
list<SubtargetFeature> KNLFeatures = [FeatureX87,
@@ -838,6 +867,7 @@ def ProcessorFeatures {
FeatureFXSR,
FeatureNOPL,
FeatureCMPXCHG16B,
+ FeaturePRFCHW,
FeatureLZCNT,
FeaturePOPCNT,
FeatureSlowSHLD,
@@ -933,6 +963,8 @@ def ProcessorFeatures {
// Excavator
list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2,
FeatureBMI2,
+ FeatureMOVBE,
+ FeatureRDRAND,
FeatureMWAITX];
list<SubtargetFeature> BdVer4InheritableFeatures =
!listconcat(BdVer3InheritableFeatures, BdVer4AdditionalFeatures);
@@ -993,7 +1025,7 @@ def ProcessorFeatures {
class Proc<string Name, list<SubtargetFeature> Features>
: ProcessorModel<Name, GenericModel, Features>;
-// NOTE: CMPXCHG8B is here for legacy compatbility so that it is only disabled
+// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
// if i386/i486 is specifically requested.
def : Proc<"generic", [FeatureX87, FeatureSlowUAMem16,
FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>;
@@ -1256,6 +1288,7 @@ def : ProcessorModel<"x86-64", SandyBridgeModel, [
FeatureNOPL,
Feature64Bit,
FeatureSlow3OpsLEA,
+ FeatureSlowDivide64,
FeatureSlowIncDec,
FeatureMacroFusion,
FeatureInsertVZEROUPPER
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 1ac291fcb887..aa03217d155d 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -18,6 +18,7 @@
#include "TargetInfo/X86TargetInfo.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -40,6 +41,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+
using namespace llvm;
X86AsmPrinter::X86AsmPrinter(TargetMachine &TM,
@@ -76,7 +79,7 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
}
// Emit the rest of the function body.
- EmitFunctionBody();
+ emitFunctionBody();
// Emit the XRay table for this function.
emitXRayTable();
@@ -87,7 +90,7 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
return false;
}
-void X86AsmPrinter::EmitFunctionBodyStart() {
+void X86AsmPrinter::emitFunctionBodyStart() {
if (EmitFPOData) {
if (auto *XTS =
static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer()))
@@ -97,7 +100,7 @@ void X86AsmPrinter::EmitFunctionBodyStart() {
}
}
-void X86AsmPrinter::EmitFunctionBodyEnd() {
+void X86AsmPrinter::emitFunctionBodyEnd() {
if (EmitFPOData) {
if (auto *XTS =
static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer()))
@@ -124,7 +127,7 @@ void X86AsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE)
GVSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
else
- GVSym = getSymbol(GV);
+ GVSym = getSymbolPreferLocal(*GV);
// Handle dllimport linkage.
if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
@@ -619,7 +622,7 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
return false;
}
-void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
+void X86AsmPrinter::emitStartOfAsmFile(Module &M) {
const Triple &TT = TM.getTargetTriple();
if (TT.isOSBinFormatELF()) {
@@ -641,17 +644,17 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
// Emitting note header.
int WordSize = TT.isArch64Bit() ? 8 : 4;
- EmitAlignment(WordSize == 4 ? Align(4) : Align(8));
- OutStreamer->EmitIntValue(4, 4 /*size*/); // data size for "GNU\0"
- OutStreamer->EmitIntValue(8 + WordSize, 4 /*size*/); // Elf_Prop size
- OutStreamer->EmitIntValue(ELF::NT_GNU_PROPERTY_TYPE_0, 4 /*size*/);
- OutStreamer->EmitBytes(StringRef("GNU", 4)); // note name
+ emitAlignment(WordSize == 4 ? Align(4) : Align(8));
+ OutStreamer->emitIntValue(4, 4 /*size*/); // data size for "GNU\0"
+ OutStreamer->emitIntValue(8 + WordSize, 4 /*size*/); // Elf_Prop size
+ OutStreamer->emitIntValue(ELF::NT_GNU_PROPERTY_TYPE_0, 4 /*size*/);
+ OutStreamer->emitBytes(StringRef("GNU", 4)); // note name
// Emitting an Elf_Prop for the CET properties.
- OutStreamer->EmitIntValue(ELF::GNU_PROPERTY_X86_FEATURE_1_AND, 4);
- OutStreamer->EmitIntValue(4, 4); // data size
- OutStreamer->EmitIntValue(FeatureFlagsAnd, 4); // data
- EmitAlignment(WordSize == 4 ? Align(4) : Align(8)); // padding
+ OutStreamer->emitInt32(ELF::GNU_PROPERTY_X86_FEATURE_1_AND);
+ OutStreamer->emitInt32(4); // data size
+ OutStreamer->emitInt32(FeatureFlagsAnd); // data
+ emitAlignment(WordSize == 4 ? Align(4) : Align(8)); // padding
OutStreamer->endSection(Nt);
OutStreamer->SwitchSection(Cur);
@@ -683,30 +686,30 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
if (M.getModuleFlag("cfguard"))
Feat00Flags |= 0x800; // Object is CFG-aware.
- OutStreamer->EmitSymbolAttribute(S, MCSA_Global);
- OutStreamer->EmitAssignment(
+ OutStreamer->emitSymbolAttribute(S, MCSA_Global);
+ OutStreamer->emitAssignment(
S, MCConstantExpr::create(Feat00Flags, MMI->getContext()));
}
- OutStreamer->EmitSyntaxDirective();
+ OutStreamer->emitSyntaxDirective();
// If this is not inline asm and we're in 16-bit
// mode prefix assembly with .code16.
bool is16 = TT.getEnvironment() == Triple::CODE16;
if (M.getModuleInlineAsm().empty() && is16)
- OutStreamer->EmitAssemblerFlag(MCAF_Code16);
+ OutStreamer->emitAssemblerFlag(MCAF_Code16);
}
static void
emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel,
MachineModuleInfoImpl::StubValueTy &MCSym) {
// L_foo$stub:
- OutStreamer.EmitLabel(StubLabel);
+ OutStreamer.emitLabel(StubLabel);
// .indirect_symbol _foo
- OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
+ OutStreamer.emitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
if (MCSym.getInt())
// External to current translation unit.
- OutStreamer.EmitIntValue(0, 4/*size*/);
+ OutStreamer.emitIntValue(0, 4/*size*/);
else
// Internal to current translation unit.
//
@@ -714,7 +717,7 @@ emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel,
// pointers need to be indirect and pc-rel. We accomplish this by
// using NLPs; however, sometimes the types are local to the file.
// We need to fill in the value for the NLP in those cases.
- OutStreamer.EmitValue(
+ OutStreamer.emitValue(
MCSymbolRefExpr::create(MCSym.getPointer(), OutStreamer.getContext()),
4 /*size*/);
}
@@ -742,7 +745,7 @@ static void emitNonLazyStubs(MachineModuleInfo *MMI, MCStreamer &OutStreamer) {
}
}
-void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
+void X86AsmPrinter::emitEndOfAsmFile(Module &M) {
const Triple &TT = TM.getTargetTriple();
if (TT.isOSBinFormatMachO()) {
@@ -759,7 +762,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// points). If this doesn't occur, the linker can safely perform dead code
// stripping. Since LLVM never generates code that does this, it is always
// safe to set.
- OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+ OutStreamer->emitAssemblerFlag(MCAF_SubsectionsViaSymbols);
} else if (TT.isOSBinFormatCOFF()) {
if (MMI->usesMSVCFloatingPoint()) {
// In Windows' libcmt.lib, there is a file which is linked in only if the
@@ -778,7 +781,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
StringRef SymbolName =
(TT.getArch() == Triple::x86) ? "__fltused" : "_fltused";
MCSymbol *S = MMI->getContext().getOrCreateSymbol(SymbolName);
- OutStreamer->EmitSymbolAttribute(S, MCSA_Global);
+ OutStreamer->emitSymbolAttribute(S, MCSA_Global);
return;
}
emitStackMaps(SM);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.h
index ee79401dc80d..eb485fa2ecef 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -9,12 +9,9 @@
#ifndef LLVM_LIB_TARGET_X86_X86ASMPRINTER_H
#define LLVM_LIB_TARGET_X86_X86ASMPRINTER_H
-#include "X86Subtarget.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/FaultMaps.h"
#include "llvm/CodeGen/StackMaps.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/Target/TargetMachine.h"
// Implemented in X86MCInstLower.cpp
namespace {
@@ -22,8 +19,10 @@ namespace {
}
namespace llvm {
+class MCCodeEmitter;
class MCStreamer;
-class MCSymbol;
+class X86Subtarget;
+class TargetMachine;
class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
const X86Subtarget *Subtarget = nullptr;
@@ -123,14 +122,14 @@ public:
const X86Subtarget &getSubtarget() const { return *Subtarget; }
- void EmitStartOfAsmFile(Module &M) override;
+ void emitStartOfAsmFile(Module &M) override;
- void EmitEndOfAsmFile(Module &M) override;
+ void emitEndOfAsmFile(Module &M) override;
- void EmitInstruction(const MachineInstr *MI) override;
+ void emitInstruction(const MachineInstr *MI) override;
- void EmitBasicBlockEnd(const MachineBasicBlock &MBB) override {
- AsmPrinter::EmitBasicBlockEnd(MBB);
+ void emitBasicBlockEnd(const MachineBasicBlock &MBB) override {
+ AsmPrinter::emitBasicBlockEnd(MBB);
SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
}
@@ -147,8 +146,8 @@ public:
}
bool runOnMachineFunction(MachineFunction &F) override;
- void EmitFunctionBodyStart() override;
- void EmitFunctionBodyEnd() override;
+ void emitFunctionBodyStart() override;
+ void emitFunctionBodyEnd() override;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
index 0f1d4b51062e..9f1fece1b9dd 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
@@ -1,4 +1,4 @@
-//===- X86AvoidStoreForwardingBlockis.cpp - Avoid HW Store Forward Block --===//
+//===- X86AvoidStoreForwardingBlocks.cpp - Avoid HW Store Forward Block ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -33,6 +33,7 @@
// transformation done here is correct regardless of other memory accesses.
//===----------------------------------------------------------------------===//
+#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -287,7 +288,7 @@ static unsigned getYMMtoXMMStoreOpcode(unsigned StoreOpcode) {
return 0;
}
-static int getAddrOffset(MachineInstr *MI) {
+static int getAddrOffset(const MachineInstr *MI) {
const MCInstrDesc &Descl = MI->getDesc();
int AddrOffset = X86II::getMemoryOperandNo(Descl.TSFlags);
assert(AddrOffset != -1 && "Expected Memory Operand");
@@ -310,11 +311,11 @@ static MachineOperand &getDispOperand(MachineInstr *MI) {
// TODO: Consider expanding to other addressing modes in the future
static bool isRelevantAddressingMode(MachineInstr *MI) {
int AddrOffset = getAddrOffset(MI);
- MachineOperand &Base = getBaseOperand(MI);
- MachineOperand &Disp = getDispOperand(MI);
- MachineOperand &Scale = MI->getOperand(AddrOffset + X86::AddrScaleAmt);
- MachineOperand &Index = MI->getOperand(AddrOffset + X86::AddrIndexReg);
- MachineOperand &Segment = MI->getOperand(AddrOffset + X86::AddrSegmentReg);
+ const MachineOperand &Base = getBaseOperand(MI);
+ const MachineOperand &Disp = getDispOperand(MI);
+ const MachineOperand &Scale = MI->getOperand(AddrOffset + X86::AddrScaleAmt);
+ const MachineOperand &Index = MI->getOperand(AddrOffset + X86::AddrIndexReg);
+ const MachineOperand &Segment = MI->getOperand(AddrOffset + X86::AddrSegmentReg);
if (!((Base.isReg() && Base.getReg() != X86::NoRegister) || Base.isFI()))
return false;
@@ -410,9 +411,8 @@ void X86AvoidSFBPass::buildCopy(MachineInstr *LoadInst, unsigned NLoadOpcode,
// If the load and store are consecutive, use the loadInst location to
// reduce register pressure.
MachineInstr *StInst = StoreInst;
- auto PrevInstrIt = skipDebugInstructionsBackward(
- std::prev(MachineBasicBlock::instr_iterator(StoreInst)),
- MBB->instr_begin());
+ auto PrevInstrIt = prev_nodbg(MachineBasicBlock::instr_iterator(StoreInst),
+ MBB->instr_begin());
if (PrevInstrIt.getNodePtr() == LoadInst)
StInst = LoadInst;
MachineInstr *NewStore =
@@ -498,9 +498,10 @@ void X86AvoidSFBPass::buildCopies(int Size, MachineInstr *LoadInst,
static void updateKillStatus(MachineInstr *LoadInst, MachineInstr *StoreInst) {
MachineOperand &LoadBase = getBaseOperand(LoadInst);
MachineOperand &StoreBase = getBaseOperand(StoreInst);
- auto StorePrevNonDbgInstr = skipDebugInstructionsBackward(
- std::prev(MachineBasicBlock::instr_iterator(StoreInst)),
- LoadInst->getParent()->instr_begin()).getNodePtr();
+ auto *StorePrevNonDbgInstr =
+ prev_nodbg(MachineBasicBlock::instr_iterator(StoreInst),
+ LoadInst->getParent()->instr_begin())
+ .getNodePtr();
if (LoadBase.isReg()) {
MachineInstr *LastLoad = LoadInst->getPrevNode();
// If the original load and store to xmm/ymm were consecutive
@@ -550,11 +551,8 @@ void X86AvoidSFBPass::findPotentiallylBlockedCopies(MachineFunction &MF) {
if (StoreMI.getParent() == MI.getParent() &&
isPotentialBlockedMemCpyPair(MI.getOpcode(), StoreMI.getOpcode()) &&
isRelevantAddressingMode(&MI) &&
- isRelevantAddressingMode(&StoreMI)) {
- assert(MI.hasOneMemOperand() &&
- "Expected one memory operand for load instruction");
- assert(StoreMI.hasOneMemOperand() &&
- "Expected one memory operand for store instruction");
+ isRelevantAddressingMode(&StoreMI) &&
+ MI.hasOneMemOperand() && StoreMI.hasOneMemOperand()) {
if (!alias(**MI.memoperands_begin(), **StoreMI.memoperands_begin()))
BlockedLoadsStoresPairs.push_back(std::make_pair(&MI, &StoreMI));
}
@@ -563,7 +561,7 @@ void X86AvoidSFBPass::findPotentiallylBlockedCopies(MachineFunction &MF) {
}
unsigned X86AvoidSFBPass::getRegSizeInBytes(MachineInstr *LoadInst) {
- auto TRC = TII->getRegClass(TII->get(LoadInst->getOpcode()), 0, TRI,
+ const auto *TRC = TII->getRegClass(TII->get(LoadInst->getOpcode()), 0, TRI,
*LoadInst->getParent()->getParent());
return TRI->getRegSizeInBits(*TRC) / 8;
}
@@ -616,8 +614,8 @@ void X86AvoidSFBPass::breakBlockedCopies(
static bool hasSameBaseOpValue(MachineInstr *LoadInst,
MachineInstr *StoreInst) {
- MachineOperand &LoadBase = getBaseOperand(LoadInst);
- MachineOperand &StoreBase = getBaseOperand(StoreInst);
+ const MachineOperand &LoadBase = getBaseOperand(LoadInst);
+ const MachineOperand &StoreBase = getBaseOperand(StoreInst);
if (LoadBase.isReg() != StoreBase.isReg())
return false;
if (LoadBase.isReg())
@@ -691,13 +689,12 @@ bool X86AvoidSFBPass::runOnMachineFunction(MachineFunction &MF) {
SmallVector<MachineInstr *, 2> PotentialBlockers =
findPotentialBlockers(LoadInst);
- for (auto PBInst : PotentialBlockers) {
+ for (auto *PBInst : PotentialBlockers) {
if (!isPotentialBlockingStoreInst(PBInst->getOpcode(),
LoadInst->getOpcode()) ||
- !isRelevantAddressingMode(PBInst))
+ !isRelevantAddressingMode(PBInst) || !PBInst->hasOneMemOperand())
continue;
int64_t PBstDispImm = getDispOperand(PBInst).getImm();
- assert(PBInst->hasOneMemOperand() && "Expected One Memory Operand");
unsigned PBstSize = (*PBInst->memoperands_begin())->getSize();
// This check doesn't cover all cases, but it will suffice for now.
// TODO: take branch probability into consideration, if the blocking
@@ -727,7 +724,7 @@ bool X86AvoidSFBPass::runOnMachineFunction(MachineFunction &MF) {
ForRemoval.push_back(LoadInst);
ForRemoval.push_back(StoreInst);
}
- for (auto RemovedInst : ForRemoval) {
+ for (auto *RemovedInst : ForRemoval) {
RemovedInst->eraseFromParent();
}
ForRemoval.clear();
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86AvoidTrailingCall.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86AvoidTrailingCall.cpp
index fb4f9e2901dc..0899783d5f60 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86AvoidTrailingCall.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86AvoidTrailingCall.cpp
@@ -6,10 +6,29 @@
//
//===----------------------------------------------------------------------===//
//
-// The Windows x64 unwinder has trouble unwinding the stack when a return
-// address points to the end of the function. This pass maintains the invariant
-// that every return address is inside the bounds of its parent function or
-// funclet by inserting int3 if the last instruction would otherwise be a call.
+// The Windows x64 unwinder decodes the instruction stream during unwinding.
+// The unwinder decodes forward from the current PC to detect epilogue code
+// patterns.
+//
+// First, this means that there must be an instruction after every
+// call instruction for the unwinder to decode. LLVM must maintain the invariant
+// that the last instruction of a function or funclet is not a call, or the
+// unwinder may decode into the next function. Similarly, a call may not
+// immediately precede an epilogue code pattern. As of this writing, the
+// SEH_Epilogue pseudo instruction takes care of that.
+//
+// Second, all non-tail call jump targets must be within the *half-open*
+// interval of the bounds of the function. The unwinder distinguishes between
+// internal jump instructions and tail calls in an epilogue sequence by checking
+// the jump target against the function bounds from the .pdata section. This
+// means that the last regular MBB of an LLVM function must not be empty if
+// there are regular jumps targeting it.
+//
+// This pass upholds these invariants by ensuring that blocks at the end of a
+// function or funclet are a) not empty and b) do not end in a CALL instruction.
+//
+// Unwinder implementation for reference:
+// https://github.com/dotnet/coreclr/blob/a9f3fc16483eecfc47fb79c362811d870be02249/src/unwinder/amd64/unwinder_amd64.cpp#L1015
//
//===----------------------------------------------------------------------===//
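To see how a trailing call arises in practice, consider a small hand-written C++ sketch; the names are invented for illustration and nothing like it appears in the patch.

// Illustrative only: the cold, noreturn call below has nothing after it in
// its basic block, and block placement may lay that block out last in the
// function. Without this pass the CALL could then be the final instruction,
// and the return address it pushes would point at the function's end, i.e.
// outside the half-open bounds the Win64 unwinder checks.
[[noreturn]] void report_failure();

int checked_divide(int a, int b) {
  if (b == 0)
    report_failure();   // trailing-call candidate on Win64
  return a / b;
}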
@@ -18,33 +37,35 @@
#include "X86Subtarget.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#define DEBUG_TYPE "x86-avoid-trailing-call"
+#define AVOIDCALL_DESC "X86 avoid trailing call pass"
+#define AVOIDCALL_NAME "x86-avoid-trailing-call"
+
+#define DEBUG_TYPE AVOIDCALL_NAME
using namespace llvm;
namespace {
-
class X86AvoidTrailingCallPass : public MachineFunctionPass {
public:
X86AvoidTrailingCallPass() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
-private:
- StringRef getPassName() const override {
- return "X86 avoid trailing call pass";
- }
static char ID;
+
+private:
+ StringRef getPassName() const override { return AVOIDCALL_DESC; }
};
+} // end anonymous namespace
char X86AvoidTrailingCallPass::ID = 0;
-} // end anonymous namespace
-
FunctionPass *llvm::createX86AvoidTrailingCallPass() {
return new X86AvoidTrailingCallPass();
}
+INITIALIZE_PASS(X86AvoidTrailingCallPass, AVOIDCALL_NAME, AVOIDCALL_DESC, false, false)
+
// A real instruction is a non-meta, non-pseudo instruction. Some pseudos
// expand to nothing, and some expand to code. This logic conservatively assumes
// they might expand to nothing.
@@ -62,6 +83,11 @@ bool X86AvoidTrailingCallPass::runOnMachineFunction(MachineFunction &MF) {
const X86InstrInfo &TII = *STI.getInstrInfo();
assert(STI.isTargetWin64() && "pass only runs on Win64");
+ // We don't need to worry about any of the invariants described above if there
+ // is no unwind info (CFI).
+ if (!MF.hasWinCFI())
+ return false;
+
// FIXME: Perhaps this pass should also replace SEH_Epilogue by inserting nops
// before epilogues.
@@ -73,33 +99,34 @@ bool X86AvoidTrailingCallPass::runOnMachineFunction(MachineFunction &MF) {
if (NextMBB && !NextMBB->isEHFuncletEntry())
continue;
- // Find the last real instruction in this block, or previous blocks if this
- // block is empty.
- MachineBasicBlock::reverse_iterator LastRealInstr;
- for (MachineBasicBlock &RMBB :
- make_range(MBB.getReverseIterator(), MF.rend())) {
- LastRealInstr = llvm::find_if(reverse(RMBB), isRealInstruction);
- if (LastRealInstr != RMBB.rend())
- break;
- }
-
- // Do nothing if this function or funclet has no instructions.
- if (LastRealInstr == MF.begin()->rend())
- continue;
+ // Find the last real instruction in this block.
+ auto LastRealInstr = llvm::find_if(reverse(MBB), isRealInstruction);
- // If this is a call instruction, insert int3 right after it with the same
- // DebugLoc. Convert back to a forward iterator and advance the insertion
- // position once.
- if (isCallInstruction(*LastRealInstr)) {
+ // If the block is empty or the last real instruction is a call instruction,
+ // insert an int3. If there is a call instruction, insert the int3 between
+ // the call and any labels or other meta instructions. If the block is
+ // empty, insert at block end.
+ bool IsEmpty = LastRealInstr == MBB.rend();
+ bool IsCall = !IsEmpty && isCallInstruction(*LastRealInstr);
+ if (IsEmpty || IsCall) {
LLVM_DEBUG({
- dbgs() << "inserting int3 after trailing call instruction:\n";
- LastRealInstr->dump();
- dbgs() << '\n';
+ if (IsCall) {
+ dbgs() << "inserting int3 after trailing call instruction:\n";
+ LastRealInstr->dump();
+ dbgs() << '\n';
+ } else {
+ dbgs() << "inserting int3 in trailing empty MBB:\n";
+ MBB.dump();
+ }
});
- MachineBasicBlock::iterator MBBI = std::next(LastRealInstr.getReverse());
- BuildMI(*LastRealInstr->getParent(), MBBI, LastRealInstr->getDebugLoc(),
- TII.get(X86::INT3));
+ MachineBasicBlock::iterator MBBI = MBB.end();
+ DebugLoc DL;
+ if (IsCall) {
+ MBBI = std::next(LastRealInstr.getReverse());
+ DL = LastRealInstr->getDebugLoc();
+ }
+ BuildMI(MBB, MBBI, DL, TII.get(X86::INT3));
Changed = true;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CallFrameOptimization.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
index f8faa572dffc..caa1f7952475 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -17,6 +17,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/X86BaseInfo.h"
+#include "X86.h"
#include "X86FrameLowering.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
@@ -162,14 +163,13 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
// memory for arguments.
unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
- bool UseStackProbe =
- !STI->getTargetLowering()->getStackProbeSymbolName(MF).empty();
+ bool EmitStackProbeCall = STI->getTargetLowering()->hasStackProbeSymbol(MF);
unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF);
for (MachineBasicBlock &BB : MF) {
bool InsideFrameSequence = false;
for (MachineInstr &MI : BB) {
if (MI.getOpcode() == FrameSetupOpcode) {
- if (TII->getFrameSize(MI) >= StackProbeSize && UseStackProbe)
+ if (TII->getFrameSize(MI) >= StackProbeSize && EmitStackProbeCall)
return false;
if (InsideFrameSequence)
return false;
@@ -199,7 +199,7 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
if (CannotReserveFrame)
return true;
- unsigned StackAlign = TFL->getStackAlignment();
+ Align StackAlign = TFL->getStackAlign();
int64_t Advantage = 0;
for (auto CC : CallSeqVector) {
@@ -222,7 +222,7 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
// We'll need a add after the call.
Advantage -= 3;
// If we have to realign the stack, we'll also need a sub before
- if (CC.ExpectedDist % StackAlign)
+ if (!isAligned(StackAlign, CC.ExpectedDist))
Advantage -= 3;
// Now, for each push, we save ~3 bytes. For small constants, we actually,
// save more (up to 5 bytes), but 3 should be a good approximation.
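The raw modulo test above is replaced by the Align/isAligned helpers from llvm/Support/Alignment.h. The two formulations agree, as this small standalone sketch shows (the alignment value and inputs are picked arbitrarily for illustration):

#include "llvm/Support/Alignment.h"
#include <cassert>
#include <cstdint>

// isAligned(A, N) is true exactly when N is a multiple of A's value,
// mirroring the old `ExpectedDist % StackAlign == 0` check.
static void checkEquivalence(uint64_t N) {
  llvm::Align StackAlign(16);                       // example stack alignment
  bool OldStyle = (N % StackAlign.value()) == 0;
  bool NewStyle = llvm::isAligned(StackAlign, N);
  assert(OldStyle == NewStyle);
}

int main() {
  checkEquivalence(32);   // aligned
  checkEquivalence(40);   // misaligned: the old code would subtract the advantage
  return 0;
}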
@@ -531,6 +531,7 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
PushOpcode = Is64Bit ? X86::PUSH64i8 : X86::PUSH32i8;
}
Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).add(PushOp);
+ Push->cloneMemRefs(MF, *Store);
break;
case X86::MOV32mr:
case X86::MOV64mr: {
@@ -550,7 +551,7 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
// If PUSHrmm is not slow on this target, try to fold the source of the
// push into the instruction.
- bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();
+ bool SlowPUSHrmm = STI->slowTwoMemOps();
// Check that this is legal to fold. Right now, we're extremely
// conservative about that.
@@ -562,6 +563,7 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
unsigned NumOps = DefMov->getDesc().getNumOperands();
for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
Push->addOperand(DefMov->getOperand(i));
+ Push->cloneMergedMemRefs(MF, {&*DefMov, &*Store});
DefMov->eraseFromParent();
} else {
@@ -569,6 +571,7 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
.addReg(Reg)
.getInstr();
+ Push->cloneMemRefs(MF, *Store);
}
break;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.cpp
index 57bf799cf89c..319dc9470604 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.cpp
@@ -108,17 +108,15 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
MachinePointerInfo &MPO) override {
LLT p0 = LLT::pointer(0, DL.getPointerSizeInBits(0));
LLT SType = LLT::scalar(DL.getPointerSizeInBits(0));
- Register SPReg = MRI.createGenericVirtualRegister(p0);
- MIRBuilder.buildCopy(SPReg, STI.getRegisterInfo()->getStackRegister());
+ auto SPReg =
+ MIRBuilder.buildCopy(p0, STI.getRegisterInfo()->getStackRegister());
- Register OffsetReg = MRI.createGenericVirtualRegister(SType);
- MIRBuilder.buildConstant(OffsetReg, Offset);
+ auto OffsetReg = MIRBuilder.buildConstant(SType, Offset);
- Register AddrReg = MRI.createGenericVirtualRegister(p0);
- MIRBuilder.buildPtrAdd(AddrReg, SPReg, OffsetReg);
+ auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);
MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
- return AddrReg;
+ return AddrReg.getReg(0);
}
void assignValueToReg(Register ValVReg, Register PhysReg,
@@ -139,7 +137,7 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
if (PhysRegSize > ValSize && LocSize == ValSize) {
assert((PhysRegSize == 128 || PhysRegSize == 80) && "We expect that to be 128 bit");
auto MIB = MIRBuilder.buildAnyExt(LLT::scalar(PhysRegSize), ValVReg);
- ExtReg = MIB->getOperand(0).getReg();
+ ExtReg = MIB.getReg(0);
} else
ExtReg = extendRegister(ValVReg, VA);
@@ -148,10 +146,12 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
+ MachineFunction &MF = MIRBuilder.getMF();
Register ExtReg = extendRegister(ValVReg, VA);
- auto MMO = MIRBuilder.getMF().getMachineMemOperand(
- MPO, MachineMemOperand::MOStore, VA.getLocVT().getStoreSize(),
- /* Alignment */ 1);
+
+ auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore,
+ VA.getLocVT().getStoreSize(),
+ inferAlignFromPtrInfo(MF, MPO));
MIRBuilder.buildStore(ExtReg, Addr, *MMO);
}
@@ -240,17 +240,17 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
int FI = MFI.CreateFixedObject(Size, Offset, true);
MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
- Register AddrReg = MRI.createGenericVirtualRegister(
- LLT::pointer(0, DL.getPointerSizeInBits(0)));
- MIRBuilder.buildFrameIndex(AddrReg, FI);
- return AddrReg;
+ return MIRBuilder
+ .buildFrameIndex(LLT::pointer(0, DL.getPointerSizeInBits(0)), FI)
+ .getReg(0);
}
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
- auto MMO = MIRBuilder.getMF().getMachineMemOperand(
+ MachineFunction &MF = MIRBuilder.getMF();
+ auto MMO = MF.getMachineMemOperand(
MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
- 1);
+ inferAlignFromPtrInfo(MF, MPO));
MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.h b/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.h
index 444a0c7d0122..b5ea7782896b 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86CallLowering.h
@@ -14,12 +14,12 @@
#ifndef LLVM_LIB_TARGET_X86_X86CALLLOWERING_H
#define LLVM_LIB_TARGET_X86_X86CALLLOWERING_H
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include <functional>
namespace llvm {
+template <typename T> class ArrayRef;
class DataLayout;
class MachineRegisterInfo;
class X86TargetLowering;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.cpp
index aee344a26764..c899db60e016 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.cpp
@@ -60,7 +60,7 @@ static bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT,
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
}
- // Successful in allocating regsiters - stop scanning next rules.
+ // Successful in allocating registers - stop scanning next rules.
return true;
}
@@ -166,7 +166,7 @@ static bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
State.getMachineFunction().getSubtarget().getRegisterInfo();
if (TRI->regsOverlap(Reg, X86::XMM4) ||
TRI->regsOverlap(Reg, X86::XMM5))
- State.AllocateStack(8, 8);
+ State.AllocateStack(8, Align(8));
if (!ArgFlags.isHva()) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
@@ -281,7 +281,7 @@ static bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
if (UseRegs)
It.convertToReg(State.AllocateReg(RegList[FirstFree++]));
else
- It.convertToMem(State.AllocateStack(4, 4));
+ It.convertToMem(State.AllocateStack(4, Align(4)));
State.addLoc(It);
}
@@ -305,7 +305,7 @@ static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
if (ArgCount == 1 && ValNo == 0) {
// If we have one argument, the argument is five stack slots big, at fixed
// offset zero.
- Offset = State.AllocateStack(5 * SlotSize, 4);
+ Offset = State.AllocateStack(5 * SlotSize, Align(4));
} else if (ArgCount == 2 && ValNo == 0) {
// If we have two arguments, the stack slot is *after* the error code
// argument. Pretend it doesn't consume stack space, and account for it when
@@ -316,7 +316,7 @@ static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
// appears first on the stack, and is then followed by the five slot
// interrupt struct.
Offset = 0;
- (void)State.AllocateStack(6 * SlotSize, 4);
+ (void)State.AllocateStack(6 * SlotSize, Align(4));
} else {
report_fatal_error("unsupported x86 interrupt prototype");
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.td
index db1aef2fd09d..802e694999b6 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86CallingConv.td
@@ -789,8 +789,9 @@ def CC_X86_32_Vector_Darwin : CallingConv<[
/// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP
/// values are spilled on the stack.
def CC_X86_32_Common : CallingConv<[
- // Handles byval parameters.
+ // Handles byval/preallocated parameters.
CCIfByVal<CCPassByVal<4, 4>>,
+ CCIfPreallocated<CCPassByVal<4, 4>>,
// The first 3 float or double arguments, if marked 'inreg' and if the call
// is not a vararg call and if SSE2 is available, are passed in SSE registers.
@@ -1145,7 +1146,7 @@ def CSR_64_Intel_OCL_BI : CalleeSavedRegs<(add CSR_64,
def CSR_64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add CSR_64,
(sequence "YMM%u", 8, 15))>;
-def CSR_64_Intel_OCL_BI_AVX512 : CalleeSavedRegs<(add RBX, RDI, RSI, R14, R15,
+def CSR_64_Intel_OCL_BI_AVX512 : CalleeSavedRegs<(add RBX, RSI, R14, R15,
(sequence "ZMM%u", 16, 31),
K4, K5, K6, K7)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
index 7051550d52e6..2ff8ee19561b 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
static cl::opt<bool> EnableDiscriminateMemops(
DEBUG_TYPE, cl::init(false),
cl::desc("Generate unique debug info for each instruction with a memory "
- "operand. Should be enabled for profile-drived cache prefetching, "
+ "operand. Should be enabled for profile-driven cache prefetching, "
"both in the build of the binary being profiled, as well as in "
"the build of the binary consuming the profile."),
cl::Hidden);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp
index 438b9fd8eebb..488ee51f1d89 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -283,7 +283,7 @@ public:
// A converter is identified by <destination domain, source opcode>
typedef std::pair<int, unsigned> InstrConverterBaseKeyTy;
-typedef DenseMap<InstrConverterBaseKeyTy, InstrConverterBase *>
+typedef DenseMap<InstrConverterBaseKeyTy, std::unique_ptr<InstrConverterBase>>
InstrConverterBaseMap;
/// A closure is a set of virtual register representing all of the edges in
@@ -471,8 +471,8 @@ void X86DomainReassignment::encloseInstr(Closure &C, MachineInstr *MI) {
// instruction.
for (int i = 0; i != NumDomains; ++i) {
if (C.isLegal((RegDomain)i)) {
- InstrConverterBase *IC = Converters.lookup({i, MI->getOpcode()});
- if (!IC || !IC->isLegal(MI, TII))
+ auto I = Converters.find({i, MI->getOpcode()});
+ if (I == Converters.end() || !I->second->isLegal(MI, TII))
C.setIllegal((RegDomain)i);
}
}
@@ -484,8 +484,8 @@ double X86DomainReassignment::calculateCost(const Closure &C,
double Cost = 0.0;
for (auto *MI : C.instructions())
- Cost +=
- Converters.lookup({DstDomain, MI->getOpcode()})->getExtraCost(MI, MRI);
+ Cost += Converters.find({DstDomain, MI->getOpcode()})
+ ->second->getExtraCost(MI, MRI);
return Cost;
}
@@ -501,8 +501,8 @@ void X86DomainReassignment::reassign(const Closure &C, RegDomain Domain) const {
// appropriate converter.
SmallVector<MachineInstr *, 8> ToErase;
for (auto *MI : C.instructions())
- if (Converters.lookup({Domain, MI->getOpcode()})
- ->convertInstr(MI, TII, MRI))
+ if (Converters.find({Domain, MI->getOpcode()})
+ ->second->convertInstr(MI, TII, MRI))
ToErase.push_back(MI);
// Iterate all registers in the closure, replace them with registers in the
@@ -606,19 +606,21 @@ void X86DomainReassignment::buildClosure(Closure &C, unsigned Reg) {
void X86DomainReassignment::initConverters() {
Converters[{MaskDomain, TargetOpcode::PHI}] =
- new InstrIgnore(TargetOpcode::PHI);
+ std::make_unique<InstrIgnore>(TargetOpcode::PHI);
Converters[{MaskDomain, TargetOpcode::IMPLICIT_DEF}] =
- new InstrIgnore(TargetOpcode::IMPLICIT_DEF);
+ std::make_unique<InstrIgnore>(TargetOpcode::IMPLICIT_DEF);
Converters[{MaskDomain, TargetOpcode::INSERT_SUBREG}] =
- new InstrReplaceWithCopy(TargetOpcode::INSERT_SUBREG, 2);
+ std::make_unique<InstrReplaceWithCopy>(TargetOpcode::INSERT_SUBREG, 2);
Converters[{MaskDomain, TargetOpcode::COPY}] =
- new InstrCOPYReplacer(TargetOpcode::COPY, MaskDomain, TargetOpcode::COPY);
+ std::make_unique<InstrCOPYReplacer>(TargetOpcode::COPY, MaskDomain,
+ TargetOpcode::COPY);
auto createReplacerDstCOPY = [&](unsigned From, unsigned To) {
- Converters[{MaskDomain, From}] = new InstrReplacerDstCOPY(From, To);
+ Converters[{MaskDomain, From}] =
+ std::make_unique<InstrReplacerDstCOPY>(From, To);
};
createReplacerDstCOPY(X86::MOVZX32rm16, X86::KMOVWkm);
@@ -638,7 +640,7 @@ void X86DomainReassignment::initConverters() {
}
auto createReplacer = [&](unsigned From, unsigned To) {
- Converters[{MaskDomain, From}] = new InstrReplacer(From, To);
+ Converters[{MaskDomain, From}] = std::make_unique<InstrReplacer>(From, To);
};
createReplacer(X86::MOV16rm, X86::KMOVWkm);
@@ -779,8 +781,6 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
}
}
- DeleteContainerSeconds(Converters);
-
LLVM_DEBUG(
dbgs() << "***** Machine Function after Domain Reassignment *****\n");
LLVM_DEBUG(MF.print(dbgs()));
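The converter table now owns its entries through std::unique_ptr, which is also why the lookup() calls above became find(): DenseMap::lookup() returns the mapped value by copy, and a move-only unique_ptr cannot be copied, whereas find() only hands back an iterator to the stored entry. A minimal standalone sketch of the pattern, with a Converter type invented for illustration:

#include "llvm/ADT/DenseMap.h"
#include <memory>

struct Converter {
  virtual ~Converter() = default;
  virtual double extraCost() const { return 0.0; }
};

using ConverterMap = llvm::DenseMap<unsigned, std::unique_ptr<Converter>>;

double costFor(const ConverterMap &Converters, unsigned Opcode) {
  // lookup() would try to copy the unique_ptr and fail to compile;
  // find() returns an iterator to the owned entry instead.
  auto It = Converters.find(Opcode);
  return It == Converters.end() ? 0.0 : It->second->extraCost();
}

void registerDefaults(ConverterMap &Converters) {
  Converters[0] = std::make_unique<Converter>();  // the map owns the object
}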
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp
index f1cf9b94c9e5..540ad98b6d54 100755
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86EvexToVex.cpp
@@ -237,11 +237,9 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
// Make sure the tables are sorted.
static std::atomic<bool> TableChecked(false);
if (!TableChecked.load(std::memory_order_relaxed)) {
- assert(std::is_sorted(std::begin(X86EvexToVex128CompressTable),
- std::end(X86EvexToVex128CompressTable)) &&
+ assert(llvm::is_sorted(X86EvexToVex128CompressTable) &&
"X86EvexToVex128CompressTable is not sorted!");
- assert(std::is_sorted(std::begin(X86EvexToVex256CompressTable),
- std::end(X86EvexToVex256CompressTable)) &&
+ assert(llvm::is_sorted(X86EvexToVex256CompressTable) &&
"X86EvexToVex256CompressTable is not sorted!");
TableChecked.store(true, std::memory_order_relaxed);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index d35d65914b34..c47ef4708e91 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -275,7 +275,10 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MachineInstr &NewMI = *std::prev(MBBI);
NewMI.copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI);
- MBB.getParent()->moveCallSiteInfo(&*MBBI, &NewMI);
+
+ // Update the call site info.
+ if (MBBI->isCandidateForCallSiteEntry())
+ MBB.getParent()->moveCallSiteInfo(&*MBBI, &NewMI);
// Delete the pseudo instruction TCRETURN.
MBB.erase(MBBI);
@@ -331,14 +334,6 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MBB.erase(MBBI);
return true;
}
- case X86::EH_RESTORE: {
- // Restore ESP and EBP, and optionally ESI if required.
- bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(
- MBB.getParent()->getFunction().getPersonalityFn()));
- X86FL->restoreWin32EHStackPointers(MBB, MBBI, DL, /*RestoreSP=*/IsSEH);
- MBBI->eraseFromParent();
- return true;
- }
case X86::LCMPXCHG8B_SAVE_EBX:
case X86::LCMPXCHG16B_SAVE_RBX: {
// Perform the following transformation.
@@ -371,6 +366,82 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MBBI->eraseFromParent();
return true;
}
+ // Loading/storing mask pairs requires two kmov operations. The second one of
+ // these needs a 2-byte displacement relative to the specified address (with
+ // 32-bit spill size). The pairs of 1-bit up to 16-bit masks all use the
+ // same spill size; they are all stored using MASKPAIR16STORE and loaded
+ // using MASKPAIR16LOAD.
+ //
+ // The displacement value might wrap around in theory, thus the asserts in
+ // both cases.
+ case X86::MASKPAIR16LOAD: {
+ int64_t Disp = MBBI->getOperand(1 + X86::AddrDisp).getImm();
+ assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
+ Register Reg = MBBI->getOperand(0).getReg();
+ bool DstIsDead = MBBI->getOperand(0).isDead();
+ Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
+ Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);
+
+ auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
+ .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
+ auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
+ .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));
+
+ for (int i = 0; i < X86::AddrNumOperands; ++i) {
+ MIBLo.add(MBBI->getOperand(1 + i));
+ if (i == X86::AddrDisp)
+ MIBHi.addImm(Disp + 2);
+ else
+ MIBHi.add(MBBI->getOperand(1 + i));
+ }
+
+ // Split the memory operand, adjusting the offset and size for the halves.
+ MachineMemOperand *OldMMO = MBBI->memoperands().front();
+ MachineFunction *MF = MBB.getParent();
+ MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
+ MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);
+
+ MIBLo.setMemRefs(MMOLo);
+ MIBHi.setMemRefs(MMOHi);
+
+ // Delete the pseudo.
+ MBB.erase(MBBI);
+ return true;
+ }
+ case X86::MASKPAIR16STORE: {
+ int64_t Disp = MBBI->getOperand(X86::AddrDisp).getImm();
+ assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
+ Register Reg = MBBI->getOperand(X86::AddrNumOperands).getReg();
+ bool SrcIsKill = MBBI->getOperand(X86::AddrNumOperands).isKill();
+ Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
+ Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);
+
+ auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));
+ auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));
+
+ for (int i = 0; i < X86::AddrNumOperands; ++i) {
+ MIBLo.add(MBBI->getOperand(i));
+ if (i == X86::AddrDisp)
+ MIBHi.addImm(Disp + 2);
+ else
+ MIBHi.add(MBBI->getOperand(i));
+ }
+ MIBLo.addReg(Reg0, getKillRegState(SrcIsKill));
+ MIBHi.addReg(Reg1, getKillRegState(SrcIsKill));
+
+ // Split the memory operand, adjusting the offset and size for the halves.
+ MachineMemOperand *OldMMO = MBBI->memoperands().front();
+ MachineFunction *MF = MBB.getParent();
+ MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
+ MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);
+
+ MIBLo.setMemRefs(MMOLo);
+ MIBHi.setMemRefs(MMOHi);
+
+ // Delete the pseudo.
+ MBB.erase(MBBI);
+ return true;
+ }
case TargetOpcode::ICALL_BRANCH_FUNNEL:
ExpandICallBranchFunnel(&MBB, MBBI);
return true;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp
index a1d256ea872d..b305940139c0 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FastISel.cpp
@@ -26,7 +26,6 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
@@ -498,7 +497,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
default: return false;
case MVT::i1: {
// Mask out all but lowest bit.
- unsigned AndResult = createResultReg(&X86::GR8RegClass);
+ Register AndResult = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(X86::AND8ri), AndResult)
.addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
@@ -691,7 +690,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
}
}
- unsigned ValReg = getRegForValue(Val);
+ Register ValReg = getRegForValue(Val);
if (ValReg == 0)
return false;
@@ -761,9 +760,9 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
// Ok, we need to do a load from a stub. If we've already loaded from
// this stub, reuse the loaded pointer, otherwise emit the load now.
- DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
- unsigned LoadReg;
- if (I != LocalValueMap.end() && I->second != 0) {
+ DenseMap<const Value *, Register>::iterator I = LocalValueMap.find(V);
+ Register LoadReg;
+ if (I != LocalValueMap.end() && I->second) {
LoadReg = I->second;
} else {
// Issue load from stub.
@@ -1128,10 +1127,8 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
return false;
- unsigned Alignment = S->getAlignment();
- unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
- if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = ABIAlignment;
+ Align Alignment = S->getAlign();
+ Align ABIAlignment = DL.getABITypeAlign(Val->getType());
bool Aligned = Alignment >= ABIAlignment;
X86AddressMode AM;
@@ -1196,7 +1193,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
const Value *RV = Ret->getOperand(0);
- unsigned Reg = getRegForValue(RV);
+ Register Reg = getRegForValue(RV);
if (Reg == 0)
return false;
@@ -1264,7 +1261,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// We saved the argument into a virtual register in the entry block,
// so now we copy the value out and into %rax/%eax.
if (F.hasStructRetAttr() && CC != CallingConv::Swift) {
- unsigned Reg = X86MFInfo->getSRetReturnReg();
+ Register Reg = X86MFInfo->getSRetReturnReg();
assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments()!");
unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
@@ -1322,14 +1319,9 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) {
if (!X86SelectAddress(Ptr, AM))
return false;
- unsigned Alignment = LI->getAlignment();
- unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
- if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = ABIAlignment;
-
unsigned ResultReg = 0;
if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
- Alignment))
+ LI->getAlign().value()))
return false;
updateValueMap(I, ResultReg);
@@ -1392,7 +1384,7 @@ static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
const DebugLoc &CurDbgLoc) {
- unsigned Op0Reg = getRegForValue(Op0);
+ Register Op0Reg = getRegForValue(Op0);
if (Op0Reg == 0) return false;
// Handle 'null' like i32/i64 0.
@@ -1414,7 +1406,7 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
if (CompareOpc == 0) return false;
- unsigned Op1Reg = getRegForValue(Op1);
+ Register Op1Reg = getRegForValue(Op1);
if (Op1Reg == 0) return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
.addReg(Op0Reg)
@@ -1487,8 +1479,8 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
return false;
- unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
- unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
+ Register FlagReg1 = createResultReg(&X86::GR8RegClass);
+ Register FlagReg2 = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
FlagReg1).addImm(SETFOpc[0]);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
@@ -1522,7 +1514,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
if (!TLI.isTypeLegal(DstVT))
return false;
- unsigned ResultReg = getRegForValue(I->getOperand(0));
+ Register ResultReg = getRegForValue(I->getOperand(0));
if (ResultReg == 0)
return false;
@@ -1548,7 +1540,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
default: llvm_unreachable("Unexpected zext to i64 source type");
}
- unsigned Result32 = createResultReg(&X86::GR32RegClass);
+ Register Result32 = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
.addReg(ResultReg);
@@ -1559,7 +1551,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
} else if (DstVT == MVT::i16) {
// i8->i16 doesn't exist in the autogenerated isel table. Need to zero
// extend to 32-bits and then extract down to 16-bits.
- unsigned Result32 = createResultReg(&X86::GR32RegClass);
+ Register Result32 = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8),
Result32).addReg(ResultReg);
@@ -1581,7 +1573,7 @@ bool X86FastISel::X86SelectSExt(const Instruction *I) {
if (!TLI.isTypeLegal(DstVT))
return false;
- unsigned ResultReg = getRegForValue(I->getOperand(0));
+ Register ResultReg = getRegForValue(I->getOperand(0));
if (ResultReg == 0)
return false;
@@ -1589,7 +1581,7 @@ bool X86FastISel::X86SelectSExt(const Instruction *I) {
MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
if (SrcVT == MVT::i1) {
// Set the high bits to zero.
- unsigned ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg,
+ Register ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg,
/*TODO: Kill=*/false);
if (ZExtReg == 0)
return false;
@@ -1605,7 +1597,7 @@ bool X86FastISel::X86SelectSExt(const Instruction *I) {
if (DstVT == MVT::i16) {
// i8->i16 doesn't exist in the autogenerated isel table. Need to sign
// extend to 32-bits and then extract down to 16-bits.
- unsigned Result32 = createResultReg(&X86::GR32RegClass);
+ Register Result32 = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8),
Result32).addReg(ResultReg);
@@ -1720,7 +1712,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
case MVT::i64: TestOpc = X86::TEST64ri32; break;
}
if (TestOpc) {
- unsigned OpReg = getRegForValue(TI->getOperand(0));
+ Register OpReg = getRegForValue(TI->getOperand(0));
if (OpReg == 0) return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
@@ -1742,7 +1734,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
} else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
// Fake request the condition, otherwise the intrinsic might be completely
// optimized away.
- unsigned TmpReg = getRegForValue(BI->getCondition());
+ Register TmpReg = getRegForValue(BI->getCondition());
if (TmpReg == 0)
return false;
@@ -1755,7 +1747,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
// Otherwise do a clumsy setcc and re-test it.
// Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
// in an explicit cast, so make sure to handle that correctly.
- unsigned OpReg = getRegForValue(BI->getCondition());
+ Register OpReg = getRegForValue(BI->getCondition());
if (OpReg == 0) return false;
// In case OpReg is a K register, COPY to a GPR
@@ -1824,10 +1816,10 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
if (!isTypeLegal(I->getType(), VT))
return false;
- unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ Register Op0Reg = getRegForValue(I->getOperand(0));
if (Op0Reg == 0) return false;
- unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ Register Op1Reg = getRegForValue(I->getOperand(1));
if (Op1Reg == 0) return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
CReg).addReg(Op1Reg);
@@ -1839,7 +1831,7 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
TII.get(TargetOpcode::KILL), X86::CL)
.addReg(CReg, RegState::Kill);
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
.addReg(Op0Reg);
updateValueMap(I, ResultReg);
@@ -1933,10 +1925,10 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
const DivRemEntry &TypeEntry = OpTable[TypeIndex];
const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
- unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ Register Op0Reg = getRegForValue(I->getOperand(0));
if (Op0Reg == 0)
return false;
- unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ Register Op1Reg = getRegForValue(I->getOperand(1));
if (Op1Reg == 0)
return false;
@@ -1949,7 +1941,7 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(OpEntry.OpSignExtend));
else {
- unsigned Zero32 = createResultReg(&X86::GR32RegClass);
+ Register Zero32 = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(X86::MOV32r0), Zero32);
@@ -1986,8 +1978,8 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
if ((I->getOpcode() == Instruction::SRem ||
I->getOpcode() == Instruction::URem) &&
OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
- unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
- unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
+ Register SourceSuperReg = createResultReg(&X86::GR16RegClass);
+ Register ResultSuperReg = createResultReg(&X86::GR16RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Copy), SourceSuperReg).addReg(X86::AX);
@@ -2066,15 +2058,15 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
return false;
if (SETFOpc) {
- unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
- unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
+ Register FlagReg1 = createResultReg(&X86::GR8RegClass);
+ Register FlagReg2 = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
FlagReg1).addImm(SETFOpc[0]);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
FlagReg2).addImm(SETFOpc[1]);
auto const &II = TII.get(SETFOpc[2]);
if (II.getNumDefs()) {
- unsigned TmpReg = createResultReg(&X86::GR8RegClass);
+ Register TmpReg = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
.addReg(FlagReg2).addReg(FlagReg1);
} else {
@@ -2086,7 +2078,7 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
} else if (foldX86XALUIntrinsic(CC, I, Cond)) {
// Fake request the condition, otherwise the intrinsic might be completely
// optimized away.
- unsigned TmpReg = getRegForValue(Cond);
+ Register TmpReg = getRegForValue(Cond);
if (TmpReg == 0)
return false;
@@ -2099,7 +2091,7 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
// accurate. If we read more than the lsb, we may see non-zero values
// whereas lsb is zero. Therefore, we have to truncate Op0Reg to i1 for
// the select. This is achieved by performing TEST against 1.
- unsigned CondReg = getRegForValue(Cond);
+ Register CondReg = getRegForValue(Cond);
if (CondReg == 0)
return false;
bool CondIsKill = hasTrivialKill(Cond);
@@ -2122,10 +2114,10 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
const Value *LHS = I->getOperand(1);
const Value *RHS = I->getOperand(2);
- unsigned RHSReg = getRegForValue(RHS);
+ Register RHSReg = getRegForValue(RHS);
bool RHSIsKill = hasTrivialKill(RHS);
- unsigned LHSReg = getRegForValue(LHS);
+ Register LHSReg = getRegForValue(LHS);
bool LHSIsKill = hasTrivialKill(LHS);
if (!LHSReg || !RHSReg)
@@ -2133,7 +2125,7 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(*RC)/8);
- unsigned ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill,
+ Register ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill,
LHSReg, LHSIsKill, CC);
updateValueMap(I, ResultReg);
return true;
@@ -2182,19 +2174,19 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
const Value *LHS = I->getOperand(1);
const Value *RHS = I->getOperand(2);
- unsigned LHSReg = getRegForValue(LHS);
+ Register LHSReg = getRegForValue(LHS);
bool LHSIsKill = hasTrivialKill(LHS);
- unsigned RHSReg = getRegForValue(RHS);
+ Register RHSReg = getRegForValue(RHS);
bool RHSIsKill = hasTrivialKill(RHS);
- unsigned CmpLHSReg = getRegForValue(CmpLHS);
+ Register CmpLHSReg = getRegForValue(CmpLHS);
bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
- unsigned CmpRHSReg = getRegForValue(CmpRHS);
+ Register CmpRHSReg = getRegForValue(CmpRHS);
bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
- if (!LHSReg || !RHSReg || !CmpLHS || !CmpRHS)
+ if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
return false;
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
@@ -2207,12 +2199,12 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
unsigned CmpOpcode =
(RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
- unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
+ Register CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
// Need an IMPLICIT_DEF for the input that is used to generate the upper
// bits of the result register since its not based on any of the inputs.
- unsigned ImplicitDefReg = createResultReg(VR128X);
+ Register ImplicitDefReg = createResultReg(VR128X);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
@@ -2241,9 +2233,9 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
unsigned BlendOpcode =
(RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
- unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
+ Register CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
- unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
+ Register VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
LHSReg, LHSIsKill, CmpReg, true);
ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -2263,13 +2255,13 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
}
const TargetRegisterClass *VR128 = &X86::VR128RegClass;
- unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
+ Register CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
- unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
+ Register AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
LHSReg, LHSIsKill);
- unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
+ Register AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
RHSReg, RHSIsKill);
- unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
+ Register OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
AndReg, /*IsKill=*/true);
ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -2317,7 +2309,7 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
return false;
} else {
- unsigned CondReg = getRegForValue(Cond);
+ Register CondReg = getRegForValue(Cond);
if (CondReg == 0)
return false;
bool CondIsKill = hasTrivialKill(Cond);
@@ -2340,10 +2332,10 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
const Value *LHS = I->getOperand(1);
const Value *RHS = I->getOperand(2);
- unsigned LHSReg = getRegForValue(LHS);
+ Register LHSReg = getRegForValue(LHS);
bool LHSIsKill = hasTrivialKill(LHS);
- unsigned RHSReg = getRegForValue(RHS);
+ Register RHSReg = getRegForValue(RHS);
bool RHSIsKill = hasTrivialKill(RHS);
if (!LHSReg || !RHSReg)
@@ -2351,7 +2343,7 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
- unsigned ResultReg =
+ Register ResultReg =
fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
updateValueMap(I, ResultReg);
return true;
@@ -2373,12 +2365,12 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) {
}
// No need for a select anymore - this is an unconditional move.
if (Opnd) {
- unsigned OpReg = getRegForValue(Opnd);
+ Register OpReg = getRegForValue(Opnd);
if (OpReg == 0)
return false;
bool OpIsKill = hasTrivialKill(Opnd);
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(OpReg, getKillRegState(OpIsKill));
@@ -2419,7 +2411,7 @@ bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
return false;
// Select integer to float/double conversion.
- unsigned OpReg = getRegForValue(I->getOperand(0));
+ Register OpReg = getRegForValue(I->getOperand(0));
if (OpReg == 0)
return false;
@@ -2448,10 +2440,10 @@ bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
MVT DstVT = TLI.getValueType(DL, I->getType()).getSimpleVT();
const TargetRegisterClass *RC = TLI.getRegClassFor(DstVT);
- unsigned ImplicitDefReg = createResultReg(RC);
+ Register ImplicitDefReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
- unsigned ResultReg =
+ Register ResultReg =
fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false);
updateValueMap(I, ResultReg);
return true;
@@ -2474,7 +2466,7 @@ bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
"Instruction must be an FPExt or FPTrunc!");
bool HasAVX = Subtarget->hasAVX();
- unsigned OpReg = getRegForValue(I->getOperand(0));
+ Register OpReg = getRegForValue(I->getOperand(0));
if (OpReg == 0)
return false;
@@ -2486,7 +2478,7 @@ bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
}
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
MachineInstrBuilder MIB;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
ResultReg);
@@ -2537,7 +2529,7 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
if (!TLI.isTypeLegal(SrcVT))
return false;
- unsigned InputReg = getRegForValue(I->getOperand(0));
+ Register InputReg = getRegForValue(I->getOperand(0));
if (!InputReg)
// Unhandled operand. Halt "fast" selection and bail.
return false;
@@ -2549,7 +2541,7 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
}
// Issue an extract_subreg.
- unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
+ Register ResultReg = fastEmitInst_extractsubreg(MVT::i8,
InputReg, false,
X86::sub_8bit);
if (!ResultReg)
@@ -2608,7 +2600,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
return false;
const Value *Op = II->getArgOperand(0);
- unsigned InputReg = getRegForValue(Op);
+ Register InputReg = getRegForValue(Op);
if (InputReg == 0)
return false;
@@ -2632,12 +2624,15 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
// used to provide rounding control: use MXCSR.RC, encoded as 0b100.
// It's consistent with the other FP instructions, which are usually
// controlled by MXCSR.
- InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 4);
+ unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPS2PHZ128rr
+ : X86::VCVTPS2PHrr;
+ InputReg = fastEmitInst_ri(Opc, RC, InputReg, false, 4);
// Move the lower 32-bits of ResultReg to another register of class GR32.
+ Opc = Subtarget->hasAVX512() ? X86::VMOVPDI2DIZrr
+ : X86::VMOVPDI2DIrr;
ResultReg = createResultReg(&X86::GR32RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(X86::VMOVPDI2DIrr), ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(InputReg, RegState::Kill);
// The result value is in the lower 16-bits of ResultReg.
@@ -2645,19 +2640,21 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
} else {
assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
- // Explicitly sign-extend the input to 32-bit.
- InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg,
+ // Explicitly zero-extend the input to 32-bit.
+ InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::ZERO_EXTEND, InputReg,
/*Kill=*/false);
// The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
InputReg, /*Kill=*/true);
- InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);
+ unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPH2PSZ128rr
+ : X86::VCVTPH2PSrr;
+ InputReg = fastEmitInst_r(Opc, RC, InputReg, /*Kill=*/true);
// The result value is in the lower 32-bits of ResultReg.
// Emit an explicit copy from register class VR128 to register class FR32.
- ResultReg = createResultReg(&X86::FR32RegClass);
+ ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(InputReg, RegState::Kill);
@@ -2700,7 +2697,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
// Always make a copy of the frame register to a vreg first, so that we
// never directly reference the frame register (the TwoAddressInstruction-
// Pass doesn't like that).
- unsigned SrcReg = createResultReg(RC);
+ Register SrcReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
@@ -2830,7 +2827,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
}
const Value *SrcVal = II->getArgOperand(0);
- unsigned SrcReg = getRegForValue(SrcVal);
+ Register SrcReg = getRegForValue(SrcVal);
if (SrcReg == 0)
return false;
@@ -2843,7 +2840,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
}
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
MachineInstrBuilder MIB;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
ResultReg);
@@ -2903,7 +2900,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
BaseOpc = X86ISD::UMUL; CondCode = X86::COND_O; break;
}
- unsigned LHSReg = getRegForValue(LHS);
+ Register LHSReg = getRegForValue(LHS);
if (LHSReg == 0)
return false;
bool LHSIsKill = hasTrivialKill(LHS);
@@ -2974,7 +2971,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
return false;
// Assign to a GPR since the overflow return value is lowered to a SETcc.
- unsigned ResultReg2 = createResultReg(&X86::GR8RegClass);
+ Register ResultReg2 = createResultReg(&X86::GR8RegClass);
assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
ResultReg2).addImm(CondCode);
@@ -3041,11 +3038,11 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
Op = IE->getOperand(0);
}
- unsigned Reg = getRegForValue(Op);
+ Register Reg = getRegForValue(Op);
if (Reg == 0)
return false;
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(Reg);
@@ -3139,11 +3136,11 @@ bool X86FastISel::fastLowerArguments() {
case MVT::f32: LLVM_FALLTHROUGH;
case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
}
- unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
+ Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
// FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
// Without this, EmitLiveInCopies may eliminate the livein if its only
// use is a bitcast (which isn't turned into an instruction).
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(DstReg, getKillRegState(true));
@@ -3154,7 +3151,7 @@ bool X86FastISel::fastLowerArguments() {
static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
CallingConv::ID CC,
- ImmutableCallSite *CS) {
+ const CallBase *CB) {
if (Subtarget->is64Bit())
return 0;
if (Subtarget->getTargetTriple().isOSMSVCRT())
@@ -3163,9 +3160,9 @@ static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
CC == CallingConv::HiPE || CC == CallingConv::Tail)
return 0;
- if (CS)
- if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) ||
- CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
+ if (CB)
+ if (CB->arg_empty() || !CB->paramHasAttr(0, Attribute::StructRet) ||
+ CB->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
return 0;
return 4;
@@ -3186,14 +3183,12 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isCallingConvWin64(CC);
- const CallInst *CI =
- CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
+ const CallInst *CI = dyn_cast_or_null<CallInst>(CLI.CB);
const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr;
// Call / invoke instructions with NoCfCheck attribute require special
// handling.
- const auto *II =
- CLI.CS ? dyn_cast<InvokeInst>(CLI.CS->getInstruction()) : nullptr;
+ const auto *II = dyn_cast_or_null<InvokeInst>(CLI.CB);
if ((CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck()))
return false;
@@ -3239,11 +3234,11 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
return false;
// Don't know about inalloca yet.
- if (CLI.CS && CLI.CS->hasInAllocaArgument())
+ if (CLI.CB && CLI.CB->hasInAllocaArgument())
return false;
for (auto Flag : CLI.OutFlags)
- if (Flag.isSwiftError())
+ if (Flag.isSwiftError() || Flag.isPreallocated())
return false;
SmallVector<MVT, 16> OutVTs;
@@ -3269,9 +3264,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
MVT VT;
auto *TI = dyn_cast<TruncInst>(Val);
unsigned ResultReg;
- if (TI && TI->getType()->isIntegerTy(1) && CLI.CS &&
- (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
- TI->hasOneUse()) {
+ if (TI && TI->getType()->isIntegerTy(1) && CLI.CB &&
+ (TI->getParent() == CLI.CB->getParent()) && TI->hasOneUse()) {
Value *PrevVal = TI->getOperand(0);
ResultReg = getRegForValue(PrevVal);
@@ -3284,7 +3278,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
ResultReg =
fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1);
} else {
- if (!isTypeLegal(Val->getType(), VT))
+ if (!isTypeLegal(Val->getType(), VT) ||
+ (VT.isVector() && VT.getVectorElementType() == MVT::i1))
return false;
ResultReg = getRegForValue(Val);
}
@@ -3302,7 +3297,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Allocate shadow area for Win64
if (IsWin64)
- CCInfo.AllocateStack(32, 8);
+ CCInfo.AllocateStack(32, Align(8));
CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
@@ -3406,7 +3401,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
OutRegs.push_back(VA.getLocReg());
} else {
- assert(VA.isMemLoc());
+ assert(VA.isMemLoc() && "Unknown value location!");
// Don't emit stores for undef values.
if (isa<UndefValue>(ArgVal))
@@ -3417,7 +3412,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
AM.Base.Reg = RegInfo->getStackRegister();
AM.Disp = LocMemOffset;
ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
- unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
+ Align Alignment = DL.getABITypeAlign(ArgVal->getType());
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
@@ -3537,7 +3532,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
TM.Options.GuaranteedTailCallOpt)
? NumBytes // Callee pops everything.
- : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CS);
+ : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CB);
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
.addImm(NumBytes).addImm(NumBytesForCalleeToPop);
@@ -3549,7 +3544,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
- unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
+ Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
EVT CopyVT = VA.getValVT();
@@ -3582,7 +3577,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
EVT ResVT = VA.getValVT();
unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
unsigned MemSize = ResVT.getSizeInBits()/8;
- int FI = MFI.CreateStackObject(MemSize, MemSize, false);
+ int FI = MFI.CreateStackObject(MemSize, Align(MemSize), false);
addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc)), FI)
.addReg(CopyReg);
@@ -3647,7 +3642,7 @@ X86FastISel::fastSelectInstruction(const Instruction *I) {
return X86SelectZExt(I);
if (DstVT.bitsLT(SrcVT))
return X86SelectTrunc(I);
- unsigned Reg = getRegForValue(I->getOperand(0));
+ Register Reg = getRegForValue(I->getOperand(0));
if (Reg == 0) return false;
updateValueMap(I, Reg);
return true;
@@ -3668,13 +3663,18 @@ X86FastISel::fastSelectInstruction(const Instruction *I) {
DstVT.getVectorElementType() == MVT::i1)
return false;
- unsigned Reg = getRegForValue(I->getOperand(0));
- if (Reg == 0)
+ Register Reg = getRegForValue(I->getOperand(0));
+ if (!Reg)
return false;
- // No instruction is needed for conversion. Reuse the register used by
- // the fist operand.
- updateValueMap(I, Reg);
+ // Emit a reg-reg copy so we don't propagate cached known bits information
+ // with the wrong VT if we fall out of fast isel after selecting this.
+ const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT);
+ Register ResultReg = createResultReg(DstClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg);
+
+ updateValueMap(I, ResultReg);
return true;
}
}
@@ -3688,7 +3688,7 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
uint64_t Imm = CI->getZExtValue();
if (Imm == 0) {
- unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
+ Register SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type");
case MVT::i1:
@@ -3701,7 +3701,7 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
case MVT::i32:
return SrcReg;
case MVT::i64: {
- unsigned ResultReg = createResultReg(&X86::GR64RegClass);
+ Register ResultReg = createResultReg(&X86::GR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
.addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
@@ -3769,11 +3769,7 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
}
// MachineConstantPool wants an explicit alignment.
- unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
- if (Align == 0) {
- // Alignment of vector types. FIXME!
- Align = DL.getTypeAllocSize(CFP->getType());
- }
+ Align Alignment = DL.getPrefTypeAlign(CFP->getType());
// x86-32 PIC requires a PIC base register for constant pools.
unsigned PICBase = 0;
@@ -3786,11 +3782,12 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
PICBase = X86::RIP;
// Create the load from the constant pool.
- unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
+ unsigned CPI = MCP.getConstantPoolIndex(CFP, Alignment);
+ Register ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
- if (CM == CodeModel::Large) {
- unsigned AddrReg = createResultReg(&X86::GR64RegClass);
+ // Large code model only applies to 64-bit mode.
+ if (Subtarget->is64Bit() && CM == CodeModel::Large) {
+ Register AddrReg = createResultReg(&X86::GR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
AddrReg)
.addConstantPoolIndex(CPI, 0, OpFlag);
@@ -3799,7 +3796,7 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
addDirectMem(MIB, AddrReg);
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
MachinePointerInfo::getConstantPool(*FuncInfo.MF),
- MachineMemOperand::MOLoad, DL.getPointerSize(), Align);
+ MachineMemOperand::MOLoad, DL.getPointerSize(), Alignment);
MIB->addMemOperand(*FuncInfo.MF, MMO);
return ResultReg;
}
@@ -3824,7 +3821,7 @@ unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
return AM.Base.Reg;
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
if (TM.getRelocationModel() == Reloc::Static &&
TLI.getPointerTy(DL) == MVT::i64) {
// The displacement code could be more than 32 bits away so we need to use
@@ -3883,7 +3880,7 @@ unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
: X86::LEA64r;
const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg), AM);
return ResultReg;
@@ -3916,7 +3913,7 @@ unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
return 0;
}
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
return ResultReg;
}
@@ -3932,16 +3929,12 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const X86InstrInfo &XII = (const X86InstrInfo &)TII;
unsigned Size = DL.getTypeAllocSize(LI->getType());
- unsigned Alignment = LI->getAlignment();
-
- if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = DL.getABITypeAlignment(LI->getType());
SmallVector<MachineOperand, 8> AddrOps;
AM.getFullAddress(AddrOps);
MachineInstr *Result = XII.foldMemoryOperandImpl(
- *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
+ *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, LI->getAlign(),
/*AllowCommute=*/true);
if (!Result)
return false;
@@ -3958,7 +3951,7 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
continue;
// Found the index reg, now try to rewrite it.
- unsigned IndexReg = constrainOperandRegClass(Result->getDesc(),
+ Register IndexReg = constrainOperandRegClass(Result->getDesc(),
MO.getReg(), OperandNo);
if (IndexReg == MO.getReg())
continue;
@@ -3980,7 +3973,7 @@ unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
unsigned Op3, bool Op3IsKill) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
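The X86FastISel hunks above are a mechanical migration from plain `unsigned` register numbers to the `Register` wrapper; call sites such as `if (TmpReg == 0)` are unchanged because the wrapper still converts to and compares against raw register numbers. A minimal standalone sketch of why such a wrapper is a drop-in replacement (a hypothetical `RegHandle`, not LLVM's actual `llvm::Register` class):

    #include <cassert>

    // Hypothetical RegHandle: value 0 still means "no register", and the implicit
    // conversions keep old unsigned-based call sites compiling unchanged.
    class RegHandle {
      unsigned Id = 0;
    public:
      constexpr RegHandle() = default;
      constexpr RegHandle(unsigned R) : Id(R) {}          // implicit: unsigned -> handle
      constexpr operator unsigned() const { return Id; }  // handle -> unsigned, so `Reg == 0` works
      constexpr bool isValid() const { return Id != 0; }
    };

    static unsigned fakeGetRegForValue(bool HasReg) { return HasReg ? 42u : 0u; }

    int main() {
      RegHandle TmpReg = fakeGetRegForValue(true);   // mirrors `Register TmpReg = getRegForValue(...)`
      assert(TmpReg != 0 && TmpReg.isValid());
      RegHandle Missing = fakeGetRegForValue(false);
      assert(Missing == 0 && !Missing.isValid());    // zero still reads as "no register"
      return 0;
    }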
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp
index f8c4a2adb851..78de041329e2 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp
@@ -350,7 +350,7 @@ MachineInstr *FixupBWInstPass::tryReplaceExtend(unsigned New32BitOpcode,
return nullptr;
// Don't interfere with formation of CBW instructions which should be a
- // shorter encoding than even the MOVSX32rr8. It's also immunte to partial
+ // shorter encoding than even the MOVSX32rr8. It's also immune to partial
// merge issues on Intel CPUs.
if (MI->getOpcode() == X86::MOVSX16rr8 &&
MI->getOperand(0).getReg() == X86::AX &&
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp
index 9ac401bb0253..424279038921 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -16,8 +16,11 @@
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Debug.h"
@@ -111,6 +114,12 @@ public:
MachineFunctionProperties::Property::NoVRegs);
}
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
private:
TargetSchedModel TSM;
const X86InstrInfo *TII = nullptr;
@@ -205,21 +214,27 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
TSM.init(&ST);
TII = ST.getInstrInfo();
TRI = ST.getRegisterInfo();
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto *MBFI = (PSI && PSI->hasProfileSummary())
+ ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
+ : nullptr;
LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
for (MachineBasicBlock &MBB : MF) {
// First pass. Try to remove or optimize existing LEAs.
+ bool OptIncDecPerBB =
+ OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
if (!isLEA(I->getOpcode()))
continue;
- if (optTwoAddrLEA(I, MBB, OptIncDec, UseLEAForSP))
+ if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
continue;
if (IsSlowLEA)
processInstructionForSlowLEA(I, MBB);
else if (IsSlow3OpsLEA)
- processInstrForSlow3OpLEA(I, MBB, OptIncDec);
+ processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
}
// Second pass for creating LEAs. This may reverse some of the
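The X86FixupLEAs hunks above start gating the LEA-to-INC/DEC rewrite per basic block: the global `OptIncDec` flag is OR-ed with `shouldOptimizeForSize(&MBB, PSI, MBFI)`, so cold blocks get the smaller encoding even when the flag is off. A small self-contained sketch of that decision shape, with a stand-in `isColdBlock` predicate instead of LLVM's profile analyses:

    #include <cstdio>
    #include <vector>

    // Stand-in for profile info: a block is "cold" if its observed count is low.
    struct BlockProfile { unsigned long long Count; };

    static bool isColdBlock(const BlockProfile &BP, unsigned long long HotThreshold) {
      return BP.Count < HotThreshold;  // placeholder for shouldOptimizeForSize()
    }

    int main() {
      const bool OptIncDec = false;    // global flag, assumed off here
      const unsigned long long HotThreshold = 1000;
      std::vector<BlockProfile> Blocks = {{5}, {50000}, {0}};

      for (size_t I = 0; I < Blocks.size(); ++I) {
        // Same shape as the patch: per-block flag = global flag OR
        // "this block is cold, so optimize it for size".
        bool OptIncDecPerBB = OptIncDec || isColdBlock(Blocks[I], HotThreshold);
        std::printf("block %zu: use INC/DEC instead of LEA: %s\n", I,
                    OptIncDecPerBB ? "yes" : "no");
      }
      return 0;
    }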
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupSetCC.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupSetCC.cpp
index 924f429fc138..09668d7c5468 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupSetCC.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupSetCC.cpp
@@ -36,6 +36,8 @@ STATISTIC(NumSubstZexts, "Number of setcc + zext pairs substituted");
namespace {
class X86FixupSetCCPass : public MachineFunctionPass {
public:
+ static char ID;
+
X86FixupSetCCPass() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "X86 Fixup SetCC"; }
@@ -47,12 +49,12 @@ private:
const X86InstrInfo *TII = nullptr;
enum { SearchBound = 16 };
-
- static char ID;
};
+} // end anonymous namespace
char X86FixupSetCCPass::ID = 0;
-}
+
+INITIALIZE_PASS(X86FixupSetCCPass, DEBUG_TYPE, DEBUG_TYPE, false, false)
FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); }
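The X86FixupSetCC hunk above makes the pass ID public and registers the pass with `INITIALIZE_PASS` so it can be referenced by name. The toy registry below is only meant to illustrate the general static-registration idea behind such macros, not LLVM's actual pass-registration machinery:

    #include <cstdio>
    #include <map>
    #include <string>

    // A toy registry: passes add themselves by name before main() runs.
    using PassFn = void (*)();
    static std::map<std::string, PassFn> &registry() {
      static std::map<std::string, PassFn> R;  // constructed on first use
      return R;
    }

    struct RegisterPass {
      RegisterPass(const char *Name, PassFn Fn) { registry()[Name] = Fn; }
    };

    static void runFixupSetCC() { std::puts("running x86-fixup-setcc"); }

    // Static initializer performs the registration, analogous in spirit to
    // what a pass-initialization macro expands to.
    static RegisterPass X("x86-fixup-setcc", runFixupSetCC);

    int main() {
      auto It = registry().find("x86-fixup-setcc");
      if (It != registry().end())
        It->second();  // look the pass up by name and run it
      return 0;
    }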
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
index b1d2de29c896..831695dabcd8 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -124,10 +124,6 @@ private:
MachineInstr &JmpI, CondRegArray &CondRegs);
void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
MachineInstr &CopyDefI);
- void rewriteSetCarryExtended(MachineBasicBlock &TestMBB,
- MachineBasicBlock::iterator TestPos,
- DebugLoc TestLoc, MachineInstr &SetBI,
- MachineOperand &FlagUse, CondRegArray &CondRegs);
void rewriteSetCC(MachineBasicBlock &TestMBB,
MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
MachineInstr &SetCCI, MachineOperand &FlagUse,
@@ -165,6 +161,7 @@ enum class FlagArithMnemonic {
RCL,
RCR,
SBB,
+ SETB,
};
} // namespace
@@ -235,6 +232,10 @@ static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) {
case X86::ADOX32rm:
case X86::ADOX64rm:
return FlagArithMnemonic::ADOX;
+
+ case X86::SETB_C32r:
+ case X86::SETB_C64r:
+ return FlagArithMnemonic::SETB;
}
}
@@ -638,24 +639,9 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
// logic.
FlagsKilled = true;
- switch (MI.getOpcode()) {
- case X86::SETB_C8r:
- case X86::SETB_C16r:
- case X86::SETB_C32r:
- case X86::SETB_C64r:
- // Use custom lowering for arithmetic that is merely extending the
- // carry flag. We model this as the SETB_C* pseudo instructions.
- rewriteSetCarryExtended(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
- CondRegs);
- break;
-
- default:
- // Generically handle remaining uses as arithmetic instructions.
- rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
- CondRegs);
- break;
- }
- break;
+ // Generically handle remaining uses as arithmetic instructions.
+ rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
+ CondRegs);
}
// If this was the last use of the flags, we're done.
@@ -821,6 +807,7 @@ void X86FlagsCopyLoweringPass::rewriteArithmetic(
case FlagArithMnemonic::RCL:
case FlagArithMnemonic::RCR:
case FlagArithMnemonic::SBB:
+ case FlagArithMnemonic::SETB:
Cond = X86::COND_B; // CF == 1
// Set up an addend that when one is added will need a carry due to not
// having a higher bit available.
@@ -959,130 +946,6 @@ void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI,
MI.eraseFromParent();
}
-void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
- MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
- DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse,
- CondRegArray &CondRegs) {
- // This routine is only used to handle pseudos for setting a register to zero
- // or all ones based on CF. This is essentially the sign extended from 1-bit
- // form of SETB and modeled with the SETB_C* pseudos. They require special
- // handling as they aren't normal SETcc instructions and are lowered to an
- // EFLAGS clobbering operation (SBB typically). One simplifying aspect is that
- // they are only provided in reg-defining forms. A complicating factor is that
- // they can define many different register widths.
- assert(SetBI.getOperand(0).isReg() &&
- "Cannot have a non-register defined operand to this variant of SETB!");
-
- // Little helper to do the common final step of replacing the register def'ed
- // by this SETB instruction with a new register and removing the SETB
- // instruction.
- auto RewriteToReg = [&](unsigned Reg) {
- MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg);
- SetBI.eraseFromParent();
- };
-
- // Grab the register class used for this particular instruction.
- auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg());
-
- MachineBasicBlock &MBB = *SetBI.getParent();
- auto SetPos = SetBI.getIterator();
- auto SetLoc = SetBI.getDebugLoc();
-
- auto AdjustReg = [&](unsigned Reg) {
- auto &OrigRC = *MRI->getRegClass(Reg);
- if (&OrigRC == &SetBRC)
- return Reg;
-
- unsigned NewReg;
-
- int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8;
- int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8;
- assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!");
- assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!");
- int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit,
- X86::NoSubRegister, X86::sub_32bit};
-
- // If the original size is smaller than the target *and* is smaller than 4
- // bytes, we need to explicitly zero extend it. We always extend to 4-bytes
- // to maximize the chance of being able to CSE that operation and to avoid
- // partial dependency stalls extending to 2-bytes.
- if (OrigRegSize < TargetRegSize && OrigRegSize < 4) {
- NewReg = MRI->createVirtualRegister(&X86::GR32RegClass);
- BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg)
- .addReg(Reg);
- if (&SetBRC == &X86::GR32RegClass)
- return NewReg;
- Reg = NewReg;
- OrigRegSize = 4;
- }
-
- NewReg = MRI->createVirtualRegister(&SetBRC);
- if (OrigRegSize < TargetRegSize) {
- BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG),
- NewReg)
- .addImm(0)
- .addReg(Reg)
- .addImm(SubRegIdx[OrigRegSize]);
- } else if (OrigRegSize > TargetRegSize) {
- if (TargetRegSize == 1 && !Subtarget->is64Bit()) {
- // Need to constrain the register class.
- MRI->constrainRegClass(Reg, &X86::GR32_ABCDRegClass);
- }
-
- BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY),
- NewReg)
- .addReg(Reg, 0, SubRegIdx[TargetRegSize]);
- } else {
- BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
- .addReg(Reg);
- }
- return NewReg;
- };
-
- unsigned &CondReg = CondRegs[X86::COND_B];
- if (!CondReg)
- CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B);
-
- // Adjust the condition to have the desired register width by zero-extending
- // as needed.
- // FIXME: We should use a better API to avoid the local reference and using a
- // different variable here.
- unsigned ExtCondReg = AdjustReg(CondReg);
-
- // Now we need to turn this into a bitmask. We do this by subtracting it from
- // zero.
- Register ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
- BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg);
- ZeroReg = AdjustReg(ZeroReg);
-
- unsigned Sub;
- switch (SetBI.getOpcode()) {
- case X86::SETB_C8r:
- Sub = X86::SUB8rr;
- break;
-
- case X86::SETB_C16r:
- Sub = X86::SUB16rr;
- break;
-
- case X86::SETB_C32r:
- Sub = X86::SUB32rr;
- break;
-
- case X86::SETB_C64r:
- Sub = X86::SUB64rr;
- break;
-
- default:
- llvm_unreachable("Invalid SETB_C* opcode!");
- }
- Register ResultReg = MRI->createVirtualRegister(&SetBRC);
- BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
- .addReg(ZeroReg)
- .addReg(ExtCondReg);
- return RewriteToReg(ResultReg);
-}
-
void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
MachineBasicBlock::iterator TestPos,
DebugLoc TestLoc,
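The deleted `rewriteSetCarryExtended` handled the `SETB_C*` pseudos, which materialize `0 - CF` (all zeros or all ones). The replacement folds them into the generic arithmetic rewrite: once CF has been re-created from the saved condition byte by adding 255 to it, they behave like any other CF consumer. A standalone check of both pieces of arithmetic:

    #include <cassert>
    #include <cstdint>

    // SETB_C-style "sign-extend the carry flag": 0 - CF is 0 when CF == 0 and
    // all ones when CF == 1, for any register width.
    static uint32_t setbC32(unsigned CF) { return 0u - static_cast<uint32_t>(CF); }
    static uint64_t setbC64(unsigned CF) { return 0ull - static_cast<uint64_t>(CF); }

    int main() {
      assert(setbC32(0) == 0u && setbC64(0) == 0ull);
      assert(setbC32(1) == 0xFFFFFFFFu);
      assert(setbC64(1) == 0xFFFFFFFFFFFFFFFFull);

      // Re-creating CF from the materialized condition byte (0 or 1), the way
      // the generic rewrite does for CF consumers: adding 255 produces a carry
      // out of 8 bits exactly when the byte is non-zero.
      for (unsigned CondByte = 0; CondByte <= 1; ++CondByte) {
        unsigned Sum = CondByte + 255u;
        bool CarryOut = Sum > 0xFFu;  // 8-bit carry out
        assert(CarryOut == (CondByte == 1));
      }
      return 0;
    }

This is why the CF case in `rewriteArithmetic` can simply grow a `SETB` entry instead of keeping a dedicated rewrite path.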
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp
index 13bbd6ccfce4..e6ee46957500 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -1364,6 +1364,9 @@ void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
MBB->remove(&*I++);
I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
+ if (!MI.mayRaiseFPException())
+ I->setFlag(MachineInstr::MIFlag::NoFPExcept);
+
// If both operands are killed, pop one off of the stack in addition to
// overwriting the other one.
if (KillsOp0 && KillsOp1 && Op0 != Op1) {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
index 1da20371caf5..c7ca6fb2a4fc 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -17,6 +17,7 @@
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -32,6 +33,12 @@
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>
+#define DEBUG_TYPE "x86-fl"
+
+STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
+STATISTIC(NumFrameExtraProbe,
+ "Number of extra stack probes generated in prologue");
+
using namespace llvm;
X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
@@ -50,7 +57,8 @@ X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
return !MF.getFrameInfo().hasVarSizedObjects() &&
- !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
+ !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
+ !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
}
/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
@@ -60,6 +68,7 @@ bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
bool
X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
return hasReservedCallFrame(MF) ||
+ MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
(hasFP(MF) && !TRI->needsStackRealignment(MF)) ||
TRI->hasBasePointer(MF);
}
@@ -83,10 +92,10 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
- TRI->needsStackRealignment(MF) ||
- MFI.hasVarSizedObjects() ||
+ TRI->needsStackRealignment(MF) || MFI.hasVarSizedObjects() ||
MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
+ MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
MFI.hasStackMap() || MFI.hasPatchPoint() ||
MFI.hasCopyImplyingStackAdjustment());
@@ -257,7 +266,20 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
uint64_t Chunk = (1LL << 31) - 1;
- if (Offset > Chunk) {
+ MachineFunction &MF = *MBB.getParent();
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ const X86TargetLowering &TLI = *STI.getTargetLowering();
+ const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
+
+ // It's OK not to take large chunks into account when probing, as the
+ // allocation is split into smaller chunks anyway.
+ if (EmitInlineStackProbe && !InEpilogue) {
+
+ // This pseudo-instruction is going to be expanded, potentially using a
+ // loop, by inlineStackProbe().
+ BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
+ return;
+ } else if (Offset > Chunk) {
// Rather than emit a long series of instructions for large offsets,
// load the offset into a register and do one sub/add
unsigned Reg = 0;
@@ -381,8 +403,8 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
} else {
bool IsSub = Offset < 0;
uint64_t AbsOffset = IsSub ? -Offset : Offset;
- unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
- : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
+ const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
+ : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)
.addImm(AbsOffset);
@@ -457,9 +479,32 @@ void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
.addCFIIndex(CFIIndex);
}
+/// Emits Dwarf Info specifying offsets of callee saved registers and
+/// frame pointer. This is called only when basic block sections are enabled.
+void X86FrameLowering::emitCalleeSavedFrameMoves(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
+ MachineFunction &MF = *MBB.getParent();
+ if (!hasFP(MF)) {
+ emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
+ return;
+ }
+ const MachineModuleInfo &MMI = MF.getMMI();
+ const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+ const unsigned FramePtr = TRI->getFrameRegister(MF);
+ const unsigned MachineFramePtr =
+ STI.isTarget64BitILP32() ? unsigned(getX86SubSuperRegister(FramePtr, 64))
+ : FramePtr;
+ unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
+ // Offset = space for return address + size of the frame pointer itself.
+ unsigned Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
+ BuildCFI(MBB, MBBI, DebugLoc{},
+ MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
+ emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
+}
+
void X86FrameLowering::emitCalleeSavedFrameMoves(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL) const {
+ const DebugLoc &DL, bool IsPrologue) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
@@ -474,10 +519,15 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(
I = CSI.begin(), E = CSI.end(); I != E; ++I) {
int64_t Offset = MFI.getObjectOffset(I->getFrameIdx());
unsigned Reg = I->getReg();
-
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
- BuildCFI(MBB, MBBI, DL,
- MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+
+ if (IsPrologue) {
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+ } else {
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createRestore(nullptr, DwarfReg));
+ }
}
}
@@ -488,7 +538,8 @@ void X86FrameLowering::emitStackProbe(MachineFunction &MF,
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
if (STI.isTargetWindowsCoreCLR()) {
if (InProlog) {
- emitStackProbeInlineStub(MF, MBB, MBBI, DL, true);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
+ .addImm(0 /* no explicit stack size */);
} else {
emitStackProbeInline(MF, MBB, MBBI, DL, false);
}
@@ -499,26 +550,13 @@ void X86FrameLowering::emitStackProbe(MachineFunction &MF,
void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
MachineBasicBlock &PrologMBB) const {
- const StringRef ChkStkStubSymbol = "__chkstk_stub";
- MachineInstr *ChkStkStub = nullptr;
-
- for (MachineInstr &MI : PrologMBB) {
- if (MI.isCall() && MI.getOperand(0).isSymbol() &&
- ChkStkStubSymbol == MI.getOperand(0).getSymbolName()) {
- ChkStkStub = &MI;
- break;
- }
- }
-
- if (ChkStkStub != nullptr) {
- assert(!ChkStkStub->isBundled() &&
- "Not expecting bundled instructions here");
- MachineBasicBlock::iterator MBBI = std::next(ChkStkStub->getIterator());
- assert(std::prev(MBBI) == ChkStkStub &&
- "MBBI expected after __chkstk_stub.");
- DebugLoc DL = PrologMBB.findDebugLoc(MBBI);
- emitStackProbeInline(MF, PrologMBB, MBBI, DL, true);
- ChkStkStub->eraseFromParent();
+ auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
+ return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
+ });
+ if (Where != PrologMBB.end()) {
+ DebugLoc DL = PrologMBB.findDebugLoc(Where);
+ emitStackProbeInline(MF, PrologMBB, Where, DL, true);
+ Where->eraseFromParent();
}
}
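The rewritten `inlineStackProbe` above no longer pattern-matches a `__chkstk_stub` call; the prologue now drops a `STACKALLOC_W_PROBING` pseudo carrying the allocation size, which is later found with `find_if` and expanded in place. A toy model of that placeholder-then-expand pattern (the instruction type and expansion below are stand-ins, not LLVM data structures):

    #include <algorithm>
    #include <cstdio>
    #include <string>
    #include <vector>

    // Stand-in "instruction": an opcode name plus an immediate operand.
    struct Inst {
      std::string Op;
      unsigned long long Imm = 0;
    };

    // Stand-in for the real expansion: replace the marker with concrete code.
    static void expandProbe(std::vector<Inst>::iterator Where) {
      unsigned long long Size = Where->Imm;
      *Where = Inst{"sub rsp, " + std::to_string(Size), 0};
    }

    int main() {
      std::vector<Inst> Prolog = {
          {"push rbp", 0}, {"STACKALLOC_W_PROBING", 8192}, {"mov rbp, rsp", 0}};
      auto Where = std::find_if(Prolog.begin(), Prolog.end(), [](const Inst &I) {
        return I.Op == "STACKALLOC_W_PROBING";
      });
      if (Where != Prolog.end())
        expandProbe(Where);  // the pseudo never survives to final code
      for (const Inst &I : Prolog)
        std::printf("%s\n", I.Op.c_str());
      return 0;
    }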
@@ -528,6 +566,167 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
const DebugLoc &DL,
bool InProlog) const {
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
+ emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
+ else
+ emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
+}
+
+void X86FrameLowering::emitStackProbeInlineGeneric(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
+ MachineInstr &AllocWithProbe = *MBBI;
+ uint64_t Offset = AllocWithProbe.getOperand(0).getImm();
+
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ const X86TargetLowering &TLI = *STI.getTargetLowering();
+ assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
+ "different expansion expected for CoreCLR 64 bit");
+
+ const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
+ uint64_t ProbeChunk = StackProbeSize * 8;
+
+ // Synthesize a loop or unroll it, depending on the number of iterations.
+ if (Offset > ProbeChunk) {
+ emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset);
+ } else {
+ emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset);
+ }
+}
+
+void X86FrameLowering::emitStackProbeInlineGenericBlock(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+ uint64_t Offset) const {
+
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ const X86TargetLowering &TLI = *STI.getTargetLowering();
+ const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
+ const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
+ const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
+ uint64_t CurrentOffset = 0;
+ // Starts at 0 thanks to the return address already being saved on the stack.
+ uint64_t CurrentProbeOffset = 0;
+
+ // For the first N - 1 pages, just probe. I tried to take advantage of
+ // natural probes, but it implies much more logic and there were very few
+ // interesting natural probes to interleave.
+ while (CurrentOffset + StackProbeSize < Offset) {
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(StackProbeSize)
+ .setMIFlag(MachineInstr::FrameSetup);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
+ .setMIFlag(MachineInstr::FrameSetup),
+ StackPtr, false, 0)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+ NumFrameExtraProbe++;
+ CurrentOffset += StackProbeSize;
+ CurrentProbeOffset += StackProbeSize;
+ }
+
+ uint64_t ChunkSize = Offset - CurrentOffset;
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(ChunkSize)
+ .setMIFlag(MachineInstr::FrameSetup);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+}
+
+void X86FrameLowering::emitStackProbeInlineGenericLoop(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+ uint64_t Offset) const {
+ assert(Offset && "null offset");
+
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ const X86TargetLowering &TLI = *STI.getTargetLowering();
+ const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
+ const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
+
+ // Synthesize a loop
+ NumFrameLoopProbe++;
+ const BasicBlock *LLVM_BB = MBB.getBasicBlock();
+
+ MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+
+ MachineFunction::iterator MBBIter = ++MBB.getIterator();
+ MF.insert(MBBIter, testMBB);
+ MF.insert(MBBIter, tailMBB);
+
+ Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // save loop bound
+ {
+ const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackProbed)
+ .addReg(FinalStackProbed)
+ .addImm(Offset / StackProbeSize * StackProbeSize)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ // allocate a page
+ {
+ const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
+ BuildMI(testMBB, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(StackProbeSize)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ // touch the page
+ addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
+ .setMIFlag(MachineInstr::FrameSetup),
+ StackPtr, false, 0)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // cmp with stack pointer bound
+ BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
+ .addReg(StackPtr)
+ .addReg(FinalStackProbed)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // jump
+ BuildMI(testMBB, DL, TII.get(X86::JCC_1))
+ .addMBB(testMBB)
+ .addImm(X86::COND_NE)
+ .setMIFlag(MachineInstr::FrameSetup);
+ testMBB->addSuccessor(testMBB);
+ testMBB->addSuccessor(tailMBB);
+
+ // BB management
+ tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
+ tailMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ MBB.addSuccessor(testMBB);
+
+ // handle tail
+ unsigned TailOffset = Offset % StackProbeSize;
+ if (TailOffset) {
+ const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, TailOffset);
+ BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(TailOffset)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ // Update Live In information
+ recomputeLiveIns(*testMBB);
+ recomputeLiveIns(*tailMBB);
+}
+
+void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
assert(STI.is64Bit() && "different expansion needed for 32 bit");
assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
const TargetInstrInfo &TII = *STI.getInstrInfo();
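`emitStackProbeInlineGeneric` above unrolls the probes when the allocation is at most `8 * StackProbeSize` and otherwise emits a loop that allocates and touches one probe-sized page per iteration, finishing with the sub-page tail. A standalone sketch of the loop-path arithmetic, using an assumed 4 KiB probe size:

    #include <cstdio>

    int main() {
      const unsigned long long StackProbeSize = 4096;  // assumed probe interval (one page)
      const unsigned long long ProbeChunk = StackProbeSize * 8;

      const unsigned long long Offsets[] = {6000, 20000, 70000};
      for (unsigned long long Offset : Offsets) {
        if (Offset > ProbeChunk) {                     // same threshold as the patch
          unsigned long long Iterations = Offset / StackProbeSize;
          unsigned long long Tail = Offset % StackProbeSize;  // handled in the tail block
          std::printf("alloc %llu -> loop: %llu iteration(s), tail %llu bytes\n",
                      Offset, Iterations, Tail);
        } else {
          std::printf("alloc %llu -> unrolled block of probes\n", Offset);
        }
      }
      return 0;
    }

The loop bound in the real code is computed the same way: `FinalStackProbed` is the stack pointer minus `Offset / StackProbeSize * StackProbeSize`, and the remainder is subtracted in `tailMBB`.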
@@ -821,16 +1020,6 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
}
}
-void X86FrameLowering::emitStackProbeInlineStub(
- MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
-
- assert(InProlog && "ChkStkStub called outside prolog!");
-
- BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol("__chkstk_stub");
-}
-
static unsigned calculateSetFPREG(uint64_t SPAdjust) {
// Win64 ABI has a less restrictive limitation of 240; 128 works equally well
// and might require smaller successive adjustments.
@@ -846,15 +1035,15 @@ static unsigned calculateSetFPREG(uint64_t SPAdjust) {
// go with the minimum SlotSize.
uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
- uint64_t MaxAlign = MFI.getMaxAlignment(); // Desired stack alignment.
- unsigned StackAlign = getStackAlignment();
+ Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
+ Align StackAlign = getStackAlign();
if (MF.getFunction().hasFnAttribute("stackrealign")) {
if (MFI.hasCalls())
MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
else if (MaxAlign < SlotSize)
- MaxAlign = SlotSize;
+ MaxAlign = Align(SlotSize);
}
- return MaxAlign;
+ return MaxAlign.value();
}
void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
@@ -1014,7 +1203,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
X86FI->setCalleeSavedFrameSize(
X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
- bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty();
+ const bool EmitStackProbeCall =
+ STI.getTargetLowering()->hasStackProbeSymbol(MF);
unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
// Re-align the stack on 64-bit if the x86-interrupt calling convention is
@@ -1032,11 +1222,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// pointer, calls, or dynamic alloca then we do not need to adjust the
// stack pointer (we fit in the Red Zone). We also check that we don't
// push and pop from the stack.
- if (has128ByteRedZone(MF) &&
- !TRI->needsStackRealignment(MF) &&
+ if (has128ByteRedZone(MF) && !TRI->needsStackRealignment(MF) &&
!MFI.hasVarSizedObjects() && // No dynamic alloca.
!MFI.adjustsStack() && // No calls.
- !UseStackProbe && // No stack probes.
+ !EmitStackProbeCall && // No stack probes.
!MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
!MF.shouldSplitStack()) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
@@ -1115,7 +1304,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Define the current CFA rule to use the provided offset.
assert(StackSize);
BuildCFI(MBB, MBBI, DL,
- MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth));
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, -2 * stackGrowth));
// Change the rule for the FramePtr to be an "offset" rule.
unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
@@ -1192,7 +1381,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Define the current CFA rule to use the provided offset.
assert(StackSize);
BuildCFI(MBB, MBBI, DL,
- MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset));
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset));
StackOffset += stackGrowth;
}
@@ -1237,7 +1426,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
uint64_t AlignedNumBytes = NumBytes;
if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF))
AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
- if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
+ if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
assert(!X86FI->getUsesRedZone() &&
"The Red Zone is not accounted for in stack probes");
@@ -1323,17 +1512,17 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
Establisher, false, PSPSlotOffset)
.addMemOperand(MF.getMachineMemOperand(
- NoInfo, MachineMemOperand::MOLoad, SlotSize, SlotSize));
+ NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize)));
;
// Save the root establisher back into the current funclet's (mostly
// empty) frame, in case a sub-funclet or the GC needs it.
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
false, PSPSlotOffset)
.addReg(Establisher)
- .addMemOperand(
- MF.getMachineMemOperand(NoInfo, MachineMemOperand::MOStore |
- MachineMemOperand::MOVolatile,
- SlotSize, SlotSize));
+ .addMemOperand(MF.getMachineMemOperand(
+ NoInfo,
+ MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
+ SlotSize, Align(SlotSize)));
}
SPOrEstablisher = Establisher;
} else {
@@ -1370,7 +1559,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// into the registration node so that the runtime will restore it for us.
if (!MBB.isCleanupFuncletEntry()) {
assert(Personality == EHPersonality::MSVC_CXX);
- unsigned FrameReg;
+ Register FrameReg;
int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg);
// ESP is the first field, so no extra displacement is needed.
@@ -1389,7 +1578,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
if (X86::FR64RegClass.contains(Reg)) {
int Offset;
- unsigned IgnoredFrameReg;
+ Register IgnoredFrameReg;
if (IsWin64Prologue && IsFunclet)
Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
else
@@ -1423,7 +1612,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
.addReg(StackPtr)
.addMemOperand(MF.getMachineMemOperand(
PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
- SlotSize, SlotSize));
+ SlotSize, Align(SlotSize)));
}
// Realign stack after we spilled callee-saved registers (so that we'll be
@@ -1464,7 +1653,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// it recovers the frame pointer from the base pointer rather than the
// other way around.
unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
- unsigned UsedReg;
+ Register UsedReg;
int Offset =
getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg);
assert(UsedReg == BasePtr);
@@ -1479,12 +1668,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (!HasFP && NumBytes) {
// Define the current CFA rule to use the provided offset.
assert(StackSize);
- BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset(
- nullptr, -StackSize + stackGrowth));
+ BuildCFI(
+ MBB, MBBI, DL,
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth));
}
// Emit DWARF info specifying the offsets of the callee-saved registers.
- emitCalleeSavedFrameMoves(MBB, MBBI, DL);
+ emitCalleeSavedFrameMoves(MBB, MBBI, DL, true);
}
// X86 Interrupt handling function cannot assume anything about the direction
@@ -1541,7 +1731,7 @@ static bool isFuncletReturnInstr(MachineInstr &MI) {
unsigned
X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
- unsigned SPReg;
+ Register SPReg;
int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
/*IgnoreSPUpdates*/ true);
assert(Offset >= 0 && SPReg == TRI->getStackRegister());
@@ -1573,7 +1763,7 @@ X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
// RBP is not included in the callee saved register block. After pushing RBP,
// everything is 16 byte aligned. Everything we allocate before an outgoing
// call must also be 16 byte aligned.
- unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlignment());
+ unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
// Subtract out the size of the callee saved registers. This is how much stack
// each funclet will allocate.
return FrameSizeMinusRBP + XMMSize - CSSize;
@@ -1634,6 +1824,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
uint64_t SEHStackAllocAmt = NumBytes;
+ // AfterPop is the position to insert .cfi_restore.
+ MachineBasicBlock::iterator AfterPop = MBBI;
if (HasFP) {
// Pop EBP.
BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
@@ -1642,8 +1834,15 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (NeedsDwarfCFI) {
unsigned DwarfStackPtr =
TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
- BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfa(
- nullptr, DwarfStackPtr, -SlotSize));
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize));
+ if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
+ unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
+ BuildCFI(MBB, AfterPop, DL,
+ MCCFIInstruction::createRestore(nullptr, DwarfFramePtr));
+ --MBBI;
+ --AfterPop;
+ }
--MBBI;
}
}
@@ -1711,8 +1910,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
if (!hasFP(MF) && NeedsDwarfCFI) {
// Define the current CFA rule to use the provided offset.
- BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset(
- nullptr, -CSSize - SlotSize));
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, CSSize + SlotSize));
}
--MBBI;
}
@@ -1738,11 +1937,18 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (Opc == X86::POP32r || Opc == X86::POP64r) {
Offset += SlotSize;
BuildCFI(MBB, MBBI, DL,
- MCCFIInstruction::createDefCfaOffset(nullptr, Offset));
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset));
}
}
}
+ // Emit DWARF info specifying the restores of the callee-saved registers.
+ // For an epilogue that contains a return, or any other block without
+ // successors, there is no need to generate .cfi_restore for the
+ // callee-saved registers.
+ if (NeedsDwarfCFI && !MBB.succ_empty() && !MBB.isReturnBlock()) {
+ emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
+ }
+
if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
// Add the return addr area delta back since we are not tail calling.
int Offset = -1 * X86FI->getTCReturnAddrDelta();
@@ -1756,7 +1962,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const {
+ Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
bool IsFixed = MFI.isFixedObjectIndex(FI);
@@ -1821,7 +2027,7 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
// Skip the saved EBP.
return Offset + SlotSize + FPDelta;
} else {
- assert((-(Offset + StackSize)) % MFI.getObjectAlignment(FI) == 0);
+ assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
return Offset + StackSize;
}
} else if (TRI->needsStackRealignment(MF)) {
@@ -1829,7 +2035,7 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
// Skip the saved EBP.
return Offset + SlotSize + FPDelta;
} else {
- assert((-(Offset + StackSize)) % MFI.getObjectAlignment(FI) == 0);
+ assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
return Offset + StackSize;
}
// FIXME: Support tail calls
@@ -1849,8 +2055,8 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
return Offset + FPDelta;
}
-int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF,
- int FI, unsigned &FrameReg) const {
+int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
+ Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
@@ -1860,21 +2066,21 @@ int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF,
return getFrameIndexReference(MF, FI, FrameReg);
FrameReg = TRI->getStackRegister();
- return alignDown(MFI.getMaxCallFrameSize(), getStackAlignment()) + it->second;
+ return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
+ it->second;
}
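
Throughout this file the raw getStackAlignment()/getObjectAlignment() accessors are replaced by the Align-returning getStackAlign()/getObjectAlign(), with .value() recovering a plain integer where one is still required, as in the alignDown call above. A small sketch of the power-of-two rounding involved (the helper names below are local to the example, not LLVM's):

  #include <cassert>
  #include <cstdint>

  // Power-of-two rounding, the arithmetic behind alignDown/alignTo above.
  static uint64_t alignDownTo(uint64_t Value, uint64_t A) { return Value & ~(A - 1); }
  static uint64_t alignUpTo(uint64_t Value, uint64_t A) { return (Value + A - 1) & ~(A - 1); }

  int main() {
    const uint64_t StackAlign = 16;            // e.g. getStackAlign().value() on x86-64
    assert(alignDownTo(40, StackAlign) == 32); // rounding a call-frame size down
    assert(alignUpTo(40, StackAlign) == 48);   // rounding an allocation up
    return 0;
  }
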
int X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF,
- int FI, unsigned &FrameReg,
+ int FI, Register &FrameReg,
int Adjustment) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
FrameReg = TRI->getStackRegister();
return MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + Adjustment;
}
-int
-X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
- int FI, unsigned &FrameReg,
- bool IgnoreSPUpdates) const {
+int X86FrameLowering::getFrameIndexReferencePreferSP(
+ const MachineFunction &MF, int FI, Register &FrameReg,
+ bool IgnoreSPUpdates) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
// Does not include any dynamic realign.
@@ -1985,7 +2191,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
if (this->TRI->hasBasePointer(MF)) {
// Allocate a spill slot for EBP if we have a base pointer and EH funclets.
if (MF.hasEHFunclets()) {
- int FI = MFI.CreateSpillStackObject(SlotSize, SlotSize);
+ int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
X86FI->setHasSEHFramePtrSave(true);
X86FI->setSEHFramePtrSaveIndex(FI);
}
@@ -2038,16 +2244,16 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
unsigned Size = TRI->getSpillSize(*RC);
- unsigned Align = TRI->getSpillAlignment(*RC);
+ Align Alignment = TRI->getSpillAlign(*RC);
// ensure alignment
assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
- SpillSlotOffset = -alignTo(-SpillSlotOffset, Align);
+ SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
// spill into slot
SpillSlotOffset -= Size;
int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
CSI[i - 1].setFrameIdx(SlotIndex);
- MFI.ensureMaxAlignment(Align);
+ MFI.ensureMaxAlignment(Alignment);
// Save the start offset and size of XMM in stack frame for funclets.
if (X86::VR128RegClass.contains(Reg)) {
@@ -2061,8 +2267,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
bool X86FrameLowering::spillCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBB.findDebugLoc(MI);
// Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
@@ -2161,10 +2366,9 @@ void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
CatchRetTarget->setHasAddressTaken();
}
-bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+bool X86FrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -2799,6 +3003,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
I = MBB.erase(I);
auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
+ // Try to avoid emitting dead SP adjustments if the block end is unreachable,
+ // typically because the function is marked noreturn (abort, throw,
+ // assert_fail, etc).
+ if (isDestroy && blockEndIsUnreachable(MBB, I))
+ return I;
+
if (!reserveCallFrame) {
// If the stack pointer can be changed after prologue, turn the
// adjcallstackup instruction into a 'sub ESP, <amt>' and the
@@ -2807,8 +3017,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- unsigned StackAlign = getStackAlignment();
- Amount = alignTo(Amount, StackAlign);
+ Amount = alignTo(Amount, getStackAlign());
const Function &F = MF.getFunction();
bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
@@ -2881,13 +3090,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
return I;
}
- if (isDestroy && InternalAmt && !blockEndIsUnreachable(MBB, I)) {
- // If we are performing frame pointer elimination and if the callee pops
- // something off the stack pointer, add it back. We do this until we have
- // more advanced stack pointer tracking ability.
- // We are not tracking the stack pointer adjustment by the callee, so make
- // sure we restore the stack pointer immediately after the call, there may
- // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
+ if (InternalAmt) {
MachineBasicBlock::iterator CI = I;
MachineBasicBlock::iterator B = MBB.begin();
while (CI != B && !std::prev(CI)->isCall())
@@ -2964,7 +3167,7 @@ MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
.setMIFlag(MachineInstr::FrameSetup);
}
- unsigned UsedReg;
+ Register UsedReg;
int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg);
int EndOffset = -EHRegOffset - EHRegSize;
FuncInfo.EHRegNodeEndOffset = EndOffset;
@@ -3003,8 +3206,8 @@ int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
return TRI->getSlotSize();
}
-unsigned X86FrameLowering::getInitialCFARegister(const MachineFunction &MF)
- const {
+Register
+X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
return TRI->getDwarfRegNum(StackPtr, true);
}
@@ -3014,7 +3217,7 @@ struct X86FrameSortingObject {
bool IsValid = false; // true if we care about this Object.
unsigned ObjectIndex = 0; // Index of Object into MFI list.
unsigned ObjectSize = 0; // Size of Object in bytes.
- unsigned ObjectAlignment = 1; // Alignment of Object in bytes.
+ Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
unsigned ObjectNumUses = 0; // Object static number of uses.
};
@@ -3099,7 +3302,7 @@ void X86FrameLowering::orderFrameObjects(
for (auto &Obj : ObjectsToAllocate) {
SortingObjects[Obj].IsValid = true;
SortingObjects[Obj].ObjectIndex = Obj;
- SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlignment(Obj);
+ SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
// Set the size.
int ObjectSize = MFI.getObjectSize(Obj);
if (ObjectSize == 0)
@@ -3192,7 +3395,7 @@ void X86FrameLowering::processFunctionBeforeFrameFinalized(
int FrameIndex = H.CatchObj.FrameIndex;
if (FrameIndex != INT_MAX) {
// Ensure alignment.
- unsigned Align = MFI.getObjectAlignment(FrameIndex);
+ unsigned Align = MFI.getObjectAlign(FrameIndex).value();
MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
@@ -3219,3 +3422,24 @@ void X86FrameLowering::processFunctionBeforeFrameFinalized(
UnwindHelpFI)
.addImm(-2);
}
+
+void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
+ MachineFunction &MF, RegScavenger *RS) const {
+ if (STI.is32Bit() && MF.hasEHFunclets())
+ restoreWinEHStackPointersInParent(MF);
+}
+
+void X86FrameLowering::restoreWinEHStackPointersInParent(
+ MachineFunction &MF) const {
+ // 32-bit functions have to restore stack pointers when control is transferred
+ // back to the parent function. These blocks are identified as eh pads that
+ // are not funclet entries.
+ bool IsSEH = isAsynchronousEHPersonality(
+ classifyEHPersonality(MF.getFunction().getPersonalityFn()));
+ for (MachineBasicBlock &MBB : MF) {
+ bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
+ if (NeedsRestore)
+ restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
+ /*RestoreSP=*/IsSEH);
+ }
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.h
index 2103d6471ead..c0b4be95f88d 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.h
@@ -58,9 +58,14 @@ public:
void inlineStackProbe(MachineFunction &MF,
MachineBasicBlock &PrologMBB) const override;
+ void
+ emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const override;
+
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL) const;
+ const DebugLoc &DL,
+ bool IsPrologue) const override;
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
@@ -83,13 +88,14 @@ public:
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
+ ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) const override;
- bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const override;
+ bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
@@ -97,14 +103,14 @@ public:
bool needsFrameIndexResolution(const MachineFunction &MF) const override;
int getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const override;
+ Register &FrameReg) const override;
- int getWin64EHFrameIndexRef(const MachineFunction &MF,
- int FI, unsigned &SPReg) const;
- int getFrameIndexReferenceSP(const MachineFunction &MF,
- int FI, unsigned &SPReg, int Adjustment) const;
+ int getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
+ Register &SPReg) const;
+ int getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
+ Register &SPReg, int Adjustment) const;
int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
- unsigned &FrameReg,
+ Register &FrameReg,
bool IgnoreSPUpdates) const override;
MachineBasicBlock::iterator
@@ -116,6 +122,10 @@ public:
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const override;
+ void
+ processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF,
+ RegScavenger *RS) const override;
+
/// Check the instruction before/after the passed instruction. If
/// it is an ADD/SUB/LEA instruction it is deleted argument and the
/// stack adjustment is returned as a positive value for ADD/LEA and
@@ -169,12 +179,14 @@ public:
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, bool RestoreSP = false) const;
+ void restoreWinEHStackPointersInParent(MachineFunction &MF) const;
+
int getInitialCFAOffset(const MachineFunction &MF) const override;
- unsigned getInitialCFARegister(const MachineFunction &MF) const override;
+ Register getInitialCFARegister(const MachineFunction &MF) const override;
/// Return true if the function has a redzone (accessible bytes past the
- /// frame of the top of stack function) as part of it's ABI.
+  /// frame of the top of stack function) as part of its ABI.
bool has128ByteRedZone(const MachineFunction& MF) const;
private:
@@ -189,11 +201,33 @@ private:
void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, bool InProlog) const;
+ void emitStackProbeInlineWindowsCoreCLR64(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL,
+ bool InProlog) const;
+ void emitStackProbeInlineGeneric(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, bool InProlog) const;
+
+ void emitStackProbeInlineGenericBlock(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL,
+ uint64_t Offset) const;
+
+ void emitStackProbeInlineGenericLoop(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL,
+ uint64_t Offset) const;
/// Emit a stub to later inline the target stack probe.
- void emitStackProbeInlineStub(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, bool InProlog) const;
+ MachineInstr *emitStackProbeInlineStub(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL,
+ bool InProlog) const;
/// Aligns the stack pointer by ANDing it with -MaxAlign.
void BuildStackAlignAND(MachineBasicBlock &MBB,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 88af0ebcfd0e..3cd80cb04ab8 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -17,8 +17,6 @@
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/ConstantRange.h"
@@ -31,9 +29,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include <stdint.h>
using namespace llvm;
@@ -45,6 +40,10 @@ static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
cl::desc("Enable setting constant bits to reduce size of mask immediates"),
cl::Hidden);
+static cl::opt<bool> EnablePromoteAnyextLoad(
+ "x86-promote-anyext-load", cl::init(true),
+ cl::desc("Enable promoting aligned anyext load to wider load"), cl::Hidden);
+
//===----------------------------------------------------------------------===//
// Pattern Matcher Implementation
//===----------------------------------------------------------------------===//
@@ -72,14 +71,14 @@ namespace {
const char *ES;
MCSymbol *MCSym;
int JT;
- unsigned Align; // CP alignment.
+ Align Alignment; // CP alignment.
unsigned char SymbolFlags; // X86II::MO_*
bool NegateIndex = false;
X86ISelAddressMode()
: BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
- MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}
+ MCSym(nullptr), JT(-1), SymbolFlags(X86II::MO_NO_FLAG) {}
bool hasSymbolicDisplacement() const {
return GV != nullptr || CP != nullptr || ES != nullptr ||
@@ -145,7 +144,7 @@ namespace {
dbgs() << MCSym;
else
dbgs() << "nul";
- dbgs() << " JT" << JT << " Align" << Align << '\n';
+ dbgs() << " JT" << JT << " Align" << Alignment.value() << '\n';
}
#endif
};
@@ -161,10 +160,6 @@ namespace {
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
- /// If true, selector should try to optimize for code size instead of
- /// performance.
- bool OptForSize;
-
/// If true, selector should try to optimize for minimum code size.
bool OptForMinSize;
@@ -173,7 +168,7 @@ namespace {
public:
explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), OptForSize(false),
+ : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
OptForMinSize(false), IndirectTlsSegRefs(false) {}
StringRef getPassName() const override {
@@ -187,16 +182,15 @@ namespace {
"indirect-tls-seg-refs");
// OptFor[Min]Size are used in pattern predicates that isel is matching.
- OptForSize = MF.getFunction().hasOptSize();
OptForMinSize = MF.getFunction().hasMinSize();
- assert((!OptForMinSize || OptForSize) &&
+ assert((!OptForMinSize || MF.getFunction().hasOptSize()) &&
"OptForMinSize implies OptForSize");
SelectionDAGISel::runOnMachineFunction(MF);
return true;
}
- void EmitFunctionEntryCode() override;
+ void emitFunctionEntryCode() override;
bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
@@ -221,9 +215,9 @@ namespace {
bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
- bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
- SDValue &Scale, SDValue &Index, SDValue &Disp,
- SDValue &Segment);
+ bool selectVectorAddr(MemSDNode *Parent, SDValue BasePtr, SDValue IndexOp,
+ SDValue ScaleOp, SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp, SDValue &Segment);
bool selectMOV64Imm32(SDValue N, SDValue &Imm);
bool selectLEAAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
@@ -234,11 +228,6 @@ namespace {
bool selectTLSADDRAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
- bool selectScalarSSELoad(SDNode *Root, SDNode *Parent, SDValue N,
- SDValue &Base, SDValue &Scale,
- SDValue &Index, SDValue &Disp,
- SDValue &Segment,
- SDValue &NodeWithChain);
bool selectRelocImm(SDValue N, SDValue &Op);
bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
@@ -259,6 +248,8 @@ namespace {
SDValue &Index, SDValue &Disp,
SDValue &Segment);
+ bool isProfitableToFormMaskedOp(SDNode *N) const;
+
/// Implement addressing mode selection for inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
unsigned ConstraintID,
@@ -300,8 +291,8 @@ namespace {
MVT::i32, AM.Disp,
AM.SymbolFlags);
else if (AM.CP)
- Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
- AM.Align, AM.Disp, AM.SymbolFlags);
+ Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Alignment,
+ AM.Disp, AM.SymbolFlags);
else if (AM.ES) {
assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
@@ -368,9 +359,10 @@ namespace {
if (User->getNumOperands() != 2)
continue;
- // If this can match to INC/DEC, don't count it as a use.
- if (User->getOpcode() == ISD::ADD &&
- (isOneConstant(SDValue(N, 0)) || isAllOnesConstant(SDValue(N, 0))))
+ // If this is a sign-extended 8-bit integer immediate used in an ALU
+ // instruction, there is probably an opcode encoding to save space.
+ auto *C = dyn_cast<ConstantSDNode>(N);
+ if (C && isInt<8>(C->getSExtValue()))
continue;
// Immediates that are used for offsets as part of stack
@@ -475,14 +467,6 @@ namespace {
bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;
- /// Returns whether this is a relocatable immediate in the range
- /// [-2^Width .. 2^Width-1].
- template <unsigned Width> bool isSExtRelocImm(SDNode *N) const {
- if (auto *CN = dyn_cast<ConstantSDNode>(N))
- return isInt<Width>(CN->getSExtValue());
- return isSExtAbsoluteSymbolRef(Width, N);
- }
-
// Indicates we should prefer to use a non-temporal load for this load.
bool useNonTemporalLoad(LoadSDNode *N) const {
if (!N->isNonTemporal())
@@ -513,8 +497,8 @@ namespace {
bool shrinkAndImmediate(SDNode *N);
bool isMaskZeroExtended(SDNode *N) const;
bool tryShiftAmountMod(SDNode *N);
- bool combineIncDecVector(SDNode *Node);
bool tryShrinkShlLogicImm(SDNode *N);
+ bool tryVPTERNLOG(SDNode *N);
bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
bool tryMatchBitSelect(SDNode *N);
@@ -581,12 +565,6 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
if (!N.hasOneUse())
return false;
- // FIXME: Temporary hack to prevent strict floating point nodes from
- // folding into masked operations illegally.
- if (U == Root && Root->getOpcode() == ISD::VSELECT &&
- N.getOpcode() != ISD::LOAD && N.getOpcode() != X86ISD::VBROADCAST_LOAD)
- return false;
-
if (N.getOpcode() != ISD::LOAD)
return true;
@@ -650,6 +628,11 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) &&
(-Imm->getAPIntValue()).isSignedIntN(8))
return false;
+
+ if ((U->getOpcode() == X86ISD::ADD || U->getOpcode() == X86ISD::SUB) &&
+ (-Imm->getAPIntValue()).isSignedIntN(8) &&
+ hasNoCarryFlagUses(SDValue(U, 1)))
+ return false;
}
// If the other operand is a TLS address, we should fold it instead.
@@ -724,6 +707,20 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
return true;
}
+// Indicates it is profitable to form an AVX512 masked operation. Returning
+// false will favor a masked register-register move or vblendm, and the
+// operation will be selected separately.
+bool X86DAGToDAGISel::isProfitableToFormMaskedOp(SDNode *N) const {
+ assert(
+ (N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::SELECTS) &&
+ "Unexpected opcode!");
+
+ // If the operation has additional users, the operation will be duplicated.
+ // Check the use count to prevent that.
+ // FIXME: Are there cheap opcodes we might want to duplicate?
+ return N->getOperand(1).hasOneUse();
+}
+
/// Replace the original chain operand of the call with
/// load's chain operand and move load below the call's chain operand.
static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
@@ -799,6 +796,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
}
void X86DAGToDAGISel::PreprocessISelDAG() {
+ bool MadeChange = false;
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
@@ -811,11 +809,111 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
--I;
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
++I;
- CurDAG->DeleteNode(N);
+ MadeChange = true;
continue;
}
+ /// Convert vector increment or decrement to sub/add with an all-ones
+ /// constant:
+ /// add X, <1, 1...> --> sub X, <-1, -1...>
+ /// sub X, <1, 1...> --> add X, <-1, -1...>
+ /// The all-ones vector constant can be materialized using a pcmpeq
+ /// instruction that is commonly recognized as an idiom (has no register
+ /// dependency), so that's better/smaller than loading a splat 1 constant.
+ if ((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+ N->getSimpleValueType(0).isVector()) {
+
+ APInt SplatVal;
+ if (X86::isConstantSplat(N->getOperand(1), SplatVal) &&
+ SplatVal.isOneValue()) {
+ SDLoc DL(N);
+
+ MVT VT = N->getSimpleValueType(0);
+ unsigned NumElts = VT.getSizeInBits() / 32;
+ SDValue AllOnes =
+ CurDAG->getAllOnesConstant(DL, MVT::getVectorVT(MVT::i32, NumElts));
+ AllOnes = CurDAG->getBitcast(VT, AllOnes);
+
+ unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
+ SDValue Res =
+ CurDAG->getNode(NewOpcode, DL, VT, N->getOperand(0), AllOnes);
+ --I;
+ CurDAG->ReplaceAllUsesWith(N, Res.getNode());
+ ++I;
+ MadeChange = true;
+ continue;
+ }
+ }
+
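
An intrinsics-level sketch of the rewrite added above, assuming SSE2; the function body is my illustration, not part of the patch. Both expressions compute X + 1 per lane, but the all-ones operand can be materialized with pcmpeqd instead of a constant-pool load:

  #include <immintrin.h>
  #include <cassert>

  int main() {
    __m128i X = _mm_set1_epi32(41);
    __m128i AllOnes = _mm_set1_epi32(-1);                 // compilers emit pcmpeqd for this
    __m128i ViaAdd = _mm_add_epi32(X, _mm_set1_epi32(1)); // add X, <1,1,1,1>
    __m128i ViaSub = _mm_sub_epi32(X, AllOnes);           // sub X, <-1,-1,-1,-1>
    assert(_mm_movemask_epi8(_mm_cmpeq_epi32(ViaAdd, ViaSub)) == 0xFFFF);
    return 0;
  }
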
switch (N->getOpcode()) {
+ case X86ISD::VBROADCAST: {
+ MVT VT = N->getSimpleValueType(0);
+ // Emulate v32i16/v64i8 broadcast without BWI.
+ if (!Subtarget->hasBWI() && (VT == MVT::v32i16 || VT == MVT::v64i8)) {
+ MVT NarrowVT = VT == MVT::v32i16 ? MVT::v16i16 : MVT::v32i8;
+ SDLoc dl(N);
+ SDValue NarrowBCast =
+ CurDAG->getNode(X86ISD::VBROADCAST, dl, NarrowVT, N->getOperand(0));
+ SDValue Res =
+ CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT),
+ NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
+ unsigned Index = VT == MVT::v32i16 ? 16 : 32;
+ Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast,
+ CurDAG->getIntPtrConstant(Index, dl));
+
+ --I;
+ CurDAG->ReplaceAllUsesWith(N, Res.getNode());
+ ++I;
+ MadeChange = true;
+ continue;
+ }
+
+ break;
+ }
+ case X86ISD::VBROADCAST_LOAD: {
+ MVT VT = N->getSimpleValueType(0);
+ // Emulate v32i16/v64i8 broadcast without BWI.
+ if (!Subtarget->hasBWI() && (VT == MVT::v32i16 || VT == MVT::v64i8)) {
+ MVT NarrowVT = VT == MVT::v32i16 ? MVT::v16i16 : MVT::v32i8;
+ auto *MemNode = cast<MemSDNode>(N);
+ SDLoc dl(N);
+ SDVTList VTs = CurDAG->getVTList(NarrowVT, MVT::Other);
+ SDValue Ops[] = {MemNode->getChain(), MemNode->getBasePtr()};
+ SDValue NarrowBCast = CurDAG->getMemIntrinsicNode(
+ X86ISD::VBROADCAST_LOAD, dl, VTs, Ops, MemNode->getMemoryVT(),
+ MemNode->getMemOperand());
+ SDValue Res =
+ CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT),
+ NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
+ unsigned Index = VT == MVT::v32i16 ? 16 : 32;
+ Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast,
+ CurDAG->getIntPtrConstant(Index, dl));
+
+ --I;
+ SDValue To[] = {Res, NarrowBCast.getValue(1)};
+ CurDAG->ReplaceAllUsesWith(N, To);
+ ++I;
+ MadeChange = true;
+ continue;
+ }
+
+ break;
+ }
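
The two broadcast cases above can be mirrored at the intrinsics level. A hedged sketch assuming AVX512F without AVX512BW (the function name is mine): a 256-bit broadcast is inserted into both halves of the 512-bit result, matching the narrow VBROADCAST plus two INSERT_SUBVECTORs.

  #include <immintrin.h>

  // Build a 512-bit word broadcast from a 256-bit one, inserted into both
  // halves of the wider register.
  __m512i broadcast_i16_no_bwi(short v) {
    __m256i Narrow = _mm256_set1_epi16(v);        // 256-bit broadcast
    __m512i Res = _mm512_castsi256_si512(Narrow); // low 256 bits
    return _mm512_inserti64x4(Res, Narrow, 1);    // high 256 bits
  }
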
+ case ISD::VSELECT: {
+      // Replace VSELECT with non-mask conditions with BLENDV.
+ if (N->getOperand(0).getValueType().getVectorElementType() == MVT::i1)
+ break;
+
+ assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
+ SDValue Blendv =
+ CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0),
+ N->getOperand(0), N->getOperand(1), N->getOperand(2));
+ --I;
+ CurDAG->ReplaceAllUsesWith(N, Blendv.getNode());
+ ++I;
+ MadeChange = true;
+ continue;
+ }
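
The BLENDV replacement above relies on the blend reading only the sign bit of each condition element, which is why mask (vXi1) conditions are excluded. A small SSE4.1 sketch of that behaviour (my example, not from the patch):

  #include <immintrin.h>
  #include <cassert>

  int main() {
    __m128i T = _mm_set1_epi32(1), F = _mm_set1_epi32(2);
    __m128i Cond = _mm_set_epi32(-1, 0, -1, 0);  // only the sign bits matter
    __m128i R = _mm_blendv_epi8(F, T, Cond);     // picks T where the sign bit is set
    assert(_mm_extract_epi32(R, 3) == 1 && _mm_extract_epi32(R, 2) == 2);
    return 0;
  }
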
case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND:
case ISD::FP_TO_SINT:
@@ -849,7 +947,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
--I;
CurDAG->ReplaceAllUsesWith(N, Res.getNode());
++I;
- CurDAG->DeleteNode(N);
+ MadeChange = true;
continue;
}
case ISD::SHL:
@@ -872,27 +970,33 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
--I;
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
++I;
- CurDAG->DeleteNode(N);
+ MadeChange = true;
continue;
}
case ISD::ANY_EXTEND:
case ISD::ANY_EXTEND_VECTOR_INREG: {
// Replace vector any extend with the zero extend equivalents so we don't
// need 2 sets of patterns. Ignore vXi1 extensions.
- if (!N->getValueType(0).isVector() ||
- N->getOperand(0).getScalarValueSizeInBits() == 1)
+ if (!N->getValueType(0).isVector())
break;
- unsigned NewOpc = N->getOpcode() == ISD::ANY_EXTEND
- ? ISD::ZERO_EXTEND
- : ISD::ZERO_EXTEND_VECTOR_INREG;
+ unsigned NewOpc;
+ if (N->getOperand(0).getScalarValueSizeInBits() == 1) {
+ assert(N->getOpcode() == ISD::ANY_EXTEND &&
+ "Unexpected opcode for mask vector!");
+ NewOpc = ISD::SIGN_EXTEND;
+ } else {
+ NewOpc = N->getOpcode() == ISD::ANY_EXTEND
+ ? ISD::ZERO_EXTEND
+ : ISD::ZERO_EXTEND_VECTOR_INREG;
+ }
SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
N->getOperand(0));
--I;
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
++I;
- CurDAG->DeleteNode(N);
+ MadeChange = true;
continue;
}
case ISD::FCEIL:
@@ -936,7 +1040,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
--I;
CurDAG->ReplaceAllUsesWith(N, Res.getNode());
++I;
- CurDAG->DeleteNode(N);
+ MadeChange = true;
continue;
}
case X86ISD::FANDN:
@@ -979,7 +1083,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
--I;
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
++I;
- CurDAG->DeleteNode(N);
+ MadeChange = true;
continue;
}
}
@@ -1018,6 +1122,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
continue;
moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
++NumLoadMoved;
+ MadeChange = true;
continue;
}
@@ -1064,14 +1169,17 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
// operations. Based on this, decide what we want to do.
MVT MemVT = (N->getOpcode() == ISD::FP_ROUND) ? DstVT : SrcVT;
SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
+ int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI);
SDLoc dl(N);
// FIXME: optimize the case where the src/dest is a load or store?
- SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
- MemTmp, MachinePointerInfo(), MemVT);
- SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
- MachinePointerInfo(), MemVT);
+ SDValue Store = CurDAG->getTruncStore(
+ CurDAG->getEntryNode(), dl, N->getOperand(0), MemTmp, MPI, MemVT);
+ SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store,
+ MemTmp, MPI, MemVT);
// We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
// extload we created. This will cause general havok on the dag because
@@ -1117,6 +1225,9 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
// operations. Based on this, decide what we want to do.
MVT MemVT = (N->getOpcode() == ISD::STRICT_FP_ROUND) ? DstVT : SrcVT;
SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
+ int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI);
SDLoc dl(N);
// FIXME: optimize the case where the src/dest is a load or store?
@@ -1127,7 +1238,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
SDVTList VTs = CurDAG->getVTList(MVT::Other);
SDValue Ops[] = {N->getOperand(0), N->getOperand(1), MemTmp};
Store = CurDAG->getMemIntrinsicNode(X86ISD::FST, dl, VTs, Ops, MemVT,
- MachinePointerInfo(), 0,
+ MPI, /*Align*/ None,
MachineMemOperand::MOStore);
if (N->getFlags().hasNoFPExcept()) {
SDNodeFlags Flags = Store->getFlags();
@@ -1137,15 +1248,15 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
} else {
assert(SrcVT == MemVT && "Unexpected VT!");
Store = CurDAG->getStore(N->getOperand(0), dl, N->getOperand(1), MemTmp,
- MachinePointerInfo());
+ MPI);
}
if (!DstIsSSE) {
SDVTList VTs = CurDAG->getVTList(DstVT, MVT::Other);
SDValue Ops[] = {Store, MemTmp};
- Result = CurDAG->getMemIntrinsicNode(X86ISD::FLD, dl, VTs, Ops, MemVT,
- MachinePointerInfo(), 0,
- MachineMemOperand::MOLoad);
+ Result = CurDAG->getMemIntrinsicNode(
+ X86ISD::FLD, dl, VTs, Ops, MemVT, MPI,
+ /*Align*/ None, MachineMemOperand::MOLoad);
if (N->getFlags().hasNoFPExcept()) {
SDNodeFlags Flags = Result->getFlags();
Flags.setNoFPExcept(true);
@@ -1153,8 +1264,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
}
} else {
assert(DstVT == MemVT && "Unexpected VT!");
- Result =
- CurDAG->getLoad(DstVT, dl, Store, MemTmp, MachinePointerInfo());
+ Result = CurDAG->getLoad(DstVT, dl, Store, MemTmp, MPI);
}
// We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
@@ -1171,13 +1281,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
// Now that we did that, the node is dead. Increment the iterator to the
// next node to process, then delete N.
++I;
- CurDAG->DeleteNode(N);
+ MadeChange = true;
}
- // The load+call transform above can leave some dead nodes in the graph. Make
- // sure we remove them. Its possible some of the other transforms do to so
- // just remove dead nodes unconditionally.
- CurDAG->RemoveDeadNodes();
+ // Remove any dead nodes that may have been left behind.
+ if (MadeChange)
+ CurDAG->RemoveDeadNodes();
}
// Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
@@ -1275,6 +1384,8 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
And.getOperand(6) /* Chain */ };
MachineSDNode *Test = CurDAG->getMachineNode(NewOpc, SDLoc(N),
MVT::i32, MVT::Other, Ops);
+ CurDAG->setNodeMemRefs(
+ Test, cast<MachineSDNode>(And.getNode())->memoperands());
ReplaceUses(N, Test);
MadeChange = true;
continue;
@@ -1390,7 +1501,7 @@ void X86DAGToDAGISel::emitSpecialCodeForMain() {
}
}
-void X86DAGToDAGISel::EmitFunctionEntryCode() {
+void X86DAGToDAGISel::emitFunctionEntryCode() {
// If this is main, emit special code for main.
const Function &F = MF->getFunction();
if (F.hasExternalLinkage() && F.getName() == "main")
@@ -1409,18 +1520,20 @@ static bool isDispSafeForFrameIndex(int64_t Val) {
bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
X86ISelAddressMode &AM) {
- // If there's no offset to fold, we don't need to do any work.
- if (Offset == 0)
- return false;
+ // We may have already matched a displacement and the caller just added the
+ // symbolic displacement. So we still need to do the checks even if Offset
+ // is zero.
+
+ int64_t Val = AM.Disp + Offset;
// Cannot combine ExternalSymbol displacements with integer offsets.
- if (AM.ES || AM.MCSym)
+ if (Val != 0 && (AM.ES || AM.MCSym))
return true;
- int64_t Val = AM.Disp + Offset;
CodeModel::Model M = TM.getCodeModel();
if (Subtarget->is64Bit()) {
- if (!X86::isOffsetSuitableForCodeModel(Val, M,
+ if (Val != 0 &&
+ !X86::isOffsetSuitableForCodeModel(Val, M,
AM.hasSymbolicDisplacement()))
return true;
// In addition to the checks required for a register base, check that
@@ -1449,13 +1562,13 @@ bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
(Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
Subtarget->isTargetFuchsia()))
switch (N->getPointerInfo().getAddrSpace()) {
- case 256:
+ case X86AS::GS:
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
return false;
- case 257:
+ case X86AS::FS:
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
return false;
- // Address space 258 is not handled here, because it is not used to
+ // Address space X86AS::SS is not handled here, because it is not used to
// address TLS areas.
}
@@ -1505,7 +1618,7 @@ bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
Offset = G->getOffset();
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
AM.CP = CP->getConstVal();
- AM.Align = CP->getAlignment();
+ AM.Alignment = CP->getAlign();
AM.SymbolFlags = CP->getTargetFlags();
Offset = CP->getOffset();
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
@@ -1583,9 +1696,10 @@ bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
return false;
AM = Backup;
- // Try again after commuting the operands.
- if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
- !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
+  // Try again after commuting the operands.
+ if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM,
+ Depth + 1) &&
+ !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth + 1))
return false;
AM = Backup;
@@ -1782,7 +1896,7 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
// There is nothing we can do here unless the mask is removing some bits.
// Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
- if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
+ if (AMShiftAmt == 0 || AMShiftAmt > 3) return true;
// We also need to ensure that mask is a continuous run of bits.
if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
@@ -1877,7 +1991,7 @@ static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
// There is nothing we can do here unless the mask is removing some bits.
// Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
- if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
+ if (AMShiftAmt == 0 || AMShiftAmt > 3) return true;
MVT VT = N.getSimpleValueType();
SDLoc DL(N);
@@ -2280,15 +2394,16 @@ bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
return matchAddressBase(N, AM);
}
-bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
- SDValue &Scale, SDValue &Index,
- SDValue &Disp, SDValue &Segment) {
+bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr,
+ SDValue IndexOp, SDValue ScaleOp,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment) {
X86ISelAddressMode AM;
- auto *Mgs = cast<X86MaskedGatherScatterSDNode>(Parent);
- AM.IndexReg = Mgs->getIndex();
- AM.Scale = cast<ConstantSDNode>(Mgs->getScale())->getZExtValue();
+ AM.IndexReg = IndexOp;
+ AM.Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();
- unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
+ unsigned AddrSpace = Parent->getPointerInfo().getAddrSpace();
if (AddrSpace == X86AS::GS)
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
if (AddrSpace == X86AS::FS)
@@ -2296,11 +2411,11 @@ bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
if (AddrSpace == X86AS::SS)
AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
- SDLoc DL(N);
- MVT VT = N.getSimpleValueType();
+ SDLoc DL(BasePtr);
+ MVT VT = BasePtr.getSimpleValueType();
// Try to match into the base and displacement fields.
- if (matchVectorAddress(N, AM))
+ if (matchVectorAddress(BasePtr, AM))
return false;
getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
@@ -2331,12 +2446,11 @@ bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
unsigned AddrSpace =
cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
- // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
- if (AddrSpace == 256)
+ if (AddrSpace == X86AS::GS)
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
- if (AddrSpace == 257)
+ if (AddrSpace == X86AS::FS)
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
- if (AddrSpace == 258)
+ if (AddrSpace == X86AS::SS)
AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
}
@@ -2351,86 +2465,7 @@ bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
return true;
}
-// We can only fold a load if all nodes between it and the root node have a
-// single use. If there are additional uses, we could end up duplicating the
-// load.
-static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *User) {
- while (User != Root) {
- if (!User->hasOneUse())
- return false;
- User = *User->use_begin();
- }
-
- return true;
-}
-
-/// Match a scalar SSE load. In particular, we want to match a load whose top
-/// elements are either undef or zeros. The load flavor is derived from the
-/// type of N, which is either v4f32 or v2f64.
-///
-/// We also return:
-/// PatternChainNode: this is the matched node that has a chain input and
-/// output.
-bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
- SDValue N, SDValue &Base,
- SDValue &Scale, SDValue &Index,
- SDValue &Disp, SDValue &Segment,
- SDValue &PatternNodeWithChain) {
- if (!hasSingleUsesFromRoot(Root, Parent))
- return false;
-
- // We can allow a full vector load here since narrowing a load is ok unless
- // it's volatile or atomic.
- if (ISD::isNON_EXTLoad(N.getNode())) {
- LoadSDNode *LD = cast<LoadSDNode>(N);
- if (LD->isSimple() &&
- IsProfitableToFold(N, LD, Root) &&
- IsLegalToFold(N, Parent, Root, OptLevel)) {
- PatternNodeWithChain = N;
- return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
- Segment);
- }
- }
-
- // We can also match the special zero extended load opcode.
- if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
- PatternNodeWithChain = N;
- if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
- IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
- auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
- return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
- Segment);
- }
- }
-
- // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
- // once. Otherwise the load might get duplicated and the chain output of the
- // duplicate load will not be observed by all dependencies.
- if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
- PatternNodeWithChain = N.getOperand(0);
- if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
- IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
- IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
- LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
- return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
- Segment);
- }
- }
-
- return false;
-}
-
-
bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
- if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
- uint64_t ImmVal = CN->getZExtValue();
- if (!isUInt<32>(ImmVal))
- return false;
-
- Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
- return true;
- }
-
// In static codegen with small code model, we can get the address of a label
// into a register with 'movl'
if (N->getOpcode() != X86ISD::Wrapper)
@@ -2604,12 +2639,6 @@ bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
}
bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
- if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
- Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN),
- N.getValueType());
- return true;
- }
-
// Keep track of the original value type and whether this value was
// truncated. If we see a truncation from pointer type to VT that truncates
// bits that are known to be zero, we can use a narrow reference.
@@ -3896,49 +3925,82 @@ bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
return true;
}
-/// Convert vector increment or decrement to sub/add with an all-ones constant:
-/// add X, <1, 1...> --> sub X, <-1, -1...>
-/// sub X, <1, 1...> --> add X, <-1, -1...>
-/// The all-ones vector constant can be materialized using a pcmpeq instruction
-/// that is commonly recognized as an idiom (has no register dependency), so
-/// that's better/smaller than loading a splat 1 constant.
-bool X86DAGToDAGISel::combineIncDecVector(SDNode *Node) {
- assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB) &&
- "Unexpected opcode for increment/decrement transform");
-
- EVT VT = Node->getValueType(0);
- assert(VT.isVector() && "Should only be called for vectors.");
-
- SDValue X = Node->getOperand(0);
- SDValue OneVec = Node->getOperand(1);
+// Try to match two logic ops to a VPTERNLOG.
+// FIXME: Handle inverted inputs?
+// FIXME: Handle more complex patterns that use an operand more than once?
+bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
+ MVT NVT = N->getSimpleValueType(0);
- APInt SplatVal;
- if (!X86::isConstantSplat(OneVec, SplatVal) || !SplatVal.isOneValue())
+ // Make sure we support VPTERNLOG.
+ if (!NVT.isVector() || !Subtarget->hasAVX512() ||
+ NVT.getVectorElementType() == MVT::i1)
return false;
- SDLoc DL(Node);
- SDValue OneConstant, AllOnesVec;
+ // We need VLX for 128/256-bit.
+ if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
+ return false;
- APInt Ones = APInt::getAllOnesValue(32);
- assert(VT.getSizeInBits() % 32 == 0 &&
- "Expected bit count to be a multiple of 32");
- OneConstant = CurDAG->getConstant(Ones, DL, MVT::i32);
- insertDAGNode(*CurDAG, X, OneConstant);
+ unsigned Opc1 = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
- unsigned NumElts = VT.getSizeInBits() / 32;
- assert(NumElts > 0 && "Expected to get non-empty vector.");
- AllOnesVec = CurDAG->getSplatBuildVector(MVT::getVectorVT(MVT::i32, NumElts),
- DL, OneConstant);
- insertDAGNode(*CurDAG, X, AllOnesVec);
+ auto isLogicOp = [](unsigned Opc) {
+ return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR ||
+ Opc == X86ISD::ANDNP;
+ };
- AllOnesVec = CurDAG->getBitcast(VT, AllOnesVec);
- insertDAGNode(*CurDAG, X, AllOnesVec);
+ SDValue A, B, C;
+ unsigned Opc2;
+ if (isLogicOp(N1.getOpcode()) && N1.hasOneUse()) {
+ Opc2 = N1.getOpcode();
+ A = N0;
+ B = N1.getOperand(0);
+ C = N1.getOperand(1);
+ } else if (isLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
+ Opc2 = N0.getOpcode();
+ A = N1;
+ B = N0.getOperand(0);
+ C = N0.getOperand(1);
+ } else
+ return false;
- unsigned NewOpcode = Node->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
- SDValue NewNode = CurDAG->getNode(NewOpcode, DL, VT, X, AllOnesVec);
+ uint64_t Imm;
+ switch (Opc1) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::AND:
+ switch (Opc2) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::AND: Imm = 0x80; break;
+ case ISD::OR: Imm = 0xe0; break;
+ case ISD::XOR: Imm = 0x60; break;
+ case X86ISD::ANDNP: Imm = 0x20; break;
+ }
+ break;
+ case ISD::OR:
+ switch (Opc2) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::AND: Imm = 0xf8; break;
+ case ISD::OR: Imm = 0xfe; break;
+ case ISD::XOR: Imm = 0xf6; break;
+ case X86ISD::ANDNP: Imm = 0xf2; break;
+ }
+ break;
+ case ISD::XOR:
+ switch (Opc2) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::AND: Imm = 0x78; break;
+ case ISD::OR: Imm = 0x1e; break;
+ case ISD::XOR: Imm = 0x96; break;
+ case X86ISD::ANDNP: Imm = 0xd2; break;
+ }
+ break;
+ }
- ReplaceNode(Node, NewNode.getNode());
- SelectCode(NewNode.getNode());
+ SDLoc DL(N);
+ SDValue New = CurDAG->getNode(X86ISD::VPTERNLOG, DL, NVT, A, B, C,
+ CurDAG->getTargetConstant(Imm, DL, MVT::i8));
+ ReplaceNode(N, New.getNode());
+ SelectCode(New.getNode());
return true;
}
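
The immediates in the switch above are three-input truth tables evaluated at the canonical constants A = 0xF0, B = 0xCC, C = 0xAA, with A, B, C in the order passed to the VPTERNLOG node. A short check of a few entries (my sketch, assuming that operand convention):

  #include <cassert>

  int main() {
    const unsigned A = 0xF0, B = 0xCC, C = 0xAA; // one bit per input combination
    assert((A & (B | C)) == 0xE0u);  // ISD::AND of ISD::OR       -> Imm = 0xe0
    assert((A | (B & C)) == 0xF8u);  // ISD::OR  of ISD::AND      -> Imm = 0xf8
    assert((A ^ (B ^ C)) == 0x96u);  // ISD::XOR of ISD::XOR      -> Imm = 0x96
    assert((A & ~B & C) == 0x20u);   // ISD::AND of X86ISD::ANDNP -> Imm = 0x20
    return 0;
  }
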
@@ -4014,159 +4076,50 @@ bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad,
bool FoldedBCast, bool Masked) {
- if (Masked) {
- if (FoldedLoad) {
- switch (TestVT.SimpleTy) {
- default: llvm_unreachable("Unexpected VT!");
- case MVT::v16i8:
- return IsTestN ? X86::VPTESTNMBZ128rmk : X86::VPTESTMBZ128rmk;
- case MVT::v8i16:
- return IsTestN ? X86::VPTESTNMWZ128rmk : X86::VPTESTMWZ128rmk;
- case MVT::v4i32:
- return IsTestN ? X86::VPTESTNMDZ128rmk : X86::VPTESTMDZ128rmk;
- case MVT::v2i64:
- return IsTestN ? X86::VPTESTNMQZ128rmk : X86::VPTESTMQZ128rmk;
- case MVT::v32i8:
- return IsTestN ? X86::VPTESTNMBZ256rmk : X86::VPTESTMBZ256rmk;
- case MVT::v16i16:
- return IsTestN ? X86::VPTESTNMWZ256rmk : X86::VPTESTMWZ256rmk;
- case MVT::v8i32:
- return IsTestN ? X86::VPTESTNMDZ256rmk : X86::VPTESTMDZ256rmk;
- case MVT::v4i64:
- return IsTestN ? X86::VPTESTNMQZ256rmk : X86::VPTESTMQZ256rmk;
- case MVT::v64i8:
- return IsTestN ? X86::VPTESTNMBZrmk : X86::VPTESTMBZrmk;
- case MVT::v32i16:
- return IsTestN ? X86::VPTESTNMWZrmk : X86::VPTESTMWZrmk;
- case MVT::v16i32:
- return IsTestN ? X86::VPTESTNMDZrmk : X86::VPTESTMDZrmk;
- case MVT::v8i64:
- return IsTestN ? X86::VPTESTNMQZrmk : X86::VPTESTMQZrmk;
- }
- }
-
- if (FoldedBCast) {
- switch (TestVT.SimpleTy) {
- default: llvm_unreachable("Unexpected VT!");
- case MVT::v4i32:
- return IsTestN ? X86::VPTESTNMDZ128rmbk : X86::VPTESTMDZ128rmbk;
- case MVT::v2i64:
- return IsTestN ? X86::VPTESTNMQZ128rmbk : X86::VPTESTMQZ128rmbk;
- case MVT::v8i32:
- return IsTestN ? X86::VPTESTNMDZ256rmbk : X86::VPTESTMDZ256rmbk;
- case MVT::v4i64:
- return IsTestN ? X86::VPTESTNMQZ256rmbk : X86::VPTESTMQZ256rmbk;
- case MVT::v16i32:
- return IsTestN ? X86::VPTESTNMDZrmbk : X86::VPTESTMDZrmbk;
- case MVT::v8i64:
- return IsTestN ? X86::VPTESTNMQZrmbk : X86::VPTESTMQZrmbk;
- }
- }
-
- switch (TestVT.SimpleTy) {
- default: llvm_unreachable("Unexpected VT!");
- case MVT::v16i8:
- return IsTestN ? X86::VPTESTNMBZ128rrk : X86::VPTESTMBZ128rrk;
- case MVT::v8i16:
- return IsTestN ? X86::VPTESTNMWZ128rrk : X86::VPTESTMWZ128rrk;
- case MVT::v4i32:
- return IsTestN ? X86::VPTESTNMDZ128rrk : X86::VPTESTMDZ128rrk;
- case MVT::v2i64:
- return IsTestN ? X86::VPTESTNMQZ128rrk : X86::VPTESTMQZ128rrk;
- case MVT::v32i8:
- return IsTestN ? X86::VPTESTNMBZ256rrk : X86::VPTESTMBZ256rrk;
- case MVT::v16i16:
- return IsTestN ? X86::VPTESTNMWZ256rrk : X86::VPTESTMWZ256rrk;
- case MVT::v8i32:
- return IsTestN ? X86::VPTESTNMDZ256rrk : X86::VPTESTMDZ256rrk;
- case MVT::v4i64:
- return IsTestN ? X86::VPTESTNMQZ256rrk : X86::VPTESTMQZ256rrk;
- case MVT::v64i8:
- return IsTestN ? X86::VPTESTNMBZrrk : X86::VPTESTMBZrrk;
- case MVT::v32i16:
- return IsTestN ? X86::VPTESTNMWZrrk : X86::VPTESTMWZrrk;
- case MVT::v16i32:
- return IsTestN ? X86::VPTESTNMDZrrk : X86::VPTESTMDZrrk;
- case MVT::v8i64:
- return IsTestN ? X86::VPTESTNMQZrrk : X86::VPTESTMQZrrk;
- }
- }
+#define VPTESTM_CASE(VT, SUFFIX) \
+case MVT::VT: \
+ if (Masked) \
+ return IsTestN ? X86::VPTESTNM##SUFFIX##k: X86::VPTESTM##SUFFIX##k; \
+ return IsTestN ? X86::VPTESTNM##SUFFIX : X86::VPTESTM##SUFFIX;
+
+
+#define VPTESTM_BROADCAST_CASES(SUFFIX) \
+default: llvm_unreachable("Unexpected VT!"); \
+VPTESTM_CASE(v4i32, DZ128##SUFFIX) \
+VPTESTM_CASE(v2i64, QZ128##SUFFIX) \
+VPTESTM_CASE(v8i32, DZ256##SUFFIX) \
+VPTESTM_CASE(v4i64, QZ256##SUFFIX) \
+VPTESTM_CASE(v16i32, DZ##SUFFIX) \
+VPTESTM_CASE(v8i64, QZ##SUFFIX)
+
+#define VPTESTM_FULL_CASES(SUFFIX) \
+VPTESTM_BROADCAST_CASES(SUFFIX) \
+VPTESTM_CASE(v16i8, BZ128##SUFFIX) \
+VPTESTM_CASE(v8i16, WZ128##SUFFIX) \
+VPTESTM_CASE(v32i8, BZ256##SUFFIX) \
+VPTESTM_CASE(v16i16, WZ256##SUFFIX) \
+VPTESTM_CASE(v64i8, BZ##SUFFIX) \
+VPTESTM_CASE(v32i16, WZ##SUFFIX)
if (FoldedLoad) {
switch (TestVT.SimpleTy) {
- default: llvm_unreachable("Unexpected VT!");
- case MVT::v16i8:
- return IsTestN ? X86::VPTESTNMBZ128rm : X86::VPTESTMBZ128rm;
- case MVT::v8i16:
- return IsTestN ? X86::VPTESTNMWZ128rm : X86::VPTESTMWZ128rm;
- case MVT::v4i32:
- return IsTestN ? X86::VPTESTNMDZ128rm : X86::VPTESTMDZ128rm;
- case MVT::v2i64:
- return IsTestN ? X86::VPTESTNMQZ128rm : X86::VPTESTMQZ128rm;
- case MVT::v32i8:
- return IsTestN ? X86::VPTESTNMBZ256rm : X86::VPTESTMBZ256rm;
- case MVT::v16i16:
- return IsTestN ? X86::VPTESTNMWZ256rm : X86::VPTESTMWZ256rm;
- case MVT::v8i32:
- return IsTestN ? X86::VPTESTNMDZ256rm : X86::VPTESTMDZ256rm;
- case MVT::v4i64:
- return IsTestN ? X86::VPTESTNMQZ256rm : X86::VPTESTMQZ256rm;
- case MVT::v64i8:
- return IsTestN ? X86::VPTESTNMBZrm : X86::VPTESTMBZrm;
- case MVT::v32i16:
- return IsTestN ? X86::VPTESTNMWZrm : X86::VPTESTMWZrm;
- case MVT::v16i32:
- return IsTestN ? X86::VPTESTNMDZrm : X86::VPTESTMDZrm;
- case MVT::v8i64:
- return IsTestN ? X86::VPTESTNMQZrm : X86::VPTESTMQZrm;
+ VPTESTM_FULL_CASES(rm)
}
}
if (FoldedBCast) {
switch (TestVT.SimpleTy) {
- default: llvm_unreachable("Unexpected VT!");
- case MVT::v4i32:
- return IsTestN ? X86::VPTESTNMDZ128rmb : X86::VPTESTMDZ128rmb;
- case MVT::v2i64:
- return IsTestN ? X86::VPTESTNMQZ128rmb : X86::VPTESTMQZ128rmb;
- case MVT::v8i32:
- return IsTestN ? X86::VPTESTNMDZ256rmb : X86::VPTESTMDZ256rmb;
- case MVT::v4i64:
- return IsTestN ? X86::VPTESTNMQZ256rmb : X86::VPTESTMQZ256rmb;
- case MVT::v16i32:
- return IsTestN ? X86::VPTESTNMDZrmb : X86::VPTESTMDZrmb;
- case MVT::v8i64:
- return IsTestN ? X86::VPTESTNMQZrmb : X86::VPTESTMQZrmb;
+ VPTESTM_BROADCAST_CASES(rmb)
}
}
switch (TestVT.SimpleTy) {
- default: llvm_unreachable("Unexpected VT!");
- case MVT::v16i8:
- return IsTestN ? X86::VPTESTNMBZ128rr : X86::VPTESTMBZ128rr;
- case MVT::v8i16:
- return IsTestN ? X86::VPTESTNMWZ128rr : X86::VPTESTMWZ128rr;
- case MVT::v4i32:
- return IsTestN ? X86::VPTESTNMDZ128rr : X86::VPTESTMDZ128rr;
- case MVT::v2i64:
- return IsTestN ? X86::VPTESTNMQZ128rr : X86::VPTESTMQZ128rr;
- case MVT::v32i8:
- return IsTestN ? X86::VPTESTNMBZ256rr : X86::VPTESTMBZ256rr;
- case MVT::v16i16:
- return IsTestN ? X86::VPTESTNMWZ256rr : X86::VPTESTMWZ256rr;
- case MVT::v8i32:
- return IsTestN ? X86::VPTESTNMDZ256rr : X86::VPTESTMDZ256rr;
- case MVT::v4i64:
- return IsTestN ? X86::VPTESTNMQZ256rr : X86::VPTESTMQZ256rr;
- case MVT::v64i8:
- return IsTestN ? X86::VPTESTNMBZrr : X86::VPTESTMBZrr;
- case MVT::v32i16:
- return IsTestN ? X86::VPTESTNMWZrr : X86::VPTESTMWZrr;
- case MVT::v16i32:
- return IsTestN ? X86::VPTESTNMDZrr : X86::VPTESTMDZrr;
- case MVT::v8i64:
- return IsTestN ? X86::VPTESTNMQZrr : X86::VPTESTMQZrr;
+ VPTESTM_FULL_CASES(rr)
}
+
+#undef VPTESTM_FULL_CASES
+#undef VPTESTM_BROADCAST_CASES
+#undef VPTESTM_CASE
}
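
For readers tracing the macro table above, expanding VPTESTM_CASE(v4i32, DZ128rm) from the FoldedLoad switch reproduces the hand-written case it replaces. A compilable sketch with stand-in enums (the real MVT::v4i32 and X86::VPTEST* opcodes are LLVM's; the names here are placeholders):

  #include <cassert>

  namespace X86 {
  enum { VPTESTMDZ128rm, VPTESTMDZ128rmk, VPTESTNMDZ128rm, VPTESTNMDZ128rmk };
  }
  enum class SimpleVT { v4i32 };

  static unsigned pickOpc(SimpleVT VT, bool IsTestN, bool Masked) {
    switch (VT) {
    case SimpleVT::v4i32: // expansion of VPTESTM_CASE(v4i32, DZ128rm)
      if (Masked)
        return IsTestN ? X86::VPTESTNMDZ128rmk : X86::VPTESTMDZ128rmk;
      return IsTestN ? X86::VPTESTNMDZ128rm : X86::VPTESTMDZ128rm;
    }
    return ~0u;
  }

  int main() {
    assert(pickOpc(SimpleVT::v4i32, /*IsTestN=*/true, /*Masked=*/true) ==
           X86::VPTESTNMDZ128rmk);
    return 0;
  }
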
// Try to create VPTESTM instruction. If InMask is not null, it will be used
@@ -4477,8 +4430,39 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
break;
}
+ case Intrinsic::x86_tileloadd64:
+ case Intrinsic::x86_tileloaddt164:
+ case Intrinsic::x86_tilestored64: {
+ if (!Subtarget->hasAMXTILE())
+ break;
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::x86_tileloadd64: Opc = X86::PTILELOADD; break;
+ case Intrinsic::x86_tileloaddt164: Opc = X86::PTILELOADDT1; break;
+ case Intrinsic::x86_tilestored64: Opc = X86::PTILESTORED; break;
+ }
+ // FIXME: Match displacement and scale.
+ unsigned TIndex = Node->getConstantOperandVal(2);
+ SDValue TReg = getI8Imm(TIndex, dl);
+ SDValue Base = Node->getOperand(3);
+ SDValue Scale = getI8Imm(1, dl);
+ SDValue Index = Node->getOperand(4);
+ SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
+ SDValue Segment = CurDAG->getRegister(0, MVT::i16);
+ SDValue Chain = Node->getOperand(0);
+ MachineSDNode *CNode;
+ if (Opc == X86::PTILESTORED) {
+ SDValue Ops[] = { Base, Scale, Index, Disp, Segment, TReg, Chain };
+ CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
+ } else {
+ SDValue Ops[] = { TReg, Base, Scale, Index, Disp, Segment, Chain };
+ CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
+ }
+ ReplaceNode(Node, CNode);
+ return;
+ }
}
-
break;
}
case ISD::BRIND: {
@@ -4490,9 +4474,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
// Converts a 32-bit register to a 64-bit, zero-extended version of
// it. This is needed because x86-64 can do many things, but jmp %r32
// ain't one of them.
- const SDValue &Target = Node->getOperand(1);
- assert(Target.getSimpleValueType() == llvm::MVT::i32);
- SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
+ SDValue Target = Node->getOperand(1);
+ assert(Target.getValueType() == MVT::i32 && "Unexpected VT!");
+ SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, MVT::i64);
SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
Node->getOperand(0), ZextTarget);
ReplaceNode(Node, Brind.getNode());
@@ -4516,21 +4500,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
}
break;
- case ISD::VSELECT: {
- // Replace VSELECT with non-mask conditions with with BLENDV.
- if (Node->getOperand(0).getValueType().getVectorElementType() == MVT::i1)
- break;
-
- assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
- SDValue Blendv = CurDAG->getNode(
- X86ISD::BLENDV, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
- Node->getOperand(1), Node->getOperand(2));
- ReplaceNode(Node, Blendv.getNode());
- SelectCode(Blendv.getNode());
- // We already called ReplaceUses.
- return;
- }
-
case ISD::SRL:
if (matchBitExtract(Node))
return;
@@ -4569,24 +4538,21 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
case ISD::XOR:
if (tryShrinkShlLogicImm(Node))
return;
-
if (Opcode == ISD::OR && tryMatchBitSelect(Node))
return;
+ if (tryVPTERNLOG(Node))
+ return;
LLVM_FALLTHROUGH;
case ISD::ADD:
case ISD::SUB: {
- if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && NVT.isVector() &&
- combineIncDecVector(Node))
- return;
-
// Try to avoid folding immediates with multiple uses for optsize.
// This code tries to select to register form directly to avoid going
// through the isel table which might fold the immediate. We can't change
// the patterns on the add/sub/and/or/xor with immediate paterns in the
// tablegen files to check immediate use count without making the patterns
// unavailable to the fast-isel table.
- if (!OptForSize)
+ if (!CurDAG->shouldOptForSize())
break;
// Only handle i8/i16/i32/i64.
@@ -4720,7 +4686,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
- // Multiply is commmutative.
+ // Multiply is commutative.
if (!FoldedLoad) {
FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
if (FoldedLoad)
@@ -4772,31 +4738,31 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
SDValue N1 = Node->getOperand(1);
unsigned Opc, MOpc;
- bool isSigned = Opcode == ISD::SMUL_LOHI;
- if (!isSigned) {
- switch (NVT.SimpleTy) {
- default: llvm_unreachable("Unsupported VT!");
- case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
- case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
- }
- } else {
- switch (NVT.SimpleTy) {
- default: llvm_unreachable("Unsupported VT!");
- case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
- case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
- }
- }
-
- unsigned SrcReg, LoReg, HiReg;
- switch (Opc) {
- default: llvm_unreachable("Unknown MUL opcode!");
- case X86::IMUL32r:
- case X86::MUL32r:
- SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
+ unsigned LoReg, HiReg;
+ bool IsSigned = Opcode == ISD::SMUL_LOHI;
+ bool UseMULX = !IsSigned && Subtarget->hasBMI2();
+ bool UseMULXHi = UseMULX && SDValue(Node, 0).use_empty();
+ switch (NVT.SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i32:
+ Opc = UseMULXHi ? X86::MULX32Hrr :
+ UseMULX ? X86::MULX32rr :
+ IsSigned ? X86::IMUL32r : X86::MUL32r;
+ MOpc = UseMULXHi ? X86::MULX32Hrm :
+ UseMULX ? X86::MULX32rm :
+ IsSigned ? X86::IMUL32m : X86::MUL32m;
+ LoReg = UseMULX ? X86::EDX : X86::EAX;
+ HiReg = X86::EDX;
break;
- case X86::IMUL64r:
- case X86::MUL64r:
- SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
+ case MVT::i64:
+ Opc = UseMULXHi ? X86::MULX64Hrr :
+ UseMULX ? X86::MULX64rr :
+ IsSigned ? X86::IMUL64r : X86::MUL64r;
+ MOpc = UseMULXHi ? X86::MULX64Hrm :
+ UseMULX ? X86::MULX64rm :
+ IsSigned ? X86::IMUL64m : X86::MUL64m;
+ LoReg = UseMULX ? X86::RDX : X86::RAX;
+ HiReg = X86::RDX;
break;
}
@@ -4809,17 +4775,31 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
std::swap(N0, N1);
}
- SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
N0, SDValue()).getValue(1);
+ SDValue ResHi, ResLo;
if (foldedLoad) {
SDValue Chain;
MachineSDNode *CNode = nullptr;
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
- SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
- CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
- Chain = SDValue(CNode, 0);
- InFlag = SDValue(CNode, 1);
+ if (UseMULXHi) {
+ SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
+ CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+ ResHi = SDValue(CNode, 0);
+ Chain = SDValue(CNode, 1);
+ } else if (UseMULX) {
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other);
+ CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+ ResHi = SDValue(CNode, 0);
+ ResLo = SDValue(CNode, 1);
+ Chain = SDValue(CNode, 2);
+ } else {
+ SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
+ CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+ Chain = SDValue(CNode, 0);
+ InFlag = SDValue(CNode, 1);
+ }
// Update the chain.
ReplaceUses(N1.getValue(1), Chain);
@@ -4827,27 +4807,42 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
} else {
SDValue Ops[] = { N1, InFlag };
- SDVTList VTs = CurDAG->getVTList(MVT::Glue);
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
- InFlag = SDValue(CNode, 0);
+ if (UseMULXHi) {
+ SDVTList VTs = CurDAG->getVTList(NVT);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+ ResHi = SDValue(CNode, 0);
+ } else if (UseMULX) {
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+ ResHi = SDValue(CNode, 0);
+ ResLo = SDValue(CNode, 1);
+ } else {
+ SDVTList VTs = CurDAG->getVTList(MVT::Glue);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+ InFlag = SDValue(CNode, 0);
+ }
}
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
- assert(LoReg && "Register for low half is not defined!");
- SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
- NVT, InFlag);
- InFlag = ResLo.getValue(2);
+ if (!ResLo) {
+ assert(LoReg && "Register for low half is not defined!");
+ ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
+ NVT, InFlag);
+ InFlag = ResLo.getValue(2);
+ }
ReplaceUses(SDValue(Node, 0), ResLo);
LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
dbgs() << '\n');
}
// Copy the high half of the result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- assert(HiReg && "Register for high half is not defined!");
- SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
- NVT, InFlag);
- InFlag = ResHi.getValue(2);
+ if (!ResHi) {
+ assert(HiReg && "Register for high half is not defined!");
+ ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
+ NVT, InFlag);
+ InFlag = ResHi.getValue(2);
+ }
ReplaceUses(SDValue(Node, 1), ResHi);
LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
dbgs() << '\n');
@@ -4862,23 +4857,23 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
- unsigned Opc, MOpc;
+ unsigned ROpc, MOpc;
bool isSigned = Opcode == ISD::SDIVREM;
if (!isSigned) {
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
- case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
- case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
- case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
- case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
+ case MVT::i8: ROpc = X86::DIV8r; MOpc = X86::DIV8m; break;
+ case MVT::i16: ROpc = X86::DIV16r; MOpc = X86::DIV16m; break;
+ case MVT::i32: ROpc = X86::DIV32r; MOpc = X86::DIV32m; break;
+ case MVT::i64: ROpc = X86::DIV64r; MOpc = X86::DIV64m; break;
}
} else {
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
- case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
- case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
- case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
- case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
+ case MVT::i8: ROpc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
+ case MVT::i16: ROpc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
+ case MVT::i32: ROpc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
+ case MVT::i64: ROpc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
}
}
@@ -4943,7 +4938,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
} else {
// Zero out the high part, effectively zero extending the input.
- SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
+ SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
+ SDValue ClrNode =
+ SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, None), 0);
switch (NVT.SimpleTy) {
case MVT::i16:
ClrNode =
@@ -4985,7 +4982,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
} else {
InFlag =
- SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
+ SDValue(CurDAG->getMachineNode(ROpc, dl, MVT::Glue, N1, InFlag), 0);
}
// Prevent use of AH in a REX instruction by explicitly copying it to
@@ -5034,6 +5031,77 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
return;
}
+ case X86ISD::FCMP:
+ case X86ISD::STRICT_FCMP:
+ case X86ISD::STRICT_FCMPS: {
+ bool IsStrictCmp = Node->getOpcode() == X86ISD::STRICT_FCMP ||
+ Node->getOpcode() == X86ISD::STRICT_FCMPS;
+ SDValue N0 = Node->getOperand(IsStrictCmp ? 1 : 0);
+ SDValue N1 = Node->getOperand(IsStrictCmp ? 2 : 1);
+
+ // Save the original VT of the compare.
+ MVT CmpVT = N0.getSimpleValueType();
+
+ // Floating point needs special handling if we don't have FCOMI.
+ if (Subtarget->hasCMov())
+ break;
+
+ bool IsSignaling = Node->getOpcode() == X86ISD::STRICT_FCMPS;
+
+ unsigned Opc;
+ switch (CmpVT.SimpleTy) {
+ default: llvm_unreachable("Unexpected type!");
+ case MVT::f32:
+ Opc = IsSignaling ? X86::COM_Fpr32 : X86::UCOM_Fpr32;
+ break;
+ case MVT::f64:
+ Opc = IsSignaling ? X86::COM_Fpr64 : X86::UCOM_Fpr64;
+ break;
+ case MVT::f80:
+ Opc = IsSignaling ? X86::COM_Fpr80 : X86::UCOM_Fpr80;
+ break;
+ }
+
+ SDValue Cmp;
+ SDValue Chain =
+ IsStrictCmp ? Node->getOperand(0) : CurDAG->getEntryNode();
+ if (IsStrictCmp) {
+ SDVTList VTs = CurDAG->getVTList(MVT::i16, MVT::Other);
+ Cmp = SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {N0, N1, Chain}), 0);
+ Chain = Cmp.getValue(1);
+ } else {
+ Cmp = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i16, N0, N1), 0);
+ }
+
+ // Move FPSW to AX.
+ SDValue FPSW = CurDAG->getCopyToReg(Chain, dl, X86::FPSW, Cmp, SDValue());
+ Chain = FPSW;
+ SDValue FNSTSW =
+ SDValue(CurDAG->getMachineNode(X86::FNSTSW16r, dl, MVT::i16, FPSW,
+ FPSW.getValue(1)),
+ 0);
+
+ // Extract upper 8-bits of AX.
+ SDValue Extract =
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, FNSTSW);
+
+ // Move AH into flags.
+ // Some 64-bit targets lack SAHF support, but they do support FCOMI.
+ assert(Subtarget->hasLAHFSAHF() &&
+ "Target doesn't support SAHF or FCOMI?");
+ SDValue AH = CurDAG->getCopyToReg(Chain, dl, X86::AH, Extract, SDValue());
+ Chain = AH;
+ SDValue SAHF = SDValue(
+ CurDAG->getMachineNode(X86::SAHF, dl, MVT::i32, AH.getValue(1)), 0);
+
+ if (IsStrictCmp)
+ ReplaceUses(SDValue(Node, 1), Chain);
+
+ ReplaceUses(SDValue(Node, 0), SAHF);
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+
case X86ISD::CMP: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
@@ -5267,6 +5335,279 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
if (foldLoadStoreIntoMemOperand(Node))
return;
break;
+
+ case X86ISD::SETCC_CARRY: {
+ // We have to do this manually because tblgen will put the eflags copy in
+ // the wrong place if we use an extract_subreg in the pattern.
+ MVT VT = Node->getSimpleValueType(0);
+
+ // Copy flags to the EFLAGS register and glue it to next node.
+ SDValue EFLAGS =
+ CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
+ Node->getOperand(1), SDValue());
+
+ // Create a 64-bit instruction if the result is 64 bits; otherwise use the
+ // 32-bit version.
+ unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r;
+ MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
+ SDValue Result = SDValue(
+ CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)), 0);
+
+ // For less than 32-bits we need to extract from the 32-bit node.
+ if (VT == MVT::i8 || VT == MVT::i16) {
+ int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit;
+ Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result);
+ }
+
+ ReplaceUses(SDValue(Node, 0), Result);
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ case X86ISD::SBB: {
+ if (isNullConstant(Node->getOperand(0)) &&
+ isNullConstant(Node->getOperand(1))) {
+ MVT VT = Node->getSimpleValueType(0);
+
+ // Create zero.
+ SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
+ SDValue Zero =
+ SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, None), 0);
+ if (VT == MVT::i64) {
+ Zero = SDValue(
+ CurDAG->getMachineNode(
+ TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
+ CurDAG->getTargetConstant(0, dl, MVT::i64), Zero,
+ CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)),
+ 0);
+ }
+
+ // Copy flags to the EFLAGS register and glue it to next node.
+ SDValue EFLAGS =
+ CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
+ Node->getOperand(2), SDValue());
+
+ // Create a 64-bit instruction if the result is 64 bits; otherwise use the
+ // 32-bit version.
+ unsigned Opc = VT == MVT::i64 ? X86::SBB64rr : X86::SBB32rr;
+ MVT SBBVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
+ VTs = CurDAG->getVTList(SBBVT, MVT::i32);
+ SDValue Result =
+ SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {Zero, Zero, EFLAGS,
+ EFLAGS.getValue(1)}),
+ 0);
+
+ // Replace the flag use.
+ ReplaceUses(SDValue(Node, 1), Result.getValue(1));
+
+ // Replace the result use.
+ if (!SDValue(Node, 0).use_empty()) {
+ // For less than 32-bits we need to extract from the 32-bit node.
+ if (VT == MVT::i8 || VT == MVT::i16) {
+ int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit;
+ Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result);
+ }
+ ReplaceUses(SDValue(Node, 0), Result);
+ }
+
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ break;
+ }
+ case X86ISD::MGATHER: {
+ auto *Mgt = cast<X86MaskedGatherSDNode>(Node);
+ SDValue IndexOp = Mgt->getIndex();
+ SDValue Mask = Mgt->getMask();
+ MVT IndexVT = IndexOp.getSimpleValueType();
+ MVT ValueVT = Node->getSimpleValueType(0);
+ MVT MaskVT = Mask.getSimpleValueType();
+
+ // This is just to prevent crashes if the nodes are malformed somehow. We
+ // otherwise only do loose type checking in here, based on what a type
+ // constraint would say, just like table-based isel.
+ if (!ValueVT.isVector() || !MaskVT.isVector())
+ break;
+
+ unsigned NumElts = ValueVT.getVectorNumElements();
+ MVT ValueSVT = ValueVT.getVectorElementType();
+
+ bool IsFP = ValueSVT.isFloatingPoint();
+ unsigned EltSize = ValueSVT.getSizeInBits();
+
+ unsigned Opc = 0;
+ bool AVX512Gather = MaskVT.getVectorElementType() == MVT::i1;
+ if (AVX512Gather) {
+ if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
+ Opc = IsFP ? X86::VGATHERDPSZ128rm : X86::VPGATHERDDZ128rm;
+ else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
+ Opc = IsFP ? X86::VGATHERDPSZ256rm : X86::VPGATHERDDZ256rm;
+ else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32)
+ Opc = IsFP ? X86::VGATHERDPSZrm : X86::VPGATHERDDZrm;
+ else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
+ Opc = IsFP ? X86::VGATHERDPDZ128rm : X86::VPGATHERDQZ128rm;
+ else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
+ Opc = IsFP ? X86::VGATHERDPDZ256rm : X86::VPGATHERDQZ256rm;
+ else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64)
+ Opc = IsFP ? X86::VGATHERDPDZrm : X86::VPGATHERDQZrm;
+ else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
+ Opc = IsFP ? X86::VGATHERQPSZ128rm : X86::VPGATHERQDZ128rm;
+ else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
+ Opc = IsFP ? X86::VGATHERQPSZ256rm : X86::VPGATHERQDZ256rm;
+ else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32)
+ Opc = IsFP ? X86::VGATHERQPSZrm : X86::VPGATHERQDZrm;
+ else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
+ Opc = IsFP ? X86::VGATHERQPDZ128rm : X86::VPGATHERQQZ128rm;
+ else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
+ Opc = IsFP ? X86::VGATHERQPDZ256rm : X86::VPGATHERQQZ256rm;
+ else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64)
+ Opc = IsFP ? X86::VGATHERQPDZrm : X86::VPGATHERQQZrm;
+ } else {
+ assert(EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger() &&
+ "Unexpected mask VT!");
+ if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
+ Opc = IsFP ? X86::VGATHERDPSrm : X86::VPGATHERDDrm;
+ else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
+ Opc = IsFP ? X86::VGATHERDPSYrm : X86::VPGATHERDDYrm;
+ else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
+ Opc = IsFP ? X86::VGATHERDPDrm : X86::VPGATHERDQrm;
+ else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
+ Opc = IsFP ? X86::VGATHERDPDYrm : X86::VPGATHERDQYrm;
+ else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
+ Opc = IsFP ? X86::VGATHERQPSrm : X86::VPGATHERQDrm;
+ else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
+ Opc = IsFP ? X86::VGATHERQPSYrm : X86::VPGATHERQDYrm;
+ else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
+ Opc = IsFP ? X86::VGATHERQPDrm : X86::VPGATHERQQrm;
+ else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
+ Opc = IsFP ? X86::VGATHERQPDYrm : X86::VPGATHERQQYrm;
+ }
+
+ if (!Opc)
+ break;
+
+ SDValue Base, Scale, Index, Disp, Segment;
+ if (!selectVectorAddr(Mgt, Mgt->getBasePtr(), IndexOp, Mgt->getScale(),
+ Base, Scale, Index, Disp, Segment))
+ break;
+
+ SDValue PassThru = Mgt->getPassThru();
+ SDValue Chain = Mgt->getChain();
+ // Gather instructions have a mask output not in the ISD node.
+ SDVTList VTs = CurDAG->getVTList(ValueVT, MaskVT, MVT::Other);
+
+ MachineSDNode *NewNode;
+ if (AVX512Gather) {
+ SDValue Ops[] = {PassThru, Mask, Base, Scale,
+ Index, Disp, Segment, Chain};
+ NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
+ } else {
+ SDValue Ops[] = {PassThru, Base, Scale, Index,
+ Disp, Segment, Mask, Chain};
+ NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
+ }
+ CurDAG->setNodeMemRefs(NewNode, {Mgt->getMemOperand()});
+ ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
+ ReplaceUses(SDValue(Node, 1), SDValue(NewNode, 2));
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ case X86ISD::MSCATTER: {
+ auto *Sc = cast<X86MaskedScatterSDNode>(Node);
+ SDValue Value = Sc->getValue();
+ SDValue IndexOp = Sc->getIndex();
+ MVT IndexVT = IndexOp.getSimpleValueType();
+ MVT ValueVT = Value.getSimpleValueType();
+
+ // This is just to prevent crashes if the nodes are malformed somehow. We
+ // otherwise only do loose type checking in here, based on what a type
+ // constraint would say, just like table-based isel.
+ if (!ValueVT.isVector())
+ break;
+
+ unsigned NumElts = ValueVT.getVectorNumElements();
+ MVT ValueSVT = ValueVT.getVectorElementType();
+
+ bool IsFP = ValueSVT.isFloatingPoint();
+ unsigned EltSize = ValueSVT.getSizeInBits();
+
+ unsigned Opc;
+ if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
+ Opc = IsFP ? X86::VSCATTERDPSZ128mr : X86::VPSCATTERDDZ128mr;
+ else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
+ Opc = IsFP ? X86::VSCATTERDPSZ256mr : X86::VPSCATTERDDZ256mr;
+ else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32)
+ Opc = IsFP ? X86::VSCATTERDPSZmr : X86::VPSCATTERDDZmr;
+ else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
+ Opc = IsFP ? X86::VSCATTERDPDZ128mr : X86::VPSCATTERDQZ128mr;
+ else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
+ Opc = IsFP ? X86::VSCATTERDPDZ256mr : X86::VPSCATTERDQZ256mr;
+ else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64)
+ Opc = IsFP ? X86::VSCATTERDPDZmr : X86::VPSCATTERDQZmr;
+ else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
+ Opc = IsFP ? X86::VSCATTERQPSZ128mr : X86::VPSCATTERQDZ128mr;
+ else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
+ Opc = IsFP ? X86::VSCATTERQPSZ256mr : X86::VPSCATTERQDZ256mr;
+ else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32)
+ Opc = IsFP ? X86::VSCATTERQPSZmr : X86::VPSCATTERQDZmr;
+ else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
+ Opc = IsFP ? X86::VSCATTERQPDZ128mr : X86::VPSCATTERQQZ128mr;
+ else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
+ Opc = IsFP ? X86::VSCATTERQPDZ256mr : X86::VPSCATTERQQZ256mr;
+ else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64)
+ Opc = IsFP ? X86::VSCATTERQPDZmr : X86::VPSCATTERQQZmr;
+ else
+ break;
+
+ SDValue Base, Scale, Index, Disp, Segment;
+ if (!selectVectorAddr(Sc, Sc->getBasePtr(), IndexOp, Sc->getScale(),
+ Base, Scale, Index, Disp, Segment))
+ break;
+
+ SDValue Mask = Sc->getMask();
+ SDValue Chain = Sc->getChain();
+ // Scatter instructions have a mask output not in the ISD node.
+ SDVTList VTs = CurDAG->getVTList(Mask.getValueType(), MVT::Other);
+ SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Value, Chain};
+
+ MachineSDNode *NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
+ CurDAG->setNodeMemRefs(NewNode, {Sc->getMemOperand()});
+ ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 1));
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ case ISD::PREALLOCATED_SETUP: {
+ auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
+ auto CallId = MFI->getPreallocatedIdForCallSite(
+ cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
+ SDValue Chain = Node->getOperand(0);
+ SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
+ MachineSDNode *New = CurDAG->getMachineNode(
+ TargetOpcode::PREALLOCATED_SETUP, dl, MVT::Other, CallIdValue, Chain);
+ ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Chain
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ case ISD::PREALLOCATED_ARG: {
+ auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
+ auto CallId = MFI->getPreallocatedIdForCallSite(
+ cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
+ SDValue Chain = Node->getOperand(0);
+ SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
+ SDValue ArgIndex = Node->getOperand(2);
+ SDValue Ops[3];
+ Ops[0] = CallIdValue;
+ Ops[1] = ArgIndex;
+ Ops[2] = Chain;
+ MachineSDNode *New = CurDAG->getMachineNode(
+ TargetOpcode::PREALLOCATED_ARG, dl,
+ CurDAG->getVTList(TLI->getPointerTy(CurDAG->getDataLayout()),
+ MVT::Other),
+ Ops);
+ ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Arg pointer
+ ReplaceUses(SDValue(Node, 1), SDValue(New, 1)); // Chain
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
}
SelectCode(Node);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
index c8720d9ae3a6..450927aaf5cc 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12,7 +12,8 @@
//===----------------------------------------------------------------------===//
#include "X86ISelLowering.h"
-#include "Utils/X86ShuffleDecode.h"
+#include "MCTargetDesc/X86ShuffleDecode.h"
+#include "X86.h"
#include "X86CallingConv.h"
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
@@ -28,6 +29,7 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -37,7 +39,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -75,13 +76,6 @@ static cl::opt<int> ExperimentalPrefLoopAlignment(
" of the loop header PC will be 0)."),
cl::Hidden);
-// Added in 10.0.
-static cl::opt<bool> EnableOldKNLABI(
- "x86-enable-old-knl-abi", cl::init(false),
- cl::desc("Enables passing v32i16 and v64i8 in 2 YMM registers instead of "
- "one ZMM register on AVX512F, but not AVX512BW targets."),
- cl::Hidden);
-
static cl::opt<bool> MulConstantOptimization(
"mul-constant-optimization", cl::init(true),
cl::desc("Replace 'mul x, Const' with more effective instructions like "
@@ -164,7 +158,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
// 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
- // FIXME: Should we be limitting the atomic size on other configs? Default is
+ // FIXME: Should we be limiting the atomic size on other configs? Default is
// 1024.
if (!Subtarget.hasCmpxchg8b())
setMaxAtomicSizeInBitsSupported(32);
@@ -190,12 +184,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// SETOEQ and SETUNE require checking two conditions.
- setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
- setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
- setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
- setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
- setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
- setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
+ for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
+ setCondCodeAction(ISD::SETOEQ, VT, Expand);
+ setCondCodeAction(ISD::SETUNE, VT, Expand);
+ }
// Integer absolute.
if (Subtarget.hasCMov()) {
@@ -206,10 +198,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Funnel shifts.
for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
+ // For slow shld targets we only lower for code size.
+ LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
+
+ setOperationAction(ShiftOp , MVT::i8 , Custom);
setOperationAction(ShiftOp , MVT::i16 , Custom);
- setOperationAction(ShiftOp , MVT::i32 , Custom);
+ setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
if (Subtarget.is64Bit())
- setOperationAction(ShiftOp , MVT::i64 , Custom);
+ setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
}
if (!Subtarget.useSoftFloat()) {
@@ -270,6 +266,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
+
+ setOperationAction(ISD::LRINT, MVT::f32, Custom);
+ setOperationAction(ISD::LRINT, MVT::f64, Custom);
+ setOperationAction(ISD::LLRINT, MVT::f32, Custom);
+ setOperationAction(ISD::LLRINT, MVT::f64, Custom);
+
+ if (!Subtarget.is64Bit()) {
+ setOperationAction(ISD::LRINT, MVT::i64, Custom);
+ setOperationAction(ISD::LLRINT, MVT::i64, Custom);
+ }
}
// Handle address space casts between mixed sized pointers.
@@ -347,34 +353,28 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
} else {
- setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
- setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
- setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
- if (Subtarget.is64Bit()) {
- setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
+ for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
+ if (VT == MVT::i64 && !Subtarget.is64Bit())
+ continue;
+ setOperationAction(ISD::CTLZ , VT, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
}
}
- // Special handling for half-precision floating point conversions.
- // If we don't have F16C support, then lower half float conversions
- // into library calls.
- if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
- setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
- setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
+ for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16,
+ ISD::STRICT_FP_TO_FP16}) {
+ // Special handling for half-precision floating point conversions.
+ // If we don't have F16C support, then lower half float conversions
+ // into library calls.
+ setOperationAction(
+ Op, MVT::f32,
+ (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
+ // There's never any support for operations beyond MVT::f32.
+ setOperationAction(Op, MVT::f64, Expand);
+ setOperationAction(Op, MVT::f80, Expand);
+ setOperationAction(Op, MVT::f128, Expand);
}
- // There's never any support for operations beyond MVT::f32.
- setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
- setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
- setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
- setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
- setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
- setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
-
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
@@ -542,7 +542,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
- } else if (!useSoftFloat() && X86ScalarSSEf32 && (UseX87 || Is64Bit)) {
+ } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32 &&
+ (UseX87 || Is64Bit)) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
addRegisterClass(MVT::f32, &X86::FR32RegClass);
@@ -663,8 +664,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMA, MVT::f80, Expand);
setOperationAction(ISD::LROUND, MVT::f80, Expand);
setOperationAction(ISD::LLROUND, MVT::f80, Expand);
- setOperationAction(ISD::LRINT, MVT::f80, Expand);
- setOperationAction(ISD::LLRINT, MVT::f80, Expand);
+ setOperationAction(ISD::LRINT, MVT::f80, Custom);
+ setOperationAction(ISD::LLRINT, MVT::f80, Custom);
// Handle constrained floating-point operations of scalar.
setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
@@ -1038,8 +1039,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
- // With AVX512, expanding (and promoting the shifts) is better.
- if (!Subtarget.hasAVX512())
+ // With 512-bit registers or AVX512VL+BW, expanding (and promoting the
+ // shifts) is better.
+ if (!Subtarget.useAVX512Regs() &&
+ !(Subtarget.hasBWI() && Subtarget.hasVLX()))
setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
@@ -1078,6 +1081,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
+
+ setOperationAction(ISD::FROUND, RoundedTy, Custom);
}
setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
@@ -1170,6 +1175,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FRINT, VT, Legal);
setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
+
+ setOperationAction(ISD::FROUND, VT, Custom);
+
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
@@ -1221,7 +1229,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
// With BWI, expanding (and promoting the shifts) is better.
- if (!Subtarget.hasBWI())
+ if (!Subtarget.useBWIRegs())
setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
@@ -1412,19 +1420,23 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
}
- for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
+ for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
+ setOperationAction(ISD::UADDSAT, VT, Custom);
+ setOperationAction(ISD::SADDSAT, VT, Custom);
+ setOperationAction(ISD::USUBSAT, VT, Custom);
+ setOperationAction(ISD::SSUBSAT, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Expand);
+ }
+
+ for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
- setOperationAction(ISD::UADDSAT, VT, Custom);
- setOperationAction(ISD::SADDSAT, VT, Custom);
- setOperationAction(ISD::USUBSAT, VT, Custom);
- setOperationAction(ISD::SSUBSAT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
@@ -1432,7 +1444,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::VSELECT, VT, Expand);
}
for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
@@ -1443,10 +1454,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// elements. 512-bits can be disabled based on prefer-vector-width and
// required-vector-width function attributes.
if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
+ bool HasBWI = Subtarget.hasBWI();
+
addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
+ addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
+ addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
@@ -1454,6 +1469,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
+ if (HasBWI)
+ setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
}
for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
@@ -1497,6 +1514,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
+ if (HasBWI)
+ setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
// With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
// to 512-bit rather than use the AVX2 instructions so that we can use
@@ -1509,19 +1528,26 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
- setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
- setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
+ setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
- // Need to custom widen this if we don't have AVX512BW.
- setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom);
+ if (HasBWI) {
+ // Extends from v64i1 masks to 512-bit vectors.
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
+ }
for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::FFLOOR, VT, Legal);
@@ -1535,47 +1561,69 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
- setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::FROUND, VT, Custom);
}
- // Without BWI we need to use custom lowering to handle MVT::v64i8 input.
- for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v64i8}) {
+ for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
}
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
+ setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
+
+ setOperationAction(ISD::MUL, MVT::v8i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v16i32, Legal);
+ setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::MUL, MVT::v64i8, Custom);
+
+ setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
+ setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
- setOperationAction(ISD::MUL, MVT::v8i64, Custom);
- setOperationAction(ISD::MUL, MVT::v16i32, Legal);
+ setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
- setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
- setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
+ for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
+ setOperationAction(ISD::SRL, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ // The condition codes aren't legal in SSE/AVX, and under AVX512 we use
+ // setcc all the way to isel and prefer SETGT in some isel patterns.
+ setCondCodeAction(ISD::SETLT, VT, Custom);
+ setCondCodeAction(ISD::SETLE, VT, Custom);
+ }
for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::ABS, VT, Legal);
- setOperationAction(ISD::SRL, VT, Custom);
- setOperationAction(ISD::SHL, VT, Custom);
- setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
- setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Custom);
+ }
- // The condition codes aren't legal in SSE/AVX and under AVX512 we use
- // setcc all the way to isel and prefer SETGT in some isel patterns.
- setCondCodeAction(ISD::SETLT, VT, Custom);
- setCondCodeAction(ISD::SETLE, VT, Custom);
+ for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
+ setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
+ setOperationAction(ISD::CTLZ, VT, Custom);
+ setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom);
}
if (Subtarget.hasDQI()) {
@@ -1610,36 +1658,42 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
MVT::v8f32, MVT::v4f64 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
+ for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
+ MVT::v16f32, MVT::v8f64 }) {
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ }
+
for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VSELECT, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
}
- if (!Subtarget.hasBWI()) {
- // Need to custom split v32i16/v64i8 bitcasts.
- setOperationAction(ISD::BITCAST, MVT::v32i16, Custom);
- setOperationAction(ISD::BITCAST, MVT::v64i8, Custom);
-
- // Better to split these into two 256-bit ops.
- setOperationAction(ISD::BITREVERSE, MVT::v8i64, Custom);
- setOperationAction(ISD::BITREVERSE, MVT::v16i32, Custom);
+ if (HasBWI) {
+ for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
+ setOperationAction(ISD::MLOAD, VT, Legal);
+ setOperationAction(ISD::MSTORE, VT, Legal);
+ }
+ } else {
+ setOperationAction(ISD::STORE, MVT::v32i16, Custom);
+ setOperationAction(ISD::STORE, MVT::v64i8, Custom);
}
if (Subtarget.hasVBMI2()) {
- for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
+ for (auto VT : { MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
setOperationAction(ISD::FSHL, VT, Custom);
setOperationAction(ISD::FSHR, VT, Custom);
}
}
- }// has AVX-512
+ }// useAVX512Regs
// This block controls legalization for operations that don't have
// pre-AVX512 equivalents. Without VLX we use 512-bit operations for
@@ -1667,6 +1721,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32,
Subtarget.hasVLX() ? Legal : Custom);
+ if (Subtarget.hasDQI()) {
+ // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
+ // v2f32 UINT_TO_FP is already custom under SSE2.
+ assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
+ isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&
+ "Unexpected operation action!");
+ // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
+ }
+
for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
@@ -1746,12 +1813,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
}
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
for (auto VT : { MVT::v16i1, MVT::v32i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
@@ -1759,93 +1824,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
- }
-
- // This block controls legalization for v32i16 and v64i8. 512-bits can be
- // disabled based on prefer-vector-width and required-vector-width function
- // attributes.
- if (!Subtarget.useSoftFloat() && Subtarget.useBWIRegs()) {
- addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
- addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
-
- // Extends from v64i1 masks to 512-bit vectors.
- setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
- setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
-
- setOperationAction(ISD::MUL, MVT::v32i16, Legal);
- setOperationAction(ISD::MUL, MVT::v64i8, Custom);
- setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
- setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
- setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
- setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
- setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
- setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
-
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
- setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
-
- setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
-
- for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VSELECT, VT, Custom);
- setOperationAction(ISD::ABS, VT, Legal);
- setOperationAction(ISD::SRL, VT, Custom);
- setOperationAction(ISD::SHL, VT, Custom);
- setOperationAction(ISD::SRA, VT, Custom);
- setOperationAction(ISD::MLOAD, VT, Legal);
- setOperationAction(ISD::MSTORE, VT, Legal);
- setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::CTLZ, VT, Custom);
- setOperationAction(ISD::SMAX, VT, Legal);
- setOperationAction(ISD::UMAX, VT, Legal);
- setOperationAction(ISD::SMIN, VT, Legal);
- setOperationAction(ISD::UMIN, VT, Legal);
- setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::UADDSAT, VT, Legal);
- setOperationAction(ISD::SADDSAT, VT, Legal);
- setOperationAction(ISD::USUBSAT, VT, Legal);
- setOperationAction(ISD::SSUBSAT, VT, Legal);
- setOperationAction(ISD::SELECT, VT, Custom);
-
- // The condition codes aren't legal in SSE/AVX and under AVX512 we use
- // setcc all the way to isel and prefer SETGT in some isel patterns.
- setCondCodeAction(ISD::SETLT, VT, Custom);
- setCondCodeAction(ISD::SETLE, VT, Custom);
- }
-
- for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
- setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
- }
-
- if (Subtarget.hasBITALG()) {
- for (auto VT : { MVT::v64i8, MVT::v32i16 })
- setOperationAction(ISD::CTPOP, VT, Legal);
- }
-
- if (Subtarget.hasVBMI2()) {
- setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
- setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
- }
- }
- if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
@@ -1874,19 +1853,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
- if (Subtarget.hasDQI()) {
- // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
- // v2f32 UINT_TO_FP is already custom under SSE2.
- assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&
- isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&
- "Unexpected operation action!");
- // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
- setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
- }
-
if (Subtarget.hasBWI()) {
setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
@@ -1983,6 +1949,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
+ setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::CONCAT_VECTORS);
setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
@@ -2000,6 +1967,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FNEG);
setTargetDAGCombine(ISD::FMA);
+ setTargetDAGCombine(ISD::STRICT_FMA);
setTargetDAGCombine(ISD::FMINNUM);
setTargetDAGCombine(ISD::FMAXNUM);
setTargetDAGCombine(ISD::SUB);
@@ -2024,6 +1992,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::MSCATTER);
setTargetDAGCombine(ISD::MGATHER);
+ setTargetDAGCombine(ISD::FP16_TO_FP);
+ setTargetDAGCombine(ISD::FP_EXTEND);
+ setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
+ setTargetDAGCombine(ISD::FP_ROUND);
computeRegisterProperties(Subtarget.getRegisterInfo());
@@ -2075,7 +2047,8 @@ SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
TargetLoweringBase::LegalizeTypeAction
X86TargetLowering::getPreferredVectorAction(MVT VT) const {
- if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
+ if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
+ !Subtarget.hasBWI())
return TypeSplitVector;
if (VT.getVectorNumElements() != 1 &&
@@ -2085,51 +2058,73 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const {
return TargetLoweringBase::getPreferredVectorAction(VT);
}
+static std::pair<MVT, unsigned>
+handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
+ const X86Subtarget &Subtarget) {
+ // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
+ // convention is one that uses k registers.
+ if (NumElts == 2)
+ return {MVT::v2i64, 1};
+ if (NumElts == 4)
+ return {MVT::v4i32, 1};
+ if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
+ CC != CallingConv::Intel_OCL_BI)
+ return {MVT::v8i16, 1};
+ if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
+ CC != CallingConv::Intel_OCL_BI)
+ return {MVT::v16i8, 1};
+ // v32i1 passes in ymm unless we have BWI and the calling convention is
+ // regcall.
+ if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
+ return {MVT::v32i8, 1};
+ // Split v64i1 vectors if we don't have v64i8 available.
+ if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
+ if (Subtarget.useAVX512Regs())
+ return {MVT::v64i8, 1};
+ return {MVT::v32i8, 2};
+ }
+
+ // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
+ if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
+ NumElts > 64)
+ return {MVT::i8, NumElts};
+
+ return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
+}
+
MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- // v32i1 vectors should be promoted to v32i8 to match avx2.
- if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
- return MVT::v32i8;
- // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
- Subtarget.hasAVX512() &&
- (!isPowerOf2_32(VT.getVectorNumElements()) ||
- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
- return MVT::i8;
- // Split v64i1 vectors if we don't have v64i8 available.
- if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
- CC != CallingConv::X86_RegCall)
- return MVT::v32i1;
- // FIXME: Should we just make these types legal and custom split operations?
- if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
- Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
- return MVT::v16i32;
+ Subtarget.hasAVX512()) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ MVT RegisterVT;
+ unsigned NumRegisters;
+ std::tie(RegisterVT, NumRegisters) =
+ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
+ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return RegisterVT;
+ }
+
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- // v32i1 vectors should be promoted to v32i8 to match avx2.
- if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
- return 1;
- // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
- Subtarget.hasAVX512() &&
- (!isPowerOf2_32(VT.getVectorNumElements()) ||
- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
- return VT.getVectorNumElements();
- // Split v64i1 vectors if we don't have v64i8 available.
- if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
- CC != CallingConv::X86_RegCall)
- return 2;
- // FIXME: Should we just make these types legal and custom split operations?
- if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
- Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
- return 1;
+ Subtarget.hasAVX512()) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ MVT RegisterVT;
+ unsigned NumRegisters;
+ std::tie(RegisterVT, NumRegisters) =
+ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
+ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return NumRegisters;
+ }
+
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
@@ -2140,8 +2135,8 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
Subtarget.hasAVX512() &&
(!isPowerOf2_32(VT.getVectorNumElements()) ||
- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) {
+ (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
+ VT.getVectorNumElements() > 64)) {
RegisterVT = MVT::i8;
IntermediateVT = MVT::i1;
NumIntermediates = VT.getVectorNumElements();
@@ -2151,7 +2146,7 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
// Split v64i1 vectors if we don't have v64i8 available.
if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
CC != CallingConv::X86_RegCall) {
- RegisterVT = MVT::v32i1;
+ RegisterVT = MVT::v32i8;
IntermediateVT = MVT::v32i1;
NumIntermediates = 2;
return 2;
@@ -2194,20 +2189,20 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
/// Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
-static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
+static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
if (MaxAlign == 16)
return;
if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
- if (VTy->getBitWidth() == 128)
- MaxAlign = 16;
+ if (VTy->getPrimitiveSizeInBits().getFixedSize() == 128)
+ MaxAlign = Align(16);
} else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- unsigned EltAlign = 0;
+ Align EltAlign;
getMaxByValAlign(ATy->getElementType(), EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
for (auto *EltTy : STy->elements()) {
- unsigned EltAlign = 0;
+ Align EltAlign;
getMaxByValAlign(EltTy, EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
@@ -2225,46 +2220,34 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const {
if (Subtarget.is64Bit()) {
// Max of 8 and alignment of type.
- unsigned TyAlign = DL.getABITypeAlignment(Ty);
+ Align TyAlign = DL.getABITypeAlign(Ty);
if (TyAlign > 8)
- return TyAlign;
+ return TyAlign.value();
return 8;
}
- unsigned Align = 4;
+ Align Alignment(4);
if (Subtarget.hasSSE1())
- getMaxByValAlign(Ty, Align);
- return Align;
-}
-
-/// Returns the target specific optimal type for load
-/// and store operations as a result of memset, memcpy, and memmove
-/// lowering. If DstAlign is zero that means it's safe to destination
-/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
-/// means there isn't a need to check it against alignment requirement,
-/// probably because the source does not need to be loaded. If 'IsMemset' is
-/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
-/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
-/// source is constant so it does not need to be loaded.
+ getMaxByValAlign(Ty, Alignment);
+ return Alignment.value();
+}
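
The unsigned-to-Align conversion above is mechanical, but the policy it implements is compact: 64-bit targets use max(8, ABI alignment) for byval arguments, while 32-bit targets start at 4 and bump to 16 when SSE1 is available and the type (recursively) contains a 128-bit vector. A minimal model, with the recursive type walk of getMaxByValAlign reduced to a boolean (hypothetical names):

    #include <algorithm>
    #include <cstdint>

    // Simplified model of getByValTypeAlignment; Contains128BitVec stands in
    // for the recursive walk done by getMaxByValAlign.
    static uint64_t byValAlignment(bool Is64Bit, bool HasSSE1,
                                   uint64_t ABITypeAlign,
                                   bool Contains128BitVec) {
      if (Is64Bit)
        return std::max<uint64_t>(8, ABITypeAlign);
      uint64_t Alignment = 4;
      if (HasSSE1 && Contains128BitVec)
        Alignment = 16; // getMaxByValAlign caps at 16
      return Alignment;
    }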
+
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
/// For vector ops we check that the overall size isn't larger than our
/// preferred vector width.
EVT X86TargetLowering::getOptimalMemOpType(
- uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
- bool ZeroMemset, bool MemcpyStrSrc,
- const AttributeList &FuncAttributes) const {
+ const MemOp &Op, const AttributeList &FuncAttributes) const {
if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
- if (Size >= 16 && (!Subtarget.isUnalignedMem16Slow() ||
- ((DstAlign == 0 || DstAlign >= 16) &&
- (SrcAlign == 0 || SrcAlign >= 16)))) {
+ if (Op.size() >= 16 &&
+ (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
// FIXME: Check if unaligned 64-byte accesses are slow.
- if (Size >= 64 && Subtarget.hasAVX512() &&
+ if (Op.size() >= 64 && Subtarget.hasAVX512() &&
(Subtarget.getPreferVectorWidth() >= 512)) {
return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
}
// FIXME: Check if unaligned 32-byte accesses are slow.
- if (Size >= 32 && Subtarget.hasAVX() &&
+ if (Op.size() >= 32 && Subtarget.hasAVX() &&
(Subtarget.getPreferVectorWidth() >= 256)) {
// Although this isn't a well-supported type for AVX1, we'll let
// legalization and shuffle lowering produce the optimal codegen. If we
@@ -2280,8 +2263,8 @@ EVT X86TargetLowering::getOptimalMemOpType(
if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
(Subtarget.getPreferVectorWidth() >= 128))
return MVT::v4f32;
- } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
- !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
+ } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
+ Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
// Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads.
// Also, do not use f64 to lower memset unless this is a memset of zeros.
@@ -2294,7 +2277,7 @@ EVT X86TargetLowering::getOptimalMemOpType(
// This is a compromise. If we reach here, unaligned accesses may be slow on
// this target. However, creating smaller, aligned accesses could be even
// slower and would certainly be a lot more code.
- if (Subtarget.is64Bit() && Size >= 8)
+ if (Subtarget.is64Bit() && Op.size() >= 8)
return MVT::i64;
return MVT::i32;
}
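
The MemOp parameter bundles what used to be separate size/alignment/memset flags, but the selection ladder itself is unchanged: prefer the widest vector the subtarget is happy with, then fall back to scalar types. A compressed sketch with the subtarget queries reduced to booleans and several secondary guards (x87, the 32-bit f64 memcpy/memset case) omitted:

    #include <cstdint>
    #include <string>

    // Rough model of the type ladder in getOptimalMemOpType; returns a type
    // name instead of an MVT so the sketch stays self-contained.
    static std::string optimalMemOpType(uint64_t Size, bool Aligned16,
                                        bool Slow16, bool AVX512, bool AVX,
                                        bool SSE1, bool Is64Bit,
                                        unsigned PreferVecWidth) {
      if (Size >= 16 && (!Slow16 || Aligned16)) {
        if (Size >= 64 && AVX512 && PreferVecWidth >= 512)
          return "v64i8 or v16i32"; // v64i8 with BWI, v16i32 without
        if (Size >= 32 && AVX && PreferVecWidth >= 256)
          return "v8f32";
        if (SSE1 && PreferVecWidth >= 128)
          return "v4f32";
      }
      // The real code also prefers f64 for some 32-bit SSE2 memcpy/memset cases.
      if (Is64Bit && Size >= 8)
        return "i64";
      return "i32";
    }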
@@ -2611,7 +2594,7 @@ static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
/// Breaks v64i1 value into two registers and adds the new node to the DAG
static void Passv64i1ArgInRegs(
const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
- SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass, CCValAssign &VA,
+ SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
CCValAssign &NextVA, const X86Subtarget &Subtarget) {
assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
assert(Subtarget.is32Bit() && "Expecting 32 bit target");
@@ -2656,14 +2639,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
- SDValue Flag;
- SmallVector<SDValue, 6> RetOps;
- RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
- // Operand #1 = Bytes To Pop
- RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
- MVT::i32));
-
- // Copy the result values into the output registers.
+ SmallVector<std::pair<Register, SDValue>, 4> RetVals;
for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
++I, ++OutsIndex) {
CCValAssign &VA = RVLocs[I];
@@ -2715,7 +2691,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// change the value to the FP stack register class.
if (isScalarFPTypeInSSEReg(VA.getValVT()))
ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
- RetOps.push_back(ValToCopy);
+ RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
// Don't emit a copytoreg.
continue;
}
@@ -2736,31 +2712,39 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
}
}
- SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
-
if (VA.needsCustom()) {
assert(VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs");
- Passv64i1ArgInRegs(dl, DAG, ValToCopy, RegsToPass, VA, RVLocs[++I],
+ Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
Subtarget);
- assert(2 == RegsToPass.size() &&
- "Expecting two registers after Pass64BitArgInRegs");
-
// Add the second register to the CalleeSaveDisableRegs list.
if (ShouldDisableCalleeSavedRegister)
MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
} else {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
+ RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
}
+ }
+
+ SDValue Flag;
+ SmallVector<SDValue, 6> RetOps;
+ RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
+ // Operand #1 = Bytes To Pop
+ RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
+ MVT::i32));
- // Add nodes to the DAG and add the values into the RetOps list
- for (auto &Reg : RegsToPass) {
- Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
- Flag = Chain.getValue(1);
- RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
+ // Copy the result values into the output registers.
+ for (auto &RetVal : RetVals) {
+ if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
+ RetOps.push_back(RetVal.second);
+ continue; // Don't emit a copytoreg.
}
+
+ Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(
+ DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
}
// Swift calling convention does not require we copy the sret argument
@@ -2775,7 +2759,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
// false, then an sret argument may be implicitly inserted in the SelDAG. In
// either case FuncInfo->setSRetReturnReg() will have been called.
- if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
+ if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
// When we have both sret and another return value, we should use the
// original Chain stored in RetOps[0], instead of the current Chain updated
// in the above loop. If we only have sret, RetOps[0] equals to Chain.
@@ -2798,7 +2782,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
getPointerTy(MF.getDataLayout()));
- unsigned RetValReg
+ Register RetValReg
= (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
X86::RAX : X86::EAX;
Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
@@ -2924,7 +2908,7 @@ static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
if (nullptr == InFlag) {
// When no physical register is present,
// create an intermediate virtual register.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
@@ -3133,10 +3117,10 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
SelectionDAG &DAG, const SDLoc &dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
- return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
- /*isVolatile*/false, /*AlwaysInline=*/true,
- /*isTailCall*/false,
- MachinePointerInfo(), MachinePointerInfo());
+ return DAG.getMemcpy(
+ Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
+ /*isVolatile*/ false, /*AlwaysInline=*/true,
+ /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
}
/// Return true if the calling convention is one that we can guarantee TCO for.
@@ -3176,8 +3160,7 @@ bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
if (!CI->isTailCall())
return false;
- ImmutableCallSite CS(CI);
- CallingConv::ID CalleeCC = CS.getCallingConv();
+ CallingConv::ID CalleeCC = CI->getCallingConv();
if (!mayTailCallThisCC(CalleeCC))
return false;
@@ -3341,20 +3324,223 @@ static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
#ifndef NDEBUG
static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
- return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
- [](const CCValAssign &A, const CCValAssign &B) -> bool {
- return A.getValNo() < B.getValNo();
- });
+ return llvm::is_sorted(
+ ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
+ return A.getValNo() < B.getValNo();
+ });
}
#endif
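
llvm::is_sorted is just the range-taking convenience wrapper over std::is_sorted; roughly this shape, shown here only as an approximation of the STLExtras helper:

    #include <algorithm>
    #include <iterator>

    // Approximate shape of llvm::is_sorted from STLExtras.h.
    template <typename R, typename Compare>
    bool is_sorted_range(R &&Range, Compare C) {
      return std::is_sorted(std::begin(Range), std::end(Range), C);
    }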
+namespace {
+/// This is a helper class for lowering variable argument parameters.
+class VarArgsLoweringHelper {
+public:
+ VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
+ SelectionDAG &DAG, const X86Subtarget &Subtarget,
+ CallingConv::ID CallConv, CCState &CCInfo)
+ : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
+ TheMachineFunction(DAG.getMachineFunction()),
+ TheFunction(TheMachineFunction.getFunction()),
+ FrameInfo(TheMachineFunction.getFrameInfo()),
+ FrameLowering(*Subtarget.getFrameLowering()),
+ TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
+ CCInfo(CCInfo) {}
+
+  // Lower variable argument parameters.
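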
+ void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
+
+private:
+ void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
+
+ void forwardMustTailParameters(SDValue &Chain);
+
+ bool is64Bit() { return Subtarget.is64Bit(); }
+ bool isWin64() { return Subtarget.isCallingConvWin64(CallConv); }
+
+ X86MachineFunctionInfo *FuncInfo;
+ const SDLoc &DL;
+ SelectionDAG &DAG;
+ const X86Subtarget &Subtarget;
+ MachineFunction &TheMachineFunction;
+ const Function &TheFunction;
+ MachineFrameInfo &FrameInfo;
+ const TargetFrameLowering &FrameLowering;
+ const TargetLowering &TargLowering;
+ CallingConv::ID CallConv;
+ CCState &CCInfo;
+};
+} // namespace
+
+void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
+ SDValue &Chain, unsigned StackSize) {
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start. We
+ // can skip this if there are no va_start calls.
+ if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
+ CallConv != CallingConv::X86_ThisCall)) {
+ FuncInfo->setVarArgsFrameIndex(
+ FrameInfo.CreateFixedObject(1, StackSize, true));
+ }
+
+ // Figure out if XMM registers are in use.
+ assert(!(Subtarget.useSoftFloat() &&
+ TheFunction.hasFnAttribute(Attribute::NoImplicitFloat)) &&
+ "SSE register cannot be used when SSE is disabled!");
+
+ // 64-bit calling conventions support varargs and register parameters, so we
+ // have to do extra work to spill them in the prologue.
+ if (is64Bit()) {
+ // Find the first unallocated argument registers.
+ ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
+ ArrayRef<MCPhysReg> ArgXMMs =
+ get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
+ unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
+
+ assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
+ "SSE register cannot be used when SSE is disabled!");
+
+ if (isWin64()) {
+ // Get to the caller-allocated home save location. Add 8 to account
+ // for the return address.
+ int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
+ FuncInfo->setRegSaveFrameIndex(
+ FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
+ // Fixup to set vararg frame on shadow area (4 x i64).
+ if (NumIntRegs < 4)
+ FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
+ } else {
+ // For X86-64, if there are vararg parameters that are passed via
+ // registers, then we must store them to their spots on the stack so
+ // they may be loaded by dereferencing the result of va_next.
+ FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
+ FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
+ FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
+ ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
+ }
+
+    SmallVector<SDValue, 6> LiveGPRs;    // live-in values of the GPR arg registers
+    SmallVector<SDValue, 8> LiveXMMRegs; // live-in values of the XMM arg registers
+    SDValue ALVal;                       // live-in value of %al, if applicable
+
+ // Gather all the live in physical registers.
+ for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
+ Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
+ LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
+ }
+ const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
+ if (!AvailableXmms.empty()) {
+ Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
+ ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
+ for (MCPhysReg Reg : AvailableXmms) {
+ Register XMMReg = TheMachineFunction.addLiveIn(Reg, &X86::VR128RegClass);
+ LiveXMMRegs.push_back(
+ DAG.getCopyFromReg(Chain, DL, XMMReg, MVT::v4f32));
+ }
+ }
+
+ // Store the integer parameter registers.
+ SmallVector<SDValue, 8> MemOps;
+ SDValue RSFIN =
+ DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
+ TargLowering.getPointerTy(DAG.getDataLayout()));
+ unsigned Offset = FuncInfo->getVarArgsGPOffset();
+ for (SDValue Val : LiveGPRs) {
+ SDValue FIN = DAG.getNode(ISD::ADD, DL,
+ TargLowering.getPointerTy(DAG.getDataLayout()),
+ RSFIN, DAG.getIntPtrConstant(Offset, DL));
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(),
+ FuncInfo->getRegSaveFrameIndex(), Offset));
+ MemOps.push_back(Store);
+ Offset += 8;
+ }
+
+ // Now store the XMM (fp + vector) parameter registers.
+ if (!LiveXMMRegs.empty()) {
+ SmallVector<SDValue, 12> SaveXMMOps;
+ SaveXMMOps.push_back(Chain);
+ SaveXMMOps.push_back(ALVal);
+ SaveXMMOps.push_back(
+ DAG.getIntPtrConstant(FuncInfo->getRegSaveFrameIndex(), DL));
+ SaveXMMOps.push_back(
+ DAG.getIntPtrConstant(FuncInfo->getVarArgsFPOffset(), DL));
+ SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
+ LiveXMMRegs.end());
+ MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, DL,
+ MVT::Other, SaveXMMOps));
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
+ }
+}
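
The offsets stored here follow the System V x86-64 register save area layout: six 8-byte GPR slots followed by eight 16-byte XMM slots (176 bytes, 16-byte aligned), with the GP/FP offsets pointing at the first slot not consumed by named arguments. A small worked computation for the non-Win64 path (hypothetical helper):

    // System V x86-64 varargs register save area: 6 GPR slots + 8 XMM slots.
    struct RegSaveArea {
      unsigned GPOffset;  // first free GPR slot, bytes from the area start
      unsigned FPOffset;  // first free XMM slot, bytes from the area start
      unsigned SizeBytes; // total size of the save area
    };

    static RegSaveArea sysvRegSaveArea(unsigned NumIntRegsUsed,
                                       unsigned NumXMMRegsUsed) {
      RegSaveArea A;
      A.GPOffset = NumIntRegsUsed * 8;          // setVarArgsGPOffset
      A.FPOffset = 6 * 8 + NumXMMRegsUsed * 16; // setVarArgsFPOffset
      A.SizeBytes = 6 * 8 + 8 * 16;             // 176 bytes, Align(16)
      return A;
    }

    // e.g. two named integer args and one named XMM arg already consumed:
    //   sysvRegSaveArea(2, 1) -> { GPOffset = 16, FPOffset = 64, SizeBytes = 176 }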
+
+void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
+ // Find the largest legal vector type.
+ MVT VecVT = MVT::Other;
+ // FIXME: Only some x86_32 calling conventions support AVX512.
+ if (Subtarget.useAVX512Regs() &&
+ (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
+ CallConv == CallingConv::Intel_OCL_BI)))
+ VecVT = MVT::v16f32;
+ else if (Subtarget.hasAVX())
+ VecVT = MVT::v8f32;
+ else if (Subtarget.hasSSE2())
+ VecVT = MVT::v4f32;
+
+ // We forward some GPRs and some vector types.
+ SmallVector<MVT, 2> RegParmTypes;
+ MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
+ RegParmTypes.push_back(IntVT);
+ if (VecVT != MVT::Other)
+ RegParmTypes.push_back(VecVT);
+
+ // Compute the set of forwarded registers. The rest are scratch.
+ SmallVectorImpl<ForwardedRegister> &Forwards =
+ FuncInfo->getForwardedMustTailRegParms();
+ CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
+
+ // Forward AL for SysV x86_64 targets, since it is used for varargs.
+ if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
+ Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
+ Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
+ }
+
+ // Copy all forwards from physical to virtual registers.
+ for (ForwardedRegister &FR : Forwards) {
+ // FIXME: Can we use a less constrained schedule?
+ SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
+ FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
+ TargLowering.getRegClassFor(FR.VT));
+ Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
+ }
+}
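
The vector type forwarded for musttail thunks is simply the widest one the calling convention may touch; boiled down, with booleans standing in for the subtarget/CC checks above:

    // Widest vector type forwarded for musttail var-arg thunks (sketch).
    static const char *mustTailVecType(bool UseAVX512RegsForThisCC, bool HasAVX,
                                       bool HasSSE2) {
      if (UseAVX512RegsForThisCC)
        return "v16f32";
      if (HasAVX)
        return "v8f32";
      if (HasSSE2)
        return "v4f32";
      return "none"; // only the integer register type is forwarded
    }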
+
+void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
+ unsigned StackSize) {
+  // Set FrameIndex to the 0xAAAAAAA value to mark unset state.
+  // If necessary, it will be set to the correct value later.
+ FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
+ FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
+
+ if (FrameInfo.hasVAStart())
+ createVarArgAreaAndStoreRegisters(Chain, StackSize);
+
+ if (FrameInfo.hasMustTailInVarArgFunc())
+ forwardMustTailParameters(Chain);
+}
+
SDValue X86TargetLowering::LowerFormalArguments(
- SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
const Function &F = MF.getFunction();
if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
@@ -3366,16 +3552,16 @@ SDValue X86TargetLowering::LowerFormalArguments(
bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
assert(
- !(isVarArg && canGuaranteeTCO(CallConv)) &&
+ !(IsVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64.
if (IsWin64)
- CCInfo.AllocateStack(32, 8);
+ CCInfo.AllocateStack(32, Align(8));
CCInfo.AnalyzeArguments(Ins, CC_X86);
@@ -3446,7 +3632,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
else
llvm_unreachable("Unknown argument type!");
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
}
@@ -3500,7 +3686,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
// the argument into a virtual register so that we can access it from the
// return points.
if (Ins[I].Flags.isSRet()) {
- unsigned Reg = FuncInfo->getSRetReturnReg();
+ Register Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
MVT PtrTy = getPointerTy(DAG.getDataLayout());
Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
@@ -3518,147 +3704,12 @@ SDValue X86TargetLowering::LowerFormalArguments(
MF.getTarget().Options.GuaranteedTailCallOpt))
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
- // If the function takes variable number of arguments, make a frame index for
- // the start of the first vararg value... for expansion of llvm.va_start. We
- // can skip this if there are no va_start calls.
- if (MFI.hasVAStart() &&
- (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
- CallConv != CallingConv::X86_ThisCall))) {
- FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
- }
-
- // Figure out if XMM registers are in use.
- assert(!(Subtarget.useSoftFloat() &&
- F.hasFnAttribute(Attribute::NoImplicitFloat)) &&
- "SSE register cannot be used when SSE is disabled!");
-
- // 64-bit calling conventions support varargs and register parameters, so we
- // have to do extra work to spill them in the prologue.
- if (Is64Bit && isVarArg && MFI.hasVAStart()) {
- // Find the first unallocated argument registers.
- ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
- ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
- unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
- unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
- assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
- "SSE register cannot be used when SSE is disabled!");
-
- // Gather all the live in physical registers.
- SmallVector<SDValue, 6> LiveGPRs;
- SmallVector<SDValue, 8> LiveXMMRegs;
- SDValue ALVal;
- for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
- unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
- LiveGPRs.push_back(
- DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
- }
- if (!ArgXMMs.empty()) {
- unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
- ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
- for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
- unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
- LiveXMMRegs.push_back(
- DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
- }
- }
-
- if (IsWin64) {
- // Get to the caller-allocated home save location. Add 8 to account
- // for the return address.
- int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
- FuncInfo->setRegSaveFrameIndex(
- MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
- // Fixup to set vararg frame on shadow area (4 x i64).
- if (NumIntRegs < 4)
- FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
- } else {
- // For X86-64, if there are vararg parameters that are passed via
- // registers, then we must store them to their spots on the stack so
- // they may be loaded by dereferencing the result of va_next.
- FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
- FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
- FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject(
- ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
- }
-
- // Store the integer parameter registers.
- SmallVector<SDValue, 8> MemOps;
- SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
- getPointerTy(DAG.getDataLayout()));
- unsigned Offset = FuncInfo->getVarArgsGPOffset();
- for (SDValue Val : LiveGPRs) {
- SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
- RSFIN, DAG.getIntPtrConstant(Offset, dl));
- SDValue Store =
- DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo::getFixedStack(
- DAG.getMachineFunction(),
- FuncInfo->getRegSaveFrameIndex(), Offset));
- MemOps.push_back(Store);
- Offset += 8;
- }
-
- if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
- // Now store the XMM (fp + vector) parameter registers.
- SmallVector<SDValue, 12> SaveXMMOps;
- SaveXMMOps.push_back(Chain);
- SaveXMMOps.push_back(ALVal);
- SaveXMMOps.push_back(DAG.getIntPtrConstant(
- FuncInfo->getRegSaveFrameIndex(), dl));
- SaveXMMOps.push_back(DAG.getIntPtrConstant(
- FuncInfo->getVarArgsFPOffset(), dl));
- SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
- LiveXMMRegs.end());
- MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
- MVT::Other, SaveXMMOps));
- }
-
- if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
- }
-
- if (isVarArg && MFI.hasMustTailInVarArgFunc()) {
- // Find the largest legal vector type.
- MVT VecVT = MVT::Other;
- // FIXME: Only some x86_32 calling conventions support AVX512.
- if (Subtarget.useAVX512Regs() &&
- (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
- CallConv == CallingConv::Intel_OCL_BI)))
- VecVT = MVT::v16f32;
- else if (Subtarget.hasAVX())
- VecVT = MVT::v8f32;
- else if (Subtarget.hasSSE2())
- VecVT = MVT::v4f32;
-
- // We forward some GPRs and some vector types.
- SmallVector<MVT, 2> RegParmTypes;
- MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
- RegParmTypes.push_back(IntVT);
- if (VecVT != MVT::Other)
- RegParmTypes.push_back(VecVT);
-
- // Compute the set of forwarded registers. The rest are scratch.
- SmallVectorImpl<ForwardedRegister> &Forwards =
- FuncInfo->getForwardedMustTailRegParms();
- CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
-
- // Forward AL for SysV x86_64 targets, since it is used for varargs.
- if (Is64Bit && !IsWin64 && !CCInfo.isAllocated(X86::AL)) {
- unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
- Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
- }
-
- // Copy all forwards from physical to virtual registers.
- for (ForwardedRegister &FR : Forwards) {
- // FIXME: Can we use a less constrained schedule?
- SDValue RegVal = DAG.getCopyFromReg(Chain, dl, FR.VReg, FR.VT);
- FR.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(FR.VT));
- Chain = DAG.getCopyToReg(Chain, dl, FR.VReg, RegVal);
- }
- }
+ if (IsVarArg)
+ VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
+ .lowerVarArgsParameters(Chain, StackSize);
// Some CCs need callee pop.
- if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
+ if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
MF.getTarget().Options.GuaranteedTailCallOpt)) {
FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
} else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
@@ -3677,10 +3728,6 @@ SDValue X86TargetLowering::LowerFormalArguments(
if (!Is64Bit) {
// RegSaveFrameIndex is X86-64 only.
FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
- if (CallConv == CallingConv::X86_FastCall ||
- CallConv == CallingConv::X86_ThisCall)
- // fastcc functions can't have varargs.
- FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
}
FuncInfo->setArgumentStackSize(StackSize);
@@ -3697,7 +3744,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
// same, so the size of funclets' (mostly empty) frames is dictated by
// how far this slot is from the bottom (since they allocate just enough
// space to accommodate holding this slot at the correct offset).
- int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false);
+ int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSS=*/false);
EHInfo->PSPSymFrameIdx = PSPSymFI;
}
}
@@ -3705,7 +3752,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
if (CallConv == CallingConv::X86_RegCall ||
F.hasFnAttribute("no_caller_saved_registers")) {
MachineRegisterInfo &MRI = MF.getRegInfo();
- for (std::pair<unsigned, unsigned> Pair : MRI.liveins())
+ for (std::pair<Register, Register> Pair : MRI.liveins())
MRI.disableCalleeSavedRegister(Pair.first);
}
@@ -3716,12 +3763,13 @@ SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
SDValue Arg, const SDLoc &dl,
SelectionDAG &DAG,
const CCValAssign &VA,
- ISD::ArgFlagsTy Flags) const {
+ ISD::ArgFlagsTy Flags,
+ bool isByVal) const {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
StackPtr, PtrOff);
- if (Flags.isByVal())
+ if (isByVal)
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
return DAG.getStore(
@@ -3796,18 +3844,17 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
CallConv == CallingConv::Tail;
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
- const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
+ const auto *CI = dyn_cast_or_null<CallInst>(CLI.CB);
const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
(Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
- const auto *II = dyn_cast_or_null<InvokeInst>(CLI.CS.getInstruction());
+ const auto *II = dyn_cast_or_null<InvokeInst>(CLI.CB);
bool HasNoCfCheck =
(CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck());
const Module *M = MF.getMMI().getModule();
Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
MachineFunction::CallSiteInfo CSInfo;
-
if (CallConv == CallingConv::X86_INTR)
report_fatal_error("X86 interrupts may not be called directly");
@@ -3823,7 +3870,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
isTailCall = false;
}
- bool IsMustTail = CLI.CS && CLI.CS.isMustTailCall();
+ bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
if (IsMustTail) {
// Force this to be a tail call. The verifier rules are enough to ensure
// that we can lower this successfully without moving the return address
@@ -3854,7 +3901,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Allocate shadow area for Win64.
if (IsWin64)
- CCInfo.AllocateStack(32, 8);
+ CCInfo.AllocateStack(32, Align(8));
CCInfo.AnalyzeArguments(Outs, CC_X86);
@@ -3900,6 +3947,21 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (ArgLocs.back().getLocMemOffset() != 0)
report_fatal_error("any parameter with the inalloca attribute must be "
"the only memory argument");
+ } else if (CLI.IsPreallocated) {
+ assert(ArgLocs.back().isMemLoc() &&
+ "cannot use preallocated attribute on a register "
+ "parameter");
+ SmallVector<size_t, 4> PreallocatedOffsets;
+ for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
+ if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
+ PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
+ }
+ }
+ auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
+ size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
+ MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
+ MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
+ NumBytesToPush = 0;
}
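
The new preallocated path records, per call site, the total argument-block size and the offset of every argument carrying the preallocated attribute, and then skips the normal push of those bytes (NumBytesToPush = 0). Stripped of the SelectionDAG types, the record amounts to the following (hypothetical struct, not X86MachineFunctionInfo's actual layout):

    #include <cstddef>
    #include <vector>

    struct PreallocatedCallSite {
      std::size_t StackSize = 0;           // bytes reserved for the call's arguments
      std::vector<std::size_t> ArgOffsets; // offset of each preallocated argument
    };

    static PreallocatedCallSite
    recordPreallocated(std::size_t NumBytes,
                       const std::vector<bool> &IsPreallocatedArg,
                       const std::vector<std::size_t> &LocMemOffsets) {
      PreallocatedCallSite CS;
      CS.StackSize = NumBytes;
      for (std::size_t i = 0; i < IsPreallocatedArg.size(); ++i)
        if (IsPreallocatedArg[i])
          CS.ArgOffsets.push_back(LocMemOffsets[i]);
      return CS;
    }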
if (!IsSibcall && !IsMustTail)
@@ -3912,7 +3974,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
Is64Bit, FPDiff, dl);
- SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
SDValue StackPtr;
@@ -3927,9 +3989,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
++I, ++OutIndex) {
assert(OutIndex < Outs.size() && "Invalid Out index");
- // Skip inalloca arguments, they have already been written.
+ // Skip inalloca/preallocated arguments, they have already been written.
ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
- if (Flags.isInAlloca())
+ if (Flags.isInAlloca() || Flags.isPreallocated())
continue;
CCValAssign &VA = ArgLocs[I];
@@ -3968,8 +4030,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// the caller from seeing any modifications the callee may make
// as guaranteed by the `byval` attribute.
int FrameIdx = MF.getFrameInfo().CreateStackObject(
- Flags.getByValSize(), std::max(16, (int)Flags.getByValAlign()),
- false);
+ Flags.getByValSize(),
+ std::max(Align(16), Flags.getNonZeroByValAlign()), false);
SDValue StackSlot =
DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
Chain =
@@ -3998,12 +4060,12 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
} else if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
const TargetOptions &Options = DAG.getTarget().Options;
- if (Options.EnableDebugEntryValues)
+ if (Options.EmitCallSiteInfo)
CSInfo.emplace_back(VA.getLocReg(), I);
if (isVarArg && IsWin64) {
// Win64 ABI requires argument XMM reg to be copied to the corresponding
// shadow reg if callee is a varargs function.
- unsigned ShadowReg = 0;
+ Register ShadowReg;
switch (VA.getLocReg()) {
case X86::XMM0: ShadowReg = X86::RCX; break;
case X86::XMM1: ShadowReg = X86::RDX; break;
@@ -4019,7 +4081,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
getPointerTy(DAG.getDataLayout()));
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
- dl, DAG, VA, Flags));
+ dl, DAG, VA, Flags, isByVal));
}
}
@@ -4031,7 +4093,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// GOT pointer.
if (!isTailCall) {
RegsToPass.push_back(std::make_pair(
- unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
+ Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
getPointerTy(DAG.getDataLayout()))));
} else {
// If we are tail calling and generating PIC/GOT style code load the
@@ -4069,8 +4131,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
assert((Subtarget.hasSSE1() || !NumXMMRegs)
&& "SSE registers cannot be used when SSE is disabled");
-
- RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
+ RegsToPass.push_back(std::make_pair(Register(X86::AL),
DAG.getConstant(NumXMMRegs, dl,
MVT::i8)));
}
@@ -4079,7 +4140,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const auto &Forwards = X86Info->getForwardedMustTailRegParms();
for (const auto &F : Forwards) {
SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
- RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
+ RegsToPass.push_back(std::make_pair(F.PReg, Val));
}
}
@@ -4117,8 +4178,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert(VA.isMemLoc());
SDValue Arg = OutVals[OutsIndex];
ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
- // Skip inalloca arguments. They don't require any work.
- if (Flags.isInAlloca())
+ // Skip inalloca/preallocated arguments. They don't require any work.
+ if (Flags.isInAlloca() || Flags.isPreallocated())
continue;
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
@@ -4219,7 +4280,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// is thrown, the runtime will not restore CSRs.
// FIXME: Model this more precisely so that we can register allocate across
// the normal edge and spill and fill across the exceptional edge.
- if (!Is64Bit && CLI.CS && CLI.CS.isInvoke()) {
+ if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
const Function &CallerFn = MF.getFunction();
EHPersonality Pers =
CallerFn.hasPersonalityFn()
@@ -4278,11 +4339,12 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
}
InFlag = Chain.getValue(1);
+ DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
// Save heapallocsite metadata.
- if (CLI.CS)
- if (MDNode *HeapAlloc = CLI.CS->getMetadata("heapallocsite"))
+ if (CLI.CB)
+ if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
// Create the CALLSEQ_END node.
@@ -4301,12 +4363,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
else
NumBytesForCalleeToPop = 0; // Callee pops nothing.
- if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) {
- // No need to reset the stack after the call if the call doesn't return. To
- // make the MI verify, we'll pretend the callee does it for us.
- NumBytesForCalleeToPop = NumBytes;
- }
-
// Returns a flag for retval copy to use.
if (!IsSibcall) {
Chain = DAG.getCALLSEQ_END(Chain,
@@ -4337,7 +4393,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// (within module) calls are supported at the moment.
// To keep the stack aligned according to platform abi the function
// GetAlignedArgumentStackSize ensures that argument delta is always multiples
-// of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
+// of stack alignment. (Dynamic linkers need this - Darwin's dyld for example)
// If a tail called function callee has more arguments than the caller the
// caller needs to make sure that there is room to move the RETADDR to. This is
// achieved by reserving an area the size of the argument delta right after the
@@ -4359,7 +4415,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned
X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
SelectionDAG &DAG) const {
- const Align StackAlignment(Subtarget.getFrameLowering()->getStackAlignment());
+ const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
assert(StackSize % SlotSize == 0 &&
"StackSize must be a multiple of SlotSize");
@@ -4395,7 +4451,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
int FI = INT_MAX;
if (Arg.getOpcode() == ISD::CopyFromReg) {
- unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+ Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
if (!Register::isVirtualRegister(VR))
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
@@ -4578,7 +4634,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
// Allocate shadow area for Win64
if (IsCalleeWin64)
- CCInfo.AllocateStack(32, 8);
+ CCInfo.AllocateStack(32, Align(8));
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
StackArgsSize = CCInfo.getNextStackOffset();
@@ -4693,6 +4749,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::INSERTPS:
case X86ISD::EXTRQI:
case X86ISD::INSERTQI:
+ case X86ISD::VALIGN:
case X86ISD::PALIGNR:
case X86ISD::VSHLDQ:
case X86ISD::VSRLDQ:
@@ -4739,6 +4796,13 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) {
}
}
+static bool isTargetShuffleSplat(SDValue Op) {
+ unsigned Opcode = Op.getOpcode();
+ if (Opcode == ISD::EXTRACT_SUBVECTOR)
+ return isTargetShuffleSplat(Op.getOperand(0));
+ return Opcode == X86ISD::VBROADCAST || Opcode == X86ISD::VBROADCAST_LOAD;
+}
+
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
@@ -4972,7 +5036,7 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
ScalarVT = MVT::i32;
Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
- Info.align = Align::None();
+ Info.align = Align(1);
Info.flags |= MachineMemOperand::MOStore;
break;
}
@@ -4985,7 +5049,7 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
unsigned NumElts = std::min(DataVT.getVectorNumElements(),
IndexVT.getVectorNumElements());
Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
- Info.align = Align::None();
+ Info.align = Align(1);
Info.flags |= MachineMemOperand::MOLoad;
break;
}
@@ -4997,7 +5061,7 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
unsigned NumElts = std::min(DataVT.getVectorNumElements(),
IndexVT.getVectorNumElements());
Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
- Info.align = Align::None();
+ Info.align = Align(1);
Info.flags |= MachineMemOperand::MOStore;
break;
}
@@ -5146,7 +5210,8 @@ bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
}
-bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT) const {
+bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
+ bool) const {
// TODO: Allow vectors?
if (VT.isVector())
return false;
@@ -5374,6 +5439,19 @@ static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); });
}
+/// Return true if the value of any element in Mask is the zero sentinel value.
+static bool isAnyZero(ArrayRef<int> Mask) {
+ return llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
+}
+
+/// Return true if the value of any element in Mask is the zero or undef
+/// sentinel values.
+static bool isAnyZeroOrUndef(ArrayRef<int> Mask) {
+ return llvm::any_of(Mask, [](int M) {
+ return M == SM_SentinelZero || M == SM_SentinelUndef;
+ });
+}
+
/// Return true if Val is undef or if its value falls within the
/// specified range (L, H].
static bool isUndefOrInRange(int Val, int Low, int Hi) {
@@ -5511,6 +5589,36 @@ static bool canWidenShuffleElements(ArrayRef<int> Mask) {
return canWidenShuffleElements(Mask, WidenedMask);
}
+// Attempt to narrow/widen shuffle mask until it matches the target number of
+// elements.
+static bool scaleShuffleElements(ArrayRef<int> Mask, unsigned NumDstElts,
+ SmallVectorImpl<int> &ScaledMask) {
+ unsigned NumSrcElts = Mask.size();
+ assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
+ "Illegal shuffle scale factor");
+
+ // Narrowing is guaranteed to work.
+ if (NumDstElts >= NumSrcElts) {
+ int Scale = NumDstElts / NumSrcElts;
+ llvm::narrowShuffleMaskElts(Scale, Mask, ScaledMask);
+ return true;
+ }
+
+ // We have to repeat the widening until we reach the target size, but we can
+ // split out the first widening as it sets up ScaledMask for us.
+ if (canWidenShuffleElements(Mask, ScaledMask)) {
+ while (ScaledMask.size() > NumDstElts) {
+ SmallVector<int, 16> WidenedMask;
+ if (!canWidenShuffleElements(ScaledMask, WidenedMask))
+ return false;
+ ScaledMask = std::move(WidenedMask);
+ }
+ return true;
+ }
+
+ return false;
+}
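
Scaling a shuffle mask is pure index arithmetic: narrowing by a factor of two turns the v2i64 swap <1, 0> into the v4i32 mask <2, 3, 0, 1>, while widening only succeeds when each adjacent pair of indices already refers to an adjacent even/odd pair (which is what canWidenShuffleElements checks). A rough standalone version of the narrowing step used above:

    #include <vector>

    // Roughly what llvm::narrowShuffleMaskElts does: every index M expands to
    // Scale consecutive indices starting at M * Scale; negative sentinel
    // values (undef/zero) are simply replicated.
    static std::vector<int> narrowMask(int Scale, const std::vector<int> &Mask) {
      std::vector<int> Scaled;
      Scaled.reserve(Mask.size() * Scale);
      for (int M : Mask)
        for (int i = 0; i < Scale; ++i)
          Scaled.push_back(M < 0 ? M : M * Scale + i);
      return Scaled;
    }

    // narrowMask(2, {1, 0}) == {2, 3, 0, 1}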
+
/// Returns true if Elt is a constant zero or a floating point constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
return isNullConstant(Elt) || isNullFPConstant(Elt);
@@ -5725,7 +5833,7 @@ static SDValue widenSubVector(SDValue Vec, bool ZeroNewElements,
return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl);
}
-// Helper function to collect subvector ops that are concated together,
+// Helper function to collect subvector ops that are concatenated together,
// either by ISD::CONCAT_VECTORS or a ISD::INSERT_SUBVECTOR series.
// The subvectors in Ops are guaranteed to be the same type.
static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
@@ -5736,8 +5844,7 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
return true;
}
- if (N->getOpcode() == ISD::INSERT_SUBVECTOR &&
- isa<ConstantSDNode>(N->getOperand(2))) {
+ if (N->getOpcode() == ISD::INSERT_SUBVECTOR) {
SDValue Src = N->getOperand(0);
SDValue Sub = N->getOperand(1);
const APInt &Idx = N->getConstantOperandAPInt(2);
@@ -5746,19 +5853,93 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
// TODO - Handle more general insert_subvector chains.
if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
- Idx == (VT.getVectorNumElements() / 2) &&
- Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
- Src.getOperand(1).getValueType() == SubVT &&
- isNullConstant(Src.getOperand(2))) {
- Ops.push_back(Src.getOperand(1));
- Ops.push_back(Sub);
- return true;
+ Idx == (VT.getVectorNumElements() / 2)) {
+ // insert_subvector(insert_subvector(undef, x, lo), y, hi)
+ if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ Src.getOperand(1).getValueType() == SubVT &&
+ isNullConstant(Src.getOperand(2))) {
+ Ops.push_back(Src.getOperand(1));
+ Ops.push_back(Sub);
+ return true;
+ }
+ // insert_subvector(x, extract_subvector(x, lo), hi)
+ if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ Sub.getOperand(0) == Src && isNullConstant(Sub.getOperand(1))) {
+ Ops.append(2, Sub);
+ return true;
+ }
}
}
return false;
}
+static std::pair<SDValue, SDValue> splitVector(SDValue Op, SelectionDAG &DAG,
+ const SDLoc &dl) {
+ EVT VT = Op.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned SizeInBits = VT.getSizeInBits();
+ assert((NumElems % 2) == 0 && (SizeInBits % 2) == 0 &&
+ "Can't split odd sized vector");
+
+ SDValue Lo = extractSubVector(Op, 0, DAG, dl, SizeInBits / 2);
+ SDValue Hi = extractSubVector(Op, NumElems / 2, DAG, dl, SizeInBits / 2);
+ return std::make_pair(Lo, Hi);
+}
+
+// Split a unary integer op into 2 half-sized ops.
+static SDValue splitVectorIntUnary(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+
+ // Make sure we only try to split 256/512-bit types to avoid creating
+ // narrow vectors.
+ assert((Op.getOperand(0).getValueType().is256BitVector() ||
+ Op.getOperand(0).getValueType().is512BitVector()) &&
+ (VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!");
+ assert(Op.getOperand(0).getValueType().getVectorNumElements() ==
+ VT.getVectorNumElements() &&
+ "Unexpected VTs!");
+
+ SDLoc dl(Op);
+
+ // Extract the Lo/Hi vectors
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = splitVector(Op.getOperand(0), DAG, dl);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ DAG.getNode(Op.getOpcode(), dl, LoVT, Lo),
+ DAG.getNode(Op.getOpcode(), dl, HiVT, Hi));
+}
+
+/// Break a binary integer operation into 2 half-sized ops and then
+/// concatenate the result back.
+static SDValue splitVectorIntBinary(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+
+ // Sanity check that all the types match.
+ assert(Op.getOperand(0).getValueType() == VT &&
+ Op.getOperand(1).getValueType() == VT && "Unexpected VTs!");
+ assert((VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!");
+
+ SDLoc dl(Op);
+
+ // Extract the LHS Lo/Hi vectors
+ SDValue LHS1, LHS2;
+ std::tie(LHS1, LHS2) = splitVector(Op.getOperand(0), DAG, dl);
+
+ // Extract the RHS Lo/Hi vectors
+ SDValue RHS1, RHS2;
+ std::tie(RHS1, RHS2) = splitVector(Op.getOperand(1), DAG, dl);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ DAG.getNode(Op.getOpcode(), dl, LoVT, LHS1, RHS1),
+ DAG.getNode(Op.getOpcode(), dl, HiVT, LHS2, RHS2));
+}
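
Both split helpers express the same identity: extract the two halves, apply the operation per half, and concatenate. The identity on plain arrays, for a v8i32 add done as two v4i32 adds (illustrative only, no SelectionDAG involved):

    #include <array>

    using V4 = std::array<int, 4>;
    using V8 = std::array<int, 8>;

    static V4 add4(const V4 &A, const V4 &B) {
      V4 R;
      for (int i = 0; i < 4; ++i)
        R[i] = A[i] + B[i];
      return R;
    }

    // v8i32 add lowered as two v4i32 adds plus a concat, mirroring the shape
    // of splitVectorIntBinary for a 256-bit op handled in 128-bit halves.
    static V8 add8ViaSplit(const V8 &A, const V8 &B) {
      V4 ALo, AHi, BLo, BHi;
      for (int i = 0; i < 4; ++i) {
        ALo[i] = A[i]; AHi[i] = A[i + 4];
        BLo[i] = B[i]; BHi[i] = B[i + 4];
      }
      V4 Lo = add4(ALo, BLo), Hi = add4(AHi, BHi);
      V8 R;
      for (int i = 0; i < 4; ++i) {
        R[i] = Lo[i];
        R[i + 4] = Hi[i];
      }
      return R;
    }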
+
// Helper for splitting operands of an operation to legal target size and
// apply a function on each part.
// Useful for operations that are available on SSE2 in 128-bit, on AVX2 in
@@ -5815,21 +5996,17 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
SDValue Vec = Op.getOperand(0);
SDValue SubVec = Op.getOperand(1);
SDValue Idx = Op.getOperand(2);
-
- if (!isa<ConstantSDNode>(Idx))
- return SDValue();
+ unsigned IdxVal = Op.getConstantOperandVal(2);
// Inserting undef is a nop. We can just return the original vector.
if (SubVec.isUndef())
return Vec;
- unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
return Op;
MVT OpVT = Op.getSimpleValueType();
unsigned NumElems = OpVT.getVectorNumElements();
-
SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
// Extend to natively supported kshift.
@@ -5849,7 +6026,6 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
MVT SubVecVT = SubVec.getSimpleValueType();
unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
-
assert(IdxVal + SubVecNumElems <= NumElems &&
IdxVal % SubVecVT.getSizeInBits() == 0 &&
"Unexpected index value in INSERT_SUBVECTOR");
@@ -5900,7 +6076,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
if (SubVecNumElems * 2 == NumElems) {
// Special case, use legal zero extending insert_subvector. This allows
- // isel to opimitize when bits are known zero.
+ // isel to optimize when bits are known zero.
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx);
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
DAG.getConstant(0, dl, WideOpVT),
@@ -6042,8 +6218,8 @@ static SDValue getExtendInVec(unsigned Opcode, const SDLoc &DL, EVT VT,
// Match (xor X, -1) -> X.
// Match extract_subvector(xor X, -1) -> extract_subvector(X).
// Match concat_vectors(xor X, -1, xor Y, -1) -> concat_vectors(X, Y).
-static SDValue IsNOT(SDValue V, SelectionDAG &DAG) {
- V = peekThroughBitcasts(V);
+static SDValue IsNOT(SDValue V, SelectionDAG &DAG, bool OneUse = false) {
+ V = OneUse ? peekThroughOneUseBitcasts(V) : peekThroughBitcasts(V);
if (V.getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()))
return V.getOperand(0);
@@ -6067,6 +6243,35 @@ static SDValue IsNOT(SDValue V, SelectionDAG &DAG) {
return SDValue();
}
+void llvm::createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
+ bool Lo, bool Unary) {
+ assert(Mask.empty() && "Expected an empty shuffle mask vector");
+ int NumElts = VT.getVectorNumElements();
+ int NumEltsInLane = 128 / VT.getScalarSizeInBits();
+ for (int i = 0; i < NumElts; ++i) {
+ unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
+ int Pos = (i % NumEltsInLane) / 2 + LaneStart;
+ Pos += (Unary ? 0 : NumElts * (i % 2));
+ Pos += (Lo ? 0 : NumEltsInLane / 2);
+ Mask.push_back(Pos);
+ }
+}
+
+/// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
+/// imposed by AVX and specific to the unary pattern. Example:
+/// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
+/// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
+void llvm::createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
+ bool Lo) {
+ assert(Mask.empty() && "Expected an empty shuffle mask vector");
+ int NumElts = VT.getVectorNumElements();
+ for (int i = 0; i < NumElts; ++i) {
+ int Pos = i / 2;
+ Pos += (Lo ? 0 : NumElts / 2);
+ Mask.push_back(Pos);
+ }
+}
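
For a v8i32 type these two generators differ only in lane handling: the unpack mask respects 128-bit lanes and yields <0,0,1,1,4,4,5,5> for the unary lo case, while the splat2 mask ignores lanes and yields <0,0,1,1,2,2,3,3>. A plain-vector transcription of the unpack loop, with the worked result:

    #include <vector>

    // Plain-vector transcription of createUnpackShuffleMask (indices only).
    static std::vector<int> unpackMask(int NumElts, int EltSizeInBits, bool Lo,
                                       bool Unary) {
      std::vector<int> Mask;
      int NumEltsInLane = 128 / EltSizeInBits;
      for (int i = 0; i < NumElts; ++i) {
        int LaneStart = (i / NumEltsInLane) * NumEltsInLane;
        int Pos = (i % NumEltsInLane) / 2 + LaneStart;
        Pos += (Unary ? 0 : NumElts * (i % 2)); // second operand for binary unpack
        Pos += (Lo ? 0 : NumEltsInLane / 2);    // high half of each lane
        Mask.push_back(Pos);
      }
      return Mask;
    }

    // unpackMask(8, 32, /*Lo=*/true, /*Unary=*/true) == {0,0,1,1,4,4,5,5}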
+
/// Returns a vector_shuffle node for an unpackl operation.
static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
SDValue V1, SDValue V2) {
@@ -6102,14 +6307,10 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
}
-static const Constant *getTargetConstantFromNode(LoadSDNode *Load) {
- if (!Load || !ISD::isNormalLoad(Load))
- return nullptr;
-
- SDValue Ptr = Load->getBasePtr();
- if (Ptr->getOpcode() == X86ISD::Wrapper ||
- Ptr->getOpcode() == X86ISD::WrapperRIP)
- Ptr = Ptr->getOperand(0);
+static const Constant *getTargetConstantFromBasePtr(SDValue Ptr) {
+ if (Ptr.getOpcode() == X86ISD::Wrapper ||
+ Ptr.getOpcode() == X86ISD::WrapperRIP)
+ Ptr = Ptr.getOperand(0);
auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0)
@@ -6118,6 +6319,12 @@ static const Constant *getTargetConstantFromNode(LoadSDNode *Load) {
return CNode->getConstVal();
}
+static const Constant *getTargetConstantFromNode(LoadSDNode *Load) {
+ if (!Load || !ISD::isNormalLoad(Load))
+ return nullptr;
+ return getTargetConstantFromBasePtr(Load->getBasePtr());
+}
+
static const Constant *getTargetConstantFromNode(SDValue Op) {
Op = peekThroughBitcasts(Op);
return getTargetConstantFromNode(dyn_cast<LoadSDNode>(Op));
@@ -6298,23 +6505,6 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
}
// Extract constant bits from a broadcasted constant pool scalar.
- if (Op.getOpcode() == X86ISD::VBROADCAST &&
- EltSizeInBits <= VT.getScalarSizeInBits()) {
- if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
- unsigned SrcEltSizeInBits = Broadcast->getType()->getScalarSizeInBits();
- unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
-
- APInt UndefSrcElts(NumSrcElts, 0);
- SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0));
- if (CollectConstantBits(Broadcast, SrcEltBits[0], UndefSrcElts, 0)) {
- if (UndefSrcElts[0])
- UndefSrcElts.setBits(0, NumSrcElts);
- SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
- return CastBitData(UndefSrcElts, SrcEltBits);
- }
- }
- }
-
if (Op.getOpcode() == X86ISD::VBROADCAST_LOAD &&
EltSizeInBits <= VT.getScalarSizeInBits()) {
auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
@@ -6322,16 +6512,7 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
return false;
SDValue Ptr = MemIntr->getBasePtr();
- if (Ptr->getOpcode() == X86ISD::Wrapper ||
- Ptr->getOpcode() == X86ISD::WrapperRIP)
- Ptr = Ptr->getOperand(0);
-
- auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
- if (!CNode || CNode->isMachineConstantPoolEntry() ||
- CNode->getOffset() != 0)
- return false;
-
- if (const Constant *C = CNode->getConstVal()) {
+ if (const Constant *C = getTargetConstantFromBasePtr(Ptr)) {
unsigned SrcEltSizeInBits = C->getType()->getScalarSizeInBits();
unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
@@ -6375,8 +6556,7 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
}
// Insert constant bits from a base and sub vector sources.
- if (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
- isa<ConstantSDNode>(Op.getOperand(2))) {
+ if (Op.getOpcode() == ISD::INSERT_SUBVECTOR) {
// TODO - support insert_subvector through bitcasts.
if (EltSizeInBits != VT.getScalarSizeInBits())
return false;
@@ -6398,8 +6578,7 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
}
// Extract constant bits from a subvector's source.
- if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- isa<ConstantSDNode>(Op.getOperand(1))) {
+ if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
// TODO - support extract_subvector through bitcasts.
if (EltSizeInBits != VT.getScalarSizeInBits())
return false;
@@ -6468,11 +6647,12 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
namespace llvm {
namespace X86 {
-bool isConstantSplat(SDValue Op, APInt &SplatVal) {
+bool isConstantSplat(SDValue Op, APInt &SplatVal, bool AllowPartialUndefs) {
APInt UndefElts;
SmallVector<APInt, 16> EltBits;
if (getTargetConstantBitsFromNode(Op, Op.getScalarValueSizeInBits(),
- UndefElts, EltBits, true, false)) {
+ UndefElts, EltBits, true,
+ AllowPartialUndefs)) {
int SplatIndex = -1;
for (int i = 0, e = EltBits.size(); i != e; ++i) {
if (UndefElts[i])
@@ -6513,20 +6693,26 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
}
/// Create a shuffle mask that matches the PACKSS/PACKUS truncation.
+/// A multi-stage pack shuffle mask is created by specifying NumStages > 1.
/// Note: This ignores saturation, so inputs must be checked first.
static void createPackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
- bool Unary) {
+ bool Unary, unsigned NumStages = 1) {
assert(Mask.empty() && "Expected an empty shuffle mask vector");
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits() / 128;
unsigned NumEltsPerLane = 128 / VT.getScalarSizeInBits();
unsigned Offset = Unary ? 0 : NumElts;
+ unsigned Repetitions = 1u << (NumStages - 1);
+ unsigned Increment = 1u << NumStages;
+ assert((NumEltsPerLane >> NumStages) > 0 && "Illegal packing compaction");
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
- for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2)
- Mask.push_back(Elt + (Lane * NumEltsPerLane));
- for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2)
- Mask.push_back(Elt + (Lane * NumEltsPerLane) + Offset);
+ for (unsigned Stage = 0; Stage != Repetitions; ++Stage) {
+ for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += Increment)
+ Mask.push_back(Elt + (Lane * NumEltsPerLane));
+ for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += Increment)
+ Mask.push_back(Elt + (Lane * NumEltsPerLane) + Offset);
+ }
}
}
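
The NumStages parameter lets one mask describe several chained PACKSS/PACKUS stages. For a single 128-bit lane of byte elements in the binary case, a transcription of the loop above produces:

    #include <vector>

    // One 128-bit lane of createPackShuffleMask, byte elements (sketch).
    static std::vector<int> packMaskLane(unsigned NumStages, bool Unary) {
      const unsigned NumEltsPerLane = 16, Offset = Unary ? 0 : 16;
      const unsigned Repetitions = 1u << (NumStages - 1);
      const unsigned Increment = 1u << NumStages;
      std::vector<int> Mask;
      for (unsigned Stage = 0; Stage != Repetitions; ++Stage) {
        for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += Increment)
          Mask.push_back(Elt);          // bytes taken from the first source
        for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += Increment)
          Mask.push_back(Elt + Offset); // bytes taken from the second source
      }
      return Mask;
    }

    // packMaskLane(1, false) == {0,2,4,...,14, 16,18,...,30}   (a single PACK)
    // packMaskLane(2, false) == {0,4,8,12, 16,20,24,28, 0,4,8,12, 16,20,24,28}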
@@ -6597,7 +6783,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
unsigned MaskEltSize = VT.getScalarSizeInBits();
SmallVector<uint64_t, 32> RawMask;
APInt RawUndefs;
- SDValue ImmN;
+ uint64_t ImmN;
assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector");
assert(Ops.empty() && "getTargetShuffleMask expects an empty Ops vector");
@@ -6608,23 +6794,22 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
case X86ISD::BLENDI:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands() - 1);
- DecodeBLENDMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
+ DecodeBLENDMask(NumElems, ImmN, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::SHUFP:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands() - 1);
- DecodeSHUFPMask(NumElems, MaskEltSize,
- cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
+ DecodeSHUFPMask(NumElems, MaskEltSize, ImmN, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::INSERTPS:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands() - 1);
- DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
+ DecodeINSERTPSMask(ImmN, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::EXTRQI:
@@ -6672,13 +6857,23 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
DecodeMOVLHPSMask(NumElems, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
+ case X86ISD::VALIGN:
+ assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) &&
+ "Only 32-bit and 64-bit elements are supported!");
+ assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
+ assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
+ ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
+ DecodeVALIGNMask(NumElems, ImmN, Mask);
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(N->getOperand(0));
+ break;
case X86ISD::PALIGNR:
assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands() - 1);
- DecodePALIGNRMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- Mask);
+ ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
+ DecodePALIGNRMask(NumElems, ImmN, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
Ops.push_back(N->getOperand(1));
Ops.push_back(N->getOperand(0));
@@ -6686,39 +6881,34 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
case X86ISD::VSHLDQ:
assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands() - 1);
- DecodePSLLDQMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- Mask);
+ ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
+ DecodePSLLDQMask(NumElems, ImmN, Mask);
IsUnary = true;
break;
case X86ISD::VSRLDQ:
assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands() - 1);
- DecodePSRLDQMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- Mask);
+ ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
+ DecodePSRLDQMask(NumElems, ImmN, Mask);
IsUnary = true;
break;
case X86ISD::PSHUFD:
case X86ISD::VPERMILPI:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands() - 1);
- DecodePSHUFMask(NumElems, MaskEltSize,
- cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
+ DecodePSHUFMask(NumElems, MaskEltSize, ImmN, Mask);
IsUnary = true;
break;
case X86ISD::PSHUFHW:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands() - 1);
- DecodePSHUFHWMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- Mask);
+ ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
+ DecodePSHUFHWMask(NumElems, ImmN, Mask);
IsUnary = true;
break;
case X86ISD::PSHUFLW:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands() - 1);
- DecodePSHUFLWMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- Mask);
+ ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
+ DecodePSHUFLWMask(NumElems, ImmN, Mask);
IsUnary = true;
break;
case X86ISD::VZEXT_MOVL:
@@ -6770,8 +6960,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
}
case X86ISD::VPERMI:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands() - 1);
- DecodeVPERMMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
+ DecodeVPERMMask(NumElems, ImmN, Mask);
IsUnary = true;
break;
case X86ISD::MOVSS:
@@ -6783,17 +6973,15 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
case X86ISD::VPERM2X128:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands() - 1);
- DecodeVPERM2X128Mask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- Mask);
+ ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
+ DecodeVPERM2X128Mask(NumElems, ImmN, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::SHUF128:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
- ImmN = N->getOperand(N->getNumOperands() - 1);
- decodeVSHUF64x2FamilyMask(NumElems, MaskEltSize,
- cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ ImmN = N->getConstantOperandVal(N->getNumOperands() - 1);
+ decodeVSHUF64x2FamilyMask(NumElems, MaskEltSize, ImmN, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVSLDUP:
@@ -6875,9 +7063,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
return false;
// Check if we're getting a shuffle mask with zero'd elements.
- if (!AllowSentinelZero)
- if (any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
- return false;
+ if (!AllowSentinelZero && isAnyZero(Mask))
+ return false;
// If we have a fake unary shuffle, the shuffle mask is spread across two
// inputs that are actually the same node. Re-map the mask to always point
@@ -7060,6 +7247,20 @@ static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl<int> &Mask,
continue;
}
+ // INSERT_SUBVECTOR - to widen vectors we often insert them into UNDEF
+ // base vectors.
+ if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
+ SDValue Vec = V.getOperand(0);
+ int NumVecElts = Vec.getValueType().getVectorNumElements();
+ if (Vec.isUndef() && Size == NumVecElts) {
+ int Idx = V.getConstantOperandVal(2);
+ int NumSubElts = V.getOperand(1).getValueType().getVectorNumElements();
+ if (M < Idx || (Idx + NumSubElts) <= M)
+ KnownUndef.setBit(i);
+ }
+ continue;
+ }
+
// Attempt to extract from the source's constant bits.
if (IsSrcConstant[SrcIdx]) {
if (UndefSrcElts[SrcIdx][M])
@@ -7111,7 +7312,7 @@ static void resolveZeroablesFromTargetShuffle(const SmallVectorImpl<int> &Mask,
// TODO: Use DemandedElts variant.
static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs,
SmallVectorImpl<int> &Mask,
- SelectionDAG &DAG, unsigned Depth,
+ const SelectionDAG &DAG, unsigned Depth,
bool ResolveKnownElts);
// Attempt to decode ops that could be represented as a shuffle mask.
@@ -7120,7 +7321,7 @@ static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs,
static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
SmallVectorImpl<int> &Mask,
SmallVectorImpl<SDValue> &Ops,
- SelectionDAG &DAG, unsigned Depth,
+ const SelectionDAG &DAG, unsigned Depth,
bool ResolveKnownElts) {
Mask.clear();
Ops.clear();
@@ -7132,6 +7333,8 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
if ((NumBitsPerElt % 8) != 0 || (NumSizeInBits % 8) != 0)
return false;
assert(NumElts == DemandedElts.getBitWidth() && "Unexpected vector size");
+ unsigned NumSizeInBytes = NumSizeInBits / 8;
+ unsigned NumBytesPerElt = NumBitsPerElt / 8;
unsigned Opcode = N.getOpcode();
switch (Opcode) {
@@ -7179,8 +7382,6 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
DAG.computeKnownBits(N.getOperand(1), DemandedElts, Depth + 1);
if (Known0.One.isNullValue() && Known1.One.isNullValue()) {
bool IsByteMask = true;
- unsigned NumSizeInBytes = NumSizeInBits / 8;
- unsigned NumBytesPerElt = NumBitsPerElt / 8;
APInt ZeroMask = APInt::getNullValue(NumBytesPerElt);
APInt SelectMask = APInt::getNullValue(NumBytesPerElt);
for (unsigned i = 0; i != NumBytesPerElt && IsByteMask; ++i) {
@@ -7220,10 +7421,21 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
!getTargetShuffleInputs(N1, SrcInputs1, SrcMask1, DAG, Depth + 1,
true))
return false;
+
+ // Shuffle inputs must be the same size as the result.
+ if (llvm::any_of(SrcInputs0, [VT](SDValue Op) {
+ return VT.getSizeInBits() != Op.getValueSizeInBits();
+ }))
+ return false;
+ if (llvm::any_of(SrcInputs1, [VT](SDValue Op) {
+ return VT.getSizeInBits() != Op.getValueSizeInBits();
+ }))
+ return false;
+
size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
SmallVector<int, 64> Mask0, Mask1;
- scaleShuffleMask<int>(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
- scaleShuffleMask<int>(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
+ narrowShuffleMaskElts(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
+ narrowShuffleMaskElts(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
for (size_t i = 0; i != MaskSize; ++i) {
if (Mask0[i] == SM_SentinelUndef && Mask1[i] == SM_SentinelUndef)
Mask.push_back(SM_SentinelUndef);
@@ -7245,14 +7457,12 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
SDValue Sub = N.getOperand(1);
EVT SubVT = Sub.getValueType();
unsigned NumSubElts = SubVT.getVectorNumElements();
- if (!isa<ConstantSDNode>(N.getOperand(2)) ||
- !N->isOnlyUserOf(Sub.getNode()))
+ if (!N->isOnlyUserOf(Sub.getNode()))
return false;
uint64_t InsertIdx = N.getConstantOperandVal(2);
// Handle INSERT_SUBVECTOR(SRC0, EXTRACT_SUBVECTOR(SRC1)).
if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- Sub.getOperand(0).getValueType() == VT &&
- isa<ConstantSDNode>(Sub.getOperand(1))) {
+ Sub.getOperand(0).getValueType() == VT) {
uint64_t ExtractIdx = Sub.getConstantOperandVal(1);
for (int i = 0; i != (int)NumElts; ++i)
Mask.push_back(i);
@@ -7268,13 +7478,20 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
if (!getTargetShuffleInputs(peekThroughOneUseBitcasts(Sub), SubInputs,
SubMask, DAG, Depth + 1, ResolveKnownElts))
return false;
+
+ // Subvector shuffle inputs must not be larger than the subvector.
+ if (llvm::any_of(SubInputs, [SubVT](SDValue SubInput) {
+ return SubVT.getSizeInBits() < SubInput.getValueSizeInBits();
+ }))
+ return false;
+
if (SubMask.size() != NumSubElts) {
assert(((SubMask.size() % NumSubElts) == 0 ||
(NumSubElts % SubMask.size()) == 0) && "Illegal submask scale");
if ((NumSubElts % SubMask.size()) == 0) {
int Scale = NumSubElts / SubMask.size();
SmallVector<int,64> ScaledSubMask;
- scaleShuffleMask<int>(Scale, SubMask, ScaledSubMask);
+ narrowShuffleMaskElts(Scale, SubMask, ScaledSubMask);
SubMask = ScaledSubMask;
} else {
int Scale = SubMask.size() / NumSubElts;
@@ -7284,14 +7501,7 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
}
}
Ops.push_back(Src);
- for (SDValue &SubInput : SubInputs) {
- EVT SubSVT = SubInput.getValueType().getScalarType();
- EVT AltVT = EVT::getVectorVT(*DAG.getContext(), SubSVT,
- NumSizeInBits / SubSVT.getSizeInBits());
- Ops.push_back(DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), AltVT,
- DAG.getUNDEF(AltVT), SubInput,
- DAG.getIntPtrConstant(0, SDLoc(N))));
- }
+ Ops.append(SubInputs.begin(), SubInputs.end());
for (int i = 0; i != (int)NumElts; ++i)
Mask.push_back(i);
for (int i = 0; i != (int)NumSubElts; ++i) {
@@ -7304,75 +7514,83 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
}
return true;
}
- case ISD::SCALAR_TO_VECTOR: {
- // Match against a scalar_to_vector of an extract from a vector,
- // for PEXTRW/PEXTRB we must handle the implicit zext of the scalar.
- SDValue N0 = N.getOperand(0);
- SDValue SrcExtract;
+ case X86ISD::PINSRB:
+ case X86ISD::PINSRW:
+ case ISD::SCALAR_TO_VECTOR:
+ case ISD::INSERT_VECTOR_ELT: {
+ // Match against an insert_vector_elt/scalar_to_vector of an extract from a
+ // vector, for matching src/dst vector types.
+ SDValue Scl = N.getOperand(Opcode == ISD::SCALAR_TO_VECTOR ? 0 : 1);
+
+ unsigned DstIdx = 0;
+ if (Opcode != ISD::SCALAR_TO_VECTOR) {
+ // Check we have an in-range constant insertion index.
+ if (!isa<ConstantSDNode>(N.getOperand(2)) ||
+ N.getConstantOperandAPInt(2).uge(NumElts))
+ return false;
+ DstIdx = N.getConstantOperandVal(2);
+
+ // Attempt to recognise an INSERT*(VEC, 0, DstIdx) shuffle pattern.
+ if (X86::isZeroNode(Scl)) {
+ Ops.push_back(N.getOperand(0));
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(i == DstIdx ? SM_SentinelZero : (int)i);
+ return true;
+ }
+ }
- if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- N0.getOperand(0).getValueType() == VT) ||
- (N0.getOpcode() == X86ISD::PEXTRW &&
- N0.getOperand(0).getValueType() == MVT::v8i16) ||
- (N0.getOpcode() == X86ISD::PEXTRB &&
- N0.getOperand(0).getValueType() == MVT::v16i8)) {
- SrcExtract = N0;
+ // Peek through trunc/aext/zext.
+ // TODO: aext shouldn't require SM_SentinelZero padding.
+ // TODO: handle shift of scalars.
+ unsigned MinBitsPerElt = Scl.getScalarValueSizeInBits();
+ while (Scl.getOpcode() == ISD::TRUNCATE ||
+ Scl.getOpcode() == ISD::ANY_EXTEND ||
+ Scl.getOpcode() == ISD::ZERO_EXTEND) {
+ Scl = Scl.getOperand(0);
+ MinBitsPerElt =
+ std::min<unsigned>(MinBitsPerElt, Scl.getScalarValueSizeInBits());
}
+ if ((MinBitsPerElt % 8) != 0)
+ return false;
+ // Attempt to find the source vector the scalar was extracted from.
+ SDValue SrcExtract;
+ if ((Scl.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ Scl.getOpcode() == X86ISD::PEXTRW ||
+ Scl.getOpcode() == X86ISD::PEXTRB) &&
+ Scl.getOperand(0).getValueSizeInBits() == NumSizeInBits) {
+ SrcExtract = Scl;
+ }
if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)))
return false;
SDValue SrcVec = SrcExtract.getOperand(0);
EVT SrcVT = SrcVec.getValueType();
- unsigned NumSrcElts = SrcVT.getVectorNumElements();
- unsigned NumZeros = (NumBitsPerElt / SrcVT.getScalarSizeInBits()) - 1;
-
- unsigned SrcIdx = SrcExtract.getConstantOperandVal(1);
- if (NumSrcElts <= SrcIdx)
+ if (!SrcVT.getScalarType().isByteSized())
return false;
-
- Ops.push_back(SrcVec);
- Mask.push_back(SrcIdx);
- Mask.append(NumZeros, SM_SentinelZero);
- Mask.append(NumSrcElts - Mask.size(), SM_SentinelUndef);
- return true;
- }
- case X86ISD::PINSRB:
- case X86ISD::PINSRW: {
- SDValue InVec = N.getOperand(0);
- SDValue InScl = N.getOperand(1);
- SDValue InIndex = N.getOperand(2);
- if (!isa<ConstantSDNode>(InIndex) ||
- cast<ConstantSDNode>(InIndex)->getAPIntValue().uge(NumElts))
- return false;
- uint64_t InIdx = N.getConstantOperandVal(2);
-
- // Attempt to recognise a PINSR*(VEC, 0, Idx) shuffle pattern.
- if (X86::isZeroNode(InScl)) {
- Ops.push_back(InVec);
- for (unsigned i = 0; i != NumElts; ++i)
- Mask.push_back(i == InIdx ? SM_SentinelZero : (int)i);
- return true;
+ unsigned SrcIdx = SrcExtract.getConstantOperandVal(1);
+ unsigned SrcByte = SrcIdx * (SrcVT.getScalarSizeInBits() / 8);
+ unsigned DstByte = DstIdx * NumBytesPerElt;
+ MinBitsPerElt =
+ std::min<unsigned>(MinBitsPerElt, SrcVT.getScalarSizeInBits());
+
+ // Create 'identity' byte level shuffle mask and then add inserted bytes.
+ if (Opcode == ISD::SCALAR_TO_VECTOR) {
+ Ops.push_back(SrcVec);
+ Mask.append(NumSizeInBytes, SM_SentinelUndef);
+ } else {
+ Ops.push_back(SrcVec);
+ Ops.push_back(N.getOperand(0));
+ for (int i = 0; i != (int)NumSizeInBytes; ++i)
+ Mask.push_back(NumSizeInBytes + i);
}
- // Attempt to recognise a PINSR*(PEXTR*) shuffle pattern.
- // TODO: Expand this to support INSERT_VECTOR_ELT/etc.
- unsigned ExOp =
- (X86ISD::PINSRB == Opcode ? X86ISD::PEXTRB : X86ISD::PEXTRW);
- if (InScl.getOpcode() != ExOp)
- return false;
-
- SDValue ExVec = InScl.getOperand(0);
- SDValue ExIndex = InScl.getOperand(1);
- if (!isa<ConstantSDNode>(ExIndex) ||
- cast<ConstantSDNode>(ExIndex)->getAPIntValue().uge(NumElts))
- return false;
- uint64_t ExIdx = InScl.getConstantOperandVal(1);
-
- Ops.push_back(InVec);
- Ops.push_back(ExVec);
- for (unsigned i = 0; i != NumElts; ++i)
- Mask.push_back(i == InIdx ? NumElts + ExIdx : i);
+ unsigned MinBytesPerElts = MinBitsPerElt / 8;
+ MinBytesPerElts = std::min(MinBytesPerElts, NumBytesPerElt);
+ for (unsigned i = 0; i != MinBytesPerElts; ++i)
+ Mask[DstByte + i] = SrcByte + i;
+ for (unsigned i = MinBytesPerElts; i < NumBytesPerElt; ++i)
+ Mask[DstByte + i] = SM_SentinelZero;
return true;
}
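// Illustrative standalone sketch (plain C++, no LLVM types) of the byte-level
// mask built above for a hypothetical PINSRW(Base, PEXTRW(Src, 5), 2) on
// v8i16: Ops becomes {Src, Base} and the 16-entry byte mask is the identity
// over Base except for the two bytes copied from Src.
#include <cstdio>

int main() {
  const int NumSizeInBytes = 16, NumBytesPerElt = 2;
  const int DstIdx = 2, SrcIdx = 5, MinBytesPerElts = 2;
  int Mask[NumSizeInBytes];
  // Identity over the base vector, which is the second shuffle operand.
  for (int i = 0; i != NumSizeInBytes; ++i)
    Mask[i] = NumSizeInBytes + i;
  int DstByte = DstIdx * NumBytesPerElt; // 4
  int SrcByte = SrcIdx * NumBytesPerElt; // 10
  // Redirect the inserted bytes to the extraction source (first operand).
  for (int i = 0; i != MinBytesPerElts; ++i)
    Mask[DstByte + i] = SrcByte + i;
  for (int i = 0; i != NumSizeInBytes; ++i)
    std::printf("%d ", Mask[i]); // 16 17 18 19 10 11 22 23 ... 31
  std::printf("\n");
}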
case X86ISD::PACKSS:
@@ -7412,6 +7630,23 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
createPackShuffleMask(VT, Mask, IsUnary);
return true;
}
+ case X86ISD::VTRUNC: {
+ SDValue Src = N.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ // Truncated source must be a simple vector.
+ if (!SrcVT.isSimple() || (SrcVT.getSizeInBits() % 128) != 0 ||
+ (SrcVT.getScalarSizeInBits() % 8) != 0)
+ return false;
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ unsigned NumBitsPerSrcElt = SrcVT.getScalarSizeInBits();
+ unsigned Scale = NumBitsPerSrcElt / NumBitsPerElt;
+ assert((NumBitsPerSrcElt % NumBitsPerElt) == 0 && "Illegal truncation");
+ for (unsigned i = 0; i != NumSrcElts; ++i)
+ Mask.push_back(i * Scale);
+ Mask.append(NumElts - NumSrcElts, SM_SentinelZero);
+ Ops.push_back(Src);
+ return true;
+ }
case X86ISD::VSHLI:
case X86ISD::VSRLI: {
uint64_t ShiftVal = N.getConstantOperandVal(1);
@@ -7426,40 +7661,43 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
break;
uint64_t ByteShift = ShiftVal / 8;
- unsigned NumBytes = NumSizeInBits / 8;
- unsigned NumBytesPerElt = NumBitsPerElt / 8;
Ops.push_back(N.getOperand(0));
// Clear mask to all zeros and insert the shifted byte indices.
- Mask.append(NumBytes, SM_SentinelZero);
+ Mask.append(NumSizeInBytes, SM_SentinelZero);
if (X86ISD::VSHLI == Opcode) {
- for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
+ for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt)
for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
Mask[i + j] = i + j - ByteShift;
} else {
- for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
+ for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt)
for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
Mask[i + j - ByteShift] = i + j;
}
return true;
}
+ case X86ISD::VROTLI:
+ case X86ISD::VROTRI: {
+ // We can only decode 'whole byte' bit rotates as shuffles.
+ uint64_t RotateVal = N.getConstantOperandAPInt(1).urem(NumBitsPerElt);
+ if ((RotateVal % 8) != 0)
+ return false;
+ Ops.push_back(N.getOperand(0));
+ int Offset = RotateVal / 8;
+ Offset = (X86ISD::VROTLI == Opcode ? NumBytesPerElt - Offset : Offset);
+ for (int i = 0; i != (int)NumElts; ++i) {
+ int BaseIdx = i * NumBytesPerElt;
+ for (int j = 0; j != (int)NumBytesPerElt; ++j) {
+ Mask.push_back(BaseIdx + ((Offset + j) % NumBytesPerElt));
+ }
+ }
+ return true;
+ }
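// Illustrative standalone sketch (plain C++, no LLVM types): rotating each
// 32-bit lane left by 8 bits is, on a little-endian target, the per-lane byte
// shuffle {3,0,1,2}, which is exactly the mask the case above emits.
#include <cstdint>
#include <cstdio>

int main() {
  uint8_t Src[16], Dst[16];
  for (int i = 0; i != 16; ++i)
    Src[i] = uint8_t(i);
  const int NumBytesPerElt = 4, RotateVal = 8;
  int Offset = NumBytesPerElt - RotateVal / 8; // VROTLI: 4 - 1 == 3
  for (int i = 0; i != 4; ++i)                 // four i32 lanes
    for (int j = 0; j != NumBytesPerElt; ++j)
      Dst[i * NumBytesPerElt + j] =
          Src[i * NumBytesPerElt + (Offset + j) % NumBytesPerElt];
  // Each i32 lane of Dst now equals the i32 lane of Src rotated left by 8.
  for (int i = 0; i != 16; ++i)
    std::printf("%d ", Dst[i]); // 3 0 1 2 7 4 5 6 11 8 9 10 15 12 13 14
  std::printf("\n");
}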
case X86ISD::VBROADCAST: {
SDValue Src = N.getOperand(0);
- MVT SrcVT = Src.getSimpleValueType();
- if (!SrcVT.isVector())
+ if (!Src.getSimpleValueType().isVector())
return false;
-
- if (NumSizeInBits != SrcVT.getSizeInBits()) {
- assert((NumSizeInBits % SrcVT.getSizeInBits()) == 0 &&
- "Illegal broadcast type");
- SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
- NumSizeInBits / SrcVT.getScalarSizeInBits());
- Src = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), SrcVT,
- DAG.getUNDEF(SrcVT), Src,
- DAG.getIntPtrConstant(0, SDLoc(N)));
- }
-
Ops.push_back(Src);
Mask.append(NumElts, 0);
return true;
@@ -7476,22 +7714,10 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
(SrcVT.getScalarSizeInBits() % 8) != 0)
return false;
- unsigned NumSrcBitsPerElt = SrcVT.getScalarSizeInBits();
bool IsAnyExtend =
(ISD::ANY_EXTEND == Opcode || ISD::ANY_EXTEND_VECTOR_INREG == Opcode);
- DecodeZeroExtendMask(NumSrcBitsPerElt, NumBitsPerElt, NumElts, IsAnyExtend,
- Mask);
-
- if (NumSizeInBits != SrcVT.getSizeInBits()) {
- assert((NumSizeInBits % SrcVT.getSizeInBits()) == 0 &&
- "Illegal zero-extension type");
- SrcVT = MVT::getVectorVT(SrcVT.getSimpleVT().getScalarType(),
- NumSizeInBits / NumSrcBitsPerElt);
- Src = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), SrcVT,
- DAG.getUNDEF(SrcVT), Src,
- DAG.getIntPtrConstant(0, SDLoc(N)));
- }
-
+ DecodeZeroExtendMask(SrcVT.getScalarSizeInBits(), NumBitsPerElt, NumElts,
+ IsAnyExtend, Mask);
Ops.push_back(Src);
return true;
}
@@ -7549,7 +7775,7 @@ static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts,
SmallVectorImpl<SDValue> &Inputs,
SmallVectorImpl<int> &Mask,
APInt &KnownUndef, APInt &KnownZero,
- SelectionDAG &DAG, unsigned Depth,
+ const SelectionDAG &DAG, unsigned Depth,
bool ResolveKnownElts) {
EVT VT = Op.getValueType();
if (!VT.isSimple() || !VT.isVector())
@@ -7570,7 +7796,7 @@ static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts,
static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs,
SmallVectorImpl<int> &Mask,
- SelectionDAG &DAG, unsigned Depth = 0,
+ const SelectionDAG &DAG, unsigned Depth = 0,
bool ResolveKnownElts = true) {
EVT VT = Op.getValueType();
if (!VT.isSimple() || !VT.isVector())
@@ -7583,93 +7809,107 @@ static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs,
KnownZero, DAG, Depth, ResolveKnownElts);
}
-/// Returns the scalar element that will make up the ith
+/// Returns the scalar element that will make up the i'th
/// element of the result of the vector shuffle.
-static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
- unsigned Depth) {
- if (Depth == 6)
- return SDValue(); // Limit search depth.
+static SDValue getShuffleScalarElt(SDValue Op, unsigned Index,
+ SelectionDAG &DAG, unsigned Depth) {
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
+ return SDValue(); // Limit search depth.
- SDValue V = SDValue(N, 0);
- EVT VT = V.getValueType();
- unsigned Opcode = V.getOpcode();
+ EVT VT = Op.getValueType();
+ unsigned Opcode = Op.getOpcode();
+ unsigned NumElems = VT.getVectorNumElements();
// Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
- if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
+ if (auto *SV = dyn_cast<ShuffleVectorSDNode>(Op)) {
int Elt = SV->getMaskElt(Index);
if (Elt < 0)
return DAG.getUNDEF(VT.getVectorElementType());
- unsigned NumElems = VT.getVectorNumElements();
- SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
- : SV->getOperand(1);
- return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
+ SDValue Src = (Elt < (int)NumElems) ? SV->getOperand(0) : SV->getOperand(1);
+ return getShuffleScalarElt(Src, Elt % NumElems, DAG, Depth + 1);
}
// Recurse into target specific vector shuffles to find scalars.
if (isTargetShuffle(Opcode)) {
- MVT ShufVT = V.getSimpleValueType();
+ MVT ShufVT = VT.getSimpleVT();
MVT ShufSVT = ShufVT.getVectorElementType();
int NumElems = (int)ShufVT.getVectorNumElements();
SmallVector<int, 16> ShuffleMask;
SmallVector<SDValue, 16> ShuffleOps;
bool IsUnary;
- if (!getTargetShuffleMask(N, ShufVT, true, ShuffleOps, ShuffleMask, IsUnary))
+ if (!getTargetShuffleMask(Op.getNode(), ShufVT, true, ShuffleOps,
+ ShuffleMask, IsUnary))
return SDValue();
int Elt = ShuffleMask[Index];
if (Elt == SM_SentinelZero)
- return ShufSVT.isInteger() ? DAG.getConstant(0, SDLoc(N), ShufSVT)
- : DAG.getConstantFP(+0.0, SDLoc(N), ShufSVT);
+ return ShufSVT.isInteger() ? DAG.getConstant(0, SDLoc(Op), ShufSVT)
+ : DAG.getConstantFP(+0.0, SDLoc(Op), ShufSVT);
if (Elt == SM_SentinelUndef)
return DAG.getUNDEF(ShufSVT);
- assert(0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range");
- SDValue NewV = (Elt < NumElems) ? ShuffleOps[0] : ShuffleOps[1];
- return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
- Depth+1);
+ assert(0 <= Elt && Elt < (2 * NumElems) && "Shuffle index out of range");
+ SDValue Src = (Elt < NumElems) ? ShuffleOps[0] : ShuffleOps[1];
+ return getShuffleScalarElt(Src, Elt % NumElems, DAG, Depth + 1);
}
// Recurse into insert_subvector base/sub vector to find scalars.
- if (Opcode == ISD::INSERT_SUBVECTOR &&
- isa<ConstantSDNode>(N->getOperand(2))) {
- SDValue Vec = N->getOperand(0);
- SDValue Sub = N->getOperand(1);
- EVT SubVT = Sub.getValueType();
- unsigned NumSubElts = SubVT.getVectorNumElements();
- uint64_t SubIdx = N->getConstantOperandVal(2);
+ if (Opcode == ISD::INSERT_SUBVECTOR) {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ uint64_t SubIdx = Op.getConstantOperandVal(2);
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
if (SubIdx <= Index && Index < (SubIdx + NumSubElts))
- return getShuffleScalarElt(Sub.getNode(), Index - SubIdx, DAG, Depth + 1);
- return getShuffleScalarElt(Vec.getNode(), Index, DAG, Depth + 1);
+ return getShuffleScalarElt(Sub, Index - SubIdx, DAG, Depth + 1);
+ return getShuffleScalarElt(Vec, Index, DAG, Depth + 1);
+ }
+
+ // Recurse into concat_vectors sub vector to find scalars.
+ if (Opcode == ISD::CONCAT_VECTORS) {
+ EVT SubVT = Op.getOperand(0).getValueType();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ uint64_t SubIdx = Index / NumSubElts;
+ uint64_t SubElt = Index % NumSubElts;
+ return getShuffleScalarElt(Op.getOperand(SubIdx), SubElt, DAG, Depth + 1);
}
// Recurse into extract_subvector src vector to find scalars.
- if (Opcode == ISD::EXTRACT_SUBVECTOR &&
- isa<ConstantSDNode>(N->getOperand(1))) {
- SDValue Src = N->getOperand(0);
- uint64_t SrcIdx = N->getConstantOperandVal(1);
- return getShuffleScalarElt(Src.getNode(), Index + SrcIdx, DAG, Depth + 1);
+ if (Opcode == ISD::EXTRACT_SUBVECTOR) {
+ SDValue Src = Op.getOperand(0);
+ uint64_t SrcIdx = Op.getConstantOperandVal(1);
+ return getShuffleScalarElt(Src, Index + SrcIdx, DAG, Depth + 1);
}
- // Actual nodes that may contain scalar elements
+ // We only peek through bitcasts of the same vector width.
if (Opcode == ISD::BITCAST) {
- V = V.getOperand(0);
- EVT SrcVT = V.getValueType();
- unsigned NumElems = VT.getVectorNumElements();
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ if (SrcVT.isVector() && SrcVT.getVectorNumElements() == NumElems)
+ return getShuffleScalarElt(Src, Index, DAG, Depth + 1);
+ return SDValue();
+ }
- if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
- return SDValue();
+ // Actual nodes that may contain scalar elements
+
+ // For insert_vector_elt - either return the index matching scalar or recurse
+ // into the base vector.
+ if (Opcode == ISD::INSERT_VECTOR_ELT &&
+ isa<ConstantSDNode>(Op.getOperand(2))) {
+ if (Op.getConstantOperandAPInt(2) == Index)
+ return Op.getOperand(1);
+ return getShuffleScalarElt(Op.getOperand(0), Index, DAG, Depth + 1);
}
- if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
- return (Index == 0) ? V.getOperand(0)
+ if (Opcode == ISD::SCALAR_TO_VECTOR)
+ return (Index == 0) ? Op.getOperand(0)
: DAG.getUNDEF(VT.getVectorElementType());
- if (V.getOpcode() == ISD::BUILD_VECTOR)
- return V.getOperand(Index);
+ if (Opcode == ISD::BUILD_VECTOR)
+ return Op.getOperand(Index);
return SDValue();
}
@@ -7762,10 +8002,11 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
Elt = NextElt;
}
- // If our first insertion is not the first index then insert into zero
- // vector to break any register dependency else use SCALAR_TO_VECTOR.
+ // If our first insertion is not the first index or zeros are needed, then
+ // insert into zero vector. Otherwise, use SCALAR_TO_VECTOR (leaves high
+ // elements undefined).
if (!V) {
- if (i != 0)
+ if (i != 0 || NumZero)
V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
else {
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Elt);
@@ -7964,11 +8205,12 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
// FIXME: 256-bit vector instructions don't require a strict alignment,
// improve this code to support it better.
- unsigned RequiredAlign = VT.getSizeInBits()/8;
+ Align RequiredAlign(VT.getSizeInBits() / 8);
SDValue Chain = LD->getChain();
// Make sure the stack object alignment is at least 16 or 32.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
+ MaybeAlign InferredAlign = DAG.InferPtrAlign(Ptr);
+ if (!InferredAlign || *InferredAlign < RequiredAlign) {
if (MFI.isFixedObjectIndex(FI)) {
// Can't change the alignment. FIXME: It's possible to compute
// the exact stack offset and reference FI + adjust offset instead.
@@ -7983,9 +8225,9 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
// Ptr + (Offset & ~15).
if (Offset < 0)
return SDValue();
- if ((Offset % RequiredAlign) & 3)
+ if ((Offset % RequiredAlign.value()) & 3)
return SDValue();
- int64_t StartOffset = Offset & ~int64_t(RequiredAlign - 1);
+ int64_t StartOffset = Offset & ~int64_t(RequiredAlign.value() - 1);
if (StartOffset) {
SDLoc DL(Ptr);
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
@@ -8024,8 +8266,8 @@ static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) {
case ISD::SCALAR_TO_VECTOR:
return findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset);
case ISD::SRL:
- if (isa<ConstantSDNode>(Elt.getOperand(1))) {
- uint64_t Idx = Elt.getConstantOperandVal(1);
+ if (auto *IdxC = dyn_cast<ConstantSDNode>(Elt.getOperand(1))) {
+ uint64_t Idx = IdxC->getZExtValue();
if ((Idx % 8) == 0 && findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset)) {
ByteOffset += Idx / 8;
return true;
@@ -8033,13 +8275,13 @@ static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) {
}
break;
case ISD::EXTRACT_VECTOR_ELT:
- if (isa<ConstantSDNode>(Elt.getOperand(1))) {
+ if (auto *IdxC = dyn_cast<ConstantSDNode>(Elt.getOperand(1))) {
SDValue Src = Elt.getOperand(0);
unsigned SrcSizeInBits = Src.getScalarValueSizeInBits();
unsigned DstSizeInBits = Elt.getScalarValueSizeInBits();
if (DstSizeInBits == SrcSizeInBits && (SrcSizeInBits % 8) == 0 &&
findEltLoadSrc(Src, Ld, ByteOffset)) {
- uint64_t Idx = Elt.getConstantOperandVal(1);
+ uint64_t Idx = IdxC->getZExtValue();
ByteOffset += Idx * (SrcSizeInBits / 8);
return true;
}
@@ -8169,7 +8411,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
"Cannot merge volatile or atomic loads.");
SDValue NewLd =
DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
- LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags);
+ LDBase->getPointerInfo(), LDBase->getOriginalAlign(),
+ MMOFlags);
for (auto *LD : Loads)
if (LD)
DAG.makeEquivalentMemoryOrdering(LD, NewLd);
@@ -8247,14 +8490,16 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSizeInBits)
: MVT::getIntegerVT(LoadSizeInBits);
MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSizeInBits);
+ // Allow v4f32 on SSE1 only targets.
+ // FIXME: Add more isel patterns so we can just use VT directly.
+ if (!Subtarget.hasSSE2() && VT == MVT::v4f32)
+ VecVT = MVT::v4f32;
if (TLI.isTypeLegal(VecVT)) {
SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
- SDValue ResNode =
- DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT,
- LDBase->getPointerInfo(),
- LDBase->getAlignment(),
- MachineMemOperand::MOLoad);
+ SDValue ResNode = DAG.getMemIntrinsicNode(
+ X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT, LDBase->getPointerInfo(),
+ LDBase->getOriginalAlign(), MachineMemOperand::MOLoad);
for (auto *LD : Loads)
if (LD)
DAG.makeEquivalentMemoryOrdering(LD, ResNode);
@@ -8318,13 +8563,13 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
// Combine a vector ops (shuffles etc.) that is equal to build_vector load1,
// load2, load3, load4, <0, 1, 2, 3> into a vector load if the load addresses
// are consecutive, non-overlapping, and in the right order.
-static SDValue combineToConsecutiveLoads(EVT VT, SDNode *N, const SDLoc &DL,
+static SDValue combineToConsecutiveLoads(EVT VT, SDValue Op, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget,
bool isAfterLegalize) {
SmallVector<SDValue, 64> Elts;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
- if (SDValue Elt = getShuffleScalarElt(N, i, DAG, 0)) {
+ if (SDValue Elt = getShuffleScalarElt(Op, i, DAG, 0)) {
Elts.push_back(Elt);
continue;
}
@@ -8439,7 +8684,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
SDValue Ld = BVOp->getSplatValue(&UndefElements);
// Attempt to use VBROADCASTM
- // From this paterrn:
+ // From this pattern:
// a. t0 = (zext_i64 (bitcast_i8 v2i1 X))
// b. t1 = (build_vector t0 t0)
//
@@ -8486,8 +8731,8 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
LLVMContext *Ctx = DAG.getContext();
MVT PVT = TLI.getPointerTy(DAG.getDataLayout());
if (Subtarget.hasAVX()) {
- if (SplatBitSize <= 64 && Subtarget.hasAVX2() &&
- !(SplatBitSize == 64 && Subtarget.is32Bit())) {
+ if (SplatBitSize == 32 || SplatBitSize == 64 ||
+ (SplatBitSize < 32 && Subtarget.hasAVX2())) {
// Splatted value can fit in one INTEGER constant in constant pool.
// Load the constant and broadcast it.
MVT CVT = MVT::getIntegerVT(SplatBitSize);
@@ -8496,46 +8741,25 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
SDValue CP = DAG.getConstantPool(C, PVT);
unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
- unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
- Ld = DAG.getLoad(
- CVT, dl, DAG.getEntryNode(), CP,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
- Alignment);
- SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
- MVT::getVectorVT(CVT, Repeat), Ld);
- return DAG.getBitcast(VT, Brdcst);
- } else if (SplatBitSize == 32 || SplatBitSize == 64) {
- // Splatted value can fit in one FLOAT constant in constant pool.
- // Load the constant and broadcast it.
- // AVX have support for 32 and 64 bit broadcast for floats only.
- // No 64bit integer in 32bit subtarget.
- MVT CVT = MVT::getFloatingPointVT(SplatBitSize);
- // Lower the splat via APFloat directly, to avoid any conversion.
- Constant *C =
- SplatBitSize == 32
- ? ConstantFP::get(*Ctx,
- APFloat(APFloat::IEEEsingle(), SplatValue))
- : ConstantFP::get(*Ctx,
- APFloat(APFloat::IEEEdouble(), SplatValue));
- SDValue CP = DAG.getConstantPool(C, PVT);
- unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
-
- unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
- Ld = DAG.getLoad(
- CVT, dl, DAG.getEntryNode(), CP,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
- Alignment);
- SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
- MVT::getVectorVT(CVT, Repeat), Ld);
+ Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign();
+ SDVTList Tys =
+ DAG.getVTList(MVT::getVectorVT(CVT, Repeat), MVT::Other);
+ SDValue Ops[] = {DAG.getEntryNode(), CP};
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
+ SDValue Brdcst = DAG.getMemIntrinsicNode(
+ X86ISD::VBROADCAST_LOAD, dl, Tys, Ops, CVT, MPI, Alignment,
+ MachineMemOperand::MOLoad);
return DAG.getBitcast(VT, Brdcst);
- } else if (SplatBitSize > 64) {
+ }
+ if (SplatBitSize > 64) {
// Load the vector of constants and broadcast it.
MVT CVT = VT.getScalarType();
Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize,
*Ctx);
SDValue VCP = DAG.getConstantPool(VecC, PVT);
unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits();
- unsigned Alignment = cast<ConstantPoolSDNode>(VCP)->getAlignment();
+ Align Alignment = cast<ConstantPoolSDNode>(VCP)->getAlign();
Ld = DAG.getLoad(
MVT::getVectorVT(CVT, NumElm), dl, DAG.getEntryNode(), VCP,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
@@ -8560,10 +8784,12 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
bool ConstSplatVal =
(Ld.getOpcode() == ISD::Constant || Ld.getOpcode() == ISD::ConstantFP);
+ bool IsLoad = ISD::isNormalLoad(Ld.getNode());
// Make sure that all of the users of a non-constant load are from the
// BUILD_VECTOR node.
- if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode()))
+ // FIXME: Is the use count needed for non-constant, non-load case?
+ if (!ConstSplatVal && !IsLoad && !BVOp->isOnlyUserOf(Ld.getNode()))
return SDValue();
unsigned ScalarSize = Ld.getValueSizeInBits();
@@ -8603,18 +8829,17 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue CP =
DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
- unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
- Ld = DAG.getLoad(
- CVT, dl, DAG.getEntryNode(), CP,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
- Alignment);
+ Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign();
- return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = {DAG.getEntryNode(), CP};
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
+ return DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops, CVT,
+ MPI, Alignment, MachineMemOperand::MOLoad);
}
}
- bool IsLoad = ISD::isNormalLoad(Ld.getNode());
-
// Handle AVX2 in-register broadcasts.
if (!IsLoad && Subtarget.hasInt256() &&
(ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
@@ -8624,15 +8849,34 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
if (!IsLoad)
return SDValue();
+ // Make sure the non-chain result is only used by this build vector.
+ if (!Ld->hasNUsesOfValue(NumElts - NumUndefElts, 0))
+ return SDValue();
+
if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
- (Subtarget.hasVLX() && ScalarSize == 64))
- return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
+ (Subtarget.hasVLX() && ScalarSize == 64)) {
+ auto *LN = cast<LoadSDNode>(Ld);
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
+ SDValue BCast =
+ DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops,
+ LN->getMemoryVT(), LN->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
+ return BCast;
+ }
// The integer check is needed for the 64-bit into 128-bit so it doesn't match
// double since there is no vbroadcastsd xmm
- if (Subtarget.hasInt256() && Ld.getValueType().isInteger()) {
- if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
- return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
+ if (Subtarget.hasInt256() && Ld.getValueType().isInteger() &&
+ (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)) {
+ auto *LN = cast<LoadSDNode>(Ld);
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
+ SDValue BCast =
+ DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops,
+ LN->getMemoryVT(), LN->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
+ return BCast;
}
// Unsupported broadcast.
@@ -8746,20 +8990,6 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
return NV;
}
-static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) {
- assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
- Op.getScalarValueSizeInBits() == 1 &&
- "Can not convert non-constant vector");
- uint64_t Immediate = 0;
- for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
- SDValue In = Op.getOperand(idx);
- if (!In.isUndef())
- Immediate |= (cast<ConstantSDNode>(In)->getZExtValue() & 0x1) << idx;
- }
- SDLoc dl(Op);
- MVT VT = MVT::getIntegerVT(std::max((int)Op.getValueSizeInBits(), 8));
- return DAG.getConstant(Immediate, dl, VT);
-}
// Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -8782,11 +9012,11 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
SDValue In = Op.getOperand(idx);
if (In.isUndef())
continue;
- if (!isa<ConstantSDNode>(In))
- NonConstIdx.push_back(idx);
- else {
- Immediate |= (cast<ConstantSDNode>(In)->getZExtValue() & 0x1) << idx;
+ if (auto *InC = dyn_cast<ConstantSDNode>(In)) {
+ Immediate |= (InC->getZExtValue() & 0x1) << idx;
HasConstElts = true;
+ } else {
+ NonConstIdx.push_back(idx);
}
if (SplatIdx < 0)
SplatIdx = idx;
@@ -8805,9 +9035,24 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
if (Cond.getOpcode() != ISD::SETCC)
Cond = DAG.getNode(ISD::AND, dl, MVT::i8, Cond,
DAG.getConstant(1, dl, MVT::i8));
- return DAG.getSelect(dl, VT, Cond,
- DAG.getConstant(1, dl, VT),
- DAG.getConstant(0, dl, VT));
+
+ // Perform the select in the scalar domain so we can use cmov.
+ if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
+ SDValue Select = DAG.getSelect(dl, MVT::i32, Cond,
+ DAG.getAllOnesConstant(dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
+ Select = DAG.getBitcast(MVT::v32i1, Select);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Select, Select);
+ } else {
+ MVT ImmVT = MVT::getIntegerVT(std::max((unsigned)VT.getSizeInBits(), 8U));
+ SDValue Select = DAG.getSelect(dl, ImmVT, Cond,
+ DAG.getAllOnesConstant(dl, ImmVT),
+ DAG.getConstant(0, dl, ImmVT));
+ MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1;
+ Select = DAG.getBitcast(VecVT, Select);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Select,
+ DAG.getIntPtrConstant(0, dl));
+ }
}
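// Illustrative standalone sketch (plain C++): splatting a single i1 condition
// into a v16i1 mask amounts to selecting between 0xFFFF and 0 in a scalar
// register and reinterpreting the result as the 16-bit mask value, a form the
// backend can implement with setcc/cmov instead of a vector select.
#include <cstdint>
#include <cstdio>

static uint16_t splatConditionToV16i1(bool Cond) {
  return Cond ? uint16_t(0xFFFF) : uint16_t(0);
}

int main() { std::printf("0x%04x\n", splatConditionToV16i1(true)); } // 0xffff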
// insert elements one by one
@@ -8907,8 +9152,8 @@ static bool isHorizontalBinOpPart(const BuildVectorSDNode *N, unsigned Opcode,
if (!CanFold)
break;
- unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
- unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue();
+ unsigned I0 = Op0.getConstantOperandVal(1);
+ unsigned I1 = Op1.getConstantOperandVal(1);
if (i * 2 < NumElts) {
if (V0.isUndef()) {
@@ -9056,11 +9301,10 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(Op0.getOperand(1)) ||
- !isa<ConstantSDNode>(Op1.getOperand(1)) ||
Op0.getOperand(1) != Op1.getOperand(1))
return false;
- unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
+ unsigned I0 = Op0.getConstantOperandVal(1);
if (I0 != i)
return false;
@@ -9445,6 +9689,9 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
return SDValue();
}
+static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG);
+
/// If a BUILD_VECTOR's source elements all apply the same bit operation and
/// one of their operands is constant, lower to a pair of BUILD_VECTOR and
/// just apply the bit to the vectors.
@@ -9452,6 +9699,7 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
/// from this, but enough scalar bit operations are created from the later
/// legalization + scalarization stages to need basic support.
static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
MVT VT = Op->getSimpleValueType(0);
@@ -9515,7 +9763,14 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts);
SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts);
- return DAG.getNode(Opcode, DL, VT, LHS, RHS);
+ SDValue Res = DAG.getNode(Opcode, DL, VT, LHS, RHS);
+
+ if (!IsShift)
+ return Res;
+
+ // Immediately lower the shift to ensure the constant build vector doesn't
+ // get converted to a constant pool before the shift is lowered.
+ return LowerShift(Res, Subtarget, DAG);
}
/// Create a vector constant without a load. SSE/AVX provide the bare minimum
@@ -9571,9 +9826,11 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
IndicesVec = widenSubVector(IndicesVT.getSimpleVT(), IndicesVec, false,
Subtarget, DAG, SDLoc(IndicesVec));
- return extractSubVector(
- createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget), 0,
- DAG, DL, SizeInBits);
+ SDValue NewSrcVec =
+ createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
+ if (NewSrcVec)
+ return extractSubVector(NewSrcVec, 0, DAG, DL, SizeInBits);
+ return SDValue();
} else if (SrcVec.getValueSizeInBits() < SizeInBits) {
// Widen smaller SrcVec to match VT.
SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec));
@@ -9869,7 +10126,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return HorizontalOp;
if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, Subtarget, DAG))
return Broadcast;
- if (SDValue BitOp = lowerBuildVectorToBitOp(BV, DAG))
+ if (SDValue BitOp = lowerBuildVectorToBitOp(BV, Subtarget, DAG))
return BitOp;
unsigned EVTBits = EltVT.getSizeInBits();
@@ -9929,7 +10186,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
assert(!VarElt.getNode() && !InsIndex.getNode() &&
"Expected one variable element in this vector");
VarElt = Elt;
- InsIndex = DAG.getConstant(i, dl, getVectorIdxTy(DAG.getDataLayout()));
+ InsIndex = DAG.getVectorIdxConstant(i, dl);
}
}
Constant *CV = ConstantVector::get(ConstVecOps);
@@ -10929,6 +11186,71 @@ static SDValue lowerShuffleWithUNPCK(const SDLoc &DL, MVT VT,
return SDValue();
}
+/// Check if the mask can be mapped to a preliminary shuffle (vperm 64-bit)
+/// followed by unpack 256-bit.
+static SDValue lowerShuffleWithUNPCK256(const SDLoc &DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, SelectionDAG &DAG) {
+ SmallVector<int, 32> Unpckl, Unpckh;
+ createSplat2ShuffleMask(VT, Unpckl, /* Lo */ true);
+ createSplat2ShuffleMask(VT, Unpckh, /* Lo */ false);
+
+ unsigned UnpackOpcode;
+ if (isShuffleEquivalent(V1, V2, Mask, Unpckl))
+ UnpackOpcode = X86ISD::UNPCKL;
+ else if (isShuffleEquivalent(V1, V2, Mask, Unpckh))
+ UnpackOpcode = X86ISD::UNPCKH;
+ else
+ return SDValue();
+
+ // This is a "natural" unpack operation (rather than the 128-bit sectored
+ // operation implemented by AVX). We need to rearrange 64-bit chunks of the
+ // input in order to use the x86 instruction.
+ V1 = DAG.getVectorShuffle(MVT::v4f64, DL, DAG.getBitcast(MVT::v4f64, V1),
+ DAG.getUNDEF(MVT::v4f64), {0, 2, 1, 3});
+ V1 = DAG.getBitcast(VT, V1);
+ return DAG.getNode(UnpackOpcode, DL, VT, V1, V1);
+}
+
+// Check if the mask can be mapped to a TRUNCATE or VTRUNC, truncating the
+// source into the lower elements and zeroing the upper elements.
+// TODO: Merge with matchShuffleAsVPMOV.
+static bool matchShuffleAsVTRUNC(MVT &SrcVT, MVT &DstVT, MVT VT,
+ ArrayRef<int> Mask, const APInt &Zeroable,
+ const X86Subtarget &Subtarget) {
+ if (!VT.is512BitVector() && !Subtarget.hasVLX())
+ return false;
+
+ unsigned NumElts = Mask.size();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ unsigned MaxScale = 64 / EltSizeInBits;
+
+ for (unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) {
+ unsigned SrcEltBits = EltSizeInBits * Scale;
+ if (SrcEltBits < 32 && !Subtarget.hasBWI())
+ continue;
+ unsigned NumSrcElts = NumElts / Scale;
+ if (!isSequentialOrUndefInRange(Mask, 0, NumSrcElts, 0, Scale))
+ continue;
+ unsigned UpperElts = NumElts - NumSrcElts;
+ if (!Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnesValue())
+ continue;
+ SrcVT = MVT::getIntegerVT(EltSizeInBits * Scale);
+ SrcVT = MVT::getVectorVT(SrcVT, NumSrcElts);
+ DstVT = MVT::getIntegerVT(EltSizeInBits);
+ if ((NumSrcElts * EltSizeInBits) >= 128) {
+ // ISD::TRUNCATE
+ DstVT = MVT::getVectorVT(DstVT, NumSrcElts);
+ } else {
+ // X86ISD::VTRUNC
+ DstVT = MVT::getVectorVT(DstVT, 128 / EltSizeInBits);
+ }
+ return true;
+ }
+
+ return false;
+}
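// Illustrative standalone sketch (plain C++, no LLVM types) of the matching
// condition above: the low NumElts/Scale mask entries must read 0, Scale,
// 2*Scale, ... and every remaining entry must be zeroable.
#include <cstdio>

static bool matchesTruncate(const int *Mask, const bool *Zeroable, int NumElts,
                            int Scale) {
  int NumSrcElts = NumElts / Scale;
  for (int i = 0; i != NumSrcElts; ++i)
    if (Mask[i] >= 0 && Mask[i] != i * Scale) // negative entries are undef
      return false;
  for (int i = NumSrcElts; i != NumElts; ++i)
    if (!Zeroable[i])
      return false;
  return true;
}

int main() {
  // v16i8 shuffle taking every 2nd byte of a v8i16 source, upper half zero.
  int Mask[16] = {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1};
  bool Zeroable[16] = {};
  for (int i = 8; i != 16; ++i)
    Zeroable[i] = true;
  std::printf("%d\n", matchesTruncate(Mask, Zeroable, 16, 2)); // prints 1
}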
+
static bool matchShuffleAsVPMOV(ArrayRef<int> Mask, bool SwappedOps,
int Delta) {
int Size = (int)Mask.size();
@@ -11022,22 +11344,93 @@ static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, ArrayRef<int> Mask,
return DAG.getNode(X86ISD::VTRUNC, DL, VT, Src);
}
+/// Check whether a compaction lowering can be done by dropping even
+/// elements and compute how many times even elements must be dropped.
+///
+/// This handles shuffles which take every Nth element where N is a power of
+/// two. Example shuffle masks:
+///
+/// N = 1: 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14
+/// N = 1: 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+/// N = 2: 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12
+/// N = 2: 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28
+/// N = 3: 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8
+/// N = 3: 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24
+///
+/// Any of these lanes can of course be undef.
+///
+/// This routine only supports N <= 3.
+/// FIXME: Evaluate whether either AVX or AVX-512 have any opportunities here
+/// for larger N.
+///
+/// \returns N above, or the number of times even elements must be dropped if
+/// there is such a number. Otherwise returns zero.
+static int canLowerByDroppingEvenElements(ArrayRef<int> Mask,
+ bool IsSingleInput) {
+ // The modulus for the shuffle vector entries is based on whether this is
+ // a single input or not.
+ int ShuffleModulus = Mask.size() * (IsSingleInput ? 1 : 2);
+ assert(isPowerOf2_32((uint32_t)ShuffleModulus) &&
+ "We should only be called with masks with a power-of-2 size!");
+
+ uint64_t ModMask = (uint64_t)ShuffleModulus - 1;
+
+ // We track whether the input is viable for all power-of-2 strides 2^1, 2^2,
+ // and 2^3 simultaneously. This is because we may have ambiguity with
+ // partially undef inputs.
+ bool ViableForN[3] = {true, true, true};
+
+ for (int i = 0, e = Mask.size(); i < e; ++i) {
+ // Ignore undef lanes, we'll optimistically collapse them to the pattern we
+ // want.
+ if (Mask[i] < 0)
+ continue;
+
+ bool IsAnyViable = false;
+ for (unsigned j = 0; j != array_lengthof(ViableForN); ++j)
+ if (ViableForN[j]) {
+ uint64_t N = j + 1;
+
+ // The shuffle mask must be equal to (i * 2^N) % M.
+ if ((uint64_t)Mask[i] == (((uint64_t)i << N) & ModMask))
+ IsAnyViable = true;
+ else
+ ViableForN[j] = false;
+ }
+ // Early exit if we exhaust the possible powers of two.
+ if (!IsAnyViable)
+ break;
+ }
+
+ for (unsigned j = 0; j != array_lengthof(ViableForN); ++j)
+ if (ViableForN[j])
+ return j + 1;
+
+ // Return 0 as there is no viable power of two.
+ return 0;
+}
+
// X86 has dedicated pack instructions that can handle specific truncation
// operations: PACKSS and PACKUS.
+// Checks for compaction shuffle masks if MaxStages > 1.
+// TODO: Add support for matching multiple PACKSS/PACKUS stages.
static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2,
unsigned &PackOpcode, ArrayRef<int> TargetMask,
SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+ const X86Subtarget &Subtarget,
+ unsigned MaxStages = 1) {
unsigned NumElts = VT.getVectorNumElements();
unsigned BitSize = VT.getScalarSizeInBits();
- MVT PackSVT = MVT::getIntegerVT(BitSize * 2);
- MVT PackVT = MVT::getVectorVT(PackSVT, NumElts / 2);
+ assert(0 < MaxStages && MaxStages <= 3 && (BitSize << MaxStages) <= 64 &&
+ "Illegal maximum compaction");
- auto MatchPACK = [&](SDValue N1, SDValue N2) {
+ auto MatchPACK = [&](SDValue N1, SDValue N2, MVT PackVT) {
+ unsigned NumSrcBits = PackVT.getScalarSizeInBits();
+ unsigned NumPackedBits = NumSrcBits - BitSize;
SDValue VV1 = DAG.getBitcast(PackVT, N1);
SDValue VV2 = DAG.getBitcast(PackVT, N2);
- if (Subtarget.hasSSE41() || PackSVT == MVT::i16) {
- APInt ZeroMask = APInt::getHighBitsSet(BitSize * 2, BitSize);
+ if (Subtarget.hasSSE41() || BitSize == 8) {
+ APInt ZeroMask = APInt::getHighBitsSet(NumSrcBits, NumPackedBits);
if ((N1.isUndef() || DAG.MaskedValueIsZero(VV1, ZeroMask)) &&
(N2.isUndef() || DAG.MaskedValueIsZero(VV2, ZeroMask))) {
V1 = VV1;
@@ -11047,8 +11440,8 @@ static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2,
return true;
}
}
- if ((N1.isUndef() || DAG.ComputeNumSignBits(VV1) > BitSize) &&
- (N2.isUndef() || DAG.ComputeNumSignBits(VV2) > BitSize)) {
+ if ((N1.isUndef() || DAG.ComputeNumSignBits(VV1) > NumPackedBits) &&
+ (N2.isUndef() || DAG.ComputeNumSignBits(VV2) > NumPackedBits)) {
V1 = VV1;
V2 = VV2;
SrcVT = PackVT;
@@ -11058,19 +11451,25 @@ static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2,
return false;
};
- // Try binary shuffle.
- SmallVector<int, 32> BinaryMask;
- createPackShuffleMask(VT, BinaryMask, false);
- if (isTargetShuffleEquivalent(TargetMask, BinaryMask, V1, V2))
- if (MatchPACK(V1, V2))
- return true;
+ // Attempt to match against wider and wider compaction patterns.
+ for (unsigned NumStages = 1; NumStages <= MaxStages; ++NumStages) {
+ MVT PackSVT = MVT::getIntegerVT(BitSize << NumStages);
+ MVT PackVT = MVT::getVectorVT(PackSVT, NumElts >> NumStages);
- // Try unary shuffle.
- SmallVector<int, 32> UnaryMask;
- createPackShuffleMask(VT, UnaryMask, true);
- if (isTargetShuffleEquivalent(TargetMask, UnaryMask, V1))
- if (MatchPACK(V1, V1))
- return true;
+ // Try binary shuffle.
+ SmallVector<int, 32> BinaryMask;
+ createPackShuffleMask(VT, BinaryMask, false, NumStages);
+ if (isTargetShuffleEquivalent(TargetMask, BinaryMask, V1, V2))
+ if (MatchPACK(V1, V2, PackVT))
+ return true;
+
+ // Try unary shuffle.
+ SmallVector<int, 32> UnaryMask;
+ createPackShuffleMask(VT, UnaryMask, true, NumStages);
+ if (isTargetShuffleEquivalent(TargetMask, UnaryMask, V1))
+ if (MatchPACK(V1, V1, PackVT))
+ return true;
+ }
return false;
}
@@ -11080,12 +11479,44 @@ static SDValue lowerShuffleWithPACK(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
const X86Subtarget &Subtarget) {
MVT PackVT;
unsigned PackOpcode;
- if (matchShuffleWithPACK(VT, PackVT, V1, V2, PackOpcode, Mask, DAG,
- Subtarget))
- return DAG.getNode(PackOpcode, DL, VT, DAG.getBitcast(PackVT, V1),
- DAG.getBitcast(PackVT, V2));
+ unsigned SizeBits = VT.getSizeInBits();
+ unsigned EltBits = VT.getScalarSizeInBits();
+ unsigned MaxStages = Log2_32(64 / EltBits);
+ if (!matchShuffleWithPACK(VT, PackVT, V1, V2, PackOpcode, Mask, DAG,
+ Subtarget, MaxStages))
+ return SDValue();
- return SDValue();
+ unsigned CurrentEltBits = PackVT.getScalarSizeInBits();
+ unsigned NumStages = Log2_32(CurrentEltBits / EltBits);
+
+ // Don't lower multi-stage packs on AVX512, truncation is better.
+ if (NumStages != 1 && SizeBits == 128 && Subtarget.hasVLX())
+ return SDValue();
+
+ // Pack to the largest type possible:
+ // vXi64/vXi32 -> PACK*SDW and vXi16 -> PACK*SWB.
+ unsigned MaxPackBits = 16;
+ if (CurrentEltBits > 16 &&
+ (PackOpcode == X86ISD::PACKSS || Subtarget.hasSSE41()))
+ MaxPackBits = 32;
+
+ // Repeatedly pack down to the target size.
+ SDValue Res;
+ for (unsigned i = 0; i != NumStages; ++i) {
+ unsigned SrcEltBits = std::min(MaxPackBits, CurrentEltBits);
+ unsigned NumSrcElts = SizeBits / SrcEltBits;
+ MVT SrcSVT = MVT::getIntegerVT(SrcEltBits);
+ MVT DstSVT = MVT::getIntegerVT(SrcEltBits / 2);
+ MVT SrcVT = MVT::getVectorVT(SrcSVT, NumSrcElts);
+ MVT DstVT = MVT::getVectorVT(DstSVT, NumSrcElts * 2);
+ Res = DAG.getNode(PackOpcode, DL, DstVT, DAG.getBitcast(SrcVT, V1),
+ DAG.getBitcast(SrcVT, V2));
+ V1 = V2 = Res;
+ CurrentEltBits /= 2;
+ }
+ assert(Res && Res.getValueType() == VT &&
+ "Failed to lower compaction shuffle");
+ return Res;
}
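// Illustrative standalone sketch (plain C++): the loop above halves the
// element width once per stage, e.g. a two-stage v16i8 compaction from a
// matched v4i32 source becomes PACK*SDW (i32 -> i16) followed by PACK*SWB
// (i16 -> i8).
#include <algorithm>
#include <cstdio>

int main() {
  unsigned EltBits = 8, CurrentEltBits = 32; // matched a two-stage pack
  unsigned MaxPackBits = 32;                 // PACKSS, or PACKUS with SSE4.1
  while (CurrentEltBits != EltBits) {
    unsigned SrcEltBits = std::min(MaxPackBits, CurrentEltBits);
    std::printf("pack i%u -> i%u\n", SrcEltBits, SrcEltBits / 2);
    CurrentEltBits /= 2;
  }
  // prints: pack i32 -> i16
  //         pack i16 -> i8
}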
/// Try to emit a bitmask instruction for a shuffle.
@@ -11109,8 +11540,9 @@ static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
MVT LogicVT = VT;
if (EltVT == MVT::f32 || EltVT == MVT::f64) {
Zero = DAG.getConstantFP(0.0, DL, EltVT);
- AllOnes = DAG.getConstantFP(
- APFloat::getAllOnesValue(EltVT.getSizeInBits(), true), DL, EltVT);
+ APFloat AllOnesValue = APFloat::getAllOnesValue(
+ SelectionDAG::EVTToAPFloatSemantics(EltVT), EltVT.getSizeInBits());
+ AllOnes = DAG.getConstantFP(AllOnesValue, DL, EltVT);
LogicVT =
MVT::getVectorVT(EltVT == MVT::f64 ? MVT::i64 : MVT::i32, Mask.size());
} else {
@@ -11312,6 +11744,12 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
}
+ // If we have VPTERNLOG, we can use that as a bit blend.
+ if (Subtarget.hasVLX())
+ if (SDValue BitBlend =
+ lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
+ return BitBlend;
+
// Scale the blend by the number of bytes per element.
int Scale = VT.getScalarSizeInBits() / 8;
@@ -11622,10 +12060,101 @@ static SDValue lowerShuffleAsDecomposedShuffleBlend(
return DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
}
-/// Try to lower a vector shuffle as a rotation.
+/// Try to lower a vector shuffle as a bit rotation.
+///
+/// Look for a repeated rotation pattern in each sub group.
+/// Returns an ISD::ROTL element rotation amount or -1 on failure.
+static int matchShuffleAsBitRotate(ArrayRef<int> Mask, int NumSubElts) {
+ int NumElts = Mask.size();
+ assert((NumElts % NumSubElts) == 0 && "Illegal shuffle mask");
+
+ int RotateAmt = -1;
+ for (int i = 0; i != NumElts; i += NumSubElts) {
+ for (int j = 0; j != NumSubElts; ++j) {
+ int M = Mask[i + j];
+ if (M < 0)
+ continue;
+ if (!isInRange(M, i, i + NumSubElts))
+ return -1;
+ int Offset = (NumSubElts - (M - (i + j))) % NumSubElts;
+ if (0 <= RotateAmt && Offset != RotateAmt)
+ return -1;
+ RotateAmt = Offset;
+ }
+ }
+ return RotateAmt;
+}
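// A standalone sketch (not LLVM code) of the matcher above: every group of
// NumSubElts mask entries must describe the same in-group left rotation; the
// result is the rotation amount in elements, or -1 if no such rotation exists.
#include <cstdio>
#include <vector>

static int matchBitRotate(const std::vector<int> &Mask, int NumSubElts) {
  int NumElts = (int)Mask.size();
  int RotateAmt = -1;
  for (int i = 0; i != NumElts; i += NumSubElts) {
    for (int j = 0; j != NumSubElts; ++j) {
      int M = Mask[i + j];
      if (M < 0)
        continue; // Undef lanes match any rotation.
      if (M < i || M >= i + NumSubElts)
        return -1; // Elements must stay inside their own sub group.
      int Offset = (NumSubElts - (M - (i + j))) % NumSubElts;
      if (0 <= RotateAmt && Offset != RotateAmt)
        return -1;
      RotateAmt = Offset;
    }
  }
  return RotateAmt;
}

int main() {
  // v16i8 mask that rotates each i32 lane left by one byte (ROTL by 8 bits).
  std::vector<int> Mask = {3,  0, 1, 2,  7,  4,  5,  6,
                           11, 8, 9, 10, 15, 12, 13, 14};
  std::printf("rotate amount = %d element(s)\n", matchBitRotate(Mask, 4)); // 1
}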
+
+static int matchShuffleAsBitRotate(MVT &RotateVT, int EltSizeInBits,
+ const X86Subtarget &Subtarget,
+ ArrayRef<int> Mask) {
+ assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
+ assert(EltSizeInBits < 64 && "Can't rotate 64-bit integers");
+
+ // AVX512 only has vXi32/vXi64 rotates, so limit the rotation sub group size.
+ int MinSubElts = Subtarget.hasAVX512() ? std::max(32 / EltSizeInBits, 2) : 2;
+ int MaxSubElts = 64 / EltSizeInBits;
+ for (int NumSubElts = MinSubElts; NumSubElts <= MaxSubElts; NumSubElts *= 2) {
+ int RotateAmt = matchShuffleAsBitRotate(Mask, NumSubElts);
+ if (RotateAmt < 0)
+ continue;
+
+ int NumElts = Mask.size();
+ MVT RotateSVT = MVT::getIntegerVT(EltSizeInBits * NumSubElts);
+ RotateVT = MVT::getVectorVT(RotateSVT, NumElts / NumSubElts);
+ return RotateAmt * EltSizeInBits;
+ }
+
+ return -1;
+}
+
+/// Lower shuffle using X86ISD::VROTLI rotations.
+static SDValue lowerShuffleAsBitRotate(const SDLoc &DL, MVT VT, SDValue V1,
+ ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ // Only XOP + AVX512 targets have bit rotation instructions.
+ // If we at least have SSSE3 (PSHUFB) then we shouldn't attempt to use this.
+ bool IsLegal =
+ (VT.is128BitVector() && Subtarget.hasXOP()) || Subtarget.hasAVX512();
+ if (!IsLegal && Subtarget.hasSSE3())
+ return SDValue();
+
+ MVT RotateVT;
+ int RotateAmt = matchShuffleAsBitRotate(RotateVT, VT.getScalarSizeInBits(),
+ Subtarget, Mask);
+ if (RotateAmt < 0)
+ return SDValue();
+
+ // For pre-SSSE3 targets, if we are shuffling vXi8 elts then ISD::ROTL,
+ // expanded to OR(SRL,SHL), will be more efficient, but if they can
+ // widen to vXi16 or more then the existing lowering will be better.
+ if (!IsLegal) {
+ if ((RotateAmt % 16) == 0)
+ return SDValue();
+ // TODO: Use getTargetVShiftByConstNode.
+ unsigned ShlAmt = RotateAmt;
+ unsigned SrlAmt = RotateVT.getScalarSizeInBits() - RotateAmt;
+ V1 = DAG.getBitcast(RotateVT, V1);
+ SDValue SHL = DAG.getNode(X86ISD::VSHLI, DL, RotateVT, V1,
+ DAG.getTargetConstant(ShlAmt, DL, MVT::i8));
+ SDValue SRL = DAG.getNode(X86ISD::VSRLI, DL, RotateVT, V1,
+ DAG.getTargetConstant(SrlAmt, DL, MVT::i8));
+ SDValue Rot = DAG.getNode(ISD::OR, DL, RotateVT, SHL, SRL);
+ return DAG.getBitcast(VT, Rot);
+ }
+
+ SDValue Rot =
+ DAG.getNode(X86ISD::VROTLI, DL, RotateVT, DAG.getBitcast(RotateVT, V1),
+ DAG.getTargetConstant(RotateAmt, DL, MVT::i8));
+ return DAG.getBitcast(VT, Rot);
+}
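// A standalone sketch (not LLVM code) of the pre-SSSE3 fallback above: with no
// native bit-rotate instruction, each RotateVT lane is rotated by emitting
// OR(SHL, SRL) with complementary shift amounts (shown for a 32-bit lane,
// assuming 0 < Amt < 32).
#include <cstdint>
#include <cstdio>

static uint32_t rotl32Lane(uint32_t Lane, unsigned Amt) {
  unsigned ShlAmt = Amt;      // ShlAmt in the code above.
  unsigned SrlAmt = 32 - Amt; // SrlAmt = lane width - rotate amount.
  return (Lane << ShlAmt) | (Lane >> SrlAmt); // OR(VSHLI, VSRLI).
}

int main() {
  std::printf("0x%08x\n", rotl32Lane(0x11223344u, 8)); // 0x22334411
}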
+
+/// Try to match a vector shuffle as an element rotation.
///
/// This is used to support PALIGNR for SSSE3 or VALIGND/Q for AVX512.
-static int matchShuffleAsRotate(SDValue &V1, SDValue &V2, ArrayRef<int> Mask) {
+static int matchShuffleAsElementRotate(SDValue &V1, SDValue &V2,
+ ArrayRef<int> Mask) {
int NumElts = Mask.size();
// We need to detect various ways of spelling a rotation:
@@ -11712,7 +12241,7 @@ static int matchShuffleAsRotate(SDValue &V1, SDValue &V2, ArrayRef<int> Mask) {
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
ArrayRef<int> Mask) {
// Don't accept any shuffles with zero elements.
- if (any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
+ if (isAnyZero(Mask))
return -1;
// PALIGNR works on 128-bit lanes.
@@ -11720,7 +12249,7 @@ static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
if (!is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask))
return -1;
- int Rotation = matchShuffleAsRotate(V1, V2, RepeatedMask);
+ int Rotation = matchShuffleAsElementRotate(V1, V2, RepeatedMask);
if (Rotation <= 0)
return -1;
@@ -11788,7 +12317,7 @@ static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1,
/// elements, and takes the low elements as the result. Note that while this is
/// specified as a *right shift* because x86 is little-endian, it is a *left
/// rotate* of the vector lanes.
-static SDValue lowerShuffleAsRotate(const SDLoc &DL, MVT VT, SDValue V1,
+static SDValue lowerShuffleAsVALIGN(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -11800,7 +12329,7 @@ static SDValue lowerShuffleAsRotate(const SDLoc &DL, MVT VT, SDValue V1,
&& "VLX required for 128/256-bit vectors");
SDValue Lo = V1, Hi = V2;
- int Rotation = matchShuffleAsRotate(Lo, Hi, Mask);
+ int Rotation = matchShuffleAsElementRotate(Lo, Hi, Mask);
if (Rotation <= 0)
return SDValue();
@@ -12566,13 +13095,13 @@ static SDValue lowerShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT, SDValue V0,
assert(Subtarget.hasAVX2() &&
"We can only lower integer broadcasts with AVX2!");
- EVT EltVT = VT.getVectorElementType();
- EVT V0VT = V0.getValueType();
+ MVT EltVT = VT.getVectorElementType();
+ MVT V0VT = V0.getSimpleValueType();
assert(VT.isInteger() && "Unexpected non-integer trunc broadcast!");
assert(V0VT.isVector() && "Unexpected non-vector vector-sized value!");
- EVT V0EltVT = V0VT.getVectorElementType();
+ MVT V0EltVT = V0VT.getVectorElementType();
if (!V0EltVT.isInteger())
return SDValue();
@@ -12636,7 +13165,7 @@ static bool isSingleSHUFPSMask(ArrayRef<int> Mask) {
static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0,
SDValue N1, ArrayRef<int> Mask,
SelectionDAG &DAG) {
- EVT VT = N0.getValueType();
+ MVT VT = N0.getSimpleValueType();
assert((VT.is128BitVector() &&
(VT.getScalarSizeInBits() == 32 || VT.getScalarSizeInBits() == 64)) &&
"VPERM* family of shuffles requires 32-bit or 64-bit elements");
@@ -12649,9 +13178,8 @@ static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0,
return SDValue();
SDValue WideVec = N0.getOperand(0);
- EVT WideVT = WideVec.getValueType();
- if (!WideVT.is256BitVector() || !isa<ConstantSDNode>(N0.getOperand(1)) ||
- !isa<ConstantSDNode>(N1.getOperand(1)))
+ MVT WideVT = WideVec.getSimpleValueType();
+ if (!WideVT.is256BitVector())
return SDValue();
// Match extracts of each half of the wide source vector. Commute the shuffle
@@ -12699,7 +13227,6 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
// With MOVDDUP (v2f64) we can broadcast from a register or a load, otherwise
// we can only broadcast from a register with AVX2.
- unsigned NumElts = Mask.size();
unsigned NumEltBits = VT.getScalarSizeInBits();
unsigned Opcode = (VT == MVT::v2f64 && !Subtarget.hasAVX2())
? X86ISD::MOVDDUP
@@ -12707,15 +13234,7 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
bool BroadcastFromReg = (Opcode == X86ISD::MOVDDUP) || Subtarget.hasAVX2();
// Check that the mask is a broadcast.
- int BroadcastIdx = -1;
- for (int i = 0; i != (int)NumElts; ++i) {
- SmallVector<int, 8> BroadcastMask(NumElts, i);
- if (isShuffleEquivalent(V1, V2, Mask, BroadcastMask)) {
- BroadcastIdx = i;
- break;
- }
- }
-
+ int BroadcastIdx = getSplatIndex(Mask);
if (BroadcastIdx < 0)
return SDValue();
assert(BroadcastIdx < (int)Mask.size() && "We only expect to be called with "
@@ -12724,6 +13243,8 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
// Go up the chain of (vector) values to find a scalar load that we can
// combine with the broadcast.
+ // TODO: Combine this logic with findEltLoadSrc() used by
+ // EltsFromConsecutiveLoads().
int BitOffset = BroadcastIdx * NumEltBits;
SDValue V = V1;
for (;;) {
@@ -12739,14 +13260,19 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
BitOffset %= OpBitWidth;
continue;
}
+ case ISD::EXTRACT_SUBVECTOR: {
+ // The extraction index adds to the existing offset.
+ unsigned EltBitWidth = V.getScalarValueSizeInBits();
+ unsigned Idx = V.getConstantOperandVal(1);
+ unsigned BeginOffset = Idx * EltBitWidth;
+ BitOffset += BeginOffset;
+ V = V.getOperand(0);
+ continue;
+ }
case ISD::INSERT_SUBVECTOR: {
SDValue VOuter = V.getOperand(0), VInner = V.getOperand(1);
- auto ConstantIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
- if (!ConstantIdx)
- break;
-
int EltBitWidth = VOuter.getScalarValueSizeInBits();
- int Idx = (int)ConstantIdx->getZExtValue();
+ int Idx = (int)V.getConstantOperandVal(2);
int NumSubElts = (int)VInner.getSimpleValueType().getVectorNumElements();
int BeginOffset = Idx * EltBitWidth;
int EndOffset = BeginOffset + NumSubElts * EltBitWidth;
@@ -12777,8 +13303,6 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
DL, VT, V, BroadcastIdx, Subtarget, DAG))
return TruncBroadcast;
- MVT BroadcastVT = VT;
-
// Also check the simpler case, where we can directly reuse the scalar.
if (!BitCastSrc &&
((V.getOpcode() == ISD::BUILD_VECTOR && V.hasOneUse()) ||
@@ -12788,23 +13312,34 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
// If we can't broadcast from a register, check that the input is a load.
if (!BroadcastFromReg && !isShuffleFoldableLoad(V))
return SDValue();
- } else if (MayFoldLoad(V) && cast<LoadSDNode>(V)->isSimple()) {
- // 32-bit targets need to load i64 as a f64 and then bitcast the result.
- if (!Subtarget.is64Bit() && VT.getScalarType() == MVT::i64) {
- BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements());
- Opcode = (BroadcastVT.is128BitVector() && !Subtarget.hasAVX2())
- ? X86ISD::MOVDDUP
- : Opcode;
- }
+ } else if (ISD::isNormalLoad(V.getNode()) &&
+ cast<LoadSDNode>(V)->isSimple()) {
+ // We do not check for one-use of the vector load because a broadcast load
+ // is expected to be a win for code size, register pressure, and possibly
+ // uops even if the original vector load is not eliminated.
- // If we are broadcasting a load that is only used by the shuffle
- // then we can reduce the vector load to the broadcasted scalar load.
+ // Reduce the vector load and shuffle to a broadcasted scalar load.
LoadSDNode *Ld = cast<LoadSDNode>(V);
SDValue BaseAddr = Ld->getOperand(1);
- EVT SVT = BroadcastVT.getScalarType();
+ MVT SVT = VT.getScalarType();
unsigned Offset = BroadcastIdx * SVT.getStoreSize();
assert((int)(Offset * 8) == BitOffset && "Unexpected bit-offset");
SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
+
+ // Directly form VBROADCAST_LOAD if we're using VBROADCAST opcode rather
+ // than MOVDDUP.
+ // FIXME: Should we add VBROADCAST_LOAD isel patterns for pre-AVX?
+ if (Opcode == X86ISD::VBROADCAST) {
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = {Ld->getChain(), NewAddr};
+ V = DAG.getMemIntrinsicNode(
+ X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, SVT,
+ DAG.getMachineFunction().getMachineMemOperand(
+ Ld->getMemOperand(), Offset, SVT.getStoreSize()));
+ DAG.makeEquivalentMemoryOrdering(Ld, V);
+ return DAG.getBitcast(VT, V);
+ }
+ assert(SVT == MVT::f64 && "Unexpected VT!");
V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
DAG.getMachineFunction().getMachineMemOperand(
Ld->getMemOperand(), Offset, SVT.getStoreSize()));
@@ -12839,38 +13374,26 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
DAG.getBitcast(MVT::f64, V));
- // Bitcast back to the same scalar type as BroadcastVT.
- if (V.getValueType().getScalarType() != BroadcastVT.getScalarType()) {
- assert(NumEltBits == BroadcastVT.getScalarSizeInBits() &&
- "Unexpected vector element size");
- MVT ExtVT;
- if (V.getValueType().isVector()) {
- unsigned NumSrcElts = V.getValueSizeInBits() / NumEltBits;
- ExtVT = MVT::getVectorVT(BroadcastVT.getScalarType(), NumSrcElts);
- } else {
- ExtVT = BroadcastVT.getScalarType();
- }
- V = DAG.getBitcast(ExtVT, V);
- }
-
- // 32-bit targets need to load i64 as a f64 and then bitcast the result.
- if (!Subtarget.is64Bit() && V.getValueType() == MVT::i64) {
- V = DAG.getBitcast(MVT::f64, V);
- unsigned NumBroadcastElts = BroadcastVT.getVectorNumElements();
- BroadcastVT = MVT::getVectorVT(MVT::f64, NumBroadcastElts);
+ // If this is a scalar, do the broadcast on this type and bitcast.
+ if (!V.getValueType().isVector()) {
+ assert(V.getScalarValueSizeInBits() == NumEltBits &&
+ "Unexpected scalar size");
+ MVT BroadcastVT = MVT::getVectorVT(V.getSimpleValueType(),
+ VT.getVectorNumElements());
+ return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
}
// We only support broadcasting from 128-bit vectors to minimize the
// number of patterns we need to deal with in isel. So extract down to
// 128-bits, removing as many bitcasts as possible.
- if (V.getValueSizeInBits() > 128) {
- MVT ExtVT = V.getSimpleValueType().getScalarType();
- ExtVT = MVT::getVectorVT(ExtVT, 128 / ExtVT.getScalarSizeInBits());
+ if (V.getValueSizeInBits() > 128)
V = extract128BitVector(peekThroughBitcasts(V), 0, DAG, DL);
- V = DAG.getBitcast(ExtVT, V);
- }
- return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
+ // Otherwise cast V to a vector with the same element type as VT, but
+ // possibly narrower than VT. Then perform the broadcast.
+ unsigned NumSrcElts = V.getValueSizeInBits() / NumEltBits;
+ MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(), NumSrcElts);
+ return DAG.getNode(Opcode, DL, VT, DAG.getBitcast(CastVT, V));
}
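// A standalone AVX intrinsics sketch (not LLVM code) of the load case above:
// instead of loading the whole vector and shuffling, broadcast the selected
// scalar directly from memory at BaseAddr + BroadcastIdx * element size.
#include <immintrin.h> // AVX for _mm256_broadcast_ss
#include <cstdio>

static __m256 broadcastElement(const float *Base, unsigned BroadcastIdx) {
  return _mm256_broadcast_ss(Base + BroadcastIdx); // vbroadcastss from memory.
}

int main() {
  const float Data[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  alignas(32) float Out[8];
  _mm256_store_ps(Out, broadcastElement(Data, 2));
  for (float F : Out)
    std::printf("%g ", F); // 3 3 3 3 3 3 3 3
  std::printf("\n");
}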
// Check for whether we can use INSERTPS to perform the shuffle. We only use
@@ -13259,7 +13782,7 @@ static SDValue lowerV2I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// It's more profitable for pre-SSSE3 to use shuffles/unpacks.
if (Subtarget.hasSSSE3()) {
if (Subtarget.hasVLX())
- if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v2i64, V1, V2, Mask,
+ if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v2i64, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
@@ -13293,8 +13816,7 @@ static SDValue lowerShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {
SDValue LowV = V1, HighV = V2;
- int NewMask[4] = {Mask[0], Mask[1], Mask[2], Mask[3]};
-
+ SmallVector<int, 4> NewMask(Mask.begin(), Mask.end());
int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
if (NumV2Elements == 1) {
@@ -13548,7 +14070,7 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// It's more profitable for pre-SSSE3 to use shuffles/unpacks.
if (Subtarget.hasSSSE3()) {
if (Subtarget.hasVLX())
- if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v4i32, V1, V2, Mask,
+ if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v4i32, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
@@ -14186,6 +14708,11 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Mask, Subtarget, DAG))
return Broadcast;
+ // Try to use bit rotation instructions.
+ if (SDValue Rotate = lowerShuffleAsBitRotate(DL, MVT::v8i16, V1, Mask,
+ Subtarget, DAG))
+ return Rotate;
+
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
return V;
@@ -14262,6 +14789,29 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Zeroable, Subtarget, DAG))
return V;
+ // Attempt to lower using compaction, SSE41 is necessary for PACKUSDW.
+ // We could use SIGN_EXTEND_INREG+PACKSSDW for older targets but this seems to
+ // be slower than a PSHUFLW+PSHUFHW+PSHUFD chain.
+ int NumEvenDrops = canLowerByDroppingEvenElements(Mask, false);
+ if ((NumEvenDrops == 1 || NumEvenDrops == 2) && Subtarget.hasSSE41() &&
+ !Subtarget.hasVLX()) {
+ SmallVector<SDValue, 8> DWordClearOps(4, DAG.getConstant(0, DL, MVT::i32));
+ for (unsigned i = 0; i != 4; i += 1 << (NumEvenDrops - 1))
+ DWordClearOps[i] = DAG.getConstant(0xFFFF, DL, MVT::i32);
+ SDValue DWordClearMask = DAG.getBuildVector(MVT::v4i32, DL, DWordClearOps);
+ V1 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V1),
+ DWordClearMask);
+ V2 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V2),
+ DWordClearMask);
+ // Now pack things back together.
+ SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v8i16, V1, V2);
+ if (NumEvenDrops == 2) {
+ Result = DAG.getBitcast(MVT::v4i32, Result);
+ Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v8i16, Result, Result);
+ }
+ return Result;
+ }
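// A standalone SSE4.1 intrinsics sketch (not LLVM code) of the NumEvenDrops==1
// case above: clear the high half of every dword, then let PACKUSDW
// concatenate the surviving low words of both inputs into a v8i16 result.
#include <smmintrin.h> // SSE4.1 for _mm_packus_epi32
#include <cstdio>

static __m128i compactWordsOfDwords(__m128i V1, __m128i V2) {
  const __m128i DWordClearMask = _mm_set1_epi32(0x0000FFFF);
  V1 = _mm_and_si128(V1, DWordClearMask);
  V2 = _mm_and_si128(V2, DWordClearMask);
  return _mm_packus_epi32(V1, V2); // Low words of V1's dwords, then V2's.
}

int main() {
  __m128i A = _mm_set_epi32(0x44443333, 0x22221111, 0x08887777, 0x66665555);
  __m128i B = _mm_set_epi32(0x4CCCBBBB, 0x2AAA9999, 0x1111FFFF, 0x6EEEDDDD);
  alignas(16) unsigned short Out[8];
  _mm_store_si128((__m128i *)Out, compactWordsOfDwords(A, B));
  for (unsigned short W : Out)
    std::printf("%04x ", W); // 5555 7777 1111 3333 dddd ffff 9999 bbbb
  std::printf("\n");
}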
+
// Try to lower by permuting the inputs into an unpack instruction.
if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1, V2,
Mask, Subtarget, DAG))
@@ -14281,72 +14831,6 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Mask, Subtarget, DAG);
}
-/// Check whether a compaction lowering can be done by dropping even
-/// elements and compute how many times even elements must be dropped.
-///
-/// This handles shuffles which take every Nth element where N is a power of
-/// two. Example shuffle masks:
-///
-/// N = 1: 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14
-/// N = 1: 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
-/// N = 2: 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12
-/// N = 2: 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28
-/// N = 3: 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8
-/// N = 3: 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24
-///
-/// Any of these lanes can of course be undef.
-///
-/// This routine only supports N <= 3.
-/// FIXME: Evaluate whether either AVX or AVX-512 have any opportunities here
-/// for larger N.
-///
-/// \returns N above, or the number of times even elements must be dropped if
-/// there is such a number. Otherwise returns zero.
-static int canLowerByDroppingEvenElements(ArrayRef<int> Mask,
- bool IsSingleInput) {
- // The modulus for the shuffle vector entries is based on whether this is
- // a single input or not.
- int ShuffleModulus = Mask.size() * (IsSingleInput ? 1 : 2);
- assert(isPowerOf2_32((uint32_t)ShuffleModulus) &&
- "We should only be called with masks with a power-of-2 size!");
-
- uint64_t ModMask = (uint64_t)ShuffleModulus - 1;
-
- // We track whether the input is viable for all power-of-2 strides 2^1, 2^2,
- // and 2^3 simultaneously. This is because we may have ambiguity with
- // partially undef inputs.
- bool ViableForN[3] = {true, true, true};
-
- for (int i = 0, e = Mask.size(); i < e; ++i) {
- // Ignore undef lanes, we'll optimistically collapse them to the pattern we
- // want.
- if (Mask[i] < 0)
- continue;
-
- bool IsAnyViable = false;
- for (unsigned j = 0; j != array_lengthof(ViableForN); ++j)
- if (ViableForN[j]) {
- uint64_t N = j + 1;
-
- // The shuffle mask must be equal to (i * 2^N) % M.
- if ((uint64_t)Mask[i] == (((uint64_t)i << N) & ModMask))
- IsAnyViable = true;
- else
- ViableForN[j] = false;
- }
- // Early exit if we exhaust the possible powers of two.
- if (!IsAnyViable)
- break;
- }
-
- for (unsigned j = 0; j != array_lengthof(ViableForN); ++j)
- if (ViableForN[j])
- return j + 1;
-
- // Return 0 as there is no viable power of two.
- return 0;
-}
-
static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {
@@ -14410,6 +14894,11 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Mask, Subtarget, DAG))
return Broadcast;
+ // Try to use bit rotation instructions.
+ if (SDValue Rotate = lowerShuffleAsBitRotate(DL, MVT::v16i8, V1, Mask,
+ Subtarget, DAG))
+ return Rotate;
+
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
return V;
@@ -14524,6 +15013,10 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Zeroable, Subtarget, DAG))
return V;
+ // Check for compaction patterns.
+ bool IsSingleInput = V2.isUndef();
+ int NumEvenDrops = canLowerByDroppingEvenElements(Mask, IsSingleInput);
+
// Check for SSSE3 which lets us lower all v16i8 shuffles much more directly
// with PSHUFB. It is important to do this before we attempt to generate any
// blends but after all of the single-input lowerings. If the single input
@@ -14534,10 +15027,13 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// and there are *very* few patterns that would actually be faster than the
// PSHUFB approach because of its ability to zero lanes.
//
+ // If the mask is a binary compaction, we can more efficiently perform this
+ // as a PACKUS(AND(),AND()) - which is quicker than UNPACK(PSHUFB(),PSHUFB()).
+ //
// FIXME: The only exceptions to the above are blends which are exact
// interleavings with direct instructions supporting them. We currently don't
// handle those well here.
- if (Subtarget.hasSSSE3()) {
+ if (Subtarget.hasSSSE3() && (IsSingleInput || NumEvenDrops != 1)) {
bool V1InUse = false;
bool V2InUse = false;
@@ -14595,8 +15091,7 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// We special case these as they can be particularly efficiently handled with
// the PACKUSWB instruction on x86 and they show up in common patterns of
// rearranging bytes to truncate wide elements.
- bool IsSingleInput = V2.isUndef();
- if (int NumEvenDrops = canLowerByDroppingEvenElements(Mask, IsSingleInput)) {
+ if (NumEvenDrops) {
// NumEvenDrops is the power of two stride of the elements. Another way of
// thinking about it is that we need to drop the even elements this many
// times to get the original input.
@@ -14604,23 +15099,23 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// First we need to zero all the dropped bytes.
assert(NumEvenDrops <= 3 &&
"No support for dropping even elements more than 3 times.");
- SmallVector<SDValue, 16> ByteClearOps(16, DAG.getConstant(0, DL, MVT::i8));
- for (unsigned i = 0; i != 16; i += 1 << NumEvenDrops)
- ByteClearOps[i] = DAG.getConstant(0xFF, DL, MVT::i8);
- SDValue ByteClearMask = DAG.getBuildVector(MVT::v16i8, DL, ByteClearOps);
- V1 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V1, ByteClearMask);
+ SmallVector<SDValue, 8> WordClearOps(8, DAG.getConstant(0, DL, MVT::i16));
+ for (unsigned i = 0; i != 8; i += 1 << (NumEvenDrops - 1))
+ WordClearOps[i] = DAG.getConstant(0xFF, DL, MVT::i16);
+ SDValue WordClearMask = DAG.getBuildVector(MVT::v8i16, DL, WordClearOps);
+ V1 = DAG.getNode(ISD::AND, DL, MVT::v8i16, DAG.getBitcast(MVT::v8i16, V1),
+ WordClearMask);
if (!IsSingleInput)
- V2 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V2, ByteClearMask);
+ V2 = DAG.getNode(ISD::AND, DL, MVT::v8i16, DAG.getBitcast(MVT::v8i16, V2),
+ WordClearMask);
// Now pack things back together.
- V1 = DAG.getBitcast(MVT::v8i16, V1);
- V2 = IsSingleInput ? V1 : DAG.getBitcast(MVT::v8i16, V2);
- SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, V1, V2);
+ SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, V1,
+ IsSingleInput ? V1 : V2);
for (int i = 1; i < NumEvenDrops; ++i) {
Result = DAG.getBitcast(MVT::v8i16, Result);
Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, Result, Result);
}
-
return Result;
}
@@ -14725,37 +15220,13 @@ static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1,
int NumElements = VT.getVectorNumElements();
int SplitNumElements = NumElements / 2;
MVT ScalarVT = VT.getVectorElementType();
- MVT SplitVT = MVT::getVectorVT(ScalarVT, NumElements / 2);
+ MVT SplitVT = MVT::getVectorVT(ScalarVT, SplitNumElements);
- // Rather than splitting build-vectors, just build two narrower build
- // vectors. This helps shuffling with splats and zeros.
+ // Use splitVector/extractSubVector so that split build-vectors just build two
+ // narrower build vectors. This helps shuffling with splats and zeros.
auto SplitVector = [&](SDValue V) {
- V = peekThroughBitcasts(V);
-
- MVT OrigVT = V.getSimpleValueType();
- int OrigNumElements = OrigVT.getVectorNumElements();
- int OrigSplitNumElements = OrigNumElements / 2;
- MVT OrigScalarVT = OrigVT.getVectorElementType();
- MVT OrigSplitVT = MVT::getVectorVT(OrigScalarVT, OrigNumElements / 2);
-
SDValue LoV, HiV;
-
- auto *BV = dyn_cast<BuildVectorSDNode>(V);
- if (!BV) {
- LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigSplitVT, V,
- DAG.getIntPtrConstant(0, DL));
- HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigSplitVT, V,
- DAG.getIntPtrConstant(OrigSplitNumElements, DL));
- } else {
-
- SmallVector<SDValue, 16> LoOps, HiOps;
- for (int i = 0; i < OrigSplitNumElements; ++i) {
- LoOps.push_back(BV->getOperand(i));
- HiOps.push_back(BV->getOperand(i + OrigSplitNumElements));
- }
- LoV = DAG.getBuildVector(OrigSplitVT, DL, LoOps);
- HiV = DAG.getBuildVector(OrigSplitVT, DL, HiOps);
- }
+ std::tie(LoV, HiV) = splitVector(peekThroughBitcasts(V), DAG, DL);
return std::make_pair(DAG.getBitcast(SplitVT, LoV),
DAG.getBitcast(SplitVT, HiV));
};
@@ -15963,7 +16434,7 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
SmallVector<int, 2> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v4i64, Mask, RepeatedMask)) {
SmallVector<int, 4> PSHUFDMask;
- scaleShuffleMask<int>(2, RepeatedMask, PSHUFDMask);
+ narrowShuffleMaskElts(2, RepeatedMask, PSHUFDMask);
return DAG.getBitcast(
MVT::v4i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32,
@@ -15984,7 +16455,7 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// If we have VLX support, we can use VALIGN or VEXPAND.
if (Subtarget.hasVLX()) {
- if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v4i64, V1, V2, Mask,
+ if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v4i64, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
@@ -16085,13 +16556,14 @@ static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// If we have a single input shuffle with different shuffle patterns in the
// two 128-bit lanes use the variable mask to VPERMILPS.
if (V2.isUndef()) {
- SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
- if (!is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask))
+ if (!is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask)) {
+ SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, V1, VPermMask);
-
- if (Subtarget.hasAVX2())
+ }
+ if (Subtarget.hasAVX2()) {
+ SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, VPermMask, V1);
-
+ }
// Otherwise, fall back.
return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v8f32, V1, V2, Mask,
DAG, Subtarget);
@@ -16190,7 +16662,7 @@ static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// If we have VLX support, we can use VALIGN or EXPAND.
if (Subtarget.hasVLX()) {
- if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v8i32, V1, V2, Mask,
+ if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v8i32, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
@@ -16210,9 +16682,14 @@ static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
return V;
- // If the shuffle patterns aren't repeated but it is a single input, directly
- // generate a cross-lane VPERMD instruction.
if (V2.isUndef()) {
+ // Try to produce a fixed cross-128-bit lane permute followed by unpack
+ // because that should be faster than the variable permute alternatives.
+ if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v8i32, Mask, V1, V2, DAG))
+ return V;
+
+ // If the shuffle patterns aren't repeated but it's a single input, directly
+ // generate a cross-lane VPERMD instruction.
SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8i32, VPermMask, V1);
}
@@ -16294,6 +16771,16 @@ static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return V;
if (V2.isUndef()) {
+ // Try to use bit rotation instructions.
+ if (SDValue Rotate =
+ lowerShuffleAsBitRotate(DL, MVT::v16i16, V1, Mask, Subtarget, DAG))
+ return Rotate;
+
+ // Try to produce a fixed cross-128-bit lane permute followed by unpack
+ // because that should be faster than the variable permute alternatives.
+ if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v16i16, Mask, V1, V2, DAG))
+ return V;
+
// There are no generalized cross-lane shuffle operations available on i16
// element types.
if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask)) {
@@ -16379,7 +16866,7 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use shift instructions.
if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
@@ -16387,6 +16874,12 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Subtarget, DAG))
return Rotate;
+ // Try to use bit rotation instructions.
+ if (V2.isUndef())
+ if (SDValue Rotate =
+ lowerShuffleAsBitRotate(DL, MVT::v32i8, V1, Mask, Subtarget, DAG))
+ return Rotate;
+
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
@@ -16396,6 +16889,11 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// There are no generalized cross-lane shuffle operations available on i8
// element types.
if (V2.isUndef() && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask)) {
+ // Try to produce a fixed cross-128-bit lane permute followed by unpack
+ // because that should be faster than the variable permute alternatives.
+ if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v32i8, Mask, V1, V2, DAG))
+ return V;
+
if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
return V;
@@ -16518,13 +17016,14 @@ static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
assert(VT.is512BitVector() && "Unexpected vector size for 512bit shuffle.");
// TODO - use Zeroable like we do for lowerV2X128VectorShuffle?
- SmallVector<int, 4> WidenedMask;
- if (!canWidenShuffleElements(Mask, WidenedMask))
+ SmallVector<int, 4> Widened128Mask;
+ if (!canWidenShuffleElements(Mask, Widened128Mask))
return SDValue();
+ assert(Widened128Mask.size() == 4 && "Shuffle widening mismatch");
// Try to use an insert into a zero vector.
- if (WidenedMask[0] == 0 && (Zeroable & 0xf0) == 0xf0 &&
- (WidenedMask[1] == 1 || (Zeroable & 0x0c) == 0x0c)) {
+ if (Widened128Mask[0] == 0 && (Zeroable & 0xf0) == 0xf0 &&
+ (Widened128Mask[1] == 1 || (Zeroable & 0x0c) == 0x0c)) {
unsigned NumElts = ((Zeroable & 0x0c) == 0x0c) ? 2 : 4;
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
@@ -16536,37 +17035,34 @@ static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
// Check for patterns which can be matched with a single insert of a 256-bit
// subvector.
- bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask,
- {0, 1, 2, 3, 0, 1, 2, 3});
- if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask,
- {0, 1, 2, 3, 8, 9, 10, 11})) {
+ bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 2, 3, 0, 1, 2, 3});
+ if (OnlyUsesV1 ||
+ isShuffleEquivalent(V1, V2, Mask, {0, 1, 2, 3, 8, 9, 10, 11})) {
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4);
- SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
- OnlyUsesV1 ? V1 : V2,
- DAG.getIntPtrConstant(0, DL));
+ SDValue SubVec =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, OnlyUsesV1 ? V1 : V2,
+ DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, V1, SubVec,
DAG.getIntPtrConstant(4, DL));
}
- assert(WidenedMask.size() == 4);
-
// See if this is an insertion of the lower 128-bits of V2 into V1.
bool IsInsert = true;
int V2Index = -1;
for (int i = 0; i < 4; ++i) {
- assert(WidenedMask[i] >= -1);
- if (WidenedMask[i] < 0)
+ assert(Widened128Mask[i] >= -1 && "Illegal shuffle sentinel value");
+ if (Widened128Mask[i] < 0)
continue;
// Make sure all V1 subvectors are in place.
- if (WidenedMask[i] < 4) {
- if (WidenedMask[i] != i) {
+ if (Widened128Mask[i] < 4) {
+ if (Widened128Mask[i] != i) {
IsInsert = false;
break;
}
} else {
// Make sure we only have a single V2 index and its the lowest 128-bits.
- if (V2Index >= 0 || WidenedMask[i] != 4) {
+ if (V2Index >= 0 || Widened128Mask[i] != 4) {
IsInsert = false;
break;
}
@@ -16580,16 +17076,26 @@ static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
return insert128BitVector(V1, Subvec, V2Index * 2, DAG, DL);
}
+ // See if we can widen to a 256-bit lane shuffle; we're going to lose the
+ // 128-lane UNDEF info by lowering to X86ISD::SHUF128 anyway, so by widening
+ // where possible we at least ensure the lanes stay sequential to help later
+ // combines.
+ SmallVector<int, 2> Widened256Mask;
+ if (canWidenShuffleElements(Widened128Mask, Widened256Mask)) {
+ Widened128Mask.clear();
+ narrowShuffleMaskElts(2, Widened256Mask, Widened128Mask);
+ }
+
// Try to lower to vshuf64x2/vshuf32x4.
SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
unsigned PermMask = 0;
// Ensure elements came from the same Op.
for (int i = 0; i < 4; ++i) {
- assert(WidenedMask[i] >= -1);
- if (WidenedMask[i] < 0)
+ assert(Widened128Mask[i] >= -1 && "Illegal shuffle sentinel value");
+ if (Widened128Mask[i] < 0)
continue;
- SDValue Op = WidenedMask[i] >= 4 ? V2 : V1;
+ SDValue Op = Widened128Mask[i] >= 4 ? V2 : V1;
unsigned OpIndex = i / 2;
if (Ops[OpIndex].isUndef())
Ops[OpIndex] = Op;
@@ -16598,7 +17104,7 @@ static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
// Convert the 128-bit shuffle mask selection values into 128-bit selection
// bits defined by a vshuf64x2 instruction's immediate control byte.
- PermMask |= (WidenedMask[i] % 4) << (i * 2);
+ PermMask |= (Widened128Mask[i] % 4) << (i * 2);
}
return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
@@ -16696,6 +17202,12 @@ static SDValue lowerV16F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return lowerShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG);
}
+ // Try to create an in-lane repeating shuffle mask and then shuffle the
+ // results into the target lanes.
+ if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
+ DL, MVT::v16f32, V1, V2, Mask, Subtarget, DAG))
+ return V;
+
// If we have a single input shuffle with different shuffle patterns in the
// 128-bit lanes and don't lane cross, use variable mask VPERMILPS.
if (V2.isUndef() &&
@@ -16728,7 +17240,7 @@ static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
SmallVector<int, 2> Repeated128Mask;
if (is128BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated128Mask)) {
SmallVector<int, 4> PSHUFDMask;
- scaleShuffleMask<int>(2, Repeated128Mask, PSHUFDMask);
+ narrowShuffleMaskElts(2, Repeated128Mask, PSHUFDMask);
return DAG.getBitcast(
MVT::v8i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32,
@@ -16752,7 +17264,7 @@ static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Shift;
// Try to use VALIGN.
- if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v8i64, V1, V2, Mask,
+ if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v8i64, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
@@ -16814,7 +17326,7 @@ static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Shift;
// Try to use VALIGN.
- if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v16i32, V1, V2, Mask,
+ if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v16i32, V1, V2, Mask,
Subtarget, DAG))
return Rotate;
@@ -16833,6 +17345,13 @@ static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
CastV1, CastV2, DAG);
return DAG.getBitcast(MVT::v16i32, ShufPS);
}
+
+ // Try to create an in-lane repeating shuffle mask and then shuffle the
+ // results into the target lanes.
+ if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
+ DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG))
+ return V;
+
// If we have AVX512F support, we can use VEXPAND.
if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask, V1, V2,
DAG, Subtarget))
@@ -16841,6 +17360,7 @@ static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
+
return lowerShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
}
@@ -16865,6 +17385,11 @@ static SDValue lowerV32I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2, DAG))
return V;
+ // Use dedicated pack instructions for masks that match their pattern.
+ if (SDValue V =
+ lowerShuffleWithPACK(DL, MVT::v32i16, Mask, V1, V2, DAG, Subtarget))
+ return V;
+
// Try to use shift instructions.
if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
@@ -16876,18 +17401,23 @@ static SDValue lowerV32I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Rotate;
if (V2.isUndef()) {
+ // Try to use bit rotation instructions.
+ if (SDValue Rotate =
+ lowerShuffleAsBitRotate(DL, MVT::v32i16, V1, Mask, Subtarget, DAG))
+ return Rotate;
+
SmallVector<int, 8> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v32i16, Mask, RepeatedMask)) {
// As this is a single-input shuffle, the repeated mask should be
// a strictly valid v8i16 mask that we can pass through to the v8i16
// lowering to handle even the v32 case.
- return lowerV8I16GeneralSingleInputShuffle(
- DL, MVT::v32i16, V1, RepeatedMask, Subtarget, DAG);
+ return lowerV8I16GeneralSingleInputShuffle(DL, MVT::v32i16, V1,
+ RepeatedMask, Subtarget, DAG);
}
}
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i16, Mask, V1, V2,
@@ -16933,6 +17463,17 @@ static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Subtarget, DAG))
return Rotate;
+ // Try to use bit rotation instructions.
+ if (V2.isUndef())
+ if (SDValue Rotate =
+ lowerShuffleAsBitRotate(DL, MVT::v64i8, V1, Mask, Subtarget, DAG))
+ return Rotate;
+
+ // Lower as AND if possible.
+ if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v64i8, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
+ return Masked;
+
if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v64i8, Mask, V1, V2,
Zeroable, Subtarget, DAG))
return PSHUFB;
@@ -16995,6 +17536,18 @@ static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
Subtarget, DAG))
return Broadcast;
+ if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) {
+ // Try using bit ops for masking and blending before falling back to
+ // splitting.
+ if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
+ Subtarget, DAG))
+ return V;
+ if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
+ return V;
+
+ return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
+ }
+
// Dispatch to each element type for lowering. If we don't have support for
// specific element type shuffles at 512 bits, immediately split them and
// lower them. Each lowering routine of a given type is allowed to assume that
@@ -17477,6 +18030,10 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
unsigned EltSize = VT.getScalarSizeInBits();
unsigned NumElts = VT.getVectorNumElements();
+ // Expand v32i16/v64i8 without BWI.
+ if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
+ return SDValue();
+
// If the VSELECT is on a 512-bit type, we have to convert a non-i1 condition
// into an i1 condition so that we can use the mask-based 512-bit blend
// instructions.
@@ -17532,14 +18089,24 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
+ SDValue Vec = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+ assert(isa<ConstantSDNode>(Idx) && "Constant index expected");
SDLoc dl(Op);
- if (!Op.getOperand(0).getSimpleValueType().is128BitVector())
+ if (!Vec.getSimpleValueType().is128BitVector())
return SDValue();
if (VT.getSizeInBits() == 8) {
- SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32,
- Op.getOperand(0), Op.getOperand(1));
+ // If IdxVal is 0, it's cheaper to do a move instead of a pextrb, unless
+ // we're going to zero extend the register or fold the store.
+ if (llvm::isNullConstant(Idx) && !MayFoldIntoZeroExtend(Op) &&
+ !MayFoldIntoStore(Op))
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getBitcast(MVT::v4i32, Vec), Idx));
+
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, Vec, Idx);
return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract);
}
@@ -17552,22 +18119,17 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
if (!Op.hasOneUse())
return SDValue();
SDNode *User = *Op.getNode()->use_begin();
- if ((User->getOpcode() != ISD::STORE ||
- isNullConstant(Op.getOperand(1))) &&
+ if ((User->getOpcode() != ISD::STORE || isNullConstant(Idx)) &&
(User->getOpcode() != ISD::BITCAST ||
User->getValueType(0) != MVT::i32))
return SDValue();
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
- DAG.getBitcast(MVT::v4i32, Op.getOperand(0)),
- Op.getOperand(1));
+ DAG.getBitcast(MVT::v4i32, Vec), Idx);
return DAG.getBitcast(MVT::f32, Extract);
}
- if (VT == MVT::i32 || VT == MVT::i64) {
- // ExtractPS/pextrq works with constant index.
- if (isa<ConstantSDNode>(Op.getOperand(1)))
+ if (VT == MVT::i32 || VT == MVT::i64)
return Op;
- }
return SDValue();
}
@@ -17580,6 +18142,7 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
SDLoc dl(Vec);
MVT VecVT = Vec.getSimpleValueType();
SDValue Idx = Op.getOperand(1);
+ auto* IdxC = dyn_cast<ConstantSDNode>(Idx);
MVT EltVT = Op.getSimpleValueType();
assert((VecVT.getVectorNumElements() <= 16 || Subtarget.hasBWI()) &&
@@ -17587,7 +18150,7 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
// variable index can't be handled in mask registers,
// extend vector to VR512/128
- if (!isa<ConstantSDNode>(Idx)) {
+ if (!IdxC) {
unsigned NumElts = VecVT.getVectorNumElements();
// Extending v8i1/v16i1 to 512-bit get better performance on KNL
// than extending to 128/256bit.
@@ -17598,7 +18161,7 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
}
- unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ unsigned IdxVal = IdxC->getZExtValue();
if (IdxVal == 0) // the operation is legal
return Op;
@@ -17627,11 +18190,12 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SDValue Vec = Op.getOperand(0);
MVT VecVT = Vec.getSimpleValueType();
SDValue Idx = Op.getOperand(1);
+ auto* IdxC = dyn_cast<ConstantSDNode>(Idx);
if (VecVT.getVectorElementType() == MVT::i1)
return ExtractBitFromMaskVector(Op, DAG, Subtarget);
- if (!isa<ConstantSDNode>(Idx)) {
+ if (!IdxC) {
// It's more profitable to go through memory (1 cycle throughput)
// than using a VMOVD + VPERMV/PSHUFB sequence (2/3 cycles throughput)
// The IACA tool was used to get performance estimation
@@ -17665,7 +18229,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return SDValue();
}
- unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ unsigned IdxVal = IdxC->getZExtValue();
// If this is a 256-bit vector result, first extract the 128-bit vector and
// then extract the element from the 128-bit vector.
@@ -17697,9 +18261,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Vec), Idx));
- // Transform it so it match pextrw which produces a 32-bit result.
- SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32,
- Op.getOperand(0), Op.getOperand(1));
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32, Vec, Idx);
return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract);
}
@@ -17789,9 +18351,7 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
// Copy into a k-register, extract to v1i1 and insert_subvector.
SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Elt);
-
- return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT, Vec, EltInVec,
- Op.getOperand(2));
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT, Vec, EltInVec, Idx);
}
SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
@@ -17864,11 +18424,22 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");
// This will be just movd/movq/movss/movsd.
- if (IdxVal == 0 && ISD::isBuildVectorAllZeros(N0.getNode()) &&
- (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 ||
- EltVT == MVT::i64)) {
- N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
- return getShuffleVectorZeroOrUndef(N1, 0, true, Subtarget, DAG);
+ if (IdxVal == 0 && ISD::isBuildVectorAllZeros(N0.getNode())) {
+ if (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 ||
+ EltVT == MVT::i64) {
+ N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
+ return getShuffleVectorZeroOrUndef(N1, 0, true, Subtarget, DAG);
+ }
+
+ // We can't directly insert an i8 or i16 into a vector, so zero extend
+ // it to i32 first.
+ if (EltVT == MVT::i16 || EltVT == MVT::i8) {
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, N1);
+ MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
+ N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, N1);
+ N1 = getShuffleVectorZeroOrUndef(N1, 0, true, Subtarget, DAG);
+ return DAG.getBitcast(VT, N1);
+ }
}
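// A standalone SSE2 intrinsics sketch (not LLVM code) of the i8/i16 path
// above: zero extend the scalar to i32 and let a single MOVD
// (_mm_cvtsi32_si128) place it in element 0 of an otherwise zero register.
#include <emmintrin.h> // SSE2
#include <cstdio>

static __m128i insertByteIntoZeroVector(unsigned char B) {
  return _mm_cvtsi32_si128((int)B); // movd: low 32 bits = zext(B), rest zero.
}

int main() {
  alignas(16) unsigned char Out[16];
  _mm_store_si128((__m128i *)Out, insertByteIntoZeroVector(0x7F));
  for (unsigned char C : Out)
    std::printf("%02x ", C); // 7f 00 00 ... 00
  std::printf("\n");
}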
// Transform it so it matches pinsr{b,w}, which expects a GR32 as its second
@@ -17981,12 +18552,8 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
SDLoc dl(Op);
SDValue Vec = Op.getOperand(0);
- SDValue Idx = Op.getOperand(1);
-
- if (!isa<ConstantSDNode>(Idx))
- return SDValue();
+ uint64_t IdxVal = Op.getConstantOperandVal(1);
- unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
if (IdxVal == 0) // the operation is legal
return Op;
@@ -18045,7 +18612,7 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetConstantPool(
- CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), OpFlag);
+ CP->getConstVal(), PtrVT, CP->getAlign(), CP->getOffset(), OpFlag);
SDLoc DL(CP);
Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
@@ -18554,25 +19121,47 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
Op0, Op1, Amt);
}
-
- assert((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
- "Unexpected funnel shift type!");
+ assert(
+ (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
+ "Unexpected funnel shift type!");
// Expand slow SHLD/SHRD cases if we are not optimizing for size.
bool OptForSize = DAG.shouldOptForSize();
- if (!OptForSize && Subtarget.isSHLDSlow())
- return SDValue();
+ bool ExpandFunnel = !OptForSize && Subtarget.isSHLDSlow();
- if (IsFSHR)
- std::swap(Op0, Op1);
+ // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z & (bw-1))) >> bw.
+ // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z & (bw-1))).
+ if ((VT == MVT::i8 || (ExpandFunnel && VT == MVT::i16)) &&
+ !isa<ConstantSDNode>(Amt)) {
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, Amt.getValueType());
+ SDValue HiShift = DAG.getConstant(EltSizeInBits, DL, Amt.getValueType());
+ Op0 = DAG.getAnyExtOrTrunc(Op0, DL, MVT::i32);
+ Op1 = DAG.getZExtOrTrunc(Op1, DL, MVT::i32);
+ Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt, Mask);
+ SDValue Res = DAG.getNode(ISD::SHL, DL, MVT::i32, Op0, HiShift);
+ Res = DAG.getNode(ISD::OR, DL, MVT::i32, Res, Op1);
+ if (IsFSHR) {
+ Res = DAG.getNode(ISD::SRL, DL, MVT::i32, Res, Amt);
+ } else {
+ Res = DAG.getNode(ISD::SHL, DL, MVT::i32, Res, Amt);
+ Res = DAG.getNode(ISD::SRL, DL, MVT::i32, Res, HiShift);
+ }
+ return DAG.getZExtOrTrunc(Res, DL, VT);
+ }
+
+ if (VT == MVT::i8 || ExpandFunnel)
+ return SDValue();
// i16 needs to modulo the shift amount, but i32/i64 have implicit modulo.
- if (VT == MVT::i16)
+ if (VT == MVT::i16) {
Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt,
DAG.getConstant(15, DL, Amt.getValueType()));
+ unsigned FSHOp = (IsFSHR ? X86ISD::FSHR : X86ISD::FSHL);
+ return DAG.getNode(FSHOp, DL, VT, Op0, Op1, Amt);
+ }
- unsigned SHDOp = (IsFSHR ? X86ISD::SHRD : X86ISD::SHLD);
- return DAG.getNode(SHDOp, DL, VT, Op0, Op1, Amt);
+ return Op;
}
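// A standalone sketch (not LLVM code) of the i8 funnel-shift expansion above:
// widen both operands to 32 bits, concatenate them, shift by the masked
// amount, and truncate back to the original width.
#include <cstdint>
#include <cstdio>

static uint8_t fshl8(uint8_t X, uint8_t Y, unsigned Z) {
  uint32_t Concat = ((uint32_t)X << 8) | Y; // (aext(x) << bw) | zext(y)
  unsigned Amt = Z & 7;                     // z & (bw - 1)
  return (uint8_t)((Concat << Amt) >> 8);
}

static uint8_t fshr8(uint8_t X, uint8_t Y, unsigned Z) {
  uint32_t Concat = ((uint32_t)X << 8) | Y;
  unsigned Amt = Z & 7;
  return (uint8_t)(Concat >> Amt);
}

int main() {
  std::printf("fshl8(0xAB, 0xCD, 4) = 0x%02X\n", fshl8(0xAB, 0xCD, 4)); // 0xBC
  std::printf("fshr8(0xAB, 0xCD, 4) = 0x%02X\n", fshr8(0xAB, 0xCD, 4)); // 0xBC
}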
// Try to use a packed vector operation to handle i64 on 32-bit targets when
@@ -18682,6 +19271,56 @@ static SDValue vectorizeExtractedCast(SDValue Cast, SelectionDAG &DAG,
DAG.getIntPtrConstant(0, DL));
}
+/// Given a scalar cast to FP with a cast to integer operand (almost an ftrunc),
+/// try to vectorize the cast ops. This will avoid an expensive round-trip
+/// between XMM and GPR.
+static SDValue lowerFPToIntToFP(SDValue CastToFP, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // TODO: Allow FP_TO_UINT.
+ SDValue CastToInt = CastToFP.getOperand(0);
+ MVT VT = CastToFP.getSimpleValueType();
+ if (CastToInt.getOpcode() != ISD::FP_TO_SINT || VT.isVector())
+ return SDValue();
+
+ MVT IntVT = CastToInt.getSimpleValueType();
+ SDValue X = CastToInt.getOperand(0);
+ MVT SrcVT = X.getSimpleValueType();
+ if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
+ return SDValue();
+
+ // See if we have 128-bit vector cast instructions for this type of cast.
+ // We need cvttps2dq/cvttpd2dq and cvtdq2ps/cvtdq2pd.
+ if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
+ IntVT != MVT::i32)
+ return SDValue();
+
+ unsigned SrcSize = SrcVT.getSizeInBits();
+ unsigned IntSize = IntVT.getSizeInBits();
+ unsigned VTSize = VT.getSizeInBits();
+ MVT VecSrcVT = MVT::getVectorVT(SrcVT, 128 / SrcSize);
+ MVT VecIntVT = MVT::getVectorVT(IntVT, 128 / IntSize);
+ MVT VecVT = MVT::getVectorVT(VT, 128 / VTSize);
+
+ // We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64.
+ unsigned ToIntOpcode =
+ SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
+ unsigned ToFPOpcode =
+ IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
+
+ // sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0
+ //
+ // We are not defining the high elements (for example, by zeroing them) because
+ // that could nullify any performance advantage that we hoped to gain from
+ // this vector op hack. We do not expect any adverse effects (like denorm
+ // penalties) with cast ops.
+ SDLoc DL(CastToFP);
+ SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL);
+ SDValue VecX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecSrcVT, X);
+ SDValue VCastToInt = DAG.getNode(ToIntOpcode, DL, VecIntVT, VecX);
+ SDValue VCastToFP = DAG.getNode(ToFPOpcode, DL, VecVT, VCastToInt);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VCastToFP, ZeroIdx);
+}
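// A standalone SSE2 intrinsics sketch (not LLVM code) of the transform above:
// keep the float -> int -> float round-trip in the low element of an XMM
// register (cvttps2dq + cvtdq2ps) instead of bouncing the value through a GPR.
// Note: _mm_set_ss zeroes the upper elements, which the DAG version avoids.
#include <emmintrin.h> // SSE2
#include <cstdio>

static float truncToIntAndBack(float X) {
  __m128 VecX = _mm_set_ss(X);              // scalar_to_vector
  __m128i AsInt = _mm_cvttps_epi32(VecX);   // fp_to_sint (truncating)
  __m128 BackToFP = _mm_cvtepi32_ps(AsInt); // sint_to_fp
  return _mm_cvtss_f32(BackToFP);           // extract element 0
}

int main() {
  std::printf("%f\n", truncToIntAndBack(42.75f)); // 42.000000
}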
+
static SDValue lowerINT_TO_FP_vXi64(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(Op);
@@ -18739,15 +19378,15 @@ static SDValue lowerINT_TO_FP_vXi64(SDValue Op, SelectionDAG &DAG,
SmallVector<SDValue, 4> SignCvts(4);
SmallVector<SDValue, 4> Chains(4);
for (int i = 0; i != 4; ++i) {
- SDValue Src = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, SignSrc,
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, SignSrc,
DAG.getIntPtrConstant(i, DL));
if (IsStrict) {
SignCvts[i] =
DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {MVT::f32, MVT::Other},
- {Op.getOperand(0), Src});
+ {Op.getOperand(0), Elt});
Chains[i] = SignCvts[i].getValue(1);
} else {
- SignCvts[i] = DAG.getNode(ISD::SINT_TO_FP, DL, MVT::f32, Src);
+ SignCvts[i] = DAG.getNode(ISD::SINT_TO_FP, DL, MVT::f32, Elt);
}
}
SDValue SignCvt = DAG.getBuildVector(VT, DL, SignCvts);
@@ -18784,6 +19423,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
return Extract;
+ if (SDValue R = lowerFPToIntToFP(Op, DAG, Subtarget))
+ return R;
+
if (SrcVT.isVector()) {
if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
// Note: Since v2f64 is a legal type. We don't need to zero extend the
@@ -18832,21 +19474,23 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
return LowerF128Call(Op, DAG, RTLIB::getSINTTOFP(SrcVT, VT));
SDValue ValueToStore = Src;
- if (SrcVT == MVT::i64 && UseSSEReg && !Subtarget.is64Bit())
+ if (SrcVT == MVT::i64 && Subtarget.hasSSE2() && !Subtarget.is64Bit())
// Bitcasting to f64 here allows us to do a single 64-bit store from
// an SSE register, avoiding the store forwarding penalty that would come
// with two 32-bit stores.
ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
- unsigned Size = SrcVT.getSizeInBits()/8;
+ unsigned Size = SrcVT.getStoreSize();
+ Align Alignment(Size);
MachineFunction &MF = DAG.getMachineFunction();
auto PtrVT = getPointerTy(MF.getDataLayout());
- int SSFI = MF.getFrameInfo().CreateStackObject(Size, Size, false);
+ int SSFI = MF.getFrameInfo().CreateStackObject(Size, Alignment, false);
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
- Chain = DAG.getStore(
- Chain, dl, ValueToStore, StackSlot,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
- std::pair<SDValue, SDValue> Tmp = BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
+ Chain = DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, Alignment);
+ std::pair<SDValue, SDValue> Tmp =
+ BuildFILD(VT, SrcVT, dl, Chain, StackSlot, MPI, Alignment, DAG);
if (IsStrict)
return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
@@ -18854,58 +19498,40 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
return Tmp.first;
}
-std::pair<SDValue, SDValue> X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
- SDValue StackSlot,
- SelectionDAG &DAG) const {
+std::pair<SDValue, SDValue> X86TargetLowering::BuildFILD(
+ EVT DstVT, EVT SrcVT, const SDLoc &DL, SDValue Chain, SDValue Pointer,
+ MachinePointerInfo PtrInfo, Align Alignment, SelectionDAG &DAG) const {
// Build the FILD
- SDLoc DL(Op);
SDVTList Tys;
- bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
+ bool useSSE = isScalarFPTypeInSSEReg(DstVT);
if (useSSE)
- Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Glue);
+ Tys = DAG.getVTList(MVT::f80, MVT::Other);
else
- Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
+ Tys = DAG.getVTList(DstVT, MVT::Other);
- unsigned ByteSize = SrcVT.getSizeInBits() / 8;
-
- FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(StackSlot);
- MachineMemOperand *LoadMMO;
- if (FI) {
- int SSFI = FI->getIndex();
- LoadMMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
- MachineMemOperand::MOLoad, ByteSize, ByteSize);
- } else {
- LoadMMO = cast<LoadSDNode>(StackSlot)->getMemOperand();
- StackSlot = StackSlot.getOperand(1);
- }
- SDValue FILDOps[] = {Chain, StackSlot};
+ SDValue FILDOps[] = {Chain, Pointer};
SDValue Result =
- DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, DL,
- Tys, FILDOps, SrcVT, LoadMMO);
+ DAG.getMemIntrinsicNode(X86ISD::FILD, DL, Tys, FILDOps, SrcVT, PtrInfo,
+ Alignment, MachineMemOperand::MOLoad);
Chain = Result.getValue(1);
if (useSSE) {
- SDValue InFlag = Result.getValue(2);
-
- // FIXME: Currently the FST is glued to the FILD_FLAG. This
- // shouldn't be necessary except that RFP cannot be live across
- // multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
- unsigned SSFISize = Op.getValueSizeInBits() / 8;
- int SSFI = MF.getFrameInfo().CreateStackObject(SSFISize, SSFISize, false);
+ unsigned SSFISize = DstVT.getStoreSize();
+ int SSFI =
+ MF.getFrameInfo().CreateStackObject(SSFISize, Align(SSFISize), false);
auto PtrVT = getPointerTy(MF.getDataLayout());
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
Tys = DAG.getVTList(MVT::Other);
- SDValue FSTOps[] = {Chain, Result, StackSlot, InFlag};
+ SDValue FSTOps[] = {Chain, Result, StackSlot};
MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
- MachineMemOperand::MOStore, SSFISize, SSFISize);
+ MachineMemOperand::MOStore, SSFISize, Align(SSFISize));
- Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys, FSTOps,
- Op.getValueType(), StoreMMO);
+ Chain =
+ DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys, FSTOps, DstVT, StoreMMO);
Result = DAG.getLoad(
- Op.getValueType(), DL, Chain, StackSlot,
+ DstVT, DL, Chain, StackSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
Chain = Result.getValue(1);
}
@@ -18948,7 +19574,7 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
Constant *C0 = ConstantDataVector::get(*Context, CV0);
auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
- SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, 16);
+ SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, Align(16));
SmallVector<Constant*,2> CV1;
CV1.push_back(
@@ -18958,7 +19584,7 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
APInt(64, 0x4530000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
- SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, 16);
+ SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, Align(16));
// Load the 64-bit value into an XMM register.
SDValue XR1 =
@@ -19163,13 +19789,13 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
*DAG.getContext(),
APFloat(APFloat::IEEEdouble(), APInt(64, 0x4330000000000000ULL)));
auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
- SDValue CPIdx = DAG.getConstantPool(Bias, PtrVT, /*Alignment*/ 8);
+ SDValue CPIdx = DAG.getConstantPool(Bias, PtrVT, Align(8));
SDVTList Tys = DAG.getVTList(MVT::v4f64, MVT::Other);
SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
SDValue VBias = DAG.getMemIntrinsicNode(
X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::f64,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
- /*Alignment*/ 8, MachineMemOperand::MOLoad);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Align(8),
+ MachineMemOperand::MOLoad);
SDValue Or = DAG.getNode(ISD::OR, DL, MVT::v4i64, ZExtIn,
DAG.getBitcast(MVT::v4i64, VBias));
@@ -19337,15 +19963,18 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
return SDValue();
// Make a 64-bit buffer, and use it to build an FILD.
- SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
+ SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64, 8);
+ int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI);
if (SrcVT == MVT::i32) {
SDValue OffsetSlot = DAG.getMemBasePlusOffset(StackSlot, 4, dl);
SDValue Store1 =
- DAG.getStore(Chain, dl, Src, StackSlot, MachinePointerInfo());
+ DAG.getStore(Chain, dl, Src, StackSlot, MPI, 8 /*Align*/);
SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32),
- OffsetSlot, MachinePointerInfo());
+ OffsetSlot, MPI.getWithOffset(4), 4);
std::pair<SDValue, SDValue> Tmp =
- BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+ BuildFILD(DstVT, MVT::i64, dl, Store2, StackSlot, MPI, Align(8), DAG);
if (IsStrict)
return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
@@ -19361,21 +19990,17 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
}
SDValue Store =
- DAG.getStore(Chain, dl, ValueToStore, StackSlot, MachinePointerInfo());
+ DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, Align(8));
// For i64 source, we need to add the appropriate power of 2 if the input
// was negative. This is the same as the optimization in
// DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP, and for it to be safe here,
// we must be careful to do the computation in x87 extended precision, not
// in SSE. (The generic code can't know it's OK to do this, or how to.)
- int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
- MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
- MachineMemOperand::MOLoad, 8, 8);
-
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Store, StackSlot };
- SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops,
- MVT::i64, MMO);
+ SDValue Fild =
+ DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, MVT::i64, MPI,
+ Align(8), MachineMemOperand::MOLoad);
Chain = Fild.getValue(1);
@@ -19388,6 +20013,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
APInt FF(64, 0x5F80000000000000ULL);
SDValue FudgePtr = DAG.getConstantPool(
ConstantInt::get(*DAG.getContext(), FF), PtrVT);
+ Align CPAlignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlign();
// Get a pointer to FF if the sign bit was set, or to 0 otherwise.
SDValue Zero = DAG.getIntPtrConstant(0, dl);
@@ -19399,7 +20025,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue Fudge = DAG.getExtLoad(
ISD::EXTLOAD, dl, MVT::f80, Chain, FudgePtr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
- /* Alignment = */ 4);
+ CPAlignment);
Chain = Fudge.getValue(1);
// Extend everything to 80 bits to force it to be done on x87.
// TODO: Are there any fast-math-flags to propagate here?
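As a rough scalar model of the unsigned path above (assuming an x87-style 80-bit long double; the helper name is illustrative): FILD reads the 64-bit buffer as signed, and the fudge constant adds 2^64 back whenever the sign bit was set.

#include <cstdint>

// Minimal standalone sketch of the same idea in plain C++.
long double u64ToLongDouble(uint64_t U) {
  long double R = (long double)(int64_t)U; // what FILD of the i64 buffer yields
  if ((int64_t)U < 0)                      // sign bit was set
    R += 18446744073709551616.0L;          // fudge factor: 2^64
  return R;
}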
@@ -19462,7 +20088,8 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
// stack slot.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = DstTy.getStoreSize();
- int SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
+ int SSFI =
+ MF.getFrameInfo().CreateStackObject(MemSize, Align(MemSize), false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
@@ -19537,20 +20164,20 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
if (isScalarFPTypeInSSEReg(TheVT)) {
assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, DL, Value, StackSlot, MPI);
- SDVTList Tys = DAG.getVTList(TheVT, MVT::Other);
+ SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Chain, StackSlot };
unsigned FLDSize = TheVT.getStoreSize();
assert(FLDSize <= MemSize && "Stack slot not big enough");
MachineMemOperand *MMO = MF.getMachineMemOperand(
- MPI, MachineMemOperand::MOLoad, FLDSize, FLDSize);
+ MPI, MachineMemOperand::MOLoad, FLDSize, Align(FLDSize));
Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, TheVT, MMO);
Chain = Value.getValue(1);
}
// Build the FP_TO_INT*_IN_MEM
MachineMemOperand *MMO = MF.getMachineMemOperand(
- MPI, MachineMemOperand::MOStore, MemSize, MemSize);
+ MPI, MachineMemOperand::MOStore, MemSize, Align(MemSize));
SDValue Ops[] = { Chain, Value, StackSlot };
SDValue FIST = DAG.getMemIntrinsicNode(X86ISD::FP_TO_INT_IN_MEM, DL,
DAG.getVTList(MVT::Other),
@@ -19590,14 +20217,9 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
unsigned ExtendInVecOpc = getOpcode_EXTEND_VECTOR_INREG(Opc);
- // Custom legalize v8i8->v8i64 on CPUs without avx512bw.
- if (InVT == MVT::v8i8) {
- if (VT != MVT::v8i64)
- return SDValue();
-
- In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op),
- MVT::v16i8, In, DAG.getUNDEF(MVT::v8i8));
- return DAG.getNode(ExtendInVecOpc, dl, VT, In);
+ if (VT == MVT::v32i16 && !Subtarget.hasBWI()) {
+ assert(InVT == MVT::v32i8 && "Unexpected VT!");
+ return splitVectorIntUnary(Op, DAG);
}
if (Subtarget.hasInt256())
@@ -19729,7 +20351,7 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
"Unexpected PACK opcode");
assert(DstVT.isVector() && "VT not a vector?");
- // Requires SSE2 but AVX512 has fast vector truncate.
+ // Requires SSE2 for PACKSS (SSE41 PACKUSDW is handled below).
if (!Subtarget.hasSSE2())
return SDValue();
@@ -19770,15 +20392,14 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
InVT = EVT::getVectorVT(Ctx, InVT, 128 / InVT.getSizeInBits());
OutVT = EVT::getVectorVT(Ctx, OutVT, 128 / OutVT.getSizeInBits());
In = DAG.getBitcast(InVT, In);
- SDValue Res = DAG.getNode(Opcode, DL, OutVT, In, In);
+ SDValue Res = DAG.getNode(Opcode, DL, OutVT, In, DAG.getUNDEF(InVT));
Res = extractSubVector(Res, 0, DAG, DL, 64);
return DAG.getBitcast(DstVT, Res);
}
- // Extract lower/upper subvectors.
- unsigned NumSubElts = NumElems / 2;
- SDValue Lo = extractSubVector(In, 0 * NumSubElts, DAG, DL, SrcSizeInBits / 2);
- SDValue Hi = extractSubVector(In, 1 * NumSubElts, DAG, DL, SrcSizeInBits / 2);
+ // Split lower/upper subvectors.
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = splitVector(In, DAG, DL);
unsigned SubSizeInBits = SrcSizeInBits / 2;
InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
@@ -19804,7 +20425,7 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
// Scale shuffle mask to avoid bitcasts and help ComputeNumSignBits.
SmallVector<int, 64> Mask;
int Scale = 64 / OutVT.getScalarSizeInBits();
- scaleShuffleMask<int>(Scale, ArrayRef<int>({ 0, 2, 1, 3 }), Mask);
+ narrowShuffleMaskElts(Scale, { 0, 2, 1, 3 }, Mask);
Res = DAG.getVectorShuffle(OutVT, DL, Res, Res, Mask);
if (DstVT.is256BitVector())
@@ -19818,7 +20439,7 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
// Recursively pack lower/upper subvectors, concat result and pack again.
assert(SrcSizeInBits >= 256 && "Expected 256-bit vector or greater");
- EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumSubElts);
+ EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
Lo = truncateVectorWithPACK(Opcode, PackedVT, Lo, DL, DAG, Subtarget);
Hi = truncateVectorWithPACK(Opcode, PackedVT, Hi, DL, DAG, Subtarget);
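For context, a minimal intrinsics-level sketch of the single PACK step this routine builds, assuming SSE2 and inputs that already fit in the narrower type (names are illustrative):

#include <emmintrin.h>

// Packs two 4 x i32 halves into one 8 x i16 vector with signed saturation
// (PACKSSDW); the caller is expected to have sign-extended the inputs so the
// saturation never actually fires.
__m128i packI32ToI16(__m128i Lo, __m128i Hi) {
  return _mm_packs_epi32(Lo, Hi);
}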
@@ -19865,17 +20486,22 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
// trying to avoid 512-bit vectors. If we are avoiding 512-bit vectors
// we need to split into two 8 element vectors which we can extend to v8i32,
// truncate and concat the results. There's an additional complication if
- // the original type is v16i8. In that case we can't split the v16i8 so
- // first we pre-extend it to v16i16 which we can split to v8i16, then extend
- // to v8i32, truncate that to v8i1 and concat the two halves.
+ // the original type is v16i8. In that case we can't split the v16i8
+ // directly, so we need to shuffle high elements to low and use
+ // sign_extend_vector_inreg.
if (NumElts == 16 && !Subtarget.canExtendTo512DQ()) {
+ SDValue Lo, Hi;
if (InVT == MVT::v16i8) {
- // First we need to sign extend up to 256-bits so we can split that.
- InVT = MVT::v16i16;
- In = DAG.getNode(ISD::SIGN_EXTEND, DL, InVT, In);
+ Lo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, MVT::v8i32, In);
+ Hi = DAG.getVectorShuffle(
+ InVT, DL, In, In,
+ {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1});
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, MVT::v8i32, Hi);
+ } else {
+ assert(InVT == MVT::v16i16 && "Unexpected VT!");
+ Lo = extract128BitVector(In, 0, DAG, DL);
+ Hi = extract128BitVector(In, 8, DAG, DL);
}
- SDValue Lo = extract128BitVector(In, 0, DAG, DL);
- SDValue Hi = extract128BitVector(In, 8, DAG, DL);
// We're split now, just emit two truncates and a concat. The two
// truncates will trigger legalization to come back to this function.
Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i1, Lo);
@@ -19918,7 +20544,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
if (!TLI.isTypeLegal(InVT)) {
if ((InVT == MVT::v8i64 || InVT == MVT::v16i32 || InVT == MVT::v16i64) &&
VT.is128BitVector()) {
- assert(Subtarget.hasVLX() && "Unexpected subtarget!");
+ assert((InVT == MVT::v16i64 || Subtarget.hasVLX()) &&
+ "Unexpected subtarget!");
// The default behavior is to truncate one step, concatenate, and then
// truncate the remainder. We'd rather produce two 64-bit results and
// concatenate those.
@@ -19942,6 +20569,11 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
// vpmovqb/w/d, vpmovdb/w, vpmovwb
if (Subtarget.hasAVX512()) {
+ if (InVT == MVT::v32i16 && !Subtarget.hasBWI()) {
+ assert(VT == MVT::v32i8 && "Unexpected VT!");
+ return splitVectorIntUnary(Op, DAG);
+ }
+
// word to byte only under BWI. Otherwise we have to promote to v16i32
// and then truncate that. But we should only do that if we haven't been
// asked to avoid 512-bit vectors. The actual promotion to v16i32 will be
@@ -20174,6 +20806,25 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
}
if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) {
+ if (!Subtarget.hasVLX()) {
+ // Non-strict nodes without VLX can be widened to v4f32->v4i64 by type
+ // legalizer and then widened again by vector op legalization.
+ if (!IsStrict)
+ return SDValue();
+
+ SDValue Zero = DAG.getConstantFP(0.0, dl, MVT::v2f32);
+ SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f32,
+ {Src, Zero, Zero, Zero});
+ Tmp = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
+ {Op->getOperand(0), Tmp});
+ SDValue Chain = Tmp.getValue(1);
+ Tmp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Tmp,
+ DAG.getIntPtrConstant(0, dl));
+ if (IsStrict)
+ return DAG.getMergeValues({Tmp, Chain}, dl);
+ return Tmp;
+ }
+
assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL");
SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getUNDEF(MVT::v2f32));
@@ -20281,6 +20932,62 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("Expected FP_TO_INTHelper to handle all remaining cases.");
}
+SDValue X86TargetLowering::LowerLRINT_LLRINT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+
+ // If the source is in an SSE register, the node is Legal.
+ if (isScalarFPTypeInSSEReg(SrcVT))
+ return Op;
+
+ return LRINT_LLRINTHelper(Op.getNode(), DAG);
+}
+
+SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N,
+ SelectionDAG &DAG) const {
+ EVT DstVT = N->getValueType(0);
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ if (SrcVT != MVT::f32 && SrcVT != MVT::f64 && SrcVT != MVT::f80) {
+ // f16 must be promoted before using the lowering in this routine.
+ // fp128 does not use this lowering.
+ return SDValue();
+ }
+
+ SDLoc DL(N);
+ SDValue Chain = DAG.getEntryNode();
+
+ bool UseSSE = isScalarFPTypeInSSEReg(SrcVT);
+
+ // If we're converting from SSE, the stack slot needs to hold both types.
+ // Otherwise it only needs to hold the DstVT.
+ EVT OtherVT = UseSSE ? SrcVT : DstVT;
+ SDValue StackPtr = DAG.CreateStackTemporary(DstVT, OtherVT);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+
+ if (UseSSE) {
+ assert(DstVT == MVT::i64 && "Invalid LRINT/LLRINT to lower!");
+ Chain = DAG.getStore(Chain, DL, Src, StackPtr, MPI);
+ SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
+ SDValue Ops[] = { Chain, StackPtr };
+
+ Src = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, SrcVT, MPI,
+ /*Align*/ None, MachineMemOperand::MOLoad);
+ Chain = Src.getValue(1);
+ }
+
+ SDValue StoreOps[] = { Chain, Src, StackPtr };
+ Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, DL, DAG.getVTList(MVT::Other),
+ StoreOps, DstVT, MPI, /*Align*/ None,
+ MachineMemOperand::MOStore);
+
+ return DAG.getLoad(DstVT, DL, Chain, StackPtr, MPI);
+}
+
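A scalar reference for what the FLD/FIST sequence above computes, stated in terms of the standard library (the helper name is illustrative):

#include <cmath>

// LRINT/LLRINT convert using the current FP rounding mode, which matches the
// semantics of the x87 FIST store emitted by LRINT_LLRINTHelper.
long long llrintReference(double X) {
  return std::llrint(X);
}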
SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
@@ -20333,6 +21040,67 @@ SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
return Tmp.first;
}
+static SDValue LowerFP16_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ bool IsStrict = Op->isStrictFPOpcode();
+ SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+ assert(Src.getValueType() == MVT::i16 && Op.getValueType() == MVT::f32 &&
+ "Unexpected VT!");
+
+ SDLoc dl(Op);
+ SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16,
+ DAG.getConstant(0, dl, MVT::v8i16), Src,
+ DAG.getIntPtrConstant(0, dl));
+
+ SDValue Chain;
+ if (IsStrict) {
+ Res = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {MVT::v4f32, MVT::Other},
+ {Op.getOperand(0), Res});
+ Chain = Res.getValue(1);
+ } else {
+ Res = DAG.getNode(X86ISD::CVTPH2PS, dl, MVT::v4f32, Res);
+ }
+
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
+ DAG.getIntPtrConstant(0, dl));
+
+ if (IsStrict)
+ return DAG.getMergeValues({Res, Chain}, dl);
+
+ return Res;
+}
+
+static SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) {
+ bool IsStrict = Op->isStrictFPOpcode();
+ SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+ assert(Src.getValueType() == MVT::f32 && Op.getValueType() == MVT::i16 &&
+ "Unexpected VT!");
+
+ SDLoc dl(Op);
+ SDValue Res, Chain;
+ if (IsStrict) {
+ Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4f32,
+ DAG.getConstantFP(0, dl, MVT::v4f32), Src,
+ DAG.getIntPtrConstant(0, dl));
+ Res = DAG.getNode(
+ X86ISD::STRICT_CVTPS2PH, dl, {MVT::v8i16, MVT::Other},
+ {Op.getOperand(0), Res, DAG.getTargetConstant(4, dl, MVT::i32)});
+ Chain = Res.getValue(1);
+ } else {
+ // FIXME: Should we use zeros for upper elements for non-strict?
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, Src);
+ Res = DAG.getNode(X86ISD::CVTPS2PH, dl, MVT::v8i16, Res,
+ DAG.getTargetConstant(4, dl, MVT::i32));
+ }
+
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Res,
+ DAG.getIntPtrConstant(0, dl));
+
+ if (IsStrict)
+ return DAG.getMergeValues({Res, Chain}, dl);
+
+ return Res;
+}
+
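The two lowerings above mirror what the F16C intrinsics do for a scalar value; a minimal sketch assuming an F16C-capable target (helper names are illustrative):

#include <immintrin.h>
#include <cstdint>

float halfToFloat(uint16_t H) {
  __m128i V = _mm_cvtsi32_si128(H);        // value in lane 0, upper lanes zero
  return _mm_cvtss_f32(_mm_cvtph_ps(V));   // CVTPH2PS, then take lane 0
}

uint16_t floatToHalf(float F) {
  // Immediate 4 selects the current rounding mode, matching the lowering.
  __m128i V = _mm_cvtps_ph(_mm_set_ss(F), 4);  // CVTPS2PH
  return (uint16_t)_mm_cvtsi128_si32(V);
}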
/// Depending on uarch and/or optimizing for size, we might prefer to use a
/// vector operation in place of the typical scalar operation.
static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
@@ -20413,6 +21181,30 @@ SDValue X86TargetLowering::lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const {
return lowerAddSubToHorizontalOp(Op, DAG, Subtarget);
}
+/// ISD::FROUND is defined to round to nearest with ties rounding away from 0.
+/// This mode isn't supported in hardware on X86. But as long as we aren't
+/// compiling with trapping math, we can emulate this with
+/// trunc(X + copysign(nextafter(0.5, 0.0), X)).
+static SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) {
+ SDValue N0 = Op.getOperand(0);
+ SDLoc dl(Op);
+ MVT VT = Op.getSimpleValueType();
+
+ // N0 += copysign(nextafter(0.5, 0.0), N0)
+ const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);
+ bool Ignored;
+ APFloat Point5Pred = APFloat(0.5f);
+ Point5Pred.convert(Sem, APFloat::rmNearestTiesToEven, &Ignored);
+ Point5Pred.next(/*nextDown*/true);
+
+ SDValue Adder = DAG.getNode(ISD::FCOPYSIGN, dl, VT,
+ DAG.getConstantFP(Point5Pred, dl, VT), N0);
+ N0 = DAG.getNode(ISD::FADD, dl, VT, N0, Adder);
+
+ // Truncate the result to remove fraction.
+ return DAG.getNode(ISD::FTRUNC, dl, VT, N0);
+}
+
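A scalar sketch of the rounding trick LowerFROUND describes, in plain C++ and assuming non-trapping math (the helper name is illustrative):

#include <cmath>

double roundHalfAwayFromZero(double X) {
  // Largest double strictly below 0.5: exact .5 cases still cross the integer
  // boundary after the add, while values just below .5 do not.
  double Pred = std::nextafter(0.5, 0.0);
  return std::trunc(X + std::copysign(Pred, X));
}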
/// The only differences between FABS and FNEG are the mask and the logic op.
/// FNEG also has a folding opportunity for FNEG(FABS(x)).
static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
@@ -20568,9 +21360,12 @@ static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl,
}
/// Helper for matching OR(EXTRACTELT(X,0),OR(EXTRACTELT(X,1),...))
-/// style scalarized (associative) reduction patterns.
+/// style scalarized (associative) reduction patterns. Partial reductions
+/// are supported when the pointer SrcMask is non-null.
+/// TODO - move this to SelectionDAG?
static bool matchScalarReduction(SDValue Op, ISD::NodeType BinOp,
- SmallVectorImpl<SDValue> &SrcOps) {
+ SmallVectorImpl<SDValue> &SrcOps,
+ SmallVectorImpl<APInt> *SrcMask = nullptr) {
SmallVector<SDValue, 8> Opnds;
DenseMap<SDValue, APInt> SrcOpMap;
EVT VT = MVT::Other;
@@ -20598,8 +21393,8 @@ static bool matchScalarReduction(SDValue Op, ISD::NodeType BinOp,
return false;
// Quit if without a constant index.
- SDValue Idx = I->getOperand(1);
- if (!isa<ConstantSDNode>(Idx))
+ auto *Idx = dyn_cast<ConstantSDNode>(I->getOperand(1));
+ if (!Idx)
return false;
SDValue Src = I->getOperand(0);
@@ -20615,61 +21410,167 @@ static bool matchScalarReduction(SDValue Op, ISD::NodeType BinOp,
M = SrcOpMap.insert(std::make_pair(Src, EltCount)).first;
SrcOps.push_back(Src);
}
+
// Quit if element already used.
- unsigned CIdx = cast<ConstantSDNode>(Idx)->getZExtValue();
+ unsigned CIdx = Idx->getZExtValue();
if (M->second[CIdx])
return false;
M->second.setBit(CIdx);
}
- // Quit if not all elements are used.
- for (DenseMap<SDValue, APInt>::const_iterator I = SrcOpMap.begin(),
- E = SrcOpMap.end();
- I != E; ++I) {
- if (!I->second.isAllOnesValue())
- return false;
+ if (SrcMask) {
+ // Collect the source partial masks.
+ for (SDValue &SrcOp : SrcOps)
+ SrcMask->push_back(SrcOpMap[SrcOp]);
+ } else {
+ // Quit if not all elements are used.
+ for (DenseMap<SDValue, APInt>::const_iterator I = SrcOpMap.begin(),
+ E = SrcOpMap.end();
+ I != E; ++I) {
+ if (!I->second.isAllOnesValue())
+ return false;
+ }
}
return true;
}
-// Check whether an OR'd tree is PTEST-able.
-static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
+// Helper function for comparing all bits of a vector against zero.
+static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC,
+ const APInt &Mask,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG, X86::CondCode &X86CC) {
+ EVT VT = V.getValueType();
+ assert(Mask.getBitWidth() == VT.getScalarSizeInBits() &&
+ "Element Mask vs Vector bitwidth mismatch");
+
+ assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unsupported ISD::CondCode");
+ X86CC = (CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE);
+
+ auto MaskBits = [&](SDValue Src) {
+ if (Mask.isAllOnesValue())
+ return Src;
+ EVT SrcVT = Src.getValueType();
+ SDValue MaskValue = DAG.getConstant(Mask, DL, SrcVT);
+ return DAG.getNode(ISD::AND, DL, SrcVT, Src, MaskValue);
+ };
+
+ // For sub-128-bit vector, cast to (legal) integer and compare with zero.
+ if (VT.getSizeInBits() < 128) {
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(IntVT))
+ return SDValue();
+ return DAG.getNode(X86ISD::CMP, DL, MVT::i32,
+ DAG.getBitcast(IntVT, MaskBits(V)),
+ DAG.getConstant(0, DL, IntVT));
+ }
+
+ // Quit if not splittable to 128/256-bit vector.
+ if (!isPowerOf2_32(VT.getSizeInBits()))
+ return SDValue();
+
+ // Split down to 128/256-bit vector.
+ unsigned TestSize = Subtarget.hasAVX() ? 256 : 128;
+ while (VT.getSizeInBits() > TestSize) {
+ auto Split = DAG.SplitVector(V, DL);
+ VT = Split.first.getValueType();
+ V = DAG.getNode(ISD::OR, DL, VT, Split.first, Split.second);
+ }
+
+ bool UsePTEST = Subtarget.hasSSE41();
+ if (UsePTEST) {
+ MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
+ V = DAG.getBitcast(TestVT, MaskBits(V));
+ return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, V, V);
+ }
+
+ // Without PTEST, a masked v2i64 or-reduction is not faster than
+ // scalarization.
+ if (!Mask.isAllOnesValue() && VT.getScalarSizeInBits() > 32)
+ return SDValue();
+
+ V = DAG.getBitcast(MVT::v16i8, MaskBits(V));
+ V = DAG.getNode(X86ISD::PCMPEQ, DL, MVT::v16i8, V,
+ getZeroVector(MVT::v16i8, Subtarget, DAG, DL));
+ V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
+ return DAG.getNode(X86ISD::CMP, DL, MVT::i32, V,
+ DAG.getConstant(0xFFFF, DL, MVT::i32));
+}
+
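The two strategies LowerVectorAllZero chooses between, sketched with SSE intrinsics for a 128-bit value (PTEST needs SSE4.1, the MOVMSK fallback only SSE2; names are illustrative):

#include <immintrin.h>

bool isAllZeroPTEST(__m128i V) {
  return _mm_testz_si128(V, V) != 0;   // PTEST sets ZF when (V & V) == 0
}

bool isAllZeroMOVMSK(__m128i V) {
  __m128i EQ = _mm_cmpeq_epi8(V, _mm_setzero_si128()); // PCMPEQB against zero
  return _mm_movemask_epi8(EQ) == 0xFFFF;              // all 16 bytes matched
}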
+// Check whether an OR'd reduction tree is PTEST-able, or if we can fallback to
+// CMP(MOVMSK(PCMPEQB(X,0))).
+static SDValue MatchVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
+ const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG, SDValue &X86CC) {
- assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
+ assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unsupported ISD::CondCode");
- if (!Subtarget.hasSSE41() || !Op->hasOneUse())
+ if (!Subtarget.hasSSE2() || !Op->hasOneUse())
return SDValue();
- SmallVector<SDValue, 8> VecIns;
- if (!matchScalarReduction(Op, ISD::OR, VecIns))
- return SDValue();
+ // Check whether we're masking/truncating an OR-reduction result, in which
+ // case track the masked bits.
+ APInt Mask = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
+ switch (Op.getOpcode()) {
+ case ISD::TRUNCATE: {
+ SDValue Src = Op.getOperand(0);
+ Mask = APInt::getLowBitsSet(Src.getScalarValueSizeInBits(),
+ Op.getScalarValueSizeInBits());
+ Op = Src;
+ break;
+ }
+ case ISD::AND: {
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Mask = Cst->getAPIntValue();
+ Op = Op.getOperand(0);
+ }
+ break;
+ }
+ }
- // Quit if not 128/256-bit vector.
- EVT VT = VecIns[0].getValueType();
- if (!VT.is128BitVector() && !VT.is256BitVector())
- return SDValue();
+ SmallVector<SDValue, 8> VecIns;
+ if (Op.getOpcode() == ISD::OR && matchScalarReduction(Op, ISD::OR, VecIns)) {
+ EVT VT = VecIns[0].getValueType();
+ assert(llvm::all_of(VecIns,
+ [VT](SDValue V) { return VT == V.getValueType(); }) &&
+ "Reduction source vector mismatch");
+
+ // Quit if less than 128-bits or not splittable to 128/256-bit vector.
+ if (VT.getSizeInBits() < 128 || !isPowerOf2_32(VT.getSizeInBits()))
+ return SDValue();
- SDLoc DL(Op);
- MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
+ // If more than one full vector is evaluated, OR them first before PTEST.
+ for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1;
+ Slot += 2, e += 1) {
+ // Each iteration will OR 2 nodes and append the result until there is
+ // only 1 node left, i.e. the final OR'd value of all vectors.
+ SDValue LHS = VecIns[Slot];
+ SDValue RHS = VecIns[Slot + 1];
+ VecIns.push_back(DAG.getNode(ISD::OR, DL, VT, LHS, RHS));
+ }
- // Cast all vectors into TestVT for PTEST.
- for (unsigned i = 0, e = VecIns.size(); i < e; ++i)
- VecIns[i] = DAG.getBitcast(TestVT, VecIns[i]);
+ X86::CondCode CCode;
+ if (SDValue V = LowerVectorAllZero(DL, VecIns.back(), CC, Mask, Subtarget,
+ DAG, CCode)) {
+ X86CC = DAG.getTargetConstant(CCode, DL, MVT::i8);
+ return V;
+ }
+ }
- // If more than one full vector is evaluated, OR them first before PTEST.
- for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1; Slot += 2, e += 1) {
- // Each iteration will OR 2 nodes and append the result until there is only
- // 1 node left, i.e. the final OR'd value of all vectors.
- SDValue LHS = VecIns[Slot];
- SDValue RHS = VecIns[Slot + 1];
- VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS));
+ if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ ISD::NodeType BinOp;
+ if (SDValue Match =
+ DAG.matchBinOpReduction(Op.getNode(), BinOp, {ISD::OR})) {
+ X86::CondCode CCode;
+ if (SDValue V =
+ LowerVectorAllZero(DL, Match, CC, Mask, Subtarget, DAG, CCode)) {
+ X86CC = DAG.getTargetConstant(CCode, DL, MVT::i8);
+ return V;
+ }
+ }
}
- X86CC = DAG.getTargetConstant(CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE,
- DL, MVT::i8);
- return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, VecIns.back(), VecIns.back());
+ return SDValue();
}
/// return true if \c Op has a use that doesn't just read flags.
@@ -20814,27 +21715,14 @@ static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
/// Emit nodes that will be selected as "cmp Op0,Op1", or something
/// equivalent.
-static std::pair<SDValue, SDValue> EmitCmp(SDValue Op0, SDValue Op1,
- unsigned X86CC, const SDLoc &dl,
- SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- SDValue Chain, bool IsSignaling) {
+static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
+ const SDLoc &dl, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
if (isNullConstant(Op1))
- return std::make_pair(EmitTest(Op0, X86CC, dl, DAG, Subtarget), Chain);
+ return EmitTest(Op0, X86CC, dl, DAG, Subtarget);
EVT CmpVT = Op0.getValueType();
- if (CmpVT.isFloatingPoint()) {
- if (Chain) {
- SDValue Res =
- DAG.getNode(IsSignaling ? X86ISD::STRICT_FCMPS : X86ISD::STRICT_FCMP,
- dl, {MVT::i32, MVT::Other}, {Chain, Op0, Op1});
- return std::make_pair(Res, Res.getValue(1));
- }
- return std::make_pair(DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1),
- SDValue());
- }
-
assert((CmpVT == MVT::i8 || CmpVT == MVT::i16 ||
CmpVT == MVT::i32 || CmpVT == MVT::i64) && "Unexpected VT!");
@@ -20884,40 +21772,28 @@ static std::pair<SDValue, SDValue> EmitCmp(SDValue Op0, SDValue Op1,
Op1 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op1);
}
+ // 0-x == y --> x+y == 0
+ // 0-x != y --> x+y != 0
+ if (Op0.getOpcode() == ISD::SUB && isNullConstant(Op0.getOperand(0)) &&
+ Op0.hasOneUse() && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) {
+ SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32);
+ SDValue Add = DAG.getNode(X86ISD::ADD, dl, VTs, Op0.getOperand(1), Op1);
+ return Add.getValue(1);
+ }
+
+ // x == 0-y --> x+y == 0
+ // x != 0-y --> x+y != 0
+ if (Op1.getOpcode() == ISD::SUB && isNullConstant(Op1.getOperand(0)) &&
+ Op1.hasOneUse() && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) {
+ SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32);
+ SDValue Add = DAG.getNode(X86ISD::ADD, dl, VTs, Op0, Op1.getOperand(1));
+ return Add.getValue(1);
+ }
+
// Use SUB instead of CMP to enable CSE between SUB and CMP.
SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32);
SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1);
- return std::make_pair(Sub.getValue(1), SDValue());
-}
-
-/// Convert a comparison if required by the subtarget.
-SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
- SelectionDAG &DAG) const {
- // If the subtarget does not support the FUCOMI instruction, floating-point
- // comparisons have to be converted.
- bool IsCmp = Cmp.getOpcode() == X86ISD::CMP;
- bool IsStrictCmp = Cmp.getOpcode() == X86ISD::STRICT_FCMP ||
- Cmp.getOpcode() == X86ISD::STRICT_FCMPS;
-
- if (Subtarget.hasCMov() || (!IsCmp && !IsStrictCmp) ||
- !Cmp.getOperand(IsStrictCmp ? 1 : 0).getValueType().isFloatingPoint() ||
- !Cmp.getOperand(IsStrictCmp ? 2 : 1).getValueType().isFloatingPoint())
- return Cmp;
-
- // The instruction selector will select an FUCOM instruction instead of
- // FUCOMI, which writes the comparison result to FPSW instead of EFLAGS. Hence
- // build an SDNode sequence that transfers the result from FPSW into EFLAGS:
- // (X86sahf (trunc (srl (X86fp_stsw (trunc (X86any_fcmp ...)), 8))))
- SDLoc dl(Cmp);
- SDValue TruncFPSW = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Cmp);
- SDValue FNStSW = DAG.getNode(X86ISD::FNSTSW16r, dl, MVT::i16, TruncFPSW);
- SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW,
- DAG.getConstant(8, dl, MVT::i8));
- SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl);
-
- // Some 64-bit targets lack SAHF support, but they do support FCOMI.
- assert(Subtarget.hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?");
- return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
+ return Sub.getValue(1);
}
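The new folds in EmitCmp rest on a simple two's-complement identity; a minimal sketch in plain C++ (the helper name is illustrative):

#include <cstdint>

// (0 - X) == Y holds exactly when X + Y wraps to 0, so the compare can be fed
// by an ADD instead of a SUB of a negation; the same ZF is produced.
bool negCompareViaAdd(uint32_t X, uint32_t Y) {
  return (uint32_t)(X + Y) == 0;   // same truth value as (0u - X) == Y
}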
/// Check if replacement of SQRT with RSQRT should be disabled.
@@ -21211,32 +22087,30 @@ static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
/// Break a VSETCC 256-bit integer VSETCC into two new 128 ones and then
/// concatenate the result back.
-static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getSimpleValueType();
+static SDValue splitIntVSETCC(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
- assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC &&
- "Unsupported value type for operation");
+ assert(Op.getOpcode() == ISD::SETCC && "Unsupported operation");
+ assert(Op.getOperand(0).getValueType().isInteger() &&
+ VT == Op.getOperand(0).getValueType() && "Unsupported VTs!");
- unsigned NumElems = VT.getVectorNumElements();
SDLoc dl(Op);
SDValue CC = Op.getOperand(2);
- // Extract the LHS vectors
- SDValue LHS = Op.getOperand(0);
- SDValue LHS1 = extract128BitVector(LHS, 0, DAG, dl);
- SDValue LHS2 = extract128BitVector(LHS, NumElems / 2, DAG, dl);
+ // Extract the LHS Lo/Hi vectors
+ SDValue LHS1, LHS2;
+ std::tie(LHS1, LHS2) = splitVector(Op.getOperand(0), DAG, dl);
- // Extract the RHS vectors
- SDValue RHS = Op.getOperand(1);
- SDValue RHS1 = extract128BitVector(RHS, 0, DAG, dl);
- SDValue RHS2 = extract128BitVector(RHS, NumElems / 2, DAG, dl);
+ // Extract the RHS Lo/Hi vectors
+ SDValue RHS1, RHS2;
+ std::tie(RHS1, RHS2) = splitVector(Op.getOperand(1), DAG, dl);
// Issue the operation on the smaller types and concatenate the result back
- MVT EltVT = VT.getVectorElementType();
- MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
- DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC),
- DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
+ DAG.getNode(ISD::SETCC, dl, LoVT, LHS1, RHS1, CC),
+ DAG.getNode(ISD::SETCC, dl, HiVT, LHS2, RHS2, CC));
}
static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
@@ -21369,8 +22243,14 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
+ // If we have a strict compare with a vXi1 result and the input is 128/256
+ // bits we can't use a masked compare unless we have VLX. If we use a wider
+ // compare like we do for non-strict, we might trigger spurious exceptions
+ // from the upper elements. Instead emit an AVX compare and convert to mask.
unsigned Opc;
- if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1) {
+ if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1 &&
+ (!IsStrict || Subtarget.hasVLX() ||
+ Op0.getSimpleValueType().is512BitVector())) {
assert(VT.getVectorNumElements() <= 16);
Opc = IsStrict ? X86ISD::STRICT_CMPM : X86ISD::CMPM;
} else {
@@ -21466,10 +22346,19 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8));
}
- // If this is SSE/AVX CMPP, bitcast the result back to integer to match the
- // result type of SETCC. The bitcast is expected to be optimized away
- // during combining/isel.
- Cmp = DAG.getBitcast(Op.getSimpleValueType(), Cmp);
+ if (VT.getSizeInBits() > Op.getSimpleValueType().getSizeInBits()) {
+ // We emitted a compare with an XMM/YMM result. Finish converting to a
+ // mask register using a vptestm.
+ EVT CastVT = EVT(VT).changeVectorElementTypeToInteger();
+ Cmp = DAG.getBitcast(CastVT, Cmp);
+ Cmp = DAG.getSetCC(dl, Op.getSimpleValueType(), Cmp,
+ DAG.getConstant(0, dl, CastVT), ISD::SETNE);
+ } else {
+ // If this is SSE/AVX CMPP, bitcast the result back to integer to match
+ // the result type of SETCC. The bitcast is expected to be optimized
+ // away during combining/isel.
+ Cmp = DAG.getBitcast(Op.getSimpleValueType(), Cmp);
+ }
if (IsStrict)
return DAG.getMergeValues({Cmp, Chain}, dl);
@@ -21563,7 +22452,12 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// Break 256-bit integer vector compare into smaller ones.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return Lower256IntVSETCC(Op, DAG);
+ return splitIntVSETCC(Op, DAG);
+
+ if (VT == MVT::v32i16 || VT == MVT::v64i8) {
+ assert(!Subtarget.hasBWI() && "Unexpected VT with AVX512BW!");
+ return splitIntVSETCC(Op, DAG);
+ }
// If this is a SETNE against the signed minimum value, change it to SETGT.
// If this is a SETNE against the signed maximum value, change it to SETLT.
@@ -21812,9 +22706,8 @@ static SDValue EmitAVX512Test(SDValue Op0, SDValue Op1, ISD::CondCode CC,
/// corresponding X86 condition code constant in X86CC.
SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
ISD::CondCode CC, const SDLoc &dl,
- SelectionDAG &DAG, SDValue &X86CC,
- SDValue &Chain,
- bool IsSignaling) const {
+ SelectionDAG &DAG,
+ SDValue &X86CC) const {
// Optimize to BT if possible.
// Lower (X & (1 << N)) == 0 to BT(X, N).
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
@@ -21825,13 +22718,12 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
return BT;
}
- // Try to use PTEST for a tree ORs equality compared with 0.
+ // Try to use PTEST/PMOVMSKB for a tree ORs equality compared with 0.
// TODO: We could do AND tree with all 1s as well by using the C flag.
- if (Op0.getOpcode() == ISD::OR && isNullConstant(Op1) &&
- (CC == ISD::SETEQ || CC == ISD::SETNE)) {
- if (SDValue PTEST = LowerVectorAllZeroTest(Op0, CC, Subtarget, DAG, X86CC))
- return PTEST;
- }
+ if (isNullConstant(Op1) && (CC == ISD::SETEQ || CC == ISD::SETNE))
+ if (SDValue CmpZ =
+ MatchVectorAllZeroTest(Op0, CC, dl, Subtarget, DAG, X86CC))
+ return CmpZ;
// Try to lower using KORTEST or KTEST.
if (SDValue Test = EmitAVX512Test(Op0, Op1, CC, dl, DAG, Subtarget, X86CC))
@@ -21873,17 +22765,11 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
}
}
- bool IsFP = Op1.getSimpleValueType().isFloatingPoint();
- X86::CondCode CondCode = TranslateX86CC(CC, dl, IsFP, Op0, Op1, DAG);
- if (CondCode == X86::COND_INVALID)
- return SDValue();
+ X86::CondCode CondCode =
+ TranslateX86CC(CC, dl, /*IsFP*/ false, Op0, Op1, DAG);
+ assert(CondCode != X86::COND_INVALID && "Unexpected condition code!");
- std::pair<SDValue, SDValue> Tmp =
- EmitCmp(Op0, Op1, CondCode, dl, DAG, Subtarget, Chain, IsSignaling);
- SDValue EFLAGS = Tmp.first;
- if (Chain)
- Chain = Tmp.second;
- EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
+ SDValue EFLAGS = EmitCmp(Op0, Op1, CondCode, dl, DAG, Subtarget);
X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8);
return EFLAGS;
}
@@ -21920,18 +22806,32 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
}
}
- SDValue X86CC;
- SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC, Chain,
- Op.getOpcode() == ISD::STRICT_FSETCCS);
- if (!EFLAGS)
- return SDValue();
+ if (Op0.getSimpleValueType().isInteger()) {
+ SDValue X86CC;
+ SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC);
+ SDValue Res = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
+ return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
+ }
- SDValue Res = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
+ // Handle floating point.
+ X86::CondCode CondCode = TranslateX86CC(CC, dl, /*IsFP*/ true, Op0, Op1, DAG);
+ if (CondCode == X86::COND_INVALID)
+ return SDValue();
- if (IsStrict)
- return DAG.getMergeValues({Res, Chain}, dl);
+ SDValue EFLAGS;
+ if (IsStrict) {
+ bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
+ EFLAGS =
+ DAG.getNode(IsSignaling ? X86ISD::STRICT_FCMPS : X86ISD::STRICT_FCMP,
+ dl, {MVT::i32, MVT::Other}, {Chain, Op0, Op1});
+ Chain = EFLAGS.getValue(1);
+ } else {
+ EFLAGS = DAG.getNode(X86ISD::FCMP, dl, MVT::i32, Op0, Op1);
+ }
- return Res;
+ SDValue X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8);
+ SDValue Res = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
+ return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
}
SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const {
@@ -21946,9 +22846,8 @@ SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const
// Recreate the carry if needed.
EVT CarryVT = Carry.getValueType();
- APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
- Carry, DAG.getConstant(NegOne, DL, CarryVT));
+ Carry, DAG.getAllOnesConstant(DL, CarryVT));
SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry.getValue(1));
@@ -22024,7 +22923,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
static bool isX86LogicalCmp(SDValue Op) {
unsigned Opc = Op.getOpcode();
if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI ||
- Opc == X86ISD::SAHF)
+ Opc == X86ISD::FCMP)
return true;
if (Op.getResNo() == 1 &&
(Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC ||
@@ -22057,9 +22956,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops
// are available or VBLENDV if AVX is available.
// Otherwise FP cmovs get lowered into a less efficient branch sequence later.
- if (Cond.getOpcode() == ISD::SETCC &&
- ((Subtarget.hasSSE2() && VT == MVT::f64) ||
- (Subtarget.hasSSE1() && VT == MVT::f32)) &&
+ if (Cond.getOpcode() == ISD::SETCC && isScalarFPTypeInSSEReg(VT) &&
VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) {
SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1);
bool IsAlwaysSignaling;
@@ -22115,45 +23012,11 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
// AVX512 fallback is to lower selects of scalar floats to masked moves.
- if ((VT == MVT::f64 || VT == MVT::f32) && Subtarget.hasAVX512()) {
+ if (isScalarFPTypeInSSEReg(VT) && Subtarget.hasAVX512()) {
SDValue Cmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, Cond);
return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);
}
- // For v64i1 without 64-bit support we need to split and rejoin.
- if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
- assert(Subtarget.hasBWI() && "Expected BWI to be legal");
- SDValue Op1Lo = extractSubVector(Op1, 0, DAG, DL, 32);
- SDValue Op2Lo = extractSubVector(Op2, 0, DAG, DL, 32);
- SDValue Op1Hi = extractSubVector(Op1, 32, DAG, DL, 32);
- SDValue Op2Hi = extractSubVector(Op2, 32, DAG, DL, 32);
- SDValue Lo = DAG.getSelect(DL, MVT::v32i1, Cond, Op1Lo, Op2Lo);
- SDValue Hi = DAG.getSelect(DL, MVT::v32i1, Cond, Op1Hi, Op2Hi);
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
- }
-
- if (VT.isVector() && VT.getVectorElementType() == MVT::i1) {
- SDValue Op1Scalar;
- if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode()))
- Op1Scalar = ConvertI1VectorToInteger(Op1, DAG);
- else if (Op1.getOpcode() == ISD::BITCAST && Op1.getOperand(0))
- Op1Scalar = Op1.getOperand(0);
- SDValue Op2Scalar;
- if (ISD::isBuildVectorOfConstantSDNodes(Op2.getNode()))
- Op2Scalar = ConvertI1VectorToInteger(Op2, DAG);
- else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0))
- Op2Scalar = Op2.getOperand(0);
- if (Op1Scalar.getNode() && Op2Scalar.getNode()) {
- SDValue newSelect = DAG.getSelect(DL, Op1Scalar.getValueType(), Cond,
- Op1Scalar, Op2Scalar);
- if (newSelect.getValueSizeInBits() == VT.getSizeInBits())
- return DAG.getBitcast(VT, newSelect);
- SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec,
- DAG.getIntPtrConstant(0, DL));
- }
- }
-
if (Cond.getOpcode() == ISD::SETCC) {
if (SDValue NewCond = LowerSETCC(Cond, DAG)) {
Cond = NewCond;
@@ -22175,12 +23038,28 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
isNullConstant(Cond.getOperand(1).getOperand(1))) {
SDValue Cmp = Cond.getOperand(1);
+ SDValue CmpOp0 = Cmp.getOperand(0);
unsigned CondCode = Cond.getConstantOperandVal(0);
- if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
+ // Special handling for __builtin_ffs(X) - 1 pattern which looks like
+ // (select (seteq X, 0), -1, (cttz_zero_undef X)). Disable the special
+ // handle to keep the CMP with 0. This should be removed by
+ // optimizeCompareInst by using the flags from the BSR/TZCNT used for the
+ // cttz_zero_undef.
+ auto MatchFFSMinus1 = [&](SDValue Op1, SDValue Op2) {
+ return (Op1.getOpcode() == ISD::CTTZ_ZERO_UNDEF && Op1.hasOneUse() &&
+ Op1.getOperand(0) == CmpOp0 && isAllOnesConstant(Op2));
+ };
+ if (Subtarget.hasCMov() && (VT == MVT::i32 || VT == MVT::i64) &&
+ ((CondCode == X86::COND_NE && MatchFFSMinus1(Op1, Op2)) ||
+ (CondCode == X86::COND_E && MatchFFSMinus1(Op2, Op1)))) {
+ // Keep Cmp.
+ } else if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
(CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
SDValue Y = isAllOnesConstant(Op2) ? Op1 : Op2;
- SDValue CmpOp0 = Cmp.getOperand(0);
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ SDVTList CmpVTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
// Apply further optimizations for special cases
// (select (x != 0), -1, 0) -> neg & sbb
@@ -22188,31 +23067,25 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (isNullConstant(Y) &&
(isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) {
SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType());
- SDValue CmpZero = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Zero, CmpOp0);
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ SDValue Neg = DAG.getNode(X86ISD::SUB, DL, CmpVTs, Zero, CmpOp0);
Zero = DAG.getConstant(0, DL, Op.getValueType());
- return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, CmpZero);
+ return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Neg.getValue(1));
}
- Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
+ Cmp = DAG.getNode(X86ISD::SUB, DL, CmpVTs,
CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType()));
- Cmp = ConvertCmpIfNecessary(Cmp, DAG);
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
SDValue Zero = DAG.getConstant(0, DL, Op.getValueType());
SDValue Res = // Res = 0 or -1.
- DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp);
+ DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp.getValue(1));
if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E))
Res = DAG.getNOT(DL, Res, Res.getValueType());
- if (!isNullConstant(Op2))
- Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
- return Res;
+ return DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
} else if (!Subtarget.hasCMov() && CondCode == X86::COND_E &&
Cmp.getOperand(0).getOpcode() == ISD::AND &&
isOneConstant(Cmp.getOperand(0).getOperand(1))) {
- SDValue CmpOp0 = Cmp.getOperand(0);
SDValue Src1, Src2;
// true if Op2 is XOR or OR operator and one of its operands
// is equal to Op1
@@ -22265,7 +23138,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp = Cond.getOperand(1);
bool IllegalFPCMov = false;
if (VT.isFloatingPoint() && !VT.isVector() &&
- !isScalarFPTypeInSSEReg(VT)) // FPStack?
+ !isScalarFPTypeInSSEReg(VT) && Subtarget.hasCMov()) // FPStack?
IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
@@ -22311,7 +23184,6 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// a >= b ? -1 : 0 -> RES = setcc_carry
// a >= b ? 0 : -1 -> RES = ~setcc_carry
if (Cond.getOpcode() == X86ISD::SUB) {
- Cond = ConvertCmpIfNecessary(Cond, DAG);
unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
@@ -22333,7 +23205,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) {
SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
if (T1.getValueType() == T2.getValueType() &&
- // Blacklist CopyFromReg to avoid partial register stalls.
+ // Exclude CopyFromReg to avoid partial register stalls.
T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode()!=ISD::CopyFromReg){
SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, T1.getValueType(), T2, T1,
CC, Cond);
@@ -22570,14 +23442,9 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
InVT.getVectorElementType() == MVT::i32) &&
"Unexpected element type");
- // Custom legalize v8i8->v8i64 on CPUs without avx512bw.
- if (InVT == MVT::v8i8) {
- if (VT != MVT::v8i64)
- return SDValue();
-
- In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op),
- MVT::v16i8, In, DAG.getUNDEF(MVT::v8i8));
- return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, VT, In);
+ if (VT == MVT::v32i16 && !Subtarget.hasBWI()) {
+ assert(InVT == MVT::v32i8 && "Unexpected VT!");
+ return splitVectorIntUnary(Op, DAG);
}
if (Subtarget.hasInt256())
@@ -22620,23 +23487,19 @@ static SDValue splitVectorStore(StoreSDNode *Store, SelectionDAG &DAG) {
if (!Store->isSimple())
return SDValue();
- EVT StoreVT = StoredVal.getValueType();
- unsigned NumElems = StoreVT.getVectorNumElements();
- unsigned HalfSize = StoredVal.getValueSizeInBits() / 2;
- unsigned HalfAlign = (128 == HalfSize ? 16 : 32);
-
SDLoc DL(Store);
- SDValue Value0 = extractSubVector(StoredVal, 0, DAG, DL, HalfSize);
- SDValue Value1 = extractSubVector(StoredVal, NumElems / 2, DAG, DL, HalfSize);
+ SDValue Value0, Value1;
+ std::tie(Value0, Value1) = splitVector(StoredVal, DAG, DL);
+ unsigned HalfOffset = Value0.getValueType().getStoreSize();
SDValue Ptr0 = Store->getBasePtr();
- SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, HalfAlign, DL);
- unsigned Alignment = Store->getAlignment();
+ SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, HalfOffset, DL);
SDValue Ch0 =
DAG.getStore(Store->getChain(), DL, Value0, Ptr0, Store->getPointerInfo(),
- Alignment, Store->getMemOperand()->getFlags());
+ Store->getOriginalAlign(),
+ Store->getMemOperand()->getFlags());
SDValue Ch1 = DAG.getStore(Store->getChain(), DL, Value1, Ptr1,
- Store->getPointerInfo().getWithOffset(HalfAlign),
- MinAlign(Alignment, HalfAlign),
+ Store->getPointerInfo().getWithOffset(HalfOffset),
+ Store->getOriginalAlign(),
Store->getMemOperand()->getFlags());
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Ch0, Ch1);
}
@@ -22659,7 +23522,6 @@ static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT,
MVT StoreSVT = StoreVT.getScalarType();
unsigned NumElems = StoreVT.getVectorNumElements();
unsigned ScalarSize = StoreSVT.getStoreSize();
- unsigned Alignment = Store->getAlignment();
SDLoc DL(Store);
SmallVector<SDValue, 4> Stores;
@@ -22670,7 +23532,7 @@ static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT,
DAG.getIntPtrConstant(i, DL));
SDValue Ch = DAG.getStore(Store->getChain(), DL, Scl, Ptr,
Store->getPointerInfo().getWithOffset(Offset),
- MinAlign(Alignment, Offset),
+ Store->getOriginalAlign(),
Store->getMemOperand()->getFlags());
Stores.push_back(Ch);
}
@@ -22699,7 +23561,7 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
- St->getPointerInfo(), St->getAlignment(),
+ St->getPointerInfo(), St->getOriginalAlign(),
St->getMemOperand()->getFlags());
}
@@ -22711,7 +23573,9 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
// and each half can execute independently. Some cores would split the op into
// halves anyway, so the concat (vinsertf128) is purely an extra op.
MVT StoreVT = StoredVal.getSimpleValueType();
- if (StoreVT.is256BitVector()) {
+ if (StoreVT.is256BitVector() ||
+ ((StoreVT == MVT::v32i16 || StoreVT == MVT::v64i8) &&
+ !Subtarget.hasBWI())) {
SmallVector<SDValue, 4> CatOps;
if (StoredVal.hasOneUse() && collectConcatOps(StoredVal.getNode(), CatOps))
return splitVectorStore(St, DAG);
@@ -22738,7 +23602,7 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
DAG.getIntPtrConstant(0, dl));
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
- St->getPointerInfo(), St->getAlignment(),
+ St->getPointerInfo(), St->getOriginalAlign(),
St->getMemOperand()->getFlags());
}
assert(Subtarget.hasSSE1() && "Expected SSE");
@@ -22773,7 +23637,7 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
"Expected AVX512F without AVX512DQI");
SDValue NewLd = DAG.getLoad(MVT::i8, dl, Ld->getChain(), Ld->getBasePtr(),
- Ld->getPointerInfo(), Ld->getAlignment(),
+ Ld->getPointerInfo(), Ld->getOriginalAlign(),
Ld->getMemOperand()->getFlags());
// Replace chain users with the new chain.
@@ -22801,163 +23665,44 @@ static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
Op.getOperand(1).hasOneUse());
}
-/// Return true if node is an ISD::XOR of a X86ISD::SETCC and 1 and that the
-/// SETCC node has a single use.
-static bool isXor1OfSetCC(SDValue Op) {
- if (Op.getOpcode() != ISD::XOR)
- return false;
- if (isOneConstant(Op.getOperand(1)))
- return Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
- Op.getOperand(0).hasOneUse();
- return false;
-}
-
SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
- bool addTest = true;
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
SDValue Dest = Op.getOperand(2);
SDLoc dl(Op);
- SDValue CC;
- bool Inverted = false;
- if (Cond.getOpcode() == ISD::SETCC) {
- // Check for setcc([su]{add,sub,mul}o == 0).
- if (cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ &&
- isNullConstant(Cond.getOperand(1)) &&
- Cond.getOperand(0).getResNo() == 1 &&
- (Cond.getOperand(0).getOpcode() == ISD::SADDO ||
- Cond.getOperand(0).getOpcode() == ISD::UADDO ||
- Cond.getOperand(0).getOpcode() == ISD::SSUBO ||
- Cond.getOperand(0).getOpcode() == ISD::USUBO ||
- Cond.getOperand(0).getOpcode() == ISD::SMULO ||
- Cond.getOperand(0).getOpcode() == ISD::UMULO)) {
- Inverted = true;
- Cond = Cond.getOperand(0);
- } else {
- if (SDValue NewCond = LowerSETCC(Cond, DAG))
- Cond = NewCond;
- }
- }
-#if 0
- // FIXME: LowerXALUO doesn't handle these!!
- else if (Cond.getOpcode() == X86ISD::ADD ||
- Cond.getOpcode() == X86ISD::SUB ||
- Cond.getOpcode() == X86ISD::SMUL ||
- Cond.getOpcode() == X86ISD::UMUL)
- Cond = LowerXALUO(Cond, DAG);
-#endif
+ if (Cond.getOpcode() == ISD::SETCC &&
+ Cond.getOperand(0).getValueType() != MVT::f128) {
+ SDValue LHS = Cond.getOperand(0);
+ SDValue RHS = Cond.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
- // Look pass (and (setcc_carry (cmp ...)), 1).
- if (Cond.getOpcode() == ISD::AND &&
- Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY &&
- isOneConstant(Cond.getOperand(1)))
- Cond = Cond.getOperand(0);
+ // Special case for
+ // setcc([su]{add,sub,mul}o == 0)
+ // setcc([su]{add,sub,mul}o != 1)
+ if (ISD::isOverflowIntrOpRes(LHS) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ (isNullConstant(RHS) || isOneConstant(RHS))) {
+ SDValue Value, Overflow;
+ X86::CondCode X86Cond;
+ std::tie(Value, Overflow) = getX86XALUOOp(X86Cond, LHS.getValue(0), DAG);
- // If condition flag is set by a X86ISD::CMP, then use it as the condition
- // setting operand in place of the X86ISD::SETCC.
- unsigned CondOpcode = Cond.getOpcode();
- if (CondOpcode == X86ISD::SETCC ||
- CondOpcode == X86ISD::SETCC_CARRY) {
- CC = Cond.getOperand(0);
+ if ((CC == ISD::SETEQ) == isNullConstant(RHS))
+ X86Cond = X86::GetOppositeBranchCondition(X86Cond);
- SDValue Cmp = Cond.getOperand(1);
- unsigned Opc = Cmp.getOpcode();
- // FIXME: WHY THE SPECIAL CASING OF LogicalCmp??
- if (isX86LogicalCmp(Cmp) || Opc == X86ISD::BT) {
- Cond = Cmp;
- addTest = false;
- } else {
- switch (cast<ConstantSDNode>(CC)->getZExtValue()) {
- default: break;
- case X86::COND_O:
- case X86::COND_B:
- // These can only come from an arithmetic instruction with overflow,
- // e.g. SADDO, UADDO.
- Cond = Cond.getOperand(1);
- addTest = false;
- break;
- }
+ SDValue CCVal = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
+ return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
+ Overflow);
}
- }
- CondOpcode = Cond.getOpcode();
- if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
- CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
- CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) {
- SDValue Value;
- X86::CondCode X86Cond;
- std::tie(Value, Cond) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG);
- if (Inverted)
- X86Cond = X86::GetOppositeBranchCondition(X86Cond);
+ if (LHS.getSimpleValueType().isInteger()) {
+ SDValue CCVal;
+ SDValue EFLAGS = emitFlagsForSetcc(LHS, RHS, CC, SDLoc(Cond), DAG, CCVal);
+ return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
+ EFLAGS);
+ }
- CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
- addTest = false;
- } else {
- unsigned CondOpc;
- if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) {
- SDValue Cmp = Cond.getOperand(0).getOperand(1);
- if (CondOpc == ISD::OR) {
- // Also, recognize the pattern generated by an FCMP_UNE. We can emit
- // two branches instead of an explicit OR instruction with a
- // separate test.
- if (Cmp == Cond.getOperand(1).getOperand(1) &&
- isX86LogicalCmp(Cmp)) {
- CC = Cond.getOperand(0).getOperand(0);
- Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
- Chain, Dest, CC, Cmp);
- CC = Cond.getOperand(1).getOperand(0);
- Cond = Cmp;
- addTest = false;
- }
- } else { // ISD::AND
- // Also, recognize the pattern generated by an FCMP_OEQ. We can emit
- // two branches instead of an explicit AND instruction with a
- // separate test. However, we only do this if this block doesn't
- // have a fall-through edge, because this requires an explicit
- // jmp when the condition is false.
- if (Cmp == Cond.getOperand(1).getOperand(1) &&
- isX86LogicalCmp(Cmp) &&
- Op.getNode()->hasOneUse()) {
- X86::CondCode CCode0 =
- (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
- CCode0 = X86::GetOppositeBranchCondition(CCode0);
- CC = DAG.getTargetConstant(CCode0, dl, MVT::i8);
- SDNode *User = *Op.getNode()->use_begin();
- // Look for an unconditional branch following this conditional branch.
- // We need this because we need to reverse the successors in order
- // to implement FCMP_OEQ.
- if (User->getOpcode() == ISD::BR) {
- SDValue FalseBB = User->getOperand(1);
- SDNode *NewBR =
- DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
- assert(NewBR == User);
- (void)NewBR;
- Dest = FalseBB;
-
- Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain,
- Dest, CC, Cmp);
- X86::CondCode CCode1 =
- (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0);
- CCode1 = X86::GetOppositeBranchCondition(CCode1);
- CC = DAG.getTargetConstant(CCode1, dl, MVT::i8);
- Cond = Cmp;
- addTest = false;
- }
- }
- }
- } else if (Cond.hasOneUse() && isXor1OfSetCC(Cond)) {
- // Recognize for xorb (setcc), 1 patterns. The xor inverts the condition.
- // It should be transformed during dag combiner except when the condition
- // is set by a arithmetics with overflow node.
- X86::CondCode CCode =
- (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
- CCode = X86::GetOppositeBranchCondition(CCode);
- CC = DAG.getTargetConstant(CCode, dl, MVT::i8);
- Cond = Cond.getOperand(0).getOperand(1);
- addTest = false;
- } else if (Cond.getOpcode() == ISD::SETCC &&
- cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETOEQ) {
+ if (CC == ISD::SETOEQ) {
// For FCMP_OEQ, we can emit
// two branches instead of an explicit AND instruction with a
// separate test. However, we only do this if this block doesn't
@@ -22976,59 +23721,65 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
(void)NewBR;
Dest = FalseBB;
- SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
- Cond.getOperand(0), Cond.getOperand(1));
- Cmp = ConvertCmpIfNecessary(Cmp, DAG);
- CC = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8);
- Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
- Chain, Dest, CC, Cmp);
- CC = DAG.getTargetConstant(X86::COND_P, dl, MVT::i8);
- Cond = Cmp;
- addTest = false;
+ SDValue Cmp =
+ DAG.getNode(X86ISD::FCMP, SDLoc(Cond), MVT::i32, LHS, RHS);
+ SDValue CCVal = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8);
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest,
+ CCVal, Cmp);
+ CCVal = DAG.getTargetConstant(X86::COND_P, dl, MVT::i8);
+ return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
+ Cmp);
}
}
- } else if (Cond.getOpcode() == ISD::SETCC &&
- cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETUNE) {
+ } else if (CC == ISD::SETUNE) {
// For FCMP_UNE, we can emit
// two branches instead of an explicit OR instruction with a
// separate test.
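      // (Reviewer note, illustrative: after a UCOMIS-style compare, "unordered"
      // sets PF and "equal" sets ZF, so UNE holds when either COND_NE or COND_P
      // is true -- hence the two chained branches below. The SETOEQ case above
      // is the complement and branches to the false block on the same flags.)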
- SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
- Cond.getOperand(0), Cond.getOperand(1));
- Cmp = ConvertCmpIfNecessary(Cmp, DAG);
- CC = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8);
- Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
- Chain, Dest, CC, Cmp);
- CC = DAG.getTargetConstant(X86::COND_P, dl, MVT::i8);
- Cond = Cmp;
- addTest = false;
+ SDValue Cmp = DAG.getNode(X86ISD::FCMP, SDLoc(Cond), MVT::i32, LHS, RHS);
+ SDValue CCVal = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8);
+ Chain =
+ DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, Cmp);
+ CCVal = DAG.getTargetConstant(X86::COND_P, dl, MVT::i8);
+ return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
+ Cmp);
+ } else {
+ X86::CondCode X86Cond =
+ TranslateX86CC(CC, dl, /*IsFP*/ true, LHS, RHS, DAG);
+ SDValue Cmp = DAG.getNode(X86ISD::FCMP, SDLoc(Cond), MVT::i32, LHS, RHS);
+ SDValue CCVal = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
+ return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
+ Cmp);
}
}
- if (addTest) {
- // Look pass the truncate if the high bits are known zero.
- if (isTruncWithZeroHighBitsInput(Cond, DAG))
- Cond = Cond.getOperand(0);
+ if (ISD::isOverflowIntrOpRes(Cond)) {
+ SDValue Value, Overflow;
+ X86::CondCode X86Cond;
+ std::tie(Value, Overflow) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG);
- // We know the result of AND is compared against zero. Try to match
- // it to BT.
- if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
- SDValue BTCC;
- if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, dl, DAG, BTCC)) {
- CC = BTCC;
- Cond = BT;
- addTest = false;
- }
- }
+ SDValue CCVal = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
+ return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
+ Overflow);
}
- if (addTest) {
- X86::CondCode X86Cond = Inverted ? X86::COND_E : X86::COND_NE;
- CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
- Cond = EmitTest(Cond, X86Cond, dl, DAG, Subtarget);
- }
- Cond = ConvertCmpIfNecessary(Cond, DAG);
- return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
- Chain, Dest, CC, Cond);
+ // Look past the truncate if the high bits are known zero.
+ if (isTruncWithZeroHighBitsInput(Cond, DAG))
+ Cond = Cond.getOperand(0);
+
+ EVT CondVT = Cond.getValueType();
+
+ // Add an AND with 1 if we don't already have one.
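+  // (Reviewer note, assumption: only the low bit of the boolean is guaranteed
+  // to be meaningful here, e.g. after looking through a truncate, so the mask
+  // lets the compare-against-zero below test just that bit.)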
+ if (!(Cond.getOpcode() == ISD::AND && isOneConstant(Cond.getOperand(1))))
+ Cond =
+ DAG.getNode(ISD::AND, dl, CondVT, Cond, DAG.getConstant(1, dl, CondVT));
+
+ SDValue LHS = Cond;
+ SDValue RHS = DAG.getConstant(0, dl, CondVT);
+
+ SDValue CCVal;
+ SDValue EFLAGS = emitFlagsForSetcc(LHS, RHS, ISD::SETNE, dl, DAG, CCVal);
+ return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
+ EFLAGS);
}
// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
@@ -23041,9 +23792,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool SplitStack = MF.shouldSplitStack();
- bool EmitStackProbe = !getStackProbeSymbolName(MF).empty();
+ bool EmitStackProbeCall = hasStackProbeSymbol(MF);
bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) ||
- SplitStack || EmitStackProbe;
+ SplitStack || EmitStackProbeCall;
SDLoc dl(Op);
// Get the inputs.
@@ -23067,12 +23818,22 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
" not tell us which reg is the stack pointer!");
- SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
- Chain = SP.getValue(1);
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
- const Align StackAlign(TFI.getStackAlignment());
- Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
- if (Alignment && Alignment > StackAlign)
+ const Align StackAlign = TFI.getStackAlign();
+ if (hasInlineStackProbe(MF)) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
+ Register Vreg = MRI.createVirtualRegister(AddrRegClass);
+ Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
+ Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, SPTy, Chain,
+ DAG.getRegister(Vreg, SPTy));
+ } else {
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+ Chain = SP.getValue(1);
+ Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
+ }
+ if (Alignment && *Alignment > StackAlign)
Result =
DAG.getNode(ISD::AND, dl, VT, Result,
DAG.getConstant(~(Alignment->value() - 1ULL), dl, VT));
@@ -23203,14 +23964,13 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
// Decide which area this value should be read from.
// TODO: Implement the AMD64 ABI in its entirety. This simple
// selection mechanism works only for the basic types.
- if (ArgVT == MVT::f80) {
- llvm_unreachable("va_arg for f80 not yet implemented");
- } else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
+ assert(ArgVT != MVT::f80 && "va_arg for f80 not yet implemented");
+ if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
ArgMode = 2; // Argument passed in XMM register. Use fp_offset.
- } else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) {
- ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset.
} else {
- llvm_unreachable("Unhandled argument type in LowerVAARG");
+ assert(ArgVT.isInteger() && ArgSize <= 32 /*bytes*/ &&
+ "Unhandled argument type in LowerVAARG");
+ ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset.
}
if (ArgMode == 2) {
@@ -23227,11 +23987,8 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(Align, dl, MVT::i32)};
SDVTList VTs = DAG.getVTList(getPointerTy(DAG.getDataLayout()), MVT::Other);
SDValue VAARG = DAG.getMemIntrinsicNode(
- X86ISD::VAARG_64, dl,
- VTs, InstOps, MVT::i64,
- MachinePointerInfo(SV),
- /*Align=*/0,
- MachineMemOperand::MOLoad | MachineMemOperand::MOStore);
+ X86ISD::VAARG_64, dl, VTs, InstOps, MVT::i64, MachinePointerInfo(SV),
+ /*Align=*/None, MachineMemOperand::MOLoad | MachineMemOperand::MOStore);
Chain = VAARG.getValue(1);
// Load the next argument and return it
@@ -23255,9 +24012,8 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget,
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
SDLoc DL(Op);
- return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr,
- DAG.getIntPtrConstant(24, DL), 8, /*isVolatile*/false,
- false, false,
+ return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(24, DL),
+ Align(8), /*isVolatile*/ false, false, false,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
@@ -24004,8 +24760,11 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
+ // Some conditions require the operands to be swapped.
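+    // (Reviewer note: a < b is equivalent to b > a, so after the swap the
+    // unsigned-above flag tests used for GT/GE below also cover LT/LE.)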
+ if (CC == ISD::SETLT || CC == ISD::SETLE)
+ std::swap(LHS, RHS);
+
SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
- SDValue InvComi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, RHS, LHS);
SDValue SetCC;
switch (CC) {
case ISD::SETEQ: { // (ZF = 0 and PF = 0)
@@ -24021,18 +24780,14 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
break;
}
case ISD::SETGT: // (CF = 0 and ZF = 0)
+ case ISD::SETLT: { // Condition opposite to GT. Operands swapped above.
SetCC = getSETCC(X86::COND_A, Comi, dl, DAG);
break;
- case ISD::SETLT: { // The condition is opposite to GT. Swap the operands.
- SetCC = getSETCC(X86::COND_A, InvComi, dl, DAG);
- break;
}
case ISD::SETGE: // CF = 0
+ case ISD::SETLE: // Condition opposite to GE. Operands swapped above.
SetCC = getSETCC(X86::COND_AE, Comi, dl, DAG);
break;
- case ISD::SETLE: // The condition is opposite to GE. Swap the operands.
- SetCC = getSETCC(X86::COND_AE, InvComi, dl, DAG);
- break;
default:
llvm_unreachable("Unexpected illegal condition!");
}
@@ -24481,6 +25236,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// Clamp out of bounds shift amounts since they will otherwise be masked
// to 8-bits which may make it no longer out of bounds.
unsigned ShiftAmount = C->getAPIntValue().getLimitedValue(255);
+ if (ShiftAmount == 0)
+ return Op.getOperand(1);
+
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
Op.getOperand(0), Op.getOperand(1),
DAG.getTargetConstant(ShiftAmount, DL, MVT::i32));
@@ -24540,19 +25298,23 @@ static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl,
TLI.getPointerTy(DAG.getDataLayout()));
EVT MaskVT = Mask.getValueType().changeVectorElementTypeToInteger();
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other);
// If source is undef or we know it won't be used, use a zero vector
// to break register dependency.
// TODO: use undef instead and let BreakFalseDeps deal with it?
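  // (Reviewer note: the hardware gather merges into its destination under the
  // mask, so an unspecified pass-through would make the instruction depend on
  // the previous contents of that register; a zero vector severs that false
  // dependency.)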
if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
+ // Cast mask to an integer type.
+ Mask = DAG.getBitcast(MaskVT, Mask);
+
MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale };
- SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
- VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand());
- return DAG.getMergeValues({ Res, Res.getValue(2) }, dl);
+ SDValue Res =
+ DAG.getMemIntrinsicNode(X86ISD::MGATHER, dl, VTs, Ops,
+ MemIntr->getMemoryVT(), MemIntr->getMemOperand());
+ return DAG.getMergeValues({Res, Res.getValue(1)}, dl);
}
static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG,
@@ -24577,7 +25339,7 @@ static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG,
if (Mask.getValueType() != MaskVT)
Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other);
// If source is undef or we know it won't be used, use a zero vector
// to break register dependency.
// TODO: use undef instead and let BreakFalseDeps deal with it?
@@ -24587,9 +25349,10 @@ static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG,
MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale };
- SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
- VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand());
- return DAG.getMergeValues({ Res, Res.getValue(2) }, dl);
+ SDValue Res =
+ DAG.getMemIntrinsicNode(X86ISD::MGATHER, dl, VTs, Ops,
+ MemIntr->getMemoryVT(), MemIntr->getMemOperand());
+ return DAG.getMergeValues({Res, Res.getValue(1)}, dl);
}
static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
@@ -24615,11 +25378,12 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
- SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
+ SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale};
- SDValue Res = DAG.getTargetMemSDNode<X86MaskedScatterSDNode>(
- VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand());
- return Res.getValue(1);
+ SDValue Res =
+ DAG.getMemIntrinsicNode(X86ISD::MSCATTER, dl, VTs, Ops,
+ MemIntr->getMemoryVT(), MemIntr->getMemOperand());
+ return Res;
}
static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
@@ -24778,13 +25542,11 @@ static SDValue
EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, SDValue Val,
SDValue Ptr, EVT MemVT, MachineMemOperand *MMO,
SelectionDAG &DAG) {
-
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Undef = DAG.getUNDEF(Ptr.getValueType());
SDValue Ops[] = { Chain, Val, Ptr, Undef };
- return SignedSat ?
- DAG.getTargetMemSDNode<TruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) :
- DAG.getTargetMemSDNode<TruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO);
+ unsigned Opc = SignedSat ? X86ISD::VTRUNCSTORES : X86ISD::VTRUNCSTOREUS;
+ return DAG.getMemIntrinsicNode(Opc, Dl, VTs, Ops, MemVT, MMO);
}
/// Emit Masked Truncating Store with signed or unsigned saturation.
@@ -24792,12 +25554,10 @@ static SDValue
EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl,
SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT,
MachineMemOperand *MMO, SelectionDAG &DAG) {
-
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Ops[] = { Chain, Val, Ptr, Mask };
- return SignedSat ?
- DAG.getTargetMemSDNode<MaskedTruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) :
- DAG.getTargetMemSDNode<MaskedTruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO);
+ unsigned Opc = SignedSat ? X86ISD::VMTRUNCSTORES : X86ISD::VMTRUNCSTOREUS;
+ return DAG.getMemIntrinsicNode(Opc, Dl, VTs, Ops, MemVT, MMO);
}
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
@@ -25147,7 +25907,7 @@ SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op));
}
-unsigned X86TargetLowering::getExceptionPointerRegister(
+Register X86TargetLowering::getExceptionPointerRegister(
const Constant *PersonalityFn) const {
if (classifyEHPersonality(PersonalityFn) == EHPersonality::CoreCLR)
return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX;
@@ -25155,7 +25915,7 @@ unsigned X86TargetLowering::getExceptionPointerRegister(
return Subtarget.isTarget64BitLP64() ? X86::RAX : X86::EAX;
}
-unsigned X86TargetLowering::getExceptionSelectorRegister(
+Register X86TargetLowering::getExceptionSelectorRegister(
const Constant *PersonalityFn) const {
// Funclet personalities don't use selectors (the runtime does the selection).
assert(!isFuncletEHPersonality(classifyEHPersonality(PersonalityFn)));
@@ -25179,7 +25939,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
(FrameReg == X86::EBP && PtrVT == MVT::i32)) &&
"Invalid Frame Register!");
SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT);
- unsigned StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX;
+ Register StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX;
SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame,
DAG.getIntPtrConstant(RegInfo->getSlotSize(),
@@ -25393,93 +26153,51 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
2 Round to +inf
3 Round to -inf
- To perform the conversion, we do:
- (((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3)
+ To perform the conversion, we use a packed lookup table of the four 2-bit
+  values that we can index by FPSR[11:10]
+ 0x2d --> (0b00,10,11,01) --> (0,2,3,1) >> FPSR[11:10]
+
+ (0x2d >> ((FPSR & 0xc00) >> 9)) & 3
*/
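  // (Reviewer-added worked example, not in the source: FPSR[11:10]=00 gives a
  // shift of 0 and 0x2d & 3 == 1 (nearest); 01 -> shift 2, value 3 (-inf);
  // 10 -> shift 4, value 2 (+inf); 11 -> shift 6, value 0 (round to zero),
  // matching the FLT_ROUNDS encoding listed above.)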
MachineFunction &MF = DAG.getMachineFunction();
- const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
- const Align StackAlignment(TFI.getStackAlignment());
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
// Save FP Control Word to stack slot
- int SSFI =
- MF.getFrameInfo().CreateStackObject(2, StackAlignment.value(), false);
+ int SSFI = MF.getFrameInfo().CreateStackObject(2, Align(2), false);
SDValue StackSlot =
DAG.getFrameIndex(SSFI, getPointerTy(DAG.getDataLayout()));
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
- MachineMemOperand::MOStore, 2, 2);
+ MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, SSFI);
- SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
- SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
- DAG.getVTList(MVT::Other),
- Ops, MVT::i16, MMO);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Ops[] = {Chain, StackSlot};
+ Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
+ DAG.getVTList(MVT::Other), Ops, MVT::i16, MPI,
+ Align(2), MachineMemOperand::MOStore);
// Load FP Control Word from stack slot
- SDValue CWD =
- DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MachinePointerInfo());
+ SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MPI, Align(2));
+ Chain = CWD.getValue(1);
- // Transform as necessary
- SDValue CWD1 =
+ // Mask and turn the control bits into a shift for the lookup table.
+ SDValue Shift =
DAG.getNode(ISD::SRL, DL, MVT::i16,
DAG.getNode(ISD::AND, DL, MVT::i16,
- CWD, DAG.getConstant(0x800, DL, MVT::i16)),
- DAG.getConstant(11, DL, MVT::i8));
- SDValue CWD2 =
- DAG.getNode(ISD::SRL, DL, MVT::i16,
- DAG.getNode(ISD::AND, DL, MVT::i16,
- CWD, DAG.getConstant(0x400, DL, MVT::i16)),
+ CWD, DAG.getConstant(0xc00, DL, MVT::i16)),
DAG.getConstant(9, DL, MVT::i8));
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Shift);
+ SDValue LUT = DAG.getConstant(0x2d, DL, MVT::i32);
SDValue RetVal =
- DAG.getNode(ISD::AND, DL, MVT::i16,
- DAG.getNode(ISD::ADD, DL, MVT::i16,
- DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2),
- DAG.getConstant(1, DL, MVT::i16)),
- DAG.getConstant(3, DL, MVT::i16));
+ DAG.getNode(ISD::AND, DL, MVT::i32,
+ DAG.getNode(ISD::SRL, DL, MVT::i32, LUT, Shift),
+ DAG.getConstant(3, DL, MVT::i32));
- return DAG.getNode((VT.getSizeInBits() < 16 ?
- ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
-}
+ RetVal = DAG.getZExtOrTrunc(RetVal, DL, VT);
-// Split an unary integer op into 2 half sized ops.
-static SDValue LowerVectorIntUnary(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getSimpleValueType();
- unsigned NumElems = VT.getVectorNumElements();
- unsigned SizeInBits = VT.getSizeInBits();
- MVT EltVT = VT.getVectorElementType();
- SDValue Src = Op.getOperand(0);
- assert(EltVT == Src.getSimpleValueType().getVectorElementType() &&
- "Src and Op should have the same element type!");
-
- // Extract the Lo/Hi vectors
- SDLoc dl(Op);
- SDValue Lo = extractSubVector(Src, 0, DAG, dl, SizeInBits / 2);
- SDValue Hi = extractSubVector(Src, NumElems / 2, DAG, dl, SizeInBits / 2);
-
- MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2);
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
- DAG.getNode(Op.getOpcode(), dl, NewVT, Lo),
- DAG.getNode(Op.getOpcode(), dl, NewVT, Hi));
-}
-
-// Decompose 256-bit ops into smaller 128-bit ops.
-static SDValue Lower256IntUnary(SDValue Op, SelectionDAG &DAG) {
- assert(Op.getSimpleValueType().is256BitVector() &&
- Op.getSimpleValueType().isInteger() &&
- "Only handle AVX 256-bit vector integer operation");
- return LowerVectorIntUnary(Op, DAG);
-}
-
-// Decompose 512-bit ops into smaller 256-bit ops.
-static SDValue Lower512IntUnary(SDValue Op, SelectionDAG &DAG) {
- assert(Op.getSimpleValueType().is512BitVector() &&
- Op.getSimpleValueType().isInteger() &&
- "Only handle AVX 512-bit vector integer operation");
- return LowerVectorIntUnary(Op, DAG);
+ return DAG.getMergeValues({RetVal, Chain}, DL);
}
/// Lower a vector CTLZ using native supported vector CTLZ instruction.
@@ -25502,7 +26220,7 @@ static SDValue LowerVectorCTLZ_AVX512CDI(SDValue Op, SelectionDAG &DAG,
  // Split vector, its Lo and Hi parts will be handled in the next iteration.
if (NumElems > 16 ||
(NumElems == 16 && !Subtarget.canExtendTo512DQ()))
- return LowerVectorIntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG);
MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems);
assert((NewVT.is256BitVector() || NewVT.is512BitVector()) &&
@@ -25612,11 +26330,11 @@ static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return Lower256IntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG);
// Decompose 512-bit ops into smaller 256-bit ops.
if (VT.is512BitVector() && !Subtarget.hasBWI())
- return Lower512IntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG);
assert(Subtarget.hasSSSE3() && "Expected SSSE3 support for PSHUFB");
return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG);
@@ -25682,64 +26400,6 @@ static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(X86ISD::CMOV, dl, VT, Ops);
}
-/// Break a 256-bit integer operation into two new 128-bit ones and then
-/// concatenate the result back.
-static SDValue split256IntArith(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getSimpleValueType();
-
- assert(VT.is256BitVector() && VT.isInteger() &&
- "Unsupported value type for operation");
-
- unsigned NumElems = VT.getVectorNumElements();
- SDLoc dl(Op);
-
- // Extract the LHS vectors
- SDValue LHS = Op.getOperand(0);
- SDValue LHS1 = extract128BitVector(LHS, 0, DAG, dl);
- SDValue LHS2 = extract128BitVector(LHS, NumElems / 2, DAG, dl);
-
- // Extract the RHS vectors
- SDValue RHS = Op.getOperand(1);
- SDValue RHS1 = extract128BitVector(RHS, 0, DAG, dl);
- SDValue RHS2 = extract128BitVector(RHS, NumElems / 2, DAG, dl);
-
- MVT EltVT = VT.getVectorElementType();
- MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
-
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
- DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1),
- DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
-}
-
-/// Break a 512-bit integer operation into two new 256-bit ones and then
-/// concatenate the result back.
-static SDValue split512IntArith(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getSimpleValueType();
-
- assert(VT.is512BitVector() && VT.isInteger() &&
- "Unsupported value type for operation");
-
- unsigned NumElems = VT.getVectorNumElements();
- SDLoc dl(Op);
-
- // Extract the LHS vectors
- SDValue LHS = Op.getOperand(0);
- SDValue LHS1 = extract256BitVector(LHS, 0, DAG, dl);
- SDValue LHS2 = extract256BitVector(LHS, NumElems / 2, DAG, dl);
-
- // Extract the RHS vectors
- SDValue RHS = Op.getOperand(1);
- SDValue RHS1 = extract256BitVector(RHS, 0, DAG, dl);
- SDValue RHS2 = extract256BitVector(RHS, NumElems / 2, DAG, dl);
-
- MVT EltVT = VT.getVectorElementType();
- MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
-
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
- DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1),
- DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
-}
-
static SDValue lowerAddSub(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
@@ -25750,10 +26410,13 @@ static SDValue lowerAddSub(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::XOR, SDLoc(Op), VT,
Op.getOperand(0), Op.getOperand(1));
+ if (VT == MVT::v32i16 || VT == MVT::v64i8)
+ return splitVectorIntBinary(Op, DAG);
+
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
- return split256IntArith(Op, DAG);
+ return splitVectorIntBinary(Op, DAG);
}
static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
@@ -25798,10 +26461,13 @@ static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
+ if (VT == MVT::v32i16 || VT == MVT::v64i8)
+ return splitVectorIntBinary(Op, DAG);
+
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
- return split256IntArith(Op, DAG);
+ return splitVectorIntBinary(Op, DAG);
}
static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,
@@ -25831,9 +26497,12 @@ static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,
if (VT.is256BitVector() && !Subtarget.hasInt256()) {
assert(VT.isInteger() &&
"Only handle AVX 256-bit vector integer operation");
- return Lower256IntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG);
}
+ if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
+ return splitVectorIntUnary(Op, DAG);
+
// Default to expand.
return SDValue();
}
@@ -25843,7 +26512,10 @@ static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
// For AVX1 cases, split to use legal ops (everything but v4i64).
if (VT.getScalarType() != MVT::i64 && VT.is256BitVector())
- return split256IntArith(Op, DAG);
+ return splitVectorIntBinary(Op, DAG);
+
+ if (VT == MVT::v32i16 || VT == MVT::v64i8)
+ return splitVectorIntBinary(Op, DAG);
SDLoc DL(Op);
unsigned Opcode = Op.getOpcode();
@@ -25887,7 +26559,10 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
// Decompose 256-bit ops into 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return split256IntArith(Op, DAG);
+ return splitVectorIntBinary(Op, DAG);
+
+ if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
+ return splitVectorIntBinary(Op, DAG);
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
@@ -26033,7 +26708,10 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
// Decompose 256-bit ops into 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return split256IntArith(Op, DAG);
+ return splitVectorIntBinary(Op, DAG);
+
+ if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
+ return splitVectorIntBinary(Op, DAG);
if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) {
assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
@@ -26122,41 +26800,9 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
}
- // For signed 512-bit vectors, split into 256-bit vectors to allow the
- // sign-extension to occur.
- if (VT == MVT::v64i8 && IsSigned)
- return split512IntArith(Op, DAG);
-
- // Signed AVX2 implementation - extend xmm subvectors to ymm.
- if (VT == MVT::v32i8 && IsSigned) {
- MVT ExVT = MVT::v16i16;
- SDValue ALo = extract128BitVector(A, 0, DAG, dl);
- SDValue BLo = extract128BitVector(B, 0, DAG, dl);
- SDValue AHi = extract128BitVector(A, NumElts / 2, DAG, dl);
- SDValue BHi = extract128BitVector(B, NumElts / 2, DAG, dl);
- ALo = DAG.getNode(ExAVX, dl, ExVT, ALo);
- BLo = DAG.getNode(ExAVX, dl, ExVT, BLo);
- AHi = DAG.getNode(ExAVX, dl, ExVT, AHi);
- BHi = DAG.getNode(ExAVX, dl, ExVT, BHi);
- SDValue Lo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
- SDValue Hi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
- Lo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Lo, 8, DAG);
- Hi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Hi, 8, DAG);
-
- // Bitcast back to VT and then pack all the even elements from Lo and Hi.
- // Shuffle lowering should turn this into PACKUS+PERMQ
- Lo = DAG.getBitcast(VT, Lo);
- Hi = DAG.getBitcast(VT, Hi);
- return DAG.getVectorShuffle(VT, dl, Lo, Hi,
- { 0, 2, 4, 6, 8, 10, 12, 14,
- 16, 18, 20, 22, 24, 26, 28, 30,
- 32, 34, 36, 38, 40, 42, 44, 46,
- 48, 50, 52, 54, 56, 58, 60, 62});
- }
-
- // For signed v16i8 and all unsigned vXi8 we will unpack the low and high
- // half of each 128 bit lane to widen to a vXi16 type. Do the multiplies,
- // shift the results and pack the half lane results back together.
+ // For vXi8 we will unpack the low and high half of each 128 bit lane to widen
+ // to a vXi16 type. Do the multiplies, shift the results and pack the half
+ // lane results back together.
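+  // (Reviewer note: each i8 pair is extended to i16, the full 16-bit product's
+  // high byte is the mulhi result, and a logical shift right by 8 exposes it
+  // before the halves are packed back to vXi8.)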
MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
@@ -26270,9 +26916,12 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
assert(ArgVT.isInteger() && ArgVT.getSizeInBits() == 128 &&
"Unexpected argument type for lowering");
SDValue StackPtr = DAG.CreateStackTemporary(ArgVT, 16);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
Entry.Node = StackPtr;
InChain = DAG.getStore(InChain, dl, Op->getOperand(i), StackPtr,
- MachinePointerInfo(), /* Alignment = */ 16);
+ MPI, /* Alignment = */ 16);
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Ty = PointerType::get(ArgTy,0);
Entry.IsSExt = false;
@@ -26413,7 +27062,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
return ArithmeticShiftRight64(ShiftAmt);
if (VT == MVT::v16i8 || (Subtarget.hasInt256() && VT == MVT::v32i8) ||
- VT == MVT::v64i8) {
+ (Subtarget.hasBWI() && VT == MVT::v64i8)) {
unsigned NumElts = VT.getVectorNumElements();
MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
@@ -26859,8 +27508,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// Constant ISD::SRA/SRL can be performed efficiently on vXi8 vectors as we
// extend to vXi16 to perform a MUL scale effectively as a MUL_LOHI.
if (ConstantAmt && (Opc == ISD::SRA || Opc == ISD::SRL) &&
- (VT == MVT::v16i8 || VT == MVT::v64i8 ||
- (VT == MVT::v32i8 && Subtarget.hasInt256())) &&
+ (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) ||
+ (VT == MVT::v64i8 && Subtarget.hasBWI())) &&
!Subtarget.hasXOP()) {
int NumElts = VT.getVectorNumElements();
SDValue Cst8 = DAG.getTargetConstant(8, dl, MVT::i8);
@@ -26923,12 +27572,13 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
ISD::SETGT);
return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1));
} else if (Subtarget.hasSSE41()) {
- // On SSE41 targets we make use of the fact that VSELECT lowers
- // to PBLENDVB which selects bytes based just on the sign bit.
+ // On SSE41 targets we can use PBLENDVB which selects bytes based just
+ // on the sign bit.
V0 = DAG.getBitcast(VT, V0);
V1 = DAG.getBitcast(VT, V1);
Sel = DAG.getBitcast(VT, Sel);
- return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1));
+ return DAG.getBitcast(SelVT,
+ DAG.getNode(X86ISD::BLENDV, dl, VT, Sel, V0, V1));
}
// On pre-SSE41 targets we test for the sign bit by comparing to
// zero - a negative value will set all bits of the lanes to true
@@ -27038,14 +27688,15 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
!ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
auto SignBitSelect = [&](SDValue Sel, SDValue V0, SDValue V1) {
- // On SSE41 targets we make use of the fact that VSELECT lowers
- // to PBLENDVB which selects bytes based just on the sign bit.
+ // On SSE41 targets we can use PBLENDVB which selects bytes based just on
+ // the sign bit.
if (UseSSE41) {
MVT ExtVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2);
V0 = DAG.getBitcast(ExtVT, V0);
V1 = DAG.getBitcast(ExtVT, V1);
Sel = DAG.getBitcast(ExtVT, Sel);
- return DAG.getBitcast(VT, DAG.getSelect(dl, ExtVT, Sel, V0, V1));
+ return DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::BLENDV, dl, ExtVT, Sel, V0, V1));
}
// On pre-SSE41 targets we splat the sign bit - a negative value will
// set all bits of the lanes to true and VSELECT uses that in
@@ -27096,7 +27747,10 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// Decompose 256-bit shifts into 128-bit shifts.
if (VT.is256BitVector())
- return split256IntArith(Op, DAG);
+ return splitVectorIntBinary(Op, DAG);
+
+ if (VT == MVT::v32i16 || VT == MVT::v64i8)
+ return splitVectorIntBinary(Op, DAG);
return SDValue();
}
@@ -27114,28 +27768,21 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
int NumElts = VT.getVectorNumElements();
// Check for constant splat rotation amount.
- APInt UndefElts;
- SmallVector<APInt, 32> EltBits;
- int CstSplatIndex = -1;
- if (getTargetConstantBitsFromNode(Amt, EltSizeInBits, UndefElts, EltBits))
- for (int i = 0; i != NumElts; ++i)
- if (!UndefElts[i]) {
- if (CstSplatIndex < 0 || EltBits[i] == EltBits[CstSplatIndex]) {
- CstSplatIndex = i;
- continue;
- }
- CstSplatIndex = -1;
- break;
- }
+ APInt CstSplatValue;
+ bool IsCstSplat = X86::isConstantSplat(Amt, CstSplatValue);
+
+ // Check for splat rotate by zero.
+ if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
+ return R;
// AVX512 implicitly uses modulo rotation amounts.
if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) {
// Attempt to rotate by immediate.
- if (0 <= CstSplatIndex) {
- unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
- uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits);
- return DAG.getNode(Op, DL, VT, R,
- DAG.getTargetConstant(RotateAmt, DL, MVT::i8));
+ if (IsCstSplat) {
+ unsigned RotOpc = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
+ uint64_t RotAmt = CstSplatValue.urem(EltSizeInBits);
+ return DAG.getNode(RotOpc, DL, VT, R,
+ DAG.getTargetConstant(RotAmt, DL, MVT::i8));
}
// Else, fall-back on VPROLV/VPRORV.
@@ -27149,14 +27796,14 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// XOP implicitly uses modulo rotation amounts.
if (Subtarget.hasXOP()) {
if (VT.is256BitVector())
- return split256IntArith(Op, DAG);
+ return splitVectorIntBinary(Op, DAG);
assert(VT.is128BitVector() && "Only rotate 128-bit vectors!");
// Attempt to rotate by immediate.
- if (0 <= CstSplatIndex) {
- uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits);
+ if (IsCstSplat) {
+ uint64_t RotAmt = CstSplatValue.urem(EltSizeInBits);
return DAG.getNode(X86ISD::VROTLI, DL, VT, R,
- DAG.getTargetConstant(RotateAmt, DL, MVT::i8));
+ DAG.getTargetConstant(RotAmt, DL, MVT::i8));
}
// Use general rotate by variable (per-element).
@@ -27165,7 +27812,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// Split 256-bit integers on pre-AVX2 targets.
if (VT.is256BitVector() && !Subtarget.hasAVX2())
- return split256IntArith(Op, DAG);
+ return splitVectorIntBinary(Op, DAG);
assert((VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 ||
((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) &&
@@ -27173,7 +27820,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
"Only vXi32/vXi16/vXi8 vector rotates supported");
  // Rotate by a uniform constant - expand back to shifts.
- if (0 <= CstSplatIndex)
+ if (IsCstSplat)
return SDValue();
bool IsSplatAmt = DAG.isSplatValue(Amt);
@@ -27189,12 +27836,13 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
if (Subtarget.hasSSE41()) {
- // On SSE41 targets we make use of the fact that VSELECT lowers
- // to PBLENDVB which selects bytes based just on the sign bit.
+ // On SSE41 targets we can use PBLENDVB which selects bytes based just
+ // on the sign bit.
V0 = DAG.getBitcast(VT, V0);
V1 = DAG.getBitcast(VT, V1);
Sel = DAG.getBitcast(VT, Sel);
- return DAG.getBitcast(SelVT, DAG.getSelect(DL, VT, Sel, V0, V1));
+ return DAG.getBitcast(SelVT,
+ DAG.getNode(X86ISD::BLENDV, DL, VT, Sel, V0, V1));
}
// On pre-SSE41 targets we test for the sign bit by comparing to
// zero - a negative value will set all bits of the lanes to true
@@ -27306,15 +27954,14 @@ bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
return false;
}
-// TODO: In 32-bit mode, use MOVLPS when SSE1 is available?
-// TODO: In 32-bit mode, use FISTP when X87 is available?
bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
Type *MemType = SI->getValueOperand()->getType();
bool NoImplicitFloatOps =
SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
- !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2())
+ !Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
+ (Subtarget.hasSSE1() || Subtarget.hasX87()))
return false;
return needsCmpXchgNb(MemType);
@@ -27333,7 +27980,7 @@ X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
- (Subtarget.hasSSE2() || Subtarget.hasX87()))
+ (Subtarget.hasSSE1() || Subtarget.hasX87()))
return AtomicExpansionKind::None;
return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
@@ -27399,7 +28046,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
AI->use_empty())
return nullptr;
- auto Builder = IRBuilder<>(AI);
+ IRBuilder<> Builder(AI);
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
auto SSID = AI->getSyncScopeID();
// We must restrict the ordering to avoid generating loads with Release or
@@ -27441,7 +28088,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
// Finally we can emit the atomic load.
LoadInst *Loaded =
Builder.CreateAlignedLoad(AI->getType(), AI->getPointerOperand(),
- AI->getType()->getPrimitiveSizeInBits());
+ Align(AI->getType()->getPrimitiveSizeInBits()));
Loaded->setAtomic(Order, SSID);
AI->replaceAllUsesWith(Loaded);
AI->eraseFromParent();
@@ -27636,18 +28283,6 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi);
}
- // Custom splitting for BWI types when AVX512F is available but BWI isn't.
- if ((SrcVT == MVT::v32i16 || SrcVT == MVT::v64i8) && DstVT.isVector() &&
- DAG.getTargetLoweringInfo().isTypeLegal(DstVT)) {
- SDLoc dl(Op);
- SDValue Lo, Hi;
- std::tie(Lo, Hi) = DAG.SplitVector(Op.getOperand(0), dl);
- MVT CastVT = DstVT.getHalfNumVectorElementsVT();
- Lo = DAG.getBitcast(CastVT, Lo);
- Hi = DAG.getBitcast(CastVT, Hi);
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, DstVT, Lo, Hi);
- }
-
// Use MOVMSK for vector to scalar conversion to prevent scalarization.
if ((SrcVT == MVT::v16i1 || SrcVT == MVT::v32i1) && DstVT.isScalarInteger()) {
assert(!Subtarget.hasAVX512() && "Should use K-registers with AVX512");
@@ -27831,11 +28466,11 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return Lower256IntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG);
// Decompose 512-bit ops into smaller 256-bit ops.
if (VT.is512BitVector() && !Subtarget.hasBWI())
- return Lower512IntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG);
// For element types greater than i8, do vXi8 pop counts and a bytesum.
if (VT.getScalarType() != MVT::i8) {
@@ -27879,7 +28514,7 @@ static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG) {
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector())
- return Lower256IntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG);
assert(VT.is128BitVector() &&
"Only 128-bit vector bitreverse lowering supported.");
@@ -27916,12 +28551,9 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
SDValue In = Op.getOperand(0);
SDLoc DL(Op);
- // Split v8i64/v16i32 without BWI so that we can still use the PSHUFB
- // lowering.
- if (VT == MVT::v8i64 || VT == MVT::v16i32) {
- assert(!Subtarget.hasBWI() && "BWI should Expand BITREVERSE");
- return Lower512IntUnary(Op, DAG);
- }
+ // Split v64i8 without BWI so that we can still use the PSHUFB lowering.
+ if (VT == MVT::v64i8 && !Subtarget.hasBWI())
+ return splitVectorIntUnary(Op, DAG);
unsigned NumElts = VT.getVectorNumElements();
assert(VT.getScalarType() == MVT::i8 &&
@@ -27929,7 +28561,7 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
// Decompose 256-bit ops into smaller 128-bit ops on pre-AVX2.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return Lower256IntUnary(Op, DAG);
+ return splitVectorIntUnary(Op, DAG);
// Perform BITREVERSE using PSHUFB lookups. Each byte is split into
// two nibbles and a PSHUFB lookup to find the bitreverse of each
@@ -28073,28 +28705,54 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
return Op;
if (VT == MVT::i64 && !IsTypeLegal) {
- // For illegal i64 atomic_stores, we can try to use MOVQ if SSE2 is enabled.
- // FIXME: Use movlps with SSE1.
- // FIXME: Use fist with X87.
+ // For illegal i64 atomic_stores, we can try to use MOVQ or MOVLPS if SSE
+ // is enabled.
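+    // (Reviewer note, assumption: a single naturally aligned 8-byte SSE store
+    // is architecturally atomic on x86, which is what makes this lowering safe
+    // without a cmpxchg8b loop.)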
bool NoImplicitFloatOps =
DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat);
- if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
- Subtarget.hasSSE2()) {
- SDValue SclToVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
- Node->getOperand(2));
- SDVTList Tys = DAG.getVTList(MVT::Other);
- SDValue Ops[] = { Node->getChain(), SclToVec, Node->getBasePtr() };
- SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys,
- Ops, MVT::i64,
- Node->getMemOperand());
+ if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
+ SDValue Chain;
+ if (Subtarget.hasSSE1()) {
+ SDValue SclToVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
+ Node->getOperand(2));
+ MVT StVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
+ SclToVec = DAG.getBitcast(StVT, SclToVec);
+ SDVTList Tys = DAG.getVTList(MVT::Other);
+ SDValue Ops[] = {Node->getChain(), SclToVec, Node->getBasePtr()};
+ Chain = DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops,
+ MVT::i64, Node->getMemOperand());
+ } else if (Subtarget.hasX87()) {
+ // First load this into an 80-bit X87 register using a stack temporary.
+ // This will put the whole integer into the significand.
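+        // (Reviewer note, assumption: FILD and FIST each touch the 64-bit slot
+        // with a single memory access, so routing the value through the x87
+        // significand yields an atomic 64-bit store on targets without SSE.)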
+ SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+ Chain =
+ DAG.getStore(Node->getChain(), dl, Node->getOperand(2), StackPtr,
+ MPI, /*Align*/ 0, MachineMemOperand::MOStore);
+ SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
+ SDValue LdOps[] = {Chain, StackPtr};
+ SDValue Value =
+ DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, LdOps, MVT::i64, MPI,
+ /*Align*/ None, MachineMemOperand::MOLoad);
+ Chain = Value.getValue(1);
+
+ // Now use an FIST to do the atomic store.
+ SDValue StoreOps[] = {Chain, Value, Node->getBasePtr()};
+ Chain =
+ DAG.getMemIntrinsicNode(X86ISD::FIST, dl, DAG.getVTList(MVT::Other),
+ StoreOps, MVT::i64, Node->getMemOperand());
+ }
- // If this is a sequentially consistent store, also emit an appropriate
- // barrier.
- if (IsSeqCst)
- Chain = emitLockedStackOp(DAG, Subtarget, Chain, dl);
+ if (Chain) {
+ // If this is a sequentially consistent store, also emit an appropriate
+ // barrier.
+ if (IsSeqCst)
+ Chain = emitLockedStackOp(DAG, Subtarget, Chain, dl);
- return Chain;
+ return Chain;
+ }
}
}
@@ -28123,9 +28781,8 @@ static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
// Set the carry flag.
SDValue Carry = Op.getOperand(2);
EVT CarryVT = Carry.getValueType();
- APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
- Carry, DAG.getConstant(NegOne, DL, CarryVT));
+ Carry, DAG.getAllOnesConstant(DL, CarryVT));
unsigned Opc = Op.getOpcode() == ISD::ADDCARRY ? X86ISD::ADC : X86ISD::SBB;
SDValue Sum = DAG.getNode(Opc, DL, VTs, Op.getOperand(0),
@@ -28170,7 +28827,7 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout()));
Type *RetTy = isF64 ? (Type *)StructType::get(ArgTy, ArgTy)
- : (Type *)VectorType::get(ArgTy, 4);
+ : (Type *)FixedVectorType::get(ArgTy, 4);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
@@ -28267,17 +28924,15 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Src, DAG.getUNDEF(VT));
- SDVTList VTs = DAG.getVTList(MVT::v2i1, MVT::Other);
+ SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
- SDValue NewScatter = DAG.getTargetMemSDNode<X86MaskedScatterSDNode>(
- VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand());
- return SDValue(NewScatter.getNode(), 1);
+ return DAG.getMemIntrinsicNode(X86ISD::MSCATTER, dl, VTs, Ops,
+ N->getMemoryVT(), N->getMemOperand());
}
return SDValue();
}
MVT IndexVT = Index.getSimpleValueType();
- MVT MaskVT = Mask.getSimpleValueType();
// If the index is v2i32, we're being called by type legalization and we
// should just let the default handling take care of it.
@@ -28295,18 +28950,17 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), NumElts);
- MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
Src = ExtendToType(Src, VT, DAG);
Index = ExtendToType(Index, IndexVT, DAG);
Mask = ExtendToType(Mask, MaskVT, DAG, true);
}
- SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
+ SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
- SDValue NewScatter = DAG.getTargetMemSDNode<X86MaskedScatterSDNode>(
- VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand());
- return SDValue(NewScatter.getNode(), 1);
+ return DAG.getMemIntrinsicNode(X86ISD::MSCATTER, dl, VTs, Ops,
+ N->getMemoryVT(), N->getMemOperand());
}
static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
@@ -28332,8 +28986,7 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
N->getMemOperand(), N->getAddressingMode(), N->getExtensionType(),
N->isExpandingLoad());
// Emit a blend.
- SDValue Select = DAG.getNode(ISD::VSELECT, dl, MaskVT, Mask, NewLoad,
- PassThru);
+ SDValue Select = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
return DAG.getMergeValues({ Select, NewLoad.getValue(1) }, dl);
}
@@ -28369,10 +29022,10 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
N->getExtensionType(), N->isExpandingLoad());
- SDValue Exract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- NewLoad.getValue(0),
- DAG.getIntPtrConstant(0, dl));
- SDValue RetOps[] = {Exract, NewLoad.getValue(1)};
+ SDValue Extract =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, NewLoad.getValue(0),
+ DAG.getIntPtrConstant(0, dl));
+ SDValue RetOps[] = {Extract, NewLoad.getValue(1)};
return DAG.getMergeValues(RetOps, dl);
}
@@ -28430,7 +29083,6 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
SDValue Mask = N->getMask();
SDValue PassThru = N->getPassThru();
MVT IndexVT = Index.getSimpleValueType();
- MVT MaskVT = Mask.getSimpleValueType();
assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");
@@ -28451,7 +29103,7 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), NumElts);
- MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
PassThru = ExtendToType(PassThru, VT, DAG);
Index = ExtendToType(Index, IndexVT, DAG);
@@ -28460,12 +29112,12 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
N->getScale() };
- SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
- DAG.getVTList(VT, MaskVT, MVT::Other), Ops, dl, N->getMemoryVT(),
+ SDValue NewGather = DAG.getMemIntrinsicNode(
+ X86ISD::MGATHER, dl, DAG.getVTList(VT, MVT::Other), Ops, N->getMemoryVT(),
N->getMemOperand());
SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OrigVT,
NewGather, DAG.getIntPtrConstant(0, dl));
- return DAG.getMergeValues({Extract, NewGather.getValue(2)}, dl);
+ return DAG.getMergeValues({Extract, NewGather.getValue(1)}, dl);
}
static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) {
@@ -28531,6 +29183,20 @@ SDValue X86TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
return Tmp.first;
}
+// Custom split CVTPS2PH with wide types.
+static SDValue LowerCVTPS2PH(SDValue Op, SelectionDAG &DAG) {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+ SDValue RC = Op.getOperand(1);
+ Lo = DAG.getNode(X86ISD::CVTPS2PH, dl, LoVT, Lo, RC);
+ Hi = DAG.getNode(X86ISD::CVTPS2PH, dl, HiVT, Hi, RC);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
+}
+
/// Provide custom lowering hooks for some operations.
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -28584,14 +29250,21 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::STRICT_FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND: return LowerFP_ROUND(Op, DAG);
+ case ISD::FP16_TO_FP:
+ case ISD::STRICT_FP16_TO_FP: return LowerFP16_TO_FP(Op, DAG);
+ case ISD::FP_TO_FP16:
+ case ISD::STRICT_FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG);
case ISD::STORE: return LowerStore(Op, Subtarget, DAG);
case ISD::FADD:
case ISD::FSUB: return lowerFaddFsub(Op, DAG);
+ case ISD::FROUND: return LowerFROUND(Op, DAG);
case ISD::FABS:
case ISD::FNEG: return LowerFABSorFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
+ case ISD::LRINT:
+ case ISD::LLRINT: return LowerLRINT_LLRINT(Op, DAG);
case ISD::SETCC:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: return LowerSETCC(Op, DAG);
@@ -28659,8 +29332,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::MSCATTER: return LowerMSCATTER(Op, Subtarget, DAG);
case ISD::GC_TRANSITION_START:
case ISD::GC_TRANSITION_END: return LowerGC_TRANSITION(Op, DAG);
- case ISD::ADDRSPACECAST:
- return LowerADDRSPACECAST(Op, DAG);
+ case ISD::ADDRSPACECAST: return LowerADDRSPACECAST(Op, DAG);
+ case X86ISD::CVTPS2PH: return LowerCVTPS2PH(Op, DAG);
}
}
@@ -28706,6 +29379,35 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
N->dump(&DAG);
#endif
llvm_unreachable("Do not know how to custom type legalize this operation!");
+ case X86ISD::CVTPH2PS: {
+ EVT VT = N->getValueType(0);
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+ Lo = DAG.getNode(X86ISD::CVTPH2PS, dl, LoVT, Lo);
+ Hi = DAG.getNode(X86ISD::CVTPH2PS, dl, HiVT, Hi);
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
+ Results.push_back(Res);
+ return;
+ }
+ case X86ISD::STRICT_CVTPH2PS: {
+ EVT VT = N->getValueType(0);
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 1);
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+ Lo = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {LoVT, MVT::Other},
+ {N->getOperand(0), Lo});
+ Hi = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {HiVT, MVT::Other},
+ {N->getOperand(0), Hi});
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Lo.getValue(1), Hi.getValue(1));
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
+ Results.push_back(Res);
+ Results.push_back(Chain);
+ return;
+ }
case ISD::CTPOP: {
assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
// Use a v2i64 if possible.
@@ -28775,7 +29477,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::ABS: {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
assert(N->getValueType(0) == MVT::i64 &&
"Unexpected type (!= i64) on ABS.");
MVT HalfT = MVT::i32;
@@ -28788,15 +29489,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
DAG.getConstant(1, dl, HalfT));
Tmp = DAG.getNode(
ISD::SRA, dl, HalfT, Hi,
- DAG.getConstant(HalfT.getSizeInBits() - 1, dl,
- TLI.getShiftAmountTy(HalfT, DAG.getDataLayout())));
+ DAG.getShiftAmountConstant(HalfT.getSizeInBits() - 1, HalfT, dl));
Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo);
Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi,
SDValue(Lo.getNode(), 1));
Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi);
Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo);
- Results.push_back(Lo);
- Results.push_back(Hi);
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi));
return;
}
// We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32.
@@ -29148,6 +29847,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
return;
}
+ case ISD::LRINT:
+ case ISD::LLRINT: {
+ if (SDValue V = LRINT_LLRINTHelper(N, DAG))
+ Results.push_back(V);
+ return;
+ }
+
case ISD::SINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
case ISD::UINT_TO_FP:
@@ -29185,14 +29891,14 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue SignSrc = DAG.getSelect(dl, SrcVT, IsNeg, Sign, Src);
SmallVector<SDValue, 4> SignCvts(4, DAG.getConstantFP(0.0, dl, MVT::f32));
for (int i = 0; i != 2; ++i) {
- SDValue Src = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
SignSrc, DAG.getIntPtrConstant(i, dl));
if (IsStrict)
SignCvts[i] =
DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {MVT::f32, MVT::Other},
- {N->getOperand(0), Src});
+ {N->getOperand(0), Elt});
else
- SignCvts[i] = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Src);
+ SignCvts[i] = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Elt);
};
SDValue SignCvt = DAG.getBuildVector(MVT::v4f32, dl, SignCvts);
SDValue Slow, Chain;
@@ -29272,7 +29978,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(V.getValue(1));
return;
}
- case ISD::FP_EXTEND: {
+ case ISD::FP_EXTEND:
+ case ISD::STRICT_FP_EXTEND: {
// Right now, only MVT::v2f32 has OperationAction for FP_EXTEND.
// No other ValueType for FP_EXTEND should reach this point.
assert(N->getValueType(0) == MVT::v2f32 &&
@@ -29394,15 +30101,27 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Attribute::NoImplicitFloat);
if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
auto *Node = cast<AtomicSDNode>(N);
- if (Subtarget.hasSSE2()) {
- // Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the
- // lower 64-bits.
- SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+ if (Subtarget.hasSSE1()) {
+ // Use a VZEXT_LOAD which will be selected as MOVQ or XORPS+MOVLPS.
+ // Then extract the lower 64-bits.
+ MVT LdVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
+ SDVTList Tys = DAG.getVTList(LdVT, MVT::Other);
SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
MVT::i64, Node->getMemOperand());
- SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
+ if (Subtarget.hasSSE2()) {
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
+ DAG.getIntPtrConstant(0, dl));
+ Results.push_back(Res);
+ Results.push_back(Ld.getValue(1));
+ return;
+ }
+ // We use an alternative sequence for SSE1 that extracts as v2f32 and
+ // then casts to i64. This avoids a 128-bit stack temporary being
+ // created by type legalization if we were to cast v4f32->v2i64.
+ SDValue Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2f32, Ld,
DAG.getIntPtrConstant(0, dl));
+ Res = DAG.getBitcast(MVT::i64, Res);
Results.push_back(Res);
Results.push_back(Ld.getValue(1));
return;
@@ -29410,14 +30129,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
if (Subtarget.hasX87()) {
// First load this into an 80-bit X87 register. This will put the whole
// integer into the significand.
- // FIXME: Do we need to glue? See FIXME comment in BuildFILD.
- SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other, MVT::Glue);
+ SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
- SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD_FLAG,
+ SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD,
dl, Tys, Ops, MVT::i64,
Node->getMemOperand());
SDValue Chain = Result.getValue(1);
- SDValue InFlag = Result.getValue(2);
// Now store the X87 register to a stack temporary and convert to i64.
// This store is not atomic and doesn't need to be.
@@ -29427,11 +30144,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
MachinePointerInfo MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
- SDValue StoreOps[] = { Chain, Result, StackPtr, InFlag };
- Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, dl,
- DAG.getVTList(MVT::Other), StoreOps,
- MVT::i64, MPI, 0 /*Align*/,
- MachineMemOperand::MOStore);
+ SDValue StoreOps[] = { Chain, Result, StackPtr };
+ Chain = DAG.getMemIntrinsicNode(
+ X86ISD::FIST, dl, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
+ MPI, None /*Align*/, MachineMemOperand::MOStore);
// Finally load the value back from the stack temporary and return it.
// This load is not atomic and doesn't need to be.
@@ -29480,24 +30196,15 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
- // Custom splitting for BWI types when AVX512F is available but BWI isn't.
- if ((DstVT == MVT::v32i16 || DstVT == MVT::v64i8) &&
- SrcVT.isVector() && isTypeLegal(SrcVT)) {
- SDValue Lo, Hi;
- std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
- MVT CastVT = (DstVT == MVT::v32i16) ? MVT::v16i16 : MVT::v32i8;
- Lo = DAG.getBitcast(CastVT, Lo);
- Hi = DAG.getBitcast(CastVT, Hi);
- SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, DstVT, Lo, Hi);
- Results.push_back(Res);
- return;
- }
-
if (DstVT.isVector() && SrcVT == MVT::x86mmx) {
+ // FIXME: Use v4f32 for SSE1?
+ assert(Subtarget.hasSSE2() && "Requires SSE2");
assert(getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector &&
"Unexpected type action!");
EVT WideVT = getTypeToTransformTo(*DAG.getContext(), DstVT);
- SDValue Res = DAG.getNode(X86ISD::MOVQ2DQ, dl, WideVT, N->getOperand(0));
+ SDValue Res = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64,
+ N->getOperand(0));
+ Res = DAG.getBitcast(WideVT, Res);
Results.push_back(Res);
return;
}
@@ -29529,11 +30236,11 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
SDValue Ops[] = { Gather->getChain(), PassThru, Mask,
Gather->getBasePtr(), Index, Gather->getScale() };
- SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
- DAG.getVTList(WideVT, Mask.getValueType(), MVT::Other), Ops, dl,
- Gather->getMemoryVT(), Gather->getMemOperand());
+ SDValue Res = DAG.getMemIntrinsicNode(
+ X86ISD::MGATHER, dl, DAG.getVTList(WideVT, MVT::Other), Ops,
+ Gather->getMemoryVT(), Gather->getMemOperand());
Results.push_back(Res);
- Results.push_back(Res.getValue(2));
+ Results.push_back(Res.getValue(1));
return;
}
return;
@@ -29552,7 +30259,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
if (Subtarget.hasSSE2()) {
MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64;
SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(),
- Ld->getPointerInfo(), Ld->getAlignment(),
+ Ld->getPointerInfo(), Ld->getOriginalAlign(),
Ld->getMemOperand()->getFlags());
SDValue Chain = Res.getValue(1);
MVT VecVT = MVT::getVectorVT(LdVT, 2);
@@ -29573,25 +30280,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::ADDRSPACECAST: {
- SDValue Src = N->getOperand(0);
- EVT DstVT = N->getValueType(0);
- AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
- unsigned SrcAS = CastN->getSrcAddressSpace();
-
- assert(SrcAS != CastN->getDestAddressSpace() &&
- "addrspacecast must be between different address spaces");
-
- SDValue Res;
- if (SrcAS == X86AS::PTR32_UPTR && DstVT == MVT::i64)
- Res = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Src);
- else if (DstVT == MVT::i64)
- Res = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Src);
- else if (DstVT == MVT::i32)
- Res = DAG.getNode(ISD::TRUNCATE, dl, DstVT, Src);
- else
- report_fatal_error("Unrecognized addrspacecast type legalization");
-
- Results.push_back(Res);
+ SDValue V = LowerADDRSPACECAST(SDValue(N,0), DAG);
+ Results.push_back(V);
return;
}
}
@@ -29600,362 +30290,367 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((X86ISD::NodeType)Opcode) {
case X86ISD::FIRST_NUMBER: break;
- case X86ISD::BSF: return "X86ISD::BSF";
- case X86ISD::BSR: return "X86ISD::BSR";
- case X86ISD::SHLD: return "X86ISD::SHLD";
- case X86ISD::SHRD: return "X86ISD::SHRD";
- case X86ISD::FAND: return "X86ISD::FAND";
- case X86ISD::FANDN: return "X86ISD::FANDN";
- case X86ISD::FOR: return "X86ISD::FOR";
- case X86ISD::FXOR: return "X86ISD::FXOR";
- case X86ISD::FILD: return "X86ISD::FILD";
- case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
- case X86ISD::FIST: return "X86ISD::FIST";
- case X86ISD::FP_TO_INT_IN_MEM: return "X86ISD::FP_TO_INT_IN_MEM";
- case X86ISD::FLD: return "X86ISD::FLD";
- case X86ISD::FST: return "X86ISD::FST";
- case X86ISD::CALL: return "X86ISD::CALL";
- case X86ISD::BT: return "X86ISD::BT";
- case X86ISD::CMP: return "X86ISD::CMP";
- case X86ISD::STRICT_FCMP: return "X86ISD::STRICT_FCMP";
- case X86ISD::STRICT_FCMPS: return "X86ISD::STRICT_FCMPS";
- case X86ISD::COMI: return "X86ISD::COMI";
- case X86ISD::UCOMI: return "X86ISD::UCOMI";
- case X86ISD::CMPM: return "X86ISD::CMPM";
- case X86ISD::STRICT_CMPM: return "X86ISD::STRICT_CMPM";
- case X86ISD::CMPM_SAE: return "X86ISD::CMPM_SAE";
- case X86ISD::SETCC: return "X86ISD::SETCC";
- case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
- case X86ISD::FSETCC: return "X86ISD::FSETCC";
- case X86ISD::FSETCCM: return "X86ISD::FSETCCM";
- case X86ISD::FSETCCM_SAE: return "X86ISD::FSETCCM_SAE";
- case X86ISD::CMOV: return "X86ISD::CMOV";
- case X86ISD::BRCOND: return "X86ISD::BRCOND";
- case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
- case X86ISD::IRET: return "X86ISD::IRET";
- case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
- case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
- case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
- case X86ISD::Wrapper: return "X86ISD::Wrapper";
- case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP";
- case X86ISD::MOVQ2DQ: return "X86ISD::MOVQ2DQ";
- case X86ISD::MOVDQ2Q: return "X86ISD::MOVDQ2Q";
- case X86ISD::MMX_MOVD2W: return "X86ISD::MMX_MOVD2W";
- case X86ISD::MMX_MOVW2D: return "X86ISD::MMX_MOVW2D";
- case X86ISD::PEXTRB: return "X86ISD::PEXTRB";
- case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
- case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
- case X86ISD::PINSRB: return "X86ISD::PINSRB";
- case X86ISD::PINSRW: return "X86ISD::PINSRW";
- case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
- case X86ISD::ANDNP: return "X86ISD::ANDNP";
- case X86ISD::BLENDI: return "X86ISD::BLENDI";
- case X86ISD::BLENDV: return "X86ISD::BLENDV";
- case X86ISD::HADD: return "X86ISD::HADD";
- case X86ISD::HSUB: return "X86ISD::HSUB";
- case X86ISD::FHADD: return "X86ISD::FHADD";
- case X86ISD::FHSUB: return "X86ISD::FHSUB";
- case X86ISD::CONFLICT: return "X86ISD::CONFLICT";
- case X86ISD::FMAX: return "X86ISD::FMAX";
- case X86ISD::FMAXS: return "X86ISD::FMAXS";
- case X86ISD::FMAX_SAE: return "X86ISD::FMAX_SAE";
- case X86ISD::FMAXS_SAE: return "X86ISD::FMAXS_SAE";
- case X86ISD::FMIN: return "X86ISD::FMIN";
- case X86ISD::FMINS: return "X86ISD::FMINS";
- case X86ISD::FMIN_SAE: return "X86ISD::FMIN_SAE";
- case X86ISD::FMINS_SAE: return "X86ISD::FMINS_SAE";
- case X86ISD::FMAXC: return "X86ISD::FMAXC";
- case X86ISD::FMINC: return "X86ISD::FMINC";
- case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
- case X86ISD::FRCP: return "X86ISD::FRCP";
- case X86ISD::EXTRQI: return "X86ISD::EXTRQI";
- case X86ISD::INSERTQI: return "X86ISD::INSERTQI";
- case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
- case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR";
- case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
- case X86ISD::EH_SJLJ_SETJMP: return "X86ISD::EH_SJLJ_SETJMP";
- case X86ISD::EH_SJLJ_LONGJMP: return "X86ISD::EH_SJLJ_LONGJMP";
- case X86ISD::EH_SJLJ_SETUP_DISPATCH:
- return "X86ISD::EH_SJLJ_SETUP_DISPATCH";
- case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
- case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
- case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
- case X86ISD::FNSTSW16r: return "X86ISD::FNSTSW16r";
- case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
- case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
- case X86ISD::LCMPXCHG16_DAG: return "X86ISD::LCMPXCHG16_DAG";
- case X86ISD::LCMPXCHG8_SAVE_EBX_DAG:
- return "X86ISD::LCMPXCHG8_SAVE_EBX_DAG";
- case X86ISD::LCMPXCHG16_SAVE_RBX_DAG:
- return "X86ISD::LCMPXCHG16_SAVE_RBX_DAG";
- case X86ISD::LADD: return "X86ISD::LADD";
- case X86ISD::LSUB: return "X86ISD::LSUB";
- case X86ISD::LOR: return "X86ISD::LOR";
- case X86ISD::LXOR: return "X86ISD::LXOR";
- case X86ISD::LAND: return "X86ISD::LAND";
- case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
- case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
- case X86ISD::VEXTRACT_STORE: return "X86ISD::VEXTRACT_STORE";
- case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
- case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS";
- case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS";
- case X86ISD::VMTRUNC: return "X86ISD::VMTRUNC";
- case X86ISD::VMTRUNCS: return "X86ISD::VMTRUNCS";
- case X86ISD::VMTRUNCUS: return "X86ISD::VMTRUNCUS";
- case X86ISD::VTRUNCSTORES: return "X86ISD::VTRUNCSTORES";
- case X86ISD::VTRUNCSTOREUS: return "X86ISD::VTRUNCSTOREUS";
- case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES";
- case X86ISD::VMTRUNCSTOREUS: return "X86ISD::VMTRUNCSTOREUS";
- case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
- case X86ISD::STRICT_VFPEXT: return "X86ISD::STRICT_VFPEXT";
- case X86ISD::VFPEXT_SAE: return "X86ISD::VFPEXT_SAE";
- case X86ISD::VFPEXTS: return "X86ISD::VFPEXTS";
- case X86ISD::VFPEXTS_SAE: return "X86ISD::VFPEXTS_SAE";
- case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
- case X86ISD::STRICT_VFPROUND: return "X86ISD::STRICT_VFPROUND";
- case X86ISD::VMFPROUND: return "X86ISD::VMFPROUND";
- case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND";
- case X86ISD::VFPROUNDS: return "X86ISD::VFPROUNDS";
- case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND";
- case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
- case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
- case X86ISD::VSHL: return "X86ISD::VSHL";
- case X86ISD::VSRL: return "X86ISD::VSRL";
- case X86ISD::VSRA: return "X86ISD::VSRA";
- case X86ISD::VSHLI: return "X86ISD::VSHLI";
- case X86ISD::VSRLI: return "X86ISD::VSRLI";
- case X86ISD::VSRAI: return "X86ISD::VSRAI";
- case X86ISD::VSHLV: return "X86ISD::VSHLV";
- case X86ISD::VSRLV: return "X86ISD::VSRLV";
- case X86ISD::VSRAV: return "X86ISD::VSRAV";
- case X86ISD::VROTLI: return "X86ISD::VROTLI";
- case X86ISD::VROTRI: return "X86ISD::VROTRI";
- case X86ISD::VPPERM: return "X86ISD::VPPERM";
- case X86ISD::CMPP: return "X86ISD::CMPP";
- case X86ISD::STRICT_CMPP: return "X86ISD::STRICT_CMPP";
- case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ";
- case X86ISD::PCMPGT: return "X86ISD::PCMPGT";
- case X86ISD::PHMINPOS: return "X86ISD::PHMINPOS";
- case X86ISD::ADD: return "X86ISD::ADD";
- case X86ISD::SUB: return "X86ISD::SUB";
- case X86ISD::ADC: return "X86ISD::ADC";
- case X86ISD::SBB: return "X86ISD::SBB";
- case X86ISD::SMUL: return "X86ISD::SMUL";
- case X86ISD::UMUL: return "X86ISD::UMUL";
- case X86ISD::OR: return "X86ISD::OR";
- case X86ISD::XOR: return "X86ISD::XOR";
- case X86ISD::AND: return "X86ISD::AND";
- case X86ISD::BEXTR: return "X86ISD::BEXTR";
- case X86ISD::BZHI: return "X86ISD::BZHI";
- case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
- case X86ISD::MOVMSK: return "X86ISD::MOVMSK";
- case X86ISD::PTEST: return "X86ISD::PTEST";
- case X86ISD::TESTP: return "X86ISD::TESTP";
- case X86ISD::KORTEST: return "X86ISD::KORTEST";
- case X86ISD::KTEST: return "X86ISD::KTEST";
- case X86ISD::KADD: return "X86ISD::KADD";
- case X86ISD::KSHIFTL: return "X86ISD::KSHIFTL";
- case X86ISD::KSHIFTR: return "X86ISD::KSHIFTR";
- case X86ISD::PACKSS: return "X86ISD::PACKSS";
- case X86ISD::PACKUS: return "X86ISD::PACKUS";
- case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
- case X86ISD::VALIGN: return "X86ISD::VALIGN";
- case X86ISD::VSHLD: return "X86ISD::VSHLD";
- case X86ISD::VSHRD: return "X86ISD::VSHRD";
- case X86ISD::VSHLDV: return "X86ISD::VSHLDV";
- case X86ISD::VSHRDV: return "X86ISD::VSHRDV";
- case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
- case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
- case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
- case X86ISD::SHUFP: return "X86ISD::SHUFP";
- case X86ISD::SHUF128: return "X86ISD::SHUF128";
- case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS";
- case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS";
- case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP";
- case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP";
- case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP";
- case X86ISD::MOVSD: return "X86ISD::MOVSD";
- case X86ISD::MOVSS: return "X86ISD::MOVSS";
- case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
- case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
- case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
- case X86ISD::VBROADCAST_LOAD: return "X86ISD::VBROADCAST_LOAD";
- case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM";
- case X86ISD::SUBV_BROADCAST: return "X86ISD::SUBV_BROADCAST";
- case X86ISD::VPERMILPV: return "X86ISD::VPERMILPV";
- case X86ISD::VPERMILPI: return "X86ISD::VPERMILPI";
- case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
- case X86ISD::VPERMV: return "X86ISD::VPERMV";
- case X86ISD::VPERMV3: return "X86ISD::VPERMV3";
- case X86ISD::VPERMI: return "X86ISD::VPERMI";
- case X86ISD::VPTERNLOG: return "X86ISD::VPTERNLOG";
- case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM";
- case X86ISD::VFIXUPIMM_SAE: return "X86ISD::VFIXUPIMM_SAE";
- case X86ISD::VFIXUPIMMS: return "X86ISD::VFIXUPIMMS";
- case X86ISD::VFIXUPIMMS_SAE: return "X86ISD::VFIXUPIMMS_SAE";
- case X86ISD::VRANGE: return "X86ISD::VRANGE";
- case X86ISD::VRANGE_SAE: return "X86ISD::VRANGE_SAE";
- case X86ISD::VRANGES: return "X86ISD::VRANGES";
- case X86ISD::VRANGES_SAE: return "X86ISD::VRANGES_SAE";
- case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
- case X86ISD::PMULDQ: return "X86ISD::PMULDQ";
- case X86ISD::PSADBW: return "X86ISD::PSADBW";
- case X86ISD::DBPSADBW: return "X86ISD::DBPSADBW";
- case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
- case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
- case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
- case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER";
- case X86ISD::MFENCE: return "X86ISD::MFENCE";
- case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA";
- case X86ISD::SAHF: return "X86ISD::SAHF";
- case X86ISD::RDRAND: return "X86ISD::RDRAND";
- case X86ISD::RDSEED: return "X86ISD::RDSEED";
- case X86ISD::RDPKRU: return "X86ISD::RDPKRU";
- case X86ISD::WRPKRU: return "X86ISD::WRPKRU";
- case X86ISD::VPMADDUBSW: return "X86ISD::VPMADDUBSW";
- case X86ISD::VPMADDWD: return "X86ISD::VPMADDWD";
- case X86ISD::VPSHA: return "X86ISD::VPSHA";
- case X86ISD::VPSHL: return "X86ISD::VPSHL";
- case X86ISD::VPCOM: return "X86ISD::VPCOM";
- case X86ISD::VPCOMU: return "X86ISD::VPCOMU";
- case X86ISD::VPERMIL2: return "X86ISD::VPERMIL2";
- case X86ISD::FMSUB: return "X86ISD::FMSUB";
- case X86ISD::FNMADD: return "X86ISD::FNMADD";
- case X86ISD::FNMSUB: return "X86ISD::FNMSUB";
- case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB";
- case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD";
- case X86ISD::FMADD_RND: return "X86ISD::FMADD_RND";
- case X86ISD::FNMADD_RND: return "X86ISD::FNMADD_RND";
- case X86ISD::FMSUB_RND: return "X86ISD::FMSUB_RND";
- case X86ISD::FNMSUB_RND: return "X86ISD::FNMSUB_RND";
- case X86ISD::FMADDSUB_RND: return "X86ISD::FMADDSUB_RND";
- case X86ISD::FMSUBADD_RND: return "X86ISD::FMSUBADD_RND";
- case X86ISD::VPMADD52H: return "X86ISD::VPMADD52H";
- case X86ISD::VPMADD52L: return "X86ISD::VPMADD52L";
- case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE";
- case X86ISD::STRICT_VRNDSCALE: return "X86ISD::STRICT_VRNDSCALE";
- case X86ISD::VRNDSCALE_SAE: return "X86ISD::VRNDSCALE_SAE";
- case X86ISD::VRNDSCALES: return "X86ISD::VRNDSCALES";
- case X86ISD::VRNDSCALES_SAE: return "X86ISD::VRNDSCALES_SAE";
- case X86ISD::VREDUCE: return "X86ISD::VREDUCE";
- case X86ISD::VREDUCE_SAE: return "X86ISD::VREDUCE_SAE";
- case X86ISD::VREDUCES: return "X86ISD::VREDUCES";
- case X86ISD::VREDUCES_SAE: return "X86ISD::VREDUCES_SAE";
- case X86ISD::VGETMANT: return "X86ISD::VGETMANT";
- case X86ISD::VGETMANT_SAE: return "X86ISD::VGETMANT_SAE";
- case X86ISD::VGETMANTS: return "X86ISD::VGETMANTS";
- case X86ISD::VGETMANTS_SAE: return "X86ISD::VGETMANTS_SAE";
- case X86ISD::PCMPESTR: return "X86ISD::PCMPESTR";
- case X86ISD::PCMPISTR: return "X86ISD::PCMPISTR";
- case X86ISD::XTEST: return "X86ISD::XTEST";
- case X86ISD::COMPRESS: return "X86ISD::COMPRESS";
- case X86ISD::EXPAND: return "X86ISD::EXPAND";
- case X86ISD::SELECTS: return "X86ISD::SELECTS";
- case X86ISD::ADDSUB: return "X86ISD::ADDSUB";
- case X86ISD::RCP14: return "X86ISD::RCP14";
- case X86ISD::RCP14S: return "X86ISD::RCP14S";
- case X86ISD::RCP28: return "X86ISD::RCP28";
- case X86ISD::RCP28_SAE: return "X86ISD::RCP28_SAE";
- case X86ISD::RCP28S: return "X86ISD::RCP28S";
- case X86ISD::RCP28S_SAE: return "X86ISD::RCP28S_SAE";
- case X86ISD::EXP2: return "X86ISD::EXP2";
- case X86ISD::EXP2_SAE: return "X86ISD::EXP2_SAE";
- case X86ISD::RSQRT14: return "X86ISD::RSQRT14";
- case X86ISD::RSQRT14S: return "X86ISD::RSQRT14S";
- case X86ISD::RSQRT28: return "X86ISD::RSQRT28";
- case X86ISD::RSQRT28_SAE: return "X86ISD::RSQRT28_SAE";
- case X86ISD::RSQRT28S: return "X86ISD::RSQRT28S";
- case X86ISD::RSQRT28S_SAE: return "X86ISD::RSQRT28S_SAE";
- case X86ISD::FADD_RND: return "X86ISD::FADD_RND";
- case X86ISD::FADDS: return "X86ISD::FADDS";
- case X86ISD::FADDS_RND: return "X86ISD::FADDS_RND";
- case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND";
- case X86ISD::FSUBS: return "X86ISD::FSUBS";
- case X86ISD::FSUBS_RND: return "X86ISD::FSUBS_RND";
- case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND";
- case X86ISD::FMULS: return "X86ISD::FMULS";
- case X86ISD::FMULS_RND: return "X86ISD::FMULS_RND";
- case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND";
- case X86ISD::FDIVS: return "X86ISD::FDIVS";
- case X86ISD::FDIVS_RND: return "X86ISD::FDIVS_RND";
- case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND";
- case X86ISD::FSQRTS: return "X86ISD::FSQRTS";
- case X86ISD::FSQRTS_RND: return "X86ISD::FSQRTS_RND";
- case X86ISD::FGETEXP: return "X86ISD::FGETEXP";
- case X86ISD::FGETEXP_SAE: return "X86ISD::FGETEXP_SAE";
- case X86ISD::FGETEXPS: return "X86ISD::FGETEXPS";
- case X86ISD::FGETEXPS_SAE: return "X86ISD::FGETEXPS_SAE";
- case X86ISD::SCALEF: return "X86ISD::SCALEF";
- case X86ISD::SCALEF_RND: return "X86ISD::SCALEF_RND";
- case X86ISD::SCALEFS: return "X86ISD::SCALEFS";
- case X86ISD::SCALEFS_RND: return "X86ISD::SCALEFS_RND";
- case X86ISD::AVG: return "X86ISD::AVG";
- case X86ISD::MULHRS: return "X86ISD::MULHRS";
- case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";
- case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND";
- case X86ISD::CVTTP2SI: return "X86ISD::CVTTP2SI";
- case X86ISD::CVTTP2UI: return "X86ISD::CVTTP2UI";
- case X86ISD::STRICT_CVTTP2SI: return "X86ISD::STRICT_CVTTP2SI";
- case X86ISD::STRICT_CVTTP2UI: return "X86ISD::STRICT_CVTTP2UI";
- case X86ISD::MCVTTP2SI: return "X86ISD::MCVTTP2SI";
- case X86ISD::MCVTTP2UI: return "X86ISD::MCVTTP2UI";
- case X86ISD::CVTTP2SI_SAE: return "X86ISD::CVTTP2SI_SAE";
- case X86ISD::CVTTP2UI_SAE: return "X86ISD::CVTTP2UI_SAE";
- case X86ISD::CVTTS2SI: return "X86ISD::CVTTS2SI";
- case X86ISD::CVTTS2UI: return "X86ISD::CVTTS2UI";
- case X86ISD::CVTTS2SI_SAE: return "X86ISD::CVTTS2SI_SAE";
- case X86ISD::CVTTS2UI_SAE: return "X86ISD::CVTTS2UI_SAE";
- case X86ISD::CVTSI2P: return "X86ISD::CVTSI2P";
- case X86ISD::CVTUI2P: return "X86ISD::CVTUI2P";
- case X86ISD::STRICT_CVTSI2P: return "X86ISD::STRICT_CVTSI2P";
- case X86ISD::STRICT_CVTUI2P: return "X86ISD::STRICT_CVTUI2P";
- case X86ISD::MCVTSI2P: return "X86ISD::MCVTSI2P";
- case X86ISD::MCVTUI2P: return "X86ISD::MCVTUI2P";
- case X86ISD::VFPCLASS: return "X86ISD::VFPCLASS";
- case X86ISD::VFPCLASSS: return "X86ISD::VFPCLASSS";
- case X86ISD::MULTISHIFT: return "X86ISD::MULTISHIFT";
- case X86ISD::SCALAR_SINT_TO_FP: return "X86ISD::SCALAR_SINT_TO_FP";
- case X86ISD::SCALAR_SINT_TO_FP_RND: return "X86ISD::SCALAR_SINT_TO_FP_RND";
- case X86ISD::SCALAR_UINT_TO_FP: return "X86ISD::SCALAR_UINT_TO_FP";
- case X86ISD::SCALAR_UINT_TO_FP_RND: return "X86ISD::SCALAR_UINT_TO_FP_RND";
- case X86ISD::CVTPS2PH: return "X86ISD::CVTPS2PH";
- case X86ISD::MCVTPS2PH: return "X86ISD::MCVTPS2PH";
- case X86ISD::CVTPH2PS: return "X86ISD::CVTPH2PS";
- case X86ISD::CVTPH2PS_SAE: return "X86ISD::CVTPH2PS_SAE";
- case X86ISD::CVTP2SI: return "X86ISD::CVTP2SI";
- case X86ISD::CVTP2UI: return "X86ISD::CVTP2UI";
- case X86ISD::MCVTP2SI: return "X86ISD::MCVTP2SI";
- case X86ISD::MCVTP2UI: return "X86ISD::MCVTP2UI";
- case X86ISD::CVTP2SI_RND: return "X86ISD::CVTP2SI_RND";
- case X86ISD::CVTP2UI_RND: return "X86ISD::CVTP2UI_RND";
- case X86ISD::CVTS2SI: return "X86ISD::CVTS2SI";
- case X86ISD::CVTS2UI: return "X86ISD::CVTS2UI";
- case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND";
- case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND";
- case X86ISD::CVTNE2PS2BF16: return "X86ISD::CVTNE2PS2BF16";
- case X86ISD::CVTNEPS2BF16: return "X86ISD::CVTNEPS2BF16";
- case X86ISD::MCVTNEPS2BF16: return "X86ISD::MCVTNEPS2BF16";
- case X86ISD::DPBF16PS: return "X86ISD::DPBF16PS";
- case X86ISD::LWPINS: return "X86ISD::LWPINS";
- case X86ISD::MGATHER: return "X86ISD::MGATHER";
- case X86ISD::MSCATTER: return "X86ISD::MSCATTER";
- case X86ISD::VPDPBUSD: return "X86ISD::VPDPBUSD";
- case X86ISD::VPDPBUSDS: return "X86ISD::VPDPBUSDS";
- case X86ISD::VPDPWSSD: return "X86ISD::VPDPWSSD";
- case X86ISD::VPDPWSSDS: return "X86ISD::VPDPWSSDS";
- case X86ISD::VPSHUFBITQMB: return "X86ISD::VPSHUFBITQMB";
- case X86ISD::GF2P8MULB: return "X86ISD::GF2P8MULB";
- case X86ISD::GF2P8AFFINEQB: return "X86ISD::GF2P8AFFINEQB";
- case X86ISD::GF2P8AFFINEINVQB: return "X86ISD::GF2P8AFFINEINVQB";
- case X86ISD::NT_CALL: return "X86ISD::NT_CALL";
- case X86ISD::NT_BRIND: return "X86ISD::NT_BRIND";
- case X86ISD::UMWAIT: return "X86ISD::UMWAIT";
- case X86ISD::TPAUSE: return "X86ISD::TPAUSE";
- case X86ISD::ENQCMD: return "X86ISD:ENQCMD";
- case X86ISD::ENQCMDS: return "X86ISD:ENQCMDS";
- case X86ISD::VP2INTERSECT: return "X86ISD::VP2INTERSECT";
+#define NODE_NAME_CASE(NODE) case X86ISD::NODE: return "X86ISD::" #NODE;
+ NODE_NAME_CASE(BSF)
+ NODE_NAME_CASE(BSR)
+ NODE_NAME_CASE(FSHL)
+ NODE_NAME_CASE(FSHR)
+ NODE_NAME_CASE(FAND)
+ NODE_NAME_CASE(FANDN)
+ NODE_NAME_CASE(FOR)
+ NODE_NAME_CASE(FXOR)
+ NODE_NAME_CASE(FILD)
+ NODE_NAME_CASE(FIST)
+ NODE_NAME_CASE(FP_TO_INT_IN_MEM)
+ NODE_NAME_CASE(FLD)
+ NODE_NAME_CASE(FST)
+ NODE_NAME_CASE(CALL)
+ NODE_NAME_CASE(BT)
+ NODE_NAME_CASE(CMP)
+ NODE_NAME_CASE(FCMP)
+ NODE_NAME_CASE(STRICT_FCMP)
+ NODE_NAME_CASE(STRICT_FCMPS)
+ NODE_NAME_CASE(COMI)
+ NODE_NAME_CASE(UCOMI)
+ NODE_NAME_CASE(CMPM)
+ NODE_NAME_CASE(STRICT_CMPM)
+ NODE_NAME_CASE(CMPM_SAE)
+ NODE_NAME_CASE(SETCC)
+ NODE_NAME_CASE(SETCC_CARRY)
+ NODE_NAME_CASE(FSETCC)
+ NODE_NAME_CASE(FSETCCM)
+ NODE_NAME_CASE(FSETCCM_SAE)
+ NODE_NAME_CASE(CMOV)
+ NODE_NAME_CASE(BRCOND)
+ NODE_NAME_CASE(RET_FLAG)
+ NODE_NAME_CASE(IRET)
+ NODE_NAME_CASE(REP_STOS)
+ NODE_NAME_CASE(REP_MOVS)
+ NODE_NAME_CASE(GlobalBaseReg)
+ NODE_NAME_CASE(Wrapper)
+ NODE_NAME_CASE(WrapperRIP)
+ NODE_NAME_CASE(MOVQ2DQ)
+ NODE_NAME_CASE(MOVDQ2Q)
+ NODE_NAME_CASE(MMX_MOVD2W)
+ NODE_NAME_CASE(MMX_MOVW2D)
+ NODE_NAME_CASE(PEXTRB)
+ NODE_NAME_CASE(PEXTRW)
+ NODE_NAME_CASE(INSERTPS)
+ NODE_NAME_CASE(PINSRB)
+ NODE_NAME_CASE(PINSRW)
+ NODE_NAME_CASE(PSHUFB)
+ NODE_NAME_CASE(ANDNP)
+ NODE_NAME_CASE(BLENDI)
+ NODE_NAME_CASE(BLENDV)
+ NODE_NAME_CASE(HADD)
+ NODE_NAME_CASE(HSUB)
+ NODE_NAME_CASE(FHADD)
+ NODE_NAME_CASE(FHSUB)
+ NODE_NAME_CASE(CONFLICT)
+ NODE_NAME_CASE(FMAX)
+ NODE_NAME_CASE(FMAXS)
+ NODE_NAME_CASE(FMAX_SAE)
+ NODE_NAME_CASE(FMAXS_SAE)
+ NODE_NAME_CASE(FMIN)
+ NODE_NAME_CASE(FMINS)
+ NODE_NAME_CASE(FMIN_SAE)
+ NODE_NAME_CASE(FMINS_SAE)
+ NODE_NAME_CASE(FMAXC)
+ NODE_NAME_CASE(FMINC)
+ NODE_NAME_CASE(FRSQRT)
+ NODE_NAME_CASE(FRCP)
+ NODE_NAME_CASE(EXTRQI)
+ NODE_NAME_CASE(INSERTQI)
+ NODE_NAME_CASE(TLSADDR)
+ NODE_NAME_CASE(TLSBASEADDR)
+ NODE_NAME_CASE(TLSCALL)
+ NODE_NAME_CASE(EH_SJLJ_SETJMP)
+ NODE_NAME_CASE(EH_SJLJ_LONGJMP)
+ NODE_NAME_CASE(EH_SJLJ_SETUP_DISPATCH)
+ NODE_NAME_CASE(EH_RETURN)
+ NODE_NAME_CASE(TC_RETURN)
+ NODE_NAME_CASE(FNSTCW16m)
+ NODE_NAME_CASE(LCMPXCHG_DAG)
+ NODE_NAME_CASE(LCMPXCHG8_DAG)
+ NODE_NAME_CASE(LCMPXCHG16_DAG)
+ NODE_NAME_CASE(LCMPXCHG8_SAVE_EBX_DAG)
+ NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG)
+ NODE_NAME_CASE(LADD)
+ NODE_NAME_CASE(LSUB)
+ NODE_NAME_CASE(LOR)
+ NODE_NAME_CASE(LXOR)
+ NODE_NAME_CASE(LAND)
+ NODE_NAME_CASE(VZEXT_MOVL)
+ NODE_NAME_CASE(VZEXT_LOAD)
+ NODE_NAME_CASE(VEXTRACT_STORE)
+ NODE_NAME_CASE(VTRUNC)
+ NODE_NAME_CASE(VTRUNCS)
+ NODE_NAME_CASE(VTRUNCUS)
+ NODE_NAME_CASE(VMTRUNC)
+ NODE_NAME_CASE(VMTRUNCS)
+ NODE_NAME_CASE(VMTRUNCUS)
+ NODE_NAME_CASE(VTRUNCSTORES)
+ NODE_NAME_CASE(VTRUNCSTOREUS)
+ NODE_NAME_CASE(VMTRUNCSTORES)
+ NODE_NAME_CASE(VMTRUNCSTOREUS)
+ NODE_NAME_CASE(VFPEXT)
+ NODE_NAME_CASE(STRICT_VFPEXT)
+ NODE_NAME_CASE(VFPEXT_SAE)
+ NODE_NAME_CASE(VFPEXTS)
+ NODE_NAME_CASE(VFPEXTS_SAE)
+ NODE_NAME_CASE(VFPROUND)
+ NODE_NAME_CASE(STRICT_VFPROUND)
+ NODE_NAME_CASE(VMFPROUND)
+ NODE_NAME_CASE(VFPROUND_RND)
+ NODE_NAME_CASE(VFPROUNDS)
+ NODE_NAME_CASE(VFPROUNDS_RND)
+ NODE_NAME_CASE(VSHLDQ)
+ NODE_NAME_CASE(VSRLDQ)
+ NODE_NAME_CASE(VSHL)
+ NODE_NAME_CASE(VSRL)
+ NODE_NAME_CASE(VSRA)
+ NODE_NAME_CASE(VSHLI)
+ NODE_NAME_CASE(VSRLI)
+ NODE_NAME_CASE(VSRAI)
+ NODE_NAME_CASE(VSHLV)
+ NODE_NAME_CASE(VSRLV)
+ NODE_NAME_CASE(VSRAV)
+ NODE_NAME_CASE(VROTLI)
+ NODE_NAME_CASE(VROTRI)
+ NODE_NAME_CASE(VPPERM)
+ NODE_NAME_CASE(CMPP)
+ NODE_NAME_CASE(STRICT_CMPP)
+ NODE_NAME_CASE(PCMPEQ)
+ NODE_NAME_CASE(PCMPGT)
+ NODE_NAME_CASE(PHMINPOS)
+ NODE_NAME_CASE(ADD)
+ NODE_NAME_CASE(SUB)
+ NODE_NAME_CASE(ADC)
+ NODE_NAME_CASE(SBB)
+ NODE_NAME_CASE(SMUL)
+ NODE_NAME_CASE(UMUL)
+ NODE_NAME_CASE(OR)
+ NODE_NAME_CASE(XOR)
+ NODE_NAME_CASE(AND)
+ NODE_NAME_CASE(BEXTR)
+ NODE_NAME_CASE(BZHI)
+ NODE_NAME_CASE(PDEP)
+ NODE_NAME_CASE(PEXT)
+ NODE_NAME_CASE(MUL_IMM)
+ NODE_NAME_CASE(MOVMSK)
+ NODE_NAME_CASE(PTEST)
+ NODE_NAME_CASE(TESTP)
+ NODE_NAME_CASE(KORTEST)
+ NODE_NAME_CASE(KTEST)
+ NODE_NAME_CASE(KADD)
+ NODE_NAME_CASE(KSHIFTL)
+ NODE_NAME_CASE(KSHIFTR)
+ NODE_NAME_CASE(PACKSS)
+ NODE_NAME_CASE(PACKUS)
+ NODE_NAME_CASE(PALIGNR)
+ NODE_NAME_CASE(VALIGN)
+ NODE_NAME_CASE(VSHLD)
+ NODE_NAME_CASE(VSHRD)
+ NODE_NAME_CASE(VSHLDV)
+ NODE_NAME_CASE(VSHRDV)
+ NODE_NAME_CASE(PSHUFD)
+ NODE_NAME_CASE(PSHUFHW)
+ NODE_NAME_CASE(PSHUFLW)
+ NODE_NAME_CASE(SHUFP)
+ NODE_NAME_CASE(SHUF128)
+ NODE_NAME_CASE(MOVLHPS)
+ NODE_NAME_CASE(MOVHLPS)
+ NODE_NAME_CASE(MOVDDUP)
+ NODE_NAME_CASE(MOVSHDUP)
+ NODE_NAME_CASE(MOVSLDUP)
+ NODE_NAME_CASE(MOVSD)
+ NODE_NAME_CASE(MOVSS)
+ NODE_NAME_CASE(UNPCKL)
+ NODE_NAME_CASE(UNPCKH)
+ NODE_NAME_CASE(VBROADCAST)
+ NODE_NAME_CASE(VBROADCAST_LOAD)
+ NODE_NAME_CASE(VBROADCASTM)
+ NODE_NAME_CASE(SUBV_BROADCAST)
+ NODE_NAME_CASE(VPERMILPV)
+ NODE_NAME_CASE(VPERMILPI)
+ NODE_NAME_CASE(VPERM2X128)
+ NODE_NAME_CASE(VPERMV)
+ NODE_NAME_CASE(VPERMV3)
+ NODE_NAME_CASE(VPERMI)
+ NODE_NAME_CASE(VPTERNLOG)
+ NODE_NAME_CASE(VFIXUPIMM)
+ NODE_NAME_CASE(VFIXUPIMM_SAE)
+ NODE_NAME_CASE(VFIXUPIMMS)
+ NODE_NAME_CASE(VFIXUPIMMS_SAE)
+ NODE_NAME_CASE(VRANGE)
+ NODE_NAME_CASE(VRANGE_SAE)
+ NODE_NAME_CASE(VRANGES)
+ NODE_NAME_CASE(VRANGES_SAE)
+ NODE_NAME_CASE(PMULUDQ)
+ NODE_NAME_CASE(PMULDQ)
+ NODE_NAME_CASE(PSADBW)
+ NODE_NAME_CASE(DBPSADBW)
+ NODE_NAME_CASE(VASTART_SAVE_XMM_REGS)
+ NODE_NAME_CASE(VAARG_64)
+ NODE_NAME_CASE(WIN_ALLOCA)
+ NODE_NAME_CASE(MEMBARRIER)
+ NODE_NAME_CASE(MFENCE)
+ NODE_NAME_CASE(SEG_ALLOCA)
+ NODE_NAME_CASE(PROBED_ALLOCA)
+ NODE_NAME_CASE(RDRAND)
+ NODE_NAME_CASE(RDSEED)
+ NODE_NAME_CASE(RDPKRU)
+ NODE_NAME_CASE(WRPKRU)
+ NODE_NAME_CASE(VPMADDUBSW)
+ NODE_NAME_CASE(VPMADDWD)
+ NODE_NAME_CASE(VPSHA)
+ NODE_NAME_CASE(VPSHL)
+ NODE_NAME_CASE(VPCOM)
+ NODE_NAME_CASE(VPCOMU)
+ NODE_NAME_CASE(VPERMIL2)
+ NODE_NAME_CASE(FMSUB)
+ NODE_NAME_CASE(STRICT_FMSUB)
+ NODE_NAME_CASE(FNMADD)
+ NODE_NAME_CASE(STRICT_FNMADD)
+ NODE_NAME_CASE(FNMSUB)
+ NODE_NAME_CASE(STRICT_FNMSUB)
+ NODE_NAME_CASE(FMADDSUB)
+ NODE_NAME_CASE(FMSUBADD)
+ NODE_NAME_CASE(FMADD_RND)
+ NODE_NAME_CASE(FNMADD_RND)
+ NODE_NAME_CASE(FMSUB_RND)
+ NODE_NAME_CASE(FNMSUB_RND)
+ NODE_NAME_CASE(FMADDSUB_RND)
+ NODE_NAME_CASE(FMSUBADD_RND)
+ NODE_NAME_CASE(VPMADD52H)
+ NODE_NAME_CASE(VPMADD52L)
+ NODE_NAME_CASE(VRNDSCALE)
+ NODE_NAME_CASE(STRICT_VRNDSCALE)
+ NODE_NAME_CASE(VRNDSCALE_SAE)
+ NODE_NAME_CASE(VRNDSCALES)
+ NODE_NAME_CASE(VRNDSCALES_SAE)
+ NODE_NAME_CASE(VREDUCE)
+ NODE_NAME_CASE(VREDUCE_SAE)
+ NODE_NAME_CASE(VREDUCES)
+ NODE_NAME_CASE(VREDUCES_SAE)
+ NODE_NAME_CASE(VGETMANT)
+ NODE_NAME_CASE(VGETMANT_SAE)
+ NODE_NAME_CASE(VGETMANTS)
+ NODE_NAME_CASE(VGETMANTS_SAE)
+ NODE_NAME_CASE(PCMPESTR)
+ NODE_NAME_CASE(PCMPISTR)
+ NODE_NAME_CASE(XTEST)
+ NODE_NAME_CASE(COMPRESS)
+ NODE_NAME_CASE(EXPAND)
+ NODE_NAME_CASE(SELECTS)
+ NODE_NAME_CASE(ADDSUB)
+ NODE_NAME_CASE(RCP14)
+ NODE_NAME_CASE(RCP14S)
+ NODE_NAME_CASE(RCP28)
+ NODE_NAME_CASE(RCP28_SAE)
+ NODE_NAME_CASE(RCP28S)
+ NODE_NAME_CASE(RCP28S_SAE)
+ NODE_NAME_CASE(EXP2)
+ NODE_NAME_CASE(EXP2_SAE)
+ NODE_NAME_CASE(RSQRT14)
+ NODE_NAME_CASE(RSQRT14S)
+ NODE_NAME_CASE(RSQRT28)
+ NODE_NAME_CASE(RSQRT28_SAE)
+ NODE_NAME_CASE(RSQRT28S)
+ NODE_NAME_CASE(RSQRT28S_SAE)
+ NODE_NAME_CASE(FADD_RND)
+ NODE_NAME_CASE(FADDS)
+ NODE_NAME_CASE(FADDS_RND)
+ NODE_NAME_CASE(FSUB_RND)
+ NODE_NAME_CASE(FSUBS)
+ NODE_NAME_CASE(FSUBS_RND)
+ NODE_NAME_CASE(FMUL_RND)
+ NODE_NAME_CASE(FMULS)
+ NODE_NAME_CASE(FMULS_RND)
+ NODE_NAME_CASE(FDIV_RND)
+ NODE_NAME_CASE(FDIVS)
+ NODE_NAME_CASE(FDIVS_RND)
+ NODE_NAME_CASE(FSQRT_RND)
+ NODE_NAME_CASE(FSQRTS)
+ NODE_NAME_CASE(FSQRTS_RND)
+ NODE_NAME_CASE(FGETEXP)
+ NODE_NAME_CASE(FGETEXP_SAE)
+ NODE_NAME_CASE(FGETEXPS)
+ NODE_NAME_CASE(FGETEXPS_SAE)
+ NODE_NAME_CASE(SCALEF)
+ NODE_NAME_CASE(SCALEF_RND)
+ NODE_NAME_CASE(SCALEFS)
+ NODE_NAME_CASE(SCALEFS_RND)
+ NODE_NAME_CASE(AVG)
+ NODE_NAME_CASE(MULHRS)
+ NODE_NAME_CASE(SINT_TO_FP_RND)
+ NODE_NAME_CASE(UINT_TO_FP_RND)
+ NODE_NAME_CASE(CVTTP2SI)
+ NODE_NAME_CASE(CVTTP2UI)
+ NODE_NAME_CASE(STRICT_CVTTP2SI)
+ NODE_NAME_CASE(STRICT_CVTTP2UI)
+ NODE_NAME_CASE(MCVTTP2SI)
+ NODE_NAME_CASE(MCVTTP2UI)
+ NODE_NAME_CASE(CVTTP2SI_SAE)
+ NODE_NAME_CASE(CVTTP2UI_SAE)
+ NODE_NAME_CASE(CVTTS2SI)
+ NODE_NAME_CASE(CVTTS2UI)
+ NODE_NAME_CASE(CVTTS2SI_SAE)
+ NODE_NAME_CASE(CVTTS2UI_SAE)
+ NODE_NAME_CASE(CVTSI2P)
+ NODE_NAME_CASE(CVTUI2P)
+ NODE_NAME_CASE(STRICT_CVTSI2P)
+ NODE_NAME_CASE(STRICT_CVTUI2P)
+ NODE_NAME_CASE(MCVTSI2P)
+ NODE_NAME_CASE(MCVTUI2P)
+ NODE_NAME_CASE(VFPCLASS)
+ NODE_NAME_CASE(VFPCLASSS)
+ NODE_NAME_CASE(MULTISHIFT)
+ NODE_NAME_CASE(SCALAR_SINT_TO_FP)
+ NODE_NAME_CASE(SCALAR_SINT_TO_FP_RND)
+ NODE_NAME_CASE(SCALAR_UINT_TO_FP)
+ NODE_NAME_CASE(SCALAR_UINT_TO_FP_RND)
+ NODE_NAME_CASE(CVTPS2PH)
+ NODE_NAME_CASE(STRICT_CVTPS2PH)
+ NODE_NAME_CASE(MCVTPS2PH)
+ NODE_NAME_CASE(CVTPH2PS)
+ NODE_NAME_CASE(STRICT_CVTPH2PS)
+ NODE_NAME_CASE(CVTPH2PS_SAE)
+ NODE_NAME_CASE(CVTP2SI)
+ NODE_NAME_CASE(CVTP2UI)
+ NODE_NAME_CASE(MCVTP2SI)
+ NODE_NAME_CASE(MCVTP2UI)
+ NODE_NAME_CASE(CVTP2SI_RND)
+ NODE_NAME_CASE(CVTP2UI_RND)
+ NODE_NAME_CASE(CVTS2SI)
+ NODE_NAME_CASE(CVTS2UI)
+ NODE_NAME_CASE(CVTS2SI_RND)
+ NODE_NAME_CASE(CVTS2UI_RND)
+ NODE_NAME_CASE(CVTNE2PS2BF16)
+ NODE_NAME_CASE(CVTNEPS2BF16)
+ NODE_NAME_CASE(MCVTNEPS2BF16)
+ NODE_NAME_CASE(DPBF16PS)
+ NODE_NAME_CASE(LWPINS)
+ NODE_NAME_CASE(MGATHER)
+ NODE_NAME_CASE(MSCATTER)
+ NODE_NAME_CASE(VPDPBUSD)
+ NODE_NAME_CASE(VPDPBUSDS)
+ NODE_NAME_CASE(VPDPWSSD)
+ NODE_NAME_CASE(VPDPWSSDS)
+ NODE_NAME_CASE(VPSHUFBITQMB)
+ NODE_NAME_CASE(GF2P8MULB)
+ NODE_NAME_CASE(GF2P8AFFINEQB)
+ NODE_NAME_CASE(GF2P8AFFINEINVQB)
+ NODE_NAME_CASE(NT_CALL)
+ NODE_NAME_CASE(NT_BRIND)
+ NODE_NAME_CASE(UMWAIT)
+ NODE_NAME_CASE(TPAUSE)
+ NODE_NAME_CASE(ENQCMD)
+ NODE_NAME_CASE(ENQCMDS)
+ NODE_NAME_CASE(VP2INTERSECT)
}
return nullptr;
+#undef NODE_NAME_CASE
}
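
The name table above is now generated with a stringizing macro, which removes the hand-written duplication that let mismatches such as the removed "X86ISD:ENQCMD" (single colon) slip in. A minimal standalone sketch of the pattern with made-up names (Op, opName, NAME_CASE):

#include <cstdio>

enum Op { OP_ADD, OP_SUB, OP_XOR };

static const char *opName(Op O) {
  switch (O) {
#define NAME_CASE(X) case OP_##X: return "OP_" #X;
  NAME_CASE(ADD)
  NAME_CASE(SUB)
  NAME_CASE(XOR)
#undef NAME_CASE
  }
  return nullptr;
}

int main() {
  std::printf("%s\n", opName(OP_XOR)); // prints OP_XOR
  return 0;
}
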
/// Return true if the addressing mode represented by AM is legal for this
@@ -30021,7 +30716,8 @@ bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const {
return false;
// XOP has v16i8/v8i16/v4i32/v2i64 variable vector shifts.
- if (Subtarget.hasXOP() && Ty->getPrimitiveSizeInBits() == 128 &&
+ // Splitting for v32i8/v16i16 on XOP+AVX2 targets is still preferred.
+ if (Subtarget.hasXOP() &&
(Bits == 8 || Bits == 16 || Bits == 32 || Bits == 64))
return false;
@@ -30107,7 +30803,7 @@ bool X86TargetLowering::isLegalStoreImmediate(int64_t Imm) const {
}
bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
- if (!VT1.isInteger() || !VT2.isInteger())
+ if (!VT1.isScalarInteger() || !VT2.isScalarInteger())
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
@@ -30148,6 +30844,39 @@ bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
return false;
}
+bool X86TargetLowering::shouldSinkOperands(Instruction *I,
+ SmallVectorImpl<Use *> &Ops) const {
+ // A uniform shift amount in a vector shift or funnel shift may be much
+ // cheaper than a generic variable vector shift, so make that pattern visible
+ // to SDAG by sinking the shuffle instruction next to the shift.
+ int ShiftAmountOpNum = -1;
+ if (I->isShift())
+ ShiftAmountOpNum = 1;
+ else if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::fshl ||
+ II->getIntrinsicID() == Intrinsic::fshr)
+ ShiftAmountOpNum = 2;
+ }
+
+ if (ShiftAmountOpNum == -1)
+ return false;
+
+ auto *Shuf = dyn_cast<ShuffleVectorInst>(I->getOperand(ShiftAmountOpNum));
+ if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 &&
+ isVectorShiftByScalarCheap(I->getType())) {
+ Ops.push_back(&I->getOperandUse(ShiftAmountOpNum));
+ return true;
+ }
+
+ return false;
+}
+
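
shouldSinkOperands above only fires when the shift or funnel-shift amount is a splat shuffle, which it checks via LLVM's getSplatIndex. A small standalone sketch of that splat test with a made-up getSplatLane helper (negative mask entries stand in for undef lanes):

#include <cstdio>
#include <vector>

static int getSplatLane(const std::vector<int> &Mask) {
  int Splat = -1;
  for (int M : Mask) {
    if (M < 0)
      continue;                 // undef lane, ignore
    if (Splat < 0)
      Splat = M;                // first defined lane picks the candidate
    else if (Splat != M)
      return -1;                // two different source lanes: not a splat
  }
  return Splat;
}

int main() {
  std::printf("%d\n", getSplatLane({2, 2, -1, 2})); // prints 2
  return 0;
}
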
+bool X86TargetLowering::shouldConvertPhiType(Type *From, Type *To) const {
+ if (!Subtarget.is64Bit())
+ return false;
+ return TargetLowering::shouldConvertPhiType(From, To);
+}
+
bool X86TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
if (isa<MaskedLoadSDNode>(ExtVal.getOperand(0)))
return false;
@@ -30191,7 +30920,7 @@ bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
/// VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
-bool X86TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
+bool X86TargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const {
if (!VT.isSimple())
return false;
@@ -30336,7 +31065,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
MachineOperand &Segment = MI.getOperand(5);
unsigned ArgSize = MI.getOperand(6).getImm();
unsigned ArgMode = MI.getOperand(7).getImm();
- unsigned Align = MI.getOperand(8).getImm();
+ Align Alignment = Align(MI.getOperand(8).getImm());
MachineFunction *MF = MBB->getParent();
@@ -30376,7 +31105,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
/* Align ArgSize to a multiple of 8 */
unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
- bool NeedsAlign = (Align > 8);
+ bool NeedsAlign = (Alignment > 8);
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *overflowMBB;
@@ -30524,17 +31253,16 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// to OverflowDestReg.
if (NeedsAlign) {
// Align the overflow address
- assert(isPowerOf2_32(Align) && "Alignment must be a power of 2");
Register TmpReg = MRI.createVirtualRegister(AddrRegClass);
// aligned_addr = (addr + (align-1)) & ~(align-1)
BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
- .addReg(OverflowAddrReg)
- .addImm(Align-1);
+ .addReg(OverflowAddrReg)
+ .addImm(Alignment.value() - 1);
BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
- .addReg(TmpReg)
- .addImm(~(uint64_t)(Align-1));
+ .addReg(TmpReg)
+ .addImm(~(uint64_t)(Alignment.value() - 1));
} else {
BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
.addReg(OverflowAddrReg);
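
The ADD64ri32/AND64ri32 pair emitted above is the usual power-of-two round-up, aligned = (addr + (align - 1)) & ~(align - 1). A made-up scalar helper, alignUp, showing the same computation:

#include <cassert>
#include <cstdint>
#include <cstdio>

static uint64_t alignUp(uint64_t Addr, uint64_t Align) {
  assert(Align && (Align & (Align - 1)) == 0 && "power-of-two alignment");
  return (Addr + (Align - 1)) & ~(Align - 1);  // same ADD then AND sequence
}

int main() {
  std::printf("%llu\n", (unsigned long long)alignUp(13, 8)); // prints 16
  return 0;
}
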
@@ -30630,7 +31358,7 @@ MachineBasicBlock *X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
MachineMemOperand *MMO = F->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*F, RegSaveFrameIndex, Offset),
MachineMemOperand::MOStore,
- /*Size=*/16, /*Align=*/16);
+ /*Size=*/16, Align(16));
BuildMI(XMMSaveMBB, DL, TII->get(MOVOpc))
.addFrameIndex(RegSaveFrameIndex)
.addImm(/*Scale=*/1)
@@ -30697,11 +31425,13 @@ static bool isCMOVPseudo(MachineInstr &MI) {
case X86::CMOV_RFP32:
case X86::CMOV_RFP64:
case X86::CMOV_RFP80:
+ case X86::CMOV_VR64:
case X86::CMOV_VR128:
case X86::CMOV_VR128X:
case X86::CMOV_VR256:
case X86::CMOV_VR256X:
case X86::CMOV_VR512:
+ case X86::CMOV_VK1:
case X86::CMOV_VK2:
case X86::CMOV_VK4:
case X86::CMOV_VK8:
@@ -30998,8 +31728,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
(NextMIIt->getOperand(3).getImm() == CC ||
NextMIIt->getOperand(3).getImm() == OppCC)) {
LastCMOV = &*NextMIIt;
- ++NextMIIt;
- NextMIIt = skipDebugInstructionsForward(NextMIIt, ThisMBB->end());
+ NextMIIt = next_nodbg(NextMIIt, ThisMBB->end());
}
}
@@ -31071,6 +31800,112 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
return SinkMBB;
}
+static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) {
+ if (IsLP64) {
+ if (isInt<8>(Imm))
+ return X86::SUB64ri8;
+ return X86::SUB64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::SUB32ri8;
+ return X86::SUB32ri;
+ }
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+ MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const X86FrameLowering &TFI = *Subtarget.getFrameLowering();
+ DebugLoc DL = MI.getDebugLoc();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+
+ const unsigned ProbeSize = getStackProbeSize(*MF);
+
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineBasicBlock *testMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *tailMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *blockMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+ MachineFunction::iterator MBBIter = ++MBB->getIterator();
+ MF->insert(MBBIter, testMBB);
+ MF->insert(MBBIter, blockMBB);
+ MF->insert(MBBIter, tailMBB);
+
+ Register sizeVReg = MI.getOperand(1).getReg();
+
+ Register physSPReg = TFI.Uses64BitFramePtr ? X86::RSP : X86::ESP;
+
+ Register TmpStackPtr = MRI.createVirtualRegister(
+ TFI.Uses64BitFramePtr ? &X86::GR64RegClass : &X86::GR32RegClass);
+ Register FinalStackPtr = MRI.createVirtualRegister(
+ TFI.Uses64BitFramePtr ? &X86::GR64RegClass : &X86::GR32RegClass);
+
+ BuildMI(*MBB, {MI}, DL, TII->get(TargetOpcode::COPY), TmpStackPtr)
+ .addReg(physSPReg);
+ {
+ const unsigned Opc = TFI.Uses64BitFramePtr ? X86::SUB64rr : X86::SUB32rr;
+ BuildMI(*MBB, {MI}, DL, TII->get(Opc), FinalStackPtr)
+ .addReg(TmpStackPtr)
+ .addReg(sizeVReg);
+ }
+
+ // test rsp size
+
+ BuildMI(testMBB, DL,
+ TII->get(TFI.Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
+ .addReg(FinalStackPtr)
+ .addReg(physSPReg);
+
+ BuildMI(testMBB, DL, TII->get(X86::JCC_1))
+ .addMBB(tailMBB)
+ .addImm(X86::COND_L);
+ testMBB->addSuccessor(blockMBB);
+ testMBB->addSuccessor(tailMBB);
+
+ // Touch the block then extend it. This is done on the opposite side of
+ // static probe where we allocate then touch, to avoid the need of probing the
+ // tail of the static alloca. Possible scenarios are:
+ //
+ // + ---- <- ------------ <- ------------- <- ------------ +
+ // | |
+ // [free probe] -> [page alloc] -> [alloc probe] -> [tail alloc] + -> [dyn probe] -> [page alloc] -> [dyn probe] -> [tail alloc] +
+ // | |
+ // + <- ----------- <- ------------ <- ----------- <- ------------ +
+ //
+ // The property we want to enforce is to never have more than [page alloc] between two probes.
+
+ const unsigned MovMIOpc =
+ TFI.Uses64BitFramePtr ? X86::MOV64mi32 : X86::MOV32mi;
+ addRegOffset(BuildMI(blockMBB, DL, TII->get(MovMIOpc)), physSPReg, false, 0)
+ .addImm(0);
+
+ BuildMI(blockMBB, DL,
+ TII->get(getSUBriOpcode(TFI.Uses64BitFramePtr, ProbeSize)), physSPReg)
+ .addReg(physSPReg)
+ .addImm(ProbeSize);
+
+
+ BuildMI(blockMBB, DL, TII->get(X86::JMP_1)).addMBB(testMBB);
+ blockMBB->addSuccessor(testMBB);
+
+ // Replace original instruction by the expected stack ptr
+ BuildMI(tailMBB, DL, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
+ .addReg(FinalStackPtr);
+
+ tailMBB->splice(tailMBB->end(), MBB,
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ tailMBB->transferSuccessorsAndUpdatePHIs(MBB);
+ MBB->addSuccessor(testMBB);
+
+ // Delete the original pseudo instruction.
+ MI.eraseFromParent();
+
+ // And we're done.
+ return tailMBB;
+}
+
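
As the diagram inside EmitLoweredProbedAlloca explains, the dynamic probe loop touches the page the stack pointer currently sits on and only then extends by one probe interval, so at most one page is ever allocated between probes. A plain-integer model of that loop with made-up names; the real lowering emits testMBB/blockMBB/tailMBB with CMP and JCC, and the exact flag condition is the one shown above:

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t ProbeSize = 4096;           // stand-in for getStackProbeSize
  uint64_t SP = 0x7ffff000, Size = 10000;
  const uint64_t FinalSP = SP - Size;
  while (SP - FinalSP >= ProbeSize) {        // testMBB: more than a probe left?
    std::printf("touch %#llx\n", (unsigned long long)SP); // blockMBB: MOV [SP],0
    SP -= ProbeSize;                         // blockMBB: SUB SP, ProbeSize
  }
  SP = FinalSP;                              // tailMBB: adopt FinalStackPtr
  return 0;
}
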
MachineBasicBlock *
X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
MachineBasicBlock *BB) const {
@@ -31231,29 +32066,16 @@ X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI,
BB->addSuccessor(RestoreMBB);
MI.getOperand(0).setMBB(RestoreMBB);
+ // Marking this as an EH pad but not a funclet entry block causes PEI to
+ // restore stack pointers in the block.
+ RestoreMBB->setIsEHPad(true);
+
auto RestoreMBBI = RestoreMBB->begin();
- BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::EH_RESTORE));
BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::JMP_4)).addMBB(TargetMBB);
return BB;
}
MachineBasicBlock *
-X86TargetLowering::EmitLoweredCatchPad(MachineInstr &MI,
- MachineBasicBlock *BB) const {
- MachineFunction *MF = BB->getParent();
- const Constant *PerFn = MF->getFunction().getPersonalityFn();
- bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(PerFn));
- // Only 32-bit SEH requires special handling for catchpad.
- if (IsSEH && Subtarget.is32Bit()) {
- const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
- DebugLoc DL = MI.getDebugLoc();
- BuildMI(*BB, MI, DL, TII.get(X86::EH_RESTORE));
- }
- MI.eraseFromParent();
- return BB;
-}
-
-MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI,
MachineBasicBlock *BB) const {
// So, here we replace TLSADDR with the sequence:
@@ -31755,12 +32577,17 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
MBB->addSuccessor(checkSspMBB);
// Initialize a register with zero.
- Register ZReg = MRI.createVirtualRegister(PtrRC);
- unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr;
- BuildMI(checkSspMBB, DL, TII->get(XorRROpc))
- .addDef(ZReg)
- .addReg(ZReg, RegState::Undef)
- .addReg(ZReg, RegState::Undef);
+ Register ZReg = MRI.createVirtualRegister(&X86::GR32RegClass);
+ BuildMI(checkSspMBB, DL, TII->get(X86::MOV32r0), ZReg);
+
+ if (PVT == MVT::i64) {
+ Register TmpZReg = MRI.createVirtualRegister(PtrRC);
+ BuildMI(checkSspMBB, DL, TII->get(X86::SUBREG_TO_REG), TmpZReg)
+ .addImm(0)
+ .addReg(ZReg)
+ .addImm(X86::sub_32bit);
+ ZReg = TmpZReg;
+ }
// Read the current SSP Register value to the zeroed register.
Register SSPCopyReg = MRI.createVirtualRegister(PtrRC);
@@ -31889,7 +32716,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
Register Tmp = MRI.createVirtualRegister(RC);
// Since FP is only updated here but NOT referenced, it's treated as GPR.
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
- unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
+ Register FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
Register SP = RegInfo->getStackRegister();
MachineInstrBuilder MIB;
@@ -32236,6 +33063,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
+ auto TMMImmToTMMReg = [](unsigned Imm) {
+ assert (Imm < 8 && "Illegal tmm index");
+ return X86::TMM0 + Imm;
+ };
switch (MI.getOpcode()) {
default: llvm_unreachable("Unexpected instr type to insert");
case X86::TLS_addr32:
@@ -32250,11 +33081,12 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return EmitLoweredIndirectThunk(MI, BB);
case X86::CATCHRET:
return EmitLoweredCatchRet(MI, BB);
- case X86::CATCHPAD:
- return EmitLoweredCatchPad(MI, BB);
case X86::SEG_ALLOCA_32:
case X86::SEG_ALLOCA_64:
return EmitLoweredSegAlloca(MI, BB);
+ case X86::PROBED_ALLOCA_32:
+ case X86::PROBED_ALLOCA_64:
+ return EmitLoweredProbedAlloca(MI, BB);
case X86::TLSCall_32:
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
@@ -32268,11 +33100,13 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::CMOV_RFP32:
case X86::CMOV_RFP64:
case X86::CMOV_RFP80:
+ case X86::CMOV_VR64:
case X86::CMOV_VR128:
case X86::CMOV_VR128X:
case X86::CMOV_VR256:
case X86::CMOV_VR256X:
case X86::CMOV_VR512:
+ case X86::CMOV_VK1:
case X86::CMOV_VK2:
case X86::CMOV_VK4:
case X86::CMOV_VK8:
@@ -32327,7 +33161,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::FP80_TO_INT64_IN_MEM: {
// Change the floating point control register to use "round towards zero"
// mode when truncating to an integer value.
- int OrigCWFrameIdx = MF->getFrameInfo().CreateStackObject(2, 2, false);
+ int OrigCWFrameIdx =
+ MF->getFrameInfo().CreateStackObject(2, Align(2), false);
addFrameReference(BuildMI(*BB, MI, DL,
TII->get(X86::FNSTCW16m)), OrigCWFrameIdx);
@@ -32348,7 +33183,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.addReg(NewCW, RegState::Kill, X86::sub_16bit);
// Prepare memory for FLDCW.
- int NewCWFrameIdx = MF->getFrameInfo().CreateStackObject(2, 2, false);
+ int NewCWFrameIdx =
+ MF->getFrameInfo().CreateStackObject(2, Align(2), false);
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)),
NewCWFrameIdx)
.addReg(NewCW16, RegState::Kill);
@@ -32483,6 +33319,97 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BB->addLiveIn(BasePtr);
return BB;
}
+ case TargetOpcode::PREALLOCATED_SETUP: {
+ assert(Subtarget.is32Bit() && "preallocated only used in 32-bit");
+ auto MFI = MF->getInfo<X86MachineFunctionInfo>();
+ MFI->setHasPreallocatedCall(true);
+ int64_t PreallocatedId = MI.getOperand(0).getImm();
+ size_t StackAdjustment = MFI->getPreallocatedStackSize(PreallocatedId);
+ assert(StackAdjustment != 0 && "0 stack adjustment");
+ LLVM_DEBUG(dbgs() << "PREALLOCATED_SETUP stack adjustment "
+ << StackAdjustment << "\n");
+ BuildMI(*BB, MI, DL, TII->get(X86::SUB32ri), X86::ESP)
+ .addReg(X86::ESP)
+ .addImm(StackAdjustment);
+ MI.eraseFromParent();
+ return BB;
+ }
+ case TargetOpcode::PREALLOCATED_ARG: {
+ assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit");
+ int64_t PreallocatedId = MI.getOperand(1).getImm();
+ int64_t ArgIdx = MI.getOperand(2).getImm();
+ auto MFI = MF->getInfo<X86MachineFunctionInfo>();
+ size_t ArgOffset = MFI->getPreallocatedArgOffsets(PreallocatedId)[ArgIdx];
+ LLVM_DEBUG(dbgs() << "PREALLOCATED_ARG arg index " << ArgIdx
+ << ", arg offset " << ArgOffset << "\n");
+ // stack pointer + offset
+ addRegOffset(
+ BuildMI(*BB, MI, DL, TII->get(X86::LEA32r), MI.getOperand(0).getReg()),
+ X86::ESP, false, ArgOffset);
+ MI.eraseFromParent();
+ return BB;
+ }
+ case X86::PTDPBSSD:
+ case X86::PTDPBSUD:
+ case X86::PTDPBUSD:
+ case X86::PTDPBUUD:
+ case X86::PTDPBF16PS: {
+ const DebugLoc &DL = MI.getDebugLoc();
+ unsigned Opc;
+ switch (MI.getOpcode()) {
+ case X86::PTDPBSSD: Opc = X86::TDPBSSD; break;
+ case X86::PTDPBSUD: Opc = X86::TDPBSUD; break;
+ case X86::PTDPBUSD: Opc = X86::TDPBUSD; break;
+ case X86::PTDPBUUD: Opc = X86::TDPBUUD; break;
+ case X86::PTDPBF16PS: Opc = X86::TDPBF16PS; break;
+ }
+
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
+ MIB.addReg(TMMImmToTMMReg(MI.getOperand(0).getImm()), RegState::Define);
+ MIB.addReg(TMMImmToTMMReg(MI.getOperand(0).getImm()), RegState::Undef);
+ MIB.addReg(TMMImmToTMMReg(MI.getOperand(1).getImm()), RegState::Undef);
+ MIB.addReg(TMMImmToTMMReg(MI.getOperand(2).getImm()), RegState::Undef);
+
+ MI.eraseFromParent(); // The pseudo is gone now.
+ return BB;
+ }
+ case X86::PTILEZERO: {
+ const DebugLoc &DL = MI.getDebugLoc();
+ unsigned Imm = MI.getOperand(0).getImm();
+ BuildMI(*BB, MI, DL, TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm));
+ MI.eraseFromParent(); // The pseudo is gone now.
+ return BB;
+ }
+ case X86::PTILELOADD:
+ case X86::PTILELOADDT1:
+ case X86::PTILESTORED: {
+ const DebugLoc &DL = MI.getDebugLoc();
+ unsigned Opc;
+ switch (MI.getOpcode()) {
+ case X86::PTILELOADD: Opc = X86::TILELOADD; break;
+ case X86::PTILELOADDT1: Opc = X86::TILELOADDT1; break;
+ case X86::PTILESTORED: Opc = X86::TILESTORED; break;
+ }
+
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
+ unsigned CurOp = 0;
+ if (Opc != X86::TILESTORED)
+ MIB.addReg(TMMImmToTMMReg(MI.getOperand(CurOp++).getImm()),
+ RegState::Define);
+
+ MIB.add(MI.getOperand(CurOp++)); // base
+ MIB.add(MI.getOperand(CurOp++)); // scale
+ MIB.add(MI.getOperand(CurOp++)); // index -- stride
+ MIB.add(MI.getOperand(CurOp++)); // displacement
+ MIB.add(MI.getOperand(CurOp++)); // segment
+
+ if (Opc == X86::TILESTORED)
+ MIB.addReg(TMMImmToTMMReg(MI.getOperand(CurOp++).getImm()),
+ RegState::Undef);
+
+ MI.eraseFromParent(); // The pseudo is gone now.
+ return BB;
+ }
}
}
@@ -32492,20 +33419,53 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
bool
X86TargetLowering::targetShrinkDemandedConstant(SDValue Op,
- const APInt &Demanded,
+ const APInt &DemandedBits,
+ const APInt &DemandedElts,
TargetLoweringOpt &TLO) const {
- // Only optimize Ands to prevent shrinking a constant that could be
- // matched by movzx.
- if (Op.getOpcode() != ISD::AND)
- return false;
-
EVT VT = Op.getValueType();
+ unsigned Opcode = Op.getOpcode();
+ unsigned EltSize = VT.getScalarSizeInBits();
- // Ignore vectors.
- if (VT.isVector())
+ if (VT.isVector()) {
+ // If the constant is only all signbits in the active bits, then we should
+ // extend it to the entire constant to allow it act as a boolean constant
+ // vector.
+ auto NeedsSignExtension = [&](SDValue V, unsigned ActiveBits) {
+ if (!ISD::isBuildVectorOfConstantSDNodes(V.getNode()))
+ return false;
+ for (unsigned i = 0, e = V.getNumOperands(); i != e; ++i) {
+ if (!DemandedElts[i] || V.getOperand(i).isUndef())
+ continue;
+ const APInt &Val = V.getConstantOperandAPInt(i);
+ if (Val.getBitWidth() > Val.getNumSignBits() &&
+ Val.trunc(ActiveBits).getNumSignBits() == ActiveBits)
+ return true;
+ }
+ return false;
+ };
+ // For vectors - if we have a constant, then try to sign extend.
+ // TODO: Handle AND/ANDN cases.
+ unsigned ActiveBits = DemandedBits.getActiveBits();
+ if (EltSize > ActiveBits && EltSize > 1 && isTypeLegal(VT) &&
+ (Opcode == ISD::OR || Opcode == ISD::XOR) &&
+ NeedsSignExtension(Op.getOperand(1), ActiveBits)) {
+ EVT ExtSVT = EVT::getIntegerVT(*TLO.DAG.getContext(), ActiveBits);
+ EVT ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtSVT,
+ VT.getVectorNumElements());
+ SDValue NewC =
+ TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(Op), VT,
+ Op.getOperand(1), TLO.DAG.getValueType(ExtVT));
+ SDValue NewOp =
+ TLO.DAG.getNode(Opcode, SDLoc(Op), VT, Op.getOperand(0), NewC);
+ return TLO.CombineTo(Op, NewOp);
+ }
return false;
+ }
- unsigned Size = VT.getSizeInBits();
+ // Only optimize Ands to prevent shrinking a constant that could be
+ // matched by movzx.
+ if (Opcode != ISD::AND)
+ return false;
// Make sure the RHS really is a constant.
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
@@ -32515,7 +33475,7 @@ X86TargetLowering::targetShrinkDemandedConstant(SDValue Op,
const APInt &Mask = C->getAPIntValue();
// Clear all non-demanded bits initially.
- APInt ShrunkMask = Mask & Demanded;
+ APInt ShrunkMask = Mask & DemandedBits;
// Find the width of the shrunk mask.
unsigned Width = ShrunkMask.getActiveBits();
@@ -32527,10 +33487,10 @@ X86TargetLowering::targetShrinkDemandedConstant(SDValue Op,
// Find the next power of 2 width, rounding up to a byte.
Width = PowerOf2Ceil(std::max(Width, 8U));
// Truncate the width to size to handle illegal types.
- Width = std::min(Width, Size);
+ Width = std::min(Width, EltSize);
// Calculate a possible zero extend mask for this constant.
- APInt ZeroExtendMask = APInt::getLowBitsSet(Size, Width);
+ APInt ZeroExtendMask = APInt::getLowBitsSet(EltSize, Width);
// If we aren't changing the mask, just return true to keep it and prevent
// the caller from optimizing.
@@ -32539,7 +33499,7 @@ X86TargetLowering::targetShrinkDemandedConstant(SDValue Op,
// Make sure the new mask can be represented by a combination of mask bits
// and non-demanded bits.
- if (!ZeroExtendMask.isSubsetOf(Mask | ~Demanded))
+ if (!ZeroExtendMask.isSubsetOf(Mask | ~DemandedBits))
return false;
// Replace the constant with the zero extend mask.
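
For the scalar AND path, the logic above keeps only the demanded bits, rounds their active width up to a whole-byte power of two, and switches to the zero-extend style mask only when every extra bit it sets is either already in the mask or not demanded, so movzx-friendly constants survive. A simplified 64-bit model with a made-up shrinkAndMask helper; it skips the all-zero and no-change special cases the real code also handles:

#include <algorithm>
#include <cstdint>
#include <cstdio>

static uint64_t shrinkAndMask(uint64_t Mask, uint64_t Demanded, unsigned Size) {
  uint64_t Shrunk = Mask & Demanded;       // clear all non-demanded bits
  unsigned Width = 0;
  for (uint64_t V = Shrunk; V; V >>= 1)
    ++Width;                               // active bits of the shrunk mask
  unsigned W = 8;
  while (W < Width)
    W *= 2;                                // next power of 2, at least a byte
  W = std::min(W, Size);                   // truncate to the scalar size
  uint64_t ZExtMask = (W >= 64) ? ~0ULL : ((1ULL << W) - 1);
  // Every bit the new mask adds must be non-demanded or already in Mask.
  if ((ZExtMask & ~(Mask | ~Demanded)) != 0)
    return Mask;
  return ZExtMask;
}

int main() {
  // 0x1FF with only the low byte demanded shrinks to the zero-extend mask 0xFF.
  std::printf("%#llx\n", (unsigned long long)shrinkAndMask(0x1FF, 0xFF, 32));
  return 0;
}
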
@@ -32555,6 +33515,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
const SelectionDAG &DAG,
unsigned Depth) const {
unsigned BitWidth = Known.getBitWidth();
+ unsigned NumElts = DemandedElts.getBitWidth();
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
assert((Opc >= ISD::BUILTIN_OP_END ||
@@ -32582,7 +33543,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt DemandedElt = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
Op.getConstantOperandVal(1));
Known = DAG.computeKnownBits(Src, DemandedElt, Depth + 1);
- Known = Known.zextOrTrunc(BitWidth, false);
+ Known = Known.anyextOrTrunc(BitWidth);
Known.Zero.setBitsFrom(SrcVT.getScalarSizeInBits());
break;
}
@@ -32652,10 +33613,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- // Output known-0 bits are only known if clear in both the LHS & RHS.
- Known.Zero &= Known2.Zero;
- // Output known-1 are known to be set if set in either the LHS | RHS.
- Known.One |= Known2.One;
+ Known |= Known2;
break;
}
case X86ISD::PSADBW: {
@@ -32679,6 +33637,76 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.Zero &= Known2.Zero;
break;
}
+ case X86ISD::BEXTR: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (auto* Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
+ unsigned Shift = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 0);
+ unsigned Length = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 8);
+
+ // If the length is 0, the result is 0.
+ if (Length == 0) {
+ Known.setAllZero();
+ break;
+ }
+
+ if ((Shift + Length) <= BitWidth) {
+ Known = DAG.computeKnownBits(Op0, Depth + 1);
+ Known = Known.extractBits(Length, Shift);
+ Known = Known.zextOrTrunc(BitWidth);
+ }
+ }
+ break;
+ }
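
The BEXTR case reads the control operand the same way the instruction does, shift amount from bits [7:0] and field length from bits [15:8], then extracts and zero-extends that slice of the known bits. A made-up scalar model, bextr64, of the value computation it reasons about:

#include <cstdint>
#include <cstdio>

static uint64_t bextr64(uint64_t Src, uint64_t Ctrl) {
  unsigned Start = Ctrl & 0xff;            // bits [7:0]: start position
  unsigned Len = (Ctrl >> 8) & 0xff;       // bits [15:8]: field length
  if (Len == 0 || Start >= 64)
    return 0;                              // empty field: result is known zero
  uint64_t Field = Src >> Start;
  return Len >= 64 ? Field : (Field & ((1ULL << Len) - 1));
}

int main() {
  // Extract 8 bits starting at bit 4 of 0xABCD: 0xBC.
  std::printf("%#llx\n", (unsigned long long)bextr64(0xABCD, (8u << 8) | 4u));
  return 0;
}
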
+ case X86ISD::CVTSI2P:
+ case X86ISD::CVTUI2P:
+ case X86ISD::CVTP2SI:
+ case X86ISD::CVTP2UI:
+ case X86ISD::MCVTP2SI:
+ case X86ISD::MCVTP2UI:
+ case X86ISD::CVTTP2SI:
+ case X86ISD::CVTTP2UI:
+ case X86ISD::MCVTTP2SI:
+ case X86ISD::MCVTTP2UI:
+ case X86ISD::MCVTSI2P:
+ case X86ISD::MCVTUI2P:
+ case X86ISD::VFPROUND:
+ case X86ISD::VMFPROUND:
+ case X86ISD::CVTPS2PH:
+ case X86ISD::MCVTPS2PH: {
+ // Conversions - upper elements are known zero.
+ EVT SrcVT = Op.getOperand(0).getValueType();
+ if (SrcVT.isVector()) {
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ if (NumElts > NumSrcElts &&
+ DemandedElts.countTrailingZeros() >= NumSrcElts)
+ Known.setAllZero();
+ }
+ break;
+ }
+ case X86ISD::STRICT_CVTTP2SI:
+ case X86ISD::STRICT_CVTTP2UI:
+ case X86ISD::STRICT_CVTSI2P:
+ case X86ISD::STRICT_CVTUI2P:
+ case X86ISD::STRICT_VFPROUND:
+ case X86ISD::STRICT_CVTPS2PH: {
+ // Strict Conversions - upper elements are known zero.
+ EVT SrcVT = Op.getOperand(1).getValueType();
+ if (SrcVT.isVector()) {
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ if (NumElts > NumSrcElts &&
+ DemandedElts.countTrailingZeros() >= NumSrcElts)
+ Known.setAllZero();
+ }
+ break;
+ }
+ case X86ISD::MOVQ2DQ: {
+ // Move from MMX to XMM. Upper half of XMM should be 0.
+ if (DemandedElts.countTrailingZeros() >= (NumElts / 2))
+ Known.setAllZero();
+ break;
+ }
}
// Handle target shuffles.
@@ -32745,11 +33773,12 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
return VTBits;
case X86ISD::VTRUNC: {
- // TODO: Add DemandedElts support.
SDValue Src = Op.getOperand(0);
- unsigned NumSrcBits = Src.getScalarValueSizeInBits();
+ MVT SrcVT = Src.getSimpleValueType();
+ unsigned NumSrcBits = SrcVT.getScalarSizeInBits();
assert(VTBits < NumSrcBits && "Illegal truncation input type");
- unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
+ APInt DemandedSrc = DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements());
+ unsigned Tmp = DAG.ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
if (Tmp > (NumSrcBits - VTBits))
return Tmp - (NumSrcBits - VTBits);
return 1;
@@ -32877,6 +33906,21 @@ SDValue X86TargetLowering::unwrapAddress(SDValue N) const {
return N;
}
+// Helper to look for a normal load that can be narrowed into a vzload with the
+// specified VT and memory VT. Returns SDValue() on failure.
+static SDValue narrowLoadToVZLoad(LoadSDNode *LN, MVT MemVT, MVT VT,
+ SelectionDAG &DAG) {
+ // Can't if the load is volatile or atomic.
+ if (!LN->isSimple())
+ return SDValue();
+
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
+ return DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, SDLoc(LN), Tys, Ops, MemVT,
+ LN->getPointerInfo(), LN->getOriginalAlign(),
+ LN->getMemOperand()->getFlags());
+}
+
// Attempt to match a combined shuffle mask against supported unary shuffle
// instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
@@ -33021,9 +34065,7 @@ static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
unsigned InputSizeInBits = MaskVT.getSizeInBits();
unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts;
MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits);
-
- bool ContainsZeros =
- llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
+ bool ContainsZeros = isAnyZero(Mask);
// Handle VPERMI/VPERMILPD vXi64/vXi64 patterns.
if (!ContainsZeros && MaskScalarSizeInBits == 64) {
@@ -33071,7 +34113,7 @@ static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
// Narrow the repeated mask to create 32-bit element permutes.
SmallVector<int, 4> WordMask = RepeatedMask;
if (MaskScalarSizeInBits == 64)
- scaleShuffleMask<int>(2, RepeatedMask, WordMask);
+ narrowShuffleMaskElts(2, RepeatedMask, WordMask);
Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI);
ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32);
@@ -33114,17 +34156,32 @@ static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
// Attempt to match against byte/bit shifts.
- // FIXME: Add 512-bit support.
- if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
- (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
+ if (AllowIntDomain &&
+ ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
+ (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
int ShiftAmt = matchShuffleAsShift(ShuffleVT, Shuffle, MaskScalarSizeInBits,
Mask, 0, Zeroable, Subtarget);
- if (0 < ShiftAmt) {
+ if (0 < ShiftAmt && (!ShuffleVT.is512BitVector() || Subtarget.hasBWI() ||
+ 32 <= ShuffleVT.getScalarSizeInBits())) {
PermuteImm = (unsigned)ShiftAmt;
return true;
}
}
+ // Attempt to match against bit rotates.
+ if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits < 64 &&
+ ((MaskVT.is128BitVector() && Subtarget.hasXOP()) ||
+ Subtarget.hasAVX512())) {
+ int RotateAmt = matchShuffleAsBitRotate(ShuffleVT, MaskScalarSizeInBits,
+ Subtarget, Mask);
+ if (0 < RotateAmt) {
+ Shuffle = X86ISD::VROTLI;
+ PermuteImm = (unsigned)RotateAmt;
+ return true;
+ }
+ }
+
return false;
}
@@ -33205,9 +34262,29 @@ static bool matchBinaryPermuteShuffle(
unsigned NumMaskElts = Mask.size();
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
+ // Attempt to match against VALIGND/VALIGNQ rotate.
+ if (AllowIntDomain && (EltSizeInBits == 64 || EltSizeInBits == 32) &&
+ ((MaskVT.is128BitVector() && Subtarget.hasVLX()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasVLX()) ||
+ (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
+ if (!isAnyZero(Mask)) {
+ int Rotation = matchShuffleAsElementRotate(V1, V2, Mask);
+ if (0 < Rotation) {
+ Shuffle = X86ISD::VALIGN;
+ if (EltSizeInBits == 64)
+ ShuffleVT = MVT::getVectorVT(MVT::i64, MaskVT.getSizeInBits() / 64);
+ else
+ ShuffleVT = MVT::getVectorVT(MVT::i32, MaskVT.getSizeInBits() / 32);
+ PermuteImm = Rotation;
+ return true;
+ }
+ }
+ }
+
// Attempt to match against PALIGNR byte rotate.
if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) ||
- (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
+ (MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
+ (MaskVT.is512BitVector() && Subtarget.hasBWI()))) {
int ByteRotation = matchShuffleAsByteRotate(MaskVT, V1, V2, Mask);
if (0 < ByteRotation) {
Shuffle = X86ISD::PALIGNR;
@@ -33257,8 +34334,7 @@ static bool matchBinaryPermuteShuffle(
// Attempt to combine to INSERTPS, but only if it has elements that need to
// be set to zero.
if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
- MaskVT.is128BitVector() &&
- llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; }) &&
+ MaskVT.is128BitVector() && isAnyZero(Mask) &&
matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
Shuffle = X86ISD::INSERTPS;
ShuffleVT = MVT::v4f32;
@@ -33386,6 +34462,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return DAG.getBitcast(RootVT, V1);
}
+ bool OptForSize = DAG.shouldOptForSize();
unsigned RootSizeInBits = RootVT.getSizeInBits();
unsigned NumRootElts = RootVT.getVectorNumElements();
unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
@@ -33396,11 +34473,21 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Don't combine if we are an AVX512/EVEX target and the mask element size
// is different from the root element size - this would prevent writemasks
// from being reused.
- // TODO - this currently prevents all lane shuffles from occurring.
- // TODO - check for writemasks usage instead of always preventing combining.
- // TODO - attempt to narrow Mask back to writemask size.
- bool IsEVEXShuffle =
- RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128);
+ bool IsMaskedShuffle = false;
+ if (RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128)) {
+ if (Root.hasOneUse() && Root->use_begin()->getOpcode() == ISD::VSELECT &&
+ Root->use_begin()->getOperand(0).getScalarValueSizeInBits() == 1) {
+ IsMaskedShuffle = true;
+ }
+ }
+
+ // If we are shuffling a broadcast (and not introducing zeros) then
+ // we can just use the broadcast directly. This works for smaller broadcast
+ // elements as well, since they already repeat across each mask element.
+ if (UnaryShuffle && isTargetShuffleSplat(V1) && !isAnyZero(BaseMask) &&
+ (BaseMaskEltSizeInBits % V1.getScalarValueSizeInBits()) == 0) {
+ return DAG.getBitcast(RootVT, V1);
+ }
// Attempt to match a subvector broadcast.
// shuffle(insert_subvector(undef, sub, 0), undef, 0, 0, 0, 0)
@@ -33420,27 +34507,138 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
- // TODO - handle 128/256-bit lane shuffles of 512-bit vectors.
+ // Handle 128/256-bit lane shuffles of 512-bit vectors.
+ if (RootVT.is512BitVector() &&
+ (NumBaseMaskElts == 2 || NumBaseMaskElts == 4)) {
+ MVT ShuffleVT = (FloatDomain ? MVT::v8f64 : MVT::v8i64);
+
+ // If the upper subvectors are zeroable, then an extract+insert is better
+ // than using X86ISD::SHUF128. The insertion is free, even if it has to
+ // zero the upper subvectors.
+ if (isUndefOrZeroInRange(BaseMask, 1, NumBaseMaskElts - 1)) {
+ if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
+ return SDValue(); // Nothing to do!
+ assert(isInRange(BaseMask[0], 0, NumBaseMaskElts) &&
+ "Unexpected lane shuffle");
+ Res = DAG.getBitcast(ShuffleVT, V1);
+ unsigned SubIdx = BaseMask[0] * (8 / NumBaseMaskElts);
+ bool UseZero = isAnyZero(BaseMask);
+ Res = extractSubVector(Res, SubIdx, DAG, DL, BaseMaskEltSizeInBits);
+ Res = widenSubVector(Res, UseZero, Subtarget, DAG, DL, RootSizeInBits);
+ return DAG.getBitcast(RootVT, Res);
+ }
+
+ // Narrow shuffle mask to v4x128.
+ SmallVector<int, 4> Mask;
+ assert((BaseMaskEltSizeInBits % 128) == 0 && "Illegal mask size");
+ narrowShuffleMaskElts(BaseMaskEltSizeInBits / 128, BaseMask, Mask);
+
+ // Try to lower to vshuf64x2/vshuf32x4.
+ auto MatchSHUF128 = [](MVT ShuffleVT, const SDLoc &DL, ArrayRef<int> Mask,
+ SDValue V1, SDValue V2, SelectionDAG &DAG) {
+ unsigned PermMask = 0;
+ // Ensure elements came from the same Op.
+ SDValue Ops[2] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)};
+ for (int i = 0; i < 4; ++i) {
+ assert(Mask[i] >= -1 && "Illegal shuffle sentinel value");
+ if (Mask[i] < 0)
+ continue;
+
+ SDValue Op = Mask[i] >= 4 ? V2 : V1;
+ unsigned OpIndex = i / 2;
+ if (Ops[OpIndex].isUndef())
+ Ops[OpIndex] = Op;
+ else if (Ops[OpIndex] != Op)
+ return SDValue();
+
+ // Convert the 128-bit shuffle mask selection values into 128-bit
+ // selection bits defined by a vshuf64x2 instruction's immediate control
+ // byte.
+ PermMask |= (Mask[i] % 4) << (i * 2);
+ }
+
+ return DAG.getNode(X86ISD::SHUF128, DL, ShuffleVT,
+ DAG.getBitcast(ShuffleVT, Ops[0]),
+ DAG.getBitcast(ShuffleVT, Ops[1]),
+ DAG.getTargetConstant(PermMask, DL, MVT::i8));
+ };
+
+ // FIXME: Is there a better way to do this? is256BitLaneRepeatedShuffleMask
+ // doesn't work because our mask is for 128 bits and we don't have an MVT
+ // to match that.
+ bool PreferPERMQ =
+ UnaryShuffle && isUndefOrInRange(Mask[0], 0, 2) &&
+ isUndefOrInRange(Mask[1], 0, 2) && isUndefOrInRange(Mask[2], 2, 4) &&
+ isUndefOrInRange(Mask[3], 2, 4) &&
+ (Mask[0] < 0 || Mask[2] < 0 || Mask[0] == (Mask[2] % 2)) &&
+ (Mask[1] < 0 || Mask[3] < 0 || Mask[1] == (Mask[3] % 2));
+
+ if (!isAnyZero(Mask) && !PreferPERMQ) {
+ if (SDValue V = MatchSHUF128(ShuffleVT, DL, Mask, V1, V2, DAG))
+ return DAG.getBitcast(RootVT, V);
+ }
+ }
// Handle 128-bit lane shuffles of 256-bit vectors.
- // If we have AVX2, prefer to use VPERMQ/VPERMPD for unary shuffles unless
- // we need to use the zeroing feature.
- // TODO - this should support binary shuffles.
- if (UnaryShuffle && RootVT.is256BitVector() && NumBaseMaskElts == 2 &&
- !(Subtarget.hasAVX2() && BaseMask[0] >= -1 && BaseMask[1] >= -1) &&
- !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) {
+ if (RootVT.is256BitVector() && NumBaseMaskElts == 2) {
+ MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
+
+ // If the upper half is zeroable, then an extract+insert is better than
+ // using X86ISD::VPERM2X128. The insertion is free, even if it has to
+ // zero the upper half.
+ if (isUndefOrZero(BaseMask[1])) {
+ if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
+ return SDValue(); // Nothing to do!
+ assert(isInRange(BaseMask[0], 0, 2) && "Unexpected lane shuffle");
+ Res = DAG.getBitcast(ShuffleVT, V1);
+ Res = extract128BitVector(Res, BaseMask[0] * 2, DAG, DL);
+ Res = widenSubVector(Res, BaseMask[1] == SM_SentinelZero, Subtarget, DAG,
+ DL, 256);
+ return DAG.getBitcast(RootVT, Res);
+ }
+
if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128)
return SDValue(); // Nothing to do!
- MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
- unsigned PermMask = 0;
- PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0);
- PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4);
-
- Res = DAG.getBitcast(ShuffleVT, V1);
- Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT, Res,
- DAG.getUNDEF(ShuffleVT),
- DAG.getTargetConstant(PermMask, DL, MVT::i8));
- return DAG.getBitcast(RootVT, Res);
+
+ // If we have AVX2, prefer to use VPERMQ/VPERMPD for unary shuffles unless
+ // we need to use the zeroing feature.
+ // Prefer blends for sequential shuffles unless we are optimizing for size.
+ if (UnaryShuffle &&
+ !(Subtarget.hasAVX2() && isUndefOrInRange(BaseMask, 0, 2)) &&
+ (OptForSize || !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0))) {
+ unsigned PermMask = 0;
+ PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0);
+ PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4);
+
+ Res = DAG.getBitcast(ShuffleVT, V1);
+ Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT, Res,
+ DAG.getUNDEF(ShuffleVT),
+ DAG.getTargetConstant(PermMask, DL, MVT::i8));
+ return DAG.getBitcast(RootVT, Res);
+ }
+
+ if (Depth == 0 && Root.getOpcode() == X86ISD::SHUF128)
+ return SDValue(); // Nothing to do!
+
+ // TODO - handle AVX512VL cases with X86ISD::SHUF128.
+ if (!UnaryShuffle && !IsMaskedShuffle) {
+ assert(llvm::all_of(BaseMask, [](int M) { return 0 <= M && M < 4; }) &&
+ "Unexpected shuffle sentinel value");
+ // Prefer blends to X86ISD::VPERM2X128.
+ if (!((BaseMask[0] == 0 && BaseMask[1] == 3) ||
+ (BaseMask[0] == 2 && BaseMask[1] == 1))) {
+ unsigned PermMask = 0;
+ PermMask |= ((BaseMask[0] & 3) << 0);
+ PermMask |= ((BaseMask[1] & 3) << 4);
+
+ Res = DAG.getNode(
+ X86ISD::VPERM2X128, DL, ShuffleVT,
+ DAG.getBitcast(ShuffleVT, isInRange(BaseMask[0], 0, 2) ? V1 : V2),
+ DAG.getBitcast(ShuffleVT, isInRange(BaseMask[1], 0, 2) ? V1 : V2),
+ DAG.getTargetConstant(PermMask, DL, MVT::i8));
+ return DAG.getBitcast(RootVT, Res);
+ }
+ }
}
// For masks that have been widened to 128-bit elements or more,
@@ -33449,9 +34647,20 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (BaseMaskEltSizeInBits > 64) {
assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size");
int MaskScale = BaseMaskEltSizeInBits / 64;
- scaleShuffleMask<int>(MaskScale, BaseMask, Mask);
+ narrowShuffleMaskElts(MaskScale, BaseMask, Mask);
} else {
- Mask = SmallVector<int, 64>(BaseMask.begin(), BaseMask.end());
+ Mask.assign(BaseMask.begin(), BaseMask.end());
+ }
+
+ // For masked shuffles, we're trying to match the root width for better
+ // writemask folding; attempt to scale the mask.
+ // TODO - variable shuffles might need this to be widened again.
+ if (IsMaskedShuffle && NumRootElts > Mask.size()) {
+ assert((NumRootElts % Mask.size()) == 0 && "Illegal mask size");
+ int MaskScale = NumRootElts / Mask.size();
+ SmallVector<int, 64> ScaledMask;
+ narrowShuffleMaskElts(MaskScale, Mask, ScaledMask);
+ Mask = std::move(ScaledMask);
}
unsigned NumMaskElts = Mask.size();
@@ -33484,26 +34693,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
APInt Zeroable = KnownUndef | KnownZero;
if (UnaryShuffle) {
- // If we are shuffling a X86ISD::VZEXT_LOAD then we can use the load
- // directly if we don't shuffle the lower element and we shuffle the upper
- // (zero) elements within themselves.
- if (V1.getOpcode() == X86ISD::VZEXT_LOAD &&
- (cast<MemIntrinsicSDNode>(V1)->getMemoryVT().getScalarSizeInBits() %
- MaskEltSizeInBits) == 0) {
- unsigned Scale =
- cast<MemIntrinsicSDNode>(V1)->getMemoryVT().getScalarSizeInBits() /
- MaskEltSizeInBits;
- ArrayRef<int> HiMask(Mask.data() + Scale, NumMaskElts - Scale);
- if (isSequentialOrUndefInRange(Mask, 0, Scale, 0) &&
- isUndefOrZeroOrInRange(HiMask, Scale, NumMaskElts)) {
- return DAG.getBitcast(RootVT, V1);
- }
- }
-
// Attempt to match against broadcast-from-vector.
// Limit AVX1 to cases where we're loading+broadcasting a scalar element.
- if ((Subtarget.hasAVX2() || (Subtarget.hasAVX() && 32 <= MaskEltSizeInBits))
- && (!IsEVEXShuffle || NumRootElts == NumMaskElts)) {
+ if ((Subtarget.hasAVX2() ||
+ (Subtarget.hasAVX() && 32 <= MaskEltSizeInBits)) &&
+ (!IsMaskedShuffle || NumRootElts == NumMaskElts)) {
SmallVector<int, 64> BroadcastMask(NumMaskElts, 0);
if (isTargetShuffleEquivalent(Mask, BroadcastMask)) {
if (V1.getValueType() == MaskVT &&
@@ -33529,7 +34723,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (matchUnaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1,
DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
ShuffleVT) &&
- (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
+ (!IsMaskedShuffle ||
+ (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
Res = DAG.getBitcast(ShuffleSrcVT, NewV1);
@@ -33540,7 +34735,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (matchUnaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
AllowIntDomain, Subtarget, Shuffle, ShuffleVT,
PermuteImm) &&
- (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
+ (!IsMaskedShuffle ||
+ (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
Res = DAG.getBitcast(ShuffleVT, V1);
@@ -33550,12 +34746,31 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
+ // Attempt to combine to INSERTPS, but only if the inserted element has come
+ // from a scalar.
+ // TODO: Handle other insertions here as well?
+ if (!UnaryShuffle && AllowFloatDomain && RootSizeInBits == 128 &&
+ MaskEltSizeInBits == 32 && Subtarget.hasSSE41() &&
+ !isTargetShuffleEquivalent(Mask, {4, 1, 2, 3})) {
+ SDValue SrcV1 = V1, SrcV2 = V2;
+ if (matchShuffleAsInsertPS(SrcV1, SrcV2, PermuteImm, Zeroable, Mask, DAG) &&
+ SrcV2.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTPS)
+ return SDValue(); // Nothing to do!
+ Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32,
+ DAG.getBitcast(MVT::v4f32, SrcV1),
+ DAG.getBitcast(MVT::v4f32, SrcV2),
+ DAG.getTargetConstant(PermuteImm, DL, MVT::i8));
+ return DAG.getBitcast(RootVT, Res);
+ }
+ }
+
SDValue NewV1 = V1; // Save operands in case early exit happens.
SDValue NewV2 = V2;
if (matchBinaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1,
NewV2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
ShuffleVT, UnaryShuffle) &&
- (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
+ (!IsMaskedShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1);
@@ -33566,10 +34781,10 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
NewV1 = V1; // Save operands in case early exit happens.
NewV2 = V2;
- if (matchBinaryPermuteShuffle(
- MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1,
- NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
- (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
+ if (matchBinaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
+ AllowIntDomain, NewV1, NewV2, DL, DAG,
+ Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
+ (!IsMaskedShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
NewV1 = DAG.getBitcast(ShuffleVT, NewV1);
@@ -33609,6 +34824,44 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
+ // Match shuffle against TRUNCATE patterns.
+ if (AllowIntDomain && MaskEltSizeInBits < 64 && Subtarget.hasAVX512()) {
+ // Match against a VTRUNC instruction, accounting for src/dst sizes.
+ if (matchShuffleAsVTRUNC(ShuffleSrcVT, ShuffleVT, IntMaskVT, Mask, Zeroable,
+ Subtarget)) {
+ bool IsTRUNCATE = ShuffleVT.getVectorNumElements() ==
+ ShuffleSrcVT.getVectorNumElements();
+ unsigned Opc =
+ IsTRUNCATE ? (unsigned)ISD::TRUNCATE : (unsigned)X86ISD::VTRUNC;
+ if (Depth == 0 && Root.getOpcode() == Opc)
+ return SDValue(); // Nothing to do!
+ V1 = DAG.getBitcast(ShuffleSrcVT, V1);
+ Res = DAG.getNode(Opc, DL, ShuffleVT, V1);
+ if (ShuffleVT.getSizeInBits() < RootSizeInBits)
+ Res = widenSubVector(Res, true, Subtarget, DAG, DL, RootSizeInBits);
+ return DAG.getBitcast(RootVT, Res);
+ }
+
+ // Do we need a more general binary truncation pattern?
+ if (RootSizeInBits < 512 &&
+ ((RootVT.is256BitVector() && Subtarget.useAVX512Regs()) ||
+ (RootVT.is128BitVector() && Subtarget.hasVLX())) &&
+ (MaskEltSizeInBits > 8 || Subtarget.hasBWI()) &&
+ isSequentialOrUndefInRange(Mask, 0, NumMaskElts, 0, 2)) {
+ if (Depth == 0 && Root.getOpcode() == ISD::TRUNCATE)
+ return SDValue(); // Nothing to do!
+ ShuffleSrcVT = MVT::getIntegerVT(MaskEltSizeInBits * 2);
+ ShuffleSrcVT = MVT::getVectorVT(ShuffleSrcVT, NumMaskElts / 2);
+ V1 = DAG.getBitcast(ShuffleSrcVT, V1);
+ V2 = DAG.getBitcast(ShuffleSrcVT, V2);
+ ShuffleSrcVT = MVT::getIntegerVT(MaskEltSizeInBits * 2);
+ ShuffleSrcVT = MVT::getVectorVT(ShuffleSrcVT, NumMaskElts);
+ Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShuffleSrcVT, V1, V2);
+ Res = DAG.getNode(ISD::TRUNCATE, DL, IntMaskVT, Res);
+ return DAG.getBitcast(RootVT, Res);
+ }
+ }
+
// Don't try to re-form single instruction chains under any circumstances now
// that we've done encoding canonicalization for them.
if (Depth < 1)
@@ -33618,8 +34871,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 1 : 2;
AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask;
- bool MaskContainsZeros =
- any_of(Mask, [](int M) { return M == SM_SentinelZero; });
+ bool MaskContainsZeros = isAnyZero(Mask);
if (is128BitLaneCrossingShuffleMask(MaskVT, Mask)) {
// If we have a single input lane-crossing shuffle then lower to VPERMV.
@@ -33714,7 +34966,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
SDValue BitMask = getConstVector(EltBits, UndefElts, MaskVT, DAG, DL);
Res = DAG.getBitcast(MaskVT, V1);
unsigned AndOpcode =
- FloatDomain ? unsigned(X86ISD::FAND) : unsigned(ISD::AND);
+ MaskVT.isFloatingPoint() ? unsigned(X86ISD::FAND) : unsigned(ISD::AND);
Res = DAG.getNode(AndOpcode, DL, MaskVT, Res, BitMask);
return DAG.getBitcast(RootVT, Res);
}
@@ -33791,7 +35043,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
continue;
}
if (M == SM_SentinelZero) {
- PSHUFBMask.push_back(DAG.getConstant(255, DL, MVT::i8));
+ PSHUFBMask.push_back(DAG.getConstant(0x80, DL, MVT::i8));
continue;
}
M = Ratio * M + i % Ratio;
@@ -33822,7 +35074,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
continue;
}
if (M == SM_SentinelZero) {
- VPPERMMask.push_back(DAG.getConstant(128, DL, MVT::i8));
+ VPPERMMask.push_back(DAG.getConstant(0x80, DL, MVT::i8));
continue;
}
M = Ratio * M + i % Ratio;
@@ -33897,8 +35149,7 @@ static SDValue combineX86ShuffleChainWithExtract(
unsigned &Offset = Offsets[i];
Src = peekThroughBitcasts(Src);
EVT BaseVT = Src.getValueType();
- while (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- isa<ConstantSDNode>(Src.getOperand(1))) {
+ while (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
Offset += Src.getConstantOperandVal(1);
Src = Src.getOperand(0);
}
@@ -34121,7 +35372,8 @@ static SDValue combineX86ShufflesRecursively(
assert(Root.getSimpleValueType().isVector() &&
"Shuffles operate on vector types!");
- assert(VT.getSizeInBits() == Root.getSimpleValueType().getSizeInBits() &&
+ unsigned RootSizeInBits = Root.getSimpleValueType().getSizeInBits();
+ assert(VT.getSizeInBits() == RootSizeInBits &&
"Can only combine shuffles of the same vector register size.");
// Extract target shuffle mask and resolve sentinels and inputs.
@@ -34135,6 +35387,18 @@ static SDValue combineX86ShufflesRecursively(
OpZero, DAG, Depth, false))
return SDValue();
+ // Shuffle inputs must be the same size as the result, bail on any larger
+ // inputs and widen any smaller inputs.
+ if (llvm::any_of(OpInputs, [RootSizeInBits](SDValue Op) {
+ return Op.getValueSizeInBits() > RootSizeInBits;
+ }))
+ return SDValue();
+
+ for (SDValue &Op : OpInputs)
+ if (Op.getValueSizeInBits() < RootSizeInBits)
+ Op = widenSubVector(peekThroughOneUseBitcasts(Op), false, Subtarget, DAG,
+ SDLoc(Op), RootSizeInBits);
+
SmallVector<int, 64> Mask;
SmallVector<SDValue, 16> Ops;
@@ -34535,6 +35799,59 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
return V;
}
+// Attempt to commute shufps LHS loads:
+// permilps(shufps(load(),x)) --> permilps(shufps(x,load()))
+static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,
+ SelectionDAG &DAG) {
+ // TODO: Add vXf64 support.
+ if (VT != MVT::v4f32 && VT != MVT::v8f32 && VT != MVT::v16f32)
+ return SDValue();
+
+ // SHUFP(LHS, RHS) -> SHUFP(RHS, LHS) iff LHS is foldable + RHS is not.
+ auto commuteSHUFP = [&VT, &DL, &DAG](SDValue Parent, SDValue V) {
+ if (V.getOpcode() != X86ISD::SHUFP || !Parent->isOnlyUserOf(V.getNode()))
+ return SDValue();
+ SDValue N0 = V.getOperand(0);
+ SDValue N1 = V.getOperand(1);
+ unsigned Imm = V.getConstantOperandVal(2);
+ if (!MayFoldLoad(peekThroughOneUseBitcasts(N0)) ||
+ MayFoldLoad(peekThroughOneUseBitcasts(N1)))
+ return SDValue();
+ Imm = ((Imm & 0x0F) << 4) | ((Imm & 0xF0) >> 4);
+ return DAG.getNode(X86ISD::SHUFP, DL, VT, N1, N0,
+ DAG.getTargetConstant(Imm, DL, MVT::i8));
+ };
+
+ switch (N.getOpcode()) {
+ case X86ISD::VPERMILPI:
+ if (SDValue NewSHUFP = commuteSHUFP(N, N.getOperand(0))) {
+ unsigned Imm = N.getConstantOperandVal(1);
+ return DAG.getNode(X86ISD::VPERMILPI, DL, VT, NewSHUFP,
+ DAG.getTargetConstant(Imm ^ 0xAA, DL, MVT::i8));
+ }
+ break;
+ case X86ISD::SHUFP: {
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ unsigned Imm = N.getConstantOperandVal(2);
+ if (N0 == N1) {
+ if (SDValue NewSHUFP = commuteSHUFP(N, N0))
+ return DAG.getNode(X86ISD::SHUFP, DL, VT, NewSHUFP, NewSHUFP,
+ DAG.getTargetConstant(Imm ^ 0xAA, DL, MVT::i8));
+ } else if (SDValue NewSHUFP = commuteSHUFP(N, N0)) {
+ return DAG.getNode(X86ISD::SHUFP, DL, VT, NewSHUFP, N1,
+ DAG.getTargetConstant(Imm ^ 0x0A, DL, MVT::i8));
+ } else if (SDValue NewSHUFP = commuteSHUFP(N, N1)) {
+ return DAG.getNode(X86ISD::SHUFP, DL, VT, N0, NewSHUFP,
+ DAG.getTargetConstant(Imm ^ 0xA0, DL, MVT::i8));
+ }
+ break;
+ }
+ }
+
+ return SDValue();
+}
+
/// Try to combine x86 target specific shuffles.
static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -34544,35 +35861,105 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
SmallVector<int, 4> Mask;
unsigned Opcode = N.getOpcode();
+ bool IsUnary;
+ SmallVector<int, 64> TargetMask;
+ SmallVector<SDValue, 2> TargetOps;
+ if (isTargetShuffle(Opcode))
+ getTargetShuffleMask(N.getNode(), VT, true, TargetOps, TargetMask, IsUnary);
+
// Combine binary shuffle of 2 similar 'Horizontal' instructions into a
- // single instruction.
- if (VT.getScalarSizeInBits() == 64 &&
- (Opcode == X86ISD::MOVSD || Opcode == X86ISD::UNPCKH ||
- Opcode == X86ISD::UNPCKL)) {
- auto BC0 = peekThroughBitcasts(N.getOperand(0));
- auto BC1 = peekThroughBitcasts(N.getOperand(1));
- EVT VT0 = BC0.getValueType();
- EVT VT1 = BC1.getValueType();
- unsigned Opcode0 = BC0.getOpcode();
- unsigned Opcode1 = BC1.getOpcode();
- if (Opcode0 == Opcode1 && VT0 == VT1 &&
- (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD ||
- Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB ||
- Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS)) {
- SDValue Lo, Hi;
- if (Opcode == X86ISD::MOVSD) {
- Lo = BC1.getOperand(0);
- Hi = BC0.getOperand(1);
- } else {
- Lo = BC0.getOperand(Opcode == X86ISD::UNPCKH ? 1 : 0);
- Hi = BC1.getOperand(Opcode == X86ISD::UNPCKH ? 1 : 0);
+ // single instruction. Attempt to match a v2X64 repeating shuffle pattern that
+ // represents the LHS/RHS inputs for the lower/upper halves.
+ SmallVector<int, 16> TargetMask128;
+ if (!TargetMask.empty() && 0 < TargetOps.size() && TargetOps.size() <= 2 &&
+ isRepeatedTargetShuffleMask(128, VT, TargetMask, TargetMask128)) {
+ SmallVector<int, 16> WidenedMask128 = TargetMask128;
+ while (WidenedMask128.size() > 2) {
+ SmallVector<int, 16> WidenedMask;
+ if (!canWidenShuffleElements(WidenedMask128, WidenedMask))
+ break;
+ WidenedMask128 = std::move(WidenedMask);
+ }
+ if (WidenedMask128.size() == 2) {
+ assert(isUndefOrZeroOrInRange(WidenedMask128, 0, 4) && "Illegal shuffle");
+ SDValue BC0 = peekThroughBitcasts(TargetOps.front());
+ SDValue BC1 = peekThroughBitcasts(TargetOps.back());
+ EVT VT0 = BC0.getValueType();
+ EVT VT1 = BC1.getValueType();
+ unsigned Opcode0 = BC0.getOpcode();
+ unsigned Opcode1 = BC1.getOpcode();
+ bool isHoriz = (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD ||
+ Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB);
+ if (Opcode0 == Opcode1 && VT0 == VT1 &&
+ (isHoriz || Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS)) {
+ bool SingleOp = (TargetOps.size() == 1);
+ if (!isHoriz || shouldUseHorizontalOp(SingleOp, DAG, Subtarget)) {
+ SDValue Lo = isInRange(WidenedMask128[0], 0, 2) ? BC0 : BC1;
+ SDValue Hi = isInRange(WidenedMask128[1], 0, 2) ? BC0 : BC1;
+ Lo = Lo.getOperand(WidenedMask128[0] & 1);
+ Hi = Hi.getOperand(WidenedMask128[1] & 1);
+ if (SingleOp) {
+ MVT SrcVT = BC0.getOperand(0).getSimpleValueType();
+ SDValue Undef = DAG.getUNDEF(SrcVT);
+ SDValue Zero = getZeroVector(SrcVT, Subtarget, DAG, DL);
+ Lo = (WidenedMask128[0] == SM_SentinelZero ? Zero : Lo);
+ Hi = (WidenedMask128[1] == SM_SentinelZero ? Zero : Hi);
+ Lo = (WidenedMask128[0] == SM_SentinelUndef ? Undef : Lo);
+ Hi = (WidenedMask128[1] == SM_SentinelUndef ? Undef : Hi);
+ }
+ SDValue Horiz = DAG.getNode(Opcode0, DL, VT0, Lo, Hi);
+ return DAG.getBitcast(VT, Horiz);
+ }
}
- SDValue Horiz = DAG.getNode(Opcode0, DL, VT0, Lo, Hi);
- return DAG.getBitcast(VT, Horiz);
}
}
+ if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG))
+ return R;
+
+ // Canonicalize UNARYSHUFFLE(XOR(X,-1)) -> XOR(UNARYSHUFFLE(X),-1) to
+ // help expose the 'NOT' pattern further up the DAG.
+ // TODO: This might be beneficial for any binop with a 'splattable' operand.
+ switch (Opcode) {
+ case X86ISD::MOVDDUP:
+ case X86ISD::PSHUFD: {
+ SDValue Src = N.getOperand(0);
+ if (Src.hasOneUse() && Src.getValueType() == VT) {
+ if (SDValue Not = IsNOT(Src, DAG, /*OneUse*/ true)) {
+ Not = DAG.getBitcast(VT, Not);
+ Not = Opcode == X86ISD::MOVDDUP
+ ? DAG.getNode(Opcode, DL, VT, Not)
+ : DAG.getNode(Opcode, DL, VT, Not, N.getOperand(1));
+ EVT IntVT = Not.getValueType().changeTypeToInteger();
+ SDValue AllOnes = DAG.getConstant(-1, DL, IntVT);
+ Not = DAG.getBitcast(IntVT, Not);
+ Not = DAG.getNode(ISD::XOR, DL, IntVT, Not, AllOnes);
+ return DAG.getBitcast(VT, Not);
+ }
+ }
+ break;
+ }
+ }
+
+ // Handle specific target shuffles.
switch (Opcode) {
+ case X86ISD::MOVDDUP: {
+ SDValue Src = N.getOperand(0);
+ // Turn a 128-bit MOVDDUP of a full vector load into movddup+vzload.
+ if (VT == MVT::v2f64 && Src.hasOneUse() &&
+ ISD::isNormalLoad(Src.getNode())) {
+ LoadSDNode *LN = cast<LoadSDNode>(Src);
+ if (SDValue VZLoad = narrowLoadToVZLoad(LN, MVT::f64, MVT::v2f64, DAG)) {
+ SDValue Movddup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, VZLoad);
+ DCI.CombineTo(N.getNode(), Movddup);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(LN);
+ return N; // Return N so it doesn't get rechecked!
+ }
+ }
+
+ return SDValue();
+ }
case X86ISD::VBROADCAST: {
SDValue Src = N.getOperand(0);
SDValue BC = peekThroughBitcasts(Src);
@@ -34598,7 +35985,8 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
// broadcast(bitcast(src)) -> bitcast(broadcast(src))
// 32-bit targets have to bitcast i64 to f64, so better to bitcast upward.
if (Src.getOpcode() == ISD::BITCAST &&
- SrcVT.getScalarSizeInBits() == BCVT.getScalarSizeInBits()) {
+ SrcVT.getScalarSizeInBits() == BCVT.getScalarSizeInBits() &&
+ DAG.getTargetLoweringInfo().isTypeLegal(BCVT)) {
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), BCVT.getScalarType(),
VT.getVectorNumElements());
return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));
@@ -34645,6 +36033,190 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
return N; // Return N so it doesn't get rechecked!
}
+ // Due to isTypeDesirableForOp, we won't always shrink a load truncated to
+ // i16. So shrink it ourselves if we can make a broadcast_load.
+ if (SrcVT == MVT::i16 && Src.getOpcode() == ISD::TRUNCATE &&
+ Src.hasOneUse() && Src.getOperand(0).hasOneUse()) {
+ assert(Subtarget.hasAVX2() && "Expected AVX2");
+ SDValue TruncIn = Src.getOperand(0);
+
+ // If this is a truncate of a non-extending load, we can just narrow it to
+ // use a broadcast_load.
+ if (ISD::isNormalLoad(TruncIn.getNode())) {
+ LoadSDNode *LN = cast<LoadSDNode>(TruncIn);
+ // Unless it's volatile or atomic.
+ if (LN->isSimple()) {
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue BcastLd = DAG.getMemIntrinsicNode(
+ X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::i16,
+ LN->getPointerInfo(), LN->getOriginalAlign(),
+ LN->getMemOperand()->getFlags());
+ DCI.CombineTo(N.getNode(), BcastLd);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(Src.getNode());
+ return N; // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // If this is a truncate of an i16 extload, we can directly replace it.
+ if (ISD::isUNINDEXEDLoad(Src.getOperand(0).getNode()) &&
+ ISD::isEXTLoad(Src.getOperand(0).getNode())) {
+ LoadSDNode *LN = cast<LoadSDNode>(Src.getOperand(0));
+ if (LN->getMemoryVT().getSizeInBits() == 16) {
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue BcastLd =
+ DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
+ LN->getMemoryVT(), LN->getMemOperand());
+ DCI.CombineTo(N.getNode(), BcastLd);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(Src.getNode());
+ return N; // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // If this is a truncate of a load that has been shifted right, we can
+ // offset the pointer and use a narrower load.
+ if (TruncIn.getOpcode() == ISD::SRL &&
+ TruncIn.getOperand(0).hasOneUse() &&
+ isa<ConstantSDNode>(TruncIn.getOperand(1)) &&
+ ISD::isNormalLoad(TruncIn.getOperand(0).getNode())) {
+ LoadSDNode *LN = cast<LoadSDNode>(TruncIn.getOperand(0));
+ unsigned ShiftAmt = TruncIn.getConstantOperandVal(1);
+ // Make sure the shift amount and the load size are divisible by 16.
+ // Don't do this if the load is volatile or atomic.
+ if (ShiftAmt % 16 == 0 && TruncIn.getValueSizeInBits() % 16 == 0 &&
+ LN->isSimple()) {
+ unsigned Offset = ShiftAmt / 8;
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ptr = DAG.getMemBasePlusOffset(LN->getBasePtr(), Offset, DL);
+ SDValue Ops[] = { LN->getChain(), Ptr };
+ SDValue BcastLd = DAG.getMemIntrinsicNode(
+ X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::i16,
+ LN->getPointerInfo().getWithOffset(Offset),
+ LN->getOriginalAlign(),
+ LN->getMemOperand()->getFlags());
+ DCI.CombineTo(N.getNode(), BcastLd);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(Src.getNode());
+ return N; // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ // vbroadcast(vzload X) -> vbroadcast_load X
+ if (Src.getOpcode() == X86ISD::VZEXT_LOAD && Src.hasOneUse()) {
+ MemSDNode *LN = cast<MemIntrinsicSDNode>(Src);
+ if (LN->getMemoryVT().getSizeInBits() == VT.getScalarSizeInBits()) {
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue BcastLd =
+ DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
+ LN->getMemoryVT(), LN->getMemOperand());
+ DCI.CombineTo(N.getNode(), BcastLd);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(LN);
+ return N; // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // vbroadcast(vector load X) -> vbroadcast_load
+ if (SrcVT == MVT::v2f64 && Src.hasOneUse() &&
+ ISD::isNormalLoad(Src.getNode())) {
+ LoadSDNode *LN = cast<LoadSDNode>(Src);
+ // Unless the load is volatile or atomic.
+ if (LN->isSimple()) {
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue BcastLd = DAG.getMemIntrinsicNode(
+ X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::f64,
+ LN->getPointerInfo(), LN->getOriginalAlign(),
+ LN->getMemOperand()->getFlags());
+ DCI.CombineTo(N.getNode(), BcastLd);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(LN);
+ return N; // Return N so it doesn't get rechecked!
+ }
+ }
+
+ return SDValue();
+ }
+ case X86ISD::VZEXT_MOVL: {
+ SDValue N0 = N.getOperand(0);
+
+ // If this is a vzmovl of a full vector load, replace it with a vzload, unless
+ // the load is volatile.
+ if (N0.hasOneUse() && ISD::isNormalLoad(N0.getNode())) {
+ auto *LN = cast<LoadSDNode>(N0);
+ if (SDValue VZLoad =
+ narrowLoadToVZLoad(LN, VT.getVectorElementType(), VT, DAG)) {
+ DCI.CombineTo(N.getNode(), VZLoad);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(LN);
+ return N;
+ }
+ }
+
+ // If this is a VZEXT_MOVL of a VBROADCAST_LOAD, we don't need the broadcast
+ // and can just use a VZEXT_LOAD.
+ // FIXME: Is there some way to do this with SimplifyDemandedVectorElts?
+ if (N0.hasOneUse() && N0.getOpcode() == X86ISD::VBROADCAST_LOAD) {
+ auto *LN = cast<MemSDNode>(N0);
+ if (VT.getScalarSizeInBits() == LN->getMemoryVT().getSizeInBits()) {
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
+ SDValue VZLoad =
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops,
+ LN->getMemoryVT(), LN->getMemOperand());
+ DCI.CombineTo(N.getNode(), VZLoad);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(LN);
+ return N;
+ }
+ }
+
+ // Turn (v2i64 (vzext_movl (scalar_to_vector (i64 X)))) into
+ // (v2i64 (bitcast (v4i32 (vzext_movl (scalar_to_vector (i32 (trunc X)))))))
+ // if the upper bits of the i64 are zero.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ N0.getOperand(0).hasOneUse() &&
+ N0.getOperand(0).getValueType() == MVT::i64) {
+ SDValue In = N0.getOperand(0);
+ APInt Mask = APInt::getHighBitsSet(64, 32);
+ if (DAG.MaskedValueIsZero(In, Mask)) {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, In);
+ MVT VecVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2);
+ SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Trunc);
+ SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, DL, VecVT, SclVec);
+ return DAG.getBitcast(VT, Movl);
+ }
+ }
+
+ // Load a scalar integer constant directly to XMM instead of transferring
+ // an immediate value from a GPR.
+ // vzext_movl (scalar_to_vector C) --> load [C,0...]
+ if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ if (auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+ // Create a vector constant - scalar constant followed by zeros.
+ EVT ScalarVT = N0.getOperand(0).getValueType();
+ Type *ScalarTy = ScalarVT.getTypeForEVT(*DAG.getContext());
+ unsigned NumElts = VT.getVectorNumElements();
+ Constant *Zero = ConstantInt::getNullValue(ScalarTy);
+ SmallVector<Constant *, 32> ConstantVec(NumElts, Zero);
+ ConstantVec[0] = const_cast<ConstantInt *>(C->getConstantIntValue());
+
+ // Load the vector constant from constant pool.
+ MVT PVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ SDValue CP = DAG.getConstantPool(ConstantVector::get(ConstantVec), PVT);
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
+ Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign();
+ return DAG.getLoad(VT, DL, DAG.getEntryNode(), CP, MPI, Alignment,
+ MachineMemOperand::MOLoad);
+ }
+ }
+
return SDValue();
}
case X86ISD::BLENDI: {
@@ -34685,6 +36257,34 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
}
return SDValue();
}
+ case X86ISD::VPERM2X128: {
+ // If both 128-bit values were inserted into high halves of 256-bit values,
+ // the shuffle can be reduced to a concatenation of subvectors:
+ // vperm2x128 (ins ?, X, C1), (ins ?, Y, C2), 0x31 --> concat X, Y
+ // Note: We are only looking for the exact high/high shuffle mask because we
+ // expect to fold other similar patterns before creating this opcode.
+ SDValue Ins0 = peekThroughBitcasts(N.getOperand(0));
+ SDValue Ins1 = peekThroughBitcasts(N.getOperand(1));
+ unsigned Imm = N.getConstantOperandVal(2);
+ if (!(Imm == 0x31 &&
+ Ins0.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ Ins1.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ Ins0.getValueType() == Ins1.getValueType()))
+ return SDValue();
+
+ SDValue X = Ins0.getOperand(1);
+ SDValue Y = Ins1.getOperand(1);
+ unsigned C1 = Ins0.getConstantOperandVal(2);
+ unsigned C2 = Ins1.getConstantOperandVal(2);
+ MVT SrcVT = X.getSimpleValueType();
+ unsigned SrcElts = SrcVT.getVectorNumElements();
+ if (SrcVT != Y.getSimpleValueType() || SrcVT.getSizeInBits() != 128 ||
+ C1 != SrcElts || C2 != SrcElts)
+ return SDValue();
+
+ return DAG.getBitcast(VT, DAG.getNode(ISD::CONCAT_VECTORS, DL,
+ Ins1.getValueType(), X, Y));
+ }
case X86ISD::PSHUFD:
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
@@ -34724,8 +36324,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
assert(VT == MVT::v4f32 && "INSERTPS ValueType must be MVT::v4f32");
SDValue Op0 = N.getOperand(0);
SDValue Op1 = N.getOperand(1);
- SDValue Op2 = N.getOperand(2);
- unsigned InsertPSMask = cast<ConstantSDNode>(Op2)->getZExtValue();
+ unsigned InsertPSMask = N.getConstantOperandVal(2);
unsigned SrcIdx = (InsertPSMask >> 6) & 0x3;
unsigned DstIdx = (InsertPSMask >> 4) & 0x3;
unsigned ZeroMask = InsertPSMask & 0xF;
@@ -34865,9 +36464,9 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
(V.getOpcode() == X86ISD::PSHUFLW ||
V.getOpcode() == X86ISD::PSHUFHW) &&
V.getOpcode() != N.getOpcode() &&
- V.hasOneUse()) {
+ V.hasOneUse() && V.getOperand(0).hasOneUse()) {
SDValue D = peekThroughOneUseBitcasts(V.getOperand(0));
- if (D.getOpcode() == X86ISD::PSHUFD && D.hasOneUse()) {
+ if (D.getOpcode() == X86ISD::PSHUFD) {
SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
SmallVector<int, 4> DMask = getPSHUFShuffleMask(D);
int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
@@ -35266,7 +36865,8 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
}
// Attempt to combine into a vector load/broadcast.
- if (SDValue LD = combineToConsecutiveLoads(VT, N, dl, DAG, Subtarget, true))
+ if (SDValue LD = combineToConsecutiveLoads(VT, SDValue(N, 0), dl, DAG,
+ Subtarget, true))
return LD;
// For AVX2, we sometimes want to combine
@@ -35299,79 +36899,100 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
return SDValue(N, 0);
}
- // Look for a v2i64/v2f64 VZEXT_MOVL of a node that already produces zeros
- // in the upper 64 bits.
- // TODO: Can we generalize this using computeKnownBits.
- if (N->getOpcode() == X86ISD::VZEXT_MOVL &&
- (VT == MVT::v2f64 || VT == MVT::v2i64) &&
- N->getOperand(0).getOpcode() == ISD::BITCAST &&
- (N->getOperand(0).getOperand(0).getValueType() == MVT::v4f32 ||
- N->getOperand(0).getOperand(0).getValueType() == MVT::v4i32)) {
- SDValue In = N->getOperand(0).getOperand(0);
- switch (In.getOpcode()) {
- default:
- break;
- case X86ISD::CVTP2SI: case X86ISD::CVTP2UI:
- case X86ISD::MCVTP2SI: case X86ISD::MCVTP2UI:
- case X86ISD::CVTTP2SI: case X86ISD::CVTTP2UI:
- case X86ISD::MCVTTP2SI: case X86ISD::MCVTTP2UI:
- case X86ISD::CVTSI2P: case X86ISD::CVTUI2P:
- case X86ISD::MCVTSI2P: case X86ISD::MCVTUI2P:
- case X86ISD::VFPROUND: case X86ISD::VMFPROUND:
- if (In.getOperand(0).getValueType() == MVT::v2f64 ||
- In.getOperand(0).getValueType() == MVT::v2i64)
- return N->getOperand(0); // return the bitcast
- break;
- case X86ISD::STRICT_CVTTP2SI:
- case X86ISD::STRICT_CVTTP2UI:
- case X86ISD::STRICT_CVTSI2P:
- case X86ISD::STRICT_CVTUI2P:
- case X86ISD::STRICT_VFPROUND:
- if (In.getOperand(1).getValueType() == MVT::v2f64 ||
- In.getOperand(1).getValueType() == MVT::v2i64)
- return N->getOperand(0);
- break;
- }
- }
-
// Pull subvector inserts into undef through VZEXT_MOVL by making it an
// insert into a zero vector. This helps get VZEXT_MOVL closer to
// scalar_to_vectors where 256/512 are canonicalized to an insert and a
// 128-bit scalar_to_vector. This reduces the number of isel patterns.
if (N->getOpcode() == X86ISD::VZEXT_MOVL && !DCI.isBeforeLegalizeOps() &&
- N->getOperand(0).getOpcode() == ISD::INSERT_SUBVECTOR &&
- N->getOperand(0).hasOneUse() &&
- N->getOperand(0).getOperand(0).isUndef() &&
- isNullConstant(N->getOperand(0).getOperand(2))) {
- SDValue In = N->getOperand(0).getOperand(1);
- SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, dl, In.getValueType(), In);
- return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT,
- getZeroVector(VT.getSimpleVT(), Subtarget, DAG, dl),
- Movl, N->getOperand(0).getOperand(2));
- }
-
- // If this a vzmovl of a full vector load, replace it with a vzload, unless
- // the load is volatile.
- if (N->getOpcode() == X86ISD::VZEXT_MOVL && N->getOperand(0).hasOneUse() &&
- ISD::isNormalLoad(N->getOperand(0).getNode())) {
- LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
- if (LN->isSimple()) {
- SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
- SDValue VZLoad =
- DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
- VT.getVectorElementType(),
- LN->getPointerInfo(),
- LN->getAlignment(),
- MachineMemOperand::MOLoad);
- DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
- return VZLoad;
+ N->getOperand(0).hasOneUse()) {
+ SDValue V = peekThroughOneUseBitcasts(N->getOperand(0));
+
+ if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ V.getOperand(0).isUndef() && isNullConstant(V.getOperand(2))) {
+ SDValue In = V.getOperand(1);
+ MVT SubVT =
+ MVT::getVectorVT(VT.getSimpleVT().getVectorElementType(),
+ In.getValueSizeInBits() / VT.getScalarSizeInBits());
+ In = DAG.getBitcast(SubVT, In);
+ SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, dl, SubVT, In);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT,
+ getZeroVector(VT.getSimpleVT(), Subtarget, DAG, dl),
+ Movl, V.getOperand(2));
}
}
return SDValue();
}
+// Simplify variable target shuffle masks based on the demanded elements.
+// TODO: Handle DemandedBits in mask indices as well?
+bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetShuffle(
+ SDValue Op, const APInt &DemandedElts, unsigned MaskIndex,
+ TargetLowering::TargetLoweringOpt &TLO, unsigned Depth) const {
+ // If we're demanding all elements, don't bother trying to simplify the mask.
+ unsigned NumElts = DemandedElts.getBitWidth();
+ if (DemandedElts.isAllOnesValue())
+ return false;
+
+ SDValue Mask = Op.getOperand(MaskIndex);
+ if (!Mask.hasOneUse())
+ return false;
+
+ // Attempt to generically simplify the variable shuffle mask.
+ APInt MaskUndef, MaskZero;
+ if (SimplifyDemandedVectorElts(Mask, DemandedElts, MaskUndef, MaskZero, TLO,
+ Depth + 1))
+ return true;
+
+ // Attempt to extract+simplify a (constant pool load) shuffle mask.
+ // TODO: Support other types from getTargetShuffleMaskIndices?
+ SDValue BC = peekThroughOneUseBitcasts(Mask);
+ EVT BCVT = BC.getValueType();
+ auto *Load = dyn_cast<LoadSDNode>(BC);
+ if (!Load)
+ return false;
+
+ const Constant *C = getTargetConstantFromNode(Load);
+ if (!C)
+ return false;
+
+ Type *CTy = C->getType();
+ if (!CTy->isVectorTy() ||
+ CTy->getPrimitiveSizeInBits() != Mask.getValueSizeInBits())
+ return false;
+
+ // Handle scaling for i64 elements on 32-bit targets.
+ unsigned NumCstElts = cast<FixedVectorType>(CTy)->getNumElements();
+ if (NumCstElts != NumElts && NumCstElts != (NumElts * 2))
+ return false;
+ unsigned Scale = NumCstElts / NumElts;
+
+ // Simplify mask if we have an undemanded element that is not undef.
+ bool Simplified = false;
+ SmallVector<Constant *, 32> ConstVecOps;
+ for (unsigned i = 0; i != NumCstElts; ++i) {
+ Constant *Elt = C->getAggregateElement(i);
+ if (!DemandedElts[i / Scale] && !isa<UndefValue>(Elt)) {
+ ConstVecOps.push_back(UndefValue::get(Elt->getType()));
+ Simplified = true;
+ continue;
+ }
+ ConstVecOps.push_back(Elt);
+ }
+ if (!Simplified)
+ return false;
+
+ // Generate new constant pool entry + legalize immediately for the load.
+ SDLoc DL(Op);
+ SDValue CV = TLO.DAG.getConstantPool(ConstantVector::get(ConstVecOps), BCVT);
+ SDValue LegalCV = LowerConstantPool(CV, TLO.DAG);
+ SDValue NewMask = TLO.DAG.getLoad(
+ BCVT, DL, TLO.DAG.getEntryNode(), LegalCV,
+ MachinePointerInfo::getConstantPool(TLO.DAG.getMachineFunction()),
+ Load->getAlign());
+ return TLO.CombineTo(Mask, TLO.DAG.getBitcast(Mask.getValueType(), NewMask));
+}
+
bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
TargetLoweringOpt &TLO, unsigned Depth) const {
@@ -35541,12 +37162,10 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
// Aggressively peek through ops to get at the demanded elts.
// TODO - we should do this for all target/faux shuffles ops.
if (!DemandedElts.isAllOnesValue()) {
- APInt DemandedSrcBits =
- APInt::getAllOnesValue(N0.getScalarValueSizeInBits());
- SDValue NewN0 = SimplifyMultipleUseDemandedBits(
- N0, DemandedSrcBits, DemandedLHS, TLO.DAG, Depth + 1);
- SDValue NewN1 = SimplifyMultipleUseDemandedBits(
- N1, DemandedSrcBits, DemandedRHS, TLO.DAG, Depth + 1);
+ SDValue NewN0 = SimplifyMultipleUseDemandedVectorElts(N0, DemandedLHS,
+ TLO.DAG, Depth + 1);
+ SDValue NewN1 = SimplifyMultipleUseDemandedVectorElts(N1, DemandedRHS,
+ TLO.DAG, Depth + 1);
if (NewN0 || NewN1) {
NewN0 = NewN0 ? NewN0 : N0;
NewN1 = NewN1 ? NewN1 : N1;
@@ -35608,6 +37227,15 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
KnownUndef = LHSUndef & RHSUndef;
break;
}
+ case X86ISD::VZEXT_MOVL: {
+ // If upper demanded elements are already zero then we have nothing to do.
+ SDValue Src = Op.getOperand(0);
+ APInt DemandedUpperElts = DemandedElts;
+ DemandedUpperElts.clearLowBits(1);
+ if (TLO.DAG.computeKnownBits(Src, DemandedUpperElts, Depth + 1).isZero())
+ return TLO.CombineTo(Op, Src);
+ break;
+ }
case X86ISD::VBROADCAST: {
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
@@ -35625,36 +37253,32 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
Depth + 1))
return true;
+ // Aggressively peek through src to get at the demanded elt.
+ // TODO - we should do this for all target/faux shuffle ops.
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
+ Src, SrcElts, TLO.DAG, Depth + 1))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
break;
}
- case X86ISD::VPERMV: {
- SDValue Mask = Op.getOperand(0);
- APInt MaskUndef, MaskZero;
- if (SimplifyDemandedVectorElts(Mask, DemandedElts, MaskUndef, MaskZero, TLO,
- Depth + 1))
+ case X86ISD::VPERMV:
+ if (SimplifyDemandedVectorEltsForTargetShuffle(Op, DemandedElts, 0, TLO,
+ Depth))
return true;
break;
- }
case X86ISD::PSHUFB:
case X86ISD::VPERMV3:
- case X86ISD::VPERMILPV: {
- SDValue Mask = Op.getOperand(1);
- APInt MaskUndef, MaskZero;
- if (SimplifyDemandedVectorElts(Mask, DemandedElts, MaskUndef, MaskZero, TLO,
- Depth + 1))
+ case X86ISD::VPERMILPV:
+ if (SimplifyDemandedVectorEltsForTargetShuffle(Op, DemandedElts, 1, TLO,
+ Depth))
return true;
break;
- }
case X86ISD::VPPERM:
- case X86ISD::VPERMIL2: {
- SDValue Mask = Op.getOperand(2);
- APInt MaskUndef, MaskZero;
- if (SimplifyDemandedVectorElts(Mask, DemandedElts, MaskUndef, MaskZero, TLO,
- Depth + 1))
+ case X86ISD::VPERMIL2:
+ if (SimplifyDemandedVectorEltsForTargetShuffle(Op, DemandedElts, 2, TLO,
+ Depth))
return true;
break;
}
- }
// For 256/512-bit ops that are 128/256-bit ops glued together, if we do not
// demand any of the high elements, then narrow the op to 128/256-bits: e.g.
@@ -35669,18 +37293,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
ExtSizeInBits = SizeInBits / 4;
switch (Opc) {
- // Zero upper elements.
- case X86ISD::VZEXT_MOVL: {
- SDLoc DL(Op);
- SDValue Ext0 =
- extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
- SDValue ExtOp =
- TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0);
- SDValue UndefVec = TLO.DAG.getUNDEF(VT);
- SDValue Insert =
- insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
- return TLO.CombineTo(Op, Insert);
- }
// Subvector broadcast.
case X86ISD::SUBV_BROADCAST: {
SDLoc DL(Op);
@@ -35733,10 +37345,20 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
}
break;
}
- // Target Shuffles.
+ // Zero upper elements.
+ case X86ISD::VZEXT_MOVL:
+ // Target unary shuffles by immediate:
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFLW:
+ case X86ISD::PSHUFHW:
+ case X86ISD::VPERMILPI:
+ // (Non-Lane Crossing) Target Shuffles.
+ case X86ISD::VPERMILPV:
+ case X86ISD::VPERMIL2:
case X86ISD::PSHUFB:
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
+ case X86ISD::BLENDI:
// Saturated Packs.
case X86ISD::PACKSS:
case X86ISD::PACKUS:
@@ -35746,14 +37368,20 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
case X86ISD::FHADD:
case X86ISD::FHSUB: {
SDLoc DL(Op);
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ SDValue SrcOp = Op.getOperand(i);
+ EVT SrcVT = SrcOp.getValueType();
+ assert((!SrcVT.isVector() || SrcVT.getSizeInBits() == SizeInBits) &&
+ "Unsupported vector size");
+ Ops.push_back(SrcVT.isVector() ? extractSubVector(SrcOp, 0, TLO.DAG, DL,
+ ExtSizeInBits)
+ : SrcOp);
+ }
MVT ExtVT = VT.getSimpleVT();
ExtVT = MVT::getVectorVT(ExtVT.getScalarType(),
ExtSizeInBits / ExtVT.getScalarSizeInBits());
- SDValue Ext0 =
- extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
- SDValue Ext1 =
- extractSubVector(Op.getOperand(1), 0, TLO.DAG, DL, ExtSizeInBits);
- SDValue ExtOp = TLO.DAG.getNode(Opc, DL, ExtVT, Ext0, Ext1);
+ SDValue ExtOp = TLO.DAG.getNode(Opc, DL, ExtVT, Ops);
SDValue UndefVec = TLO.DAG.getUNDEF(VT);
SDValue Insert =
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
@@ -35850,6 +37478,18 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
unsigned BitWidth = OriginalDemandedBits.getBitWidth();
unsigned Opc = Op.getOpcode();
switch(Opc) {
+ case X86ISD::VTRUNC: {
+ KnownBits KnownOp;
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+
+ // Simplify the input, using demanded bit information.
+ APInt TruncMask = OriginalDemandedBits.zext(SrcVT.getScalarSizeInBits());
+ APInt DemandedElts = OriginalDemandedElts.trunc(SrcVT.getVectorNumElements());
+ if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, KnownOp, TLO, Depth + 1))
+ return true;
+ break;
+ }
case X86ISD::PMULDQ:
case X86ISD::PMULUDQ: {
// PMULDQ/PMULUDQ only uses lower 32 bits from each vector element.
@@ -35906,6 +37546,14 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
}
}
+ // If we are only demanding sign bits then we can use the shift source directly.
+ unsigned NumSignBits =
+ TLO.DAG.ComputeNumSignBits(Op0, OriginalDemandedElts, Depth + 1);
+ unsigned UpperDemandedBits =
+ BitWidth - OriginalDemandedBits.countTrailingZeros();
+ if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits)
+ return TLO.CombineTo(Op, Op0);
+
if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known,
TLO, Depth + 1))
return true;
@@ -36019,7 +37667,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, V, Op.getOperand(1)));
- Known = KnownVec.zext(BitWidth, true);
+ Known = KnownVec.zext(BitWidth);
return false;
}
break;
@@ -36072,6 +37720,17 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
if (SimplifyDemandedBits(Op.getOperand(1), SignMask, DemandedRHS,
KnownRHS, TLO, Depth + 1))
return true;
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op.getOperand(0), SignMask, DemandedLHS, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op.getOperand(1), SignMask, DemandedRHS, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ SDValue Op0 = DemandedOp0 ? DemandedOp0 : Op.getOperand(0);
+ SDValue Op1 = DemandedOp1 ? DemandedOp1 : Op.getOperand(1);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, Op0, Op1));
+ }
}
// TODO - add general PACKSS/PACKUS SimplifyDemandedBits support.
break;
@@ -36104,16 +37763,51 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
// MOVMSK only uses the MSB from each vector element.
KnownBits KnownSrc;
- if (SimplifyDemandedBits(Src, APInt::getSignMask(SrcBits), DemandedElts,
- KnownSrc, TLO, Depth + 1))
+ APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
+ Depth + 1))
return true;
if (KnownSrc.One[SrcBits - 1])
Known.One.setLowBits(NumElts);
else if (KnownSrc.Zero[SrcBits - 1])
Known.Zero.setLowBits(NumElts);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
+ Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
return false;
}
+ case X86ISD::BEXTR: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ // Only the bottom 16 bits of the control are required.
+ if (auto *Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
+ // NOTE: SimplifyDemandedBits won't do this for constants.
+ const APInt &Val1 = Cst1->getAPIntValue();
+ APInt MaskedVal1 = Val1 & 0xFFFF;
+ if (MaskedVal1 != Val1) {
+ SDLoc DL(Op);
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(X86ISD::BEXTR, DL, VT, Op0,
+ TLO.DAG.getConstant(MaskedVal1, DL, VT)));
+ }
+ }
+
+ KnownBits Known1;
+ APInt DemandedMask(APInt::getLowBitsSet(BitWidth, 16));
+ if (SimplifyDemandedBits(Op1, DemandedMask, Known1, TLO, Depth + 1))
+ return true;
+
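+ // The BEXTR control encodes the start index in bits [7:0] and the extract length in bits [15:8].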
+ // If the length is 0, replace with 0.
+ KnownBits LengthBits = Known1.extractBits(8, 8);
+ if (LengthBits.isZero())
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+
+ break;
+ }
}
return TargetLowering::SimplifyDemandedBitsForTargetNode(
@@ -36137,8 +37831,26 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
!DemandedElts[CIdx->getZExtValue()])
return Vec;
- break;
+ break;
+ }
+ case X86ISD::VSHLI: {
+ // If we are only demanding sign bits then we can use the shift source
+ // directly.
+ SDValue Op0 = Op.getOperand(0);
+ unsigned ShAmt = Op.getConstantOperandVal(1);
+ unsigned BitWidth = DemandedBits.getBitWidth();
+ unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
+ unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
+ if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits)
+ return Op0;
+ break;
}
+ case X86ISD::VSRAI:
+ // iff we only need the sign bit then we can use the source directly.
+ // TODO: generalize where we only demand extended signbits.
+ if (DemandedBits.isSignMask())
+ return Op.getOperand(0);
+ break;
case X86ISD::PCMPGT:
// icmp sgt(0, R) == ashr(R, BitWidth-1).
// iff we only need the sign bit then we can use R directly.
@@ -36172,13 +37884,13 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
int M = ShuffleMask[i];
if (!DemandedElts[i] || ShuffleUndef[i])
continue;
- int Op = M / NumElts;
- int Index = M % NumElts;
- if (M < 0 || Index != i) {
+ int OpIdx = M / NumElts;
+ int EltIdx = M % NumElts;
+ if (M < 0 || EltIdx != i) {
IdentityOp.clearAllBits();
break;
}
- IdentityOp &= APInt::getOneBitSet(NumOps, Op);
+ IdentityOp &= APInt::getOneBitSet(NumOps, OpIdx);
if (IdentityOp == 0)
break;
}
@@ -36209,6 +37921,51 @@ static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size) {
return false;
}
+// Helper to flip between AND/OR/XOR opcodes and their X86ISD FP equivalents.
+static unsigned getAltBitOpcode(unsigned Opcode) {
+ switch(Opcode) {
+ case ISD::AND: return X86ISD::FAND;
+ case ISD::OR: return X86ISD::FOR;
+ case ISD::XOR: return X86ISD::FXOR;
+ case X86ISD::ANDNP: return X86ISD::FANDN;
+ }
+ llvm_unreachable("Unknown bitwise opcode");
+}
+
+// Helper to adjust v4i32 MOVMSK expansion to work with SSE1-only targets.
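+// Returns a v4f32 value whose sign bits hold the v4i1 lanes, or an empty SDValue if no match is found.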
+static SDValue adjustBitcastSrcVectorSSE1(SelectionDAG &DAG, SDValue Src,
+ const SDLoc &DL) {
+ EVT SrcVT = Src.getValueType();
+ if (SrcVT != MVT::v4i1)
+ return SDValue();
+
+ switch (Src.getOpcode()) {
+ case ISD::SETCC:
+ if (Src.getOperand(0).getValueType() == MVT::v4i32 &&
+ ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode()) &&
+ cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETLT) {
+ SDValue Op0 = Src.getOperand(0);
+ if (ISD::isNormalLoad(Op0.getNode()))
+ return DAG.getBitcast(MVT::v4f32, Op0);
+ if (Op0.getOpcode() == ISD::BITCAST &&
+ Op0.getOperand(0).getValueType() == MVT::v4f32)
+ return Op0.getOperand(0);
+ }
+ break;
+ case ISD::AND:
+ case ISD::XOR:
+ case ISD::OR: {
+ SDValue Op0 = adjustBitcastSrcVectorSSE1(DAG, Src.getOperand(0), DL);
+ SDValue Op1 = adjustBitcastSrcVectorSSE1(DAG, Src.getOperand(1), DL);
+ if (Op0 && Op1)
+ return DAG.getNode(getAltBitOpcode(Src.getOpcode()), DL, MVT::v4f32, Op0,
+ Op1);
+ break;
+ }
+ }
+ return SDValue();
+}
+
// Helper to push sign extension of vXi1 SETCC result through bitops.
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
SDValue Src, const SDLoc &DL) {
@@ -36239,18 +37996,40 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
return SDValue();
+ // Recognize the IR pattern for the movmsk intrinsic under SSE1 before type
+ // legalization destroys the v4i32 type.
+ if (Subtarget.hasSSE1() && !Subtarget.hasSSE2()) {
+ if (SDValue V = adjustBitcastSrcVectorSSE1(DAG, Src, DL)) {
+ V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32,
+ DAG.getBitcast(MVT::v4f32, V));
+ return DAG.getZExtOrTrunc(V, DL, VT);
+ }
+ }
+
// If the input is a truncate from v16i8 or v32i8 go ahead and use a
// movmskb even with avx512. This will be better than truncating to vXi1 and
// using a kmov. This can especially help KNL if the input is a v16i8/v32i8
// vpcmpeqb/vpcmpgtb.
- bool IsTruncated = Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse() &&
- (Src.getOperand(0).getValueType() == MVT::v16i8 ||
- Src.getOperand(0).getValueType() == MVT::v32i8 ||
- Src.getOperand(0).getValueType() == MVT::v64i8);
+ bool PreferMovMsk = Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse() &&
+ (Src.getOperand(0).getValueType() == MVT::v16i8 ||
+ Src.getOperand(0).getValueType() == MVT::v32i8 ||
+ Src.getOperand(0).getValueType() == MVT::v64i8);
+
+ // Prefer movmsk for AVX512 for (bitcast (setlt X, 0)) which can be handled
+ // directly with vpmovmskb/vmovmskps/vmovmskpd.
+ if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse() &&
+ cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETLT &&
+ ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode())) {
+ EVT CmpVT = Src.getOperand(0).getValueType();
+ EVT EltVT = CmpVT.getVectorElementType();
+ if (CmpVT.getSizeInBits() <= 256 &&
+ (EltVT == MVT::i8 || EltVT == MVT::i32 || EltVT == MVT::i64))
+ PreferMovMsk = true;
+ }
// With AVX512 vxi1 types are legal and we prefer using k-regs.
// MOVMSK is supported in SSE2 or later.
- if (!Subtarget.hasSSE2() || (Subtarget.hasAVX512() && !IsTruncated))
+ if (!Subtarget.hasSSE2() || (Subtarget.hasAVX512() && !PreferMovMsk))
return SDValue();
// There are MOVMSK flavors for types v16i8, v32i8, v4f32, v8f32, v4f64 and
@@ -36306,7 +38085,14 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
case MVT::v64i1:
// If we have AVX512F, but not AVX512BW and the input is truncated from
// v64i8 checked earlier. Then split the input and make two pmovmskbs.
- if (Subtarget.hasAVX512() && !Subtarget.hasBWI()) {
+ if (Subtarget.hasAVX512()) {
+ if (Subtarget.hasBWI())
+ return SDValue();
+ SExtVT = MVT::v64i8;
+ break;
+ }
+ // Split if this is a <64 x i8> comparison result.
+ if (checkBitcastSrcVectorSize(Src, 512)) {
SExtVT = MVT::v64i8;
break;
}
@@ -36476,6 +38262,74 @@ static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG,
return Ops[0];
}
+// Recursive function that attempts to find if a bool vector node was originally
+// a vector/float/double that got truncated/extended/bitcast to/from a scalar
+// integer. If so, replace the scalar ops with bool vector equivalents back down
+// the chain.
+static SDValue combineBitcastToBoolVector(EVT VT, SDValue V, SDLoc DL,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned Opc = V.getOpcode();
+ switch (Opc) {
+ case ISD::BITCAST: {
+ // Bitcast from a vector/float/double, we can cheaply bitcast to VT.
+ SDValue Src = V.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ if (SrcVT.isVector() || SrcVT.isFloatingPoint())
+ return DAG.getBitcast(VT, Src);
+ break;
+ }
+ case ISD::TRUNCATE: {
+ // If we find a suitable source, a truncated scalar becomes a subvector.
+ SDValue Src = V.getOperand(0);
+ EVT NewSrcVT =
+ EVT::getVectorVT(*DAG.getContext(), MVT::i1, Src.getValueSizeInBits());
+ if (TLI.isTypeLegal(NewSrcVT))
+ if (SDValue N0 =
+ combineBitcastToBoolVector(NewSrcVT, Src, DL, DAG, Subtarget))
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, N0,
+ DAG.getIntPtrConstant(0, DL));
+ break;
+ }
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND: {
+ // If we find a suitable source, an extended scalar becomes a subvector.
+ SDValue Src = V.getOperand(0);
+ EVT NewSrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ Src.getScalarValueSizeInBits());
+ if (TLI.isTypeLegal(NewSrcVT))
+ if (SDValue N0 =
+ combineBitcastToBoolVector(NewSrcVT, Src, DL, DAG, Subtarget))
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+ Opc == ISD::ANY_EXTEND ? DAG.getUNDEF(VT)
+ : DAG.getConstant(0, DL, VT),
+ N0, DAG.getIntPtrConstant(0, DL));
+ break;
+ }
+ case ISD::OR: {
+ // If we find suitable sources, we can just move an OR to the vector domain.
+ SDValue Src0 = V.getOperand(0);
+ SDValue Src1 = V.getOperand(1);
+ if (SDValue N0 = combineBitcastToBoolVector(VT, Src0, DL, DAG, Subtarget))
+ if (SDValue N1 = combineBitcastToBoolVector(VT, Src1, DL, DAG, Subtarget))
+ return DAG.getNode(Opc, DL, VT, N0, N1);
+ break;
+ }
+ case ISD::SHL: {
+ // If we find a suitable source, a SHL becomes a KSHIFTL.
+ SDValue Src0 = V.getOperand(0);
+ if (auto *Amt = dyn_cast<ConstantSDNode>(V.getOperand(1)))
+ if (SDValue N0 = combineBitcastToBoolVector(VT, Src0, DL, DAG, Subtarget))
+ return DAG.getNode(
+ X86ISD::KSHIFTL, DL, VT, N0,
+ DAG.getTargetConstant(Amt->getZExtValue(), DL, MVT::i8));
+ break;
+ }
+ }
+ return SDValue();
+}
+
static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -36494,24 +38348,6 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineBitcastvxi1(DAG, VT, N0, dl, Subtarget))
return V;
- // Recognize the IR pattern for the movmsk intrinsic under SSE1 befoer type
- // legalization destroys the v4i32 type.
- if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && SrcVT == MVT::v4i1 &&
- VT.isScalarInteger() && N0.getOpcode() == ISD::SETCC &&
- N0.getOperand(0).getValueType() == MVT::v4i32 &&
- ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode()) &&
- cast<CondCodeSDNode>(N0.getOperand(2))->get() == ISD::SETLT) {
- SDValue N00 = N0.getOperand(0);
- // Only do this if we can avoid scalarizing the input.
- if (ISD::isNormalLoad(N00.getNode()) ||
- (N00.getOpcode() == ISD::BITCAST &&
- N00.getOperand(0).getValueType() == MVT::v4f32)) {
- SDValue V = DAG.getNode(X86ISD::MOVMSK, dl, MVT::i32,
- DAG.getBitcast(MVT::v4f32, N00));
- return DAG.getZExtOrTrunc(V, dl, VT);
- }
- }
-
// If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer
// type, widen both sides to avoid a trip through memory.
if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() &&
@@ -36553,6 +38389,16 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
N0 = DAG.getBitcast(MVT::i8, N0);
return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
}
+ } else {
+ // If we're bitcasting from iX to vXi1, see if the integer originally
+ // began as a vXi1 and whether we can remove the bitcast entirely.
+ if (VT.isVector() && VT.getScalarType() == MVT::i1 &&
+ SrcVT.isScalarInteger() &&
+ DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ if (SDValue V =
+ combineBitcastToBoolVector(VT, N0, SDLoc(N), DAG, Subtarget))
+ return V;
+ }
}
// Look for (i8 (bitcast (v8i1 (extract_subvector (v16i1 X), 0)))) and
@@ -36567,19 +38413,30 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
DAG.getBitcast(MVT::i16, N0.getOperand(0)));
- // Combine (bitcast (vbroadcast_load)) -> (vbroadcast_load). The memory VT
- // determines // the number of bits loaded. Remaining bits are zero.
+ // Canonicalize (bitcast (vbroadcast_load)) so that the output of the bitcast
+ // and the vbroadcast_load are both integer or both fp. In some cases this
+ // will remove the bitcast entirely.
if (N0.getOpcode() == X86ISD::VBROADCAST_LOAD && N0.hasOneUse() &&
- VT.getScalarSizeInBits() == SrcVT.getScalarSizeInBits()) {
+ VT.isFloatingPoint() != SrcVT.isFloatingPoint() && VT.isVector()) {
auto *BCast = cast<MemIntrinsicSDNode>(N0);
- SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = { BCast->getChain(), BCast->getBasePtr() };
- SDValue ResNode =
- DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, SDLoc(N), Tys, Ops,
- VT.getVectorElementType(),
- BCast->getMemOperand());
- DAG.ReplaceAllUsesOfValueWith(SDValue(BCast, 1), ResNode.getValue(1));
- return ResNode;
+ unsigned SrcVTSize = SrcVT.getScalarSizeInBits();
+ unsigned MemSize = BCast->getMemoryVT().getScalarSizeInBits();
+ // Don't swap i8/i16 since we don't have fp types of that size.
+ if (MemSize >= 32) {
+ MVT MemVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(MemSize)
+ : MVT::getIntegerVT(MemSize);
+ MVT LoadVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(SrcVTSize)
+ : MVT::getIntegerVT(SrcVTSize);
+ LoadVT = MVT::getVectorVT(LoadVT, SrcVT.getVectorNumElements());
+
+ SDVTList Tys = DAG.getVTList(LoadVT, MVT::Other);
+ SDValue Ops[] = { BCast->getChain(), BCast->getBasePtr() };
+ SDValue ResNode =
+ DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, SDLoc(N), Tys, Ops,
+ MemVT, BCast->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(BCast, 1), ResNode.getValue(1));
+ return DAG.getBitcast(VT, ResNode);
+ }
}
// Since MMX types are special and don't usually play with other vector types,
@@ -36666,6 +38523,47 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
return DAG.getConstant(0, SDLoc(N0), VT);
}
+ // Look for a MOVMSK that may have been truncated and then bitcast to vXi1.
+ // Turn it into a sign bit compare that produces a k-register. This avoids
+ // a trip through a GPR.
+ if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() &&
+ VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
+ isPowerOf2_32(VT.getVectorNumElements())) {
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue Src = N0;
+
+ // Peek through truncate.
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
+ Src = N0.getOperand(0);
+
+ if (Src.getOpcode() == X86ISD::MOVMSK && Src.hasOneUse()) {
+ SDValue MovmskIn = Src.getOperand(0);
+ MVT MovmskVT = MovmskIn.getSimpleValueType();
+ unsigned MovMskElts = MovmskVT.getVectorNumElements();
+
+ // We allow extra bits of the movmsk to be used since they are known zero.
+ // We can't convert a VPMOVMSKB without avx512bw.
+ if (MovMskElts <= NumElts &&
+ (Subtarget.hasBWI() || MovmskVT.getVectorElementType() != MVT::i8)) {
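+ // The MOVMSK source may be a FP vector (MOVMSKPS/MOVMSKPD), so do the sign-bit compare in the equivalent integer type.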
+ EVT IntVT = EVT(MovmskVT).changeVectorElementTypeToInteger();
+ MovmskIn = DAG.getBitcast(IntVT, MovmskIn);
+ SDLoc dl(N);
+ MVT CmpVT = MVT::getVectorVT(MVT::i1, MovMskElts);
+ SDValue Cmp = DAG.getSetCC(dl, CmpVT, MovmskIn,
+ DAG.getConstant(0, dl, IntVT), ISD::SETLT);
+ if (EVT(CmpVT) == VT)
+ return Cmp;
+
+ // Pad with zeroes up to original VT to replace the zeroes that were
+ // being used from the MOVMSK.
+ unsigned NumConcats = NumElts / MovMskElts;
+ SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, CmpVT));
+ Ops[0] = Cmp;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Ops);
+ }
+ }
+ }
+
// Try to remove bitcasts from input and output of mask arithmetic to
// remove GPR<->K-register crossings.
if (SDValue V = combineCastedMaskArithmetic(N, DAG, DCI, Subtarget))
@@ -36790,12 +38688,9 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
// First, reduce the source down to 128-bit, applying BinOp to lo/hi.
while (SrcVT.getSizeInBits() > 128) {
- unsigned NumElts = SrcVT.getVectorNumElements();
- unsigned NumSubElts = NumElts / 2;
- SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcSVT, NumSubElts);
- unsigned SubSizeInBits = SrcVT.getSizeInBits();
- SDValue Lo = extractSubVector(MinPos, 0, DAG, DL, SubSizeInBits);
- SDValue Hi = extractSubVector(MinPos, NumSubElts, DAG, DL, SubSizeInBits);
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = splitVector(MinPos, DAG, DL);
+ SrcVT = Lo.getValueType();
MinPos = DAG.getNode(BinOp, DL, SrcVT, Lo, Hi);
}
assert(((SrcVT == MVT::v8i16 && ExtractVT == MVT::i16) ||
@@ -36882,6 +38777,25 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract,
EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
Movmsk = DAG.getBitcast(MovmskVT, Match);
} else {
+ // For all_of(setcc(vec,0,eq)) - avoid vXi64 comparisons if we don't have
+ // PCMPEQQ (SSE41+), use PCMPEQD instead.
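+ // e.g. all_of(v2i64 seteq 0) can instead be checked as all_of(v4i32 seteq 0) on a bitcast of the operands.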
+ if (BinOp == ISD::AND && !Subtarget.hasSSE41() &&
+ Match.getOpcode() == ISD::SETCC &&
+ ISD::isBuildVectorAllZeros(Match.getOperand(1).getNode()) &&
+ cast<CondCodeSDNode>(Match.getOperand(2))->get() ==
+ ISD::CondCode::SETEQ) {
+ SDValue Vec = Match.getOperand(0);
+ if (Vec.getValueType().getScalarType() == MVT::i64 &&
+ (2 * NumElts) <= MaxElts) {
+ NumElts *= 2;
+ EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+ MatchVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
+ Match = DAG.getSetCC(
+ DL, MatchVT, DAG.getBitcast(CmpVT, Match.getOperand(0)),
+ DAG.getBitcast(CmpVT, Match.getOperand(1)), ISD::CondCode::SETEQ);
+ }
+ }
+
// Use combineBitcastvxi1 to create the MOVMSK.
while (NumElts > MaxElts) {
SDValue Lo, Hi;
@@ -36896,10 +38810,7 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract,
return SDValue();
Movmsk = DAG.getZExtOrTrunc(Movmsk, DL, NumElts > 32 ? MVT::i64 : MVT::i32);
} else {
- // Bail with AVX512VL (which uses predicate registers).
- if (Subtarget.hasVLX())
- return SDValue();
-
+ // FIXME: Better handling of k-registers or 512-bit vectors?
unsigned MatchSizeInBits = Match.getValueSizeInBits();
if (!(MatchSizeInBits == 128 ||
(MatchSizeInBits == 256 && Subtarget.hasAVX())))
@@ -36976,21 +38887,14 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
if (!Subtarget.hasSSE2())
return SDValue();
- // Verify the type we're extracting from is any integer type above i16.
- EVT VT = Extract->getOperand(0).getValueType();
- if (!VT.isSimple() || !(VT.getVectorElementType().getSizeInBits() > 16))
+ EVT ExtractVT = Extract->getValueType(0);
+ // Verify the type we're extracting is either i32 or i64.
+ // FIXME: Could support other types, but this is what we have coverage for.
+ if (ExtractVT != MVT::i32 && ExtractVT != MVT::i64)
return SDValue();
- unsigned RegSize = 128;
- if (Subtarget.useBWIRegs())
- RegSize = 512;
- else if (Subtarget.hasAVX())
- RegSize = 256;
-
- // We handle upto v16i* for SSE2 / v32i* for AVX / v64i* for AVX512.
- // TODO: We should be able to handle larger vectors by splitting them before
- // feeding them into several SADs, and then reducing over those.
- if (RegSize / VT.getVectorNumElements() < 8)
+ EVT VT = Extract->getOperand(0).getValueType();
+ if (!isPowerOf2_32(VT.getVectorNumElements()))
return SDValue();
// Match shuffle + add pyramid.
@@ -37006,8 +38910,8 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
// (extends the sign bit which is zero).
// So it is correct to skip the sign/zero extend instruction.
if (Root && (Root.getOpcode() == ISD::SIGN_EXTEND ||
- Root.getOpcode() == ISD::ZERO_EXTEND ||
- Root.getOpcode() == ISD::ANY_EXTEND))
+ Root.getOpcode() == ISD::ZERO_EXTEND ||
+ Root.getOpcode() == ISD::ANY_EXTEND))
Root = Root.getOperand(0);
// If there was a match, we want Root to be a select that is the root of an
@@ -37027,7 +38931,7 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
// If the original vector was wider than 8 elements, sum over the results
// in the SAD vector.
unsigned Stages = Log2_32(VT.getVectorNumElements());
- MVT SadVT = SAD.getSimpleValueType();
+ EVT SadVT = SAD.getValueType();
if (Stages > 3) {
unsigned SadElems = SadVT.getVectorNumElements();
@@ -37042,12 +38946,12 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
}
}
- MVT Type = Extract->getSimpleValueType(0);
- unsigned TypeSizeInBits = Type.getSizeInBits();
- // Return the lowest TypeSizeInBits bits.
- MVT ResVT = MVT::getVectorVT(Type, SadVT.getSizeInBits() / TypeSizeInBits);
+ unsigned ExtractSizeInBits = ExtractVT.getSizeInBits();
+ // Return the lowest ExtractSizeInBits bits.
+ EVT ResVT = EVT::getVectorVT(*DAG.getContext(), ExtractVT,
+ SadVT.getSizeInBits() / ExtractSizeInBits);
SAD = DAG.getBitcast(ResVT, SAD);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Type, SAD,
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractVT, SAD,
Extract->getOperand(1));
}
@@ -37066,19 +38970,34 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
EVT SrcVT = Src.getValueType();
EVT SrcSVT = SrcVT.getVectorElementType();
+ unsigned SrcEltBits = SrcSVT.getSizeInBits();
unsigned NumSrcElts = SrcVT.getVectorNumElements();
// Don't attempt this for boolean mask vectors or unknown extraction indices.
if (SrcSVT == MVT::i1 || !isa<ConstantSDNode>(Idx))
return SDValue();
+ const APInt &IdxC = N->getConstantOperandAPInt(1);
+ if (IdxC.uge(NumSrcElts))
+ return SDValue();
+
SDValue SrcBC = peekThroughBitcasts(Src);
- // Handle extract(broadcast(scalar_value)), it doesn't matter what index is.
+ // Handle extract(bitcast(broadcast(scalar_value))).
if (X86ISD::VBROADCAST == SrcBC.getOpcode()) {
SDValue SrcOp = SrcBC.getOperand(0);
- if (SrcOp.getValueSizeInBits() == VT.getSizeInBits())
- return DAG.getBitcast(VT, SrcOp);
+ EVT SrcOpVT = SrcOp.getValueType();
+ if (SrcOpVT.isScalarInteger() && VT.isInteger() &&
+ (SrcOpVT.getSizeInBits() % SrcEltBits) == 0) {
+ unsigned Scale = SrcOpVT.getSizeInBits() / SrcEltBits;
+ unsigned Offset = IdxC.urem(Scale) * SrcEltBits;
+ // TODO support non-zero offsets.
+ if (Offset == 0) {
+ SrcOp = DAG.getZExtOrTrunc(SrcOp, dl, SrcVT.getScalarType());
+ SrcOp = DAG.getZExtOrTrunc(SrcOp, dl, VT);
+ return SrcOp;
+ }
+ }
}
// If we're extracting a single element from a broadcast load and there are
@@ -37087,22 +39006,43 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
auto *MemIntr = cast<MemIntrinsicSDNode>(SrcBC);
unsigned SrcBCWidth = SrcBC.getScalarValueSizeInBits();
if (MemIntr->getMemoryVT().getSizeInBits() == SrcBCWidth &&
- VT.getSizeInBits() == SrcBCWidth) {
+ VT.getSizeInBits() == SrcBCWidth && SrcEltBits == SrcBCWidth) {
SDValue Load = DAG.getLoad(VT, dl, MemIntr->getChain(),
MemIntr->getBasePtr(),
MemIntr->getPointerInfo(),
- MemIntr->getAlignment(),
+ MemIntr->getOriginalAlign(),
MemIntr->getMemOperand()->getFlags());
DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1));
return Load;
}
}
+ // Handle extract(bitcast(scalar_to_vector(scalar_value))) for integers.
+ // TODO: Move to DAGCombine?
+ if (SrcBC.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isInteger() &&
+ SrcBC.getValueType().isInteger() &&
+ (SrcBC.getScalarValueSizeInBits() % SrcEltBits) == 0 &&
+ SrcBC.getScalarValueSizeInBits() ==
+ SrcBC.getOperand(0).getValueSizeInBits()) {
+ unsigned Scale = SrcBC.getScalarValueSizeInBits() / SrcEltBits;
+ if (IdxC.ult(Scale)) {
+ unsigned Offset = IdxC.getZExtValue() * SrcVT.getScalarSizeInBits();
+ SDValue Scl = SrcBC.getOperand(0);
+ EVT SclVT = Scl.getValueType();
+ if (Offset) {
+ Scl = DAG.getNode(ISD::SRL, dl, SclVT, Scl,
+ DAG.getShiftAmountConstant(Offset, SclVT, dl));
+ }
+ Scl = DAG.getZExtOrTrunc(Scl, dl, SrcVT.getScalarType());
+ Scl = DAG.getZExtOrTrunc(Scl, dl, VT);
+ return Scl;
+ }
+ }
+
// Handle extract(truncate(x)) for 0'th index.
// TODO: Treat this as a faux shuffle?
// TODO: When can we use this for general indices?
- if (ISD::TRUNCATE == Src.getOpcode() && SrcVT.is128BitVector() &&
- isNullConstant(Idx)) {
+ if (ISD::TRUNCATE == Src.getOpcode() && SrcVT.is128BitVector() && IdxC == 0) {
Src = extract128BitVector(Src.getOperand(0), 0, DAG, dl);
Src = DAG.getBitcast(SrcVT, Src);
return DAG.getNode(N->getOpcode(), dl, VT, Src, Idx);
@@ -37114,12 +39054,18 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
if (!getTargetShuffleInputs(SrcBC, Ops, Mask, DAG))
return SDValue();
+ // Shuffle inputs must be the same size as the result.
+ if (llvm::any_of(Ops, [SrcVT](SDValue Op) {
+ return SrcVT.getSizeInBits() != Op.getValueSizeInBits();
+ }))
+ return SDValue();
+
// Attempt to narrow/widen the shuffle mask to the correct size.
if (Mask.size() != NumSrcElts) {
if ((NumSrcElts % Mask.size()) == 0) {
SmallVector<int, 16> ScaledMask;
int Scale = NumSrcElts / Mask.size();
- scaleShuffleMask<int>(Scale, Mask, ScaledMask);
+ narrowShuffleMaskElts(Scale, Mask, ScaledMask);
Mask = std::move(ScaledMask);
} else if ((Mask.size() % NumSrcElts) == 0) {
// Simplify Mask based on demanded element.
@@ -37144,7 +39090,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
if (Mask.size() != NumSrcElts)
return SDValue();
- int SrcIdx = Mask[N->getConstantOperandVal(1)];
+ int SrcIdx = Mask[IdxC.getZExtValue()];
// If the shuffle source element is undef/zero then we can just accept it.
if (SrcIdx == SM_SentinelUndef)
@@ -37171,8 +39117,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
if ((SrcVT == MVT::v8i16 && Subtarget.hasSSE2()) ||
(SrcVT == MVT::v16i8 && Subtarget.hasSSE41())) {
- assert(VT.getSizeInBits() >= SrcSVT.getSizeInBits() &&
- "Unexpected extraction type");
+ assert(VT.getSizeInBits() >= SrcEltBits && "Unexpected extraction type");
unsigned OpCode = (SrcVT == MVT::v8i16 ? X86ISD::PEXTRW : X86ISD::PEXTRB);
SrcOp = DAG.getBitcast(SrcVT, SrcOp);
SDValue ExtOp = DAG.getNode(OpCode, dl, MVT::i32, SrcOp,
@@ -37342,12 +39287,10 @@ static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG,
// vXi8 reduction - sum lo/hi halves then use PSADBW.
if (VT == MVT::i8) {
while (Rdx.getValueSizeInBits() > 128) {
- unsigned HalfSize = VecVT.getSizeInBits() / 2;
- unsigned HalfElts = VecVT.getVectorNumElements() / 2;
- SDValue Lo = extractSubVector(Rdx, 0, DAG, DL, HalfSize);
- SDValue Hi = extractSubVector(Rdx, HalfElts, DAG, DL, HalfSize);
- Rdx = DAG.getNode(ISD::ADD, DL, Lo.getValueType(), Lo, Hi);
- VecVT = Rdx.getValueType();
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = splitVector(Rdx, DAG, DL);
+ VecVT = Lo.getValueType();
+ Rdx = DAG.getNode(ISD::ADD, DL, VecVT, Lo, Hi);
}
assert(VecVT == MVT::v16i8 && "v16i8 reduction expected");
@@ -37362,8 +39305,7 @@ static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG,
}
// Only use (F)HADD opcodes if they aren't microcoded or minimizes codesize.
- bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
- if (!Subtarget.hasFastHorizontalOps() && !OptForSize)
+ if (!shouldUseHorizontalOp(true, DAG, Subtarget))
return SDValue();
unsigned HorizOpcode = Opc == ISD::ADD ? X86ISD::HADD : X86ISD::FHADD;
@@ -37495,11 +39437,21 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
// Attempt to extract a i1 element by using MOVMSK to extract the signbits
// and then testing the relevant element.
+ //
+ // Note that we only combine extracts on the *same* result number, i.e.
+ // t0 = merge_values a0, a1, a2, a3
+ // i1 = extract_vector_elt t0, Constant:i64<2>
+ // i1 = extract_vector_elt t0, Constant:i64<3>
+ // but not
+ // i1 = extract_vector_elt t0:1, Constant:i64<2>
+ // since the latter would need its own MOVMSK.
if (CIdx && SrcVT.getScalarType() == MVT::i1) {
SmallVector<SDNode *, 16> BoolExtracts;
- auto IsBoolExtract = [&BoolExtracts](SDNode *Use) {
+ unsigned ResNo = InputVector.getResNo();
+ auto IsBoolExtract = [&BoolExtracts, &ResNo](SDNode *Use) {
if (Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isa<ConstantSDNode>(Use->getOperand(1)) &&
+ Use->getOperand(0).getResNo() == ResNo &&
Use->getValueType(0) == MVT::i1) {
BoolExtracts.push_back(Use);
return true;
@@ -37548,8 +39500,6 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
assert(CondVT.isVector() && "Vector select expects a vector selector!");
- // Check if the first operand is all zeros and Cond type is vXi1.
- // This situation only applies to avx512.
// TODO: Use isNullOrNullSplat() to distinguish constants with undefs?
// TODO: Can we assert that both operands are not zeros (because that should
// get simplified at node creation time)?
@@ -37564,14 +39514,6 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
return DAG.getConstant(0, DL, VT);
}
- if (TValIsAllZeros && !FValIsAllZeros && Subtarget.hasAVX512() &&
- Cond.hasOneUse() && CondVT.getVectorElementType() == MVT::i1) {
- // Invert the cond to not(cond) : xor(op,allones)=not(op)
- SDValue CondNew = DAG.getNOT(DL, Cond, CondVT);
- // Vselect cond, op1, op2 = Vselect not(cond), op2, op1
- return DAG.getSelect(DL, VT, CondNew, RHS, LHS);
- }
-
// To use the condition operand as a bitwise mask, it must have elements that
// are the same size as the select elements. Ie, the condition operand must
// have already been promoted from the IR select condition type <N x i1>.
@@ -37796,12 +39738,13 @@ static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
return true;
};
+ APInt DemandedBits(APInt::getSignMask(BitWidth));
+
if (OnlyUsedAsSelectCond(Cond)) {
- APInt DemandedMask(APInt::getSignMask(BitWidth));
KnownBits Known;
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
- if (!TLI.SimplifyDemandedBits(Cond, DemandedMask, Known, TLO, 0, true))
+ if (!TLI.SimplifyDemandedBits(Cond, DemandedBits, Known, TLO, 0, true))
return SDValue();
// If we changed the computation somewhere in the DAG, this change will
@@ -37823,15 +39766,9 @@ static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
}
// Otherwise we can still at least try to simplify multiple use bits.
- APInt DemandedMask(APInt::getSignMask(BitWidth));
- APInt DemandedElts(APInt::getAllOnesValue(VT.getVectorNumElements()));
- KnownBits Known;
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
- if (SDValue V = TLI.SimplifyMultipleUseDemandedBits(Cond, DemandedMask,
- DemandedElts, DAG, 0))
- return DAG.getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0),
- V, N->getOperand(1), N->getOperand(2));
+ if (SDValue V = TLI.SimplifyMultipleUseDemandedBits(Cond, DemandedBits, DAG))
+ return DAG.getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0), V,
+ N->getOperand(1), N->getOperand(2));
return SDValue();
}
@@ -38315,6 +40252,19 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
}
}
+ // Check if the first operand is all zeros and Cond type is vXi1.
+ // If this an avx512 target we can improve the use of zero masking by
+ // swapping the operands and inverting the condition.
+ if (N->getOpcode() == ISD::VSELECT && Cond.hasOneUse() &&
+ Subtarget.hasAVX512() && CondVT.getVectorElementType() == MVT::i1 &&
+ ISD::isBuildVectorAllZeros(LHS.getNode()) &&
+ !ISD::isBuildVectorAllZeros(RHS.getNode())) {
+ // Invert the cond to not(cond) : xor(op,allones)=not(op)
+ SDValue CondNew = DAG.getNOT(DL, Cond, CondVT);
+ // Vselect cond, op1, op2 = Vselect not(cond), op2, op1
+ return DAG.getSelect(DL, VT, CondNew, RHS, LHS);
+ }
+
// Early exit check
if (!TLI.isTypeLegal(VT))
return SDValue();
@@ -38334,12 +40284,86 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(N->getOpcode(), DL, VT,
DAG.getBitcast(CondVT, CondNot), RHS, LHS);
- // Custom action for SELECT MMX
- if (VT == MVT::x86mmx) {
- LHS = DAG.getBitcast(MVT::i64, LHS);
- RHS = DAG.getBitcast(MVT::i64, RHS);
- SDValue newSelect = DAG.getNode(ISD::SELECT, DL, MVT::i64, Cond, LHS, RHS);
- return DAG.getBitcast(VT, newSelect);
+ // Try to optimize vXi1 selects if both operands are either all constants or
+ // bitcasts from scalar integer type. In that case we can convert the operands
+ // to integer and use an integer select which will be converted to a CMOV.
+ // We need to take a little bit of care to avoid creating an i64 type after
+ // type legalization.
+ if (N->getOpcode() == ISD::SELECT && VT.isVector() &&
+ VT.getVectorElementType() == MVT::i1 &&
+ (DCI.isBeforeLegalize() || (VT != MVT::v64i1 || Subtarget.is64Bit()))) {
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements());
+ bool LHSIsConst = ISD::isBuildVectorOfConstantSDNodes(LHS.getNode());
+ bool RHSIsConst = ISD::isBuildVectorOfConstantSDNodes(RHS.getNode());
+
+ if ((LHSIsConst ||
+ (LHS.getOpcode() == ISD::BITCAST &&
+ LHS.getOperand(0).getValueType() == IntVT)) &&
+ (RHSIsConst ||
+ (RHS.getOpcode() == ISD::BITCAST &&
+ RHS.getOperand(0).getValueType() == IntVT))) {
+ if (LHSIsConst)
+ LHS = combinevXi1ConstantToInteger(LHS, DAG);
+ else
+ LHS = LHS.getOperand(0);
+
+ if (RHSIsConst)
+ RHS = combinevXi1ConstantToInteger(RHS, DAG);
+ else
+ RHS = RHS.getOperand(0);
+
+ SDValue Select = DAG.getSelect(DL, IntVT, Cond, LHS, RHS);
+ return DAG.getBitcast(VT, Select);
+ }
+ }
+
+ // If this is "((X & C) == 0) ? Y : Z" and C is a constant mask vector of
+ // single bits, then invert the predicate and swap the select operands.
+ // This can lower using a vector shift bit-hack rather than mask and compare.
+ if (DCI.isBeforeLegalize() && !Subtarget.hasAVX512() &&
+ N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
+ Cond.hasOneUse() && CondVT.getVectorElementType() == MVT::i1 &&
+ Cond.getOperand(0).getOpcode() == ISD::AND &&
+ isNullOrNullSplat(Cond.getOperand(1)) &&
+ cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ &&
+ Cond.getOperand(0).getValueType() == VT) {
+ // The 'and' mask must be composed of power-of-2 constants.
+ SDValue And = Cond.getOperand(0);
+ auto *C = isConstOrConstSplat(And.getOperand(1));
+ if (C && C->getAPIntValue().isPowerOf2()) {
+ // vselect (X & C == 0), LHS, RHS --> vselect (X & C != 0), RHS, LHS
+ SDValue NotCond =
+ DAG.getSetCC(DL, CondVT, And, Cond.getOperand(1), ISD::SETNE);
+ return DAG.getSelect(DL, VT, NotCond, RHS, LHS);
+ }
+
+ // If we have a non-splat but still powers-of-2 mask, AVX1 can use pmulld
+ // and AVX2 can use vpsllv{dq}. 8-bit lacks a proper shift or multiply.
+ // 16-bit lacks a proper blendv.
+ unsigned EltBitWidth = VT.getScalarSizeInBits();
+ bool CanShiftBlend =
+ TLI.isTypeLegal(VT) && ((Subtarget.hasAVX() && EltBitWidth == 32) ||
+ (Subtarget.hasAVX2() && EltBitWidth == 64) ||
+ (Subtarget.hasXOP()));
+ if (CanShiftBlend &&
+ ISD::matchUnaryPredicate(And.getOperand(1), [](ConstantSDNode *C) {
+ return C->getAPIntValue().isPowerOf2();
+ })) {
+ // Create a left-shift constant to get the mask bits over to the sign-bit.
+ SDValue Mask = And.getOperand(1);
+ SmallVector<int, 32> ShlVals;
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ auto *MaskVal = cast<ConstantSDNode>(Mask.getOperand(i));
+ ShlVals.push_back(EltBitWidth - 1 -
+ MaskVal->getAPIntValue().exactLogBase2());
+ }
+ // vsel ((X & C) == 0), LHS, RHS --> vsel ((shl X, C') < 0), RHS, LHS
+ SDValue ShlAmt = getConstVector(ShlVals, VT.getSimpleVT(), DAG, DL);
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And.getOperand(0), ShlAmt);
+ SDValue NewCond =
+ DAG.getSetCC(DL, CondVT, Shl, Cond.getOperand(1), ISD::SETLT);
+ return DAG.getSelect(DL, VT, NewCond, RHS, LHS);
+ }
}
return SDValue();
@@ -38665,6 +40689,282 @@ static SDValue combineCarryThroughADD(SDValue EFLAGS, SelectionDAG &DAG) {
return SDValue();
}
+/// If we are inverting a PTEST/TESTP operand, attempt to adjust the CC
+/// to avoid the inversion.
+static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // TODO: Handle X86ISD::KTEST/X86ISD::KORTEST.
+ if (EFLAGS.getOpcode() != X86ISD::PTEST &&
+ EFLAGS.getOpcode() != X86ISD::TESTP)
+ return SDValue();
+
+ // PTEST/TESTP sets EFLAGS as:
+ // TESTZ: ZF = (Op0 & Op1) == 0
+ // TESTC: CF = (~Op0 & Op1) == 0
+ // TESTNZC: ZF == 0 && CF == 0
+ EVT VT = EFLAGS.getValueType();
+ SDValue Op0 = EFLAGS.getOperand(0);
+ SDValue Op1 = EFLAGS.getOperand(1);
+ EVT OpVT = Op0.getValueType();
+
+ // TEST*(~X,Y) == TEST*(X,Y)
+ if (SDValue NotOp0 = IsNOT(Op0, DAG)) {
+ X86::CondCode InvCC;
+ switch (CC) {
+ case X86::COND_B:
+ // testc -> testz.
+ InvCC = X86::COND_E;
+ break;
+ case X86::COND_AE:
+ // !testc -> !testz.
+ InvCC = X86::COND_NE;
+ break;
+ case X86::COND_E:
+ // testz -> testc.
+ InvCC = X86::COND_B;
+ break;
+ case X86::COND_NE:
+ // !testz -> !testc.
+ InvCC = X86::COND_AE;
+ break;
+ case X86::COND_A:
+ case X86::COND_BE:
+ // testnzc -> testnzc (no change).
+ InvCC = CC;
+ break;
+ default:
+ InvCC = X86::COND_INVALID;
+ break;
+ }
+
+ if (InvCC != X86::COND_INVALID) {
+ CC = InvCC;
+ return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
+ DAG.getBitcast(OpVT, NotOp0), Op1);
+ }
+ }
+
+ if (CC == X86::COND_E || CC == X86::COND_NE) {
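+ // The remaining folds apply when the incoming condition checks ZF (TESTZ / !TESTZ).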
+ // TESTZ(X,~Y) == TESTC(Y,X)
+ if (SDValue NotOp1 = IsNOT(Op1, DAG)) {
+ CC = (CC == X86::COND_E ? X86::COND_B : X86::COND_AE);
+ return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
+ DAG.getBitcast(OpVT, NotOp1), Op0);
+ }
+
+ if (Op0 == Op1) {
+ SDValue BC = peekThroughBitcasts(Op0);
+ EVT BCVT = BC.getValueType();
+ assert(BCVT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(BCVT) &&
+ "Unexpected vector type");
+
+ // TESTZ(AND(X,Y),AND(X,Y)) == TESTZ(X,Y)
+ if (BC.getOpcode() == ISD::AND || BC.getOpcode() == X86ISD::FAND) {
+ return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
+ DAG.getBitcast(OpVT, BC.getOperand(0)),
+ DAG.getBitcast(OpVT, BC.getOperand(1)));
+ }
+
+ // TESTZ(AND(~X,Y),AND(~X,Y)) == TESTC(X,Y)
+ if (BC.getOpcode() == X86ISD::ANDNP || BC.getOpcode() == X86ISD::FANDN) {
+ CC = (CC == X86::COND_E ? X86::COND_B : X86::COND_AE);
+ return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
+ DAG.getBitcast(OpVT, BC.getOperand(0)),
+ DAG.getBitcast(OpVT, BC.getOperand(1)));
+ }
+
+ // If every element is an all-sign value, see if we can use MOVMSK to
+ // more efficiently extract the sign bits and compare that.
+ // TODO: Handle TESTC with comparison inversion.
+ // TODO: Can we remove SimplifyMultipleUseDemandedBits and rely on
+ // MOVMSK combines to make sure its never worse than PTEST?
+ unsigned EltBits = BCVT.getScalarSizeInBits();
+ if (DAG.ComputeNumSignBits(BC) == EltBits) {
+ assert(VT == MVT::i32 && "Expected i32 EFLAGS comparison result");
+ APInt SignMask = APInt::getSignMask(EltBits);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (SDValue Res =
+ TLI.SimplifyMultipleUseDemandedBits(BC, SignMask, DAG)) {
+ // For vXi16 cases we need to use pmovmskb and extract every other
+ // sign bit.
+ SDLoc DL(EFLAGS);
+ if (EltBits == 16) {
+ MVT MovmskVT = BCVT.is128BitVector() ? MVT::v16i8 : MVT::v32i8;
+ Res = DAG.getBitcast(MovmskVT, Res);
+ Res = getPMOVMSKB(DL, Res, DAG, Subtarget);
+ Res = DAG.getNode(ISD::AND, DL, MVT::i32, Res,
+ DAG.getConstant(0xAAAAAAAA, DL, MVT::i32));
+ } else {
+ Res = getPMOVMSKB(DL, Res, DAG, Subtarget);
+ }
+ return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Res,
+ DAG.getConstant(0, DL, MVT::i32));
+ }
+ }
+ }
+
+ // TESTZ(-1,X) == TESTZ(X,X)
+ if (ISD::isBuildVectorAllOnes(Op0.getNode()))
+ return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op1, Op1);
+
+ // TESTZ(X,-1) == TESTZ(X,X)
+ if (ISD::isBuildVectorAllOnes(Op1.getNode()))
+ return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op0, Op0);
+ }
+
+ return SDValue();
+}
+
+// Attempt to simplify the MOVMSK input based on the comparison type.
+static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // Handle eq/ne against zero (any_of).
+ // Handle eq/ne against -1 (all_of).
+ if (!(CC == X86::COND_E || CC == X86::COND_NE))
+ return SDValue();
+ if (EFLAGS.getValueType() != MVT::i32)
+ return SDValue();
+ unsigned CmpOpcode = EFLAGS.getOpcode();
+ if (CmpOpcode != X86ISD::CMP && CmpOpcode != X86ISD::SUB)
+ return SDValue();
+ auto *CmpConstant = dyn_cast<ConstantSDNode>(EFLAGS.getOperand(1));
+ if (!CmpConstant)
+ return SDValue();
+ const APInt &CmpVal = CmpConstant->getAPIntValue();
+
+ SDValue CmpOp = EFLAGS.getOperand(0);
+ unsigned CmpBits = CmpOp.getValueSizeInBits();
+ assert(CmpBits == CmpVal.getBitWidth() && "Value size mismatch");
+
+ // Peek through any truncate.
+ if (CmpOp.getOpcode() == ISD::TRUNCATE)
+ CmpOp = CmpOp.getOperand(0);
+
+ // Bail if we don't find a MOVMSK.
+ if (CmpOp.getOpcode() != X86ISD::MOVMSK)
+ return SDValue();
+
+ SDValue Vec = CmpOp.getOperand(0);
+ MVT VecVT = Vec.getSimpleValueType();
+ assert((VecVT.is128BitVector() || VecVT.is256BitVector()) &&
+ "Unexpected MOVMSK operand");
+ unsigned NumElts = VecVT.getVectorNumElements();
+ unsigned NumEltBits = VecVT.getScalarSizeInBits();
+
+ bool IsAnyOf = CmpOpcode == X86ISD::CMP && CmpVal.isNullValue();
+ bool IsAllOf = CmpOpcode == X86ISD::SUB && NumElts <= CmpBits &&
+ CmpVal.isMask(NumElts);
+ if (!IsAnyOf && !IsAllOf)
+ return SDValue();
+
+ // See if we can peek through to a vector with a wider element type, if the
+ // signbits extend down to all the sub-elements as well.
+ // Calling MOVMSK with the wider type, avoiding the bitcast, helps expose
+ // potential SimplifyDemandedBits/Elts cases.
+ if (Vec.getOpcode() == ISD::BITCAST) {
+ SDValue BC = peekThroughBitcasts(Vec);
+ MVT BCVT = BC.getSimpleValueType();
+ unsigned BCNumElts = BCVT.getVectorNumElements();
+ unsigned BCNumEltBits = BCVT.getScalarSizeInBits();
+ if ((BCNumEltBits == 32 || BCNumEltBits == 64) &&
+ BCNumEltBits > NumEltBits &&
+ DAG.ComputeNumSignBits(BC) > (BCNumEltBits - NumEltBits)) {
+ SDLoc DL(EFLAGS);
+ unsigned CmpMask = IsAnyOf ? 0 : ((1 << BCNumElts) - 1);
+ return DAG.getNode(X86ISD::CMP, DL, MVT::i32,
+ DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, BC),
+ DAG.getConstant(CmpMask, DL, MVT::i32));
+ }
+ }
+
+ // MOVMSK(PCMPEQ(X,0)) == -1 -> PTESTZ(X,X).
+ // MOVMSK(PCMPEQ(X,0)) != -1 -> !PTESTZ(X,X).
+ if (IsAllOf && Subtarget.hasSSE41()) {
+ SDValue BC = peekThroughBitcasts(Vec);
+ if (BC.getOpcode() == X86ISD::PCMPEQ &&
+ ISD::isBuildVectorAllZeros(BC.getOperand(1).getNode())) {
+ MVT TestVT = VecVT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
+ SDValue V = DAG.getBitcast(TestVT, BC.getOperand(0));
+ return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
+ }
+ }
+
+ // See if we can avoid a PACKSS by calling MOVMSK on the sources.
+ // For vXi16 cases we can use a v2Xi8 PMOVMSKB. We must mask out
+ // sign bits prior to the comparison with zero unless we know that
+ // the vXi16 splats the sign bit down to the lower i8 half.
+ // TODO: Handle all_of patterns.
+ if (Vec.getOpcode() == X86ISD::PACKSS && VecVT == MVT::v16i8) {
+ SDValue VecOp0 = Vec.getOperand(0);
+ SDValue VecOp1 = Vec.getOperand(1);
+ bool SignExt0 = DAG.ComputeNumSignBits(VecOp0) > 8;
+ bool SignExt1 = DAG.ComputeNumSignBits(VecOp1) > 8;
+ // PMOVMSKB(PACKSSBW(X, undef)) -> PMOVMSKB(BITCAST_v16i8(X)) & 0xAAAA.
+ if (IsAnyOf && CmpBits == 8 && VecOp1.isUndef()) {
+ SDLoc DL(EFLAGS);
+ SDValue Result = DAG.getBitcast(MVT::v16i8, VecOp0);
+ Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
+ Result = DAG.getZExtOrTrunc(Result, DL, MVT::i16);
+ if (!SignExt0) {
+ Result = DAG.getNode(ISD::AND, DL, MVT::i16, Result,
+ DAG.getConstant(0xAAAA, DL, MVT::i16));
+ }
+ return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
+ DAG.getConstant(0, DL, MVT::i16));
+ }
+ // PMOVMSKB(PACKSSBW(LO(X), HI(X)))
+ // -> PMOVMSKB(BITCAST_v32i8(X)) & 0xAAAAAAAA.
+ if (CmpBits == 16 && Subtarget.hasInt256() &&
+ VecOp0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ VecOp1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ VecOp0.getOperand(0) == VecOp1.getOperand(0) &&
+ VecOp0.getConstantOperandAPInt(1) == 0 &&
+ VecOp1.getConstantOperandAPInt(1) == 8 &&
+ (IsAnyOf || (SignExt0 && SignExt1))) {
+ SDLoc DL(EFLAGS);
+ SDValue Result = DAG.getBitcast(MVT::v32i8, VecOp0.getOperand(0));
+ Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
+ unsigned CmpMask = IsAnyOf ? 0 : 0xFFFFFFFF;
+ if (!SignExt0 || !SignExt1) {
+ assert(IsAnyOf && "Only perform v16i16 signmasks for any_of patterns");
+ Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
+ DAG.getConstant(0xAAAAAAAA, DL, MVT::i32));
+ }
+ return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
+ DAG.getConstant(CmpMask, DL, MVT::i32));
+ }
+ }
+
+ // MOVMSK(SHUFFLE(X,u)) -> MOVMSK(X) iff every element is referenced.
+ SmallVector<int, 32> ShuffleMask;
+ SmallVector<SDValue, 2> ShuffleInputs;
+ if (NumElts == CmpBits &&
+ getTargetShuffleInputs(peekThroughBitcasts(Vec), ShuffleInputs,
+ ShuffleMask, DAG) &&
+ ShuffleInputs.size() == 1 && !isAnyZeroOrUndef(ShuffleMask) &&
+ ShuffleInputs[0].getValueSizeInBits() == VecVT.getSizeInBits()) {
+ unsigned NumShuffleElts = ShuffleMask.size();
+ APInt DemandedElts = APInt::getNullValue(NumShuffleElts);
+ for (int M : ShuffleMask) {
+ assert(0 <= M && M < (int)NumShuffleElts && "Bad unary shuffle index");
+ DemandedElts.setBit(M);
+ }
+ if (DemandedElts.isAllOnesValue()) {
+ SDLoc DL(EFLAGS);
+ SDValue Result = DAG.getBitcast(VecVT, ShuffleInputs[0]);
+ Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
+ Result =
+ DAG.getZExtOrTrunc(Result, DL, EFLAGS.getOperand(0).getValueType());
+ return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
+ EFLAGS.getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
+
/// Optimize an EFLAGS definition used according to the condition code \p CC
/// into a simpler EFLAGS value, potentially returning a new \p CC and replacing
/// uses of chain values.
@@ -38677,6 +40977,13 @@ static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
return R;
+
+ if (SDValue R = combinePTESTCC(EFLAGS, CC, DAG, Subtarget))
+ return R;
+
+ if (SDValue R = combineSetCCMOVMSK(EFLAGS, CC, DAG, Subtarget))
+ return R;
+
return combineSetCCAtomicArith(EFLAGS, CC, DAG, Subtarget);
}
@@ -38698,7 +41005,10 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
// Try to simplify the EFLAGS and condition code operands.
// We can't always do this as FCMOV only supports a subset of X86 cond.
if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG, Subtarget)) {
- if (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC)) {
+ if (!(FalseOp.getValueType() == MVT::f80 ||
+ (FalseOp.getValueType() == MVT::f64 && !Subtarget.hasSSE2()) ||
+ (FalseOp.getValueType() == MVT::f32 && !Subtarget.hasSSE1())) ||
+ !Subtarget.hasCMov() || hasFPCMov(CC)) {
SDValue Ops[] = {FalseOp, TrueOp, DAG.getTargetConstant(CC, DL, MVT::i8),
Flags};
return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops);
@@ -39007,7 +41317,7 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
: ISD::SIGN_EXTEND,
DL, VT, MulLo);
- MVT ResVT = MVT::getVectorVT(MVT::i32, NumElts / 2);
+ EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts / 2);
// Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16,
// the higher part is also needed.
SDValue MulHi =
@@ -39138,10 +41448,14 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
if (!VT.isVector() || VT.getVectorElementType() != MVT::i32)
return SDValue();
- // Make sure the vXi16 type is legal. This covers the AVX512 without BWI case.
- // Also allow v2i32 if it will be widened.
+ // Make sure the type is legal or will be widened to a legal type.
+ if (VT != MVT::v2i32 && !DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
MVT WVT = MVT::getVectorVT(MVT::i16, 2 * VT.getVectorNumElements());
- if (VT != MVT::v2i32 && !DAG.getTargetLoweringInfo().isTypeLegal(WVT))
+
+ // Without BWI, we would need to split v32i16.
+ if (WVT == MVT::v32i16 && !Subtarget.hasBWI())
return SDValue();
SDValue N0 = N->getOperand(0);
@@ -39358,6 +41672,64 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
return NewMul;
}
+// Try to form a MULHU or MULHS node by looking for
+// (srl (mul ext, ext), 16)
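+// e.g. (srl (mul (zext vXi16 A), (zext vXi16 B)), 16) -> (zext (mulhu A, B))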
+// TODO: This is X86 specific because we want to be able to handle wide types
+// before type legalization. But we can only do it if the vector will be
+// legalized via widening/splitting. Type legalization can't handle promotion
+// of a MULHU/MULHS. There isn't a way to convey this to the generic DAG
+// combiner.
+static SDValue combineShiftToPMULH(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
+ "SRL or SRA node is required here!");
+ SDLoc DL(N);
+
+ // Only do this with SSE4.1. On earlier targets reduceVMULWidth will expand
+ // the multiply.
+ if (!Subtarget.hasSSE41())
+ return SDValue();
+
+ // The operation feeding into the shift must be a multiply.
+ SDValue ShiftOperand = N->getOperand(0);
+ if (ShiftOperand.getOpcode() != ISD::MUL || !ShiftOperand.hasOneUse())
+ return SDValue();
+
+ // Input type should be at least vXi32.
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector() || VT.getVectorElementType().getSizeInBits() < 32)
+ return SDValue();
+
+ // Need a shift by 16.
+ APInt ShiftAmt;
+ if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), ShiftAmt) ||
+ ShiftAmt != 16)
+ return SDValue();
+
+ SDValue LHS = ShiftOperand.getOperand(0);
+ SDValue RHS = ShiftOperand.getOperand(1);
+
+ unsigned ExtOpc = LHS.getOpcode();
+ if ((ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND) ||
+ RHS.getOpcode() != ExtOpc)
+ return SDValue();
+
+ // Peek through the extends.
+ LHS = LHS.getOperand(0);
+ RHS = RHS.getOperand(0);
+
+ // Ensure the input types match.
+ EVT MulVT = LHS.getValueType();
+ if (MulVT.getVectorElementType() != MVT::i16 || RHS.getValueType() != MulVT)
+ return SDValue();
+
+ unsigned Opc = ExtOpc == ISD::SIGN_EXTEND ? ISD::MULHS : ISD::MULHU;
+ SDValue Mulh = DAG.getNode(Opc, DL, MulVT, LHS, RHS);
+
+ ExtOpc = N->getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ return DAG.getNode(ExtOpc, DL, VT, Mulh);
+}
+
static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -39417,12 +41789,16 @@ static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
unsigned Size = VT.getSizeInBits();
+ if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget))
+ return V;
+
// fold (ashr (shl, a, [56,48,32,24,16]), SarConst)
// into (shl, (sext (a), [56,48,32,24,16] - SarConst)) or
// into (lshr, (sext (a), SarConst - [56,48,32,24,16]))
@@ -39471,11 +41847,15 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG) {
}
static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget))
+ return V;
+
// Only do this on the last DAG combine as it can interfere with other
// combines.
if (!DCI.isAfterLegalizeDAG())
@@ -39519,16 +41899,92 @@ static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue combineVectorPackWithShuffle(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ assert((X86ISD::PACKSS == Opcode || X86ISD::PACKUS == Opcode) &&
+ "Unexpected pack opcode");
+
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ unsigned NumDstElts = VT.getVectorNumElements();
+
+ // Attempt to fold PACK(LOSUBVECTOR(SHUFFLE(X)),HISUBVECTOR(SHUFFLE(X)))
+ // to SHUFFLE(PACK(LOSUBVECTOR(X),HISUBVECTOR(X))). This is mainly for
+ // truncation trees, where it helps us avoid lane-crossing shuffles.
+ // TODO: There's a lot more we can do for PACK/HADD style shuffle combines.
+ if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N0.getConstantOperandAPInt(1) == 0 &&
+ N1.getConstantOperandAPInt(1) == (NumDstElts / 2) &&
+ N0.getOperand(0) == N1.getOperand(0) && VT.is128BitVector() &&
+ N0.getOperand(0).getValueType().is256BitVector()) {
+ // TODO - support target/faux shuffles.
+ SDValue Vec = peekThroughBitcasts(N0.getOperand(0));
+ if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec)) {
+ // To keep the PACK LHS/RHS coherency, we must be able to scale the unary
+ // shuffle to a vXi64 width - we can probably relax this in the future.
+ SmallVector<int, 4> ShuffleMask;
+ if (SVN->getOperand(1).isUndef() &&
+ scaleShuffleElements(SVN->getMask(), 4, ShuffleMask)) {
+ SDLoc DL(N);
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(SVN->getOperand(0), DL);
+ Lo = DAG.getBitcast(N0.getValueType(), Lo);
+ Hi = DAG.getBitcast(N1.getValueType(), Hi);
+ SDValue Res = DAG.getNode(Opcode, DL, VT, Lo, Hi);
+ Res = DAG.getBitcast(MVT::v4i32, Res);
+ Res = DAG.getVectorShuffle(MVT::v4i32, DL, Res, Res, ShuffleMask);
+ return DAG.getBitcast(VT, Res);
+ }
+ }
+ }
+
+ // Attempt to fold PACK(SHUFFLE(X,Y),SHUFFLE(X,Y)) -> SHUFFLE(PACK(X,Y)).
+ // TODO: Relax shuffle scaling to support sub-128-bit subvector shuffles.
+ if (VT.is256BitVector()) {
+ if (auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(N0)) {
+ if (auto *SVN1 = dyn_cast<ShuffleVectorSDNode>(N1)) {
+ SmallVector<int, 2> ShuffleMask0, ShuffleMask1;
+ if (scaleShuffleElements(SVN0->getMask(), 2, ShuffleMask0) &&
+ scaleShuffleElements(SVN1->getMask(), 2, ShuffleMask1)) {
+ SDValue Op00 = SVN0->getOperand(0);
+ SDValue Op01 = SVN0->getOperand(1);
+ SDValue Op10 = SVN1->getOperand(0);
+ SDValue Op11 = SVN1->getOperand(1);
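+ // If the second shuffle references the inputs in swapped order, commute it so both shuffles use (Op00, Op01).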
+ if ((Op00 == Op11) && (Op01 == Op10)) {
+ std::swap(Op10, Op11);
+ ShuffleVectorSDNode::commuteMask(ShuffleMask1);
+ }
+ if ((Op00 == Op10) && (Op01 == Op11)) {
+ SmallVector<int, 4> ShuffleMask;
+ ShuffleMask.append(ShuffleMask0.begin(), ShuffleMask0.end());
+ ShuffleMask.append(ShuffleMask1.begin(), ShuffleMask1.end());
+ SDLoc DL(N);
+ SDValue Res = DAG.getNode(Opcode, DL, VT, Op00, Op01);
+ Res = DAG.getBitcast(MVT::v4i64, Res);
+ Res = DAG.getVectorShuffle(MVT::v4i64, DL, Res, Res, ShuffleMask);
+ return DAG.getBitcast(VT, Res);
+ }
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
unsigned Opcode = N->getOpcode();
assert((X86ISD::PACKSS == Opcode || X86ISD::PACKUS == Opcode) &&
- "Unexpected shift opcode");
+ "Unexpected pack opcode");
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+ unsigned NumDstElts = VT.getVectorNumElements();
unsigned DstBitsPerElt = VT.getScalarSizeInBits();
unsigned SrcBitsPerElt = 2 * DstBitsPerElt;
assert(N0.getScalarValueSizeInBits() == SrcBitsPerElt &&
@@ -39545,7 +42001,6 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
getTargetConstantBitsFromNode(N0, SrcBitsPerElt, UndefElts0, EltBits0) &&
getTargetConstantBitsFromNode(N1, SrcBitsPerElt, UndefElts1, EltBits1)) {
unsigned NumLanes = VT.getSizeInBits() / 128;
- unsigned NumDstElts = VT.getVectorNumElements();
unsigned NumSrcElts = NumDstElts / 2;
unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
@@ -39592,6 +42047,10 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N));
}
+ // Try to fold PACK(SHUFFLE(),SHUFFLE()) -> SHUFFLE(PACK()).
+ if (SDValue V = combineVectorPackWithShuffle(N, DAG))
+ return V;
+
// Try to combine a PACKUSWB/PACKSSWB implemented truncate with a regular
// truncate to create a larger truncate.
if (Subtarget.hasAVX512() &&
@@ -39674,26 +42133,37 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
if (ShiftVal >= NumBitsPerElt) {
if (LogicalShift)
return DAG.getConstant(0, SDLoc(N), VT);
- else
- ShiftVal = NumBitsPerElt - 1;
+ ShiftVal = NumBitsPerElt - 1;
}
- // Shift N0 by zero -> N0.
+ // (shift X, 0) -> X
if (!ShiftVal)
return N0;
- // Shift zero -> zero.
+ // (shift 0, C) -> 0
if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ // N0 is all zeros or undef. We guarantee that the bits shifted into the
+ // result are all zeros, not undef.
return DAG.getConstant(0, SDLoc(N), VT);
- // Fold (VSRAI (VSRAI X, C1), C2) --> (VSRAI X, (C1 + C2)) with (C1 + C2)
- // clamped to (NumBitsPerElt - 1).
- if (Opcode == X86ISD::VSRAI && N0.getOpcode() == X86ISD::VSRAI) {
+ // (VSRAI -1, C) -> -1
+ if (!LogicalShift && ISD::isBuildVectorAllOnes(N0.getNode()))
+ // N0 is all ones or undef. We guarantee that the bits shifted into the
+ // result are all ones, not undef.
+ return DAG.getConstant(-1, SDLoc(N), VT);
+
+ // (shift (shift X, C2), C1) -> (shift X, (C1 + C2))
+ if (Opcode == N0.getOpcode()) {
unsigned ShiftVal2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
unsigned NewShiftVal = ShiftVal + ShiftVal2;
- if (NewShiftVal >= NumBitsPerElt)
+ if (NewShiftVal >= NumBitsPerElt) {
+ // Out of range logical bit shifts are guaranteed to be zero.
+ // Out of range arithmetic bit shifts splat the sign bit.
+ if (LogicalShift)
+ return DAG.getConstant(0, SDLoc(N), VT);
NewShiftVal = NumBitsPerElt - 1;
- return DAG.getNode(X86ISD::VSRAI, SDLoc(N), VT, N0.getOperand(0),
+ }
+ return DAG.getNode(Opcode, SDLoc(N), VT, N0.getOperand(0),
DAG.getTargetConstant(NewShiftVal, SDLoc(N), MVT::i8));
}
@@ -39743,19 +42213,24 @@ static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
assert(((N->getOpcode() == X86ISD::PINSRB && VT == MVT::v16i8) ||
- (N->getOpcode() == X86ISD::PINSRW && VT == MVT::v8i16)) &&
+ (N->getOpcode() == X86ISD::PINSRW && VT == MVT::v8i16) ||
+ N->getOpcode() == ISD::INSERT_VECTOR_ELT) &&
"Unexpected vector insertion");
- unsigned NumBitsPerElt = VT.getScalarSizeInBits();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.SimplifyDemandedBits(SDValue(N, 0),
- APInt::getAllOnesValue(NumBitsPerElt), DCI))
- return SDValue(N, 0);
+ if (N->getOpcode() == X86ISD::PINSRB || N->getOpcode() == X86ISD::PINSRW) {
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0),
+ APInt::getAllOnesValue(NumBitsPerElt), DCI))
+ return SDValue(N, 0);
+ }
- // Attempt to combine PINSRB/PINSRW patterns to a shuffle.
- SDValue Op(N, 0);
- if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
- return Res;
+ // Attempt to combine insertion patterns to a shuffle.
+ if (VT.isSimple() && DCI.isAfterLegalizeDAG()) {
+ SDValue Op(N, 0);
+ if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
+ return Res;
+ }
return SDValue();
}
@@ -39778,7 +42253,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N);
// The SETCCs should both refer to the same CMP.
- if (CMP0.getOpcode() != X86ISD::CMP || CMP0 != CMP1)
+ if (CMP0.getOpcode() != X86ISD::FCMP || CMP0 != CMP1)
return SDValue();
SDValue CMP00 = CMP0->getOperand(0);
@@ -39877,10 +42352,27 @@ static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- if (SDValue Not = IsNOT(N0, DAG)) {
+ auto GetNot = [&VT, &DAG](SDValue V) {
+ // Basic X = NOT(Y) detection.
+ if (SDValue Not = IsNOT(V, DAG))
+ return Not;
+ // Fold BROADCAST(NOT(Y)) -> BROADCAST(Y).
+ if (V.getOpcode() == X86ISD::VBROADCAST) {
+ SDValue Src = V.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ if (!SrcVT.isVector())
+ return SDValue();
+ if (SDValue Not = IsNOT(Src, DAG))
+ return DAG.getNode(X86ISD::VBROADCAST, SDLoc(V), VT,
+ DAG.getBitcast(SrcVT, Not));
+ }
+ return SDValue();
+ };
+
+ if (SDValue Not = GetNot(N0)) {
X = Not;
Y = N1;
- } else if (SDValue Not = IsNOT(N1, DAG)) {
+ } else if (SDValue Not = GetNot(N1)) {
X = Not;
Y = N0;
} else
@@ -39891,6 +42383,65 @@ static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(X86ISD::ANDNP, SDLoc(N), VT, X, Y);
}
+// Try to widen AND, OR and XOR nodes to VT in order to remove casts around
+// logical operations, like in the example below.
+// or (and (truncate x), (truncate y)),
+//    (xor (truncate z), (build_vector (constants)))
+// Given a target type \p VT, we generate
+// or (and x, y), (xor z, zext(build_vector (constants)))
+// where x, y and z are of type \p VT. We can do so if each operand is either a
+// truncate from VT, a vector of constants (the second operand only), or can
+// itself be recursively promoted.
+static SDValue PromoteMaskArithmetic(SDNode *N, EVT VT, SelectionDAG &DAG,
+ unsigned Depth) {
+ // Limit recursion to avoid excessive compile times.
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
+ return SDValue();
+
+ if (N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND &&
+ N->getOpcode() != ISD::OR)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDLoc DL(N);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isOperationLegalOrPromote(N->getOpcode(), VT))
+ return SDValue();
+
+ if (SDValue NN0 = PromoteMaskArithmetic(N0.getNode(), VT, DAG, Depth + 1))
+ N0 = NN0;
+ else {
+ // The Left side has to be a trunc.
+ if (N0.getOpcode() != ISD::TRUNCATE)
+ return SDValue();
+
+ // The type of the truncated inputs.
+ if (N0.getOperand(0).getValueType() != VT)
+ return SDValue();
+
+ N0 = N0.getOperand(0);
+ }
+
+ if (SDValue NN1 = PromoteMaskArithmetic(N1.getNode(), VT, DAG, Depth + 1))
+ N1 = NN1;
+ else {
+ // The right side has to be a 'trunc' or a constant vector.
+ bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getValueType() == VT;
+ if (!RHSTrunc && !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()))
+ return SDValue();
+
+ if (RHSTrunc)
+ N1 = N1.getOperand(0);
+ else
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N1);
+ }
+
+ return DAG.getNode(N->getOpcode(), DL, VT, N0, N1);
+}
+
// On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized
// register. In most cases we actually compare or select YMM-sized registers
// and mixing the two types creates horrible code. This method optimizes
@@ -39902,6 +42453,7 @@ static SDValue PromoteMaskArithmetic(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
assert(VT.isVector() && "Expected vector type");
+ SDLoc DL(N);
assert((N->getOpcode() == ISD::ANY_EXTEND ||
N->getOpcode() == ISD::ZERO_EXTEND ||
N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node");
@@ -39909,57 +42461,33 @@ static SDValue PromoteMaskArithmetic(SDNode *N, SelectionDAG &DAG,
SDValue Narrow = N->getOperand(0);
EVT NarrowVT = Narrow.getValueType();
- if (Narrow->getOpcode() != ISD::XOR &&
- Narrow->getOpcode() != ISD::AND &&
- Narrow->getOpcode() != ISD::OR)
- return SDValue();
-
- SDValue N0 = Narrow->getOperand(0);
- SDValue N1 = Narrow->getOperand(1);
- SDLoc DL(Narrow);
-
- // The Left side has to be a trunc.
- if (N0.getOpcode() != ISD::TRUNCATE)
- return SDValue();
-
- // The type of the truncated inputs.
- if (N0.getOperand(0).getValueType() != VT)
- return SDValue();
-
- // The right side has to be a 'trunc' or a constant vector.
- bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
- N1.getOperand(0).getValueType() == VT;
- if (!RHSTrunc &&
- !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()))
- return SDValue();
-
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-
- if (!TLI.isOperationLegalOrPromote(Narrow->getOpcode(), VT))
- return SDValue();
-
- // Set N0 and N1 to hold the inputs to the new wide operation.
- N0 = N0.getOperand(0);
- if (RHSTrunc)
- N1 = N1.getOperand(0);
- else
- N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N1);
-
// Generate the wide operation.
- SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, VT, N0, N1);
- unsigned Opcode = N->getOpcode();
- switch (Opcode) {
+ SDValue Op = PromoteMaskArithmetic(Narrow.getNode(), VT, DAG, 0);
+ if (!Op)
+ return SDValue();
+ switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected opcode");
case ISD::ANY_EXTEND:
return Op;
case ISD::ZERO_EXTEND:
- return DAG.getZeroExtendInReg(Op, DL, NarrowVT.getScalarType());
+ return DAG.getZeroExtendInReg(Op, DL, NarrowVT);
case ISD::SIGN_EXTEND:
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT,
Op, DAG.getValueType(NarrowVT));
}
}
+static unsigned convertIntLogicToFPLogicOpcode(unsigned Opcode) {
+ unsigned FPOpcode;
+ switch (Opcode) {
+ default: llvm_unreachable("Unexpected input node for FP logic conversion");
+ case ISD::AND: FPOpcode = X86ISD::FAND; break;
+ case ISD::OR: FPOpcode = X86ISD::FOR; break;
+ case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
+ }
+ return FPOpcode;
+}
+
/// If both input operands of a logic op are being cast from floating point
/// types, try to convert this into a floating point logic node to avoid
/// unnecessary moves from SSE to integer registers.
@@ -39984,18 +42512,45 @@ static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
(Subtarget.hasSSE2() && N00Type == MVT::f64)))
return SDValue();
- unsigned FPOpcode;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected input node for FP logic conversion");
- case ISD::AND: FPOpcode = X86ISD::FAND; break;
- case ISD::OR: FPOpcode = X86ISD::FOR; break;
- case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
- }
-
+ unsigned FPOpcode = convertIntLogicToFPLogicOpcode(N->getOpcode());
SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
return DAG.getBitcast(VT, FPLogic);
}
+// Attempt to fold BITOP(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(BITOP(X,Y))
+// to reduce XMM->GPR traffic.
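+// e.g. (and (movmsk X), (movmsk Y)) -> (movmsk (and X, Y)), using the
+// equivalent FP logic opcode when the source vectors are floating point.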
+static SDValue combineBitOpWithMOVMSK(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opc = N->getOpcode();
+ assert((Opc == ISD::OR || Opc == ISD::AND || Opc == ISD::XOR) &&
+ "Unexpected bit opcode");
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Both operands must be single use MOVMSK.
+ if (N0.getOpcode() != X86ISD::MOVMSK || !N0.hasOneUse() ||
+ N1.getOpcode() != X86ISD::MOVMSK || !N1.hasOneUse())
+ return SDValue();
+
+ SDValue Vec0 = N0.getOperand(0);
+ SDValue Vec1 = N1.getOperand(0);
+ EVT VecVT0 = Vec0.getValueType();
+ EVT VecVT1 = Vec1.getValueType();
+
+ // Both MOVMSK operands must be from vectors of the same size and the same
+ // element size, but it's OK if one is floating point and the other integer.
+ if (VecVT0.getSizeInBits() != VecVT1.getSizeInBits() ||
+ VecVT0.getScalarSizeInBits() != VecVT1.getScalarSizeInBits())
+ return SDValue();
+
+ SDLoc DL(N);
+ unsigned VecOpc =
+ VecVT0.isFloatingPoint() ? convertIntLogicToFPLogicOpcode(Opc) : Opc;
+ SDValue Result =
+ DAG.getNode(VecOpc, DL, VecVT0, Vec0, DAG.getBitcast(VecVT0, Vec1));
+ return DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
+}
+
/// If this is a zero/all-bits result that is bitwise-anded with a low bits
/// mask. (Mask == 1 for the x86 lowering of a SETCC + ZEXT), replace the 'and'
/// with a shift-right to eliminate loading the vector constant mask value.
@@ -40318,7 +42873,8 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
// TODO: Support multiple SrcOps.
if (VT == MVT::i1) {
SmallVector<SDValue, 2> SrcOps;
- if (matchScalarReduction(SDValue(N, 0), ISD::AND, SrcOps) &&
+ SmallVector<APInt, 2> SrcPartials;
+ if (matchScalarReduction(SDValue(N, 0), ISD::AND, SrcOps, &SrcPartials) &&
SrcOps.size() == 1) {
SDLoc dl(N);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -40328,9 +42884,11 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (!Mask && TLI.isTypeLegal(SrcOps[0].getValueType()))
Mask = DAG.getBitcast(MaskVT, SrcOps[0]);
if (Mask) {
- APInt AllBits = APInt::getAllOnesValue(NumElts);
- return DAG.getSetCC(dl, MVT::i1, Mask,
- DAG.getConstant(AllBits, dl, MaskVT), ISD::SETEQ);
+ assert(SrcPartials[0].getBitWidth() == NumElts &&
+ "Unexpected partial reduction mask");
+ SDValue PartialBits = DAG.getConstant(SrcPartials[0], dl, MaskVT);
+ Mask = DAG.getNode(ISD::AND, dl, MaskVT, Mask, PartialBits);
+ return DAG.getSetCC(dl, MVT::i1, Mask, PartialBits, ISD::SETEQ);
}
}
}
@@ -40338,6 +42896,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineScalarAndWithMaskSetcc(N, DAG, Subtarget))
return V;
+ if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
+ return R;
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -40446,6 +43007,16 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
}
SDLoc DL(N);
+
+ if (UseVPTERNLOG) {
+ // Emit a VPTERNLOG node directly.
+ SDValue A = DAG.getBitcast(VT, N0.getOperand(1));
+ SDValue B = DAG.getBitcast(VT, N0.getOperand(0));
+ SDValue C = DAG.getBitcast(VT, N1.getOperand(0));
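+ // Immediate 0xCA is the ternary truth table for "A ? B : C", i.e. a
+ // bitwise select of B and C using A as the mask.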
+ SDValue Imm = DAG.getTargetConstant(0xCA, DL, MVT::i8);
+ return DAG.getNode(X86ISD::VPTERNLOG, DL, VT, A, B, C, Imm);
+ }
+
SDValue X = N->getOperand(0);
SDValue Y =
DAG.getNode(X86ISD::ANDNP, DL, VT, DAG.getBitcast(VT, N0.getOperand(1)),
@@ -40529,6 +43100,10 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
if (!Subtarget.hasSSE41())
return SDValue();
+ // If we have VPTERNLOG we should prefer that since PBLENDVB is multiple uops.
+ if (Subtarget.hasVLX())
+ return SDValue();
+
MVT BlendVT = VT.is256BitVector() ? MVT::v32i8 : MVT::v16i8;
X = DAG.getBitcast(BlendVT, X);
@@ -40645,139 +43220,6 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
return Ret;
}
-static SDValue combineOrShiftToFunnelShift(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- assert(N->getOpcode() == ISD::OR && "Expected ISD::OR node");
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- EVT VT = N->getValueType(0);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-
- if (!TLI.isOperationLegalOrCustom(ISD::FSHL, VT) ||
- !TLI.isOperationLegalOrCustom(ISD::FSHR, VT))
- return SDValue();
-
- // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
- bool OptForSize = DAG.shouldOptForSize();
- unsigned Bits = VT.getScalarSizeInBits();
-
- // SHLD/SHRD instructions have lower register pressure, but on some
- // platforms they have higher latency than the equivalent
- // series of shifts/or that would otherwise be generated.
- // Don't fold (or (x << c) | (y >> (64 - c))) if SHLD/SHRD instructions
- // have higher latencies and we are not optimizing for size.
- if (!OptForSize && Subtarget.isSHLDSlow())
- return SDValue();
-
- if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
- std::swap(N0, N1);
- if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
- return SDValue();
- if (!N0.hasOneUse() || !N1.hasOneUse())
- return SDValue();
-
- EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
-
- SDValue ShAmt0 = N0.getOperand(1);
- if (ShAmt0.getValueType() != ShiftVT)
- return SDValue();
- SDValue ShAmt1 = N1.getOperand(1);
- if (ShAmt1.getValueType() != ShiftVT)
- return SDValue();
-
- // Peek through any modulo shift masks.
- SDValue ShMsk0;
- if (ShAmt0.getOpcode() == ISD::AND &&
- isa<ConstantSDNode>(ShAmt0.getOperand(1)) &&
- ShAmt0.getConstantOperandAPInt(1) == (Bits - 1)) {
- ShMsk0 = ShAmt0;
- ShAmt0 = ShAmt0.getOperand(0);
- }
- SDValue ShMsk1;
- if (ShAmt1.getOpcode() == ISD::AND &&
- isa<ConstantSDNode>(ShAmt1.getOperand(1)) &&
- ShAmt1.getConstantOperandAPInt(1) == (Bits - 1)) {
- ShMsk1 = ShAmt1;
- ShAmt1 = ShAmt1.getOperand(0);
- }
-
- if (ShAmt0.getOpcode() == ISD::TRUNCATE)
- ShAmt0 = ShAmt0.getOperand(0);
- if (ShAmt1.getOpcode() == ISD::TRUNCATE)
- ShAmt1 = ShAmt1.getOperand(0);
-
- SDLoc DL(N);
- unsigned Opc = ISD::FSHL;
- SDValue Op0 = N0.getOperand(0);
- SDValue Op1 = N1.getOperand(0);
- if (ShAmt0.getOpcode() == ISD::SUB || ShAmt0.getOpcode() == ISD::XOR) {
- Opc = ISD::FSHR;
- std::swap(Op0, Op1);
- std::swap(ShAmt0, ShAmt1);
- std::swap(ShMsk0, ShMsk1);
- }
-
- auto GetFunnelShift = [&DAG, &DL, VT, Opc, &ShiftVT](SDValue Op0, SDValue Op1,
- SDValue Amt) {
- if (Opc == ISD::FSHR)
- std::swap(Op0, Op1);
- return DAG.getNode(Opc, DL, VT, Op0, Op1,
- DAG.getNode(ISD::TRUNCATE, DL, ShiftVT, Amt));
- };
-
- // OR( SHL( X, C ), SRL( Y, 32 - C ) ) -> FSHL( X, Y, C )
- // OR( SRL( X, C ), SHL( Y, 32 - C ) ) -> FSHR( Y, X, C )
- // OR( SHL( X, C ), SRL( SRL( Y, 1 ), XOR( C, 31 ) ) ) -> FSHL( X, Y, C )
- // OR( SRL( X, C ), SHL( SHL( Y, 1 ), XOR( C, 31 ) ) ) -> FSHR( Y, X, C )
- // OR( SHL( X, AND( C, 31 ) ), SRL( Y, AND( 0 - C, 31 ) ) ) -> FSHL( X, Y, C )
- // OR( SRL( X, AND( C, 31 ) ), SHL( Y, AND( 0 - C, 31 ) ) ) -> FSHR( Y, X, C )
- if (ShAmt1.getOpcode() == ISD::SUB) {
- SDValue Sum = ShAmt1.getOperand(0);
- if (auto *SumC = dyn_cast<ConstantSDNode>(Sum)) {
- SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
- if (ShAmt1Op1.getOpcode() == ISD::AND &&
- isa<ConstantSDNode>(ShAmt1Op1.getOperand(1)) &&
- ShAmt1Op1.getConstantOperandAPInt(1) == (Bits - 1)) {
- ShMsk1 = ShAmt1Op1;
- ShAmt1Op1 = ShAmt1Op1.getOperand(0);
- }
- if (ShAmt1Op1.getOpcode() == ISD::TRUNCATE)
- ShAmt1Op1 = ShAmt1Op1.getOperand(0);
- if ((SumC->getAPIntValue() == Bits ||
- (SumC->getAPIntValue() == 0 && ShMsk1)) &&
- ShAmt1Op1 == ShAmt0)
- return GetFunnelShift(Op0, Op1, ShAmt0);
- }
- } else if (auto *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
- auto *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
- if (ShAmt0C && (ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue()) == Bits)
- return GetFunnelShift(Op0, Op1, ShAmt0);
- } else if (ShAmt1.getOpcode() == ISD::XOR) {
- SDValue Mask = ShAmt1.getOperand(1);
- if (auto *MaskC = dyn_cast<ConstantSDNode>(Mask)) {
- unsigned InnerShift = (ISD::FSHL == Opc ? ISD::SRL : ISD::SHL);
- SDValue ShAmt1Op0 = ShAmt1.getOperand(0);
- if (ShAmt1Op0.getOpcode() == ISD::TRUNCATE)
- ShAmt1Op0 = ShAmt1Op0.getOperand(0);
- if (MaskC->getSExtValue() == (Bits - 1) &&
- (ShAmt1Op0 == ShAmt0 || ShAmt1Op0 == ShMsk0)) {
- if (Op1.getOpcode() == InnerShift &&
- isa<ConstantSDNode>(Op1.getOperand(1)) &&
- Op1.getConstantOperandAPInt(1).isOneValue()) {
- return GetFunnelShift(Op0, Op1.getOperand(0), ShAmt0);
- }
- // Test for ADD( Y, Y ) as an equivalent to SHL( Y, 1 ).
- if (InnerShift == ISD::SHL && Op1.getOpcode() == ISD::ADD &&
- Op1.getOperand(0) == Op1.getOperand(1)) {
- return GetFunnelShift(Op0, Op1.getOperand(0), ShAmt0);
- }
- }
- }
- }
-
- return SDValue();
-}
-
static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -40797,7 +43239,8 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
// TODO: Support multiple SrcOps.
if (VT == MVT::i1) {
SmallVector<SDValue, 2> SrcOps;
- if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps) &&
+ SmallVector<APInt, 2> SrcPartials;
+ if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps, &SrcPartials) &&
SrcOps.size() == 1) {
SDLoc dl(N);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -40807,13 +43250,19 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (!Mask && TLI.isTypeLegal(SrcOps[0].getValueType()))
Mask = DAG.getBitcast(MaskVT, SrcOps[0]);
if (Mask) {
- APInt AllBits = APInt::getNullValue(NumElts);
- return DAG.getSetCC(dl, MVT::i1, Mask,
- DAG.getConstant(AllBits, dl, MaskVT), ISD::SETNE);
+ assert(SrcPartials[0].getBitWidth() == NumElts &&
+ "Unexpected partial reduction mask");
+ SDValue ZeroBits = DAG.getConstant(0, dl, MaskVT);
+ SDValue PartialBits = DAG.getConstant(SrcPartials[0], dl, MaskVT);
+ Mask = DAG.getNode(ISD::AND, dl, MaskVT, Mask, PartialBits);
+ return DAG.getSetCC(dl, MVT::i1, Mask, ZeroBits, ISD::SETNE);
}
}
}
+ if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
+ return R;
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -40829,8 +43278,33 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget))
return R;
- if (SDValue R = combineOrShiftToFunnelShift(N, DAG, Subtarget))
- return R;
+ // Combine OR(X,KSHIFTL(Y,Elts/2)) -> CONCAT_VECTORS(X,Y) == KUNPCK(X,Y).
+ // Combine OR(KSHIFTL(X,Elts/2),Y) -> CONCAT_VECTORS(Y,X) == KUNPCK(Y,X).
+ // iff the upper elements of the non-shifted arg are zero.
+ // KUNPCK requires 16+ bool vector elements.
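+ // e.g. for v32i1: OR(X, KSHIFTL(Y,16)) with the upper 16 elements of X
+ // known zero becomes CONCAT_VECTORS(X[0..15], Y[0..15]), i.e. KUNPCKWD.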
+ if (N0.getOpcode() == X86ISD::KSHIFTL || N1.getOpcode() == X86ISD::KSHIFTL) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned HalfElts = NumElts / 2;
+ APInt UpperElts = APInt::getHighBitsSet(NumElts, HalfElts);
+ if (NumElts >= 16 && N1.getOpcode() == X86ISD::KSHIFTL &&
+ N1.getConstantOperandAPInt(1) == HalfElts &&
+ DAG.MaskedValueIsZero(N0, APInt(1, 1), UpperElts)) {
+ SDLoc dl(N);
+ return DAG.getNode(
+ ISD::CONCAT_VECTORS, dl, VT,
+ extractSubVector(N0, 0, DAG, dl, HalfElts),
+ extractSubVector(N1.getOperand(0), 0, DAG, dl, HalfElts));
+ }
+ if (NumElts >= 16 && N0.getOpcode() == X86ISD::KSHIFTL &&
+ N0.getConstantOperandAPInt(1) == HalfElts &&
+ DAG.MaskedValueIsZero(N1, APInt(1, 1), UpperElts)) {
+ SDLoc dl(N);
+ return DAG.getNode(
+ ISD::CONCAT_VECTORS, dl, VT,
+ extractSubVector(N1, 0, DAG, dl, HalfElts),
+ extractSubVector(N0.getOperand(0), 0, DAG, dl, HalfElts));
+ }
+ }
// Attempt to recursively combine an OR of shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
@@ -41179,18 +43653,9 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
  // A lambda checking that the given SDValue is a constant vector and that
  // each element is in the range [Min, Max].
auto IsConstVectorInRange = [](SDValue V, unsigned Min, unsigned Max) {
- BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(V);
- if (!BV || !BV->isConstant())
- return false;
- for (SDValue Op : V->ops()) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
- if (!C)
- return false;
- const APInt &Val = C->getAPIntValue();
- if (Val.ult(Min) || Val.ugt(Max))
- return false;
- }
- return true;
+ return ISD::matchUnaryPredicate(V, [Min, Max](ConstantSDNode *C) {
+ return !(C->getAPIntValue().ult(Min) || C->getAPIntValue().ugt(Max));
+ });
};
// Check if each element of the vector is right-shifted by one.
@@ -41291,10 +43756,10 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
// pre-AVX2 targets as 32-byte loads will lower to regular temporal loads.
ISD::LoadExtType Ext = Ld->getExtensionType();
bool Fast;
- unsigned Alignment = Ld->getAlignment();
if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() &&
Ext == ISD::NON_EXTLOAD &&
- ((Ld->isNonTemporal() && !Subtarget.hasInt256() && Alignment >= 16) ||
+ ((Ld->isNonTemporal() && !Subtarget.hasInt256() &&
+ Ld->getAlignment() >= 16) ||
(TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT,
*Ld->getMemOperand(), &Fast) &&
!Fast))) {
@@ -41302,17 +43767,18 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
if (NumElems < 2)
return SDValue();
- unsigned HalfAlign = 16;
+ unsigned HalfOffset = 16;
SDValue Ptr1 = Ld->getBasePtr();
- SDValue Ptr2 = DAG.getMemBasePlusOffset(Ptr1, HalfAlign, dl);
+ SDValue Ptr2 = DAG.getMemBasePlusOffset(Ptr1, HalfOffset, dl);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
NumElems / 2);
SDValue Load1 =
DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr1, Ld->getPointerInfo(),
- Alignment, Ld->getMemOperand()->getFlags());
+ Ld->getOriginalAlign(),
+ Ld->getMemOperand()->getFlags());
SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr2,
- Ld->getPointerInfo().getWithOffset(HalfAlign),
- MinAlign(Alignment, HalfAlign),
+ Ld->getPointerInfo().getWithOffset(HalfOffset),
+ Ld->getOriginalAlign(),
Ld->getMemOperand()->getFlags());
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Load1.getValue(1), Load2.getValue(1));
@@ -41329,13 +43795,28 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
if (TLI.isTypeLegal(IntVT)) {
SDValue IntLoad = DAG.getLoad(IntVT, dl, Ld->getChain(), Ld->getBasePtr(),
- Ld->getPointerInfo(), Alignment,
+ Ld->getPointerInfo(),
+ Ld->getOriginalAlign(),
Ld->getMemOperand()->getFlags());
SDValue BoolVec = DAG.getBitcast(RegVT, IntLoad);
return DCI.CombineTo(N, BoolVec, IntLoad.getValue(1), true);
}
}
+ // Cast ptr32 and ptr64 pointers to the default address space before a load.
+ unsigned AddrSpace = Ld->getAddressSpace();
+ if (AddrSpace == X86AS::PTR64 || AddrSpace == X86AS::PTR32_SPTR ||
+ AddrSpace == X86AS::PTR32_UPTR) {
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ if (PtrVT != Ld->getBasePtr().getSimpleValueType()) {
+ SDValue Cast =
+ DAG.getAddrSpaceCast(dl, PtrVT, Ld->getBasePtr(), AddrSpace, 0);
+ return DAG.getLoad(RegVT, dl, Ld->getChain(), Cast, Ld->getPointerInfo(),
+ Ld->getOriginalAlign(),
+ Ld->getMemOperand()->getFlags());
+ }
+ }
+
return SDValue();
}
@@ -41482,7 +43963,7 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
- MaskedLoadSDNode *Mld = cast<MaskedLoadSDNode>(N);
+ auto *Mld = cast<MaskedLoadSDNode>(N);
// TODO: Expanding load with constant mask may be optimized as well.
if (Mld->isExpandingLoad())
@@ -41491,12 +43972,33 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
if (Mld->getExtensionType() == ISD::NON_EXTLOAD) {
if (SDValue ScalarLoad = reduceMaskedLoadToScalarLoad(Mld, DAG, DCI))
return ScalarLoad;
+
// TODO: Do some AVX512 subsets benefit from this transform?
if (!Subtarget.hasAVX512())
if (SDValue Blend = combineMaskedLoadConstantMask(Mld, DAG, DCI))
return Blend;
}
+ // If the mask value has been legalized to a non-boolean vector, try to
+ // simplify ops leading up to it. We only demand the MSB of each lane.
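+ // e.g. a mask that was legalized to v4i32 only needs the sign bit of each
+ // 32-bit lane, which is all the AVX masked load forms test.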
+ SDValue Mask = Mld->getMask();
+ if (Mask.getScalarValueSizeInBits() != 1) {
+ EVT VT = Mld->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ APInt DemandedBits(APInt::getSignMask(VT.getScalarSizeInBits()));
+ if (TLI.SimplifyDemandedBits(Mask, DemandedBits, DCI)) {
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ DCI.AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+ if (SDValue NewMask =
+ TLI.SimplifyMultipleUseDemandedBits(Mask, DemandedBits, DAG))
+ return DAG.getMaskedLoad(
+ VT, SDLoc(N), Mld->getChain(), Mld->getBasePtr(), Mld->getOffset(),
+ NewMask, Mld->getPassThru(), Mld->getMemoryVT(), Mld->getMemOperand(),
+ Mld->getAddressingMode(), Mld->getExtensionType());
+ }
+
return SDValue();
}
@@ -41548,9 +44050,18 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
// simplify ops leading up to it. We only demand the MSB of each lane.
SDValue Mask = Mst->getMask();
if (Mask.getScalarValueSizeInBits() != 1) {
- APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits()));
- if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
+ APInt DemandedBits(APInt::getSignMask(VT.getScalarSizeInBits()));
+ if (TLI.SimplifyDemandedBits(Mask, DemandedBits, DCI)) {
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ DCI.AddToWorklist(N);
return SDValue(N, 0);
+ }
+ if (SDValue NewMask =
+ TLI.SimplifyMultipleUseDemandedBits(Mask, DemandedBits, DAG))
+ return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Mst->getValue(),
+ Mst->getBasePtr(), Mst->getOffset(), NewMask,
+ Mst->getMemoryVT(), Mst->getMemOperand(),
+ Mst->getAddressingMode());
}
SDValue Value = Mst->getValue();
@@ -41572,7 +44083,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
StoreSDNode *St = cast<StoreSDNode>(N);
EVT StVT = St->getMemoryVT();
SDLoc dl(St);
- unsigned Alignment = St->getAlignment();
SDValue StoredVal = St->getValue();
EVT VT = StoredVal.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -41585,7 +44095,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
StoredVal = DAG.getBitcast(NewVT, StoredVal);
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
- St->getPointerInfo(), St->getAlignment(),
+ St->getPointerInfo(), St->getOriginalAlign(),
St->getMemOperand()->getFlags());
}
@@ -41596,7 +44106,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
StoredVal.getOperand(0).getValueType() == MVT::i8) {
return DAG.getStore(St->getChain(), dl, StoredVal.getOperand(0),
St->getBasePtr(), St->getPointerInfo(),
- St->getAlignment(), St->getMemOperand()->getFlags());
+ St->getOriginalAlign(),
+ St->getMemOperand()->getFlags());
}
// Widen v2i1/v4i1 stores to v8i1.
@@ -41607,7 +44118,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
Ops[0] = StoredVal;
StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
- St->getPointerInfo(), St->getAlignment(),
+ St->getPointerInfo(), St->getOriginalAlign(),
St->getMemOperand()->getFlags());
}
@@ -41616,7 +44127,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
VT == MVT::v64i1) && VT == StVT && TLI.isTypeLegal(VT) &&
ISD::isBuildVectorOfConstantSDNodes(StoredVal.getNode())) {
// If its a v64i1 store without 64-bit support, we need two stores.
- if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
+ if (!DCI.isBeforeLegalize() && VT == MVT::v64i1 && !Subtarget.is64Bit()) {
SDValue Lo = DAG.getBuildVector(MVT::v32i1, dl,
StoredVal->ops().slice(0, 32));
Lo = combinevXi1ConstantToInteger(Lo, DAG);
@@ -41629,18 +44140,19 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
SDValue Ch0 =
DAG.getStore(St->getChain(), dl, Lo, Ptr0, St->getPointerInfo(),
- Alignment, St->getMemOperand()->getFlags());
+ St->getOriginalAlign(),
+ St->getMemOperand()->getFlags());
SDValue Ch1 =
DAG.getStore(St->getChain(), dl, Hi, Ptr1,
St->getPointerInfo().getWithOffset(4),
- MinAlign(Alignment, 4U),
+ St->getOriginalAlign(),
St->getMemOperand()->getFlags());
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
}
StoredVal = combinevXi1ConstantToInteger(StoredVal, DAG);
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
- St->getPointerInfo(), St->getAlignment(),
+ St->getPointerInfo(), St->getOriginalAlign(),
St->getMemOperand()->getFlags());
}
@@ -41659,7 +44171,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
}
// Split under-aligned vector non-temporal stores.
- if (St->isNonTemporal() && StVT == VT && Alignment < VT.getStoreSize()) {
+ if (St->isNonTemporal() && StVT == VT &&
+ St->getAlignment() < VT.getStoreSize()) {
// ZMM/YMM nt-stores - either it can be stored as a series of shorter
// vectors or the legalizer can scalarize it to use MOVNTI.
if (VT.is256BitVector() || VT.is512BitVector()) {
@@ -41713,7 +44226,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
if (SDValue Avg = detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG,
Subtarget, dl))
return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(),
- St->getPointerInfo(), St->getAlignment(),
+ St->getPointerInfo(), St->getOriginalAlign(),
St->getMemOperand()->getFlags());
if (TLI.isTruncStoreLegal(VT, StVT)) {
@@ -41731,6 +44244,20 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+ // Cast ptr32 and ptr64 pointers to the default address space before a store.
+ unsigned AddrSpace = St->getAddressSpace();
+ if (AddrSpace == X86AS::PTR64 || AddrSpace == X86AS::PTR32_SPTR ||
+ AddrSpace == X86AS::PTR32_UPTR) {
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ if (PtrVT != St->getBasePtr().getSimpleValueType()) {
+ SDValue Cast =
+ DAG.getAddrSpaceCast(dl, PtrVT, St->getBasePtr(), AddrSpace, 0);
+ return DAG.getStore(St->getChain(), dl, StoredVal, Cast,
+ St->getPointerInfo(), St->getOriginalAlign(),
+ St->getMemOperand()->getFlags(), St->getAAInfo());
+ }
+ }
+
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
// the FP state in cases where an emms may be missing.
// A preferable solution to the general problem is to figure out the right
@@ -41785,13 +44312,38 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
SDValue NewExtract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
BitCast, OldExtract.getOperand(1));
return DAG.getStore(St->getChain(), dl, NewExtract, St->getBasePtr(),
- St->getPointerInfo(), St->getAlignment(),
+ St->getPointerInfo(), St->getOriginalAlign(),
St->getMemOperand()->getFlags());
}
return SDValue();
}
+static SDValue combineVEXTRACT_STORE(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ auto *St = cast<MemIntrinsicSDNode>(N);
+
+ SDValue StoredVal = N->getOperand(1);
+ MVT VT = StoredVal.getSimpleValueType();
+ EVT MemVT = St->getMemoryVT();
+
+ // Figure out which elements we demand.
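+ // e.g. an i64 extract-store from a v4i32 source only demands the first
+ // two i32 elements.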
+ unsigned StElts = MemVT.getSizeInBits() / VT.getScalarSizeInBits();
+ APInt DemandedElts = APInt::getLowBitsSet(VT.getVectorNumElements(), StElts);
+
+ APInt KnownUndef, KnownZero;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedVectorElts(StoredVal, DemandedElts, KnownUndef,
+ KnownZero, DCI)) {
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ DCI.AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+
+ return SDValue();
+}
+
/// Return 'true' if this vector operation is "horizontal"
/// and return the operands for the horizontal operation in LHS and RHS. A
/// horizontal operation performs the binary operation on successive elements
@@ -42028,17 +44580,6 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
// of one truncation.
// i.e. if one of the inputs will constant fold or the input is repeated.
switch (SrcOpcode) {
- case ISD::AND:
- case ISD::XOR:
- case ISD::OR: {
- SDValue Op0 = Src.getOperand(0);
- SDValue Op1 = Src.getOperand(1);
- if (TLI.isOperationLegalOrPromote(SrcOpcode, VT) &&
- (Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1)))
- return TruncateArithmetic(Op0, Op1);
- break;
- }
-
case ISD::MUL:
    // X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - it's
    // better to truncate if we have the chance.
@@ -42047,21 +44588,15 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
!TLI.isOperationLegal(SrcOpcode, SrcVT))
return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
LLVM_FALLTHROUGH;
- case ISD::ADD: {
- SDValue Op0 = Src.getOperand(0);
- SDValue Op1 = Src.getOperand(1);
- if (TLI.isOperationLegal(SrcOpcode, VT) &&
- (Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1)))
- return TruncateArithmetic(Op0, Op1);
- break;
- }
+ case ISD::AND:
+ case ISD::XOR:
+ case ISD::OR:
+ case ISD::ADD:
case ISD::SUB: {
- // TODO: ISD::SUB We are conservative and require both sides to be freely
- // truncatable to avoid interfering with combineSubToSubus.
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegal(SrcOpcode, VT) &&
- (Op0 == Op1 || (IsFreeTruncation(Op0) && IsFreeTruncation(Op1))))
+ (Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1)))
return TruncateArithmetic(Op0, Op1);
break;
}
@@ -42172,13 +44707,17 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
MVT InSVT = InVT.getScalarType();
// Check we have a truncation suited for PACKSS/PACKUS.
- if (!VT.is128BitVector() && !VT.is256BitVector())
+ if (!isPowerOf2_32(VT.getVectorNumElements()))
return SDValue();
if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32)
return SDValue();
if (InSVT != MVT::i16 && InSVT != MVT::i32 && InSVT != MVT::i64)
return SDValue();
+ // Truncation to sub-128bit vXi32 can be better handled with shuffles.
+ if (SVT == MVT::i32 && VT.getSizeInBits() < 128)
+ return SDValue();
+
// AVX512 has fast truncate, but if the input is already going to be split,
// there's no harm in trying pack.
if (Subtarget.hasAVX512() &&
@@ -42199,6 +44738,13 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
// Use PACKSS if the input has sign-bits that extend all the way to the
// packed/truncated value. e.g. Comparison result, sext_in_reg, etc.
unsigned NumSignBits = DAG.ComputeNumSignBits(In);
+
+ // Don't use PACKSS for vXi64 -> vXi32 truncations unless we're dealing with
+ // a sign splat. ComputeNumSignBits struggles to see through BITCASTs later
+ // on and combines/simplifications can't then use it.
+ if (SVT == MVT::i32 && NumSignBits != InSVT.getSizeInBits())
+ return SDValue();
+
if (NumSignBits > (InSVT.getSizeInBits() - NumPackedSignBits))
return truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget);
@@ -42227,9 +44773,9 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
if (!VT.isVector() || VT.getVectorElementType() != MVT::i16)
return SDValue();
- // Input type should be vXi32.
+ // Input type should be at least vXi32.
EVT InVT = Src.getValueType();
- if (InVT.getVectorElementType() != MVT::i32)
+ if (InVT.getVectorElementType().getSizeInBits() < 32)
return SDValue();
// Need a shift by 16.
@@ -42438,7 +44984,8 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
return combineVectorTruncation(N, DAG, Subtarget);
}
-static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
EVT VT = N->getValueType(0);
SDValue In = N->getOperand(0);
SDLoc DL(N);
@@ -42448,6 +44995,11 @@ static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG) {
if (auto USatVal = detectUSatPattern(In, VT, DAG, DL))
return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ APInt DemandedMask(APInt::getAllOnesValue(VT.getScalarSizeInBits()));
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -42540,37 +45092,46 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc,
if (NegMul) {
switch (Opcode) {
default: llvm_unreachable("Unexpected opcode");
- case ISD::FMA: Opcode = X86ISD::FNMADD; break;
- case X86ISD::FMADD_RND: Opcode = X86ISD::FNMADD_RND; break;
- case X86ISD::FMSUB: Opcode = X86ISD::FNMSUB; break;
- case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMSUB_RND; break;
- case X86ISD::FNMADD: Opcode = ISD::FMA; break;
- case X86ISD::FNMADD_RND: Opcode = X86ISD::FMADD_RND; break;
- case X86ISD::FNMSUB: Opcode = X86ISD::FMSUB; break;
- case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMSUB_RND; break;
+ case ISD::FMA: Opcode = X86ISD::FNMADD; break;
+ case ISD::STRICT_FMA: Opcode = X86ISD::STRICT_FNMADD; break;
+ case X86ISD::FMADD_RND: Opcode = X86ISD::FNMADD_RND; break;
+ case X86ISD::FMSUB: Opcode = X86ISD::FNMSUB; break;
+ case X86ISD::STRICT_FMSUB: Opcode = X86ISD::STRICT_FNMSUB; break;
+ case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMSUB_RND; break;
+ case X86ISD::FNMADD: Opcode = ISD::FMA; break;
+ case X86ISD::STRICT_FNMADD: Opcode = ISD::STRICT_FMA; break;
+ case X86ISD::FNMADD_RND: Opcode = X86ISD::FMADD_RND; break;
+ case X86ISD::FNMSUB: Opcode = X86ISD::FMSUB; break;
+ case X86ISD::STRICT_FNMSUB: Opcode = X86ISD::STRICT_FMSUB; break;
+ case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMSUB_RND; break;
}
}
if (NegAcc) {
switch (Opcode) {
default: llvm_unreachable("Unexpected opcode");
- case ISD::FMA: Opcode = X86ISD::FMSUB; break;
- case X86ISD::FMADD_RND: Opcode = X86ISD::FMSUB_RND; break;
- case X86ISD::FMSUB: Opcode = ISD::FMA; break;
- case X86ISD::FMSUB_RND: Opcode = X86ISD::FMADD_RND; break;
- case X86ISD::FNMADD: Opcode = X86ISD::FNMSUB; break;
- case X86ISD::FNMADD_RND: Opcode = X86ISD::FNMSUB_RND; break;
- case X86ISD::FNMSUB: Opcode = X86ISD::FNMADD; break;
- case X86ISD::FNMSUB_RND: Opcode = X86ISD::FNMADD_RND; break;
- case X86ISD::FMADDSUB: Opcode = X86ISD::FMSUBADD; break;
- case X86ISD::FMADDSUB_RND: Opcode = X86ISD::FMSUBADD_RND; break;
- case X86ISD::FMSUBADD: Opcode = X86ISD::FMADDSUB; break;
- case X86ISD::FMSUBADD_RND: Opcode = X86ISD::FMADDSUB_RND; break;
+ case ISD::FMA: Opcode = X86ISD::FMSUB; break;
+ case ISD::STRICT_FMA: Opcode = X86ISD::STRICT_FMSUB; break;
+ case X86ISD::FMADD_RND: Opcode = X86ISD::FMSUB_RND; break;
+ case X86ISD::FMSUB: Opcode = ISD::FMA; break;
+ case X86ISD::STRICT_FMSUB: Opcode = ISD::STRICT_FMA; break;
+ case X86ISD::FMSUB_RND: Opcode = X86ISD::FMADD_RND; break;
+ case X86ISD::FNMADD: Opcode = X86ISD::FNMSUB; break;
+ case X86ISD::STRICT_FNMADD: Opcode = X86ISD::STRICT_FNMSUB; break;
+ case X86ISD::FNMADD_RND: Opcode = X86ISD::FNMSUB_RND; break;
+ case X86ISD::FNMSUB: Opcode = X86ISD::FNMADD; break;
+ case X86ISD::STRICT_FNMSUB: Opcode = X86ISD::STRICT_FNMADD; break;
+ case X86ISD::FNMSUB_RND: Opcode = X86ISD::FNMADD_RND; break;
+ case X86ISD::FMADDSUB: Opcode = X86ISD::FMSUBADD; break;
+ case X86ISD::FMADDSUB_RND: Opcode = X86ISD::FMSUBADD_RND; break;
+ case X86ISD::FMSUBADD: Opcode = X86ISD::FMADDSUB; break;
+ case X86ISD::FMSUBADD_RND: Opcode = X86ISD::FMADDSUB_RND; break;
}
}
if (NegRes) {
switch (Opcode) {
+ // For accuracy reasons, we never combine fneg and fma under strict FP.
default: llvm_unreachable("Unexpected opcode");
case ISD::FMA: Opcode = X86ISD::FNMSUB; break;
case X86ISD::FMADD_RND: Opcode = X86ISD::FNMSUB_RND; break;
@@ -42588,18 +45149,20 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc,
/// Do target-specific dag combines on floating point negations.
static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
EVT OrigVT = N->getValueType(0);
SDValue Arg = isFNEG(DAG, N);
if (!Arg)
return SDValue();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = Arg.getValueType();
EVT SVT = VT.getScalarType();
SDLoc DL(N);
// Let legalize expand this if it isn't a legal type yet.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ if (!TLI.isTypeLegal(VT))
return SDValue();
// If we're negating a FMUL node on a target with FMA, then we can avoid the
@@ -42613,80 +45176,25 @@ static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
return DAG.getBitcast(OrigVT, NewNode);
}
- // If we're negating an FMA node, then we can adjust the
- // instruction to include the extra negation.
- if (Arg.hasOneUse() && Subtarget.hasAnyFMA()) {
- switch (Arg.getOpcode()) {
- case ISD::FMA:
- case X86ISD::FMSUB:
- case X86ISD::FNMADD:
- case X86ISD::FNMSUB:
- case X86ISD::FMADD_RND:
- case X86ISD::FMSUB_RND:
- case X86ISD::FNMADD_RND:
- case X86ISD::FNMSUB_RND: {
- // We can't handle scalar intrinsic node here because it would only
- // invert one element and not the whole vector. But we could try to handle
- // a negation of the lower element only.
- unsigned NewOpcode = negateFMAOpcode(Arg.getOpcode(), false, false, true);
- return DAG.getBitcast(OrigVT, DAG.getNode(NewOpcode, DL, VT, Arg->ops()));
- }
- }
- }
+ bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
+ bool LegalOperations = !DCI.isBeforeLegalizeOps();
+ if (SDValue NegArg =
+ TLI.getNegatedExpression(Arg, DAG, LegalOperations, CodeSize))
+ return DAG.getBitcast(OrigVT, NegArg);
return SDValue();
}
-char X86TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG,
- bool LegalOperations,
- bool ForCodeSize,
- unsigned Depth) const {
- // fneg patterns are removable even if they have multiple uses.
- if (isFNEG(DAG, Op.getNode(), Depth))
- return 2;
-
- // Don't recurse exponentially.
- if (Depth > SelectionDAG::MaxRecursionDepth)
- return 0;
-
- EVT VT = Op.getValueType();
- EVT SVT = VT.getScalarType();
- switch (Op.getOpcode()) {
- case ISD::FMA:
- case X86ISD::FMSUB:
- case X86ISD::FNMADD:
- case X86ISD::FNMSUB:
- case X86ISD::FMADD_RND:
- case X86ISD::FMSUB_RND:
- case X86ISD::FNMADD_RND:
- case X86ISD::FNMSUB_RND: {
- if (!Op.hasOneUse() || !Subtarget.hasAnyFMA() || !isTypeLegal(VT) ||
- !(SVT == MVT::f32 || SVT == MVT::f64) || !LegalOperations)
- break;
-
- // This is always negatible for free but we might be able to remove some
- // extra operand negations as well.
- for (int i = 0; i != 3; ++i) {
- char V = isNegatibleForFree(Op.getOperand(i), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
- if (V == 2)
- return V;
- }
- return 1;
- }
- }
-
- return TargetLowering::isNegatibleForFree(Op, DAG, LegalOperations,
- ForCodeSize, Depth);
-}
-
SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
bool LegalOperations,
bool ForCodeSize,
+ NegatibleCost &Cost,
unsigned Depth) const {
// fneg patterns are removable even if they have multiple uses.
- if (SDValue Arg = isFNEG(DAG, Op.getNode(), Depth))
+ if (SDValue Arg = isFNEG(DAG, Op.getNode(), Depth)) {
+ Cost = NegatibleCost::Cheaper;
return DAG.getBitcast(Op.getValueType(), Arg);
+ }
EVT VT = Op.getValueType();
EVT SVT = VT.getScalarType();
@@ -42701,35 +45209,41 @@ SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
case X86ISD::FNMADD_RND:
case X86ISD::FNMSUB_RND: {
if (!Op.hasOneUse() || !Subtarget.hasAnyFMA() || !isTypeLegal(VT) ||
- !(SVT == MVT::f32 || SVT == MVT::f64) || !LegalOperations)
+ !(SVT == MVT::f32 || SVT == MVT::f64) ||
+ !isOperationLegal(ISD::FMA, VT))
break;
// This is always negatible for free but we might be able to remove some
// extra operand negations as well.
SmallVector<SDValue, 4> NewOps(Op.getNumOperands(), SDValue());
- for (int i = 0; i != 3; ++i) {
- char V = isNegatibleForFree(Op.getOperand(i), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
- if (V == 2)
- NewOps[i] = getNegatedExpression(Op.getOperand(i), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
- }
+ for (int i = 0; i != 3; ++i)
+ NewOps[i] = getCheaperNegatedExpression(
+ Op.getOperand(i), DAG, LegalOperations, ForCodeSize, Depth + 1);
bool NegA = !!NewOps[0];
bool NegB = !!NewOps[1];
bool NegC = !!NewOps[2];
unsigned NewOpc = negateFMAOpcode(Opc, NegA != NegB, NegC, true);
+ Cost = (NegA || NegB || NegC) ? NegatibleCost::Cheaper
+ : NegatibleCost::Neutral;
+
// Fill in the non-negated ops with the original values.
for (int i = 0, e = Op.getNumOperands(); i != e; ++i)
if (!NewOps[i])
NewOps[i] = Op.getOperand(i);
return DAG.getNode(NewOpc, SDLoc(Op), VT, NewOps);
}
+ case X86ISD::FRCP:
+ if (SDValue NegOp0 =
+ getNegatedExpression(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Cost, Depth + 1))
+ return DAG.getNode(Opc, SDLoc(Op), VT, NegOp0);
+ break;
}
return TargetLowering::getNegatedExpression(Op, DAG, LegalOperations,
- ForCodeSize, Depth);
+ ForCodeSize, Cost, Depth);
}
static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
@@ -42790,6 +45304,9 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
if (SDValue Cmp = foldVectorXorShiftIntoCmp(N, DAG, Subtarget))
return Cmp;
+ if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
+ return R;
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -42802,33 +45319,21 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
return FPLogic;
- return combineFneg(N, DAG, Subtarget);
+ return combineFneg(N, DAG, DCI, Subtarget);
}
static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
EVT VT = N->getValueType(0);
unsigned NumBits = VT.getSizeInBits();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-
// TODO - Constant Folding.
- if (auto *Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
- // Reduce Cst1 to the bottom 16-bits.
- // NOTE: SimplifyDemandedBits won't do this for constants.
- const APInt &Val1 = Cst1->getAPIntValue();
- APInt MaskedVal1 = Val1 & 0xFFFF;
- if (MaskedVal1 != Val1)
- return DAG.getNode(X86ISD::BEXTR, SDLoc(N), VT, Op0,
- DAG.getConstant(MaskedVal1, SDLoc(N), VT));
- }
-
- // Only bottom 16-bits of the control bits are required.
- APInt DemandedMask(APInt::getLowBitsSet(NumBits, 16));
- if (TLI.SimplifyDemandedBits(Op1, DemandedMask, DCI))
+
+ // Simplify the inputs.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ APInt DemandedMask(APInt::getAllOnesValue(NumBits));
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
return SDValue(N, 0);
return SDValue();
@@ -42919,6 +45424,7 @@ static SDValue combineFAndn(SDNode *N, SelectionDAG &DAG,
/// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
static SDValue combineFOr(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
@@ -42930,7 +45436,7 @@ static SDValue combineFOr(SDNode *N, SelectionDAG &DAG,
if (isNullFPScalarOrVectorConst(N->getOperand(1)))
return N->getOperand(0);
- if (SDValue NewVal = combineFneg(N, DAG, Subtarget))
+ if (SDValue NewVal = combineFneg(N, DAG, DCI, Subtarget))
return NewVal;
return lowerX86FPLogicOp(N, DAG, Subtarget);
@@ -43041,23 +45547,16 @@ static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
assert(InVT.is128BitVector() && "Expected 128-bit input vector");
LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
- // Unless the load is volatile or atomic.
- if (LN->isSimple()) {
+ unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();
+ MVT MemVT = MVT::getIntegerVT(NumBits);
+ MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
+ if (SDValue VZLoad = narrowLoadToVZLoad(LN, MemVT, LoadVT, DAG)) {
SDLoc dl(N);
- unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();
- MVT MemVT = MVT::getIntegerVT(NumBits);
- MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
- SDVTList Tys = DAG.getVTList(LoadVT, MVT::Other);
- SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
- SDValue VZLoad =
- DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, MemVT,
- LN->getPointerInfo(),
- LN->getAlignment(),
- LN->getMemOperand()->getFlags());
SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT,
DAG.getBitcast(InVT, VZLoad));
DCI.CombineTo(N, Convert);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(LN);
return SDValue(N, 0);
}
}
@@ -43067,33 +45566,33 @@ static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
- // FIXME: Handle strict fp nodes.
+ bool IsStrict = N->isTargetStrictFPOpcode();
EVT VT = N->getValueType(0);
// Convert a full vector load into vzload when not all bits are needed.
- SDValue In = N->getOperand(0);
+ SDValue In = N->getOperand(IsStrict ? 1 : 0);
MVT InVT = In.getSimpleValueType();
if (VT.getVectorNumElements() < InVT.getVectorNumElements() &&
ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
assert(InVT.is128BitVector() && "Expected 128-bit input vector");
LoadSDNode *LN = cast<LoadSDNode>(In);
- // Unless the load is volatile or atomic.
- if (LN->isSimple()) {
+ unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();
+ MVT MemVT = MVT::getFloatingPointVT(NumBits);
+ MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
+ if (SDValue VZLoad = narrowLoadToVZLoad(LN, MemVT, LoadVT, DAG)) {
SDLoc dl(N);
- unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();
- MVT MemVT = MVT::getFloatingPointVT(NumBits);
- MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
- SDVTList Tys = DAG.getVTList(LoadVT, MVT::Other);
- SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
- SDValue VZLoad =
- DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, MemVT,
- LN->getPointerInfo(),
- LN->getAlignment(),
- LN->getMemOperand()->getFlags());
- SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT,
- DAG.getBitcast(InVT, VZLoad));
- DCI.CombineTo(N, Convert);
+ if (IsStrict) {
+ SDValue Convert =
+ DAG.getNode(N->getOpcode(), dl, {VT, MVT::Other},
+ {N->getOperand(0), DAG.getBitcast(InVT, VZLoad)});
+ DCI.CombineTo(N, Convert, Convert.getValue(1));
+ } else {
+ SDValue Convert =
+ DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(InVT, VZLoad));
+ DCI.CombineTo(N, Convert);
+ }
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(LN);
return SDValue(N, 0);
}
}
@@ -43132,14 +45631,58 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
static SDValue combineBT(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
- SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// BT ignores high bits in the bit index operand.
unsigned BitWidth = N1.getValueSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
- if (SDValue DemandedN1 = DAG.GetDemandedBits(N1, DemandedMask))
- return DAG.getNode(X86ISD::BT, SDLoc(N), MVT::i32, N0, DemandedN1);
+ if (DAG.getTargetLoweringInfo().SimplifyDemandedBits(N1, DemandedMask, DCI)) {
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ DCI.AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+
+ return SDValue();
+}
+
+static SDValue combineCVTPH2PS(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ bool IsStrict = N->getOpcode() == X86ISD::STRICT_CVTPH2PS;
+ SDValue Src = N->getOperand(IsStrict ? 1 : 0);
+
+ if (N->getValueType(0) == MVT::v4f32 && Src.getValueType() == MVT::v8i16) {
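+ // v4f32 CVTPH2PS only reads the low 4 of the 8 i16 source elements.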
+ APInt KnownUndef, KnownZero;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ APInt DemandedElts = APInt::getLowBitsSet(8, 4);
+ if (TLI.SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
+ DCI)) {
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ DCI.AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+
+ // Convert a full vector load into vzload when not all bits are needed.
+ if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse()) {
+ LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(IsStrict ? 1 : 0));
+ if (SDValue VZLoad = narrowLoadToVZLoad(LN, MVT::i64, MVT::v2i64, DAG)) {
+ SDLoc dl(N);
+ if (IsStrict) {
+ SDValue Convert = DAG.getNode(
+ N->getOpcode(), dl, {MVT::v4f32, MVT::Other},
+ {N->getOperand(0), DAG.getBitcast(MVT::v8i16, VZLoad)});
+ DCI.CombineTo(N, Convert, Convert.getValue(1));
+ } else {
+ SDValue Convert = DAG.getNode(N->getOpcode(), dl, MVT::v4f32,
+ DAG.getBitcast(MVT::v8i16, VZLoad));
+ DCI.CombineTo(N, Convert);
+ }
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(LN);
+ return SDValue(N, 0);
+ }
+ }
+ }
return SDValue();
}
@@ -43225,7 +45768,7 @@ static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG,
//(sext_in_reg (v4i64 anyext (v4i32 x )), ExtraVT) ->
// (v4i64 sext (v4i32 sext_in_reg (v4i32 x , ExtraVT)))
if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND ||
- N0.getOpcode() == ISD::SIGN_EXTEND)) {
+ N0.getOpcode() == ISD::SIGN_EXTEND)) {
SDValue N00 = N0.getOperand(0);
// EXTLOAD has a better solution on AVX2,
@@ -43234,9 +45777,14 @@ static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG,
if (!ISD::isNormalLoad(N00.getNode()))
return SDValue();
+ // Attempt to promote any comparison mask ops before the SIGN_EXTEND_INREG
+ // gets in the way.
+ if (SDValue Promote = PromoteMaskArithmetic(N0.getNode(), DAG, Subtarget))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Promote, N1);
+
if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) {
- SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32,
- N00, N1);
+ SDValue Tmp =
+ DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, N00, N1);
return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp);
}
}
@@ -43421,6 +45969,21 @@ combineToExtendBoolVectorInReg(SDNode *N, SelectionDAG &DAG,
for (unsigned i = 0; i != Scale; ++i)
ShuffleMask.append(EltSizeInBits, i);
+ Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask);
+ } else if (Subtarget.hasAVX2() && NumElts < EltSizeInBits &&
+ (SclVT == MVT::i8 || SclVT == MVT::i16 || SclVT == MVT::i32)) {
+ // If we have register broadcast instructions, use the scalar size as the
+ // element type for the shuffle. Then cast to the wider element type. The
+ // widened bits won't be used, and this might allow the use of a broadcast
+ // load.
+ assert((EltSizeInBits % NumElts) == 0 && "Unexpected integer scale");
+ unsigned Scale = EltSizeInBits / NumElts;
+ EVT BroadcastVT =
+ EVT::getVectorVT(*DAG.getContext(), SclVT, NumElts * Scale);
+ Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00);
+ ShuffleMask.append(NumElts * Scale, 0);
+ Vec = DAG.getVectorShuffle(BroadcastVT, DL, Vec, Vec, ShuffleMask);
+ Vec = DAG.getBitcast(VT, Vec);
} else {
// For smaller scalar integers, we can simply any-extend it to the vector
// element size (we don't care about the upper bits) and broadcast it to all
@@ -43428,8 +45991,8 @@ combineToExtendBoolVectorInReg(SDNode *N, SelectionDAG &DAG,
SDValue Scl = DAG.getAnyExtOrTrunc(N00, DL, SVT);
Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl);
ShuffleMask.append(NumElts, 0);
+ Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask);
}
- Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask);
// Now, mask the relevant bit in each element.
SmallVector<SDValue, 32> Bits;
@@ -43474,7 +46037,7 @@ static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
// We can only do this if the vector size in 256 bits or less.
unsigned Size = VT.getSizeInBits();
- if (Size > 256)
+ if (Size > 256 && Subtarget.useAVX512Regs())
return SDValue();
// Don't fold if the condition code can't be handled by PCMPEQ/PCMPGT since
@@ -43492,7 +46055,7 @@ static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
SDValue Res = DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
if (N->getOpcode() == ISD::ZERO_EXTEND)
- Res = DAG.getZeroExtendInReg(Res, dl, N0.getValueType().getScalarType());
+ Res = DAG.getZeroExtendInReg(Res, dl, N0.getValueType());
return Res;
}
@@ -43505,6 +46068,23 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
EVT InVT = N0.getValueType();
SDLoc DL(N);
+ // (i32 (sext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry))
+ if (!DCI.isBeforeLegalizeOps() &&
+ N0.getOpcode() == X86ISD::SETCC_CARRY) {
+ SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, N0->getOperand(0),
+ N0->getOperand(1));
+ bool ReplaceOtherUses = !N0.hasOneUse();
+ DCI.CombineTo(N, Setcc);
+ // Replace other uses with a truncate of the widened setcc_carry.
+ if (ReplaceOtherUses) {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
+ N0.getValueType(), Setcc);
+ DCI.CombineTo(N0.getNode(), Trunc);
+ }
+
+ return SDValue(N, 0);
+ }
+
if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
return NewCMov;
@@ -43542,6 +46122,7 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
+ bool IsStrict = N->isStrictFPOpcode() || N->isTargetStrictFPOpcode();
// Let legalize expand this if it isn't a legal type yet.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -43552,15 +46133,16 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA())
return SDValue();
- SDValue A = N->getOperand(0);
- SDValue B = N->getOperand(1);
- SDValue C = N->getOperand(2);
+ SDValue A = N->getOperand(IsStrict ? 1 : 0);
+ SDValue B = N->getOperand(IsStrict ? 2 : 1);
+ SDValue C = N->getOperand(IsStrict ? 3 : 2);
auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) {
bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
bool LegalOperations = !DCI.isBeforeLegalizeOps();
- if (TLI.isNegatibleForFree(V, DAG, LegalOperations, CodeSize) == 2) {
- V = TLI.getNegatedExpression(V, DAG, LegalOperations, CodeSize);
+ if (SDValue NegV = TLI.getCheaperNegatedExpression(V, DAG, LegalOperations,
+ CodeSize)) {
+ V = NegV;
return true;
}
// Look through extract_vector_elts. If it comes from an FNEG, create a
@@ -43568,11 +46150,10 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isNullConstant(V.getOperand(1))) {
SDValue Vec = V.getOperand(0);
- if (TLI.isNegatibleForFree(Vec, DAG, LegalOperations, CodeSize) == 2) {
- SDValue NegVal =
- TLI.getNegatedExpression(Vec, DAG, LegalOperations, CodeSize);
+ if (SDValue NegV = TLI.getCheaperNegatedExpression(
+ Vec, DAG, LegalOperations, CodeSize)) {
V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), V.getValueType(),
- NegVal, V.getOperand(1));
+ NegV, V.getOperand(1));
return true;
}
}
@@ -43592,9 +46173,15 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
unsigned NewOpcode =
negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC, false);
- if (N->getNumOperands() == 4)
- return DAG.getNode(NewOpcode, dl, VT, A, B, C, N->getOperand(3));
- return DAG.getNode(NewOpcode, dl, VT, A, B, C);
+ if (IsStrict) {
+ assert(N->getNumOperands() == 4 && "Shouldn't be greater than 4");
+ return DAG.getNode(NewOpcode, dl, {VT, MVT::Other},
+ {N->getOperand(0), A, B, C});
+ } else {
+ if (N->getNumOperands() == 4)
+ return DAG.getNode(NewOpcode, dl, VT, A, B, C, N->getOperand(3));
+ return DAG.getNode(NewOpcode, dl, VT, A, B, C);
+ }
}
// Combine FMADDSUB(A, B, FNEG(C)) -> FMSUBADD(A, B, C)
@@ -43608,10 +46195,11 @@ static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG,
bool LegalOperations = !DCI.isBeforeLegalizeOps();
SDValue N2 = N->getOperand(2);
- if (TLI.isNegatibleForFree(N2, DAG, LegalOperations, CodeSize) != 2)
- return SDValue();
- SDValue NegN2 = TLI.getNegatedExpression(N2, DAG, LegalOperations, CodeSize);
+ SDValue NegN2 =
+ TLI.getCheaperNegatedExpression(N2, DAG, LegalOperations, CodeSize);
+ if (!NegN2)
+ return SDValue();
unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), false, true, false);
if (N->getNumOperands() == 4)
@@ -43624,38 +46212,26 @@ static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG,
static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
- // (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
- // (and (i32 x86isd::setcc_carry), 1)
- // This eliminates the zext. This transformation is necessary because
- // ISD::SETCC is always legalized to i8.
SDLoc dl(N);
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (N0.getOpcode() == ISD::AND &&
- N0.hasOneUse() &&
- N0.getOperand(0).hasOneUse()) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
- if (!isOneConstant(N0.getOperand(1)))
- return SDValue();
- return DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
- N00.getOperand(0), N00.getOperand(1)),
- DAG.getConstant(1, dl, VT));
+ // (i32 (aext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry))
+ // FIXME: Is this needed? We don't seem to have any tests for it.
+ if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ANY_EXTEND &&
+ N0.getOpcode() == X86ISD::SETCC_CARRY) {
+ SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, N0->getOperand(0),
+ N0->getOperand(1));
+ bool ReplaceOtherUses = !N0.hasOneUse();
+ DCI.CombineTo(N, Setcc);
+ // Replace other uses with a truncate of the widened setcc_carry.
+ if (ReplaceOtherUses) {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
+ N0.getValueType(), Setcc);
+ DCI.CombineTo(N0.getNode(), Trunc);
}
- }
- if (N0.getOpcode() == ISD::TRUNCATE &&
- N0.hasOneUse() &&
- N0.getOperand(0).hasOneUse()) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
- return DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
- N00.getOperand(0), N00.getOperand(1)),
- DAG.getConstant(1, dl, VT));
- }
+ return SDValue(N, 0);
}
if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
@@ -43768,13 +46344,12 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
EVT VT = SetCC->getValueType(0);
SDLoc DL(SetCC);
- bool HasAVX = Subtarget.hasAVX();
// Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands.
// Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands.
// Otherwise use PCMPEQ (plus AND) and mask testing.
if ((OpSize == 128 && Subtarget.hasSSE2()) ||
- (OpSize == 256 && HasAVX) ||
+ (OpSize == 256 && Subtarget.hasAVX()) ||
(OpSize == 512 && Subtarget.useAVX512Regs())) {
bool HasPT = Subtarget.hasSSE41();
@@ -43828,11 +46403,9 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
X = DAG.getBitcast(TmpCastVT, X);
if (!NeedZExt && !TmpZext)
return X;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- MVT VecIdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT,
DAG.getConstant(0, DL, VecVT), X,
- DAG.getConstant(0, DL, VecIdxVT));
+ DAG.getVectorIdxConstant(0, DL));
};
SDValue Cmp;
@@ -43865,17 +46438,16 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
Cmp);
SDValue PT = DAG.getNode(X86ISD::PTEST, DL, MVT::i32, BCCmp, BCCmp);
X86::CondCode X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
- SDValue SetCC = getSETCC(X86CC, PT, DL, DAG);
- return DAG.getNode(ISD::TRUNCATE, DL, VT, SetCC.getValue(0));
+ SDValue X86SetCC = getSETCC(X86CC, PT, DL, DAG);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, X86SetCC.getValue(0));
}
// If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality.
// setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq
// setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne
- // setcc i256 X, Y, eq --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, eq
- // setcc i256 X, Y, ne --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, ne
+ assert(Cmp.getValueType() == MVT::v16i8 &&
+ "Non 128-bit vector on pre-SSE41 target");
SDValue MovMsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Cmp);
- SDValue FFFFs = DAG.getConstant(OpSize == 128 ? 0xFFFF : 0xFFFFFFFF, DL,
- MVT::i32);
+ SDValue FFFFs = DAG.getConstant(0xFFFF, DL, MVT::i32);
return DAG.getSetCC(DL, VT, MovMsk, FFFFs, CC);
}
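
// A minimal standalone sketch (not part of this patch) of the pre-SSE4.1
// 128-bit path of combineVectorSizedSetCCEquality above: compare all bytes
// with pcmpeqb and test the pmovmskb result against 0xFFFF. Assumes SSE2 and
// an illustrative helper name.
#include <emmintrin.h>
#include <cassert>
#include <cstring>

static bool equal16Bytes(const void *A, const void *B) {
  __m128i VA = _mm_loadu_si128(static_cast<const __m128i *>(A));
  __m128i VB = _mm_loadu_si128(static_cast<const __m128i *>(B));
  __m128i Eq = _mm_cmpeq_epi8(VA, VB);     // 0xFF in every matching byte
  return _mm_movemask_epi8(Eq) == 0xFFFF;  // all 16 bytes matched
}

int main() {
  char X[16] = "0123456789abcde";
  char Y[16];
  std::memcpy(Y, X, sizeof(Y));
  assert(equal16Bytes(X, Y));
  Y[7] ^= 1;
  assert(!equal16Bytes(X, Y));
  return 0;
}
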
@@ -43892,23 +46464,16 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N);
if (CC == ISD::SETNE || CC == ISD::SETEQ) {
- // 0-x == y --> x+y == 0
- // 0-x != y --> x+y != 0
- if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
- LHS.hasOneUse()) {
- SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, RHS, LHS.getOperand(1));
- return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
- }
- // x == 0-y --> x+y == 0
- // x != 0-y --> x+y != 0
- if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
- RHS.hasOneUse()) {
- SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
- return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
- }
-
if (SDValue V = combineVectorSizedSetCCEquality(N, DAG, Subtarget))
return V;
+
+ if (VT == MVT::i1 && isNullConstant(RHS)) {
+ SDValue X86CC;
+ if (SDValue V =
+ MatchVectorAllZeroTest(LHS, CC, DL, Subtarget, DAG, X86CC))
+ return DAG.getNode(ISD::TRUNCATE, DL, VT,
+ DAG.getNode(X86ISD::SETCC, DL, MVT::i8, X86CC, V));
+ }
}
if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
@@ -43931,7 +46496,7 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
if (IsSEXT0 && IsVZero1) {
assert(VT == Op0.getOperand(0).getValueType() &&
- "Uexpected operand type");
+ "Unexpected operand type");
if (TmpCC == ISD::SETGT)
return DAG.getConstant(0, DL, VT);
if (TmpCC == ISD::SETLE)
@@ -44021,20 +46586,43 @@ static SDValue combineX86GatherScatter(SDNode *N, SelectionDAG &DAG,
if (Mask.getScalarValueSizeInBits() != 1) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits()));
- if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
+ if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) {
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ DCI.AddToWorklist(N);
return SDValue(N, 0);
+ }
}
return SDValue();
}
+static SDValue rebuildGatherScatter(MaskedGatherScatterSDNode *GorS,
+ SDValue Index, SDValue Base, SDValue Scale,
+ SelectionDAG &DAG) {
+ SDLoc DL(GorS);
+
+ if (auto *Gather = dyn_cast<MaskedGatherSDNode>(GorS)) {
+ SDValue Ops[] = { Gather->getChain(), Gather->getPassThru(),
+ Gather->getMask(), Base, Index, Scale } ;
+ return DAG.getMaskedGather(Gather->getVTList(),
+ Gather->getMemoryVT(), DL, Ops,
+ Gather->getMemOperand(),
+ Gather->getIndexType());
+ }
+ auto *Scatter = cast<MaskedScatterSDNode>(GorS);
+ SDValue Ops[] = { Scatter->getChain(), Scatter->getValue(),
+ Scatter->getMask(), Base, Index, Scale };
+ return DAG.getMaskedScatter(Scatter->getVTList(),
+ Scatter->getMemoryVT(), DL,
+ Ops, Scatter->getMemOperand(),
+ Scatter->getIndexType());
+}
+
static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
SDLoc DL(N);
auto *GorS = cast<MaskedGatherScatterSDNode>(N);
- SDValue Chain = GorS->getChain();
SDValue Index = GorS->getIndex();
- SDValue Mask = GorS->getMask();
SDValue Base = GorS->getBasePtr();
SDValue Scale = GorS->getScale();
@@ -44054,21 +46642,7 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
unsigned NumElts = Index.getValueType().getVectorNumElements();
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
Index = DAG.getNode(ISD::TRUNCATE, DL, NewVT, Index);
- if (auto *Gather = dyn_cast<MaskedGatherSDNode>(GorS)) {
- SDValue Ops[] = { Chain, Gather->getPassThru(),
- Mask, Base, Index, Scale } ;
- return DAG.getMaskedGather(Gather->getVTList(),
- Gather->getMemoryVT(), DL, Ops,
- Gather->getMemOperand(),
- Gather->getIndexType());
- }
- auto *Scatter = cast<MaskedScatterSDNode>(GorS);
- SDValue Ops[] = { Chain, Scatter->getValue(),
- Mask, Base, Index, Scale };
- return DAG.getMaskedScatter(Scatter->getVTList(),
- Scatter->getMemoryVT(), DL,
- Ops, Scatter->getMemOperand(),
- Scatter->getIndexType());
+ return rebuildGatherScatter(GorS, Index, Base, Scale, DAG);
}
}
@@ -44083,21 +46657,7 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
unsigned NumElts = Index.getValueType().getVectorNumElements();
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
Index = DAG.getNode(ISD::TRUNCATE, DL, NewVT, Index);
- if (auto *Gather = dyn_cast<MaskedGatherSDNode>(GorS)) {
- SDValue Ops[] = { Chain, Gather->getPassThru(),
- Mask, Base, Index, Scale } ;
- return DAG.getMaskedGather(Gather->getVTList(),
- Gather->getMemoryVT(), DL, Ops,
- Gather->getMemOperand(),
- Gather->getIndexType());
- }
- auto *Scatter = cast<MaskedScatterSDNode>(GorS);
- SDValue Ops[] = { Chain, Scatter->getValue(),
- Mask, Base, Index, Scale };
- return DAG.getMaskedScatter(Scatter->getVTList(),
- Scatter->getMemoryVT(), DL,
- Ops, Scatter->getMemOperand(),
- Scatter->getIndexType());
+ return rebuildGatherScatter(GorS, Index, Base, Scale, DAG);
}
}
@@ -44110,30 +46670,20 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
EVT IndexVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
Index.getValueType().getVectorNumElements());
Index = DAG.getSExtOrTrunc(Index, DL, IndexVT);
- if (auto *Gather = dyn_cast<MaskedGatherSDNode>(GorS)) {
- SDValue Ops[] = { Chain, Gather->getPassThru(),
- Mask, Base, Index, Scale } ;
- return DAG.getMaskedGather(Gather->getVTList(),
- Gather->getMemoryVT(), DL, Ops,
- Gather->getMemOperand(),
- Gather->getIndexType());
- }
- auto *Scatter = cast<MaskedScatterSDNode>(GorS);
- SDValue Ops[] = { Chain, Scatter->getValue(),
- Mask, Base, Index, Scale };
- return DAG.getMaskedScatter(Scatter->getVTList(),
- Scatter->getMemoryVT(), DL,
- Ops, Scatter->getMemOperand(),
- Scatter->getIndexType());
+ return rebuildGatherScatter(GorS, Index, Base, Scale, DAG);
}
}
// With vector masks we only demand the upper bit of the mask.
+ SDValue Mask = GorS->getMask();
if (Mask.getScalarValueSizeInBits() != 1) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits()));
- if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
+ if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) {
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ DCI.AddToWorklist(N);
return SDValue(N, 0);
+ }
}
return SDValue();
@@ -44172,10 +46722,11 @@ static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// TODO: Could we move this to DAGCombine?
static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
SelectionDAG &DAG) {
- // Take advantage of vector comparisons producing 0 or -1 in each lane to
- // optimize away operation when it's from a constant.
+ // Take advantage of vector comparisons (etc.) producing 0 or -1 in each lane
+  // to optimize away the operation when it's from a constant.
//
// The general transformation is:
// UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
@@ -44187,9 +46738,10 @@ static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
// aren't the same.
EVT VT = N->getValueType(0);
bool IsStrict = N->isStrictFPOpcode();
+ unsigned NumEltBits = VT.getScalarSizeInBits();
SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);
- if (!VT.isVector() || Op0->getOpcode() != ISD::AND ||
- Op0->getOperand(0)->getOpcode() != ISD::SETCC ||
+ if (!VT.isVector() || Op0.getOpcode() != ISD::AND ||
+ DAG.ComputeNumSignBits(Op0.getOperand(0)) != NumEltBits ||
VT.getSizeInBits() != Op0.getValueSizeInBits())
return SDValue();
@@ -44362,7 +46914,6 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
if (!Subtarget.useSoftFloat() && Subtarget.hasX87() &&
Op0.getOpcode() == ISD::LOAD) {
LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
- EVT LdVT = Ld->getValueType(0);
// This transformation is not supported if the result type is f16 or f128.
if (VT == MVT::f16 || VT == MVT::f128)
@@ -44373,11 +46924,12 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
if (Subtarget.hasDQI() && VT != MVT::f80)
return SDValue();
- if (Ld->isSimple() && !VT.isVector() &&
- ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
- !Subtarget.is64Bit() && LdVT == MVT::i64) {
- std::pair<SDValue, SDValue> Tmp = Subtarget.getTargetLowering()->BuildFILD(
- SDValue(N, 0), LdVT, Ld->getChain(), Op0, DAG);
+ if (Ld->isSimple() && !VT.isVector() && ISD::isNormalLoad(Op0.getNode()) &&
+ Op0.hasOneUse() && !Subtarget.is64Bit() && InVT == MVT::i64) {
+ std::pair<SDValue, SDValue> Tmp =
+ Subtarget.getTargetLowering()->BuildFILD(
+ VT, InVT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(), Ld->getOriginalAlign(), DAG);
DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Tmp.second);
return Tmp.first;
}
@@ -44711,7 +47263,7 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
}
if (CC == X86::COND_A) {
- SDValue EFLAGS = Y->getOperand(1);
+ SDValue EFLAGS = Y.getOperand(1);
// Try to convert COND_A into COND_B in an attempt to facilitate
// materializing "setb reg".
//
@@ -44724,13 +47276,44 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS),
EFLAGS.getNode()->getVTList(),
EFLAGS.getOperand(1), EFLAGS.getOperand(0));
- SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
+ SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
DAG.getVTList(VT, MVT::i32), X,
DAG.getConstant(0, DL, VT), NewEFLAGS);
}
}
+ if (CC == X86::COND_AE) {
+ // X + SETAE --> sbb X, -1
+ // X - SETAE --> adc X, -1
+ return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL,
+ DAG.getVTList(VT, MVT::i32), X,
+ DAG.getConstant(-1, DL, VT), Y.getOperand(1));
+ }
+
+ if (CC == X86::COND_BE) {
+ // X + SETBE --> sbb X, -1
+ // X - SETBE --> adc X, -1
+ SDValue EFLAGS = Y.getOperand(1);
+ // Try to convert COND_BE into COND_AE in an attempt to facilitate
+ // materializing "setae reg".
+ //
+ // Do not flip "e <= c", where "c" is a constant, because Cmp instruction
+ // cannot take an immediate as its first operand.
+ //
+ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
+ EFLAGS.getValueType().isInteger() &&
+ !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
+ SDValue NewSub = DAG.getNode(
+ X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
+ EFLAGS.getOperand(1), EFLAGS.getOperand(0));
+ SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
+ return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL,
+ DAG.getVTList(VT, MVT::i32), X,
+ DAG.getConstant(-1, DL, VT), NewEFLAGS);
+ }
+ }
+
if (CC != X86::COND_E && CC != X86::COND_NE)
return SDValue();
@@ -44767,15 +47350,18 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
if ((IsSub && CC == X86::COND_E && ConstantX->isNullValue()) ||
(!IsSub && CC == X86::COND_NE && ConstantX->isAllOnesValue())) {
SDValue One = DAG.getConstant(1, DL, ZVT);
- SDValue Cmp1 = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, One);
+ SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
+ SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);
return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
- DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), Cmp1);
+ DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
+ Cmp1.getValue(1));
}
}
// (cmp Z, 1) sets the carry flag if Z is 0.
SDValue One = DAG.getConstant(1, DL, ZVT);
- SDValue Cmp1 = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, One);
+ SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
+ SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);
// Add the flags type for ADC/SBB nodes.
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
@@ -44784,151 +47370,12 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
// X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1)
if (CC == X86::COND_NE)
return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VTs, X,
- DAG.getConstant(-1ULL, DL, VT), Cmp1);
+ DAG.getConstant(-1ULL, DL, VT), Cmp1.getValue(1));
// X - (Z == 0) --> sub X, (zext(sete Z, 0)) --> sbb X, 0, (cmp Z, 1)
// X + (Z == 0) --> add X, (zext(sete Z, 0)) --> adc X, 0, (cmp Z, 1)
return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, VTs, X,
- DAG.getConstant(0, DL, VT), Cmp1);
-}
-
-static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- if (!Subtarget.hasSSE2())
- return SDValue();
-
- EVT VT = N->getValueType(0);
-
- // If the vector size is less than 128, or greater than the supported RegSize,
- // do not use PMADD.
- if (!VT.isVector() || VT.getVectorNumElements() < 8)
- return SDValue();
-
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
-
- auto UsePMADDWD = [&](SDValue Op) {
- ShrinkMode Mode;
- return Op.getOpcode() == ISD::MUL &&
- canReduceVMulWidth(Op.getNode(), DAG, Mode) &&
- Mode != ShrinkMode::MULU16 &&
- (!Subtarget.hasSSE41() ||
- (Op->isOnlyUserOf(Op.getOperand(0).getNode()) &&
- Op->isOnlyUserOf(Op.getOperand(1).getNode())));
- };
-
- SDValue MulOp, OtherOp;
- if (UsePMADDWD(Op0)) {
- MulOp = Op0;
- OtherOp = Op1;
- } else if (UsePMADDWD(Op1)) {
- MulOp = Op1;
- OtherOp = Op0;
- } else
- return SDValue();
-
- SDLoc DL(N);
- EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
- VT.getVectorNumElements());
- EVT MAddVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
- VT.getVectorNumElements() / 2);
-
- // Shrink the operands of mul.
- SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(0));
- SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(1));
-
- // Madd vector size is half of the original vector size
- auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
- ArrayRef<SDValue> Ops) {
- MVT OpVT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
- return DAG.getNode(X86ISD::VPMADDWD, DL, OpVT, Ops);
- };
- SDValue Madd = SplitOpsAndApply(DAG, Subtarget, DL, MAddVT, { N0, N1 },
- PMADDWDBuilder);
- // Fill the rest of the output with 0
- SDValue Zero = DAG.getConstant(0, DL, Madd.getSimpleValueType());
- SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Madd, Zero);
-
- // Preserve the reduction flag on the ADD. We may need to revisit for the
- // other operand.
- SDNodeFlags Flags;
- Flags.setVectorReduction(true);
- return DAG.getNode(ISD::ADD, DL, VT, Concat, OtherOp, Flags);
-}
-
-static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- if (!Subtarget.hasSSE2())
- return SDValue();
-
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
-
- // TODO: There's nothing special about i32, any integer type above i16 should
- // work just as well.
- if (!VT.isVector() || !VT.isSimple() ||
- !(VT.getVectorElementType() == MVT::i32))
- return SDValue();
-
- unsigned RegSize = 128;
- if (Subtarget.useBWIRegs())
- RegSize = 512;
- else if (Subtarget.hasAVX())
- RegSize = 256;
-
- // We only handle v16i32 for SSE2 / v32i32 for AVX / v64i32 for AVX512.
- // TODO: We should be able to handle larger vectors by splitting them before
- // feeding them into several SADs, and then reducing over those.
- if (VT.getSizeInBits() / 4 > RegSize)
- return SDValue();
-
- // We know N is a reduction add. To match SAD, we need one of the operands to
- // be an ABS.
- SDValue AbsOp = N->getOperand(0);
- SDValue OtherOp = N->getOperand(1);
- if (AbsOp.getOpcode() != ISD::ABS)
- std::swap(AbsOp, OtherOp);
- if (AbsOp.getOpcode() != ISD::ABS)
- return SDValue();
-
- // Check whether we have an abs-diff pattern feeding into the select.
- SDValue SadOp0, SadOp1;
- if(!detectZextAbsDiff(AbsOp, SadOp0, SadOp1))
- return SDValue();
-
- // SAD pattern detected. Now build a SAD instruction and an addition for
- // reduction. Note that the number of elements of the result of SAD is less
- // than the number of elements of its input. Therefore, we could only update
- // part of elements in the reduction vector.
- SDValue Sad = createPSADBW(DAG, SadOp0, SadOp1, DL, Subtarget);
-
- // The output of PSADBW is a vector of i64.
- // We need to turn the vector of i64 into a vector of i32.
- // If the reduction vector is at least as wide as the psadbw result, just
- // bitcast. If it's narrower which can only occur for v2i32, bits 127:16 of
- // the PSADBW will be zero. If we promote/ narrow vectors, truncate the v2i64
- // result to v2i32 which will be removed by type legalization. If we/ widen
- // narrow vectors then we bitcast to v4i32 and extract v2i32.
- MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32);
- Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad);
-
- if (VT.getSizeInBits() > ResVT.getSizeInBits()) {
- // Fill the upper elements with zero to match the add width.
- assert(VT.getSizeInBits() % ResVT.getSizeInBits() == 0 && "Unexpected VTs");
- unsigned NumConcats = VT.getSizeInBits() / ResVT.getSizeInBits();
- SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, DL, ResVT));
- Ops[0] = Sad;
- Sad = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Ops);
- } else if (VT.getSizeInBits() < ResVT.getSizeInBits()) {
- Sad = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Sad,
- DAG.getIntPtrConstant(0, DL));
- }
-
- // Preserve the reduction flag on the ADD. We may need to revisit for the
- // other operand.
- SDNodeFlags Flags;
- Flags.setVectorReduction(true);
- return DAG.getNode(ISD::ADD, DL, VT, Sad, OtherOp, Flags);
+ DAG.getConstant(0, DL, VT), Cmp1.getValue(1));
}
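
// A minimal standalone sketch (not part of this patch) of the carry-flag
// identities the ADC/SBB rewrites above rely on: "cmp Z, 1" borrows (CF = 1)
// exactly when Z == 0, so adc/sbb against 0 or -1 reproduce X +/- (Z == 0)
// and X + (Z != 0). The helper name is illustrative only.
#include <cassert>
#include <cstdint>

static void checkCarryIdentities(uint32_t X, uint32_t Z) {
  uint32_t CF = (Z == 0) ? 1u : 0u;             // borrow out of "cmp Z, 1"
  assert(X - 0u - CF == X - uint32_t(Z == 0));  // sbb X, 0  ==> X - (Z == 0)
  assert(X + 0u + CF == X + uint32_t(Z == 0));  // adc X, 0  ==> X + (Z == 0)
  assert(X - 0xFFFFFFFFu - CF ==
         X + uint32_t(Z != 0));                 // sbb X, -1 ==> X + (Z != 0)
}

int main() {
  const uint32_t Vals[] = {0u, 1u, 42u, 0xFFFFFFFFu};
  for (uint32_t X : Vals)
    for (uint32_t Z : Vals)
      checkCarryIdentities(X, Z);
  return 0;
}
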
static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1,
@@ -45020,30 +47467,25 @@ static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1,
Mode == ShrinkMode::MULU16)
return SDValue();
+ EVT TruncVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
+ VT.getVectorNumElements() * 2);
+ SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Mul.getOperand(0));
+ SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Mul.getOperand(1));
+
auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
- // Shrink by adding truncate nodes and let DAGCombine fold with the
- // sources.
EVT InVT = Ops[0].getValueType();
- assert(InVT.getScalarType() == MVT::i32 &&
- "Unexpected scalar element type");
assert(InVT == Ops[1].getValueType() && "Operands' types mismatch");
EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
InVT.getVectorNumElements() / 2);
- EVT TruncVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
- InVT.getVectorNumElements());
- return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT,
- DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Ops[0]),
- DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Ops[1]));
+ return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT, Ops[0], Ops[1]);
};
- return SplitOpsAndApply(DAG, Subtarget, DL, VT,
- { Mul.getOperand(0), Mul.getOperand(1) },
- PMADDBuilder);
+ return SplitOpsAndApply(DAG, Subtarget, DL, VT, { N0, N1 }, PMADDBuilder);
}
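
// A minimal standalone sketch (not part of this patch) of the reference
// semantics of X86ISD::VPMADDWD (pmaddwd), the node the matchPMADDWD combines
// above build: each i32 lane is the sum of two adjacent sign-extended
// i16 * i16 products. Helper names are illustrative only.
#include <cassert>
#include <cstddef>
#include <cstdint>

static void pmaddwdRef(const int16_t *A, const int16_t *B, int32_t *Out,
                       size_t NumI32Lanes) {
  for (size_t I = 0; I != NumI32Lanes; ++I) {
    int64_t Sum = int64_t(A[2 * I]) * B[2 * I] +
                  int64_t(A[2 * I + 1]) * B[2 * I + 1];
    // pmaddwd wraps rather than saturates; the only wrapping case is two
    // (-32768 * -32768) products, which yields INT32_MIN.
    Out[I] = int32_t(uint32_t(Sum));
  }
}

int main() {
  const int16_t A[8] = {1, 2, 3, 4, -5, 6, 32767, -32768};
  const int16_t B[8] = {10, 20, 30, 40, 50, -60, 1, 1};
  int32_t Out[4];
  pmaddwdRef(A, B, Out, 4);
  assert(Out[0] == 1 * 10 + 2 * 20);
  assert(Out[3] == 32767 - 32768);  // 32767*1 + (-32768)*1 == -1
  return 0;
}
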
// Attempt to turn this pattern into PMADDWD.
-// (mul (add (sext (build_vector)), (sext (build_vector))),
-// (add (sext (build_vector)), (sext (build_vector)))
+// (add (mul (sext (build_vector)), (sext (build_vector))),
+// (mul (sext (build_vector)), (sext (build_vector)))
static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
const SDLoc &DL, EVT VT,
const X86Subtarget &Subtarget) {
@@ -45165,13 +47607,6 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
- const SDNodeFlags Flags = N->getFlags();
- if (Flags.hasVectorReduction()) {
- if (SDValue Sad = combineLoopSADPattern(N, DAG, Subtarget))
- return Sad;
- if (SDValue MAdd = combineLoopMAddPattern(N, DAG, Subtarget))
- return MAdd;
- }
EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
@@ -45262,6 +47697,38 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
SubusRHS = MinLHS;
else
return SDValue();
+ } else if (Op1.getOpcode() == ISD::TRUNCATE &&
+ Op1.getOperand(0).getOpcode() == ISD::UMIN &&
+ (EltVT == MVT::i8 || EltVT == MVT::i16)) {
+ // Special case where the UMIN has been truncated. Try to push the truncate
+ // further up. This is similar to the i32/i64 special processing.
+ SubusLHS = Op0;
+ SDValue MinLHS = Op1.getOperand(0).getOperand(0);
+ SDValue MinRHS = Op1.getOperand(0).getOperand(1);
+ EVT TruncVT = Op1.getOperand(0).getValueType();
+ if (!(Subtarget.hasSSSE3() && (TruncVT == MVT::v8i32 ||
+ TruncVT == MVT::v8i64)) &&
+ !(Subtarget.useBWIRegs() && (TruncVT == MVT::v16i32)))
+ return SDValue();
+ SDValue OpToSaturate;
+ if (MinLHS.getOpcode() == ISD::ZERO_EXTEND &&
+ MinLHS.getOperand(0) == Op0)
+ OpToSaturate = MinRHS;
+ else if (MinRHS.getOpcode() == ISD::ZERO_EXTEND &&
+ MinRHS.getOperand(0) == Op0)
+ OpToSaturate = MinLHS;
+ else
+ return SDValue();
+
+ // Saturate the non-extended input and then truncate it.
+ SDLoc DL(N);
+ SDValue SaturationConst =
+ DAG.getConstant(APInt::getLowBitsSet(TruncVT.getScalarSizeInBits(),
+ VT.getScalarSizeInBits()),
+ DL, TruncVT);
+ SDValue UMin = DAG.getNode(ISD::UMIN, DL, TruncVT, OpToSaturate,
+ SaturationConst);
+ SubusRHS = DAG.getNode(ISD::TRUNCATE, DL, VT, UMin);
} else
return SDValue();
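
// A minimal standalone sketch (not part of this patch) of the scalar identity
// behind the truncated-UMIN case added above: for an n-bit x and a wider y,
//   x - min(x, y)  ==  usubsat(x, min(y, 2^n - 1))  after truncation,
// which is what lets the truncate be pushed past the UMIN. Helper names are
// illustrative only.
#include <algorithm>
#include <cassert>
#include <cstdint>

static uint16_t usubsat16(uint16_t A, uint16_t B) { return A > B ? A - B : 0; }

static void checkTruncatedUminSubus(uint16_t X, uint32_t Y) {
  // Original form: sub(x, trunc(umin(zext(x), y))); the umin is <= x, so the
  // truncation is lossless and the subtraction never wraps.
  uint16_t Lhs = uint16_t(X - uint16_t(std::min(uint32_t(X), Y)));
  // Rewritten form: usubsat(x, trunc(umin(y, 0xFFFF))).
  uint16_t Rhs = usubsat16(X, uint16_t(std::min(Y, uint32_t(0xFFFF))));
  assert(Lhs == Rhs);
}

int main() {
  const uint16_t Xs[] = {0, 1, 1000, 0xFFFF};
  const uint32_t Ys[] = {0, 1, 999, 0xFFFF, 0x10000, 0xFFFFFFFFu};
  for (uint16_t X : Xs)
    for (uint32_t Y : Ys)
      checkTruncatedUminSubus(X, Y);
  return 0;
}
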
@@ -45376,6 +47843,7 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
assert(Subtarget.hasAVX() && "AVX assumed for concat_vectors");
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
return DAG.getUNDEF(VT);
@@ -45386,6 +47854,7 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
return getZeroVector(VT, Subtarget, DAG, DL);
SDValue Op0 = Ops[0];
+ bool IsSplat = llvm::all_of(Ops, [&Op0](SDValue Op) { return Op == Op0; });
// Fold subvector loads into one.
// If needed, look through bitcasts to get to the load.
@@ -45402,13 +47871,28 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
}
// Repeated subvectors.
- if (llvm::all_of(Ops, [Op0](SDValue Op) { return Op == Op0; })) {
+ if (IsSplat) {
// If this broadcast/subv_broadcast is inserted into both halves, use a
// larger broadcast/subv_broadcast.
if (Op0.getOpcode() == X86ISD::VBROADCAST ||
Op0.getOpcode() == X86ISD::SUBV_BROADCAST)
return DAG.getNode(Op0.getOpcode(), DL, VT, Op0.getOperand(0));
+ // If this broadcast_load is inserted into both halves, use a larger
+ // broadcast_load. Update other uses to use an extracted subvector.
+ if (Op0.getOpcode() == X86ISD::VBROADCAST_LOAD) {
+ auto *MemIntr = cast<MemIntrinsicSDNode>(Op0);
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = {MemIntr->getChain(), MemIntr->getBasePtr()};
+ SDValue BcastLd = DAG.getMemIntrinsicNode(
+ X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MemIntr->getMemoryVT(),
+ MemIntr->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(
+ Op0, extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits()));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1));
+ return BcastLd;
+ }
+
// concat_vectors(movddup(x),movddup(x)) -> broadcast(x)
if (Op0.getOpcode() == X86ISD::MOVDDUP && VT == MVT::v4f64 &&
(Subtarget.hasAVX2() || MayFoldLoad(Op0.getOperand(0))))
@@ -45420,12 +47904,19 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
// concat_vectors(scalar_to_vector(x),scalar_to_vector(x)) -> broadcast(x)
if (Op0.getOpcode() == ISD::SCALAR_TO_VECTOR &&
(Subtarget.hasAVX2() ||
- (VT.getScalarSizeInBits() >= 32 && MayFoldLoad(Op0.getOperand(0)))) &&
+ (EltSizeInBits >= 32 && MayFoldLoad(Op0.getOperand(0)))) &&
Op0.getOperand(0).getValueType() == VT.getScalarType())
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Op0.getOperand(0));
- }
- bool IsSplat = llvm::all_of(Ops, [&Op0](SDValue Op) { return Op == Op0; });
+ // concat_vectors(extract_subvector(broadcast(x)),
+ // extract_subvector(broadcast(x))) -> broadcast(x)
+ if (Op0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ Op0.getOperand(0).getValueType() == VT) {
+ if (Op0.getOperand(0).getOpcode() == X86ISD::VBROADCAST ||
+ Op0.getOperand(0).getOpcode() == X86ISD::VBROADCAST_LOAD)
+ return Op0.getOperand(0);
+ }
+ }
// Repeated opcode.
// TODO - combineX86ShufflesRecursively should handle shuffle concatenation
@@ -45435,6 +47926,24 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
})) {
unsigned NumOps = Ops.size();
switch (Op0.getOpcode()) {
+ case X86ISD::SHUFP: {
+ // Add SHUFPD support if/when necessary.
+ if (!IsSplat && VT.getScalarType() == MVT::f32 &&
+ llvm::all_of(Ops, [Op0](SDValue Op) {
+ return Op.getOperand(2) == Op0.getOperand(2);
+ })) {
+ SmallVector<SDValue, 2> LHS, RHS;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ LHS.push_back(Ops[i].getOperand(0));
+ RHS.push_back(Ops[i].getOperand(1));
+ }
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LHS),
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, RHS),
+ Op0.getOperand(2));
+ }
+ break;
+ }
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::PSHUFD:
@@ -45461,8 +47970,42 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
return DAG.getBitcast(VT, Res);
}
break;
+ case X86ISD::VSHLI:
+ case X86ISD::VSRAI:
+ case X86ISD::VSRLI:
+ if (((VT.is256BitVector() && Subtarget.hasInt256()) ||
+ (VT.is512BitVector() && Subtarget.useAVX512Regs() &&
+ (EltSizeInBits >= 32 || Subtarget.useBWIRegs()))) &&
+ llvm::all_of(Ops, [Op0](SDValue Op) {
+ return Op0.getOperand(1) == Op.getOperand(1);
+ })) {
+ SmallVector<SDValue, 2> Src;
+ for (unsigned i = 0; i != NumOps; ++i)
+ Src.push_back(Ops[i].getOperand(0));
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Src),
+ Op0.getOperand(1));
+ }
+ break;
+ case X86ISD::VPERMI:
+ case X86ISD::VROTLI:
+ case X86ISD::VROTRI:
+ if (VT.is512BitVector() && Subtarget.useAVX512Regs() &&
+ llvm::all_of(Ops, [Op0](SDValue Op) {
+ return Op0.getOperand(1) == Op.getOperand(1);
+ })) {
+ SmallVector<SDValue, 2> Src;
+ for (unsigned i = 0; i != NumOps; ++i)
+ Src.push_back(Ops[i].getOperand(0));
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Src),
+ Op0.getOperand(1));
+ }
+ break;
+ case X86ISD::PACKSS:
case X86ISD::PACKUS:
- if (NumOps == 2 && VT.is256BitVector() && Subtarget.hasInt256()) {
+ if (!IsSplat && NumOps == 2 && VT.is256BitVector() &&
+ Subtarget.hasInt256()) {
SmallVector<SDValue, 2> LHS, RHS;
for (unsigned i = 0; i != NumOps; ++i) {
LHS.push_back(Ops[i].getOperand(0));
@@ -45476,6 +48019,24 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcVT, RHS));
}
break;
+ case X86ISD::PALIGNR:
+ if (!IsSplat &&
+ ((VT.is256BitVector() && Subtarget.hasInt256()) ||
+ (VT.is512BitVector() && Subtarget.useBWIRegs())) &&
+ llvm::all_of(Ops, [Op0](SDValue Op) {
+ return Op0.getOperand(2) == Op.getOperand(2);
+ })) {
+ SmallVector<SDValue, 2> LHS, RHS;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ LHS.push_back(Ops[i].getOperand(0));
+ RHS.push_back(Ops[i].getOperand(1));
+ }
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LHS),
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, RHS),
+ Op0.getOperand(2));
+ }
+ break;
}
}
@@ -45565,7 +48126,8 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
// if the insert or extract can be represented with a subregister operation.
if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
SubVec.getOperand(0).getSimpleValueType() == OpVT &&
- (IdxVal != 0 || !Vec.isUndef())) {
+ (IdxVal != 0 ||
+ !(Vec.isUndef() || ISD::isBuildVectorAllZeros(Vec.getNode())))) {
int ExtIdxVal = SubVec.getConstantOperandVal(1);
if (ExtIdxVal != 0) {
int VecNumElts = OpVT.getVectorNumElements();
@@ -45654,7 +48216,7 @@ static SDValue narrowExtractedVectorSelect(SDNode *Ext, SelectionDAG &DAG) {
unsigned SelElts = SelVT.getVectorNumElements();
unsigned CastedElts = WideVT.getVectorNumElements();
- unsigned ExtIdx = cast<ConstantSDNode>(Ext->getOperand(1))->getZExtValue();
+ unsigned ExtIdx = Ext->getConstantOperandVal(1);
if (SelElts % CastedElts == 0) {
// The select has the same or more (narrower) elements than the extract
// operand. The extraction index gets scaled by that factor.
@@ -45699,6 +48261,7 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
MVT VT = N->getSimpleValueType(0);
SDValue InVec = N->getOperand(0);
+ unsigned IdxVal = N->getConstantOperandVal(1);
SDValue InVecBC = peekThroughBitcasts(InVec);
EVT InVecVT = InVec.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -45716,7 +48279,7 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
if (isConcatenatedNot(InVecBC.getOperand(0)) ||
isConcatenatedNot(InVecBC.getOperand(1))) {
// extract (and v4i64 X, (not (concat Y1, Y2))), n -> andnp v2i64 X(n), Y1
- SDValue Concat = split256IntArith(InVecBC, DAG);
+ SDValue Concat = splitVectorIntBinary(InVecBC, DAG);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT,
DAG.getBitcast(InVecVT, Concat), N->getOperand(1));
}
@@ -45728,8 +48291,6 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
if (SDValue V = narrowExtractedVectorSelect(N, DAG))
return V;
- unsigned IdxVal = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
-
if (ISD::isBuildVectorAllZeros(InVec.getNode()))
return getZeroVector(VT, Subtarget, DAG, SDLoc(N));
@@ -45779,6 +48340,43 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
}
}
+  // If we're extracting an upper subvector from a broadcast, we should just
+  // extract the lowest subvector instead, which should allow
+  // SimplifyDemandedVectorElts to do more simplifications.
+ if (IdxVal != 0 && (InVec.getOpcode() == X86ISD::VBROADCAST ||
+ InVec.getOpcode() == X86ISD::VBROADCAST_LOAD))
+ return extractSubVector(InVec, 0, DAG, SDLoc(N), VT.getSizeInBits());
+
+ // If we're extracting a broadcasted subvector, just use the source.
+ if (InVec.getOpcode() == X86ISD::SUBV_BROADCAST &&
+ InVec.getOperand(0).getValueType() == VT)
+ return InVec.getOperand(0);
+
+ // Attempt to extract from the source of a shuffle vector.
+ if ((InVecVT.getSizeInBits() % VT.getSizeInBits()) == 0 &&
+ (IdxVal % VT.getVectorNumElements()) == 0) {
+ SmallVector<int, 32> ShuffleMask;
+ SmallVector<int, 32> ScaledMask;
+ SmallVector<SDValue, 2> ShuffleInputs;
+ unsigned NumSubVecs = InVecVT.getSizeInBits() / VT.getSizeInBits();
+    // Decode the shuffle mask and scale it so it's shuffling subvectors.
+ if (getTargetShuffleInputs(InVecBC, ShuffleInputs, ShuffleMask, DAG) &&
+ scaleShuffleElements(ShuffleMask, NumSubVecs, ScaledMask)) {
+ unsigned SubVecIdx = IdxVal / VT.getVectorNumElements();
+ if (ScaledMask[SubVecIdx] == SM_SentinelUndef)
+ return DAG.getUNDEF(VT);
+ if (ScaledMask[SubVecIdx] == SM_SentinelZero)
+ return getZeroVector(VT, Subtarget, DAG, SDLoc(N));
+ SDValue Src = ShuffleInputs[ScaledMask[SubVecIdx] / NumSubVecs];
+ if (Src.getValueSizeInBits() == InVecVT.getSizeInBits()) {
+ unsigned SrcSubVecIdx = ScaledMask[SubVecIdx] % NumSubVecs;
+ unsigned SrcEltIdx = SrcSubVecIdx * VT.getVectorNumElements();
+ return extractSubVector(DAG.getBitcast(InVecVT, Src), SrcEltIdx, DAG,
+ SDLoc(N), VT.getSizeInBits());
+ }
+ }
+ }
+
// If we're extracting the lowest subvector and we're the only user,
// we may be able to perform this with a smaller vector width.
if (IdxVal == 0 && InVec.hasOneUse()) {
@@ -45851,13 +48449,30 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
Src.getOperand(1));
// Reduce v2i64 to v4i32 if we don't need the upper bits.
- // TODO: Move to DAGCombine?
- if (VT == MVT::v2i64 && Src.getOpcode() == ISD::ANY_EXTEND &&
- Src.getValueType() == MVT::i64 && Src.hasOneUse() &&
- Src.getOperand(0).getScalarValueSizeInBits() <= 32)
- return DAG.getBitcast(
- VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32,
- DAG.getAnyExtOrTrunc(Src.getOperand(0), DL, MVT::i32)));
+ // TODO: Move to DAGCombine/SimplifyDemandedBits?
+ if (VT == MVT::v2i64 || VT == MVT::v2f64) {
+ auto IsAnyExt64 = [](SDValue Op) {
+ if (Op.getValueType() != MVT::i64 || !Op.hasOneUse())
+ return SDValue();
+ if (Op.getOpcode() == ISD::ANY_EXTEND &&
+ Op.getOperand(0).getScalarValueSizeInBits() <= 32)
+ return Op.getOperand(0);
+ if (auto *Ld = dyn_cast<LoadSDNode>(Op))
+ if (Ld->getExtensionType() == ISD::EXTLOAD &&
+ Ld->getMemoryVT().getScalarSizeInBits() <= 32)
+ return Op;
+ return SDValue();
+ };
+ if (SDValue ExtSrc = IsAnyExt64(peekThroughOneUseBitcasts(Src)))
+ return DAG.getBitcast(
+ VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32,
+ DAG.getAnyExtOrTrunc(ExtSrc, DL, MVT::i32)));
+ }
+
+ // Combine (v2i64 (scalar_to_vector (i64 (bitconvert (mmx))))) to MOVQ2DQ.
+ if (VT == MVT::v2i64 && Src.getOpcode() == ISD::BITCAST &&
+ Src.getOperand(0).getValueType() == MVT::x86mmx)
+ return DAG.getNode(X86ISD::MOVQ2DQ, DL, VT, Src.getOperand(0));
return SDValue();
}
@@ -45928,13 +48543,16 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
auto *Ld = cast<LoadSDNode>(In);
if (Ld->isSimple()) {
MVT SVT = In.getSimpleValueType().getVectorElementType();
- ISD::LoadExtType Ext = N->getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
- EVT MemVT = EVT::getVectorVT(*DAG.getContext(), SVT,
- VT.getVectorNumElements());
+ ISD::LoadExtType Ext = N->getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG
+ ? ISD::SEXTLOAD
+ : ISD::ZEXTLOAD;
+ EVT MemVT =
+ EVT::getVectorVT(*DAG.getContext(), SVT, VT.getVectorNumElements());
if (TLI.isLoadExtLegal(Ext, VT, MemVT)) {
SDValue Load =
DAG.getExtLoad(Ext, SDLoc(N), VT, Ld->getChain(), Ld->getBasePtr(),
- Ld->getPointerInfo(), MemVT, Ld->getAlignment(),
+ Ld->getPointerInfo(), MemVT,
+ Ld->getOriginalAlign(),
Ld->getMemOperand()->getFlags());
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
return Load;
@@ -45971,6 +48589,196 @@ static SDValue combineKSHIFT(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Optimize (fp16_to_fp (fp_to_fp16 X)) to VCVTPS2PH followed by VCVTPH2PS.
+// Done as a combine because the lowering for fp16_to_fp and fp_to_fp16 produces
+// extra instructions between the conversions due to going to scalar and back.
+static SDValue combineFP16_TO_FP(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ if (Subtarget.useSoftFloat() || !Subtarget.hasF16C())
+ return SDValue();
+
+ if (N->getOperand(0).getOpcode() != ISD::FP_TO_FP16)
+ return SDValue();
+
+ if (N->getValueType(0) != MVT::f32 ||
+ N->getOperand(0).getOperand(0).getValueType() != MVT::f32)
+ return SDValue();
+
+ SDLoc dl(N);
+ SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32,
+ N->getOperand(0).getOperand(0));
+ Res = DAG.getNode(X86ISD::CVTPS2PH, dl, MVT::v8i16, Res,
+ DAG.getTargetConstant(4, dl, MVT::i32));
+ Res = DAG.getNode(X86ISD::CVTPH2PS, dl, MVT::v4f32, Res);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
+ DAG.getIntPtrConstant(0, dl));
+}
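
// A minimal standalone sketch (not part of this patch) of the scalar F16C
// round trip that combineFP16_TO_FP above maps (fp16_to_fp (fp_to_fp16 X))
// onto, i.e. one VCVTPS2PH / VCVTPH2PS pair. Assumes F16C (e.g. compile with
// -mf16c); _MM_FROUND_CUR_DIRECTION corresponds to the immediate 4 used above.
#include <immintrin.h>
#include <cstdio>

static float roundTripHalf(float X) {
  unsigned short H = _cvtss_sh(X, _MM_FROUND_CUR_DIRECTION);  // vcvtps2ph
  return _cvtsh_ss(H);                                        // vcvtph2ps
}

int main() {
  // 1 + 2^-10 is exactly representable in half precision, so it round-trips.
  std::printf("%f -> %f\n", 1.0009765625, double(roundTripHalf(1.0009765625f)));
  return 0;
}
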
+
+static SDValue combineFP_EXTEND(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
+ return SDValue();
+
+ bool IsStrict = N->isStrictFPOpcode();
+ EVT VT = N->getValueType(0);
+ SDValue Src = N->getOperand(IsStrict ? 1 : 0);
+ EVT SrcVT = Src.getValueType();
+
+ if (!SrcVT.isVector() || SrcVT.getVectorElementType() != MVT::f16)
+ return SDValue();
+
+ if (VT.getVectorElementType() != MVT::f32 &&
+ VT.getVectorElementType() != MVT::f64)
+ return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts == 1 || !isPowerOf2_32(NumElts))
+ return SDValue();
+
+ SDLoc dl(N);
+
+ // Convert the input to vXi16.
+ EVT IntVT = SrcVT.changeVectorElementTypeToInteger();
+ Src = DAG.getBitcast(IntVT, Src);
+
+ // Widen to at least 8 input elements.
+ if (NumElts < 8) {
+ unsigned NumConcats = 8 / NumElts;
+ SDValue Fill = NumElts == 4 ? DAG.getUNDEF(IntVT)
+ : DAG.getConstant(0, dl, IntVT);
+ SmallVector<SDValue, 4> Ops(NumConcats, Fill);
+ Ops[0] = Src;
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, Ops);
+ }
+
+ // Destination is vXf32 with at least 4 elements.
+ EVT CvtVT = EVT::getVectorVT(*DAG.getContext(), MVT::f32,
+ std::max(4U, NumElts));
+ SDValue Cvt, Chain;
+ if (IsStrict) {
+ Cvt = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {CvtVT, MVT::Other},
+ {N->getOperand(0), Src});
+ Chain = Cvt.getValue(1);
+ } else {
+ Cvt = DAG.getNode(X86ISD::CVTPH2PS, dl, CvtVT, Src);
+ }
+
+ if (NumElts < 4) {
+ assert(NumElts == 2 && "Unexpected size");
+ Cvt = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2f32, Cvt,
+ DAG.getIntPtrConstant(0, dl));
+ }
+
+ if (IsStrict) {
+ // Extend to the original VT if necessary.
+ if (Cvt.getValueType() != VT) {
+ Cvt = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {VT, MVT::Other},
+ {Chain, Cvt});
+ Chain = Cvt.getValue(1);
+ }
+ return DAG.getMergeValues({Cvt, Chain}, dl);
+ }
+
+ // Extend to the original VT if necessary.
+ return DAG.getNode(ISD::FP_EXTEND, dl, VT, Cvt);
+}
+
+// Try to find a larger VBROADCAST_LOAD that we can extract from. Limit this to
+// cases where the loads have the same input chain and the output chains are
+// unused. This avoids any memory ordering issues.
+static SDValue combineVBROADCAST_LOAD(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Only do this if the chain result is unused.
+ if (N->hasAnyUseOfValue(1))
+ return SDValue();
+
+ auto *MemIntrin = cast<MemIntrinsicSDNode>(N);
+
+ SDValue Ptr = MemIntrin->getBasePtr();
+ SDValue Chain = MemIntrin->getChain();
+ EVT VT = N->getSimpleValueType(0);
+ EVT MemVT = MemIntrin->getMemoryVT();
+
+ // Look at other users of our base pointer and try to find a wider broadcast.
+ // The input chain and the size of the memory VT must match.
+ for (SDNode *User : Ptr->uses())
+ if (User != N && User->getOpcode() == X86ISD::VBROADCAST_LOAD &&
+ cast<MemIntrinsicSDNode>(User)->getBasePtr() == Ptr &&
+ cast<MemIntrinsicSDNode>(User)->getChain() == Chain &&
+ cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() ==
+ MemVT.getSizeInBits() &&
+ !User->hasAnyUseOfValue(1) &&
+ User->getValueSizeInBits(0) > VT.getSizeInBits()) {
+ SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N),
+ VT.getSizeInBits());
+ Extract = DAG.getBitcast(VT, Extract);
+ return DCI.CombineTo(N, Extract, SDValue(User, 1));
+ }
+
+ return SDValue();
+}
+
+static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
+ SrcVT.getVectorElementType() != MVT::f32)
+ return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts == 1 || !isPowerOf2_32(NumElts))
+ return SDValue();
+
+ SDLoc dl(N);
+
+ // Widen to at least 4 input elements.
+ if (NumElts < 4)
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
+ DAG.getConstantFP(0.0, dl, SrcVT));
+
+ // Destination is v8i16 with at least 8 elements.
+ EVT CvtVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
+ std::max(8U, NumElts));
+ SDValue Cvt = DAG.getNode(X86ISD::CVTPS2PH, dl, CvtVT, Src,
+ DAG.getTargetConstant(4, dl, MVT::i32));
+
+ // Extract down to real number of elements.
+ if (NumElts < 8) {
+ EVT IntVT = VT.changeVectorElementTypeToInteger();
+ Cvt = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, IntVT, Cvt,
+ DAG.getIntPtrConstant(0, dl));
+ }
+
+ return DAG.getBitcast(VT, Cvt);
+}
+
+static SDValue combineMOVDQ2Q(SDNode *N, SelectionDAG &DAG) {
+ SDValue Src = N->getOperand(0);
+
+ // Turn MOVDQ2Q+simple_load into an mmx load.
+ if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse()) {
+ LoadSDNode *LN = cast<LoadSDNode>(Src.getNode());
+
+ if (LN->isSimple()) {
+ SDValue NewLd = DAG.getLoad(MVT::x86mmx, SDLoc(N), LN->getChain(),
+ LN->getBasePtr(),
+ LN->getPointerInfo(),
+ LN->getOriginalAlign(),
+ LN->getMemOperand()->getFlags());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), NewLd.getValue(1));
+ return NewLd;
+ }
+ }
+
+ return SDValue();
+}
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -46002,8 +48810,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::ADC: return combineADC(N, DAG, DCI);
case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget);
case ISD::SHL: return combineShiftLeft(N, DAG);
- case ISD::SRA: return combineShiftRightArithmetic(N, DAG);
- case ISD::SRL: return combineShiftRightLogical(N, DAG, DCI);
+ case ISD::SRA: return combineShiftRightArithmetic(N, DAG, Subtarget);
+ case ISD::SRL: return combineShiftRightLogical(N, DAG, DCI, Subtarget);
case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget);
case ISD::OR: return combineOr(N, DAG, DCI, Subtarget);
case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget);
@@ -46012,6 +48820,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget);
case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget);
+ case X86ISD::VEXTRACT_STORE:
+ return combineVEXTRACT_STORE(N, DAG, DCI, Subtarget);
case ISD::SINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
return combineSIntToFP(N, DAG, DCI, Subtarget);
@@ -46020,14 +48830,14 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
return combineUIntToFP(N, DAG, Subtarget);
case ISD::FADD:
case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
- case ISD::FNEG: return combineFneg(N, DAG, Subtarget);
+ case ISD::FNEG: return combineFneg(N, DAG, DCI, Subtarget);
case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget);
- case X86ISD::VTRUNC: return combineVTRUNC(N, DAG);
+ case X86ISD::VTRUNC: return combineVTRUNC(N, DAG, DCI);
case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget);
case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget);
case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget);
case X86ISD::FXOR:
- case X86ISD::FOR: return combineFOr(N, DAG, Subtarget);
+ case X86ISD::FOR: return combineFOr(N, DAG, DCI, Subtarget);
case X86ISD::FMIN:
case X86ISD::FMAX: return combineFMinFMax(N, DAG);
case ISD::FMINNUM:
@@ -46036,8 +48846,13 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::CVTUI2P: return combineX86INT_TO_FP(N, DAG, DCI);
case X86ISD::CVTP2SI:
case X86ISD::CVTP2UI:
+ case X86ISD::STRICT_CVTTP2SI:
case X86ISD::CVTTP2SI:
- case X86ISD::CVTTP2UI: return combineCVTP2I_CVTTP2I(N, DAG, DCI);
+ case X86ISD::STRICT_CVTTP2UI:
+ case X86ISD::CVTTP2UI:
+ return combineCVTP2I_CVTTP2I(N, DAG, DCI);
+ case X86ISD::STRICT_CVTPH2PS:
+ case X86ISD::CVTPH2PS: return combineCVTPH2PS(N, DAG, DCI);
case X86ISD::BT: return combineBT(N, DAG, DCI);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return combineZext(N, DAG, DCI, Subtarget);
@@ -46060,12 +48875,14 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VSRAI:
case X86ISD::VSRLI:
return combineVectorShiftImm(N, DAG, DCI, Subtarget);
+ case ISD::INSERT_VECTOR_ELT:
case X86ISD::PINSRB:
case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::INSERTPS:
case X86ISD::EXTRQI:
case X86ISD::INSERTQI:
+ case X86ISD::VALIGN:
case X86ISD::PALIGNR:
case X86ISD::VSHLDQ:
case X86ISD::VSRLDQ:
@@ -46097,12 +48914,16 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI,Subtarget);
case X86ISD::FMADD_RND:
case X86ISD::FMSUB:
+ case X86ISD::STRICT_FMSUB:
case X86ISD::FMSUB_RND:
case X86ISD::FNMADD:
+ case X86ISD::STRICT_FNMADD:
case X86ISD::FNMADD_RND:
case X86ISD::FNMSUB:
+ case X86ISD::STRICT_FNMSUB:
case X86ISD::FNMSUB_RND:
- case ISD::FMA: return combineFMA(N, DAG, DCI, Subtarget);
+ case ISD::FMA:
+ case ISD::STRICT_FMA: return combineFMA(N, DAG, DCI, Subtarget);
case X86ISD::FMADDSUB_RND:
case X86ISD::FMSUBADD_RND:
case X86ISD::FMADDSUB:
@@ -46118,6 +48939,12 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI, Subtarget);
case X86ISD::KSHIFTL:
case X86ISD::KSHIFTR: return combineKSHIFT(N, DAG, DCI);
+ case ISD::FP16_TO_FP: return combineFP16_TO_FP(N, DAG, Subtarget);
+ case ISD::STRICT_FP_EXTEND:
+ case ISD::FP_EXTEND: return combineFP_EXTEND(N, DAG, Subtarget);
+ case ISD::FP_ROUND: return combineFP_ROUND(N, DAG, Subtarget);
+ case X86ISD::VBROADCAST_LOAD: return combineVBROADCAST_LOAD(N, DAG, DCI);
+ case X86ISD::MOVDQ2Q: return combineMOVDQ2Q(N, DAG);
}
return SDValue();
@@ -46266,27 +49093,6 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
return true;
}
-bool X86TargetLowering::
- isDesirableToCombineBuildVectorToShuffleTruncate(
- ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const {
-
- assert(SrcVT.getVectorNumElements() == ShuffleMask.size() &&
- "Element count mismatch");
- assert(
- Subtarget.getTargetLowering()->isShuffleMaskLegal(ShuffleMask, SrcVT) &&
- "Shuffle Mask expected to be legal");
-
- // For 32-bit elements VPERMD is better than shuffle+truncate.
- // TODO: After we improve lowerBuildVector, add execption for VPERMW.
- if (SrcVT.getScalarSizeInBits() == 32 || !Subtarget.hasAVX2())
- return false;
-
- if (is128BitLaneCrossingShuffleMask(SrcVT.getSimpleVT(), ShuffleMask))
- return false;
-
- return true;
-}
-
//===----------------------------------------------------------------------===//
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
@@ -46327,7 +49133,7 @@ static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) {
}
bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
- InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
const std::string &AsmStr = IA->getAsmString();
@@ -46450,7 +49256,6 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
case 'y':
case 'x':
case 'v':
- case 'Y':
case 'l':
case 'k': // AVX512 masking registers.
return C_RegisterClass;
@@ -46487,7 +49292,6 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
default:
break;
case 'z':
- case '0':
return C_Register;
case 'i':
case 'm':
@@ -46543,19 +49347,17 @@ TargetLowering::ConstraintWeight
if (type->isX86_MMXTy() && Subtarget.hasMMX())
weight = CW_SpecificReg;
break;
- case 'Y': {
- unsigned Size = StringRef(constraint).size();
- // Pick 'i' as the next char as 'Yi' and 'Y' are synonymous, when matching 'Y'
- char NextChar = Size == 2 ? constraint[1] : 'i';
- if (Size > 2)
+ case 'Y':
+ if (StringRef(constraint).size() != 2)
break;
- switch (NextChar) {
+ switch (constraint[1]) {
default:
return CW_Invalid;
// XMM0
case 'z':
- case '0':
- if ((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1())
+ if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
+ ((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX()) ||
+ ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512()))
return CW_SpecificReg;
return CW_Invalid;
// Conditional OpMask regs (AVX512)
@@ -46568,7 +49370,7 @@ TargetLowering::ConstraintWeight
if (type->isX86_MMXTy() && Subtarget.hasMMX())
return weight;
return CW_Invalid;
- // Any SSE reg when ISA >= SSE2, same as 'Y'
+ // Any SSE reg when ISA >= SSE2, same as 'x'
case 'i':
case 't':
case '2':
@@ -46576,9 +49378,7 @@ TargetLowering::ConstraintWeight
return CW_Invalid;
break;
}
- // Fall through (handle "Y" constraint).
- LLVM_FALLTHROUGH;
- }
+ break;
case 'v':
if ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())
weight = CW_Register;
@@ -46660,8 +49460,6 @@ LowerXConstraint(EVT ConstraintVT) const {
// FP X constraints get lowered to SSE1/2 registers if available, otherwise
// 'f' like normal targets.
if (ConstraintVT.isFloatingPoint()) {
- if (Subtarget.hasSSE2())
- return "Y";
if (Subtarget.hasSSE1())
return "x";
}
@@ -46910,26 +49708,26 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
break;
case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
if (Subtarget.is64Bit()) {
- if (VT == MVT::i32 || VT == MVT::f32)
- return std::make_pair(0U, &X86::GR32RegClass);
- if (VT == MVT::i16)
- return std::make_pair(0U, &X86::GR16RegClass);
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8RegClass);
- if (VT == MVT::i64 || VT == MVT::f64)
+ if (VT == MVT::i16)
+ return std::make_pair(0U, &X86::GR16RegClass);
+ if (VT == MVT::i32 || VT == MVT::f32)
+ return std::make_pair(0U, &X86::GR32RegClass);
+ if (VT != MVT::f80)
return std::make_pair(0U, &X86::GR64RegClass);
break;
}
LLVM_FALLTHROUGH;
// 32-bit fallthrough
case 'Q': // Q_REGS
- if (VT == MVT::i32 || VT == MVT::f32)
- return std::make_pair(0U, &X86::GR32_ABCDRegClass);
- if (VT == MVT::i16)
- return std::make_pair(0U, &X86::GR16_ABCDRegClass);
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_ABCD_LRegClass);
- if (VT == MVT::i64)
+ if (VT == MVT::i16)
+ return std::make_pair(0U, &X86::GR16_ABCDRegClass);
+ if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget.is64Bit())
+ return std::make_pair(0U, &X86::GR32_ABCDRegClass);
+ if (VT != MVT::f80)
return std::make_pair(0U, &X86::GR64_ABCDRegClass);
break;
case 'r': // GENERAL_REGS
@@ -46940,15 +49738,19 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &X86::GR16RegClass);
if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget.is64Bit())
return std::make_pair(0U, &X86::GR32RegClass);
- return std::make_pair(0U, &X86::GR64RegClass);
+ if (VT != MVT::f80)
+ return std::make_pair(0U, &X86::GR64RegClass);
+ break;
case 'R': // LEGACY_REGS
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_NOREXRegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16_NOREXRegClass);
- if (VT == MVT::i32 || !Subtarget.is64Bit())
+ if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget.is64Bit())
return std::make_pair(0U, &X86::GR32_NOREXRegClass);
- return std::make_pair(0U, &X86::GR64_NOREXRegClass);
+ if (VT != MVT::f80)
+ return std::make_pair(0U, &X86::GR64_NOREXRegClass);
+ break;
case 'f': // FP Stack registers.
// If SSE is enabled for this VT, use f80 to ensure the isel moves the
// value to the correct fpstack register class.
@@ -46956,13 +49758,12 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &X86::RFP32RegClass);
if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
return std::make_pair(0U, &X86::RFP64RegClass);
- return std::make_pair(0U, &X86::RFP80RegClass);
+ if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80)
+ return std::make_pair(0U, &X86::RFP80RegClass);
+ break;
case 'y': // MMX_REGS if MMX allowed.
if (!Subtarget.hasMMX()) break;
return std::make_pair(0U, &X86::VR64RegClass);
- case 'Y': // SSE_REGS if SSE2 allowed
- if (!Subtarget.hasSSE2()) break;
- LLVM_FALLTHROUGH;
case 'v':
case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed
if (!Subtarget.hasSSE1()) break;
@@ -46981,7 +49782,13 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (VConstraint && Subtarget.hasVLX())
return std::make_pair(0U, &X86::FR64XRegClass);
return std::make_pair(0U, &X86::FR64RegClass);
- // TODO: Handle i128 in FR128RegClass after it is tested well.
+ case MVT::i128:
+ if (Subtarget.is64Bit()) {
+ if (VConstraint && Subtarget.hasVLX())
+ return std::make_pair(0U, &X86::VR128XRegClass);
+ return std::make_pair(0U, &X86::VR128RegClass);
+ }
+ break;
// Vector types and fp128.
case MVT::f128:
case MVT::v16i8:
@@ -47005,6 +49812,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (Subtarget.hasAVX())
return std::make_pair(0U, &X86::VR256RegClass);
break;
+ case MVT::v64i8:
+ case MVT::v32i16:
case MVT::v8f64:
case MVT::v16f32:
case MVT::v16i32:
@@ -47023,14 +49832,50 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 'i':
case 't':
case '2':
- return getRegForInlineAsmConstraint(TRI, "Y", VT);
+ return getRegForInlineAsmConstraint(TRI, "x", VT);
case 'm':
if (!Subtarget.hasMMX()) break;
return std::make_pair(0U, &X86::VR64RegClass);
case 'z':
- case '0':
if (!Subtarget.hasSSE1()) break;
- return std::make_pair(X86::XMM0, &X86::VR128RegClass);
+ switch (VT.SimpleTy) {
+ default: break;
+ // Scalar SSE types.
+ case MVT::f32:
+ case MVT::i32:
+ return std::make_pair(X86::XMM0, &X86::FR32RegClass);
+ case MVT::f64:
+ case MVT::i64:
+ return std::make_pair(X86::XMM0, &X86::FR64RegClass);
+ case MVT::f128:
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ return std::make_pair(X86::XMM0, &X86::VR128RegClass);
+ // AVX types.
+ case MVT::v32i8:
+ case MVT::v16i16:
+ case MVT::v8i32:
+ case MVT::v4i64:
+ case MVT::v8f32:
+ case MVT::v4f64:
+ if (Subtarget.hasAVX())
+ return std::make_pair(X86::YMM0, &X86::VR256RegClass);
+ break;
+ case MVT::v64i8:
+ case MVT::v32i16:
+ case MVT::v8f64:
+ case MVT::v16f32:
+ case MVT::v16i32:
+ case MVT::v8i64:
+ if (Subtarget.hasAVX512())
+ return std::make_pair(X86::ZMM0, &X86::VR512_0_15RegClass);
+ break;
+ }
+ break;
case 'k':
// This register class doesn't allocate k0 for masked vector operation.
if (Subtarget.hasAVX512()) {
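
The rewritten "z" case above now returns FR32/FR64/VR128/VR256/VR512_0_15 classes rooted at xmm0/ymm0/zmm0 according to the operand type, instead of unconditionally pinning VR128. An illustrative user-level sketch, assuming GCC/Clang extended asm on an SSE4.1-capable target (the function name is made up):

#include <immintrin.h>

// "Yz" requests the first SSE register (xmm0), which the non-VEX blendvps
// encoding uses as its implicit mask operand; "x" allows any SSE register.
__m128 blend_lanes(__m128 a, __m128 b, __m128 mask) {
  asm("blendvps %2, %1, %0" : "+x"(a) : "x"(b), "Yz"(mask));
  return a;
}
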
@@ -47056,7 +49901,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
- std::pair<unsigned, const TargetRegisterClass*> Res;
+ std::pair<Register, const TargetRegisterClass*> Res;
Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// Not found as a standard register?
@@ -47127,7 +49972,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (isGRClass(*Class)) {
unsigned Size = VT.getSizeInBits();
if (Size == 1) Size = 8;
- unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, Size);
+ Register DestReg = getX86SubSuperRegisterOrZero(Res.first, Size);
if (DestReg > 0) {
bool is64Bit = Subtarget.is64Bit();
const TargetRegisterClass *RC =
@@ -47243,8 +50088,7 @@ bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// integer division, leaving the division as-is is a loss even in terms of
// size, because it will have to be scalarized, while the alternative code
// sequence can be performed in vector form.
- bool OptSize =
- Attr.hasAttribute(AttributeList::FunctionIndex, Attribute::MinSize);
+ bool OptSize = Attr.hasFnAttribute(Attribute::MinSize);
return OptSize && !VT.isVector();
}
@@ -47301,10 +50145,35 @@ bool X86TargetLowering::supportSwiftError() const {
return Subtarget.is64Bit();
}
+/// Returns true if stack probing through a function call is requested.
+bool X86TargetLowering::hasStackProbeSymbol(MachineFunction &MF) const {
+ return !getStackProbeSymbolName(MF).empty();
+}
+
+/// Returns true if stack probing through inline assembly is requested.
+bool X86TargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
+
+ // No inline stack probe for Windows; it has its own mechanism.
+ if (Subtarget.isOSWindows() ||
+ MF.getFunction().hasFnAttribute("no-stack-arg-probe"))
+ return false;
+
+ // If the function specifically requests inline stack probes, emit them.
+ if (MF.getFunction().hasFnAttribute("probe-stack"))
+ return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
+ "inline-asm";
+
+ return false;
+}
+
/// Returns the name of the symbol used to emit stack probes or the empty
/// string if not applicable.
StringRef
X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const {
+ // Inline stack probes disable the stack probe call.
+ if (hasInlineStackProbe(MF))
+ return "";
+
// If the function specifically requests stack probes, emit them.
if (MF.getFunction().hasFnAttribute("probe-stack"))
return MF.getFunction().getFnAttribute("probe-stack").getValueAsString();
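
Both new hooks key off the "probe-stack" string attribute on the IR function. A short sketch of how a front end or pass could request either behaviour through the llvm::Function API (the helper names and the probe symbol below are made up for illustration):

#include "llvm/IR/Function.h"

// Ask for inline-asm stack probing, which hasInlineStackProbe() checks for
// and which makes getStackProbeSymbolName() return the empty string.
void requestInlineStackProbes(llvm::Function &F) {
  F.addFnAttr("probe-stack", "inline-asm");
}

// Ask for probing through a named routine instead; getStackProbeSymbolName()
// then returns this value. The symbol name here is hypothetical.
void requestProbeFunction(llvm::Function &F) {
  F.addFnAttr("probe-stack", "__my_probestack");
}
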
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h
index 830cdfc79c0a..7f3dc90a2d73 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h
@@ -14,8 +14,6 @@
#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
namespace llvm {
@@ -24,680 +22,809 @@ namespace llvm {
namespace X86ISD {
// X86 Specific DAG Nodes
- enum NodeType : unsigned {
- // Start the numbering where the builtin ops leave off.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- /// Bit scan forward.
- BSF,
- /// Bit scan reverse.
- BSR,
-
- /// Double shift instructions. These correspond to
- /// X86::SHLDxx and X86::SHRDxx instructions.
- SHLD,
- SHRD,
-
- /// Bitwise logical AND of floating point values. This corresponds
- /// to X86::ANDPS or X86::ANDPD.
- FAND,
-
- /// Bitwise logical OR of floating point values. This corresponds
- /// to X86::ORPS or X86::ORPD.
- FOR,
-
- /// Bitwise logical XOR of floating point values. This corresponds
- /// to X86::XORPS or X86::XORPD.
- FXOR,
-
- /// Bitwise logical ANDNOT of floating point values. This
- /// corresponds to X86::ANDNPS or X86::ANDNPD.
- FANDN,
-
- /// These operations represent an abstract X86 call
- /// instruction, which includes a bunch of information. In particular the
- /// operands of these node are:
- ///
- /// #0 - The incoming token chain
- /// #1 - The callee
- /// #2 - The number of arg bytes the caller pushes on the stack.
- /// #3 - The number of arg bytes the callee pops off the stack.
- /// #4 - The value to pass in AL/AX/EAX (optional)
- /// #5 - The value to pass in DL/DX/EDX (optional)
- ///
- /// The result values of these nodes are:
- ///
- /// #0 - The outgoing token chain
- /// #1 - The first register result value (optional)
- /// #2 - The second register result value (optional)
- ///
- CALL,
-
- /// Same as call except it adds the NoTrack prefix.
- NT_CALL,
-
- /// X86 compare and logical compare instructions.
- CMP, COMI, UCOMI,
-
- /// X86 bit-test instructions.
- BT,
-
- /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
- /// operand, usually produced by a CMP instruction.
- SETCC,
-
- /// X86 Select
- SELECTS,
-
- // Same as SETCC except it's materialized with a sbb and the value is all
- // one's or all zero's.
- SETCC_CARRY, // R = carry_bit ? ~0 : 0
-
- /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
- /// Operands are two FP values to compare; result is a mask of
- /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
- FSETCC,
-
- /// X86 FP SETCC, similar to above, but with output as an i1 mask and
- /// and a version with SAE.
- FSETCCM, FSETCCM_SAE,
-
- /// X86 conditional moves. Operand 0 and operand 1 are the two values
- /// to select from. Operand 2 is the condition code, and operand 3 is the
- /// flag operand produced by a CMP or TEST instruction.
- CMOV,
-
- /// X86 conditional branches. Operand 0 is the chain operand, operand 1
- /// is the block to branch if condition is true, operand 2 is the
- /// condition code, and operand 3 is the flag operand produced by a CMP
- /// or TEST instruction.
- BRCOND,
-
- /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
- /// operand 1 is the target address.
- NT_BRIND,
-
- /// Return with a flag operand. Operand 0 is the chain operand, operand
- /// 1 is the number of bytes of stack to pop.
- RET_FLAG,
-
- /// Return from interrupt. Operand 0 is the number of bytes to pop.
- IRET,
-
- /// Repeat fill, corresponds to X86::REP_STOSx.
- REP_STOS,
-
- /// Repeat move, corresponds to X86::REP_MOVSx.
- REP_MOVS,
-
- /// On Darwin, this node represents the result of the popl
- /// at function entry, used for PIC code.
- GlobalBaseReg,
-
- /// A wrapper node for TargetConstantPool, TargetJumpTable,
- /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
- /// MCSymbol and TargetBlockAddress.
- Wrapper,
-
- /// Special wrapper used under X86-64 PIC mode for RIP
- /// relative displacements.
- WrapperRIP,
-
- /// Copies a 64-bit value from an MMX vector to the low word
- /// of an XMM vector, with the high word zero filled.
- MOVQ2DQ,
-
- /// Copies a 64-bit value from the low word of an XMM vector
- /// to an MMX vector.
- MOVDQ2Q,
-
- /// Copies a 32-bit value from the low word of a MMX
- /// vector to a GPR.
- MMX_MOVD2W,
-
- /// Copies a GPR into the low 32-bit word of a MMX vector
- /// and zero out the high word.
- MMX_MOVW2D,
-
- /// Extract an 8-bit value from a vector and zero extend it to
- /// i32, corresponds to X86::PEXTRB.
- PEXTRB,
-
- /// Extract a 16-bit value from a vector and zero extend it to
- /// i32, corresponds to X86::PEXTRW.
- PEXTRW,
-
- /// Insert any element of a 4 x float vector into any element
- /// of a destination 4 x floatvector.
- INSERTPS,
-
- /// Insert the lower 8-bits of a 32-bit value to a vector,
- /// corresponds to X86::PINSRB.
- PINSRB,
-
- /// Insert the lower 16-bits of a 32-bit value to a vector,
- /// corresponds to X86::PINSRW.
- PINSRW,
-
- /// Shuffle 16 8-bit values within a vector.
- PSHUFB,
-
- /// Compute Sum of Absolute Differences.
- PSADBW,
- /// Compute Double Block Packed Sum-Absolute-Differences
- DBPSADBW,
-
- /// Bitwise Logical AND NOT of Packed FP values.
- ANDNP,
-
- /// Blend where the selector is an immediate.
- BLENDI,
-
- /// Dynamic (non-constant condition) vector blend where only the sign bits
- /// of the condition elements are used. This is used to enforce that the
- /// condition mask is not valid for generic VSELECT optimizations. This
- /// is also used to implement the intrinsics.
- /// Operands are in VSELECT order: MASK, TRUE, FALSE
- BLENDV,
-
- /// Combined add and sub on an FP vector.
- ADDSUB,
-
- // FP vector ops with rounding mode.
- FADD_RND, FADDS, FADDS_RND,
- FSUB_RND, FSUBS, FSUBS_RND,
- FMUL_RND, FMULS, FMULS_RND,
- FDIV_RND, FDIVS, FDIVS_RND,
- FMAX_SAE, FMAXS_SAE,
- FMIN_SAE, FMINS_SAE,
- FSQRT_RND, FSQRTS, FSQRTS_RND,
-
- // FP vector get exponent.
- FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE,
- // Extract Normalized Mantissas.
- VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE,
- // FP Scale.
- SCALEF, SCALEF_RND,
- SCALEFS, SCALEFS_RND,
-
- // Unsigned Integer average.
- AVG,
-
- /// Integer horizontal add/sub.
- HADD,
- HSUB,
-
- /// Floating point horizontal add/sub.
- FHADD,
- FHSUB,
-
- // Detect Conflicts Within a Vector
- CONFLICT,
-
- /// Floating point max and min.
- FMAX, FMIN,
-
- /// Commutative FMIN and FMAX.
- FMAXC, FMINC,
-
- /// Scalar intrinsic floating point max and min.
- FMAXS, FMINS,
-
- /// Floating point reciprocal-sqrt and reciprocal approximation.
- /// Note that these typically require refinement
- /// in order to obtain suitable precision.
- FRSQRT, FRCP,
-
- // AVX-512 reciprocal approximations with a little more precision.
- RSQRT14, RSQRT14S, RCP14, RCP14S,
-
- // Thread Local Storage.
- TLSADDR,
-
- // Thread Local Storage. A call to get the start address
- // of the TLS block for the current module.
- TLSBASEADDR,
-
- // Thread Local Storage. When calling to an OS provided
- // thunk at the address from an earlier relocation.
- TLSCALL,
+ enum NodeType : unsigned {
+ // Start the numbering where the builtin ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// Bit scan forward.
+ BSF,
+ /// Bit scan reverse.
+ BSR,
+
+ /// X86 funnel/double shift i16 instructions. These correspond to
+ /// X86::SHLDW and X86::SHRDW instructions which have different amt
+ /// modulo rules to generic funnel shifts.
+ /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
+ FSHL,
+ FSHR,
+
+ /// Bitwise logical AND of floating point values. This corresponds
+ /// to X86::ANDPS or X86::ANDPD.
+ FAND,
+
+ /// Bitwise logical OR of floating point values. This corresponds
+ /// to X86::ORPS or X86::ORPD.
+ FOR,
+
+ /// Bitwise logical XOR of floating point values. This corresponds
+ /// to X86::XORPS or X86::XORPD.
+ FXOR,
+
+ /// Bitwise logical ANDNOT of floating point values. This
+ /// corresponds to X86::ANDNPS or X86::ANDNPD.
+ FANDN,
+
+ /// These operations represent an abstract X86 call
+ /// instruction, which includes a bunch of information. In particular the
+ /// operands of these node are:
+ ///
+ /// #0 - The incoming token chain
+ /// #1 - The callee
+ /// #2 - The number of arg bytes the caller pushes on the stack.
+ /// #3 - The number of arg bytes the callee pops off the stack.
+ /// #4 - The value to pass in AL/AX/EAX (optional)
+ /// #5 - The value to pass in DL/DX/EDX (optional)
+ ///
+ /// The result values of these nodes are:
+ ///
+ /// #0 - The outgoing token chain
+ /// #1 - The first register result value (optional)
+ /// #2 - The second register result value (optional)
+ ///
+ CALL,
- // Exception Handling helpers.
- EH_RETURN,
+ /// Same as call except it adds the NoTrack prefix.
+ NT_CALL,
- // SjLj exception handling setjmp.
- EH_SJLJ_SETJMP,
+ /// X86 compare and logical compare instructions.
+ CMP,
+ FCMP,
+ COMI,
+ UCOMI,
- // SjLj exception handling longjmp.
- EH_SJLJ_LONGJMP,
+ /// X86 bit-test instructions.
+ BT,
- // SjLj exception handling dispatch.
- EH_SJLJ_SETUP_DISPATCH,
+ /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
+ /// operand, usually produced by a CMP instruction.
+ SETCC,
- /// Tail call return. See X86TargetLowering::LowerCall for
- /// the list of operands.
- TC_RETURN,
+ /// X86 Select
+ SELECTS,
- // Vector move to low scalar and zero higher vector elements.
- VZEXT_MOVL,
+ // Same as SETCC except it's materialized with a sbb and the value is all
+ // one's or all zero's.
+ SETCC_CARRY, // R = carry_bit ? ~0 : 0
- // Vector integer truncate.
- VTRUNC,
- // Vector integer truncate with unsigned/signed saturation.
- VTRUNCUS, VTRUNCS,
+ /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
+ /// Operands are two FP values to compare; result is a mask of
+ /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
+ FSETCC,
- // Masked version of the above. Used when less than a 128-bit result is
- // produced since the mask only applies to the lower elements and can't
- // be represented by a select.
- // SRC, PASSTHRU, MASK
- VMTRUNC, VMTRUNCUS, VMTRUNCS,
-
- // Vector FP extend.
- VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE,
-
- // Vector FP round.
- VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND,
-
- // Masked version of above. Used for v2f64->v4f32.
- // SRC, PASSTHRU, MASK
- VMFPROUND,
-
- // 128-bit vector logical left / right shift
- VSHLDQ, VSRLDQ,
-
- // Vector shift elements
- VSHL, VSRL, VSRA,
-
- // Vector variable shift
- VSHLV, VSRLV, VSRAV,
-
- // Vector shift elements by immediate
- VSHLI, VSRLI, VSRAI,
-
- // Shifts of mask registers.
- KSHIFTL, KSHIFTR,
-
- // Bit rotate by immediate
- VROTLI, VROTRI,
-
- // Vector packed double/float comparison.
- CMPP,
-
- // Vector integer comparisons.
- PCMPEQ, PCMPGT,
-
- // v8i16 Horizontal minimum and position.
- PHMINPOS,
-
- MULTISHIFT,
-
- /// Vector comparison generating mask bits for fp and
- /// integer signed and unsigned data types.
- CMPM,
- // Vector comparison with SAE for FP values
- CMPM_SAE,
-
- // Arithmetic operations with FLAGS results.
- ADD, SUB, ADC, SBB, SMUL, UMUL,
- OR, XOR, AND,
-
- // Bit field extract.
- BEXTR,
-
- // Zero High Bits Starting with Specified Bit Position.
- BZHI,
-
- // X86-specific multiply by immediate.
- MUL_IMM,
-
- // Vector sign bit extraction.
- MOVMSK,
-
- // Vector bitwise comparisons.
- PTEST,
-
- // Vector packed fp sign bitwise comparisons.
- TESTP,
-
- // OR/AND test for masks.
- KORTEST,
- KTEST,
-
- // ADD for masks.
- KADD,
-
- // Several flavors of instructions with vector shuffle behaviors.
- // Saturated signed/unnsigned packing.
- PACKSS,
- PACKUS,
- // Intra-lane alignr.
- PALIGNR,
- // AVX512 inter-lane alignr.
- VALIGN,
- PSHUFD,
- PSHUFHW,
- PSHUFLW,
- SHUFP,
- // VBMI2 Concat & Shift.
- VSHLD,
- VSHRD,
- VSHLDV,
- VSHRDV,
- //Shuffle Packed Values at 128-bit granularity.
- SHUF128,
- MOVDDUP,
- MOVSHDUP,
- MOVSLDUP,
- MOVLHPS,
- MOVHLPS,
- MOVSD,
- MOVSS,
- UNPCKL,
- UNPCKH,
- VPERMILPV,
- VPERMILPI,
- VPERMI,
- VPERM2X128,
-
- // Variable Permute (VPERM).
- // Res = VPERMV MaskV, V0
- VPERMV,
-
- // 3-op Variable Permute (VPERMT2).
- // Res = VPERMV3 V0, MaskV, V1
- VPERMV3,
-
- // Bitwise ternary logic.
- VPTERNLOG,
- // Fix Up Special Packed Float32/64 values.
- VFIXUPIMM, VFIXUPIMM_SAE,
- VFIXUPIMMS, VFIXUPIMMS_SAE,
- // Range Restriction Calculation For Packed Pairs of Float32/64 values.
- VRANGE, VRANGE_SAE, VRANGES, VRANGES_SAE,
- // Reduce - Perform Reduction Transformation on scalar\packed FP.
- VREDUCE, VREDUCE_SAE, VREDUCES, VREDUCES_SAE,
- // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
- // Also used by the legacy (V)ROUND intrinsics where we mask out the
- // scaling part of the immediate.
- VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
- // Tests Types Of a FP Values for packed types.
- VFPCLASS,
- // Tests Types Of a FP Values for scalar types.
- VFPCLASSS,
-
- // Broadcast (splat) scalar or element 0 of a vector. If the operand is
- // a vector, this node may change the vector length as part of the splat.
- VBROADCAST,
- // Broadcast mask to vector.
- VBROADCASTM,
- // Broadcast subvector to vector.
- SUBV_BROADCAST,
-
- /// SSE4A Extraction and Insertion.
- EXTRQI, INSERTQI,
-
- // XOP arithmetic/logical shifts.
- VPSHA, VPSHL,
- // XOP signed/unsigned integer comparisons.
- VPCOM, VPCOMU,
- // XOP packed permute bytes.
- VPPERM,
- // XOP two source permutation.
- VPERMIL2,
-
- // Vector multiply packed unsigned doubleword integers.
- PMULUDQ,
- // Vector multiply packed signed doubleword integers.
- PMULDQ,
- // Vector Multiply Packed UnsignedIntegers with Round and Scale.
- MULHRS,
-
- // Multiply and Add Packed Integers.
- VPMADDUBSW, VPMADDWD,
-
- // AVX512IFMA multiply and add.
- // NOTE: These are different than the instruction and perform
- // op0 x op1 + op2.
- VPMADD52L, VPMADD52H,
-
- // VNNI
- VPDPBUSD,
- VPDPBUSDS,
- VPDPWSSD,
- VPDPWSSDS,
-
- // FMA nodes.
- // We use the target independent ISD::FMA for the non-inverted case.
- FNMADD,
- FMSUB,
- FNMSUB,
- FMADDSUB,
- FMSUBADD,
-
- // FMA with rounding mode.
- FMADD_RND,
- FNMADD_RND,
- FMSUB_RND,
- FNMSUB_RND,
- FMADDSUB_RND,
- FMSUBADD_RND,
-
- // Compress and expand.
- COMPRESS,
- EXPAND,
-
- // Bits shuffle
- VPSHUFBITQMB,
-
- // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
- SINT_TO_FP_RND, UINT_TO_FP_RND,
- SCALAR_SINT_TO_FP, SCALAR_UINT_TO_FP,
- SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
-
- // Vector float/double to signed/unsigned integer.
- CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
- // Scalar float/double to signed/unsigned integer.
- CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
-
- // Vector float/double to signed/unsigned integer with truncation.
- CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE,
- // Scalar float/double to signed/unsigned integer with truncation.
- CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE,
-
- // Vector signed/unsigned integer to float/double.
- CVTSI2P, CVTUI2P,
-
- // Masked versions of above. Used for v2f64->v4f32.
- // SRC, PASSTHRU, MASK
- MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
- MCVTSI2P, MCVTUI2P,
-
- // Vector float to bfloat16.
- // Convert TWO packed single data to one packed BF16 data
- CVTNE2PS2BF16,
- // Convert packed single data to packed BF16 data
- CVTNEPS2BF16,
- // Masked version of above.
- // SRC, PASSTHRU, MASK
- MCVTNEPS2BF16,
-
- // Dot product of BF16 pairs to accumulated into
- // packed single precision.
- DPBF16PS,
-
- // Save xmm argument registers to the stack, according to %al. An operator
- // is needed so that this can be expanded with control flow.
- VASTART_SAVE_XMM_REGS,
-
- // Windows's _chkstk call to do stack probing.
- WIN_ALLOCA,
-
- // For allocating variable amounts of stack space when using
- // segmented stacks. Check if the current stacklet has enough space, and
- // falls back to heap allocation if not.
- SEG_ALLOCA,
-
- // Memory barriers.
- MEMBARRIER,
- MFENCE,
-
- // Store FP status word into i16 register.
- FNSTSW16r,
-
- // Store contents of %ah into %eflags.
- SAHF,
-
- // Get a random integer and indicate whether it is valid in CF.
- RDRAND,
-
- // Get a NIST SP800-90B & C compliant random integer and
- // indicate whether it is valid in CF.
- RDSEED,
-
- // Protection keys
- // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
- // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
- // value for ECX.
- RDPKRU, WRPKRU,
-
- // SSE42 string comparisons.
- // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
- // will emit one or two instructions based on which results are used. If
- // flags and index/mask this allows us to use a single instruction since
- // we won't have to pick and opcode for flags. Instead we can rely on the
- // DAG to CSE everything and decide at isel.
- PCMPISTR,
- PCMPESTR,
-
- // Test if in transactional execution.
- XTEST,
-
- // ERI instructions.
- RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE,
- RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE,
-
- // Conversions between float and half-float.
- CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE,
-
- // Masked version of above.
- // SRC, RND, PASSTHRU, MASK
- MCVTPS2PH,
-
- // Galois Field Arithmetic Instructions
- GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
-
- // LWP insert record.
- LWPINS,
-
- // User level wait
- UMWAIT, TPAUSE,
-
- // Enqueue Stores Instructions
- ENQCMD, ENQCMDS,
-
- // For avx512-vp2intersect
- VP2INTERSECT,
-
- /// X86 strict FP compare instructions.
- STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
- STRICT_FCMPS,
-
- // Vector packed double/float comparison.
- STRICT_CMPP,
-
- /// Vector comparison generating mask bits for fp and
- /// integer signed and unsigned data types.
- STRICT_CMPM,
-
- // Vector float/double to signed/unsigned integer with truncation.
- STRICT_CVTTP2SI, STRICT_CVTTP2UI,
-
- // Vector FP extend.
- STRICT_VFPEXT,
-
- // Vector FP round.
- STRICT_VFPROUND,
-
- // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
- // Also used by the legacy (V)ROUND intrinsics where we mask out the
- // scaling part of the immediate.
- STRICT_VRNDSCALE,
-
- // Vector signed/unsigned integer to float/double.
- STRICT_CVTSI2P, STRICT_CVTUI2P,
-
- // Compare and swap.
- LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
- LCMPXCHG8_DAG,
- LCMPXCHG16_DAG,
- LCMPXCHG8_SAVE_EBX_DAG,
- LCMPXCHG16_SAVE_RBX_DAG,
-
- /// LOCK-prefixed arithmetic read-modify-write instructions.
- /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
- LADD, LSUB, LOR, LXOR, LAND,
-
- // Load, scalar_to_vector, and zero extend.
- VZEXT_LOAD,
-
- // extract_vector_elt, store.
- VEXTRACT_STORE,
-
- // scalar broadcast from memory
- VBROADCAST_LOAD,
-
- // Store FP control world into i16 memory.
- FNSTCW16m,
-
- /// This instruction implements FP_TO_SINT with the
- /// integer destination in memory and a FP reg source. This corresponds
- /// to the X86::FIST*m instructions and the rounding mode change stuff. It
- /// has two inputs (token chain and address) and two outputs (int value
- /// and token chain). Memory VT specifies the type to store to.
- FP_TO_INT_IN_MEM,
-
- /// This instruction implements SINT_TO_FP with the
- /// integer source in memory and FP reg result. This corresponds to the
- /// X86::FILD*m instructions. It has two inputs (token chain and address)
- /// and two outputs (FP value and token chain). FILD_FLAG also produces a
- /// flag). The integer source type is specified by the memory VT.
- FILD,
- FILD_FLAG,
-
- /// This instruction implements a fp->int store from FP stack
- /// slots. This corresponds to the fist instruction. It takes a
- /// chain operand, value to store, address, and glue. The memory VT
- /// specifies the type to store as.
- FIST,
-
- /// This instruction implements an extending load to FP stack slots.
- /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
- /// operand, and ptr to load from. The memory VT specifies the type to
- /// load from.
- FLD,
+ /// X86 FP SETCC, similar to above, but with output as an i1 mask
+ /// and a version with SAE.
+ FSETCCM,
+ FSETCCM_SAE,
- /// This instruction implements a truncating store from FP stack
- /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
- /// chain operand, value to store, address, and glue. The memory VT
- /// specifies the type to store as.
- FST,
-
- /// This instruction grabs the address of the next argument
- /// from a va_list. (reads and modifies the va_list in memory)
- VAARG_64,
-
- // Vector truncating store with unsigned/signed saturation
- VTRUNCSTOREUS, VTRUNCSTORES,
- // Vector truncating masked store with unsigned/signed saturation
- VMTRUNCSTOREUS, VMTRUNCSTORES,
-
- // X86 specific gather and scatter
- MGATHER, MSCATTER,
-
- // WARNING: Do not add anything in the end unless you want the node to
- // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
- // opcodes will be thought as target memory ops!
- };
+ /// X86 conditional moves. Operand 0 and operand 1 are the two values
+ /// to select from. Operand 2 is the condition code, and operand 3 is the
+ /// flag operand produced by a CMP or TEST instruction.
+ CMOV,
+
+ /// X86 conditional branches. Operand 0 is the chain operand, operand 1
+ /// is the block to branch if condition is true, operand 2 is the
+ /// condition code, and operand 3 is the flag operand produced by a CMP
+ /// or TEST instruction.
+ BRCOND,
+
+ /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
+ /// operand 1 is the target address.
+ NT_BRIND,
+
+ /// Return with a flag operand. Operand 0 is the chain operand, operand
+ /// 1 is the number of bytes of stack to pop.
+ RET_FLAG,
+
+ /// Return from interrupt. Operand 0 is the number of bytes to pop.
+ IRET,
+
+ /// Repeat fill, corresponds to X86::REP_STOSx.
+ REP_STOS,
+
+ /// Repeat move, corresponds to X86::REP_MOVSx.
+ REP_MOVS,
+
+ /// On Darwin, this node represents the result of the popl
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ /// A wrapper node for TargetConstantPool, TargetJumpTable,
+ /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
+ /// MCSymbol and TargetBlockAddress.
+ Wrapper,
+
+ /// Special wrapper used under X86-64 PIC mode for RIP
+ /// relative displacements.
+ WrapperRIP,
+
+ /// Copies a 64-bit value from an MMX vector to the low word
+ /// of an XMM vector, with the high word zero filled.
+ MOVQ2DQ,
+
+ /// Copies a 64-bit value from the low word of an XMM vector
+ /// to an MMX vector.
+ MOVDQ2Q,
+
+ /// Copies a 32-bit value from the low word of an MMX
+ /// vector to a GPR.
+ MMX_MOVD2W,
+
+ /// Copies a GPR into the low 32-bit word of an MMX vector
+ /// and zeroes out the high word.
+ MMX_MOVW2D,
+
+ /// Extract an 8-bit value from a vector and zero extend it to
+ /// i32, corresponds to X86::PEXTRB.
+ PEXTRB,
+
+ /// Extract a 16-bit value from a vector and zero extend it to
+ /// i32, corresponds to X86::PEXTRW.
+ PEXTRW,
+
+ /// Insert any element of a 4 x float vector into any element
+ /// of a destination 4 x float vector.
+ INSERTPS,
+
+ /// Insert the lower 8-bits of a 32-bit value to a vector,
+ /// corresponds to X86::PINSRB.
+ PINSRB,
+
+ /// Insert the lower 16-bits of a 32-bit value to a vector,
+ /// corresponds to X86::PINSRW.
+ PINSRW,
+
+ /// Shuffle 16 8-bit values within a vector.
+ PSHUFB,
+
+ /// Compute Sum of Absolute Differences.
+ PSADBW,
+ /// Compute Double Block Packed Sum-Absolute-Differences
+ DBPSADBW,
+
+ /// Bitwise Logical AND NOT of Packed FP values.
+ ANDNP,
+
+ /// Blend where the selector is an immediate.
+ BLENDI,
+
+ /// Dynamic (non-constant condition) vector blend where only the sign bits
+ /// of the condition elements are used. This is used to enforce that the
+ /// condition mask is not valid for generic VSELECT optimizations. This
+ /// is also used to implement the intrinsics.
+ /// Operands are in VSELECT order: MASK, TRUE, FALSE
+ BLENDV,
+
+ /// Combined add and sub on an FP vector.
+ ADDSUB,
+
+ // FP vector ops with rounding mode.
+ FADD_RND,
+ FADDS,
+ FADDS_RND,
+ FSUB_RND,
+ FSUBS,
+ FSUBS_RND,
+ FMUL_RND,
+ FMULS,
+ FMULS_RND,
+ FDIV_RND,
+ FDIVS,
+ FDIVS_RND,
+ FMAX_SAE,
+ FMAXS_SAE,
+ FMIN_SAE,
+ FMINS_SAE,
+ FSQRT_RND,
+ FSQRTS,
+ FSQRTS_RND,
+
+ // FP vector get exponent.
+ FGETEXP,
+ FGETEXP_SAE,
+ FGETEXPS,
+ FGETEXPS_SAE,
+ // Extract Normalized Mantissas.
+ VGETMANT,
+ VGETMANT_SAE,
+ VGETMANTS,
+ VGETMANTS_SAE,
+ // FP Scale.
+ SCALEF,
+ SCALEF_RND,
+ SCALEFS,
+ SCALEFS_RND,
+
+ // Unsigned Integer average.
+ AVG,
+
+ /// Integer horizontal add/sub.
+ HADD,
+ HSUB,
+
+ /// Floating point horizontal add/sub.
+ FHADD,
+ FHSUB,
+
+ // Detect Conflicts Within a Vector
+ CONFLICT,
+
+ /// Floating point max and min.
+ FMAX,
+ FMIN,
+
+ /// Commutative FMIN and FMAX.
+ FMAXC,
+ FMINC,
+
+ /// Scalar intrinsic floating point max and min.
+ FMAXS,
+ FMINS,
+
+ /// Floating point reciprocal-sqrt and reciprocal approximation.
+ /// Note that these typically require refinement
+ /// in order to obtain suitable precision.
+ FRSQRT,
+ FRCP,
+
+ // AVX-512 reciprocal approximations with a little more precision.
+ RSQRT14,
+ RSQRT14S,
+ RCP14,
+ RCP14S,
+
+ // Thread Local Storage.
+ TLSADDR,
+
+ // Thread Local Storage. A call to get the start address
+ // of the TLS block for the current module.
+ TLSBASEADDR,
+
+ // Thread Local Storage. When calling to an OS provided
+ // thunk at the address from an earlier relocation.
+ TLSCALL,
+
+ // Exception Handling helpers.
+ EH_RETURN,
+
+ // SjLj exception handling setjmp.
+ EH_SJLJ_SETJMP,
+
+ // SjLj exception handling longjmp.
+ EH_SJLJ_LONGJMP,
+
+ // SjLj exception handling dispatch.
+ EH_SJLJ_SETUP_DISPATCH,
+
+ /// Tail call return. See X86TargetLowering::LowerCall for
+ /// the list of operands.
+ TC_RETURN,
+
+ // Vector move to low scalar and zero higher vector elements.
+ VZEXT_MOVL,
+
+ // Vector integer truncate.
+ VTRUNC,
+ // Vector integer truncate with unsigned/signed saturation.
+ VTRUNCUS,
+ VTRUNCS,
+
+ // Masked version of the above. Used when less than a 128-bit result is
+ // produced since the mask only applies to the lower elements and can't
+ // be represented by a select.
+ // SRC, PASSTHRU, MASK
+ VMTRUNC,
+ VMTRUNCUS,
+ VMTRUNCS,
+
+ // Vector FP extend.
+ VFPEXT,
+ VFPEXT_SAE,
+ VFPEXTS,
+ VFPEXTS_SAE,
+
+ // Vector FP round.
+ VFPROUND,
+ VFPROUND_RND,
+ VFPROUNDS,
+ VFPROUNDS_RND,
+
+ // Masked version of above. Used for v2f64->v4f32.
+ // SRC, PASSTHRU, MASK
+ VMFPROUND,
+
+ // 128-bit vector logical left / right shift
+ VSHLDQ,
+ VSRLDQ,
+
+ // Vector shift elements
+ VSHL,
+ VSRL,
+ VSRA,
+
+ // Vector variable shift
+ VSHLV,
+ VSRLV,
+ VSRAV,
+
+ // Vector shift elements by immediate
+ VSHLI,
+ VSRLI,
+ VSRAI,
+
+ // Shifts of mask registers.
+ KSHIFTL,
+ KSHIFTR,
+
+ // Bit rotate by immediate
+ VROTLI,
+ VROTRI,
+
+ // Vector packed double/float comparison.
+ CMPP,
+
+ // Vector integer comparisons.
+ PCMPEQ,
+ PCMPGT,
+
+ // v8i16 Horizontal minimum and position.
+ PHMINPOS,
+
+ MULTISHIFT,
+
+ /// Vector comparison generating mask bits for fp and
+ /// integer signed and unsigned data types.
+ CMPM,
+ // Vector comparison with SAE for FP values
+ CMPM_SAE,
+
+ // Arithmetic operations with FLAGS results.
+ ADD,
+ SUB,
+ ADC,
+ SBB,
+ SMUL,
+ UMUL,
+ OR,
+ XOR,
+ AND,
+
+ // Bit field extract.
+ BEXTR,
+
+ // Zero High Bits Starting with Specified Bit Position.
+ BZHI,
+
+ // Parallel extract and deposit.
+ PDEP,
+ PEXT,
+
+ // X86-specific multiply by immediate.
+ MUL_IMM,
+
+ // Vector sign bit extraction.
+ MOVMSK,
+
+ // Vector bitwise comparisons.
+ PTEST,
+
+ // Vector packed fp sign bitwise comparisons.
+ TESTP,
+
+ // OR/AND test for masks.
+ KORTEST,
+ KTEST,
+
+ // ADD for masks.
+ KADD,
+
+ // Several flavors of instructions with vector shuffle behaviors.
+ // Saturated signed/unsigned packing.
+ PACKSS,
+ PACKUS,
+ // Intra-lane alignr.
+ PALIGNR,
+ // AVX512 inter-lane alignr.
+ VALIGN,
+ PSHUFD,
+ PSHUFHW,
+ PSHUFLW,
+ SHUFP,
+ // VBMI2 Concat & Shift.
+ VSHLD,
+ VSHRD,
+ VSHLDV,
+ VSHRDV,
+ // Shuffle Packed Values at 128-bit granularity.
+ SHUF128,
+ MOVDDUP,
+ MOVSHDUP,
+ MOVSLDUP,
+ MOVLHPS,
+ MOVHLPS,
+ MOVSD,
+ MOVSS,
+ UNPCKL,
+ UNPCKH,
+ VPERMILPV,
+ VPERMILPI,
+ VPERMI,
+ VPERM2X128,
+
+ // Variable Permute (VPERM).
+ // Res = VPERMV MaskV, V0
+ VPERMV,
+
+ // 3-op Variable Permute (VPERMT2).
+ // Res = VPERMV3 V0, MaskV, V1
+ VPERMV3,
+
+ // Bitwise ternary logic.
+ VPTERNLOG,
+ // Fix Up Special Packed Float32/64 values.
+ VFIXUPIMM,
+ VFIXUPIMM_SAE,
+ VFIXUPIMMS,
+ VFIXUPIMMS_SAE,
+ // Range Restriction Calculation For Packed Pairs of Float32/64 values.
+ VRANGE,
+ VRANGE_SAE,
+ VRANGES,
+ VRANGES_SAE,
+ // Reduce - Perform Reduction Transformation on scalar/packed FP.
+ VREDUCE,
+ VREDUCE_SAE,
+ VREDUCES,
+ VREDUCES_SAE,
+ // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
+ // Also used by the legacy (V)ROUND intrinsics where we mask out the
+ // scaling part of the immediate.
+ VRNDSCALE,
+ VRNDSCALE_SAE,
+ VRNDSCALES,
+ VRNDSCALES_SAE,
+ // Tests Types Of a FP Values for packed types.
+ VFPCLASS,
+ // Tests Types Of a FP Values for scalar types.
+ VFPCLASSS,
+
+ // Broadcast (splat) scalar or element 0 of a vector. If the operand is
+ // a vector, this node may change the vector length as part of the splat.
+ VBROADCAST,
+ // Broadcast mask to vector.
+ VBROADCASTM,
+ // Broadcast subvector to vector.
+ SUBV_BROADCAST,
+
+ /// SSE4A Extraction and Insertion.
+ EXTRQI,
+ INSERTQI,
+
+ // XOP arithmetic/logical shifts.
+ VPSHA,
+ VPSHL,
+ // XOP signed/unsigned integer comparisons.
+ VPCOM,
+ VPCOMU,
+ // XOP packed permute bytes.
+ VPPERM,
+ // XOP two source permutation.
+ VPERMIL2,
+
+ // Vector multiply packed unsigned doubleword integers.
+ PMULUDQ,
+ // Vector multiply packed signed doubleword integers.
+ PMULDQ,
+ // Vector Multiply Packed Unsigned Integers with Round and Scale.
+ MULHRS,
+
+ // Multiply and Add Packed Integers.
+ VPMADDUBSW,
+ VPMADDWD,
+
+ // AVX512IFMA multiply and add.
+ // NOTE: These are different than the instruction and perform
+ // op0 x op1 + op2.
+ VPMADD52L,
+ VPMADD52H,
+
+ // VNNI
+ VPDPBUSD,
+ VPDPBUSDS,
+ VPDPWSSD,
+ VPDPWSSDS,
+
+ // FMA nodes.
+ // We use the target independent ISD::FMA for the non-inverted case.
+ FNMADD,
+ FMSUB,
+ FNMSUB,
+ FMADDSUB,
+ FMSUBADD,
+
+ // FMA with rounding mode.
+ FMADD_RND,
+ FNMADD_RND,
+ FMSUB_RND,
+ FNMSUB_RND,
+ FMADDSUB_RND,
+ FMSUBADD_RND,
+
+ // Compress and expand.
+ COMPRESS,
+ EXPAND,
+
+ // Bits shuffle
+ VPSHUFBITQMB,
+
+ // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
+ SINT_TO_FP_RND,
+ UINT_TO_FP_RND,
+ SCALAR_SINT_TO_FP,
+ SCALAR_UINT_TO_FP,
+ SCALAR_SINT_TO_FP_RND,
+ SCALAR_UINT_TO_FP_RND,
+
+ // Vector float/double to signed/unsigned integer.
+ CVTP2SI,
+ CVTP2UI,
+ CVTP2SI_RND,
+ CVTP2UI_RND,
+ // Scalar float/double to signed/unsigned integer.
+ CVTS2SI,
+ CVTS2UI,
+ CVTS2SI_RND,
+ CVTS2UI_RND,
+
+ // Vector float/double to signed/unsigned integer with truncation.
+ CVTTP2SI,
+ CVTTP2UI,
+ CVTTP2SI_SAE,
+ CVTTP2UI_SAE,
+ // Scalar float/double to signed/unsigned integer with truncation.
+ CVTTS2SI,
+ CVTTS2UI,
+ CVTTS2SI_SAE,
+ CVTTS2UI_SAE,
+
+ // Vector signed/unsigned integer to float/double.
+ CVTSI2P,
+ CVTUI2P,
+
+ // Masked versions of above. Used for v2f64->v4f32.
+ // SRC, PASSTHRU, MASK
+ MCVTP2SI,
+ MCVTP2UI,
+ MCVTTP2SI,
+ MCVTTP2UI,
+ MCVTSI2P,
+ MCVTUI2P,
+
+ // Vector float to bfloat16.
+ // Convert TWO packed single data to one packed BF16 data
+ CVTNE2PS2BF16,
+ // Convert packed single data to packed BF16 data
+ CVTNEPS2BF16,
+ // Masked version of above.
+ // SRC, PASSTHRU, MASK
+ MCVTNEPS2BF16,
+
+ // Dot product of BF16 pairs accumulated into
+ // packed single precision.
+ DPBF16PS,
+
+ // Save xmm argument registers to the stack, according to %al. An operator
+ // is needed so that this can be expanded with control flow.
+ VASTART_SAVE_XMM_REGS,
+
+ // Windows's _chkstk call to do stack probing.
+ WIN_ALLOCA,
+
+ // For allocating variable amounts of stack space when using
+ // segmented stacks. Check if the current stacklet has enough space, and
+ // falls back to heap allocation if not.
+ SEG_ALLOCA,
+
+ // For allocating stack space when using stack clash protector.
+ // Allocation is performed by block, and each block is probed.
+ PROBED_ALLOCA,
+
+ // Memory barriers.
+ MEMBARRIER,
+ MFENCE,
+
+ // Get a random integer and indicate whether it is valid in CF.
+ RDRAND,
+
+ // Get a NIST SP800-90B & C compliant random integer and
+ // indicate whether it is valid in CF.
+ RDSEED,
+
+ // Protection keys
+ // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
+ // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
+ // value for ECX.
+ RDPKRU,
+ WRPKRU,
+
+ // SSE42 string comparisons.
+ // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
+ // will emit one or two instructions based on which results are used. If
+ // both flags and index/mask are used, this allows us to use a single
+ // instruction, since we won't have to pick an opcode for flags. Instead we can rely on the
+ // DAG to CSE everything and decide at isel.
+ PCMPISTR,
+ PCMPESTR,
+
+ // Test if in transactional execution.
+ XTEST,
+
+ // ERI instructions.
+ RSQRT28,
+ RSQRT28_SAE,
+ RSQRT28S,
+ RSQRT28S_SAE,
+ RCP28,
+ RCP28_SAE,
+ RCP28S,
+ RCP28S_SAE,
+ EXP2,
+ EXP2_SAE,
+
+ // Conversions between float and half-float.
+ CVTPS2PH,
+ CVTPH2PS,
+ CVTPH2PS_SAE,
+
+ // Masked version of above.
+ // SRC, RND, PASSTHRU, MASK
+ MCVTPS2PH,
+
+ // Galois Field Arithmetic Instructions
+ GF2P8AFFINEINVQB,
+ GF2P8AFFINEQB,
+ GF2P8MULB,
+
+ // LWP insert record.
+ LWPINS,
+
+ // User level wait
+ UMWAIT,
+ TPAUSE,
+
+ // Enqueue Stores Instructions
+ ENQCMD,
+ ENQCMDS,
+
+ // For avx512-vp2intersect
+ VP2INTERSECT,
+
+ /// X86 strict FP compare instructions.
+ STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
+ STRICT_FCMPS,
+
+ // Vector packed double/float comparison.
+ STRICT_CMPP,
+
+ /// Vector comparison generating mask bits for fp and
+ /// integer signed and unsigned data types.
+ STRICT_CMPM,
+
+ // Vector float/double to signed/unsigned integer with truncation.
+ STRICT_CVTTP2SI,
+ STRICT_CVTTP2UI,
+
+ // Vector FP extend.
+ STRICT_VFPEXT,
+
+ // Vector FP round.
+ STRICT_VFPROUND,
+
+ // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
+ // Also used by the legacy (V)ROUND intrinsics where we mask out the
+ // scaling part of the immediate.
+ STRICT_VRNDSCALE,
+
+ // Vector signed/unsigned integer to float/double.
+ STRICT_CVTSI2P,
+ STRICT_CVTUI2P,
+
+ // Strict FMA nodes.
+ STRICT_FNMADD,
+ STRICT_FMSUB,
+ STRICT_FNMSUB,
+
+ // Conversions between float and half-float.
+ STRICT_CVTPS2PH,
+ STRICT_CVTPH2PS,
+
+ // Compare and swap.
+ LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ LCMPXCHG8_DAG,
+ LCMPXCHG16_DAG,
+ LCMPXCHG8_SAVE_EBX_DAG,
+ LCMPXCHG16_SAVE_RBX_DAG,
+
+ /// LOCK-prefixed arithmetic read-modify-write instructions.
+ /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
+ LADD,
+ LSUB,
+ LOR,
+ LXOR,
+ LAND,
+
+ // Load, scalar_to_vector, and zero extend.
+ VZEXT_LOAD,
+
+ // extract_vector_elt, store.
+ VEXTRACT_STORE,
+
+ // scalar broadcast from memory
+ VBROADCAST_LOAD,
+
+ // Store FP control world into i16 memory.
+ FNSTCW16m,
+
+ /// This instruction implements FP_TO_SINT with the
+ /// integer destination in memory and a FP reg source. This corresponds
+ /// to the X86::FIST*m instructions and the rounding mode change stuff. It
+ /// has two inputs (token chain and address) and two outputs (int value
+ /// and token chain). Memory VT specifies the type to store to.
+ FP_TO_INT_IN_MEM,
+
+ /// This instruction implements SINT_TO_FP with the
+ /// integer source in memory and FP reg result. This corresponds to the
+ /// X86::FILD*m instructions. It has two inputs (token chain and address)
+ /// and two outputs (FP value and token chain). The integer source type is
+ /// specified by the memory VT.
+ FILD,
+
+ /// This instruction implements a fp->int store from FP stack
+ /// slots. This corresponds to the fist instruction. It takes a
+ /// chain operand, value to store, address, and glue. The memory VT
+ /// specifies the type to store as.
+ FIST,
+
+ /// This instruction implements an extending load to FP stack slots.
+ /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
+ /// operand, and ptr to load from. The memory VT specifies the type to
+ /// load from.
+ FLD,
+
+ /// This instruction implements a truncating store from FP stack
+ /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
+ /// chain operand, value to store, address, and glue. The memory VT
+ /// specifies the type to store as.
+ FST,
+
+ /// This instruction grabs the address of the next argument
+ /// from a va_list. (reads and modifies the va_list in memory)
+ VAARG_64,
+
+ // Vector truncating store with unsigned/signed saturation
+ VTRUNCSTOREUS,
+ VTRUNCSTORES,
+ // Vector truncating masked store with unsigned/signed saturation
+ VMTRUNCSTOREUS,
+ VMTRUNCSTORES,
+
+ // X86 specific gather and scatter
+ MGATHER,
+ MSCATTER,
+
+ // WARNING: Do not add anything at the end unless you want the node to
+ // have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
+ // opcodes will be treated as target memory ops!
+ };
} // end namespace X86ISD
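
The new FSHL/FSHR opcodes keep ISD::FSHL/FSHR operand order rather than the SHLD/SHRD register order, as the enum comment notes. A sketch of how a custom lowering hook inside the X86 backend might emit one; the surrounding boilerplate is assumed and is not taken from this patch:

#include "llvm/CodeGen/SelectionDAG.h"
// X86ISelLowering.h (for X86ISD) is assumed to be in scope in the backend TU.

static llvm::SDValue lowerFunnelShiftLeft(llvm::SDValue Op,
                                          llvm::SelectionDAG &DAG) {
  llvm::SDLoc DL(Op);
  llvm::EVT VT = Op.getValueType();   // e.g. MVT::i16, matching SHLDW/SHRDW
  llvm::SDValue Hi = Op.getOperand(0);
  llvm::SDValue Lo = Op.getOperand(1);
  llvm::SDValue Amt = Op.getOperand(2);
  // Operands in ISD::FSHL order: (Hi, Lo, Amt).
  return DAG.getNode(llvm::X86ISD::FSHL, DL, VT, Hi, Lo, Amt);
}
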
/// Define some predicates that are used for node matching.
@@ -717,7 +844,10 @@ namespace llvm {
/// If Op is a constant whose elements are all the same constant or
/// undefined, return true and return the constant value in \p SplatVal.
- bool isConstantSplat(SDValue Op, APInt &SplatVal);
+ /// If we have undef bits that don't cover an entire element, we treat these
+ /// as zero if AllowPartialUndefs is set, else we fail and return false.
+ bool isConstantSplat(SDValue Op, APInt &SplatVal,
+ bool AllowPartialUndefs = true);
} // end namespace X86
//===--------------------------------------------------------------------===//
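
The added AllowPartialUndefs flag (defaulted to true) lets callers reject splats whose undef bits cover only part of an element rather than treating those bits as zero. A usage sketch, with the surrounding DAG-combine context and includes assumed:

// Inside the X86 backend, where X86ISelLowering.h declares X86::isConstantSplat.
static bool isFullyDefinedSignMaskSplat(llvm::SDValue Op) {
  llvm::APInt SplatVal;
  return llvm::X86::isConstantSplat(Op, SplatVal,
                                    /*AllowPartialUndefs=*/false) &&
         SplatVal.isSignMask();
}
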
@@ -756,19 +886,7 @@ namespace llvm {
unsigned getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const override;
- /// Returns the target specific optimal type for load
- /// and store operations as a result of memset, memcpy, and memmove
- /// lowering. If DstAlign is zero that means it's safe to destination
- /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
- /// means there isn't a need to check it against alignment requirement,
- /// probably because the source does not need to be loaded. If 'IsMemset' is
- /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
- /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
- /// source is constant so it does not need to be loaded.
- /// It returns EVT::Other if the type should be determined using generic
- /// target-independent logic.
- EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
- bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+ EVT getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const override;
/// Returns true if it's safe to use load / store of the
@@ -805,19 +923,6 @@ namespace llvm {
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
- // Return true if it is profitable to combine a BUILD_VECTOR with a
- // stride-pattern to a shuffle and a truncate.
- // Example of such a combine:
- // v4i32 build_vector((extract_elt V, 1),
- // (extract_elt V, 3),
- // (extract_elt V, 5),
- // (extract_elt V, 7))
- // -->
- // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
- // v4i64)
- bool isDesirableToCombineBuildVectorToShuffleTruncate(
- ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
-
/// Return true if the target has native support for
/// the specified value type and it is 'desirable' to use the type for the
/// given node type. e.g. On x86 i16 is legal, but undesirable since i16
@@ -830,15 +935,12 @@ namespace llvm {
/// and some i16 instructions are slow.
bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
- /// Return 1 if we can compute the negated form of the specified expression
- /// for the same cost as the expression itself, or 2 if we can compute the
- /// negated form more cheaply than the expression itself. Else return 0.
- char isNegatibleForFree(SDValue Op, SelectionDAG &DAG, bool LegalOperations,
- bool ForCodeSize, unsigned Depth) const override;
-
- /// If isNegatibleForFree returns true, return the newly negated expression.
+ /// Return the newly negated expression if the cost is not expensive and
+ /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
+ /// do the negation.
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
bool LegalOperations, bool ForCodeSize,
+ NegatibleCost &Cost,
unsigned Depth) const override;
MachineBasicBlock *
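
The old isNegatibleForFree/getNegatedExpression pair is folded into one call that reports profitability through a NegatibleCost out-parameter (Cheaper, Neutral or Expensive in TargetLowering). A caller sketch under that assumption; it is not code from this patch:

#include "llvm/CodeGen/TargetLowering.h"

// Negate Op only when the negated form is no more expensive than the original.
static llvm::SDValue tryCheapNegate(const llvm::TargetLowering &TLI,
                                    llvm::SDValue Op, llvm::SelectionDAG &DAG,
                                    bool LegalOps, bool ForCodeSize) {
  using TL = llvm::TargetLowering;
  TL::NegatibleCost Cost = TL::NegatibleCost::Expensive;
  llvm::SDValue Neg =
      TLI.getNegatedExpression(Op, DAG, LegalOps, ForCodeSize, Cost, /*Depth=*/0);
  if (Neg && Cost != TL::NegatibleCost::Expensive)
    return Neg;
  return llvm::SDValue();
}
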
@@ -934,7 +1036,8 @@ namespace llvm {
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
- bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+ bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts,
TargetLoweringOpt &TLO) const override;
/// Determine which of the bits specified in Mask are known to be either
@@ -958,6 +1061,12 @@ namespace llvm {
TargetLoweringOpt &TLO,
unsigned Depth) const override;
+ bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
+ const APInt &DemandedElts,
+ unsigned MaskIndex,
+ TargetLoweringOpt &TLO,
+ unsigned Depth) const;
+
bool SimplifyDemandedBitsForTargetNode(SDValue Op,
const APInt &DemandedBits,
const APInt &DemandedElts,
@@ -1047,6 +1156,8 @@ namespace llvm {
int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
+ /// This is used to enable splatted operand transforms for vector shifts
+ /// and vector funnel shifts.
bool isVectorShiftByScalarCheap(Type *Ty) const override;
/// Add x86-specific opcodes to the default list.
@@ -1075,6 +1186,10 @@ namespace llvm {
bool isZExtFree(EVT VT1, EVT VT2) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
+ bool shouldSinkOperands(Instruction *I,
+ SmallVectorImpl<Use *> &Ops) const override;
+ bool shouldConvertPhiType(Type *From, Type *To) const override;
+
/// Return true if folding a vector load into ExtVal (a sign, zero, or any
/// extend node) is profitable.
bool isVectorLoadExtDesirable(SDValue) const override;
@@ -1171,7 +1286,8 @@ namespace llvm {
/// Overflow nodes should get combined/lowered to optimal instructions
/// (they should allow eliminating explicit compares by getting flags from
/// math ops).
- bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;
+ bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
+ bool MathUsed) const override;
bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
unsigned AddrSpace) const override {
@@ -1194,12 +1310,12 @@ namespace llvm {
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
- unsigned
+ Register
getExceptionPointerRegister(const Constant *PersonalityFn) const override;
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
- unsigned
+ Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
virtual bool needsFixedCatchObjects() const override;
@@ -1227,8 +1343,10 @@ namespace llvm {
/// offset as appropriate.
Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
- std::pair<SDValue, SDValue> BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
- SDValue StackSlot,
+ std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
+ SDValue Chain, SDValue Pointer,
+ MachinePointerInfo PtrInfo,
+ Align Alignment,
SelectionDAG &DAG) const;
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
@@ -1236,6 +1354,8 @@ namespace llvm {
/// Customize the preferred legalization strategy for certain types.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
+ bool softPromoteHalfType() const override { return true; }
+
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
EVT VT) const override;
@@ -1251,6 +1371,8 @@ namespace llvm {
bool supportSwiftError() const override;
+ bool hasStackProbeSymbol(MachineFunction &MF) const override;
+ bool hasInlineStackProbe(MachineFunction &MF) const override;
StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
unsigned getStackProbeSize(MachineFunction &MF) const;
@@ -1314,7 +1436,7 @@ namespace llvm {
SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
const SDLoc &dl, SelectionDAG &DAG,
const CCValAssign &VA,
- ISD::ArgFlagsTy Flags) const;
+ ISD::ArgFlagsTy Flags, bool isByval) const;
// Call lowering helpers.
@@ -1340,8 +1462,9 @@ namespace llvm {
unsigned getAddressSpace(void) const;
- SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned,
+ SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
SDValue &Chain) const;
+ SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -1365,8 +1488,8 @@ namespace llvm {
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
@@ -1431,7 +1554,7 @@ namespace llvm {
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
TargetLoweringBase::AtomicExpansionKind
- shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
+ shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
@@ -1464,18 +1587,15 @@ namespace llvm {
MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
- MachineBasicBlock *BB) const;
-
MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
- MachineBasicBlock *BB) const;
-
MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
+
MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
MachineBasicBlock *BB) const;
@@ -1497,32 +1617,25 @@ namespace llvm {
MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
MachineBasicBlock *MBB) const;
- MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
- MachineBasicBlock *MBB) const;
-
MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
MachineBasicBlock *MBB) const;
- /// Convert a comparison if required by the subtarget.
- SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
-
/// Emit flags for the given setcc condition and operands. Also returns the
/// corresponding X86 condition code constant in X86CC.
SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG,
- SDValue &X86CC, SDValue &Chain,
- bool IsSignaling) const;
+ SDValue &X86CC) const;
/// Check if replacement of SQRT with RSQRT should be disabled.
- bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
+ bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;
/// Use rsqrt* to speed up sqrt calculations.
- SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
int &RefinementSteps, bool &UseOneConstNR,
bool Reciprocal) const override;
/// Use rcp* to speed up fdiv calculations.
- SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
int &RefinementSteps) const override;
/// Reassociate floating point divisions into multiply by reciprocal.
@@ -1537,101 +1650,14 @@ namespace llvm {
const TargetLibraryInfo *libInfo);
} // end namespace X86
- // Base class for all X86 non-masked store operations.
- class X86StoreSDNode : public MemSDNode {
- public:
- X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
- SDVTList VTs, EVT MemVT,
- MachineMemOperand *MMO)
- :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
- const SDValue &getValue() const { return getOperand(1); }
- const SDValue &getBasePtr() const { return getOperand(2); }
-
- static bool classof(const SDNode *N) {
- return N->getOpcode() == X86ISD::VTRUNCSTORES ||
- N->getOpcode() == X86ISD::VTRUNCSTOREUS;
- }
- };
-
- // Base class for all X86 masked store operations.
- // The class has the same order of operands as MaskedStoreSDNode for
- // convenience.
- class X86MaskedStoreSDNode : public MemSDNode {
- public:
- X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
- const DebugLoc &dl, SDVTList VTs, EVT MemVT,
- MachineMemOperand *MMO)
- : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
-
- const SDValue &getValue() const { return getOperand(1); }
- const SDValue &getBasePtr() const { return getOperand(2); }
- const SDValue &getMask() const { return getOperand(3); }
-
- static bool classof(const SDNode *N) {
- return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
- N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
- }
- };
-
- // X86 Truncating Store with Signed saturation.
- class TruncSStoreSDNode : public X86StoreSDNode {
- public:
- TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
- SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
- : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
-
- static bool classof(const SDNode *N) {
- return N->getOpcode() == X86ISD::VTRUNCSTORES;
- }
- };
-
- // X86 Truncating Store with Unsigned saturation.
- class TruncUSStoreSDNode : public X86StoreSDNode {
- public:
- TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
- SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
- : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
-
- static bool classof(const SDNode *N) {
- return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
- }
- };
-
- // X86 Truncating Masked Store with Signed saturation.
- class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
- public:
- MaskedTruncSStoreSDNode(unsigned Order,
- const DebugLoc &dl, SDVTList VTs, EVT MemVT,
- MachineMemOperand *MMO)
- : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
-
- static bool classof(const SDNode *N) {
- return N->getOpcode() == X86ISD::VMTRUNCSTORES;
- }
- };
-
- // X86 Truncating Masked Store with Unsigned saturation.
- class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
- public:
- MaskedTruncUSStoreSDNode(unsigned Order,
- const DebugLoc &dl, SDVTList VTs, EVT MemVT,
- MachineMemOperand *MMO)
- : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
-
- static bool classof(const SDNode *N) {
- return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
- }
- };
-
// X86 specific Gather/Scatter nodes.
// The class has the same order of operands as MaskedGatherScatterSDNode for
// convenience.
- class X86MaskedGatherScatterSDNode : public MemSDNode {
+ class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
public:
- X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
- const DebugLoc &dl, SDVTList VTs, EVT MemVT,
- MachineMemOperand *MMO)
- : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
+    // This is intended as a utility and should never be directly created.
+ X86MaskedGatherScatterSDNode() = delete;
+ ~X86MaskedGatherScatterSDNode() = delete;
const SDValue &getBasePtr() const { return getOperand(3); }
const SDValue &getIndex() const { return getOperand(4); }
@@ -1646,11 +1672,6 @@ namespace llvm {
class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
public:
- X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
- EVT MemVT, MachineMemOperand *MMO)
- : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
- MMO) {}
-
const SDValue &getPassThru() const { return getOperand(1); }
static bool classof(const SDNode *N) {
@@ -1660,11 +1681,6 @@ namespace llvm {
class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
public:
- X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
- EVT MemVT, MachineMemOperand *MMO)
- : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
- MMO) {}
-
const SDValue &getValue() const { return getOperand(1); }
static bool classof(const SDNode *N) {
@@ -1673,47 +1689,15 @@ namespace llvm {
};
/// Generate unpacklo/unpackhi shuffle mask.
- template <typename T = int>
- void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
- bool Unary) {
- assert(Mask.empty() && "Expected an empty shuffle mask vector");
- int NumElts = VT.getVectorNumElements();
- int NumEltsInLane = 128 / VT.getScalarSizeInBits();
- for (int i = 0; i < NumElts; ++i) {
- unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
- int Pos = (i % NumEltsInLane) / 2 + LaneStart;
- Pos += (Unary ? 0 : NumElts * (i % 2));
- Pos += (Lo ? 0 : NumEltsInLane / 2);
- Mask.push_back(Pos);
- }
- }
-
- /// Helper function to scale a shuffle or target shuffle mask, replacing each
- /// mask index with the scaled sequential indices for an equivalent narrowed
- /// mask. This is the reverse process to canWidenShuffleElements, but can
- /// always succeed.
- template <typename T>
- void scaleShuffleMask(size_t Scale, ArrayRef<T> Mask,
- SmallVectorImpl<T> &ScaledMask) {
- assert(0 < Scale && "Unexpected scaling factor");
- size_t NumElts = Mask.size();
- ScaledMask.assign(NumElts * Scale, -1);
-
- for (size_t i = 0; i != NumElts; ++i) {
- int M = Mask[i];
-
- // Repeat sentinel values in every mask element.
- if (M < 0) {
- for (size_t s = 0; s != Scale; ++s)
- ScaledMask[(Scale * i) + s] = M;
- continue;
- }
-
- // Scale mask element and increment across each mask element.
- for (size_t s = 0; s != Scale; ++s)
- ScaledMask[(Scale * i) + s] = (Scale * M) + s;
- }
- }
+ void createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo,
+ bool Unary);
+
+ /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
+ /// imposed by AVX and specific to the unary pattern. Example:
+ /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
+ /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
+ void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
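
For orientation only (not part of the patch): a minimal standalone sketch of the mask patterns the two helpers above describe. The unpack logic mirrors the body this change moves out of the header, and the splat2 pattern follows the Lo/Hi example in the comment; the function names below are illustrative, not LLVM API.

#include <cassert>
#include <cstdio>
#include <vector>

// Sketch of createUnpackShuffleMask: interleave elements within each 128-bit
// lane (NumEltsInLane = 128 / element size in bits), mirroring the removed
// header implementation above.
static void sketchUnpackMask(int NumElts, int NumEltsInLane,
                             std::vector<int> &Mask, bool Lo, bool Unary) {
  assert(Mask.empty() && "Expected an empty shuffle mask vector");
  for (int i = 0; i < NumElts; ++i) {
    int LaneStart = (i / NumEltsInLane) * NumEltsInLane;
    int Pos = (i % NumEltsInLane) / 2 + LaneStart;
    Pos += (Unary ? 0 : NumElts * (i % 2));
    Pos += (Lo ? 0 : NumEltsInLane / 2);
    Mask.push_back(Pos);
  }
}

// Sketch of createSplat2ShuffleMask: like the unary unpack pattern, but with
// no per-128-bit-lane restriction, i.e. <0,0,1,1,...> (Lo) or the same shape
// starting at NumElts/2 (Hi).
static void sketchSplat2Mask(int NumElts, std::vector<int> &Mask, bool Lo) {
  assert(Mask.empty() && "Expected an empty shuffle mask vector");
  for (int i = 0; i != NumElts / 2; ++i) {
    int Elt = i + (Lo ? 0 : NumElts / 2);
    Mask.push_back(Elt);
    Mask.push_back(Elt);
  }
}

int main() {
  std::vector<int> M;
  sketchSplat2Mask(8, M, /*Lo=*/true);                    // 0 0 1 1 2 2 3 3
  for (int E : M) std::printf("%d ", E);
  std::printf("\n");
  M.clear();
  sketchUnpackMask(8, 4, M, /*Lo=*/true, /*Unary=*/true); // 0 0 1 1 4 4 5 5
  for (int E : M) std::printf("%d ", E);
  std::printf("\n");
  return 0;
}
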
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
index 0a79b793a980..1628f85da808 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
@@ -92,9 +92,7 @@ static bool IsCallReturnTwice(llvm::MachineOperand &MOp) {
if (!CalleeFn)
return false;
AttributeList Attrs = CalleeFn->getAttributes();
- if (Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::ReturnsTwice))
- return true;
- return false;
+ return Attrs.hasFnAttribute(Attribute::ReturnsTwice);
}
bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) {
@@ -138,17 +136,38 @@ bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) {
if (MBB.hasAddressTaken())
Changed |= addENDBR(MBB, MBB.begin());
- // Exception handle may indirectly jump to catch pad, So we should add
- // ENDBR before catch pad instructions.
- bool EHPadIBTNeeded = MBB.isEHPad();
-
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
if (I->isCall() && IsCallReturnTwice(I->getOperand(0)))
Changed |= addENDBR(MBB, std::next(I));
+ }
- if (EHPadIBTNeeded && I->isEHLabel()) {
+  // Exception handling may indirectly jump to a catch pad, so we should add
+  // ENDBR before catch pad instructions. For the SjLj exception model, a new
+  // BB (the new landing pad) is created that indirectly jumps to the old one.
+ if (TM->Options.ExceptionModel == ExceptionHandling::SjLj) {
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
+ // New Landingpad BB without EHLabel.
+ if (MBB.isEHPad()) {
+ if (I->isDebugInstr())
+ continue;
+ Changed |= addENDBR(MBB, I);
+ break;
+ } else if (I->isEHLabel()) {
+        // Old landing pad BB (no longer a landing pad now) with
+        // the old "callee" EHLabel.
+ MCSymbol *Sym = I->getOperand(0).getMCSymbol();
+ if (!MF.hasCallSiteLandingPad(Sym))
+ continue;
+ Changed |= addENDBR(MBB, std::next(I));
+ break;
+ }
+ }
+ } else if (MBB.isEHPad()){
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
+ if (!I->isEHLabel())
+ continue;
Changed |= addENDBR(MBB, std::next(I));
- EHPadIBTNeeded = false;
+ break;
}
}
}
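
As a side note (not part of the patch): the placement decision above can be summarized with a small hypothetical model. The names below are illustrative stand-ins, and the behaviour is paraphrased from the loop logic in this hunk.

#include <cstdio>

enum class EndbrSpot { None, BlockStart, AfterEHLabel };

// Toy model of one basic block, as seen by the pass above.
struct BlockModel {
  bool IsEHPad;          // current landing pad (SjLj: the new, label-less one)
  bool HasCallSiteLabel; // carries an EH label found in the call-site table
};

// Where an ENDBR is inserted for exception handling, per the hunk above.
static EndbrSpot endbrForEH(const BlockModel &B, bool SjLjModel) {
  if (SjLjModel) {
    if (B.IsEHPad)
      return EndbrSpot::BlockStart;   // new landing pad has no EH label
    if (B.HasCallSiteLabel)
      return EndbrSpot::AfterEHLabel; // old landing pad keeps its "callee" label
    return EndbrSpot::None;
  }
  // Non-SjLj EH: ENDBR goes right after the EH label of a landing pad.
  return B.IsEHPad ? EndbrSpot::AfterEHLabel : EndbrSpot::None;
}

int main() {
  BlockModel NewPad{true, false}, OldPad{false, true}, Plain{false, false};
  std::printf("%d %d %d\n", (int)endbrForEH(NewPad, true),
              (int)endbrForEH(OldPad, true), (int)endbrForEH(Plain, false));
  return 0;
}
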
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectThunks.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectThunks.cpp
index 36b9c3ccc959..828887d96129 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectThunks.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86IndirectThunks.cpp
@@ -29,6 +29,7 @@
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86Subtarget.h"
+#include "llvm/CodeGen/IndirectThunks.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -40,6 +41,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -56,23 +58,6 @@ static const char LVIThunkNamePrefix[] = "__llvm_lvi_thunk_";
static const char R11LVIThunkName[] = "__llvm_lvi_thunk_r11";
namespace {
-template <typename Derived> class ThunkInserter {
- Derived &getDerived() { return *static_cast<Derived *>(this); }
-
-protected:
- bool InsertedThunks;
- void doInitialization(Module &M) {}
- void createThunkFunction(MachineModuleInfo &MMI, StringRef Name);
-
-public:
- void init(Module &M) {
- InsertedThunks = false;
- getDerived().doInitialization(M);
- }
- // return `true` if `MMI` or `MF` was modified
- bool run(MachineModuleInfo &MMI, MachineFunction &MF);
-};
-
struct RetpolineThunkInserter : ThunkInserter<RetpolineThunkInserter> {
const char *getThunkPrefix() { return RetpolineNamePrefix; }
bool mayUseThunk(const MachineFunction &MF) {
@@ -94,12 +79,9 @@ struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> {
createThunkFunction(MMI, R11LVIThunkName);
}
void populateThunk(MachineFunction &MF) {
- // Grab the entry MBB and erase any other blocks. O0 codegen appears to
- // generate two bbs for the entry block.
+  assert(MF.size() == 1);
MachineBasicBlock *Entry = &MF.front();
Entry->clear();
- while (MF.size() > 1)
- MF.erase(std::next(MF.begin()));
// This code mitigates LVI by replacing each indirect call/jump with a
// direct call/jump to a thunk that looks like:
@@ -128,12 +110,6 @@ public:
bool doInitialization(Module &M) override;
bool runOnMachineFunction(MachineFunction &MF) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- MachineFunctionPass::getAnalysisUsage(AU);
- AU.addRequired<MachineModuleInfoWrapperPass>();
- AU.addPreserved<MachineModuleInfoWrapperPass>();
- }
-
private:
std::tuple<RetpolineThunkInserter, LVIThunkInserter> TIs;
@@ -224,12 +200,9 @@ void RetpolineThunkInserter::populateThunk(MachineFunction &MF) {
}
const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
- // Grab the entry MBB and erase any other blocks. O0 codegen appears to
- // generate two bbs for the entry block.
+  assert(MF.size() == 1);
MachineBasicBlock *Entry = &MF.front();
Entry->clear();
- while (MF.size() > 1)
- MF.erase(std::next(MF.begin()));
MachineBasicBlock *CaptureSpec =
MF.CreateMachineBasicBlock(Entry->getBasicBlock());
@@ -279,73 +252,6 @@ void RetpolineThunkInserter::populateThunk(MachineFunction &MF) {
BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
}
-template <typename Derived>
-void ThunkInserter<Derived>::createThunkFunction(MachineModuleInfo &MMI,
- StringRef Name) {
- assert(Name.startswith(getDerived().getThunkPrefix()) &&
- "Created a thunk with an unexpected prefix!");
-
- Module &M = const_cast<Module &>(*MMI.getModule());
- LLVMContext &Ctx = M.getContext();
- auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
- Function *F =
- Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M);
- F->setVisibility(GlobalValue::HiddenVisibility);
- F->setComdat(M.getOrInsertComdat(Name));
-
- // Add Attributes so that we don't create a frame, unwind information, or
- // inline.
- AttrBuilder B;
- B.addAttribute(llvm::Attribute::NoUnwind);
- B.addAttribute(llvm::Attribute::Naked);
- F->addAttributes(llvm::AttributeList::FunctionIndex, B);
-
- // Populate our function a bit so that we can verify.
- BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
- IRBuilder<> Builder(Entry);
-
- Builder.CreateRetVoid();
-
- // MachineFunctions/MachineBasicBlocks aren't created automatically for the
- // IR-level constructs we already made. Create them and insert them into the
- // module.
- MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
- MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry);
-
- // Insert EntryMBB into MF. It's not in the module until we do this.
- MF.insert(MF.end(), EntryMBB);
- // Set MF properties. We never use vregs...
- MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
-}
-
-template <typename Derived>
-bool ThunkInserter<Derived>::run(MachineModuleInfo &MMI, MachineFunction &MF) {
- // If MF is not a thunk, check to see if we need to insert a thunk.
- if (!MF.getName().startswith(getDerived().getThunkPrefix())) {
- // If we've already inserted a thunk, nothing else to do.
- if (InsertedThunks)
- return false;
-
- // Only add a thunk if one of the functions has the corresponding feature
- // enabled in its subtarget, and doesn't enable external thunks.
- // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
- // nothing will end up calling it.
- // FIXME: It's a little silly to look at every function just to enumerate
- // the subtargets, but eventually we'll want to look at them for indirect
- // calls, so maybe this is OK.
- if (!getDerived().mayUseThunk(MF))
- return false;
-
- getDerived().insertThunks(MMI);
- InsertedThunks = true;
- return true;
- }
-
- // If this *is* a thunk function, we need to populate it with the correct MI.
- getDerived().populateThunk(MF);
- return true;
-}
-
FunctionPass *llvm::createX86IndirectThunksPass() {
return new X86IndirectThunks();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp
index 2b1e3f23efd7..53925bbfd72f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InsertPrefetch.cpp
@@ -173,7 +173,7 @@ bool X86InsertPrefetch::doInitialization(Module &M) {
void X86InsertPrefetch::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequired<MachineModuleInfoWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
}
bool X86InsertPrefetch::runOnMachineFunction(MachineFunction &MF) {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InsertWait.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InsertWait.cpp
new file mode 100644
index 000000000000..a82d98d88b30
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InsertWait.cpp
@@ -0,0 +1,151 @@
+//===-- X86InsertWait.cpp - Strict FP: insert wait after X87 instructions --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which inserts x86 wait instructions after each
+// X87 instruction when strict floating point is enabled.
+//
+// The logic for inserting a wait instruction after an X87 instruction is:
+// 1. If the X87 instruction neither raises a floating-point exception nor is
+//    a load/store instruction, or it is an x87 control instruction, don't
+//    insert a wait.
+// 2. If the X87 instruction is followed by an X87 instruction that itself
+//    synchronizes X87 exceptions, don't insert a wait.
+// 3. Otherwise, insert a wait instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-insert-wait"
+
+namespace {
+
+class WaitInsert : public MachineFunctionPass {
+public:
+ static char ID;
+
+ WaitInsert() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return "X86 insert wait instruction";
+ }
+
+private:
+ const TargetInstrInfo *TII; // Machine instruction info.
+};
+
+} // namespace
+
+char WaitInsert::ID = 0;
+
+FunctionPass *llvm::createX86InsertX87waitPass() { return new WaitInsert(); }
+
+/// Return true if Reg is an X87 register.
+static bool isX87Reg(unsigned Reg) {
+ return (Reg == X86::FPCW || Reg == X86::FPSW ||
+ (Reg >= X86::ST0 && Reg <= X86::ST7));
+}
+
+/// Check if the instruction is an X87 instruction.
+static bool isX87Instruction(MachineInstr &MI) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ if (isX87Reg(MO.getReg()))
+ return true;
+ }
+ return false;
+}
+
+static bool isX87ControlInstruction(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case X86::FNINIT:
+ case X86::FLDCW16m:
+ case X86::FNSTCW16m:
+ case X86::FNSTSW16r:
+ case X86::FNSTSWm:
+ case X86::FNCLEX:
+ case X86::FLDENVm:
+ case X86::FSTENVm:
+ case X86::FRSTORm:
+ case X86::FSAVEm:
+ case X86::FINCSTP:
+ case X86::FDECSTP:
+ case X86::FFREE:
+ case X86::FFREEP:
+ case X86::FNOP:
+ case X86::WAIT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool isX87NonWaitingControlInstruction(MachineInstr &MI) {
+  // A few special control instructions don't perform a wait operation.
+ switch (MI.getOpcode()) {
+ case X86::FNINIT:
+ case X86::FNSTSW16r:
+ case X86::FNSTSWm:
+ case X86::FNSTCW16m:
+ case X86::FNCLEX:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool WaitInsert::runOnMachineFunction(MachineFunction &MF) {
+ if (!MF.getFunction().hasFnAttribute(Attribute::StrictFP))
+ return false;
+
+ const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
+ TII = ST.getInstrInfo();
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineBasicBlock::iterator MI = MBB.begin(); MI != MBB.end(); ++MI) {
+      // Skip non-X87 instructions.
+ if (!isX87Instruction(*MI))
+ continue;
+      // If the instruction neither may raise a floating-point exception nor
+      // is a load/store instruction, or it is an x87 control instruction,
+      // do not insert a wait.
+ if (!(MI->mayRaiseFPException() || MI->mayLoadOrStore()) ||
+ isX87ControlInstruction(*MI))
+ continue;
+ // If the following instruction is an X87 instruction and isn't an X87
+      // non-waiting control instruction, we can omit inserting a wait.
+ MachineBasicBlock::iterator AfterMI = std::next(MI);
+ if (AfterMI != MBB.end() && isX87Instruction(*AfterMI) &&
+ !isX87NonWaitingControlInstruction(*AfterMI))
+ continue;
+
+ BuildMI(MBB, AfterMI, MI->getDebugLoc(), TII->get(X86::WAIT));
+ LLVM_DEBUG(dbgs() << "\nInsert wait after:\t" << *MI);
+      // Skip over the newly inserted wait instruction.
+ ++MI;
+ Changed = true;
+ }
+ }
+ return Changed;
+}
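
For readers skimming the diff, a compact sketch (not part of the patch) of the per-instruction decision this pass makes; the struct and function names are hypothetical stand-ins for the MachineInstr queries used above.

#include <cstdio>

// Minimal stand-in for the properties the pass queries on a MachineInstr.
struct X87InstrModel {
  bool IsX87;
  bool MayRaiseFPException;
  bool MayLoadOrStore;
  bool IsX87Control;           // FLDCW, FNSTSW, FNOP, WAIT, ...
  bool IsX87NonWaitingControl; // FNINIT, FNSTSW, FNSTCW, FNCLEX
};

// Returns true when a wait should be emitted after Cur, given the next
// instruction in the block (or nullptr at the block end), mirroring the
// three rules in the file header above.
static bool needsWaitAfter(const X87InstrModel &Cur, const X87InstrModel *Next) {
  if (!Cur.IsX87)
    return false;
  // Rule 1: no FP exception and no memory access, or an x87 control instruction.
  if (!(Cur.MayRaiseFPException || Cur.MayLoadOrStore) || Cur.IsX87Control)
    return false;
  // Rule 2: the next instruction is an X87 instruction that already waits.
  if (Next && Next->IsX87 && !Next->IsX87NonWaitingControl)
    return false;
  // Rule 3: otherwise, synchronize exceptions with an explicit wait.
  return true;
}

int main() {
  X87InstrModel FAddMem{true, true, true, false, false}; // e.g. FADD m64
  X87InstrModel Fnstsw{true, false, false, true, true};  // e.g. FNSTSW
  std::printf("%d %d\n",
              needsWaitAfter(FAddMem, nullptr),  // 1: wait needed at block end
              needsWaitAfter(FAddMem, &Fnstsw)); // 1: next insn does not wait
  return 0;
}
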
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td
new file mode 100644
index 000000000000..e26dd5050a23
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td
@@ -0,0 +1,119 @@
+//===---- X86InstrAMX.td - AMX Instruction Set Extension --*- tablegen -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the Intel AMX instruction
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// AMX instructions
+
+let Predicates = [HasAMXTILE, In64BitMode] in {
+ let SchedRW = [WriteSystem] in {
+ let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
+ def LDTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src),
+ "ldtilecfg\t$src",
+ [(int_x86_ldtilecfg addr:$src)]>, VEX, T8PS;
+ def STTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src),
+ "sttilecfg\t$src",
+ [(int_x86_sttilecfg addr:$src)]>, VEX, T8PD;
+ def TILELOADD : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
+ (ins sibmem:$src),
+ "tileloadd\t{$src, $dst|$dst, $src}", []>,
+ VEX, T8XD;
+ def TILELOADDT1 : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
+ (ins sibmem:$src),
+ "tileloaddt1\t{$src, $dst|$dst, $src}", []>,
+ VEX, T8PD;
+ let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
+ def TILERELEASE : I<0x49, MRM_C0, (outs), (ins),
+ "tilerelease", [(int_x86_tilerelease)]>, VEX, T8PS;
+ def TILESTORED : I<0x4b, MRMDestMemFSIB, (outs),
+ (ins sibmem:$dst, TILE:$src),
+ "tilestored\t{$src, $dst|$dst, $src}", []>,
+ VEX, T8XS;
+ def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins),
+ "tilezero\t$dst", []>,
+ VEX, T8XD;
+
+ let usesCustomInserter = 1 in {
+ // Pseudo instructions, using immediates instead of tile registers.
+ // To be translated to the actual instructions in X86ISelLowering.cpp
+ def PTILELOADD : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
+ def PTILELOADDT1 : PseudoI<(outs), (ins u8imm:$src1,
+ sibmem:$src2), []>;
+ def PTILESTORED : PseudoI<(outs), (ins i8mem:$dst, u8imm:$src), []>;
+ def PTILEZERO : PseudoI<(outs), (ins u8imm:$src),
+ [(int_x86_tilezero imm:$src)]>;
+ }
+ } // SchedRW
+} // HasAMXTILE
+
+let Predicates = [HasAMXINT8, In64BitMode] in {
+ let SchedRW = [WriteSystem] in {
+ let Constraints = "$src1 = $dst" in {
+ def TDPBSSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
+ (ins TILE:$src1, TILE:$src2, TILE:$src3),
+ "tdpbssd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
+ VEX_4V, T8XD;
+ def TDPBSUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
+ (ins TILE:$src1, TILE:$src2, TILE:$src3),
+ "tdpbsud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
+ VEX_4V, T8XS;
+ def TDPBUSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
+ (ins TILE:$src1, TILE:$src2, TILE:$src3),
+ "tdpbusd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
+ VEX_4V, T8PD;
+ def TDPBUUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
+ (ins TILE:$src1, TILE:$src2, TILE:$src3),
+ "tdpbuud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
+ VEX_4V, T8PS;
+ }
+
+ let usesCustomInserter = 1 in {
+ // Pseudo instructions, using immediates instead of tile registers.
+ // To be translated to the actual instructions in X86ISelLowering.cpp
+ def PTDPBSSD : PseudoI<(outs), (ins u8imm:$src1,
+ u8imm:$src2, u8imm:$src3),
+ [(int_x86_tdpbssd imm:$src1,
+ imm:$src2, imm:$src3)]>;
+ def PTDPBSUD : PseudoI<(outs), (ins u8imm:$src1,
+ u8imm:$src2, u8imm:$src3),
+ [(int_x86_tdpbsud imm:$src1,
+ imm:$src2, imm:$src3)]>;
+ def PTDPBUSD : PseudoI<(outs), (ins u8imm:$src1,
+ u8imm:$src2, u8imm:$src3),
+ [(int_x86_tdpbusd imm:$src1,
+ imm:$src2, imm:$src3)]>;
+ def PTDPBUUD : PseudoI<(outs), (ins u8imm:$src1,
+ u8imm:$src2, u8imm:$src3),
+ [(int_x86_tdpbuud imm:$src1,
+ imm:$src2, imm:$src3)]>;
+ }
+ }
+} // HasAMXINT8
+
+let Predicates = [HasAMXBF16, In64BitMode] in {
+ let SchedRW = [WriteSystem] in {
+ let Constraints = "$src1 = $dst" in
+ def TDPBF16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
+ (ins TILE:$src1, TILE:$src2, TILE:$src3),
+ "tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ []>, VEX_4V, T8XS;
+
+ let usesCustomInserter = 1 in {
+ // Pseudo instructions, using immediates instead of tile registers.
+ // To be translated to the actual instructions in X86ISelLowering.cpp
+ def PTDPBF16PS : PseudoI<(outs), (ins u8imm:$src1,
+ u8imm:$src2, u8imm:$src3),
+ [(int_x86_tdpbf16ps imm:$src1,
+ imm:$src2, imm:$src3)]>;
+ }
+ }
+} // HasAMXBF16
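
For context (not part of the patch), a rough user-level sketch of the sequence these definitions encode, assuming the <immintrin.h> AMX wrappers (_tile_loadconfig, _tile_loadd, _tile_zero, _tile_dpbssd, _tile_stored, _tile_release) and the -mamx-tile/-mamx-int8 driver flags that ship with later compiler releases; the tile-config layout follows the Intel SDM and should be verified against it.

// Build (later toolchains): clang++ -O2 -mamx-tile -mamx-int8 amx_sketch.cpp
// Note: on Linux the OS must grant XTILEDATA state (an arch_prctl permission
// request) before tile instructions may execute; that step is omitted here.
#include <immintrin.h>
#include <cstdint>
#include <cstring>

// 64-byte configuration blob consumed by ldtilecfg (offsets per the Intel
// SDM, an assumption to verify): palette id, start row, per-tile bytes per
// row, per-tile row counts.
struct alignas(64) TileConfig {
  uint8_t palette_id;
  uint8_t start_row;
  uint8_t reserved[14];
  uint16_t colsb[16];
  uint8_t rows[16];
};
static_assert(sizeof(TileConfig) == 64, "tile config must be 64 bytes");

int main() {
  TileConfig cfg;
  std::memset(&cfg, 0, sizeof(cfg));
  cfg.palette_id = 1;
  for (int t = 0; t < 3; ++t) { cfg.rows[t] = 16; cfg.colsb[t] = 64; }

  alignas(64) int8_t a[16][64] = {};  // signed 8-bit inputs
  alignas(64) int8_t b[16][64] = {};
  alignas(64) int32_t c[16][16] = {}; // 32-bit accumulators

  _tile_loadconfig(&cfg); // LDTILECFG
  _tile_loadd(1, a, 64);  // TILELOADD  tmm1
  _tile_loadd(2, b, 64);  // TILELOADD  tmm2
  _tile_zero(0);          // TILEZERO   tmm0
  _tile_dpbssd(0, 1, 2);  // TDPBSSD    tmm0, tmm1, tmm2
  _tile_stored(0, c, 64); // TILESTORED
  _tile_release();        // TILERELEASE
  return 0;
}
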
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
index 32f012033fb0..a3ad0b1c8dd6 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -76,11 +76,11 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
- ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
- !cast<ComplexPattern>("sse_load_f32"),
- !if (!eq (EltTypeName, "f64"),
- !cast<ComplexPattern>("sse_load_f64"),
- ?));
+ PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f32"),
+ !cast<PatFrags>("sse_load_f32"),
+ !if (!eq (EltTypeName, "f64"),
+ !cast<PatFrags>("sse_load_f64"),
+ ?));
// The string to specify embedded broadcast in assembly.
string BroadcastStr = "{1to" # NumElts # "}";
@@ -169,6 +169,18 @@ def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
+// Used for matching masked operations. Ensures the operation part only has a
+// single use.
+def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
+ (vselect node:$mask, node:$src1, node:$src2), [{
+ return isProfitableToFormMaskedOp(N);
+}]>;
+
+def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
+ (X86selects node:$mask, node:$src1, node:$src2), [{
+ return isProfitableToFormMaskedOp(N);
+}]>;
+
// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
@@ -220,7 +232,7 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskingRHS,
- SDNode Select = vselect,
+ SDPatternOperator Select = vselect_mask,
string MaskingConstraint = "",
bit IsCommutable = 0,
bit IsKCommutable = 0,
@@ -236,35 +248,36 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
-// perserved vector elements come from a new dummy input operand tied to $dst.
+// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskRHS,
bit IsCommutable = 0, bit IsKCommutable = 0,
- SDNode Select = vselect> :
+ bit IsKZCommutable = IsCommutable> :
AVX512_maskable_custom<O, F, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm,
[(set _.RC:$dst, RHS)],
[(set _.RC:$dst,
- (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
+ (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
[(set _.RC:$dst,
- (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
- "$src0 = $dst", IsCommutable, IsKCommutable>;
+ (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
+ "$src0 = $dst", IsCommutable, IsKCommutable,
+ IsKZCommutable>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
-// perserved vector elements come from a new dummy input operand tied to $dst.
+// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS,
bit IsCommutable = 0, bit IsKCommutable = 0,
bit IsKZCommutable = IsCommutable,
- SDNode Select = vselect> :
+ SDPatternOperator Select = vselect_mask> :
AVX512_maskable_common<O, F, _, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
@@ -280,7 +293,7 @@ multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
string AttSrcAsm, string IntelSrcAsm,
dag RHS> :
AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
- RHS, 0, 0, 0, X86selects>;
+ RHS, 0, 0, 0, X86selects_mask>;
// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
@@ -292,7 +305,7 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
dag RHS,
bit IsCommutable = 0,
bit IsKCommutable = 0,
- SDNode Select = vselect,
+ SDPatternOperator Select = vselect_mask,
bit MaskOnly = 0> :
AVX512_maskable_common<O, F, _, Outs,
!con((ins _.RC:$src1), NonTiedIns),
@@ -317,9 +330,9 @@ multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
!con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
!con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
- (vselect InVT.KRCWM:$mask, RHS,
+ (vselect_mask InVT.KRCWM:$mask, RHS,
(bitconvert InVT.RC:$src1)),
- vselect, "", IsCommutable>;
+ vselect_mask, "", IsCommutable>;
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag NonTiedIns, string OpcodeStr,
@@ -330,7 +343,7 @@ multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
bit MaskOnly = 0> :
AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
- X86selects, MaskOnly>;
+ X86selects_mask, MaskOnly>;
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins,
@@ -399,6 +412,36 @@ multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(and _.KRCWM:$mask, RHS_su), IsCommutable>;
+// Used by conversion instructions.
+multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs,
+ dag Ins, dag MaskingIns, dag ZeroMaskingIns,
+ string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
+ AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
+ AttSrcAsm, IntelSrcAsm,
+ [(set _.RC:$dst, RHS)],
+ [(set _.RC:$dst, MaskingRHS)],
+ [(set _.RC:$dst, ZeroMaskingRHS)],
+ "$src0 = $dst">;
+
+multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs, dag NonTiedIns, string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS, dag MaskingRHS, bit IsCommutable,
+ bit IsKCommutable> :
+ AVX512_maskable_custom<O, F, Outs,
+ !con((ins _.RC:$src1), NonTiedIns),
+ !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
+ !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
+ OpcodeStr, AttSrcAsm, IntelSrcAsm,
+ [(set _.RC:$dst, RHS)],
+ [(set _.RC:$dst,
+ (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
+ [(set _.RC:$dst,
+ (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
+ "", IsCommutable, IsKCommutable>;
// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
@@ -625,45 +668,45 @@ multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
list<Predicate> p> {
let Predicates = p in {
def : Pat<(Cast.VT
- (vselect Cast.KRCWM:$mask,
- (bitconvert
- (vinsert_insert:$ins (To.VT To.RC:$src1),
- (From.VT From.RC:$src2),
- (iPTR imm))),
- Cast.RC:$src0)),
+ (vselect_mask Cast.KRCWM:$mask,
+ (bitconvert
+ (vinsert_insert:$ins (To.VT To.RC:$src1),
+ (From.VT From.RC:$src2),
+ (iPTR imm))),
+ Cast.RC:$src0)),
(!cast<Instruction>(InstrStr#"rrk")
Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
(INSERT_get_vinsert_imm To.RC:$ins))>;
def : Pat<(Cast.VT
- (vselect Cast.KRCWM:$mask,
- (bitconvert
- (vinsert_insert:$ins (To.VT To.RC:$src1),
- (From.VT
- (bitconvert
- (From.LdFrag addr:$src2))),
- (iPTR imm))),
- Cast.RC:$src0)),
+ (vselect_mask Cast.KRCWM:$mask,
+ (bitconvert
+ (vinsert_insert:$ins (To.VT To.RC:$src1),
+ (From.VT
+ (bitconvert
+ (From.LdFrag addr:$src2))),
+ (iPTR imm))),
+ Cast.RC:$src0)),
(!cast<Instruction>(InstrStr#"rmk")
Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
(INSERT_get_vinsert_imm To.RC:$ins))>;
def : Pat<(Cast.VT
- (vselect Cast.KRCWM:$mask,
- (bitconvert
- (vinsert_insert:$ins (To.VT To.RC:$src1),
- (From.VT From.RC:$src2),
- (iPTR imm))),
- Cast.ImmAllZerosV)),
+ (vselect_mask Cast.KRCWM:$mask,
+ (bitconvert
+ (vinsert_insert:$ins (To.VT To.RC:$src1),
+ (From.VT From.RC:$src2),
+ (iPTR imm))),
+ Cast.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#"rrkz")
Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
(INSERT_get_vinsert_imm To.RC:$ins))>;
def : Pat<(Cast.VT
- (vselect Cast.KRCWM:$mask,
- (bitconvert
- (vinsert_insert:$ins (To.VT To.RC:$src1),
- (From.VT (From.LdFrag addr:$src2)),
- (iPTR imm))),
- Cast.ImmAllZerosV)),
+ (vselect_mask Cast.KRCWM:$mask,
+ (bitconvert
+ (vinsert_insert:$ins (To.VT To.RC:$src1),
+ (From.VT (From.LdFrag addr:$src2)),
+ (iPTR imm))),
+ Cast.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#"rmkz")
Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
(INSERT_get_vinsert_imm To.RC:$ins))>;
@@ -981,20 +1024,20 @@ multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
SDNodeXForm EXTRACT_get_vextract_imm,
list<Predicate> p> {
let Predicates = p in {
- def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
- (bitconvert
- (To.VT (vextract_extract:$ext
- (From.VT From.RC:$src), (iPTR imm)))),
- To.RC:$src0)),
+ def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
+ (bitconvert
+ (To.VT (vextract_extract:$ext
+ (From.VT From.RC:$src), (iPTR imm)))),
+ To.RC:$src0)),
(Cast.VT (!cast<Instruction>(InstrStr#"rrk")
Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
(EXTRACT_get_vextract_imm To.RC:$ext)))>;
- def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
- (bitconvert
- (To.VT (vextract_extract:$ext
- (From.VT From.RC:$src), (iPTR imm)))),
- Cast.ImmAllZerosV)),
+ def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
+ (bitconvert
+ (To.VT (vextract_extract:$ext
+ (From.VT From.RC:$src), (iPTR imm)))),
+ Cast.ImmAllZerosV)),
(Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
Cast.KRCWM:$mask, From.RC:$src,
(EXTRACT_get_vextract_imm To.RC:$ext)))>;
@@ -1101,18 +1144,18 @@ multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
string Name,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
- (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
+ (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
(SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
- def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
- (X86VBroadcast SrcInfo.FRC:$src),
- DestInfo.RC:$src0)),
- (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
+ def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
+ (X86VBroadcast SrcInfo.FRC:$src),
+ DestInfo.RC:$src0)),
+ (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
(SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
- def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
- (X86VBroadcast SrcInfo.FRC:$src),
- DestInfo.ImmAllZerosV)),
- (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
+ def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
+ (X86VBroadcast SrcInfo.FRC:$src),
+ DestInfo.ImmAllZerosV)),
+ (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}
@@ -1128,83 +1171,83 @@ multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
SDPatternOperator UnmaskedOp = X86VBroadcast,
SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
let hasSideEffects = 0 in
- def r : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set MaskInfo.RC:$dst,
- (MaskInfo.VT
- (bitconvert
- (DestInfo.VT
- (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
- DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
- def rkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
- (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
- !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
- "${dst} {${mask}} {z}, $src}"),
- [(set MaskInfo.RC:$dst,
- (vselect MaskInfo.KRCWM:$mask,
- (MaskInfo.VT
- (bitconvert
- (DestInfo.VT
- (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
- MaskInfo.ImmAllZerosV))],
- DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
- let Constraints = "$src0 = $dst" in
- def rk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
- (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
- SrcInfo.RC:$src),
- !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
- "${dst} {${mask}}, $src}"),
+ def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set MaskInfo.RC:$dst,
- (vselect MaskInfo.KRCWM:$mask,
- (MaskInfo.VT
- (bitconvert
- (DestInfo.VT
- (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
- MaskInfo.RC:$src0))],
- DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
+ (MaskInfo.VT
+ (bitconvert
+ (DestInfo.VT
+ (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
+ DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
+ def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
+ (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
+ "${dst} {${mask}} {z}, $src}"),
+ [(set MaskInfo.RC:$dst,
+ (vselect_mask MaskInfo.KRCWM:$mask,
+ (MaskInfo.VT
+ (bitconvert
+ (DestInfo.VT
+ (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
+ MaskInfo.ImmAllZerosV))],
+ DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
+ let Constraints = "$src0 = $dst" in
+ def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
+ (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
+ SrcInfo.RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
+ "${dst} {${mask}}, $src}"),
+ [(set MaskInfo.RC:$dst,
+ (vselect_mask MaskInfo.KRCWM:$mask,
+ (MaskInfo.VT
+ (bitconvert
+ (DestInfo.VT
+ (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
+ MaskInfo.RC:$src0))],
+ DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
let hasSideEffects = 0, mayLoad = 1 in
- def m : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
- (ins SrcInfo.ScalarMemOp:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set MaskInfo.RC:$dst,
- (MaskInfo.VT
- (bitconvert
- (DestInfo.VT
- (UnmaskedBcastOp addr:$src)))))],
- DestInfo.ExeDomain>, T8PD, EVEX,
- EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
-
- def mkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
- (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
- !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
- "${dst} {${mask}} {z}, $src}"),
- [(set MaskInfo.RC:$dst,
- (vselect MaskInfo.KRCWM:$mask,
- (MaskInfo.VT
- (bitconvert
- (DestInfo.VT
- (SrcInfo.BroadcastLdFrag addr:$src)))),
- MaskInfo.ImmAllZerosV))],
- DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
- EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
+ def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
+ (ins SrcInfo.ScalarMemOp:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set MaskInfo.RC:$dst,
+ (MaskInfo.VT
+ (bitconvert
+ (DestInfo.VT
+ (UnmaskedBcastOp addr:$src)))))],
+ DestInfo.ExeDomain>, T8PD, EVEX,
+ EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
+
+ def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
+ (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
+ !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
+ "${dst} {${mask}} {z}, $src}"),
+ [(set MaskInfo.RC:$dst,
+ (vselect_mask MaskInfo.KRCWM:$mask,
+ (MaskInfo.VT
+ (bitconvert
+ (DestInfo.VT
+ (SrcInfo.BroadcastLdFrag addr:$src)))),
+ MaskInfo.ImmAllZerosV))],
+ DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
+ EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
let Constraints = "$src0 = $dst",
isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
- def mk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
- (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
- SrcInfo.ScalarMemOp:$src),
- !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
- "${dst} {${mask}}, $src}"),
- [(set MaskInfo.RC:$dst,
- (vselect MaskInfo.KRCWM:$mask,
- (MaskInfo.VT
- (bitconvert
- (DestInfo.VT
- (SrcInfo.BroadcastLdFrag addr:$src)))),
- MaskInfo.RC:$src0))],
- DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
- EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
+ def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
+ (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
+ SrcInfo.ScalarMemOp:$src),
+ !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
+ "${dst} {${mask}}, $src}"),
+ [(set MaskInfo.RC:$dst,
+ (vselect_mask MaskInfo.KRCWM:$mask,
+ (MaskInfo.VT
+ (bitconvert
+ (DestInfo.VT
+ (SrcInfo.BroadcastLdFrag addr:$src)))),
+ MaskInfo.RC:$src0))],
+ DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
+ EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}
// Helper class to force mask and broadcast result to same type.
@@ -1267,35 +1310,38 @@ defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
X86VectorVTInfo _, SDPatternOperator OpNode,
RegisterClass SrcRC> {
+ // Fold with a mask even if it has multiple uses since it is cheap.
let ExeDomain = _.ExeDomain in
- defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins SrcRC:$src),
- "vpbroadcast"##_.Suffix, "$src", "$src",
- (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
- Sched<[SchedRR]>;
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins SrcRC:$src),
+ "vpbroadcast"#_.Suffix, "$src", "$src",
+ (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
+ /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
+ T8PD, EVEX, Sched<[SchedRR]>;
}
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
X86VectorVTInfo _, SDPatternOperator OpNode,
RegisterClass SrcRC, SubRegIndex Subreg> {
let hasSideEffects = 0, ExeDomain = _.ExeDomain in
- defm r : AVX512_maskable_custom<opc, MRMSrcReg,
- (outs _.RC:$dst), (ins GR32:$src),
- !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
- !con((ins _.KRCWM:$mask), (ins GR32:$src)),
- "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
- "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
+ defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
+ (outs _.RC:$dst), (ins GR32:$src),
+ !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
+ !con((ins _.KRCWM:$mask), (ins GR32:$src)),
+ "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
+ "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
def : Pat <(_.VT (OpNode SrcRC:$src)),
- (!cast<Instruction>(Name#r)
+ (!cast<Instruction>(Name#rr)
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
+ // Fold with a mask even if it has multiple uses since it is cheap.
def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
- (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
+ (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
- (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
+ (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
@@ -1392,72 +1438,6 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
AVX5128IBase, EVEX;
}
-let Predicates = [HasAVX512] in {
- // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
- def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
- (VPBROADCASTQZm addr:$src)>;
-
- // FIXME this is to handle aligned extloads from i8.
- def : Pat<(v16i32 (X86VBroadcast (loadi32 addr:$src))),
- (VPBROADCASTDZm addr:$src)>;
-}
-
-let Predicates = [HasVLX] in {
- // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
- def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
- (VPBROADCASTQZ128m addr:$src)>;
- def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
- (VPBROADCASTQZ256m addr:$src)>;
-
- // FIXME this is to handle aligned extloads from i8.
- def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
- (VPBROADCASTDZ128m addr:$src)>;
- def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
- (VPBROADCASTDZ256m addr:$src)>;
-}
-let Predicates = [HasVLX, HasBWI] in {
- // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
- // This means we'll encounter truncated i32 loads; match that here.
- def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
- (VPBROADCASTWZ128m addr:$src)>;
- def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
- (VPBROADCASTWZ256m addr:$src)>;
- def : Pat<(v8i16 (X86VBroadcast
- (i16 (trunc (i32 (extloadi16 addr:$src)))))),
- (VPBROADCASTWZ128m addr:$src)>;
- def : Pat<(v8i16 (X86VBroadcast
- (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
- (VPBROADCASTWZ128m addr:$src)>;
- def : Pat<(v16i16 (X86VBroadcast
- (i16 (trunc (i32 (extloadi16 addr:$src)))))),
- (VPBROADCASTWZ256m addr:$src)>;
- def : Pat<(v16i16 (X86VBroadcast
- (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
- (VPBROADCASTWZ256m addr:$src)>;
-
- // FIXME this is to handle aligned extloads from i8.
- def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
- (VPBROADCASTWZ128m addr:$src)>;
- def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
- (VPBROADCASTWZ256m addr:$src)>;
-}
-let Predicates = [HasBWI] in {
- // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
- // This means we'll encounter truncated i32 loads; match that here.
- def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
- (VPBROADCASTWZm addr:$src)>;
- def : Pat<(v32i16 (X86VBroadcast
- (i16 (trunc (i32 (extloadi16 addr:$src)))))),
- (VPBROADCASTWZm addr:$src)>;
- def : Pat<(v32i16 (X86VBroadcast
- (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
- (VPBROADCASTWZm addr:$src)>;
-
- // FIXME this is to handle aligned extloads from i8.
- def : Pat<(v32i16 (X86VBroadcast (loadi16 addr:$src))),
- (VPBROADCASTWZm addr:$src)>;
-}
-
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//
@@ -1516,38 +1496,38 @@ def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
(VBROADCASTI32X4rm addr:$src)>;
// Patterns for selects of bitcasted operations.
-def : Pat<(vselect VK16WM:$mask,
- (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
- (v16f32 immAllZerosV)),
+def : Pat<(vselect_mask VK16WM:$mask,
+ (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
+ (v16f32 immAllZerosV)),
(VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
-def : Pat<(vselect VK16WM:$mask,
- (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
- VR512:$src0),
+def : Pat<(vselect_mask VK16WM:$mask,
+ (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
+ VR512:$src0),
(VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
-def : Pat<(vselect VK16WM:$mask,
- (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
- (v16i32 immAllZerosV)),
+def : Pat<(vselect_mask VK16WM:$mask,
+ (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
+ (v16i32 immAllZerosV)),
(VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
-def : Pat<(vselect VK16WM:$mask,
- (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
- VR512:$src0),
+def : Pat<(vselect_mask VK16WM:$mask,
+ (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
+ VR512:$src0),
(VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
- (v8f64 immAllZerosV)),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
+ (v8f64 immAllZerosV)),
(VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
- VR512:$src0),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
+ VR512:$src0),
(VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
- (v8i64 immAllZerosV)),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
+ (v8i64 immAllZerosV)),
(VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
- VR512:$src0),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
+ VR512:$src0),
(VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
@@ -1569,21 +1549,21 @@ def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
(VBROADCASTI32X4Z256rm addr:$src)>;
// Patterns for selects of bitcasted operations.
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
- (v8f32 immAllZerosV)),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
+ (v8f32 immAllZerosV)),
(VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
- VR256X:$src0),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
+ VR256X:$src0),
(VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
- (v8i32 immAllZerosV)),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
+ (v8i32 immAllZerosV)),
(VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
- VR256X:$src0),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
+ VR256X:$src0),
(VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
@@ -1618,21 +1598,21 @@ defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2"
EVEX_V256, EVEX_CD8<64, CD8VT2>;
// Patterns for selects of bitcasted operations.
-def : Pat<(vselect VK4WM:$mask,
- (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
- (v4f64 immAllZerosV)),
+def : Pat<(vselect_mask VK4WM:$mask,
+ (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
+ (v4f64 immAllZerosV)),
(VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
-def : Pat<(vselect VK4WM:$mask,
- (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
- VR256X:$src0),
+def : Pat<(vselect_mask VK4WM:$mask,
+ (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
+ VR256X:$src0),
(VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
-def : Pat<(vselect VK4WM:$mask,
- (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
- (v4i64 immAllZerosV)),
+def : Pat<(vselect_mask VK4WM:$mask,
+ (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
+ (v4i64 immAllZerosV)),
(VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
-def : Pat<(vselect VK4WM:$mask,
- (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
- VR256X:$src0),
+def : Pat<(vselect_mask VK4WM:$mask,
+ (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
+ VR256X:$src0),
(VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
@@ -1651,38 +1631,38 @@ defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
EVEX_V512, EVEX_CD8<32, CD8VT8>;
// Patterns for selects of bitcasted operations.
-def : Pat<(vselect VK16WM:$mask,
- (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
- (v16f32 immAllZerosV)),
+def : Pat<(vselect_mask VK16WM:$mask,
+ (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
+ (v16f32 immAllZerosV)),
(VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
-def : Pat<(vselect VK16WM:$mask,
- (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
- VR512:$src0),
+def : Pat<(vselect_mask VK16WM:$mask,
+ (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
+ VR512:$src0),
(VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
-def : Pat<(vselect VK16WM:$mask,
- (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
- (v16i32 immAllZerosV)),
+def : Pat<(vselect_mask VK16WM:$mask,
+ (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
+ (v16i32 immAllZerosV)),
(VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
-def : Pat<(vselect VK16WM:$mask,
- (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
- VR512:$src0),
+def : Pat<(vselect_mask VK16WM:$mask,
+ (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
+ VR512:$src0),
(VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
- (v8f64 immAllZerosV)),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
+ (v8f64 immAllZerosV)),
(VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
- VR512:$src0),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
+ VR512:$src0),
(VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
- (v8i64 immAllZerosV)),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
+ (v8i64 immAllZerosV)),
(VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
-def : Pat<(vselect VK8WM:$mask,
- (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
- VR512:$src0),
+def : Pat<(vselect_mask VK8WM:$mask,
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
+ VR512:$src0),
(VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
@@ -1836,24 +1816,27 @@ defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
X86VectorVTInfo IdxVT,
X86VectorVTInfo CastVT> {
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86VPermt2 (_.VT _.RC:$src2),
- (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
- (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
+ (X86VPermt2 (_.VT _.RC:$src2),
+ (IdxVT.VT (bitconvert
+ (CastVT.VT _.RC:$src1))),
+ _.RC:$src3),
+ (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
(!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, _.RC:$src3)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86VPermt2 _.RC:$src2,
- (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
- (_.LdFrag addr:$src3)),
- (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
+ (X86VPermt2 _.RC:$src2,
+ (IdxVT.VT (bitconvert
+ (CastVT.VT _.RC:$src1))),
+ (_.LdFrag addr:$src3)),
+ (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
(!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86VPermt2 _.RC:$src2,
- (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
- (_.BroadcastLdFrag addr:$src3)),
- (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
+ (X86VPermt2 _.RC:$src2,
+ (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
+ (_.BroadcastLdFrag addr:$src3)),
+ (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
(!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3)>;
}
@@ -2085,9 +2068,9 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
(ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
- (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
+ (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
timm:$cc),
- (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
+ (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
@@ -2646,13 +2629,13 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
(i32 timm:$src2)))]>,
Sched<[sched]>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix#
+ OpcodeStr#_.Suffix#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
(X86Vfpclasss_su (_.VT _.RC:$src1),
@@ -2660,18 +2643,18 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
EVEX_K, Sched<[sched]>;
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix##
+ OpcodeStr#_.Suffix#
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,
- (X86Vfpclasss _.ScalarIntMemCPat:$src1,
- (i32 timm:$src2)))]>,
+ (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
+ (i32 timm:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix##
+ OpcodeStr#_.Suffix#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
- (X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
+ (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
(i32 timm:$src2))))]>,
EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -2686,13 +2669,13 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
(i32 timm:$src2)))]>,
Sched<[sched]>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix#
+ OpcodeStr#_.Suffix#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
(X86Vfpclass_su (_.VT _.RC:$src1),
@@ -2700,7 +2683,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
EVEX_K, Sched<[sched]>;
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix#"{"#mem#"}"#
+ OpcodeStr#_.Suffix#"{"#mem#"}"#
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclass
(_.VT (_.LdFrag addr:$src1)),
@@ -2708,7 +2691,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix#"{"#mem#"}"#
+ OpcodeStr#_.Suffix#"{"#mem#"}"#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
(_.VT (_.LdFrag addr:$src1)),
@@ -2716,18 +2699,18 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
- _.BroadcastStr##", $dst|$dst, ${src1}"
- ##_.BroadcastStr##", $src2}",
+ OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
+ _.BroadcastStr#", $dst|$dst, ${src1}"
+ #_.BroadcastStr#", $src2}",
[(set _.KRC:$dst,(X86Vfpclass
(_.VT (_.BroadcastLdFrag addr:$src1)),
(i32 timm:$src2)))]>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
- _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
- _.BroadcastStr##", $src2}",
+ OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
+ _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
+ _.BroadcastStr#", $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
(_.VT (_.BroadcastLdFrag addr:$src1)),
(i32 timm:$src2))))]>,
@@ -2979,6 +2962,8 @@ def : Pat<(vnot VK4:$src),
(COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
(COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
+def : Pat<(vnot VK1:$src),
+ (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK2)>;
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
@@ -3008,8 +2993,6 @@ multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}
-def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
-def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
@@ -3022,7 +3005,7 @@ defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XM
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
-multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
+multiclass avx512_binop_pat<SDPatternOperator VOpNode,
Instruction Inst> {
// With AVX512F, 8-bit mask is promoted to 16-bit mask,
// for the DQI set, this type is legal and KxxxB instruction is used
@@ -3033,25 +3016,25 @@ multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
(COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
// All types smaller than 8 bits require conversion anyway
- def : Pat<(OpNode VK1:$src1, VK1:$src2),
+ def : Pat<(VOpNode VK1:$src1, VK1:$src2),
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK1:$src1, VK16),
(COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
def : Pat<(VOpNode VK2:$src1, VK2:$src2),
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK2:$src1, VK16),
- (COPY_TO_REGCLASS VK2:$src2, VK16)), VK1)>;
+ (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
def : Pat<(VOpNode VK4:$src1, VK4:$src2),
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK4:$src1, VK16),
- (COPY_TO_REGCLASS VK4:$src2, VK16)), VK1)>;
+ (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
-defm : avx512_binop_pat<and, and, KANDWrr>;
-defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
-defm : avx512_binop_pat<or, or, KORWrr>;
-defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
-defm : avx512_binop_pat<xor, xor, KXORWrr>;
+defm : avx512_binop_pat<and, KANDWrr>;
+defm : avx512_binop_pat<vandn, KANDNWrr>;
+defm : avx512_binop_pat<or, KORWrr>;
+defm : avx512_binop_pat<vxnor, KXNORWrr>;
+defm : avx512_binop_pat<xor, KXORWrr>;
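// Illustration (a sketch, not part of the patch): with the fixes above, the
// v2i1 instantiation of avx512_binop_pat<and, KANDWrr> amounts to
//   def : Pat<(and VK2:$src1, VK2:$src2),
//             (COPY_TO_REGCLASS (KANDWrr (COPY_TO_REGCLASS VK2:$src1, VK16),
//                                        (COPY_TO_REGCLASS VK2:$src2, VK16)),
//              VK2)>;
// i.e. the narrow mask operands are widened to VK16, the 16-bit K
// instruction does the work, and the result is copied back to the matching
// narrow class (VK2/VK4) instead of unconditionally to VK1 as before.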
// Mask unpacking
multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
@@ -3065,7 +3048,7 @@ multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
VEX_4V, VEX_L, Sched<[sched]>;
def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
- (!cast<Instruction>(NAME##rr) Src.KRC:$src2, Src.KRC:$src1)>;
+ (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
}
}
@@ -3201,8 +3184,8 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
-def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1),
- (Narrow.VT Narrow.RC:$src2), timm:$cc)),
+def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
+ (Narrow.VT Narrow.RC:$src2), timm:$cc)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrri")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@@ -3219,8 +3202,8 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
timm:$cc), Narrow.KRC)>;
// Broadcast load.
-def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1),
- (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
+def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
+ (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrmbi")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@@ -3235,8 +3218,8 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
addr:$src2, timm:$cc), Narrow.KRC)>;
// Commuted with broadcast load.
-def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
- (Narrow.VT Narrow.RC:$src1), timm:$cc)),
+def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
+ (Narrow.VT Narrow.RC:$src1), timm:$cc)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrmbi")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@@ -3301,7 +3284,7 @@ multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
let Predicates = [HasAVX512] in
let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
SchedRW = [WriteZero] in
- def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
+ def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
[(set KRC:$dst, (VT Val))]>;
}
@@ -3409,7 +3392,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
!strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
"${dst} {${mask}}, $src1}"),
[(set _.RC:$dst, (_.VT
- (vselect _.KRCWM:$mask,
+ (vselect_mask _.KRCWM:$mask,
(_.VT (ld_frag addr:$src1)),
(_.VT _.RC:$src0))))], _.ExeDomain>,
EVEX, EVEX_K, Sched<[Sched.RM]>;
@@ -3418,18 +3401,18 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
(ins _.KRCWM:$mask, _.MemOp:$src),
OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
"${dst} {${mask}} {z}, $src}",
- [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
+ [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
(_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
_.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
}
def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
- (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
+ (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
- (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
+ (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
- (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
+ (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
_.KRCWM:$mask, addr:$ptr)>;
}
@@ -4286,6 +4269,17 @@ def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0)))
def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
(COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
+
+def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
+ (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
+def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
+ (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
+
+def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
+ (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
+def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
+ (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
+
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
@@ -4439,8 +4433,6 @@ let Predicates = [HasAVX512] in {
(VMOV64toPQIZrr GR64:$src)>;
// AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
- def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
- (VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzload32 addr:$src)),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v8i32 (X86vzload32 addr:$src)),
@@ -4624,8 +4616,8 @@ multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
- "${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr,
+ "${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr,
(_.VT (OpNode _.RC:$src1,
(_.BroadcastLdFrag addr:$src2)))>,
AVX512BIBase, EVEX_4V, EVEX_B,
@@ -4750,8 +4742,8 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
OpcodeStr,
- "${src2}"##_Brdct.BroadcastStr##", $src1",
- "$src1, ${src2}"##_Brdct.BroadcastStr,
+ "${src2}"#_Brdct.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_Brdct.BroadcastStr,
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
(_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
AVX512BIBase, EVEX_4V, EVEX_B,
@@ -4822,8 +4814,8 @@ multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
OpcodeStr,
- "${src2}"##_Src.BroadcastStr##", $src1",
- "$src1, ${src2}"##_Src.BroadcastStr,
+ "${src2}"#_Src.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_Src.BroadcastStr,
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
(_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
@@ -5159,26 +5151,26 @@ multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
X86VectorVTInfo _,
X86VectorVTInfo IntInfo> {
// Masked register-register logical operations.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
_.RC:$src1, _.RC:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
_.RC:$src2)>;
// Masked register-memory logical operations.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(bitconvert (IntInfo.VT (OpNode _.RC:$src1,
(load addr:$src2)))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
_.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(bitconvert (IntInfo.VT (OpNode _.RC:$src1,
(load addr:$src2)))),
_.ImmAllZerosV)),
@@ -5190,14 +5182,14 @@ multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
X86VectorVTInfo _,
X86VectorVTInfo IntInfo> {
// Register-broadcast logical operations.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(bitconvert
(IntInfo.VT (OpNode _.RC:$src1,
(IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
_.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(bitconvert
(IntInfo.VT (OpNode _.RC:$src1,
(IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
@@ -5304,7 +5296,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1,
- _.ScalarIntMemCPat:$src2))>,
+ (_.ScalarIntMemFrags addr:$src2)))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
@@ -5350,7 +5342,7 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1,
- _.ScalarIntMemCPat:$src2))>,
+ (_.ScalarIntMemFrags addr:$src2)))>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
let isCodeGenOnly = 1, Predicates = [HasAVX512],
@@ -5463,28 +5455,32 @@ defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
EVEX_CD8<64, CD8VT1>, SIMD_EXC;
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode,
X86VectorVTInfo _, X86FoldableSchedWrite sched,
bit IsCommutable,
bit IsKCommutable = IsCommutable> {
let ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
+ (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
IsKCommutable, IsKCommutable>,
EVEX_4V, Sched<[sched]>;
let mayLoad = 1 in {
- defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+ defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
"$src2, $src1", "$src1, $src2",
- (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
+ (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
+ (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
- defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
- "${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr,
- (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
+ defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
+ "${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr,
+ (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
+ (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
EVEX_4V, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -5496,7 +5492,7 @@ multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
+ (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#_.Suffix,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
@@ -5507,38 +5503,39 @@ multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
EVEX_4V, EVEX_B, Sched<[sched]>;
}
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode,
Predicate prd, X86SchedWriteSizes sched,
bit IsCommutable = 0,
bit IsPD128Commutable = IsCommutable> {
let Predicates = [prd] in {
- defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
+ defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
EVEX_CD8<32, CD8VF>;
- defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
+ defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
}
// Define only if AVX512VL feature is present.
let Predicates = [prd, HasVLX] in {
- defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
+ defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
EVEX_CD8<32, CD8VF>;
- defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
+ defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
EVEX_CD8<32, CD8VF>;
- defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
+ defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
sched.PD.XMM, IsPD128Commutable,
IsCommutable>, EVEX_V128, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
- defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
+ defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
}
@@ -5566,38 +5563,38 @@ multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
-defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, HasAVX512,
+defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
SchedWriteFAddSizes, 1>,
avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
-defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, HasAVX512,
+defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
SchedWriteFMulSizes, 1>,
avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
-defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, HasAVX512,
+defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
SchedWriteFAddSizes>,
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
-defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, HasAVX512,
+defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
SchedWriteFDivSizes>,
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
-defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
+defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
SchedWriteFCmpSizes, 0>,
avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
-defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
+defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
SchedWriteFCmpSizes, 0>,
avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
let isCodeGenOnly = 1 in {
- defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
+ defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
SchedWriteFCmpSizes, 1>;
- defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
+ defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
SchedWriteFCmpSizes, 1>;
}
let Uses = []<Register>, mayRaiseFPException = 0 in {
-defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
+defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
SchedWriteFLogicSizes, 1>;
-defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
+defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
SchedWriteFLogicSizes, 0>;
-defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
+defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
SchedWriteFLogicSizes, 1>;
-defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
+defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
SchedWriteFLogicSizes, 1>;
}
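// Annotation: avx512_fp_binop_p now carries two operators per instruction,
// an OpNode for the unmasked pattern (any_fadd and friends, which also match
// the strict-FP nodes) and a MaskOpNode (plain fadd etc.) that
// AVX512_maskable_split uses only for the merge- and zero-masked patterns;
// operators with no strict variant (X86fmin, X86fminc, null_frag) are simply
// passed twice, as in the defms above.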
@@ -5605,19 +5602,19 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
EVEX_4V, Sched<[sched]>;
defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
- "${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr,
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
+ "${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr,
(OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -5627,14 +5624,14 @@ multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
Sched<[sched]>;
defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
+ (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
"$src2, $src1", "$src1, $src2",
- (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2)>,
+ (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -5648,11 +5645,11 @@ multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr
avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
- avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
+ avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
X86scalefsRnd, sched.Scl>,
EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
- avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
+ avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
X86scalefsRnd, sched.Scl>,
EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;
@@ -5679,7 +5676,7 @@ multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
string Name> {
// NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
- // There are just too many permuations due to commutability and bitcasts.
+ // There are just too many permutations due to commutability and bitcasts.
let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
@@ -5701,8 +5698,8 @@ multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
- "${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr,
+ "${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr,
(null_frag), (null_frag)>,
EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
@@ -5790,7 +5787,7 @@ multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
let ExeDomain = _.ExeDomain in
defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
- "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
+ "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
(_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
EVEX_B, Sched<[sched.Folded]>;
}
@@ -5973,8 +5970,8 @@ multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
let ExeDomain = _.ExeDomain in
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
- "${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr,
+ "${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr,
(_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
@@ -6245,8 +6242,8 @@ multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
- "${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr,
+ "${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr,
(_.VT (OpNode
_.RC:$src1,
(Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
@@ -6370,9 +6367,6 @@ defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
let Predicates = [HasAVX512] in {
// VMOVHPD patterns
- def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
- (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
- (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
(VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
@@ -6419,29 +6413,33 @@ let Predicates = [HasAVX512] in {
//
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86FoldableSchedWrite sched,
+ SDNode MaskOpNode, X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
+ (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
+ (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
AVX512FMA3Base, Sched<[sched]>;
- defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
+ (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
+ (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
- defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src2,
+ _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
+ (MaskOpNode _.RC:$src2,
_.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
- AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -6450,74 +6448,88 @@ multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR] in
- defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
+ (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
(_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched,
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _, string Suff> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
- _.info512, Suff>,
+ defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.ZMM, _.info512, Suff>,
avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
_.info512, Suff>,
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasAVX512] in {
- defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
- _.info256, Suff>,
+ defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.YMM, _.info256, Suff>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
- defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
- _.info128, Suff>,
+ defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.XMM, _.info128, Suff>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd> {
- defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
- SchedWriteFMA, avx512vl_f32_info, "PS">;
- defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
- SchedWriteFMA, avx512vl_f64_info, "PD">,
- VEX_W;
-}
-
-defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86any_Fmadd, X86FmaddRnd>;
-defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
-defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
-defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
-defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
-defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
+ SDNode MaskOpNode, SDNode OpNodeRnd> {
+ defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
+ OpNodeRnd, SchedWriteFMA,
+ avx512vl_f32_info, "PS">;
+ defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
+ OpNodeRnd, SchedWriteFMA,
+ avx512vl_f64_info, "PD">, VEX_W;
+}
+
+defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86any_Fmadd,
+ X86Fmadd, X86FmaddRnd>;
+defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
+ X86Fmsub, X86FmsubRnd>;
+defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
+ X86Fmaddsub, X86FmaddsubRnd>;
+defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
+ X86Fmsubadd, X86FmsubaddRnd>;
+defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
+ X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
+ X86Fnmsub, X86FnmsubRnd>;
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86FoldableSchedWrite sched,
+ SDNode MaskOpNode, X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
- vselect, 1>, AVX512FMA3Base, Sched<[sched]>;
+ (null_frag),
+ (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
+ AVX512FMA3Base, Sched<[sched]>;
- defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
+ (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
+ (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
- defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
- OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
- "$src2, ${src3}"##_.BroadcastStr,
+ OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
+ "$src2, ${src3}"#_.BroadcastStr,
(_.VT (OpNode _.RC:$src2,
(_.VT (_.BroadcastLdFrag addr:$src3)),
- _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
+ _.RC:$src1)),
+ (_.VT (MaskOpNode _.RC:$src2,
+ (_.VT (_.BroadcastLdFrag addr:$src3)),
+ _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -6527,77 +6539,89 @@ multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR] in
- defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
- (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
- 1, 1, vselect, 1>,
- AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
+ (null_frag),
+ (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
+ 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched,
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _, string Suff> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
- _.info512, Suff>,
+ defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.ZMM, _.info512, Suff>,
avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
_.info512, Suff>,
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasAVX512] in {
- defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
- _.info256, Suff>,
+ defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.YMM, _.info256, Suff>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
- defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
- _.info128, Suff>,
+ defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.XMM, _.info128, Suff>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd > {
- defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
- SchedWriteFMA, avx512vl_f32_info, "PS">;
- defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
- SchedWriteFMA, avx512vl_f64_info, "PD">,
- VEX_W;
-}
-
-defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86any_Fmadd, X86FmaddRnd>;
-defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
-defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
-defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
-defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
-defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
+ SDNode MaskOpNode, SDNode OpNodeRnd > {
+ defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
+ OpNodeRnd, SchedWriteFMA,
+ avx512vl_f32_info, "PS">;
+ defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
+ OpNodeRnd, SchedWriteFMA,
+ avx512vl_f64_info, "PD">, VEX_W;
+}
+
+defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86any_Fmadd,
+ X86Fmadd, X86FmaddRnd>;
+defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
+ X86Fmsub, X86FmsubRnd>;
+defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
+ X86Fmaddsub, X86FmaddsubRnd>;
+defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
+ X86Fmsubadd, X86FmsubaddRnd>;
+defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
+ X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
+ X86Fnmsub, X86FnmsubRnd>;
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86FoldableSchedWrite sched,
+ SDNode MaskOpNode, X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
+ (null_frag),
+ (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
AVX512FMA3Base, Sched<[sched]>;
  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
- defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
+ (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
+ (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
- defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
- OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
- "$src2, ${src3}"##_.BroadcastStr,
+ OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
+ "$src2, ${src3}"#_.BroadcastStr,
(_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
- _.RC:$src1, _.RC:$src2)), 1, 0>,
+ _.RC:$src1, _.RC:$src2)),
+ (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
+ _.RC:$src1, _.RC:$src2)), 1, 0>,
AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
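// Illustration (from the patterns above): the 132 memory forms are written
// in "312" operand order so the load sits in the first FMA operand,
//   (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)
// while the 213 forms keep it in the last,
//   (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))
// which keeps tablegen from reporting the commutable variants as duplicate
// patterns.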
@@ -6607,49 +6631,57 @@ multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR] in
- defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
- (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
- 1, 1, vselect, 1>,
- AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
+ (null_frag),
+ (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
+ 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched,
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _, string Suff> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
- _.info512, Suff>,
+ defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.ZMM, _.info512, Suff>,
avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
_.info512, Suff>,
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasAVX512] in {
- defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
- _.info256, Suff>,
+ defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.YMM, _.info256, Suff>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
- defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
- _.info128, Suff>,
+ defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.XMM, _.info128, Suff>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd > {
- defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
- SchedWriteFMA, avx512vl_f32_info, "PS">;
- defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
- SchedWriteFMA, avx512vl_f64_info, "PD">,
- VEX_W;
-}
-
-defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86any_Fmadd, X86FmaddRnd>;
-defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
-defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
-defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
-defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
-defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
+ SDNode MaskOpNode, SDNode OpNodeRnd > {
+ defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
+ OpNodeRnd, SchedWriteFMA,
+ avx512vl_f32_info, "PS">;
+ defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
+ OpNodeRnd, SchedWriteFMA,
+ avx512vl_f64_info, "PD">, VEX_W;
+}
+
+defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86any_Fmadd,
+ X86Fmadd, X86FmaddRnd>;
+defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
+ X86Fmsub, X86FmsubRnd>;
+defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
+ X86Fmaddsub, X86FmaddsubRnd>;
+defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
+ X86Fmsubadd, X86FmsubaddRnd>;
+defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
+ X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
+ X86Fnmsub, X86FnmsubRnd>;
// Scalar FMA
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
@@ -6742,11 +6774,12 @@ multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
}
defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86any_Fmadd, X86FmaddRnd>;
-defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
-defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
-defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
+defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
+defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
-multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
+multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode MaskedOp,
+ SDNode RndOp, string Prefix,
string Suffix, SDNode Move,
X86VectorVTInfo _, PatLeaf ZeroFP> {
let Predicates = [HasAVX512] in {
@@ -6788,8 +6821,8 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op _.FRC:$src2,
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src3),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
@@ -6799,8 +6832,8 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op _.FRC:$src2,
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
(_.ScalarLdFrag addr:$src3)),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
@@ -6809,18 +6842,18 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
(!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op _.FRC:$src2, _.FRC:$src3,
- (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp _.FRC:$src2, _.FRC:$src3,
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
(!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
VR128X:$src1, VK1WM:$mask,
@@ -6828,19 +6861,19 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
- (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
(!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op _.FRC:$src2,
- (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- _.FRC:$src3),
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp _.FRC:$src2,
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ _.FRC:$src3),
(_.EltVT ZeroFP)))))),
(!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
VR128X:$src1, VK1WM:$mask,
@@ -6848,9 +6881,9 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op _.FRC:$src2, _.FRC:$src3,
- (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp _.FRC:$src2, _.FRC:$src3,
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
(_.EltVT ZeroFP)))))),
(!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
VR128X:$src1, VK1WM:$mask,
@@ -6858,28 +6891,28 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op _.FRC:$src2,
- (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- (_.ScalarLdFrag addr:$src3)),
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp _.FRC:$src2,
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ (_.ScalarLdFrag addr:$src3)),
(_.EltVT ZeroFP)))))),
(!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
(_.EltVT ZeroFP)))))),
(!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
- (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
(_.EltVT ZeroFP)))))),
(!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
VR128X:$src1, VK1WM:$mask,
@@ -6903,7 +6936,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
+ (X86selects_mask VK1WM:$mask,
(RndOp _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src3, (i32 timm:$rc)),
@@ -6914,7 +6947,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
+ (X86selects_mask VK1WM:$mask,
(RndOp _.FRC:$src2, _.FRC:$src3,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
(i32 timm:$rc)),
@@ -6925,7 +6958,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
+ (X86selects_mask VK1WM:$mask,
(RndOp _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
_.FRC:$src3, (i32 timm:$rc)),
@@ -6936,7 +6969,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
- (X86selects VK1WM:$mask,
+ (X86selects_mask VK1WM:$mask,
(RndOp _.FRC:$src2, _.FRC:$src3,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
(i32 timm:$rc)),
@@ -6948,23 +6981,23 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
}
}
-defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SS",
- X86Movss, v4f32x_info, fp32imm0>;
-defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
- X86Movss, v4f32x_info, fp32imm0>;
-defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
- X86Movss, v4f32x_info, fp32imm0>;
-defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
- X86Movss, v4f32x_info, fp32imm0>;
+defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD",
+ "SS", X86Movss, v4f32x_info, fp32imm0>;
+defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
+ "SS", X86Movss, v4f32x_info, fp32imm0>;
+defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
+ "SS", X86Movss, v4f32x_info, fp32imm0>;
+defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
+ "SS", X86Movss, v4f32x_info, fp32imm0>;
-defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SD",
- X86Movsd, v2f64x_info, fp64imm0>;
-defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
- X86Movsd, v2f64x_info, fp64imm0>;
-defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
- X86Movsd, v2f64x_info, fp64imm0>;
-defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
- X86Movsd, v2f64x_info, fp64imm0>;
+defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD",
+ "SD", X86Movsd, v2f64x_info, fp64imm0>;
+defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
+ "SD", X86Movsd, v2f64x_info, fp64imm0>;
+defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
+ "SD", X86Movsd, v2f64x_info, fp64imm0>;
+defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
+ "SD", X86Movsd, v2f64x_info, fp64imm0>;
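// Annotation: the scalar FMA patterns follow the same split as the packed
// ones: the unmasked forms match the X86any_* operators (strict or
// non-strict), while every X86selects_mask merge/zero pattern uses the plain
// node passed as MaskedOp, presumably so strict operations are never folded
// into a masked form where masked-off lanes could still raise exceptions.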
//===----------------------------------------------------------------------===//
// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
@@ -7194,7 +7227,7 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode
- (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
+ (SrcVT.ScalarIntMemFrags addr:$src)))]>,
EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
} // Predicates = [HasAVX512]
@@ -7233,6 +7266,45 @@ defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2u
X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
+ X86VectorVTInfo DstVT, SDNode OpNode,
+ X86FoldableSchedWrite sched,
+ string aliasStr> {
+ let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
+ let isCodeGenOnly = 1 in {
+ def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
+ EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
+ def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
+ EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
+ }
+ } // Predicates = [HasAVX512]
+}
+
+defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
+ lrint, WriteCvtSS2I,
+ "{l}">, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
+ llrint, WriteCvtSS2I,
+ "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
+ lrint, WriteCvtSD2I,
+ "{l}">, XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
+ llrint, WriteCvtSD2I,
+ "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
+
+let Predicates = [HasAVX512] in {
+ def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
+ def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
+
+ def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
+ def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
+}
+
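The new VCVTSS2SI/VCVTSD2SI patterns above select the C library round-to-integer calls; a minimal C++ sketch that typically exercises them (assumes -O2 -mavx512f; the exact instruction chosen is up to the compiler, and on targets where long is 32 bits the 32-bit form is used):

  #include <cmath>

  // Round to integer using the current MXCSR rounding mode (not truncation).
  long to_long(float x)        { return std::lrint(x); }   // vcvtss2si
  long long to_llong(float x)  { return std::llrint(x); }  // vcvtss2si, 64-bit
  long long to_llong(double x) { return std::llrint(x); }  // vcvtsd2si, 64-bit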
// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions
let Predicates = [HasAVX512] in {
@@ -7347,7 +7419,7 @@ let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
(ins _SrcRC.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst,
- (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
+ (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
} //HasAVX512
@@ -7404,7 +7476,7 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
(ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
- (_Src.VT _Src.ScalarIntMemCPat:$src2)))>,
+ (_Src.ScalarIntMemFrags addr:$src2)))>,
EVEX_4V, VEX_LIG,
Sched<[sched.Folded, sched.ReadAfterFold]>;
@@ -7421,7 +7493,7 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
}
}
-// Scalar Coversion with SAE - suppress all exceptions
+// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeSAE,
X86FoldableSchedWrite sched> {
@@ -7506,55 +7578,63 @@ def : Pat<(v2f64 (X86Movsd
//===----------------------------------------------------------------------===//
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
- X86VectorVTInfo _Src, SDNode OpNode,
+ X86VectorVTInfo _Src, SDNode OpNode, SDNode MaskOpNode,
X86FoldableSchedWrite sched,
string Broadcast = _.BroadcastStr,
string Alias = "", X86MemOperand MemOp = _Src.MemOp,
RegisterClass MaskRC = _.KRCWM,
- dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
+ dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
+ dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _Src.RC:$src),
(ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
(ins MaskRC:$mask, _Src.RC:$src),
OpcodeStr, "$src", "$src",
(_.VT (OpNode (_Src.VT _Src.RC:$src))),
- (vselect MaskRC:$mask,
- (_.VT (OpNode (_Src.VT _Src.RC:$src))),
- _.RC:$src0),
- vselect, "$src0 = $dst">,
+ (vselect_mask MaskRC:$mask,
+ (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
+ _.RC:$src0),
+ (vselect_mask MaskRC:$mask,
+ (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
+ _.ImmAllZerosV)>,
EVEX, Sched<[sched]>;
- defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins MemOp:$src),
(ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
(ins MaskRC:$mask, MemOp:$src),
OpcodeStr#Alias, "$src", "$src",
LdDAG,
- (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
- vselect, "$src0 = $dst">,
+ (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
+ (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
EVEX, Sched<[sched.Folded]>;
- defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _Src.ScalarMemOp:$src),
(ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
(ins MaskRC:$mask, _Src.ScalarMemOp:$src),
OpcodeStr,
- "${src}"##Broadcast, "${src}"##Broadcast,
+ "${src}"#Broadcast, "${src}"#Broadcast,
(_.VT (OpNode (_Src.VT
(_Src.BroadcastLdFrag addr:$src))
)),
- (vselect MaskRC:$mask,
- (_.VT
- (OpNode
- (_Src.VT
- (_Src.BroadcastLdFrag addr:$src)))),
- _.RC:$src0),
- vselect, "$src0 = $dst">,
+ (vselect_mask MaskRC:$mask,
+ (_.VT
+ (MaskOpNode
+ (_Src.VT
+ (_Src.BroadcastLdFrag addr:$src)))),
+ _.RC:$src0),
+ (vselect_mask MaskRC:$mask,
+ (_.VT
+ (MaskOpNode
+ (_Src.VT
+ (_Src.BroadcastLdFrag addr:$src)))),
+ _.ImmAllZerosV)>,
EVEX, EVEX_B, Sched<[sched.Folded]>;
}
}
-// Coversion with SAE - suppress all exceptions
+// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeSAE,
X86FoldableSchedWrite sched> {
@@ -7581,12 +7661,14 @@ multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNode,
+ SDNode MaskOpNode,
X86FoldableSchedWrite sched,
string Broadcast = _.BroadcastStr,
string Alias = "", X86MemOperand MemOp = _Src.MemOp,
RegisterClass MaskRC = _.KRCWM>
- : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
- MemOp, MaskRC,
+ : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
+ Alias, MemOp, MaskRC,
+ (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
(_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
// Extend Float to Double
@@ -7594,69 +7676,72 @@ multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
- any_fpextend, sched.ZMM>,
+ any_fpextend, fpextend, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
X86vfpextSAE, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
- X86any_vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
- defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, any_fpextend,
- sched.YMM>, EVEX_V256;
+ X86any_vfpext, X86vfpext, sched.XMM, "{1to2}",
+ "", f64mem>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info,
+ any_fpextend, fpextend, sched.YMM>, EVEX_V256;
}
}
// Truncate Double to Float
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86any_vfpround, sched.ZMM>,
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info,
+ X86any_vfpround, X86vfpround, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
X86vfproundRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
- null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
- EVEX_V128;
- defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86any_vfpround,
+ null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
+ f128mem, VK2WM>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info,
+ X86any_vfpround, X86vfpround,
sched.YMM, "{1to4}", "{y}">, EVEX_V256;
}
- def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
VK2WM:$mask, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
VK2WM:$mask, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
VK4WM:$mask, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
VK4WM:$mask, f64mem:$src), 0, "att">;
@@ -7701,81 +7786,91 @@ let Predicates = [HasVLX] in {
// Convert Signed/Unsigned Doubleword to Double
let Uses = []<Register>, mayRaiseFPException = 0 in
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNode128, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNode128,
+ SDNode MaskOpNode128,
+ X86SchedWriteWidths sched> {
// No rounding in this op
let Predicates = [HasAVX512] in
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
- sched.ZMM>, EVEX_V512;
+ MaskOpNode, sched.ZMM>, EVEX_V512;
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
- OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
+ OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
+ "", i64mem, VK2WM,
(v2f64 (OpNode128 (bc_v4i32
(v2i64
+ (scalar_to_vector (loadi64 addr:$src)))))),
+ (v2f64 (MaskOpNode128 (bc_v4i32
+ (v2i64
(scalar_to_vector (loadi64 addr:$src))))))>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
- sched.YMM>, EVEX_V256;
+ MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Signed/Unsigned Doubleword to Float
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
- sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
- sched.XMM>, EVEX_V128;
+ MaskOpNode, sched.XMM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
- sched.YMM>, EVEX_V256;
+ MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Float to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode MaskOpNode,
SDNode OpNodeSAE, X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
- sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
OpNodeSAE, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
- sched.XMM>, EVEX_V128;
+ MaskOpNode, sched.XMM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
- sched.YMM>, EVEX_V256;
+ MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Float to Signed/Unsigned Doubleword
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
- sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
- sched.XMM>, EVEX_V128;
+ MaskOpNode, sched.XMM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
- sched.YMM>, EVEX_V256;
+ MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeSAE, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNodeSAE,
+ X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
- sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
OpNodeSAE, sched.ZMM>, EVEX_V512;
}
@@ -7785,50 +7880,50 @@ multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
- null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
+ null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
VK2WM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
- sched.YMM, "{1to4}", "{y}">, EVEX_V256;
+ MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
}
- def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
VK2WM:$mask, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
VK2WM:$mask, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
VK4WM:$mask, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
VK4WM:$mask, f64mem:$src), 0, "att">;
@@ -7836,10 +7931,11 @@ multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
- sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
@@ -7849,48 +7945,48 @@ multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
- null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
+ null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
VK2WM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
- sched.YMM, "{1to4}", "{y}">, EVEX_V256;
+ MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
}
- def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
VK2WM:$mask, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
VK2WM:$mask, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
VK4WM:$mask, f64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
VK4WM:$mask, f64mem:$src), 0, "att">;
@@ -7898,61 +7994,65 @@ multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Convert Double to Signed/Unsigned Quadword
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
- sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
- sched.XMM>, EVEX_V128;
+ MaskOpNode, sched.XMM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
- sched.YMM>, EVEX_V256;
+ MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Double to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
- sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
- sched.XMM>, EVEX_V128;
+ MaskOpNode, sched.XMM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
- sched.YMM>, EVEX_V256;
+ MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Signed/Unsigned Quadword to Double
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
- sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
- sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
+ MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
- sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
+ MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
}
}
// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
- sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
@@ -7960,21 +8060,26 @@ multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Explicitly specified broadcast string, since we take only 2 elements
// from v4f32x_info source
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
- sched.XMM, "{1to2}", "", f64mem, VK2WM,
+ MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
(v2i64 (OpNode (bc_v4f32
(v2f64
+ (scalar_to_vector (loadf64 addr:$src)))))),
+ (v2i64 (MaskOpNode (bc_v4f32
+ (v2f64
(scalar_to_vector (loadf64 addr:$src))))))>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
- sched.YMM>, EVEX_V256;
+ MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
let Predicates = [HasDQI] in {
- defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
@@ -7982,22 +8087,26 @@ multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Explicitly specified broadcast string, since we take only 2 elements
// from v4f32x_info source
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
- sched.XMM, "{1to2}", "", f64mem, VK2WM,
+ MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
(v2i64 (OpNode (bc_v4f32
(v2f64
+ (scalar_to_vector (loadf64 addr:$src)))))),
+ (v2i64 (MaskOpNode (bc_v4f32
+ (v2f64
(scalar_to_vector (loadf64 addr:$src))))))>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
- sched.YMM>, EVEX_V256;
+ MaskOpNode, sched.YMM>, EVEX_V256;
}
}
// Convert Signed/Unsigned Quadword to Float
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+ SDNode MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
- sched.ZMM>,
+ MaskOpNode, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
@@ -8007,152 +8116,159 @@ multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
- sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
+ null_frag, sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
EVEX_V128, NotEVEX2VEXConvertible;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
- sched.YMM, "{1to4}", "{y}">, EVEX_V256,
+ MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256,
NotEVEX2VEXConvertible;
}
- def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
VK2WM:$mask, VR128X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
i64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
VK2WM:$mask, i64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to2}}",
(!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
VK2WM:$mask, i64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|"
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
"$dst {${mask}}, $src}",
(!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, $src}",
(!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
VK4WM:$mask, VR256X:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
i64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
"$dst {${mask}}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
VK4WM:$mask, i64mem:$src), 0, "att">;
- def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
"$dst {${mask}} {z}, ${src}{1to4}}",
(!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
VK4WM:$mask, i64mem:$src), 0, "att">;
}
-defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, X86any_VSintToFP,
+defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
+ X86any_VSintToFP, X86VSintToFP,
SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
-defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp,
+defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
PS, EVEX_CD8<32, CD8VF>;
defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
- X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
- XS, EVEX_CD8<32, CD8VF>;
+ X86cvttp2si, X86cvttp2siSAE,
+ SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;
defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
- X86cvttp2siSAE, SchedWriteCvtPD2DQ>,
+ X86cvttp2si, X86cvttp2siSAE,
+ SchedWriteCvtPD2DQ>,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
- X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS,
- EVEX_CD8<32, CD8VF>;
+ X86cvttp2ui, X86cvttp2uiSAE,
+ SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;
defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
- X86cvttp2uiSAE, SchedWriteCvtPD2DQ>,
+ X86cvttp2ui, X86cvttp2uiSAE,
+ SchedWriteCvtPD2DQ>,
PS, VEX_W, EVEX_CD8<64, CD8VF>;
defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
- X86any_VUintToFP, SchedWriteCvtDQ2PD>, XS,
- EVEX_CD8<32, CD8VH>;
+ uint_to_fp, X86any_VUintToFP, X86VUintToFP,
+ SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
- X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
- EVEX_CD8<32, CD8VF>;
+ uint_to_fp, X86VUintToFpRnd,
+ SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;
-defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
+defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VF>;
-defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
+defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
VEX_W, EVEX_CD8<64, CD8VF>;
-defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
+defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
PS, EVEX_CD8<32, CD8VF>;
-defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
+defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
PS, EVEX_CD8<64, CD8VF>;
-defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
+defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
-defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
+defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
-defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
+defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
-defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
+defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
- X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W,
+ X86cvttp2si, X86cvttp2siSAE,
+ SchedWriteCvtPD2DQ>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
- X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD,
+ X86cvttp2si, X86cvttp2siSAE,
+ SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
- X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W,
+ X86cvttp2ui, X86cvttp2uiSAE,
+ SchedWriteCvtPD2DQ>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
- X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD,
+ X86cvttp2ui, X86cvttp2uiSAE,
+ SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
- X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
- EVEX_CD8<64, CD8VF>;
+ sint_to_fp, X86VSintToFpRnd,
+ SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
- X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
- EVEX_CD8<64, CD8VF>;
+ uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
+ VEX_W, XS, EVEX_CD8<64, CD8VF>;
defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp,
- X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
- EVEX_CD8<64, CD8VF>;
+ sint_to_fp, X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
+ VEX_W, PS, EVEX_CD8<64, CD8VF>;
defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp,
- X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
- EVEX_CD8<64, CD8VF>;
+ uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PS>,
+ VEX_W, XD, EVEX_CD8<64, CD8VF>;
let Predicates = [HasVLX] in {
// Special patterns to allow use of X86mcvtp2Int for masking. Instruction
@@ -8275,70 +8391,70 @@ let Predicates = [HasVLX] in {
let Predicates = [HasDQI, HasVLX] in {
def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
(VCVTPS2QQZ128rm addr:$src)>;
- def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
- VR128X:$src0)),
+ def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
+ (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ VR128X:$src0)),
(VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
- v2i64x_info.ImmAllZerosV)),
+ def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
+ (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ v2i64x_info.ImmAllZerosV)),
(VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
(VCVTPS2UQQZ128rm addr:$src)>;
- def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
- VR128X:$src0)),
+ def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
+ (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ VR128X:$src0)),
(VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
- v2i64x_info.ImmAllZerosV)),
+ def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
+ (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ v2i64x_info.ImmAllZerosV)),
(VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
(VCVTTPS2QQZ128rm addr:$src)>;
- def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
- VR128X:$src0)),
+ def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
+ (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ VR128X:$src0)),
(VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
- v2i64x_info.ImmAllZerosV)),
+ def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
+ (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ v2i64x_info.ImmAllZerosV)),
(VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
(VCVTTPS2UQQZ128rm addr:$src)>;
- def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
- VR128X:$src0)),
+ def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
+ (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ VR128X:$src0)),
(VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
- v2i64x_info.ImmAllZerosV)),
+ def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
+ (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ v2i64x_info.ImmAllZerosV)),
(VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(VCVTDQ2PDZ128rm addr:$src)>;
- def : Pat<(v2f64 (vselect VK2WM:$mask,
- (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
- VR128X:$src0)),
+ def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
+ (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
+ VR128X:$src0)),
(VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(v2f64 (vselect VK2WM:$mask,
- (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
- v2f64x_info.ImmAllZerosV)),
+ def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
+ (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
+ v2f64x_info.ImmAllZerosV)),
(VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(VCVTUDQ2PDZ128rm addr:$src)>;
- def : Pat<(v2f64 (vselect VK2WM:$mask,
- (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
- VR128X:$src0)),
+ def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
+ (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
+ VR128X:$src0)),
(VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(v2f64 (vselect VK2WM:$mask,
- (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
- v2f64x_info.ImmAllZerosV)),
+ def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
+ (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
+ v2f64x_info.ImmAllZerosV)),
(VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}
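The vselect_mask patterns above handle the masked 128-bit integer-to-double conversions fed by a 64-bit scalar load; a short C++ sketch of the source-level shape using the AVX512VL intrinsic _mm_mask_cvtepi32_pd (assumes -mavx512f -mavx512vl; the helper name is illustrative):

  #include <immintrin.h>
  #include <stdint.h>

  // Load two 32-bit integers (64 bits) and convert them to double under a mask;
  // lanes whose mask bit is clear keep the corresponding lane of src.
  __m128d cvt2_masked(__m128d src, __mmask8 k, const int32_t *p) {
    __m128i a = _mm_loadl_epi64((const __m128i *)p);
    return _mm_mask_cvtepi32_pd(src, k, a);
  }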
@@ -8408,16 +8524,17 @@ let Predicates = [HasDQI, HasVLX] in {
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
- X86MemOperand x86memop, PatFrag ld_frag,
+ X86MemOperand x86memop, dag ld_dag,
X86FoldableSchedWrite sched> {
- defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
+ defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
(ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
+ (X86any_cvtph2ps (_src.VT _src.RC:$src)),
(X86cvtph2ps (_src.VT _src.RC:$src))>,
T8PD, Sched<[sched]>;
- defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
+ defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
(ins x86memop:$src), "vcvtph2ps", "$src", "$src",
- (X86cvtph2ps (_src.VT
- (ld_frag addr:$src)))>,
+ (X86any_cvtph2ps (_src.VT ld_dag)),
+ (X86cvtph2ps (_src.VT ld_dag))>,
T8PD, Sched<[sched.Folded]>;
}
@@ -8432,23 +8549,22 @@ multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
}
let Predicates = [HasAVX512] in
- defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load,
- WriteCvtPH2PSZ>,
+ defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
+ (load addr:$src), WriteCvtPH2PSZ>,
avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
- load, WriteCvtPH2PSY>, EVEX, EVEX_V256,
+ (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
EVEX_CD8<32, CD8VH>;
defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
- load, WriteCvtPH2PS>, EVEX, EVEX_V128,
+ (bitconvert (v2i64 (X86vzload64 addr:$src))),
+ WriteCvtPH2PS>, EVEX, EVEX_V128,
EVEX_CD8<32, CD8VH>;
// Pattern match vcvtph2ps of a scalar i64 load.
- def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
- (VCVTPH2PSZ128rm addr:$src)>;
- def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
+ def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
(v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
(VCVTPH2PSZ128rm addr:$src)>;
}
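The Z128 form above now folds a 64-bit scalar load of four half-precision values directly into the conversion; a sketch of the source-level equivalent using the standard F16C intrinsics (assumes -mf16c or an AVX-512 target; the helper name is illustrative):

  #include <immintrin.h>
  #include <stdint.h>

  // Load four IEEE half-precision values (64 bits) and widen them to float.
  // The 64-bit load is expected to fold into the vcvtph2ps memory operand.
  __m128 load4_half_to_float(const uint16_t *p) {
    __m128i h = _mm_loadl_epi64((const __m128i *)p);  // low 64 bits, rest zero
    return _mm_cvtph_ps(h);
  }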
@@ -8460,7 +8576,7 @@ let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
(ins _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _dest.RC:$dst,
- (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
+ (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
Sched<[RR]>;
let Constraints = "$src0 = $dst" in
def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
@@ -8505,54 +8621,35 @@ let Predicates = [HasAVX512] in {
WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
- let Predicates = [HasVLX] in {
- defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
- WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
- EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
- defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
- WriteCvtPS2PH, WriteCvtPS2PHSt>,
- EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
- }
+
+ def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
+ (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
+}
+
+let Predicates = [HasVLX] in {
+ defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
+ WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
+ EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
+ defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
+ WriteCvtPS2PH, WriteCvtPS2PHSt>,
+ EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
def : Pat<(store (f64 (extractelt
- (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
+ (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
(iPTR 0))), addr:$dst),
(VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
def : Pat<(store (i64 (extractelt
- (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
+ (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
(iPTR 0))), addr:$dst),
(VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
- def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
+ def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
(VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
- def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
- (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
-}
-
-// Patterns for matching conversions from float to half-float and vice versa.
-let Predicates = [HasVLX] in {
- // Use MXCSR.RC for rounding instead of explicitly specifying the default
- // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
- // configurations we support (the default). However, falling back to MXCSR is
- // more consistent with other instructions, which are always controlled by it.
- // It's encoded as 0b100.
- def : Pat<(fp_to_f16 FR32X:$src),
- (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
- (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;
-
- def : Pat<(f16_to_fp GR16:$src),
- (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
- (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >;
-
- def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
- (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
- (v8i16 (VCVTPS2PHZ128rr
- (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >;
}
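The store patterns above cover the case where the 64-bit result of a 4-wide cvtps2ph is stored straight to memory; a sketch with the standard F16C intrinsics (assumes -mf16c or an AVX-512 target; the helper name is illustrative):

  #include <immintrin.h>
  #include <stdint.h>

  // Convert four floats to half precision and store the resulting 64 bits.
  // _MM_FROUND_CUR_DIRECTION rounds using the current MXCSR mode.
  void store4_float_to_half(uint16_t *dst, __m128 v) {
    __m128i h = _mm_cvtps_ph(v, _MM_FROUND_CUR_DIRECTION);
    _mm_storel_epi64((__m128i *)dst, h);  // the extractelt + store shape above
  }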
// Unordered/Ordered scalar fp compare with Sae and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr, Domain d,
- X86FoldableSchedWrite sched = WriteFCom> {
+ X86FoldableSchedWrite sched = WriteFComX> {
let hasSideEffects = 0, Uses = [MXCSR] in
def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
@@ -8613,7 +8710,7 @@ multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
- _.ScalarIntMemCPat:$src2)>, EVEX_4V, VEX_LIG,
+ (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -8646,7 +8743,7 @@ multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
- "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
+ "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
(OpNode (_.VT
(_.BroadcastLdFrag addr:$src)))>,
EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
@@ -8701,7 +8798,7 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>,
+ (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}
@@ -8741,7 +8838,7 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
- "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
+ "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
(OpNode (_.VT
(_.BroadcastLdFrag addr:$src)))>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
@@ -8811,20 +8908,21 @@ multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _>{
let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
- (_.VT (any_fsqrt _.RC:$src))>, EVEX,
+ (_.VT (any_fsqrt _.RC:$src)),
+ (_.VT (fsqrt _.RC:$src))>, EVEX,
Sched<[sched]>;
- defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
- (any_fsqrt (_.VT
- (bitconvert (_.LdFrag addr:$src))))>, EVEX,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
- defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (any_fsqrt (_.VT (_.LdFrag addr:$src))),
+ (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
+ defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
- "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
- (any_fsqrt (_.VT
- (_.BroadcastLdFrag addr:$src)))>,
+ "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
+ (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
+ (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
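The _split maskable forms above use any_fsqrt for the unmasked pattern while the masked forms use plain fsqrt; at source level the masked case corresponds to intrinsics such as the following sketch (assumes -mavx512f; the helper name is illustrative):

  #include <immintrin.h>

  // Lanes with the mask bit set get sqrt(a); the others keep src.
  __m512 masked_sqrt(__m512 src, __mmask16 k, __m512 a) {
    return _mm512_mask_sqrt_ps(src, k, a);
  }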
@@ -8879,7 +8977,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(X86fsqrts (_.VT _.RC:$src1),
- _.ScalarIntMemCPat:$src2)>,
+ (_.ScalarIntMemFrags addr:$src2))>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
let Uses = [MXCSR] in
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -8952,7 +9050,7 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales _.RC:$src1,
- _.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>,
+ (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
@@ -8971,13 +9069,13 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX512] in {
def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
- (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
+ (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
_.FRC:$src1, timm:$src2))>;
}
let Predicates = [HasAVX512, OptForSize] in {
def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
- (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
+ (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
addr:$src1, timm:$src2))>;
}
}
@@ -8996,13 +9094,13 @@ multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
dag OutMask, Predicate BasePredicate> {
let Predicates = [BasePredicate] in {
- def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
+ def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
(OpNode (extractelt _.VT:$src2, (iPTR 0))),
(extractelt _.VT:$dst, (iPTR 0))))),
(!cast<Instruction>("V"#OpcPrefix#r_Intk)
_.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
- def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
+ def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
(OpNode (extractelt _.VT:$src2, (iPTR 0))),
ZeroFP))),
(!cast<Instruction>("V"#OpcPrefix#r_Intkz)
@@ -9026,14 +9124,14 @@ defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
// either to the multiclasses.
def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
- (vselect node:$mask,
- (trunc node:$src), node:$src0)>;
+ (vselect_mask node:$mask,
+ (trunc node:$src), node:$src0)>;
def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
- (vselect node:$mask,
- (X86vtruncs node:$src), node:$src0)>;
+ (vselect_mask node:$mask,
+ (X86vtruncs node:$src), node:$src0)>;
def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
- (vselect node:$mask,
- (X86vtruncus node:$src), node:$src0)>;
+ (vselect_mask node:$mask,
+ (X86vtruncus node:$src), node:$src0)>;
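These PatFrags describe a masked vector truncate (plain, signed-saturating, and unsigned-saturating); a minimal C++ sketch of a source-level masked truncate that ends up in this shape (assumes -mavx512f; the helper name is illustrative):

  #include <immintrin.h>

  // Truncate sixteen 32-bit integers to 16 bits; masked-off lanes keep src.
  __m256i trunc_dw_masked(__m256i src, __mmask16 k, __m512i a) {
    return _mm512_mask_cvtepi32_epi16(src, k, a);
  }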
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDPatternOperator MaskNode,
@@ -9083,12 +9181,12 @@ multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
string Name> {
def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
- (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
+ (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
addr:$dst, SrcInfo.RC:$src)>;
def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
SrcInfo.KRCWM:$mask),
- (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
+ (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
@@ -9548,6 +9646,8 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
let Predicates = [HasVLX] in {
def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
+ def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
@@ -9558,6 +9658,8 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
}
@@ -9565,6 +9667,10 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
let Predicates = [HasAVX512] in {
def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
+ def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
+ def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
+ (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
}
}
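The added patterns fold a scalar 64-bit load (now also when typed as f64) into the extending-load forms; a sketch of the kind of source that produces this shape (plain AVX2 intrinsics at source level; with AVX-512 enabled the EVEX forms above may be selected instead; the helper name is illustrative):

  #include <immintrin.h>
  #include <stdint.h>

  // Sign-extend eight bytes loaded from memory into eight 32-bit lanes.
  // The 64-bit load is expected to fold into the extending load's memory operand.
  __m256i sx_bytes_to_dwords(const int8_t *p) {
    __m128i b = _mm_loadl_epi64((const __m128i *)p);
    return _mm256_cvtepi8_epi32(b);
  }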
@@ -9586,54 +9692,49 @@ def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
// FIXME: Improve scheduling of gather/scatter instructions.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
- X86MemOperand memop, PatFrag GatherNode,
- RegisterClass MaskRC = _.KRCWM> {
+ X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
- ExeDomain = _.ExeDomain in
+ ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
(ins _.RC:$src1, MaskRC:$mask, memop:$src2),
!strconcat(OpcodeStr#_.Suffix,
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
- [(set _.RC:$dst, MaskRC:$mask_wb,
- (GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
- vectoraddr:$src2))]>, EVEX, EVEX_K,
- EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
+ []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
}
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
- defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
- vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
- defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
- vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
+ defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
+ vy512xmem>, EVEX_V512, VEX_W;
+ defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
+ vz512mem>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
- defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
- vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
- defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
- vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
- defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
- vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
- defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
- vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
+ defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
+ vx256xmem>, EVEX_V256, VEX_W;
+ defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
+ vy256xmem>, EVEX_V256, VEX_W;
+ defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
+ vx128xmem>, EVEX_V128, VEX_W;
+ defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
+ vx128xmem>, EVEX_V128, VEX_W;
}
}
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
- defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
- mgatherv16i32>, EVEX_V512;
- defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
- mgatherv8i64>, EVEX_V512;
+ defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
+ EVEX_V512;
+ defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
+ EVEX_V512;
let Predicates = [HasVLX] in {
- defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
- vy256xmem, mgatherv8i32>, EVEX_V256;
- defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
- vy128xmem, mgatherv4i64>, EVEX_V256;
- defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
- vx128xmem, mgatherv4i32>, EVEX_V128;
- defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
- vx64xmem, mgatherv2i64, VK2WM>,
- EVEX_V128;
+ defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
+ vy256xmem>, EVEX_V256;
+ defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
+ vy128xmem>, EVEX_V256;
+ defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
+ vx128xmem>, EVEX_V128;
+ defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
+ vx64xmem, VK2WM>, EVEX_V128;
}
}
@@ -9645,55 +9746,52 @@ defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q
avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
- X86MemOperand memop, PatFrag ScatterNode,
- RegisterClass MaskRC = _.KRCWM> {
+ X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
-let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
+let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
+ hasSideEffects = 0 in
def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
(ins memop:$dst, MaskRC:$mask, _.RC:$src),
!strconcat(OpcodeStr#_.Suffix,
"\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
- [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
- MaskRC:$mask, vectoraddr:$dst))]>,
- EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
+ []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[WriteStore]>;
}
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
- defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
- vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
- defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
- vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
+ defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
+ vy512xmem>, EVEX_V512, VEX_W;
+ defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
+ vz512mem>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
- defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
- vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
- defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
- vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
- defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
- vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
- defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
- vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
+ defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
+ vx256xmem>, EVEX_V256, VEX_W;
+ defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
+ vy256xmem>, EVEX_V256, VEX_W;
+ defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
+ vx128xmem>, EVEX_V128, VEX_W;
+ defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
+ vx128xmem>, EVEX_V128, VEX_W;
}
}
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
- defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
- mscatterv16i32>, EVEX_V512;
- defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
- mscatterv8i64>, EVEX_V512;
+ defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
+ EVEX_V512;
+ defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
+ EVEX_V512;
let Predicates = [HasVLX] in {
- defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
- vy256xmem, mscatterv8i32>, EVEX_V256;
- defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
- vy128xmem, mscatterv4i64>, EVEX_V256;
- defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
- vx128xmem, mscatterv4i32>, EVEX_V128;
- defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
- vx64xmem, mscatterv2i64, VK2WM>,
- EVEX_V128;
+ defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
+ vy256xmem>, EVEX_V256;
+ defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
+ vy128xmem>, EVEX_V256;
+ defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
+ vx128xmem>, EVEX_V128;
+ defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
+ vx64xmem, VK2WM>, EVEX_V128;
}
}
@@ -9762,13 +9860,9 @@ defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
- !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
+ !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
[(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
-
-// Also need a pattern for anyextend.
-def : Pat<(Vec.VT (anyext Vec.KRC:$src)),
- (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>;
}
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
@@ -9842,19 +9936,11 @@ let Predicates = [HasDQI, NoBWI] in {
(VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
(VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
-
- def : Pat<(v16i8 (anyext (v16i1 VK16:$src))),
- (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
- def : Pat<(v16i16 (anyext (v16i1 VK16:$src))),
- (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}
let Predicates = [HasDQI, NoBWI, HasVLX] in {
def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
(VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
-
- def : Pat<(v8i16 (anyext (v8i1 VK8:$src))),
- (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}
//===----------------------------------------------------------------------===//
@@ -9885,14 +9971,14 @@ multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
- (!cast<Instruction>(Name#_.ZSuffix##mrk)
+ (!cast<Instruction>(Name#_.ZSuffix#mrk)
addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
- (!cast<Instruction>(Name#_.ZSuffix##rrk)
+ (!cast<Instruction>(Name#_.ZSuffix#rrk)
_.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
- (!cast<Instruction>(Name#_.ZSuffix##rrkz)
+ (!cast<Instruction>(Name#_.ZSuffix#rrkz)
_.KRCWM:$mask, _.RC:$src)>;
}
@@ -9940,23 +10026,23 @@ multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
- (!cast<Instruction>(Name#_.ZSuffix##rmkz)
+ (!cast<Instruction>(Name#_.ZSuffix#rmkz)
_.KRCWM:$mask, addr:$src)>;
def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
- (!cast<Instruction>(Name#_.ZSuffix##rmkz)
+ (!cast<Instruction>(Name#_.ZSuffix#rmkz)
_.KRCWM:$mask, addr:$src)>;
def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
(_.VT _.RC:$src0))),
- (!cast<Instruction>(Name#_.ZSuffix##rmk)
+ (!cast<Instruction>(Name#_.ZSuffix#rmk)
_.RC:$src0, _.KRCWM:$mask, addr:$src)>;
def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
- (!cast<Instruction>(Name#_.ZSuffix##rrk)
+ (!cast<Instruction>(Name#_.ZSuffix#rrk)
_.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
- (!cast<Instruction>(Name#_.ZSuffix##rrkz)
+ (!cast<Instruction>(Name#_.ZSuffix#rrkz)
_.KRCWM:$mask, _.RC:$src)>;
}
@@ -9990,26 +10076,33 @@ defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
// op(mem_vec,imm)
// op(broadcast(eltVt),imm)
// all instructions created with FROUND_CURRENT
-multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86FoldableSchedWrite sched, X86VectorVTInfo _> {
+multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode MaskOpNode,
+ X86FoldableSchedWrite sched,
+ X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
- (OpNode (_.VT _.RC:$src1),
- (i32 timm:$src2))>, Sched<[sched]>;
- defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
+ (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
+ (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
+ Sched<[sched]>;
+ defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
+ OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
- (i32 timm:$src2))>,
+ (i32 timm:$src2)),
+ (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (i32 timm:$src2))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
- defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
- "${src1}"##_.BroadcastStr##", $src2",
+ OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
+ "${src1}"#_.BroadcastStr#", $src2",
(OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
- (i32 timm:$src2))>, EVEX_B,
+ (i32 timm:$src2)),
+ (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
+ (i32 timm:$src2))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -10021,7 +10114,7 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
+ OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
"$src1, {sae}, $src2",
(OpNode (_.VT _.RC:$src1),
(i32 timm:$src2))>,
@@ -10030,18 +10123,19 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
- SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
+ SDNode MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
+ Predicate prd>{
let Predicates = [prd] in {
- defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
- _.info512>,
+ defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.ZMM, _.info512>,
avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
sched.ZMM, _.info512>, EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
- defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
- _.info128>, EVEX_V128;
- defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
- _.info256>, EVEX_V256;
+ defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.XMM, _.info128>, EVEX_V128;
+ defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
+ sched.YMM, _.info256>, EVEX_V256;
}
}
@@ -10068,8 +10162,8 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
- OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr##", $src3",
+ OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr#", $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (_.BroadcastLdFrag addr:$src2)),
(i32 timm:$src3))>, EVEX_B,
@@ -10111,8 +10205,8 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
let ExeDomain = _.ExeDomain in
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
- OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr##", $src3",
+ OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr#", $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (_.BroadcastLdFrag addr:$src2)),
(i8 timm:$src3))>, EVEX_B,
@@ -10135,7 +10229,7 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
- (_.VT _.ScalarIntMemCPat:$src2),
+ (_.ScalarIntMemFrags addr:$src2),
(i32 timm:$src3))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -10228,24 +10322,26 @@ multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
- SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
+ SDNode MaskOpNode, SDNode OpNodeSAE,
+ X86SchedWriteWidths sched, Predicate prd>{
defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
- opcPs, OpNode, OpNodeSAE, sched, prd>,
+ opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
EVEX_CD8<32, CD8VF>;
defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
- opcPd, OpNode, OpNodeSAE, sched, prd>,
+ opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
EVEX_CD8<64, CD8VF>, VEX_W;
}
defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
- X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
- AVX512AIi8Base, EVEX;
+ X86VReduce, X86VReduce, X86VReduceSAE,
+ SchedWriteFRnd, HasDQI>, AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
- X86any_VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
+ X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
+ SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
- X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,
- AVX512AIi8Base, EVEX;
+ X86VGetMant, X86VGetMant, X86VGetMantSAE,
+ SchedWriteFRnd, HasAVX512>, AVX512AIi8Base, EVEX;
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
0x50, X86VRange, X86VRangeSAE,
@@ -10302,8 +10398,8 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
- OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr##", $src3",
+ OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr#", $src3",
(_.VT
(bitconvert
(CastInfo.VT
@@ -10391,8 +10487,8 @@ multiclass avx512_valign<bits<8> opc, string OpcodeStr,
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
- OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr##", $src3",
+ OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr#", $src3",
(X86VAlign _.RC:$src1,
(_.VT (_.BroadcastLdFrag addr:$src2)),
(i8 timm:$src3))>, EVEX_B,
@@ -10441,40 +10537,40 @@ def ValigndImm8XForm : SDNodeXForm<timm, [{
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
X86VectorVTInfo From, X86VectorVTInfo To,
SDNodeXForm ImmXForm> {
- def : Pat<(To.VT (vselect To.KRCWM:$mask,
- (bitconvert
- (From.VT (OpNode From.RC:$src1, From.RC:$src2,
- timm:$src3))),
- To.RC:$src0)),
+ def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1, From.RC:$src2,
+ timm:$src3))),
+ To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
To.RC:$src1, To.RC:$src2,
(ImmXForm timm:$src3))>;
- def : Pat<(To.VT (vselect To.KRCWM:$mask,
- (bitconvert
- (From.VT (OpNode From.RC:$src1, From.RC:$src2,
- timm:$src3))),
- To.ImmAllZerosV)),
+ def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1, From.RC:$src2,
+ timm:$src3))),
+ To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
To.RC:$src1, To.RC:$src2,
(ImmXForm timm:$src3))>;
- def : Pat<(To.VT (vselect To.KRCWM:$mask,
- (bitconvert
- (From.VT (OpNode From.RC:$src1,
- (From.LdFrag addr:$src2),
- timm:$src3))),
- To.RC:$src0)),
+ def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1,
+ (From.LdFrag addr:$src2),
+ timm:$src3))),
+ To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
To.RC:$src1, addr:$src2,
(ImmXForm timm:$src3))>;
- def : Pat<(To.VT (vselect To.KRCWM:$mask,
- (bitconvert
- (From.VT (OpNode From.RC:$src1,
- (From.LdFrag addr:$src2),
- timm:$src3))),
- To.ImmAllZerosV)),
+ def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1,
+ (From.LdFrag addr:$src2),
+ timm:$src3))),
+ To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
To.RC:$src1, addr:$src2,
(ImmXForm timm:$src3))>;
@@ -10491,24 +10587,24 @@ multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
(!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
(ImmXForm timm:$src3))>;
- def : Pat<(To.VT (vselect To.KRCWM:$mask,
- (bitconvert
- (From.VT (OpNode From.RC:$src1,
- (bitconvert
- (To.VT (To.BroadcastLdFrag addr:$src2))),
- timm:$src3))),
- To.RC:$src0)),
+ def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1,
+ (bitconvert
+ (To.VT (To.BroadcastLdFrag addr:$src2))),
+ timm:$src3))),
+ To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
To.RC:$src1, addr:$src2,
(ImmXForm timm:$src3))>;
- def : Pat<(To.VT (vselect To.KRCWM:$mask,
- (bitconvert
- (From.VT (OpNode From.RC:$src1,
- (bitconvert
- (To.VT (To.BroadcastLdFrag addr:$src2))),
- timm:$src3))),
- To.ImmAllZerosV)),
+ def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
+ (bitconvert
+ (From.VT (OpNode From.RC:$src1,
+ (bitconvert
+ (To.VT (To.BroadcastLdFrag addr:$src2))),
+ timm:$src3))),
+ To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
To.RC:$src1, addr:$src2,
(ImmXForm timm:$src3))>;
@@ -10567,8 +10663,8 @@ multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1), OpcodeStr,
- "${src1}"##_.BroadcastStr,
- "${src1}"##_.BroadcastStr,
+ "${src1}"#_.BroadcastStr,
+ "${src1}"#_.BroadcastStr,
(_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded]>;
@@ -10751,32 +10847,14 @@ defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
(VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
-def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
- (VMOVDDUPZ128rm addr:$src)>;
-def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
- (VMOVDDUPZ128rm addr:$src)>;
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
- (v2f64 VR128X:$src0)),
+def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
+ (v2f64 VR128X:$src0)),
(VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
(v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
- immAllZerosV),
+def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
+ immAllZerosV),
(VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
-
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
- (v2f64 VR128X:$src0)),
- (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
- immAllZerosV),
- (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
-
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
- (v2f64 VR128X:$src0)),
- (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
- immAllZerosV),
- (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
//===----------------------------------------------------------------------===//
@@ -10784,9 +10862,9 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load
//===----------------------------------------------------------------------===//
let Uses = []<Register>, mayRaiseFPException = 0 in {
-defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
+defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
SchedWriteFShuffleSizes, 0, 1>;
-defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
+defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
SchedWriteFShuffleSizes>;
}
@@ -10945,16 +11023,15 @@ defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD,
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//
-// FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
Format MRMm, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _>{
- def rr : AVX512<opc, MRMr,
+ def ri : AVX512<opc, MRMr,
(outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
Sched<[sched]>;
- def rm : AVX512<opc, MRMm,
+ def mi : AVX512<opc, MRMm,
(outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,(_.VT (OpNode
@@ -11106,8 +11183,8 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
- OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
- "$src2, ${src3}"##_.BroadcastStr##", $src4",
+ OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
+ "$src2, ${src3}"#_.BroadcastStr#", $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT (_.BroadcastLdFrag addr:$src3)),
@@ -11117,12 +11194,12 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
}// Constraints = "$src1 = $dst"
// Additional patterns for matching passthru operand in other positions.
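  // A minimal sketch of why the immediate gets rewritten below (assuming the
  // usual vpternlog encoding, where imm8 bit [src1:src2:src3] holds the result
  // for that combination of source bits): when a pattern matches the sources
  // in a different order than the instruction consumes them, the truth table
  // has to be permuted the same way. For example "~operand1" is imm8 0x0F,
  // but the same operation applied to operand 3 needs imm8 0x55; that
  // permutation is what the VPTERNLOG*_imm8 SDNodeXForms compute from
  // timm:$src4.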
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
@@ -11141,13 +11218,13 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Additional patterns for matching zero masking with loads in other
// positions.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, (i8 timm:$src4)),
_.ImmAllZerosV)),
@@ -11156,31 +11233,31 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Additional patterns for matching masked loads with different
// operand orders.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1,
(bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
_.RC:$src1)),
@@ -11200,14 +11277,14 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Additional patterns for matching zero masking with broadcasts in other
// positions.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode (_.BroadcastLdFrag addr:$src3),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src1,
(_.BroadcastLdFrag addr:$src3),
_.RC:$src2, (i8 timm:$src4)),
@@ -11218,32 +11295,32 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Additional patterns for matching masked broadcasts with different
// operand orders.
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
_.RC:$src2, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode (_.BroadcastLdFrag addr:$src3),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1,
(_.BroadcastLdFrag addr:$src3),
(i8 timm:$src4)), _.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode _.RC:$src2,
(_.BroadcastLdFrag addr:$src3),
_.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
(OpNode (_.BroadcastLdFrag addr:$src3),
_.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
_.RC:$src1)),
@@ -11288,6 +11365,36 @@ let Predicates = [HasVLX] in {
(VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
+ def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
+ (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i8 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+ VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v16i8 (X86vpternlog VR128X:$src1,
+ (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+ VR128X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
+ (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i8 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+ VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v16i8 (X86vpternlog VR128X:$src1,
+ (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+ VR128X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
(i8 timm:$src4))),
(VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
@@ -11305,6 +11412,66 @@ let Predicates = [HasVLX] in {
(VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
+ def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
+ (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v8i16 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+ VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v8i16 (X86vpternlog VR128X:$src1,
+ (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+ VR128X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
+ (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v8i16 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+ VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v8i16 (X86vpternlog VR128X:$src1,
+ (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+ VR128X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v4i32 (X86vpternlog VR128X:$src1, VR128X:$src2,
+ (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v4i32 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+ VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v4i32 (X86vpternlog VR128X:$src1,
+ (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+ VR128X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v2i64 (X86vpternlog VR128X:$src1, VR128X:$src2,
+ (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v2i64 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+ VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v2i64 (X86vpternlog VR128X:$src1,
+ (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+ VR128X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
(i8 timm:$src4))),
(VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
@@ -11322,6 +11489,36 @@ let Predicates = [HasVLX] in {
(VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
+ def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
+ (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i8 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+ VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v32i8 (X86vpternlog VR256X:$src1,
+ (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+ VR256X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
+ (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i8 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+ VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v32i8 (X86vpternlog VR256X:$src1,
+ (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+ VR256X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
(i8 timm:$src4))),
(VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
@@ -11338,6 +11535,66 @@ let Predicates = [HasVLX] in {
VR256X:$src2, (i8 timm:$src4))),
(VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
+ (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i16 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+ VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v16i16 (X86vpternlog VR256X:$src1,
+ (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+ VR256X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
+ (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i16 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+ VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v16i16 (X86vpternlog VR256X:$src1,
+ (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+ VR256X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v8i32 (X86vpternlog VR256X:$src1, VR256X:$src2,
+ (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v8i32 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+ VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v8i32 (X86vpternlog VR256X:$src1,
+ (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+ VR256X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v4i64 (X86vpternlog VR256X:$src1, VR256X:$src2,
+ (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v4i64 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+ VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v4i64 (X86vpternlog VR256X:$src1,
+ (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+ VR256X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
}
let Predicates = [HasAVX512] in {
@@ -11358,6 +11615,36 @@ let Predicates = [HasAVX512] in {
(VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
+ def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
+ (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v64i8 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ VR512:$src2, VR512:$src1, (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v64i8 (X86vpternlog VR512:$src1,
+ (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ VR512:$src2, (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
+ (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v64i8 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+ VR512:$src2, VR512:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v64i8 (X86vpternlog VR512:$src1,
+ (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+ VR512:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
(i8 timm:$src4))),
(VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
@@ -11371,9 +11658,84 @@ let Predicates = [HasAVX512] in {
(VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG321_imm8 timm:$src4))>;
def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
- VR512:$src2, (i8 timm:$src4))),
+ VR512:$src2, (i8 timm:$src4))),
(VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
(VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
+ (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i16 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ VR512:$src2, VR512:$src1, (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1,
+ (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ VR512:$src2, (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
+ (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i16 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+ VR512:$src2, VR512:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1,
+ (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+ VR512:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
+ (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i16 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ VR512:$src2, VR512:$src1, (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1,
+ (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ VR512:$src2, (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v16i32 (X86vpternlog VR512:$src1, VR512:$src2,
+ (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i32 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+ VR512:$src2, VR512:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v16i32 (X86vpternlog VR512:$src1,
+ (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+ VR512:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v8i64 (X86vpternlog VR512:$src1, VR512:$src2,
+ (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v8i64 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ VR512:$src2, VR512:$src1, (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v8i64 (X86vpternlog VR512:$src1,
+ (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+ VR512:$src2, (i8 timm:$src4))),
+ (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
}
// Patterns to implement vnot using vpternlog instead of creating all ones
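// A minimal sketch of that idea (VPTERNLOGQZrri is the record used in the
// patterns above; the exact pattern shape here is assumed):
//   def : Pat<(v8i64 (vnot VR512:$src)),
//             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
// With all three sources tied to the same register, imm8 0x0F ("~src1")
// yields the bitwise NOT directly, so no all-ones constant has to be
// materialized just to feed an xor.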
@@ -11484,14 +11846,14 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
Uses = [MXCSR], mayRaiseFPException = 1 in {
defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
- OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
+ OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(X86VFixupimm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(TblVT.VT _.RC:$src3),
(i32 timm:$src4))>, Sched<[sched]>;
defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
- OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
+ OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(X86VFixupimm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
@@ -11499,8 +11861,8 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
- OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
- "$src2, ${src3}"##_.BroadcastStr##", $src4",
+ OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
+ "$src2, ${src3}"#_.BroadcastStr#", $src4",
(X86VFixupimm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
@@ -11516,7 +11878,7 @@ multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
- OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
+ OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
"$src2, $src3, {sae}, $src4",
(X86VFixupimmSAE (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
@@ -11533,7 +11895,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
- OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
+ OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(X86VFixupimms (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_src3VT.VT _src3VT.RC:$src3),
@@ -11541,7 +11903,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
let Uses = [MXCSR] in
defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
- OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
+ OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
"$src2, $src3, {sae}, $src4",
(X86VFixupimmSAEs (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
@@ -11550,7 +11912,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
- OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
+ OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(X86VFixupimms (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_src3VT.VT (scalar_to_vector
@@ -11630,8 +11992,9 @@ defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
-multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
- X86VectorVTInfo _, PatLeaf ZeroFP> {
+multiclass AVX512_scalar_math_fp_patterns<SDNode Op, SDNode MaskedOp,
+ string OpcPrefix, SDNode MoveNode,
+ X86VectorVTInfo _, PatLeaf ZeroFP> {
let Predicates = [HasAVX512] in {
// extracted scalar math op with insert via movss
def : Pat<(MoveNode
@@ -11639,79 +12002,79 @@ multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode Mo
(_.VT (scalar_to_vector
(Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
_.FRC:$src)))),
- (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
+ (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
(_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
def : Pat<(MoveNode
(_.VT VR128X:$dst),
(_.VT (scalar_to_vector
(Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
(_.ScalarLdFrag addr:$src))))),
- (!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>;
+ (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
// extracted masked scalar math op with insert via movss
def : Pat<(MoveNode (_.VT VR128X:$src1),
(scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op (_.EltVT
- (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- _.FRC:$src2),
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp (_.EltVT
+ (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ _.FRC:$src2),
_.FRC:$src0))),
- (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
+ (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
(_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
VK1WM:$mask, _.VT:$src1,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
def : Pat<(MoveNode (_.VT VR128X:$src1),
(scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op (_.EltVT
- (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- (_.ScalarLdFrag addr:$src2)),
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp (_.EltVT
+ (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ (_.ScalarLdFrag addr:$src2)),
_.FRC:$src0))),
- (!cast<Instruction>("V"#OpcPrefix#Zrm_Intk)
+ (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
(_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
VK1WM:$mask, _.VT:$src1, addr:$src2)>;
// extracted masked scalar math op with insert via movss
def : Pat<(MoveNode (_.VT VR128X:$src1),
(scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op (_.EltVT
- (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- _.FRC:$src2), (_.EltVT ZeroFP)))),
- (!cast<I>("V"#OpcPrefix#Zrr_Intkz)
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp (_.EltVT
+ (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ _.FRC:$src2), (_.EltVT ZeroFP)))),
+ (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
VK1WM:$mask, _.VT:$src1,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
def : Pat<(MoveNode (_.VT VR128X:$src1),
(scalar_to_vector
- (X86selects VK1WM:$mask,
- (Op (_.EltVT
- (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
- (!cast<I>("V"#OpcPrefix#Zrm_Intkz) VK1WM:$mask, _.VT:$src1, addr:$src2)>;
+ (X86selects_mask VK1WM:$mask,
+ (MaskedOp (_.EltVT
+ (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
+ (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
}
}
-defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
-defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
-defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
-defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
-defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
-defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
-defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
-defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
SDNode Move, X86VectorVTInfo _> {
let Predicates = [HasAVX512] in {
def : Pat<(_.VT (Move _.VT:$dst,
(scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
- (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
+ (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
}
}
-defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
-defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
+defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
+defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
//===----------------------------------------------------------------------===//
// AES instructions
@@ -11724,13 +12087,13 @@ multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
loadv2i64, 0, VR128X, i128mem>,
EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
defm Z256 : AESI_binop_rm_int<Op, OpStr,
- !cast<Intrinsic>(IntPrefix##"_256"),
+ !cast<Intrinsic>(IntPrefix#"_256"),
loadv4i64, 0, VR256X, i256mem>,
EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
}
let Predicates = [HasAVX512, HasVAES] in
defm Z : AESI_binop_rm_int<Op, OpStr,
- !cast<Intrinsic>(IntPrefix##"_512"),
+ !cast<Intrinsic>(IntPrefix#"_512"),
loadv8i64, 0, VR512, i512mem>,
EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}
@@ -11792,8 +12155,8 @@ multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
ExeDomain = VTI.ExeDomain in
defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
- "${src3}"##VTI.BroadcastStr##", $src2",
- "$src2, ${src3}"##VTI.BroadcastStr,
+ "${src3}"#VTI.BroadcastStr#", $src2",
+ "$src2, ${src3}"#VTI.BroadcastStr,
(OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
AVX512FMA3Base, EVEX_B,
@@ -11827,22 +12190,22 @@ multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
}
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
SDNode OpNode, X86SchedWriteWidths sched> {
- defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
+ defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
- defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
+ defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
- defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
+ defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
SDNode OpNode, X86SchedWriteWidths sched> {
- defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
+ defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
VEX_W, EVEX_CD8<16, CD8VF>;
- defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
+ defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
- defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
+ defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}
@@ -11890,8 +12253,8 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
- OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
- "$src2, ${src3}"##VTI.BroadcastStr,
+ OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
+ "$src2, ${src3}"#VTI.BroadcastStr,
(OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
@@ -12027,8 +12390,8 @@ multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
let ExeDomain = VTI.ExeDomain in
defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
- OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
- "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
+ OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
+ "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
(OpNode (VTI.VT VTI.RC:$src1),
(bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
(i8 timm:$src3))>, EVEX_B,
@@ -12184,41 +12547,44 @@ multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
}
}
+let ExeDomain = SSEPackedSingle in
defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
- SchedWriteCvtPD2PS, //FIXME: Shoulod be SchedWriteCvtPS2BF
+ SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
avx512vl_f32_info, avx512vl_i16_info,
X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
// Truncate Float to BFloat16
multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
+ let ExeDomain = SSEPackedSingle in {
let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
- X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
+ X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasBF16, HasVLX] in {
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
- null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
+ null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
VK4WM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
- X86cvtneps2bf16,
+ X86cvtneps2bf16, X86cvtneps2bf16,
sched.YMM, "{1to8}", "{y}">, EVEX_V256;
}
+ } // Predicates = [HasBF16, HasVLX]
+ } // ExeDomain = SSEPackedSingle
- def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
- VR128X:$src), 0>;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
- f128mem:$src), 0, "intel">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
- VR256X:$src), 0>;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
- f256mem:$src), 0, "intel">;
- }
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
+ VR128X:$src), 0>;
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
+ f128mem:$src), 0, "intel">;
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
+ VR256X:$src), 0>;
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
+ f256mem:$src), 0, "intel">;
}
defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
@@ -12262,25 +12628,24 @@ multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
X86VectorVTInfo _, X86VectorVTInfo src_v> {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src2, _.RC:$src3),
+ (ins src_v.RC:$src2, src_v.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
+ (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
EVEX_4V, Sched<[sched]>;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src2, _.MemOp:$src3),
+ (ins src_v.RC:$src2, src_v.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src1, _.RC:$src2,
- (src_v.VT (bitconvert
- (src_v.LdFrag addr:$src3)))))>, EVEX_4V,
+ (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
+ (src_v.LdFrag addr:$src3)))>, EVEX_4V,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src2, _.ScalarMemOp:$src3),
+ (ins src_v.RC:$src2, src_v.ScalarMemOp:$src3),
OpcodeStr,
!strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr),
- (_.VT (OpNode _.RC:$src1, _.RC:$src2,
+ (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
(src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
@@ -12302,6 +12667,7 @@ multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
}
+let ExeDomain = SSEPackedSingle in
defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
avx512vl_f32_info, avx512vl_i32_info,
HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td
index 1e399a894490..f7f22285bd15 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -605,16 +605,16 @@ def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
def Xi8 : X86TypeInfo<i8, "b", GR8, loadi8, i8mem,
- Imm8, i8imm, relocImm8_su, i8imm, invalid_node,
+ Imm8, i8imm, imm_su, i8imm, invalid_node,
0, OpSizeFixed, 0>;
def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem,
- Imm16, i16imm, relocImm16_su, i16i8imm, i16immSExt8_su,
+ Imm16, i16imm, imm_su, i16i8imm, i16immSExt8_su,
1, OpSize16, 0>;
def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem,
- Imm32, i32imm, relocImm32_su, i32i8imm, i32immSExt8_su,
+ Imm32, i32imm, imm_su, i32i8imm, i32immSExt8_su,
1, OpSize32, 0>;
def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
- Imm32S, i64i32imm, i64relocImmSExt32_su, i64i8imm, i64immSExt8_su,
+ Imm32S, i64i32imm, i64immSExt32_su, i64i8imm, i64immSExt8_su,
1, OpSizeFixed, 1>;
/// ITy - This instruction base class takes the type info for the instruction.
@@ -1217,6 +1217,146 @@ def : Pat<(store (X86adc_flag GR64:$src, (loadi64 addr:$dst), EFLAGS),
addr:$dst),
(ADC64mr addr:$dst, GR64:$src)>;
+// Patterns for basic arithmetic ops with relocImm for the immediate field.
+multiclass ArithBinOp_RF_relocImm_Pats<SDNode OpNodeFlag, SDNode OpNode> {
+ def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2),
+ (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>;
+ def : Pat<(OpNodeFlag GR16:$src1, i16relocImmSExt8_su:$src2),
+ (!cast<Instruction>(NAME#"16ri8") GR16:$src1, i16relocImmSExt8_su:$src2)>;
+ def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2),
+ (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>;
+ def : Pat<(OpNodeFlag GR32:$src1, i32relocImmSExt8_su:$src2),
+ (!cast<Instruction>(NAME#"32ri8") GR32:$src1, i32relocImmSExt8_su:$src2)>;
+ def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2),
+ (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>;
+ def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt8_su:$src2),
+ (!cast<Instruction>(NAME#"64ri8") GR64:$src1, i64relocImmSExt8_su:$src2)>;
+ def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2),
+ (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>;
+
+ def : Pat<(store (OpNode (load addr:$dst), relocImm8_su:$src), addr:$dst),
+ (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>;
+ def : Pat<(store (OpNode (load addr:$dst), i16relocImmSExt8_su:$src), addr:$dst),
+ (!cast<Instruction>(NAME#"16mi8") addr:$dst, i16relocImmSExt8_su:$src)>;
+ def : Pat<(store (OpNode (load addr:$dst), relocImm16_su:$src), addr:$dst),
+ (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>;
+ def : Pat<(store (OpNode (load addr:$dst), i32relocImmSExt8_su:$src), addr:$dst),
+ (!cast<Instruction>(NAME#"32mi8") addr:$dst, i32relocImmSExt8_su:$src)>;
+ def : Pat<(store (OpNode (load addr:$dst), relocImm32_su:$src), addr:$dst),
+ (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>;
+ def : Pat<(store (OpNode (load addr:$dst), i64relocImmSExt8_su:$src), addr:$dst),
+ (!cast<Instruction>(NAME#"64mi8") addr:$dst, i64relocImmSExt8_su:$src)>;
+ def : Pat<(store (OpNode (load addr:$dst), i64relocImmSExt32_su:$src), addr:$dst),
+ (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>;
+}
+
+multiclass ArithBinOp_RFF_relocImm_Pats<SDNode OpNodeFlag> {
+ def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>;
+ def : Pat<(OpNodeFlag GR16:$src1, i16relocImmSExt8_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"16ri8") GR16:$src1, i16relocImmSExt8_su:$src2)>;
+ def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>;
+ def : Pat<(OpNodeFlag GR32:$src1, i32relocImmSExt8_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"32ri8") GR32:$src1, i32relocImmSExt8_su:$src2)>;
+ def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>;
+ def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt8_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"64ri8") GR64:$src1, i64relocImmSExt8_su:$src2)>;
+ def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>;
+
+ def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm8_su:$src, EFLAGS), addr:$dst),
+ (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>;
+ def : Pat<(store (OpNodeFlag (load addr:$dst), i16relocImmSExt8_su:$src, EFLAGS), addr:$dst),
+ (!cast<Instruction>(NAME#"16mi8") addr:$dst, i16relocImmSExt8_su:$src)>;
+ def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm16_su:$src, EFLAGS), addr:$dst),
+ (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>;
+ def : Pat<(store (OpNodeFlag (load addr:$dst), i32relocImmSExt8_su:$src, EFLAGS), addr:$dst),
+ (!cast<Instruction>(NAME#"32mi8") addr:$dst, i32relocImmSExt8_su:$src)>;
+ def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm32_su:$src, EFLAGS), addr:$dst),
+ (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>;
+ def : Pat<(store (OpNodeFlag (load addr:$dst), i64relocImmSExt8_su:$src, EFLAGS), addr:$dst),
+ (!cast<Instruction>(NAME#"64mi8") addr:$dst, i64relocImmSExt8_su:$src)>;
+ def : Pat<(store (OpNodeFlag (load addr:$dst), i64relocImmSExt32_su:$src, EFLAGS), addr:$dst),
+ (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>;
+}
+
+multiclass ArithBinOp_F_relocImm_Pats<SDNode OpNodeFlag> {
+ def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2),
+ (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>;
+ def : Pat<(OpNodeFlag GR16:$src1, i16relocImmSExt8_su:$src2),
+ (!cast<Instruction>(NAME#"16ri8") GR16:$src1, i16relocImmSExt8_su:$src2)>;
+ def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2),
+ (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>;
+ def : Pat<(OpNodeFlag GR32:$src1, i32relocImmSExt8_su:$src2),
+ (!cast<Instruction>(NAME#"32ri8") GR32:$src1, i32relocImmSExt8_su:$src2)>;
+ def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2),
+ (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>;
+ def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt8_su:$src2),
+ (!cast<Instruction>(NAME#"64ri8") GR64:$src1, i64relocImmSExt8_su:$src2)>;
+ def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2),
+ (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>;
+
+ def : Pat<(OpNodeFlag (loadi8 addr:$src1), relocImm8_su:$src2),
+ (!cast<Instruction>(NAME#"8mi") addr:$src1, relocImm8_su:$src2)>;
+ def : Pat<(OpNodeFlag (loadi16 addr:$src1), i16relocImmSExt8_su:$src2),
+ (!cast<Instruction>(NAME#"16mi8") addr:$src1, i16relocImmSExt8_su:$src2)>;
+ def : Pat<(OpNodeFlag (loadi16 addr:$src1), relocImm16_su:$src2),
+ (!cast<Instruction>(NAME#"16mi") addr:$src1, relocImm16_su:$src2)>;
+ def : Pat<(OpNodeFlag (loadi32 addr:$src1), i32relocImmSExt8_su:$src2),
+ (!cast<Instruction>(NAME#"32mi8") addr:$src1, i32relocImmSExt8_su:$src2)>;
+ def : Pat<(OpNodeFlag (loadi32 addr:$src1), relocImm32_su:$src2),
+ (!cast<Instruction>(NAME#"32mi") addr:$src1, relocImm32_su:$src2)>;
+ def : Pat<(OpNodeFlag (loadi64 addr:$src1), i64relocImmSExt8_su:$src2),
+ (!cast<Instruction>(NAME#"64mi8") addr:$src1, i64relocImmSExt8_su:$src2)>;
+ def : Pat<(OpNodeFlag (loadi64 addr:$src1), i64relocImmSExt32_su:$src2),
+ (!cast<Instruction>(NAME#"64mi32") addr:$src1, i64relocImmSExt32_su:$src2)>;
+}
+
+defm AND : ArithBinOp_RF_relocImm_Pats<X86and_flag, and>;
+defm OR : ArithBinOp_RF_relocImm_Pats<X86or_flag, or>;
+defm XOR : ArithBinOp_RF_relocImm_Pats<X86xor_flag, xor>;
+defm ADD : ArithBinOp_RF_relocImm_Pats<X86add_flag, add>;
+defm SUB : ArithBinOp_RF_relocImm_Pats<X86sub_flag, sub>;
+
+defm ADC : ArithBinOp_RFF_relocImm_Pats<X86adc_flag>;
+defm SBB : ArithBinOp_RFF_relocImm_Pats<X86sbb_flag>;
+
+defm CMP : ArithBinOp_F_relocImm_Pats<X86cmp>;
+
+// ADC is commutable, but we can't indicate that to tablegen. So manually
+// reverse the operands.
+def : Pat<(X86adc_flag relocImm8_su:$src2, GR8:$src1, EFLAGS),
+          (ADC8ri GR8:$src1, relocImm8_su:$src2)>;
+def : Pat<(X86adc_flag i16relocImmSExt8_su:$src2, GR16:$src1, EFLAGS),
+ (ADC16ri8 GR16:$src1, i16relocImmSExt8_su:$src2)>;
+def : Pat<(X86adc_flag relocImm16_su:$src2, GR16:$src1, EFLAGS),
+ (ADC16ri GR16:$src1, relocImm16_su:$src2)>;
+def : Pat<(X86adc_flag i32relocImmSExt8_su:$src2, GR32:$src1, EFLAGS),
+ (ADC32ri8 GR32:$src1, i32relocImmSExt8_su:$src2)>;
+def : Pat<(X86adc_flag relocImm32_su:$src2, GR32:$src1, EFLAGS),
+ (ADC32ri GR32:$src1, relocImm32_su:$src2)>;
+def : Pat<(X86adc_flag i64relocImmSExt8_su:$src2, GR64:$src1, EFLAGS),
+ (ADC64ri8 GR64:$src1, i64relocImmSExt8_su:$src2)>;
+def : Pat<(X86adc_flag i64relocImmSExt32_su:$src2, GR64:$src1, EFLAGS),
+ (ADC64ri32 GR64:$src1, i64relocImmSExt32_su:$src2)>;
+
+def : Pat<(store (X86adc_flag relocImm8_su:$src, (load addr:$dst), EFLAGS), addr:$dst),
+ (ADC8mi addr:$dst, relocImm8_su:$src)>;
+def : Pat<(store (X86adc_flag i16relocImmSExt8_su:$src, (load addr:$dst), EFLAGS), addr:$dst),
+ (ADC16mi8 addr:$dst, i16relocImmSExt8_su:$src)>;
+def : Pat<(store (X86adc_flag relocImm16_su:$src, (load addr:$dst), EFLAGS), addr:$dst),
+ (ADC16mi addr:$dst, relocImm16_su:$src)>;
+def : Pat<(store (X86adc_flag i32relocImmSExt8_su:$src, (load addr:$dst), EFLAGS), addr:$dst),
+ (ADC32mi8 addr:$dst, i32relocImmSExt8_su:$src)>;
+def : Pat<(store (X86adc_flag relocImm32_su:$src, (load addr:$dst), EFLAGS), addr:$dst),
+ (ADC32mi addr:$dst, relocImm32_su:$src)>;
+def : Pat<(store (X86adc_flag i64relocImmSExt8_su:$src, (load addr:$dst), EFLAGS), addr:$dst),
+ (ADC64mi8 addr:$dst, i64relocImmSExt8_su:$src)>;
+def : Pat<(store (X86adc_flag i64relocImmSExt32_su:$src, (load addr:$dst), EFLAGS), addr:$dst),
+ (ADC64mi32 addr:$dst, i64relocImmSExt32_su:$src)>;
+
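For a concrete picture of what these multiclasses generate, the 32-bit cases of the AND instantiation above expand to roughly the following (hand-expanded here for illustration only; TableGen produces these automatically):

// Hand-expanded from ArithBinOp_RF_relocImm_Pats<X86and_flag, and> with NAME = "AND".
def : Pat<(X86and_flag GR32:$src1, relocImm32_su:$src2),
          (AND32ri GR32:$src1, relocImm32_su:$src2)>;
def : Pat<(store (and (load addr:$dst), relocImm32_su:$src), addr:$dst),
          (AND32mi addr:$dst, relocImm32_su:$src)>;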
//===----------------------------------------------------------------------===//
// Semantically, test instructions are similar to AND, except they don't
// generate a result. From an encoding perspective, they are very different:
@@ -1247,7 +1387,6 @@ let isCompare = 1 in {
def TEST8ri : BinOpRI_F<0xF6, "test", Xi8 , X86testpat, MRM0r>;
def TEST16ri : BinOpRI_F<0xF6, "test", Xi16, X86testpat, MRM0r>;
def TEST32ri : BinOpRI_F<0xF6, "test", Xi32, X86testpat, MRM0r>;
- let Predicates = [In64BitMode] in
def TEST64ri32 : BinOpRI_F<0xF6, "test", Xi64, X86testpat, MRM0r>;
def TEST8mi : BinOpMI_F<0xF6, "test", Xi8 , X86testpat, MRM0m>;
@@ -1267,6 +1406,25 @@ let isCompare = 1 in {
"{$src, %rax|rax, $src}">;
} // isCompare
+// Patterns to match a relocImm into the immediate field.
+def : Pat<(X86testpat GR8:$src1, relocImm8_su:$src2),
+ (TEST8ri GR8:$src1, relocImm8_su:$src2)>;
+def : Pat<(X86testpat GR16:$src1, relocImm16_su:$src2),
+ (TEST16ri GR16:$src1, relocImm16_su:$src2)>;
+def : Pat<(X86testpat GR32:$src1, relocImm32_su:$src2),
+ (TEST32ri GR32:$src1, relocImm32_su:$src2)>;
+def : Pat<(X86testpat GR64:$src1, i64relocImmSExt32_su:$src2),
+ (TEST64ri32 GR64:$src1, i64relocImmSExt32_su:$src2)>;
+
+def : Pat<(X86testpat (loadi8 addr:$src1), relocImm8_su:$src2),
+ (TEST8mi addr:$src1, relocImm8_su:$src2)>;
+def : Pat<(X86testpat (loadi16 addr:$src1), relocImm16_su:$src2),
+ (TEST16mi addr:$src1, relocImm16_su:$src2)>;
+def : Pat<(X86testpat (loadi32 addr:$src1), relocImm32_su:$src2),
+ (TEST32mi addr:$src1, relocImm32_su:$src2)>;
+def : Pat<(X86testpat (loadi64 addr:$src1), i64relocImmSExt32_su:$src2),
+ (TEST64mi32 addr:$src1, i64relocImmSExt32_su:$src2)>;
+
//===----------------------------------------------------------------------===//
// ANDN Instruction
//
@@ -1306,7 +1464,6 @@ let Predicates = [HasBMI], AddedComplexity = -6 in {
multiclass bmi_mulx<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in {
- let isCommutable = 1 in
def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
[]>, T8XD, VEX_4V, Sched<[sched, WriteIMulH]>;
@@ -1314,7 +1471,17 @@ let hasSideEffects = 0 in {
let mayLoad = 1 in
def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
[]>, T8XD, VEX_4V, Sched<[sched.Folded, WriteIMulH]>;
+
+  // Pseudo instructions to be used when the low result isn't used. The
+  // instruction keeps the high half when both destinations are the same register.
+ def Hrr : PseudoI<(outs RC:$dst), (ins RC:$src),
+ []>, Sched<[sched]>;
+
+ let mayLoad = 1 in
+ def Hrm : PseudoI<(outs RC:$dst), (ins x86memop:$src),
+ []>, Sched<[sched.Folded]>;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrBuilder.h b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrBuilder.h
index aa45e9b191c1..07079ef87fd4 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrBuilder.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrBuilder.h
@@ -207,7 +207,7 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
Flags |= MachineMemOperand::MOStore;
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI, Offset), Flags,
- MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+ MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
return addOffset(MIB.addFrameIndex(FI), Offset)
.addMemOperand(MMO);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td
index 1fdac104cb73..4df93fb2ed60 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -111,8 +111,30 @@ def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
[(set GR64:$dst,
(X86SegAlloca GR64:$size))]>,
Requires<[In64BitMode]>;
+
+// To protect against stack clash, dynamic allocation should perform a memory
+// probe at each page.
+
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
+def PROBED_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
+ "# variable sized alloca with probing",
+ [(set GR32:$dst,
+ (X86ProbedAlloca GR32:$size))]>,
+ Requires<[NotLP64]>;
+
+let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
+def PROBED_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
+ "# variable sized alloca with probing",
+ [(set GR64:$dst,
+ (X86ProbedAlloca GR64:$size))]>,
+ Requires<[In64BitMode]>;
}
+let hasNoSchedulingInfo = 1 in
+def STACKALLOC_W_PROBING : I<0, Pseudo, (outs), (ins i64imm:$stacksize),
+ "# fixed size alloca with probing",
+ []>;
+
// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
// targets. These calls are needed to probe the stack when allocating more than
// 4k bytes in one go. Touching the stack at 4K increments is necessary to
@@ -177,18 +199,6 @@ let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
[(catchret bb:$dst, bb:$from)]>;
}
-let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1,
- usesCustomInserter = 1 in
-def CATCHPAD : I<0, Pseudo, (outs), (ins), "# CATCHPAD", [(catchpad)]>;
-
-// This instruction is responsible for re-establishing stack pointers after an
-// exception has been caught and we are rejoining normal control flow in the
-// parent function or funclet. It generally sets ESP and EBP, and optionally
-// ESI. It is only needed for 32-bit WinEH, as the runtime restores CSRs for us
-// elsewhere.
-let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in
-def EH_RESTORE : I<0, Pseudo, (outs), (ins), "# EH_RESTORE", []>;
-
let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
usesCustomInserter = 1 in {
def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf),
@@ -308,69 +318,26 @@ def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "",
// Materialize i64 constant where top 32-bits are zero. This could theoretically
// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
// that would make it more difficult to rematerialize.
-let isReMaterializable = 1, isAsCheapAsAMove = 1,
- isPseudo = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
-def MOV32ri64 : I<0, Pseudo, (outs GR64:$dst), (ins i64i32imm:$src), "", []>;
-
-// This 64-bit pseudo-move can be used for both a 64-bit constant that is
-// actually the zero-extension of a 32-bit constant and for labels in the
-// x86-64 small code model.
-def mov64imm32 : ComplexPattern<i64, 1, "selectMOV64Imm32", [imm, X86Wrapper]>;
-
+let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
+ isPseudo = 1, SchedRW = [WriteMove] in
+def MOV32ri64 : I<0, Pseudo, (outs GR64:$dst), (ins i64i32imm:$src), "",
+ [(set GR64:$dst, i64immZExt32:$src)]>;
+
+// This 64-bit pseudo-move can also be used for labels in the x86-64 small code
+// model.
+def mov64imm32 : ComplexPattern<i64, 1, "selectMOV64Imm32", [X86Wrapper]>;
def : Pat<(i64 mov64imm32:$src), (MOV32ri64 mov64imm32:$src)>;
// Use sbb to materialize carry bit.
-let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in {
+let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteADC],
+ hasSideEffects = 0 in {
// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
// However, Pat<> can't replicate the destination reg into the inputs of the
// result.
-def SETB_C8r : I<0, Pseudo, (outs GR8:$dst), (ins), "",
- [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-def SETB_C16r : I<0, Pseudo, (outs GR16:$dst), (ins), "",
- [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "", []>;
+def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "", []>;
} // isCodeGenOnly
-
-def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
- (SETB_C16r)>;
-def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
- (SETB_C32r)>;
-def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
- (SETB_C64r)>;
-
-def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
- (SETB_C16r)>;
-def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
- (SETB_C32r)>;
-def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
- (SETB_C64r)>;
-
-// We canonicalize 'setb' to "(and (sbb reg,reg), 1)" on the hope that the and
-// will be eliminated and that the sbb can be extended up to a wider type. When
-// this happens, it is great. However, if we are left with an 8-bit sbb and an
-// and, we might as well just match it as a setb.
-def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
- (SETCCr (i8 2))>;
-
-// Patterns to give priority when both inputs are zero so that we don't use
-// an immediate for the RHS.
-// TODO: Should we use a 32-bit sbb for 8/16 to push the extract_subreg out?
-def : Pat<(X86sbb_flag (i8 0), (i8 0), EFLAGS),
- (SBB8rr (EXTRACT_SUBREG (MOV32r0), sub_8bit),
- (EXTRACT_SUBREG (MOV32r0), sub_8bit))>;
-def : Pat<(X86sbb_flag (i16 0), (i16 0), EFLAGS),
- (SBB16rr (EXTRACT_SUBREG (MOV32r0), sub_16bit),
- (EXTRACT_SUBREG (MOV32r0), sub_16bit))>;
-def : Pat<(X86sbb_flag (i32 0), (i32 0), EFLAGS),
- (SBB32rr (MOV32r0), (MOV32r0))>;
-def : Pat<(X86sbb_flag (i64 0), (i64 0), EFLAGS),
- (SBB64rr (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit),
- (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit))>;
-
//===----------------------------------------------------------------------===//
// String Pseudo Instructions
//
@@ -568,10 +535,13 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in {
defm _RFP80 : CMOVrr_PSEUDO<RFP80, f80>;
- let Predicates = [NoAVX512] in {
+ let Predicates = [HasMMX] in
+ defm _VR64 : CMOVrr_PSEUDO<VR64, x86mmx>;
+
+ let Predicates = [HasSSE1,NoAVX512] in
defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;
+ let Predicates = [HasSSE2,NoAVX512] in
defm _FR64 : CMOVrr_PSEUDO<FR64, f64>;
- }
let Predicates = [HasAVX512] in {
defm _FR32X : CMOVrr_PSEUDO<FR32X, f32>;
defm _FR64X : CMOVrr_PSEUDO<FR64X, f64>;
@@ -585,6 +555,7 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in {
defm _VR256X : CMOVrr_PSEUDO<VR256X, v4i64>;
}
defm _VR512 : CMOVrr_PSEUDO<VR512, v8i64>;
+ defm _VK1 : CMOVrr_PSEUDO<VK1, v1i1>;
defm _VK2 : CMOVrr_PSEUDO<VK2, v2i1>;
defm _VK4 : CMOVrr_PSEUDO<VK4, v4i1>;
defm _VK8 : CMOVrr_PSEUDO<VK8, v8i1>;
@@ -880,7 +851,7 @@ defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem>;
// it. In other words, the register will not fix the clobbering of
// RBX that will happen when setting the arguments for the instruction.
//
-// Unlike the actual related instuction, we mark that this one
+// Unlike the actual related instruction, we mark that this one
// defines EBX (instead of using EBX).
// The rationale is that we will define RBX during the expansion of
// the pseudo. The argument feeding EBX is ebx_input.
@@ -1815,21 +1786,24 @@ multiclass MaskedRotateAmountPats<SDNode frag, string name> {
defm : MaskedRotateAmountPats<rotl, "ROL">;
defm : MaskedRotateAmountPats<rotr, "ROR">;
-// Double shift amount is implicitly masked.
-multiclass MaskedDoubleShiftAmountPats<SDNode frag, string name> {
- // (shift x (and y, 31)) ==> (shift x, y)
- def : Pat<(frag GR16:$src1, GR16:$src2, (shiftMask32 CL)),
- (!cast<Instruction>(name # "16rrCL") GR16:$src1, GR16:$src2)>;
- def : Pat<(frag GR32:$src1, GR32:$src2, (shiftMask32 CL)),
- (!cast<Instruction>(name # "32rrCL") GR32:$src1, GR32:$src2)>;
-
- // (shift x (and y, 63)) ==> (shift x, y)
- def : Pat<(frag GR64:$src1, GR64:$src2, (shiftMask32 CL)),
- (!cast<Instruction>(name # "64rrCL") GR64:$src1, GR64:$src2)>;
-}
-
-defm : MaskedDoubleShiftAmountPats<X86shld, "SHLD">;
-defm : MaskedDoubleShiftAmountPats<X86shrd, "SHRD">;
+// Double "funnel" shift amount is implicitly masked.
+// (fshl/fshr x (and y, 31)) ==> (fshl/fshr x, y) (NOTE: modulo32)
+def : Pat<(X86fshl GR16:$src1, GR16:$src2, (shiftMask32 CL)),
+ (SHLD16rrCL GR16:$src1, GR16:$src2)>;
+def : Pat<(X86fshr GR16:$src2, GR16:$src1, (shiftMask32 CL)),
+ (SHRD16rrCL GR16:$src1, GR16:$src2)>;
+
+// (fshl/fshr x (and y, 31)) ==> (fshl/fshr x, y)
+def : Pat<(fshl GR32:$src1, GR32:$src2, (shiftMask32 CL)),
+ (SHLD32rrCL GR32:$src1, GR32:$src2)>;
+def : Pat<(fshr GR32:$src2, GR32:$src1, (shiftMask32 CL)),
+ (SHRD32rrCL GR32:$src1, GR32:$src2)>;
+
+// (fshl/fshr x (and y, 63)) ==> (fshl/fshr x, y)
+def : Pat<(fshl GR64:$src1, GR64:$src2, (shiftMask64 CL)),
+ (SHLD64rrCL GR64:$src1, GR64:$src2)>;
+def : Pat<(fshr GR64:$src2, GR64:$src1, (shiftMask64 CL)),
+ (SHRD64rrCL GR64:$src1, GR64:$src2)>;
let Predicates = [HasBMI2] in {
let AddedComplexity = 1 in {
@@ -1919,15 +1893,6 @@ defm : one_bit_patterns<GR16, i16, BTR16rr, BTS16rr, BTC16rr, shiftMask16>;
defm : one_bit_patterns<GR32, i32, BTR32rr, BTS32rr, BTC32rr, shiftMask32>;
defm : one_bit_patterns<GR64, i64, BTR64rr, BTS64rr, BTC64rr, shiftMask64>;
-
-// (anyext (setcc_carry)) -> (setcc_carry)
-def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
- (SETB_C16r)>;
-def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
- (SETB_C32r)>;
-def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
- (SETB_C32r)>;
-
//===----------------------------------------------------------------------===//
// EFLAGS-defining Patterns
//===----------------------------------------------------------------------===//
@@ -1999,10 +1964,6 @@ def : Pat<(X86sub_flag 0, GR16:$src), (NEG16r GR16:$src)>;
def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>;
def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>;
-// sub reg, relocImm
-def : Pat<(X86sub_flag GR64:$src1, i64relocImmSExt8_su:$src2),
- (SUB64ri8 GR64:$src1, i64relocImmSExt8_su:$src2)>;
-
// mul reg, reg
def : Pat<(mul GR16:$src1, GR16:$src2),
(IMUL16rr GR16:$src1, GR16:$src2)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrControl.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrControl.td
index 1842dc19ec2e..4f7867744017 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrControl.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrControl.td
@@ -193,14 +193,16 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
"ljmp{l}\t$seg, $off", []>,
OpSize32, Sched<[WriteJump]>;
}
- def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
- "ljmp{q}\t{*}$dst", []>, Sched<[WriteJump]>, Requires<[In64BitMode]>;
-
- let AsmVariantName = "att" in
- def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
- "ljmp{w}\t{*}$dst", []>, OpSize16, Sched<[WriteJumpLd]>;
- def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
- "{l}jmp{l}\t{*}$dst", []>, OpSize32, Sched<[WriteJumpLd]>;
+ let mayLoad = 1 in {
+ def FARJMP64m : RI<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
+ "ljmp{q}\t{*}$dst", []>, Sched<[WriteJump]>, Requires<[In64BitMode]>;
+
+ let AsmVariantName = "att" in
+ def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
+ "ljmp{w}\t{*}$dst", []>, OpSize16, Sched<[WriteJumpLd]>;
+ def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
+ "{l}jmp{l}\t{*}$dst", []>, OpSize32, Sched<[WriteJumpLd]>;
+ }
}
// Loop instructions
@@ -275,10 +277,12 @@ let isCall = 1 in
OpSize32, Sched<[WriteJump]>;
}
- def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
- "lcall{w}\t{*}$dst", []>, OpSize16, Sched<[WriteJumpLd]>;
- def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
- "{l}call{l}\t{*}$dst", []>, OpSize32, Sched<[WriteJumpLd]>;
+ let mayLoad = 1 in {
+ def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
+ "lcall{w}\t{*}$dst", []>, OpSize16, Sched<[WriteJumpLd]>;
+ def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
+ "{l}call{l}\t{*}$dst", []>, OpSize32, Sched<[WriteJumpLd]>;
+ }
}
@@ -351,7 +355,8 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
Requires<[In64BitMode,FavorMemIndirectCall]>, NOTRACK;
}
- def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
+ let mayLoad = 1 in
+ def FARCALL64m : RI<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
"lcall{q}\t{*}$dst", []>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA.td
index 9e43a532a3f8..4dbd6bb8cd7e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA.td
@@ -126,7 +126,7 @@ let ExeDomain = SSEPackedSingle in {
loadv4f32, loadv8f32, X86any_Fmadd, v4f32, v8f32,
SchedWriteFMA>;
defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", "PS",
- loadv4f32, loadv8f32, X86Fmsub, v4f32, v8f32,
+ loadv4f32, loadv8f32, X86any_Fmsub, v4f32, v8f32,
SchedWriteFMA>;
defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps", "PS",
loadv4f32, loadv8f32, X86Fmaddsub, v4f32, v8f32,
@@ -141,7 +141,7 @@ let ExeDomain = SSEPackedDouble in {
loadv2f64, loadv4f64, X86any_Fmadd, v2f64,
v4f64, SchedWriteFMA>, VEX_W;
defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", "PD",
- loadv2f64, loadv4f64, X86Fmsub, v2f64,
+ loadv2f64, loadv4f64, X86any_Fmsub, v2f64,
v4f64, SchedWriteFMA>, VEX_W;
defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", "PD",
loadv2f64, loadv4f64, X86Fmaddsub,
@@ -154,19 +154,19 @@ let ExeDomain = SSEPackedDouble in {
// Fused Negative Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFNMADD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", "PS", loadv4f32,
- loadv8f32, X86Fnmadd, v4f32, v8f32, SchedWriteFMA>;
+ loadv8f32, X86any_Fnmadd, v4f32, v8f32, SchedWriteFMA>;
defm VFNMSUB : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", "PS", loadv4f32,
- loadv8f32, X86Fnmsub, v4f32, v8f32, SchedWriteFMA>;
+ loadv8f32, X86any_Fnmsub, v4f32, v8f32, SchedWriteFMA>;
}
let ExeDomain = SSEPackedDouble in {
defm VFNMADD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", "PD", loadv2f64,
- loadv4f64, X86Fnmadd, v2f64, v4f64, SchedWriteFMA>, VEX_W;
+ loadv4f64, X86any_Fnmadd, v2f64, v4f64, SchedWriteFMA>, VEX_W;
defm VFNMSUB : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", "PD", loadv2f64,
- loadv4f64, X86Fnmsub, v2f64, v4f64, SchedWriteFMA>, VEX_W;
+ loadv4f64, X86any_Fnmsub, v2f64, v4f64, SchedWriteFMA>, VEX_W;
}
// All source register operands of FMA opcodes defined in fma3s_rm multiclass
-// can be commuted. In many cases such commute transformation requres an opcode
+// can be commuted. In many cases such commute transformation requires an opcode
// adjustment, for example, commuting the operands 1 and 2 in FMA*132 form
// would require an opcode change to FMA*231:
// FMA*132* reg1, reg2, reg3; // reg1 * reg3 + reg2;
@@ -283,7 +283,7 @@ multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
[]>, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
-// The FMA 213 form is created for lowering of scalar FMA intrinscis
+// The FMA 213 form is created for lowering of scalar FMA intrinsics
// to machine instructions.
// The FMA 132 form can trivially be obtained by commuting the 2nd and 3rd operands
// of FMA 213 form.
@@ -321,12 +321,12 @@ multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86any_Fmadd,
SchedWriteFMA.Scl>, VEX_LIG;
-defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86Fmsub,
+defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86any_Fmsub,
SchedWriteFMA.Scl>, VEX_LIG;
-defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", X86Fnmadd,
+defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", X86any_Fnmadd,
SchedWriteFMA.Scl>, VEX_LIG;
-defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", X86Fnmsub,
+defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", X86any_Fnmsub,
SchedWriteFMA.Scl>, VEX_LIG;
multiclass scalar_fma_patterns<SDNode Op, string Prefix, string Suffix,
@@ -373,14 +373,14 @@ multiclass scalar_fma_patterns<SDNode Op, string Prefix, string Suffix,
}
defm : scalar_fma_patterns<X86any_Fmadd, "VFMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
-defm : scalar_fma_patterns<X86Fmsub, "VFMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
-defm : scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
-defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
+defm : scalar_fma_patterns<X86any_Fmsub, "VFMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
+defm : scalar_fma_patterns<X86any_Fnmadd, "VFNMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
+defm : scalar_fma_patterns<X86any_Fnmsub, "VFNMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86any_Fmadd, "VFMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
-defm : scalar_fma_patterns<X86Fmsub, "VFMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
-defm : scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
-defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
+defm : scalar_fma_patterns<X86any_Fmsub, "VFMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
+defm : scalar_fma_patterns<X86any_Fnmadd, "VFNMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
+defm : scalar_fma_patterns<X86any_Fnmsub, "VFNMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
//===----------------------------------------------------------------------===//
// FMA4 - AMD 4 operand Fused Multiply-Add instructions
@@ -542,26 +542,26 @@ let ExeDomain = SSEPackedSingle in {
SchedWriteFMA.Scl>,
fma4s_int<0x6A, "vfmaddss", ssmem, v4f32,
SchedWriteFMA.Scl>;
- defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32,
+ defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86any_Fmsub, loadf32,
SchedWriteFMA.Scl>,
fma4s_int<0x6E, "vfmsubss", ssmem, v4f32,
SchedWriteFMA.Scl>;
defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
- X86Fnmadd, loadf32, SchedWriteFMA.Scl>,
+ X86any_Fnmadd, loadf32, SchedWriteFMA.Scl>,
fma4s_int<0x7A, "vfnmaddss", ssmem, v4f32,
SchedWriteFMA.Scl>;
defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
- X86Fnmsub, loadf32, SchedWriteFMA.Scl>,
+ X86any_Fnmsub, loadf32, SchedWriteFMA.Scl>,
fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32,
SchedWriteFMA.Scl>;
// Packed Instructions
defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86any_Fmadd, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
- defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
+ defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86any_Fmsub, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
- defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,
+ defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86any_Fnmadd, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
- defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,
+ defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86any_Fnmsub, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
@@ -575,26 +575,26 @@ let ExeDomain = SSEPackedDouble in {
SchedWriteFMA.Scl>,
fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
- defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64,
+ defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86any_Fmsub, loadf64,
SchedWriteFMA.Scl>,
fma4s_int<0x6F, "vfmsubsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
- X86Fnmadd, loadf64, SchedWriteFMA.Scl>,
+ X86any_Fnmadd, loadf64, SchedWriteFMA.Scl>,
fma4s_int<0x7B, "vfnmaddsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
- X86Fnmsub, loadf64, SchedWriteFMA.Scl>,
+ X86any_Fnmsub, loadf64, SchedWriteFMA.Scl>,
fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
// Packed Instructions
defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86any_Fmadd, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
- defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
+ defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86any_Fmsub, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
- defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,
+ defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86any_Fnmadd, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
- defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64,
+ defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86any_Fnmsub, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
@@ -630,11 +630,11 @@ multiclass scalar_fma4_patterns<SDNode Op, string Name,
}
defm : scalar_fma4_patterns<X86any_Fmadd, "VFMADDSS4", v4f32, f32, FR32, loadf32>;
-defm : scalar_fma4_patterns<X86Fmsub, "VFMSUBSS4", v4f32, f32, FR32, loadf32>;
-defm : scalar_fma4_patterns<X86Fnmadd, "VFNMADDSS4", v4f32, f32, FR32, loadf32>;
-defm : scalar_fma4_patterns<X86Fnmsub, "VFNMSUBSS4", v4f32, f32, FR32, loadf32>;
+defm : scalar_fma4_patterns<X86any_Fmsub, "VFMSUBSS4", v4f32, f32, FR32, loadf32>;
+defm : scalar_fma4_patterns<X86any_Fnmadd, "VFNMADDSS4", v4f32, f32, FR32, loadf32>;
+defm : scalar_fma4_patterns<X86any_Fnmsub, "VFNMSUBSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86any_Fmadd, "VFMADDSD4", v2f64, f64, FR64, loadf64>;
-defm : scalar_fma4_patterns<X86Fmsub, "VFMSUBSD4", v2f64, f64, FR64, loadf64>;
-defm : scalar_fma4_patterns<X86Fnmadd, "VFNMADDSD4", v2f64, f64, FR64, loadf64>;
-defm : scalar_fma4_patterns<X86Fnmsub, "VFNMSUBSD4", v2f64, f64, FR64, loadf64>;
+defm : scalar_fma4_patterns<X86any_Fmsub, "VFMSUBSD4", v2f64, f64, FR64, loadf64>;
+defm : scalar_fma4_patterns<X86any_Fnmadd, "VFNMADDSD4", v2f64, f64, FR64, loadf64>;
+defm : scalar_fma4_patterns<X86any_Fnmsub, "VFNMSUBSD4", v2f64, f64, FR64, loadf64>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA3Info.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
index 25bbdddb7a21..6d803e931b68 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
@@ -116,11 +116,8 @@ static void verifyTables() {
#ifndef NDEBUG
static std::atomic<bool> TableChecked(false);
if (!TableChecked.load(std::memory_order_relaxed)) {
- assert(std::is_sorted(std::begin(Groups), std::end(Groups)) &&
- std::is_sorted(std::begin(RoundGroups), std::end(RoundGroups)) &&
- std::is_sorted(std::begin(BroadcastGroups),
- std::end(BroadcastGroups)) &&
- "FMA3 tables not sorted!");
+ assert(llvm::is_sorted(Groups) && llvm::is_sorted(RoundGroups) &&
+ llvm::is_sorted(BroadcastGroups) && "FMA3 tables not sorted!");
TableChecked.store(true, std::memory_order_relaxed);
}
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA3Info.h b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA3Info.h
index 7fa6f5917862..ce0a7cc7f82e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA3Info.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFMA3Info.h
@@ -14,11 +14,7 @@
#ifndef LLVM_LIB_TARGET_X86_UTILS_X86INSTRFMA3INFO_H
#define LLVM_LIB_TARGET_X86_UTILS_X86INSTRFMA3INFO_H
-#include "X86.h"
-#include "llvm/ADT/DenseMap.h"
-#include <cassert>
#include <cstdint>
-#include <set>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td
index 1830262205c6..67dcb8d00ea5 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -22,24 +22,17 @@ def SDTX86Fst : SDTypeProfile<0, 2, [SDTCisFP<0>,
SDTCisPtrTy<1>]>;
def SDTX86Fild : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
def SDTX86Fist : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
-def SDTX86Fnstsw : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
- [SDNPHasChain, SDNPOptInGlue, SDNPMayStore,
- SDNPMemOperand]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
- [SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
- SDNPMemOperand]>;
def X86fist : SDNode<"X86ISD::FIST", SDTX86Fist,
- [SDNPHasChain, SDNPOptInGlue, SDNPMayStore,
- SDNPMemOperand]>;
-def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>;
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
@@ -79,8 +72,9 @@ def X86fild64 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
}]>;
-def X86fildflag64 : PatFrag<(ops node:$ptr), (X86fildflag node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+def X86fist32 : PatFrag<(ops node:$val, node:$ptr),
+ (X86fist node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;
def X86fist64 : PatFrag<(ops node:$val, node:$ptr),
@@ -292,7 +286,7 @@ defm MUL : FPBinary_rr<any_fmul>;
defm DIV : FPBinary_rr<any_fdiv>;
}
-// Sets the scheduling resources for the actual NAME#_F<size>m defintions.
+// Sets the scheduling resources for the actual NAME#_F<size>m definitions.
let SchedRW = [WriteFAddLd] in {
defm ADD : FPBinary<any_fadd, MRM0m, "add">;
defm SUB : FPBinary<any_fsub, MRM4m, "sub">;
@@ -381,7 +375,8 @@ def TST_F : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">;
// Versions of FP instructions that take a single memory operand. Added for the
// disassembler; remove as they are included with patterns elsewhere.
-let SchedRW = [WriteFComLd], Uses = [FPCW], mayRaiseFPException = 1 in {
+let SchedRW = [WriteFComLd], Uses = [FPCW], mayRaiseFPException = 1,
+ mayLoad = 1 in {
def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">;
def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">;
@@ -396,21 +391,22 @@ def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">;
} // SchedRW
let SchedRW = [WriteMicrocoded] in {
-let Defs = [FPSW, FPCW] in {
+let Defs = [FPSW, FPCW], mayLoad = 1 in {
def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">;
def FRSTORm : FPI<0xDD, MRM4m, (outs), (ins f32mem:$dst), "frstor\t$dst">;
}
-let Defs = [FPSW, FPCW], Uses = [FPSW, FPCW] in {
+let Defs = [FPSW, FPCW], Uses = [FPSW, FPCW], mayStore = 1 in {
def FSTENVm : FPI<0xD9, MRM6m, (outs), (ins f32mem:$dst), "fnstenv\t$dst">;
def FSAVEm : FPI<0xDD, MRM6m, (outs), (ins f32mem:$dst), "fnsave\t$dst">;
}
-let Uses = [FPSW] in
+let Uses = [FPSW], mayStore = 1 in
def FNSTSWm : FPI<0xDD, MRM7m, (outs), (ins i16mem:$dst), "fnstsw\t$dst">;
+let mayLoad = 1 in
def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f80mem:$src), "fbld\t$src">;
-let Uses = [FPCW] ,mayRaiseFPException = 1 in
+let Uses = [FPCW] ,mayRaiseFPException = 1, mayStore = 1 in
def FBSTPm : FPI<0xDF, MRM6m, (outs), (ins f80mem:$dst), "fbstp\t$dst">;
} // SchedRW
@@ -534,14 +530,20 @@ def ST_FpP80m : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP,
let mayStore = 1, hasSideEffects = 0 in {
def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
-def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
-def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
+ [(X86fist32 RFP32:$src, addr:$op)]>;
+def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP,
+ [(X86fist64 RFP32:$src, addr:$op)]>;
def IST_Fp16m64 : FpIf64<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
-def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
-def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP,
+ [(X86fist32 RFP64:$src, addr:$op)]>;
+def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP,
+ [(X86fist64 RFP64:$src, addr:$op)]>;
def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
-def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
-def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
+def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP,
+ [(X86fist32 RFP80:$src, addr:$op)]>;
+def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
+ [(X86fist64 RFP80:$src, addr:$op)]>;
} // mayStore
} // SchedRW, Uses = [FPCW]
@@ -601,6 +603,7 @@ let SchedRW = [WriteMove], Uses = [FPCW] in {
def LD_Frr : FPI<0xD9, MRM0r, (outs), (ins RSTi:$op), "fld\t$op">;
def ST_Frr : FPI<0xDD, MRM2r, (outs), (ins RSTi:$op), "fst\t$op">;
def ST_FPrr : FPI<0xDD, MRM3r, (outs), (ins RSTi:$op), "fstp\t$op">;
+let mayRaiseFPException = 0 in
def XCH_F : FPI<0xD9, MRM1r, (outs), (ins RSTi:$op), "fxch\t$op">;
}
@@ -620,13 +623,13 @@ def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
[(set RFP80:$dst, fpimm1)]>;
}
-let SchedRW = [WriteFLD0], Uses = [FPCW] in
+let SchedRW = [WriteFLD0], Uses = [FPCW], mayRaiseFPException = 0 in
def LD_F0 : FPI<0xD9, MRM_EE, (outs), (ins), "fldz">;
-let SchedRW = [WriteFLD1], Uses = [FPCW] in
+let SchedRW = [WriteFLD1], Uses = [FPCW], mayRaiseFPException = 0 in
def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">;
-let SchedRW = [WriteFLDC], Defs = [FPSW], Uses = [FPCW] in {
+let SchedRW = [WriteFLDC], Defs = [FPSW], Uses = [FPCW], mayRaiseFPException = 0 in {
def FLDL2T : I<0xD9, MRM_E9, (outs), (ins), "fldl2t", []>;
def FLDL2E : I<0xD9, MRM_EA, (outs), (ins), "fldl2e", []>;
def FLDPI : I<0xD9, MRM_EB, (outs), (ins), "fldpi", []>;
@@ -635,25 +638,19 @@ def FLDLN2 : I<0xD9, MRM_ED, (outs), (ins), "fldln2", []>;
} // SchedRW
// Floating point compares.
-let SchedRW = [WriteFCom], Uses = [FPCW] in {
-def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
- [(set FPSW, (trunc (X86any_fcmp RFP32:$lhs, RFP32:$rhs)))]>;
-def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
- [(set FPSW, (trunc (X86any_fcmp RFP64:$lhs, RFP64:$rhs)))]>;
-def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
- [(set FPSW, (trunc (X86any_fcmp RFP80:$lhs, RFP80:$rhs)))]>;
-def COM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
- [(set FPSW, (trunc (X86strict_fcmps RFP32:$lhs, RFP32:$rhs)))]>;
-def COM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
- [(set FPSW, (trunc (X86strict_fcmps RFP64:$lhs, RFP64:$rhs)))]>;
-def COM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
- [(set FPSW, (trunc (X86strict_fcmps RFP80:$lhs, RFP80:$rhs)))]>;
+let SchedRW = [WriteFCom], Uses = [FPCW], hasSideEffects = 0 in {
+def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, []>;
+def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, []>;
+def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, []>;
+def COM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, []>;
+def COM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, []>;
+def COM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, []>;
} // SchedRW
} // mayRaiseFPException = 1
let SchedRW = [WriteFCom], mayRaiseFPException = 1 in {
// CC = ST(0) cmp ST(i)
-let Defs = [EFLAGS, FPCW], Uses = [FPCW] in {
+let Defs = [EFLAGS, FPSW], Uses = [FPCW] in {
def UCOM_FpIr32: FpI_<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set EFLAGS, (X86any_fcmp RFP32:$lhs, RFP32:$rhs))]>,
Requires<[FPStackf32, HasCMov]>;
@@ -698,10 +695,9 @@ def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RSTi:$reg),
// Floating point flag ops.
let SchedRW = [WriteALU] in {
-let Defs = [AX, FPSW], Uses = [FPSW] in
+let Defs = [AX, FPSW], Uses = [FPSW], hasSideEffects = 0 in
def FNSTSW16r : I<0xDF, MRM_E0, // AX = fp flags
- (outs), (ins), "fnstsw\t{%ax|ax}",
- [(set AX, (X86fp_stsw FPSW))]>;
+ (outs), (ins), "fnstsw\t{%ax|ax}", []>;
let Defs = [FPSW], Uses = [FPCW] in
def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control word
(outs), (ins i16mem:$dst), "fnstcw\t$dst",
@@ -754,20 +750,20 @@ def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", []>;
let Uses = [FPSW, FPCW] in {
def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaquemem:$dst),
- "fxsave\t$dst", [(int_x86_fxsave addr:$dst)]>, TB,
+ "fxsave\t$dst", [(int_x86_fxsave addr:$dst)]>, PS,
Requires<[HasFXSR]>;
def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaquemem:$dst),
"fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)]>,
- TB, Requires<[HasFXSR, In64BitMode]>;
+ PS, Requires<[HasFXSR, In64BitMode]>;
} // Uses = [FPSW, FPCW]
let Defs = [FPSW, FPCW] in {
def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaquemem:$src),
"fxrstor\t$src", [(int_x86_fxrstor addr:$src)]>,
- TB, Requires<[HasFXSR]>;
+ PS, Requires<[HasFXSR]>;
def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaquemem:$src),
"fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)]>,
- TB, Requires<[HasFXSR, In64BitMode]>;
+ PS, Requires<[HasFXSR, In64BitMode]>;
} // Defs = [FPSW, FPCW]
} // SchedRW
@@ -799,13 +795,6 @@ def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStackf64]>;
def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
-// Used to conv. i64 to f64 since there isn't a SSE version.
-def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m64 addr:$src)>;
-
-// Used to conv. between f80 and i64 for i64 atomic loads.
-def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m80 addr:$src)>;
-def : Pat<(X86fist64 RFP80:$src, addr:$op), (IST_Fp64m80 addr:$op, RFP80:$src)>;
-
// FP extensions map onto simple pseudo-value conversions if they are to/from
// the FP stack.
def : Pat<(f64 (any_fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index f3b286e0375c..e16382e956c5 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -486,7 +486,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::CVTPD2PSrr, X86::CVTPD2PSrm, TB_ALIGN_16 },
{ X86::CVTPS2DQrr, X86::CVTPS2DQrm, TB_ALIGN_16 },
{ X86::CVTPS2PDrr, X86::CVTPS2PDrm, TB_NO_REVERSE },
+ { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
{ X86::CVTSD2SI64rr_Int, X86::CVTSD2SI64rm_Int, TB_NO_REVERSE },
+ { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
{ X86::CVTSD2SIrr_Int, X86::CVTSD2SIrm_Int, TB_NO_REVERSE },
{ X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 },
{ X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 },
@@ -494,7 +496,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::CVTSI642SDrr, X86::CVTSI642SDrm, 0 },
{ X86::CVTSI642SSrr, X86::CVTSI642SSrm, 0 },
{ X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 },
+ { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 },
{ X86::CVTSS2SI64rr_Int, X86::CVTSS2SI64rm_Int, TB_NO_REVERSE },
+ { X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
{ X86::CVTSS2SIrr_Int, X86::CVTSS2SIrm_Int, TB_NO_REVERSE },
{ X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
{ X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
@@ -627,18 +631,18 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::UCOMISSrr_Int, X86::UCOMISSrm_Int, TB_NO_REVERSE },
{ X86::VAESIMCrr, X86::VAESIMCrm, 0 },
{ X86::VAESKEYGENASSIST128rr,X86::VAESKEYGENASSIST128rm,0 },
- { X86::VBROADCASTF32X2Z256r, X86::VBROADCASTF32X2Z256m, TB_NO_REVERSE },
- { X86::VBROADCASTF32X2Zr, X86::VBROADCASTF32X2Zm, TB_NO_REVERSE },
- { X86::VBROADCASTI32X2Z128r, X86::VBROADCASTI32X2Z128m, TB_NO_REVERSE },
- { X86::VBROADCASTI32X2Z256r, X86::VBROADCASTI32X2Z256m, TB_NO_REVERSE },
- { X86::VBROADCASTI32X2Zr, X86::VBROADCASTI32X2Zm, TB_NO_REVERSE },
+ { X86::VBROADCASTF32X2Z256rr,X86::VBROADCASTF32X2Z256rm,TB_NO_REVERSE },
+ { X86::VBROADCASTF32X2Zrr, X86::VBROADCASTF32X2Zrm, TB_NO_REVERSE },
+ { X86::VBROADCASTI32X2Z128rr,X86::VBROADCASTI32X2Z128rm,TB_NO_REVERSE },
+ { X86::VBROADCASTI32X2Z256rr,X86::VBROADCASTI32X2Z256rm,TB_NO_REVERSE },
+ { X86::VBROADCASTI32X2Zrr, X86::VBROADCASTI32X2Zrm, TB_NO_REVERSE },
{ X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
- { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
- { X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZ256rr, X86::VBROADCASTSDZ256rm, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZrr, X86::VBROADCASTSDZrm, TB_NO_REVERSE },
{ X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
- { X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZ128rr, X86::VBROADCASTSSZ128rm, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZ256rr, X86::VBROADCASTSSZ256rm, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZrr, X86::VBROADCASTSSZrm, TB_NO_REVERSE },
{ X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE },
{ X86::VCOMISDZrr, X86::VCOMISDZrm, 0 },
{ X86::VCOMISDZrr_Int, X86::VCOMISDZrm_Int, TB_NO_REVERSE },
@@ -710,15 +714,23 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::VCVTQQ2PSZ128rr, X86::VCVTQQ2PSZ128rm, 0 },
{ X86::VCVTQQ2PSZ256rr, X86::VCVTQQ2PSZ256rm, 0 },
{ X86::VCVTQQ2PSZrr, X86::VCVTQQ2PSZrm, 0 },
+ { X86::VCVTSD2SI64Zrr, X86::VCVTSD2SI64Zrm, 0 },
{ X86::VCVTSD2SI64Zrr_Int, X86::VCVTSD2SI64Zrm_Int, TB_NO_REVERSE },
+ { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 },
{ X86::VCVTSD2SI64rr_Int, X86::VCVTSD2SI64rm_Int, TB_NO_REVERSE },
+ { X86::VCVTSD2SIZrr, X86::VCVTSD2SIZrm, 0 },
{ X86::VCVTSD2SIZrr_Int, X86::VCVTSD2SIZrm_Int, TB_NO_REVERSE },
+ { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
{ X86::VCVTSD2SIrr_Int, X86::VCVTSD2SIrm_Int, TB_NO_REVERSE },
{ X86::VCVTSD2USI64Zrr_Int, X86::VCVTSD2USI64Zrm_Int, TB_NO_REVERSE },
{ X86::VCVTSD2USIZrr_Int, X86::VCVTSD2USIZrm_Int, TB_NO_REVERSE },
+ { X86::VCVTSS2SI64Zrr, X86::VCVTSS2SI64Zrm, 0 },
{ X86::VCVTSS2SI64Zrr_Int, X86::VCVTSS2SI64Zrm_Int, TB_NO_REVERSE },
+ { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 },
{ X86::VCVTSS2SI64rr_Int, X86::VCVTSS2SI64rm_Int, TB_NO_REVERSE },
+ { X86::VCVTSS2SIZrr, X86::VCVTSS2SIZrm, 0 },
{ X86::VCVTSS2SIZrr_Int, X86::VCVTSS2SIZrm_Int, TB_NO_REVERSE },
+ { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 },
{ X86::VCVTSS2SIrr_Int, X86::VCVTSS2SIrm_Int, TB_NO_REVERSE },
{ X86::VCVTSS2USI64Zrr_Int, X86::VCVTSS2USI64Zrm_Int, TB_NO_REVERSE },
{ X86::VCVTSS2USIZrr_Int, X86::VCVTSS2USIZrm_Int, TB_NO_REVERSE },
@@ -906,24 +918,24 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::VPABSWZrr, X86::VPABSWZrm, 0 },
{ X86::VPABSWrr, X86::VPABSWrm, 0 },
{ X86::VPBROADCASTBYrr, X86::VPBROADCASTBYrm, TB_NO_REVERSE },
- { X86::VPBROADCASTBZ128r, X86::VPBROADCASTBZ128m, TB_NO_REVERSE },
- { X86::VPBROADCASTBZ256r, X86::VPBROADCASTBZ256m, TB_NO_REVERSE },
- { X86::VPBROADCASTBZr, X86::VPBROADCASTBZm, TB_NO_REVERSE },
- { X86::VPBROADCASTBrr, X86::VPBROADCASTBrm, TB_NO_REVERSE },
+ { X86::VPBROADCASTBZ128rr, X86::VPBROADCASTBZ128rm, TB_NO_REVERSE },
+ { X86::VPBROADCASTBZ256rr, X86::VPBROADCASTBZ256rm, TB_NO_REVERSE },
+ { X86::VPBROADCASTBZrr, X86::VPBROADCASTBZrm, TB_NO_REVERSE },
+  { X86::VPBROADCASTBrr,       X86::VPBROADCASTBrm,       TB_NO_REVERSE },
{ X86::VPBROADCASTDYrr, X86::VPBROADCASTDYrm, TB_NO_REVERSE },
- { X86::VPBROADCASTDZ128r, X86::VPBROADCASTDZ128m, TB_NO_REVERSE },
- { X86::VPBROADCASTDZ256r, X86::VPBROADCASTDZ256m, TB_NO_REVERSE },
- { X86::VPBROADCASTDZr, X86::VPBROADCASTDZm, TB_NO_REVERSE },
+ { X86::VPBROADCASTDZ128rr, X86::VPBROADCASTDZ128rm, TB_NO_REVERSE },
+ { X86::VPBROADCASTDZ256rr, X86::VPBROADCASTDZ256rm, TB_NO_REVERSE },
+ { X86::VPBROADCASTDZrr, X86::VPBROADCASTDZrm, TB_NO_REVERSE },
{ X86::VPBROADCASTDrr, X86::VPBROADCASTDrm, TB_NO_REVERSE },
{ X86::VPBROADCASTQYrr, X86::VPBROADCASTQYrm, TB_NO_REVERSE },
- { X86::VPBROADCASTQZ128r, X86::VPBROADCASTQZ128m, TB_NO_REVERSE },
- { X86::VPBROADCASTQZ256r, X86::VPBROADCASTQZ256m, TB_NO_REVERSE },
- { X86::VPBROADCASTQZr, X86::VPBROADCASTQZm, TB_NO_REVERSE },
+ { X86::VPBROADCASTQZ128rr, X86::VPBROADCASTQZ128rm, TB_NO_REVERSE },
+ { X86::VPBROADCASTQZ256rr, X86::VPBROADCASTQZ256rm, TB_NO_REVERSE },
+ { X86::VPBROADCASTQZrr, X86::VPBROADCASTQZrm, TB_NO_REVERSE },
{ X86::VPBROADCASTQrr, X86::VPBROADCASTQrm, TB_NO_REVERSE },
{ X86::VPBROADCASTWYrr, X86::VPBROADCASTWYrm, TB_NO_REVERSE },
- { X86::VPBROADCASTWZ128r, X86::VPBROADCASTWZ128m, TB_NO_REVERSE },
- { X86::VPBROADCASTWZ256r, X86::VPBROADCASTWZ256m, TB_NO_REVERSE },
- { X86::VPBROADCASTWZr, X86::VPBROADCASTWZm, TB_NO_REVERSE },
+ { X86::VPBROADCASTWZ128rr, X86::VPBROADCASTWZ128rm, TB_NO_REVERSE },
+ { X86::VPBROADCASTWZ256rr, X86::VPBROADCASTWZ256rm, TB_NO_REVERSE },
+ { X86::VPBROADCASTWZrr, X86::VPBROADCASTWZrm, TB_NO_REVERSE },
{ X86::VPBROADCASTWrr, X86::VPBROADCASTWrm, TB_NO_REVERSE },
{ X86::VPCMPESTRIrr, X86::VPCMPESTRIrm, 0 },
{ X86::VPCMPESTRMrr, X86::VPCMPESTRMrm, 0 },
@@ -1100,9 +1112,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::VPSHUFLWZ256ri, X86::VPSHUFLWZ256mi, 0 },
{ X86::VPSHUFLWZri, X86::VPSHUFLWZmi, 0 },
{ X86::VPSHUFLWri, X86::VPSHUFLWmi, 0 },
- { X86::VPSLLDQZ128rr, X86::VPSLLDQZ128rm, 0 },
- { X86::VPSLLDQZ256rr, X86::VPSLLDQZ256rm, 0 },
- { X86::VPSLLDQZrr, X86::VPSLLDQZrm, 0 },
+ { X86::VPSLLDQZ128ri, X86::VPSLLDQZ128mi, 0 },
+ { X86::VPSLLDQZ256ri, X86::VPSLLDQZ256mi, 0 },
+ { X86::VPSLLDQZri, X86::VPSLLDQZmi, 0 },
{ X86::VPSLLDZ128ri, X86::VPSLLDZ128mi, 0 },
{ X86::VPSLLDZ256ri, X86::VPSLLDZ256mi, 0 },
{ X86::VPSLLDZri, X86::VPSLLDZmi, 0 },
@@ -1121,9 +1133,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::VPSRAWZ128ri, X86::VPSRAWZ128mi, 0 },
{ X86::VPSRAWZ256ri, X86::VPSRAWZ256mi, 0 },
{ X86::VPSRAWZri, X86::VPSRAWZmi, 0 },
- { X86::VPSRLDQZ128rr, X86::VPSRLDQZ128rm, 0 },
- { X86::VPSRLDQZ256rr, X86::VPSRLDQZ256rm, 0 },
- { X86::VPSRLDQZrr, X86::VPSRLDQZrm, 0 },
+ { X86::VPSRLDQZ128ri, X86::VPSRLDQZ128mi, 0 },
+ { X86::VPSRLDQZ256ri, X86::VPSRLDQZ256mi, 0 },
+ { X86::VPSRLDQZri, X86::VPSRLDQZmi, 0 },
{ X86::VPSRLDZ128ri, X86::VPSRLDZ128mi, 0 },
{ X86::VPSRLDZ256ri, X86::VPSRLDZ256mi, 0 },
{ X86::VPSRLDZri, X86::VPSRLDZmi, 0 },
@@ -1609,16 +1621,16 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VBLENDVPDrr, X86::VBLENDVPDrm, 0 },
{ X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, 0 },
{ X86::VBLENDVPSrr, X86::VBLENDVPSrm, 0 },
- { X86::VBROADCASTF32X2Z256rkz, X86::VBROADCASTF32X2Z256mkz, TB_NO_REVERSE },
- { X86::VBROADCASTF32X2Zrkz, X86::VBROADCASTF32X2Zmkz, TB_NO_REVERSE },
- { X86::VBROADCASTI32X2Z128rkz, X86::VBROADCASTI32X2Z128mkz, TB_NO_REVERSE },
- { X86::VBROADCASTI32X2Z256rkz, X86::VBROADCASTI32X2Z256mkz, TB_NO_REVERSE },
- { X86::VBROADCASTI32X2Zrkz, X86::VBROADCASTI32X2Zmkz, TB_NO_REVERSE },
- { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE },
- { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE },
- { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE },
+ { X86::VBROADCASTF32X2Z256rrkz, X86::VBROADCASTF32X2Z256rmkz, TB_NO_REVERSE },
+ { X86::VBROADCASTF32X2Zrrkz, X86::VBROADCASTF32X2Zrmkz, TB_NO_REVERSE },
+ { X86::VBROADCASTI32X2Z128rrkz, X86::VBROADCASTI32X2Z128rmkz, TB_NO_REVERSE },
+ { X86::VBROADCASTI32X2Z256rrkz, X86::VBROADCASTI32X2Z256rmkz, TB_NO_REVERSE },
+ { X86::VBROADCASTI32X2Zrrkz, X86::VBROADCASTI32X2Zrmkz, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZ256rrkz, X86::VBROADCASTSDZ256rmkz, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZrrkz, X86::VBROADCASTSDZrmkz, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZ128rrkz, X86::VBROADCASTSSZ128rmkz, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZ256rrkz, X86::VBROADCASTSSZ256rmkz, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZrrkz, X86::VBROADCASTSSZrmkz, TB_NO_REVERSE },
{ X86::VCMPPDYrri, X86::VCMPPDYrmi, 0 },
{ X86::VCMPPDZ128rri, X86::VCMPPDZ128rmi, 0 },
{ X86::VCMPPDZ256rri, X86::VCMPPDZ256rmi, 0 },
@@ -2153,18 +2165,18 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VPBLENDVBrr, X86::VPBLENDVBrm, 0 },
{ X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0 },
{ X86::VPBLENDWrri, X86::VPBLENDWrmi, 0 },
- { X86::VPBROADCASTBZ128rkz, X86::VPBROADCASTBZ128mkz, TB_NO_REVERSE },
- { X86::VPBROADCASTBZ256rkz, X86::VPBROADCASTBZ256mkz, TB_NO_REVERSE },
- { X86::VPBROADCASTBZrkz, X86::VPBROADCASTBZmkz, TB_NO_REVERSE },
- { X86::VPBROADCASTDZ128rkz, X86::VPBROADCASTDZ128mkz, TB_NO_REVERSE },
- { X86::VPBROADCASTDZ256rkz, X86::VPBROADCASTDZ256mkz, TB_NO_REVERSE },
- { X86::VPBROADCASTDZrkz, X86::VPBROADCASTDZmkz, TB_NO_REVERSE },
- { X86::VPBROADCASTQZ128rkz, X86::VPBROADCASTQZ128mkz, TB_NO_REVERSE },
- { X86::VPBROADCASTQZ256rkz, X86::VPBROADCASTQZ256mkz, TB_NO_REVERSE },
- { X86::VPBROADCASTQZrkz, X86::VPBROADCASTQZmkz, TB_NO_REVERSE },
- { X86::VPBROADCASTWZ128rkz, X86::VPBROADCASTWZ128mkz, TB_NO_REVERSE },
- { X86::VPBROADCASTWZ256rkz, X86::VPBROADCASTWZ256mkz, TB_NO_REVERSE },
- { X86::VPBROADCASTWZrkz, X86::VPBROADCASTWZmkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTBZ128rrkz, X86::VPBROADCASTBZ128rmkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTBZ256rrkz, X86::VPBROADCASTBZ256rmkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTBZrrkz, X86::VPBROADCASTBZrmkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTDZ128rrkz, X86::VPBROADCASTDZ128rmkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTDZ256rrkz, X86::VPBROADCASTDZ256rmkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTDZrrkz, X86::VPBROADCASTDZrmkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTQZ128rrkz, X86::VPBROADCASTQZ128rmkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTQZ256rrkz, X86::VPBROADCASTQZ256rmkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTQZrrkz, X86::VPBROADCASTQZrmkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTWZ128rrkz, X86::VPBROADCASTWZ128rmkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTWZ256rrkz, X86::VPBROADCASTWZ256rmkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTWZrrkz, X86::VPBROADCASTWZrmkz, TB_NO_REVERSE },
{ X86::VPCLMULQDQYrr, X86::VPCLMULQDQYrm, 0 },
{ X86::VPCLMULQDQZ128rr, X86::VPCLMULQDQZ128rm, 0 },
{ X86::VPCLMULQDQZ256rr, X86::VPCLMULQDQZ256rm, 0 },
@@ -3010,16 +3022,16 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VBLENDMPSZ128rrk, X86::VBLENDMPSZ128rmk, 0 },
{ X86::VBLENDMPSZ256rrk, X86::VBLENDMPSZ256rmk, 0 },
{ X86::VBLENDMPSZrrk, X86::VBLENDMPSZrmk, 0 },
- { X86::VBROADCASTF32X2Z256rk, X86::VBROADCASTF32X2Z256mk, TB_NO_REVERSE },
- { X86::VBROADCASTF32X2Zrk, X86::VBROADCASTF32X2Zmk, TB_NO_REVERSE },
- { X86::VBROADCASTI32X2Z128rk, X86::VBROADCASTI32X2Z128mk, TB_NO_REVERSE },
- { X86::VBROADCASTI32X2Z256rk, X86::VBROADCASTI32X2Z256mk, TB_NO_REVERSE },
- { X86::VBROADCASTI32X2Zrk, X86::VBROADCASTI32X2Zmk, TB_NO_REVERSE },
- { X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE },
- { X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE },
- { X86::VBROADCASTSSZrk, X86::VBROADCASTSSZmk, TB_NO_REVERSE },
+ { X86::VBROADCASTF32X2Z256rrk, X86::VBROADCASTF32X2Z256rmk, TB_NO_REVERSE },
+ { X86::VBROADCASTF32X2Zrrk, X86::VBROADCASTF32X2Zrmk, TB_NO_REVERSE },
+ { X86::VBROADCASTI32X2Z128rrk, X86::VBROADCASTI32X2Z128rmk, TB_NO_REVERSE },
+ { X86::VBROADCASTI32X2Z256rrk, X86::VBROADCASTI32X2Z256rmk, TB_NO_REVERSE },
+ { X86::VBROADCASTI32X2Zrrk, X86::VBROADCASTI32X2Zrmk, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZ256rrk, X86::VBROADCASTSDZ256rmk, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZrrk, X86::VBROADCASTSDZrmk, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZ128rrk, X86::VBROADCASTSSZ128rmk, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZ256rrk, X86::VBROADCASTSSZ256rmk, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZrrk, X86::VBROADCASTSSZrmk, TB_NO_REVERSE },
{ X86::VCMPPDZ128rrik, X86::VCMPPDZ128rmik, 0 },
{ X86::VCMPPDZ256rrik, X86::VCMPPDZ256rmik, 0 },
{ X86::VCMPPDZrrik, X86::VCMPPDZrmik, 0 },
@@ -3662,18 +3674,18 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VPBLENDMWZ128rrk, X86::VPBLENDMWZ128rmk, 0 },
{ X86::VPBLENDMWZ256rrk, X86::VPBLENDMWZ256rmk, 0 },
{ X86::VPBLENDMWZrrk, X86::VPBLENDMWZrmk, 0 },
- { X86::VPBROADCASTBZ128rk, X86::VPBROADCASTBZ128mk, TB_NO_REVERSE },
- { X86::VPBROADCASTBZ256rk, X86::VPBROADCASTBZ256mk, TB_NO_REVERSE },
- { X86::VPBROADCASTBZrk, X86::VPBROADCASTBZmk, TB_NO_REVERSE },
- { X86::VPBROADCASTDZ128rk, X86::VPBROADCASTDZ128mk, TB_NO_REVERSE },
- { X86::VPBROADCASTDZ256rk, X86::VPBROADCASTDZ256mk, TB_NO_REVERSE },
- { X86::VPBROADCASTDZrk, X86::VPBROADCASTDZmk, TB_NO_REVERSE },
- { X86::VPBROADCASTQZ128rk, X86::VPBROADCASTQZ128mk, TB_NO_REVERSE },
- { X86::VPBROADCASTQZ256rk, X86::VPBROADCASTQZ256mk, TB_NO_REVERSE },
- { X86::VPBROADCASTQZrk, X86::VPBROADCASTQZmk, TB_NO_REVERSE },
- { X86::VPBROADCASTWZ128rk, X86::VPBROADCASTWZ128mk, TB_NO_REVERSE },
- { X86::VPBROADCASTWZ256rk, X86::VPBROADCASTWZ256mk, TB_NO_REVERSE },
- { X86::VPBROADCASTWZrk, X86::VPBROADCASTWZmk, TB_NO_REVERSE },
+ { X86::VPBROADCASTBZ128rrk, X86::VPBROADCASTBZ128rmk, TB_NO_REVERSE },
+ { X86::VPBROADCASTBZ256rrk, X86::VPBROADCASTBZ256rmk, TB_NO_REVERSE },
+ { X86::VPBROADCASTBZrrk, X86::VPBROADCASTBZrmk, TB_NO_REVERSE },
+ { X86::VPBROADCASTDZ128rrk, X86::VPBROADCASTDZ128rmk, TB_NO_REVERSE },
+ { X86::VPBROADCASTDZ256rrk, X86::VPBROADCASTDZ256rmk, TB_NO_REVERSE },
+ { X86::VPBROADCASTDZrrk, X86::VPBROADCASTDZrmk, TB_NO_REVERSE },
+ { X86::VPBROADCASTQZ128rrk, X86::VPBROADCASTQZ128rmk, TB_NO_REVERSE },
+ { X86::VPBROADCASTQZ256rrk, X86::VPBROADCASTQZ256rmk, TB_NO_REVERSE },
+ { X86::VPBROADCASTQZrrk, X86::VPBROADCASTQZrmk, TB_NO_REVERSE },
+ { X86::VPBROADCASTWZ128rrk, X86::VPBROADCASTWZ128rmk, TB_NO_REVERSE },
+ { X86::VPBROADCASTWZ256rrk, X86::VPBROADCASTWZ256rmk, TB_NO_REVERSE },
+ { X86::VPBROADCASTWZrrk, X86::VPBROADCASTWZrmk, TB_NO_REVERSE },
{ X86::VPCMOVYrrr, X86::VPCMOVYrrm, 0 },
{ X86::VPCMOVrrr, X86::VPCMOVrrm, 0 },
{ X86::VPCMPBZ128rrik, X86::VPCMPBZ128rmik, 0 },
@@ -5509,6 +5521,12 @@ static const X86MemoryFoldTableEntry BroadcastFoldTable3[] = {
{ X86::VFNMSUB231PSZ128r, X86::VFNMSUB231PSZ128mb, TB_BCAST_SS },
{ X86::VFNMSUB231PSZ256r, X86::VFNMSUB231PSZ256mb, TB_BCAST_SS },
{ X86::VFNMSUB231PSZr, X86::VFNMSUB231PSZmb, TB_BCAST_SS },
+ { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGDZ128rmbi, TB_BCAST_D },
+ { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGDZ256rmbi, TB_BCAST_D },
+ { X86::VPTERNLOGDZrri, X86::VPTERNLOGDZrmbi, TB_BCAST_D },
+ { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGQZ128rmbi, TB_BCAST_Q },
+ { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGQZ256rmbi, TB_BCAST_Q },
+ { X86::VPTERNLOGQZrri, X86::VPTERNLOGQZrmbi, TB_BCAST_Q },
};
static const X86MemoryFoldTableEntry *
@@ -5517,53 +5535,45 @@ lookupFoldTableImpl(ArrayRef<X86MemoryFoldTableEntry> Table, unsigned RegOp) {
// Make sure the tables are sorted.
static std::atomic<bool> FoldTablesChecked(false);
if (!FoldTablesChecked.load(std::memory_order_relaxed)) {
- assert(std::is_sorted(std::begin(MemoryFoldTable2Addr),
- std::end(MemoryFoldTable2Addr)) &&
+ assert(llvm::is_sorted(MemoryFoldTable2Addr) &&
std::adjacent_find(std::begin(MemoryFoldTable2Addr),
std::end(MemoryFoldTable2Addr)) ==
- std::end(MemoryFoldTable2Addr) &&
+ std::end(MemoryFoldTable2Addr) &&
"MemoryFoldTable2Addr is not sorted and unique!");
- assert(std::is_sorted(std::begin(MemoryFoldTable0),
- std::end(MemoryFoldTable0)) &&
+ assert(llvm::is_sorted(MemoryFoldTable0) &&
std::adjacent_find(std::begin(MemoryFoldTable0),
std::end(MemoryFoldTable0)) ==
- std::end(MemoryFoldTable0) &&
+ std::end(MemoryFoldTable0) &&
"MemoryFoldTable0 is not sorted and unique!");
- assert(std::is_sorted(std::begin(MemoryFoldTable1),
- std::end(MemoryFoldTable1)) &&
+ assert(llvm::is_sorted(MemoryFoldTable1) &&
std::adjacent_find(std::begin(MemoryFoldTable1),
std::end(MemoryFoldTable1)) ==
- std::end(MemoryFoldTable1) &&
+ std::end(MemoryFoldTable1) &&
"MemoryFoldTable1 is not sorted and unique!");
- assert(std::is_sorted(std::begin(MemoryFoldTable2),
- std::end(MemoryFoldTable2)) &&
+ assert(llvm::is_sorted(MemoryFoldTable2) &&
std::adjacent_find(std::begin(MemoryFoldTable2),
std::end(MemoryFoldTable2)) ==
- std::end(MemoryFoldTable2) &&
+ std::end(MemoryFoldTable2) &&
"MemoryFoldTable2 is not sorted and unique!");
- assert(std::is_sorted(std::begin(MemoryFoldTable3),
- std::end(MemoryFoldTable3)) &&
+ assert(llvm::is_sorted(MemoryFoldTable3) &&
std::adjacent_find(std::begin(MemoryFoldTable3),
std::end(MemoryFoldTable3)) ==
- std::end(MemoryFoldTable3) &&
+ std::end(MemoryFoldTable3) &&
"MemoryFoldTable3 is not sorted and unique!");
- assert(std::is_sorted(std::begin(MemoryFoldTable4),
- std::end(MemoryFoldTable4)) &&
+ assert(llvm::is_sorted(MemoryFoldTable4) &&
std::adjacent_find(std::begin(MemoryFoldTable4),
std::end(MemoryFoldTable4)) ==
- std::end(MemoryFoldTable4) &&
+ std::end(MemoryFoldTable4) &&
"MemoryFoldTable4 is not sorted and unique!");
- assert(std::is_sorted(std::begin(BroadcastFoldTable2),
- std::end(BroadcastFoldTable2)) &&
+ assert(llvm::is_sorted(BroadcastFoldTable2) &&
std::adjacent_find(std::begin(BroadcastFoldTable2),
std::end(BroadcastFoldTable2)) ==
- std::end(BroadcastFoldTable2) &&
+ std::end(BroadcastFoldTable2) &&
"BroadcastFoldTable2 is not sorted and unique!");
- assert(std::is_sorted(std::begin(BroadcastFoldTable3),
- std::end(BroadcastFoldTable3)) &&
+ assert(llvm::is_sorted(BroadcastFoldTable3) &&
std::adjacent_find(std::begin(BroadcastFoldTable3),
std::end(BroadcastFoldTable3)) ==
- std::end(BroadcastFoldTable3) &&
+ std::end(BroadcastFoldTable3) &&
"BroadcastFoldTable3 is not sorted and unique!");
FoldTablesChecked.store(true, std::memory_order_relaxed);
}
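
The asserts above only verify a precondition: each fold table must stay sorted by its register-form opcode and contain no duplicate keys, presumably so lookupFoldTableImpl can binary-search it. A minimal standalone sketch of that sorted-and-unique check plus a lookup follows; the struct, table values, and function names here are illustrative, not LLVM's.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>

struct FoldEntry {
  uint16_t RegOp;  // key: register-form opcode (illustrative values)
  uint16_t MemOp;  // value: memory-form opcode
  uint16_t Flags;
  bool operator<(const FoldEntry &RHS) const { return RegOp < RHS.RegOp; }
  bool operator==(const FoldEntry &RHS) const { return RegOp == RHS.RegOp; }
};

// Must stay sorted by RegOp and duplicate-free, like the X86 fold tables.
static const FoldEntry Table[] = {
    {10, 110, 0}, {20, 120, 0}, {30, 130, 0},
};

static const FoldEntry *lookup(uint16_t RegOp) {
  // Same precondition the asserts in the hunk above enforce.
  assert(std::is_sorted(std::begin(Table), std::end(Table)) &&
         std::adjacent_find(std::begin(Table), std::end(Table)) ==
             std::end(Table) &&
         "table is not sorted and unique!");
  const FoldEntry Probe{RegOp, 0, 0};
  const FoldEntry *I =
      std::lower_bound(std::begin(Table), std::end(Table), Probe);
  return (I != std::end(Table) && I->RegOp == RegOp) ? I : nullptr;
}

int main() { return lookup(20) != nullptr ? 0 : 1; }
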
@@ -5639,7 +5649,7 @@ struct X86MemUnfoldTable {
addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD | TB_FOLDED_BCAST);
for (const X86MemoryFoldTableEntry &Entry : BroadcastFoldTable3)
- // Index 2, folded broadcast
+ // Index 3, folded broadcast
addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD | TB_FOLDED_BCAST);
// Sort the memory->reg unfold table.
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.h b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.h
index 7dc236a0d7e4..b7aca27ab2bb 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.h
@@ -13,7 +13,7 @@
#ifndef LLVM_LIB_TARGET_X86_X86INSTRFOLDTABLES_H
#define LLVM_LIB_TARGET_X86_X86INSTRFOLDTABLES_H
-#include "llvm/Support/DataTypes.h"
+#include <cstdint>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td
index 2f797fcfb8de..d7752e656b55 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFormats.td
@@ -27,26 +27,33 @@ def RawFrmDstSrc : Format<6>;
def RawFrmImm8 : Format<7>;
def RawFrmImm16 : Format<8>;
def AddCCFrm : Format<9>;
-def MRMDestMem : Format<32>;
-def MRMSrcMem : Format<33>;
-def MRMSrcMem4VOp3 : Format<34>;
-def MRMSrcMemOp4 : Format<35>;
-def MRMSrcMemCC : Format<36>;
-def MRMXmCC: Format<38>;
-def MRMXm : Format<39>;
-def MRM0m : Format<40>; def MRM1m : Format<41>; def MRM2m : Format<42>;
-def MRM3m : Format<43>; def MRM4m : Format<44>; def MRM5m : Format<45>;
-def MRM6m : Format<46>; def MRM7m : Format<47>;
-def MRMDestReg : Format<48>;
-def MRMSrcReg : Format<49>;
-def MRMSrcReg4VOp3 : Format<50>;
-def MRMSrcRegOp4 : Format<51>;
-def MRMSrcRegCC : Format<52>;
-def MRMXrCC: Format<54>;
-def MRMXr : Format<55>;
-def MRM0r : Format<56>; def MRM1r : Format<57>; def MRM2r : Format<58>;
-def MRM3r : Format<59>; def MRM4r : Format<60>; def MRM5r : Format<61>;
-def MRM6r : Format<62>; def MRM7r : Format<63>;
+def PrefixByte : Format<10>;
+def MRMr0 : Format<21>;
+def MRMSrcMemFSIB : Format<22>;
+def MRMDestMemFSIB : Format<23>;
+def MRMDestMem : Format<24>;
+def MRMSrcMem : Format<25>;
+def MRMSrcMem4VOp3 : Format<26>;
+def MRMSrcMemOp4 : Format<27>;
+def MRMSrcMemCC : Format<28>;
+def MRMXmCC: Format<30>;
+def MRMXm : Format<31>;
+def MRM0m : Format<32>; def MRM1m : Format<33>; def MRM2m : Format<34>;
+def MRM3m : Format<35>; def MRM4m : Format<36>; def MRM5m : Format<37>;
+def MRM6m : Format<38>; def MRM7m : Format<39>;
+def MRMDestReg : Format<40>;
+def MRMSrcReg : Format<41>;
+def MRMSrcReg4VOp3 : Format<42>;
+def MRMSrcRegOp4 : Format<43>;
+def MRMSrcRegCC : Format<44>;
+def MRMXrCC: Format<46>;
+def MRMXr : Format<47>;
+def MRM0r : Format<48>; def MRM1r : Format<49>; def MRM2r : Format<50>;
+def MRM3r : Format<51>; def MRM4r : Format<52>; def MRM5r : Format<53>;
+def MRM6r : Format<54>; def MRM7r : Format<55>;
+def MRM0X : Format<56>; def MRM1X : Format<57>; def MRM2X : Format<58>;
+def MRM3X : Format<59>; def MRM4X : Format<60>; def MRM5X : Format<61>;
+def MRM6X : Format<62>; def MRM7X : Format<63>;
def MRM_C0 : Format<64>; def MRM_C1 : Format<65>; def MRM_C2 : Format<66>;
def MRM_C3 : Format<67>; def MRM_C4 : Format<68>; def MRM_C5 : Format<69>;
def MRM_C6 : Format<70>; def MRM_C7 : Format<71>; def MRM_C8 : Format<72>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 3250123e5aa6..f3f7d17d9b3c 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -59,9 +59,13 @@ def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
-def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
-def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
-def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
+def X86comi : SDNode<"X86ISD::COMI", SDTX86FCmp>;
+def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86FCmp>;
+
+def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
+
def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
@@ -535,8 +539,20 @@ def X86any_Fmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3),
[(X86strict_Fmadd node:$src1, node:$src2, node:$src3),
(X86Fmadd node:$src1, node:$src2, node:$src3)]>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86strict_Fnmadd : SDNode<"X86ISD::STRICT_FNMADD", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+def X86any_Fnmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3),
+ [(X86strict_Fnmadd node:$src1, node:$src2, node:$src3),
+ (X86Fnmadd node:$src1, node:$src2, node:$src3)]>;
def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86strict_Fmsub : SDNode<"X86ISD::STRICT_FMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+def X86any_Fmsub : PatFrags<(ops node:$src1, node:$src2, node:$src3),
+ [(X86strict_Fmsub node:$src1, node:$src2, node:$src3),
+ (X86Fmsub node:$src1, node:$src2, node:$src3)]>;
def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86strict_Fnmsub : SDNode<"X86ISD::STRICT_FNMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+def X86any_Fnmsub : PatFrags<(ops node:$src1, node:$src2, node:$src3),
+ [(X86strict_Fnmsub node:$src1, node:$src2, node:$src3),
+ (X86Fnmsub node:$src1, node:$src2, node:$src3)]>;
def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFPTernaryOp, [SDNPCommutative]>;
def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFPTernaryOp, [SDNPCommutative]>;
@@ -709,19 +725,27 @@ def X86mcvtp2UInt : SDNode<"X86ISD::MCVTP2UI", SDTMFloatToInt>;
def X86mcvttp2si : SDNode<"X86ISD::MCVTTP2SI", SDTMFloatToInt>;
def X86mcvttp2ui : SDNode<"X86ISD::MCVTTP2UI", SDTMFloatToInt>;
+def SDTcvtph2ps : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
+ SDTCVecEltisVT<1, i16>]>;
+def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS", SDTcvtph2ps>;
+def X86strict_cvtph2ps : SDNode<"X86ISD::STRICT_CVTPH2PS", SDTcvtph2ps,
+ [SDNPHasChain]>;
+def X86any_cvtph2ps : PatFrags<(ops node:$src),
+ [(X86strict_cvtph2ps node:$src),
+ (X86cvtph2ps node:$src)]>;
+
+def X86cvtph2psSAE : SDNode<"X86ISD::CVTPH2PS_SAE", SDTcvtph2ps>;
+
+def SDTcvtps2ph : SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
+ SDTCVecEltisVT<1, f32>,
+ SDTCisVT<2, i32>]>;
+def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH", SDTcvtps2ph>;
+def X86strict_cvtps2ph : SDNode<"X86ISD::STRICT_CVTPS2PH", SDTcvtps2ph,
+ [SDNPHasChain]>;
+def X86any_cvtps2ph : PatFrags<(ops node:$src1, node:$src2),
+ [(X86strict_cvtps2ph node:$src1, node:$src2),
+ (X86cvtps2ph node:$src1, node:$src2)]>;
-def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS",
- SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
- SDTCVecEltisVT<1, i16>]> >;
-
-def X86cvtph2psSAE : SDNode<"X86ISD::CVTPH2PS_SAE",
- SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
- SDTCVecEltisVT<1, i16>]> >;
-
-def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH",
- SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
- SDTCVecEltisVT<1, f32>,
- SDTCisVT<2, i32>]> >;
def X86mcvtps2ph : SDNode<"X86ISD::MCVTPS2PH",
SDTypeProfile<1, 4, [SDTCVecEltisVT<0, i16>,
SDTCVecEltisVT<1, f32>,
@@ -741,7 +765,9 @@ def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND",
// cvt fp to bfloat16
def X86cvtne2ps2bf16 : SDNode<"X86ISD::CVTNE2PS2BF16",
- SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
+ SDTCVecEltisVT<1, f32>,
+ SDTCisSameSizeAs<0,1>,
SDTCisSameAs<1,2>]>>;
def X86mcvtneps2bf16 : SDNode<"X86ISD::MCVTNEPS2BF16",
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>,
@@ -768,23 +794,6 @@ def SDTX86MaskedStore: SDTypeProfile<0, 3, [ // masked store
]>;
//===----------------------------------------------------------------------===//
-// SSE Complex Patterns
-//===----------------------------------------------------------------------===//
-
-// These are 'extloads' from a scalar to the low element of a vector, zeroing
-// the top elements. These are used for the SSE 'ss' and 'sd' instruction
-// forms.
-def sse_load_f32 : ComplexPattern<v4f32, 5, "selectScalarSSELoad", [],
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
- SDNPWantRoot, SDNPWantParent]>;
-def sse_load_f64 : ComplexPattern<v2f64, 5, "selectScalarSSELoad", [],
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
- SDNPWantRoot, SDNPWantParent]>;
-
-def ssmem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>;
-def sdmem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>;
-
-//===----------------------------------------------------------------------===//
// SSE pattern fragments
//===----------------------------------------------------------------------===//
@@ -895,89 +904,6 @@ def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;
def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
-def X86masked_gather : SDNode<"X86ISD::MGATHER",
- SDTypeProfile<2, 3, [SDTCisVec<0>,
- SDTCisVec<1>, SDTCisInt<1>,
- SDTCisSameAs<0, 2>,
- SDTCisSameAs<1, 3>,
- SDTCisPtrTy<4>]>,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-
-def X86masked_scatter : SDNode<"X86ISD::MSCATTER",
- SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisSameAs<0, 2>,
- SDTCVecEltisVT<0, i1>,
- SDTCisPtrTy<3>]>,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-
-def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
- X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
- return Mgt->getIndex().getValueType() == MVT::v4i32;
-}]>;
-
-def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
- X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
- return Mgt->getIndex().getValueType() == MVT::v8i32;
-}]>;
-
-def mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
- X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
- return Mgt->getIndex().getValueType() == MVT::v2i64;
-}]>;
-def mgatherv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
- X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
- return Mgt->getIndex().getValueType() == MVT::v4i64;
-}]>;
-def mgatherv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
- X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
- return Mgt->getIndex().getValueType() == MVT::v8i64;
-}]>;
-def mgatherv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
- X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
- return Mgt->getIndex().getValueType() == MVT::v16i32;
-}]>;
-
-def mscatterv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
- X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
- return Sc->getIndex().getValueType() == MVT::v2i64;
-}]>;
-
-def mscatterv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
- X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
- return Sc->getIndex().getValueType() == MVT::v4i32;
-}]>;
-
-def mscatterv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
- X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
- return Sc->getIndex().getValueType() == MVT::v4i64;
-}]>;
-
-def mscatterv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
- X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
- return Sc->getIndex().getValueType() == MVT::v8i32;
-}]>;
-
-def mscatterv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
- X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
- return Sc->getIndex().getValueType() == MVT::v8i64;
-}]>;
-def mscatterv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
- X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
- return Sc->getIndex().getValueType() == MVT::v16i32;
-}]>;
-
// 128-bit bitconvert pattern fragments
def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
@@ -1037,6 +963,23 @@ def X86VBroadcastld64 : PatFrag<(ops node:$src),
return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 8;
}]>;
+// Scalar SSE intrinsic fragments to match several different types of loads.
+// Used by scalar SSE intrinsic instructions which have 128 bit types, but
+// only load a single element.
+// FIXME: We should add more canonicalizing in DAGCombine. Particularly removing
+// the simple_load case.
+def sse_load_f32 : PatFrags<(ops node:$ptr),
+ [(v4f32 (simple_load node:$ptr)),
+ (v4f32 (X86vzload32 node:$ptr)),
+ (v4f32 (scalar_to_vector (loadf32 node:$ptr)))]>;
+def sse_load_f64 : PatFrags<(ops node:$ptr),
+ [(v2f64 (simple_load node:$ptr)),
+ (v2f64 (X86vzload64 node:$ptr)),
+ (v2f64 (scalar_to_vector (loadf64 node:$ptr)))]>;
+
+def ssmem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>;
+def sdmem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>;
+
def fp32imm0 : PatLeaf<(f32 fpimm), [{
return N->isExactlyValue(+0.0);
@@ -1185,60 +1128,60 @@ def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS", SDTX86MaskedStore,
def truncstore_s_vi8 : PatFrag<(ops node:$val, node:$ptr),
(X86TruncSStore node:$val, node:$ptr), [{
- return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def truncstore_us_vi8 : PatFrag<(ops node:$val, node:$ptr),
(X86TruncUSStore node:$val, node:$ptr), [{
- return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def truncstore_s_vi16 : PatFrag<(ops node:$val, node:$ptr),
(X86TruncSStore node:$val, node:$ptr), [{
- return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def truncstore_us_vi16 : PatFrag<(ops node:$val, node:$ptr),
(X86TruncUSStore node:$val, node:$ptr), [{
- return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def truncstore_s_vi32 : PatFrag<(ops node:$val, node:$ptr),
(X86TruncSStore node:$val, node:$ptr), [{
- return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
def truncstore_us_vi32 : PatFrag<(ops node:$val, node:$ptr),
(X86TruncUSStore node:$val, node:$ptr), [{
- return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
def masked_truncstore_s_vi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def masked_truncstore_us_vi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def masked_truncstore_s_vi16 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def masked_truncstore_us_vi16 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def masked_truncstore_s_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
def masked_truncstore_us_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
index 90484241c28c..42c111173570 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -88,7 +88,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
bool
X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
+ Register &SrcReg, Register &DstReg,
unsigned &SubIdx) const {
switch (MI.getOpcode()) {
default: break;
@@ -135,13 +135,497 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
return false;
}
+bool X86InstrInfo::isDataInvariant(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ // By default, assume that the instruction is not data invariant.
+ return false;
+
+ // Some target-independent operations that trivially lower to data-invariant
+ // instructions.
+ case TargetOpcode::COPY:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ return true;
+
+  // On x86 it is believed that imul is constant time w.r.t. its inputs.
+  // However, these set flags and are perhaps the most surprisingly constant
+  // time operations, so we call them out here separately.
+ case X86::IMUL16rr:
+ case X86::IMUL16rri8:
+ case X86::IMUL16rri:
+ case X86::IMUL32rr:
+ case X86::IMUL32rri8:
+ case X86::IMUL32rri:
+ case X86::IMUL64rr:
+ case X86::IMUL64rri32:
+ case X86::IMUL64rri8:
+
+ // Bit scanning and counting instructions that are somewhat surprisingly
+ // constant time as they scan across bits and do other fairly complex
+ // operations like popcnt, but are believed to be constant time on x86.
+ // However, these set flags.
+ case X86::BSF16rr:
+ case X86::BSF32rr:
+ case X86::BSF64rr:
+ case X86::BSR16rr:
+ case X86::BSR32rr:
+ case X86::BSR64rr:
+ case X86::LZCNT16rr:
+ case X86::LZCNT32rr:
+ case X86::LZCNT64rr:
+ case X86::POPCNT16rr:
+ case X86::POPCNT32rr:
+ case X86::POPCNT64rr:
+ case X86::TZCNT16rr:
+ case X86::TZCNT32rr:
+ case X86::TZCNT64rr:
+
+ // Bit manipulation instructions are effectively combinations of basic
+ // arithmetic ops, and should still execute in constant time. These also
+ // set flags.
+ case X86::BLCFILL32rr:
+ case X86::BLCFILL64rr:
+ case X86::BLCI32rr:
+ case X86::BLCI64rr:
+ case X86::BLCIC32rr:
+ case X86::BLCIC64rr:
+ case X86::BLCMSK32rr:
+ case X86::BLCMSK64rr:
+ case X86::BLCS32rr:
+ case X86::BLCS64rr:
+ case X86::BLSFILL32rr:
+ case X86::BLSFILL64rr:
+ case X86::BLSI32rr:
+ case X86::BLSI64rr:
+ case X86::BLSIC32rr:
+ case X86::BLSIC64rr:
+ case X86::BLSMSK32rr:
+ case X86::BLSMSK64rr:
+ case X86::BLSR32rr:
+ case X86::BLSR64rr:
+ case X86::TZMSK32rr:
+ case X86::TZMSK64rr:
+
+ // Bit extracting and clearing instructions should execute in constant time,
+ // and set flags.
+ case X86::BEXTR32rr:
+ case X86::BEXTR64rr:
+ case X86::BEXTRI32ri:
+ case X86::BEXTRI64ri:
+ case X86::BZHI32rr:
+ case X86::BZHI64rr:
+
+ // Shift and rotate.
+ case X86::ROL8r1:
+ case X86::ROL16r1:
+ case X86::ROL32r1:
+ case X86::ROL64r1:
+ case X86::ROL8rCL:
+ case X86::ROL16rCL:
+ case X86::ROL32rCL:
+ case X86::ROL64rCL:
+ case X86::ROL8ri:
+ case X86::ROL16ri:
+ case X86::ROL32ri:
+ case X86::ROL64ri:
+ case X86::ROR8r1:
+ case X86::ROR16r1:
+ case X86::ROR32r1:
+ case X86::ROR64r1:
+ case X86::ROR8rCL:
+ case X86::ROR16rCL:
+ case X86::ROR32rCL:
+ case X86::ROR64rCL:
+ case X86::ROR8ri:
+ case X86::ROR16ri:
+ case X86::ROR32ri:
+ case X86::ROR64ri:
+ case X86::SAR8r1:
+ case X86::SAR16r1:
+ case X86::SAR32r1:
+ case X86::SAR64r1:
+ case X86::SAR8rCL:
+ case X86::SAR16rCL:
+ case X86::SAR32rCL:
+ case X86::SAR64rCL:
+ case X86::SAR8ri:
+ case X86::SAR16ri:
+ case X86::SAR32ri:
+ case X86::SAR64ri:
+ case X86::SHL8r1:
+ case X86::SHL16r1:
+ case X86::SHL32r1:
+ case X86::SHL64r1:
+ case X86::SHL8rCL:
+ case X86::SHL16rCL:
+ case X86::SHL32rCL:
+ case X86::SHL64rCL:
+ case X86::SHL8ri:
+ case X86::SHL16ri:
+ case X86::SHL32ri:
+ case X86::SHL64ri:
+ case X86::SHR8r1:
+ case X86::SHR16r1:
+ case X86::SHR32r1:
+ case X86::SHR64r1:
+ case X86::SHR8rCL:
+ case X86::SHR16rCL:
+ case X86::SHR32rCL:
+ case X86::SHR64rCL:
+ case X86::SHR8ri:
+ case X86::SHR16ri:
+ case X86::SHR32ri:
+ case X86::SHR64ri:
+ case X86::SHLD16rrCL:
+ case X86::SHLD32rrCL:
+ case X86::SHLD64rrCL:
+ case X86::SHLD16rri8:
+ case X86::SHLD32rri8:
+ case X86::SHLD64rri8:
+ case X86::SHRD16rrCL:
+ case X86::SHRD32rrCL:
+ case X86::SHRD64rrCL:
+ case X86::SHRD16rri8:
+ case X86::SHRD32rri8:
+ case X86::SHRD64rri8:
+
+ // Basic arithmetic is constant time on the input but does set flags.
+ case X86::ADC8rr:
+ case X86::ADC8ri:
+ case X86::ADC16rr:
+ case X86::ADC16ri:
+ case X86::ADC16ri8:
+ case X86::ADC32rr:
+ case X86::ADC32ri:
+ case X86::ADC32ri8:
+ case X86::ADC64rr:
+ case X86::ADC64ri8:
+ case X86::ADC64ri32:
+ case X86::ADD8rr:
+ case X86::ADD8ri:
+ case X86::ADD16rr:
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ case X86::ADD32rr:
+ case X86::ADD32ri:
+ case X86::ADD32ri8:
+ case X86::ADD64rr:
+ case X86::ADD64ri8:
+ case X86::ADD64ri32:
+ case X86::AND8rr:
+ case X86::AND8ri:
+ case X86::AND16rr:
+ case X86::AND16ri:
+ case X86::AND16ri8:
+ case X86::AND32rr:
+ case X86::AND32ri:
+ case X86::AND32ri8:
+ case X86::AND64rr:
+ case X86::AND64ri8:
+ case X86::AND64ri32:
+ case X86::OR8rr:
+ case X86::OR8ri:
+ case X86::OR16rr:
+ case X86::OR16ri:
+ case X86::OR16ri8:
+ case X86::OR32rr:
+ case X86::OR32ri:
+ case X86::OR32ri8:
+ case X86::OR64rr:
+ case X86::OR64ri8:
+ case X86::OR64ri32:
+ case X86::SBB8rr:
+ case X86::SBB8ri:
+ case X86::SBB16rr:
+ case X86::SBB16ri:
+ case X86::SBB16ri8:
+ case X86::SBB32rr:
+ case X86::SBB32ri:
+ case X86::SBB32ri8:
+ case X86::SBB64rr:
+ case X86::SBB64ri8:
+ case X86::SBB64ri32:
+ case X86::SUB8rr:
+ case X86::SUB8ri:
+ case X86::SUB16rr:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB32rr:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB64rr:
+ case X86::SUB64ri8:
+ case X86::SUB64ri32:
+ case X86::XOR8rr:
+ case X86::XOR8ri:
+ case X86::XOR16rr:
+ case X86::XOR16ri:
+ case X86::XOR16ri8:
+ case X86::XOR32rr:
+ case X86::XOR32ri:
+ case X86::XOR32ri8:
+ case X86::XOR64rr:
+ case X86::XOR64ri8:
+ case X86::XOR64ri32:
+ // Arithmetic with just 32-bit and 64-bit variants and no immediates.
+ case X86::ADCX32rr:
+ case X86::ADCX64rr:
+ case X86::ADOX32rr:
+ case X86::ADOX64rr:
+ case X86::ANDN32rr:
+ case X86::ANDN64rr:
+ // Unary arithmetic operations.
+ case X86::DEC8r:
+ case X86::DEC16r:
+ case X86::DEC32r:
+ case X86::DEC64r:
+ case X86::INC8r:
+ case X86::INC16r:
+ case X86::INC32r:
+ case X86::INC64r:
+ case X86::NEG8r:
+ case X86::NEG16r:
+ case X86::NEG32r:
+ case X86::NEG64r:
+
+ // Unlike other arithmetic, NOT doesn't set EFLAGS.
+ case X86::NOT8r:
+ case X86::NOT16r:
+ case X86::NOT32r:
+ case X86::NOT64r:
+
+ // Various move instructions used to zero or sign extend things. Note that we
+ // intentionally don't support the _NOREX variants as we can't handle that
+ // register constraint anyways.
+ case X86::MOVSX16rr8:
+ case X86::MOVSX32rr8:
+ case X86::MOVSX32rr16:
+ case X86::MOVSX64rr8:
+ case X86::MOVSX64rr16:
+ case X86::MOVSX64rr32:
+ case X86::MOVZX16rr8:
+ case X86::MOVZX32rr8:
+ case X86::MOVZX32rr16:
+ case X86::MOVZX64rr8:
+ case X86::MOVZX64rr16:
+ case X86::MOV32rr:
+
+ // Arithmetic instructions that are both constant time and don't set flags.
+ case X86::RORX32ri:
+ case X86::RORX64ri:
+ case X86::SARX32rr:
+ case X86::SARX64rr:
+ case X86::SHLX32rr:
+ case X86::SHLX64rr:
+ case X86::SHRX32rr:
+ case X86::SHRX64rr:
+
+ // LEA doesn't actually access memory, and its arithmetic is constant time.
+ case X86::LEA16r:
+ case X86::LEA32r:
+ case X86::LEA64_32r:
+ case X86::LEA64r:
+ return true;
+ }
+}
+
+bool X86InstrInfo::isDataInvariantLoad(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ // By default, assume that the load will immediately leak.
+ return false;
+
+ // On x86 it is believed that imul is constant time w.r.t. the loaded data.
+ // However, they set flags and are perhaps the most surprisingly constant
+ // time operations so we call them out here separately.
+ case X86::IMUL16rm:
+ case X86::IMUL16rmi8:
+ case X86::IMUL16rmi:
+ case X86::IMUL32rm:
+ case X86::IMUL32rmi8:
+ case X86::IMUL32rmi:
+ case X86::IMUL64rm:
+ case X86::IMUL64rmi32:
+ case X86::IMUL64rmi8:
+
+ // Bit scanning and counting instructions that are somewhat surprisingly
+ // constant time as they scan across bits and do other fairly complex
+ // operations like popcnt, but are believed to be constant time on x86.
+ // However, these set flags.
+ case X86::BSF16rm:
+ case X86::BSF32rm:
+ case X86::BSF64rm:
+ case X86::BSR16rm:
+ case X86::BSR32rm:
+ case X86::BSR64rm:
+ case X86::LZCNT16rm:
+ case X86::LZCNT32rm:
+ case X86::LZCNT64rm:
+ case X86::POPCNT16rm:
+ case X86::POPCNT32rm:
+ case X86::POPCNT64rm:
+ case X86::TZCNT16rm:
+ case X86::TZCNT32rm:
+ case X86::TZCNT64rm:
+
+ // Bit manipulation instructions are effectively combinations of basic
+ // arithmetic ops, and should still execute in constant time. These also
+ // set flags.
+ case X86::BLCFILL32rm:
+ case X86::BLCFILL64rm:
+ case X86::BLCI32rm:
+ case X86::BLCI64rm:
+ case X86::BLCIC32rm:
+ case X86::BLCIC64rm:
+ case X86::BLCMSK32rm:
+ case X86::BLCMSK64rm:
+ case X86::BLCS32rm:
+ case X86::BLCS64rm:
+ case X86::BLSFILL32rm:
+ case X86::BLSFILL64rm:
+ case X86::BLSI32rm:
+ case X86::BLSI64rm:
+ case X86::BLSIC32rm:
+ case X86::BLSIC64rm:
+ case X86::BLSMSK32rm:
+ case X86::BLSMSK64rm:
+ case X86::BLSR32rm:
+ case X86::BLSR64rm:
+ case X86::TZMSK32rm:
+ case X86::TZMSK64rm:
+
+ // Bit extracting and clearing instructions should execute in constant time,
+ // and set flags.
+ case X86::BEXTR32rm:
+ case X86::BEXTR64rm:
+ case X86::BEXTRI32mi:
+ case X86::BEXTRI64mi:
+ case X86::BZHI32rm:
+ case X86::BZHI64rm:
+
+ // Basic arithmetic is constant time on the input but does set flags.
+ case X86::ADC8rm:
+ case X86::ADC16rm:
+ case X86::ADC32rm:
+ case X86::ADC64rm:
+ case X86::ADCX32rm:
+ case X86::ADCX64rm:
+ case X86::ADD8rm:
+ case X86::ADD16rm:
+ case X86::ADD32rm:
+ case X86::ADD64rm:
+ case X86::ADOX32rm:
+ case X86::ADOX64rm:
+ case X86::AND8rm:
+ case X86::AND16rm:
+ case X86::AND32rm:
+ case X86::AND64rm:
+ case X86::ANDN32rm:
+ case X86::ANDN64rm:
+ case X86::OR8rm:
+ case X86::OR16rm:
+ case X86::OR32rm:
+ case X86::OR64rm:
+ case X86::SBB8rm:
+ case X86::SBB16rm:
+ case X86::SBB32rm:
+ case X86::SBB64rm:
+ case X86::SUB8rm:
+ case X86::SUB16rm:
+ case X86::SUB32rm:
+ case X86::SUB64rm:
+ case X86::XOR8rm:
+ case X86::XOR16rm:
+ case X86::XOR32rm:
+ case X86::XOR64rm:
+
+ // Integer multiply w/o affecting flags is still believed to be constant
+ // time on x86. Called out separately as this is among the most surprising
+ // instructions to exhibit that behavior.
+ case X86::MULX32rm:
+ case X86::MULX64rm:
+
+ // Arithmetic instructions that are both constant time and don't set flags.
+ case X86::RORX32mi:
+ case X86::RORX64mi:
+ case X86::SARX32rm:
+ case X86::SARX64rm:
+ case X86::SHLX32rm:
+ case X86::SHLX64rm:
+ case X86::SHRX32rm:
+ case X86::SHRX64rm:
+
+ // Conversions are believed to be constant time and don't set flags.
+ case X86::CVTTSD2SI64rm:
+ case X86::VCVTTSD2SI64rm:
+ case X86::VCVTTSD2SI64Zrm:
+ case X86::CVTTSD2SIrm:
+ case X86::VCVTTSD2SIrm:
+ case X86::VCVTTSD2SIZrm:
+ case X86::CVTTSS2SI64rm:
+ case X86::VCVTTSS2SI64rm:
+ case X86::VCVTTSS2SI64Zrm:
+ case X86::CVTTSS2SIrm:
+ case X86::VCVTTSS2SIrm:
+ case X86::VCVTTSS2SIZrm:
+ case X86::CVTSI2SDrm:
+ case X86::VCVTSI2SDrm:
+ case X86::VCVTSI2SDZrm:
+ case X86::CVTSI2SSrm:
+ case X86::VCVTSI2SSrm:
+ case X86::VCVTSI2SSZrm:
+ case X86::CVTSI642SDrm:
+ case X86::VCVTSI642SDrm:
+ case X86::VCVTSI642SDZrm:
+ case X86::CVTSI642SSrm:
+ case X86::VCVTSI642SSrm:
+ case X86::VCVTSI642SSZrm:
+ case X86::CVTSS2SDrm:
+ case X86::VCVTSS2SDrm:
+ case X86::VCVTSS2SDZrm:
+ case X86::CVTSD2SSrm:
+ case X86::VCVTSD2SSrm:
+ case X86::VCVTSD2SSZrm:
+ // AVX512 added unsigned integer conversions.
+ case X86::VCVTTSD2USI64Zrm:
+ case X86::VCVTTSD2USIZrm:
+ case X86::VCVTTSS2USI64Zrm:
+ case X86::VCVTTSS2USIZrm:
+ case X86::VCVTUSI2SDZrm:
+ case X86::VCVTUSI642SDZrm:
+ case X86::VCVTUSI2SSZrm:
+ case X86::VCVTUSI642SSZrm:
+
+ // Loads to register don't set flags.
+ case X86::MOV8rm:
+ case X86::MOV8rm_NOREX:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::MOVSX16rm8:
+ case X86::MOVSX32rm16:
+ case X86::MOVSX32rm8:
+ case X86::MOVSX32rm8_NOREX:
+ case X86::MOVSX64rm16:
+ case X86::MOVSX64rm32:
+ case X86::MOVSX64rm8:
+ case X86::MOVZX16rm8:
+ case X86::MOVZX32rm16:
+ case X86::MOVZX32rm8:
+ case X86::MOVZX32rm8_NOREX:
+ case X86::MOVZX64rm16:
+ case X86::MOVZX64rm8:
+ return true;
+ }
+}
+
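
Both new helpers take the same shape: a default-deny switch over an explicit allowlist of opcodes, so any instruction not audited for constant-time behavior is conservatively reported as unsafe. A minimal standalone sketch of that shape, with an illustrative enum rather than X86's MachineInstr opcodes:

// Illustrative opcodes only; the real classifiers above switch over
// MI.getOpcode() values such as X86::ADD32rr or X86::MOV64rm.
enum class Opcode { Add, Xor, Lea, Div };

static bool isConstantTimeSketch(Opcode Op) {
  switch (Op) {
  default:
    // Default-deny: anything not explicitly listed is assumed to have
    // data-dependent timing.
    return false;
  case Opcode::Add: // simple ALU ops: data-invariant, though they set flags
  case Opcode::Xor:
  case Opcode::Lea: // address arithmetic only, no memory access
    return true;
  }
}

int main() {
  return (isConstantTimeSketch(Opcode::Xor) &&
          !isConstantTimeSketch(Opcode::Div))
             ? 0
             : 1;
}
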
int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
const MachineFunction *MF = MI.getParent()->getParent();
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
if (isFrameInstr(MI)) {
- unsigned StackAlign = TFI->getStackAlignment();
- int SPAdj = alignTo(getFrameSize(MI), StackAlign);
+ int SPAdj = alignTo(getFrameSize(MI), TFI->getStackAlign());
SPAdj -= getFrameAdjustment(MI);
if (!isFrameSetup(MI))
SPAdj = -SPAdj;
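
getSPAdjust now rounds the frame size up to the subtarget's stack alignment via alignTo before applying the sign of the adjustment. A small standalone sketch of that round-up arithmetic, using plain integers instead of llvm::Align and assuming a power-of-two alignment:

#include <cstdint>

// Round Size up to the next multiple of Alignment (a power of two), as
// alignTo(getFrameSize(MI), TFI->getStackAlign()) does above.
static uint64_t alignToSketch(uint64_t Size, uint64_t Alignment) {
  return (Size + Alignment - 1) & ~(Alignment - 1);
}

int main() {
  // e.g. a 13-byte frame with 16-byte stack alignment reserves 16 bytes.
  return alignToSketch(13, 16) == 16 ? 0 : 1;
}
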
@@ -639,7 +1123,7 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SubIdx,
+ Register DestReg, unsigned SubIdx,
const MachineInstr &Orig,
const TargetRegisterInfo &TRI) const {
bool ClobbersEFLAGS = Orig.modifiesRegister(X86::EFLAGS, &TRI);
@@ -1182,61 +1666,61 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::VMOVUPSZ128rmk: case X86::VMOVAPSZ128rmk:
case X86::VMOVUPSZ256rmk: case X86::VMOVAPSZ256rmk:
case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk:
- case X86::VBROADCASTSDZ256mk:
- case X86::VBROADCASTSDZmk:
- case X86::VBROADCASTSSZ128mk:
- case X86::VBROADCASTSSZ256mk:
- case X86::VBROADCASTSSZmk:
- case X86::VPBROADCASTDZ128mk:
- case X86::VPBROADCASTDZ256mk:
- case X86::VPBROADCASTDZmk:
- case X86::VPBROADCASTQZ128mk:
- case X86::VPBROADCASTQZ256mk:
- case X86::VPBROADCASTQZmk: {
+ case X86::VBROADCASTSDZ256rmk:
+ case X86::VBROADCASTSDZrmk:
+ case X86::VBROADCASTSSZ128rmk:
+ case X86::VBROADCASTSSZ256rmk:
+ case X86::VBROADCASTSSZrmk:
+ case X86::VPBROADCASTDZ128rmk:
+ case X86::VPBROADCASTDZ256rmk:
+ case X86::VPBROADCASTDZrmk:
+ case X86::VPBROADCASTQZ128rmk:
+ case X86::VPBROADCASTQZ256rmk:
+ case X86::VPBROADCASTQZrmk: {
unsigned Opc;
switch (MIOpc) {
default: llvm_unreachable("Unreachable!");
- case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break;
- case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break;
- case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break;
- case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break;
- case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break;
- case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break;
- case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
- case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
- case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
- case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
- case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
- case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
- case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
- case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
- case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
- case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
- case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
- case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
- case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
- case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
- case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
- case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
- case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
- case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
- case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
- case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
- case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
- case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
- case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
- case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
- case X86::VBROADCASTSDZ256mk: Opc = X86::VBLENDMPDZ256rmbk; break;
- case X86::VBROADCASTSDZmk: Opc = X86::VBLENDMPDZrmbk; break;
- case X86::VBROADCASTSSZ128mk: Opc = X86::VBLENDMPSZ128rmbk; break;
- case X86::VBROADCASTSSZ256mk: Opc = X86::VBLENDMPSZ256rmbk; break;
- case X86::VBROADCASTSSZmk: Opc = X86::VBLENDMPSZrmbk; break;
- case X86::VPBROADCASTDZ128mk: Opc = X86::VPBLENDMDZ128rmbk; break;
- case X86::VPBROADCASTDZ256mk: Opc = X86::VPBLENDMDZ256rmbk; break;
- case X86::VPBROADCASTDZmk: Opc = X86::VPBLENDMDZrmbk; break;
- case X86::VPBROADCASTQZ128mk: Opc = X86::VPBLENDMQZ128rmbk; break;
- case X86::VPBROADCASTQZ256mk: Opc = X86::VPBLENDMQZ256rmbk; break;
- case X86::VPBROADCASTQZmk: Opc = X86::VPBLENDMQZrmbk; break;
+ case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break;
+ case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break;
+ case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break;
+ case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break;
+ case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break;
+ case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break;
+ case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
+ case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
+ case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
+ case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
+ case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
+ case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
+ case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
+ case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
+ case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
+ case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
+ case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
+ case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
+ case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
+ case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
+ case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
+ case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
+ case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
+ case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
+ case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
+ case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
+ case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
+ case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
+ case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
+ case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
+ case X86::VBROADCASTSDZ256rmk: Opc = X86::VBLENDMPDZ256rmbk; break;
+ case X86::VBROADCASTSDZrmk: Opc = X86::VBLENDMPDZrmbk; break;
+ case X86::VBROADCASTSSZ128rmk: Opc = X86::VBLENDMPSZ128rmbk; break;
+ case X86::VBROADCASTSSZ256rmk: Opc = X86::VBLENDMPSZ256rmbk; break;
+ case X86::VBROADCASTSSZrmk: Opc = X86::VBLENDMPSZrmbk; break;
+ case X86::VPBROADCASTDZ128rmk: Opc = X86::VPBLENDMDZ128rmbk; break;
+ case X86::VPBROADCASTDZ256rmk: Opc = X86::VPBLENDMDZ256rmbk; break;
+ case X86::VPBROADCASTDZrmk: Opc = X86::VPBLENDMDZrmbk; break;
+ case X86::VPBROADCASTQZ128rmk: Opc = X86::VPBLENDMQZ128rmbk; break;
+ case X86::VPBROADCASTQZ256rmk: Opc = X86::VPBLENDMQZ256rmbk; break;
+ case X86::VPBROADCASTQZrmk: Opc = X86::VPBLENDMQZrmbk; break;
}
NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
@@ -1883,7 +2367,7 @@ X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
unsigned KMaskOp = -1U;
if (X86II::isKMasked(TSFlags)) {
// For k-zero-masked operations it is Ok to commute the first vector
- // operand.
+ // operand. Unless this is an intrinsic instruction.
// For regular k-masked operations a conservative choice is done as the
// elements of the first vector operand, for which the corresponding bit
// in the k-mask operand is set to 0, are copied to the result of the
@@ -1902,7 +2386,7 @@ X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
// The operand with index = 1 is used as a source for those elements for
// which the corresponding bit in the k-mask is set to 0.
- if (X86II::isKMergeMasked(TSFlags))
+ if (X86II::isKMergeMasked(TSFlags) || IsIntrinsic)
FirstCommutableVecOp = 3;
LastCommutableVecOp++;
@@ -2379,17 +2863,6 @@ unsigned X86::getSwappedVCMPImm(unsigned Imm) {
return Imm;
}
-bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
- if (!MI.isTerminator()) return false;
-
- // Conditional branch is a special case.
- if (MI.isBranch() && !MI.isBarrier())
- return true;
- if (!MI.isPredicable())
- return true;
- return !isPredicated(MI);
-}
-
bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
case X86::TCRETURNdi:
@@ -2826,11 +3299,11 @@ unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB,
return Count;
}
-bool X86InstrInfo::
-canInsertSelect(const MachineBasicBlock &MBB,
- ArrayRef<MachineOperand> Cond,
- unsigned TrueReg, unsigned FalseReg,
- int &CondCycles, int &TrueCycles, int &FalseCycles) const {
+bool X86InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
+ ArrayRef<MachineOperand> Cond,
+ Register DstReg, Register TrueReg,
+ Register FalseReg, int &CondCycles,
+ int &TrueCycles, int &FalseCycles) const {
// Not all subtargets have cmov instructions.
if (!Subtarget.hasCMov())
return false;
@@ -2865,9 +3338,9 @@ canInsertSelect(const MachineBasicBlock &MBB,
void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DstReg,
- ArrayRef<MachineOperand> Cond, unsigned TrueReg,
- unsigned FalseReg) const {
+ const DebugLoc &DL, Register DstReg,
+ ArrayRef<MachineOperand> Cond, Register TrueReg,
+ Register FalseReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
const TargetRegisterClass &RC = *MRI.getRegClass(DstReg);
@@ -3189,8 +3662,9 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
}
}
-bool X86InstrInfo::getMemOperandWithOffset(
- const MachineInstr &MemOp, const MachineOperand *&BaseOp, int64_t &Offset,
+bool X86InstrInfo::getMemOperandsWithOffsetWidth(
+ const MachineInstr &MemOp, SmallVectorImpl<const MachineOperand *> &BaseOps,
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
const TargetRegisterInfo *TRI) const {
const MCInstrDesc &Desc = MemOp.getDesc();
int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
@@ -3199,7 +3673,8 @@ bool X86InstrInfo::getMemOperandWithOffset(
MemRefBegin += X86II::getOperandBias(Desc);
- BaseOp = &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg);
+ const MachineOperand *BaseOp =
+ &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg);
if (!BaseOp->isReg()) // Can be an MO_FrameIndex
return false;
@@ -3221,6 +3696,13 @@ bool X86InstrInfo::getMemOperandWithOffset(
if (!BaseOp->isReg())
return false;
+ OffsetIsScalable = false;
+ // FIXME: Relying on memoperands() may not be the right thing to do here.
+ // Check with X86 maintainers, and fix it accordingly. For now, it is OK,
+ // since there is no use of `Width` in the X86 back-end at the moment.
+ Width =
+ !MemOp.memoperands_empty() ? MemOp.memoperands().front()->getSize() : 0;
+ BaseOps.push_back(BaseOp);
return true;
}
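For orientation, an X86 memory reference is five consecutive machine operands starting at MemRefBegin, and the hook above reports only the base register, the constant displacement as Offset, and a Width read from the attached memory operand (0 when none is present). A rough standalone sketch of that shape (the struct and field names are illustrative stand-ins, not the LLVM operand accessors):

#include <cstdint>

// Stand-in for the five x86 address operands the code above indexes:
// base, scale, index, displacement, segment; AccessSize models what the
// hook returns as Width when a memory operand is attached.
struct X86MemRef {
  int BaseReg;
  int64_t Scale;
  int IndexReg;
  int64_t Disp;
  int SegmentReg;
  uint64_t AccessSize; // 0 if unknown
};

struct BaseAndOffset {
  int BaseReg;
  int64_t Offset;
  uint64_t Width;
};

// Mirrors the shape of the result: a base register, a constant offset, and
// the access width; anything the hook cannot express this way is rejected.
BaseAndOffset describe(const X86MemRef &M) {
  return {M.BaseReg, M.Disp, M.AccessSize};
}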
@@ -3241,7 +3723,7 @@ static unsigned getLoadRegOpcode(unsigned DestReg,
void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIdx,
+ Register SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
@@ -3249,7 +3731,7 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
"Stack slot too small for store");
unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
bool isAligned =
- (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) ||
+ (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
@@ -3258,20 +3740,20 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIdx,
+ Register DestReg, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
bool isAligned =
- (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) ||
+ (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg), FrameIdx);
}
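Both spill helpers above pick between aligned and unaligned move opcodes with the same test: the required spill alignment (the register class spill size, floored at 16 bytes) must already be guaranteed by the stack alignment, or the frame must be realignable. A condensed standalone sketch of that gate (parameter names are made up):

#include <algorithm>
#include <cstdint>

// True when the spill/reload code may use the aligned vector move forms.
bool useAlignedSpill(uint32_t StackAlign, uint32_t SpillSize,
                     bool CanRealignStack) {
  uint32_t Needed = std::max<uint32_t>(SpillSize, 16);
  return StackAlign >= Needed || CanRealignStack;
}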
-bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &CmpMask,
+bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &CmpMask,
int &CmpValue) const {
switch (MI.getOpcode()) {
default: break;
@@ -3358,7 +3840,7 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
/// SrcReg, SrcRegs: register operands for FlagI.
/// ImmValue: immediate for FlagI if it takes an immediate.
inline static bool isRedundantFlagInstr(const MachineInstr &FlagI,
- unsigned SrcReg, unsigned SrcReg2,
+ Register SrcReg, Register SrcReg2,
int ImmMask, int ImmValue,
const MachineInstr &OI) {
if (((FlagI.getOpcode() == X86::CMP64rr && OI.getOpcode() == X86::SUB64rr) ||
@@ -3547,8 +4029,8 @@ static X86::CondCode isUseDefConvertible(const MachineInstr &MI) {
/// Check if there exists an earlier instruction that
/// operates on the same source operands and sets flags in the same way as
/// Compare; remove Compare if possible.
-bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
- unsigned SrcReg2, int CmpMask,
+bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
+ Register SrcReg2, int CmpMask,
int CmpValue,
const MachineRegisterInfo *MRI) const {
// Check whether we can replace SUB with CMP.
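optimizeCompareInstr leans on the fact that CMP performs the same subtraction as SUB and sets identical EFLAGS, merely discarding the numeric result, so a CMP that repeats an earlier SUB is redundant. A hand-rolled sketch of the shared flag computation for the zero, sign and carry flags (not hardware or LLVM code; overflow is omitted for brevity):

#include <cstdint>

struct Flags { bool ZF, SF, CF; };

// Flags produced by "a - b". CMP and SUB differ only in whether the
// difference is written back, so once an identical SUB has already set the
// flags, a following CMP adds no information.
Flags flagsOfSub(uint32_t A, uint32_t B) {
  uint32_t R = A - B;
  return {R == 0, (R >> 31) != 0, A < B};
}

This is the same property isRedundantFlagInstr checks above when it pairs, for example, CMP64rr with SUB64rr on matching operands.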
@@ -3875,15 +4357,15 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI,
static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
const MCInstrDesc &Desc) {
assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
- Register Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB.getReg(0);
MIB->setDesc(Desc);
// MachineInstr::addOperand() will insert explicit operands before any
// implicit operands.
MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
// But we don't trust that.
- assert(MIB->getOperand(1).getReg() == Reg &&
- MIB->getOperand(2).getReg() == Reg && "Misplaced operand");
+ assert(MIB.getReg(1) == Reg &&
+ MIB.getReg(2) == Reg && "Misplaced operand");
return true;
}
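Expand2AddrUndef turns a pseudo into a real two-address instruction whose sources are the destination register marked undef; that is only sound because the chosen instructions never read the register's previous value. A plain-C++ sketch of the two idioms relied on by the SETB_C* and *_SET0 pseudos expanded further down (function names are made up):

#include <cstdint>

// "sbb r, r" computes r - r - CF, which depends only on the carry flag,
// so the register inputs may be undef: the result is 0 or 0xFFFFFFFF.
uint32_t sbbSameReg(uint32_t AnyValue, bool CarryFlag) {
  return AnyValue - AnyValue - (CarryFlag ? 1u : 0u);
}

// Likewise "xor r, r" is always zero regardless of the register's old
// contents, which is why the *_SET0 pseudos can expand this way.
uint32_t xorSameReg(uint32_t AnyValue) { return AnyValue ^ AnyValue; }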
@@ -3905,7 +4387,7 @@ static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
bool MinusOne) {
MachineBasicBlock &MBB = *MIB->getParent();
DebugLoc DL = MIB->getDebugLoc();
- Register Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB.getReg(0);
// Insert the XOR.
BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg)
@@ -3949,7 +4431,7 @@ static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
BuildMI(MBB, I, DL, TII.get(X86::PUSH64i8)).addImm(Imm);
MIB->setDesc(TII.get(X86::POP64r));
MIB->getOperand(0)
- .setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), 64));
+ .setReg(getX86SubSuperRegister(MIB.getReg(0), 64));
} else {
assert(MIB->getOpcode() == X86::MOV32ImmSExti8);
StackAdjustment = 4;
@@ -3981,14 +4463,14 @@ static void expandLoadStackGuard(MachineInstrBuilder &MIB,
const TargetInstrInfo &TII) {
MachineBasicBlock &MBB = *MIB->getParent();
DebugLoc DL = MIB->getDebugLoc();
- Register Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB.getReg(0);
const GlobalValue *GV =
cast<GlobalValue>((*MIB->memoperands_begin())->getValue());
auto Flags = MachineMemOperand::MOLoad |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant;
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
- MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 8, 8);
+ MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 8, Align(8));
MachineBasicBlock::iterator I = MIB.getInstr();
BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg).addReg(X86::RIP).addImm(1)
@@ -4019,7 +4501,7 @@ static bool expandNOVLXLoad(MachineInstrBuilder &MIB,
const MCInstrDesc &LoadDesc,
const MCInstrDesc &BroadcastDesc,
unsigned SubIdx) {
- Register DestReg = MIB->getOperand(0).getReg();
+ Register DestReg = MIB.getReg(0);
// Check if DestReg is XMM16-31 or YMM16-31.
if (TRI->getEncodingValue(DestReg) < 16) {
// We can use a normal VEX encoded load.
@@ -4042,7 +4524,7 @@ static bool expandNOVLXStore(MachineInstrBuilder &MIB,
const MCInstrDesc &StoreDesc,
const MCInstrDesc &ExtractDesc,
unsigned SubIdx) {
- Register SrcReg = MIB->getOperand(X86::AddrNumOperands).getReg();
+ Register SrcReg = MIB.getReg(X86::AddrNumOperands);
// Check if DestReg is XMM16-31 or YMM16-31.
if (TRI->getEncodingValue(SrcReg) < 16) {
// We can use a normal VEX encoded store.
@@ -4065,7 +4547,7 @@ static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) {
// Temporarily remove the immediate so we can add another source register.
MIB->RemoveOperand(2);
// Add the register. Don't copy the kill flag if there is one.
- MIB.addReg(MIB->getOperand(1).getReg(),
+ MIB.addReg(MIB.getReg(1),
getUndefRegState(MIB->getOperand(1).isUndef()));
// Add back the immediate.
MIB.addImm(ShiftAmt);
@@ -4085,10 +4567,6 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::MOV32ImmSExti8:
case X86::MOV64ImmSExti8:
return ExpandMOVImmSExti8(MIB, *this, Subtarget);
- case X86::SETB_C8r:
- return Expand2AddrUndef(MIB, get(X86::SBB8rr));
- case X86::SETB_C16r:
- return Expand2AddrUndef(MIB, get(X86::SBB16rr));
case X86::SETB_C32r:
return Expand2AddrUndef(MIB, get(X86::SBB32rr));
case X86::SETB_C64r:
@@ -4103,7 +4581,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::AVX_SET0: {
assert(HasAVX && "AVX not supported");
const TargetRegisterInfo *TRI = &getRegisterInfo();
- Register SrcReg = MIB->getOperand(0).getReg();
+ Register SrcReg = MIB.getReg(0);
Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
MIB->getOperand(0).setReg(XReg);
Expand2AddrUndef(MIB, get(X86::VXORPSrr));
@@ -4115,7 +4593,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::AVX512_FsFLD0SD:
case X86::AVX512_FsFLD0F128: {
bool HasVLX = Subtarget.hasVLX();
- Register SrcReg = MIB->getOperand(0).getReg();
+ Register SrcReg = MIB.getReg(0);
const TargetRegisterInfo *TRI = &getRegisterInfo();
if (HasVLX || TRI->getEncodingValue(SrcReg) < 16)
return Expand2AddrUndef(MIB,
@@ -4129,7 +4607,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::AVX512_256_SET0:
case X86::AVX512_512_SET0: {
bool HasVLX = Subtarget.hasVLX();
- Register SrcReg = MIB->getOperand(0).getReg();
+ Register SrcReg = MIB.getReg(0);
const TargetRegisterInfo *TRI = &getRegisterInfo();
if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) {
Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
@@ -4152,14 +4630,14 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::AVX2_SETALLONES:
return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
case X86::AVX1_SETALLONES: {
- Register Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB.getReg(0);
// VCMPPSYrri with an immediate 0xf should produce VCMPTRUEPS.
MIB->setDesc(get(X86::VCMPPSYrri));
MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf);
return true;
}
case X86::AVX512_512_SETALLONES: {
- Register Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB.getReg(0);
MIB->setDesc(get(X86::VPTERNLOGDZrri));
// VPTERNLOGD needs 3 register inputs and an immediate.
// 0xff will return 1s for any input.
@@ -4169,8 +4647,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
case X86::AVX512_512_SEXT_MASK_32:
case X86::AVX512_512_SEXT_MASK_64: {
- Register Reg = MIB->getOperand(0).getReg();
- Register MaskReg = MIB->getOperand(1).getReg();
+ Register Reg = MIB.getReg(0);
+ Register MaskReg = MIB.getReg(1);
unsigned MaskState = getRegState(MIB->getOperand(1));
unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ?
X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz;
@@ -4207,7 +4685,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
case X86::MOV32ri64: {
- Register Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB.getReg(0);
Register Reg32 = RI.getSubReg(Reg, X86::sub_32bit);
MI.setDesc(get(X86::MOV32ri));
MIB->getOperand(0).setReg(Reg32);
@@ -4360,11 +4838,105 @@ unsigned X86InstrInfo::getPartialRegUpdateClearance(
// Return true for any instruction that copies the high bits of the first source
// operand into the unused high bits of the destination operand.
-static bool hasUndefRegUpdate(unsigned Opcode, unsigned &OpNum,
+// Also returns true for instructions that have two inputs where one may
+// be undef and we want it to use the same register as the other input.
+static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
bool ForLoadFold = false) {
// OpNum is the index of the source operand to check.
- OpNum = 1;
switch (Opcode) {
+ case X86::MMX_PUNPCKHBWirr:
+ case X86::MMX_PUNPCKHWDirr:
+ case X86::MMX_PUNPCKHDQirr:
+ case X86::MMX_PUNPCKLBWirr:
+ case X86::MMX_PUNPCKLWDirr:
+ case X86::MMX_PUNPCKLDQirr:
+ case X86::MOVHLPSrr:
+ case X86::PACKSSWBrr:
+ case X86::PACKUSWBrr:
+ case X86::PACKSSDWrr:
+ case X86::PACKUSDWrr:
+ case X86::PUNPCKHBWrr:
+ case X86::PUNPCKLBWrr:
+ case X86::PUNPCKHWDrr:
+ case X86::PUNPCKLWDrr:
+ case X86::PUNPCKHDQrr:
+ case X86::PUNPCKLDQrr:
+ case X86::PUNPCKHQDQrr:
+ case X86::PUNPCKLQDQrr:
+ case X86::SHUFPDrri:
+ case X86::SHUFPSrri:
+ // These instructions are sometimes used with an undef first or second
+ // source. Return true here so BreakFalseDeps will assign this source to the
+ // same register as the first source to avoid a false dependency.
+ // Operand 1 of these instructions is tied so they're separate from their
+ // VEX counterparts.
+ return OpNum == 2 && !ForLoadFold;
+
+ case X86::VMOVLHPSrr:
+ case X86::VMOVLHPSZrr:
+ case X86::VPACKSSWBrr:
+ case X86::VPACKUSWBrr:
+ case X86::VPACKSSDWrr:
+ case X86::VPACKUSDWrr:
+ case X86::VPACKSSWBZ128rr:
+ case X86::VPACKUSWBZ128rr:
+ case X86::VPACKSSDWZ128rr:
+ case X86::VPACKUSDWZ128rr:
+ case X86::VPERM2F128rr:
+ case X86::VPERM2I128rr:
+ case X86::VSHUFF32X4Z256rri:
+ case X86::VSHUFF32X4Zrri:
+ case X86::VSHUFF64X2Z256rri:
+ case X86::VSHUFF64X2Zrri:
+ case X86::VSHUFI32X4Z256rri:
+ case X86::VSHUFI32X4Zrri:
+ case X86::VSHUFI64X2Z256rri:
+ case X86::VSHUFI64X2Zrri:
+ case X86::VPUNPCKHBWrr:
+ case X86::VPUNPCKLBWrr:
+ case X86::VPUNPCKHBWYrr:
+ case X86::VPUNPCKLBWYrr:
+ case X86::VPUNPCKHBWZ128rr:
+ case X86::VPUNPCKLBWZ128rr:
+ case X86::VPUNPCKHBWZ256rr:
+ case X86::VPUNPCKLBWZ256rr:
+ case X86::VPUNPCKHBWZrr:
+ case X86::VPUNPCKLBWZrr:
+ case X86::VPUNPCKHWDrr:
+ case X86::VPUNPCKLWDrr:
+ case X86::VPUNPCKHWDYrr:
+ case X86::VPUNPCKLWDYrr:
+ case X86::VPUNPCKHWDZ128rr:
+ case X86::VPUNPCKLWDZ128rr:
+ case X86::VPUNPCKHWDZ256rr:
+ case X86::VPUNPCKLWDZ256rr:
+ case X86::VPUNPCKHWDZrr:
+ case X86::VPUNPCKLWDZrr:
+ case X86::VPUNPCKHDQrr:
+ case X86::VPUNPCKLDQrr:
+ case X86::VPUNPCKHDQYrr:
+ case X86::VPUNPCKLDQYrr:
+ case X86::VPUNPCKHDQZ128rr:
+ case X86::VPUNPCKLDQZ128rr:
+ case X86::VPUNPCKHDQZ256rr:
+ case X86::VPUNPCKLDQZ256rr:
+ case X86::VPUNPCKHDQZrr:
+ case X86::VPUNPCKLDQZrr:
+ case X86::VPUNPCKHQDQrr:
+ case X86::VPUNPCKLQDQrr:
+ case X86::VPUNPCKHQDQYrr:
+ case X86::VPUNPCKLQDQYrr:
+ case X86::VPUNPCKHQDQZ128rr:
+ case X86::VPUNPCKLQDQZ128rr:
+ case X86::VPUNPCKHQDQZ256rr:
+ case X86::VPUNPCKLQDQZ256rr:
+ case X86::VPUNPCKHQDQZrr:
+ case X86::VPUNPCKLQDQZrr:
+ // These instructions are sometimes used with an undef first or second
+ // source. Return true here so BreakFalseDeps will assign this source to the
+ // same register as the first source to avoid a false dependency.
+ return (OpNum == 1 || OpNum == 2) && !ForLoadFold;
+
case X86::VCVTSI2SSrr:
case X86::VCVTSI2SSrm:
case X86::VCVTSI2SSrr_Int:
@@ -4422,7 +4994,7 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned &OpNum,
case X86::VCVTUSI642SDZrm_Int:
// Load folding won't affect the undef register update since the input is
// a GPR.
- return !ForLoadFold;
+ return OpNum == 1 && !ForLoadFold;
case X86::VCVTSD2SSrr:
case X86::VCVTSD2SSrm:
case X86::VCVTSD2SSrr_Int:
@@ -4521,15 +5093,13 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned &OpNum,
case X86::VSQRTSDZrb_Int:
case X86::VSQRTSDZm:
case X86::VSQRTSDZm_Int:
- return true;
+ return OpNum == 1;
case X86::VMOVSSZrrk:
case X86::VMOVSDZrrk:
- OpNum = 3;
- return true;
+ return OpNum == 3 && !ForLoadFold;
case X86::VMOVSSZrrkz:
case X86::VMOVSDZrrkz:
- OpNum = 2;
- return true;
+ return OpNum == 2 && !ForLoadFold;
}
return false;
@@ -4552,13 +5122,17 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned &OpNum,
unsigned
X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum,
const TargetRegisterInfo *TRI) const {
- if (!hasUndefRegUpdate(MI.getOpcode(), OpNum))
- return 0;
-
- const MachineOperand &MO = MI.getOperand(OpNum);
- if (MO.isUndef() && Register::isPhysicalRegister(MO.getReg())) {
- return UndefRegClearance;
+ for (unsigned i = MI.getNumExplicitDefs(), e = MI.getNumExplicitOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (MO.isReg() && MO.isUndef() &&
+ Register::isPhysicalRegister(MO.getReg()) &&
+ hasUndefRegUpdate(MI.getOpcode(), i)) {
+ OpNum = i;
+ return UndefRegClearance;
+ }
}
+
return 0;
}
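With the change above, getUndefRegClearance scans every explicit use operand and asks hasUndefRegUpdate about that specific index, instead of consulting a single hard-coded operand. A standalone sketch of that scan with toy types (ToyOperand, wantsClearance, and the opcode/index mapping are all invented for illustration):

#include <cstddef>
#include <vector>

struct ToyOperand {
  bool IsReg;
  bool IsUndef;
  unsigned Reg; // nonzero plays the role of "physical register" here
};

// Invented stand-in for the big switch in hasUndefRegUpdate above.
bool wantsClearance(unsigned Opcode, unsigned OpNum) {
  return Opcode == 42 && (OpNum == 1 || OpNum == 2);
}

// Return the clearance for the first undef physical-register use that the
// per-(opcode, operand) predicate accepts, or 0 if there is none.
unsigned undefClearance(unsigned Opcode, const std::vector<ToyOperand> &Uses,
                        unsigned ClearanceDistance, unsigned &OpNum) {
  for (std::size_t I = 0; I != Uses.size(); ++I) {
    const ToyOperand &Op = Uses[I];
    if (Op.IsReg && Op.IsUndef && Op.Reg != 0 &&
        wantsClearance(Opcode, static_cast<unsigned>(I))) {
      OpNum = static_cast<unsigned>(I);
      return ClearanceDistance;
    }
  }
  return 0;
}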
@@ -4729,7 +5303,7 @@ static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
- unsigned Size, unsigned Align) const {
+ unsigned Size, Align Alignment) const {
switch (MI.getOpcode()) {
case X86::INSERTPSrr:
case X86::VINSERTPSrr:
@@ -4745,7 +5319,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
- if ((Size == 0 || Size >= 16) && RCSize >= 16 && 4 <= Align) {
+ if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment >= Align(4)) {
int PtrOffset = SrcIdx * 4;
unsigned NewImm = (DstIdx << 4) | ZMask;
unsigned NewOpCode =
@@ -4769,7 +5343,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
- if ((Size == 0 || Size >= 16) && RCSize >= 16 && 8 <= Align) {
+ if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment >= Align(8)) {
unsigned NewOpCode =
(MI.getOpcode() == X86::VMOVHLPSZrr) ? X86::VMOVLPSZ128rm :
(MI.getOpcode() == X86::VMOVHLPSrr) ? X86::VMOVLPSrm :
@@ -4788,7 +5362,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
- if ((Size == 0 || Size >= 16) && RCSize >= 16 && Align < 16) {
+ if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment < Align(16)) {
MachineInstr *NewMI =
FuseInst(MF, X86::MOVHPDrm, OpNum, MOs, InsertPt, MI, *this);
return NewMI;
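In the INSERTPS fold a few hunks above, the immediate's source-element field is converted into a byte offset on the folded 4-byte load, and only the destination slot and zero mask survive in the rewritten immediate (the INSERTPS immediate packs the source element in bits 7:6, the destination slot in bits 5:4, and the zero mask in bits 3:0). A small sketch of that re-encoding (struct and function names are invented):

#include <cstdint>

struct FoldedInsertPS {
  int PtrOffsetBytes; // added to the load address, replaces the src selector
  uint8_t NewImm;     // keeps only destination slot and zero mask
};

FoldedInsertPS foldInsertPSImm(uint8_t Imm) {
  unsigned ZMask = Imm & 0xF;
  unsigned DstIdx = (Imm >> 4) & 0x3;
  unsigned SrcIdx = (Imm >> 6) & 0x3;
  return {static_cast<int>(SrcIdx * 4),
          static_cast<uint8_t>((DstIdx << 4) | ZMask)};
}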
@@ -4802,8 +5376,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
MachineInstr &MI) {
- unsigned Ignored;
- if (!hasUndefRegUpdate(MI.getOpcode(), Ignored, /*ForLoadFold*/true) ||
+ if (!hasUndefRegUpdate(MI.getOpcode(), 1, /*ForLoadFold*/true) ||
!MI.getOperand(1).isReg())
return false;
@@ -4820,11 +5393,10 @@ static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
return VRegDef && VRegDef->isImplicitDef();
}
-
MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
- unsigned Size, unsigned Align, bool AllowCommute) const {
+ unsigned Size, Align Alignment, bool AllowCommute) const {
bool isSlowTwoMemOps = Subtarget.slowTwoMemOps();
bool isTwoAddrFold = false;
@@ -4864,8 +5436,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
MachineInstr *NewMI = nullptr;
// Attempt to fold any custom cases we have.
- if (MachineInstr *CustomMI =
- foldMemoryOperandCustom(MF, MI, OpNum, MOs, InsertPt, Size, Align))
+ if (MachineInstr *CustomMI = foldMemoryOperandCustom(
+ MF, MI, OpNum, MOs, InsertPt, Size, Alignment))
return CustomMI;
const X86MemoryFoldTableEntry *I = nullptr;
@@ -4892,9 +5464,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
if (I != nullptr) {
unsigned Opcode = I->DstOp;
- unsigned MinAlign = (I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT;
- MinAlign = MinAlign ? 1 << (MinAlign - 1) : 0;
- if (Align < MinAlign)
+ MaybeAlign MinAlign =
+ decodeMaybeAlign((I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT);
+ if (MinAlign && Alignment < *MinAlign)
return nullptr;
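The removed lines just above show the encoding directly: the fold table stores its minimum-alignment requirement as a small code where 0 means "no requirement" and N means 1 << (N - 1) bytes, which decodeMaybeAlign unpacks. A standalone sketch of the decode and the check (std::optional stands in for MaybeAlign; the names are illustrative):

#include <cstdint>
#include <optional>

std::optional<uint64_t> decodeMinAlign(uint8_t Encoded) {
  if (Encoded == 0)
    return std::nullopt;               // no alignment requirement
  return uint64_t(1) << (Encoded - 1); // e.g. 5 -> 16 bytes
}

// Folding is rejected only when a requirement exists and is not met.
bool alignmentAllowsFold(uint8_t Encoded, uint64_t ActualAlign) {
  std::optional<uint64_t> Min = decodeMinAlign(Encoded);
  return !Min || ActualAlign >= *Min;
}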
bool NarrowToMOV32rm = false;
if (Size) {
@@ -4969,8 +5541,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
}
// Attempt to fold with the commuted version of the instruction.
- NewMI = foldMemoryOperandImpl(MF, MI, CommuteOpIdx2, MOs, InsertPt,
- Size, Align, /*AllowCommute=*/false);
+ NewMI = foldMemoryOperandImpl(MF, MI, CommuteOpIdx2, MOs, InsertPt, Size,
+ Alignment, /*AllowCommute=*/false);
if (NewMI)
return NewMI;
@@ -5024,12 +5596,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
const MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Size = MFI.getObjectSize(FrameIndex);
- unsigned Alignment = MFI.getObjectAlignment(FrameIndex);
+ Align Alignment = MFI.getObjectAlign(FrameIndex);
// If the function stack isn't realigned we don't want to fold instructions
// that need increased alignment.
if (!RI.needsStackRealignment(MF))
Alignment =
- std::min(Alignment, Subtarget.getFrameLowering()->getStackAlignment());
+ std::min(Alignment, Subtarget.getFrameLowering()->getStackAlign());
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
unsigned RCSize = 0;
@@ -5087,12 +5659,31 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
// destination register is wider than 32 bits (4 bytes), and its user
// instruction isn't scalar (SS).
switch (UserOpc) {
+ case X86::CVTSS2SDrr_Int:
+ case X86::VCVTSS2SDrr_Int:
+ case X86::VCVTSS2SDZrr_Int:
+ case X86::VCVTSS2SDZrr_Intk:
+ case X86::VCVTSS2SDZrr_Intkz:
+ case X86::CVTSS2SIrr_Int: case X86::CVTSS2SI64rr_Int:
+ case X86::VCVTSS2SIrr_Int: case X86::VCVTSS2SI64rr_Int:
+ case X86::VCVTSS2SIZrr_Int: case X86::VCVTSS2SI64Zrr_Int:
+ case X86::CVTTSS2SIrr_Int: case X86::CVTTSS2SI64rr_Int:
+ case X86::VCVTTSS2SIrr_Int: case X86::VCVTTSS2SI64rr_Int:
+ case X86::VCVTTSS2SIZrr_Int: case X86::VCVTTSS2SI64Zrr_Int:
+ case X86::VCVTSS2USIZrr_Int: case X86::VCVTSS2USI64Zrr_Int:
+ case X86::VCVTTSS2USIZrr_Int: case X86::VCVTTSS2USI64Zrr_Int:
+ case X86::RCPSSr_Int: case X86::VRCPSSr_Int:
+ case X86::RSQRTSSr_Int: case X86::VRSQRTSSr_Int:
+ case X86::ROUNDSSr_Int: case X86::VROUNDSSr_Int:
+ case X86::COMISSrr_Int: case X86::VCOMISSrr_Int: case X86::VCOMISSZrr_Int:
+ case X86::UCOMISSrr_Int:case X86::VUCOMISSrr_Int:case X86::VUCOMISSZrr_Int:
case X86::ADDSSrr_Int: case X86::VADDSSrr_Int: case X86::VADDSSZrr_Int:
case X86::CMPSSrr_Int: case X86::VCMPSSrr_Int: case X86::VCMPSSZrr_Int:
case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int: case X86::VDIVSSZrr_Int:
case X86::MAXSSrr_Int: case X86::VMAXSSrr_Int: case X86::VMAXSSZrr_Int:
case X86::MINSSrr_Int: case X86::VMINSSrr_Int: case X86::VMINSSZrr_Int:
case X86::MULSSrr_Int: case X86::VMULSSrr_Int: case X86::VMULSSZrr_Int:
+ case X86::SQRTSSr_Int: case X86::VSQRTSSr_Int: case X86::VSQRTSSZr_Int:
case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int: case X86::VSUBSSZrr_Int:
case X86::VADDSSZrr_Intk: case X86::VADDSSZrr_Intkz:
case X86::VCMPSSZrr_Intk:
@@ -5100,6 +5691,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VMAXSSZrr_Intk: case X86::VMAXSSZrr_Intkz:
case X86::VMINSSZrr_Intk: case X86::VMINSSZrr_Intkz:
case X86::VMULSSZrr_Intk: case X86::VMULSSZrr_Intkz:
+ case X86::VSQRTSSZr_Intk: case X86::VSQRTSSZr_Intkz:
case X86::VSUBSSZrr_Intk: case X86::VSUBSSZrr_Intkz:
case X86::VFMADDSS4rr_Int: case X86::VFNMADDSS4rr_Int:
case X86::VFMSUBSS4rr_Int: case X86::VFNMSUBSS4rr_Int:
@@ -5127,6 +5719,41 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VFMSUB132SSZr_Intkz: case X86::VFNMSUB132SSZr_Intkz:
case X86::VFMSUB213SSZr_Intkz: case X86::VFNMSUB213SSZr_Intkz:
case X86::VFMSUB231SSZr_Intkz: case X86::VFNMSUB231SSZr_Intkz:
+ case X86::VFIXUPIMMSSZrri:
+ case X86::VFIXUPIMMSSZrrik:
+ case X86::VFIXUPIMMSSZrrikz:
+ case X86::VFPCLASSSSZrr:
+ case X86::VFPCLASSSSZrrk:
+ case X86::VGETEXPSSZr:
+ case X86::VGETEXPSSZrk:
+ case X86::VGETEXPSSZrkz:
+ case X86::VGETMANTSSZrri:
+ case X86::VGETMANTSSZrrik:
+ case X86::VGETMANTSSZrrikz:
+ case X86::VRANGESSZrri:
+ case X86::VRANGESSZrrik:
+ case X86::VRANGESSZrrikz:
+ case X86::VRCP14SSZrr:
+ case X86::VRCP14SSZrrk:
+ case X86::VRCP14SSZrrkz:
+ case X86::VRCP28SSZr:
+ case X86::VRCP28SSZrk:
+ case X86::VRCP28SSZrkz:
+ case X86::VREDUCESSZrri:
+ case X86::VREDUCESSZrrik:
+ case X86::VREDUCESSZrrikz:
+ case X86::VRNDSCALESSZr_Int:
+ case X86::VRNDSCALESSZr_Intk:
+ case X86::VRNDSCALESSZr_Intkz:
+ case X86::VRSQRT14SSZrr:
+ case X86::VRSQRT14SSZrrk:
+ case X86::VRSQRT14SSZrrkz:
+ case X86::VRSQRT28SSZr:
+ case X86::VRSQRT28SSZrk:
+ case X86::VRSQRT28SSZrkz:
+ case X86::VSCALEFSSZrr:
+ case X86::VSCALEFSSZrrk:
+ case X86::VSCALEFSSZrrkz:
return false;
default:
return true;
@@ -5141,12 +5768,29 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
// destination register is wider than 64 bits (8 bytes), and its user
// instruction isn't scalar (SD).
switch (UserOpc) {
+ case X86::CVTSD2SSrr_Int:
+ case X86::VCVTSD2SSrr_Int:
+ case X86::VCVTSD2SSZrr_Int:
+ case X86::VCVTSD2SSZrr_Intk:
+ case X86::VCVTSD2SSZrr_Intkz:
+ case X86::CVTSD2SIrr_Int: case X86::CVTSD2SI64rr_Int:
+ case X86::VCVTSD2SIrr_Int: case X86::VCVTSD2SI64rr_Int:
+ case X86::VCVTSD2SIZrr_Int: case X86::VCVTSD2SI64Zrr_Int:
+ case X86::CVTTSD2SIrr_Int: case X86::CVTTSD2SI64rr_Int:
+ case X86::VCVTTSD2SIrr_Int: case X86::VCVTTSD2SI64rr_Int:
+ case X86::VCVTTSD2SIZrr_Int: case X86::VCVTTSD2SI64Zrr_Int:
+ case X86::VCVTSD2USIZrr_Int: case X86::VCVTSD2USI64Zrr_Int:
+ case X86::VCVTTSD2USIZrr_Int: case X86::VCVTTSD2USI64Zrr_Int:
+ case X86::ROUNDSDr_Int: case X86::VROUNDSDr_Int:
+ case X86::COMISDrr_Int: case X86::VCOMISDrr_Int: case X86::VCOMISDZrr_Int:
+ case X86::UCOMISDrr_Int:case X86::VUCOMISDrr_Int:case X86::VUCOMISDZrr_Int:
case X86::ADDSDrr_Int: case X86::VADDSDrr_Int: case X86::VADDSDZrr_Int:
case X86::CMPSDrr_Int: case X86::VCMPSDrr_Int: case X86::VCMPSDZrr_Int:
case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int: case X86::VDIVSDZrr_Int:
case X86::MAXSDrr_Int: case X86::VMAXSDrr_Int: case X86::VMAXSDZrr_Int:
case X86::MINSDrr_Int: case X86::VMINSDrr_Int: case X86::VMINSDZrr_Int:
case X86::MULSDrr_Int: case X86::VMULSDrr_Int: case X86::VMULSDZrr_Int:
+ case X86::SQRTSDr_Int: case X86::VSQRTSDr_Int: case X86::VSQRTSDZr_Int:
case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int: case X86::VSUBSDZrr_Int:
case X86::VADDSDZrr_Intk: case X86::VADDSDZrr_Intkz:
case X86::VCMPSDZrr_Intk:
@@ -5154,6 +5798,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VMAXSDZrr_Intk: case X86::VMAXSDZrr_Intkz:
case X86::VMINSDZrr_Intk: case X86::VMINSDZrr_Intkz:
case X86::VMULSDZrr_Intk: case X86::VMULSDZrr_Intkz:
+ case X86::VSQRTSDZr_Intk: case X86::VSQRTSDZr_Intkz:
case X86::VSUBSDZrr_Intk: case X86::VSUBSDZrr_Intkz:
case X86::VFMADDSD4rr_Int: case X86::VFNMADDSD4rr_Int:
case X86::VFMSUBSD4rr_Int: case X86::VFNMSUBSD4rr_Int:
@@ -5181,6 +5826,41 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VFMSUB132SDZr_Intkz: case X86::VFNMSUB132SDZr_Intkz:
case X86::VFMSUB213SDZr_Intkz: case X86::VFNMSUB213SDZr_Intkz:
case X86::VFMSUB231SDZr_Intkz: case X86::VFNMSUB231SDZr_Intkz:
+ case X86::VFIXUPIMMSDZrri:
+ case X86::VFIXUPIMMSDZrrik:
+ case X86::VFIXUPIMMSDZrrikz:
+ case X86::VFPCLASSSDZrr:
+ case X86::VFPCLASSSDZrrk:
+ case X86::VGETEXPSDZr:
+ case X86::VGETEXPSDZrk:
+ case X86::VGETEXPSDZrkz:
+ case X86::VGETMANTSDZrri:
+ case X86::VGETMANTSDZrrik:
+ case X86::VGETMANTSDZrrikz:
+ case X86::VRANGESDZrri:
+ case X86::VRANGESDZrrik:
+ case X86::VRANGESDZrrikz:
+ case X86::VRCP14SDZrr:
+ case X86::VRCP14SDZrrk:
+ case X86::VRCP14SDZrrkz:
+ case X86::VRCP28SDZr:
+ case X86::VRCP28SDZrk:
+ case X86::VRCP28SDZrkz:
+ case X86::VREDUCESDZrri:
+ case X86::VREDUCESDZrrik:
+ case X86::VREDUCESDZrrikz:
+ case X86::VRNDSCALESDZr_Int:
+ case X86::VRNDSCALESDZr_Intk:
+ case X86::VRNDSCALESDZr_Intkz:
+ case X86::VRSQRT14SDZrr:
+ case X86::VRSQRT14SDZrrk:
+ case X86::VRSQRT14SDZrrkz:
+ case X86::VRSQRT28SDZr:
+ case X86::VRSQRT28SDZrk:
+ case X86::VRSQRT28SDZrkz:
+ case X86::VSCALEFSDZrr:
+ case X86::VSCALEFSDZrrk:
+ case X86::VSCALEFSDZrrkz:
return false;
default:
return true;
@@ -5221,36 +5901,36 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
return nullptr;
// Determine the alignment of the load.
- unsigned Alignment = 0;
+ Align Alignment;
if (LoadMI.hasOneMemOperand())
- Alignment = (*LoadMI.memoperands_begin())->getAlignment();
+ Alignment = (*LoadMI.memoperands_begin())->getAlign();
else
switch (LoadMI.getOpcode()) {
case X86::AVX512_512_SET0:
case X86::AVX512_512_SETALLONES:
- Alignment = 64;
+ Alignment = Align(64);
break;
case X86::AVX2_SETALLONES:
case X86::AVX1_SETALLONES:
case X86::AVX_SET0:
case X86::AVX512_256_SET0:
- Alignment = 32;
+ Alignment = Align(32);
break;
case X86::V_SET0:
case X86::V_SETALLONES:
case X86::AVX512_128_SET0:
case X86::FsFLD0F128:
case X86::AVX512_FsFLD0F128:
- Alignment = 16;
+ Alignment = Align(16);
break;
case X86::MMX_SET0:
case X86::FsFLD0SD:
case X86::AVX512_FsFLD0SD:
- Alignment = 8;
+ Alignment = Align(8);
break;
case X86::FsFLD0SS:
case X86::AVX512_FsFLD0SS:
- Alignment = 4;
+ Alignment = Align(4);
break;
default:
return nullptr;
@@ -5325,14 +6005,18 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
else if (Opc == X86::FsFLD0F128 || Opc == X86::AVX512_FsFLD0F128)
Ty = Type::getFP128Ty(MF.getFunction().getContext());
else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
- Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),16);
+ Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
+ 16);
else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 ||
Opc == X86::AVX512_256_SET0 || Opc == X86::AVX1_SETALLONES)
- Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 8);
+ Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
+ 8);
else if (Opc == X86::MMX_SET0)
- Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 2);
+ Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
+ 2);
else
- Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 4);
+ Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
+ 4);
bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES ||
Opc == X86::AVX512_512_SETALLONES ||
@@ -5418,33 +6102,33 @@ static unsigned getBroadcastOpcode(const X86MemoryFoldTableEntry *I,
case TB_BCAST_D:
switch (SpillSize) {
default: llvm_unreachable("Unknown spill size");
- case 16: return X86::VPBROADCASTDZ128m;
- case 32: return X86::VPBROADCASTDZ256m;
- case 64: return X86::VPBROADCASTDZm;
+ case 16: return X86::VPBROADCASTDZ128rm;
+ case 32: return X86::VPBROADCASTDZ256rm;
+ case 64: return X86::VPBROADCASTDZrm;
}
break;
case TB_BCAST_Q:
switch (SpillSize) {
default: llvm_unreachable("Unknown spill size");
- case 16: return X86::VPBROADCASTQZ128m;
- case 32: return X86::VPBROADCASTQZ256m;
- case 64: return X86::VPBROADCASTQZm;
+ case 16: return X86::VPBROADCASTQZ128rm;
+ case 32: return X86::VPBROADCASTQZ256rm;
+ case 64: return X86::VPBROADCASTQZrm;
}
break;
case TB_BCAST_SS:
switch (SpillSize) {
default: llvm_unreachable("Unknown spill size");
- case 16: return X86::VBROADCASTSSZ128m;
- case 32: return X86::VBROADCASTSSZ256m;
- case 64: return X86::VBROADCASTSSZm;
+ case 16: return X86::VBROADCASTSSZ128rm;
+ case 32: return X86::VBROADCASTSSZ256rm;
+ case 64: return X86::VBROADCASTSSZrm;
}
break;
case TB_BCAST_SD:
switch (SpillSize) {
default: llvm_unreachable("Unknown spill size");
case 16: return X86::VMOVDDUPZ128rm;
- case 32: return X86::VBROADCASTSDZ256m;
- case 64: return X86::VBROADCASTSDZm;
+ case 32: return X86::VBROADCASTSDZ256rm;
+ case 64: return X86::VBROADCASTSDZrm;
}
break;
}
@@ -5504,7 +6188,7 @@ bool X86InstrInfo::unfoldMemoryOperand(
Opc = getBroadcastOpcode(I, RC, Subtarget);
} else {
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
Opc = getLoadRegOpcode(Reg, RC, isAligned, Subtarget);
}
@@ -5581,7 +6265,7 @@ bool X86InstrInfo::unfoldMemoryOperand(
const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF);
auto MMOs = extractStoreMMOs(MI.memoperands(), MF);
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*DstRC), 16);
- bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
unsigned Opc = getStoreRegOpcode(Reg, DstRC, isAligned, Subtarget);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
@@ -5648,7 +6332,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
Opc = getBroadcastOpcode(I, RC, Subtarget);
} else {
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
Opc = getLoadRegOpcode(0, RC, isAligned, Subtarget);
}
@@ -5714,7 +6398,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
// memory access is slow above.
unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
- bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
+ bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
SDNode *Store =
DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, Subtarget),
dl, MVT::Other, AddrOps);
@@ -6124,18 +6808,18 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::VMOVSDZrm_alt, X86::VMOVSDZrm_alt, X86::VMOVQI2PQIZrm },
{ X86::VMOVSSZrm, X86::VMOVSSZrm, X86::VMOVDI2PDIZrm },
{ X86::VMOVSSZrm_alt, X86::VMOVSSZrm_alt, X86::VMOVDI2PDIZrm },
- { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128r, X86::VPBROADCASTDZ128r },
- { X86::VBROADCASTSSZ128m, X86::VBROADCASTSSZ128m, X86::VPBROADCASTDZ128m },
- { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256r, X86::VPBROADCASTDZ256r },
- { X86::VBROADCASTSSZ256m, X86::VBROADCASTSSZ256m, X86::VPBROADCASTDZ256m },
- { X86::VBROADCASTSSZr, X86::VBROADCASTSSZr, X86::VPBROADCASTDZr },
- { X86::VBROADCASTSSZm, X86::VBROADCASTSSZm, X86::VPBROADCASTDZm },
- { X86::VMOVDDUPZ128rr, X86::VMOVDDUPZ128rr, X86::VPBROADCASTQZ128r },
- { X86::VMOVDDUPZ128rm, X86::VMOVDDUPZ128rm, X86::VPBROADCASTQZ128m },
- { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256r, X86::VPBROADCASTQZ256r },
- { X86::VBROADCASTSDZ256m, X86::VBROADCASTSDZ256m, X86::VPBROADCASTQZ256m },
- { X86::VBROADCASTSDZr, X86::VBROADCASTSDZr, X86::VPBROADCASTQZr },
- { X86::VBROADCASTSDZm, X86::VBROADCASTSDZm, X86::VPBROADCASTQZm },
+ { X86::VBROADCASTSSZ128rr,X86::VBROADCASTSSZ128rr,X86::VPBROADCASTDZ128rr },
+ { X86::VBROADCASTSSZ128rm,X86::VBROADCASTSSZ128rm,X86::VPBROADCASTDZ128rm },
+ { X86::VBROADCASTSSZ256rr,X86::VBROADCASTSSZ256rr,X86::VPBROADCASTDZ256rr },
+ { X86::VBROADCASTSSZ256rm,X86::VBROADCASTSSZ256rm,X86::VPBROADCASTDZ256rm },
+ { X86::VBROADCASTSSZrr, X86::VBROADCASTSSZrr, X86::VPBROADCASTDZrr },
+ { X86::VBROADCASTSSZrm, X86::VBROADCASTSSZrm, X86::VPBROADCASTDZrm },
+ { X86::VMOVDDUPZ128rr, X86::VMOVDDUPZ128rr, X86::VPBROADCASTQZ128rr },
+ { X86::VMOVDDUPZ128rm, X86::VMOVDDUPZ128rm, X86::VPBROADCASTQZ128rm },
+ { X86::VBROADCASTSDZ256rr,X86::VBROADCASTSDZ256rr,X86::VPBROADCASTQZ256rr },
+ { X86::VBROADCASTSDZ256rm,X86::VBROADCASTSDZ256rm,X86::VPBROADCASTQZ256rm },
+ { X86::VBROADCASTSDZrr, X86::VBROADCASTSDZrr, X86::VPBROADCASTQZrr },
+ { X86::VBROADCASTSDZrm, X86::VBROADCASTSDZrm, X86::VPBROADCASTQZrm },
{ X86::VINSERTF32x4Zrr, X86::VINSERTF32x4Zrr, X86::VINSERTI32x4Zrr },
{ X86::VINSERTF32x4Zrm, X86::VINSERTF32x4Zrm, X86::VINSERTI32x4Zrm },
{ X86::VINSERTF32x8Zrr, X86::VINSERTF32x8Zrr, X86::VINSERTI32x8Zrr },
@@ -6895,7 +7579,7 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQ);
// Don't change integer Q instructions to D instructions and
- // use D intructions if we started with a PS instruction.
+ // use D instructions if we started with a PS instruction.
if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
Domain = 4;
}
@@ -7552,7 +8236,8 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
case X86::VMULSSrr:
case X86::VMULSDZrr:
case X86::VMULSSZrr:
- return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
+ return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
+ Inst.getFlag(MachineInstr::MIFlag::FmNsz);
default:
return false;
}
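The change above gates machine-level reassociation of scalar FP adds and multiplies on the instruction's own fast-math flags instead of the global UnsafeFPMath option. A compact sketch of the policy (the struct and field names are shorthand, not the MIFlag enumerators):

// Reassociating (a + b) + c into a + (b + c) can change which signed zero is
// produced and how rounding error accumulates, so both per-instruction flags
// must be present before the combiner is allowed to do it.
struct FPFlags {
  bool Reassoc; // "allow reassociation"
  bool Nsz;     // "no signed zeros"
};

bool mayReassociateFP(const FPFlags &F) { return F.Reassoc && F.Nsz; }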
@@ -7679,6 +8364,10 @@ X86InstrInfo::describeLoadedValue(const MachineInstr &MI, Register Reg) const {
return ParamLoadedValue(*Op, Expr);
}
+ case X86::MOV8ri:
+ case X86::MOV16ri:
+ // TODO: Handle MOV8ri and MOV16ri.
+ return None;
case X86::MOV32ri:
case X86::MOV64ri:
case X86::MOV64ri32:
@@ -7738,6 +8427,20 @@ void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
MachineInstr &OldMI2,
MachineInstr &NewMI1,
MachineInstr &NewMI2) const {
+ // Propagate FP flags from the original instructions.
+ // But clear poison-generating flags because those may not be valid now.
+ // TODO: There should be a helper function for copying only fast-math-flags.
+ uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
+ NewMI1.setFlags(IntersectedFlags);
+ NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap);
+ NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap);
+ NewMI1.clearFlag(MachineInstr::MIFlag::IsExact);
+
+ NewMI2.setFlags(IntersectedFlags);
+ NewMI2.clearFlag(MachineInstr::MIFlag::NoSWrap);
+ NewMI2.clearFlag(MachineInstr::MIFlag::NoUWrap);
+ NewMI2.clearFlag(MachineInstr::MIFlag::IsExact);
+
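The added block keeps only the properties that held on both original instructions and then drops the poison-generating flags, since the reassociated form may not preserve nsw/nuw/exact. A bit-mask sketch of that rule (the flag values here are invented; the real ones are MachineInstr::MIFlag bits):

#include <cstdint>

constexpr uint16_t FlagNoSWrap = 1u << 0;
constexpr uint16_t FlagNoUWrap = 1u << 1;
constexpr uint16_t FlagIsExact = 1u << 2;
constexpr uint16_t FlagFmReassoc = 1u << 3; // example of a flag that survives

// New instructions may only claim what held on *both* originals, minus the
// poison-generating flags.
uint16_t flagsForReassociatedPair(uint16_t OldFlags1, uint16_t OldFlags2) {
  uint16_t Intersected = OldFlags1 & OldFlags2;
  return Intersected & ~(FlagNoSWrap | FlagNoUWrap | FlagIsExact);
}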
// Integer instructions may define an implicit EFLAGS dest register operand.
MachineOperand *OldFlagDef1 = OldMI1.findRegisterDefOperand(X86::EFLAGS);
MachineOperand *OldFlagDef2 = OldMI2.findRegisterDefOperand(X86::EFLAGS);
@@ -7957,8 +8660,7 @@ namespace {
}
// Visit the children of this block in the dominator tree.
- for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
- I != E; ++I) {
+ for (auto I = Node->begin(), E = Node->end(); I != E; ++I) {
Changed |= VisitNode(*I, TLSBaseAddrReg);
}
@@ -8073,6 +8775,35 @@ outliner::OutlinedFunction X86InstrInfo::getOutliningCandidateInfo(
return Sum + 1;
});
+ // We check to see if CFI instructions are present, and if they are,
+ // we find the number of CFI instructions in the candidate sequence.
+ unsigned CFICount = 0;
+ MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
+ for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
+ Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
+ const std::vector<MCCFIInstruction> &CFIInstructions =
+ RepeatedSequenceLocs[0].getMF()->getFrameInstructions();
+ if (MBBI->isCFIInstruction()) {
+ unsigned CFIIndex = MBBI->getOperand(0).getCFIIndex();
+ MCCFIInstruction CFI = CFIInstructions[CFIIndex];
+ CFICount++;
+ }
+ MBBI++;
+ }
+
+ // We compare the number of CFI instructions found to the number of CFI
+ // instructions in the parent function for each candidate. We must check this
+ // since if we outline one of the CFI instructions in a function, we have to
+ // outline them all for correctness. If we do not, the address offsets will be
+ // incorrect between the two sections of the program.
+ for (outliner::Candidate &C : RepeatedSequenceLocs) {
+ std::vector<MCCFIInstruction> CFIInstructions =
+ C.getMF()->getFrameInstructions();
+
+ if (CFICount > 0 && CFICount != CFIInstructions.size())
+ return outliner::OutlinedFunction();
+ }
+
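Condensed, the rule added above is all-or-nothing: a candidate that contains CFI instructions is only acceptable when it contains every CFI instruction of its function, because outlining a subset would leave the remaining unwind info describing the wrong offsets; and the separate check a few lines further down currently refuses outlining whenever any CFI instruction is present at all. A one-function sketch of the comparison (plain counts stand in for the candidate and function objects):

#include <cstddef>

// True when outlining the candidate would not split its function's CFI
// instructions across the outlined function and the remainder.
bool cfiCountAllowsOutlining(std::size_t CFIsInCandidate,
                             std::size_t CFIsInFunction) {
  return CFIsInCandidate == 0 || CFIsInCandidate == CFIsInFunction;
}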
// FIXME: Use real size in bytes for call and ret instructions.
if (RepeatedSequenceLocs[0].back()->isTerminator()) {
for (outliner::Candidate &C : RepeatedSequenceLocs)
@@ -8084,6 +8815,9 @@ outliner::OutlinedFunction X86InstrInfo::getOutliningCandidateInfo(
);
}
+ if (CFICount > 0)
+ return outliner::OutlinedFunction();
+
for (outliner::Candidate &C : RepeatedSequenceLocs)
C.setCallInfo(MachineOutlinerDefault, 1);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h
index 1d2da5305357..89f2ff118c37 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h
@@ -24,8 +24,6 @@
#include "X86GenInstrInfo.inc"
namespace llvm {
-class MachineInstrBuilder;
-class X86RegisterInfo;
class X86Subtarget;
namespace X86 {
@@ -180,8 +178,37 @@ public:
/// true, then it's expected the pre-extension value is available as a subreg
/// of the result register. This also returns the sub-register index in
/// SubIdx.
- bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &DstReg, unsigned &SubIdx) const override;
+ bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg,
+ Register &DstReg, unsigned &SubIdx) const override;
+
+ /// Returns true if the instruction has no behavior (specified or otherwise)
+ /// that is based on the value of any of its register operands.
+ ///
+ /// Instructions are considered data invariant even if they set EFLAGS.
+ ///
+ /// A classical example of something that is inherently not data invariant is
+ /// an indirect jump -- the destination is loaded into icache based on the
+ /// bits set in the jump destination register.
+ ///
+ /// FIXME: This should become part of our instruction tables.
+ static bool isDataInvariant(MachineInstr &MI);
+
+ /// Returns true if the instruction has no behavior (specified or otherwise)
+ /// that is based on the value loaded from memory or the value of any
+ /// non-address register operands.
+ ///
+ /// For example, the latency of the instruction must not depend on the
+ /// particular bits set in any of the registers *or* any of the bits loaded
+ /// from memory.
+ ///
+ /// Instructions are considered data invariant even if they set EFLAGS.
+ ///
+ /// A classical example of something that is inherently not data invariant is
+ /// an indirect jump -- the destination is loaded into icache based on the
+ /// bits set in the jump destination register.
+ ///
+ /// FIXME: This should become part of our instruction tables.
+ static bool isDataInvariantLoad(MachineInstr &MI);
unsigned isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
@@ -208,7 +235,7 @@ public:
bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
AAResults *AA) const override;
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SubIdx,
+ Register DestReg, unsigned SubIdx,
const MachineInstr &Orig,
const TargetRegisterInfo &TRI) const override;
@@ -278,7 +305,6 @@ public:
const X86InstrFMA3Group &FMA3Group) const;
// Branch analysis.
- bool isUnpredicatedTerminator(const MachineInstr &MI) const override;
bool isUnconditionalTailCall(const MachineInstr &MI) const override;
bool canMakeTailCallConditional(SmallVectorImpl<MachineOperand> &Cond,
const MachineInstr &TailCall) const override;
@@ -291,10 +317,11 @@ public:
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
- bool getMemOperandWithOffset(const MachineInstr &LdSt,
- const MachineOperand *&BaseOp,
- int64_t &Offset,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt,
+ SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+ bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const override;
bool analyzeBranchPredicate(MachineBasicBlock &MBB,
TargetInstrInfo::MachineBranchPredicate &MBP,
bool AllowModify = false) const override;
@@ -306,22 +333,23 @@ public:
const DebugLoc &DL,
int *BytesAdded = nullptr) const override;
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
- unsigned, unsigned, int &, int &, int &) const override;
+ Register, Register, Register, int &, int &,
+ int &) const override;
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DstReg,
- ArrayRef<MachineOperand> Cond, unsigned TrueReg,
- unsigned FalseReg) const override;
+ const DebugLoc &DL, Register DstReg,
+ ArrayRef<MachineOperand> Cond, Register TrueReg,
+ Register FalseReg) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, unsigned SrcReg,
+ MachineBasicBlock::iterator MI, Register SrcReg,
bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, unsigned DestReg,
+ MachineBasicBlock::iterator MI, Register DestReg,
int FrameIndex, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
@@ -443,7 +471,7 @@ public:
unsigned OpNum,
ArrayRef<MachineOperand> MOs,
MachineBasicBlock::iterator InsertPt,
- unsigned Size, unsigned Alignment,
+ unsigned Size, Align Alignment,
bool AllowCommute) const;
bool isHighLatencyDef(int opc) const override;
@@ -469,15 +497,15 @@ public:
/// in SrcReg and SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
- bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &CmpMask,
+ bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &CmpMask,
int &CmpValue) const override;
/// optimizeCompareInstr - Check if there exists an earlier instruction that
/// operates on the same source operands and sets flags in the same way as
/// Compare; remove Compare if possible.
- bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
- unsigned SrcReg2, int CmpMask, int CmpValue,
+ bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
+ Register SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const override;
/// optimizeLoadInstr - Try to remove the load by folding it to a register
@@ -563,7 +591,7 @@ private:
unsigned OpNum,
ArrayRef<MachineOperand> MOs,
MachineBasicBlock::iterator InsertPt,
- unsigned Size, unsigned Align) const;
+ unsigned Size, Align Alignment) const;
/// isFrameOperand - Return true and the FrameIndex if the specified
/// operand and follow operands form a reference to the stack frame.
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td
index 93f40c8ec996..23841c3d7e50 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.td
@@ -16,10 +16,10 @@
// X86 specific DAG Nodes.
//
-def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>;
-
-def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
-//def SDTX86Cmpss : SDTypeProfile<1, 3, [SDTCisVT<0, f32>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def SDTX86FCmp : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisFP<1>,
+ SDTCisSameAs<1, 2>]>;
def SDTX86Cmov : SDTypeProfile<1, 4,
[SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
@@ -121,6 +121,8 @@ def SDT_X86WIN_ALLOCA : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
+def SDT_X86PROBED_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
+
def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
@@ -138,12 +140,13 @@ def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER,
def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>;
def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>;
-def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
-def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>;
+def X86fshl : SDNode<"X86ISD::FSHL", SDTIntShiftDOp>;
+def X86fshr : SDNode<"X86ISD::FSHR", SDTIntShiftDOp>;
def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>;
-def X86strict_fcmp : SDNode<"X86ISD::STRICT_FCMP", SDTX86CmpTest, [SDNPHasChain]>;
-def X86strict_fcmps : SDNode<"X86ISD::STRICT_FCMPS", SDTX86CmpTest, [SDNPHasChain]>;
+def X86fcmp : SDNode<"X86ISD::FCMP", SDTX86FCmp>;
+def X86strict_fcmp : SDNode<"X86ISD::STRICT_FCMP", SDTX86FCmp, [SDNPHasChain]>;
+def X86strict_fcmps : SDNode<"X86ISD::STRICT_FCMPS", SDTX86FCmp, [SDNPHasChain]>;
def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>;
def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>;
@@ -152,8 +155,6 @@ def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>;
def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;
-def X86sahf : SDNode<"X86ISD::SAHF", SDTX86sahf>;
-
def X86rdrand : SDNode<"X86ISD::RDRAND", SDTX86rdrand,
[SDNPHasChain, SDNPSideEffect]>;
@@ -286,6 +287,9 @@ def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>;
def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntBinOp>;
+def X86pdep : SDNode<"X86ISD::PDEP", SDTIntBinOp>;
+def X86pext : SDNode<"X86ISD::PEXT", SDTIntBinOp>;
+
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDT_X86WIN_ALLOCA,
@@ -294,6 +298,9 @@ def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDT_X86WIN_ALLOCA,
def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
[SDNPHasChain]>;
+def X86ProbedAlloca : SDNode<"X86ISD::PROBED_ALLOCA", SDT_X86PROBED_ALLOCA,
+ [SDNPHasChain]>;
+
def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
@@ -354,6 +361,8 @@ let RenderMethod = "addMemOperands", SuperClasses = [X86MemAsmOperand] in {
def X86Mem512_RC256XOperand : AsmOperandClass { let Name = "Mem512_RC256X"; }
def X86Mem256_RC512Operand : AsmOperandClass { let Name = "Mem256_RC512"; }
def X86Mem512_RC512Operand : AsmOperandClass { let Name = "Mem512_RC512"; }
+
+ def X86SibMemOperand : AsmOperandClass { let Name = "SibMem"; }
}
def X86AbsMemAsmOperand : AsmOperandClass {
@@ -376,14 +385,16 @@ class X86VMemOperand<RegisterClass RC, string printMethod,
let MIOperandInfo = (ops ptr_rc, i8imm, RC, i32imm, SEGMENT_REG);
}
-def anymem : X86MemOperand<"printanymem">;
+def anymem : X86MemOperand<"printMemReference">;
def X86any_fcmp : PatFrags<(ops node:$lhs, node:$rhs),
[(X86strict_fcmp node:$lhs, node:$rhs),
- (X86cmp node:$lhs, node:$rhs)]>;
+ (X86fcmp node:$lhs, node:$rhs)]>;
// FIXME: Right now we allow any size during parsing, but we might want to
// restrict to only unsized memory.
-def opaquemem : X86MemOperand<"printopaquemem">;
+def opaquemem : X86MemOperand<"printMemReference">;
+
+def sibmem: X86MemOperand<"printMemReference", X86SibMemOperand>;
def i8mem : X86MemOperand<"printbytemem", X86Mem8AsmOperand>;
def i16mem : X86MemOperand<"printwordmem", X86Mem16AsmOperand>;
@@ -757,14 +768,14 @@ def i64u8imm : Operand<i64> {
}
def lea64_32mem : Operand<i32> {
- let PrintMethod = "printanymem";
+ let PrintMethod = "printMemReference";
let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, SEGMENT_REG);
let ParserMatchClass = X86MemAsmOperand;
}
// Memory operands that use 64-bit pointers in both ILP32 and LP64.
def lea64mem : Operand<i64> {
- let PrintMethod = "printanymem";
+ let PrintMethod = "printMemReference";
let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, SEGMENT_REG);
let ParserMatchClass = X86MemAsmOperand;
}
@@ -830,11 +841,10 @@ def tls64baseaddr : ComplexPattern<i64, 5, "selectTLSADDRAddr",
def vectoraddr : ComplexPattern<iPTR, 5, "selectVectorAddr", [],[SDNPWantParent]>;
-// A relocatable immediate is either an immediate operand or an operand that can
-// be relocated by the linker to an immediate, such as a regular symbol in
-// non-PIC code.
-def relocImm : ComplexPattern<iAny, 1, "selectRelocImm", [imm, X86Wrapper], [],
- 0>;
+// A relocatable immediate is an operand that can be relocated by the linker to
+// an immediate, such as a regular symbol in non-PIC code.
+def relocImm : ComplexPattern<iAny, 1, "selectRelocImm",
+ [X86Wrapper], [], 0>;
//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.
@@ -922,11 +932,10 @@ def HasRTM : Predicate<"Subtarget->hasRTM()">;
def HasADX : Predicate<"Subtarget->hasADX()">;
def HasSHA : Predicate<"Subtarget->hasSHA()">;
def HasSGX : Predicate<"Subtarget->hasSGX()">;
-def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
def HasSSEPrefetch : Predicate<"Subtarget->hasSSEPrefetch()">;
def NoSSEPrefetch : Predicate<"!Subtarget->hasSSEPrefetch()">;
-def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
+def HasPrefetchW : Predicate<"Subtarget->hasPrefetchW()">;
def HasPREFETCHWT1 : Predicate<"Subtarget->hasPREFETCHWT1()">;
def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">;
def HasMWAITX : Predicate<"Subtarget->hasMWAITX()">;
@@ -948,18 +957,23 @@ def HasCmpxchg8b : Predicate<"Subtarget->hasCmpxchg8b()">;
def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
def HasPCONFIG : Predicate<"Subtarget->hasPCONFIG()">;
def HasENQCMD : Predicate<"Subtarget->hasENQCMD()">;
+def HasSERIALIZE : Predicate<"Subtarget->hasSERIALIZE()">;
+def HasTSXLDTRK : Predicate<"Subtarget->hasTSXLDTRK()">;
+def HasAMXTILE : Predicate<"Subtarget->hasAMXTILE()">;
+def HasAMXBF16 : Predicate<"Subtarget->hasAMXBF16()">;
+def HasAMXINT8 : Predicate<"Subtarget->hasAMXINT8()">;
def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
- AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">;
+ AssemblerPredicate<(all_of (not Mode64Bit)), "Not 64-bit mode">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">,
- AssemblerPredicate<"Mode64Bit", "64-bit mode">;
+ AssemblerPredicate<(all_of Mode64Bit), "64-bit mode">;
def IsLP64 : Predicate<"Subtarget->isTarget64BitLP64()">;
def NotLP64 : Predicate<"!Subtarget->isTarget64BitLP64()">;
def In16BitMode : Predicate<"Subtarget->is16Bit()">,
- AssemblerPredicate<"Mode16Bit", "16-bit mode">;
+ AssemblerPredicate<(all_of Mode16Bit), "16-bit mode">;
def Not16BitMode : Predicate<"!Subtarget->is16Bit()">,
- AssemblerPredicate<"!Mode16Bit", "Not 16-bit mode">;
+ AssemblerPredicate<(all_of (not Mode16Bit)), "Not 16-bit mode">;
def In32BitMode : Predicate<"Subtarget->is32Bit()">,
- AssemblerPredicate<"Mode32Bit", "32-bit mode">;
+ AssemblerPredicate<(all_of Mode32Bit), "32-bit mode">;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() ||"
@@ -1033,13 +1047,17 @@ def i32immSExt8 : ImmLeaf<i32, [{ return isInt<8>(Imm); }]>;
def i64immSExt8 : ImmLeaf<i64, [{ return isInt<8>(Imm); }]>;
def i64immSExt32 : ImmLeaf<i64, [{ return isInt<32>(Imm); }]>;
-// FIXME: Ideally we would just replace the above i*immSExt* matchers with
-// relocImm-based matchers, but then FastISel would be unable to use them.
+def i16relocImmSExt8 : PatLeaf<(i16 relocImm), [{
+ return isSExtAbsoluteSymbolRef(8, N);
+}]>;
+def i32relocImmSExt8 : PatLeaf<(i32 relocImm), [{
+ return isSExtAbsoluteSymbolRef(8, N);
+}]>;
def i64relocImmSExt8 : PatLeaf<(i64 relocImm), [{
- return isSExtRelocImm<8>(N);
+ return isSExtAbsoluteSymbolRef(8, N);
}]>;
def i64relocImmSExt32 : PatLeaf<(i64 relocImm), [{
- return isSExtRelocImm<32>(N);
+ return isSExtAbsoluteSymbolRef(32, N);
}]>;
// If we have multiple users of an immediate, it's much smaller to reuse
@@ -1059,6 +1077,13 @@ def i64relocImmSExt32 : PatLeaf<(i64 relocImm), [{
// Eventually, it would be nice to allow ConstantHoisting to merge constants
// globally for potentially added savings.
//
+def imm_su : PatLeaf<(imm), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def i64immSExt32_su : PatLeaf<(i64immSExt32), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+
def relocImm8_su : PatLeaf<(i8 relocImm), [{
return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
@@ -1069,20 +1094,26 @@ def relocImm32_su : PatLeaf<(i32 relocImm), [{
return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
-def i16immSExt8_su : PatLeaf<(i16immSExt8), [{
+def i16relocImmSExt8_su : PatLeaf<(i16relocImmSExt8), [{
return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
-def i32immSExt8_su : PatLeaf<(i32immSExt8), [{
+def i32relocImmSExt8_su : PatLeaf<(i32relocImmSExt8), [{
return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
-def i64immSExt8_su : PatLeaf<(i64immSExt8), [{
+def i64relocImmSExt8_su : PatLeaf<(i64relocImmSExt8), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def i64relocImmSExt32_su : PatLeaf<(i64relocImmSExt32), [{
return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
-def i64relocImmSExt8_su : PatLeaf<(i64relocImmSExt8), [{
+def i16immSExt8_su : PatLeaf<(i16immSExt8), [{
return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
-def i64relocImmSExt32_su : PatLeaf<(i64relocImmSExt32), [{
+def i32immSExt8_su : PatLeaf<(i32immSExt8), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def i64immSExt8_su : PatLeaf<(i64immSExt8), [{
return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
@@ -1113,7 +1144,7 @@ def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD)
return true;
- if (ExtType == ISD::EXTLOAD)
+ if (ExtType == ISD::EXTLOAD && EnablePromoteAnyextLoad)
return LD->getAlignment() >= 2 && LD->isSimple();
return false;
}]>;
@@ -1123,7 +1154,7 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD)
return true;
- if (ExtType == ISD::EXTLOAD)
+ if (ExtType == ISD::EXTLOAD && EnablePromoteAnyextLoad)
return LD->getAlignment() >= 4 && LD->isSimple();
return false;
}]>;
@@ -1550,7 +1581,7 @@ def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src),
[(set GR16:$dst, imm:$src)]>, OpSize16;
def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, relocImm:$src)]>, OpSize32;
+ [(set GR32:$dst, imm:$src)]>, OpSize32;
def MOV64ri32 : RIi32S<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, i64immSExt32:$src)]>;
@@ -1558,7 +1589,7 @@ def MOV64ri32 : RIi32S<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
let isReMaterializable = 1, isMoveImm = 1 in {
def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
"movabs{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, relocImm:$src)]>;
+ [(set GR64:$dst, imm:$src)]>;
}
// Longer forms that use a ModR/M byte. Needed for disassembler
@@ -1578,19 +1609,31 @@ def MOV32ri_alt : Ii32<0xC7, MRM0r, (outs GR32:$dst), (ins i32imm:$src),
let SchedRW = [WriteStore] in {
def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
- [(store (i8 relocImm8_su:$src), addr:$dst)]>;
+ [(store (i8 imm_su:$src), addr:$dst)]>;
def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src),
"mov{w}\t{$src, $dst|$dst, $src}",
- [(store (i16 relocImm16_su:$src), addr:$dst)]>, OpSize16;
+ [(store (i16 imm_su:$src), addr:$dst)]>, OpSize16;
def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
- [(store (i32 relocImm32_su:$src), addr:$dst)]>, OpSize32;
+ [(store (i32 imm_su:$src), addr:$dst)]>, OpSize32;
def MOV64mi32 : RIi32S<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
- [(store i64relocImmSExt32_su:$src, addr:$dst)]>,
+ [(store i64immSExt32_su:$src, addr:$dst)]>,
Requires<[In64BitMode]>;
} // SchedRW
+def : Pat<(i32 relocImm:$src), (MOV32ri relocImm:$src)>;
+def : Pat<(i64 relocImm:$src), (MOV64ri relocImm:$src)>;
+
+def : Pat<(store (i8 relocImm8_su:$src), addr:$dst),
+ (MOV8mi addr:$dst, relocImm8_su:$src)>;
+def : Pat<(store (i16 relocImm16_su:$src), addr:$dst),
+ (MOV16mi addr:$dst, relocImm16_su:$src)>;
+def : Pat<(store (i32 relocImm32_su:$src), addr:$dst),
+ (MOV32mi addr:$dst, relocImm32_su:$src)>;
+def : Pat<(store (i64 i64relocImmSExt32_su:$src), addr:$dst),
+ (MOV64mi32 addr:$dst, i64immSExt32_su:$src)>;
+
let hasSideEffects = 0 in {
/// Memory offset versions of moves. The immediate is an address mode sized
@@ -1787,9 +1830,8 @@ def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
// Condition code ops, incl. set if equal/not equal/...
let SchedRW = [WriteLAHFSAHF] in {
-let Defs = [EFLAGS], Uses = [AH] in
-def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf",
- [(set EFLAGS, (X86sahf AH))]>,
+let Defs = [EFLAGS], Uses = [AH], hasSideEffects = 0 in
+def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", []>, // flags = AH
Requires<[HasLAHFSAHF]>;
let Defs = [AH], Uses = [EFLAGS], hasSideEffects = 0 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>, // AH = flags
@@ -2163,24 +2205,24 @@ def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
// Lock instruction prefix
let SchedRW = [WriteMicrocoded] in
-def LOCK_PREFIX : I<0xF0, RawFrm, (outs), (ins), "lock", []>;
+def LOCK_PREFIX : I<0xF0, PrefixByte, (outs), (ins), "lock", []>;
let SchedRW = [WriteNop] in {
// Rex64 instruction prefix
-def REX64_PREFIX : I<0x48, RawFrm, (outs), (ins), "rex64", []>,
+def REX64_PREFIX : I<0x48, PrefixByte, (outs), (ins), "rex64", []>,
Requires<[In64BitMode]>;
// Data16 instruction prefix
-def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>;
+def DATA16_PREFIX : I<0x66, PrefixByte, (outs), (ins), "data16", []>;
} // SchedRW
// Repeat string operation instruction prefixes
let Defs = [ECX], Uses = [ECX,DF], SchedRW = [WriteMicrocoded] in {
// Repeat (used with INS, OUTS, MOVS, LODS and STOS)
-def REP_PREFIX : I<0xF3, RawFrm, (outs), (ins), "rep", []>;
+def REP_PREFIX : I<0xF3, PrefixByte, (outs), (ins), "rep", []>;
// Repeat while not equal (used with CMPS and SCAS)
-def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>;
+def REPNE_PREFIX : I<0xF2, PrefixByte, (outs), (ins), "repne", []>;
}
// String manipulation instructions
@@ -2581,27 +2623,27 @@ let Predicates = [HasBMI2, NoTBM] in {
}
multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
- X86MemOperand x86memop, Intrinsic Int,
+ X86MemOperand x86memop, SDNode OpNode,
PatFrag ld_frag> {
def rr : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (Int RC:$src1, RC:$src2))]>,
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>,
VEX_4V, Sched<[WriteALU]>;
def rm : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (Int RC:$src1, (ld_frag addr:$src2)))]>,
+ [(set RC:$dst, (OpNode RC:$src1, (ld_frag addr:$src2)))]>,
VEX_4V, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
}
let Predicates = [HasBMI2] in {
defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem,
- int_x86_bmi_pdep_32, loadi32>, T8XD;
+ X86pdep, loadi32>, T8XD;
defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem,
- int_x86_bmi_pdep_64, loadi64>, T8XD, VEX_W;
+ X86pdep, loadi64>, T8XD, VEX_W;
defm PEXT32 : bmi_pdep_pext<"pext{l}", GR32, i32mem,
- int_x86_bmi_pext_32, loadi32>, T8XS;
+ X86pext, loadi32>, T8XS;
defm PEXT64 : bmi_pdep_pext<"pext{q}", GR64, i64mem,
- int_x86_bmi_pext_64, loadi64>, T8XS, VEX_W;
+ X86pext, loadi64>, T8XS, VEX_W;
}
//===----------------------------------------------------------------------===//
@@ -2785,11 +2827,11 @@ let SchedRW = [WriteStore] in {
def MOVDIRI32 : I<0xF9, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movdiri\t{$src, $dst|$dst, $src}",
[(int_x86_directstore32 addr:$dst, GR32:$src)]>,
- T8, Requires<[HasMOVDIRI]>;
+ T8PS, Requires<[HasMOVDIRI]>;
def MOVDIRI64 : RI<0xF9, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"movdiri\t{$src, $dst|$dst, $src}",
[(int_x86_directstore64 addr:$dst, GR64:$src)]>,
- T8, Requires<[In64BitMode, HasMOVDIRI]>;
+ T8PS, Requires<[In64BitMode, HasMOVDIRI]>;
} // SchedRW
//===----------------------------------------------------------------------===//
@@ -2856,6 +2898,23 @@ def : InstAlias<"clzero\t{%eax|eax}", (CLZERO32r)>, Requires<[Not64BitMode]>;
def : InstAlias<"clzero\t{%rax|rax}", (CLZERO64r)>, Requires<[In64BitMode]>;
//===----------------------------------------------------------------------===//
+// SERIALIZE Instruction
+//
+def SERIALIZE : I<0x01, MRM_E8, (outs), (ins), "serialize",
+ [(int_x86_serialize)]>, PS,
+ Requires<[HasSERIALIZE]>;
+
+//===----------------------------------------------------------------------===//
+// TSXLDTRK - TSX Suspend Load Address Tracking
+//
+let Predicates = [HasTSXLDTRK] in {
+ def XSUSLDTRK : I<0x01, MRM_E8, (outs), (ins), "xsusldtrk",
+ [(int_x86_xsusldtrk)]>, XD;
+ def XRESLDTRK : I<0x01, MRM_E9, (outs), (ins), "xresldtrk",
+ [(int_x86_xresldtrk)]>, XD;
+}
+
+//===----------------------------------------------------------------------===//
// Pattern fragments to auto generate TBM instructions.
//===----------------------------------------------------------------------===//
@@ -2913,6 +2972,11 @@ let Predicates = [HasTBM] in {
(TZMSK64rr GR64:$src)>;
// Patterns to match flag producing ops.
+ def : Pat<(and_flag_nocf GR32:$src, (add GR32:$src, 1)),
+ (BLCFILL32rr GR32:$src)>;
+ def : Pat<(and_flag_nocf GR64:$src, (add GR64:$src, 1)),
+ (BLCFILL64rr GR64:$src)>;
+
def : Pat<(or_flag_nocf GR32:$src, (not (add GR32:$src, 1))),
(BLCI32rr GR32:$src)>;
def : Pat<(or_flag_nocf GR64:$src, (not (add GR64:$src, 1))),
@@ -2974,7 +3038,7 @@ def CLWB : I<0xAE, MRM6m, (outs), (ins i8mem:$src), "clwb\t$src",
let Predicates = [HasCLDEMOTE], SchedRW = [WriteLoad] in
def CLDEMOTE : I<0x1C, MRM0m, (outs), (ins i8mem:$src), "cldemote\t$src",
- [(int_x86_cldemote addr:$src)]>, TB;
+ [(int_x86_cldemote addr:$src)]>, PS;
//===----------------------------------------------------------------------===//
// Subsystems.
@@ -3013,6 +3077,9 @@ include "X86InstrSVM.td"
include "X86InstrTSX.td"
include "X86InstrSGX.td"
+// AMX instructions
+include "X86InstrAMX.td"
+
// System instructions.
include "X86InstrSystem.td"
@@ -3108,6 +3175,9 @@ def : MnemonicAlias<"smovl", "movsl", "att">;
def : MnemonicAlias<"smovq", "movsq", "att">;
def : MnemonicAlias<"ud2a", "ud2", "att">;
+def : MnemonicAlias<"ud2bw", "ud1w", "att">;
+def : MnemonicAlias<"ud2bl", "ud1l", "att">;
+def : MnemonicAlias<"ud2bq", "ud1q", "att">;
def : MnemonicAlias<"verrw", "verr", "att">;
// MS recognizes 'xacquire'/'xrelease' as 'acquire'/'release'
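For readers skimming the hunks above: the AssemblerPredicate arguments move from comma-separated feature strings to a dag over subtarget-feature defs ((all_of ...), (not ...)). A minimal sketch of the new form, reusing the mode features that appear in this file; the def names are invented for illustration and are not part of the patch.

// Sketch only -- ExampleIn64 / ExampleNot16 are hypothetical names.
// The Predicate string still guards instruction selection, while the
// AssemblerPredicate dag guards assembler matching.
def ExampleIn64  : Predicate<"Subtarget->is64Bit()">,
                   AssemblerPredicate<(all_of Mode64Bit), "64-bit mode">;
def ExampleNot16 : Predicate<"!Subtarget->is16Bit()">,
                   AssemblerPredicate<(all_of (not Mode16Bit)), "Not 16-bit mode">;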
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td
index 0f4d4d764cc9..49940204c25a 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMMX.td
@@ -24,8 +24,9 @@
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, SchedRW = [WriteZero] in {
-def MMX_SET0 : I<0, Pseudo, (outs VR64:$dst), (ins), "", []>;
+ isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasMMX] in {
+def MMX_SET0 : I<0, Pseudo, (outs VR64:$dst), (ins), "",
+ [(set VR64:$dst, (x86mmx (MMX_X86movw2d (i32 0))))]>;
}
let Constraints = "$src1 = $dst" in {
@@ -43,8 +44,7 @@ let Constraints = "$src1 = $dst" in {
def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, OType:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId VR64:$src1,
- (bitconvert (load_mmx addr:$src2))))]>,
+ [(set VR64:$dst, (IntId VR64:$src1, (load_mmx addr:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -60,8 +60,7 @@ let Constraints = "$src1 = $dst" in {
def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId VR64:$src1,
- (bitconvert (load_mmx addr:$src2))))]>,
+ [(set VR64:$dst, (IntId VR64:$src1, (load_mmx addr:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
(ins VR64:$src1, i32u8imm:$src2),
@@ -81,8 +80,7 @@ multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
def rm : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst,
- (IntId64 (bitconvert (load_mmx addr:$src))))]>,
+ [(set VR64:$dst, (IntId64 (load_mmx addr:$src)))]>,
Sched<[sched.Folded]>;
}
@@ -101,8 +99,7 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst,
- (IntId64 VR64:$src1,
- (bitconvert (load_mmx addr:$src2))))]>,
+ (IntId64 VR64:$src1, (load_mmx addr:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -118,8 +115,8 @@ multiclass ssse3_palign_mm<string asm, Intrinsic IntId,
def rmi : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR64:$dst, (IntId VR64:$src1,
- (bitconvert (load_mmx addr:$src2)), (i8 timm:$src3)))]>,
+ [(set VR64:$dst, (IntId VR64:$src1, (load_mmx addr:$src2),
+ (i8 timm:$src3)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -164,23 +161,14 @@ def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>;
def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (x86mmx (scalar_to_vector GR32:$src)))]>,
+ (x86mmx (MMX_X86movw2d GR32:$src)))]>,
Sched<[WriteVecMoveFromGpr]>;
def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (x86mmx (scalar_to_vector (loadi32 addr:$src))))]>,
+ (x86mmx (MMX_X86movw2d (loadi32 addr:$src))))]>,
Sched<[WriteVecLoad]>;
-let Predicates = [HasMMX] in {
- def : Pat<(x86mmx (MMX_X86movw2d GR32:$src)),
- (MMX_MOVD64rr GR32:$src)>;
- def : Pat<(x86mmx (MMX_X86movw2d (i32 0))),
- (MMX_SET0)>;
- def : Pat<(x86mmx (MMX_X86movw2d (loadi32 addr:$src))),
- (MMX_MOVD64rm addr:$src)>;
-}
-
let mayStore = 1 in
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>,
@@ -240,20 +228,21 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (x86mmx VR64:$src), addr:$dst)]>;
+def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
+ [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
+def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1,
+ [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>;
+
let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in {
def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (x86mmx (bitconvert
- (i64 (extractelt (v2i64 VR128:$src),
- (iPTR 0))))))]>;
+ (x86mmx (MMX_X86movdq2q VR128:$src)))]>;
def MMX_MOVQ2DQrr : MMXS2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
(ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2i64
- (scalar_to_vector
- (i64 (bitconvert (x86mmx VR64:$src))))))]>;
+ (v2i64 (MMX_X86movq2dq VR64:$src)))]>;
let isCodeGenOnly = 1, hasSideEffects = 1 in {
def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
@@ -272,14 +261,6 @@ def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
[(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>,
Sched<[SchedWriteVecMoveLSNT.MMX.MR]>;
-let Predicates = [HasMMX] in {
- // movd to MMX register zero-extends
- def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))),
- (MMX_MOVD64rr GR32:$src)>;
- def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
- (MMX_MOVD64rm addr:$src)>;
-}
-
// Arithmetic Instructions
defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b,
SchedWriteVecALU.MMX>;
@@ -566,27 +547,6 @@ def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
(int_x86_mmx_pmovmskb VR64:$src))]>,
Sched<[WriteMMXMOVMSK]>;
-// MMX to XMM for vector types
-def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1,
- [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>;
-
-def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)),
- (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-
-// Low word of XMM to MMX.
-def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
- [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
-
-def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)),
- (x86mmx (MMX_MOVDQ2Qrr VR128:$src))>;
-
-def : Pat<(x86mmx (MMX_X86movdq2q (v2i64 (simple_load addr:$src)))),
- (x86mmx (MMX_MOVQ64rm addr:$src))>;
-
-def : Pat<(v2i64 (X86vzmovl (scalar_to_vector
- (i64 (bitconvert (x86mmx VR64:$src)))))),
- (MMX_MOVQ2DQrr VR64:$src)>;
-
// Misc.
let SchedRW = [SchedWriteShuffle.MMX] in {
let Uses = [EDI], Predicates = [HasMMX, HasSSE1,Not64BitMode] in
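The MMX hunks select MOVDQ2Q/MOVQ2DQ through dedicated SDNodes declared next to the instructions, instead of matching bitconvert/scalar_to_vector trees in separate def : Pat entries. A sketch of the declaration idiom; the node and enum names below are hypothetical, and only the SDTypeProfile shape mirrors MMX_X86movdq2q above.

// Sketch only -- ExampleMMXNode / "X86ISD::EXAMPLE" do not exist in tree.
// SDTypeProfile<1, 1, ...> declares one result and one operand; the
// SDTCisVT constraints pin result 0 to x86mmx and operand 1 to v2i64,
// so the instruction's own pattern can select the node directly.
def ExampleMMXNode : SDNode<"X86ISD::EXAMPLE",
                            SDTypeProfile<1, 1, [SDTCisVT<0, x86mmx>,
                                                 SDTCisVT<1, v2i64>]>>;

With the node constrained this way, the instruction pattern (x86mmx (MMX_X86movdq2q VR128:$src)) is enough on its own, which is why the standalone patterns are dropped further down in the hunk.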
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSGX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSGX.td
index 747f5aa86653..6439f717accb 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSGX.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSGX.td
@@ -17,13 +17,13 @@
let SchedRW = [WriteSystem], Predicates = [HasSGX] in {
// ENCLS - Execute an Enclave System Function of Specified Leaf Number
def ENCLS : I<0x01, MRM_CF, (outs), (ins),
- "encls", []>, TB;
+ "encls", []>, PS;
// ENCLU - Execute an Enclave User Function of Specified Leaf Number
def ENCLU : I<0x01, MRM_D7, (outs), (ins),
- "enclu", []>, TB;
+ "enclu", []>, PS;
// ENCLV - Execute an Enclave VMM Function of Specified Leaf Number
def ENCLV : I<0x01, MRM_C0, (outs), (ins),
- "enclv", []>, TB;
+ "enclv", []>, PS;
} // SchedRW
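In the SGX hunk the encoding class changes from TB to PS; PS derives from TB, so the 0F opcode map is unchanged, but the no-mandatory-prefix form is now recorded explicitly. A minimal sketch of declaring another leaf-encoded instruction in this style; the def name, mnemonic, and MRM_D8 leaf are placeholders chosen for illustration, not instructions from the patch.

// Sketch only -- EXAMPLE_LEAF is hypothetical; real 0F 01 leaf values are
// already claimed by other system instructions in the tree.
let SchedRW = [WriteSystem], Predicates = [HasSGX] in
def EXAMPLE_LEAF : I<0x01, MRM_D8, (outs), (ins), "exampleleaf", []>, PS;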
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
index c45f342ed75b..c3c9f22381f8 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td
@@ -43,7 +43,7 @@ let isCodeGenOnly = 1 in {
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode, RegisterClass RC,
ValueType VT, string asm, Operand memopr,
- ComplexPattern mem_cpat, Domain d,
+ PatFrags mem_frags, Domain d,
X86FoldableSchedWrite sched, bit Is2Addr = 1> {
let hasSideEffects = 0 in {
def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
@@ -57,7 +57,7 @@ let hasSideEffects = 0 in {
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (VT (OpNode RC:$src1, mem_cpat:$src2)))], d>,
+ [(set RC:$dst, (VT (OpNode RC:$src1, (mem_frags addr:$src2))))], d>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -720,11 +720,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
} // SchedRW
let Predicates = [UseAVX] in {
- // Also handle an i64 load because that may get selected as a faster way to
- // load the data.
- def : Pat<(v2f64 (X86Unpckl VR128:$src1,
- (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
- (VMOVHPDrm VR128:$src1, addr:$src2)>;
+ // MOVHPD patterns
def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
(VMOVHPDrm VR128:$src1, addr:$src2)>;
@@ -754,12 +750,6 @@ let Predicates = [UseSSE1] in {
let Predicates = [UseSSE2] in {
// MOVHPD patterns
-
- // Also handle an i64 load because that may get selected as a faster way to
- // load the data.
- def : Pat<(v2f64 (X86Unpckl VR128:$src1,
- (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
- (MOVHPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
@@ -884,6 +874,23 @@ defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf6
"cvttsd2si", "cvttsd2si",
WriteCvtSD2I, SSEPackedDouble>,
XD, VEX, VEX_W, VEX_LIG;
+
+defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
+ "cvtss2si", "cvtss2si",
+ WriteCvtSS2I, SSEPackedSingle>,
+ XS, VEX, VEX_LIG;
+defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32,
+ "cvtss2si", "cvtss2si",
+ WriteCvtSS2I, SSEPackedSingle>,
+ XS, VEX, VEX_W, VEX_LIG;
+defm VCVTSD2SI : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
+ "cvtsd2si", "cvtsd2si",
+ WriteCvtSD2I, SSEPackedDouble>,
+ XD, VEX, VEX_LIG;
+defm VCVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
+ "cvtsd2si", "cvtsd2si",
+ WriteCvtSD2I, SSEPackedDouble>,
+ XD, VEX, VEX_W, VEX_LIG;
}
// The assembler can recognize rr 64-bit instructions by seeing a rxx
@@ -923,6 +930,12 @@ let Predicates = [UseAVX] in {
(VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_sint_to_fp GR64:$src)),
(VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
+
+ def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64rr FR32:$src)>;
+ def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64rm addr:$src)>;
+
+ def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64rr FR64:$src)>;
+ def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64rm addr:$src)>;
}
let isCodeGenOnly = 1 in {
@@ -938,6 +951,20 @@ defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
"cvttsd2si", "cvttsd2si",
WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;
+
+defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
+ "cvtss2si", "cvtss2si",
+ WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC;
+defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32,
+ "cvtss2si", "cvtss2si",
+ WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC;
+defm CVTSD2SI : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
+ "cvtsd2si", "cvtsd2si",
+ WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC;
+defm CVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
+ "cvtsd2si", "cvtsd2si",
+ WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;
+
defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, any_sint_to_fp, i32mem, loadi32,
"cvtsi2ss", "cvtsi2ss{l}",
WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, SIMD_EXC;
@@ -952,12 +979,22 @@ defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, any_sint_to_fp, i64mem, loadi64,
WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD, REX_W, SIMD_EXC;
} // isCodeGenOnly = 1
+let Predicates = [UseSSE1] in {
+ def : Pat<(i64 (lrint FR32:$src)), (CVTSS2SI64rr FR32:$src)>;
+ def : Pat<(i64 (lrint (loadf32 addr:$src))), (CVTSS2SI64rm addr:$src)>;
+}
+
+let Predicates = [UseSSE2] in {
+ def : Pat<(i64 (lrint FR64:$src)), (CVTSD2SI64rr FR64:$src)>;
+ def : Pat<(i64 (lrint (loadf64 addr:$src))), (CVTSD2SI64rm addr:$src)>;
+}
+
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
ValueType DstVT, ValueType SrcVT, SDNode OpNode,
- Operand memop, ComplexPattern mem_cpat, string asm,
+ Operand memop, PatFrags mem_frags, string asm,
X86FoldableSchedWrite sched, Domain d> {
let ExeDomain = d in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
@@ -966,7 +1003,7 @@ let ExeDomain = d in {
Sched<[sched]>;
def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (DstVT (OpNode (SrcVT mem_cpat:$src))))]>,
+ [(set DstRC:$dst, (DstVT (OpNode (SrcVT (mem_frags addr:$src)))))]>,
Sched<[sched.Folded]>;
}
}
@@ -1247,7 +1284,7 @@ def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (v4f32 (X86frounds VR128:$src1, sse_load_f64:$src2)))]>,
+ (v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>,
XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>,
Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in {
@@ -1261,7 +1298,7 @@ def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v4f32 (X86frounds VR128:$src1,sse_load_f64:$src2)))]>,
+ (v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>,
XD, Requires<[UseSSE2]>,
Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
}
@@ -1745,124 +1782,94 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
- SDNode OpNode, ValueType VT,
+ Operand memop, SDNode OpNode, ValueType VT,
PatFrag ld_frag, string asm,
- X86FoldableSchedWrite sched> {
-let Uses = [MXCSR], mayRaiseFPException = 1 in {
- let isCommutable = 1 in
- def rr : SIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
- [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, timm:$cc))]>,
- Sched<[sched]>;
- def rm : SIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
- [(set RC:$dst, (OpNode (VT RC:$src1),
- (ld_frag addr:$src2), timm:$cc))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
-}
-}
-
-let isCodeGenOnly = 1 in {
- let ExeDomain = SSEPackedSingle in
- defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmps, f32, loadf32,
- "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmpSizes.PS.Scl>, XS, VEX_4V, VEX_LIG, VEX_WIG;
- let ExeDomain = SSEPackedDouble in
- defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmps, f64, loadf64,
- "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmpSizes.PD.Scl>,
- XD, VEX_4V, VEX_LIG, VEX_WIG;
-
- let Constraints = "$src1 = $dst" in {
- let ExeDomain = SSEPackedSingle in
- defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmps, f32, loadf32,
- "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SchedWriteFCmpSizes.PS.Scl>, XS;
- let ExeDomain = SSEPackedDouble in
- defm CMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmps, f64, loadf64,
- "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SchedWriteFCmpSizes.PD.Scl>, XD;
- }
-}
-
-multiclass sse12_cmp_scalar_int<Operand memop,
- Intrinsic Int, string asm, X86FoldableSchedWrite sched,
- ComplexPattern mem_cpat> {
-let Uses = [MXCSR], mayRaiseFPException = 1 in {
+ X86FoldableSchedWrite sched,
+ PatFrags mem_frags> {
def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src, u8imm:$cc), asm,
- [(set VR128:$dst, (Int VR128:$src1,
- VR128:$src, timm:$cc))]>,
- Sched<[sched]>;
-let mayLoad = 1 in
+ (ins VR128:$src1, VR128:$src2, u8imm:$cc), asm,
+ [(set VR128:$dst, (OpNode (VT VR128:$src1),
+ VR128:$src2, timm:$cc))]>,
+ Sched<[sched]>, SIMD_EXC;
+ let mayLoad = 1 in
def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, memop:$src, u8imm:$cc), asm,
- [(set VR128:$dst, (Int VR128:$src1,
- mem_cpat:$src, timm:$cc))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
-}
+ (ins VR128:$src1, memop:$src2, u8imm:$cc), asm,
+ [(set VR128:$dst, (OpNode (VT VR128:$src1),
+ (mem_frags addr:$src2), timm:$cc))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
+
+ let isCodeGenOnly = 1 in {
+ let isCommutable = 1 in
+ def rr : SIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2, timm:$cc))]>,
+ Sched<[sched]>, SIMD_EXC;
+ def rm : SIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
+ [(set RC:$dst, (OpNode RC:$src1,
+ (ld_frag addr:$src2), timm:$cc))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
+ }
}
-// Aliases to match intrinsics which expect XMM operand(s).
let ExeDomain = SSEPackedSingle in
-defm VCMPSS : sse12_cmp_scalar_int<ssmem, int_x86_sse_cmp_ss,
- "cmpss\t{$cc, $src, $src1, $dst|$dst, $src1, $src, $cc}",
- SchedWriteFCmpSizes.PS.Scl, sse_load_f32>,
- XS, VEX_4V, VEX_LIG, VEX_WIG;
+defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32,
+ "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SchedWriteFCmpSizes.PS.Scl, sse_load_f32>,
+ XS, VEX_4V, VEX_LIG, VEX_WIG;
let ExeDomain = SSEPackedDouble in
-defm VCMPSD : sse12_cmp_scalar_int<sdmem, int_x86_sse2_cmp_sd,
- "cmpsd\t{$cc, $src, $src1, $dst|$dst, $src1, $src, $cc}",
- SchedWriteFCmpSizes.PD.Scl, sse_load_f64>,
- XD, VEX_4V, VEX_LIG, VEX_WIG;
+defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64,
+ "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SchedWriteFCmpSizes.PD.Scl, sse_load_f64>,
+ XD, VEX_4V, VEX_LIG, VEX_WIG;
+
let Constraints = "$src1 = $dst" in {
let ExeDomain = SSEPackedSingle in
- defm CMPSS : sse12_cmp_scalar_int<ssmem, int_x86_sse_cmp_ss,
- "cmpss\t{$cc, $src, $dst|$dst, $src, $cc}",
- SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS;
+ defm CMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32,
+ "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}",
+ SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS;
let ExeDomain = SSEPackedDouble in
- defm CMPSD : sse12_cmp_scalar_int<sdmem, int_x86_sse2_cmp_sd,
- "cmpsd\t{$cc, $src, $dst|$dst, $src, $cc}",
- SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD;
+ defm CMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64,
+ "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
+ SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD;
}
-
// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
ValueType vt, X86MemOperand x86memop,
PatFrag ld_frag, string OpcodeStr, Domain d,
- X86FoldableSchedWrite sched = WriteFCom> {
-let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1,
- ExeDomain = d in {
+ X86FoldableSchedWrite sched = WriteFComX> {
+ let ExeDomain = d in {
def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
- Sched<[sched]>;
-let mayLoad = 1 in
+ Sched<[sched]>, SIMD_EXC;
+ let mayLoad = 1 in
def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
(ld_frag addr:$src2)))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
+ Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}
// sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp
multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
ValueType vt, Operand memop,
- ComplexPattern mem_cpat, string OpcodeStr,
+ PatFrags mem_frags, string OpcodeStr,
Domain d,
- X86FoldableSchedWrite sched = WriteFCom> {
-let Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = d in {
+ X86FoldableSchedWrite sched = WriteFComX> {
+let ExeDomain = d in {
def rr_Int: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
- Sched<[sched]>;
+ Sched<[sched]>, SIMD_EXC;
let mayLoad = 1 in
def rm_Int: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
- mem_cpat:$src2))]>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
+ (mem_frags addr:$src2)))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}
@@ -1914,18 +1921,16 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
ValueType VT, string asm,
X86FoldableSchedWrite sched,
Domain d, PatFrag ld_frag> {
-let Uses = [MXCSR], mayRaiseFPException = 1 in {
let isCommutable = 1 in
def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
[(set RC:$dst, (VT (X86any_cmpp RC:$src1, RC:$src2, timm:$cc)))], d>,
- Sched<[sched]>;
+ Sched<[sched]>, SIMD_EXC;
def rmi : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
[(set RC:$dst,
(VT (X86any_cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
-}
+ Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
defm VCMPPS : sse12_cmp_packed<VR128, f128mem, v4f32,
@@ -2812,7 +2817,7 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
}
multiclass sse_fp_unop_s_intr<RegisterClass RC, ValueType vt,
- ComplexPattern int_cpat, Intrinsic Intr,
+ PatFrags mem_frags, Intrinsic Intr,
Predicate target, string Suffix> {
let Predicates = [target] in {
// These are unary operations, but they are modeled as having 2 source operands
@@ -2828,13 +2833,13 @@ multiclass sse_fp_unop_s_intr<RegisterClass RC, ValueType vt,
// which has a clobber before the rcp, vs.
// rcpss mem, %xmm0
let Predicates = [target, OptForSize] in {
- def : Pat<(Intr int_cpat:$src2),
+ def : Pat<(Intr (mem_frags addr:$src2)),
(!cast<Instruction>(NAME#m_Int)
(vt (IMPLICIT_DEF)), addr:$src2)>;
}
}
-multiclass avx_fp_unop_s_intr<RegisterClass RC, ValueType vt, ComplexPattern int_cpat,
+multiclass avx_fp_unop_s_intr<RegisterClass RC, ValueType vt, PatFrags mem_frags,
Intrinsic Intr, Predicate target> {
let Predicates = [target] in {
def : Pat<(Intr VR128:$src),
@@ -2842,7 +2847,7 @@ multiclass avx_fp_unop_s_intr<RegisterClass RC, ValueType vt, ComplexPattern int
VR128:$src)>;
}
let Predicates = [target, OptForSize] in {
- def : Pat<(Intr int_cpat:$src2),
+ def : Pat<(Intr (mem_frags addr:$src2)),
(!cast<Instruction>(NAME#m_Int)
(vt (IMPLICIT_DEF)), addr:$src2)>;
}
@@ -2968,28 +2973,28 @@ let Predicates = [HasAVX, NoVLX] in {
multiclass sse1_fp_unop_s_intr<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched, Predicate AVXTarget> {
defm SS : sse_fp_unop_s_intr<FR32, v4f32, sse_load_f32,
- !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss),
+ !cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss),
UseSSE1, "SS">, XS;
defm V#NAME#SS : avx_fp_unop_s_intr<FR32, v4f32, sse_load_f32,
- !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss),
+ !cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss),
AVXTarget>,
XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable;
}
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched, Predicate AVXTarget> {
- defm SS : sse_fp_unop_s<opc, OpcodeStr##ss, FR32, f32, f32mem,
+ defm SS : sse_fp_unop_s<opc, OpcodeStr#ss, FR32, f32, f32mem,
ssmem, OpNode, SSEPackedSingle, sched.Scl, UseSSE1>, XS;
- defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, f32,
+ defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr#ss, FR32, f32,
f32mem, ssmem, OpNode, SSEPackedSingle, sched.Scl, AVXTarget>,
XS, VEX_4V, VEX_LIG, VEX_WIG;
}
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched, Predicate AVXTarget> {
- defm SD : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, f64, f64mem,
+ defm SD : sse_fp_unop_s<opc, OpcodeStr#sd, FR64, f64, f64mem,
sdmem, OpNode, SSEPackedDouble, sched.Scl, UseSSE2>, XD;
- defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, f64,
+ defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr#sd, FR64, f64,
f64mem, sdmem, OpNode, SSEPackedDouble, sched.Scl, AVXTarget>,
XD, VEX_4V, VEX_LIG, VEX_WIG;
}
@@ -3185,13 +3190,13 @@ def PAUSE : I<0x90, RawFrm, (outs), (ins),
let SchedRW = [WriteFence] in {
// Load, store, and memory fence
-// TODO: As with mfence, we may want to ease the availablity of sfence/lfence
+// TODO: As with mfence, we may want to ease the availability of sfence/lfence
// to include any 64-bit target.
-def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
+def SFENCE : I<0xAE, MRM7X, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
PS, Requires<[HasSSE1]>;
-def LFENCE : I<0xAE, MRM_E8, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>,
+def LFENCE : I<0xAE, MRM5X, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>,
PS, Requires<[HasSSE2]>;
-def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>,
+def MFENCE : I<0xAE, MRM6X, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>,
PS, Requires<[HasMFence]>;
} // SchedRW
@@ -3213,11 +3218,11 @@ def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
let mayLoad=1, hasSideEffects=1 in
def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
"ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
- TB, Sched<[WriteLDMXCSR]>;
+ PS, Sched<[WriteLDMXCSR]>;
let mayStore=1, hasSideEffects=1 in
def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
"stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
- TB, Sched<[WriteSTMXCSR]>;
+ PS, Sched<[WriteSTMXCSR]>;
//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
@@ -4185,8 +4190,6 @@ let Predicates = [UseAVX] in {
// AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
// These instructions also write zeros in the high part of a 256-bit register.
- def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
- (VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzload32 addr:$src)),
(VMOVDI2PDIrm addr:$src)>;
def : Pat<(v8i32 (X86vzload32 addr:$src)),
@@ -4199,8 +4202,6 @@ let Predicates = [UseSSE2] in {
def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
(MOV64toPQIrr GR64:$src)>;
- def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
- (MOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzload32 addr:$src)),
(MOVDI2PDIrm addr:$src)>;
}
@@ -4429,16 +4430,11 @@ defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;
let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(X86Movddup (v2f64 (simple_load addr:$src))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
}
let Predicates = [UseSSE3] in {
- // No need for aligned memory as this only loads 64-bits.
- def : Pat<(X86Movddup (v2f64 (simple_load addr:$src))),
- (MOVDDUPrm addr:$src)>;
def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
(MOVDDUPrm addr:$src)>;
}
@@ -5022,7 +5018,9 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v8i32 (InVecOp (v16i8 (X86vzload64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
+ def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
@@ -5030,12 +5028,14 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (v16i8 (X86vzload64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload32 addr:$src))))),
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (v8i16 (X86vzload64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
+ def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
}
}
@@ -5499,7 +5499,7 @@ let ExeDomain = SSEPackedSingle in {
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (OpNode VR128:$src1, sse_load_f32:$src2, timm:$src3))]>,
+ (OpNode VR128:$src1, (sse_load_f32 addr:$src2), timm:$src3))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, isCodeGenOnly = 1
@@ -5522,7 +5522,7 @@ let ExeDomain = SSEPackedDouble in {
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (OpNode VR128:$src1, sse_load_f64:$src2, timm:$src3))]>,
+ (OpNode VR128:$src1, (sse_load_f64 addr:$src2), timm:$src3))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1
}
@@ -6623,7 +6623,7 @@ multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
[!if(UsesXMM0,
(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
(set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>,
- T8, Sched<[sched]>;
+ T8PS, Sched<[sched]>;
def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
@@ -6634,7 +6634,7 @@ multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
(set VR128:$dst, (IntId VR128:$src1,
(memop addr:$src2), XMM0)),
(set VR128:$dst, (IntId VR128:$src1,
- (memop addr:$src2))))]>, T8,
+ (memop addr:$src2))))]>, T8PS,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -6644,7 +6644,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
"sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
(int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
- (i8 timm:$src3)))]>, TA,
+ (i8 timm:$src3)))]>, TAPS,
Sched<[SchedWriteVecIMul.XMM]>;
def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
@@ -6652,7 +6652,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
[(set VR128:$dst,
(int_x86_sha1rnds4 VR128:$src1,
(memop addr:$src2),
- (i8 timm:$src3)))]>, TA,
+ (i8 timm:$src3)))]>, TAPS,
Sched<[SchedWriteVecIMul.XMM.Folded,
SchedWriteVecIMul.XMM.ReadAfterFold]>;
@@ -6687,7 +6687,7 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
Intrinsic IntId, PatFrag ld_frag,
bit Is2Addr = 0, RegisterClass RC = VR128,
X86MemOperand MemOp = i128mem> {
- let AsmString = OpcodeStr##
+ let AsmString = OpcodeStr#
!if(Is2Addr, "\t{$src2, $dst|$dst, $src2}",
"\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
def rr : AES8I<opc, MRMSrcReg, (outs RC:$dst),
@@ -6874,10 +6874,10 @@ defm VPCLMULQDQY : vpclmulqdq<VR256, i256mem, load,
multiclass vpclmulqdq_aliases_impl<string InstStr, RegisterClass RC,
X86MemOperand MemOp, string Hi, string Lo> {
- def : InstAlias<"vpclmul"##Hi##Lo##"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ def : InstAlias<"vpclmul"#Hi#Lo#"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
(!cast<Instruction>(InstStr # "rr") RC:$dst, RC:$src1, RC:$src2,
!add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))), 0>;
- def : InstAlias<"vpclmul"##Hi##Lo##"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ def : InstAlias<"vpclmul"#Hi#Lo#"dq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
(!cast<Instruction>(InstStr # "rm") RC:$dst, RC:$src1, MemOp:$src2,
!add(!shl(!eq(Lo,"hq"),4),!eq(Hi,"hq"))), 0>;
}
@@ -7290,13 +7290,12 @@ multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop,
X86FoldableSchedWrite sched> {
def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}",
- [(set RC:$dst, (X86cvtph2ps VR128:$src))]>,
+ [(set RC:$dst, (X86any_cvtph2ps VR128:$src))]>,
T8PD, VEX, Sched<[sched]>;
let hasSideEffects = 0, mayLoad = 1 in
def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
"vcvtph2ps\t{$src, $dst|$dst, $src}",
- [(set RC:$dst, (X86cvtph2ps (loadv8i16 addr:$src)))]>,
- T8PD, VEX, Sched<[sched.Folded]>;
+ []>, T8PD, VEX, Sched<[sched.Folded]>;
}
multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
@@ -7304,7 +7303,7 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
(ins RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (X86cvtps2ph RC:$src1, timm:$src2))]>,
+ [(set VR128:$dst, (X86any_cvtps2ph RC:$src1, timm:$src2))]>,
TAPD, VEX, Sched<[RR]>;
let hasSideEffects = 0, mayStore = 1 in
def mr : Ii8<0x1D, MRMDestMem, (outs),
@@ -7322,44 +7321,26 @@ let Predicates = [HasF16C, NoVLX] in {
WriteCvtPS2PHYSt>, VEX_L, SIMD_EXC;
// Pattern match vcvtph2ps of a scalar i64 load.
- def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
+ def : Pat<(v4f32 (X86any_cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
(VCVTPH2PSrm addr:$src)>;
- def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16
+ def : Pat<(v4f32 (X86any_cvtph2ps (bc_v8i16
(v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(VCVTPH2PSrm addr:$src)>;
+ def : Pat<(v8f32 (X86any_cvtph2ps (loadv8i16 addr:$src))),
+ (VCVTPH2PSYrm addr:$src)>;
def : Pat<(store (f64 (extractelt
- (bc_v2f64 (v8i16 (X86cvtps2ph VR128:$src1, timm:$src2))),
+ (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128:$src1, timm:$src2))),
(iPTR 0))), addr:$dst),
(VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>;
def : Pat<(store (i64 (extractelt
- (bc_v2i64 (v8i16 (X86cvtps2ph VR128:$src1, timm:$src2))),
+ (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128:$src1, timm:$src2))),
(iPTR 0))), addr:$dst),
(VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>;
- def : Pat<(store (v8i16 (X86cvtps2ph VR256:$src1, timm:$src2)), addr:$dst),
+ def : Pat<(store (v8i16 (X86any_cvtps2ph VR256:$src1, timm:$src2)), addr:$dst),
(VCVTPS2PHYmr addr:$dst, VR256:$src1, timm:$src2)>;
}
-// Patterns for matching conversions from float to half-float and vice versa.
-let Predicates = [HasF16C, NoVLX] in {
- // Use MXCSR.RC for rounding instead of explicitly specifying the default
- // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
- // configurations we support (the default). However, falling back to MXCSR is
- // more consistent with other instructions, which are always controlled by it.
- // It's encoded as 0b100.
- def : Pat<(fp_to_f16 FR32:$src),
- (i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (v8i16 (VCVTPS2PHrr
- (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 4))), sub_16bit))>;
-
- def : Pat<(f16_to_fp GR16:$src),
- (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSrr
- (v4i32 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128)))), FR32)) >;
-
- def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32:$src))),
- (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSrr
- (v8i16 (VCVTPS2PHrr (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 4)))), FR32)) >;
-}
-
//===----------------------------------------------------------------------===//
// AVX2 Instructions
//===----------------------------------------------------------------------===//
@@ -7415,7 +7396,7 @@ def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3),
// For insertion into the zero index (low half) of a 256-bit vector, it is
// more efficient to generate a blend with immediate instead of an insert*128.
-// NOTE: We're using FP instructions here, but exeuction domain fixing should
+// NOTE: We're using FP instructions here, but execution domain fixing should
// take care of using integer instructions when profitable.
let Predicates = [HasAVX] in {
def : Pat<(insert_subvector (v8i32 VR256:$src1), (v4i32 VR128:$src2), (iPTR 0)),
@@ -7496,46 +7477,6 @@ defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, X86VBroadcastl
v2i64, v4i64, NoVLX>;
let Predicates = [HasAVX2, NoVLX] in {
- // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
- def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
- (VPBROADCASTQrm addr:$src)>;
- def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
- (VPBROADCASTQYrm addr:$src)>;
-
- // FIXME this is to handle aligned extloads from i8/i16.
- def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
- (VPBROADCASTDrm addr:$src)>;
- def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
- (VPBROADCASTDYrm addr:$src)>;
-}
-let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
- // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
- // This means we'll encounter truncated i32 loads; match that here.
- def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
- (VPBROADCASTWrm addr:$src)>;
- def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
- (VPBROADCASTWYrm addr:$src)>;
- def : Pat<(v8i16 (X86VBroadcast
- (i16 (trunc (i32 (extloadi16 addr:$src)))))),
- (VPBROADCASTWrm addr:$src)>;
- def : Pat<(v8i16 (X86VBroadcast
- (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
- (VPBROADCASTWrm addr:$src)>;
- def : Pat<(v16i16 (X86VBroadcast
- (i16 (trunc (i32 (extloadi16 addr:$src)))))),
- (VPBROADCASTWYrm addr:$src)>;
- def : Pat<(v16i16 (X86VBroadcast
- (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
- (VPBROADCASTWYrm addr:$src)>;
-
- // FIXME this is to handle aligned extloads from i8.
- def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
- (VPBROADCASTWrm addr:$src)>;
- def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
- (VPBROADCASTWYrm addr:$src)>;
-}
-
-let Predicates = [HasAVX2, NoVLX] in {
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
@@ -7597,10 +7538,6 @@ let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v2f64 (X86VBroadcast v2f64:$src)),
(VMOVDDUPrr VR128:$src)>;
- def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
- (VMOVDDUPrm addr:$src)>;
- def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
- (VMOVDDUPrm addr:$src)>;
}
let Predicates = [HasAVX1Only] in {
@@ -7760,39 +7697,43 @@ let Predicates = [HasAVX2, NoVLX] in {
//
multiclass avx2_pmovmask<string OpcodeStr,
Intrinsic IntLd128, Intrinsic IntLd256,
- Intrinsic IntSt128, Intrinsic IntSt256> {
+ Intrinsic IntSt128, Intrinsic IntSt256,
+ X86SchedWriteMaskMove schedX,
+ X86SchedWriteMaskMove schedY> {
def rm : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>,
- VEX_4V, Sched<[WriteVecMaskedLoad]>;
+ VEX_4V, Sched<[schedX.RM]>;
def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
- VEX_4V, VEX_L, Sched<[WriteVecMaskedLoadY]>;
+ VEX_4V, VEX_L, Sched<[schedY.RM]>;
def mr : AVX28I<0x8e, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>,
- VEX_4V, Sched<[WriteVecMaskedStore]>;
+ VEX_4V, Sched<[schedX.MR]>;
def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
(ins i256mem:$dst, VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
- VEX_4V, VEX_L, Sched<[WriteVecMaskedStoreY]>;
+ VEX_4V, VEX_L, Sched<[schedY.MR]>;
}
defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
int_x86_avx2_maskload_d,
int_x86_avx2_maskload_d_256,
int_x86_avx2_maskstore_d,
- int_x86_avx2_maskstore_d_256>;
+ int_x86_avx2_maskstore_d_256,
+ WriteVecMaskMove32, WriteVecMaskMove32Y>;
defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
int_x86_avx2_maskload_q,
int_x86_avx2_maskload_q_256,
int_x86_avx2_maskstore_q,
- int_x86_avx2_maskstore_q_256>, VEX_W;
+ int_x86_avx2_maskstore_q_256,
+ WriteVecMaskMove64, WriteVecMaskMove64Y>, VEX_W;
multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
ValueType MaskVT> {
@@ -7905,57 +7846,48 @@ let Predicates = [HasAVX2, NoVLX] in {
// FIXME: Improve scheduling of gather instructions.
multiclass avx2_gather<bits<8> opc, string OpcodeStr, ValueType VTx,
- ValueType VTy, PatFrag GatherNode128,
- PatFrag GatherNode256, RegisterClass RC256,
+ ValueType VTy, RegisterClass RC256,
X86MemOperand memop128, X86MemOperand memop256,
ValueType MTx = VTx, ValueType MTy = VTy> {
+let mayLoad = 1, hasSideEffects = 0 in {
def rm : AVX28I<opc, MRMSrcMem4VOp3, (outs VR128:$dst, VR128:$mask_wb),
(ins VR128:$src1, memop128:$src2, VR128:$mask),
!strconcat(OpcodeStr,
"\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
- [(set (VTx VR128:$dst), (MTx VR128:$mask_wb),
- (GatherNode128 VR128:$src1, VR128:$mask,
- vectoraddr:$src2))]>,
- VEX, Sched<[WriteLoad]>;
+ []>, VEX, Sched<[WriteLoad]>;
def Yrm : AVX28I<opc, MRMSrcMem4VOp3, (outs RC256:$dst, RC256:$mask_wb),
(ins RC256:$src1, memop256:$src2, RC256:$mask),
!strconcat(OpcodeStr,
"\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
- [(set (VTy RC256:$dst), (MTy RC256:$mask_wb),
- (GatherNode256 RC256:$src1, RC256:$mask,
- vectoraddr:$src2))]>,
- VEX, VEX_L, Sched<[WriteLoad]>;
+ []>, VEX, VEX_L, Sched<[WriteLoad]>;
+}
}
let Predicates = [HasAVX2] in {
let mayLoad = 1, hasSideEffects = 0, Constraints
= "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
in {
- defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", v2i64, v4i64, mgatherv4i32,
- mgatherv4i32, VR256, vx128mem, vx256mem>, VEX_W;
- defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", v2i64, v4i64, mgatherv2i64,
- mgatherv4i64, VR256, vx128mem, vy256mem>, VEX_W;
- defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", v4i32, v8i32, mgatherv4i32,
- mgatherv8i32, VR256, vx128mem, vy256mem>;
- defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", v4i32, v4i32, mgatherv2i64,
- mgatherv4i64, VR128, vx64mem, vy128mem>;
+ defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", v2i64, v4i64,
+ VR256, vx128mem, vx256mem>, VEX_W;
+ defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", v2i64, v4i64,
+ VR256, vx128mem, vy256mem>, VEX_W;
+ defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", v4i32, v8i32,
+ VR256, vx128mem, vy256mem>;
+ defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", v4i32, v4i32,
+ VR128, vx64mem, vy128mem>;
let ExeDomain = SSEPackedDouble in {
- defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", v2f64, v4f64, mgatherv4i32,
- mgatherv4i32, VR256, vx128mem, vx256mem,
- v2i64, v4i64>, VEX_W;
- defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", v2f64, v4f64, mgatherv2i64,
- mgatherv4i64, VR256, vx128mem, vy256mem,
- v2i64, v4i64>, VEX_W;
+ defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", v2f64, v4f64,
+ VR256, vx128mem, vx256mem, v2i64, v4i64>, VEX_W;
+ defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", v2f64, v4f64,
+ VR256, vx128mem, vy256mem, v2i64, v4i64>, VEX_W;
}
let ExeDomain = SSEPackedSingle in {
- defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", v4f32, v8f32, mgatherv4i32,
- mgatherv8i32, VR256, vx128mem, vy256mem,
- v4i32, v8i32>;
- defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", v4f32, v4f32, mgatherv2i64,
- mgatherv4i64, VR128, vx64mem, vy128mem,
- v4i32, v4i32>;
+ defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", v4f32, v8f32,
+ VR256, vx128mem, vy256mem, v4i32, v8i32>;
+ defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", v4f32, v4f32,
+ VR128, vx64mem, vy128mem, v4i32, v4i32>;
}
}
}
@@ -7969,8 +7901,8 @@ multiclass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT,
X86MemOperand X86MemOp, bit Is2Addr = 0> {
let ExeDomain = SSEPackedInt,
AsmString = !if(Is2Addr,
- OpcodeStr##"\t{$src2, $dst|$dst, $src2}",
- OpcodeStr##"\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
+ OpcodeStr#"\t{$src2, $dst|$dst, $src2}",
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}") in {
let isCommutable = 1 in
def rr : PDI<0xCF, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), "",
[(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, RC:$src2)))]>,
@@ -7987,8 +7919,8 @@ multiclass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT,
SDNode OpNode, RegisterClass RC, PatFrag MemOpFrag,
X86MemOperand X86MemOp, bit Is2Addr = 0> {
let AsmString = !if(Is2Addr,
- OpStr##"\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- OpStr##"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in {
+ OpStr#"\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ OpStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in {
def rri : Ii8<Op, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u8imm:$src3), "",
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))],
@@ -8008,9 +7940,9 @@ multiclass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> {
defm NAME : GF2P8AFFINE_rmi<Op, OpStr, v16i8, OpNode,
VR128, load, i128mem, 1>;
let Predicates = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in {
- defm V##NAME : GF2P8AFFINE_rmi<Op, "v"##OpStr, v16i8, OpNode, VR128,
+ defm V#NAME : GF2P8AFFINE_rmi<Op, "v"#OpStr, v16i8, OpNode, VR128,
load, i128mem>, VEX_4V, VEX_W;
- defm V##NAME##Y : GF2P8AFFINE_rmi<Op, "v"##OpStr, v32i8, OpNode, VR256,
+ defm V#NAME#Y : GF2P8AFFINE_rmi<Op, "v"#OpStr, v32i8, OpNode, VR256,
load, i256mem>, VEX_4V, VEX_L, VEX_W;
}
}
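A pattern repeated through the SSE hunks: scalar-load operands that used to be matched with a ComplexPattern bound directly to the operand (e.g. sse_load_f64:$src2) are now matched by applying a PatFrags to the plain address operand ((sse_load_f64 addr:$src2)). A minimal sketch of the resulting shape, with hypothetical multiclass and parameter names and the scheduling/type details omitted.

// Sketch only -- ExampleScalarMemOp and its parameter bindings are
// illustrative; they are not definitions introduced by this patch.
multiclass ExampleScalarMemOp<bits<8> opc, string asm, SDNode OpNode,
                              Operand memop, PatFrags mem_frags> {
  def rr_Int : SI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                  [(set VR128:$dst, (OpNode VR128:$src))]>;
  // Memory form: the fragment wraps addr:$src, where the old style bound
  // a ComplexPattern directly as mem_cpat:$src.
  def rm_Int : SI<opc, MRMSrcMem, (outs VR128:$dst), (ins memop:$src),
                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                  [(set VR128:$dst, (OpNode (mem_frags addr:$src)))]>;
}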
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrShiftRotate.td
index 9d974b716dda..823ff78b9903 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -472,19 +472,19 @@ def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2),
"rol{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (rotl GR8:$src1, (i8 relocImm:$src2)))]>;
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2),
"rol{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (rotl GR16:$src1, (i8 relocImm:$src2)))]>,
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>,
OpSize16;
def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2),
"rol{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (rotl GR32:$src1, (i8 relocImm:$src2)))]>,
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>,
OpSize32;
def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
(ins GR64:$src1, u8imm:$src2),
"rol{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (rotl GR64:$src1, (i8 relocImm:$src2)))]>;
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
// Rotate by 1
def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
@@ -570,19 +570,19 @@ def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2),
"ror{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (rotr GR8:$src1, (i8 relocImm:$src2)))]>;
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>;
def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2),
"ror{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (rotr GR16:$src1, (i8 relocImm:$src2)))]>,
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>,
OpSize16;
def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2),
"ror{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (rotr GR32:$src1, (i8 relocImm:$src2)))]>,
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>,
OpSize32;
def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
(ins GR64:$src1, u8imm:$src2),
"ror{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (rotr GR64:$src1, (i8 relocImm:$src2)))]>;
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>;
// Rotate by 1
def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
@@ -661,32 +661,32 @@ let Uses = [CL], SchedRW = [WriteSHDrrcl] in {
def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
+ [(set GR16:$dst, (X86fshl GR16:$src1, GR16:$src2, CL))]>,
TB, OpSize16;
def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"shrd{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
+ [(set GR16:$dst, (X86fshr GR16:$src2, GR16:$src1, CL))]>,
TB, OpSize16;
def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"shld{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>,
+ [(set GR32:$dst, (fshl GR32:$src1, GR32:$src2, CL))]>,
TB, OpSize32;
def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"shrd{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>,
+ [(set GR32:$dst, (fshr GR32:$src2, GR32:$src1, CL))]>,
TB, OpSize32;
def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"shld{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>,
+ [(set GR64:$dst, (fshl GR64:$src1, GR64:$src2, CL))]>,
TB;
def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>,
+ [(set GR64:$dst, (fshr GR64:$src2, GR64:$src1, CL))]>,
TB;
} // SchedRW
@@ -695,42 +695,42 @@ def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
(outs GR16:$dst),
(ins GR16:$src1, GR16:$src2, u8imm:$src3),
"shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
+ [(set GR16:$dst, (X86fshl GR16:$src1, GR16:$src2,
(i8 imm:$src3)))]>,
TB, OpSize16;
def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
(outs GR16:$dst),
(ins GR16:$src1, GR16:$src2, u8imm:$src3),
"shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
+ [(set GR16:$dst, (X86fshr GR16:$src2, GR16:$src1,
(i8 imm:$src3)))]>,
TB, OpSize16;
def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
(outs GR32:$dst),
(ins GR32:$src1, GR32:$src2, u8imm:$src3),
"shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
+ [(set GR32:$dst, (fshl GR32:$src1, GR32:$src2,
(i8 imm:$src3)))]>,
TB, OpSize32;
def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
(outs GR32:$dst),
(ins GR32:$src1, GR32:$src2, u8imm:$src3),
"shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
+ [(set GR32:$dst, (fshr GR32:$src2, GR32:$src1,
(i8 imm:$src3)))]>,
TB, OpSize32;
def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
(outs GR64:$dst),
(ins GR64:$src1, GR64:$src2, u8imm:$src3),
"shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
+ [(set GR64:$dst, (fshl GR64:$src1, GR64:$src2,
(i8 imm:$src3)))]>,
TB;
def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
(outs GR64:$dst),
(ins GR64:$src1, GR64:$src2, u8imm:$src3),
"shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
+ [(set GR64:$dst, (fshr GR64:$src2, GR64:$src1,
(i8 imm:$src3)))]>,
TB;
} // SchedRW
@@ -739,70 +739,70 @@ def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
let Uses = [CL], SchedRW = [WriteSHDmrcl] in {
def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
- addr:$dst)]>, TB, OpSize16;
+ [(store (X86fshl (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)]>, TB, OpSize16;
def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
"shrd{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
- addr:$dst)]>, TB, OpSize16;
+ [(store (X86fshr GR16:$src2, (loadi16 addr:$dst), CL),
+ addr:$dst)]>, TB, OpSize16;
def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
"shld{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
+ [(store (fshl (loadi32 addr:$dst), GR32:$src2, CL),
addr:$dst)]>, TB, OpSize32;
def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
"shrd{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
- addr:$dst)]>, TB, OpSize32;
+ [(store (fshr GR32:$src2, (loadi32 addr:$dst), CL),
+ addr:$dst)]>, TB, OpSize32;
def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"shld{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
- addr:$dst)]>, TB;
+ [(store (fshl (loadi64 addr:$dst), GR64:$src2, CL),
+ addr:$dst)]>, TB;
def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
- [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
- addr:$dst)]>, TB;
+ [(store (fshr GR64:$src2, (loadi64 addr:$dst), CL),
+ addr:$dst)]>, TB;
} // SchedRW
let SchedRW = [WriteSHDmri] in {
def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
(outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3),
"shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shld (loadi16 addr:$dst), GR16:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ [(store (X86fshl (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
TB, OpSize16;
def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
(outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3),
"shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ [(store (X86fshr GR16:$src2, (loadi16 addr:$dst),
+ (i8 imm:$src3)), addr:$dst)]>,
TB, OpSize16;
def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
(outs), (ins i32mem:$dst, GR32:$src2, u8imm:$src3),
"shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shld (loadi32 addr:$dst), GR32:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ [(store (fshl (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
TB, OpSize32;
def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
(outs), (ins i32mem:$dst, GR32:$src2, u8imm:$src3),
"shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ [(store (fshr GR32:$src2, (loadi32 addr:$dst),
+ (i8 imm:$src3)), addr:$dst)]>,
TB, OpSize32;
def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
(outs), (ins i64mem:$dst, GR64:$src2, u8imm:$src3),
"shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shld (loadi64 addr:$dst), GR64:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ [(store (fshl (loadi64 addr:$dst), GR64:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
TB;
def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
(outs), (ins i64mem:$dst, GR64:$src2, u8imm:$src3),
"shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ [(store (fshr GR64:$src2, (loadi64 addr:$dst),
+ (i8 imm:$src3)), addr:$dst)]>,
TB;
} // SchedRW
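
The hunks above retarget the SHLD/SHRD selection patterns from the target-specific X86shld/X86shrd nodes to funnel-shift nodes: SHLD matches fshl(dst, src2, amt), while SHRD matches fshr with the register operands swapped, because fshr takes the operand supplying the low half of the concatenation second. As the diff shows, the 16-bit forms keep X86-specific X86fshl/X86fshr nodes while the 32- and 64-bit forms use the generic fshl/fshr directly. A minimal C++ sketch of the scalar semantics (helper names are ad hoc, not LLVM APIs; 32-bit width assumed):

    #include <cstdint>

    // fshl(Hi, Lo, Amt): shift the 64-bit concatenation Hi:Lo left by Amt and
    // keep the high 32 bits -- the result SHLD dst, src2, amt produces with
    // Hi = dst, Lo = src2.
    uint32_t fshl32(uint32_t Hi, uint32_t Lo, uint32_t Amt) {
      Amt &= 31;
      return Amt ? (Hi << Amt) | (Lo >> (32 - Amt)) : Hi;
    }

    // fshr(Hi, Lo, Amt): shift Hi:Lo right by Amt and keep the low 32 bits.
    // SHRD dst, src2, amt therefore corresponds to fshr(src2, dst, amt), which
    // is why the SHRD patterns above list $src2 before $src1.
    uint32_t fshr32(uint32_t Hi, uint32_t Lo, uint32_t Amt) {
      Amt &= 31;
      return Amt ? (Lo >> Amt) | (Hi << (32 - Amt)) : Lo;
    }
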
@@ -1013,3 +1013,21 @@ let Predicates = [HasBMI2] in {
(INSERT_SUBREG
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
+
+def : Pat<(rotl GR8:$src1, (i8 relocImm:$src2)),
+ (ROL8ri GR8:$src1, relocImm:$src2)>;
+def : Pat<(rotl GR16:$src1, (i8 relocImm:$src2)),
+ (ROL16ri GR16:$src1, relocImm:$src2)>;
+def : Pat<(rotl GR32:$src1, (i8 relocImm:$src2)),
+ (ROL32ri GR32:$src1, relocImm:$src2)>;
+def : Pat<(rotl GR64:$src1, (i8 relocImm:$src2)),
+ (ROL64ri GR64:$src1, relocImm:$src2)>;
+
+def : Pat<(rotr GR8:$src1, (i8 relocImm:$src2)),
+ (ROR8ri GR8:$src1, relocImm:$src2)>;
+def : Pat<(rotr GR16:$src1, (i8 relocImm:$src2)),
+ (ROR16ri GR16:$src1, relocImm:$src2)>;
+def : Pat<(rotr GR32:$src1, (i8 relocImm:$src2)),
+ (ROR32ri GR32:$src1, relocImm:$src2)>;
+def : Pat<(rotr GR64:$src1, (i8 relocImm:$src2)),
+ (ROR64ri GR64:$src1, relocImm:$src2)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td
index 7f41feb6c0d9..c23bc7ebbf70 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSystem.td
@@ -23,7 +23,20 @@ def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
let mayLoad = 1, mayStore = 0, hasSideEffects = 1, isTrap = 1 in {
def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
- def UD2B : I<0xB9, RawFrm, (outs), (ins), "ud2b", []>, TB;
+
+ def UD1Wm : I<0xB9, MRMSrcMem, (outs), (ins GR16:$src1, i16mem:$src2),
+ "ud1{w} {$src2, $src1|$src1, $src2}", []>, TB, OpSize16;
+ def UD1Lm : I<0xB9, MRMSrcMem, (outs), (ins GR32:$src1, i32mem:$src2),
+ "ud1{l} {$src2, $src1|$src1, $src2}", []>, TB, OpSize32;
+ def UD1Qm : RI<0xB9, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
+ "ud1{q} {$src2, $src1|$src1, $src2}", []>, TB;
+
+ def UD1Wr : I<0xB9, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "ud1{w} {$src2, $src1|$src1, $src2}", []>, TB, OpSize16;
+ def UD1Lr : I<0xB9, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "ud1{l} {$src2, $src1|$src1, $src2}", []>, TB, OpSize32;
+ def UD1Qr : RI<0xB9, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "ud1{q} {$src2, $src1|$src1, $src2}", []>, TB;
}
def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>;
@@ -149,12 +162,12 @@ def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
// Segment override instruction prefixes
let SchedRW = [WriteNop] in {
-def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>;
-def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>;
-def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>;
-def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>;
-def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>;
-def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
+def CS_PREFIX : I<0x2E, PrefixByte, (outs), (ins), "cs", []>;
+def SS_PREFIX : I<0x36, PrefixByte, (outs), (ins), "ss", []>;
+def DS_PREFIX : I<0x3E, PrefixByte, (outs), (ins), "ds", []>;
+def ES_PREFIX : I<0x26, PrefixByte, (outs), (ins), "es", []>;
+def FS_PREFIX : I<0x64, PrefixByte, (outs), (ins), "fs", []>;
+def GS_PREFIX : I<0x65, PrefixByte, (outs), (ins), "gs", []>;
} // SchedRW
//===----------------------------------------------------------------------===//
@@ -512,12 +525,12 @@ let SchedRW = [WriteSystem] in {
let SchedRW = [WriteSystem] in {
let Predicates = [HasXSAVE] in {
let Defs = [EDX, EAX], Uses = [ECX] in
- def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
+ def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, PS;
let Uses = [EDX, EAX, ECX] in
def XSETBV : I<0x01, MRM_D1, (outs), (ins),
"xsetbv",
- [(int_x86_xsetbv ECX, EDX, EAX)]>, TB;
+ [(int_x86_xsetbv ECX, EDX, EAX)]>, PS;
} // HasXSAVE
@@ -542,47 +555,47 @@ def XSAVEOPT64 : RI<0xAE, MRM6m, (outs), (ins opaquemem:$dst),
[(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEOPT, In64BitMode]>;
def XSAVEC : I<0xC7, MRM4m, (outs), (ins opaquemem:$dst),
"xsavec\t$dst",
- [(int_x86_xsavec addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEC]>;
+ [(int_x86_xsavec addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEC]>;
def XSAVEC64 : RI<0xC7, MRM4m, (outs), (ins opaquemem:$dst),
"xsavec64\t$dst",
- [(int_x86_xsavec64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEC, In64BitMode]>;
+ [(int_x86_xsavec64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEC, In64BitMode]>;
def XSAVES : I<0xC7, MRM5m, (outs), (ins opaquemem:$dst),
"xsaves\t$dst",
- [(int_x86_xsaves addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES]>;
+ [(int_x86_xsaves addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVES]>;
def XSAVES64 : RI<0xC7, MRM5m, (outs), (ins opaquemem:$dst),
"xsaves64\t$dst",
- [(int_x86_xsaves64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVE, In64BitMode]>;
+ [(int_x86_xsaves64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE, In64BitMode]>;
def XRSTORS : I<0xC7, MRM3m, (outs), (ins opaquemem:$dst),
"xrstors\t$dst",
- [(int_x86_xrstors addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES]>;
+ [(int_x86_xrstors addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVES]>;
def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaquemem:$dst),
"xrstors64\t$dst",
- [(int_x86_xrstors64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES, In64BitMode]>;
+ [(int_x86_xrstors64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVES, In64BitMode]>;
} // Uses
} // SchedRW
//===----------------------------------------------------------------------===//
// VIA PadLock crypto instructions
let Defs = [RAX, RDI], Uses = [RDX, RDI], SchedRW = [WriteSystem] in
- def XSTORE : I<0xa7, MRM_C0, (outs), (ins), "xstore", []>, TB;
+ def XSTORE : I<0xa7, MRM_C0, (outs), (ins), "xstore", []>, TB, REP;
def : InstAlias<"xstorerng", (XSTORE)>;
let SchedRW = [WriteSystem] in {
let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in {
- def XCRYPTECB : I<0xa7, MRM_C8, (outs), (ins), "xcryptecb", []>, TB;
- def XCRYPTCBC : I<0xa7, MRM_D0, (outs), (ins), "xcryptcbc", []>, TB;
- def XCRYPTCTR : I<0xa7, MRM_D8, (outs), (ins), "xcryptctr", []>, TB;
- def XCRYPTCFB : I<0xa7, MRM_E0, (outs), (ins), "xcryptcfb", []>, TB;
- def XCRYPTOFB : I<0xa7, MRM_E8, (outs), (ins), "xcryptofb", []>, TB;
+ def XCRYPTECB : I<0xa7, MRM_C8, (outs), (ins), "xcryptecb", []>, TB, REP;
+ def XCRYPTCBC : I<0xa7, MRM_D0, (outs), (ins), "xcryptcbc", []>, TB, REP;
+ def XCRYPTCTR : I<0xa7, MRM_D8, (outs), (ins), "xcryptctr", []>, TB, REP;
+ def XCRYPTCFB : I<0xa7, MRM_E0, (outs), (ins), "xcryptcfb", []>, TB, REP;
+ def XCRYPTOFB : I<0xa7, MRM_E8, (outs), (ins), "xcryptofb", []>, TB, REP;
}
let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in {
- def XSHA1 : I<0xa6, MRM_C8, (outs), (ins), "xsha1", []>, TB;
- def XSHA256 : I<0xa6, MRM_D0, (outs), (ins), "xsha256", []>, TB;
+ def XSHA1 : I<0xa6, MRM_C8, (outs), (ins), "xsha1", []>, TB, REP;
+ def XSHA256 : I<0xa6, MRM_D0, (outs), (ins), "xsha256", []>, TB, REP;
}
let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
- def MONTMUL : I<0xa6, MRM_C0, (outs), (ins), "montmul", []>, TB;
+ def MONTMUL : I<0xa6, MRM_C0, (outs), (ins), "montmul", []>, TB, REP;
} // SchedRW
//==-----------------------------------------------------------------------===//
@@ -590,10 +603,10 @@ let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
let SchedRW = [WriteSystem] in {
let Defs = [EAX, EDX], Uses = [ECX] in
def RDPKRUr : I<0x01, MRM_EE, (outs), (ins), "rdpkru",
- [(set EAX, (X86rdpkru ECX)), (implicit EDX)]>, TB;
+ [(set EAX, (X86rdpkru ECX)), (implicit EDX)]>, PS;
let Uses = [EAX, ECX, EDX] in
def WRPKRUr : I<0x01, MRM_EF, (outs), (ins), "wrpkru",
- [(X86wrpkru EAX, EDX, ECX)]>, TB;
+ [(X86wrpkru EAX, EDX, ECX)]>, PS;
} // SchedRW
//===----------------------------------------------------------------------===//
@@ -653,15 +666,15 @@ let Predicates = [In64BitMode, HasINVPCID] in {
//===----------------------------------------------------------------------===//
// SMAP Instruction
let Defs = [EFLAGS], SchedRW = [WriteSystem] in {
- def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, TB;
- def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, TB;
+ def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, PS;
+ def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, PS;
}
//===----------------------------------------------------------------------===//
// SMX Instruction
let SchedRW = [WriteSystem] in {
let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX] in {
- def GETSEC : I<0x37, RawFrm, (outs), (ins), "getsec", []>, TB;
+ def GETSEC : I<0x37, RawFrm, (outs), (ins), "getsec", []>, PS;
} // Uses, Defs
} // SchedRW
@@ -729,6 +742,6 @@ def PTWRITE64r : RI<0xAE, MRM4r, (outs), (ins GR64:$dst),
let SchedRW = [WriteSystem] in {
let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX, RDX, EFLAGS] in
- def PCONFIG : I<0x01, MRM_C5, (outs), (ins), "pconfig", []>, TB,
+ def PCONFIG : I<0x01, MRM_C5, (outs), (ins), "pconfig", []>, PS,
Requires<[HasPCONFIG]>;
} // SchedRW
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTSX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTSX.td
index 41b839425ccd..28563eeb4484 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTSX.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrTSX.td
@@ -37,11 +37,11 @@ def XABORT_DEF : I<0, Pseudo, (outs), (ins), "# XABORT DEF", []>;
}
def XEND : I<0x01, MRM_D5, (outs), (ins),
- "xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>;
+ "xend", [(int_x86_xend)]>, PS, Requires<[HasRTM]>;
let Defs = [EFLAGS] in
def XTEST : I<0x01, MRM_D6, (outs), (ins),
- "xtest", [(set EFLAGS, (X86xtest))]>, TB, Requires<[HasRTM]>;
+ "xtest", [(set EFLAGS, (X86xtest))]>, PS, Requires<[HasRTM]>;
def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
"xabort\t$imm",
@@ -52,8 +52,8 @@ def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
let SchedRW = [WriteSystem] in {
let isAsmParserOnly = 1 in {
-def XACQUIRE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "xacquire", []>;
-def XRELEASE_PREFIX : I<0xF3, RawFrm, (outs), (ins), "xrelease", []>;
+def XACQUIRE_PREFIX : I<0xF2, PrefixByte, (outs), (ins), "xacquire", []>;
+def XRELEASE_PREFIX : I<0xF3, PrefixByte, (outs), (ins), "xrelease", []>;
}
} // SchedRW
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVMX.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVMX.td
index 37bc4ce2e053..d204a33358ea 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVMX.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrVMX.td
@@ -37,7 +37,7 @@ def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
"vmclear\t$vmcs", []>, PD;
// OF 01 D4
-def VMFUNC : I<0x01, MRM_D4, (outs), (ins), "vmfunc", []>, TB;
+def VMFUNC : I<0x01, MRM_D4, (outs), (ins), "vmfunc", []>, PS;
// 0F 01 C2
def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrXOP.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrXOP.td
index 229af366d940..a5976b7d2d74 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrXOP.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrXOP.td
@@ -40,14 +40,14 @@ let ExeDomain = SSEPackedInt in {
// Scalar load 2 addr operand instructions
multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
- Operand memop, ComplexPattern mem_cpat,
+ Operand memop, PatFrags mem_frags,
X86FoldableSchedWrite sched> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (Int mem_cpat:$src))]>, XOP,
+ [(set VR128:$dst, (Int (mem_frags addr:$src)))]>, XOP,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -335,13 +335,13 @@ multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
[(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
(X86andnp RC:$src3, RC:$src2))))]>, XOP_4V,
Sched<[sched]>;
- // FIXME: This pattern can't match.
+ // FIXME: We can't write a pattern for this in tablegen.
+ let hasSideEffects = 0, mayLoad = 1 in
def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set RC:$dst, (VT (or (and (load addr:$src3), RC:$src1),
- (X86andnp (load addr:$src3), RC:$src2))))]>,
+ []>,
XOP_4V, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
@@ -383,13 +383,13 @@ let Predicates = [HasXOP] in {
(VPCMOVrrr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(or (and VR128:$src3, VR128:$src1),
- (X86andnp VR128:$src3, (bc_v16i8 (loadv2i64 addr:$src2)))),
+ (X86andnp VR128:$src3, (loadv16i8 addr:$src2))),
(VPCMOVrmr VR128:$src1, addr:$src2, VR128:$src3)>;
def : Pat<(or (and VR128:$src3, VR128:$src1),
- (X86andnp VR128:$src3, (bc_v8i16 (loadv2i64 addr:$src2)))),
+ (X86andnp VR128:$src3, (loadv8i16 addr:$src2))),
(VPCMOVrmr VR128:$src1, addr:$src2, VR128:$src3)>;
def : Pat<(or (and VR128:$src3, VR128:$src1),
- (X86andnp VR128:$src3, (bc_v4i32 (loadv2i64 addr:$src2)))),
+ (X86andnp VR128:$src3, (loadv4i32 addr:$src2))),
(VPCMOVrmr VR128:$src1, addr:$src2, VR128:$src3)>;
def : Pat<(v32i8 (or (and VR256:$src3, VR256:$src1),
@@ -403,13 +403,13 @@ let Predicates = [HasXOP] in {
(VPCMOVYrrr VR256:$src1, VR256:$src2, VR256:$src3)>;
def : Pat<(or (and VR256:$src3, VR256:$src1),
- (X86andnp VR256:$src3, (bc_v32i8 (loadv4i64 addr:$src2)))),
+ (X86andnp VR256:$src3, (loadv32i8 addr:$src2))),
(VPCMOVYrmr VR256:$src1, addr:$src2, VR256:$src3)>;
def : Pat<(or (and VR256:$src3, VR256:$src1),
- (X86andnp VR256:$src3, (bc_v16i16 (loadv4i64 addr:$src2)))),
+ (X86andnp VR256:$src3, (loadv16i16 addr:$src2))),
(VPCMOVYrmr VR256:$src1, addr:$src2, VR256:$src3)>;
def : Pat<(or (and VR256:$src3, VR256:$src1),
- (X86andnp VR256:$src3, (bc_v8i32 (loadv4i64 addr:$src2)))),
+ (X86andnp VR256:$src3, (loadv8i32 addr:$src2))),
(VPCMOVYrmr VR256:$src1, addr:$src2, VR256:$src3)>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InstructionSelector.cpp
index 3f9d626ff912..60fb4d2ef4bf 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstructionSelector.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/X86BaseInfo.h"
+#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86RegisterBankInfo.h"
@@ -71,7 +72,7 @@ private:
// TODO: remove after supported by Tablegen-erated instruction selection.
unsigned getLoadStoreOp(const LLT &Ty, const RegisterBank &RB, unsigned Opc,
- uint64_t Alignment) const;
+ Align Alignment) const;
bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
@@ -394,7 +395,7 @@ bool X86InstructionSelector::select(MachineInstr &I) {
unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
const RegisterBank &RB,
unsigned Opc,
- uint64_t Alignment) const {
+ Align Alignment) const {
bool Isload = (Opc == TargetOpcode::G_LOAD);
bool HasAVX = STI.hasAVX();
bool HasAVX512 = STI.hasAVX512();
@@ -427,7 +428,7 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
HasAVX ? X86::VMOVSDmr :
X86::MOVSDmr);
} else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
- if (Alignment >= 16)
+ if (Alignment >= Align(16))
return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
: HasAVX512
? X86::VMOVAPSZ128rm_NOVLX
@@ -446,7 +447,7 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
? X86::VMOVUPSZ128mr_NOVLX
: HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
} else if (Ty.isVector() && Ty.getSizeInBits() == 256) {
- if (Alignment >= 32)
+ if (Alignment >= Align(32))
return Isload ? (HasVLX ? X86::VMOVAPSZ256rm
: HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX
: X86::VMOVAPSYrm)
@@ -461,7 +462,7 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
: HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX
: X86::VMOVUPSYmr);
} else if (Ty.isVector() && Ty.getSizeInBits() == 512) {
- if (Alignment >= 64)
+ if (Alignment >= Align(64))
return Isload ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
else
return Isload ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
@@ -520,13 +521,13 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
LLVM_DEBUG(dbgs() << "Atomic ordering not supported yet\n");
return false;
}
- if (MemOp.getAlignment() < Ty.getSizeInBits()/8) {
+ if (MemOp.getAlign() < Ty.getSizeInBits() / 8) {
LLVM_DEBUG(dbgs() << "Unaligned atomics not supported yet\n");
return false;
}
}
- unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlignment());
+ unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlign());
if (NewOpc == Opc)
return false;
@@ -1435,14 +1436,15 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,
const Register DstReg = I.getOperand(0).getReg();
const LLT DstTy = MRI.getType(DstReg);
const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
- unsigned Align = DstTy.getSizeInBits();
+ Align Alignment = Align(DstTy.getSizeInBytes());
const DebugLoc &DbgLoc = I.getDebugLoc();
- unsigned Opc = getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Align);
+ unsigned Opc =
+ getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Alignment);
// Create the load from the constant pool.
const ConstantFP *CFP = I.getOperand(1).getFPImm();
- unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Align);
+ unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Alignment);
MachineInstr *LoadInst = nullptr;
unsigned char OpFlag = STI.classifyLocalReference(nullptr);
@@ -1456,7 +1458,7 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
- MF.getDataLayout().getPointerSize(), Align);
+ MF.getDataLayout().getPointerSize(), Alignment);
LoadInst =
addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg),
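
The X86InstructionSelector changes above replace raw uint64_t byte counts with the llvm::Align type: getLoadStoreOp now takes Align, MachineMemOperand::getAlign() supplies it, and the aligned/unaligned vector-move split compares against Align(16/32/64). A small, self-contained sketch of the comparison idiom (assuming the llvm/Support/Alignment.h API of this LLVM version; the program is illustrative, not part of the backend):

    #include "llvm/Support/Alignment.h"
    #include <cstdio>

    int main() {
      // Align asserts its value is a power of two, so an "alignment" can no
      // longer silently be an arbitrary byte count.
      llvm::Align A(32);
      // Mirrors the Alignment >= Align(16) checks in the selector above.
      bool UseAlignedVecMove = A >= llvm::Align(16);
      std::printf("align=%llu, aligned-16-ok=%d\n",
                  (unsigned long long)A.value(), int(UseAlignedVecMove));
      return 0;
    }
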
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InterleavedAccess.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InterleavedAccess.cpp
index 8f74a8fe041d..a19e12766e10 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InterleavedAccess.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InterleavedAccess.cpp
@@ -69,7 +69,7 @@ class X86InterleavedAccessGroup {
/// Breaks down a vector \p 'Inst' of N elements into \p NumSubVectors
/// sub vectors of type \p T. Returns the sub-vectors in \p DecomposedVectors.
- void decompose(Instruction *Inst, unsigned NumSubVectors, VectorType *T,
+ void decompose(Instruction *Inst, unsigned NumSubVectors, FixedVectorType *T,
SmallVectorImpl<Instruction *> &DecomposedVectors);
/// Performs matrix transposition on a 4x4 matrix \p InputVectors and
@@ -127,7 +127,7 @@ public:
bool X86InterleavedAccessGroup::isSupported() const {
VectorType *ShuffleVecTy = Shuffles[0]->getType();
- Type *ShuffleEltTy = ShuffleVecTy->getVectorElementType();
+ Type *ShuffleEltTy = ShuffleVecTy->getElementType();
unsigned ShuffleElemSize = DL.getTypeSizeInBits(ShuffleEltTy);
unsigned WideInstSize;
@@ -150,7 +150,7 @@ bool X86InterleavedAccessGroup::isSupported() const {
// We support shuffle represents stride 4 for byte type with size of
// WideInstSize.
if (ShuffleElemSize == 64 && WideInstSize == 1024 && Factor == 4)
- return true;
+ return true;
if (ShuffleElemSize == 8 && isa<StoreInst>(Inst) && Factor == 4 &&
(WideInstSize == 256 || WideInstSize == 512 || WideInstSize == 1024 ||
@@ -165,7 +165,7 @@ bool X86InterleavedAccessGroup::isSupported() const {
}
void X86InterleavedAccessGroup::decompose(
- Instruction *VecInst, unsigned NumSubVectors, VectorType *SubVecTy,
+ Instruction *VecInst, unsigned NumSubVectors, FixedVectorType *SubVecTy,
SmallVectorImpl<Instruction *> &DecomposedVectors) {
assert((isa<LoadInst>(VecInst) || isa<ShuffleVectorInst>(VecInst)) &&
"Expected Load or Shuffle");
@@ -186,8 +186,8 @@ void X86InterleavedAccessGroup::decompose(
DecomposedVectors.push_back(
cast<ShuffleVectorInst>(Builder.CreateShuffleVector(
Op0, Op1,
- createSequentialMask(Builder, Indices[i],
- SubVecTy->getVectorNumElements(), 0))));
+ createSequentialMask(Indices[i], SubVecTy->getNumElements(),
+ 0))));
return;
}
@@ -201,7 +201,7 @@ void X86InterleavedAccessGroup::decompose(
// [0,1...,VF/2-1,VF/2+VF,VF/2+VF+1,...,2VF-1]
unsigned VecLength = DL.getTypeSizeInBits(VecWidth);
if (VecLength == 768 || VecLength == 1536) {
- VecBaseTy = VectorType::get(Type::getInt8Ty(LI->getContext()), 16);
+ VecBaseTy = FixedVectorType::get(Type::getInt8Ty(LI->getContext()), 16);
VecBasePtrTy = VecBaseTy->getPointerTo(LI->getPointerAddressSpace());
VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy);
NumLoads = NumSubVectors * (VecLength / 384);
@@ -211,13 +211,20 @@ void X86InterleavedAccessGroup::decompose(
VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy);
}
// Generate N loads of T type.
+ assert(VecBaseTy->getPrimitiveSizeInBits().isByteSized() &&
+ "VecBaseTy's size must be a multiple of 8");
+ const Align FirstAlignment = LI->getAlign();
+ const Align SubsequentAlignment = commonAlignment(
+ FirstAlignment, VecBaseTy->getPrimitiveSizeInBits().getFixedSize() / 8);
+ Align Alignment = FirstAlignment;
for (unsigned i = 0; i < NumLoads; i++) {
// TODO: Support inbounds GEP.
Value *NewBasePtr =
Builder.CreateGEP(VecBaseTy, VecBasePtr, Builder.getInt32(i));
Instruction *NewLoad =
- Builder.CreateAlignedLoad(VecBaseTy, NewBasePtr, LI->getAlignment());
+ Builder.CreateAlignedLoad(VecBaseTy, NewBasePtr, Alignment);
DecomposedVectors.push_back(NewLoad);
+ Alignment = SubsequentAlignment;
}
}
@@ -229,11 +236,11 @@ static MVT scaleVectorType(MVT VT) {
VT.getVectorNumElements() / 2);
}
-static uint32_t Concat[] = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 };
+static constexpr int Concat[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
// genShuffleBland - Creates a shuffle according to two vectors. This function
// only works on instructions with lanes inside 256-bit registers. According to
@@ -251,11 +258,11 @@ static uint32_t Concat[] = {
// By computing the shuffle on a sequence of 16 elements(one lane) and add the
// correct offset. We are creating a vpsuffed + blend sequence between two
// shuffles.
-static void genShuffleBland(MVT VT, ArrayRef<uint32_t> Mask,
- SmallVectorImpl<uint32_t> &Out, int LowOffset,
- int HighOffset) {
+static void genShuffleBland(MVT VT, ArrayRef<int> Mask,
+ SmallVectorImpl<int> &Out, int LowOffset,
+ int HighOffset) {
assert(VT.getSizeInBits() >= 256 &&
- "This function doesn't accept width smaller then 256");
+ "This function doesn't accept width smaller then 256");
unsigned NumOfElm = VT.getVectorNumElements();
for (unsigned i = 0; i < Mask.size(); i++)
Out.push_back(Mask[i] + LowOffset);
@@ -282,36 +289,35 @@ static void genShuffleBland(MVT VT, ArrayRef<uint32_t> Mask,
// Invec[2] - |2|5|8|11| TransposedMatrix[2] - |8|9|10|11|
static void reorderSubVector(MVT VT, SmallVectorImpl<Value *> &TransposedMatrix,
- ArrayRef<Value *> Vec, ArrayRef<uint32_t> VPShuf,
- unsigned VecElems, unsigned Stride,
- IRBuilder<> Builder) {
+ ArrayRef<Value *> Vec, ArrayRef<int> VPShuf,
+ unsigned VecElems, unsigned Stride,
+ IRBuilder<> &Builder) {
if (VecElems == 16) {
for (unsigned i = 0; i < Stride; i++)
TransposedMatrix[i] = Builder.CreateShuffleVector(
- Vec[i], UndefValue::get(Vec[i]->getType()), VPShuf);
+ Vec[i], UndefValue::get(Vec[i]->getType()), VPShuf);
return;
}
- SmallVector<uint32_t, 32> OptimizeShuf;
+ SmallVector<int, 32> OptimizeShuf;
Value *Temp[8];
for (unsigned i = 0; i < (VecElems / 16) * Stride; i += 2) {
genShuffleBland(VT, VPShuf, OptimizeShuf, (i / Stride) * 16,
- (i + 1) / Stride * 16);
+ (i + 1) / Stride * 16);
Temp[i / 2] = Builder.CreateShuffleVector(
- Vec[i % Stride], Vec[(i + 1) % Stride], OptimizeShuf);
+ Vec[i % Stride], Vec[(i + 1) % Stride], OptimizeShuf);
OptimizeShuf.clear();
}
if (VecElems == 32) {
std::copy(Temp, Temp + Stride, TransposedMatrix.begin());
return;
- }
- else
+ } else
for (unsigned i = 0; i < Stride; i++)
TransposedMatrix[i] =
- Builder.CreateShuffleVector(Temp[2 * i], Temp[2 * i + 1], Concat);
+ Builder.CreateShuffleVector(Temp[2 * i], Temp[2 * i + 1], Concat);
}
void X86InterleavedAccessGroup::interleave8bitStride4VF8(
@@ -325,19 +331,19 @@ void X86InterleavedAccessGroup::interleave8bitStride4VF8(
MVT VT = MVT::v8i16;
TransposedMatrix.resize(2);
- SmallVector<uint32_t, 16> MaskLow;
- SmallVector<uint32_t, 32> MaskLowTemp1, MaskLowWord;
- SmallVector<uint32_t, 32> MaskHighTemp1, MaskHighWord;
+ SmallVector<int, 16> MaskLow;
+ SmallVector<int, 32> MaskLowTemp1, MaskLowWord;
+ SmallVector<int, 32> MaskHighTemp1, MaskHighWord;
for (unsigned i = 0; i < 8; ++i) {
MaskLow.push_back(i);
MaskLow.push_back(i + 8);
}
- createUnpackShuffleMask<uint32_t>(VT, MaskLowTemp1, true, false);
- createUnpackShuffleMask<uint32_t>(VT, MaskHighTemp1, false, false);
- scaleShuffleMask<uint32_t>(2, MaskHighTemp1, MaskHighWord);
- scaleShuffleMask<uint32_t>(2, MaskLowTemp1, MaskLowWord);
+ createUnpackShuffleMask(VT, MaskLowTemp1, true, false);
+ createUnpackShuffleMask(VT, MaskHighTemp1, false, false);
+ narrowShuffleMaskElts(2, MaskHighTemp1, MaskHighWord);
+ narrowShuffleMaskElts(2, MaskLowTemp1, MaskLowWord);
// IntrVec1Low = c0 m0 c1 m1 c2 m2 c3 m3 c4 m4 c5 m5 c6 m6 c7 m7
// IntrVec2Low = y0 k0 y1 k1 y2 k2 y3 k3 y4 k4 y5 k5 y6 k6 y7 k7
Value *IntrVec1Low =
@@ -367,25 +373,25 @@ void X86InterleavedAccessGroup::interleave8bitStride4(
MVT HalfVT = scaleVectorType(VT);
TransposedMatrix.resize(4);
- SmallVector<uint32_t, 32> MaskHigh;
- SmallVector<uint32_t, 32> MaskLow;
- SmallVector<uint32_t, 32> LowHighMask[2];
- SmallVector<uint32_t, 32> MaskHighTemp;
- SmallVector<uint32_t, 32> MaskLowTemp;
+ SmallVector<int, 32> MaskHigh;
+ SmallVector<int, 32> MaskLow;
+ SmallVector<int, 32> LowHighMask[2];
+ SmallVector<int, 32> MaskHighTemp;
+ SmallVector<int, 32> MaskLowTemp;
// MaskHighTemp and MaskLowTemp built in the vpunpckhbw and vpunpcklbw X86
// shuffle pattern.
- createUnpackShuffleMask<uint32_t>(VT, MaskLow, true, false);
- createUnpackShuffleMask<uint32_t>(VT, MaskHigh, false, false);
+ createUnpackShuffleMask(VT, MaskLow, true, false);
+ createUnpackShuffleMask(VT, MaskHigh, false, false);
// MaskHighTemp1 and MaskLowTemp1 built in the vpunpckhdw and vpunpckldw X86
// shuffle pattern.
- createUnpackShuffleMask<uint32_t>(HalfVT, MaskLowTemp, true, false);
- createUnpackShuffleMask<uint32_t>(HalfVT, MaskHighTemp, false, false);
- scaleShuffleMask<uint32_t>(2, MaskLowTemp, LowHighMask[0]);
- scaleShuffleMask<uint32_t>(2, MaskHighTemp, LowHighMask[1]);
+ createUnpackShuffleMask(HalfVT, MaskLowTemp, true, false);
+ createUnpackShuffleMask(HalfVT, MaskHighTemp, false, false);
+ narrowShuffleMaskElts(2, MaskLowTemp, LowHighMask[0]);
+ narrowShuffleMaskElts(2, MaskHighTemp, LowHighMask[1]);
// IntrVec1Low = c0 m0 c1 m1 ... c7 m7 | c16 m16 c17 m17 ... c23 m23
// IntrVec1High = c8 m8 c9 m9 ... c15 m15 | c24 m24 c25 m25 ... c31 m31
@@ -433,7 +439,7 @@ void X86InterleavedAccessGroup::interleave8bitStride4(
// For example shuffle pattern for VF 16 register size 256 -> lanes = 2
// {<[0|3|6|1|4|7|2|5]-[8|11|14|9|12|15|10|13]>}
static void createShuffleStride(MVT VT, int Stride,
- SmallVectorImpl<uint32_t> &Mask) {
+ SmallVectorImpl<int> &Mask) {
int VectorSize = VT.getSizeInBits();
int VF = VT.getVectorNumElements();
int LaneCount = std::max(VectorSize / 128, 1);
@@ -446,7 +452,7 @@ static void createShuffleStride(MVT VT, int Stride,
// inside mask a shuffleMask. A mask contains exactly 3 groups, where
// each group is a monotonically increasing sequence with stride 3.
// For example shuffleMask {0,3,6,1,4,7,2,5} => {3,3,2}
-static void setGroupSize(MVT VT, SmallVectorImpl<uint32_t> &SizeInfo) {
+static void setGroupSize(MVT VT, SmallVectorImpl<int> &SizeInfo) {
int VectorSize = VT.getSizeInBits();
int VF = VT.getVectorNumElements() / std::max(VectorSize / 128, 1);
for (int i = 0, FirstGroupElement = 0; i < 3; i++) {
@@ -470,7 +476,7 @@ static void setGroupSize(MVT VT, SmallVectorImpl<uint32_t> &SizeInfo) {
// direction of the alignment. (false - align to the "right" side while true -
// align to the "left" side)
static void DecodePALIGNRMask(MVT VT, unsigned Imm,
- SmallVectorImpl<uint32_t> &ShuffleMask,
+ SmallVectorImpl<int> &ShuffleMask,
bool AlignDirection = true, bool Unary = false) {
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = std::max((int)VT.getSizeInBits() / 128, 1);
@@ -519,7 +525,7 @@ static void DecodePALIGNRMask(MVT VT, unsigned Imm,
// Invec[2] - |8|9|10|11| Vec[2] - |2|5|8|11|
static void concatSubVector(Value **Vec, ArrayRef<Instruction *> InVec,
- unsigned VecElems, IRBuilder<> Builder) {
+ unsigned VecElems, IRBuilder<> &Builder) {
if (VecElems == 16) {
for (int i = 0; i < 3; i++)
Vec[i] = InVec[i];
@@ -547,11 +553,11 @@ void X86InterleavedAccessGroup::deinterleave8bitStride3(
// Matrix[2]= b5 c5 a6 b6 c6 a7 b7 c7
TransposedMatrix.resize(3);
- SmallVector<uint32_t, 32> VPShuf;
- SmallVector<uint32_t, 32> VPAlign[2];
- SmallVector<uint32_t, 32> VPAlign2;
- SmallVector<uint32_t, 32> VPAlign3;
- SmallVector<uint32_t, 3> GroupSize;
+ SmallVector<int, 32> VPShuf;
+ SmallVector<int, 32> VPAlign[2];
+ SmallVector<int, 32> VPAlign2;
+ SmallVector<int, 32> VPAlign3;
+ SmallVector<int, 3> GroupSize;
Value *Vec[6], *TempVector[3];
MVT VT = MVT::getVT(Shuffles[0]->getType());
@@ -605,8 +611,8 @@ void X86InterleavedAccessGroup::deinterleave8bitStride3(
// group2Shuffle reorder the shuffle stride back into continuous order.
// For example For VF16 with Mask1 = {0,3,6,9,12,15,2,5,8,11,14,1,4,7,10,13} =>
// MaskResult = {0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5}.
-static void group2Shuffle(MVT VT, SmallVectorImpl<uint32_t> &Mask,
- SmallVectorImpl<uint32_t> &Output) {
+static void group2Shuffle(MVT VT, SmallVectorImpl<int> &Mask,
+ SmallVectorImpl<int> &Output) {
int IndexGroup[3] = {0, 0, 0};
int Index = 0;
int VectorWidth = VT.getSizeInBits();
@@ -633,11 +639,11 @@ void X86InterleavedAccessGroup::interleave8bitStride3(
// Matrix[2]= c0 c1 c2 c3 c3 a7 b7 c7
TransposedMatrix.resize(3);
- SmallVector<uint32_t, 3> GroupSize;
- SmallVector<uint32_t, 32> VPShuf;
- SmallVector<uint32_t, 32> VPAlign[3];
- SmallVector<uint32_t, 32> VPAlign2;
- SmallVector<uint32_t, 32> VPAlign3;
+ SmallVector<int, 3> GroupSize;
+ SmallVector<int, 32> VPShuf;
+ SmallVector<int, 32> VPAlign[3];
+ SmallVector<int, 32> VPAlign2;
+ SmallVector<int, 32> VPAlign3;
Value *Vec[3], *TempVector[3];
MVT VT = MVT::getVectorVT(MVT::i8, VecElems);
@@ -682,7 +688,7 @@ void X86InterleavedAccessGroup::interleave8bitStride3(
unsigned NumOfElm = VT.getVectorNumElements();
group2Shuffle(VT, GroupSize, VPShuf);
- reorderSubVector(VT, TransposedMatrix, Vec, VPShuf, NumOfElm,3, Builder);
+ reorderSubVector(VT, TransposedMatrix, Vec, VPShuf, NumOfElm, 3, Builder);
}
void X86InterleavedAccessGroup::transpose_4x4(
@@ -692,25 +698,25 @@ void X86InterleavedAccessGroup::transpose_4x4(
TransposedMatrix.resize(4);
// dst = src1[0,1],src2[0,1]
- uint32_t IntMask1[] = {0, 1, 4, 5};
- ArrayRef<uint32_t> Mask = makeArrayRef(IntMask1, 4);
+ static constexpr int IntMask1[] = {0, 1, 4, 5};
+ ArrayRef<int> Mask = makeArrayRef(IntMask1, 4);
Value *IntrVec1 = Builder.CreateShuffleVector(Matrix[0], Matrix[2], Mask);
Value *IntrVec2 = Builder.CreateShuffleVector(Matrix[1], Matrix[3], Mask);
// dst = src1[2,3],src2[2,3]
- uint32_t IntMask2[] = {2, 3, 6, 7};
+ static constexpr int IntMask2[] = {2, 3, 6, 7};
Mask = makeArrayRef(IntMask2, 4);
Value *IntrVec3 = Builder.CreateShuffleVector(Matrix[0], Matrix[2], Mask);
Value *IntrVec4 = Builder.CreateShuffleVector(Matrix[1], Matrix[3], Mask);
// dst = src1[0],src2[0],src1[2],src2[2]
- uint32_t IntMask3[] = {0, 4, 2, 6};
+ static constexpr int IntMask3[] = {0, 4, 2, 6};
Mask = makeArrayRef(IntMask3, 4);
TransposedMatrix[0] = Builder.CreateShuffleVector(IntrVec1, IntrVec2, Mask);
TransposedMatrix[2] = Builder.CreateShuffleVector(IntrVec3, IntrVec4, Mask);
// dst = src1[1],src2[1],src1[3],src2[3]
- uint32_t IntMask4[] = {1, 5, 3, 7};
+ static constexpr int IntMask4[] = {1, 5, 3, 7};
Mask = makeArrayRef(IntMask4, 4);
TransposedMatrix[1] = Builder.CreateShuffleVector(IntrVec1, IntrVec2, Mask);
TransposedMatrix[3] = Builder.CreateShuffleVector(IntrVec3, IntrVec4, Mask);
@@ -721,14 +727,14 @@ void X86InterleavedAccessGroup::transpose_4x4(
bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() {
SmallVector<Instruction *, 4> DecomposedVectors;
SmallVector<Value *, 4> TransposedVectors;
- VectorType *ShuffleTy = Shuffles[0]->getType();
+ auto *ShuffleTy = cast<FixedVectorType>(Shuffles[0]->getType());
if (isa<LoadInst>(Inst)) {
// Try to generate target-sized register(/instruction).
decompose(Inst, Factor, ShuffleTy, DecomposedVectors);
- Type *ShuffleEltTy = Inst->getType();
- unsigned NumSubVecElems = ShuffleEltTy->getVectorNumElements() / Factor;
+ auto *ShuffleEltTy = cast<FixedVectorType>(Inst->getType());
+ unsigned NumSubVecElems = ShuffleEltTy->getNumElements() / Factor;
// Perform matrix-transposition in order to compute interleaved
// results by generating some sort of (optimized) target-specific
// instructions.
@@ -756,13 +762,14 @@ bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() {
return true;
}
- Type *ShuffleEltTy = ShuffleTy->getVectorElementType();
- unsigned NumSubVecElems = ShuffleTy->getVectorNumElements() / Factor;
+ Type *ShuffleEltTy = ShuffleTy->getElementType();
+ unsigned NumSubVecElems = ShuffleTy->getNumElements() / Factor;
// Lower the interleaved stores:
// 1. Decompose the interleaved wide shuffle into individual shuffle
// vectors.
- decompose(Shuffles[0], Factor, VectorType::get(ShuffleEltTy, NumSubVecElems),
+ decompose(Shuffles[0], Factor,
+ FixedVectorType::get(ShuffleEltTy, NumSubVecElems),
DecomposedVectors);
// 2. Transpose the interleaved-vectors into vectors of contiguous
@@ -793,8 +800,7 @@ bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() {
// 4. Generate a store instruction for wide-vec.
StoreInst *SI = cast<StoreInst>(Inst);
- Builder.CreateAlignedStore(WideVec, SI->getPointerOperand(),
- SI->getAlignment());
+ Builder.CreateAlignedStore(WideVec, SI->getPointerOperand(), SI->getAlign());
return true;
}
@@ -826,7 +832,8 @@ bool X86TargetLowering::lowerInterleavedStore(StoreInst *SI,
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
- assert(SVI->getType()->getVectorNumElements() % Factor == 0 &&
+ assert(cast<FixedVectorType>(SVI->getType())->getNumElements() % Factor ==
+ 0 &&
"Invalid interleaved store");
// Holds the indices of SVI that correspond to the starting index of each
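
The X86InterleavedAccess changes above move the shuffle masks from SmallVector<uint32_t> to SmallVector<int>, switch the fixed-width sub-vector types to FixedVectorType, and query alignment via getAlign(). A brief sketch of the mask-as-int idiom (assuming the LLVM 11-era IRBuilder API; the function and value names are illustrative only):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/IRBuilder.h"

    // Concatenate two <4 x i32> values into one <8 x i32> by handing
    // CreateShuffleVector a plain ArrayRef<int> mask, the form the rewritten
    // helpers above (reorderSubVector, genShuffleBland, ...) now build.
    llvm::Value *concat4x32(llvm::IRBuilder<> &B, llvm::Value *Lo,
                            llvm::Value *Hi) {
      llvm::SmallVector<int, 8> Mask;
      for (int i = 0; i < 8; ++i)
        Mask.push_back(i);
      return B.CreateShuffleVector(Lo, Hi, Mask);
    }
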
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 40bf28df3b90..1c10c07abeee 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -679,8 +679,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_512, TRUNCATE_TO_REG,
X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
- X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_128, INTR_TYPE_1OP_MASK,
- X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_128, TRUNCATE_TO_REG,
+ X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_512, INTR_TYPE_1OP_MASK,
@@ -783,10 +783,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FSUBS, X86ISD::FSUBS_RND),
X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK,
X86ISD::FSUBS, X86ISD::FSUBS_RND),
- X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTPH2PS, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK,
- X86ISD::CVTPH2PS, 0),
X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK_SAE,
X86ISD::CVTPH2PS, X86ISD::CVTPH2PS_SAE),
X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, CVTPS2PH_MASK,
@@ -997,7 +993,16 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(bmi_bzhi_32, INTR_TYPE_2OP, X86ISD::BZHI, 0),
X86_INTRINSIC_DATA(bmi_bzhi_64, INTR_TYPE_2OP, X86ISD::BZHI, 0),
+ X86_INTRINSIC_DATA(bmi_pdep_32, INTR_TYPE_2OP, X86ISD::PDEP, 0),
+ X86_INTRINSIC_DATA(bmi_pdep_64, INTR_TYPE_2OP, X86ISD::PDEP, 0),
+ X86_INTRINSIC_DATA(bmi_pext_32, INTR_TYPE_2OP, X86ISD::PEXT, 0),
+ X86_INTRINSIC_DATA(bmi_pext_64, INTR_TYPE_2OP, X86ISD::PEXT, 0),
+ X86_INTRINSIC_DATA(fma_vfmaddsub_pd, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(fma_vfmaddsub_pd_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(fma_vfmaddsub_ps, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(fma_vfmaddsub_ps_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0),
+ X86_INTRINSIC_DATA(sse_cmp_ss, INTR_TYPE_3OP, X86ISD::FSETCC, 0),
X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse_comigt_ss, COMI, X86ISD::COMI, ISD::SETGT),
@@ -1022,6 +1027,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse_ucomilt_ss, COMI, X86ISD::UCOMI, ISD::SETLT),
X86_INTRINSIC_DATA(sse_ucomineq_ss, COMI, X86ISD::UCOMI, ISD::SETNE),
X86_INTRINSIC_DATA(sse2_cmp_pd, INTR_TYPE_3OP, X86ISD::CMPP, 0),
+ X86_INTRINSIC_DATA(sse2_cmp_sd, INTR_TYPE_3OP, X86ISD::FSETCC, 0),
X86_INTRINSIC_DATA(sse2_comieq_sd, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse2_comige_sd, COMI, X86ISD::COMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse2_comigt_sd, COMI, X86ISD::COMI, ISD::SETGT),
@@ -1104,8 +1110,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(subborrow_64, ADX, X86ISD::SBB, X86ISD::SUB),
X86_INTRINSIC_DATA(tbm_bextri_u32, BEXTRI, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(tbm_bextri_u64, BEXTRI, X86ISD::BEXTR, 0),
- X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
- X86_INTRINSIC_DATA(vcvtph2ps_256, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
X86_INTRINSIC_DATA(vcvtps2ph_128, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),
X86_INTRINSIC_DATA(vcvtps2ph_256, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),
@@ -1157,10 +1161,8 @@ static const IntrinsicData* getIntrinsicWithoutChain(unsigned IntNo) {
}
static void verifyIntrinsicTables() {
- assert(std::is_sorted(std::begin(IntrinsicsWithoutChain),
- std::end(IntrinsicsWithoutChain)) &&
- std::is_sorted(std::begin(IntrinsicsWithChain),
- std::end(IntrinsicsWithChain)) &&
+ assert(llvm::is_sorted(IntrinsicsWithoutChain) &&
+ llvm::is_sorted(IntrinsicsWithChain) &&
"Intrinsic data tables should be sorted by Intrinsic ID");
assert((std::adjacent_find(std::begin(IntrinsicsWithoutChain),
std::end(IntrinsicsWithoutChain)) ==
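
The verifyIntrinsicTables hunk above swaps the iterator-pair std::is_sorted calls for the range-based llvm::is_sorted from ADT/STLExtras.h. A tiny sketch of the idiom (the table type and helper are hypothetical; only the llvm::is_sorted call mirrors the diff):

    #include "llvm/ADT/STLExtras.h"
    #include <cassert>

    template <typename TableT> void checkSorted(const TableT &Table) {
      // Mirrors the assertion above: the intrinsic data tables must stay
      // sorted by intrinsic ID.
      assert(llvm::is_sorted(Table) && "table must be sorted by intrinsic ID");
    }
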
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.cpp
index da53d6420021..84f560f2f9ee 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.cpp
@@ -85,14 +85,14 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
verify(*STI.getInstrInfo());
}
-bool X86LegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
+bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
switch (MI.getIntrinsicID()) {
case Intrinsic::memcpy:
case Intrinsic::memset:
case Intrinsic::memmove:
- if (createMemLibcall(MIRBuilder, MRI, MI) ==
+ if (createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI) ==
LegalizerHelper::UnableToLegalize)
return false;
MI.eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.h
index 7a0f13fb5ae6..72d25096d72b 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86LegalizerInfo.h
@@ -32,8 +32,8 @@ private:
public:
X86LegalizerInfo(const X86Subtarget &STI, const X86TargetMachine &TM);
- bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const override;
+ bool legalizeIntrinsic(LegalizerHelper &Helper,
+ MachineInstr &MI) const override;
private:
void setLegalizerInfo32bit();
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
index 35fc439998f9..50f8b3477acc 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
@@ -822,79 +822,3 @@ INITIALIZE_PASS_END(X86LoadValueInjectionLoadHardeningPass, PASS_KEY,
FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningPass() {
return new X86LoadValueInjectionLoadHardeningPass();
}
-
-namespace {
-
-/// The `X86LoadValueInjectionLoadHardeningPass` above depends on expensive
-/// analysis passes that add complexity to the pipeline. This complexity
-/// can cause noticable overhead when no optimizations are enabled, i.e., -O0.
-/// The purpose of `X86LoadValueInjectionLoadHardeningUnoptimizedPass` is to
-/// provide the same security as the optimized pass, but without adding
-/// unnecessary complexity to the LLVM pipeline.
-///
-/// The behavior of this pass is simply to insert an LFENCE after every load
-/// instruction.
-class X86LoadValueInjectionLoadHardeningUnoptimizedPass
- : public MachineFunctionPass {
-public:
- X86LoadValueInjectionLoadHardeningUnoptimizedPass()
- : MachineFunctionPass(ID) {}
-
- StringRef getPassName() const override {
- return "X86 Load Value Injection (LVI) Load Hardening (Unoptimized)";
- }
- bool runOnMachineFunction(MachineFunction &MF) override;
- static char ID;
-};
-
-} // end anonymous namespace
-
-char X86LoadValueInjectionLoadHardeningUnoptimizedPass::ID = 0;
-
-bool X86LoadValueInjectionLoadHardeningUnoptimizedPass::runOnMachineFunction(
- MachineFunction &MF) {
- LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
- << " *****\n");
- const X86Subtarget *STI = &MF.getSubtarget<X86Subtarget>();
- if (!STI->useLVILoadHardening())
- return false;
-
- // FIXME: support 32-bit
- if (!STI->is64Bit())
- report_fatal_error("LVI load hardening is only supported on 64-bit", false);
-
- // Don't skip functions with the "optnone" attr but participate in opt-bisect.
- const Function &F = MF.getFunction();
- if (!F.hasOptNone() && skipFunction(F))
- return false;
-
- bool Modified = false;
- ++NumFunctionsConsidered;
-
- const TargetInstrInfo *TII = STI->getInstrInfo();
- for (auto &MBB : MF) {
- for (auto &MI : MBB) {
- if (!MI.mayLoad() || MI.getOpcode() == X86::LFENCE ||
- MI.getOpcode() == X86::MFENCE)
- continue;
-
- MachineBasicBlock::iterator InsertionPt =
- MI.getNextNode() ? MI.getNextNode() : MBB.end();
- BuildMI(MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE));
- ++NumFences;
- Modified = true;
- }
- }
-
- if (Modified)
- ++NumFunctionsMitigated;
-
- return Modified;
-}
-
-INITIALIZE_PASS(X86LoadValueInjectionLoadHardeningUnoptimizedPass, PASS_KEY,
- "X86 LVI load hardening", false, false)
-
-FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningUnoptimizedPass() {
- return new X86LoadValueInjectionLoadHardeningUnoptimizedPass();
-}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp
index f5caaaae4d84..9ce2a4637e2e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -14,11 +14,12 @@
#include "MCTargetDesc/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86InstComments.h"
+#include "MCTargetDesc/X86ShuffleDecode.h"
#include "MCTargetDesc/X86TargetStreamer.h"
-#include "Utils/X86ShuffleDecode.h"
#include "X86AsmPrinter.h"
#include "X86RegisterInfo.h"
#include "X86ShuffleDecodeConstantPool.h"
+#include "X86Subtarget.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/iterator_range.h"
@@ -43,6 +44,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -72,9 +74,30 @@ private:
} // end anonymous namespace
+/// A RAII helper which defines a region of instructions which can't have
+/// padding added between them for correctness.
+struct NoAutoPaddingScope {
+ MCStreamer &OS;
+ const bool OldAllowAutoPadding;
+ NoAutoPaddingScope(MCStreamer &OS)
+ : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
+ changeAndComment(false);
+ }
+ ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); }
+ void changeAndComment(bool b) {
+ if (b == OS.getAllowAutoPadding())
+ return;
+ OS.setAllowAutoPadding(b);
+ if (b)
+ OS.emitRawComment("autopadding");
+ else
+ OS.emitRawComment("noautopadding");
+ }
+};
+
// Emit a minimal sequence of nops spanning NumBytes bytes.
-static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
- const MCSubtargetInfo &STI);
+static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
+ const X86Subtarget *Subtarget);
void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
const MCSubtargetInfo &STI,
@@ -94,13 +117,13 @@ void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
if (InShadow && CurrentShadowSize < RequiredShadowSize) {
InShadow = false;
- EmitNops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
- MF->getSubtarget<X86Subtarget>().is64Bit(), STI);
+ emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
+ &MF->getSubtarget<X86Subtarget>());
}
}
void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
- OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
+ OutStreamer->emitInstruction(Inst, getSubtargetInfo());
SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
}
@@ -116,6 +139,10 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
/// operand to an MCSymbol.
MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
+ const Triple &TT = TM.getTargetTriple();
+ if (MO.isGlobal() && TT.isOSBinFormatELF())
+ return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal());
+
const DataLayout &DL = MF.getDataLayout();
assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
"Isn't a symbol reference");
@@ -272,7 +299,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
// local labels. This is only safe when the symbols are in the same
// section so we are restricting it to jumptable references.
MCSymbol *Label = Ctx.createTempSymbol();
- AsmPrinter.OutStreamer->EmitAssignment(Label, Expr);
+ AsmPrinter.OutStreamer->emitAssignment(Label, Expr);
Expr = MCSymbolRefExpr::create(Label, Ctx);
}
break;
@@ -482,6 +509,26 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
"LEA has segment specified!");
break;
+ case X86::MULX32Hrr:
+ case X86::MULX32Hrm:
+ case X86::MULX64Hrr:
+ case X86::MULX64Hrm: {
+ // Turn into regular MULX by duplicating the destination.
+ unsigned NewOpc;
+ switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
+ case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
+ case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
+ case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
+ }
+ OutMI.setOpcode(NewOpc);
+ // Duplicate the destination.
+ unsigned DestReg = OutMI.getOperand(0).getReg();
+ OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
+ break;
+ }
+
// Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
// if one of the registers is extended, but other isn't.
case X86::VMOVZPQILo2PQIrr:
@@ -929,6 +976,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
const MachineInstr &MI) {
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
bool Is64Bits = MI.getOpcode() == X86::TLS_addr64 ||
MI.getOpcode() == X86::TLS_base_addr64;
MCContext &Ctx = OutStreamer->getContext();
@@ -1034,29 +1082,26 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
/// Return the longest nop which can be efficiently decoded for the given
/// target cpu. 15 bytes is the longest single NOP instruction, but some
/// platforms can't decode the longest forms efficiently.
-static unsigned MaxLongNopLength(const MCSubtargetInfo &STI) {
- uint64_t MaxNopLength = 10;
- if (STI.getFeatureBits()[X86::ProcIntelSLM])
- MaxNopLength = 7;
- else if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
- MaxNopLength = 15;
- else if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP])
- MaxNopLength = 11;
- return MaxNopLength;
+static unsigned maxLongNopLength(const X86Subtarget *Subtarget) {
+ if (Subtarget->getFeatureBits()[X86::ProcIntelSLM])
+ return 7;
+ if (Subtarget->getFeatureBits()[X86::FeatureFast15ByteNOP])
+ return 15;
+ if (Subtarget->getFeatureBits()[X86::FeatureFast11ByteNOP])
+ return 11;
+ if (Subtarget->getFeatureBits()[X86::FeatureNOPL] || Subtarget->is64Bit())
+ return 10;
+ if (Subtarget->is32Bit())
+ return 2;
+ return 1;
}
/// Emit the largest nop instruction smaller than or equal to \p NumBytes
/// bytes. Return the size of nop emitted.
-static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
- const MCSubtargetInfo &STI) {
- if (!Is64Bit) {
- // TODO Do additional checking if the CPU supports multi-byte nops.
- OS.EmitInstruction(MCInstBuilder(X86::NOOP), STI);
- return 1;
- }
-
+static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
+ const X86Subtarget *Subtarget) {
// Cap a single nop emission at the profitable value for the target
- NumBytes = std::min(NumBytes, MaxLongNopLength(STI));
+ NumBytes = std::min(NumBytes, maxLongNopLength(Subtarget));
unsigned NopSize;
unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
@@ -1125,25 +1170,26 @@ static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
NopSize += NumPrefixes;
for (unsigned i = 0; i != NumPrefixes; ++i)
- OS.EmitBytes("\x66");
+ OS.emitBytes("\x66");
switch (Opc) {
default: llvm_unreachable("Unexpected opcode");
case X86::NOOP:
- OS.EmitInstruction(MCInstBuilder(Opc), STI);
+ OS.emitInstruction(MCInstBuilder(Opc), *Subtarget);
break;
case X86::XCHG16ar:
- OS.EmitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX), STI);
+ OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX),
+ *Subtarget);
break;
case X86::NOOPL:
case X86::NOOPW:
- OS.EmitInstruction(MCInstBuilder(Opc)
+ OS.emitInstruction(MCInstBuilder(Opc)
.addReg(BaseReg)
.addImm(ScaleVal)
.addReg(IndexReg)
.addImm(Displacement)
.addReg(SegmentReg),
- STI);
+ *Subtarget);
break;
}
assert(NopSize <= NumBytes && "We overemitted?");
@@ -1151,39 +1197,16 @@ static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
}
/// Emit the optimal amount of multi-byte nops on X86.
-static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
- const MCSubtargetInfo &STI) {
+static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
+ const X86Subtarget *Subtarget) {
unsigned NopsToEmit = NumBytes;
(void)NopsToEmit;
while (NumBytes) {
- NumBytes -= EmitNop(OS, NumBytes, Is64Bit, STI);
+ NumBytes -= emitNop(OS, NumBytes, Subtarget);
assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
}
}
-/// A RAII helper which defines a region of instructions which can't have
-/// padding added between them for correctness.
-struct NoAutoPaddingScope {
- MCStreamer &OS;
- const bool OldAllowAutoPadding;
- NoAutoPaddingScope(MCStreamer &OS)
- : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
- changeAndComment(false);
- }
- ~NoAutoPaddingScope() {
- changeAndComment(OldAllowAutoPadding);
- }
- void changeAndComment(bool b) {
- if (b == OS.getAllowAutoPadding())
- return;
- OS.setAllowAutoPadding(b);
- if (b)
- OS.emitRawComment("autopadding");
- else
- OS.emitRawComment("noautopadding");
- }
-};
-
void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
X86MCInstLower &MCIL) {
assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
@@ -1192,8 +1215,7 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
StatepointOpers SOpers(&MI);
if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
- EmitNops(*OutStreamer, PatchBytes, Subtarget->is64Bit(),
- getSubtargetInfo());
+ emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
} else {
// Lower call target and choose correct opcode
const MachineOperand &CallTarget = SOpers.getCallTarget();
@@ -1235,14 +1257,14 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
MCInst CallInst;
CallInst.setOpcode(CallOpcode);
CallInst.addOperand(CallTargetMCOp);
- OutStreamer->EmitInstruction(CallInst, getSubtargetInfo());
+ OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
}
// Record our statepoint node in the same section used by STACKMAP
// and PATCHPOINT
auto &Ctx = OutStreamer->getContext();
MCSymbol *MILabel = Ctx.createTempSymbol();
- OutStreamer->EmitLabel(MILabel);
+ OutStreamer->emitLabel(MILabel);
SM.recordStatepoint(*MILabel, MI);
}
@@ -1262,7 +1284,7 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
auto &Ctx = OutStreamer->getContext();
MCSymbol *FaultingLabel = Ctx.createTempSymbol();
- OutStreamer->EmitLabel(FaultingLabel);
+ OutStreamer->emitLabel(FaultingLabel);
assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);
@@ -1280,7 +1302,7 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
MI.addOperand(MaybeOperand.getValue());
OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
- OutStreamer->EmitInstruction(MI, getSubtargetInfo());
+ OutStreamer->emitInstruction(MI, getSubtargetInfo());
}
void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
@@ -1317,7 +1339,17 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());
if (Code.size() < MinSize) {
- if (MinSize == 2 && Opcode == X86::PUSH64r) {
+ if (MinSize == 2 && Subtarget->is32Bit() &&
+ Subtarget->isTargetWindowsMSVC() &&
+ (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
+      // For compatibility reasons, when targeting MSVC, it is important to
+      // generate a 'legacy' NOP in the form of an 8B FF MOV EDI, EDI. Some tools
+ // rely specifically on this pattern to be able to patch a function.
+ // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
+ *Subtarget);
+ } else if (MinSize == 2 && Opcode == X86::PUSH64r) {
// This is an optimization that lets us get away without emitting a nop in
// many cases.
//
@@ -1325,14 +1357,13 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
// bytes too, so the check on MinSize is important.
MCI.setOpcode(X86::PUSH64rmr);
} else {
- unsigned NopSize = EmitNop(*OutStreamer, MinSize, Subtarget->is64Bit(),
- getSubtargetInfo());
+ unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);
assert(NopSize == MinSize && "Could not implement MinSize!");
(void)NopSize;
}
}
- OutStreamer->EmitInstruction(MCI, getSubtargetInfo());
+ OutStreamer->emitInstruction(MCI, getSubtargetInfo());
}
// Lower a stackmap of the form:
@@ -1342,7 +1373,7 @@ void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
auto &Ctx = OutStreamer->getContext();
MCSymbol *MILabel = Ctx.createTempSymbol();
- OutStreamer->EmitLabel(MILabel);
+ OutStreamer->emitLabel(MILabel);
SM.recordStackMap(*MILabel, MI);
unsigned NumShadowBytes = MI.getOperand(1).getImm();
@@ -1361,7 +1392,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
auto &Ctx = OutStreamer->getContext();
MCSymbol *MILabel = Ctx.createTempSymbol();
- OutStreamer->EmitLabel(MILabel);
+ OutStreamer->emitLabel(MILabel);
SM.recordPatchPoint(*MILabel, MI);
PatchPointOpers opers(&MI);
@@ -1410,8 +1441,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
assert(NumBytes >= EncodedBytes &&
"Patchpoint can't request size less than the length of a call.");
- EmitNops(*OutStreamer, NumBytes - EncodedBytes, Subtarget->is64Bit(),
- getSubtargetInfo());
+ emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget);
}
void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
@@ -1442,13 +1472,13 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
// First we emit the label and the jump.
auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
OutStreamer->AddComment("# XRay Custom Event Log");
- OutStreamer->EmitCodeAlignment(2);
- OutStreamer->EmitLabel(CurSled);
+ OutStreamer->emitCodeAlignment(2);
+ OutStreamer->emitLabel(CurSled);
// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
// an operand (computed as an offset from the jmp instruction).
// FIXME: Find another less hacky way to force the relative jump.
- OutStreamer->EmitBinaryData("\xeb\x0f");
+ OutStreamer->emitBinaryData("\xeb\x0f");
// The default C calling convention will place two arguments into %rcx and
// %rdx -- so we only work with those.
@@ -1471,7 +1501,7 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
EmitAndCountInstruction(
MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
} else {
- EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo());
+ emitX86Nops(*OutStreamer, 4, Subtarget);
}
}
@@ -1500,14 +1530,14 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
if (UsedMask[I])
EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
else
- EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo());
+ emitX86Nops(*OutStreamer, 1, Subtarget);
OutStreamer->AddComment("xray custom event end.");
- // Record the sled version. Older versions of this sled were spelled
- // differently, so we let the runtime handle the different offsets we're
- // using.
- recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 1);
+ // Record the sled version. Version 0 of this sled was spelled differently, so
+ // we let the runtime handle the different offsets we're using. Version 2
+ // changed the absolute address to a PC-relative address.
+ recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2);
}
void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
@@ -1538,13 +1568,13 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
// First we emit the label and the jump.
auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
OutStreamer->AddComment("# XRay Typed Event Log");
- OutStreamer->EmitCodeAlignment(2);
- OutStreamer->EmitLabel(CurSled);
+ OutStreamer->emitCodeAlignment(2);
+ OutStreamer->emitLabel(CurSled);
// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
// an operand (computed as an offset from the jmp instruction).
// FIXME: Find another less hacky way to force the relative jump.
- OutStreamer->EmitBinaryData("\xeb\x14");
+ OutStreamer->emitBinaryData("\xeb\x14");
// An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
// so we'll work with those. Or we may be called via SystemV, in which case
@@ -1569,7 +1599,7 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
EmitAndCountInstruction(
MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
} else {
- EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo());
+ emitX86Nops(*OutStreamer, 4, Subtarget);
}
}
@@ -1603,12 +1633,12 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
if (UsedMask[I])
EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
else
- EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo());
+ emitX86Nops(*OutStreamer, 1, Subtarget);
OutStreamer->AddComment("xray typed event end.");
// Record the sled version.
- recordSled(CurSled, MI, SledKind::TYPED_EVENT, 0);
+ recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2);
}
void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
@@ -1623,7 +1653,7 @@ void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
.getValueAsString()
.getAsInteger(10, Num))
return;
- EmitNops(*OutStreamer, Num, Subtarget->is64Bit(), getSubtargetInfo());
+ emitX86Nops(*OutStreamer, Num, Subtarget);
return;
}
// We want to emit the following pattern:
@@ -1640,15 +1670,15 @@ void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
// call <relative offset, 32-bits> // 5 bytes
//
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
- OutStreamer->EmitCodeAlignment(2);
- OutStreamer->EmitLabel(CurSled);
+ OutStreamer->emitCodeAlignment(2);
+ OutStreamer->emitLabel(CurSled);
// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
// an operand (computed as an offset from the jmp instruction).
// FIXME: Find another less hacky way to force the relative jump.
- OutStreamer->EmitBytes("\xeb\x09");
- EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo());
- recordSled(CurSled, MI, SledKind::FUNCTION_ENTER);
+ OutStreamer->emitBytes("\xeb\x09");
+ emitX86Nops(*OutStreamer, 9, Subtarget);
+ recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2);
}
void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
@@ -1670,17 +1700,17 @@ void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
//
// This just makes sure that the alignment for the next instruction is 2.
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
- OutStreamer->EmitCodeAlignment(2);
- OutStreamer->EmitLabel(CurSled);
+ OutStreamer->emitCodeAlignment(2);
+ OutStreamer->emitLabel(CurSled);
unsigned OpCode = MI.getOperand(0).getImm();
MCInst Ret;
Ret.setOpcode(OpCode);
for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))
if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
Ret.addOperand(MaybeOperand.getValue());
- OutStreamer->EmitInstruction(Ret, getSubtargetInfo());
- EmitNops(*OutStreamer, 10, Subtarget->is64Bit(), getSubtargetInfo());
- recordSled(CurSled, MI, SledKind::FUNCTION_EXIT);
+ OutStreamer->emitInstruction(Ret, getSubtargetInfo());
+ emitX86Nops(*OutStreamer, 10, Subtarget);
+ recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);
}
void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
@@ -1694,17 +1724,17 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
// the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
// tail call much like how we have it in PATCHABLE_RET.
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
- OutStreamer->EmitCodeAlignment(2);
- OutStreamer->EmitLabel(CurSled);
+ OutStreamer->emitCodeAlignment(2);
+ OutStreamer->emitLabel(CurSled);
auto Target = OutContext.createTempSymbol();
// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
// an operand (computed as an offset from the jmp instruction).
// FIXME: Find another less hacky way to force the relative jump.
- OutStreamer->EmitBytes("\xeb\x09");
- EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo());
- OutStreamer->EmitLabel(Target);
- recordSled(CurSled, MI, SledKind::TAIL_CALL);
+ OutStreamer->emitBytes("\xeb\x09");
+ emitX86Nops(*OutStreamer, 9, Subtarget);
+ OutStreamer->emitLabel(Target);
+ recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);
unsigned OpCode = MI.getOperand(0).getImm();
OpCode = convertTailJumpOpcode(OpCode);
@@ -1717,7 +1747,7 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))
if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
TC.addOperand(MaybeOperand.getValue());
- OutStreamer->EmitInstruction(TC, getSubtargetInfo());
+ OutStreamer->emitInstruction(TC, getSubtargetInfo());
}
// Returns instruction preceding MBBI in MachineFunction.
@@ -1961,300 +1991,9 @@ static unsigned getRegisterWidth(const MCOperandInfo &Info) {
llvm_unreachable("Unknown register class!");
}
-void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
- X86MCInstLower MCInstLowering(*MF, *this);
- const X86RegisterInfo *RI =
- MF->getSubtarget<X86Subtarget>().getRegisterInfo();
-
- // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that
- // are compressed from EVEX encoding to VEX encoding.
- if (TM.Options.MCOptions.ShowMCEncoding) {
- if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
- OutStreamer->AddComment("EVEX TO VEX Compression ", false);
- }
-
+static void addConstantComments(const MachineInstr *MI,
+ MCStreamer &OutStreamer) {
switch (MI->getOpcode()) {
- case TargetOpcode::DBG_VALUE:
- llvm_unreachable("Should be handled target independently");
-
- // Emit nothing here but a comment if we can.
- case X86::Int_MemBarrier:
- OutStreamer->emitRawComment("MEMBARRIER");
- return;
-
- case X86::EH_RETURN:
- case X86::EH_RETURN64: {
- // Lower these as normal, but add some comments.
- Register Reg = MI->getOperand(0).getReg();
- OutStreamer->AddComment(StringRef("eh_return, addr: %") +
- X86ATTInstPrinter::getRegisterName(Reg));
- break;
- }
- case X86::CLEANUPRET: {
- // Lower these as normal, but add some comments.
- OutStreamer->AddComment("CLEANUPRET");
- break;
- }
-
- case X86::CATCHRET: {
- // Lower these as normal, but add some comments.
- OutStreamer->AddComment("CATCHRET");
- break;
- }
-
- case X86::ENDBR32:
- case X86::ENDBR64: {
- // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
- // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
- // non-empty. If MI is the initial ENDBR, place the
- // __patchable_function_entries label after ENDBR.
- if (CurrentPatchableFunctionEntrySym &&
- CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
- MI == &MF->front().front()) {
- MCInst Inst;
- MCInstLowering.Lower(MI, Inst);
- EmitAndCountInstruction(Inst);
- CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
- OutStreamer->EmitLabel(CurrentPatchableFunctionEntrySym);
- return;
- }
- break;
- }
-
- case X86::TAILJMPr:
- case X86::TAILJMPm:
- case X86::TAILJMPd:
- case X86::TAILJMPd_CC:
- case X86::TAILJMPr64:
- case X86::TAILJMPm64:
- case X86::TAILJMPd64:
- case X86::TAILJMPd64_CC:
- case X86::TAILJMPr64_REX:
- case X86::TAILJMPm64_REX:
- // Lower these as normal, but add some comments.
- OutStreamer->AddComment("TAILCALL");
- break;
-
- case X86::TLS_addr32:
- case X86::TLS_addr64:
- case X86::TLS_base_addr32:
- case X86::TLS_base_addr64:
- return LowerTlsAddr(MCInstLowering, *MI);
-
- // Loading/storing mask pairs requires two kmov operations. The second one of these
- // needs a 2 byte displacement relative to the specified address (with 32 bit spill
- // size). The pairs of 1bit masks up to 16 bit masks all use the same spill size,
- // they all are stored using MASKPAIR16STORE, loaded using MASKPAIR16LOAD.
- //
- // The displacement value might wrap around in theory, thus the asserts in both
- // cases.
- case X86::MASKPAIR16LOAD: {
- int64_t Disp = MI->getOperand(1 + X86::AddrDisp).getImm();
- assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
- Register Reg = MI->getOperand(0).getReg();
- Register Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
- Register Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
-
- // Load the first mask register
- MCInstBuilder MIB = MCInstBuilder(X86::KMOVWkm);
- MIB.addReg(Reg0);
- for (int i = 0; i < X86::AddrNumOperands; ++i) {
- auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i));
- MIB.addOperand(Op.getValue());
- }
- EmitAndCountInstruction(MIB);
-
- // Load the second mask register of the pair
- MIB = MCInstBuilder(X86::KMOVWkm);
- MIB.addReg(Reg1);
- for (int i = 0; i < X86::AddrNumOperands; ++i) {
- if (i == X86::AddrDisp) {
- MIB.addImm(Disp + 2);
- } else {
- auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i));
- MIB.addOperand(Op.getValue());
- }
- }
- EmitAndCountInstruction(MIB);
- return;
- }
-
- case X86::MASKPAIR16STORE: {
- int64_t Disp = MI->getOperand(X86::AddrDisp).getImm();
- assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
- Register Reg = MI->getOperand(X86::AddrNumOperands).getReg();
- Register Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
- Register Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
-
- // Store the first mask register
- MCInstBuilder MIB = MCInstBuilder(X86::KMOVWmk);
- for (int i = 0; i < X86::AddrNumOperands; ++i)
- MIB.addOperand(MCInstLowering.LowerMachineOperand(MI, MI->getOperand(i)).getValue());
- MIB.addReg(Reg0);
- EmitAndCountInstruction(MIB);
-
- // Store the second mask register of the pair
- MIB = MCInstBuilder(X86::KMOVWmk);
- for (int i = 0; i < X86::AddrNumOperands; ++i) {
- if (i == X86::AddrDisp) {
- MIB.addImm(Disp + 2);
- } else {
- auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(0 + i));
- MIB.addOperand(Op.getValue());
- }
- }
- MIB.addReg(Reg1);
- EmitAndCountInstruction(MIB);
- return;
- }
-
- case X86::MOVPC32r: {
- // This is a pseudo op for a two instruction sequence with a label, which
- // looks like:
- // call "L1$pb"
- // "L1$pb":
- // popl %esi
-
- // Emit the call.
- MCSymbol *PICBase = MF->getPICBaseSymbol();
- // FIXME: We would like an efficient form for this, so we don't have to do a
- // lot of extra uniquing.
- EmitAndCountInstruction(
- MCInstBuilder(X86::CALLpcrel32)
- .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
-
- const X86FrameLowering *FrameLowering =
- MF->getSubtarget<X86Subtarget>().getFrameLowering();
- bool hasFP = FrameLowering->hasFP(*MF);
-
- // TODO: This is needed only if we require precise CFA.
- bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
- !OutStreamer->getDwarfFrameInfos().back().End;
-
- int stackGrowth = -RI->getSlotSize();
-
- if (HasActiveDwarfFrame && !hasFP) {
- OutStreamer->EmitCFIAdjustCfaOffset(-stackGrowth);
- }
-
- // Emit the label.
- OutStreamer->EmitLabel(PICBase);
-
- // popl $reg
- EmitAndCountInstruction(
- MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));
-
- if (HasActiveDwarfFrame && !hasFP) {
- OutStreamer->EmitCFIAdjustCfaOffset(stackGrowth);
- }
- return;
- }
-
- case X86::ADD32ri: {
- // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
- if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
- break;
-
- // Okay, we have something like:
- // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
-
- // For this, we want to print something like:
- // MYGLOBAL + (. - PICBASE)
- // However, we can't generate a ".", so just emit a new label here and refer
- // to it.
- MCSymbol *DotSym = OutContext.createTempSymbol();
- OutStreamer->EmitLabel(DotSym);
-
- // Now that we have emitted the label, lower the complex operand expression.
- MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
-
- const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
- const MCExpr *PICBase =
- MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
- DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);
-
- DotExpr = MCBinaryExpr::createAdd(
- MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);
-
- EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
- .addReg(MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addExpr(DotExpr));
- return;
- }
- case TargetOpcode::STATEPOINT:
- return LowerSTATEPOINT(*MI, MCInstLowering);
-
- case TargetOpcode::FAULTING_OP:
- return LowerFAULTING_OP(*MI, MCInstLowering);
-
- case TargetOpcode::FENTRY_CALL:
- return LowerFENTRY_CALL(*MI, MCInstLowering);
-
- case TargetOpcode::PATCHABLE_OP:
- return LowerPATCHABLE_OP(*MI, MCInstLowering);
-
- case TargetOpcode::STACKMAP:
- return LowerSTACKMAP(*MI);
-
- case TargetOpcode::PATCHPOINT:
- return LowerPATCHPOINT(*MI, MCInstLowering);
-
- case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
- return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);
-
- case TargetOpcode::PATCHABLE_RET:
- return LowerPATCHABLE_RET(*MI, MCInstLowering);
-
- case TargetOpcode::PATCHABLE_TAIL_CALL:
- return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
-
- case TargetOpcode::PATCHABLE_EVENT_CALL:
- return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
-
- case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
- return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);
-
- case X86::MORESTACK_RET:
- EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
- return;
-
- case X86::MORESTACK_RET_RESTORE_R10:
- // Return, then restore R10.
- EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
- EmitAndCountInstruction(
- MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
- return;
-
- case X86::SEH_PushReg:
- case X86::SEH_SaveReg:
- case X86::SEH_SaveXMM:
- case X86::SEH_StackAlloc:
- case X86::SEH_StackAlign:
- case X86::SEH_SetFrame:
- case X86::SEH_PushFrame:
- case X86::SEH_EndPrologue:
- EmitSEHInstruction(MI);
- return;
-
- case X86::SEH_Epilogue: {
- assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
- MachineBasicBlock::const_iterator MBBI(MI);
- // Check if preceded by a call and emit nop if so.
- for (MBBI = PrevCrossBBInst(MBBI);
- MBBI != MachineBasicBlock::const_iterator();
- MBBI = PrevCrossBBInst(MBBI)) {
- // Conservatively assume that pseudo instructions don't emit code and keep
- // looking for a call. We may emit an unnecessary nop in some cases.
- if (!MBBI->isPseudo()) {
- if (MBBI->isCall())
- EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
- break;
- }
- }
- return;
- }
-
// Lower PSHUFB and VPERMILP normally but add a comment if we can find
// a constant shuffle mask. We won't be able to do this at the MC layer
// because the mask isn't an immediate.
@@ -2270,30 +2009,19 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::VPSHUFBZrm:
case X86::VPSHUFBZrmk:
case X86::VPSHUFBZrmkz: {
- if (!OutStreamer->isVerboseAsm())
- break;
- unsigned SrcIdx, MaskIdx;
- switch (MI->getOpcode()) {
- default: llvm_unreachable("Invalid opcode");
- case X86::PSHUFBrm:
- case X86::VPSHUFBrm:
- case X86::VPSHUFBYrm:
- case X86::VPSHUFBZ128rm:
- case X86::VPSHUFBZ256rm:
- case X86::VPSHUFBZrm:
- SrcIdx = 1; MaskIdx = 5; break;
- case X86::VPSHUFBZ128rmkz:
- case X86::VPSHUFBZ256rmkz:
- case X86::VPSHUFBZrmkz:
- SrcIdx = 2; MaskIdx = 6; break;
- case X86::VPSHUFBZ128rmk:
- case X86::VPSHUFBZ256rmk:
- case X86::VPSHUFBZrmk:
- SrcIdx = 3; MaskIdx = 7; break;
+ unsigned SrcIdx = 1;
+ if (X86II::isKMasked(MI->getDesc().TSFlags)) {
+ // Skip mask operand.
+ ++SrcIdx;
+ if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
+ // Skip passthru operand.
+ ++SrcIdx;
+ }
}
+ unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
- assert(MI->getNumOperands() >= 6 &&
- "We should always have at least 6 operands!");
+ assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
+ "Unexpected number of operands!");
const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
@@ -2301,7 +2029,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 64> Mask;
DecodePSHUFBMask(C, Width, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
+ OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
}
break;
}
@@ -2328,9 +2056,6 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::VPERMILPDZrm:
case X86::VPERMILPDZrmk:
case X86::VPERMILPDZrmkz: {
- if (!OutStreamer->isVerboseAsm())
- break;
- unsigned SrcIdx, MaskIdx;
unsigned ElSize;
switch (MI->getOpcode()) {
default: llvm_unreachable("Invalid opcode");
@@ -2339,33 +2064,42 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::VPERMILPSZ128rm:
case X86::VPERMILPSZ256rm:
case X86::VPERMILPSZrm:
- SrcIdx = 1; MaskIdx = 5; ElSize = 32; break;
case X86::VPERMILPSZ128rmkz:
case X86::VPERMILPSZ256rmkz:
case X86::VPERMILPSZrmkz:
- SrcIdx = 2; MaskIdx = 6; ElSize = 32; break;
case X86::VPERMILPSZ128rmk:
case X86::VPERMILPSZ256rmk:
case X86::VPERMILPSZrmk:
- SrcIdx = 3; MaskIdx = 7; ElSize = 32; break;
+ ElSize = 32;
+ break;
case X86::VPERMILPDrm:
case X86::VPERMILPDYrm:
case X86::VPERMILPDZ128rm:
case X86::VPERMILPDZ256rm:
case X86::VPERMILPDZrm:
- SrcIdx = 1; MaskIdx = 5; ElSize = 64; break;
case X86::VPERMILPDZ128rmkz:
case X86::VPERMILPDZ256rmkz:
case X86::VPERMILPDZrmkz:
- SrcIdx = 2; MaskIdx = 6; ElSize = 64; break;
case X86::VPERMILPDZ128rmk:
case X86::VPERMILPDZ256rmk:
case X86::VPERMILPDZrmk:
- SrcIdx = 3; MaskIdx = 7; ElSize = 64; break;
+ ElSize = 64;
+ break;
}
- assert(MI->getNumOperands() >= 6 &&
- "We should always have at least 6 operands!");
+ unsigned SrcIdx = 1;
+ if (X86II::isKMasked(MI->getDesc().TSFlags)) {
+ // Skip mask operand.
+ ++SrcIdx;
+ if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
+ // Skip passthru operand.
+ ++SrcIdx;
+ }
+ }
+ unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
+
+ assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
+ "Unexpected number of operands!");
const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
@@ -2373,7 +2107,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 16> Mask;
DecodeVPERMILPMask(C, ElSize, Width, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
+ OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
}
break;
}
@@ -2382,10 +2116,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::VPERMIL2PSrm:
case X86::VPERMIL2PDYrm:
case X86::VPERMIL2PSYrm: {
- if (!OutStreamer->isVerboseAsm())
- break;
- assert(MI->getNumOperands() >= 8 &&
- "We should always have at least 8 operands!");
+ assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) &&
+ "Unexpected number of operands!");
const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
if (!CtrlOp.isImm())
@@ -2398,47 +2130,43 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
}
- const MachineOperand &MaskOp = MI->getOperand(6);
+ const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
SmallVector<int, 16> Mask;
DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
+ OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
}
break;
}
case X86::VPPERMrrm: {
- if (!OutStreamer->isVerboseAsm())
- break;
- assert(MI->getNumOperands() >= 7 &&
- "We should always have at least 7 operands!");
+ assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands) &&
+ "Unexpected number of operands!");
- const MachineOperand &MaskOp = MI->getOperand(6);
+ const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
SmallVector<int, 16> Mask;
DecodeVPPERMMask(C, Width, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
+ OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
}
break;
}
case X86::MMX_MOVQ64rm: {
- if (!OutStreamer->isVerboseAsm())
- break;
- if (MI->getNumOperands() <= 4)
- break;
- if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
+ assert(MI->getNumOperands() == (1 + X86::AddrNumOperands) &&
+ "Unexpected number of operands!");
+ if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
std::string Comment;
raw_string_ostream CS(Comment);
const MachineOperand &DstOp = MI->getOperand(0);
CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
if (auto *CF = dyn_cast<ConstantFP>(C)) {
CS << "0x" << CF->getValueAPF().bitcastToAPInt().toString(16, false);
- OutStreamer->AddComment(CS.str());
+ OutStreamer.AddComment(CS.str());
}
}
break;
@@ -2489,11 +2217,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::VBROADCASTI64X2Z128rm:
case X86::VBROADCASTI64X2rm:
case X86::VBROADCASTI64X4rm:
- if (!OutStreamer->isVerboseAsm())
- break;
- if (MI->getNumOperands() <= 4)
- break;
- if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
+ assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
+ "Unexpected number of operands!");
+ if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
int NumLanes = 1;
// Override NumLanes for the broadcast instructions.
switch (MI->getOpcode()) {
@@ -2535,7 +2261,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
CS << "]";
- OutStreamer->AddComment(CS.str());
+ OutStreamer.AddComment(CS.str());
} else if (auto *CV = dyn_cast<ConstantVector>(C)) {
CS << "<";
for (int l = 0; l != NumLanes; ++l) {
@@ -2547,80 +2273,79 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
CS << ">";
- OutStreamer->AddComment(CS.str());
+ OutStreamer.AddComment(CS.str());
}
}
break;
+
case X86::MOVDDUPrm:
case X86::VMOVDDUPrm:
case X86::VMOVDDUPZ128rm:
case X86::VBROADCASTSSrm:
case X86::VBROADCASTSSYrm:
- case X86::VBROADCASTSSZ128m:
- case X86::VBROADCASTSSZ256m:
- case X86::VBROADCASTSSZm:
+ case X86::VBROADCASTSSZ128rm:
+ case X86::VBROADCASTSSZ256rm:
+ case X86::VBROADCASTSSZrm:
case X86::VBROADCASTSDYrm:
- case X86::VBROADCASTSDZ256m:
- case X86::VBROADCASTSDZm:
+ case X86::VBROADCASTSDZ256rm:
+ case X86::VBROADCASTSDZrm:
case X86::VPBROADCASTBrm:
case X86::VPBROADCASTBYrm:
- case X86::VPBROADCASTBZ128m:
- case X86::VPBROADCASTBZ256m:
- case X86::VPBROADCASTBZm:
+ case X86::VPBROADCASTBZ128rm:
+ case X86::VPBROADCASTBZ256rm:
+ case X86::VPBROADCASTBZrm:
case X86::VPBROADCASTDrm:
case X86::VPBROADCASTDYrm:
- case X86::VPBROADCASTDZ128m:
- case X86::VPBROADCASTDZ256m:
- case X86::VPBROADCASTDZm:
+ case X86::VPBROADCASTDZ128rm:
+ case X86::VPBROADCASTDZ256rm:
+ case X86::VPBROADCASTDZrm:
case X86::VPBROADCASTQrm:
case X86::VPBROADCASTQYrm:
- case X86::VPBROADCASTQZ128m:
- case X86::VPBROADCASTQZ256m:
- case X86::VPBROADCASTQZm:
+ case X86::VPBROADCASTQZ128rm:
+ case X86::VPBROADCASTQZ256rm:
+ case X86::VPBROADCASTQZrm:
case X86::VPBROADCASTWrm:
case X86::VPBROADCASTWYrm:
- case X86::VPBROADCASTWZ128m:
- case X86::VPBROADCASTWZ256m:
- case X86::VPBROADCASTWZm:
- if (!OutStreamer->isVerboseAsm())
- break;
- if (MI->getNumOperands() <= 4)
- break;
- if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
+ case X86::VPBROADCASTWZ128rm:
+ case X86::VPBROADCASTWZ256rm:
+ case X86::VPBROADCASTWZrm:
+ assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
+ "Unexpected number of operands!");
+ if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
int NumElts;
switch (MI->getOpcode()) {
default: llvm_unreachable("Invalid opcode");
- case X86::MOVDDUPrm: NumElts = 2; break;
- case X86::VMOVDDUPrm: NumElts = 2; break;
- case X86::VMOVDDUPZ128rm: NumElts = 2; break;
- case X86::VBROADCASTSSrm: NumElts = 4; break;
- case X86::VBROADCASTSSYrm: NumElts = 8; break;
- case X86::VBROADCASTSSZ128m: NumElts = 4; break;
- case X86::VBROADCASTSSZ256m: NumElts = 8; break;
- case X86::VBROADCASTSSZm: NumElts = 16; break;
- case X86::VBROADCASTSDYrm: NumElts = 4; break;
- case X86::VBROADCASTSDZ256m: NumElts = 4; break;
- case X86::VBROADCASTSDZm: NumElts = 8; break;
- case X86::VPBROADCASTBrm: NumElts = 16; break;
- case X86::VPBROADCASTBYrm: NumElts = 32; break;
- case X86::VPBROADCASTBZ128m: NumElts = 16; break;
- case X86::VPBROADCASTBZ256m: NumElts = 32; break;
- case X86::VPBROADCASTBZm: NumElts = 64; break;
- case X86::VPBROADCASTDrm: NumElts = 4; break;
- case X86::VPBROADCASTDYrm: NumElts = 8; break;
- case X86::VPBROADCASTDZ128m: NumElts = 4; break;
- case X86::VPBROADCASTDZ256m: NumElts = 8; break;
- case X86::VPBROADCASTDZm: NumElts = 16; break;
- case X86::VPBROADCASTQrm: NumElts = 2; break;
- case X86::VPBROADCASTQYrm: NumElts = 4; break;
- case X86::VPBROADCASTQZ128m: NumElts = 2; break;
- case X86::VPBROADCASTQZ256m: NumElts = 4; break;
- case X86::VPBROADCASTQZm: NumElts = 8; break;
- case X86::VPBROADCASTWrm: NumElts = 8; break;
- case X86::VPBROADCASTWYrm: NumElts = 16; break;
- case X86::VPBROADCASTWZ128m: NumElts = 8; break;
- case X86::VPBROADCASTWZ256m: NumElts = 16; break;
- case X86::VPBROADCASTWZm: NumElts = 32; break;
+ case X86::MOVDDUPrm: NumElts = 2; break;
+ case X86::VMOVDDUPrm: NumElts = 2; break;
+ case X86::VMOVDDUPZ128rm: NumElts = 2; break;
+ case X86::VBROADCASTSSrm: NumElts = 4; break;
+ case X86::VBROADCASTSSYrm: NumElts = 8; break;
+ case X86::VBROADCASTSSZ128rm: NumElts = 4; break;
+ case X86::VBROADCASTSSZ256rm: NumElts = 8; break;
+ case X86::VBROADCASTSSZrm: NumElts = 16; break;
+ case X86::VBROADCASTSDYrm: NumElts = 4; break;
+ case X86::VBROADCASTSDZ256rm: NumElts = 4; break;
+ case X86::VBROADCASTSDZrm: NumElts = 8; break;
+ case X86::VPBROADCASTBrm: NumElts = 16; break;
+ case X86::VPBROADCASTBYrm: NumElts = 32; break;
+ case X86::VPBROADCASTBZ128rm: NumElts = 16; break;
+ case X86::VPBROADCASTBZ256rm: NumElts = 32; break;
+ case X86::VPBROADCASTBZrm: NumElts = 64; break;
+ case X86::VPBROADCASTDrm: NumElts = 4; break;
+ case X86::VPBROADCASTDYrm: NumElts = 8; break;
+ case X86::VPBROADCASTDZ128rm: NumElts = 4; break;
+ case X86::VPBROADCASTDZ256rm: NumElts = 8; break;
+ case X86::VPBROADCASTDZrm: NumElts = 16; break;
+ case X86::VPBROADCASTQrm: NumElts = 2; break;
+ case X86::VPBROADCASTQYrm: NumElts = 4; break;
+ case X86::VPBROADCASTQZ128rm: NumElts = 2; break;
+ case X86::VPBROADCASTQZ256rm: NumElts = 4; break;
+ case X86::VPBROADCASTQZrm: NumElts = 8; break;
+ case X86::VPBROADCASTWrm: NumElts = 8; break;
+ case X86::VPBROADCASTWYrm: NumElts = 16; break;
+ case X86::VPBROADCASTWZ128rm: NumElts = 8; break;
+ case X86::VPBROADCASTWZ256rm: NumElts = 16; break;
+ case X86::VPBROADCASTWZrm: NumElts = 32; break;
}
std::string Comment;
@@ -2634,8 +2359,241 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
printConstant(C, CS);
}
CS << "]";
- OutStreamer->AddComment(CS.str());
+ OutStreamer.AddComment(CS.str());
+ }
+ }
+}
+
+void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
+ X86MCInstLower MCInstLowering(*MF, *this);
+ const X86RegisterInfo *RI =
+ MF->getSubtarget<X86Subtarget>().getRegisterInfo();
+
+ // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that
+ // are compressed from EVEX encoding to VEX encoding.
+ if (TM.Options.MCOptions.ShowMCEncoding) {
+ if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
+ OutStreamer->AddComment("EVEX TO VEX Compression ", false);
+ }
+
+ // Add comments for values loaded from constant pool.
+ if (OutStreamer->isVerboseAsm())
+ addConstantComments(MI, *OutStreamer);
+
+ switch (MI->getOpcode()) {
+ case TargetOpcode::DBG_VALUE:
+ llvm_unreachable("Should be handled target independently");
+
+ // Emit nothing here but a comment if we can.
+ case X86::Int_MemBarrier:
+ OutStreamer->emitRawComment("MEMBARRIER");
+ return;
+
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ // Lower these as normal, but add some comments.
+ Register Reg = MI->getOperand(0).getReg();
+ OutStreamer->AddComment(StringRef("eh_return, addr: %") +
+ X86ATTInstPrinter::getRegisterName(Reg));
+ break;
+ }
+ case X86::CLEANUPRET: {
+ // Lower these as normal, but add some comments.
+ OutStreamer->AddComment("CLEANUPRET");
+ break;
+ }
+
+ case X86::CATCHRET: {
+ // Lower these as normal, but add some comments.
+ OutStreamer->AddComment("CATCHRET");
+ break;
+ }
+
+ case X86::ENDBR32:
+ case X86::ENDBR64: {
+ // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
+ // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
+ // non-empty. If MI is the initial ENDBR, place the
+ // __patchable_function_entries label after ENDBR.
+ if (CurrentPatchableFunctionEntrySym &&
+ CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
+ MI == &MF->front().front()) {
+ MCInst Inst;
+ MCInstLowering.Lower(MI, Inst);
+ EmitAndCountInstruction(Inst);
+ CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
+ OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
+ return;
}
+ break;
+ }
+
+ case X86::TAILJMPr:
+ case X86::TAILJMPm:
+ case X86::TAILJMPd:
+ case X86::TAILJMPd_CC:
+ case X86::TAILJMPr64:
+ case X86::TAILJMPm64:
+ case X86::TAILJMPd64:
+ case X86::TAILJMPd64_CC:
+ case X86::TAILJMPr64_REX:
+ case X86::TAILJMPm64_REX:
+ // Lower these as normal, but add some comments.
+ OutStreamer->AddComment("TAILCALL");
+ break;
+
+ case X86::TLS_addr32:
+ case X86::TLS_addr64:
+ case X86::TLS_base_addr32:
+ case X86::TLS_base_addr64:
+ return LowerTlsAddr(MCInstLowering, *MI);
+
+ case X86::MOVPC32r: {
+ // This is a pseudo op for a two instruction sequence with a label, which
+ // looks like:
+ // call "L1$pb"
+ // "L1$pb":
+ // popl %esi
+
+ // Emit the call.
+ MCSymbol *PICBase = MF->getPICBaseSymbol();
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ EmitAndCountInstruction(
+ MCInstBuilder(X86::CALLpcrel32)
+ .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
+
+ const X86FrameLowering *FrameLowering =
+ MF->getSubtarget<X86Subtarget>().getFrameLowering();
+ bool hasFP = FrameLowering->hasFP(*MF);
+
+ // TODO: This is needed only if we require precise CFA.
+ bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
+ !OutStreamer->getDwarfFrameInfos().back().End;
+
+ int stackGrowth = -RI->getSlotSize();
+
+ if (HasActiveDwarfFrame && !hasFP) {
+ OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth);
+ }
+
+ // Emit the label.
+ OutStreamer->emitLabel(PICBase);
+
+ // popl $reg
+ EmitAndCountInstruction(
+ MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));
+
+ if (HasActiveDwarfFrame && !hasFP) {
+ OutStreamer->emitCFIAdjustCfaOffset(stackGrowth);
+ }
+ return;
+ }
+
+ case X86::ADD32ri: {
+ // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
+ if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ break;
+
+ // Okay, we have something like:
+ // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
+
+ // For this, we want to print something like:
+ // MYGLOBAL + (. - PICBASE)
+ // However, we can't generate a ".", so just emit a new label here and refer
+ // to it.
+ MCSymbol *DotSym = OutContext.createTempSymbol();
+ OutStreamer->emitLabel(DotSym);
+
+ // Now that we have emitted the label, lower the complex operand expression.
+ MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
+
+ const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
+ const MCExpr *PICBase =
+ MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
+ DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);
+
+ DotExpr = MCBinaryExpr::createAdd(
+ MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);
+
+ EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(DotExpr));
+ return;
+ }
+ case TargetOpcode::STATEPOINT:
+ return LowerSTATEPOINT(*MI, MCInstLowering);
+
+ case TargetOpcode::FAULTING_OP:
+ return LowerFAULTING_OP(*MI, MCInstLowering);
+
+ case TargetOpcode::FENTRY_CALL:
+ return LowerFENTRY_CALL(*MI, MCInstLowering);
+
+ case TargetOpcode::PATCHABLE_OP:
+ return LowerPATCHABLE_OP(*MI, MCInstLowering);
+
+ case TargetOpcode::STACKMAP:
+ return LowerSTACKMAP(*MI);
+
+ case TargetOpcode::PATCHPOINT:
+ return LowerPATCHPOINT(*MI, MCInstLowering);
+
+ case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
+ return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);
+
+ case TargetOpcode::PATCHABLE_RET:
+ return LowerPATCHABLE_RET(*MI, MCInstLowering);
+
+ case TargetOpcode::PATCHABLE_TAIL_CALL:
+ return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
+
+ case TargetOpcode::PATCHABLE_EVENT_CALL:
+ return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
+
+ case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
+ return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);
+
+ case X86::MORESTACK_RET:
+ EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
+ return;
+
+ case X86::MORESTACK_RET_RESTORE_R10:
+ // Return, then restore R10.
+ EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
+ EmitAndCountInstruction(
+ MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
+ return;
+
+ case X86::SEH_PushReg:
+ case X86::SEH_SaveReg:
+ case X86::SEH_SaveXMM:
+ case X86::SEH_StackAlloc:
+ case X86::SEH_StackAlign:
+ case X86::SEH_SetFrame:
+ case X86::SEH_PushFrame:
+ case X86::SEH_EndPrologue:
+ EmitSEHInstruction(MI);
+ return;
+
+ case X86::SEH_Epilogue: {
+ assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
+ MachineBasicBlock::const_iterator MBBI(MI);
+ // Check if preceded by a call and emit nop if so.
+ for (MBBI = PrevCrossBBInst(MBBI);
+ MBBI != MachineBasicBlock::const_iterator();
+ MBBI = PrevCrossBBInst(MBBI)) {
+ // Conservatively assume that pseudo instructions don't emit code and keep
+ // looking for a call. We may emit an unnecessary nop in some cases.
+ if (!MBBI->isPseudo()) {
+ if (MBBI->isCall())
+ EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
+ break;
+ }
+ }
+ return;
+ }
}
MCInst TmpInst;
@@ -2652,7 +2610,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// after it.
SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
// Then emit the call
- OutStreamer->EmitInstruction(TmpInst, getSubtargetInfo());
+ OutStreamer->emitInstruction(TmpInst, getSubtargetInfo());
return;
}
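
Aside for readers skimming the X86MCInstLower.cpp hunk above: the NoAutoPaddingScope helper that was hoisted to the top of the file is a plain RAII save-and-restore guard around the streamer's auto-padding flag. The following is a minimal standalone sketch of the same idiom, using a hypothetical Streamer type instead of the real MCStreamer API, so it can be compiled and read in isolation; it is an illustration of the pattern, not LLVM code.

#include <cassert>
#include <iostream>

// Hypothetical stand-in for MCStreamer: only the bits the guard touches.
struct Streamer {
  bool AllowAutoPadding = true;
  bool getAllowAutoPadding() const { return AllowAutoPadding; }
  void setAllowAutoPadding(bool B) { AllowAutoPadding = B; }
  void emitRawComment(const char *C) { std::cout << "# " << C << "\n"; }
};

// Same shape as NoAutoPaddingScope: disable padding on construction, restore
// the previous value on scope exit, and leave a comment on every change.
struct NoAutoPaddingGuard {
  Streamer &OS;
  const bool OldAllowAutoPadding;
  explicit NoAutoPaddingGuard(Streamer &OS)
      : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
    changeAndComment(false);
  }
  ~NoAutoPaddingGuard() { changeAndComment(OldAllowAutoPadding); }
  void changeAndComment(bool B) {
    if (B == OS.getAllowAutoPadding())
      return;
    OS.setAllowAutoPadding(B);
    OS.emitRawComment(B ? "autopadding" : "noautopadding");
  }
};

int main() {
  Streamer S;
  {
    NoAutoPaddingGuard Guard(S); // padding disabled for this region
    assert(!S.getAllowAutoPadding());
  }
  assert(S.getAllowAutoPadding()); // restored automatically on scope exit
}

Declaring the guard at the top of a lowering routine (as LowerTlsAddr now does) keeps the restore on every return path, which is the point of using RAII rather than explicit set/reset calls.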
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index 5cb80a082b56..eedad952c3b9 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -13,9 +13,10 @@
#ifndef LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Support/MachineValueType.h"
namespace llvm {
@@ -62,12 +63,12 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// SRetReturnReg - Some subtargets require that sret lowering includes
/// returning the value of the returned struct in a register. This field
/// holds the virtual register into which the sret argument is passed.
- unsigned SRetReturnReg = 0;
+ Register SRetReturnReg;
/// GlobalBaseReg - keeps track of the virtual register initialized for
/// use as the global base register. This is used for PIC in some PIC
/// relocation models.
- unsigned GlobalBaseReg = 0;
+ Register GlobalBaseReg;
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
int VarArgsFrameIndex = 0;
@@ -104,6 +105,13 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// True if this function has WIN_ALLOCA instructions.
bool HasWinAlloca = false;
+ /// True if this function has any preallocated calls.
+ bool HasPreallocatedCall = false;
+
+ ValueMap<const Value *, size_t> PreallocatedIds;
+ SmallVector<size_t, 0> PreallocatedStackSizes;
+ SmallVector<SmallVector<size_t, 4>, 0> PreallocatedArgOffsets;
+
private:
/// ForwardedMustTailRegParms - A list of virtual and physical registers
/// that must be forwarded to every musttail call.
@@ -143,11 +151,11 @@ public:
int getTCReturnAddrDelta() const { return TailCallReturnAddrDelta; }
void setTCReturnAddrDelta(int delta) {TailCallReturnAddrDelta = delta;}
- unsigned getSRetReturnReg() const { return SRetReturnReg; }
- void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+ Register getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(Register Reg) { SRetReturnReg = Reg; }
- unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
- void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+ Register getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(Register Reg) { GlobalBaseReg = Reg; }
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
void setVarArgsFrameIndex(int Idx) { VarArgsFrameIndex = Idx; }
@@ -185,6 +193,36 @@ public:
bool hasWinAlloca() const { return HasWinAlloca; }
void setHasWinAlloca(bool v) { HasWinAlloca = v; }
+
+ bool hasPreallocatedCall() const { return HasPreallocatedCall; }
+ void setHasPreallocatedCall(bool v) { HasPreallocatedCall = v; }
+
+ size_t getPreallocatedIdForCallSite(const Value *CS) {
+ auto Insert = PreallocatedIds.insert({CS, PreallocatedIds.size()});
+ if (Insert.second) {
+ PreallocatedStackSizes.push_back(0);
+ PreallocatedArgOffsets.emplace_back();
+ }
+ return Insert.first->second;
+ }
+
+ void setPreallocatedStackSize(size_t Id, size_t StackSize) {
+ PreallocatedStackSizes[Id] = StackSize;
+ }
+
+ size_t getPreallocatedStackSize(const size_t Id) {
+ assert(PreallocatedStackSizes[Id] != 0 && "stack size not set");
+ return PreallocatedStackSizes[Id];
+ }
+
+ void setPreallocatedArgOffsets(size_t Id, ArrayRef<size_t> AO) {
+ PreallocatedArgOffsets[Id].assign(AO.begin(), AO.end());
+ }
+
+ const ArrayRef<size_t> getPreallocatedArgOffsets(const size_t Id) {
+ assert(!PreallocatedArgOffsets[Id].empty() && "arg offsets not set");
+ return PreallocatedArgOffsets[Id];
+ }
};
} // End llvm namespace
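
The new preallocated-call accessors above follow a common bookkeeping idiom: a call site is mapped to a dense integer id the first time it is seen, and two parallel vectors indexed by that id are grown at the same moment so later lookups stay O(1). Below is a small self-contained sketch of that idiom, using std::map and std::vector instead of LLVM's ValueMap and SmallVector and a void pointer in place of const Value *; it illustrates the pattern only and is not the LLVM API.

#include <cassert>
#include <cstddef>
#include <map>
#include <vector>

// Assigns a dense id per call site and keeps per-id data in parallel vectors,
// mirroring getPreallocatedIdForCallSite / setPreallocatedStackSize above.
class PreallocatedInfo {
  std::map<const void *, std::size_t> Ids;
  std::vector<std::size_t> StackSizes;
  std::vector<std::vector<std::size_t>> ArgOffsets;

public:
  std::size_t getIdForCallSite(const void *CS) {
    auto Insert = Ids.insert({CS, Ids.size()});
    if (Insert.second) {       // first time we see this call site
      StackSizes.push_back(0); // keep the parallel vectors in sync
      ArgOffsets.emplace_back();
    }
    return Insert.first->second;
  }

  void setStackSize(std::size_t Id, std::size_t Size) { StackSizes[Id] = Size; }
  std::size_t getStackSize(std::size_t Id) const {
    assert(StackSizes[Id] != 0 && "stack size not set");
    return StackSizes[Id];
  }
};

int main() {
  int CallSiteA, CallSiteB; // stand-ins for two distinct call-site objects
  PreallocatedInfo Info;
  std::size_t IdA = Info.getIdForCallSite(&CallSiteA);
  std::size_t IdB = Info.getIdForCallSite(&CallSiteB);
  assert(IdA == 0 && IdB == 1);
  assert(Info.getIdForCallSite(&CallSiteA) == IdA); // stable on repeat lookups
  Info.setStackSize(IdA, 32);
  assert(Info.getStackSize(IdA) == 32);
}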
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp
index b19d1263e0c9..425054cfdd92 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/X86BaseInfo.h"
#include "X86MacroFusion.h"
+#include "MCTargetDesc/X86BaseInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/MacroFusion.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.h b/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.h
index d4ae54f657a5..05388b275ca3 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86MacroFusion.h
@@ -14,10 +14,12 @@
#ifndef LLVM_LIB_TARGET_X86_X86MACROFUSION_H
#define LLVM_LIB_TARGET_X86_X86MACROFUSION_H
-#include "llvm/CodeGen/MachineScheduler.h"
+#include <memory>
namespace llvm {
+class ScheduleDAGMutation;
+
/// Note that you have to add:
/// DAG.addMutation(createX86MacroFusionDAGMutation());
/// to X86PassConfig::createMachineScheduler() to have an effect.
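
The X86MacroFusion.h change above is a standard include-hygiene cleanup: the header only names ScheduleDAGMutation through a std::unique_ptr return type, so a forward declaration plus <memory> can replace the heavyweight MachineScheduler.h include, and only the .cpp file needs the full definition. A generic sketch of the same pattern, with hypothetical Widget/createWidget names, looks like this:

// factory.h -- the header mentions Widget only through a pointer, so a
// forward declaration is enough; no need to pull in widget.h here.
#include <memory>

class Widget; // forward declaration instead of #include "widget.h"

std::unique_ptr<Widget> createWidget();

// factory.cpp -- the implementation file includes the complete type:
//   #include "widget.h"
//   std::unique_ptr<Widget> createWidget() { return std::make_unique<Widget>(); }

Keeping transitive includes out of widely-included headers like this is mainly a build-time and layering win; the generated code is unchanged.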
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
index 0c791b6674dc..c8899a85118e 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -578,7 +578,7 @@ bool X86OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) {
MachineInstr *X86OptimizeLEAPass::replaceDebugValue(MachineInstr &MI,
unsigned VReg,
int64_t AddrDispShift) {
- DIExpression *Expr = const_cast<DIExpression *>(MI.getDebugExpression());
+ const DIExpression *Expr = MI.getDebugExpression();
if (AddrDispShift != 0)
Expr = DIExpression::prepend(Expr, DIExpression::StackValue, AddrDispShift);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86PadShortFunction.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86PadShortFunction.cpp
index 4c6bd0ccc2cd..ec81b07f9e5f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86PadShortFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86PadShortFunction.cpp
@@ -58,6 +58,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
+ AU.addPreserved<LazyMachineBlockFrequencyInfoPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86PartialReduction.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86PartialReduction.cpp
new file mode 100644
index 000000000000..8784a3df1773
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86PartialReduction.cpp
@@ -0,0 +1,490 @@
+//===-- X86PartialReduction.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for add instructions used by a horizontal reduction to see
+// if we might be able to use pmaddwd or psadbw. Some cases of this require
+// cross basic block knowledge and can't be done in SelectionDAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsX86.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "X86TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-partial-reduction"
+
+namespace {
+
+class X86PartialReduction : public FunctionPass {
+ const DataLayout *DL;
+ const X86Subtarget *ST;
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ X86PartialReduction() : FunctionPass(ID) { }
+
+ bool runOnFunction(Function &Fn) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ }
+
+ StringRef getPassName() const override {
+ return "X86 Partial Reduction";
+ }
+
+private:
+ bool tryMAddReplacement(Instruction *Op);
+ bool trySADReplacement(Instruction *Op);
+};
+}
+
+FunctionPass *llvm::createX86PartialReductionPass() {
+ return new X86PartialReduction();
+}
+
+char X86PartialReduction::ID = 0;
+
+INITIALIZE_PASS(X86PartialReduction, DEBUG_TYPE,
+ "X86 Partial Reduction", false, false)
+
+bool X86PartialReduction::tryMAddReplacement(Instruction *Op) {
+ if (!ST->hasSSE2())
+ return false;
+
+ // Need at least 8 elements.
+ if (cast<FixedVectorType>(Op->getType())->getNumElements() < 8)
+ return false;
+
+ // Element type should be i32.
+ if (!cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(32))
+ return false;
+
+ auto *Mul = dyn_cast<BinaryOperator>(Op);
+ if (!Mul || Mul->getOpcode() != Instruction::Mul)
+ return false;
+
+ Value *LHS = Mul->getOperand(0);
+ Value *RHS = Mul->getOperand(1);
+
+  // LHS and RHS should only be used once, or if they are the same then only
+  // used twice. Only check this when SSE4.1 is enabled and we have zext/sext
+  // instructions, otherwise we use punpck to emulate zero extend in stages. The
+  // trunc we need to do likely won't introduce new instructions in that case.
+ if (ST->hasSSE41()) {
+ if (LHS == RHS) {
+ if (!isa<Constant>(LHS) && !LHS->hasNUses(2))
+ return false;
+ } else {
+ if (!isa<Constant>(LHS) && !LHS->hasOneUse())
+ return false;
+ if (!isa<Constant>(RHS) && !RHS->hasOneUse())
+ return false;
+ }
+ }
+
+ auto CanShrinkOp = [&](Value *Op) {
+ auto IsFreeTruncation = [&](Value *Op) {
+ if (auto *Cast = dyn_cast<CastInst>(Op)) {
+ if (Cast->getParent() == Mul->getParent() &&
+ (Cast->getOpcode() == Instruction::SExt ||
+ Cast->getOpcode() == Instruction::ZExt) &&
+ Cast->getOperand(0)->getType()->getScalarSizeInBits() <= 16)
+ return true;
+ }
+
+ return isa<Constant>(Op);
+ };
+
+ // If the operation can be freely truncated and has enough sign bits we
+ // can shrink it.
+ if (IsFreeTruncation(Op) &&
+ ComputeNumSignBits(Op, *DL, 0, nullptr, Mul) > 16)
+ return true;
+
+ // SelectionDAG has limited support for truncating through an add or sub if
+ // the inputs are freely truncatable.
+ if (auto *BO = dyn_cast<BinaryOperator>(Op)) {
+ if (BO->getParent() == Mul->getParent() &&
+ IsFreeTruncation(BO->getOperand(0)) &&
+ IsFreeTruncation(BO->getOperand(1)) &&
+ ComputeNumSignBits(Op, *DL, 0, nullptr, Mul) > 16)
+ return true;
+ }
+
+ return false;
+ };
+
+ // At least one of the operands needs to be shrinkable.
+ if (!CanShrinkOp(LHS) && !CanShrinkOp(RHS))
+ return false;
+
+ IRBuilder<> Builder(Mul);
+
+ auto *MulTy = cast<FixedVectorType>(Op->getType());
+ unsigned NumElts = MulTy->getNumElements();
+
+ // Extract the even elements and the odd elements and add them together.
+ // SelectionDAG will pattern match this to pmaddwd. The result vector will
+ // be half the original width.
+ SmallVector<int, 16> EvenMask(NumElts / 2);
+ SmallVector<int, 16> OddMask(NumElts / 2);
+ for (int i = 0, e = NumElts / 2; i != e; ++i) {
+ EvenMask[i] = i * 2;
+ OddMask[i] = i * 2 + 1;
+ }
+ // Creating a new mul so the replaceAllUsesWith below doesn't replace the
+ // uses in the shuffles we're creating.
+ Value *NewMul = Builder.CreateMul(Mul->getOperand(0), Mul->getOperand(1));
+ Value *EvenElts = Builder.CreateShuffleVector(NewMul, NewMul, EvenMask);
+ Value *OddElts = Builder.CreateShuffleVector(NewMul, NewMul, OddMask);
+ Value *MAdd = Builder.CreateAdd(EvenElts, OddElts);
+
+ // Concatenate zeroes to extend back to the original type.
+ SmallVector<int, 32> ConcatMask(NumElts);
+ std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
+ Value *Zero = Constant::getNullValue(MAdd->getType());
+ Value *Concat = Builder.CreateShuffleVector(MAdd, Zero, ConcatMask);
+
+ Mul->replaceAllUsesWith(Concat);
+ Mul->eraseFromParent();
+
+ return true;
+}
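+
+// For reference, a simplified sketch (value names hypothetical) of what the
+// rewrite above produces for a <16 x i32> multiply; the even/odd shuffles
+// plus the add are what SelectionDAG later matches to pmaddwd, and the zero
+// concat restores the original width so the surrounding reduction is
+// unchanged:
+//
+//   %newmul = mul <16 x i32> %lhs, %rhs
+//   %even   = shufflevector %newmul, %newmul, <0,2,4,...,14>     ; <8 x i32>
+//   %odd    = shufflevector %newmul, %newmul, <1,3,5,...,15>     ; <8 x i32>
+//   %madd   = add <8 x i32> %even, %odd
+//   %concat = shufflevector %madd, zeroinitializer, <0,1,...,15> ; <16 x i32>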
+
+bool X86PartialReduction::trySADReplacement(Instruction *Op) {
+ if (!ST->hasSSE2())
+ return false;
+
+ // TODO: There's nothing special about i32, any integer type above i16 should
+ // work just as well.
+ if (!cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(32))
+ return false;
+
+ // Operand should be a select.
+ auto *SI = dyn_cast<SelectInst>(Op);
+ if (!SI)
+ return false;
+
+ // Select needs to implement absolute value.
+ Value *LHS, *RHS;
+ auto SPR = matchSelectPattern(SI, LHS, RHS);
+ if (SPR.Flavor != SPF_ABS)
+ return false;
+
+ // Need a subtract of two values.
+ auto *Sub = dyn_cast<BinaryOperator>(LHS);
+ if (!Sub || Sub->getOpcode() != Instruction::Sub)
+ return false;
+
+ // Look for zero extend from i8.
+ auto getZeroExtendedVal = [](Value *Op) -> Value * {
+ if (auto *ZExt = dyn_cast<ZExtInst>(Op))
+ if (cast<VectorType>(ZExt->getOperand(0)->getType())
+ ->getElementType()
+ ->isIntegerTy(8))
+ return ZExt->getOperand(0);
+
+ return nullptr;
+ };
+
+ // Both operands of the subtract should be extends from vXi8.
+ Value *Op0 = getZeroExtendedVal(Sub->getOperand(0));
+ Value *Op1 = getZeroExtendedVal(Sub->getOperand(1));
+ if (!Op0 || !Op1)
+ return false;
+
+ IRBuilder<> Builder(SI);
+
+ auto *OpTy = cast<FixedVectorType>(Op->getType());
+ unsigned NumElts = OpTy->getNumElements();
+
+ unsigned IntrinsicNumElts;
+ Intrinsic::ID IID;
+ if (ST->hasBWI() && NumElts >= 64) {
+ IID = Intrinsic::x86_avx512_psad_bw_512;
+ IntrinsicNumElts = 64;
+ } else if (ST->hasAVX2() && NumElts >= 32) {
+ IID = Intrinsic::x86_avx2_psad_bw;
+ IntrinsicNumElts = 32;
+ } else {
+ IID = Intrinsic::x86_sse2_psad_bw;
+ IntrinsicNumElts = 16;
+ }
+
+ Function *PSADBWFn = Intrinsic::getDeclaration(SI->getModule(), IID);
+
+ if (NumElts < 16) {
+ // Pad input with zeroes.
+ SmallVector<int, 32> ConcatMask(16);
+ for (unsigned i = 0; i != NumElts; ++i)
+ ConcatMask[i] = i;
+ for (unsigned i = NumElts; i != 16; ++i)
+ ConcatMask[i] = (i % NumElts) + NumElts;
+
+ Value *Zero = Constant::getNullValue(Op0->getType());
+ Op0 = Builder.CreateShuffleVector(Op0, Zero, ConcatMask);
+ Op1 = Builder.CreateShuffleVector(Op1, Zero, ConcatMask);
+ NumElts = 16;
+ }
+
+ // Intrinsics produce vXi64 and need to be cast to vXi32.
+ auto *I32Ty =
+ FixedVectorType::get(Builder.getInt32Ty(), IntrinsicNumElts / 4);
+
+ assert(NumElts % IntrinsicNumElts == 0 && "Unexpected number of elements!");
+ unsigned NumSplits = NumElts / IntrinsicNumElts;
+
+ // First collect the pieces we need.
+ SmallVector<Value *, 4> Ops(NumSplits);
+ for (unsigned i = 0; i != NumSplits; ++i) {
+ SmallVector<int, 64> ExtractMask(IntrinsicNumElts);
+ std::iota(ExtractMask.begin(), ExtractMask.end(), i * IntrinsicNumElts);
+ Value *ExtractOp0 = Builder.CreateShuffleVector(Op0, Op0, ExtractMask);
+ Value *ExtractOp1 = Builder.CreateShuffleVector(Op1, Op0, ExtractMask);
+ Ops[i] = Builder.CreateCall(PSADBWFn, {ExtractOp0, ExtractOp1});
+ Ops[i] = Builder.CreateBitCast(Ops[i], I32Ty);
+ }
+
+ assert(isPowerOf2_32(NumSplits) && "Expected power of 2 splits");
+ unsigned Stages = Log2_32(NumSplits);
+ for (unsigned s = Stages; s > 0; --s) {
+ unsigned NumConcatElts =
+ cast<FixedVectorType>(Ops[0]->getType())->getNumElements() * 2;
+ for (unsigned i = 0; i != 1U << (s - 1); ++i) {
+ SmallVector<int, 64> ConcatMask(NumConcatElts);
+ std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
+ Ops[i] = Builder.CreateShuffleVector(Ops[i*2], Ops[i*2+1], ConcatMask);
+ }
+ }
+
+ // At this point the final value should be in Ops[0]. Now we need to adjust
+ // it back to the original type.
+ NumElts = cast<FixedVectorType>(OpTy)->getNumElements();
+ if (NumElts == 2) {
+ // Extract down to 2 elements.
+ Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ArrayRef<int>{0, 1});
+ } else if (NumElts >= 8) {
+ SmallVector<int, 32> ConcatMask(NumElts);
+ unsigned SubElts =
+ cast<FixedVectorType>(Ops[0]->getType())->getNumElements();
+ for (unsigned i = 0; i != SubElts; ++i)
+ ConcatMask[i] = i;
+ for (unsigned i = SubElts; i != NumElts; ++i)
+ ConcatMask[i] = (i % SubElts) + SubElts;
+
+ Value *Zero = Constant::getNullValue(Ops[0]->getType());
+ Ops[0] = Builder.CreateShuffleVector(Ops[0], Zero, ConcatMask);
+ }
+
+ SI->replaceAllUsesWith(Ops[0]);
+ SI->eraseFromParent();
+
+ return true;
+}
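+
+// For reference, a simplified sketch (value names hypothetical) of the SAD
+// rewrite above for a <16 x i32> abs(sub(zext <16 x i8>, zext <16 x i8>)):
+//
+//   %sad   = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %x, <16 x i8> %y)
+//   %sad32 = bitcast <2 x i64> %sad to <4 x i32>
+//   %wide  = shufflevector %sad32, zeroinitializer, <16-element mask>
+//
+// psadbw sums the absolute differences of each group of 8 bytes into a
+// 64-bit lane; the remaining lanes of the widened result are zero, so the
+// final horizontal add reduction still produces the same scalar sum.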
+
+// Walk backwards from the ExtractElementInst and determine if it is the end of
+// a horizontal reduction. Return the input to the reduction if we find one.
+static Value *matchAddReduction(const ExtractElementInst &EE) {
+ // Make sure we're extracting index 0.
+ auto *Index = dyn_cast<ConstantInt>(EE.getIndexOperand());
+ if (!Index || !Index->isNullValue())
+ return nullptr;
+
+ const auto *BO = dyn_cast<BinaryOperator>(EE.getVectorOperand());
+ if (!BO || BO->getOpcode() != Instruction::Add || !BO->hasOneUse())
+ return nullptr;
+
+ unsigned NumElems = cast<FixedVectorType>(BO->getType())->getNumElements();
+ // Ensure the reduction size is a power of 2.
+ if (!isPowerOf2_32(NumElems))
+ return nullptr;
+
+ const Value *Op = BO;
+ unsigned Stages = Log2_32(NumElems);
+ for (unsigned i = 0; i != Stages; ++i) {
+ const auto *BO = dyn_cast<BinaryOperator>(Op);
+ if (!BO || BO->getOpcode() != Instruction::Add)
+ return nullptr;
+
+ // If this isn't the first add, then it should only have 2 uses: the
+ // shuffle and the add we checked in the previous iteration.
+ if (i != 0 && !BO->hasNUses(2))
+ return nullptr;
+
+ Value *LHS = BO->getOperand(0);
+ Value *RHS = BO->getOperand(1);
+
+ auto *Shuffle = dyn_cast<ShuffleVectorInst>(LHS);
+ if (Shuffle) {
+ Op = RHS;
+ } else {
+ Shuffle = dyn_cast<ShuffleVectorInst>(RHS);
+ Op = LHS;
+ }
+
+ // The first operand of the shuffle should be the same as the other operand
+ // of the bin op.
+ if (!Shuffle || Shuffle->getOperand(0) != Op)
+ return nullptr;
+
+ // Verify the shuffle has the expected (at this stage of the pyramid) mask.
+ unsigned MaskEnd = 1 << i;
+ for (unsigned Index = 0; Index < MaskEnd; ++Index)
+ if (Shuffle->getMaskValue(Index) != (int)(MaskEnd + Index))
+ return nullptr;
+ }
+
+ return const_cast<Value *>(Op);
+}
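+
+// For reference, the pyramid matched above for an <8 x i32> reduction (a
+// sketch, value names hypothetical). Walking back from the extract, the mask
+// prefix checked at stage i is <1>, then <2,3>, then <4,5,6,7>:
+//
+//   %s2 = shufflevector %v,  undef, <4,5,6,7,u,u,u,u> ; %a2 = add %v,  %s2
+//   %s1 = shufflevector %a2, undef, <2,3,u,u,u,u,u,u> ; %a1 = add %a2, %s1
+//   %s0 = shufflevector %a1, undef, <1,u,u,u,u,u,u,u> ; %a0 = add %a1, %s0
+//   %r  = extractelement <8 x i32> %a0, i32 0
+//
+// matchAddReduction(%r) walks %a0 -> %a1 -> %a2 and returns %v, the input
+// to the reduction.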
+
+// See if this BO is reachable from this Phi by walking forward through single
+// use BinaryOperators with the same opcode. If we reach BO again, we know
+// we've found a loop and it is safe to step through this Add to find more
+// leaves.
+static bool isReachableFromPHI(PHINode *Phi, BinaryOperator *BO) {
+ // The PHI itself should only have one use.
+ if (!Phi->hasOneUse())
+ return false;
+
+ Instruction *U = cast<Instruction>(*Phi->user_begin());
+ if (U == BO)
+ return true;
+
+ while (U->hasOneUse() && U->getOpcode() == BO->getOpcode())
+ U = cast<Instruction>(*U->user_begin());
+
+ return U == BO;
+}
+
+// Collect all the leaves of the tree of adds that feeds into the horizontal
+// reduction. Root is the Value that is used by the horizontal reduction.
+// We look through single use phis, single use adds, or adds that are used by
+// a phi that forms a loop with the add.
+static void collectLeaves(Value *Root, SmallVectorImpl<Instruction *> &Leaves) {
+ SmallPtrSet<Value *, 8> Visited;
+ SmallVector<Value *, 8> Worklist;
+ Worklist.push_back(Root);
+
+ while (!Worklist.empty()) {
+ Value *V = Worklist.pop_back_val();
+ if (!Visited.insert(V).second)
+ continue;
+
+ if (auto *PN = dyn_cast<PHINode>(V)) {
+ // The PHI node should have a single use unless it is the root node, in
+ // which case it has 2 uses.
+ if (!PN->hasNUses(PN == Root ? 2 : 1))
+ break;
+
+ // Push incoming values to the worklist.
+ for (Value *InV : PN->incoming_values())
+ Worklist.push_back(InV);
+
+ continue;
+ }
+
+ if (auto *BO = dyn_cast<BinaryOperator>(V)) {
+ if (BO->getOpcode() == Instruction::Add) {
+ // Simple case. Single use, just push its operands to the worklist.
+ if (BO->hasNUses(BO == Root ? 2 : 1)) {
+ for (Value *Op : BO->operands())
+ Worklist.push_back(Op);
+ continue;
+ }
+
+ // If there is an additional use, make sure it is an unvisited phi that
+ // gets us back to this node.
+ if (BO->hasNUses(BO == Root ? 3 : 2)) {
+ PHINode *PN = nullptr;
+ for (auto *U : Root->users())
+ if (auto *P = dyn_cast<PHINode>(U))
+ if (!Visited.count(P))
+ PN = P;
+
+ // If we didn't find a 2-input PHI then this isn't a case we can
+ // handle.
+ if (!PN || PN->getNumIncomingValues() != 2)
+ continue;
+
+ // Walk forward from this phi to see if it reaches back to this add.
+ if (!isReachableFromPHI(PN, BO))
+ continue;
+
+ // The phi forms a loop with this Add, push its operands.
+ for (Value *Op : BO->operands())
+ Worklist.push_back(Op);
+ }
+ }
+ }
+
+ // Not an add or phi, make it a leaf.
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ if (!V->hasNUses(I == Root ? 2 : 1))
+ continue;
+
+ // Add this as a leaf.
+ Leaves.push_back(I);
+ }
+ }
+}
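+
+// For reference, a sketch (names hypothetical) of the looped case handled
+// above: the add's result has an extra use in a phi whose value feeds back
+// into the add, which is still safe to look through when collecting leaves:
+//
+//   loop:
+//     %acc = phi <8 x i32> [ zeroinitializer, %entry ], [ %sum, %loop ]
+//     %mul = mul <8 x i32> %a, %b
+//     %sum = add <8 x i32> %acc, %mul
+//     br i1 %done, label %exit, label %loop
+//   exit:
+//     ; %sum feeds the shuffle/add reduction pyramid and the final extract,
+//     ; and %mul is collected as a leaf for tryMAddReplacement.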
+
+bool X86PartialReduction::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ auto &TM = TPC->getTM<X86TargetMachine>();
+ ST = TM.getSubtargetImpl(F);
+
+ DL = &F.getParent()->getDataLayout();
+
+ bool MadeChange = false;
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ auto *EE = dyn_cast<ExtractElementInst>(&I);
+ if (!EE)
+ continue;
+
+ // First find a reduction tree.
+ // FIXME: Do we need to handle other opcodes than Add?
+ Value *Root = matchAddReduction(*EE);
+ if (!Root)
+ continue;
+
+ SmallVector<Instruction *, 8> Leaves;
+ collectLeaves(Root, Leaves);
+
+ for (Instruction *I : Leaves) {
+ if (tryMAddReplacement(I)) {
+ MadeChange = true;
+ continue;
+ }
+
+ // Don't do SAD matching on the root node. SelectionDAG already
+ // has support for that and currently generates better code.
+ if (I != Root && trySADReplacement(I))
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
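+
+// Usage sketch (assumed wiring, not shown in this file): the X86 target is
+// expected to schedule this pass from its IR pass configuration, e.g.
+//
+//   addPass(createX86PartialReductionPass());
+//
+// so it runs before instruction selection, where SelectionDAG can match the
+// rewritten patterns to pmaddwd/psadbw.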
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td b/contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td
index 93238983afa2..833013fb69f3 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td
@@ -223,3 +223,13 @@ def ZnVer1PfmCounters : ProcPfmCounters {
];
}
def : PfmCountersBinding<"znver1", ZnVer1PfmCounters>;
+
+def ZnVer2PfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cycles_not_in_halt">;
+ let UopsCounter = PfmCounter<"retired_uops">;
+ let IssueCounters = [
+ PfmIssueCounter<"Zn2AGU", "ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">,
+ PfmIssueCounter<"Zn2Divider", "div_op_count">
+ ];
+}
+def : PfmCountersBinding<"znver2", ZnVer2PfmCounters>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.cpp
index f69626b2622e..f456728cf47b 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -72,12 +72,6 @@ X86RegisterInfo::X86RegisterInfo(const Triple &TT)
}
}
-bool
-X86RegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
- // ExecutionDomainFix, BreakFalseDeps and PostRAScheduler require liveness.
- return true;
-}
-
int
X86RegisterInfo::getSEHRegNum(unsigned i) const {
return getEncodingValue(i);
@@ -633,18 +627,22 @@ static bool CantUseSP(const MachineFrameInfo &MFI) {
}
bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
- const MachineFrameInfo &MFI = MF.getFrameInfo();
-
- if (!EnableBasePointer)
- return false;
-
- // When we need stack realignment, we can't address the stack from the frame
- // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
- // can't address variables from the stack pointer. MS inline asm can
- // reference locals while also adjusting the stack pointer. When we can't
- // use both the SP and the FP, we need a separate base pointer register.
- bool CantUseFP = needsStackRealignment(MF);
- return CantUseFP && CantUseSP(MFI);
+ const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ if (X86FI->hasPreallocatedCall())
+ return true;
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ if (!EnableBasePointer)
+ return false;
+
+ // When we need stack realignment, we can't address the stack from the frame
+ // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
+ // can't address variables from the stack pointer. MS inline asm can
+ // reference locals while also adjusting the stack pointer. When we can't
+ // use both the SP and the FP, we need a separate base pointer register.
+ bool CantUseFP = needsStackRealignment(MF);
+ return CantUseFP && CantUseSP(MFI);
}
bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
@@ -667,7 +665,7 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
}
bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
- unsigned Reg, int &FrameIdx) const {
+ Register Reg, int &FrameIdx) const {
// Since X86 defines assignCalleeSavedSpillSlots which always return true
// this function neither used nor tested.
llvm_unreachable("Unused function on X86. Otherwise need a test case.");
@@ -728,7 +726,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Determine base register and offset.
int FIOffset;
- unsigned BasePtr;
+ Register BasePtr;
if (MI.isReturn()) {
assert((!needsStackRealignment(MF) ||
MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.h
index b82920898069..3435c0a10b04 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -54,10 +54,6 @@ public:
// FIXME: This should be tablegen'd like getDwarfRegNum is
int getSEHRegNum(unsigned i) const;
- /// Code Generation virtual methods...
- ///
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
-
/// getMatchingSuperRegClass - Return a subclass of the specified register
/// class A so that each register in it has a sub-register of the
/// specified sub-register index which is in the specified register class B.
@@ -125,7 +121,7 @@ public:
bool canRealignStack(const MachineFunction &MF) const override;
- bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
+ bool hasReservedSpillSlot(const MachineFunction &MF, Register Reg,
int &FrameIdx) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator MI,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.td
index 3cfaf714e93e..8de5b94bbffa 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -265,6 +265,16 @@ let SubRegIndices = [sub_ymm] in {
}
}
+// Tile "registers".
+def TMM0: X86Reg<"tmm0", 0>;
+def TMM1: X86Reg<"tmm1", 1>;
+def TMM2: X86Reg<"tmm2", 2>;
+def TMM3: X86Reg<"tmm3", 3>;
+def TMM4: X86Reg<"tmm4", 4>;
+def TMM5: X86Reg<"tmm5", 5>;
+def TMM6: X86Reg<"tmm6", 6>;
+def TMM7: X86Reg<"tmm7", 7>;
+
// Mask Registers, used by AVX-512 instructions.
def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118, 93, 93]>;
def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119, 94, 94]>;
@@ -498,7 +508,7 @@ def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
// which we do not have right now.
def LOW32_ADDR_ACCESS : RegisterClass<"X86", [i32], 32, (add GR32, RIP)>;
-// When RBP is used as a base pointer in a 32-bit addresses environement,
+// When RBP is used as a base pointer in a 32-bit addresses environment,
// this is also safe to use the full register to access addresses.
// Since RBP will never be spilled, stick to a 32 alignment to save
// on memory consumption.
@@ -621,3 +631,8 @@ def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
// Bound registers
def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>;
+
+// Tiles
+let isAllocatable = 0 in
+def TILE : RegisterClass<"X86", [untyped], 0,
+ (sequence "TMM%u", 0, 7)> {let Size = 8192;}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedBroadwell.td b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedBroadwell.td
index 9b1fcaa8a13d..4aea7bc253bb 100755
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -260,7 +260,8 @@ defm : BWWriteResPair<WriteFCmp64X, [BWPort1], 3, [1], 1, 5>; // Floating point
defm : BWWriteResPair<WriteFCmp64Y, [BWPort1], 3, [1], 1, 6>; // Floating point double compare (YMM/ZMM).
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
-defm : BWWriteResPair<WriteFCom, [BWPort1], 3>; // Floating point compare to flags.
+defm : BWWriteResPair<WriteFCom, [BWPort1], 3>; // Floating point compare to flags (X87).
+defm : BWWriteResPair<WriteFComX, [BWPort1], 3>; // Floating point compare to flags (SSE).
defm : BWWriteResPair<WriteFMul, [BWPort01], 3, [1], 1, 5>; // Floating point multiplication.
defm : BWWriteResPair<WriteFMulX, [BWPort01], 3, [1], 1, 5>; // Floating point multiplication (XMM).
@@ -351,8 +352,10 @@ defm : X86WriteRes<WriteVecStoreX, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteVecMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore32, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore64, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [BWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [BWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [BWPort015], 1, [1], 1>;
@@ -986,7 +989,7 @@ def BWWriteResGroup62 : SchedWriteRes<[BWPort6,BWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup62], (instrs FARJMP64)>;
+def: InstRW<[BWWriteResGroup62], (instrs FARJMP64m)>;
def: InstRW<[BWWriteResGroup62], (instregex "JMP(16|32|64)m")>;
def BWWriteResGroup64 : SchedWriteRes<[BWPort23,BWPort15]> {
@@ -1127,7 +1130,7 @@ def BWWriteResGroup89 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPort
let ResourceCycles = [1,1,1,1,1];
}
def: InstRW<[BWWriteResGroup89], (instregex "CALL(16|32|64)m")>;
-def: InstRW<[BWWriteResGroup89], (instrs FARCALL64)>;
+def: InstRW<[BWWriteResGroup89], (instrs FARCALL64m)>;
def BWWriteResGroup90 : SchedWriteRes<[BWPort6,BWPort06,BWPort15,BWPort0156]> {
let Latency = 7;
@@ -1479,54 +1482,42 @@ def BWWriteResGroup182 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
def: InstRW<[BWWriteResGroup182], (instregex "DIVR_FI(16|32)m")>;
def BWWriteResGroup183_1 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
- let Latency = 22;
+ let Latency = 17;
let NumMicroOps = 7;
let ResourceCycles = [1,3,2,1];
}
-def: InstRW<[BWWriteResGroup183_1], (instrs VGATHERQPDrm)>;
+def: InstRW<[BWWriteResGroup183_1], (instrs VGATHERDPDrm, VPGATHERDQrm,
+ VGATHERQPDrm, VPGATHERQQrm)>;
def BWWriteResGroup183_2 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
- let Latency = 23;
+ let Latency = 18;
let NumMicroOps = 9;
let ResourceCycles = [1,3,4,1];
}
-def: InstRW<[BWWriteResGroup183_2], (instrs VGATHERQPDYrm)>;
+def: InstRW<[BWWriteResGroup183_2], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
+ VGATHERQPDYrm, VPGATHERQQYrm)>;
def BWWriteResGroup183_3 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
- let Latency = 24;
+ let Latency = 19;
let NumMicroOps = 9;
let ResourceCycles = [1,5,2,1];
}
-def: InstRW<[BWWriteResGroup183_3], (instrs VGATHERQPSYrm)>;
+def: InstRW<[BWWriteResGroup183_3], (instrs VGATHERQPSrm, VPGATHERQDrm)>;
def BWWriteResGroup183_4 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
- let Latency = 25;
- let NumMicroOps = 7;
- let ResourceCycles = [1,3,2,1];
+ let Latency = 19;
+ let NumMicroOps = 10;
+ let ResourceCycles = [1,4,4,1];
}
-def: InstRW<[BWWriteResGroup183_4], (instrs VGATHERDPDrm,
- VGATHERDPSrm)>;
+def: InstRW<[BWWriteResGroup183_4], (instrs VGATHERDPSrm, VPGATHERDDrm,
+ VGATHERQPSYrm, VPGATHERQDYrm)>;
def BWWriteResGroup183_5 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
- let Latency = 26;
- let NumMicroOps = 9;
- let ResourceCycles = [1,5,2,1];
-}
-def: InstRW<[BWWriteResGroup183_5], (instrs VGATHERDPDYrm)>;
-
-def BWWriteResGroup183_6 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
- let Latency = 26;
+ let Latency = 21;
let NumMicroOps = 14;
let ResourceCycles = [1,4,8,1];
}
-def: InstRW<[BWWriteResGroup183_6], (instrs VGATHERDPSYrm)>;
-
-def BWWriteResGroup183_7 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
- let Latency = 27;
- let NumMicroOps = 9;
- let ResourceCycles = [1,5,2,1];
-}
-def: InstRW<[BWWriteResGroup183_7], (instrs VGATHERQPSrm)>;
+def: InstRW<[BWWriteResGroup183_5], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
def BWWriteResGroup185 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPort0156]> {
let Latency = 29;
@@ -1604,7 +1595,7 @@ def: InstRW<[BWWriteResGroup202], (instrs FSTENVm)>;
def: InstRW<[WriteZero], (instrs CLC)>;
-// Intruction variants handled by the renamer. These might not need execution
+// Instruction variants handled by the renamer. These might not need execution
// ports in certain conditions.
// See Agner's Fog "The microarchitecture of Intel, AMD and VIA CPUs",
// section "Haswell and Broadwell Pipeline" > "Register allocation and
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedHaswell.td b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedHaswell.td
index 06f417501b21..746dbaeca189 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -261,6 +261,7 @@ defm : HWWriteResPair<WriteFCmp64Y, [HWPort1], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteFCmp64Z, [HWPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFCom, [HWPort1], 3>;
+defm : HWWriteResPair<WriteFComX, [HWPort1], 3>;
defm : HWWriteResPair<WriteFMul, [HWPort01], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteFMulX, [HWPort01], 5, [1], 1, 6>;
@@ -391,8 +392,10 @@ defm : X86WriteRes<WriteVecStoreX, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteVecMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore32, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore64, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [HWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [HWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [HWPort015], 1, [1], 1>;
@@ -996,7 +999,7 @@ def HWWriteResGroup14 : SchedWriteRes<[HWPort6,HWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup14], (instrs FARJMP64)>;
+def: InstRW<[HWWriteResGroup14], (instrs FARJMP64m)>;
def: InstRW<[HWWriteResGroup14], (instregex "JMP(16|32|64)m")>;
def HWWriteResGroup16 : SchedWriteRes<[HWPort23,HWPort15]> {
@@ -1205,7 +1208,7 @@ def HWWriteResGroup48 : SchedWriteRes<[HWPort4,HWPort6,HWPort23,HWPort237,HWPort
let ResourceCycles = [1,1,1,1,1];
}
def: InstRW<[HWWriteResGroup48], (instregex "CALL(16|32|64)m")>;
-def: InstRW<[HWWriteResGroup48], (instrs FARCALL64)>;
+def: InstRW<[HWWriteResGroup48], (instrs FARCALL64m)>;
def HWWriteResGroup50 : SchedWriteRes<[HWPort1]> {
let Latency = 3;
@@ -1784,80 +1787,60 @@ def HWWriteResGroup183 : SchedWriteRes<[HWPort0,HWPort1,HWPort4,HWPort5,HWPort6,
}
def: InstRW<[HWWriteResGroup183], (instrs FSTENVm)>;
-def HWWriteResGroup184 : SchedWriteRes<[HWPort0, HWPort5, HWPort15, HWPort015, HWPort06, HWPort23]> {
- let Latency = 26;
+def HWWriteResGroup184 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
+ let Latency = 14;
let NumMicroOps = 12;
- let ResourceCycles = [2,2,1,3,2,2];
-}
-def: InstRW<[HWWriteResGroup184], (instrs VGATHERDPDrm,
- VPGATHERDQrm,
- VPGATHERDDrm)>;
-
-def HWWriteResGroup185 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
- let Latency = 24;
- let NumMicroOps = 22;
- let ResourceCycles = [5,3,4,1,5,4];
+ let ResourceCycles = [2,2,2,1,3,2];
}
-def: InstRW<[HWWriteResGroup185], (instrs VGATHERQPDYrm,
- VPGATHERQQYrm)>;
+def: InstRW<[HWWriteResGroup184], (instrs VGATHERDPDrm, VPGATHERDQrm)>;
-def HWWriteResGroup186 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
- let Latency = 28;
- let NumMicroOps = 22;
- let ResourceCycles = [5,3,4,1,5,4];
-}
-def: InstRW<[HWWriteResGroup186], (instrs VPGATHERQDYrm)>;
-
-def HWWriteResGroup187 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
- let Latency = 25;
- let NumMicroOps = 22;
- let ResourceCycles = [5,3,4,1,5,4];
+def HWWriteResGroup185 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
+ let Latency = 17;
+ let NumMicroOps = 20;
+ let ResourceCycles = [3,3,4,1,5,4];
}
-def: InstRW<[HWWriteResGroup187], (instrs VPGATHERQDrm)>;
+def: InstRW<[HWWriteResGroup185], (instrs VGATHERDPDYrm, VPGATHERDQYrm)>;
-def HWWriteResGroup188 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
- let Latency = 27;
+def HWWriteResGroup186 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
+ let Latency = 16;
let NumMicroOps = 20;
let ResourceCycles = [3,3,4,1,5,4];
}
-def: InstRW<[HWWriteResGroup188], (instrs VGATHERDPDYrm,
- VPGATHERDQYrm)>;
+def: InstRW<[HWWriteResGroup186], (instrs VGATHERDPSrm, VPGATHERDDrm)>;
-def HWWriteResGroup189 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
- let Latency = 27;
+def HWWriteResGroup187 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
+ let Latency = 22;
let NumMicroOps = 34;
let ResourceCycles = [5,3,8,1,9,8];
}
-def: InstRW<[HWWriteResGroup189], (instrs VGATHERDPSYrm,
- VPGATHERDDYrm)>;
+def: InstRW<[HWWriteResGroup187], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
-def HWWriteResGroup190 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
- let Latency = 23;
+def HWWriteResGroup188 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
+ let Latency = 15;
let NumMicroOps = 14;
let ResourceCycles = [3,3,2,1,3,2];
}
-def: InstRW<[HWWriteResGroup190], (instrs VGATHERQPDrm,
- VPGATHERQQrm)>;
+def: InstRW<[HWWriteResGroup188], (instrs VGATHERQPDrm, VPGATHERQQrm)>;
-def HWWriteResGroup191 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
- let Latency = 28;
- let NumMicroOps = 15;
- let ResourceCycles = [3,3,2,1,4,2];
+def HWWriteResGroup189 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
+ let Latency = 17;
+ let NumMicroOps = 22;
+ let ResourceCycles = [5,3,4,1,5,4];
}
-def: InstRW<[HWWriteResGroup191], (instrs VGATHERQPSYrm)>;
+def: InstRW<[HWWriteResGroup189], (instrs VGATHERQPDYrm, VPGATHERQQYrm,
+ VGATHERQPSYrm, VPGATHERQDYrm)>;
-def HWWriteResGroup192 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> {
- let Latency = 25;
+def HWWriteResGroup190 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> {
+ let Latency = 16;
let NumMicroOps = 15;
let ResourceCycles = [3,3,2,1,4,2];
}
-def: InstRW<[HWWriteResGroup192], (instrs VGATHERQPSrm,
- VGATHERDPSrm)>;
+def: InstRW<[HWWriteResGroup190], (instrs VGATHERQPSrm, VPGATHERQDrm)>;
def: InstRW<[WriteZero], (instrs CLC)>;
-// Intruction variants handled by the renamer. These might not need execution
+// Instruction variants handled by the renamer. These might not need execution
// ports in certain conditions.
// See Agner's Fog "The microarchitecture of Intel, AMD and VIA CPUs",
// section "Haswell and Broadwell Pipeline" > "Register allocation and
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSandyBridge.td b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 26d4d8fa3549..ac32f1b19990 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -238,6 +238,7 @@ defm : SBWriteResPair<WriteFCmp64Y, [SBPort1], 3, [1], 1, 7>;
defm : SBWriteResPair<WriteFCmp64Z, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteFCom, [SBPort1], 3>;
+defm : SBWriteResPair<WriteFComX, [SBPort1], 3>;
defm : SBWriteResPair<WriteFMul, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteFMulX, [SBPort0], 5, [1], 1, 6>;
@@ -366,8 +367,10 @@ defm : X86WriteRes<WriteVecStoreX, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
-defm : X86WriteRes<WriteVecMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteVecMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteVecMaskedStore32, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteVecMaskedStore64, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SBPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SBPort05], 1, [1], 1>;
@@ -481,7 +484,7 @@ def : WriteRes<WritePCmpEStrM, [SBPort015]> {
let ResourceCycles = [8];
}
def : WriteRes<WritePCmpEStrMLd, [SBPort015, SBPort23]> {
- let Latency = 11;
+ let Latency = 17;
let ResourceCycles = [7, 1];
}
@@ -503,7 +506,7 @@ def : WriteRes<WritePCmpEStrI, [SBPort015]> {
let ResourceCycles = [8];
}
def : WriteRes<WritePCmpEStrILd, [SBPort015, SBPort23]> {
- let Latency = 4;
+ let Latency = 10;
let ResourceCycles = [7, 1];
}
@@ -541,7 +544,7 @@ def : WriteRes<WriteAESKeyGen, [SBPort015]> {
let ResourceCycles = [11];
}
def : WriteRes<WriteAESKeyGenLd, [SBPort015, SBPort23]> {
- let Latency = 8;
+ let Latency = 14;
let ResourceCycles = [10, 1];
}
@@ -551,7 +554,7 @@ def : WriteRes<WriteCLMul, [SBPort015]> {
let ResourceCycles = [18];
}
def : WriteRes<WriteCLMulLd, [SBPort015, SBPort23]> {
- let Latency = 14;
+ let Latency = 20;
let ResourceCycles = [17, 1];
}
@@ -881,7 +884,7 @@ def SBWriteResGroup64 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SBWriteResGroup64], (instrs FARJMP64)>;
+def: InstRW<[SBWriteResGroup64], (instrs FARJMP64m)>;
def SBWriteResGroup66 : SchedWriteRes<[SBPort0,SBPort4,SBPort23]> {
let Latency = 7;
@@ -967,7 +970,7 @@ def SBWriteResGroup87 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
-def: InstRW<[SBWriteResGroup87], (instrs FARCALL64)>;
+def: InstRW<[SBWriteResGroup87], (instrs FARCALL64m)>;
def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
let Latency = 9;
@@ -1105,7 +1108,7 @@ def: InstRW<[SBWriteResGroupVzeroupper], (instrs VZEROUPPER)>;
def: InstRW<[WriteZero], (instrs CLC)>;
-// Intruction variants handled by the renamer. These might not need execution
+// Instruction variants handled by the renamer. These might not need execution
// ports in certain conditions.
// See Agner's Fog "The microarchitecture of Intel, AMD and VIA CPUs",
// section "Sandy Bridge and Ivy Bridge Pipeline" > "Register allocation and
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 9a511ecc0071..0599564765da 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -255,7 +255,8 @@ defm : SKLWriteResPair<WriteFCmp64X, [SKLPort01], 4, [1], 1, 6>;
defm : SKLWriteResPair<WriteFCmp64Y, [SKLPort01], 4, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
-defm : SKLWriteResPair<WriteFCom, [SKLPort0], 2>; // Floating point compare to flags.
+defm : SKLWriteResPair<WriteFCom, [SKLPort0], 2>; // Floating point compare to flags (X87).
+defm : SKLWriteResPair<WriteFComX, [SKLPort0], 2>; // Floating point compare to flags (SSE).
defm : SKLWriteResPair<WriteFMul, [SKLPort01], 4, [1], 1, 5>; // Floating point multiplication.
defm : SKLWriteResPair<WriteFMulX, [SKLPort01], 4, [1], 1, 6>;
@@ -342,8 +343,10 @@ defm : X86WriteRes<WriteVecStoreX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKLPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SKLPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SKLPort015], 1, [1], 1>;
@@ -361,9 +364,9 @@ defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : SKLWriteResPair<WriteVecTest, [SKLPort0,SKLPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
defm : SKLWriteResPair<WriteVecTestY, [SKLPort0,SKLPort5], 3, [1,1], 2, 7>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
-defm : SKLWriteResPair<WriteVecIMul, [SKLPort0] , 4, [1], 1, 5>; // Vector integer multiply.
-defm : SKLWriteResPair<WriteVecIMulX, [SKLPort01], 4, [1], 1, 6>;
-defm : SKLWriteResPair<WriteVecIMulY, [SKLPort01], 4, [1], 1, 7>;
+defm : SKLWriteResPair<WriteVecIMul, [SKLPort0] , 5, [1], 1, 5>; // Vector integer multiply.
+defm : SKLWriteResPair<WriteVecIMulX, [SKLPort01], 5, [1], 1, 6>;
+defm : SKLWriteResPair<WriteVecIMulY, [SKLPort01], 5, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : SKLWriteResPair<WritePMULLD, [SKLPort01], 10, [2], 2, 6>; // Vector PMULLD.
defm : SKLWriteResPair<WritePMULLDY, [SKLPort01], 10, [2], 2, 7>;
@@ -1012,7 +1015,7 @@ def SKLWriteResGroup72 : SchedWriteRes<[SKLPort6,SKLPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup72], (instrs FARJMP64)>;
+def: InstRW<[SKLWriteResGroup72], (instrs FARJMP64m)>;
def: InstRW<[SKLWriteResGroup72], (instregex "JMP(16|32|64)m")>;
def SKLWriteResGroup75 : SchedWriteRes<[SKLPort23,SKLPort15]> {
@@ -1193,7 +1196,7 @@ def SKLWriteResGroup102 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,
let ResourceCycles = [1,1,1,1,1];
}
def: InstRW<[SKLWriteResGroup102], (instregex "CALL(16|32|64)m")>;
-def: InstRW<[SKLWriteResGroup102], (instrs FARCALL64)>;
+def: InstRW<[SKLWriteResGroup102], (instrs FARCALL64m)>;
def SKLWriteResGroup103 : SchedWriteRes<[SKLPort6,SKLPort06,SKLPort15,SKLPort0156]> {
let Latency = 7;
@@ -1592,33 +1595,31 @@ def SKLWriteResGroup196 : SchedWriteRes<[SKLPort0,SKLPort23]> {
}
def: InstRW<[SKLWriteResGroup196], (instregex "DIV_F(32|64)m")>;
-def SKLWriteResGroup196_1 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> {
- let Latency = 22;
- let NumMicroOps = 5;
+def SKLWriteResGroupVEX2 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> {
+ let Latency = 18;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
let ResourceCycles = [1,2,1,1];
}
-def: InstRW<[SKLWriteResGroup196_1], (instrs VGATHERDPSrm,
- VGATHERDPDrm,
- VGATHERQPDrm,
- VGATHERQPSrm,
- VPGATHERDDrm,
- VPGATHERDQrm,
- VPGATHERQDrm,
- VPGATHERQQrm)>;
+def: InstRW<[SKLWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm,
+ VGATHERQPDrm, VPGATHERQQrm,
+ VGATHERQPSrm, VPGATHERQDrm)>;
-def SKLWriteResGroup196_2 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> {
- let Latency = 25;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
+def SKLWriteResGroupVEX4 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> {
+ let Latency = 20;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,4,1,1];
+}
+def: InstRW<[SKLWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
+ VGATHERDPSrm, VPGATHERDDrm,
+ VGATHERQPDYrm, VPGATHERQQYrm,
+ VGATHERQPSYrm, VPGATHERQDYrm)>;
+
+def SKLWriteResGroupVEX8 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> {
+ let Latency = 22;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,8,1,1];
}
-def: InstRW<[SKLWriteResGroup196_2], (instrs VGATHERDPSYrm,
- VGATHERQPDYrm,
- VGATHERQPSYrm,
- VPGATHERDDYrm,
- VPGATHERDQYrm,
- VPGATHERQDYrm,
- VPGATHERQQYrm,
- VGATHERDPDYrm)>;
+def: InstRW<[SKLWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
def SKLWriteResGroup198 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort5,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 23;
@@ -1745,7 +1746,7 @@ def: InstRW<[SKLWriteResGroup223], (instrs FSTENVm)>;
def: InstRW<[WriteZero], (instrs CLC)>;
-// Intruction variants handled by the renamer. These might not need execution
+// Instruction variants handled by the renamer. These might not need execution
// ports in certain conditions.
// See Agner's Fog "The microarchitecture of Intel, AMD and VIA CPUs",
// section "Skylake Pipeline" > "Register allocation and renaming".
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index a8c65435ab9b..7fc96d1eda89 100755
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -255,7 +255,8 @@ defm : SKXWriteResPair<WriteFCmp64X, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFCmp64Y, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFCmp64Z, [SKXPort05], 4, [1], 1, 7>;
-defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags.
+defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags (X87).
+defm : SKXWriteResPair<WriteFComX, [SKXPort0], 2>; // Floating point compare to flags (SSE).
defm : SKXWriteResPair<WriteFMul, [SKXPort01], 4, [1], 1, 5>; // Floating point multiplication.
defm : SKXWriteResPair<WriteFMulX, [SKXPort01], 4, [1], 1, 6>;
@@ -342,8 +343,10 @@ defm : X86WriteRes<WriteVecStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SKXPort015], 1, [1], 1>;
@@ -361,10 +364,10 @@ defm : SKXWriteResPair<WriteVecLogicZ,[SKXPort05], 1, [1], 1, 7>;
defm : SKXWriteResPair<WriteVecTest, [SKXPort0,SKXPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
defm : SKXWriteResPair<WriteVecTestY, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>;
defm : SKXWriteResPair<WriteVecTestZ, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>;
-defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 4, [1], 1, 5>; // Vector integer multiply.
-defm : SKXWriteResPair<WriteVecIMulX, [SKXPort01], 4, [1], 1, 6>;
-defm : SKXWriteResPair<WriteVecIMulY, [SKXPort01], 4, [1], 1, 7>;
-defm : SKXWriteResPair<WriteVecIMulZ, [SKXPort05], 4, [1], 1, 7>;
+defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 5, [1], 1, 5>; // Vector integer multiply.
+defm : SKXWriteResPair<WriteVecIMulX, [SKXPort01], 5, [1], 1, 6>;
+defm : SKXWriteResPair<WriteVecIMulY, [SKXPort01], 5, [1], 1, 7>;
+defm : SKXWriteResPair<WriteVecIMulZ, [SKXPort05], 5, [1], 1, 7>;
defm : SKXWriteResPair<WritePMULLD, [SKXPort01], 10, [2], 2, 6>; // Vector PMULLD.
defm : SKXWriteResPair<WritePMULLDY, [SKXPort01], 10, [2], 2, 7>;
defm : SKXWriteResPair<WritePMULLDZ, [SKXPort05], 10, [2], 2, 7>;
@@ -619,6 +622,8 @@ def: InstRW<[SKXWriteResGroup1], (instregex "KAND(B|D|Q|W)rr",
"KOR(B|D|Q|W)rr",
"KXNOR(B|D|Q|W)rr",
"KXOR(B|D|Q|W)rr",
+ "KSET0(B|D|Q|W)", // Same as KXOR
+ "KSET1(B|D|Q|W)", // Same as KXNOR
"MMX_PADDS(B|W)irr",
"MMX_PADDUS(B|W)irr",
"MMX_PAVG(B|W)irr",
@@ -814,19 +819,26 @@ def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> {
}
def: InstRW<[SKXWriteResGroup32], (instrs VPSADBWZrr)>; // TODO: 512-bit ops require ports 0/1 to be joined.
def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
- "KADD(B|D|Q|W)rr",
+ "VALIGND(Z|Z128|Z256)rri",
+ "VALIGNQ(Z|Z128|Z256)rri",
+ "VDBPSADBWZrri", // TODO: 512-bit ops require ports 0/1 to be joined.
+ "VPBROADCAST(B|W)rr",
+ "VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr")>;
+
+def SKXWriteResGroup33 : SchedWriteRes<[SKXPort5]> {
+ let Latency = 4;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[SKXWriteResGroup33], (instregex "KADD(B|D|Q|W)rr",
"KSHIFTL(B|D|Q|W)ri",
"KSHIFTR(B|D|Q|W)ri",
"KUNPCK(BW|DQ|WD)rr",
- "VALIGND(Z|Z128|Z256)rri",
- "VALIGNQ(Z|Z128|Z256)rri",
"VCMPPD(Z|Z128|Z256)rri",
"VCMPPS(Z|Z128|Z256)rri",
"VCMP(SD|SS)Zrr",
- "VDBPSADBWZrri", // TODO: 512-bit ops require ports 0/1 to be joined.
"VFPCLASS(PD|PS)(Z|Z128|Z256)rr",
"VFPCLASS(SD|SS)Zrr",
- "VPBROADCAST(B|W)rr",
"VPCMPB(Z|Z128|Z256)rri",
"VPCMPD(Z|Z128|Z256)rri",
"VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr",
@@ -834,7 +846,6 @@ def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0
"VPCMPQ(Z|Z128|Z256)rri",
"VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri",
"VPCMPW(Z|Z128|Z256)rri",
- "VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr",
"VPTEST(N?)M(B|D|Q|W)(Z|Z128|Z256)rr")>;
def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0,SKXPort0156]> {
@@ -1171,7 +1182,7 @@ def SKXWriteResGroup76 : SchedWriteRes<[SKXPort6,SKXPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup76], (instrs FARJMP64)>;
+def: InstRW<[SKXWriteResGroup76], (instrs FARJMP64m)>;
def: InstRW<[SKXWriteResGroup76], (instregex "JMP(16|32|64)m")>;
def SKXWriteResGroup79 : SchedWriteRes<[SKXPort23,SKXPort15]> {
@@ -1331,8 +1342,8 @@ def: InstRW<[SKXWriteResGroup95], (instrs VMOVNTDQAZ128rm,
def: InstRW<[SKXWriteResGroup95, ReadAfterVecXLd],
(instregex "VBLENDMPDZ128rm(b?)",
"VBLENDMPSZ128rm(b?)",
- "VBROADCASTI32X2Z128m(b?)",
- "VBROADCASTSSZ128m(b?)",
+ "VBROADCASTI32X2Z128rm(b?)",
+ "VBROADCASTSSZ128rm(b?)",
"VINSERT(F|I)128rm",
"VMOVAPDZ128rm(b?)",
"VMOVAPSZ128rm(b?)",
@@ -1350,8 +1361,8 @@ def: InstRW<[SKXWriteResGroup95, ReadAfterVecXLd],
"VPADD(B|D|Q|W)Z128rm(b?)",
"(V?)PADD(B|D|Q|W)rm",
"VPBLENDM(B|D|Q|W)Z128rm(b?)",
- "VPBROADCASTDZ128m(b?)",
- "VPBROADCASTQZ128m(b?)",
+ "VPBROADCASTDZ128rm(b?)",
+ "VPBROADCASTQZ128rm(b?)",
"VPSUB(B|D|Q|W)Z128rm(b?)",
"(V?)PSUB(B|D|Q|W)rm",
"VPTERNLOGDZ128rm(b?)i",
@@ -1456,7 +1467,7 @@ def SKXWriteResGroup109 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,
let ResourceCycles = [1,1,1,1,1];
}
def: InstRW<[SKXWriteResGroup109], (instregex "CALL(16|32|64)m")>;
-def: InstRW<[SKXWriteResGroup109], (instrs FARCALL64)>;
+def: InstRW<[SKXWriteResGroup109], (instrs FARCALL64m)>;
def SKXWriteResGroup110 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> {
let Latency = 7;
@@ -1516,9 +1527,8 @@ def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup119], (instregex "FCOM(P?)(32|64)m",
- "VFPCLASSSDZrm(b?)",
- "VPBROADCASTB(Z|Z256)m(b?)",
- "VPBROADCASTW(Z|Z256)m(b?)")>;
+ "VPBROADCASTB(Z|Z256)rm(b?)",
+ "VPBROADCASTW(Z|Z256)rm(b?)")>;
def: InstRW<[SKXWriteResGroup119], (instrs VPBROADCASTBYrm,
VPBROADCASTWYrm,
VPMOVSXBDYrm,
@@ -1535,24 +1545,24 @@ def: InstRW<[SKXWriteResGroup121], (instrs VMOVNTDQAZ256rm,
def: InstRW<[SKXWriteResGroup121, ReadAfterVecYLd],
(instregex "VBLENDMPD(Z|Z256)rm(b?)",
"VBLENDMPS(Z|Z256)rm(b?)",
- "VBROADCASTF32X2Z256m(b?)",
- "VBROADCASTF32X2Zm(b?)",
+ "VBROADCASTF32X2Z256rm(b?)",
+ "VBROADCASTF32X2Zrm(b?)",
"VBROADCASTF32X4Z256rm(b?)",
"VBROADCASTF32X4rm(b?)",
"VBROADCASTF32X8rm(b?)",
"VBROADCASTF64X2Z128rm(b?)",
"VBROADCASTF64X2rm(b?)",
"VBROADCASTF64X4rm(b?)",
- "VBROADCASTI32X2Z256m(b?)",
- "VBROADCASTI32X2Zm(b?)",
+ "VBROADCASTI32X2Z256rm(b?)",
+ "VBROADCASTI32X2Zrm(b?)",
"VBROADCASTI32X4Z256rm(b?)",
"VBROADCASTI32X4rm(b?)",
"VBROADCASTI32X8rm(b?)",
"VBROADCASTI64X2Z128rm(b?)",
"VBROADCASTI64X2rm(b?)",
"VBROADCASTI64X4rm(b?)",
- "VBROADCASTSD(Z|Z256)m(b?)",
- "VBROADCASTSS(Z|Z256)m(b?)",
+ "VBROADCASTSD(Z|Z256)rm(b?)",
+ "VBROADCASTSS(Z|Z256)rm(b?)",
"VINSERTF32x4(Z|Z256)rm(b?)",
"VINSERTF32x8Zrm(b?)",
"VINSERTF64x2(Z|Z256)rm(b?)",
@@ -1577,8 +1587,8 @@ def: InstRW<[SKXWriteResGroup121, ReadAfterVecYLd],
"VPADD(B|D|Q|W)Yrm",
"VPADD(B|D|Q|W)(Z|Z256)rm(b?)",
"VPBLENDM(B|D|Q|W)(Z|Z256)rm(b?)",
- "VPBROADCASTD(Z|Z256)m(b?)",
- "VPBROADCASTQ(Z|Z256)m(b?)",
+ "VPBROADCASTD(Z|Z256)rm(b?)",
+ "VPBROADCASTQ(Z|Z256)rm(b?)",
"VPSUB(B|D|Q|W)Yrm",
"VPSUB(B|D|Q|W)(Z|Z256)rm(b?)",
"VPTERNLOGD(Z|Z256)rm(b?)i",
@@ -1667,17 +1677,9 @@ def: InstRW<[SKXWriteResGroup136], (instrs VPMOVSXBWYrm,
VPMOVSXWDYrm,
VPMOVZXWDYrm)>;
def: InstRW<[SKXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
- "VCMP(PD|PS)Z128rm(b?)i",
- "VCMP(SD|SS)Zrm",
+ "VFPCLASSSDZrm(b?)",
"VFPCLASSSSZrm(b?)",
- "VPCMPBZ128rmi(b?)",
- "VPCMPDZ128rmi(b?)",
- "VPCMPEQ(B|D|Q|W)Z128rm(b?)",
- "VPCMPGT(B|D|Q|W)Z128rm(b?)",
"(V?)PCMPGTQrm",
- "VPCMPQZ128rmi(b?)",
- "VPCMPU(B|D|Q|W)Z128rmi(b?)",
- "VPCMPWZ128rmi(b?)",
"VPERMI2D128rm(b?)",
"VPERMI2PD128rm(b?)",
"VPERMI2PS128rm(b?)",
@@ -1701,15 +1703,32 @@ def: InstRW<[SKXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
"VPMOVZXBWZ128rm(b?)",
"VPMOVZXDQZ128rm(b?)",
"VPMOVZXWDZ128rm(b?)",
- "VPMOVZXWQZ128rm(b?)",
- "VPTESTMBZ128rm(b?)",
- "VPTESTMDZ128rm(b?)",
- "VPTESTMQZ128rm(b?)",
- "VPTESTMWZ128rm(b?)",
- "VPTESTNMBZ128rm(b?)",
- "VPTESTNMDZ128rm(b?)",
- "VPTESTNMQZ128rm(b?)",
- "VPTESTNMWZ128rm(b?)")>;
+ "VPMOVZXWQZ128rm(b?)")>;
+
+def SKXWriteResGroup136_2 : SchedWriteRes<[SKXPort5,SKXPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup136_2], (instregex "VCMP(PD|PS)Z128rm(b?)i",
+ "VCMP(SD|SS)Zrm",
+ "VFPCLASSPDZ128rm(b?)",
+ "VFPCLASSPSZ128rm(b?)",
+ "VPCMPBZ128rmi(b?)",
+ "VPCMPDZ128rmi(b?)",
+ "VPCMPEQ(B|D|Q|W)Z128rm(b?)",
+ "VPCMPGT(B|D|Q|W)Z128rm(b?)",
+ "VPCMPQZ128rmi(b?)",
+ "VPCMPU(B|D|Q|W)Z128rmi(b?)",
+ "VPCMPWZ128rmi(b?)",
+ "VPTESTMBZ128rm(b?)",
+ "VPTESTMDZ128rm(b?)",
+ "VPTESTMQZ128rm(b?)",
+ "VPTESTMWZ128rm(b?)",
+ "VPTESTNMBZ128rm(b?)",
+ "VPTESTNMDZ128rm(b?)",
+ "VPTESTNMQZ128rm(b?)",
+ "VPTESTNMWZ128rm(b?)")>;
def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 9;
@@ -1745,30 +1764,38 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m",
"VALIGND(Z|Z256)rm(b?)i",
"VALIGNQ(Z|Z256)rm(b?)i",
- "VCMPPD(Z|Z256)rm(b?)i",
- "VCMPPS(Z|Z256)rm(b?)i",
- "VPCMPB(Z|Z256)rmi(b?)",
- "VPCMPD(Z|Z256)rmi(b?)",
- "VPCMPEQB(Z|Z256)rm(b?)",
- "VPCMPEQD(Z|Z256)rm(b?)",
- "VPCMPEQQ(Z|Z256)rm(b?)",
- "VPCMPEQW(Z|Z256)rm(b?)",
- "VPCMPGTB(Z|Z256)rm(b?)",
- "VPCMPGTD(Z|Z256)rm(b?)",
- "VPCMPGTQ(Z|Z256)rm(b?)",
- "VPCMPGTW(Z|Z256)rm(b?)",
- "VPCMPQ(Z|Z256)rmi(b?)",
- "VPCMPU(B|D|Q|W)Z256rmi(b?)",
- "VPCMPU(B|D|Q|W)Zrmi(b?)",
- "VPCMPW(Z|Z256)rmi(b?)",
"VPMAXSQ(Z|Z256)rm(b?)",
"VPMAXUQ(Z|Z256)rm(b?)",
"VPMINSQ(Z|Z256)rm(b?)",
- "VPMINUQ(Z|Z256)rm(b?)",
- "VPTESTM(B|D|Q|W)Z256rm(b?)",
- "VPTESTM(B|D|Q|W)Zrm(b?)",
- "VPTESTNM(B|D|Q|W)Z256rm(b?)",
- "VPTESTNM(B|D|Q|W)Zrm(b?)")>;
+ "VPMINUQ(Z|Z256)rm(b?)")>;
+
+def SKXWriteResGroup148_2 : SchedWriteRes<[SKXPort5,SKXPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i",
+ "VCMPPS(Z|Z256)rm(b?)i",
+ "VFPCLASSPD(Z|Z256)rm(b?)",
+ "VFPCLASSPS(Z|Z256)rm(b?)",
+ "VPCMPB(Z|Z256)rmi(b?)",
+ "VPCMPD(Z|Z256)rmi(b?)",
+ "VPCMPEQB(Z|Z256)rm(b?)",
+ "VPCMPEQD(Z|Z256)rm(b?)",
+ "VPCMPEQQ(Z|Z256)rm(b?)",
+ "VPCMPEQW(Z|Z256)rm(b?)",
+ "VPCMPGTB(Z|Z256)rm(b?)",
+ "VPCMPGTD(Z|Z256)rm(b?)",
+ "VPCMPGTQ(Z|Z256)rm(b?)",
+ "VPCMPGTW(Z|Z256)rm(b?)",
+ "VPCMPQ(Z|Z256)rmi(b?)",
+ "VPCMPU(B|D|Q|W)Z256rmi(b?)",
+ "VPCMPU(B|D|Q|W)Zrmi(b?)",
+ "VPCMPW(Z|Z256)rmi(b?)",
+ "VPTESTM(B|D|Q|W)Z256rm(b?)",
+ "VPTESTM(B|D|Q|W)Zrm(b?)",
+ "VPTESTNM(B|D|Q|W)Z256rm(b?)",
+ "VPTESTNM(B|D|Q|W)Zrm(b?)")>;
def SKXWriteResGroup149 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 10;
@@ -1938,14 +1965,14 @@ def SKXWriteResGroup171 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
def: InstRW<[SKXWriteResGroup171], (instrs LOOPE, LOOPNE)>;
def SKXWriteResGroup174 : SchedWriteRes<[SKXPort01]> {
- let Latency = 12;
+ let Latency = 15;
let NumMicroOps = 3;
let ResourceCycles = [3];
}
def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQ(Z128|Z256)rr")>;
def SKXWriteResGroup174z : SchedWriteRes<[SKXPort05]> {
- let Latency = 12;
+ let Latency = 15;
let NumMicroOps = 3;
let ResourceCycles = [3];
}
@@ -2106,8 +2133,8 @@ def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKX
}
def: InstRW<[SKXWriteResGroup202], (instrs XCH_F)>;
-def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort015]> {
- let Latency = 18;
+def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort01]> {
+ let Latency = 21;
let NumMicroOps = 4;
let ResourceCycles = [1,3];
}
@@ -2134,21 +2161,19 @@ def SKXWriteResGroup209 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
}
def : SchedAlias<WriteFDiv64Ld, SKXWriteResGroup209>; // TODO - convert to ZnWriteResFpuPair
-def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort015]> {
- let Latency = 19;
+def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort01]> {
+ let Latency = 22;
let NumMicroOps = 4;
let ResourceCycles = [1,3];
}
-def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)",
- "VPMULLQZrm(b?)")>;
+def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)")>;
-def SKXWriteResGroup214 : SchedWriteRes<[]> {
- let Latency = 20;
- let NumMicroOps = 0;
+def SKXWriteResGroup211_1 : SchedWriteRes<[SKXPort23,SKXPort05]> {
+ let Latency = 22;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,3];
}
-def: InstRW<[SKXWriteResGroup214], (instrs VGATHERDPSZ128rm,
- VGATHERQPSZrm,
- VPGATHERDDZ128rm)>;
+def: InstRW<[SKXWriteResGroup211_1], (instregex "VPMULLQZrm(b?)")>;
def SKXWriteResGroup215 : SchedWriteRes<[SKXPort0]> {
let Latency = 20;
@@ -2164,15 +2189,41 @@ def SKXWriteResGroup216 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
}
def : SchedAlias<WriteFDiv64XLd, SKXWriteResGroup216>; // TODO - convert to ZnWriteResFpuPair
-def SKXWriteResGroup218 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
- let Latency = 20;
- let NumMicroOps = 5;
+def SKXWriteGatherEVEX2 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+ let Latency = 17;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
let ResourceCycles = [1,2,1,1];
}
-def: InstRW<[SKXWriteResGroup218], (instrs VGATHERQPSZ128rm,
- VGATHERQPSZ256rm,
- VPGATHERQDZ128rm,
- VPGATHERQDZ256rm)>;
+def: InstRW<[SKXWriteGatherEVEX2], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm,
+ VGATHERDPDZ128rm, VPGATHERDQZ128rm,
+ VGATHERQPDZ128rm, VPGATHERQQZ128rm)>;
+
+def SKXWriteGatherEVEX4 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+ let Latency = 19;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,4,1,1];
+}
+def: InstRW<[SKXWriteGatherEVEX4], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm,
+ VGATHERQPDZ256rm, VPGATHERQQZ256rm,
+ VGATHERDPSZ128rm, VPGATHERDDZ128rm,
+ VGATHERDPDZ256rm, VPGATHERDQZ256rm)>;
+
+def SKXWriteGatherEVEX8 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+ let Latency = 21;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,8,1,1];
+}
+def: InstRW<[SKXWriteGatherEVEX8], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm,
+ VGATHERDPDZrm, VPGATHERDQZrm,
+ VGATHERQPDZrm, VPGATHERQQZrm,
+ VGATHERQPSZrm, VPGATHERQDZrm)>;
+
+def SKXWriteGatherEVEX16 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+ let Latency = 25;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,16,1,1];
+}
+def: InstRW<[SKXWriteGatherEVEX16], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>;
def SKXWriteResGroup219 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 20;
@@ -2202,57 +2253,31 @@ def SKXWriteResGroup223 : SchedWriteRes<[SKXPort0,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup223], (instregex "DIV_F(32|64)m")>;
-def SKXWriteResGroup224 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
- let Latency = 22;
- let NumMicroOps = 5;
+def SKXWriteResGroupVEX2 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
+ let Latency = 18;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
let ResourceCycles = [1,2,1,1];
}
-def: InstRW<[SKXWriteResGroup224], (instrs VGATHERDPDZ128rm,
- VGATHERQPDZ128rm,
- VPGATHERDQZ128rm,
- VPGATHERQQZ128rm)>;
+def: InstRW<[SKXWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm,
+ VGATHERQPDrm, VPGATHERQQrm,
+ VGATHERQPSrm, VPGATHERQDrm)>;
-def SKXWriteResGroup224_2 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
- let Latency = 22;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
+def SKXWriteResGroupVEX4 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
+ let Latency = 20;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,4,1,1];
}
-def: InstRW<[SKXWriteResGroup224_2], (instrs VGATHERDPSrm,
- VGATHERDPDrm,
- VGATHERQPDrm,
- VGATHERQPSrm,
- VPGATHERDDrm,
- VPGATHERDQrm,
- VPGATHERQDrm,
- VPGATHERQQrm,
- VPGATHERDDrm,
- VPGATHERQDrm,
- VPGATHERDQrm,
- VPGATHERQQrm,
- VGATHERDPSrm,
- VGATHERQPSrm,
- VGATHERDPDrm,
- VGATHERQPDrm)>;
-
-def SKXWriteResGroup224_3 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
- let Latency = 25;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
+def: InstRW<[SKXWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
+ VGATHERDPSrm, VPGATHERDDrm,
+ VGATHERQPDYrm, VPGATHERQQYrm,
+ VGATHERQPSYrm, VPGATHERQDYrm)>;
+
+def SKXWriteResGroupVEX8 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
+ let Latency = 22;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,8,1,1];
}
-def: InstRW<[SKXWriteResGroup224_3], (instrs VGATHERDPSYrm,
- VGATHERQPDYrm,
- VGATHERQPSYrm,
- VPGATHERDDYrm,
- VPGATHERDQYrm,
- VPGATHERQDYrm,
- VPGATHERQQYrm,
- VPGATHERDDYrm,
- VPGATHERQDYrm,
- VPGATHERDQYrm,
- VPGATHERQQYrm,
- VGATHERDPSYrm,
- VGATHERQPSYrm,
- VGATHERDPDYrm)>;
+def: InstRW<[SKXWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
def SKXWriteResGroup225 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> {
let Latency = 22;
@@ -2276,27 +2301,6 @@ def SKXWriteResGroup233 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup233], (instregex "DIV_FI(16|32)m")>;
-def SKXWriteResGroup234 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
- let Latency = 25;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SKXWriteResGroup234], (instrs VGATHERDPDZ256rm,
- VGATHERQPDZ256rm,
- VPGATHERDQZ256rm,
- VPGATHERQDZrm,
- VPGATHERQQZ256rm)>;
-
-def SKXWriteResGroup238 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
- let Latency = 26;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SKXWriteResGroup238], (instrs VGATHERDPDZrm,
- VGATHERQPDZrm,
- VPGATHERDQZrm,
- VPGATHERQQZrm)>;
-
def SKXWriteResGroup239 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let Latency = 27;
let NumMicroOps = 2;
@@ -2304,14 +2308,6 @@ def SKXWriteResGroup239 : SchedWriteRes<[SKXPort0,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup239], (instregex "DIVR_F(32|64)m")>;
-def SKXWriteResGroup240 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
- let Latency = 27;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SKXWriteResGroup240], (instrs VGATHERDPSZ256rm,
- VPGATHERDDZ256rm)>;
-
def SKXWriteResGroup242 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
let Latency = 29;
let NumMicroOps = 15;
@@ -2326,14 +2322,6 @@ def SKXWriteResGroup243 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>;
-def SKXWriteResGroup245 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
- let Latency = 30;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SKXWriteResGroup245], (instrs VGATHERDPSZrm,
- VPGATHERDDZrm)>;
-
def SKXWriteResGroup247 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort06,SKXPort0156]> {
let Latency = 35;
let NumMicroOps = 23;
@@ -2461,7 +2449,7 @@ def: InstRW<[SKXWriteResGroup267], (instrs PAUSE)>;
def: InstRW<[WriteZero], (instrs CLC)>;
-// Intruction variants handled by the renamer. These might not need execution
+// Instruction variants handled by the renamer. These might not need execution
// ports in certain conditions.
// See Agner's Fog "The microarchitecture of Intel, AMD and VIA CPUs",
// section "Skylake Pipeline" > "Register allocation and renaming".
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86Schedule.td b/contrib/llvm-project/llvm/lib/Target/X86/X86Schedule.td
index 95f710061aeb..f204d6622119 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86Schedule.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86Schedule.td
@@ -250,7 +250,8 @@ defm WriteFCmp64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double
defm WriteFCmp64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double compare (XMM).
defm WriteFCmp64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (YMM).
defm WriteFCmp64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (ZMM).
-defm WriteFCom : X86SchedWritePair<ReadAfterVecLd>; // Floating point compare to flags.
+defm WriteFCom : X86SchedWritePair<ReadAfterVecLd>; // Floating point compare to flags (X87).
+defm WriteFComX : X86SchedWritePair<ReadAfterVecLd>; // Floating point compare to flags (SSE).
defm WriteFMul : X86SchedWritePair<ReadAfterVecLd>; // Floating point multiplication.
defm WriteFMulX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point multiplication (XMM).
defm WriteFMulY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point multiplication (YMM).
@@ -340,8 +341,10 @@ def WriteVecStoreX : SchedWrite;
def WriteVecStoreY : SchedWrite;
def WriteVecStoreNT : SchedWrite;
def WriteVecStoreNTY : SchedWrite;
-def WriteVecMaskedStore : SchedWrite;
-def WriteVecMaskedStoreY : SchedWrite;
+def WriteVecMaskedStore32 : SchedWrite;
+def WriteVecMaskedStore64 : SchedWrite;
+def WriteVecMaskedStore32Y : SchedWrite;
+def WriteVecMaskedStore64Y : SchedWrite;
def WriteVecMove : SchedWrite;
def WriteVecMoveX : SchedWrite;
def WriteVecMoveY : SchedWrite;
@@ -549,6 +552,14 @@ def WriteFMaskMove32Y
: X86SchedWriteMaskMove<WriteFMaskedLoadY, WriteFMaskedStore32Y>;
def WriteFMaskMove64Y
: X86SchedWriteMaskMove<WriteFMaskedLoadY, WriteFMaskedStore64Y>;
+def WriteVecMaskMove32
+ : X86SchedWriteMaskMove<WriteVecMaskedLoad, WriteVecMaskedStore32>;
+def WriteVecMaskMove64
+ : X86SchedWriteMaskMove<WriteVecMaskedLoad, WriteVecMaskedStore64>;
+def WriteVecMaskMove32Y
+ : X86SchedWriteMaskMove<WriteVecMaskedLoadY, WriteVecMaskedStore32Y>;
+def WriteVecMaskMove64Y
+ : X86SchedWriteMaskMove<WriteVecMaskedLoadY, WriteVecMaskedStore64Y>;
// Vector width wrappers.
def SchedWriteFAdd
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleAtom.td b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleAtom.td
index b0153ca9da36..b90baf6c16b1 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -244,6 +244,7 @@ defm : AtomWriteResPair<WriteFCmp64X, [AtomPort01], [AtomPort01], 6, 7,
defm : X86WriteResPairUnsupported<WriteFCmp64Y>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteFComX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFMulX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFMulY>;
@@ -368,8 +369,10 @@ def : WriteRes<WriteVecStoreX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecStoreY>;
def : WriteRes<WriteVecStoreNT, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecStoreNTY>;
-def : WriteRes<WriteVecMaskedStore, [AtomPort0]>;
-defm : X86WriteResUnsupported<WriteVecMaskedStoreY>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore32>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore64>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore32Y>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;
def : WriteRes<WriteVecMove, [AtomPort0]>;
def : WriteRes<WriteVecMoveX, [AtomPort01]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBdVer2.td
index d7aea3cf4e9d..0a201bc74a48 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBdVer2.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBdVer2.td
@@ -545,8 +545,40 @@ def PdWriteBTSRm : SchedWriteRes<[PdEX01, PdLoad]> {
def : SchedAlias<WriteBitTestSetRegRMW, PdWriteBTSRm>;
// This is for simple LEAs with one or two input operands.
-// FIXME: SAGU 3-operand LEA
-def : WriteRes<WriteLEA, [PdEX01]> { let NumMicroOps = 2; }
+def : WriteRes<WriteLEA, [PdEX01]> { let ResourceCycles = [2]; }
+
+// This write is used for slow LEA instructions.
+def PdWrite3OpsLEA : SchedWriteRes<[PdEX01]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+}
+
+// On Piledriver, a slow LEA is either a 3-operand LEA (base, index, offset),
+// or an LEA with a `Scale` value different than 1.
+def PdSlowLEAPredicate : MCSchedPredicate<
+ CheckAny<[
+ // A 3-operand LEA (base, index, offset).
+ IsThreeOperandsLEAFn,
+ // An LEA with a "Scale" different than 1.
+ CheckAll<[
+ CheckIsImmOperand<2>,
+ CheckNot<CheckImmOperand<2, 1>>
+ ]>
+ ]>
+>;
+
+def PdWriteLEA : SchedWriteVariant<[
+ SchedVar<PdSlowLEAPredicate, [PdWrite3OpsLEA]>,
+ SchedVar<NoSchedPred, [WriteLEA]>
+]>;
+
+def : InstRW<[PdWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;
+
+def PdWriteLEA16r : SchedWriteRes<[PdEX01]> {
+ let ResourceCycles = [3];
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteLEA16r], (instrs LEA16r)>;
// Bit counts.
defm : PdWriteResExPair<WriteBSF, [PdEX01], 3, [6], 6, 2>;
@@ -766,6 +798,7 @@ defm : PdWriteResYMMPair<WriteFCmp64Y, [PdFPU0, PdFPFMA], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : PdWriteResXMMPair<WriteFCom, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
+defm : PdWriteResXMMPair<WriteFComX, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
def PdWriteFCOMPm : SchedWriteRes<[PdFPU1, PdFPFMA]> {
let Latency = 6;
@@ -1060,8 +1093,10 @@ def : InstRW<[PdWriteVMOVDQUYmr], (instrs VMOVDQUYmr)>;
defm : PdWriteRes<WriteVecStoreNT, [PdStore, PdFPU1, PdFPSTO], 2>;
defm : PdWriteRes<WriteVecStoreNTY, [PdStore, PdFPU1, PdFPSTO], 2, [2, 2, 2], 4>;
-defm : PdWriteRes<WriteVecMaskedStore, [PdStore, PdFPU01, PdFPMAL], 6, [1, 1, 4]>;
-defm : PdWriteRes<WriteVecMaskedStoreY, [PdStore, PdFPU01, PdFPMAL], 6, [2, 2, 4], 2>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore32>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore32Y>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore64>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;
defm : PdWriteRes<WriteVecMove, [PdFPU01, PdFPMAL], 2>;
defm : PdWriteRes<WriteVecMoveX, [PdFPU01, PdFPMAL], 1, [1, 2]>;
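As a plain-language companion to the PdSlowLEAPredicate added earlier in this file's hunks: on Piledriver an LEA is treated as slow when it uses base, index and displacement together, or when its scale is not 1. The C++ sketch below models that classification with a hypothetical struct; the real check is the MCSchedPredicate built from IsThreeOperandsLEAFn and the immediate-operand tests shown in the hunk, not this code.

#include <cstdio>

// Hypothetical model of the slow-LEA classification, not the MCSchedPredicate.
struct LEAAddr {
  bool HasBase;
  bool HasIndex;
  bool HasDisp;
  int Scale; // 1, 2, 4 or 8
};

static bool isSlowLEA(const LEAAddr &A) {
  bool ThreeOps = A.HasBase && A.HasIndex && A.HasDisp; // base, index, offset
  return ThreeOps || A.Scale != 1;                      // or a scaled index
}

int main() {
  LEAAddr Simple = {true, false, true, 1}; // lea 8(%rdi), %rax       -> fast
  LEAAddr Scaled = {true, true, false, 4}; // lea (%rdi,%rsi,4), %rax -> slow
  LEAAddr Three  = {true, true, true, 1};  // lea 8(%rdi,%rsi), %rax  -> slow
  std::printf("%d %d %d\n", isSlowLEA(Simple), isSlowLEA(Scaled), isSlowLEA(Three));
  return 0; // prints: 0 1 1
}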
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index d0421d94ee05..13b6eed5126d 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -541,6 +541,7 @@ defm : JWriteResFpuPair<WriteFCmp64X, [JFPU0, JFPA], 2>;
defm : JWriteResYMMPair<WriteFCmp64Y, [JFPU0, JFPA], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : JWriteResFpuPair<WriteFCom, [JFPU0, JFPA, JALU0], 3>;
+defm : JWriteResFpuPair<WriteFComX, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResFpuPair<WriteFMul, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFMulX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFMulY, [JFPU1, JFPM], 2, [2,2], 2>;
@@ -669,8 +670,10 @@ defm : X86WriteRes<WriteVecStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1],
defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [2, 2, 2], 2>;
defm : X86WriteRes<WriteVecStoreNT, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [JSAGU, JFPU1, JSTC], 2, [2, 2, 2], 1>;
-defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>;
-defm : X86WriteRes<WriteVecMaskedStoreY, [JSAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore32>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore64>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore32Y>;
+defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;
defm : X86WriteRes<WriteVecMove, [JFPU01, JVALU], 1, [1, 1], 1>;
defm : X86WriteRes<WriteVecMoveX, [JFPU01, JVALU], 1, [1, 1], 1>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleSLM.td b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleSLM.td
index dcd155ea0e0e..3d53ef104ed6 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -214,6 +214,7 @@ defm : SLMWriteResPair<WriteFCmp64X, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmp64Y, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : SLMWriteResPair<WriteFCom, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFComX, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFMul, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMulX, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMulY, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
@@ -310,8 +311,10 @@ def : WriteRes<WriteVecStoreX, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreNT, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreNTY, [SLM_MEC_RSV]>;
-def : WriteRes<WriteVecMaskedStore, [SLM_MEC_RSV]>;
-def : WriteRes<WriteVecMaskedStoreY, [SLM_MEC_RSV]>;
+def : WriteRes<WriteVecMaskedStore32, [SLM_MEC_RSV]>;
+def : WriteRes<WriteVecMaskedStore32Y, [SLM_MEC_RSV]>;
+def : WriteRes<WriteVecMaskedStore64, [SLM_MEC_RSV]>;
+def : WriteRes<WriteVecMaskedStore64Y, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveX, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveY, [SLM_FPC_RSV01]>;
@@ -390,44 +393,15 @@ defm : X86WriteResPairUnsupported<WritePHAddZ>;
// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
-def : WriteRes<WritePCmpIStrM, [SLM_FPC_RSV0]> {
- let Latency = 13;
- let ResourceCycles = [13];
-}
-def : WriteRes<WritePCmpIStrMLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
- let Latency = 13;
- let ResourceCycles = [13, 1];
-}
+defm : SLMWriteResPair<WritePCmpIStrM, [SLM_FPC_RSV0], 13, [13]>;
// Packed Compare Explicit Length Strings, Return Mask
-def : WriteRes<WritePCmpEStrM, [SLM_FPC_RSV0]> {
- let Latency = 17;
- let ResourceCycles = [17];
-}
-def : WriteRes<WritePCmpEStrMLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
- let Latency = 17;
- let ResourceCycles = [17, 1];
-}
-
+defm : SLMWriteResPair<WritePCmpEStrM, [SLM_FPC_RSV0], 17, [17]>;
// Packed Compare Implicit Length Strings, Return Index
-def : WriteRes<WritePCmpIStrI, [SLM_FPC_RSV0]> {
- let Latency = 17;
- let ResourceCycles = [17];
-}
-def : WriteRes<WritePCmpIStrILd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
- let Latency = 17;
- let ResourceCycles = [17, 1];
-}
+defm : SLMWriteResPair<WritePCmpIStrI, [SLM_FPC_RSV0], 17, [17]>;
// Packed Compare Explicit Length Strings, Return Index
-def : WriteRes<WritePCmpEStrI, [SLM_FPC_RSV0]> {
- let Latency = 21;
- let ResourceCycles = [21];
-}
-def : WriteRes<WritePCmpEStrILd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
- let Latency = 21;
- let ResourceCycles = [21, 1];
-}
+defm : SLMWriteResPair<WritePCmpEStrI, [SLM_FPC_RSV0], 21, [21]>;
// MOVMSK Instructions.
def : WriteRes<WriteFMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
@@ -436,42 +410,12 @@ def : WriteRes<WriteVecMOVMSKY, [SLM_FPC_RSV1]> { let Latency = 4; }
def : WriteRes<WriteMMXMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
// AES Instructions.
-def : WriteRes<WriteAESDecEnc, [SLM_FPC_RSV0]> {
- let Latency = 8;
- let ResourceCycles = [5];
-}
-def : WriteRes<WriteAESDecEncLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
- let Latency = 8;
- let ResourceCycles = [5, 1];
-}
-
-def : WriteRes<WriteAESIMC, [SLM_FPC_RSV0]> {
- let Latency = 8;
- let ResourceCycles = [5];
-}
-def : WriteRes<WriteAESIMCLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
- let Latency = 8;
- let ResourceCycles = [5, 1];
-}
-
-def : WriteRes<WriteAESKeyGen, [SLM_FPC_RSV0]> {
- let Latency = 8;
- let ResourceCycles = [5];
-}
-def : WriteRes<WriteAESKeyGenLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
- let Latency = 8;
- let ResourceCycles = [5, 1];
-}
+defm : SLMWriteResPair<WriteAESDecEnc, [SLM_FPC_RSV0], 8, [5]>;
+defm : SLMWriteResPair<WriteAESIMC, [SLM_FPC_RSV0], 8, [5]>;
+defm : SLMWriteResPair<WriteAESKeyGen, [SLM_FPC_RSV0], 8, [5]>;
// Carry-less multiplication instructions.
-def : WriteRes<WriteCLMul, [SLM_FPC_RSV0]> {
- let Latency = 10;
- let ResourceCycles = [10];
-}
-def : WriteRes<WriteCLMulLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
- let Latency = 10;
- let ResourceCycles = [10, 1];
-}
+defm : SLMWriteResPair<WriteCLMul, [SLM_FPC_RSV0], 10, [10]>;
def : WriteRes<WriteSystem, [SLM_FPC_RSV0]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [SLM_FPC_RSV0]> { let Latency = 100; }
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver1.td b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 06201f4a3a84..fe09d6f85221 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -295,6 +295,7 @@ defm : ZnWriteResFpuPair<WriteFCmp64X, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFCmp64Y, [ZnFPU0], 3>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : ZnWriteResFpuPair<WriteFCom, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WriteFComX, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteFBlendY, [ZnFPU01], 1>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
@@ -387,8 +388,10 @@ defm : X86WriteRes<WriteVecStoreX, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreY, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [ZnAGU], 1, [1], 1>;
-defm : X86WriteRes<WriteVecMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
-defm : X86WriteRes<WriteVecMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteVecMove, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [ZnFPU], 2, [1], 2>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver2.td b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver2.td
index 4537d9cc7956..48da0d6329b1 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver2.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ScheduleZnver2.td
@@ -187,7 +187,7 @@ defm : Zn2WriteResPair<WriteIMul8, [Zn2ALU1, Zn2Multiplier], 4>;
defm : X86WriteRes<WriteBSWAP32, [Zn2ALU], 1, [4], 1>;
defm : X86WriteRes<WriteBSWAP64, [Zn2ALU], 1, [4], 1>;
-defm : X86WriteRes<WriteCMPXCHG, [Zn2ALU], 1, [1], 1>;
+defm : X86WriteRes<WriteCMPXCHG, [Zn2ALU], 3, [1], 1>;
defm : X86WriteRes<WriteCMPXCHGRMW,[Zn2ALU,Zn2AGU], 8, [1,1], 5>;
defm : X86WriteRes<WriteXCHG, [Zn2ALU], 1, [2], 2>;
@@ -216,7 +216,7 @@ defm : X86WriteRes<WriteBitTestSet, [Zn2ALU], 2, [1], 2>;
// Bit counts.
defm : Zn2WriteResPair<WriteBSF, [Zn2ALU], 3>;
-defm : Zn2WriteResPair<WriteBSR, [Zn2ALU], 3>;
+defm : Zn2WriteResPair<WriteBSR, [Zn2ALU], 4>;
defm : Zn2WriteResPair<WriteLZCNT, [Zn2ALU], 1>;
defm : Zn2WriteResPair<WriteTZCNT, [Zn2ALU], 2>;
defm : Zn2WriteResPair<WritePOPCNT, [Zn2ALU], 1>;
@@ -272,15 +272,16 @@ defm : Zn2WriteResFpuPair<WriteFAdd64, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFAdd64X, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFAdd64Y, [Zn2FPU0], 3>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
-defm : Zn2WriteResFpuPair<WriteFCmp, [Zn2FPU0], 3>;
-defm : Zn2WriteResFpuPair<WriteFCmpX, [Zn2FPU0], 3>;
-defm : Zn2WriteResFpuPair<WriteFCmpY, [Zn2FPU0], 3>;
+defm : Zn2WriteResFpuPair<WriteFCmp, [Zn2FPU0], 1>;
+defm : Zn2WriteResFpuPair<WriteFCmpX, [Zn2FPU0], 1>;
+defm : Zn2WriteResFpuPair<WriteFCmpY, [Zn2FPU0], 1>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
-defm : Zn2WriteResFpuPair<WriteFCmp64, [Zn2FPU0], 3>;
-defm : Zn2WriteResFpuPair<WriteFCmp64X, [Zn2FPU0], 3>;
-defm : Zn2WriteResFpuPair<WriteFCmp64Y, [Zn2FPU0], 3>;
+defm : Zn2WriteResFpuPair<WriteFCmp64, [Zn2FPU0], 1>;
+defm : Zn2WriteResFpuPair<WriteFCmp64X, [Zn2FPU0], 1>;
+defm : Zn2WriteResFpuPair<WriteFCmp64Y, [Zn2FPU0], 1>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : Zn2WriteResFpuPair<WriteFCom, [Zn2FPU0], 3>;
+defm : Zn2WriteResFpuPair<WriteFComX, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFBlend, [Zn2FPU01], 1>;
defm : Zn2WriteResFpuPair<WriteFBlendY, [Zn2FPU01], 1>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
@@ -313,8 +314,8 @@ defm : Zn2WriteResFpuPair<WriteFDiv64, [Zn2FPU3], 15>;
defm : Zn2WriteResFpuPair<WriteFDiv64X, [Zn2FPU3], 15>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : Zn2WriteResFpuPair<WriteFSign, [Zn2FPU3], 2>;
-defm : Zn2WriteResFpuPair<WriteFRnd, [Zn2FPU3], 4, [1], 1, 7, 0>;
-defm : Zn2WriteResFpuPair<WriteFRndY, [Zn2FPU3], 4, [1], 1, 7, 0>;
+defm : Zn2WriteResFpuPair<WriteFRnd, [Zn2FPU3], 3, [1], 1, 7, 0>;
+defm : Zn2WriteResFpuPair<WriteFRndY, [Zn2FPU3], 3, [1], 1, 7, 0>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : Zn2WriteResFpuPair<WriteFLogic, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteFLogicY, [Zn2FPU], 1>;
@@ -325,16 +326,16 @@ defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : Zn2WriteResFpuPair<WriteFShuffle, [Zn2FPU12], 1>;
defm : Zn2WriteResFpuPair<WriteFShuffleY, [Zn2FPU12], 1>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
-defm : Zn2WriteResFpuPair<WriteFVarShuffle, [Zn2FPU12], 1>;
-defm : Zn2WriteResFpuPair<WriteFVarShuffleY,[Zn2FPU12], 1>;
+defm : Zn2WriteResFpuPair<WriteFVarShuffle, [Zn2FPU12], 3>;
+defm : Zn2WriteResFpuPair<WriteFVarShuffleY,[Zn2FPU12], 3>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : Zn2WriteResFpuPair<WriteFMul, [Zn2FPU01], 3, [1], 1, 7, 1>;
defm : Zn2WriteResFpuPair<WriteFMulX, [Zn2FPU01], 3, [1], 1, 7, 1>;
-defm : Zn2WriteResFpuPair<WriteFMulY, [Zn2FPU01], 4, [1], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteFMulY, [Zn2FPU01], 3, [1], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : Zn2WriteResFpuPair<WriteFMul64, [Zn2FPU01], 3, [1], 1, 7, 1>;
defm : Zn2WriteResFpuPair<WriteFMul64X, [Zn2FPU01], 3, [1], 1, 7, 1>;
-defm : Zn2WriteResFpuPair<WriteFMul64Y, [Zn2FPU01], 4, [1], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteFMul64Y, [Zn2FPU01], 3, [1], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : Zn2WriteResFpuPair<WriteFMA, [Zn2FPU03], 5>;
defm : Zn2WriteResFpuPair<WriteFMAX, [Zn2FPU03], 5>;
@@ -369,8 +370,10 @@ defm : X86WriteRes<WriteVecStoreX, [Zn2AGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreY, [Zn2AGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [Zn2AGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [Zn2AGU], 1, [1], 1>;
-defm : X86WriteRes<WriteVecMaskedStore, [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
-defm : X86WriteRes<WriteVecMaskedStoreY, [Zn2AGU,Zn2FPU01], 5, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32, [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [Zn2AGU,Zn2FPU01], 5, [1,2], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64, [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [Zn2AGU,Zn2FPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteVecMove, [Zn2FPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [Zn2FPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [Zn2FPU], 2, [1], 2>;
@@ -380,7 +383,7 @@ defm : X86WriteRes<WriteEMMS, [Zn2FPU], 2, [1], 1>;
defm : Zn2WriteResFpuPair<WriteVecShift, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteVecShiftX, [Zn2FPU2], 1>;
-defm : Zn2WriteResFpuPair<WriteVecShiftY, [Zn2FPU2], 2>;
+defm : Zn2WriteResFpuPair<WriteVecShiftY, [Zn2FPU2], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : Zn2WriteResFpuPair<WriteVecShiftImm, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteVecShiftImmX, [Zn2FPU], 1>;
@@ -402,7 +405,7 @@ defm : Zn2WriteResFpuPair<WriteVecIMulX, [Zn2FPU0], 4>;
defm : Zn2WriteResFpuPair<WriteVecIMulY, [Zn2FPU0], 4>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : Zn2WriteResFpuPair<WritePMULLD, [Zn2FPU0], 4, [1], 1, 7, 1>;
-defm : Zn2WriteResFpuPair<WritePMULLDY, [Zn2FPU0], 3, [1], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WritePMULLDY, [Zn2FPU0], 4, [1], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : Zn2WriteResFpuPair<WriteShuffle, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteShuffleX, [Zn2FPU], 1>;
@@ -424,8 +427,8 @@ defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : Zn2WriteResFpuPair<WritePHMINPOS, [Zn2FPU0], 4>;
// Vector Shift Operations
-defm : Zn2WriteResFpuPair<WriteVarVecShift, [Zn2FPU12], 1>;
-defm : Zn2WriteResFpuPair<WriteVarVecShiftY, [Zn2FPU12], 1>;
+defm : Zn2WriteResFpuPair<WriteVarVecShift, [Zn2FPU12], 3>;
+defm : Zn2WriteResFpuPair<WriteVarVecShiftY, [Zn2FPU12], 3>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
// Vector insert/extract operations.
@@ -469,6 +472,12 @@ defm : Zn2WriteResFpuPair<WriteFVarShuffle256, [Zn2FPU], 100>;
def Zn2WriteMicrocoded : SchedWriteRes<[]> {
let Latency = 100;
}
+defm : Zn2WriteResPair<WriteDPPS, [], 15>;
+defm : Zn2WriteResPair<WriteFHAdd, [], 7>;
+defm : Zn2WriteResPair<WriteFHAddY, [], 7>;
+defm : Zn2WriteResPair<WritePHAdd, [], 3>;
+defm : Zn2WriteResPair<WritePHAddX, [], 3>;
+defm : Zn2WriteResPair<WritePHAddY, [], 3>;
def : SchedAlias<WriteMicrocoded, Zn2WriteMicrocoded>;
def : SchedAlias<WriteFCMOV, Zn2WriteMicrocoded>;
@@ -517,14 +526,14 @@ def Zn2WriteXCHG : SchedWriteRes<[Zn2ALU]> {
let NumMicroOps = 2;
}
-def : InstRW<[Zn2WriteXCHG], (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>;
+def : InstRW<[Zn2WriteXCHG], (instregex "^XCHG(8|16|32|64)rr", "^XCHG(16|32|64)ar")>;
// r,m.
def Zn2WriteXCHGrm : SchedWriteRes<[Zn2AGU, Zn2ALU]> {
let Latency = 5;
let NumMicroOps = 2;
}
-def : InstRW<[Zn2WriteXCHGrm, ReadAfterLd], (instregex "XCHG(8|16|32|64)rm")>;
+def : InstRW<[Zn2WriteXCHGrm, ReadAfterLd], (instregex "^XCHG(8|16|32|64)rm")>;
def : InstRW<[WriteMicrocoded], (instrs XLAT)>;
@@ -594,8 +603,11 @@ def : InstRW<[WriteALULd],
def Zn2WriteMul16 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
let Latency = 3;
}
+def Zn2WriteMul16Imm : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
+ let Latency = 4;
+}
def : SchedAlias<WriteIMul16, Zn2WriteMul16>;
-def : SchedAlias<WriteIMul16Imm, Zn2WriteMul16>;
+def : SchedAlias<WriteIMul16Imm, Zn2WriteMul16Imm>;
def : SchedAlias<WriteIMul16Reg, Zn2WriteMul16>;
// m16.
@@ -1001,6 +1013,7 @@ def : InstRW<[WriteMicrocoded], (instrs FNINIT)>;
// mm <- mm.
def Zn2WriteFPU12 : SchedWriteRes<[Zn2FPU12]> ;
def Zn2WriteFPU12Y : SchedWriteRes<[Zn2FPU12]> {
+ let Latency = 4;
let NumMicroOps = 2;
}
def Zn2WriteFPU12m : SchedWriteRes<[Zn2AGU, Zn2FPU12]> ;
@@ -1109,15 +1122,6 @@ def : InstRW<[WriteMicrocoded], (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>;
//-- Arithmetic instructions --//
-// HADD, HSUB PS/PD
-// PHADD|PHSUB (S) W/D.
-def : SchedAlias<WritePHAdd, Zn2WriteMicrocoded>;
-def : SchedAlias<WritePHAddLd, Zn2WriteMicrocoded>;
-def : SchedAlias<WritePHAddX, Zn2WriteMicrocoded>;
-def : SchedAlias<WritePHAddXLd, Zn2WriteMicrocoded>;
-def : SchedAlias<WritePHAddY, Zn2WriteMicrocoded>;
-def : SchedAlias<WritePHAddYLd, Zn2WriteMicrocoded>;
-
// PCMPGTQ.
def Zn2WritePCMPGTQr : SchedWriteRes<[Zn2FPU03]>;
def : InstRW<[Zn2WritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>;
@@ -1137,8 +1141,12 @@ def : InstRW<[Zn2WritePCMPGTQYm], (instrs VPCMPGTQYrm)>;
// PSLL,PSRL,PSRA W/D/Q.
// x,x / v,v,x.
-def Zn2WritePShift : SchedWriteRes<[Zn2FPU2]> ;
-def Zn2WritePShiftY : SchedWriteRes<[Zn2FPU2]> ;
+def Zn2WritePShift : SchedWriteRes<[Zn2FPU2]> {
+ let Latency = 3;
+}
+def Zn2WritePShiftY : SchedWriteRes<[Zn2FPU2]> {
+ let Latency = 3;
+}
// PSLL,PSRL DQ.
def : InstRW<[Zn2WritePShift], (instregex "(V?)PS(R|L)LDQri")>;
@@ -1280,7 +1288,7 @@ def Zn2WriteCVTDQ2PDr: SchedWriteRes<[Zn2FPU12,Zn2FPU3]> {
}
// CVTDQ2PD.
// x,x.
-def : InstRW<[Zn2WriteCVTDQ2PDr], (instregex "(V)?CVTDQ2PDrr")>;
+def : InstRW<[Zn2WriteCVTDQ2PDr], (instregex "(V)?CVTDQ2P(D|S)rr")>;
// Same as xmm
// y,x.
@@ -1290,9 +1298,9 @@ def : InstRW<[Zn2WriteCVTDQ2PDr], (instrs VCVTDQ2PSYrr)>;
def Zn2WriteCVTPD2DQr: SchedWriteRes<[Zn2FPU12, Zn2FPU3]> {
let Latency = 3;
}
-// CVT(T)PD2DQ.
+// CVT(T)P(D|S)2DQ.
// x,x.
-def : InstRW<[Zn2WriteCVTPD2DQr], (instregex "(V?)CVT(T?)PD2DQrr")>;
+def : InstRW<[Zn2WriteCVTPD2DQr], (instregex "(V?)CVT(T?)P(D|S)2DQrr")>;
def Zn2WriteCVTPD2DQLd: SchedWriteRes<[Zn2AGU,Zn2FPU12,Zn2FPU3]> {
let Latency = 10;
@@ -1322,7 +1330,7 @@ def : InstRW<[Zn2WriteCVTPS2PDr], (instrs MMX_CVTPI2PDirr)>;
def : InstRW<[Zn2WriteCVTPS2PIr], (instregex "MMX_CVT(T?)PD2PIirr")>;
def Zn2WriteCVSTSI2SSr: SchedWriteRes<[Zn2FPU3]> {
- let Latency = 4;
+ let Latency = 3;
}
// same as CVTPD2DQr
@@ -1334,7 +1342,7 @@ def : InstRW<[Zn2WriteCVTPD2DQr], (instregex "(V?)CVT(T?)SS2SI(64)?rr")>;
def : InstRW<[Zn2WriteCVTPD2DQLd], (instregex "(V?)CVT(T?)SS2SI(64)?rm")>;
def Zn2WriteCVSTSI2SDr: SchedWriteRes<[Zn2FPU013, Zn2FPU3]> {
- let Latency = 4;
+ let Latency = 3;
}
// CVTSI2SD.
// x,r32/64.
@@ -1376,7 +1384,7 @@ defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
//-- SSE4A instructions --//
// EXTRQ
def Zn2WriteEXTRQ: SchedWriteRes<[Zn2FPU12, Zn2FPU2]> {
- let Latency = 2;
+ let Latency = 3;
}
def : InstRW<[Zn2WriteEXTRQ], (instregex "EXTRQ")>;
@@ -1448,12 +1456,6 @@ def : InstRW<[Zn2WriteSHA256RNDS2Ld], (instregex "SHA256RNDS2rm")>;
//-- Arithmetic instructions --//
-// HADD, HSUB PS/PD
-def : SchedAlias<WriteFHAdd, Zn2WriteMicrocoded>;
-def : SchedAlias<WriteFHAddLd, Zn2WriteMicrocoded>;
-def : SchedAlias<WriteFHAddY, Zn2WriteMicrocoded>;
-def : SchedAlias<WriteFHAddYLd, Zn2WriteMicrocoded>;
-
// VDIVPS.
// TODO - convert to Zn2WriteResFpuPair
// y,y,y.
@@ -1490,11 +1492,9 @@ def : SchedAlias<WriteFDiv64YLd, Zn2WriteVDIVPDYLd>;
// DPPS.
// x,x,i / v,v,v,i.
-def : SchedAlias<WriteDPPS, Zn2WriteMicrocoded>;
def : SchedAlias<WriteDPPSY, Zn2WriteMicrocoded>;
// x,m,i / v,v,m,i.
-def : SchedAlias<WriteDPPSLd, Zn2WriteMicrocoded>;
def : SchedAlias<WriteDPPSYLd,Zn2WriteMicrocoded>;
// DPPD.
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index 1ae8df977f83..ce8d1d464da9 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -15,6 +15,7 @@
#include "X86InstrInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DerivedTypes.h"
@@ -45,7 +46,7 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible(
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
- SDValue Size, unsigned Align, bool isVolatile,
+ SDValue Size, Align Alignment, bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
const X86Subtarget &Subtarget =
@@ -65,7 +66,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
// If not DWORD aligned or size is more than the threshold, call the library.
// The libc version is likely to be faster for these cases. It can use the
// address value and run time information about the CPU.
- if ((Align & 3) != 0 || !ConstantSize ||
+ if (Alignment < Align(4) || !ConstantSize ||
ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) {
// Check to see if there is a specialized entry-point for memory zeroing.
ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
@@ -111,28 +112,27 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
uint64_t Val = ValC->getZExtValue() & 255;
// If the value is a constant, then we can potentially use larger sets.
- switch (Align & 3) {
- case 2: // WORD aligned
- AVT = MVT::i16;
- ValReg = X86::AX;
- Val = (Val << 8) | Val;
- break;
- case 0: // DWORD aligned
+ if (Alignment > Align(2)) {
+ // DWORD aligned
AVT = MVT::i32;
ValReg = X86::EAX;
Val = (Val << 8) | Val;
Val = (Val << 16) | Val;
- if (Subtarget.is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
+ if (Subtarget.is64Bit() && Alignment > Align(8)) { // QWORD aligned
AVT = MVT::i64;
ValReg = X86::RAX;
Val = (Val << 32) | Val;
}
- break;
- default: // Byte aligned
+ } else if (Alignment == Align(2)) {
+ // WORD aligned
+ AVT = MVT::i16;
+ ValReg = X86::AX;
+ Val = (Val << 8) | Val;
+ } else {
+ // Byte aligned
AVT = MVT::i8;
ValReg = X86::AL;
Count = DAG.getIntPtrConstant(SizeVal, dl);
- break;
}
if (AVT.bitsGT(MVT::i8)) {
@@ -169,13 +169,12 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
EVT AddrVT = Dst.getValueType();
EVT SizeVT = Size.getValueType();
- Chain = DAG.getMemset(Chain, dl,
- DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
- DAG.getConstant(Offset, dl, AddrVT)),
- Val,
- DAG.getConstant(BytesLeft, dl, SizeVT),
- Align, isVolatile, false,
- DstPtrInfo.getWithOffset(Offset));
+ Chain =
+ DAG.getMemset(Chain, dl,
+ DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
+ DAG.getConstant(Offset, dl, AddrVT)),
+ Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment,
+ isVolatile, false, DstPtrInfo.getWithOffset(Offset));
}
// TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
@@ -283,7 +282,7 @@ static SDValue emitConstantSizeRepmov(
Chain, dl,
DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, dl, DstVT)),
DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)),
- DAG.getConstant(BytesLeft, dl, SizeVT), Align, isVolatile,
+ DAG.getConstant(BytesLeft, dl, SizeVT), llvm::Align(Align), isVolatile,
/*AlwaysInline*/ true, /*isTailCall*/ false,
DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset)));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
@@ -291,7 +290,7 @@ static SDValue emitConstantSizeRepmov(
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
+ SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
// If to a segment-relative address space, use the default lowering.
if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
@@ -309,10 +308,10 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
/// Handle constant sizes,
if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
- return emitConstantSizeRepmov(DAG, Subtarget, dl, Chain, Dst, Src,
- ConstantSize->getZExtValue(),
- Size.getValueType(), Align, isVolatile,
- AlwaysInline, DstPtrInfo, SrcPtrInfo);
+ return emitConstantSizeRepmov(
+ DAG, Subtarget, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
+ Size.getValueType(), Alignment.value(), isVolatile, AlwaysInline,
+ DstPtrInfo, SrcPtrInfo);
return SDValue();
}
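A small aside on the EmitTargetCodeForMemset hunk above: once a store width is chosen from the alignment, the constant byte is widened to that width with a chain of shift-and-or steps. The standalone C++ sketch below reproduces just that replication arithmetic; the function name and the width parameter are illustrative, not part of the LLVM code.

#include <cstdint>
#include <cstdio>

// Replicate one byte across a 16-, 32- or 64-bit store value by repeated
// doubling, mirroring the "(Val << N) | Val" chain in the memset lowering.
static uint64_t replicateByte(uint8_t Byte, unsigned WidthBits) {
  uint64_t Val = Byte;
  for (unsigned Bits = 8; Bits < WidthBits; Bits *= 2)
    Val = (Val << Bits) | Val;
  return Val;
}

int main() {
  std::printf("0x%llx\n", (unsigned long long)replicateByte(0xAB, 16)); // 0xabab
  std::printf("0x%llx\n", (unsigned long long)replicateByte(0xAB, 32)); // 0xabababab
  std::printf("0x%llx\n", (unsigned long long)replicateByte(0xAB, 64)); // 0xabababababababab
  return 0;
}

The switch-to-if rewrite in the hunk keeps this arithmetic unchanged; what changes is only how the alignment is compared (Align objects instead of bit masks).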
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SelectionDAGInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86SelectionDAGInfo.h
index 0f2d979f91e3..dac62973636c 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SelectionDAGInfo.h
@@ -14,14 +14,9 @@
#define LLVM_LIB_TARGET_X86_X86SELECTIONDAGINFO_H
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
namespace llvm {
-class X86TargetLowering;
-class X86TargetMachine;
-class X86Subtarget;
-
class X86SelectionDAGInfo : public SelectionDAGTargetInfo {
/// Returns true if it is possible for the base register to conflict with the
/// given set of clobbers for a memory intrinsic.
@@ -33,13 +28,14 @@ public:
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVolatile,
+ SDValue Size, Align Alignment,
+ bool isVolatile,
MachinePointerInfo DstPtrInfo) const override;
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVolatile,
- bool AlwaysInline,
+ SDValue Size, Align Alignment,
+ bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const override;
};
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
index a202fc63637b..de528299654c 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
@@ -11,8 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "Utils/X86ShuffleDecode.h"
+#include "X86ShuffleDecodeConstantPool.h"
+#include "MCTargetDesc/X86ShuffleDecode.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
//===----------------------------------------------------------------------===//
@@ -34,17 +36,17 @@ static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
//
// <4 x i32> <i32 -2147483648, i32 -2147483648,
// i32 -2147483648, i32 -2147483648>
- Type *CstTy = C->getType();
- if (!CstTy->isVectorTy())
+ auto *CstTy = dyn_cast<FixedVectorType>(C->getType());
+ if (!CstTy)
return false;
- Type *CstEltTy = CstTy->getVectorElementType();
+ Type *CstEltTy = CstTy->getElementType();
if (!CstEltTy->isIntegerTy())
return false;
unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
- unsigned NumCstElts = CstTy->getVectorNumElements();
+ unsigned NumCstElts = CstTy->getNumElements();
assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
"Unaligned shuffle mask size");
@@ -185,13 +187,12 @@ void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, unsigned Width,
}
void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
- unsigned Width,
- SmallVectorImpl<int> &ShuffleMask) {
+ unsigned Width, SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
(void)MaskTySize;
- assert((MaskTySize == 128 || MaskTySize == 256) &&
- Width >= MaskTySize && "Unexpected vector size.");
+ assert((MaskTySize == 128 || MaskTySize == 256) && Width >= MaskTySize &&
+ "Unexpected vector size.");
// The shuffle mask requires elements the same size as the target.
APInt UndefElts;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h b/contrib/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h
index 296341517579..51229a69a626 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h
@@ -14,15 +14,13 @@
#ifndef LLVM_LIB_TARGET_X86_X86SHUFFLEDECODECONSTANTPOOL_H
#define LLVM_LIB_TARGET_X86_X86SHUFFLEDECODECONSTANTPOOL_H
-#include "llvm/ADT/SmallVector.h"
-
//===----------------------------------------------------------------------===//
// Vector Mask Decoding
//===----------------------------------------------------------------------===//
namespace llvm {
class Constant;
-class MVT;
+template <typename T> class SmallVectorImpl;
/// Decode a PSHUFB mask from an IR-level vector constant.
void DecodePSHUFBMask(const Constant *C, unsigned Width,
@@ -33,9 +31,8 @@ void DecodeVPERMILPMask(const Constant *C, unsigned ElSize, unsigned Width,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMILP2 variable mask from an IR-level vector constant.
-void DecodeVPERMIL2PMask(const Constant *C, unsigned MatchImm, unsigned ElSize,
- unsigned Width,
- SmallVectorImpl<int> &ShuffleMask);
+void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
+ unsigned Width, SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPPERM variable mask from an IR-level vector constant.
void DecodeVPPERMMask(const Constant *C, unsigned Width,
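The header hunk above drops the SmallVector.h include in favour of a forward declaration of the SmallVectorImpl template, so only the implementation files need the full definition. Below is a standalone sketch of that general pattern with hypothetical names (Buffer, appendAnswer); it is not LLVM code, just an illustration of why a reference parameter only needs a declaration.

#include <cstddef>
#include <cstdio>
#include <vector>

// What the header would carry: a forward declaration plus declarations that
// only mention the type by reference, so no definition is required there.
template <typename T> class Buffer;
void appendAnswer(Buffer<int> &Out);

// What the implementation file (and this single-file demo) carries: the full
// definition of the template and of the function that uses it.
template <typename T> class Buffer {
public:
  void push_back(const T &V) { Data.push_back(V); }
  std::size_t size() const { return Data.size(); }

private:
  std::vector<T> Data;
};

void appendAnswer(Buffer<int> &Out) { Out.push_back(42); }

int main() {
  Buffer<int> B;
  appendAnswer(B);
  std::printf("buffer holds %zu element(s)\n", B.size()); // 1
  return 0;
}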
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
new file mode 100644
index 000000000000..7e91c37367d2
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
@@ -0,0 +1,181 @@
+//===-- X86SpeculativeExecutionSideEffectSuppression.cpp ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file contains the X86 implementation of the speculative execution side
+/// effect suppression mitigation.
+///
+/// This must be used with the -mlvi-cfi flag in order to mitigate indirect
+/// branches and returns.
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-seses"
+
+STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
+
+static cl::opt<bool> EnableSpeculativeExecutionSideEffectSuppression(
+ "x86-seses-enable-without-lvi-cfi",
+ cl::desc("Force enable speculative execution side effect suppression. "
+ "(Note: User must pass -mlvi-cfi in order to mitigate indirect "
+ "branches and returns.)"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> OneLFENCEPerBasicBlock(
+ "x86-seses-one-lfence-per-bb",
+ cl::desc(
+ "Omit all lfences other than the first to be placed in a basic block."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> OnlyLFENCENonConst(
+ "x86-seses-only-lfence-non-const",
+ cl::desc("Only lfence before groups of terminators where at least one "
+ "branch instruction has an input to the addressing mode that is a "
+ "register other than %rip."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+ OmitBranchLFENCEs("x86-seses-omit-branch-lfences",
+ cl::desc("Omit all lfences before branch instructions."),
+ cl::init(false), cl::Hidden);
+
+namespace {
+
+class X86SpeculativeExecutionSideEffectSuppression
+ : public MachineFunctionPass {
+public:
+ X86SpeculativeExecutionSideEffectSuppression() : MachineFunctionPass(ID) {}
+
+ static char ID;
+ StringRef getPassName() const override {
+ return "X86 Speculative Execution Side Effect Suppression";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // namespace
+
+char X86SpeculativeExecutionSideEffectSuppression::ID = 0;
+
+// This function returns whether the passed instruction uses a memory addressing
+// mode that is constant. We treat all memory addressing modes that read
+// from a register that is not %rip as non-constant. Note that the use
+// of the EFLAGS register results in an addressing mode being considered
+// non-constant, therefore all JCC instructions will return false from this
+// function since one of their operands will always be the EFLAGS register.
+static bool hasConstantAddressingMode(const MachineInstr &MI) {
+ for (const MachineOperand &MO : MI.uses())
+ if (MO.isReg() && X86::RIP != MO.getReg())
+ return false;
+ return true;
+}
+
+bool X86SpeculativeExecutionSideEffectSuppression::runOnMachineFunction(
+ MachineFunction &MF) {
+
+ const auto &OptLevel = MF.getTarget().getOptLevel();
+ const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
+
+ // Check whether SESES needs to run as the fallback for LVI at O0, whether the
+ // user explicitly passed an SESES flag, or whether the SESES target feature
+ // was set.
+ if (!EnableSpeculativeExecutionSideEffectSuppression &&
+ !(Subtarget.useLVILoadHardening() && OptLevel == CodeGenOpt::None) &&
+ !Subtarget.useSpeculativeExecutionSideEffectSuppression())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
+ << " **********\n");
+ bool Modified = false;
+ const X86InstrInfo *TII = Subtarget.getInstrInfo();
+ for (MachineBasicBlock &MBB : MF) {
+ MachineInstr *FirstTerminator = nullptr;
+ // Keep track of whether the previous instruction was an LFENCE to avoid
+ // adding redundant LFENCEs.
+ bool PrevInstIsLFENCE = false;
+ for (auto &MI : MBB) {
+
+ if (MI.getOpcode() == X86::LFENCE) {
+ PrevInstIsLFENCE = true;
+ continue;
+ }
+ // We want to put an LFENCE before any instruction that
+ // may load or store. This LFENCE is intended to avoid leaking any secret
+ // data due to a given load or store. This results in closing the cache
+ // and memory timing side channels. We will treat terminators that load
+ // or store separately.
+ if (MI.mayLoadOrStore() && !MI.isTerminator()) {
+ if (!PrevInstIsLFENCE) {
+ BuildMI(MBB, MI, DebugLoc(), TII->get(X86::LFENCE));
+ NumLFENCEsInserted++;
+ Modified = true;
+ }
+ if (OneLFENCEPerBasicBlock)
+ break;
+ }
+ // The following section inserts an LFENCE before groups of terminators
+ // that include branches. This closes the branch prediction side channels,
+ // since the LFENCEs placed by this logic prevent code from executing after
+ // misspeculation.
+
+ // Keep track of the first terminator in a basic block since if we need
+ // to LFENCE the terminators in this basic block we must add the
+ // instruction before the first terminator in the basic block (as
+ // opposed to before the terminator that indicates an LFENCE is
+ // required). An example of why this is necessary is that the
+ // X86InstrInfo::analyzeBranch method assumes all terminators are grouped
+ // together and terminates it's analysis once the first non-termintor
+ // together and terminates its analysis once the first non-terminator
+ if (MI.isTerminator() && FirstTerminator == nullptr)
+ FirstTerminator = &MI;
+
+ // Look for branch instructions that will require an LFENCE to be put
+ // before this basic block's terminators.
+ if (!MI.isBranch() || OmitBranchLFENCEs) {
+ // This isn't a branch or we're not putting LFENCEs before branches.
+ PrevInstIsLFENCE = false;
+ continue;
+ }
+
+ if (OnlyLFENCENonConst && hasConstantAddressingMode(MI)) {
+ // This is a branch, but it only has constant addressing mode and we're
+ // not adding LFENCEs before such branches.
+ PrevInstIsLFENCE = false;
+ continue;
+ }
+
+ // This branch requires adding an LFENCE.
+ if (!PrevInstIsLFENCE) {
+ BuildMI(MBB, FirstTerminator, DebugLoc(), TII->get(X86::LFENCE));
+ NumLFENCEsInserted++;
+ Modified = true;
+ }
+ break;
+ }
+ }
+
+ return Modified;
+}
+
+FunctionPass *llvm::createX86SpeculativeExecutionSideEffectSuppression() {
+ return new X86SpeculativeExecutionSideEffectSuppression();
+}
+
+INITIALIZE_PASS(X86SpeculativeExecutionSideEffectSuppression, "x86-seses",
+ "X86 Speculative Execution Side Effect Suppression", false,
+ false)
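To summarise the control flow of the new pass above in one self-contained place, the sketch below models its per-basic-block placement decision. It is illustrative only: the Inst and Options structs stand in for MachineInstr and the pass's cl::opt flags, and the function returns the instructions an LFENCE would be inserted before rather than building machine IR.

#include <cstdio>
#include <vector>

// Toy model of the SESES per-block decision; not the real pass.
struct Inst {
  const char *Name;
  bool IsLFENCE;
  bool MayLoadOrStore;
  bool IsTerminator;
  bool IsBranch;
  bool HasConstantAddressingMode; // e.g. only %rip-relative inputs
};

struct Options {
  bool OneLFENCEPerBasicBlock;
  bool OnlyLFENCENonConst;
  bool OmitBranchLFENCEs;
};

// Returns the instructions an LFENCE would be placed in front of.
static std::vector<const char *> placeFences(const std::vector<Inst> &Block,
                                             const Options &Opt) {
  std::vector<const char *> Fences;
  const Inst *FirstTerminator = nullptr;
  bool PrevInstIsLFENCE = false;
  for (const Inst &MI : Block) {
    if (MI.IsLFENCE) {
      PrevInstIsLFENCE = true;
      continue;
    }
    // Fence before any non-terminator that may touch memory.
    if (MI.MayLoadOrStore && !MI.IsTerminator) {
      if (!PrevInstIsLFENCE)
        Fences.push_back(MI.Name);
      if (Opt.OneLFENCEPerBasicBlock)
        break;
    }
    // Remember the first terminator; a fence for the terminator group must go
    // before it so the group stays contiguous for branch analysis.
    if (MI.IsTerminator && !FirstTerminator)
      FirstTerminator = &MI;
    if (!MI.IsBranch || Opt.OmitBranchLFENCEs) {
      PrevInstIsLFENCE = false;
      continue;
    }
    if (Opt.OnlyLFENCENonConst && MI.HasConstantAddressingMode) {
      PrevInstIsLFENCE = false;
      continue;
    }
    // This branch needs a fence in front of the whole terminator group.
    if (!PrevInstIsLFENCE)
      Fences.push_back(FirstTerminator ? FirstTerminator->Name : MI.Name);
    break;
  }
  return Fences;
}

int main() {
  std::vector<Inst> Block = {
      {"mov (%rdi), %eax", false, true, false, false, false},
      {"add $1, %eax", false, false, false, false, false},
      {"jne .Ltarget", false, false, true, true, false},
      {"jmp .Lnext", false, false, true, true, true},
  };
  Options Opt = {false, false, false};
  for (const char *At : placeFences(Block, Opt))
    std::printf("lfence before: %s\n", At);
  return 0;
}

With the default options the example block gets one fence before the load and one before the first terminator of the branch group, which matches the pass's comments about keeping the terminator group contiguous for analyzeBranch.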
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index 9aa47c532e82..fe5b9a05f811 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -53,6 +53,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <iterator>
@@ -872,10 +873,10 @@ void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
case X86::FARCALL16m:
case X86::FARCALL32m:
- case X86::FARCALL64:
+ case X86::FARCALL64m:
case X86::FARJMP16m:
case X86::FARJMP32m:
- case X86::FARJMP64:
+ case X86::FARJMP64m:
// We cannot mitigate far jumps or calls, but we also don't expect them
// to be vulnerable to Spectre v1.2 style attacks.
continue;
@@ -920,6 +921,11 @@ void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
// Now stitch the new instructions into place and erase the old one.
for (auto *NewMI : NewMIs)
MBB.insert(MI.getIterator(), NewMI);
+
+ // Update the call site info.
+ if (MI.isCandidateForCallSiteEntry())
+ MF.eraseCallSiteInfo(&MI);
+
MI.eraseFromParent();
LLVM_DEBUG({
dbgs() << "Unfolded load successfully into:\n";
@@ -993,7 +999,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
case X86::FARJMP16m:
case X86::FARJMP32m:
- case X86::FARJMP64:
+ case X86::FARJMP64m:
// We cannot mitigate far jumps or calls, but we also don't expect them
// to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks.
continue;
@@ -1195,394 +1201,13 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
return CMovs;
}
-/// Returns true if the instruction has no behavior (specified or otherwise)
-/// that is based on the value of any of its register operands
-///
-/// A classical example of something that is inherently not data invariant is an
-/// indirect jump -- the destination is loaded into icache based on the bits set
-/// in the jump destination register.
-///
-/// FIXME: This should become part of our instruction tables.
-static bool isDataInvariant(MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- // By default, assume that the instruction is not data invariant.
- return false;
-
- // Some target-independent operations that trivially lower to data-invariant
- // instructions.
- case TargetOpcode::COPY:
- case TargetOpcode::INSERT_SUBREG:
- case TargetOpcode::SUBREG_TO_REG:
- return true;
-
- // On x86 it is believed that imul is constant time w.r.t. the loaded data.
- // However, they set flags and are perhaps the most surprisingly constant
- // time operations so we call them out here separately.
- case X86::IMUL16rr:
- case X86::IMUL16rri8:
- case X86::IMUL16rri:
- case X86::IMUL32rr:
- case X86::IMUL32rri8:
- case X86::IMUL32rri:
- case X86::IMUL64rr:
- case X86::IMUL64rri32:
- case X86::IMUL64rri8:
-
- // Bit scanning and counting instructions that are somewhat surprisingly
- // constant time as they scan across bits and do other fairly complex
- // operations like popcnt, but are believed to be constant time on x86.
- // However, these set flags.
- case X86::BSF16rr:
- case X86::BSF32rr:
- case X86::BSF64rr:
- case X86::BSR16rr:
- case X86::BSR32rr:
- case X86::BSR64rr:
- case X86::LZCNT16rr:
- case X86::LZCNT32rr:
- case X86::LZCNT64rr:
- case X86::POPCNT16rr:
- case X86::POPCNT32rr:
- case X86::POPCNT64rr:
- case X86::TZCNT16rr:
- case X86::TZCNT32rr:
- case X86::TZCNT64rr:
-
- // Bit manipulation instructions are effectively combinations of basic
- // arithmetic ops, and should still execute in constant time. These also
- // set flags.
- case X86::BLCFILL32rr:
- case X86::BLCFILL64rr:
- case X86::BLCI32rr:
- case X86::BLCI64rr:
- case X86::BLCIC32rr:
- case X86::BLCIC64rr:
- case X86::BLCMSK32rr:
- case X86::BLCMSK64rr:
- case X86::BLCS32rr:
- case X86::BLCS64rr:
- case X86::BLSFILL32rr:
- case X86::BLSFILL64rr:
- case X86::BLSI32rr:
- case X86::BLSI64rr:
- case X86::BLSIC32rr:
- case X86::BLSIC64rr:
- case X86::BLSMSK32rr:
- case X86::BLSMSK64rr:
- case X86::BLSR32rr:
- case X86::BLSR64rr:
- case X86::TZMSK32rr:
- case X86::TZMSK64rr:
-
- // Bit extracting and clearing instructions should execute in constant time,
- // and set flags.
- case X86::BEXTR32rr:
- case X86::BEXTR64rr:
- case X86::BEXTRI32ri:
- case X86::BEXTRI64ri:
- case X86::BZHI32rr:
- case X86::BZHI64rr:
-
- // Shift and rotate.
- case X86::ROL8r1: case X86::ROL16r1: case X86::ROL32r1: case X86::ROL64r1:
- case X86::ROL8rCL: case X86::ROL16rCL: case X86::ROL32rCL: case X86::ROL64rCL:
- case X86::ROL8ri: case X86::ROL16ri: case X86::ROL32ri: case X86::ROL64ri:
- case X86::ROR8r1: case X86::ROR16r1: case X86::ROR32r1: case X86::ROR64r1:
- case X86::ROR8rCL: case X86::ROR16rCL: case X86::ROR32rCL: case X86::ROR64rCL:
- case X86::ROR8ri: case X86::ROR16ri: case X86::ROR32ri: case X86::ROR64ri:
- case X86::SAR8r1: case X86::SAR16r1: case X86::SAR32r1: case X86::SAR64r1:
- case X86::SAR8rCL: case X86::SAR16rCL: case X86::SAR32rCL: case X86::SAR64rCL:
- case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri: case X86::SAR64ri:
- case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1: case X86::SHL64r1:
- case X86::SHL8rCL: case X86::SHL16rCL: case X86::SHL32rCL: case X86::SHL64rCL:
- case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri: case X86::SHL64ri:
- case X86::SHR8r1: case X86::SHR16r1: case X86::SHR32r1: case X86::SHR64r1:
- case X86::SHR8rCL: case X86::SHR16rCL: case X86::SHR32rCL: case X86::SHR64rCL:
- case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri: case X86::SHR64ri:
- case X86::SHLD16rrCL: case X86::SHLD32rrCL: case X86::SHLD64rrCL:
- case X86::SHLD16rri8: case X86::SHLD32rri8: case X86::SHLD64rri8:
- case X86::SHRD16rrCL: case X86::SHRD32rrCL: case X86::SHRD64rrCL:
- case X86::SHRD16rri8: case X86::SHRD32rri8: case X86::SHRD64rri8:
-
- // Basic arithmetic is constant time on the input but does set flags.
- case X86::ADC8rr: case X86::ADC8ri:
- case X86::ADC16rr: case X86::ADC16ri: case X86::ADC16ri8:
- case X86::ADC32rr: case X86::ADC32ri: case X86::ADC32ri8:
- case X86::ADC64rr: case X86::ADC64ri8: case X86::ADC64ri32:
- case X86::ADD8rr: case X86::ADD8ri:
- case X86::ADD16rr: case X86::ADD16ri: case X86::ADD16ri8:
- case X86::ADD32rr: case X86::ADD32ri: case X86::ADD32ri8:
- case X86::ADD64rr: case X86::ADD64ri8: case X86::ADD64ri32:
- case X86::AND8rr: case X86::AND8ri:
- case X86::AND16rr: case X86::AND16ri: case X86::AND16ri8:
- case X86::AND32rr: case X86::AND32ri: case X86::AND32ri8:
- case X86::AND64rr: case X86::AND64ri8: case X86::AND64ri32:
- case X86::OR8rr: case X86::OR8ri:
- case X86::OR16rr: case X86::OR16ri: case X86::OR16ri8:
- case X86::OR32rr: case X86::OR32ri: case X86::OR32ri8:
- case X86::OR64rr: case X86::OR64ri8: case X86::OR64ri32:
- case X86::SBB8rr: case X86::SBB8ri:
- case X86::SBB16rr: case X86::SBB16ri: case X86::SBB16ri8:
- case X86::SBB32rr: case X86::SBB32ri: case X86::SBB32ri8:
- case X86::SBB64rr: case X86::SBB64ri8: case X86::SBB64ri32:
- case X86::SUB8rr: case X86::SUB8ri:
- case X86::SUB16rr: case X86::SUB16ri: case X86::SUB16ri8:
- case X86::SUB32rr: case X86::SUB32ri: case X86::SUB32ri8:
- case X86::SUB64rr: case X86::SUB64ri8: case X86::SUB64ri32:
- case X86::XOR8rr: case X86::XOR8ri:
- case X86::XOR16rr: case X86::XOR16ri: case X86::XOR16ri8:
- case X86::XOR32rr: case X86::XOR32ri: case X86::XOR32ri8:
- case X86::XOR64rr: case X86::XOR64ri8: case X86::XOR64ri32:
- // Arithmetic with just 32-bit and 64-bit variants and no immediates.
- case X86::ADCX32rr: case X86::ADCX64rr:
- case X86::ADOX32rr: case X86::ADOX64rr:
- case X86::ANDN32rr: case X86::ANDN64rr:
- // Unary arithmetic operations.
- case X86::DEC8r: case X86::DEC16r: case X86::DEC32r: case X86::DEC64r:
- case X86::INC8r: case X86::INC16r: case X86::INC32r: case X86::INC64r:
- case X86::NEG8r: case X86::NEG16r: case X86::NEG32r: case X86::NEG64r:
- // Check whether the EFLAGS implicit-def is dead. We assume that this will
- // always find the implicit-def because this code should only be reached
- // for instructions that do in fact implicitly def this.
- if (!MI.findRegisterDefOperand(X86::EFLAGS)->isDead()) {
- // If we would clobber EFLAGS that are used, just bail for now.
- LLVM_DEBUG(dbgs() << " Unable to harden post-load due to EFLAGS: ";
- MI.dump(); dbgs() << "\n");
- return false;
- }
-
- // Otherwise, fallthrough to handle these the same as instructions that
- // don't set EFLAGS.
- LLVM_FALLTHROUGH;
-
- // Unlike other arithmetic, NOT doesn't set EFLAGS.
- case X86::NOT8r: case X86::NOT16r: case X86::NOT32r: case X86::NOT64r:
-
- // Various move instructions used to zero or sign extend things. Note that we
- // intentionally don't support the _NOREX variants as we can't handle that
- // register constraint anyways.
- case X86::MOVSX16rr8:
- case X86::MOVSX32rr8: case X86::MOVSX32rr16:
- case X86::MOVSX64rr8: case X86::MOVSX64rr16: case X86::MOVSX64rr32:
- case X86::MOVZX16rr8:
- case X86::MOVZX32rr8: case X86::MOVZX32rr16:
- case X86::MOVZX64rr8: case X86::MOVZX64rr16:
- case X86::MOV32rr:
-
- // Arithmetic instructions that are both constant time and don't set flags.
- case X86::RORX32ri:
- case X86::RORX64ri:
- case X86::SARX32rr:
- case X86::SARX64rr:
- case X86::SHLX32rr:
- case X86::SHLX64rr:
- case X86::SHRX32rr:
- case X86::SHRX64rr:
-
- // LEA doesn't actually access memory, and its arithmetic is constant time.
- case X86::LEA16r:
- case X86::LEA32r:
- case X86::LEA64_32r:
- case X86::LEA64r:
- return true;
- }
-}
-
-/// Returns true if the instruction has no behavior (specified or otherwise)
-/// that is based on the value loaded from memory or the value of any
-/// non-address register operands.
-///
-/// For example, if the latency of the instruction is dependent on the
-/// particular bits set in any of the registers *or* any of the bits loaded from
-/// memory.
-///
-/// A classical example of something that is inherently not data invariant is an
-/// indirect jump -- the destination is loaded into icache based on the bits set
-/// in the jump destination register.
-///
-/// FIXME: This should become part of our instruction tables.
-static bool isDataInvariantLoad(MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- // By default, assume that the load will immediately leak.
- return false;
-
- // On x86 it is believed that imul is constant time w.r.t. the loaded data.
- // However, they set flags and are perhaps the most surprisingly constant
- // time operations so we call them out here separately.
- case X86::IMUL16rm:
- case X86::IMUL16rmi8:
- case X86::IMUL16rmi:
- case X86::IMUL32rm:
- case X86::IMUL32rmi8:
- case X86::IMUL32rmi:
- case X86::IMUL64rm:
- case X86::IMUL64rmi32:
- case X86::IMUL64rmi8:
-
- // Bit scanning and counting instructions that are somewhat surprisingly
- // constant time as they scan across bits and do other fairly complex
- // operations like popcnt, but are believed to be constant time on x86.
- // However, these set flags.
- case X86::BSF16rm:
- case X86::BSF32rm:
- case X86::BSF64rm:
- case X86::BSR16rm:
- case X86::BSR32rm:
- case X86::BSR64rm:
- case X86::LZCNT16rm:
- case X86::LZCNT32rm:
- case X86::LZCNT64rm:
- case X86::POPCNT16rm:
- case X86::POPCNT32rm:
- case X86::POPCNT64rm:
- case X86::TZCNT16rm:
- case X86::TZCNT32rm:
- case X86::TZCNT64rm:
-
- // Bit manipulation instructions are effectively combinations of basic
- // arithmetic ops, and should still execute in constant time. These also
- // set flags.
- case X86::BLCFILL32rm:
- case X86::BLCFILL64rm:
- case X86::BLCI32rm:
- case X86::BLCI64rm:
- case X86::BLCIC32rm:
- case X86::BLCIC64rm:
- case X86::BLCMSK32rm:
- case X86::BLCMSK64rm:
- case X86::BLCS32rm:
- case X86::BLCS64rm:
- case X86::BLSFILL32rm:
- case X86::BLSFILL64rm:
- case X86::BLSI32rm:
- case X86::BLSI64rm:
- case X86::BLSIC32rm:
- case X86::BLSIC64rm:
- case X86::BLSMSK32rm:
- case X86::BLSMSK64rm:
- case X86::BLSR32rm:
- case X86::BLSR64rm:
- case X86::TZMSK32rm:
- case X86::TZMSK64rm:
-
- // Bit extracting and clearing instructions should execute in constant time,
- // and set flags.
- case X86::BEXTR32rm:
- case X86::BEXTR64rm:
- case X86::BEXTRI32mi:
- case X86::BEXTRI64mi:
- case X86::BZHI32rm:
- case X86::BZHI64rm:
-
- // Basic arithmetic is constant time on the input but does set flags.
- case X86::ADC8rm:
- case X86::ADC16rm:
- case X86::ADC32rm:
- case X86::ADC64rm:
- case X86::ADCX32rm:
- case X86::ADCX64rm:
- case X86::ADD8rm:
- case X86::ADD16rm:
- case X86::ADD32rm:
- case X86::ADD64rm:
- case X86::ADOX32rm:
- case X86::ADOX64rm:
- case X86::AND8rm:
- case X86::AND16rm:
- case X86::AND32rm:
- case X86::AND64rm:
- case X86::ANDN32rm:
- case X86::ANDN64rm:
- case X86::OR8rm:
- case X86::OR16rm:
- case X86::OR32rm:
- case X86::OR64rm:
- case X86::SBB8rm:
- case X86::SBB16rm:
- case X86::SBB32rm:
- case X86::SBB64rm:
- case X86::SUB8rm:
- case X86::SUB16rm:
- case X86::SUB32rm:
- case X86::SUB64rm:
- case X86::XOR8rm:
- case X86::XOR16rm:
- case X86::XOR32rm:
- case X86::XOR64rm:
- // Check whether the EFLAGS implicit-def is dead. We assume that this will
- // always find the implicit-def because this code should only be reached
- // for instructions that do in fact implicitly def this.
- if (!MI.findRegisterDefOperand(X86::EFLAGS)->isDead()) {
- // If we would clobber EFLAGS that are used, just bail for now.
- LLVM_DEBUG(dbgs() << " Unable to harden post-load due to EFLAGS: ";
- MI.dump(); dbgs() << "\n");
- return false;
- }
-
- // Otherwise, fallthrough to handle these the same as instructions that
- // don't set EFLAGS.
- LLVM_FALLTHROUGH;
-
- // Integer multiply w/o affecting flags is still believed to be constant
- // time on x86. Called out separately as this is among the most surprising
- // instructions to exhibit that behavior.
- case X86::MULX32rm:
- case X86::MULX64rm:
-
- // Arithmetic instructions that are both constant time and don't set flags.
- case X86::RORX32mi:
- case X86::RORX64mi:
- case X86::SARX32rm:
- case X86::SARX64rm:
- case X86::SHLX32rm:
- case X86::SHLX64rm:
- case X86::SHRX32rm:
- case X86::SHRX64rm:
-
- // Conversions are believed to be constant time and don't set flags.
- case X86::CVTTSD2SI64rm: case X86::VCVTTSD2SI64rm: case X86::VCVTTSD2SI64Zrm:
- case X86::CVTTSD2SIrm: case X86::VCVTTSD2SIrm: case X86::VCVTTSD2SIZrm:
- case X86::CVTTSS2SI64rm: case X86::VCVTTSS2SI64rm: case X86::VCVTTSS2SI64Zrm:
- case X86::CVTTSS2SIrm: case X86::VCVTTSS2SIrm: case X86::VCVTTSS2SIZrm:
- case X86::CVTSI2SDrm: case X86::VCVTSI2SDrm: case X86::VCVTSI2SDZrm:
- case X86::CVTSI2SSrm: case X86::VCVTSI2SSrm: case X86::VCVTSI2SSZrm:
- case X86::CVTSI642SDrm: case X86::VCVTSI642SDrm: case X86::VCVTSI642SDZrm:
- case X86::CVTSI642SSrm: case X86::VCVTSI642SSrm: case X86::VCVTSI642SSZrm:
- case X86::CVTSS2SDrm: case X86::VCVTSS2SDrm: case X86::VCVTSS2SDZrm:
- case X86::CVTSD2SSrm: case X86::VCVTSD2SSrm: case X86::VCVTSD2SSZrm:
- // AVX512 added unsigned integer conversions.
- case X86::VCVTTSD2USI64Zrm:
- case X86::VCVTTSD2USIZrm:
- case X86::VCVTTSS2USI64Zrm:
- case X86::VCVTTSS2USIZrm:
- case X86::VCVTUSI2SDZrm:
- case X86::VCVTUSI642SDZrm:
- case X86::VCVTUSI2SSZrm:
- case X86::VCVTUSI642SSZrm:
-
- // Loads to register don't set flags.
- case X86::MOV8rm:
- case X86::MOV8rm_NOREX:
- case X86::MOV16rm:
- case X86::MOV32rm:
- case X86::MOV64rm:
- case X86::MOVSX16rm8:
- case X86::MOVSX32rm16:
- case X86::MOVSX32rm8:
- case X86::MOVSX32rm8_NOREX:
- case X86::MOVSX64rm16:
- case X86::MOVSX64rm32:
- case X86::MOVSX64rm8:
- case X86::MOVZX16rm8:
- case X86::MOVZX32rm16:
- case X86::MOVZX32rm8:
- case X86::MOVZX32rm8_NOREX:
- case X86::MOVZX64rm16:
- case X86::MOVZX64rm8:
- return true;
+// Returns true if MI has EFLAGS as a register def operand and that def is
+// live; otherwise returns false.
+static bool isEFLAGSDefLive(const MachineInstr &MI) {
+ if (const MachineOperand *DefOp = MI.findRegisterDefOperand(X86::EFLAGS)) {
+ return !DefOp->isDead();
}
+ return false;
}
static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
@@ -1740,8 +1365,9 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
// address registers, queue it up to be hardened post-load. Notably,
// even once hardened this won't introduce a useful dependency that
// could prune out subsequent loads.
- if (EnablePostLoadHardening && isDataInvariantLoad(MI) &&
- MI.getDesc().getNumDefs() == 1 && MI.getOperand(0).isReg() &&
+ if (EnablePostLoadHardening && X86InstrInfo::isDataInvariantLoad(MI) &&
+ !isEFLAGSDefLive(MI) && MI.getDesc().getNumDefs() == 1 &&
+ MI.getOperand(0).isReg() &&
canHardenRegister(MI.getOperand(0).getReg()) &&
!HardenedAddrRegs.count(BaseReg) &&
!HardenedAddrRegs.count(IndexReg)) {
@@ -1795,9 +1421,10 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
if (HardenPostLoad.erase(&MI)) {
assert(!MI.isCall() && "Must not try to post-load harden a call!");
- // If this is a data-invariant load, we want to try and sink any
- // hardening as far as possible.
- if (isDataInvariantLoad(MI)) {
+ // If this is a data-invariant load and there is no EFLAGS
+ // interference, we want to try and sink any hardening as far as
+ // possible.
+ if (X86InstrInfo::isDataInvariantLoad(MI) && !isEFLAGSDefLive(MI)) {
// Sink the instruction we'll need to harden as far as we can down
// the graph.
MachineInstr *SunkMI = sinkPostLoadHardenedInst(MI, HardenPostLoad);
@@ -2085,9 +1712,9 @@ void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
// Broadcast our state into a vector register.
Register VStateReg = MRI->createVirtualRegister(OpRC);
- unsigned BroadcastOp =
- Is128Bit ? X86::VPBROADCASTQrZ128r
- : Is256Bit ? X86::VPBROADCASTQrZ256r : X86::VPBROADCASTQrZr;
+ unsigned BroadcastOp = Is128Bit ? X86::VPBROADCASTQrZ128rr
+ : Is256Bit ? X86::VPBROADCASTQrZ256rr
+ : X86::VPBROADCASTQrZrr;
auto BroadcastI =
BuildMI(MBB, InsertPt, Loc, TII->get(BroadcastOp), VStateReg)
.addReg(StateReg);
@@ -2147,8 +1774,11 @@ void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedInstrs) {
- assert(isDataInvariantLoad(InitialMI) &&
+ assert(X86InstrInfo::isDataInvariantLoad(InitialMI) &&
"Cannot get here with a non-invariant load!");
+ assert(!isEFLAGSDefLive(InitialMI) &&
+ "Cannot get here with a data invariant load "
+ "that interferes with EFLAGS!");
// See if we can sink hardening the loaded value.
auto SinkCheckToSingleUse =
@@ -2160,14 +1790,14 @@ MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
// own.
MachineInstr *SingleUseMI = nullptr;
for (MachineInstr &UseMI : MRI->use_instructions(DefReg)) {
- // If we're already going to harden this use, it is data invariant and
- // within our block.
+      // If we're already going to harden this use, it is data invariant, it
+      // does not interfere with EFLAGS, and it is within our block.
if (HardenedInstrs.count(&UseMI)) {
- if (!isDataInvariantLoad(UseMI)) {
+ if (!X86InstrInfo::isDataInvariantLoad(UseMI) || isEFLAGSDefLive(UseMI)) {
// If we've already decided to harden a non-load, we must have sunk
// some other post-load hardened instruction to it and it must itself
// be data-invariant.
- assert(isDataInvariant(UseMI) &&
+ assert(X86InstrInfo::isDataInvariant(UseMI) &&
"Data variant instruction being hardened!");
continue;
}
@@ -2199,7 +1829,8 @@ MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
// If this single use isn't data invariant, isn't in this block, or has
// interfering EFLAGS, we can't sink the hardening to it.
- if (!isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent())
+ if (!X86InstrInfo::isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent() ||
+ isEFLAGSDefLive(UseMI))
return {};
// If this instruction defines multiple registers bail as we won't harden
@@ -2590,10 +2221,10 @@ void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr(
switch (MI.getOpcode()) {
case X86::FARCALL16m:
case X86::FARCALL32m:
- case X86::FARCALL64:
+ case X86::FARCALL64m:
case X86::FARJMP16m:
case X86::FARJMP32m:
- case X86::FARJMP64:
+ case X86::FARJMP64m:
// We don't need to harden either far calls or far jumps as they are
// safe from Spectre.
return;
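The hunks above gate post-load hardening on the same pair of checks everywhere it is applied. A minimal sketch of that combined predicate, written as if inside this pass's translation unit; the helper name canPostLoadHarden is chosen purely for illustration and is not part of the patch:

// Illustrative sketch only; canPostLoadHarden is not a function in this patch.
// Hardening may be applied after (or sunk past) a load only when the
// instruction is data invariant *and* any EFLAGS def it carries is dead.
static bool canPostLoadHarden(MachineInstr &MI) {
  return X86InstrInfo::isDataInvariantLoad(MI) && !isEFLAGSDefLive(MI);
}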
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp
index 75c3a70b430a..975cbabb30fd 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -10,14 +10,13 @@
//
//===----------------------------------------------------------------------===//
+#include "X86Subtarget.h"
+#include "MCTargetDesc/X86BaseInfo.h"
#include "X86.h"
-
#include "X86CallLowering.h"
#include "X86LegalizerInfo.h"
#include "X86MacroFusion.h"
#include "X86RegisterBankInfo.h"
-#include "X86Subtarget.h"
-#include "MCTargetDesc/X86BaseInfo.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
@@ -89,7 +88,9 @@ X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {
// Medium is a hybrid: RIP-rel for code, GOTOFF for DSO local data.
case CodeModel::Medium:
- if (isa<Function>(GV))
+    // Constant pool and jump table handling pass a nullptr to this
+    // function, so we need to use isa_and_nonnull.
+ if (isa_and_nonnull<Function>(GV))
return X86II::MO_NO_FLAG; // All code is RIP-relative
return X86II::MO_GOTOFF; // Local symbols use GOTOFF.
}
@@ -227,11 +228,11 @@ bool X86Subtarget::isLegalToCallImmediateAddr() const {
}
void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
- std::string CPUName = CPU;
+ std::string CPUName = std::string(CPU);
if (CPUName.empty())
CPUName = "generic";
- std::string FullFS = FS;
+ std::string FullFS = std::string(FS);
if (In64BitMode) {
// SSE2 should default to enabled in 64-bit mode, but can be turned off
// explicitly.
@@ -379,3 +380,7 @@ void X86Subtarget::getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
Mutations.push_back(createX86MacroFusionDAGMutation());
}
+
+bool X86Subtarget::isPositionIndependent() const {
+ return TM.isPositionIndependent();
+}
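The classifyLocalReference change above works because isa_and_nonnull, unlike isa, tolerates a null pointer. A small self-contained sketch of that difference; the wrapper name isRIPRelativeCode is illustrative only:

#include "llvm/IR/Function.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

// isa<Function>(GV) would assert on a null GV, while isa_and_nonnull<Function>
// simply returns false, which is what the Medium code model logic needs when
// constant-pool or jump-table handling passes a nullptr.
static bool isRIPRelativeCode(const GlobalValue *GV) {
  return isa_and_nonnull<Function>(GV);
}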
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h
index af5153243c8b..de45d357e3c2 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h
@@ -17,15 +17,9 @@
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "X86SelectionDAGInfo.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/CallingConv.h"
-#include "llvm/Target/TargetMachine.h"
#include <climits>
#include <memory>
@@ -34,7 +28,13 @@
namespace llvm {
+class CallLowering;
class GlobalValue;
+class InstructionSelector;
+class LegalizerInfo;
+class RegisterBankInfo;
+class StringRef;
+class TargetMachine;
/// The X86 backend supports a number of different styles of PIC.
///
@@ -258,6 +258,10 @@ protected:
bool InsertVZEROUPPER = false;
/// True if there is no performance penalty for writing NOPs with up to
+ /// 7 bytes.
+ bool HasFast7ByteNOP = false;
+
+ /// True if there is no performance penalty for writing NOPs with up to
/// 11 bytes.
bool HasFast11ByteNOP = false;
@@ -393,6 +397,17 @@ protected:
/// Processor supports PCONFIG instruction
bool HasPCONFIG = false;
+ /// Processor supports SERIALIZE instruction
+ bool HasSERIALIZE = false;
+
+ /// Processor supports TSXLDTRK instruction
+ bool HasTSXLDTRK = false;
+
+ /// Processor has AMX support
+ bool HasAMXTILE = false;
+ bool HasAMXBF16 = false;
+ bool HasAMXINT8 = false;
+
/// Processor has a single uop BEXTR implementation.
bool HasFastBEXTR = false;
@@ -427,6 +442,9 @@ protected:
/// POP+LFENCE+JMP sequence.
bool UseLVIControlFlowIntegrity = false;
+ /// Enable Speculative Execution Side Effect Suppression
+ bool UseSpeculativeExecutionSideEffectSuppression = false;
+
/// Insert LFENCE instructions to prevent data speculatively injected into
/// loads from being used maliciously.
bool UseLVILoadHardening = false;
@@ -637,8 +655,15 @@ public:
bool hasRTM() const { return HasRTM; }
bool hasADX() const { return HasADX; }
bool hasSHA() const { return HasSHA; }
- bool hasPRFCHW() const { return HasPRFCHW || HasPREFETCHWT1; }
+ bool hasPRFCHW() const { return HasPRFCHW; }
bool hasPREFETCHWT1() const { return HasPREFETCHWT1; }
+ bool hasPrefetchW() const {
+    // The PREFETCHW instruction was added with 3DNow!, but later CPUs gave it
+    // its own CPUID bit as part of deprecating 3DNow!. Intel eventually added
+    // it as well, and KNL has a variant that prefetches to the L2 cache. We
+    // assume the L1 version exists if the L2 version does.
+ return has3DNow() || hasPRFCHW() || hasPREFETCHWT1();
+ }
bool hasSSEPrefetch() const {
// We implicitly enable these when we have a write prefix supporting cache
// level OR if we have prfchw, but don't already have a read prefetch from
@@ -712,10 +737,15 @@ public:
bool threewayBranchProfitable() const { return ThreewayBranchProfitable; }
bool hasINVPCID() const { return HasINVPCID; }
bool hasENQCMD() const { return HasENQCMD; }
+ bool hasSERIALIZE() const { return HasSERIALIZE; }
+ bool hasTSXLDTRK() const { return HasTSXLDTRK; }
bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
bool useRetpolineIndirectBranches() const {
return UseRetpolineIndirectBranches;
}
+ bool hasAMXTILE() const { return HasAMXTILE; }
+ bool hasAMXBF16() const { return HasAMXBF16; }
+ bool hasAMXINT8() const { return HasAMXINT8; }
bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
// These are generic getters that OR together all of the thunk types
@@ -732,6 +762,9 @@ public:
bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; }
bool useLVILoadHardening() const { return UseLVILoadHardening; }
+ bool useSpeculativeExecutionSideEffectSuppression() const {
+ return UseSpeculativeExecutionSideEffectSuppression;
+ }
unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
@@ -829,7 +862,7 @@ public:
return PICStyle == PICStyles::Style::StubPIC;
}
- bool isPositionIndependent() const { return TM.isPositionIndependent(); }
+ bool isPositionIndependent() const;
bool isCallingConvWin64(CallingConv::ID CC) const {
switch (CC) {
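Moving isPositionIndependent out of line in the hunks above is what lets this header drop its TargetMachine.h include: once TargetMachine is only forward-declared, a body that calls TM.isPositionIndependent() can no longer live in the header. A minimal sketch of the pattern under stand-in names (SubtargetLike and TargetMachineLike are not real classes):

// Header: only a forward declaration is visible, so the accessor is merely
// declared here and defined where the full type is available.
class TargetMachineLike;
class SubtargetLike {
  const TargetMachineLike &TM;
public:
  explicit SubtargetLike(const TargetMachineLike &TM) : TM(TM) {}
  bool isPositionIndependent() const; // defined in the .cpp file
};

// .cpp: the full definition of TargetMachineLike is included there, so the
// member can be dereferenced.
// bool SubtargetLike::isPositionIndependent() const {
//   return TM.isPositionIndependent();
// }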
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp
index 9f639ffa22ec..7344116e14af 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -73,18 +73,22 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
initializeEvexToVexInstPassPass(PR);
initializeFixupLEAPassPass(PR);
initializeFPSPass(PR);
+ initializeX86FixupSetCCPassPass(PR);
initializeX86CallFrameOptimizationPass(PR);
initializeX86CmovConverterPassPass(PR);
initializeX86ExpandPseudoPass(PR);
initializeX86ExecutionDomainFixPass(PR);
initializeX86DomainReassignmentPass(PR);
initializeX86AvoidSFBPassPass(PR);
+ initializeX86AvoidTrailingCallPassPass(PR);
initializeX86SpeculativeLoadHardeningPassPass(PR);
+ initializeX86SpeculativeExecutionSideEffectSuppressionPass(PR);
initializeX86FlagsCopyLoweringPassPass(PR);
initializeX86CondBrFoldingPassPass(PR);
initializeX86LoadValueInjectionLoadHardeningPassPass(PR);
initializeX86LoadValueInjectionRetHardeningPassPass(PR);
initializeX86OptimizeLEAPassPass(PR);
+ initializeX86PartialReductionPass(PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -94,19 +98,9 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
return std::make_unique<TargetLoweringObjectFileMachO>();
}
- if (TT.isOSFreeBSD())
- return std::make_unique<X86FreeBSDTargetObjectFile>();
- if (TT.isOSLinux() || TT.isOSNaCl() || TT.isOSIAMCU())
- return std::make_unique<X86LinuxNaClTargetObjectFile>();
- if (TT.isOSSolaris())
- return std::make_unique<X86SolarisTargetObjectFile>();
- if (TT.isOSFuchsia())
- return std::make_unique<X86FuchsiaTargetObjectFile>();
- if (TT.isOSBinFormatELF())
- return std::make_unique<X86ELFTargetObjectFile>();
if (TT.isOSBinFormatCOFF())
return std::make_unique<TargetLoweringObjectFileCOFF>();
- llvm_unreachable("unknown subtarget type");
+ return std::make_unique<X86ELFTargetObjectFile>();
}
static std::string computeDataLayout(const Triple &TT) {
@@ -234,6 +228,9 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
setMachineOutliner(true);
+ // x86 supports the debug entry values.
+ setSupportsDebugEntryValues(true);
+
initAsmInfo();
}
@@ -317,14 +314,6 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
}
//===----------------------------------------------------------------------===//
-// Command line options for x86
-//===----------------------------------------------------------------------===//
-static cl::opt<bool>
-UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
- cl::desc("Minimize AVX to SSE transition penalty"),
- cl::init(true));
-
-//===----------------------------------------------------------------------===//
// X86 TTI query.
//===----------------------------------------------------------------------===//
@@ -408,8 +397,10 @@ void X86PassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM->getOptLevel() != CodeGenOpt::None) {
addPass(createInterleavedAccessPass());
+ addPass(createX86PartialReductionPass());
+ }
// Add passes that handle indirect branch removal and insertion of a retpoline
// thunk. These will be a no-op unless a function subtarget has the retpoline
@@ -498,10 +489,12 @@ void X86PassConfig::addMachineSSAOptimization() {
void X86PassConfig::addPostRegAlloc() {
addPass(createX86FloatingPointStackifierPass());
+  // At -O0, the Load Value Injection Hardening pass falls back to the
+  // Speculative Execution Side Effect Suppression pass for mitigation. This
+  // avoids the slowdowns caused by the analyses the LVIHardening pass
+  // requires when compiling at -O0.
if (getOptLevel() != CodeGenOpt::None)
addPass(createX86LoadValueInjectionLoadHardeningPass());
- else
- addPass(createX86LoadValueInjectionLoadHardeningUnoptimizedPass());
}
void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); }
@@ -514,23 +507,33 @@ void X86PassConfig::addPreEmitPass() {
addPass(createX86IndirectBranchTrackingPass());
- if (UseVZeroUpper)
- addPass(createX86IssueVZeroUpperPass());
+ addPass(createX86IssueVZeroUpperPass());
if (getOptLevel() != CodeGenOpt::None) {
addPass(createX86FixupBWInsts());
addPass(createX86PadShortFunctions());
addPass(createX86FixupLEAs());
- addPass(createX86EvexToVexInsts());
}
+ addPass(createX86EvexToVexInsts());
addPass(createX86DiscriminateMemOpsPass());
addPass(createX86InsertPrefetchPass());
+ addPass(createX86InsertX87waitPass());
}
void X86PassConfig::addPreEmitPass2() {
const Triple &TT = TM->getTargetTriple();
const MCAsmInfo *MAI = TM->getMCAsmInfo();
+  // The X86 Speculative Execution Side Effect Suppression pass must run after
+  // all passes that modify the control flow graph, so it is scheduled to run
+  // right before the X86 Retpoline Thunks pass. It must run after CFG
+  // modifications because the model of LFENCE in LLVM still has to be updated
+  // (FIXME: https://bugs.llvm.org/show_bug.cgi?id=45167). The placement of
+  // this pass was hand checked to ensure that subsequent passes don't move
+  // code around the LFENCEs in a way that would hurt the correctness of this
+  // pass, as verified by hand inspection of the codegen output.
+ addPass(createX86SpeculativeExecutionSideEffectSuppression());
addPass(createX86IndirectThunksPass());
// Insert extra int3 instructions after trailing call instructions to avoid
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.h b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.h
index 757ce8bc5c72..8d98474a39c0 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.h
@@ -23,8 +23,6 @@
namespace llvm {
class StringRef;
-class X86Subtarget;
-class X86RegisterBankInfo;
class TargetTransformInfo;
class X86TargetMachine final : public LLVMTargetMachine {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.cpp
index 44185957686b..2b48baccc01f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.cpp
@@ -18,6 +18,7 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
using namespace dwarf;
@@ -63,30 +64,3 @@ const MCExpr *X86ELFTargetObjectFile::getDebugThreadLocalSymbol(
const MCSymbol *Sym) const {
return MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_DTPOFF, getContext());
}
-
-void
-X86FreeBSDTargetObjectFile::Initialize(MCContext &Ctx,
- const TargetMachine &TM) {
- TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- InitializeELF(TM.Options.UseInitArray);
-}
-
-void
-X86FuchsiaTargetObjectFile::Initialize(MCContext &Ctx,
- const TargetMachine &TM) {
- TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- InitializeELF(TM.Options.UseInitArray);
-}
-
-void
-X86LinuxNaClTargetObjectFile::Initialize(MCContext &Ctx,
- const TargetMachine &TM) {
- TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- InitializeELF(TM.Options.UseInitArray);
-}
-
-void X86SolarisTargetObjectFile::Initialize(MCContext &Ctx,
- const TargetMachine &TM) {
- TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- InitializeELF(TM.Options.UseInitArray);
-}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.h b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.h
index 1fd0bbf56b19..acea772eb036 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetObjectFile.h
@@ -10,7 +10,6 @@
#define LLVM_LIB_TARGET_X86_X86TARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
namespace llvm {
@@ -44,33 +43,10 @@ namespace llvm {
X86ELFTargetObjectFile() {
PLTRelativeVariantKind = MCSymbolRefExpr::VK_PLT;
}
-
/// Describe a TLS variable address within debug info.
const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
};
- /// X86FreeBSDTargetObjectFile - This implementation is used for FreeBSD
- /// on x86 and x86-64.
- class X86FreeBSDTargetObjectFile : public X86ELFTargetObjectFile {
- void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
- };
-
- /// This implementation is used for Fuchsia on x86-64.
- class X86FuchsiaTargetObjectFile : public X86ELFTargetObjectFile {
- void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
- };
-
- /// X86LinuxNaClTargetObjectFile - This implementation is used for linux and
- /// Native Client on x86 and x86-64.
- class X86LinuxNaClTargetObjectFile : public X86ELFTargetObjectFile {
- void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
- };
-
- /// This implementation is used for Solaris on x86/x86-64.
- class X86SolarisTargetObjectFile : public X86ELFTargetObjectFile {
- void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
- };
-
} // end namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index b754836ea517..cc18e55656ef 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -170,12 +170,18 @@ unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
}
int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind,
TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info,
TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo,
ArrayRef<const Value *> Args,
const Instruction *CxtI) {
+ // TODO: Handle more cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
+ Op2Info, Opd1PropInfo,
+ Opd2PropInfo, Args, CxtI);
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
@@ -256,20 +262,25 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
// The OperandValue properties may not be the same as that of the previous
// operation; conservatively assume OP_None.
int Cost =
- 2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, Op2Info,
+ 2 * getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, Op1Info,
+ Op2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
- Cost += getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info,
+ Cost += getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, Op1Info,
+ Op2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
- Cost += getArithmeticInstrCost(Instruction::Add, Ty, Op1Info, Op2Info,
+ Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind, Op1Info,
+ Op2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
if (ISD == ISD::SREM) {
// For SREM: (X % C) is the equivalent of (X - (X/C)*C)
- Cost += getArithmeticInstrCost(Instruction::Mul, Ty, Op1Info, Op2Info);
- Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Op1Info, Op2Info);
+ Cost += getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, Op1Info,
+ Op2Info);
+ Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, Op1Info,
+ Op2Info);
}
return Cost;
@@ -277,12 +288,14 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
// Vector unsigned division/remainder will be simplified to shifts/masks.
if (ISD == ISD::UDIV)
- return getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info,
+ return getArithmeticInstrCost(Instruction::LShr, Ty, CostKind,
+ Op1Info, Op2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
else // UREM
- return getArithmeticInstrCost(Instruction::And, Ty, Op1Info, Op2Info,
+ return getArithmeticInstrCost(Instruction::And, Ty, CostKind,
+ Op1Info, Op2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
}
@@ -304,6 +317,10 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
{ ISD::SRA, MVT::v2i64, 1 },
{ ISD::SRA, MVT::v4i64, 1 },
{ ISD::SRA, MVT::v8i64, 1 },
+
+ { ISD::SHL, MVT::v64i8, 4 }, // psllw + pand.
+ { ISD::SRL, MVT::v64i8, 4 }, // psrlw + pand.
+ { ISD::SRA, MVT::v64i8, 8 }, // psrlw, pand, pxor, psubb.
};
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
@@ -370,6 +387,14 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
{ ISD::SREM, MVT::v16i32, 17 }, // vpmuldq+mul+sub sequence
{ ISD::UDIV, MVT::v16i32, 15 }, // vpmuludq sequence
{ ISD::UREM, MVT::v16i32, 17 }, // vpmuludq+mul+sub sequence
+ { ISD::SDIV, MVT::v64i8, 28 }, // 4*ext+4*pmulhw sequence
+ { ISD::SREM, MVT::v64i8, 32 }, // 4*ext+4*pmulhw+mul+sub sequence
+ { ISD::UDIV, MVT::v64i8, 28 }, // 4*ext+4*pmulhw sequence
+ { ISD::UREM, MVT::v64i8, 32 }, // 4*ext+4*pmulhw+mul+sub sequence
+ { ISD::SDIV, MVT::v32i16, 12 }, // 2*vpmulhw sequence
+ { ISD::SREM, MVT::v32i16, 16 }, // 2*vpmulhw+mul+sub sequence
+ { ISD::UDIV, MVT::v32i16, 12 }, // 2*vpmulhuw sequence
+ { ISD::UREM, MVT::v32i16, 16 }, // 2*vpmulhuw+mul+sub sequence
};
if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
@@ -446,11 +471,32 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
return LT.first * Entry->Cost;
}
+ static const CostTblEntry AVX512BWShiftCostTable[] = {
+ { ISD::SHL, MVT::v8i16, 1 }, // vpsllvw
+ { ISD::SRL, MVT::v8i16, 1 }, // vpsrlvw
+ { ISD::SRA, MVT::v8i16, 1 }, // vpsravw
+
+ { ISD::SHL, MVT::v16i16, 1 }, // vpsllvw
+ { ISD::SRL, MVT::v16i16, 1 }, // vpsrlvw
+ { ISD::SRA, MVT::v16i16, 1 }, // vpsravw
+
+ { ISD::SHL, MVT::v32i16, 1 }, // vpsllvw
+ { ISD::SRL, MVT::v32i16, 1 }, // vpsrlvw
+ { ISD::SRA, MVT::v32i16, 1 }, // vpsravw
+ };
+
+ if (ST->hasBWI())
+ if (const auto *Entry = CostTableLookup(AVX512BWShiftCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+
static const CostTblEntry AVX2UniformCostTable[] = {
// Uniform splats are cheaper for the following instructions.
{ ISD::SHL, MVT::v16i16, 1 }, // psllw.
{ ISD::SRL, MVT::v16i16, 1 }, // psrlw.
{ ISD::SRA, MVT::v16i16, 1 }, // psraw.
+ { ISD::SHL, MVT::v32i16, 2 }, // 2*psllw.
+ { ISD::SRL, MVT::v32i16, 2 }, // 2*psrlw.
+ { ISD::SRA, MVT::v32i16, 2 }, // 2*psraw.
};
if (ST->hasAVX2() &&
@@ -495,18 +541,6 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
return LT.first * Entry->Cost;
static const CostTblEntry AVX512BWCostTable[] = {
- { ISD::SHL, MVT::v8i16, 1 }, // vpsllvw
- { ISD::SRL, MVT::v8i16, 1 }, // vpsrlvw
- { ISD::SRA, MVT::v8i16, 1 }, // vpsravw
-
- { ISD::SHL, MVT::v16i16, 1 }, // vpsllvw
- { ISD::SRL, MVT::v16i16, 1 }, // vpsrlvw
- { ISD::SRA, MVT::v16i16, 1 }, // vpsravw
-
- { ISD::SHL, MVT::v32i16, 1 }, // vpsllvw
- { ISD::SRL, MVT::v32i16, 1 }, // vpsrlvw
- { ISD::SRA, MVT::v32i16, 1 }, // vpsravw
-
{ ISD::SHL, MVT::v64i8, 11 }, // vpblendvb sequence.
{ ISD::SRL, MVT::v64i8, 11 }, // vpblendvb sequence.
{ ISD::SRA, MVT::v64i8, 24 }, // vpblendvb sequence.
@@ -533,6 +567,7 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
{ ISD::SRA, MVT::v4i64, 1 },
{ ISD::SRA, MVT::v8i64, 1 },
+ { ISD::MUL, MVT::v64i8, 26 }, // extend/pmullw/trunc sequence.
{ ISD::MUL, MVT::v32i8, 13 }, // extend/pmullw/trunc sequence.
{ ISD::MUL, MVT::v16i8, 5 }, // extend/pmullw/trunc sequence.
{ ISD::MUL, MVT::v16i32, 1 }, // pmulld (Skylake from agner.org)
@@ -568,6 +603,18 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
{ ISD::SRL, MVT::v4i64, 1 },
};
+ if (ST->hasAVX512()) {
+ if (ISD == ISD::SHL && LT.second == MVT::v32i16 &&
+ (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
+ // On AVX512, a packed v32i16 shift left by a constant build_vector
+ // is lowered into a vector multiply (vpmullw).
+ return getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
+ Op1Info, Op2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ }
+
// Look for AVX2 lowering tricks.
if (ST->hasAVX2()) {
if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
@@ -575,7 +622,8 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
// On AVX2, a packed v16i16 shift left by a constant build_vector
// is lowered into a vector multiply (vpmullw).
- return getArithmeticInstrCost(Instruction::Mul, Ty, Op1Info, Op2Info,
+ return getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
+ Op1Info, Op2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
@@ -667,13 +715,19 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
static const CostTblEntry AVX2CostTable[] = {
{ ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence.
+ { ISD::SHL, MVT::v64i8, 22 }, // 2*vpblendvb sequence.
{ ISD::SHL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
+ { ISD::SHL, MVT::v32i16, 20 }, // 2*extend/vpsrlvd/pack sequence.
{ ISD::SRL, MVT::v32i8, 11 }, // vpblendvb sequence.
+ { ISD::SRL, MVT::v64i8, 22 }, // 2*vpblendvb sequence.
{ ISD::SRL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
+ { ISD::SRL, MVT::v32i16, 20 }, // 2*extend/vpsrlvd/pack sequence.
{ ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence.
+ { ISD::SRA, MVT::v64i8, 48 }, // 2*vpblendvb sequence.
{ ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence.
+ { ISD::SRA, MVT::v32i16, 20 }, // 2*extend/vpsravd/pack sequence.
{ ISD::SRA, MVT::v2i64, 4 }, // srl/xor/sub sequence.
{ ISD::SRA, MVT::v4i64, 4 }, // srl/xor/sub sequence.
@@ -877,20 +931,20 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
if (LT.second.isVector() && (ISD == ISD::SDIV || ISD == ISD::SREM ||
ISD == ISD::UDIV || ISD == ISD::UREM)) {
int ScalarCost = getArithmeticInstrCost(
- Opcode, Ty->getScalarType(), Op1Info, Op2Info,
+ Opcode, Ty->getScalarType(), CostKind, Op1Info, Op2Info,
TargetTransformInfo::OP_None, TargetTransformInfo::OP_None);
return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
}
// Fallback to the default implementation.
- return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info);
}
-int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) {
+int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *BaseTp,
+ int Index, VectorType *SubTp) {
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
// 64-bit packed integer vectors (v2i32) are widened to type v4i32.
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, BaseTp);
// Treat Transpose as 2-op shuffles - there's no difference in lowering.
if (Kind == TTI::SK_Transpose)
@@ -919,19 +973,19 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
// FIXME: Remove some of the alignment restrictions.
// FIXME: We can use permq for 64-bit or larger extracts from 256-bit
// vectors.
- int OrigSubElts = SubTp->getVectorNumElements();
- if (NumSubElts > OrigSubElts &&
- (Index % OrigSubElts) == 0 && (NumSubElts % OrigSubElts) == 0 &&
+ int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements();
+ if (NumSubElts > OrigSubElts && (Index % OrigSubElts) == 0 &&
+ (NumSubElts % OrigSubElts) == 0 &&
LT.second.getVectorElementType() ==
- SubLT.second.getVectorElementType() &&
+ SubLT.second.getVectorElementType() &&
LT.second.getVectorElementType().getSizeInBits() ==
- Tp->getVectorElementType()->getPrimitiveSizeInBits()) {
+ BaseTp->getElementType()->getPrimitiveSizeInBits()) {
assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
"Unexpected number of elements!");
- Type *VecTy = VectorType::get(Tp->getVectorElementType(),
- LT.second.getVectorNumElements());
- Type *SubTy = VectorType::get(Tp->getVectorElementType(),
- SubLT.second.getVectorNumElements());
+ auto *VecTy = FixedVectorType::get(BaseTp->getElementType(),
+ LT.second.getVectorNumElements());
+ auto *SubTy = FixedVectorType::get(BaseTp->getElementType(),
+ SubLT.second.getVectorNumElements());
int ExtractIndex = alignDown((Index % NumElts), NumSubElts);
int ExtractCost = getShuffleCost(TTI::SK_ExtractSubvector, VecTy,
ExtractIndex, SubTy);
@@ -949,6 +1003,42 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
}
}
+ // Handle some common (illegal) sub-vector types as they are often very cheap
+ // to shuffle even on targets without PSHUFB.
+ EVT VT = TLI->getValueType(DL, BaseTp);
+ if (VT.isSimple() && VT.isVector() && VT.getSizeInBits() < 128 &&
+ !ST->hasSSSE3()) {
+ static const CostTblEntry SSE2SubVectorShuffleTbl[] = {
+ {TTI::SK_Broadcast, MVT::v4i16, 1}, // pshuflw
+ {TTI::SK_Broadcast, MVT::v2i16, 1}, // pshuflw
+ {TTI::SK_Broadcast, MVT::v8i8, 2}, // punpck/pshuflw
+ {TTI::SK_Broadcast, MVT::v4i8, 2}, // punpck/pshuflw
+ {TTI::SK_Broadcast, MVT::v2i8, 1}, // punpck
+
+ {TTI::SK_Reverse, MVT::v4i16, 1}, // pshuflw
+ {TTI::SK_Reverse, MVT::v2i16, 1}, // pshuflw
+ {TTI::SK_Reverse, MVT::v4i8, 3}, // punpck/pshuflw/packus
+ {TTI::SK_Reverse, MVT::v2i8, 1}, // punpck
+
+ {TTI::SK_PermuteTwoSrc, MVT::v4i16, 2}, // punpck/pshuflw
+ {TTI::SK_PermuteTwoSrc, MVT::v2i16, 2}, // punpck/pshuflw
+ {TTI::SK_PermuteTwoSrc, MVT::v8i8, 7}, // punpck/pshuflw
+ {TTI::SK_PermuteTwoSrc, MVT::v4i8, 4}, // punpck/pshuflw
+ {TTI::SK_PermuteTwoSrc, MVT::v2i8, 2}, // punpck
+
+ {TTI::SK_PermuteSingleSrc, MVT::v4i16, 1}, // pshuflw
+ {TTI::SK_PermuteSingleSrc, MVT::v2i16, 1}, // pshuflw
+ {TTI::SK_PermuteSingleSrc, MVT::v8i8, 5}, // punpck/pshuflw
+ {TTI::SK_PermuteSingleSrc, MVT::v4i8, 3}, // punpck/pshuflw
+ {TTI::SK_PermuteSingleSrc, MVT::v2i8, 1}, // punpck
+ };
+
+ if (ST->hasSSE2())
+ if (const auto *Entry =
+ CostTableLookup(SSE2SubVectorShuffleTbl, Kind, VT.getSimpleVT()))
+ return Entry->Cost;
+ }
+
// We are going to permute multiple sources and the result will be in multiple
// destinations. Providing an accurate cost only for splits where the element
// type remains the same.
@@ -956,25 +1046,26 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
MVT LegalVT = LT.second;
if (LegalVT.isVector() &&
LegalVT.getVectorElementType().getSizeInBits() ==
- Tp->getVectorElementType()->getPrimitiveSizeInBits() &&
- LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) {
+ BaseTp->getElementType()->getPrimitiveSizeInBits() &&
+ LegalVT.getVectorNumElements() <
+ cast<FixedVectorType>(BaseTp)->getNumElements()) {
- unsigned VecTySize = DL.getTypeStoreSize(Tp);
+ unsigned VecTySize = DL.getTypeStoreSize(BaseTp);
unsigned LegalVTSize = LegalVT.getStoreSize();
// Number of source vectors after legalization:
unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
// Number of destination vectors after legalization:
unsigned NumOfDests = LT.first;
- Type *SingleOpTy = VectorType::get(Tp->getVectorElementType(),
- LegalVT.getVectorNumElements());
+ auto *SingleOpTy = FixedVectorType::get(BaseTp->getElementType(),
+ LegalVT.getVectorNumElements());
unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
return NumOfShuffles *
getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, 0, nullptr);
}
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, BaseTp, Index, SubTp);
}
// For 2-input shuffles, we must account for splitting the 2 inputs into many.
@@ -992,9 +1083,9 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{TTI::SK_PermuteSingleSrc, MVT::v64i8, 1}, // vpermb
{TTI::SK_PermuteSingleSrc, MVT::v32i8, 1}, // vpermb
- {TTI::SK_PermuteTwoSrc, MVT::v64i8, 1}, // vpermt2b
- {TTI::SK_PermuteTwoSrc, MVT::v32i8, 1}, // vpermt2b
- {TTI::SK_PermuteTwoSrc, MVT::v16i8, 1} // vpermt2b
+ {TTI::SK_PermuteTwoSrc, MVT::v64i8, 2}, // vpermt2b
+ {TTI::SK_PermuteTwoSrc, MVT::v32i8, 2}, // vpermt2b
+ {TTI::SK_PermuteTwoSrc, MVT::v16i8, 2} // vpermt2b
};
if (ST->hasVBMI())
@@ -1006,22 +1097,18 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{TTI::SK_Broadcast, MVT::v32i16, 1}, // vpbroadcastw
{TTI::SK_Broadcast, MVT::v64i8, 1}, // vpbroadcastb
- {TTI::SK_Reverse, MVT::v32i16, 1}, // vpermw
- {TTI::SK_Reverse, MVT::v16i16, 1}, // vpermw
+ {TTI::SK_Reverse, MVT::v32i16, 2}, // vpermw
+ {TTI::SK_Reverse, MVT::v16i16, 2}, // vpermw
{TTI::SK_Reverse, MVT::v64i8, 2}, // pshufb + vshufi64x2
- {TTI::SK_PermuteSingleSrc, MVT::v32i16, 1}, // vpermw
- {TTI::SK_PermuteSingleSrc, MVT::v16i16, 1}, // vpermw
- {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v32i16, 2}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v16i16, 2}, // vpermw
{TTI::SK_PermuteSingleSrc, MVT::v64i8, 8}, // extend to v32i16
- {TTI::SK_PermuteSingleSrc, MVT::v32i8, 3}, // vpermw + zext/trunc
- {TTI::SK_PermuteTwoSrc, MVT::v32i16, 1}, // vpermt2w
- {TTI::SK_PermuteTwoSrc, MVT::v16i16, 1}, // vpermt2w
- {TTI::SK_PermuteTwoSrc, MVT::v8i16, 1}, // vpermt2w
- {TTI::SK_PermuteTwoSrc, MVT::v32i8, 3}, // zext + vpermt2w + trunc
+ {TTI::SK_PermuteTwoSrc, MVT::v32i16, 2}, // vpermt2w
+ {TTI::SK_PermuteTwoSrc, MVT::v16i16, 2}, // vpermt2w
+ {TTI::SK_PermuteTwoSrc, MVT::v8i16, 2}, // vpermt2w
{TTI::SK_PermuteTwoSrc, MVT::v64i8, 19}, // 6 * v32i8 + 1
- {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3} // zext + vpermt2w + trunc
};
if (ST->hasBWI())
@@ -1034,6 +1121,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{TTI::SK_Broadcast, MVT::v16f32, 1}, // vbroadcastps
{TTI::SK_Broadcast, MVT::v8i64, 1}, // vpbroadcastq
{TTI::SK_Broadcast, MVT::v16i32, 1}, // vpbroadcastd
+ {TTI::SK_Broadcast, MVT::v32i16, 1}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v64i8, 1}, // vpbroadcastb
{TTI::SK_Reverse, MVT::v8f64, 1}, // vpermpd
{TTI::SK_Reverse, MVT::v16f32, 1}, // vpermps
@@ -1065,7 +1154,14 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{TTI::SK_PermuteTwoSrc, MVT::v2f64, 1}, // vpermt2pd
{TTI::SK_PermuteTwoSrc, MVT::v4f32, 1}, // vpermt2ps
{TTI::SK_PermuteTwoSrc, MVT::v2i64, 1}, // vpermt2q
- {TTI::SK_PermuteTwoSrc, MVT::v4i32, 1} // vpermt2d
+ {TTI::SK_PermuteTwoSrc, MVT::v4i32, 1}, // vpermt2d
+
+ // FIXME: This just applies the type legalization cost rules above
+ // assuming these completely split.
+ {TTI::SK_PermuteSingleSrc, MVT::v32i16, 14},
+ {TTI::SK_PermuteSingleSrc, MVT::v64i8, 14},
+ {TTI::SK_PermuteTwoSrc, MVT::v32i16, 42},
+ {TTI::SK_PermuteTwoSrc, MVT::v64i8, 42},
};
if (ST->hasAVX512())
@@ -1267,14 +1363,22 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, BaseTp, Index, SubTp);
}
int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ // TODO: Allow non-throughput costs that aren't binary.
+ auto AdjustCost = [&CostKind](int Cost) {
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Cost == 0 ? 0 : 1;
+ return Cost;
+ };
+
// FIXME: Need a better design of the cost table to handle non-simple types of
// potential massive combinations (elem_num x src_type x dst_type).
@@ -1283,6 +1387,11 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8, 1 },
// Mask sign extend has an instruction.
+ { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 1 },
{ ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 },
{ ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 },
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
@@ -1290,42 +1399,45 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i1, 1 },
{ ISD::SIGN_EXTEND, MVT::v64i8, MVT::v64i1, 1 },
- // Mask zero extend is a load + broadcast.
+ // Mask zero extend is a sext + shift.
+ { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 2 },
{ ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 },
{ ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 },
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 },
{ ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 },
{ ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i1, 2 },
{ ISD::ZERO_EXTEND, MVT::v64i8, MVT::v64i1, 2 },
+
+ { ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 2 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, // widen to zmm
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, // widen to zmm
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, // widen to zmm
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, // widen to zmm
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, // widen to zmm
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, // widen to zmm
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, // widen to zmm
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, // widen to zmm
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, // widen to zmm
+ { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, // widen to zmm
+ { ISD::TRUNCATE, MVT::v32i1, MVT::v32i16, 2 },
+ { ISD::TRUNCATE, MVT::v64i1, MVT::v64i8, 2 },
};
static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = {
- { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
- { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 },
- { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 },
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 },
- { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 },
- { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 },
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 },
- { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 1 },
- { ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f32, 1 },
{ ISD::FP_TO_SINT, MVT::v8i64, MVT::v8f32, 1 },
- { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
- { ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f64, 1 },
{ ISD::FP_TO_SINT, MVT::v8i64, MVT::v8f64, 1 },
- { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 1 },
- { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f32, 1 },
- { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
- { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f64, 1 },
{ ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f64, 1 },
};
@@ -1337,14 +1449,70 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 },
{ ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 },
- { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 1 },
- { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 1 },
- { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 1 },
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, // sext+vpslld+vptestmd
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 3 }, // sext+vpslld+vptestmd
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 3 }, // sext+vpslld+vptestmd
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 3 }, // sext+vpslld+vptestmd
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 3 }, // sext+vpsllq+vptestmq
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 3 }, // sext+vpsllq+vptestmq
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 3 }, // sext+vpsllq+vptestmq
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 3 }, // sext+vpslld+vptestmd
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i32, 2 }, // zmm vpslld+vptestmd
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 }, // zmm vpslld+vptestmd
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, // zmm vpslld+vptestmd
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i32, 2 }, // vpslld+vptestmd
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 }, // zmm vpsllq+vptestmq
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 2 }, // zmm vpsllq+vptestmq
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 2 }, // vpsllq+vptestmq
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 2 },
+ { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 2 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 2 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 2 },
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 },
+ { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, // zmm vpmovqd
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 5 },// 2*vpmovqd+concat+vpmovdb
+
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, // extend to v16i32
+ { ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 8 },
+
+ // Sign extend is zmm vpternlogd+vptruncdb.
+ // Zero extend is zmm broadcast load+vptruncdw.
+ { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 3 },
+ { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 4 },
+ { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 3 },
+ { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 4 },
+ { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 3 },
+ { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 4 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 3 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 4 },
+
+ // Sign extend is zmm vpternlogd+vptruncdw.
+ // Zero extend is zmm vpternlogd+vptruncdw+vpsrlw.
+ { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 3 },
+ { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 4 },
+ { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 3 },
+ { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 4 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 3 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 4 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 3 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 4 },
+
+ { ISD::SIGN_EXTEND, MVT::v2i32, MVT::v2i1, 1 }, // zmm vpternlogd
+ { ISD::ZERO_EXTEND, MVT::v2i32, MVT::v2i1, 2 }, // zmm vpternlogd+psrld
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i1, 1 }, // zmm vpternlogd
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i1, 2 }, // zmm vpternlogd+psrld
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 1 }, // zmm vpternlogd
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 2 }, // zmm vpternlogd+psrld
+ { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i1, 1 }, // zmm vpternlogq
+ { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i1, 2 }, // zmm vpternlogq+psrlq
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 1 }, // zmm vpternlogq
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 2 }, // zmm vpternlogq+psrlq
+
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 1 }, // vpternlogd
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 }, // vpternlogd+psrld
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i1, 1 }, // vpternlogq
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i1, 2 }, // vpternlogq+psrlq
- // v16i1 -> v16i32 - load + broadcast
- { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
@@ -1356,6 +1524,9 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 1 },
+ { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i8, 3 }, // FIXME: May not be right
+ { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8, 3 }, // FIXME: May not be right
+
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 },
@@ -1367,44 +1538,163 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 5 },
+
+ { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f64, 3 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f64, 3 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f32, 3 },
+ { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 3 },
+
+ { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 3 },
+ { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 3 },
+ { ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 3 },
+ { ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 3 },
+ };
+
+ static const TypeConversionCostTblEntry AVX512BWVLConversionTbl[] {
+ // Mask sign extend has an instruction.
+ { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 },
+
+ // Mask zero extend is a sext + shift.
+ { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 },
+
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 },
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, // vpsllw+vptestmb
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, // vpsllw+vptestmw
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, // vpsllw+vptestmb
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, // vpsllw+vptestmw
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, // vpsllw+vptestmb
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, // vpsllw+vptestmw
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, // vpsllw+vptestmb
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, // vpsllw+vptestmw
+ { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, // vpsllw+vptestmb
+ };
+
+ static const TypeConversionCostTblEntry AVX512DQVLConversionTbl[] = {
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 },
+
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 },
+
+ { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f64, 1 },
+
+ { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f64, 1 },
+ };
+
+ static const TypeConversionCostTblEntry AVX512VLConversionTbl[] = {
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, // sext+vpslld+vptestmd
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 3 }, // sext+vpslld+vptestmd
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 3 }, // sext+vpslld+vptestmd
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 8 }, // split+2*v8i8
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 3 }, // sext+vpsllq+vptestmq
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 3 }, // sext+vpsllq+vptestmq
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 3 }, // sext+vpsllq+vptestmq
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 8 }, // split+2*v8i16
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i32, 2 }, // vpslld+vptestmd
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 }, // vpslld+vptestmd
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, // vpslld+vptestmd
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 }, // vpsllq+vptestmq
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 2 }, // vpsllq+vptestmq
+ { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, // vpmovqd
+
+ // sign extend is vpcmpeq+maskedmove+vpmovdw+vpacksswb
+ // zero extend is vpcmpeq+maskedmove+vpmovdw+vpsrlw+vpackuswb
+ { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 5 },
+ { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 6 },
+ { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 5 },
+ { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 6 },
+ { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 5 },
+ { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 6 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 10 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 12 },
+
+ // sign extend is vpcmpeq+maskedmove+vpmovdw
+ // zero extend is vpcmpeq+maskedmove+vpmovdw+vpsrlw
+ { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 4 },
+ { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 5 },
+ { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 4 },
+ { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 5 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 4 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 5 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 10 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 12 },
+
+ { ISD::SIGN_EXTEND, MVT::v2i32, MVT::v2i1, 1 }, // vpternlogd
+ { ISD::ZERO_EXTEND, MVT::v2i32, MVT::v2i1, 2 }, // vpternlogd+psrld
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i1, 1 }, // vpternlogd
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i1, 2 }, // vpternlogd+psrld
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 1 }, // vpternlogd
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 2 }, // vpternlogd+psrld
+ { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i1, 1 }, // vpternlogq
+ { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i1, 2 }, // vpternlogq+psrlq
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 1 }, // vpternlogq
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 2 }, // vpternlogq+psrlq
+
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 2 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 2 },
- { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 },
- { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 5 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 },
- { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
- { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 2 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
- { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
- { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 5 },
- { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 5 },
- { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 5 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 1 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 },
+
+ { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 3 },
+ { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 3 },
+
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 1 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 1 },
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 1 },
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 1 },
- { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 },
- { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 2 },
- { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 2 },
- { ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f32, 1 },
- { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 2 },
- { ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 2 },
};
static const TypeConversionCostTblEntry AVX2ConversionTbl[] = {
@@ -1416,6 +1706,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 1 },
@@ -1424,13 +1716,16 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 3 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 3 },
+
+ { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 },
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 2 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 2 },
- { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 },
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 },
- { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 4 },
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 },
{ ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 },
@@ -1447,6 +1742,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 4 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 4 },
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 4 },
@@ -1456,15 +1753,21 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 4 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 4 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 4 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 5 },
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 4 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 9 },
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i64, 11 },
+
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 4 },
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 },
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 4 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 4 },
- { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 4 },
+ { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 },
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 11 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 9 },
- { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 9 },
+ { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 11 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
@@ -1503,8 +1806,15 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
- { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
- { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 7 },
+ { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 4 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f64, 3 },
+ { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 3 },
+
+ { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f64, 3 },
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 4 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 3 },
// This node is expanded into scalarized operations but BasicTTI is overly
// optimistic estimating its cost. It computes 3 per element (one
// vector-extract, one scalar conversion and one vector-insert). The
@@ -1544,7 +1854,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
- { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 },
+ // These truncates end up widening elements.
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 1 }, // PMOVZXBQ
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 1 }, // PMOVZXWQ
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 1 }, // PMOVZXBD
+
+ { ISD::TRUNCATE, MVT::v2i8, MVT::v2i16, 1 },
+ { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 1 },
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1 },
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
@@ -1555,6 +1871,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 4 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 },
+
+ { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 3 },
+ { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 3 },
+
+ { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 3 },
+ { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 3 },
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
};
static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
@@ -1580,16 +1903,26 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 6 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
+ { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 4 },
+ { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 4 },
- { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 3 },
+ { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 6 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 6 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 4 },
+ { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 4 },
+ { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 4 },
+ { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
+ { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4 },
{ ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 },
@@ -1616,11 +1949,19 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 5 },
+ // These truncates are really widening elements.
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i32, 1 }, // PSHUFD
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, // PUNPCKLWD+DQ
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, // PUNPCKLBW+WD+PSHUFD
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 1 }, // PUNPCKLWD
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, // PUNPCKLBW+WD
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 1 }, // PUNPCKLBW
+
{ ISD::TRUNCATE, MVT::v2i8, MVT::v2i16, 2 }, // PAND+PACKUSWB
- { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 4 },
- { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 },
+ { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 }, // PAND+PACKUSWB
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, // PAND+PACKUSWB
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 },
- { ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 3 }, // PAND+3*PACKUSWB
+ { ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 3 }, // PAND+2*PACKUSWB
{ ISD::TRUNCATE, MVT::v2i16, MVT::v2i32, 1 },
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 3 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 3 },
@@ -1639,7 +1980,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
if (ST->hasSSE2() && !ST->hasAVX()) {
if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
LTDest.second, LTSrc.second))
- return LTSrc.first * Entry->Cost;
+ return AdjustCost(LTSrc.first * Entry->Cost);
}
EVT SrcTy = TLI->getValueType(DL, Src);
@@ -1647,61 +1988,77 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
// The function getSimpleVT only handles simple value types.
if (!SrcTy.isSimple() || !DstTy.isSimple())
- return BaseT::getCastInstrCost(Opcode, Dst, Src);
+ return AdjustCost(BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind));
MVT SimpleSrcTy = SrcTy.getSimpleVT();
MVT SimpleDstTy = DstTy.getSimpleVT();
- // Make sure that neither type is going to be split before using the
- // AVX512 tables. This handles -mprefer-vector-width=256
- // with -min-legal-vector-width<=256
- if (TLI->getTypeAction(SimpleSrcTy) != TargetLowering::TypeSplitVector &&
- TLI->getTypeAction(SimpleDstTy) != TargetLowering::TypeSplitVector) {
+ if (ST->useAVX512Regs()) {
if (ST->hasBWI())
if (const auto *Entry = ConvertCostTableLookup(AVX512BWConversionTbl, ISD,
SimpleDstTy, SimpleSrcTy))
- return Entry->Cost;
+ return AdjustCost(Entry->Cost);
if (ST->hasDQI())
if (const auto *Entry = ConvertCostTableLookup(AVX512DQConversionTbl, ISD,
SimpleDstTy, SimpleSrcTy))
- return Entry->Cost;
+ return AdjustCost(Entry->Cost);
if (ST->hasAVX512())
if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTbl, ISD,
SimpleDstTy, SimpleSrcTy))
- return Entry->Cost;
+ return AdjustCost(Entry->Cost);
}
+ if (ST->hasBWI())
+ if (const auto *Entry = ConvertCostTableLookup(AVX512BWVLConversionTbl, ISD,
+ SimpleDstTy, SimpleSrcTy))
+ return AdjustCost(Entry->Cost);
+
+ if (ST->hasDQI())
+ if (const auto *Entry = ConvertCostTableLookup(AVX512DQVLConversionTbl, ISD,
+ SimpleDstTy, SimpleSrcTy))
+ return AdjustCost(Entry->Cost);
+
+ if (ST->hasAVX512())
+ if (const auto *Entry = ConvertCostTableLookup(AVX512VLConversionTbl, ISD,
+ SimpleDstTy, SimpleSrcTy))
+ return AdjustCost(Entry->Cost);
+
if (ST->hasAVX2()) {
if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
SimpleDstTy, SimpleSrcTy))
- return Entry->Cost;
+ return AdjustCost(Entry->Cost);
}
if (ST->hasAVX()) {
if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD,
SimpleDstTy, SimpleSrcTy))
- return Entry->Cost;
+ return AdjustCost(Entry->Cost);
}
if (ST->hasSSE41()) {
if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
SimpleDstTy, SimpleSrcTy))
- return Entry->Cost;
+ return AdjustCost(Entry->Cost);
}
if (ST->hasSSE2()) {
if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
SimpleDstTy, SimpleSrcTy))
- return Entry->Cost;
+ return AdjustCost(Entry->Cost);
}
- return BaseT::getCastInstrCost(Opcode, Dst, Src, I);
+ return AdjustCost(BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I));
}
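The lookup above is table-driven: each feature tier (BWI, DQI, AVX512, then AVX2, AVX, SSE4.1, SSE2) supplies its own table keyed on (opcode, destination type, source type), the tiers are consulted from most to least capable, and the first hit wins before falling through to the generic BaseT estimate. A minimal standalone sketch of that dispatch pattern follows; Op, VT, ConvEntry, lookup and castCost are illustrative stand-ins rather than LLVM types, and the table contents are made-up example costs.

// Illustrative model only: these are stand-ins for the real (ISD opcode,
// MVT dst, MVT src) keyed tables and ConvertCostTableLookup scan.
#include <cstdio>
#include <optional>
#include <vector>

enum class Op { SExt, ZExt, Trunc };
enum class VT { v4i32, v4i64 };

struct ConvEntry { Op Opc; VT Dst; VT Src; int Cost; };

// Linear scan over one tier's table; the first matching entry wins.
static std::optional<int> lookup(const std::vector<ConvEntry> &Tbl,
                                 Op Opc, VT Dst, VT Src) {
  for (const ConvEntry &E : Tbl)
    if (E.Opc == Opc && E.Dst == Dst && E.Src == Src)
      return E.Cost;
  return std::nullopt;
}

// Consult the most capable tier first, then fall back tier by tier.
static int castCost(Op Opc, VT Dst, VT Src, bool HasAVX2, bool HasSSE2) {
  static const std::vector<ConvEntry> AVX2Tbl = {{Op::Trunc, VT::v4i32, VT::v4i64, 2}};
  static const std::vector<ConvEntry> SSE2Tbl = {{Op::Trunc, VT::v4i32, VT::v4i64, 3}};
  if (HasAVX2)
    if (auto C = lookup(AVX2Tbl, Opc, Dst, Src))
      return *C;
  if (HasSSE2)
    if (auto C = lookup(SSE2Tbl, Opc, Dst, Src))
      return *C;
  return 10; // stand-in for the generic BaseT fallback
}

int main() {
  std::printf("%d\n", castCost(Op::Trunc, VT::v4i32, VT::v4i64, true, true)); // prints 2
}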
int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
+ // TODO: Handle other cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
@@ -1774,6 +2131,12 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
{ ISD::SELECT, MVT::v16i32, 1 },
{ ISD::SELECT, MVT::v8f64, 1 },
{ ISD::SELECT, MVT::v16f32, 1 },
+
+ { ISD::SETCC, MVT::v32i16, 2 }, // FIXME: should probably be 4
+ { ISD::SETCC, MVT::v64i8, 2 }, // FIXME: should probably be 4
+
+ { ISD::SELECT, MVT::v32i16, 2 }, // FIXME: should be 3
+ { ISD::SELECT, MVT::v64i8, 2 }, // FIXME: should be 3
};
static const CostTblEntry AVX2CostTbl[] = {
@@ -1878,14 +2241,14 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
return LT.first * (ExtraCost + Entry->Cost);
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
}
unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; }
-int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF,
- unsigned ScalarizationCostPassed) {
+int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
+ const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) {
+
// Costs should match the codegen from:
// BITREVERSE: llvm\test\CodeGen\X86\vector-bitreverse.ll
// BSWAP: llvm\test\CodeGen\X86\bswap-vector.ll
@@ -1935,12 +2298,20 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
static const CostTblEntry AVX512CostTbl[] = {
{ ISD::BITREVERSE, MVT::v8i64, 36 },
{ ISD::BITREVERSE, MVT::v16i32, 24 },
+ { ISD::BITREVERSE, MVT::v32i16, 10 },
+ { ISD::BITREVERSE, MVT::v64i8, 10 },
{ ISD::CTLZ, MVT::v8i64, 29 },
{ ISD::CTLZ, MVT::v16i32, 35 },
+ { ISD::CTLZ, MVT::v32i16, 28 },
+ { ISD::CTLZ, MVT::v64i8, 18 },
{ ISD::CTPOP, MVT::v8i64, 16 },
{ ISD::CTPOP, MVT::v16i32, 24 },
+ { ISD::CTPOP, MVT::v32i16, 18 },
+ { ISD::CTPOP, MVT::v64i8, 12 },
{ ISD::CTTZ, MVT::v8i64, 20 },
{ ISD::CTTZ, MVT::v16i32, 28 },
+ { ISD::CTTZ, MVT::v32i16, 24 },
+ { ISD::CTTZ, MVT::v64i8, 18 },
{ ISD::USUBSAT, MVT::v16i32, 2 }, // pmaxud + psubd
{ ISD::USUBSAT, MVT::v2i64, 2 }, // pmaxuq + psubq
{ ISD::USUBSAT, MVT::v4i64, 2 }, // pmaxuq + psubq
@@ -1949,6 +2320,22 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::UADDSAT, MVT::v2i64, 3 }, // not + pminuq + paddq
{ ISD::UADDSAT, MVT::v4i64, 3 }, // not + pminuq + paddq
{ ISD::UADDSAT, MVT::v8i64, 3 }, // not + pminuq + paddq
+ { ISD::SADDSAT, MVT::v32i16, 2 }, // FIXME: include split
+ { ISD::SADDSAT, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::SSUBSAT, MVT::v32i16, 2 }, // FIXME: include split
+ { ISD::SSUBSAT, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::UADDSAT, MVT::v32i16, 2 }, // FIXME: include split
+ { ISD::UADDSAT, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::USUBSAT, MVT::v32i16, 2 }, // FIXME: include split
+ { ISD::USUBSAT, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::FMAXNUM, MVT::f32, 2 },
+ { ISD::FMAXNUM, MVT::v4f32, 2 },
+ { ISD::FMAXNUM, MVT::v8f32, 2 },
+ { ISD::FMAXNUM, MVT::v16f32, 2 },
+ { ISD::FMAXNUM, MVT::f64, 2 },
+ { ISD::FMAXNUM, MVT::v2f64, 2 },
+ { ISD::FMAXNUM, MVT::v4f64, 2 },
+ { ISD::FMAXNUM, MVT::v8f64, 2 },
};
static const CostTblEntry XOPCostTbl[] = {
{ ISD::BITREVERSE, MVT::v4i64, 4 },
@@ -2031,6 +2418,12 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::USUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::USUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::USUBSAT, MVT::v8i32, 6 }, // 2 x 128-bit Op + extract/insert
+ { ISD::FMAXNUM, MVT::f32, 3 },
+ { ISD::FMAXNUM, MVT::v4f32, 3 },
+ { ISD::FMAXNUM, MVT::v8f32, 5 },
+ { ISD::FMAXNUM, MVT::f64, 3 },
+ { ISD::FMAXNUM, MVT::v2f64, 3 },
+ { ISD::FMAXNUM, MVT::v4f64, 5 },
{ ISD::FSQRT, MVT::f32, 14 }, // SNB from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 14 }, // SNB from http://www.agner.org/
{ ISD::FSQRT, MVT::v8f32, 28 }, // SNB from http://www.agner.org/
@@ -2105,13 +2498,25 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::UADDSAT, MVT::v16i8, 1 },
{ ISD::USUBSAT, MVT::v8i16, 1 },
{ ISD::USUBSAT, MVT::v16i8, 1 },
+ { ISD::FMAXNUM, MVT::f64, 4 },
+ { ISD::FMAXNUM, MVT::v2f64, 4 },
{ ISD::FSQRT, MVT::f64, 32 }, // Nehalem from http://www.agner.org/
{ ISD::FSQRT, MVT::v2f64, 32 }, // Nehalem from http://www.agner.org/
};
static const CostTblEntry SSE1CostTbl[] = {
+ { ISD::FMAXNUM, MVT::f32, 4 },
+ { ISD::FMAXNUM, MVT::v4f32, 4 },
{ ISD::FSQRT, MVT::f32, 28 }, // Pentium III from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/
};
+ static const CostTblEntry BMI64CostTbl[] = { // 64-bit targets
+ { ISD::CTTZ, MVT::i64, 1 },
+ };
+ static const CostTblEntry BMI32CostTbl[] = { // 32 or 64-bit targets
+ { ISD::CTTZ, MVT::i32, 1 },
+ { ISD::CTTZ, MVT::i16, 1 },
+ { ISD::CTTZ, MVT::i8, 1 },
+ };
static const CostTblEntry LZCNT64CostTbl[] = { // 64-bit targets
{ ISD::CTLZ, MVT::i64, 1 },
};
@@ -2131,6 +2536,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
static const CostTblEntry X64CostTbl[] = { // 64-bit targets
{ ISD::BITREVERSE, MVT::i64, 14 },
{ ISD::CTLZ, MVT::i64, 4 }, // BSR+XOR or BSR+XOR+CMOV
+ { ISD::CTTZ, MVT::i64, 3 }, // TEST+BSF+CMOV/BRANCH
{ ISD::CTPOP, MVT::i64, 10 },
{ ISD::SADDO, MVT::i64, 1 },
{ ISD::UADDO, MVT::i64, 1 },
@@ -2142,6 +2548,9 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::CTLZ, MVT::i32, 4 }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i16, 4 }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i8, 4 }, // BSR+XOR or BSR+XOR+CMOV
+ { ISD::CTTZ, MVT::i32, 3 }, // TEST+BSF+CMOV/BRANCH
+ { ISD::CTTZ, MVT::i16, 3 }, // TEST+BSF+CMOV/BRANCH
+ { ISD::CTTZ, MVT::i8, 3 }, // TEST+BSF+CMOV/BRANCH
{ ISD::CTPOP, MVT::i32, 8 },
{ ISD::CTPOP, MVT::i16, 9 },
{ ISD::CTPOP, MVT::i8, 7 },
@@ -2153,7 +2562,9 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::UADDO, MVT::i8, 1 },
};
+ Type *RetTy = ICA.getReturnType();
Type *OpTy = RetTy;
+ Intrinsic::ID IID = ICA.getID();
unsigned ISD = ISD::DELETED_NODE;
switch (IID) {
default:
@@ -2173,6 +2584,11 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
case Intrinsic::cttz:
ISD = ISD::CTTZ;
break;
+ case Intrinsic::maxnum:
+ case Intrinsic::minnum:
+ // FMINNUM has the same costs, so don't duplicate.
+ ISD = ISD::FMAXNUM;
+ break;
case Intrinsic::sadd_sat:
ISD = ISD::SADDSAT;
break;
@@ -2256,6 +2672,15 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
return LT.first * Entry->Cost;
+ if (ST->hasBMI()) {
+ if (ST->is64Bit())
+ if (const auto *Entry = CostTableLookup(BMI64CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (const auto *Entry = CostTableLookup(BMI32CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+ }
+
if (ST->hasLZCNT()) {
if (ST->is64Bit())
if (const auto *Entry = CostTableLookup(LZCNT64CostTbl, ISD, MTy))
@@ -2284,12 +2709,17 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
return LT.first * Entry->Cost;
}
- return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF, ScalarizationCostPassed);
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
-int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF,
- unsigned VF) {
+int X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) {
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+
+ if (ICA.isTypeBasedOnly())
+ return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
+
static const CostTblEntry AVX512CostTbl[] = {
{ ISD::ROTL, MVT::v8i64, 1 },
{ ISD::ROTL, MVT::v4i64, 1 },
@@ -2340,6 +2770,9 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::FSHL, MVT::i8, 4 }
};
+ Intrinsic::ID IID = ICA.getID();
+ Type *RetTy = ICA.getReturnType();
+ const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
unsigned ISD = ISD::DELETED_NODE;
switch (IID) {
default:
@@ -2379,7 +2812,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
return LT.first * Entry->Cost;
}
- return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF);
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
@@ -2391,10 +2824,11 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
};
assert(Val->isVectorTy() && "This must be a vector type");
-
Type *ScalarType = Val->getScalarType();
+ int RegisterFileMoveCost = 0;
- if (Index != -1U) {
+ if (Index != -1U && (Opcode == Instruction::ExtractElement ||
+ Opcode == Instruction::InsertElement)) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
@@ -2403,17 +2837,32 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return 0;
// The type may be split. Normalize the index to the new type.
- unsigned Width = LT.second.getVectorNumElements();
- Index = Index % Width;
+ unsigned NumElts = LT.second.getVectorNumElements();
+ unsigned SubNumElts = NumElts;
+ Index = Index % NumElts;
+
+ // For >128-bit vectors, we need to extract higher 128-bit subvectors.
+ // For inserts, we also need to insert the subvector back.
+ if (LT.second.getSizeInBits() > 128) {
+ assert((LT.second.getSizeInBits() % 128) == 0 && "Illegal vector");
+ unsigned NumSubVecs = LT.second.getSizeInBits() / 128;
+ SubNumElts = NumElts / NumSubVecs;
+ if (SubNumElts <= Index) {
+ RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1);
+ Index %= SubNumElts;
+ }
+ }
if (Index == 0) {
// Floating point scalars are already located in index #0.
+ // Many insertions to #0 can fold away for scalar fp-ops, so let's assume
+ // true for all.
if (ScalarType->isFloatingPointTy())
- return 0;
+ return RegisterFileMoveCost;
- // Assume movd/movq XMM <-> GPR is relatively cheap on all targets.
- if (ScalarType->isIntegerTy())
- return 1;
+ // Assume movd/movq XMM -> GPR is relatively cheap on all targets.
+ if (ScalarType->isIntegerTy() && Opcode == Instruction::ExtractElement)
+ return 1 + RegisterFileMoveCost;
}
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -2421,24 +2870,124 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
MVT MScalarTy = LT.second.getScalarType();
if (ST->isSLM())
if (auto *Entry = CostTableLookup(SLMCostTbl, ISD, MScalarTy))
- return LT.first * Entry->Cost;
+ return Entry->Cost + RegisterFileMoveCost;
+
+ // Assume pinsr/pextr XMM <-> GPR is relatively cheap on all targets.
+ if ((MScalarTy == MVT::i16 && ST->hasSSE2()) ||
+ (MScalarTy.isInteger() && ST->hasSSE41()))
+ return 1 + RegisterFileMoveCost;
+
+ // Assume insertps is relatively cheap on all targets.
+ if (MScalarTy == MVT::f32 && ST->hasSSE41() &&
+ Opcode == Instruction::InsertElement)
+ return 1 + RegisterFileMoveCost;
+
+ // For extractions we just need to shuffle the element to index 0, which
+ // should be very cheap (assume cost = 1). For insertions we need to shuffle
+ // the element to its destination. In both cases we must handle the
+ // subvector move(s).
+ // If the vector type is already less than 128-bits then don't reduce it.
+ // TODO: Under what circumstances should we shuffle using the full width?
+ int ShuffleCost = 1;
+ if (Opcode == Instruction::InsertElement) {
+ auto *SubTy = cast<VectorType>(Val);
+ EVT VT = TLI->getValueType(DL, Val);
+ if (VT.getScalarType() != MScalarTy || VT.getSizeInBits() >= 128)
+ SubTy = FixedVectorType::get(ScalarType, SubNumElts);
+ ShuffleCost = getShuffleCost(TTI::SK_PermuteTwoSrc, SubTy, 0, SubTy);
+ }
+ int IntOrFpCost = ScalarType->isFloatingPointTy() ? 0 : 1;
+ return ShuffleCost + IntOrFpCost + RegisterFileMoveCost;
}
// Add to the base cost if we know that the extracted element of a vector is
// destined to be moved to and used in the integer register file.
- int RegisterFileMoveCost = 0;
if (Opcode == Instruction::ExtractElement && ScalarType->isPointerTy())
- RegisterFileMoveCost = 1;
+ RegisterFileMoveCost += 1;
return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
}
+unsigned X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
+ const APInt &DemandedElts,
+ bool Insert, bool Extract) {
+ unsigned Cost = 0;
+
+ // For insertions, an ISD::BUILD_VECTOR style vector initialization can be much
+ // cheaper than an accumulation of ISD::INSERT_VECTOR_ELT.
+ if (Insert) {
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+ MVT MScalarTy = LT.second.getScalarType();
+
+ if ((MScalarTy == MVT::i16 && ST->hasSSE2()) ||
+ (MScalarTy.isInteger() && ST->hasSSE41()) ||
+ (MScalarTy == MVT::f32 && ST->hasSSE41())) {
+ // For types we can insert directly, insertion into 128-bit sub vectors is
+ // cheap, followed by a cheap chain of concatenations.
+ if (LT.second.getSizeInBits() <= 128) {
+ Cost +=
+ BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, false);
+ } else {
+ unsigned NumSubVecs = LT.second.getSizeInBits() / 128;
+ Cost += (PowerOf2Ceil(NumSubVecs) - 1) * LT.first;
+ Cost += DemandedElts.countPopulation();
+
+ // For vXf32 cases, insertion into the 0'th index in each v4f32
+ // 128-bit vector is free.
+ // NOTE: This assumes legalization widens vXf32 vectors.
+ if (MScalarTy == MVT::f32)
+ for (unsigned i = 0, e = cast<FixedVectorType>(Ty)->getNumElements();
+ i < e; i += 4)
+ if (DemandedElts[i])
+ Cost--;
+ }
+ } else if (LT.second.isVector()) {
+ // Without fast insertion, we need to use MOVD/MOVQ to pass each demanded
+ // integer element as a SCALAR_TO_VECTOR, then we build the vector as a
+ // series of UNPCK followed by CONCAT_VECTORS - all of these can be
+ // considered cheap.
+ if (Ty->isIntOrIntVectorTy())
+ Cost += DemandedElts.countPopulation();
+
+ // Get the smaller of the legalized or original pow2-extended number of
+ // vector elements, which represents the number of unpacks we'll end up
+ // performing.
+ unsigned NumElts = LT.second.getVectorNumElements();
+ unsigned Pow2Elts =
+ PowerOf2Ceil(cast<FixedVectorType>(Ty)->getNumElements());
+ Cost += (std::min<unsigned>(NumElts, Pow2Elts) - 1) * LT.first;
+ }
+ }
+
+ // TODO: Use default extraction for now, but we should investigate extending this
+ // to handle repeated subvector extraction.
+ if (Extract)
+ Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, false, Extract);
+
+ return Cost;
+}
+
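For the fast-insert branch above, once the legalized vector is wider than 128 bits the estimate boils down to one insert per demanded lane plus the concatenations needed to stitch the 128-bit chunks back together, with lane 0 of every v4f32 chunk treated as free. A self-contained sketch of that arithmetic follows; the helper name, the bitmask encoding of DemandedElts, and the LT.first value of 1 are assumptions for illustration, and only the wider-than-128-bit path is modeled.

// Standalone sketch of the "fast insert" scalarization estimate for vectors
// whose legal type is wider than 128 bits. Names and concrete values here are
// illustrative assumptions, not the real LLVM interfaces.
#include <bit>
#include <cstdint>
#include <cstdio>

// DemandedElts is a bitmask of lanes being inserted; VecBits is the legalized
// vector width; LTFirst is the legalization split factor.
static unsigned insertOverhead(uint64_t DemandedElts, unsigned NumElts,
                               unsigned VecBits, unsigned LTFirst, bool IsF32) {
  unsigned Cost = 0;
  unsigned NumSubVecs = VecBits / 128;               // 128-bit chunks after legalization
  Cost += (std::bit_ceil(NumSubVecs) - 1) * LTFirst; // concatenations of the chunks
  Cost += std::popcount(DemandedElts);               // one insert per demanded element
  if (IsF32)                                         // lane 0 of each v4f32 chunk is free
    for (unsigned i = 0; i < NumElts; i += 4)
      if (DemandedElts & (1ull << i))
        --Cost;
  return Cost;
}

int main() {
  // v8i32 in a 256-bit register, all lanes demanded: 8 inserts + 1 concat = 9.
  std::printf("%u\n", insertOverhead(0xFF, 8, 256, 1, /*IsF32=*/false));
  // v8f32, all lanes demanded: lanes 0 and 4 are free, so 8 + 1 - 2 = 7.
  std::printf("%u\n", insertOverhead(0xFF, 8, 256, 1, /*IsF32=*/true));
}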
int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
+ // TODO: Handle other cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput) {
+ if (isa_and_nonnull<StoreInst>(I)) {
+ Value *Ptr = I->getOperand(1);
+ // Store instruction with index and scale costs 2 Uops.
+ // Check the preceding GEP to identify non-const indices.
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+ if (!all_of(GEP->indices(), [](Value *V) { return isa<Constant>(V); }))
+ return TTI::TCC_Basic * 2;
+ }
+ }
+ return TTI::TCC_Basic;
+ }
+
// Handle non-power-of-two vectors such as <3 x float>
- if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
- unsigned NumElem = VTy->getVectorNumElements();
+ if (auto *VTy = dyn_cast<FixedVectorType>(Src)) {
+ unsigned NumElem = VTy->getNumElements();
// Handle a few common cases:
// <3 x float>
@@ -2453,14 +3002,21 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// Assume that all other non-power-of-two numbers are scalarized.
if (!isPowerOf2_32(NumElem)) {
+ APInt DemandedElts = APInt::getAllOnesValue(NumElem);
int Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(), Alignment,
- AddressSpace);
- int SplitCost = getScalarizationOverhead(Src, Opcode == Instruction::Load,
+ AddressSpace, CostKind);
+ int SplitCost = getScalarizationOverhead(VTy, DemandedElts,
+ Opcode == Instruction::Load,
Opcode == Instruction::Store);
return NumElem * Cost + SplitCost;
}
}
+ // Type legalization can't handle structs
+ if (TLI->getValueType(DL, Src, true) == MVT::Other)
+ return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+ CostKind);
+
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
@@ -2478,33 +3034,36 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
}
int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
- unsigned Alignment,
- unsigned AddressSpace) {
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind) {
bool IsLoad = (Instruction::Load == Opcode);
bool IsStore = (Instruction::Store == Opcode);
- VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
+ auto *SrcVTy = dyn_cast<FixedVectorType>(SrcTy);
if (!SrcVTy)
// To calculate scalar take the regular cost, without mask
- return getMemoryOpCost(Opcode, SrcTy, MaybeAlign(Alignment), AddressSpace);
+ return getMemoryOpCost(Opcode, SrcTy, Alignment, AddressSpace, CostKind);
- unsigned NumElem = SrcVTy->getVectorNumElements();
- VectorType *MaskTy =
- VectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem);
- if ((IsLoad && !isLegalMaskedLoad(SrcVTy, MaybeAlign(Alignment))) ||
- (IsStore && !isLegalMaskedStore(SrcVTy, MaybeAlign(Alignment))) ||
+ unsigned NumElem = SrcVTy->getNumElements();
+ auto *MaskTy =
+ FixedVectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem);
+ if ((IsLoad && !isLegalMaskedLoad(SrcVTy, Alignment)) ||
+ (IsStore && !isLegalMaskedStore(SrcVTy, Alignment)) ||
!isPowerOf2_32(NumElem)) {
// Scalarization
- int MaskSplitCost = getScalarizationOverhead(MaskTy, false, true);
+ APInt DemandedElts = APInt::getAllOnesValue(NumElem);
+ int MaskSplitCost =
+ getScalarizationOverhead(MaskTy, DemandedElts, false, true);
int ScalarCompareCost = getCmpSelInstrCost(
- Instruction::ICmp, Type::getInt8Ty(SrcVTy->getContext()), nullptr);
- int BranchCost = getCFInstrCost(Instruction::Br);
+ Instruction::ICmp, Type::getInt8Ty(SrcVTy->getContext()), nullptr,
+ CostKind);
+ int BranchCost = getCFInstrCost(Instruction::Br, CostKind);
int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
-
- int ValueSplitCost = getScalarizationOverhead(SrcVTy, IsLoad, IsStore);
+ int ValueSplitCost =
+ getScalarizationOverhead(SrcVTy, DemandedElts, IsLoad, IsStore);
int MemopCost =
NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
- MaybeAlign(Alignment), AddressSpace);
+ Alignment, AddressSpace, CostKind);
return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
}
@@ -2519,8 +3078,8 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
getShuffleCost(TTI::SK_PermuteTwoSrc, MaskTy, 0, nullptr);
else if (LT.second.getVectorNumElements() > NumElem) {
- VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(),
- LT.second.getVectorNumElements());
+ auto *NewMaskTy = FixedVectorType::get(MaskTy->getElementType(),
+ LT.second.getVectorNumElements());
// Expanding requires fill mask with zeroes
Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, 0, MaskTy);
}
@@ -2558,41 +3117,16 @@ int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
return BaseT::getAddressComputationCost(Ty, SE, Ptr);
}
-int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
- bool IsPairwise) {
+int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
+ bool IsPairwise,
+ TTI::TargetCostKind CostKind) {
+ // Just use the default implementation for pair reductions.
+ if (IsPairwise)
+ return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise, CostKind);
+
// We use the Intel Architecture Code Analyzer (IACA) to measure the throughput
// and use that as the cost.
- static const CostTblEntry SLMCostTblPairWise[] = {
- { ISD::FADD, MVT::v2f64, 3 },
- { ISD::ADD, MVT::v2i64, 5 },
- };
-
- static const CostTblEntry SSE2CostTblPairWise[] = {
- { ISD::FADD, MVT::v2f64, 2 },
- { ISD::FADD, MVT::v4f32, 4 },
- { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
- { ISD::ADD, MVT::v2i32, 2 }, // FIXME: chosen to be less than v4i32.
- { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
- { ISD::ADD, MVT::v2i16, 3 }, // FIXME: chosen to be less than v4i16
- { ISD::ADD, MVT::v4i16, 4 }, // FIXME: chosen to be less than v8i16
- { ISD::ADD, MVT::v8i16, 5 },
- { ISD::ADD, MVT::v2i8, 2 },
- { ISD::ADD, MVT::v4i8, 2 },
- { ISD::ADD, MVT::v8i8, 2 },
- { ISD::ADD, MVT::v16i8, 3 },
- };
-
- static const CostTblEntry AVX1CostTblPairWise[] = {
- { ISD::FADD, MVT::v4f64, 5 },
- { ISD::FADD, MVT::v8f32, 7 },
- { ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5".
- { ISD::ADD, MVT::v4i64, 5 }, // The data reported by the IACA tool is "4.8".
- { ISD::ADD, MVT::v8i32, 5 },
- { ISD::ADD, MVT::v16i16, 6 },
- { ISD::ADD, MVT::v32i8, 4 },
- };
-
static const CostTblEntry SLMCostTblNoPairWise[] = {
{ ISD::FADD, MVT::v2f64, 3 },
{ ISD::ADD, MVT::v2i64, 5 },
@@ -2633,66 +3167,49 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
EVT VT = TLI->getValueType(DL, ValTy);
if (VT.isSimple()) {
MVT MTy = VT.getSimpleVT();
- if (IsPairwise) {
- if (ST->isSLM())
- if (const auto *Entry = CostTableLookup(SLMCostTblPairWise, ISD, MTy))
- return Entry->Cost;
-
- if (ST->hasAVX())
- if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy))
- return Entry->Cost;
-
- if (ST->hasSSE2())
- if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy))
- return Entry->Cost;
- } else {
- if (ST->isSLM())
- if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy))
- return Entry->Cost;
+ if (ST->isSLM())
+ if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy))
+ return Entry->Cost;
- if (ST->hasAVX())
- if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
- return Entry->Cost;
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
+ return Entry->Cost;
- if (ST->hasSSE2())
- if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
- return Entry->Cost;
- }
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
+ return Entry->Cost;
}
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
MVT MTy = LT.second;
- if (IsPairwise) {
- if (ST->isSLM())
- if (const auto *Entry = CostTableLookup(SLMCostTblPairWise, ISD, MTy))
- return LT.first * Entry->Cost;
+ auto *ValVTy = cast<FixedVectorType>(ValTy);
- if (ST->hasAVX())
- if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy))
- return LT.first * Entry->Cost;
+ unsigned ArithmeticCost = 0;
+ if (LT.first != 1 && MTy.isVector() &&
+ MTy.getVectorNumElements() < ValVTy->getNumElements()) {
+ // Type needs to be split. We need LT.first - 1 arithmetic ops.
+ auto *SingleOpTy = FixedVectorType::get(ValVTy->getElementType(),
+ MTy.getVectorNumElements());
+ ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind);
+ ArithmeticCost *= LT.first - 1;
+ }
- if (ST->hasSSE2())
- if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy))
- return LT.first * Entry->Cost;
- } else {
- if (ST->isSLM())
- if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->isSLM())
+ if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy))
+ return ArithmeticCost + Entry->Cost;
- if (ST->hasAVX())
- if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
+ return ArithmeticCost + Entry->Cost;
- if (ST->hasSSE2())
- if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
- return LT.first * Entry->Cost;
- }
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
+ return ArithmeticCost + Entry->Cost;
// FIXME: These assume a naive kshift+binop lowering, which is probably
// conservative in most cases.
- // FIXME: This doesn't cost large types like v128i1 correctly.
static const CostTblEntry AVX512BoolReduction[] = {
{ ISD::AND, MVT::v2i1, 3 },
{ ISD::AND, MVT::v4i1, 5 },
@@ -2738,252 +3255,408 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
};
// Handle bool allof/anyof patterns.
- if (!IsPairwise && ValTy->getVectorElementType()->isIntegerTy(1)) {
+ if (ValVTy->getElementType()->isIntegerTy(1)) {
+ unsigned ArithmeticCost = 0;
+ if (LT.first != 1 && MTy.isVector() &&
+ MTy.getVectorNumElements() < ValVTy->getNumElements()) {
+ // Type needs to be split. We need LT.first - 1 arithmetic ops.
+ auto *SingleOpTy = FixedVectorType::get(ValVTy->getElementType(),
+ MTy.getVectorNumElements());
+ ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind);
+ ArithmeticCost *= LT.first - 1;
+ }
+
if (ST->hasAVX512())
if (const auto *Entry = CostTableLookup(AVX512BoolReduction, ISD, MTy))
- return LT.first * Entry->Cost;
+ return ArithmeticCost + Entry->Cost;
if (ST->hasAVX2())
if (const auto *Entry = CostTableLookup(AVX2BoolReduction, ISD, MTy))
- return LT.first * Entry->Cost;
+ return ArithmeticCost + Entry->Cost;
if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVX1BoolReduction, ISD, MTy))
- return LT.first * Entry->Cost;
+ return ArithmeticCost + Entry->Cost;
if (ST->hasSSE2())
if (const auto *Entry = CostTableLookup(SSE2BoolReduction, ISD, MTy))
- return LT.first * Entry->Cost;
+ return ArithmeticCost + Entry->Cost;
+
+ return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise,
+ CostKind);
+ }
+
+ unsigned NumVecElts = ValVTy->getNumElements();
+ unsigned ScalarSize = ValVTy->getScalarSizeInBits();
+
+ // Special case power of 2 reductions where the scalar type isn't changed
+ // by type legalization.
+ if (!isPowerOf2_32(NumVecElts) || ScalarSize != MTy.getScalarSizeInBits())
+ return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise,
+ CostKind);
+
+ unsigned ReductionCost = 0;
+
+ auto *Ty = ValVTy;
+ if (LT.first != 1 && MTy.isVector() &&
+ MTy.getVectorNumElements() < ValVTy->getNumElements()) {
+ // Type needs to be split. We need LT.first - 1 arithmetic ops.
+ Ty = FixedVectorType::get(ValVTy->getElementType(),
+ MTy.getVectorNumElements());
+ ReductionCost = getArithmeticInstrCost(Opcode, Ty, CostKind);
+ ReductionCost *= LT.first - 1;
+ NumVecElts = MTy.getVectorNumElements();
+ }
+
+ // Now handle reduction with the legal type, taking into account size changes
+ // at each level.
+ while (NumVecElts > 1) {
+ // Determine the size of the remaining vector we need to reduce.
+ unsigned Size = NumVecElts * ScalarSize;
+ NumVecElts /= 2;
+ // If we're reducing from 256/512 bits, use an extract_subvector.
+ if (Size > 128) {
+ auto *SubTy = FixedVectorType::get(ValVTy->getElementType(), NumVecElts);
+ ReductionCost +=
+ getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts, SubTy);
+ Ty = SubTy;
+ } else if (Size == 128) {
+ // Reducing from 128 bits is a permute of v2f64/v2i64.
+ FixedVectorType *ShufTy;
+ if (ValVTy->isFloatingPointTy())
+ ShufTy =
+ FixedVectorType::get(Type::getDoubleTy(ValVTy->getContext()), 2);
+ else
+ ShufTy =
+ FixedVectorType::get(Type::getInt64Ty(ValVTy->getContext()), 2);
+ ReductionCost +=
+ getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
+ } else if (Size == 64) {
+ // Reducing from 64 bits is a shuffle of v4f32/v4i32.
+ FixedVectorType *ShufTy;
+ if (ValVTy->isFloatingPointTy())
+ ShufTy =
+ FixedVectorType::get(Type::getFloatTy(ValVTy->getContext()), 4);
+ else
+ ShufTy =
+ FixedVectorType::get(Type::getInt32Ty(ValVTy->getContext()), 4);
+ ReductionCost +=
+ getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
+ } else {
+ // Reducing from smaller size is a shift by immediate.
+ auto *ShiftTy = FixedVectorType::get(
+ Type::getIntNTy(ValVTy->getContext(), Size), 128 / Size);
+ ReductionCost += getArithmeticInstrCost(
+ Instruction::LShr, ShiftTy, CostKind,
+ TargetTransformInfo::OK_AnyValue,
+ TargetTransformInfo::OK_UniformConstantValue,
+ TargetTransformInfo::OP_None, TargetTransformInfo::OP_None);
+ }
+
+ // Add the arithmetic op for this level.
+ ReductionCost += getArithmeticInstrCost(Opcode, Ty, CostKind);
}
- return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise);
+ // Add the final extract element to the cost.
+ return ReductionCost + getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
}
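The loop above prices a reduction as a shuffle-and-op tree: each level halves the live element count, picks the cheapest shuffle that can bring the upper half down (extract_subvector above 128 bits, a v2i64/v2f64 permute at 128 bits, a v4i32/v4f32 shuffle at 64 bits, a shift by immediate below that), and adds one arithmetic op, with a final extractelement capping the sequence. A rough standalone walk-through of the step sequence follows; it only counts steps, since the real per-step costs come from getShuffleCost and getArithmeticInstrCost for the specific subtarget.

// Illustrative walk-through of the halving reduction loop; unit step counts
// only, not the real per-subtarget costs.
#include <cstdio>

static int reductionSteps(unsigned NumElts, unsigned ScalarBits) {
  int Steps = 0;
  while (NumElts > 1) {
    unsigned Size = NumElts * ScalarBits; // bits still left to reduce
    NumElts /= 2;
    if (Size > 128)
      std::puts("extract_subvector (upper half of a 256/512-bit vector)");
    else if (Size == 128)
      std::puts("permute of v2f64/v2i64 (swap the two 64-bit halves)");
    else if (Size == 64)
      std::puts("shuffle of v4f32/v4i32 (swap the two 32-bit halves)");
    else
      std::puts("logical shift right by immediate (sub-64-bit tail)");
    std::puts("arithmetic op on the remaining lanes");
    Steps += 2; // one shuffle/shift plus one arithmetic op per level
  }
  std::puts("extractelement of lane 0");
  return Steps + 1;
}

int main() {
  // v8f32 add-reduction: 3 levels -> 3 shuffles + 3 fadds + 1 extract = 7 steps.
  std::printf("total steps: %d\n", reductionSteps(8, 32));
}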
-int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
- bool IsPairwise, bool IsUnsigned) {
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+int X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned) {
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
MVT MTy = LT.second;
int ISD;
- if (ValTy->isIntOrIntVectorTy()) {
+ if (Ty->isIntOrIntVectorTy()) {
ISD = IsUnsigned ? ISD::UMIN : ISD::SMIN;
} else {
- assert(ValTy->isFPOrFPVectorTy() &&
+ assert(Ty->isFPOrFPVectorTy() &&
"Expected float point or integer vector type.");
ISD = ISD::FMINNUM;
}
- // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
- // and make it as the cost.
+ static const CostTblEntry SSE1CostTbl[] = {
+ {ISD::FMINNUM, MVT::v4f32, 1},
+ };
- static const CostTblEntry SSE1CostTblPairWise[] = {
- {ISD::FMINNUM, MVT::v4f32, 4},
- };
-
- static const CostTblEntry SSE2CostTblPairWise[] = {
- {ISD::FMINNUM, MVT::v2f64, 3},
- {ISD::SMIN, MVT::v2i64, 6},
- {ISD::UMIN, MVT::v2i64, 8},
- {ISD::SMIN, MVT::v4i32, 6},
- {ISD::UMIN, MVT::v4i32, 8},
- {ISD::SMIN, MVT::v8i16, 4},
- {ISD::UMIN, MVT::v8i16, 6},
- {ISD::SMIN, MVT::v16i8, 8},
- {ISD::UMIN, MVT::v16i8, 6},
- };
-
- static const CostTblEntry SSE41CostTblPairWise[] = {
- {ISD::FMINNUM, MVT::v4f32, 2},
- {ISD::SMIN, MVT::v2i64, 9},
- {ISD::UMIN, MVT::v2i64,10},
- {ISD::SMIN, MVT::v4i32, 1}, // The data reported by the IACA is "1.5"
- {ISD::UMIN, MVT::v4i32, 2}, // The data reported by the IACA is "1.8"
- {ISD::SMIN, MVT::v8i16, 2},
- {ISD::UMIN, MVT::v8i16, 2},
- {ISD::SMIN, MVT::v16i8, 3},
- {ISD::UMIN, MVT::v16i8, 3},
- };
-
- static const CostTblEntry SSE42CostTblPairWise[] = {
- {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8"
- {ISD::UMIN, MVT::v2i64, 8}, // The data reported by the IACA is "8.6"
- };
-
- static const CostTblEntry AVX1CostTblPairWise[] = {
- {ISD::FMINNUM, MVT::v4f32, 1},
- {ISD::FMINNUM, MVT::v4f64, 1},
- {ISD::FMINNUM, MVT::v8f32, 2},
- {ISD::SMIN, MVT::v2i64, 3},
- {ISD::UMIN, MVT::v2i64, 3},
- {ISD::SMIN, MVT::v4i32, 1},
- {ISD::UMIN, MVT::v4i32, 1},
- {ISD::SMIN, MVT::v8i16, 1},
- {ISD::UMIN, MVT::v8i16, 1},
- {ISD::SMIN, MVT::v16i8, 2},
- {ISD::UMIN, MVT::v16i8, 2},
- {ISD::SMIN, MVT::v4i64, 7},
- {ISD::UMIN, MVT::v4i64, 7},
- {ISD::SMIN, MVT::v8i32, 3},
- {ISD::UMIN, MVT::v8i32, 3},
- {ISD::SMIN, MVT::v16i16, 3},
- {ISD::UMIN, MVT::v16i16, 3},
- {ISD::SMIN, MVT::v32i8, 3},
- {ISD::UMIN, MVT::v32i8, 3},
- };
-
- static const CostTblEntry AVX2CostTblPairWise[] = {
- {ISD::SMIN, MVT::v4i64, 2},
- {ISD::UMIN, MVT::v4i64, 2},
- {ISD::SMIN, MVT::v8i32, 1},
- {ISD::UMIN, MVT::v8i32, 1},
- {ISD::SMIN, MVT::v16i16, 1},
- {ISD::UMIN, MVT::v16i16, 1},
- {ISD::SMIN, MVT::v32i8, 2},
- {ISD::UMIN, MVT::v32i8, 2},
- };
-
- static const CostTblEntry AVX512CostTblPairWise[] = {
- {ISD::FMINNUM, MVT::v8f64, 1},
- {ISD::FMINNUM, MVT::v16f32, 2},
- {ISD::SMIN, MVT::v8i64, 2},
- {ISD::UMIN, MVT::v8i64, 2},
- {ISD::SMIN, MVT::v16i32, 1},
- {ISD::UMIN, MVT::v16i32, 1},
- };
-
- static const CostTblEntry SSE1CostTblNoPairWise[] = {
- {ISD::FMINNUM, MVT::v4f32, 4},
+ static const CostTblEntry SSE2CostTbl[] = {
+ {ISD::FMINNUM, MVT::v2f64, 1},
+ {ISD::SMIN, MVT::v8i16, 1},
+ {ISD::UMIN, MVT::v16i8, 1},
};
- static const CostTblEntry SSE2CostTblNoPairWise[] = {
- {ISD::FMINNUM, MVT::v2f64, 3},
- {ISD::SMIN, MVT::v2i64, 6},
- {ISD::UMIN, MVT::v2i64, 8},
- {ISD::SMIN, MVT::v4i32, 6},
- {ISD::UMIN, MVT::v4i32, 8},
- {ISD::SMIN, MVT::v8i16, 4},
- {ISD::UMIN, MVT::v8i16, 6},
- {ISD::SMIN, MVT::v16i8, 8},
- {ISD::UMIN, MVT::v16i8, 6},
+ static const CostTblEntry SSE41CostTbl[] = {
+ {ISD::SMIN, MVT::v4i32, 1},
+ {ISD::UMIN, MVT::v4i32, 1},
+ {ISD::UMIN, MVT::v8i16, 1},
+ {ISD::SMIN, MVT::v16i8, 1},
};
- static const CostTblEntry SSE41CostTblNoPairWise[] = {
- {ISD::FMINNUM, MVT::v4f32, 3},
- {ISD::SMIN, MVT::v2i64, 9},
- {ISD::UMIN, MVT::v2i64,11},
- {ISD::SMIN, MVT::v4i32, 1}, // The data reported by the IACA is "1.5"
- {ISD::UMIN, MVT::v4i32, 2}, // The data reported by the IACA is "1.8"
- {ISD::SMIN, MVT::v8i16, 1}, // The data reported by the IACA is "1.5"
- {ISD::UMIN, MVT::v8i16, 2}, // The data reported by the IACA is "1.8"
- {ISD::SMIN, MVT::v16i8, 3},
- {ISD::UMIN, MVT::v16i8, 3},
+ static const CostTblEntry SSE42CostTbl[] = {
+ {ISD::UMIN, MVT::v2i64, 3}, // xor+pcmpgtq+blendvpd
};
- static const CostTblEntry SSE42CostTblNoPairWise[] = {
- {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8"
- {ISD::UMIN, MVT::v2i64, 9}, // The data reported by the IACA is "8.6"
+ static const CostTblEntry AVX1CostTbl[] = {
+ {ISD::FMINNUM, MVT::v8f32, 1},
+ {ISD::FMINNUM, MVT::v4f64, 1},
+ {ISD::SMIN, MVT::v8i32, 3},
+ {ISD::UMIN, MVT::v8i32, 3},
+ {ISD::SMIN, MVT::v16i16, 3},
+ {ISD::UMIN, MVT::v16i16, 3},
+ {ISD::SMIN, MVT::v32i8, 3},
+ {ISD::UMIN, MVT::v32i8, 3},
};
- static const CostTblEntry AVX1CostTblNoPairWise[] = {
- {ISD::FMINNUM, MVT::v4f32, 1},
- {ISD::FMINNUM, MVT::v4f64, 1},
- {ISD::FMINNUM, MVT::v8f32, 1},
- {ISD::SMIN, MVT::v2i64, 3},
- {ISD::UMIN, MVT::v2i64, 3},
- {ISD::SMIN, MVT::v4i32, 1},
- {ISD::UMIN, MVT::v4i32, 1},
- {ISD::SMIN, MVT::v8i16, 1},
- {ISD::UMIN, MVT::v8i16, 1},
- {ISD::SMIN, MVT::v16i8, 2},
- {ISD::UMIN, MVT::v16i8, 2},
- {ISD::SMIN, MVT::v4i64, 7},
- {ISD::UMIN, MVT::v4i64, 7},
- {ISD::SMIN, MVT::v8i32, 2},
- {ISD::UMIN, MVT::v8i32, 2},
- {ISD::SMIN, MVT::v16i16, 2},
- {ISD::UMIN, MVT::v16i16, 2},
- {ISD::SMIN, MVT::v32i8, 2},
- {ISD::UMIN, MVT::v32i8, 2},
- };
-
- static const CostTblEntry AVX2CostTblNoPairWise[] = {
- {ISD::SMIN, MVT::v4i64, 1},
- {ISD::UMIN, MVT::v4i64, 1},
- {ISD::SMIN, MVT::v8i32, 1},
- {ISD::UMIN, MVT::v8i32, 1},
- {ISD::SMIN, MVT::v16i16, 1},
- {ISD::UMIN, MVT::v16i16, 1},
- {ISD::SMIN, MVT::v32i8, 1},
- {ISD::UMIN, MVT::v32i8, 1},
- };
-
- static const CostTblEntry AVX512CostTblNoPairWise[] = {
- {ISD::FMINNUM, MVT::v8f64, 1},
- {ISD::FMINNUM, MVT::v16f32, 2},
- {ISD::SMIN, MVT::v8i64, 1},
- {ISD::UMIN, MVT::v8i64, 1},
- {ISD::SMIN, MVT::v16i32, 1},
- {ISD::UMIN, MVT::v16i32, 1},
- };
-
- if (IsPairwise) {
- if (ST->hasAVX512())
- if (const auto *Entry = CostTableLookup(AVX512CostTblPairWise, ISD, MTy))
- return LT.first * Entry->Cost;
+ static const CostTblEntry AVX2CostTbl[] = {
+ {ISD::SMIN, MVT::v8i32, 1},
+ {ISD::UMIN, MVT::v8i32, 1},
+ {ISD::SMIN, MVT::v16i16, 1},
+ {ISD::UMIN, MVT::v16i16, 1},
+ {ISD::SMIN, MVT::v32i8, 1},
+ {ISD::UMIN, MVT::v32i8, 1},
+ };
- if (ST->hasAVX2())
- if (const auto *Entry = CostTableLookup(AVX2CostTblPairWise, ISD, MTy))
- return LT.first * Entry->Cost;
+ static const CostTblEntry AVX512CostTbl[] = {
+ {ISD::FMINNUM, MVT::v16f32, 1},
+ {ISD::FMINNUM, MVT::v8f64, 1},
+ {ISD::SMIN, MVT::v2i64, 1},
+ {ISD::UMIN, MVT::v2i64, 1},
+ {ISD::SMIN, MVT::v4i64, 1},
+ {ISD::UMIN, MVT::v4i64, 1},
+ {ISD::SMIN, MVT::v8i64, 1},
+ {ISD::UMIN, MVT::v8i64, 1},
+ {ISD::SMIN, MVT::v16i32, 1},
+ {ISD::UMIN, MVT::v16i32, 1},
+ };
- if (ST->hasAVX())
- if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy))
- return LT.first * Entry->Cost;
+ static const CostTblEntry AVX512BWCostTbl[] = {
+ {ISD::SMIN, MVT::v32i16, 1},
+ {ISD::UMIN, MVT::v32i16, 1},
+ {ISD::SMIN, MVT::v64i8, 1},
+ {ISD::UMIN, MVT::v64i8, 1},
+ };
- if (ST->hasSSE42())
- if (const auto *Entry = CostTableLookup(SSE42CostTblPairWise, ISD, MTy))
- return LT.first * Entry->Cost;
+ // If we have a native MIN/MAX instruction for this type, use it.
+ if (ST->hasBWI())
+ if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSE41())
- if (const auto *Entry = CostTableLookup(SSE41CostTblPairWise, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasAVX512())
+ if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSE2())
- if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasAVX2())
+ if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSE1())
- if (const auto *Entry = CostTableLookup(SSE1CostTblPairWise, ISD, MTy))
- return LT.first * Entry->Cost;
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (ST->hasSSE42())
+ if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (ST->hasSSE41())
+ if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (ST->hasSSE1())
+ if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ unsigned CmpOpcode;
+ if (Ty->isFPOrFPVectorTy()) {
+ CmpOpcode = Instruction::FCmp;
} else {
- if (ST->hasAVX512())
- if (const auto *Entry =
- CostTableLookup(AVX512CostTblNoPairWise, ISD, MTy))
- return LT.first * Entry->Cost;
+ assert(Ty->isIntOrIntVectorTy() &&
+ "expecting floating point or integer type for min/max reduction");
+ CmpOpcode = Instruction::ICmp;
+ }
- if (ST->hasAVX2())
- if (const auto *Entry = CostTableLookup(AVX2CostTblNoPairWise, ISD, MTy))
- return LT.first * Entry->Cost;
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ // Otherwise fall back to cmp+select.
+ return getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) +
+ getCmpSelInstrCost(Instruction::Select, Ty, CondTy, CostKind);
+}
+
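If none of the tables above match, the function falls back to pricing the operation as a compare plus a select, which is the generic min/max lowering when no dedicated instruction exists. A scalar illustration of that identity, with a hypothetical helper name:

// Sketch of the cmp+select fallback the cost model charges when no native
// min instruction exists: smin(a, b) == select(a < b, a, b).
#include <cstdio>

static int sminViaCmpSel(int a, int b) {
  bool c = a < b; // the ICmp the cost model charges for
  return c ? a : b; // the Select it charges for
}

int main() { std::printf("%d\n", sminViaCmpSel(3, -5)); } // prints -5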
+int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
+ bool IsPairwise, bool IsUnsigned,
+ TTI::TargetCostKind CostKind) {
+ // Just use the default implementation for pair reductions.
+ if (IsPairwise)
+ return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned,
+ CostKind);
+
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+
+ MVT MTy = LT.second;
+
+ int ISD;
+ if (ValTy->isIntOrIntVectorTy()) {
+ ISD = IsUnsigned ? ISD::UMIN : ISD::SMIN;
+ } else {
+ assert(ValTy->isFPOrFPVectorTy() &&
+ "Expected float point or integer vector type.");
+ ISD = ISD::FMINNUM;
+ }
+
+ // We use the Intel Architecture Code Analyzer (IACA) to measure the throughput
+ // and use that as the cost.
+
+ static const CostTblEntry SSE2CostTblNoPairWise[] = {
+ {ISD::UMIN, MVT::v2i16, 5}, // need pxors to use pminsw/pmaxsw
+ {ISD::UMIN, MVT::v4i16, 7}, // need pxors to use pminsw/pmaxsw
+ {ISD::UMIN, MVT::v8i16, 9}, // need pxors to use pminsw/pmaxsw
+ };
+
+ static const CostTblEntry SSE41CostTblNoPairWise[] = {
+ {ISD::SMIN, MVT::v2i16, 3}, // same as sse2
+ {ISD::SMIN, MVT::v4i16, 5}, // same as sse2
+ {ISD::UMIN, MVT::v2i16, 5}, // same as sse2
+ {ISD::UMIN, MVT::v4i16, 7}, // same as sse2
+ {ISD::SMIN, MVT::v8i16, 4}, // phminposuw+xor
+ {ISD::UMIN, MVT::v8i16, 4}, // FIXME: umin is cheaper than umax
+ {ISD::SMIN, MVT::v2i8, 3}, // pminsb
+ {ISD::SMIN, MVT::v4i8, 5}, // pminsb
+ {ISD::SMIN, MVT::v8i8, 7}, // pminsb
+ {ISD::SMIN, MVT::v16i8, 6},
+ {ISD::UMIN, MVT::v2i8, 3}, // same as sse2
+ {ISD::UMIN, MVT::v4i8, 5}, // same as sse2
+ {ISD::UMIN, MVT::v8i8, 7}, // same as sse2
+ {ISD::UMIN, MVT::v16i8, 6}, // FIXME: umin is cheaper than umax
+ };
+
+ static const CostTblEntry AVX1CostTblNoPairWise[] = {
+ {ISD::SMIN, MVT::v16i16, 6},
+ {ISD::UMIN, MVT::v16i16, 6}, // FIXME: umin is cheaper than umax
+ {ISD::SMIN, MVT::v32i8, 8},
+ {ISD::UMIN, MVT::v32i8, 8},
+ };
+
+ static const CostTblEntry AVX512BWCostTblNoPairWise[] = {
+ {ISD::SMIN, MVT::v32i16, 8},
+ {ISD::UMIN, MVT::v32i16, 8}, // FIXME: umin is cheaper than umax
+ {ISD::SMIN, MVT::v64i8, 10},
+ {ISD::UMIN, MVT::v64i8, 10},
+ };
+
+ // Before legalizing the type, give a chance to look up illegal narrow types
+ // in the table.
+ // FIXME: Is there a better way to do this?
+ EVT VT = TLI->getValueType(DL, ValTy);
+ if (VT.isSimple()) {
+ MVT MTy = VT.getSimpleVT();
+ if (ST->hasBWI())
+ if (const auto *Entry = CostTableLookup(AVX512BWCostTblNoPairWise, ISD, MTy))
+ return Entry->Cost;
if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
- return LT.first * Entry->Cost;
-
- if (ST->hasSSE42())
- if (const auto *Entry = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy))
- return LT.first * Entry->Cost;
+ return Entry->Cost;
if (ST->hasSSE41())
if (const auto *Entry = CostTableLookup(SSE41CostTblNoPairWise, ISD, MTy))
- return LT.first * Entry->Cost;
+ return Entry->Cost;
if (ST->hasSSE2())
if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
- return LT.first * Entry->Cost;
+ return Entry->Cost;
+ }
- if (ST->hasSSE1())
- if (const auto *Entry = CostTableLookup(SSE1CostTblNoPairWise, ISD, MTy))
- return LT.first * Entry->Cost;
+ auto *ValVTy = cast<FixedVectorType>(ValTy);
+ unsigned NumVecElts = ValVTy->getNumElements();
+
+ auto *Ty = ValVTy;
+ unsigned MinMaxCost = 0;
+ if (LT.first != 1 && MTy.isVector() &&
+ MTy.getVectorNumElements() < ValVTy->getNumElements()) {
+    // Type needs to be split. We need LT.first - 1 operations.
+ Ty = FixedVectorType::get(ValVTy->getElementType(),
+ MTy.getVectorNumElements());
+ auto *SubCondTy = FixedVectorType::get(CondTy->getElementType(),
+ MTy.getVectorNumElements());
+ MinMaxCost = getMinMaxCost(Ty, SubCondTy, IsUnsigned);
+ MinMaxCost *= LT.first - 1;
+ NumVecElts = MTy.getVectorNumElements();
}
- return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned);
+ if (ST->hasBWI())
+ if (const auto *Entry = CostTableLookup(AVX512BWCostTblNoPairWise, ISD, MTy))
+ return MinMaxCost + Entry->Cost;
+
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
+ return MinMaxCost + Entry->Cost;
+
+ if (ST->hasSSE41())
+ if (const auto *Entry = CostTableLookup(SSE41CostTblNoPairWise, ISD, MTy))
+ return MinMaxCost + Entry->Cost;
+
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
+ return MinMaxCost + Entry->Cost;
+
+ unsigned ScalarSize = ValTy->getScalarSizeInBits();
+
+  // Only handle power-of-2 reductions where the scalar type is not changed by
+  // type legalization; otherwise fall back to the generic implementation.
+ if (!isPowerOf2_32(ValVTy->getNumElements()) ||
+ ScalarSize != MTy.getScalarSizeInBits())
+ return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned,
+ CostKind);
+
+ // Now handle reduction with the legal type, taking into account size changes
+ // at each level.
+ while (NumVecElts > 1) {
+ // Determine the size of the remaining vector we need to reduce.
+ unsigned Size = NumVecElts * ScalarSize;
+ NumVecElts /= 2;
+ // If we're reducing from 256/512 bits, use an extract_subvector.
+ if (Size > 128) {
+ auto *SubTy = FixedVectorType::get(ValVTy->getElementType(), NumVecElts);
+ MinMaxCost +=
+ getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts, SubTy);
+ Ty = SubTy;
+ } else if (Size == 128) {
+ // Reducing from 128 bits is a permute of v2f64/v2i64.
+ VectorType *ShufTy;
+ if (ValTy->isFloatingPointTy())
+ ShufTy =
+ FixedVectorType::get(Type::getDoubleTy(ValTy->getContext()), 2);
+ else
+ ShufTy = FixedVectorType::get(Type::getInt64Ty(ValTy->getContext()), 2);
+ MinMaxCost +=
+ getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
+ } else if (Size == 64) {
+ // Reducing from 64 bits is a shuffle of v4f32/v4i32.
+ FixedVectorType *ShufTy;
+ if (ValTy->isFloatingPointTy())
+ ShufTy = FixedVectorType::get(Type::getFloatTy(ValTy->getContext()), 4);
+ else
+ ShufTy = FixedVectorType::get(Type::getInt32Ty(ValTy->getContext()), 4);
+ MinMaxCost +=
+ getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
+ } else {
+ // Reducing from smaller size is a shift by immediate.
+ auto *ShiftTy = FixedVectorType::get(
+ Type::getIntNTy(ValTy->getContext(), Size), 128 / Size);
+ MinMaxCost += getArithmeticInstrCost(
+ Instruction::LShr, ShiftTy, TTI::TCK_RecipThroughput,
+ TargetTransformInfo::OK_AnyValue,
+ TargetTransformInfo::OK_UniformConstantValue,
+ TargetTransformInfo::OP_None, TargetTransformInfo::OP_None);
+ }
+
+ // Add the arithmetic op for this level.
+ auto *SubCondTy =
+ FixedVectorType::get(CondTy->getElementType(), Ty->getNumElements());
+ MinMaxCost += getMinMaxCost(Ty, SubCondTy, IsUnsigned);
+ }
+
+ // Add the final extract element to the cost.
+ return MinMaxCost + getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
}
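The loop above halves the vector until a single lane remains, charging one shuffle per level (an extract_subvector above 128 bits, a 128-bit permute, a 64-bit shuffle, or an immediate shift below that) plus one min/max step, and a final extract. A self-contained sketch of that recurrence, with every per-step cost taken as a hypothetical constant rather than a value from the X86 tables:

#include <cassert>

// Illustrative cost model only; mirrors the structure of the halving loop,
// not the actual table-driven numbers.
struct StepCosts {
  unsigned ExtractSubvector; // narrowing a 256/512-bit vector
  unsigned Permute128;       // v2i64/v2f64-style permute at 128 bits
  unsigned Shuffle64;        // v4i32/v4f32-style shuffle at 64 bits
  unsigned ShiftImm;         // shift-by-immediate below 64 bits
  unsigned MinMaxOp;         // min/max of the two halves at each level
  unsigned ExtractElement;   // moving the surviving lane to a scalar
};

unsigned modelMinMaxReduction(unsigned NumElts, unsigned ScalarBits,
                              const StepCosts &C) {
  assert(NumElts && (NumElts & (NumElts - 1)) == 0 && "power of two expected");
  unsigned Cost = 0;
  while (NumElts > 1) {
    unsigned Bits = NumElts * ScalarBits; // width still left to reduce
    NumElts /= 2;
    if (Bits > 128)
      Cost += C.ExtractSubvector;
    else if (Bits == 128)
      Cost += C.Permute128;
    else if (Bits == 64)
      Cost += C.Shuffle64;
    else
      Cost += C.ShiftImm;
    Cost += C.MinMaxOp;
  }
  return Cost + C.ExtractElement;
}

For a 16 x i8 input, for example, this model charges one 128-bit permute, one 64-bit shuffle, two immediate shifts, four min/max steps, and the final extract.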
/// Calculate the cost of materializing a 64-bit value. This helper
@@ -2999,7 +3672,8 @@ int X86TTIImpl::getIntImmCost(int64_t Val) {
return 2 * TTI::TCC_Basic;
}
-int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -3034,7 +3708,7 @@ int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
}
int X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty) {
+ Type *Ty, TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -3121,17 +3795,18 @@ int X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Im
if (Idx == ImmIdx) {
int NumConstants = divideCeil(BitSize, 64);
- int Cost = X86TTIImpl::getIntImmCost(Imm, Ty);
+ int Cost = X86TTIImpl::getIntImmCost(Imm, Ty, CostKind);
return (Cost <= NumConstants * TTI::TCC_Basic)
? static_cast<int>(TTI::TCC_Free)
: Cost;
}
- return X86TTIImpl::getIntImmCost(Imm, Ty);
+ return X86TTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
int X86TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -3162,52 +3837,45 @@ int X86TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
return TTI::TCC_Free;
break;
}
- return X86TTIImpl::getIntImmCost(Imm, Ty);
+ return X86TTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
-unsigned X86TTIImpl::getUserCost(const User *U,
- ArrayRef<const Value *> Operands) {
- if (isa<StoreInst>(U)) {
- Value *Ptr = U->getOperand(1);
- // Store instruction with index and scale costs 2 Uops.
- // Check the preceding GEP to identify non-const indices.
- if (auto GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
- if (!all_of(GEP->indices(), [](Value *V) { return isa<Constant>(V); }))
- return TTI::TCC_Basic * 2;
- }
- return TTI::TCC_Basic;
- }
- return BaseT::getUserCost(U, Operands);
+unsigned
+X86TTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Opcode == Instruction::PHI ? 0 : 1;
+ // Branches are assumed to be predicted.
+ return CostKind == TTI::TCK_RecipThroughput ? 0 : 1;
}
// Return an average cost of a Gather / Scatter instruction; this may be improved later.
-int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
- unsigned Alignment, unsigned AddressSpace) {
+int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, const Value *Ptr,
+ Align Alignment, unsigned AddressSpace) {
assert(isa<VectorType>(SrcVTy) && "Unexpected type in getGSVectorCost");
- unsigned VF = SrcVTy->getVectorNumElements();
+ unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
// Try to reduce index size from 64 bit (default for GEP)
// to 32. It is essential for VF 16. If the index can't be reduced to 32, the
// operation will use 16 x 64 indices, which do not fit in a zmm and need
// to be split. Also check that the base pointer is the same for all lanes,
// and that there's at most one variable index.
- auto getIndexSizeInBits = [](Value *Ptr, const DataLayout& DL) {
+ auto getIndexSizeInBits = [](const Value *Ptr, const DataLayout &DL) {
unsigned IndexSize = DL.getPointerSizeInBits();
- GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
if (IndexSize < 64 || !GEP)
return IndexSize;
unsigned NumOfVarIndices = 0;
- Value *Ptrs = GEP->getPointerOperand();
+ const Value *Ptrs = GEP->getPointerOperand();
if (Ptrs->getType()->isVectorTy() && !getSplatValue(Ptrs))
return IndexSize;
for (unsigned i = 1; i < GEP->getNumOperands(); ++i) {
if (isa<Constant>(GEP->getOperand(i)))
continue;
Type *IndxTy = GEP->getOperand(i)->getType();
- if (IndxTy->isVectorTy())
- IndxTy = IndxTy->getVectorElementType();
+ if (auto *IndexVTy = dyn_cast<VectorType>(IndxTy))
+ IndxTy = IndexVTy->getElementType();
if ((IndxTy->getPrimitiveSizeInBits() == 64 &&
!isa<SExtInst>(GEP->getOperand(i))) ||
++NumOfVarIndices > 1)
@@ -3216,21 +3884,21 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
return (unsigned)32;
};
-
// Try to reduce IndexSize to 32 bits for 16-element vectors.
// By default the IndexSize is equal to pointer size.
unsigned IndexSize = (ST->hasAVX512() && VF >= 16)
? getIndexSizeInBits(Ptr, DL)
: DL.getPointerSizeInBits();
- Type *IndexVTy = VectorType::get(IntegerType::get(SrcVTy->getContext(),
- IndexSize), VF);
+ auto *IndexVTy = FixedVectorType::get(
+ IntegerType::get(SrcVTy->getContext(), IndexSize), VF);
std::pair<int, MVT> IdxsLT = TLI->getTypeLegalizationCost(DL, IndexVTy);
std::pair<int, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, SrcVTy);
int SplitFactor = std::max(IdxsLT.first, SrcLT.first);
if (SplitFactor > 1) {
// Handle splitting of vector of pointers
- Type *SplitSrcTy = VectorType::get(SrcVTy->getScalarType(), VF / SplitFactor);
+ auto *SplitSrcTy =
+ FixedVectorType::get(SrcVTy->getScalarType(), VF / SplitFactor);
return SplitFactor * getGSVectorCost(Opcode, SplitSrcTy, Ptr, Alignment,
AddressSpace);
}
@@ -3241,7 +3909,8 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
? ST->getGatherOverhead()
: ST->getScatterOverhead();
return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
- MaybeAlign(Alignment), AddressSpace);
+ MaybeAlign(Alignment), AddressSpace,
+ TTI::TCK_RecipThroughput);
}
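A standalone model of the vector gather/scatter formula above (illustrative only; SplitFactor, Overhead, and ScalarMemCost stand in for the values the real code derives from type legalization and the subtarget):

// Illustrative model, not LLVM API: split vectors recurse on the halves,
// otherwise the cost is one wide-op overhead plus a per-lane memory access.
unsigned modelGSVectorCost(unsigned VF, unsigned SplitFactor, unsigned Overhead,
                           unsigned ScalarMemCost) {
  if (SplitFactor > 1) // index or data vector does not fit a single register
    return SplitFactor *
           modelGSVectorCost(VF / SplitFactor, /*SplitFactor=*/1, Overhead,
                             ScalarMemCost);
  return Overhead + VF * ScalarMemCost;
}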
/// Return the cost of full scalarization of gather / scatter operation.
@@ -3253,25 +3922,29 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
/// AddressSpace - pointer[s] address space.
///
int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
- bool VariableMask, unsigned Alignment,
+ bool VariableMask, Align Alignment,
unsigned AddressSpace) {
- unsigned VF = SrcVTy->getVectorNumElements();
+ unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
+ APInt DemandedElts = APInt::getAllOnesValue(VF);
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
int MaskUnpackCost = 0;
if (VariableMask) {
- VectorType *MaskTy =
- VectorType::get(Type::getInt1Ty(SrcVTy->getContext()), VF);
- MaskUnpackCost = getScalarizationOverhead(MaskTy, false, true);
+ auto *MaskTy =
+ FixedVectorType::get(Type::getInt1Ty(SrcVTy->getContext()), VF);
+ MaskUnpackCost =
+ getScalarizationOverhead(MaskTy, DemandedElts, false, true);
int ScalarCompareCost =
getCmpSelInstrCost(Instruction::ICmp, Type::getInt1Ty(SrcVTy->getContext()),
- nullptr);
- int BranchCost = getCFInstrCost(Instruction::Br);
+ nullptr, CostKind);
+ int BranchCost = getCFInstrCost(Instruction::Br, CostKind);
MaskUnpackCost += VF * (BranchCost + ScalarCompareCost);
}
// The cost of the scalar loads/stores.
int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
- MaybeAlign(Alignment), AddressSpace);
+ MaybeAlign(Alignment), AddressSpace,
+ CostKind);
int InsertExtractCost = 0;
if (Opcode == Instruction::Load)
@@ -3290,21 +3963,28 @@ int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
/// Calculate the cost of Gather / Scatter operation
int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy,
- Value *Ptr, bool VariableMask,
- unsigned Alignment) {
+ const Value *Ptr, bool VariableMask,
+ Align Alignment,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) {
+
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return 1;
+
assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter");
- unsigned VF = SrcVTy->getVectorNumElements();
+ unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
if (!PtrTy && Ptr->getType()->isVectorTy())
- PtrTy = dyn_cast<PointerType>(Ptr->getType()->getVectorElementType());
+ PtrTy = dyn_cast<PointerType>(
+ cast<VectorType>(Ptr->getType())->getElementType());
assert(PtrTy && "Unexpected type for Ptr argument");
unsigned AddressSpace = PtrTy->getAddressSpace();
bool Scalarize = false;
if ((Opcode == Instruction::Load &&
- !isLegalMaskedGather(SrcVTy, MaybeAlign(Alignment))) ||
+ !isLegalMaskedGather(SrcVTy, Align(Alignment))) ||
(Opcode == Instruction::Store &&
- !isLegalMaskedScatter(SrcVTy, MaybeAlign(Alignment))))
+ !isLegalMaskedScatter(SrcVTy, Align(Alignment))))
Scalarize = true;
// Gather / Scatter for vector 2 is not profitable on KNL / SKX
// Vector-4 of gather/scatter instruction does not exist on KNL.
@@ -3337,12 +4017,13 @@ bool X86TTIImpl::canMacroFuseCmp() {
return ST->hasMacroFusion() || ST->hasBranchFusion();
}
-bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment) {
+bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, Align Alignment) {
if (!ST->hasAVX())
return false;
// The backend can't handle a single element vector.
- if (isa<VectorType>(DataTy) && DataTy->getVectorNumElements() == 1)
+ if (isa<VectorType>(DataTy) &&
+ cast<FixedVectorType>(DataTy)->getNumElements() == 1)
return false;
Type *ScalarTy = DataTy->getScalarType();
@@ -3360,7 +4041,7 @@ bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment) {
((IntWidth == 8 || IntWidth == 16) && ST->hasBWI());
}
-bool X86TTIImpl::isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) {
+bool X86TTIImpl::isLegalMaskedStore(Type *DataType, Align Alignment) {
return isLegalMaskedLoad(DataType, Alignment);
}
@@ -3407,10 +4088,10 @@ bool X86TTIImpl::isLegalMaskedExpandLoad(Type *DataTy) {
return false;
// The backend can't handle a single element vector.
- if (DataTy->getVectorNumElements() == 1)
+ if (cast<FixedVectorType>(DataTy)->getNumElements() == 1)
return false;
- Type *ScalarTy = DataTy->getVectorElementType();
+ Type *ScalarTy = cast<VectorType>(DataTy)->getElementType();
if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
return true;
@@ -3427,7 +4108,7 @@ bool X86TTIImpl::isLegalMaskedCompressStore(Type *DataTy) {
return isLegalMaskedExpandLoad(DataTy);
}
-bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, MaybeAlign Alignment) {
+bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, Align Alignment) {
// Some CPUs have better gather performance than others.
// TODO: Remove the explicit ST->hasAVX512()?, That would mean we would only
// enable gather with a -march.
@@ -3446,8 +4127,8 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, MaybeAlign Alignment) {
// In this case we can reject non-power-of-2 vectors.
// We also reject single element vectors as the type legalizer can't
// scalarize them.
- if (isa<VectorType>(DataTy)) {
- unsigned NumElts = DataTy->getVectorNumElements();
+ if (auto *DataVTy = dyn_cast<FixedVectorType>(DataTy)) {
+ unsigned NumElts = DataVTy->getNumElements();
if (NumElts == 1 || !isPowerOf2_32(NumElts))
return false;
}
@@ -3465,7 +4146,7 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, MaybeAlign Alignment) {
return IntWidth == 32 || IntWidth == 64;
}
-bool X86TTIImpl::isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) {
+bool X86TTIImpl::isLegalMaskedScatter(Type *DataType, Align Alignment) {
// AVX2 doesn't support scatter
if (!ST->hasAVX512())
return false;
@@ -3505,11 +4186,22 @@ bool X86TTIImpl::areFunctionArgsABICompatible(
// If we get here, we know the target features match. If one function
// considers 512-bit vectors legal and the other does not, consider them
// incompatible.
- // FIXME Look at the arguments and only consider 512 bit or larger vectors?
const TargetMachine &TM = getTLI()->getTargetMachine();
- return TM.getSubtarget<X86Subtarget>(*Caller).useAVX512Regs() ==
- TM.getSubtarget<X86Subtarget>(*Callee).useAVX512Regs();
+ if (TM.getSubtarget<X86Subtarget>(*Caller).useAVX512Regs() ==
+ TM.getSubtarget<X86Subtarget>(*Callee).useAVX512Regs())
+ return true;
+
+ // Consider the arguments compatible if they aren't vectors or aggregates.
+ // FIXME: Look at the size of vectors.
+ // FIXME: Look at the element types of aggregates to see if there are vectors.
+ // FIXME: The API of this function seems intended to allow arguments
+ // to be removed from the set, but the caller doesn't check if the set
+ // becomes empty so that may not work in practice.
+ return llvm::none_of(Args, [](Argument *A) {
+ auto *EltTy = cast<PointerType>(A->getType())->getElementType();
+ return EltTy->isVectorTy() || EltTy->isAggregateType();
+ });
}
X86TTIImpl::TTI::MemCmpExpansionOptions
@@ -3517,6 +4209,8 @@ X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
TTI::MemCmpExpansionOptions Options;
Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
Options.NumLoadsPerBlock = 2;
+ // All GPR and vector loads can be unaligned.
+ Options.AllowOverlappingLoads = true;
if (IsZeroCmp) {
// Only enable vector loads for equality comparison. Right now the vector
// version is not as fast for three way compare (see #33329).
@@ -3524,8 +4218,6 @@ X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
if (PreferredWidth >= 512 && ST->hasAVX512()) Options.LoadSizes.push_back(64);
if (PreferredWidth >= 256 && ST->hasAVX()) Options.LoadSizes.push_back(32);
if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16);
- // All GPR and vector loads can be unaligned.
- Options.AllowOverlappingLoads = true;
}
if (ST->is64Bit()) {
Options.LoadSizes.push_back(8);
@@ -3555,24 +4247,22 @@ bool X86TTIImpl::enableInterleavedAccessVectorization() {
// computing the cost using a generic formula as a function of generic
// shuffles. We therefore use a lookup table instead, filled according to
// the instruction sequences that codegen currently generates.
-int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- bool UseMaskForCond,
- bool UseMaskForGaps) {
+int X86TTIImpl::getInterleavedMemoryOpCostAVX2(
+ unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
+ ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) {
if (UseMaskForCond || UseMaskForGaps)
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace,
+ Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);
// We currently Support only fully-interleaved groups, with no gaps.
// TODO: Support also strided loads (interleaved-groups with gaps).
if (Indices.size() && Indices.size() != Factor)
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ CostKind);
// VecTy for interleave memop is <VF*Factor x Elt>.
// So, for VF=4, Interleave Factor = 3, Element type = i32 we have
@@ -3584,10 +4274,11 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
// (see MachineValueType.h::getVectorVT()).
if (!LegalVT.isVector())
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ CostKind);
- unsigned VF = VecTy->getVectorNumElements() / Factor;
- Type *ScalarTy = VecTy->getVectorElementType();
+ unsigned VF = VecTy->getNumElements() / Factor;
+ Type *ScalarTy = VecTy->getElementType();
// Calculate the number of memory operations (NumOfMemOps) required
// to load/store the VecTy.
@@ -3596,16 +4287,18 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
// Get the cost of one memory operation.
- Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
- LegalVT.getVectorNumElements());
+ auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(),
+ LegalVT.getVectorNumElements());
unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
- MaybeAlign(Alignment), AddressSpace);
+ MaybeAlign(Alignment), AddressSpace,
+ CostKind);
- VectorType *VT = VectorType::get(ScalarTy, VF);
+ auto *VT = FixedVectorType::get(ScalarTy, VF);
EVT ETy = TLI->getValueType(DL, VT);
if (!ETy.isSimple())
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ CostKind);
// TODO: Complete for other data-types and strides.
// Each combination of Stride, ElementTy and VF results in a different
@@ -3664,24 +4357,21 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace, CostKind);
}
// Get a cost estimate for interleaved load/store operations and strided loads.
// \p Indices contains indices for strided loads.
// \p Factor - the interleaving factor.
// AVX-512 provides 3-src shuffles that significantly reduce the cost.
-int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- bool UseMaskForCond,
- bool UseMaskForGaps) {
+int X86TTIImpl::getInterleavedMemoryOpCostAVX512(
+ unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
+ ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) {
if (UseMaskForCond || UseMaskForGaps)
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace,
+ Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);
// VecTy for interleave memop is <VF*Factor x Elt>.
@@ -3696,12 +4386,13 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
// Get the cost of one memory operation.
- Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
- LegalVT.getVectorNumElements());
+ auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(),
+ LegalVT.getVectorNumElements());
unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
- MaybeAlign(Alignment), AddressSpace);
+ MaybeAlign(Alignment), AddressSpace,
+ CostKind);
- unsigned VF = VecTy->getVectorNumElements() / Factor;
+ unsigned VF = VecTy->getNumElements() / Factor;
MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);
if (Opcode == Instruction::Load) {
@@ -3733,8 +4424,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
unsigned NumOfLoadsInInterleaveGrp =
Indices.size() ? Indices.size() : Factor;
- Type *ResultTy = VectorType::get(VecTy->getVectorElementType(),
- VecTy->getVectorNumElements() / Factor);
+ auto *ResultTy = FixedVectorType::get(VecTy->getElementType(),
+ VecTy->getNumElements() / Factor);
unsigned NumOfResults =
getTLI()->getTypeLegalizationCost(DL, ResultTy).first *
NumOfLoadsInInterleaveGrp;
@@ -3796,15 +4487,12 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
return Cost;
}
-int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- bool UseMaskForCond,
- bool UseMaskForGaps) {
+int X86TTIImpl::getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond, bool UseMaskForGaps) {
auto isSupportedOnAVX512 = [](Type *VecTy, bool HasBW) {
- Type *EltTy = VecTy->getVectorElementType();
+ Type *EltTy = cast<VectorType>(VecTy)->getElementType();
if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) ||
EltTy->isIntegerTy(32) || EltTy->isPointerTy())
return true;
@@ -3813,15 +4501,15 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
return false;
};
if (ST->hasAVX512() && isSupportedOnAVX512(VecTy, ST->hasBWI()))
- return getInterleavedMemoryOpCostAVX512(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace,
- UseMaskForCond, UseMaskForGaps);
+ return getInterleavedMemoryOpCostAVX512(
+ Opcode, cast<FixedVectorType>(VecTy), Factor, Indices, Alignment,
+ AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps);
if (ST->hasAVX2())
- return getInterleavedMemoryOpCostAVX2(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace,
- UseMaskForCond, UseMaskForGaps);
+ return getInterleavedMemoryOpCostAVX2(
+ Opcode, cast<FixedVectorType>(VecTy), Factor, Indices, Alignment,
+ AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps);
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace,
+ Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h
index b9c2dbd78058..d462e1f96ca2 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -16,11 +16,9 @@
#ifndef LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H
-#include "X86.h"
#include "X86TargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
-#include "llvm/CodeGen/TargetLowering.h"
namespace llvm {
@@ -107,9 +105,9 @@ public:
/// \name Cache TTI Implementation
/// @{
llvm::Optional<unsigned> getCacheSize(
- TargetTransformInfo::CacheLevel Level) const;
+ TargetTransformInfo::CacheLevel Level) const override;
llvm::Optional<unsigned> getCacheAssociativity(
- TargetTransformInfo::CacheLevel Level) const;
+ TargetTransformInfo::CacheLevel Level) const override;
/// @}
/// \name Vector TTI Implementations
@@ -121,76 +119,90 @@ public:
unsigned getMaxInterleaveFactor(unsigned VF);
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
- int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+ int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+ VectorType *SubTp);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
+ bool Insert, bool Extract);
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
- unsigned AddressSpace, const Instruction *I = nullptr);
- int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
- int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
- bool VariableMask, unsigned Alignment);
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
+ int getMaskedMemoryOpCost(
+ unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency);
+ int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
+ bool VariableMask, Align Alignment,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I);
int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
const SCEV *Ptr);
unsigned getAtomicMemIntrinsicMaxElementSize() const;
- int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF,
- unsigned ScalarizationCostPassed = UINT_MAX);
- int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF,
- unsigned VF = 1);
-
- int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwiseForm);
-
- int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
- bool IsUnsigned);
-
- int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false);
- int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
- unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false);
- int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
- unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false);
+ int getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind);
+ int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind);
+
+ int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+ bool IsPairwiseForm,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency);
+
+ int getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned);
+
+ int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+ bool IsPairwiseForm, bool IsUnsigned,
+ TTI::TargetCostKind CostKind);
+
+ int getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false);
+ int getInterleavedMemoryOpCostAVX512(
+ unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
+ ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false);
+ int getInterleavedMemoryOpCostAVX2(
+ unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
+ ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false);
int getIntImmCost(int64_t);
- int getIntImmCost(const APInt &Imm, Type *Ty);
+ int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
- unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
+ unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
- int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+ int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind);
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ Type *Ty, TTI::TargetCostKind CostKind);
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2);
bool canMacroFuseCmp();
- bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment);
- bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment);
+ bool isLegalMaskedLoad(Type *DataType, Align Alignment);
+ bool isLegalMaskedStore(Type *DataType, Align Alignment);
bool isLegalNTLoad(Type *DataType, Align Alignment);
bool isLegalNTStore(Type *DataType, Align Alignment);
- bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment);
- bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment);
+ bool isLegalMaskedGather(Type *DataType, Align Alignment);
+ bool isLegalMaskedScatter(Type *DataType, Align Alignment);
bool isLegalMaskedExpandLoad(Type *DataType);
bool isLegalMaskedCompressStore(Type *DataType);
bool hasDivRemOp(Type *DataType, bool IsSigned);
@@ -203,11 +215,20 @@ public:
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
bool enableInterleavedAccessVectorization();
+
+ /// Allow vectorizers to form reduction intrinsics in IR. The IR is expanded
+ /// into shuffles and vector math/logic by the backend
+ /// (see TTI::shouldExpandReduction)
+ bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
+ TTI::ReductionFlags Flags) const {
+ return true;
+ }
+
private:
int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask,
- unsigned Alignment, unsigned AddressSpace);
- int getGSVectorCost(unsigned Opcode, Type *DataTy, Value *Ptr,
- unsigned Alignment, unsigned AddressSpace);
+ Align Alignment, unsigned AddressSpace);
+ int getGSVectorCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
+ Align Alignment, unsigned AddressSpace);
/// @}
};
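Most of the interfaces above now carry a TTI::TargetCostKind argument. A hypothetical caller, sketched against the public TargetTransformInfo wrapper as assumed for this LLVM version, to show how the new parameter is passed through:

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Hypothetical helper: ask for the immediate-materialization cost under the
// reciprocal-throughput kind; TCK_Latency, TCK_CodeSize and TCK_SizeAndLatency
// are the other accepted kinds.
int immThroughputCost(const TargetTransformInfo &TTI, const APInt &Imm,
                      Type *Ty) {
  return TTI.getIntImmCost(Imm, Ty, TargetTransformInfo::TCK_RecipThroughput);
}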
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86VZeroUpper.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86VZeroUpper.cpp
index 7a8308ef1ba9..c188c7443625 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86VZeroUpper.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86VZeroUpper.cpp
@@ -39,6 +39,11 @@ using namespace llvm;
#define DEBUG_TYPE "x86-vzeroupper"
+static cl::opt<bool>
+UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
+ cl::desc("Minimize AVX to SSE transition penalty"),
+ cl::init(true));
+
STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
namespace {
@@ -278,6 +283,9 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
/// Loop over all of the basic blocks, inserting vzeroupper instructions before
/// function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
+ if (!UseVZeroUpper)
+ return false;
+
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
if (!ST.hasAVX() || !ST.insertVZEROUPPER())
return false;
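The new UseVZeroUpper option follows the usual cl::opt pattern for gating a machine pass from the command line; a minimal sketch of that pattern with a hypothetical flag name:

#include "llvm/Support/CommandLine.h"

// Hypothetical hidden flag; defaults to enabled and can be turned off for
// debugging or benchmarking.
static llvm::cl::opt<bool>
    EnableHypotheticalPass("x86-enable-hypothetical-pass", llvm::cl::Hidden,
                           llvm::cl::desc("Gate an optional transformation"),
                           llvm::cl::init(true));

// The pass then bails out at the top of runOnMachineFunction, before touching
// the subtarget or scanning any blocks:
//   if (!EnableHypotheticalPass)
//     return false;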
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86WinAllocaExpander.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86WinAllocaExpander.cpp
index 42e8fba2201e..72593afb2258 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86WinAllocaExpander.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86WinAllocaExpander.cpp
@@ -19,6 +19,7 @@
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86WinEHState.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86WinEHState.cpp
index 78d3f6460189..8627bbbf18d2 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86WinEHState.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86WinEHState.cpp
@@ -19,7 +19,7 @@
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
-#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -67,13 +67,13 @@ private:
Function *generateLSDAInEAXThunk(Function *ParentFunc);
- bool isStateStoreNeeded(EHPersonality Personality, CallSite CS);
- void rewriteSetJmpCallSite(IRBuilder<> &Builder, Function &F, CallSite CS,
- Value *State);
+ bool isStateStoreNeeded(EHPersonality Personality, CallBase &Call);
+ void rewriteSetJmpCall(IRBuilder<> &Builder, Function &F, CallBase &Call,
+ Value *State);
int getBaseStateForBB(DenseMap<BasicBlock *, ColorVector> &BlockColors,
WinEHFuncInfo &FuncInfo, BasicBlock *BB);
- int getStateForCallSite(DenseMap<BasicBlock *, ColorVector> &BlockColors,
- WinEHFuncInfo &FuncInfo, CallSite CS);
+ int getStateForCall(DenseMap<BasicBlock *, ColorVector> &BlockColors,
+ WinEHFuncInfo &FuncInfo, CallBase &Call);
// Module-level type getters.
Type *getEHLinkRegistrationType();
@@ -455,16 +455,14 @@ void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder) {
// The idea behind _setjmp3 is that it takes an optional number of personality
// specific parameters to indicate how to restore the personality-specific frame
// state when longjmp is initiated. Typically, the current TryLevel is saved.
-void WinEHStatePass::rewriteSetJmpCallSite(IRBuilder<> &Builder, Function &F,
- CallSite CS, Value *State) {
+void WinEHStatePass::rewriteSetJmpCall(IRBuilder<> &Builder, Function &F,
+ CallBase &Call, Value *State) {
// Don't rewrite calls with a weird number of arguments.
- if (CS.getNumArgOperands() != 2)
+ if (Call.getNumArgOperands() != 2)
return;
- Instruction *Inst = CS.getInstruction();
-
SmallVector<OperandBundleDef, 1> OpBundles;
- CS.getOperandBundlesAsDefs(OpBundles);
+ Call.getOperandBundlesAsDefs(OpBundles);
SmallVector<Value *, 3> OptionalArgs;
if (Personality == EHPersonality::MSVC_CXX) {
@@ -482,29 +480,27 @@ void WinEHStatePass::rewriteSetJmpCallSite(IRBuilder<> &Builder, Function &F,
SmallVector<Value *, 5> Args;
Args.push_back(
- Builder.CreateBitCast(CS.getArgOperand(0), Builder.getInt8PtrTy()));
+ Builder.CreateBitCast(Call.getArgOperand(0), Builder.getInt8PtrTy()));
Args.push_back(Builder.getInt32(OptionalArgs.size()));
Args.append(OptionalArgs.begin(), OptionalArgs.end());
- CallSite NewCS;
- if (CS.isCall()) {
- auto *CI = cast<CallInst>(Inst);
+ CallBase *NewCall;
+ if (auto *CI = dyn_cast<CallInst>(&Call)) {
CallInst *NewCI = Builder.CreateCall(SetJmp3, Args, OpBundles);
NewCI->setTailCallKind(CI->getTailCallKind());
- NewCS = NewCI;
+ NewCall = NewCI;
} else {
- auto *II = cast<InvokeInst>(Inst);
- NewCS = Builder.CreateInvoke(
+ auto *II = cast<InvokeInst>(&Call);
+ NewCall = Builder.CreateInvoke(
SetJmp3, II->getNormalDest(), II->getUnwindDest(), Args, OpBundles);
}
- NewCS.setCallingConv(CS.getCallingConv());
- NewCS.setAttributes(CS.getAttributes());
- NewCS->setDebugLoc(CS->getDebugLoc());
-
- Instruction *NewInst = NewCS.getInstruction();
- NewInst->takeName(Inst);
- Inst->replaceAllUsesWith(NewInst);
- Inst->eraseFromParent();
+ NewCall->setCallingConv(Call.getCallingConv());
+ NewCall->setAttributes(Call.getAttributes());
+ NewCall->setDebugLoc(Call.getDebugLoc());
+
+ NewCall->takeName(&Call);
+ Call.replaceAllUsesWith(NewCall);
+ Call.eraseFromParent();
}
// Figure out what state we should assign calls in this block.
@@ -527,17 +523,17 @@ int WinEHStatePass::getBaseStateForBB(
}
// Calculate the state a call-site is in.
-int WinEHStatePass::getStateForCallSite(
+int WinEHStatePass::getStateForCall(
DenseMap<BasicBlock *, ColorVector> &BlockColors, WinEHFuncInfo &FuncInfo,
- CallSite CS) {
- if (auto *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ CallBase &Call) {
+ if (auto *II = dyn_cast<InvokeInst>(&Call)) {
// Look up the state number of the EH pad this unwinds to.
assert(FuncInfo.InvokeStateMap.count(II) && "invoke has no state!");
return FuncInfo.InvokeStateMap[II];
}
// Possibly throwing call instructions have no actions to take after
// an unwind. Ensure they are in the -1 state.
- return getBaseStateForBB(BlockColors, FuncInfo, CS.getParent());
+ return getBaseStateForBB(BlockColors, FuncInfo, Call.getParent());
}
// Calculate the intersection of all the FinalStates for a BasicBlock's
@@ -618,16 +614,13 @@ static int getSuccState(DenseMap<BasicBlock *, int> &InitialStates, Function &F,
}
bool WinEHStatePass::isStateStoreNeeded(EHPersonality Personality,
- CallSite CS) {
- if (!CS)
- return false;
-
+ CallBase &Call) {
// If the function touches memory, it needs a state store.
if (isAsynchronousEHPersonality(Personality))
- return !CS.doesNotAccessMemory();
+ return !Call.doesNotAccessMemory();
// If the function throws, it needs a state store.
- return !CS.doesNotThrow();
+ return !Call.doesNotThrow();
}
void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) {
@@ -672,11 +665,11 @@ void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) {
if (&F.getEntryBlock() == BB)
InitialState = FinalState = ParentBaseState;
for (Instruction &I : *BB) {
- CallSite CS(&I);
- if (!isStateStoreNeeded(Personality, CS))
+ auto *Call = dyn_cast<CallBase>(&I);
+ if (!Call || !isStateStoreNeeded(Personality, *Call))
continue;
- int State = getStateForCallSite(BlockColors, FuncInfo, CS);
+ int State = getStateForCall(BlockColors, FuncInfo, *Call);
if (InitialState == OverdefinedState)
InitialState = State;
FinalState = State;
@@ -739,11 +732,11 @@ void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) {
<< " PrevState=" << PrevState << '\n');
for (Instruction &I : *BB) {
- CallSite CS(&I);
- if (!isStateStoreNeeded(Personality, CS))
+ auto *Call = dyn_cast<CallBase>(&I);
+ if (!Call || !isStateStoreNeeded(Personality, *Call))
continue;
- int State = getStateForCallSite(BlockColors, FuncInfo, CS);
+ int State = getStateForCall(BlockColors, FuncInfo, *Call);
if (State != PrevState)
insertStateNumberStore(&I, State);
PrevState = State;
@@ -756,35 +749,35 @@ void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) {
insertStateNumberStore(BB->getTerminator(), EndState->second);
}
- SmallVector<CallSite, 1> SetJmp3CallSites;
+ SmallVector<CallBase *, 1> SetJmp3Calls;
for (BasicBlock *BB : RPOT) {
for (Instruction &I : *BB) {
- CallSite CS(&I);
- if (!CS)
+ auto *Call = dyn_cast<CallBase>(&I);
+ if (!Call)
continue;
- if (CS.getCalledValue()->stripPointerCasts() !=
+ if (Call->getCalledOperand()->stripPointerCasts() !=
SetJmp3.getCallee()->stripPointerCasts())
continue;
- SetJmp3CallSites.push_back(CS);
+ SetJmp3Calls.push_back(Call);
}
}
- for (CallSite CS : SetJmp3CallSites) {
- auto &BBColors = BlockColors[CS->getParent()];
+ for (CallBase *Call : SetJmp3Calls) {
+ auto &BBColors = BlockColors[Call->getParent()];
BasicBlock *FuncletEntryBB = BBColors.front();
bool InCleanup = isa<CleanupPadInst>(FuncletEntryBB->getFirstNonPHI());
- IRBuilder<> Builder(CS.getInstruction());
+ IRBuilder<> Builder(Call);
Value *State;
if (InCleanup) {
Value *StateField = Builder.CreateStructGEP(RegNode->getAllocatedType(),
RegNode, StateFieldIndex);
State = Builder.CreateLoad(Builder.getInt32Ty(), StateField);
} else {
- State = Builder.getInt32(getStateForCallSite(BlockColors, FuncInfo, CS));
+ State = Builder.getInt32(getStateForCall(BlockColors, FuncInfo, *Call));
}
- rewriteSetJmpCallSite(Builder, F, CS, State);
+ rewriteSetJmpCall(Builder, F, *Call, State);
}
}
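Throughout this file the removed CallSite wrapper is replaced by direct casts to CallBase. An illustrative helper (not part of the pass) showing that idiom under the LLVM 11 IR API:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Instead of constructing a CallSite for every instruction and testing it for
// validity, dyn_cast the instruction to CallBase directly.
static void forEachCall(BasicBlock &BB, function_ref<void(CallBase &)> Fn) {
  for (Instruction &I : BB)
    if (auto *Call = dyn_cast<CallBase>(&I))
      Fn(*Call);
}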
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
index ae19e2a78eec..4c1c87cc1e68 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
@@ -28,5 +28,7 @@ XCoreMCAsmInfo::XCoreMCAsmInfo(const Triple &TT) {
// Debug
ExceptionsType = ExceptionHandling::DwarfCFI;
DwarfRegNumForCFI = true;
+
+ UseIntegratedAssembler = false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 46ebccee521e..4de252548961 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -60,7 +60,7 @@ static MCAsmInfo *createXCoreMCAsmInfo(const MCRegisterInfo &MRI,
MCAsmInfo *MAI = new XCoreMCAsmInfo(TT);
// Initial state of the frame pointer is SP.
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, XCore::SP, 0);
+ MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, XCore::SP, 0);
MAI->addInitialFrameState(Inst);
return MAI;
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
index 3e56302f4add..096b22415a22 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -13,12 +13,6 @@
#ifndef LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCTARGETDESC_H
#define LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCTARGETDESC_H
-namespace llvm {
-
-class Target;
-
-} // end namespace llvm
-
// Defines symbolic names for XCore registers. This defines a mapping from
// register name to register number.
//
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCore.h b/contrib/llvm-project/llvm/lib/Target/XCore/XCore.h
index b7b86be9ab51..d31c34910ef6 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCore.h
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCore.h
@@ -22,7 +22,6 @@ namespace llvm {
class ModulePass;
class TargetMachine;
class XCoreTargetMachine;
- class formatted_raw_ostream;
void initializeXCoreLowerThreadLocalPass(PassRegistry &p);
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
index 35dc56e90419..4ea775305e12 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -72,12 +72,12 @@ namespace {
const char *ExtraCode, raw_ostream &O) override;
void emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV);
- void EmitGlobalVariable(const GlobalVariable *GV) override;
+ void emitGlobalVariable(const GlobalVariable *GV) override;
- void EmitFunctionEntryLabel() override;
- void EmitInstruction(const MachineInstr *MI) override;
- void EmitFunctionBodyStart() override;
- void EmitFunctionBodyEnd() override;
+ void emitFunctionEntryLabel() override;
+ void emitInstruction(const MachineInstr *MI) override;
+ void emitFunctionBodyStart() override;
+ void emitFunctionBodyEnd() override;
};
} // end of anonymous namespace
@@ -93,21 +93,20 @@ void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) {
MCSymbol *SymGlob = OutContext.getOrCreateSymbol(
Twine(Sym->getName() + StringRef(".globound")));
- OutStreamer->EmitSymbolAttribute(SymGlob, MCSA_Global);
- OutStreamer->EmitAssignment(SymGlob,
+ OutStreamer->emitSymbolAttribute(SymGlob, MCSA_Global);
+ OutStreamer->emitAssignment(SymGlob,
MCConstantExpr::create(ATy->getNumElements(),
OutContext));
if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
GV->hasCommonLinkage()) {
- OutStreamer->EmitSymbolAttribute(SymGlob, MCSA_Weak);
+ OutStreamer->emitSymbolAttribute(SymGlob, MCSA_Weak);
}
}
}
-void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+void XCoreAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
// Check to see if this is a special global used by LLVM, if so, emit it.
- if (!GV->hasInitializer() ||
- EmitSpecialLLVMGlobal(GV))
+ if (!GV->hasInitializer() || emitSpecialLLVMGlobal(GV))
return;
const DataLayout &DL = getDataLayout();
@@ -130,11 +129,11 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
case GlobalValue::ExternalLinkage:
case GlobalValue::CommonLinkage:
emitArrayBound(GVSym, GV);
- OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global);
+ OutStreamer->emitSymbolAttribute(GVSym, MCSA_Global);
if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
GV->hasCommonLinkage())
- OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Weak);
+ OutStreamer->emitSymbolAttribute(GVSym, MCSA_Weak);
LLVM_FALLTHROUGH;
case GlobalValue::InternalLinkage:
case GlobalValue::PrivateLinkage:
@@ -143,43 +142,43 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
llvm_unreachable("Unknown linkage type!");
}
- EmitAlignment(std::max(Alignment, Align(4)), GV);
+ emitAlignment(std::max(Alignment, Align(4)), GV);
if (GV->isThreadLocal()) {
report_fatal_error("TLS is not supported by this target!");
}
unsigned Size = DL.getTypeAllocSize(C->getType());
if (MAI->hasDotTypeDotSizeDirective()) {
- OutStreamer->EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
+ OutStreamer->emitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
OutStreamer->emitELFSize(GVSym, MCConstantExpr::create(Size, OutContext));
}
- OutStreamer->EmitLabel(GVSym);
+ OutStreamer->emitLabel(GVSym);
- EmitGlobalConstant(DL, C);
+ emitGlobalConstant(DL, C);
// The ABI requires that unsigned scalar types smaller than 32 bits
// are padded to 32 bits.
if (Size < 4)
- OutStreamer->EmitZeros(4 - Size);
+ OutStreamer->emitZeros(4 - Size);
// Mark the end of the global
getTargetStreamer().emitCCBottomData(GVSym->getName());
}
-void XCoreAsmPrinter::EmitFunctionBodyStart() {
+void XCoreAsmPrinter::emitFunctionBodyStart() {
MCInstLowering.Initialize(&MF->getContext());
}
/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
/// the last basic block in the function.
-void XCoreAsmPrinter::EmitFunctionBodyEnd() {
+void XCoreAsmPrinter::emitFunctionBodyEnd() {
// Emit function end directives
getTargetStreamer().emitCCBottomFunction(CurrentFnSym->getName());
}
-void XCoreAsmPrinter::EmitFunctionEntryLabel() {
+void XCoreAsmPrinter::emitFunctionEntryLabel() {
// Mark the start of the function
getTargetStreamer().emitCCTopFunction(CurrentFnSym->getName());
- OutStreamer->EmitLabel(CurrentFnSym);
+ OutStreamer->emitLabel(CurrentFnSym);
}
void XCoreAsmPrinter::
@@ -256,7 +255,7 @@ bool XCoreAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
-void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void XCoreAsmPrinter::emitInstruction(const MachineInstr *MI) {
SmallString<128> Str;
raw_svector_ostream O(Str);
@@ -268,7 +267,7 @@ void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
O << "\tmov "
<< XCoreInstPrinter::getRegisterName(MI->getOperand(0).getReg()) << ", "
<< XCoreInstPrinter::getRegisterName(MI->getOperand(1).getReg());
- OutStreamer->EmitRawText(O.str());
+ OutStreamer->emitRawText(O.str());
return;
}
break;
@@ -281,7 +280,7 @@ void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
else
printInlineJT32(MI, 0, O);
O << '\n';
- OutStreamer->EmitRawText(O.str());
+ OutStreamer->emitRawText(O.str());
return;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
index fd8b37e26e47..27ac6a4d1439 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -73,7 +73,7 @@ static void EmitDefCfaOffset(MachineBasicBlock &MBB,
int Offset) {
MachineFunction &MF = *MBB.getParent();
unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -Offset));
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
@@ -179,7 +179,7 @@ static MachineMemOperand *getFrameIndexMMO(MachineBasicBlock &MBB,
const MachineFrameInfo &MFI = MF->getFrameInfo();
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FrameIndex), flags,
- MFI.getObjectSize(FrameIndex), MFI.getObjectAlignment(FrameIndex));
+ MFI.getObjectSize(FrameIndex), MFI.getObjectAlign(FrameIndex));
return MMO;
}
@@ -233,9 +233,9 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF,
// to determine the end of the prologue.
DebugLoc dl;
- if (MFI.getMaxAlignment() > getStackAlignment())
- report_fatal_error("emitPrologue unsupported alignment: "
- + Twine(MFI.getMaxAlignment()));
+ if (MFI.getMaxAlign() > getStackAlign())
+ report_fatal_error("emitPrologue unsupported alignment: " +
+ Twine(MFI.getMaxAlign().value()));
const AttributeList &PAL = MF.getFunction().getAttributes();
if (PAL.hasAttrSomewhere(Attribute::Nest))
@@ -412,11 +412,9 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
} // else Don't erase the return instruction.
}
-bool XCoreFrameLowering::
-spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+bool XCoreFrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return true;
@@ -429,8 +427,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
if (MI != MBB.end() && !MI->isDebugInstr())
DL = MI->getDebugLoc();
- for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
- it != CSI.end(); ++it) {
+ for (auto it = CSI.begin(); it != CSI.end(); ++it) {
unsigned Reg = it->getReg();
assert(Reg != XCore::LR && !(Reg == XCore::R10 && hasFP(*MF)) &&
"LR & FP are always handled in emitPrologue");
@@ -448,25 +445,22 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
-bool XCoreFrameLowering::
-restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const{
+bool XCoreFrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
bool AtStart = MI == MBB.begin();
MachineBasicBlock::iterator BeforeI = MI;
if (!AtStart)
--BeforeI;
- for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
- it != CSI.end(); ++it) {
- unsigned Reg = it->getReg();
+ for (const CalleeSavedInfo &CSR : CSI) {
+ unsigned Reg = CSR.getReg();
assert(Reg != XCore::LR && !(Reg == XCore::R10 && hasFP(*MF)) &&
"LR & FP are always handled in emitEpilogue");
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(MBB, MI, Reg, it->getFrameIdx(), RC, TRI);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSR.getFrameIdx(), RC, TRI);
assert(MI != MBB.begin() &&
"loadRegFromStackSlot didn't insert any code!");
// Insert in reverse order. loadRegFromStackSlot can insert multiple
@@ -496,8 +490,7 @@ MachineBasicBlock::iterator XCoreFrameLowering::eliminateCallFramePseudoInstr(
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- unsigned Align = getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
+ Amount = alignTo(Amount, getStackAlign());
assert(Amount%4 == 0);
Amount /= 4;
@@ -582,9 +575,9 @@ processFunctionBeforeFrameFinalized(MachineFunction &MF,
// When using SP for large frames, we may need 2 scratch registers.
// When using FP, for large or small frames, we may need 1 scratch register.
unsigned Size = TRI.getSpillSize(RC);
- unsigned Align = TRI.getSpillAlignment(RC);
+ Align Alignment = TRI.getSpillAlign(RC);
if (XFI->isLargeFrame(MF) || hasFP(MF))
- RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
if (XFI->isLargeFrame(MF) && !hasFP(MF))
- RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
}
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameLowering.h
index 95c3a2973033..a914d82e1989 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameLowering.h
@@ -31,14 +31,16 @@ namespace llvm {
void emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const override;
- bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const override;
- bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const override;
+ bool
+ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
+ bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index b1f9717fbddc..b300697cc5ae 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -152,7 +152,7 @@ void XCoreDAGToDAGISel::Select(SDNode *N) {
CurDAG->getEntryNode());
MachineMemOperand *MemOp =
MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
- MachineMemOperand::MOLoad, 4, 4);
+ MachineMemOperand::MOLoad, 4, Align(4));
CurDAG->setNodeMemRefs(cast<MachineSDNode>(node), {MemOp});
ReplaceNode(N, node);
return;
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index 70770f4c8e7c..c32653137a10 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -107,6 +107,7 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::BITREVERSE , MVT::i32, Legal);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
@@ -328,10 +329,10 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
SDValue Res;
if (CP->isMachineConstantPoolEntry()) {
Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
- CP->getAlignment(), CP->getOffset());
+ CP->getAlign(), CP->getOffset());
} else {
- Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
- CP->getAlignment(), CP->getOffset());
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
+ CP->getOffset());
}
return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, Res);
}
@@ -434,7 +435,7 @@ SDValue XCoreTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
Offset, DAG);
}
if (TLI.isGAPlusOffset(BasePtr.getNode(), GV, Offset) &&
- MinAlign(GV->getAlignment(), 4) == 4) {
+ GV->getPointerAlignment(DAG.getDataLayout()) >= 4) {
SDValue NewBasePtr = DAG.getGlobalAddress(GV, DL,
BasePtr->getValueType(0));
return lowerLoadWordFromAlignedBasePlusOffset(DL, Chain, NewBasePtr,
@@ -996,7 +997,7 @@ LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const {
}
MachineMemOperand::Flags
-XCoreTargetLowering::getMMOFlags(const Instruction &I) const {
+XCoreTargetLowering::getTargetMMOFlags(const Instruction &I) const {
// Because of how we convert atomic_load and atomic_store to normal loads and
// stores in the DAG, we need to ensure that the MMOs are marked volatile
// since DAGCombine hasn't been updated to account for atomic, but non
@@ -1118,7 +1119,7 @@ SDValue XCoreTargetLowering::LowerCCCCallTo(
// The ABI dictates there should be one stack slot available to the callee
// on function entry (for saving lr).
- CCInfo.AllocateStack(4, 4);
+ CCInfo.AllocateStack(4, Align(4));
CCInfo.AnalyzeCallOperands(Outs, CC_XCore);
@@ -1126,7 +1127,7 @@ SDValue XCoreTargetLowering::LowerCCCCallTo(
// Analyze return values to determine the number of bytes of stack required.
CCState RetCCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
- RetCCInfo.AllocateStack(CCInfo.getNextStackOffset(), 4);
+ RetCCInfo.AllocateStack(CCInfo.getNextStackOffset(), Align(4));
RetCCInfo.AnalyzeCallResult(Ins, RetCC_XCore);
// Get a count of how many bytes are to be pushed on the stack.
@@ -1391,16 +1392,16 @@ SDValue XCoreTargetLowering::LowerCCCArguments(
ArgDI != ArgDE; ++ArgDI) {
if (ArgDI->Flags.isByVal() && ArgDI->Flags.getByValSize()) {
unsigned Size = ArgDI->Flags.getByValSize();
- unsigned Align = std::max(StackSlotSize, ArgDI->Flags.getByValAlign());
+ Align Alignment =
+ std::max(Align(StackSlotSize), ArgDI->Flags.getNonZeroByValAlign());
// Create a new object on the stack and copy the pointee into it.
- int FI = MFI.CreateStackObject(Size, Align, false);
+ int FI = MFI.CreateStackObject(Size, Alignment, false);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
InVals.push_back(FIN);
- MemOps.push_back(DAG.getMemcpy(Chain, dl, FIN, ArgDI->SDV,
- DAG.getConstant(Size, dl, MVT::i32),
- Align, false, false, false,
- MachinePointerInfo(),
- MachinePointerInfo()));
+ MemOps.push_back(DAG.getMemcpy(
+ Chain, dl, FIN, ArgDI->SDV, DAG.getConstant(Size, dl, MVT::i32),
+ Alignment, false, false, false, MachinePointerInfo(),
+ MachinePointerInfo()));
} else {
InVals.push_back(ArgDI->SDV);
}
@@ -1454,7 +1455,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Analyze return values.
if (!isVarArg)
- CCInfo.AllocateStack(XFI->getReturnStackOffset(), 4);
+ CCInfo.AllocateStack(XFI->getReturnStackOffset(), Align(4));
CCInfo.AnalyzeReturn(Outs, RetCC_XCore);
@@ -1800,11 +1801,10 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
!LD->isVolatile() && !LD->isIndexed() &&
Chain.reachesChainWithoutSideEffects(SDValue(LD, 1))) {
bool isTail = isInTailCallPosition(DAG, ST, Chain);
- return DAG.getMemmove(Chain, dl, ST->getBasePtr(),
- LD->getBasePtr(),
- DAG.getConstant(StoreBits/8, dl, MVT::i32),
- Alignment, false, isTail, ST->getPointerInfo(),
- LD->getPointerInfo());
+ return DAG.getMemmove(Chain, dl, ST->getBasePtr(), LD->getBasePtr(),
+ DAG.getConstant(StoreBits / 8, dl, MVT::i32),
+ Align(Alignment), false, isTail,
+ ST->getPointerInfo(), LD->getPointerInfo());
}
}
break;
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.h b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.h
index b4f25feda7fe..45c21fbf2b74 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreISelLowering.h
@@ -22,7 +22,6 @@ namespace llvm {
  // Forward declarations
class XCoreSubtarget;
- class XCoreTargetMachine;
namespace XCoreISD {
enum NodeType : unsigned {
@@ -127,14 +126,14 @@ namespace llvm {
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
- unsigned
+ Register
getExceptionPointerRegister(const Constant *PersonalityFn) const override {
return XCore::R0;
}
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
- unsigned
+ Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
return XCore::R1;
}
@@ -188,7 +187,8 @@ namespace llvm {
SDValue LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
- MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override;
+ MachineMemOperand::Flags getTargetMMOFlags(
+ const Instruction &I) const override;
// Inline asm support
std::pair<unsigned, const TargetRegisterClass *>
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
index db44a56be538..1b21e1ce195b 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -163,7 +163,7 @@ static inline XCore::CondCode GetOppositeBranchCondition(XCore::CondCode CC)
}
}
-/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
+/// analyzeBranch - Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
/// implemented for a target). Upon success, this returns false and returns
/// with the following information in various cases:
@@ -357,7 +357,7 @@ void XCoreInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill,
+ Register SrcReg, bool isKill,
int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const
@@ -370,7 +370,7 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FrameIndex),
MachineMemOperand::MOStore, MFI.getObjectSize(FrameIndex),
- MFI.getObjectAlignment(FrameIndex));
+ MFI.getObjectAlign(FrameIndex));
BuildMI(MBB, I, DL, get(XCore::STWFI))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FrameIndex)
@@ -380,7 +380,7 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- unsigned DestReg, int FrameIndex,
+ Register DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const
{
@@ -392,7 +392,7 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FrameIndex),
MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
- MFI.getObjectAlignment(FrameIndex));
+ MFI.getObjectAlign(FrameIndex));
BuildMI(MBB, I, DL, get(XCore::LDWFI), DestReg)
.addFrameIndex(FrameIndex)
.addImm(0)
@@ -443,7 +443,7 @@ MachineBasicBlock::iterator XCoreInstrInfo::loadImmediate(
MachineConstantPool *ConstantPool = MBB.getParent()->getConstantPool();
const Constant *C = ConstantInt::get(
Type::getInt32Ty(MBB.getParent()->getFunction().getContext()), Value);
- unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align(4));
return BuildMI(MBB, MI, dl, get(XCore::LDWCP_lru6), Reg)
.addConstantPoolIndex(Idx)
.getInstr();
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.h
index 057fb763efbf..1fbb293bde60 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.h
@@ -68,13 +68,13 @@ public:
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIndex,
+ Register SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIndex,
+ Register DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.td
index 18f02e1d80f0..aa3739d0335e 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreInstrInfo.td
@@ -535,7 +535,7 @@ let hasSideEffects = 0, isReMaterializable = 1 in
def LDAWDP_ru6: _FRU6<0b011000, (outs RRegs:$a), (ins i32imm:$b),
"ldaw $a, dp[$b]", []>;
-let isReMaterializable = 1 in
+let isReMaterializable = 1 in
def LDAWDP_lru6: _FLRU6<0b011000, (outs RRegs:$a), (ins i32imm:$b),
"ldaw $a, dp[$b]",
[(set RRegs:$a, (dprelwrapper tglobaladdr:$b))]>;
@@ -974,17 +974,17 @@ def SETDP_1r : _F1R<0b001100, (outs), (ins GRRegs:$a), "set dp, $a", []>;
let hasSideEffects=0 in
def SETCP_1r : _F1R<0b001101, (outs), (ins GRRegs:$a), "set cp, $a", []>;
-let hasCtrlDep = 1 in
+let hasCtrlDep = 1 in
def ECALLT_1r : _F1R<0b010011, (outs), (ins GRRegs:$a),
"ecallt $a",
[]>;
-let hasCtrlDep = 1 in
+let hasCtrlDep = 1 in
def ECALLF_1r : _F1R<0b010010, (outs), (ins GRRegs:$a),
"ecallf $a",
[]>;
-let isCall=1,
+let isCall=1,
// All calls clobber the link register and the non-callee-saved registers:
Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in {
def BLA_1r : _F1R<0b001000, (outs), (ins GRRegs:$a),
@@ -1141,7 +1141,7 @@ def : Pat<(truncstorei8 GRRegs:$val, (add GRRegs:$addr, GRRegs:$offset)),
(ST8_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
def : Pat<(truncstorei8 GRRegs:$val, GRRegs:$addr),
(ST8_l3r GRRegs:$val, GRRegs:$addr, (LDC_ru6 0))>;
-
+
def : Pat<(truncstorei16 GRRegs:$val, (lda16f GRRegs:$addr, GRRegs:$offset)),
(ST16_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
def : Pat<(truncstorei16 GRRegs:$val, GRRegs:$addr),
@@ -1154,6 +1154,9 @@ def : Pat<(store GRRegs:$val, (add GRRegs:$addr, immUs4:$offset)),
def : Pat<(store GRRegs:$val, GRRegs:$addr),
(STW_2rus GRRegs:$val, GRRegs:$addr, 0)>;
+/// bitrev
+def : Pat<(bitreverse GRRegs:$src), (BITREV_l2r GRRegs:$src)>;
+
/// cttz
def : Pat<(cttz GRRegs:$src), (CLZ_l2r (BITREV_l2r GRRegs:$src))>;
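
The added bitreverse pattern maps directly onto the XCore bitrev instruction and feeds the existing cttz lowering above, which counts trailing zeros as clz(bitrev(x)): reversing the bits turns trailing zeros into leading zeros. A small host-side sketch of that identity; bitreverse32 and clz32 are plain C++ helpers written for illustration, not compiler intrinsics or models of the XCore instructions:

    #include <cassert>
    #include <cstdint>

    // Reverse the bits of a 32-bit value (classic swap-halves technique).
    static uint32_t bitreverse32(uint32_t X) {
      X = ((X & 0x55555555u) << 1) | ((X >> 1) & 0x55555555u);
      X = ((X & 0x33333333u) << 2) | ((X >> 2) & 0x33333333u);
      X = ((X & 0x0F0F0F0Fu) << 4) | ((X >> 4) & 0x0F0F0F0Fu);
      X = ((X & 0x00FF00FFu) << 8) | ((X >> 8) & 0x00FF00FFu);
      return (X << 16) | (X >> 16);
    }

    // Count leading zeros; returns 32 for X == 0 in this sketch.
    static unsigned clz32(uint32_t X) {
      unsigned N = 0;
      for (uint32_t Mask = 0x80000000u; Mask && !(X & Mask); Mask >>= 1)
        ++N;
      return N;
    }

    int main() {
      uint32_t V = 0x000A0000u; // lowest set bit is bit 17
      // cttz(V) == clz(bitreverse(V)): trailing zeros become leading zeros.
      assert(clz32(bitreverse32(V)) == 17);
      return 0;
    }
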
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreMCInstLower.h b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreMCInstLower.h
index 0eaa84ef736b..efb359cc57e1 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreMCInstLower.h
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreMCInstLower.h
@@ -8,6 +8,7 @@
#ifndef LLVM_LIB_TARGET_XCORE_XCOREMCINSTLOWER_H
#define LLVM_LIB_TARGET_XCORE_XCOREMCINSTLOWER_H
+
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/Support/Compiler.h"
@@ -16,8 +17,6 @@ namespace llvm {
class MCInst;
class MCOperand;
class MachineInstr;
- class MachineFunction;
- class Mangler;
class AsmPrinter;
  /// This class is used to lower a MachineInstr into an MCInst.
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
index 0b4fcffbc655..ec44d2899dd5 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
@@ -43,7 +43,7 @@ int XCoreFunctionInfo::createLRSpillSlot(MachineFunction &MF) {
LRSpillSlot = MFI.CreateFixedObject(TRI.getSpillSize(RC), 0, true);
} else {
LRSpillSlot = MFI.CreateStackObject(TRI.getSpillSize(RC),
- TRI.getSpillAlignment(RC), true);
+ TRI.getSpillAlign(RC), true);
}
LRSpillSlotSet = true;
return LRSpillSlot;
@@ -56,8 +56,8 @@ int XCoreFunctionInfo::createFPSpillSlot(MachineFunction &MF) {
const TargetRegisterClass &RC = XCore::GRRegsRegClass;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
MachineFrameInfo &MFI = MF.getFrameInfo();
- FPSpillSlot = MFI.CreateStackObject(TRI.getSpillSize(RC),
- TRI.getSpillAlignment(RC), true);
+ FPSpillSlot =
+ MFI.CreateStackObject(TRI.getSpillSize(RC), TRI.getSpillAlign(RC), true);
FPSpillSlotSet = true;
return FPSpillSlot;
}
@@ -70,9 +70,9 @@ const int* XCoreFunctionInfo::createEHSpillSlot(MachineFunction &MF) {
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Size = TRI.getSpillSize(RC);
- unsigned Align = TRI.getSpillAlignment(RC);
- EHSpillSlot[0] = MFI.CreateStackObject(Size, Align, true);
- EHSpillSlot[1] = MFI.CreateStackObject(Size, Align, true);
+ Align Alignment = TRI.getSpillAlign(RC);
+ EHSpillSlot[0] = MFI.CreateStackObject(Size, Alignment, true);
+ EHSpillSlot[1] = MFI.CreateStackObject(Size, Alignment, true);
EHSpillSlotSet = true;
return EHSpillSlot;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
index 56fed26ebd7b..6799823f6fcb 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -246,11 +246,6 @@ XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
}
bool
-XCoreRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
- return true;
-}
-
-bool
XCoreRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.h
index 35a42e1a1457..f1eec7bc87b4 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.h
@@ -20,8 +20,6 @@
namespace llvm {
-class TargetInstrInfo;
-
struct XCoreRegisterInfo : public XCoreGenRegisterInfo {
public:
XCoreRegisterInfo();
@@ -34,8 +32,6 @@ public:
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
-
bool useFPForScavengingIndex(const MachineFunction &MF) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.td
index d9502939bae3..82f61d5865ab 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreRegisterInfo.td
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Declarations that describe the XCore register file
+// Declarations that describe the XCore register file
//===----------------------------------------------------------------------===//
class XCoreReg<string n> : Register<n> {
@@ -24,17 +24,17 @@ class Ri<bits<4> num, string n> : XCoreReg<n> {
// CPU registers
def R0 : Ri< 0, "r0">, DwarfRegNum<[0]>;
def R1 : Ri< 1, "r1">, DwarfRegNum<[1]>;
-def R2 : Ri< 2, "r2">, DwarfRegNum<[2]>;
+def R2 : Ri< 2, "r2">, DwarfRegNum<[2]>;
def R3 : Ri< 3, "r3">, DwarfRegNum<[3]>;
def R4 : Ri< 4, "r4">, DwarfRegNum<[4]>;
-def R5 : Ri< 5, "r5">, DwarfRegNum<[5]>;
+def R5 : Ri< 5, "r5">, DwarfRegNum<[5]>;
def R6 : Ri< 6, "r6">, DwarfRegNum<[6]>;
def R7 : Ri< 7, "r7">, DwarfRegNum<[7]>;
def R8 : Ri< 8, "r8">, DwarfRegNum<[8]>;
-def R9 : Ri< 9, "r9">, DwarfRegNum<[9]>;
+def R9 : Ri< 9, "r9">, DwarfRegNum<[9]>;
def R10 : Ri<10, "r10">, DwarfRegNum<[10]>;
def R11 : Ri<11, "r11">, DwarfRegNum<[11]>;
-def CP : Ri<12, "cp">, DwarfRegNum<[12]>;
+def CP : Ri<12, "cp">, DwarfRegNum<[12]>;
def DP : Ri<13, "dp">, DwarfRegNum<[13]>;
def SP : Ri<14, "sp">, DwarfRegNum<[14]>;
def LR : Ri<15, "lr">, DwarfRegNum<[15]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
index c86756e345a9..0d097076348c 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -17,11 +17,11 @@ using namespace llvm;
SDValue XCoreSelectionDAGInfo::EmitTargetCodeForMemcpy(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
+ SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
unsigned SizeBitWidth = Size.getValueSizeInBits();
// Call __memcpy_4 if the src, dst and size are all 4 byte aligned.
- if (!AlwaysInline && (Align & 3) == 0 &&
+ if (!AlwaysInline && Alignment >= Align(4) &&
DAG.MaskedValueIsZero(Size, APInt(SizeBitWidth, 3))) {
const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering();
TargetLowering::ArgListTy Args;
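
The memcpy hunk above restates the "call __memcpy_4 when src, dst and size are all 4-byte aligned" test in terms of Align. A standalone sketch of the same predicate on plain integers; canUseWordMemcpy is an illustrative name, and the real code proves the size's low two bits are zero via MaskedValueIsZero rather than testing at run time:

    #include <cassert>
    #include <cstdint>

    // True when a copy of Size bytes between pointers guaranteed to have the
    // given alignment can be done a word at a time: alignment of at least 4
    // and a size whose low two bits are zero.
    static bool canUseWordMemcpy(uint64_t Alignment, uint64_t Size) {
      return Alignment >= 4 && (Size & 3) == 0;
    }

    int main() {
      assert(canUseWordMemcpy(4, 16));  // aligned, whole words: word copy is fine
      assert(!canUseWordMemcpy(2, 16)); // under-aligned pointers
      assert(!canUseWordMemcpy(8, 10)); // size is not a multiple of 4
      return 0;
    }
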
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h
index 5dcef08391c9..2abf52677978 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -17,13 +17,11 @@
namespace llvm {
-class XCoreTargetMachine;
-
class XCoreSelectionDAGInfo : public SelectionDAGTargetInfo {
public:
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Op1, SDValue Op2,
- SDValue Op3, unsigned Align, bool isVolatile,
+ SDValue Op3, Align Alignment, bool isVolatile,
bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const override;
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
index 736bc4148a19..1eea1e37c253 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -54,7 +54,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const Triple &TT,
TT, CPU, FS, Options, getEffectiveRelocModel(RM),
getEffectiveXCoreCodeModel(CM), OL),
TLOF(std::make_unique<XCoreTargetObjectFile>()),
- Subtarget(TT, CPU, FS, *this) {
+ Subtarget(TT, std::string(CPU), std::string(FS), *this) {
initAsmInfo();
}
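
The subtarget constructor now spells out std::string(CPU) and std::string(FS) instead of relying on an implicit StringRef conversion; my reading is that this tracks the StringRef-to-std::string conversion becoming explicit around this LLVM version, which the diff itself does not state. A tiny sketch of the general pattern with a made-up Ref type, not LLVM's StringRef:

    #include <cassert>
    #include <string>

    // A stripped-down string-view-like type whose conversion to std::string is
    // explicit, so callers have to spell out the (potentially allocating) copy.
    struct Ref {
      const char *Data;
      explicit operator std::string() const { return std::string(Data); }
    };

    int main() {
      Ref CPU{"xcore"};
      // std::string S = CPU;           // would not compile: conversion is explicit
      std::string S = std::string(CPU); // the pattern used in the constructor above
      assert(S == "xcore");
      return 0;
    }
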
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
index fe743b28b4b4..9fec74a372fb 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -140,10 +140,9 @@ MCSection *XCoreTargetObjectFile::SelectSectionForGlobal(
report_fatal_error("Target does not support TLS or Common sections");
}
-MCSection *XCoreTargetObjectFile::getSectionForConstant(const DataLayout &DL,
- SectionKind Kind,
- const Constant *C,
- unsigned &Align) const {
+MCSection *XCoreTargetObjectFile::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C,
+ Align &Alignment) const {
if (Kind.isMergeableConst4()) return MergeableConst4Section;
if (Kind.isMergeableConst8()) return MergeableConst8Section;
if (Kind.isMergeableConst16()) return MergeableConst16Section;
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetObjectFile.h b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
index fd172c55919f..73cc6686d775 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
@@ -32,7 +32,7 @@ static const unsigned CodeModelLargeSize = 256;
MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C,
- unsigned &Align) const override;
+ Align &Alignment) const override;
};
} // end namespace llvm